lynkr 7.2.5 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +4 -2
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +76 -3
- package/src/config/index.js +113 -35
- package/src/context/toon.js +173 -0
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +686 -211
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
package/test/routing.test.js
CHANGED
|
@@ -11,13 +11,22 @@ describe("Routing Logic", () => {
|
|
|
11
11
|
delete require.cache[require.resolve("../src/config/index.js")];
|
|
12
12
|
delete require.cache[require.resolve("../src/clients/routing")];
|
|
13
13
|
delete require.cache[require.resolve("../src/routing/index.js")];
|
|
14
|
-
delete require.cache[require.resolve("../src/
|
|
14
|
+
delete require.cache[require.resolve("../src/routing/model-tiers")];
|
|
15
|
+
delete require.cache[require.resolve("../src/routing/complexity-analyzer")];
|
|
16
|
+
delete require.cache[require.resolve("../src/routing/cost-optimizer")];
|
|
17
|
+
delete require.cache[require.resolve("../src/routing/agentic-detector")];
|
|
15
18
|
|
|
16
19
|
// Store original config
|
|
17
20
|
originalConfig = { ...process.env };
|
|
18
|
-
|
|
21
|
+
|
|
19
22
|
// Explicitly set valid fallback to override any local .env pollution (e.g. lmstudio)
|
|
20
23
|
process.env.FALLBACK_PROVIDER = "databricks";
|
|
24
|
+
|
|
25
|
+
// Ensure no TIER_* vars leak between tests
|
|
26
|
+
process.env.TIER_SIMPLE = "";
|
|
27
|
+
process.env.TIER_MEDIUM = "";
|
|
28
|
+
process.env.TIER_COMPLEX = "";
|
|
29
|
+
process.env.TIER_REASONING = "";
|
|
21
30
|
});
|
|
22
31
|
|
|
23
32
|
afterEach(() => {
|
|
@@ -25,23 +34,24 @@ describe("Routing Logic", () => {
|
|
|
25
34
|
process.env = originalConfig;
|
|
26
35
|
});
|
|
27
36
|
|
|
28
|
-
describe("
|
|
29
|
-
it("should return configured provider when
|
|
37
|
+
describe("static routing (tier routing disabled)", () => {
|
|
38
|
+
it("should return configured provider when tier routing is disabled", async () => {
|
|
30
39
|
process.env.MODEL_PROVIDER = "databricks";
|
|
31
|
-
process.env.
|
|
40
|
+
process.env.DATABRICKS_API_KEY = "test-key";
|
|
41
|
+
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
32
42
|
|
|
33
43
|
config = require("../src/config");
|
|
34
44
|
routing = require("../src/clients/routing");
|
|
35
45
|
|
|
36
46
|
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
37
|
-
const
|
|
47
|
+
const result = await routing.determineProviderSmart(payload);
|
|
38
48
|
|
|
39
|
-
assert.strictEqual(provider, "databricks");
|
|
49
|
+
assert.strictEqual(result.provider, "databricks");
|
|
50
|
+
assert.strictEqual(result.method, "static");
|
|
40
51
|
});
|
|
41
52
|
|
|
42
|
-
it("should
|
|
53
|
+
it("should return ollama when MODEL_PROVIDER is ollama", async () => {
|
|
43
54
|
process.env.MODEL_PROVIDER = "ollama";
|
|
44
|
-
process.env.PREFER_OLLAMA = "true";
|
|
45
55
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
46
56
|
|
|
47
57
|
config = require("../src/config");
|
|
@@ -52,15 +62,14 @@ describe("Routing Logic", () => {
|
|
|
52
62
|
tools: [],
|
|
53
63
|
};
|
|
54
64
|
|
|
55
|
-
const
|
|
56
|
-
assert.strictEqual(provider, "ollama");
|
|
65
|
+
const result = await routing.determineProviderSmart(payload);
|
|
66
|
+
assert.strictEqual(result.provider, "ollama");
|
|
67
|
+
assert.strictEqual(result.method, "static");
|
|
57
68
|
});
|
|
58
69
|
|
|
59
|
-
it("should
|
|
70
|
+
it("should return primary provider regardless of tool count", async () => {
|
|
60
71
|
process.env.MODEL_PROVIDER = "ollama";
|
|
61
|
-
process.env.PREFER_OLLAMA = "true";
|
|
62
72
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
63
|
-
process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "3";
|
|
64
73
|
|
|
65
74
|
config = require("../src/config");
|
|
66
75
|
routing = require("../src/clients/routing");
|
|
@@ -73,25 +82,15 @@ describe("Routing Logic", () => {
|
|
|
73
82
|
],
|
|
74
83
|
};
|
|
75
84
|
|
|
76
|
-
const
|
|
77
|
-
assert.strictEqual(provider, "ollama");
|
|
85
|
+
const result = await routing.determineProviderSmart(payload);
|
|
86
|
+
assert.strictEqual(result.provider, "ollama");
|
|
87
|
+
assert.strictEqual(result.method, "static");
|
|
78
88
|
});
|
|
79
89
|
|
|
80
|
-
it("should
|
|
81
|
-
process.env.MODEL_PROVIDER = "
|
|
82
|
-
process.env.PREFER_OLLAMA = "true";
|
|
83
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
84
|
-
process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "3";
|
|
85
|
-
process.env.OPENROUTER_MAX_TOOLS_FOR_ROUTING = "3"; // Set same as ollama to skip openrouter tier
|
|
86
|
-
process.env.FALLBACK_PROVIDER = "databricks";
|
|
87
|
-
process.env.FALLBACK_ENABLED = "true"; // Ensure fallback is enabled
|
|
90
|
+
it("should return primary provider even with many tools", async () => {
|
|
91
|
+
process.env.MODEL_PROVIDER = "databricks";
|
|
88
92
|
process.env.DATABRICKS_API_KEY = "test-key";
|
|
89
93
|
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
90
|
-
// Set Azure OpenAI to empty to prevent dotenv from loading .env values
|
|
91
|
-
// dotenv won't override existing vars, even if empty
|
|
92
|
-
process.env.AZURE_OPENAI_ENDPOINT = "";
|
|
93
|
-
process.env.AZURE_OPENAI_API_KEY = "";
|
|
94
|
-
process.env.OPENROUTER_API_KEY = "";
|
|
95
94
|
|
|
96
95
|
config = require("../src/config");
|
|
97
96
|
routing = require("../src/clients/routing");
|
|
@@ -107,16 +106,13 @@ describe("Routing Logic", () => {
|
|
|
107
106
|
],
|
|
108
107
|
};
|
|
109
108
|
|
|
110
|
-
const
|
|
111
|
-
assert.strictEqual(provider, "databricks");
|
|
109
|
+
const result = await routing.determineProviderSmart(payload);
|
|
110
|
+
assert.strictEqual(result.provider, "databricks");
|
|
111
|
+
assert.strictEqual(result.method, "static");
|
|
112
112
|
});
|
|
113
113
|
|
|
114
|
-
it("should
|
|
115
|
-
process.env.MODEL_PROVIDER = "
|
|
116
|
-
process.env.PREFER_OLLAMA = "true";
|
|
117
|
-
process.env.OLLAMA_MODEL = "llama3:latest"; // Non-tool-capable model
|
|
118
|
-
process.env.OLLAMA_FALLBACK_PROVIDER = "databricks";
|
|
119
|
-
process.env.FALLBACK_ENABLED = "true"; // Ensure fallback is enabled
|
|
114
|
+
it("should return configured MODEL_PROVIDER", async () => {
|
|
115
|
+
process.env.MODEL_PROVIDER = "databricks";
|
|
120
116
|
process.env.DATABRICKS_API_KEY = "test-key";
|
|
121
117
|
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
122
118
|
|
|
@@ -128,42 +124,55 @@ describe("Routing Logic", () => {
|
|
|
128
124
|
tools: [{ name: "tool1", description: "test" }],
|
|
129
125
|
};
|
|
130
126
|
|
|
131
|
-
const
|
|
132
|
-
assert.strictEqual(provider, "databricks");
|
|
127
|
+
const result = await routing.determineProviderSmart(payload);
|
|
128
|
+
assert.strictEqual(result.provider, "databricks");
|
|
129
|
+
assert.strictEqual(result.method, "static");
|
|
133
130
|
});
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
describe("determineProviderSmart()", () => {
|
|
134
|
+
it("should return static routing when tier routing is disabled (no TIER_* vars)", async () => {
|
|
135
|
+
process.env.MODEL_PROVIDER = "databricks";
|
|
136
|
+
process.env.DATABRICKS_API_KEY = "test-key";
|
|
137
|
+
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
134
138
|
|
|
135
|
-
|
|
139
|
+
config = require("../src/config");
|
|
140
|
+
routing = require("../src/clients/routing");
|
|
141
|
+
|
|
142
|
+
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
143
|
+
const result = await routing.determineProviderSmart(payload);
|
|
144
|
+
|
|
145
|
+
assert.strictEqual(result.provider, "databricks");
|
|
146
|
+
assert.strictEqual(result.method, "static");
|
|
147
|
+
assert.strictEqual(result.reason, "tier_routing_disabled");
|
|
148
|
+
assert.strictEqual(result.model, null);
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
it("should use tier routing when TIER_* vars are set", async () => {
|
|
136
152
|
process.env.MODEL_PROVIDER = "ollama";
|
|
137
|
-
process.env.PREFER_OLLAMA = "true";
|
|
138
153
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
139
|
-
process.env.
|
|
140
|
-
process.env.
|
|
154
|
+
process.env.TIER_SIMPLE = "ollama:llama3.2";
|
|
155
|
+
process.env.TIER_MEDIUM = "ollama:llama3.2";
|
|
156
|
+
process.env.TIER_COMPLEX = "databricks:claude-sonnet";
|
|
157
|
+
process.env.TIER_REASONING = "databricks:claude-sonnet";
|
|
141
158
|
process.env.DATABRICKS_API_KEY = "test-key";
|
|
142
159
|
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
143
160
|
|
|
144
161
|
config = require("../src/config");
|
|
145
162
|
routing = require("../src/clients/routing");
|
|
146
163
|
|
|
147
|
-
const payload = {
|
|
148
|
-
|
|
149
|
-
tools: [
|
|
150
|
-
{ name: "tool1", description: "test" },
|
|
151
|
-
{ name: "tool2", description: "test" },
|
|
152
|
-
{ name: "tool3", description: "test" },
|
|
153
|
-
{ name: "tool4", description: "test" },
|
|
154
|
-
],
|
|
155
|
-
};
|
|
164
|
+
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
165
|
+
const result = await routing.determineProviderSmart(payload);
|
|
156
166
|
|
|
157
|
-
//
|
|
158
|
-
|
|
159
|
-
assert.
|
|
167
|
+
// When tier routing is enabled, method should not be 'static'
|
|
168
|
+
assert.notStrictEqual(result.method, "static");
|
|
169
|
+
assert.ok(result.provider, "provider should be set");
|
|
160
170
|
});
|
|
161
171
|
});
|
|
162
172
|
|
|
163
173
|
describe("isFallbackEnabled()", () => {
|
|
164
174
|
it("should return true by default", () => {
|
|
165
175
|
process.env.MODEL_PROVIDER = "ollama";
|
|
166
|
-
process.env.PREFER_OLLAMA = "true";
|
|
167
176
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
168
177
|
// Override .env file which sets FALLBACK_ENABLED=false
|
|
169
178
|
// Test default behavior when not set to "false"
|
|
@@ -177,7 +186,6 @@ describe("Routing Logic", () => {
|
|
|
177
186
|
|
|
178
187
|
it("should return false when explicitly disabled", () => {
|
|
179
188
|
process.env.MODEL_PROVIDER = "ollama";
|
|
180
|
-
process.env.PREFER_OLLAMA = "true";
|
|
181
189
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
182
190
|
process.env.FALLBACK_ENABLED = "false";
|
|
183
191
|
|
|
@@ -191,7 +199,6 @@ describe("Routing Logic", () => {
|
|
|
191
199
|
describe("getFallbackProvider()", () => {
|
|
192
200
|
it("should return databricks by default", () => {
|
|
193
201
|
process.env.MODEL_PROVIDER = "ollama";
|
|
194
|
-
process.env.PREFER_OLLAMA = "true";
|
|
195
202
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
196
203
|
process.env.DATABRICKS_API_KEY = "test-key";
|
|
197
204
|
process.env.DATABRICKS_API_BASE = "http://test.com";
|
|
@@ -204,7 +211,6 @@ describe("Routing Logic", () => {
|
|
|
204
211
|
|
|
205
212
|
it("should return configured fallback provider", () => {
|
|
206
213
|
process.env.MODEL_PROVIDER = "ollama";
|
|
207
|
-
process.env.PREFER_OLLAMA = "true";
|
|
208
214
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
209
215
|
process.env.FALLBACK_PROVIDER = "azure-anthropic";
|
|
210
216
|
process.env.AZURE_ANTHROPIC_ENDPOINT = "http://test.com";
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
const assert = require("assert");
|
|
2
|
+
const { describe, it } = require("node:test");
|
|
3
|
+
|
|
4
|
+
const { applyToonCompression } = require("../src/context/toon");
|
|
5
|
+
|
|
6
|
+
function createLargeJsonString() {
|
|
7
|
+
return JSON.stringify({
|
|
8
|
+
rows: Array.from({ length: 8 }, (_, idx) => ({
|
|
9
|
+
id: idx + 1,
|
|
10
|
+
label: `item-${idx + 1}`,
|
|
11
|
+
value: `value-${idx + 1}`.repeat(20),
|
|
12
|
+
})),
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
describe("TOON compression", () => {
|
|
17
|
+
it("is a no-op when TOON is disabled", () => {
|
|
18
|
+
const payload = {
|
|
19
|
+
stream: false,
|
|
20
|
+
tool_choice: { type: "auto" },
|
|
21
|
+
tools: [{ name: "Read", input_schema: { type: "object", properties: {} } }],
|
|
22
|
+
messages: [{ role: "user", content: createLargeJsonString() }],
|
|
23
|
+
};
|
|
24
|
+
const before = JSON.parse(JSON.stringify(payload));
|
|
25
|
+
|
|
26
|
+
const { payload: after, stats } = applyToonCompression(
|
|
27
|
+
payload,
|
|
28
|
+
{ enabled: false, minBytes: 1, failOpen: true },
|
|
29
|
+
{ encode: () => "should-not-run" },
|
|
30
|
+
);
|
|
31
|
+
|
|
32
|
+
assert.deepStrictEqual(after, before);
|
|
33
|
+
assert.strictEqual(stats.enabled, false);
|
|
34
|
+
assert.strictEqual(stats.convertedCount, 0);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it("falls back safely when encoder throws (fail-open)", () => {
|
|
38
|
+
const payload = {
|
|
39
|
+
messages: [{ role: "user", content: createLargeJsonString() }],
|
|
40
|
+
};
|
|
41
|
+
const original = payload.messages[0].content;
|
|
42
|
+
|
|
43
|
+
const { payload: after, stats } = applyToonCompression(
|
|
44
|
+
payload,
|
|
45
|
+
{ enabled: true, minBytes: 1, failOpen: true, logStats: false },
|
|
46
|
+
{
|
|
47
|
+
encode: () => {
|
|
48
|
+
throw new Error("simulated toon encode failure");
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
);
|
|
52
|
+
|
|
53
|
+
assert.strictEqual(after.messages[0].content, original);
|
|
54
|
+
assert.strictEqual(stats.failureCount, 1);
|
|
55
|
+
assert.strictEqual(stats.convertedCount, 0);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("does not mutate protocol fields while compressing eligible message content", () => {
|
|
59
|
+
const payload = {
|
|
60
|
+
model: "kimi-k2.5",
|
|
61
|
+
stream: true,
|
|
62
|
+
tool_choice: { type: "tool", name: "Read" },
|
|
63
|
+
tools: [
|
|
64
|
+
{
|
|
65
|
+
name: "Read",
|
|
66
|
+
description: "Read files",
|
|
67
|
+
input_schema: {
|
|
68
|
+
type: "object",
|
|
69
|
+
properties: { file_path: { type: "string" } },
|
|
70
|
+
required: ["file_path"],
|
|
71
|
+
},
|
|
72
|
+
},
|
|
73
|
+
],
|
|
74
|
+
messages: [
|
|
75
|
+
{ role: "user", content: createLargeJsonString() },
|
|
76
|
+
{ role: "tool", content: createLargeJsonString() }, // tool role should never be touched
|
|
77
|
+
],
|
|
78
|
+
};
|
|
79
|
+
const beforeTools = JSON.parse(JSON.stringify(payload.tools));
|
|
80
|
+
const beforeToolChoice = JSON.parse(JSON.stringify(payload.tool_choice));
|
|
81
|
+
const beforeToolRoleContent = payload.messages[1].content;
|
|
82
|
+
|
|
83
|
+
const { payload: after, stats } = applyToonCompression(
|
|
84
|
+
payload,
|
|
85
|
+
{ enabled: true, minBytes: 1, failOpen: false, logStats: false },
|
|
86
|
+
{ encode: () => "rows[1]{id,label,value}:\n 1,item-1,value-1" },
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
assert.strictEqual(after.messages[0].content, "rows[1]{id,label,value}:\n 1,item-1,value-1");
|
|
90
|
+
assert.strictEqual(after.messages[1].content, beforeToolRoleContent);
|
|
91
|
+
assert.deepStrictEqual(after.tools, beforeTools);
|
|
92
|
+
assert.deepStrictEqual(after.tool_choice, beforeToolChoice);
|
|
93
|
+
assert.strictEqual(after.stream, true);
|
|
94
|
+
assert.strictEqual(after.model, "kimi-k2.5");
|
|
95
|
+
assert.strictEqual(stats.convertedCount, 1);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it("compresses Anthropic text blocks while preserving tool protocol blocks", () => {
|
|
99
|
+
const largeJson = createLargeJsonString();
|
|
100
|
+
const payload = {
|
|
101
|
+
messages: [
|
|
102
|
+
{
|
|
103
|
+
role: "user",
|
|
104
|
+
content: [
|
|
105
|
+
{ type: "text", text: largeJson },
|
|
106
|
+
{ type: "input_text", input_text: largeJson },
|
|
107
|
+
{
|
|
108
|
+
type: "tool_result",
|
|
109
|
+
tool_use_id: "toolu_123",
|
|
110
|
+
content: largeJson,
|
|
111
|
+
is_error: false,
|
|
112
|
+
},
|
|
113
|
+
],
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
const originalToolResultContent = payload.messages[0].content[2].content;
|
|
119
|
+
|
|
120
|
+
const { payload: after, stats } = applyToonCompression(
|
|
121
|
+
payload,
|
|
122
|
+
{ enabled: true, minBytes: 1, failOpen: false, logStats: false },
|
|
123
|
+
{ encode: () => "rows[1]{id,label,value}:\n 1,item-1,value-1" },
|
|
124
|
+
);
|
|
125
|
+
|
|
126
|
+
assert.strictEqual(after.messages[0].content[0].text, "rows[1]{id,label,value}:\n 1,item-1,value-1");
|
|
127
|
+
assert.strictEqual(after.messages[0].content[1].input_text, "rows[1]{id,label,value}:\n 1,item-1,value-1");
|
|
128
|
+
assert.strictEqual(after.messages[0].content[2].content, originalToolResultContent);
|
|
129
|
+
assert.strictEqual(stats.convertedCount, 2);
|
|
130
|
+
});
|
|
131
|
+
});
|