lynkr 7.2.5 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +4 -2
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +76 -3
- package/src/config/index.js +113 -35
- package/src/context/toon.js +173 -0
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +686 -211
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
|
@@ -56,15 +56,20 @@ function testRoutingDecisionPerformance() {
|
|
|
56
56
|
delete require.cache[require.resolve('../src/config')];
|
|
57
57
|
delete require.cache[require.resolve('../src/clients/routing')];
|
|
58
58
|
|
|
59
|
-
process.env.
|
|
59
|
+
process.env.MODEL_PROVIDER = 'ollama';
|
|
60
60
|
process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
|
|
61
61
|
process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
|
|
62
62
|
process.env.DATABRICKS_API_KEY = 'test-key';
|
|
63
63
|
process.env.DATABRICKS_API_BASE = 'http://test.com';
|
|
64
|
+
// Set TIER_* to empty = tier routing disabled, determineProviderSync returns static provider
|
|
65
|
+
process.env.TIER_SIMPLE = "";
|
|
66
|
+
process.env.TIER_MEDIUM = "";
|
|
67
|
+
process.env.TIER_COMPLEX = "";
|
|
68
|
+
process.env.TIER_REASONING = "";
|
|
64
69
|
|
|
65
70
|
const routing = require('../src/clients/routing');
|
|
66
71
|
|
|
67
|
-
log('\n
|
|
72
|
+
log('\n Benchmarking routing decisions...', 'cyan');
|
|
68
73
|
|
|
69
74
|
// Test 1: Simple request (0 tools)
|
|
70
75
|
const simplePayload = {
|
|
@@ -75,10 +80,10 @@ function testRoutingDecisionPerformance() {
|
|
|
75
80
|
const { duration: simpleTime, throughput: simpleThroughput } = benchmark(
|
|
76
81
|
'Simple request routing',
|
|
77
82
|
100000,
|
|
78
|
-
() => routing.
|
|
83
|
+
() => routing.determineProviderSync(simplePayload)
|
|
79
84
|
);
|
|
80
85
|
|
|
81
|
-
log(
|
|
86
|
+
log(` Simple request: ${simpleTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
82
87
|
log(` Average: ${(simpleTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
83
88
|
log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
84
89
|
|
|
@@ -94,10 +99,10 @@ function testRoutingDecisionPerformance() {
|
|
|
94
99
|
const { duration: complexTime, throughput: complexThroughput } = benchmark(
|
|
95
100
|
'Complex request routing',
|
|
96
101
|
100000,
|
|
97
|
-
() => routing.
|
|
102
|
+
() => routing.determineProviderSync(complexPayload)
|
|
98
103
|
);
|
|
99
104
|
|
|
100
|
-
log(
|
|
105
|
+
log(` Complex request: ${complexTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
101
106
|
log(` Average: ${(complexTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
102
107
|
log(` Throughput: ${complexThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
103
108
|
|
|
@@ -110,18 +115,18 @@ function testRoutingDecisionPerformance() {
|
|
|
110
115
|
const { duration: toolCheckTime, throughput: toolCheckThroughput } = benchmark(
|
|
111
116
|
'Tool capability check',
|
|
112
117
|
100000,
|
|
113
|
-
() => routing.
|
|
118
|
+
() => routing.determineProviderSync(toolCapabilityPayload)
|
|
114
119
|
);
|
|
115
120
|
|
|
116
|
-
log(
|
|
121
|
+
log(` Tool capability check: ${toolCheckTime.toFixed(2)}ms for 100k decisions`, 'cyan');
|
|
117
122
|
log(` Average: ${(toolCheckTime / 100000).toFixed(6)}ms per decision`, 'blue');
|
|
118
123
|
log(` Throughput: ${toolCheckThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
119
124
|
|
|
120
125
|
// Analysis
|
|
121
|
-
log('\n
|
|
126
|
+
log('\n Analysis:', 'yellow');
|
|
122
127
|
log(` Routing adds <0.01ms per request (negligible overhead)`, 'green');
|
|
123
128
|
log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
|
|
124
|
-
log(`
|
|
129
|
+
log(` Routing is extremely fast and won't impact request latency`, 'green');
|
|
125
130
|
|
|
126
131
|
return {
|
|
127
132
|
simpleTime,
|
|
@@ -141,7 +146,7 @@ function testMetricsOverhead() {
|
|
|
141
146
|
const { getMetricsCollector } = require('../src/observability/metrics');
|
|
142
147
|
const metrics = getMetricsCollector();
|
|
143
148
|
|
|
144
|
-
log('\n
|
|
149
|
+
log('\n Benchmarking metrics operations...', 'cyan');
|
|
145
150
|
|
|
146
151
|
// Test recording provider routing
|
|
147
152
|
const { duration: routingTime, throughput: routingThroughput } = benchmark(
|
|
@@ -150,7 +155,7 @@ function testMetricsOverhead() {
|
|
|
150
155
|
() => metrics.recordProviderRouting('ollama')
|
|
151
156
|
);
|
|
152
157
|
|
|
153
|
-
log(
|
|
158
|
+
log(` Provider routing: ${routingTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
154
159
|
log(` Average: ${(routingTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
155
160
|
log(` Throughput: ${routingThroughput.toLocaleString()} ops/sec`, 'green');
|
|
156
161
|
|
|
@@ -161,7 +166,7 @@ function testMetricsOverhead() {
|
|
|
161
166
|
() => metrics.recordProviderSuccess('ollama', 450)
|
|
162
167
|
);
|
|
163
168
|
|
|
164
|
-
log(
|
|
169
|
+
log(` Provider success: ${successTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
165
170
|
log(` Average: ${(successTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
166
171
|
log(` Throughput: ${successThroughput.toLocaleString()} ops/sec`, 'green');
|
|
167
172
|
|
|
@@ -172,7 +177,7 @@ function testMetricsOverhead() {
|
|
|
172
177
|
() => metrics.recordFallbackAttempt('ollama', 'databricks', 'timeout')
|
|
173
178
|
);
|
|
174
179
|
|
|
175
|
-
log(
|
|
180
|
+
log(` Fallback attempts: ${fallbackTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
176
181
|
log(` Average: ${(fallbackTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
177
182
|
log(` Throughput: ${fallbackThroughput.toLocaleString()} ops/sec`, 'green');
|
|
178
183
|
|
|
@@ -183,15 +188,15 @@ function testMetricsOverhead() {
|
|
|
183
188
|
() => metrics.recordCostSavings(0.001)
|
|
184
189
|
);
|
|
185
190
|
|
|
186
|
-
log(
|
|
191
|
+
log(` Cost savings: ${costTime.toFixed(2)}ms for 100k recordings`, 'cyan');
|
|
187
192
|
log(` Average: ${(costTime / 100000).toFixed(6)}ms per record`, 'blue');
|
|
188
193
|
log(` Throughput: ${costThroughput.toLocaleString()} ops/sec`, 'green');
|
|
189
194
|
|
|
190
195
|
// Analysis
|
|
191
196
|
const avgMetricsTime = (routingTime + successTime + fallbackTime + costTime) / 4 / 100000;
|
|
192
|
-
log('\n
|
|
197
|
+
log('\n Analysis:', 'yellow');
|
|
193
198
|
log(` Average metrics overhead: ${avgMetricsTime.toFixed(6)}ms per operation`, 'green');
|
|
194
|
-
log(`
|
|
199
|
+
log(` Metrics collection is extremely lightweight`, 'green');
|
|
195
200
|
|
|
196
201
|
return {
|
|
197
202
|
routingTime,
|
|
@@ -212,14 +217,19 @@ function testCombinedStack() {
|
|
|
212
217
|
delete require.cache[require.resolve('../src/clients/routing')];
|
|
213
218
|
delete require.cache[require.resolve('../src/observability/metrics')];
|
|
214
219
|
|
|
215
|
-
process.env.
|
|
220
|
+
process.env.MODEL_PROVIDER = 'ollama';
|
|
216
221
|
process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
|
|
217
222
|
process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
|
|
223
|
+
// Set TIER_* to empty = static routing via determineProviderSync
|
|
224
|
+
process.env.TIER_SIMPLE = "";
|
|
225
|
+
process.env.TIER_MEDIUM = "";
|
|
226
|
+
process.env.TIER_COMPLEX = "";
|
|
227
|
+
process.env.TIER_REASONING = "";
|
|
218
228
|
|
|
219
229
|
const routing = require('../src/clients/routing');
|
|
220
230
|
const { getMetricsCollector } = require('../src/observability/metrics');
|
|
221
231
|
|
|
222
|
-
log('\n
|
|
232
|
+
log('\n Benchmarking complete routing + metrics stack...', 'cyan');
|
|
223
233
|
|
|
224
234
|
// Simulate full routing decision + metrics recording
|
|
225
235
|
const payload = {
|
|
@@ -232,21 +242,21 @@ function testCombinedStack() {
|
|
|
232
242
|
50000,
|
|
233
243
|
() => {
|
|
234
244
|
const metrics = getMetricsCollector();
|
|
235
|
-
const provider = routing.
|
|
245
|
+
const provider = routing.determineProviderSync(payload);
|
|
236
246
|
metrics.recordProviderRouting(provider);
|
|
237
247
|
metrics.recordProviderSuccess(provider, 450);
|
|
238
248
|
}
|
|
239
249
|
);
|
|
240
250
|
|
|
241
|
-
log(
|
|
251
|
+
log(` Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
|
|
242
252
|
log(` Average: ${(fullTime / 50000).toFixed(6)}ms per request`, 'blue');
|
|
243
253
|
log(` Throughput: ${fullThroughput.toLocaleString()} ops/sec`, 'green');
|
|
244
254
|
|
|
245
255
|
// Analysis
|
|
246
|
-
log('\n
|
|
256
|
+
log('\n Analysis:', 'yellow');
|
|
247
257
|
const overhead = (fullTime / 50000);
|
|
248
258
|
log(` Total routing + metrics overhead: ${overhead.toFixed(6)}ms`, 'green');
|
|
249
|
-
log(`
|
|
259
|
+
log(` Negligible impact on request latency (<0.02ms)`, 'green');
|
|
250
260
|
|
|
251
261
|
return {
|
|
252
262
|
fullTime,
|
|
@@ -263,7 +273,7 @@ function testHelperFunctions() {
|
|
|
263
273
|
|
|
264
274
|
delete require.cache[require.resolve('../src/clients/databricks')];
|
|
265
275
|
|
|
266
|
-
log('\n
|
|
276
|
+
log('\n Benchmarking helper functions...', 'cyan');
|
|
267
277
|
|
|
268
278
|
// Test categorizeFailure (we'll simulate it)
|
|
269
279
|
const categorizeFailure = (error) => {
|
|
@@ -296,7 +306,7 @@ function testHelperFunctions() {
|
|
|
296
306
|
}
|
|
297
307
|
);
|
|
298
308
|
|
|
299
|
-
log(
|
|
309
|
+
log(` Categorize failure: ${categorizeTime.toFixed(2)}ms for 400k operations`, 'cyan');
|
|
300
310
|
log(` Average: ${(categorizeTime / 400000).toFixed(6)}ms per categorization`, 'blue');
|
|
301
311
|
log(` Throughput: ${(categorizeThroughput * 4).toLocaleString()} ops/sec`, 'green');
|
|
302
312
|
|
|
@@ -315,13 +325,13 @@ function testHelperFunctions() {
|
|
|
315
325
|
() => estimateCostSavings(1000, 500)
|
|
316
326
|
);
|
|
317
327
|
|
|
318
|
-
log(
|
|
328
|
+
log(` Cost estimation: ${costCalcTime.toFixed(2)}ms for 100k calculations`, 'cyan');
|
|
319
329
|
log(` Average: ${(costCalcTime / 100000).toFixed(6)}ms per calculation`, 'blue');
|
|
320
330
|
log(` Throughput: ${costCalcThroughput.toLocaleString()} ops/sec`, 'green');
|
|
321
331
|
|
|
322
|
-
log('\n
|
|
332
|
+
log('\n Analysis:', 'yellow');
|
|
323
333
|
log(` Helper functions add negligible overhead (<0.001ms)`, 'green');
|
|
324
|
-
log(`
|
|
334
|
+
log(` No performance impact from utility functions`, 'green');
|
|
325
335
|
|
|
326
336
|
return {
|
|
327
337
|
categorizeTime,
|
|
@@ -333,54 +343,54 @@ function testHelperFunctions() {
|
|
|
333
343
|
// FINAL REPORT
|
|
334
344
|
// =============================================================================
|
|
335
345
|
function printFinalReport(results) {
|
|
336
|
-
section('
|
|
346
|
+
section('HYBRID ROUTING PERFORMANCE SUMMARY');
|
|
337
347
|
|
|
338
348
|
console.log('\n');
|
|
339
|
-
console.log('
|
|
340
|
-
console.log('
|
|
341
|
-
console.log('
|
|
349
|
+
console.log('+---------------------------------------------------------+');
|
|
350
|
+
console.log('| HYBRID ROUTING PERFORMANCE |');
|
|
351
|
+
console.log('+---------------------------------------------------------+');
|
|
342
352
|
|
|
343
|
-
log(
|
|
344
|
-
log(
|
|
345
|
-
log(
|
|
353
|
+
log(`| 1. Routing Decisions |`, 'bright');
|
|
354
|
+
log(`| Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision |`, 'cyan');
|
|
355
|
+
log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);
|
|
346
356
|
|
|
347
|
-
console.log('
|
|
357
|
+
console.log('+---------------------------------------------------------+');
|
|
348
358
|
|
|
349
|
-
log(
|
|
350
|
-
log(
|
|
351
|
-
log(
|
|
359
|
+
log(`| 2. Metrics Collection |`, 'bright');
|
|
360
|
+
log(`| Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation |`, 'cyan');
|
|
361
|
+
log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);
|
|
352
362
|
|
|
353
|
-
console.log('
|
|
363
|
+
console.log('+---------------------------------------------------------+');
|
|
354
364
|
|
|
355
|
-
log(
|
|
356
|
-
log(
|
|
357
|
-
log(
|
|
358
|
-
log(
|
|
365
|
+
log(`| 3. Full Routing Stack |`, 'bright');
|
|
366
|
+
log(`| Average: ${results.combined.overhead.toFixed(6)}ms per request |`, 'cyan');
|
|
367
|
+
log(`| Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec |`, 'cyan');
|
|
368
|
+
log(`| Impact: ${colors.green}Negligible (<0.02ms)${colors.reset} |`);
|
|
359
369
|
|
|
360
|
-
console.log('
|
|
370
|
+
console.log('+---------------------------------------------------------+');
|
|
361
371
|
|
|
362
|
-
log(
|
|
363
|
-
log(
|
|
372
|
+
log(`| 4. Helper Functions |`, 'bright');
|
|
373
|
+
log(`| Overhead: ${colors.green}Negligible (<0.001ms)${colors.reset} |`);
|
|
364
374
|
|
|
365
|
-
console.log('
|
|
375
|
+
console.log('+---------------------------------------------------------+');
|
|
366
376
|
|
|
367
377
|
// Overall assessment
|
|
368
378
|
console.log('\n');
|
|
369
|
-
log('
|
|
370
|
-
log('
|
|
371
|
-
log('
|
|
372
|
-
log('
|
|
373
|
-
log('
|
|
374
|
-
|
|
375
|
-
console.log('\n
|
|
376
|
-
log('
|
|
377
|
-
log('
|
|
378
|
-
log('
|
|
379
|
-
log('
|
|
380
|
-
log('
|
|
379
|
+
log('Overall Performance Assessment:', 'bright');
|
|
380
|
+
log(' Routing overhead: <0.01ms per request', 'green');
|
|
381
|
+
log(' Metrics overhead: <0.01ms per request', 'green');
|
|
382
|
+
log(' Combined overhead: <0.02ms per request', 'green');
|
|
383
|
+
log(' No measurable impact on API latency', 'green');
|
|
384
|
+
|
|
385
|
+
console.log('\n Expected Real-World Performance:');
|
|
386
|
+
log(' Ollama (local): ~500-1000ms per request', 'cyan');
|
|
387
|
+
log(' Cloud (Databricks): ~1500-2000ms per request', 'cyan');
|
|
388
|
+
log(' Routing overhead: ~0.02ms (0.001-0.002% of total)', 'cyan');
|
|
389
|
+
log(' Latency savings with Ollama: 40-60% faster', 'green');
|
|
390
|
+
log(' Cost savings with Ollama: 100% (free)', 'green');
|
|
381
391
|
|
|
382
392
|
console.log('\n');
|
|
383
|
-
log('
|
|
393
|
+
log('Conclusion: Hybrid routing adds negligible overhead while', 'bright');
|
|
384
394
|
log(' providing significant latency and cost improvements!', 'bright');
|
|
385
395
|
console.log('\n');
|
|
386
396
|
}
|
|
@@ -389,7 +399,7 @@ function printFinalReport(results) {
|
|
|
389
399
|
// RUN ALL TESTS
|
|
390
400
|
// =============================================================================
|
|
391
401
|
async function runAllTests() {
|
|
392
|
-
log('\n
|
|
402
|
+
log('\n Starting Hybrid Routing Performance Test Suite\n', 'bright');
|
|
393
403
|
|
|
394
404
|
try {
|
|
395
405
|
const results = {
|
|
@@ -401,10 +411,10 @@ async function runAllTests() {
|
|
|
401
411
|
|
|
402
412
|
printFinalReport(results);
|
|
403
413
|
|
|
404
|
-
log('\n
|
|
414
|
+
log('\n All performance tests completed successfully!\n', 'green');
|
|
405
415
|
process.exit(0);
|
|
406
416
|
} catch (error) {
|
|
407
|
-
log(`\n
|
|
417
|
+
log(`\n Performance test suite failed: ${error.message}\n`, 'red');
|
|
408
418
|
console.error(error);
|
|
409
419
|
process.exit(1);
|
|
410
420
|
}
|
|
@@ -11,6 +11,12 @@ describe("llama.cpp Integration", () => {
|
|
|
11
11
|
delete require.cache[require.resolve("../src/config")];
|
|
12
12
|
delete require.cache[require.resolve("../src/clients/routing")];
|
|
13
13
|
delete require.cache[require.resolve("../src/clients/openrouter-utils")];
|
|
14
|
+
|
|
15
|
+
// Prevent .env TIER_* values from being picked up by dotenv
|
|
16
|
+
process.env.TIER_SIMPLE = "";
|
|
17
|
+
process.env.TIER_MEDIUM = "";
|
|
18
|
+
process.env.TIER_COMPLEX = "";
|
|
19
|
+
process.env.TIER_REASONING = "";
|
|
14
20
|
});
|
|
15
21
|
|
|
16
22
|
afterEach(() => {
|
|
@@ -104,33 +110,46 @@ describe("llama.cpp Integration", () => {
|
|
|
104
110
|
});
|
|
105
111
|
|
|
106
112
|
describe("Routing", () => {
|
|
107
|
-
it("should route to llamacpp when MODEL_PROVIDER is llamacpp", () => {
|
|
113
|
+
it("should route to llamacpp when MODEL_PROVIDER is llamacpp", async () => {
|
|
108
114
|
process.env.MODEL_PROVIDER = "llamacpp";
|
|
109
115
|
process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
|
|
110
|
-
process.env.PREFER_OLLAMA = "false";
|
|
111
116
|
|
|
112
117
|
const config = require("../src/config");
|
|
113
118
|
const routing = require("../src/clients/routing");
|
|
114
119
|
|
|
115
120
|
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
116
|
-
const
|
|
121
|
+
const result = await routing.determineProviderSmart(payload);
|
|
117
122
|
|
|
118
|
-
assert.strictEqual(provider, "llamacpp");
|
|
123
|
+
assert.strictEqual(result.provider, "llamacpp");
|
|
124
|
+
assert.strictEqual(result.method, "static");
|
|
119
125
|
});
|
|
120
126
|
|
|
121
|
-
it("should
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
127
|
+
it("should return static routing from determineProviderSmart when tiers disabled", async () => {
|
|
128
|
+
process.env.MODEL_PROVIDER = "llamacpp";
|
|
129
|
+
process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
|
|
130
|
+
|
|
131
|
+
const config = require("../src/config");
|
|
132
|
+
const routing = require("../src/clients/routing");
|
|
133
|
+
|
|
134
|
+
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
135
|
+
const result = await routing.determineProviderSmart(payload);
|
|
136
|
+
|
|
137
|
+
assert.strictEqual(result.provider, "llamacpp");
|
|
138
|
+
assert.strictEqual(result.method, "static");
|
|
139
|
+
assert.strictEqual(result.reason, "tier_routing_disabled");
|
|
125
140
|
});
|
|
126
141
|
|
|
127
142
|
it("should throw error when llamacpp is set as FALLBACK_PROVIDER", () => {
|
|
128
143
|
process.env.MODEL_PROVIDER = "ollama";
|
|
129
|
-
process.env.PREFER_OLLAMA = "true";
|
|
130
144
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
131
145
|
process.env.FALLBACK_PROVIDER = "llamacpp";
|
|
132
146
|
process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
|
|
133
147
|
process.env.FALLBACK_ENABLED = "true";
|
|
148
|
+
// Enable tier routing so fallback validation runs
|
|
149
|
+
process.env.TIER_SIMPLE = "ollama:llama3.2";
|
|
150
|
+
process.env.TIER_MEDIUM = "ollama:llama3.2";
|
|
151
|
+
process.env.TIER_COMPLEX = "ollama:llama3.2";
|
|
152
|
+
process.env.TIER_REASONING = "ollama:llama3.2";
|
|
134
153
|
|
|
135
154
|
assert.throws(
|
|
136
155
|
() => require("../src/config"),
|
|
@@ -11,6 +11,12 @@ describe("LM Studio Integration", () => {
|
|
|
11
11
|
delete require.cache[require.resolve("../src/config")];
|
|
12
12
|
delete require.cache[require.resolve("../src/clients/routing")];
|
|
13
13
|
delete require.cache[require.resolve("../src/clients/openrouter-utils")];
|
|
14
|
+
|
|
15
|
+
// Prevent .env TIER_* values from being picked up by dotenv
|
|
16
|
+
process.env.TIER_SIMPLE = "";
|
|
17
|
+
process.env.TIER_MEDIUM = "";
|
|
18
|
+
process.env.TIER_COMPLEX = "";
|
|
19
|
+
process.env.TIER_REASONING = "";
|
|
14
20
|
});
|
|
15
21
|
|
|
16
22
|
afterEach(() => {
|
|
@@ -105,26 +111,33 @@ describe("LM Studio Integration", () => {
|
|
|
105
111
|
it("should route to lmstudio when MODEL_PROVIDER is lmstudio", () => {
|
|
106
112
|
process.env.MODEL_PROVIDER = "lmstudio";
|
|
107
113
|
process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
|
|
108
|
-
process.env.PREFER_OLLAMA = "false";
|
|
109
114
|
|
|
110
115
|
const config = require("../src/config");
|
|
111
116
|
const routing = require("../src/clients/routing");
|
|
112
117
|
|
|
113
118
|
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
114
|
-
const provider = routing.
|
|
119
|
+
const provider = routing.determineProviderSync(payload);
|
|
115
120
|
|
|
116
121
|
assert.strictEqual(provider, "lmstudio");
|
|
117
122
|
});
|
|
118
123
|
|
|
119
|
-
it("should
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
124
|
+
it("should return static routing from determineProviderSmart when tiers disabled", async () => {
|
|
125
|
+
process.env.MODEL_PROVIDER = "lmstudio";
|
|
126
|
+
process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
|
|
127
|
+
|
|
128
|
+
const config = require("../src/config");
|
|
129
|
+
const routing = require("../src/clients/routing");
|
|
130
|
+
|
|
131
|
+
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
132
|
+
const result = await routing.determineProviderSmart(payload);
|
|
133
|
+
|
|
134
|
+
assert.strictEqual(result.provider, "lmstudio");
|
|
135
|
+
assert.strictEqual(result.method, "static");
|
|
136
|
+
assert.strictEqual(result.reason, "tier_routing_disabled");
|
|
123
137
|
});
|
|
124
138
|
|
|
125
139
|
it("should throw error when lmstudio is set as FALLBACK_PROVIDER", () => {
|
|
126
140
|
process.env.MODEL_PROVIDER = "ollama";
|
|
127
|
-
process.env.PREFER_OLLAMA = "true";
|
|
128
141
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
129
142
|
process.env.FALLBACK_PROVIDER = "lmstudio";
|
|
130
143
|
process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
|
|
@@ -312,7 +325,6 @@ describe("LM Studio Integration", () => {
|
|
|
312
325
|
describe("Fallback Prevention", () => {
|
|
313
326
|
it("should prevent lmstudio from being used as fallback provider", () => {
|
|
314
327
|
process.env.MODEL_PROVIDER = "ollama";
|
|
315
|
-
process.env.PREFER_OLLAMA = "true";
|
|
316
328
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
317
329
|
process.env.FALLBACK_PROVIDER = "lmstudio";
|
|
318
330
|
process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
|
|
@@ -11,6 +11,12 @@ describe("OpenAI Integration", () => {
|
|
|
11
11
|
delete require.cache[require.resolve("../src/config")];
|
|
12
12
|
delete require.cache[require.resolve("../src/clients/routing")];
|
|
13
13
|
delete require.cache[require.resolve("../src/clients/openrouter-utils")];
|
|
14
|
+
|
|
15
|
+
// Prevent .env TIER_* values from being picked up by dotenv
|
|
16
|
+
process.env.TIER_SIMPLE = "";
|
|
17
|
+
process.env.TIER_MEDIUM = "";
|
|
18
|
+
process.env.TIER_COMPLEX = "";
|
|
19
|
+
process.env.TIER_REASONING = "";
|
|
14
20
|
});
|
|
15
21
|
|
|
16
22
|
afterEach(() => {
|
|
@@ -92,50 +98,41 @@ describe("OpenAI Integration", () => {
|
|
|
92
98
|
});
|
|
93
99
|
|
|
94
100
|
describe("Routing", () => {
|
|
95
|
-
it("should route to openai when MODEL_PROVIDER is openai", () => {
|
|
101
|
+
it("should route to openai when MODEL_PROVIDER is openai", async () => {
|
|
96
102
|
process.env.MODEL_PROVIDER = "openai";
|
|
97
103
|
process.env.OPENAI_API_KEY = "sk-test-key";
|
|
98
|
-
process.env.PREFER_OLLAMA = "false";
|
|
99
104
|
|
|
100
105
|
const config = require("../src/config");
|
|
101
106
|
const routing = require("../src/clients/routing");
|
|
102
107
|
|
|
103
108
|
const payload = { messages: [{ role: "user", content: "test" }] };
|
|
104
|
-
const
|
|
109
|
+
const result = await routing.determineProviderSmart(payload);
|
|
105
110
|
|
|
106
|
-
assert.strictEqual(provider, "openai");
|
|
111
|
+
assert.strictEqual(result.provider, "openai");
|
|
112
|
+
assert.strictEqual(result.method, "static");
|
|
107
113
|
});
|
|
108
114
|
|
|
109
|
-
it("should
|
|
110
|
-
|
|
111
|
-
delete process.env.OPENROUTER_API_KEY;
|
|
112
|
-
|
|
113
|
-
process.env.MODEL_PROVIDER = "ollama";
|
|
114
|
-
process.env.PREFER_OLLAMA = "true";
|
|
115
|
-
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
116
|
-
process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "2";
|
|
117
|
-
process.env.OPENROUTER_MAX_TOOLS_FOR_ROUTING = "5";
|
|
115
|
+
it("should return static routing from determineProviderSmart when tiers disabled", async () => {
|
|
116
|
+
process.env.MODEL_PROVIDER = "openai";
|
|
118
117
|
process.env.OPENAI_API_KEY = "sk-test-key";
|
|
119
|
-
process.env.FALLBACK_ENABLED = "true";
|
|
120
|
-
process.env.FALLBACK_PROVIDER = "openai";
|
|
121
118
|
|
|
122
119
|
const config = require("../src/config");
|
|
123
120
|
const routing = require("../src/clients/routing");
|
|
124
121
|
|
|
125
|
-
// 10 tools - above both Ollama and OpenRouter thresholds, should go to fallback
|
|
126
122
|
const payload = {
|
|
127
123
|
messages: [{ role: "user", content: "test" }],
|
|
128
124
|
tools: Array.from({ length: 10 }, (_, i) => ({ name: `tool${i}`, description: "test" })),
|
|
129
125
|
};
|
|
130
126
|
|
|
131
|
-
const
|
|
132
|
-
//
|
|
133
|
-
assert.strictEqual(provider, "openai");
|
|
127
|
+
const result = await routing.determineProviderSmart(payload);
|
|
128
|
+
// No TIER_* vars = static routing
|
|
129
|
+
assert.strictEqual(result.provider, "openai");
|
|
130
|
+
assert.strictEqual(result.method, "static");
|
|
131
|
+
assert.strictEqual(result.reason, "tier_routing_disabled");
|
|
134
132
|
});
|
|
135
133
|
|
|
136
134
|
it("should use openai as fallback provider when configured", () => {
|
|
137
135
|
process.env.MODEL_PROVIDER = "ollama";
|
|
138
|
-
process.env.PREFER_OLLAMA = "true";
|
|
139
136
|
process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
|
|
140
137
|
process.env.FALLBACK_PROVIDER = "openai";
|
|
141
138
|
process.env.OPENAI_API_KEY = "sk-test-key";
|
|
@@ -66,7 +66,7 @@ async function testDatabaseIndexes() {
|
|
|
66
66
|
if (!fs.existsSync(dbPath)) {
|
|
67
67
|
log('⚠️ Database not found. Creating test database...', 'yellow');
|
|
68
68
|
// Initialize database
|
|
69
|
-
require('
|
|
69
|
+
require('../src/db/index.js');
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
const db = new Database(dbPath);
|