lynkr 7.2.5 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +2 -2
  2. package/config/model-tiers.json +89 -0
  3. package/docs/docs.html +1 -0
  4. package/docs/index.md +7 -0
  5. package/docs/toon-integration-spec.md +130 -0
  6. package/documentation/README.md +3 -2
  7. package/documentation/claude-code-cli.md +23 -16
  8. package/documentation/cursor-integration.md +17 -14
  9. package/documentation/docker.md +11 -4
  10. package/documentation/embeddings.md +7 -5
  11. package/documentation/faq.md +66 -12
  12. package/documentation/features.md +22 -15
  13. package/documentation/installation.md +66 -14
  14. package/documentation/production.md +43 -8
  15. package/documentation/providers.md +145 -42
  16. package/documentation/routing.md +476 -0
  17. package/documentation/token-optimization.md +7 -5
  18. package/documentation/troubleshooting.md +81 -5
  19. package/install.sh +6 -1
  20. package/package.json +4 -2
  21. package/scripts/setup.js +0 -1
  22. package/src/agents/executor.js +14 -6
  23. package/src/api/middleware/session.js +15 -2
  24. package/src/api/openai-router.js +130 -37
  25. package/src/api/providers-handler.js +15 -1
  26. package/src/api/router.js +107 -2
  27. package/src/budget/index.js +4 -3
  28. package/src/clients/databricks.js +431 -234
  29. package/src/clients/gpt-utils.js +181 -0
  30. package/src/clients/ollama-utils.js +66 -140
  31. package/src/clients/routing.js +0 -1
  32. package/src/clients/standard-tools.js +76 -3
  33. package/src/config/index.js +113 -35
  34. package/src/context/toon.js +173 -0
  35. package/src/logger/index.js +23 -0
  36. package/src/orchestrator/index.js +686 -211
  37. package/src/routing/agentic-detector.js +320 -0
  38. package/src/routing/complexity-analyzer.js +202 -2
  39. package/src/routing/cost-optimizer.js +305 -0
  40. package/src/routing/index.js +168 -159
  41. package/src/routing/model-tiers.js +365 -0
  42. package/src/server.js +2 -2
  43. package/src/sessions/cleanup.js +3 -3
  44. package/src/sessions/record.js +10 -1
  45. package/src/sessions/store.js +7 -2
  46. package/src/tools/agent-task.js +48 -1
  47. package/src/tools/index.js +15 -2
  48. package/te +11622 -0
  49. package/test/README.md +1 -1
  50. package/test/azure-openai-config.test.js +17 -8
  51. package/test/azure-openai-integration.test.js +7 -1
  52. package/test/azure-openai-routing.test.js +41 -43
  53. package/test/bedrock-integration.test.js +18 -32
  54. package/test/hybrid-routing-integration.test.js +35 -20
  55. package/test/hybrid-routing-performance.test.js +74 -64
  56. package/test/llamacpp-integration.test.js +28 -9
  57. package/test/lmstudio-integration.test.js +20 -8
  58. package/test/openai-integration.test.js +17 -20
  59. package/test/performance-tests.js +1 -1
  60. package/test/routing.test.js +65 -59
  61. package/test/toon-compression.test.js +131 -0
  62. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  63. package/ROUTER_COMPARISON.md +0 -173
  64. package/TIER_ROUTING_PLAN.md +0 -771
@@ -56,15 +56,20 @@ function testRoutingDecisionPerformance() {
56
56
  delete require.cache[require.resolve('../src/config')];
57
57
  delete require.cache[require.resolve('../src/clients/routing')];
58
58
 
59
- process.env.PREFER_OLLAMA = 'true';
59
+ process.env.MODEL_PROVIDER = 'ollama';
60
60
  process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
61
61
  process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
62
62
  process.env.DATABRICKS_API_KEY = 'test-key';
63
63
  process.env.DATABRICKS_API_BASE = 'http://test.com';
64
+ // Set TIER_* to empty = tier routing disabled, determineProviderSync returns static provider
65
+ process.env.TIER_SIMPLE = "";
66
+ process.env.TIER_MEDIUM = "";
67
+ process.env.TIER_COMPLEX = "";
68
+ process.env.TIER_REASONING = "";
64
69
 
65
70
  const routing = require('../src/clients/routing');
66
71
 
67
- log('\n📊 Benchmarking routing decisions...', 'cyan');
72
+ log('\n Benchmarking routing decisions...', 'cyan');
68
73
 
69
74
  // Test 1: Simple request (0 tools)
70
75
  const simplePayload = {
@@ -75,10 +80,10 @@ function testRoutingDecisionPerformance() {
75
80
  const { duration: simpleTime, throughput: simpleThroughput } = benchmark(
76
81
  'Simple request routing',
77
82
  100000,
78
- () => routing.determineProvider(simplePayload)
83
+ () => routing.determineProviderSync(simplePayload)
79
84
  );
80
85
 
81
- log(`⏱️ Simple request: ${simpleTime.toFixed(2)}ms for 100k decisions`, 'cyan');
86
+ log(` Simple request: ${simpleTime.toFixed(2)}ms for 100k decisions`, 'cyan');
82
87
  log(` Average: ${(simpleTime / 100000).toFixed(6)}ms per decision`, 'blue');
83
88
  log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
84
89
 
@@ -94,10 +99,10 @@ function testRoutingDecisionPerformance() {
94
99
  const { duration: complexTime, throughput: complexThroughput } = benchmark(
95
100
  'Complex request routing',
96
101
  100000,
97
- () => routing.determineProvider(complexPayload)
102
+ () => routing.determineProviderSync(complexPayload)
98
103
  );
99
104
 
100
- log(`⏱️ Complex request: ${complexTime.toFixed(2)}ms for 100k decisions`, 'cyan');
105
+ log(` Complex request: ${complexTime.toFixed(2)}ms for 100k decisions`, 'cyan');
101
106
  log(` Average: ${(complexTime / 100000).toFixed(6)}ms per decision`, 'blue');
102
107
  log(` Throughput: ${complexThroughput.toLocaleString()} decisions/sec`, 'green');
103
108
 
@@ -110,18 +115,18 @@ function testRoutingDecisionPerformance() {
110
115
  const { duration: toolCheckTime, throughput: toolCheckThroughput } = benchmark(
111
116
  'Tool capability check',
112
117
  100000,
113
- () => routing.determineProvider(toolCapabilityPayload)
118
+ () => routing.determineProviderSync(toolCapabilityPayload)
114
119
  );
115
120
 
116
- log(`⏱️ Tool capability check: ${toolCheckTime.toFixed(2)}ms for 100k decisions`, 'cyan');
121
+ log(` Tool capability check: ${toolCheckTime.toFixed(2)}ms for 100k decisions`, 'cyan');
117
122
  log(` Average: ${(toolCheckTime / 100000).toFixed(6)}ms per decision`, 'blue');
118
123
  log(` Throughput: ${toolCheckThroughput.toLocaleString()} decisions/sec`, 'green');
119
124
 
120
125
  // Analysis
121
- log('\n📈 Analysis:', 'yellow');
126
+ log('\n Analysis:', 'yellow');
122
127
  log(` Routing adds <0.01ms per request (negligible overhead)`, 'green');
123
128
  log(` Throughput: ${simpleThroughput.toLocaleString()} decisions/sec`, 'green');
124
- log(` Routing is extremely fast and won't impact request latency`, 'green');
129
+ log(` Routing is extremely fast and won't impact request latency`, 'green');
125
130
 
126
131
  return {
127
132
  simpleTime,
@@ -141,7 +146,7 @@ function testMetricsOverhead() {
141
146
  const { getMetricsCollector } = require('../src/observability/metrics');
142
147
  const metrics = getMetricsCollector();
143
148
 
144
- log('\n📊 Benchmarking metrics operations...', 'cyan');
149
+ log('\n Benchmarking metrics operations...', 'cyan');
145
150
 
146
151
  // Test recording provider routing
147
152
  const { duration: routingTime, throughput: routingThroughput } = benchmark(
@@ -150,7 +155,7 @@ function testMetricsOverhead() {
150
155
  () => metrics.recordProviderRouting('ollama')
151
156
  );
152
157
 
153
- log(`⏱️ Provider routing: ${routingTime.toFixed(2)}ms for 100k recordings`, 'cyan');
158
+ log(` Provider routing: ${routingTime.toFixed(2)}ms for 100k recordings`, 'cyan');
154
159
  log(` Average: ${(routingTime / 100000).toFixed(6)}ms per record`, 'blue');
155
160
  log(` Throughput: ${routingThroughput.toLocaleString()} ops/sec`, 'green');
156
161
 
@@ -161,7 +166,7 @@ function testMetricsOverhead() {
161
166
  () => metrics.recordProviderSuccess('ollama', 450)
162
167
  );
163
168
 
164
- log(`⏱️ Provider success: ${successTime.toFixed(2)}ms for 100k recordings`, 'cyan');
169
+ log(` Provider success: ${successTime.toFixed(2)}ms for 100k recordings`, 'cyan');
165
170
  log(` Average: ${(successTime / 100000).toFixed(6)}ms per record`, 'blue');
166
171
  log(` Throughput: ${successThroughput.toLocaleString()} ops/sec`, 'green');
167
172
 
@@ -172,7 +177,7 @@ function testMetricsOverhead() {
172
177
  () => metrics.recordFallbackAttempt('ollama', 'databricks', 'timeout')
173
178
  );
174
179
 
175
- log(`⏱️ Fallback attempts: ${fallbackTime.toFixed(2)}ms for 100k recordings`, 'cyan');
180
+ log(` Fallback attempts: ${fallbackTime.toFixed(2)}ms for 100k recordings`, 'cyan');
176
181
  log(` Average: ${(fallbackTime / 100000).toFixed(6)}ms per record`, 'blue');
177
182
  log(` Throughput: ${fallbackThroughput.toLocaleString()} ops/sec`, 'green');
178
183
 
@@ -183,15 +188,15 @@ function testMetricsOverhead() {
183
188
  () => metrics.recordCostSavings(0.001)
184
189
  );
185
190
 
186
- log(`⏱️ Cost savings: ${costTime.toFixed(2)}ms for 100k recordings`, 'cyan');
191
+ log(` Cost savings: ${costTime.toFixed(2)}ms for 100k recordings`, 'cyan');
187
192
  log(` Average: ${(costTime / 100000).toFixed(6)}ms per record`, 'blue');
188
193
  log(` Throughput: ${costThroughput.toLocaleString()} ops/sec`, 'green');
189
194
 
190
195
  // Analysis
191
196
  const avgMetricsTime = (routingTime + successTime + fallbackTime + costTime) / 4 / 100000;
192
- log('\n📈 Analysis:', 'yellow');
197
+ log('\n Analysis:', 'yellow');
193
198
  log(` Average metrics overhead: ${avgMetricsTime.toFixed(6)}ms per operation`, 'green');
194
- log(` Metrics collection is extremely lightweight`, 'green');
199
+ log(` Metrics collection is extremely lightweight`, 'green');
195
200
 
196
201
  return {
197
202
  routingTime,
@@ -212,14 +217,19 @@ function testCombinedStack() {
212
217
  delete require.cache[require.resolve('../src/clients/routing')];
213
218
  delete require.cache[require.resolve('../src/observability/metrics')];
214
219
 
215
- process.env.PREFER_OLLAMA = 'true';
220
+ process.env.MODEL_PROVIDER = 'ollama';
216
221
  process.env.OLLAMA_ENDPOINT = 'http://localhost:11434';
217
222
  process.env.OLLAMA_MODEL = 'qwen2.5-coder:latest';
223
+ // Set TIER_* to empty = static routing via determineProviderSync
224
+ process.env.TIER_SIMPLE = "";
225
+ process.env.TIER_MEDIUM = "";
226
+ process.env.TIER_COMPLEX = "";
227
+ process.env.TIER_REASONING = "";
218
228
 
219
229
  const routing = require('../src/clients/routing');
220
230
  const { getMetricsCollector } = require('../src/observability/metrics');
221
231
 
222
- log('\n📊 Benchmarking complete routing + metrics stack...', 'cyan');
232
+ log('\n Benchmarking complete routing + metrics stack...', 'cyan');
223
233
 
224
234
  // Simulate full routing decision + metrics recording
225
235
  const payload = {
@@ -232,21 +242,21 @@ function testCombinedStack() {
232
242
  50000,
233
243
  () => {
234
244
  const metrics = getMetricsCollector();
235
- const provider = routing.determineProvider(payload);
245
+ const provider = routing.determineProviderSync(payload);
236
246
  metrics.recordProviderRouting(provider);
237
247
  metrics.recordProviderSuccess(provider, 450);
238
248
  }
239
249
  );
240
250
 
241
- log(`⏱️ Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
251
+ log(` Full stack: ${fullTime.toFixed(2)}ms for 50k operations`, 'cyan');
242
252
  log(` Average: ${(fullTime / 50000).toFixed(6)}ms per request`, 'blue');
243
253
  log(` Throughput: ${fullThroughput.toLocaleString()} ops/sec`, 'green');
244
254
 
245
255
  // Analysis
246
- log('\n📈 Analysis:', 'yellow');
256
+ log('\n Analysis:', 'yellow');
247
257
  const overhead = (fullTime / 50000);
248
258
  log(` Total routing + metrics overhead: ${overhead.toFixed(6)}ms`, 'green');
249
- log(` Negligible impact on request latency (<0.02ms)`, 'green');
259
+ log(` Negligible impact on request latency (<0.02ms)`, 'green');
250
260
 
251
261
  return {
252
262
  fullTime,
@@ -263,7 +273,7 @@ function testHelperFunctions() {
263
273
 
264
274
  delete require.cache[require.resolve('../src/clients/databricks')];
265
275
 
266
- log('\n📊 Benchmarking helper functions...', 'cyan');
276
+ log('\n Benchmarking helper functions...', 'cyan');
267
277
 
268
278
  // Test categorizeFailure (we'll simulate it)
269
279
  const categorizeFailure = (error) => {
@@ -296,7 +306,7 @@ function testHelperFunctions() {
296
306
  }
297
307
  );
298
308
 
299
- log(`⏱️ Categorize failure: ${categorizeTime.toFixed(2)}ms for 400k operations`, 'cyan');
309
+ log(` Categorize failure: ${categorizeTime.toFixed(2)}ms for 400k operations`, 'cyan');
300
310
  log(` Average: ${(categorizeTime / 400000).toFixed(6)}ms per categorization`, 'blue');
301
311
  log(` Throughput: ${(categorizeThroughput * 4).toLocaleString()} ops/sec`, 'green');
302
312
 
@@ -315,13 +325,13 @@ function testHelperFunctions() {
315
325
  () => estimateCostSavings(1000, 500)
316
326
  );
317
327
 
318
- log(`⏱️ Cost estimation: ${costCalcTime.toFixed(2)}ms for 100k calculations`, 'cyan');
328
+ log(` Cost estimation: ${costCalcTime.toFixed(2)}ms for 100k calculations`, 'cyan');
319
329
  log(` Average: ${(costCalcTime / 100000).toFixed(6)}ms per calculation`, 'blue');
320
330
  log(` Throughput: ${costCalcThroughput.toLocaleString()} ops/sec`, 'green');
321
331
 
322
- log('\n📈 Analysis:', 'yellow');
332
+ log('\n Analysis:', 'yellow');
323
333
  log(` Helper functions add negligible overhead (<0.001ms)`, 'green');
324
- log(` No performance impact from utility functions`, 'green');
334
+ log(` No performance impact from utility functions`, 'green');
325
335
 
326
336
  return {
327
337
  categorizeTime,
@@ -333,54 +343,54 @@ function testHelperFunctions() {
333
343
  // FINAL REPORT
334
344
  // =============================================================================
335
345
  function printFinalReport(results) {
336
- section('📊 HYBRID ROUTING PERFORMANCE SUMMARY');
346
+ section('HYBRID ROUTING PERFORMANCE SUMMARY');
337
347
 
338
348
  console.log('\n');
339
- console.log('┌────────────────────────────────────────────────────────────┐');
340
- console.log(' HYBRID ROUTING PERFORMANCE');
341
- console.log('├────────────────────────────────────────────────────────────┤');
349
+ console.log('+---------------------------------------------------------+');
350
+ console.log('| HYBRID ROUTING PERFORMANCE |');
351
+ console.log('+---------------------------------------------------------+');
342
352
 
343
- log(`│ 1. Routing Decisions │`, 'bright');
344
- log(`│ Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision │`, 'cyan');
345
- log(`│ Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} │`);
353
+ log(`| 1. Routing Decisions |`, 'bright');
354
+ log(`| Average: ${results.routing.avgDecisionTime.toFixed(6)}ms per decision |`, 'cyan');
355
+ log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);
346
356
 
347
- console.log('├────────────────────────────────────────────────────────────┤');
357
+ console.log('+---------------------------------------------------------+');
348
358
 
349
- log(`│ 2. Metrics Collection │`, 'bright');
350
- log(`│ Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation │`, 'cyan');
351
- log(`│ Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} │`);
359
+ log(`| 2. Metrics Collection |`, 'bright');
360
+ log(`| Average: ${results.metrics.avgMetricsTime.toFixed(6)}ms per operation |`, 'cyan');
361
+ log(`| Overhead: ${colors.green}Negligible (<0.01ms)${colors.reset} |`);
352
362
 
353
- console.log('├────────────────────────────────────────────────────────────┤');
363
+ console.log('+---------------------------------------------------------+');
354
364
 
355
- log(`│ 3. Full Routing Stack │`, 'bright');
356
- log(`│ Average: ${results.combined.overhead.toFixed(6)}ms per request │`, 'cyan');
357
- log(`│ Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec │`, 'cyan');
358
- log(`│ Impact: ${colors.green}Negligible (<0.02ms)${colors.reset} │`);
365
+ log(`| 3. Full Routing Stack |`, 'bright');
366
+ log(`| Average: ${results.combined.overhead.toFixed(6)}ms per request |`, 'cyan');
367
+ log(`| Throughput: ${results.combined.fullThroughput.toLocaleString()} ops/sec |`, 'cyan');
368
+ log(`| Impact: ${colors.green}Negligible (<0.02ms)${colors.reset} |`);
359
369
 
360
- console.log('├────────────────────────────────────────────────────────────┤');
370
+ console.log('+---------------------------------------------------------+');
361
371
 
362
- log(`│ 4. Helper Functions │`, 'bright');
363
- log(`│ Overhead: ${colors.green}Negligible (<0.001ms)${colors.reset} │`);
372
+ log(`| 4. Helper Functions |`, 'bright');
373
+ log(`| Overhead: ${colors.green}Negligible (<0.001ms)${colors.reset} |`);
364
374
 
365
- console.log('└────────────────────────────────────────────────────────────┘');
375
+ console.log('+---------------------------------------------------------+');
366
376
 
367
377
  // Overall assessment
368
378
  console.log('\n');
369
- log('🏆 Overall Performance Assessment:', 'bright');
370
- log(' Routing overhead: <0.01ms per request', 'green');
371
- log(' Metrics overhead: <0.01ms per request', 'green');
372
- log(' Combined overhead: <0.02ms per request', 'green');
373
- log(' No measurable impact on API latency', 'green');
374
-
375
- console.log('\n📈 Expected Real-World Performance:');
376
- log(' Ollama (local): ~500-1000ms per request', 'cyan');
377
- log(' Cloud (Databricks): ~1500-2000ms per request', 'cyan');
378
- log(' Routing overhead: ~0.02ms (0.001-0.002% of total)', 'cyan');
379
- log(' Latency savings with Ollama: 40-60% faster', 'green');
380
- log(' Cost savings with Ollama: 100% (free)', 'green');
379
+ log('Overall Performance Assessment:', 'bright');
380
+ log(' Routing overhead: <0.01ms per request', 'green');
381
+ log(' Metrics overhead: <0.01ms per request', 'green');
382
+ log(' Combined overhead: <0.02ms per request', 'green');
383
+ log(' No measurable impact on API latency', 'green');
384
+
385
+ console.log('\n Expected Real-World Performance:');
386
+ log(' Ollama (local): ~500-1000ms per request', 'cyan');
387
+ log(' Cloud (Databricks): ~1500-2000ms per request', 'cyan');
388
+ log(' Routing overhead: ~0.02ms (0.001-0.002% of total)', 'cyan');
389
+ log(' Latency savings with Ollama: 40-60% faster', 'green');
390
+ log(' Cost savings with Ollama: 100% (free)', 'green');
381
391
 
382
392
  console.log('\n');
383
- log('🚀 Conclusion: Hybrid routing adds negligible overhead while', 'bright');
393
+ log('Conclusion: Hybrid routing adds negligible overhead while', 'bright');
384
394
  log(' providing significant latency and cost improvements!', 'bright');
385
395
  console.log('\n');
386
396
  }
@@ -389,7 +399,7 @@ function printFinalReport(results) {
389
399
  // RUN ALL TESTS
390
400
  // =============================================================================
391
401
  async function runAllTests() {
392
- log('\n🚀 Starting Hybrid Routing Performance Test Suite\n', 'bright');
402
+ log('\n Starting Hybrid Routing Performance Test Suite\n', 'bright');
393
403
 
394
404
  try {
395
405
  const results = {
@@ -401,10 +411,10 @@ async function runAllTests() {
401
411
 
402
412
  printFinalReport(results);
403
413
 
404
- log('\n All performance tests completed successfully!\n', 'green');
414
+ log('\n All performance tests completed successfully!\n', 'green');
405
415
  process.exit(0);
406
416
  } catch (error) {
407
- log(`\n Performance test suite failed: ${error.message}\n`, 'red');
417
+ log(`\n Performance test suite failed: ${error.message}\n`, 'red');
408
418
  console.error(error);
409
419
  process.exit(1);
410
420
  }
@@ -11,6 +11,12 @@ describe("llama.cpp Integration", () => {
11
11
  delete require.cache[require.resolve("../src/config")];
12
12
  delete require.cache[require.resolve("../src/clients/routing")];
13
13
  delete require.cache[require.resolve("../src/clients/openrouter-utils")];
14
+
15
+ // Prevent .env TIER_* values from being picked up by dotenv
16
+ process.env.TIER_SIMPLE = "";
17
+ process.env.TIER_MEDIUM = "";
18
+ process.env.TIER_COMPLEX = "";
19
+ process.env.TIER_REASONING = "";
14
20
  });
15
21
 
16
22
  afterEach(() => {
@@ -104,33 +110,46 @@ describe("llama.cpp Integration", () => {
104
110
  });
105
111
 
106
112
  describe("Routing", () => {
107
- it("should route to llamacpp when MODEL_PROVIDER is llamacpp", () => {
113
+ it("should route to llamacpp when MODEL_PROVIDER is llamacpp", async () => {
108
114
  process.env.MODEL_PROVIDER = "llamacpp";
109
115
  process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
110
- process.env.PREFER_OLLAMA = "false";
111
116
 
112
117
  const config = require("../src/config");
113
118
  const routing = require("../src/clients/routing");
114
119
 
115
120
  const payload = { messages: [{ role: "user", content: "test" }] };
116
- const provider = routing.determineProvider(payload);
121
+ const result = await routing.determineProviderSmart(payload);
117
122
 
118
- assert.strictEqual(provider, "llamacpp");
123
+ assert.strictEqual(result.provider, "llamacpp");
124
+ assert.strictEqual(result.method, "static");
119
125
  });
120
126
 
121
- it("should route to llamacpp for moderate tool count when other providers not configured", () => {
122
- // This test is skipped because llamacpp is checked AFTER openrouter/openai/azure in routing
123
- // and those providers may be present in the test environment
124
- // llama.cpp will be used when it's the PRIMARY provider or when it's the only option
127
+ it("should return static routing from determineProviderSmart when tiers disabled", async () => {
128
+ process.env.MODEL_PROVIDER = "llamacpp";
129
+ process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
130
+
131
+ const config = require("../src/config");
132
+ const routing = require("../src/clients/routing");
133
+
134
+ const payload = { messages: [{ role: "user", content: "test" }] };
135
+ const result = await routing.determineProviderSmart(payload);
136
+
137
+ assert.strictEqual(result.provider, "llamacpp");
138
+ assert.strictEqual(result.method, "static");
139
+ assert.strictEqual(result.reason, "tier_routing_disabled");
125
140
  });
126
141
 
127
142
  it("should throw error when llamacpp is set as FALLBACK_PROVIDER", () => {
128
143
  process.env.MODEL_PROVIDER = "ollama";
129
- process.env.PREFER_OLLAMA = "true";
130
144
  process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
131
145
  process.env.FALLBACK_PROVIDER = "llamacpp";
132
146
  process.env.LLAMACPP_ENDPOINT = "http://localhost:8080";
133
147
  process.env.FALLBACK_ENABLED = "true";
148
+ // Enable tier routing so fallback validation runs
149
+ process.env.TIER_SIMPLE = "ollama:llama3.2";
150
+ process.env.TIER_MEDIUM = "ollama:llama3.2";
151
+ process.env.TIER_COMPLEX = "ollama:llama3.2";
152
+ process.env.TIER_REASONING = "ollama:llama3.2";
134
153
 
135
154
  assert.throws(
136
155
  () => require("../src/config"),
@@ -11,6 +11,12 @@ describe("LM Studio Integration", () => {
11
11
  delete require.cache[require.resolve("../src/config")];
12
12
  delete require.cache[require.resolve("../src/clients/routing")];
13
13
  delete require.cache[require.resolve("../src/clients/openrouter-utils")];
14
+
15
+ // Prevent .env TIER_* values from being picked up by dotenv
16
+ process.env.TIER_SIMPLE = "";
17
+ process.env.TIER_MEDIUM = "";
18
+ process.env.TIER_COMPLEX = "";
19
+ process.env.TIER_REASONING = "";
14
20
  });
15
21
 
16
22
  afterEach(() => {
@@ -105,26 +111,33 @@ describe("LM Studio Integration", () => {
105
111
  it("should route to lmstudio when MODEL_PROVIDER is lmstudio", () => {
106
112
  process.env.MODEL_PROVIDER = "lmstudio";
107
113
  process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
108
- process.env.PREFER_OLLAMA = "false";
109
114
 
110
115
  const config = require("../src/config");
111
116
  const routing = require("../src/clients/routing");
112
117
 
113
118
  const payload = { messages: [{ role: "user", content: "test" }] };
114
- const provider = routing.determineProvider(payload);
119
+ const provider = routing.determineProviderSync(payload);
115
120
 
116
121
  assert.strictEqual(provider, "lmstudio");
117
122
  });
118
123
 
119
- it("should route to lmstudio for moderate tool count when other providers not configured", () => {
120
- // This test is skipped because lmstudio is the LAST option in routing
121
- // and other providers (openrouter, openai, azure, llamacpp) take precedence
122
- // LM Studio will be used when it's the PRIMARY provider, not in routing fallback
124
+ it("should return static routing from determineProviderSmart when tiers disabled", async () => {
125
+ process.env.MODEL_PROVIDER = "lmstudio";
126
+ process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
127
+
128
+ const config = require("../src/config");
129
+ const routing = require("../src/clients/routing");
130
+
131
+ const payload = { messages: [{ role: "user", content: "test" }] };
132
+ const result = await routing.determineProviderSmart(payload);
133
+
134
+ assert.strictEqual(result.provider, "lmstudio");
135
+ assert.strictEqual(result.method, "static");
136
+ assert.strictEqual(result.reason, "tier_routing_disabled");
123
137
  });
124
138
 
125
139
  it("should throw error when lmstudio is set as FALLBACK_PROVIDER", () => {
126
140
  process.env.MODEL_PROVIDER = "ollama";
127
- process.env.PREFER_OLLAMA = "true";
128
141
  process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
129
142
  process.env.FALLBACK_PROVIDER = "lmstudio";
130
143
  process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
@@ -312,7 +325,6 @@ describe("LM Studio Integration", () => {
312
325
  describe("Fallback Prevention", () => {
313
326
  it("should prevent lmstudio from being used as fallback provider", () => {
314
327
  process.env.MODEL_PROVIDER = "ollama";
315
- process.env.PREFER_OLLAMA = "true";
316
328
  process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
317
329
  process.env.FALLBACK_PROVIDER = "lmstudio";
318
330
  process.env.LMSTUDIO_ENDPOINT = "http://localhost:1234";
@@ -11,6 +11,12 @@ describe("OpenAI Integration", () => {
11
11
  delete require.cache[require.resolve("../src/config")];
12
12
  delete require.cache[require.resolve("../src/clients/routing")];
13
13
  delete require.cache[require.resolve("../src/clients/openrouter-utils")];
14
+
15
+ // Prevent .env TIER_* values from being picked up by dotenv
16
+ process.env.TIER_SIMPLE = "";
17
+ process.env.TIER_MEDIUM = "";
18
+ process.env.TIER_COMPLEX = "";
19
+ process.env.TIER_REASONING = "";
14
20
  });
15
21
 
16
22
  afterEach(() => {
@@ -92,50 +98,41 @@ describe("OpenAI Integration", () => {
92
98
  });
93
99
 
94
100
  describe("Routing", () => {
95
- it("should route to openai when MODEL_PROVIDER is openai", () => {
101
+ it("should route to openai when MODEL_PROVIDER is openai", async () => {
96
102
  process.env.MODEL_PROVIDER = "openai";
97
103
  process.env.OPENAI_API_KEY = "sk-test-key";
98
- process.env.PREFER_OLLAMA = "false";
99
104
 
100
105
  const config = require("../src/config");
101
106
  const routing = require("../src/clients/routing");
102
107
 
103
108
  const payload = { messages: [{ role: "user", content: "test" }] };
104
- const provider = routing.determineProvider(payload);
109
+ const result = await routing.determineProviderSmart(payload);
105
110
 
106
- assert.strictEqual(provider, "openai");
111
+ assert.strictEqual(result.provider, "openai");
112
+ assert.strictEqual(result.method, "static");
107
113
  });
108
114
 
109
- it("should route to openai as fallback when heavy tool count", () => {
110
- // Clear any existing OpenRouter key to ensure fallback to OpenAI
111
- delete process.env.OPENROUTER_API_KEY;
112
-
113
- process.env.MODEL_PROVIDER = "ollama";
114
- process.env.PREFER_OLLAMA = "true";
115
- process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
116
- process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "2";
117
- process.env.OPENROUTER_MAX_TOOLS_FOR_ROUTING = "5";
115
+ it("should return static routing from determineProviderSmart when tiers disabled", async () => {
116
+ process.env.MODEL_PROVIDER = "openai";
118
117
  process.env.OPENAI_API_KEY = "sk-test-key";
119
- process.env.FALLBACK_ENABLED = "true";
120
- process.env.FALLBACK_PROVIDER = "openai";
121
118
 
122
119
  const config = require("../src/config");
123
120
  const routing = require("../src/clients/routing");
124
121
 
125
- // 10 tools - above both Ollama and OpenRouter thresholds, should go to fallback
126
122
  const payload = {
127
123
  messages: [{ role: "user", content: "test" }],
128
124
  tools: Array.from({ length: 10 }, (_, i) => ({ name: `tool${i}`, description: "test" })),
129
125
  };
130
126
 
131
- const provider = routing.determineProvider(payload);
132
- // Should route to openai as the configured fallback provider
133
- assert.strictEqual(provider, "openai");
127
+ const result = await routing.determineProviderSmart(payload);
128
+ // No TIER_* vars = static routing
129
+ assert.strictEqual(result.provider, "openai");
130
+ assert.strictEqual(result.method, "static");
131
+ assert.strictEqual(result.reason, "tier_routing_disabled");
134
132
  });
135
133
 
136
134
  it("should use openai as fallback provider when configured", () => {
137
135
  process.env.MODEL_PROVIDER = "ollama";
138
- process.env.PREFER_OLLAMA = "true";
139
136
  process.env.OLLAMA_MODEL = "qwen2.5-coder:latest";
140
137
  process.env.FALLBACK_PROVIDER = "openai";
141
138
  process.env.OPENAI_API_KEY = "sk-test-key";
@@ -66,7 +66,7 @@ async function testDatabaseIndexes() {
66
66
  if (!fs.existsSync(dbPath)) {
67
67
  log('⚠️ Database not found. Creating test database...', 'yellow');
68
68
  // Initialize database
69
- require('./src/db/index.js');
69
+ require('../src/db/index.js');
70
70
  }
71
71
 
72
72
  const db = new Database(dbPath);