lynkr 7.2.5 → 8.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +2 -2
  2. package/config/model-tiers.json +89 -0
  3. package/docs/docs.html +1 -0
  4. package/docs/index.md +7 -0
  5. package/docs/toon-integration-spec.md +130 -0
  6. package/documentation/README.md +3 -2
  7. package/documentation/claude-code-cli.md +23 -16
  8. package/documentation/cursor-integration.md +17 -14
  9. package/documentation/docker.md +11 -4
  10. package/documentation/embeddings.md +7 -5
  11. package/documentation/faq.md +66 -12
  12. package/documentation/features.md +22 -15
  13. package/documentation/installation.md +66 -14
  14. package/documentation/production.md +43 -8
  15. package/documentation/providers.md +145 -42
  16. package/documentation/routing.md +476 -0
  17. package/documentation/token-optimization.md +7 -5
  18. package/documentation/troubleshooting.md +81 -5
  19. package/install.sh +6 -1
  20. package/package.json +4 -2
  21. package/scripts/setup.js +0 -1
  22. package/src/agents/executor.js +14 -6
  23. package/src/api/middleware/session.js +15 -2
  24. package/src/api/openai-router.js +130 -37
  25. package/src/api/providers-handler.js +15 -1
  26. package/src/api/router.js +107 -2
  27. package/src/budget/index.js +4 -3
  28. package/src/clients/databricks.js +431 -234
  29. package/src/clients/gpt-utils.js +181 -0
  30. package/src/clients/ollama-utils.js +66 -140
  31. package/src/clients/routing.js +0 -1
  32. package/src/clients/standard-tools.js +76 -3
  33. package/src/config/index.js +113 -35
  34. package/src/context/toon.js +173 -0
  35. package/src/logger/index.js +23 -0
  36. package/src/orchestrator/index.js +686 -211
  37. package/src/routing/agentic-detector.js +320 -0
  38. package/src/routing/complexity-analyzer.js +202 -2
  39. package/src/routing/cost-optimizer.js +305 -0
  40. package/src/routing/index.js +168 -159
  41. package/src/routing/model-tiers.js +365 -0
  42. package/src/server.js +2 -2
  43. package/src/sessions/cleanup.js +3 -3
  44. package/src/sessions/record.js +10 -1
  45. package/src/sessions/store.js +7 -2
  46. package/src/tools/agent-task.js +48 -1
  47. package/src/tools/index.js +15 -2
  48. package/te +11622 -0
  49. package/test/README.md +1 -1
  50. package/test/azure-openai-config.test.js +17 -8
  51. package/test/azure-openai-integration.test.js +7 -1
  52. package/test/azure-openai-routing.test.js +41 -43
  53. package/test/bedrock-integration.test.js +18 -32
  54. package/test/hybrid-routing-integration.test.js +35 -20
  55. package/test/hybrid-routing-performance.test.js +74 -64
  56. package/test/llamacpp-integration.test.js +28 -9
  57. package/test/lmstudio-integration.test.js +20 -8
  58. package/test/openai-integration.test.js +17 -20
  59. package/test/performance-tests.js +1 -1
  60. package/test/routing.test.js +65 -59
  61. package/test/toon-compression.test.js +131 -0
  62. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  63. package/ROUTER_COMPARISON.md +0 -173
  64. package/TIER_ROUTING_PLAN.md +0 -771
@@ -10,7 +10,6 @@
10
10
 
11
11
  const config = require('../config');
12
12
  const logger = require('../logger');
13
- const { modelNameSupportsTools } = require('../clients/ollama-utils');
14
13
  const {
15
14
  analyzeComplexity,
16
15
  shouldForceLocal,
@@ -19,6 +18,11 @@ const {
19
18
  analyzeWithEmbeddings,
20
19
  } = require('./complexity-analyzer');
21
20
 
21
+ // Intelligent routing modules
22
+ const { getAgenticDetector, AGENT_TYPES } = require('./agentic-detector');
23
+ const { getModelTierSelector, TIER_DEFINITIONS } = require('./model-tiers');
24
+ const { getCostOptimizer } = require('./cost-optimizer');
25
+
22
26
  // Local providers
23
27
  const LOCAL_PROVIDERS = ['ollama', 'llamacpp', 'lmstudio'];
24
28
 
@@ -49,26 +53,7 @@ function getFallbackProvider() {
49
53
  * @param {number} options.toolCount - Number of tools in the request (for hybrid routing)
50
54
  * @param {boolean} options.useHybridRouting - Whether to use hybrid routing logic (default: false)
51
55
  */
52
- function getBestCloudProvider(options = {}) {
53
- const { toolCount = 0, useHybridRouting = false } = options;
54
-
55
- // If hybrid routing is explicitly enabled and we have tools, use tool-based routing
56
- const preferOllama = config.modelProvider?.preferOllama ?? false;
57
-
58
- if (preferOllama && useHybridRouting && toolCount > 0) {
59
- const openRouterMaxTools = config.modelProvider?.openRouterMaxToolsForRouting ?? 15;
60
-
61
- // For moderate tool counts, prefer OpenRouter over Azure OpenAI
62
- if (toolCount <= openRouterMaxTools && config.openrouter?.apiKey) {
63
- return 'openrouter';
64
- }
65
-
66
- // For higher tool counts, use Azure OpenAI if available
67
- if (config.azureOpenAI?.endpoint && config.azureOpenAI?.apiKey) {
68
- return 'azure-openai';
69
- }
70
- }
71
-
56
+ function getBestCloudProvider() {
72
57
  // Standard priority order for cloud providers
73
58
  if (config.databricks?.url && config.databricks?.apiKey) return 'databricks';
74
59
  if (config.azureAnthropic?.endpoint && config.azureAnthropic?.apiKey) return 'azure-anthropic';
@@ -105,23 +90,43 @@ function getBestLocalProvider() {
105
90
  * @returns {Object} Routing decision with provider and metadata
106
91
  */
107
92
  async function determineProviderSmart(payload, options = {}) {
108
- const preferOllama = config.modelProvider?.preferOllama ?? false;
109
93
  const primaryProvider = config.modelProvider?.type ?? 'databricks';
110
94
 
111
- // If smart routing is disabled, use static configuration
112
- if (!preferOllama) {
95
+ // If tier routing is disabled, use static configuration
96
+ if (!config.modelTiers?.enabled) {
113
97
  return {
114
98
  provider: primaryProvider,
99
+ model: null,
115
100
  method: 'static',
116
- reason: 'smart_routing_disabled',
101
+ reason: 'tier_routing_disabled',
117
102
  };
118
103
  }
119
104
 
120
105
  // Quick check for force patterns
121
106
  if (shouldForceLocal(payload)) {
107
+ // When tier routing is enabled, respect TIER_SIMPLE instead of blindly choosing local
108
+ if (config.modelTiers?.enabled) {
109
+ try {
110
+ const selector = getModelTierSelector();
111
+ const modelSelection = selector.selectModel('SIMPLE', null);
112
+ const decision = {
113
+ provider: modelSelection.provider,
114
+ model: modelSelection.model,
115
+ tier: 'SIMPLE',
116
+ method: 'force',
117
+ reason: 'force_local_pattern',
118
+ score: 0,
119
+ };
120
+ routingMetrics.record(decision);
121
+ return decision;
122
+ } catch (err) {
123
+ logger.debug({ err: err.message }, 'Tier selection failed for force_local, falling back to local provider');
124
+ }
125
+ }
122
126
  const provider = getBestLocalProvider();
123
127
  const decision = {
124
128
  provider,
129
+ model: null,
125
130
  method: 'force',
126
131
  reason: 'force_local_pattern',
127
132
  score: 0,
@@ -131,10 +136,10 @@ async function determineProviderSmart(payload, options = {}) {
131
136
  }
132
137
 
133
138
  if (shouldForceCloud(payload) && isFallbackEnabled()) {
134
- const toolCount = payload?.tools?.length ?? 0;
135
- const provider = getBestCloudProvider({ toolCount });
139
+ const provider = getBestCloudProvider();
136
140
  const decision = {
137
141
  provider,
142
+ model: null,
138
143
  method: 'force',
139
144
  reason: 'force_cloud_pattern',
140
145
  score: 100,
@@ -143,60 +148,9 @@ async function determineProviderSmart(payload, options = {}) {
143
148
  return decision;
144
149
  }
145
150
 
146
- // Check tool count thresholds for hybrid routing
147
- const toolCount = payload?.tools?.length ?? 0;
148
- const ollamaMaxTools = config.modelProvider?.ollamaMaxToolsForRouting ?? 3;
149
-
150
- // If tool count is within Ollama's threshold, route to Ollama
151
- if (toolCount > 0 && toolCount <= ollamaMaxTools) {
152
- const ollamaModel = config.ollama?.model;
153
- const supportsTools = modelNameSupportsTools(ollamaModel);
154
-
155
- if (supportsTools) {
156
- const provider = getBestLocalProvider();
157
- const decision = {
158
- provider,
159
- method: 'tool_threshold',
160
- reason: 'within_ollama_tool_threshold',
161
- score: 0,
162
- toolCount,
163
- threshold: ollamaMaxTools,
164
- };
165
- routingMetrics.record(decision);
166
- return decision;
167
- }
168
- // If Ollama doesn't support tools, fall through to cloud routing
169
- if (isFallbackEnabled()) {
170
- const provider = getBestCloudProvider({ toolCount });
171
- const decision = {
172
- provider,
173
- method: 'tool_support',
174
- reason: 'local_model_no_tool_support',
175
- score: 0,
176
- toolCount,
177
- };
178
- routingMetrics.record(decision);
179
- return decision;
180
- }
181
- }
182
-
183
- // If tool count exceeds Ollama threshold but fallback is enabled, route to cloud
184
- if (toolCount > ollamaMaxTools && isFallbackEnabled()) {
185
- const provider = getBestCloudProvider({ toolCount, useHybridRouting: true });
186
- const decision = {
187
- provider,
188
- method: 'tool_threshold',
189
- reason: 'exceeds_ollama_tool_threshold',
190
- score: 50,
191
- toolCount,
192
- threshold: ollamaMaxTools,
193
- };
194
- routingMetrics.record(decision);
195
- return decision;
196
- }
197
-
198
- // Full complexity analysis for non-tool requests
199
- const analysis = analyzeComplexity(payload);
151
+ // Full complexity analysis
152
+ const useWeightedScoring = config.routing?.weightedScoring ?? false;
153
+ const analysis = analyzeComplexity(payload, { weighted: useWeightedScoring });
200
154
 
201
155
  // Phase 4: Optional embeddings adjustment
202
156
  let embeddingsResult = null;
@@ -214,25 +168,116 @@ async function determineProviderSmart(payload, options = {}) {
214
168
  }
215
169
  }
216
170
 
217
- // Apply routing decision based on complexity
171
+ // Agentic workflow detection
172
+ let agenticResult = null;
173
+ if (config.routing?.agenticDetection !== false) {
174
+ try {
175
+ const detector = getAgenticDetector();
176
+ agenticResult = detector.detect(payload);
177
+
178
+ // Boost complexity score for agentic workflows
179
+ if (agenticResult.isAgentic) {
180
+ analysis.score = Math.min(100, analysis.score + agenticResult.scoreBoost);
181
+ analysis.agenticBoost = agenticResult.scoreBoost;
182
+ analysis.agentType = agenticResult.agentType;
183
+
184
+ logger.debug({
185
+ agentType: agenticResult.agentType,
186
+ boost: agenticResult.scoreBoost,
187
+ newScore: analysis.score,
188
+ }, '[Routing] Agentic workflow detected, boosting score');
189
+
190
+ // Force cloud for autonomous workflows
191
+ if (agenticResult.agentType === 'AUTONOMOUS' && isFallbackEnabled()) {
192
+ const provider = getBestCloudProvider();
193
+ const decision = {
194
+ provider,
195
+ method: 'agentic',
196
+ reason: 'autonomous_workflow',
197
+ score: analysis.score,
198
+ agenticResult,
199
+ };
200
+ routingMetrics.record(decision);
201
+ return decision;
202
+ }
203
+ }
204
+ } catch (err) {
205
+ logger.debug({ err: err.message }, 'Agentic detection failed');
206
+ }
207
+ }
208
+
209
+ // Tier-based model selection
210
+ let selectedModel = null;
211
+ let tier = null;
212
+ if (config.modelTiers?.enabled) {
213
+ try {
214
+ const selector = getModelTierSelector();
215
+ tier = selector.getTier(analysis.score);
216
+
217
+ // Check if agentic detection requires a higher tier
218
+ if (agenticResult?.minTier) {
219
+ const agenticTierPriority = TIER_DEFINITIONS[agenticResult.minTier]?.priority || 0;
220
+ const currentTierPriority = TIER_DEFINITIONS[tier]?.priority || 0;
221
+ if (agenticTierPriority > currentTierPriority) {
222
+ tier = agenticResult.minTier;
223
+ logger.debug({ from: selector.getTier(analysis.score), to: tier }, '[Routing] Upgrading tier for agentic workflow');
224
+ }
225
+ }
226
+
227
+ // Select model for the tier (will be applied after provider selection)
228
+ analysis.tier = tier;
229
+ } catch (err) {
230
+ logger.debug({ err: err.message }, 'Tier selection failed');
231
+ }
232
+ }
233
+
234
+ // Apply routing decision based on tier config (TIER_* env vars are mandatory)
218
235
  let provider;
219
- let method = 'complexity';
220
-
221
- if (analysis.recommendation === 'local') {
222
- provider = getBestLocalProvider();
223
- } else {
224
- // Cloud recommendation
225
- if (isFallbackEnabled()) {
226
- provider = getBestCloudProvider({ toolCount });
227
- } else {
228
- // Fallback disabled, use local anyway
229
- provider = getBestLocalProvider();
230
- method = 'fallback_disabled';
236
+ let method = 'tier_config';
237
+
238
+ const selector = getModelTierSelector();
239
+ const modelSelection = selector.selectModel(tier, null);
240
+
241
+ provider = modelSelection.provider;
242
+ selectedModel = modelSelection.model;
243
+ logger.debug({ tier, provider, model: selectedModel }, '[Routing] Using tier config');
244
+
245
+ // Cost optimization: check if cheaper model can handle this tier
246
+ let costOptimized = false;
247
+ if (config.routing?.costOptimization && tier) {
248
+ try {
249
+ const optimizer = getCostOptimizer();
250
+ const availableProviders = [provider];
251
+
252
+ // Also consider local provider if not already selected
253
+ const localProvider = getBestLocalProvider();
254
+ if (localProvider !== provider) {
255
+ availableProviders.push(localProvider);
256
+ }
257
+
258
+ const cheapest = optimizer.findCheapestForTier(tier, availableProviders);
259
+ if (cheapest && cheapest.provider !== provider) {
260
+ logger.debug({
261
+ from: provider,
262
+ to: cheapest.provider,
263
+ tier,
264
+ savings: `${cheapest.model} is cheaper`,
265
+ }, '[Routing] Cost optimization: switching provider');
266
+
267
+ provider = cheapest.provider;
268
+ selectedModel = cheapest.model;
269
+ costOptimized = true;
270
+ method = 'cost_optimized';
271
+ }
272
+ } catch (err) {
273
+ logger.debug({ err: err.message }, 'Cost optimization failed');
231
274
  }
232
275
  }
233
276
 
234
277
  const decision = {
235
278
  provider,
279
+ model: selectedModel,
280
+ tier,
236
281
  method,
237
282
  reason: analysis.recommendation,
238
283
  score: analysis.score,
@@ -240,6 +285,8 @@ async function determineProviderSmart(payload, options = {}) {
240
285
  mode: analysis.mode,
241
286
  analysis,
242
287
  embeddingsResult,
288
+ agenticResult,
289
+ costOptimized,
243
290
  };
244
291
 
245
292
  // Phase 3: Record metrics
@@ -251,8 +298,8 @@ async function determineProviderSmart(payload, options = {}) {
251
298
  score: analysis.score,
252
299
  threshold: analysis.threshold,
253
300
  recommendation: analysis.recommendation,
254
- taskType: analysis.breakdown.taskType.reason,
255
- toolCount,
301
+ taskType: analysis.breakdown?.taskType?.reason,
302
+ toolCount: payload?.tools?.length ?? 0,
256
303
  },
257
304
  'Smart routing decision'
258
305
  );
@@ -260,67 +307,6 @@ async function determineProviderSmart(payload, options = {}) {
260
307
  return decision;
261
308
  }
262
309
 
263
- /**
264
- * Synchronous version of determineProvider for backward compatibility
265
- * Does not include Phase 4 embeddings analysis
266
- */
267
- function determineProvider(payload) {
268
- const preferOllama = config.modelProvider?.preferOllama ?? false;
269
- const primaryProvider = config.modelProvider?.type ?? 'databricks';
270
-
271
- // If smart routing is disabled, use static configuration
272
- if (!preferOllama) {
273
- return primaryProvider;
274
- }
275
-
276
- // Quick check for force patterns
277
- if (shouldForceLocal(payload)) {
278
- return getBestLocalProvider();
279
- }
280
-
281
- if (shouldForceCloud(payload) && isFallbackEnabled()) {
282
- const toolCount = payload?.tools?.length ?? 0;
283
- return getBestCloudProvider({ toolCount });
284
- }
285
-
286
- // Check tool count thresholds for hybrid routing
287
- const toolCount = payload?.tools?.length ?? 0;
288
- const ollamaMaxTools = config.modelProvider?.ollamaMaxToolsForRouting ?? 3;
289
-
290
- // If tool count is within Ollama's threshold, route to Ollama
291
- if (toolCount > 0 && toolCount <= ollamaMaxTools) {
292
- const ollamaModel = config.ollama?.model;
293
- const supportsTools = modelNameSupportsTools(ollamaModel);
294
-
295
- if (supportsTools) {
296
- return getBestLocalProvider();
297
- }
298
- // If Ollama doesn't support tools, fall through to cloud routing
299
- if (isFallbackEnabled()) {
300
- return getBestCloudProvider({ toolCount });
301
- }
302
- }
303
-
304
- // If tool count exceeds Ollama threshold but fallback is enabled, route to cloud
305
- if (toolCount > ollamaMaxTools && isFallbackEnabled()) {
306
- return getBestCloudProvider({ toolCount, useHybridRouting: true });
307
- }
308
-
309
- // Full complexity analysis (without embeddings) for non-tool requests
310
- const analysis = analyzeComplexity(payload);
311
-
312
- // Apply routing decision based on complexity
313
- if (analysis.recommendation === 'local') {
314
- return getBestLocalProvider();
315
- }
316
-
317
- if (isFallbackEnabled()) {
318
- return getBestCloudProvider({ toolCount });
319
- }
320
-
321
- return getBestLocalProvider();
322
- }
323
-
324
310
  /**
325
311
  * Get routing headers to include in response
326
312
  * Phase 3: Expose routing decision to clients
@@ -343,6 +329,23 @@ function getRoutingHeaders(decision) {
343
329
  headers['X-Lynkr-Routing-Reason'] = decision.reason;
344
330
  }
345
331
 
332
+ // Tier and model headers
333
+ if (decision.tier) {
334
+ headers['X-Lynkr-Tier'] = decision.tier;
335
+ }
336
+
337
+ if (decision.model) {
338
+ headers['X-Lynkr-Model'] = decision.model;
339
+ }
340
+
341
+ if (decision.agenticResult?.isAgentic) {
342
+ headers['X-Lynkr-Agentic'] = decision.agenticResult.agentType;
343
+ }
344
+
345
+ if (decision.costOptimized) {
346
+ headers['X-Lynkr-Cost-Optimized'] = 'true';
347
+ }
348
+
346
349
  return headers;
347
350
  }
348
351
 
@@ -355,8 +358,7 @@ function getRoutingStats() {
355
358
  }
356
359
 
357
360
  module.exports = {
358
- // Main routing functions
359
- determineProvider,
361
+ // Main routing function
360
362
  determineProviderSmart,
361
363
 
362
364
  // Helpers
@@ -372,4 +374,11 @@ module.exports = {
372
374
 
373
375
  // Re-export analyzer for direct access
374
376
  analyzeComplexity: require('./complexity-analyzer').analyzeComplexity,
377
+
378
+ // Intelligent routing modules
379
+ getAgenticDetector,
380
+ getModelTierSelector,
381
+ getCostOptimizer,
382
+ AGENT_TYPES,
383
+ TIER_DEFINITIONS,
375
384
  };