lynkr 7.2.5 → 8.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +3 -3
  2. package/config/model-tiers.json +89 -0
  3. package/install.sh +6 -1
  4. package/package.json +4 -2
  5. package/scripts/setup.js +0 -1
  6. package/src/agents/executor.js +14 -6
  7. package/src/api/middleware/session.js +15 -2
  8. package/src/api/openai-router.js +162 -37
  9. package/src/api/providers-handler.js +15 -1
  10. package/src/api/router.js +107 -2
  11. package/src/budget/index.js +4 -3
  12. package/src/clients/databricks.js +431 -234
  13. package/src/clients/gpt-utils.js +181 -0
  14. package/src/clients/ollama-utils.js +66 -140
  15. package/src/clients/routing.js +0 -1
  16. package/src/clients/standard-tools.js +99 -3
  17. package/src/config/index.js +133 -35
  18. package/src/context/toon.js +173 -0
  19. package/src/logger/index.js +23 -0
  20. package/src/orchestrator/index.js +688 -213
  21. package/src/routing/agentic-detector.js +320 -0
  22. package/src/routing/complexity-analyzer.js +202 -2
  23. package/src/routing/cost-optimizer.js +305 -0
  24. package/src/routing/index.js +168 -159
  25. package/src/routing/model-tiers.js +365 -0
  26. package/src/server.js +4 -14
  27. package/src/sessions/cleanup.js +3 -3
  28. package/src/sessions/record.js +10 -1
  29. package/src/sessions/store.js +7 -2
  30. package/src/tools/agent-task.js +48 -1
  31. package/src/tools/index.js +19 -2
  32. package/src/tools/lazy-loader.js +7 -0
  33. package/src/tools/tinyfish.js +358 -0
  34. package/src/tools/truncate.js +1 -0
  35. package/.github/FUNDING.yml +0 -15
  36. package/.github/workflows/README.md +0 -215
  37. package/.github/workflows/ci.yml +0 -69
  38. package/.github/workflows/index.yml +0 -62
  39. package/.github/workflows/web-tools-tests.yml +0 -56
  40. package/CITATIONS.bib +0 -6
  41. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  42. package/DEPLOYMENT.md +0 -1001
  43. package/LYNKR-TUI-PLAN.md +0 -984
  44. package/PERFORMANCE-REPORT.md +0 -866
  45. package/PLAN-per-client-model-routing.md +0 -252
  46. package/ROUTER_COMPARISON.md +0 -173
  47. package/TIER_ROUTING_PLAN.md +0 -771
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -197
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -577
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/documentation/README.md +0 -100
  59. package/documentation/api.md +0 -806
  60. package/documentation/claude-code-cli.md +0 -672
  61. package/documentation/codex-cli.md +0 -397
  62. package/documentation/contributing.md +0 -571
  63. package/documentation/cursor-integration.md +0 -731
  64. package/documentation/docker.md +0 -867
  65. package/documentation/embeddings.md +0 -760
  66. package/documentation/faq.md +0 -659
  67. package/documentation/features.md +0 -396
  68. package/documentation/headroom.md +0 -519
  69. package/documentation/installation.md +0 -706
  70. package/documentation/memory-system.md +0 -476
  71. package/documentation/production.md +0 -601
  72. package/documentation/providers.md +0 -906
  73. package/documentation/testing.md +0 -629
  74. package/documentation/token-optimization.md +0 -323
  75. package/documentation/tools.md +0 -697
  76. package/documentation/troubleshooting.md +0 -893
  77. package/final-test.js +0 -33
  78. package/headroom-sidecar/config.py +0 -93
  79. package/headroom-sidecar/requirements.txt +0 -14
  80. package/headroom-sidecar/server.py +0 -451
  81. package/monitor-agents.sh +0 -31
  82. package/scripts/audit-log-reader.js +0 -399
  83. package/scripts/compact-dictionary.js +0 -204
  84. package/scripts/test-deduplication.js +0 -448
  85. package/src/db/database.sqlite +0 -0
  86. package/test/README.md +0 -212
  87. package/test/azure-openai-config.test.js +0 -204
  88. package/test/azure-openai-error-resilience.test.js +0 -238
  89. package/test/azure-openai-format-conversion.test.js +0 -354
  90. package/test/azure-openai-integration.test.js +0 -281
  91. package/test/azure-openai-routing.test.js +0 -177
  92. package/test/azure-openai-streaming.test.js +0 -171
  93. package/test/bedrock-integration.test.js +0 -471
  94. package/test/comprehensive-test-suite.js +0 -928
  95. package/test/config-validation.test.js +0 -207
  96. package/test/cursor-integration.test.js +0 -484
  97. package/test/format-conversion.test.js +0 -578
  98. package/test/hybrid-routing-integration.test.js +0 -254
  99. package/test/hybrid-routing-performance.test.js +0 -418
  100. package/test/llamacpp-integration.test.js +0 -863
  101. package/test/lmstudio-integration.test.js +0 -335
  102. package/test/memory/extractor.test.js +0 -398
  103. package/test/memory/retriever.test.js +0 -613
  104. package/test/memory/retriever.test.js.bak +0 -585
  105. package/test/memory/search.test.js +0 -537
  106. package/test/memory/search.test.js.bak +0 -389
  107. package/test/memory/store.test.js +0 -344
  108. package/test/memory/store.test.js.bak +0 -312
  109. package/test/memory/surprise.test.js +0 -300
  110. package/test/memory-performance.test.js +0 -472
  111. package/test/openai-integration.test.js +0 -686
  112. package/test/openrouter-error-resilience.test.js +0 -418
  113. package/test/passthrough-mode.test.js +0 -385
  114. package/test/performance-benchmark.js +0 -351
  115. package/test/performance-tests.js +0 -528
  116. package/test/routing.test.js +0 -219
  117. package/test/web-tools.test.js +0 -329
  118. package/test-agents-simple.js +0 -43
  119. package/test-cli-connection.sh +0 -33
  120. package/test-learning-unit.js +0 -126
  121. package/test-learning.js +0 -112
  122. package/test-parallel-agents.sh +0 -124
  123. package/test-parallel-direct.js +0 -155
  124. package/test-subagents.sh +0 -117
@@ -1,910 +0,0 @@
1
- # Implementation Plan: ClawRouter-Inspired Intelligent Routing
2
-
3
- ## Overview
4
-
5
- Enhance Lynkr's existing routing system with 4 ClawRouter-inspired features:
6
- 1. **Enhanced Task Complexity Scorer** - 13-dimension weighted scoring
7
- 2. **Cost Optimization Routing** - Per-model cost tracking with cost-aware decisions
8
- 3. **Agentic Workflow Detection** - Auto-detect and route multi-step tool chains
9
- 4. **Multi-Tier Model Mapping** - SIMPLE/MEDIUM/COMPLEX/REASONING tiers
10
-
11
- ## Current State Analysis
12
-
13
- **Existing routing architecture** (`src/routing/`):
14
-
15
- | File | Current Function | Lines |
16
- |------|------------------|-------|
17
- | `index.js` | Main routing: `determineProviderSmart()`, `determineProvider()` | 376 |
18
- | `complexity-analyzer.js` | Scoring: `analyzeComplexity()` returns 0-100 score | ~550 |
19
-
20
- **Existing scoring breakdown** (complexity-analyzer.js):
21
- - `scoreTokens()` - 0-20 points (lines 184-192)
22
- - `scoreTools()` - 0-20 points (lines 198-207)
23
- - `scoreTaskType()` - 0-25 points (lines 212-267)
24
- - `scoreCodeComplexity()` - 0-20 points (lines 273-319)
25
- - `scoreReasoning()` - 0-15 points (lines 326-361)
26
- - Total: 0-100 with conversation bonus
27
-
28
- **Existing thresholds** (complexity-analyzer.js:366-378):
29
- - `aggressive`: 60 (more local)
30
- - `heuristic`: 40 (balanced, default)
31
- - `conservative`: 25 (more cloud)
32
-
33
- ## Feature 1: Enhanced Task Complexity Scorer (13 Dimensions)
34
-
35
- ### Goal
36
- Extend the existing `analyzeComplexity()` function with a ClawRouter-style weighted scorer.
37
-
38
- ### Approach: Extend, Don't Replace
39
- Instead of creating a new file, **extend** `src/routing/complexity-analyzer.js` with weighted scoring as an optional mode.
40
-
41
- ### Changes to `src/routing/complexity-analyzer.js`
42
-
43
- **Add after line 91 (after `FORCE_LOCAL_PATTERNS`):**
44
-
45
- ```javascript
46
- // ============================================================================
47
- // WEIGHTED SCORING (ClawRouter-Inspired)
48
- // ============================================================================
49
-
50
- const DIMENSION_WEIGHTS = {
51
- // Content Analysis (35%)
52
- tokenCount: 0.08,
53
- promptComplexity: 0.10,
54
- technicalDepth: 0.10,
55
- domainSpecificity: 0.07,
56
- // Tool Analysis (25%)
57
- toolCount: 0.08,
58
- toolComplexity: 0.10,
59
- toolChainPotential: 0.07,
60
- // Reasoning Requirements (25%)
61
- multiStepReasoning: 0.10,
62
- codeGeneration: 0.08,
63
- analysisDepth: 0.07,
64
- // Context Factors (15%)
65
- conversationDepth: 0.05,
66
- priorToolUsage: 0.05,
67
- ambiguity: 0.05,
68
- };
69
-
70
- // Tool complexity weights (higher = more complex)
71
- const TOOL_COMPLEXITY_WEIGHTS = {
72
- Bash: 0.9, // Can do anything
73
- Write: 0.8, // Creates files
74
- Edit: 0.7, // Modifies files
75
- NotebookEdit: 0.7,
76
- Task: 0.9, // Spawns agents
77
- WebSearch: 0.5,
78
- WebFetch: 0.4,
79
- Read: 0.3, // Read-only
80
- Glob: 0.2,
81
- Grep: 0.2,
82
- default: 0.5,
83
- };
84
-
85
- // Domain-specific keywords for complexity
86
- const DOMAIN_KEYWORDS = {
87
- security: /\b(auth|encrypt|vulnerability|injection|xss|csrf|jwt|oauth)\b/i,
88
- ml: /\b(model|train|inference|tensor|embedding|neural|llm|gpt|transformer)\b/i,
89
- distributed: /\b(microservice|kafka|redis|queue|scale|cluster|replicate)\b/i,
90
- database: /\b(sql|nosql|migration|index|query|transaction|orm)\b/i,
91
- };
92
- ```
93
-
94
- **Add new function after `scoreReasoning()` (after line 361):**
95
-
96
- ```javascript
97
- /**
98
- * Calculate weighted complexity score (0-100)
99
- * Uses 15 dimensions with configurable weights
100
- */
101
- function calculateWeightedScore(payload, content) {
102
- const dimensions = {};
103
-
104
- // 1. Token count (0-100)
105
- const tokens = estimateTokens(payload);
106
- dimensions.tokenCount = tokens < 500 ? 10 : tokens < 2000 ? 30 : tokens < 5000 ? 50 : tokens < 10000 ? 70 : 90;
107
-
108
- // 2. Prompt complexity (sentence structure)
109
- const sentences = content.split(/[.!?]+/).filter(s => s.trim().length > 0);
110
- const avgLength = content.length / Math.max(sentences.length, 1);
111
- dimensions.promptComplexity = Math.min(avgLength / 2, 100);
112
-
113
- // 3. Technical depth (keyword density)
114
- const techMatches = (content.match(PATTERNS.technical) || []).length;
115
- dimensions.technicalDepth = Math.min(techMatches * 15, 100);
116
-
117
- // 4. Domain specificity
118
- let domainScore = 0;
119
- for (const [domain, regex] of Object.entries(DOMAIN_KEYWORDS)) {
120
- if (regex.test(content)) domainScore += 25;
121
- }
122
- dimensions.domainSpecificity = Math.min(domainScore, 100);
123
-
124
- // 5. Tool count
125
- const toolCount = payload?.tools?.length ?? 0;
126
- dimensions.toolCount = toolCount === 0 ? 0 : toolCount <= 3 ? 20 : toolCount <= 6 ? 40 : toolCount <= 10 ? 60 : toolCount <= 15 ? 80 : 100;
127
-
128
- // 6. Tool complexity (weighted by tool types)
129
- if (payload?.tools?.length > 0) {
130
- const avgWeight = payload.tools.reduce((sum, t) => {
131
- const name = t.name || t.function?.name || '';
132
- return sum + (TOOL_COMPLEXITY_WEIGHTS[name] || TOOL_COMPLEXITY_WEIGHTS.default);
133
- }, 0) / payload.tools.length;
134
- dimensions.toolComplexity = avgWeight * 100;
135
- } else {
136
- dimensions.toolComplexity = 0;
137
- }
138
-
139
- // 7. Tool chain potential
140
- dimensions.toolChainPotential = /\b(then|after|next|finally|first.*then)\b/i.test(content) ? 70 : 20;
141
-
142
- // 8. Multi-step reasoning
143
- dimensions.multiStepReasoning = ADVANCED_PATTERNS.reasoning.stepByStep.test(content) ? 80 :
144
- ADVANCED_PATTERNS.reasoning.planning.test(content) ? 60 : 20;
145
-
146
- // 9. Code generation
147
- dimensions.codeGeneration = /\b(write|create|implement|build|generate)\s+(a\s+)?(function|class|module|api|endpoint)/i.test(content) ? 80 : 20;
148
-
149
- // 10. Analysis depth
150
- dimensions.analysisDepth = ADVANCED_PATTERNS.reasoning.tradeoffs.test(content) ? 80 :
151
- ADVANCED_PATTERNS.reasoning.analysis.test(content) ? 60 : 20;
152
-
153
- // 11. Conversation depth
154
- const messageCount = payload?.messages?.length ?? 0;
155
- dimensions.conversationDepth = messageCount < 3 ? 10 : messageCount < 6 ? 30 : messageCount < 10 ? 50 : 70;
156
-
157
- // 12. Prior tool usage
158
- const toolResults = (payload?.messages || []).filter(m =>
159
- m.role === 'user' && Array.isArray(m.content) && m.content.some(c => c.type === 'tool_result')
160
- ).length;
161
- dimensions.priorToolUsage = toolResults === 0 ? 10 : toolResults < 3 ? 40 : toolResults < 6 ? 60 : 80;
162
-
163
- // 13. Ambiguity (inverse of specificity)
164
- const hasSpecifics = /\b(file|function|line|error|bug|at\s+\w+:\d+)\b/i.test(content);
165
- dimensions.ambiguity = hasSpecifics ? 20 : content.length < 50 ? 70 : 40;
166
-
167
- // Calculate weighted total
168
- let weightedTotal = 0;
169
- for (const [dimension, weight] of Object.entries(DIMENSION_WEIGHTS)) {
170
- weightedTotal += (dimensions[dimension] || 0) * weight;
171
- }
172
-
173
- return {
174
- score: Math.round(weightedTotal),
175
- dimensions,
176
- weights: DIMENSION_WEIGHTS,
177
- };
178
- }
179
- ```
180
-
181
- **Modify `analyzeComplexity()` (line 386) to use weighted scoring when enabled:**
182
-
183
- ```javascript
184
- function analyzeComplexity(payload) {
185
- const content = extractContent(payload);
186
- const useWeighted = config.routing?.weightedScoring ?? false;
187
-
188
- if (useWeighted) {
189
- const weighted = calculateWeightedScore(payload, content);
190
- const threshold = getThreshold();
191
- const recommendation = weighted.score >= threshold ? 'cloud' : 'local';
192
-
193
- return {
194
- score: weighted.score,
195
- threshold,
196
- mode: 'weighted',
197
- recommendation,
198
- breakdown: weighted.dimensions,
199
- weights: weighted.weights,
200
- };
201
- }
202
-
203
- // ... existing logic unchanged
204
- }
205
- ```
206
-
207
- ### Config Addition (`src/config/index.js`)
208
-
209
- ```javascript
210
- routing: {
211
- weightedScoring: process.env.ROUTING_WEIGHTED_SCORING === 'true',
212
- },
213
- ```
214
-
215
- ---
216
-
217
- ## Feature 2: Cost Optimization Routing
218
-
219
- ### Goal
220
- Track per-model costs and make cost-aware routing decisions.
221
-
222
- ### Data Source: models.dev API (250+ Models)
223
-
224
- Instead of maintaining a static config, we'll fetch model data from **https://models.dev/api.json**, which provides:
225
-
226
- - **250+ models** across 15+ providers
227
- - **Real-time pricing** (input/output cost per 1M tokens)
228
- - **Capabilities** (tool_call, reasoning, structured_output)
229
- - **Context limits** and output limits
230
- - **Knowledge cutoff dates**
231
-
232
- ### API Response Structure
233
-
234
- ```json
235
- {
236
- "provider_id": {
237
- "id": "provider_id",
238
- "name": "Provider Name",
239
- "api": "https://api.provider.com/v1",
240
- "models": {
241
- "model-id": {
242
- "id": "model-id",
243
- "name": "Model Name",
244
- "family": "model-family",
245
- "cost": {
246
- "input": 3.00, // $ per 1M tokens
247
- "output": 15.00,
248
- "cache_read": 0.30,
249
- "cache_write": 3.75
250
- },
251
- "context": 200000,
252
- "output": 8192,
253
- "tool_call": true,
254
- "reasoning": true,
255
- "structured_output": true,
256
- "input": ["text", "image", "pdf"],
257
- "output": ["text"],
258
- "knowledge": "2024-04"
259
- }
260
- }
261
- }
262
- }
263
- ```
264
-
265
- ### Local Tier Config: `config/model-tiers.json`
266
-
267
- We still need a local config for **tier mappings** (which models to use at each complexity level):
268
-
269
- ```json
270
- {
271
- "tiers": {
272
- "SIMPLE": {
273
- "description": "Greetings, simple Q&A, confirmations",
274
- "range": [0, 25],
275
- "preferred": {
276
- "ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b"],
277
- "openai": ["gpt-4o-mini"],
278
- "anthropic": ["claude-3-haiku"],
279
- "google": ["gemini-2.0-flash"],
280
- "openrouter": ["deepseek/deepseek-chat", "google/gemini-flash-1.5"]
281
- }
282
- },
283
- "MEDIUM": {
284
- "description": "Code reading, simple edits, research",
285
- "range": [26, 50],
286
- "preferred": {
287
- "ollama": ["qwen2.5:32b", "deepseek-coder:33b"],
288
- "openai": ["gpt-4o"],
289
- "anthropic": ["claude-sonnet-4-5"],
290
- "google": ["gemini-1.5-pro"],
291
- "openrouter": ["anthropic/claude-3.5-sonnet"]
292
- }
293
- },
294
- "COMPLEX": {
295
- "description": "Multi-file changes, debugging, architecture",
296
- "range": [51, 75],
297
- "preferred": {
298
- "ollama": ["qwen2.5:72b", "llama3.1:70b"],
299
- "openai": ["o1-mini", "o3-mini"],
300
- "anthropic": ["claude-sonnet-4-5"],
301
- "openrouter": ["meta-llama/llama-3.1-405b"]
302
- }
303
- },
304
- "REASONING": {
305
- "description": "Complex analysis, security audits, novel problems",
306
- "range": [76, 100],
307
- "preferred": {
308
- "openai": ["o1", "o1-pro"],
309
- "anthropic": ["claude-opus-4-5"],
310
- "deepseek": ["deepseek-r1"],
311
- "openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner"]
312
- }
313
- }
314
- },
315
- "local_models": {
316
- "ollama": { "free": true, "default_tier": "SIMPLE" },
317
- "llamacpp": { "free": true, "default_tier": "SIMPLE" },
318
- "lmstudio": { "free": true, "default_tier": "SIMPLE" }
319
- }
320
- }
321
- ```
322
-
323
- ### New File: `src/routing/model-registry.js`
324
-
325
- ```javascript
326
- /**
327
- * Model Registry
328
- * Fetches model data from models.dev API and provides lookup
329
- * Caches data locally with configurable refresh interval
330
- */
331
-
332
- const fs = require('fs');
333
- const path = require('path');
334
- const logger = require('../logger');
335
- const config = require('../config');
336
-
337
- const API_URL = 'https://models.dev/api.json';
338
- const CACHE_FILE = path.join(__dirname, '../../data/models-cache.json');
339
- const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
340
-
341
- class ModelRegistry {
342
- constructor() {
343
- this.providers = {}; // Raw API data by provider
344
- this.modelIndex = new Map(); // model-id -> { provider, ...model }
345
- this.tiers = {}; // Local tier config
346
- this.loaded = false;
347
- this.lastFetch = 0;
348
- }
349
-
350
- /**
351
- * Initialize registry - fetch from API or load cache
352
- */
353
- async initialize() {
354
- if (this.loaded) return;
355
-
356
- // Load local tier config
357
- this._loadTierConfig();
358
-
359
- // Try to load from cache first
360
- if (this._loadFromCache()) {
361
- this.loaded = true;
362
- // Refresh in background if stale
363
- if (Date.now() - this.lastFetch > CACHE_TTL_MS) {
364
- this._fetchFromAPI().catch(err =>
365
- logger.warn({ err: err.message }, '[ModelRegistry] Background refresh failed')
366
- );
367
- }
368
- return;
369
- }
370
-
371
- // Fetch from API
372
- await this._fetchFromAPI();
373
- this.loaded = true;
374
- }
375
-
376
- /**
377
- * Fetch fresh data from models.dev API
378
- */
379
- async _fetchFromAPI() {
380
- try {
381
- const response = await fetch(API_URL, {
382
- signal: AbortSignal.timeout(10000),
383
- headers: { 'Accept': 'application/json' }
384
- });
385
-
386
- if (!response.ok) throw new Error(`HTTP ${response.status}`);
387
-
388
- const data = await response.json();
389
- this._processAPIData(data);
390
- this._saveToCache(data);
391
- this.lastFetch = Date.now();
392
-
393
- logger.info({
394
- providers: Object.keys(this.providers).length,
395
- models: this.modelIndex.size
396
- }, '[ModelRegistry] Loaded from API');
397
-
398
- } catch (err) {
399
- logger.error({ err: err.message }, '[ModelRegistry] API fetch failed');
400
- // Fall back to cache or defaults
401
- if (!this._loadFromCache()) {
402
- this._loadDefaults();
403
- }
404
- }
405
- }
406
-
407
- /**
408
- * Process API data into indexed format
409
- */
410
- _processAPIData(data) {
411
- this.providers = data;
412
- this.modelIndex.clear();
413
-
414
- for (const [providerId, providerData] of Object.entries(data)) {
415
- if (!providerData.models) continue;
416
-
417
- for (const [modelId, modelInfo] of Object.entries(providerData.models)) {
418
- const fullId = `${providerId}/${modelId}`;
419
- const entry = {
420
- id: modelId,
421
- fullId,
422
- provider: providerId,
423
- providerName: providerData.name,
424
- ...modelInfo,
425
- // Normalize cost
426
- cost: modelInfo.cost || { input: 0, output: 0 },
427
- };
428
-
429
- // Index by multiple keys for flexible lookup
430
- this.modelIndex.set(modelId.toLowerCase(), entry);
431
- this.modelIndex.set(fullId.toLowerCase(), entry);
432
- if (modelInfo.name) {
433
- this.modelIndex.set(modelInfo.name.toLowerCase(), entry);
434
- }
435
- }
436
- }
437
- }
438
-
439
- /**
440
- * Load local tier configuration
441
- */
442
- _loadTierConfig() {
443
- const tierPath = path.join(__dirname, '../../config/model-tiers.json');
444
- try {
445
- const data = JSON.parse(fs.readFileSync(tierPath, 'utf8'));
446
- this.tiers = data.tiers || {};
447
- this.localModels = data.local_models || {};
448
- } catch (err) {
449
- logger.warn({ err: err.message }, '[ModelRegistry] Using default tier config');
450
- this._loadDefaultTiers();
451
- }
452
- }
453
-
454
- /**
455
- * Get model info by name/id
456
- */
457
- getModel(name) {
458
- if (!name) return null;
459
- return this.modelIndex.get(name.toLowerCase()) || null;
460
- }
461
-
462
- /**
463
- * Get cost for a model
464
- */
465
- getCost(modelName) {
466
- const model = this.getModel(modelName);
467
- if (!model) return { input: 1.0, output: 3.0, tier: 'MEDIUM' };
468
-
469
- // Determine tier from capabilities
470
- let tier = 'MEDIUM';
471
- if (model.reasoning) tier = 'REASONING';
472
- else if (model.cost?.input >= 10) tier = 'COMPLEX';
473
- else if (model.cost?.input <= 0.5) tier = 'SIMPLE';
474
-
475
- return {
476
- input: model.cost?.input || 0,
477
- output: model.cost?.output || 0,
478
- cacheRead: model.cost?.cache_read,
479
- cacheWrite: model.cost?.cache_write,
480
- tier,
481
- context: model.context,
482
- toolCall: model.tool_call,
483
- reasoning: model.reasoning,
484
- };
485
- }
486
-
487
- /**
488
- * Get preferred models for a tier and provider
489
- */
490
- getTierModels(tier, provider) {
491
- return this.tiers[tier]?.preferred?.[provider] || [];
492
- }
493
-
494
- /**
495
- * Get all providers that have models for a tier
496
- */
497
- getProvidersForTier(tier) {
498
- return Object.keys(this.tiers[tier]?.preferred || {});
499
- }
500
-
501
- /**
502
- * Check if model supports tool calling
503
- */
504
- supportsTools(modelName) {
505
- const model = this.getModel(modelName);
506
- return model?.tool_call === true;
507
- }
508
-
509
- /**
510
- * Check if model is free (local)
511
- */
512
- isFree(modelName) {
513
- const model = this.getModel(modelName);
514
- if (!model) {
515
- // Check local models config
516
- const provider = modelName?.split('/')[0] || modelName;
517
- return this.localModels[provider]?.free === true;
518
- }
519
- return model.cost?.input === 0 && model.cost?.output === 0;
520
- }
521
-
522
- /**
523
- * Get all models matching criteria
524
- */
525
- findModels(criteria = {}) {
526
- const results = [];
527
- for (const model of this.modelIndex.values()) {
528
- if (criteria.toolCall && !model.tool_call) continue;
529
- if (criteria.reasoning && !model.reasoning) continue;
530
- if (criteria.maxInputCost && model.cost?.input > criteria.maxInputCost) continue;
531
- if (criteria.minContext && model.context < criteria.minContext) continue;
532
- results.push(model);
533
- }
534
- return results;
535
- }
536
-
537
- /**
538
- * Get stats for metrics endpoint
539
- */
540
- getStats() {
541
- return {
542
- totalModels: this.modelIndex.size,
543
- providers: Object.keys(this.providers).length,
544
- lastFetch: this.lastFetch,
545
- cacheAge: Date.now() - this.lastFetch,
546
- tiers: Object.keys(this.tiers),
547
- };
548
- }
549
-
550
- // Cache management
551
- _loadFromCache() {
552
- try {
553
- if (!fs.existsSync(CACHE_FILE)) return false;
554
- const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
555
- this._processAPIData(cache.data);
556
- this.lastFetch = cache.timestamp || 0;
557
- logger.debug({ age: Date.now() - this.lastFetch }, '[ModelRegistry] Loaded from cache');
558
- return true;
559
- } catch (err) {
560
- return false;
561
- }
562
- }
563
-
564
- _saveToCache(data) {
565
- try {
566
- const dir = path.dirname(CACHE_FILE);
567
- if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
568
- fs.writeFileSync(CACHE_FILE, JSON.stringify({ data, timestamp: Date.now() }));
569
- } catch (err) {
570
- logger.warn({ err: err.message }, '[ModelRegistry] Cache save failed');
571
- }
572
- }
573
-
574
- _loadDefaults() {
575
- this.tiers = { SIMPLE: { preferred: { ollama: ['llama3.2'] } } };
576
- this.localModels = { ollama: { free: true } };
577
- }
578
-
579
- _loadDefaultTiers() {
580
- this.tiers = {
581
- SIMPLE: { range: [0, 25], preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
582
- MEDIUM: { range: [26, 50], preferred: { openai: ['gpt-4o'], anthropic: ['claude-sonnet-4-5'] } },
583
- COMPLEX: { range: [51, 75], preferred: { openai: ['o1-mini'] } },
584
- REASONING: { range: [76, 100], preferred: { openai: ['o1'], anthropic: ['claude-opus-4-5'] } },
585
- };
586
- }
587
- }
588
-
589
- // Singleton with async init
590
- let instance = null;
591
- async function getModelRegistry() {
592
- if (!instance) {
593
- instance = new ModelRegistry();
594
- await instance.initialize();
595
- }
596
- return instance;
597
- }
598
-
599
- // Sync getter for already-initialized registry
600
- function getModelRegistrySync() {
601
- if (!instance) {
602
- instance = new ModelRegistry();
603
- // Sync init - load from cache only
604
- instance._loadTierConfig();
605
- instance._loadFromCache() || instance._loadDefaults();
606
- instance.loaded = true;
607
- }
608
- return instance;
609
- }
610
-
611
- module.exports = { ModelRegistry, getModelRegistry, getModelRegistrySync };
612
- ```
613
-
614
- ---
615
-
616
- ## Feature 3: Agentic Workflow Detection
617
-
618
- ### Goal
619
- Auto-detect when a request is part of an agentic workflow and route it to a sufficiently capable model.
620
-
621
- ### New File: `src/routing/agentic-detector.js`
622
-
623
- ```javascript
624
- /**
625
- * Agentic Workflow Detector
626
- * Detects multi-step tool chains and autonomous agent patterns
627
- */
628
-
629
- const logger = require('../logger');
630
-
631
- // Agent type classification
632
- const AGENT_TYPES = {
633
- SINGLE_SHOT: { minTier: 'SIMPLE', scoreBoost: 0 },
634
- TOOL_CHAIN: { minTier: 'MEDIUM', scoreBoost: 15, requiresToolUse: true },
635
- ITERATIVE: { minTier: 'COMPLEX', scoreBoost: 25, requiresToolUse: true },
636
- AUTONOMOUS: { minTier: 'REASONING', scoreBoost: 35, requiresToolUse: true },
637
- };
638
-
639
- // Detection patterns
640
- const PATTERNS = {
641
- toolChain: /\b(then\s+use|after\s+that|next\s+step|finally|first.*then)\b/i,
642
- iterative: /\b(keep\s+trying|until|repeat|loop|retry|iterate)\b/i,
643
- autonomous: /\b(figure\s+out|solve|complete\s+the\s+task|do\s+whatever|make\s+it\s+work)\b/i,
644
- multiFile: /\b(multiple\s+files?|across\s+(the\s+)?codebase|all\s+files?|refactor\s+entire)\b/i,
645
- planning: /\b(plan|design|architect|strategy|roadmap)\b/i,
646
- };
647
-
648
- // High-complexity tools that indicate agentic work
649
- const AGENTIC_TOOLS = new Set([
650
- 'Bash', 'bash', 'shell',
651
- 'Write', 'write_file', 'fs_write',
652
- 'Edit', 'edit_file', 'fs_edit', 'edit_patch',
653
- 'Task', 'agent_task', 'spawn_agent',
654
- 'Git', 'git_commit', 'git_push',
655
- 'Test', 'run_tests',
656
- ]);
657
-
658
- class AgenticDetector {
659
- detect(payload) {
660
- const messages = payload?.messages || [];
661
- const tools = payload?.tools || [];
662
- const content = this._extractContent(messages);
663
-
664
- let score = 0;
665
- const signals = [];
666
-
667
- // Signal 1: Tool count
668
- if (tools.length > 5) {
669
- score += 20;
670
- signals.push({ signal: 'high_tool_count', value: tools.length, weight: 20 });
671
- } else if (tools.length > 3) {
672
- score += 10;
673
- signals.push({ signal: 'moderate_tool_count', value: tools.length, weight: 10 });
674
- }
675
-
676
- // Signal 2: Prior tool results
677
- const toolResultCount = this._countToolResults(messages);
678
- if (toolResultCount > 3) {
679
- score += 25;
680
- signals.push({ signal: 'many_tool_results', value: toolResultCount, weight: 25 });
681
- } else if (toolResultCount > 0) {
682
- score += 15;
683
- signals.push({ signal: 'has_tool_results', value: toolResultCount, weight: 15 });
684
- }
685
-
686
- // Signal 3: Agentic tools present
687
- const agenticToolCount = tools.filter(t => {
688
- const name = t.name || t.function?.name || '';
689
- return AGENTIC_TOOLS.has(name);
690
- }).length;
691
- if (agenticToolCount > 2) {
692
- score += 20;
693
- signals.push({ signal: 'agentic_tools', value: agenticToolCount, weight: 20 });
694
- }
695
-
696
- // Signal 4: Pattern matching
697
- if (PATTERNS.autonomous.test(content)) {
698
- score += 25;
699
- signals.push({ signal: 'autonomous_pattern', weight: 25 });
700
- } else if (PATTERNS.iterative.test(content)) {
701
- score += 20;
702
- signals.push({ signal: 'iterative_pattern', weight: 20 });
703
- } else if (PATTERNS.toolChain.test(content)) {
704
- score += 15;
705
- signals.push({ signal: 'tool_chain_pattern', weight: 15 });
706
- }
707
-
708
- if (PATTERNS.multiFile.test(content)) {
709
- score += 15;
710
- signals.push({ signal: 'multi_file', weight: 15 });
711
- }
712
-
713
- if (PATTERNS.planning.test(content)) {
714
- score += 10;
715
- signals.push({ signal: 'planning', weight: 10 });
716
- }
717
-
718
- // Signal 5: Conversation depth
719
- if (messages.length > 10) {
720
- score += 15;
721
- signals.push({ signal: 'deep_conversation', value: messages.length, weight: 15 });
722
- } else if (messages.length > 5) {
723
- score += 8;
724
- signals.push({ signal: 'ongoing_conversation', value: messages.length, weight: 8 });
725
- }
726
-
727
- const agentType = this._classifyAgentType(score);
728
- const isAgentic = score >= 30;
729
-
730
- return {
731
- isAgentic,
732
- agentType,
733
- confidence: Math.min(score / 100, 1),
734
- score,
735
- signals,
736
- minTier: AGENT_TYPES[agentType].minTier,
737
- scoreBoost: AGENT_TYPES[agentType].scoreBoost,
738
- };
739
- }
740
-
741
- _classifyAgentType(score) {
742
- if (score >= 60) return 'AUTONOMOUS';
743
- if (score >= 40) return 'ITERATIVE';
744
- if (score >= 25) return 'TOOL_CHAIN';
745
- return 'SINGLE_SHOT';
746
- }
747
-
748
- _extractContent(messages) {
749
- const userMsgs = messages.filter(m => m?.role === 'user');
750
- if (userMsgs.length === 0) return '';
751
- const last = userMsgs[userMsgs.length - 1];
752
- if (typeof last.content === 'string') return last.content;
753
- if (Array.isArray(last.content)) {
754
- return last.content.filter(b => b?.type === 'text').map(b => b.text || '').join(' ');
755
- }
756
- return '';
757
- }
758
-
759
- _countToolResults(messages) {
760
- return messages.reduce((count, m) => {
761
- if (m?.role === 'user' && Array.isArray(m.content)) {
762
- return count + m.content.filter(c => c?.type === 'tool_result').length;
763
- }
764
- return count;
765
- }, 0);
766
- }
767
- }
768
-
769
- let instance = null;
770
- function getAgenticDetector() {
771
- if (!instance) instance = new AgenticDetector();
772
- return instance;
773
- }
774
-
775
- module.exports = { AgenticDetector, getAgenticDetector, AGENT_TYPES };
776
- ```
777
-
778
- ---
779
-
780
- ## Feature 4: Multi-Tier Model Mapping
781
-
782
- ### New File: `src/routing/model-tiers.js`
783
-
784
- ```javascript
785
- /**
786
- * Model Tier Selector
787
- * Maps complexity scores to appropriate models per provider
788
- */
789
-
790
- const logger = require('../logger');
791
- const config = require('../config');
792
- const { getModelRegistry } = require('./model-registry');
793
-
794
- const TIER_DEFINITIONS = {
795
- SIMPLE: { description: 'Greetings, simple Q&A', range: [0, 25], priority: 1 },
796
- MEDIUM: { description: 'Code reading, simple edits', range: [26, 50], priority: 2 },
797
- COMPLEX: { description: 'Multi-file changes, debugging', range: [51, 75], priority: 3 },
798
- REASONING: { description: 'Complex analysis, security audits', range: [76, 100], priority: 4 },
799
- };
800
-
801
- class ModelTierSelector {
802
- constructor() {
803
- this.registry = getModelRegistry();
804
- }
805
-
806
- getTier(complexityScore) {
807
- for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
808
- if (complexityScore >= def.range[0] && complexityScore <= def.range[1]) {
809
- return tier;
810
- }
811
- }
812
- return complexityScore > 75 ? 'REASONING' : 'SIMPLE';
813
- }
814
-
815
- selectModel(tier, provider) {
816
- const override = config.modelTiers?.overrides?.[tier];
817
- if (override) return { model: override, source: 'override' };
818
-
819
- const providerModels = this.registry.getTierModels(tier, provider);
820
- if (!providerModels?.length) {
821
- const fallback = this.getFallbackModel(tier, provider);
822
- if (fallback) return { model: fallback.model, source: 'fallback', actualTier: fallback.tier };
823
- return { model: null, error: `No models for ${provider} at tier ${tier}` };
824
- }
825
-
826
- return { model: providerModels[0], source: 'tier_mapping', tier };
827
- }
828
-
829
- getFallbackModel(requestedTier, provider) {
830
- const tierOrder = ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
831
- const startIndex = tierOrder.indexOf(requestedTier);
832
-
833
- for (let i = startIndex + 1; i < tierOrder.length; i++) {
834
- const models = this.registry.getTierModels(tierOrder[i], provider);
835
- if (models?.length) return { model: models[0], tier: tierOrder[i] };
836
- }
837
- return null;
838
- }
839
-
840
- getTierStats() {
841
- const modelCounts = {};
842
- for (const tier of Object.keys(TIER_DEFINITIONS)) {
843
- modelCounts[tier] = {};
844
- for (const provider of this.registry.getProvidersForTier(tier)) {
845
- modelCounts[tier][provider] = this.registry.getTierModels(tier, provider).length;
846
- }
847
- }
848
- return { tiers: TIER_DEFINITIONS, modelCounts };
849
- }
850
- }
851
-
852
- let instance = null;
853
- function getModelTierSelector() {
854
- if (!instance) instance = new ModelTierSelector();
855
- return instance;
856
- }
857
-
858
- module.exports = { ModelTierSelector, getModelTierSelector, TIER_DEFINITIONS };
859
- ```
860
-
861
- ---
862
-
863
- ## Files Summary
864
-
865
- ### Files to Create (5 files)
866
-
867
- | File | Lines | Purpose |
868
- |------|-------|---------|
869
- | `config/model-tiers.json` | ~60 | Local tier preferences |
870
- | `src/routing/model-registry.js` | ~220 | Fetches from models.dev API |
871
- | `src/routing/model-tiers.js` | ~80 | Tier selection |
872
- | `src/routing/cost-optimizer.js` | ~150 | Cost tracking |
873
- | `src/routing/agentic-detector.js` | ~170 | Workflow detection |
874
-
875
- ### Files to Modify
876
-
877
- | File | Changes |
878
- |------|---------|
879
- | `src/routing/complexity-analyzer.js` | Add weighted scoring (~80 lines) |
880
- | `src/routing/index.js` | Integrate all modules (~50 lines) |
881
- | `src/config/index.js` | Add config options (~25 lines) |
882
- | `src/api/router.js` | Add 4 endpoints (~40 lines) |
883
-
884
- ---
885
-
886
- ## Configuration
887
-
888
- ```bash
889
- # Intelligent Routing
890
- ROUTING_WEIGHTED_SCORING=false
891
- ROUTING_COST_OPTIMIZATION=false
892
- ROUTING_AGENTIC_DETECTION=true
893
- COST_BUDGET_DAILY_USD=0
894
- COST_BUDGET_SESSION_USD=0
895
-
896
- # Model Tiers
897
- MODEL_TIER_ENABLED=false
898
- MODEL_TIER_OVERRIDE_SIMPLE=gpt-4o-mini
899
- MODEL_TIER_OVERRIDE_REASONING=o1
900
- ```
901
-
902
- ---
903
-
904
- ## API Endpoints
905
-
906
- - `GET /routing/models` - Model registry stats (from models.dev)
907
- - `GET /routing/models/:model` - Specific model info
908
- - `GET /routing/tiers` - Tier definitions and counts
909
- - `GET /metrics/cost-optimization` - Cost stats
910
- - `POST /routing/analyze` - Test request analysis