lynkr 7.0.1 → 7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWROUTER_ROUTING_PLAN.md +910 -0
- package/README.md +41 -7
- package/package.json +9 -5
- package/src/api/health.js +10 -1
- package/src/api/openai-router.js +12 -1
- package/src/budget/index.js +10 -2
- package/src/clients/databricks.js +11 -0
- package/src/clients/ollama-startup.js +120 -0
- package/src/config/index.js +7 -3
- package/src/indexer/babel-parser.js +213 -0
- package/src/indexer/parser.js +126 -49
- package/src/server.js +8 -0
|
@@ -0,0 +1,910 @@
|
|
|
1
|
+
# Implementation Plan: ClawRouter-Inspired Intelligent Routing
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Enhance Lynkr's existing routing system with 4 ClawRouter-inspired features:
|
|
6
|
+
1. **Enhanced Task Complexity Scorer** - 13-dimension weighted scoring
|
|
7
|
+
2. **Cost Optimization Routing** - Per-model cost tracking with cost-aware decisions
|
|
8
|
+
3. **Agentic Workflow Detection** - Auto-detect and route multi-step tool chains
|
|
9
|
+
4. **Multi-Tier Model Mapping** - SIMPLE/MEDIUM/COMPLEX/REASONING tiers
|
|
10
|
+
|
|
11
|
+
## Current State Analysis
|
|
12
|
+
|
|
13
|
+
**Existing routing architecture** (`src/routing/`):
|
|
14
|
+
|
|
15
|
+
| File | Current Function | Lines |
|
|
16
|
+
|------|------------------|-------|
|
|
17
|
+
| `index.js` | Main routing: `determineProviderSmart()`, `determineProvider()` | 376 |
|
|
18
|
+
| `complexity-analyzer.js` | Scoring: `analyzeComplexity()` returns 0-100 score | ~550 |
|
|
19
|
+
|
|
20
|
+
**Existing scoring breakdown** (complexity-analyzer.js):
|
|
21
|
+
- `scoreTokens()` - 0-20 points (lines 184-192)
|
|
22
|
+
- `scoreTools()` - 0-20 points (lines 198-207)
|
|
23
|
+
- `scoreTaskType()` - 0-25 points (lines 212-267)
|
|
24
|
+
- `scoreCodeComplexity()` - 0-20 points (lines 273-319)
|
|
25
|
+
- `scoreReasoning()` - 0-15 points (lines 326-361)
|
|
26
|
+
- Total: 0-100 with conversation bonus
|
|
27
|
+
|
|
28
|
+
**Existing thresholds** (complexity-analyzer.js:366-378):
|
|
29
|
+
- `aggressive`: 60 (more local)
|
|
30
|
+
- `heuristic`: 40 (balanced, default)
|
|
31
|
+
- `conservative`: 25 (more cloud)
|
|
32
|
+
|
|
33
|
+
## Feature 1: Enhanced Task Complexity Scorer (13 Dimensions)
|
|
34
|
+
|
|
35
|
+
### Goal
|
|
36
|
+
Extend the existing `analyzeComplexity()` function with a ClawRouter-style weighted scorer.
|
|
37
|
+
|
|
38
|
+
### Approach: Extend, Don't Replace
|
|
39
|
+
Instead of creating a new file, **extend** `src/routing/complexity-analyzer.js` with weighted scoring as an optional mode.
|
|
40
|
+
|
|
41
|
+
### Changes to `src/routing/complexity-analyzer.js`
|
|
42
|
+
|
|
43
|
+
**Add after line 91 (after FORCE_LOCAL_PATTERNS):**
|
|
44
|
+
|
|
45
|
+
```javascript
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// WEIGHTED SCORING (ClawRouter-Inspired)
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
const DIMENSION_WEIGHTS = {
|
|
51
|
+
// Content Analysis (35%)
|
|
52
|
+
tokenCount: 0.08,
|
|
53
|
+
promptComplexity: 0.10,
|
|
54
|
+
technicalDepth: 0.10,
|
|
55
|
+
domainSpecificity: 0.07,
|
|
56
|
+
// Tool Analysis (25%)
|
|
57
|
+
toolCount: 0.08,
|
|
58
|
+
toolComplexity: 0.10,
|
|
59
|
+
toolChainPotential: 0.07,
|
|
60
|
+
// Reasoning Requirements (25%)
|
|
61
|
+
multiStepReasoning: 0.10,
|
|
62
|
+
codeGeneration: 0.08,
|
|
63
|
+
analysisDepth: 0.07,
|
|
64
|
+
// Context Factors (15%)
|
|
65
|
+
conversationDepth: 0.05,
|
|
66
|
+
priorToolUsage: 0.05,
|
|
67
|
+
ambiguity: 0.05,
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
// Tool complexity weights (higher = more complex)
|
|
71
|
+
const TOOL_COMPLEXITY_WEIGHTS = {
|
|
72
|
+
Bash: 0.9, // Can do anything
|
|
73
|
+
Write: 0.8, // Creates files
|
|
74
|
+
Edit: 0.7, // Modifies files
|
|
75
|
+
NotebookEdit: 0.7,
|
|
76
|
+
Task: 0.9, // Spawns agents
|
|
77
|
+
WebSearch: 0.5,
|
|
78
|
+
WebFetch: 0.4,
|
|
79
|
+
Read: 0.3, // Read-only
|
|
80
|
+
Glob: 0.2,
|
|
81
|
+
Grep: 0.2,
|
|
82
|
+
default: 0.5,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
// Domain-specific keywords for complexity
|
|
86
|
+
const DOMAIN_KEYWORDS = {
|
|
87
|
+
security: /\b(auth|encrypt|vulnerability|injection|xss|csrf|jwt|oauth)\b/i,
|
|
88
|
+
ml: /\b(model|train|inference|tensor|embedding|neural|llm|gpt|transformer)\b/i,
|
|
89
|
+
distributed: /\b(microservice|kafka|redis|queue|scale|cluster|replicate)\b/i,
|
|
90
|
+
database: /\b(sql|nosql|migration|index|query|transaction|orm)\b/i,
|
|
91
|
+
};
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Add new function after `scoreReasoning()` (after line 361):**
|
|
95
|
+
|
|
96
|
+
```javascript
|
|
97
|
+
/**
 * Calculate a weighted complexity score (0-100) for a request.
 *
 * Each of the 13 dimensions weighted in DIMENSION_WEIGHTS is scored on a
 * 0-100 scale; the result is the sum of every dimension score multiplied by
 * its weight, rounded to the nearest integer.
 *
 * @param {object} payload - Request payload. `tools` and `messages` are read
 *   when they are arrays; anything else is treated as an empty list.
 * @param {string} content - Prompt text to analyse. A non-string value is
 *   treated as empty text instead of throwing.
 * @returns {{score: number, dimensions: Object<string, number>, weights: Object<string, number>}}
 *   The rounded weighted total, the raw per-dimension scores, and the weight
 *   table that was applied.
 */
function calculateWeightedScore(payload, content) {
  const text = typeof content === 'string' ? content : '';
  const tools = Array.isArray(payload?.tools) ? payload.tools : [];
  const messages = Array.isArray(payload?.messages) ? payload.messages : [];
  const dimensions = {};

  // 1. Token count, bucketed
  const tokens = estimateTokens(payload);
  dimensions.tokenCount =
    tokens < 500 ? 10 : tokens < 2000 ? 30 : tokens < 5000 ? 50 : tokens < 10000 ? 70 : 90;

  // 2. Prompt complexity: average characters per sentence, halved, capped at 100
  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0);
  const avgLength = text.length / Math.max(sentences.length, 1);
  dimensions.promptComplexity = Math.min(avgLength / 2, 100);

  // 3. Technical depth: 15 points per technical-pattern match, capped at 100
  const techMatches = (text.match(PATTERNS.technical) || []).length;
  dimensions.technicalDepth = Math.min(techMatches * 15, 100);

  // 4. Domain specificity: 25 points per domain whose keywords appear
  const matchedDomains = Object.values(DOMAIN_KEYWORDS).filter((regex) => regex.test(text)).length;
  dimensions.domainSpecificity = Math.min(matchedDomains * 25, 100);

  // 5. Tool count, bucketed
  const toolCount = tools.length;
  dimensions.toolCount =
    toolCount === 0 ? 0
      : toolCount <= 3 ? 20
        : toolCount <= 6 ? 40
          : toolCount <= 10 ? 60
            : toolCount <= 15 ? 80
              : 100;

  // 6. Tool complexity: mean per-tool weight scaled to 0-100.
  // Only own keys of the weight table are honoured, so a tool called
  // "toString" or "constructor" cannot pick up an inherited Object.prototype
  // member and poison the sum with a non-number.
  const weightOf = (tool) => {
    const name = tool?.name || tool?.function?.name || '';
    return Object.hasOwn(TOOL_COMPLEXITY_WEIGHTS, name)
      ? TOOL_COMPLEXITY_WEIGHTS[name]
      : TOOL_COMPLEXITY_WEIGHTS.default;
  };
  if (toolCount > 0) {
    const totalWeight = tools.reduce((sum, tool) => sum + weightOf(tool), 0);
    dimensions.toolComplexity = (totalWeight / toolCount) * 100;
  } else {
    dimensions.toolComplexity = 0;
  }

  // 7. Tool chain potential: sequencing words hint at chained tool calls
  dimensions.toolChainPotential = /\b(then|after|next|finally|first.*then)\b/i.test(text) ? 70 : 20;

  // 8. Multi-step reasoning
  if (ADVANCED_PATTERNS.reasoning.stepByStep.test(text)) {
    dimensions.multiStepReasoning = 80;
  } else if (ADVANCED_PATTERNS.reasoning.planning.test(text)) {
    dimensions.multiStepReasoning = 60;
  } else {
    dimensions.multiStepReasoning = 20;
  }

  // 9. Code generation
  dimensions.codeGeneration =
    /\b(write|create|implement|build|generate)\s+(a\s+)?(function|class|module|api|endpoint)/i.test(text)
      ? 80
      : 20;

  // 10. Analysis depth
  if (ADVANCED_PATTERNS.reasoning.tradeoffs.test(text)) {
    dimensions.analysisDepth = 80;
  } else if (ADVANCED_PATTERNS.reasoning.analysis.test(text)) {
    dimensions.analysisDepth = 60;
  } else {
    dimensions.analysisDepth = 20;
  }

  // 11. Conversation depth, bucketed by message count
  const messageCount = messages.length;
  dimensions.conversationDepth =
    messageCount < 3 ? 10 : messageCount < 6 ? 30 : messageCount < 10 ? 50 : 70;

  // 12. Prior tool usage: user messages that carry at least one tool_result block
  const toolResults = messages.filter(
    (m) =>
      m?.role === 'user' &&
      Array.isArray(m.content) &&
      m.content.some((c) => c?.type === 'tool_result')
  ).length;
  dimensions.priorToolUsage =
    toolResults === 0 ? 10 : toolResults < 3 ? 40 : toolResults < 6 ? 60 : 80;

  // 13. Ambiguity (inverse of specificity): short prompts without concrete
  // anchors such as a file, function or error are treated as most ambiguous
  const hasSpecifics = /\b(file|function|line|error|bug|at\s+\w+:\d+)\b/i.test(text);
  dimensions.ambiguity = hasSpecifics ? 20 : text.length < 50 ? 70 : 40;

  // Weighted total; a missing or NaN dimension contributes 0
  let weightedTotal = 0;
  for (const [dimension, weight] of Object.entries(DIMENSION_WEIGHTS)) {
    weightedTotal += (dimensions[dimension] || 0) * weight;
  }

  return {
    score: Math.round(weightedTotal),
    dimensions,
    weights: DIMENSION_WEIGHTS,
  };
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**Modify `analyzeComplexity()` (line 386) to use weighted scoring when enabled:**
|
|
182
|
+
|
|
183
|
+
```javascript
|
|
184
|
+
function analyzeComplexity(payload) {
|
|
185
|
+
const content = extractContent(payload);
|
|
186
|
+
const useWeighted = config.routing?.weightedScoring ?? false;
|
|
187
|
+
|
|
188
|
+
if (useWeighted) {
|
|
189
|
+
const weighted = calculateWeightedScore(payload, content);
|
|
190
|
+
const threshold = getThreshold();
|
|
191
|
+
const recommendation = weighted.score >= threshold ? 'cloud' : 'local';
|
|
192
|
+
|
|
193
|
+
return {
|
|
194
|
+
score: weighted.score,
|
|
195
|
+
threshold,
|
|
196
|
+
mode: 'weighted',
|
|
197
|
+
recommendation,
|
|
198
|
+
breakdown: weighted.dimensions,
|
|
199
|
+
weights: weighted.weights,
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// ... existing logic unchanged
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Config Addition (`src/config/index.js`)
|
|
208
|
+
|
|
209
|
+
```javascript
|
|
210
|
+
routing: {
|
|
211
|
+
weightedScoring: process.env.ROUTING_WEIGHTED_SCORING === 'true',
|
|
212
|
+
},
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## Feature 2: Cost Optimization Routing
|
|
218
|
+
|
|
219
|
+
### Goal
|
|
220
|
+
Track per-model costs and make cost-aware routing decisions.
|
|
221
|
+
|
|
222
|
+
### Data Source: models.dev API (250+ Models)
|
|
223
|
+
|
|
224
|
+
Instead of maintaining a static config, we'll fetch model data from **https://models.dev/api.json**, which provides:
|
|
225
|
+
|
|
226
|
+
- **250+ models** across 15+ providers
|
|
227
|
+
- **Real-time pricing** (input/output cost per 1M tokens)
|
|
228
|
+
- **Capabilities** (tool_call, reasoning, structured_output)
|
|
229
|
+
- **Context limits** and output limits
|
|
230
|
+
- **Knowledge cutoff dates**
|
|
231
|
+
|
|
232
|
+
### API Response Structure
|
|
233
|
+
|
|
234
|
+
```json
|
|
235
|
+
{
|
|
236
|
+
"provider_id": {
|
|
237
|
+
"id": "provider_id",
|
|
238
|
+
"name": "Provider Name",
|
|
239
|
+
"api": "https://api.provider.com/v1",
|
|
240
|
+
"models": {
|
|
241
|
+
"model-id": {
|
|
242
|
+
"id": "model-id",
|
|
243
|
+
"name": "Model Name",
|
|
244
|
+
"family": "model-family",
|
|
245
|
+
"cost": {
|
|
246
|
+
"input": 3.00, // $ per 1M tokens
|
|
247
|
+
"output": 15.00,
|
|
248
|
+
"cache_read": 0.30,
|
|
249
|
+
"cache_write": 3.75
|
|
250
|
+
},
|
|
251
|
+
"context": 200000,
|
|
252
|
+
"output": 8192,
|
|
253
|
+
"tool_call": true,
|
|
254
|
+
"reasoning": true,
|
|
255
|
+
"structured_output": true,
|
|
256
|
+
"input": ["text", "image", "pdf"],
|
|
257
|
+
"output": ["text"],
|
|
258
|
+
"knowledge": "2024-04"
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Local Tier Config: `config/model-tiers.json`
|
|
266
|
+
|
|
267
|
+
We still need a local config for **tier mappings** (which models to use at each complexity level):
|
|
268
|
+
|
|
269
|
+
```json
|
|
270
|
+
{
|
|
271
|
+
"tiers": {
|
|
272
|
+
"SIMPLE": {
|
|
273
|
+
"description": "Greetings, simple Q&A, confirmations",
|
|
274
|
+
"range": [0, 25],
|
|
275
|
+
"preferred": {
|
|
276
|
+
"ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b"],
|
|
277
|
+
"openai": ["gpt-4o-mini"],
|
|
278
|
+
"anthropic": ["claude-3-haiku"],
|
|
279
|
+
"google": ["gemini-2.0-flash"],
|
|
280
|
+
"openrouter": ["deepseek/deepseek-chat", "google/gemini-flash-1.5"]
|
|
281
|
+
}
|
|
282
|
+
},
|
|
283
|
+
"MEDIUM": {
|
|
284
|
+
"description": "Code reading, simple edits, research",
|
|
285
|
+
"range": [26, 50],
|
|
286
|
+
"preferred": {
|
|
287
|
+
"ollama": ["qwen2.5:32b", "deepseek-coder:33b"],
|
|
288
|
+
"openai": ["gpt-4o"],
|
|
289
|
+
"anthropic": ["claude-sonnet-4-5"],
|
|
290
|
+
"google": ["gemini-1.5-pro"],
|
|
291
|
+
"openrouter": ["anthropic/claude-3.5-sonnet"]
|
|
292
|
+
}
|
|
293
|
+
},
|
|
294
|
+
"COMPLEX": {
|
|
295
|
+
"description": "Multi-file changes, debugging, architecture",
|
|
296
|
+
"range": [51, 75],
|
|
297
|
+
"preferred": {
|
|
298
|
+
"ollama": ["qwen2.5:72b", "llama3.1:70b"],
|
|
299
|
+
"openai": ["o1-mini", "o3-mini"],
|
|
300
|
+
"anthropic": ["claude-sonnet-4-5"],
|
|
301
|
+
"openrouter": ["meta-llama/llama-3.1-405b"]
|
|
302
|
+
}
|
|
303
|
+
},
|
|
304
|
+
"REASONING": {
|
|
305
|
+
"description": "Complex analysis, security audits, novel problems",
|
|
306
|
+
"range": [76, 100],
|
|
307
|
+
"preferred": {
|
|
308
|
+
"openai": ["o1", "o1-pro"],
|
|
309
|
+
"anthropic": ["claude-opus-4-5"],
|
|
310
|
+
"deepseek": ["deepseek-r1"],
|
|
311
|
+
"openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner"]
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
},
|
|
315
|
+
"local_models": {
|
|
316
|
+
"ollama": { "free": true, "default_tier": "SIMPLE" },
|
|
317
|
+
"llamacpp": { "free": true, "default_tier": "SIMPLE" },
|
|
318
|
+
"lmstudio": { "free": true, "default_tier": "SIMPLE" }
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### New File: `src/routing/model-registry.js`
|
|
324
|
+
|
|
325
|
+
```javascript
|
|
326
|
+
/**
|
|
327
|
+
* Model Registry
|
|
328
|
+
* Fetches model data from models.dev API and provides lookup
|
|
329
|
+
* Caches data locally; a cache older than 24 hours (CACHE_TTL_MS) is refreshed in the background
|
|
330
|
+
*/
|
|
331
|
+
|
|
332
|
+
const fs = require('fs');
|
|
333
|
+
const path = require('path');
|
|
334
|
+
const logger = require('../logger');
|
|
335
|
+
const config = require('../config');
|
|
336
|
+
|
|
337
|
+
const API_URL = 'https://models.dev/api.json';
|
|
338
|
+
const CACHE_FILE = path.join(__dirname, '../../data/models-cache.json');
|
|
339
|
+
const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
|
|
340
|
+
|
|
341
|
+
/**
 * Registry of model metadata (pricing, context size, capabilities) fetched
 * from the models.dev API, combined with the local tier configuration.
 *
 * Every model is reachable through several lower-cased aliases (bare id,
 * `provider/id`, display name), so `modelIndex` holds more keys than there
 * are models; use `findModels()` / `getStats()` for de-duplicated views.
 */
class ModelRegistry {
  constructor() {
    this.providers = {};          // Raw API data keyed by provider id
    this.modelIndex = new Map();  // lower-cased alias -> model entry
    this.tiers = {};              // Local tier config
    this.localModels = {};        // Local (free) providers; initialised so isFree() is always safe
    this.loaded = false;
    this.lastFetch = 0;
  }

  /**
   * Initialize the registry: load the tier config, then use the on-disk
   * cache if present, otherwise fetch from the API.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.loaded) return;

    this._loadTierConfig();

    if (this._loadFromCache()) {
      this.loaded = true;
      if (Date.now() - this.lastFetch > CACHE_TTL_MS) {
        // Stale cache: refresh without blocking the caller (intentionally not awaited)
        this._fetchFromAPI().catch((err) =>
          logger.warn({ err: err.message }, '[ModelRegistry] Background refresh failed')
        );
      }
      return;
    }

    await this._fetchFromAPI();
    this.loaded = true;
  }

  /**
   * Fetch fresh data from the models.dev API and persist it to the cache.
   * Failures are logged and answered by falling back to the cache, then to
   * built-in defaults; this method does not reject on network errors.
   * @returns {Promise<void>}
   */
  async _fetchFromAPI() {
    try {
      const response = await fetch(API_URL, {
        signal: AbortSignal.timeout(10000),
        headers: { 'Accept': 'application/json' },
      });

      if (!response.ok) throw new Error(`HTTP ${response.status}`);

      const data = await response.json();
      this._processAPIData(data);
      this._saveToCache(data);
      this.lastFetch = Date.now();

      logger.info({
        providers: Object.keys(this.providers).length,
        models: this._uniqueModels().size,
      }, '[ModelRegistry] Loaded from API');
    } catch (err) {
      logger.error({ err: err.message }, '[ModelRegistry] API fetch failed');
      // Fall back to cache or defaults
      if (!this._loadFromCache()) {
        this._loadDefaults();
      }
    }
  }

  /**
   * Index raw API data (`{ providerId: { name, models: { modelId: info } } }`).
   * The input is validated and the alias list fully built before any
   * registry state is replaced, so a bad payload leaves the registry intact.
   * @param {object} data - Provider map as returned by the API or the cache.
   * @throws {TypeError} When `data` is not an object.
   */
  _processAPIData(data) {
    if (data === null || typeof data !== 'object') {
      throw new TypeError('Model data must be an object keyed by provider id');
    }

    const aliases = [];
    for (const [providerId, providerData] of Object.entries(data)) {
      const models = providerData?.models;
      if (!models || typeof models !== 'object') continue;

      for (const [modelId, modelInfo] of Object.entries(models)) {
        const fullId = `${providerId}/${modelId}`;
        const entry = {
          id: modelId,
          fullId,
          provider: providerId,
          providerName: providerData.name,
          ...modelInfo,
          // Normalize cost so consumers can always read cost.input / cost.output
          cost: modelInfo?.cost || { input: 0, output: 0 },
        };

        // Index by multiple keys for flexible lookup; later providers win on collisions
        aliases.push([modelId.toLowerCase(), entry], [fullId.toLowerCase(), entry]);
        if (typeof modelInfo?.name === 'string' && modelInfo.name) {
          aliases.push([modelInfo.name.toLowerCase(), entry]);
        }
      }
    }

    this.providers = data;
    this.modelIndex.clear();
    for (const [alias, entry] of aliases) {
      this.modelIndex.set(alias, entry);
    }
  }

  /**
   * Load the local tier configuration from config/model-tiers.json, falling
   * back to the built-in tier table when the file is missing or invalid.
   */
  _loadTierConfig() {
    const tierPath = path.join(__dirname, '../../config/model-tiers.json');
    try {
      const data = JSON.parse(fs.readFileSync(tierPath, 'utf8'));
      this.tiers = data.tiers || {};
      this.localModels = data.local_models || {};
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] Using default tier config');
      this._loadDefaultTiers();
    }
  }

  /**
   * Get model info by id, `provider/id`, or display name (case-insensitive).
   * @param {string} name
   * @returns {object|null} The model entry, or null when unknown.
   */
  getModel(name) {
    if (typeof name !== 'string' || name === '') return null;
    return this.modelIndex.get(name.toLowerCase()) || null;
  }

  /**
   * Get cost and capability summary for a model.
   * Unknown models get a generic MEDIUM-tier estimate.
   * @param {string} modelName
   * @returns {object} `{ input, output, tier, ... }`
   */
  getCost(modelName) {
    const model = this.getModel(modelName);
    if (!model) return { input: 1.0, output: 3.0, tier: 'MEDIUM' };

    // Derive a tier: reasoning capability first, then input price
    let tier = 'MEDIUM';
    if (model.reasoning) tier = 'REASONING';
    else if (model.cost?.input >= 10) tier = 'COMPLEX';
    else if (model.cost?.input <= 0.5) tier = 'SIMPLE';

    return {
      input: model.cost?.input || 0,
      output: model.cost?.output || 0,
      cacheRead: model.cost?.cache_read,
      cacheWrite: model.cost?.cache_write,
      tier,
      context: model.context,
      toolCall: model.tool_call,
      reasoning: model.reasoning,
    };
  }

  /**
   * Get preferred models for a tier and provider.
   * @param {string} tier
   * @param {string} provider
   * @returns {string[]} Preferred model names, possibly empty.
   */
  getTierModels(tier, provider) {
    return this.tiers[tier]?.preferred?.[provider] || [];
  }

  /**
   * Get all providers that have models configured for a tier.
   * @param {string} tier
   * @returns {string[]}
   */
  getProvidersForTier(tier) {
    return Object.keys(this.tiers[tier]?.preferred || {});
  }

  /**
   * Check if a model is known to support tool calling.
   * @param {string} modelName
   * @returns {boolean}
   */
  supportsTools(modelName) {
    const model = this.getModel(modelName);
    return model?.tool_call === true;
  }

  /**
   * Check if a model is free: a known model with zero input and output cost,
   * or an unknown model whose provider prefix is listed as a free local provider.
   * @param {string} modelName
   * @returns {boolean}
   */
  isFree(modelName) {
    const model = this.getModel(modelName);
    if (model) {
      return model.cost?.input === 0 && model.cost?.output === 0;
    }
    if (typeof modelName !== 'string') return false;

    // Unknown to the API data: check the local providers config
    const provider = modelName.split('/')[0] || modelName;
    return this.localModels?.[provider]?.free === true;
  }

  /**
   * Get all models matching the given criteria, each model at most once.
   * @param {object} [criteria]
   * @param {boolean} [criteria.toolCall] - Require tool-call support.
   * @param {boolean} [criteria.reasoning] - Require reasoning support.
   * @param {number} [criteria.maxInputCost] - Maximum input cost; 0 selects free models.
   * @param {number} [criteria.minContext] - Minimum context window.
   * @returns {object[]}
   */
  findModels(criteria = {}) {
    const results = [];
    for (const model of this._uniqueModels()) {
      if (criteria.toolCall && !model.tool_call) continue;
      if (criteria.reasoning && !model.reasoning) continue;
      // `!= null` rather than truthiness so that a limit of 0 is honoured
      if (criteria.maxInputCost != null && model.cost?.input > criteria.maxInputCost) continue;
      if (criteria.minContext != null && model.context < criteria.minContext) continue;
      results.push(model);
    }
    return results;
  }

  /**
   * Get stats for the metrics endpoint.
   * @returns {{totalModels: number, providers: number, lastFetch: number, cacheAge: number, tiers: string[]}}
   */
  getStats() {
    return {
      totalModels: this._uniqueModels().size,
      providers: Object.keys(this.providers).length,
      lastFetch: this.lastFetch,
      cacheAge: Date.now() - this.lastFetch,
      tiers: Object.keys(this.tiers),
    };
  }

  /**
   * Distinct model entries; the index stores each entry under several aliases.
   * @returns {Set<object>}
   */
  _uniqueModels() {
    return new Set(this.modelIndex.values());
  }

  /**
   * Load model data from the on-disk cache.
   * @returns {boolean} true when the cache was read and indexed.
   */
  _loadFromCache() {
    try {
      if (!fs.existsSync(CACHE_FILE)) return false;
      const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
      this._processAPIData(cache.data);
      this.lastFetch = cache.timestamp || 0;
      logger.debug({ age: Date.now() - this.lastFetch }, '[ModelRegistry] Loaded from cache');
      return true;
    } catch (err) {
      // An unreadable or corrupt cache is treated as a miss, but is reported
      logger.warn({ err: err.message }, '[ModelRegistry] Cache load failed');
      return false;
    }
  }

  /**
   * Persist raw API data with a timestamp; failures are logged, not thrown.
   * @param {object} data
   */
  _saveToCache(data) {
    try {
      const dir = path.dirname(CACHE_FILE);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      fs.writeFileSync(CACHE_FILE, JSON.stringify({ data, timestamp: Date.now() }));
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] Cache save failed');
    }
  }

  /**
   * Last-resort defaults used when neither the API nor the cache is
   * available. Only fills in what is still empty so that a tier config
   * already loaded by `_loadTierConfig()` is not thrown away.
   */
  _loadDefaults() {
    if (Object.keys(this.tiers || {}).length === 0) {
      this.tiers = { SIMPLE: { preferred: { ollama: ['llama3.2'] } } };
    }
    if (Object.keys(this.localModels || {}).length === 0) {
      this.localModels = { ollama: { free: true } };
    }
  }

  /** Built-in tier table used when config/model-tiers.json cannot be read. */
  _loadDefaultTiers() {
    this.tiers = {
      SIMPLE: { range: [0, 25], preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
      MEDIUM: { range: [26, 50], preferred: { openai: ['gpt-4o'], anthropic: ['claude-sonnet-4-5'] } },
      COMPLEX: { range: [51, 75], preferred: { openai: ['o1-mini'] } },
      REASONING: { range: [76, 100], preferred: { openai: ['o1'], anthropic: ['claude-opus-4-5'] } },
    };
  }
}
|
|
588
|
+
|
|
589
|
+
// Singleton state: the shared registry and, while one is running, the
// in-flight initialisation that concurrent callers must wait on.
let instance = null;
let initPromise = null;

/**
 * Get the shared ModelRegistry, initialising it on first use.
 *
 * The instance becomes visible before `initialize()` has finished, so every
 * caller that arrives while initialisation is still running awaits the same
 * promise instead of receiving a registry that has not loaded yet.
 *
 * @returns {Promise<ModelRegistry>} The initialised registry.
 */
async function getModelRegistry() {
  if (!instance) {
    instance = new ModelRegistry();
  }

  if (!instance.loaded) {
    if (!initPromise) {
      // Clear the marker once settled so a failed initialisation can be retried
      initPromise = instance.initialize().finally(() => {
        initPromise = null;
      });
    }
    await initPromise;
  }

  return instance;
}

/**
 * Synchronous getter for the shared registry.
 *
 * When no instance exists yet, one is created from local sources only (tier
 * config, then cache, then built-in defaults); no network request is made.
 * An instance that already exists is returned as-is.
 *
 * @returns {ModelRegistry}
 */
function getModelRegistrySync() {
  if (!instance) {
    const registry = new ModelRegistry();
    // Sync init - load from cache only
    registry._loadTierConfig();
    if (!registry._loadFromCache()) {
      registry._loadDefaults();
    }
    registry.loaded = true;
    instance = registry;
  }
  return instance;
}
|
|
610
|
+
|
|
611
|
+
module.exports = { ModelRegistry, getModelRegistry, getModelRegistrySync };
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
---
|
|
615
|
+
|
|
616
|
+
## Feature 3: Agentic Workflow Detection
|
|
617
|
+
|
|
618
|
+
### Goal
|
|
619
|
+
Auto-detect when a request is part of an agentic workflow and route to capable models.
|
|
620
|
+
|
|
621
|
+
### New File: `src/routing/agentic-detector.js`
|
|
622
|
+
|
|
623
|
+
```javascript
|
|
624
|
+
/**
|
|
625
|
+
* Agentic Workflow Detector
|
|
626
|
+
* Detects multi-step tool chains and autonomous agent patterns
|
|
627
|
+
*/
|
|
628
|
+
|
|
629
|
+
const logger = require('../logger');
|
|
630
|
+
|
|
631
|
+
// Agent type classification: minimum model tier and complexity-score boost per type
const AGENT_TYPES = {
  SINGLE_SHOT: { minTier: 'SIMPLE', scoreBoost: 0 },
  TOOL_CHAIN: { minTier: 'MEDIUM', scoreBoost: 15, requiresToolUse: true },
  ITERATIVE: { minTier: 'COMPLEX', scoreBoost: 25, requiresToolUse: true },
  AUTONOMOUS: { minTier: 'REASONING', scoreBoost: 35, requiresToolUse: true },
};

// Detection patterns matched against the latest user message
const PATTERNS = {
  toolChain: /\b(then\s+use|after\s+that|next\s+step|finally|first.*then)\b/i,
  iterative: /\b(keep\s+trying|until|repeat|loop|retry|iterate)\b/i,
  autonomous: /\b(figure\s+out|solve|complete\s+the\s+task|do\s+whatever|make\s+it\s+work)\b/i,
  multiFile: /\b(multiple\s+files?|across\s+(the\s+)?codebase|all\s+files?|refactor\s+entire)\b/i,
  planning: /\b(plan|design|architect|strategy|roadmap)\b/i,
};

// High-complexity tools that indicate agentic work (names are matched case-sensitively)
const AGENTIC_TOOLS = new Set([
  'Bash', 'bash', 'shell',
  'Write', 'write_file', 'fs_write',
  'Edit', 'edit_file', 'fs_edit', 'edit_patch',
  'Task', 'agent_task', 'spawn_agent',
  'Git', 'git_commit', 'git_push',
  'Test', 'run_tests',
]);

/**
 * Scores a request for signs of an agentic (multi-step, tool-driven) workflow.
 */
class AgenticDetector {
  /**
   * Analyse a request payload.
   *
   * `messages` and `tools` are used only when they are arrays, and null
   * entries inside them are ignored, so a malformed payload scores 0 instead
   * of throwing.
   *
   * NOTE(review): a score of 25-29 is classified as TOOL_CHAIN while
   * `isAgentic` (threshold 30) stays false; confirm that gap is intended.
   *
   * @param {object} payload - Request payload with optional `messages` and `tools`.
   * @returns {{isAgentic: boolean, agentType: string, confidence: number, score: number,
   *   signals: object[], minTier: string, scoreBoost: number}}
   */
  detect(payload) {
    const messages = Array.isArray(payload?.messages) ? payload.messages : [];
    const tools = Array.isArray(payload?.tools) ? payload.tools : [];
    const content = this._extractContent(messages);

    let score = 0;
    const signals = [];
    // Records a signal and adds its weight; `value` is included only when given
    const addSignal = (signal, weight, value) => {
      score += weight;
      signals.push(value === undefined ? { signal, weight } : { signal, value, weight });
    };

    // Signal 1: Tool count
    if (tools.length > 5) {
      addSignal('high_tool_count', 20, tools.length);
    } else if (tools.length > 3) {
      addSignal('moderate_tool_count', 10, tools.length);
    }

    // Signal 2: Prior tool results
    const toolResultCount = this._countToolResults(messages);
    if (toolResultCount > 3) {
      addSignal('many_tool_results', 25, toolResultCount);
    } else if (toolResultCount > 0) {
      addSignal('has_tool_results', 15, toolResultCount);
    }

    // Signal 3: Agentic tools present (both flat and `function.name` tool shapes)
    const agenticToolCount = tools.filter((tool) => {
      const name = tool?.name || tool?.function?.name || '';
      return AGENTIC_TOOLS.has(name);
    }).length;
    if (agenticToolCount > 2) {
      addSignal('agentic_tools', 20, agenticToolCount);
    }

    // Signal 4: Pattern matching; only the strongest workflow pattern counts
    if (PATTERNS.autonomous.test(content)) {
      addSignal('autonomous_pattern', 25);
    } else if (PATTERNS.iterative.test(content)) {
      addSignal('iterative_pattern', 20);
    } else if (PATTERNS.toolChain.test(content)) {
      addSignal('tool_chain_pattern', 15);
    }

    if (PATTERNS.multiFile.test(content)) {
      addSignal('multi_file', 15);
    }

    if (PATTERNS.planning.test(content)) {
      addSignal('planning', 10);
    }

    // Signal 5: Conversation depth
    if (messages.length > 10) {
      addSignal('deep_conversation', 15, messages.length);
    } else if (messages.length > 5) {
      addSignal('ongoing_conversation', 8, messages.length);
    }

    const agentType = this._classifyAgentType(score);
    const { minTier, scoreBoost } = AGENT_TYPES[agentType];

    return {
      isAgentic: score >= 30,
      agentType,
      confidence: Math.min(score / 100, 1),
      score,
      signals,
      minTier,
      scoreBoost,
    };
  }

  /**
   * Map a detection score to an agent type key of AGENT_TYPES.
   * @param {number} score
   * @returns {string}
   */
  _classifyAgentType(score) {
    if (score >= 60) return 'AUTONOMOUS';
    if (score >= 40) return 'ITERATIVE';
    if (score >= 25) return 'TOOL_CHAIN';
    return 'SINGLE_SHOT';
  }

  /**
   * Text of the most recent user message: the string itself, or the joined
   * `text` blocks of a block-array message. Returns '' when there is none.
   * @param {object[]} messages
   * @returns {string}
   */
  _extractContent(messages) {
    if (!Array.isArray(messages)) return '';

    const userMsgs = messages.filter((m) => m?.role === 'user');
    if (userMsgs.length === 0) return '';

    const last = userMsgs[userMsgs.length - 1];
    if (typeof last.content === 'string') return last.content;
    if (Array.isArray(last.content)) {
      return last.content
        .filter((block) => block?.type === 'text')
        .map((block) => block.text || '')
        .join(' ');
    }
    return '';
  }

  /**
   * Count `tool_result` blocks across all user messages.
   * @param {object[]} messages
   * @returns {number}
   */
  _countToolResults(messages) {
    if (!Array.isArray(messages)) return 0;

    return messages.reduce((count, m) => {
      if (m?.role === 'user' && Array.isArray(m.content)) {
        return count + m.content.filter((c) => c?.type === 'tool_result').length;
      }
      return count;
    }, 0);
  }
}

// Lazily created shared detector
let instance = null;

/**
 * Get the shared AgenticDetector instance.
 * @returns {AgenticDetector}
 */
function getAgenticDetector() {
  if (!instance) instance = new AgenticDetector();
  return instance;
}
|
|
774
|
+
|
|
775
|
+
module.exports = { AgenticDetector, getAgenticDetector, AGENT_TYPES };
|
|
776
|
+
```
|
|
777
|
+
|
|
778
|
+
---
|
|
779
|
+
|
|
780
|
+
## Feature 4: Multi-Tier Model Mapping
|
|
781
|
+
|
|
782
|
+
### New File: `src/routing/model-tiers.js`
|
|
783
|
+
|
|
784
|
+
```javascript
|
|
785
|
+
/**
|
|
786
|
+
* Model Tier Selector
|
|
787
|
+
* Maps complexity scores to appropriate models per provider
|
|
788
|
+
*/
|
|
789
|
+
|
|
790
|
+
const logger = require('../logger');
|
|
791
|
+
const config = require('../config');
|
|
792
|
+
const { getModelRegistry } = require('./model-registry');
|
|
793
|
+
|
|
794
|
+
/**
 * Tier catalogue keyed by tier name.
 *
 * `range` is the nominal [min, max] complexity score for the tier. Entries
 * are declared in ascending score order; `ModelTierSelector#getTier` relies
 * on that order when it walks the upper bounds.
 */
const TIER_DEFINITIONS = {
  SIMPLE: { description: 'Greetings, simple Q&A', range: [0, 25], priority: 1 },
  MEDIUM: { description: 'Code reading, simple edits', range: [26, 50], priority: 2 },
  COMPLEX: { description: 'Multi-file changes, debugging', range: [51, 75], priority: 3 },
  REASONING: { description: 'Complex analysis, security audits', range: [76, 100], priority: 4 },
};

/**
 * Chooses a model for a request from its complexity tier and provider.
 */
class ModelTierSelector {
  constructor() {
    // NOTE(review): the registry is expected to expose getTierModels(tier, provider)
    // and getProvidersForTier(tier); confirm against ./model-registry.
    this.registry = getModelRegistry();
  }

  /**
   * Map a complexity score to a tier name.
   *
   * The score is compared against each tier's upper bound in ascending
   * order, so fractional scores between two integer ranges (e.g. 25.5)
   * land in the next tier up instead of dropping back to 'SIMPLE'.
   * Scores below 0 map to 'SIMPLE', scores above 100 map to 'REASONING',
   * and values that are not numeric map to 'SIMPLE'.
   *
   * @param {number} complexityScore - Score, nominally 0-100.
   * @returns {string} One of the keys of TIER_DEFINITIONS.
   */
  getTier(complexityScore) {
    const score = Number(complexityScore);
    if (Number.isNaN(score)) {
      return 'SIMPLE';
    }

    for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
      if (score <= def.range[1]) {
        return tier;
      }
    }
    return 'REASONING';
  }

  /**
   * Pick a model for the given tier and provider.
   *
   * Resolution order: a configured override for the tier, then the first
   * registry model for (tier, provider), then the fallback search in
   * `getFallbackModel`.
   *
   * @param {string} tier - Tier name.
   * @param {string} provider - Provider identifier.
   * @returns {object} `{ model, source, ... }` on success, or
   *   `{ model: null, error }` when nothing is available.
   */
  selectModel(tier, provider) {
    // An override applies to the tier regardless of provider.
    const override = config.modelTiers?.overrides?.[tier];
    if (override) return { model: override, source: 'override' };

    const providerModels = this.registry.getTierModels(tier, provider);
    if (!providerModels?.length) {
      const fallback = this.getFallbackModel(tier, provider);
      if (fallback) return { model: fallback.model, source: 'fallback', actualTier: fallback.tier };
      return { model: null, error: `No models for ${provider} at tier ${tier}` };
    }

    return { model: providerModels[0], source: 'tier_mapping', tier };
  }

  /**
   * Find a substitute model in a tier below the requested one.
   *
   * Tiers are searched from the one just below `requestedTier` down to
   * 'SIMPLE'; higher tiers are never tried. An unrecognised tier name
   * searches every tier starting from 'REASONING'.
   *
   * @param {string} requestedTier - Tier that had no models.
   * @param {string} provider - Provider identifier.
   * @returns {{model: *, tier: string}|null} First match, or null.
   */
  getFallbackModel(requestedTier, provider) {
    const tierOrder = ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
    // indexOf yields -1 for an unknown tier, so the loop then starts at 0.
    const startIndex = tierOrder.indexOf(requestedTier);

    for (let i = startIndex + 1; i < tierOrder.length; i++) {
      const models = this.registry.getTierModels(tierOrder[i], provider);
      if (models?.length) return { model: models[0], tier: tierOrder[i] };
    }
    return null;
  }

  /**
   * Summarise the tier definitions and how many models each provider has
   * in each tier.
   *
   * @returns {{tiers: object, modelCounts: object}} `modelCounts[tier][provider]`
   *   is the model count, 0 when the registry returns nothing.
   */
  getTierStats() {
    const modelCounts = {};
    for (const tier of Object.keys(TIER_DEFINITIONS)) {
      modelCounts[tier] = {};
      for (const provider of this.registry.getProvidersForTier(tier)) {
        // Same nullish guard as selectModel/getFallbackModel.
        modelCounts[tier][provider] = this.registry.getTierModels(tier, provider)?.length ?? 0;
      }
    }
    return { tiers: TIER_DEFINITIONS, modelCounts };
  }
}
|
|
851
|
+
|
|
852
|
+
// Lazily created shared selector; stays null until the first request.
let instance = null;

/**
 * Return the shared ModelTierSelector, constructing it on first use.
 *
 * @returns {ModelTierSelector} The same instance on every call.
 */
function getModelTierSelector() {
  if (instance) {
    return instance;
  }
  instance = new ModelTierSelector();
  return instance;
}
|
|
857
|
+
|
|
858
|
+
module.exports = { ModelTierSelector, getModelTierSelector, TIER_DEFINITIONS };
|
|
859
|
+
```
|
|
860
|
+
|
|
861
|
+
---
|
|
862
|
+
|
|
863
|
+
## Files Summary
|
|
864
|
+
|
|
865
|
+
### Files to Create (5 files)
|
|
866
|
+
|
|
867
|
+
| File | Lines | Purpose |
|
|
868
|
+
|------|-------|---------|
|
|
869
|
+
| `config/model-tiers.json` | ~60 | Local tier preferences |
|
|
870
|
+
| `src/routing/model-registry.js` | ~220 | Fetches from models.dev API |
|
|
871
|
+
| `src/routing/model-tiers.js` | ~80 | Tier selection |
|
|
872
|
+
| `src/routing/cost-optimizer.js` | ~150 | Cost tracking |
|
|
873
|
+
| `src/routing/agentic-detector.js` | ~170 | Workflow detection |
|
|
874
|
+
|
|
875
|
+
### Files to Modify
|
|
876
|
+
|
|
877
|
+
| File | Changes |
|
|
878
|
+
|------|---------|
|
|
879
|
+
| `src/routing/complexity-analyzer.js` | Add weighted scoring (~80 lines) |
|
|
880
|
+
| `src/routing/index.js` | Integrate all modules (~50 lines) |
|
|
881
|
+
| `src/config/index.js` | Add config options (~25 lines) |
|
|
882
|
+
| `src/api/router.js` | Add 5 endpoints (~40 lines) |
|
|
883
|
+
|
|
884
|
+
---
|
|
885
|
+
|
|
886
|
+
## Configuration
|
|
887
|
+
|
|
888
|
+
```bash
|
|
889
|
+
# Intelligent Routing
|
|
890
|
+
ROUTING_WEIGHTED_SCORING=false
|
|
891
|
+
ROUTING_COST_OPTIMIZATION=false
|
|
892
|
+
ROUTING_AGENTIC_DETECTION=true
|
|
893
|
+
COST_BUDGET_DAILY_USD=0
|
|
894
|
+
COST_BUDGET_SESSION_USD=0
|
|
895
|
+
|
|
896
|
+
# Model Tiers
|
|
897
|
+
MODEL_TIER_ENABLED=false
|
|
898
|
+
MODEL_TIER_OVERRIDE_SIMPLE=gpt-4o-mini
|
|
899
|
+
MODEL_TIER_OVERRIDE_REASONING=o1
|
|
900
|
+
```
|
|
901
|
+
|
|
902
|
+
---
|
|
903
|
+
|
|
904
|
+
## API Endpoints
|
|
905
|
+
|
|
906
|
+
- `GET /routing/models` - Model registry stats (from models.dev)
|
|
907
|
+
- `GET /routing/models/:model` - Specific model info
|
|
908
|
+
- `GET /routing/tiers` - Tier definitions and counts
|
|
909
|
+
- `GET /metrics/cost-optimization` - Cost stats
|
|
910
|
+
- `POST /routing/analyze` - Test request analysis
|