lynkr 7.2.4 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/README.md +2 -2
  2. package/config/model-tiers.json +89 -0
  3. package/docs/docs.html +1 -0
  4. package/docs/index.md +7 -0
  5. package/docs/toon-integration-spec.md +130 -0
  6. package/documentation/README.md +3 -2
  7. package/documentation/claude-code-cli.md +23 -16
  8. package/documentation/cursor-integration.md +17 -14
  9. package/documentation/docker.md +11 -4
  10. package/documentation/embeddings.md +7 -5
  11. package/documentation/faq.md +66 -12
  12. package/documentation/features.md +22 -15
  13. package/documentation/installation.md +66 -14
  14. package/documentation/production.md +43 -8
  15. package/documentation/providers.md +145 -42
  16. package/documentation/routing.md +476 -0
  17. package/documentation/token-optimization.md +7 -5
  18. package/documentation/troubleshooting.md +81 -5
  19. package/install.sh +6 -1
  20. package/package.json +5 -3
  21. package/scripts/setup.js +0 -1
  22. package/src/agents/executor.js +14 -6
  23. package/src/api/middleware/session.js +15 -2
  24. package/src/api/openai-router.js +130 -37
  25. package/src/api/providers-handler.js +15 -1
  26. package/src/api/router.js +107 -2
  27. package/src/budget/index.js +4 -3
  28. package/src/clients/databricks.js +431 -234
  29. package/src/clients/gpt-utils.js +181 -0
  30. package/src/clients/ollama-utils.js +66 -140
  31. package/src/clients/routing.js +0 -1
  32. package/src/clients/standard-tools.js +82 -5
  33. package/src/config/index.js +119 -35
  34. package/src/context/toon.js +173 -0
  35. package/src/headroom/launcher.js +8 -3
  36. package/src/logger/index.js +23 -0
  37. package/src/orchestrator/index.js +765 -212
  38. package/src/routing/agentic-detector.js +320 -0
  39. package/src/routing/complexity-analyzer.js +202 -2
  40. package/src/routing/cost-optimizer.js +305 -0
  41. package/src/routing/index.js +168 -159
  42. package/src/routing/model-registry.js +437 -0
  43. package/src/routing/model-tiers.js +365 -0
  44. package/src/server.js +2 -2
  45. package/src/sessions/cleanup.js +3 -3
  46. package/src/sessions/record.js +10 -1
  47. package/src/sessions/store.js +7 -2
  48. package/src/tools/agent-task.js +48 -1
  49. package/src/tools/index.js +15 -2
  50. package/src/tools/workspace.js +35 -4
  51. package/src/workspace/index.js +30 -0
  52. package/te +11622 -0
  53. package/test/README.md +1 -1
  54. package/test/azure-openai-config.test.js +17 -8
  55. package/test/azure-openai-integration.test.js +7 -1
  56. package/test/azure-openai-routing.test.js +41 -43
  57. package/test/bedrock-integration.test.js +18 -32
  58. package/test/hybrid-routing-integration.test.js +35 -20
  59. package/test/hybrid-routing-performance.test.js +74 -64
  60. package/test/llamacpp-integration.test.js +28 -9
  61. package/test/lmstudio-integration.test.js +20 -8
  62. package/test/openai-integration.test.js +17 -20
  63. package/test/performance-tests.js +1 -1
  64. package/test/routing.test.js +65 -59
  65. package/test/toon-compression.test.js +131 -0
  66. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  67. package/ROUTER_COMPARISON.md +0 -173
  68. package/TIER_ROUTING_PLAN.md +0 -771
@@ -0,0 +1,437 @@
1
+ /**
2
+ * Model Registry
3
+ * Multi-source pricing: LiteLLM -> models.dev -> Databricks fallback
4
+ * Caches data locally with 24h TTL
5
+ */
6
+
7
+ const fs = require('fs');
8
+ const path = require('path');
9
+ const logger = require('../logger');
10
+
11
// Remote pricing catalogues queried by the registry.
const LITELLM_URL =
  'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
const MODELS_DEV_URL = 'https://models.dev/api.json';

// Local cache: file location (resolved relative to this module) and how long
// cached pricing is considered fresh.
const CACHE_FILE = path.join(__dirname, '../../data/model-prices-cache.json');
const CACHE_TTL_MS = 1000 * 60 * 60 * 24; // 24 hours, in milliseconds
18
+
19
// Databricks fallback pricing (based on Anthropic direct API prices).
// Rows are [model id, input price, output price, context window]; prices are
// per 1M tokens. Each row is expanded into `{ input, output, context }`,
// keyed by model id, in the order listed here.
const DATABRICKS_FALLBACK = Object.fromEntries(
  [
    // Claude models
    ['databricks-claude-opus-4-6', 5.0, 25.0, 1000000],
    ['databricks-claude-opus-4-5', 5.0, 25.0, 200000],
    ['databricks-claude-opus-4-1', 15.0, 75.0, 200000],
    ['databricks-claude-sonnet-4-5', 3.0, 15.0, 200000],
    ['databricks-claude-sonnet-4', 3.0, 15.0, 200000],
    ['databricks-claude-3-7-sonnet', 3.0, 15.0, 200000],
    ['databricks-claude-haiku-4-5', 1.0, 5.0, 200000],

    // Llama models
    ['databricks-llama-4-maverick', 1.0, 1.0, 128000],
    ['databricks-meta-llama-3-3-70b-instruct', 0.9, 0.9, 128000],
    ['databricks-meta-llama-3-1-405b-instruct', 2.0, 2.0, 128000],
    ['databricks-meta-llama-3-1-8b-instruct', 0.2, 0.2, 128000],

    // GPT models via Databricks
    ['databricks-gpt-5-2', 5.0, 15.0, 200000],
    ['databricks-gpt-5-1', 3.0, 12.0, 200000],
    ['databricks-gpt-5', 2.5, 10.0, 128000],
    ['databricks-gpt-5-mini', 0.5, 1.5, 128000],
    ['databricks-gpt-5-nano', 0.15, 0.6, 128000],

    // Gemini models via Databricks
    ['databricks-gemini-3-flash', 0.075, 0.3, 1000000],
    ['databricks-gemini-3-pro', 1.25, 5.0, 2000000],
    ['databricks-gemini-2-5-pro', 1.25, 5.0, 1000000],
    ['databricks-gemini-2-5-flash', 0.075, 0.3, 1000000],

    // DBRX
    ['databricks-dbrx-instruct', 0.75, 2.25, 32000],

    // Embedding models (no output cost)
    ['databricks-gte-large-en', 0.02, 0, 8192],
    ['databricks-bge-large-en', 0.02, 0, 512],
  ].map(([modelId, input, output, context]) => [modelId, { input, output, context }])
);

// Pricing assumed for any model that none of the sources know about.
const DEFAULT_COST = {
  input: 1.0,
  output: 3.0,
  context: 128000,
};
59
+
60
/**
 * Registry of model pricing and capability metadata.
 *
 * Data is merged from three sources into `modelIndex`, later sources
 * overriding earlier ones: the static Databricks fallback table, models.dev,
 * then LiteLLM. Fetched data is persisted to CACHE_FILE and reloaded on the
 * next start; a cache older than CACHE_TTL_MS triggers a background refresh.
 */
class ModelRegistry {
  constructor() {
    this.litellmPrices = {};
    this.modelsDevPrices = {};
    this.loaded = false;
    this.lastFetch = 0; // epoch ms of the last successful fetch (0 = never)
    this.modelIndex = new Map(); // lower-cased model id -> pricing/capability record
  }

  /**
   * Initialize registry - load from cache or fetch fresh data.
   * A usable cache makes this return immediately; if that cache is stale a
   * refresh is started in the background and its failure is only logged.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.loaded) return;

    // Try cache first
    if (this._loadFromCache()) {
      this.loaded = true;
      // Background refresh if stale (deliberately not awaited)
      if (Date.now() - this.lastFetch > CACHE_TTL_MS) {
        this._fetchAll().catch((err) =>
          logger.warn({ err: err.message }, '[ModelRegistry] Background refresh failed')
        );
      }
      return;
    }

    // No cache: block on a fresh fetch
    await this._fetchAll();
    this.loaded = true;
  }

  /**
   * Fetch from both sources and rebuild the index.
   * Never rejects because a source failed: each source is settled
   * independently. The cache and `lastFetch` are only updated when at least
   * one source succeeded.
   * @returns {Promise<void>}
   */
  async _fetchAll() {
    const [litellmResult, modelsDevResult] = await Promise.allSettled([
      this._fetchLiteLLM(),
      this._fetchModelsDev(),
    ]);

    const litellmOk = litellmResult.status === 'fulfilled';
    const modelsDevOk = modelsDevResult.status === 'fulfilled';

    // Always rebuild: even when every source failed the index must at least
    // contain the Databricks fallback table (plus whatever was loaded before),
    // otherwise a cold start without network leaves the index empty.
    this._buildIndex();

    if (!litellmOk && !modelsDevOk) {
      logger.warn('[ModelRegistry] All sources failed, using Databricks fallback only');
      return;
    }

    this._saveToCache();
    this.lastFetch = Date.now();

    logger.info({
      litellm: litellmOk ? Object.keys(this.litellmPrices).length : 0,
      modelsDev: modelsDevOk ? Object.keys(this.modelsDevPrices).length : 0,
      total: this.modelIndex.size,
    }, '[ModelRegistry] Loaded pricing data');
  }

  /**
   * Fetch LiteLLM pricing into `litellmPrices`.
   * @returns {Promise<void>}
   * @throws {Error} on network failure, timeout (15s), non-2xx status or bad JSON
   */
  async _fetchLiteLLM() {
    try {
      const response = await fetch(LITELLM_URL, {
        signal: AbortSignal.timeout(15000),
        headers: { 'Accept': 'application/json' },
      });

      if (!response.ok) throw new Error(`HTTP ${response.status}`);

      const data = await response.json();
      this.litellmPrices = this._processLiteLLM(data);

      logger.debug({ count: Object.keys(this.litellmPrices).length }, '[ModelRegistry] LiteLLM loaded');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] LiteLLM fetch failed');
      throw err;
    }
  }

  /**
   * Process LiteLLM format into our format.
   * LiteLLM uses cost per token, we use cost per 1M tokens.
   * Each model is stored under its lower-cased id and, when the id has a
   * `provider/` prefix, also under the part after the last `/` (both keys
   * share one record object).
   * @param {Object} data - Parsed LiteLLM JSON, keyed by model id
   * @returns {Object} Map of lower-cased id -> pricing record
   */
  _processLiteLLM(data) {
    const prices = {};

    for (const [modelId, info] of Object.entries(data)) {
      if (!info || typeof info !== 'object') continue;

      const fullId = modelId.toLowerCase();

      prices[fullId] = {
        // Convert per-token to per-million-tokens
        input: (info.input_cost_per_token || 0) * 1_000_000,
        output: (info.output_cost_per_token || 0) * 1_000_000,
        context: info.max_input_tokens || info.max_tokens || 128000,
        maxOutput: info.max_output_tokens || 4096,
        toolCall: info.supports_function_calling ?? true,
        vision: info.supports_vision ?? false,
        source: 'litellm',
      };

      // Also index without provider prefix for flexible lookup
      const shortName = fullId.split('/').pop();
      if (shortName !== fullId) {
        prices[shortName] = prices[fullId];
      }
    }

    return prices;
  }

  /**
   * Fetch models.dev pricing into `modelsDevPrices`.
   * @returns {Promise<void>}
   * @throws {Error} on network failure, timeout (15s), non-2xx status or bad JSON
   */
  async _fetchModelsDev() {
    try {
      const response = await fetch(MODELS_DEV_URL, {
        signal: AbortSignal.timeout(15000),
        headers: { 'Accept': 'application/json' },
      });

      if (!response.ok) throw new Error(`HTTP ${response.status}`);

      const data = await response.json();
      this.modelsDevPrices = this._processModelsDev(data);

      logger.debug({ count: Object.keys(this.modelsDevPrices).length }, '[ModelRegistry] models.dev loaded');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] models.dev fetch failed');
      throw err;
    }
  }

  /**
   * Process models.dev format into our format.
   * Each model is stored under `provider/model` and under the bare model id
   * (both lower-cased, sharing one record object).
   * @param {Object} data - Parsed models.dev JSON, keyed by provider id
   * @returns {Object} Map of lower-cased id -> pricing record
   */
  _processModelsDev(data) {
    const prices = {};

    for (const [providerId, providerData] of Object.entries(data)) {
      if (!providerData?.models) continue;

      for (const [modelId, info] of Object.entries(providerData.models)) {
        // Skip malformed entries instead of throwing, as _processLiteLLM does
        if (!info || typeof info !== 'object') continue;

        const fullId = `${providerId}/${modelId}`.toLowerCase();

        prices[fullId] = {
          input: info.cost?.input || 0,
          output: info.cost?.output || 0,
          cacheRead: info.cost?.cache_read,
          cacheWrite: info.cost?.cache_write,
          context: info.context || 128000,
          maxOutput: info.output || 4096,
          toolCall: info.tool_call ?? false,
          reasoning: info.reasoning ?? false,
          vision: Array.isArray(info.input) && info.input.includes('image'),
          source: 'models.dev',
        };

        // Also index by short name
        prices[modelId.toLowerCase()] = prices[fullId];
      }
    }

    return prices;
  }

  /**
   * Build unified index from all sources.
   * Sources are written in ascending priority so later writes win:
   * Databricks fallback, then models.dev, then LiteLLM.
   */
  _buildIndex() {
    this.modelIndex.clear();

    // Add Databricks fallback first (lowest priority)
    for (const [modelId, info] of Object.entries(DATABRICKS_FALLBACK)) {
      this.modelIndex.set(modelId.toLowerCase(), { ...info, source: 'databricks-fallback' });
    }

    // Add models.dev (medium priority)
    for (const [modelId, info] of Object.entries(this.modelsDevPrices)) {
      this.modelIndex.set(modelId, info);
    }

    // Add LiteLLM (highest priority)
    for (const [modelId, info] of Object.entries(this.litellmPrices)) {
      this.modelIndex.set(modelId, info);
    }
  }

  /**
   * Get cost for a model.
   *
   * Lookup order: exact (case-insensitive) id, the id with common provider
   * prefixes stripped, then a substring match. A hit returns the indexed
   * record itself (not a copy); a miss returns a copy of DEFAULT_COST.
   *
   * @param {string} modelName - Model name/ID
   * @returns {Object} Cost info { input, output, context, ... }
   */
  getCost(modelName) {
    // Anything that is not a non-empty string cannot be looked up
    if (typeof modelName !== 'string' || modelName === '') {
      return { ...DEFAULT_COST, source: 'default' };
    }

    const normalizedName = modelName.toLowerCase();

    // Direct lookup
    if (this.modelIndex.has(normalizedName)) {
      return this.modelIndex.get(normalizedName);
    }

    // Try common variations
    const variations = [
      normalizedName.replace('databricks-', ''),
      normalizedName.replace('azure/', ''),
      normalizedName.replace('bedrock/', ''),
      normalizedName.replace('anthropic.', ''),
      normalizedName.split('/').pop(),
    ];

    for (const variant of variations) {
      if (this.modelIndex.has(variant)) {
        return this.modelIndex.get(variant);
      }
    }

    // Fuzzy match for partial names. Pick the closest key rather than the
    // first one in insertion order, so that e.g. "databricks-gpt-5-mini-x"
    // resolves to "databricks-gpt-5-mini" and not to "databricks-gpt-5".
    let longestContained = null; // known id that appears inside the requested name
    let shortestContaining = null; // known id that the requested name is a part of

    for (const key of this.modelIndex.keys()) {
      if (key === '') continue; // an empty id would match every name

      if (normalizedName.includes(key)) {
        if (longestContained === null || key.length > longestContained.length) {
          longestContained = key;
        }
      } else if (key.includes(normalizedName)) {
        if (shortestContaining === null || key.length < shortestContaining.length) {
          shortestContaining = key;
        }
      }
    }

    const bestKey = longestContained ?? shortestContaining;
    if (bestKey !== null) {
      return this.modelIndex.get(bestKey);
    }

    logger.debug({ model: modelName }, '[ModelRegistry] Model not found, using default');
    return { ...DEFAULT_COST, source: 'default' };
  }

  /**
   * Get model info by name (alias of getCost).
   * @param {string} modelName - Model name/ID
   * @returns {Object} Same record as getCost
   */
  getModel(modelName) {
    return this.getCost(modelName);
  }

  /**
   * Check if model is free (local): both input and output cost are exactly 0.
   * @param {string} modelName - Model name/ID
   * @returns {boolean}
   */
  isFree(modelName) {
    const { input, output } = this.getCost(modelName);
    return input === 0 && output === 0;
  }

  /**
   * Check if model supports tool calling.
   * Records without a `toolCall` flag (fallback and default entries) count as
   * not supporting tools.
   * @param {string} modelName - Model name/ID
   * @returns {boolean}
   */
  supportsTools(modelName) {
    return this.getCost(modelName).toolCall === true;
  }

  /**
   * Find models matching criteria.
   * @param {Object} [criteria]
   * @param {number} [criteria.maxInputCost] - Upper bound on input price (0 selects free models only)
   * @param {number} [criteria.minContext] - Lower bound on context window
   * @param {boolean} [criteria.toolCall] - Require tool-calling support
   * @param {boolean} [criteria.reasoning] - Require reasoning support
   * @param {boolean} [criteria.vision] - Require vision support
   * @returns {Object[]} Matching records (with `modelId`), cheapest input first
   */
  findModels(criteria = {}) {
    const results = [];

    for (const [modelId, info] of this.modelIndex.entries()) {
      // `!= null` so that a bound of 0 is honoured instead of being treated as "unset"
      if (criteria.maxInputCost != null && info.input > criteria.maxInputCost) continue;
      if (criteria.minContext != null && info.context < criteria.minContext) continue;
      if (criteria.toolCall && !info.toolCall) continue;
      if (criteria.reasoning && !info.reasoning) continue;
      if (criteria.vision && !info.vision) continue;

      results.push({ modelId, ...info });
    }

    // Sort by input cost ascending
    return results.sort((a, b) => a.input - b.input);
  }

  /**
   * Get stats for metrics endpoint.
   * @returns {{totalModels: number, bySource: Object, lastFetch: number, cacheAge: (number|null), cacheTTL: number}}
   */
  getStats() {
    const sources = { litellm: 0, 'models.dev': 0, 'databricks-fallback': 0, default: 0 };

    for (const info of this.modelIndex.values()) {
      const source = info.source || 'default';
      sources[source] = (sources[source] || 0) + 1;
    }

    return {
      totalModels: this.modelIndex.size,
      bySource: sources,
      lastFetch: this.lastFetch,
      cacheAge: this.lastFetch ? Date.now() - this.lastFetch : null,
      cacheTTL: CACHE_TTL_MS,
    };
  }

  /**
   * Force refresh from APIs.
   * @returns {Promise<void>}
   */
  async refresh() {
    await this._fetchAll();
  }

  /**
   * Load pricing data from CACHE_FILE and rebuild the index from it.
   * @returns {boolean} true when the cache was read and indexed; false when
   *   the file is missing or unreadable (the failure is logged at debug level)
   */
  _loadFromCache() {
    try {
      if (!fs.existsSync(CACHE_FILE)) return false;

      const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
      this.litellmPrices = cache.litellm || {};
      this.modelsDevPrices = cache.modelsDev || {};
      this.lastFetch = cache.timestamp || 0;

      this._buildIndex();

      logger.debug({
        age: Math.round((Date.now() - this.lastFetch) / 60000) + 'min',
        models: this.modelIndex.size,
      }, '[ModelRegistry] Loaded from cache');

      return true;
    } catch (err) {
      logger.debug({ err: err.message }, '[ModelRegistry] Cache load failed');
      return false;
    }
  }

  /**
   * Persist the current source data to CACHE_FILE, creating its directory if
   * needed. Best effort: a write failure is logged and otherwise ignored.
   */
  _saveToCache() {
    try {
      const dir = path.dirname(CACHE_FILE);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }

      const cache = {
        litellm: this.litellmPrices,
        modelsDev: this.modelsDevPrices,
        timestamp: Date.now(),
      };

      fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2));
      logger.debug('[ModelRegistry] Cache saved');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] Cache save failed');
    }
  }
}
408
+
409
+ // Singleton with lazy initialization
410
+ let instance = null;
411
+
412
+ async function getModelRegistry() {
413
+ if (!instance) {
414
+ instance = new ModelRegistry();
415
+ await instance.initialize();
416
+ }
417
+ return instance;
418
+ }
419
+
420
/**
 * Sync getter (uses cache only, no network).
 *
 * On first use creates the shared registry, loads whatever the on-disk cache
 * holds and marks it as loaded. Without a usable cache the index is built
 * from the in-memory sources alone.
 *
 * @returns {ModelRegistry} The shared registry instance
 */
function getModelRegistrySync() {
  if (!instance) {
    instance = new ModelRegistry();

    // _loadFromCache() already rebuilds the index when it succeeds, so only
    // build it here on a cache miss instead of indexing everything twice.
    if (!instance._loadFromCache()) {
      instance._buildIndex();
    }

    instance.loaded = true;
  }
  return instance;
}
430
+
431
+ module.exports = {
432
+ ModelRegistry,
433
+ getModelRegistry,
434
+ getModelRegistrySync,
435
+ DATABRICKS_FALLBACK,
436
+ DEFAULT_COST,
437
+ };