lynkr 7.2.5 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +2 -2
  2. package/config/model-tiers.json +89 -0
  3. package/docs/docs.html +1 -0
  4. package/docs/index.md +7 -0
  5. package/docs/toon-integration-spec.md +130 -0
  6. package/documentation/README.md +3 -2
  7. package/documentation/claude-code-cli.md +23 -16
  8. package/documentation/cursor-integration.md +17 -14
  9. package/documentation/docker.md +11 -4
  10. package/documentation/embeddings.md +7 -5
  11. package/documentation/faq.md +66 -12
  12. package/documentation/features.md +22 -15
  13. package/documentation/installation.md +66 -14
  14. package/documentation/production.md +43 -8
  15. package/documentation/providers.md +145 -42
  16. package/documentation/routing.md +476 -0
  17. package/documentation/token-optimization.md +7 -5
  18. package/documentation/troubleshooting.md +81 -5
  19. package/install.sh +6 -1
  20. package/package.json +4 -2
  21. package/scripts/setup.js +0 -1
  22. package/src/agents/executor.js +14 -6
  23. package/src/api/middleware/session.js +15 -2
  24. package/src/api/openai-router.js +130 -37
  25. package/src/api/providers-handler.js +15 -1
  26. package/src/api/router.js +107 -2
  27. package/src/budget/index.js +4 -3
  28. package/src/clients/databricks.js +431 -234
  29. package/src/clients/gpt-utils.js +181 -0
  30. package/src/clients/ollama-utils.js +66 -140
  31. package/src/clients/routing.js +0 -1
  32. package/src/clients/standard-tools.js +76 -3
  33. package/src/config/index.js +113 -35
  34. package/src/context/toon.js +173 -0
  35. package/src/logger/index.js +23 -0
  36. package/src/orchestrator/index.js +686 -211
  37. package/src/routing/agentic-detector.js +320 -0
  38. package/src/routing/complexity-analyzer.js +202 -2
  39. package/src/routing/cost-optimizer.js +305 -0
  40. package/src/routing/index.js +168 -159
  41. package/src/routing/model-tiers.js +365 -0
  42. package/src/server.js +2 -2
  43. package/src/sessions/cleanup.js +3 -3
  44. package/src/sessions/record.js +10 -1
  45. package/src/sessions/store.js +7 -2
  46. package/src/tools/agent-task.js +48 -1
  47. package/src/tools/index.js +15 -2
  48. package/te +11622 -0
  49. package/test/README.md +1 -1
  50. package/test/azure-openai-config.test.js +17 -8
  51. package/test/azure-openai-integration.test.js +7 -1
  52. package/test/azure-openai-routing.test.js +41 -43
  53. package/test/bedrock-integration.test.js +18 -32
  54. package/test/hybrid-routing-integration.test.js +35 -20
  55. package/test/hybrid-routing-performance.test.js +74 -64
  56. package/test/llamacpp-integration.test.js +28 -9
  57. package/test/lmstudio-integration.test.js +20 -8
  58. package/test/openai-integration.test.js +17 -20
  59. package/test/performance-tests.js +1 -1
  60. package/test/routing.test.js +65 -59
  61. package/test/toon-compression.test.js +131 -0
  62. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  63. package/ROUTER_COMPARISON.md +0 -173
  64. package/TIER_ROUTING_PLAN.md +0 -771
@@ -0,0 +1,305 @@
1
+ /**
2
+ * Cost Optimizer Module
3
+ * Tracks and optimizes LLM costs across providers
4
+ * Uses ModelRegistry for dynamic pricing data
5
+ */
6
+
7
+ const logger = require('../logger');
8
+ const config = require('../config');
9
+ const { getModelRegistry, getModelRegistrySync } = require('./model-registry');
10
+ const { getModelTierSelector, TIER_DEFINITIONS } = require('./model-tiers');
11
+
12
// Per-session cost ledger, held in memory only.
// Shape: sessionId -> { total, requests, byModel, byProvider, byTier }
const sessionCosts = new Map();

// Module-wide running totals, accumulated across all sessions.
// `byProvider` and `byTier` map a provider name / tier name to summed cost.
const globalStats = {
  totalCost: 0,
  totalSavings: 0,
  requestCount: 0,
  byProvider: {},
  byTier: {},
};
23
+
24
/**
 * Tracks LLM spend per session and globally, and picks the cheapest model
 * that satisfies a complexity tier.
 *
 * Pricing comes from the model registry's `getCost(model)`, whose result is
 * read here as `{ input, output, context, source }` with `input`/`output`
 * treated as prices per one million tokens.
 */
class CostOptimizer {
  constructor() {
    // Both collaborators are resolved lazily (see _getRegistry / _getTierSelector)
    // unless initialize() has been awaited first.
    this.registry = null;
    this.tierSelector = null;
  }

  /**
   * Eagerly resolve the registry (async) and the tier selector.
   * @returns {Promise<void>}
   */
  async initialize() {
    this.registry = await getModelRegistry();
    this.tierSelector = getModelTierSelector();
  }

  /**
   * Return the registry, falling back to the synchronous accessor when
   * initialize() has not populated it yet.
   */
  _getRegistry() {
    if (!this.registry) {
      this.registry = getModelRegistrySync();
    }
    return this.registry;
  }

  /**
   * Return the tier selector, resolving it on first use.
   */
  _getTierSelector() {
    if (!this.tierSelector) {
      this.tierSelector = getModelTierSelector();
    }
    return this.tierSelector;
  }

  /**
   * Estimate cost for a request before sending.
   * @param {string} model - Model name
   * @param {number} inputTokens - Estimated input tokens
   * @param {number|null} [outputTokens] - Estimated output tokens. When
   *   omitted (null/undefined) it is guessed as half the input, capped at 4096.
   *   An explicit 0 is honoured as "no output".
   * @returns {Object} Cost estimate, with costs rounded to 6 decimal places
   */
  estimateCost(model, inputTokens, outputTokens = null) {
    const costs = this._getRegistry().getCost(model);

    // `??` rather than `||`: a caller-supplied 0 must not trigger the heuristic.
    const estimatedOutputTokens = outputTokens ?? Math.min(inputTokens * 0.5, 4096);

    const inputCost = (inputTokens / 1_000_000) * costs.input;
    const outputCost = (estimatedOutputTokens / 1_000_000) * costs.output;
    const roundToMicros = (value) => Math.round(value * 1_000_000) / 1_000_000;

    return {
      inputCost: roundToMicros(inputCost),
      outputCost: roundToMicros(outputCost),
      totalEstimate: roundToMicros(inputCost + outputCost),
      model,
      inputTokens,
      outputTokens: estimatedOutputTokens,
      pricePerMillion: {
        input: costs.input,
        output: costs.output,
      },
      source: costs.source,
    };
  }

  /**
   * Find the cheapest model capable of handling a complexity tier.
   * Models preferred for the required tier and for every higher tier are
   * considered; "cheapest" is the lowest `input + output` price.
   * @param {string} requiredTier - Minimum tier required
   * @param {Iterable<string>} availableProviders - Providers to consider;
   *   a null/undefined value is treated as "no providers"
   * @returns {Object|null} Cheapest model info, or null when the tier is
   *   unknown or no candidate model exists
   */
  findCheapestForTier(requiredTier, availableProviders) {
    const registry = this._getRegistry();
    const tierSelector = this._getTierSelector();

    const tierOrder = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
    const minTierIndex = tierOrder.indexOf(requiredTier);

    if (minTierIndex === -1) {
      logger.warn({ tier: requiredTier }, '[CostOptimizer] Unknown tier');
      return null;
    }

    const providers = availableProviders ?? [];
    const candidates = [];

    // Collect models from all capable tiers (>= required tier)
    for (const tier of tierOrder.slice(minTierIndex)) {
      for (const provider of providers) {
        // A selector with nothing configured for this pair may return nothing.
        const models = tierSelector.getPreferredModels(tier, provider) ?? [];

        for (const model of models) {
          const cost = registry.getCost(model);

          candidates.push({
            model,
            provider,
            tier,
            inputCost: cost.input,
            outputCost: cost.output,
            totalCost: cost.input + cost.output, // Simple cost metric
            context: cost.context,
            source: cost.source,
          });
        }
      }
    }

    if (candidates.length === 0) {
      return null;
    }

    // Single pass for the minimum; strict `<` keeps the first-seen candidate on ties.
    const cheapest = candidates.reduce(
      (best, candidate) => (candidate.totalCost < best.totalCost ? candidate : best),
      candidates[0],
    );

    logger.debug({
      requiredTier,
      selectedModel: cheapest.model,
      selectedProvider: cheapest.provider,
      cost: cheapest.totalCost,
      candidateCount: candidates.length,
    }, '[CostOptimizer] Found cheapest model');

    return cheapest;
  }

  /**
   * Record actual cost after response. Updates the per-session ledger (when a
   * sessionId is given) and the global totals.
   * @param {string} sessionId - Session identifier; falsy skips session tracking
   * @param {string} provider - Provider used
   * @param {string} model - Model used
   * @param {number} inputTokens - Actual input tokens
   * @param {number} outputTokens - Actual output tokens
   * @param {string} tier - Complexity tier
   * @returns {number} Actual cost
   */
  recordCost(sessionId, provider, model, inputTokens, outputTokens, tier = 'MEDIUM') {
    const costs = this._getRegistry().getCost(model);

    // A missing or non-numeric usage figure counts as 0 tokens; otherwise a
    // single NaN would stick in the running totals for good.
    const toTokenCount = (value) => {
      const count = Number(value);
      return Number.isFinite(count) ? count : 0;
    };
    const billedInput = toTokenCount(inputTokens);
    const billedOutput = toTokenCount(outputTokens);

    const inputCost = (billedInput / 1_000_000) * costs.input;
    const outputCost = (billedOutput / 1_000_000) * costs.output;
    const actualCost = inputCost + outputCost;

    // Update session costs
    if (sessionId) {
      if (!sessionCosts.has(sessionId)) {
        sessionCosts.set(sessionId, {
          total: 0,
          requests: 0,
          byModel: {},
          byProvider: {},
          byTier: {},
        });
      }

      const session = sessionCosts.get(sessionId);
      session.total += actualCost;
      session.requests++;
      session.byModel[model] = (session.byModel[model] || 0) + actualCost;
      session.byProvider[provider] = (session.byProvider[provider] || 0) + actualCost;
      session.byTier[tier] = (session.byTier[tier] || 0) + actualCost;
    }

    // Update global stats
    globalStats.totalCost += actualCost;
    globalStats.requestCount++;
    globalStats.byProvider[provider] = (globalStats.byProvider[provider] || 0) + actualCost;
    globalStats.byTier[tier] = (globalStats.byTier[tier] || 0) + actualCost;

    logger.debug({
      sessionId,
      provider,
      model,
      inputTokens: billedInput,
      outputTokens: billedOutput,
      cost: actualCost.toFixed(6),
      tier,
    }, '[CostOptimizer] Recorded cost');

    return actualCost;
  }

  /**
   * Calculate potential savings from routing optimization. The same token
   * count is priced at each model's combined `input + output` rate. Positive
   * savings are also added to the global `totalSavings`.
   * @param {string} originalModel - Model that would have been used
   * @param {string} optimizedModel - Model chosen instead
   * @param {number} tokens - Token count to price
   * @returns {{originalCost: number, optimizedCost: number, savings: number, percentSaved: number}}
   *   `savings` and `percentSaved` are never negative.
   */
  calculateSavings(originalModel, optimizedModel, tokens) {
    const registry = this._getRegistry();

    const originalCost = registry.getCost(originalModel);
    const optimizedCost = registry.getCost(optimizedModel);

    const originalTotal = (tokens / 1_000_000) * (originalCost.input + originalCost.output);
    const optimizedTotal = (tokens / 1_000_000) * (optimizedCost.input + optimizedCost.output);

    // Clamp once so `savings` and `percentSaved` always agree with each other.
    const savings = Math.max(0, originalTotal - optimizedTotal);

    if (savings > 0) {
      globalStats.totalSavings += savings;
    }

    return {
      originalCost: originalTotal,
      optimizedCost: optimizedTotal,
      savings,
      percentSaved: originalTotal > 0 ? (savings / originalTotal) * 100 : 0,
    };
  }

  /**
   * Get session cost summary.
   * @param {string} sessionId - Session identifier
   * @returns {Object} The stored record for the session, or a zeroed record
   *   when the session has no recorded costs
   */
  getSessionCost(sessionId) {
    return sessionCosts.get(sessionId) || {
      total: 0,
      requests: 0,
      byModel: {},
      byProvider: {},
      byTier: {},
    };
  }

  /**
   * Get global stats: the raw totals plus the session count, the average
   * cost per request (6-decimal string) and dollar-formatted totals.
   * @returns {Object}
   */
  getStats() {
    return {
      ...globalStats,
      sessionCount: sessionCosts.size,
      avgCostPerRequest: globalStats.requestCount > 0
        ? (globalStats.totalCost / globalStats.requestCount).toFixed(6)
        : '0',
      totalCostFormatted: `$${globalStats.totalCost.toFixed(4)}`,
      totalSavingsFormatted: `$${globalStats.totalSavings.toFixed(4)}`,
    };
  }

  /**
   * Clear session data (for cleanup).
   * @param {string} sessionId - Session identifier
   */
  clearSession(sessionId) {
    sessionCosts.delete(sessionId);
  }

  /**
   * Reset all stats (for testing): drops every session record and zeroes
   * the global totals.
   */
  resetStats() {
    sessionCosts.clear();
    globalStats.totalCost = 0;
    globalStats.totalSavings = 0;
    globalStats.requestCount = 0;
    globalStats.byProvider = {};
    globalStats.byTier = {};
  }
}
284
+
285
// Shared CostOptimizer for this module; created on first request.
let instance = null;

/**
 * Return the shared CostOptimizer, constructing it the first time it is asked for.
 * The instance is returned as-is; its initialize() method is not called here.
 * @returns {CostOptimizer}
 */
function getCostOptimizer() {
  instance ??= new CostOptimizer();
  return instance;
}
294
+
295
/**
 * Return the shared CostOptimizer after awaiting its initialize() method.
 * initialize() is awaited on every call, not only the first.
 * @returns {Promise<CostOptimizer>}
 */
async function getCostOptimizerAsync() {
  const shared = getCostOptimizer();
  await shared.initialize();
  return shared;
}
300
+
301
+ module.exports = {
302
+ CostOptimizer,
303
+ getCostOptimizer,
304
+ getCostOptimizerAsync,
305
+ };