lynkr 7.2.5 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +4 -2
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +76 -3
- package/src/config/index.js +113 -35
- package/src/context/toon.js +173 -0
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +686 -211
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost Optimizer Module
|
|
3
|
+
* Tracks and optimizes LLM costs across providers
|
|
4
|
+
* Uses ModelRegistry for dynamic pricing data
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const logger = require('../logger');
|
|
8
|
+
const config = require('../config');
|
|
9
|
+
const { getModelRegistry, getModelRegistrySync } = require('./model-registry');
|
|
10
|
+
const { getModelTierSelector, TIER_DEFINITIONS } = require('./model-tiers');
|
|
11
|
+
|
|
12
|
+
// Per-session spend, held in memory only.
// Key: sessionId. Value: { total, requests, byModel, byProvider, byTier }.
const sessionCosts = new Map();

// Running totals shared by every CostOptimizer instance in this module.
// `byProvider` and `byTier` map a provider / tier name to accumulated cost.
const globalStats = {
  totalCost: 0,
  totalSavings: 0,
  requestCount: 0,
  byProvider: {},
  byTier: {},
};
|
|
23
|
+
|
|
24
|
+
/**
 * Estimates, records and summarizes LLM spend.
 *
 * Prices are read from the model registry through `registry.getCost(model)`;
 * this class reads the `input`, `output`, `context` and `source` fields of the
 * returned object and treats `input` / `output` as prices per one million
 * tokens. Recorded costs are accumulated in the module-level `sessionCosts`
 * map and `globalStats` object, so all instances share the same counters.
 */
class CostOptimizer {
  constructor() {
    // Both collaborators are resolved lazily (see _getRegistry / _getTierSelector)
    // or eagerly through initialize().
    this.registry = null;
    this.tierSelector = null;
  }

  /**
   * Resolve the registry through the async loader and the tier selector.
   * @returns {Promise<void>}
   */
  async initialize() {
    this.registry = await getModelRegistry();
    this.tierSelector = getModelTierSelector();
  }

  /**
   * Return the registry, falling back to the synchronous loader when
   * initialize() has not populated it yet.
   * @returns {Object} Model registry
   */
  _getRegistry() {
    if (!this.registry) {
      this.registry = getModelRegistrySync();
    }
    return this.registry;
  }

  /**
   * Return the tier selector, resolving it on first use.
   * @returns {Object} Tier selector
   */
  _getTierSelector() {
    if (!this.tierSelector) {
      this.tierSelector = getModelTierSelector();
    }
    return this.tierSelector;
  }

  /**
   * Estimate the cost of a request before sending it.
   *
   * When `outputTokens` is omitted (null/undefined/NaN) the output size is
   * guessed as half of the input, capped at 4096 tokens. An explicit `0` is
   * honoured as "no output" rather than being replaced by the guess.
   *
   * @param {string} model - Model name
   * @param {number} inputTokens - Estimated input tokens
   * @param {number|null} [outputTokens=null] - Estimated output tokens
   * @returns {Object} Estimate with `inputCost`, `outputCost` and
   *   `totalEstimate` rounded to six decimals, plus the inputs and prices used
   */
  estimateCost(model, inputTokens, outputTokens = null) {
    const costs = this._getRegistry().getCost(model);

    // Not `||`: 0 is a legitimate token count and must not trigger the heuristic.
    const hasExplicitOutput = outputTokens != null && !Number.isNaN(outputTokens);
    const estimatedOutputTokens = hasExplicitOutput
      ? outputTokens
      : Math.min(inputTokens * 0.5, 4096);

    const inputCost = (inputTokens / 1_000_000) * costs.input;
    const outputCost = (estimatedOutputTokens / 1_000_000) * costs.output;

    // Round to six decimal places.
    const roundMicro = (amount) => Math.round(amount * 1_000_000) / 1_000_000;

    return {
      inputCost: roundMicro(inputCost),
      outputCost: roundMicro(outputCost),
      totalEstimate: roundMicro(inputCost + outputCost),
      model,
      inputTokens,
      outputTokens: estimatedOutputTokens,
      pricePerMillion: {
        input: costs.input,
        output: costs.output,
      },
      source: costs.source,
    };
  }

  /**
   * Find the cheapest model able to handle a complexity tier.
   *
   * Models preferred for the required tier and for every higher tier are
   * considered, across all given providers. "Cheapest" means the lowest
   * `input + output` price per million tokens. On a tie the model encountered
   * first (tier order, then provider order, then selector order) wins.
   *
   * @param {string} requiredTier - Minimum tier required
   *   ('SIMPLE' | 'MEDIUM' | 'COMPLEX' | 'REASONING')
   * @param {string[]} availableProviders - Providers to consider; a missing
   *   list is treated as empty
   * @returns {Object|null} Cheapest model info, or null when the tier is
   *   unknown or no candidate model exists
   */
  findCheapestForTier(requiredTier, availableProviders) {
    const registry = this._getRegistry();
    const tierSelector = this._getTierSelector();

    const tierOrder = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
    const minTierIndex = tierOrder.indexOf(requiredTier);

    if (minTierIndex === -1) {
      logger.warn({ tier: requiredTier }, '[CostOptimizer] Unknown tier');
      return null;
    }

    let cheapest = null;
    let candidateCount = 0;

    // Single pass over all capable tiers (>= required tier); no need to sort
    // the whole candidate list just to take its minimum.
    for (const tier of tierOrder.slice(minTierIndex)) {
      for (const provider of availableProviders ?? []) {
        // Tolerate a selector that returns nothing for a tier/provider pair.
        const models = tierSelector.getPreferredModels(tier, provider) ?? [];

        for (const model of models) {
          const cost = registry.getCost(model);
          const totalCost = cost.input + cost.output; // Simple cost metric
          candidateCount++;

          // Strict `<` keeps the first of equally priced models. A priced model
          // also replaces a current best whose price is NaN (unknown).
          const isBetter = cheapest === null
            || totalCost < cheapest.totalCost
            || (Number.isNaN(cheapest.totalCost) && !Number.isNaN(totalCost));

          if (isBetter) {
            cheapest = {
              model,
              provider,
              tier,
              inputCost: cost.input,
              outputCost: cost.output,
              totalCost,
              context: cost.context,
              source: cost.source,
            };
          }
        }
      }
    }

    if (cheapest === null) {
      return null;
    }

    logger.debug({
      requiredTier,
      selectedModel: cheapest.model,
      selectedProvider: cheapest.provider,
      cost: cheapest.totalCost,
      candidateCount,
    }, '[CostOptimizer] Found cheapest model');

    return cheapest;
  }

  /**
   * Record the actual cost of a completed request.
   *
   * Updates the per-session entry (only when `sessionId` is truthy) and the
   * global totals. If a token count or price is missing or non-numeric, that
   * part of the cost is counted as 0 and a warning is logged, so a single bad
   * usage report cannot turn the running totals into NaN.
   *
   * @param {string} sessionId - Session identifier
   * @param {string} provider - Provider used
   * @param {string} model - Model used
   * @param {number} inputTokens - Actual input tokens
   * @param {number} outputTokens - Actual output tokens
   * @param {string} [tier='MEDIUM'] - Complexity tier
   * @returns {number} Actual cost that was recorded
   */
  recordCost(sessionId, provider, model, inputTokens, outputTokens, tier = 'MEDIUM') {
    const costs = this._getRegistry().getCost(model);

    const inputCost = (inputTokens / 1_000_000) * costs.input;
    const outputCost = (outputTokens / 1_000_000) * costs.output;
    let actualCost = inputCost + outputCost;

    if (!Number.isFinite(actualCost)) {
      logger.warn({
        sessionId,
        provider,
        model,
        inputTokens,
        outputTokens,
      }, '[CostOptimizer] Non-finite cost; missing token counts or prices counted as 0');

      const finiteOrZero = (amount) => (Number.isFinite(amount) ? amount : 0);
      actualCost = finiteOrZero(inputCost) + finiteOrZero(outputCost);
    }

    // Update session costs
    if (sessionId) {
      if (!sessionCosts.has(sessionId)) {
        sessionCosts.set(sessionId, {
          total: 0,
          requests: 0,
          byModel: {},
          byProvider: {},
          byTier: {},
        });
      }

      const session = sessionCosts.get(sessionId);
      session.total += actualCost;
      session.requests++;
      session.byModel[model] = (session.byModel[model] || 0) + actualCost;
      session.byProvider[provider] = (session.byProvider[provider] || 0) + actualCost;
      session.byTier[tier] = (session.byTier[tier] || 0) + actualCost;
    }

    // Update global stats
    globalStats.totalCost += actualCost;
    globalStats.requestCount++;
    globalStats.byProvider[provider] = (globalStats.byProvider[provider] || 0) + actualCost;
    globalStats.byTier[tier] = (globalStats.byTier[tier] || 0) + actualCost;

    logger.debug({
      sessionId,
      provider,
      model,
      inputTokens,
      outputTokens,
      cost: actualCost.toFixed(6),
      tier,
    }, '[CostOptimizer] Recorded cost');

    return actualCost;
  }

  /**
   * Calculate potential savings from routing optimization.
   *
   * Both models are priced at `tokens` input tokens plus `tokens` output
   * tokens. Side effect: a positive saving is added to the global
   * `totalSavings` counter. Note that `savings` is clamped at 0 while
   * `percentSaved` is not, so it is negative when the "optimized" model is
   * actually more expensive.
   *
   * @param {string} originalModel - Model that would have been used
   * @param {string} optimizedModel - Model chosen instead
   * @param {number} tokens - Token count used for the comparison
   * @returns {{originalCost: number, optimizedCost: number, savings: number, percentSaved: number}}
   */
  calculateSavings(originalModel, optimizedModel, tokens) {
    const registry = this._getRegistry();

    const originalCost = registry.getCost(originalModel);
    const optimizedCost = registry.getCost(optimizedModel);

    const originalTotal = (tokens / 1_000_000) * (originalCost.input + originalCost.output);
    const optimizedTotal = (tokens / 1_000_000) * (optimizedCost.input + optimizedCost.output);

    const savings = originalTotal - optimizedTotal;

    if (savings > 0) {
      globalStats.totalSavings += savings;
    }

    return {
      originalCost: originalTotal,
      optimizedCost: optimizedTotal,
      savings: Math.max(0, savings),
      percentSaved: originalTotal > 0 ? (savings / originalTotal) * 100 : 0,
    };
  }

  /**
   * Get the cost summary for a session.
   *
   * For a known session this is the live tracking object (not a copy); for an
   * unknown session a fresh zeroed summary is returned and nothing is stored.
   *
   * @param {string} sessionId - Session identifier
   * @returns {{total: number, requests: number, byModel: Object, byProvider: Object, byTier: Object}}
   */
  getSessionCost(sessionId) {
    return sessionCosts.get(sessionId) || {
      total: 0,
      requests: 0,
      byModel: {},
      byProvider: {},
      byTier: {},
    };
  }

  /**
   * Get a snapshot of the global stats.
   *
   * `byProvider` and `byTier` are copied so callers cannot mutate the internal
   * counters through the returned object. `avgCostPerRequest` and the
   * `*Formatted` fields are strings.
   *
   * @returns {Object} Global totals plus derived, display-friendly fields
   */
  getStats() {
    const { totalCost, totalSavings, requestCount } = globalStats;

    return {
      ...globalStats,
      byProvider: { ...globalStats.byProvider },
      byTier: { ...globalStats.byTier },
      sessionCount: sessionCosts.size,
      avgCostPerRequest: requestCount > 0
        ? (totalCost / requestCount).toFixed(6)
        : '0',
      totalCostFormatted: `$${totalCost.toFixed(4)}`,
      totalSavingsFormatted: `$${totalSavings.toFixed(4)}`,
    };
  }

  /**
   * Clear session data (for cleanup). Global totals are left untouched.
   * @param {string} sessionId - Session identifier
   */
  clearSession(sessionId) {
    sessionCosts.delete(sessionId);
  }

  /**
   * Reset all stats (for testing): drops every session and zeroes the
   * global counters.
   */
  resetStats() {
    sessionCosts.clear();
    globalStats.totalCost = 0;
    globalStats.totalSavings = 0;
    globalStats.requestCount = 0;
    globalStats.byProvider = {};
    globalStats.byTier = {};
  }
}
|
|
284
|
+
|
|
285
|
+
// Shared CostOptimizer for this module; stays null until first requested.
let instance = null;

/**
 * Return the shared CostOptimizer, constructing it on the first call.
 * The returned instance has not necessarily been through initialize().
 * @returns {CostOptimizer} The module-wide optimizer
 */
function getCostOptimizer() {
  if (instance !== null) {
    return instance;
  }
  instance = new CostOptimizer();
  return instance;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Return the shared CostOptimizer after awaiting its initialize() step.
 * initialize() is awaited on every call, not only the first one.
 * @returns {Promise<CostOptimizer>} The module-wide optimizer, initialized
 */
async function getCostOptimizerAsync() {
  const shared = getCostOptimizer();
  await shared.initialize();
  return shared;
}
|
|
300
|
+
|
|
301
|
+
// Public API: the class itself plus the sync and async singleton accessors.
module.exports = { CostOptimizer, getCostOptimizer, getCostOptimizerAsync };
|