@meller/tokentalos 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/api/v1/usage.js +49 -38
- package/lib/engine/ai_analyzer.js +17 -8
- package/lib/engine/analyzer.js +12 -10
- package/lib/engine/db.js +11 -2
- package/lib/engine/index.js +27 -26
- package/lib/engine/pricing.js +38 -14
- package/package.json +6 -4
package/api/api/v1/usage.js
CHANGED
|
@@ -30,12 +30,12 @@ router.post('/ingest', authMiddleware, async (req, res) => {
|
|
|
30
30
|
const totalTokens = (data.input_tokens || 0) + (data.output_tokens || 0);
|
|
31
31
|
const calculator = getCostCalculator();
|
|
32
32
|
const [inputCost, outputCost] = calculator.calculateCost(
|
|
33
|
-
provider,
|
|
34
|
-
model,
|
|
35
|
-
data.input_tokens || 0,
|
|
33
|
+
provider,
|
|
34
|
+
model,
|
|
35
|
+
data.input_tokens || 0,
|
|
36
36
|
data.output_tokens || 0
|
|
37
37
|
);
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
const totalCost = inputCost + outputCost;
|
|
40
40
|
const limitExceeded = totalTokens > (config.maxTokens || 32000);
|
|
41
41
|
|
|
@@ -46,7 +46,7 @@ router.post('/ingest', authMiddleware, async (req, res) => {
|
|
|
46
46
|
input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp
|
|
47
47
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
48
48
|
`, [
|
|
49
|
-
usageId, orgId, projectId, 'ingested', provider, model, data.full_prompt || null, data.response_content || null,
|
|
49
|
+
usageId, orgId, projectId, 'ingested', provider, model, data.full_prompt || null, data.response_content || null,
|
|
50
50
|
data.input_tokens || 0, data.output_tokens || 0,
|
|
51
51
|
totalTokens, inputCost, outputCost, totalCost, data.endpoint, data.latency_ms,
|
|
52
52
|
limitExceeded ? 1 : 0, data.timestamp || new Date().toISOString()
|
|
@@ -58,12 +58,12 @@ router.post('/ingest', authMiddleware, async (req, res) => {
|
|
|
58
58
|
INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
|
|
59
59
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
60
60
|
`, [
|
|
61
|
-
usageId,
|
|
62
|
-
v.name,
|
|
63
|
-
v.content || '',
|
|
61
|
+
usageId,
|
|
62
|
+
v.name,
|
|
63
|
+
v.content || '',
|
|
64
64
|
v.original_content || v.content || '',
|
|
65
|
-
v.token_count || 0,
|
|
66
|
-
v.char_count || 0,
|
|
65
|
+
v.token_count || 0,
|
|
66
|
+
v.char_count || 0,
|
|
67
67
|
v.position || 0
|
|
68
68
|
]);
|
|
69
69
|
|
|
@@ -87,10 +87,10 @@ router.post('/ingest', authMiddleware, async (req, res) => {
|
|
|
87
87
|
INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
|
|
88
88
|
VALUES (?, ?, ?, ?, ?)
|
|
89
89
|
`, [
|
|
90
|
-
usageId,
|
|
91
|
-
action.target,
|
|
92
|
-
action.type,
|
|
93
|
-
action.method || null,
|
|
90
|
+
usageId,
|
|
91
|
+
action.target,
|
|
92
|
+
action.type,
|
|
93
|
+
action.method || null,
|
|
94
94
|
JSON.stringify(action)
|
|
95
95
|
]);
|
|
96
96
|
}
|
|
@@ -113,12 +113,12 @@ router.get('/recent', authMiddleware, async (req, res) => {
|
|
|
113
113
|
try {
|
|
114
114
|
let sql = 'SELECT * FROM usage_data WHERE org_id = ?';
|
|
115
115
|
let params = [orgId];
|
|
116
|
-
|
|
116
|
+
|
|
117
117
|
if (projectId) {
|
|
118
118
|
sql += ' AND project_id = ?';
|
|
119
119
|
params.push(projectId);
|
|
120
120
|
}
|
|
121
|
-
|
|
121
|
+
|
|
122
122
|
sql += ' ORDER BY timestamp DESC LIMIT ?';
|
|
123
123
|
params.push(parseInt(limit));
|
|
124
124
|
|
|
@@ -130,7 +130,7 @@ router.get('/recent', authMiddleware, async (req, res) => {
|
|
|
130
130
|
FROM prompt_variables
|
|
131
131
|
WHERE usage_id = ?
|
|
132
132
|
`, [record.id]);
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
record.explain_plan = await db.get(`
|
|
135
135
|
SELECT * FROM explain_plans WHERE usage_id = ?
|
|
136
136
|
`, [record.id]);
|
|
@@ -139,13 +139,23 @@ router.get('/recent', authMiddleware, async (req, res) => {
|
|
|
139
139
|
if (record.explain_plan.variable_analysis) record.explain_plan.variable_analysis = JSON.parse(record.explain_plan.variable_analysis);
|
|
140
140
|
if (record.explain_plan.detected_issues) record.explain_plan.detected_issues = JSON.parse(record.explain_plan.detected_issues);
|
|
141
141
|
if (record.explain_plan.optimization_suggestions) record.explain_plan.optimization_suggestions = JSON.parse(record.explain_plan.optimization_suggestions);
|
|
142
|
-
|
|
142
|
+
if (record.explain_plan.mce_alternatives && typeof record.explain_plan.mce_alternatives === 'string') {
|
|
143
|
+
record.explain_plan.mce_alternatives = JSON.parse(record.explain_plan.mce_alternatives);
|
|
144
|
+
}
|
|
145
|
+
|
|
143
146
|
// On-the-fly MCE calculation if missing from DB (for existing records)
|
|
144
|
-
if (!record.explain_plan.mce_best_alternative_model) {
|
|
147
|
+
if (!record.explain_plan.mce_best_alternative_model || !record.explain_plan.mce_alternatives) {
|
|
145
148
|
const calculator = getCostCalculator();
|
|
146
149
|
const bestAlt = calculator.getBestAlternative(record.provider, record.model, record.input_tokens, record.output_tokens);
|
|
150
|
+
const allAlts = calculator.getAllAlternatives(record.provider, record.model, record.input_tokens, record.output_tokens);
|
|
151
|
+
|
|
152
|
+
record.explain_plan.mce_alternatives = allAlts;
|
|
153
|
+
|
|
147
154
|
if (bestAlt) {
|
|
148
|
-
|
|
155
|
+
// Use calculated cost from tokens (record.total_cost may be 0 for passively ingested records)
|
|
156
|
+
const [calcInput, calcOutput] = calculator.calculateCost(record.provider, record.model, record.input_tokens, record.output_tokens);
|
|
157
|
+
const currentCost = (calcInput + calcOutput) > 0 ? (calcInput + calcOutput) : (record.total_cost || 0);
|
|
158
|
+
const savingsPct = currentCost > 0 ? ((currentCost - bestAlt.cost) / currentCost) * 100 : 0;
|
|
149
159
|
if (savingsPct > 10) {
|
|
150
160
|
record.explain_plan.mce_best_alternative_model = bestAlt.model;
|
|
151
161
|
record.explain_plan.mce_best_alternative_provider = bestAlt.provider;
|
|
@@ -169,7 +179,7 @@ router.post('/execute', authMiddleware, async (req, res) => {
|
|
|
169
179
|
try {
|
|
170
180
|
const engine = new TokenTalosEngine(config);
|
|
171
181
|
await engine.init();
|
|
172
|
-
|
|
182
|
+
|
|
173
183
|
const result = await engine.execute({
|
|
174
184
|
...req.body,
|
|
175
185
|
orgId: req.orgId,
|
|
@@ -188,14 +198,14 @@ router.post('/execute', authMiddleware, async (req, res) => {
|
|
|
188
198
|
router.post('/prompt/construct', authMiddleware, async (req, res) => {
|
|
189
199
|
const { provider, model, parts, endpoint, projectId } = req.body;
|
|
190
200
|
const orgId = req.orgId;
|
|
191
|
-
|
|
201
|
+
|
|
192
202
|
const { processedParts, metadata } = await processPromptParts(parts, config);
|
|
193
203
|
|
|
194
204
|
const finalProvider = provider || config.llmProvider || 'gemini';
|
|
195
205
|
const finalModel = model || config.defaultModel || 'gemini-3-flash-preview';
|
|
196
206
|
|
|
197
207
|
const prompt = new TokenTalosPrompt(finalProvider, finalModel);
|
|
198
|
-
|
|
208
|
+
|
|
199
209
|
for (const key in processedParts) {
|
|
200
210
|
if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
|
|
201
211
|
else if (key === 'context') prompt.addContext(processedParts[key], parts[key]);
|
|
@@ -206,7 +216,7 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
|
|
|
206
216
|
|
|
207
217
|
const messages = prompt.toMessages();
|
|
208
218
|
const trackingData = prompt.getTrackingData();
|
|
209
|
-
|
|
219
|
+
|
|
210
220
|
const maxTokens = config.maxTokens || 32000;
|
|
211
221
|
const thresholdAction = config.thresholdAction || 'warning';
|
|
212
222
|
|
|
@@ -248,10 +258,10 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
|
|
|
248
258
|
INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
|
|
249
259
|
VALUES (?, ?, ?, ?, ?)
|
|
250
260
|
`, [
|
|
251
|
-
trackingData.id,
|
|
252
|
-
action.target,
|
|
253
|
-
action.type,
|
|
254
|
-
action.method || null,
|
|
261
|
+
trackingData.id,
|
|
262
|
+
action.target,
|
|
263
|
+
action.type,
|
|
264
|
+
action.method || null,
|
|
255
265
|
JSON.stringify(action)
|
|
256
266
|
]);
|
|
257
267
|
|
|
@@ -267,8 +277,8 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
|
|
|
267
277
|
}
|
|
268
278
|
|
|
269
279
|
// 3. Heuristic Analysis
|
|
270
|
-
const analysis = runHeuristicAnalysis({
|
|
271
|
-
total_tokens: trackingData.total_tokens,
|
|
280
|
+
const analysis = runHeuristicAnalysis({
|
|
281
|
+
total_tokens: trackingData.total_tokens,
|
|
272
282
|
total_cost: inputCost,
|
|
273
283
|
provider: finalProvider,
|
|
274
284
|
model: finalModel
|
|
@@ -280,20 +290,21 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
|
|
|
280
290
|
INSERT INTO explain_plans (
|
|
281
291
|
id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
|
|
282
292
|
estimated_savings_pct, estimated_savings_usd,
|
|
283
|
-
mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
|
|
284
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
293
|
+
mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct, mce_alternatives
|
|
294
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
285
295
|
`, [
|
|
286
|
-
planId,
|
|
287
|
-
trackingData.id,
|
|
288
|
-
JSON.stringify(analysis.variable_analysis),
|
|
289
|
-
JSON.stringify(analysis.detected_issues),
|
|
296
|
+
planId,
|
|
297
|
+
trackingData.id,
|
|
298
|
+
JSON.stringify(analysis.variable_analysis),
|
|
299
|
+
JSON.stringify(analysis.detected_issues),
|
|
290
300
|
JSON.stringify(analysis.optimization_suggestions),
|
|
291
|
-
analysis.estimated_savings_pct,
|
|
301
|
+
analysis.estimated_savings_pct,
|
|
292
302
|
analysis.estimated_savings_usd,
|
|
293
303
|
analysis.mce_best_alternative_model || null,
|
|
294
304
|
analysis.mce_best_alternative_provider || null,
|
|
295
305
|
analysis.mce_best_alternative_cost || 0,
|
|
296
|
-
analysis.mce_savings_pct || 0
|
|
306
|
+
analysis.mce_savings_pct || 0,
|
|
307
|
+
analysis.mce_alternatives ? JSON.stringify(analysis.mce_alternatives) : null
|
|
297
308
|
]);
|
|
298
309
|
}
|
|
299
310
|
|
|
package/lib/engine/ai_analyzer.js
CHANGED
|
@@ -9,7 +9,7 @@ export async function runAIAnalysis(config, usageRecord, variables) {
|
|
|
9
9
|
const model = genAI.getGenerativeModel({ model: 'gemini-1.5-flash' });
|
|
10
10
|
|
|
11
11
|
const variableInfo = variables.map(v => `${v.name} (${v.token_count} tokens): "${v.content.substring(0, 100)}..."`).join('\n');
|
|
12
|
-
|
|
12
|
+
|
|
13
13
|
const prompt = `
|
|
14
14
|
Analyze this LLM prompt structure and suggest optimizations to reduce costs while maintaining performance.
|
|
15
15
|
|
|
@@ -31,29 +31,38 @@ export async function runAIAnalysis(config, usageRecord, variables) {
|
|
|
31
31
|
const result = await model.generateContent(prompt);
|
|
32
32
|
const response = await result.response;
|
|
33
33
|
const text = response.text();
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
// Attempt to parse JSON from response
|
|
36
36
|
try {
|
|
37
37
|
const jsonStart = text.indexOf('{');
|
|
38
38
|
const jsonEnd = text.lastIndexOf('}') + 1;
|
|
39
39
|
const analysis = JSON.parse(text.substring(jsonStart, jsonEnd));
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
const calculator = getCostCalculator();
|
|
42
42
|
const mceResult = calculator.getBestAlternative(
|
|
43
|
-
usageRecord.provider,
|
|
44
|
-
usageRecord.model,
|
|
45
|
-
usageRecord.input_tokens,
|
|
43
|
+
usageRecord.provider,
|
|
44
|
+
usageRecord.model,
|
|
45
|
+
usageRecord.input_tokens,
|
|
46
46
|
usageRecord.output_tokens,
|
|
47
47
|
config.comparisonProviders
|
|
48
48
|
);
|
|
49
|
-
|
|
49
|
+
const allAlts = calculator.getAllAlternatives(
|
|
50
|
+
usageRecord.provider,
|
|
51
|
+
usageRecord.model,
|
|
52
|
+
usageRecord.input_tokens,
|
|
53
|
+
usageRecord.output_tokens,
|
|
54
|
+
config.comparisonProviders
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
analysis.mce_alternatives = allAlts;
|
|
58
|
+
|
|
50
59
|
if (mceResult) {
|
|
51
60
|
analysis.mce_best_alternative_model = mceResult.model;
|
|
52
61
|
analysis.mce_best_alternative_provider = mceResult.provider;
|
|
53
62
|
analysis.mce_best_alternative_cost = mceResult.cost;
|
|
54
63
|
analysis.mce_savings_pct = ((usageRecord.total_cost - mceResult.cost) / usageRecord.total_cost) * 100;
|
|
55
64
|
}
|
|
56
|
-
|
|
65
|
+
|
|
57
66
|
return analysis;
|
|
58
67
|
} catch (err) {
|
|
59
68
|
console.warn('AI analysis JSON parsing failed:', err);
|
package/lib/engine/analyzer.js
CHANGED
|
@@ -19,7 +19,8 @@ export function runHeuristicAnalysis(usageRecord, variables) {
|
|
|
19
19
|
const outputTokens = usageRecord.output_tokens || Math.floor(totalTokens * 0.2);
|
|
20
20
|
|
|
21
21
|
const bestAlt = calculator.getBestAlternative(provider, model, inputTokens, outputTokens);
|
|
22
|
-
|
|
22
|
+
const allAlts = calculator.getAllAlternatives(provider, model, inputTokens, outputTokens);
|
|
23
|
+
|
|
23
24
|
// Check if current model is deprecated
|
|
24
25
|
const currentPricing = PRICING_DATA[provider.toLowerCase()]?.[model.toLowerCase()];
|
|
25
26
|
if (currentPricing?.deprecated) {
|
|
@@ -27,18 +28,19 @@ export function runHeuristicAnalysis(usageRecord, variables) {
|
|
|
27
28
|
suggestions.push(`Migrate to a current stable model (e.g., Gemini 2.0 Flash) to ensure service continuity.`);
|
|
28
29
|
}
|
|
29
30
|
|
|
30
|
-
let mceResult = {};
|
|
31
|
+
let mceResult = {
|
|
32
|
+
mce_alternatives: allAlts
|
|
33
|
+
};
|
|
34
|
+
|
|
31
35
|
if (bestAlt) {
|
|
32
36
|
const currentCost = usageRecord.total_cost || 0;
|
|
33
37
|
const savingsPct = currentCost > 0 ? ((currentCost - bestAlt.cost) / currentCost) * 100 : 0;
|
|
34
|
-
|
|
38
|
+
|
|
35
39
|
if (savingsPct > 10) { // Only suggest if savings are > 10%
|
|
36
|
-
mceResult = {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
mce_savings_pct: savingsPct
|
|
41
|
-
};
|
|
40
|
+
mceResult.mce_best_alternative_model = bestAlt.model;
|
|
41
|
+
mceResult.mce_best_alternative_provider = bestAlt.provider;
|
|
42
|
+
mceResult.mce_best_alternative_cost = bestAlt.cost;
|
|
43
|
+
mceResult.mce_savings_pct = savingsPct;
|
|
42
44
|
suggestions.push(`Potential Migration: Switching to ${bestAlt.provider}/${bestAlt.model} could reduce this prompt's cost by ${savingsPct.toFixed(0)}%.`);
|
|
43
45
|
}
|
|
44
46
|
}
|
|
@@ -46,7 +48,7 @@ export function runHeuristicAnalysis(usageRecord, variables) {
|
|
|
46
48
|
for (const v of variables) {
|
|
47
49
|
const rawPct = (v.token_count / totalTokens) * 100;
|
|
48
50
|
const pct = Math.min(rawPct, 100); // Cap at 100% for display sanity
|
|
49
|
-
|
|
51
|
+
|
|
50
52
|
const vAnalysis = {
|
|
51
53
|
variable_name: v.name,
|
|
52
54
|
token_count: v.token_count,
|
package/lib/engine/db.js
CHANGED
|
@@ -54,7 +54,7 @@ export async function initDb(config) {
|
|
|
54
54
|
*/
|
|
55
55
|
async function runMigrations(type, database, schemaName = '') {
|
|
56
56
|
const prefix = (type === 'postgres' && schemaName) ? `${schemaName}.` : '';
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
if (type === 'sqlite') {
|
|
59
59
|
// 1. Add org_id and project_id to usage_data if they don't exist
|
|
60
60
|
const columns = await database.all(`PRAGMA table_info(usage_data)`);
|
|
@@ -78,6 +78,11 @@ async function runMigrations(type, database, schemaName = '') {
|
|
|
78
78
|
const varColumns = await database.all(`PRAGMA table_info(prompt_variables)`);
|
|
79
79
|
const hasOriginalContent = varColumns.some(c => c.name === 'original_content');
|
|
80
80
|
if (!hasOriginalContent) await database.exec(`ALTER TABLE prompt_variables ADD COLUMN original_content TEXT`);
|
|
81
|
+
|
|
82
|
+
// Check explain_plans
|
|
83
|
+
const planColumns = await database.all(`PRAGMA table_info(explain_plans)`);
|
|
84
|
+
const hasMceAlternatives = planColumns.some(c => c.name === 'mce_alternatives');
|
|
85
|
+
if (!hasMceAlternatives) await database.exec(`ALTER TABLE explain_plans ADD COLUMN mce_alternatives TEXT`);
|
|
81
86
|
} else {
|
|
82
87
|
// Postgres migration check
|
|
83
88
|
try {
|
|
@@ -132,6 +137,9 @@ async function runMigrations(type, database, schemaName = '') {
|
|
|
132
137
|
if (!planCols.includes('variable_analysis')) {
|
|
133
138
|
await database.query(`ALTER TABLE ${prefix}explain_plans ADD COLUMN variable_analysis TEXT`);
|
|
134
139
|
}
|
|
140
|
+
if (!planCols.includes('mce_alternatives')) {
|
|
141
|
+
await database.query(`ALTER TABLE ${prefix}explain_plans ADD COLUMN mce_alternatives TEXT`);
|
|
142
|
+
}
|
|
135
143
|
} catch (e) {
|
|
136
144
|
console.warn('[TokenTalos] Migration check failed (Postgres):', e.message);
|
|
137
145
|
}
|
|
@@ -265,7 +273,8 @@ function getSchema(type, schemaName = '') {
|
|
|
265
273
|
mce_best_alternative_model TEXT,
|
|
266
274
|
mce_best_alternative_provider TEXT,
|
|
267
275
|
mce_best_alternative_cost REAL,
|
|
268
|
-
mce_savings_pct REAL
|
|
276
|
+
mce_savings_pct REAL,
|
|
277
|
+
mce_alternatives TEXT -- JSON representation of possible model alternatives
|
|
269
278
|
);
|
|
270
279
|
|
|
271
280
|
CREATE TABLE IF NOT EXISTS ${prefix}opv_results (
|
package/lib/engine/index.js
CHANGED
|
@@ -29,16 +29,16 @@ export class TokenTalosEngine {
|
|
|
29
29
|
|
|
30
30
|
async init() {
|
|
31
31
|
if (this.initialized) return;
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
// Default to SQLite if not specified
|
|
34
34
|
const dbConfig = {
|
|
35
35
|
databaseType: this.config.databaseType || 'sqlite',
|
|
36
|
-
sqlitePath: this.config.sqlitePath || ':memory:',
|
|
36
|
+
sqlitePath: this.config.sqlitePath || ':memory:',
|
|
37
37
|
...this.config
|
|
38
38
|
};
|
|
39
39
|
|
|
40
40
|
this.db = await initDb(dbConfig);
|
|
41
|
-
|
|
41
|
+
|
|
42
42
|
// Create default org if it doesn't exist
|
|
43
43
|
await this.ensureDefaultOrg();
|
|
44
44
|
|
|
@@ -52,10 +52,10 @@ export class TokenTalosEngine {
|
|
|
52
52
|
try {
|
|
53
53
|
// 1. Ensure Default Organization
|
|
54
54
|
await db.run('INSERT INTO organizations (id, name) VALUES (?, ?) ON CONFLICT DO NOTHING', ['default_org', 'Default Organization']);
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
// 2. Ensure Default User (for local mode)
|
|
57
57
|
await db.run('INSERT INTO users (id, email, name) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['local_user', 'dev@tokentalos.local', 'Local Developer']);
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
// 3. Ensure Membership
|
|
60
60
|
await db.run('INSERT INTO organization_members (org_id, user_id, role) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['default_org', 'local_user', 'admin']);
|
|
61
61
|
} catch (e) {
|
|
@@ -104,7 +104,7 @@ export class TokenTalosEngine {
|
|
|
104
104
|
const finalModel = model || this.config.defaultModel || 'gemini-3-flash-preview';
|
|
105
105
|
|
|
106
106
|
const prompt = this.createPrompt(finalProvider, finalModel);
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
// Add processed parts to the prompt
|
|
109
109
|
for (const key in processedParts) {
|
|
110
110
|
if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
|
|
@@ -131,7 +131,7 @@ export class TokenTalosEngine {
|
|
|
131
131
|
// Log the cache hit as a usage event with 0 cost but record saved tokens
|
|
132
132
|
const hitId = uuidv4();
|
|
133
133
|
const db = this.getDb();
|
|
134
|
-
|
|
134
|
+
|
|
135
135
|
// Calculate what it WOULD have cost
|
|
136
136
|
const [savedInputCost] = calculator.calculateCost(finalProvider, finalModel, prompt.getTrackingData().total_tokens, 0);
|
|
137
137
|
const trackingData = prompt.getTrackingData();
|
|
@@ -140,7 +140,7 @@ export class TokenTalosEngine {
|
|
|
140
140
|
INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, total_tokens, saved_tokens, saved_cost, input_cost, total_cost, endpoint, latency_ms, timestamp)
|
|
141
141
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
142
142
|
`, [
|
|
143
|
-
hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
|
|
143
|
+
hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
|
|
144
144
|
prompt.getTrackingData().total_tokens, savedInputCost, 0, 0, endpoint, Date.now() - startTime, trackingData.timestamp
|
|
145
145
|
]);
|
|
146
146
|
|
|
@@ -156,10 +156,10 @@ export class TokenTalosEngine {
|
|
|
156
156
|
INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
|
|
157
157
|
VALUES (?, ?, ?, ?, ?)
|
|
158
158
|
`, [
|
|
159
|
-
hitId,
|
|
160
|
-
action.target,
|
|
161
|
-
action.type,
|
|
162
|
-
action.method || null,
|
|
159
|
+
hitId,
|
|
160
|
+
action.target,
|
|
161
|
+
action.type,
|
|
162
|
+
action.method || null,
|
|
163
163
|
JSON.stringify(action)
|
|
164
164
|
]);
|
|
165
165
|
}
|
|
@@ -211,10 +211,10 @@ export class TokenTalosEngine {
|
|
|
211
211
|
INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
|
|
212
212
|
VALUES (?, ?, ?, ?, ?)
|
|
213
213
|
`, [
|
|
214
|
-
trackingData.id,
|
|
215
|
-
action.target,
|
|
216
|
-
action.type,
|
|
217
|
-
action.method || null,
|
|
214
|
+
trackingData.id,
|
|
215
|
+
action.target,
|
|
216
|
+
action.type,
|
|
217
|
+
action.method || null,
|
|
218
218
|
JSON.stringify(action)
|
|
219
219
|
]);
|
|
220
220
|
}
|
|
@@ -234,8 +234,8 @@ export class TokenTalosEngine {
|
|
|
234
234
|
}
|
|
235
235
|
|
|
236
236
|
// 5. Heuristic Analysis
|
|
237
|
-
const analysis = runHeuristicAnalysis({
|
|
238
|
-
total_tokens: result.input_tokens + result.output_tokens,
|
|
237
|
+
const analysis = runHeuristicAnalysis({
|
|
238
|
+
total_tokens: result.input_tokens + result.output_tokens,
|
|
239
239
|
input_tokens: result.input_tokens,
|
|
240
240
|
output_tokens: result.output_tokens,
|
|
241
241
|
total_cost: inputCost + outputCost,
|
|
@@ -248,20 +248,21 @@ export class TokenTalosEngine {
|
|
|
248
248
|
INSERT INTO explain_plans (
|
|
249
249
|
id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
|
|
250
250
|
estimated_savings_pct, estimated_savings_usd,
|
|
251
|
-
mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
|
|
252
|
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
251
|
+
mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct, mce_alternatives
|
|
252
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
253
253
|
`, [
|
|
254
|
-
planId,
|
|
255
|
-
trackingData.id,
|
|
256
|
-
JSON.stringify(analysis.variable_analysis),
|
|
257
|
-
JSON.stringify(analysis.detected_issues),
|
|
254
|
+
planId,
|
|
255
|
+
trackingData.id,
|
|
256
|
+
JSON.stringify(analysis.variable_analysis),
|
|
257
|
+
JSON.stringify(analysis.detected_issues),
|
|
258
258
|
JSON.stringify(analysis.optimization_suggestions),
|
|
259
|
-
analysis.estimated_savings_pct,
|
|
259
|
+
analysis.estimated_savings_pct,
|
|
260
260
|
analysis.estimated_savings_usd,
|
|
261
261
|
analysis.mce_best_alternative_model || null,
|
|
262
262
|
analysis.mce_best_alternative_provider || null,
|
|
263
263
|
analysis.mce_best_alternative_cost || 0,
|
|
264
|
-
analysis.mce_savings_pct || 0
|
|
264
|
+
analysis.mce_savings_pct || 0,
|
|
265
|
+
analysis.mce_alternatives ? JSON.stringify(analysis.mce_alternatives) : null
|
|
265
266
|
]);
|
|
266
267
|
}
|
|
267
268
|
|
package/lib/engine/pricing.js
CHANGED
|
@@ -50,9 +50,26 @@ export const PRICING_DATA = {
|
|
|
50
50
|
}
|
|
51
51
|
};
|
|
52
52
|
|
|
53
|
+
// Map common provider aliases to their canonical PRICING_DATA keys
|
|
54
|
+
const PROVIDER_ALIASES = {
|
|
55
|
+
'gemini': 'google',
|
|
56
|
+
'gcp': 'google',
|
|
57
|
+
'openai-api': 'openai',
|
|
58
|
+
'claude': 'anthropic',
|
|
59
|
+
'aws': 'amazon',
|
|
60
|
+
'bedrock': 'amazon',
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
function normalizeProvider(provider) {
|
|
64
|
+
if (!provider) return provider;
|
|
65
|
+
const lower = provider.toLowerCase();
|
|
66
|
+
return PROVIDER_ALIASES[lower] || lower;
|
|
67
|
+
}
|
|
68
|
+
|
|
53
69
|
export class CostCalculator {
|
|
54
70
|
calculateCost(provider, model, inputTokens, outputTokens) {
|
|
55
|
-
const providerPricing = PRICING_DATA[provider.toLowerCase()];
|
|
71
|
+
const normalizedProvider = normalizeProvider(provider);
|
|
72
|
+
const providerPricing = PRICING_DATA[normalizedProvider];
|
|
56
73
|
if (!providerPricing) return [0, 0];
|
|
57
74
|
|
|
58
75
|
const modelPricing = providerPricing[model.toLowerCase()];
|
|
@@ -66,10 +83,16 @@ export class CostCalculator {
|
|
|
66
83
|
}
|
|
67
84
|
|
|
68
85
|
getBestAlternative(provider, model, inputTokens, outputTokens, preferredProviders = []) {
|
|
69
|
-
|
|
70
|
-
|
|
86
|
+
const alternatives = this.getAllAlternatives(provider, model, inputTokens, outputTokens, preferredProviders);
|
|
87
|
+
if (alternatives.length === 0) return null;
|
|
88
|
+
return alternatives[0];
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
getAllAlternatives(provider, model, inputTokens, outputTokens, preferredProviders = []) {
|
|
92
|
+
const normalizedProvider = normalizeProvider(provider);
|
|
93
|
+
let alternatives = [];
|
|
94
|
+
let currentCost = this.calculateCost(normalizedProvider, model, inputTokens, outputTokens).reduce((a, b) => a + b, 0);
|
|
71
95
|
|
|
72
|
-
// If no preference, use all available in PRICING_DATA
|
|
73
96
|
const targets = preferredProviders.length > 0 ? preferredProviders : Object.keys(PRICING_DATA);
|
|
74
97
|
|
|
75
98
|
for (const targetProvider of targets) {
|
|
@@ -78,22 +101,23 @@ export class CostCalculator {
|
|
|
78
101
|
|
|
79
102
|
for (const targetModel in models) {
|
|
80
103
|
const pricing = models[targetModel];
|
|
81
|
-
// Skip current model
|
|
82
|
-
if ((targetProvider ===
|
|
104
|
+
// Skip the current model (compare against normalized provider)
|
|
105
|
+
if ((targetProvider === normalizedProvider && targetModel === model.toLowerCase()) || pricing.deprecated) continue;
|
|
83
106
|
|
|
84
107
|
const [altInput, altOutput] = this.calculateCost(targetProvider, targetModel, inputTokens, outputTokens);
|
|
85
108
|
const altTotal = altInput + altOutput;
|
|
86
109
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
}
|
|
110
|
+
alternatives.push({
|
|
111
|
+
model: targetModel,
|
|
112
|
+
provider: targetProvider,
|
|
113
|
+
cost: altTotal,
|
|
114
|
+
savingsPct: currentCost > 0 ? ((currentCost - altTotal) / currentCost) * 100 : 0
|
|
115
|
+
});
|
|
94
116
|
}
|
|
95
117
|
}
|
|
96
|
-
|
|
118
|
+
|
|
119
|
+
// Sort by cost ascending
|
|
120
|
+
return alternatives.sort((a, b) => a.cost - b.cost);
|
|
97
121
|
}
|
|
98
122
|
}
|
|
99
123
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meller/tokentalos",
|
|
3
|
-
"version": "1.0.4",
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "Token Talos: The ORM for LLMs. A standalone gateway and library for cost-optimized, secure, and tracked prompt orchestration.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"publishConfig": {
|
|
@@ -57,8 +57,10 @@
|
|
|
57
57
|
"js-tiktoken": "^1.0.7",
|
|
58
58
|
"openai": "^6.22.0",
|
|
59
59
|
"pg": "^8.18.0",
|
|
60
|
-
"sqlite": "^5.0.1",
|
|
61
|
-
"sqlite3": "^5.1.6",
|
|
62
60
|
"uuid": "^9.0.1"
|
|
61
|
+
},
|
|
62
|
+
"optionalDependencies": {
|
|
63
|
+
"sqlite": "^5.0.1",
|
|
64
|
+
"sqlite3": "^5.1.6"
|
|
63
65
|
}
|
|
64
|
-
}
|
|
66
|
+
}
|