@meller/tokentalos 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,12 +30,12 @@ router.post('/ingest', authMiddleware, async (req, res) => {
30
30
  const totalTokens = (data.input_tokens || 0) + (data.output_tokens || 0);
31
31
  const calculator = getCostCalculator();
32
32
  const [inputCost, outputCost] = calculator.calculateCost(
33
- provider,
34
- model,
35
- data.input_tokens || 0,
33
+ provider,
34
+ model,
35
+ data.input_tokens || 0,
36
36
  data.output_tokens || 0
37
37
  );
38
-
38
+
39
39
  const totalCost = inputCost + outputCost;
40
40
  const limitExceeded = totalTokens > (config.maxTokens || 32000);
41
41
 
@@ -46,7 +46,7 @@ router.post('/ingest', authMiddleware, async (req, res) => {
46
46
  input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp
47
47
  ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
48
48
  `, [
49
- usageId, orgId, projectId, 'ingested', provider, model, data.full_prompt || null, data.response_content || null,
49
+ usageId, orgId, projectId, 'ingested', provider, model, data.full_prompt || null, data.response_content || null,
50
50
  data.input_tokens || 0, data.output_tokens || 0,
51
51
  totalTokens, inputCost, outputCost, totalCost, data.endpoint, data.latency_ms,
52
52
  limitExceeded ? 1 : 0, data.timestamp || new Date().toISOString()
@@ -58,12 +58,12 @@ router.post('/ingest', authMiddleware, async (req, res) => {
58
58
  INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
59
59
  VALUES (?, ?, ?, ?, ?, ?, ?)
60
60
  `, [
61
- usageId,
62
- v.name,
63
- v.content || '',
61
+ usageId,
62
+ v.name,
63
+ v.content || '',
64
64
  v.original_content || v.content || '',
65
- v.token_count || 0,
66
- v.char_count || 0,
65
+ v.token_count || 0,
66
+ v.char_count || 0,
67
67
  v.position || 0
68
68
  ]);
69
69
 
@@ -87,10 +87,10 @@ router.post('/ingest', authMiddleware, async (req, res) => {
87
87
  INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
88
88
  VALUES (?, ?, ?, ?, ?)
89
89
  `, [
90
- usageId,
91
- action.target,
92
- action.type,
93
- action.method || null,
90
+ usageId,
91
+ action.target,
92
+ action.type,
93
+ action.method || null,
94
94
  JSON.stringify(action)
95
95
  ]);
96
96
  }
@@ -113,12 +113,12 @@ router.get('/recent', authMiddleware, async (req, res) => {
113
113
  try {
114
114
  let sql = 'SELECT * FROM usage_data WHERE org_id = ?';
115
115
  let params = [orgId];
116
-
116
+
117
117
  if (projectId) {
118
118
  sql += ' AND project_id = ?';
119
119
  params.push(projectId);
120
120
  }
121
-
121
+
122
122
  sql += ' ORDER BY timestamp DESC LIMIT ?';
123
123
  params.push(parseInt(limit));
124
124
 
@@ -130,7 +130,7 @@ router.get('/recent', authMiddleware, async (req, res) => {
130
130
  FROM prompt_variables
131
131
  WHERE usage_id = ?
132
132
  `, [record.id]);
133
-
133
+
134
134
  record.explain_plan = await db.get(`
135
135
  SELECT * FROM explain_plans WHERE usage_id = ?
136
136
  `, [record.id]);
@@ -139,13 +139,23 @@ router.get('/recent', authMiddleware, async (req, res) => {
139
139
  if (record.explain_plan.variable_analysis) record.explain_plan.variable_analysis = JSON.parse(record.explain_plan.variable_analysis);
140
140
  if (record.explain_plan.detected_issues) record.explain_plan.detected_issues = JSON.parse(record.explain_plan.detected_issues);
141
141
  if (record.explain_plan.optimization_suggestions) record.explain_plan.optimization_suggestions = JSON.parse(record.explain_plan.optimization_suggestions);
142
-
142
+ if (record.explain_plan.mce_alternatives && typeof record.explain_plan.mce_alternatives === 'string') {
143
+ record.explain_plan.mce_alternatives = JSON.parse(record.explain_plan.mce_alternatives);
144
+ }
145
+
143
146
  // On-the-fly MCE calculation if missing from DB (for existing records)
144
- if (!record.explain_plan.mce_best_alternative_model) {
147
+ if (!record.explain_plan.mce_best_alternative_model || !record.explain_plan.mce_alternatives) {
145
148
  const calculator = getCostCalculator();
146
149
  const bestAlt = calculator.getBestAlternative(record.provider, record.model, record.input_tokens, record.output_tokens);
150
+ const allAlts = calculator.getAllAlternatives(record.provider, record.model, record.input_tokens, record.output_tokens);
151
+
152
+ record.explain_plan.mce_alternatives = allAlts;
153
+
147
154
  if (bestAlt) {
148
- const savingsPct = record.total_cost > 0 ? ((record.total_cost - bestAlt.cost) / record.total_cost) * 100 : 0;
155
+ // Use calculated cost from tokens (record.total_cost may be 0 for passively ingested records)
156
+ const [calcInput, calcOutput] = calculator.calculateCost(record.provider, record.model, record.input_tokens, record.output_tokens);
157
+ const currentCost = (calcInput + calcOutput) > 0 ? (calcInput + calcOutput) : (record.total_cost || 0);
158
+ const savingsPct = currentCost > 0 ? ((currentCost - bestAlt.cost) / currentCost) * 100 : 0;
149
159
  if (savingsPct > 10) {
150
160
  record.explain_plan.mce_best_alternative_model = bestAlt.model;
151
161
  record.explain_plan.mce_best_alternative_provider = bestAlt.provider;
@@ -169,7 +179,7 @@ router.post('/execute', authMiddleware, async (req, res) => {
169
179
  try {
170
180
  const engine = new TokenTalosEngine(config);
171
181
  await engine.init();
172
-
182
+
173
183
  const result = await engine.execute({
174
184
  ...req.body,
175
185
  orgId: req.orgId,
@@ -188,14 +198,14 @@ router.post('/execute', authMiddleware, async (req, res) => {
188
198
  router.post('/prompt/construct', authMiddleware, async (req, res) => {
189
199
  const { provider, model, parts, endpoint, projectId } = req.body;
190
200
  const orgId = req.orgId;
191
-
201
+
192
202
  const { processedParts, metadata } = await processPromptParts(parts, config);
193
203
 
194
204
  const finalProvider = provider || config.llmProvider || 'gemini';
195
205
  const finalModel = model || config.defaultModel || 'gemini-3-flash-preview';
196
206
 
197
207
  const prompt = new TokenTalosPrompt(finalProvider, finalModel);
198
-
208
+
199
209
  for (const key in processedParts) {
200
210
  if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
201
211
  else if (key === 'context') prompt.addContext(processedParts[key], parts[key]);
@@ -206,7 +216,7 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
206
216
 
207
217
  const messages = prompt.toMessages();
208
218
  const trackingData = prompt.getTrackingData();
209
-
219
+
210
220
  const maxTokens = config.maxTokens || 32000;
211
221
  const thresholdAction = config.thresholdAction || 'warning';
212
222
 
@@ -248,10 +258,10 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
248
258
  INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
249
259
  VALUES (?, ?, ?, ?, ?)
250
260
  `, [
251
- trackingData.id,
252
- action.target,
253
- action.type,
254
- action.method || null,
261
+ trackingData.id,
262
+ action.target,
263
+ action.type,
264
+ action.method || null,
255
265
  JSON.stringify(action)
256
266
  ]);
257
267
 
@@ -267,8 +277,8 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
267
277
  }
268
278
 
269
279
  // 3. Heuristic Analysis
270
- const analysis = runHeuristicAnalysis({
271
- total_tokens: trackingData.total_tokens,
280
+ const analysis = runHeuristicAnalysis({
281
+ total_tokens: trackingData.total_tokens,
272
282
  total_cost: inputCost,
273
283
  provider: finalProvider,
274
284
  model: finalModel
@@ -280,20 +290,21 @@ router.post('/prompt/construct', authMiddleware, async (req, res) => {
280
290
  INSERT INTO explain_plans (
281
291
  id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
282
292
  estimated_savings_pct, estimated_savings_usd,
283
- mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
284
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
293
+ mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct, mce_alternatives
294
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
285
295
  `, [
286
- planId,
287
- trackingData.id,
288
- JSON.stringify(analysis.variable_analysis),
289
- JSON.stringify(analysis.detected_issues),
296
+ planId,
297
+ trackingData.id,
298
+ JSON.stringify(analysis.variable_analysis),
299
+ JSON.stringify(analysis.detected_issues),
290
300
  JSON.stringify(analysis.optimization_suggestions),
291
- analysis.estimated_savings_pct,
301
+ analysis.estimated_savings_pct,
292
302
  analysis.estimated_savings_usd,
293
303
  analysis.mce_best_alternative_model || null,
294
304
  analysis.mce_best_alternative_provider || null,
295
305
  analysis.mce_best_alternative_cost || 0,
296
- analysis.mce_savings_pct || 0
306
+ analysis.mce_savings_pct || 0,
307
+ analysis.mce_alternatives ? JSON.stringify(analysis.mce_alternatives) : null
297
308
  ]);
298
309
  }
299
310
 
@@ -9,7 +9,7 @@ export async function runAIAnalysis(config, usageRecord, variables) {
9
9
  const model = genAI.getGenerativeModel({ model: 'gemini-1.5-flash' });
10
10
 
11
11
  const variableInfo = variables.map(v => `${v.name} (${v.token_count} tokens): "${v.content.substring(0, 100)}..."`).join('\n');
12
-
12
+
13
13
  const prompt = `
14
14
  Analyze this LLM prompt structure and suggest optimizations to reduce costs while maintaining performance.
15
15
 
@@ -31,29 +31,38 @@ export async function runAIAnalysis(config, usageRecord, variables) {
31
31
  const result = await model.generateContent(prompt);
32
32
  const response = await result.response;
33
33
  const text = response.text();
34
-
34
+
35
35
  // Attempt to parse JSON from response
36
36
  try {
37
37
  const jsonStart = text.indexOf('{');
38
38
  const jsonEnd = text.lastIndexOf('}') + 1;
39
39
  const analysis = JSON.parse(text.substring(jsonStart, jsonEnd));
40
-
40
+
41
41
  const calculator = getCostCalculator();
42
42
  const mceResult = calculator.getBestAlternative(
43
- usageRecord.provider,
44
- usageRecord.model,
45
- usageRecord.input_tokens,
43
+ usageRecord.provider,
44
+ usageRecord.model,
45
+ usageRecord.input_tokens,
46
46
  usageRecord.output_tokens,
47
47
  config.comparisonProviders
48
48
  );
49
-
49
+ const allAlts = calculator.getAllAlternatives(
50
+ usageRecord.provider,
51
+ usageRecord.model,
52
+ usageRecord.input_tokens,
53
+ usageRecord.output_tokens,
54
+ config.comparisonProviders
55
+ );
56
+
57
+ analysis.mce_alternatives = allAlts;
58
+
50
59
  if (mceResult) {
51
60
  analysis.mce_best_alternative_model = mceResult.model;
52
61
  analysis.mce_best_alternative_provider = mceResult.provider;
53
62
  analysis.mce_best_alternative_cost = mceResult.cost;
54
63
  analysis.mce_savings_pct = ((usageRecord.total_cost - mceResult.cost) / usageRecord.total_cost) * 100;
55
64
  }
56
-
65
+
57
66
  return analysis;
58
67
  } catch (err) {
59
68
  console.warn('AI analysis JSON parsing failed:', err);
@@ -19,7 +19,8 @@ export function runHeuristicAnalysis(usageRecord, variables) {
19
19
  const outputTokens = usageRecord.output_tokens || Math.floor(totalTokens * 0.2);
20
20
 
21
21
  const bestAlt = calculator.getBestAlternative(provider, model, inputTokens, outputTokens);
22
-
22
+ const allAlts = calculator.getAllAlternatives(provider, model, inputTokens, outputTokens);
23
+
23
24
  // Check if current model is deprecated
24
25
  const currentPricing = PRICING_DATA[provider.toLowerCase()]?.[model.toLowerCase()];
25
26
  if (currentPricing?.deprecated) {
@@ -27,18 +28,19 @@ export function runHeuristicAnalysis(usageRecord, variables) {
27
28
  suggestions.push(`Migrate to a current stable model (e.g., Gemini 2.0 Flash) to ensure service continuity.`);
28
29
  }
29
30
 
30
- let mceResult = {};
31
+ let mceResult = {
32
+ mce_alternatives: allAlts
33
+ };
34
+
31
35
  if (bestAlt) {
32
36
  const currentCost = usageRecord.total_cost || 0;
33
37
  const savingsPct = currentCost > 0 ? ((currentCost - bestAlt.cost) / currentCost) * 100 : 0;
34
-
38
+
35
39
  if (savingsPct > 10) { // Only suggest if savings are > 10%
36
- mceResult = {
37
- mce_best_alternative_model: bestAlt.model,
38
- mce_best_alternative_provider: bestAlt.provider,
39
- mce_best_alternative_cost: bestAlt.cost,
40
- mce_savings_pct: savingsPct
41
- };
40
+ mceResult.mce_best_alternative_model = bestAlt.model;
41
+ mceResult.mce_best_alternative_provider = bestAlt.provider;
42
+ mceResult.mce_best_alternative_cost = bestAlt.cost;
43
+ mceResult.mce_savings_pct = savingsPct;
42
44
  suggestions.push(`Potential Migration: Switching to ${bestAlt.provider}/${bestAlt.model} could reduce this prompt's cost by ${savingsPct.toFixed(0)}%.`);
43
45
  }
44
46
  }
@@ -46,7 +48,7 @@ export function runHeuristicAnalysis(usageRecord, variables) {
46
48
  for (const v of variables) {
47
49
  const rawPct = (v.token_count / totalTokens) * 100;
48
50
  const pct = Math.min(rawPct, 100); // Cap at 100% for display sanity
49
-
51
+
50
52
  const vAnalysis = {
51
53
  variable_name: v.name,
52
54
  token_count: v.token_count,
package/lib/engine/db.js CHANGED
@@ -54,7 +54,7 @@ export async function initDb(config) {
54
54
  */
55
55
  async function runMigrations(type, database, schemaName = '') {
56
56
  const prefix = (type === 'postgres' && schemaName) ? `${schemaName}.` : '';
57
-
57
+
58
58
  if (type === 'sqlite') {
59
59
  // 1. Add org_id and project_id to usage_data if they don't exist
60
60
  const columns = await database.all(`PRAGMA table_info(usage_data)`);
@@ -78,6 +78,11 @@ async function runMigrations(type, database, schemaName = '') {
78
78
  const varColumns = await database.all(`PRAGMA table_info(prompt_variables)`);
79
79
  const hasOriginalContent = varColumns.some(c => c.name === 'original_content');
80
80
  if (!hasOriginalContent) await database.exec(`ALTER TABLE prompt_variables ADD COLUMN original_content TEXT`);
81
+
82
+ // Check explain_plans
83
+ const planColumns = await database.all(`PRAGMA table_info(explain_plans)`);
84
+ const hasMceAlternatives = planColumns.some(c => c.name === 'mce_alternatives');
85
+ if (!hasMceAlternatives) await database.exec(`ALTER TABLE explain_plans ADD COLUMN mce_alternatives TEXT`);
81
86
  } else {
82
87
  // Postgres migration check
83
88
  try {
@@ -132,6 +137,9 @@ async function runMigrations(type, database, schemaName = '') {
132
137
  if (!planCols.includes('variable_analysis')) {
133
138
  await database.query(`ALTER TABLE ${prefix}explain_plans ADD COLUMN variable_analysis TEXT`);
134
139
  }
140
+ if (!planCols.includes('mce_alternatives')) {
141
+ await database.query(`ALTER TABLE ${prefix}explain_plans ADD COLUMN mce_alternatives TEXT`);
142
+ }
135
143
  } catch (e) {
136
144
  console.warn('[TokenTalos] Migration check failed (Postgres):', e.message);
137
145
  }
@@ -265,7 +273,8 @@ function getSchema(type, schemaName = '') {
265
273
  mce_best_alternative_model TEXT,
266
274
  mce_best_alternative_provider TEXT,
267
275
  mce_best_alternative_cost REAL,
268
- mce_savings_pct REAL
276
+ mce_savings_pct REAL,
277
+ mce_alternatives TEXT -- JSON representation of possible model alternatives
269
278
  );
270
279
 
271
280
  CREATE TABLE IF NOT EXISTS ${prefix}opv_results (
@@ -29,16 +29,16 @@ export class TokenTalosEngine {
29
29
 
30
30
  async init() {
31
31
  if (this.initialized) return;
32
-
32
+
33
33
  // Default to SQLite if not specified
34
34
  const dbConfig = {
35
35
  databaseType: this.config.databaseType || 'sqlite',
36
- sqlitePath: this.config.sqlitePath || ':memory:',
36
+ sqlitePath: this.config.sqlitePath || ':memory:',
37
37
  ...this.config
38
38
  };
39
39
 
40
40
  this.db = await initDb(dbConfig);
41
-
41
+
42
42
  // Create default org if it doesn't exist
43
43
  await this.ensureDefaultOrg();
44
44
 
@@ -52,10 +52,10 @@ export class TokenTalosEngine {
52
52
  try {
53
53
  // 1. Ensure Default Organization
54
54
  await db.run('INSERT INTO organizations (id, name) VALUES (?, ?) ON CONFLICT DO NOTHING', ['default_org', 'Default Organization']);
55
-
55
+
56
56
  // 2. Ensure Default User (for local mode)
57
57
  await db.run('INSERT INTO users (id, email, name) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['local_user', 'dev@tokentalos.local', 'Local Developer']);
58
-
58
+
59
59
  // 3. Ensure Membership
60
60
  await db.run('INSERT INTO organization_members (org_id, user_id, role) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['default_org', 'local_user', 'admin']);
61
61
  } catch (e) {
@@ -104,7 +104,7 @@ export class TokenTalosEngine {
104
104
  const finalModel = model || this.config.defaultModel || 'gemini-3-flash-preview';
105
105
 
106
106
  const prompt = this.createPrompt(finalProvider, finalModel);
107
-
107
+
108
108
  // Add processed parts to the prompt
109
109
  for (const key in processedParts) {
110
110
  if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
@@ -131,7 +131,7 @@ export class TokenTalosEngine {
131
131
  // Log the cache hit as a usage event with 0 cost but record saved tokens
132
132
  const hitId = uuidv4();
133
133
  const db = this.getDb();
134
-
134
+
135
135
  // Calculate what it WOULD have cost
136
136
  const [savedInputCost] = calculator.calculateCost(finalProvider, finalModel, prompt.getTrackingData().total_tokens, 0);
137
137
  const trackingData = prompt.getTrackingData();
@@ -140,7 +140,7 @@ export class TokenTalosEngine {
140
140
  INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, total_tokens, saved_tokens, saved_cost, input_cost, total_cost, endpoint, latency_ms, timestamp)
141
141
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
142
142
  `, [
143
- hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
143
+ hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
144
144
  prompt.getTrackingData().total_tokens, savedInputCost, 0, 0, endpoint, Date.now() - startTime, trackingData.timestamp
145
145
  ]);
146
146
 
@@ -156,10 +156,10 @@ export class TokenTalosEngine {
156
156
  INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
157
157
  VALUES (?, ?, ?, ?, ?)
158
158
  `, [
159
- hitId,
160
- action.target,
161
- action.type,
162
- action.method || null,
159
+ hitId,
160
+ action.target,
161
+ action.type,
162
+ action.method || null,
163
163
  JSON.stringify(action)
164
164
  ]);
165
165
  }
@@ -211,10 +211,10 @@ export class TokenTalosEngine {
211
211
  INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
212
212
  VALUES (?, ?, ?, ?, ?)
213
213
  `, [
214
- trackingData.id,
215
- action.target,
216
- action.type,
217
- action.method || null,
214
+ trackingData.id,
215
+ action.target,
216
+ action.type,
217
+ action.method || null,
218
218
  JSON.stringify(action)
219
219
  ]);
220
220
  }
@@ -234,8 +234,8 @@ export class TokenTalosEngine {
234
234
  }
235
235
 
236
236
  // 5. Heuristic Analysis
237
- const analysis = runHeuristicAnalysis({
238
- total_tokens: result.input_tokens + result.output_tokens,
237
+ const analysis = runHeuristicAnalysis({
238
+ total_tokens: result.input_tokens + result.output_tokens,
239
239
  input_tokens: result.input_tokens,
240
240
  output_tokens: result.output_tokens,
241
241
  total_cost: inputCost + outputCost,
@@ -248,20 +248,21 @@ export class TokenTalosEngine {
248
248
  INSERT INTO explain_plans (
249
249
  id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
250
250
  estimated_savings_pct, estimated_savings_usd,
251
- mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
252
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
251
+ mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct, mce_alternatives
252
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
253
253
  `, [
254
- planId,
255
- trackingData.id,
256
- JSON.stringify(analysis.variable_analysis),
257
- JSON.stringify(analysis.detected_issues),
254
+ planId,
255
+ trackingData.id,
256
+ JSON.stringify(analysis.variable_analysis),
257
+ JSON.stringify(analysis.detected_issues),
258
258
  JSON.stringify(analysis.optimization_suggestions),
259
- analysis.estimated_savings_pct,
259
+ analysis.estimated_savings_pct,
260
260
  analysis.estimated_savings_usd,
261
261
  analysis.mce_best_alternative_model || null,
262
262
  analysis.mce_best_alternative_provider || null,
263
263
  analysis.mce_best_alternative_cost || 0,
264
- analysis.mce_savings_pct || 0
264
+ analysis.mce_savings_pct || 0,
265
+ analysis.mce_alternatives ? JSON.stringify(analysis.mce_alternatives) : null
265
266
  ]);
266
267
  }
267
268
 
@@ -50,9 +50,26 @@ export const PRICING_DATA = {
50
50
  }
51
51
  };
52
52
 
53
+ // Map common provider aliases to their canonical PRICING_DATA keys
54
+ const PROVIDER_ALIASES = {
55
+ 'gemini': 'google',
56
+ 'gcp': 'google',
57
+ 'openai-api': 'openai',
58
+ 'claude': 'anthropic',
59
+ 'aws': 'amazon',
60
+ 'bedrock': 'amazon',
61
+ };
62
+
63
+ function normalizeProvider(provider) {
64
+ if (!provider) return provider;
65
+ const lower = provider.toLowerCase();
66
+ return PROVIDER_ALIASES[lower] || lower;
67
+ }
68
+
53
69
  export class CostCalculator {
54
70
  calculateCost(provider, model, inputTokens, outputTokens) {
55
- const providerPricing = PRICING_DATA[provider.toLowerCase()];
71
+ const normalizedProvider = normalizeProvider(provider);
72
+ const providerPricing = PRICING_DATA[normalizedProvider];
56
73
  if (!providerPricing) return [0, 0];
57
74
 
58
75
  const modelPricing = providerPricing[model.toLowerCase()];
@@ -66,10 +83,16 @@ export class CostCalculator {
66
83
  }
67
84
 
68
85
  getBestAlternative(provider, model, inputTokens, outputTokens, preferredProviders = []) {
69
- let bestAlt = null;
70
- let currentCost = this.calculateCost(provider, model, inputTokens, outputTokens).reduce((a, b) => a + b, 0);
86
+ const alternatives = this.getAllAlternatives(provider, model, inputTokens, outputTokens, preferredProviders);
87
+ if (alternatives.length === 0) return null;
88
+ return alternatives[0];
89
+ }
90
+
91
+ getAllAlternatives(provider, model, inputTokens, outputTokens, preferredProviders = []) {
92
+ const normalizedProvider = normalizeProvider(provider);
93
+ let alternatives = [];
94
+ let currentCost = this.calculateCost(normalizedProvider, model, inputTokens, outputTokens).reduce((a, b) => a + b, 0);
71
95
 
72
- // If no preference, use all available in PRICING_DATA
73
96
  const targets = preferredProviders.length > 0 ? preferredProviders : Object.keys(PRICING_DATA);
74
97
 
75
98
  for (const targetProvider of targets) {
@@ -78,22 +101,23 @@ export class CostCalculator {
78
101
 
79
102
  for (const targetModel in models) {
80
103
  const pricing = models[targetModel];
81
- // Skip current model or deprecated targets
82
- if ((targetProvider === provider.toLowerCase() && targetModel === model.toLowerCase()) || pricing.deprecated) continue;
104
+ // Skip the current model (compare against normalized provider)
105
+ if ((targetProvider === normalizedProvider && targetModel === model.toLowerCase()) || pricing.deprecated) continue;
83
106
 
84
107
  const [altInput, altOutput] = this.calculateCost(targetProvider, targetModel, inputTokens, outputTokens);
85
108
  const altTotal = altInput + altOutput;
86
109
 
87
- if (altTotal < currentCost && (!bestAlt || altTotal < bestAlt.cost)) {
88
- bestAlt = {
89
- model: targetModel,
90
- provider: targetProvider,
91
- cost: altTotal
92
- };
93
- }
110
+ alternatives.push({
111
+ model: targetModel,
112
+ provider: targetProvider,
113
+ cost: altTotal,
114
+ savingsPct: currentCost > 0 ? ((currentCost - altTotal) / currentCost) * 100 : 0
115
+ });
94
116
  }
95
117
  }
96
- return bestAlt;
118
+
119
+ // Sort by cost ascending
120
+ return alternatives.sort((a, b) => a.cost - b.cost);
97
121
  }
98
122
  }
99
123
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meller/tokentalos",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "Token Talos: The ORM for LLMs. A standalone gateway and library for cost-optimized, secure, and tracked prompt orchestration.",
5
5
  "type": "module",
6
6
  "publishConfig": {
@@ -57,8 +57,10 @@
57
57
  "js-tiktoken": "^1.0.7",
58
58
  "openai": "^6.22.0",
59
59
  "pg": "^8.18.0",
60
- "sqlite": "^5.0.1",
61
- "sqlite3": "^5.1.6",
62
60
  "uuid": "^9.0.1"
61
+ },
62
+ "optionalDependencies": {
63
+ "sqlite": "^5.0.1",
64
+ "sqlite3": "^5.1.6"
63
65
  }
64
- }
66
+ }