llmflow 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -0
- package/bin/llmflow.js +91 -0
- package/db.js +857 -0
- package/logger.js +122 -0
- package/otlp-export.js +564 -0
- package/otlp-logs.js +238 -0
- package/otlp-metrics.js +300 -0
- package/otlp.js +398 -0
- package/package.json +62 -0
- package/pricing.fallback.json +58 -0
- package/pricing.js +154 -0
- package/providers/anthropic.js +195 -0
- package/providers/azure.js +159 -0
- package/providers/base.js +145 -0
- package/providers/cohere.js +225 -0
- package/providers/gemini.js +278 -0
- package/providers/index.js +130 -0
- package/providers/ollama.js +36 -0
- package/providers/openai-compatible.js +77 -0
- package/providers/openai.js +217 -0
- package/providers/passthrough.js +573 -0
- package/public/app.js +1484 -0
- package/public/index.html +367 -0
- package/public/style.css +1152 -0
- package/server.js +1222 -0
package/otlp.js
ADDED
@@ -0,0 +1,398 @@
/**
 * OTLP (OpenTelemetry Protocol) HTTP endpoint for LLMFlow
 *
 * Accepts OTLP/HTTP JSON traces and transforms them to LLMFlow span format.
 * This allows users with existing OpenTelemetry/OpenLLMetry instrumentation
 * to export traces directly to LLMFlow.
 *
 * Supports:
 * - OTLP/HTTP JSON format (Content-Type: application/json)
 * - gen_ai.* semantic conventions (OpenLLMetry)
 * - Standard OTEL span attributes
 */

const db = require('./db');
const { calculateCost } = require('./pricing');

/**
 * Map gen_ai.system values to span types
 */
const PROVIDER_TO_SPAN_TYPE = {
  'openai': 'llm',
  'anthropic': 'llm',
  'cohere': 'llm',
  'bedrock': 'llm',
  'azure': 'llm',
  'google': 'llm',
  'ollama': 'llm',
  'groq': 'llm',
  'together': 'llm',
  'mistral': 'llm',
  'replicate': 'llm',
};

/**
 * Map traceloop.span.kind to LLMFlow span types
 */
const TRACELOOP_KIND_TO_SPAN_TYPE = {
  'workflow': 'trace',
  'task': 'chain',
  'agent': 'agent',
  'tool': 'tool',
};

/**
 * Normalize a trace/span id: strip any dashes and lowercase the hex string
 */
function normalizeId(hexId) {
  if (!hexId) return null;
  // Remove any existing dashes and lowercase
  return hexId.replace(/-/g, '').toLowerCase();
}

/**
 * Extract attributes from OTLP KeyValue array format
 * OTLP attributes are: [{ key: "foo", value: { stringValue: "bar" } }, ...]
 */
function extractAttributes(attrs) {
  if (!attrs || !Array.isArray(attrs)) return {};

  const result = {};
  for (const attr of attrs) {
    const key = attr.key;
    const val = attr.value;
    if (!val) continue;

    // OTLP value types: stringValue, intValue, doubleValue, boolValue, arrayValue, kvlistValue
    if (val.stringValue !== undefined) result[key] = val.stringValue;
    else if (val.intValue !== undefined) result[key] = parseInt(val.intValue, 10);
    else if (val.doubleValue !== undefined) result[key] = val.doubleValue;
    else if (val.boolValue !== undefined) result[key] = val.boolValue;
    else if (val.arrayValue?.values) {
      result[key] = val.arrayValue.values.map(v =>
        v.stringValue ?? v.intValue ?? v.doubleValue ?? v.boolValue ?? null
      );
    }
  }
  return result;
}
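
// Illustration (editor's sketch, not part of the package source): extractAttributes
// flattens OTLP KeyValue pairs into a plain object, parsing intValue strings into numbers:
//
//   extractAttributes([
//     { key: 'gen_ai.system', value: { stringValue: 'openai' } },
//     { key: 'gen_ai.usage.prompt_tokens', value: { intValue: '42' } },
//   ]);
//   // => { 'gen_ai.system': 'openai', 'gen_ai.usage.prompt_tokens': 42 }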

/**
 * Determine span type from OTEL attributes
 */
function determineSpanType(attrs) {
  // Check for traceloop span kind first (LangChain, etc.)
  const traceloopKind = attrs['traceloop.span.kind'];
  if (traceloopKind && TRACELOOP_KIND_TO_SPAN_TYPE[traceloopKind]) {
    return TRACELOOP_KIND_TO_SPAN_TYPE[traceloopKind];
  }

  // Check for gen_ai.system (OpenLLMetry)
  const genAiSystem = attrs['gen_ai.system'];
  if (genAiSystem) {
    return PROVIDER_TO_SPAN_TYPE[genAiSystem.toLowerCase()] || 'llm';
  }

  // Check for llm.request.type
  const llmRequestType = attrs['llm.request.type'];
  if (llmRequestType) {
    return 'llm';
  }

  // Check for db.system (vector DBs)
  const dbSystem = attrs['db.system'];
  if (dbSystem) {
    const vectorDbs = ['pinecone', 'chroma', 'weaviate', 'qdrant', 'milvus', 'pgvector'];
    if (vectorDbs.some(v => dbSystem.toLowerCase().includes(v))) {
      return 'retrieval';
    }
  }

  // Check span name patterns
  const spanName = attrs._spanName || '';
  if (spanName.includes('embed')) return 'embedding';
  if (spanName.includes('retriev') || spanName.includes('search')) return 'retrieval';
  if (spanName.includes('agent')) return 'agent';
  if (spanName.includes('tool') || spanName.includes('function')) return 'tool';
  if (spanName.includes('chain')) return 'chain';

  return 'custom';
}
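
// Illustration (editor's sketch, not part of the package source): the checks run
// in priority order, so traceloop.span.kind beats gen_ai.system, which beats the
// span-name heuristics:
//
//   determineSpanType({ 'traceloop.span.kind': 'agent' });  // => 'agent'
//   determineSpanType({ 'gen_ai.system': 'OpenAI' });       // => 'llm'
//   determineSpanType({ _spanName: 'vector_search' });      // => 'retrieval'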

/**
 * Extract model name from attributes
 */
function extractModel(attrs) {
  return attrs['gen_ai.request.model']
    || attrs['gen_ai.response.model']
    || attrs['llm.model']
    || attrs['model']
    || null;
}

/**
 * Extract token usage from attributes
 */
function extractTokens(attrs) {
  return {
    prompt: attrs['gen_ai.usage.prompt_tokens']
      || attrs['llm.usage.prompt_tokens']
      || attrs['llm.token_count.prompt']
      || 0,
    completion: attrs['gen_ai.usage.completion_tokens']
      || attrs['llm.usage.completion_tokens']
      || attrs['llm.token_count.completion']
      || 0,
    total: attrs['gen_ai.usage.total_tokens']
      || attrs['llm.usage.total_tokens']
      || attrs['llm.token_count.total']
      || 0
  };
}
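
// Illustration (editor's sketch, not part of the package source): the newer
// gen_ai.* keys win over the older llm.* keys, and anything missing becomes 0:
//
//   extractTokens({ 'gen_ai.usage.prompt_tokens': 12, 'llm.usage.completion_tokens': 34 });
//   // => { prompt: 12, completion: 34, total: 0 }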

/**
 * Extract input/output from attributes or events
 */
function extractIO(attrs, events) {
  let input = null;
  let output = null;

  // Try gen_ai.prompt / gen_ai.completion (OpenLLMetry)
  if (attrs['gen_ai.prompt']) {
    try {
      input = typeof attrs['gen_ai.prompt'] === 'string'
        ? JSON.parse(attrs['gen_ai.prompt'])
        : attrs['gen_ai.prompt'];
    } catch {
      input = { prompt: attrs['gen_ai.prompt'] };
    }
  }

  if (attrs['gen_ai.completion']) {
    try {
      output = typeof attrs['gen_ai.completion'] === 'string'
        ? JSON.parse(attrs['gen_ai.completion'])
        : attrs['gen_ai.completion'];
    } catch {
      output = { completion: attrs['gen_ai.completion'] };
    }
  }

  // Check events for prompt/completion data
  if (events && events.length > 0) {
    for (const event of events) {
      const eventAttrs = extractAttributes(event.attributes);
      if (event.name === 'gen_ai.content.prompt' || event.name?.includes('prompt')) {
        input = eventAttrs;
      }
      if (event.name === 'gen_ai.content.completion' || event.name?.includes('completion')) {
        output = eventAttrs;
      }
    }
  }

  return { input, output };
}

/**
 * Convert nanoseconds timestamp to milliseconds
 */
function nanoToMs(nanoStr) {
  if (!nanoStr) return Date.now();
  const nano = BigInt(nanoStr);
  return Number(nano / BigInt(1000000));
}
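
// Illustration (editor's sketch, not part of the package source): OTLP encodes
// timestamps as decimal-string nanoseconds; BigInt division avoids the precision
// loss that Number(nanoStr) / 1e6 would risk on values above 2^53:
//
//   nanoToMs('1700000000123456789'); // => 1700000000123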

/**
 * Transform a single OTLP span to LLMFlow format
 */
function transformSpan(span, resourceAttrs, scopeAttrs) {
  const attrs = {
    ...extractAttributes(span.attributes),
    _spanName: span.name,
  };

  const traceId = normalizeId(span.traceId);
  const spanId = normalizeId(span.spanId);
  const parentId = span.parentSpanId ? normalizeId(span.parentSpanId) : null;

  const startTimeMs = nanoToMs(span.startTimeUnixNano);
  const endTimeMs = nanoToMs(span.endTimeUnixNano);
  const durationMs = endTimeMs - startTimeMs;

  const spanType = determineSpanType(attrs);
  const model = extractModel(attrs);
  const tokens = extractTokens(attrs);
  const { input, output } = extractIO(attrs, span.events);

  // Calculate cost if we have model and tokens
  const estimatedCost = model && (tokens.prompt || tokens.completion)
    ? calculateCost(model, tokens.prompt, tokens.completion)
    : 0;

  // Determine status
  let status = 200;
  if (span.status) {
    // OTEL status: 0=UNSET, 1=OK, 2=ERROR
    if (span.status.code === 2) {
      status = 500;
    }
  }

  // Extract provider
  const provider = attrs['gen_ai.system']
    || attrs['gen_ai.provider.name']
    || attrs['llm.vendor']
    || resourceAttrs['service.name']
    || null;

  // Extract service name
  const serviceName = resourceAttrs['service.name']
    || scopeAttrs?.name
    || 'otel';

  return {
    id: spanId,
    timestamp: startTimeMs,
    duration_ms: durationMs,
    provider,
    model,
    prompt_tokens: tokens.prompt,
    completion_tokens: tokens.completion,
    total_tokens: tokens.total || tokens.prompt + tokens.completion,
    estimated_cost: estimatedCost,
    status,
    error: span.status?.message || attrs['error.message'] || null,
    request_method: null,
    request_path: null,
    request_headers: {},
    request_body: {},
    response_status: status,
    response_headers: {},
    response_body: {},
    tags: [],
    trace_id: traceId,
    parent_id: parentId,
    span_type: spanType,
    span_name: span.name || attrs['traceloop.entity.name'] || spanType,
    input,
    output,
    attributes: {
      ...attrs,
      ...resourceAttrs,
      otel_span_kind: span.kind,
    },
    service_name: serviceName
  };
}
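
// Illustration (editor's sketch, not part of the package source): a minimal OTLP
// span with gen_ai.* attributes maps onto the LLMFlow shape roughly like this:
//
//   transformSpan({
//     traceId: '0af7651916cd43dd8448eb211c80319c',
//     spanId: 'b7ad6b7169203331',
//     name: 'openai.chat',
//     startTimeUnixNano: '1700000000000000000',
//     endTimeUnixNano: '1700000001500000000',
//     attributes: [
//       { key: 'gen_ai.system', value: { stringValue: 'openai' } },
//       { key: 'gen_ai.request.model', value: { stringValue: 'gpt-4o' } },
//     ],
//   }, {}, {});
//   // => { span_type: 'llm', model: 'gpt-4o', duration_ms: 1500, ... }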

/**
 * Process OTLP/HTTP JSON traces request
 *
 * Expected format (OTLP/HTTP JSON):
 * {
 *   "resourceSpans": [
 *     {
 *       "resource": { "attributes": [...] },
 *       "scopeSpans": [
 *         {
 *           "scope": { "name": "...", "version": "..." },
 *           "spans": [
 *             {
 *               "traceId": "hex",
 *               "spanId": "hex",
 *               "parentSpanId": "hex",
 *               "name": "span name",
 *               "kind": 1,
 *               "startTimeUnixNano": "...",
 *               "endTimeUnixNano": "...",
 *               "attributes": [...],
 *               "events": [...],
 *               "status": { "code": 0 }
 *             }
 *           ]
 *         }
 *       ]
 *     }
 *   ]
 * }
 */
function processOtlpTraces(body) {
  const results = {
    accepted: 0,
    rejected: 0,
    errors: []
  };

  if (!body || !body.resourceSpans) {
    return results;
  }

  for (const resourceSpan of body.resourceSpans) {
    const resourceAttrs = extractAttributes(resourceSpan.resource?.attributes);

    for (const scopeSpan of (resourceSpan.scopeSpans || [])) {
      const scopeAttrs = scopeSpan.scope || {};

      for (const span of (scopeSpan.spans || [])) {
        try {
          const llmflowSpan = transformSpan(span, resourceAttrs, scopeAttrs);
          db.insertTrace(llmflowSpan);
          results.accepted++;
        } catch (err) {
          results.rejected++;
          results.errors.push(err.message);
        }
      }
    }
  }

  return results;
}
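
// Illustration (editor's sketch, not part of the package source): feeding one
// span (as in the transformSpan example above) through the full pipeline:
//
//   processOtlpTraces({
//     resourceSpans: [{
//       resource: { attributes: [{ key: 'service.name', value: { stringValue: 'my-app' } }] },
//       scopeSpans: [{ scope: { name: 'openllmetry' }, spans: [span] }],
//     }],
//   });
//   // => { accepted: 1, rejected: 0, errors: [] }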

/**
 * Express middleware for OTLP endpoint
 */
function createOtlpHandler() {
  return (req, res) => {
    const contentType = req.headers['content-type'] || '';

    // Only support JSON for now
    if (!contentType.includes('application/json')) {
      return res.status(415).json({
        error: 'Unsupported Media Type',
        message: 'Only application/json is supported. Use OTLP/HTTP JSON format.'
      });
    }

    try {
      const results = processOtlpTraces(req.body);

      // OTLP response format
      res.status(200).json({
        partialSuccess: results.rejected > 0 ? {
          rejectedSpans: results.rejected,
          errorMessage: results.errors.slice(0, 5).join('; ')
        } : undefined
      });
    } catch (err) {
      res.status(500).json({
        error: 'Internal Server Error',
        message: err.message
      });
    }
  };
}

module.exports = {
  processOtlpTraces,
  createOtlpHandler,
  transformSpan,
  extractAttributes,
  determineSpanType,
  extractModel,
  extractTokens
};
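
For context, a minimal sketch of how this handler could be wired up (an editor's illustration, not code from the package; the route path and body-size limit are assumptions, though 4318 is the standard OTLP/HTTP port and the real route presumably lives in server.js):

const express = require('express');
const { createOtlpHandler } = require('./otlp');

const app = express();
app.use(express.json({ limit: '10mb' })); // OTLP batches can be large
app.post('/v1/traces', createOtlpHandler()); // conventional OTLP/HTTP traces path
app.listen(4318);
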
package/package.json
ADDED
@@ -0,0 +1,62 @@
{
  "name": "llmflow",
  "version": "0.3.1",
  "description": "See what your LLM calls cost. One command. No signup.",
  "main": "server.js",
  "bin": {
    "llmflow": "./bin/llmflow.js"
  },
  "scripts": {
    "start": "node server.js",
    "start:verbose": "VERBOSE=1 node server.js",
    "demo": "node test/demo.js",
    "demo:many": "node test/demo.js --count=20",
    "test": "node test/run-tests.js",
    "test:otlp": "node test/run-tests.js otlp-e2e.js",
    "test:ws": "node test/run-tests.js websocket-e2e.js",
    "test:logs": "node test/run-tests.js otlp-logs-e2e.js",
    "test:metrics": "node test/run-tests.js otlp-metrics-e2e.js",
    "test:providers": "node test/run-tests.js providers.js",
    "test:providers-e2e": "node test/run-tests.js providers-e2e.js"
  },
  "dependencies": {
    "better-sqlite3": "^11.0.0",
    "express": "^4.18.0",
    "uuid": "^9.0.0",
    "ws": "^8.18.3"
  },
  "files": [
    "bin/",
    "providers/",
    "public/",
    "server.js",
    "db.js",
    "pricing.js",
    "pricing.fallback.json",
    "logger.js",
    "otlp.js",
    "otlp-logs.js",
    "otlp-metrics.js",
    "otlp-export.js"
  ],
  "keywords": [
    "llm",
    "observability",
    "tracing",
    "openai",
    "anthropic",
    "langchain",
    "opentelemetry",
    "cost-tracking",
    "ai"
  ],
  "author": "Helge Sverre",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/HelgeSverre/llmflow"
  },
  "engines": {
    "node": ">=18"
  }
}
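
With this manifest, installing the package exposes an llmflow command via the "bin" mapping (e.g. npx llmflow; presumably bin/llmflow.js starts server.js), and the "files" whitelist keeps the published tarball to the runtime files listed in the summary above.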

package/pricing.fallback.json
ADDED
@@ -0,0 +1,58 @@
{
  "gpt-4o": {
    "input_cost_per_token": 0.0000025,
    "output_cost_per_token": 0.00001
  },
  "gpt-4o-mini": {
    "input_cost_per_token": 0.00000015,
    "output_cost_per_token": 0.0000006
  },
  "gpt-4-turbo": {
    "input_cost_per_token": 0.00001,
    "output_cost_per_token": 0.00003
  },
  "gpt-4": {
    "input_cost_per_token": 0.00003,
    "output_cost_per_token": 0.00006
  },
  "gpt-3.5-turbo": {
    "input_cost_per_token": 0.0000005,
    "output_cost_per_token": 0.0000015
  },
  "gpt-3.5-turbo-16k": {
    "input_cost_per_token": 0.000003,
    "output_cost_per_token": 0.000004
  },
  "claude-3-5-sonnet-20241022": {
    "input_cost_per_token": 0.000003,
    "output_cost_per_token": 0.000015
  },
  "claude-3-5-haiku-20241022": {
    "input_cost_per_token": 0.0000008,
    "output_cost_per_token": 0.000004
  },
  "claude-3-opus-20240229": {
    "input_cost_per_token": 0.000015,
    "output_cost_per_token": 0.000075
  },
  "claude-3-sonnet-20240229": {
    "input_cost_per_token": 0.000003,
    "output_cost_per_token": 0.000015
  },
  "claude-3-haiku-20240307": {
    "input_cost_per_token": 0.00000025,
    "output_cost_per_token": 0.00000125
  },
  "gemini-1.5-pro": {
    "input_cost_per_token": 0.00000125,
    "output_cost_per_token": 0.000005
  },
  "gemini-1.5-flash": {
    "input_cost_per_token": 0.000000075,
    "output_cost_per_token": 0.0000003
  },
  "gemini-2.0-flash-exp": {
    "input_cost_per_token": 0,
    "output_cost_per_token": 0
  }
}
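
To make the units concrete (editor's note): these rates are per token, so under this table a claude-3-5-sonnet-20241022 call with 1,000 prompt tokens and 500 completion tokens costs 1000 × 0.000003 + 500 × 0.000015 = $0.0105.
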
package/pricing.js
ADDED
@@ -0,0 +1,154 @@
const https = require('https');
const fs = require('fs');
const path = require('path');

const DEFAULT_PRICING_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
const PRICING_URL = process.env.PRICING_URL || DEFAULT_PRICING_URL;
const REFRESH_INTERVAL_MS = 24 * 60 * 60 * 1000; // 24 hours
const VERBOSE = process.env.VERBOSE === '1' || process.argv.includes('--verbose');

const fallbackPricingPath = path.join(__dirname, 'pricing.fallback.json');

let pricingData = {};
let lastFetchTime = 0;

function loadFallbackPricing() {
  try {
    if (fs.existsSync(fallbackPricingPath)) {
      pricingData = JSON.parse(fs.readFileSync(fallbackPricingPath, 'utf8'));
    }
  } catch (err) {
    // Silent fail, will use empty pricing
  }
}

function fetchJson(url) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, { timeout: 10000 }, (res) => {
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode}`));
        return;
      }

      let data = '';
      res.on('data', chunk => data += chunk);
      res.on('end', () => {
        try {
          resolve(JSON.parse(data));
        } catch (err) {
          reject(err);
        }
      });
    });

    request.on('error', reject);
    request.on('timeout', () => {
      request.destroy();
      reject(new Error('Request timeout'));
    });
  });
}

async function loadPricing() {
  try {
    const json = await fetchJson(PRICING_URL);
    pricingData = json;
    lastFetchTime = Date.now();
    if (VERBOSE) {
      console.log(`\x1b[2m[pricing] Loaded ${Object.keys(pricingData).length} models\x1b[0m`);
    }
  } catch (err) {
    if (Object.keys(pricingData).length === 0) {
      loadFallbackPricing();
    }
  }
}

function normalizeModelName(model) {
  if (!model) return null;

  let normalized = model.toLowerCase().trim();

  // Remove common prefixes
  const prefixes = ['openai/', 'anthropic/', 'google/', 'azure/', 'together/'];
  for (const prefix of prefixes) {
    if (normalized.startsWith(prefix)) {
      normalized = normalized.slice(prefix.length);
      break;
    }
  }

  return normalized;
}
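
// Illustration (editor's sketch, not part of the package source): lowercasing
// happens before prefix stripping, so mixed-case provider-prefixed names work:
//
//   normalizeModelName('OpenAI/GPT-4o'); // => 'gpt-4o'
//   normalizeModelName('azure/gpt-4o');  // => 'gpt-4o'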

function findModelPricing(modelName) {
  if (!modelName || !pricingData) return null;

  const normalized = normalizeModelName(modelName);
  if (!normalized) return null;

  // Try exact match first
  if (pricingData[normalized]) {
    return pricingData[normalized];
  }

  // Try with provider prefixes
  const providers = ['openai/', 'azure/', ''];
  for (const prefix of providers) {
    const key = prefix + normalized;
    if (pricingData[key]) {
      return pricingData[key];
    }
  }

  // Try partial match for versioned models (gpt-4-0125-preview -> gpt-4)
  const baseName = normalized.split('-').slice(0, 2).join('-');
  for (const key of Object.keys(pricingData)) {
    if (key.startsWith(baseName) || key.includes(normalized)) {
      return pricingData[key];
    }
  }

  return null;
}

function calculateCost(model, promptTokens, completionTokens) {
  const pricing = findModelPricing(model);

  if (!pricing) {
    // Fallback to default pricing for unknown models
    return ((promptTokens * 0.001) + (completionTokens * 0.002)) / 1000;
  }

  // LiteLLM format: input_cost_per_token, output_cost_per_token (per token, not per 1k)
  const inputCost = pricing.input_cost_per_token || 0;
  const outputCost = pricing.output_cost_per_token || pricing.input_cost_per_token || 0;

  return (promptTokens * inputCost) + (completionTokens * outputCost);
}
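
// Illustration (editor's sketch, not part of the package source): with the
// fallback table above loaded, costs come out in dollars:
//
//   calculateCost('openai/gpt-4o', 1000, 500);
//   // => 1000 * 0.0000025 + 500 * 0.00001 = 0.0075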

function getPricingInfo(model) {
  return findModelPricing(model);
}

function getPricingStats() {
  return {
    modelCount: Object.keys(pricingData).length,
    lastFetchTime,
    source: PRICING_URL
  };
}

// Initialize pricing
loadFallbackPricing();
loadPricing();

// Refresh pricing periodically
setInterval(loadPricing, REFRESH_INTERVAL_MS);

module.exports = {
  calculateCost,
  getPricingInfo,
  getPricingStats,
  loadPricing
};