llmflow 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/otlp.js ADDED
@@ -0,0 +1,398 @@
1
+ /**
2
+ * OTLP (OpenTelemetry Protocol) HTTP endpoint for LLMFlow
3
+ *
4
+ * Accepts OTLP/HTTP JSON traces and transforms them to LLMFlow span format.
5
+ * This allows users with existing OpenTelemetry/OpenLLMetry instrumentation
6
+ * to export traces directly to LLMFlow.
7
+ *
8
+ * Supports:
9
+ * - OTLP/HTTP JSON format (Content-Type: application/json)
10
+ * - gen_ai.* semantic conventions (OpenLLMetry)
11
+ * - Standard OTEL span attributes
12
+ */
13
+
14
+ const db = require('./db');
15
+ const { calculateCost } = require('./pricing');
16
+
17
/**
 * gen_ai.system providers whose spans are classified as the 'llm' span type.
 */
const LLM_PROVIDERS = [
  'openai', 'anthropic', 'cohere', 'bedrock', 'azure', 'google',
  'ollama', 'groq', 'together', 'mistral', 'replicate',
];

const PROVIDER_TO_SPAN_TYPE = Object.fromEntries(
  LLM_PROVIDERS.map((provider) => [provider, 'llm'])
);
33
+
34
/**
 * traceloop.span.kind → LLMFlow span type.
 * 'workflow' is the root of a trace; 'task' maps to a chain step.
 */
const TRACELOOP_KIND_TO_SPAN_TYPE = {
  workflow: 'trace',
  task: 'chain',
  agent: 'agent',
  tool: 'tool',
};
43
+
44
/**
 * Normalize an OTLP hex id (trace id / span id): strip dashes and lowercase.
 * Returns null for a missing or empty id.
 *
 * Fix: the original `if (clean.length <= 32) return clean;` branch was dead
 * code — both paths returned the same value — so the check is removed.
 *
 * @param {string|null|undefined} hexId
 * @returns {string|null} normalized hex id, or null
 */
function normalizeId(hexId) {
  if (!hexId) return null;
  return hexId.replace(/-/g, '').toLowerCase();
}
55
+
56
/**
 * Flatten an OTLP KeyValue attribute array into a plain object.
 * OTLP/JSON attributes look like: [{ key: "foo", value: { stringValue: "bar" } }, ...]
 *
 * Fix: array elements with intValue were kept as strings (OTLP/JSON encodes
 * int64 as a JSON string) while the scalar path parsed them — both paths now
 * produce numbers consistently.
 *
 * NOTE(review): kvlistValue is still not flattened; entries of that type are
 * silently dropped, as in the original.
 *
 * @param {Array|null|undefined} attrs - OTLP KeyValue array
 * @returns {object} plain key → value map
 */
function extractAttributes(attrs) {
  if (!attrs || !Array.isArray(attrs)) return {};

  const result = {};
  for (const attr of attrs) {
    const key = attr.key;
    const val = attr.value;
    if (!val) continue;

    // OTLP value types: stringValue, intValue, doubleValue, boolValue, arrayValue, kvlistValue
    if (val.stringValue !== undefined) result[key] = val.stringValue;
    else if (val.intValue !== undefined) result[key] = parseInt(val.intValue, 10);
    else if (val.doubleValue !== undefined) result[key] = val.doubleValue;
    else if (val.boolValue !== undefined) result[key] = val.boolValue;
    else if (val.arrayValue?.values) {
      result[key] = val.arrayValue.values.map((v) => {
        if (v.stringValue !== undefined) return v.stringValue;
        if (v.intValue !== undefined) return parseInt(v.intValue, 10); // int64 arrives as a string
        if (v.doubleValue !== undefined) return v.doubleValue;
        if (v.boolValue !== undefined) return v.boolValue;
        return null;
      });
    }
  }
  return result;
}
82
+
83
/**
 * Classify an OTEL span into an LLMFlow span type.
 * Precedence: traceloop.span.kind → gen_ai.system → llm.request.type
 * → vector-DB db.system → span-name heuristics → 'custom'.
 *
 * Fix: span-name matching is now case-insensitive — names like
 * "OpenAI Embeddings" previously failed the lowercase `includes` patterns
 * and fell through to 'custom'.
 *
 * @param {object} attrs - flattened attributes (plus internal _spanName)
 * @returns {string} one of trace|chain|agent|tool|llm|retrieval|embedding|custom
 */
function determineSpanType(attrs) {
  // Traceloop instrumentation (LangChain, etc.) labels spans explicitly.
  const traceloopKind = attrs['traceloop.span.kind'];
  if (traceloopKind && TRACELOOP_KIND_TO_SPAN_TYPE[traceloopKind]) {
    return TRACELOOP_KIND_TO_SPAN_TYPE[traceloopKind];
  }

  // OpenLLMetry marks LLM calls with gen_ai.system.
  const genAiSystem = attrs['gen_ai.system'];
  if (genAiSystem) {
    return PROVIDER_TO_SPAN_TYPE[genAiSystem.toLowerCase()] || 'llm';
  }

  // Any llm.request.type implies an LLM call.
  if (attrs['llm.request.type']) {
    return 'llm';
  }

  // Vector database clients surface via db.system.
  const dbSystem = attrs['db.system'];
  if (dbSystem) {
    const vectorDbs = ['pinecone', 'chroma', 'weaviate', 'qdrant', 'milvus', 'pgvector'];
    if (vectorDbs.some((v) => dbSystem.toLowerCase().includes(v))) {
      return 'retrieval';
    }
  }

  // Last resort: case-insensitive span-name heuristics.
  const spanName = (attrs._spanName || '').toLowerCase();
  if (spanName.includes('embed')) return 'embedding';
  if (spanName.includes('retriev') || spanName.includes('search')) return 'retrieval';
  if (spanName.includes('agent')) return 'agent';
  if (spanName.includes('tool') || spanName.includes('function')) return 'tool';
  if (spanName.includes('chain')) return 'chain';

  return 'custom';
}
124
+
125
/**
 * Pick the model name from the first attribute key that has a truthy value,
 * checked in priority order. Returns null when none is set.
 */
function extractModel(attrs) {
  const candidateKeys = [
    'gen_ai.request.model',
    'gen_ai.response.model',
    'llm.model',
    'model',
  ];
  for (const key of candidateKeys) {
    if (attrs[key]) return attrs[key];
  }
  return null;
}
135
+
136
/**
 * Extract token usage from span attributes.
 * Checks, in order: OpenLLMetry keys, the current OTEL GenAI semconv names
 * (gen_ai.usage.input_tokens / output_tokens — the renamed successors of
 * prompt_tokens / completion_tokens), then legacy llm.* fallbacks.
 *
 * @param {object} attrs - flattened span attributes
 * @returns {{prompt: number, completion: number, total: number}}
 */
function extractTokens(attrs) {
  // Return the first truthy value among the given attribute keys, else 0.
  const firstCount = (...keys) => {
    for (const key of keys) {
      if (attrs[key]) return attrs[key];
    }
    return 0;
  };

  return {
    prompt: firstCount(
      'gen_ai.usage.prompt_tokens',
      'gen_ai.usage.input_tokens',
      'llm.usage.prompt_tokens',
      'llm.token_count.prompt'
    ),
    completion: firstCount(
      'gen_ai.usage.completion_tokens',
      'gen_ai.usage.output_tokens',
      'llm.usage.completion_tokens',
      'llm.token_count.completion'
    ),
    total: firstCount(
      'gen_ai.usage.total_tokens',
      'llm.usage.total_tokens',
      'llm.token_count.total'
    )
  };
}
155
+
156
/**
 * Pull input/output payloads from span attributes and/or span events.
 * gen_ai.prompt / gen_ai.completion strings are parsed as JSON when possible;
 * unparseable strings are wrapped under a fallback key. Event-derived
 * payloads override attribute-derived ones.
 *
 * @param {object} attrs - flattened span attributes
 * @param {Array|null|undefined} events - raw OTLP span events
 * @returns {{input: any, output: any}}
 */
function extractIO(attrs, events) {
  // Parse a JSON string; non-strings pass through; parse failures get wrapped.
  const parseMaybeJson = (raw, fallbackKey) => {
    if (typeof raw !== 'string') return raw;
    try {
      return JSON.parse(raw);
    } catch {
      return { [fallbackKey]: raw };
    }
  };

  let input = attrs['gen_ai.prompt']
    ? parseMaybeJson(attrs['gen_ai.prompt'], 'prompt')
    : null;
  let output = attrs['gen_ai.completion']
    ? parseMaybeJson(attrs['gen_ai.completion'], 'completion')
    : null;

  // Span events may carry the actual prompt/completion content.
  if (events && events.length > 0) {
    for (const event of events) {
      const eventAttrs = extractAttributes(event.attributes);
      const name = event.name;
      if (name === 'gen_ai.content.prompt' || name?.includes('prompt')) {
        input = eventAttrs;
      }
      if (name === 'gen_ai.content.completion' || name?.includes('completion')) {
        output = eventAttrs;
      }
    }
  }

  return { input, output };
}
199
+
200
/**
 * Convert a unix-nanoseconds timestamp (string or number) to whole
 * milliseconds. Falls back to the current time when the value is missing.
 */
function nanoToMs(nanoStr) {
  if (!nanoStr) return Date.now();
  return Number(BigInt(nanoStr) / 1000000n);
}
208
+
209
/**
 * Transform a single OTLP span into an LLMFlow trace record.
 *
 * @param {object} span - raw OTLP span (hex ids, unix-nano timestamps, KeyValue attributes)
 * @param {object} resourceAttrs - flattened resource attributes (e.g. service.name)
 * @param {object} scopeAttrs - instrumentation scope object ({ name, version })
 * @returns {object} LLMFlow span row, shaped for db.insertTrace()
 */
function transformSpan(span, resourceAttrs, scopeAttrs) {
  const attrs = {
    ...extractAttributes(span.attributes),
    _spanName: span.name, // consumed by determineSpanType's name heuristics
  };

  const startTimeMs = nanoToMs(span.startTimeUnixNano);
  const endTimeMs = nanoToMs(span.endTimeUnixNano);

  const spanType = determineSpanType(attrs);
  const model = extractModel(attrs);
  const tokens = extractTokens(attrs);
  const { input, output } = extractIO(attrs, span.events);

  // Cost is only estimable with both a model and some token usage.
  const estimatedCost = model && (tokens.prompt || tokens.completion)
    ? calculateCost(model, tokens.prompt, tokens.completion)
    : 0;

  // OTEL status codes: 0=UNSET, 1=OK, 2=ERROR — map ERROR to HTTP 500.
  const status = span.status && span.status.code === 2 ? 500 : 200;

  // Provider: prefer explicit GenAI attributes, fall back to the service name.
  const provider = attrs['gen_ai.system']
    || attrs['gen_ai.provider.name']
    || attrs['llm.vendor']
    || resourceAttrs['service.name']
    || null;

  const serviceName = resourceAttrs['service.name'] || scopeAttrs?.name || 'otel';

  return {
    id: normalizeId(span.spanId),
    timestamp: startTimeMs,
    duration_ms: endTimeMs - startTimeMs,
    provider,
    model,
    prompt_tokens: tokens.prompt,
    completion_tokens: tokens.completion,
    total_tokens: tokens.total || tokens.prompt + tokens.completion,
    estimated_cost: estimatedCost,
    status,
    error: span.status?.message || attrs['error.message'] || null,
    request_method: null,
    request_path: null,
    request_headers: {},
    request_body: {},
    response_status: status,
    response_headers: {},
    response_body: {},
    tags: [],
    trace_id: normalizeId(span.traceId),
    parent_id: span.parentSpanId ? normalizeId(span.parentSpanId) : null,
    span_type: spanType,
    span_name: span.name || attrs['traceloop.entity.name'] || spanType,
    input,
    output,
    attributes: {
      ...attrs,
      ...resourceAttrs,
      otel_span_kind: span.kind,
    },
    service_name: serviceName,
  };
}
291
+
292
/**
 * Process an OTLP/HTTP JSON traces payload and persist each span.
 *
 * Expected shape (OTLP/HTTP JSON):
 *   {
 *     "resourceSpans": [{
 *       "resource": { "attributes": [...] },
 *       "scopeSpans": [{
 *         "scope": { "name": "...", "version": "..." },
 *         "spans": [{
 *           "traceId": "hex", "spanId": "hex", "parentSpanId": "hex",
 *           "name": "span name", "kind": 1,
 *           "startTimeUnixNano": "...", "endTimeUnixNano": "...",
 *           "attributes": [...], "events": [...],
 *           "status": { "code": 0 }
 *         }]
 *       }]
 *     }]
 *   }
 *
 * Each span is transformed and inserted independently; a failing span is
 * counted as rejected (with its error message) without aborting the batch.
 *
 * @param {object} body - parsed OTLP/HTTP JSON request body
 * @returns {{accepted: number, rejected: number, errors: string[]}}
 */
function processOtlpTraces(body) {
  const results = { accepted: 0, rejected: 0, errors: [] };

  if (!body || !body.resourceSpans) {
    return results;
  }

  for (const resourceSpan of body.resourceSpans) {
    const resourceAttrs = extractAttributes(resourceSpan.resource?.attributes);

    for (const scopeSpan of (resourceSpan.scopeSpans || [])) {
      const scopeAttrs = scopeSpan.scope || {};

      for (const span of (scopeSpan.spans || [])) {
        try {
          db.insertTrace(transformSpan(span, resourceAttrs, scopeAttrs));
          results.accepted += 1;
        } catch (err) {
          results.rejected += 1;
          results.errors.push(err.message);
        }
      }
    }
  }

  return results;
}
355
+
356
/**
 * Build the Express request handler for the OTLP/HTTP traces endpoint.
 * Only OTLP/HTTP JSON is accepted (415 otherwise); responses follow the
 * OTLP partial-success convention.
 */
function createOtlpHandler() {
  return (req, res) => {
    const contentType = req.headers['content-type'] || '';

    // Only support JSON for now (no protobuf).
    if (!contentType.includes('application/json')) {
      res.status(415).json({
        error: 'Unsupported Media Type',
        message: 'Only application/json is supported. Use OTLP/HTTP JSON format.'
      });
      return;
    }

    try {
      const { rejected, errors } = processOtlpTraces(req.body);

      // OTLP response format: partialSuccess is omitted when everything succeeded.
      const partialSuccess = rejected > 0
        ? { rejectedSpans: rejected, errorMessage: errors.slice(0, 5).join('; ') }
        : undefined;
      res.status(200).json({ partialSuccess });
    } catch (err) {
      res.status(500).json({
        error: 'Internal Server Error',
        message: err.message
      });
    }
  };
}
389
+
390
+ module.exports = {
391
+ processOtlpTraces,
392
+ createOtlpHandler,
393
+ transformSpan,
394
+ extractAttributes,
395
+ determineSpanType,
396
+ extractModel,
397
+ extractTokens
398
+ };
package/package.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "name": "llmflow",
3
+ "version": "0.3.1",
4
+ "description": "See what your LLM calls cost. One command. No signup.",
5
+ "main": "server.js",
6
+ "bin": {
7
+ "llmflow": "./bin/llmflow.js"
8
+ },
9
+ "scripts": {
10
+ "start": "node server.js",
11
+ "start:verbose": "VERBOSE=1 node server.js",
12
+ "demo": "node test/demo.js",
13
+ "demo:many": "node test/demo.js --count=20",
14
+ "test": "node test/run-tests.js",
15
+ "test:otlp": "node test/run-tests.js otlp-e2e.js",
16
+ "test:ws": "node test/run-tests.js websocket-e2e.js",
17
+ "test:logs": "node test/run-tests.js otlp-logs-e2e.js",
18
+ "test:metrics": "node test/run-tests.js otlp-metrics-e2e.js",
19
+ "test:providers": "node test/run-tests.js providers.js",
20
+ "test:providers-e2e": "node test/run-tests.js providers-e2e.js"
21
+ },
22
+ "dependencies": {
23
+ "better-sqlite3": "^11.0.0",
24
+ "express": "^4.18.0",
25
+ "uuid": "^9.0.0",
26
+ "ws": "^8.18.3"
27
+ },
28
+ "files": [
29
+ "bin/",
30
+ "providers/",
31
+ "public/",
32
+ "server.js",
33
+ "db.js",
34
+ "pricing.js",
35
+ "pricing.fallback.json",
36
+ "logger.js",
37
+ "otlp.js",
38
+ "otlp-logs.js",
39
+ "otlp-metrics.js",
40
+ "otlp-export.js"
41
+ ],
42
+ "keywords": [
43
+ "llm",
44
+ "observability",
45
+ "tracing",
46
+ "openai",
47
+ "anthropic",
48
+ "langchain",
49
+ "opentelemetry",
50
+ "cost-tracking",
51
+ "ai"
52
+ ],
53
+ "author": "Helge Sverre",
54
+ "license": "MIT",
55
+ "repository": {
56
+ "type": "git",
57
+ "url": "https://github.com/HelgeSverre/llmflow"
58
+ },
59
+ "engines": {
60
+ "node": ">=18"
61
+ }
62
+ }
@@ -0,0 +1,58 @@
1
+ {
2
+ "gpt-4o": {
3
+ "input_cost_per_token": 0.0000025,
4
+ "output_cost_per_token": 0.00001
5
+ },
6
+ "gpt-4o-mini": {
7
+ "input_cost_per_token": 0.00000015,
8
+ "output_cost_per_token": 0.0000006
9
+ },
10
+ "gpt-4-turbo": {
11
+ "input_cost_per_token": 0.00001,
12
+ "output_cost_per_token": 0.00003
13
+ },
14
+ "gpt-4": {
15
+ "input_cost_per_token": 0.00003,
16
+ "output_cost_per_token": 0.00006
17
+ },
18
+ "gpt-3.5-turbo": {
19
+ "input_cost_per_token": 0.0000005,
20
+ "output_cost_per_token": 0.0000015
21
+ },
22
+ "gpt-3.5-turbo-16k": {
23
+ "input_cost_per_token": 0.000003,
24
+ "output_cost_per_token": 0.000004
25
+ },
26
+ "claude-3-5-sonnet-20241022": {
27
+ "input_cost_per_token": 0.000003,
28
+ "output_cost_per_token": 0.000015
29
+ },
30
+ "claude-3-5-haiku-20241022": {
31
+ "input_cost_per_token": 0.0000008,
32
+ "output_cost_per_token": 0.000004
33
+ },
34
+ "claude-3-opus-20240229": {
35
+ "input_cost_per_token": 0.000015,
36
+ "output_cost_per_token": 0.000075
37
+ },
38
+ "claude-3-sonnet-20240229": {
39
+ "input_cost_per_token": 0.000003,
40
+ "output_cost_per_token": 0.000015
41
+ },
42
+ "claude-3-haiku-20240307": {
43
+ "input_cost_per_token": 0.00000025,
44
+ "output_cost_per_token": 0.00000125
45
+ },
46
+ "gemini-1.5-pro": {
47
+ "input_cost_per_token": 0.00000125,
48
+ "output_cost_per_token": 0.000005
49
+ },
50
+ "gemini-1.5-flash": {
51
+ "input_cost_per_token": 0.000000075,
52
+ "output_cost_per_token": 0.0000003
53
+ },
54
+ "gemini-2.0-flash-exp": {
55
+ "input_cost_per_token": 0,
56
+ "output_cost_per_token": 0
57
+ }
58
+ }
package/pricing.js ADDED
@@ -0,0 +1,154 @@
1
+ const https = require('https');
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+
5
// Remote pricing catalog (LiteLLM's model price map), overridable via env.
const DEFAULT_PRICING_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
const PRICING_URL = process.env.PRICING_URL || DEFAULT_PRICING_URL;

// Re-fetch the catalog once per day.
const REFRESH_INTERVAL_MS = 24 * 60 * 60 * 1000;

// Verbose logging via VERBOSE=1 or a --verbose CLI flag.
const VERBOSE = process.env.VERBOSE === '1' || process.argv.includes('--verbose');
9
+
10
// Bundled pricing snapshot, used when the remote catalog is unreachable.
const fallbackPricingPath = path.join(__dirname, 'pricing.fallback.json');

// In-memory pricing table keyed by model name, plus fetch bookkeeping.
let pricingData = {};
let lastFetchTime = 0;
14
+
15
/**
 * Seed pricingData from the bundled JSON snapshot.
 * Failures are deliberately ignored: a missing or unreadable snapshot
 * simply leaves the current (possibly empty) table in place.
 */
function loadFallbackPricing() {
  try {
    if (fs.existsSync(fallbackPricingPath)) {
      const raw = fs.readFileSync(fallbackPricingPath, 'utf8');
      pricingData = JSON.parse(raw);
    }
  } catch (err) {
    // Best-effort: fall through with whatever pricing we already have.
  }
}
24
+
25
/**
 * GET a URL over HTTPS and resolve with the parsed JSON body.
 * Rejects on a non-200 status, a network error, a JSON parse error,
 * or a 10-second socket timeout.
 *
 * @param {string} url
 * @returns {Promise<object>}
 */
function fetchJson(url) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, { timeout: 10000 }, (res) => {
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode}`));
        return;
      }

      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        try {
          resolve(JSON.parse(chunks.join('')));
        } catch (err) {
          reject(err);
        }
      });
    });

    request.on('error', reject);
    request.on('timeout', () => {
      request.destroy();
      reject(new Error('Request timeout'));
    });
  });
}
51
+
52
/**
 * Refresh pricingData from the remote catalog.
 * On failure the current table is kept; the bundled fallback is loaded
 * only when no pricing has ever been loaded. Never throws.
 */
async function loadPricing() {
  try {
    pricingData = await fetchJson(PRICING_URL);
    lastFetchTime = Date.now();
    if (VERBOSE) {
      console.log(`\x1b[2m[pricing] Loaded ${Object.keys(pricingData).length} models\x1b[0m`);
    }
  } catch (err) {
    // Remote fetch failed — only seed from the bundled snapshot if empty.
    if (Object.keys(pricingData).length === 0) {
      loadFallbackPricing();
    }
  }
}
66
+
67
/**
 * Canonicalize a model name: lowercase, trimmed, and with one known
 * provider prefix (openai/, anthropic/, google/, azure/, together/) stripped.
 * Returns null for falsy input.
 */
function normalizeModelName(model) {
  if (!model) return null;

  const lowered = model.toLowerCase().trim();

  const knownPrefixes = ['openai/', 'anthropic/', 'google/', 'azure/', 'together/'];
  const prefix = knownPrefixes.find((p) => lowered.startsWith(p));

  return prefix ? lowered.slice(prefix.length) : lowered;
}
83
+
84
/**
 * Resolve the pricing entry for a model name.
 * Lookup order: exact → provider-prefixed → base name ("gpt-4-0125" → "gpt-4")
 * → boundary-aware prefix / contains match. Returns null when nothing matches.
 *
 * Fix: the fuzzy fallback used a bare `key.startsWith(baseName)`, so a base
 * name like "gpt-4" could match "gpt-4o" (whichever key came first), yielding
 * the wrong pricing. We now try an exact base-name lookup first and require a
 * hyphen boundary ("gpt-4-...") for prefix matches.
 *
 * @param {string} modelName
 * @returns {object|null} pricing entry (input_cost_per_token, ...) or null
 */
function findModelPricing(modelName) {
  if (!modelName || !pricingData) return null;

  const normalized = normalizeModelName(modelName);
  if (!normalized) return null;

  // Exact match first.
  if (pricingData[normalized]) {
    return pricingData[normalized];
  }

  // Try common provider-prefixed keys (LiteLLM uses e.g. "azure/gpt-4o").
  for (const prefix of ['openai/', 'azure/']) {
    const entry = pricingData[prefix + normalized];
    if (entry) return entry;
  }

  // Versioned models fall back to their base name: "gpt-4-0125-preview" → "gpt-4".
  const baseName = normalized.split('-').slice(0, 2).join('-');
  if (pricingData[baseName]) {
    return pricingData[baseName];
  }

  // Fuzzy fallback: hyphen-bounded prefix match (so "gpt-4" never matches
  // "gpt-4o") or a key containing the full normalized name.
  for (const key of Object.keys(pricingData)) {
    if (key.startsWith(baseName + '-') || key.includes(normalized)) {
      return pricingData[key];
    }
  }

  return null;
}
114
+
115
/**
 * Estimate the USD cost of a call from its model and token counts.
 * Unknown models use a generic fallback rate ($0.001/1k prompt,
 * $0.002/1k completion).
 *
 * Fix: undefined/NaN token counts previously propagated NaN into the cost;
 * counts are now coerced to numbers with 0 as the fallback.
 *
 * @param {string} model
 * @param {number} promptTokens
 * @param {number} completionTokens
 * @returns {number} estimated cost in USD (always a finite number)
 */
function calculateCost(model, promptTokens, completionTokens) {
  const prompt = Number(promptTokens) || 0;
  const completion = Number(completionTokens) || 0;

  const pricing = findModelPricing(model);

  if (!pricing) {
    // Fallback to default pricing for unknown models.
    return ((prompt * 0.001) + (completion * 0.002)) / 1000;
  }

  // LiteLLM format: input_cost_per_token / output_cost_per_token are per
  // single token (not per 1k). Output falls back to the input rate.
  const inputCost = pricing.input_cost_per_token || 0;
  const outputCost = pricing.output_cost_per_token || pricing.input_cost_per_token || 0;

  return (prompt * inputCost) + (completion * outputCost);
}
129
+
130
/**
 * Public lookup: the raw pricing entry for a model, or null if unknown.
 */
function getPricingInfo(model) {
  return findModelPricing(model);
}
133
+
134
/**
 * Snapshot of the pricing cache: model count, last successful fetch time
 * (ms epoch, 0 if never fetched), and the source URL.
 */
function getPricingStats() {
  const modelCount = Object.keys(pricingData).length;
  return { modelCount, lastFetchTime, source: PRICING_URL };
}
141
+
142
// Initialize pricing: seed synchronously from the bundled snapshot, then
// fetch fresh data in the background (loadPricing handles its own errors
// and never rejects).
loadFallbackPricing();
loadPricing();

// Refresh pricing periodically. Fix: unref() the timer so this interval
// alone never keeps a process alive that only imported pricing.js.
const pricingRefreshTimer = setInterval(loadPricing, REFRESH_INTERVAL_MS);
if (typeof pricingRefreshTimer.unref === 'function') {
  pricingRefreshTimer.unref();
}
148
+
149
+ module.exports = {
150
+ calculateCost,
151
+ getPricingInfo,
152
+ getPricingStats,
153
+ loadPricing
154
+ };