llmflow 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,573 @@
1
+ const https = require('https');
2
+ const { v4: uuidv4 } = require('uuid');
3
+
4
/**
 * Base passthrough handler for forwarding requests without body transformation.
 * Used for AI CLI tools that send native API formats (Anthropic, Gemini).
 *
 * Key differences from regular providers:
 * - Request body is NOT transformed - forwarded as-is
 * - Response body is NOT normalized - returned as-is to client
 * - Usage metrics ARE extracted for observability
 */
class PassthroughHandler {
  /**
   * @param {object} [options]
   * @param {string} [options.name] - Machine-readable handler name.
   * @param {string} [options.displayName] - Human-readable handler name.
   * @param {string} [options.targetHost] - Upstream hostname.
   * @param {number} [options.targetPort] - Upstream port; defaults to 443 for
   *   https and 80 for any other protocol.
   * @param {string} [options.protocol] - 'https' (default) or 'http'.
   * @param {Function} [options.extractUsage] - Replaces defaultExtractUsage.
   * @param {Function} [options.identifyModel] - Replaces defaultIdentifyModel.
   * @param {Function} [options.headerTransform] - Replaces defaultHeaderTransform.
   * @param {Function} [options.parseStreamChunk] - Replaces defaultParseStreamChunk.
   */
  constructor(options = {}) {
    this.name = options.name || 'passthrough';
    this.displayName = options.displayName || 'Passthrough';
    this.targetHost = options.targetHost;
    this.protocol = options.protocol || 'https';
    // The fallback port follows the protocol so a plain-http target is not
    // dialled on the TLS port.
    this.targetPort = options.targetPort || (this.protocol === 'https' ? 443 : 80);

    // Customizable hooks. The default* lookups go through the prototype chain,
    // so a subclass override is picked up here as well.
    this.extractUsage = options.extractUsage || this.defaultExtractUsage;
    this.identifyModel = options.identifyModel || this.defaultIdentifyModel;
    this.headerTransform = options.headerTransform || this.defaultHeaderTransform;
    this.parseStreamChunk = options.parseStreamChunk || this.defaultParseStreamChunk;
  }

  /**
   * Get target configuration - passthrough preserves the original path.
   *
   * @param {object} req - Incoming request; only `req.path` is read.
   * @returns {{hostname: string, port: number, path: string, protocol: string}}
   */
  getTarget(req) {
    // NOTE(review): if `req` is an Express request, `req.path` excludes the
    // query string - confirm that dropping it is intended.
    return {
      hostname: this.targetHost,
      port: this.targetPort,
      path: req.path,
      protocol: this.protocol,
    };
  }

  /**
   * Transform headers for upstream - override in subclasses.
   *
   * @param {object} [headers] - Incoming headers keyed by lowercase name.
   * @returns {object} Headers to send upstream.
   */
  defaultHeaderTransform(headers = {}) {
    const upstream = {
      'Content-Type': headers['content-type'] || 'application/json',
    };
    // Node's HTTP client rejects a header whose value is undefined, so the
    // credential is only forwarded when the client actually sent one.
    if (headers.authorization) {
      upstream['Authorization'] = headers.authorization;
    }
    return upstream;
  }

  /**
   * Extract usage from response - override in subclasses.
   * Accepts both `prompt_tokens`/`completion_tokens` and
   * `input_tokens`/`output_tokens` field names.
   *
   * @param {object} [body] - Parsed response body.
   * @returns {{prompt_tokens: number, completion_tokens: number, total_tokens: number}}
   */
  defaultExtractUsage(body) {
    const usage = body?.usage || {};
    const promptTokens = usage.prompt_tokens || usage.input_tokens || 0;
    const completionTokens = usage.completion_tokens || usage.output_tokens || 0;
    return {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      // Prefer the reported total; otherwise derive it from the two parts.
      total_tokens: usage.total_tokens || promptTokens + completionTokens,
    };
  }

  /**
   * Identify model from request/response - override in subclasses.
   *
   * @param {object} [reqBody] - Parsed request body.
   * @param {object} [respBody] - Parsed response body.
   * @returns {string} Model name, or 'unknown' when neither body names one.
   */
  defaultIdentifyModel(reqBody, respBody) {
    return reqBody?.model || respBody?.model || 'unknown';
  }

  /**
   * Parse streaming chunk - override in subclasses.
   * This generic parser reads `data:` lines, records usage via the
   * `extractUsage` hook and detects the `[DONE]` sentinel. It does not
   * extract text, so `content` is always the empty string.
   *
   * @param {string|Buffer} chunk - Raw chunk of the streamed response.
   * @returns {{content: string, usage: (object|null), done: boolean}}
   */
  defaultParseStreamChunk(chunk) {
    let usage = null;
    let done = false;

    // String() lets callers hand over the raw Buffer from a 'data' event.
    for (const line of String(chunk).split('\n')) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (payload === '[DONE]') {
        done = true;
        continue;
      }

      try {
        const json = JSON.parse(payload);
        if (json?.usage) usage = this.extractUsage(json);
      } catch {
        // A chunk boundary can cut a JSON payload in half; such fragments
        // are skipped on purpose.
      }
    }

    return { content: '', usage, done };
  }

  /**
   * Check if request is streaming.
   *
   * @param {object} req - Incoming request with an optional parsed `body`.
   * @returns {boolean} True only when `body.stream` is exactly `true`.
   */
  isStreamingRequest(req) {
    return req.body?.stream === true;
  }

  /**
   * Get HTTP module based on protocol.
   *
   * @returns {object} The `https` module for 'https', otherwise `http`.
   */
  getHttpModule() {
    return this.protocol === 'https' ? https : require('http');
  }

  /**
   * Strip sensitive headers for logging.
   * Names are compared case-insensitively so that both incoming lowercase
   * headers and the capitalised headers built by the header transforms
   * (e.g. 'Authorization', 'Helicone-Auth') are removed.
   *
   * @param {object} [headers] - Headers to sanitize; not modified.
   * @returns {object} Copy of `headers` without credential-bearing entries.
   */
  sanitizeHeaders(headers) {
    const sensitive = new Set([
      'x-api-key',
      'authorization',
      'x-goog-api-key',
      'api-key',
      'helicone-auth',
    ]);
    return Object.fromEntries(
      Object.entries(headers || {}).filter(([name]) => !sensitive.has(name.toLowerCase()))
    );
  }
}
127
+
128
/**
 * Anthropic passthrough handler for native Claude API format.
 * Used by Claude Code and other tools using Anthropic's /v1/messages endpoint.
 */
class AnthropicPassthrough extends PassthroughHandler {
  constructor() {
    super({
      name: 'anthropic-passthrough',
      displayName: 'Anthropic (Passthrough)',
      targetHost: 'api.anthropic.com',
      targetPort: 443,
      protocol: 'https',
    });
  }

  /**
   * Transform headers for Anthropic API.
   * The key is taken from `x-api-key`, falling back to the token of an
   * `Authorization: Bearer ...` header.
   *
   * @param {object} [headers] - Incoming headers keyed by lowercase name.
   * @returns {object} Headers to send upstream.
   */
  defaultHeaderTransform(headers = {}) {
    let apiKey = headers['x-api-key'];
    if (!apiKey && headers.authorization) {
      apiKey = headers.authorization.replace(/^Bearer\s+/i, '');
    }

    const upstream = {
      'Content-Type': 'application/json',
      'anthropic-version': headers['anthropic-version'] || '2023-06-01',
    };
    // Node's HTTP client rejects a header whose value is undefined, so the
    // key is only set when a credential was actually found.
    if (apiKey) {
      upstream['x-api-key'] = apiKey;
    }
    // Pass through beta headers if present.
    if (headers['anthropic-beta']) {
      upstream['anthropic-beta'] = headers['anthropic-beta'];
    }
    return upstream;
  }

  /**
   * Extract usage from Anthropic response format.
   *
   * @param {object} [body] - Parsed response body.
   * @returns {object} Token counts, including the Anthropic cache metrics.
   */
  defaultExtractUsage(body) {
    const usage = body?.usage || {};
    const inputTokens = usage.input_tokens || 0;
    const outputTokens = usage.output_tokens || 0;
    return {
      prompt_tokens: inputTokens,
      completion_tokens: outputTokens,
      total_tokens: inputTokens + outputTokens,
      // Anthropic-specific: cache metrics
      cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
      cache_read_input_tokens: usage.cache_read_input_tokens || 0,
    };
  }

  /**
   * Identify model from Anthropic request/response.
   * The response's model wins over the request's.
   *
   * @param {object} [reqBody] - Parsed request body.
   * @param {object} [respBody] - Parsed response body.
   * @returns {string} Model name, or 'claude-unknown'.
   */
  defaultIdentifyModel(reqBody, respBody) {
    return respBody?.model || reqBody?.model || 'claude-unknown';
  }

  /**
   * Parse Anthropic streaming chunks.
   *
   * Text comes from `content_block_delta` events of type `text_delta`.
   * Input tokens are read from `message_start`, output tokens from
   * `message_delta`. When both events land in the same chunk their counts
   * are combined instead of the later event wiping out the earlier one.
   *
   * @param {string|Buffer} chunk - Raw chunk of the streamed response.
   * @returns {{content: string, usage: (object|null), done: boolean}}
   *   `usage` is null when the chunk carried no usage information.
   */
  defaultParseStreamChunk(chunk) {
    let content = '';
    let done = false;
    // null means "not reported in this chunk".
    let promptTokens = null;
    let completionTokens = null;

    // String() lets callers hand over the raw Buffer from a 'data' event.
    for (const line of String(chunk).split('\n')) {
      const trimmed = line.trim();

      // Handle event: lines
      if (trimmed.startsWith('event:')) {
        if (trimmed.slice(6).trim() === 'message_stop') {
          done = true;
        }
        continue;
      }

      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (!payload) continue;

      let json;
      try {
        json = JSON.parse(payload);
      } catch {
        // A chunk boundary can cut a JSON payload in half; such fragments
        // are skipped on purpose.
        continue;
      }
      if (json === null || typeof json !== 'object') continue;

      switch (json.type) {
        case 'content_block_delta':
          if (json.delta?.type === 'text_delta') {
            content += json.delta.text || '';
          }
          break;
        case 'message_delta':
          if (json.usage) {
            completionTokens = json.usage.output_tokens || 0;
          }
          break;
        case 'message_start':
          if (json.message?.usage) {
            promptTokens = json.message.usage.input_tokens || 0;
          }
          break;
        case 'message_stop':
          // The data payload repeats the event type, so the end of the
          // stream is detected even if the `event:` line is not in this chunk.
          done = true;
          break;
        default:
          break;
      }
    }

    let usage = null;
    if (promptTokens !== null || completionTokens !== null) {
      const prompt = promptTokens ?? 0;
      const completion = completionTokens ?? 0;
      usage = {
        prompt_tokens: prompt,
        completion_tokens: completion,
        total_tokens: prompt + completion,
      };
    }

    return { content, usage, done };
  }
}
241
+
242
/**
 * Google Gemini passthrough handler for native Gemini API format.
 */
class GeminiPassthrough extends PassthroughHandler {
  constructor() {
    super({
      name: 'gemini-passthrough',
      displayName: 'Google Gemini (Passthrough)',
      targetHost: 'generativelanguage.googleapis.com',
      targetPort: 443,
      protocol: 'https',
    });
  }

  /**
   * Get target - Gemini uses API key in query string.
   *
   * @param {object} req - Incoming request; `req.path` and `req.headers` are read.
   * @returns {{hostname: string, port: number, path: string, protocol: string}}
   */
  getTarget(req) {
    let path = req.path;

    // Add API key to query string if provided. The key comes from a
    // client-controlled header, so it is percent-encoded to keep characters
    // such as '&' or '#' from altering the rest of the URL.
    const apiKey = this.extractApiKey(req.headers);
    if (apiKey) {
      const separator = path.includes('?') ? '&' : '?';
      path = `${path}${separator}key=${encodeURIComponent(apiKey)}`;
    }

    return {
      hostname: this.targetHost,
      port: this.targetPort,
      path,
      protocol: this.protocol,
    };
  }

  /**
   * Extract API key from headers.
   * `x-goog-api-key` wins; otherwise the token of an
   * `Authorization: Bearer ...` header is used.
   *
   * @param {object} [headers] - Incoming headers keyed by lowercase name.
   * @returns {string|null} The key, or null when none was supplied.
   */
  extractApiKey(headers) {
    if (!headers) {
      return null;
    }
    if (headers['x-goog-api-key']) {
      return headers['x-goog-api-key'];
    }
    if (headers.authorization) {
      return headers.authorization.replace(/^Bearer\s+/i, '');
    }
    return null;
  }

  /**
   * Transform headers for Gemini API.
   *
   * @param {object} [headers] - Incoming headers (not consulted).
   * @returns {object} Headers to send upstream.
   */
  defaultHeaderTransform(headers) {
    return {
      'Content-Type': 'application/json',
      // API key is passed via query string, not header
    };
  }

  /**
   * Extract usage from Gemini response format.
   *
   * @param {object} [body] - Parsed response body carrying `usageMetadata`.
   * @returns {{prompt_tokens: number, completion_tokens: number, total_tokens: number}}
   */
  defaultExtractUsage(body) {
    const usage = body?.usageMetadata || {};
    const promptTokens = usage.promptTokenCount || 0;
    const candidateTokens = usage.candidatesTokenCount || 0;
    return {
      prompt_tokens: promptTokens,
      completion_tokens: candidateTokens,
      // Prefer the reported total; otherwise derive it from the two parts.
      total_tokens: usage.totalTokenCount || promptTokens + candidateTokens,
    };
  }

  /**
   * Identify model from Gemini request/response.
   *
   * @param {object} [reqBody] - Parsed request body.
   * @param {object} [respBody] - Parsed response body.
   * @returns {string} Model name, or 'gemini-unknown'.
   */
  defaultIdentifyModel(reqBody, respBody) {
    return respBody?.modelVersion || reqBody?.model || 'gemini-unknown';
  }

  /**
   * Parse Gemini streaming chunks.
   *
   * The chunk is first tried as one JSON document (an object or an array of
   * objects). If that fails it is treated as SSE and every `data:` line is
   * parsed on its own. Both paths extract the same things: text of the first
   * candidate's parts, usage metadata, and `done` when the first candidate
   * has a `finishReason` (or an SSE `[DONE]` sentinel is seen).
   *
   * @param {string|Buffer} chunk - Raw chunk of the streamed response.
   * @returns {{content: string, usage: (object|null), done: boolean}}
   */
  defaultParseStreamChunk(chunk) {
    const text = String(chunk);
    const result = { content: '', usage: null, done: false };

    // Folds one parsed response object (or an array of them) into `result`.
    const absorb = (node) => {
      if (Array.isArray(node)) {
        node.forEach(absorb);
        return;
      }
      if (node === null || typeof node !== 'object') {
        return;
      }

      const candidate = node.candidates?.[0];
      const parts = candidate?.content?.parts;
      if (Array.isArray(parts)) {
        // Parts without text (e.g. non-text parts) contribute nothing.
        result.content += parts.map((part) => part?.text || '').join('');
      }
      if (node.usageMetadata) {
        result.usage = this.defaultExtractUsage(node);
      }
      if (candidate?.finishReason) {
        result.done = true;
      }
    };

    let document;
    let isJsonDocument = true;
    try {
      document = JSON.parse(text);
    } catch {
      isJsonDocument = false;
    }

    if (isJsonDocument) {
      absorb(document);
      return result;
    }

    // May be SSE format
    for (const line of text.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (payload === '[DONE]') {
        result.done = true;
        continue;
      }

      try {
        absorb(JSON.parse(payload));
      } catch {
        // A chunk boundary can cut a JSON payload in half; such fragments
        // are skipped on purpose.
      }
    }

    return result;
  }
}
374
+
375
/**
 * OpenAI passthrough handler for native OpenAI API format.
 * Used by tools that already use OpenAI format but need passthrough for some reason.
 */
class OpenAIPassthrough extends PassthroughHandler {
  constructor() {
    super({
      name: 'openai-passthrough',
      displayName: 'OpenAI (Passthrough)',
      targetHost: 'api.openai.com',
      targetPort: 443,
      protocol: 'https',
    });
  }

  /**
   * Transform headers for OpenAI API.
   *
   * @param {object} [headers] - Incoming headers keyed by lowercase name.
   * @returns {object} Headers to send upstream.
   */
  defaultHeaderTransform(headers = {}) {
    const upstream = { 'Content-Type': 'application/json' };
    // Node's HTTP client rejects a header whose value is undefined, so the
    // credential is only forwarded when the client actually sent one.
    if (headers.authorization) {
      upstream['Authorization'] = headers.authorization;
    }
    return upstream;
  }

  /**
   * Extract usage from OpenAI response format.
   * Accepts both `prompt_tokens`/`completion_tokens` and
   * `input_tokens`/`output_tokens` field names.
   *
   * @param {object} [body] - Parsed response body.
   * @returns {{prompt_tokens: number, completion_tokens: number, total_tokens: number}}
   */
  defaultExtractUsage(body) {
    const usage = body?.usage || {};
    const promptTokens = usage.prompt_tokens || usage.input_tokens || 0;
    const completionTokens = usage.completion_tokens || usage.output_tokens || 0;
    return {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      // Prefer the reported total; otherwise derive it from the two parts.
      total_tokens: usage.total_tokens || promptTokens + completionTokens,
    };
  }

  /**
   * Parse OpenAI streaming chunks.
   * Concatenates `choices[0].delta.content` of every `data:` line, records
   * the last usage object seen and detects the `[DONE]` sentinel.
   *
   * @param {string|Buffer} chunk - Raw chunk of the streamed response.
   * @returns {{content: string, usage: (object|null), done: boolean}}
   */
  defaultParseStreamChunk(chunk) {
    let content = '';
    let usage = null;
    let done = false;

    // String() lets callers hand over the raw Buffer from a 'data' event.
    for (const line of String(chunk).split('\n')) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (payload === '[DONE]') {
        done = true;
        continue;
      }

      try {
        const json = JSON.parse(payload);
        const delta = json?.choices?.[0]?.delta?.content;
        if (delta) content += delta;
        if (json?.usage) usage = this.defaultExtractUsage(json);
      } catch {
        // A chunk boundary can cut a JSON payload in half; such fragments
        // are skipped on purpose.
      }
    }

    return { content, usage, done };
  }
}
446
+
447
/**
 * Helicone passthrough handler for LLM cost tracking.
 * Routes requests through Helicone's gateway while preserving OpenAI format.
 *
 * Helicone adds cost tracking, caching, and analytics on top of LLM requests.
 */
class HeliconePassthrough extends PassthroughHandler {
  constructor() {
    super({
      name: 'helicone-passthrough',
      displayName: 'Helicone (Cost Tracking)',
      targetHost: process.env.HELICONE_HOST || 'oai.helicone.ai',
      targetPort: 443,
      protocol: 'https',
    });
  }

  /**
   * Get target - can be self-hosted or cloud Helicone.
   * Host and port are re-read from HELICONE_HOST / HELICONE_PORT on every
   * call. A missing or unusable HELICONE_PORT falls back to 443.
   *
   * @param {object} req - Incoming request; only `req.path` is read.
   * @returns {{hostname: string, port: number, path: string, protocol: string}}
   */
  getTarget(req) {
    const host = process.env.HELICONE_HOST || 'oai.helicone.ai';

    // parseInt yields NaN for an unset or non-numeric value; never hand that
    // (or an out-of-range number) to the HTTP client.
    const parsedPort = Number.parseInt(process.env.HELICONE_PORT, 10);
    const isUsablePort = Number.isInteger(parsedPort) && parsedPort > 0 && parsedPort <= 65535;

    return {
      hostname: host,
      port: isUsablePort ? parsedPort : 443,
      path: req.path,
      protocol: this.protocol,
    };
  }

  /**
   * Transform headers for Helicone API.
   * Passes through OpenAI auth and adds Helicone-specific headers.
   *
   * @param {object} [headers] - Incoming headers keyed by lowercase name.
   * @returns {object} Headers to send upstream.
   */
  defaultHeaderTransform(headers = {}) {
    const heliconeHeaders = { 'Content-Type': 'application/json' };
    // Node's HTTP client rejects a header whose value is undefined, so the
    // credential is only forwarded when the client actually sent one.
    if (headers.authorization) {
      heliconeHeaders['Authorization'] = headers.authorization;
    }

    // Add Helicone API key if provided (request header wins over environment).
    const heliconeApiKey = headers['helicone-auth'] || process.env.HELICONE_API_KEY;
    if (heliconeApiKey) {
      // Strip any existing scheme case-insensitively so that 'bearer x' does
      // not turn into 'Bearer bearer x'.
      const token = String(heliconeApiKey).replace(/^Bearer\s+/i, '');
      heliconeHeaders['Helicone-Auth'] = `Bearer ${token}`;
    }

    // Pass through Helicone feature headers. Every entry is matched as a
    // prefix of the lowercased header name.
    const heliconeFeatures = [
      'helicone-property-',
      'helicone-user-id',
      'helicone-session-id',
      'helicone-session-name',
      'helicone-session-path',
      'helicone-prompt-id',
      'helicone-cache-enabled',
      'helicone-retry-enabled',
      'helicone-rate-limit-policy',
      'helicone-fallbacks',
    ];

    for (const [key, value] of Object.entries(headers)) {
      const lowerKey = key.toLowerCase();
      if (heliconeFeatures.some((prefix) => lowerKey.startsWith(prefix))) {
        heliconeHeaders[key] = value;
      }
    }

    return heliconeHeaders;
  }

  /**
   * Extract usage from OpenAI response format (Helicone proxies OpenAI).
   *
   * @param {object} [body] - Parsed response body.
   * @returns {{prompt_tokens: number, completion_tokens: number, total_tokens: number}}
   */
  defaultExtractUsage(body) {
    const usage = body?.usage || {};
    const promptTokens = usage.prompt_tokens || 0;
    const completionTokens = usage.completion_tokens || 0;
    return {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      // Prefer the reported total; otherwise derive it from the two parts.
      total_tokens: usage.total_tokens || promptTokens + completionTokens,
    };
  }

  /**
   * Parse OpenAI streaming chunks (same format as the OpenAI passthrough).
   * Concatenates `choices[0].delta.content` of every `data:` line, records
   * the last usage object seen and detects the `[DONE]` sentinel.
   *
   * @param {string|Buffer} chunk - Raw chunk of the streamed response.
   * @returns {{content: string, usage: (object|null), done: boolean}}
   */
  defaultParseStreamChunk(chunk) {
    let content = '';
    let usage = null;
    let done = false;

    // String() lets callers hand over the raw Buffer from a 'data' event.
    for (const line of String(chunk).split('\n')) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data:')) continue;

      const payload = trimmed.slice(5).trim();
      if (payload === '[DONE]') {
        done = true;
        continue;
      }

      try {
        const json = JSON.parse(payload);
        const delta = json?.choices?.[0]?.delta?.content;
        if (delta) content += delta;
        if (json?.usage) usage = this.defaultExtractUsage(json);
      } catch {
        // A chunk boundary can cut a JSON payload in half; such fragments
        // are skipped on purpose.
      }
    }

    return { content, usage, done };
  }
}
566
+
567
+ module.exports = {
568
+ PassthroughHandler,
569
+ AnthropicPassthrough,
570
+ GeminiPassthrough,
571
+ OpenAIPassthrough,
572
+ HeliconePassthrough
573
+ };