@revenium/litellm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/LICENSE +21 -0
  2. package/README.md +630 -0
  3. package/dist/client.d.ts +17 -0
  4. package/dist/client.d.ts.map +1 -0
  5. package/dist/client.js +713 -0
  6. package/dist/client.js.map +1 -0
  7. package/dist/config.d.ts +42 -0
  8. package/dist/config.d.ts.map +1 -0
  9. package/dist/config.js +332 -0
  10. package/dist/config.js.map +1 -0
  11. package/dist/constants.d.ts +15 -0
  12. package/dist/constants.d.ts.map +1 -0
  13. package/dist/constants.js +101 -0
  14. package/dist/constants.js.map +1 -0
  15. package/dist/index.d.ts +42 -0
  16. package/dist/index.d.ts.map +1 -0
  17. package/dist/index.js +189 -0
  18. package/dist/index.js.map +1 -0
  19. package/dist/prompt-extraction.d.ts +11 -0
  20. package/dist/prompt-extraction.d.ts.map +1 -0
  21. package/dist/prompt-extraction.js +201 -0
  22. package/dist/prompt-extraction.js.map +1 -0
  23. package/dist/tracking.d.ts +47 -0
  24. package/dist/tracking.d.ts.map +1 -0
  25. package/dist/tracking.js +299 -0
  26. package/dist/tracking.js.map +1 -0
  27. package/dist/types.d.ts +348 -0
  28. package/dist/types.d.ts.map +1 -0
  29. package/dist/types.js +3 -0
  30. package/dist/types.js.map +1 -0
  31. package/dist/utils/circuit-breaker.d.ts +114 -0
  32. package/dist/utils/circuit-breaker.d.ts.map +1 -0
  33. package/dist/utils/circuit-breaker.js +216 -0
  34. package/dist/utils/circuit-breaker.js.map +1 -0
  35. package/dist/utils/error-handling.d.ts +166 -0
  36. package/dist/utils/error-handling.d.ts.map +1 -0
  37. package/dist/utils/error-handling.js +306 -0
  38. package/dist/utils/error-handling.js.map +1 -0
  39. package/dist/utils/logger-types.d.ts +171 -0
  40. package/dist/utils/logger-types.d.ts.map +1 -0
  41. package/dist/utils/logger-types.js +210 -0
  42. package/dist/utils/logger-types.js.map +1 -0
  43. package/dist/utils/provider-detection.d.ts +43 -0
  44. package/dist/utils/provider-detection.d.ts.map +1 -0
  45. package/dist/utils/provider-detection.js +103 -0
  46. package/dist/utils/provider-detection.js.map +1 -0
  47. package/dist/utils/stop-reason.d.ts +58 -0
  48. package/dist/utils/stop-reason.d.ts.map +1 -0
  49. package/dist/utils/stop-reason.js +136 -0
  50. package/dist/utils/stop-reason.js.map +1 -0
  51. package/dist/utils/summary-printer.d.ts +23 -0
  52. package/dist/utils/summary-printer.d.ts.map +1 -0
  53. package/dist/utils/summary-printer.js +234 -0
  54. package/dist/utils/summary-printer.js.map +1 -0
  55. package/dist/utils/trace-fields.d.ts +10 -0
  56. package/dist/utils/trace-fields.d.ts.map +1 -0
  57. package/dist/utils/trace-fields.js +117 -0
  58. package/dist/utils/trace-fields.js.map +1 -0
  59. package/dist/utils/validation.d.ts +121 -0
  60. package/dist/utils/validation.d.ts.map +1 -0
  61. package/dist/utils/validation.js +451 -0
  62. package/dist/utils/validation.js.map +1 -0
  63. package/examples/README.md +321 -0
  64. package/examples/litellm-basic.ts +240 -0
  65. package/examples/litellm-streaming.ts +309 -0
  66. package/examples/prompt-capture.ts +128 -0
  67. package/package.json +85 -0
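
The main file in this release, package/dist/client.js (shown below), implements the middleware by patching globalThis.fetch and intercepting requests to the configured LiteLLM Proxy. A minimal usage sketch, assuming the package entry point re-exports the client functions visible in dist/client.js and that the proxy URL and API key have already been set up through the package's config module (not shown in this excerpt); the model name and endpoint values are placeholders:

import { patchHttpClient, unpatchHttpClient, isHttpClientPatched } from "@revenium/litellm";

// Patch globalThis.fetch so LiteLLM Proxy traffic is intercepted and tracked.
patchHttpClient();

// Requests to the configured proxy's /chat/completions or /embeddings
// endpoints are now observed transparently; everything else passes through.
const res = await fetch("http://localhost:4000/chat/completions", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "gpt-4o-mini", // placeholder model name
    messages: [{ role: "user", content: "Hello" }],
  }),
});

console.log(isHttpClientPatched()); // true while patched
unpatchHttpClient(); // restore the original fetch
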
package/dist/client.js ADDED
@@ -0,0 +1,713 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.patchHttpClient = patchHttpClient;
+ exports.unpatchHttpClient = unpatchHttpClient;
+ exports.isHttpClientPatched = isHttpClientPatched;
+ exports.resetHttpClientManager = resetHttpClientManager;
+ const config_1 = require("./config");
+ const tracking_1 = require("./tracking");
+ const crypto_1 = require("crypto");
+ const validation_1 = require("./utils/validation");
+ const error_handling_1 = require("./utils/error-handling");
+ const constants_1 = require("./constants");
+ const prompt_extraction_1 = require("./prompt-extraction");
+ // Global logger
+ const logger = (0, config_1.getLogger)();
+ /**
+ * HTTP client manager singleton for proper state management
+ */
+ class HttpClientManager {
+ constructor() {
+ this.isPatched = false;
+ this.originalFetch = null;
+ // Private constructor to enforce singleton pattern
+ }
+ /**
+ * Get the singleton instance
+ */
+ static getInstance() {
+ if (!HttpClientManager.instance) {
+ HttpClientManager.instance = new HttpClientManager();
+ }
+ return HttpClientManager.instance;
+ }
+ /**
+ * Reset the singleton instance (for testing)
+ */
+ static resetInstance() {
+ HttpClientManager.instance = null;
+ }
+ /**
+ * Check if HTTP client is patched
+ */
+ isHttpClientPatched() {
+ return this.isPatched;
+ }
+ /**
+ * Get the original fetch function
+ */
+ getOriginalFetch() {
+ return this.originalFetch;
+ }
+ /**
+ * Set the patched state and store original fetch
+ */
+ setPatched(patched, originalFetch) {
+ this.isPatched = patched;
+ if (originalFetch)
+ this.originalFetch = originalFetch;
+ }
+ /**
+ * Reset to unpatched state (for testing)
+ */
+ reset() {
+ this.isPatched = false;
+ this.originalFetch = null;
+ }
+ }
+ HttpClientManager.instance = null;
+ /**
+ * Check if a URL is a LiteLLM Proxy endpoint (chat completions or embeddings)
+ */
+ function isLiteLLMProxyRequest(url, config) {
+ try {
+ const requestUrl = new URL(url);
+ const proxyUrl = new URL(config.litellmProxyUrl);
+ // Check if the request is going to our configured LiteLLM Proxy
+ const isSameHost = requestUrl.hostname === proxyUrl.hostname;
+ const isSamePort = requestUrl.port === proxyUrl.port ||
+ ((requestUrl.port === "80" || requestUrl.port === "443") &&
+ proxyUrl.port === "");
+ // Handle two cases:
+ // 1. Proxy URL is a base URL (e.g., http://localhost:4000) - check if request is to supported endpoint
+ // 2. Proxy URL is a full endpoint URL (e.g., http://localhost:4000/chat/completions) - check exact match
+ let isCorrectEndpoint = false;
+ if (constants_1.supportedEndpoints.some((endpoint) => proxyUrl.pathname.endsWith(endpoint))) {
+ // Case 2: Proxy URL includes the endpoint path - check exact path match
+ isCorrectEndpoint = requestUrl.pathname === proxyUrl.pathname;
+ }
+ else {
+ // Case 1: Proxy URL is base URL - check if request is to any supported endpoint
+ isCorrectEndpoint = constants_1.supportedEndpoints.some((endpoint) => requestUrl.pathname.endsWith(endpoint));
+ }
+ return isSameHost && isSamePort && isCorrectEndpoint;
+ }
+ catch (error) {
+ return false;
+ }
+ }
+ /**
+ * Create patched fetch function that intercepts LiteLLM Proxy requests
+ */
+ function createPatchedFetch() {
+ return async function patchedFetch(input, init) {
+ const config = (0, config_1.getConfig)();
+ // Convert input to URL string for checking
+ const url = typeof input === "string"
+ ? input
+ : input instanceof URL
+ ? input.toString()
+ : input.url;
+ // Only intercept LiteLLM Proxy requests if we have config
+ if (!config || !isLiteLLMProxyRequest(url, config)) {
+ const clientManager = HttpClientManager.getInstance();
+ const originalFetchFn = clientManager.getOriginalFetch();
+ if (!originalFetchFn)
+ throw new Error("Original fetch function not available");
+ return originalFetchFn(input, init);
+ }
+ // Validate the URL against our proxy configuration
+ const urlValidation = (0, validation_1.validateLiteLLMUrl)(url, config.litellmProxyUrl);
+ if (!urlValidation.isValid) {
+ logger.warn("Invalid LiteLLM proxy URL detected", {
+ url,
+ errors: urlValidation.errors,
+ configuredProxy: config.litellmProxyUrl,
+ });
+ // Continue with original fetch for invalid URLs
+ const clientManager = HttpClientManager.getInstance();
+ const originalFetchFn = clientManager.getOriginalFetch();
+ if (!originalFetchFn)
+ throw new Error("Original fetch function not available");
+ return originalFetchFn(input, init);
+ }
+ // Extract and validate request context
+ const rawHeaders = init?.headers
+ ? Object.fromEntries(new Headers(init.headers))
+ : {};
+ const validatedHeaders = (0, validation_1.validateHeaders)(rawHeaders);
+ const requestContext = {
+ url,
+ method: init?.method || "GET",
+ headers: validatedHeaders,
+ body: init?.body || null,
+ startTime: Date.now(),
+ metadata: (0, tracking_1.extractMetadataFromHeaders)(validatedHeaders),
+ };
+ const requestId = (0, crypto_1.randomUUID)();
+ logger.debug("Intercepted LiteLLM Proxy request", {
+ url: requestContext.url,
+ method: requestContext.method,
+ requestId,
+ hasMetadata: !!requestContext.metadata,
+ });
+ try {
+ // Add LiteLLM Proxy authentication if configured
+ const headers = new Headers(init?.headers);
+ if (config.litellmApiKey)
+ headers.set("Authorization", `Bearer ${config.litellmApiKey}`);
+ // Make the actual request
+ const clientManager = HttpClientManager.getInstance();
+ const originalFetchFn = clientManager.getOriginalFetch();
+ if (!originalFetchFn)
+ throw new Error("Original fetch function not available");
+ const response = await originalFetchFn(input, {
+ ...init,
+ headers,
+ });
+ const endTime = Date.now();
+ const duration = endTime - requestContext.startTime;
+ // Clone response to read body without consuming it
+ const responseClone = response.clone();
+ logger.debug("LiteLLM Proxy response received", {
+ status: response.status,
+ requestId,
+ duration,
+ });
+ // Handle successful chat completion responses
+ if (response.ok && requestContext.method === "POST") {
+ handleSuccessfulResponse(requestContext, response, responseClone, requestId, duration);
+ }
+ else if (!response.ok) {
+ logger.warn("LiteLLM Proxy request failed", {
+ status: response.status,
+ statusText: response.statusText,
+ requestId,
+ });
+ }
+ return response;
+ }
+ catch (error) {
+ const endTime = Date.now();
+ const duration = endTime - requestContext.startTime;
+ logger.error("LiteLLM Proxy request error", {
+ error: error instanceof Error ? error.message : String(error),
+ requestId,
+ duration,
+ });
+ throw error;
+ }
+ };
+ }
+ /**
+ * Parse request body for either chat completions or embeddings
+ */
+ async function parseRequestBody(requestContext, requestId, endpointType) {
+ if (!requestContext.body)
+ return null;
+ try {
+ const bodyText = typeof requestContext.body === "string"
+ ? requestContext.body
+ : await new Response(requestContext.body).text();
+ return JSON.parse(bodyText);
+ }
+ catch (error) {
+ logger.warn(`Failed to parse ${endpointType} request body`, {
+ requestId,
+ error: error instanceof Error ? error.message : String(error),
+ });
+ return null;
+ }
+ }
+ /**
+ * Handle successful chat completion response and track usage
+ */
+ async function handleSuccessfulResponse(requestContext, originalResponse, responseClone, requestId, duration) {
+ try {
+ // Determine endpoint type from URL
+ const url = new URL(requestContext.url);
+ const isEmbeddingsEndpoint = url.pathname.endsWith("/embeddings") ||
+ url.pathname.endsWith("/v1/embeddings");
+ if (isEmbeddingsEndpoint) {
+ // Handle embeddings request
+ const requestData = await parseRequestBody(requestContext, requestId, "embeddings");
+ const model = requestData?.model || "unknown";
+ await handleEmbeddingResponse(responseClone, requestContext, requestId, duration, model);
+ }
+ else {
+ // Handle chat completions request
+ const requestData = await parseRequestBody(requestContext, requestId, "chat");
+ const isStreaming = requestData?.stream === true;
+ const model = requestData?.model || "unknown";
+ const responseFormat = requestData?.response_format;
+ if (isStreaming) {
+ await handleStreamingResponse(responseClone, requestContext, requestId, duration, model, responseFormat);
+ }
+ else {
+ handleNonStreamingResponse(responseClone, requestContext, requestId, duration, model, responseFormat);
+ }
+ }
+ }
+ catch (error) {
+ logger.error("Error handling LiteLLM response", {
+ error: error instanceof Error ? error.message : String(error),
+ requestId,
+ });
+ }
+ }
+ /**
+ * Handle non-streaming chat completion response
+ */
+ async function handleNonStreamingResponse(response, requestContext, requestId, duration, model, responseFormat) {
+ try {
+ const responseData = await response.json();
+ const usage = (0, tracking_1.extractUsageFromResponse)(responseData);
+ logger.debug("Extracted usage from non-streaming response", {
+ requestId,
+ model,
+ ...usage,
+ });
+ const requestBody = extractRequestBody(requestContext);
+ (0, tracking_1.trackUsageAsync)({
+ requestId,
+ model,
+ promptTokens: usage.promptTokens,
+ completionTokens: usage.completionTokens,
+ totalTokens: usage.totalTokens,
+ duration,
+ finishReason: usage.finishReason,
+ usageMetadata: requestContext.metadata,
+ isStreamed: false,
+ responseFormat,
+ request: requestBody,
+ response: responseData,
+ });
+ }
+ catch (error) {
+ logger.error("Error processing non-streaming response", {
+ error: error instanceof Error ? error.message : String(error),
+ requestId,
+ });
+ }
+ }
+ /**
+ * Extract request body from RequestContext
+ */
+ function extractRequestBody(requestContext) {
+ try {
+ if (typeof requestContext.body === "string") {
+ return JSON.parse(requestContext.body);
+ }
+ else if (typeof requestContext.body === "object" &&
+ requestContext.body !== null &&
+ "model" in requestContext.body &&
+ "messages" in requestContext.body) {
+ return requestContext.body;
+ }
+ }
+ catch (e) {
+ logger.debug("Failed to parse request body for prompt capture", {
+ error: e instanceof Error ? e.message : String(e),
+ bodyType: typeof requestContext.body,
+ });
+ }
+ return undefined;
+ }
+ /**
+ * Handle streaming chat completion response
+ */
+ async function handleStreamingResponse(response, requestContext, requestId, duration, model, responseFormat) {
+ logger.debug("Processing streaming response", { requestId, model });
+ const requestBody = extractRequestBody(requestContext);
+ if (!response.body) {
+ logger.warn("Streaming response has no body", { requestId });
+ (0, tracking_1.trackUsageAsync)({
+ requestId,
+ model,
+ promptTokens: 0,
+ completionTokens: 0,
+ totalTokens: 0,
+ duration,
+ finishReason: "stop",
+ usageMetadata: requestContext.metadata,
+ isStreamed: true,
+ timeToFirstToken: duration,
+ responseFormat,
+ request: requestBody,
+ });
+ return;
+ }
+ try {
+ const streamParser = new StreamingResponseParser(requestId, model, requestContext, duration, responseFormat);
+ await streamParser.parseStream(response.body);
+ }
+ catch (error) {
+ logger.error("Error parsing streaming response", {
+ error: error instanceof Error ? error.message : String(error),
+ requestId,
+ });
+ (0, tracking_1.trackUsageAsync)({
+ requestId,
+ model,
+ promptTokens: 0,
+ completionTokens: 0,
+ totalTokens: 0,
+ duration,
+ finishReason: "error",
+ usageMetadata: requestContext.metadata,
+ isStreamed: true,
+ timeToFirstToken: duration,
+ responseFormat,
+ request: requestBody,
+ });
+ }
+ }
+ /**
+ * Handle embeddings response and track usage
+ */
+ async function handleEmbeddingResponse(response, requestContext, requestId, duration, model) {
+ try {
+ const responseData = await response.json();
+ const usage = responseData.usage;
+ logger.debug("Extracted usage from embeddings response", {
+ requestId,
+ model,
+ promptTokens: usage.prompt_tokens,
+ totalTokens: usage.total_tokens,
+ });
+ // Track embeddings usage asynchronously
+ (0, tracking_1.trackEmbeddingsUsageAsync)({
+ requestId,
+ model,
+ promptTokens: usage.prompt_tokens,
+ totalTokens: usage.total_tokens,
+ duration,
+ usageMetadata: requestContext.metadata,
+ });
+ }
+ catch (error) {
+ logger.error("Error processing embeddings response", {
+ error: error instanceof Error ? error.message : String(error),
+ requestId,
+ });
+ }
+ }
+ /**
+ * Streaming response parser for LiteLLM SSE streams
+ */
+ class StreamingResponseParser {
+ constructor(requestId, model, requestContext, requestDuration, responseFormat) {
+ this.firstTokenTime = null;
+ this.promptTokens = 0;
+ this.completionTokens = 0;
+ this.totalTokens = 0;
+ this.finishReason = null;
+ this.logger = (0, config_1.getLogger)();
+ this.shouldCapturePrompts = false;
+ this.accumulatedContent = "";
+ this.accumulatedToolCalls = new Map();
+ this.requestId = requestId;
+ this.model = model;
+ this.requestContext = requestContext;
+ this.requestDuration = requestDuration;
+ this.startTime = Date.now();
+ this.responseFormat = responseFormat;
+ this.requestBody = extractRequestBody(requestContext);
+ this.shouldCapturePrompts = (0, prompt_extraction_1.shouldCapturePrompts)(requestContext.metadata);
+ this.maxPromptSize = (0, prompt_extraction_1.getMaxPromptSize)(requestContext.metadata);
+ }
+ async parseStream(body) {
+ const reader = body.getReader();
+ const decoder = new TextDecoder();
+ let buffer = "";
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done)
+ break;
+ // Decode chunk and add to buffer
+ buffer += decoder.decode(value, { stream: true });
+ // Process complete SSE messages
+ const lines = buffer.split("\n");
+ buffer = lines.pop() || ""; // Keep incomplete line in buffer
+ for (const line of lines) {
+ this.processSSELine(line);
+ }
+ }
+ // Process any remaining buffer content
+ if (buffer.trim()) {
+ this.processSSELine(buffer);
+ }
+ }
+ finally {
+ reader.releaseLock();
+ this.finalizeTracking();
+ }
+ }
+ processSSELine(line) {
+ const trimmed = line.trim();
+ // Skip empty lines and comments
+ if (!trimmed || trimmed.startsWith(":"))
+ return;
+ // Parse SSE data lines
+ if (trimmed.startsWith("data: ")) {
+ const data = trimmed.slice(6); // Remove 'data: ' prefix
+ // Check for stream end marker
+ if (data === "[DONE]") {
+ this.logger.debug("Stream completed", { requestId: this.requestId });
+ return;
+ }
+ try {
+ const chunk = JSON.parse(data);
+ this.processStreamChunk(chunk);
+ }
+ catch (error) {
+ this.logger.debug("Failed to parse stream chunk", {
+ requestId: this.requestId,
+ data: data.substring(0, 100),
+ });
+ }
+ }
+ }
+ processStreamChunk(chunk) {
+ // Validate and sanitize chunk data
+ const validatedChunk = (0, validation_1.validateStreamChunk)(chunk);
+ if (!validatedChunk) {
+ this.logger.debug("Invalid stream chunk received", {
+ requestId: this.requestId,
+ chunkType: typeof chunk,
+ });
+ return;
+ }
+ const chunkData = validatedChunk;
+ if (!this.responseId && chunkData.id) {
+ this.responseId = chunkData.id;
+ }
+ if (!this.responseCreated && chunkData.created) {
+ this.responseCreated = chunkData.created;
+ }
+ // Record first token time
+ if (this.firstTokenTime === null &&
+ validatedChunk.choices?.[0]?.delta?.content) {
+ this.firstTokenTime = Date.now();
+ this.logger.debug("First token received", {
+ requestId: this.requestId,
+ timeToFirstToken: this.firstTokenTime - this.startTime,
+ });
+ }
+ if (this.shouldCapturePrompts &&
+ validatedChunk.choices?.[0]?.delta?.content) {
+ const remaining = this.maxPromptSize - this.accumulatedContent.length;
+ if (remaining > 0) {
+ this.accumulatedContent +=
+ validatedChunk.choices[0].delta.content.slice(0, remaining);
+ }
+ }
+ const delta = validatedChunk.choices?.[0]?.delta;
+ if (this.shouldCapturePrompts &&
+ delta?.tool_calls &&
+ Array.isArray(delta.tool_calls)) {
+ delta.tool_calls.forEach((toolCallDelta) => {
+ const index = toolCallDelta.index;
+ if (index === undefined) {
+ return;
+ }
+ // Get or create the accumulated tool call for this index
+ let accumulated = this.accumulatedToolCalls.get(index);
+ if (!accumulated) {
+ accumulated = {
+ index,
+ id: toolCallDelta.id,
+ type: toolCallDelta.type || "function",
+ function: {
+ name: "",
+ arguments: "",
+ },
+ };
+ this.accumulatedToolCalls.set(index, accumulated);
+ }
+ // Accumulate the tool call data
+ if (toolCallDelta.id) {
+ accumulated.id = toolCallDelta.id;
+ }
+ if (toolCallDelta.type) {
+ accumulated.type = toolCallDelta.type;
+ }
+ if (toolCallDelta.function?.name) {
+ accumulated.function.name = toolCallDelta.function.name;
+ }
+ if (toolCallDelta.function?.arguments) {
+ const currentSize = accumulated.function.arguments.length;
+ const remaining = this.maxPromptSize - currentSize;
+ if (remaining > 0) {
+ accumulated.function.arguments +=
+ toolCallDelta.function.arguments.slice(0, remaining);
+ }
+ }
+ });
+ }
+ // Extract usage information (typically in the last chunk)
+ if (validatedChunk.usage) {
+ this.promptTokens = validatedChunk.usage.prompt_tokens || 0;
+ this.completionTokens = validatedChunk.usage.completion_tokens || 0;
+ this.totalTokens = validatedChunk.usage.total_tokens || 0;
+ this.logger.debug("Usage data extracted from stream", {
+ requestId: this.requestId,
+ promptTokens: this.promptTokens,
+ completionTokens: this.completionTokens,
+ totalTokens: this.totalTokens,
+ });
+ }
+ // Extract finish reason
+ if (validatedChunk.choices?.[0]?.finish_reason)
+ this.finishReason = validatedChunk.choices[0].finish_reason;
+ // Some providers send usage in different chunk structures
+ if (!this.totalTokens && validatedChunk.x_groq?.usage) {
+ // Groq-specific usage format
+ this.promptTokens = validatedChunk.x_groq.usage.prompt_tokens || 0;
+ this.completionTokens =
+ validatedChunk.x_groq.usage.completion_tokens || 0;
+ this.totalTokens = validatedChunk.x_groq.usage.total_tokens || 0;
+ }
+ }
+ finalizeTracking() {
+ const timeToFirstToken = this.firstTokenTime
+ ? this.firstTokenTime - this.startTime
+ : this.requestDuration;
+ this.logger.debug("Finalizing streaming response tracking", {
+ requestId: this.requestId,
+ model: this.model,
+ promptTokens: this.promptTokens,
+ completionTokens: this.completionTokens,
+ totalTokens: this.totalTokens,
+ finishReason: this.finishReason,
+ timeToFirstToken,
+ });
+ let reconstructedResponse;
+ if (this.shouldCapturePrompts &&
+ (this.accumulatedContent || this.accumulatedToolCalls.size > 0)) {
+ const message = {
+ role: "assistant",
+ content: this.accumulatedContent,
+ };
+ if (this.accumulatedToolCalls.size > 0) {
+ // Convert Map to array, sort by index, and remove index property (not part of OpenAI spec)
+ message.tool_calls = Array.from(this.accumulatedToolCalls.values())
+ .sort((a, b) => a.index - b.index)
+ .map((tc) => {
+ const { index, ...rest } = tc;
+ return rest;
+ });
+ }
+ reconstructedResponse = {
+ id: this.responseId || "unknown",
+ object: "chat.completion",
+ created: this.responseCreated || Math.floor(Date.now() / 1000),
+ model: this.model,
+ choices: [
+ {
+ index: 0,
+ message,
+ finish_reason: this.finishReason || "stop",
+ },
+ ],
+ usage: {
+ prompt_tokens: this.promptTokens,
+ completion_tokens: this.completionTokens,
+ total_tokens: this.totalTokens,
+ },
+ };
+ }
+ (0, tracking_1.trackUsageAsync)({
+ requestId: this.requestId,
+ model: this.model,
+ promptTokens: this.promptTokens,
+ completionTokens: this.completionTokens,
+ totalTokens: this.totalTokens,
+ duration: this.requestDuration,
+ finishReason: this.finishReason || "stop",
+ usageMetadata: this.requestContext.metadata,
+ isStreamed: true,
+ timeToFirstToken,
+ responseFormat: this.responseFormat,
+ request: this.requestBody,
+ response: reconstructedResponse,
+ });
+ }
+ }
+ /**
+ * Patch the global fetch function to intercept LiteLLM Proxy requests
+ */
+ function patchHttpClient() {
+ const clientManager = HttpClientManager.getInstance();
+ if (clientManager.isHttpClientPatched()) {
+ logger.debug("HTTP client already patched");
+ return true;
+ }
+ if (typeof globalThis.fetch !== "function") {
+ const errorContext = (0, error_handling_1.createErrorContext)()
+ .with("fetchType", typeof globalThis.fetch)
+ .build();
+ logger.error("Global fetch function not available", errorContext);
+ return false;
+ }
+ try {
+ // Store original fetch
+ const originalFetch = globalThis.fetch;
+ clientManager.setPatched(false, originalFetch);
+ // Replace with patched version
+ globalThis.fetch = createPatchedFetch();
+ clientManager.setPatched(true);
+ logger.info("LiteLLM HTTP client middleware enabled");
+ return true;
+ }
+ catch (error) {
+ const errorContext = (0, error_handling_1.createErrorContext)()
+ .with("error", error instanceof Error ? error.message : String(error))
+ .with("stack", error instanceof Error ? error.stack : undefined)
+ .build();
+ logger.error("Failed to patch HTTP client", errorContext);
+ // Throw a proper error for better debugging
+ throw new error_handling_1.PatchingError("Failed to patch HTTP client for LiteLLM interception", errorContext);
+ }
+ }
+ /**
+ * Restore the original fetch function
+ */
+ function unpatchHttpClient() {
+ const clientManager = HttpClientManager.getInstance();
+ if (!clientManager.isHttpClientPatched()) {
+ logger.debug("HTTP client not patched");
+ return true;
+ }
+ const originalFetch = clientManager.getOriginalFetch();
+ if (!originalFetch) {
+ logger.error("Original fetch function not stored");
+ return false;
+ }
+ try {
+ globalThis.fetch = originalFetch;
+ clientManager.setPatched(false);
+ logger.info("LiteLLM HTTP client middleware disabled");
+ return true;
+ }
+ catch (error) {
+ logger.error("Failed to unpatch HTTP client", {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ return false;
+ }
+ }
+ /**
+ * Check if HTTP client is patched
+ */
+ function isHttpClientPatched() {
+ return HttpClientManager.getInstance().isHttpClientPatched();
+ }
+ /**
+ * Reset HTTP client manager (for testing)
+ */
+ function resetHttpClientManager() {
+ HttpClientManager.resetInstance();
+ }
+ //# sourceMappingURL=client.js.map
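
The StreamingResponseParser above follows the standard SSE framing contract: bytes are buffered, complete lines are split off while the trailing partial line is carried over to the next read, "data:" payloads are JSON-decoded, and "data: [DONE]" marks the end of the stream. A self-contained sketch of that accumulation loop, restated from parseStream() and processSSELine() above (the readSSE helper name is hypothetical and not part of the package):

// Minimal SSE accumulation loop in the style of parseStream() above.
async function readSSE(
  body: ReadableStream<Uint8Array>,
  onData: (chunk: unknown) => void,
): Promise<void> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    buffer = lines.pop() || ""; // keep the incomplete line for the next read
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith("data: ")) continue; // skip blanks and comments
      const data = trimmed.slice(6);
      if (data === "[DONE]") return; // stream end marker
      try {
        onData(JSON.parse(data));
      } catch {
        // ignore malformed chunks, as the parser above does
      }
    }
  }
}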