@revenium/litellm 0.0.1
- package/LICENSE +21 -0
- package/README.md +630 -0
- package/dist/client.d.ts +17 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +713 -0
- package/dist/client.js.map +1 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +332 -0
- package/dist/config.js.map +1 -0
- package/dist/constants.d.ts +15 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +101 -0
- package/dist/constants.js.map +1 -0
- package/dist/index.d.ts +42 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +189 -0
- package/dist/index.js.map +1 -0
- package/dist/prompt-extraction.d.ts +11 -0
- package/dist/prompt-extraction.d.ts.map +1 -0
- package/dist/prompt-extraction.js +201 -0
- package/dist/prompt-extraction.js.map +1 -0
- package/dist/tracking.d.ts +47 -0
- package/dist/tracking.d.ts.map +1 -0
- package/dist/tracking.js +299 -0
- package/dist/tracking.js.map +1 -0
- package/dist/types.d.ts +348 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/circuit-breaker.d.ts +114 -0
- package/dist/utils/circuit-breaker.d.ts.map +1 -0
- package/dist/utils/circuit-breaker.js +216 -0
- package/dist/utils/circuit-breaker.js.map +1 -0
- package/dist/utils/error-handling.d.ts +166 -0
- package/dist/utils/error-handling.d.ts.map +1 -0
- package/dist/utils/error-handling.js +306 -0
- package/dist/utils/error-handling.js.map +1 -0
- package/dist/utils/logger-types.d.ts +171 -0
- package/dist/utils/logger-types.d.ts.map +1 -0
- package/dist/utils/logger-types.js +210 -0
- package/dist/utils/logger-types.js.map +1 -0
- package/dist/utils/provider-detection.d.ts +43 -0
- package/dist/utils/provider-detection.d.ts.map +1 -0
- package/dist/utils/provider-detection.js +103 -0
- package/dist/utils/provider-detection.js.map +1 -0
- package/dist/utils/stop-reason.d.ts +58 -0
- package/dist/utils/stop-reason.d.ts.map +1 -0
- package/dist/utils/stop-reason.js +136 -0
- package/dist/utils/stop-reason.js.map +1 -0
- package/dist/utils/summary-printer.d.ts +23 -0
- package/dist/utils/summary-printer.d.ts.map +1 -0
- package/dist/utils/summary-printer.js +234 -0
- package/dist/utils/summary-printer.js.map +1 -0
- package/dist/utils/trace-fields.d.ts +10 -0
- package/dist/utils/trace-fields.d.ts.map +1 -0
- package/dist/utils/trace-fields.js +117 -0
- package/dist/utils/trace-fields.js.map +1 -0
- package/dist/utils/validation.d.ts +121 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +451 -0
- package/dist/utils/validation.js.map +1 -0
- package/examples/README.md +321 -0
- package/examples/litellm-basic.ts +240 -0
- package/examples/litellm-streaming.ts +309 -0
- package/examples/prompt-capture.ts +128 -0
- package/package.json +85 -0
package/dist/client.js
ADDED
@@ -0,0 +1,713 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.patchHttpClient = patchHttpClient;
exports.unpatchHttpClient = unpatchHttpClient;
exports.isHttpClientPatched = isHttpClientPatched;
exports.resetHttpClientManager = resetHttpClientManager;
const config_1 = require("./config");
const tracking_1 = require("./tracking");
const crypto_1 = require("crypto");
const validation_1 = require("./utils/validation");
const error_handling_1 = require("./utils/error-handling");
const constants_1 = require("./constants");
const prompt_extraction_1 = require("./prompt-extraction");
// Global logger
const logger = (0, config_1.getLogger)();
/**
 * HTTP client manager singleton for proper state management
 */
class HttpClientManager {
    constructor() {
        this.isPatched = false;
        this.originalFetch = null;
        // Private constructor to enforce singleton pattern
    }
    /**
     * Get the singleton instance
     */
    static getInstance() {
        if (!HttpClientManager.instance) {
            HttpClientManager.instance = new HttpClientManager();
        }
        return HttpClientManager.instance;
    }
    /**
     * Reset the singleton instance (for testing)
     */
    static resetInstance() {
        HttpClientManager.instance = null;
    }
    /**
     * Check if HTTP client is patched
     */
    isHttpClientPatched() {
        return this.isPatched;
    }
    /**
     * Get the original fetch function
     */
    getOriginalFetch() {
        return this.originalFetch;
    }
    /**
     * Set the patched state and store original fetch
     */
    setPatched(patched, originalFetch) {
        this.isPatched = patched;
        if (originalFetch)
            this.originalFetch = originalFetch;
    }
    /**
     * Reset to unpatched state (for testing)
     */
    reset() {
        this.isPatched = false;
        this.originalFetch = null;
    }
}
HttpClientManager.instance = null;
/**
 * Check if a URL is a LiteLLM Proxy endpoint (chat completions or embeddings)
 */
function isLiteLLMProxyRequest(url, config) {
    try {
        const requestUrl = new URL(url);
        const proxyUrl = new URL(config.litellmProxyUrl);
        // Check if the request is going to our configured LiteLLM Proxy
        const isSameHost = requestUrl.hostname === proxyUrl.hostname;
        const isSamePort = requestUrl.port === proxyUrl.port ||
            ((requestUrl.port === "80" || requestUrl.port === "443") &&
                proxyUrl.port === "");
        // Handle two cases:
        // 1. Proxy URL is a base URL (e.g., http://localhost:4000) - check if request is to supported endpoint
        // 2. Proxy URL is a full endpoint URL (e.g., http://localhost:4000/chat/completions) - check exact match
        let isCorrectEndpoint = false;
        if (constants_1.supportedEndpoints.some((endpoint) => proxyUrl.pathname.endsWith(endpoint))) {
            // Case 2: Proxy URL includes the endpoint path - check exact path match
            isCorrectEndpoint = requestUrl.pathname === proxyUrl.pathname;
        }
        else {
            // Case 1: Proxy URL is base URL - check if request is to any supported endpoint
            isCorrectEndpoint = constants_1.supportedEndpoints.some((endpoint) => requestUrl.pathname.endsWith(endpoint));
        }
        return isSameHost && isSamePort && isCorrectEndpoint;
    }
    catch (error) {
        return false;
    }
}
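// Illustrative matching behavior, assuming supportedEndpoints (defined in
// ./constants, not shown in this diff) covers the chat-completions and
// embeddings paths:
//   proxy "http://localhost:4000" + request "http://localhost:4000/v1/chat/completions"  -> intercepted
//   proxy "http://localhost:4000/chat/completions" + request to that exact path          -> intercepted
//   proxy "http://localhost:4000" + request "https://api.example.com/v1/chat/completions" -> passed through (host mismatch)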
/**
 * Create patched fetch function that intercepts LiteLLM Proxy requests
 */
function createPatchedFetch() {
    return async function patchedFetch(input, init) {
        const config = (0, config_1.getConfig)();
        // Convert input to URL string for checking
        const url = typeof input === "string"
            ? input
            : input instanceof URL
                ? input.toString()
                : input.url;
        // Only intercept LiteLLM Proxy requests if we have config
        if (!config || !isLiteLLMProxyRequest(url, config)) {
            const clientManager = HttpClientManager.getInstance();
            const originalFetchFn = clientManager.getOriginalFetch();
            if (!originalFetchFn)
                throw new Error("Original fetch function not available");
            return originalFetchFn(input, init);
        }
        // Validate the URL against our proxy configuration
        const urlValidation = (0, validation_1.validateLiteLLMUrl)(url, config.litellmProxyUrl);
        if (!urlValidation.isValid) {
            logger.warn("Invalid LiteLLM proxy URL detected", {
                url,
                errors: urlValidation.errors,
                configuredProxy: config.litellmProxyUrl,
            });
            // Continue with original fetch for invalid URLs
            const clientManager = HttpClientManager.getInstance();
            const originalFetchFn = clientManager.getOriginalFetch();
            if (!originalFetchFn)
                throw new Error("Original fetch function not available");
            return originalFetchFn(input, init);
        }
        // Extract and validate request context
        const rawHeaders = init?.headers
            ? Object.fromEntries(new Headers(init.headers))
            : {};
        const validatedHeaders = (0, validation_1.validateHeaders)(rawHeaders);
        const requestContext = {
            url,
            method: init?.method || "GET",
            headers: validatedHeaders,
            body: init?.body || null,
            startTime: Date.now(),
            metadata: (0, tracking_1.extractMetadataFromHeaders)(validatedHeaders),
        };
        const requestId = (0, crypto_1.randomUUID)();
        logger.debug("Intercepted LiteLLM Proxy request", {
            url: requestContext.url,
            method: requestContext.method,
            requestId,
            hasMetadata: !!requestContext.metadata,
        });
        try {
            // Add LiteLLM Proxy authentication if configured
            const headers = new Headers(init?.headers);
            if (config.litellmApiKey)
                headers.set("Authorization", `Bearer ${config.litellmApiKey}`);
            // Make the actual request
            const clientManager = HttpClientManager.getInstance();
            const originalFetchFn = clientManager.getOriginalFetch();
            if (!originalFetchFn)
                throw new Error("Original fetch function not available");
            const response = await originalFetchFn(input, {
                ...init,
                headers,
            });
            const endTime = Date.now();
            const duration = endTime - requestContext.startTime;
            // Clone response to read body without consuming it
            const responseClone = response.clone();
            logger.debug("LiteLLM Proxy response received", {
                status: response.status,
                requestId,
                duration,
            });
            // Handle successful chat completion responses
            if (response.ok && requestContext.method === "POST") {
                handleSuccessfulResponse(requestContext, response, responseClone, requestId, duration);
            }
            else if (!response.ok) {
                logger.warn("LiteLLM Proxy request failed", {
                    status: response.status,
                    statusText: response.statusText,
                    requestId,
                });
            }
            return response;
        }
        catch (error) {
            const endTime = Date.now();
            const duration = endTime - requestContext.startTime;
            logger.error("LiteLLM Proxy request error", {
                error: error instanceof Error ? error.message : String(error),
                requestId,
                duration,
            });
            throw error;
        }
    };
}
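// Once patchHttpClient() installs this wrapper, plain fetch() calls to the
// configured proxy are metered transparently. A minimal sketch (URL and model
// are placeholders, not values this package prescribes):
//
//   await fetch("http://localhost:4000/v1/chat/completions", {
//     method: "POST",
//     headers: { "Content-Type": "application/json" },
//     body: JSON.stringify({
//       model: "gpt-4o-mini",
//       messages: [{ role: "user", content: "Hello" }],
//     }),
//   });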
/**
 * Parse request body for either chat completions or embeddings
 */
async function parseRequestBody(requestContext, requestId, endpointType) {
    if (!requestContext.body)
        return null;
    try {
        const bodyText = typeof requestContext.body === "string"
            ? requestContext.body
            : await new Response(requestContext.body).text();
        return JSON.parse(bodyText);
    }
    catch (error) {
        logger.warn(`Failed to parse ${endpointType} request body`, {
            requestId,
            error: error instanceof Error ? error.message : String(error),
        });
        return null;
    }
}
/**
 * Handle successful chat completion response and track usage
 */
async function handleSuccessfulResponse(requestContext, originalResponse, responseClone, requestId, duration) {
    try {
        // Determine endpoint type from URL
        const url = new URL(requestContext.url);
        const isEmbeddingsEndpoint = url.pathname.endsWith("/embeddings") ||
            url.pathname.endsWith("/v1/embeddings");
        if (isEmbeddingsEndpoint) {
            // Handle embeddings request
            const requestData = await parseRequestBody(requestContext, requestId, "embeddings");
            const model = requestData?.model || "unknown";
            await handleEmbeddingResponse(responseClone, requestContext, requestId, duration, model);
        }
        else {
            // Handle chat completions request
            const requestData = await parseRequestBody(requestContext, requestId, "chat");
            const isStreaming = requestData?.stream === true;
            const model = requestData?.model || "unknown";
            const responseFormat = requestData?.response_format;
            if (isStreaming) {
                await handleStreamingResponse(responseClone, requestContext, requestId, duration, model, responseFormat);
            }
            else {
                handleNonStreamingResponse(responseClone, requestContext, requestId, duration, model, responseFormat);
            }
        }
    }
    catch (error) {
        logger.error("Error handling LiteLLM response", {
            error: error instanceof Error ? error.message : String(error),
            requestId,
        });
    }
}
/**
 * Handle non-streaming chat completion response
 */
async function handleNonStreamingResponse(response, requestContext, requestId, duration, model, responseFormat) {
    try {
        const responseData = await response.json();
        const usage = (0, tracking_1.extractUsageFromResponse)(responseData);
        logger.debug("Extracted usage from non-streaming response", {
            requestId,
            model,
            ...usage,
        });
        const requestBody = extractRequestBody(requestContext);
        (0, tracking_1.trackUsageAsync)({
            requestId,
            model,
            promptTokens: usage.promptTokens,
            completionTokens: usage.completionTokens,
            totalTokens: usage.totalTokens,
            duration,
            finishReason: usage.finishReason,
            usageMetadata: requestContext.metadata,
            isStreamed: false,
            responseFormat,
            request: requestBody,
            response: responseData,
        });
    }
    catch (error) {
        logger.error("Error processing non-streaming response", {
            error: error instanceof Error ? error.message : String(error),
            requestId,
        });
    }
}
/**
 * Extract request body from RequestContext
 */
function extractRequestBody(requestContext) {
    try {
        if (typeof requestContext.body === "string") {
            return JSON.parse(requestContext.body);
        }
        else if (typeof requestContext.body === "object" &&
            requestContext.body !== null &&
            "model" in requestContext.body &&
            "messages" in requestContext.body) {
            return requestContext.body;
        }
    }
    catch (e) {
        logger.debug("Failed to parse request body for prompt capture", {
            error: e instanceof Error ? e.message : String(e),
            bodyType: typeof requestContext.body,
        });
    }
    return undefined;
}
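// A body qualifies for capture only when it parses to an object carrying both
// "model" and "messages", e.g. (illustrative values):
//   {"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello"}]}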
/**
 * Handle streaming chat completion response
 */
async function handleStreamingResponse(response, requestContext, requestId, duration, model, responseFormat) {
    logger.debug("Processing streaming response", { requestId, model });
    const requestBody = extractRequestBody(requestContext);
    if (!response.body) {
        logger.warn("Streaming response has no body", { requestId });
        (0, tracking_1.trackUsageAsync)({
            requestId,
            model,
            promptTokens: 0,
            completionTokens: 0,
            totalTokens: 0,
            duration,
            finishReason: "stop",
            usageMetadata: requestContext.metadata,
            isStreamed: true,
            timeToFirstToken: duration,
            responseFormat,
            request: requestBody,
        });
        return;
    }
    try {
        const streamParser = new StreamingResponseParser(requestId, model, requestContext, duration, responseFormat);
        await streamParser.parseStream(response.body);
    }
    catch (error) {
        logger.error("Error parsing streaming response", {
            error: error instanceof Error ? error.message : String(error),
            requestId,
        });
        (0, tracking_1.trackUsageAsync)({
            requestId,
            model,
            promptTokens: 0,
            completionTokens: 0,
            totalTokens: 0,
            duration,
            finishReason: "error",
            usageMetadata: requestContext.metadata,
            isStreamed: true,
            timeToFirstToken: duration,
            responseFormat,
            request: requestBody,
        });
    }
}
/**
 * Handle embeddings response and track usage
 */
async function handleEmbeddingResponse(response, requestContext, requestId, duration, model) {
    try {
        const responseData = await response.json();
        const usage = responseData.usage;
        logger.debug("Extracted usage from embeddings response", {
            requestId,
            model,
            promptTokens: usage.prompt_tokens,
            totalTokens: usage.total_tokens,
        });
        // Track embeddings usage asynchronously
        (0, tracking_1.trackEmbeddingsUsageAsync)({
            requestId,
            model,
            promptTokens: usage.prompt_tokens,
            totalTokens: usage.total_tokens,
            duration,
            usageMetadata: requestContext.metadata,
        });
    }
    catch (error) {
        logger.error("Error processing embeddings response", {
            error: error instanceof Error ? error.message : String(error),
            requestId,
        });
    }
}
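// OpenAI-style embeddings responses report usage without completion tokens,
// which is why only prompt_tokens and total_tokens are tracked above, e.g.:
//   { "usage": { "prompt_tokens": 8, "total_tokens": 8 } }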
/**
 * Streaming response parser for LiteLLM SSE streams
 */
class StreamingResponseParser {
    constructor(requestId, model, requestContext, requestDuration, responseFormat) {
        this.firstTokenTime = null;
        this.promptTokens = 0;
        this.completionTokens = 0;
        this.totalTokens = 0;
        this.finishReason = null;
        this.logger = (0, config_1.getLogger)();
        this.shouldCapturePrompts = false;
        this.accumulatedContent = "";
        this.accumulatedToolCalls = new Map();
        this.requestId = requestId;
        this.model = model;
        this.requestContext = requestContext;
        this.requestDuration = requestDuration;
        this.startTime = Date.now();
        this.responseFormat = responseFormat;
        this.requestBody = extractRequestBody(requestContext);
        this.shouldCapturePrompts = (0, prompt_extraction_1.shouldCapturePrompts)(requestContext.metadata);
        this.maxPromptSize = (0, prompt_extraction_1.getMaxPromptSize)(requestContext.metadata);
    }
    async parseStream(body) {
        const reader = body.getReader();
        const decoder = new TextDecoder();
        let buffer = "";
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done)
                    break;
                // Decode chunk and add to buffer
                buffer += decoder.decode(value, { stream: true });
                // Process complete SSE messages
                const lines = buffer.split("\n");
                buffer = lines.pop() || ""; // Keep incomplete line in buffer
                for (const line of lines) {
                    this.processSSELine(line);
                }
            }
            // Process any remaining buffer content
            if (buffer.trim()) {
                this.processSSELine(buffer);
            }
        }
        finally {
            reader.releaseLock();
            this.finalizeTracking();
        }
    }
    processSSELine(line) {
        const trimmed = line.trim();
        // Skip empty lines and comments
        if (!trimmed || trimmed.startsWith(":"))
            return;
        // Parse SSE data lines
        if (trimmed.startsWith("data: ")) {
            const data = trimmed.slice(6); // Remove 'data: ' prefix
            // Check for stream end marker
            if (data === "[DONE]") {
                this.logger.debug("Stream completed", { requestId: this.requestId });
                return;
            }
            try {
                const chunk = JSON.parse(data);
                this.processStreamChunk(chunk);
            }
            catch (error) {
                this.logger.debug("Failed to parse stream chunk", {
                    requestId: this.requestId,
                    data: data.substring(0, 100),
                });
            }
        }
    }
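    // Example of the SSE frames this parser consumes (OpenAI-compatible
    // streaming format; values are illustrative):
    //   data: {"id":"chatcmpl-1","created":1700000000,"choices":[{"index":0,"delta":{"content":"Hel"}}]}
    //   data: {"id":"chatcmpl-1","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":5,"completion_tokens":7,"total_tokens":12}}
    //   data: [DONE]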
    processStreamChunk(chunk) {
        // Validate and sanitize chunk data
        const validatedChunk = (0, validation_1.validateStreamChunk)(chunk);
        if (!validatedChunk) {
            this.logger.debug("Invalid stream chunk received", {
                requestId: this.requestId,
                chunkType: typeof chunk,
            });
            return;
        }
        const chunkData = validatedChunk;
        if (!this.responseId && chunkData.id) {
            this.responseId = chunkData.id;
        }
        if (!this.responseCreated && chunkData.created) {
            this.responseCreated = chunkData.created;
        }
        // Record first token time
        if (this.firstTokenTime === null &&
            validatedChunk.choices?.[0]?.delta?.content) {
            this.firstTokenTime = Date.now();
            this.logger.debug("First token received", {
                requestId: this.requestId,
                timeToFirstToken: this.firstTokenTime - this.startTime,
            });
        }
        if (this.shouldCapturePrompts &&
            validatedChunk.choices?.[0]?.delta?.content) {
            const remaining = this.maxPromptSize - this.accumulatedContent.length;
            if (remaining > 0) {
                this.accumulatedContent +=
                    validatedChunk.choices[0].delta.content.slice(0, remaining);
            }
        }
        const delta = validatedChunk.choices?.[0]?.delta;
        if (this.shouldCapturePrompts &&
            delta?.tool_calls &&
            Array.isArray(delta.tool_calls)) {
            delta.tool_calls.forEach((toolCallDelta) => {
                const index = toolCallDelta.index;
                if (index === undefined) {
                    return;
                }
                // Get or create the accumulated tool call for this index
                let accumulated = this.accumulatedToolCalls.get(index);
                if (!accumulated) {
                    accumulated = {
                        index,
                        id: toolCallDelta.id,
                        type: toolCallDelta.type || "function",
                        function: {
                            name: "",
                            arguments: "",
                        },
                    };
                    this.accumulatedToolCalls.set(index, accumulated);
                }
                // Accumulate the tool call data
                if (toolCallDelta.id) {
                    accumulated.id = toolCallDelta.id;
                }
                if (toolCallDelta.type) {
                    accumulated.type = toolCallDelta.type;
                }
                if (toolCallDelta.function?.name) {
                    accumulated.function.name = toolCallDelta.function.name;
                }
                if (toolCallDelta.function?.arguments) {
                    const currentSize = accumulated.function.arguments.length;
                    const remaining = this.maxPromptSize - currentSize;
                    if (remaining > 0) {
                        accumulated.function.arguments +=
                            toolCallDelta.function.arguments.slice(0, remaining);
                    }
                }
            });
        }
        // Extract usage information (typically in the last chunk)
        if (validatedChunk.usage) {
            this.promptTokens = validatedChunk.usage.prompt_tokens || 0;
            this.completionTokens = validatedChunk.usage.completion_tokens || 0;
            this.totalTokens = validatedChunk.usage.total_tokens || 0;
            this.logger.debug("Usage data extracted from stream", {
                requestId: this.requestId,
                promptTokens: this.promptTokens,
                completionTokens: this.completionTokens,
                totalTokens: this.totalTokens,
            });
        }
        // Extract finish reason
        if (validatedChunk.choices?.[0]?.finish_reason)
            this.finishReason = validatedChunk.choices[0].finish_reason;
        // Some providers send usage in different chunk structures
        if (!this.totalTokens && validatedChunk.x_groq?.usage) {
            // Groq-specific usage format
            this.promptTokens = validatedChunk.x_groq.usage.prompt_tokens || 0;
            this.completionTokens =
                validatedChunk.x_groq.usage.completion_tokens || 0;
            this.totalTokens = validatedChunk.x_groq.usage.total_tokens || 0;
        }
    }
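    // Tool-call deltas arrive fragmented across chunks and are stitched back
    // together per index above, e.g. (illustrative frames):
    //   {"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"get_weather","arguments":""}}]}}
    //   {"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\":"}}]}}
    //   {"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"Paris\"}"}}]}}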
    finalizeTracking() {
        const timeToFirstToken = this.firstTokenTime
            ? this.firstTokenTime - this.startTime
            : this.requestDuration;
        this.logger.debug("Finalizing streaming response tracking", {
            requestId: this.requestId,
            model: this.model,
            promptTokens: this.promptTokens,
            completionTokens: this.completionTokens,
            totalTokens: this.totalTokens,
            finishReason: this.finishReason,
            timeToFirstToken,
        });
        let reconstructedResponse;
        if (this.shouldCapturePrompts &&
            (this.accumulatedContent || this.accumulatedToolCalls.size > 0)) {
            const message = {
                role: "assistant",
                content: this.accumulatedContent,
            };
            if (this.accumulatedToolCalls.size > 0) {
                // Convert Map to array, sort by index, and remove index property (not part of OpenAI spec)
                message.tool_calls = Array.from(this.accumulatedToolCalls.values())
                    .sort((a, b) => a.index - b.index)
                    .map((tc) => {
                        const { index, ...rest } = tc;
                        return rest;
                    });
            }
            reconstructedResponse = {
                id: this.responseId || "unknown",
                object: "chat.completion",
                created: this.responseCreated || Math.floor(Date.now() / 1000),
                model: this.model,
                choices: [
                    {
                        index: 0,
                        message,
                        finish_reason: this.finishReason || "stop",
                    },
                ],
                usage: {
                    prompt_tokens: this.promptTokens,
                    completion_tokens: this.completionTokens,
                    total_tokens: this.totalTokens,
                },
            };
        }
        (0, tracking_1.trackUsageAsync)({
            requestId: this.requestId,
            model: this.model,
            promptTokens: this.promptTokens,
            completionTokens: this.completionTokens,
            totalTokens: this.totalTokens,
            duration: this.requestDuration,
            finishReason: this.finishReason || "stop",
            usageMetadata: this.requestContext.metadata,
            isStreamed: true,
            timeToFirstToken,
            responseFormat: this.responseFormat,
            request: this.requestBody,
            response: reconstructedResponse,
        });
    }
}
/**
 * Patch the global fetch function to intercept LiteLLM Proxy requests
 */
function patchHttpClient() {
    const clientManager = HttpClientManager.getInstance();
    if (clientManager.isHttpClientPatched()) {
        logger.debug("HTTP client already patched");
        return true;
    }
    if (typeof globalThis.fetch !== "function") {
        const errorContext = (0, error_handling_1.createErrorContext)()
            .with("fetchType", typeof globalThis.fetch)
            .build();
        logger.error("Global fetch function not available", errorContext);
        return false;
    }
    try {
        // Store original fetch
        const originalFetch = globalThis.fetch;
        clientManager.setPatched(false, originalFetch);
        // Replace with patched version
        globalThis.fetch = createPatchedFetch();
        clientManager.setPatched(true);
        logger.info("LiteLLM HTTP client middleware enabled");
        return true;
    }
    catch (error) {
        const errorContext = (0, error_handling_1.createErrorContext)()
            .with("error", error instanceof Error ? error.message : String(error))
            .with("stack", error instanceof Error ? error.stack : undefined)
            .build();
        logger.error("Failed to patch HTTP client", errorContext);
        // Throw a proper error for better debugging
        throw new error_handling_1.PatchingError("Failed to patch HTTP client for LiteLLM interception", errorContext);
    }
}
/**
 * Restore the original fetch function
 */
function unpatchHttpClient() {
    const clientManager = HttpClientManager.getInstance();
    if (!clientManager.isHttpClientPatched()) {
        logger.debug("HTTP client not patched");
        return true;
    }
    const originalFetch = clientManager.getOriginalFetch();
    if (!originalFetch) {
        logger.error("Original fetch function not stored");
        return false;
    }
    try {
        globalThis.fetch = originalFetch;
        clientManager.setPatched(false);
        logger.info("LiteLLM HTTP client middleware disabled");
        return true;
    }
    catch (error) {
        logger.error("Failed to unpatch HTTP client", {
            error: error instanceof Error ? error.message : String(error),
        });
        return false;
    }
}
/**
 * Check if HTTP client is patched
 */
function isHttpClientPatched() {
    return HttpClientManager.getInstance().isHttpClientPatched();
}
/**
 * Reset HTTP client manager (for testing)
 */
function resetHttpClientManager() {
    HttpClientManager.resetInstance();
}
//# sourceMappingURL=client.js.map
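
A minimal lifecycle sketch for the four exports above, assuming CommonJS consumption; the exact import specifier depends on package.json fields this section does not show, so the bare package name is an assumption:

    const {
        patchHttpClient,
        isHttpClientPatched,
        unpatchHttpClient,
    } = require("@revenium/litellm"); // specifier assumed; dist/client.js is the compiled source

    if (patchHttpClient()) {
        console.log(isHttpClientPatched()); // true: global fetch is now wrapped
        // ...issue LiteLLM Proxy requests via fetch() as usual...
        unpatchHttpClient(); // restore the original fetch
    }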