@openhoo/hoopilot 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +86 -25
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +86 -25
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +12 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.js +86 -25
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -15,6 +15,13 @@ declare class MetricsRegistry {
|
|
|
15
15
|
startRequest(): void;
|
|
16
16
|
/** Record a completed request and clear its in-flight slot. */
|
|
17
17
|
observe(observation: RequestObservation): void;
|
|
18
|
+
/**
|
|
19
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
20
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
21
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
22
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
23
|
+
*/
|
|
24
|
+
recordTokenExtraction(extracted: boolean): void;
|
|
18
25
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
19
26
|
recordTokens(model: string, usage: TokenUsage): void;
|
|
20
27
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
@@ -43,7 +50,7 @@ declare class MetricsRegistry {
|
|
|
43
50
|
* branch; combined with the runtime cancelling the client branch, that releases
|
|
44
51
|
* the shared upstream connection instead of draining it in the background.
|
|
45
52
|
*/
|
|
46
|
-
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
|
|
53
|
+
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
|
|
47
54
|
|
|
48
55
|
type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
49
56
|
interface Logger {
|
|
@@ -200,6 +207,10 @@ interface MetricsSnapshot {
|
|
|
200
207
|
byModel: Record<string, ModelTokenTotals>;
|
|
201
208
|
cached: number;
|
|
202
209
|
completion: number;
|
|
210
|
+
extraction: {
|
|
211
|
+
extracted: number;
|
|
212
|
+
missing: number;
|
|
213
|
+
};
|
|
203
214
|
prompt: number;
|
|
204
215
|
reasoning: number;
|
|
205
216
|
total: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -15,6 +15,13 @@ declare class MetricsRegistry {
|
|
|
15
15
|
startRequest(): void;
|
|
16
16
|
/** Record a completed request and clear its in-flight slot. */
|
|
17
17
|
observe(observation: RequestObservation): void;
|
|
18
|
+
/**
|
|
19
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
20
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
21
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
22
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
23
|
+
*/
|
|
24
|
+
recordTokenExtraction(extracted: boolean): void;
|
|
18
25
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
19
26
|
recordTokens(model: string, usage: TokenUsage): void;
|
|
20
27
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
@@ -43,7 +50,7 @@ declare class MetricsRegistry {
|
|
|
43
50
|
* branch; combined with the runtime cancelling the client branch, that releases
|
|
44
51
|
* the shared upstream connection instead of draining it in the background.
|
|
45
52
|
*/
|
|
46
|
-
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
|
|
53
|
+
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
|
|
47
54
|
|
|
48
55
|
type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
49
56
|
interface Logger {
|
|
@@ -200,6 +207,10 @@ interface MetricsSnapshot {
|
|
|
200
207
|
byModel: Record<string, ModelTokenTotals>;
|
|
201
208
|
cached: number;
|
|
202
209
|
completion: number;
|
|
210
|
+
extraction: {
|
|
211
|
+
extracted: number;
|
|
212
|
+
missing: number;
|
|
213
|
+
};
|
|
203
214
|
prompt: number;
|
|
204
215
|
reasoning: number;
|
|
205
216
|
total: number;
|
package/dist/index.js
CHANGED
|
@@ -2220,6 +2220,7 @@ var MetricsRegistry = class {
|
|
|
2220
2220
|
#upstream = /* @__PURE__ */ new Map();
|
|
2221
2221
|
#copilotQuota;
|
|
2222
2222
|
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
2223
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
2223
2224
|
constructor(options = {}) {
|
|
2224
2225
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
2225
2226
|
}
|
|
@@ -2236,6 +2237,19 @@ var MetricsRegistry = class {
|
|
|
2236
2237
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
2237
2238
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
2238
2239
|
}
|
|
2240
|
+
/**
|
|
2241
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
2242
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
2243
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
2244
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
2245
|
+
*/
|
|
2246
|
+
recordTokenExtraction(extracted) {
|
|
2247
|
+
if (extracted) {
|
|
2248
|
+
this.#extraction.extracted += 1;
|
|
2249
|
+
} else {
|
|
2250
|
+
this.#extraction.missing += 1;
|
|
2251
|
+
}
|
|
2252
|
+
}
|
|
2239
2253
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
2240
2254
|
recordTokens(model, usage) {
|
|
2241
2255
|
const name = this.#modelLabel(model);
|
|
@@ -2343,7 +2357,7 @@ var MetricsRegistry = class {
|
|
|
2343
2357
|
inFlight: this.#inFlight,
|
|
2344
2358
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
2345
2359
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
2346
|
-
tokens: { byModel, ...tokenTotals },
|
|
2360
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
2347
2361
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
2348
2362
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
2349
2363
|
};
|
|
@@ -2393,6 +2407,16 @@ var MetricsRegistry = class {
|
|
|
2393
2407
|
for (const [model, totals] of this.#tokens) {
|
|
2394
2408
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
2395
2409
|
}
|
|
2410
|
+
lines.push(
|
|
2411
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
2412
|
+
);
|
|
2413
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
2414
|
+
lines.push(
|
|
2415
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
2416
|
+
);
|
|
2417
|
+
lines.push(
|
|
2418
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
2419
|
+
);
|
|
2396
2420
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
2397
2421
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
2398
2422
|
for (const [route, entry] of this.#durations) {
|
|
@@ -2548,23 +2572,25 @@ var MetricsRegistry = class {
|
|
|
2548
2572
|
}
|
|
2549
2573
|
}
|
|
2550
2574
|
};
|
|
2551
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2575
|
+
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
2552
2576
|
const body = response.body;
|
|
2553
2577
|
if (!body) {
|
|
2554
2578
|
return response;
|
|
2555
2579
|
}
|
|
2556
2580
|
const [clientBranch, observerBranch] = body.tee();
|
|
2557
2581
|
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
2558
|
-
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(
|
|
2559
|
-
|
|
2582
|
+
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
|
|
2583
|
+
() => {
|
|
2584
|
+
}
|
|
2585
|
+
);
|
|
2560
2586
|
return new Response(clientBranch, {
|
|
2561
2587
|
headers: response.headers,
|
|
2562
2588
|
status: response.status,
|
|
2563
2589
|
statusText: response.statusText
|
|
2564
2590
|
});
|
|
2565
2591
|
}
|
|
2566
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
2567
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2592
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2593
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
2568
2594
|
if (isSse) {
|
|
2569
2595
|
for (const line of text.split(/\r?\n/)) {
|
|
2570
2596
|
considerSseLine(line, accumulator.consider);
|
|
@@ -2577,7 +2603,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
2577
2603
|
}
|
|
2578
2604
|
accumulator.finish();
|
|
2579
2605
|
}
|
|
2580
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2606
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2581
2607
|
const reader = stream.getReader();
|
|
2582
2608
|
const onAbort = () => {
|
|
2583
2609
|
reader.cancel().catch(() => {
|
|
@@ -2590,7 +2616,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2590
2616
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
2591
2617
|
}
|
|
2592
2618
|
const decoder = new TextDecoder();
|
|
2593
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2619
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2620
|
+
if (!signal?.aborted) {
|
|
2621
|
+
onOutcome(extracted);
|
|
2622
|
+
}
|
|
2623
|
+
} : void 0;
|
|
2624
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
2594
2625
|
let buffer = "";
|
|
2595
2626
|
let bufferedBytes = 0;
|
|
2596
2627
|
let overflowed = false;
|
|
@@ -2638,7 +2669,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2638
2669
|
}
|
|
2639
2670
|
accumulator.finish();
|
|
2640
2671
|
}
|
|
2641
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2672
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2642
2673
|
let model = fallbackModel;
|
|
2643
2674
|
let usage;
|
|
2644
2675
|
return {
|
|
@@ -2657,6 +2688,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2657
2688
|
if (usage) {
|
|
2658
2689
|
onUsage(model, usage);
|
|
2659
2690
|
}
|
|
2691
|
+
onOutcome?.(usage !== void 0);
|
|
2660
2692
|
}
|
|
2661
2693
|
};
|
|
2662
2694
|
}
|
|
@@ -2765,6 +2797,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2765
2797
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2766
2798
|
const readUsage = createUsageReader(client, metrics);
|
|
2767
2799
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2800
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2768
2801
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2769
2802
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2770
2803
|
return async (request) => {
|
|
@@ -2830,6 +2863,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2830
2863
|
client,
|
|
2831
2864
|
metrics,
|
|
2832
2865
|
recordTokens,
|
|
2866
|
+
recordExtraction,
|
|
2833
2867
|
request,
|
|
2834
2868
|
requestLogger,
|
|
2835
2869
|
bufferProxyBodies
|
|
@@ -2845,6 +2879,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2845
2879
|
client,
|
|
2846
2880
|
metrics,
|
|
2847
2881
|
recordTokens,
|
|
2882
|
+
recordExtraction,
|
|
2848
2883
|
request,
|
|
2849
2884
|
requestLogger,
|
|
2850
2885
|
bufferProxyBodies
|
|
@@ -2857,6 +2892,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2857
2892
|
client,
|
|
2858
2893
|
metrics,
|
|
2859
2894
|
recordTokens,
|
|
2895
|
+
recordExtraction,
|
|
2860
2896
|
request,
|
|
2861
2897
|
requestLogger,
|
|
2862
2898
|
bufferProxyBodies
|
|
@@ -2865,7 +2901,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2865
2901
|
}
|
|
2866
2902
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2867
2903
|
return finish(
|
|
2868
|
-
await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
|
|
2904
|
+
await handleResponsesCompact(
|
|
2905
|
+
client,
|
|
2906
|
+
metrics,
|
|
2907
|
+
recordTokens,
|
|
2908
|
+
recordExtraction,
|
|
2909
|
+
request,
|
|
2910
|
+
requestLogger
|
|
2911
|
+
)
|
|
2869
2912
|
);
|
|
2870
2913
|
}
|
|
2871
2914
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2874,6 +2917,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2874
2917
|
client,
|
|
2875
2918
|
metrics,
|
|
2876
2919
|
recordTokens,
|
|
2920
|
+
recordExtraction,
|
|
2877
2921
|
request,
|
|
2878
2922
|
requestLogger,
|
|
2879
2923
|
bufferProxyBodies
|
|
@@ -2950,7 +2994,7 @@ function startHoopilotServer(options = {}) {
|
|
|
2950
2994
|
url: `http://${urlHost(host)}:${server.port}`
|
|
2951
2995
|
};
|
|
2952
2996
|
}
|
|
2953
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2997
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2954
2998
|
const anthropicRequest = await readJson(request);
|
|
2955
2999
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
2956
3000
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -2963,12 +3007,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2963
3007
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2964
3008
|
if (bufferProxyBodies) {
|
|
2965
3009
|
const text = await upstream.text();
|
|
2966
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
3010
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
2967
3011
|
return proxyResponse(
|
|
2968
3012
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
2969
3013
|
);
|
|
2970
3014
|
}
|
|
2971
|
-
const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
|
|
3015
|
+
const observed = observeResponseUsage(
|
|
3016
|
+
upstream,
|
|
3017
|
+
model,
|
|
3018
|
+
recordTokens,
|
|
3019
|
+
request.signal,
|
|
3020
|
+
recordExtraction
|
|
3021
|
+
);
|
|
2972
3022
|
if (!observed.body) {
|
|
2973
3023
|
return proxyResponse(observed);
|
|
2974
3024
|
}
|
|
@@ -2986,6 +3036,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2986
3036
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
2987
3037
|
recordTokens(responseModel || model, usage);
|
|
2988
3038
|
}
|
|
3039
|
+
recordExtraction(usage !== void 0);
|
|
2989
3040
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
2990
3041
|
}
|
|
2991
3042
|
function handleAnthropicCountTokens(body) {
|
|
@@ -3011,7 +3062,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
3011
3062
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
3012
3063
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
3013
3064
|
}
|
|
3014
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3065
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3015
3066
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
3016
3067
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
3017
3068
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -3026,11 +3077,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
3026
3077
|
model,
|
|
3027
3078
|
recordTokens,
|
|
3028
3079
|
request.signal,
|
|
3029
|
-
bufferProxyBodies
|
|
3080
|
+
bufferProxyBodies,
|
|
3081
|
+
recordExtraction
|
|
3030
3082
|
)
|
|
3031
3083
|
);
|
|
3032
3084
|
}
|
|
3033
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3085
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3034
3086
|
const body = await readJson(request);
|
|
3035
3087
|
const upstream = await client.chatCompletions(
|
|
3036
3088
|
completionsRequestToChatCompletion(body),
|
|
@@ -3045,7 +3097,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3045
3097
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
3046
3098
|
if (bufferProxyBodies) {
|
|
3047
3099
|
const upstreamText = await upstream.text();
|
|
3048
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
3100
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
3049
3101
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
3050
3102
|
return proxyResponse(responseFromText(upstream, text));
|
|
3051
3103
|
}
|
|
@@ -3058,7 +3110,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3058
3110
|
}),
|
|
3059
3111
|
model,
|
|
3060
3112
|
recordTokens,
|
|
3061
|
-
request.signal
|
|
3113
|
+
request.signal,
|
|
3114
|
+
recordExtraction
|
|
3062
3115
|
)
|
|
3063
3116
|
);
|
|
3064
3117
|
}
|
|
@@ -3068,9 +3121,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3068
3121
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
3069
3122
|
recordTokens(responseModel || model, usage);
|
|
3070
3123
|
}
|
|
3124
|
+
recordExtraction(usage !== void 0);
|
|
3071
3125
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
3072
3126
|
}
|
|
3073
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3127
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3074
3128
|
const body = await readJsonText(request);
|
|
3075
3129
|
const upstream = await client.responses(body, request.signal);
|
|
3076
3130
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -3085,11 +3139,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
3085
3139
|
model,
|
|
3086
3140
|
recordTokens,
|
|
3087
3141
|
request.signal,
|
|
3088
|
-
bufferProxyBodies
|
|
3142
|
+
bufferProxyBodies,
|
|
3143
|
+
recordExtraction
|
|
3089
3144
|
)
|
|
3090
3145
|
);
|
|
3091
3146
|
}
|
|
3092
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
3147
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
3093
3148
|
const body = await readJson(request);
|
|
3094
3149
|
const upstream = await client.responses(
|
|
3095
3150
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -3102,17 +3157,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
3102
3157
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
3103
3158
|
const isSse = isStreamingResponse(upstream);
|
|
3104
3159
|
const text = await upstream.text();
|
|
3105
|
-
recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
|
|
3160
|
+
recordResponseTextUsage(
|
|
3161
|
+
text,
|
|
3162
|
+
isSse,
|
|
3163
|
+
normalizeRequestedModel(body.model),
|
|
3164
|
+
recordTokens,
|
|
3165
|
+
recordExtraction
|
|
3166
|
+
);
|
|
3106
3167
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
3107
3168
|
}
|
|
3108
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
3169
|
+
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
|
|
3109
3170
|
const isSse = isStreamingResponse(response);
|
|
3110
3171
|
if (bufferBody && response.body) {
|
|
3111
3172
|
const text = await response.text();
|
|
3112
|
-
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
|
|
3173
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
3113
3174
|
return responseFromText(response, text);
|
|
3114
3175
|
}
|
|
3115
|
-
return observeResponseUsage(response, fallbackModel, recordTokens, signal);
|
|
3176
|
+
return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
|
|
3116
3177
|
}
|
|
3117
3178
|
function responseFromText(source, text) {
|
|
3118
3179
|
return new Response(text, {
|