@openhoo/hoopilot 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +86 -25
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +86 -25
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +12 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.js +86 -25
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -2297,6 +2297,7 @@ var MetricsRegistry = class {
|
|
|
2297
2297
|
#upstream = /* @__PURE__ */ new Map();
|
|
2298
2298
|
#copilotQuota;
|
|
2299
2299
|
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
2300
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
2300
2301
|
constructor(options = {}) {
|
|
2301
2302
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
2302
2303
|
}
|
|
@@ -2313,6 +2314,19 @@ var MetricsRegistry = class {
|
|
|
2313
2314
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
2314
2315
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
2315
2316
|
}
|
|
2317
|
+
/**
|
|
2318
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
2319
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
2320
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
2321
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
2322
|
+
*/
|
|
2323
|
+
recordTokenExtraction(extracted) {
|
|
2324
|
+
if (extracted) {
|
|
2325
|
+
this.#extraction.extracted += 1;
|
|
2326
|
+
} else {
|
|
2327
|
+
this.#extraction.missing += 1;
|
|
2328
|
+
}
|
|
2329
|
+
}
|
|
2316
2330
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
2317
2331
|
recordTokens(model, usage) {
|
|
2318
2332
|
const name = this.#modelLabel(model);
|
|
@@ -2420,7 +2434,7 @@ var MetricsRegistry = class {
|
|
|
2420
2434
|
inFlight: this.#inFlight,
|
|
2421
2435
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
2422
2436
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
2423
|
-
tokens: { byModel, ...tokenTotals },
|
|
2437
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
2424
2438
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
2425
2439
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
2426
2440
|
};
|
|
@@ -2470,6 +2484,16 @@ var MetricsRegistry = class {
|
|
|
2470
2484
|
for (const [model, totals] of this.#tokens) {
|
|
2471
2485
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
2472
2486
|
}
|
|
2487
|
+
lines.push(
|
|
2488
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
2489
|
+
);
|
|
2490
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
2491
|
+
lines.push(
|
|
2492
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
2493
|
+
);
|
|
2494
|
+
lines.push(
|
|
2495
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
2496
|
+
);
|
|
2473
2497
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
2474
2498
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
2475
2499
|
for (const [route, entry] of this.#durations) {
|
|
@@ -2625,23 +2649,25 @@ var MetricsRegistry = class {
|
|
|
2625
2649
|
}
|
|
2626
2650
|
}
|
|
2627
2651
|
};
|
|
2628
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2652
|
+
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
2629
2653
|
const body = response.body;
|
|
2630
2654
|
if (!body) {
|
|
2631
2655
|
return response;
|
|
2632
2656
|
}
|
|
2633
2657
|
const [clientBranch, observerBranch] = body.tee();
|
|
2634
2658
|
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
2635
|
-
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(
|
|
2636
|
-
|
|
2659
|
+
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
|
|
2660
|
+
() => {
|
|
2661
|
+
}
|
|
2662
|
+
);
|
|
2637
2663
|
return new Response(clientBranch, {
|
|
2638
2664
|
headers: response.headers,
|
|
2639
2665
|
status: response.status,
|
|
2640
2666
|
statusText: response.statusText
|
|
2641
2667
|
});
|
|
2642
2668
|
}
|
|
2643
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
2644
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2669
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2670
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
2645
2671
|
if (isSse) {
|
|
2646
2672
|
for (const line of text.split(/\r?\n/)) {
|
|
2647
2673
|
considerSseLine(line, accumulator.consider);
|
|
@@ -2654,7 +2680,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
2654
2680
|
}
|
|
2655
2681
|
accumulator.finish();
|
|
2656
2682
|
}
|
|
2657
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2683
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2658
2684
|
const reader = stream.getReader();
|
|
2659
2685
|
const onAbort = () => {
|
|
2660
2686
|
reader.cancel().catch(() => {
|
|
@@ -2667,7 +2693,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2667
2693
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
2668
2694
|
}
|
|
2669
2695
|
const decoder = new TextDecoder();
|
|
2670
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2696
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2697
|
+
if (!signal?.aborted) {
|
|
2698
|
+
onOutcome(extracted);
|
|
2699
|
+
}
|
|
2700
|
+
} : void 0;
|
|
2701
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
2671
2702
|
let buffer = "";
|
|
2672
2703
|
let bufferedBytes = 0;
|
|
2673
2704
|
let overflowed = false;
|
|
@@ -2715,7 +2746,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2715
2746
|
}
|
|
2716
2747
|
accumulator.finish();
|
|
2717
2748
|
}
|
|
2718
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2749
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2719
2750
|
let model = fallbackModel;
|
|
2720
2751
|
let usage;
|
|
2721
2752
|
return {
|
|
@@ -2734,6 +2765,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2734
2765
|
if (usage) {
|
|
2735
2766
|
onUsage(model, usage);
|
|
2736
2767
|
}
|
|
2768
|
+
onOutcome?.(usage !== void 0);
|
|
2737
2769
|
}
|
|
2738
2770
|
};
|
|
2739
2771
|
}
|
|
@@ -2842,6 +2874,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2842
2874
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2843
2875
|
const readUsage = createUsageReader(client, metrics);
|
|
2844
2876
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2877
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2845
2878
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2846
2879
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2847
2880
|
return async (request) => {
|
|
@@ -2907,6 +2940,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2907
2940
|
client,
|
|
2908
2941
|
metrics,
|
|
2909
2942
|
recordTokens,
|
|
2943
|
+
recordExtraction,
|
|
2910
2944
|
request,
|
|
2911
2945
|
requestLogger,
|
|
2912
2946
|
bufferProxyBodies
|
|
@@ -2922,6 +2956,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2922
2956
|
client,
|
|
2923
2957
|
metrics,
|
|
2924
2958
|
recordTokens,
|
|
2959
|
+
recordExtraction,
|
|
2925
2960
|
request,
|
|
2926
2961
|
requestLogger,
|
|
2927
2962
|
bufferProxyBodies
|
|
@@ -2934,6 +2969,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2934
2969
|
client,
|
|
2935
2970
|
metrics,
|
|
2936
2971
|
recordTokens,
|
|
2972
|
+
recordExtraction,
|
|
2937
2973
|
request,
|
|
2938
2974
|
requestLogger,
|
|
2939
2975
|
bufferProxyBodies
|
|
@@ -2942,7 +2978,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2942
2978
|
}
|
|
2943
2979
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2944
2980
|
return finish(
|
|
2945
|
-
await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
|
|
2981
|
+
await handleResponsesCompact(
|
|
2982
|
+
client,
|
|
2983
|
+
metrics,
|
|
2984
|
+
recordTokens,
|
|
2985
|
+
recordExtraction,
|
|
2986
|
+
request,
|
|
2987
|
+
requestLogger
|
|
2988
|
+
)
|
|
2946
2989
|
);
|
|
2947
2990
|
}
|
|
2948
2991
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2951,6 +2994,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2951
2994
|
client,
|
|
2952
2995
|
metrics,
|
|
2953
2996
|
recordTokens,
|
|
2997
|
+
recordExtraction,
|
|
2954
2998
|
request,
|
|
2955
2999
|
requestLogger,
|
|
2956
3000
|
bufferProxyBodies
|
|
@@ -3027,7 +3071,7 @@ function startHoopilotServer(options = {}) {
|
|
|
3027
3071
|
url: `http://${urlHost(host)}:${server.port}`
|
|
3028
3072
|
};
|
|
3029
3073
|
}
|
|
3030
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3074
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3031
3075
|
const anthropicRequest = await readJson(request);
|
|
3032
3076
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
3033
3077
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -3040,12 +3084,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
3040
3084
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
3041
3085
|
if (bufferProxyBodies) {
|
|
3042
3086
|
const text = await upstream.text();
|
|
3043
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
3087
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
3044
3088
|
return proxyResponse(
|
|
3045
3089
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
3046
3090
|
);
|
|
3047
3091
|
}
|
|
3048
|
-
const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
|
|
3092
|
+
const observed = observeResponseUsage(
|
|
3093
|
+
upstream,
|
|
3094
|
+
model,
|
|
3095
|
+
recordTokens,
|
|
3096
|
+
request.signal,
|
|
3097
|
+
recordExtraction
|
|
3098
|
+
);
|
|
3049
3099
|
if (!observed.body) {
|
|
3050
3100
|
return proxyResponse(observed);
|
|
3051
3101
|
}
|
|
@@ -3063,6 +3113,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
3063
3113
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
3064
3114
|
recordTokens(responseModel || model, usage);
|
|
3065
3115
|
}
|
|
3116
|
+
recordExtraction(usage !== void 0);
|
|
3066
3117
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
3067
3118
|
}
|
|
3068
3119
|
function handleAnthropicCountTokens(body) {
|
|
@@ -3088,7 +3139,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
3088
3139
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
3089
3140
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
3090
3141
|
}
|
|
3091
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3142
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3092
3143
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
3093
3144
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
3094
3145
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -3103,11 +3154,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
3103
3154
|
model,
|
|
3104
3155
|
recordTokens,
|
|
3105
3156
|
request.signal,
|
|
3106
|
-
bufferProxyBodies
|
|
3157
|
+
bufferProxyBodies,
|
|
3158
|
+
recordExtraction
|
|
3107
3159
|
)
|
|
3108
3160
|
);
|
|
3109
3161
|
}
|
|
3110
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3162
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3111
3163
|
const body = await readJson(request);
|
|
3112
3164
|
const upstream = await client.chatCompletions(
|
|
3113
3165
|
completionsRequestToChatCompletion(body),
|
|
@@ -3122,7 +3174,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3122
3174
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
3123
3175
|
if (bufferProxyBodies) {
|
|
3124
3176
|
const upstreamText = await upstream.text();
|
|
3125
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
3177
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
3126
3178
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
3127
3179
|
return proxyResponse(responseFromText(upstream, text));
|
|
3128
3180
|
}
|
|
@@ -3135,7 +3187,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3135
3187
|
}),
|
|
3136
3188
|
model,
|
|
3137
3189
|
recordTokens,
|
|
3138
|
-
request.signal
|
|
3190
|
+
request.signal,
|
|
3191
|
+
recordExtraction
|
|
3139
3192
|
)
|
|
3140
3193
|
);
|
|
3141
3194
|
}
|
|
@@ -3145,9 +3198,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3145
3198
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
3146
3199
|
recordTokens(responseModel || model, usage);
|
|
3147
3200
|
}
|
|
3201
|
+
recordExtraction(usage !== void 0);
|
|
3148
3202
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
3149
3203
|
}
|
|
3150
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3204
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3151
3205
|
const body = await readJsonText(request);
|
|
3152
3206
|
const upstream = await client.responses(body, request.signal);
|
|
3153
3207
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -3162,11 +3216,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
3162
3216
|
model,
|
|
3163
3217
|
recordTokens,
|
|
3164
3218
|
request.signal,
|
|
3165
|
-
bufferProxyBodies
|
|
3219
|
+
bufferProxyBodies,
|
|
3220
|
+
recordExtraction
|
|
3166
3221
|
)
|
|
3167
3222
|
);
|
|
3168
3223
|
}
|
|
3169
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
3224
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
3170
3225
|
const body = await readJson(request);
|
|
3171
3226
|
const upstream = await client.responses(
|
|
3172
3227
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -3179,17 +3234,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
3179
3234
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
3180
3235
|
const isSse = isStreamingResponse(upstream);
|
|
3181
3236
|
const text = await upstream.text();
|
|
3182
|
-
recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
|
|
3237
|
+
recordResponseTextUsage(
|
|
3238
|
+
text,
|
|
3239
|
+
isSse,
|
|
3240
|
+
normalizeRequestedModel(body.model),
|
|
3241
|
+
recordTokens,
|
|
3242
|
+
recordExtraction
|
|
3243
|
+
);
|
|
3183
3244
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
3184
3245
|
}
|
|
3185
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
3246
|
+
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
|
|
3186
3247
|
const isSse = isStreamingResponse(response);
|
|
3187
3248
|
if (bufferBody && response.body) {
|
|
3188
3249
|
const text = await response.text();
|
|
3189
|
-
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
|
|
3250
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
3190
3251
|
return responseFromText(response, text);
|
|
3191
3252
|
}
|
|
3192
|
-
return observeResponseUsage(response, fallbackModel, recordTokens, signal);
|
|
3253
|
+
return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
|
|
3193
3254
|
}
|
|
3194
3255
|
function responseFromText(source, text) {
|
|
3195
3256
|
return new Response(text, {
|