@openhoo/hoopilot 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2297,6 +2297,7 @@ var MetricsRegistry = class {
2297
2297
  #upstream = /* @__PURE__ */ new Map();
2298
2298
  #copilotQuota;
2299
2299
  #githubRateLimit = /* @__PURE__ */ new Map();
2300
+ #extraction = { extracted: 0, missing: 0 };
2300
2301
  constructor(options = {}) {
2301
2302
  this.#startedAtMs = (options.now ?? Date.now)();
2302
2303
  }
@@ -2313,6 +2314,19 @@ var MetricsRegistry = class {
2313
2314
  this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
2314
2315
  this.#observeDuration(observation.route, observation.durationMs / 1e3);
2315
2316
  }
2317
+ /**
2318
+ * Record whether one upstream completion reported token usage. `missing`
2319
+ * counts responses that carried no usage object — most often streamed Chat
2320
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
2321
+ * rising miss rate flags clients whose token usage is going unaccounted.
2322
+ */
2323
+ recordTokenExtraction(extracted) {
2324
+ if (extracted) {
2325
+ this.#extraction.extracted += 1;
2326
+ } else {
2327
+ this.#extraction.missing += 1;
2328
+ }
2329
+ }
2316
2330
  /** Accumulate token counts for a model from one upstream completion. */
2317
2331
  recordTokens(model, usage) {
2318
2332
  const name = this.#modelLabel(model);
@@ -2420,7 +2434,7 @@ var MetricsRegistry = class {
2420
2434
  inFlight: this.#inFlight,
2421
2435
  requests: { byRoute, byStatus, total: requestsTotal },
2422
2436
  startedAt: new Date(this.#startedAtMs).toISOString(),
2423
- tokens: { byModel, ...tokenTotals },
2437
+ tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
2424
2438
  upstream: { errors: upstreamErrors, total: upstreamTotal },
2425
2439
  uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
2426
2440
  };
@@ -2470,6 +2484,16 @@ var MetricsRegistry = class {
2470
2484
  for (const [model, totals] of this.#tokens) {
2471
2485
  lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
2472
2486
  }
2487
+ lines.push(
2488
+ "# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
2489
+ );
2490
+ lines.push("# TYPE hoopilot_token_extraction_total counter");
2491
+ lines.push(
2492
+ `hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
2493
+ );
2494
+ lines.push(
2495
+ `hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
2496
+ );
2473
2497
  lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
2474
2498
  lines.push("# TYPE hoopilot_request_duration_seconds histogram");
2475
2499
  for (const [route, entry] of this.#durations) {
@@ -2625,23 +2649,25 @@ var MetricsRegistry = class {
2625
2649
  }
2626
2650
  }
2627
2651
  };
2628
- function observeResponseUsage(response, fallbackModel, onUsage, signal) {
2652
+ function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
2629
2653
  const body = response.body;
2630
2654
  if (!body) {
2631
2655
  return response;
2632
2656
  }
2633
2657
  const [clientBranch, observerBranch] = body.tee();
2634
2658
  const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
2635
- void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
2636
- });
2659
+ void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
2660
+ () => {
2661
+ }
2662
+ );
2637
2663
  return new Response(clientBranch, {
2638
2664
  headers: response.headers,
2639
2665
  status: response.status,
2640
2666
  statusText: response.statusText
2641
2667
  });
2642
2668
  }
2643
- function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2644
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2669
+ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
2670
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
2645
2671
  if (isSse) {
2646
2672
  for (const line of text.split(/\r?\n/)) {
2647
2673
  considerSseLine(line, accumulator.consider);
@@ -2654,7 +2680,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2654
2680
  }
2655
2681
  accumulator.finish();
2656
2682
  }
2657
- async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2683
+ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2658
2684
  const reader = stream.getReader();
2659
2685
  const onAbort = () => {
2660
2686
  reader.cancel().catch(() => {
@@ -2667,7 +2693,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2667
2693
  signal?.addEventListener("abort", onAbort, { once: true });
2668
2694
  }
2669
2695
  const decoder = new TextDecoder();
2670
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2696
+ const guardedOutcome = onOutcome ? (extracted) => {
2697
+ if (!signal?.aborted) {
2698
+ onOutcome(extracted);
2699
+ }
2700
+ } : void 0;
2701
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
2671
2702
  let buffer = "";
2672
2703
  let bufferedBytes = 0;
2673
2704
  let overflowed = false;
@@ -2715,7 +2746,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2715
2746
  }
2716
2747
  accumulator.finish();
2717
2748
  }
2718
- function createUsageAccumulator(fallbackModel, onUsage) {
2749
+ function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
2719
2750
  let model = fallbackModel;
2720
2751
  let usage;
2721
2752
  return {
@@ -2734,6 +2765,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
2734
2765
  if (usage) {
2735
2766
  onUsage(model, usage);
2736
2767
  }
2768
+ onOutcome?.(usage !== void 0);
2737
2769
  }
2738
2770
  };
2739
2771
  }
@@ -2842,6 +2874,7 @@ function createHoopilotHandler(options = {}) {
2842
2874
  const metrics = options.metrics ?? new MetricsRegistry();
2843
2875
  const readUsage = createUsageReader(client, metrics);
2844
2876
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
2877
+ const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
2845
2878
  const streamingProxyMode = resolveStreamingProxyMode(options);
2846
2879
  const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
2847
2880
  return async (request) => {
@@ -2907,6 +2940,7 @@ function createHoopilotHandler(options = {}) {
2907
2940
  client,
2908
2941
  metrics,
2909
2942
  recordTokens,
2943
+ recordExtraction,
2910
2944
  request,
2911
2945
  requestLogger,
2912
2946
  bufferProxyBodies
@@ -2922,6 +2956,7 @@ function createHoopilotHandler(options = {}) {
2922
2956
  client,
2923
2957
  metrics,
2924
2958
  recordTokens,
2959
+ recordExtraction,
2925
2960
  request,
2926
2961
  requestLogger,
2927
2962
  bufferProxyBodies
@@ -2934,6 +2969,7 @@ function createHoopilotHandler(options = {}) {
2934
2969
  client,
2935
2970
  metrics,
2936
2971
  recordTokens,
2972
+ recordExtraction,
2937
2973
  request,
2938
2974
  requestLogger,
2939
2975
  bufferProxyBodies
@@ -2942,7 +2978,14 @@ function createHoopilotHandler(options = {}) {
2942
2978
  }
2943
2979
  if (request.method === "POST" && apiPath === "/v1/responses/compact") {
2944
2980
  return finish(
2945
- await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
2981
+ await handleResponsesCompact(
2982
+ client,
2983
+ metrics,
2984
+ recordTokens,
2985
+ recordExtraction,
2986
+ request,
2987
+ requestLogger
2988
+ )
2946
2989
  );
2947
2990
  }
2948
2991
  if (request.method === "POST" && apiPath === "/v1/responses") {
@@ -2951,6 +2994,7 @@ function createHoopilotHandler(options = {}) {
2951
2994
  client,
2952
2995
  metrics,
2953
2996
  recordTokens,
2997
+ recordExtraction,
2954
2998
  request,
2955
2999
  requestLogger,
2956
3000
  bufferProxyBodies
@@ -3027,7 +3071,7 @@ function startHoopilotServer(options = {}) {
3027
3071
  url: `http://${urlHost(host)}:${server.port}`
3028
3072
  };
3029
3073
  }
3030
- async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3074
+ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3031
3075
  const anthropicRequest = await readJson(request);
3032
3076
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
3033
3077
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -3040,12 +3084,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
3040
3084
  if (isStreamingResponse(upstream) && upstream.body) {
3041
3085
  if (bufferProxyBodies) {
3042
3086
  const text = await upstream.text();
3043
- recordResponseTextUsage(text, true, model, recordTokens);
3087
+ recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
3044
3088
  return proxyResponse(
3045
3089
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
3046
3090
  );
3047
3091
  }
3048
- const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
3092
+ const observed = observeResponseUsage(
3093
+ upstream,
3094
+ model,
3095
+ recordTokens,
3096
+ request.signal,
3097
+ recordExtraction
3098
+ );
3049
3099
  if (!observed.body) {
3050
3100
  return proxyResponse(observed);
3051
3101
  }
@@ -3063,6 +3113,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
3063
3113
  const responseModel = typeof body.model === "string" ? body.model.trim() : "";
3064
3114
  recordTokens(responseModel || model, usage);
3065
3115
  }
3116
+ recordExtraction(usage !== void 0);
3066
3117
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
3067
3118
  }
3068
3119
  function handleAnthropicCountTokens(body) {
@@ -3088,7 +3139,7 @@ async function handleModels(client, metrics, signal, logger) {
3088
3139
  logUpstreamSuccess(logger, "/models", upstream.status);
3089
3140
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
3090
3141
  }
3091
- async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3142
+ async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3092
3143
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
3093
3144
  const upstream = await client.chatCompletions(chatRequest, request.signal);
3094
3145
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -3103,11 +3154,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
3103
3154
  model,
3104
3155
  recordTokens,
3105
3156
  request.signal,
3106
- bufferProxyBodies
3157
+ bufferProxyBodies,
3158
+ recordExtraction
3107
3159
  )
3108
3160
  );
3109
3161
  }
3110
- async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3162
+ async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3111
3163
  const body = await readJson(request);
3112
3164
  const upstream = await client.chatCompletions(
3113
3165
  completionsRequestToChatCompletion(body),
@@ -3122,7 +3174,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3122
3174
  if (isStreamingResponse(upstream) && upstream.body) {
3123
3175
  if (bufferProxyBodies) {
3124
3176
  const upstreamText = await upstream.text();
3125
- recordResponseTextUsage(upstreamText, true, model, recordTokens);
3177
+ recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
3126
3178
  const text = completionSseTextFromChatSseText(upstreamText);
3127
3179
  return proxyResponse(responseFromText(upstream, text));
3128
3180
  }
@@ -3135,7 +3187,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3135
3187
  }),
3136
3188
  model,
3137
3189
  recordTokens,
3138
- request.signal
3190
+ request.signal,
3191
+ recordExtraction
3139
3192
  )
3140
3193
  );
3141
3194
  }
@@ -3145,9 +3198,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3145
3198
  const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
3146
3199
  recordTokens(responseModel || model, usage);
3147
3200
  }
3201
+ recordExtraction(usage !== void 0);
3148
3202
  return jsonResponse(chatCompletionToCompletion(completion));
3149
3203
  }
3150
- async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3204
+ async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3151
3205
  const body = await readJsonText(request);
3152
3206
  const upstream = await client.responses(body, request.signal);
3153
3207
  metrics.recordUpstream("/responses", upstream.ok);
@@ -3162,11 +3216,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
3162
3216
  model,
3163
3217
  recordTokens,
3164
3218
  request.signal,
3165
- bufferProxyBodies
3219
+ bufferProxyBodies,
3220
+ recordExtraction
3166
3221
  )
3167
3222
  );
3168
3223
  }
3169
- async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
3224
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
3170
3225
  const body = await readJson(request);
3171
3226
  const upstream = await client.responses(
3172
3227
  JSON.stringify({ ...body, stream: false }),
@@ -3179,17 +3234,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
3179
3234
  logUpstreamSuccess(logger, "/responses", upstream.status);
3180
3235
  const isSse = isStreamingResponse(upstream);
3181
3236
  const text = await upstream.text();
3182
- recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
3237
+ recordResponseTextUsage(
3238
+ text,
3239
+ isSse,
3240
+ normalizeRequestedModel(body.model),
3241
+ recordTokens,
3242
+ recordExtraction
3243
+ );
3183
3244
  return jsonResponse(responsesCompactionResult(text, isSse));
3184
3245
  }
3185
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
3246
+ async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
3186
3247
  const isSse = isStreamingResponse(response);
3187
3248
  if (bufferBody && response.body) {
3188
3249
  const text = await response.text();
3189
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
3250
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
3190
3251
  return responseFromText(response, text);
3191
3252
  }
3192
- return observeResponseUsage(response, fallbackModel, recordTokens, signal);
3253
+ return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
3193
3254
  }
3194
3255
  function responseFromText(source, text) {
3195
3256
  return new Response(text, {