@openhoo/hoopilot 1.1.0 → 1.2.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -15,6 +15,13 @@ declare class MetricsRegistry {
15
15
  startRequest(): void;
16
16
  /** Record a completed request and clear its in-flight slot. */
17
17
  observe(observation: RequestObservation): void;
18
+ /**
19
+ * Record whether one upstream completion reported token usage. `missing`
20
+ * counts responses that carried no usage object — most often streamed Chat
21
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
22
+ * rising miss rate flags clients whose token usage is going unaccounted.
23
+ */
24
+ recordTokenExtraction(extracted: boolean): void;
18
25
  /** Accumulate token counts for a model from one upstream completion. */
19
26
  recordTokens(model: string, usage: TokenUsage): void;
20
27
  /** Record one upstream Copilot call and whether it succeeded. */
@@ -43,7 +50,7 @@ declare class MetricsRegistry {
43
50
  * branch; combined with the runtime cancelling the client branch, that releases
44
51
  * the shared upstream connection instead of draining it in the background.
45
52
  */
46
- declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
53
+ declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
47
54
 
48
55
  type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
49
56
  interface Logger {
@@ -200,6 +207,10 @@ interface MetricsSnapshot {
200
207
  byModel: Record<string, ModelTokenTotals>;
201
208
  cached: number;
202
209
  completion: number;
210
+ extraction: {
211
+ extracted: number;
212
+ missing: number;
213
+ };
203
214
  prompt: number;
204
215
  reasoning: number;
205
216
  total: number;
package/dist/index.d.ts CHANGED
@@ -15,6 +15,13 @@ declare class MetricsRegistry {
15
15
  startRequest(): void;
16
16
  /** Record a completed request and clear its in-flight slot. */
17
17
  observe(observation: RequestObservation): void;
18
+ /**
19
+ * Record whether one upstream completion reported token usage. `missing`
20
+ * counts responses that carried no usage object — most often streamed Chat
21
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
22
+ * rising miss rate flags clients whose token usage is going unaccounted.
23
+ */
24
+ recordTokenExtraction(extracted: boolean): void;
18
25
  /** Accumulate token counts for a model from one upstream completion. */
19
26
  recordTokens(model: string, usage: TokenUsage): void;
20
27
  /** Record one upstream Copilot call and whether it succeeded. */
@@ -43,7 +50,7 @@ declare class MetricsRegistry {
43
50
  * branch; combined with the runtime cancelling the client branch, that releases
44
51
  * the shared upstream connection instead of draining it in the background.
45
52
  */
46
- declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
53
+ declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
47
54
 
48
55
  type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
49
56
  interface Logger {
@@ -200,6 +207,10 @@ interface MetricsSnapshot {
200
207
  byModel: Record<string, ModelTokenTotals>;
201
208
  cached: number;
202
209
  completion: number;
210
+ extraction: {
211
+ extracted: number;
212
+ missing: number;
213
+ };
203
214
  prompt: number;
204
215
  reasoning: number;
205
216
  total: number;
package/dist/index.js CHANGED
@@ -2220,6 +2220,7 @@ var MetricsRegistry = class {
2220
2220
  #upstream = /* @__PURE__ */ new Map();
2221
2221
  #copilotQuota;
2222
2222
  #githubRateLimit = /* @__PURE__ */ new Map();
2223
+ #extraction = { extracted: 0, missing: 0 };
2223
2224
  constructor(options = {}) {
2224
2225
  this.#startedAtMs = (options.now ?? Date.now)();
2225
2226
  }
@@ -2236,6 +2237,19 @@ var MetricsRegistry = class {
2236
2237
  this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
2237
2238
  this.#observeDuration(observation.route, observation.durationMs / 1e3);
2238
2239
  }
2240
+ /**
2241
+ * Record whether one upstream completion reported token usage. `missing`
2242
+ * counts responses that carried no usage object — most often streamed Chat
2243
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
2244
+ * rising miss rate flags clients whose token usage is going unaccounted.
2245
+ */
2246
+ recordTokenExtraction(extracted) {
2247
+ if (extracted) {
2248
+ this.#extraction.extracted += 1;
2249
+ } else {
2250
+ this.#extraction.missing += 1;
2251
+ }
2252
+ }
2239
2253
  /** Accumulate token counts for a model from one upstream completion. */
2240
2254
  recordTokens(model, usage) {
2241
2255
  const name = this.#modelLabel(model);
@@ -2343,7 +2357,7 @@ var MetricsRegistry = class {
2343
2357
  inFlight: this.#inFlight,
2344
2358
  requests: { byRoute, byStatus, total: requestsTotal },
2345
2359
  startedAt: new Date(this.#startedAtMs).toISOString(),
2346
- tokens: { byModel, ...tokenTotals },
2360
+ tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
2347
2361
  upstream: { errors: upstreamErrors, total: upstreamTotal },
2348
2362
  uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
2349
2363
  };
@@ -2393,6 +2407,16 @@ var MetricsRegistry = class {
2393
2407
  for (const [model, totals] of this.#tokens) {
2394
2408
  lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
2395
2409
  }
2410
+ lines.push(
2411
+ "# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
2412
+ );
2413
+ lines.push("# TYPE hoopilot_token_extraction_total counter");
2414
+ lines.push(
2415
+ `hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
2416
+ );
2417
+ lines.push(
2418
+ `hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
2419
+ );
2396
2420
  lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
2397
2421
  lines.push("# TYPE hoopilot_request_duration_seconds histogram");
2398
2422
  for (const [route, entry] of this.#durations) {
@@ -2548,23 +2572,25 @@ var MetricsRegistry = class {
2548
2572
  }
2549
2573
  }
2550
2574
  };
2551
- function observeResponseUsage(response, fallbackModel, onUsage, signal) {
2575
+ function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
2552
2576
  const body = response.body;
2553
2577
  if (!body) {
2554
2578
  return response;
2555
2579
  }
2556
2580
  const [clientBranch, observerBranch] = body.tee();
2557
2581
  const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
2558
- void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
2559
- });
2582
+ void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
2583
+ () => {
2584
+ }
2585
+ );
2560
2586
  return new Response(clientBranch, {
2561
2587
  headers: response.headers,
2562
2588
  status: response.status,
2563
2589
  statusText: response.statusText
2564
2590
  });
2565
2591
  }
2566
- function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2567
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2592
+ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
2593
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
2568
2594
  if (isSse) {
2569
2595
  for (const line of text.split(/\r?\n/)) {
2570
2596
  considerSseLine(line, accumulator.consider);
@@ -2577,7 +2603,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2577
2603
  }
2578
2604
  accumulator.finish();
2579
2605
  }
2580
- async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2606
+ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2581
2607
  const reader = stream.getReader();
2582
2608
  const onAbort = () => {
2583
2609
  reader.cancel().catch(() => {
@@ -2590,7 +2616,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2590
2616
  signal?.addEventListener("abort", onAbort, { once: true });
2591
2617
  }
2592
2618
  const decoder = new TextDecoder();
2593
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2619
+ const guardedOutcome = onOutcome ? (extracted) => {
2620
+ if (!signal?.aborted) {
2621
+ onOutcome(extracted);
2622
+ }
2623
+ } : void 0;
2624
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
2594
2625
  let buffer = "";
2595
2626
  let bufferedBytes = 0;
2596
2627
  let overflowed = false;
@@ -2638,7 +2669,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2638
2669
  }
2639
2670
  accumulator.finish();
2640
2671
  }
2641
- function createUsageAccumulator(fallbackModel, onUsage) {
2672
+ function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
2642
2673
  let model = fallbackModel;
2643
2674
  let usage;
2644
2675
  return {
@@ -2657,6 +2688,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
2657
2688
  if (usage) {
2658
2689
  onUsage(model, usage);
2659
2690
  }
2691
+ onOutcome?.(usage !== void 0);
2660
2692
  }
2661
2693
  };
2662
2694
  }
@@ -2765,6 +2797,7 @@ function createHoopilotHandler(options = {}) {
2765
2797
  const metrics = options.metrics ?? new MetricsRegistry();
2766
2798
  const readUsage = createUsageReader(client, metrics);
2767
2799
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
2800
+ const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
2768
2801
  const streamingProxyMode = resolveStreamingProxyMode(options);
2769
2802
  const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
2770
2803
  return async (request) => {
@@ -2830,6 +2863,7 @@ function createHoopilotHandler(options = {}) {
2830
2863
  client,
2831
2864
  metrics,
2832
2865
  recordTokens,
2866
+ recordExtraction,
2833
2867
  request,
2834
2868
  requestLogger,
2835
2869
  bufferProxyBodies
@@ -2845,6 +2879,7 @@ function createHoopilotHandler(options = {}) {
2845
2879
  client,
2846
2880
  metrics,
2847
2881
  recordTokens,
2882
+ recordExtraction,
2848
2883
  request,
2849
2884
  requestLogger,
2850
2885
  bufferProxyBodies
@@ -2857,6 +2892,7 @@ function createHoopilotHandler(options = {}) {
2857
2892
  client,
2858
2893
  metrics,
2859
2894
  recordTokens,
2895
+ recordExtraction,
2860
2896
  request,
2861
2897
  requestLogger,
2862
2898
  bufferProxyBodies
@@ -2865,7 +2901,14 @@ function createHoopilotHandler(options = {}) {
2865
2901
  }
2866
2902
  if (request.method === "POST" && apiPath === "/v1/responses/compact") {
2867
2903
  return finish(
2868
- await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
2904
+ await handleResponsesCompact(
2905
+ client,
2906
+ metrics,
2907
+ recordTokens,
2908
+ recordExtraction,
2909
+ request,
2910
+ requestLogger
2911
+ )
2869
2912
  );
2870
2913
  }
2871
2914
  if (request.method === "POST" && apiPath === "/v1/responses") {
@@ -2874,6 +2917,7 @@ function createHoopilotHandler(options = {}) {
2874
2917
  client,
2875
2918
  metrics,
2876
2919
  recordTokens,
2920
+ recordExtraction,
2877
2921
  request,
2878
2922
  requestLogger,
2879
2923
  bufferProxyBodies
@@ -2950,7 +2994,7 @@ function startHoopilotServer(options = {}) {
2950
2994
  url: `http://${urlHost(host)}:${server.port}`
2951
2995
  };
2952
2996
  }
2953
- async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2997
+ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2954
2998
  const anthropicRequest = await readJson(request);
2955
2999
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
2956
3000
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -2963,12 +3007,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2963
3007
  if (isStreamingResponse(upstream) && upstream.body) {
2964
3008
  if (bufferProxyBodies) {
2965
3009
  const text = await upstream.text();
2966
- recordResponseTextUsage(text, true, model, recordTokens);
3010
+ recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
2967
3011
  return proxyResponse(
2968
3012
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
2969
3013
  );
2970
3014
  }
2971
- const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
3015
+ const observed = observeResponseUsage(
3016
+ upstream,
3017
+ model,
3018
+ recordTokens,
3019
+ request.signal,
3020
+ recordExtraction
3021
+ );
2972
3022
  if (!observed.body) {
2973
3023
  return proxyResponse(observed);
2974
3024
  }
@@ -2986,6 +3036,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2986
3036
  const responseModel = typeof body.model === "string" ? body.model.trim() : "";
2987
3037
  recordTokens(responseModel || model, usage);
2988
3038
  }
3039
+ recordExtraction(usage !== void 0);
2989
3040
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
2990
3041
  }
2991
3042
  function handleAnthropicCountTokens(body) {
@@ -3011,7 +3062,7 @@ async function handleModels(client, metrics, signal, logger) {
3011
3062
  logUpstreamSuccess(logger, "/models", upstream.status);
3012
3063
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
3013
3064
  }
3014
- async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3065
+ async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3015
3066
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
3016
3067
  const upstream = await client.chatCompletions(chatRequest, request.signal);
3017
3068
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -3026,11 +3077,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
3026
3077
  model,
3027
3078
  recordTokens,
3028
3079
  request.signal,
3029
- bufferProxyBodies
3080
+ bufferProxyBodies,
3081
+ recordExtraction
3030
3082
  )
3031
3083
  );
3032
3084
  }
3033
- async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3085
+ async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3034
3086
  const body = await readJson(request);
3035
3087
  const upstream = await client.chatCompletions(
3036
3088
  completionsRequestToChatCompletion(body),
@@ -3045,7 +3097,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3045
3097
  if (isStreamingResponse(upstream) && upstream.body) {
3046
3098
  if (bufferProxyBodies) {
3047
3099
  const upstreamText = await upstream.text();
3048
- recordResponseTextUsage(upstreamText, true, model, recordTokens);
3100
+ recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
3049
3101
  const text = completionSseTextFromChatSseText(upstreamText);
3050
3102
  return proxyResponse(responseFromText(upstream, text));
3051
3103
  }
@@ -3058,7 +3110,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3058
3110
  }),
3059
3111
  model,
3060
3112
  recordTokens,
3061
- request.signal
3113
+ request.signal,
3114
+ recordExtraction
3062
3115
  )
3063
3116
  );
3064
3117
  }
@@ -3068,9 +3121,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3068
3121
  const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
3069
3122
  recordTokens(responseModel || model, usage);
3070
3123
  }
3124
+ recordExtraction(usage !== void 0);
3071
3125
  return jsonResponse(chatCompletionToCompletion(completion));
3072
3126
  }
3073
- async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3127
+ async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3074
3128
  const body = await readJsonText(request);
3075
3129
  const upstream = await client.responses(body, request.signal);
3076
3130
  metrics.recordUpstream("/responses", upstream.ok);
@@ -3085,11 +3139,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
3085
3139
  model,
3086
3140
  recordTokens,
3087
3141
  request.signal,
3088
- bufferProxyBodies
3142
+ bufferProxyBodies,
3143
+ recordExtraction
3089
3144
  )
3090
3145
  );
3091
3146
  }
3092
- async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
3147
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
3093
3148
  const body = await readJson(request);
3094
3149
  const upstream = await client.responses(
3095
3150
  JSON.stringify({ ...body, stream: false }),
@@ -3102,17 +3157,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
3102
3157
  logUpstreamSuccess(logger, "/responses", upstream.status);
3103
3158
  const isSse = isStreamingResponse(upstream);
3104
3159
  const text = await upstream.text();
3105
- recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
3160
+ recordResponseTextUsage(
3161
+ text,
3162
+ isSse,
3163
+ normalizeRequestedModel(body.model),
3164
+ recordTokens,
3165
+ recordExtraction
3166
+ );
3106
3167
  return jsonResponse(responsesCompactionResult(text, isSse));
3107
3168
  }
3108
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
3169
+ async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
3109
3170
  const isSse = isStreamingResponse(response);
3110
3171
  if (bufferBody && response.body) {
3111
3172
  const text = await response.text();
3112
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
3173
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
3113
3174
  return responseFromText(response, text);
3114
3175
  }
3115
- return observeResponseUsage(response, fallbackModel, recordTokens, signal);
3176
+ return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
3116
3177
  }
3117
3178
  function responseFromText(source, text) {
3118
3179
  return new Response(text, {