@openhoo/hoopilot 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -273,7 +273,7 @@ Hoopilot tracks token usage, request counts, and latency in memory while the ser
273
273
  - `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
274
274
  - `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
275
275
 
276
- Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
276
+ Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted. The `hoopilot_token_extraction_total{outcome="extracted"|"missing"}` counter (mirrored in `/v1/usage` as `proxy.tokens.extraction`) tracks how often a completion reported usage versus not, so a rising `missing` count flags clients whose token usage is going unaccounted.
277
277
 
278
278
  GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
279
279
 
package/dist/cli.js CHANGED
@@ -1689,6 +1689,7 @@ var MetricsRegistry = class {
1689
1689
  #upstream = /* @__PURE__ */ new Map();
1690
1690
  #copilotQuota;
1691
1691
  #githubRateLimit = /* @__PURE__ */ new Map();
1692
+ #extraction = { extracted: 0, missing: 0 };
1692
1693
  constructor(options = {}) {
1693
1694
  this.#startedAtMs = (options.now ?? Date.now)();
1694
1695
  }
@@ -1705,6 +1706,19 @@ var MetricsRegistry = class {
1705
1706
  this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
1706
1707
  this.#observeDuration(observation.route, observation.durationMs / 1e3);
1707
1708
  }
1709
+ /**
1710
+ * Record whether one upstream completion reported token usage. `missing`
1711
+ * counts responses that carried no usage object — most often streamed Chat
1712
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
1713
+ * rising miss rate flags clients whose token usage is going unaccounted.
1714
+ */
1715
+ recordTokenExtraction(extracted) {
1716
+ if (extracted) {
1717
+ this.#extraction.extracted += 1;
1718
+ } else {
1719
+ this.#extraction.missing += 1;
1720
+ }
1721
+ }
1708
1722
  /** Accumulate token counts for a model from one upstream completion. */
1709
1723
  recordTokens(model, usage) {
1710
1724
  const name = this.#modelLabel(model);
@@ -1812,7 +1826,7 @@ var MetricsRegistry = class {
1812
1826
  inFlight: this.#inFlight,
1813
1827
  requests: { byRoute, byStatus, total: requestsTotal },
1814
1828
  startedAt: new Date(this.#startedAtMs).toISOString(),
1815
- tokens: { byModel, ...tokenTotals },
1829
+ tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
1816
1830
  upstream: { errors: upstreamErrors, total: upstreamTotal },
1817
1831
  uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
1818
1832
  };
@@ -1862,6 +1876,16 @@ var MetricsRegistry = class {
1862
1876
  for (const [model, totals] of this.#tokens) {
1863
1877
  lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
1864
1878
  }
1879
+ lines.push(
1880
+ "# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
1881
+ );
1882
+ lines.push("# TYPE hoopilot_token_extraction_total counter");
1883
+ lines.push(
1884
+ `hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
1885
+ );
1886
+ lines.push(
1887
+ `hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
1888
+ );
1865
1889
  lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
1866
1890
  lines.push("# TYPE hoopilot_request_duration_seconds histogram");
1867
1891
  for (const [route, entry] of this.#durations) {
@@ -2017,23 +2041,25 @@ var MetricsRegistry = class {
2017
2041
  }
2018
2042
  }
2019
2043
  };
2020
- function observeResponseUsage(response, fallbackModel, onUsage, signal) {
2044
+ function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
2021
2045
  const body = response.body;
2022
2046
  if (!body) {
2023
2047
  return response;
2024
2048
  }
2025
2049
  const [clientBranch, observerBranch] = body.tee();
2026
2050
  const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
2027
- void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
2028
- });
2051
+ void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
2052
+ () => {
2053
+ }
2054
+ );
2029
2055
  return new Response(clientBranch, {
2030
2056
  headers: response.headers,
2031
2057
  status: response.status,
2032
2058
  statusText: response.statusText
2033
2059
  });
2034
2060
  }
2035
- function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2036
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2061
+ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
2062
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
2037
2063
  if (isSse) {
2038
2064
  for (const line of text.split(/\r?\n/)) {
2039
2065
  considerSseLine(line, accumulator.consider);
@@ -2046,7 +2072,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2046
2072
  }
2047
2073
  accumulator.finish();
2048
2074
  }
2049
- async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2075
+ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2050
2076
  const reader = stream.getReader();
2051
2077
  const onAbort = () => {
2052
2078
  reader.cancel().catch(() => {
@@ -2059,7 +2085,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2059
2085
  signal?.addEventListener("abort", onAbort, { once: true });
2060
2086
  }
2061
2087
  const decoder = new TextDecoder();
2062
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2088
+ const guardedOutcome = onOutcome ? (extracted) => {
2089
+ if (!signal?.aborted) {
2090
+ onOutcome(extracted);
2091
+ }
2092
+ } : void 0;
2093
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
2063
2094
  let buffer = "";
2064
2095
  let bufferedBytes = 0;
2065
2096
  let overflowed = false;
@@ -2107,7 +2138,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2107
2138
  }
2108
2139
  accumulator.finish();
2109
2140
  }
2110
- function createUsageAccumulator(fallbackModel, onUsage) {
2141
+ function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
2111
2142
  let model = fallbackModel;
2112
2143
  let usage;
2113
2144
  return {
@@ -2126,6 +2157,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
2126
2157
  if (usage) {
2127
2158
  onUsage(model, usage);
2128
2159
  }
2160
+ onOutcome?.(usage !== void 0);
2129
2161
  }
2130
2162
  };
2131
2163
  }
@@ -2254,6 +2286,7 @@ function createHoopilotHandler(options = {}) {
2254
2286
  const metrics = options.metrics ?? new MetricsRegistry();
2255
2287
  const readUsage = createUsageReader(client, metrics);
2256
2288
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
2289
+ const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
2257
2290
  const streamingProxyMode = resolveStreamingProxyMode(options);
2258
2291
  const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
2259
2292
  return async (request) => {
@@ -2319,6 +2352,7 @@ function createHoopilotHandler(options = {}) {
2319
2352
  client,
2320
2353
  metrics,
2321
2354
  recordTokens,
2355
+ recordExtraction,
2322
2356
  request,
2323
2357
  requestLogger,
2324
2358
  bufferProxyBodies
@@ -2334,6 +2368,7 @@ function createHoopilotHandler(options = {}) {
2334
2368
  client,
2335
2369
  metrics,
2336
2370
  recordTokens,
2371
+ recordExtraction,
2337
2372
  request,
2338
2373
  requestLogger,
2339
2374
  bufferProxyBodies
@@ -2346,6 +2381,7 @@ function createHoopilotHandler(options = {}) {
2346
2381
  client,
2347
2382
  metrics,
2348
2383
  recordTokens,
2384
+ recordExtraction,
2349
2385
  request,
2350
2386
  requestLogger,
2351
2387
  bufferProxyBodies
@@ -2354,7 +2390,14 @@ function createHoopilotHandler(options = {}) {
2354
2390
  }
2355
2391
  if (request.method === "POST" && apiPath === "/v1/responses/compact") {
2356
2392
  return finish(
2357
- await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
2393
+ await handleResponsesCompact(
2394
+ client,
2395
+ metrics,
2396
+ recordTokens,
2397
+ recordExtraction,
2398
+ request,
2399
+ requestLogger
2400
+ )
2358
2401
  );
2359
2402
  }
2360
2403
  if (request.method === "POST" && apiPath === "/v1/responses") {
@@ -2363,6 +2406,7 @@ function createHoopilotHandler(options = {}) {
2363
2406
  client,
2364
2407
  metrics,
2365
2408
  recordTokens,
2409
+ recordExtraction,
2366
2410
  request,
2367
2411
  requestLogger,
2368
2412
  bufferProxyBodies
@@ -2439,7 +2483,7 @@ function startHoopilotServer(options = {}) {
2439
2483
  url: `http://${urlHost(host)}:${server.port}`
2440
2484
  };
2441
2485
  }
2442
- async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2486
+ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2443
2487
  const anthropicRequest = await readJson(request);
2444
2488
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
2445
2489
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -2452,12 +2496,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2452
2496
  if (isStreamingResponse(upstream) && upstream.body) {
2453
2497
  if (bufferProxyBodies) {
2454
2498
  const text = await upstream.text();
2455
- recordResponseTextUsage(text, true, model, recordTokens);
2499
+ recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
2456
2500
  return proxyResponse(
2457
2501
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
2458
2502
  );
2459
2503
  }
2460
- const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
2504
+ const observed = observeResponseUsage(
2505
+ upstream,
2506
+ model,
2507
+ recordTokens,
2508
+ request.signal,
2509
+ recordExtraction
2510
+ );
2461
2511
  if (!observed.body) {
2462
2512
  return proxyResponse(observed);
2463
2513
  }
@@ -2475,6 +2525,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2475
2525
  const responseModel = typeof body.model === "string" ? body.model.trim() : "";
2476
2526
  recordTokens(responseModel || model, usage);
2477
2527
  }
2528
+ recordExtraction(usage !== void 0);
2478
2529
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
2479
2530
  }
2480
2531
  function handleAnthropicCountTokens(body) {
@@ -2500,7 +2551,7 @@ async function handleModels(client, metrics, signal, logger) {
2500
2551
  logUpstreamSuccess(logger, "/models", upstream.status);
2501
2552
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
2502
2553
  }
2503
- async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2554
+ async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2504
2555
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
2505
2556
  const upstream = await client.chatCompletions(chatRequest, request.signal);
2506
2557
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -2515,11 +2566,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
2515
2566
  model,
2516
2567
  recordTokens,
2517
2568
  request.signal,
2518
- bufferProxyBodies
2569
+ bufferProxyBodies,
2570
+ recordExtraction
2519
2571
  )
2520
2572
  );
2521
2573
  }
2522
- async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2574
+ async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2523
2575
  const body = await readJson(request);
2524
2576
  const upstream = await client.chatCompletions(
2525
2577
  completionsRequestToChatCompletion(body),
@@ -2534,7 +2586,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2534
2586
  if (isStreamingResponse(upstream) && upstream.body) {
2535
2587
  if (bufferProxyBodies) {
2536
2588
  const upstreamText = await upstream.text();
2537
- recordResponseTextUsage(upstreamText, true, model, recordTokens);
2589
+ recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
2538
2590
  const text = completionSseTextFromChatSseText(upstreamText);
2539
2591
  return proxyResponse(responseFromText(upstream, text));
2540
2592
  }
@@ -2547,7 +2599,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2547
2599
  }),
2548
2600
  model,
2549
2601
  recordTokens,
2550
- request.signal
2602
+ request.signal,
2603
+ recordExtraction
2551
2604
  )
2552
2605
  );
2553
2606
  }
@@ -2557,9 +2610,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2557
2610
  const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
2558
2611
  recordTokens(responseModel || model, usage);
2559
2612
  }
2613
+ recordExtraction(usage !== void 0);
2560
2614
  return jsonResponse(chatCompletionToCompletion(completion));
2561
2615
  }
2562
- async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2616
+ async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2563
2617
  const body = await readJsonText(request);
2564
2618
  const upstream = await client.responses(body, request.signal);
2565
2619
  metrics.recordUpstream("/responses", upstream.ok);
@@ -2574,11 +2628,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
2574
2628
  model,
2575
2629
  recordTokens,
2576
2630
  request.signal,
2577
- bufferProxyBodies
2631
+ bufferProxyBodies,
2632
+ recordExtraction
2578
2633
  )
2579
2634
  );
2580
2635
  }
2581
- async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
2636
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
2582
2637
  const body = await readJson(request);
2583
2638
  const upstream = await client.responses(
2584
2639
  JSON.stringify({ ...body, stream: false }),
@@ -2591,17 +2646,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
2591
2646
  logUpstreamSuccess(logger, "/responses", upstream.status);
2592
2647
  const isSse = isStreamingResponse(upstream);
2593
2648
  const text = await upstream.text();
2594
- recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
2649
+ recordResponseTextUsage(
2650
+ text,
2651
+ isSse,
2652
+ normalizeRequestedModel(body.model),
2653
+ recordTokens,
2654
+ recordExtraction
2655
+ );
2595
2656
  return jsonResponse(responsesCompactionResult(text, isSse));
2596
2657
  }
2597
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
2658
+ async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
2598
2659
  const isSse = isStreamingResponse(response);
2599
2660
  if (bufferBody && response.body) {
2600
2661
  const text = await response.text();
2601
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
2662
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
2602
2663
  return responseFromText(response, text);
2603
2664
  }
2604
- return observeResponseUsage(response, fallbackModel, recordTokens, signal);
2665
+ return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
2605
2666
  }
2606
2667
  function responseFromText(source, text) {
2607
2668
  return new Response(text, {