@openhoo/hoopilot 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -63,6 +63,7 @@ __export(index_exports, {
63
63
  observeResponseUsage: () => observeResponseUsage,
64
64
  parseLogFormat: () => parseLogFormat,
65
65
  parseLogLevel: () => parseLogLevel,
66
+ parseRateLimitHeaders: () => parseRateLimitHeaders,
66
67
  readStoredCopilotAuth: () => readStoredCopilotAuth,
67
68
  responsesCompactionResult: () => responsesCompactionResult,
68
69
  responsesRequestToChatCompletion: () => responsesRequestToChatCompletion,
@@ -1833,6 +1834,38 @@ function applyGithubApiHeaders(headers, token) {
1833
1834
  headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
1834
1835
  return headers;
1835
1836
  }
1837
/**
 * Parse the GitHub REST rate-limit headers of a response into a plain record.
 *
 * Reads `x-ratelimit-limit`, `x-ratelimit-remaining`, `x-ratelimit-used`,
 * `x-ratelimit-reset` and `retry-after`. Fields whose header is absent or
 * malformed are left out of the result entirely.
 *
 * @param {{ get(name: string): string | null }} headers - Headers-like object.
 * @param {number} [nowMs=Date.now()] - Timestamp stored as `observedAtMs`.
 * @returns {object | undefined} The rate-limit record, or `undefined` when
 *   none of the numeric headers carried a usable value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const limit = headerInt(headers, "x-ratelimit-limit");
  const remaining = headerInt(headers, "x-ratelimit-remaining");
  const used = headerInt(headers, "x-ratelimit-used");
  const resetEpochSeconds = headerInt(headers, "x-ratelimit-reset");
  const retryAfterSeconds = headerInt(headers, "retry-after");
  const numericFields = [limit, remaining, used, resetEpochSeconds, retryAfterSeconds];
  // A resource name alone says nothing about the budget, so it does not count.
  if (numericFields.every((field) => field === void 0)) {
    return void 0;
  }
  return removeUndefinedRateLimit({
    limit,
    observedAtMs: nowMs,
    remaining,
    resetEpochSeconds,
    resource: headers.get("x-ratelimit-resource")?.trim() || "unknown",
    retryAfterSeconds,
    used
  });
}

/**
 * Read a header as a non-negative integer.
 *
 * The value must consist solely of decimal digits (after trimming) and fit in
 * a safe integer. `Number.parseInt` is deliberately avoided: it silently
 * accepts trailing garbage ("12abc" -> 12, "0x10" -> 0, "1.5" -> 1), which
 * would publish a bogus budget instead of dropping the malformed header.
 *
 * @param {{ get(name: string): string | null }} headers - Headers-like object.
 * @param {string} name - Header name to read.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   header is missing or not a plain non-negative integer.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null) {
    return void 0;
  }
  const text = raw.trim();
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  return Number.isSafeInteger(value) ? value : void 0;
}

/**
 * Return a shallow copy of `rateLimit` without the keys whose value is
 * `undefined`, so absent headers do not appear as explicit properties.
 *
 * @param {object} rateLimit - Record that may contain undefined fields.
 * @returns {object} Copy containing only the defined fields.
 */
function removeUndefinedRateLimit(rateLimit) {
  return Object.fromEntries(
    Object.entries(rateLimit).filter(([, value]) => value !== void 0)
  );
}
1836
1869
  var CopilotClient = class {
1837
1870
  #auth;
1838
1871
  #allowUnsafeUpstream;
@@ -2249,6 +2282,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
2249
2282
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
2250
2283
  var MAX_TRACKED_MODELS = 200;
2251
2284
  var MAX_MODEL_LABEL_LENGTH = 200;
2285
+ var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
2252
2286
  var LABEL_SEPARATOR = "";
2253
2287
  var UNKNOWN_MODEL = "unknown";
2254
2288
  function emptyModelTotals() {
@@ -2262,6 +2296,8 @@ var MetricsRegistry = class {
2262
2296
  #tokens = /* @__PURE__ */ new Map();
2263
2297
  #upstream = /* @__PURE__ */ new Map();
2264
2298
  #copilotQuota;
2299
+ #githubRateLimit = /* @__PURE__ */ new Map();
2300
+ #extraction = { extracted: 0, missing: 0 };
2265
2301
  constructor(options = {}) {
2266
2302
  this.#startedAtMs = (options.now ?? Date.now)();
2267
2303
  }
@@ -2278,6 +2314,19 @@ var MetricsRegistry = class {
2278
2314
  this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
2279
2315
  this.#observeDuration(observation.route, observation.durationMs / 1e3);
2280
2316
  }
2317
+ /**
2318
+ * Record whether one upstream completion reported token usage. `missing`
2319
+ * counts responses that carried no usage object — most often streamed Chat
2320
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
2321
+ * rising miss rate flags clients whose token usage is going unaccounted.
2322
+ */
2323
+ recordTokenExtraction(extracted) {
2324
+ if (extracted) {
2325
+ this.#extraction.extracted += 1;
2326
+ } else {
2327
+ this.#extraction.missing += 1;
2328
+ }
2329
+ }
2281
2330
  /** Accumulate token counts for a model from one upstream completion. */
2282
2331
  recordTokens(model, usage) {
2283
2332
  const name = this.#modelLabel(model);
@@ -2299,17 +2348,39 @@ var MetricsRegistry = class {
2299
2348
  recordCopilotQuota(usage) {
2300
2349
  this.#copilotQuota = usage;
2301
2350
  }
2302
- // Sanitize the model into a bounded, control-char-free label. The model can
2303
- // originate from a client request, so cap its length, strip characters that
2304
- // would corrupt the exposition format, and fold overflow past the cardinality
2305
- // limit into UNKNOWN_MODEL to keep the series count bounded.
2351
+ /**
2352
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
2353
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
2354
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
2355
+ */
2356
+ recordGithubRateLimit(rateLimit) {
2357
+ if (!rateLimit) {
2358
+ return;
2359
+ }
2360
+ const resource = this.#rateLimitResource(rateLimit.resource);
2361
+ this.#githubRateLimit.set(resource, { ...rateLimit, resource });
2362
+ }
2363
+ // Sanitize the model into a bounded label. The model can originate from a
2364
+ // client request, so cap its length, strip characters that would corrupt the
2365
+ // exposition format, and fold overflow past the cardinality limit into
2366
+ // UNKNOWN_MODEL to keep the series count bounded.
2306
2367
  #modelLabel(model) {
2307
- const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2368
+ const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2308
2369
  if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
2309
2370
  return UNKNOWN_MODEL;
2310
2371
  }
2311
2372
  return cleaned;
2312
2373
  }
2374
+ // The resource comes from a trusted upstream header, but clean and bound it
2375
+ // with the same discipline as model labels: strip control characters that
2376
+ // would corrupt the exposition format and fold overflow into "unknown".
2377
+ #rateLimitResource(resource) {
2378
+ const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2379
+ if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
2380
+ return UNKNOWN_MODEL;
2381
+ }
2382
+ return cleaned;
2383
+ }
2313
2384
  #observeDuration(route, seconds) {
2314
2385
  const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
2315
2386
  const entry = this.#durations.get(route) ?? {
@@ -2354,11 +2425,16 @@ var MetricsRegistry = class {
2354
2425
  upstreamErrors += count;
2355
2426
  }
2356
2427
  }
2428
+ const githubRateLimit = {};
2429
+ for (const [resource, rateLimit] of this.#githubRateLimit) {
2430
+ githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
2431
+ }
2357
2432
  return {
2433
+ githubRateLimit,
2358
2434
  inFlight: this.#inFlight,
2359
2435
  requests: { byRoute, byStatus, total: requestsTotal },
2360
2436
  startedAt: new Date(this.#startedAtMs).toISOString(),
2361
- tokens: { byModel, ...tokenTotals },
2437
+ tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
2362
2438
  upstream: { errors: upstreamErrors, total: upstreamTotal },
2363
2439
  uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
2364
2440
  };
@@ -2408,6 +2484,16 @@ var MetricsRegistry = class {
2408
2484
  for (const [model, totals] of this.#tokens) {
2409
2485
  lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
2410
2486
  }
2487
+ lines.push(
2488
+ "# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
2489
+ );
2490
+ lines.push("# TYPE hoopilot_token_extraction_total counter");
2491
+ lines.push(
2492
+ `hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
2493
+ );
2494
+ lines.push(
2495
+ `hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
2496
+ );
2411
2497
  lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
2412
2498
  lines.push("# TYPE hoopilot_request_duration_seconds histogram");
2413
2499
  for (const [route, entry] of this.#durations) {
@@ -2425,10 +2511,43 @@ var MetricsRegistry = class {
2425
2511
  lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
2426
2512
  lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
2427
2513
  }
2514
+ this.#renderGithubRateLimit(lines);
2428
2515
  this.#renderCopilotQuota(lines);
2429
2516
  return `${lines.join("\n")}
2430
2517
  `;
2431
2518
  }
2519
+ #renderGithubRateLimit(lines) {
2520
+ const entries = [...this.#githubRateLimit.values()];
2521
+ if (entries.length === 0) {
2522
+ return;
2523
+ }
2524
+ const gauge = (suffix, help, pick) => {
2525
+ const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
2526
+ if (present.length === 0) {
2527
+ return;
2528
+ }
2529
+ lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
2530
+ lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
2531
+ for (const rateLimit of present) {
2532
+ lines.push(
2533
+ `hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
2534
+ );
2535
+ }
2536
+ };
2537
+ gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
2538
+ gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
2539
+ gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
2540
+ gauge(
2541
+ "reset_timestamp_seconds",
2542
+ "Unix epoch when the GitHub REST API window resets.",
2543
+ (r) => r.resetEpochSeconds
2544
+ );
2545
+ gauge(
2546
+ "retry_after_seconds",
2547
+ "Seconds to wait after a GitHub secondary-limit response.",
2548
+ (r) => r.retryAfterSeconds
2549
+ );
2550
+ }
2432
2551
  #renderCopilotQuota(lines) {
2433
2552
  const usage = this.#copilotQuota;
2434
2553
  if (!usage) {
@@ -2530,23 +2649,25 @@ var MetricsRegistry = class {
2530
2649
  }
2531
2650
  }
2532
2651
  };
2533
- function observeResponseUsage(response, fallbackModel, onUsage, signal) {
2652
+ function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
2534
2653
  const body = response.body;
2535
2654
  if (!body) {
2536
2655
  return response;
2537
2656
  }
2538
2657
  const [clientBranch, observerBranch] = body.tee();
2539
2658
  const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
2540
- void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
2541
- });
2659
+ void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
2660
+ () => {
2661
+ }
2662
+ );
2542
2663
  return new Response(clientBranch, {
2543
2664
  headers: response.headers,
2544
2665
  status: response.status,
2545
2666
  statusText: response.statusText
2546
2667
  });
2547
2668
  }
2548
- function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2549
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2669
+ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
2670
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
2550
2671
  if (isSse) {
2551
2672
  for (const line of text.split(/\r?\n/)) {
2552
2673
  considerSseLine(line, accumulator.consider);
@@ -2559,7 +2680,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2559
2680
  }
2560
2681
  accumulator.finish();
2561
2682
  }
2562
- async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2683
+ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2563
2684
  const reader = stream.getReader();
2564
2685
  const onAbort = () => {
2565
2686
  reader.cancel().catch(() => {
@@ -2572,7 +2693,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2572
2693
  signal?.addEventListener("abort", onAbort, { once: true });
2573
2694
  }
2574
2695
  const decoder = new TextDecoder();
2575
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2696
+ const guardedOutcome = onOutcome ? (extracted) => {
2697
+ if (!signal?.aborted) {
2698
+ onOutcome(extracted);
2699
+ }
2700
+ } : void 0;
2701
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
2576
2702
  let buffer = "";
2577
2703
  let bufferedBytes = 0;
2578
2704
  let overflowed = false;
@@ -2620,7 +2746,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2620
2746
  }
2621
2747
  accumulator.finish();
2622
2748
  }
2623
- function createUsageAccumulator(fallbackModel, onUsage) {
2749
+ function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
2624
2750
  let model = fallbackModel;
2625
2751
  let usage;
2626
2752
  return {
@@ -2639,6 +2765,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
2639
2765
  if (usage) {
2640
2766
  onUsage(model, usage);
2641
2767
  }
2768
+ onOutcome?.(usage !== void 0);
2642
2769
  }
2643
2770
  };
2644
2771
  }
@@ -2669,6 +2796,37 @@ function modelText(value) {
2669
2796
  function nonNegative(value) {
2670
2797
  return Number.isFinite(value) && value > 0 ? value : 0;
2671
2798
  }
2799
/**
 * Produce a metric-label-safe string: drop ASCII control characters
 * (code units 0-31 and DEL, 127) and trim surrounding whitespace.
 *
 * Nullish input yields an empty string instead of throwing, so callers that
 * fall back with `|| "unknown"` keep working when a label is missing.
 *
 * @param {unknown} value - Raw label text; non-strings are stringified.
 * @returns {string} The cleaned label, possibly empty.
 */
function cleanLabel(value) {
  if (value === null || value === void 0) {
    return "";
  }
  let result = "";
  // Iterating by code point keeps surrogate pairs intact; the first code unit
  // of an astral character is a high surrogate, which is never a control code.
  for (const char of String(value)) {
    const code = char.charCodeAt(0);
    if (code > 31 && code !== 127) {
      result += char;
    }
  }
  return result.trim();
}
2809
/**
 * Convert a stored rate-limit record into its snapshot form: timestamps are
 * rendered as ISO-8601 strings and absent fields are omitted.
 *
 * @param {object} rateLimit - Record with `observedAtMs` and optional `limit`,
 *   `remaining`, `used`, `resetEpochSeconds`, `retryAfterSeconds`.
 * @returns {object} Snapshot with `observedAt` plus whichever optional fields
 *   were present; `resetAt` is left out when the reset time is not
 *   representable as a Date.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  if (rateLimit.limit !== void 0) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== void 0) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== void 0) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    // The reset value comes from a response header; an out-of-range epoch
    // makes an invalid Date whose toISOString() throws RangeError, which
    // would take down the whole snapshot. Skip the field instead.
    const resetAt = new Date(rateLimit.resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetAt.getTime())) {
      snapshot.resetAt = resetAt.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
2672
2830
  function labelKey(...parts) {
2673
2831
  return parts.join(LABEL_SEPARATOR);
2674
2832
  }
@@ -2716,6 +2874,7 @@ function createHoopilotHandler(options = {}) {
2716
2874
  const metrics = options.metrics ?? new MetricsRegistry();
2717
2875
  const readUsage = createUsageReader(client, metrics);
2718
2876
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
2877
+ const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
2719
2878
  const streamingProxyMode = resolveStreamingProxyMode(options);
2720
2879
  const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
2721
2880
  return async (request) => {
@@ -2781,6 +2940,7 @@ function createHoopilotHandler(options = {}) {
2781
2940
  client,
2782
2941
  metrics,
2783
2942
  recordTokens,
2943
+ recordExtraction,
2784
2944
  request,
2785
2945
  requestLogger,
2786
2946
  bufferProxyBodies
@@ -2796,6 +2956,7 @@ function createHoopilotHandler(options = {}) {
2796
2956
  client,
2797
2957
  metrics,
2798
2958
  recordTokens,
2959
+ recordExtraction,
2799
2960
  request,
2800
2961
  requestLogger,
2801
2962
  bufferProxyBodies
@@ -2808,6 +2969,7 @@ function createHoopilotHandler(options = {}) {
2808
2969
  client,
2809
2970
  metrics,
2810
2971
  recordTokens,
2972
+ recordExtraction,
2811
2973
  request,
2812
2974
  requestLogger,
2813
2975
  bufferProxyBodies
@@ -2816,7 +2978,14 @@ function createHoopilotHandler(options = {}) {
2816
2978
  }
2817
2979
  if (request.method === "POST" && apiPath === "/v1/responses/compact") {
2818
2980
  return finish(
2819
- await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
2981
+ await handleResponsesCompact(
2982
+ client,
2983
+ metrics,
2984
+ recordTokens,
2985
+ recordExtraction,
2986
+ request,
2987
+ requestLogger
2988
+ )
2820
2989
  );
2821
2990
  }
2822
2991
  if (request.method === "POST" && apiPath === "/v1/responses") {
@@ -2825,6 +2994,7 @@ function createHoopilotHandler(options = {}) {
2825
2994
  client,
2826
2995
  metrics,
2827
2996
  recordTokens,
2997
+ recordExtraction,
2828
2998
  request,
2829
2999
  requestLogger,
2830
3000
  bufferProxyBodies
@@ -2901,7 +3071,7 @@ function startHoopilotServer(options = {}) {
2901
3071
  url: `http://${urlHost(host)}:${server.port}`
2902
3072
  };
2903
3073
  }
2904
- async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3074
+ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2905
3075
  const anthropicRequest = await readJson(request);
2906
3076
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
2907
3077
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -2914,12 +3084,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2914
3084
  if (isStreamingResponse(upstream) && upstream.body) {
2915
3085
  if (bufferProxyBodies) {
2916
3086
  const text = await upstream.text();
2917
- recordResponseTextUsage(text, true, model, recordTokens);
3087
+ recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
2918
3088
  return proxyResponse(
2919
3089
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
2920
3090
  );
2921
3091
  }
2922
- const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
3092
+ const observed = observeResponseUsage(
3093
+ upstream,
3094
+ model,
3095
+ recordTokens,
3096
+ request.signal,
3097
+ recordExtraction
3098
+ );
2923
3099
  if (!observed.body) {
2924
3100
  return proxyResponse(observed);
2925
3101
  }
@@ -2937,6 +3113,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2937
3113
  const responseModel = typeof body.model === "string" ? body.model.trim() : "";
2938
3114
  recordTokens(responseModel || model, usage);
2939
3115
  }
3116
+ recordExtraction(usage !== void 0);
2940
3117
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
2941
3118
  }
2942
3119
  function handleAnthropicCountTokens(body) {
@@ -2962,7 +3139,7 @@ async function handleModels(client, metrics, signal, logger) {
2962
3139
  logUpstreamSuccess(logger, "/models", upstream.status);
2963
3140
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
2964
3141
  }
2965
- async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3142
+ async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2966
3143
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
2967
3144
  const upstream = await client.chatCompletions(chatRequest, request.signal);
2968
3145
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -2977,11 +3154,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
2977
3154
  model,
2978
3155
  recordTokens,
2979
3156
  request.signal,
2980
- bufferProxyBodies
3157
+ bufferProxyBodies,
3158
+ recordExtraction
2981
3159
  )
2982
3160
  );
2983
3161
  }
2984
- async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3162
+ async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2985
3163
  const body = await readJson(request);
2986
3164
  const upstream = await client.chatCompletions(
2987
3165
  completionsRequestToChatCompletion(body),
@@ -2996,7 +3174,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2996
3174
  if (isStreamingResponse(upstream) && upstream.body) {
2997
3175
  if (bufferProxyBodies) {
2998
3176
  const upstreamText = await upstream.text();
2999
- recordResponseTextUsage(upstreamText, true, model, recordTokens);
3177
+ recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
3000
3178
  const text = completionSseTextFromChatSseText(upstreamText);
3001
3179
  return proxyResponse(responseFromText(upstream, text));
3002
3180
  }
@@ -3009,7 +3187,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3009
3187
  }),
3010
3188
  model,
3011
3189
  recordTokens,
3012
- request.signal
3190
+ request.signal,
3191
+ recordExtraction
3013
3192
  )
3014
3193
  );
3015
3194
  }
@@ -3019,9 +3198,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
3019
3198
  const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
3020
3199
  recordTokens(responseModel || model, usage);
3021
3200
  }
3201
+ recordExtraction(usage !== void 0);
3022
3202
  return jsonResponse(chatCompletionToCompletion(completion));
3023
3203
  }
3024
- async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3204
+ async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
3025
3205
  const body = await readJsonText(request);
3026
3206
  const upstream = await client.responses(body, request.signal);
3027
3207
  metrics.recordUpstream("/responses", upstream.ok);
@@ -3036,11 +3216,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
3036
3216
  model,
3037
3217
  recordTokens,
3038
3218
  request.signal,
3039
- bufferProxyBodies
3219
+ bufferProxyBodies,
3220
+ recordExtraction
3040
3221
  )
3041
3222
  );
3042
3223
  }
3043
- async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
3224
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
3044
3225
  const body = await readJson(request);
3045
3226
  const upstream = await client.responses(
3046
3227
  JSON.stringify({ ...body, stream: false }),
@@ -3053,17 +3234,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
3053
3234
  logUpstreamSuccess(logger, "/responses", upstream.status);
3054
3235
  const isSse = isStreamingResponse(upstream);
3055
3236
  const text = await upstream.text();
3056
- recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
3237
+ recordResponseTextUsage(
3238
+ text,
3239
+ isSse,
3240
+ normalizeRequestedModel(body.model),
3241
+ recordTokens,
3242
+ recordExtraction
3243
+ );
3057
3244
  return jsonResponse(responsesCompactionResult(text, isSse));
3058
3245
  }
3059
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
3246
+ async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
3060
3247
  const isSse = isStreamingResponse(response);
3061
3248
  if (bufferBody && response.body) {
3062
3249
  const text = await response.text();
3063
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
3250
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
3064
3251
  return responseFromText(response, text);
3065
3252
  }
3066
- return observeResponseUsage(response, fallbackModel, recordTokens, signal);
3253
+ return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
3067
3254
  }
3068
3255
  function responseFromText(source, text) {
3069
3256
  return new Response(text, {
@@ -3492,6 +3679,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
3492
3679
  try {
3493
3680
  const upstream = await client.usage(signal);
3494
3681
  metrics.recordUpstream(usagePath, upstream.ok);
3682
+ metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
3495
3683
  if (!upstream.ok) {
3496
3684
  return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
3497
3685
  }
@@ -3550,6 +3738,7 @@ function safeParseJson(text) {
3550
3738
  observeResponseUsage,
3551
3739
  parseLogFormat,
3552
3740
  parseLogLevel,
3741
+ parseRateLimitHeaders,
3553
3742
  readStoredCopilotAuth,
3554
3743
  responsesCompactionResult,
3555
3744
  responsesRequestToChatCompletion,