@openhoo/hoopilot 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1757,6 +1757,38 @@ function applyGithubApiHeaders(headers, token) {
1757
1757
  headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
1758
1758
  return headers;
1759
1759
  }
1760
/**
 * Build a rate-limit record from a response's headers.
 *
 * Reads the `x-ratelimit-limit`, `x-ratelimit-remaining`, `x-ratelimit-used`,
 * `x-ratelimit-reset` and `retry-after` headers as non-negative integers.
 *
 * @param {Headers} headers - Response headers to inspect.
 * @param {number} [nowMs=Date.now()] - Timestamp stored as `observedAtMs`.
 * @returns {object|undefined} The record with undefined fields removed, or
 *   `undefined` when none of the five numeric headers yielded a value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const numeric = {
    limit: headerInt(headers, "x-ratelimit-limit"),
    remaining: headerInt(headers, "x-ratelimit-remaining"),
    used: headerInt(headers, "x-ratelimit-used"),
    resetEpochSeconds: headerInt(headers, "x-ratelimit-reset"),
    retryAfterSeconds: headerInt(headers, "retry-after")
  };
  // Without any numeric signal there is nothing worth recording, even if a
  // resource name header happens to be present.
  const nothingParsed = Object.values(numeric).every((value) => value === void 0);
  if (nothingParsed) {
    return void 0;
  }
  // A missing or blank resource header falls back to the "unknown" bucket.
  const resource = headers.get("x-ratelimit-resource")?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit: numeric.limit,
    observedAtMs: nowMs,
    remaining: numeric.remaining,
    resetEpochSeconds: numeric.resetEpochSeconds,
    resource,
    retryAfterSeconds: numeric.retryAfterSeconds,
    used: numeric.used
  });
}
1779
/**
 * Read a header as a non-negative decimal integer.
 *
 * @param {Headers} headers - Header collection exposing `get(name)`.
 * @param {string} name - Header name to read.
 * @returns {number|undefined} The parsed value, or `undefined` when the header
 *   is absent or is not a plain run of decimal digits within the safe-integer
 *   range.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null) {
    return void 0;
  }
  const text = raw.trim();
  // Number.parseInt stops at the first non-digit, so "12abc" or "1.9" would be
  // silently truncated into a plausible-looking number. Require the whole
  // value to be decimal digits instead; this also rejects signs and blanks.
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  // Very long digit runs lose precision or overflow to Infinity; drop them.
  return Number.isSafeInteger(value) ? value : void 0;
}
1787
/**
 * Copy a rate-limit record, leaving out every property whose value is
 * `undefined`. Other falsy values (0, null, "") are kept, and the remaining
 * keys keep their original order.
 *
 * @param {object} rateLimit - Record that may contain undefined fields.
 * @returns {object} A new plain object without the undefined fields.
 */
function removeUndefinedRateLimit(rateLimit) {
  const kept = [];
  for (const key of Object.keys(rateLimit)) {
    const value = rateLimit[key];
    if (value !== void 0) {
      kept.push([key, value]);
    }
  }
  return Object.fromEntries(kept);
}
1760
1792
  var CopilotClient = class {
1761
1793
  #auth;
1762
1794
  #allowUnsafeUpstream;
@@ -2173,6 +2205,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
2173
2205
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
2174
2206
  var MAX_TRACKED_MODELS = 200;
2175
2207
  var MAX_MODEL_LABEL_LENGTH = 200;
2208
+ var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
2176
2209
  var LABEL_SEPARATOR = "";
2177
2210
  var UNKNOWN_MODEL = "unknown";
2178
2211
  function emptyModelTotals() {
@@ -2186,6 +2219,8 @@ var MetricsRegistry = class {
2186
2219
  #tokens = /* @__PURE__ */ new Map();
2187
2220
  #upstream = /* @__PURE__ */ new Map();
2188
2221
  #copilotQuota;
2222
+ #githubRateLimit = /* @__PURE__ */ new Map();
2223
+ #extraction = { extracted: 0, missing: 0 };
2189
2224
  constructor(options = {}) {
2190
2225
  this.#startedAtMs = (options.now ?? Date.now)();
2191
2226
  }
@@ -2202,6 +2237,19 @@ var MetricsRegistry = class {
2202
2237
  this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
2203
2238
  this.#observeDuration(observation.route, observation.durationMs / 1e3);
2204
2239
  }
2240
+ /**
2241
+ * Record whether one upstream completion reported token usage. `missing`
2242
+ * counts responses that carried no usage object — most often streamed Chat
2243
+ * Completions sent without `stream_options: {"include_usage": true}` — so a
2244
+ * rising miss rate flags clients whose token usage is going unaccounted.
2245
+ */
2246
+ recordTokenExtraction(extracted) {
2247
+ if (extracted) {
2248
+ this.#extraction.extracted += 1;
2249
+ } else {
2250
+ this.#extraction.missing += 1;
2251
+ }
2252
+ }
2205
2253
  /** Accumulate token counts for a model from one upstream completion. */
2206
2254
  recordTokens(model, usage) {
2207
2255
  const name = this.#modelLabel(model);
@@ -2223,17 +2271,39 @@ var MetricsRegistry = class {
2223
2271
  recordCopilotQuota(usage) {
2224
2272
  this.#copilotQuota = usage;
2225
2273
  }
2226
- // Sanitize the model into a bounded, control-char-free label. The model can
2227
- // originate from a client request, so cap its length, strip characters that
2228
- // would corrupt the exposition format, and fold overflow past the cardinality
2229
- // limit into UNKNOWN_MODEL to keep the series count bounded.
2274
+ /**
2275
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
2276
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
2277
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
2278
+ */
2279
+ recordGithubRateLimit(rateLimit) {
2280
+ if (!rateLimit) {
2281
+ return;
2282
+ }
2283
+ const resource = this.#rateLimitResource(rateLimit.resource);
2284
+ this.#githubRateLimit.set(resource, { ...rateLimit, resource });
2285
+ }
2286
+ // Sanitize the model into a bounded label. The model can originate from a
2287
+ // client request, so cap its length, strip characters that would corrupt the
2288
+ // exposition format, and fold overflow past the cardinality limit into
2289
+ // UNKNOWN_MODEL to keep the series count bounded.
2230
2290
  #modelLabel(model) {
2231
- const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2291
+ const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2232
2292
  if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
2233
2293
  return UNKNOWN_MODEL;
2234
2294
  }
2235
2295
  return cleaned;
2236
2296
  }
2297
+ // The resource comes from a trusted upstream header, but clean and bound it
2298
+ // with the same discipline as model labels: strip control characters that
2299
+ // would corrupt the exposition format and fold overflow into "unknown".
2300
+ #rateLimitResource(resource) {
2301
+ const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2302
+ if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
2303
+ return UNKNOWN_MODEL;
2304
+ }
2305
+ return cleaned;
2306
+ }
2237
2307
  #observeDuration(route, seconds) {
2238
2308
  const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
2239
2309
  const entry = this.#durations.get(route) ?? {
@@ -2278,11 +2348,16 @@ var MetricsRegistry = class {
2278
2348
  upstreamErrors += count;
2279
2349
  }
2280
2350
  }
2351
+ const githubRateLimit = {};
2352
+ for (const [resource, rateLimit] of this.#githubRateLimit) {
2353
+ githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
2354
+ }
2281
2355
  return {
2356
+ githubRateLimit,
2282
2357
  inFlight: this.#inFlight,
2283
2358
  requests: { byRoute, byStatus, total: requestsTotal },
2284
2359
  startedAt: new Date(this.#startedAtMs).toISOString(),
2285
- tokens: { byModel, ...tokenTotals },
2360
+ tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
2286
2361
  upstream: { errors: upstreamErrors, total: upstreamTotal },
2287
2362
  uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
2288
2363
  };
@@ -2332,6 +2407,16 @@ var MetricsRegistry = class {
2332
2407
  for (const [model, totals] of this.#tokens) {
2333
2408
  lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
2334
2409
  }
2410
+ lines.push(
2411
+ "# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
2412
+ );
2413
+ lines.push("# TYPE hoopilot_token_extraction_total counter");
2414
+ lines.push(
2415
+ `hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
2416
+ );
2417
+ lines.push(
2418
+ `hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
2419
+ );
2335
2420
  lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
2336
2421
  lines.push("# TYPE hoopilot_request_duration_seconds histogram");
2337
2422
  for (const [route, entry] of this.#durations) {
@@ -2349,10 +2434,43 @@ var MetricsRegistry = class {
2349
2434
  lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
2350
2435
  lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
2351
2436
  }
2437
+ this.#renderGithubRateLimit(lines);
2352
2438
  this.#renderCopilotQuota(lines);
2353
2439
  return `${lines.join("\n")}
2354
2440
  `;
2355
2441
  }
2442
+ #renderGithubRateLimit(lines) {
2443
+ const entries = [...this.#githubRateLimit.values()];
2444
+ if (entries.length === 0) {
2445
+ return;
2446
+ }
2447
+ const gauge = (suffix, help, pick) => {
2448
+ const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
2449
+ if (present.length === 0) {
2450
+ return;
2451
+ }
2452
+ lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
2453
+ lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
2454
+ for (const rateLimit of present) {
2455
+ lines.push(
2456
+ `hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
2457
+ );
2458
+ }
2459
+ };
2460
+ gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
2461
+ gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
2462
+ gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
2463
+ gauge(
2464
+ "reset_timestamp_seconds",
2465
+ "Unix epoch when the GitHub REST API window resets.",
2466
+ (r) => r.resetEpochSeconds
2467
+ );
2468
+ gauge(
2469
+ "retry_after_seconds",
2470
+ "Seconds to wait after a GitHub secondary-limit response.",
2471
+ (r) => r.retryAfterSeconds
2472
+ );
2473
+ }
2356
2474
  #renderCopilotQuota(lines) {
2357
2475
  const usage = this.#copilotQuota;
2358
2476
  if (!usage) {
@@ -2454,23 +2572,25 @@ var MetricsRegistry = class {
2454
2572
  }
2455
2573
  }
2456
2574
  };
2457
- function observeResponseUsage(response, fallbackModel, onUsage, signal) {
2575
+ function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
2458
2576
  const body = response.body;
2459
2577
  if (!body) {
2460
2578
  return response;
2461
2579
  }
2462
2580
  const [clientBranch, observerBranch] = body.tee();
2463
2581
  const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
2464
- void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
2465
- });
2582
+ void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
2583
+ () => {
2584
+ }
2585
+ );
2466
2586
  return new Response(clientBranch, {
2467
2587
  headers: response.headers,
2468
2588
  status: response.status,
2469
2589
  statusText: response.statusText
2470
2590
  });
2471
2591
  }
2472
- function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2473
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2592
+ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
2593
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
2474
2594
  if (isSse) {
2475
2595
  for (const line of text.split(/\r?\n/)) {
2476
2596
  considerSseLine(line, accumulator.consider);
@@ -2483,7 +2603,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
2483
2603
  }
2484
2604
  accumulator.finish();
2485
2605
  }
2486
- async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2606
+ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2487
2607
  const reader = stream.getReader();
2488
2608
  const onAbort = () => {
2489
2609
  reader.cancel().catch(() => {
@@ -2496,7 +2616,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2496
2616
  signal?.addEventListener("abort", onAbort, { once: true });
2497
2617
  }
2498
2618
  const decoder = new TextDecoder();
2499
- const accumulator = createUsageAccumulator(fallbackModel, onUsage);
2619
+ const guardedOutcome = onOutcome ? (extracted) => {
2620
+ if (!signal?.aborted) {
2621
+ onOutcome(extracted);
2622
+ }
2623
+ } : void 0;
2624
+ const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
2500
2625
  let buffer = "";
2501
2626
  let bufferedBytes = 0;
2502
2627
  let overflowed = false;
@@ -2544,7 +2669,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
2544
2669
  }
2545
2670
  accumulator.finish();
2546
2671
  }
2547
- function createUsageAccumulator(fallbackModel, onUsage) {
2672
+ function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
2548
2673
  let model = fallbackModel;
2549
2674
  let usage;
2550
2675
  return {
@@ -2563,6 +2688,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
2563
2688
  if (usage) {
2564
2689
  onUsage(model, usage);
2565
2690
  }
2691
+ onOutcome?.(usage !== void 0);
2566
2692
  }
2567
2693
  };
2568
2694
  }
@@ -2593,6 +2719,37 @@ function modelText(value) {
2593
2719
  function nonNegative(value) {
2594
2720
  return Number.isFinite(value) && value > 0 ? value : 0;
2595
2721
  }
2722
/**
 * Strip ASCII control characters from a label and trim surrounding whitespace.
 *
 * Characters with code 0–31 and DEL (127) are dropped; everything else,
 * including non-ASCII text, is kept.
 *
 * @param {string} value - Raw label text.
 * @returns {string} The cleaned label, possibly empty.
 */
function cleanLabel(value) {
  const isPrintable = (char) => {
    const code = char.charCodeAt(0);
    return code > 31 && code !== 127;
  };
  return Array.from(value).filter(isPrintable).join("").trim();
}
2732
/**
 * Convert a stored rate-limit record into its JSON snapshot form.
 *
 * `observedAtMs` becomes the ISO-8601 string `observedAt`, and
 * `resetEpochSeconds` becomes the ISO-8601 string `resetAt`. The numeric
 * fields `limit`, `remaining`, `used` and `retryAfterSeconds` are copied only
 * when defined. The `resource` field is not included.
 *
 * @param {object} rateLimit - Stored record with at least `observedAtMs`.
 * @returns {object} Snapshot containing only the fields that were present.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  if (rateLimit.limit !== void 0) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== void 0) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== void 0) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    // The reset value originates from an upstream header. A value outside the
    // range a Date can represent would make toISOString() throw a RangeError
    // and take the whole snapshot down with it, so omit resetAt in that case.
    const resetAt = new Date(rateLimit.resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetAt.getTime())) {
      snapshot.resetAt = resetAt.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
2596
2753
  function labelKey(...parts) {
2597
2754
  return parts.join(LABEL_SEPARATOR);
2598
2755
  }
@@ -2640,6 +2797,7 @@ function createHoopilotHandler(options = {}) {
2640
2797
  const metrics = options.metrics ?? new MetricsRegistry();
2641
2798
  const readUsage = createUsageReader(client, metrics);
2642
2799
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
2800
+ const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
2643
2801
  const streamingProxyMode = resolveStreamingProxyMode(options);
2644
2802
  const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
2645
2803
  return async (request) => {
@@ -2705,6 +2863,7 @@ function createHoopilotHandler(options = {}) {
2705
2863
  client,
2706
2864
  metrics,
2707
2865
  recordTokens,
2866
+ recordExtraction,
2708
2867
  request,
2709
2868
  requestLogger,
2710
2869
  bufferProxyBodies
@@ -2720,6 +2879,7 @@ function createHoopilotHandler(options = {}) {
2720
2879
  client,
2721
2880
  metrics,
2722
2881
  recordTokens,
2882
+ recordExtraction,
2723
2883
  request,
2724
2884
  requestLogger,
2725
2885
  bufferProxyBodies
@@ -2732,6 +2892,7 @@ function createHoopilotHandler(options = {}) {
2732
2892
  client,
2733
2893
  metrics,
2734
2894
  recordTokens,
2895
+ recordExtraction,
2735
2896
  request,
2736
2897
  requestLogger,
2737
2898
  bufferProxyBodies
@@ -2740,7 +2901,14 @@ function createHoopilotHandler(options = {}) {
2740
2901
  }
2741
2902
  if (request.method === "POST" && apiPath === "/v1/responses/compact") {
2742
2903
  return finish(
2743
- await handleResponsesCompact(client, metrics, recordTokens, request, requestLogger)
2904
+ await handleResponsesCompact(
2905
+ client,
2906
+ metrics,
2907
+ recordTokens,
2908
+ recordExtraction,
2909
+ request,
2910
+ requestLogger
2911
+ )
2744
2912
  );
2745
2913
  }
2746
2914
  if (request.method === "POST" && apiPath === "/v1/responses") {
@@ -2749,6 +2917,7 @@ function createHoopilotHandler(options = {}) {
2749
2917
  client,
2750
2918
  metrics,
2751
2919
  recordTokens,
2920
+ recordExtraction,
2752
2921
  request,
2753
2922
  requestLogger,
2754
2923
  bufferProxyBodies
@@ -2825,7 +2994,7 @@ function startHoopilotServer(options = {}) {
2825
2994
  url: `http://${urlHost(host)}:${server.port}`
2826
2995
  };
2827
2996
  }
2828
- async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
2997
+ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2829
2998
  const anthropicRequest = await readJson(request);
2830
2999
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
2831
3000
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -2838,12 +3007,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2838
3007
  if (isStreamingResponse(upstream) && upstream.body) {
2839
3008
  if (bufferProxyBodies) {
2840
3009
  const text = await upstream.text();
2841
- recordResponseTextUsage(text, true, model, recordTokens);
3010
+ recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
2842
3011
  return proxyResponse(
2843
3012
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
2844
3013
  );
2845
3014
  }
2846
- const observed = observeResponseUsage(upstream, model, recordTokens, request.signal);
3015
+ const observed = observeResponseUsage(
3016
+ upstream,
3017
+ model,
3018
+ recordTokens,
3019
+ request.signal,
3020
+ recordExtraction
3021
+ );
2847
3022
  if (!observed.body) {
2848
3023
  return proxyResponse(observed);
2849
3024
  }
@@ -2861,6 +3036,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
2861
3036
  const responseModel = typeof body.model === "string" ? body.model.trim() : "";
2862
3037
  recordTokens(responseModel || model, usage);
2863
3038
  }
3039
+ recordExtraction(usage !== void 0);
2864
3040
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
2865
3041
  }
2866
3042
  function handleAnthropicCountTokens(body) {
@@ -2886,7 +3062,7 @@ async function handleModels(client, metrics, signal, logger) {
2886
3062
  logUpstreamSuccess(logger, "/models", upstream.status);
2887
3063
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
2888
3064
  }
2889
- async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3065
+ async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2890
3066
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
2891
3067
  const upstream = await client.chatCompletions(chatRequest, request.signal);
2892
3068
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -2901,11 +3077,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
2901
3077
  model,
2902
3078
  recordTokens,
2903
3079
  request.signal,
2904
- bufferProxyBodies
3080
+ bufferProxyBodies,
3081
+ recordExtraction
2905
3082
  )
2906
3083
  );
2907
3084
  }
2908
- async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3085
+ async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2909
3086
  const body = await readJson(request);
2910
3087
  const upstream = await client.chatCompletions(
2911
3088
  completionsRequestToChatCompletion(body),
@@ -2920,7 +3097,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2920
3097
  if (isStreamingResponse(upstream) && upstream.body) {
2921
3098
  if (bufferProxyBodies) {
2922
3099
  const upstreamText = await upstream.text();
2923
- recordResponseTextUsage(upstreamText, true, model, recordTokens);
3100
+ recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
2924
3101
  const text = completionSseTextFromChatSseText(upstreamText);
2925
3102
  return proxyResponse(responseFromText(upstream, text));
2926
3103
  }
@@ -2933,7 +3110,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2933
3110
  }),
2934
3111
  model,
2935
3112
  recordTokens,
2936
- request.signal
3113
+ request.signal,
3114
+ recordExtraction
2937
3115
  )
2938
3116
  );
2939
3117
  }
@@ -2943,9 +3121,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
2943
3121
  const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
2944
3122
  recordTokens(responseModel || model, usage);
2945
3123
  }
3124
+ recordExtraction(usage !== void 0);
2946
3125
  return jsonResponse(chatCompletionToCompletion(completion));
2947
3126
  }
2948
- async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
3127
+ async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
2949
3128
  const body = await readJsonText(request);
2950
3129
  const upstream = await client.responses(body, request.signal);
2951
3130
  metrics.recordUpstream("/responses", upstream.ok);
@@ -2960,11 +3139,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
2960
3139
  model,
2961
3140
  recordTokens,
2962
3141
  request.signal,
2963
- bufferProxyBodies
3142
+ bufferProxyBodies,
3143
+ recordExtraction
2964
3144
  )
2965
3145
  );
2966
3146
  }
2967
- async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
3147
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
2968
3148
  const body = await readJson(request);
2969
3149
  const upstream = await client.responses(
2970
3150
  JSON.stringify({ ...body, stream: false }),
@@ -2977,17 +3157,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
2977
3157
  logUpstreamSuccess(logger, "/responses", upstream.status);
2978
3158
  const isSse = isStreamingResponse(upstream);
2979
3159
  const text = await upstream.text();
2980
- recordResponseTextUsage(text, isSse, normalizeRequestedModel(body.model), recordTokens);
3160
+ recordResponseTextUsage(
3161
+ text,
3162
+ isSse,
3163
+ normalizeRequestedModel(body.model),
3164
+ recordTokens,
3165
+ recordExtraction
3166
+ );
2981
3167
  return jsonResponse(responsesCompactionResult(text, isSse));
2982
3168
  }
2983
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
3169
+ async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
2984
3170
  const isSse = isStreamingResponse(response);
2985
3171
  if (bufferBody && response.body) {
2986
3172
  const text = await response.text();
2987
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
3173
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
2988
3174
  return responseFromText(response, text);
2989
3175
  }
2990
- return observeResponseUsage(response, fallbackModel, recordTokens, signal);
3176
+ return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
2991
3177
  }
2992
3178
  function responseFromText(source, text) {
2993
3179
  return new Response(text, {
@@ -3416,6 +3602,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
3416
3602
  try {
3417
3603
  const upstream = await client.usage(signal);
3418
3604
  metrics.recordUpstream(usagePath, upstream.ok);
3605
+ metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
3419
3606
  if (!upstream.ok) {
3420
3607
  return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
3421
3608
  }
@@ -3473,6 +3660,7 @@ export {
3473
3660
  observeResponseUsage,
3474
3661
  parseLogFormat,
3475
3662
  parseLogLevel,
3663
+ parseRateLimitHeaders,
3476
3664
  readStoredCopilotAuth,
3477
3665
  responsesCompactionResult,
3478
3666
  responsesRequestToChatCompletion,