@openhoo/hoopilot 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/dist/cli.js +240 -30
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +219 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +60 -2
- package/dist/index.d.ts +60 -2
- package/dist/index.js +218 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1757,6 +1757,38 @@ function applyGithubApiHeaders(headers, token) {
|
|
|
1757
1757
|
headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
|
|
1758
1758
|
return headers;
|
|
1759
1759
|
}
|
|
1760
|
+
/**
 * Build a rate-limit record from the rate-limit headers of a response.
 *
 * Reads `x-ratelimit-limit`, `x-ratelimit-remaining`, `x-ratelimit-used`,
 * `x-ratelimit-reset` and `retry-after` as integers. When none of them yields
 * a value the response is treated as carrying no rate-limit information.
 *
 * @param {{ get(name: string): string | null }} headers Response headers.
 * @param {number} [nowMs] Observation time in epoch milliseconds; defaults to
 *   the current time.
 * @returns {object | undefined} The record with undefined fields removed, or
 *   `undefined` when no numeric rate-limit header was usable.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const readInt = (name) => headerInt(headers, name);
  const numeric = {
    limit: readInt("x-ratelimit-limit"),
    remaining: readInt("x-ratelimit-remaining"),
    used: readInt("x-ratelimit-used"),
    resetEpochSeconds: readInt("x-ratelimit-reset"),
    retryAfterSeconds: readInt("retry-after")
  };
  const hasAnyValue = Object.values(numeric).some((value) => value !== undefined);
  if (!hasAnyValue) {
    return undefined;
  }
  // A missing or blank resource header falls back to the "unknown" bucket.
  const resourceHeader = headers.get("x-ratelimit-resource");
  const resource = resourceHeader?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit: numeric.limit,
    observedAtMs: nowMs,
    remaining: numeric.remaining,
    resetEpochSeconds: numeric.resetEpochSeconds,
    resource,
    retryAfterSeconds: numeric.retryAfterSeconds,
    used: numeric.used
  });
}
|
|
1779
|
+
/**
 * Read a header as a non-negative integer.
 *
 * The trimmed value must consist solely of decimal digits. Lenient parsing
 * would otherwise turn malformed values such as "12abc" or "1.9" into
 * plausible-looking numbers, so anything else is treated as absent. Values
 * too large to be represented exactly are rejected as well.
 *
 * @param {{ get(name: string): string | null }} headers Header collection.
 * @param {string} name Header name to read.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   header is missing or is not a plain non-negative decimal integer.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null || raw === undefined) {
    return undefined;
  }
  const text = raw.trim();
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const value = Number(text);
  return Number.isSafeInteger(value) ? value : undefined;
}
|
|
1787
|
+
/**
 * Copy a rate-limit record, leaving out every property whose value is
 * `undefined`. Other values (including `null` and `0`) are kept, and the
 * original property order is preserved.
 *
 * @param {object} rateLimit Record that may contain undefined fields.
 * @returns {object} A new object holding only the defined fields.
 */
function removeUndefinedRateLimit(rateLimit) {
  const kept = [];
  for (const pair of Object.entries(rateLimit)) {
    if (pair[1] !== undefined) {
      kept.push(pair);
    }
  }
  return Object.fromEntries(kept);
}
|
|
1760
1792
|
var CopilotClient = class {
|
|
1761
1793
|
#auth;
|
|
1762
1794
|
#allowUnsafeUpstream;
|
|
@@ -2173,6 +2205,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
|
2173
2205
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
2174
2206
|
var MAX_TRACKED_MODELS = 200;
|
|
2175
2207
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
2208
|
+
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
2176
2209
|
var LABEL_SEPARATOR = "";
|
|
2177
2210
|
var UNKNOWN_MODEL = "unknown";
|
|
2178
2211
|
function emptyModelTotals() {
|
|
@@ -2186,6 +2219,8 @@ var MetricsRegistry = class {
|
|
|
2186
2219
|
#tokens = /* @__PURE__ */ new Map();
|
|
2187
2220
|
#upstream = /* @__PURE__ */ new Map();
|
|
2188
2221
|
#copilotQuota;
|
|
2222
|
+
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
2223
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
2189
2224
|
constructor(options = {}) {
|
|
2190
2225
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
2191
2226
|
}
|
|
@@ -2202,6 +2237,19 @@ var MetricsRegistry = class {
|
|
|
2202
2237
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
2203
2238
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
2204
2239
|
}
|
|
2240
|
+
/**
|
|
2241
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
2242
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
2243
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
2244
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
2245
|
+
*/
|
|
2246
|
+
recordTokenExtraction(extracted) {
|
|
2247
|
+
if (extracted) {
|
|
2248
|
+
this.#extraction.extracted += 1;
|
|
2249
|
+
} else {
|
|
2250
|
+
this.#extraction.missing += 1;
|
|
2251
|
+
}
|
|
2252
|
+
}
|
|
2205
2253
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
2206
2254
|
recordTokens(model, usage) {
|
|
2207
2255
|
const name = this.#modelLabel(model);
|
|
@@ -2223,17 +2271,39 @@ var MetricsRegistry = class {
|
|
|
2223
2271
|
recordCopilotQuota(usage) {
|
|
2224
2272
|
this.#copilotQuota = usage;
|
|
2225
2273
|
}
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2274
|
+
/**
|
|
2275
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
2276
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
2277
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
2278
|
+
*/
|
|
2279
|
+
recordGithubRateLimit(rateLimit) {
|
|
2280
|
+
if (!rateLimit) {
|
|
2281
|
+
return;
|
|
2282
|
+
}
|
|
2283
|
+
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
2284
|
+
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
2285
|
+
}
|
|
2286
|
+
// Sanitize the model into a bounded label. The model can originate from a
|
|
2287
|
+
// client request, so cap its length, strip characters that would corrupt the
|
|
2288
|
+
// exposition format, and fold overflow past the cardinality limit into
|
|
2289
|
+
// UNKNOWN_MODEL to keep the series count bounded.
|
|
2230
2290
|
#modelLabel(model) {
|
|
2231
|
-
const cleaned = model
|
|
2291
|
+
const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2232
2292
|
if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
|
|
2233
2293
|
return UNKNOWN_MODEL;
|
|
2234
2294
|
}
|
|
2235
2295
|
return cleaned;
|
|
2236
2296
|
}
|
|
2297
|
+
// The resource comes from a trusted upstream header, but clean and bound it
|
|
2298
|
+
// with the same discipline as model labels: strip control characters that
|
|
2299
|
+
// would corrupt the exposition format and fold overflow into "unknown".
|
|
2300
|
+
#rateLimitResource(resource) {
|
|
2301
|
+
const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2302
|
+
if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
|
|
2303
|
+
return UNKNOWN_MODEL;
|
|
2304
|
+
}
|
|
2305
|
+
return cleaned;
|
|
2306
|
+
}
|
|
2237
2307
|
#observeDuration(route, seconds) {
|
|
2238
2308
|
const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
|
|
2239
2309
|
const entry = this.#durations.get(route) ?? {
|
|
@@ -2278,11 +2348,16 @@ var MetricsRegistry = class {
|
|
|
2278
2348
|
upstreamErrors += count;
|
|
2279
2349
|
}
|
|
2280
2350
|
}
|
|
2351
|
+
const githubRateLimit = {};
|
|
2352
|
+
for (const [resource, rateLimit] of this.#githubRateLimit) {
|
|
2353
|
+
githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
|
|
2354
|
+
}
|
|
2281
2355
|
return {
|
|
2356
|
+
githubRateLimit,
|
|
2282
2357
|
inFlight: this.#inFlight,
|
|
2283
2358
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
2284
2359
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
2285
|
-
tokens: { byModel, ...tokenTotals },
|
|
2360
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
2286
2361
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
2287
2362
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
2288
2363
|
};
|
|
@@ -2332,6 +2407,16 @@ var MetricsRegistry = class {
|
|
|
2332
2407
|
for (const [model, totals] of this.#tokens) {
|
|
2333
2408
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
2334
2409
|
}
|
|
2410
|
+
lines.push(
|
|
2411
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
2412
|
+
);
|
|
2413
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
2414
|
+
lines.push(
|
|
2415
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
2416
|
+
);
|
|
2417
|
+
lines.push(
|
|
2418
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
2419
|
+
);
|
|
2335
2420
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
2336
2421
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
2337
2422
|
for (const [route, entry] of this.#durations) {
|
|
@@ -2349,10 +2434,43 @@ var MetricsRegistry = class {
|
|
|
2349
2434
|
lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
|
|
2350
2435
|
lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
|
|
2351
2436
|
}
|
|
2437
|
+
this.#renderGithubRateLimit(lines);
|
|
2352
2438
|
this.#renderCopilotQuota(lines);
|
|
2353
2439
|
return `${lines.join("\n")}
|
|
2354
2440
|
`;
|
|
2355
2441
|
}
|
|
2442
|
+
#renderGithubRateLimit(lines) {
|
|
2443
|
+
const entries = [...this.#githubRateLimit.values()];
|
|
2444
|
+
if (entries.length === 0) {
|
|
2445
|
+
return;
|
|
2446
|
+
}
|
|
2447
|
+
const gauge = (suffix, help, pick) => {
|
|
2448
|
+
const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
|
|
2449
|
+
if (present.length === 0) {
|
|
2450
|
+
return;
|
|
2451
|
+
}
|
|
2452
|
+
lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
|
|
2453
|
+
lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
|
|
2454
|
+
for (const rateLimit of present) {
|
|
2455
|
+
lines.push(
|
|
2456
|
+
`hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
|
|
2457
|
+
);
|
|
2458
|
+
}
|
|
2459
|
+
};
|
|
2460
|
+
gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
|
|
2461
|
+
gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
|
|
2462
|
+
gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
|
|
2463
|
+
gauge(
|
|
2464
|
+
"reset_timestamp_seconds",
|
|
2465
|
+
"Unix epoch when the GitHub REST API window resets.",
|
|
2466
|
+
(r) => r.resetEpochSeconds
|
|
2467
|
+
);
|
|
2468
|
+
gauge(
|
|
2469
|
+
"retry_after_seconds",
|
|
2470
|
+
"Seconds to wait after a GitHub secondary-limit response.",
|
|
2471
|
+
(r) => r.retryAfterSeconds
|
|
2472
|
+
);
|
|
2473
|
+
}
|
|
2356
2474
|
#renderCopilotQuota(lines) {
|
|
2357
2475
|
const usage = this.#copilotQuota;
|
|
2358
2476
|
if (!usage) {
|
|
@@ -2454,23 +2572,25 @@ var MetricsRegistry = class {
|
|
|
2454
2572
|
}
|
|
2455
2573
|
}
|
|
2456
2574
|
};
|
|
2457
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2575
|
+
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
2458
2576
|
const body = response.body;
|
|
2459
2577
|
if (!body) {
|
|
2460
2578
|
return response;
|
|
2461
2579
|
}
|
|
2462
2580
|
const [clientBranch, observerBranch] = body.tee();
|
|
2463
2581
|
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
2464
|
-
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(
|
|
2465
|
-
|
|
2582
|
+
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
|
|
2583
|
+
() => {
|
|
2584
|
+
}
|
|
2585
|
+
);
|
|
2466
2586
|
return new Response(clientBranch, {
|
|
2467
2587
|
headers: response.headers,
|
|
2468
2588
|
status: response.status,
|
|
2469
2589
|
statusText: response.statusText
|
|
2470
2590
|
});
|
|
2471
2591
|
}
|
|
2472
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
2473
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2592
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2593
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
2474
2594
|
if (isSse) {
|
|
2475
2595
|
for (const line of text.split(/\r?\n/)) {
|
|
2476
2596
|
considerSseLine(line, accumulator.consider);
|
|
@@ -2483,7 +2603,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
2483
2603
|
}
|
|
2484
2604
|
accumulator.finish();
|
|
2485
2605
|
}
|
|
2486
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2606
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2487
2607
|
const reader = stream.getReader();
|
|
2488
2608
|
const onAbort = () => {
|
|
2489
2609
|
reader.cancel().catch(() => {
|
|
@@ -2496,7 +2616,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2496
2616
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
2497
2617
|
}
|
|
2498
2618
|
const decoder = new TextDecoder();
|
|
2499
|
-
const
|
|
2619
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2620
|
+
if (!signal?.aborted) {
|
|
2621
|
+
onOutcome(extracted);
|
|
2622
|
+
}
|
|
2623
|
+
} : void 0;
|
|
2624
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
2500
2625
|
let buffer = "";
|
|
2501
2626
|
let bufferedBytes = 0;
|
|
2502
2627
|
let overflowed = false;
|
|
@@ -2544,7 +2669,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2544
2669
|
}
|
|
2545
2670
|
accumulator.finish();
|
|
2546
2671
|
}
|
|
2547
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2672
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2548
2673
|
let model = fallbackModel;
|
|
2549
2674
|
let usage;
|
|
2550
2675
|
return {
|
|
@@ -2563,6 +2688,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2563
2688
|
if (usage) {
|
|
2564
2689
|
onUsage(model, usage);
|
|
2565
2690
|
}
|
|
2691
|
+
onOutcome?.(usage !== void 0);
|
|
2566
2692
|
}
|
|
2567
2693
|
};
|
|
2568
2694
|
}
|
|
@@ -2593,6 +2719,37 @@ function modelText(value) {
|
|
|
2593
2719
|
function nonNegative(value) {
|
|
2594
2720
|
return Number.isFinite(value) && value > 0 ? value : 0;
|
|
2595
2721
|
}
|
|
2722
|
+
/**
 * Strip ASCII control characters from a label value and trim the result.
 *
 * Characters with a code of 31 or below, and DEL (127), are removed; every
 * other character is kept as is. A non-string input yields an empty string
 * instead of throwing, so callers that fall back on an empty result (e.g.
 * `cleanLabel(x) || fallback`) also cover a missing value.
 *
 * @param {unknown} value Raw label text.
 * @returns {string} The cleaned, trimmed label; "" for non-string input.
 */
function cleanLabel(value) {
  if (typeof value !== "string") {
    return "";
  }
  let result = "";
  for (const char of value) {
    const code = char.codePointAt(0);
    if (code > 31 && code !== 127) {
      result += char;
    }
  }
  return result.trim();
}
|
|
2732
|
+
/**
 * Convert a stored rate-limit record into its snapshot form.
 *
 * `observedAtMs` becomes the ISO string `observedAt`, and `resetEpochSeconds`
 * becomes the ISO string `resetAt`. The numeric fields `limit`, `remaining`,
 * `used` and `retryAfterSeconds` are copied when defined. The `resource` is
 * not copied.
 *
 * A timestamp that cannot be represented as a Date (for example an absurdly
 * large reset value) is omitted rather than letting `toISOString()` throw a
 * RangeError.
 *
 * @param {object} rateLimit Stored rate-limit record.
 * @returns {object} Snapshot containing only the fields that are available.
 */
function toRateLimitSnapshot(rateLimit) {
  // ISO string for an epoch-millisecond value, or undefined when out of range.
  const toIso = (epochMs) => {
    const date = new Date(epochMs);
    return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
  };
  const snapshot = {};
  const observedAt = toIso(rateLimit.observedAtMs);
  if (observedAt !== undefined) {
    snapshot.observedAt = observedAt;
  }
  if (rateLimit.limit !== undefined) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== undefined) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== undefined) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== undefined) {
    const resetAt = toIso(rateLimit.resetEpochSeconds * 1e3);
    if (resetAt !== undefined) {
      snapshot.resetAt = resetAt;
    }
  }
  if (rateLimit.retryAfterSeconds !== undefined) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
|
|
2596
2753
|
function labelKey(...parts) {
|
|
2597
2754
|
return parts.join(LABEL_SEPARATOR);
|
|
2598
2755
|
}
|
|
@@ -2640,6 +2797,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2640
2797
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2641
2798
|
const readUsage = createUsageReader(client, metrics);
|
|
2642
2799
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2800
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2643
2801
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2644
2802
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2645
2803
|
return async (request) => {
|
|
@@ -2705,6 +2863,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2705
2863
|
client,
|
|
2706
2864
|
metrics,
|
|
2707
2865
|
recordTokens,
|
|
2866
|
+
recordExtraction,
|
|
2708
2867
|
request,
|
|
2709
2868
|
requestLogger,
|
|
2710
2869
|
bufferProxyBodies
|
|
@@ -2720,6 +2879,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2720
2879
|
client,
|
|
2721
2880
|
metrics,
|
|
2722
2881
|
recordTokens,
|
|
2882
|
+
recordExtraction,
|
|
2723
2883
|
request,
|
|
2724
2884
|
requestLogger,
|
|
2725
2885
|
bufferProxyBodies
|
|
@@ -2732,6 +2892,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2732
2892
|
client,
|
|
2733
2893
|
metrics,
|
|
2734
2894
|
recordTokens,
|
|
2895
|
+
recordExtraction,
|
|
2735
2896
|
request,
|
|
2736
2897
|
requestLogger,
|
|
2737
2898
|
bufferProxyBodies
|
|
@@ -2740,7 +2901,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2740
2901
|
}
|
|
2741
2902
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2742
2903
|
return finish(
|
|
2743
|
-
await handleResponsesCompact(
|
|
2904
|
+
await handleResponsesCompact(
|
|
2905
|
+
client,
|
|
2906
|
+
metrics,
|
|
2907
|
+
recordTokens,
|
|
2908
|
+
recordExtraction,
|
|
2909
|
+
request,
|
|
2910
|
+
requestLogger
|
|
2911
|
+
)
|
|
2744
2912
|
);
|
|
2745
2913
|
}
|
|
2746
2914
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2749,6 +2917,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2749
2917
|
client,
|
|
2750
2918
|
metrics,
|
|
2751
2919
|
recordTokens,
|
|
2920
|
+
recordExtraction,
|
|
2752
2921
|
request,
|
|
2753
2922
|
requestLogger,
|
|
2754
2923
|
bufferProxyBodies
|
|
@@ -2825,7 +2994,7 @@ function startHoopilotServer(options = {}) {
|
|
|
2825
2994
|
url: `http://${urlHost(host)}:${server.port}`
|
|
2826
2995
|
};
|
|
2827
2996
|
}
|
|
2828
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2997
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2829
2998
|
const anthropicRequest = await readJson(request);
|
|
2830
2999
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
2831
3000
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -2838,12 +3007,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2838
3007
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2839
3008
|
if (bufferProxyBodies) {
|
|
2840
3009
|
const text = await upstream.text();
|
|
2841
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
3010
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
2842
3011
|
return proxyResponse(
|
|
2843
3012
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
2844
3013
|
);
|
|
2845
3014
|
}
|
|
2846
|
-
const observed = observeResponseUsage(
|
|
3015
|
+
const observed = observeResponseUsage(
|
|
3016
|
+
upstream,
|
|
3017
|
+
model,
|
|
3018
|
+
recordTokens,
|
|
3019
|
+
request.signal,
|
|
3020
|
+
recordExtraction
|
|
3021
|
+
);
|
|
2847
3022
|
if (!observed.body) {
|
|
2848
3023
|
return proxyResponse(observed);
|
|
2849
3024
|
}
|
|
@@ -2861,6 +3036,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2861
3036
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
2862
3037
|
recordTokens(responseModel || model, usage);
|
|
2863
3038
|
}
|
|
3039
|
+
recordExtraction(usage !== void 0);
|
|
2864
3040
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
2865
3041
|
}
|
|
2866
3042
|
function handleAnthropicCountTokens(body) {
|
|
@@ -2886,7 +3062,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
2886
3062
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
2887
3063
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
2888
3064
|
}
|
|
2889
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3065
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2890
3066
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
2891
3067
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
2892
3068
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -2901,11 +3077,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
2901
3077
|
model,
|
|
2902
3078
|
recordTokens,
|
|
2903
3079
|
request.signal,
|
|
2904
|
-
bufferProxyBodies
|
|
3080
|
+
bufferProxyBodies,
|
|
3081
|
+
recordExtraction
|
|
2905
3082
|
)
|
|
2906
3083
|
);
|
|
2907
3084
|
}
|
|
2908
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3085
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2909
3086
|
const body = await readJson(request);
|
|
2910
3087
|
const upstream = await client.chatCompletions(
|
|
2911
3088
|
completionsRequestToChatCompletion(body),
|
|
@@ -2920,7 +3097,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2920
3097
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2921
3098
|
if (bufferProxyBodies) {
|
|
2922
3099
|
const upstreamText = await upstream.text();
|
|
2923
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
3100
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
2924
3101
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
2925
3102
|
return proxyResponse(responseFromText(upstream, text));
|
|
2926
3103
|
}
|
|
@@ -2933,7 +3110,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2933
3110
|
}),
|
|
2934
3111
|
model,
|
|
2935
3112
|
recordTokens,
|
|
2936
|
-
request.signal
|
|
3113
|
+
request.signal,
|
|
3114
|
+
recordExtraction
|
|
2937
3115
|
)
|
|
2938
3116
|
);
|
|
2939
3117
|
}
|
|
@@ -2943,9 +3121,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2943
3121
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
2944
3122
|
recordTokens(responseModel || model, usage);
|
|
2945
3123
|
}
|
|
3124
|
+
recordExtraction(usage !== void 0);
|
|
2946
3125
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
2947
3126
|
}
|
|
2948
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3127
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2949
3128
|
const body = await readJsonText(request);
|
|
2950
3129
|
const upstream = await client.responses(body, request.signal);
|
|
2951
3130
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -2960,11 +3139,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
2960
3139
|
model,
|
|
2961
3140
|
recordTokens,
|
|
2962
3141
|
request.signal,
|
|
2963
|
-
bufferProxyBodies
|
|
3142
|
+
bufferProxyBodies,
|
|
3143
|
+
recordExtraction
|
|
2964
3144
|
)
|
|
2965
3145
|
);
|
|
2966
3146
|
}
|
|
2967
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
3147
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
2968
3148
|
const body = await readJson(request);
|
|
2969
3149
|
const upstream = await client.responses(
|
|
2970
3150
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -2977,17 +3157,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
2977
3157
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
2978
3158
|
const isSse = isStreamingResponse(upstream);
|
|
2979
3159
|
const text = await upstream.text();
|
|
2980
|
-
recordResponseTextUsage(
|
|
3160
|
+
recordResponseTextUsage(
|
|
3161
|
+
text,
|
|
3162
|
+
isSse,
|
|
3163
|
+
normalizeRequestedModel(body.model),
|
|
3164
|
+
recordTokens,
|
|
3165
|
+
recordExtraction
|
|
3166
|
+
);
|
|
2981
3167
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
2982
3168
|
}
|
|
2983
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
3169
|
+
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
|
|
2984
3170
|
const isSse = isStreamingResponse(response);
|
|
2985
3171
|
if (bufferBody && response.body) {
|
|
2986
3172
|
const text = await response.text();
|
|
2987
|
-
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
|
|
3173
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
2988
3174
|
return responseFromText(response, text);
|
|
2989
3175
|
}
|
|
2990
|
-
return observeResponseUsage(response, fallbackModel, recordTokens, signal);
|
|
3176
|
+
return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
|
|
2991
3177
|
}
|
|
2992
3178
|
function responseFromText(source, text) {
|
|
2993
3179
|
return new Response(text, {
|
|
@@ -3416,6 +3602,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
3416
3602
|
try {
|
|
3417
3603
|
const upstream = await client.usage(signal);
|
|
3418
3604
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
3605
|
+
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
3419
3606
|
if (!upstream.ok) {
|
|
3420
3607
|
return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
|
|
3421
3608
|
}
|
|
@@ -3473,6 +3660,7 @@ export {
|
|
|
3473
3660
|
observeResponseUsage,
|
|
3474
3661
|
parseLogFormat,
|
|
3475
3662
|
parseLogLevel,
|
|
3663
|
+
parseRateLimitHeaders,
|
|
3476
3664
|
readStoredCopilotAuth,
|
|
3477
3665
|
responsesCompactionResult,
|
|
3478
3666
|
responsesRequestToChatCompletion,
|