@openhoo/hoopilot 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/dist/cli.js +240 -30
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +219 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +60 -2
- package/dist/index.d.ts +60 -2
- package/dist/index.js +218 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -63,6 +63,7 @@ __export(index_exports, {
|
|
|
63
63
|
observeResponseUsage: () => observeResponseUsage,
|
|
64
64
|
parseLogFormat: () => parseLogFormat,
|
|
65
65
|
parseLogLevel: () => parseLogLevel,
|
|
66
|
+
parseRateLimitHeaders: () => parseRateLimitHeaders,
|
|
66
67
|
readStoredCopilotAuth: () => readStoredCopilotAuth,
|
|
67
68
|
responsesCompactionResult: () => responsesCompactionResult,
|
|
68
69
|
responsesRequestToChatCompletion: () => responsesRequestToChatCompletion,
|
|
@@ -1833,6 +1834,38 @@ function applyGithubApiHeaders(headers, token) {
|
|
|
1833
1834
|
headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
|
|
1834
1835
|
return headers;
|
|
1835
1836
|
}
|
|
1837
|
+
/**
 * Build a rate-limit record from GitHub-style response headers.
 *
 * Reads the numeric `x-ratelimit-limit`, `x-ratelimit-remaining`,
 * `x-ratelimit-used`, `x-ratelimit-reset` and `retry-after` headers through
 * `headerInt`. When none of them yields a value the response is treated as
 * carrying no rate-limit information and `undefined` is returned.
 *
 * @param headers Object exposing `get(name)`, returning the header text or null.
 * @param nowMs Timestamp stored as `observedAtMs`; defaults to `Date.now()`.
 * @returns The record with undefined fields removed, or `undefined`.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const counters = {
    limit: headerInt(headers, "x-ratelimit-limit"),
    remaining: headerInt(headers, "x-ratelimit-remaining"),
    used: headerInt(headers, "x-ratelimit-used"),
    resetEpochSeconds: headerInt(headers, "x-ratelimit-reset"),
    retryAfterSeconds: headerInt(headers, "retry-after")
  };
  const hasAnyCounter = Object.values(counters).some((value) => value !== undefined);
  if (!hasAnyCounter) {
    return undefined;
  }
  // The resource bucket alone does not count as rate-limit data; it only
  // labels the counters found above and falls back to "unknown".
  const resourceHeader = headers.get("x-ratelimit-resource");
  return removeUndefinedRateLimit({
    limit: counters.limit,
    observedAtMs: nowMs,
    remaining: counters.remaining,
    resetEpochSeconds: counters.resetEpochSeconds,
    resource: resourceHeader?.trim() || "unknown",
    retryAfterSeconds: counters.retryAfterSeconds,
    used: counters.used
  });
}
|
|
1856
|
+
/**
 * Read a header as a non-negative decimal integer.
 *
 * The trimmed value must consist solely of ASCII digits. `Number.parseInt`
 * is deliberately not used: it accepts any numeric prefix ("12abc" -> 12,
 * "3.9" -> 3, "0x10" -> 0), which would report malformed headers as valid
 * budgets. Values that cannot be represented exactly as a JavaScript integer
 * are rejected as well.
 *
 * @param headers Object exposing `get(name)`, returning the header text or null.
 * @param name Header name to look up.
 * @returns The parsed integer, or `undefined` when the header is absent or
 *   is not a plain non-negative safe integer.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  // `== null` also covers header stand-ins that return undefined for a miss.
  if (raw == null) {
    return undefined;
  }
  const text = raw.trim();
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const value = Number(text);
  return Number.isSafeInteger(value) ? value : undefined;
}
|
|
1864
|
+
/**
 * Return a shallow copy of `rateLimit` without the properties whose value is
 * `undefined`. Other falsy values (0, "", null, false) are kept.
 *
 * @param rateLimit Object whose own enumerable properties are copied.
 * @returns A new plain object holding only the defined properties.
 */
function removeUndefinedRateLimit(rateLimit) {
  const kept = [];
  for (const key of Object.keys(rateLimit)) {
    const value = rateLimit[key];
    if (value !== undefined) {
      kept.push([key, value]);
    }
  }
  return Object.fromEntries(kept);
}
|
|
1836
1869
|
var CopilotClient = class {
|
|
1837
1870
|
#auth;
|
|
1838
1871
|
#allowUnsafeUpstream;
|
|
@@ -2249,6 +2282,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
|
2249
2282
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
2250
2283
|
var MAX_TRACKED_MODELS = 200;
|
|
2251
2284
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
2285
|
+
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
2252
2286
|
var LABEL_SEPARATOR = "";
|
|
2253
2287
|
var UNKNOWN_MODEL = "unknown";
|
|
2254
2288
|
function emptyModelTotals() {
|
|
@@ -2262,6 +2296,8 @@ var MetricsRegistry = class {
|
|
|
2262
2296
|
#tokens = /* @__PURE__ */ new Map();
|
|
2263
2297
|
#upstream = /* @__PURE__ */ new Map();
|
|
2264
2298
|
#copilotQuota;
|
|
2299
|
+
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
2300
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
2265
2301
|
constructor(options = {}) {
|
|
2266
2302
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
2267
2303
|
}
|
|
@@ -2278,6 +2314,19 @@ var MetricsRegistry = class {
|
|
|
2278
2314
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
2279
2315
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
2280
2316
|
}
|
|
2317
|
+
/**
|
|
2318
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
2319
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
2320
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
2321
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
2322
|
+
*/
|
|
2323
|
+
recordTokenExtraction(extracted) {
|
|
2324
|
+
if (extracted) {
|
|
2325
|
+
this.#extraction.extracted += 1;
|
|
2326
|
+
} else {
|
|
2327
|
+
this.#extraction.missing += 1;
|
|
2328
|
+
}
|
|
2329
|
+
}
|
|
2281
2330
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
2282
2331
|
recordTokens(model, usage) {
|
|
2283
2332
|
const name = this.#modelLabel(model);
|
|
@@ -2299,17 +2348,39 @@ var MetricsRegistry = class {
|
|
|
2299
2348
|
recordCopilotQuota(usage) {
|
|
2300
2349
|
this.#copilotQuota = usage;
|
|
2301
2350
|
}
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2351
|
+
/**
|
|
2352
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
2353
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
2354
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
2355
|
+
*/
|
|
2356
|
+
recordGithubRateLimit(rateLimit) {
|
|
2357
|
+
if (!rateLimit) {
|
|
2358
|
+
return;
|
|
2359
|
+
}
|
|
2360
|
+
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
2361
|
+
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
2362
|
+
}
|
|
2363
|
+
// Sanitize the model into a bounded label. The model can originate from a
|
|
2364
|
+
// client request, so cap its length, strip characters that would corrupt the
|
|
2365
|
+
// exposition format, and fold overflow past the cardinality limit into
|
|
2366
|
+
// UNKNOWN_MODEL to keep the series count bounded.
|
|
2306
2367
|
#modelLabel(model) {
|
|
2307
|
-
const cleaned = model
|
|
2368
|
+
const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2308
2369
|
if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
|
|
2309
2370
|
return UNKNOWN_MODEL;
|
|
2310
2371
|
}
|
|
2311
2372
|
return cleaned;
|
|
2312
2373
|
}
|
|
2374
|
+
// The resource comes from a trusted upstream header, but clean and bound it
|
|
2375
|
+
// with the same discipline as model labels: strip control characters that
|
|
2376
|
+
// would corrupt the exposition format and fold overflow into "unknown".
|
|
2377
|
+
#rateLimitResource(resource) {
|
|
2378
|
+
const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2379
|
+
if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
|
|
2380
|
+
return UNKNOWN_MODEL;
|
|
2381
|
+
}
|
|
2382
|
+
return cleaned;
|
|
2383
|
+
}
|
|
2313
2384
|
#observeDuration(route, seconds) {
|
|
2314
2385
|
const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
|
|
2315
2386
|
const entry = this.#durations.get(route) ?? {
|
|
@@ -2354,11 +2425,16 @@ var MetricsRegistry = class {
|
|
|
2354
2425
|
upstreamErrors += count;
|
|
2355
2426
|
}
|
|
2356
2427
|
}
|
|
2428
|
+
const githubRateLimit = {};
|
|
2429
|
+
for (const [resource, rateLimit] of this.#githubRateLimit) {
|
|
2430
|
+
githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
|
|
2431
|
+
}
|
|
2357
2432
|
return {
|
|
2433
|
+
githubRateLimit,
|
|
2358
2434
|
inFlight: this.#inFlight,
|
|
2359
2435
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
2360
2436
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
2361
|
-
tokens: { byModel, ...tokenTotals },
|
|
2437
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
2362
2438
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
2363
2439
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
2364
2440
|
};
|
|
@@ -2408,6 +2484,16 @@ var MetricsRegistry = class {
|
|
|
2408
2484
|
for (const [model, totals] of this.#tokens) {
|
|
2409
2485
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
2410
2486
|
}
|
|
2487
|
+
lines.push(
|
|
2488
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
2489
|
+
);
|
|
2490
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
2491
|
+
lines.push(
|
|
2492
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
2493
|
+
);
|
|
2494
|
+
lines.push(
|
|
2495
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
2496
|
+
);
|
|
2411
2497
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
2412
2498
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
2413
2499
|
for (const [route, entry] of this.#durations) {
|
|
@@ -2425,10 +2511,43 @@ var MetricsRegistry = class {
|
|
|
2425
2511
|
lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
|
|
2426
2512
|
lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
|
|
2427
2513
|
}
|
|
2514
|
+
this.#renderGithubRateLimit(lines);
|
|
2428
2515
|
this.#renderCopilotQuota(lines);
|
|
2429
2516
|
return `${lines.join("\n")}
|
|
2430
2517
|
`;
|
|
2431
2518
|
}
|
|
2519
|
+
#renderGithubRateLimit(lines) {
|
|
2520
|
+
const entries = [...this.#githubRateLimit.values()];
|
|
2521
|
+
if (entries.length === 0) {
|
|
2522
|
+
return;
|
|
2523
|
+
}
|
|
2524
|
+
const gauge = (suffix, help, pick) => {
|
|
2525
|
+
const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
|
|
2526
|
+
if (present.length === 0) {
|
|
2527
|
+
return;
|
|
2528
|
+
}
|
|
2529
|
+
lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
|
|
2530
|
+
lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
|
|
2531
|
+
for (const rateLimit of present) {
|
|
2532
|
+
lines.push(
|
|
2533
|
+
`hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
|
|
2534
|
+
);
|
|
2535
|
+
}
|
|
2536
|
+
};
|
|
2537
|
+
gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
|
|
2538
|
+
gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
|
|
2539
|
+
gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
|
|
2540
|
+
gauge(
|
|
2541
|
+
"reset_timestamp_seconds",
|
|
2542
|
+
"Unix epoch when the GitHub REST API window resets.",
|
|
2543
|
+
(r) => r.resetEpochSeconds
|
|
2544
|
+
);
|
|
2545
|
+
gauge(
|
|
2546
|
+
"retry_after_seconds",
|
|
2547
|
+
"Seconds to wait after a GitHub secondary-limit response.",
|
|
2548
|
+
(r) => r.retryAfterSeconds
|
|
2549
|
+
);
|
|
2550
|
+
}
|
|
2432
2551
|
#renderCopilotQuota(lines) {
|
|
2433
2552
|
const usage = this.#copilotQuota;
|
|
2434
2553
|
if (!usage) {
|
|
@@ -2530,23 +2649,25 @@ var MetricsRegistry = class {
|
|
|
2530
2649
|
}
|
|
2531
2650
|
}
|
|
2532
2651
|
};
|
|
2533
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2652
|
+
/**
 * Return a response equivalent to `response` while a second copy of its body
 * is scanned for token usage in the background.
 *
 * A response without a body is returned untouched. Otherwise the body is
 * teed: one branch backs the returned Response (same headers, status and
 * status text), the other is handed to `consumeUsage` together with whether
 * the content type is `text/event-stream`. Failures of the observing branch
 * are deliberately ignored so they never affect the client branch.
 *
 * @param response Upstream Response to observe.
 * @param fallbackModel Model name forwarded to `consumeUsage`.
 * @param onUsage Usage callback forwarded to `consumeUsage`.
 * @param signal Optional AbortSignal forwarded to `consumeUsage`.
 * @param onOutcome Optional outcome callback forwarded to `consumeUsage`.
 * @returns The original response (no body) or a new Response over the client branch.
 */
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
  if (!response.body) {
    return response;
  }
  const [clientBranch, observerBranch] = response.body.tee();
  const contentType = response.headers.get("content-type");
  const isSse = contentType?.includes("text/event-stream") ?? false;
  const ignoreObserverFailure = () => {
  };
  // Fire and forget: the observer runs alongside the client stream.
  void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
    ignoreObserverFailure
  );
  const { headers, status, statusText } = response;
  return new Response(clientBranch, { headers, status, statusText });
}
|
|
2548
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
2549
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2669
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2670
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
2550
2671
|
if (isSse) {
|
|
2551
2672
|
for (const line of text.split(/\r?\n/)) {
|
|
2552
2673
|
considerSseLine(line, accumulator.consider);
|
|
@@ -2559,7 +2680,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
2559
2680
|
}
|
|
2560
2681
|
accumulator.finish();
|
|
2561
2682
|
}
|
|
2562
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2683
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2563
2684
|
const reader = stream.getReader();
|
|
2564
2685
|
const onAbort = () => {
|
|
2565
2686
|
reader.cancel().catch(() => {
|
|
@@ -2572,7 +2693,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2572
2693
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
2573
2694
|
}
|
|
2574
2695
|
const decoder = new TextDecoder();
|
|
2575
|
-
const
|
|
2696
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2697
|
+
if (!signal?.aborted) {
|
|
2698
|
+
onOutcome(extracted);
|
|
2699
|
+
}
|
|
2700
|
+
} : void 0;
|
|
2701
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
2576
2702
|
let buffer = "";
|
|
2577
2703
|
let bufferedBytes = 0;
|
|
2578
2704
|
let overflowed = false;
|
|
@@ -2620,7 +2746,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2620
2746
|
}
|
|
2621
2747
|
accumulator.finish();
|
|
2622
2748
|
}
|
|
2623
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2749
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2624
2750
|
let model = fallbackModel;
|
|
2625
2751
|
let usage;
|
|
2626
2752
|
return {
|
|
@@ -2639,6 +2765,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2639
2765
|
if (usage) {
|
|
2640
2766
|
onUsage(model, usage);
|
|
2641
2767
|
}
|
|
2768
|
+
onOutcome?.(usage !== void 0);
|
|
2642
2769
|
}
|
|
2643
2770
|
};
|
|
2644
2771
|
}
|
|
@@ -2669,6 +2796,37 @@ function modelText(value) {
|
|
|
2669
2796
|
function nonNegative(value) {
|
|
2670
2797
|
return Number.isFinite(value) && value > 0 ? value : 0;
|
|
2671
2798
|
}
|
|
2799
|
+
/**
 * Strip ASCII control characters (U+0000–U+001F and U+007F) from a label
 * value and trim surrounding whitespace.
 *
 * Non-string input (undefined, null, numbers, objects) yields an empty
 * string instead of throwing, so callers that fall back with
 * `cleanLabel(x) || "unknown"` degrade to their fallback label rather than
 * failing the metric recording.
 *
 * @param value Candidate label text.
 * @returns The cleaned label; may be empty.
 */
function cleanLabel(value) {
  if (typeof value !== "string") {
    return "";
  }
  // Removal happens before trimming, so whitespace exposed by a removed
  // control character is trimmed as well.
  return value.replace(/[\u0000-\u001f\u007f]/g, "").trim();
}
|
|
2809
|
+
/**
 * Convert a stored rate-limit record into its snapshot form.
 *
 * `observedAtMs` becomes the ISO-8601 string `observedAt`;
 * `resetEpochSeconds` becomes the ISO-8601 string `resetAt`. `limit`,
 * `remaining`, `used` and `retryAfterSeconds` are copied when defined. The
 * record's `resource` is not copied.
 *
 * @param rateLimit Record with `observedAtMs` and optional numeric fields.
 * @returns A new snapshot object containing only the fields that are present.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  if (rateLimit.limit !== undefined) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== undefined) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== undefined) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== undefined) {
    // The epoch originates from a response header. A value outside the range
    // a Date can represent would make toISOString() throw a RangeError and
    // take the whole snapshot down, so such a value is left out instead.
    const resetAt = new Date(rateLimit.resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetAt.getTime())) {
      snapshot.resetAt = resetAt.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== undefined) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
|
|
2672
2830
|
function labelKey(...parts) {
|
|
2673
2831
|
return parts.join(LABEL_SEPARATOR);
|
|
2674
2832
|
}
|
|
@@ -2716,6 +2874,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2716
2874
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2717
2875
|
const readUsage = createUsageReader(client, metrics);
|
|
2718
2876
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2877
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2719
2878
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2720
2879
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2721
2880
|
return async (request) => {
|
|
@@ -2781,6 +2940,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2781
2940
|
client,
|
|
2782
2941
|
metrics,
|
|
2783
2942
|
recordTokens,
|
|
2943
|
+
recordExtraction,
|
|
2784
2944
|
request,
|
|
2785
2945
|
requestLogger,
|
|
2786
2946
|
bufferProxyBodies
|
|
@@ -2796,6 +2956,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2796
2956
|
client,
|
|
2797
2957
|
metrics,
|
|
2798
2958
|
recordTokens,
|
|
2959
|
+
recordExtraction,
|
|
2799
2960
|
request,
|
|
2800
2961
|
requestLogger,
|
|
2801
2962
|
bufferProxyBodies
|
|
@@ -2808,6 +2969,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2808
2969
|
client,
|
|
2809
2970
|
metrics,
|
|
2810
2971
|
recordTokens,
|
|
2972
|
+
recordExtraction,
|
|
2811
2973
|
request,
|
|
2812
2974
|
requestLogger,
|
|
2813
2975
|
bufferProxyBodies
|
|
@@ -2816,7 +2978,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2816
2978
|
}
|
|
2817
2979
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2818
2980
|
return finish(
|
|
2819
|
-
await handleResponsesCompact(
|
|
2981
|
+
await handleResponsesCompact(
|
|
2982
|
+
client,
|
|
2983
|
+
metrics,
|
|
2984
|
+
recordTokens,
|
|
2985
|
+
recordExtraction,
|
|
2986
|
+
request,
|
|
2987
|
+
requestLogger
|
|
2988
|
+
)
|
|
2820
2989
|
);
|
|
2821
2990
|
}
|
|
2822
2991
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2825,6 +2994,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2825
2994
|
client,
|
|
2826
2995
|
metrics,
|
|
2827
2996
|
recordTokens,
|
|
2997
|
+
recordExtraction,
|
|
2828
2998
|
request,
|
|
2829
2999
|
requestLogger,
|
|
2830
3000
|
bufferProxyBodies
|
|
@@ -2901,7 +3071,7 @@ function startHoopilotServer(options = {}) {
|
|
|
2901
3071
|
url: `http://${urlHost(host)}:${server.port}`
|
|
2902
3072
|
};
|
|
2903
3073
|
}
|
|
2904
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3074
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2905
3075
|
const anthropicRequest = await readJson(request);
|
|
2906
3076
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
2907
3077
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -2914,12 +3084,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2914
3084
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2915
3085
|
if (bufferProxyBodies) {
|
|
2916
3086
|
const text = await upstream.text();
|
|
2917
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
3087
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
2918
3088
|
return proxyResponse(
|
|
2919
3089
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
2920
3090
|
);
|
|
2921
3091
|
}
|
|
2922
|
-
const observed = observeResponseUsage(
|
|
3092
|
+
const observed = observeResponseUsage(
|
|
3093
|
+
upstream,
|
|
3094
|
+
model,
|
|
3095
|
+
recordTokens,
|
|
3096
|
+
request.signal,
|
|
3097
|
+
recordExtraction
|
|
3098
|
+
);
|
|
2923
3099
|
if (!observed.body) {
|
|
2924
3100
|
return proxyResponse(observed);
|
|
2925
3101
|
}
|
|
@@ -2937,6 +3113,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2937
3113
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
2938
3114
|
recordTokens(responseModel || model, usage);
|
|
2939
3115
|
}
|
|
3116
|
+
recordExtraction(usage !== void 0);
|
|
2940
3117
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
2941
3118
|
}
|
|
2942
3119
|
function handleAnthropicCountTokens(body) {
|
|
@@ -2962,7 +3139,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
2962
3139
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
2963
3140
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
2964
3141
|
}
|
|
2965
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3142
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2966
3143
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
2967
3144
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
2968
3145
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -2977,11 +3154,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
2977
3154
|
model,
|
|
2978
3155
|
recordTokens,
|
|
2979
3156
|
request.signal,
|
|
2980
|
-
bufferProxyBodies
|
|
3157
|
+
bufferProxyBodies,
|
|
3158
|
+
recordExtraction
|
|
2981
3159
|
)
|
|
2982
3160
|
);
|
|
2983
3161
|
}
|
|
2984
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3162
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2985
3163
|
const body = await readJson(request);
|
|
2986
3164
|
const upstream = await client.chatCompletions(
|
|
2987
3165
|
completionsRequestToChatCompletion(body),
|
|
@@ -2996,7 +3174,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2996
3174
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2997
3175
|
if (bufferProxyBodies) {
|
|
2998
3176
|
const upstreamText = await upstream.text();
|
|
2999
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
3177
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
3000
3178
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
3001
3179
|
return proxyResponse(responseFromText(upstream, text));
|
|
3002
3180
|
}
|
|
@@ -3009,7 +3187,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3009
3187
|
}),
|
|
3010
3188
|
model,
|
|
3011
3189
|
recordTokens,
|
|
3012
|
-
request.signal
|
|
3190
|
+
request.signal,
|
|
3191
|
+
recordExtraction
|
|
3013
3192
|
)
|
|
3014
3193
|
);
|
|
3015
3194
|
}
|
|
@@ -3019,9 +3198,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
3019
3198
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
3020
3199
|
recordTokens(responseModel || model, usage);
|
|
3021
3200
|
}
|
|
3201
|
+
recordExtraction(usage !== void 0);
|
|
3022
3202
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
3023
3203
|
}
|
|
3024
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
3204
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
3025
3205
|
const body = await readJsonText(request);
|
|
3026
3206
|
const upstream = await client.responses(body, request.signal);
|
|
3027
3207
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -3036,11 +3216,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
3036
3216
|
model,
|
|
3037
3217
|
recordTokens,
|
|
3038
3218
|
request.signal,
|
|
3039
|
-
bufferProxyBodies
|
|
3219
|
+
bufferProxyBodies,
|
|
3220
|
+
recordExtraction
|
|
3040
3221
|
)
|
|
3041
3222
|
);
|
|
3042
3223
|
}
|
|
3043
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
3224
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
3044
3225
|
const body = await readJson(request);
|
|
3045
3226
|
const upstream = await client.responses(
|
|
3046
3227
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -3053,17 +3234,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
3053
3234
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
3054
3235
|
const isSse = isStreamingResponse(upstream);
|
|
3055
3236
|
const text = await upstream.text();
|
|
3056
|
-
recordResponseTextUsage(
|
|
3237
|
+
recordResponseTextUsage(
|
|
3238
|
+
text,
|
|
3239
|
+
isSse,
|
|
3240
|
+
normalizeRequestedModel(body.model),
|
|
3241
|
+
recordTokens,
|
|
3242
|
+
recordExtraction
|
|
3243
|
+
);
|
|
3057
3244
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
3058
3245
|
}
|
|
3059
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
3246
|
+
/**
 * Return the upstream response to send on, recording token usage from it.
 *
 * When `bufferBody` is set and the response has a body, the body is read
 * fully as text, passed to `recordResponseTextUsage`, and a response rebuilt
 * from that text is returned. Otherwise the response is handed to
 * `observeResponseUsage`, which receives the abort `signal` as well.
 *
 * @param response Upstream Response.
 * @param fallbackModel Model name forwarded to the usage recorders.
 * @param recordTokens Usage callback forwarded to the usage recorders.
 * @param signal AbortSignal used only on the non-buffered path.
 * @param bufferBody Whether to read the whole body before responding.
 * @param recordExtraction Outcome callback forwarded to the usage recorders.
 * @returns A promise for the response to return to the client.
 */
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
  const isSse = isStreamingResponse(response);
  const shouldBuffer = bufferBody && response.body;
  if (!shouldBuffer) {
    return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
  }
  const bufferedText = await response.text();
  recordResponseTextUsage(bufferedText, isSse, fallbackModel, recordTokens, recordExtraction);
  return responseFromText(response, bufferedText);
}
|
|
3068
3255
|
function responseFromText(source, text) {
|
|
3069
3256
|
return new Response(text, {
|
|
@@ -3492,6 +3679,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
3492
3679
|
try {
|
|
3493
3680
|
const upstream = await client.usage(signal);
|
|
3494
3681
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
3682
|
+
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
3495
3683
|
if (!upstream.ok) {
|
|
3496
3684
|
return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
|
|
3497
3685
|
}
|
|
@@ -3550,6 +3738,7 @@ function safeParseJson(text) {
|
|
|
3550
3738
|
observeResponseUsage,
|
|
3551
3739
|
parseLogFormat,
|
|
3552
3740
|
parseLogLevel,
|
|
3741
|
+
parseRateLimitHeaders,
|
|
3553
3742
|
readStoredCopilotAuth,
|
|
3554
3743
|
responsesCompactionResult,
|
|
3555
3744
|
responsesRequestToChatCompletion,
|