@openhoo/hoopilot 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +86 -25
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +86 -25
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +12 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.js +86 -25
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -273,7 +273,7 @@ Hoopilot tracks token usage, request counts, and latency in memory while the ser
|
|
|
273
273
|
- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
|
|
274
274
|
- `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
|
|
275
275
|
|
|
276
|
-
Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
|
|
276
|
+
Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted. The `hoopilot_token_extraction_total{outcome="extracted"|"missing"}` counter (mirrored in `/v1/usage` as `proxy.tokens.extraction`) tracks how often a completion reported usage versus not, so a rising `missing` count flags clients whose token usage is going unaccounted.
|
|
277
277
|
|
|
278
278
|
GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
|
|
279
279
|
|
package/dist/cli.js
CHANGED
|
@@ -1689,6 +1689,7 @@ var MetricsRegistry = class {
|
|
|
1689
1689
|
#upstream = /* @__PURE__ */ new Map();
|
|
1690
1690
|
#copilotQuota;
|
|
1691
1691
|
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
1692
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
1692
1693
|
constructor(options = {}) {
|
|
1693
1694
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
1694
1695
|
}
|
|
@@ -1705,6 +1706,19 @@ var MetricsRegistry = class {
|
|
|
1705
1706
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
1706
1707
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
1707
1708
|
}
|
|
1709
|
+
/**
|
|
1710
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
1711
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
1712
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
1713
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
1714
|
+
*/
|
|
1715
|
+
recordTokenExtraction(extracted) {
|
|
1716
|
+
if (extracted) {
|
|
1717
|
+
this.#extraction.extracted += 1;
|
|
1718
|
+
} else {
|
|
1719
|
+
this.#extraction.missing += 1;
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1708
1722
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
1709
1723
|
recordTokens(model, usage) {
|
|
1710
1724
|
const name = this.#modelLabel(model);
|
|
@@ -1812,7 +1826,7 @@ var MetricsRegistry = class {
|
|
|
1812
1826
|
inFlight: this.#inFlight,
|
|
1813
1827
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
1814
1828
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
1815
|
-
tokens: { byModel, ...tokenTotals },
|
|
1829
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
1816
1830
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
1817
1831
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
1818
1832
|
};
|
|
@@ -1862,6 +1876,16 @@ var MetricsRegistry = class {
|
|
|
1862
1876
|
for (const [model, totals] of this.#tokens) {
|
|
1863
1877
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
1864
1878
|
}
|
|
1879
|
+
lines.push(
|
|
1880
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
1881
|
+
);
|
|
1882
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
1883
|
+
lines.push(
|
|
1884
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
1885
|
+
);
|
|
1886
|
+
lines.push(
|
|
1887
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
1888
|
+
);
|
|
1865
1889
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
1866
1890
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
1867
1891
|
for (const [route, entry] of this.#durations) {
|
|
@@ -2017,23 +2041,25 @@ var MetricsRegistry = class {
|
|
|
2017
2041
|
}
|
|
2018
2042
|
}
|
|
2019
2043
|
};
|
|
2020
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2044
|
+
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
2021
2045
|
const body = response.body;
|
|
2022
2046
|
if (!body) {
|
|
2023
2047
|
return response;
|
|
2024
2048
|
}
|
|
2025
2049
|
const [clientBranch, observerBranch] = body.tee();
|
|
2026
2050
|
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
2027
|
-
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(
|
|
2028
|
-
|
|
2051
|
+
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
|
|
2052
|
+
() => {
|
|
2053
|
+
}
|
|
2054
|
+
);
|
|
2029
2055
|
return new Response(clientBranch, {
|
|
2030
2056
|
headers: response.headers,
|
|
2031
2057
|
status: response.status,
|
|
2032
2058
|
statusText: response.statusText
|
|
2033
2059
|
});
|
|
2034
2060
|
}
|
|
2035
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
2036
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2061
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2062
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
2037
2063
|
if (isSse) {
|
|
2038
2064
|
for (const line of text.split(/\r?\n/)) {
|
|
2039
2065
|
considerSseLine(line, accumulator.consider);
|
|
@@ -2046,7 +2072,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
2046
2072
|
}
|
|
2047
2073
|
accumulator.finish();
|
|
2048
2074
|
}
|
|
2049
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2075
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2050
2076
|
const reader = stream.getReader();
|
|
2051
2077
|
const onAbort = () => {
|
|
2052
2078
|
reader.cancel().catch(() => {
|
|
@@ -2059,7 +2085,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2059
2085
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
2060
2086
|
}
|
|
2061
2087
|
const decoder = new TextDecoder();
|
|
2062
|
-
const
|
|
2088
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2089
|
+
if (!signal?.aborted) {
|
|
2090
|
+
onOutcome(extracted);
|
|
2091
|
+
}
|
|
2092
|
+
} : void 0;
|
|
2093
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
2063
2094
|
let buffer = "";
|
|
2064
2095
|
let bufferedBytes = 0;
|
|
2065
2096
|
let overflowed = false;
|
|
@@ -2107,7 +2138,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2107
2138
|
}
|
|
2108
2139
|
accumulator.finish();
|
|
2109
2140
|
}
|
|
2110
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2141
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2111
2142
|
let model = fallbackModel;
|
|
2112
2143
|
let usage;
|
|
2113
2144
|
return {
|
|
@@ -2126,6 +2157,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2126
2157
|
if (usage) {
|
|
2127
2158
|
onUsage(model, usage);
|
|
2128
2159
|
}
|
|
2160
|
+
onOutcome?.(usage !== void 0);
|
|
2129
2161
|
}
|
|
2130
2162
|
};
|
|
2131
2163
|
}
|
|
@@ -2254,6 +2286,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2254
2286
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2255
2287
|
const readUsage = createUsageReader(client, metrics);
|
|
2256
2288
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2289
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2257
2290
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2258
2291
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2259
2292
|
return async (request) => {
|
|
@@ -2319,6 +2352,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2319
2352
|
client,
|
|
2320
2353
|
metrics,
|
|
2321
2354
|
recordTokens,
|
|
2355
|
+
recordExtraction,
|
|
2322
2356
|
request,
|
|
2323
2357
|
requestLogger,
|
|
2324
2358
|
bufferProxyBodies
|
|
@@ -2334,6 +2368,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2334
2368
|
client,
|
|
2335
2369
|
metrics,
|
|
2336
2370
|
recordTokens,
|
|
2371
|
+
recordExtraction,
|
|
2337
2372
|
request,
|
|
2338
2373
|
requestLogger,
|
|
2339
2374
|
bufferProxyBodies
|
|
@@ -2346,6 +2381,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2346
2381
|
client,
|
|
2347
2382
|
metrics,
|
|
2348
2383
|
recordTokens,
|
|
2384
|
+
recordExtraction,
|
|
2349
2385
|
request,
|
|
2350
2386
|
requestLogger,
|
|
2351
2387
|
bufferProxyBodies
|
|
@@ -2354,7 +2390,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2354
2390
|
}
|
|
2355
2391
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2356
2392
|
return finish(
|
|
2357
|
-
await handleResponsesCompact(
|
|
2393
|
+
await handleResponsesCompact(
|
|
2394
|
+
client,
|
|
2395
|
+
metrics,
|
|
2396
|
+
recordTokens,
|
|
2397
|
+
recordExtraction,
|
|
2398
|
+
request,
|
|
2399
|
+
requestLogger
|
|
2400
|
+
)
|
|
2358
2401
|
);
|
|
2359
2402
|
}
|
|
2360
2403
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2363,6 +2406,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2363
2406
|
client,
|
|
2364
2407
|
metrics,
|
|
2365
2408
|
recordTokens,
|
|
2409
|
+
recordExtraction,
|
|
2366
2410
|
request,
|
|
2367
2411
|
requestLogger,
|
|
2368
2412
|
bufferProxyBodies
|
|
@@ -2439,7 +2483,7 @@ function startHoopilotServer(options = {}) {
|
|
|
2439
2483
|
url: `http://${urlHost(host)}:${server.port}`
|
|
2440
2484
|
};
|
|
2441
2485
|
}
|
|
2442
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2486
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2443
2487
|
const anthropicRequest = await readJson(request);
|
|
2444
2488
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
2445
2489
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -2452,12 +2496,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2452
2496
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2453
2497
|
if (bufferProxyBodies) {
|
|
2454
2498
|
const text = await upstream.text();
|
|
2455
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
2499
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
2456
2500
|
return proxyResponse(
|
|
2457
2501
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
2458
2502
|
);
|
|
2459
2503
|
}
|
|
2460
|
-
const observed = observeResponseUsage(
|
|
2504
|
+
const observed = observeResponseUsage(
|
|
2505
|
+
upstream,
|
|
2506
|
+
model,
|
|
2507
|
+
recordTokens,
|
|
2508
|
+
request.signal,
|
|
2509
|
+
recordExtraction
|
|
2510
|
+
);
|
|
2461
2511
|
if (!observed.body) {
|
|
2462
2512
|
return proxyResponse(observed);
|
|
2463
2513
|
}
|
|
@@ -2475,6 +2525,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2475
2525
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
2476
2526
|
recordTokens(responseModel || model, usage);
|
|
2477
2527
|
}
|
|
2528
|
+
recordExtraction(usage !== void 0);
|
|
2478
2529
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
2479
2530
|
}
|
|
2480
2531
|
function handleAnthropicCountTokens(body) {
|
|
@@ -2500,7 +2551,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
2500
2551
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
2501
2552
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
2502
2553
|
}
|
|
2503
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2554
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2504
2555
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
2505
2556
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
2506
2557
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -2515,11 +2566,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
2515
2566
|
model,
|
|
2516
2567
|
recordTokens,
|
|
2517
2568
|
request.signal,
|
|
2518
|
-
bufferProxyBodies
|
|
2569
|
+
bufferProxyBodies,
|
|
2570
|
+
recordExtraction
|
|
2519
2571
|
)
|
|
2520
2572
|
);
|
|
2521
2573
|
}
|
|
2522
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2574
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2523
2575
|
const body = await readJson(request);
|
|
2524
2576
|
const upstream = await client.chatCompletions(
|
|
2525
2577
|
completionsRequestToChatCompletion(body),
|
|
@@ -2534,7 +2586,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2534
2586
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2535
2587
|
if (bufferProxyBodies) {
|
|
2536
2588
|
const upstreamText = await upstream.text();
|
|
2537
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
2589
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
2538
2590
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
2539
2591
|
return proxyResponse(responseFromText(upstream, text));
|
|
2540
2592
|
}
|
|
@@ -2547,7 +2599,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2547
2599
|
}),
|
|
2548
2600
|
model,
|
|
2549
2601
|
recordTokens,
|
|
2550
|
-
request.signal
|
|
2602
|
+
request.signal,
|
|
2603
|
+
recordExtraction
|
|
2551
2604
|
)
|
|
2552
2605
|
);
|
|
2553
2606
|
}
|
|
@@ -2557,9 +2610,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2557
2610
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
2558
2611
|
recordTokens(responseModel || model, usage);
|
|
2559
2612
|
}
|
|
2613
|
+
recordExtraction(usage !== void 0);
|
|
2560
2614
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
2561
2615
|
}
|
|
2562
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2616
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2563
2617
|
const body = await readJsonText(request);
|
|
2564
2618
|
const upstream = await client.responses(body, request.signal);
|
|
2565
2619
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -2574,11 +2628,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
2574
2628
|
model,
|
|
2575
2629
|
recordTokens,
|
|
2576
2630
|
request.signal,
|
|
2577
|
-
bufferProxyBodies
|
|
2631
|
+
bufferProxyBodies,
|
|
2632
|
+
recordExtraction
|
|
2578
2633
|
)
|
|
2579
2634
|
);
|
|
2580
2635
|
}
|
|
2581
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
2636
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
2582
2637
|
const body = await readJson(request);
|
|
2583
2638
|
const upstream = await client.responses(
|
|
2584
2639
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -2591,17 +2646,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
2591
2646
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
2592
2647
|
const isSse = isStreamingResponse(upstream);
|
|
2593
2648
|
const text = await upstream.text();
|
|
2594
|
-
recordResponseTextUsage(
|
|
2649
|
+
recordResponseTextUsage(
|
|
2650
|
+
text,
|
|
2651
|
+
isSse,
|
|
2652
|
+
normalizeRequestedModel(body.model),
|
|
2653
|
+
recordTokens,
|
|
2654
|
+
recordExtraction
|
|
2655
|
+
);
|
|
2595
2656
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
2596
2657
|
}
|
|
2597
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
2658
|
+
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
|
|
2598
2659
|
const isSse = isStreamingResponse(response);
|
|
2599
2660
|
if (bufferBody && response.body) {
|
|
2600
2661
|
const text = await response.text();
|
|
2601
|
-
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
|
|
2662
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
2602
2663
|
return responseFromText(response, text);
|
|
2603
2664
|
}
|
|
2604
|
-
return observeResponseUsage(response, fallbackModel, recordTokens, signal);
|
|
2665
|
+
return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
|
|
2605
2666
|
}
|
|
2606
2667
|
function responseFromText(source, text) {
|
|
2607
2668
|
return new Response(text, {
|