@openhoo/hoopilot 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/dist/cli.js +240 -30
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +219 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +60 -2
- package/dist/index.d.ts +60 -2
- package/dist/index.js +218 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -267,13 +267,15 @@ Incoming `x-request-id` headers are preserved on responses. If a request has no
|
|
|
267
267
|
|
|
268
268
|
## Metrics and usage
|
|
269
269
|
|
|
270
|
-
Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage.
|
|
270
|
+
Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage, plus your GitHub REST API rate-limit budget.
|
|
271
271
|
|
|
272
|
-
- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge,
|
|
273
|
-
- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why.
|
|
274
|
-
- `hoopilot usage` prints your Copilot plan and quota from the command line.
|
|
272
|
+
- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, Copilot quota gauges, and GitHub REST API rate-limit gauges (`hoopilot_github_ratelimit_limit`, `_remaining`, `_used`, `_reset_timestamp_seconds`, `_retry_after_seconds`, labelled by `resource`) — the quota and rate-limit series appear after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
|
|
273
|
+
- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
|
|
274
|
+
- `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
|
|
275
275
|
|
|
276
|
-
Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
|
|
276
|
+
Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted. The `hoopilot_token_extraction_total{outcome="extracted"|"missing"}` counter (mirrored in `/v1/usage` as `proxy.tokens.extraction`) tracks how often a completion reported usage versus not, so a rising `missing` count flags clients whose token usage is going unaccounted.
|
|
277
|
+
|
|
278
|
+
GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
|
|
277
279
|
|
|
278
280
|
`/metrics` and `/v1/usage` are subject to the same `HOOPILOT_API_KEY` gate as the other routes.
|
|
279
281
|
|
package/dist/cli.js
CHANGED
|
@@ -179,6 +179,38 @@ function applyGithubApiHeaders(headers, token) {
|
|
|
179
179
|
headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
|
|
180
180
|
return headers;
|
|
181
181
|
}
|
|
182
|
+
/**
 * Build a rate-limit record from the `x-ratelimit-*` and `retry-after`
 * headers of a response.
 *
 * @param {Headers} headers - Response headers; only `get(name)` is used.
 * @param {number} [nowMs=Date.now()] - Timestamp stored as `observedAtMs`.
 * @returns {object | undefined} A record holding only the fields that parsed,
 *   plus `observedAtMs` and `resource`; `undefined` when none of the five
 *   numeric headers yielded a value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const read = (name) => headerInt(headers, name);
  const limit = read("x-ratelimit-limit");
  const remaining = read("x-ratelimit-remaining");
  const used = read("x-ratelimit-used");
  const resetEpochSeconds = read("x-ratelimit-reset");
  const retryAfterSeconds = read("retry-after");

  const numericFields = [limit, remaining, used, resetEpochSeconds, retryAfterSeconds];
  if (numericFields.every((field) => field === void 0)) {
    // Nothing rate-limit related on this response.
    return void 0;
  }

  // A missing or blank resource header falls back to the "unknown" bucket.
  const resource = headers.get("x-ratelimit-resource")?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit,
    observedAtMs: nowMs,
    remaining,
    resetEpochSeconds,
    resource,
    retryAfterSeconds,
    used
  });
}
|
|
201
|
+
/**
 * Read a header as a non-negative decimal integer.
 *
 * @param {Headers} headers - Header collection; only `get(name)` is used.
 * @param {string} name - Header name to read.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   header is absent, is not a plain run of decimal digits, or exceeds the
 *   safe-integer range.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null) {
    return void 0;
  }
  const text = raw.trim();
  // Number.parseInt stops at the first non-digit, so "1e3" would become 1 and
  // "12abc" would become 12. Reject anything that is not purely digits rather
  // than report a silently truncated number.
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  // Very long digit runs lose precision; treat them as unparseable.
  return Number.isSafeInteger(value) ? value : void 0;
}
|
|
209
|
+
/**
 * Copy a rate-limit record, leaving out every property whose value is
 * exactly `undefined`. Other falsy values (0, "", null) are kept.
 *
 * @param {object} rateLimit - Record to compact.
 * @returns {object} A new object with the defined properties only.
 */
function removeUndefinedRateLimit(rateLimit) {
  const kept = [];
  for (const entry of Object.entries(rateLimit)) {
    if (entry[1] !== void 0) {
      kept.push(entry);
    }
  }
  return Object.fromEntries(kept);
}
|
|
182
214
|
var CopilotClient = class {
|
|
183
215
|
#auth;
|
|
184
216
|
#allowUnsafeUpstream;
|
|
@@ -1642,6 +1674,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
|
1642
1674
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
1643
1675
|
var MAX_TRACKED_MODELS = 200;
|
|
1644
1676
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
1677
|
+
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
1645
1678
|
var LABEL_SEPARATOR = "";
|
|
1646
1679
|
var UNKNOWN_MODEL = "unknown";
|
|
1647
1680
|
function emptyModelTotals() {
|
|
@@ -1655,6 +1688,8 @@ var MetricsRegistry = class {
|
|
|
1655
1688
|
#tokens = /* @__PURE__ */ new Map();
|
|
1656
1689
|
#upstream = /* @__PURE__ */ new Map();
|
|
1657
1690
|
#copilotQuota;
|
|
1691
|
+
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
1692
|
+
#extraction = { extracted: 0, missing: 0 };
|
|
1658
1693
|
constructor(options = {}) {
|
|
1659
1694
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
1660
1695
|
}
|
|
@@ -1671,6 +1706,19 @@ var MetricsRegistry = class {
|
|
|
1671
1706
|
this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
|
|
1672
1707
|
this.#observeDuration(observation.route, observation.durationMs / 1e3);
|
|
1673
1708
|
}
|
|
1709
|
+
/**
|
|
1710
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
1711
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
1712
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
1713
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
1714
|
+
*/
|
|
1715
|
+
recordTokenExtraction(extracted) {
|
|
1716
|
+
if (extracted) {
|
|
1717
|
+
this.#extraction.extracted += 1;
|
|
1718
|
+
} else {
|
|
1719
|
+
this.#extraction.missing += 1;
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1674
1722
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
1675
1723
|
recordTokens(model, usage) {
|
|
1676
1724
|
const name = this.#modelLabel(model);
|
|
@@ -1692,17 +1740,39 @@ var MetricsRegistry = class {
|
|
|
1692
1740
|
recordCopilotQuota(usage) {
|
|
1693
1741
|
this.#copilotQuota = usage;
|
|
1694
1742
|
}
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1743
|
+
/**
|
|
1744
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
1745
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
1746
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
1747
|
+
*/
|
|
1748
|
+
recordGithubRateLimit(rateLimit) {
|
|
1749
|
+
if (!rateLimit) {
|
|
1750
|
+
return;
|
|
1751
|
+
}
|
|
1752
|
+
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
1753
|
+
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
1754
|
+
}
|
|
1755
|
+
// Sanitize the model into a bounded label. The model can originate from a
|
|
1756
|
+
// client request, so cap its length, strip characters that would corrupt the
|
|
1757
|
+
// exposition format, and fold overflow past the cardinality limit into
|
|
1758
|
+
// UNKNOWN_MODEL to keep the series count bounded.
|
|
1699
1759
|
#modelLabel(model) {
|
|
1700
|
-
const cleaned = model
|
|
1760
|
+
const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
1701
1761
|
if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
|
|
1702
1762
|
return UNKNOWN_MODEL;
|
|
1703
1763
|
}
|
|
1704
1764
|
return cleaned;
|
|
1705
1765
|
}
|
|
1766
|
+
// The resource comes from a trusted upstream header, but clean and bound it
|
|
1767
|
+
// with the same discipline as model labels: strip control characters that
|
|
1768
|
+
// would corrupt the exposition format and fold overflow into "unknown".
|
|
1769
|
+
#rateLimitResource(resource) {
|
|
1770
|
+
const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
1771
|
+
if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
|
|
1772
|
+
return UNKNOWN_MODEL;
|
|
1773
|
+
}
|
|
1774
|
+
return cleaned;
|
|
1775
|
+
}
|
|
1706
1776
|
#observeDuration(route, seconds) {
|
|
1707
1777
|
const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
|
|
1708
1778
|
const entry = this.#durations.get(route) ?? {
|
|
@@ -1747,11 +1817,16 @@ var MetricsRegistry = class {
|
|
|
1747
1817
|
upstreamErrors += count;
|
|
1748
1818
|
}
|
|
1749
1819
|
}
|
|
1820
|
+
const githubRateLimit = {};
|
|
1821
|
+
for (const [resource, rateLimit] of this.#githubRateLimit) {
|
|
1822
|
+
githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
|
|
1823
|
+
}
|
|
1750
1824
|
return {
|
|
1825
|
+
githubRateLimit,
|
|
1751
1826
|
inFlight: this.#inFlight,
|
|
1752
1827
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
1753
1828
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
1754
|
-
tokens: { byModel, ...tokenTotals },
|
|
1829
|
+
tokens: { byModel, extraction: { ...this.#extraction }, ...tokenTotals },
|
|
1755
1830
|
upstream: { errors: upstreamErrors, total: upstreamTotal },
|
|
1756
1831
|
uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
|
|
1757
1832
|
};
|
|
@@ -1801,6 +1876,16 @@ var MetricsRegistry = class {
|
|
|
1801
1876
|
for (const [model, totals] of this.#tokens) {
|
|
1802
1877
|
lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
|
|
1803
1878
|
}
|
|
1879
|
+
lines.push(
|
|
1880
|
+
"# HELP hoopilot_token_extraction_total Completions by whether upstream reported token usage."
|
|
1881
|
+
);
|
|
1882
|
+
lines.push("# TYPE hoopilot_token_extraction_total counter");
|
|
1883
|
+
lines.push(
|
|
1884
|
+
`hoopilot_token_extraction_total${labels({ outcome: "extracted" })} ${this.#extraction.extracted}`
|
|
1885
|
+
);
|
|
1886
|
+
lines.push(
|
|
1887
|
+
`hoopilot_token_extraction_total${labels({ outcome: "missing" })} ${this.#extraction.missing}`
|
|
1888
|
+
);
|
|
1804
1889
|
lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
|
|
1805
1890
|
lines.push("# TYPE hoopilot_request_duration_seconds histogram");
|
|
1806
1891
|
for (const [route, entry] of this.#durations) {
|
|
@@ -1818,10 +1903,43 @@ var MetricsRegistry = class {
|
|
|
1818
1903
|
lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
|
|
1819
1904
|
lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
|
|
1820
1905
|
}
|
|
1906
|
+
this.#renderGithubRateLimit(lines);
|
|
1821
1907
|
this.#renderCopilotQuota(lines);
|
|
1822
1908
|
return `${lines.join("\n")}
|
|
1823
1909
|
`;
|
|
1824
1910
|
}
|
|
1911
|
+
#renderGithubRateLimit(lines) {
|
|
1912
|
+
const entries = [...this.#githubRateLimit.values()];
|
|
1913
|
+
if (entries.length === 0) {
|
|
1914
|
+
return;
|
|
1915
|
+
}
|
|
1916
|
+
const gauge = (suffix, help, pick) => {
|
|
1917
|
+
const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
|
|
1918
|
+
if (present.length === 0) {
|
|
1919
|
+
return;
|
|
1920
|
+
}
|
|
1921
|
+
lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
|
|
1922
|
+
lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
|
|
1923
|
+
for (const rateLimit of present) {
|
|
1924
|
+
lines.push(
|
|
1925
|
+
`hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
|
|
1926
|
+
);
|
|
1927
|
+
}
|
|
1928
|
+
};
|
|
1929
|
+
gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
|
|
1930
|
+
gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
|
|
1931
|
+
gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
|
|
1932
|
+
gauge(
|
|
1933
|
+
"reset_timestamp_seconds",
|
|
1934
|
+
"Unix epoch when the GitHub REST API window resets.",
|
|
1935
|
+
(r) => r.resetEpochSeconds
|
|
1936
|
+
);
|
|
1937
|
+
gauge(
|
|
1938
|
+
"retry_after_seconds",
|
|
1939
|
+
"Seconds to wait after a GitHub secondary-limit response.",
|
|
1940
|
+
(r) => r.retryAfterSeconds
|
|
1941
|
+
);
|
|
1942
|
+
}
|
|
1825
1943
|
#renderCopilotQuota(lines) {
|
|
1826
1944
|
const usage = this.#copilotQuota;
|
|
1827
1945
|
if (!usage) {
|
|
@@ -1923,23 +2041,25 @@ var MetricsRegistry = class {
|
|
|
1923
2041
|
}
|
|
1924
2042
|
}
|
|
1925
2043
|
};
|
|
1926
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
|
|
2044
|
+
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
1927
2045
|
const body = response.body;
|
|
1928
2046
|
if (!body) {
|
|
1929
2047
|
return response;
|
|
1930
2048
|
}
|
|
1931
2049
|
const [clientBranch, observerBranch] = body.tee();
|
|
1932
2050
|
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
1933
|
-
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(
|
|
1934
|
-
|
|
2051
|
+
void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal, onOutcome).catch(
|
|
2052
|
+
() => {
|
|
2053
|
+
}
|
|
2054
|
+
);
|
|
1935
2055
|
return new Response(clientBranch, {
|
|
1936
2056
|
headers: response.headers,
|
|
1937
2057
|
status: response.status,
|
|
1938
2058
|
statusText: response.statusText
|
|
1939
2059
|
});
|
|
1940
2060
|
}
|
|
1941
|
-
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
1942
|
-
const accumulator = createUsageAccumulator(fallbackModel, onUsage);
|
|
2061
|
+
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
2062
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
1943
2063
|
if (isSse) {
|
|
1944
2064
|
for (const line of text.split(/\r?\n/)) {
|
|
1945
2065
|
considerSseLine(line, accumulator.consider);
|
|
@@ -1952,7 +2072,7 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage) {
|
|
|
1952
2072
|
}
|
|
1953
2073
|
accumulator.finish();
|
|
1954
2074
|
}
|
|
1955
|
-
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
2075
|
+
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
1956
2076
|
const reader = stream.getReader();
|
|
1957
2077
|
const onAbort = () => {
|
|
1958
2078
|
reader.cancel().catch(() => {
|
|
@@ -1965,7 +2085,12 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
1965
2085
|
signal?.addEventListener("abort", onAbort, { once: true });
|
|
1966
2086
|
}
|
|
1967
2087
|
const decoder = new TextDecoder();
|
|
1968
|
-
const
|
|
2088
|
+
const guardedOutcome = onOutcome ? (extracted) => {
|
|
2089
|
+
if (!signal?.aborted) {
|
|
2090
|
+
onOutcome(extracted);
|
|
2091
|
+
}
|
|
2092
|
+
} : void 0;
|
|
2093
|
+
const accumulator = createUsageAccumulator(fallbackModel, onUsage, guardedOutcome);
|
|
1969
2094
|
let buffer = "";
|
|
1970
2095
|
let bufferedBytes = 0;
|
|
1971
2096
|
let overflowed = false;
|
|
@@ -2013,7 +2138,7 @@ async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
|
|
|
2013
2138
|
}
|
|
2014
2139
|
accumulator.finish();
|
|
2015
2140
|
}
|
|
2016
|
-
function createUsageAccumulator(fallbackModel, onUsage) {
|
|
2141
|
+
function createUsageAccumulator(fallbackModel, onUsage, onOutcome) {
|
|
2017
2142
|
let model = fallbackModel;
|
|
2018
2143
|
let usage;
|
|
2019
2144
|
return {
|
|
@@ -2032,6 +2157,7 @@ function createUsageAccumulator(fallbackModel, onUsage) {
|
|
|
2032
2157
|
if (usage) {
|
|
2033
2158
|
onUsage(model, usage);
|
|
2034
2159
|
}
|
|
2160
|
+
onOutcome?.(usage !== void 0);
|
|
2035
2161
|
}
|
|
2036
2162
|
};
|
|
2037
2163
|
}
|
|
@@ -2062,6 +2188,37 @@ function modelText(value) {
|
|
|
2062
2188
|
function nonNegative(value) {
|
|
2063
2189
|
return Number.isFinite(value) && value > 0 ? value : 0;
|
|
2064
2190
|
}
|
|
2191
|
+
/**
 * Strip control characters from a label value and trim the result.
 * A character is dropped when its first UTF-16 code unit is 31 or lower,
 * or is 127 (DEL); everything else is kept in order.
 *
 * @param {string} value - Raw label text.
 * @returns {string} The cleaned, trimmed text (possibly empty).
 */
function cleanLabel(value) {
  const isPrintable = (char) => {
    const code = char.charCodeAt(0);
    return code > 31 && code !== 127;
  };
  return Array.from(value).filter(isPrintable).join("").trim();
}
|
|
2201
|
+
/**
 * Convert a stored rate-limit record into the shape reported in snapshots.
 *
 * `observedAtMs` becomes the ISO string `observedAt`; `limit`, `remaining`,
 * `used` and `retryAfterSeconds` are copied when defined; and
 * `resetEpochSeconds` becomes the ISO string `resetAt`.
 *
 * @param {object} rateLimit - Record with `observedAtMs` and optional fields.
 * @returns {object} Snapshot containing only the fields that were available.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  if (rateLimit.limit !== void 0) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== void 0) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== void 0) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    // The reset value is taken from a response header. A number outside the
    // range Date can represent makes toISOString() throw a RangeError, which
    // would break the whole snapshot, so such a value is left out instead.
    const resetAt = new Date(rateLimit.resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetAt.getTime())) {
      snapshot.resetAt = resetAt.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
|
|
2065
2222
|
function labelKey(...parts) {
|
|
2066
2223
|
return parts.join(LABEL_SEPARATOR);
|
|
2067
2224
|
}
|
|
@@ -2129,6 +2286,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2129
2286
|
const metrics = options.metrics ?? new MetricsRegistry();
|
|
2130
2287
|
const readUsage = createUsageReader(client, metrics);
|
|
2131
2288
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
2289
|
+
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
2132
2290
|
const streamingProxyMode = resolveStreamingProxyMode(options);
|
|
2133
2291
|
const bufferProxyBodies = shouldBufferProxyBodies(streamingProxyMode);
|
|
2134
2292
|
return async (request) => {
|
|
@@ -2194,6 +2352,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2194
2352
|
client,
|
|
2195
2353
|
metrics,
|
|
2196
2354
|
recordTokens,
|
|
2355
|
+
recordExtraction,
|
|
2197
2356
|
request,
|
|
2198
2357
|
requestLogger,
|
|
2199
2358
|
bufferProxyBodies
|
|
@@ -2209,6 +2368,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2209
2368
|
client,
|
|
2210
2369
|
metrics,
|
|
2211
2370
|
recordTokens,
|
|
2371
|
+
recordExtraction,
|
|
2212
2372
|
request,
|
|
2213
2373
|
requestLogger,
|
|
2214
2374
|
bufferProxyBodies
|
|
@@ -2221,6 +2381,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2221
2381
|
client,
|
|
2222
2382
|
metrics,
|
|
2223
2383
|
recordTokens,
|
|
2384
|
+
recordExtraction,
|
|
2224
2385
|
request,
|
|
2225
2386
|
requestLogger,
|
|
2226
2387
|
bufferProxyBodies
|
|
@@ -2229,7 +2390,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
2229
2390
|
}
|
|
2230
2391
|
if (request.method === "POST" && apiPath === "/v1/responses/compact") {
|
|
2231
2392
|
return finish(
|
|
2232
|
-
await handleResponsesCompact(
|
|
2393
|
+
await handleResponsesCompact(
|
|
2394
|
+
client,
|
|
2395
|
+
metrics,
|
|
2396
|
+
recordTokens,
|
|
2397
|
+
recordExtraction,
|
|
2398
|
+
request,
|
|
2399
|
+
requestLogger
|
|
2400
|
+
)
|
|
2233
2401
|
);
|
|
2234
2402
|
}
|
|
2235
2403
|
if (request.method === "POST" && apiPath === "/v1/responses") {
|
|
@@ -2238,6 +2406,7 @@ function createHoopilotHandler(options = {}) {
|
|
|
2238
2406
|
client,
|
|
2239
2407
|
metrics,
|
|
2240
2408
|
recordTokens,
|
|
2409
|
+
recordExtraction,
|
|
2241
2410
|
request,
|
|
2242
2411
|
requestLogger,
|
|
2243
2412
|
bufferProxyBodies
|
|
@@ -2314,7 +2483,7 @@ function startHoopilotServer(options = {}) {
|
|
|
2314
2483
|
url: `http://${urlHost(host)}:${server.port}`
|
|
2315
2484
|
};
|
|
2316
2485
|
}
|
|
2317
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2486
|
+
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2318
2487
|
const anthropicRequest = await readJson(request);
|
|
2319
2488
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
2320
2489
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -2327,12 +2496,18 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2327
2496
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2328
2497
|
if (bufferProxyBodies) {
|
|
2329
2498
|
const text = await upstream.text();
|
|
2330
|
-
recordResponseTextUsage(text, true, model, recordTokens);
|
|
2499
|
+
recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
|
|
2331
2500
|
return proxyResponse(
|
|
2332
2501
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
2333
2502
|
);
|
|
2334
2503
|
}
|
|
2335
|
-
const observed = observeResponseUsage(
|
|
2504
|
+
const observed = observeResponseUsage(
|
|
2505
|
+
upstream,
|
|
2506
|
+
model,
|
|
2507
|
+
recordTokens,
|
|
2508
|
+
request.signal,
|
|
2509
|
+
recordExtraction
|
|
2510
|
+
);
|
|
2336
2511
|
if (!observed.body) {
|
|
2337
2512
|
return proxyResponse(observed);
|
|
2338
2513
|
}
|
|
@@ -2350,6 +2525,7 @@ async function handleAnthropicMessages(client, metrics, recordTokens, request, l
|
|
|
2350
2525
|
const responseModel = typeof body.model === "string" ? body.model.trim() : "";
|
|
2351
2526
|
recordTokens(responseModel || model, usage);
|
|
2352
2527
|
}
|
|
2528
|
+
recordExtraction(usage !== void 0);
|
|
2353
2529
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
2354
2530
|
}
|
|
2355
2531
|
function handleAnthropicCountTokens(body) {
|
|
@@ -2375,7 +2551,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
2375
2551
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
2376
2552
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
2377
2553
|
}
|
|
2378
|
-
async function handleChatCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2554
|
+
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2379
2555
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
2380
2556
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
2381
2557
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -2390,11 +2566,12 @@ async function handleChatCompletions(client, metrics, recordTokens, request, log
|
|
|
2390
2566
|
model,
|
|
2391
2567
|
recordTokens,
|
|
2392
2568
|
request.signal,
|
|
2393
|
-
bufferProxyBodies
|
|
2569
|
+
bufferProxyBodies,
|
|
2570
|
+
recordExtraction
|
|
2394
2571
|
)
|
|
2395
2572
|
);
|
|
2396
2573
|
}
|
|
2397
|
-
async function handleCompletions(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2574
|
+
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2398
2575
|
const body = await readJson(request);
|
|
2399
2576
|
const upstream = await client.chatCompletions(
|
|
2400
2577
|
completionsRequestToChatCompletion(body),
|
|
@@ -2409,7 +2586,7 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2409
2586
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
2410
2587
|
if (bufferProxyBodies) {
|
|
2411
2588
|
const upstreamText = await upstream.text();
|
|
2412
|
-
recordResponseTextUsage(upstreamText, true, model, recordTokens);
|
|
2589
|
+
recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
|
|
2413
2590
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
2414
2591
|
return proxyResponse(responseFromText(upstream, text));
|
|
2415
2592
|
}
|
|
@@ -2422,7 +2599,8 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2422
2599
|
}),
|
|
2423
2600
|
model,
|
|
2424
2601
|
recordTokens,
|
|
2425
|
-
request.signal
|
|
2602
|
+
request.signal,
|
|
2603
|
+
recordExtraction
|
|
2426
2604
|
)
|
|
2427
2605
|
);
|
|
2428
2606
|
}
|
|
@@ -2432,9 +2610,10 @@ async function handleCompletions(client, metrics, recordTokens, request, logger,
|
|
|
2432
2610
|
const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
|
|
2433
2611
|
recordTokens(responseModel || model, usage);
|
|
2434
2612
|
}
|
|
2613
|
+
recordExtraction(usage !== void 0);
|
|
2435
2614
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
2436
2615
|
}
|
|
2437
|
-
async function handleResponses(client, metrics, recordTokens, request, logger, bufferProxyBodies) {
|
|
2616
|
+
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
2438
2617
|
const body = await readJsonText(request);
|
|
2439
2618
|
const upstream = await client.responses(body, request.signal);
|
|
2440
2619
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -2449,11 +2628,12 @@ async function handleResponses(client, metrics, recordTokens, request, logger, b
|
|
|
2449
2628
|
model,
|
|
2450
2629
|
recordTokens,
|
|
2451
2630
|
request.signal,
|
|
2452
|
-
bufferProxyBodies
|
|
2631
|
+
bufferProxyBodies,
|
|
2632
|
+
recordExtraction
|
|
2453
2633
|
)
|
|
2454
2634
|
);
|
|
2455
2635
|
}
|
|
2456
|
-
async function handleResponsesCompact(client, metrics, recordTokens, request, logger) {
|
|
2636
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
2457
2637
|
const body = await readJson(request);
|
|
2458
2638
|
const upstream = await client.responses(
|
|
2459
2639
|
JSON.stringify({ ...body, stream: false }),
|
|
@@ -2466,17 +2646,23 @@ async function handleResponsesCompact(client, metrics, recordTokens, request, lo
|
|
|
2466
2646
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
2467
2647
|
const isSse = isStreamingResponse(upstream);
|
|
2468
2648
|
const text = await upstream.text();
|
|
2469
|
-
recordResponseTextUsage(
|
|
2649
|
+
recordResponseTextUsage(
|
|
2650
|
+
text,
|
|
2651
|
+
isSse,
|
|
2652
|
+
normalizeRequestedModel(body.model),
|
|
2653
|
+
recordTokens,
|
|
2654
|
+
recordExtraction
|
|
2655
|
+
);
|
|
2470
2656
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
2471
2657
|
}
|
|
2472
|
-
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody) {
|
|
2658
|
+
async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
|
|
2473
2659
|
const isSse = isStreamingResponse(response);
|
|
2474
2660
|
if (bufferBody && response.body) {
|
|
2475
2661
|
const text = await response.text();
|
|
2476
|
-
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens);
|
|
2662
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
2477
2663
|
return responseFromText(response, text);
|
|
2478
2664
|
}
|
|
2479
|
-
return observeResponseUsage(response, fallbackModel, recordTokens, signal);
|
|
2665
|
+
return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
|
|
2480
2666
|
}
|
|
2481
2667
|
function responseFromText(source, text) {
|
|
2482
2668
|
return new Response(text, {
|
|
@@ -2905,6 +3091,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
2905
3091
|
try {
|
|
2906
3092
|
const upstream = await client.usage(signal);
|
|
2907
3093
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
3094
|
+
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
2908
3095
|
if (!upstream.ok) {
|
|
2909
3096
|
return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
|
|
2910
3097
|
}
|
|
@@ -3741,6 +3928,7 @@ async function runUsage(options = {}) {
|
|
|
3741
3928
|
}
|
|
3742
3929
|
throw new Error(message);
|
|
3743
3930
|
}
|
|
3931
|
+
const rateLimit = parseRateLimitHeaders(response.headers);
|
|
3744
3932
|
const usage = normalizeCopilotUsage(await response.json().catch(() => ({})));
|
|
3745
3933
|
logger.debug(
|
|
3746
3934
|
{ event: "usage.fetch.succeeded", plan: usage.plan },
|
|
@@ -3749,8 +3937,30 @@ async function runUsage(options = {}) {
|
|
|
3749
3937
|
for (const line of formatCopilotUsage(usage)) {
|
|
3750
3938
|
console.log(line);
|
|
3751
3939
|
}
|
|
3940
|
+
if (rateLimit) {
|
|
3941
|
+
console.log(formatGithubRateLimit(rateLimit));
|
|
3942
|
+
}
|
|
3752
3943
|
return usage;
|
|
3753
3944
|
}
|
|
3945
|
+
/**
 * Format a rate-limit record as the single line printed by the usage command.
 *
 * The request part prefers "remaining/limit", then "remaining" alone, then
 * "used". The reset time and retry-after delay are appended when present.
 * The resource name is shown in parentheses unless it is empty or "unknown".
 *
 * @param {object} rateLimit - Parsed rate-limit record.
 * @returns {string} Line of the form `GitHub API rate limit (res): details`,
 *   with `n/a` as the details when nothing could be reported.
 */
function formatGithubRateLimit(rateLimit) {
  const parts = [];
  if (rateLimit.remaining !== void 0 && rateLimit.limit !== void 0) {
    parts.push(`${rateLimit.remaining}/${rateLimit.limit} requests remaining`);
  } else if (rateLimit.remaining !== void 0) {
    parts.push(`${rateLimit.remaining} requests remaining`);
  } else if (rateLimit.used !== void 0) {
    parts.push(`${rateLimit.used} requests used`);
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    // A header value outside the range Date can represent would make
    // toISOString() throw a RangeError; omit the reset time in that case.
    const resetAt = new Date(rateLimit.resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetAt.getTime())) {
      parts.push(`resets ${resetAt.toISOString()}`);
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    parts.push(`retry after ${rateLimit.retryAfterSeconds}s`);
  }
  const detail = parts.length > 0 ? parts.join(", ") : "n/a";
  const resource = rateLimit.resource && rateLimit.resource !== "unknown" ? ` (${rateLimit.resource})` : "";
  return `GitHub API rate limit${resource}: ${detail}`;
}
|
|
3754
3964
|
function formatCopilotUsage(usage) {
|
|
3755
3965
|
const lines = [];
|
|
3756
3966
|
if (usage.plan) {
|