@openhoo/hoopilot 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/cli.js +154 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +133 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +48 -1
- package/dist/index.d.ts +48 -1
- package/dist/index.js +132 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -267,14 +267,16 @@ Incoming `x-request-id` headers are preserved on responses. If a request has no
|
|
|
267
267
|
|
|
268
268
|
## Metrics and usage
|
|
269
269
|
|
|
270
|
-
Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage.
|
|
270
|
+
Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage, plus your GitHub REST API rate-limit budget.
|
|
271
271
|
|
|
272
|
-
- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge,
|
|
273
|
-
- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why.
|
|
274
|
-
- `hoopilot usage` prints your Copilot plan and quota from the command line.
|
|
272
|
+
- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, Copilot quota gauges, and GitHub REST API rate-limit gauges (`hoopilot_github_ratelimit_limit`, `_remaining`, `_used`, `_reset_timestamp_seconds`, `_retry_after_seconds`, labelled by `resource`) — the quota and rate-limit series appear after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
|
|
273
|
+
- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
|
|
274
|
+
- `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
|
|
275
275
|
|
|
276
276
|
Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
|
|
277
277
|
|
|
278
|
+
GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
|
|
279
|
+
|
|
278
280
|
`/metrics` and `/v1/usage` are subject to the same `HOOPILOT_API_KEY` gate as the other routes.
|
|
279
281
|
|
|
280
282
|
## Troubleshooting
|
package/dist/cli.js
CHANGED
|
@@ -179,6 +179,38 @@ function applyGithubApiHeaders(headers, token) {
|
|
|
179
179
|
headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
|
|
180
180
|
return headers;
|
|
181
181
|
}
|
|
182
|
+
/**
 * Extract rate-limit information from a response's headers.
 *
 * The numeric headers `x-ratelimit-limit`, `x-ratelimit-remaining`,
 * `x-ratelimit-used`, `x-ratelimit-reset` and `retry-after` are each read
 * through `headerInt`. The resource name comes from `x-ratelimit-resource`
 * and falls back to "unknown" when that header is missing or blank.
 *
 * @param headers - Object exposing `get(name)` that yields a string or null.
 * @param nowMs - Timestamp stored as `observedAtMs`; defaults to `Date.now()`.
 * @returns The rate-limit record with undefined fields removed, or
 *   `undefined` when none of the five numeric headers produced a value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const numeric = {
    limit: headerInt(headers, "x-ratelimit-limit"),
    remaining: headerInt(headers, "x-ratelimit-remaining"),
    used: headerInt(headers, "x-ratelimit-used"),
    resetEpochSeconds: headerInt(headers, "x-ratelimit-reset"),
    retryAfterSeconds: headerInt(headers, "retry-after")
  };
  const hasAnyValue = Object.values(numeric).some((value) => value !== void 0);
  if (!hasAnyValue) {
    // No usable rate-limit header at all: report "nothing observed".
    return void 0;
  }
  const { limit, remaining, used, resetEpochSeconds, retryAfterSeconds } = numeric;
  const resourceHeader = headers.get("x-ratelimit-resource");
  const resource = resourceHeader?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit,
    observedAtMs: nowMs,
    remaining,
    resetEpochSeconds,
    resource,
    retryAfterSeconds,
    used
  });
}
|
|
201
|
+
/**
 * Read a header as a non-negative integer.
 *
 * @param headers - Object exposing `get(name)` that yields a string or null.
 * @param name - Header name to read.
 * @returns The parsed integer, or `undefined` when the header is absent,
 *   is not made up entirely of decimal digits (after trimming), or is too
 *   large to be represented exactly.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null) {
    return void 0;
  }
  const text = raw.trim();
  // Number.parseInt would accept a numeric prefix ("12abc" -> 12, "1.9" -> 1),
  // turning a malformed header into a plausible-looking metric. Require the
  // whole value to be digits instead; this also rejects signs and blanks.
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  // Digits-only input is never negative; the remaining risk is a value that
  // exceeds exact integer precision (or overflows to Infinity).
  return Number.isSafeInteger(value) ? value : void 0;
}
|
|
209
|
+
/**
 * Return a shallow copy of `rateLimit` without the properties whose value is
 * `undefined`. Other falsy values (0, null, "") are kept, and the remaining
 * keys keep their original order.
 */
function removeUndefinedRateLimit(rateLimit) {
  const kept = [];
  for (const entry of Object.entries(rateLimit)) {
    if (entry[1] !== void 0) {
      kept.push(entry);
    }
  }
  return Object.fromEntries(kept);
}
|
|
182
214
|
var CopilotClient = class {
|
|
183
215
|
#auth;
|
|
184
216
|
#allowUnsafeUpstream;
|
|
@@ -1642,6 +1674,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
|
1642
1674
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
1643
1675
|
var MAX_TRACKED_MODELS = 200;
|
|
1644
1676
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
1677
|
+
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
1645
1678
|
var LABEL_SEPARATOR = "";
|
|
1646
1679
|
var UNKNOWN_MODEL = "unknown";
|
|
1647
1680
|
function emptyModelTotals() {
|
|
@@ -1655,6 +1688,7 @@ var MetricsRegistry = class {
|
|
|
1655
1688
|
#tokens = /* @__PURE__ */ new Map();
|
|
1656
1689
|
#upstream = /* @__PURE__ */ new Map();
|
|
1657
1690
|
#copilotQuota;
|
|
1691
|
+
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
1658
1692
|
constructor(options = {}) {
|
|
1659
1693
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
1660
1694
|
}
|
|
@@ -1692,17 +1726,39 @@ var MetricsRegistry = class {
|
|
|
1692
1726
|
recordCopilotQuota(usage) {
|
|
1693
1727
|
this.#copilotQuota = usage;
|
|
1694
1728
|
}
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1729
|
+
/**
|
|
1730
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
1731
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
1732
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
1733
|
+
*/
|
|
1734
|
+
recordGithubRateLimit(rateLimit) {
|
|
1735
|
+
if (!rateLimit) {
|
|
1736
|
+
return;
|
|
1737
|
+
}
|
|
1738
|
+
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
1739
|
+
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
1740
|
+
}
|
|
1741
|
+
// Sanitize the model into a bounded label. The model can originate from a
|
|
1742
|
+
// client request, so cap its length, strip characters that would corrupt the
|
|
1743
|
+
// exposition format, and fold overflow past the cardinality limit into
|
|
1744
|
+
// UNKNOWN_MODEL to keep the series count bounded.
|
|
1699
1745
|
#modelLabel(model) {
|
|
1700
|
-
const cleaned = model
|
|
1746
|
+
const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
1701
1747
|
if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
|
|
1702
1748
|
return UNKNOWN_MODEL;
|
|
1703
1749
|
}
|
|
1704
1750
|
return cleaned;
|
|
1705
1751
|
}
|
|
1752
|
+
// The resource comes from a trusted upstream header, but clean and bound it
|
|
1753
|
+
// with the same discipline as model labels: strip control characters that
|
|
1754
|
+
// would corrupt the exposition format and fold overflow into "unknown".
|
|
1755
|
+
#rateLimitResource(resource) {
|
|
1756
|
+
const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
1757
|
+
if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
|
|
1758
|
+
return UNKNOWN_MODEL;
|
|
1759
|
+
}
|
|
1760
|
+
return cleaned;
|
|
1761
|
+
}
|
|
1706
1762
|
#observeDuration(route, seconds) {
|
|
1707
1763
|
const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
|
|
1708
1764
|
const entry = this.#durations.get(route) ?? {
|
|
@@ -1747,7 +1803,12 @@ var MetricsRegistry = class {
|
|
|
1747
1803
|
upstreamErrors += count;
|
|
1748
1804
|
}
|
|
1749
1805
|
}
|
|
1806
|
+
const githubRateLimit = {};
|
|
1807
|
+
for (const [resource, rateLimit] of this.#githubRateLimit) {
|
|
1808
|
+
githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
|
|
1809
|
+
}
|
|
1750
1810
|
return {
|
|
1811
|
+
githubRateLimit,
|
|
1751
1812
|
inFlight: this.#inFlight,
|
|
1752
1813
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
1753
1814
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
@@ -1818,10 +1879,43 @@ var MetricsRegistry = class {
|
|
|
1818
1879
|
lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
|
|
1819
1880
|
lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
|
|
1820
1881
|
}
|
|
1882
|
+
this.#renderGithubRateLimit(lines);
|
|
1821
1883
|
this.#renderCopilotQuota(lines);
|
|
1822
1884
|
return `${lines.join("\n")}
|
|
1823
1885
|
`;
|
|
1824
1886
|
}
|
|
1887
|
+
#renderGithubRateLimit(lines) {
|
|
1888
|
+
const entries = [...this.#githubRateLimit.values()];
|
|
1889
|
+
if (entries.length === 0) {
|
|
1890
|
+
return;
|
|
1891
|
+
}
|
|
1892
|
+
const gauge = (suffix, help, pick) => {
|
|
1893
|
+
const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
|
|
1894
|
+
if (present.length === 0) {
|
|
1895
|
+
return;
|
|
1896
|
+
}
|
|
1897
|
+
lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
|
|
1898
|
+
lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
|
|
1899
|
+
for (const rateLimit of present) {
|
|
1900
|
+
lines.push(
|
|
1901
|
+
`hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
|
|
1902
|
+
);
|
|
1903
|
+
}
|
|
1904
|
+
};
|
|
1905
|
+
gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
|
|
1906
|
+
gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
|
|
1907
|
+
gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
|
|
1908
|
+
gauge(
|
|
1909
|
+
"reset_timestamp_seconds",
|
|
1910
|
+
"Unix epoch when the GitHub REST API window resets.",
|
|
1911
|
+
(r) => r.resetEpochSeconds
|
|
1912
|
+
);
|
|
1913
|
+
gauge(
|
|
1914
|
+
"retry_after_seconds",
|
|
1915
|
+
"Seconds to wait after a GitHub secondary-limit response.",
|
|
1916
|
+
(r) => r.retryAfterSeconds
|
|
1917
|
+
);
|
|
1918
|
+
}
|
|
1825
1919
|
#renderCopilotQuota(lines) {
|
|
1826
1920
|
const usage = this.#copilotQuota;
|
|
1827
1921
|
if (!usage) {
|
|
@@ -2062,6 +2156,37 @@ function modelText(value) {
|
|
|
2062
2156
|
/**
 * Clamp a value to a usable non-negative number: anything that is not a
 * finite number, and anything not greater than zero, becomes 0.
 */
function nonNegative(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  if (value > 0) {
    return value;
  }
  return 0;
}
|
|
2159
|
+
/**
 * Strip ASCII control characters (codes 0-31 and 127) from `value`, then trim
 * surrounding whitespace from what is left.
 */
function cleanLabel(value) {
  const printable = [...value].filter((char) => {
    const code = char.charCodeAt(0);
    return code > 31 && code !== 127;
  });
  return printable.join("").trim();
}
|
|
2169
|
+
/**
 * Convert a stored rate-limit record into its snapshot shape.
 *
 * `observedAtMs` becomes the ISO string `observedAt`; `limit`, `remaining`,
 * `used` and `retryAfterSeconds` are copied when defined; and
 * `resetEpochSeconds` becomes the ISO string `resetAt`.
 *
 * @param rateLimit - Record with `observedAtMs` and optional numeric fields.
 * @returns The snapshot object. `resetAt` is omitted when the reset epoch is
 *   undefined or cannot be represented as a Date.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  for (const key of ["limit", "remaining", "used"]) {
    if (rateLimit[key] !== void 0) {
      snapshot[key] = rateLimit[key];
    }
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    const resetDate = new Date(rateLimit.resetEpochSeconds * 1e3);
    // toISOString() throws RangeError on an invalid Date, which is what an
    // absurdly large reset header produces. Because the record stays stored,
    // every later snapshot would throw too, so drop the field instead.
    if (!Number.isNaN(resetDate.getTime())) {
      snapshot.resetAt = resetDate.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
|
|
2065
2190
|
/**
 * Build a single composite key from the given label parts by joining them
 * with LABEL_SEPARATOR.
 */
function labelKey(...parts) {
  const compositeKey = parts.join(LABEL_SEPARATOR);
  return compositeKey;
}
|
|
@@ -2905,6 +3030,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
2905
3030
|
try {
|
|
2906
3031
|
const upstream = await client.usage(signal);
|
|
2907
3032
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
3033
|
+
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
2908
3034
|
if (!upstream.ok) {
|
|
2909
3035
|
return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
|
|
2910
3036
|
}
|
|
@@ -3741,6 +3867,7 @@ async function runUsage(options = {}) {
|
|
|
3741
3867
|
}
|
|
3742
3868
|
throw new Error(message);
|
|
3743
3869
|
}
|
|
3870
|
+
const rateLimit = parseRateLimitHeaders(response.headers);
|
|
3744
3871
|
const usage = normalizeCopilotUsage(await response.json().catch(() => ({})));
|
|
3745
3872
|
logger.debug(
|
|
3746
3873
|
{ event: "usage.fetch.succeeded", plan: usage.plan },
|
|
@@ -3749,8 +3876,30 @@ async function runUsage(options = {}) {
|
|
|
3749
3876
|
for (const line of formatCopilotUsage(usage)) {
|
|
3750
3877
|
console.log(line);
|
|
3751
3878
|
}
|
|
3879
|
+
if (rateLimit) {
|
|
3880
|
+
console.log(formatGithubRateLimit(rateLimit));
|
|
3881
|
+
}
|
|
3752
3882
|
return usage;
|
|
3753
3883
|
}
|
|
3884
|
+
/**
 * Format a rate-limit record as one human-readable line for the CLI.
 *
 * The line starts with "GitHub API rate limit", followed by the resource in
 * parentheses unless it is missing or "unknown". The detail lists, when
 * available: the remaining budget (with the limit if known) or else the used
 * count, the reset time as an ISO timestamp, and the retry-after delay.
 * When none of those are available the detail is "n/a".
 *
 * @param rateLimit - Record with optional `limit`, `remaining`, `used`,
 *   `resetEpochSeconds`, `retryAfterSeconds` and `resource` fields.
 * @returns The formatted line.
 */
function formatGithubRateLimit(rateLimit) {
  const { limit, remaining, used, resetEpochSeconds, retryAfterSeconds } = rateLimit;
  const parts = [];
  if (remaining !== void 0 && limit !== void 0) {
    parts.push(`${remaining}/${limit} requests remaining`);
  } else if (remaining !== void 0) {
    parts.push(`${remaining} requests remaining`);
  } else if (used !== void 0) {
    parts.push(`${used} requests used`);
  }
  if (resetEpochSeconds !== void 0) {
    const resetDate = new Date(resetEpochSeconds * 1e3);
    // toISOString() throws RangeError on an invalid Date (e.g. an absurdly
    // large reset header); skip the reset time rather than crash the command.
    if (!Number.isNaN(resetDate.getTime())) {
      parts.push(`resets ${resetDate.toISOString()}`);
    }
  }
  if (retryAfterSeconds !== void 0) {
    parts.push(`retry after ${retryAfterSeconds}s`);
  }
  const detail = parts.length > 0 ? parts.join(", ") : "n/a";
  const resource = rateLimit.resource && rateLimit.resource !== "unknown" ? ` (${rateLimit.resource})` : "";
  return `GitHub API rate limit${resource}: ${detail}`;
}
|
|
3754
3903
|
function formatCopilotUsage(usage) {
|
|
3755
3904
|
const lines = [];
|
|
3756
3905
|
if (usage.plan) {
|