@openhoo/hoopilot 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -267,14 +267,16 @@ Incoming `x-request-id` headers are preserved on responses. If a request has no
267
267
 
268
268
  ## Metrics and usage
269
269
 
270
- Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage.
270
+ Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage, plus your GitHub REST API rate-limit budget.
271
271
 
272
- - `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, and Copilot quota gauges after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
273
- - `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why.
274
- - `hoopilot usage` prints your Copilot plan and quota from the command line.
272
+ - `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, Copilot quota gauges, and GitHub REST API rate-limit gauges (`hoopilot_github_ratelimit_limit`, `_remaining`, `_used`, `_reset_timestamp_seconds`, `_retry_after_seconds`, labelled by `resource`) — the quota and rate-limit series appear after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
273
+ - `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
274
+ - `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
275
275
 
276
276
  Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
277
277
 
278
+ GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
279
+
278
280
  `/metrics` and `/v1/usage` are subject to the same `HOOPILOT_API_KEY` gate as the other routes.
279
281
 
280
282
  ## Troubleshooting
package/dist/cli.js CHANGED
@@ -179,6 +179,38 @@ function applyGithubApiHeaders(headers, token) {
179
179
  headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
180
180
  return headers;
181
181
  }
182
/**
 * Build a rate-limit record from a response's `x-ratelimit-*` and
 * `retry-after` headers.
 *
 * @param headers - Header collection exposing `get(name)`.
 * @param nowMs - Timestamp stored as `observedAtMs`; defaults to `Date.now()`.
 * @returns The record with undefined fields removed, or `undefined` when none
 *   of the five numeric headers yields a value. `resource` falls back to
 *   "unknown" when the `x-ratelimit-resource` header is missing or blank.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const readInt = (name) => headerInt(headers, name);
  const limit = readInt("x-ratelimit-limit");
  const remaining = readInt("x-ratelimit-remaining");
  const used = readInt("x-ratelimit-used");
  const resetEpochSeconds = readInt("x-ratelimit-reset");
  const retryAfterSeconds = readInt("retry-after");

  const numericFields = [limit, remaining, used, resetEpochSeconds, retryAfterSeconds];
  const hasAnyField = numericFields.some((field) => field !== void 0);
  if (!hasAnyField) {
    return void 0;
  }

  // Only look at the resource header once we know there is something to report.
  const resourceHeader = headers.get("x-ratelimit-resource");
  const resource = resourceHeader?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit,
    observedAtMs: nowMs,
    remaining,
    resetEpochSeconds,
    resource,
    retryAfterSeconds,
    used
  });
}
201
/**
 * Read a header as a non-negative decimal integer.
 *
 * @param headers - Header collection exposing `get(name)`, which returns
 *   `null` for an absent header.
 * @param name - Header name to read.
 * @returns The parsed value, or `undefined` when the header is absent or its
 *   trimmed value is not a plain non-negative safe integer.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null) {
    return void 0;
  }
  const text = raw.trim();
  // Require the whole value to be decimal digits. Number.parseInt on its own
  // would silently accept the numeric prefix of a malformed value such as
  // "12abc", "1.9" or "7, 8" and report a number the server never sent.
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  // Reject values too large to be represented exactly.
  return Number.isSafeInteger(value) ? value : void 0;
}
209
/**
 * Copy a rate-limit record, leaving out every property whose value is
 * `undefined`. The input object is not modified.
 */
function removeUndefinedRateLimit(rateLimit) {
  const definedEntries = [];
  for (const entry of Object.entries(rateLimit)) {
    if (entry[1] !== void 0) {
      definedEntries.push(entry);
    }
  }
  return Object.fromEntries(definedEntries);
}
182
214
  var CopilotClient = class {
183
215
  #auth;
184
216
  #allowUnsafeUpstream;
@@ -1642,6 +1674,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
1642
1674
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
1643
1675
  var MAX_TRACKED_MODELS = 200;
1644
1676
  var MAX_MODEL_LABEL_LENGTH = 200;
1677
+ var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
1645
1678
  var LABEL_SEPARATOR = "";
1646
1679
  var UNKNOWN_MODEL = "unknown";
1647
1680
  function emptyModelTotals() {
@@ -1655,6 +1688,7 @@ var MetricsRegistry = class {
1655
1688
  #tokens = /* @__PURE__ */ new Map();
1656
1689
  #upstream = /* @__PURE__ */ new Map();
1657
1690
  #copilotQuota;
1691
+ #githubRateLimit = /* @__PURE__ */ new Map();
1658
1692
  constructor(options = {}) {
1659
1693
  this.#startedAtMs = (options.now ?? Date.now)();
1660
1694
  }
@@ -1692,17 +1726,39 @@ var MetricsRegistry = class {
1692
1726
  recordCopilotQuota(usage) {
1693
1727
  this.#copilotQuota = usage;
1694
1728
  }
1695
- // Sanitize the model into a bounded, control-char-free label. The model can
1696
- // originate from a client request, so cap its length, strip characters that
1697
- // would corrupt the exposition format, and fold overflow past the cardinality
1698
- // limit into UNKNOWN_MODEL to keep the series count bounded.
1729
+ /**
1730
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
1731
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
1732
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
1733
+ */
1734
+ recordGithubRateLimit(rateLimit) {
1735
+ if (!rateLimit) {
1736
+ return;
1737
+ }
1738
+ const resource = this.#rateLimitResource(rateLimit.resource);
1739
+ this.#githubRateLimit.set(resource, { ...rateLimit, resource });
1740
+ }
1741
+ // Sanitize the model into a bounded label. The model can originate from a
1742
+ // client request, so cap its length, strip characters that would corrupt the
1743
+ // exposition format, and fold overflow past the cardinality limit into
1744
+ // UNKNOWN_MODEL to keep the series count bounded.
1699
1745
  #modelLabel(model) {
1700
- const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
1746
+ const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
1701
1747
  if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
1702
1748
  return UNKNOWN_MODEL;
1703
1749
  }
1704
1750
  return cleaned;
1705
1751
  }
1752
+ // The resource comes from a trusted upstream header, but clean and bound it
1753
+ // with the same discipline as model labels: strip control characters that
1754
+ // would corrupt the exposition format and fold overflow into "unknown".
1755
+ #rateLimitResource(resource) {
1756
+ const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
1757
+ if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
1758
+ return UNKNOWN_MODEL;
1759
+ }
1760
+ return cleaned;
1761
+ }
1706
1762
  #observeDuration(route, seconds) {
1707
1763
  const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
1708
1764
  const entry = this.#durations.get(route) ?? {
@@ -1747,7 +1803,12 @@ var MetricsRegistry = class {
1747
1803
  upstreamErrors += count;
1748
1804
  }
1749
1805
  }
1806
+ const githubRateLimit = {};
1807
+ for (const [resource, rateLimit] of this.#githubRateLimit) {
1808
+ githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
1809
+ }
1750
1810
  return {
1811
+ githubRateLimit,
1751
1812
  inFlight: this.#inFlight,
1752
1813
  requests: { byRoute, byStatus, total: requestsTotal },
1753
1814
  startedAt: new Date(this.#startedAtMs).toISOString(),
@@ -1818,10 +1879,43 @@ var MetricsRegistry = class {
1818
1879
  lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
1819
1880
  lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
1820
1881
  }
1882
+ this.#renderGithubRateLimit(lines);
1821
1883
  this.#renderCopilotQuota(lines);
1822
1884
  return `${lines.join("\n")}
1823
1885
  `;
1824
1886
  }
1887
+ #renderGithubRateLimit(lines) {
1888
+ const entries = [...this.#githubRateLimit.values()];
1889
+ if (entries.length === 0) {
1890
+ return;
1891
+ }
1892
+ const gauge = (suffix, help, pick) => {
1893
+ const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
1894
+ if (present.length === 0) {
1895
+ return;
1896
+ }
1897
+ lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
1898
+ lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
1899
+ for (const rateLimit of present) {
1900
+ lines.push(
1901
+ `hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
1902
+ );
1903
+ }
1904
+ };
1905
+ gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
1906
+ gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
1907
+ gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
1908
+ gauge(
1909
+ "reset_timestamp_seconds",
1910
+ "Unix epoch when the GitHub REST API window resets.",
1911
+ (r) => r.resetEpochSeconds
1912
+ );
1913
+ gauge(
1914
+ "retry_after_seconds",
1915
+ "Seconds to wait after a GitHub secondary-limit response.",
1916
+ (r) => r.retryAfterSeconds
1917
+ );
1918
+ }
1825
1919
  #renderCopilotQuota(lines) {
1826
1920
  const usage = this.#copilotQuota;
1827
1921
  if (!usage) {
@@ -2062,6 +2156,37 @@ function modelText(value) {
2062
2156
  function nonNegative(value) {
2063
2157
  return Number.isFinite(value) && value > 0 ? value : 0;
2064
2158
  }
2159
/**
 * Strip ASCII control characters (code units 0-31 and 127) from a label and
 * trim surrounding whitespace from what is left.
 */
function cleanLabel(value) {
  const isKept = (char) => {
    const code = char.charCodeAt(0);
    return !(code <= 31 || code === 127);
  };
  return [...value].filter(isKept).join("").trim();
}
2169
/**
 * Convert a stored rate-limit record into its JSON snapshot form.
 *
 * @param rateLimit - Record with `observedAtMs` and any of `limit`,
 *   `remaining`, `used`, `resetEpochSeconds`, `retryAfterSeconds`.
 * @returns An object that always has `observedAt` (ISO 8601) and copies each
 *   numeric field that is defined. `resetAt` is the reset epoch rendered as an
 *   ISO 8601 string; it is left out when the epoch is outside the range a
 *   `Date` can represent.
 */
function toRateLimitSnapshot(rateLimit) {
  const snapshot = {
    observedAt: new Date(rateLimit.observedAtMs).toISOString()
  };
  if (rateLimit.limit !== void 0) {
    snapshot.limit = rateLimit.limit;
  }
  if (rateLimit.remaining !== void 0) {
    snapshot.remaining = rateLimit.remaining;
  }
  if (rateLimit.used !== void 0) {
    snapshot.used = rateLimit.used;
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    const resetDate = new Date(rateLimit.resetEpochSeconds * 1e3);
    // toISOString() throws RangeError on an invalid Date. An oversized reset
    // value must not make the whole snapshot fail, so skip the field instead.
    if (!Number.isNaN(resetDate.getTime())) {
      snapshot.resetAt = resetDate.toISOString();
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
  }
  return snapshot;
}
2065
2190
  function labelKey(...parts) {
2066
2191
  return parts.join(LABEL_SEPARATOR);
2067
2192
  }
@@ -2905,6 +3030,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
2905
3030
  try {
2906
3031
  const upstream = await client.usage(signal);
2907
3032
  metrics.recordUpstream(usagePath, upstream.ok);
3033
+ metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
2908
3034
  if (!upstream.ok) {
2909
3035
  return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
2910
3036
  }
@@ -3741,6 +3867,7 @@ async function runUsage(options = {}) {
3741
3867
  }
3742
3868
  throw new Error(message);
3743
3869
  }
3870
+ const rateLimit = parseRateLimitHeaders(response.headers);
3744
3871
  const usage = normalizeCopilotUsage(await response.json().catch(() => ({})));
3745
3872
  logger.debug(
3746
3873
  { event: "usage.fetch.succeeded", plan: usage.plan },
@@ -3749,8 +3876,30 @@ async function runUsage(options = {}) {
3749
3876
  for (const line of formatCopilotUsage(usage)) {
3750
3877
  console.log(line);
3751
3878
  }
3879
+ if (rateLimit) {
3880
+ console.log(formatGithubRateLimit(rateLimit));
3881
+ }
3752
3882
  return usage;
3753
3883
  }
3884
/**
 * Format a rate-limit record as a single human-readable line.
 *
 * @param rateLimit - Record with any of `limit`, `remaining`, `used`,
 *   `resetEpochSeconds`, `retryAfterSeconds`, `resource`.
 * @returns A line of the form `GitHub API rate limit (resource): details`.
 *   The resource is omitted when it is missing or "unknown", and the details
 *   read "n/a" when no field can be shown.
 */
function formatGithubRateLimit(rateLimit) {
  const parts = [];
  if (rateLimit.remaining !== void 0 && rateLimit.limit !== void 0) {
    parts.push(`${rateLimit.remaining}/${rateLimit.limit} requests remaining`);
  } else if (rateLimit.remaining !== void 0) {
    parts.push(`${rateLimit.remaining} requests remaining`);
  } else if (rateLimit.used !== void 0) {
    parts.push(`${rateLimit.used} requests used`);
  }
  if (rateLimit.resetEpochSeconds !== void 0) {
    const resetDate = new Date(rateLimit.resetEpochSeconds * 1e3);
    // toISOString() throws RangeError on an invalid Date. Drop the reset time
    // rather than fail the whole line when the epoch is out of range.
    if (!Number.isNaN(resetDate.getTime())) {
      parts.push(`resets ${resetDate.toISOString()}`);
    }
  }
  if (rateLimit.retryAfterSeconds !== void 0) {
    parts.push(`retry after ${rateLimit.retryAfterSeconds}s`);
  }
  const detail = parts.length > 0 ? parts.join(", ") : "n/a";
  const resource = rateLimit.resource && rateLimit.resource !== "unknown" ? ` (${rateLimit.resource})` : "";
  return `GitHub API rate limit${resource}: ${detail}`;
}
3754
3903
  function formatCopilotUsage(usage) {
3755
3904
  const lines = [];
3756
3905
  if (usage.plan) {