npm - @openhoo/hoopilot - Versions diffs - 1.0.0 → 1.1.0 - Mend

@openhoo/hoopilot 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md CHANGED Viewed

@@ -267,14 +267,16 @@ Incoming `x-request-id` headers are preserved on responses. If a request has no
 ## Metrics and usage
-Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage.
+Hoopilot tracks token usage, request counts, and latency in memory while the server runs. It can also report your GitHub Copilot account quota and premium-request usage, plus your GitHub REST API rate-limit budget.
-- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, and Copilot quota gauges after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
-- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why.
-- `hoopilot usage` prints your Copilot plan and quota from the command line.
+- `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters, upstream call counters, token counters by model and type, a request-duration histogram, an in-flight gauge, Copilot quota gauges, and GitHub REST API rate-limit gauges (`hoopilot_github_ratelimit_limit`, `_remaining`, `_used`, `_reset_timestamp_seconds`, `_retry_after_seconds`, labelled by `resource`) — the quota and rate-limit series appear after `/v1/usage` has been fetched at least once. Counters reset to zero on restart, which Prometheus handles natively.
+- `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub and cached for 60 seconds. If quota cannot be read, `copilot` is `null` and `copilot_error` explains why. The snapshot's `proxy.githubRateLimit` field reports the most recent GitHub REST rate-limit budget per resource (`limit`, `remaining`, `used`, `resetAt`, `retryAfterSeconds`, `observedAt`).
+- `hoopilot usage` prints your Copilot plan and quota — and, when GitHub returns them, your GitHub API rate-limit budget — from the command line.
 Token usage is read from the upstream `usage` object. For streaming chat completions, usage is only available when the client sends `stream_options: {"include_usage": true}`; Hoopilot does not inject that flag. Responses API streaming always reports usage, so streamed Responses requests are fully accounted.
+GitHub API usage is read from the `x-ratelimit-*` response headers that `api.github.com` returns on the `copilot_internal/user` quota call Hoopilot already makes, so it costs no extra request. (The Copilot completion host `api.githubcopilot.com` does not currently emit these headers, so per-completion rate-limit data is not yet available there.)
 `/metrics` and `/v1/usage` are subject to the same `HOOPILOT_API_KEY` gate as the other routes.
 ## Troubleshooting

package/dist/cli.js CHANGED Viewed

@@ -179,6 +179,38 @@ function applyGithubApiHeaders(headers, token) {
   headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
   return headers;
 }
+/**
+ * Build a rate-limit reading from the headers of a GitHub API response.
+ *
+ * The numeric headers `x-ratelimit-limit`, `x-ratelimit-remaining`,
+ * `x-ratelimit-used`, `x-ratelimit-reset` and `retry-after` are read through
+ * `headerInt`. When none of them yields a value the function returns
+ * `undefined`. Otherwise the reading also carries `observedAtMs` and a
+ * `resource` name taken from `x-ratelimit-resource` (trimmed, falling back to
+ * "unknown"); fields that are undefined are dropped from the result.
+ *
+ * @param {{ get(name: string): string | null }} headers - Response headers.
+ * @param {number} [nowMs=Date.now()] - Timestamp stored as `observedAtMs`.
+ * @returns {object | undefined} The reading, or `undefined` when no numeric
+ *   rate-limit header was present.
+ */
+function parseRateLimitHeaders(headers, nowMs = Date.now()) {
+  const numericHeaderNames = [
+    "x-ratelimit-limit",
+    "x-ratelimit-remaining",
+    "x-ratelimit-used",
+    "x-ratelimit-reset",
+    "retry-after"
+  ];
+  const numericValues = numericHeaderNames.map((name) => headerInt(headers, name));
+  if (numericValues.every((value) => value === void 0)) {
+    return void 0;
+  }
+  const [limit, remaining, used, resetEpochSeconds, retryAfterSeconds] = numericValues;
+  const resource = headers.get("x-ratelimit-resource")?.trim() || "unknown";
+  return removeUndefinedRateLimit({
+    limit,
+    observedAtMs: nowMs,
+    remaining,
+    resetEpochSeconds,
+    resource,
+    retryAfterSeconds,
+    used
+  });
+}
+/**
+ * Read a header as a non-negative decimal integer.
+ *
+ * The value is trimmed and must consist solely of ASCII digits. Anything else
+ * (a missing header, a sign, a fraction, trailing text such as an HTTP-date,
+ * or a number too large to be a safe integer) yields `undefined`.
+ *
+ * @param {{ get(name: string): string | null | undefined }} headers - Header
+ *   collection exposing `get(name)`.
+ * @param {string} name - Header name to look up.
+ * @returns {number | undefined} The parsed integer, or `undefined`.
+ */
+function headerInt(headers, name) {
+  const raw = headers.get(name);
+  // `== null` also covers header-like objects whose get() returns undefined.
+  if (raw == null) {
+    return void 0;
+  }
+  const text = String(raw).trim();
+  // Number.parseInt would silently accept "12abc" or "1.5" by discarding the
+  // tail, so require the whole value to be digits before converting.
+  if (!/^\d+$/.test(text)) {
+    return void 0;
+  }
+  const value = Number(text);
+  // Digit strings beyond 2^53 - 1 cannot be represented exactly; reject them.
+  return Number.isSafeInteger(value) ? value : void 0;
+}
+/**
+ * Return a shallow copy of `rateLimit` without the own enumerable properties
+ * whose value is `undefined`. Other values (including `null` and `0`) are
+ * kept, in their original order.
+ *
+ * @param {object} rateLimit - Object to copy.
+ * @returns {object} New object containing only the defined entries.
+ */
+function removeUndefinedRateLimit(rateLimit) {
+  const definedEntries = [];
+  for (const entry of Object.entries(rateLimit)) {
+    if (entry[1] !== void 0) {
+      definedEntries.push(entry);
+    }
+  }
+  return Object.fromEntries(definedEntries);
+}
 var CopilotClient = class {
   #auth;
   #allowUnsafeUpstream;
@@ -1642,6 +1674,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
 var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
 var MAX_TRACKED_MODELS = 200;
 var MAX_MODEL_LABEL_LENGTH = 200;
+var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
 var LABEL_SEPARATOR = "";
 var UNKNOWN_MODEL = "unknown";
 function emptyModelTotals() {
@@ -1655,6 +1688,7 @@ var MetricsRegistry = class {
   #tokens = /* @__PURE__ */ new Map();
   #upstream = /* @__PURE__ */ new Map();
   #copilotQuota;
+  #githubRateLimit = /* @__PURE__ */ new Map();
   constructor(options = {}) {
     this.#startedAtMs = (options.now ?? Date.now)();
   }
@@ -1692,17 +1726,39 @@ var MetricsRegistry = class {
   recordCopilotQuota(usage) {
     this.#copilotQuota = usage;
   }
-  // Sanitize the model into a bounded, control-char-free label. The model can
-  // originate from a client request, so cap its length, strip characters that
-  // would corrupt the exposition format, and fold overflow past the cardinality
-  // limit into UNKNOWN_MODEL to keep the series count bounded.
+  /**
+   * Remember the most recent GitHub REST rate-limit reading for its resource.
+   * The resource name is passed through `#rateLimitResource` and the result is
+   * used both as the map key and as the `resource` of the stored copy. Falsy
+   * input, such as the `undefined` that {@link parseRateLimitHeaders} returns
+   * when a response carried no rate-limit headers, is ignored.
+   */
+  recordGithubRateLimit(rateLimit) {
+    if (rateLimit) {
+      const resource = this.#rateLimitResource(rateLimit.resource);
+      const stored = { ...rateLimit, resource };
+      this.#githubRateLimit.set(resource, stored);
+    }
+  }
+  // Sanitize the model into a bounded label. The model can originate from a
+  // client request, so cap its length, strip characters that would corrupt the
+  // exposition format, and fold overflow past the cardinality limit into
+  // UNKNOWN_MODEL to keep the series count bounded.
   #modelLabel(model) {
-    const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
+    const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
     if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
       return UNKNOWN_MODEL;
     }
     return cleaned;
   }
+  // Turn a rate-limit resource name into a bounded map key and metric label.
+  // The name is cleaned with cleanLabel, truncated to MAX_MODEL_LABEL_LENGTH,
+  // and replaced by UNKNOWN_MODEL when empty. A name that is not tracked yet
+  // is also folded into UNKNOWN_MODEL once the map already holds
+  // MAX_TRACKED_RATELIMIT_RESOURCES entries.
+  #rateLimitResource(resource) {
+    const label = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
+    const alreadyTracked = this.#githubRateLimit.has(label);
+    const atCapacity = this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES;
+    return !alreadyTracked && atCapacity ? UNKNOWN_MODEL : label;
+  }
   #observeDuration(route, seconds) {
     const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
     const entry = this.#durations.get(route) ?? {
@@ -1747,7 +1803,12 @@ var MetricsRegistry = class {
         upstreamErrors += count;
       }
     }
+    const githubRateLimit = {};
+    for (const [resource, rateLimit] of this.#githubRateLimit) {
+      githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
+    }
     return {
+      githubRateLimit,
       inFlight: this.#inFlight,
       requests: { byRoute, byStatus, total: requestsTotal },
       startedAt: new Date(this.#startedAtMs).toISOString(),
@@ -1818,10 +1879,43 @@ var MetricsRegistry = class {
       lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
       lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
     }
+    this.#renderGithubRateLimit(lines);
     this.#renderCopilotQuota(lines);
     return `${lines.join("\n")}
 `;
   }
+  // Append the hoopilot_github_ratelimit_* gauge families to `lines`. Nothing
+  // is written when no reading is stored, and a family is skipped entirely
+  // (including its HELP/TYPE header) when no reading has a value for it.
+  #renderGithubRateLimit(lines) {
+    const readings = [...this.#githubRateLimit.values()];
+    if (readings.length === 0) {
+      return;
+    }
+    // [metric suffix, HELP text, field of the stored reading]
+    const families = [
+      ["limit", "GitHub REST API request ceiling for the resource window.", "limit"],
+      ["remaining", "Requests remaining in the GitHub REST API window.", "remaining"],
+      ["used", "Requests used in the GitHub REST API window.", "used"],
+      ["reset_timestamp_seconds", "Unix epoch when the GitHub REST API window resets.", "resetEpochSeconds"],
+      ["retry_after_seconds", "Seconds to wait after a GitHub secondary-limit response.", "retryAfterSeconds"]
+    ];
+    for (const [suffix, help, field] of families) {
+      const withValue = readings.filter((reading) => reading[field] !== void 0);
+      if (withValue.length === 0) {
+        continue;
+      }
+      const metric = `hoopilot_github_ratelimit_${suffix}`;
+      lines.push(`# HELP ${metric} ${help}`);
+      lines.push(`# TYPE ${metric} gauge`);
+      for (const reading of withValue) {
+        lines.push(`${metric}${labels({ resource: reading.resource })} ${reading[field]}`);
+      }
+    }
+  }
   #renderCopilotQuota(lines) {
     const usage = this.#copilotQuota;
     if (!usage) {
@@ -2062,6 +2156,37 @@ function modelText(value) {
 function nonNegative(value) {
   return Number.isFinite(value) && value > 0 ? value : 0;
 }
+/**
+ * Strip ASCII control characters from a label and trim the result.
+ *
+ * The input is walked character by character; a character is dropped when
+ * its first UTF-16 code unit is 0-31 or 127, and kept otherwise. Surrounding
+ * whitespace is trimmed after the removal.
+ *
+ * @param {string} value - Raw label text.
+ * @returns {string} The cleaned label, which may be empty.
+ */
+function cleanLabel(value) {
+  const isKept = (char) => {
+    const code = char.charCodeAt(0);
+    return code > 31 && code !== 127;
+  };
+  return [...value].filter(isKept).join("").trim();
+}
+/**
+ * Convert a stored rate-limit reading into its JSON snapshot form.
+ *
+ * `observedAtMs` (epoch milliseconds) becomes the ISO-8601 string
+ * `observedAt`, and `resetEpochSeconds` (epoch seconds) becomes `resetAt`.
+ * `limit`, `remaining`, `used` and `retryAfterSeconds` are copied when
+ * defined. A timestamp that `Date` cannot represent is omitted instead of
+ * letting `toISOString()` throw a RangeError, so one out-of-range
+ * `x-ratelimit-reset` value cannot break the whole snapshot.
+ *
+ * @param {object} rateLimit - Reading as stored by the metrics registry.
+ * @returns {object} Snapshot containing only the fields that could be rendered.
+ */
+function toRateLimitSnapshot(rateLimit) {
+  // Returns undefined for values outside the representable Date range.
+  const toIso = (epochMs) => {
+    const date = new Date(epochMs);
+    return Number.isNaN(date.getTime()) ? void 0 : date.toISOString();
+  };
+  const snapshot = {};
+  const observedAt = toIso(rateLimit.observedAtMs);
+  if (observedAt !== void 0) {
+    snapshot.observedAt = observedAt;
+  }
+  if (rateLimit.limit !== void 0) {
+    snapshot.limit = rateLimit.limit;
+  }
+  if (rateLimit.remaining !== void 0) {
+    snapshot.remaining = rateLimit.remaining;
+  }
+  if (rateLimit.used !== void 0) {
+    snapshot.used = rateLimit.used;
+  }
+  if (rateLimit.resetEpochSeconds !== void 0) {
+    const resetAt = toIso(rateLimit.resetEpochSeconds * 1e3);
+    if (resetAt !== void 0) {
+      snapshot.resetAt = resetAt;
+    }
+  }
+  if (rateLimit.retryAfterSeconds !== void 0) {
+    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
+  }
+  return snapshot;
+}
 function labelKey(...parts) {
   return parts.join(LABEL_SEPARATOR);
 }
@@ -2905,6 +3030,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
     try {
       const upstream = await client.usage(signal);
       metrics.recordUpstream(usagePath, upstream.ok);
+      metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
       if (!upstream.ok) {
         return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
       }
@@ -3741,6 +3867,7 @@ async function runUsage(options = {}) {
     }
     throw new Error(message);
   }
+  const rateLimit = parseRateLimitHeaders(response.headers);
   const usage = normalizeCopilotUsage(await response.json().catch(() => ({})));
   logger.debug(
     { event: "usage.fetch.succeeded", plan: usage.plan },
@@ -3749,8 +3876,30 @@ async function runUsage(options = {}) {
   for (const line of formatCopilotUsage(usage)) {
     console.log(line);
   }
+  if (rateLimit) {
+    console.log(formatGithubRateLimit(rateLimit));
+  }
   return usage;
 }
+/**
+ * Format a rate-limit reading as the one-line summary printed by the CLI.
+ *
+ * The detail lists, when available: the remaining/limit pair (or remaining
+ * alone, or used alone), the reset time as an ISO-8601 string, and the
+ * retry-after delay. With no detail at all it reads "n/a". The resource name
+ * is shown in parentheses unless it is empty or "unknown". A reset value that
+ * `Date` cannot represent is left out instead of letting `toISOString()`
+ * throw a RangeError after the quota lines have already been printed.
+ *
+ * @param {object} rateLimit - Reading produced by parseRateLimitHeaders.
+ * @returns {string} Line starting with "GitHub API rate limit".
+ */
+function formatGithubRateLimit(rateLimit) {
+  const parts = [];
+  if (rateLimit.remaining !== void 0 && rateLimit.limit !== void 0) {
+    parts.push(`${rateLimit.remaining}/${rateLimit.limit} requests remaining`);
+  } else if (rateLimit.remaining !== void 0) {
+    parts.push(`${rateLimit.remaining} requests remaining`);
+  } else if (rateLimit.used !== void 0) {
+    parts.push(`${rateLimit.used} requests used`);
+  }
+  if (rateLimit.resetEpochSeconds !== void 0) {
+    const resetDate = new Date(rateLimit.resetEpochSeconds * 1e3);
+    // An Invalid Date has a NaN time value and would make toISOString() throw.
+    if (!Number.isNaN(resetDate.getTime())) {
+      parts.push(`resets ${resetDate.toISOString()}`);
+    }
+  }
+  if (rateLimit.retryAfterSeconds !== void 0) {
+    parts.push(`retry after ${rateLimit.retryAfterSeconds}s`);
+  }
+  const detail = parts.length > 0 ? parts.join(", ") : "n/a";
+  const resource = rateLimit.resource && rateLimit.resource !== "unknown" ? ` (${rateLimit.resource})` : "";
+  return `GitHub API rate limit${resource}: ${detail}`;
+}
 function formatCopilotUsage(usage) {
   const lines = [];
   if (usage.plan) {