npm - @openhoo/hoopilot - Versions diffs - 1.0.0 → 1.1.0 - Mend

@openhoo/hoopilot 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/index.d.cts — CHANGED

@@ -21,6 +21,12 @@ declare class MetricsRegistry {
     recordUpstream(path: string, ok: boolean): void;
     /** Store the latest Copilot quota so /metrics can expose it as gauges. */
     recordCopilotQuota(usage: CopilotUsage): void;
+    /**
+     * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
+     * A no-op when `rateLimit` is undefined (the response carried no rate-limit
+     * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
+     */
+    recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
     /** A JSON-friendly view of the current counters. */
     snapshot(now?: () => number): MetricsSnapshot;
     /** Render the Prometheus text exposition format (version 0.0.4). */
@@ -149,8 +155,40 @@ interface CopilotUsage {
     quotaResetDate?: string;
     quotas: Record<string, CopilotQuota>;
 }
+/**
+ * GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
+ * `api.github.com` returns on every response. Hoopilot reads these off the
+ * `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
+ * usage is visible without spending an extra request.
+ */
+interface GithubRateLimit {
+    /** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
+    resource: string;
+    /** `x-ratelimit-limit` — maximum requests allowed in the current window. */
+    limit?: number;
+    /** `x-ratelimit-remaining` — requests left in the current window. */
+    remaining?: number;
+    /** `x-ratelimit-used` — requests already spent in the current window. */
+    used?: number;
+    /** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
+    resetEpochSeconds?: number;
+    /** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
+    retryAfterSeconds?: number;
+    /** Wall-clock epoch ms when these values were observed. */
+    observedAtMs: number;
+}
+/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
+interface GithubRateLimitSnapshot {
+    limit?: number;
+    observedAt: string;
+    remaining?: number;
+    resetAt?: string;
+    retryAfterSeconds?: number;
+    used?: number;
+}
 /** A point-in-time JSON view of the in-process metrics. */
 interface MetricsSnapshot {
+    githubRateLimit: Record<string, GithubRateLimitSnapshot>;
     inFlight: number;
     requests: {
         byRoute: Record<string, number>;
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
  * Copilot completion endpoints.
  */
 declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
+/**
+ * Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
+ * response into a {@link GithubRateLimit}. `api.github.com` returns these on
+ * every reply, so the proxy reads its GitHub API budget from the quota call it
+ * already makes — no extra request is spent. Returns undefined when the response
+ * carries no rate-limit headers (for example the Copilot completion host, which
+ * does not emit them today) so callers record nothing rather than a phantom row.
+ */
+declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
 declare class CopilotClient {
     #private;
     constructor(options?: CopilotAuthOptions);
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
 declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
 declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
-export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
+export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };

package/dist/index.d.ts — CHANGED

@@ -21,6 +21,12 @@ declare class MetricsRegistry {
     recordUpstream(path: string, ok: boolean): void;
     /** Store the latest Copilot quota so /metrics can expose it as gauges. */
     recordCopilotQuota(usage: CopilotUsage): void;
+    /**
+     * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
+     * A no-op when `rateLimit` is undefined (the response carried no rate-limit
+     * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
+     */
+    recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
     /** A JSON-friendly view of the current counters. */
     snapshot(now?: () => number): MetricsSnapshot;
     /** Render the Prometheus text exposition format (version 0.0.4). */
@@ -149,8 +155,40 @@ interface CopilotUsage {
     quotaResetDate?: string;
     quotas: Record<string, CopilotQuota>;
 }
+/**
+ * GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
+ * `api.github.com` returns on every response. Hoopilot reads these off the
+ * `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
+ * usage is visible without spending an extra request.
+ */
+interface GithubRateLimit {
+    /** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
+    resource: string;
+    /** `x-ratelimit-limit` — maximum requests allowed in the current window. */
+    limit?: number;
+    /** `x-ratelimit-remaining` — requests left in the current window. */
+    remaining?: number;
+    /** `x-ratelimit-used` — requests already spent in the current window. */
+    used?: number;
+    /** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
+    resetEpochSeconds?: number;
+    /** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
+    retryAfterSeconds?: number;
+    /** Wall-clock epoch ms when these values were observed. */
+    observedAtMs: number;
+}
+/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
+interface GithubRateLimitSnapshot {
+    limit?: number;
+    observedAt: string;
+    remaining?: number;
+    resetAt?: string;
+    retryAfterSeconds?: number;
+    used?: number;
+}
 /** A point-in-time JSON view of the in-process metrics. */
 interface MetricsSnapshot {
+    githubRateLimit: Record<string, GithubRateLimitSnapshot>;
     inFlight: number;
     requests: {
         byRoute: Record<string, number>;
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
  * Copilot completion endpoints.
  */
 declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
+/**
+ * Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
+ * response into a {@link GithubRateLimit}. `api.github.com` returns these on
+ * every reply, so the proxy reads its GitHub API budget from the quota call it
+ * already makes — no extra request is spent. Returns undefined when the response
+ * carries no rate-limit headers (for example the Copilot completion host, which
+ * does not emit them today) so callers record nothing rather than a phantom row.
+ */
+declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
 declare class CopilotClient {
     #private;
     constructor(options?: CopilotAuthOptions);
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
 declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
 declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
-export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
+export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };

package/dist/index.js — CHANGED

@@ -1757,6 +1757,38 @@ function applyGithubApiHeaders(headers, token) {
   headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
   return headers;
 }
+function parseRateLimitHeaders(headers, nowMs = Date.now()) {
+  const limit = headerInt(headers, "x-ratelimit-limit");
+  const remaining = headerInt(headers, "x-ratelimit-remaining");
+  const used = headerInt(headers, "x-ratelimit-used");
+  const resetEpochSeconds = headerInt(headers, "x-ratelimit-reset");
+  const retryAfterSeconds = headerInt(headers, "retry-after");
+  if (limit === void 0 && remaining === void 0 && used === void 0 && resetEpochSeconds === void 0 && retryAfterSeconds === void 0) {
+    return void 0;
+  }
+  return removeUndefinedRateLimit({
+    limit,
+    observedAtMs: nowMs,
+    remaining,
+    resetEpochSeconds,
+    resource: headers.get("x-ratelimit-resource")?.trim() || "unknown",
+    retryAfterSeconds,
+    used
+  });
+}
+function headerInt(headers, name) {
+  const raw = headers.get(name);
+  if (raw === null) {
+    return void 0;
+  }
+  const value = Number.parseInt(raw.trim(), 10);
+  return Number.isFinite(value) && value >= 0 ? value : void 0;
+}
+function removeUndefinedRateLimit(rateLimit) {
+  return Object.fromEntries(
+    Object.entries(rateLimit).filter(([, value]) => value !== void 0)
+  );
+}
 var CopilotClient = class {
   #auth;
   #allowUnsafeUpstream;
@@ -2173,6 +2205,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
 var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
 var MAX_TRACKED_MODELS = 200;
 var MAX_MODEL_LABEL_LENGTH = 200;
+var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
 var LABEL_SEPARATOR = "";
 var UNKNOWN_MODEL = "unknown";
 function emptyModelTotals() {
@@ -2186,6 +2219,7 @@ var MetricsRegistry = class {
   #tokens = /* @__PURE__ */ new Map();
   #upstream = /* @__PURE__ */ new Map();
   #copilotQuota;
+  #githubRateLimit = /* @__PURE__ */ new Map();
   constructor(options = {}) {
     this.#startedAtMs = (options.now ?? Date.now)();
   }
@@ -2223,17 +2257,39 @@ var MetricsRegistry = class {
   recordCopilotQuota(usage) {
     this.#copilotQuota = usage;
   }
-  // Sanitize the model into a bounded, control-char-free label. The model can
-  // originate from a client request, so cap its length, strip characters that
-  // would corrupt the exposition format, and fold overflow past the cardinality
-  // limit into UNKNOWN_MODEL to keep the series count bounded.
+  /**
+   * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
+   * A no-op when `rateLimit` is undefined (the response carried no rate-limit
+   * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
+   */
+  recordGithubRateLimit(rateLimit) {
+    if (!rateLimit) {
+      return;
+    }
+    const resource = this.#rateLimitResource(rateLimit.resource);
+    this.#githubRateLimit.set(resource, { ...rateLimit, resource });
+  }
+  // Sanitize the model into a bounded label. The model can originate from a
+  // client request, so cap its length, strip characters that would corrupt the
+  // exposition format, and fold overflow past the cardinality limit into
+  // UNKNOWN_MODEL to keep the series count bounded.
   #modelLabel(model) {
-    const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
+    const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
     if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
       return UNKNOWN_MODEL;
     }
     return cleaned;
   }
+  // The resource comes from a trusted upstream header, but clean and bound it
+  // with the same discipline as model labels: strip control characters that
+  // would corrupt the exposition format and fold overflow into "unknown".
+  #rateLimitResource(resource) {
+    const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
+    if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
+      return UNKNOWN_MODEL;
+    }
+    return cleaned;
+  }
   #observeDuration(route, seconds) {
     const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
     const entry = this.#durations.get(route) ?? {
@@ -2278,7 +2334,12 @@ var MetricsRegistry = class {
         upstreamErrors += count;
       }
     }
+    const githubRateLimit = {};
+    for (const [resource, rateLimit] of this.#githubRateLimit) {
+      githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
+    }
     return {
+      githubRateLimit,
       inFlight: this.#inFlight,
       requests: { byRoute, byStatus, total: requestsTotal },
       startedAt: new Date(this.#startedAtMs).toISOString(),
@@ -2349,10 +2410,43 @@ var MetricsRegistry = class {
       lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
       lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
     }
+    this.#renderGithubRateLimit(lines);
     this.#renderCopilotQuota(lines);
     return `${lines.join("\n")}
 `;
   }
+  #renderGithubRateLimit(lines) {
+    const entries = [...this.#githubRateLimit.values()];
+    if (entries.length === 0) {
+      return;
+    }
+    const gauge = (suffix, help, pick) => {
+      const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
+      if (present.length === 0) {
+        return;
+      }
+      lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
+      lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
+      for (const rateLimit of present) {
+        lines.push(
+          `hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
+        );
+      }
+    };
+    gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
+    gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
+    gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
+    gauge(
+      "reset_timestamp_seconds",
+      "Unix epoch when the GitHub REST API window resets.",
+      (r) => r.resetEpochSeconds
+    );
+    gauge(
+      "retry_after_seconds",
+      "Seconds to wait after a GitHub secondary-limit response.",
+      (r) => r.retryAfterSeconds
+    );
+  }
   #renderCopilotQuota(lines) {
     const usage = this.#copilotQuota;
     if (!usage) {
@@ -2593,6 +2687,37 @@ function modelText(value) {
 function nonNegative(value) {
   return Number.isFinite(value) && value > 0 ? value : 0;
 }
+function cleanLabel(value) {
+  let result = "";
+  for (const char of value) {
+    const code = char.charCodeAt(0);
+    if (code > 31 && code !== 127) {
+      result += char;
+    }
+  }
+  return result.trim();
+}
+function toRateLimitSnapshot(rateLimit) {
+  const snapshot = {
+    observedAt: new Date(rateLimit.observedAtMs).toISOString()
+  };
+  if (rateLimit.limit !== void 0) {
+    snapshot.limit = rateLimit.limit;
+  }
+  if (rateLimit.remaining !== void 0) {
+    snapshot.remaining = rateLimit.remaining;
+  }
+  if (rateLimit.used !== void 0) {
+    snapshot.used = rateLimit.used;
+  }
+  if (rateLimit.resetEpochSeconds !== void 0) {
+    snapshot.resetAt = new Date(rateLimit.resetEpochSeconds * 1e3).toISOString();
+  }
+  if (rateLimit.retryAfterSeconds !== void 0) {
+    snapshot.retryAfterSeconds = rateLimit.retryAfterSeconds;
+  }
+  return snapshot;
+}
 function labelKey(...parts) {
   return parts.join(LABEL_SEPARATOR);
 }
@@ -3416,6 +3541,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
     try {
       const upstream = await client.usage(signal);
       metrics.recordUpstream(usagePath, upstream.ok);
+      metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
       if (!upstream.ok) {
         return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
       }
@@ -3473,6 +3599,7 @@ export {
   observeResponseUsage,
   parseLogFormat,
   parseLogLevel,
+  parseRateLimitHeaders,
   readStoredCopilotAuth,
   responsesCompactionResult,
   responsesRequestToChatCompletion,