@openhoo/hoopilot 0.10.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -21,6 +21,12 @@ declare class MetricsRegistry {
21
21
  recordUpstream(path: string, ok: boolean): void;
22
22
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
23
23
  recordCopilotQuota(usage: CopilotUsage): void;
24
+ /**
25
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
26
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
27
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
28
+ */
29
+ recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
24
30
  /** A JSON-friendly view of the current counters. */
25
31
  snapshot(now?: () => number): MetricsSnapshot;
26
32
  /** Render the Prometheus text exposition format (version 0.0.4). */
@@ -149,8 +155,40 @@ interface CopilotUsage {
149
155
  quotaResetDate?: string;
150
156
  quotas: Record<string, CopilotQuota>;
151
157
  }
158
+ /**
159
+ * GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
160
+ * `api.github.com` returns on every response. Hoopilot reads these off the
161
+ * `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
162
+ * usage is visible without spending an extra request.
163
+ */
164
+ interface GithubRateLimit {
165
+ /** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
166
+ resource: string;
167
+ /** `x-ratelimit-limit` — maximum requests allowed in the current window. */
168
+ limit?: number;
169
+ /** `x-ratelimit-remaining` — requests left in the current window. */
170
+ remaining?: number;
171
+ /** `x-ratelimit-used` — requests already spent in the current window. */
172
+ used?: number;
173
+ /** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
174
+ resetEpochSeconds?: number;
175
+ /** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
176
+ retryAfterSeconds?: number;
177
+ /** Wall-clock epoch ms when these values were observed. */
178
+ observedAtMs: number;
179
+ }
180
+ /** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
181
+ interface GithubRateLimitSnapshot {
182
+ limit?: number;
183
+ observedAt: string;
184
+ remaining?: number;
185
+ resetAt?: string;
186
+ retryAfterSeconds?: number;
187
+ used?: number;
188
+ }
152
189
  /** A point-in-time JSON view of the in-process metrics. */
153
190
  interface MetricsSnapshot {
191
+ githubRateLimit: Record<string, GithubRateLimitSnapshot>;
154
192
  inFlight: number;
155
193
  requests: {
156
194
  byRoute: Record<string, number>;
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
227
265
  * Copilot completion endpoints.
228
266
  */
229
267
  declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
268
+ /**
269
+ * Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
270
+ * response into a {@link GithubRateLimit}. `api.github.com` returns these on
271
+ * every reply, so the proxy reads its GitHub API budget from the quota call it
272
+ * already makes — no extra request is spent. Returns undefined when the response
273
+ * carries no rate-limit headers (for example the Copilot completion host, which
274
+ * does not emit them today) so callers record nothing rather than a phantom row.
275
+ */
276
+ declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
230
277
  declare class CopilotClient {
231
278
  #private;
232
279
  constructor(options?: CopilotAuthOptions);
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
309
356
  declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
310
357
  declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
311
358
 
312
- export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
359
+ export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
package/dist/index.d.ts CHANGED
@@ -21,6 +21,12 @@ declare class MetricsRegistry {
21
21
  recordUpstream(path: string, ok: boolean): void;
22
22
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
23
23
  recordCopilotQuota(usage: CopilotUsage): void;
24
+ /**
25
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
26
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
27
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
28
+ */
29
+ recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
24
30
  /** A JSON-friendly view of the current counters. */
25
31
  snapshot(now?: () => number): MetricsSnapshot;
26
32
  /** Render the Prometheus text exposition format (version 0.0.4). */
@@ -149,8 +155,40 @@ interface CopilotUsage {
149
155
  quotaResetDate?: string;
150
156
  quotas: Record<string, CopilotQuota>;
151
157
  }
158
+ /**
159
+ * GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
160
+ * `api.github.com` returns on every response. Hoopilot reads these off the
161
+ * `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
162
+ * usage is visible without spending an extra request.
163
+ */
164
+ interface GithubRateLimit {
165
+ /** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
166
+ resource: string;
167
+ /** `x-ratelimit-limit` — maximum requests allowed in the current window. */
168
+ limit?: number;
169
+ /** `x-ratelimit-remaining` — requests left in the current window. */
170
+ remaining?: number;
171
+ /** `x-ratelimit-used` — requests already spent in the current window. */
172
+ used?: number;
173
+ /** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
174
+ resetEpochSeconds?: number;
175
+ /** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
176
+ retryAfterSeconds?: number;
177
+ /** Wall-clock epoch ms when these values were observed. */
178
+ observedAtMs: number;
179
+ }
180
+ /** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
181
+ interface GithubRateLimitSnapshot {
182
+ limit?: number;
183
+ observedAt: string;
184
+ remaining?: number;
185
+ resetAt?: string;
186
+ retryAfterSeconds?: number;
187
+ used?: number;
188
+ }
152
189
  /** A point-in-time JSON view of the in-process metrics. */
153
190
  interface MetricsSnapshot {
191
+ githubRateLimit: Record<string, GithubRateLimitSnapshot>;
154
192
  inFlight: number;
155
193
  requests: {
156
194
  byRoute: Record<string, number>;
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
227
265
  * Copilot completion endpoints.
228
266
  */
229
267
  declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
268
+ /**
269
+ * Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
270
+ * response into a {@link GithubRateLimit}. `api.github.com` returns these on
271
+ * every reply, so the proxy reads its GitHub API budget from the quota call it
272
+ * already makes — no extra request is spent. Returns undefined when the response
273
+ * carries no rate-limit headers (for example the Copilot completion host, which
274
+ * does not emit them today) so callers record nothing rather than a phantom row.
275
+ */
276
+ declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
230
277
  declare class CopilotClient {
231
278
  #private;
232
279
  constructor(options?: CopilotAuthOptions);
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
309
356
  declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
310
357
  declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
311
358
 
312
- export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
359
+ export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
package/dist/index.js CHANGED
@@ -1757,6 +1757,38 @@ function applyGithubApiHeaders(headers, token) {
1757
1757
  headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
1758
1758
  return headers;
1759
1759
  }
1760
/**
 * Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) into a
 * rate-limit record.
 *
 * @param headers Object exposing `get(name)` that yields the header value or null.
 * @param nowMs Epoch milliseconds stored on the record as `observedAtMs`;
 *   defaults to `Date.now()`.
 * @returns The parsed record with absent fields omitted, or undefined when
 *   none of the numeric headers carried a usable value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const limit = headerInt(headers, "x-ratelimit-limit");
  const remaining = headerInt(headers, "x-ratelimit-remaining");
  const used = headerInt(headers, "x-ratelimit-used");
  const resetEpochSeconds = headerInt(headers, "x-ratelimit-reset");
  const retryAfterSeconds = headerInt(headers, "retry-after");
  const numericFields = [limit, remaining, used, resetEpochSeconds, retryAfterSeconds];
  // Without any numeric field there is nothing worth recording.
  if (numericFields.every((value) => value === undefined)) {
    return undefined;
  }
  return removeUndefinedRateLimit({
    limit,
    observedAtMs: nowMs,
    remaining,
    resetEpochSeconds,
    // A missing or blank resource header falls back to the "unknown" bucket.
    resource: headers.get("x-ratelimit-resource")?.trim() || "unknown",
    retryAfterSeconds,
    used
  });
}

/**
 * Read one header as a non-negative integer.
 *
 * Only a plain run of decimal digits (after trimming) is accepted, so values
 * such as "12abc", "1.5" or "-1" yield undefined instead of a silently
 * truncated number. Digit strings too large to be a safe integer are rejected
 * as well, because they cannot be represented exactly.
 *
 * @returns The parsed integer, or undefined when the header is absent or malformed.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw === null || raw === undefined) {
    return undefined;
  }
  const text = raw.trim();
  if (!/^\d+$/.test(text)) {
    return undefined;
  }
  const value = Number(text);
  return Number.isSafeInteger(value) ? value : undefined;
}

/** Return a copy of `rateLimit` without the properties whose value is undefined. */
function removeUndefinedRateLimit(rateLimit) {
  return Object.fromEntries(
    Object.entries(rateLimit).filter(([, value]) => value !== undefined)
  );
}
1760
1792
  var CopilotClient = class {
1761
1793
  #auth;
1762
1794
  #allowUnsafeUpstream;
@@ -2173,6 +2205,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
2173
2205
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
2174
2206
  var MAX_TRACKED_MODELS = 200;
2175
2207
  var MAX_MODEL_LABEL_LENGTH = 200;
2208
+ var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
2176
2209
  var LABEL_SEPARATOR = "";
2177
2210
  var UNKNOWN_MODEL = "unknown";
2178
2211
  function emptyModelTotals() {
@@ -2186,6 +2219,7 @@ var MetricsRegistry = class {
2186
2219
  #tokens = /* @__PURE__ */ new Map();
2187
2220
  #upstream = /* @__PURE__ */ new Map();
2188
2221
  #copilotQuota;
2222
+ #githubRateLimit = /* @__PURE__ */ new Map();
2189
2223
  constructor(options = {}) {
2190
2224
  this.#startedAtMs = (options.now ?? Date.now)();
2191
2225
  }
@@ -2223,17 +2257,39 @@ var MetricsRegistry = class {
2223
2257
  recordCopilotQuota(usage) {
2224
2258
  this.#copilotQuota = usage;
2225
2259
  }
2226
- // Sanitize the model into a bounded, control-char-free label. The model can
2227
- // originate from a client request, so cap its length, strip characters that
2228
- // would corrupt the exposition format, and fold overflow past the cardinality
2229
- // limit into UNKNOWN_MODEL to keep the series count bounded.
2260
+ /**
2261
+ * Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
2262
+ * A no-op when `rateLimit` is undefined (the response carried no rate-limit
2263
+ * headers) so callers can pass {@link parseRateLimitHeaders} output directly.
2264
+ */
2265
+ recordGithubRateLimit(rateLimit) {
2266
+ if (!rateLimit) {
2267
+ return;
2268
+ }
2269
+ const resource = this.#rateLimitResource(rateLimit.resource);
2270
+ this.#githubRateLimit.set(resource, { ...rateLimit, resource });
2271
+ }
2272
+ // Sanitize the model into a bounded label. The model can originate from a
2273
+ // client request, so cap its length, strip characters that would corrupt the
2274
+ // exposition format, and fold overflow past the cardinality limit into
2275
+ // UNKNOWN_MODEL to keep the series count bounded.
2230
2276
  #modelLabel(model) {
2231
- const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2277
+ const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2232
2278
  if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
2233
2279
  return UNKNOWN_MODEL;
2234
2280
  }
2235
2281
  return cleaned;
2236
2282
  }
2283
+ // The resource comes from a trusted upstream header, but clean and bound it
2284
+ // with the same discipline as model labels: strip control characters that
2285
+ // would corrupt the exposition format and fold overflow into "unknown".
2286
+ #rateLimitResource(resource) {
2287
+ const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
2288
+ if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
2289
+ return UNKNOWN_MODEL;
2290
+ }
2291
+ return cleaned;
2292
+ }
2237
2293
  #observeDuration(route, seconds) {
2238
2294
  const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
2239
2295
  const entry = this.#durations.get(route) ?? {
@@ -2278,7 +2334,12 @@ var MetricsRegistry = class {
2278
2334
  upstreamErrors += count;
2279
2335
  }
2280
2336
  }
2337
+ const githubRateLimit = {};
2338
+ for (const [resource, rateLimit] of this.#githubRateLimit) {
2339
+ githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
2340
+ }
2281
2341
  return {
2342
+ githubRateLimit,
2282
2343
  inFlight: this.#inFlight,
2283
2344
  requests: { byRoute, byStatus, total: requestsTotal },
2284
2345
  startedAt: new Date(this.#startedAtMs).toISOString(),
@@ -2349,10 +2410,43 @@ var MetricsRegistry = class {
2349
2410
  lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
2350
2411
  lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
2351
2412
  }
2413
+ this.#renderGithubRateLimit(lines);
2352
2414
  this.#renderCopilotQuota(lines);
2353
2415
  return `${lines.join("\n")}
2354
2416
  `;
2355
2417
  }
2418
+ #renderGithubRateLimit(lines) {
2419
+ const entries = [...this.#githubRateLimit.values()];
2420
+ if (entries.length === 0) {
2421
+ return;
2422
+ }
2423
+ const gauge = (suffix, help, pick) => {
2424
+ const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
2425
+ if (present.length === 0) {
2426
+ return;
2427
+ }
2428
+ lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
2429
+ lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
2430
+ for (const rateLimit of present) {
2431
+ lines.push(
2432
+ `hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
2433
+ );
2434
+ }
2435
+ };
2436
+ gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
2437
+ gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
2438
+ gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
2439
+ gauge(
2440
+ "reset_timestamp_seconds",
2441
+ "Unix epoch when the GitHub REST API window resets.",
2442
+ (r) => r.resetEpochSeconds
2443
+ );
2444
+ gauge(
2445
+ "retry_after_seconds",
2446
+ "Seconds to wait after a GitHub secondary-limit response.",
2447
+ (r) => r.retryAfterSeconds
2448
+ );
2449
+ }
2356
2450
  #renderCopilotQuota(lines) {
2357
2451
  const usage = this.#copilotQuota;
2358
2452
  if (!usage) {
@@ -2593,6 +2687,37 @@ function modelText(value) {
2593
2687
  function nonNegative(value) {
2594
2688
  return Number.isFinite(value) && value > 0 ? value : 0;
2595
2689
  }
2690
/**
 * Drop ASCII control characters (code units 0-31 and 127) from `value` and
 * trim surrounding whitespace from what remains.
 */
function cleanLabel(value) {
  const isPrintable = (char) => {
    const code = char.charCodeAt(0);
    return code > 31 && code !== 127;
  };
  // Array.from walks the string by code point, like a for...of loop would.
  return Array.from(value).filter(isPrintable).join("").trim();
}
2700
/**
 * Convert a stored rate-limit record into its JSON snapshot form.
 *
 * `observedAtMs` becomes the ISO-8601 `observedAt` string and
 * `resetEpochSeconds` (seconds) becomes the ISO-8601 `resetAt` string; the
 * numeric fields are copied through. Fields that are undefined on the record
 * are omitted from the snapshot.
 *
 * `resetEpochSeconds` is derived from a response header, so it may be too
 * large for a Date. In that case `resetAt` is omitted rather than letting
 * `toISOString()` throw a RangeError and break the whole metrics snapshot.
 *
 * @param rateLimit Record with `observedAtMs` and optional `limit`,
 *   `remaining`, `used`, `resetEpochSeconds`, `retryAfterSeconds`.
 * @returns A plain object holding only the fields that have a value.
 */
function toRateLimitSnapshot(rateLimit) {
  const resetMs =
    rateLimit.resetEpochSeconds === undefined ? Number.NaN : rateLimit.resetEpochSeconds * 1000;
  const resetDate = new Date(resetMs);
  // An out-of-range or missing timestamp yields an invalid Date (NaN time).
  const resetAt = Number.isNaN(resetDate.getTime()) ? undefined : resetDate.toISOString();
  const fields = [
    ["observedAt", new Date(rateLimit.observedAtMs).toISOString()],
    ["limit", rateLimit.limit],
    ["remaining", rateLimit.remaining],
    ["used", rateLimit.used],
    ["resetAt", resetAt],
    ["retryAfterSeconds", rateLimit.retryAfterSeconds]
  ];
  return Object.fromEntries(fields.filter(([, value]) => value !== undefined));
}
2596
2721
  function labelKey(...parts) {
2597
2722
  return parts.join(LABEL_SEPARATOR);
2598
2723
  }
@@ -2618,7 +2743,8 @@ var IS_STANDALONE_BINARY = BAKED_VERSION !== void 0;
2618
2743
  // src/server.ts
2619
2744
  var DEFAULT_HOST = "127.0.0.1";
2620
2745
  var DEFAULT_PORT = 4141;
2621
- var FORBIDDEN_BROWSER_ORIGIN_MESSAGE = "Browser-origin requests require HOOPILOT_API_KEY unless the Origin is loopback.";
2746
+ var FORBIDDEN_BROWSER_ORIGIN_MESSAGE = "Cross-origin browser requests are blocked unless the Origin is loopback or listed in HOOPILOT_ALLOWED_ORIGINS.";
2747
+ var WELL_KNOWN_DEMO_API_KEYS = /* @__PURE__ */ new Set(["local-key"]);
2622
2748
  var INVALID_JSON_MESSAGE = "Request body must be valid JSON.";
2623
2749
  var JSON_OBJECT_MESSAGE = "Request body must be a JSON object.";
2624
2750
  var MAX_REQUEST_BODY_BYTES = 16 * 1024 * 1024;
@@ -2634,6 +2760,7 @@ var RequestBodyTooLargeError = class extends Error {
2634
2760
  function createHoopilotHandler(options = {}) {
2635
2761
  const client = new CopilotClient(options);
2636
2762
  const apiKey = options.apiKey ?? envValue(options.env?.HOOPILOT_API_KEY);
2763
+ const allowedOrigins = parseAllowedOrigins(options.env);
2637
2764
  const logger = serverLogger(options);
2638
2765
  const metrics = options.metrics ?? new MetricsRegistry();
2639
2766
  const readUsage = createUsageReader(client, metrics);
@@ -2653,7 +2780,10 @@ function createHoopilotHandler(options = {}) {
2653
2780
  route
2654
2781
  });
2655
2782
  metrics.startRequest();
2783
+ const origin = request.headers.get("origin")?.trim() || void 0;
2784
+ const corsOrigin = resolveCorsAllowOrigin(origin, allowedOrigins);
2656
2785
  const finish = (response) => finishResponse(response, {
2786
+ corsOrigin,
2657
2787
  logger: requestLogger,
2658
2788
  method: request.method,
2659
2789
  metrics,
@@ -2663,11 +2793,11 @@ function createHoopilotHandler(options = {}) {
2663
2793
  closeConnection: bufferProxyBodies,
2664
2794
  trackStreamingBody: !bufferProxyBodies
2665
2795
  });
2666
- const browserOrigin = forbiddenBrowserOrigin(request, apiKey);
2796
+ const browserOrigin = forbiddenBrowserOrigin(origin, request, allowedOrigins);
2667
2797
  if (browserOrigin) {
2668
2798
  requestLogger.warn(
2669
2799
  { event: "http.request.forbidden_origin", origin: browserOrigin },
2670
- "blocked unauthenticated browser-origin request"
2800
+ "blocked cross-origin browser request"
2671
2801
  );
2672
2802
  return finish(jsonError(403, "forbidden_origin", FORBIDDEN_BROWSER_ORIGIN_MESSAGE));
2673
2803
  }
@@ -2793,10 +2923,17 @@ function startHoopilotServer(options = {}) {
2793
2923
  const port = normalizeServerPort(options.port ?? envValue(options.env?.PORT) ?? DEFAULT_PORT);
2794
2924
  const apiKey = options.apiKey ?? envValue(options.env?.HOOPILOT_API_KEY);
2795
2925
  const allowUnauthenticated = options.allowUnauthenticated ?? envValue(options.env?.HOOPILOT_ALLOW_UNAUTHENTICATED) === "1";
2796
- if (!isLoopbackHost(host) && !apiKey && !allowUnauthenticated) {
2797
- throw new Error(
2798
- "Refusing to listen on a non-loopback host without HOOPILOT_API_KEY. Set an API key or pass --allow-unauthenticated."
2799
- );
2926
+ if (!isLoopbackHost(host)) {
2927
+ if (!apiKey && !allowUnauthenticated) {
2928
+ throw new Error(
2929
+ "Refusing to listen on a non-loopback host without HOOPILOT_API_KEY. Set an API key or pass --allow-unauthenticated."
2930
+ );
2931
+ }
2932
+ if (apiKey && isWellKnownDemoApiKey(apiKey)) {
2933
+ throw new Error(
2934
+ "Refusing to listen on a non-loopback host with a well-known demo HOOPILOT_API_KEY. Set a strong, unique API key."
2935
+ );
2936
+ }
2800
2937
  }
2801
2938
  const server = Bun.serve({
2802
2939
  fetch: createHoopilotHandler({
@@ -3109,7 +3246,6 @@ function corsHeaders() {
3109
3246
  return {
3110
3247
  "access-control-allow-headers": "anthropic-beta, anthropic-dangerous-direct-browser-access, anthropic-version, authorization, content-type, x-api-key, x-request-id",
3111
3248
  "access-control-allow-methods": "GET, POST, OPTIONS",
3112
- "access-control-allow-origin": "*",
3113
3249
  "access-control-expose-headers": "x-request-id"
3114
3250
  };
3115
3251
  }
@@ -3121,17 +3257,34 @@ function isAuthorized(request, apiKey) {
3121
3257
  const bearer = authorization.match(/^Bearer\s+(.+)$/i)?.[1];
3122
3258
  return bearer === apiKey || request.headers.get("x-api-key") === apiKey;
3123
3259
  }
3124
- function forbiddenBrowserOrigin(request, apiKey) {
3125
- if (apiKey) {
3126
- return void 0;
3127
- }
3128
- const origin = request.headers.get("origin")?.trim();
3260
+ function forbiddenBrowserOrigin(origin, request, allowedOrigins) {
3129
3261
  if (origin) {
3130
- return isLoopbackOrigin(origin) ? void 0 : origin;
3262
+ return isAllowedOrigin(origin, allowedOrigins) ? void 0 : origin;
3131
3263
  }
3132
3264
  const fetchSite = request.headers.get("sec-fetch-site")?.toLowerCase();
3133
3265
  return fetchSite === "cross-site" ? "cross-site" : void 0;
3134
3266
  }
3267
/**
 * Build the set of extra allowed origins from the comma-separated
 * HOOPILOT_ALLOWED_ORIGINS setting. Entries are trimmed and lower-cased, and
 * blank entries are dropped; an unset value produces an empty set.
 */
function parseAllowedOrigins(env) {
  const configured = envValue(env?.HOOPILOT_ALLOWED_ORIGINS);
  const origins = /* @__PURE__ */ new Set();
  if (!configured) {
    return origins;
  }
  for (const piece of configured.split(",")) {
    const normalized = piece.trim().toLowerCase();
    if (normalized.length > 0) {
      origins.add(normalized);
    }
  }
  return origins;
}
3276
/**
 * Report whether `origin` may call the server: either isLoopbackOrigin accepts
 * it, or its lower-cased form is a member of `allowedOrigins`.
 */
function isAllowedOrigin(origin, allowedOrigins) {
  const loopback = isLoopbackOrigin(origin);
  if (loopback) {
    return loopback;
  }
  return allowedOrigins.has(origin.toLowerCase());
}
3279
/**
 * Pick the value for the `access-control-allow-origin` response header.
 * Requests without an Origin get "*"; an allowed Origin is echoed back; any
 * other Origin yields undefined.
 */
function resolveCorsAllowOrigin(origin, allowedOrigins) {
  if (origin) {
    return isAllowedOrigin(origin, allowedOrigins) ? origin : undefined;
  }
  return "*";
}
3285
/**
 * Report whether `apiKey`, after trimming and lower-casing, is one of the
 * entries in WELL_KNOWN_DEMO_API_KEYS.
 */
function isWellKnownDemoApiKey(apiKey) {
  const normalized = apiKey.trim().toLowerCase();
  return WELL_KNOWN_DEMO_API_KEYS.has(normalized);
}
3135
3288
  function isUpstreamAuthStatus(status) {
3136
3289
  return status === 401 || status === 403;
3137
3290
  }
@@ -3191,7 +3344,12 @@ function shouldBufferProxyBodies(mode) {
3191
3344
  return process.platform === "win32" && IS_STANDALONE_BINARY;
3192
3345
  }
3193
3346
  function finishResponse(response, options) {
3194
- const withRequestId = responseWithRequestId(response, options.requestId, options.closeConnection);
3347
+ const withRequestId = responseWithRequestId(
3348
+ response,
3349
+ options.requestId,
3350
+ options.closeConnection,
3351
+ options.corsOrigin
3352
+ );
3195
3353
  const stream = isStreamingResponse(withRequestId);
3196
3354
  const status = withRequestId.status;
3197
3355
  const complete = () => {
@@ -3209,9 +3367,17 @@ function finishResponse(response, options) {
3209
3367
  complete();
3210
3368
  return withRequestId;
3211
3369
  }
3212
- function responseWithRequestId(response, requestId, closeConnection) {
3370
+ function responseWithRequestId(response, requestId, closeConnection, corsOrigin) {
3213
3371
  const headers = new Headers(response.headers);
3214
3372
  headers.set("x-request-id", requestId);
3373
+ if (corsOrigin) {
3374
+ headers.set("access-control-allow-origin", corsOrigin);
3375
+ if (corsOrigin !== "*") {
3376
+ headers.append("vary", "Origin");
3377
+ }
3378
+ } else {
3379
+ headers.delete("access-control-allow-origin");
3380
+ }
3215
3381
  if (closeConnection) {
3216
3382
  headers.set("connection", "close");
3217
3383
  }
@@ -3375,6 +3541,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
3375
3541
  try {
3376
3542
  const upstream = await client.usage(signal);
3377
3543
  metrics.recordUpstream(usagePath, upstream.ok);
3544
+ metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
3378
3545
  if (!upstream.ok) {
3379
3546
  return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
3380
3547
  }
@@ -3432,6 +3599,7 @@ export {
3432
3599
  observeResponseUsage,
3433
3600
  parseLogFormat,
3434
3601
  parseLogLevel,
3602
+ parseRateLimitHeaders,
3435
3603
  readStoredCopilotAuth,
3436
3604
  responsesCompactionResult,
3437
3605
  responsesRequestToChatCompletion,