@openhoo/hoopilot 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/cli.js +154 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +133 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +48 -1
- package/dist/index.d.ts +48 -1
- package/dist/index.js +132 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -21,6 +21,12 @@ declare class MetricsRegistry {
|
|
|
21
21
|
recordUpstream(path: string, ok: boolean): void;
|
|
22
22
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
23
23
|
recordCopilotQuota(usage: CopilotUsage): void;
|
|
24
|
+
/**
|
|
25
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
26
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
27
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
28
|
+
*/
|
|
29
|
+
recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
|
|
24
30
|
/** A JSON-friendly view of the current counters. */
|
|
25
31
|
snapshot(now?: () => number): MetricsSnapshot;
|
|
26
32
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
@@ -149,8 +155,40 @@ interface CopilotUsage {
|
|
|
149
155
|
quotaResetDate?: string;
|
|
150
156
|
quotas: Record<string, CopilotQuota>;
|
|
151
157
|
}
|
|
158
|
+
/**
|
|
159
|
+
* GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
|
|
160
|
+
* `api.github.com` returns on every response. Hoopilot reads these off the
|
|
161
|
+
* `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
|
|
162
|
+
* usage is visible without spending an extra request.
|
|
163
|
+
*/
|
|
164
|
+
interface GithubRateLimit {
|
|
165
|
+
/**
 * `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`).
 * `parseRateLimitHeaders` substitutes `"unknown"` when the header is absent or blank.
 */
|
|
166
|
+
resource: string;
|
|
167
|
+
/**
 * `x-ratelimit-limit` — maximum requests allowed in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
168
|
+
limit?: number;
|
|
169
|
+
/**
 * `x-ratelimit-remaining` — requests left in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
170
|
+
remaining?: number;
|
|
171
|
+
/**
 * `x-ratelimit-used` — requests already spent in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
172
|
+
used?: number;
|
|
173
|
+
/**
 * `x-ratelimit-reset` — Unix epoch seconds when the window resets.
 * Snapshots render this value as the ISO 8601 `resetAt` string.
 */
|
|
174
|
+
resetEpochSeconds?: number;
|
|
175
|
+
/**
 * `retry-after` — seconds to wait, present on 429 / secondary-limit responses.
 * Only the integer-seconds form is captured; a value that does not start with
 * digits (such as an HTTP date) is left unset by `parseRateLimitHeaders`.
 */
|
|
176
|
+
retryAfterSeconds?: number;
|
|
177
|
+
/**
 * Wall-clock epoch ms when these values were observed. `parseRateLimitHeaders`
 * fills it from its `nowMs` argument.
 */
|
|
178
|
+
observedAtMs: number;
|
|
179
|
+
}
|
|
180
|
+
/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
|
|
181
|
+
interface GithubRateLimitSnapshot {
|
|
182
|
+
/** Maximum requests allowed in the window; present only when the stored record had a `limit`. */
limit?: number;
|
|
183
|
+
/** ISO 8601 timestamp rendered from the stored record's `observedAtMs`. Always present. */
observedAt: string;
|
|
184
|
+
/** Requests left in the window; present only when the stored record had a `remaining`. */
remaining?: number;
|
|
185
|
+
/** ISO 8601 timestamp rendered from the stored record's `resetEpochSeconds` (seconds × 1000). */
resetAt?: string;
|
|
186
|
+
/** Seconds to wait, copied from the stored record's `retryAfterSeconds` when defined. */
retryAfterSeconds?: number;
|
|
187
|
+
/** Requests already spent in the window; present only when the stored record had a `used`. */
used?: number;
|
|
188
|
+
}
|
|
152
189
|
/** A point-in-time JSON view of the in-process metrics. */
|
|
153
190
|
interface MetricsSnapshot {
|
|
191
|
+
githubRateLimit: Record<string, GithubRateLimitSnapshot>;
|
|
154
192
|
inFlight: number;
|
|
155
193
|
requests: {
|
|
156
194
|
byRoute: Record<string, number>;
|
|
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
|
|
|
227
265
|
* Copilot completion endpoints.
|
|
228
266
|
*/
|
|
229
267
|
declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
|
|
268
|
+
/**
|
|
269
|
+
* Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
|
|
270
|
+
* response into a {@link GithubRateLimit}. `api.github.com` returns these on
|
|
271
|
+
* every reply, so the proxy reads its GitHub API budget from the quota call it
|
|
272
|
+
* already makes — no extra request is spent. Returns undefined when the response
|
|
273
|
+
* carries no rate-limit headers (for example the Copilot completion host, which
|
|
274
|
+
* does not emit them today) so callers record nothing rather than a phantom row.
|
|
275
|
+
*/
|
|
276
|
+
declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
|
|
230
277
|
declare class CopilotClient {
|
|
231
278
|
#private;
|
|
232
279
|
constructor(options?: CopilotAuthOptions);
|
|
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
|
|
|
309
356
|
declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
|
|
310
357
|
declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
|
|
311
358
|
|
|
312
|
-
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
|
359
|
+
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
package/dist/index.d.ts
CHANGED
|
@@ -21,6 +21,12 @@ declare class MetricsRegistry {
|
|
|
21
21
|
recordUpstream(path: string, ok: boolean): void;
|
|
22
22
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
23
23
|
recordCopilotQuota(usage: CopilotUsage): void;
|
|
24
|
+
/**
|
|
25
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
26
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
27
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
28
|
+
*/
|
|
29
|
+
recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
|
|
24
30
|
/** A JSON-friendly view of the current counters. */
|
|
25
31
|
snapshot(now?: () => number): MetricsSnapshot;
|
|
26
32
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
@@ -149,8 +155,40 @@ interface CopilotUsage {
|
|
|
149
155
|
quotaResetDate?: string;
|
|
150
156
|
quotas: Record<string, CopilotQuota>;
|
|
151
157
|
}
|
|
158
|
+
/**
|
|
159
|
+
* GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
|
|
160
|
+
* `api.github.com` returns on every response. Hoopilot reads these off the
|
|
161
|
+
* `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
|
|
162
|
+
* usage is visible without spending an extra request.
|
|
163
|
+
*/
|
|
164
|
+
interface GithubRateLimit {
|
|
165
|
+
/**
 * `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`).
 * `parseRateLimitHeaders` substitutes `"unknown"` when the header is absent or blank.
 */
|
|
166
|
+
resource: string;
|
|
167
|
+
/**
 * `x-ratelimit-limit` — maximum requests allowed in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
168
|
+
limit?: number;
|
|
169
|
+
/**
 * `x-ratelimit-remaining` — requests left in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
170
|
+
remaining?: number;
|
|
171
|
+
/**
 * `x-ratelimit-used` — requests already spent in the current window.
 * Left unset by `parseRateLimitHeaders` when the header is missing or is not a non-negative integer.
 */
|
|
172
|
+
used?: number;
|
|
173
|
+
/**
 * `x-ratelimit-reset` — Unix epoch seconds when the window resets.
 * Snapshots render this value as the ISO 8601 `resetAt` string.
 */
|
|
174
|
+
resetEpochSeconds?: number;
|
|
175
|
+
/**
 * `retry-after` — seconds to wait, present on 429 / secondary-limit responses.
 * Only the integer-seconds form is captured; a value that does not start with
 * digits (such as an HTTP date) is left unset by `parseRateLimitHeaders`.
 */
|
|
176
|
+
retryAfterSeconds?: number;
|
|
177
|
+
/**
 * Wall-clock epoch ms when these values were observed. `parseRateLimitHeaders`
 * fills it from its `nowMs` argument.
 */
|
|
178
|
+
observedAtMs: number;
|
|
179
|
+
}
|
|
180
|
+
/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
|
|
181
|
+
interface GithubRateLimitSnapshot {
|
|
182
|
+
/** Maximum requests allowed in the window; present only when the stored record had a `limit`. */
limit?: number;
|
|
183
|
+
/** ISO 8601 timestamp rendered from the stored record's `observedAtMs`. Always present. */
observedAt: string;
|
|
184
|
+
/** Requests left in the window; present only when the stored record had a `remaining`. */
remaining?: number;
|
|
185
|
+
/** ISO 8601 timestamp rendered from the stored record's `resetEpochSeconds` (seconds × 1000). */
resetAt?: string;
|
|
186
|
+
/** Seconds to wait, copied from the stored record's `retryAfterSeconds` when defined. */
retryAfterSeconds?: number;
|
|
187
|
+
/** Requests already spent in the window; present only when the stored record had a `used`. */
used?: number;
|
|
188
|
+
}
|
|
152
189
|
/** A point-in-time JSON view of the in-process metrics. */
|
|
153
190
|
interface MetricsSnapshot {
|
|
191
|
+
githubRateLimit: Record<string, GithubRateLimitSnapshot>;
|
|
154
192
|
inFlight: number;
|
|
155
193
|
requests: {
|
|
156
194
|
byRoute: Record<string, number>;
|
|
@@ -227,6 +265,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
|
|
|
227
265
|
* Copilot completion endpoints.
|
|
228
266
|
*/
|
|
229
267
|
declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
|
|
268
|
+
/**
|
|
269
|
+
* Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
|
|
270
|
+
* response into a {@link GithubRateLimit}. `api.github.com` returns these on
|
|
271
|
+
* every reply, so the proxy reads its GitHub API budget from the quota call it
|
|
272
|
+
* already makes — no extra request is spent. Returns undefined when the response
|
|
273
|
+
* carries no rate-limit headers (for example the Copilot completion host, which
|
|
274
|
+
* does not emit them today) so callers record nothing rather than a phantom row.
|
|
275
|
+
*/
|
|
276
|
+
declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
|
|
230
277
|
declare class CopilotClient {
|
|
231
278
|
#private;
|
|
232
279
|
constructor(options?: CopilotAuthOptions);
|
|
@@ -309,4 +356,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
|
|
|
309
356
|
declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
|
|
310
357
|
declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
|
|
311
358
|
|
|
312
|
-
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
|
359
|
+
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
package/dist/index.js
CHANGED
|
@@ -1757,6 +1757,38 @@ function applyGithubApiHeaders(headers, token) {
|
|
|
1757
1757
|
headers.set("x-github-api-version", COPILOT_USAGE_API_VERSION);
|
|
1758
1758
|
return headers;
|
|
1759
1759
|
}
|
|
1760
|
+
/**
 * Read the GitHub rate-limit budget from a response's headers.
 *
 * @param headers Header collection; only `get(name)` is called on it.
 * @param nowMs Observation time in epoch milliseconds, stored as `observedAtMs`.
 *   Defaults to the current time.
 * @returns The parsed budget with unset fields omitted, or `undefined` when none
 *   of the five numeric headers produced a value.
 */
function parseRateLimitHeaders(headers, nowMs = Date.now()) {
  const read = (name) => headerInt(headers, name);
  const budget = {
    limit: read("x-ratelimit-limit"),
    remaining: read("x-ratelimit-remaining"),
    used: read("x-ratelimit-used"),
    resetEpochSeconds: read("x-ratelimit-reset"),
    retryAfterSeconds: read("retry-after")
  };
  // The resource name alone is not a budget: without any numeric value there is
  // nothing worth recording.
  const nothingParsed = Object.values(budget).every((value) => value === void 0);
  if (nothingParsed) {
    return void 0;
  }
  // A missing or blank resource header is reported under "unknown".
  const resource = headers.get("x-ratelimit-resource")?.trim() || "unknown";
  return removeUndefinedRateLimit({
    limit: budget.limit,
    observedAtMs: nowMs,
    remaining: budget.remaining,
    resetEpochSeconds: budget.resetEpochSeconds,
    resource,
    retryAfterSeconds: budget.retryAfterSeconds,
    used: budget.used
  });
}
|
|
1779
|
+
/**
 * Read a header as a non-negative integer.
 *
 * @param headers Header collection; only `get(name)` is called, and it is expected
 *   to return the raw string or `null` when the header is absent.
 * @param name Header name to look up.
 * @returns The parsed integer, or `undefined` when the header is absent or is not
 *   a plain run of decimal digits within the safe-integer range.
 */
function headerInt(headers, name) {
  const raw = headers.get(name);
  if (raw == null) {
    return void 0;
  }
  const text = raw.trim();
  // Number.parseInt stops at the first non-digit, so "1e3" would read as 1 and
  // "12abc" as 12. Require digits only so a malformed header is dropped instead
  // of being silently misread.
  if (!/^\d+$/.test(text)) {
    return void 0;
  }
  const value = Number(text);
  // Past Number.MAX_SAFE_INTEGER the value no longer round-trips exactly and
  // would feed nonsense into gauges and timestamps.
  return Number.isSafeInteger(value) ? value : void 0;
}
|
|
1787
|
+
/**
 * Copy `rateLimit` without the own enumerable properties whose value is
 * `undefined`. Other falsy values (`0`, `null`, `""`) are kept and key order is
 * preserved.
 */
function removeUndefinedRateLimit(rateLimit) {
  const defined = [];
  for (const entry of Object.entries(rateLimit)) {
    if (entry[1] !== void 0) {
      defined.push(entry);
    }
  }
  return Object.fromEntries(defined);
}
|
|
1760
1792
|
var CopilotClient = class {
|
|
1761
1793
|
#auth;
|
|
1762
1794
|
#allowUnsafeUpstream;
|
|
@@ -2173,6 +2205,7 @@ var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
|
2173
2205
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
2174
2206
|
var MAX_TRACKED_MODELS = 200;
|
|
2175
2207
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
2208
|
+
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
2176
2209
|
var LABEL_SEPARATOR = "";
|
|
2177
2210
|
var UNKNOWN_MODEL = "unknown";
|
|
2178
2211
|
function emptyModelTotals() {
|
|
@@ -2186,6 +2219,7 @@ var MetricsRegistry = class {
|
|
|
2186
2219
|
#tokens = /* @__PURE__ */ new Map();
|
|
2187
2220
|
#upstream = /* @__PURE__ */ new Map();
|
|
2188
2221
|
#copilotQuota;
|
|
2222
|
+
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
2189
2223
|
constructor(options = {}) {
|
|
2190
2224
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
2191
2225
|
}
|
|
@@ -2223,17 +2257,39 @@ var MetricsRegistry = class {
|
|
|
2223
2257
|
recordCopilotQuota(usage) {
|
|
2224
2258
|
this.#copilotQuota = usage;
|
|
2225
2259
|
}
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2260
|
+
/**
|
|
2261
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
2262
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
2263
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
2264
|
+
*/
|
|
2265
|
+
recordGithubRateLimit(rateLimit) {
|
|
2266
|
+
if (!rateLimit) {
|
|
2267
|
+
return;
|
|
2268
|
+
}
|
|
2269
|
+
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
2270
|
+
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
2271
|
+
}
|
|
2272
|
+
// Sanitize the model into a bounded label. The model can originate from a
|
|
2273
|
+
// client request, so cap its length, strip characters that would corrupt the
|
|
2274
|
+
// exposition format, and fold overflow past the cardinality limit into
|
|
2275
|
+
// UNKNOWN_MODEL to keep the series count bounded.
|
|
2230
2276
|
#modelLabel(model) {
|
|
2231
|
-
const cleaned = model
|
|
2277
|
+
const cleaned = cleanLabel(model).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2232
2278
|
if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
|
|
2233
2279
|
return UNKNOWN_MODEL;
|
|
2234
2280
|
}
|
|
2235
2281
|
return cleaned;
|
|
2236
2282
|
}
|
|
2283
|
+
// The resource comes from a trusted upstream header, but clean and bound it
|
|
2284
|
+
// with the same discipline as model labels: strip control characters that
|
|
2285
|
+
// would corrupt the exposition format and fold overflow into "unknown".
|
|
2286
|
+
#rateLimitResource(resource) {
|
|
2287
|
+
const cleaned = cleanLabel(resource).slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
|
|
2288
|
+
if (!this.#githubRateLimit.has(cleaned) && this.#githubRateLimit.size >= MAX_TRACKED_RATELIMIT_RESOURCES) {
|
|
2289
|
+
return UNKNOWN_MODEL;
|
|
2290
|
+
}
|
|
2291
|
+
return cleaned;
|
|
2292
|
+
}
|
|
2237
2293
|
#observeDuration(route, seconds) {
|
|
2238
2294
|
const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
|
|
2239
2295
|
const entry = this.#durations.get(route) ?? {
|
|
@@ -2278,7 +2334,12 @@ var MetricsRegistry = class {
|
|
|
2278
2334
|
upstreamErrors += count;
|
|
2279
2335
|
}
|
|
2280
2336
|
}
|
|
2337
|
+
const githubRateLimit = {};
|
|
2338
|
+
for (const [resource, rateLimit] of this.#githubRateLimit) {
|
|
2339
|
+
githubRateLimit[resource] = toRateLimitSnapshot(rateLimit);
|
|
2340
|
+
}
|
|
2281
2341
|
return {
|
|
2342
|
+
githubRateLimit,
|
|
2282
2343
|
inFlight: this.#inFlight,
|
|
2283
2344
|
requests: { byRoute, byStatus, total: requestsTotal },
|
|
2284
2345
|
startedAt: new Date(this.#startedAtMs).toISOString(),
|
|
@@ -2349,10 +2410,43 @@ var MetricsRegistry = class {
|
|
|
2349
2410
|
lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
|
|
2350
2411
|
lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
|
|
2351
2412
|
}
|
|
2413
|
+
this.#renderGithubRateLimit(lines);
|
|
2352
2414
|
this.#renderCopilotQuota(lines);
|
|
2353
2415
|
return `${lines.join("\n")}
|
|
2354
2416
|
`;
|
|
2355
2417
|
}
|
|
2418
|
+
#renderGithubRateLimit(lines) {
|
|
2419
|
+
const entries = [...this.#githubRateLimit.values()];
|
|
2420
|
+
if (entries.length === 0) {
|
|
2421
|
+
return;
|
|
2422
|
+
}
|
|
2423
|
+
const gauge = (suffix, help, pick) => {
|
|
2424
|
+
const present = entries.filter((rateLimit) => pick(rateLimit) !== void 0);
|
|
2425
|
+
if (present.length === 0) {
|
|
2426
|
+
return;
|
|
2427
|
+
}
|
|
2428
|
+
lines.push(`# HELP hoopilot_github_ratelimit_${suffix} ${help}`);
|
|
2429
|
+
lines.push(`# TYPE hoopilot_github_ratelimit_${suffix} gauge`);
|
|
2430
|
+
for (const rateLimit of present) {
|
|
2431
|
+
lines.push(
|
|
2432
|
+
`hoopilot_github_ratelimit_${suffix}${labels({ resource: rateLimit.resource })} ${pick(rateLimit)}`
|
|
2433
|
+
);
|
|
2434
|
+
}
|
|
2435
|
+
};
|
|
2436
|
+
gauge("limit", "GitHub REST API request ceiling for the resource window.", (r) => r.limit);
|
|
2437
|
+
gauge("remaining", "Requests remaining in the GitHub REST API window.", (r) => r.remaining);
|
|
2438
|
+
gauge("used", "Requests used in the GitHub REST API window.", (r) => r.used);
|
|
2439
|
+
gauge(
|
|
2440
|
+
"reset_timestamp_seconds",
|
|
2441
|
+
"Unix epoch when the GitHub REST API window resets.",
|
|
2442
|
+
(r) => r.resetEpochSeconds
|
|
2443
|
+
);
|
|
2444
|
+
gauge(
|
|
2445
|
+
"retry_after_seconds",
|
|
2446
|
+
"Seconds to wait after a GitHub secondary-limit response.",
|
|
2447
|
+
(r) => r.retryAfterSeconds
|
|
2448
|
+
);
|
|
2449
|
+
}
|
|
2356
2450
|
#renderCopilotQuota(lines) {
|
|
2357
2451
|
const usage = this.#copilotQuota;
|
|
2358
2452
|
if (!usage) {
|
|
@@ -2593,6 +2687,37 @@ function modelText(value) {
|
|
|
2593
2687
|
function nonNegative(value) {
|
|
2594
2688
|
return Number.isFinite(value) && value > 0 ? value : 0;
|
|
2595
2689
|
}
|
|
2690
|
+
/**
 * Remove ASCII control characters from `value` and trim surrounding whitespace.
 *
 * A character is dropped when its first UTF-16 code unit is 31 or lower, or is
 * 127 (DEL); everything else is kept unchanged.
 */
function cleanLabel(value) {
  const printable = Array.from(value).filter((symbol) => {
    const unit = symbol.charCodeAt(0);
    return unit > 31 && unit !== 127;
  });
  return printable.join("").trim();
}
|
|
2700
|
+
/**
 * Build the JSON-friendly snapshot of one stored GitHub rate-limit record.
 *
 * `observedAt` is always present; every other field is copied only when it is
 * defined on `rateLimit`. The record's `resource` is not copied.
 *
 * @param rateLimit Record with `observedAtMs` and optional `limit`, `remaining`,
 *   `used`, `resetEpochSeconds`, `retryAfterSeconds`.
 * @returns Plain object with `observedAt` (ISO 8601) plus whichever optional
 *   fields were defined. `resetAt` is the reset epoch rendered as ISO 8601 and is
 *   omitted when that epoch is not a representable date.
 * @throws RangeError if `observedAtMs` is not a representable time.
 */
function toRateLimitSnapshot(rateLimit) {
  const { limit, remaining, used, resetEpochSeconds, retryAfterSeconds } = rateLimit;
  const observedAt = new Date(rateLimit.observedAtMs).toISOString();
  // The reset epoch comes straight from an upstream header. A value outside the
  // Date range would make toISOString() throw and take the whole snapshot down,
  // so an unrepresentable reset is left out instead.
  let resetAt;
  if (resetEpochSeconds !== void 0) {
    const resetDate = new Date(resetEpochSeconds * 1e3);
    if (!Number.isNaN(resetDate.getTime())) {
      resetAt = resetDate.toISOString();
    }
  }
  return {
    observedAt,
    ...(limit !== void 0 ? { limit } : {}),
    ...(remaining !== void 0 ? { remaining } : {}),
    ...(used !== void 0 ? { used } : {}),
    ...(resetAt !== void 0 ? { resetAt } : {}),
    ...(retryAfterSeconds !== void 0 ? { retryAfterSeconds } : {})
  };
}
|
|
2596
2721
|
function labelKey(...parts) {
|
|
2597
2722
|
return parts.join(LABEL_SEPARATOR);
|
|
2598
2723
|
}
|
|
@@ -3416,6 +3541,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
3416
3541
|
try {
|
|
3417
3542
|
const upstream = await client.usage(signal);
|
|
3418
3543
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
3544
|
+
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
3419
3545
|
if (!upstream.ok) {
|
|
3420
3546
|
return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
|
|
3421
3547
|
}
|
|
@@ -3473,6 +3599,7 @@ export {
|
|
|
3473
3599
|
observeResponseUsage,
|
|
3474
3600
|
parseLogFormat,
|
|
3475
3601
|
parseLogLevel,
|
|
3602
|
+
parseRateLimitHeaders,
|
|
3476
3603
|
readStoredCopilotAuth,
|
|
3477
3604
|
responsesCompactionResult,
|
|
3478
3605
|
responsesRequestToChatCompletion,
|