@openhoo/hoopilot 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/dist/cli.js +240 -30
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +219 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +60 -2
- package/dist/index.d.ts +60 -2
- package/dist/index.js +218 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -15,12 +15,25 @@ declare class MetricsRegistry {
|
|
|
15
15
|
startRequest(): void;
|
|
16
16
|
/** Record a completed request and clear its in-flight slot. */
|
|
17
17
|
observe(observation: RequestObservation): void;
|
|
18
|
+
/**
|
|
19
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
20
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
21
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
22
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
23
|
+
*/
|
|
24
|
+
recordTokenExtraction(extracted: boolean): void;
|
|
18
25
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
19
26
|
recordTokens(model: string, usage: TokenUsage): void;
|
|
20
27
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
21
28
|
recordUpstream(path: string, ok: boolean): void;
|
|
22
29
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
23
30
|
recordCopilotQuota(usage: CopilotUsage): void;
|
|
31
|
+
/**
|
|
32
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
33
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
34
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
35
|
+
*/
|
|
36
|
+
recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
|
|
24
37
|
/** A JSON-friendly view of the current counters. */
|
|
25
38
|
snapshot(now?: () => number): MetricsSnapshot;
|
|
26
39
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
@@ -37,7 +50,7 @@ declare class MetricsRegistry {
|
|
|
37
50
|
* branch; combined with the runtime cancelling the client branch, that releases
|
|
38
51
|
* the shared upstream connection instead of draining it in the background.
|
|
39
52
|
*/
|
|
40
|
-
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
|
|
53
|
+
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
|
|
41
54
|
|
|
42
55
|
type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
43
56
|
interface Logger {
|
|
@@ -149,8 +162,40 @@ interface CopilotUsage {
|
|
|
149
162
|
quotaResetDate?: string;
|
|
150
163
|
quotas: Record<string, CopilotQuota>;
|
|
151
164
|
}
|
|
165
|
+
/**
|
|
166
|
+
* GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
|
|
167
|
+
* `api.github.com` returns on every response. Hoopilot reads these off the
|
|
168
|
+
* `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
|
|
169
|
+
* usage is visible without spending an extra request.
|
|
170
|
+
*/
|
|
171
|
+
interface GithubRateLimit {
|
|
172
|
+
/** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
|
|
173
|
+
resource: string;
|
|
174
|
+
/** `x-ratelimit-limit` — maximum requests allowed in the current window. */
|
|
175
|
+
limit?: number;
|
|
176
|
+
/** `x-ratelimit-remaining` — requests left in the current window. */
|
|
177
|
+
remaining?: number;
|
|
178
|
+
/** `x-ratelimit-used` — requests already spent in the current window. */
|
|
179
|
+
used?: number;
|
|
180
|
+
/** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
|
|
181
|
+
resetEpochSeconds?: number;
|
|
182
|
+
/** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
|
|
183
|
+
retryAfterSeconds?: number;
|
|
184
|
+
/** Wall-clock epoch ms when these values were observed. */
|
|
185
|
+
observedAtMs: number;
|
|
186
|
+
}
|
|
187
|
+
/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
|
|
188
|
+
interface GithubRateLimitSnapshot {
|
|
189
|
+
limit?: number;
|
|
190
|
+
observedAt: string;
|
|
191
|
+
remaining?: number;
|
|
192
|
+
resetAt?: string;
|
|
193
|
+
retryAfterSeconds?: number;
|
|
194
|
+
used?: number;
|
|
195
|
+
}
|
|
152
196
|
/** A point-in-time JSON view of the in-process metrics. */
|
|
153
197
|
interface MetricsSnapshot {
|
|
198
|
+
githubRateLimit: Record<string, GithubRateLimitSnapshot>;
|
|
154
199
|
inFlight: number;
|
|
155
200
|
requests: {
|
|
156
201
|
byRoute: Record<string, number>;
|
|
@@ -162,6 +207,10 @@ interface MetricsSnapshot {
|
|
|
162
207
|
byModel: Record<string, ModelTokenTotals>;
|
|
163
208
|
cached: number;
|
|
164
209
|
completion: number;
|
|
210
|
+
extraction: {
|
|
211
|
+
extracted: number;
|
|
212
|
+
missing: number;
|
|
213
|
+
};
|
|
165
214
|
prompt: number;
|
|
166
215
|
reasoning: number;
|
|
167
216
|
total: number;
|
|
@@ -227,6 +276,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
|
|
|
227
276
|
* Copilot completion endpoints.
|
|
228
277
|
*/
|
|
229
278
|
declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
|
|
279
|
+
/**
|
|
280
|
+
* Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
|
|
281
|
+
* response into a {@link GithubRateLimit}. `api.github.com` returns these on
|
|
282
|
+
* every reply, so the proxy reads its GitHub API budget from the quota call it
|
|
283
|
+
* already makes — no extra request is spent. Returns undefined when the response
|
|
284
|
+
* carries no rate-limit headers (for example the Copilot completion host, which
|
|
285
|
+
* does not emit them today) so callers record nothing rather than a phantom row.
|
|
286
|
+
*/
|
|
287
|
+
declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
|
|
230
288
|
declare class CopilotClient {
|
|
231
289
|
#private;
|
|
232
290
|
constructor(options?: CopilotAuthOptions);
|
|
@@ -309,4 +367,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
|
|
|
309
367
|
declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
|
|
310
368
|
declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
|
|
311
369
|
|
|
312
|
-
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
|
370
|
+
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
package/dist/index.d.ts
CHANGED
|
@@ -15,12 +15,25 @@ declare class MetricsRegistry {
|
|
|
15
15
|
startRequest(): void;
|
|
16
16
|
/** Record a completed request and clear its in-flight slot. */
|
|
17
17
|
observe(observation: RequestObservation): void;
|
|
18
|
+
/**
|
|
19
|
+
* Record whether one upstream completion reported token usage. `missing`
|
|
20
|
+
* counts responses that carried no usage object — most often streamed Chat
|
|
21
|
+
* Completions sent without `stream_options: {"include_usage": true}` — so a
|
|
22
|
+
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
23
|
+
*/
|
|
24
|
+
recordTokenExtraction(extracted: boolean): void;
|
|
18
25
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
19
26
|
recordTokens(model: string, usage: TokenUsage): void;
|
|
20
27
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
21
28
|
recordUpstream(path: string, ok: boolean): void;
|
|
22
29
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
23
30
|
recordCopilotQuota(usage: CopilotUsage): void;
|
|
31
|
+
/**
|
|
32
|
+
* Store the latest GitHub REST rate-limit budget, keyed by its resource bucket.
|
|
33
|
+
* A no-op when `rateLimit` is undefined (the response carried no rate-limit
|
|
34
|
+
* headers) so callers can pass {@link parseRateLimitHeaders} output directly.
|
|
35
|
+
*/
|
|
36
|
+
recordGithubRateLimit(rateLimit: GithubRateLimit | undefined): void;
|
|
24
37
|
/** A JSON-friendly view of the current counters. */
|
|
25
38
|
snapshot(now?: () => number): MetricsSnapshot;
|
|
26
39
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
@@ -37,7 +50,7 @@ declare class MetricsRegistry {
|
|
|
37
50
|
* branch; combined with the runtime cancelling the client branch, that releases
|
|
38
51
|
* the shared upstream connection instead of draining it in the background.
|
|
39
52
|
*/
|
|
40
|
-
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal): Response;
|
|
53
|
+
declare function observeResponseUsage(response: Response, fallbackModel: string, onUsage: (model: string, usage: TokenUsage) => void, signal?: AbortSignal, onOutcome?: (extracted: boolean) => void): Response;
|
|
41
54
|
|
|
42
55
|
type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
43
56
|
interface Logger {
|
|
@@ -149,8 +162,40 @@ interface CopilotUsage {
|
|
|
149
162
|
quotaResetDate?: string;
|
|
150
163
|
quotas: Record<string, CopilotQuota>;
|
|
151
164
|
}
|
|
165
|
+
/**
|
|
166
|
+
* GitHub REST API rate-limit budget parsed from the `x-ratelimit-*` headers that
|
|
167
|
+
* `api.github.com` returns on every response. Hoopilot reads these off the
|
|
168
|
+
* `copilot_internal/user` quota call it already makes, so the proxy's GitHub API
|
|
169
|
+
* usage is visible without spending an extra request.
|
|
170
|
+
*/
|
|
171
|
+
interface GithubRateLimit {
|
|
172
|
+
/** `x-ratelimit-resource` — the bucket the request counted against (e.g. `core`). */
|
|
173
|
+
resource: string;
|
|
174
|
+
/** `x-ratelimit-limit` — maximum requests allowed in the current window. */
|
|
175
|
+
limit?: number;
|
|
176
|
+
/** `x-ratelimit-remaining` — requests left in the current window. */
|
|
177
|
+
remaining?: number;
|
|
178
|
+
/** `x-ratelimit-used` — requests already spent in the current window. */
|
|
179
|
+
used?: number;
|
|
180
|
+
/** `x-ratelimit-reset` — Unix epoch seconds when the window resets. */
|
|
181
|
+
resetEpochSeconds?: number;
|
|
182
|
+
/** `retry-after` — seconds to wait, present on 429 / secondary-limit responses. */
|
|
183
|
+
retryAfterSeconds?: number;
|
|
184
|
+
/** Wall-clock epoch ms when these values were observed. */
|
|
185
|
+
observedAtMs: number;
|
|
186
|
+
}
|
|
187
|
+
/** JSON view of one GitHub rate-limit resource, as rendered into a snapshot. */
|
|
188
|
+
interface GithubRateLimitSnapshot {
|
|
189
|
+
limit?: number;
|
|
190
|
+
observedAt: string;
|
|
191
|
+
remaining?: number;
|
|
192
|
+
resetAt?: string;
|
|
193
|
+
retryAfterSeconds?: number;
|
|
194
|
+
used?: number;
|
|
195
|
+
}
|
|
152
196
|
/** A point-in-time JSON view of the in-process metrics. */
|
|
153
197
|
interface MetricsSnapshot {
|
|
198
|
+
githubRateLimit: Record<string, GithubRateLimitSnapshot>;
|
|
154
199
|
inFlight: number;
|
|
155
200
|
requests: {
|
|
156
201
|
byRoute: Record<string, number>;
|
|
@@ -162,6 +207,10 @@ interface MetricsSnapshot {
|
|
|
162
207
|
byModel: Record<string, ModelTokenTotals>;
|
|
163
208
|
cached: number;
|
|
164
209
|
completion: number;
|
|
210
|
+
extraction: {
|
|
211
|
+
extracted: number;
|
|
212
|
+
missing: number;
|
|
213
|
+
};
|
|
165
214
|
prompt: number;
|
|
166
215
|
reasoning: number;
|
|
167
216
|
total: number;
|
|
@@ -227,6 +276,15 @@ declare function applyCopilotHeaders(headers: Headers, token: string): Headers;
|
|
|
227
276
|
* Copilot completion endpoints.
|
|
228
277
|
*/
|
|
229
278
|
declare function applyGithubApiHeaders(headers: Headers, token: string): Headers;
|
|
279
|
+
/**
|
|
280
|
+
* Parse the GitHub REST `x-ratelimit-*` headers (plus `retry-after`) off a
|
|
281
|
+
* response into a {@link GithubRateLimit}. `api.github.com` returns these on
|
|
282
|
+
* every reply, so the proxy reads its GitHub API budget from the quota call it
|
|
283
|
+
* already makes — no extra request is spent. Returns undefined when the response
|
|
284
|
+
* carries no rate-limit headers (for example the Copilot completion host, which
|
|
285
|
+
* does not emit them today) so callers record nothing rather than a phantom row.
|
|
286
|
+
*/
|
|
287
|
+
declare function parseRateLimitHeaders(headers: Headers, nowMs?: number): GithubRateLimit | undefined;
|
|
230
288
|
declare class CopilotClient {
|
|
231
289
|
#private;
|
|
232
290
|
constructor(options?: CopilotAuthOptions);
|
|
@@ -309,4 +367,4 @@ declare function extractTokenUsage(usage: unknown): TokenUsage | undefined;
|
|
|
309
367
|
declare function createHoopilotHandler(options?: HoopilotServerOptions): (request: Request) => Promise<Response>;
|
|
310
368
|
declare function startHoopilotServer(options?: HoopilotServerOptions): StartedHoopilotServer;
|
|
311
369
|
|
|
312
|
-
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|
|
370
|
+
export { AnthropicCompatibilityError, COPILOT_USAGE_API_VERSION, type CopilotAccess, CopilotAuth, CopilotAuthError, type CopilotAuthOptions, CopilotClient, type CopilotQuota, type CopilotUsage, DEFAULT_GITHUB_API_BASE_URL, DEFAULT_LOG_FORMAT, DEFAULT_LOG_LEVEL, DEFAULT_MODEL, type FetchLike, type GithubRateLimit, type GithubRateLimitSnapshot, type HoopilotLogger, type HoopilotLoggerOptions, type HoopilotServerOptions, type JsonObject, type LogFields, type LogFormat, type LogLevel, type LogMethod, type Logger, MetricsRegistry, type MetricsSnapshot, type ModelTokenTotals, PROMETHEUS_CONTENT_TYPE, type RequestObservation, type StartedHoopilotServer, type TokenUsage, anthropicMessagesToResponsesRequest, applyCopilotHeaders, applyGithubApiHeaders, authStorePath, chatCompletionToCompletion, chatCompletionToResponse, completionStreamFromChatStream, completionsRequestToChatCompletion, createHoopilotHandler, createHoopilotLogger, estimateAnthropicMessageTokens, extractTokenUsage, fallbackModels, githubCopilotDeviceLogin, noopLogger, normalizeChatCompletionRequest, normalizeCopilotUsage, normalizeModelsResponse, normalizeRequestedModel, observeResponseUsage, parseLogFormat, parseLogLevel, parseRateLimitHeaders, readStoredCopilotAuth, responsesCompactionResult, responsesRequestToChatCompletion, responsesResponseToAnthropicMessage, responsesStreamFromChatStream, responsesStreamToAnthropicStream, startHoopilotServer, writeStoredCopilotAuth };
|