@openhoo/hoopilot 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -30,24 +30,33 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ COPILOT_USAGE_API_VERSION: () => COPILOT_USAGE_API_VERSION,
33
34
  CopilotAuth: () => CopilotAuth,
34
35
  CopilotAuthError: () => CopilotAuthError,
35
36
  CopilotClient: () => CopilotClient,
37
+ DEFAULT_GITHUB_API_BASE_URL: () => DEFAULT_GITHUB_API_BASE_URL,
36
38
  DEFAULT_LOG_FORMAT: () => DEFAULT_LOG_FORMAT,
37
39
  DEFAULT_LOG_LEVEL: () => DEFAULT_LOG_LEVEL,
38
40
  DEFAULT_MODEL: () => DEFAULT_MODEL,
41
+ MetricsRegistry: () => MetricsRegistry,
42
+ PROMETHEUS_CONTENT_TYPE: () => PROMETHEUS_CONTENT_TYPE,
43
+ applyCopilotHeaders: () => applyCopilotHeaders,
44
+ applyGithubApiHeaders: () => applyGithubApiHeaders,
39
45
  authStorePath: () => authStorePath,
40
46
  chatCompletionToCompletion: () => chatCompletionToCompletion,
41
47
  chatCompletionToResponse: () => chatCompletionToResponse,
42
48
  completionsRequestToChatCompletion: () => completionsRequestToChatCompletion,
43
49
  createHoopilotHandler: () => createHoopilotHandler,
44
50
  createHoopilotLogger: () => createHoopilotLogger,
51
+ extractTokenUsage: () => extractTokenUsage,
45
52
  fallbackModels: () => fallbackModels,
46
53
  githubCopilotDeviceLogin: () => githubCopilotDeviceLogin,
47
54
  noopLogger: () => noopLogger,
48
55
  normalizeChatCompletionRequest: () => normalizeChatCompletionRequest,
56
+ normalizeCopilotUsage: () => normalizeCopilotUsage,
49
57
  normalizeModelsResponse: () => normalizeModelsResponse,
50
58
  normalizeRequestedModel: () => normalizeRequestedModel,
59
+ observeResponseUsage: () => observeResponseUsage,
51
60
  parseLogFormat: () => parseLogFormat,
52
61
  parseLogLevel: () => parseLogLevel,
53
62
  readStoredCopilotAuth: () => readStoredCopilotAuth,
@@ -91,25 +100,36 @@ function readStoredCopilotAuth(path = authStorePath()) {
91
100
  }
92
101
  function writeStoredCopilotAuth(auth, path = authStorePath()) {
93
102
  (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(path), { recursive: true });
94
- (0, import_node_fs.writeFileSync)(
95
- path,
96
- `${JSON.stringify(
97
- {
98
- ...auth,
99
- createdAt: auth.createdAt ?? (/* @__PURE__ */ new Date()).toISOString()
100
- },
101
- null,
102
- 2
103
- )}
104
- `,
105
- { mode: 384 }
106
- );
103
+ const data = `${JSON.stringify(
104
+ {
105
+ ...auth,
106
+ createdAt: auth.createdAt ?? (/* @__PURE__ */ new Date()).toISOString()
107
+ },
108
+ null,
109
+ 2
110
+ )}
111
+ `;
112
+ const tmpPath = `${path}.${process.pid}.tmp`;
113
+ (0, import_node_fs.writeFileSync)(tmpPath, data, { mode: 384 });
114
+ (0, import_node_fs.renameSync)(tmpPath, path);
107
115
  try {
108
116
  (0, import_node_fs.chmodSync)(path, 384);
109
117
  } catch {
110
118
  }
111
119
  }
112
120
 
121
+ // src/util.ts
122
/**
 * Remove every "/" at the end of `value`; all other characters are kept.
 *
 * @param value - String to trim (for example a base URL).
 * @returns `value` without its trailing slashes.
 */
function trimTrailingSlash(value) {
  let end = value.length;
  while (end > 0 && value[end - 1] === "/") {
    end -= 1;
  }
  return value.slice(0, end);
}
125
/**
 * Read a response body as text and keep at most `max` leading characters.
 *
 * Visible callers use this while building an error message for a failed
 * response, so a body that cannot be read must not replace that error: a read
 * failure yields an empty string instead of rejecting.
 *
 * @param response - Object exposing an async `text()` method (e.g. a fetch Response).
 * @param max - Maximum number of characters to keep; negative values keep none.
 * @returns The (possibly truncated) body text, or "" when the body is unreadable.
 */
async function truncatedResponseText(response, max = 500) {
  // A negative `max` would make `slice` trim from the tail instead of capping.
  const limit = Math.max(0, max);
  try {
    const text = await response.text();
    return text.slice(0, limit);
  } catch {
    return "";
  }
}
129
/**
 * Coerce an arbitrary value into something safe to read properties from.
 *
 * @param value - Any value.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a
 *   fresh empty object.
 */
function asRecord(value) {
  if (!value) {
    return {};
  }
  if (typeof value !== "object" || Array.isArray(value)) {
    return {};
  }
  return value;
}
132
+
113
133
  // src/auth.ts
114
134
  var DEFAULT_COPILOT_API_BASE_URL = "https://api.githubcopilot.com";
115
135
  var REFRESH_SKEW_MS = 6e4;
@@ -152,17 +172,59 @@ var CopilotAuth = class {
152
172
  return access;
153
173
  }
154
174
  };
155
- function trimTrailingSlash(value) {
156
- return value.replace(/\/+$/, "");
157
- }
158
175
 
159
176
  // src/copilot.ts
177
// Base URL of the GitHub REST API, used when neither the `githubApiBaseUrl`
// option nor HOOPILOT_GITHUB_API_BASE_URL is provided.
var DEFAULT_GITHUB_API_BASE_URL = "https://api.github.com";
// Value sent as `x-github-api-version` on GitHub REST API requests.
var COPILOT_USAGE_API_VERSION = "2025-04-01";
179
/**
 * Stamp the headers sent with Copilot API requests onto `headers`.
 *
 * An `accept` value already present is preserved; every other header listed
 * here overwrites whatever was set before.
 *
 * @param headers - `Headers` instance to mutate.
 * @param token - Token placed in the `authorization: Bearer …` header.
 * @returns The same `headers` instance, for chaining.
 */
function applyCopilotHeaders(headers, token) {
  const accept = headers.get("accept") ?? "application/json";
  const values = [
    ["accept", accept],
    ["authorization", `Bearer ${token}`],
    ["copilot-integration-id", "vscode-chat"],
    ["editor-plugin-version", "hoopilot/0.1.0"],
    ["editor-version", "Hoopilot/0.1.0"],
    ["openai-intent", "conversation-panel"],
    ["user-agent", "hoopilot/0.1.0"],
    ["x-github-api-version", "2026-06-01"]
  ];
  for (const [name, value] of values) {
    headers.set(name, value);
  }
  return headers;
}
190
/**
 * Stamp the headers sent with GitHub REST API requests onto `headers`.
 *
 * Unlike the Copilot variant, the credential is sent with the `token` scheme
 * and the API version comes from COPILOT_USAGE_API_VERSION. An `accept` value
 * already present is preserved.
 *
 * @param headers - `Headers` instance to mutate.
 * @param token - Token placed in the `authorization: token …` header.
 * @returns The same `headers` instance, for chaining.
 */
function applyGithubApiHeaders(headers, token) {
  const accept = headers.get("accept") ?? "application/json";
  const values = [
    ["accept", accept],
    ["authorization", `token ${token}`],
    ["editor-plugin-version", "hoopilot/0.1.0"],
    ["editor-version", "Hoopilot/0.1.0"],
    ["user-agent", "hoopilot/0.1.0"],
    ["x-github-api-version", COPILOT_USAGE_API_VERSION]
  ];
  for (const [name, value] of values) {
    headers.set(name, value);
  }
  return headers;
}
160
199
  var CopilotClient = class {
161
200
  #auth;
162
201
  #fetch;
202
+ #githubApiBaseUrl;
163
203
  constructor(options = {}) {
164
204
  this.#auth = new CopilotAuth(options);
165
205
  this.#fetch = options.fetch ?? fetch;
206
+ this.#githubApiBaseUrl = trimTrailingSlash(
207
+ options.githubApiBaseUrl ?? options.env?.HOOPILOT_GITHUB_API_BASE_URL ?? DEFAULT_GITHUB_API_BASE_URL
208
+ );
209
+ }
210
+ /**
211
+ * Fetch the Copilot account's quota / premium-request usage from the GitHub
212
+ * REST `copilot_internal/user` endpoint. The stored device-flow OAuth token is
213
+ * accepted directly here — no Copilot token exchange is required to read quota.
214
+ */
215
+ async usage(signal) {
216
+ if (!isHttpsOrLoopback(this.#githubApiBaseUrl)) {
217
+ throw new Error(
218
+ `Refusing to send the GitHub OAuth token to a non-HTTPS host: ${this.#githubApiBaseUrl}`
219
+ );
220
+ }
221
+ const access = await this.#auth.getAccess();
222
+ const headers = applyGithubApiHeaders(new Headers(), access.token);
223
+ return this.#fetch(`${this.#githubApiBaseUrl}/copilot_internal/user`, {
224
+ headers,
225
+ method: "GET",
226
+ signal
227
+ });
166
228
  }
167
229
  async chatCompletions(body, signal) {
168
230
  return this.fetchCopilot("/chat/completions", {
@@ -195,21 +257,88 @@ var CopilotClient = class {
195
257
  }
196
258
  async fetchCopilot(path, init) {
197
259
  const access = await this.#auth.getAccess();
198
- const headers = new Headers(init.headers);
199
- headers.set("accept", headers.get("accept") ?? "application/json");
200
- headers.set("authorization", `Bearer ${access.token}`);
201
- headers.set("copilot-integration-id", "vscode-chat");
202
- headers.set("editor-plugin-version", "hoopilot/0.1.0");
203
- headers.set("editor-version", "Hoopilot/0.1.0");
204
- headers.set("openai-intent", "conversation-panel");
205
- headers.set("user-agent", "hoopilot/0.1.0");
206
- headers.set("x-github-api-version", "2026-06-01");
260
+ const headers = applyCopilotHeaders(new Headers(init.headers), access.token);
207
261
  return this.#fetch(`${access.apiBaseUrl}${path}`, {
208
262
  ...init,
209
263
  headers
210
264
  });
211
265
  }
212
266
  };
267
/**
 * Convert a raw `copilot_internal/user` payload into a compact usage summary.
 *
 * Quotas come from `quota_snapshots` when that object has entries. Otherwise
 * they are derived from `limited_user_quotas` (remaining) and `monthly_quotas`
 * (entitlement), covering every category named in either object.
 *
 * @param body - Parsed JSON payload; non-object input is treated as empty.
 * @returns An object with `quotas` plus whichever of `accessTypeSku`,
 *   `chatEnabled`, `plan` and `quotaResetDate` were present with valid types.
 */
function normalizeCopilotUsage(body) {
  const record = asRecord(body);
  // Collect [category, quota] pairs and build the object once at the end:
  // assigning `quotas[category] = …` would set the prototype instead of an
  // own property when upstream JSON names a category "__proto__".
  const quotaEntries = Object.entries(asRecord(record.quota_snapshots)).map(
    ([category, detail]) => [category, normalizeQuotaDetail(asRecord(detail))]
  );
  if (quotaEntries.length === 0) {
    const remaining = asRecord(record.limited_user_quotas);
    const monthly = asRecord(record.monthly_quotas);
    const categories = /* @__PURE__ */ new Set([
      ...Object.keys(remaining),
      ...Object.keys(monthly)
    ]);
    for (const category of categories) {
      const entitlement = numberOrUndefined(monthly[category]);
      const left = numberOrUndefined(remaining[category]);
      // A percentage is only meaningful with a positive entitlement.
      const canComputePercent = entitlement !== void 0 && entitlement > 0 && left !== void 0;
      quotaEntries.push([
        category,
        removeUndefinedQuota({
          entitlement,
          percentRemaining: canComputePercent ? left / entitlement * 100 : void 0,
          remaining: left,
          used: usedFrom(entitlement, left)
        })
      ]);
    }
  }
  return removeUndefinedUsage({
    accessTypeSku: stringOrUndefined(record.access_type_sku),
    chatEnabled: typeof record.chat_enabled === "boolean" ? record.chat_enabled : void 0,
    plan: stringOrUndefined(record.copilot_plan),
    quotaResetDate: stringOrUndefined(record.quota_reset_date) ?? stringOrUndefined(record.quota_reset_date_utc) ?? stringOrUndefined(record.limited_user_reset_date),
    quotas: Object.fromEntries(quotaEntries)
  });
}
296
/**
 * Normalize one quota snapshot entry into camelCase fields.
 *
 * Numeric fields are kept only when they are finite numbers and boolean
 * fields only when they are real booleans; `remaining` falls back to
 * `quota_remaining`. Fields without a valid value are omitted.
 *
 * @param detail - Raw snapshot record for one category.
 * @returns The normalized quota object.
 */
function normalizeQuotaDetail(detail) {
  const booleanOrUndefined = (value) => typeof value === "boolean" ? value : void 0;
  const entitlement = numberOrUndefined(detail.entitlement);
  const remaining = numberOrUndefined(detail.remaining) ?? numberOrUndefined(detail.quota_remaining);
  const normalized = {
    entitlement,
    overageCount: numberOrUndefined(detail.overage_count),
    overagePermitted: booleanOrUndefined(detail.overage_permitted),
    percentRemaining: numberOrUndefined(detail.percent_remaining),
    remaining,
    unlimited: booleanOrUndefined(detail.unlimited),
    used: usedFrom(entitlement, remaining)
  };
  return removeUndefinedQuota(normalized);
}
309
/**
 * Derive the consumed quota from entitlement and remaining amounts.
 *
 * @param entitlement - Total allowance, or undefined when unknown.
 * @param remaining - Amount left, or undefined when unknown.
 * @returns `entitlement - remaining` clamped at 0, or undefined when either
 *   input is undefined.
 */
function usedFrom(entitlement, remaining) {
  const bothKnown = entitlement !== void 0 && remaining !== void 0;
  return bothKnown ? Math.max(0, entitlement - remaining) : void 0;
}
315
/**
 * Decide whether a bearer credential may be sent to `rawUrl`.
 *
 * HTTPS URLs are always accepted. Plain HTTP is accepted only for the
 * loopback hosts `127.0.0.1`, `localhost` and `[::1]`.
 *
 * @param rawUrl - Absolute URL string.
 * @returns true when the URL is HTTPS or HTTP-to-loopback; false otherwise,
 *   including when `rawUrl` cannot be parsed.
 */
function isHttpsOrLoopback(rawUrl) {
  let url;
  try {
    url = new URL(rawUrl);
  } catch {
    return false;
  }
  if (url.protocol === "https:") {
    return true;
  }
  if (url.protocol !== "http:") {
    return false;
  }
  // WHATWG URL keeps the square brackets in `hostname` for IPv6 literals, so
  // the loopback address must be compared as "[::1]" (a bare "::1" never matches).
  return url.hostname === "127.0.0.1" || url.hostname === "localhost" || url.hostname === "[::1]";
}
327
/**
 * Keep `value` only when it is a finite number.
 *
 * @returns `value`, or undefined for non-numbers, NaN and ±Infinity.
 */
function numberOrUndefined(value) {
  if (typeof value !== "number") {
    return void 0;
  }
  return Number.isFinite(value) ? value : void 0;
}
330
/**
 * Keep `value` only when it is a non-empty string.
 *
 * @returns `value`, or undefined for non-strings and the empty string.
 */
function stringOrUndefined(value) {
  if (typeof value !== "string" || value.length === 0) {
    return void 0;
  }
  return value;
}
333
/**
 * Copy a quota object, dropping every property whose value is `undefined`.
 * Other falsy values (0, false, null, "") are kept.
 *
 * @param quota - Quota object possibly containing undefined fields.
 * @returns A new object with only the defined fields, in original order.
 */
function removeUndefinedQuota(quota) {
  const defined = [];
  for (const entry of Object.entries(quota)) {
    if (entry[1] !== void 0) {
      defined.push(entry);
    }
  }
  return Object.fromEntries(defined);
}
338
/**
 * Copy a usage summary, dropping every property whose value is `undefined`.
 *
 * @param usage - Usage object possibly containing undefined fields.
 * @returns A new object with only the defined fields, in original order.
 */
function removeUndefinedUsage(usage) {
  const isDefined = ([, value]) => value !== void 0;
  return Object.fromEntries(Object.entries(usage).filter(isDefined));
}
213
342
 
214
343
  // src/github-device.ts
215
344
  var import_promises = require("timers/promises");
@@ -217,6 +346,7 @@ var DEFAULT_GITHUB_COPILOT_CLIENT_ID = "Ov23li8tweQw6odWQebz";
217
346
  var DEFAULT_GITHUB_DOMAIN = "github.com";
218
347
  var DEVICE_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:device_code";
219
348
  var POLLING_SAFETY_MARGIN_MS = 3e3;
349
+ var REQUEST_TIMEOUT_MS = 15e3;
220
350
  async function githubCopilotDeviceLogin(options = {}) {
221
351
  const env = options.env ?? process.env;
222
352
  const fetcher = options.fetch ?? fetch;
@@ -251,16 +381,20 @@ async function requestDeviceCode(fetcher, domain, clientId) {
251
381
  scope: "read:user"
252
382
  }),
253
383
  headers: oauthHeaders(),
254
- method: "POST"
384
+ method: "POST",
385
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
255
386
  });
256
387
  if (!response.ok) {
257
388
  throw new Error(
258
- `GitHub device authorization failed with ${response.status}: ${await safeResponseText(
389
+ `GitHub device authorization failed with ${response.status}: ${await truncatedResponseText(
259
390
  response
260
391
  )}`
261
392
  );
262
393
  }
263
- return await response.json();
394
+ return parseJsonResponse(
395
+ response,
396
+ "GitHub device authorization response was not valid JSON"
397
+ );
264
398
  }
265
399
  async function pollForAccessToken(fetcher, sleeper, domain, clientId, device) {
266
400
  let intervalMs = device.interval * 1e3 + POLLING_SAFETY_MARGIN_MS;
@@ -274,16 +408,20 @@ async function pollForAccessToken(fetcher, sleeper, domain, clientId, device) {
274
408
  grant_type: DEVICE_GRANT_TYPE
275
409
  }),
276
410
  headers: oauthHeaders(),
277
- method: "POST"
411
+ method: "POST",
412
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
278
413
  });
279
414
  if (!response.ok) {
280
415
  throw new Error(
281
- `GitHub device token exchange failed with ${response.status}: ${await safeResponseText(
416
+ `GitHub device token exchange failed with ${response.status}: ${await truncatedResponseText(
282
417
  response
283
418
  )}`
284
419
  );
285
420
  }
286
- const data = await response.json();
421
+ const data = await parseJsonResponse(
422
+ response,
423
+ "GitHub device token response was not valid JSON"
424
+ );
287
425
  if (data.access_token) {
288
426
  return data.access_token;
289
427
  }
@@ -319,9 +457,13 @@ function normalizeDomain(value) {
319
457
  function positiveSeconds(value, fallback) {
320
458
  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
321
459
  }
322
- async function safeResponseText(response) {
460
/**
 * Read a response body and parse it as JSON.
 *
 * @param response - Object exposing an async `text()` method (e.g. a fetch Response).
 * @param context - Prefix for the error message when the body is not JSON.
 * @returns The parsed JSON value.
 * @throws Error with message `<context>: <first 500 characters of the body>`
 *   when parsing fails; the underlying parse error is attached as `cause`.
 */
async function parseJsonResponse(response, context) {
  const text = await response.text();
  try {
    return JSON.parse(text);
  } catch (error) {
    // Keep the original parse error reachable for debugging.
    throw new Error(`${context}: ${text.slice(0, 500)}`, { cause: error });
  }
}
326
468
 
327
469
  // src/logger.ts
@@ -424,6 +566,16 @@ function shouldCreateLogger(options) {
424
566
  options.logger || options.logFormat || options.logLevel || options.env?.HOOPILOT_LOG_FORMAT || options.env?.HOOPILOT_LOG_LEVEL
425
567
  );
426
568
  }
569
/**
 * Turn a thrown value into a plain object.
 *
 * @param error - Anything that was thrown.
 * @returns `{ message, name, stack }` for Error instances; otherwise
 *   `{ message }` with the value stringified.
 */
function errorDetails(error) {
  if (!(error instanceof Error)) {
    return { message: String(error) };
  }
  const { message, name, stack } = error;
  return { message, name, stack };
}
427
579
  function isLogFormat(value) {
428
580
  return LOG_FORMATS.includes(value);
429
581
  }
@@ -601,17 +753,18 @@ function responsesStreamFromChatStream(chatStream, options) {
601
753
  const lines = buffer.split(/\r?\n/);
602
754
  buffer = lines.pop() ?? "";
603
755
  for (const line of lines) {
604
- processChatSseLine(line, enqueue, tools, (delta) => {
756
+ processChatSseLine(messageId, line, enqueue, tools, (delta) => {
605
757
  text += delta;
606
758
  });
607
759
  }
608
760
  }
609
761
  if (buffer) {
610
- processChatSseLine(buffer, enqueue, tools, (delta) => {
762
+ processChatSseLine(messageId, buffer, enqueue, tools, (delta) => {
611
763
  text += delta;
612
764
  });
613
765
  }
614
- const output = streamOutputItems(messageId, text, [...tools.values()]);
766
+ const toolItems = [...tools.values()].map(functionCallItem);
767
+ const output = [messageOutputItem(text, messageId), ...toolItems];
615
768
  enqueue("response.output_text.done", {
616
769
  content_index: 0,
617
770
  item_id: messageId,
@@ -635,8 +788,7 @@ function responsesStreamFromChatStream(chatStream, options) {
635
788
  output_index: 0,
636
789
  type: "response.output_item.done"
637
790
  });
638
- tools.forEach((tool, index) => {
639
- const item = functionCallItem(tool);
791
+ toolItems.forEach((item, index) => {
640
792
  const outputIndex = index + 1;
641
793
  enqueue("response.output_item.added", {
642
794
  item,
@@ -644,7 +796,7 @@ function responsesStreamFromChatStream(chatStream, options) {
644
796
  type: "response.output_item.added"
645
797
  });
646
798
  enqueue("response.function_call_arguments.done", {
647
- arguments: tool.arguments,
799
+ arguments: item.arguments,
648
800
  item_id: item.id,
649
801
  output_index: outputIndex,
650
802
  type: "response.function_call_arguments.done"
@@ -662,6 +814,8 @@ function responsesStreamFromChatStream(chatStream, options) {
662
814
  enqueue("done", "[DONE]");
663
815
  controller.close();
664
816
  } catch (error) {
817
+ await reader.cancel(error).catch(() => {
818
+ });
665
819
  controller.error(error);
666
820
  } finally {
667
821
  reader.releaseLock();
@@ -864,11 +1018,45 @@ function responseUsage(usage) {
864
1018
  total_tokens: record.total_tokens
865
1019
  });
866
1020
  }
1021
/**
 * Extract token counts from a usage object in either naming scheme
 * (`prompt_tokens`/`completion_tokens` or `input_tokens`/`output_tokens`).
 *
 * @param usage - Raw usage record; non-object input is treated as empty.
 * @returns undefined when no prompt, completion or total count is present.
 *   Otherwise an object with `promptTokens` and `completionTokens` (missing
 *   ones default to 0), `totalTokens` (defaults to their sum), and
 *   `reasoningTokens` / `cachedTokens` only when reported.
 */
function extractTokenUsage(usage) {
  const record = asRecord(usage);
  const prompt = firstNumber(record.prompt_tokens, record.input_tokens);
  const completion = firstNumber(record.completion_tokens, record.output_tokens);
  const total = firstNumber(record.total_tokens);
  const nothingReported = [prompt, completion, total].every((count) => count === void 0);
  if (nothingReported) {
    return void 0;
  }
  const promptTokens = prompt ?? 0;
  const completionTokens = completion ?? 0;
  const completionDetails = asRecord(record.completion_tokens_details);
  const outputDetails = asRecord(record.output_tokens_details);
  const promptDetails = asRecord(record.prompt_tokens_details);
  const inputDetails = asRecord(record.input_tokens_details);
  return removeUndefined({
    cachedTokens: firstNumber(promptDetails.cached_tokens, inputDetails.cached_tokens),
    completionTokens,
    promptTokens,
    reasoningTokens: firstNumber(completionDetails.reasoning_tokens, outputDetails.reasoning_tokens),
    totalTokens: total ?? promptTokens + completionTokens
  });
}
1047
/**
 * Return the first argument that is a finite number.
 *
 * @param values - Candidates, checked in order.
 * @returns The first finite number, or undefined when there is none.
 */
function firstNumber(...values) {
  return values.find((value) => typeof value === "number" && Number.isFinite(value));
}
867
1055
  function firstChoice(completion) {
868
1056
  const choices = Array.isArray(completion.choices) ? completion.choices : [];
869
1057
  return asRecord(choices[0]);
870
1058
  }
871
- function processChatSseLine(line, enqueue, tools, appendText) {
1059
+ function processChatSseLine(messageId, line, enqueue, tools, appendText) {
872
1060
  const trimmed = line.trim();
873
1061
  if (!trimmed.startsWith("data:")) {
874
1062
  return;
@@ -889,7 +1077,7 @@ function processChatSseLine(line, enqueue, tools, appendText) {
889
1077
  enqueue("response.output_text.delta", {
890
1078
  content_index: 0,
891
1079
  delta: content,
892
- item_id: "",
1080
+ item_id: messageId,
893
1081
  output_index: 0,
894
1082
  type: "response.output_text.delta"
895
1083
  });
@@ -911,9 +1099,6 @@ function processChatSseLine(line, enqueue, tools, appendText) {
911
1099
  tools.set(index, existing);
912
1100
  }
913
1101
  }
914
- function streamOutputItems(messageId, text, tools) {
915
- return [messageOutputItem(text, messageId), ...tools.map((tool) => functionCallItem(tool))];
916
- }
917
1102
  function baseStreamResponse(id, model, createdAt, status, output) {
918
1103
  return {
919
1104
  created_at: createdAt,
@@ -953,9 +1138,6 @@ function parseJson(data) {
953
1138
  function removeUndefined(record) {
954
1139
  return Object.fromEntries(Object.entries(record).filter(([, value]) => value !== void 0));
955
1140
  }
956
- function asRecord(value) {
957
- return value && typeof value === "object" && !Array.isArray(value) ? value : {};
958
- }
959
1141
  function randomId() {
960
1142
  return crypto.randomUUID().replaceAll("-", "");
961
1143
  }
@@ -963,104 +1145,449 @@ function epochSeconds() {
963
1145
  return Math.floor(Date.now() / 1e3);
964
1146
  }
965
1147
 
1148
+ // src/metrics.ts
1149
// Content type of the Prometheus text exposition format, version 0.0.4.
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
// Upper bounds (seconds) of the request-duration histogram buckets.
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
// Cap on how much response data the usage observer buffers (16 MiB).
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
// Maximum number of distinct model labels tracked before folding into UNKNOWN_MODEL.
var MAX_TRACKED_MODELS = 200;
// Maximum length of a model label.
var MAX_MODEL_LABEL_LENGTH = 200;
// Joins label parts into one Map key and splits them back apart. It must be a
// non-empty character that cannot occur in a label part: an empty separator
// makes `key.split(LABEL_SEPARATOR)` break the key into single characters.
// Spelled as an escape so the control character survives editors, diffs and
// copy/paste (U+001F, "unit separator").
var LABEL_SEPARATOR = "\u001f";
// Label used when a model name is empty or the tracked-model cap is reached.
var UNKNOWN_MODEL = "unknown";
1156
/**
 * Create a fresh, zeroed per-model counter record.
 *
 * @returns A new object on every call so callers can mutate it freely.
 */
function emptyModelTotals() {
  const totals = {
    cached: 0,
    completion: 0,
    prompt: 0,
    reasoning: 0,
    requests: 0,
    total: 0
  };
  return totals;
}
1159
/**
 * In-memory registry of proxy metrics: request counts and durations, upstream
 * call outcomes, per-model token totals and the latest Copilot quota. It can
 * be read as a JSON-friendly snapshot or rendered in the Prometheus text
 * exposition format.
 */
var MetricsRegistry = class {
  #startedAtMs;
  #inFlight = 0;
  // Keyed by labelKey(route, method, status) -> count.
  #requests = /* @__PURE__ */ new Map();
  // Keyed by route -> { buckets (non-cumulative), count, sum }.
  #durations = /* @__PURE__ */ new Map();
  // Keyed by sanitized model label -> token totals.
  #tokens = /* @__PURE__ */ new Map();
  // Keyed by labelKey(path, "ok" | "error") -> count.
  #upstream = /* @__PURE__ */ new Map();
  #copilotQuota;
  /**
   * @param options - `now` may supply a clock returning epoch milliseconds;
   *   it is called once to record the start time. Defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.#startedAtMs = (options.now ?? Date.now)();
  }
  /** Mark a request as started; pair with exactly one {@link observe}. */
  startRequest() {
    this.#inFlight += 1;
  }
  /**
   * Record a completed request and clear its in-flight slot.
   *
   * @param observation - `{ route, method, status, durationMs }`.
   */
  observe(observation) {
    // Never let the gauge go negative if observe() is called unpaired.
    if (this.#inFlight > 0) {
      this.#inFlight -= 1;
    }
    const key = labelKey(observation.route, observation.method, String(observation.status));
    this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
    this.#observeDuration(observation.route, observation.durationMs / 1e3);
  }
  /**
   * Accumulate token counts for a model from one upstream completion.
   *
   * @param model - Model name; sanitized before being used as a label.
   * @param usage - `{ promptTokens, completionTokens, totalTokens,
   *   reasoningTokens?, cachedTokens? }`.
   */
  recordTokens(model, usage) {
    const name = this.#modelLabel(model);
    const totals = this.#tokens.get(name) ?? emptyModelTotals();
    totals.requests += 1;
    totals.prompt += nonNegative(usage.promptTokens);
    totals.completion += nonNegative(usage.completionTokens);
    totals.total += nonNegative(usage.totalTokens);
    totals.reasoning += nonNegative(usage.reasoningTokens ?? 0);
    totals.cached += nonNegative(usage.cachedTokens ?? 0);
    this.#tokens.set(name, totals);
  }
  /** Record one upstream Copilot call and whether it succeeded. */
  recordUpstream(path, ok) {
    const key = labelKey(path, ok ? "ok" : "error");
    this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
  }
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
  recordCopilotQuota(usage) {
    this.#copilotQuota = usage;
  }
  // Sanitize the model into a bounded, control-char-free label. The model can
  // originate from a client request, so cap its length, strip characters that
  // would corrupt the exposition format, and fold overflow past the cardinality
  // limit into UNKNOWN_MODEL to keep the series count bounded. A non-string
  // model is treated as missing rather than throwing.
  #modelLabel(model) {
    const raw = typeof model === "string" ? model : "";
    const cleaned = raw.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
    if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
      return UNKNOWN_MODEL;
    }
    return cleaned;
  }
  // Add one observation to the route's histogram. Buckets are stored
  // non-cumulatively (only the first bucket whose bound covers the value is
  // incremented); renderPrometheus accumulates them. Invalid or negative
  // durations are recorded as 0.
  #observeDuration(route, seconds) {
    const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
    const entry = this.#durations.get(route) ?? {
      buckets: new Array(DURATION_BUCKETS_SECONDS.length).fill(0),
      count: 0,
      sum: 0
    };
    entry.count += 1;
    entry.sum += value;
    const index = DURATION_BUCKETS_SECONDS.findIndex((bound) => value <= bound);
    // Values above the largest bound only show up in the +Inf bucket (count).
    if (index !== -1) {
      entry.buckets[index] = (entry.buckets[index] ?? 0) + 1;
    }
    this.#durations.set(route, entry);
  }
  /**
   * A JSON-friendly view of the current counters.
   *
   * @param now - Clock returning epoch milliseconds; defaults to `Date.now`.
   */
  snapshot(now = Date.now) {
    // Aggregate in Maps and convert with Object.fromEntries at the end: keys
    // such as a client-supplied model name must become own properties even
    // when they collide with special names like "__proto__".
    const byRoute = /* @__PURE__ */ new Map();
    const byStatus = /* @__PURE__ */ new Map();
    let requestsTotal = 0;
    for (const [key, count] of this.#requests) {
      const [route = "", , status = ""] = key.split(LABEL_SEPARATOR);
      byRoute.set(route, (byRoute.get(route) ?? 0) + count);
      byStatus.set(status, (byStatus.get(status) ?? 0) + count);
      requestsTotal += count;
    }
    const byModel = [];
    const tokenTotals = { cached: 0, completion: 0, prompt: 0, reasoning: 0, total: 0 };
    for (const [model, totals] of this.#tokens) {
      // Copy so callers cannot mutate the registry's internal totals.
      byModel.push([model, { ...totals }]);
      tokenTotals.prompt += totals.prompt;
      tokenTotals.completion += totals.completion;
      tokenTotals.total += totals.total;
      tokenTotals.reasoning += totals.reasoning;
      tokenTotals.cached += totals.cached;
    }
    let upstreamTotal = 0;
    let upstreamErrors = 0;
    for (const [key, count] of this.#upstream) {
      upstreamTotal += count;
      if (key.endsWith(`${LABEL_SEPARATOR}error`)) {
        upstreamErrors += count;
      }
    }
    return {
      inFlight: this.#inFlight,
      requests: {
        byRoute: Object.fromEntries(byRoute),
        byStatus: Object.fromEntries(byStatus),
        total: requestsTotal
      },
      startedAt: new Date(this.#startedAtMs).toISOString(),
      tokens: { byModel: Object.fromEntries(byModel), ...tokenTotals },
      upstream: { errors: upstreamErrors, total: upstreamTotal },
      uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
    };
  }
  /**
   * Render the Prometheus text exposition format (version 0.0.4).
   *
   * @param now - Clock returning epoch milliseconds; defaults to `Date.now`.
   * @returns The exposition text, terminated by a newline.
   */
  renderPrometheus(now = Date.now) {
    const lines = [];
    lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
    lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
    lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
    lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
    lines.push("# TYPE hoopilot_uptime_seconds gauge");
    lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
    lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
    lines.push("# TYPE hoopilot_requests_in_flight gauge");
    lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
    lines.push("# HELP hoopilot_requests_total Completed requests by route, method, and status.");
    lines.push("# TYPE hoopilot_requests_total counter");
    for (const [key, count] of this.#requests) {
      const [route = "", method = "", status = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_requests_total${labels({ method, route, status })} ${count}`);
    }
    lines.push(
      "# HELP hoopilot_upstream_requests_total Copilot upstream calls by path and outcome."
    );
    lines.push("# TYPE hoopilot_upstream_requests_total counter");
    for (const [key, count] of this.#upstream) {
      const [path = "", outcome = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_upstream_requests_total${labels({ outcome, path })} ${count}`);
    }
    lines.push(
      "# HELP hoopilot_tokens_total Tokens reported by upstream usage, by model and type."
    );
    lines.push("# TYPE hoopilot_tokens_total counter");
    for (const [model, totals] of this.#tokens) {
      lines.push(`hoopilot_tokens_total${labels({ model, type: "prompt" })} ${totals.prompt}`);
      lines.push(
        `hoopilot_tokens_total${labels({ model, type: "completion" })} ${totals.completion}`
      );
      lines.push(
        `hoopilot_tokens_total${labels({ model, type: "reasoning" })} ${totals.reasoning}`
      );
      lines.push(`hoopilot_tokens_total${labels({ model, type: "cached" })} ${totals.cached}`);
    }
    lines.push("# HELP hoopilot_model_requests_total Completions with usage observed, by model.");
    lines.push("# TYPE hoopilot_model_requests_total counter");
    for (const [model, totals] of this.#tokens) {
      lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
    }
    lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
    lines.push("# TYPE hoopilot_request_duration_seconds histogram");
    for (const [route, entry] of this.#durations) {
      // Histogram buckets are cumulative in the exposition format.
      let cumulative = 0;
      for (let i = 0; i < DURATION_BUCKETS_SECONDS.length; i += 1) {
        cumulative += entry.buckets[i] ?? 0;
        const le = formatNumber(DURATION_BUCKETS_SECONDS[i] ?? 0);
        lines.push(
          `hoopilot_request_duration_seconds_bucket${labels({ le, route })} ${cumulative}`
        );
      }
      lines.push(
        `hoopilot_request_duration_seconds_bucket${labels({ le: "+Inf", route })} ${entry.count}`
      );
      lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
      lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
    }
    this.#renderCopilotQuota(lines);
    return `${lines.join("\n")}\n`;
  }
  // Append quota gauges for the last recorded Copilot usage, if any. Each
  // gauge family is emitted only when at least one category has a value.
  #renderCopilotQuota(lines) {
    const usage = this.#copilotQuota;
    if (!usage) {
      return;
    }
    const categories = Object.entries(usage.quotas);
    const gauge = (suffix, help, pick) => {
      const present = categories.filter(([, quota]) => pick(quota) !== void 0);
      if (present.length === 0) {
        return;
      }
      lines.push(`# HELP hoopilot_copilot_quota_${suffix} ${help}`);
      lines.push(`# TYPE hoopilot_copilot_quota_${suffix} gauge`);
      for (const [category, quota] of present) {
        lines.push(`hoopilot_copilot_quota_${suffix}${labels({ category })} ${pick(quota)}`);
      }
    };
    gauge("remaining", "Remaining quota for the Copilot category.", (q) => q.remaining);
    gauge("entitlement", "Quota entitlement for the Copilot category.", (q) => q.entitlement);
    gauge("used", "Used quota (entitlement minus remaining) for the category.", (q) => q.used);
    gauge(
      "percent_remaining",
      "Percent of quota remaining for the Copilot category.",
      (q) => q.percentRemaining
    );
    // Skip the reset gauge when the date is missing or cannot be parsed.
    const resetMs = usage.quotaResetDate ? Date.parse(usage.quotaResetDate) : Number.NaN;
    if (Number.isFinite(resetMs)) {
      lines.push(
        "# HELP hoopilot_copilot_quota_reset_timestamp_seconds Unix epoch of the next reset."
      );
      lines.push("# TYPE hoopilot_copilot_quota_reset_timestamp_seconds gauge");
      lines.push(`hoopilot_copilot_quota_reset_timestamp_seconds ${resetMs / 1e3}`);
    }
    if (usage.plan || usage.accessTypeSku) {
      lines.push("# HELP hoopilot_copilot_info Copilot plan metadata as a constant-1 info gauge.");
      lines.push("# TYPE hoopilot_copilot_info gauge");
      lines.push(
        `hoopilot_copilot_info${labels({
          access_type_sku: usage.accessTypeSku ?? "",
          plan: usage.plan ?? ""
        })} 1`
      );
    }
  }
};
1379
/**
 * Wrap a response so token usage can be observed without delaying the client.
 *
 * The body is tee'd: one branch backs the returned response, the other is
 * consumed in the background by `consumeUsage`, whose failures are ignored.
 *
 * @param response - Upstream response.
 * @param fallbackModel - Model name reported when the body does not name one.
 * @param onUsage - Callback `(model, usage)` invoked if usage is found.
 * @param signal - Optional AbortSignal that stops the background reader.
 * @returns `response` itself when it has no body; otherwise a new Response
 *   with the same headers, status and statusText.
 */
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
  const { body } = response;
  if (!body) {
    return response;
  }
  const [clientBranch, observerBranch] = body.tee();
  const contentType = response.headers.get("content-type");
  const isSse = contentType?.includes("text/event-stream") ?? false;
  // Fire and forget: metrics collection must never affect the client response.
  void consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
  });
  const { headers, status, statusText } = response;
  return new Response(clientBranch, { headers, status, statusText });
}
1394
/**
 * Drain a response stream looking for token usage and the model name.
 *
 * SSE bodies are scanned line by line as they arrive; other bodies are
 * buffered (up to USAGE_BUFFER_LIMIT_BYTES) and parsed as one JSON document
 * at the end. The last usage and last non-empty model seen win.
 *
 * @param stream - ReadableStream of bytes to consume.
 * @param isSse - Whether the body is a `text/event-stream`.
 * @param fallbackModel - Model reported when the payload names none.
 * @param onUsage - Called once as `(model, usage)` if any usage was found.
 * @param signal - Optional AbortSignal; aborting cancels the reader.
 */
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
  const reader = stream.getReader();
  const cancelQuietly = () => {
    reader.cancel().catch(() => {
    });
  };
  if (signal?.aborted) {
    cancelQuietly();
  } else {
    signal?.addEventListener("abort", cancelQuietly, { once: true });
  }
  const decoder = new TextDecoder();
  let model = fallbackModel;
  let usage;
  let pending = "";
  let bufferedBytes = 0;
  let overflowed = false;
  // Inspect one parsed payload; usage/model may sit at the top level or
  // under a `response` property.
  const consider = (payload) => {
    const record = asRecord(payload);
    const nested = asRecord(record.response);
    const found = extractTokenUsage(record.usage) ?? extractTokenUsage(nested.usage);
    if (found) {
      usage = found;
    }
    const candidateModel = modelText(record.model) || modelText(asRecord(record.response).model);
    if (candidateModel) {
      model = candidateModel;
    }
  };
  try {
    for (; ; ) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      const chunk = decoder.decode(value, { stream: true });
      if (isSse) {
        pending += chunk;
        const completeLines = pending.split(/\r?\n/);
        // The last piece may be a partial line; keep it for the next chunk.
        pending = completeLines.pop() ?? "";
        for (const line of completeLines) {
          considerSseLine(line, consider);
        }
        // Drop an over-long partial line instead of growing without bound.
        if (pending.length > USAGE_BUFFER_LIMIT_BYTES) {
          pending = "";
        }
      } else if (!overflowed) {
        bufferedBytes += value.byteLength;
        if (bufferedBytes > USAGE_BUFFER_LIMIT_BYTES) {
          // Too large to parse; keep draining but stop buffering.
          overflowed = true;
          pending = "";
        } else {
          pending += chunk;
        }
      }
    }
    // Flush any bytes still held by the streaming decoder.
    const remainder = pending + decoder.decode();
    if (isSse) {
      if (remainder) {
        considerSseLine(remainder, consider);
      }
    } else if (!overflowed && remainder) {
      const parsed = safeParse(remainder);
      if (parsed !== void 0) {
        consider(parsed);
      }
    }
  } finally {
    signal?.removeEventListener("abort", cancelQuietly);
    reader.releaseLock();
  }
  if (usage) {
    onUsage(model, usage);
  }
}
1469
/**
 * Inspects one server-sent-events line and forwards its JSON payload.
 *
 * Lines that are not `data:` fields, empty payloads, the `[DONE]` sentinel
 * and payloads that fail to parse are all ignored.
 *
 * @param {string} line - A single line of an SSE stream.
 * @param {(payload: unknown) => void} consider - Receives each parsed payload.
 * @returns {void}
 */
function considerSseLine(line, consider) {
  const field = "data:";
  const text = line.trim();
  if (text.startsWith(field)) {
    const payload = text.slice(field.length).trim();
    const skip = !payload || payload === "[DONE]";
    if (!skip) {
      const value = safeParse(payload);
      if (value !== void 0) {
        consider(value);
      }
    }
  }
}
1483
/**
 * Parses JSON text without throwing.
 *
 * @param {string} text - Candidate JSON.
 * @returns {unknown} The parsed value, or `undefined` when parsing fails.
 */
function safeParse(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = void 0;
  }
  return parsed;
}
1490
/**
 * Normalizes a candidate model name.
 *
 * @param {unknown} value - Value read from a payload.
 * @returns {string} The trimmed string, or "" when the value is not a string.
 */
function modelText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
1493
/**
 * Clamps a value to a usable non-negative number.
 *
 * @param {unknown} value - Candidate number.
 * @returns {number} The value itself when it is a finite number above zero, otherwise 0.
 */
function nonNegative(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return value > 0 ? value : 0;
}
1496
/**
 * Builds a single composite key from label parts by joining them with
 * `LABEL_SEPARATOR`.
 *
 * @param {...unknown} parts - Key components, in order.
 * @returns {string} The joined key.
 */
function labelKey(...parts) {
  const key = parts.join(LABEL_SEPARATOR);
  return key;
}
1499
/**
 * Renders a label set as `{name="value",...}`.
 *
 * @param {Record<string, string>} pairs - Label names mapped to raw values;
 *   each value is passed through `escapeLabelValue`.
 * @returns {string} The rendered label block, or "" when there are no labels.
 */
function labels(pairs) {
  const rendered = [];
  for (const [name, value] of Object.entries(pairs)) {
    rendered.push(`${name}="${escapeLabelValue(value)}"`);
  }
  return rendered.length === 0 ? "" : `{${rendered.join(",")}}`;
}
1507
/**
 * Escapes a label value for embedding between double quotes in a rendered
 * label set: backslash, double quote, line feed and carriage return are
 * replaced by their backslash escape sequences.
 *
 * Non-string input is coerced instead of throwing, so a numeric or missing
 * value cannot break an entire metrics render; `null`/`undefined` become "".
 *
 * @param {unknown} value - Raw label value.
 * @returns {string} The escaped value.
 */
function escapeLabelValue(value) {
  // Backslashes go first so the escapes added below are not escaped again.
  return String(value ?? "").replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\r/g, "\\r");
}
1510
/**
 * Renders a sample value as text using JavaScript's default
 * number-to-string conversion.
 *
 * @param {number} value - Value to render.
 * @returns {string} The textual form of the value.
 */
function formatNumber(value) {
  // Integer and fractional inputs go through the same conversion.
  return String(value);
}
1513
+
966
1514
  // src/server.ts
967
1515
  var DEFAULT_HOST = "127.0.0.1";
968
1516
  var DEFAULT_PORT = 4141;
969
1517
  var INVALID_JSON_MESSAGE = "Request body must be valid JSON.";
1518
+ var USAGE_CACHE_TTL_MS = 6e4;
970
1519
  function createHoopilotHandler(options = {}) {
971
1520
  const client = new CopilotClient(options);
972
1521
  const apiKey = options.apiKey ?? options.env?.HOOPILOT_API_KEY;
973
1522
  const logger = serverLogger(options);
1523
+ const metrics = options.metrics ?? new MetricsRegistry();
1524
+ const readUsage = createUsageReader(client, metrics);
1525
+ const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
974
1526
  return async (request) => {
975
1527
  const startedAt = performance.now();
976
1528
  const url = new URL(request.url);
977
1529
  const apiPath = canonicalApiPath(url.pathname);
978
1530
  const requestId = requestIdFor(request);
1531
+ const route = routeFor(request.method, apiPath);
979
1532
  const requestLogger = logger.child({
980
1533
  method: request.method,
981
1534
  path: url.pathname,
982
1535
  requestId,
983
- route: routeFor(request.method, apiPath)
1536
+ route
1537
+ });
1538
+ metrics.startRequest();
1539
+ const finish = (response) => finishResponse(response, {
1540
+ logger: requestLogger,
1541
+ method: request.method,
1542
+ metrics,
1543
+ requestId,
1544
+ route,
1545
+ startedAt
984
1546
  });
985
1547
  if (request.method === "OPTIONS") {
986
- return finishResponse(new Response(null, { headers: corsHeaders() }), {
987
- logger: requestLogger,
988
- requestId,
989
- startedAt
990
- });
1548
+ return finish(new Response(null, { headers: corsHeaders() }));
991
1549
  }
992
1550
  if (!isAuthorized(request, apiKey)) {
993
1551
  requestLogger.warn({ event: "http.request.unauthorized" }, "invalid hoopilot api key");
994
- return finishResponse(
995
- jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."),
996
- {
997
- logger: requestLogger,
998
- requestId,
999
- startedAt
1000
- }
1001
- );
1552
+ return finish(jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."));
1002
1553
  }
1003
1554
  try {
1004
1555
  if (request.method === "GET" && (apiPath === "/" || apiPath === "/healthz")) {
1005
- return finishResponse(
1006
- jsonResponse({
1007
- name: "hoopilot",
1008
- object: "health",
1009
- status: "ok"
1010
- }),
1011
- { logger: requestLogger, requestId, startedAt }
1012
- );
1556
+ return finish(jsonResponse({ name: "hoopilot", object: "health", status: "ok" }));
1557
+ }
1558
+ if (request.method === "GET" && apiPath === "/metrics") {
1559
+ return finish(metricsResponse(metrics));
1560
+ }
1561
+ if (request.method === "GET" && apiPath === "/v1/usage") {
1562
+ return finish(await handleUsage(metrics, readUsage, request.signal));
1013
1563
  }
1014
1564
  if (request.method === "GET" && apiPath === "/v1/responses") {
1015
- return finishResponse(websocketUnsupportedResponse(), {
1016
- logger: requestLogger,
1017
- requestId,
1018
- startedAt
1019
- });
1565
+ return finish(websocketUnsupportedResponse());
1020
1566
  }
1021
1567
  if (request.method === "GET" && apiPath === "/v1/models") {
1022
- return finishResponse(await handleModels(client, request.signal, requestLogger), {
1023
- logger: requestLogger,
1024
- requestId,
1025
- startedAt
1026
- });
1568
+ return finish(await handleModels(client, metrics, request.signal, requestLogger));
1027
1569
  }
1028
1570
  if (request.method === "POST" && apiPath === "/v1/chat/completions") {
1029
- return finishResponse(await handleChatCompletions(client, request, requestLogger), {
1030
- logger: requestLogger,
1031
- requestId,
1032
- startedAt
1033
- });
1571
+ return finish(
1572
+ await handleChatCompletions(client, metrics, recordTokens, request, requestLogger)
1573
+ );
1034
1574
  }
1035
1575
  if (request.method === "POST" && apiPath === "/v1/completions") {
1036
- return finishResponse(await handleCompletions(client, request, requestLogger), {
1037
- logger: requestLogger,
1038
- requestId,
1039
- startedAt
1040
- });
1576
+ return finish(
1577
+ await handleCompletions(client, metrics, recordTokens, request, requestLogger)
1578
+ );
1041
1579
  }
1042
1580
  if (request.method === "POST" && apiPath === "/v1/responses") {
1043
- return finishResponse(await handleResponses(client, request, requestLogger), {
1044
- logger: requestLogger,
1045
- requestId,
1046
- startedAt
1047
- });
1581
+ return finish(await handleResponses(client, metrics, recordTokens, request, requestLogger));
1048
1582
  }
1049
- return finishResponse(
1050
- jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`),
1051
- { logger: requestLogger, requestId, startedAt }
1052
- );
1583
+ return finish(jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`));
1053
1584
  } catch (error) {
1054
1585
  if (error instanceof CopilotAuthError) {
1055
1586
  requestLogger.warn(
1056
1587
  { err: errorDetails(error), event: "copilot.auth.missing" },
1057
1588
  "copilot auth failed"
1058
1589
  );
1059
- return finishResponse(jsonError(401, "copilot_auth_error", error.message), {
1060
- logger: requestLogger,
1061
- requestId,
1062
- startedAt
1063
- });
1590
+ return finish(jsonError(401, "copilot_auth_error", error.message));
1064
1591
  }
1065
1592
  const message = errorMessage(error);
1066
1593
  if (message === INVALID_JSON_MESSAGE) {
@@ -1074,11 +1601,7 @@ function createHoopilotHandler(options = {}) {
1074
1601
  "request failed"
1075
1602
  );
1076
1603
  }
1077
- return finishResponse(jsonError(500, "internal_error", message), {
1078
- logger: requestLogger,
1079
- requestId,
1080
- startedAt
1081
- });
1604
+ return finish(jsonError(500, "internal_error", message));
1082
1605
  }
1083
1606
  };
1084
1607
  }
@@ -1107,8 +1630,9 @@ function startHoopilotServer(options = {}) {
1107
1630
  url: `http://${host}:${server.port}`
1108
1631
  };
1109
1632
  }
1110
- async function handleModels(client, signal, logger) {
1633
+ async function handleModels(client, metrics, signal, logger) {
1111
1634
  const upstream = await client.models(signal);
1635
+ metrics.recordUpstream("/models", upstream.ok);
1112
1636
  if (!upstream.ok) {
1113
1637
  if (isUpstreamAuthStatus(upstream.status)) {
1114
1638
  return proxyError(upstream, logger);
@@ -1126,35 +1650,50 @@ async function handleModels(client, signal, logger) {
1126
1650
  logUpstreamSuccess(logger, "/models", upstream.status);
1127
1651
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
1128
1652
  }
1129
- async function handleChatCompletions(client, request, logger) {
1653
/**
 * Serves POST /v1/chat/completions: normalizes the request, forwards it
 * upstream, records the upstream outcome, and proxies the reply while
 * observing its token usage.
 *
 * @param {object} client - Copilot client exposing `chatCompletions(body, signal)`.
 * @param {object} metrics - Registry exposing `recordUpstream(path, ok)`.
 * @param {(model: string, usage: object) => void} recordTokens - Usage sink.
 * @param {Request} request - Incoming request.
 * @param {object} logger - Request-scoped logger.
 * @returns {Promise<Response>} The proxied upstream response or an error response.
 */
async function handleChatCompletions(client, metrics, recordTokens, request, logger) {
  const upstreamPath = "/chat/completions";
  const payload = normalizeChatCompletionRequest(await readJson(request));
  const upstream = await client.chatCompletions(payload, request.signal);
  metrics.recordUpstream(upstreamPath, upstream.ok);
  if (upstream.ok) {
    logUpstreamSuccess(logger, upstreamPath, upstream.status);
    // The requested model is only a fallback label for usage accounting.
    const requestedModel = normalizeRequestedModel(payload.model);
    const observed = observeResponseUsage(upstream, requestedModel, recordTokens, request.signal);
    return proxyResponse(observed);
  }
  return proxyError(upstream, logger);
}
1138
- async function handleCompletions(client, request, logger) {
1664
/**
 * Serves POST /v1/completions by translating the request into a chat
 * completion upstream call.
 *
 * Streaming replies are proxied through with usage observation; buffered
 * replies are read, their token usage recorded, and converted back to the
 * completions shape.
 *
 * @param {object} client - Copilot client exposing `chatCompletions(body, signal)`.
 * @param {object} metrics - Registry exposing `recordUpstream(path, ok)`.
 * @param {(model: string, usage: object) => void} recordTokens - Usage sink.
 * @param {Request} request - Incoming request.
 * @param {object} logger - Request-scoped logger.
 * @returns {Promise<Response>} Completion response, proxied stream, or error response.
 */
async function handleCompletions(client, metrics, recordTokens, request, logger) {
  const upstreamPath = "/chat/completions";
  const body = await readJson(request);
  const chatBody = completionsRequestToChatCompletion(body);
  const upstream = await client.chatCompletions(chatBody, request.signal);
  metrics.recordUpstream(upstreamPath, upstream.ok);
  if (!upstream.ok) {
    return proxyError(upstream, logger);
  }
  logUpstreamSuccess(logger, upstreamPath, upstream.status);

  const requestedModel = normalizeRequestedModel(body.model);
  if (isStreamingResponse(upstream)) {
    const observed = observeResponseUsage(upstream, requestedModel, recordTokens, request.signal);
    return proxyResponse(observed);
  }

  const completion = asRecord(await upstream.json());
  const usage = extractTokenUsage(completion.usage);
  if (usage) {
    // Prefer the model the upstream reports; fall back to the requested one.
    let reportedModel = "";
    if (typeof completion.model === "string") {
      reportedModel = completion.model.trim();
    }
    recordTokens(reportedModel || requestedModel, usage);
  }
  return jsonResponse(chatCompletionToCompletion(completion));
}
1150
- async function handleResponses(client, request, logger) {
1687
/**
 * Serves POST /v1/responses: forwards the raw JSON text upstream, records
 * the upstream outcome, and proxies the reply while observing token usage.
 *
 * @param {object} client - Copilot client exposing `responses(body, signal)`.
 * @param {object} metrics - Registry exposing `recordUpstream(path, ok)`.
 * @param {(model: string, usage: object) => void} recordTokens - Usage sink.
 * @param {Request} request - Incoming request.
 * @param {object} logger - Request-scoped logger.
 * @returns {Promise<Response>} The proxied upstream response or an error response.
 */
async function handleResponses(client, metrics, recordTokens, request, logger) {
  const upstreamPath = "/responses";
  const rawBody = await readJsonText(request);
  const upstream = await client.responses(rawBody, request.signal);
  metrics.recordUpstream(upstreamPath, upstream.ok);
  if (upstream.ok) {
    logUpstreamSuccess(logger, upstreamPath, upstream.status);
    // The body is forwarded as text, so it is parsed here only to read the model name.
    const requested = asRecord(safeParseJson(rawBody)).model;
    const fallbackModel = normalizeRequestedModel(requested);
    return proxyResponse(observeResponseUsage(upstream, fallbackModel, recordTokens, request.signal));
  }
  return proxyError(upstream, logger);
}
1159
1698
  async function proxyError(upstream, logger) {
1160
1699
  const text = await upstream.text();
@@ -1187,8 +1726,7 @@ function proxyResponse(upstream) {
1187
1726
  }
1188
1727
  async function readJson(request) {
1189
1728
  try {
1190
- const value = await request.json();
1191
- return value && typeof value === "object" && !Array.isArray(value) ? value : {};
1729
+ return asRecord(await request.json());
1192
1730
  } catch {
1193
1731
  throw new Error(INVALID_JSON_MESSAGE);
1194
1732
  }
@@ -1274,7 +1812,21 @@ function serverLogger(options) {
1274
1812
  }
1275
1813
  function finishResponse(response, options) {
1276
1814
  const withRequestId = responseWithRequestId(response, options.requestId);
1277
- logRequestCompleted(options.logger, withRequestId, options.startedAt);
1815
+ const stream = isStreamingResponse(withRequestId);
1816
+ const status = withRequestId.status;
1817
+ const complete = () => {
1818
+ const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
1819
+ options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
1820
+ logRequestCompleted(options.logger, status, stream, durationMs);
1821
+ };
1822
+ if (stream && withRequestId.body) {
1823
+ return new Response(trackStreamCompletion(withRequestId.body, complete), {
1824
+ headers: withRequestId.headers,
1825
+ status,
1826
+ statusText: withRequestId.statusText
1827
+ });
1828
+ }
1829
+ complete();
1278
1830
  return withRequestId;
1279
1831
  }
1280
1832
  function responseWithRequestId(response, requestId) {
@@ -1286,18 +1838,48 @@ function responseWithRequestId(response, requestId) {
1286
1838
  statusText: response.statusText
1287
1839
  });
1288
1840
  }
1289
- function logRequestCompleted(logger, response, startedAt) {
1841
/**
 * Wraps a byte stream so a callback runs exactly once when the stream ends,
 * whether it closes normally, fails while being read, or is cancelled by
 * the consumer. Chunks are passed through untouched.
 *
 * @param {ReadableStream} body - Stream to wrap; it is locked by this call.
 * @param {() => void} onComplete - Invoked once at end of stream.
 * @returns {ReadableStream} Pass-through stream that mirrors `body`.
 */
function trackStreamCompletion(body, onComplete) {
  const source = body.getReader();
  let notified = false;
  const notifyOnce = () => {
    if (notified) {
      return;
    }
    notified = true;
    onComplete();
  };
  return new ReadableStream({
    async pull(controller) {
      try {
        const step = await source.read();
        if (!step.done) {
          controller.enqueue(step.value);
          return;
        }
        controller.close();
        notifyOnce();
      } catch (error) {
        notifyOnce();
        controller.error(error);
      }
    },
    cancel(reason) {
      notifyOnce();
      return source.cancel(reason);
    }
  });
}
1871
+ function logRequestCompleted(logger, status, stream, durationMs) {
1290
1872
  const fields = {
1291
- durationMs: Math.round((performance.now() - startedAt) * 100) / 100,
1873
+ durationMs,
1292
1874
  event: "http.request.completed",
1293
- status: response.status,
1294
- stream: isStreamingResponse(response)
1875
+ status,
1876
+ stream
1295
1877
  };
1296
- if (response.status >= 500) {
1878
+ if (status >= 500) {
1297
1879
  logger.error(fields, "request completed with server error");
1298
1880
  return;
1299
1881
  }
1300
- if (response.status >= 400) {
1882
+ if (status >= 400) {
1301
1883
  logger.warn(fields, "request completed with client error");
1302
1884
  return;
1303
1885
  }
@@ -1318,6 +1900,8 @@ function canonicalApiPath(path) {
1318
1900
  return "/v1/completions";
1319
1901
  case "/responses":
1320
1902
  return "/v1/responses";
1903
+ case "/usage":
1904
+ return "/v1/usage";
1321
1905
  default:
1322
1906
  return withoutTrailingSlash;
1323
1907
  }
@@ -1329,6 +1913,12 @@ function routeFor(method, path) {
1329
1913
  if (method === "GET" && (path === "/" || path === "/healthz")) {
1330
1914
  return "health";
1331
1915
  }
1916
+ if (method === "GET" && path === "/metrics") {
1917
+ return "metrics";
1918
+ }
1919
+ if (method === "GET" && path === "/v1/usage") {
1920
+ return "usage";
1921
+ }
1332
1922
  if (method === "GET" && path === "/v1/models") {
1333
1923
  return "models";
1334
1924
  }
@@ -1359,36 +1949,86 @@ function logUpstreamSuccess(logger, upstreamPath, status) {
1359
1949
  "copilot upstream request completed"
1360
1950
  );
1361
1951
  }
1362
- function errorDetails(error) {
1363
- if (error instanceof Error) {
1364
- return {
1365
- message: error.message,
1366
- name: error.name,
1367
- stack: error.stack
1368
- };
1952
/**
 * Builds the HTTP 200 response for the metrics endpoint from the
 * registry's Prometheus rendering.
 *
 * @param {object} metrics - Registry exposing `renderPrometheus()`.
 * @returns {Response} Response carrying the CORS headers and the Prometheus content type.
 */
function metricsResponse(metrics) {
  const exposition = metrics.renderPrometheus();
  const headers = { ...corsHeaders() };
  // Set after the spread so the content type wins over any CORS-provided value.
  headers["content-type"] = PROMETHEUS_CONTENT_TYPE;
  return new Response(exposition, { headers, status: 200 });
}
1961
/**
 * Serves the usage endpoint: combines the proxy's own metrics snapshot with
 * the Copilot usage lookup.
 *
 * @param {object} metrics - Registry exposing `snapshot()`.
 * @param {(signal?: AbortSignal) => Promise<{copilot?: object, error?: string}>} readUsage
 *   Usage reader.
 * @param {AbortSignal} [signal] - Forwarded to `readUsage`.
 * @returns {Promise<Response>} JSON response with `copilot` (or null), `object`,
 *   `proxy`, and `copilot_error` when the lookup reported an error.
 */
async function handleUsage(metrics, readUsage, signal) {
  // Snapshot first, so it reflects state from before the usage lookup runs.
  const proxy = metrics.snapshot();
  const outcome = await readUsage(signal);
  const payload = {
    copilot: outcome.copilot ?? null,
    object: "usage",
    proxy
  };
  if (outcome.error) {
    payload.copilot_error = outcome.error;
  }
  return jsonResponse(payload);
}
1970
/**
 * Creates a reader for the Copilot usage endpoint that caches the last
 * successful result for `ttlMs` milliseconds.
 *
 * The returned function never throws: failures are reported as
 * `{ error }`. Each upstream call is recorded exactly once via
 * `metrics.recordUpstream`, so an exception raised after a successful fetch
 * (while normalizing or recording the quota) is still returned as an error
 * but is not counted as a second, failed upstream call.
 *
 * @param {object} client - Copilot client exposing `usage(signal)`.
 * @param {object} metrics - Registry exposing `recordUpstream` and `recordCopilotQuota`.
 * @param {() => number} [now=Date.now] - Clock in milliseconds.
 * @param {number} [ttlMs=USAGE_CACHE_TTL_MS] - Cache lifetime in milliseconds.
 * @returns {(signal?: AbortSignal) => Promise<{copilot?: object, error?: string}>}
 */
function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
  const usagePath = "/copilot_internal/user";
  let cache;
  return async (signal) => {
    if (cache && now() - cache.atMs < ttlMs) {
      return { copilot: cache.value };
    }
    // True once the outcome of this upstream call has been recorded.
    let outcomeRecorded = false;
    try {
      const upstream = await client.usage(signal);
      metrics.recordUpstream(usagePath, upstream.ok);
      outcomeRecorded = true;
      if (!upstream.ok) {
        return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
      }
      // An unreadable body is treated as an empty payload rather than a failure.
      const value = normalizeCopilotUsage(await upstream.json().catch(() => ({})));
      cache = { atMs: now(), value };
      metrics.recordCopilotQuota(value);
      return { copilot: value };
    } catch (error) {
      if (!outcomeRecorded) {
        metrics.recordUpstream(usagePath, false);
      }
      if (error instanceof CopilotAuthError) {
        return { error: error.message };
      }
      return { error: errorMessage(error) };
    }
  };
}
1996
/**
 * Parses JSON text, yielding `undefined` instead of throwing when the text
 * is not valid JSON.
 *
 * @param {string} text - Candidate JSON.
 * @returns {unknown} The parsed value, or `undefined` on failure.
 */
function safeParseJson(text) {
  let result;
  try {
    result = JSON.parse(text);
  } catch {
    result = void 0;
  }
  return result;
}
1372
2003
  // Annotate the CommonJS export names for ESM import in node:
1373
2004
  0 && (module.exports = {
2005
+ COPILOT_USAGE_API_VERSION,
1374
2006
  CopilotAuth,
1375
2007
  CopilotAuthError,
1376
2008
  CopilotClient,
2009
+ DEFAULT_GITHUB_API_BASE_URL,
1377
2010
  DEFAULT_LOG_FORMAT,
1378
2011
  DEFAULT_LOG_LEVEL,
1379
2012
  DEFAULT_MODEL,
2013
+ MetricsRegistry,
2014
+ PROMETHEUS_CONTENT_TYPE,
2015
+ applyCopilotHeaders,
2016
+ applyGithubApiHeaders,
1380
2017
  authStorePath,
1381
2018
  chatCompletionToCompletion,
1382
2019
  chatCompletionToResponse,
1383
2020
  completionsRequestToChatCompletion,
1384
2021
  createHoopilotHandler,
1385
2022
  createHoopilotLogger,
2023
+ extractTokenUsage,
1386
2024
  fallbackModels,
1387
2025
  githubCopilotDeviceLogin,
1388
2026
  noopLogger,
1389
2027
  normalizeChatCompletionRequest,
2028
+ normalizeCopilotUsage,
1390
2029
  normalizeModelsResponse,
1391
2030
  normalizeRequestedModel,
2031
+ observeResponseUsage,
1392
2032
  parseLogFormat,
1393
2033
  parseLogLevel,
1394
2034
  readStoredCopilotAuth,