@openhoo/hoopilot 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -30,24 +30,33 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ COPILOT_USAGE_API_VERSION: () => COPILOT_USAGE_API_VERSION,
33
34
  CopilotAuth: () => CopilotAuth,
34
35
  CopilotAuthError: () => CopilotAuthError,
35
36
  CopilotClient: () => CopilotClient,
37
+ DEFAULT_GITHUB_API_BASE_URL: () => DEFAULT_GITHUB_API_BASE_URL,
36
38
  DEFAULT_LOG_FORMAT: () => DEFAULT_LOG_FORMAT,
37
39
  DEFAULT_LOG_LEVEL: () => DEFAULT_LOG_LEVEL,
38
40
  DEFAULT_MODEL: () => DEFAULT_MODEL,
41
+ MetricsRegistry: () => MetricsRegistry,
42
+ PROMETHEUS_CONTENT_TYPE: () => PROMETHEUS_CONTENT_TYPE,
43
+ applyCopilotHeaders: () => applyCopilotHeaders,
44
+ applyGithubApiHeaders: () => applyGithubApiHeaders,
39
45
  authStorePath: () => authStorePath,
40
46
  chatCompletionToCompletion: () => chatCompletionToCompletion,
41
47
  chatCompletionToResponse: () => chatCompletionToResponse,
42
48
  completionsRequestToChatCompletion: () => completionsRequestToChatCompletion,
43
49
  createHoopilotHandler: () => createHoopilotHandler,
44
50
  createHoopilotLogger: () => createHoopilotLogger,
51
+ extractTokenUsage: () => extractTokenUsage,
45
52
  fallbackModels: () => fallbackModels,
46
53
  githubCopilotDeviceLogin: () => githubCopilotDeviceLogin,
47
54
  noopLogger: () => noopLogger,
48
55
  normalizeChatCompletionRequest: () => normalizeChatCompletionRequest,
56
+ normalizeCopilotUsage: () => normalizeCopilotUsage,
49
57
  normalizeModelsResponse: () => normalizeModelsResponse,
50
58
  normalizeRequestedModel: () => normalizeRequestedModel,
59
+ observeResponseUsage: () => observeResponseUsage,
51
60
  parseLogFormat: () => parseLogFormat,
52
61
  parseLogLevel: () => parseLogLevel,
53
62
  readStoredCopilotAuth: () => readStoredCopilotAuth,
@@ -165,6 +174,8 @@ var CopilotAuth = class {
165
174
  };
166
175
 
167
176
  // src/copilot.ts
177
+ var DEFAULT_GITHUB_API_BASE_URL = "https://api.github.com";
178
+ var COPILOT_USAGE_API_VERSION = "2025-04-01";
168
179
  function applyCopilotHeaders(headers, token) {
169
180
  headers.set("accept", headers.get("accept") ?? "application/json");
170
181
  headers.set("authorization", `Bearer ${token}`);
@@ -176,12 +187,44 @@ function applyCopilotHeaders(headers, token) {
176
187
  headers.set("x-github-api-version", "2026-06-01");
177
188
  return headers;
178
189
  }
190
/**
 * Stamp the headers used for GitHub REST API calls onto `headers`.
 *
 * An `accept` value already present on `headers` is kept; when absent it
 * defaults to `application/json`. Every other header listed below is
 * overwritten. The token is sent with the `token <value>` authorization form.
 *
 * @param {Headers} headers Header collection, mutated in place.
 * @param {string} token Token placed in the `authorization` header.
 * @returns {Headers} The same `headers` instance that was passed in.
 */
function applyGithubApiHeaders(headers, token) {
  const accept = headers.get("accept") ?? "application/json";
  const stamped = [
    ["accept", accept],
    ["authorization", `token ${token}`],
    ["editor-plugin-version", "hoopilot/0.1.0"],
    ["editor-version", "Hoopilot/0.1.0"],
    ["user-agent", "hoopilot/0.1.0"],
    ["x-github-api-version", COPILOT_USAGE_API_VERSION]
  ];
  for (const [name, value] of stamped) {
    headers.set(name, value);
  }
  return headers;
}
179
199
  var CopilotClient = class {
180
200
  #auth;
181
201
  #fetch;
202
+ #githubApiBaseUrl;
182
203
  constructor(options = {}) {
183
204
  this.#auth = new CopilotAuth(options);
184
205
  this.#fetch = options.fetch ?? fetch;
206
+ this.#githubApiBaseUrl = trimTrailingSlash(
207
+ options.githubApiBaseUrl ?? options.env?.HOOPILOT_GITHUB_API_BASE_URL ?? DEFAULT_GITHUB_API_BASE_URL
208
+ );
209
+ }
210
+ /**
211
+ * Fetch the Copilot account's quota / premium-request usage from the GitHub
212
+ * REST `copilot_internal/user` endpoint. The stored device-flow OAuth token is
213
+ * accepted directly here — no Copilot token exchange is required to read quota.
214
+ */
215
+ async usage(signal) {
216
+ if (!isHttpsOrLoopback(this.#githubApiBaseUrl)) {
217
+ throw new Error(
218
+ `Refusing to send the GitHub OAuth token to a non-HTTPS host: ${this.#githubApiBaseUrl}`
219
+ );
220
+ }
221
+ const access = await this.#auth.getAccess();
222
+ const headers = applyGithubApiHeaders(new Headers(), access.token);
223
+ return this.#fetch(`${this.#githubApiBaseUrl}/copilot_internal/user`, {
224
+ headers,
225
+ method: "GET",
226
+ signal
227
+ });
185
228
  }
186
229
  async chatCompletions(body, signal) {
187
230
  return this.fetchCopilot("/chat/completions", {
@@ -221,6 +264,81 @@ var CopilotClient = class {
221
264
  });
222
265
  }
223
266
  };
267
/**
 * Convert a raw usage payload into the normalized usage shape.
 *
 * Per-category quotas are read from `quota_snapshots`. When that yields no
 * categories, they are rebuilt from `limited_user_quotas` (remaining) and
 * `monthly_quotas` (entitlement) instead. Fields that cannot be determined are
 * omitted from the result rather than set to `undefined`.
 *
 * @param {unknown} body Parsed response body.
 * @returns {object} Usage with `quotas` plus any of `accessTypeSku`,
 *   `chatEnabled`, `plan` and `quotaResetDate` that were present.
 */
function normalizeCopilotUsage(body) {
  const record = asRecord(body);
  const quotas = {};

  for (const [category, detail] of Object.entries(asRecord(record.quota_snapshots))) {
    quotas[category] = normalizeQuotaDetail(asRecord(detail));
  }

  if (Object.keys(quotas).length === 0) {
    // Fallback shape: two flat maps keyed by category.
    const remainingByCategory = asRecord(record.limited_user_quotas);
    const entitlementByCategory = asRecord(record.monthly_quotas);
    const categories = new Set([
      ...Object.keys(remainingByCategory),
      ...Object.keys(entitlementByCategory)
    ]);
    for (const category of categories) {
      const entitlement = numberOrUndefined(entitlementByCategory[category]);
      const left = numberOrUndefined(remainingByCategory[category]);
      // A percentage is only meaningful with a positive entitlement.
      const canComputePercent = entitlement !== undefined && entitlement > 0 && left !== undefined;
      quotas[category] = removeUndefinedQuota({
        entitlement,
        percentRemaining: canComputePercent ? left / entitlement * 100 : undefined,
        remaining: left,
        used: usedFrom(entitlement, left)
      });
    }
  }

  // First non-empty reset date wins, in this order of preference.
  const quotaResetDate =
    stringOrUndefined(record.quota_reset_date) ??
    stringOrUndefined(record.quota_reset_date_utc) ??
    stringOrUndefined(record.limited_user_reset_date);
  const chatEnabled = typeof record.chat_enabled === "boolean" ? record.chat_enabled : undefined;

  return removeUndefinedUsage({
    accessTypeSku: stringOrUndefined(record.access_type_sku),
    chatEnabled,
    plan: stringOrUndefined(record.copilot_plan),
    quotaResetDate,
    quotas
  });
}
296
/**
 * Normalize one quota entry into the camelCase quota shape.
 *
 * `remaining` is taken from `remaining`, falling back to `quota_remaining`.
 * `used` is derived from entitlement and remaining. Fields that are missing
 * or of the wrong type are omitted from the result.
 *
 * @param {object} detail Record describing a single quota category.
 * @returns {object} Quota with only the fields that could be determined.
 */
function normalizeQuotaDetail(detail) {
  const booleanOrUndefined = (value) => (typeof value === "boolean" ? value : undefined);
  const entitlement = numberOrUndefined(detail.entitlement);
  const remaining = numberOrUndefined(detail.remaining) ?? numberOrUndefined(detail.quota_remaining);
  const normalized = {
    entitlement,
    overageCount: numberOrUndefined(detail.overage_count),
    overagePermitted: booleanOrUndefined(detail.overage_permitted),
    percentRemaining: numberOrUndefined(detail.percent_remaining),
    remaining,
    unlimited: booleanOrUndefined(detail.unlimited),
    used: usedFrom(entitlement, remaining)
  };
  return removeUndefinedQuota(normalized);
}
309
/**
 * Derive the used amount as entitlement minus remaining, floored at zero.
 *
 * @param {number | undefined} entitlement Total allowance, if known.
 * @param {number | undefined} remaining Amount left, if known.
 * @returns {number | undefined} `undefined` when either input is `undefined`.
 */
function usedFrom(entitlement, remaining) {
  const bothKnown = entitlement !== undefined && remaining !== undefined;
  return bothKnown ? Math.max(0, entitlement - remaining) : undefined;
}
315
/**
 * Decide whether `rawUrl` is safe to send a credential to: any `https:` URL,
 * or a plain `http:` URL whose host is a loopback name/address.
 *
 * @param {string} rawUrl URL text to check.
 * @returns {boolean} `false` for unparseable input, non-HTTP(S) schemes, and
 *   plain HTTP to a non-loopback host.
 */
function isHttpsOrLoopback(rawUrl) {
  let url;
  try {
    url = new URL(rawUrl);
  } catch {
    return false;
  }
  if (url.protocol === "https:") {
    return true;
  }
  if (url.protocol !== "http:") {
    return false;
  }
  // URL#hostname serializes IPv6 hosts WITH brackets, so the IPv6 loopback is
  // reported as "[::1]". The bare "::1" form is kept for completeness.
  const loopbackHosts = ["127.0.0.1", "localhost", "[::1]", "::1"];
  return loopbackHosts.includes(url.hostname);
}
327
/**
 * Pass through finite numbers; map everything else (including `NaN`,
 * infinities and non-number types) to `undefined`.
 *
 * @param {unknown} value Candidate value.
 * @returns {number | undefined}
 */
function numberOrUndefined(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
330
/**
 * Pass through non-empty strings; map empty strings and non-strings to
 * `undefined`.
 *
 * @param {unknown} value Candidate value.
 * @returns {string | undefined}
 */
function stringOrUndefined(value) {
  if (typeof value !== "string" || value.length === 0) {
    return undefined;
  }
  return value;
}
333
/**
 * Return a copy of `quota` without the properties whose value is `undefined`.
 * Other falsy values (`0`, `false`, `null`, `""`) are kept.
 *
 * @param {object} quota Quota object, left unmodified.
 * @returns {object} New object holding only the defined entries.
 */
function removeUndefinedQuota(quota) {
  const defined = [];
  for (const entry of Object.entries(quota)) {
    if (entry[1] !== undefined) {
      defined.push(entry);
    }
  }
  return Object.fromEntries(defined);
}
338
/**
 * Return a copy of `usage` without the properties whose value is `undefined`.
 *
 * @param {object} usage Usage object, left unmodified.
 * @returns {object} New object holding only the defined entries.
 */
function removeUndefinedUsage(usage) {
  return Object.fromEntries(
    Object.entries(usage).filter((entry) => entry[1] !== undefined)
  );
}
224
342
 
225
343
  // src/github-device.ts
226
344
  var import_promises = require("timers/promises");
@@ -900,6 +1018,40 @@ function responseUsage(usage) {
900
1018
  total_tokens: record.total_tokens
901
1019
  });
902
1020
  }
1021
/**
 * Pull token counts out of a usage record that uses either the
 * `prompt_tokens` / `completion_tokens` naming or the `input_tokens` /
 * `output_tokens` naming.
 *
 * @param {unknown} usage Raw usage value.
 * @returns {object | undefined} `undefined` when none of prompt, completion
 *   or total counts is a finite number. Otherwise an object with
 *   `promptTokens` and `completionTokens` (missing ones default to 0),
 *   `totalTokens` (defaults to prompt + completion), and `reasoningTokens` /
 *   `cachedTokens` when the detail records provide them.
 */
function extractTokenUsage(usage) {
  const record = asRecord(usage);
  const prompt = firstNumber(record.prompt_tokens, record.input_tokens);
  const completion = firstNumber(record.completion_tokens, record.output_tokens);
  const total = firstNumber(record.total_tokens);

  const nothingReported = [prompt, completion, total].every((count) => count === undefined);
  if (nothingReported) {
    return undefined;
  }

  const promptTokens = prompt ?? 0;
  const completionTokens = completion ?? 0;
  const reasoningTokens = firstNumber(
    asRecord(record.completion_tokens_details).reasoning_tokens,
    asRecord(record.output_tokens_details).reasoning_tokens
  );
  const cachedTokens = firstNumber(
    asRecord(record.prompt_tokens_details).cached_tokens,
    asRecord(record.input_tokens_details).cached_tokens
  );

  return removeUndefined({
    cachedTokens,
    completionTokens,
    promptTokens,
    reasoningTokens,
    totalTokens: total ?? promptTokens + completionTokens
  });
}
1047
/**
 * Return the first argument that is a finite number.
 *
 * @param {...unknown} values Candidates, checked in order.
 * @returns {number | undefined} `undefined` when no candidate qualifies.
 */
function firstNumber(...values) {
  return values.find((candidate) => typeof candidate === "number" && Number.isFinite(candidate));
}
903
1055
  function firstChoice(completion) {
904
1056
  const choices = Array.isArray(completion.choices) ? completion.choices : [];
905
1057
  return asRecord(choices[0]);
@@ -993,104 +1145,449 @@ function epochSeconds() {
993
1145
  return Math.floor(Date.now() / 1e3);
994
1146
  }
995
1147
 
1148
+ // src/metrics.ts
1149
// Content type for the Prometheus text exposition format, version 0.0.4.
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
// Upper bounds (seconds) of the request-duration histogram buckets.
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
// Cap on how much response body is buffered while looking for usage data.
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
// Cap on the number of distinct model labels tracked.
var MAX_TRACKED_MODELS = 200;
// Cap on the length of a single model label.
var MAX_MODEL_LABEL_LENGTH = 200;
// Joins label parts into a composite map key and splits them back out. It
// must be non-empty: with "" the joined parts run together and
// `key.split(LABEL_SEPARATOR)` yields single characters. Written as an
// explicit escape (ASCII unit separator) so it cannot be lost when the file
// is rendered or copied.
var LABEL_SEPARATOR = "\u001f";
// Label used when no usable model name is available.
var UNKNOWN_MODEL = "unknown";
1156
/**
 * Create a fresh per-model accumulator with every counter at zero.
 *
 * @returns {{cached: number, completion: number, prompt: number, reasoning: number, requests: number, total: number}}
 */
function emptyModelTotals() {
  const counters = ["cached", "completion", "prompt", "reasoning", "requests", "total"];
  return Object.fromEntries(counters.map((name) => [name, 0]));
}
1159
/**
 * In-memory collector for request, upstream-call, token and quota metrics,
 * exposed as a JSON snapshot and as Prometheus text.
 */
var MetricsRegistry = class {
  #startedAtMs;
  #inFlight = 0;
  // composite (route, method, status) key -> completed request count
  #requests = new Map();
  // route -> { buckets, count, sum } duration histogram
  #durations = new Map();
  // model label -> accumulated token totals
  #tokens = new Map();
  // composite (path, outcome) key -> upstream call count
  #upstream = new Map();
  #copilotQuota;

  /**
   * @param {{now?: () => number}} [options] `now` overrides the clock used to
   *   stamp the start time (defaults to `Date.now`).
   */
  constructor(options = {}) {
    const clock = options.now ?? Date.now;
    this.#startedAtMs = clock();
  }

  /** Mark a request as started; pair with exactly one {@link observe}. */
  startRequest() {
    this.#inFlight++;
  }

  /** Record a completed request and clear its in-flight slot. */
  observe(observation) {
    // Never let the gauge go negative if observe() is called unpaired.
    this.#inFlight = Math.max(0, this.#inFlight - 1);
    const key = labelKey(observation.route, observation.method, String(observation.status));
    const seen = this.#requests.get(key) ?? 0;
    this.#requests.set(key, seen + 1);
    this.#observeDuration(observation.route, observation.durationMs / 1e3);
  }

  /** Accumulate token counts for a model from one upstream completion. */
  recordTokens(model, usage) {
    const label = this.#modelLabel(model);
    const totals = this.#tokens.get(label) ?? emptyModelTotals();
    const increments = [
      ["requests", 1],
      ["prompt", nonNegative(usage.promptTokens)],
      ["completion", nonNegative(usage.completionTokens)],
      ["total", nonNegative(usage.totalTokens)],
      ["reasoning", nonNegative(usage.reasoningTokens ?? 0)],
      ["cached", nonNegative(usage.cachedTokens ?? 0)]
    ];
    for (const [field, amount] of increments) {
      totals[field] += amount;
    }
    this.#tokens.set(label, totals);
  }

  /** Record one upstream Copilot call and whether it succeeded. */
  recordUpstream(path, ok) {
    const outcome = ok ? "ok" : "error";
    const key = labelKey(path, outcome);
    const seen = this.#upstream.get(key) ?? 0;
    this.#upstream.set(key, seen + 1);
  }

  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
  recordCopilotQuota(usage) {
    this.#copilotQuota = usage;
  }

  // Turn a model name into a bounded label: control characters removed,
  // trimmed, length-capped, and empty results replaced by UNKNOWN_MODEL. Once
  // MAX_TRACKED_MODELS labels exist, unseen labels fold into UNKNOWN_MODEL.
  #modelLabel(model) {
    const stripped = model
      .replace(/[\u0000-\u001f\u007f]/g, "")
      .trim()
      .slice(0, MAX_MODEL_LABEL_LENGTH);
    const label = stripped || UNKNOWN_MODEL;
    const isUntracked = !this.#tokens.has(label);
    if (isUntracked && this.#tokens.size >= MAX_TRACKED_MODELS) {
      return UNKNOWN_MODEL;
    }
    return label;
  }

  // Add one sample to the route's histogram. Invalid or negative durations
  // are recorded as 0. Buckets are stored non-cumulatively; samples above the
  // largest bound only affect `count` and `sum`.
  #observeDuration(route, seconds) {
    const sample = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
    let histogram = this.#durations.get(route);
    if (histogram === undefined || histogram === null) {
      histogram = {
        buckets: new Array(DURATION_BUCKETS_SECONDS.length).fill(0),
        count: 0,
        sum: 0
      };
    }
    histogram.count += 1;
    histogram.sum += sample;
    const slot = DURATION_BUCKETS_SECONDS.findIndex((bound) => sample <= bound);
    if (slot !== -1) {
      histogram.buckets[slot] = (histogram.buckets[slot] ?? 0) + 1;
    }
    this.#durations.set(route, histogram);
  }

  /** A JSON-friendly view of the current counters. */
  snapshot(now = Date.now) {
    const byRoute = {};
    const byStatus = {};
    let requestsTotal = 0;
    this.#requests.forEach((count, key) => {
      const [route = "", , status = ""] = key.split(LABEL_SEPARATOR);
      byRoute[route] = (byRoute[route] ?? 0) + count;
      byStatus[status] = (byStatus[status] ?? 0) + count;
      requestsTotal += count;
    });

    const byModel = {};
    const tokenTotals = { cached: 0, completion: 0, prompt: 0, reasoning: 0, total: 0 };
    this.#tokens.forEach((totals, model) => {
      byModel[model] = { ...totals };
      tokenTotals.prompt += totals.prompt;
      tokenTotals.completion += totals.completion;
      tokenTotals.total += totals.total;
      tokenTotals.reasoning += totals.reasoning;
      tokenTotals.cached += totals.cached;
    });

    let upstreamTotal = 0;
    let upstreamErrors = 0;
    const errorSuffix = `${LABEL_SEPARATOR}error`;
    this.#upstream.forEach((count, key) => {
      upstreamTotal += count;
      if (key.endsWith(errorSuffix)) {
        upstreamErrors += count;
      }
    });

    return {
      inFlight: this.#inFlight,
      requests: { byRoute, byStatus, total: requestsTotal },
      startedAt: new Date(this.#startedAtMs).toISOString(),
      tokens: { byModel, ...tokenTotals },
      upstream: { errors: upstreamErrors, total: upstreamTotal },
      uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
    };
  }

  /** Render the Prometheus text exposition format (version 0.0.4). */
  renderPrometheus(now = Date.now) {
    const lines = [
      "# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.",
      "# TYPE hoopilot_process_start_time_seconds gauge",
      `hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`,
      "# HELP hoopilot_uptime_seconds Seconds since the proxy started.",
      "# TYPE hoopilot_uptime_seconds gauge",
      `hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`,
      "# HELP hoopilot_requests_in_flight Requests currently being served.",
      "# TYPE hoopilot_requests_in_flight gauge",
      `hoopilot_requests_in_flight ${this.#inFlight}`
    ];

    lines.push(
      "# HELP hoopilot_requests_total Completed requests by route, method, and status.",
      "# TYPE hoopilot_requests_total counter"
    );
    this.#requests.forEach((count, key) => {
      const [route = "", method = "", status = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_requests_total${labels({ method, route, status })} ${count}`);
    });

    lines.push(
      "# HELP hoopilot_upstream_requests_total Copilot upstream calls by path and outcome.",
      "# TYPE hoopilot_upstream_requests_total counter"
    );
    this.#upstream.forEach((count, key) => {
      const [path = "", outcome = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_upstream_requests_total${labels({ outcome, path })} ${count}`);
    });

    lines.push(
      "# HELP hoopilot_tokens_total Tokens reported by upstream usage, by model and type.",
      "# TYPE hoopilot_tokens_total counter"
    );
    this.#tokens.forEach((totals, model) => {
      const series = [
        ["prompt", totals.prompt],
        ["completion", totals.completion],
        ["reasoning", totals.reasoning],
        ["cached", totals.cached]
      ];
      for (const [type, amount] of series) {
        lines.push(`hoopilot_tokens_total${labels({ model, type })} ${amount}`);
      }
    });

    lines.push(
      "# HELP hoopilot_model_requests_total Completions with usage observed, by model.",
      "# TYPE hoopilot_model_requests_total counter"
    );
    this.#tokens.forEach((totals, model) => {
      lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
    });

    lines.push(
      "# HELP hoopilot_request_duration_seconds Request duration by route.",
      "# TYPE hoopilot_request_duration_seconds histogram"
    );
    this.#durations.forEach((entry, route) => {
      // Stored buckets are per-interval; the exposition wants running totals.
      let cumulative = 0;
      DURATION_BUCKETS_SECONDS.forEach((bound, i) => {
        cumulative += entry.buckets[i] ?? 0;
        const le = formatNumber(bound ?? 0);
        lines.push(
          `hoopilot_request_duration_seconds_bucket${labels({ le, route })} ${cumulative}`
        );
      });
      lines.push(
        `hoopilot_request_duration_seconds_bucket${labels({ le: "+Inf", route })} ${entry.count}`,
        `hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`,
        `hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`
      );
    });

    this.#renderCopilotQuota(lines);
    return `${lines.join("\n")}\n`;
  }

  // Append quota gauges for the most recently recorded Copilot usage, if any.
  // A gauge family is emitted only when at least one category has that value.
  #renderCopilotQuota(lines) {
    const usage = this.#copilotQuota;
    if (!usage) {
      return;
    }
    const categories = Object.entries(usage.quotas);
    const gauge = (suffix, help, pick) => {
      const samples = categories
        .map(([category, quota]) => [category, pick(quota)])
        .filter(([, value]) => value !== undefined);
      if (samples.length === 0) {
        return;
      }
      lines.push(`# HELP hoopilot_copilot_quota_${suffix} ${help}`);
      lines.push(`# TYPE hoopilot_copilot_quota_${suffix} gauge`);
      for (const [category, value] of samples) {
        lines.push(`hoopilot_copilot_quota_${suffix}${labels({ category })} ${value}`);
      }
    };
    gauge("remaining", "Remaining quota for the Copilot category.", (q) => q.remaining);
    gauge("entitlement", "Quota entitlement for the Copilot category.", (q) => q.entitlement);
    gauge("used", "Used quota (entitlement minus remaining) for the category.", (q) => q.used);
    gauge(
      "percent_remaining",
      "Percent of quota remaining for the Copilot category.",
      (q) => q.percentRemaining
    );

    // An absent or unparseable reset date yields NaN and is skipped.
    const resetMs = usage.quotaResetDate ? Date.parse(usage.quotaResetDate) : Number.NaN;
    if (Number.isFinite(resetMs)) {
      lines.push(
        "# HELP hoopilot_copilot_quota_reset_timestamp_seconds Unix epoch of the next reset.",
        "# TYPE hoopilot_copilot_quota_reset_timestamp_seconds gauge",
        `hoopilot_copilot_quota_reset_timestamp_seconds ${resetMs / 1e3}`
      );
    }

    if (usage.plan || usage.accessTypeSku) {
      const info = labels({
        access_type_sku: usage.accessTypeSku ?? "",
        plan: usage.plan ?? ""
      });
      lines.push(
        "# HELP hoopilot_copilot_info Copilot plan metadata as a constant-1 info gauge.",
        "# TYPE hoopilot_copilot_info gauge",
        `hoopilot_copilot_info${info} 1`
      );
    }
  }
};
1379
/**
 * Wrap `response` so its body can be inspected for token usage without
 * taking it away from the client.
 *
 * The body is tee'd: one branch backs the returned `Response`, the other is
 * consumed in the background by `consumeUsage`, whose failures are ignored.
 * A response without a body is returned unchanged.
 *
 * @param {Response} response Upstream response.
 * @param {string} fallbackModel Model passed on to `consumeUsage`.
 * @param {(model: string, usage: object) => void} onUsage Usage callback.
 * @param {AbortSignal} [signal] Passed on to `consumeUsage`.
 * @returns {Response} A response with the same headers, status and status text.
 */
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
  const source = response.body;
  if (!source) {
    return response;
  }
  const [forClient, forObserver] = source.tee();
  const contentType = response.headers.get("content-type");
  const isSse = contentType?.includes("text/event-stream") ?? false;
  // Fire-and-forget: observation must never affect the proxied response.
  void consumeUsage(forObserver, isSse, fallbackModel, onUsage, signal).catch(() => {
  });
  const { headers, status, statusText } = response;
  return new Response(forClient, { headers, status, statusText });
}
1394
/**
 * Read `stream` to the end looking for token usage and a model name, then
 * report them once via `onUsage` if any usage was found.
 *
 * SSE streams are scanned line by line as they arrive. Other bodies are
 * buffered and parsed as a single JSON document at the end, unless they grow
 * past USAGE_BUFFER_LIMIT_BYTES, in which case the body is not parsed.
 * Aborting `signal` cancels the reader.
 *
 * @param {ReadableStream<Uint8Array>} stream Body branch to consume.
 * @param {boolean} isSse Whether the body is a `text/event-stream`.
 * @param {string} fallbackModel Model reported when the body names none.
 * @param {(model: string, usage: object) => void} onUsage Usage callback.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>}
 */
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
  const reader = stream.getReader();
  const cancelReader = () => {
    reader.cancel().catch(() => {
    });
  };
  if (signal?.aborted) {
    cancelReader();
  } else {
    signal?.addEventListener("abort", cancelReader, { once: true });
  }

  const decoder = new TextDecoder();
  const latest = { model: fallbackModel, usage: undefined };
  let pending = "";
  let seenBytes = 0;
  let tooLarge = false;

  // Later payloads win: the last usage / model seen is what gets reported.
  const inspect = (payload) => {
    const record = asRecord(payload);
    const found = extractTokenUsage(record.usage) ?? extractTokenUsage(asRecord(record.response).usage);
    if (found) {
      latest.usage = found;
    }
    const named = modelText(record.model) || modelText(asRecord(record.response).model);
    if (named) {
      latest.model = named;
    }
  };

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      const text = decoder.decode(value, { stream: true });
      if (isSse) {
        const lines = (pending + text).split(/\r?\n/);
        // The last piece may be an incomplete line; keep it for the next chunk.
        pending = lines.pop() ?? "";
        lines.forEach((line) => considerSseLine(line, inspect));
        if (pending.length > USAGE_BUFFER_LIMIT_BYTES) {
          pending = "";
        }
        continue;
      }
      if (tooLarge) {
        continue;
      }
      seenBytes += value.byteLength;
      if (seenBytes > USAGE_BUFFER_LIMIT_BYTES) {
        tooLarge = true;
        pending = "";
      } else {
        pending += text;
      }
    }

    // Flush any bytes the decoder was still holding.
    const tail = pending + decoder.decode();
    if (isSse) {
      if (tail) {
        considerSseLine(tail, inspect);
      }
    } else if (!tooLarge && tail) {
      const parsed = safeParse(tail);
      if (parsed !== undefined) {
        inspect(parsed);
      }
    }
  } finally {
    signal?.removeEventListener("abort", cancelReader);
    reader.releaseLock();
  }

  if (latest.usage) {
    onUsage(latest.model, latest.usage);
  }
}
1469
/**
 * Parse one SSE line and hand its JSON payload to `consider`.
 *
 * Lines that do not start with `data:`, empty payloads, the `[DONE]`
 * sentinel and payloads that are not valid JSON are ignored.
 *
 * @param {string} line One line of the event stream.
 * @param {(payload: unknown) => void} consider Receives the parsed payload.
 * @returns {void}
 */
function considerSseLine(line, consider) {
  const prefix = "data:";
  const trimmed = line.trim();
  if (trimmed.startsWith(prefix)) {
    const data = trimmed.slice(prefix.length).trim();
    if (data !== "" && data !== "[DONE]") {
      const parsed = safeParse(data);
      if (parsed !== undefined) {
        consider(parsed);
      }
    }
  }
}
1483
/**
 * Parse JSON without throwing.
 *
 * @param {string} text JSON text.
 * @returns {unknown} The parsed value, or `undefined` when `text` is not
 *   valid JSON.
 */
function safeParse(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
1490
/**
 * Return `value` trimmed when it is a string, otherwise the empty string.
 *
 * @param {unknown} value Candidate model name.
 * @returns {string}
 */
function modelText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
1493
/**
 * Return `value` when it is a finite number greater than zero, otherwise 0.
 *
 * @param {unknown} value Candidate amount.
 * @returns {number}
 */
function nonNegative(value) {
  if (!Number.isFinite(value) || value <= 0) {
    return 0;
  }
  return value;
}
1496
/**
 * Build a composite map key by joining `parts` with LABEL_SEPARATOR.
 *
 * @param {...string} parts Label values, in a fixed order per metric.
 * @returns {string} The joined key.
 */
function labelKey(...parts) {
  const key = parts.join(LABEL_SEPARATOR);
  return key;
}
1499
/**
 * Render a label set as `{name="value",...}`, in the object's own key order
 * and with values escaped by `escapeLabelValue`.
 *
 * @param {Record<string, string>} pairs Label names mapped to values.
 * @returns {string} The rendered label block, or `""` when `pairs` is empty.
 */
function labels(pairs) {
  const rendered = [];
  for (const [name, value] of Object.entries(pairs)) {
    rendered.push(`${name}="${escapeLabelValue(value)}"`);
  }
  return rendered.length === 0 ? "" : `{${rendered.join(",")}}`;
}
1507
/**
 * Escape a label value for use inside double quotes: backslash, double
 * quote, line feed and carriage return are each replaced by a
 * backslash-prefixed sequence.
 *
 * @param {string} value Raw label value.
 * @returns {string} The escaped value.
 */
function escapeLabelValue(value) {
  const escapes = { "\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r" };
  // A single pass cannot re-escape the backslashes it has just inserted.
  return value.replace(/[\\"\n\r]/g, (char) => escapes[char]);
}
1510
/**
 * Format a number using JavaScript's default number-to-string conversion.
 *
 * @param {number} value Number to format.
 * @returns {string}
 */
function formatNumber(value) {
  // Integers and non-integers stringify the same way, so one call covers both.
  return String(value);
}
1513
+
996
1514
  // src/server.ts
997
1515
  var DEFAULT_HOST = "127.0.0.1";
998
1516
  var DEFAULT_PORT = 4141;
999
1517
  var INVALID_JSON_MESSAGE = "Request body must be valid JSON.";
1518
+ var USAGE_CACHE_TTL_MS = 6e4;
1000
1519
  function createHoopilotHandler(options = {}) {
1001
1520
  const client = new CopilotClient(options);
1002
1521
  const apiKey = options.apiKey ?? options.env?.HOOPILOT_API_KEY;
1003
1522
  const logger = serverLogger(options);
1523
+ const metrics = options.metrics ?? new MetricsRegistry();
1524
+ const readUsage = createUsageReader(client, metrics);
1525
+ const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
1004
1526
  return async (request) => {
1005
1527
  const startedAt = performance.now();
1006
1528
  const url = new URL(request.url);
1007
1529
  const apiPath = canonicalApiPath(url.pathname);
1008
1530
  const requestId = requestIdFor(request);
1531
+ const route = routeFor(request.method, apiPath);
1009
1532
  const requestLogger = logger.child({
1010
1533
  method: request.method,
1011
1534
  path: url.pathname,
1012
1535
  requestId,
1013
- route: routeFor(request.method, apiPath)
1536
+ route
1537
+ });
1538
+ metrics.startRequest();
1539
+ const finish = (response) => finishResponse(response, {
1540
+ logger: requestLogger,
1541
+ method: request.method,
1542
+ metrics,
1543
+ requestId,
1544
+ route,
1545
+ startedAt
1014
1546
  });
1015
1547
  if (request.method === "OPTIONS") {
1016
- return finishResponse(new Response(null, { headers: corsHeaders() }), {
1017
- logger: requestLogger,
1018
- requestId,
1019
- startedAt
1020
- });
1548
+ return finish(new Response(null, { headers: corsHeaders() }));
1021
1549
  }
1022
1550
  if (!isAuthorized(request, apiKey)) {
1023
1551
  requestLogger.warn({ event: "http.request.unauthorized" }, "invalid hoopilot api key");
1024
- return finishResponse(
1025
- jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."),
1026
- {
1027
- logger: requestLogger,
1028
- requestId,
1029
- startedAt
1030
- }
1031
- );
1552
+ return finish(jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."));
1032
1553
  }
1033
1554
  try {
1034
1555
  if (request.method === "GET" && (apiPath === "/" || apiPath === "/healthz")) {
1035
- return finishResponse(
1036
- jsonResponse({
1037
- name: "hoopilot",
1038
- object: "health",
1039
- status: "ok"
1040
- }),
1041
- { logger: requestLogger, requestId, startedAt }
1042
- );
1556
+ return finish(jsonResponse({ name: "hoopilot", object: "health", status: "ok" }));
1557
+ }
1558
+ if (request.method === "GET" && apiPath === "/metrics") {
1559
+ return finish(metricsResponse(metrics));
1560
+ }
1561
+ if (request.method === "GET" && apiPath === "/v1/usage") {
1562
+ return finish(await handleUsage(metrics, readUsage, request.signal));
1043
1563
  }
1044
1564
  if (request.method === "GET" && apiPath === "/v1/responses") {
1045
- return finishResponse(websocketUnsupportedResponse(), {
1046
- logger: requestLogger,
1047
- requestId,
1048
- startedAt
1049
- });
1565
+ return finish(websocketUnsupportedResponse());
1050
1566
  }
1051
1567
  if (request.method === "GET" && apiPath === "/v1/models") {
1052
- return finishResponse(await handleModels(client, request.signal, requestLogger), {
1053
- logger: requestLogger,
1054
- requestId,
1055
- startedAt
1056
- });
1568
+ return finish(await handleModels(client, metrics, request.signal, requestLogger));
1057
1569
  }
1058
1570
  if (request.method === "POST" && apiPath === "/v1/chat/completions") {
1059
- return finishResponse(await handleChatCompletions(client, request, requestLogger), {
1060
- logger: requestLogger,
1061
- requestId,
1062
- startedAt
1063
- });
1571
+ return finish(
1572
+ await handleChatCompletions(client, metrics, recordTokens, request, requestLogger)
1573
+ );
1064
1574
  }
1065
1575
  if (request.method === "POST" && apiPath === "/v1/completions") {
1066
- return finishResponse(await handleCompletions(client, request, requestLogger), {
1067
- logger: requestLogger,
1068
- requestId,
1069
- startedAt
1070
- });
1576
+ return finish(
1577
+ await handleCompletions(client, metrics, recordTokens, request, requestLogger)
1578
+ );
1071
1579
  }
1072
1580
  if (request.method === "POST" && apiPath === "/v1/responses") {
1073
- return finishResponse(await handleResponses(client, request, requestLogger), {
1074
- logger: requestLogger,
1075
- requestId,
1076
- startedAt
1077
- });
1581
+ return finish(await handleResponses(client, metrics, recordTokens, request, requestLogger));
1078
1582
  }
1079
- return finishResponse(
1080
- jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`),
1081
- { logger: requestLogger, requestId, startedAt }
1082
- );
1583
+ return finish(jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`));
1083
1584
  } catch (error) {
1084
1585
  if (error instanceof CopilotAuthError) {
1085
1586
  requestLogger.warn(
1086
1587
  { err: errorDetails(error), event: "copilot.auth.missing" },
1087
1588
  "copilot auth failed"
1088
1589
  );
1089
- return finishResponse(jsonError(401, "copilot_auth_error", error.message), {
1090
- logger: requestLogger,
1091
- requestId,
1092
- startedAt
1093
- });
1590
+ return finish(jsonError(401, "copilot_auth_error", error.message));
1094
1591
  }
1095
1592
  const message = errorMessage(error);
1096
1593
  if (message === INVALID_JSON_MESSAGE) {
@@ -1104,11 +1601,7 @@ function createHoopilotHandler(options = {}) {
1104
1601
  "request failed"
1105
1602
  );
1106
1603
  }
1107
- return finishResponse(jsonError(500, "internal_error", message), {
1108
- logger: requestLogger,
1109
- requestId,
1110
- startedAt
1111
- });
1604
+ return finish(jsonError(500, "internal_error", message));
1112
1605
  }
1113
1606
  };
1114
1607
  }
@@ -1137,8 +1630,9 @@ function startHoopilotServer(options = {}) {
1137
1630
  url: `http://${host}:${server.port}`
1138
1631
  };
1139
1632
  }
1140
- async function handleModels(client, signal, logger) {
1633
+ async function handleModels(client, metrics, signal, logger) {
1141
1634
  const upstream = await client.models(signal);
1635
+ metrics.recordUpstream("/models", upstream.ok);
1142
1636
  if (!upstream.ok) {
1143
1637
  if (isUpstreamAuthStatus(upstream.status)) {
1144
1638
  return proxyError(upstream, logger);
@@ -1156,38 +1650,50 @@ async function handleModels(client, signal, logger) {
1156
1650
  logUpstreamSuccess(logger, "/models", upstream.status);
1157
1651
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
1158
1652
  }
1159
- async function handleChatCompletions(client, request, logger) {
1653
+ async function handleChatCompletions(client, metrics, recordTokens, request, logger) {
1160
1654
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
1161
1655
  const upstream = await client.chatCompletions(chatRequest, request.signal);
1656
+ metrics.recordUpstream("/chat/completions", upstream.ok);
1162
1657
  if (!upstream.ok) {
1163
1658
  return proxyError(upstream, logger);
1164
1659
  }
1165
1660
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
1166
- return proxyResponse(upstream);
1661
+ const model = normalizeRequestedModel(chatRequest.model);
1662
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1167
1663
  }
1168
- async function handleCompletions(client, request, logger) {
1664
+ async function handleCompletions(client, metrics, recordTokens, request, logger) {
1169
1665
  const body = await readJson(request);
1170
1666
  const upstream = await client.chatCompletions(
1171
1667
  completionsRequestToChatCompletion(body),
1172
1668
  request.signal
1173
1669
  );
1670
+ metrics.recordUpstream("/chat/completions", upstream.ok);
1174
1671
  if (!upstream.ok) {
1175
1672
  return proxyError(upstream, logger);
1176
1673
  }
1177
1674
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
1675
+ const model = normalizeRequestedModel(body.model);
1178
1676
  if (isStreamingResponse(upstream)) {
1179
- return proxyResponse(upstream);
1677
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1678
+ }
1679
+ const completion = asRecord(await upstream.json());
1680
+ const usage = extractTokenUsage(completion.usage);
1681
+ if (usage) {
1682
+ const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
1683
+ recordTokens(responseModel || model, usage);
1180
1684
  }
1181
- return jsonResponse(chatCompletionToCompletion(await upstream.json()));
1685
+ return jsonResponse(chatCompletionToCompletion(completion));
1182
1686
  }
1183
- async function handleResponses(client, request, logger) {
1687
+ async function handleResponses(client, metrics, recordTokens, request, logger) {
1184
1688
  const body = await readJsonText(request);
1185
1689
  const upstream = await client.responses(body, request.signal);
1690
+ metrics.recordUpstream("/responses", upstream.ok);
1186
1691
  if (!upstream.ok) {
1187
1692
  return proxyError(upstream, logger);
1188
1693
  }
1189
1694
  logUpstreamSuccess(logger, "/responses", upstream.status);
1190
- return proxyResponse(upstream);
1695
+ const model = normalizeRequestedModel(asRecord(safeParseJson(body)).model);
1696
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1191
1697
  }
1192
1698
  async function proxyError(upstream, logger) {
1193
1699
  const text = await upstream.text();
@@ -1306,7 +1812,21 @@ function serverLogger(options) {
1306
1812
  }
1307
1813
  function finishResponse(response, options) {
1308
1814
  const withRequestId = responseWithRequestId(response, options.requestId);
1309
- logRequestCompleted(options.logger, withRequestId, options.startedAt);
1815
+ const stream = isStreamingResponse(withRequestId);
1816
+ const status = withRequestId.status;
1817
+ const complete = () => {
1818
+ const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
1819
+ options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
1820
+ logRequestCompleted(options.logger, status, stream, durationMs);
1821
+ };
1822
+ if (stream && withRequestId.body) {
1823
+ return new Response(trackStreamCompletion(withRequestId.body, complete), {
1824
+ headers: withRequestId.headers,
1825
+ status,
1826
+ statusText: withRequestId.statusText
1827
+ });
1828
+ }
1829
+ complete();
1310
1830
  return withRequestId;
1311
1831
  }
1312
1832
  function responseWithRequestId(response, requestId) {
@@ -1318,18 +1838,48 @@ function responseWithRequestId(response, requestId) {
1318
1838
  statusText: response.statusText
1319
1839
  });
1320
1840
  }
1321
- function logRequestCompleted(logger, response, startedAt) {
1841
+ function trackStreamCompletion(body, onComplete) {
1842
+ const reader = body.getReader();
1843
+ let fired = false;
1844
+ const fire = () => {
1845
+ if (!fired) {
1846
+ fired = true;
1847
+ onComplete();
1848
+ }
1849
+ };
1850
+ return new ReadableStream({
1851
+ async pull(controller) {
1852
+ try {
1853
+ const { done, value } = await reader.read();
1854
+ if (done) {
1855
+ controller.close();
1856
+ fire();
1857
+ return;
1858
+ }
1859
+ controller.enqueue(value);
1860
+ } catch (error) {
1861
+ fire();
1862
+ controller.error(error);
1863
+ }
1864
+ },
1865
+ cancel(reason) {
1866
+ fire();
1867
+ return reader.cancel(reason);
1868
+ }
1869
+ });
1870
+ }
1871
+ function logRequestCompleted(logger, status, stream, durationMs) {
1322
1872
  const fields = {
1323
- durationMs: Math.round((performance.now() - startedAt) * 100) / 100,
1873
+ durationMs,
1324
1874
  event: "http.request.completed",
1325
- status: response.status,
1326
- stream: isStreamingResponse(response)
1875
+ status,
1876
+ stream
1327
1877
  };
1328
- if (response.status >= 500) {
1878
+ if (status >= 500) {
1329
1879
  logger.error(fields, "request completed with server error");
1330
1880
  return;
1331
1881
  }
1332
- if (response.status >= 400) {
1882
+ if (status >= 400) {
1333
1883
  logger.warn(fields, "request completed with client error");
1334
1884
  return;
1335
1885
  }
@@ -1350,6 +1900,8 @@ function canonicalApiPath(path) {
1350
1900
  return "/v1/completions";
1351
1901
  case "/responses":
1352
1902
  return "/v1/responses";
1903
+ case "/usage":
1904
+ return "/v1/usage";
1353
1905
  default:
1354
1906
  return withoutTrailingSlash;
1355
1907
  }
@@ -1361,6 +1913,12 @@ function routeFor(method, path) {
1361
1913
  if (method === "GET" && (path === "/" || path === "/healthz")) {
1362
1914
  return "health";
1363
1915
  }
1916
+ if (method === "GET" && path === "/metrics") {
1917
+ return "metrics";
1918
+ }
1919
+ if (method === "GET" && path === "/v1/usage") {
1920
+ return "usage";
1921
+ }
1364
1922
  if (method === "GET" && path === "/v1/models") {
1365
1923
  return "models";
1366
1924
  }
@@ -1391,26 +1949,86 @@ function logUpstreamSuccess(logger, upstreamPath, status) {
1391
1949
  "copilot upstream request completed"
1392
1950
  );
1393
1951
  }
1952
+ function metricsResponse(metrics) {
1953
+ return new Response(metrics.renderPrometheus(), {
1954
+ headers: {
1955
+ ...corsHeaders(),
1956
+ "content-type": PROMETHEUS_CONTENT_TYPE
1957
+ },
1958
+ status: 200
1959
+ });
1960
+ }
1961
+ async function handleUsage(metrics, readUsage, signal) {
1962
+ const proxy = metrics.snapshot();
1963
+ const { copilot, error } = await readUsage(signal);
1964
+ const body = { copilot: copilot ?? null, object: "usage", proxy };
1965
+ if (error) {
1966
+ body.copilot_error = error;
1967
+ }
1968
+ return jsonResponse(body);
1969
+ }
1970
+ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
1971
+ const usagePath = "/copilot_internal/user";
1972
+ let cache;
1973
+ return async (signal) => {
1974
+ if (cache && now() - cache.atMs < ttlMs) {
1975
+ return { copilot: cache.value };
1976
+ }
1977
+ try {
1978
+ const upstream = await client.usage(signal);
1979
+ metrics.recordUpstream(usagePath, upstream.ok);
1980
+ if (!upstream.ok) {
1981
+ return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
1982
+ }
1983
+ const value = normalizeCopilotUsage(await upstream.json().catch(() => ({})));
1984
+ cache = { atMs: now(), value };
1985
+ metrics.recordCopilotQuota(value);
1986
+ return { copilot: value };
1987
+ } catch (error) {
1988
+ metrics.recordUpstream(usagePath, false);
1989
+ if (error instanceof CopilotAuthError) {
1990
+ return { error: error.message };
1991
+ }
1992
+ return { error: errorMessage(error) };
1993
+ }
1994
+ };
1995
+ }
1996
+ function safeParseJson(text) {
1997
+ try {
1998
+ return JSON.parse(text);
1999
+ } catch {
2000
+ return void 0;
2001
+ }
2002
+ }
1394
2003
  // Annotate the CommonJS export names for ESM import in node:
1395
2004
  0 && (module.exports = {
2005
+ COPILOT_USAGE_API_VERSION,
1396
2006
  CopilotAuth,
1397
2007
  CopilotAuthError,
1398
2008
  CopilotClient,
2009
+ DEFAULT_GITHUB_API_BASE_URL,
1399
2010
  DEFAULT_LOG_FORMAT,
1400
2011
  DEFAULT_LOG_LEVEL,
1401
2012
  DEFAULT_MODEL,
2013
+ MetricsRegistry,
2014
+ PROMETHEUS_CONTENT_TYPE,
2015
+ applyCopilotHeaders,
2016
+ applyGithubApiHeaders,
1402
2017
  authStorePath,
1403
2018
  chatCompletionToCompletion,
1404
2019
  chatCompletionToResponse,
1405
2020
  completionsRequestToChatCompletion,
1406
2021
  createHoopilotHandler,
1407
2022
  createHoopilotLogger,
2023
+ extractTokenUsage,
1408
2024
  fallbackModels,
1409
2025
  githubCopilotDeviceLogin,
1410
2026
  noopLogger,
1411
2027
  normalizeChatCompletionRequest,
2028
+ normalizeCopilotUsage,
1412
2029
  normalizeModelsResponse,
1413
2030
  normalizeRequestedModel,
2031
+ observeResponseUsage,
1414
2032
  parseLogFormat,
1415
2033
  parseLogLevel,
1416
2034
  readStoredCopilotAuth,