@openhoo/hoopilot 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -105,6 +105,8 @@ var CopilotAuth = class {
105
105
  };
106
106
 
107
107
  // src/copilot.ts
108
+ var DEFAULT_GITHUB_API_BASE_URL = "https://api.github.com";
109
+ var COPILOT_USAGE_API_VERSION = "2025-04-01";
108
110
  function applyCopilotHeaders(headers, token) {
109
111
  headers.set("accept", headers.get("accept") ?? "application/json");
110
112
  headers.set("authorization", `Bearer ${token}`);
@@ -116,12 +118,44 @@ function applyCopilotHeaders(headers, token) {
116
118
  headers.set("x-github-api-version", "2026-06-01");
117
119
  return headers;
118
120
  }
121
/**
 * Stamp the headers used for GitHub REST API calls onto `headers` and return
 * the same object.
 *
 * An `accept` value the caller already set is kept; every other header listed
 * here is overwritten. The token is sent with the `token` authorization scheme.
 *
 * @param headers Headers instance to mutate.
 * @param token   OAuth token placed in the `authorization` header.
 * @returns The `headers` argument, for chaining.
 */
function applyGithubApiHeaders(headers, token) {
  const stamped = [
    ["accept", headers.get("accept") ?? "application/json"],
    ["authorization", `token ${token}`],
    ["editor-plugin-version", "hoopilot/0.1.0"],
    ["editor-version", "Hoopilot/0.1.0"],
    ["user-agent", "hoopilot/0.1.0"],
    ["x-github-api-version", COPILOT_USAGE_API_VERSION]
  ];
  for (const [name, value] of stamped) {
    headers.set(name, value);
  }
  return headers;
}
119
130
  var CopilotClient = class {
120
131
  #auth;
121
132
  #fetch;
133
+ #githubApiBaseUrl;
122
134
  constructor(options = {}) {
123
135
  this.#auth = new CopilotAuth(options);
124
136
  this.#fetch = options.fetch ?? fetch;
137
+ this.#githubApiBaseUrl = trimTrailingSlash(
138
+ options.githubApiBaseUrl ?? options.env?.HOOPILOT_GITHUB_API_BASE_URL ?? DEFAULT_GITHUB_API_BASE_URL
139
+ );
140
+ }
141
+ /**
142
+ * Fetch the Copilot account's quota / premium-request usage from the GitHub
143
+ * REST `copilot_internal/user` endpoint. The stored device-flow OAuth token is
144
+ * accepted directly here — no Copilot token exchange is required to read quota.
145
+ */
146
+ async usage(signal) {
147
+ if (!isHttpsOrLoopback(this.#githubApiBaseUrl)) {
148
+ throw new Error(
149
+ `Refusing to send the GitHub OAuth token to a non-HTTPS host: ${this.#githubApiBaseUrl}`
150
+ );
151
+ }
152
+ const access = await this.#auth.getAccess();
153
+ const headers = applyGithubApiHeaders(new Headers(), access.token);
154
+ return this.#fetch(`${this.#githubApiBaseUrl}/copilot_internal/user`, {
155
+ headers,
156
+ method: "GET",
157
+ signal
158
+ });
125
159
  }
126
160
  async chatCompletions(body, signal) {
127
161
  return this.fetchCopilot("/chat/completions", {
@@ -161,6 +195,81 @@ var CopilotClient = class {
161
195
  });
162
196
  }
163
197
  };
198
/**
 * Convert a raw `copilot_internal/user` payload into the proxy's usage shape.
 *
 * Quotas are read from `quota_snapshots` first. Only when that yields no
 * categories are they derived from the `limited_user_quotas` (remaining) and
 * `monthly_quotas` (entitlement) maps instead. Fields that cannot be read are
 * omitted from the result rather than set to `undefined`.
 *
 * NOTE(review): relies on `asRecord` returning an (possibly empty) object for
 * any input — confirm against its definition.
 *
 * @param body Parsed JSON body from the usage endpoint.
 * @returns Object with optional `accessTypeSku`, `chatEnabled`, `plan`,
 *          `quotaResetDate`, and a `quotas` map keyed by category.
 */
function normalizeCopilotUsage(body) {
  const payload = asRecord(body);
  const quotas = {};

  // Preferred shape: one detail object per category.
  for (const [category, detail] of Object.entries(asRecord(payload.quota_snapshots))) {
    quotas[category] = normalizeQuotaDetail(asRecord(detail));
  }

  // Fallback shape: two flat maps, merged over the union of their keys.
  if (Object.keys(quotas).length === 0) {
    const remainingByCategory = asRecord(payload.limited_user_quotas);
    const monthlyByCategory = asRecord(payload.monthly_quotas);
    const categories = new Set([
      ...Object.keys(remainingByCategory),
      ...Object.keys(monthlyByCategory)
    ]);
    categories.forEach((category) => {
      const entitlement = numberOrUndefined(monthlyByCategory[category]);
      const left = numberOrUndefined(remainingByCategory[category]);
      // A percentage is only meaningful with a positive entitlement.
      let percentRemaining;
      if (entitlement !== undefined && entitlement > 0 && left !== undefined) {
        percentRemaining = (left / entitlement) * 100;
      }
      quotas[category] = removeUndefinedQuota({
        entitlement,
        percentRemaining,
        remaining: left,
        used: usedFrom(entitlement, left)
      });
    });
  }

  // The reset date may arrive under any of three field names; first hit wins.
  const quotaResetDate =
    stringOrUndefined(payload.quota_reset_date) ??
    stringOrUndefined(payload.quota_reset_date_utc) ??
    stringOrUndefined(payload.limited_user_reset_date);
  const chatEnabled = typeof payload.chat_enabled === "boolean" ? payload.chat_enabled : undefined;

  return removeUndefinedUsage({
    accessTypeSku: stringOrUndefined(payload.access_type_sku),
    chatEnabled,
    plan: stringOrUndefined(payload.copilot_plan),
    quotaResetDate,
    quotas
  });
}
227
/**
 * Normalize one quota snapshot entry into camelCase fields.
 *
 * `remaining` falls back to `quota_remaining` when `remaining` is not a finite
 * number. `used` is derived from entitlement and remaining. Unreadable fields
 * are dropped from the result.
 *
 * @param detail Record holding the raw snapshot fields.
 * @returns Quota object containing only the fields that could be read.
 */
function normalizeQuotaDetail(detail) {
  const booleanOrUndefined = (value) => (typeof value === "boolean" ? value : undefined);
  const entitlement = numberOrUndefined(detail.entitlement);
  const remaining =
    numberOrUndefined(detail.remaining) ?? numberOrUndefined(detail.quota_remaining);
  const normalized = {
    entitlement,
    overageCount: numberOrUndefined(detail.overage_count),
    overagePermitted: booleanOrUndefined(detail.overage_permitted),
    percentRemaining: numberOrUndefined(detail.percent_remaining),
    remaining,
    unlimited: booleanOrUndefined(detail.unlimited),
    used: usedFrom(entitlement, remaining)
  };
  return removeUndefinedQuota(normalized);
}
240
/**
 * Derive the used amount as `entitlement - remaining`, clamped at zero.
 *
 * @param entitlement Total allowance, or `undefined` when unknown.
 * @param remaining   Amount left, or `undefined` when unknown.
 * @returns The non-negative used amount, or `undefined` if either input is
 *          `undefined`.
 */
function usedFrom(entitlement, remaining) {
  const bothKnown = entitlement !== undefined && remaining !== undefined;
  return bothKnown ? Math.max(0, entitlement - remaining) : undefined;
}
246
/**
 * Decide whether a base URL is safe to send a credential to: any `https:` URL,
 * or an `http:` URL whose host is a loopback name (`127.0.0.1`, `localhost`,
 * or IPv6 `::1`).
 *
 * @param rawUrl URL string to check.
 * @returns `true` when the URL is HTTPS or plain HTTP to loopback; `false`
 *          otherwise, including when `rawUrl` cannot be parsed.
 */
function isHttpsOrLoopback(rawUrl) {
  let url;
  try {
    url = new URL(rawUrl);
  } catch {
    return false;
  }
  if (url.protocol === "https:") {
    return true;
  }
  if (url.protocol !== "http:") {
    return false;
  }
  // `URL.hostname` keeps the square brackets around IPv6 literals ("[::1]"),
  // so strip them before comparing; otherwise the IPv6 loopback never matches.
  const host = url.hostname.replace(/^\[|\]$/g, "");
  return host === "127.0.0.1" || host === "localhost" || host === "::1";
}
258
/**
 * Pass through finite numbers; map everything else (non-numbers, NaN,
 * ±Infinity) to `undefined`.
 */
function numberOrUndefined(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
261
/**
 * Pass through non-empty strings; map everything else (including the empty
 * string) to `undefined`.
 */
function stringOrUndefined(value) {
  if (typeof value !== "string" || value.length === 0) {
    return undefined;
  }
  return value;
}
264
/**
 * Return a copy of `quota` without the properties whose value is `undefined`.
 * Other falsy values (`0`, `false`, `null`, `""`) are kept.
 */
function removeUndefinedQuota(quota) {
  const defined = [];
  for (const entry of Object.entries(quota)) {
    if (entry[1] !== undefined) {
      defined.push(entry);
    }
  }
  return Object.fromEntries(defined);
}
269
/**
 * Return a copy of `usage` without the properties whose value is `undefined`.
 * Key order of the surviving properties is preserved.
 */
function removeUndefinedUsage(usage) {
  return Object.fromEntries(Object.entries(usage).filter((pair) => pair[1] !== undefined));
}
164
273
 
165
274
  // src/github-device.ts
166
275
  import { setTimeout as sleep } from "timers/promises";
@@ -840,6 +949,40 @@ function responseUsage(usage) {
840
949
  total_tokens: record.total_tokens
841
950
  });
842
951
  }
952
/**
 * Read token counts from an upstream `usage` object, accepting both naming
 * schemes that appear in it (`prompt_tokens`/`completion_tokens` and
 * `input_tokens`/`output_tokens`).
 *
 * When a total is not reported it is computed as prompt + completion, with a
 * missing side counted as 0. Reasoning and cached counts come from the
 * `*_tokens_details` sub-objects and are omitted when absent.
 *
 * @param usage Raw usage value from an upstream payload.
 * @returns Normalized counts, or `undefined` when no prompt, completion, or
 *          total count is present.
 */
function extractTokenUsage(usage) {
  const record = asRecord(usage);
  const prompt = firstNumber(record.prompt_tokens, record.input_tokens);
  const completion = firstNumber(record.completion_tokens, record.output_tokens);
  const total = firstNumber(record.total_tokens);

  const nothingReported = [prompt, completion, total].every((count) => count === undefined);
  if (nothingReported) {
    return undefined;
  }

  const promptTokens = prompt ?? 0;
  const completionTokens = completion ?? 0;
  const outputDetails = [record.completion_tokens_details, record.output_tokens_details];
  const inputDetails = [record.prompt_tokens_details, record.input_tokens_details];
  const reasoningTokens = firstNumber(
    ...outputDetails.map((details) => asRecord(details).reasoning_tokens)
  );
  const cachedTokens = firstNumber(
    ...inputDetails.map((details) => asRecord(details).cached_tokens)
  );

  return removeUndefined({
    cachedTokens,
    completionTokens,
    promptTokens,
    reasoningTokens,
    totalTokens: total ?? promptTokens + completionTokens
  });
}
978
/**
 * Return the first argument that is a finite number, or `undefined` when none
 * of them is.
 */
function firstNumber(...values) {
  return values.find((candidate) => typeof candidate === "number" && Number.isFinite(candidate));
}
843
986
  function firstChoice(completion) {
844
987
  const choices = Array.isArray(completion.choices) ? completion.choices : [];
845
988
  return asRecord(choices[0]);
@@ -933,104 +1076,449 @@ function epochSeconds() {
933
1076
  return Math.floor(Date.now() / 1e3);
934
1077
  }
935
1078
 
1079
+ // src/metrics.ts
1080
// Content type announcing the Prometheus text exposition format, version 0.0.4.
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
// Upper bounds (seconds) of the request-duration histogram buckets, ascending.
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
// Cap on how much response data the usage observer will buffer.
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
// Cap on the number of distinct model labels tracked for token metrics.
var MAX_TRACKED_MODELS = 200;
// Maximum length of a sanitized model label.
var MAX_MODEL_LABEL_LENGTH = 200;
// Joins label parts into composite map keys, which are later split apart again.
// It must be a non-empty string that cannot occur inside a part: with "" the
// join is irreversible and `split` yields single characters. U+001F (unit
// separator) is written as an escape so it survives copy/paste and rendering.
var LABEL_SEPARATOR = "\u001f";
// Label used when a model name is empty or falls outside the tracking cap.
var UNKNOWN_MODEL = "unknown";
1087
/**
 * Create a fresh per-model accumulator with every counter at zero. A new
 * object is returned on each call so callers can mutate it freely.
 */
function emptyModelTotals() {
  const counters = ["cached", "completion", "prompt", "reasoning", "requests", "total"];
  return Object.fromEntries(counters.map((counter) => [counter, 0]));
}
1090
/**
 * In-memory metrics for the proxy: request counts and durations, per-model
 * token totals, upstream call outcomes, and the latest Copilot quota. The
 * state can be read as a JSON-friendly snapshot or as Prometheus text.
 */
var MetricsRegistry = class {
  // Epoch milliseconds captured when the registry was constructed.
  #startedAtMs;
  // Requests started but not yet observed.
  #inFlight = 0;
  // Composite key (route, method, status) -> completed request count.
  #requests = new Map();
  // Route -> { buckets, count, sum } histogram state (non-cumulative buckets).
  #durations = new Map();
  // Sanitized model label -> accumulated token totals.
  #tokens = new Map();
  // Composite key (path, outcome) -> upstream call count.
  #upstream = new Map();
  // Most recent quota passed to recordCopilotQuota, if any.
  #copilotQuota;

  /**
   * @param options Optional settings; `options.now` replaces `Date.now` as the
   *                clock used to stamp the start time.
   */
  constructor(options = {}) {
    const clock = options.now ?? Date.now;
    this.#startedAtMs = clock();
  }

  /** Count one more request as in flight; each call should be matched by one `observe`. */
  startRequest() {
    this.#inFlight += 1;
  }

  /**
   * Record a finished request: release its in-flight slot (never dropping
   * below zero), bump the route/method/status counter, and add the duration
   * to the route's histogram.
   */
  observe(observation) {
    const { durationMs, method, route, status } = observation;
    this.#inFlight = Math.max(0, this.#inFlight - 1);
    const key = labelKey(route, method, String(status));
    const seen = this.#requests.get(key) ?? 0;
    this.#requests.set(key, seen + 1);
    this.#observeDuration(route, durationMs / 1e3);
  }

  /** Add one completion's token counts to the running totals for `model`. */
  recordTokens(model, usage) {
    const label = this.#modelLabel(model);
    const totals = this.#tokens.get(label) ?? emptyModelTotals();
    totals.requests += 1;
    totals.prompt += nonNegative(usage.promptTokens);
    totals.completion += nonNegative(usage.completionTokens);
    totals.total += nonNegative(usage.totalTokens);
    totals.reasoning += nonNegative(usage.reasoningTokens ?? 0);
    totals.cached += nonNegative(usage.cachedTokens ?? 0);
    this.#tokens.set(label, totals);
  }

  /** Count one upstream call against `path`, bucketed as "ok" or "error". */
  recordUpstream(path, ok) {
    const outcome = ok ? "ok" : "error";
    const key = labelKey(path, outcome);
    const seen = this.#upstream.get(key) ?? 0;
    this.#upstream.set(key, seen + 1);
  }

  /** Remember the latest quota so it can be rendered as gauges. */
  recordCopilotQuota(usage) {
    this.#copilotQuota = usage;
  }

  // Turn a model name into a safe, bounded label: drop control characters,
  // trim, cap the length, and fall back to UNKNOWN_MODEL when nothing is left.
  // Once MAX_TRACKED_MODELS labels exist, any label not already tracked is
  // folded into UNKNOWN_MODEL so the number of series stays bounded.
  #modelLabel(model) {
    const stripped = model.replace(/[\u0000-\u001f\u007f]/g, "").trim();
    const cleaned = stripped.slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
    const atCapacity = this.#tokens.size >= MAX_TRACKED_MODELS;
    if (atCapacity && !this.#tokens.has(cleaned)) {
      return UNKNOWN_MODEL;
    }
    return cleaned;
  }

  // Add one sample to the route's histogram. Invalid or negative durations are
  // recorded as 0. Only the first bucket whose bound covers the value is
  // incremented; cumulative counts are computed at render time.
  #observeDuration(route, seconds) {
    const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
    const entry = this.#durations.get(route) ?? {
      buckets: Array.from({ length: DURATION_BUCKETS_SECONDS.length }, () => 0),
      count: 0,
      sum: 0
    };
    entry.count += 1;
    entry.sum += value;
    const bucketIndex = DURATION_BUCKETS_SECONDS.findIndex((bound) => value <= bound);
    if (bucketIndex !== -1) {
      entry.buckets[bucketIndex] = (entry.buckets[bucketIndex] ?? 0) + 1;
    }
    this.#durations.set(route, entry);
  }

  /**
   * Build a plain-object view of the current counters.
   *
   * @param now Clock used to compute uptime; defaults to `Date.now`.
   */
  snapshot(now = Date.now) {
    const byRoute = {};
    const byStatus = {};
    let requestsTotal = 0;
    this.#requests.forEach((count, key) => {
      const [route = "", , status = ""] = key.split(LABEL_SEPARATOR);
      byRoute[route] = (byRoute[route] ?? 0) + count;
      byStatus[status] = (byStatus[status] ?? 0) + count;
      requestsTotal += count;
    });

    const byModel = {};
    const tokenTotals = { cached: 0, completion: 0, prompt: 0, reasoning: 0, total: 0 };
    for (const [model, totals] of this.#tokens) {
      byModel[model] = { ...totals };
      for (const field of Object.keys(tokenTotals)) {
        tokenTotals[field] += totals[field];
      }
    }

    const errorSuffix = `${LABEL_SEPARATOR}error`;
    let upstreamTotal = 0;
    let upstreamErrors = 0;
    this.#upstream.forEach((count, key) => {
      upstreamTotal += count;
      if (key.endsWith(errorSuffix)) {
        upstreamErrors += count;
      }
    });

    return {
      inFlight: this.#inFlight,
      requests: { byRoute, byStatus, total: requestsTotal },
      startedAt: new Date(this.#startedAtMs).toISOString(),
      tokens: { byModel, ...tokenTotals },
      upstream: { errors: upstreamErrors, total: upstreamTotal },
      uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
    };
  }

  /**
   * Render every metric in the Prometheus text exposition format.
   *
   * @param now Clock used to compute uptime; defaults to `Date.now`.
   * @returns The exposition text, terminated by a newline.
   */
  renderPrometheus(now = Date.now) {
    const startedAtSeconds = this.#startedAtMs / 1e3;
    const uptimeSeconds = Math.max(0, (now() - this.#startedAtMs) / 1e3);
    const lines = [
      "# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.",
      "# TYPE hoopilot_process_start_time_seconds gauge",
      `hoopilot_process_start_time_seconds ${startedAtSeconds}`,
      "# HELP hoopilot_uptime_seconds Seconds since the proxy started.",
      "# TYPE hoopilot_uptime_seconds gauge",
      `hoopilot_uptime_seconds ${uptimeSeconds}`,
      "# HELP hoopilot_requests_in_flight Requests currently being served.",
      "# TYPE hoopilot_requests_in_flight gauge",
      `hoopilot_requests_in_flight ${this.#inFlight}`,
      "# HELP hoopilot_requests_total Completed requests by route, method, and status.",
      "# TYPE hoopilot_requests_total counter"
    ];
    this.#requests.forEach((count, key) => {
      const [route = "", method = "", status = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_requests_total${labels({ method, route, status })} ${count}`);
    });

    lines.push(
      "# HELP hoopilot_upstream_requests_total Copilot upstream calls by path and outcome.",
      "# TYPE hoopilot_upstream_requests_total counter"
    );
    this.#upstream.forEach((count, key) => {
      const [path = "", outcome = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_upstream_requests_total${labels({ outcome, path })} ${count}`);
    });

    lines.push(
      "# HELP hoopilot_tokens_total Tokens reported by upstream usage, by model and type.",
      "# TYPE hoopilot_tokens_total counter"
    );
    const tokenTypes = ["prompt", "completion", "reasoning", "cached"];
    for (const [model, totals] of this.#tokens) {
      for (const type of tokenTypes) {
        lines.push(`hoopilot_tokens_total${labels({ model, type })} ${totals[type]}`);
      }
    }

    lines.push(
      "# HELP hoopilot_model_requests_total Completions with usage observed, by model.",
      "# TYPE hoopilot_model_requests_total counter"
    );
    for (const [model, totals] of this.#tokens) {
      lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
    }

    lines.push(
      "# HELP hoopilot_request_duration_seconds Request duration by route.",
      "# TYPE hoopilot_request_duration_seconds histogram"
    );
    for (const [route, entry] of this.#durations) {
      // Stored buckets are per-bucket counts; the format wants running totals.
      let cumulative = 0;
      DURATION_BUCKETS_SECONDS.forEach((bound, index) => {
        cumulative += entry.buckets[index] ?? 0;
        const le = formatNumber(bound ?? 0);
        lines.push(
          `hoopilot_request_duration_seconds_bucket${labels({ le, route })} ${cumulative}`
        );
      });
      lines.push(
        `hoopilot_request_duration_seconds_bucket${labels({ le: "+Inf", route })} ${entry.count}`,
        `hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`,
        `hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`
      );
    }

    this.#renderCopilotQuota(lines);
    return `${lines.join("\n")}\n`;
  }

  // Append quota gauges to `lines` when a quota has been recorded. A gauge
  // family is emitted only if at least one category has a value for it.
  #renderCopilotQuota(lines) {
    const usage = this.#copilotQuota;
    if (!usage) {
      return;
    }
    const categories = Object.entries(usage.quotas);
    const gauge = (suffix, help, pick) => {
      const present = categories.filter(([, quota]) => pick(quota) !== undefined);
      if (present.length === 0) {
        return;
      }
      lines.push(
        `# HELP hoopilot_copilot_quota_${suffix} ${help}`,
        `# TYPE hoopilot_copilot_quota_${suffix} gauge`
      );
      for (const [category, quota] of present) {
        lines.push(`hoopilot_copilot_quota_${suffix}${labels({ category })} ${pick(quota)}`);
      }
    };
    gauge("remaining", "Remaining quota for the Copilot category.", (q) => q.remaining);
    gauge("entitlement", "Quota entitlement for the Copilot category.", (q) => q.entitlement);
    gauge("used", "Used quota (entitlement minus remaining) for the category.", (q) => q.used);
    gauge(
      "percent_remaining",
      "Percent of quota remaining for the Copilot category.",
      (q) => q.percentRemaining
    );

    // The reset gauge is skipped when the date is missing or unparseable.
    const resetMs = usage.quotaResetDate ? Date.parse(usage.quotaResetDate) : Number.NaN;
    if (Number.isFinite(resetMs)) {
      lines.push(
        "# HELP hoopilot_copilot_quota_reset_timestamp_seconds Unix epoch of the next reset.",
        "# TYPE hoopilot_copilot_quota_reset_timestamp_seconds gauge",
        `hoopilot_copilot_quota_reset_timestamp_seconds ${resetMs / 1e3}`
      );
    }

    if (usage.plan || usage.accessTypeSku) {
      const infoLabels = labels({
        access_type_sku: usage.accessTypeSku ?? "",
        plan: usage.plan ?? ""
      });
      lines.push(
        "# HELP hoopilot_copilot_info Copilot plan metadata as a constant-1 info gauge.",
        "# TYPE hoopilot_copilot_info gauge",
        `hoopilot_copilot_info${infoLabels} 1`
      );
    }
  }
};
1310
/**
 * Wrap a response so its body can be inspected for token usage while the
 * caller still receives the full body.
 *
 * The body is split with `tee()`: one branch backs the returned response, the
 * other is consumed in the background by `consumeUsage`. Failures of that
 * background read are deliberately ignored so they never affect the response.
 *
 * @param response      Upstream response to observe.
 * @param fallbackModel Model name reported if the body does not name one.
 * @param onUsage       Callback invoked as `onUsage(model, usage)`.
 * @param signal        Optional abort signal forwarded to the background read.
 * @returns The original response when it has no body, otherwise a new
 *          response with the same headers, status, and status text.
 */
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
  if (!response.body) {
    return response;
  }
  const [forClient, forObserver] = response.body.tee();
  const contentType = response.headers.get("content-type");
  const isSse = contentType?.includes("text/event-stream") ?? false;
  const ignoreFailure = () => {
  };
  void consumeUsage(forObserver, isSse, fallbackModel, onUsage, signal).catch(ignoreFailure);
  const { headers, status, statusText } = response;
  return new Response(forClient, { headers, status, statusText });
}
1325
/**
 * Read a response body to the end, looking for token usage and a model name,
 * then report them through `onUsage` if usage was found.
 *
 * SSE bodies are scanned line by line as they arrive; the last usage and the
 * last model seen win. Other bodies are buffered and parsed as one JSON
 * document at the end, unless they exceed USAGE_BUFFER_LIMIT_BYTES, in which
 * case they are not parsed at all. Aborting `signal` cancels the read.
 *
 * @param stream        Body stream to consume.
 * @param isSse         Whether the body is a server-sent event stream.
 * @param fallbackModel Model name used when the body does not name one.
 * @param onUsage       Callback invoked as `onUsage(model, usage)`.
 * @param signal        Optional abort signal.
 */
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
  const reader = stream.getReader();
  const cancelQuietly = () => {
    reader.cancel().catch(() => {
    });
  };
  if (signal?.aborted) {
    cancelQuietly();
  } else {
    signal?.addEventListener("abort", cancelQuietly, { once: true });
  }

  const decoder = new TextDecoder();
  let model = fallbackModel;
  let usage;
  let pending = "";
  let bufferedBytes = 0;
  let overflowed = false;

  // Inspect one parsed payload; usage and model may sit at the top level or
  // under a nested `response` object.
  const consider = (payload) => {
    const record = asRecord(payload);
    const nested = asRecord(record.response);
    const found = extractTokenUsage(record.usage) ?? extractTokenUsage(nested.usage);
    if (found) {
      usage = found;
    }
    const candidateModel = modelText(record.model) || modelText(nested.model);
    if (candidateModel) {
      model = candidateModel;
    }
  };

  try {
    for (let step = await reader.read(); !step.done; step = await reader.read()) {
      const text = decoder.decode(step.value, { stream: true });
      if (isSse) {
        // Keep the trailing partial line for the next chunk.
        const lines = (pending + text).split(/\r?\n/);
        pending = lines.pop() ?? "";
        lines.forEach((line) => considerSseLine(line, consider));
        if (pending.length > USAGE_BUFFER_LIMIT_BYTES) {
          pending = "";
        }
      } else if (!overflowed) {
        bufferedBytes += step.value.byteLength;
        if (bufferedBytes > USAGE_BUFFER_LIMIT_BYTES) {
          overflowed = true;
          pending = "";
        } else {
          pending += text;
        }
      }
    }

    // Flush whatever the decoder still holds before the final parse.
    const remainder = pending + decoder.decode();
    if (isSse) {
      if (remainder) {
        considerSseLine(remainder, consider);
      }
    } else if (!overflowed && remainder) {
      const parsed = safeParse(remainder);
      if (parsed !== undefined) {
        consider(parsed);
      }
    }
  } finally {
    signal?.removeEventListener("abort", cancelQuietly);
    reader.releaseLock();
  }

  if (usage) {
    onUsage(model, usage);
  }
}
1400
/**
 * Handle one line of a server-sent event stream: when it is a `data:` line
 * carrying JSON, pass the parsed value to `consider`.
 *
 * Lines that are not `data:` fields, empty payloads, the `[DONE]` sentinel,
 * and payloads that fail to parse are all skipped.
 *
 * @param line     One line of the stream, without its line terminator.
 * @param consider Callback that receives the parsed payload.
 */
function considerSseLine(line, consider) {
  const prefix = "data:";
  const trimmed = line.trim();
  if (trimmed.startsWith(prefix)) {
    const data = trimmed.slice(prefix.length).trim();
    if (data && data !== "[DONE]") {
      const parsed = safeParse(data);
      if (parsed !== undefined) {
        consider(parsed);
      }
    }
  }
}
1414
/**
 * Parse `text` as JSON, returning `undefined` instead of throwing when the
 * text is not valid JSON.
 */
function safeParse(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
1421
/**
 * Return `value` trimmed when it is a string, otherwise the empty string.
 */
function modelText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
1424
/**
 * Return `value` when it is a finite number greater than zero, otherwise 0.
 */
function nonNegative(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return value > 0 ? value : 0;
}
1427
/**
 * Combine label parts into one composite map key by joining them with
 * LABEL_SEPARATOR; the parts are recovered later by splitting on it.
 */
function labelKey(...parts) {
  const key = parts.join(LABEL_SEPARATOR);
  return key;
}
1430
/**
 * Render a label set as `{name="value",...}` for the exposition format, with
 * each value escaped. An empty set renders as the empty string.
 *
 * @param pairs Object mapping label names to string values.
 */
function labels(pairs) {
  const rendered = [];
  for (const [name, value] of Object.entries(pairs)) {
    rendered.push(`${name}="${escapeLabelValue(value)}"`);
  }
  return rendered.length === 0 ? "" : `{${rendered.join(",")}}`;
}
1438
/**
 * Escape a label value for the Prometheus text exposition format.
 *
 * The format defines exactly three escapes inside a label value: backslash
 * (`\\`), double quote (`\"`), and line feed (`\n`). No other character is
 * escaped. In particular a carriage return is left as-is, because `\r` is not
 * a defined escape sequence and strict parsers reject it.
 *
 * @param value Raw label value.
 * @returns The value with backslash, double quote, and line feed escaped.
 */
function escapeLabelValue(value) {
  return value.replace(/[\\"\n]/g, (ch) => (ch === "\n" ? "\\n" : `\\${ch}`));
}
1441
/**
 * Format a number as its default decimal string (e.g. `1` -> "1",
 * `0.05` -> "0.05").
 */
function formatNumber(value) {
  return String(value);
}
1444
+
936
1445
  // src/server.ts
937
1446
  var DEFAULT_HOST = "127.0.0.1";
938
1447
  var DEFAULT_PORT = 4141;
939
1448
  var INVALID_JSON_MESSAGE = "Request body must be valid JSON.";
1449
+ var USAGE_CACHE_TTL_MS = 6e4;
940
1450
  function createHoopilotHandler(options = {}) {
941
1451
  const client = new CopilotClient(options);
942
1452
  const apiKey = options.apiKey ?? options.env?.HOOPILOT_API_KEY;
943
1453
  const logger = serverLogger(options);
1454
+ const metrics = options.metrics ?? new MetricsRegistry();
1455
+ const readUsage = createUsageReader(client, metrics);
1456
+ const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
944
1457
  return async (request) => {
945
1458
  const startedAt = performance.now();
946
1459
  const url = new URL(request.url);
947
1460
  const apiPath = canonicalApiPath(url.pathname);
948
1461
  const requestId = requestIdFor(request);
1462
+ const route = routeFor(request.method, apiPath);
949
1463
  const requestLogger = logger.child({
950
1464
  method: request.method,
951
1465
  path: url.pathname,
952
1466
  requestId,
953
- route: routeFor(request.method, apiPath)
1467
+ route
1468
+ });
1469
+ metrics.startRequest();
1470
+ const finish = (response) => finishResponse(response, {
1471
+ logger: requestLogger,
1472
+ method: request.method,
1473
+ metrics,
1474
+ requestId,
1475
+ route,
1476
+ startedAt
954
1477
  });
955
1478
  if (request.method === "OPTIONS") {
956
- return finishResponse(new Response(null, { headers: corsHeaders() }), {
957
- logger: requestLogger,
958
- requestId,
959
- startedAt
960
- });
1479
+ return finish(new Response(null, { headers: corsHeaders() }));
961
1480
  }
962
1481
  if (!isAuthorized(request, apiKey)) {
963
1482
  requestLogger.warn({ event: "http.request.unauthorized" }, "invalid hoopilot api key");
964
- return finishResponse(
965
- jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."),
966
- {
967
- logger: requestLogger,
968
- requestId,
969
- startedAt
970
- }
971
- );
1483
+ return finish(jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."));
972
1484
  }
973
1485
  try {
974
1486
  if (request.method === "GET" && (apiPath === "/" || apiPath === "/healthz")) {
975
- return finishResponse(
976
- jsonResponse({
977
- name: "hoopilot",
978
- object: "health",
979
- status: "ok"
980
- }),
981
- { logger: requestLogger, requestId, startedAt }
982
- );
1487
+ return finish(jsonResponse({ name: "hoopilot", object: "health", status: "ok" }));
1488
+ }
1489
+ if (request.method === "GET" && apiPath === "/metrics") {
1490
+ return finish(metricsResponse(metrics));
1491
+ }
1492
+ if (request.method === "GET" && apiPath === "/v1/usage") {
1493
+ return finish(await handleUsage(metrics, readUsage, request.signal));
983
1494
  }
984
1495
  if (request.method === "GET" && apiPath === "/v1/responses") {
985
- return finishResponse(websocketUnsupportedResponse(), {
986
- logger: requestLogger,
987
- requestId,
988
- startedAt
989
- });
1496
+ return finish(websocketUnsupportedResponse());
990
1497
  }
991
1498
  if (request.method === "GET" && apiPath === "/v1/models") {
992
- return finishResponse(await handleModels(client, request.signal, requestLogger), {
993
- logger: requestLogger,
994
- requestId,
995
- startedAt
996
- });
1499
+ return finish(await handleModels(client, metrics, request.signal, requestLogger));
997
1500
  }
998
1501
  if (request.method === "POST" && apiPath === "/v1/chat/completions") {
999
- return finishResponse(await handleChatCompletions(client, request, requestLogger), {
1000
- logger: requestLogger,
1001
- requestId,
1002
- startedAt
1003
- });
1502
+ return finish(
1503
+ await handleChatCompletions(client, metrics, recordTokens, request, requestLogger)
1504
+ );
1004
1505
  }
1005
1506
  if (request.method === "POST" && apiPath === "/v1/completions") {
1006
- return finishResponse(await handleCompletions(client, request, requestLogger), {
1007
- logger: requestLogger,
1008
- requestId,
1009
- startedAt
1010
- });
1507
+ return finish(
1508
+ await handleCompletions(client, metrics, recordTokens, request, requestLogger)
1509
+ );
1011
1510
  }
1012
1511
  if (request.method === "POST" && apiPath === "/v1/responses") {
1013
- return finishResponse(await handleResponses(client, request, requestLogger), {
1014
- logger: requestLogger,
1015
- requestId,
1016
- startedAt
1017
- });
1512
+ return finish(await handleResponses(client, metrics, recordTokens, request, requestLogger));
1018
1513
  }
1019
- return finishResponse(
1020
- jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`),
1021
- { logger: requestLogger, requestId, startedAt }
1022
- );
1514
+ return finish(jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`));
1023
1515
  } catch (error) {
1024
1516
  if (error instanceof CopilotAuthError) {
1025
1517
  requestLogger.warn(
1026
1518
  { err: errorDetails(error), event: "copilot.auth.missing" },
1027
1519
  "copilot auth failed"
1028
1520
  );
1029
- return finishResponse(jsonError(401, "copilot_auth_error", error.message), {
1030
- logger: requestLogger,
1031
- requestId,
1032
- startedAt
1033
- });
1521
+ return finish(jsonError(401, "copilot_auth_error", error.message));
1034
1522
  }
1035
1523
  const message = errorMessage(error);
1036
1524
  if (message === INVALID_JSON_MESSAGE) {
@@ -1044,11 +1532,7 @@ function createHoopilotHandler(options = {}) {
1044
1532
  "request failed"
1045
1533
  );
1046
1534
  }
1047
- return finishResponse(jsonError(500, "internal_error", message), {
1048
- logger: requestLogger,
1049
- requestId,
1050
- startedAt
1051
- });
1535
+ return finish(jsonError(500, "internal_error", message));
1052
1536
  }
1053
1537
  };
1054
1538
  }
@@ -1077,8 +1561,9 @@ function startHoopilotServer(options = {}) {
1077
1561
  url: `http://${host}:${server.port}`
1078
1562
  };
1079
1563
  }
1080
- async function handleModels(client, signal, logger) {
1564
+ async function handleModels(client, metrics, signal, logger) {
1081
1565
  const upstream = await client.models(signal);
1566
+ metrics.recordUpstream("/models", upstream.ok);
1082
1567
  if (!upstream.ok) {
1083
1568
  if (isUpstreamAuthStatus(upstream.status)) {
1084
1569
  return proxyError(upstream, logger);
@@ -1096,38 +1581,50 @@ async function handleModels(client, signal, logger) {
1096
1581
  logUpstreamSuccess(logger, "/models", upstream.status);
1097
1582
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
1098
1583
  }
1099
- async function handleChatCompletions(client, request, logger) {
1584
+ async function handleChatCompletions(client, metrics, recordTokens, request, logger) {
1100
1585
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
1101
1586
  const upstream = await client.chatCompletions(chatRequest, request.signal);
1587
+ metrics.recordUpstream("/chat/completions", upstream.ok);
1102
1588
  if (!upstream.ok) {
1103
1589
  return proxyError(upstream, logger);
1104
1590
  }
1105
1591
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
1106
- return proxyResponse(upstream);
1592
+ const model = normalizeRequestedModel(chatRequest.model);
1593
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1107
1594
  }
1108
- async function handleCompletions(client, request, logger) {
1595
+ async function handleCompletions(client, metrics, recordTokens, request, logger) {
1109
1596
  const body = await readJson(request);
1110
1597
  const upstream = await client.chatCompletions(
1111
1598
  completionsRequestToChatCompletion(body),
1112
1599
  request.signal
1113
1600
  );
1601
+ metrics.recordUpstream("/chat/completions", upstream.ok);
1114
1602
  if (!upstream.ok) {
1115
1603
  return proxyError(upstream, logger);
1116
1604
  }
1117
1605
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
1606
+ const model = normalizeRequestedModel(body.model);
1118
1607
  if (isStreamingResponse(upstream)) {
1119
- return proxyResponse(upstream);
1608
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1609
+ }
1610
+ const completion = asRecord(await upstream.json());
1611
+ const usage = extractTokenUsage(completion.usage);
1612
+ if (usage) {
1613
+ const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
1614
+ recordTokens(responseModel || model, usage);
1120
1615
  }
1121
- return jsonResponse(chatCompletionToCompletion(await upstream.json()));
1616
+ return jsonResponse(chatCompletionToCompletion(completion));
1122
1617
  }
1123
- async function handleResponses(client, request, logger) {
1618
+ async function handleResponses(client, metrics, recordTokens, request, logger) {
1124
1619
  const body = await readJsonText(request);
1125
1620
  const upstream = await client.responses(body, request.signal);
1621
+ metrics.recordUpstream("/responses", upstream.ok);
1126
1622
  if (!upstream.ok) {
1127
1623
  return proxyError(upstream, logger);
1128
1624
  }
1129
1625
  logUpstreamSuccess(logger, "/responses", upstream.status);
1130
- return proxyResponse(upstream);
1626
+ const model = normalizeRequestedModel(asRecord(safeParseJson(body)).model);
1627
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
1131
1628
  }
1132
1629
  async function proxyError(upstream, logger) {
1133
1630
  const text = await upstream.text();
@@ -1246,7 +1743,21 @@ function serverLogger(options) {
1246
1743
  }
1247
1744
  function finishResponse(response, options) {
1248
1745
  const withRequestId = responseWithRequestId(response, options.requestId);
1249
- logRequestCompleted(options.logger, withRequestId, options.startedAt);
1746
+ const stream = isStreamingResponse(withRequestId);
1747
+ const status = withRequestId.status;
1748
+ const complete = () => {
1749
+ const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
1750
+ options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
1751
+ logRequestCompleted(options.logger, status, stream, durationMs);
1752
+ };
1753
+ if (stream && withRequestId.body) {
1754
+ return new Response(trackStreamCompletion(withRequestId.body, complete), {
1755
+ headers: withRequestId.headers,
1756
+ status,
1757
+ statusText: withRequestId.statusText
1758
+ });
1759
+ }
1760
+ complete();
1250
1761
  return withRequestId;
1251
1762
  }
1252
1763
  function responseWithRequestId(response, requestId) {
@@ -1258,18 +1769,48 @@ function responseWithRequestId(response, requestId) {
1258
1769
  statusText: response.statusText
1259
1770
  });
1260
1771
  }
1261
- function logRequestCompleted(logger, response, startedAt) {
1772
+ function trackStreamCompletion(body, onComplete) {
1773
+ const reader = body.getReader();
1774
+ let fired = false;
1775
+ const fire = () => {
1776
+ if (!fired) {
1777
+ fired = true;
1778
+ onComplete();
1779
+ }
1780
+ };
1781
+ return new ReadableStream({
1782
+ async pull(controller) {
1783
+ try {
1784
+ const { done, value } = await reader.read();
1785
+ if (done) {
1786
+ controller.close();
1787
+ fire();
1788
+ return;
1789
+ }
1790
+ controller.enqueue(value);
1791
+ } catch (error) {
1792
+ fire();
1793
+ controller.error(error);
1794
+ }
1795
+ },
1796
+ cancel(reason) {
1797
+ fire();
1798
+ return reader.cancel(reason);
1799
+ }
1800
+ });
1801
+ }
1802
+ function logRequestCompleted(logger, status, stream, durationMs) {
1262
1803
  const fields = {
1263
- durationMs: Math.round((performance.now() - startedAt) * 100) / 100,
1804
+ durationMs,
1264
1805
  event: "http.request.completed",
1265
- status: response.status,
1266
- stream: isStreamingResponse(response)
1806
+ status,
1807
+ stream
1267
1808
  };
1268
- if (response.status >= 500) {
1809
+ if (status >= 500) {
1269
1810
  logger.error(fields, "request completed with server error");
1270
1811
  return;
1271
1812
  }
1272
- if (response.status >= 400) {
1813
+ if (status >= 400) {
1273
1814
  logger.warn(fields, "request completed with client error");
1274
1815
  return;
1275
1816
  }
@@ -1290,6 +1831,8 @@ function canonicalApiPath(path) {
1290
1831
  return "/v1/completions";
1291
1832
  case "/responses":
1292
1833
  return "/v1/responses";
1834
+ case "/usage":
1835
+ return "/v1/usage";
1293
1836
  default:
1294
1837
  return withoutTrailingSlash;
1295
1838
  }
@@ -1301,6 +1844,12 @@ function routeFor(method, path) {
1301
1844
  if (method === "GET" && (path === "/" || path === "/healthz")) {
1302
1845
  return "health";
1303
1846
  }
1847
+ if (method === "GET" && path === "/metrics") {
1848
+ return "metrics";
1849
+ }
1850
+ if (method === "GET" && path === "/v1/usage") {
1851
+ return "usage";
1852
+ }
1304
1853
  if (method === "GET" && path === "/v1/models") {
1305
1854
  return "models";
1306
1855
  }
@@ -1331,25 +1880,85 @@ function logUpstreamSuccess(logger, upstreamPath, status) {
1331
1880
  "copilot upstream request completed"
1332
1881
  );
1333
1882
  }
1883
+ function metricsResponse(metrics) {
1884
+ return new Response(metrics.renderPrometheus(), {
1885
+ headers: {
1886
+ ...corsHeaders(),
1887
+ "content-type": PROMETHEUS_CONTENT_TYPE
1888
+ },
1889
+ status: 200
1890
+ });
1891
+ }
1892
+ async function handleUsage(metrics, readUsage, signal) {
1893
+ const proxy = metrics.snapshot();
1894
+ const { copilot, error } = await readUsage(signal);
1895
+ const body = { copilot: copilot ?? null, object: "usage", proxy };
1896
+ if (error) {
1897
+ body.copilot_error = error;
1898
+ }
1899
+ return jsonResponse(body);
1900
+ }
1901
+ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
1902
+ const usagePath = "/copilot_internal/user";
1903
+ let cache;
1904
+ return async (signal) => {
1905
+ if (cache && now() - cache.atMs < ttlMs) {
1906
+ return { copilot: cache.value };
1907
+ }
1908
+ try {
1909
+ const upstream = await client.usage(signal);
1910
+ metrics.recordUpstream(usagePath, upstream.ok);
1911
+ if (!upstream.ok) {
1912
+ return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
1913
+ }
1914
+ const value = normalizeCopilotUsage(await upstream.json().catch(() => ({})));
1915
+ cache = { atMs: now(), value };
1916
+ metrics.recordCopilotQuota(value);
1917
+ return { copilot: value };
1918
+ } catch (error) {
1919
+ metrics.recordUpstream(usagePath, false);
1920
+ if (error instanceof CopilotAuthError) {
1921
+ return { error: error.message };
1922
+ }
1923
+ return { error: errorMessage(error) };
1924
+ }
1925
+ };
1926
+ }
1927
+ function safeParseJson(text) {
1928
+ try {
1929
+ return JSON.parse(text);
1930
+ } catch {
1931
+ return void 0;
1932
+ }
1933
+ }
1334
1934
  export {
1935
+ COPILOT_USAGE_API_VERSION,
1335
1936
  CopilotAuth,
1336
1937
  CopilotAuthError,
1337
1938
  CopilotClient,
1939
+ DEFAULT_GITHUB_API_BASE_URL,
1338
1940
  DEFAULT_LOG_FORMAT,
1339
1941
  DEFAULT_LOG_LEVEL,
1340
1942
  DEFAULT_MODEL,
1943
+ MetricsRegistry,
1944
+ PROMETHEUS_CONTENT_TYPE,
1945
+ applyCopilotHeaders,
1946
+ applyGithubApiHeaders,
1341
1947
  authStorePath,
1342
1948
  chatCompletionToCompletion,
1343
1949
  chatCompletionToResponse,
1344
1950
  completionsRequestToChatCompletion,
1345
1951
  createHoopilotHandler,
1346
1952
  createHoopilotLogger,
1953
+ extractTokenUsage,
1347
1954
  fallbackModels,
1348
1955
  githubCopilotDeviceLogin,
1349
1956
  noopLogger,
1350
1957
  normalizeChatCompletionRequest,
1958
+ normalizeCopilotUsage,
1351
1959
  normalizeModelsResponse,
1352
1960
  normalizeRequestedModel,
1961
+ observeResponseUsage,
1353
1962
  parseLogFormat,
1354
1963
  parseLogLevel,
1355
1964
  readStoredCopilotAuth,