@openhoo/hoopilot 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -110,6 +110,8 @@ var CopilotAuth = class {
110
110
  };
111
111
 
112
112
  // src/copilot.ts
113
+ var DEFAULT_GITHUB_API_BASE_URL = "https://api.github.com";
114
+ var COPILOT_USAGE_API_VERSION = "2025-04-01";
113
115
  function applyCopilotHeaders(headers, token) {
114
116
  headers.set("accept", headers.get("accept") ?? "application/json");
115
117
  headers.set("authorization", `Bearer ${token}`);
@@ -121,12 +123,44 @@ function applyCopilotHeaders(headers, token) {
121
123
  headers.set("x-github-api-version", "2026-06-01");
122
124
  return headers;
123
125
  }
126
/**
 * Stamp the headers used for GitHub REST API calls onto `headers`.
 *
 * An `accept` value that is already present is kept (JSON is the default
 * otherwise); every other header listed below is overwritten.
 *
 * @param headers Headers-like object exposing `get` and `set`; mutated in place.
 * @param token Token sent with the `token <value>` authorization scheme.
 * @returns The same `headers` object that was passed in.
 */
function applyGithubApiHeaders(headers, token) {
  const accept = headers.get("accept") ?? "application/json";
  const entries = [
    ["accept", accept],
    ["authorization", `token ${token}`],
    ["editor-plugin-version", "hoopilot/0.1.0"],
    ["editor-version", "Hoopilot/0.1.0"],
    ["user-agent", "hoopilot/0.1.0"],
    ["x-github-api-version", COPILOT_USAGE_API_VERSION]
  ];
  for (const [name, value] of entries) {
    headers.set(name, value);
  }
  return headers;
}
124
135
  var CopilotClient = class {
125
136
  #auth;
126
137
  #fetch;
138
+ #githubApiBaseUrl;
127
139
  constructor(options = {}) {
128
140
  this.#auth = new CopilotAuth(options);
129
141
  this.#fetch = options.fetch ?? fetch;
142
+ this.#githubApiBaseUrl = trimTrailingSlash(
143
+ options.githubApiBaseUrl ?? options.env?.HOOPILOT_GITHUB_API_BASE_URL ?? DEFAULT_GITHUB_API_BASE_URL
144
+ );
145
+ }
146
+ /**
147
+ * Fetch the Copilot account's quota / premium-request usage from the GitHub
148
+ * REST `copilot_internal/user` endpoint. The stored device-flow OAuth token is
149
+ * accepted directly here — no Copilot token exchange is required to read quota.
150
+ */
151
+ async usage(signal) {
152
+ if (!isHttpsOrLoopback(this.#githubApiBaseUrl)) {
153
+ throw new Error(
154
+ `Refusing to send the GitHub OAuth token to a non-HTTPS host: ${this.#githubApiBaseUrl}`
155
+ );
156
+ }
157
+ const access = await this.#auth.getAccess();
158
+ const headers = applyGithubApiHeaders(new Headers(), access.token);
159
+ return this.#fetch(`${this.#githubApiBaseUrl}/copilot_internal/user`, {
160
+ headers,
161
+ method: "GET",
162
+ signal
163
+ });
130
164
  }
131
165
  async chatCompletions(body, signal) {
132
166
  return this.fetchCopilot("/chat/completions", {
@@ -166,6 +200,81 @@ var CopilotClient = class {
166
200
  });
167
201
  }
168
202
  };
203
/**
 * Convert a raw usage payload into the normalized usage shape.
 *
 * Quotas are taken from `quota_snapshots` when that yields at least one
 * category. Otherwise they are derived from the `limited_user_quotas`
 * (remaining) and `monthly_quotas` (entitlement) maps, over the union of their
 * keys. Fields whose value is `undefined` are dropped from the result.
 *
 * @param body Parsed response body; anything `asRecord` accepts.
 * @returns Object with optional `accessTypeSku`, `chatEnabled`, `plan`,
 *   `quotaResetDate`, plus a `quotas` map keyed by category.
 */
function normalizeCopilotUsage(body) {
  const record = asRecord(body);
  const quotas = {};
  Object.entries(asRecord(record.quota_snapshots)).forEach(([category, detail]) => {
    quotas[category] = normalizeQuotaDetail(asRecord(detail));
  });

  const hasSnapshots = Object.keys(quotas).length > 0;
  if (!hasSnapshots) {
    const remainingByCategory = asRecord(record.limited_user_quotas);
    const monthlyByCategory = asRecord(record.monthly_quotas);
    const categories = new Set([
      ...Object.keys(remainingByCategory),
      ...Object.keys(monthlyByCategory)
    ]);
    for (const category of categories) {
      const entitlement = numberOrUndefined(monthlyByCategory[category]);
      const left = numberOrUndefined(remainingByCategory[category]);
      // A percentage only makes sense with a positive entitlement and a known remainder.
      let percentRemaining;
      if (entitlement !== undefined && entitlement > 0 && left !== undefined) {
        percentRemaining = (left / entitlement) * 100;
      }
      quotas[category] = removeUndefinedQuota({
        entitlement,
        percentRemaining,
        remaining: left,
        used: usedFrom(entitlement, left)
      });
    }
  }

  // The first non-empty string among the candidate fields wins.
  const resetCandidates = [
    record.quota_reset_date,
    record.quota_reset_date_utc,
    record.limited_user_reset_date
  ];
  let quotaResetDate;
  for (const candidate of resetCandidates) {
    quotaResetDate = stringOrUndefined(candidate);
    if (quotaResetDate !== undefined) {
      break;
    }
  }

  let chatEnabled;
  if (typeof record.chat_enabled === "boolean") {
    chatEnabled = record.chat_enabled;
  }

  return removeUndefinedUsage({
    accessTypeSku: stringOrUndefined(record.access_type_sku),
    chatEnabled,
    plan: stringOrUndefined(record.copilot_plan),
    quotaResetDate,
    quotas
  });
}
232
/**
 * Normalize one quota entry into camelCase fields.
 *
 * `remaining` falls back to `quota_remaining` when `remaining` is not a finite
 * number. `used` is derived from entitlement and remaining. Fields that end up
 * `undefined` are omitted from the result.
 *
 * @param detail Record holding the raw snake_case quota fields.
 * @returns Object containing only the fields that could be resolved.
 */
function normalizeQuotaDetail(detail) {
  const booleanOrUndefined = (value) => (typeof value === "boolean" ? value : undefined);

  const entitlement = numberOrUndefined(detail.entitlement);
  let remaining = numberOrUndefined(detail.remaining);
  if (remaining === undefined) {
    remaining = numberOrUndefined(detail.quota_remaining);
  }

  const normalized = {
    entitlement,
    overageCount: numberOrUndefined(detail.overage_count),
    overagePermitted: booleanOrUndefined(detail.overage_permitted),
    percentRemaining: numberOrUndefined(detail.percent_remaining),
    remaining,
    unlimited: booleanOrUndefined(detail.unlimited),
    used: usedFrom(entitlement, remaining)
  };
  return removeUndefinedQuota(normalized);
}
245
/**
 * Derive the used amount as `entitlement - remaining`, floored at zero.
 *
 * @param entitlement Total allowance, or `undefined` when unknown.
 * @param remaining Amount left, or `undefined` when unknown.
 * @returns The non-negative difference, or `undefined` if either input is `undefined`.
 */
function usedFrom(entitlement, remaining) {
  const bothKnown = entitlement !== undefined && remaining !== undefined;
  return bothKnown ? Math.max(0, entitlement - remaining) : undefined;
}
251
/**
 * Decide whether a base URL is acceptable for sending credentials to.
 *
 * Any `https:` URL is accepted. Plain `http:` is accepted only for the
 * loopback hosts `127.0.0.1`, `localhost`, and the IPv6 loopback `::1`.
 *
 * @param rawUrl URL string to check.
 * @returns `true` when the URL is HTTPS or HTTP-on-loopback; `false` otherwise,
 *   including when `rawUrl` cannot be parsed as a URL.
 */
function isHttpsOrLoopback(rawUrl) {
  let url;
  try {
    url = new URL(rawUrl);
  } catch {
    return false;
  }
  if (url.protocol === "https:") {
    return true;
  }
  if (url.protocol !== "http:") {
    return false;
  }
  // URL.hostname serializes IPv6 literals with their square brackets, so the
  // loopback address appears as "[::1]". The bare "::1" form is kept as well
  // so that nothing previously accepted is rejected.
  const loopbackHosts = ["127.0.0.1", "localhost", "[::1]", "::1"];
  return loopbackHosts.includes(url.hostname);
}
263
/**
 * Pass through finite numbers; map everything else to `undefined`.
 *
 * @param value Any value.
 * @returns `value` when it is a finite number, otherwise `undefined`.
 */
function numberOrUndefined(value) {
  if (typeof value !== "number") {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
266
/**
 * Pass through non-empty strings; map everything else to `undefined`.
 *
 * @param value Any value.
 * @returns `value` when it is a string of length > 0, otherwise `undefined`.
 */
function stringOrUndefined(value) {
  if (typeof value !== "string" || value.length === 0) {
    return undefined;
  }
  return value;
}
269
/**
 * Copy a quota object, leaving out every property whose value is `undefined`.
 *
 * @param quota Source object; not modified.
 * @returns New object holding only the defined entries, in their original order.
 */
function removeUndefinedQuota(quota) {
  const kept = [];
  for (const entry of Object.entries(quota)) {
    if (entry[1] !== undefined) {
      kept.push(entry);
    }
  }
  return Object.fromEntries(kept);
}
274
/**
 * Copy a usage object, leaving out every property whose value is `undefined`.
 *
 * @param usage Source object; not modified.
 * @returns New object holding only the defined entries, in their original order.
 */
function removeUndefinedUsage(usage) {
  const isDefined = (entry) => entry[1] !== undefined;
  return Object.fromEntries(Object.entries(usage).filter(isDefined));
}
169
278
 
170
279
  // src/github-device.ts
171
280
  import { setTimeout as sleep } from "timers/promises";
@@ -503,6 +612,40 @@ function contentToText(content) {
503
612
  }
504
613
  return "";
505
614
  }
615
/**
 * Pull token counts out of an upstream `usage` object.
 *
 * Two naming schemes are accepted: `prompt_tokens` / `completion_tokens` and
 * `input_tokens` / `output_tokens`, along with their `*_tokens_details`
 * records. When several names are present, the first finite number wins.
 *
 * @param usage Raw usage value; anything `asRecord` accepts.
 * @returns `undefined` when no prompt, completion or total count is present.
 *   Otherwise an object with `promptTokens`, `completionTokens`, `totalTokens`
 *   (defaulting to prompt + completion) and, when reported, `reasoningTokens`
 *   and `cachedTokens`.
 */
function extractTokenUsage(usage) {
  const record = asRecord(usage);
  const promptCount = firstNumber(record.prompt_tokens, record.input_tokens);
  const completionCount = firstNumber(record.completion_tokens, record.output_tokens);
  const totalCount = firstNumber(record.total_tokens);

  const nothingReported = [promptCount, completionCount, totalCount].every(
    (count) => count === undefined
  );
  if (nothingReported) {
    return undefined;
  }

  // Missing prompt/completion counts are treated as zero once any count exists.
  const promptTokens = promptCount ?? 0;
  const completionTokens = completionCount ?? 0;

  const completionDetails = asRecord(record.completion_tokens_details);
  const outputDetails = asRecord(record.output_tokens_details);
  const promptDetails = asRecord(record.prompt_tokens_details);
  const inputDetails = asRecord(record.input_tokens_details);

  return removeUndefined({
    cachedTokens: firstNumber(promptDetails.cached_tokens, inputDetails.cached_tokens),
    completionTokens,
    promptTokens,
    reasoningTokens: firstNumber(
      completionDetails.reasoning_tokens,
      outputDetails.reasoning_tokens
    ),
    totalTokens: totalCount ?? promptTokens + completionTokens
  });
}
641
/**
 * Return the first argument that is a finite number.
 *
 * @param values Candidate values, checked left to right.
 * @returns The first finite number, or `undefined` when there is none.
 */
function firstNumber(...values) {
  return values.find((candidate) => typeof candidate === "number" && Number.isFinite(candidate));
}
506
649
  function firstChoice(completion) {
507
650
  const choices = Array.isArray(completion.choices) ? completion.choices : [];
508
651
  return asRecord(choices[0]);
@@ -517,104 +660,449 @@ function epochSeconds() {
517
660
  return Math.floor(Date.now() / 1e3);
518
661
  }
519
662
 
663
+ // src/metrics.ts
664
/** Content-type string for the Prometheus text format, version 0.0.4. */
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
/** Upper bounds, in seconds, of the request-duration histogram buckets. */
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
/** Cap on how much response text is buffered while looking for token usage. */
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
/** Maximum number of distinct model labels tracked before folding into UNKNOWN_MODEL. */
var MAX_TRACKED_MODELS = 200;
/** Maximum length of a model label after sanitizing. */
var MAX_MODEL_LABEL_LENGTH = 200;
// Separator placed between the parts of a composite map key. Keys are taken
// apart again with `split`, so this must be a non-empty string that cannot
// occur inside a part: an empty separator makes `split` return individual
// characters. U+001F (unit separator) is a control character, which the model
// label sanitizer already strips from client-supplied names.
var LABEL_SEPARATOR = "\u001f";
/** Label used when a model name is empty or the tracked-model cap is reached. */
var UNKNOWN_MODEL = "unknown";
671
/**
 * Create a fresh per-model totals record with every counter at zero.
 *
 * @returns A new object with `cached`, `completion`, `prompt`, `reasoning`,
 *   `requests` and `total` all set to 0.
 */
function emptyModelTotals() {
  const counters = ["cached", "completion", "prompt", "reasoning", "requests", "total"];
  return Object.fromEntries(counters.map((counter) => [counter, 0]));
}
674
/**
 * In-memory store for proxy metrics: request counts, request-duration
 * histograms, per-model token totals, upstream call outcomes, and the latest
 * Copilot quota. It can be read back as a JSON-friendly snapshot or as
 * Prometheus text.
 */
var MetricsRegistry = class {
  /** Epoch milliseconds captured when the registry was constructed. */
  #startedAtMs;
  /** Requests that have been started but not yet observed. */
  #inFlight = 0;
  /** Completed-request counts keyed by labelKey(route, method, status). */
  #requests = new Map();
  /** Per-route histogram state: `{ buckets, count, sum }`. */
  #durations = new Map();
  /** Per-model token totals keyed by sanitized model label. */
  #tokens = new Map();
  /** Upstream call counts keyed by labelKey(path, "ok" | "error"). */
  #upstream = new Map();
  /** Most recent value passed to recordCopilotQuota, if any. */
  #copilotQuota;
  /**
   * @param options Optional settings; `options.now` replaces `Date.now` as the
   *   clock used to stamp the start time.
   */
  constructor(options = {}) {
    this.#startedAtMs = (options.now ?? Date.now)();
  }
  /** Mark a request as started; pair with exactly one {@link observe}. */
  startRequest() {
    this.#inFlight += 1;
  }
  /**
   * Record a completed request and clear its in-flight slot.
   *
   * @param observation Object with `route`, `method`, `status` and `durationMs`.
   */
  observe(observation) {
    // Never let the gauge go negative if observe is called without a start.
    if (this.#inFlight > 0) {
      this.#inFlight -= 1;
    }
    const key = labelKey(observation.route, observation.method, String(observation.status));
    this.#requests.set(key, (this.#requests.get(key) ?? 0) + 1);
    this.#observeDuration(observation.route, observation.durationMs / 1e3);
  }
  /**
   * Accumulate token counts for a model from one upstream completion.
   *
   * @param model Model name; sanitized before being used as a label.
   * @param usage Object with `promptTokens`, `completionTokens`, `totalTokens`
   *   and optional `reasoningTokens` / `cachedTokens`.
   */
  recordTokens(model, usage) {
    const name = this.#modelLabel(model);
    const totals = this.#tokens.get(name) ?? emptyModelTotals();
    totals.requests += 1;
    totals.prompt += nonNegative(usage.promptTokens);
    totals.completion += nonNegative(usage.completionTokens);
    totals.total += nonNegative(usage.totalTokens);
    totals.reasoning += nonNegative(usage.reasoningTokens ?? 0);
    totals.cached += nonNegative(usage.cachedTokens ?? 0);
    this.#tokens.set(name, totals);
  }
  /**
   * Record one upstream Copilot call and whether it succeeded.
   *
   * @param path Upstream path used as the `path` label.
   * @param ok Truthy for a successful call; stored as outcome "ok" or "error".
   */
  recordUpstream(path, ok) {
    const key = labelKey(path, ok ? "ok" : "error");
    this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
  }
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
  recordCopilotQuota(usage) {
    this.#copilotQuota = usage;
  }
  // Sanitize the model into a bounded, control-char-free label. The model can
  // originate from a client request, so cap its length, strip characters that
  // would corrupt the exposition format, and fold overflow past the cardinality
  // limit into UNKNOWN_MODEL to keep the series count bounded.
  #modelLabel(model) {
    const cleaned = model.replace(/[\u0000-\u001f\u007f]/g, "").trim().slice(0, MAX_MODEL_LABEL_LENGTH) || UNKNOWN_MODEL;
    if (!this.#tokens.has(cleaned) && this.#tokens.size >= MAX_TRACKED_MODELS) {
      return UNKNOWN_MODEL;
    }
    return cleaned;
  }
  // Add one sample to the route's histogram. Non-finite or negative durations
  // are counted as 0. Buckets are stored non-cumulatively (only the first bound
  // that fits is incremented); renderPrometheus accumulates them on output.
  #observeDuration(route, seconds) {
    const value = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
    const entry = this.#durations.get(route) ?? {
      buckets: new Array(DURATION_BUCKETS_SECONDS.length).fill(0),
      count: 0,
      sum: 0
    };
    entry.count += 1;
    entry.sum += value;
    const index = DURATION_BUCKETS_SECONDS.findIndex((bound) => value <= bound);
    // Samples above the largest bound only show up in count (the +Inf bucket).
    if (index !== -1) {
      entry.buckets[index] = (entry.buckets[index] ?? 0) + 1;
    }
    this.#durations.set(route, entry);
  }
  /**
   * A JSON-friendly view of the current counters.
   *
   * @param now Clock function used to compute uptime; defaults to `Date.now`.
   * @returns Object with `inFlight`, `requests`, `startedAt`, `tokens`,
   *   `upstream` and `uptimeSeconds`.
   */
  snapshot(now = Date.now) {
    const byRoute = {};
    const byStatus = {};
    let requestsTotal = 0;
    for (const [key, count] of this.#requests) {
      const [route = "", , status = ""] = key.split(LABEL_SEPARATOR);
      byRoute[route] = (byRoute[route] ?? 0) + count;
      byStatus[status] = (byStatus[status] ?? 0) + count;
      requestsTotal += count;
    }
    const modelEntries = [];
    const tokenTotals = { cached: 0, completion: 0, prompt: 0, reasoning: 0, total: 0 };
    for (const [model, totals] of this.#tokens) {
      modelEntries.push([model, { ...totals }]);
      tokenTotals.prompt += totals.prompt;
      tokenTotals.completion += totals.completion;
      tokenTotals.total += totals.total;
      tokenTotals.reasoning += totals.reasoning;
      tokenTotals.cached += totals.cached;
    }
    // Model names can come from clients. Object.fromEntries defines own data
    // properties, so a model literally named "__proto__" becomes a normal
    // entry instead of replacing the object's prototype, which is what plain
    // `byModel[model] = ...` assignment would do.
    const byModel = Object.fromEntries(modelEntries);
    let upstreamTotal = 0;
    let upstreamErrors = 0;
    for (const [key, count] of this.#upstream) {
      upstreamTotal += count;
      if (key.endsWith(`${LABEL_SEPARATOR}error`)) {
        upstreamErrors += count;
      }
    }
    return {
      inFlight: this.#inFlight,
      requests: { byRoute, byStatus, total: requestsTotal },
      startedAt: new Date(this.#startedAtMs).toISOString(),
      tokens: { byModel, ...tokenTotals },
      upstream: { errors: upstreamErrors, total: upstreamTotal },
      uptimeSeconds: Math.max(0, Math.round((now() - this.#startedAtMs) / 1e3))
    };
  }
  /**
   * Render the Prometheus text exposition format (version 0.0.4).
   *
   * @param now Clock function used to compute uptime; defaults to `Date.now`.
   * @returns The metrics text, terminated by a newline.
   */
  renderPrometheus(now = Date.now) {
    const lines = [];
    lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
    lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
    lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
    lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
    lines.push("# TYPE hoopilot_uptime_seconds gauge");
    lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
    lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
    lines.push("# TYPE hoopilot_requests_in_flight gauge");
    lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
    lines.push("# HELP hoopilot_requests_total Completed requests by route, method, and status.");
    lines.push("# TYPE hoopilot_requests_total counter");
    for (const [key, count] of this.#requests) {
      const [route = "", method = "", status = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_requests_total${labels({ method, route, status })} ${count}`);
    }
    lines.push(
      "# HELP hoopilot_upstream_requests_total Copilot upstream calls by path and outcome."
    );
    lines.push("# TYPE hoopilot_upstream_requests_total counter");
    for (const [key, count] of this.#upstream) {
      const [path = "", outcome = ""] = key.split(LABEL_SEPARATOR);
      lines.push(`hoopilot_upstream_requests_total${labels({ outcome, path })} ${count}`);
    }
    lines.push(
      "# HELP hoopilot_tokens_total Tokens reported by upstream usage, by model and type."
    );
    lines.push("# TYPE hoopilot_tokens_total counter");
    for (const [model, totals] of this.#tokens) {
      lines.push(`hoopilot_tokens_total${labels({ model, type: "prompt" })} ${totals.prompt}`);
      lines.push(
        `hoopilot_tokens_total${labels({ model, type: "completion" })} ${totals.completion}`
      );
      lines.push(
        `hoopilot_tokens_total${labels({ model, type: "reasoning" })} ${totals.reasoning}`
      );
      lines.push(`hoopilot_tokens_total${labels({ model, type: "cached" })} ${totals.cached}`);
    }
    lines.push("# HELP hoopilot_model_requests_total Completions with usage observed, by model.");
    lines.push("# TYPE hoopilot_model_requests_total counter");
    for (const [model, totals] of this.#tokens) {
      lines.push(`hoopilot_model_requests_total${labels({ model })} ${totals.requests}`);
    }
    lines.push("# HELP hoopilot_request_duration_seconds Request duration by route.");
    lines.push("# TYPE hoopilot_request_duration_seconds histogram");
    for (const [route, entry] of this.#durations) {
      // Stored buckets are per-bound counts; Prometheus expects running totals.
      let cumulative = 0;
      for (let i = 0; i < DURATION_BUCKETS_SECONDS.length; i += 1) {
        cumulative += entry.buckets[i] ?? 0;
        const le = formatNumber(DURATION_BUCKETS_SECONDS[i] ?? 0);
        lines.push(
          `hoopilot_request_duration_seconds_bucket${labels({ le, route })} ${cumulative}`
        );
      }
      lines.push(
        `hoopilot_request_duration_seconds_bucket${labels({ le: "+Inf", route })} ${entry.count}`
      );
      lines.push(`hoopilot_request_duration_seconds_sum${labels({ route })} ${entry.sum}`);
      lines.push(`hoopilot_request_duration_seconds_count${labels({ route })} ${entry.count}`);
    }
    this.#renderCopilotQuota(lines);
    return `${lines.join("\n")}\n`;
  }
  // Append quota gauges for the stored Copilot quota, if one has been recorded.
  // A gauge family is emitted only when at least one category has that field.
  #renderCopilotQuota(lines) {
    const usage = this.#copilotQuota;
    if (!usage) {
      return;
    }
    const categories = Object.entries(usage.quotas);
    const gauge = (suffix, help, pick) => {
      const present = categories.filter(([, quota]) => pick(quota) !== void 0);
      if (present.length === 0) {
        return;
      }
      lines.push(`# HELP hoopilot_copilot_quota_${suffix} ${help}`);
      lines.push(`# TYPE hoopilot_copilot_quota_${suffix} gauge`);
      for (const [category, quota] of present) {
        lines.push(`hoopilot_copilot_quota_${suffix}${labels({ category })} ${pick(quota)}`);
      }
    };
    gauge("remaining", "Remaining quota for the Copilot category.", (q) => q.remaining);
    gauge("entitlement", "Quota entitlement for the Copilot category.", (q) => q.entitlement);
    gauge("used", "Used quota (entitlement minus remaining) for the category.", (q) => q.used);
    gauge(
      "percent_remaining",
      "Percent of quota remaining for the Copilot category.",
      (q) => q.percentRemaining
    );
    // An absent or unparseable reset date yields NaN and is skipped.
    const resetMs = usage.quotaResetDate ? Date.parse(usage.quotaResetDate) : Number.NaN;
    if (Number.isFinite(resetMs)) {
      lines.push(
        "# HELP hoopilot_copilot_quota_reset_timestamp_seconds Unix epoch of the next reset."
      );
      lines.push("# TYPE hoopilot_copilot_quota_reset_timestamp_seconds gauge");
      lines.push(`hoopilot_copilot_quota_reset_timestamp_seconds ${resetMs / 1e3}`);
    }
    if (usage.plan || usage.accessTypeSku) {
      lines.push("# HELP hoopilot_copilot_info Copilot plan metadata as a constant-1 info gauge.");
      lines.push("# TYPE hoopilot_copilot_info gauge");
      lines.push(
        `hoopilot_copilot_info${labels({
          access_type_sku: usage.accessTypeSku ?? "",
          plan: usage.plan ?? ""
        })} 1`
      );
    }
  }
};
894
/**
 * Tap a response body so token usage can be read without delaying the client.
 *
 * The body is split with `tee()`: one branch is returned to the caller inside
 * a new Response carrying the same headers and status, and the other is
 * consumed in the background by `consumeUsage`. Errors from that background
 * read are deliberately ignored so they cannot affect the client response.
 *
 * @param response Upstream response.
 * @param fallbackModel Model name reported if the payload does not name one.
 * @param onUsage Callback `(model, usage)` invoked when usage is found.
 * @param signal Optional abort signal forwarded to the background reader.
 * @returns `response` itself when it has no body, otherwise the new Response.
 */
function observeResponseUsage(response, fallbackModel, onUsage, signal) {
  const source = response.body;
  if (!source) {
    return response;
  }
  const [clientBranch, observerBranch] = source.tee();
  const contentType = response.headers.get("content-type");
  const isSse = contentType?.includes("text/event-stream") ?? false;
  // Fire and forget: the observer must never surface an error to the caller.
  consumeUsage(observerBranch, isSse, fallbackModel, onUsage, signal).catch(() => {
  });
  const { headers, status, statusText } = response;
  return new Response(clientBranch, { headers, status, statusText });
}
909
/**
 * Read a response stream to completion, looking for token usage and the model.
 *
 * For SSE streams each complete line is handed to `considerSseLine` as it
 * arrives. For other bodies the text is accumulated and parsed as a single
 * JSON document at the end, unless more than USAGE_BUFFER_LIMIT_BYTES were
 * received. The last usage and model seen win. `onUsage` is called once,
 * after the stream ends, and only if some usage was found.
 *
 * @param stream Readable stream to consume.
 * @param isSse Whether the body is a server-sent event stream.
 * @param fallbackModel Model reported when the payload names none.
 * @param onUsage Callback `(model, usage)`.
 * @param signal Optional abort signal; aborting cancels the reader.
 */
async function consumeUsage(stream, isSse, fallbackModel, onUsage, signal) {
  const reader = stream.getReader();
  const cancelQuietly = () => {
    reader.cancel().catch(() => {
    });
  };
  if (signal?.aborted) {
    cancelQuietly();
  } else {
    signal?.addEventListener("abort", cancelQuietly, { once: true });
  }

  const decoder = new TextDecoder();
  let latestModel = fallbackModel;
  let latestUsage;
  let pending = "";
  let seenBytes = 0;
  let tooLarge = false;

  // Inspect one parsed payload. Usage and model may sit at the top level or
  // under a nested `response` record.
  const inspect = (payload) => {
    const record = asRecord(payload);
    const found = extractTokenUsage(record.usage) ?? extractTokenUsage(asRecord(record.response).usage);
    if (found) {
      latestUsage = found;
    }
    const named = modelText(record.model) || modelText(asRecord(record.response).model);
    if (named) {
      latestModel = named;
    }
  };

  try {
    for (; ; ) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      const text = decoder.decode(value, { stream: true });
      if (isSse) {
        // Everything before the last line break is complete; the tail is kept
        // until the next chunk arrives.
        const segments = (pending + text).split(/\r?\n/);
        pending = segments.pop() ?? "";
        segments.forEach((segment) => considerSseLine(segment, inspect));
        if (pending.length > USAGE_BUFFER_LIMIT_BYTES) {
          pending = "";
        }
        continue;
      }
      if (tooLarge) {
        continue;
      }
      seenBytes += value.byteLength;
      if (seenBytes > USAGE_BUFFER_LIMIT_BYTES) {
        tooLarge = true;
        pending = "";
      } else {
        pending += text;
      }
    }

    // Flush whatever the decoder is still holding.
    const tail = pending + decoder.decode();
    if (isSse) {
      if (tail) {
        considerSseLine(tail, inspect);
      }
    } else if (!tooLarge && tail) {
      const parsed = safeParse(tail);
      if (parsed !== undefined) {
        inspect(parsed);
      }
    }
  } finally {
    signal?.removeEventListener("abort", cancelQuietly);
    reader.releaseLock();
  }

  if (latestUsage) {
    onUsage(latestModel, latestUsage);
  }
}
984
+ function considerSseLine(line, consider) {
985
+ const trimmed = line.trim();
986
+ if (!trimmed.startsWith("data:")) {
987
+ return;
988
+ }
989
+ const data = trimmed.slice("data:".length).trim();
990
+ if (!data || data === "[DONE]") {
991
+ return;
992
+ }
993
+ const parsed = safeParse(data);
994
+ if (parsed !== void 0) {
995
+ consider(parsed);
996
+ }
997
+ }
998
/**
 * Parse JSON without throwing.
 *
 * @param text JSON text.
 * @returns The parsed value, or `undefined` when `text` is not valid JSON.
 */
function safeParse(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
1005
/**
 * Coerce a candidate model value to a trimmed string.
 *
 * @param value Any value.
 * @returns The trimmed string, or "" when `value` is not a string.
 */
function modelText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
1008
+ function nonNegative(value) {
1009
+ return Number.isFinite(value) && value > 0 ? value : 0;
1010
+ }
1011
/**
 * Build a composite map key from label parts.
 *
 * @param parts Label values, in order.
 * @returns The parts joined with LABEL_SEPARATOR.
 */
function labelKey(...parts) {
  const key = parts.join(LABEL_SEPARATOR);
  return key;
}
1014
/**
 * Render a Prometheus label set such as `{method="GET",route="/x"}`.
 *
 * @param pairs Object mapping label names to string values; values are escaped.
 * @returns The braced label list, or "" when `pairs` has no entries.
 */
function labels(pairs) {
  const rendered = [];
  for (const [name, value] of Object.entries(pairs)) {
    rendered.push(`${name}="${escapeLabelValue(value)}"`);
  }
  return rendered.length === 0 ? "" : `{${rendered.join(",")}}`;
}
1022
/**
 * Escape a label value for the Prometheus text exposition format.
 *
 * The format defines exactly three escapes inside a label value: backslash
 * (`\\`), double quote (`\"`) and line feed (`\n`). Every other character,
 * carriage return included, is written as-is. Emitting `\r` would produce an
 * escape sequence the format does not define.
 *
 * @param value Label value string.
 * @returns The value with backslashes, double quotes and line feeds escaped.
 */
function escapeLabelValue(value) {
  return value.replace(/[\\"\n]/g, (ch) => (ch === "\n" ? "\\n" : `\\${ch}`));
}
1025
/**
 * Format a number for use as a label value.
 *
 * @param value Number to format.
 * @returns The default decimal string form of `value`.
 */
function formatNumber(value) {
  // Integers and non-integers share the same default string conversion.
  return String(value);
}
1028
+
520
1029
  // src/server.ts
521
1030
  var DEFAULT_HOST = "127.0.0.1";
522
1031
  var DEFAULT_PORT = 4141;
523
1032
  var INVALID_JSON_MESSAGE = "Request body must be valid JSON.";
1033
+ var USAGE_CACHE_TTL_MS = 6e4;
524
1034
  function createHoopilotHandler(options = {}) {
525
1035
  const client = new CopilotClient(options);
526
1036
  const apiKey = options.apiKey ?? options.env?.HOOPILOT_API_KEY;
527
1037
  const logger = serverLogger(options);
1038
+ const metrics = options.metrics ?? new MetricsRegistry();
1039
+ const readUsage = createUsageReader(client, metrics);
1040
+ const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
528
1041
  return async (request) => {
529
1042
  const startedAt = performance.now();
530
1043
  const url = new URL(request.url);
531
1044
  const apiPath = canonicalApiPath(url.pathname);
532
1045
  const requestId = requestIdFor(request);
1046
+ const route = routeFor(request.method, apiPath);
533
1047
  const requestLogger = logger.child({
534
1048
  method: request.method,
535
1049
  path: url.pathname,
536
1050
  requestId,
537
- route: routeFor(request.method, apiPath)
1051
+ route
1052
+ });
1053
+ metrics.startRequest();
1054
+ const finish = (response) => finishResponse(response, {
1055
+ logger: requestLogger,
1056
+ method: request.method,
1057
+ metrics,
1058
+ requestId,
1059
+ route,
1060
+ startedAt
538
1061
  });
539
1062
  if (request.method === "OPTIONS") {
540
- return finishResponse(new Response(null, { headers: corsHeaders() }), {
541
- logger: requestLogger,
542
- requestId,
543
- startedAt
544
- });
1063
+ return finish(new Response(null, { headers: corsHeaders() }));
545
1064
  }
546
1065
  if (!isAuthorized(request, apiKey)) {
547
1066
  requestLogger.warn({ event: "http.request.unauthorized" }, "invalid hoopilot api key");
548
- return finishResponse(
549
- jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."),
550
- {
551
- logger: requestLogger,
552
- requestId,
553
- startedAt
554
- }
555
- );
1067
+ return finish(jsonError(401, "invalid_api_key", "Invalid or missing Hoopilot API key."));
556
1068
  }
557
1069
  try {
558
1070
  if (request.method === "GET" && (apiPath === "/" || apiPath === "/healthz")) {
559
- return finishResponse(
560
- jsonResponse({
561
- name: "hoopilot",
562
- object: "health",
563
- status: "ok"
564
- }),
565
- { logger: requestLogger, requestId, startedAt }
566
- );
1071
+ return finish(jsonResponse({ name: "hoopilot", object: "health", status: "ok" }));
1072
+ }
1073
+ if (request.method === "GET" && apiPath === "/metrics") {
1074
+ return finish(metricsResponse(metrics));
1075
+ }
1076
+ if (request.method === "GET" && apiPath === "/v1/usage") {
1077
+ return finish(await handleUsage(metrics, readUsage, request.signal));
567
1078
  }
568
1079
  if (request.method === "GET" && apiPath === "/v1/responses") {
569
- return finishResponse(websocketUnsupportedResponse(), {
570
- logger: requestLogger,
571
- requestId,
572
- startedAt
573
- });
1080
+ return finish(websocketUnsupportedResponse());
574
1081
  }
575
1082
  if (request.method === "GET" && apiPath === "/v1/models") {
576
- return finishResponse(await handleModels(client, request.signal, requestLogger), {
577
- logger: requestLogger,
578
- requestId,
579
- startedAt
580
- });
1083
+ return finish(await handleModels(client, metrics, request.signal, requestLogger));
581
1084
  }
582
1085
  if (request.method === "POST" && apiPath === "/v1/chat/completions") {
583
- return finishResponse(await handleChatCompletions(client, request, requestLogger), {
584
- logger: requestLogger,
585
- requestId,
586
- startedAt
587
- });
1086
+ return finish(
1087
+ await handleChatCompletions(client, metrics, recordTokens, request, requestLogger)
1088
+ );
588
1089
  }
589
1090
  if (request.method === "POST" && apiPath === "/v1/completions") {
590
- return finishResponse(await handleCompletions(client, request, requestLogger), {
591
- logger: requestLogger,
592
- requestId,
593
- startedAt
594
- });
1091
+ return finish(
1092
+ await handleCompletions(client, metrics, recordTokens, request, requestLogger)
1093
+ );
595
1094
  }
596
1095
  if (request.method === "POST" && apiPath === "/v1/responses") {
597
- return finishResponse(await handleResponses(client, request, requestLogger), {
598
- logger: requestLogger,
599
- requestId,
600
- startedAt
601
- });
1096
+ return finish(await handleResponses(client, metrics, recordTokens, request, requestLogger));
602
1097
  }
603
- return finishResponse(
604
- jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`),
605
- { logger: requestLogger, requestId, startedAt }
606
- );
1098
+ return finish(jsonError(404, "not_found", `No route for ${request.method} ${url.pathname}.`));
607
1099
  } catch (error) {
608
1100
  if (error instanceof CopilotAuthError) {
609
1101
  requestLogger.warn(
610
1102
  { err: errorDetails(error), event: "copilot.auth.missing" },
611
1103
  "copilot auth failed"
612
1104
  );
613
- return finishResponse(jsonError(401, "copilot_auth_error", error.message), {
614
- logger: requestLogger,
615
- requestId,
616
- startedAt
617
- });
1105
+ return finish(jsonError(401, "copilot_auth_error", error.message));
618
1106
  }
619
1107
  const message = errorMessage(error);
620
1108
  if (message === INVALID_JSON_MESSAGE) {
@@ -628,11 +1116,7 @@ function createHoopilotHandler(options = {}) {
628
1116
  "request failed"
629
1117
  );
630
1118
  }
631
- return finishResponse(jsonError(500, "internal_error", message), {
632
- logger: requestLogger,
633
- requestId,
634
- startedAt
635
- });
1119
+ return finish(jsonError(500, "internal_error", message));
636
1120
  }
637
1121
  };
638
1122
  }
@@ -661,8 +1145,9 @@ function startHoopilotServer(options = {}) {
661
1145
  url: `http://${host}:${server.port}`
662
1146
  };
663
1147
  }
664
- async function handleModels(client, signal, logger) {
1148
+ async function handleModels(client, metrics, signal, logger) {
665
1149
  const upstream = await client.models(signal);
1150
+ metrics.recordUpstream("/models", upstream.ok);
666
1151
  if (!upstream.ok) {
667
1152
  if (isUpstreamAuthStatus(upstream.status)) {
668
1153
  return proxyError(upstream, logger);
@@ -680,38 +1165,50 @@ async function handleModels(client, signal, logger) {
680
1165
  logUpstreamSuccess(logger, "/models", upstream.status);
681
1166
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
682
1167
  }
683
- async function handleChatCompletions(client, request, logger) {
1168
+ async function handleChatCompletions(client, metrics, recordTokens, request, logger) {
684
1169
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
685
1170
  const upstream = await client.chatCompletions(chatRequest, request.signal);
1171
+ metrics.recordUpstream("/chat/completions", upstream.ok);
686
1172
  if (!upstream.ok) {
687
1173
  return proxyError(upstream, logger);
688
1174
  }
689
1175
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
690
- return proxyResponse(upstream);
1176
+ const model = normalizeRequestedModel(chatRequest.model);
1177
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
691
1178
  }
692
- async function handleCompletions(client, request, logger) {
1179
+ async function handleCompletions(client, metrics, recordTokens, request, logger) {
693
1180
  const body = await readJson(request);
694
1181
  const upstream = await client.chatCompletions(
695
1182
  completionsRequestToChatCompletion(body),
696
1183
  request.signal
697
1184
  );
1185
+ metrics.recordUpstream("/chat/completions", upstream.ok);
698
1186
  if (!upstream.ok) {
699
1187
  return proxyError(upstream, logger);
700
1188
  }
701
1189
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
1190
+ const model = normalizeRequestedModel(body.model);
702
1191
  if (isStreamingResponse(upstream)) {
703
- return proxyResponse(upstream);
1192
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
704
1193
  }
705
- return jsonResponse(chatCompletionToCompletion(await upstream.json()));
1194
+ const completion = asRecord(await upstream.json());
1195
+ const usage = extractTokenUsage(completion.usage);
1196
+ if (usage) {
1197
+ const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
1198
+ recordTokens(responseModel || model, usage);
1199
+ }
1200
+ return jsonResponse(chatCompletionToCompletion(completion));
706
1201
  }
707
- async function handleResponses(client, request, logger) {
1202
+ async function handleResponses(client, metrics, recordTokens, request, logger) {
708
1203
  const body = await readJsonText(request);
709
1204
  const upstream = await client.responses(body, request.signal);
1205
+ metrics.recordUpstream("/responses", upstream.ok);
710
1206
  if (!upstream.ok) {
711
1207
  return proxyError(upstream, logger);
712
1208
  }
713
1209
  logUpstreamSuccess(logger, "/responses", upstream.status);
714
- return proxyResponse(upstream);
1210
+ const model = normalizeRequestedModel(asRecord(safeParseJson(body)).model);
1211
+ return proxyResponse(observeResponseUsage(upstream, model, recordTokens, request.signal));
715
1212
  }
716
1213
  async function proxyError(upstream, logger) {
717
1214
  const text = await upstream.text();
@@ -830,7 +1327,21 @@ function serverLogger(options) {
830
1327
  }
831
1328
  function finishResponse(response, options) {
832
1329
  const withRequestId = responseWithRequestId(response, options.requestId);
833
- logRequestCompleted(options.logger, withRequestId, options.startedAt);
1330
+ const stream = isStreamingResponse(withRequestId);
1331
+ const status = withRequestId.status;
1332
+ const complete = () => {
1333
+ const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
1334
+ options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
1335
+ logRequestCompleted(options.logger, status, stream, durationMs);
1336
+ };
1337
+ if (stream && withRequestId.body) {
1338
+ return new Response(trackStreamCompletion(withRequestId.body, complete), {
1339
+ headers: withRequestId.headers,
1340
+ status,
1341
+ statusText: withRequestId.statusText
1342
+ });
1343
+ }
1344
+ complete();
834
1345
  return withRequestId;
835
1346
  }
836
1347
  function responseWithRequestId(response, requestId) {
@@ -842,18 +1353,48 @@ function responseWithRequestId(response, requestId) {
842
1353
  statusText: response.statusText
843
1354
  });
844
1355
  }
845
- function logRequestCompleted(logger, response, startedAt) {
1356
+ function trackStreamCompletion(body, onComplete) {
1357
+ const reader = body.getReader();
1358
+ let fired = false;
1359
+ const fire = () => {
1360
+ if (!fired) {
1361
+ fired = true;
1362
+ onComplete();
1363
+ }
1364
+ };
1365
+ return new ReadableStream({
1366
+ async pull(controller) {
1367
+ try {
1368
+ const { done, value } = await reader.read();
1369
+ if (done) {
1370
+ controller.close();
1371
+ fire();
1372
+ return;
1373
+ }
1374
+ controller.enqueue(value);
1375
+ } catch (error) {
1376
+ fire();
1377
+ controller.error(error);
1378
+ }
1379
+ },
1380
+ cancel(reason) {
1381
+ fire();
1382
+ return reader.cancel(reason);
1383
+ }
1384
+ });
1385
+ }
1386
+ function logRequestCompleted(logger, status, stream, durationMs) {
846
1387
  const fields = {
847
- durationMs: Math.round((performance.now() - startedAt) * 100) / 100,
1388
+ durationMs,
848
1389
  event: "http.request.completed",
849
- status: response.status,
850
- stream: isStreamingResponse(response)
1390
+ status,
1391
+ stream
851
1392
  };
852
- if (response.status >= 500) {
1393
+ if (status >= 500) {
853
1394
  logger.error(fields, "request completed with server error");
854
1395
  return;
855
1396
  }
856
- if (response.status >= 400) {
1397
+ if (status >= 400) {
857
1398
  logger.warn(fields, "request completed with client error");
858
1399
  return;
859
1400
  }
@@ -874,6 +1415,8 @@ function canonicalApiPath(path) {
874
1415
  return "/v1/completions";
875
1416
  case "/responses":
876
1417
  return "/v1/responses";
1418
+ case "/usage":
1419
+ return "/v1/usage";
877
1420
  default:
878
1421
  return withoutTrailingSlash;
879
1422
  }
@@ -885,6 +1428,12 @@ function routeFor(method, path) {
885
1428
  if (method === "GET" && (path === "/" || path === "/healthz")) {
886
1429
  return "health";
887
1430
  }
1431
+ if (method === "GET" && path === "/metrics") {
1432
+ return "metrics";
1433
+ }
1434
+ if (method === "GET" && path === "/v1/usage") {
1435
+ return "usage";
1436
+ }
888
1437
  if (method === "GET" && path === "/v1/models") {
889
1438
  return "models";
890
1439
  }
@@ -915,6 +1464,57 @@ function logUpstreamSuccess(logger, upstreamPath, status) {
915
1464
  "copilot upstream request completed"
916
1465
  );
917
1466
  }
1467
+ function metricsResponse(metrics) {
1468
+ return new Response(metrics.renderPrometheus(), {
1469
+ headers: {
1470
+ ...corsHeaders(),
1471
+ "content-type": PROMETHEUS_CONTENT_TYPE
1472
+ },
1473
+ status: 200
1474
+ });
1475
+ }
1476
+ async function handleUsage(metrics, readUsage, signal) {
1477
+ const proxy = metrics.snapshot();
1478
+ const { copilot, error } = await readUsage(signal);
1479
+ const body = { copilot: copilot ?? null, object: "usage", proxy };
1480
+ if (error) {
1481
+ body.copilot_error = error;
1482
+ }
1483
+ return jsonResponse(body);
1484
+ }
1485
+ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
1486
+ const usagePath = "/copilot_internal/user";
1487
+ let cache;
1488
+ return async (signal) => {
1489
+ if (cache && now() - cache.atMs < ttlMs) {
1490
+ return { copilot: cache.value };
1491
+ }
1492
+ try {
1493
+ const upstream = await client.usage(signal);
1494
+ metrics.recordUpstream(usagePath, upstream.ok);
1495
+ if (!upstream.ok) {
1496
+ return { error: `GitHub Copilot usage request failed with ${upstream.status}.` };
1497
+ }
1498
+ const value = normalizeCopilotUsage(await upstream.json().catch(() => ({})));
1499
+ cache = { atMs: now(), value };
1500
+ metrics.recordCopilotQuota(value);
1501
+ return { copilot: value };
1502
+ } catch (error) {
1503
+ metrics.recordUpstream(usagePath, false);
1504
+ if (error instanceof CopilotAuthError) {
1505
+ return { error: error.message };
1506
+ }
1507
+ return { error: errorMessage(error) };
1508
+ }
1509
+ };
1510
+ }
1511
+ function safeParseJson(text) {
1512
+ try {
1513
+ return JSON.parse(text);
1514
+ } catch {
1515
+ return void 0;
1516
+ }
1517
+ }
918
1518
 
919
1519
  // src/update.ts
920
1520
  import { execFileSync } from "child_process";
@@ -1474,6 +2074,16 @@ async function main(argv = Bun.argv.slice(2)) {
1474
2074
  await runModels(args2);
1475
2075
  return;
1476
2076
  }
2077
+ if (command === "usage") {
2078
+ const args2 = withRuntimeEnv(parseArgs(argv.slice(1)));
2079
+ if (args2.help) {
2080
+ console.log(helpText(await getVersion()));
2081
+ return;
2082
+ }
2083
+ args2.logger = commandLogger(args2, "usage");
2084
+ await runUsage(args2);
2085
+ return;
2086
+ }
1477
2087
  const args = withRuntimeEnv(parseArgs(argv));
1478
2088
  if (args.help) {
1479
2089
  console.log(helpText(await getVersion()));
@@ -1619,6 +2229,87 @@ async function runModels(options = {}) {
1619
2229
  }
1620
2230
  return ids;
1621
2231
  }
2232
+ async function runUsage(options = {}) {
2233
+ const logger = options.logger?.child({ component: "usage" }) ?? noopLogger;
2234
+ logger.debug({ event: "usage.fetch.started" }, "fetching github copilot quota");
2235
+ const response = await new CopilotClient(options).usage();
2236
+ if (!response.ok) {
2237
+ const message = `GitHub Copilot usage request failed with ${response.status}: ${await truncatedResponseText(response)}`;
2238
+ if (response.status === 401 || response.status === 403) {
2239
+ throw new CopilotAuthError(message);
2240
+ }
2241
+ throw new Error(message);
2242
+ }
2243
+ const usage = normalizeCopilotUsage(await response.json().catch(() => ({})));
2244
+ logger.debug(
2245
+ { event: "usage.fetch.succeeded", plan: usage.plan },
2246
+ "github copilot quota fetched"
2247
+ );
2248
+ for (const line of formatCopilotUsage(usage)) {
2249
+ console.log(line);
2250
+ }
2251
+ return usage;
2252
+ }
2253
+ function formatCopilotUsage(usage) {
2254
+ const lines = [];
2255
+ if (usage.plan) {
2256
+ lines.push(`Plan: ${usage.plan}`);
2257
+ }
2258
+ if (usage.quotaResetDate) {
2259
+ lines.push(`Quota resets: ${usage.quotaResetDate}`);
2260
+ }
2261
+ const order = ["premium_interactions", "chat", "completions"];
2262
+ const names = Object.keys(usage.quotas).sort(
2263
+ (a, b) => quotaRank(order, a) - quotaRank(order, b) || a.localeCompare(b)
2264
+ );
2265
+ for (const name of names) {
2266
+ const quota = usage.quotas[name];
2267
+ if (quota) {
2268
+ lines.push(`${quotaLabel(name)}: ${formatQuota(quota)}`);
2269
+ }
2270
+ }
2271
+ if (lines.length === 0) {
2272
+ lines.push("No GitHub Copilot quota information available for this account.");
2273
+ }
2274
+ return lines;
2275
+ }
2276
+ function quotaRank(order, name) {
2277
+ const index = order.indexOf(name);
2278
+ return index === -1 ? order.length : index;
2279
+ }
2280
+ function quotaLabel(name) {
2281
+ switch (name) {
2282
+ case "premium_interactions":
2283
+ return "Premium requests";
2284
+ case "chat":
2285
+ return "Chat";
2286
+ case "completions":
2287
+ return "Completions";
2288
+ default:
2289
+ return name;
2290
+ }
2291
+ }
2292
+ function formatQuota(quota) {
2293
+ if (quota.unlimited) {
2294
+ return "unlimited";
2295
+ }
2296
+ const parts = [];
2297
+ if (quota.used !== void 0 && quota.entitlement !== void 0) {
2298
+ parts.push(`${roundQuota(quota.used)}/${roundQuota(quota.entitlement)} used`);
2299
+ } else if (quota.remaining !== void 0) {
2300
+ parts.push(`${roundQuota(quota.remaining)} remaining`);
2301
+ }
2302
+ if (quota.percentRemaining !== void 0) {
2303
+ parts.push(`${roundQuota(quota.percentRemaining)}% remaining`);
2304
+ }
2305
+ if (quota.overageCount) {
2306
+ parts.push(`${roundQuota(quota.overageCount)} overage`);
2307
+ }
2308
+ return parts.length > 0 ? parts.join(", ") : "n/a";
2309
+ }
2310
+ function roundQuota(value) {
2311
+ return Number.isInteger(value) ? value : Math.round(value * 10) / 10;
2312
+ }
1622
2313
  async function verifyCopilotOAuthToken(token, options = {}) {
1623
2314
  const apiBaseUrl = trimTrailingSlash(
1624
2315
  options.copilotApiBaseUrl ?? options.env?.COPILOT_API_BASE_URL ?? DEFAULT_COPILOT_API_BASE_URL
@@ -1689,6 +2380,7 @@ Usage:
1689
2380
  hoopilot [serve] [options]
1690
2381
  hoopilot login [options]
1691
2382
  hoopilot models [options]
2383
+ hoopilot usage [options]
1692
2384
  hoopilot update
1693
2385
  npx @openhoo/hoopilot [options]
1694
2386
 
@@ -1696,8 +2388,13 @@ Commands:
1696
2388
  serve Start the proxy server (default)
1697
2389
  login Sign in through GitHub OAuth in a browser and verify Copilot access
1698
2390
  models List available GitHub Copilot model IDs
2391
+ usage Show GitHub Copilot quota and premium-request usage
1699
2392
  update, upgrade Update hoopilot to the latest release
1700
2393
 
2394
+ While the server runs, GET /metrics exposes Prometheus metrics (request counts,
2395
+ token usage, latency) and GET /v1/usage returns those metrics plus live Copilot
2396
+ quota as JSON.
2397
+
1701
2398
  Options:
1702
2399
  -p, --port <port> Port to listen on. Default: 4141
1703
2400
  --host <host> Host to listen on. Default: 127.0.0.1
@@ -1719,6 +2416,7 @@ Environment:
1719
2416
  HOOPILOT_LOG_FORMAT json or pretty. Default: pretty
1720
2417
  HOOPILOT_LOG_LEVEL trace, debug, info, warn, error, fatal, or silent
1721
2418
  COPILOT_API_BASE_URL
2419
+ HOOPILOT_GITHUB_API_BASE_URL GitHub REST base for the usage/quota lookup. Default: https://api.github.com
1722
2420
  HOOPILOT_NO_UPDATE_CHECK Set to disable update checks (also NO_UPDATE_NOTIFIER)
1723
2421
  `;
1724
2422
  }
@@ -1732,6 +2430,7 @@ export {
1732
2430
  main,
1733
2431
  parseArgs,
1734
2432
  runModels,
2433
+ runUsage,
1735
2434
  verifyCopilotOAuthToken
1736
2435
  };
1737
2436
  //# sourceMappingURL=cli.js.map