@openhoo/hoopilot 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -158,7 +158,7 @@ Incoming `x-request-id` headers are preserved on responses. If a request has no
158
158
 
159
159
  Hoopilot tracks token usage, request counts, and latency in memory while the server runs, and can report your GitHub Copilot account quota (premium-request "credit" usage).
160
160
 
161
- - `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters (`hoopilot_requests_total`), upstream call counters (`hoopilot_upstream_requests_total`), token counters by model and type (`hoopilot_tokens_total{model,type}`), a request-duration histogram (`hoopilot_request_duration_seconds`), an in-flight gauge, and—once `/v1/usage` has been fetched at least once—Copilot quota gauges (`hoopilot_copilot_quota_remaining{category}`, `_entitlement`, `_used`, `_percent_remaining`). Counters reset to zero on restart, which Prometheus handles natively.
161
+ - `GET /metrics` returns Prometheus text (`text/plain; version=0.0.4`). It exposes request counters (`hoopilot_requests_total`), upstream call counters (`hoopilot_upstream_requests_total`), token counters by model and type (`hoopilot_tokens_total{model,type}`), a request-duration histogram (`hoopilot_request_duration_seconds`), an in-flight gauge, and—once `/v1/usage` has been fetched at least once—Copilot quota gauges (`hoopilot_copilot_quota_remaining{category}`, `_entitlement`, `_used`, `_percent_remaining`, `_overage_count`, `_overage_entitlement`, `_unlimited`, `_overage_permitted`, `_has_quota`, `_token_based_billing`, and category reset/snapshot timestamps). Counters reset to zero on restart, which Prometheus handles natively.
162
162
  - `GET /v1/usage` returns JSON combining the proxy metrics snapshot with live Copilot quota fetched from GitHub (cached for 60 seconds). If the quota cannot be read, `copilot` is `null` and `copilot_error` explains why, but the proxy metrics are still returned.
163
163
  - `hoopilot usage` prints your Copilot plan and quota from the command line.
164
164
 
package/dist/cli.js CHANGED
@@ -293,22 +293,31 @@ function normalizeCopilotUsage(body) {
293
293
  }
294
294
  function normalizeQuotaDetail(detail) {
295
295
  const entitlement = numberOrUndefined(detail.entitlement);
296
+ const overageCount = numberOrUndefined(detail.overage_count);
296
297
  const remaining = numberOrUndefined(detail.remaining) ?? numberOrUndefined(detail.quota_remaining);
297
298
  return removeUndefinedQuota({
298
299
  entitlement,
299
- overageCount: numberOrUndefined(detail.overage_count),
300
+ hasQuota: typeof detail.has_quota === "boolean" ? detail.has_quota : void 0,
301
+ overageCount,
302
+ overageEntitlement: numberOrUndefined(detail.overage_entitlement),
300
303
  overagePermitted: typeof detail.overage_permitted === "boolean" ? detail.overage_permitted : void 0,
301
304
  percentRemaining: numberOrUndefined(detail.percent_remaining),
305
+ quotaId: stringOrUndefined(detail.quota_id),
306
+ quotaResetAt: stringOrUndefined(detail.quota_reset_at),
302
307
  remaining,
308
+ timestampUtc: stringOrUndefined(detail.timestamp_utc),
309
+ tokenBasedBilling: typeof detail.token_based_billing === "boolean" ? detail.token_based_billing : void 0,
303
310
  unlimited: typeof detail.unlimited === "boolean" ? detail.unlimited : void 0,
304
- used: usedFrom(entitlement, remaining)
311
+ used: usedFrom(entitlement, remaining, overageCount)
305
312
  });
306
313
  }
307
- function usedFrom(entitlement, remaining) {
314
+ function usedFrom(entitlement, remaining, overageCount) {
308
315
  if (entitlement === void 0 || remaining === void 0) {
309
316
  return void 0;
310
317
  }
311
- return Math.max(0, entitlement - remaining);
318
+ const base = entitlement - remaining;
319
+ const overage = remaining === 0 ? overageCount ?? 0 : 0;
320
+ return Math.max(0, base + overage);
312
321
  }
313
322
  function numberOrUndefined(value) {
314
323
  return typeof value === "number" && Number.isFinite(value) ? value : void 0;
@@ -571,6 +580,12 @@ function isLogLevel(value) {
571
580
 
572
581
  // src/openai.ts
573
582
  var DEFAULT_MODEL = "gpt-4.1";
583
+ var OpenAICompatibilityError = class extends Error {
584
+ constructor(message) {
585
+ super(message);
586
+ this.name = "OpenAICompatibilityError";
587
+ }
588
+ };
574
589
  function normalizeChatCompletionRequest(request) {
575
590
  return removeUndefined({
576
591
  ...request,
@@ -578,13 +593,22 @@ function normalizeChatCompletionRequest(request) {
578
593
  });
579
594
  }
580
595
  function completionsRequestToChatCompletion(request) {
596
+ assertSupportedLegacyCompletionRequest(request);
581
597
  return removeUndefined({
598
+ frequency_penalty: request.frequency_penalty,
599
+ logit_bias: request.logit_bias,
582
600
  max_tokens: request.max_tokens,
583
- messages: [{ content: promptToText(request.prompt), role: "user" }],
601
+ messages: [{ content: legacyPromptToText(request.prompt), role: "user" }],
584
602
  model: normalizeRequestedModel(request.model),
603
+ n: request.n,
604
+ presence_penalty: request.presence_penalty,
605
+ seed: request.seed,
606
+ stop: request.stop,
585
607
  stream: request.stream === true,
608
+ stream_options: request.stream_options,
586
609
  temperature: request.temperature,
587
- top_p: request.top_p
610
+ top_p: request.top_p,
611
+ user: request.user
588
612
  });
589
613
  }
590
614
  function normalizeRequestedModel(model) {
@@ -592,21 +616,21 @@ function normalizeRequestedModel(model) {
592
616
  return requested || DEFAULT_MODEL;
593
617
  }
594
618
  function chatCompletionToCompletion(completion) {
595
- const choice = firstChoice(completion);
596
- const message = asRecord(choice.message);
597
619
  return removeUndefined({
598
- choices: [
599
- {
620
+ choices: completionChoices(completion).map((choice, index) => {
621
+ const message = asRecord(choice.message);
622
+ return {
600
623
  finish_reason: choice.finish_reason ?? "stop",
601
- index: 0,
602
- logprobs: null,
603
- text: contentToText(message.content)
604
- }
605
- ],
624
+ index: typeof choice.index === "number" ? choice.index : index,
625
+ logprobs: choice.logprobs ?? null,
626
+ text: contentToText(choice.text) || contentToText(message.content)
627
+ };
628
+ }),
606
629
  created: completion.created ?? epochSeconds(),
607
630
  id: completion.id ?? `cmpl_${randomId()}`,
608
631
  model: completion.model ?? DEFAULT_MODEL,
609
632
  object: "text_completion",
633
+ system_fingerprint: completion.system_fingerprint,
610
634
  usage: completion.usage
611
635
  });
612
636
  }
@@ -679,11 +703,38 @@ function fallbackModels() {
679
703
  }
680
704
  ];
681
705
  }
682
- function promptToText(prompt) {
683
- if (Array.isArray(prompt)) {
684
- return prompt.map((item) => contentToText(item)).join("\n");
706
+ function legacyPromptToText(prompt) {
707
+ if (typeof prompt === "string") {
708
+ return prompt;
709
+ }
710
+ if (Array.isArray(prompt) && prompt.length === 1 && typeof prompt[0] === "string") {
711
+ return prompt[0];
712
+ }
713
+ throw new OpenAICompatibilityError(
714
+ "Hoopilot legacy completions compatibility supports exactly one string prompt per request."
715
+ );
716
+ }
717
+ function assertSupportedLegacyCompletionRequest(request) {
718
+ if (request.echo === true) {
719
+ throw new OpenAICompatibilityError(
720
+ "Hoopilot legacy completions compatibility does not support echo=true."
721
+ );
722
+ }
723
+ if (typeof request.best_of === "number" && request.best_of > 1) {
724
+ throw new OpenAICompatibilityError(
725
+ "Hoopilot legacy completions compatibility does not support best_of greater than 1."
726
+ );
727
+ }
728
+ if (typeof request.logprobs === "number" && request.logprobs > 0) {
729
+ throw new OpenAICompatibilityError(
730
+ "Hoopilot legacy completions compatibility does not support legacy logprobs."
731
+ );
732
+ }
733
+ if (contentToText(request.suffix)) {
734
+ throw new OpenAICompatibilityError(
735
+ "Hoopilot legacy completions compatibility does not support suffix."
736
+ );
685
737
  }
686
- return contentToText(prompt);
687
738
  }
688
739
  function contentToText(content) {
689
740
  if (typeof content === "string") {
@@ -741,9 +792,9 @@ function firstNumber(...values) {
741
792
  }
742
793
  return void 0;
743
794
  }
744
- function firstChoice(completion) {
795
+ function completionChoices(completion) {
745
796
  const choices = Array.isArray(completion.choices) ? completion.choices : [];
746
- return asRecord(choices[0]);
797
+ return choices.map((choice) => asRecord(choice));
747
798
  }
748
799
  function processCompletionSseBlock(block, enqueue, markTerminal) {
749
800
  let event = "message";
@@ -775,25 +826,28 @@ function processCompletionSseBlock(block, enqueue, markTerminal) {
775
826
  enqueue({ error });
776
827
  return;
777
828
  }
778
- const choice = firstChoice(parsed);
779
- const delta = asRecord(choice.delta);
780
- const text = contentToText(delta.content);
781
- const finishReason = choice.finish_reason ?? null;
829
+ const choices = completionChoices(parsed).map((choice, index) => {
830
+ const delta = asRecord(choice.delta);
831
+ const text = contentToText(delta.content);
832
+ const finishReason = choice.finish_reason ?? null;
833
+ if (!text && finishReason === null) {
834
+ return void 0;
835
+ }
836
+ return {
837
+ finish_reason: finishReason,
838
+ index: typeof choice.index === "number" ? choice.index : index,
839
+ logprobs: choice.logprobs ?? null,
840
+ text
841
+ };
842
+ }).filter((choice) => choice !== void 0);
782
843
  const usage = asRecord(parsed.usage);
783
844
  const hasUsage = Object.keys(usage).length > 0;
784
- if (!text && finishReason === null && !hasUsage) {
845
+ if (choices.length === 0 && !hasUsage) {
785
846
  return;
786
847
  }
787
848
  enqueue(
788
849
  removeUndefined({
789
- choices: text || finishReason !== null ? [
790
- {
791
- finish_reason: finishReason,
792
- index: typeof choice.index === "number" ? choice.index : 0,
793
- logprobs: null,
794
- text
795
- }
796
- ] : [],
850
+ choices,
797
851
  created: typeof parsed.created === "number" ? parsed.created : epochSeconds(),
798
852
  id: contentToText(parsed.id) || `cmpl_${randomId()}`,
799
853
  model: contentToText(parsed.model) || DEFAULT_MODEL,
@@ -1049,11 +1103,43 @@ var MetricsRegistry = class {
1049
1103
  gauge("remaining", "Remaining quota for the Copilot category.", (q) => q.remaining);
1050
1104
  gauge("entitlement", "Quota entitlement for the Copilot category.", (q) => q.entitlement);
1051
1105
  gauge("used", "Used quota (entitlement minus remaining) for the category.", (q) => q.used);
1106
+ gauge("overage_count", "Overage count for the Copilot category.", (q) => q.overageCount);
1107
+ gauge(
1108
+ "overage_entitlement",
1109
+ "Overage entitlement for the Copilot category.",
1110
+ (q) => q.overageEntitlement
1111
+ );
1052
1112
  gauge(
1053
1113
  "percent_remaining",
1054
1114
  "Percent of quota remaining for the Copilot category.",
1055
1115
  (q) => q.percentRemaining
1056
1116
  );
1117
+ booleanGauge(
1118
+ "unlimited",
1119
+ "Whether the Copilot quota category is unlimited.",
1120
+ (q) => q.unlimited
1121
+ );
1122
+ booleanGauge(
1123
+ "overage_permitted",
1124
+ "Whether overage is permitted for the Copilot category.",
1125
+ (q) => q.overagePermitted
1126
+ );
1127
+ booleanGauge("has_quota", "Whether the Copilot quota category has a quota.", (q) => q.hasQuota);
1128
+ booleanGauge(
1129
+ "token_based_billing",
1130
+ "Whether the Copilot quota category uses token-based billing.",
1131
+ (q) => q.tokenBasedBilling
1132
+ );
1133
+ dateGauge(
1134
+ "category_reset_timestamp_seconds",
1135
+ "Unix epoch of the Copilot category-specific quota reset.",
1136
+ (q) => q.quotaResetAt
1137
+ );
1138
+ dateGauge(
1139
+ "category_snapshot_timestamp_seconds",
1140
+ "Unix epoch of the Copilot category quota snapshot.",
1141
+ (q) => q.timestampUtc
1142
+ );
1057
1143
  const resetMs = usage.quotaResetDate ? Date.parse(usage.quotaResetDate) : Number.NaN;
1058
1144
  if (Number.isFinite(resetMs)) {
1059
1145
  lines.push(
@@ -1072,6 +1158,30 @@ var MetricsRegistry = class {
1072
1158
  })} 1`
1073
1159
  );
1074
1160
  }
1161
+ function booleanGauge(suffix, help, pick) {
1162
+ const present = categories.filter(([, quota]) => pick(quota) !== void 0);
1163
+ if (present.length === 0) {
1164
+ return;
1165
+ }
1166
+ lines.push(`# HELP hoopilot_copilot_quota_${suffix} ${help}`);
1167
+ lines.push(`# TYPE hoopilot_copilot_quota_${suffix} gauge`);
1168
+ for (const [category, quota] of present) {
1169
+ lines.push(
1170
+ `hoopilot_copilot_quota_${suffix}${labels({ category })} ${pick(quota) ? 1 : 0}`
1171
+ );
1172
+ }
1173
+ }
1174
+ function dateGauge(suffix, help, pick) {
1175
+ const present = categories.map(([category, quota]) => [category, Date.parse(pick(quota) ?? "")]).filter(([, timestamp]) => Number.isFinite(timestamp));
1176
+ if (present.length === 0) {
1177
+ return;
1178
+ }
1179
+ lines.push(`# HELP hoopilot_copilot_quota_${suffix} ${help}`);
1180
+ lines.push(`# TYPE hoopilot_copilot_quota_${suffix} gauge`);
1181
+ for (const [category, timestamp] of present) {
1182
+ lines.push(`hoopilot_copilot_quota_${suffix}${labels({ category })} ${timestamp / 1e3}`);
1183
+ }
1184
+ }
1075
1185
  }
1076
1186
  };
1077
1187
  function observeResponseUsage(response, fallbackModel, onUsage, signal) {
@@ -1312,6 +1422,12 @@ function createHoopilotHandler(options = {}) {
1312
1422
  "request body was invalid json"
1313
1423
  );
1314
1424
  return finish(jsonError(400, "invalid_request_error", message));
1425
+ } else if (error instanceof OpenAICompatibilityError) {
1426
+ requestLogger.warn(
1427
+ { err: errorDetails(error), event: "http.request.failed" },
1428
+ "request body used unsupported OpenAI compatibility fields"
1429
+ );
1430
+ return finish(jsonError(400, "invalid_request_error", message));
1315
1431
  } else if (error instanceof RequestBodyTooLargeError) {
1316
1432
  requestLogger.warn(
1317
1433
  { err: errorDetails(error), event: "http.request.failed" },
@@ -1763,8 +1879,8 @@ function metricsResponse(metrics) {
1763
1879
  });
1764
1880
  }
1765
1881
  async function handleUsage(metrics, readUsage, signal) {
1766
- const proxy = metrics.snapshot();
1767
1882
  const { copilot, error } = await readUsage(signal);
1883
+ const proxy = metrics.snapshot();
1768
1884
  const body = { copilot: copilot ?? null, object: "usage", proxy };
1769
1885
  if (error) {
1770
1886
  body.copilot_error = error;
@@ -1789,10 +1905,10 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
1789
1905
  metrics.recordCopilotQuota(value);
1790
1906
  return { copilot: value };
1791
1907
  } catch (error) {
1792
- metrics.recordUpstream(usagePath, false);
1793
1908
  if (error instanceof CopilotAuthError) {
1794
1909
  return { error: error.message };
1795
1910
  }
1911
+ metrics.recordUpstream(usagePath, false);
1796
1912
  return { error: errorMessage(error) };
1797
1913
  }
1798
1914
  };