@openhoo/hoopilot 2.1.7 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -89,12 +89,38 @@ var STREAMING_PROXY_MODES = [
89
89
  "buffer",
90
90
  "live"
91
91
  ];
92
/** Accepted usage accounting modes, in the order they are listed in validation errors. */
var USAGE_ACCOUNTING_MODES = ["basic", "full", "off"];
92
97
  function parseStreamingProxyMode(value) {
93
98
  if (STREAMING_PROXY_MODES.includes(value)) {
94
99
  return value;
95
100
  }
96
101
  throw new Error(`Invalid stream mode: ${value}. Expected ${STREAMING_PROXY_MODES.join(", ")}.`);
97
102
  }
103
/**
 * Validate a usage accounting mode.
 *
 * @param value Candidate mode.
 * @returns `value` unchanged when it is an entry of USAGE_ACCOUNTING_MODES.
 * @throws {Error} When `value` is not an entry; the message lists the accepted modes.
 */
function parseUsageAccountingMode(value) {
  const isKnownMode = USAGE_ACCOUNTING_MODES.includes(value);
  if (!isKnownMode) {
    const expected = USAGE_ACCOUNTING_MODES.join(", ");
    throw new Error(`Invalid usage accounting mode: ${value}. Expected ${expected}.`);
  }
  return value;
}
111
/**
 * Interpret an environment variable as a boolean flag.
 *
 * The raw value is first passed through `envValue` (defined elsewhere in this
 * file) and lower-cased, so the comparison is case-insensitive.
 *
 * @param value Raw environment value.
 * @param name Variable name, used only in the error message.
 * @returns `true` for 1/true/yes/on, `false` for 0/false/no/off, and
 *   `undefined` when `envValue` yields no value.
 * @throws {Error} When the value is present but not one of the accepted spellings.
 */
function parseBooleanEnv(value, name) {
  const normalized = envValue(value)?.toLowerCase();
  switch (normalized) {
    case void 0:
      return void 0;
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      throw new Error(`${name} must be one of: 1, 0, true, false, yes, no, on, off.`);
  }
}
98
124
 
99
125
  // src/auth-store.ts
100
126
  var StoredCopilotAuthError = class extends Error {
@@ -742,6 +768,32 @@ var DEFAULT_LOG_FORMAT = "pretty";
742
768
  var DEFAULT_LOG_LEVEL = "info";
743
769
  var LOG_FORMATS = ["json", "pretty"];
744
770
  var LOG_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal", "silent"];
771
/**
 * Log fields that the pretty formatter appends to the message as `key=value`
 * pairs, in the order they are rendered.
 */
var PRETTY_INLINE_FIELDS = [
  // Request / routing context.
  "component", "command", "event", "method", "path", "status", "durationMs",
  "stream", "route", "requestId", "upstreamPath", "upstreamStatus",
  // Endpoints.
  "url", "baseUrl", "origin",
  // Version / install details.
  "currentVersion", "installKind", "latestVersion", "assetName",
  // Miscellaneous.
  "count", "plan", "apiBaseUrl", "authStorePath"
];
/**
 * Fields listed in the pretty printer's `ignore` option: process metadata plus
 * every field that is already rendered inline.
 */
var PRETTY_IGNORED_FIELDS = ["pid", "hostname", "service"].concat(PRETTY_INLINE_FIELDS);
745
797
  var REDACT_PATHS = [
746
798
  "apiKey",
747
799
  "authorization",
@@ -805,9 +857,11 @@ function createHoopilotLogger(options = {}) {
805
857
  // stream's TTY-ness is unknown, so default to no color there.
806
858
  colorize: options.colorize ?? (options.stream ? false : process.stdout.isTTY),
807
859
  destination: options.stream ?? 1,
808
- ignore: "pid,hostname",
860
+ ignore: PRETTY_IGNORED_FIELDS.join(","),
861
+ levelFirst: true,
862
+ messageFormat: formatPrettyMessage,
809
863
  singleLine: true,
810
- translateTime: "SYS:standard"
864
+ translateTime: "SYS:HH:MM:ss"
811
865
  })
812
866
  )
813
867
  );
@@ -853,6 +907,45 @@ function errorDetails(error) {
853
907
  }
854
908
  return { message: String(error) };
855
909
  }
910
/**
 * Build the message text for one pretty-printed log record. This function is
 * passed as the pretty printer's `messageFormat` option.
 *
 * The result is the record's message followed by every field named in
 * PRETTY_INLINE_FIELDS that is present on the record, rendered as
 * `label=value` and separated by single spaces.
 *
 * @param log Log record object.
 * @param messageKey Name of the property on `log` that holds the message.
 * @returns The formatted line; an empty string when the record has neither a
 *   message nor any inline field.
 */
function formatPrettyMessage(log, messageKey) {
  const rawMessage = log[messageKey];
  // A record logged without a message must not render as the literal text "undefined".
  const message = rawMessage === void 0 ? "" : formatPrettyLogMessage(rawMessage);
  const fields = PRETTY_INLINE_FIELDS.flatMap((field) => {
    const value = log[field];
    if (value === void 0) {
      return [];
    }
    return [`${prettyFieldLabel(field)}=${formatPrettyFieldValue(field, value)}`];
  });
  // Leave out an empty message so the line never starts with a stray space.
  const parts = message === "" ? fields : [message, ...fields];
  return parts.join(" ");
}
921
/**
 * Render a log record's message for pretty output.
 *
 * @param value The message value taken from the record.
 * @returns String messages verbatim (never quoted); any other value as
 *   rendered by `formatPrettyValue`.
 */
function formatPrettyLogMessage(value) {
  if (typeof value === "string") {
    return value;
  }
  return formatPrettyValue(value);
}
924
/**
 * Choose the label shown for an inline log field.
 *
 * @param field Field name as it appears on the log record.
 * @returns "duration" for the `durationMs` field; otherwise the field name itself.
 */
function prettyFieldLabel(field) {
  if (field === "durationMs") {
    return "duration";
  }
  return field;
}
927
/**
 * Render the value of an inline log field.
 *
 * @param field Field name as it appears on the log record.
 * @param value Field value.
 * @returns The value as rendered by `formatPrettyValue`, with an "ms" suffix
 *   when the field is `durationMs` and the value is a number.
 */
function formatPrettyFieldValue(field, value) {
  const text = formatPrettyValue(value);
  const isNumericDuration = field === "durationMs" && typeof value === "number";
  if (isNumericDuration) {
    return `${text}ms`;
  }
  return text;
}
931
/**
 * Render an arbitrary value as a compact token for pretty log output.
 *
 * - finite numbers and booleans: their `String()` form
 * - non-finite numbers: whatever `JSON.stringify` produces for them
 * - strings: verbatim when `isBarePrettyValue` accepts them, otherwise JSON-quoted
 * - `null`: the text "null"
 * - anything else: its JSON form, falling back to `String()` when
 *   `JSON.stringify` returns `undefined`
 *
 * @param value Value to render.
 * @returns The rendered text.
 */
function formatPrettyValue(value) {
  if (value === null) {
    return "null";
  }
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? String(value) : JSON.stringify(value);
    case "boolean":
      return String(value);
    case "string":
      return isBarePrettyValue(value) ? value : JSON.stringify(value);
    default:
      return JSON.stringify(value) ?? String(value);
  }
}
946
/**
 * Decide whether a string can be printed in pretty log output without quoting.
 *
 * @param value String to check.
 * @returns `true` when `value` is non-empty and made up only of ASCII
 *   letters, digits and the punctuation listed in the pattern below;
 *   whitespace and double quotes are among the characters that make it `false`.
 */
function isBarePrettyValue(value) {
  const barePattern = /^[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]+$/;
  return barePattern.test(value);
}
856
949
  function isLogFormat(value) {
857
950
  return LOG_FORMATS.includes(value);
858
951
  }
@@ -1439,6 +1532,8 @@ function epochSeconds() {
1439
1532
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
1440
1533
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
1441
1534
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
1535
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
1536
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
1442
1537
  var MAX_TRACKED_MODELS = 200;
1443
1538
  var MAX_MODEL_LABEL_LENGTH = 200;
1444
1539
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -1447,6 +1542,9 @@ var UNKNOWN_MODEL = "unknown";
1447
1542
  function emptyModelTotals() {
1448
1543
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
1449
1544
  }
1545
/**
 * Report whether a route is listed in PROMETHEUS_CACHE_NEUTRAL_ROUTES.
 *
 * @param route Route name, or `undefined` when the request has none.
 * @returns `false` for `undefined`; otherwise whether the set contains the route.
 */
function isPrometheusCacheNeutralRoute(route) {
  if (route === void 0) {
    return false;
  }
  return PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
}
1450
1548
  var MetricsRegistry = class {
1451
1549
  #startedAtMs;
1452
1550
  #inFlight = 0;
@@ -1458,11 +1556,16 @@ var MetricsRegistry = class {
1458
1556
  #copilotQuota;
1459
1557
  #githubRateLimit = /* @__PURE__ */ new Map();
1460
1558
  #extraction = { extracted: 0, missing: 0 };
1559
+ #generation = 0;
1560
+ #prometheusCache;
1461
1561
  constructor(options = {}) {
1462
1562
  this.#startedAtMs = (options.now ?? Date.now)();
1463
1563
  }
1464
1564
  /** Mark a request as started; pair with exactly one {@link observe}. */
1465
1565
  startRequest(route) {
1566
+ if (!isPrometheusCacheNeutralRoute(route)) {
1567
+ this.#changed();
1568
+ }
1466
1569
  this.#inFlight += 1;
1467
1570
  if (route) {
1468
1571
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -1470,6 +1573,9 @@ var MetricsRegistry = class {
1470
1573
  }
1471
1574
  /** Record a completed request and clear its in-flight slot. */
1472
1575
  observe(observation) {
1576
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
1577
+ this.#changed();
1578
+ }
1473
1579
  if (this.#inFlight > 0) {
1474
1580
  this.#inFlight -= 1;
1475
1581
  }
@@ -1490,6 +1596,7 @@ var MetricsRegistry = class {
1490
1596
  * rising miss rate flags clients whose token usage is going unaccounted.
1491
1597
  */
1492
1598
  recordTokenExtraction(extracted) {
1599
+ this.#changed();
1493
1600
  if (extracted) {
1494
1601
  this.#extraction.extracted += 1;
1495
1602
  } else {
@@ -1498,6 +1605,7 @@ var MetricsRegistry = class {
1498
1605
  }
1499
1606
  /** Accumulate token counts for a model from one upstream completion. */
1500
1607
  recordTokens(model, usage) {
1608
+ this.#changed();
1501
1609
  const name = this.#modelLabel(model);
1502
1610
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
1503
1611
  totals.requests += 1;
@@ -1510,11 +1618,13 @@ var MetricsRegistry = class {
1510
1618
  }
1511
1619
  /** Record one upstream Copilot call and whether it succeeded. */
1512
1620
  recordUpstream(path, ok) {
1621
+ this.#changed();
1513
1622
  const key = labelKey(path, ok ? "ok" : "error");
1514
1623
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
1515
1624
  }
1516
1625
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
1517
1626
  recordCopilotQuota(usage) {
1627
+ this.#changed();
1518
1628
  this.#copilotQuota = usage;
1519
1629
  }
1520
1630
  /**
@@ -1526,6 +1636,7 @@ var MetricsRegistry = class {
1526
1636
  if (!rateLimit) {
1527
1637
  return;
1528
1638
  }
1639
+ this.#changed();
1529
1640
  const resource = this.#rateLimitResource(rateLimit.resource);
1530
1641
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
1531
1642
  }
@@ -1562,6 +1673,9 @@ var MetricsRegistry = class {
1562
1673
  }
1563
1674
  this.#durations.set(route, entry);
1564
1675
  }
1676
+ #changed() {
1677
+ this.#generation += 1;
1678
+ }
1565
1679
  /** A JSON-friendly view of the current counters. */
1566
1680
  snapshot(nowOrOptions = Date.now) {
1567
1681
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -1665,13 +1779,18 @@ var MetricsRegistry = class {
1665
1779
  }
1666
1780
  /** Render the Prometheus text exposition format (version 0.0.4). */
1667
1781
  renderPrometheus(now = Date.now) {
1782
+ const nowMs = now();
1783
+ const cached = this.#prometheusCache;
1784
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
1785
+ return cached.text;
1786
+ }
1668
1787
  const lines = [];
1669
1788
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
1670
1789
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
1671
1790
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
1672
1791
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
1673
1792
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
1674
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
1793
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
1675
1794
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
1676
1795
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
1677
1796
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -1737,8 +1856,10 @@ var MetricsRegistry = class {
1737
1856
  }
1738
1857
  this.#renderGithubRateLimit(lines);
1739
1858
  this.#renderCopilotQuota(lines);
1740
- return `${lines.join("\n")}
1859
+ const text = `${lines.join("\n")}
1741
1860
  `;
1861
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
1862
+ return text;
1742
1863
  }
1743
1864
  #renderGithubRateLimit(lines) {
1744
1865
  const entries = [...this.#githubRateLimit.values()];
@@ -1873,21 +1994,6 @@ var MetricsRegistry = class {
1873
1994
  }
1874
1995
  }
1875
1996
  };
1876
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
1877
- const body = response.body;
1878
- if (!body) {
1879
- return response;
1880
- }
1881
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
1882
- return new Response(
1883
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
1884
- {
1885
- headers: response.headers,
1886
- status: response.status,
1887
- statusText: response.statusText
1888
- }
1889
- );
1890
- }
1891
1997
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
1892
1998
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
1893
1999
  if (isSse) {
@@ -1902,9 +2008,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
1902
2008
  }
1903
2009
  accumulator.finish();
1904
2010
  }
1905
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2011
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
1906
2012
  const reader = stream.getReader();
1907
2013
  let aborted = signal?.aborted ?? false;
2014
+ let completed = false;
1908
2015
  let released = false;
1909
2016
  const onAbort = () => {
1910
2017
  aborted = true;
@@ -1933,6 +2040,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
1933
2040
  }
1934
2041
  released = true;
1935
2042
  signal?.removeEventListener("abort", onAbort);
2043
+ if (!completed) {
2044
+ completed = true;
2045
+ onComplete?.();
2046
+ }
1936
2047
  reader.releaseLock();
1937
2048
  };
1938
2049
  const observeChunk = (chunkBytes) => {
@@ -2042,6 +2153,9 @@ function considerSseLine(line, consider) {
2042
2153
  if (!data || data === "[DONE]") {
2043
2154
  return;
2044
2155
  }
2156
+ if (!data.includes('"usage"')) {
2157
+ return;
2158
+ }
2045
2159
  const parsed = safeJsonParse(data);
2046
2160
  if (parsed !== void 0) {
2047
2161
  consider(parsed);
@@ -3924,17 +4038,28 @@ function createHoopilotHandler(options = {}) {
3924
4038
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3925
4039
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3926
4040
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
4041
+ const usageAccountingMode = resolveUsageAccountingMode(options);
4042
+ const accessLog = resolveAccessLog(options);
4043
+ const responseUsage = /* @__PURE__ */ new WeakMap();
4044
+ const markUsage = (response, fallbackModel, cost) => {
4045
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
4046
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
4047
+ }
4048
+ return response;
4049
+ };
3927
4050
  const requestContext = /* @__PURE__ */ new WeakMap();
3928
4051
  const app = buildApp({
3929
4052
  apiKey,
3930
4053
  allowedOrigins,
3931
4054
  bufferProxyBodies,
3932
4055
  client,
4056
+ markUsage,
3933
4057
  metrics,
3934
4058
  readUsage,
3935
4059
  recordExtraction,
3936
4060
  recordTokens,
3937
- requestContext
4061
+ requestContext,
4062
+ usageAccountingMode
3938
4063
  });
3939
4064
  return async (request) => {
3940
4065
  const startedAt = performance.now();
@@ -3970,11 +4095,14 @@ function createHoopilotHandler(options = {}) {
3970
4095
  }
3971
4096
  return finishResponse(response, {
3972
4097
  corsOrigin,
4098
+ accessLog,
3973
4099
  logger: requestLogger,
3974
4100
  method: request.method,
3975
4101
  metrics,
3976
4102
  requestId,
4103
+ signal: request.signal,
3977
4104
  route,
4105
+ usageObservation: responseUsage.get(response),
3978
4106
  startedAt,
3979
4107
  closeConnection: bufferProxyBodies,
3980
4108
  trackStreamingBody: !bufferProxyBodies
@@ -3987,11 +4115,13 @@ function buildApp(deps) {
3987
4115
  allowedOrigins,
3988
4116
  bufferProxyBodies,
3989
4117
  client,
4118
+ markUsage,
3990
4119
  metrics,
3991
4120
  readUsage,
3992
4121
  recordExtraction,
3993
4122
  recordTokens,
3994
- requestContext
4123
+ requestContext,
4124
+ usageAccountingMode
3995
4125
  } = deps;
3996
4126
  const contextFor = (request) => {
3997
4127
  const stored = requestContext.get(request);
@@ -4079,11 +4209,13 @@ function buildApp(deps) {
4079
4209
  ({ request }) => handleAnthropicMessages(
4080
4210
  client,
4081
4211
  metrics,
4212
+ markUsage,
4082
4213
  recordTokens,
4083
4214
  recordExtraction,
4084
4215
  request,
4085
4216
  loggerFor(request),
4086
- bufferProxyBodies
4217
+ bufferProxyBodies,
4218
+ usageAccountingMode
4087
4219
  ),
4088
4220
  noBody
4089
4221
  ).post(
@@ -4095,11 +4227,13 @@ function buildApp(deps) {
4095
4227
  ({ request }) => handleChatCompletions(
4096
4228
  client,
4097
4229
  metrics,
4230
+ markUsage,
4098
4231
  recordTokens,
4099
4232
  recordExtraction,
4100
4233
  request,
4101
4234
  loggerFor(request),
4102
- bufferProxyBodies
4235
+ bufferProxyBodies,
4236
+ usageAccountingMode
4103
4237
  ),
4104
4238
  noBody
4105
4239
  ).post(
@@ -4107,11 +4241,13 @@ function buildApp(deps) {
4107
4241
  ({ request }) => handleCompletions(
4108
4242
  client,
4109
4243
  metrics,
4244
+ markUsage,
4110
4245
  recordTokens,
4111
4246
  recordExtraction,
4112
4247
  request,
4113
4248
  loggerFor(request),
4114
- bufferProxyBodies
4249
+ bufferProxyBodies,
4250
+ usageAccountingMode
4115
4251
  ),
4116
4252
  noBody
4117
4253
  ).post(
@@ -4122,7 +4258,8 @@ function buildApp(deps) {
4122
4258
  recordTokens,
4123
4259
  recordExtraction,
4124
4260
  request,
4125
- loggerFor(request)
4261
+ loggerFor(request),
4262
+ usageAccountingMode
4126
4263
  ),
4127
4264
  noBody
4128
4265
  ).post(
@@ -4130,11 +4267,13 @@ function buildApp(deps) {
4130
4267
  ({ request }) => handleResponses(
4131
4268
  client,
4132
4269
  metrics,
4270
+ markUsage,
4133
4271
  recordTokens,
4134
4272
  recordExtraction,
4135
4273
  request,
4136
4274
  loggerFor(request),
4137
- bufferProxyBodies
4275
+ bufferProxyBodies,
4276
+ usageAccountingMode
4138
4277
  ),
4139
4278
  noBody
4140
4279
  );
@@ -4187,7 +4326,7 @@ function startHoopilotServer(options = {}) {
4187
4326
  url: `http://${urlHost(host)}:${server.port}`
4188
4327
  };
4189
4328
  }
4190
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4329
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4191
4330
  const anthropicRequest = await readJson(request);
4192
4331
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4193
4332
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4200,36 +4339,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4200
4339
  if (isStreamingResponse(upstream) && upstream.body) {
4201
4340
  if (bufferProxyBodies) {
4202
4341
  const text = await upstream.text();
4203
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4342
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4204
4343
  return proxyResponse(
4205
4344
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4206
4345
  );
4207
4346
  }
4208
- const observed = observeResponseUsage(
4209
- upstream,
4347
+ return markUsage(
4348
+ proxyResponse(
4349
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4350
+ headers: upstream.headers,
4351
+ status: upstream.status,
4352
+ statusText: upstream.statusText
4353
+ })
4354
+ ),
4210
4355
  model,
4211
- recordTokens,
4212
- request.signal,
4213
- recordExtraction
4214
- );
4215
- if (!observed.body) {
4216
- return proxyResponse(observed);
4217
- }
4218
- return proxyResponse(
4219
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4220
- headers: observed.headers,
4221
- status: observed.status,
4222
- statusText: observed.statusText
4223
- })
4356
+ "body"
4224
4357
  );
4225
4358
  }
4226
4359
  const body = asRecord(await upstream.json());
4227
- const usage = extractTokenUsage(body.usage);
4228
- if (usage) {
4229
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4230
- recordTokens(responseModel || model, usage);
4231
- }
4232
- recordExtraction(usage !== void 0);
4360
+ recordParsedUsage(
4361
+ body.usage,
4362
+ typeof body.model === "string" ? body.model.trim() : model,
4363
+ model,
4364
+ usageAccountingMode,
4365
+ recordTokens,
4366
+ recordExtraction
4367
+ );
4233
4368
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4234
4369
  }
4235
4370
  async function handleAnthropicCountTokens(request) {
@@ -4256,7 +4391,7 @@ async function handleModels(client, metrics, signal, logger) {
4256
4391
  logUpstreamSuccess(logger, "/models", upstream.status);
4257
4392
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4258
4393
  }
4259
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4394
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4260
4395
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4261
4396
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4262
4397
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4265,18 +4400,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4265
4400
  }
4266
4401
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4267
4402
  const model = normalizeRequestedModel(chatRequest.model);
4268
- return proxyResponse(
4269
- await responseWithObservedUsage(
4270
- upstream,
4271
- model,
4272
- recordTokens,
4273
- request.signal,
4274
- bufferProxyBodies,
4275
- recordExtraction
4276
- )
4403
+ return proxiedResponseWithOptionalUsage(
4404
+ upstream,
4405
+ model,
4406
+ markUsage,
4407
+ usageAccountingMode,
4408
+ recordTokens,
4409
+ recordExtraction,
4410
+ bufferProxyBodies
4277
4411
  );
4278
4412
  }
4279
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4413
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4280
4414
  const body = await readJson(request);
4281
4415
  const upstream = await client.chatCompletions(
4282
4416
  completionsRequestToChatCompletion(body),
@@ -4291,34 +4425,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4291
4425
  if (isStreamingResponse(upstream) && upstream.body) {
4292
4426
  if (bufferProxyBodies) {
4293
4427
  const upstreamText = await upstream.text();
4294
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4428
+ recordBufferedUsage(
4429
+ upstreamText,
4430
+ true,
4431
+ model,
4432
+ usageAccountingMode,
4433
+ recordTokens,
4434
+ recordExtraction
4435
+ );
4295
4436
  const text = completionSseTextFromChatSseText(upstreamText);
4296
4437
  return proxyResponse(responseFromText(upstream, text));
4297
4438
  }
4298
- return proxyResponse(
4299
- observeResponseUsage(
4439
+ return markUsage(
4440
+ proxyResponse(
4300
4441
  new Response(completionStreamFromChatStream(upstream.body), {
4301
4442
  headers: upstream.headers,
4302
4443
  status: upstream.status,
4303
4444
  statusText: upstream.statusText
4304
- }),
4305
- model,
4306
- recordTokens,
4307
- request.signal,
4308
- recordExtraction
4309
- )
4445
+ })
4446
+ ),
4447
+ model,
4448
+ "body"
4310
4449
  );
4311
4450
  }
4312
4451
  const completion = asRecord(await upstream.json());
4313
- const usage = extractTokenUsage(completion.usage);
4314
- if (usage) {
4315
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4316
- recordTokens(responseModel || model, usage);
4317
- }
4318
- recordExtraction(usage !== void 0);
4452
+ recordParsedUsage(
4453
+ completion.usage,
4454
+ typeof completion.model === "string" ? completion.model.trim() : model,
4455
+ model,
4456
+ usageAccountingMode,
4457
+ recordTokens,
4458
+ recordExtraction
4459
+ );
4319
4460
  return jsonResponse(chatCompletionToCompletion(completion));
4320
4461
  }
4321
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4462
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4322
4463
  const { json, text: body } = await readJsonText(request);
4323
4464
  if (isResponsesCompactionRequest(json)) {
4324
4465
  return handleResponsesCompactionV2(
@@ -4328,7 +4469,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4328
4469
  recordExtraction,
4329
4470
  json,
4330
4471
  request,
4331
- logger
4472
+ logger,
4473
+ usageAccountingMode
4332
4474
  );
4333
4475
  }
4334
4476
  const upstream = await client.responses(
@@ -4341,18 +4483,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4341
4483
  }
4342
4484
  logUpstreamSuccess(logger, "/responses", upstream.status);
4343
4485
  const model = normalizeRequestedModel(json.model);
4344
- return proxyResponse(
4345
- await responseWithObservedUsage(
4346
- upstream,
4347
- model,
4348
- recordTokens,
4349
- request.signal,
4350
- bufferProxyBodies,
4351
- recordExtraction
4352
- )
4486
+ return proxiedResponseWithOptionalUsage(
4487
+ upstream,
4488
+ model,
4489
+ markUsage,
4490
+ usageAccountingMode,
4491
+ recordTokens,
4492
+ recordExtraction,
4493
+ bufferProxyBodies
4353
4494
  );
4354
4495
  }
4355
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4496
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4356
4497
  const body = await readJson(request);
4357
4498
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4358
4499
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4362,16 +4503,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4362
4503
  logUpstreamSuccess(logger, "/responses", upstream.status);
4363
4504
  const isSse = isStreamingResponse(upstream);
4364
4505
  const text = await upstream.text();
4365
- recordResponseTextUsage(
4506
+ recordBufferedUsage(
4366
4507
  text,
4367
4508
  isSse,
4368
4509
  normalizeRequestedModel(body.model),
4510
+ usageAccountingMode,
4369
4511
  recordTokens,
4370
4512
  recordExtraction
4371
4513
  );
4372
4514
  return jsonResponse(responsesCompactionResult(text, isSse));
4373
4515
  }
4374
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4516
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4375
4517
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4376
4518
  metrics.recordUpstream("/responses", upstream.ok);
4377
4519
  if (!upstream.ok) {
@@ -4381,20 +4523,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4381
4523
  const isSse = isStreamingResponse(upstream);
4382
4524
  const text = await upstream.text();
4383
4525
  const model = normalizeRequestedModel(json.model);
4384
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4526
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4385
4527
  if (json.stream === true) {
4386
4528
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4387
4529
  }
4388
4530
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4389
4531
  }
4390
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4532
/**
 * Turn an upstream response into the proxied response, arranging for token
 * usage to be accounted for along the way.
 *
 * When bodies are buffered and the response has a body, the whole body is read
 * as text, handed to `recordBufferedUsage`, and a new response is built from
 * that text. Otherwise the response is proxied as-is and passed to `markUsage`
 * with the cost tag "body".
 *
 * @param response Upstream response.
 * @param fallbackModel Model name forwarded to the usage recorders.
 * @param markUsage Callback `(response, fallbackModel, cost)` returning the response.
 * @param usageAccountingMode Usage accounting mode forwarded to `recordBufferedUsage`.
 * @param recordTokens Token recorder forwarded to `recordBufferedUsage`.
 * @param recordExtraction Extraction-outcome recorder forwarded to `recordBufferedUsage`.
 * @param bufferProxyBodies Whether response bodies are read fully before being returned.
 * @returns The response to send to the client.
 */
async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
  const isSse = isStreamingResponse(response);
  const shouldBuffer = bufferProxyBodies && response.body;
  if (!shouldBuffer) {
    return markUsage(proxyResponse(response), fallbackModel, "body");
  }
  const bodyText = await response.text();
  recordBufferedUsage(bodyText, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction);
  return proxyResponse(responseFromText(response, bodyText));
}
4548
/**
 * Record token usage taken from an already-parsed response body.
 *
 * Does nothing unless `shouldExtractUsage` allows the "parsed" cost for the
 * given mode. Otherwise the raw usage is run through `extractTokenUsage`;
 * tokens are recorded when that yields a value, and the extraction outcome is
 * reported either way.
 *
 * @param rawUsage The `usage` value from the response body.
 * @param responseModel Model named by the response; falsy values fall back to `fallbackModel`.
 * @param fallbackModel Model name to use when the response names none.
 * @param usageAccountingMode Usage accounting mode.
 * @param recordTokens Callback `(model, usage)`.
 * @param recordExtraction Callback `(extracted: boolean)`.
 */
function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
  if (shouldExtractUsage(usageAccountingMode, "parsed")) {
    const tokenUsage = extractTokenUsage(rawUsage);
    const wasExtracted = tokenUsage !== void 0;
    if (tokenUsage) {
      recordTokens(responseModel || fallbackModel, tokenUsage);
    }
    recordExtraction(wasExtracted);
  }
}
4558
/**
 * Record token usage from a fully buffered response body.
 *
 * Delegates to `recordResponseTextUsage` only when `shouldExtractUsage` allows
 * the "buffered" cost for the given mode; otherwise it does nothing.
 *
 * @param text Buffered response body.
 * @param isSse Whether the body is a server-sent event stream.
 * @param fallbackModel Model name forwarded to the recorder.
 * @param usageAccountingMode Usage accounting mode.
 * @param recordTokens Token recorder forwarded to `recordResponseTextUsage`.
 * @param recordExtraction Extraction-outcome recorder forwarded to `recordResponseTextUsage`.
 */
function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
  const isEnabled = shouldExtractUsage(usageAccountingMode, "buffered");
  if (isEnabled) {
    recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
  }
}
4399
4564
  async function proxyError(upstream, logger) {
4400
4565
  const text = await upstream.text();
@@ -4450,7 +4615,24 @@ function shouldBufferProxyBodies(mode) {
4450
4615
  }
4451
4616
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4452
4617
  }
4618
/**
 * Determine the usage accounting mode for a handler.
 *
 * Precedence: `options.usageAccountingMode`, then the
 * `HOOPILOT_USAGE_ACCOUNTING` entry of `options.env` (as returned by
 * `envValue`), then "basic". The chosen value is validated with
 * `parseUsageAccountingMode`.
 *
 * @param options Handler options.
 * @returns The validated mode.
 */
function resolveUsageAccountingMode(options) {
  const explicitMode = options.usageAccountingMode;
  if (explicitMode !== void 0 && explicitMode !== null) {
    return parseUsageAccountingMode(explicitMode);
  }
  const envMode = envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING);
  return parseUsageAccountingMode(envMode ?? "basic");
}
4622
/**
 * Determine whether access logging is enabled.
 *
 * Precedence: `options.accessLog`, then the `HOOPILOT_ACCESS_LOG` entry of
 * `options.env` parsed by `parseBooleanEnv`, then `false`.
 *
 * @param options Handler options.
 * @returns The resolved setting.
 */
function resolveAccessLog(options) {
  const explicitSetting = options.accessLog;
  if (explicitSetting !== void 0 && explicitSetting !== null) {
    return explicitSetting;
  }
  const envSetting = parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG");
  return envSetting ?? false;
}
4625
/**
 * Decide whether token usage should be extracted for a given accounting mode
 * and extraction cost.
 *
 * @param mode Usage accounting mode.
 * @param cost Cost tag of the extraction being considered.
 * @returns `false` for mode "off"; for mode "basic", `true` only when `cost`
 *   is "parsed"; `true` for every other mode.
 */
function shouldExtractUsage(mode, cost) {
  switch (mode) {
    case "off":
      return false;
    case "basic":
      return cost === "parsed";
    default:
      return true;
  }
}
4453
4634
  function finishResponse(response, options) {
4635
+ const usageObservation = options.usageObservation;
4454
4636
  const withRequestId = responseWithRequestId(
4455
4637
  response,
4456
4638
  options.requestId,
@@ -4459,11 +4641,36 @@ function finishResponse(response, options) {
4459
4641
  );
4460
4642
  const stream = isStreamingResponse(withRequestId);
4461
4643
  const status = withRequestId.status;
4644
+ let completed = false;
4462
4645
  const complete = () => {
4646
+ if (completed) {
4647
+ return;
4648
+ }
4649
+ completed = true;
4463
4650
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4464
4651
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4465
- logRequestCompleted(options.logger, status, stream, durationMs);
4652
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4466
4653
  };
4654
+ if (withRequestId.body && usageObservation) {
4655
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4656
+ const observedBody = streamWithUsageObservation(
4657
+ withRequestId.body,
4658
+ stream,
4659
+ usageObservation.fallbackModel,
4660
+ usageObservation.recordTokens,
4661
+ options.signal,
4662
+ usageObservation.recordExtraction,
4663
+ shouldTrackCompletion ? complete : void 0
4664
+ );
4665
+ if (!shouldTrackCompletion) {
4666
+ complete();
4667
+ }
4668
+ return new Response(observedBody, {
4669
+ headers: withRequestId.headers,
4670
+ status,
4671
+ statusText: withRequestId.statusText
4672
+ });
4673
+ }
4467
4674
  if (stream && withRequestId.body && options.trackStreamingBody) {
4468
4675
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4469
4676
  headers: withRequestId.headers,
@@ -4533,7 +4740,7 @@ function trackStreamCompletion(body, onComplete) {
4533
4740
  }
4534
4741
  });
4535
4742
  }
4536
- function logRequestCompleted(logger, status, stream, durationMs) {
4743
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4537
4744
  const fields = {
4538
4745
  durationMs,
4539
4746
  event: "http.request.completed",
@@ -4548,6 +4755,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4548
4755
  logger.warn(fields, "request completed with client error");
4549
4756
  return;
4550
4757
  }
4758
+ if (!accessLog) {
4759
+ return;
4760
+ }
4551
4761
  logger.info(fields, "request completed");
4552
4762
  }
4553
4763
  function requestIdFor(request) {
@@ -4592,11 +4802,17 @@ var API_ROUTES = [
4592
4802
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4593
4803
  { method: "POST", path: "/v1/responses", name: "responses" }
4594
4804
  ];
4805
/** Lookup table from `routeKey(method, path)` to route name, built once from API_ROUTES. */
var ROUTE_NAMES = new Map(
  API_ROUTES.map(({ method, path, name }) => [routeKey(method, path), name])
);
4595
4808
  function routeFor(method, path) {
4596
4809
  if (method === "OPTIONS") {
4597
4810
  return "cors.preflight";
4598
4811
  }
4599
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4812
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4813
+ }
4814
/**
 * Build the lookup key for a method/path pair.
 *
 * @param method HTTP method.
 * @param path Request path.
 * @returns The method and the path joined by a single space.
 */
function routeKey(method, path) {
  const separator = " ";
  return `${method}${separator}${path}`;
}
4601
4817
  function isStreamingResponse(response) {
4602
4818
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4654,12 +4870,24 @@ async function handleUsage(metrics, readUsage, request) {
4654
4870
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4655
4871
  const usagePath = "/copilot_internal/user";
4656
4872
  let cache;
4657
- return async (signal) => {
4873
+ let inFlight;
4874
+ return async () => {
4658
4875
  if (cache && now() - cache.atMs < ttlMs) {
4659
4876
  return cache.result;
4660
4877
  }
4878
+ if (inFlight) {
4879
+ return inFlight;
4880
+ }
4881
+ inFlight = readFreshUsage();
4882
+ try {
4883
+ return await inFlight;
4884
+ } finally {
4885
+ inFlight = void 0;
4886
+ }
4887
+ };
4888
+ async function readFreshUsage() {
4661
4889
  try {
4662
- const upstream = await client.usage(signal);
4890
+ const upstream = await client.usage();
4663
4891
  metrics.recordUpstream(usagePath, upstream.ok);
4664
4892
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4665
4893
  if (!upstream.ok) {
@@ -4681,7 +4909,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4681
4909
  cache = { atMs: now(), result };
4682
4910
  return result;
4683
4911
  }
4684
- };
4912
+ }
4685
4913
  }
4686
4914
  export {
4687
4915
  COPILOT_USAGE_API_VERSION,