@openhoo/hoopilot 2.1.8 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,6 +3,7 @@ import { chmodSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync }
3
3
  import { dirname, join } from "path";
4
4
 
5
5
  // src/util.ts
6
+ import { isIP } from "net";
6
7
  function trimTrailingSlash(value) {
7
8
  return value.replace(/\/+$/, "");
8
9
  }
@@ -39,9 +40,16 @@ function parseUrl(rawUrl) {
39
40
  }
40
41
  return url;
41
42
  }
42
- var LOOPBACK_HOSTNAMES = /* @__PURE__ */ new Set(["localhost", "127.0.0.1", "::1", "[::1]"]);
43
43
  function isLoopbackHostname(host) {
44
- return LOOPBACK_HOSTNAMES.has(host);
44
+ const normalized = host.trim().toLowerCase();
45
+ const address = normalized.startsWith("[") && normalized.endsWith("]") ? normalized.slice(1, -1) : normalized;
46
+ if (address === "localhost") {
47
+ return true;
48
+ }
49
+ if (isIP(address) === 4) {
50
+ return address.startsWith("127.");
51
+ }
52
+ return isIP(address) === 6 && (address === "::1" || address === "0:0:0:0:0:0:0:1");
45
53
  }
46
54
  function isLoopbackHttpUrl(url) {
47
55
  return url.protocol === "http:" && isLoopbackHostname(url.hostname);
@@ -89,12 +97,38 @@ var STREAMING_PROXY_MODES = [
89
97
  "buffer",
90
98
  "live"
91
99
  ];
100
// Accepted values for the usage accounting mode setting; the join of this
// list is what the validation error message shows to the user.
var USAGE_ACCOUNTING_MODES = ["basic", "full", "off"];
92
105
  function parseStreamingProxyMode(value) {
93
106
  if (STREAMING_PROXY_MODES.includes(value)) {
94
107
  return value;
95
108
  }
96
109
  throw new Error(`Invalid stream mode: ${value}. Expected ${STREAMING_PROXY_MODES.join(", ")}.`);
97
110
  }
111
/**
 * Validate a usage accounting mode.
 *
 * @param {unknown} value Candidate mode.
 * @returns The same value when it is listed in USAGE_ACCOUNTING_MODES.
 * @throws {Error} When the value is not one of the listed modes.
 */
function parseUsageAccountingMode(value) {
  const recognised = USAGE_ACCOUNTING_MODES.includes(value);
  if (!recognised) {
    throw new Error(
      `Invalid usage accounting mode: ${value}. Expected ${USAGE_ACCOUNTING_MODES.join(", ")}.`
    );
  }
  return value;
}
119
/**
 * Interpret an environment-style setting as a boolean.
 *
 * The raw value is first passed through `envValue` (defined elsewhere in this
 * file; presumably it maps missing/blank input to undefined - verify there)
 * and then compared case-insensitively against the accepted spellings.
 *
 * @param {unknown} value Raw setting value.
 * @param {string} name Setting name, used only in the error message.
 * @returns {boolean | undefined} true for 1/true/yes/on, false for
 *   0/false/no/off, undefined when `envValue` yields nothing.
 * @throws {Error} When a value is present but is not an accepted spelling.
 */
function parseBooleanEnv(value, name) {
  const normalized = envValue(value)?.toLowerCase();
  switch (normalized) {
    case void 0:
      return void 0;
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      throw new Error(`${name} must be one of: 1, 0, true, false, yes, no, on, off.`);
  }
}
98
132
 
99
133
  // src/auth-store.ts
100
134
  var StoredCopilotAuthError = class extends Error {
@@ -1506,6 +1540,8 @@ function epochSeconds() {
1506
1540
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
1507
1541
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
1508
1542
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
1543
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
1544
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
1509
1545
  var MAX_TRACKED_MODELS = 200;
1510
1546
  var MAX_MODEL_LABEL_LENGTH = 200;
1511
1547
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -1514,6 +1550,9 @@ var UNKNOWN_MODEL = "unknown";
1514
1550
  function emptyModelTotals() {
1515
1551
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
1516
1552
  }
1553
/**
 * Tell whether a route is listed in PROMETHEUS_CACHE_NEUTRAL_ROUTES.
 *
 * @param {string | undefined} route Route label, possibly absent.
 * @returns {boolean} False when the route is undefined or not in the set.
 */
function isPrometheusCacheNeutralRoute(route) {
  if (route === void 0) {
    return false;
  }
  return PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
}
1517
1556
  var MetricsRegistry = class {
1518
1557
  #startedAtMs;
1519
1558
  #inFlight = 0;
@@ -1525,11 +1564,16 @@ var MetricsRegistry = class {
1525
1564
  #copilotQuota;
1526
1565
  #githubRateLimit = /* @__PURE__ */ new Map();
1527
1566
  #extraction = { extracted: 0, missing: 0 };
1567
+ #generation = 0;
1568
+ #prometheusCache;
1528
1569
  constructor(options = {}) {
1529
1570
  this.#startedAtMs = (options.now ?? Date.now)();
1530
1571
  }
1531
1572
  /** Mark a request as started; pair with exactly one {@link observe}. */
1532
1573
  startRequest(route) {
1574
+ if (!isPrometheusCacheNeutralRoute(route)) {
1575
+ this.#changed();
1576
+ }
1533
1577
  this.#inFlight += 1;
1534
1578
  if (route) {
1535
1579
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -1537,6 +1581,9 @@ var MetricsRegistry = class {
1537
1581
  }
1538
1582
  /** Record a completed request and clear its in-flight slot. */
1539
1583
  observe(observation) {
1584
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
1585
+ this.#changed();
1586
+ }
1540
1587
  if (this.#inFlight > 0) {
1541
1588
  this.#inFlight -= 1;
1542
1589
  }
@@ -1557,6 +1604,7 @@ var MetricsRegistry = class {
1557
1604
  * rising miss rate flags clients whose token usage is going unaccounted.
1558
1605
  */
1559
1606
  recordTokenExtraction(extracted) {
1607
+ this.#changed();
1560
1608
  if (extracted) {
1561
1609
  this.#extraction.extracted += 1;
1562
1610
  } else {
@@ -1565,6 +1613,7 @@ var MetricsRegistry = class {
1565
1613
  }
1566
1614
  /** Accumulate token counts for a model from one upstream completion. */
1567
1615
  recordTokens(model, usage) {
1616
+ this.#changed();
1568
1617
  const name = this.#modelLabel(model);
1569
1618
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
1570
1619
  totals.requests += 1;
@@ -1577,11 +1626,13 @@ var MetricsRegistry = class {
1577
1626
  }
1578
1627
  /** Record one upstream Copilot call and whether it succeeded. */
1579
1628
  recordUpstream(path, ok) {
1629
+ this.#changed();
1580
1630
  const key = labelKey(path, ok ? "ok" : "error");
1581
1631
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
1582
1632
  }
1583
1633
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
1584
1634
  recordCopilotQuota(usage) {
1635
+ this.#changed();
1585
1636
  this.#copilotQuota = usage;
1586
1637
  }
1587
1638
  /**
@@ -1593,6 +1644,7 @@ var MetricsRegistry = class {
1593
1644
  if (!rateLimit) {
1594
1645
  return;
1595
1646
  }
1647
+ this.#changed();
1596
1648
  const resource = this.#rateLimitResource(rateLimit.resource);
1597
1649
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
1598
1650
  }
@@ -1629,6 +1681,9 @@ var MetricsRegistry = class {
1629
1681
  }
1630
1682
  this.#durations.set(route, entry);
1631
1683
  }
1684
+ #changed() {
1685
+ this.#generation += 1;
1686
+ }
1632
1687
  /** A JSON-friendly view of the current counters. */
1633
1688
  snapshot(nowOrOptions = Date.now) {
1634
1689
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -1732,13 +1787,18 @@ var MetricsRegistry = class {
1732
1787
  }
1733
1788
  /** Render the Prometheus text exposition format (version 0.0.4). */
1734
1789
  renderPrometheus(now = Date.now) {
1790
+ const nowMs = now();
1791
+ const cached = this.#prometheusCache;
1792
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
1793
+ return cached.text;
1794
+ }
1735
1795
  const lines = [];
1736
1796
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
1737
1797
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
1738
1798
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
1739
1799
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
1740
1800
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
1741
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
1801
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
1742
1802
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
1743
1803
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
1744
1804
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -1804,8 +1864,10 @@ var MetricsRegistry = class {
1804
1864
  }
1805
1865
  this.#renderGithubRateLimit(lines);
1806
1866
  this.#renderCopilotQuota(lines);
1807
- return `${lines.join("\n")}
1867
+ const text = `${lines.join("\n")}
1808
1868
  `;
1869
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
1870
+ return text;
1809
1871
  }
1810
1872
  #renderGithubRateLimit(lines) {
1811
1873
  const entries = [...this.#githubRateLimit.values()];
@@ -1940,21 +2002,6 @@ var MetricsRegistry = class {
1940
2002
  }
1941
2003
  }
1942
2004
  };
1943
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
1944
- const body = response.body;
1945
- if (!body) {
1946
- return response;
1947
- }
1948
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
1949
- return new Response(
1950
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
1951
- {
1952
- headers: response.headers,
1953
- status: response.status,
1954
- statusText: response.statusText
1955
- }
1956
- );
1957
- }
1958
2005
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
1959
2006
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
1960
2007
  if (isSse) {
@@ -1969,9 +2016,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
1969
2016
  }
1970
2017
  accumulator.finish();
1971
2018
  }
1972
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2019
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
1973
2020
  const reader = stream.getReader();
1974
2021
  let aborted = signal?.aborted ?? false;
2022
+ let completed = false;
1975
2023
  let released = false;
1976
2024
  const onAbort = () => {
1977
2025
  aborted = true;
@@ -2000,6 +2048,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
2000
2048
  }
2001
2049
  released = true;
2002
2050
  signal?.removeEventListener("abort", onAbort);
2051
+ if (!completed) {
2052
+ completed = true;
2053
+ onComplete?.();
2054
+ }
2003
2055
  reader.releaseLock();
2004
2056
  };
2005
2057
  const observeChunk = (chunkBytes) => {
@@ -2109,6 +2161,9 @@ function considerSseLine(line, consider) {
2109
2161
  if (!data || data === "[DONE]") {
2110
2162
  return;
2111
2163
  }
2164
+ if (!data.includes('"usage"')) {
2165
+ return;
2166
+ }
2112
2167
  const parsed = safeJsonParse(data);
2113
2168
  if (parsed !== void 0) {
2114
2169
  consider(parsed);
@@ -2362,7 +2417,7 @@ function anthropicMessagesToResponsesInput(messages) {
2362
2417
  arguments: JSON.stringify(asRecord(part.input)),
2363
2418
  cache_control: anthropicCacheControl(part.cache_control),
2364
2419
  call_id: textValue(part.id) || `call_hoopilot_${fallbackToolCallIndex++}`,
2365
- name: textValue(part.name),
2420
+ name: requiredAnthropicText(part.name, "tool_use name"),
2366
2421
  type: "function_call"
2367
2422
  })
2368
2423
  );
@@ -2508,7 +2563,7 @@ function anthropicTools(tools) {
2508
2563
  return removeUndefined({
2509
2564
  cache_control: anthropicCacheControl(record.cache_control),
2510
2565
  description: record.description,
2511
- name: record.name,
2566
+ name: requiredAnthropicText(record.name, "tool name"),
2512
2567
  parameters: record.input_schema,
2513
2568
  strict: record.strict,
2514
2569
  type: "function"
@@ -2581,12 +2636,19 @@ function anthropicToolChoice(toolChoice) {
2581
2636
  return "none";
2582
2637
  }
2583
2638
  if (type === "tool") {
2584
- return { name: textValue(record.name), type: "function" };
2639
+ return { name: requiredAnthropicText(record.name, "tool_choice name"), type: "function" };
2585
2640
  }
2586
2641
  throw new AnthropicCompatibilityError(
2587
2642
  `Anthropic tool_choice type "${type || "unknown"}" is not supported.`
2588
2643
  );
2589
2644
  }
2645
/**
 * Read a mandatory text field from an Anthropic request.
 *
 * @param {unknown} value Raw field value; converted with `textValue`, then trimmed.
 * @param {string} field Human-readable field name used in the error message.
 * @returns {string} The trimmed, non-empty text.
 * @throws {AnthropicCompatibilityError} When the trimmed text is empty.
 */
function requiredAnthropicText(value, field) {
  const trimmed = textValue(value).trim();
  if (trimmed) {
    return trimmed;
  }
  throw new AnthropicCompatibilityError(`Anthropic ${field} is required.`);
}
2590
2652
  function anthropicThinkingToReasoning(thinking) {
2591
2653
  const record = asRecord(thinking);
2592
2654
  if (Object.keys(record).length === 0) {
@@ -3875,6 +3937,17 @@ function isLoopbackOrigin(origin) {
3875
3937
  }
3876
3938
 
3877
3939
  // src/http/responses.ts
3940
// Connection-scoped header names that stripProxyUnsafeHeaders removes before a
// response is forwarded.
var HOP_BY_HOP_HEADERS = [
  "connection", "keep-alive",
  "proxy-authenticate", "proxy-authorization",
  "te", "trailer", "transfer-encoding", "upgrade"
];
// Body-describing header names that stripProxyUnsafeHeaders also removes
// (presumably because the forwarded body may differ from the upstream bytes).
var STALE_BODY_HEADERS = [
  "content-encoding",
  "content-length"
];
3878
3951
  function jsonResponse(body, status = 200) {
3879
3952
  return new Response(JSON.stringify(body), {
3880
3953
  headers: {
@@ -3914,9 +3987,7 @@ function responseFromText(source, text) {
3914
3987
  }
3915
3988
  function proxyResponse(upstream) {
3916
3989
  const headers = new Headers(upstream.headers);
3917
- headers.delete("content-encoding");
3918
- headers.delete("content-length");
3919
- headers.delete("transfer-encoding");
3990
+ stripProxyUnsafeHeaders(headers);
3920
3991
  for (const [key, value] of Object.entries(corsHeaders())) {
3921
3992
  headers.set(key, value);
3922
3993
  }
@@ -3942,6 +4013,23 @@ function websocketUnsupportedResponse() {
3942
4013
  response.headers.set("upgrade", "websocket");
3943
4014
  return response;
3944
4015
  }
4016
/**
 * Remove headers that must not be forwarded from an upstream response.
 *
 * Three groups are deleted from `headers` in place:
 *   1. every header named by the upstream `Connection` header value,
 *   2. the fixed HOP_BY_HOP_HEADERS list,
 *   3. the STALE_BODY_HEADERS list.
 *
 * @param {Headers} headers Mutable header collection to clean up.
 * @returns {void}
 */
function stripProxyUnsafeHeaders(headers) {
  // `Headers.delete` throws a TypeError for a name that is not a valid HTTP
  // token. The Connection value comes from upstream and is untrusted, so only
  // well-formed tokens are passed on; malformed entries cannot name a real
  // header and are simply ignored.
  const headerNameToken = /^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$/;
  const connection = headers.get("connection");
  if (connection) {
    for (const entry of connection.split(",")) {
      const candidate = entry.trim();
      if (headerNameToken.test(candidate)) {
        headers.delete(candidate);
      }
    }
  }
  for (const name of HOP_BY_HOP_HEADERS) {
    headers.delete(name);
  }
  for (const name of STALE_BODY_HEADERS) {
    headers.delete(name);
  }
}
3945
4033
 
3946
4034
  // src/version.ts
3947
4035
  var BAKED_VERSION = typeof HOOPILOT_VERSION !== "undefined" ? HOOPILOT_VERSION : void 0;
@@ -3991,17 +4079,28 @@ function createHoopilotHandler(options = {}) {
3991
4079
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3992
4080
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3993
4081
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
4082
+ const usageAccountingMode = resolveUsageAccountingMode(options);
4083
+ const accessLog = resolveAccessLog(options);
4084
+ const responseUsage = /* @__PURE__ */ new WeakMap();
4085
+ const markUsage = (response, fallbackModel, cost) => {
4086
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
4087
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
4088
+ }
4089
+ return response;
4090
+ };
3994
4091
  const requestContext = /* @__PURE__ */ new WeakMap();
3995
4092
  const app = buildApp({
3996
4093
  apiKey,
3997
4094
  allowedOrigins,
3998
4095
  bufferProxyBodies,
3999
4096
  client,
4097
+ markUsage,
4000
4098
  metrics,
4001
4099
  readUsage,
4002
4100
  recordExtraction,
4003
4101
  recordTokens,
4004
- requestContext
4102
+ requestContext,
4103
+ usageAccountingMode
4005
4104
  });
4006
4105
  return async (request) => {
4007
4106
  const startedAt = performance.now();
@@ -4037,11 +4136,14 @@ function createHoopilotHandler(options = {}) {
4037
4136
  }
4038
4137
  return finishResponse(response, {
4039
4138
  corsOrigin,
4139
+ accessLog,
4040
4140
  logger: requestLogger,
4041
4141
  method: request.method,
4042
4142
  metrics,
4043
4143
  requestId,
4144
+ signal: request.signal,
4044
4145
  route,
4146
+ usageObservation: responseUsage.get(response),
4045
4147
  startedAt,
4046
4148
  closeConnection: bufferProxyBodies,
4047
4149
  trackStreamingBody: !bufferProxyBodies
@@ -4054,11 +4156,13 @@ function buildApp(deps) {
4054
4156
  allowedOrigins,
4055
4157
  bufferProxyBodies,
4056
4158
  client,
4159
+ markUsage,
4057
4160
  metrics,
4058
4161
  readUsage,
4059
4162
  recordExtraction,
4060
4163
  recordTokens,
4061
- requestContext
4164
+ requestContext,
4165
+ usageAccountingMode
4062
4166
  } = deps;
4063
4167
  const contextFor = (request) => {
4064
4168
  const stored = requestContext.get(request);
@@ -4146,11 +4250,13 @@ function buildApp(deps) {
4146
4250
  ({ request }) => handleAnthropicMessages(
4147
4251
  client,
4148
4252
  metrics,
4253
+ markUsage,
4149
4254
  recordTokens,
4150
4255
  recordExtraction,
4151
4256
  request,
4152
4257
  loggerFor(request),
4153
- bufferProxyBodies
4258
+ bufferProxyBodies,
4259
+ usageAccountingMode
4154
4260
  ),
4155
4261
  noBody
4156
4262
  ).post(
@@ -4162,11 +4268,13 @@ function buildApp(deps) {
4162
4268
  ({ request }) => handleChatCompletions(
4163
4269
  client,
4164
4270
  metrics,
4271
+ markUsage,
4165
4272
  recordTokens,
4166
4273
  recordExtraction,
4167
4274
  request,
4168
4275
  loggerFor(request),
4169
- bufferProxyBodies
4276
+ bufferProxyBodies,
4277
+ usageAccountingMode
4170
4278
  ),
4171
4279
  noBody
4172
4280
  ).post(
@@ -4174,11 +4282,13 @@ function buildApp(deps) {
4174
4282
  ({ request }) => handleCompletions(
4175
4283
  client,
4176
4284
  metrics,
4285
+ markUsage,
4177
4286
  recordTokens,
4178
4287
  recordExtraction,
4179
4288
  request,
4180
4289
  loggerFor(request),
4181
- bufferProxyBodies
4290
+ bufferProxyBodies,
4291
+ usageAccountingMode
4182
4292
  ),
4183
4293
  noBody
4184
4294
  ).post(
@@ -4189,7 +4299,8 @@ function buildApp(deps) {
4189
4299
  recordTokens,
4190
4300
  recordExtraction,
4191
4301
  request,
4192
- loggerFor(request)
4302
+ loggerFor(request),
4303
+ usageAccountingMode
4193
4304
  ),
4194
4305
  noBody
4195
4306
  ).post(
@@ -4197,11 +4308,13 @@ function buildApp(deps) {
4197
4308
  ({ request }) => handleResponses(
4198
4309
  client,
4199
4310
  metrics,
4311
+ markUsage,
4200
4312
  recordTokens,
4201
4313
  recordExtraction,
4202
4314
  request,
4203
4315
  loggerFor(request),
4204
- bufferProxyBodies
4316
+ bufferProxyBodies,
4317
+ usageAccountingMode
4205
4318
  ),
4206
4319
  noBody
4207
4320
  );
@@ -4254,7 +4367,7 @@ function startHoopilotServer(options = {}) {
4254
4367
  url: `http://${urlHost(host)}:${server.port}`
4255
4368
  };
4256
4369
  }
4257
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4370
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4258
4371
  const anthropicRequest = await readJson(request);
4259
4372
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4260
4373
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4267,36 +4380,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4267
4380
  if (isStreamingResponse(upstream) && upstream.body) {
4268
4381
  if (bufferProxyBodies) {
4269
4382
  const text = await upstream.text();
4270
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4383
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4271
4384
  return proxyResponse(
4272
4385
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4273
4386
  );
4274
4387
  }
4275
- const observed = observeResponseUsage(
4276
- upstream,
4388
+ return markUsage(
4389
+ proxyResponse(
4390
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4391
+ headers: upstream.headers,
4392
+ status: upstream.status,
4393
+ statusText: upstream.statusText
4394
+ })
4395
+ ),
4277
4396
  model,
4278
- recordTokens,
4279
- request.signal,
4280
- recordExtraction
4281
- );
4282
- if (!observed.body) {
4283
- return proxyResponse(observed);
4284
- }
4285
- return proxyResponse(
4286
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4287
- headers: observed.headers,
4288
- status: observed.status,
4289
- statusText: observed.statusText
4290
- })
4397
+ "body"
4291
4398
  );
4292
4399
  }
4293
4400
  const body = asRecord(await upstream.json());
4294
- const usage = extractTokenUsage(body.usage);
4295
- if (usage) {
4296
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4297
- recordTokens(responseModel || model, usage);
4298
- }
4299
- recordExtraction(usage !== void 0);
4401
+ recordParsedUsage(
4402
+ body.usage,
4403
+ typeof body.model === "string" ? body.model.trim() : model,
4404
+ model,
4405
+ usageAccountingMode,
4406
+ recordTokens,
4407
+ recordExtraction
4408
+ );
4300
4409
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4301
4410
  }
4302
4411
  async function handleAnthropicCountTokens(request) {
@@ -4323,7 +4432,7 @@ async function handleModels(client, metrics, signal, logger) {
4323
4432
  logUpstreamSuccess(logger, "/models", upstream.status);
4324
4433
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4325
4434
  }
4326
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4435
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4327
4436
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4328
4437
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4329
4438
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4332,18 +4441,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4332
4441
  }
4333
4442
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4334
4443
  const model = normalizeRequestedModel(chatRequest.model);
4335
- return proxyResponse(
4336
- await responseWithObservedUsage(
4337
- upstream,
4338
- model,
4339
- recordTokens,
4340
- request.signal,
4341
- bufferProxyBodies,
4342
- recordExtraction
4343
- )
4444
+ return proxiedResponseWithOptionalUsage(
4445
+ upstream,
4446
+ model,
4447
+ markUsage,
4448
+ usageAccountingMode,
4449
+ recordTokens,
4450
+ recordExtraction,
4451
+ bufferProxyBodies
4344
4452
  );
4345
4453
  }
4346
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4454
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4347
4455
  const body = await readJson(request);
4348
4456
  const upstream = await client.chatCompletions(
4349
4457
  completionsRequestToChatCompletion(body),
@@ -4358,34 +4466,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4358
4466
  if (isStreamingResponse(upstream) && upstream.body) {
4359
4467
  if (bufferProxyBodies) {
4360
4468
  const upstreamText = await upstream.text();
4361
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4469
+ recordBufferedUsage(
4470
+ upstreamText,
4471
+ true,
4472
+ model,
4473
+ usageAccountingMode,
4474
+ recordTokens,
4475
+ recordExtraction
4476
+ );
4362
4477
  const text = completionSseTextFromChatSseText(upstreamText);
4363
4478
  return proxyResponse(responseFromText(upstream, text));
4364
4479
  }
4365
- return proxyResponse(
4366
- observeResponseUsage(
4480
+ return markUsage(
4481
+ proxyResponse(
4367
4482
  new Response(completionStreamFromChatStream(upstream.body), {
4368
4483
  headers: upstream.headers,
4369
4484
  status: upstream.status,
4370
4485
  statusText: upstream.statusText
4371
- }),
4372
- model,
4373
- recordTokens,
4374
- request.signal,
4375
- recordExtraction
4376
- )
4486
+ })
4487
+ ),
4488
+ model,
4489
+ "body"
4377
4490
  );
4378
4491
  }
4379
4492
  const completion = asRecord(await upstream.json());
4380
- const usage = extractTokenUsage(completion.usage);
4381
- if (usage) {
4382
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4383
- recordTokens(responseModel || model, usage);
4384
- }
4385
- recordExtraction(usage !== void 0);
4493
+ recordParsedUsage(
4494
+ completion.usage,
4495
+ typeof completion.model === "string" ? completion.model.trim() : model,
4496
+ model,
4497
+ usageAccountingMode,
4498
+ recordTokens,
4499
+ recordExtraction
4500
+ );
4386
4501
  return jsonResponse(chatCompletionToCompletion(completion));
4387
4502
  }
4388
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4503
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4389
4504
  const { json, text: body } = await readJsonText(request);
4390
4505
  if (isResponsesCompactionRequest(json)) {
4391
4506
  return handleResponsesCompactionV2(
@@ -4395,7 +4510,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4395
4510
  recordExtraction,
4396
4511
  json,
4397
4512
  request,
4398
- logger
4513
+ logger,
4514
+ usageAccountingMode
4399
4515
  );
4400
4516
  }
4401
4517
  const upstream = await client.responses(
@@ -4408,18 +4524,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4408
4524
  }
4409
4525
  logUpstreamSuccess(logger, "/responses", upstream.status);
4410
4526
  const model = normalizeRequestedModel(json.model);
4411
- return proxyResponse(
4412
- await responseWithObservedUsage(
4413
- upstream,
4414
- model,
4415
- recordTokens,
4416
- request.signal,
4417
- bufferProxyBodies,
4418
- recordExtraction
4419
- )
4527
+ return proxiedResponseWithOptionalUsage(
4528
+ upstream,
4529
+ model,
4530
+ markUsage,
4531
+ usageAccountingMode,
4532
+ recordTokens,
4533
+ recordExtraction,
4534
+ bufferProxyBodies
4420
4535
  );
4421
4536
  }
4422
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4537
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4423
4538
  const body = await readJson(request);
4424
4539
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4425
4540
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4429,16 +4544,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4429
4544
  logUpstreamSuccess(logger, "/responses", upstream.status);
4430
4545
  const isSse = isStreamingResponse(upstream);
4431
4546
  const text = await upstream.text();
4432
- recordResponseTextUsage(
4547
+ recordBufferedUsage(
4433
4548
  text,
4434
4549
  isSse,
4435
4550
  normalizeRequestedModel(body.model),
4551
+ usageAccountingMode,
4436
4552
  recordTokens,
4437
4553
  recordExtraction
4438
4554
  );
4439
4555
  return jsonResponse(responsesCompactionResult(text, isSse));
4440
4556
  }
4441
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4557
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4442
4558
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4443
4559
  metrics.recordUpstream("/responses", upstream.ok);
4444
4560
  if (!upstream.ok) {
@@ -4448,20 +4564,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4448
4564
  const isSse = isStreamingResponse(upstream);
4449
4565
  const text = await upstream.text();
4450
4566
  const model = normalizeRequestedModel(json.model);
4451
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4567
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4452
4568
  if (json.stream === true) {
4453
4569
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4454
4570
  }
4455
4571
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4456
4572
  }
4457
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4573
/**
 * Turn an upstream response into the proxied response, recording or scheduling
 * usage extraction depending on whether bodies are buffered.
 *
 * When buffering is on and the response has a body, the body is read in full,
 * handed to recordBufferedUsage, and re-wrapped from the text. Otherwise the
 * proxied response is passed to `markUsage` with the "body" cost.
 *
 * @returns {Promise<Response>} The response to send to the client.
 */
async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
  const isSse = isStreamingResponse(response);
  const shouldBuffer = bufferProxyBodies && response.body;
  if (!shouldBuffer) {
    const proxied = proxyResponse(response);
    return markUsage(proxied, fallbackModel, "body");
  }
  const text = await response.text();
  recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction);
  const rebuilt = responseFromText(response, text);
  return proxyResponse(rebuilt);
}
4589
/**
 * Record token usage from an already-parsed response body.
 *
 * Does nothing unless shouldExtractUsage allows the "parsed" cost for the
 * given mode. Otherwise tokens are recorded under `responseModel`, falling
 * back to `fallbackModel` when that is empty, and the extraction outcome is
 * always reported.
 */
function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
  if (shouldExtractUsage(usageAccountingMode, "parsed")) {
    const usage = extractTokenUsage(rawUsage);
    if (usage) {
      const model = responseModel || fallbackModel;
      recordTokens(model, usage);
    }
    const extracted = usage !== void 0;
    recordExtraction(extracted);
  }
}
4599
/**
 * Record token usage from a fully buffered response text.
 *
 * Delegates to recordResponseTextUsage only when shouldExtractUsage allows the
 * "buffered" cost for the given mode; otherwise it is a no-op.
 */
function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
  if (shouldExtractUsage(usageAccountingMode, "buffered")) {
    recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
  }
}
4466
4605
  async function proxyError(upstream, logger) {
4467
4606
  const text = await upstream.text();
@@ -4517,7 +4656,24 @@ function shouldBufferProxyBodies(mode) {
4517
4656
  }
4518
4657
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4519
4658
  }
4659
/**
 * Pick the usage accounting mode.
 *
 * Precedence: `options.usageAccountingMode`, then the
 * HOOPILOT_USAGE_ACCOUNTING entry of `options.env`, then "basic". The chosen
 * value is validated by parseUsageAccountingMode, which throws on bad input.
 */
function resolveUsageAccountingMode(options) {
  let candidate = options.usageAccountingMode;
  // Each fallback is evaluated only when the previous source gave nothing.
  candidate ??= envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING);
  candidate ??= "basic";
  return parseUsageAccountingMode(candidate);
}
4663
/**
 * Pick the access-log flag.
 *
 * Precedence: `options.accessLog`, then the HOOPILOT_ACCESS_LOG entry of
 * `options.env` (parsed by parseBooleanEnv, which throws on bad input), then
 * false.
 */
function resolveAccessLog(options) {
  const explicit = options.accessLog;
  if (explicit !== void 0 && explicit !== null) {
    return explicit;
  }
  const fromEnv = parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG");
  return fromEnv ?? false;
}
4666
/**
 * Decide whether usage should be extracted for a given mode and cost.
 *
 * @param {string} mode "off" never extracts, "basic" extracts only when the
 *   cost is "parsed", and any other mode always extracts.
 * @param {string} cost Label the caller gives for the extraction path
 *   ("parsed", "buffered" and "body" are the ones used in this file).
 * @returns {boolean} Whether extraction should run.
 */
function shouldExtractUsage(mode, cost) {
  switch (mode) {
    case "off":
      return false;
    case "basic":
      return cost === "parsed";
    default:
      return true;
  }
}
4520
4675
  function finishResponse(response, options) {
4676
+ const usageObservation = options.usageObservation;
4521
4677
  const withRequestId = responseWithRequestId(
4522
4678
  response,
4523
4679
  options.requestId,
@@ -4526,11 +4682,36 @@ function finishResponse(response, options) {
4526
4682
  );
4527
4683
  const stream = isStreamingResponse(withRequestId);
4528
4684
  const status = withRequestId.status;
4685
+ let completed = false;
4529
4686
  const complete = () => {
4687
+ if (completed) {
4688
+ return;
4689
+ }
4690
+ completed = true;
4530
4691
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4531
4692
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4532
- logRequestCompleted(options.logger, status, stream, durationMs);
4693
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4533
4694
  };
4695
+ if (withRequestId.body && usageObservation) {
4696
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4697
+ const observedBody = streamWithUsageObservation(
4698
+ withRequestId.body,
4699
+ stream,
4700
+ usageObservation.fallbackModel,
4701
+ usageObservation.recordTokens,
4702
+ options.signal,
4703
+ usageObservation.recordExtraction,
4704
+ shouldTrackCompletion ? complete : void 0
4705
+ );
4706
+ if (!shouldTrackCompletion) {
4707
+ complete();
4708
+ }
4709
+ return new Response(observedBody, {
4710
+ headers: withRequestId.headers,
4711
+ status,
4712
+ statusText: withRequestId.statusText
4713
+ });
4714
+ }
4534
4715
  if (stream && withRequestId.body && options.trackStreamingBody) {
4535
4716
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4536
4717
  headers: withRequestId.headers,
@@ -4600,7 +4781,7 @@ function trackStreamCompletion(body, onComplete) {
4600
4781
  }
4601
4782
  });
4602
4783
  }
4603
- function logRequestCompleted(logger, status, stream, durationMs) {
4784
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4604
4785
  const fields = {
4605
4786
  durationMs,
4606
4787
  event: "http.request.completed",
@@ -4615,6 +4796,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4615
4796
  logger.warn(fields, "request completed with client error");
4616
4797
  return;
4617
4798
  }
4799
+ if (!accessLog) {
4800
+ return;
4801
+ }
4618
4802
  logger.info(fields, "request completed");
4619
4803
  }
4620
4804
  function requestIdFor(request) {
@@ -4659,11 +4843,17 @@ var API_ROUTES = [
4659
4843
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4660
4844
  { method: "POST", path: "/v1/responses", name: "responses" }
4661
4845
  ];
4846
+ var ROUTE_NAMES = new Map(
4847
+ API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
4848
+ );
4662
4849
  function routeFor(method, path) {
4663
4850
  if (method === "OPTIONS") {
4664
4851
  return "cors.preflight";
4665
4852
  }
4666
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4853
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4854
+ }
4855
+ function routeKey(method, path) {
4856
+ return `${method} ${path}`;
4667
4857
  }
4668
4858
  function isStreamingResponse(response) {
4669
4859
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4702,7 +4892,7 @@ function dashboardResponse() {
4702
4892
  }
4703
4893
  async function handleUsage(metrics, readUsage, request) {
4704
4894
  const view = new URL(request.url).searchParams.get("view");
4705
- const { copilot, error } = await readUsage(request.signal);
4895
+ const { copilot, error } = await readUsage();
4706
4896
  const proxy = view === DASHBOARD_USAGE_VIEW ? metrics.snapshot({
4707
4897
  excludeRoutes: DASHBOARD_EXCLUDED_ROUTES,
4708
4898
  excludeUpstreamPaths: DASHBOARD_EXCLUDED_UPSTREAM_PATHS
@@ -4721,12 +4911,24 @@ async function handleUsage(metrics, readUsage, request) {
4721
4911
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4722
4912
  const usagePath = "/copilot_internal/user";
4723
4913
  let cache;
4724
- return async (signal) => {
4914
+ let inFlight;
4915
+ return async () => {
4725
4916
  if (cache && now() - cache.atMs < ttlMs) {
4726
4917
  return cache.result;
4727
4918
  }
4919
+ if (inFlight) {
4920
+ return inFlight;
4921
+ }
4922
+ inFlight = readFreshUsage();
4923
+ try {
4924
+ return await inFlight;
4925
+ } finally {
4926
+ inFlight = void 0;
4927
+ }
4928
+ };
4929
+ async function readFreshUsage() {
4728
4930
  try {
4729
- const upstream = await client.usage(signal);
4931
+ const upstream = await client.usage();
4730
4932
  metrics.recordUpstream(usagePath, upstream.ok);
4731
4933
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4732
4934
  if (!upstream.ok) {
@@ -4748,7 +4950,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4748
4950
  cache = { atMs: now(), result };
4749
4951
  return result;
4750
4952
  }
4751
- };
4953
+ }
4752
4954
  }
4753
4955
  export {
4754
4956
  COPILOT_USAGE_API_VERSION,