@openhoo/hoopilot 2.1.8 → 2.1.9

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -89,12 +89,38 @@ var STREAMING_PROXY_MODES = [
89
89
  "buffer",
90
90
  "live"
91
91
  ];
92
+ var USAGE_ACCOUNTING_MODES = [
93
+ "basic",
94
+ "full",
95
+ "off"
96
+ ];
92
97
  function parseStreamingProxyMode(value) {
93
98
  if (STREAMING_PROXY_MODES.includes(value)) {
94
99
  return value;
95
100
  }
96
101
  throw new Error(`Invalid stream mode: ${value}. Expected ${STREAMING_PROXY_MODES.join(", ")}.`);
97
102
  }
103
+ function parseUsageAccountingMode(value) {
104
+ if (USAGE_ACCOUNTING_MODES.includes(value)) {
105
+ return value;
106
+ }
107
+ throw new Error(
108
+ `Invalid usage accounting mode: ${value}. Expected ${USAGE_ACCOUNTING_MODES.join(", ")}.`
109
+ );
110
+ }
111
+ function parseBooleanEnv(value, name) {
112
+ const raw = envValue(value)?.toLowerCase();
113
+ if (raw === void 0) {
114
+ return void 0;
115
+ }
116
+ if (raw === "1" || raw === "true" || raw === "yes" || raw === "on") {
117
+ return true;
118
+ }
119
+ if (raw === "0" || raw === "false" || raw === "no" || raw === "off") {
120
+ return false;
121
+ }
122
+ throw new Error(`${name} must be one of: 1, 0, true, false, yes, no, on, off.`);
123
+ }
98
124
 
99
125
  // src/auth-store.ts
100
126
  var StoredCopilotAuthError = class extends Error {
@@ -1506,6 +1532,8 @@ function epochSeconds() {
1506
1532
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
1507
1533
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
1508
1534
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
1535
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
1536
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
1509
1537
  var MAX_TRACKED_MODELS = 200;
1510
1538
  var MAX_MODEL_LABEL_LENGTH = 200;
1511
1539
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -1514,6 +1542,9 @@ var UNKNOWN_MODEL = "unknown";
1514
1542
  function emptyModelTotals() {
1515
1543
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
1516
1544
  }
1545
+ function isPrometheusCacheNeutralRoute(route) {
1546
+ return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
1547
+ }
1517
1548
  var MetricsRegistry = class {
1518
1549
  #startedAtMs;
1519
1550
  #inFlight = 0;
@@ -1525,11 +1556,16 @@ var MetricsRegistry = class {
1525
1556
  #copilotQuota;
1526
1557
  #githubRateLimit = /* @__PURE__ */ new Map();
1527
1558
  #extraction = { extracted: 0, missing: 0 };
1559
+ #generation = 0;
1560
+ #prometheusCache;
1528
1561
  constructor(options = {}) {
1529
1562
  this.#startedAtMs = (options.now ?? Date.now)();
1530
1563
  }
1531
1564
  /** Mark a request as started; pair with exactly one {@link observe}. */
1532
1565
  startRequest(route) {
1566
+ if (!isPrometheusCacheNeutralRoute(route)) {
1567
+ this.#changed();
1568
+ }
1533
1569
  this.#inFlight += 1;
1534
1570
  if (route) {
1535
1571
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -1537,6 +1573,9 @@ var MetricsRegistry = class {
1537
1573
  }
1538
1574
  /** Record a completed request and clear its in-flight slot. */
1539
1575
  observe(observation) {
1576
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
1577
+ this.#changed();
1578
+ }
1540
1579
  if (this.#inFlight > 0) {
1541
1580
  this.#inFlight -= 1;
1542
1581
  }
@@ -1557,6 +1596,7 @@ var MetricsRegistry = class {
1557
1596
  * rising miss rate flags clients whose token usage is going unaccounted.
1558
1597
  */
1559
1598
  recordTokenExtraction(extracted) {
1599
+ this.#changed();
1560
1600
  if (extracted) {
1561
1601
  this.#extraction.extracted += 1;
1562
1602
  } else {
@@ -1565,6 +1605,7 @@ var MetricsRegistry = class {
1565
1605
  }
1566
1606
  /** Accumulate token counts for a model from one upstream completion. */
1567
1607
  recordTokens(model, usage) {
1608
+ this.#changed();
1568
1609
  const name = this.#modelLabel(model);
1569
1610
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
1570
1611
  totals.requests += 1;
@@ -1577,11 +1618,13 @@ var MetricsRegistry = class {
1577
1618
  }
1578
1619
  /** Record one upstream Copilot call and whether it succeeded. */
1579
1620
  recordUpstream(path, ok) {
1621
+ this.#changed();
1580
1622
  const key = labelKey(path, ok ? "ok" : "error");
1581
1623
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
1582
1624
  }
1583
1625
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
1584
1626
  recordCopilotQuota(usage) {
1627
+ this.#changed();
1585
1628
  this.#copilotQuota = usage;
1586
1629
  }
1587
1630
  /**
@@ -1593,6 +1636,7 @@ var MetricsRegistry = class {
1593
1636
  if (!rateLimit) {
1594
1637
  return;
1595
1638
  }
1639
+ this.#changed();
1596
1640
  const resource = this.#rateLimitResource(rateLimit.resource);
1597
1641
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
1598
1642
  }
@@ -1629,6 +1673,9 @@ var MetricsRegistry = class {
1629
1673
  }
1630
1674
  this.#durations.set(route, entry);
1631
1675
  }
1676
+ #changed() {
1677
+ this.#generation += 1;
1678
+ }
1632
1679
  /** A JSON-friendly view of the current counters. */
1633
1680
  snapshot(nowOrOptions = Date.now) {
1634
1681
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -1732,13 +1779,18 @@ var MetricsRegistry = class {
1732
1779
  }
1733
1780
  /** Render the Prometheus text exposition format (version 0.0.4). */
1734
1781
  renderPrometheus(now = Date.now) {
1782
+ const nowMs = now();
1783
+ const cached = this.#prometheusCache;
1784
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
1785
+ return cached.text;
1786
+ }
1735
1787
  const lines = [];
1736
1788
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
1737
1789
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
1738
1790
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
1739
1791
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
1740
1792
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
1741
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
1793
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
1742
1794
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
1743
1795
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
1744
1796
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -1804,8 +1856,10 @@ var MetricsRegistry = class {
1804
1856
  }
1805
1857
  this.#renderGithubRateLimit(lines);
1806
1858
  this.#renderCopilotQuota(lines);
1807
- return `${lines.join("\n")}
1859
+ const text = `${lines.join("\n")}
1808
1860
  `;
1861
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
1862
+ return text;
1809
1863
  }
1810
1864
  #renderGithubRateLimit(lines) {
1811
1865
  const entries = [...this.#githubRateLimit.values()];
@@ -1940,21 +1994,6 @@ var MetricsRegistry = class {
1940
1994
  }
1941
1995
  }
1942
1996
  };
1943
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
1944
- const body = response.body;
1945
- if (!body) {
1946
- return response;
1947
- }
1948
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
1949
- return new Response(
1950
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
1951
- {
1952
- headers: response.headers,
1953
- status: response.status,
1954
- statusText: response.statusText
1955
- }
1956
- );
1957
- }
1958
1997
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
1959
1998
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
1960
1999
  if (isSse) {
@@ -1969,9 +2008,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
1969
2008
  }
1970
2009
  accumulator.finish();
1971
2010
  }
1972
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
2011
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
1973
2012
  const reader = stream.getReader();
1974
2013
  let aborted = signal?.aborted ?? false;
2014
+ let completed = false;
1975
2015
  let released = false;
1976
2016
  const onAbort = () => {
1977
2017
  aborted = true;
@@ -2000,6 +2040,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
2000
2040
  }
2001
2041
  released = true;
2002
2042
  signal?.removeEventListener("abort", onAbort);
2043
+ if (!completed) {
2044
+ completed = true;
2045
+ onComplete?.();
2046
+ }
2003
2047
  reader.releaseLock();
2004
2048
  };
2005
2049
  const observeChunk = (chunkBytes) => {
@@ -2109,6 +2153,9 @@ function considerSseLine(line, consider) {
2109
2153
  if (!data || data === "[DONE]") {
2110
2154
  return;
2111
2155
  }
2156
+ if (!data.includes('"usage"')) {
2157
+ return;
2158
+ }
2112
2159
  const parsed = safeJsonParse(data);
2113
2160
  if (parsed !== void 0) {
2114
2161
  consider(parsed);
@@ -3991,17 +4038,28 @@ function createHoopilotHandler(options = {}) {
3991
4038
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3992
4039
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3993
4040
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
4041
+ const usageAccountingMode = resolveUsageAccountingMode(options);
4042
+ const accessLog = resolveAccessLog(options);
4043
+ const responseUsage = /* @__PURE__ */ new WeakMap();
4044
+ const markUsage = (response, fallbackModel, cost) => {
4045
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
4046
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
4047
+ }
4048
+ return response;
4049
+ };
3994
4050
  const requestContext = /* @__PURE__ */ new WeakMap();
3995
4051
  const app = buildApp({
3996
4052
  apiKey,
3997
4053
  allowedOrigins,
3998
4054
  bufferProxyBodies,
3999
4055
  client,
4056
+ markUsage,
4000
4057
  metrics,
4001
4058
  readUsage,
4002
4059
  recordExtraction,
4003
4060
  recordTokens,
4004
- requestContext
4061
+ requestContext,
4062
+ usageAccountingMode
4005
4063
  });
4006
4064
  return async (request) => {
4007
4065
  const startedAt = performance.now();
@@ -4037,11 +4095,14 @@ function createHoopilotHandler(options = {}) {
4037
4095
  }
4038
4096
  return finishResponse(response, {
4039
4097
  corsOrigin,
4098
+ accessLog,
4040
4099
  logger: requestLogger,
4041
4100
  method: request.method,
4042
4101
  metrics,
4043
4102
  requestId,
4103
+ signal: request.signal,
4044
4104
  route,
4105
+ usageObservation: responseUsage.get(response),
4045
4106
  startedAt,
4046
4107
  closeConnection: bufferProxyBodies,
4047
4108
  trackStreamingBody: !bufferProxyBodies
@@ -4054,11 +4115,13 @@ function buildApp(deps) {
4054
4115
  allowedOrigins,
4055
4116
  bufferProxyBodies,
4056
4117
  client,
4118
+ markUsage,
4057
4119
  metrics,
4058
4120
  readUsage,
4059
4121
  recordExtraction,
4060
4122
  recordTokens,
4061
- requestContext
4123
+ requestContext,
4124
+ usageAccountingMode
4062
4125
  } = deps;
4063
4126
  const contextFor = (request) => {
4064
4127
  const stored = requestContext.get(request);
@@ -4146,11 +4209,13 @@ function buildApp(deps) {
4146
4209
  ({ request }) => handleAnthropicMessages(
4147
4210
  client,
4148
4211
  metrics,
4212
+ markUsage,
4149
4213
  recordTokens,
4150
4214
  recordExtraction,
4151
4215
  request,
4152
4216
  loggerFor(request),
4153
- bufferProxyBodies
4217
+ bufferProxyBodies,
4218
+ usageAccountingMode
4154
4219
  ),
4155
4220
  noBody
4156
4221
  ).post(
@@ -4162,11 +4227,13 @@ function buildApp(deps) {
4162
4227
  ({ request }) => handleChatCompletions(
4163
4228
  client,
4164
4229
  metrics,
4230
+ markUsage,
4165
4231
  recordTokens,
4166
4232
  recordExtraction,
4167
4233
  request,
4168
4234
  loggerFor(request),
4169
- bufferProxyBodies
4235
+ bufferProxyBodies,
4236
+ usageAccountingMode
4170
4237
  ),
4171
4238
  noBody
4172
4239
  ).post(
@@ -4174,11 +4241,13 @@ function buildApp(deps) {
4174
4241
  ({ request }) => handleCompletions(
4175
4242
  client,
4176
4243
  metrics,
4244
+ markUsage,
4177
4245
  recordTokens,
4178
4246
  recordExtraction,
4179
4247
  request,
4180
4248
  loggerFor(request),
4181
- bufferProxyBodies
4249
+ bufferProxyBodies,
4250
+ usageAccountingMode
4182
4251
  ),
4183
4252
  noBody
4184
4253
  ).post(
@@ -4189,7 +4258,8 @@ function buildApp(deps) {
4189
4258
  recordTokens,
4190
4259
  recordExtraction,
4191
4260
  request,
4192
- loggerFor(request)
4261
+ loggerFor(request),
4262
+ usageAccountingMode
4193
4263
  ),
4194
4264
  noBody
4195
4265
  ).post(
@@ -4197,11 +4267,13 @@ function buildApp(deps) {
4197
4267
  ({ request }) => handleResponses(
4198
4268
  client,
4199
4269
  metrics,
4270
+ markUsage,
4200
4271
  recordTokens,
4201
4272
  recordExtraction,
4202
4273
  request,
4203
4274
  loggerFor(request),
4204
- bufferProxyBodies
4275
+ bufferProxyBodies,
4276
+ usageAccountingMode
4205
4277
  ),
4206
4278
  noBody
4207
4279
  );
@@ -4254,7 +4326,7 @@ function startHoopilotServer(options = {}) {
4254
4326
  url: `http://${urlHost(host)}:${server.port}`
4255
4327
  };
4256
4328
  }
4257
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4329
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4258
4330
  const anthropicRequest = await readJson(request);
4259
4331
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4260
4332
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4267,36 +4339,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4267
4339
  if (isStreamingResponse(upstream) && upstream.body) {
4268
4340
  if (bufferProxyBodies) {
4269
4341
  const text = await upstream.text();
4270
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4342
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4271
4343
  return proxyResponse(
4272
4344
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4273
4345
  );
4274
4346
  }
4275
- const observed = observeResponseUsage(
4276
- upstream,
4347
+ return markUsage(
4348
+ proxyResponse(
4349
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4350
+ headers: upstream.headers,
4351
+ status: upstream.status,
4352
+ statusText: upstream.statusText
4353
+ })
4354
+ ),
4277
4355
  model,
4278
- recordTokens,
4279
- request.signal,
4280
- recordExtraction
4281
- );
4282
- if (!observed.body) {
4283
- return proxyResponse(observed);
4284
- }
4285
- return proxyResponse(
4286
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4287
- headers: observed.headers,
4288
- status: observed.status,
4289
- statusText: observed.statusText
4290
- })
4356
+ "body"
4291
4357
  );
4292
4358
  }
4293
4359
  const body = asRecord(await upstream.json());
4294
- const usage = extractTokenUsage(body.usage);
4295
- if (usage) {
4296
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4297
- recordTokens(responseModel || model, usage);
4298
- }
4299
- recordExtraction(usage !== void 0);
4360
+ recordParsedUsage(
4361
+ body.usage,
4362
+ typeof body.model === "string" ? body.model.trim() : model,
4363
+ model,
4364
+ usageAccountingMode,
4365
+ recordTokens,
4366
+ recordExtraction
4367
+ );
4300
4368
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4301
4369
  }
4302
4370
  async function handleAnthropicCountTokens(request) {
@@ -4323,7 +4391,7 @@ async function handleModels(client, metrics, signal, logger) {
4323
4391
  logUpstreamSuccess(logger, "/models", upstream.status);
4324
4392
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4325
4393
  }
4326
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4394
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4327
4395
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4328
4396
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4329
4397
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4332,18 +4400,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4332
4400
  }
4333
4401
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4334
4402
  const model = normalizeRequestedModel(chatRequest.model);
4335
- return proxyResponse(
4336
- await responseWithObservedUsage(
4337
- upstream,
4338
- model,
4339
- recordTokens,
4340
- request.signal,
4341
- bufferProxyBodies,
4342
- recordExtraction
4343
- )
4403
+ return proxiedResponseWithOptionalUsage(
4404
+ upstream,
4405
+ model,
4406
+ markUsage,
4407
+ usageAccountingMode,
4408
+ recordTokens,
4409
+ recordExtraction,
4410
+ bufferProxyBodies
4344
4411
  );
4345
4412
  }
4346
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4413
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4347
4414
  const body = await readJson(request);
4348
4415
  const upstream = await client.chatCompletions(
4349
4416
  completionsRequestToChatCompletion(body),
@@ -4358,34 +4425,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4358
4425
  if (isStreamingResponse(upstream) && upstream.body) {
4359
4426
  if (bufferProxyBodies) {
4360
4427
  const upstreamText = await upstream.text();
4361
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4428
+ recordBufferedUsage(
4429
+ upstreamText,
4430
+ true,
4431
+ model,
4432
+ usageAccountingMode,
4433
+ recordTokens,
4434
+ recordExtraction
4435
+ );
4362
4436
  const text = completionSseTextFromChatSseText(upstreamText);
4363
4437
  return proxyResponse(responseFromText(upstream, text));
4364
4438
  }
4365
- return proxyResponse(
4366
- observeResponseUsage(
4439
+ return markUsage(
4440
+ proxyResponse(
4367
4441
  new Response(completionStreamFromChatStream(upstream.body), {
4368
4442
  headers: upstream.headers,
4369
4443
  status: upstream.status,
4370
4444
  statusText: upstream.statusText
4371
- }),
4372
- model,
4373
- recordTokens,
4374
- request.signal,
4375
- recordExtraction
4376
- )
4445
+ })
4446
+ ),
4447
+ model,
4448
+ "body"
4377
4449
  );
4378
4450
  }
4379
4451
  const completion = asRecord(await upstream.json());
4380
- const usage = extractTokenUsage(completion.usage);
4381
- if (usage) {
4382
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4383
- recordTokens(responseModel || model, usage);
4384
- }
4385
- recordExtraction(usage !== void 0);
4452
+ recordParsedUsage(
4453
+ completion.usage,
4454
+ typeof completion.model === "string" ? completion.model.trim() : model,
4455
+ model,
4456
+ usageAccountingMode,
4457
+ recordTokens,
4458
+ recordExtraction
4459
+ );
4386
4460
  return jsonResponse(chatCompletionToCompletion(completion));
4387
4461
  }
4388
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4462
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4389
4463
  const { json, text: body } = await readJsonText(request);
4390
4464
  if (isResponsesCompactionRequest(json)) {
4391
4465
  return handleResponsesCompactionV2(
@@ -4395,7 +4469,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4395
4469
  recordExtraction,
4396
4470
  json,
4397
4471
  request,
4398
- logger
4472
+ logger,
4473
+ usageAccountingMode
4399
4474
  );
4400
4475
  }
4401
4476
  const upstream = await client.responses(
@@ -4408,18 +4483,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4408
4483
  }
4409
4484
  logUpstreamSuccess(logger, "/responses", upstream.status);
4410
4485
  const model = normalizeRequestedModel(json.model);
4411
- return proxyResponse(
4412
- await responseWithObservedUsage(
4413
- upstream,
4414
- model,
4415
- recordTokens,
4416
- request.signal,
4417
- bufferProxyBodies,
4418
- recordExtraction
4419
- )
4486
+ return proxiedResponseWithOptionalUsage(
4487
+ upstream,
4488
+ model,
4489
+ markUsage,
4490
+ usageAccountingMode,
4491
+ recordTokens,
4492
+ recordExtraction,
4493
+ bufferProxyBodies
4420
4494
  );
4421
4495
  }
4422
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4496
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4423
4497
  const body = await readJson(request);
4424
4498
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4425
4499
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4429,16 +4503,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4429
4503
  logUpstreamSuccess(logger, "/responses", upstream.status);
4430
4504
  const isSse = isStreamingResponse(upstream);
4431
4505
  const text = await upstream.text();
4432
- recordResponseTextUsage(
4506
+ recordBufferedUsage(
4433
4507
  text,
4434
4508
  isSse,
4435
4509
  normalizeRequestedModel(body.model),
4510
+ usageAccountingMode,
4436
4511
  recordTokens,
4437
4512
  recordExtraction
4438
4513
  );
4439
4514
  return jsonResponse(responsesCompactionResult(text, isSse));
4440
4515
  }
4441
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4516
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4442
4517
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4443
4518
  metrics.recordUpstream("/responses", upstream.ok);
4444
4519
  if (!upstream.ok) {
@@ -4448,20 +4523,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4448
4523
  const isSse = isStreamingResponse(upstream);
4449
4524
  const text = await upstream.text();
4450
4525
  const model = normalizeRequestedModel(json.model);
4451
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4526
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4452
4527
  if (json.stream === true) {
4453
4528
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4454
4529
  }
4455
4530
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4456
4531
  }
4457
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4532
+ async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
4458
4533
  const isSse = isStreamingResponse(response);
4459
- if (bufferBody && response.body) {
4534
+ if (bufferProxyBodies && response.body) {
4460
4535
  const text = await response.text();
4461
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4462
- return responseFromText(response, text);
4536
+ recordBufferedUsage(
4537
+ text,
4538
+ isSse,
4539
+ fallbackModel,
4540
+ usageAccountingMode,
4541
+ recordTokens,
4542
+ recordExtraction
4543
+ );
4544
+ return proxyResponse(responseFromText(response, text));
4545
+ }
4546
+ return markUsage(proxyResponse(response), fallbackModel, "body");
4547
+ }
4548
+ function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4549
+ if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
4550
+ return;
4551
+ }
4552
+ const usage = extractTokenUsage(rawUsage);
4553
+ if (usage) {
4554
+ recordTokens(responseModel || fallbackModel, usage);
4555
+ }
4556
+ recordExtraction(usage !== void 0);
4557
+ }
4558
+ function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4559
+ if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
4560
+ return;
4463
4561
  }
4464
- return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
4562
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4465
4563
  }
4466
4564
  async function proxyError(upstream, logger) {
4467
4565
  const text = await upstream.text();
@@ -4517,7 +4615,24 @@ function shouldBufferProxyBodies(mode) {
4517
4615
  }
4518
4616
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4519
4617
  }
4618
+ function resolveUsageAccountingMode(options) {
4619
+ const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
4620
+ return parseUsageAccountingMode(value);
4621
+ }
4622
+ function resolveAccessLog(options) {
4623
+ return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
4624
+ }
4625
+ function shouldExtractUsage(mode, cost) {
4626
+ if (mode === "off") {
4627
+ return false;
4628
+ }
4629
+ if (mode === "basic") {
4630
+ return cost === "parsed";
4631
+ }
4632
+ return true;
4633
+ }
4520
4634
  function finishResponse(response, options) {
4635
+ const usageObservation = options.usageObservation;
4521
4636
  const withRequestId = responseWithRequestId(
4522
4637
  response,
4523
4638
  options.requestId,
@@ -4526,11 +4641,36 @@ function finishResponse(response, options) {
4526
4641
  );
4527
4642
  const stream = isStreamingResponse(withRequestId);
4528
4643
  const status = withRequestId.status;
4644
+ let completed = false;
4529
4645
  const complete = () => {
4646
+ if (completed) {
4647
+ return;
4648
+ }
4649
+ completed = true;
4530
4650
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4531
4651
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4532
- logRequestCompleted(options.logger, status, stream, durationMs);
4652
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4533
4653
  };
4654
+ if (withRequestId.body && usageObservation) {
4655
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4656
+ const observedBody = streamWithUsageObservation(
4657
+ withRequestId.body,
4658
+ stream,
4659
+ usageObservation.fallbackModel,
4660
+ usageObservation.recordTokens,
4661
+ options.signal,
4662
+ usageObservation.recordExtraction,
4663
+ shouldTrackCompletion ? complete : void 0
4664
+ );
4665
+ if (!shouldTrackCompletion) {
4666
+ complete();
4667
+ }
4668
+ return new Response(observedBody, {
4669
+ headers: withRequestId.headers,
4670
+ status,
4671
+ statusText: withRequestId.statusText
4672
+ });
4673
+ }
4534
4674
  if (stream && withRequestId.body && options.trackStreamingBody) {
4535
4675
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4536
4676
  headers: withRequestId.headers,
@@ -4600,7 +4740,7 @@ function trackStreamCompletion(body, onComplete) {
4600
4740
  }
4601
4741
  });
4602
4742
  }
4603
- function logRequestCompleted(logger, status, stream, durationMs) {
4743
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4604
4744
  const fields = {
4605
4745
  durationMs,
4606
4746
  event: "http.request.completed",
@@ -4615,6 +4755,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4615
4755
  logger.warn(fields, "request completed with client error");
4616
4756
  return;
4617
4757
  }
4758
+ if (!accessLog) {
4759
+ return;
4760
+ }
4618
4761
  logger.info(fields, "request completed");
4619
4762
  }
4620
4763
  function requestIdFor(request) {
@@ -4659,11 +4802,17 @@ var API_ROUTES = [
4659
4802
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4660
4803
  { method: "POST", path: "/v1/responses", name: "responses" }
4661
4804
  ];
4805
+ var ROUTE_NAMES = new Map(
4806
+ API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
4807
+ );
4662
4808
  function routeFor(method, path) {
4663
4809
  if (method === "OPTIONS") {
4664
4810
  return "cors.preflight";
4665
4811
  }
4666
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4812
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4813
+ }
4814
+ function routeKey(method, path) {
4815
+ return `${method} ${path}`;
4667
4816
  }
4668
4817
  function isStreamingResponse(response) {
4669
4818
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4721,12 +4870,24 @@ async function handleUsage(metrics, readUsage, request) {
4721
4870
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4722
4871
  const usagePath = "/copilot_internal/user";
4723
4872
  let cache;
4724
- return async (signal) => {
4873
+ let inFlight;
4874
+ return async () => {
4725
4875
  if (cache && now() - cache.atMs < ttlMs) {
4726
4876
  return cache.result;
4727
4877
  }
4878
+ if (inFlight) {
4879
+ return inFlight;
4880
+ }
4881
+ inFlight = readFreshUsage();
4728
4882
  try {
4729
- const upstream = await client.usage(signal);
4883
+ return await inFlight;
4884
+ } finally {
4885
+ inFlight = void 0;
4886
+ }
4887
+ };
4888
+ async function readFreshUsage() {
4889
+ try {
4890
+ const upstream = await client.usage();
4730
4891
  metrics.recordUpstream(usagePath, upstream.ok);
4731
4892
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4732
4893
  if (!upstream.ok) {
@@ -4748,7 +4909,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4748
4909
  cache = { atMs: now(), result };
4749
4910
  return result;
4750
4911
  }
4751
- };
4912
+ }
4752
4913
  }
4753
4914
  export {
4754
4915
  COPILOT_USAGE_API_VERSION,