@openhoo/hoopilot 2.1.8 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9,14 +9,16 @@ import {
9
9
  isTrustedTokenBaseUrl,
10
10
  main,
11
11
  modelIdsFromResponse,
12
+ parseBooleanEnv,
12
13
  parseJsonObject,
13
14
  parseStreamingProxyMode,
15
+ parseUsageAccountingMode,
14
16
  randomId,
15
17
  removeUndefined,
16
18
  safeJsonParse,
17
19
  trimTrailingSlash,
18
20
  truncatedResponseText
19
- } from "./chunk-2GLKVNAA.js";
21
+ } from "./chunk-2GIR4W4A.js";
20
22
 
21
23
  // src/cli.ts
22
24
  import { spawn } from "child_process";
@@ -1600,7 +1602,7 @@ function anthropicMessagesToResponsesInput(messages) {
1600
1602
  arguments: JSON.stringify(asRecord(part.input)),
1601
1603
  cache_control: anthropicCacheControl(part.cache_control),
1602
1604
  call_id: textValue(part.id) || `call_hoopilot_${fallbackToolCallIndex++}`,
1603
- name: textValue(part.name),
1605
+ name: requiredAnthropicText(part.name, "tool_use name"),
1604
1606
  type: "function_call"
1605
1607
  })
1606
1608
  );
@@ -1746,7 +1748,7 @@ function anthropicTools(tools) {
1746
1748
  return removeUndefined({
1747
1749
  cache_control: anthropicCacheControl(record.cache_control),
1748
1750
  description: record.description,
1749
- name: record.name,
1751
+ name: requiredAnthropicText(record.name, "tool name"),
1750
1752
  parameters: record.input_schema,
1751
1753
  strict: record.strict,
1752
1754
  type: "function"
@@ -1819,12 +1821,19 @@ function anthropicToolChoice(toolChoice) {
1819
1821
  return "none";
1820
1822
  }
1821
1823
  if (type === "tool") {
1822
- return { name: textValue(record.name), type: "function" };
1824
+ return { name: requiredAnthropicText(record.name, "tool_choice name"), type: "function" };
1823
1825
  }
1824
1826
  throw new AnthropicCompatibilityError(
1825
1827
  `Anthropic tool_choice type "${type || "unknown"}" is not supported.`
1826
1828
  );
1827
1829
  }
1830
+ function requiredAnthropicText(value, field) {
1831
+ const text = textValue(value).trim();
1832
+ if (!text) {
1833
+ throw new AnthropicCompatibilityError(`Anthropic ${field} is required.`);
1834
+ }
1835
+ return text;
1836
+ }
1828
1837
  function anthropicThinkingToReasoning(thinking) {
1829
1838
  const record = asRecord(thinking);
1830
1839
  if (Object.keys(record).length === 0) {
@@ -3113,6 +3122,17 @@ function isLoopbackOrigin(origin) {
3113
3122
  }
3114
3123
 
3115
3124
  // src/http/responses.ts
3125
+ var HOP_BY_HOP_HEADERS = [
3126
+ "connection",
3127
+ "keep-alive",
3128
+ "proxy-authenticate",
3129
+ "proxy-authorization",
3130
+ "te",
3131
+ "trailer",
3132
+ "transfer-encoding",
3133
+ "upgrade"
3134
+ ];
3135
+ var STALE_BODY_HEADERS = ["content-encoding", "content-length"];
3116
3136
  function jsonResponse(body, status = 200) {
3117
3137
  return new Response(JSON.stringify(body), {
3118
3138
  headers: {
@@ -3152,9 +3172,7 @@ function responseFromText(source, text) {
3152
3172
  }
3153
3173
  function proxyResponse(upstream) {
3154
3174
  const headers = new Headers(upstream.headers);
3155
- headers.delete("content-encoding");
3156
- headers.delete("content-length");
3157
- headers.delete("transfer-encoding");
3175
+ stripProxyUnsafeHeaders(headers);
3158
3176
  for (const [key, value] of Object.entries(corsHeaders())) {
3159
3177
  headers.set(key, value);
3160
3178
  }
@@ -3180,11 +3198,30 @@ function websocketUnsupportedResponse() {
3180
3198
  response.headers.set("upgrade", "websocket");
3181
3199
  return response;
3182
3200
  }
3201
+ function stripProxyUnsafeHeaders(headers) {
3202
+ const connection = headers.get("connection");
3203
+ if (connection) {
3204
+ for (const name of connection.split(",")) {
3205
+ const trimmed = name.trim();
3206
+ if (trimmed) {
3207
+ headers.delete(trimmed);
3208
+ }
3209
+ }
3210
+ }
3211
+ for (const name of HOP_BY_HOP_HEADERS) {
3212
+ headers.delete(name);
3213
+ }
3214
+ for (const name of STALE_BODY_HEADERS) {
3215
+ headers.delete(name);
3216
+ }
3217
+ }
3183
3218
 
3184
3219
  // src/metrics.ts
3185
3220
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
3186
3221
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
3187
3222
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
3223
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
3224
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
3188
3225
  var MAX_TRACKED_MODELS = 200;
3189
3226
  var MAX_MODEL_LABEL_LENGTH = 200;
3190
3227
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -3193,6 +3230,9 @@ var UNKNOWN_MODEL = "unknown";
3193
3230
  function emptyModelTotals() {
3194
3231
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
3195
3232
  }
3233
+ function isPrometheusCacheNeutralRoute(route) {
3234
+ return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
3235
+ }
3196
3236
  var MetricsRegistry = class {
3197
3237
  #startedAtMs;
3198
3238
  #inFlight = 0;
@@ -3204,11 +3244,16 @@ var MetricsRegistry = class {
3204
3244
  #copilotQuota;
3205
3245
  #githubRateLimit = /* @__PURE__ */ new Map();
3206
3246
  #extraction = { extracted: 0, missing: 0 };
3247
+ #generation = 0;
3248
+ #prometheusCache;
3207
3249
  constructor(options = {}) {
3208
3250
  this.#startedAtMs = (options.now ?? Date.now)();
3209
3251
  }
3210
3252
  /** Mark a request as started; pair with exactly one {@link observe}. */
3211
3253
  startRequest(route) {
3254
+ if (!isPrometheusCacheNeutralRoute(route)) {
3255
+ this.#changed();
3256
+ }
3212
3257
  this.#inFlight += 1;
3213
3258
  if (route) {
3214
3259
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -3216,6 +3261,9 @@ var MetricsRegistry = class {
3216
3261
  }
3217
3262
  /** Record a completed request and clear its in-flight slot. */
3218
3263
  observe(observation) {
3264
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
3265
+ this.#changed();
3266
+ }
3219
3267
  if (this.#inFlight > 0) {
3220
3268
  this.#inFlight -= 1;
3221
3269
  }
@@ -3236,6 +3284,7 @@ var MetricsRegistry = class {
3236
3284
  * rising miss rate flags clients whose token usage is going unaccounted.
3237
3285
  */
3238
3286
  recordTokenExtraction(extracted) {
3287
+ this.#changed();
3239
3288
  if (extracted) {
3240
3289
  this.#extraction.extracted += 1;
3241
3290
  } else {
@@ -3244,6 +3293,7 @@ var MetricsRegistry = class {
3244
3293
  }
3245
3294
  /** Accumulate token counts for a model from one upstream completion. */
3246
3295
  recordTokens(model, usage) {
3296
+ this.#changed();
3247
3297
  const name = this.#modelLabel(model);
3248
3298
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
3249
3299
  totals.requests += 1;
@@ -3256,11 +3306,13 @@ var MetricsRegistry = class {
3256
3306
  }
3257
3307
  /** Record one upstream Copilot call and whether it succeeded. */
3258
3308
  recordUpstream(path, ok) {
3309
+ this.#changed();
3259
3310
  const key = labelKey(path, ok ? "ok" : "error");
3260
3311
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
3261
3312
  }
3262
3313
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
3263
3314
  recordCopilotQuota(usage) {
3315
+ this.#changed();
3264
3316
  this.#copilotQuota = usage;
3265
3317
  }
3266
3318
  /**
@@ -3272,6 +3324,7 @@ var MetricsRegistry = class {
3272
3324
  if (!rateLimit) {
3273
3325
  return;
3274
3326
  }
3327
+ this.#changed();
3275
3328
  const resource = this.#rateLimitResource(rateLimit.resource);
3276
3329
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
3277
3330
  }
@@ -3308,6 +3361,9 @@ var MetricsRegistry = class {
3308
3361
  }
3309
3362
  this.#durations.set(route, entry);
3310
3363
  }
3364
+ #changed() {
3365
+ this.#generation += 1;
3366
+ }
3311
3367
  /** A JSON-friendly view of the current counters. */
3312
3368
  snapshot(nowOrOptions = Date.now) {
3313
3369
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -3411,13 +3467,18 @@ var MetricsRegistry = class {
3411
3467
  }
3412
3468
  /** Render the Prometheus text exposition format (version 0.0.4). */
3413
3469
  renderPrometheus(now = Date.now) {
3470
+ const nowMs = now();
3471
+ const cached = this.#prometheusCache;
3472
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
3473
+ return cached.text;
3474
+ }
3414
3475
  const lines = [];
3415
3476
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
3416
3477
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
3417
3478
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
3418
3479
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
3419
3480
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
3420
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
3481
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
3421
3482
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
3422
3483
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
3423
3484
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -3483,8 +3544,10 @@ var MetricsRegistry = class {
3483
3544
  }
3484
3545
  this.#renderGithubRateLimit(lines);
3485
3546
  this.#renderCopilotQuota(lines);
3486
- return `${lines.join("\n")}
3547
+ const text = `${lines.join("\n")}
3487
3548
  `;
3549
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
3550
+ return text;
3488
3551
  }
3489
3552
  #renderGithubRateLimit(lines) {
3490
3553
  const entries = [...this.#githubRateLimit.values()];
@@ -3619,21 +3682,6 @@ var MetricsRegistry = class {
3619
3682
  }
3620
3683
  }
3621
3684
  };
3622
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
3623
- const body = response.body;
3624
- if (!body) {
3625
- return response;
3626
- }
3627
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
3628
- return new Response(
3629
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
3630
- {
3631
- headers: response.headers,
3632
- status: response.status,
3633
- statusText: response.statusText
3634
- }
3635
- );
3636
- }
3637
3685
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
3638
3686
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
3639
3687
  if (isSse) {
@@ -3648,9 +3696,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
3648
3696
  }
3649
3697
  accumulator.finish();
3650
3698
  }
3651
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
3699
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
3652
3700
  const reader = stream.getReader();
3653
3701
  let aborted = signal?.aborted ?? false;
3702
+ let completed = false;
3654
3703
  let released = false;
3655
3704
  const onAbort = () => {
3656
3705
  aborted = true;
@@ -3679,6 +3728,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
3679
3728
  }
3680
3729
  released = true;
3681
3730
  signal?.removeEventListener("abort", onAbort);
3731
+ if (!completed) {
3732
+ completed = true;
3733
+ onComplete?.();
3734
+ }
3682
3735
  reader.releaseLock();
3683
3736
  };
3684
3737
  const observeChunk = (chunkBytes) => {
@@ -3788,6 +3841,9 @@ function considerSseLine(line, consider) {
3788
3841
  if (!data || data === "[DONE]") {
3789
3842
  return;
3790
3843
  }
3844
+ if (!data.includes('"usage"')) {
3845
+ return;
3846
+ }
3791
3847
  const parsed = safeJsonParse(data);
3792
3848
  if (parsed !== void 0) {
3793
3849
  consider(parsed);
@@ -3917,17 +3973,28 @@ function createHoopilotHandler(options = {}) {
3917
3973
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3918
3974
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3919
3975
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
3976
+ const usageAccountingMode = resolveUsageAccountingMode(options);
3977
+ const accessLog = resolveAccessLog(options);
3978
+ const responseUsage = /* @__PURE__ */ new WeakMap();
3979
+ const markUsage = (response, fallbackModel, cost) => {
3980
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
3981
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
3982
+ }
3983
+ return response;
3984
+ };
3920
3985
  const requestContext = /* @__PURE__ */ new WeakMap();
3921
3986
  const app = buildApp({
3922
3987
  apiKey,
3923
3988
  allowedOrigins,
3924
3989
  bufferProxyBodies,
3925
3990
  client,
3991
+ markUsage,
3926
3992
  metrics,
3927
3993
  readUsage,
3928
3994
  recordExtraction,
3929
3995
  recordTokens,
3930
- requestContext
3996
+ requestContext,
3997
+ usageAccountingMode
3931
3998
  });
3932
3999
  return async (request) => {
3933
4000
  const startedAt = performance.now();
@@ -3963,11 +4030,14 @@ function createHoopilotHandler(options = {}) {
3963
4030
  }
3964
4031
  return finishResponse(response, {
3965
4032
  corsOrigin,
4033
+ accessLog,
3966
4034
  logger: requestLogger,
3967
4035
  method: request.method,
3968
4036
  metrics,
3969
4037
  requestId,
4038
+ signal: request.signal,
3970
4039
  route,
4040
+ usageObservation: responseUsage.get(response),
3971
4041
  startedAt,
3972
4042
  closeConnection: bufferProxyBodies,
3973
4043
  trackStreamingBody: !bufferProxyBodies
@@ -3980,11 +4050,13 @@ function buildApp(deps) {
3980
4050
  allowedOrigins,
3981
4051
  bufferProxyBodies,
3982
4052
  client,
4053
+ markUsage,
3983
4054
  metrics,
3984
4055
  readUsage,
3985
4056
  recordExtraction,
3986
4057
  recordTokens,
3987
- requestContext
4058
+ requestContext,
4059
+ usageAccountingMode
3988
4060
  } = deps;
3989
4061
  const contextFor = (request) => {
3990
4062
  const stored = requestContext.get(request);
@@ -4072,11 +4144,13 @@ function buildApp(deps) {
4072
4144
  ({ request }) => handleAnthropicMessages(
4073
4145
  client,
4074
4146
  metrics,
4147
+ markUsage,
4075
4148
  recordTokens,
4076
4149
  recordExtraction,
4077
4150
  request,
4078
4151
  loggerFor(request),
4079
- bufferProxyBodies
4152
+ bufferProxyBodies,
4153
+ usageAccountingMode
4080
4154
  ),
4081
4155
  noBody
4082
4156
  ).post(
@@ -4088,11 +4162,13 @@ function buildApp(deps) {
4088
4162
  ({ request }) => handleChatCompletions(
4089
4163
  client,
4090
4164
  metrics,
4165
+ markUsage,
4091
4166
  recordTokens,
4092
4167
  recordExtraction,
4093
4168
  request,
4094
4169
  loggerFor(request),
4095
- bufferProxyBodies
4170
+ bufferProxyBodies,
4171
+ usageAccountingMode
4096
4172
  ),
4097
4173
  noBody
4098
4174
  ).post(
@@ -4100,11 +4176,13 @@ function buildApp(deps) {
4100
4176
  ({ request }) => handleCompletions(
4101
4177
  client,
4102
4178
  metrics,
4179
+ markUsage,
4103
4180
  recordTokens,
4104
4181
  recordExtraction,
4105
4182
  request,
4106
4183
  loggerFor(request),
4107
- bufferProxyBodies
4184
+ bufferProxyBodies,
4185
+ usageAccountingMode
4108
4186
  ),
4109
4187
  noBody
4110
4188
  ).post(
@@ -4115,7 +4193,8 @@ function buildApp(deps) {
4115
4193
  recordTokens,
4116
4194
  recordExtraction,
4117
4195
  request,
4118
- loggerFor(request)
4196
+ loggerFor(request),
4197
+ usageAccountingMode
4119
4198
  ),
4120
4199
  noBody
4121
4200
  ).post(
@@ -4123,11 +4202,13 @@ function buildApp(deps) {
4123
4202
  ({ request }) => handleResponses(
4124
4203
  client,
4125
4204
  metrics,
4205
+ markUsage,
4126
4206
  recordTokens,
4127
4207
  recordExtraction,
4128
4208
  request,
4129
4209
  loggerFor(request),
4130
- bufferProxyBodies
4210
+ bufferProxyBodies,
4211
+ usageAccountingMode
4131
4212
  ),
4132
4213
  noBody
4133
4214
  );
@@ -4180,7 +4261,7 @@ function startHoopilotServer(options = {}) {
4180
4261
  url: `http://${urlHost(host)}:${server.port}`
4181
4262
  };
4182
4263
  }
4183
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4264
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4184
4265
  const anthropicRequest = await readJson(request);
4185
4266
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4186
4267
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4193,36 +4274,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4193
4274
  if (isStreamingResponse(upstream) && upstream.body) {
4194
4275
  if (bufferProxyBodies) {
4195
4276
  const text = await upstream.text();
4196
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4277
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4197
4278
  return proxyResponse(
4198
4279
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4199
4280
  );
4200
4281
  }
4201
- const observed = observeResponseUsage(
4202
- upstream,
4282
+ return markUsage(
4283
+ proxyResponse(
4284
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4285
+ headers: upstream.headers,
4286
+ status: upstream.status,
4287
+ statusText: upstream.statusText
4288
+ })
4289
+ ),
4203
4290
  model,
4204
- recordTokens,
4205
- request.signal,
4206
- recordExtraction
4207
- );
4208
- if (!observed.body) {
4209
- return proxyResponse(observed);
4210
- }
4211
- return proxyResponse(
4212
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4213
- headers: observed.headers,
4214
- status: observed.status,
4215
- statusText: observed.statusText
4216
- })
4291
+ "body"
4217
4292
  );
4218
4293
  }
4219
4294
  const body = asRecord(await upstream.json());
4220
- const usage = extractTokenUsage(body.usage);
4221
- if (usage) {
4222
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4223
- recordTokens(responseModel || model, usage);
4224
- }
4225
- recordExtraction(usage !== void 0);
4295
+ recordParsedUsage(
4296
+ body.usage,
4297
+ typeof body.model === "string" ? body.model.trim() : model,
4298
+ model,
4299
+ usageAccountingMode,
4300
+ recordTokens,
4301
+ recordExtraction
4302
+ );
4226
4303
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4227
4304
  }
4228
4305
  async function handleAnthropicCountTokens(request) {
@@ -4249,7 +4326,7 @@ async function handleModels(client, metrics, signal, logger) {
4249
4326
  logUpstreamSuccess(logger, "/models", upstream.status);
4250
4327
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4251
4328
  }
4252
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4329
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4253
4330
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4254
4331
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4255
4332
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4258,18 +4335,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4258
4335
  }
4259
4336
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4260
4337
  const model = normalizeRequestedModel(chatRequest.model);
4261
- return proxyResponse(
4262
- await responseWithObservedUsage(
4263
- upstream,
4264
- model,
4265
- recordTokens,
4266
- request.signal,
4267
- bufferProxyBodies,
4268
- recordExtraction
4269
- )
4338
+ return proxiedResponseWithOptionalUsage(
4339
+ upstream,
4340
+ model,
4341
+ markUsage,
4342
+ usageAccountingMode,
4343
+ recordTokens,
4344
+ recordExtraction,
4345
+ bufferProxyBodies
4270
4346
  );
4271
4347
  }
4272
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4348
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4273
4349
  const body = await readJson(request);
4274
4350
  const upstream = await client.chatCompletions(
4275
4351
  completionsRequestToChatCompletion(body),
@@ -4284,34 +4360,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4284
4360
  if (isStreamingResponse(upstream) && upstream.body) {
4285
4361
  if (bufferProxyBodies) {
4286
4362
  const upstreamText = await upstream.text();
4287
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4363
+ recordBufferedUsage(
4364
+ upstreamText,
4365
+ true,
4366
+ model,
4367
+ usageAccountingMode,
4368
+ recordTokens,
4369
+ recordExtraction
4370
+ );
4288
4371
  const text = completionSseTextFromChatSseText(upstreamText);
4289
4372
  return proxyResponse(responseFromText(upstream, text));
4290
4373
  }
4291
- return proxyResponse(
4292
- observeResponseUsage(
4374
+ return markUsage(
4375
+ proxyResponse(
4293
4376
  new Response(completionStreamFromChatStream(upstream.body), {
4294
4377
  headers: upstream.headers,
4295
4378
  status: upstream.status,
4296
4379
  statusText: upstream.statusText
4297
- }),
4298
- model,
4299
- recordTokens,
4300
- request.signal,
4301
- recordExtraction
4302
- )
4380
+ })
4381
+ ),
4382
+ model,
4383
+ "body"
4303
4384
  );
4304
4385
  }
4305
4386
  const completion = asRecord(await upstream.json());
4306
- const usage = extractTokenUsage(completion.usage);
4307
- if (usage) {
4308
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4309
- recordTokens(responseModel || model, usage);
4310
- }
4311
- recordExtraction(usage !== void 0);
4387
+ recordParsedUsage(
4388
+ completion.usage,
4389
+ typeof completion.model === "string" ? completion.model.trim() : model,
4390
+ model,
4391
+ usageAccountingMode,
4392
+ recordTokens,
4393
+ recordExtraction
4394
+ );
4312
4395
  return jsonResponse(chatCompletionToCompletion(completion));
4313
4396
  }
4314
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4397
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4315
4398
  const { json, text: body } = await readJsonText(request);
4316
4399
  if (isResponsesCompactionRequest(json)) {
4317
4400
  return handleResponsesCompactionV2(
@@ -4321,7 +4404,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4321
4404
  recordExtraction,
4322
4405
  json,
4323
4406
  request,
4324
- logger
4407
+ logger,
4408
+ usageAccountingMode
4325
4409
  );
4326
4410
  }
4327
4411
  const upstream = await client.responses(
@@ -4334,18 +4418,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4334
4418
  }
4335
4419
  logUpstreamSuccess(logger, "/responses", upstream.status);
4336
4420
  const model = normalizeRequestedModel(json.model);
4337
- return proxyResponse(
4338
- await responseWithObservedUsage(
4339
- upstream,
4340
- model,
4341
- recordTokens,
4342
- request.signal,
4343
- bufferProxyBodies,
4344
- recordExtraction
4345
- )
4421
+ return proxiedResponseWithOptionalUsage(
4422
+ upstream,
4423
+ model,
4424
+ markUsage,
4425
+ usageAccountingMode,
4426
+ recordTokens,
4427
+ recordExtraction,
4428
+ bufferProxyBodies
4346
4429
  );
4347
4430
  }
4348
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4431
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4349
4432
  const body = await readJson(request);
4350
4433
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4351
4434
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4355,16 +4438,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4355
4438
  logUpstreamSuccess(logger, "/responses", upstream.status);
4356
4439
  const isSse = isStreamingResponse(upstream);
4357
4440
  const text = await upstream.text();
4358
- recordResponseTextUsage(
4441
+ recordBufferedUsage(
4359
4442
  text,
4360
4443
  isSse,
4361
4444
  normalizeRequestedModel(body.model),
4445
+ usageAccountingMode,
4362
4446
  recordTokens,
4363
4447
  recordExtraction
4364
4448
  );
4365
4449
  return jsonResponse(responsesCompactionResult(text, isSse));
4366
4450
  }
4367
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4451
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4368
4452
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4369
4453
  metrics.recordUpstream("/responses", upstream.ok);
4370
4454
  if (!upstream.ok) {
@@ -4374,20 +4458,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4374
4458
  const isSse = isStreamingResponse(upstream);
4375
4459
  const text = await upstream.text();
4376
4460
  const model = normalizeRequestedModel(json.model);
4377
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4461
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4378
4462
  if (json.stream === true) {
4379
4463
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4380
4464
  }
4381
4465
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4382
4466
  }
4383
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4467
+ async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
4384
4468
  const isSse = isStreamingResponse(response);
4385
- if (bufferBody && response.body) {
4469
+ if (bufferProxyBodies && response.body) {
4386
4470
  const text = await response.text();
4387
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4388
- return responseFromText(response, text);
4471
+ recordBufferedUsage(
4472
+ text,
4473
+ isSse,
4474
+ fallbackModel,
4475
+ usageAccountingMode,
4476
+ recordTokens,
4477
+ recordExtraction
4478
+ );
4479
+ return proxyResponse(responseFromText(response, text));
4480
+ }
4481
+ return markUsage(proxyResponse(response), fallbackModel, "body");
4482
+ }
4483
+ function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4484
+ if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
4485
+ return;
4486
+ }
4487
+ const usage = extractTokenUsage(rawUsage);
4488
+ if (usage) {
4489
+ recordTokens(responseModel || fallbackModel, usage);
4490
+ }
4491
+ recordExtraction(usage !== void 0);
4492
+ }
4493
+ function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4494
+ if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
4495
+ return;
4389
4496
  }
4390
- return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
4497
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4391
4498
  }
4392
4499
  async function proxyError(upstream, logger) {
4393
4500
  const text = await upstream.text();
@@ -4443,7 +4550,24 @@ function shouldBufferProxyBodies(mode) {
4443
4550
  }
4444
4551
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4445
4552
  }
4553
+ function resolveUsageAccountingMode(options) {
4554
+ const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
4555
+ return parseUsageAccountingMode(value);
4556
+ }
4557
+ function resolveAccessLog(options) {
4558
+ return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
4559
+ }
4560
+ function shouldExtractUsage(mode, cost) {
4561
+ if (mode === "off") {
4562
+ return false;
4563
+ }
4564
+ if (mode === "basic") {
4565
+ return cost === "parsed";
4566
+ }
4567
+ return true;
4568
+ }
4446
4569
  function finishResponse(response, options) {
4570
+ const usageObservation = options.usageObservation;
4447
4571
  const withRequestId = responseWithRequestId(
4448
4572
  response,
4449
4573
  options.requestId,
@@ -4452,11 +4576,36 @@ function finishResponse(response, options) {
4452
4576
  );
4453
4577
  const stream = isStreamingResponse(withRequestId);
4454
4578
  const status = withRequestId.status;
4579
+ let completed = false;
4455
4580
  const complete = () => {
4581
+ if (completed) {
4582
+ return;
4583
+ }
4584
+ completed = true;
4456
4585
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4457
4586
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4458
- logRequestCompleted(options.logger, status, stream, durationMs);
4587
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4459
4588
  };
4589
+ if (withRequestId.body && usageObservation) {
4590
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4591
+ const observedBody = streamWithUsageObservation(
4592
+ withRequestId.body,
4593
+ stream,
4594
+ usageObservation.fallbackModel,
4595
+ usageObservation.recordTokens,
4596
+ options.signal,
4597
+ usageObservation.recordExtraction,
4598
+ shouldTrackCompletion ? complete : void 0
4599
+ );
4600
+ if (!shouldTrackCompletion) {
4601
+ complete();
4602
+ }
4603
+ return new Response(observedBody, {
4604
+ headers: withRequestId.headers,
4605
+ status,
4606
+ statusText: withRequestId.statusText
4607
+ });
4608
+ }
4460
4609
  if (stream && withRequestId.body && options.trackStreamingBody) {
4461
4610
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4462
4611
  headers: withRequestId.headers,
@@ -4526,7 +4675,7 @@ function trackStreamCompletion(body, onComplete) {
4526
4675
  }
4527
4676
  });
4528
4677
  }
4529
- function logRequestCompleted(logger, status, stream, durationMs) {
4678
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4530
4679
  const fields = {
4531
4680
  durationMs,
4532
4681
  event: "http.request.completed",
@@ -4541,6 +4690,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4541
4690
  logger.warn(fields, "request completed with client error");
4542
4691
  return;
4543
4692
  }
4693
+ if (!accessLog) {
4694
+ return;
4695
+ }
4544
4696
  logger.info(fields, "request completed");
4545
4697
  }
4546
4698
  function requestIdFor(request) {
@@ -4585,11 +4737,17 @@ var API_ROUTES = [
4585
4737
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4586
4738
  { method: "POST", path: "/v1/responses", name: "responses" }
4587
4739
  ];
4740
+ var ROUTE_NAMES = new Map(
4741
+ API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
4742
+ );
4588
4743
  function routeFor(method, path) {
4589
4744
  if (method === "OPTIONS") {
4590
4745
  return "cors.preflight";
4591
4746
  }
4592
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4747
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4748
+ }
4749
+ function routeKey(method, path) {
4750
+ return `${method} ${path}`;
4593
4751
  }
4594
4752
  function isStreamingResponse(response) {
4595
4753
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4628,7 +4786,7 @@ function dashboardResponse() {
4628
4786
  }
4629
4787
  async function handleUsage(metrics, readUsage, request) {
4630
4788
  const view = new URL(request.url).searchParams.get("view");
4631
- const { copilot, error } = await readUsage(request.signal);
4789
+ const { copilot, error } = await readUsage();
4632
4790
  const proxy = view === DASHBOARD_USAGE_VIEW ? metrics.snapshot({
4633
4791
  excludeRoutes: DASHBOARD_EXCLUDED_ROUTES,
4634
4792
  excludeUpstreamPaths: DASHBOARD_EXCLUDED_UPSTREAM_PATHS
@@ -4647,12 +4805,24 @@ async function handleUsage(metrics, readUsage, request) {
4647
4805
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4648
4806
  const usagePath = "/copilot_internal/user";
4649
4807
  let cache;
4650
- return async (signal) => {
4808
+ let inFlight;
4809
+ return async () => {
4651
4810
  if (cache && now() - cache.atMs < ttlMs) {
4652
4811
  return cache.result;
4653
4812
  }
4813
+ if (inFlight) {
4814
+ return inFlight;
4815
+ }
4816
+ inFlight = readFreshUsage();
4817
+ try {
4818
+ return await inFlight;
4819
+ } finally {
4820
+ inFlight = void 0;
4821
+ }
4822
+ };
4823
+ async function readFreshUsage() {
4654
4824
  try {
4655
- const upstream = await client.usage(signal);
4825
+ const upstream = await client.usage();
4656
4826
  metrics.recordUpstream(usagePath, upstream.ok);
4657
4827
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4658
4828
  if (!upstream.ok) {
@@ -4674,7 +4844,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4674
4844
  cache = { atMs: now(), result };
4675
4845
  return result;
4676
4846
  }
4677
- };
4847
+ }
4678
4848
  }
4679
4849
 
4680
4850
  // src/update.ts
@@ -5346,6 +5516,14 @@ function parseArgs(argv) {
5346
5516
  args.allowUnauthenticated = true;
5347
5517
  continue;
5348
5518
  }
5519
+ if (arg === "--access-log") {
5520
+ args.accessLog = true;
5521
+ continue;
5522
+ }
5523
+ if (arg === "--no-access-log") {
5524
+ args.accessLog = false;
5525
+ continue;
5526
+ }
5349
5527
  if (arg === "--no-update-check") {
5350
5528
  args.noUpdateCheck = true;
5351
5529
  continue;
@@ -5380,6 +5558,9 @@ function parseArgs(argv) {
5380
5558
  case "--stream-mode":
5381
5559
  args.streamingProxyMode = parseStreamingProxyMode(optionValue(name, inlineValue, rest));
5382
5560
  break;
5561
+ case "--usage-accounting":
5562
+ args.usageAccountingMode = parseUsageAccountingMode(optionValue(name, inlineValue, rest));
5563
+ break;
5383
5564
  case "--host":
5384
5565
  args.host = optionValue(name, inlineValue, rest);
5385
5566
  break;
@@ -5684,6 +5865,9 @@ Options:
5684
5865
  --log-level <level> trace, debug, info, warn, error, fatal, or silent
5685
5866
  --log-format <format> json or pretty. Default: pretty
5686
5867
  --stream-mode <mode> auto, live, or buffer. Auto buffers Windows standalone streams.
5868
+ --usage-accounting <mode> basic, full, or off. Default: basic
5869
+ --access-log Log successful requests
5870
+ --no-access-log Do not log successful requests. Default
5687
5871
  --no-update-check Do not check GitHub for a newer release
5688
5872
  --allow-unauthenticated Allow non-loopback bind without --api-key
5689
5873
  -h, --help Show help
@@ -5697,6 +5881,8 @@ Environment:
5697
5881
  HOOPILOT_LOG_FORMAT json or pretty. Default: pretty
5698
5882
  HOOPILOT_LOG_LEVEL trace, debug, info, warn, error, fatal, or silent
5699
5883
  HOOPILOT_STREAM_MODE auto, live, or buffer
5884
+ HOOPILOT_USAGE_ACCOUNTING basic, full, or off
5885
+ HOOPILOT_ACCESS_LOG 1/0, true/false, yes/no, or on/off
5700
5886
  COPILOT_API_BASE_URL
5701
5887
  HOOPILOT_GITHUB_API_BASE_URL GitHub REST base for the usage/quota lookup. Default: https://api.github.com
5702
5888
  HOOPILOT_ALLOW_UNSAFE_UPSTREAM Set to 1 to allow nonstandard HTTPS token hosts