@openhoo/hoopilot 2.1.7 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9,14 +9,16 @@ import {
9
9
  isTrustedTokenBaseUrl,
10
10
  main,
11
11
  modelIdsFromResponse,
12
+ parseBooleanEnv,
12
13
  parseJsonObject,
13
14
  parseStreamingProxyMode,
15
+ parseUsageAccountingMode,
14
16
  randomId,
15
17
  removeUndefined,
16
18
  safeJsonParse,
17
19
  trimTrailingSlash,
18
20
  truncatedResponseText
19
- } from "./chunk-2GLKVNAA.js";
21
+ } from "./chunk-FH6WSFOC.js";
20
22
 
21
23
  // src/cli.ts
22
24
  import { spawn } from "child_process";
@@ -667,6 +669,32 @@ var DEFAULT_LOG_FORMAT = "pretty";
667
669
  var DEFAULT_LOG_LEVEL = "info";
668
670
  var LOG_FORMATS = ["json", "pretty"];
669
671
  var LOG_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal", "silent"];
672
+ var PRETTY_INLINE_FIELDS = [
673
+ "component",
674
+ "command",
675
+ "event",
676
+ "method",
677
+ "path",
678
+ "status",
679
+ "durationMs",
680
+ "stream",
681
+ "route",
682
+ "requestId",
683
+ "upstreamPath",
684
+ "upstreamStatus",
685
+ "url",
686
+ "baseUrl",
687
+ "origin",
688
+ "currentVersion",
689
+ "installKind",
690
+ "latestVersion",
691
+ "assetName",
692
+ "count",
693
+ "plan",
694
+ "apiBaseUrl",
695
+ "authStorePath"
696
+ ];
697
+ var PRETTY_IGNORED_FIELDS = ["pid", "hostname", "service", ...PRETTY_INLINE_FIELDS];
670
698
  var REDACT_PATHS = [
671
699
  "apiKey",
672
700
  "authorization",
@@ -730,9 +758,11 @@ function createHoopilotLogger(options = {}) {
730
758
  // stream's TTY-ness is unknown, so default to no color there.
731
759
  colorize: options.colorize ?? (options.stream ? false : process.stdout.isTTY),
732
760
  destination: options.stream ?? 1,
733
- ignore: "pid,hostname",
761
+ ignore: PRETTY_IGNORED_FIELDS.join(","),
762
+ levelFirst: true,
763
+ messageFormat: formatPrettyMessage,
734
764
  singleLine: true,
735
- translateTime: "SYS:standard"
765
+ translateTime: "SYS:HH:MM:ss"
736
766
  })
737
767
  )
738
768
  );
@@ -778,6 +808,45 @@ function errorDetails(error) {
778
808
  }
779
809
  return { message: String(error) };
780
810
  }
811
+ function formatPrettyMessage(log, messageKey) {
812
+ const message = formatPrettyLogMessage(log[messageKey]);
813
+ const fields = PRETTY_INLINE_FIELDS.flatMap((field) => {
814
+ const value = log[field];
815
+ if (value === void 0) {
816
+ return [];
817
+ }
818
+ return `${prettyFieldLabel(field)}=${formatPrettyFieldValue(field, value)}`;
819
+ });
820
+ return fields.length > 0 ? `${message} ${fields.join(" ")}` : message;
821
+ }
822
+ function formatPrettyLogMessage(value) {
823
+ return typeof value === "string" ? value : formatPrettyValue(value);
824
+ }
825
+ function prettyFieldLabel(field) {
826
+ return field === "durationMs" ? "duration" : field;
827
+ }
828
+ function formatPrettyFieldValue(field, value) {
829
+ const formatted = formatPrettyValue(value);
830
+ return field === "durationMs" && typeof value === "number" ? `${formatted}ms` : formatted;
831
+ }
832
+ function formatPrettyValue(value) {
833
+ if (typeof value === "number") {
834
+ return Number.isFinite(value) ? String(value) : JSON.stringify(value);
835
+ }
836
+ if (typeof value === "boolean") {
837
+ return String(value);
838
+ }
839
+ if (typeof value === "string") {
840
+ return isBarePrettyValue(value) ? value : JSON.stringify(value);
841
+ }
842
+ if (value === null) {
843
+ return "null";
844
+ }
845
+ return JSON.stringify(value) ?? String(value);
846
+ }
847
+ function isBarePrettyValue(value) {
848
+ return /^[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]+$/.test(value);
849
+ }
781
850
  function isLogFormat(value) {
782
851
  return LOG_FORMATS.includes(value);
783
852
  }
@@ -3118,6 +3187,8 @@ function websocketUnsupportedResponse() {
3118
3187
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
3119
3188
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
3120
3189
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
3190
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
3191
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
3121
3192
  var MAX_TRACKED_MODELS = 200;
3122
3193
  var MAX_MODEL_LABEL_LENGTH = 200;
3123
3194
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -3126,6 +3197,9 @@ var UNKNOWN_MODEL = "unknown";
3126
3197
  function emptyModelTotals() {
3127
3198
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
3128
3199
  }
3200
+ function isPrometheusCacheNeutralRoute(route) {
3201
+ return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
3202
+ }
3129
3203
  var MetricsRegistry = class {
3130
3204
  #startedAtMs;
3131
3205
  #inFlight = 0;
@@ -3137,11 +3211,16 @@ var MetricsRegistry = class {
3137
3211
  #copilotQuota;
3138
3212
  #githubRateLimit = /* @__PURE__ */ new Map();
3139
3213
  #extraction = { extracted: 0, missing: 0 };
3214
+ #generation = 0;
3215
+ #prometheusCache;
3140
3216
  constructor(options = {}) {
3141
3217
  this.#startedAtMs = (options.now ?? Date.now)();
3142
3218
  }
3143
3219
  /** Mark a request as started; pair with exactly one {@link observe}. */
3144
3220
  startRequest(route) {
3221
+ if (!isPrometheusCacheNeutralRoute(route)) {
3222
+ this.#changed();
3223
+ }
3145
3224
  this.#inFlight += 1;
3146
3225
  if (route) {
3147
3226
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -3149,6 +3228,9 @@ var MetricsRegistry = class {
3149
3228
  }
3150
3229
  /** Record a completed request and clear its in-flight slot. */
3151
3230
  observe(observation) {
3231
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
3232
+ this.#changed();
3233
+ }
3152
3234
  if (this.#inFlight > 0) {
3153
3235
  this.#inFlight -= 1;
3154
3236
  }
@@ -3169,6 +3251,7 @@ var MetricsRegistry = class {
3169
3251
  * rising miss rate flags clients whose token usage is going unaccounted.
3170
3252
  */
3171
3253
  recordTokenExtraction(extracted) {
3254
+ this.#changed();
3172
3255
  if (extracted) {
3173
3256
  this.#extraction.extracted += 1;
3174
3257
  } else {
@@ -3177,6 +3260,7 @@ var MetricsRegistry = class {
3177
3260
  }
3178
3261
  /** Accumulate token counts for a model from one upstream completion. */
3179
3262
  recordTokens(model, usage) {
3263
+ this.#changed();
3180
3264
  const name = this.#modelLabel(model);
3181
3265
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
3182
3266
  totals.requests += 1;
@@ -3189,11 +3273,13 @@ var MetricsRegistry = class {
3189
3273
  }
3190
3274
  /** Record one upstream Copilot call and whether it succeeded. */
3191
3275
  recordUpstream(path, ok) {
3276
+ this.#changed();
3192
3277
  const key = labelKey(path, ok ? "ok" : "error");
3193
3278
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
3194
3279
  }
3195
3280
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
3196
3281
  recordCopilotQuota(usage) {
3282
+ this.#changed();
3197
3283
  this.#copilotQuota = usage;
3198
3284
  }
3199
3285
  /**
@@ -3205,6 +3291,7 @@ var MetricsRegistry = class {
3205
3291
  if (!rateLimit) {
3206
3292
  return;
3207
3293
  }
3294
+ this.#changed();
3208
3295
  const resource = this.#rateLimitResource(rateLimit.resource);
3209
3296
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
3210
3297
  }
@@ -3241,6 +3328,9 @@ var MetricsRegistry = class {
3241
3328
  }
3242
3329
  this.#durations.set(route, entry);
3243
3330
  }
3331
+ #changed() {
3332
+ this.#generation += 1;
3333
+ }
3244
3334
  /** A JSON-friendly view of the current counters. */
3245
3335
  snapshot(nowOrOptions = Date.now) {
3246
3336
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -3344,13 +3434,18 @@ var MetricsRegistry = class {
3344
3434
  }
3345
3435
  /** Render the Prometheus text exposition format (version 0.0.4). */
3346
3436
  renderPrometheus(now = Date.now) {
3437
+ const nowMs = now();
3438
+ const cached = this.#prometheusCache;
3439
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
3440
+ return cached.text;
3441
+ }
3347
3442
  const lines = [];
3348
3443
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
3349
3444
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
3350
3445
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
3351
3446
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
3352
3447
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
3353
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
3448
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
3354
3449
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
3355
3450
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
3356
3451
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -3416,8 +3511,10 @@ var MetricsRegistry = class {
3416
3511
  }
3417
3512
  this.#renderGithubRateLimit(lines);
3418
3513
  this.#renderCopilotQuota(lines);
3419
- return `${lines.join("\n")}
3514
+ const text = `${lines.join("\n")}
3420
3515
  `;
3516
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
3517
+ return text;
3421
3518
  }
3422
3519
  #renderGithubRateLimit(lines) {
3423
3520
  const entries = [...this.#githubRateLimit.values()];
@@ -3552,21 +3649,6 @@ var MetricsRegistry = class {
3552
3649
  }
3553
3650
  }
3554
3651
  };
3555
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
3556
- const body = response.body;
3557
- if (!body) {
3558
- return response;
3559
- }
3560
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
3561
- return new Response(
3562
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
3563
- {
3564
- headers: response.headers,
3565
- status: response.status,
3566
- statusText: response.statusText
3567
- }
3568
- );
3569
- }
3570
3652
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
3571
3653
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
3572
3654
  if (isSse) {
@@ -3581,9 +3663,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
3581
3663
  }
3582
3664
  accumulator.finish();
3583
3665
  }
3584
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
3666
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
3585
3667
  const reader = stream.getReader();
3586
3668
  let aborted = signal?.aborted ?? false;
3669
+ let completed = false;
3587
3670
  let released = false;
3588
3671
  const onAbort = () => {
3589
3672
  aborted = true;
@@ -3612,6 +3695,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
3612
3695
  }
3613
3696
  released = true;
3614
3697
  signal?.removeEventListener("abort", onAbort);
3698
+ if (!completed) {
3699
+ completed = true;
3700
+ onComplete?.();
3701
+ }
3615
3702
  reader.releaseLock();
3616
3703
  };
3617
3704
  const observeChunk = (chunkBytes) => {
@@ -3721,6 +3808,9 @@ function considerSseLine(line, consider) {
3721
3808
  if (!data || data === "[DONE]") {
3722
3809
  return;
3723
3810
  }
3811
+ if (!data.includes('"usage"')) {
3812
+ return;
3813
+ }
3724
3814
  const parsed = safeJsonParse(data);
3725
3815
  if (parsed !== void 0) {
3726
3816
  consider(parsed);
@@ -3850,17 +3940,28 @@ function createHoopilotHandler(options = {}) {
3850
3940
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3851
3941
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3852
3942
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
3943
+ const usageAccountingMode = resolveUsageAccountingMode(options);
3944
+ const accessLog = resolveAccessLog(options);
3945
+ const responseUsage = /* @__PURE__ */ new WeakMap();
3946
+ const markUsage = (response, fallbackModel, cost) => {
3947
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
3948
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
3949
+ }
3950
+ return response;
3951
+ };
3853
3952
  const requestContext = /* @__PURE__ */ new WeakMap();
3854
3953
  const app = buildApp({
3855
3954
  apiKey,
3856
3955
  allowedOrigins,
3857
3956
  bufferProxyBodies,
3858
3957
  client,
3958
+ markUsage,
3859
3959
  metrics,
3860
3960
  readUsage,
3861
3961
  recordExtraction,
3862
3962
  recordTokens,
3863
- requestContext
3963
+ requestContext,
3964
+ usageAccountingMode
3864
3965
  });
3865
3966
  return async (request) => {
3866
3967
  const startedAt = performance.now();
@@ -3896,11 +3997,14 @@ function createHoopilotHandler(options = {}) {
3896
3997
  }
3897
3998
  return finishResponse(response, {
3898
3999
  corsOrigin,
4000
+ accessLog,
3899
4001
  logger: requestLogger,
3900
4002
  method: request.method,
3901
4003
  metrics,
3902
4004
  requestId,
4005
+ signal: request.signal,
3903
4006
  route,
4007
+ usageObservation: responseUsage.get(response),
3904
4008
  startedAt,
3905
4009
  closeConnection: bufferProxyBodies,
3906
4010
  trackStreamingBody: !bufferProxyBodies
@@ -3913,11 +4017,13 @@ function buildApp(deps) {
3913
4017
  allowedOrigins,
3914
4018
  bufferProxyBodies,
3915
4019
  client,
4020
+ markUsage,
3916
4021
  metrics,
3917
4022
  readUsage,
3918
4023
  recordExtraction,
3919
4024
  recordTokens,
3920
- requestContext
4025
+ requestContext,
4026
+ usageAccountingMode
3921
4027
  } = deps;
3922
4028
  const contextFor = (request) => {
3923
4029
  const stored = requestContext.get(request);
@@ -4005,11 +4111,13 @@ function buildApp(deps) {
4005
4111
  ({ request }) => handleAnthropicMessages(
4006
4112
  client,
4007
4113
  metrics,
4114
+ markUsage,
4008
4115
  recordTokens,
4009
4116
  recordExtraction,
4010
4117
  request,
4011
4118
  loggerFor(request),
4012
- bufferProxyBodies
4119
+ bufferProxyBodies,
4120
+ usageAccountingMode
4013
4121
  ),
4014
4122
  noBody
4015
4123
  ).post(
@@ -4021,11 +4129,13 @@ function buildApp(deps) {
4021
4129
  ({ request }) => handleChatCompletions(
4022
4130
  client,
4023
4131
  metrics,
4132
+ markUsage,
4024
4133
  recordTokens,
4025
4134
  recordExtraction,
4026
4135
  request,
4027
4136
  loggerFor(request),
4028
- bufferProxyBodies
4137
+ bufferProxyBodies,
4138
+ usageAccountingMode
4029
4139
  ),
4030
4140
  noBody
4031
4141
  ).post(
@@ -4033,11 +4143,13 @@ function buildApp(deps) {
4033
4143
  ({ request }) => handleCompletions(
4034
4144
  client,
4035
4145
  metrics,
4146
+ markUsage,
4036
4147
  recordTokens,
4037
4148
  recordExtraction,
4038
4149
  request,
4039
4150
  loggerFor(request),
4040
- bufferProxyBodies
4151
+ bufferProxyBodies,
4152
+ usageAccountingMode
4041
4153
  ),
4042
4154
  noBody
4043
4155
  ).post(
@@ -4048,7 +4160,8 @@ function buildApp(deps) {
4048
4160
  recordTokens,
4049
4161
  recordExtraction,
4050
4162
  request,
4051
- loggerFor(request)
4163
+ loggerFor(request),
4164
+ usageAccountingMode
4052
4165
  ),
4053
4166
  noBody
4054
4167
  ).post(
@@ -4056,11 +4169,13 @@ function buildApp(deps) {
4056
4169
  ({ request }) => handleResponses(
4057
4170
  client,
4058
4171
  metrics,
4172
+ markUsage,
4059
4173
  recordTokens,
4060
4174
  recordExtraction,
4061
4175
  request,
4062
4176
  loggerFor(request),
4063
- bufferProxyBodies
4177
+ bufferProxyBodies,
4178
+ usageAccountingMode
4064
4179
  ),
4065
4180
  noBody
4066
4181
  );
@@ -4113,7 +4228,7 @@ function startHoopilotServer(options = {}) {
4113
4228
  url: `http://${urlHost(host)}:${server.port}`
4114
4229
  };
4115
4230
  }
4116
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4231
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4117
4232
  const anthropicRequest = await readJson(request);
4118
4233
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4119
4234
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4126,36 +4241,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4126
4241
  if (isStreamingResponse(upstream) && upstream.body) {
4127
4242
  if (bufferProxyBodies) {
4128
4243
  const text = await upstream.text();
4129
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4244
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4130
4245
  return proxyResponse(
4131
4246
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4132
4247
  );
4133
4248
  }
4134
- const observed = observeResponseUsage(
4135
- upstream,
4249
+ return markUsage(
4250
+ proxyResponse(
4251
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4252
+ headers: upstream.headers,
4253
+ status: upstream.status,
4254
+ statusText: upstream.statusText
4255
+ })
4256
+ ),
4136
4257
  model,
4137
- recordTokens,
4138
- request.signal,
4139
- recordExtraction
4140
- );
4141
- if (!observed.body) {
4142
- return proxyResponse(observed);
4143
- }
4144
- return proxyResponse(
4145
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4146
- headers: observed.headers,
4147
- status: observed.status,
4148
- statusText: observed.statusText
4149
- })
4258
+ "body"
4150
4259
  );
4151
4260
  }
4152
4261
  const body = asRecord(await upstream.json());
4153
- const usage = extractTokenUsage(body.usage);
4154
- if (usage) {
4155
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4156
- recordTokens(responseModel || model, usage);
4157
- }
4158
- recordExtraction(usage !== void 0);
4262
+ recordParsedUsage(
4263
+ body.usage,
4264
+ typeof body.model === "string" ? body.model.trim() : model,
4265
+ model,
4266
+ usageAccountingMode,
4267
+ recordTokens,
4268
+ recordExtraction
4269
+ );
4159
4270
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4160
4271
  }
4161
4272
  async function handleAnthropicCountTokens(request) {
@@ -4182,7 +4293,7 @@ async function handleModels(client, metrics, signal, logger) {
4182
4293
  logUpstreamSuccess(logger, "/models", upstream.status);
4183
4294
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4184
4295
  }
4185
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4296
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4186
4297
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4187
4298
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4188
4299
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4191,18 +4302,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4191
4302
  }
4192
4303
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4193
4304
  const model = normalizeRequestedModel(chatRequest.model);
4194
- return proxyResponse(
4195
- await responseWithObservedUsage(
4196
- upstream,
4197
- model,
4198
- recordTokens,
4199
- request.signal,
4200
- bufferProxyBodies,
4201
- recordExtraction
4202
- )
4305
+ return proxiedResponseWithOptionalUsage(
4306
+ upstream,
4307
+ model,
4308
+ markUsage,
4309
+ usageAccountingMode,
4310
+ recordTokens,
4311
+ recordExtraction,
4312
+ bufferProxyBodies
4203
4313
  );
4204
4314
  }
4205
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4315
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4206
4316
  const body = await readJson(request);
4207
4317
  const upstream = await client.chatCompletions(
4208
4318
  completionsRequestToChatCompletion(body),
@@ -4217,34 +4327,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4217
4327
  if (isStreamingResponse(upstream) && upstream.body) {
4218
4328
  if (bufferProxyBodies) {
4219
4329
  const upstreamText = await upstream.text();
4220
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4330
+ recordBufferedUsage(
4331
+ upstreamText,
4332
+ true,
4333
+ model,
4334
+ usageAccountingMode,
4335
+ recordTokens,
4336
+ recordExtraction
4337
+ );
4221
4338
  const text = completionSseTextFromChatSseText(upstreamText);
4222
4339
  return proxyResponse(responseFromText(upstream, text));
4223
4340
  }
4224
- return proxyResponse(
4225
- observeResponseUsage(
4341
+ return markUsage(
4342
+ proxyResponse(
4226
4343
  new Response(completionStreamFromChatStream(upstream.body), {
4227
4344
  headers: upstream.headers,
4228
4345
  status: upstream.status,
4229
4346
  statusText: upstream.statusText
4230
- }),
4231
- model,
4232
- recordTokens,
4233
- request.signal,
4234
- recordExtraction
4235
- )
4347
+ })
4348
+ ),
4349
+ model,
4350
+ "body"
4236
4351
  );
4237
4352
  }
4238
4353
  const completion = asRecord(await upstream.json());
4239
- const usage = extractTokenUsage(completion.usage);
4240
- if (usage) {
4241
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4242
- recordTokens(responseModel || model, usage);
4243
- }
4244
- recordExtraction(usage !== void 0);
4354
+ recordParsedUsage(
4355
+ completion.usage,
4356
+ typeof completion.model === "string" ? completion.model.trim() : model,
4357
+ model,
4358
+ usageAccountingMode,
4359
+ recordTokens,
4360
+ recordExtraction
4361
+ );
4245
4362
  return jsonResponse(chatCompletionToCompletion(completion));
4246
4363
  }
4247
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4364
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4248
4365
  const { json, text: body } = await readJsonText(request);
4249
4366
  if (isResponsesCompactionRequest(json)) {
4250
4367
  return handleResponsesCompactionV2(
@@ -4254,7 +4371,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4254
4371
  recordExtraction,
4255
4372
  json,
4256
4373
  request,
4257
- logger
4374
+ logger,
4375
+ usageAccountingMode
4258
4376
  );
4259
4377
  }
4260
4378
  const upstream = await client.responses(
@@ -4267,18 +4385,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4267
4385
  }
4268
4386
  logUpstreamSuccess(logger, "/responses", upstream.status);
4269
4387
  const model = normalizeRequestedModel(json.model);
4270
- return proxyResponse(
4271
- await responseWithObservedUsage(
4272
- upstream,
4273
- model,
4274
- recordTokens,
4275
- request.signal,
4276
- bufferProxyBodies,
4277
- recordExtraction
4278
- )
4388
+ return proxiedResponseWithOptionalUsage(
4389
+ upstream,
4390
+ model,
4391
+ markUsage,
4392
+ usageAccountingMode,
4393
+ recordTokens,
4394
+ recordExtraction,
4395
+ bufferProxyBodies
4279
4396
  );
4280
4397
  }
4281
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4398
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4282
4399
  const body = await readJson(request);
4283
4400
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4284
4401
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4288,16 +4405,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4288
4405
  logUpstreamSuccess(logger, "/responses", upstream.status);
4289
4406
  const isSse = isStreamingResponse(upstream);
4290
4407
  const text = await upstream.text();
4291
- recordResponseTextUsage(
4408
+ recordBufferedUsage(
4292
4409
  text,
4293
4410
  isSse,
4294
4411
  normalizeRequestedModel(body.model),
4412
+ usageAccountingMode,
4295
4413
  recordTokens,
4296
4414
  recordExtraction
4297
4415
  );
4298
4416
  return jsonResponse(responsesCompactionResult(text, isSse));
4299
4417
  }
4300
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4418
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4301
4419
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4302
4420
  metrics.recordUpstream("/responses", upstream.ok);
4303
4421
  if (!upstream.ok) {
@@ -4307,20 +4425,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4307
4425
  const isSse = isStreamingResponse(upstream);
4308
4426
  const text = await upstream.text();
4309
4427
  const model = normalizeRequestedModel(json.model);
4310
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4428
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4311
4429
  if (json.stream === true) {
4312
4430
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4313
4431
  }
4314
4432
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4315
4433
  }
4316
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4434
+ async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
4317
4435
  const isSse = isStreamingResponse(response);
4318
- if (bufferBody && response.body) {
4436
+ if (bufferProxyBodies && response.body) {
4319
4437
  const text = await response.text();
4320
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4321
- return responseFromText(response, text);
4438
+ recordBufferedUsage(
4439
+ text,
4440
+ isSse,
4441
+ fallbackModel,
4442
+ usageAccountingMode,
4443
+ recordTokens,
4444
+ recordExtraction
4445
+ );
4446
+ return proxyResponse(responseFromText(response, text));
4447
+ }
4448
+ return markUsage(proxyResponse(response), fallbackModel, "body");
4449
+ }
4450
+ function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4451
+ if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
4452
+ return;
4453
+ }
4454
+ const usage = extractTokenUsage(rawUsage);
4455
+ if (usage) {
4456
+ recordTokens(responseModel || fallbackModel, usage);
4457
+ }
4458
+ recordExtraction(usage !== void 0);
4459
+ }
4460
+ function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4461
+ if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
4462
+ return;
4322
4463
  }
4323
- return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
4464
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4324
4465
  }
4325
4466
  async function proxyError(upstream, logger) {
4326
4467
  const text = await upstream.text();
@@ -4376,7 +4517,24 @@ function shouldBufferProxyBodies(mode) {
4376
4517
  }
4377
4518
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4378
4519
  }
4520
+ function resolveUsageAccountingMode(options) {
4521
+ const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
4522
+ return parseUsageAccountingMode(value);
4523
+ }
4524
+ function resolveAccessLog(options) {
4525
+ return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
4526
+ }
4527
+ function shouldExtractUsage(mode, cost) {
4528
+ if (mode === "off") {
4529
+ return false;
4530
+ }
4531
+ if (mode === "basic") {
4532
+ return cost === "parsed";
4533
+ }
4534
+ return true;
4535
+ }
4379
4536
  function finishResponse(response, options) {
4537
+ const usageObservation = options.usageObservation;
4380
4538
  const withRequestId = responseWithRequestId(
4381
4539
  response,
4382
4540
  options.requestId,
@@ -4385,11 +4543,36 @@ function finishResponse(response, options) {
4385
4543
  );
4386
4544
  const stream = isStreamingResponse(withRequestId);
4387
4545
  const status = withRequestId.status;
4546
+ let completed = false;
4388
4547
  const complete = () => {
4548
+ if (completed) {
4549
+ return;
4550
+ }
4551
+ completed = true;
4389
4552
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4390
4553
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4391
- logRequestCompleted(options.logger, status, stream, durationMs);
4554
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4392
4555
  };
4556
+ if (withRequestId.body && usageObservation) {
4557
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4558
+ const observedBody = streamWithUsageObservation(
4559
+ withRequestId.body,
4560
+ stream,
4561
+ usageObservation.fallbackModel,
4562
+ usageObservation.recordTokens,
4563
+ options.signal,
4564
+ usageObservation.recordExtraction,
4565
+ shouldTrackCompletion ? complete : void 0
4566
+ );
4567
+ if (!shouldTrackCompletion) {
4568
+ complete();
4569
+ }
4570
+ return new Response(observedBody, {
4571
+ headers: withRequestId.headers,
4572
+ status,
4573
+ statusText: withRequestId.statusText
4574
+ });
4575
+ }
4393
4576
  if (stream && withRequestId.body && options.trackStreamingBody) {
4394
4577
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4395
4578
  headers: withRequestId.headers,
@@ -4459,7 +4642,7 @@ function trackStreamCompletion(body, onComplete) {
4459
4642
  }
4460
4643
  });
4461
4644
  }
4462
- function logRequestCompleted(logger, status, stream, durationMs) {
4645
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4463
4646
  const fields = {
4464
4647
  durationMs,
4465
4648
  event: "http.request.completed",
@@ -4474,6 +4657,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4474
4657
  logger.warn(fields, "request completed with client error");
4475
4658
  return;
4476
4659
  }
4660
+ if (!accessLog) {
4661
+ return;
4662
+ }
4477
4663
  logger.info(fields, "request completed");
4478
4664
  }
4479
4665
  function requestIdFor(request) {
@@ -4518,11 +4704,17 @@ var API_ROUTES = [
4518
4704
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4519
4705
  { method: "POST", path: "/v1/responses", name: "responses" }
4520
4706
  ];
4707
+ var ROUTE_NAMES = new Map(
4708
+ API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
4709
+ );
4521
4710
  function routeFor(method, path) {
4522
4711
  if (method === "OPTIONS") {
4523
4712
  return "cors.preflight";
4524
4713
  }
4525
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4714
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4715
+ }
4716
+ function routeKey(method, path) {
4717
+ return `${method} ${path}`;
4526
4718
  }
4527
4719
  function isStreamingResponse(response) {
4528
4720
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4580,12 +4772,24 @@ async function handleUsage(metrics, readUsage, request) {
4580
4772
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4581
4773
  const usagePath = "/copilot_internal/user";
4582
4774
  let cache;
4583
- return async (signal) => {
4775
+ let inFlight;
4776
+ return async () => {
4584
4777
  if (cache && now() - cache.atMs < ttlMs) {
4585
4778
  return cache.result;
4586
4779
  }
4780
+ if (inFlight) {
4781
+ return inFlight;
4782
+ }
4783
+ inFlight = readFreshUsage();
4784
+ try {
4785
+ return await inFlight;
4786
+ } finally {
4787
+ inFlight = void 0;
4788
+ }
4789
+ };
4790
+ async function readFreshUsage() {
4587
4791
  try {
4588
- const upstream = await client.usage(signal);
4792
+ const upstream = await client.usage();
4589
4793
  metrics.recordUpstream(usagePath, upstream.ok);
4590
4794
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4591
4795
  if (!upstream.ok) {
@@ -4607,7 +4811,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4607
4811
  cache = { atMs: now(), result };
4608
4812
  return result;
4609
4813
  }
4610
- };
4814
+ }
4611
4815
  }
4612
4816
 
4613
4817
  // src/update.ts
@@ -5279,6 +5483,14 @@ function parseArgs(argv) {
5279
5483
  args.allowUnauthenticated = true;
5280
5484
  continue;
5281
5485
  }
5486
+ if (arg === "--access-log") {
5487
+ args.accessLog = true;
5488
+ continue;
5489
+ }
5490
+ if (arg === "--no-access-log") {
5491
+ args.accessLog = false;
5492
+ continue;
5493
+ }
5282
5494
  if (arg === "--no-update-check") {
5283
5495
  args.noUpdateCheck = true;
5284
5496
  continue;
@@ -5313,6 +5525,9 @@ function parseArgs(argv) {
5313
5525
  case "--stream-mode":
5314
5526
  args.streamingProxyMode = parseStreamingProxyMode(optionValue(name, inlineValue, rest));
5315
5527
  break;
5528
+ case "--usage-accounting":
5529
+ args.usageAccountingMode = parseUsageAccountingMode(optionValue(name, inlineValue, rest));
5530
+ break;
5316
5531
  case "--host":
5317
5532
  args.host = optionValue(name, inlineValue, rest);
5318
5533
  break;
@@ -5617,6 +5832,9 @@ Options:
5617
5832
  --log-level <level> trace, debug, info, warn, error, fatal, or silent
5618
5833
  --log-format <format> json or pretty. Default: pretty
5619
5834
  --stream-mode <mode> auto, live, or buffer. Auto buffers Windows standalone streams.
5835
+ --usage-accounting <mode> basic, full, or off. Default: basic
5836
+ --access-log Log successful requests
5837
+ --no-access-log Do not log successful requests. Default
5620
5838
  --no-update-check Do not check GitHub for a newer release
5621
5839
  --allow-unauthenticated Allow non-loopback bind without --api-key
5622
5840
  -h, --help Show help
@@ -5630,6 +5848,8 @@ Environment:
5630
5848
  HOOPILOT_LOG_FORMAT json or pretty. Default: pretty
5631
5849
  HOOPILOT_LOG_LEVEL trace, debug, info, warn, error, fatal, or silent
5632
5850
  HOOPILOT_STREAM_MODE auto, live, or buffer
5851
+ HOOPILOT_USAGE_ACCOUNTING basic, full, or off
5852
+ HOOPILOT_ACCESS_LOG 1/0, true/false, yes/no, or on/off
5633
5853
  COPILOT_API_BASE_URL
5634
5854
  HOOPILOT_GITHUB_API_BASE_URL GitHub REST base for the usage/quota lookup. Default: https://api.github.com
5635
5855
  HOOPILOT_ALLOW_UNSAFE_UPSTREAM Set to 1 to allow nonstandard HTTPS token hosts