@openhoo/hoopilot 2.1.8 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9,14 +9,16 @@ import {
9
9
  isTrustedTokenBaseUrl,
10
10
  main,
11
11
  modelIdsFromResponse,
12
+ parseBooleanEnv,
12
13
  parseJsonObject,
13
14
  parseStreamingProxyMode,
15
+ parseUsageAccountingMode,
14
16
  randomId,
15
17
  removeUndefined,
16
18
  safeJsonParse,
17
19
  trimTrailingSlash,
18
20
  truncatedResponseText
19
- } from "./chunk-2GLKVNAA.js";
21
+ } from "./chunk-FH6WSFOC.js";
20
22
 
21
23
  // src/cli.ts
22
24
  import { spawn } from "child_process";
@@ -3185,6 +3187,8 @@ function websocketUnsupportedResponse() {
3185
3187
  var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
3186
3188
  var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
3187
3189
  var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
3190
+ var PROMETHEUS_CACHE_TTL_MS = 1e3;
3191
+ var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
3188
3192
  var MAX_TRACKED_MODELS = 200;
3189
3193
  var MAX_MODEL_LABEL_LENGTH = 200;
3190
3194
  var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
@@ -3193,6 +3197,9 @@ var UNKNOWN_MODEL = "unknown";
3193
3197
  function emptyModelTotals() {
3194
3198
  return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
3195
3199
  }
3200
+ function isPrometheusCacheNeutralRoute(route) {
3201
+ return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
3202
+ }
3196
3203
  var MetricsRegistry = class {
3197
3204
  #startedAtMs;
3198
3205
  #inFlight = 0;
@@ -3204,11 +3211,16 @@ var MetricsRegistry = class {
3204
3211
  #copilotQuota;
3205
3212
  #githubRateLimit = /* @__PURE__ */ new Map();
3206
3213
  #extraction = { extracted: 0, missing: 0 };
3214
+ #generation = 0;
3215
+ #prometheusCache;
3207
3216
  constructor(options = {}) {
3208
3217
  this.#startedAtMs = (options.now ?? Date.now)();
3209
3218
  }
3210
3219
  /** Mark a request as started; pair with exactly one {@link observe}. */
3211
3220
  startRequest(route) {
3221
+ if (!isPrometheusCacheNeutralRoute(route)) {
3222
+ this.#changed();
3223
+ }
3212
3224
  this.#inFlight += 1;
3213
3225
  if (route) {
3214
3226
  this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
@@ -3216,6 +3228,9 @@ var MetricsRegistry = class {
3216
3228
  }
3217
3229
  /** Record a completed request and clear its in-flight slot. */
3218
3230
  observe(observation) {
3231
+ if (!isPrometheusCacheNeutralRoute(observation.route)) {
3232
+ this.#changed();
3233
+ }
3219
3234
  if (this.#inFlight > 0) {
3220
3235
  this.#inFlight -= 1;
3221
3236
  }
@@ -3236,6 +3251,7 @@ var MetricsRegistry = class {
3236
3251
  * rising miss rate flags clients whose token usage is going unaccounted.
3237
3252
  */
3238
3253
  recordTokenExtraction(extracted) {
3254
+ this.#changed();
3239
3255
  if (extracted) {
3240
3256
  this.#extraction.extracted += 1;
3241
3257
  } else {
@@ -3244,6 +3260,7 @@ var MetricsRegistry = class {
3244
3260
  }
3245
3261
  /** Accumulate token counts for a model from one upstream completion. */
3246
3262
  recordTokens(model, usage) {
3263
+ this.#changed();
3247
3264
  const name = this.#modelLabel(model);
3248
3265
  const totals = this.#tokens.get(name) ?? emptyModelTotals();
3249
3266
  totals.requests += 1;
@@ -3256,11 +3273,13 @@ var MetricsRegistry = class {
3256
3273
  }
3257
3274
  /** Record one upstream Copilot call and whether it succeeded. */
3258
3275
  recordUpstream(path, ok) {
3276
+ this.#changed();
3259
3277
  const key = labelKey(path, ok ? "ok" : "error");
3260
3278
  this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
3261
3279
  }
3262
3280
  /** Store the latest Copilot quota so /metrics can expose it as gauges. */
3263
3281
  recordCopilotQuota(usage) {
3282
+ this.#changed();
3264
3283
  this.#copilotQuota = usage;
3265
3284
  }
3266
3285
  /**
@@ -3272,6 +3291,7 @@ var MetricsRegistry = class {
3272
3291
  if (!rateLimit) {
3273
3292
  return;
3274
3293
  }
3294
+ this.#changed();
3275
3295
  const resource = this.#rateLimitResource(rateLimit.resource);
3276
3296
  this.#githubRateLimit.set(resource, { ...rateLimit, resource });
3277
3297
  }
@@ -3308,6 +3328,9 @@ var MetricsRegistry = class {
3308
3328
  }
3309
3329
  this.#durations.set(route, entry);
3310
3330
  }
3331
+ #changed() {
3332
+ this.#generation += 1;
3333
+ }
3311
3334
  /** A JSON-friendly view of the current counters. */
3312
3335
  snapshot(nowOrOptions = Date.now) {
3313
3336
  const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
@@ -3411,13 +3434,18 @@ var MetricsRegistry = class {
3411
3434
  }
3412
3435
  /** Render the Prometheus text exposition format (version 0.0.4). */
3413
3436
  renderPrometheus(now = Date.now) {
3437
+ const nowMs = now();
3438
+ const cached = this.#prometheusCache;
3439
+ if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
3440
+ return cached.text;
3441
+ }
3414
3442
  const lines = [];
3415
3443
  lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
3416
3444
  lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
3417
3445
  lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
3418
3446
  lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
3419
3447
  lines.push("# TYPE hoopilot_uptime_seconds gauge");
3420
- lines.push(`hoopilot_uptime_seconds ${Math.max(0, (now() - this.#startedAtMs) / 1e3)}`);
3448
+ lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
3421
3449
  lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
3422
3450
  lines.push("# TYPE hoopilot_requests_in_flight gauge");
3423
3451
  lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
@@ -3483,8 +3511,10 @@ var MetricsRegistry = class {
3483
3511
  }
3484
3512
  this.#renderGithubRateLimit(lines);
3485
3513
  this.#renderCopilotQuota(lines);
3486
- return `${lines.join("\n")}
3514
+ const text = `${lines.join("\n")}
3487
3515
  `;
3516
+ this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
3517
+ return text;
3488
3518
  }
3489
3519
  #renderGithubRateLimit(lines) {
3490
3520
  const entries = [...this.#githubRateLimit.values()];
@@ -3619,21 +3649,6 @@ var MetricsRegistry = class {
3619
3649
  }
3620
3650
  }
3621
3651
  };
3622
- function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
3623
- const body = response.body;
3624
- if (!body) {
3625
- return response;
3626
- }
3627
- const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
3628
- return new Response(
3629
- streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
3630
- {
3631
- headers: response.headers,
3632
- status: response.status,
3633
- statusText: response.statusText
3634
- }
3635
- );
3636
- }
3637
3652
  function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
3638
3653
  const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
3639
3654
  if (isSse) {
@@ -3648,9 +3663,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
3648
3663
  }
3649
3664
  accumulator.finish();
3650
3665
  }
3651
- function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
3666
+ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
3652
3667
  const reader = stream.getReader();
3653
3668
  let aborted = signal?.aborted ?? false;
3669
+ let completed = false;
3654
3670
  let released = false;
3655
3671
  const onAbort = () => {
3656
3672
  aborted = true;
@@ -3679,6 +3695,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
3679
3695
  }
3680
3696
  released = true;
3681
3697
  signal?.removeEventListener("abort", onAbort);
3698
+ if (!completed) {
3699
+ completed = true;
3700
+ onComplete?.();
3701
+ }
3682
3702
  reader.releaseLock();
3683
3703
  };
3684
3704
  const observeChunk = (chunkBytes) => {
@@ -3788,6 +3808,9 @@ function considerSseLine(line, consider) {
3788
3808
  if (!data || data === "[DONE]") {
3789
3809
  return;
3790
3810
  }
3811
+ if (!data.includes('"usage"')) {
3812
+ return;
3813
+ }
3791
3814
  const parsed = safeJsonParse(data);
3792
3815
  if (parsed !== void 0) {
3793
3816
  consider(parsed);
@@ -3917,17 +3940,28 @@ function createHoopilotHandler(options = {}) {
3917
3940
  const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
3918
3941
  const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
3919
3942
  const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
3943
+ const usageAccountingMode = resolveUsageAccountingMode(options);
3944
+ const accessLog = resolveAccessLog(options);
3945
+ const responseUsage = /* @__PURE__ */ new WeakMap();
3946
+ const markUsage = (response, fallbackModel, cost) => {
3947
+ if (shouldExtractUsage(usageAccountingMode, cost)) {
3948
+ responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
3949
+ }
3950
+ return response;
3951
+ };
3920
3952
  const requestContext = /* @__PURE__ */ new WeakMap();
3921
3953
  const app = buildApp({
3922
3954
  apiKey,
3923
3955
  allowedOrigins,
3924
3956
  bufferProxyBodies,
3925
3957
  client,
3958
+ markUsage,
3926
3959
  metrics,
3927
3960
  readUsage,
3928
3961
  recordExtraction,
3929
3962
  recordTokens,
3930
- requestContext
3963
+ requestContext,
3964
+ usageAccountingMode
3931
3965
  });
3932
3966
  return async (request) => {
3933
3967
  const startedAt = performance.now();
@@ -3963,11 +3997,14 @@ function createHoopilotHandler(options = {}) {
3963
3997
  }
3964
3998
  return finishResponse(response, {
3965
3999
  corsOrigin,
4000
+ accessLog,
3966
4001
  logger: requestLogger,
3967
4002
  method: request.method,
3968
4003
  metrics,
3969
4004
  requestId,
4005
+ signal: request.signal,
3970
4006
  route,
4007
+ usageObservation: responseUsage.get(response),
3971
4008
  startedAt,
3972
4009
  closeConnection: bufferProxyBodies,
3973
4010
  trackStreamingBody: !bufferProxyBodies
@@ -3980,11 +4017,13 @@ function buildApp(deps) {
3980
4017
  allowedOrigins,
3981
4018
  bufferProxyBodies,
3982
4019
  client,
4020
+ markUsage,
3983
4021
  metrics,
3984
4022
  readUsage,
3985
4023
  recordExtraction,
3986
4024
  recordTokens,
3987
- requestContext
4025
+ requestContext,
4026
+ usageAccountingMode
3988
4027
  } = deps;
3989
4028
  const contextFor = (request) => {
3990
4029
  const stored = requestContext.get(request);
@@ -4072,11 +4111,13 @@ function buildApp(deps) {
4072
4111
  ({ request }) => handleAnthropicMessages(
4073
4112
  client,
4074
4113
  metrics,
4114
+ markUsage,
4075
4115
  recordTokens,
4076
4116
  recordExtraction,
4077
4117
  request,
4078
4118
  loggerFor(request),
4079
- bufferProxyBodies
4119
+ bufferProxyBodies,
4120
+ usageAccountingMode
4080
4121
  ),
4081
4122
  noBody
4082
4123
  ).post(
@@ -4088,11 +4129,13 @@ function buildApp(deps) {
4088
4129
  ({ request }) => handleChatCompletions(
4089
4130
  client,
4090
4131
  metrics,
4132
+ markUsage,
4091
4133
  recordTokens,
4092
4134
  recordExtraction,
4093
4135
  request,
4094
4136
  loggerFor(request),
4095
- bufferProxyBodies
4137
+ bufferProxyBodies,
4138
+ usageAccountingMode
4096
4139
  ),
4097
4140
  noBody
4098
4141
  ).post(
@@ -4100,11 +4143,13 @@ function buildApp(deps) {
4100
4143
  ({ request }) => handleCompletions(
4101
4144
  client,
4102
4145
  metrics,
4146
+ markUsage,
4103
4147
  recordTokens,
4104
4148
  recordExtraction,
4105
4149
  request,
4106
4150
  loggerFor(request),
4107
- bufferProxyBodies
4151
+ bufferProxyBodies,
4152
+ usageAccountingMode
4108
4153
  ),
4109
4154
  noBody
4110
4155
  ).post(
@@ -4115,7 +4160,8 @@ function buildApp(deps) {
4115
4160
  recordTokens,
4116
4161
  recordExtraction,
4117
4162
  request,
4118
- loggerFor(request)
4163
+ loggerFor(request),
4164
+ usageAccountingMode
4119
4165
  ),
4120
4166
  noBody
4121
4167
  ).post(
@@ -4123,11 +4169,13 @@ function buildApp(deps) {
4123
4169
  ({ request }) => handleResponses(
4124
4170
  client,
4125
4171
  metrics,
4172
+ markUsage,
4126
4173
  recordTokens,
4127
4174
  recordExtraction,
4128
4175
  request,
4129
4176
  loggerFor(request),
4130
- bufferProxyBodies
4177
+ bufferProxyBodies,
4178
+ usageAccountingMode
4131
4179
  ),
4132
4180
  noBody
4133
4181
  );
@@ -4180,7 +4228,7 @@ function startHoopilotServer(options = {}) {
4180
4228
  url: `http://${urlHost(host)}:${server.port}`
4181
4229
  };
4182
4230
  }
4183
- async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4231
+ async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4184
4232
  const anthropicRequest = await readJson(request);
4185
4233
  const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
4186
4234
  const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
@@ -4193,36 +4241,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
4193
4241
  if (isStreamingResponse(upstream) && upstream.body) {
4194
4242
  if (bufferProxyBodies) {
4195
4243
  const text = await upstream.text();
4196
- recordResponseTextUsage(text, true, model, recordTokens, recordExtraction);
4244
+ recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
4197
4245
  return proxyResponse(
4198
4246
  responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
4199
4247
  );
4200
4248
  }
4201
- const observed = observeResponseUsage(
4202
- upstream,
4249
+ return markUsage(
4250
+ proxyResponse(
4251
+ new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
4252
+ headers: upstream.headers,
4253
+ status: upstream.status,
4254
+ statusText: upstream.statusText
4255
+ })
4256
+ ),
4203
4257
  model,
4204
- recordTokens,
4205
- request.signal,
4206
- recordExtraction
4207
- );
4208
- if (!observed.body) {
4209
- return proxyResponse(observed);
4210
- }
4211
- return proxyResponse(
4212
- new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
4213
- headers: observed.headers,
4214
- status: observed.status,
4215
- statusText: observed.statusText
4216
- })
4258
+ "body"
4217
4259
  );
4218
4260
  }
4219
4261
  const body = asRecord(await upstream.json());
4220
- const usage = extractTokenUsage(body.usage);
4221
- if (usage) {
4222
- const responseModel = typeof body.model === "string" ? body.model.trim() : "";
4223
- recordTokens(responseModel || model, usage);
4224
- }
4225
- recordExtraction(usage !== void 0);
4262
+ recordParsedUsage(
4263
+ body.usage,
4264
+ typeof body.model === "string" ? body.model.trim() : model,
4265
+ model,
4266
+ usageAccountingMode,
4267
+ recordTokens,
4268
+ recordExtraction
4269
+ );
4226
4270
  return jsonResponse(responsesResponseToAnthropicMessage(body, model));
4227
4271
  }
4228
4272
  async function handleAnthropicCountTokens(request) {
@@ -4249,7 +4293,7 @@ async function handleModels(client, metrics, signal, logger) {
4249
4293
  logUpstreamSuccess(logger, "/models", upstream.status);
4250
4294
  return jsonResponse(normalizeModelsResponse(await upstream.json()));
4251
4295
  }
4252
- async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4296
+ async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4253
4297
  const chatRequest = normalizeChatCompletionRequest(await readJson(request));
4254
4298
  const upstream = await client.chatCompletions(chatRequest, request.signal);
4255
4299
  metrics.recordUpstream("/chat/completions", upstream.ok);
@@ -4258,18 +4302,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
4258
4302
  }
4259
4303
  logUpstreamSuccess(logger, "/chat/completions", upstream.status);
4260
4304
  const model = normalizeRequestedModel(chatRequest.model);
4261
- return proxyResponse(
4262
- await responseWithObservedUsage(
4263
- upstream,
4264
- model,
4265
- recordTokens,
4266
- request.signal,
4267
- bufferProxyBodies,
4268
- recordExtraction
4269
- )
4305
+ return proxiedResponseWithOptionalUsage(
4306
+ upstream,
4307
+ model,
4308
+ markUsage,
4309
+ usageAccountingMode,
4310
+ recordTokens,
4311
+ recordExtraction,
4312
+ bufferProxyBodies
4270
4313
  );
4271
4314
  }
4272
- async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4315
+ async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4273
4316
  const body = await readJson(request);
4274
4317
  const upstream = await client.chatCompletions(
4275
4318
  completionsRequestToChatCompletion(body),
@@ -4284,34 +4327,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
4284
4327
  if (isStreamingResponse(upstream) && upstream.body) {
4285
4328
  if (bufferProxyBodies) {
4286
4329
  const upstreamText = await upstream.text();
4287
- recordResponseTextUsage(upstreamText, true, model, recordTokens, recordExtraction);
4330
+ recordBufferedUsage(
4331
+ upstreamText,
4332
+ true,
4333
+ model,
4334
+ usageAccountingMode,
4335
+ recordTokens,
4336
+ recordExtraction
4337
+ );
4288
4338
  const text = completionSseTextFromChatSseText(upstreamText);
4289
4339
  return proxyResponse(responseFromText(upstream, text));
4290
4340
  }
4291
- return proxyResponse(
4292
- observeResponseUsage(
4341
+ return markUsage(
4342
+ proxyResponse(
4293
4343
  new Response(completionStreamFromChatStream(upstream.body), {
4294
4344
  headers: upstream.headers,
4295
4345
  status: upstream.status,
4296
4346
  statusText: upstream.statusText
4297
- }),
4298
- model,
4299
- recordTokens,
4300
- request.signal,
4301
- recordExtraction
4302
- )
4347
+ })
4348
+ ),
4349
+ model,
4350
+ "body"
4303
4351
  );
4304
4352
  }
4305
4353
  const completion = asRecord(await upstream.json());
4306
- const usage = extractTokenUsage(completion.usage);
4307
- if (usage) {
4308
- const responseModel = typeof completion.model === "string" ? completion.model.trim() : "";
4309
- recordTokens(responseModel || model, usage);
4310
- }
4311
- recordExtraction(usage !== void 0);
4354
+ recordParsedUsage(
4355
+ completion.usage,
4356
+ typeof completion.model === "string" ? completion.model.trim() : model,
4357
+ model,
4358
+ usageAccountingMode,
4359
+ recordTokens,
4360
+ recordExtraction
4361
+ );
4312
4362
  return jsonResponse(chatCompletionToCompletion(completion));
4313
4363
  }
4314
- async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
4364
+ async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
4315
4365
  const { json, text: body } = await readJsonText(request);
4316
4366
  if (isResponsesCompactionRequest(json)) {
4317
4367
  return handleResponsesCompactionV2(
@@ -4321,7 +4371,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4321
4371
  recordExtraction,
4322
4372
  json,
4323
4373
  request,
4324
- logger
4374
+ logger,
4375
+ usageAccountingMode
4325
4376
  );
4326
4377
  }
4327
4378
  const upstream = await client.responses(
@@ -4334,18 +4385,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
4334
4385
  }
4335
4386
  logUpstreamSuccess(logger, "/responses", upstream.status);
4336
4387
  const model = normalizeRequestedModel(json.model);
4337
- return proxyResponse(
4338
- await responseWithObservedUsage(
4339
- upstream,
4340
- model,
4341
- recordTokens,
4342
- request.signal,
4343
- bufferProxyBodies,
4344
- recordExtraction
4345
- )
4388
+ return proxiedResponseWithOptionalUsage(
4389
+ upstream,
4390
+ model,
4391
+ markUsage,
4392
+ usageAccountingMode,
4393
+ recordTokens,
4394
+ recordExtraction,
4395
+ bufferProxyBodies
4346
4396
  );
4347
4397
  }
4348
- async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
4398
+ async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
4349
4399
  const body = await readJson(request);
4350
4400
  const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
4351
4401
  metrics.recordUpstream("/responses", upstream.ok);
@@ -4355,16 +4405,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
4355
4405
  logUpstreamSuccess(logger, "/responses", upstream.status);
4356
4406
  const isSse = isStreamingResponse(upstream);
4357
4407
  const text = await upstream.text();
4358
- recordResponseTextUsage(
4408
+ recordBufferedUsage(
4359
4409
  text,
4360
4410
  isSse,
4361
4411
  normalizeRequestedModel(body.model),
4412
+ usageAccountingMode,
4362
4413
  recordTokens,
4363
4414
  recordExtraction
4364
4415
  );
4365
4416
  return jsonResponse(responsesCompactionResult(text, isSse));
4366
4417
  }
4367
- async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
4418
+ async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
4368
4419
  const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
4369
4420
  metrics.recordUpstream("/responses", upstream.ok);
4370
4421
  if (!upstream.ok) {
@@ -4374,20 +4425,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
4374
4425
  const isSse = isStreamingResponse(upstream);
4375
4426
  const text = await upstream.text();
4376
4427
  const model = normalizeRequestedModel(json.model);
4377
- recordResponseTextUsage(text, isSse, model, recordTokens, recordExtraction);
4428
+ recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
4378
4429
  if (json.stream === true) {
4379
4430
  return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
4380
4431
  }
4381
4432
  return jsonResponse(responsesCompactionResponse(text, isSse, model));
4382
4433
  }
4383
- async function responseWithObservedUsage(response, fallbackModel, recordTokens, signal, bufferBody, recordExtraction) {
4434
+ async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
4384
4435
  const isSse = isStreamingResponse(response);
4385
- if (bufferBody && response.body) {
4436
+ if (bufferProxyBodies && response.body) {
4386
4437
  const text = await response.text();
4387
- recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4388
- return responseFromText(response, text);
4438
+ recordBufferedUsage(
4439
+ text,
4440
+ isSse,
4441
+ fallbackModel,
4442
+ usageAccountingMode,
4443
+ recordTokens,
4444
+ recordExtraction
4445
+ );
4446
+ return proxyResponse(responseFromText(response, text));
4447
+ }
4448
+ return markUsage(proxyResponse(response), fallbackModel, "body");
4449
+ }
4450
+ function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4451
+ if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
4452
+ return;
4453
+ }
4454
+ const usage = extractTokenUsage(rawUsage);
4455
+ if (usage) {
4456
+ recordTokens(responseModel || fallbackModel, usage);
4389
4457
  }
4390
- return observeResponseUsage(response, fallbackModel, recordTokens, signal, recordExtraction);
4458
+ recordExtraction(usage !== void 0);
4459
+ }
4460
+ function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
4461
+ if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
4462
+ return;
4463
+ }
4464
+ recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
4391
4465
  }
4392
4466
  async function proxyError(upstream, logger) {
4393
4467
  const text = await upstream.text();
@@ -4443,7 +4517,24 @@ function shouldBufferProxyBodies(mode) {
4443
4517
  }
4444
4518
  return process.platform === "win32" && IS_STANDALONE_BINARY;
4445
4519
  }
4520
+ function resolveUsageAccountingMode(options) {
4521
+ const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
4522
+ return parseUsageAccountingMode(value);
4523
+ }
4524
+ function resolveAccessLog(options) {
4525
+ return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
4526
+ }
4527
+ function shouldExtractUsage(mode, cost) {
4528
+ if (mode === "off") {
4529
+ return false;
4530
+ }
4531
+ if (mode === "basic") {
4532
+ return cost === "parsed";
4533
+ }
4534
+ return true;
4535
+ }
4446
4536
  function finishResponse(response, options) {
4537
+ const usageObservation = options.usageObservation;
4447
4538
  const withRequestId = responseWithRequestId(
4448
4539
  response,
4449
4540
  options.requestId,
@@ -4452,11 +4543,36 @@ function finishResponse(response, options) {
4452
4543
  );
4453
4544
  const stream = isStreamingResponse(withRequestId);
4454
4545
  const status = withRequestId.status;
4546
+ let completed = false;
4455
4547
  const complete = () => {
4548
+ if (completed) {
4549
+ return;
4550
+ }
4551
+ completed = true;
4456
4552
  const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
4457
4553
  options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
4458
- logRequestCompleted(options.logger, status, stream, durationMs);
4554
+ logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
4459
4555
  };
4556
+ if (withRequestId.body && usageObservation) {
4557
+ const shouldTrackCompletion = stream && options.trackStreamingBody;
4558
+ const observedBody = streamWithUsageObservation(
4559
+ withRequestId.body,
4560
+ stream,
4561
+ usageObservation.fallbackModel,
4562
+ usageObservation.recordTokens,
4563
+ options.signal,
4564
+ usageObservation.recordExtraction,
4565
+ shouldTrackCompletion ? complete : void 0
4566
+ );
4567
+ if (!shouldTrackCompletion) {
4568
+ complete();
4569
+ }
4570
+ return new Response(observedBody, {
4571
+ headers: withRequestId.headers,
4572
+ status,
4573
+ statusText: withRequestId.statusText
4574
+ });
4575
+ }
4460
4576
  if (stream && withRequestId.body && options.trackStreamingBody) {
4461
4577
  return new Response(trackStreamCompletion(withRequestId.body, complete), {
4462
4578
  headers: withRequestId.headers,
@@ -4526,7 +4642,7 @@ function trackStreamCompletion(body, onComplete) {
4526
4642
  }
4527
4643
  });
4528
4644
  }
4529
- function logRequestCompleted(logger, status, stream, durationMs) {
4645
+ function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
4530
4646
  const fields = {
4531
4647
  durationMs,
4532
4648
  event: "http.request.completed",
@@ -4541,6 +4657,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
4541
4657
  logger.warn(fields, "request completed with client error");
4542
4658
  return;
4543
4659
  }
4660
+ if (!accessLog) {
4661
+ return;
4662
+ }
4544
4663
  logger.info(fields, "request completed");
4545
4664
  }
4546
4665
  function requestIdFor(request) {
@@ -4585,11 +4704,17 @@ var API_ROUTES = [
4585
4704
  { method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
4586
4705
  { method: "POST", path: "/v1/responses", name: "responses" }
4587
4706
  ];
4707
+ var ROUTE_NAMES = new Map(
4708
+ API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
4709
+ );
4588
4710
  function routeFor(method, path) {
4589
4711
  if (method === "OPTIONS") {
4590
4712
  return "cors.preflight";
4591
4713
  }
4592
- return API_ROUTES.find((entry) => entry.method === method && entry.path === path)?.name ?? "not_found";
4714
+ return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
4715
+ }
4716
+ function routeKey(method, path) {
4717
+ return `${method} ${path}`;
4593
4718
  }
4594
4719
  function isStreamingResponse(response) {
4595
4720
  return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
@@ -4647,12 +4772,24 @@ async function handleUsage(metrics, readUsage, request) {
4647
4772
  function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
4648
4773
  const usagePath = "/copilot_internal/user";
4649
4774
  let cache;
4650
- return async (signal) => {
4775
+ let inFlight;
4776
+ return async () => {
4651
4777
  if (cache && now() - cache.atMs < ttlMs) {
4652
4778
  return cache.result;
4653
4779
  }
4780
+ if (inFlight) {
4781
+ return inFlight;
4782
+ }
4783
+ inFlight = readFreshUsage();
4784
+ try {
4785
+ return await inFlight;
4786
+ } finally {
4787
+ inFlight = void 0;
4788
+ }
4789
+ };
4790
+ async function readFreshUsage() {
4654
4791
  try {
4655
- const upstream = await client.usage(signal);
4792
+ const upstream = await client.usage();
4656
4793
  metrics.recordUpstream(usagePath, upstream.ok);
4657
4794
  metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
4658
4795
  if (!upstream.ok) {
@@ -4674,7 +4811,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
4674
4811
  cache = { atMs: now(), result };
4675
4812
  return result;
4676
4813
  }
4677
- };
4814
+ }
4678
4815
  }
4679
4816
 
4680
4817
  // src/update.ts
@@ -5346,6 +5483,14 @@ function parseArgs(argv) {
5346
5483
  args.allowUnauthenticated = true;
5347
5484
  continue;
5348
5485
  }
5486
+ if (arg === "--access-log") {
5487
+ args.accessLog = true;
5488
+ continue;
5489
+ }
5490
+ if (arg === "--no-access-log") {
5491
+ args.accessLog = false;
5492
+ continue;
5493
+ }
5349
5494
  if (arg === "--no-update-check") {
5350
5495
  args.noUpdateCheck = true;
5351
5496
  continue;
@@ -5380,6 +5525,9 @@ function parseArgs(argv) {
5380
5525
  case "--stream-mode":
5381
5526
  args.streamingProxyMode = parseStreamingProxyMode(optionValue(name, inlineValue, rest));
5382
5527
  break;
5528
+ case "--usage-accounting":
5529
+ args.usageAccountingMode = parseUsageAccountingMode(optionValue(name, inlineValue, rest));
5530
+ break;
5383
5531
  case "--host":
5384
5532
  args.host = optionValue(name, inlineValue, rest);
5385
5533
  break;
@@ -5684,6 +5832,9 @@ Options:
5684
5832
  --log-level <level> trace, debug, info, warn, error, fatal, or silent
5685
5833
  --log-format <format> json or pretty. Default: pretty
5686
5834
  --stream-mode <mode> auto, live, or buffer. Auto buffers Windows standalone streams.
5835
+ --usage-accounting <mode> basic, full, or off. Default: basic
5836
+ --access-log Log successful requests
5837
+ --no-access-log Do not log successful requests. Default
5687
5838
  --no-update-check Do not check GitHub for a newer release
5688
5839
  --allow-unauthenticated Allow non-loopback bind without --api-key
5689
5840
  -h, --help Show help
@@ -5697,6 +5848,8 @@ Environment:
5697
5848
  HOOPILOT_LOG_FORMAT json or pretty. Default: pretty
5698
5849
  HOOPILOT_LOG_LEVEL trace, debug, info, warn, error, fatal, or silent
5699
5850
  HOOPILOT_STREAM_MODE auto, live, or buffer
5851
+ HOOPILOT_USAGE_ACCOUNTING basic, full, or off
5852
+ HOOPILOT_ACCESS_LOG 1/0, true/false, yes/no, or on/off
5700
5853
  COPILOT_API_BASE_URL
5701
5854
  HOOPILOT_GITHUB_API_BASE_URL GitHub REST base for the usage/quota lookup. Default: https://api.github.com
5702
5855
  HOOPILOT_ALLOW_UNSAFE_UPSTREAM Set to 1 to allow nonstandard HTTPS token hosts