@openhoo/hoopilot 2.1.8 → 2.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -3
- package/dist/{chunk-2GLKVNAA.js → chunk-FH6WSFOC.js} +29 -1
- package/dist/chunk-FH6WSFOC.js.map +1 -0
- package/dist/cli.js +254 -101
- package/dist/cli.js.map +1 -1
- package/dist/codexx.js +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.js +261 -100
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2GLKVNAA.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -9,14 +9,16 @@ import {
|
|
|
9
9
|
isTrustedTokenBaseUrl,
|
|
10
10
|
main,
|
|
11
11
|
modelIdsFromResponse,
|
|
12
|
+
parseBooleanEnv,
|
|
12
13
|
parseJsonObject,
|
|
13
14
|
parseStreamingProxyMode,
|
|
15
|
+
parseUsageAccountingMode,
|
|
14
16
|
randomId,
|
|
15
17
|
removeUndefined,
|
|
16
18
|
safeJsonParse,
|
|
17
19
|
trimTrailingSlash,
|
|
18
20
|
truncatedResponseText
|
|
19
|
-
} from "./chunk-
|
|
21
|
+
} from "./chunk-FH6WSFOC.js";
|
|
20
22
|
|
|
21
23
|
// src/cli.ts
|
|
22
24
|
import { spawn } from "child_process";
|
|
@@ -3185,6 +3187,8 @@ function websocketUnsupportedResponse() {
|
|
|
3185
3187
|
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
|
|
3186
3188
|
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
3187
3189
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
3190
|
+
var PROMETHEUS_CACHE_TTL_MS = 1e3;
|
|
3191
|
+
var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
|
|
3188
3192
|
var MAX_TRACKED_MODELS = 200;
|
|
3189
3193
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
3190
3194
|
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
@@ -3193,6 +3197,9 @@ var UNKNOWN_MODEL = "unknown";
|
|
|
3193
3197
|
function emptyModelTotals() {
|
|
3194
3198
|
return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
|
|
3195
3199
|
}
|
|
3200
|
+
function isPrometheusCacheNeutralRoute(route) {
|
|
3201
|
+
return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
|
|
3202
|
+
}
|
|
3196
3203
|
var MetricsRegistry = class {
|
|
3197
3204
|
#startedAtMs;
|
|
3198
3205
|
#inFlight = 0;
|
|
@@ -3204,11 +3211,16 @@ var MetricsRegistry = class {
|
|
|
3204
3211
|
#copilotQuota;
|
|
3205
3212
|
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
3206
3213
|
#extraction = { extracted: 0, missing: 0 };
|
|
3214
|
+
#generation = 0;
|
|
3215
|
+
#prometheusCache;
|
|
3207
3216
|
constructor(options = {}) {
|
|
3208
3217
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
3209
3218
|
}
|
|
3210
3219
|
/** Mark a request as started; pair with exactly one {@link observe}. */
|
|
3211
3220
|
startRequest(route) {
|
|
3221
|
+
if (!isPrometheusCacheNeutralRoute(route)) {
|
|
3222
|
+
this.#changed();
|
|
3223
|
+
}
|
|
3212
3224
|
this.#inFlight += 1;
|
|
3213
3225
|
if (route) {
|
|
3214
3226
|
this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
|
|
@@ -3216,6 +3228,9 @@ var MetricsRegistry = class {
|
|
|
3216
3228
|
}
|
|
3217
3229
|
/** Record a completed request and clear its in-flight slot. */
|
|
3218
3230
|
observe(observation) {
|
|
3231
|
+
if (!isPrometheusCacheNeutralRoute(observation.route)) {
|
|
3232
|
+
this.#changed();
|
|
3233
|
+
}
|
|
3219
3234
|
if (this.#inFlight > 0) {
|
|
3220
3235
|
this.#inFlight -= 1;
|
|
3221
3236
|
}
|
|
@@ -3236,6 +3251,7 @@ var MetricsRegistry = class {
|
|
|
3236
3251
|
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
3237
3252
|
*/
|
|
3238
3253
|
recordTokenExtraction(extracted) {
|
|
3254
|
+
this.#changed();
|
|
3239
3255
|
if (extracted) {
|
|
3240
3256
|
this.#extraction.extracted += 1;
|
|
3241
3257
|
} else {
|
|
@@ -3244,6 +3260,7 @@ var MetricsRegistry = class {
|
|
|
3244
3260
|
}
|
|
3245
3261
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
3246
3262
|
recordTokens(model, usage) {
|
|
3263
|
+
this.#changed();
|
|
3247
3264
|
const name = this.#modelLabel(model);
|
|
3248
3265
|
const totals = this.#tokens.get(name) ?? emptyModelTotals();
|
|
3249
3266
|
totals.requests += 1;
|
|
@@ -3256,11 +3273,13 @@ var MetricsRegistry = class {
|
|
|
3256
3273
|
}
|
|
3257
3274
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
3258
3275
|
recordUpstream(path, ok) {
|
|
3276
|
+
this.#changed();
|
|
3259
3277
|
const key = labelKey(path, ok ? "ok" : "error");
|
|
3260
3278
|
this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
|
|
3261
3279
|
}
|
|
3262
3280
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
3263
3281
|
recordCopilotQuota(usage) {
|
|
3282
|
+
this.#changed();
|
|
3264
3283
|
this.#copilotQuota = usage;
|
|
3265
3284
|
}
|
|
3266
3285
|
/**
|
|
@@ -3272,6 +3291,7 @@ var MetricsRegistry = class {
|
|
|
3272
3291
|
if (!rateLimit) {
|
|
3273
3292
|
return;
|
|
3274
3293
|
}
|
|
3294
|
+
this.#changed();
|
|
3275
3295
|
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
3276
3296
|
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
3277
3297
|
}
|
|
@@ -3308,6 +3328,9 @@ var MetricsRegistry = class {
|
|
|
3308
3328
|
}
|
|
3309
3329
|
this.#durations.set(route, entry);
|
|
3310
3330
|
}
|
|
3331
|
+
#changed() {
|
|
3332
|
+
this.#generation += 1;
|
|
3333
|
+
}
|
|
3311
3334
|
/** A JSON-friendly view of the current counters. */
|
|
3312
3335
|
snapshot(nowOrOptions = Date.now) {
|
|
3313
3336
|
const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
|
|
@@ -3411,13 +3434,18 @@ var MetricsRegistry = class {
|
|
|
3411
3434
|
}
|
|
3412
3435
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
3413
3436
|
renderPrometheus(now = Date.now) {
|
|
3437
|
+
const nowMs = now();
|
|
3438
|
+
const cached = this.#prometheusCache;
|
|
3439
|
+
if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
|
|
3440
|
+
return cached.text;
|
|
3441
|
+
}
|
|
3414
3442
|
const lines = [];
|
|
3415
3443
|
lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
|
|
3416
3444
|
lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
|
|
3417
3445
|
lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
|
|
3418
3446
|
lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
|
|
3419
3447
|
lines.push("# TYPE hoopilot_uptime_seconds gauge");
|
|
3420
|
-
lines.push(`hoopilot_uptime_seconds ${Math.max(0, (
|
|
3448
|
+
lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
|
|
3421
3449
|
lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
|
|
3422
3450
|
lines.push("# TYPE hoopilot_requests_in_flight gauge");
|
|
3423
3451
|
lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
|
|
@@ -3483,8 +3511,10 @@ var MetricsRegistry = class {
|
|
|
3483
3511
|
}
|
|
3484
3512
|
this.#renderGithubRateLimit(lines);
|
|
3485
3513
|
this.#renderCopilotQuota(lines);
|
|
3486
|
-
|
|
3514
|
+
const text = `${lines.join("\n")}
|
|
3487
3515
|
`;
|
|
3516
|
+
this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
|
|
3517
|
+
return text;
|
|
3488
3518
|
}
|
|
3489
3519
|
#renderGithubRateLimit(lines) {
|
|
3490
3520
|
const entries = [...this.#githubRateLimit.values()];
|
|
@@ -3619,21 +3649,6 @@ var MetricsRegistry = class {
|
|
|
3619
3649
|
}
|
|
3620
3650
|
}
|
|
3621
3651
|
};
|
|
3622
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
3623
|
-
const body = response.body;
|
|
3624
|
-
if (!body) {
|
|
3625
|
-
return response;
|
|
3626
|
-
}
|
|
3627
|
-
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
3628
|
-
return new Response(
|
|
3629
|
-
streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
|
|
3630
|
-
{
|
|
3631
|
-
headers: response.headers,
|
|
3632
|
-
status: response.status,
|
|
3633
|
-
statusText: response.statusText
|
|
3634
|
-
}
|
|
3635
|
-
);
|
|
3636
|
-
}
|
|
3637
3652
|
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
3638
3653
|
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
3639
3654
|
if (isSse) {
|
|
@@ -3648,9 +3663,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
|
|
|
3648
3663
|
}
|
|
3649
3664
|
accumulator.finish();
|
|
3650
3665
|
}
|
|
3651
|
-
function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
3666
|
+
function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
|
|
3652
3667
|
const reader = stream.getReader();
|
|
3653
3668
|
let aborted = signal?.aborted ?? false;
|
|
3669
|
+
let completed = false;
|
|
3654
3670
|
let released = false;
|
|
3655
3671
|
const onAbort = () => {
|
|
3656
3672
|
aborted = true;
|
|
@@ -3679,6 +3695,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
|
|
|
3679
3695
|
}
|
|
3680
3696
|
released = true;
|
|
3681
3697
|
signal?.removeEventListener("abort", onAbort);
|
|
3698
|
+
if (!completed) {
|
|
3699
|
+
completed = true;
|
|
3700
|
+
onComplete?.();
|
|
3701
|
+
}
|
|
3682
3702
|
reader.releaseLock();
|
|
3683
3703
|
};
|
|
3684
3704
|
const observeChunk = (chunkBytes) => {
|
|
@@ -3788,6 +3808,9 @@ function considerSseLine(line, consider) {
|
|
|
3788
3808
|
if (!data || data === "[DONE]") {
|
|
3789
3809
|
return;
|
|
3790
3810
|
}
|
|
3811
|
+
if (!data.includes('"usage"')) {
|
|
3812
|
+
return;
|
|
3813
|
+
}
|
|
3791
3814
|
const parsed = safeJsonParse(data);
|
|
3792
3815
|
if (parsed !== void 0) {
|
|
3793
3816
|
consider(parsed);
|
|
@@ -3917,17 +3940,28 @@ function createHoopilotHandler(options = {}) {
|
|
|
3917
3940
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
3918
3941
|
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
3919
3942
|
const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
|
|
3943
|
+
const usageAccountingMode = resolveUsageAccountingMode(options);
|
|
3944
|
+
const accessLog = resolveAccessLog(options);
|
|
3945
|
+
const responseUsage = /* @__PURE__ */ new WeakMap();
|
|
3946
|
+
const markUsage = (response, fallbackModel, cost) => {
|
|
3947
|
+
if (shouldExtractUsage(usageAccountingMode, cost)) {
|
|
3948
|
+
responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
|
|
3949
|
+
}
|
|
3950
|
+
return response;
|
|
3951
|
+
};
|
|
3920
3952
|
const requestContext = /* @__PURE__ */ new WeakMap();
|
|
3921
3953
|
const app = buildApp({
|
|
3922
3954
|
apiKey,
|
|
3923
3955
|
allowedOrigins,
|
|
3924
3956
|
bufferProxyBodies,
|
|
3925
3957
|
client,
|
|
3958
|
+
markUsage,
|
|
3926
3959
|
metrics,
|
|
3927
3960
|
readUsage,
|
|
3928
3961
|
recordExtraction,
|
|
3929
3962
|
recordTokens,
|
|
3930
|
-
requestContext
|
|
3963
|
+
requestContext,
|
|
3964
|
+
usageAccountingMode
|
|
3931
3965
|
});
|
|
3932
3966
|
return async (request) => {
|
|
3933
3967
|
const startedAt = performance.now();
|
|
@@ -3963,11 +3997,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
3963
3997
|
}
|
|
3964
3998
|
return finishResponse(response, {
|
|
3965
3999
|
corsOrigin,
|
|
4000
|
+
accessLog,
|
|
3966
4001
|
logger: requestLogger,
|
|
3967
4002
|
method: request.method,
|
|
3968
4003
|
metrics,
|
|
3969
4004
|
requestId,
|
|
4005
|
+
signal: request.signal,
|
|
3970
4006
|
route,
|
|
4007
|
+
usageObservation: responseUsage.get(response),
|
|
3971
4008
|
startedAt,
|
|
3972
4009
|
closeConnection: bufferProxyBodies,
|
|
3973
4010
|
trackStreamingBody: !bufferProxyBodies
|
|
@@ -3980,11 +4017,13 @@ function buildApp(deps) {
|
|
|
3980
4017
|
allowedOrigins,
|
|
3981
4018
|
bufferProxyBodies,
|
|
3982
4019
|
client,
|
|
4020
|
+
markUsage,
|
|
3983
4021
|
metrics,
|
|
3984
4022
|
readUsage,
|
|
3985
4023
|
recordExtraction,
|
|
3986
4024
|
recordTokens,
|
|
3987
|
-
requestContext
|
|
4025
|
+
requestContext,
|
|
4026
|
+
usageAccountingMode
|
|
3988
4027
|
} = deps;
|
|
3989
4028
|
const contextFor = (request) => {
|
|
3990
4029
|
const stored = requestContext.get(request);
|
|
@@ -4072,11 +4111,13 @@ function buildApp(deps) {
|
|
|
4072
4111
|
({ request }) => handleAnthropicMessages(
|
|
4073
4112
|
client,
|
|
4074
4113
|
metrics,
|
|
4114
|
+
markUsage,
|
|
4075
4115
|
recordTokens,
|
|
4076
4116
|
recordExtraction,
|
|
4077
4117
|
request,
|
|
4078
4118
|
loggerFor(request),
|
|
4079
|
-
bufferProxyBodies
|
|
4119
|
+
bufferProxyBodies,
|
|
4120
|
+
usageAccountingMode
|
|
4080
4121
|
),
|
|
4081
4122
|
noBody
|
|
4082
4123
|
).post(
|
|
@@ -4088,11 +4129,13 @@ function buildApp(deps) {
|
|
|
4088
4129
|
({ request }) => handleChatCompletions(
|
|
4089
4130
|
client,
|
|
4090
4131
|
metrics,
|
|
4132
|
+
markUsage,
|
|
4091
4133
|
recordTokens,
|
|
4092
4134
|
recordExtraction,
|
|
4093
4135
|
request,
|
|
4094
4136
|
loggerFor(request),
|
|
4095
|
-
bufferProxyBodies
|
|
4137
|
+
bufferProxyBodies,
|
|
4138
|
+
usageAccountingMode
|
|
4096
4139
|
),
|
|
4097
4140
|
noBody
|
|
4098
4141
|
).post(
|
|
@@ -4100,11 +4143,13 @@ function buildApp(deps) {
|
|
|
4100
4143
|
({ request }) => handleCompletions(
|
|
4101
4144
|
client,
|
|
4102
4145
|
metrics,
|
|
4146
|
+
markUsage,
|
|
4103
4147
|
recordTokens,
|
|
4104
4148
|
recordExtraction,
|
|
4105
4149
|
request,
|
|
4106
4150
|
loggerFor(request),
|
|
4107
|
-
bufferProxyBodies
|
|
4151
|
+
bufferProxyBodies,
|
|
4152
|
+
usageAccountingMode
|
|
4108
4153
|
),
|
|
4109
4154
|
noBody
|
|
4110
4155
|
).post(
|
|
@@ -4115,7 +4160,8 @@ function buildApp(deps) {
|
|
|
4115
4160
|
recordTokens,
|
|
4116
4161
|
recordExtraction,
|
|
4117
4162
|
request,
|
|
4118
|
-
loggerFor(request)
|
|
4163
|
+
loggerFor(request),
|
|
4164
|
+
usageAccountingMode
|
|
4119
4165
|
),
|
|
4120
4166
|
noBody
|
|
4121
4167
|
).post(
|
|
@@ -4123,11 +4169,13 @@ function buildApp(deps) {
|
|
|
4123
4169
|
({ request }) => handleResponses(
|
|
4124
4170
|
client,
|
|
4125
4171
|
metrics,
|
|
4172
|
+
markUsage,
|
|
4126
4173
|
recordTokens,
|
|
4127
4174
|
recordExtraction,
|
|
4128
4175
|
request,
|
|
4129
4176
|
loggerFor(request),
|
|
4130
|
-
bufferProxyBodies
|
|
4177
|
+
bufferProxyBodies,
|
|
4178
|
+
usageAccountingMode
|
|
4131
4179
|
),
|
|
4132
4180
|
noBody
|
|
4133
4181
|
);
|
|
@@ -4180,7 +4228,7 @@ function startHoopilotServer(options = {}) {
|
|
|
4180
4228
|
url: `http://${urlHost(host)}:${server.port}`
|
|
4181
4229
|
};
|
|
4182
4230
|
}
|
|
4183
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4231
|
+
async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4184
4232
|
const anthropicRequest = await readJson(request);
|
|
4185
4233
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
4186
4234
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -4193,36 +4241,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
|
|
|
4193
4241
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
4194
4242
|
if (bufferProxyBodies) {
|
|
4195
4243
|
const text = await upstream.text();
|
|
4196
|
-
|
|
4244
|
+
recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
|
|
4197
4245
|
return proxyResponse(
|
|
4198
4246
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
4199
4247
|
);
|
|
4200
4248
|
}
|
|
4201
|
-
|
|
4202
|
-
|
|
4249
|
+
return markUsage(
|
|
4250
|
+
proxyResponse(
|
|
4251
|
+
new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
|
|
4252
|
+
headers: upstream.headers,
|
|
4253
|
+
status: upstream.status,
|
|
4254
|
+
statusText: upstream.statusText
|
|
4255
|
+
})
|
|
4256
|
+
),
|
|
4203
4257
|
model,
|
|
4204
|
-
|
|
4205
|
-
request.signal,
|
|
4206
|
-
recordExtraction
|
|
4207
|
-
);
|
|
4208
|
-
if (!observed.body) {
|
|
4209
|
-
return proxyResponse(observed);
|
|
4210
|
-
}
|
|
4211
|
-
return proxyResponse(
|
|
4212
|
-
new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
|
|
4213
|
-
headers: observed.headers,
|
|
4214
|
-
status: observed.status,
|
|
4215
|
-
statusText: observed.statusText
|
|
4216
|
-
})
|
|
4258
|
+
"body"
|
|
4217
4259
|
);
|
|
4218
4260
|
}
|
|
4219
4261
|
const body = asRecord(await upstream.json());
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
|
|
4223
|
-
|
|
4224
|
-
|
|
4225
|
-
|
|
4262
|
+
recordParsedUsage(
|
|
4263
|
+
body.usage,
|
|
4264
|
+
typeof body.model === "string" ? body.model.trim() : model,
|
|
4265
|
+
model,
|
|
4266
|
+
usageAccountingMode,
|
|
4267
|
+
recordTokens,
|
|
4268
|
+
recordExtraction
|
|
4269
|
+
);
|
|
4226
4270
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
4227
4271
|
}
|
|
4228
4272
|
async function handleAnthropicCountTokens(request) {
|
|
@@ -4249,7 +4293,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
4249
4293
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
4250
4294
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
4251
4295
|
}
|
|
4252
|
-
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4296
|
+
async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4253
4297
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
4254
4298
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
4255
4299
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -4258,18 +4302,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
|
|
|
4258
4302
|
}
|
|
4259
4303
|
logUpstreamSuccess(logger, "/chat/completions", upstream.status);
|
|
4260
4304
|
const model = normalizeRequestedModel(chatRequest.model);
|
|
4261
|
-
return
|
|
4262
|
-
|
|
4263
|
-
|
|
4264
|
-
|
|
4265
|
-
|
|
4266
|
-
|
|
4267
|
-
|
|
4268
|
-
|
|
4269
|
-
)
|
|
4305
|
+
return proxiedResponseWithOptionalUsage(
|
|
4306
|
+
upstream,
|
|
4307
|
+
model,
|
|
4308
|
+
markUsage,
|
|
4309
|
+
usageAccountingMode,
|
|
4310
|
+
recordTokens,
|
|
4311
|
+
recordExtraction,
|
|
4312
|
+
bufferProxyBodies
|
|
4270
4313
|
);
|
|
4271
4314
|
}
|
|
4272
|
-
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4315
|
+
async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4273
4316
|
const body = await readJson(request);
|
|
4274
4317
|
const upstream = await client.chatCompletions(
|
|
4275
4318
|
completionsRequestToChatCompletion(body),
|
|
@@ -4284,34 +4327,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
|
|
|
4284
4327
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
4285
4328
|
if (bufferProxyBodies) {
|
|
4286
4329
|
const upstreamText = await upstream.text();
|
|
4287
|
-
|
|
4330
|
+
recordBufferedUsage(
|
|
4331
|
+
upstreamText,
|
|
4332
|
+
true,
|
|
4333
|
+
model,
|
|
4334
|
+
usageAccountingMode,
|
|
4335
|
+
recordTokens,
|
|
4336
|
+
recordExtraction
|
|
4337
|
+
);
|
|
4288
4338
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
4289
4339
|
return proxyResponse(responseFromText(upstream, text));
|
|
4290
4340
|
}
|
|
4291
|
-
return
|
|
4292
|
-
|
|
4341
|
+
return markUsage(
|
|
4342
|
+
proxyResponse(
|
|
4293
4343
|
new Response(completionStreamFromChatStream(upstream.body), {
|
|
4294
4344
|
headers: upstream.headers,
|
|
4295
4345
|
status: upstream.status,
|
|
4296
4346
|
statusText: upstream.statusText
|
|
4297
|
-
})
|
|
4298
|
-
|
|
4299
|
-
|
|
4300
|
-
|
|
4301
|
-
recordExtraction
|
|
4302
|
-
)
|
|
4347
|
+
})
|
|
4348
|
+
),
|
|
4349
|
+
model,
|
|
4350
|
+
"body"
|
|
4303
4351
|
);
|
|
4304
4352
|
}
|
|
4305
4353
|
const completion = asRecord(await upstream.json());
|
|
4306
|
-
|
|
4307
|
-
|
|
4308
|
-
|
|
4309
|
-
|
|
4310
|
-
|
|
4311
|
-
|
|
4354
|
+
recordParsedUsage(
|
|
4355
|
+
completion.usage,
|
|
4356
|
+
typeof completion.model === "string" ? completion.model.trim() : model,
|
|
4357
|
+
model,
|
|
4358
|
+
usageAccountingMode,
|
|
4359
|
+
recordTokens,
|
|
4360
|
+
recordExtraction
|
|
4361
|
+
);
|
|
4312
4362
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
4313
4363
|
}
|
|
4314
|
-
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4364
|
+
async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4315
4365
|
const { json, text: body } = await readJsonText(request);
|
|
4316
4366
|
if (isResponsesCompactionRequest(json)) {
|
|
4317
4367
|
return handleResponsesCompactionV2(
|
|
@@ -4321,7 +4371,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
|
|
|
4321
4371
|
recordExtraction,
|
|
4322
4372
|
json,
|
|
4323
4373
|
request,
|
|
4324
|
-
logger
|
|
4374
|
+
logger,
|
|
4375
|
+
usageAccountingMode
|
|
4325
4376
|
);
|
|
4326
4377
|
}
|
|
4327
4378
|
const upstream = await client.responses(
|
|
@@ -4334,18 +4385,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
|
|
|
4334
4385
|
}
|
|
4335
4386
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
4336
4387
|
const model = normalizeRequestedModel(json.model);
|
|
4337
|
-
return
|
|
4338
|
-
|
|
4339
|
-
|
|
4340
|
-
|
|
4341
|
-
|
|
4342
|
-
|
|
4343
|
-
|
|
4344
|
-
|
|
4345
|
-
)
|
|
4388
|
+
return proxiedResponseWithOptionalUsage(
|
|
4389
|
+
upstream,
|
|
4390
|
+
model,
|
|
4391
|
+
markUsage,
|
|
4392
|
+
usageAccountingMode,
|
|
4393
|
+
recordTokens,
|
|
4394
|
+
recordExtraction,
|
|
4395
|
+
bufferProxyBodies
|
|
4346
4396
|
);
|
|
4347
4397
|
}
|
|
4348
|
-
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
4398
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
|
|
4349
4399
|
const body = await readJson(request);
|
|
4350
4400
|
const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
|
|
4351
4401
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -4355,16 +4405,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
|
|
|
4355
4405
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
4356
4406
|
const isSse = isStreamingResponse(upstream);
|
|
4357
4407
|
const text = await upstream.text();
|
|
4358
|
-
|
|
4408
|
+
recordBufferedUsage(
|
|
4359
4409
|
text,
|
|
4360
4410
|
isSse,
|
|
4361
4411
|
normalizeRequestedModel(body.model),
|
|
4412
|
+
usageAccountingMode,
|
|
4362
4413
|
recordTokens,
|
|
4363
4414
|
recordExtraction
|
|
4364
4415
|
);
|
|
4365
4416
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
4366
4417
|
}
|
|
4367
|
-
async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
|
|
4418
|
+
async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
|
|
4368
4419
|
const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
|
|
4369
4420
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
4370
4421
|
if (!upstream.ok) {
|
|
@@ -4374,20 +4425,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
|
|
|
4374
4425
|
const isSse = isStreamingResponse(upstream);
|
|
4375
4426
|
const text = await upstream.text();
|
|
4376
4427
|
const model = normalizeRequestedModel(json.model);
|
|
4377
|
-
|
|
4428
|
+
recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
|
|
4378
4429
|
if (json.stream === true) {
|
|
4379
4430
|
return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
|
|
4380
4431
|
}
|
|
4381
4432
|
return jsonResponse(responsesCompactionResponse(text, isSse, model));
|
|
4382
4433
|
}
|
|
4383
|
-
async function
|
|
4434
|
+
async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
|
|
4384
4435
|
const isSse = isStreamingResponse(response);
|
|
4385
|
-
if (
|
|
4436
|
+
if (bufferProxyBodies && response.body) {
|
|
4386
4437
|
const text = await response.text();
|
|
4387
|
-
|
|
4388
|
-
|
|
4438
|
+
recordBufferedUsage(
|
|
4439
|
+
text,
|
|
4440
|
+
isSse,
|
|
4441
|
+
fallbackModel,
|
|
4442
|
+
usageAccountingMode,
|
|
4443
|
+
recordTokens,
|
|
4444
|
+
recordExtraction
|
|
4445
|
+
);
|
|
4446
|
+
return proxyResponse(responseFromText(response, text));
|
|
4447
|
+
}
|
|
4448
|
+
return markUsage(proxyResponse(response), fallbackModel, "body");
|
|
4449
|
+
}
|
|
4450
|
+
function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
|
|
4451
|
+
if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
|
|
4452
|
+
return;
|
|
4453
|
+
}
|
|
4454
|
+
const usage = extractTokenUsage(rawUsage);
|
|
4455
|
+
if (usage) {
|
|
4456
|
+
recordTokens(responseModel || fallbackModel, usage);
|
|
4389
4457
|
}
|
|
4390
|
-
|
|
4458
|
+
recordExtraction(usage !== void 0);
|
|
4459
|
+
}
|
|
4460
|
+
function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
|
|
4461
|
+
if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
|
|
4462
|
+
return;
|
|
4463
|
+
}
|
|
4464
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
4391
4465
|
}
|
|
4392
4466
|
async function proxyError(upstream, logger) {
|
|
4393
4467
|
const text = await upstream.text();
|
|
@@ -4443,7 +4517,24 @@ function shouldBufferProxyBodies(mode) {
|
|
|
4443
4517
|
}
|
|
4444
4518
|
return process.platform === "win32" && IS_STANDALONE_BINARY;
|
|
4445
4519
|
}
|
|
4520
|
+
function resolveUsageAccountingMode(options) {
|
|
4521
|
+
const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
|
|
4522
|
+
return parseUsageAccountingMode(value);
|
|
4523
|
+
}
|
|
4524
|
+
function resolveAccessLog(options) {
|
|
4525
|
+
return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
|
|
4526
|
+
}
|
|
4527
|
+
function shouldExtractUsage(mode, cost) {
|
|
4528
|
+
if (mode === "off") {
|
|
4529
|
+
return false;
|
|
4530
|
+
}
|
|
4531
|
+
if (mode === "basic") {
|
|
4532
|
+
return cost === "parsed";
|
|
4533
|
+
}
|
|
4534
|
+
return true;
|
|
4535
|
+
}
|
|
4446
4536
|
function finishResponse(response, options) {
|
|
4537
|
+
const usageObservation = options.usageObservation;
|
|
4447
4538
|
const withRequestId = responseWithRequestId(
|
|
4448
4539
|
response,
|
|
4449
4540
|
options.requestId,
|
|
@@ -4452,11 +4543,36 @@ function finishResponse(response, options) {
|
|
|
4452
4543
|
);
|
|
4453
4544
|
const stream = isStreamingResponse(withRequestId);
|
|
4454
4545
|
const status = withRequestId.status;
|
|
4546
|
+
let completed = false;
|
|
4455
4547
|
const complete = () => {
|
|
4548
|
+
if (completed) {
|
|
4549
|
+
return;
|
|
4550
|
+
}
|
|
4551
|
+
completed = true;
|
|
4456
4552
|
const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
|
|
4457
4553
|
options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
|
|
4458
|
-
logRequestCompleted(options.logger, status, stream, durationMs);
|
|
4554
|
+
logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
|
|
4459
4555
|
};
|
|
4556
|
+
if (withRequestId.body && usageObservation) {
|
|
4557
|
+
const shouldTrackCompletion = stream && options.trackStreamingBody;
|
|
4558
|
+
const observedBody = streamWithUsageObservation(
|
|
4559
|
+
withRequestId.body,
|
|
4560
|
+
stream,
|
|
4561
|
+
usageObservation.fallbackModel,
|
|
4562
|
+
usageObservation.recordTokens,
|
|
4563
|
+
options.signal,
|
|
4564
|
+
usageObservation.recordExtraction,
|
|
4565
|
+
shouldTrackCompletion ? complete : void 0
|
|
4566
|
+
);
|
|
4567
|
+
if (!shouldTrackCompletion) {
|
|
4568
|
+
complete();
|
|
4569
|
+
}
|
|
4570
|
+
return new Response(observedBody, {
|
|
4571
|
+
headers: withRequestId.headers,
|
|
4572
|
+
status,
|
|
4573
|
+
statusText: withRequestId.statusText
|
|
4574
|
+
});
|
|
4575
|
+
}
|
|
4460
4576
|
if (stream && withRequestId.body && options.trackStreamingBody) {
|
|
4461
4577
|
return new Response(trackStreamCompletion(withRequestId.body, complete), {
|
|
4462
4578
|
headers: withRequestId.headers,
|
|
@@ -4526,7 +4642,7 @@ function trackStreamCompletion(body, onComplete) {
|
|
|
4526
4642
|
}
|
|
4527
4643
|
});
|
|
4528
4644
|
}
|
|
4529
|
-
function logRequestCompleted(logger, status, stream, durationMs) {
|
|
4645
|
+
function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
|
|
4530
4646
|
const fields = {
|
|
4531
4647
|
durationMs,
|
|
4532
4648
|
event: "http.request.completed",
|
|
@@ -4541,6 +4657,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
|
|
|
4541
4657
|
logger.warn(fields, "request completed with client error");
|
|
4542
4658
|
return;
|
|
4543
4659
|
}
|
|
4660
|
+
if (!accessLog) {
|
|
4661
|
+
return;
|
|
4662
|
+
}
|
|
4544
4663
|
logger.info(fields, "request completed");
|
|
4545
4664
|
}
|
|
4546
4665
|
function requestIdFor(request) {
|
|
@@ -4585,11 +4704,17 @@ var API_ROUTES = [
|
|
|
4585
4704
|
{ method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
|
|
4586
4705
|
{ method: "POST", path: "/v1/responses", name: "responses" }
|
|
4587
4706
|
];
|
|
4707
|
+
var ROUTE_NAMES = new Map(
|
|
4708
|
+
API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
|
|
4709
|
+
);
|
|
4588
4710
|
function routeFor(method, path) {
|
|
4589
4711
|
if (method === "OPTIONS") {
|
|
4590
4712
|
return "cors.preflight";
|
|
4591
4713
|
}
|
|
4592
|
-
return
|
|
4714
|
+
return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
|
|
4715
|
+
}
|
|
4716
|
+
function routeKey(method, path) {
|
|
4717
|
+
return `${method} ${path}`;
|
|
4593
4718
|
}
|
|
4594
4719
|
function isStreamingResponse(response) {
|
|
4595
4720
|
return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
@@ -4647,12 +4772,24 @@ async function handleUsage(metrics, readUsage, request) {
|
|
|
4647
4772
|
function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
|
|
4648
4773
|
const usagePath = "/copilot_internal/user";
|
|
4649
4774
|
let cache;
|
|
4650
|
-
|
|
4775
|
+
let inFlight;
|
|
4776
|
+
return async () => {
|
|
4651
4777
|
if (cache && now() - cache.atMs < ttlMs) {
|
|
4652
4778
|
return cache.result;
|
|
4653
4779
|
}
|
|
4780
|
+
if (inFlight) {
|
|
4781
|
+
return inFlight;
|
|
4782
|
+
}
|
|
4783
|
+
inFlight = readFreshUsage();
|
|
4784
|
+
try {
|
|
4785
|
+
return await inFlight;
|
|
4786
|
+
} finally {
|
|
4787
|
+
inFlight = void 0;
|
|
4788
|
+
}
|
|
4789
|
+
};
|
|
4790
|
+
async function readFreshUsage() {
|
|
4654
4791
|
try {
|
|
4655
|
-
const upstream = await client.usage(
|
|
4792
|
+
const upstream = await client.usage();
|
|
4656
4793
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
4657
4794
|
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
4658
4795
|
if (!upstream.ok) {
|
|
@@ -4674,7 +4811,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
4674
4811
|
cache = { atMs: now(), result };
|
|
4675
4812
|
return result;
|
|
4676
4813
|
}
|
|
4677
|
-
}
|
|
4814
|
+
}
|
|
4678
4815
|
}
|
|
4679
4816
|
|
|
4680
4817
|
// src/update.ts
|
|
@@ -5346,6 +5483,14 @@ function parseArgs(argv) {
|
|
|
5346
5483
|
args.allowUnauthenticated = true;
|
|
5347
5484
|
continue;
|
|
5348
5485
|
}
|
|
5486
|
+
if (arg === "--access-log") {
|
|
5487
|
+
args.accessLog = true;
|
|
5488
|
+
continue;
|
|
5489
|
+
}
|
|
5490
|
+
if (arg === "--no-access-log") {
|
|
5491
|
+
args.accessLog = false;
|
|
5492
|
+
continue;
|
|
5493
|
+
}
|
|
5349
5494
|
if (arg === "--no-update-check") {
|
|
5350
5495
|
args.noUpdateCheck = true;
|
|
5351
5496
|
continue;
|
|
@@ -5380,6 +5525,9 @@ function parseArgs(argv) {
|
|
|
5380
5525
|
case "--stream-mode":
|
|
5381
5526
|
args.streamingProxyMode = parseStreamingProxyMode(optionValue(name, inlineValue, rest));
|
|
5382
5527
|
break;
|
|
5528
|
+
case "--usage-accounting":
|
|
5529
|
+
args.usageAccountingMode = parseUsageAccountingMode(optionValue(name, inlineValue, rest));
|
|
5530
|
+
break;
|
|
5383
5531
|
case "--host":
|
|
5384
5532
|
args.host = optionValue(name, inlineValue, rest);
|
|
5385
5533
|
break;
|
|
@@ -5684,6 +5832,9 @@ Options:
|
|
|
5684
5832
|
--log-level <level> trace, debug, info, warn, error, fatal, or silent
|
|
5685
5833
|
--log-format <format> json or pretty. Default: pretty
|
|
5686
5834
|
--stream-mode <mode> auto, live, or buffer. Auto buffers Windows standalone streams.
|
|
5835
|
+
--usage-accounting <mode> basic, full, or off. Default: basic
|
|
5836
|
+
--access-log Log successful requests
|
|
5837
|
+
--no-access-log Do not log successful requests. Default
|
|
5687
5838
|
--no-update-check Do not check GitHub for a newer release
|
|
5688
5839
|
--allow-unauthenticated Allow non-loopback bind without --api-key
|
|
5689
5840
|
-h, --help Show help
|
|
@@ -5697,6 +5848,8 @@ Environment:
|
|
|
5697
5848
|
HOOPILOT_LOG_FORMAT json or pretty. Default: pretty
|
|
5698
5849
|
HOOPILOT_LOG_LEVEL trace, debug, info, warn, error, fatal, or silent
|
|
5699
5850
|
HOOPILOT_STREAM_MODE auto, live, or buffer
|
|
5851
|
+
HOOPILOT_USAGE_ACCOUNTING basic, full, or off
|
|
5852
|
+
HOOPILOT_ACCESS_LOG 1/0, true/false, yes/no, or on/off
|
|
5700
5853
|
COPILOT_API_BASE_URL
|
|
5701
5854
|
HOOPILOT_GITHUB_API_BASE_URL GitHub REST base for the usage/quota lookup. Default: https://api.github.com
|
|
5702
5855
|
HOOPILOT_ALLOW_UNSAFE_UPSTREAM Set to 1 to allow nonstandard HTTPS token hosts
|