@openhoo/hoopilot 2.1.8 → 2.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -3
- package/dist/{chunk-2GLKVNAA.js → chunk-FH6WSFOC.js} +29 -1
- package/dist/chunk-FH6WSFOC.js.map +1 -0
- package/dist/cli.js +254 -101
- package/dist/cli.js.map +1 -1
- package/dist/codexx.js +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.js +261 -100
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2GLKVNAA.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -89,12 +89,38 @@ var STREAMING_PROXY_MODES = [
|
|
|
89
89
|
"buffer",
|
|
90
90
|
"live"
|
|
91
91
|
];
|
|
92
|
+
var USAGE_ACCOUNTING_MODES = [
|
|
93
|
+
"basic",
|
|
94
|
+
"full",
|
|
95
|
+
"off"
|
|
96
|
+
];
|
|
92
97
|
function parseStreamingProxyMode(value) {
|
|
93
98
|
if (STREAMING_PROXY_MODES.includes(value)) {
|
|
94
99
|
return value;
|
|
95
100
|
}
|
|
96
101
|
throw new Error(`Invalid stream mode: ${value}. Expected ${STREAMING_PROXY_MODES.join(", ")}.`);
|
|
97
102
|
}
|
|
103
|
+
function parseUsageAccountingMode(value) {
|
|
104
|
+
if (USAGE_ACCOUNTING_MODES.includes(value)) {
|
|
105
|
+
return value;
|
|
106
|
+
}
|
|
107
|
+
throw new Error(
|
|
108
|
+
`Invalid usage accounting mode: ${value}. Expected ${USAGE_ACCOUNTING_MODES.join(", ")}.`
|
|
109
|
+
);
|
|
110
|
+
}
|
|
111
|
+
function parseBooleanEnv(value, name) {
|
|
112
|
+
const raw = envValue(value)?.toLowerCase();
|
|
113
|
+
if (raw === void 0) {
|
|
114
|
+
return void 0;
|
|
115
|
+
}
|
|
116
|
+
if (raw === "1" || raw === "true" || raw === "yes" || raw === "on") {
|
|
117
|
+
return true;
|
|
118
|
+
}
|
|
119
|
+
if (raw === "0" || raw === "false" || raw === "no" || raw === "off") {
|
|
120
|
+
return false;
|
|
121
|
+
}
|
|
122
|
+
throw new Error(`${name} must be one of: 1, 0, true, false, yes, no, on, off.`);
|
|
123
|
+
}
|
|
98
124
|
|
|
99
125
|
// src/auth-store.ts
|
|
100
126
|
var StoredCopilotAuthError = class extends Error {
|
|
@@ -1506,6 +1532,8 @@ function epochSeconds() {
|
|
|
1506
1532
|
var PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";
|
|
1507
1533
|
var DURATION_BUCKETS_SECONDS = [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60];
|
|
1508
1534
|
var USAGE_BUFFER_LIMIT_BYTES = 16 * 1024 * 1024;
|
|
1535
|
+
var PROMETHEUS_CACHE_TTL_MS = 1e3;
|
|
1536
|
+
var PROMETHEUS_CACHE_NEUTRAL_ROUTES = /* @__PURE__ */ new Set(["metrics"]);
|
|
1509
1537
|
var MAX_TRACKED_MODELS = 200;
|
|
1510
1538
|
var MAX_MODEL_LABEL_LENGTH = 200;
|
|
1511
1539
|
var MAX_TRACKED_RATELIMIT_RESOURCES = 32;
|
|
@@ -1514,6 +1542,9 @@ var UNKNOWN_MODEL = "unknown";
|
|
|
1514
1542
|
function emptyModelTotals() {
|
|
1515
1543
|
return { cached: 0, completion: 0, prompt: 0, reasoning: 0, requests: 0, total: 0 };
|
|
1516
1544
|
}
|
|
1545
|
+
function isPrometheusCacheNeutralRoute(route) {
|
|
1546
|
+
return route !== void 0 && PROMETHEUS_CACHE_NEUTRAL_ROUTES.has(route);
|
|
1547
|
+
}
|
|
1517
1548
|
var MetricsRegistry = class {
|
|
1518
1549
|
#startedAtMs;
|
|
1519
1550
|
#inFlight = 0;
|
|
@@ -1525,11 +1556,16 @@ var MetricsRegistry = class {
|
|
|
1525
1556
|
#copilotQuota;
|
|
1526
1557
|
#githubRateLimit = /* @__PURE__ */ new Map();
|
|
1527
1558
|
#extraction = { extracted: 0, missing: 0 };
|
|
1559
|
+
#generation = 0;
|
|
1560
|
+
#prometheusCache;
|
|
1528
1561
|
constructor(options = {}) {
|
|
1529
1562
|
this.#startedAtMs = (options.now ?? Date.now)();
|
|
1530
1563
|
}
|
|
1531
1564
|
/** Mark a request as started; pair with exactly one {@link observe}. */
|
|
1532
1565
|
startRequest(route) {
|
|
1566
|
+
if (!isPrometheusCacheNeutralRoute(route)) {
|
|
1567
|
+
this.#changed();
|
|
1568
|
+
}
|
|
1533
1569
|
this.#inFlight += 1;
|
|
1534
1570
|
if (route) {
|
|
1535
1571
|
this.#inFlightByRoute.set(route, (this.#inFlightByRoute.get(route) ?? 0) + 1);
|
|
@@ -1537,6 +1573,9 @@ var MetricsRegistry = class {
|
|
|
1537
1573
|
}
|
|
1538
1574
|
/** Record a completed request and clear its in-flight slot. */
|
|
1539
1575
|
observe(observation) {
|
|
1576
|
+
if (!isPrometheusCacheNeutralRoute(observation.route)) {
|
|
1577
|
+
this.#changed();
|
|
1578
|
+
}
|
|
1540
1579
|
if (this.#inFlight > 0) {
|
|
1541
1580
|
this.#inFlight -= 1;
|
|
1542
1581
|
}
|
|
@@ -1557,6 +1596,7 @@ var MetricsRegistry = class {
|
|
|
1557
1596
|
* rising miss rate flags clients whose token usage is going unaccounted.
|
|
1558
1597
|
*/
|
|
1559
1598
|
recordTokenExtraction(extracted) {
|
|
1599
|
+
this.#changed();
|
|
1560
1600
|
if (extracted) {
|
|
1561
1601
|
this.#extraction.extracted += 1;
|
|
1562
1602
|
} else {
|
|
@@ -1565,6 +1605,7 @@ var MetricsRegistry = class {
|
|
|
1565
1605
|
}
|
|
1566
1606
|
/** Accumulate token counts for a model from one upstream completion. */
|
|
1567
1607
|
recordTokens(model, usage) {
|
|
1608
|
+
this.#changed();
|
|
1568
1609
|
const name = this.#modelLabel(model);
|
|
1569
1610
|
const totals = this.#tokens.get(name) ?? emptyModelTotals();
|
|
1570
1611
|
totals.requests += 1;
|
|
@@ -1577,11 +1618,13 @@ var MetricsRegistry = class {
|
|
|
1577
1618
|
}
|
|
1578
1619
|
/** Record one upstream Copilot call and whether it succeeded. */
|
|
1579
1620
|
recordUpstream(path, ok) {
|
|
1621
|
+
this.#changed();
|
|
1580
1622
|
const key = labelKey(path, ok ? "ok" : "error");
|
|
1581
1623
|
this.#upstream.set(key, (this.#upstream.get(key) ?? 0) + 1);
|
|
1582
1624
|
}
|
|
1583
1625
|
/** Store the latest Copilot quota so /metrics can expose it as gauges. */
|
|
1584
1626
|
recordCopilotQuota(usage) {
|
|
1627
|
+
this.#changed();
|
|
1585
1628
|
this.#copilotQuota = usage;
|
|
1586
1629
|
}
|
|
1587
1630
|
/**
|
|
@@ -1593,6 +1636,7 @@ var MetricsRegistry = class {
|
|
|
1593
1636
|
if (!rateLimit) {
|
|
1594
1637
|
return;
|
|
1595
1638
|
}
|
|
1639
|
+
this.#changed();
|
|
1596
1640
|
const resource = this.#rateLimitResource(rateLimit.resource);
|
|
1597
1641
|
this.#githubRateLimit.set(resource, { ...rateLimit, resource });
|
|
1598
1642
|
}
|
|
@@ -1629,6 +1673,9 @@ var MetricsRegistry = class {
|
|
|
1629
1673
|
}
|
|
1630
1674
|
this.#durations.set(route, entry);
|
|
1631
1675
|
}
|
|
1676
|
+
#changed() {
|
|
1677
|
+
this.#generation += 1;
|
|
1678
|
+
}
|
|
1632
1679
|
/** A JSON-friendly view of the current counters. */
|
|
1633
1680
|
snapshot(nowOrOptions = Date.now) {
|
|
1634
1681
|
const options = typeof nowOrOptions === "function" ? { now: nowOrOptions } : nowOrOptions;
|
|
@@ -1732,13 +1779,18 @@ var MetricsRegistry = class {
|
|
|
1732
1779
|
}
|
|
1733
1780
|
/** Render the Prometheus text exposition format (version 0.0.4). */
|
|
1734
1781
|
renderPrometheus(now = Date.now) {
|
|
1782
|
+
const nowMs = now();
|
|
1783
|
+
const cached = this.#prometheusCache;
|
|
1784
|
+
if (cached && cached.generation === this.#generation && nowMs - cached.renderedAtMs < PROMETHEUS_CACHE_TTL_MS) {
|
|
1785
|
+
return cached.text;
|
|
1786
|
+
}
|
|
1735
1787
|
const lines = [];
|
|
1736
1788
|
lines.push("# HELP hoopilot_process_start_time_seconds Unix epoch when the proxy started.");
|
|
1737
1789
|
lines.push("# TYPE hoopilot_process_start_time_seconds gauge");
|
|
1738
1790
|
lines.push(`hoopilot_process_start_time_seconds ${this.#startedAtMs / 1e3}`);
|
|
1739
1791
|
lines.push("# HELP hoopilot_uptime_seconds Seconds since the proxy started.");
|
|
1740
1792
|
lines.push("# TYPE hoopilot_uptime_seconds gauge");
|
|
1741
|
-
lines.push(`hoopilot_uptime_seconds ${Math.max(0, (
|
|
1793
|
+
lines.push(`hoopilot_uptime_seconds ${Math.max(0, (nowMs - this.#startedAtMs) / 1e3)}`);
|
|
1742
1794
|
lines.push("# HELP hoopilot_requests_in_flight Requests currently being served.");
|
|
1743
1795
|
lines.push("# TYPE hoopilot_requests_in_flight gauge");
|
|
1744
1796
|
lines.push(`hoopilot_requests_in_flight ${this.#inFlight}`);
|
|
@@ -1804,8 +1856,10 @@ var MetricsRegistry = class {
|
|
|
1804
1856
|
}
|
|
1805
1857
|
this.#renderGithubRateLimit(lines);
|
|
1806
1858
|
this.#renderCopilotQuota(lines);
|
|
1807
|
-
|
|
1859
|
+
const text = `${lines.join("\n")}
|
|
1808
1860
|
`;
|
|
1861
|
+
this.#prometheusCache = { generation: this.#generation, renderedAtMs: nowMs, text };
|
|
1862
|
+
return text;
|
|
1809
1863
|
}
|
|
1810
1864
|
#renderGithubRateLimit(lines) {
|
|
1811
1865
|
const entries = [...this.#githubRateLimit.values()];
|
|
@@ -1940,21 +1994,6 @@ var MetricsRegistry = class {
|
|
|
1940
1994
|
}
|
|
1941
1995
|
}
|
|
1942
1996
|
};
|
|
1943
|
-
function observeResponseUsage(response, fallbackModel, onUsage, signal, onOutcome) {
|
|
1944
|
-
const body = response.body;
|
|
1945
|
-
if (!body) {
|
|
1946
|
-
return response;
|
|
1947
|
-
}
|
|
1948
|
-
const isSse = response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
1949
|
-
return new Response(
|
|
1950
|
-
streamWithUsageObservation(body, isSse, fallbackModel, onUsage, signal, onOutcome),
|
|
1951
|
-
{
|
|
1952
|
-
headers: response.headers,
|
|
1953
|
-
status: response.status,
|
|
1954
|
-
statusText: response.statusText
|
|
1955
|
-
}
|
|
1956
|
-
);
|
|
1957
|
-
}
|
|
1958
1997
|
function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome) {
|
|
1959
1998
|
const accumulator = createUsageAccumulator(fallbackModel, onUsage, onOutcome);
|
|
1960
1999
|
if (isSse) {
|
|
@@ -1969,9 +2008,10 @@ function recordResponseTextUsage(text, isSse, fallbackModel, onUsage, onOutcome)
|
|
|
1969
2008
|
}
|
|
1970
2009
|
accumulator.finish();
|
|
1971
2010
|
}
|
|
1972
|
-
function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome) {
|
|
2011
|
+
function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signal, onOutcome, onComplete) {
|
|
1973
2012
|
const reader = stream.getReader();
|
|
1974
2013
|
let aborted = signal?.aborted ?? false;
|
|
2014
|
+
let completed = false;
|
|
1975
2015
|
let released = false;
|
|
1976
2016
|
const onAbort = () => {
|
|
1977
2017
|
aborted = true;
|
|
@@ -2000,6 +2040,10 @@ function streamWithUsageObservation(stream, isSse, fallbackModel, onUsage, signa
|
|
|
2000
2040
|
}
|
|
2001
2041
|
released = true;
|
|
2002
2042
|
signal?.removeEventListener("abort", onAbort);
|
|
2043
|
+
if (!completed) {
|
|
2044
|
+
completed = true;
|
|
2045
|
+
onComplete?.();
|
|
2046
|
+
}
|
|
2003
2047
|
reader.releaseLock();
|
|
2004
2048
|
};
|
|
2005
2049
|
const observeChunk = (chunkBytes) => {
|
|
@@ -2109,6 +2153,9 @@ function considerSseLine(line, consider) {
|
|
|
2109
2153
|
if (!data || data === "[DONE]") {
|
|
2110
2154
|
return;
|
|
2111
2155
|
}
|
|
2156
|
+
if (!data.includes('"usage"')) {
|
|
2157
|
+
return;
|
|
2158
|
+
}
|
|
2112
2159
|
const parsed = safeJsonParse(data);
|
|
2113
2160
|
if (parsed !== void 0) {
|
|
2114
2161
|
consider(parsed);
|
|
@@ -3991,17 +4038,28 @@ function createHoopilotHandler(options = {}) {
|
|
|
3991
4038
|
const recordTokens = (model, usage) => metrics.recordTokens(model, usage);
|
|
3992
4039
|
const recordExtraction = (extracted) => metrics.recordTokenExtraction(extracted);
|
|
3993
4040
|
const bufferProxyBodies = shouldBufferProxyBodies(resolveStreamingProxyMode(options));
|
|
4041
|
+
const usageAccountingMode = resolveUsageAccountingMode(options);
|
|
4042
|
+
const accessLog = resolveAccessLog(options);
|
|
4043
|
+
const responseUsage = /* @__PURE__ */ new WeakMap();
|
|
4044
|
+
const markUsage = (response, fallbackModel, cost) => {
|
|
4045
|
+
if (shouldExtractUsage(usageAccountingMode, cost)) {
|
|
4046
|
+
responseUsage.set(response, { fallbackModel, recordExtraction, recordTokens });
|
|
4047
|
+
}
|
|
4048
|
+
return response;
|
|
4049
|
+
};
|
|
3994
4050
|
const requestContext = /* @__PURE__ */ new WeakMap();
|
|
3995
4051
|
const app = buildApp({
|
|
3996
4052
|
apiKey,
|
|
3997
4053
|
allowedOrigins,
|
|
3998
4054
|
bufferProxyBodies,
|
|
3999
4055
|
client,
|
|
4056
|
+
markUsage,
|
|
4000
4057
|
metrics,
|
|
4001
4058
|
readUsage,
|
|
4002
4059
|
recordExtraction,
|
|
4003
4060
|
recordTokens,
|
|
4004
|
-
requestContext
|
|
4061
|
+
requestContext,
|
|
4062
|
+
usageAccountingMode
|
|
4005
4063
|
});
|
|
4006
4064
|
return async (request) => {
|
|
4007
4065
|
const startedAt = performance.now();
|
|
@@ -4037,11 +4095,14 @@ function createHoopilotHandler(options = {}) {
|
|
|
4037
4095
|
}
|
|
4038
4096
|
return finishResponse(response, {
|
|
4039
4097
|
corsOrigin,
|
|
4098
|
+
accessLog,
|
|
4040
4099
|
logger: requestLogger,
|
|
4041
4100
|
method: request.method,
|
|
4042
4101
|
metrics,
|
|
4043
4102
|
requestId,
|
|
4103
|
+
signal: request.signal,
|
|
4044
4104
|
route,
|
|
4105
|
+
usageObservation: responseUsage.get(response),
|
|
4045
4106
|
startedAt,
|
|
4046
4107
|
closeConnection: bufferProxyBodies,
|
|
4047
4108
|
trackStreamingBody: !bufferProxyBodies
|
|
@@ -4054,11 +4115,13 @@ function buildApp(deps) {
|
|
|
4054
4115
|
allowedOrigins,
|
|
4055
4116
|
bufferProxyBodies,
|
|
4056
4117
|
client,
|
|
4118
|
+
markUsage,
|
|
4057
4119
|
metrics,
|
|
4058
4120
|
readUsage,
|
|
4059
4121
|
recordExtraction,
|
|
4060
4122
|
recordTokens,
|
|
4061
|
-
requestContext
|
|
4123
|
+
requestContext,
|
|
4124
|
+
usageAccountingMode
|
|
4062
4125
|
} = deps;
|
|
4063
4126
|
const contextFor = (request) => {
|
|
4064
4127
|
const stored = requestContext.get(request);
|
|
@@ -4146,11 +4209,13 @@ function buildApp(deps) {
|
|
|
4146
4209
|
({ request }) => handleAnthropicMessages(
|
|
4147
4210
|
client,
|
|
4148
4211
|
metrics,
|
|
4212
|
+
markUsage,
|
|
4149
4213
|
recordTokens,
|
|
4150
4214
|
recordExtraction,
|
|
4151
4215
|
request,
|
|
4152
4216
|
loggerFor(request),
|
|
4153
|
-
bufferProxyBodies
|
|
4217
|
+
bufferProxyBodies,
|
|
4218
|
+
usageAccountingMode
|
|
4154
4219
|
),
|
|
4155
4220
|
noBody
|
|
4156
4221
|
).post(
|
|
@@ -4162,11 +4227,13 @@ function buildApp(deps) {
|
|
|
4162
4227
|
({ request }) => handleChatCompletions(
|
|
4163
4228
|
client,
|
|
4164
4229
|
metrics,
|
|
4230
|
+
markUsage,
|
|
4165
4231
|
recordTokens,
|
|
4166
4232
|
recordExtraction,
|
|
4167
4233
|
request,
|
|
4168
4234
|
loggerFor(request),
|
|
4169
|
-
bufferProxyBodies
|
|
4235
|
+
bufferProxyBodies,
|
|
4236
|
+
usageAccountingMode
|
|
4170
4237
|
),
|
|
4171
4238
|
noBody
|
|
4172
4239
|
).post(
|
|
@@ -4174,11 +4241,13 @@ function buildApp(deps) {
|
|
|
4174
4241
|
({ request }) => handleCompletions(
|
|
4175
4242
|
client,
|
|
4176
4243
|
metrics,
|
|
4244
|
+
markUsage,
|
|
4177
4245
|
recordTokens,
|
|
4178
4246
|
recordExtraction,
|
|
4179
4247
|
request,
|
|
4180
4248
|
loggerFor(request),
|
|
4181
|
-
bufferProxyBodies
|
|
4249
|
+
bufferProxyBodies,
|
|
4250
|
+
usageAccountingMode
|
|
4182
4251
|
),
|
|
4183
4252
|
noBody
|
|
4184
4253
|
).post(
|
|
@@ -4189,7 +4258,8 @@ function buildApp(deps) {
|
|
|
4189
4258
|
recordTokens,
|
|
4190
4259
|
recordExtraction,
|
|
4191
4260
|
request,
|
|
4192
|
-
loggerFor(request)
|
|
4261
|
+
loggerFor(request),
|
|
4262
|
+
usageAccountingMode
|
|
4193
4263
|
),
|
|
4194
4264
|
noBody
|
|
4195
4265
|
).post(
|
|
@@ -4197,11 +4267,13 @@ function buildApp(deps) {
|
|
|
4197
4267
|
({ request }) => handleResponses(
|
|
4198
4268
|
client,
|
|
4199
4269
|
metrics,
|
|
4270
|
+
markUsage,
|
|
4200
4271
|
recordTokens,
|
|
4201
4272
|
recordExtraction,
|
|
4202
4273
|
request,
|
|
4203
4274
|
loggerFor(request),
|
|
4204
|
-
bufferProxyBodies
|
|
4275
|
+
bufferProxyBodies,
|
|
4276
|
+
usageAccountingMode
|
|
4205
4277
|
),
|
|
4206
4278
|
noBody
|
|
4207
4279
|
);
|
|
@@ -4254,7 +4326,7 @@ function startHoopilotServer(options = {}) {
|
|
|
4254
4326
|
url: `http://${urlHost(host)}:${server.port}`
|
|
4255
4327
|
};
|
|
4256
4328
|
}
|
|
4257
|
-
async function handleAnthropicMessages(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4329
|
+
async function handleAnthropicMessages(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4258
4330
|
const anthropicRequest = await readJson(request);
|
|
4259
4331
|
const responsesRequest = anthropicMessagesToResponsesRequest(anthropicRequest);
|
|
4260
4332
|
const upstream = await client.responses(JSON.stringify(responsesRequest), request.signal);
|
|
@@ -4267,36 +4339,32 @@ async function handleAnthropicMessages(client, metrics, recordTokens, recordExtr
|
|
|
4267
4339
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
4268
4340
|
if (bufferProxyBodies) {
|
|
4269
4341
|
const text = await upstream.text();
|
|
4270
|
-
|
|
4342
|
+
recordBufferedUsage(text, true, model, usageAccountingMode, recordTokens, recordExtraction);
|
|
4271
4343
|
return proxyResponse(
|
|
4272
4344
|
responseFromText(upstream, responsesSseTextToAnthropicSseText(text, { model }))
|
|
4273
4345
|
);
|
|
4274
4346
|
}
|
|
4275
|
-
|
|
4276
|
-
|
|
4347
|
+
return markUsage(
|
|
4348
|
+
proxyResponse(
|
|
4349
|
+
new Response(responsesStreamToAnthropicStream(upstream.body, { model }), {
|
|
4350
|
+
headers: upstream.headers,
|
|
4351
|
+
status: upstream.status,
|
|
4352
|
+
statusText: upstream.statusText
|
|
4353
|
+
})
|
|
4354
|
+
),
|
|
4277
4355
|
model,
|
|
4278
|
-
|
|
4279
|
-
request.signal,
|
|
4280
|
-
recordExtraction
|
|
4281
|
-
);
|
|
4282
|
-
if (!observed.body) {
|
|
4283
|
-
return proxyResponse(observed);
|
|
4284
|
-
}
|
|
4285
|
-
return proxyResponse(
|
|
4286
|
-
new Response(responsesStreamToAnthropicStream(observed.body, { model }), {
|
|
4287
|
-
headers: observed.headers,
|
|
4288
|
-
status: observed.status,
|
|
4289
|
-
statusText: observed.statusText
|
|
4290
|
-
})
|
|
4356
|
+
"body"
|
|
4291
4357
|
);
|
|
4292
4358
|
}
|
|
4293
4359
|
const body = asRecord(await upstream.json());
|
|
4294
|
-
|
|
4295
|
-
|
|
4296
|
-
|
|
4297
|
-
|
|
4298
|
-
|
|
4299
|
-
|
|
4360
|
+
recordParsedUsage(
|
|
4361
|
+
body.usage,
|
|
4362
|
+
typeof body.model === "string" ? body.model.trim() : model,
|
|
4363
|
+
model,
|
|
4364
|
+
usageAccountingMode,
|
|
4365
|
+
recordTokens,
|
|
4366
|
+
recordExtraction
|
|
4367
|
+
);
|
|
4300
4368
|
return jsonResponse(responsesResponseToAnthropicMessage(body, model));
|
|
4301
4369
|
}
|
|
4302
4370
|
async function handleAnthropicCountTokens(request) {
|
|
@@ -4323,7 +4391,7 @@ async function handleModels(client, metrics, signal, logger) {
|
|
|
4323
4391
|
logUpstreamSuccess(logger, "/models", upstream.status);
|
|
4324
4392
|
return jsonResponse(normalizeModelsResponse(await upstream.json()));
|
|
4325
4393
|
}
|
|
4326
|
-
async function handleChatCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4394
|
+
async function handleChatCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4327
4395
|
const chatRequest = normalizeChatCompletionRequest(await readJson(request));
|
|
4328
4396
|
const upstream = await client.chatCompletions(chatRequest, request.signal);
|
|
4329
4397
|
metrics.recordUpstream("/chat/completions", upstream.ok);
|
|
@@ -4332,18 +4400,17 @@ async function handleChatCompletions(client, metrics, recordTokens, recordExtrac
|
|
|
4332
4400
|
}
|
|
4333
4401
|
logUpstreamSuccess(logger, "/chat/completions", upstream.status);
|
|
4334
4402
|
const model = normalizeRequestedModel(chatRequest.model);
|
|
4335
|
-
return
|
|
4336
|
-
|
|
4337
|
-
|
|
4338
|
-
|
|
4339
|
-
|
|
4340
|
-
|
|
4341
|
-
|
|
4342
|
-
|
|
4343
|
-
)
|
|
4403
|
+
return proxiedResponseWithOptionalUsage(
|
|
4404
|
+
upstream,
|
|
4405
|
+
model,
|
|
4406
|
+
markUsage,
|
|
4407
|
+
usageAccountingMode,
|
|
4408
|
+
recordTokens,
|
|
4409
|
+
recordExtraction,
|
|
4410
|
+
bufferProxyBodies
|
|
4344
4411
|
);
|
|
4345
4412
|
}
|
|
4346
|
-
async function handleCompletions(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4413
|
+
async function handleCompletions(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4347
4414
|
const body = await readJson(request);
|
|
4348
4415
|
const upstream = await client.chatCompletions(
|
|
4349
4416
|
completionsRequestToChatCompletion(body),
|
|
@@ -4358,34 +4425,41 @@ async function handleCompletions(client, metrics, recordTokens, recordExtraction
|
|
|
4358
4425
|
if (isStreamingResponse(upstream) && upstream.body) {
|
|
4359
4426
|
if (bufferProxyBodies) {
|
|
4360
4427
|
const upstreamText = await upstream.text();
|
|
4361
|
-
|
|
4428
|
+
recordBufferedUsage(
|
|
4429
|
+
upstreamText,
|
|
4430
|
+
true,
|
|
4431
|
+
model,
|
|
4432
|
+
usageAccountingMode,
|
|
4433
|
+
recordTokens,
|
|
4434
|
+
recordExtraction
|
|
4435
|
+
);
|
|
4362
4436
|
const text = completionSseTextFromChatSseText(upstreamText);
|
|
4363
4437
|
return proxyResponse(responseFromText(upstream, text));
|
|
4364
4438
|
}
|
|
4365
|
-
return
|
|
4366
|
-
|
|
4439
|
+
return markUsage(
|
|
4440
|
+
proxyResponse(
|
|
4367
4441
|
new Response(completionStreamFromChatStream(upstream.body), {
|
|
4368
4442
|
headers: upstream.headers,
|
|
4369
4443
|
status: upstream.status,
|
|
4370
4444
|
statusText: upstream.statusText
|
|
4371
|
-
})
|
|
4372
|
-
|
|
4373
|
-
|
|
4374
|
-
|
|
4375
|
-
recordExtraction
|
|
4376
|
-
)
|
|
4445
|
+
})
|
|
4446
|
+
),
|
|
4447
|
+
model,
|
|
4448
|
+
"body"
|
|
4377
4449
|
);
|
|
4378
4450
|
}
|
|
4379
4451
|
const completion = asRecord(await upstream.json());
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4452
|
+
recordParsedUsage(
|
|
4453
|
+
completion.usage,
|
|
4454
|
+
typeof completion.model === "string" ? completion.model.trim() : model,
|
|
4455
|
+
model,
|
|
4456
|
+
usageAccountingMode,
|
|
4457
|
+
recordTokens,
|
|
4458
|
+
recordExtraction
|
|
4459
|
+
);
|
|
4386
4460
|
return jsonResponse(chatCompletionToCompletion(completion));
|
|
4387
4461
|
}
|
|
4388
|
-
async function handleResponses(client, metrics, recordTokens, recordExtraction, request, logger, bufferProxyBodies) {
|
|
4462
|
+
async function handleResponses(client, metrics, markUsage, recordTokens, recordExtraction, request, logger, bufferProxyBodies, usageAccountingMode) {
|
|
4389
4463
|
const { json, text: body } = await readJsonText(request);
|
|
4390
4464
|
if (isResponsesCompactionRequest(json)) {
|
|
4391
4465
|
return handleResponsesCompactionV2(
|
|
@@ -4395,7 +4469,8 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
|
|
|
4395
4469
|
recordExtraction,
|
|
4396
4470
|
json,
|
|
4397
4471
|
request,
|
|
4398
|
-
logger
|
|
4472
|
+
logger,
|
|
4473
|
+
usageAccountingMode
|
|
4399
4474
|
);
|
|
4400
4475
|
}
|
|
4401
4476
|
const upstream = await client.responses(
|
|
@@ -4408,18 +4483,17 @@ async function handleResponses(client, metrics, recordTokens, recordExtraction,
|
|
|
4408
4483
|
}
|
|
4409
4484
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
4410
4485
|
const model = normalizeRequestedModel(json.model);
|
|
4411
|
-
return
|
|
4412
|
-
|
|
4413
|
-
|
|
4414
|
-
|
|
4415
|
-
|
|
4416
|
-
|
|
4417
|
-
|
|
4418
|
-
|
|
4419
|
-
)
|
|
4486
|
+
return proxiedResponseWithOptionalUsage(
|
|
4487
|
+
upstream,
|
|
4488
|
+
model,
|
|
4489
|
+
markUsage,
|
|
4490
|
+
usageAccountingMode,
|
|
4491
|
+
recordTokens,
|
|
4492
|
+
recordExtraction,
|
|
4493
|
+
bufferProxyBodies
|
|
4420
4494
|
);
|
|
4421
4495
|
}
|
|
4422
|
-
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger) {
|
|
4496
|
+
async function handleResponsesCompact(client, metrics, recordTokens, recordExtraction, request, logger, usageAccountingMode) {
|
|
4423
4497
|
const body = await readJson(request);
|
|
4424
4498
|
const upstream = await client.responses(responsesCompactionRequestBody(body), request.signal);
|
|
4425
4499
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
@@ -4429,16 +4503,17 @@ async function handleResponsesCompact(client, metrics, recordTokens, recordExtra
|
|
|
4429
4503
|
logUpstreamSuccess(logger, "/responses", upstream.status);
|
|
4430
4504
|
const isSse = isStreamingResponse(upstream);
|
|
4431
4505
|
const text = await upstream.text();
|
|
4432
|
-
|
|
4506
|
+
recordBufferedUsage(
|
|
4433
4507
|
text,
|
|
4434
4508
|
isSse,
|
|
4435
4509
|
normalizeRequestedModel(body.model),
|
|
4510
|
+
usageAccountingMode,
|
|
4436
4511
|
recordTokens,
|
|
4437
4512
|
recordExtraction
|
|
4438
4513
|
);
|
|
4439
4514
|
return jsonResponse(responsesCompactionResult(text, isSse));
|
|
4440
4515
|
}
|
|
4441
|
-
async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger) {
|
|
4516
|
+
async function handleResponsesCompactionV2(client, metrics, recordTokens, recordExtraction, json, request, logger, usageAccountingMode) {
|
|
4442
4517
|
const upstream = await client.responses(responsesCompactionRequestBody(json), request.signal);
|
|
4443
4518
|
metrics.recordUpstream("/responses", upstream.ok);
|
|
4444
4519
|
if (!upstream.ok) {
|
|
@@ -4448,20 +4523,43 @@ async function handleResponsesCompactionV2(client, metrics, recordTokens, record
|
|
|
4448
4523
|
const isSse = isStreamingResponse(upstream);
|
|
4449
4524
|
const text = await upstream.text();
|
|
4450
4525
|
const model = normalizeRequestedModel(json.model);
|
|
4451
|
-
|
|
4526
|
+
recordBufferedUsage(text, isSse, model, usageAccountingMode, recordTokens, recordExtraction);
|
|
4452
4527
|
if (json.stream === true) {
|
|
4453
4528
|
return textResponse(responsesCompactionSseText(text, isSse, model), "text/event-stream");
|
|
4454
4529
|
}
|
|
4455
4530
|
return jsonResponse(responsesCompactionResponse(text, isSse, model));
|
|
4456
4531
|
}
|
|
4457
|
-
async function
|
|
4532
|
+
async function proxiedResponseWithOptionalUsage(response, fallbackModel, markUsage, usageAccountingMode, recordTokens, recordExtraction, bufferProxyBodies) {
|
|
4458
4533
|
const isSse = isStreamingResponse(response);
|
|
4459
|
-
if (
|
|
4534
|
+
if (bufferProxyBodies && response.body) {
|
|
4460
4535
|
const text = await response.text();
|
|
4461
|
-
|
|
4462
|
-
|
|
4536
|
+
recordBufferedUsage(
|
|
4537
|
+
text,
|
|
4538
|
+
isSse,
|
|
4539
|
+
fallbackModel,
|
|
4540
|
+
usageAccountingMode,
|
|
4541
|
+
recordTokens,
|
|
4542
|
+
recordExtraction
|
|
4543
|
+
);
|
|
4544
|
+
return proxyResponse(responseFromText(response, text));
|
|
4545
|
+
}
|
|
4546
|
+
return markUsage(proxyResponse(response), fallbackModel, "body");
|
|
4547
|
+
}
|
|
4548
|
+
function recordParsedUsage(rawUsage, responseModel, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
|
|
4549
|
+
if (!shouldExtractUsage(usageAccountingMode, "parsed")) {
|
|
4550
|
+
return;
|
|
4551
|
+
}
|
|
4552
|
+
const usage = extractTokenUsage(rawUsage);
|
|
4553
|
+
if (usage) {
|
|
4554
|
+
recordTokens(responseModel || fallbackModel, usage);
|
|
4555
|
+
}
|
|
4556
|
+
recordExtraction(usage !== void 0);
|
|
4557
|
+
}
|
|
4558
|
+
function recordBufferedUsage(text, isSse, fallbackModel, usageAccountingMode, recordTokens, recordExtraction) {
|
|
4559
|
+
if (!shouldExtractUsage(usageAccountingMode, "buffered")) {
|
|
4560
|
+
return;
|
|
4463
4561
|
}
|
|
4464
|
-
|
|
4562
|
+
recordResponseTextUsage(text, isSse, fallbackModel, recordTokens, recordExtraction);
|
|
4465
4563
|
}
|
|
4466
4564
|
async function proxyError(upstream, logger) {
|
|
4467
4565
|
const text = await upstream.text();
|
|
@@ -4517,7 +4615,24 @@ function shouldBufferProxyBodies(mode) {
|
|
|
4517
4615
|
}
|
|
4518
4616
|
return process.platform === "win32" && IS_STANDALONE_BINARY;
|
|
4519
4617
|
}
|
|
4618
|
+
function resolveUsageAccountingMode(options) {
|
|
4619
|
+
const value = options.usageAccountingMode ?? envValue(options.env?.HOOPILOT_USAGE_ACCOUNTING) ?? "basic";
|
|
4620
|
+
return parseUsageAccountingMode(value);
|
|
4621
|
+
}
|
|
4622
|
+
function resolveAccessLog(options) {
|
|
4623
|
+
return options.accessLog ?? parseBooleanEnv(options.env?.HOOPILOT_ACCESS_LOG, "HOOPILOT_ACCESS_LOG") ?? false;
|
|
4624
|
+
}
|
|
4625
|
+
function shouldExtractUsage(mode, cost) {
|
|
4626
|
+
if (mode === "off") {
|
|
4627
|
+
return false;
|
|
4628
|
+
}
|
|
4629
|
+
if (mode === "basic") {
|
|
4630
|
+
return cost === "parsed";
|
|
4631
|
+
}
|
|
4632
|
+
return true;
|
|
4633
|
+
}
|
|
4520
4634
|
function finishResponse(response, options) {
|
|
4635
|
+
const usageObservation = options.usageObservation;
|
|
4521
4636
|
const withRequestId = responseWithRequestId(
|
|
4522
4637
|
response,
|
|
4523
4638
|
options.requestId,
|
|
@@ -4526,11 +4641,36 @@ function finishResponse(response, options) {
|
|
|
4526
4641
|
);
|
|
4527
4642
|
const stream = isStreamingResponse(withRequestId);
|
|
4528
4643
|
const status = withRequestId.status;
|
|
4644
|
+
let completed = false;
|
|
4529
4645
|
const complete = () => {
|
|
4646
|
+
if (completed) {
|
|
4647
|
+
return;
|
|
4648
|
+
}
|
|
4649
|
+
completed = true;
|
|
4530
4650
|
const durationMs = Math.round((performance.now() - options.startedAt) * 100) / 100;
|
|
4531
4651
|
options.metrics.observe({ durationMs, method: options.method, route: options.route, status });
|
|
4532
|
-
logRequestCompleted(options.logger, status, stream, durationMs);
|
|
4652
|
+
logRequestCompleted(options.logger, status, stream, durationMs, options.accessLog);
|
|
4533
4653
|
};
|
|
4654
|
+
if (withRequestId.body && usageObservation) {
|
|
4655
|
+
const shouldTrackCompletion = stream && options.trackStreamingBody;
|
|
4656
|
+
const observedBody = streamWithUsageObservation(
|
|
4657
|
+
withRequestId.body,
|
|
4658
|
+
stream,
|
|
4659
|
+
usageObservation.fallbackModel,
|
|
4660
|
+
usageObservation.recordTokens,
|
|
4661
|
+
options.signal,
|
|
4662
|
+
usageObservation.recordExtraction,
|
|
4663
|
+
shouldTrackCompletion ? complete : void 0
|
|
4664
|
+
);
|
|
4665
|
+
if (!shouldTrackCompletion) {
|
|
4666
|
+
complete();
|
|
4667
|
+
}
|
|
4668
|
+
return new Response(observedBody, {
|
|
4669
|
+
headers: withRequestId.headers,
|
|
4670
|
+
status,
|
|
4671
|
+
statusText: withRequestId.statusText
|
|
4672
|
+
});
|
|
4673
|
+
}
|
|
4534
4674
|
if (stream && withRequestId.body && options.trackStreamingBody) {
|
|
4535
4675
|
return new Response(trackStreamCompletion(withRequestId.body, complete), {
|
|
4536
4676
|
headers: withRequestId.headers,
|
|
@@ -4600,7 +4740,7 @@ function trackStreamCompletion(body, onComplete) {
|
|
|
4600
4740
|
}
|
|
4601
4741
|
});
|
|
4602
4742
|
}
|
|
4603
|
-
function logRequestCompleted(logger, status, stream, durationMs) {
|
|
4743
|
+
function logRequestCompleted(logger, status, stream, durationMs, accessLog) {
|
|
4604
4744
|
const fields = {
|
|
4605
4745
|
durationMs,
|
|
4606
4746
|
event: "http.request.completed",
|
|
@@ -4615,6 +4755,9 @@ function logRequestCompleted(logger, status, stream, durationMs) {
|
|
|
4615
4755
|
logger.warn(fields, "request completed with client error");
|
|
4616
4756
|
return;
|
|
4617
4757
|
}
|
|
4758
|
+
if (!accessLog) {
|
|
4759
|
+
return;
|
|
4760
|
+
}
|
|
4618
4761
|
logger.info(fields, "request completed");
|
|
4619
4762
|
}
|
|
4620
4763
|
function requestIdFor(request) {
|
|
@@ -4659,11 +4802,17 @@ var API_ROUTES = [
|
|
|
4659
4802
|
{ method: "POST", path: "/v1/responses/compact", name: "responses_compact" },
|
|
4660
4803
|
{ method: "POST", path: "/v1/responses", name: "responses" }
|
|
4661
4804
|
];
|
|
4805
|
+
var ROUTE_NAMES = new Map(
|
|
4806
|
+
API_ROUTES.map((entry) => [routeKey(entry.method, entry.path), entry.name])
|
|
4807
|
+
);
|
|
4662
4808
|
function routeFor(method, path) {
|
|
4663
4809
|
if (method === "OPTIONS") {
|
|
4664
4810
|
return "cors.preflight";
|
|
4665
4811
|
}
|
|
4666
|
-
return
|
|
4812
|
+
return ROUTE_NAMES.get(routeKey(method, path)) ?? "not_found";
|
|
4813
|
+
}
|
|
4814
|
+
function routeKey(method, path) {
|
|
4815
|
+
return `${method} ${path}`;
|
|
4667
4816
|
}
|
|
4668
4817
|
function isStreamingResponse(response) {
|
|
4669
4818
|
return response.headers.get("content-type")?.includes("text/event-stream") ?? false;
|
|
@@ -4721,12 +4870,24 @@ async function handleUsage(metrics, readUsage, request) {
|
|
|
4721
4870
|
function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_TTL_MS) {
|
|
4722
4871
|
const usagePath = "/copilot_internal/user";
|
|
4723
4872
|
let cache;
|
|
4724
|
-
|
|
4873
|
+
let inFlight;
|
|
4874
|
+
return async () => {
|
|
4725
4875
|
if (cache && now() - cache.atMs < ttlMs) {
|
|
4726
4876
|
return cache.result;
|
|
4727
4877
|
}
|
|
4878
|
+
if (inFlight) {
|
|
4879
|
+
return inFlight;
|
|
4880
|
+
}
|
|
4881
|
+
inFlight = readFreshUsage();
|
|
4728
4882
|
try {
|
|
4729
|
-
|
|
4883
|
+
return await inFlight;
|
|
4884
|
+
} finally {
|
|
4885
|
+
inFlight = void 0;
|
|
4886
|
+
}
|
|
4887
|
+
};
|
|
4888
|
+
async function readFreshUsage() {
|
|
4889
|
+
try {
|
|
4890
|
+
const upstream = await client.usage();
|
|
4730
4891
|
metrics.recordUpstream(usagePath, upstream.ok);
|
|
4731
4892
|
metrics.recordGithubRateLimit(parseRateLimitHeaders(upstream.headers, now()));
|
|
4732
4893
|
if (!upstream.ok) {
|
|
@@ -4748,7 +4909,7 @@ function createUsageReader(client, metrics, now = Date.now, ttlMs = USAGE_CACHE_
|
|
|
4748
4909
|
cache = { atMs: now(), result };
|
|
4749
4910
|
return result;
|
|
4750
4911
|
}
|
|
4751
|
-
}
|
|
4912
|
+
}
|
|
4752
4913
|
}
|
|
4753
4914
|
export {
|
|
4754
4915
|
COPILOT_USAGE_API_VERSION,
|