@hsupu/copilot-api 0.7.18-beta → 0.7.18-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.example.yaml +272 -0
- package/dist/main.mjs +593 -430
- package/dist/main.mjs.map +1 -1
- package/package.json +3 -2
package/dist/main.mjs
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { defineCommand, runMain } from "citty";
|
|
3
3
|
import consola, { consola as consola$1 } from "consola";
|
|
4
|
+
import * as fs$1 from "node:fs/promises";
|
|
4
5
|
import fs, { access, constants, readFile } from "node:fs/promises";
|
|
5
6
|
import os, { homedir } from "node:os";
|
|
7
|
+
import * as path$1 from "node:path";
|
|
6
8
|
import path, { dirname, join, resolve } from "node:path";
|
|
7
9
|
import { randomBytes, randomUUID } from "node:crypto";
|
|
8
10
|
import pc from "picocolors";
|
|
@@ -24,6 +26,7 @@ const PATHS = {
|
|
|
24
26
|
APP_DIR,
|
|
25
27
|
GITHUB_TOKEN_PATH,
|
|
26
28
|
CONFIG_YAML: path.join(APP_DIR, "config.yaml"),
|
|
29
|
+
LEARNED_LIMITS: path.join(APP_DIR, "learned-limits.json"),
|
|
27
30
|
ERROR_DIR: path.join(APP_DIR, "errmsgs")
|
|
28
31
|
};
|
|
29
32
|
async function ensurePaths() {
|
|
@@ -42,7 +45,15 @@ async function ensureFile(filePath) {
|
|
|
42
45
|
|
|
43
46
|
//#endregion
|
|
44
47
|
//#region src/lib/state.ts
|
|
45
|
-
/**
|
|
48
|
+
/**
 * Refresh the derived model lookup structures from `state.models`.
 *
 * Replaces `state.modelIndex` (model id → model object) and `state.modelIds`
 * (set of model ids). When `state.models` or its `data` list is absent, both
 * end up empty. cacheModels() calls this after fetching; tests can call it
 * directly once they have assigned `state.models`.
 */
function rebuildModelIndex() {
	const models = state.models?.data ?? [];
	const byId = /* @__PURE__ */ new Map();
	const ids = /* @__PURE__ */ new Set();
	for (const model of models) {
		byId.set(model.id, model);
		ids.add(model.id);
	}
	state.modelIndex = byId;
	state.modelIds = ids;
}
|
|
46
57
|
const DEFAULT_MODEL_OVERRIDES = {
|
|
47
58
|
opus: "claude-opus-4.6",
|
|
48
59
|
sonnet: "claude-sonnet-4.6",
|
|
@@ -50,6 +61,8 @@ const DEFAULT_MODEL_OVERRIDES = {
|
|
|
50
61
|
};
|
|
51
62
|
const state = {
|
|
52
63
|
accountType: "individual",
|
|
64
|
+
modelIndex: /* @__PURE__ */ new Map(),
|
|
65
|
+
modelIds: /* @__PURE__ */ new Set(),
|
|
53
66
|
showGitHubToken: false,
|
|
54
67
|
verbose: false,
|
|
55
68
|
autoTruncate: true,
|
|
@@ -198,7 +211,11 @@ function extractTrailingSystemReminderTags(text) {
|
|
|
198
211
|
while (true) {
|
|
199
212
|
const currentTagEnd = scanEnd;
|
|
200
213
|
let end = scanEnd;
|
|
201
|
-
while (end > 0
|
|
214
|
+
while (end > 0) {
|
|
215
|
+
const c = text.charCodeAt(end - 1);
|
|
216
|
+
if (c !== 10 && c !== 32 && c !== 9 && c !== 13) break;
|
|
217
|
+
end--;
|
|
218
|
+
}
|
|
202
219
|
if (end < 18) break;
|
|
203
220
|
if (text.slice(end - 18, end) !== CLOSE_TAG) break;
|
|
204
221
|
const closeTagStart = end - 18;
|
|
@@ -240,7 +257,11 @@ function extractLeadingSystemReminderTags(text) {
|
|
|
240
257
|
while (true) {
|
|
241
258
|
const currentTagStart = scanStart;
|
|
242
259
|
let start = scanStart;
|
|
243
|
-
while (start < text.length
|
|
260
|
+
while (start < text.length) {
|
|
261
|
+
const c = text.charCodeAt(start);
|
|
262
|
+
if (c !== 32 && c !== 9 && c !== 13) break;
|
|
263
|
+
start++;
|
|
264
|
+
}
|
|
244
265
|
if (start + 17 > text.length) break;
|
|
245
266
|
if (text.slice(start, start + 17) !== OPEN_TAG) break;
|
|
246
267
|
const afterOpen = start + 17;
|
|
@@ -365,21 +386,6 @@ function removeSystemReminderTags(text) {
|
|
|
365
386
|
return end < result.length ? result.slice(0, end) : result;
|
|
366
387
|
}
|
|
367
388
|
|
|
368
|
-
//#endregion
|
|
369
|
-
//#region src/lib/utils.ts
|
|
370
|
-
const sleep = (ms) => new Promise((resolve) => {
|
|
371
|
-
setTimeout(resolve, ms);
|
|
372
|
-
});
|
|
373
|
-
const isNullish = (value) => value === null || value === void 0;
|
|
374
|
-
/** Convert bytes to KB with rounding */
|
|
375
|
-
function bytesToKB(bytes) {
|
|
376
|
-
return Math.round(bytes / 1024);
|
|
377
|
-
}
|
|
378
|
-
/** Generate unique ID (timestamp + random) */
|
|
379
|
-
function generateId(randomLength = 7) {
|
|
380
|
-
return Date.now().toString(36) + Math.random().toString(36).slice(2, 2 + randomLength);
|
|
381
|
-
}
|
|
382
|
-
|
|
383
389
|
//#endregion
|
|
384
390
|
//#region src/lib/auto-truncate/index.ts
|
|
385
391
|
/**
|
|
@@ -392,64 +398,126 @@ const MAX_AUTO_TRUNCATE_RETRIES = 5;
|
|
|
392
398
|
const AUTO_TRUNCATE_RETRY_FACTOR = .9;
|
|
393
399
|
const DEFAULT_AUTO_TRUNCATE_CONFIG = {
|
|
394
400
|
safetyMarginPercent: 2,
|
|
395
|
-
maxRequestBodyBytes: 510 * 1024,
|
|
396
401
|
preserveRecentPercent: .7,
|
|
397
|
-
checkTokenLimit: true
|
|
398
|
-
checkByteLimit: false
|
|
402
|
+
checkTokenLimit: true
|
|
399
403
|
};
|
|
400
|
-
|
|
401
|
-
|
|
404
|
+
const learnedLimits = /* @__PURE__ */ new Map();
|
|
405
|
+
/**
 * Look up what has been learned about a model so far.
 *
 * @param modelId - Model identifier used as the key in `learnedLimits`.
 * @returns The stored record (token limit plus calibration data) for that
 *   model, or `undefined` when nothing has been recorded for it.
 */
function getLearnedLimits(modelId) {
	const record = learnedLimits.get(modelId);
	return record;
}
|
|
402
409
|
/**
|
|
403
|
-
*
|
|
410
|
+
* Check whether a model has known limits from previous failures.
|
|
411
|
+
* Used to decide whether to pre-check requests before sending.
|
|
404
412
|
*/
|
|
405
|
-
function
|
|
406
|
-
|
|
407
|
-
dynamicByteLimit = newLimit;
|
|
408
|
-
consola.info(`[AutoTruncate] Adjusted byte limit: ${bytesToKB(failingBytes)}KB failed → ${bytesToKB(newLimit)}KB`);
|
|
409
|
-
}
|
|
410
|
-
/** Get the current effective byte limit */
|
|
411
|
-
function getEffectiveByteLimitBytes() {
|
|
412
|
-
return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
|
|
413
|
+
function hasKnownLimits(modelId) {
	// True only once an entry for this model has been stored in `learnedLimits`.
	const known = learnedLimits.has(modelId);
	return known;
}
|
|
414
|
-
/** Dynamic token limits per model, adjusted based on token limit errors */
|
|
415
|
-
const dynamicTokenLimits = /* @__PURE__ */ new Map();
|
|
416
416
|
/**
|
|
417
417
|
* Called when a token limit error (400) occurs.
|
|
418
|
-
*
|
|
419
|
-
*/
|
|
420
|
-
function onTokenLimitExceeded(modelId, reportedLimit) {
|
|
421
|
-
const
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
418
|
+
* Records the learned limit and optionally updates calibration.
|
|
419
|
+
*/
|
|
420
|
+
function onTokenLimitExceeded(modelId, reportedLimit, reportedCurrent, estimatedTokens) {
	// modelId         - key under which the limit is stored in `learnedLimits`.
	// reportedLimit   - token limit taken from the upstream error.
	// reportedCurrent - (optional) actual token count taken from the error.
	// estimatedTokens - (optional) our own estimate for the same request.
	// A disk write is scheduled only when something was actually recorded.
	let changed = false;
	const existing = learnedLimits.get(modelId);
	// Only a positive, finite limit is worth remembering: loadPersistedLimits()
	// accepts nothing else, and a NaN limit could never be lowered again
	// because every `<` comparison against it is false.
	const limitIsUsable = Number.isFinite(reportedLimit) && reportedLimit > 0;
	if (limitIsUsable && (!existing || reportedLimit < existing.tokenLimit)) {
		learnedLimits.set(modelId, {
			tokenLimit: reportedLimit,
			// Keep calibration gathered so far; start neutral for a new model.
			calibrationFactor: existing?.calibrationFactor ?? 1,
			sampleCount: existing?.sampleCount ?? 0,
			updatedAt: Date.now()
		});
		consola.info(`[AutoTruncate] Learned token limit for ${modelId}: ${reportedLimit}`);
		changed = true;
	}
	const canCalibrate = reportedCurrent !== void 0 && estimatedTokens !== void 0 && estimatedTokens > 0;
	// Calibration data lives on the model's record, so one must exist.
	if (canCalibrate && learnedLimits.has(modelId)) {
		updateCalibration(modelId, reportedCurrent, estimatedTokens);
		const lim = learnedLimits.get(modelId);
		consola.info(`[AutoTruncate] Calibration for ${modelId}: actual=${reportedCurrent} vs estimated=${estimatedTokens} → factor=${lim.calibrationFactor.toFixed(3)} (${lim.sampleCount} samples)`);
		changed = true;
	}
	if (changed) schedulePersist();
}
|
|
438
|
+
const CALIBRATION_ALPHA = .3;
|
|
439
|
+
const CALIBRATION_MIN = .5;
|
|
440
|
+
const CALIBRATION_MAX = 3;
|
|
428
441
|
/**
|
|
429
|
-
*
|
|
430
|
-
*
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
442
|
+
* Update the per-model calibration factor using EWMA.
|
|
443
|
+
*
|
|
444
|
+
* Called after a token limit error when we know both the GPT tokenizer estimate
|
|
445
|
+
* and the actual token count (from the error response). The ratio between them
|
|
446
|
+
* tells us how much the GPT tokenizer over/under-estimates for this model.
|
|
447
|
+
*/
|
|
448
|
+
function updateCalibration(modelId, actualTokens, estimatedTokens) {
	// modelId         - key of the record in `learnedLimits` to update (no-op if absent).
	// actualTokens    - token count reported by the upstream error.
	// estimatedTokens - our own estimate for the same request.
	// Non-finite inputs are ignored: NaN would slip past a plain `<= 0` check
	// and, once blended in, every later average would stay NaN.
	if (!Number.isFinite(actualTokens) || !Number.isFinite(estimatedTokens) || estimatedTokens <= 0) return;
	const limits = learnedLimits.get(modelId);
	if (!limits) return;
	const rawFactor = actualTokens / estimatedTokens;
	// Clamp so that a single outlier cannot push the factor to an absurd value.
	const clamped = Math.max(CALIBRATION_MIN, Math.min(CALIBRATION_MAX, rawFactor));
	// Treat a missing or invalid sample count as "no samples yet".
	const priorSamples = Number.isFinite(limits.sampleCount) && limits.sampleCount > 0 ? limits.sampleCount : 0;
	const hasPrior = priorSamples > 0 && Number.isFinite(limits.calibrationFactor);
	// First usable sample seeds the factor; later ones are blended in with weight CALIBRATION_ALPHA.
	limits.calibrationFactor = hasPrior ? CALIBRATION_ALPHA * clamped + (1 - CALIBRATION_ALPHA) * limits.calibrationFactor : clamped;
	limits.sampleCount = priorSamples + 1;
	limits.updatedAt = Date.now();
}
|
|
459
|
+
/**
 * Scale a GPT-tokenizer estimate by the model's learned calibration factor.
 *
 * @param modelId - Model identifier used as the key in `learnedLimits`.
 * @param gptEstimate - Token estimate to adjust.
 * @returns `gptEstimate` unchanged when the model has no record or no
 *   calibration samples yet; otherwise the scaled estimate, rounded up.
 */
function calibrate(modelId, gptEstimate) {
	const entry = learnedLimits.get(modelId);
	const hasSamples = Boolean(entry) && entry.sampleCount !== 0;
	return hasSamples ? Math.ceil(gptEstimate * entry.calibrationFactor) : gptEstimate;
}
|
|
465
|
+
/** Margin that always applies, however many calibration samples exist. */
const BASE_MARGIN = .03;
/** Extra margin at one sample or fewer; divided by the sample count beyond that. */
const BONUS_MARGIN_PER_SAMPLE = .07;
/**
 * Compute the dynamic safety margin from calibration confidence.
 * Fewer samples → wider margin (conservative). More samples → narrower margin.
 *
 * - 0 samples: 10% (0.03 + 0.07)
 * - 1 sample: 10%
 * - 10 samples: ~3.7%
 * - ∞ samples: 3%
 *
 * @param sampleCount - Number of calibration samples gathered for the model.
 *   Anything below one full sample (0, negatives, fractions, NaN, undefined)
 *   is treated as one sample, so the result always stays within
 *   [BASE_MARGIN, BASE_MARGIN + BONUS_MARGIN_PER_SAMPLE] and is never NaN.
 * @returns The margin as a fraction (e.g. 0.1 for 10%).
 */
function computeSafetyMargin(sampleCount) {
	// `>=` is false for NaN/undefined, so those fall back to the widest margin.
	const effectiveSamples = sampleCount >= 1 ? sampleCount : 1;
	return BASE_MARGIN + BONUS_MARGIN_PER_SAMPLE / effectiveSamples;
}
|
|
480
|
+
/** Handle of the pending write timer, or null when no write is queued. */
let persistTimer = null;
/** Delay in milliseconds between the first change and the write to disk. */
const PERSIST_DEBOUNCE_MS = 5e3;
/**
 * Queue a write of the learned limits to disk.
 *
 * If a write is already queued the call does nothing (the timer is not
 * restarted), so bursts of changes collapse into one write that happens
 * PERSIST_DEBOUNCE_MS after the first of them.
 */
function schedulePersist() {
	if (persistTimer === null) {
		const flush = () => {
			// Clear the handle first so changes made from now on queue a new write.
			persistTimer = null;
			persistLimits();
		};
		persistTimer = setTimeout(flush, PERSIST_DEBOUNCE_MS);
	}
}
|
|
490
|
+
/**
 * Write the in-memory learned limits to `PATHS.LEARNED_LIMITS` as
 * pretty-printed JSON of the form `{ version: 1, limits: { [modelId]: record } }`.
 *
 * Does nothing when no limits have been learned. Best-effort: a failure is
 * logged as a warning and never thrown, so the function can be called
 * without awaiting or catching.
 */
async function persistLimits() {
	if (learnedLimits.size === 0) return;
	try {
		const payload = JSON.stringify({
			version: 1,
			limits: Object.fromEntries(learnedLimits)
		}, null, 2);
		await fs.writeFile(PATHS.LEARNED_LIMITS, payload, "utf8");
	} catch (error) {
		// Losing this cache only costs re-learning, but say so instead of failing silently.
		const reason = error instanceof Error ? error.message : String(error);
		consola.warn(`[AutoTruncate] Failed to persist learned limits to ${PATHS.LEARNED_LIMITS}: ${reason}`);
	}
}
|
|
435
|
-
/**
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
501
|
+
/**
 * Load previously persisted limits from `PATHS.LEARNED_LIMITS` into
 * `learnedLimits` (called at startup).
 *
 * Never throws. A missing file is silently ignored; any other read error or
 * invalid JSON is logged as a warning. Files whose `version` is not 1 are
 * ignored. Each entry is validated on its own, so one malformed entry does
 * not prevent the remaining ones from loading:
 * - `tokenLimit` must be a finite number > 0,
 * - `calibrationFactor` must lie within [CALIBRATION_MIN, CALIBRATION_MAX],
 * - an invalid `sampleCount` is reset to 0 and an invalid `updatedAt` to now.
 */
async function loadPersistedLimits() {
	let raw;
	try {
		raw = await fs.readFile(PATHS.LEARNED_LIMITS, "utf8");
	} catch (error) {
		// No file yet is the normal first-run case; anything else is worth a warning.
		if (error?.code !== "ENOENT") {
			const reason = error instanceof Error ? error.message : String(error);
			consola.warn(`[AutoTruncate] Could not read learned limits from ${PATHS.LEARNED_LIMITS}: ${reason}`);
		}
		return;
	}
	let data;
	try {
		data = JSON.parse(raw);
	} catch {
		consola.warn(`[AutoTruncate] Ignoring learned limits file with invalid JSON: ${PATHS.LEARNED_LIMITS}`);
		return;
	}
	if (data?.version !== 1) return;
	if (typeof data.limits !== "object" || data.limits === null) return;
	for (const [modelId, lim] of Object.entries(data.limits)) {
		if (typeof lim !== "object" || lim === null) continue;
		const { tokenLimit, calibrationFactor, sampleCount, updatedAt } = lim;
		if (!Number.isFinite(tokenLimit) || tokenLimit <= 0) continue;
		if (typeof calibrationFactor !== "number" || !(calibrationFactor >= CALIBRATION_MIN && calibrationFactor <= CALIBRATION_MAX)) continue;
		learnedLimits.set(modelId, {
			...lim,
			tokenLimit,
			calibrationFactor,
			// A bad sample count would otherwise turn into NaN on the next increment.
			sampleCount: Number.isInteger(sampleCount) && sampleCount >= 0 ? sampleCount : 0,
			updatedAt: Number.isFinite(updatedAt) ? updatedAt : Date.now()
		});
	}
	if (learnedLimits.size > 0) consola.info(`[AutoTruncate] Loaded learned limits for ${learnedLimits.size} model(s)`);
}
|
|
442
511
|
/**
|
|
443
|
-
* Parse an HTTPError to detect token limit
|
|
512
|
+
* Parse an HTTPError to detect token limit errors,
|
|
444
513
|
* and record the learned limit for future pre-checks.
|
|
445
514
|
*
|
|
446
|
-
*
|
|
515
|
+
* When `estimatedTokens` is provided (the GPT tokenizer estimate at the time
|
|
516
|
+
* of the error), also updates the per-model calibration factor.
|
|
517
|
+
*
|
|
518
|
+
* Returns error info if the error is a retryable token limit error, null otherwise.
|
|
447
519
|
*/
|
|
448
|
-
function tryParseAndLearnLimit(error, modelId,
|
|
449
|
-
if (error.status === 413) {
|
|
450
|
-
if (payloadBytes && learn) onRequestTooLarge(payloadBytes);
|
|
451
|
-
return { type: "body_too_large" };
|
|
452
|
-
}
|
|
520
|
+
function tryParseAndLearnLimit(error, modelId, learn = true, estimatedTokens) {
|
|
453
521
|
if (error.status === 400) {
|
|
454
522
|
let errorJson;
|
|
455
523
|
try {
|
|
@@ -461,7 +529,7 @@ function tryParseAndLearnLimit(error, modelId, payloadBytes, learn = true) {
|
|
|
461
529
|
if (!(errorJson.error.code === "model_max_prompt_tokens_exceeded" || errorJson.error.type === "invalid_request_error")) return null;
|
|
462
530
|
const tokenInfo = parseTokenLimitError(errorJson.error.message);
|
|
463
531
|
if (!tokenInfo) return null;
|
|
464
|
-
if (learn) onTokenLimitExceeded(modelId, tokenInfo.limit);
|
|
532
|
+
if (learn) onTokenLimitExceeded(modelId, tokenInfo.limit, tokenInfo.current, estimatedTokens);
|
|
465
533
|
return {
|
|
466
534
|
type: "token_limit",
|
|
467
535
|
limit: tokenInfo.limit,
|
|
@@ -594,64 +662,9 @@ function formatRateLimitError(copilotMessage) {
|
|
|
594
662
|
}
|
|
595
663
|
};
|
|
596
664
|
}
|
|
597
|
-
/** Format timestamp as YYMMDD_HHmmss for error directory names */
|
|
598
|
-
function formatErrorTimestamp() {
|
|
599
|
-
const now = /* @__PURE__ */ new Date();
|
|
600
|
-
return `${String(now.getFullYear()).slice(2)}${String(now.getMonth() + 1).padStart(2, "0")}${String(now.getDate()).padStart(2, "0")}_${String(now.getHours()).padStart(2, "0")}${String(now.getMinutes()).padStart(2, "0")}${String(now.getSeconds()).padStart(2, "0")}`;
|
|
601
|
-
}
|
|
602
|
-
/** Extract request headers as a plain object (excluding potentially large/binary headers) */
|
|
603
|
-
function extractHeaders(c) {
|
|
604
|
-
const headers = {};
|
|
605
|
-
for (const [key, value] of c.req.raw.headers.entries()) headers[key] = key.toLowerCase() === "authorization" ? "[REDACTED]" : value;
|
|
606
|
-
return headers;
|
|
607
|
-
}
|
|
608
|
-
/**
|
|
609
|
-
* Persist error details to disk for post-mortem debugging.
|
|
610
|
-
* Each error gets a subdirectory under errmsgs/ containing:
|
|
611
|
-
* - meta.json: structured metadata (timestamp, status, headers, error info)
|
|
612
|
-
* - request.json: raw request body
|
|
613
|
-
* - response.txt: raw upstream response body
|
|
614
|
-
*
|
|
615
|
-
* Fire-and-forget — never blocks or throws.
|
|
616
|
-
*/
|
|
617
|
-
async function writeErrorToFile(c, error) {
|
|
618
|
-
const id = randomBytes(4).toString("hex");
|
|
619
|
-
const dirName = `${formatErrorTimestamp()}_${id}`;
|
|
620
|
-
const dirPath = path.join(PATHS.ERROR_DIR, dirName);
|
|
621
|
-
await fs.mkdir(dirPath, { recursive: true });
|
|
622
|
-
const meta = {
|
|
623
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
624
|
-
request: {
|
|
625
|
-
method: c.req.method,
|
|
626
|
-
path: c.req.path,
|
|
627
|
-
url: c.req.url,
|
|
628
|
-
headers: extractHeaders(c)
|
|
629
|
-
}
|
|
630
|
-
};
|
|
631
|
-
if (error instanceof HTTPError) {
|
|
632
|
-
meta.response = {
|
|
633
|
-
status: error.status,
|
|
634
|
-
modelId: error.modelId
|
|
635
|
-
};
|
|
636
|
-
meta.error = { message: error.message };
|
|
637
|
-
} else if (error instanceof Error) meta.error = {
|
|
638
|
-
message: formatErrorWithCause(error),
|
|
639
|
-
name: error.name,
|
|
640
|
-
stack: error.stack
|
|
641
|
-
};
|
|
642
|
-
else meta.error = { message: String(error) };
|
|
643
|
-
const writes = [fs.writeFile(path.join(dirPath, "meta.json"), JSON.stringify(meta, null, 2))];
|
|
644
|
-
try {
|
|
645
|
-
const body = await c.req.json();
|
|
646
|
-
writes.push(fs.writeFile(path.join(dirPath, "request.json"), JSON.stringify(body, null, 2)));
|
|
647
|
-
} catch {}
|
|
648
|
-
if (error instanceof HTTPError && error.responseText) writes.push(fs.writeFile(path.join(dirPath, "response.txt"), error.responseText));
|
|
649
|
-
await Promise.all(writes);
|
|
650
|
-
}
|
|
651
665
|
function forwardError(c, error) {
|
|
652
|
-
writeErrorToFile(c, error).catch(() => {});
|
|
653
666
|
if (error instanceof HTTPError) {
|
|
654
|
-
const limitInfo = tryParseAndLearnLimit(error, error.modelId ?? "unknown",
|
|
667
|
+
const limitInfo = tryParseAndLearnLimit(error, error.modelId ?? "unknown", state.autoTruncate);
|
|
655
668
|
if (error.status === 413) {
|
|
656
669
|
const formattedError = formatRequestTooLargeError();
|
|
657
670
|
consola.warn(`HTTP 413: Request too large`);
|
|
@@ -827,7 +840,6 @@ const NETWORK_ERROR_PATTERNS = [
|
|
|
827
840
|
function isNetworkError(error) {
|
|
828
841
|
const msg = error.message.toLowerCase();
|
|
829
842
|
if (NETWORK_ERROR_PATTERNS.some((p) => msg.includes(p.toLowerCase()))) return true;
|
|
830
|
-
if (error instanceof TypeError) return true;
|
|
831
843
|
if (error.cause instanceof Error) return isNetworkError(error.cause);
|
|
832
844
|
return false;
|
|
833
845
|
}
|
|
@@ -1048,6 +1060,21 @@ var CopilotTokenManager = class {
|
|
|
1048
1060
|
}
|
|
1049
1061
|
};
|
|
1050
1062
|
|
|
1063
|
+
//#endregion
|
|
1064
|
+
//#region src/lib/utils.ts
|
|
1065
|
+
/** Return a promise that resolves (with no value) once `ms` milliseconds have elapsed. */
const sleep = (ms) => new Promise((done) => setTimeout(done, ms));
|
|
1068
|
+
/** True for `null` and `undefined` only; other falsy values (0, "", false, NaN) are not nullish. */
const isNullish = (value) => value == null;
|
|
1069
|
+
/**
 * Express a byte count in kilobytes (1 KB = 1024 bytes), rounded to the
 * nearest whole number.
 */
function bytesToKB(bytes) {
	const BYTES_PER_KB = 1024;
	const kilobytes = bytes / BYTES_PER_KB;
	return Math.round(kilobytes);
}
|
|
1073
|
+
/**
 * Generate an ID made of the current timestamp (base 36) followed by exactly
 * `randomLength` random base-36 characters.
 *
 * The random part comes from Math.random(), so the ID is suitable for
 * telling records apart but not for anything security-sensitive.
 *
 * @param randomLength - Number of random characters to append (default 7).
 *   Zero or a negative value yields the timestamp part only.
 * @returns The generated ID string.
 */
function generateId(randomLength = 7) {
	let suffix = "";
	// One Math.random() call yields a variable number of base-36 digits
	// (trailing zeros are dropped, and there are only ~11 at most), so keep
	// drawing until the requested length is reached.
	while (suffix.length < randomLength) suffix += Math.random().toString(36).slice(2);
	return Date.now().toString(36) + suffix.slice(0, Math.max(0, randomLength));
}
|
|
1077
|
+
|
|
1051
1078
|
//#endregion
|
|
1052
1079
|
//#region src/lib/token/github-client.ts
|
|
1053
1080
|
/** GitHub OAuth API client — device code flow and user info */
|
|
@@ -1589,6 +1616,7 @@ const checkUsage = defineCommand({
|
|
|
1589
1616
|
/**
 * Retrieve the model list via getModels(), store it on `state.models`, then
 * rebuild the model lookup indexes from it.
 */
async function cacheModels() {
	const fetched = await getModels();
	state.models = fetched;
	rebuildModelIndex();
}
|
|
1593
1621
|
const getModels = async () => {
|
|
1594
1622
|
const response = await fetch(`${copilotBaseUrl(state)}/models`, { headers: copilotHeaders(state) });
|
|
@@ -1766,6 +1794,8 @@ var AdaptiveRateLimiter = class {
|
|
|
1766
1794
|
lastRequestTime = 0;
|
|
1767
1795
|
/** Current step in gradual recovery (index into gradualRecoverySteps) */
|
|
1768
1796
|
recoveryStepIndex = 0;
|
|
1797
|
+
/** Abort controller for cancelling pending sleeps during shutdown */
|
|
1798
|
+
sleepAbortController = new AbortController();
|
|
1769
1799
|
constructor(config = {}) {
|
|
1770
1800
|
this.config = {
|
|
1771
1801
|
...DEFAULT_CONFIG,
|
|
@@ -1999,10 +2029,20 @@ var AdaptiveRateLimiter = class {
|
|
|
1999
2029
|
request.reject(/* @__PURE__ */ new Error("Server shutting down"));
|
|
2000
2030
|
}
|
|
2001
2031
|
this.processing = false;
|
|
2032
|
+
this.sleepAbortController.abort();
|
|
2033
|
+
this.sleepAbortController = new AbortController();
|
|
2002
2034
|
return count;
|
|
2003
2035
|
}
|
|
2004
2036
|
sleep(ms) {
|
|
2005
|
-
|
|
2037
|
+
const signal = this.sleepAbortController.signal;
|
|
2038
|
+
if (signal.aborted) return Promise.resolve();
|
|
2039
|
+
return new Promise((resolve) => {
|
|
2040
|
+
const timer = setTimeout(resolve, ms);
|
|
2041
|
+
signal.addEventListener("abort", () => {
|
|
2042
|
+
clearTimeout(timer);
|
|
2043
|
+
resolve();
|
|
2044
|
+
}, { once: true });
|
|
2045
|
+
});
|
|
2006
2046
|
}
|
|
2007
2047
|
/**
|
|
2008
2048
|
* Get current status for debugging/monitoring
|
|
@@ -2073,6 +2113,10 @@ const MODEL_PREFERENCE = {
|
|
|
2073
2113
|
],
|
|
2074
2114
|
haiku: ["claude-haiku-4.5"]
|
|
2075
2115
|
};
|
|
2116
|
+
/** Pre-compiled regex: claude-{family}-{major}-{minor}[-YYYYMMDD] */
|
|
2117
|
+
const VERSIONED_RE = /^(claude-(?:opus|sonnet|haiku))-(\d+)-(\d{1,2})(?:-\d{8,})?$/;
|
|
2118
|
+
/** Pre-compiled regex: claude-{family}-{major}-YYYYMMDD (date-only suffix) */
|
|
2119
|
+
const DATE_ONLY_RE = /^(claude-(opus|sonnet|haiku)-\d+)-\d{8,}$/;
|
|
2076
2120
|
/**
|
|
2077
2121
|
* Normalize model ID for matching: lowercase and replace dots with dashes.
|
|
2078
2122
|
* e.g. "claude-sonnet-4.5" → "claude-sonnet-4-5"
|
|
@@ -2093,7 +2137,7 @@ function normalizeForMatching(modelId) {
|
|
|
2093
2137
|
*/
|
|
2094
2138
|
function normalizeModelId(modelId) {
|
|
2095
2139
|
const { base, suffix } = extractModifierSuffix(modelId);
|
|
2096
|
-
const versionedMatch = base.match(
|
|
2140
|
+
const versionedMatch = base.match(VERSIONED_RE);
|
|
2097
2141
|
if (versionedMatch) return `${versionedMatch[1]}-${versionedMatch[2]}.${versionedMatch[3]}${suffix}`;
|
|
2098
2142
|
return modelId;
|
|
2099
2143
|
}
|
|
@@ -2112,9 +2156,8 @@ function getModelFamily(modelId) {
|
|
|
2112
2156
|
function findPreferredModel(family) {
|
|
2113
2157
|
const preference = MODEL_PREFERENCE[family];
|
|
2114
2158
|
if (!preference) return family;
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
for (const candidate of preference) if (availableIds.includes(candidate)) return candidate;
|
|
2159
|
+
if (state.modelIds.size === 0) return preference[0];
|
|
2160
|
+
for (const candidate of preference) if (state.modelIds.has(candidate)) return candidate;
|
|
2118
2161
|
return preference[0];
|
|
2119
2162
|
}
|
|
2120
2163
|
/** Known model modifier suffixes (e.g., "-fast" for fast output mode, "-1m" for 1M context). */
|
|
@@ -2182,8 +2225,7 @@ function resolveModelName(model) {
|
|
|
2182
2225
|
* Uses `seen` set to prevent circular override chains.
|
|
2183
2226
|
*/
|
|
2184
2227
|
function resolveOverrideTarget(source, target, seen) {
|
|
2185
|
-
|
|
2186
|
-
if (!availableIds || availableIds.length === 0 || availableIds.includes(target)) return target;
|
|
2228
|
+
if (state.modelIds.size === 0 || state.modelIds.has(target)) return target;
|
|
2187
2229
|
const visited = seen ?? new Set([source]);
|
|
2188
2230
|
const targetOverride = state.modelOverrides[target];
|
|
2189
2231
|
if (targetOverride && !visited.has(target)) {
|
|
@@ -2213,8 +2255,7 @@ function resolveModelNameCore(model) {
|
|
|
2213
2255
|
const resolvedBase = resolveBase(base);
|
|
2214
2256
|
if (suffix) {
|
|
2215
2257
|
const withSuffix = resolvedBase + suffix;
|
|
2216
|
-
|
|
2217
|
-
if (!availableIds || availableIds.length === 0 || availableIds.includes(withSuffix)) return withSuffix;
|
|
2258
|
+
if (state.modelIds.size === 0 || state.modelIds.has(withSuffix)) return withSuffix;
|
|
2218
2259
|
return resolvedBase;
|
|
2219
2260
|
}
|
|
2220
2261
|
return resolvedBase;
|
|
@@ -2222,17 +2263,16 @@ function resolveModelNameCore(model) {
|
|
|
2222
2263
|
/** Resolve a base model name (without modifier suffix) to its canonical form. */
|
|
2223
2264
|
function resolveBase(model) {
|
|
2224
2265
|
if (model in MODEL_PREFERENCE) return findPreferredModel(model);
|
|
2225
|
-
const versionedMatch = model.match(
|
|
2266
|
+
const versionedMatch = model.match(VERSIONED_RE);
|
|
2226
2267
|
if (versionedMatch) {
|
|
2227
2268
|
const dotModel = `${versionedMatch[1]}-${versionedMatch[2]}.${versionedMatch[3]}`;
|
|
2228
|
-
|
|
2229
|
-
if (!availableIds || availableIds.length === 0 || availableIds.includes(dotModel)) return dotModel;
|
|
2269
|
+
if (state.modelIds.size === 0 || state.modelIds.has(dotModel)) return dotModel;
|
|
2230
2270
|
}
|
|
2231
|
-
const dateOnlyMatch = model.match(
|
|
2271
|
+
const dateOnlyMatch = model.match(DATE_ONLY_RE);
|
|
2232
2272
|
if (dateOnlyMatch) {
|
|
2233
2273
|
const baseModel = dateOnlyMatch[1];
|
|
2234
2274
|
const family = dateOnlyMatch[2];
|
|
2235
|
-
if (
|
|
2275
|
+
if (state.modelIds.has(baseModel)) return baseModel;
|
|
2236
2276
|
return findPreferredModel(family);
|
|
2237
2277
|
}
|
|
2238
2278
|
return model;
|
|
@@ -2272,6 +2312,9 @@ function createRequestContext(opts) {
|
|
|
2272
2312
|
get durationMs() {
|
|
2273
2313
|
return Date.now() - startTime;
|
|
2274
2314
|
},
|
|
2315
|
+
get settled() {
|
|
2316
|
+
return settled;
|
|
2317
|
+
},
|
|
2275
2318
|
get originalRequest() {
|
|
2276
2319
|
return _originalRequest;
|
|
2277
2320
|
},
|
|
@@ -2410,7 +2453,7 @@ function createRequestContext(opts) {
|
|
|
2410
2453
|
fail(model, error) {
|
|
2411
2454
|
if (settled) return;
|
|
2412
2455
|
settled = true;
|
|
2413
|
-
const
|
|
2456
|
+
const errorMsg = getErrorMessage(error);
|
|
2414
2457
|
_response = {
|
|
2415
2458
|
success: false,
|
|
2416
2459
|
model: normalizeModelId(model),
|
|
@@ -2418,28 +2461,14 @@ function createRequestContext(opts) {
|
|
|
2418
2461
|
input_tokens: 0,
|
|
2419
2462
|
output_tokens: 0
|
|
2420
2463
|
},
|
|
2421
|
-
error:
|
|
2464
|
+
error: errorMsg,
|
|
2422
2465
|
content: null
|
|
2423
2466
|
};
|
|
2424
2467
|
if (error instanceof Error && "responseText" in error && typeof error.responseText === "string") {
|
|
2425
2468
|
const responseText = error.responseText;
|
|
2426
|
-
|
|
2427
|
-
if (responseText) {
|
|
2428
|
-
let formattedBody;
|
|
2429
|
-
try {
|
|
2430
|
-
formattedBody = JSON.stringify(JSON.parse(responseText), null, 2);
|
|
2431
|
-
} catch {
|
|
2432
|
-
formattedBody = responseText;
|
|
2433
|
-
}
|
|
2434
|
-
_response.content = {
|
|
2435
|
-
role: "assistant",
|
|
2436
|
-
content: [{
|
|
2437
|
-
type: "text",
|
|
2438
|
-
text: `[API Error Response${status ? ` - HTTP ${status}` : ""}]\n\n${formattedBody}`
|
|
2439
|
-
}]
|
|
2440
|
-
};
|
|
2441
|
-
}
|
|
2469
|
+
if (responseText) _response.responseText = responseText;
|
|
2442
2470
|
}
|
|
2471
|
+
if (error instanceof Error && "status" in error && typeof error.status === "number") _response.status = error.status;
|
|
2443
2472
|
_state = "failed";
|
|
2444
2473
|
emit({
|
|
2445
2474
|
type: "failed",
|
|
@@ -2462,7 +2491,7 @@ function createRequestContext(opts) {
|
|
|
2462
2491
|
}
|
|
2463
2492
|
};
|
|
2464
2493
|
if (_response) entry.response = _response;
|
|
2465
|
-
const lastTruncation =
|
|
2494
|
+
const lastTruncation = _attempts.findLast((a) => a.truncation)?.truncation;
|
|
2466
2495
|
if (lastTruncation) entry.truncation = lastTruncation;
|
|
2467
2496
|
if (_rewrites) entry.rewrites = _rewrites;
|
|
2468
2497
|
if (_sseEvents) entry.sseEvents = _sseEvents;
|
|
@@ -2500,7 +2529,7 @@ function createRequestContextManager() {
|
|
|
2500
2529
|
const maxAgeMs = state.staleRequestMaxAge * 1e3;
|
|
2501
2530
|
if (maxAgeMs <= 0) return;
|
|
2502
2531
|
for (const [id, ctx] of activeContexts) if (ctx.durationMs > maxAgeMs) {
|
|
2503
|
-
consola$1.warn(`[context] Force-failing stale request ${id} (
|
|
2532
|
+
consola$1.warn(`[context] Force-failing stale request ${id} (endpoint: ${ctx.endpoint}, model: ${ctx.originalRequest?.model ?? "unknown"}, stream: ${ctx.originalRequest?.stream ?? "?"}, state: ${ctx.state}, age: ${Math.round(ctx.durationMs / 1e3)}s, max: ${state.staleRequestMaxAge}s)`);
|
|
2504
2533
|
ctx.fail(ctx.originalRequest?.model ?? "unknown", /* @__PURE__ */ new Error(`Request exceeded maximum age of ${state.staleRequestMaxAge}s (stale context reaper)`));
|
|
2505
2534
|
}
|
|
2506
2535
|
}
|
|
@@ -2707,10 +2736,18 @@ function buildSearchText(entry) {
|
|
|
2707
2736
|
for (const block of msg.content) if (block.type === "text" && block.text) parts.push(block.text.slice(0, 200));
|
|
2708
2737
|
else if (block.type === "tool_use") {
|
|
2709
2738
|
if (block.name) parts.push(block.name);
|
|
2739
|
+
if (block.input) {
|
|
2740
|
+
const inputStr = typeof block.input === "string" ? block.input : JSON.stringify(block.input);
|
|
2741
|
+
parts.push(inputStr.slice(0, 500));
|
|
2742
|
+
}
|
|
2743
|
+
} else if (block.type === "tool_result" && block.content) {
|
|
2744
|
+
const contentStr = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
|
|
2745
|
+
parts.push(contentStr.slice(0, 500));
|
|
2710
2746
|
} else if (block.type === "thinking" && block.thinking) parts.push(block.thinking.slice(0, 200));
|
|
2711
2747
|
}
|
|
2712
|
-
if (msg.tool_calls) {
|
|
2713
|
-
|
|
2748
|
+
if (msg.tool_calls) for (const tc of msg.tool_calls) {
|
|
2749
|
+
if (tc.function.name) parts.push(tc.function.name);
|
|
2750
|
+
if (tc.function.arguments) parts.push(tc.function.arguments.slice(0, 500));
|
|
2714
2751
|
}
|
|
2715
2752
|
}
|
|
2716
2753
|
if (entry.response?.content) {
|
|
@@ -2723,7 +2760,7 @@ function buildSearchText(entry) {
|
|
|
2723
2760
|
}
|
|
2724
2761
|
return parts.join(" ").toLowerCase();
|
|
2725
2762
|
}
|
|
2726
|
-
/** Build a summary from a full HistoryEntry */
|
|
2763
|
+
/** Build a summary from a full HistoryEntry (searchText is computed lazily) */
|
|
2727
2764
|
function toSummary(entry) {
|
|
2728
2765
|
return {
|
|
2729
2766
|
id: entry.id,
|
|
@@ -2739,7 +2776,7 @@ function toSummary(entry) {
|
|
|
2739
2776
|
usage: entry.response?.usage,
|
|
2740
2777
|
durationMs: entry.durationMs,
|
|
2741
2778
|
previewText: extractPreviewText(entry),
|
|
2742
|
-
searchText:
|
|
2779
|
+
searchText: ""
|
|
2743
2780
|
};
|
|
2744
2781
|
}
|
|
2745
2782
|
/** Global history state */
|
|
@@ -2756,6 +2793,14 @@ const entryIndex = /* @__PURE__ */ new Map();
|
|
|
2756
2793
|
const summaryIndex = /* @__PURE__ */ new Map();
|
|
2757
2794
|
/** Track entry count per session to avoid O(n) filter during FIFO eviction */
|
|
2758
2795
|
const sessionEntryCount = /* @__PURE__ */ new Map();
|
|
2796
|
+
/** O(1) uniqueness tracking for session.models (avoids Array.includes in hot path) */
|
|
2797
|
+
const sessionModelsSet = /* @__PURE__ */ new Map();
|
|
2798
|
+
/** O(1) uniqueness tracking for session.toolsUsed (avoids Array.includes in hot path) */
|
|
2799
|
+
const sessionToolsSet = /* @__PURE__ */ new Map();
|
|
2800
|
+
/** Dirty flag for stats cache — set true when entries are inserted/updated */
|
|
2801
|
+
let statsDirty = true;
|
|
2802
|
+
/** Cached stats result — recomputed only when statsDirty is true */
|
|
2803
|
+
let cachedStats = null;
|
|
2759
2804
|
function initHistory(enabled, maxEntries) {
|
|
2760
2805
|
historyState.enabled = enabled;
|
|
2761
2806
|
historyState.maxEntries = maxEntries;
|
|
@@ -2765,6 +2810,10 @@ function initHistory(enabled, maxEntries) {
|
|
|
2765
2810
|
entryIndex.clear();
|
|
2766
2811
|
summaryIndex.clear();
|
|
2767
2812
|
sessionEntryCount.clear();
|
|
2813
|
+
sessionModelsSet.clear();
|
|
2814
|
+
sessionToolsSet.clear();
|
|
2815
|
+
statsDirty = true;
|
|
2816
|
+
cachedStats = null;
|
|
2768
2817
|
}
|
|
2769
2818
|
/** Update the maximum number of history entries (for config hot-reload) */
|
|
2770
2819
|
function setHistoryMaxEntries(limit) {
|
|
@@ -2790,6 +2839,8 @@ function getCurrentSession(endpoint) {
|
|
|
2790
2839
|
const now = Date.now();
|
|
2791
2840
|
const sessionId = generateId();
|
|
2792
2841
|
historyState.currentSessionId = sessionId;
|
|
2842
|
+
sessionModelsSet.set(sessionId, /* @__PURE__ */ new Set());
|
|
2843
|
+
sessionToolsSet.set(sessionId, /* @__PURE__ */ new Set());
|
|
2793
2844
|
historyState.sessions.set(sessionId, {
|
|
2794
2845
|
id: sessionId,
|
|
2795
2846
|
startTime: now,
|
|
@@ -2815,25 +2866,43 @@ function insertEntry(entry) {
|
|
|
2815
2866
|
session.requestCount++;
|
|
2816
2867
|
sessionEntryCount.set(entry.sessionId, (sessionEntryCount.get(entry.sessionId) ?? 0) + 1);
|
|
2817
2868
|
const model = entry.request.model;
|
|
2818
|
-
if (model
|
|
2869
|
+
if (model) {
|
|
2870
|
+
const modelsSet = sessionModelsSet.get(entry.sessionId);
|
|
2871
|
+
if (modelsSet && !modelsSet.has(model)) {
|
|
2872
|
+
modelsSet.add(model);
|
|
2873
|
+
session.models.push(model);
|
|
2874
|
+
}
|
|
2875
|
+
}
|
|
2819
2876
|
if (entry.request.tools && entry.request.tools.length > 0) {
|
|
2820
2877
|
if (!session.toolsUsed) session.toolsUsed = [];
|
|
2821
|
-
|
|
2878
|
+
let toolsSet = sessionToolsSet.get(entry.sessionId);
|
|
2879
|
+
if (!toolsSet) {
|
|
2880
|
+
toolsSet = new Set(session.toolsUsed);
|
|
2881
|
+
sessionToolsSet.set(entry.sessionId, toolsSet);
|
|
2882
|
+
}
|
|
2883
|
+
for (const tool of entry.request.tools) if (!toolsSet.has(tool.name)) {
|
|
2884
|
+
toolsSet.add(tool.name);
|
|
2885
|
+
session.toolsUsed.push(tool.name);
|
|
2886
|
+
}
|
|
2822
2887
|
}
|
|
2823
2888
|
const summary = toSummary(entry);
|
|
2824
2889
|
summaryIndex.set(entry.id, summary);
|
|
2825
|
-
|
|
2826
|
-
const
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2890
|
+
if (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
|
|
2891
|
+
const excess = historyState.entries.length - historyState.maxEntries;
|
|
2892
|
+
const removed = historyState.entries.splice(0, excess);
|
|
2893
|
+
for (const r of removed) {
|
|
2894
|
+
entryIndex.delete(r.id);
|
|
2895
|
+
summaryIndex.delete(r.id);
|
|
2896
|
+
const count = (sessionEntryCount.get(r.sessionId) ?? 1) - 1;
|
|
2831
2897
|
if (count <= 0) {
|
|
2832
|
-
sessionEntryCount.delete(
|
|
2833
|
-
|
|
2834
|
-
|
|
2898
|
+
sessionEntryCount.delete(r.sessionId);
|
|
2899
|
+
sessionModelsSet.delete(r.sessionId);
|
|
2900
|
+
sessionToolsSet.delete(r.sessionId);
|
|
2901
|
+
historyState.sessions.delete(r.sessionId);
|
|
2902
|
+
} else sessionEntryCount.set(r.sessionId, count);
|
|
2835
2903
|
}
|
|
2836
2904
|
}
|
|
2905
|
+
statsDirty = true;
|
|
2837
2906
|
notifyEntryAdded(summary);
|
|
2838
2907
|
}
|
|
2839
2908
|
/**
|
|
@@ -2849,10 +2918,24 @@ function updateEntry(id, update) {
|
|
|
2849
2918
|
const session = historyState.sessions.get(entry.sessionId);
|
|
2850
2919
|
if (session) {
|
|
2851
2920
|
const model = update.request.model;
|
|
2852
|
-
if (model
|
|
2921
|
+
if (model) {
|
|
2922
|
+
const modelsSet = sessionModelsSet.get(entry.sessionId);
|
|
2923
|
+
if (modelsSet && !modelsSet.has(model)) {
|
|
2924
|
+
modelsSet.add(model);
|
|
2925
|
+
session.models.push(model);
|
|
2926
|
+
}
|
|
2927
|
+
}
|
|
2853
2928
|
if (update.request.tools && update.request.tools.length > 0) {
|
|
2854
2929
|
if (!session.toolsUsed) session.toolsUsed = [];
|
|
2855
|
-
|
|
2930
|
+
let toolsSet = sessionToolsSet.get(entry.sessionId);
|
|
2931
|
+
if (!toolsSet) {
|
|
2932
|
+
toolsSet = new Set(session.toolsUsed);
|
|
2933
|
+
sessionToolsSet.set(entry.sessionId, toolsSet);
|
|
2934
|
+
}
|
|
2935
|
+
for (const tool of update.request.tools) if (!toolsSet.has(tool.name)) {
|
|
2936
|
+
toolsSet.add(tool.name);
|
|
2937
|
+
session.toolsUsed.push(tool.name);
|
|
2938
|
+
}
|
|
2856
2939
|
}
|
|
2857
2940
|
}
|
|
2858
2941
|
}
|
|
@@ -2867,6 +2950,7 @@ function updateEntry(id, update) {
|
|
|
2867
2950
|
session.lastActivity = Date.now();
|
|
2868
2951
|
}
|
|
2869
2952
|
}
|
|
2953
|
+
statsDirty = true;
|
|
2870
2954
|
const summary = toSummary(entry);
|
|
2871
2955
|
summaryIndex.set(entry.id, summary);
|
|
2872
2956
|
notifyEntryUpdated(summary);
|
|
@@ -2894,7 +2978,13 @@ function getHistorySummaries(options = {}) {
|
|
|
2894
2978
|
if (to) summaries = summaries.filter((s) => s.timestamp <= to);
|
|
2895
2979
|
if (search) {
|
|
2896
2980
|
const needle = search.toLowerCase();
|
|
2897
|
-
summaries = summaries.filter((s) =>
|
|
2981
|
+
summaries = summaries.filter((s) => {
|
|
2982
|
+
if (s.searchText === "") {
|
|
2983
|
+
const entry = entryIndex.get(s.id);
|
|
2984
|
+
if (entry) s.searchText = buildSearchText(entry);
|
|
2985
|
+
}
|
|
2986
|
+
return s.searchText.includes(needle);
|
|
2987
|
+
});
|
|
2898
2988
|
}
|
|
2899
2989
|
summaries.sort((a, b) => b.timestamp - a.timestamp);
|
|
2900
2990
|
const total = summaries.length;
|
|
@@ -2928,6 +3018,10 @@ function clearHistory() {
|
|
|
2928
3018
|
entryIndex.clear();
|
|
2929
3019
|
summaryIndex.clear();
|
|
2930
3020
|
sessionEntryCount.clear();
|
|
3021
|
+
sessionModelsSet.clear();
|
|
3022
|
+
sessionToolsSet.clear();
|
|
3023
|
+
statsDirty = true;
|
|
3024
|
+
cachedStats = null;
|
|
2931
3025
|
}
|
|
2932
3026
|
function deleteSession(sessionId) {
|
|
2933
3027
|
if (!historyState.sessions.has(sessionId)) return false;
|
|
@@ -2939,10 +3033,15 @@ function deleteSession(sessionId) {
|
|
|
2939
3033
|
historyState.entries = remaining;
|
|
2940
3034
|
historyState.sessions.delete(sessionId);
|
|
2941
3035
|
sessionEntryCount.delete(sessionId);
|
|
3036
|
+
sessionModelsSet.delete(sessionId);
|
|
3037
|
+
sessionToolsSet.delete(sessionId);
|
|
3038
|
+
statsDirty = true;
|
|
3039
|
+
cachedStats = null;
|
|
2942
3040
|
if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
|
|
2943
3041
|
return true;
|
|
2944
3042
|
}
|
|
2945
3043
|
function getStats() {
|
|
3044
|
+
if (!statsDirty && cachedStats) return cachedStats;
|
|
2946
3045
|
const entries = historyState.entries;
|
|
2947
3046
|
const modelDist = {};
|
|
2948
3047
|
const endpointDist = {};
|
|
@@ -2975,7 +3074,7 @@ function getStats() {
|
|
|
2975
3074
|
hour,
|
|
2976
3075
|
count
|
|
2977
3076
|
}));
|
|
2978
|
-
|
|
3077
|
+
const stats = {
|
|
2979
3078
|
totalRequests: entries.length,
|
|
2980
3079
|
successfulRequests: successCount,
|
|
2981
3080
|
failedRequests: failCount,
|
|
@@ -2987,6 +3086,9 @@ function getStats() {
|
|
|
2987
3086
|
recentActivity,
|
|
2988
3087
|
activeSessions: historyState.sessions.size
|
|
2989
3088
|
};
|
|
3089
|
+
statsDirty = false;
|
|
3090
|
+
cachedStats = stats;
|
|
3091
|
+
return stats;
|
|
2990
3092
|
}
|
|
2991
3093
|
/** Escape a value for CSV: wrap in quotes if it contains comma, quote, or newline; convert nullish to empty string */
|
|
2992
3094
|
function escapeCsvValue(value) {
|
|
@@ -3112,7 +3214,6 @@ async function gracefulShutdown(signal, deps) {
|
|
|
3112
3214
|
const tracker = deps?.tracker ?? tuiLogger;
|
|
3113
3215
|
const server = deps?.server ?? serverInstance;
|
|
3114
3216
|
const rateLimiter = deps?.rateLimiter !== void 0 ? deps.rateLimiter : getAdaptiveRateLimiter();
|
|
3115
|
-
const contextManager = deps?.contextManager ?? getRequestContextManager();
|
|
3116
3217
|
const stopRefresh = deps?.stopTokenRefreshFn ?? stopTokenRefresh;
|
|
3117
3218
|
const closeWsClients = deps?.closeAllClientsFn ?? closeAllClients;
|
|
3118
3219
|
const getWsClientCount = deps?.getClientCountFn ?? getClientCount;
|
|
@@ -3125,7 +3226,9 @@ async function gracefulShutdown(signal, deps) {
|
|
|
3125
3226
|
_isShuttingDown = true;
|
|
3126
3227
|
shutdownAbortController = new AbortController();
|
|
3127
3228
|
consola.info(`Received ${signal}, shutting down gracefully...`);
|
|
3128
|
-
|
|
3229
|
+
try {
|
|
3230
|
+
(deps?.contextManager ?? getRequestContextManager()).stopReaper();
|
|
3231
|
+
} catch {}
|
|
3129
3232
|
stopRefresh();
|
|
3130
3233
|
const wsClients = getWsClientCount();
|
|
3131
3234
|
if (wsClients > 0) {
|
|
@@ -3244,7 +3347,7 @@ var TuiLogger = class {
|
|
|
3244
3347
|
if (!entry) return;
|
|
3245
3348
|
if (update.model !== void 0) {
|
|
3246
3349
|
entry.model = update.model;
|
|
3247
|
-
const multiplier = state.
|
|
3350
|
+
const multiplier = state.modelIndex.get(update.model)?.billing?.multiplier;
|
|
3248
3351
|
if (multiplier !== void 0) entry.multiplier = multiplier;
|
|
3249
3352
|
}
|
|
3250
3353
|
if (update.clientModel !== void 0) entry.clientModel = update.clientModel;
|
|
@@ -3957,7 +4060,7 @@ const setupClaudeCode = defineCommand({
|
|
|
3957
4060
|
|
|
3958
4061
|
//#endregion
|
|
3959
4062
|
//#region package.json
|
|
3960
|
-
var version = "0.7.18-beta";
|
|
4063
|
+
var version = "0.7.18-beta.2";
|
|
3961
4064
|
|
|
3962
4065
|
//#endregion
|
|
3963
4066
|
//#region src/lib/config/config.ts
|
|
@@ -3999,9 +4102,15 @@ function compileRewriteRules(raws) {
|
|
|
3999
4102
|
}
|
|
4000
4103
|
let cachedConfig = null;
|
|
4001
4104
|
let configLastMtimeMs = 0;
|
|
4105
|
+
/** Time-based debounce: skip stat() if checked recently */
|
|
4106
|
+
let lastStatTimeMs = 0;
|
|
4107
|
+
const STAT_DEBOUNCE_MS = 2e3;
|
|
4002
4108
|
async function loadConfig() {
|
|
4003
4109
|
try {
|
|
4110
|
+
const now = Date.now();
|
|
4111
|
+
if (cachedConfig && now - lastStatTimeMs < STAT_DEBOUNCE_MS) return cachedConfig;
|
|
4004
4112
|
const stat = await fs.stat(PATHS.CONFIG_YAML);
|
|
4113
|
+
lastStatTimeMs = now;
|
|
4005
4114
|
if (cachedConfig && stat.mtimeMs === configLastMtimeMs) return cachedConfig;
|
|
4006
4115
|
const content = await fs.readFile(PATHS.CONFIG_YAML, "utf8");
|
|
4007
4116
|
const { parse } = await import("yaml");
|
|
@@ -4048,7 +4157,7 @@ async function applyConfigToState() {
|
|
|
4048
4157
|
else if (Array.isArray(a.rewrite_system_reminders)) state.rewriteSystemReminders = compileRewriteRules(a.rewrite_system_reminders);
|
|
4049
4158
|
}
|
|
4050
4159
|
}
|
|
4051
|
-
if (config.system_prompt_overrides
|
|
4160
|
+
if (Array.isArray(config.system_prompt_overrides)) state.systemPromptOverrides = config.system_prompt_overrides.length > 0 ? compileRewriteRules(config.system_prompt_overrides) : [];
|
|
4052
4161
|
if (config.model_overrides) state.modelOverrides = {
|
|
4053
4162
|
...DEFAULT_MODEL_OVERRIDES,
|
|
4054
4163
|
...config.model_overrides
|
|
@@ -4072,6 +4181,78 @@ async function applyConfigToState() {
|
|
|
4072
4181
|
return config;
|
|
4073
4182
|
}
|
|
4074
4183
|
|
|
4184
|
+
//#endregion
|
|
4185
|
+
//#region src/lib/context/error-persistence.ts
|
|
4186
|
+
/**
|
|
4187
|
+
* Error persistence consumer.
|
|
4188
|
+
*
|
|
4189
|
+
* Subscribes to "failed" events on RequestContext and writes structured
|
|
4190
|
+
* error files to disk for post-mortem debugging. All data comes from
|
|
4191
|
+
* RequestContext (via HistoryEntryData on the event), not from Hono
|
|
4192
|
+
* Context — ensuring reliability regardless of whether the HTTP body
|
|
4193
|
+
* has been consumed.
|
|
4194
|
+
*
|
|
4195
|
+
* Output directory: PATHS.ERROR_DIR/{timestamp}_{id}/
|
|
4196
|
+
* Files:
|
|
4197
|
+
* - meta.json: structured metadata (timestamp, endpoint, model, error, attempts)
|
|
4198
|
+
* - request.json: full request payload (messages capped at 50 for size)
|
|
4199
|
+
* - response.txt: raw upstream response body (if available)
|
|
4200
|
+
* - sse-events.json: recorded SSE events (if streaming request failed mid-stream)
|
|
4201
|
+
*/
|
|
4202
|
+
/** Handle context events — only acts on "failed" */
|
|
4203
|
+
function handleErrorPersistence(event) {
|
|
4204
|
+
if (event.type !== "failed") return;
|
|
4205
|
+
writeErrorEntry(event.entry).catch((err) => {
|
|
4206
|
+
consola.debug(`[ErrorPersistence] Failed to write error file: ${err}`);
|
|
4207
|
+
});
|
|
4208
|
+
}
|
|
4209
|
+
/** Max number of messages to include in request.json (to avoid huge files) */
|
|
4210
|
+
const MAX_MESSAGES_IN_DUMP = 50;
|
|
4211
|
+
async function writeErrorEntry(entry) {
|
|
4212
|
+
const meta = {
|
|
4213
|
+
timestamp: new Date(entry.timestamp).toISOString(),
|
|
4214
|
+
id: entry.id,
|
|
4215
|
+
endpoint: entry.endpoint,
|
|
4216
|
+
durationMs: entry.durationMs,
|
|
4217
|
+
request: {
|
|
4218
|
+
model: entry.request.model,
|
|
4219
|
+
stream: entry.request.stream,
|
|
4220
|
+
messageCount: entry.request.messages?.length,
|
|
4221
|
+
toolCount: entry.request.tools?.length
|
|
4222
|
+
},
|
|
4223
|
+
response: entry.response ? {
|
|
4224
|
+
success: entry.response.success,
|
|
4225
|
+
model: entry.response.model,
|
|
4226
|
+
error: entry.response.error,
|
|
4227
|
+
status: entry.response.status
|
|
4228
|
+
} : void 0,
|
|
4229
|
+
truncation: entry.truncation,
|
|
4230
|
+
attempts: entry.attempts
|
|
4231
|
+
};
|
|
4232
|
+
const files = [["meta.json", JSON.stringify(meta, null, 2)]];
|
|
4233
|
+
if (entry.request) {
|
|
4234
|
+
const { messages, ...requestWithoutMessages } = entry.request;
|
|
4235
|
+
const requestData = {
|
|
4236
|
+
...requestWithoutMessages,
|
|
4237
|
+
messageCount: messages?.length,
|
|
4238
|
+
...messages && messages.length <= MAX_MESSAGES_IN_DUMP && { messages }
|
|
4239
|
+
};
|
|
4240
|
+
files.push(["request.json", JSON.stringify(requestData, null, 2)]);
|
|
4241
|
+
}
|
|
4242
|
+
if (entry.response?.responseText) files.push(["response.txt", entry.response.responseText]);
|
|
4243
|
+
if (entry.sseEvents?.length) files.push(["sse-events.json", JSON.stringify(entry.sseEvents, null, 2)]);
|
|
4244
|
+
const id = randomBytes(4).toString("hex");
|
|
4245
|
+
const dirPath = path$1.join(PATHS.ERROR_DIR, `${formatTimestamp()}_${id}`);
|
|
4246
|
+
await fs$1.mkdir(dirPath, { recursive: true });
|
|
4247
|
+
await Promise.all(files.map(([name, content]) => fs$1.writeFile(path$1.join(dirPath, name), content)));
|
|
4248
|
+
}
|
|
4249
|
+
/** Format timestamp as YYMMDD_HHmmss for error directory names */
|
|
4250
|
+
function formatTimestamp() {
|
|
4251
|
+
const now = /* @__PURE__ */ new Date();
|
|
4252
|
+
const pad = (n) => String(n).padStart(2, "0");
|
|
4253
|
+
return `${String(now.getFullYear()).slice(2)}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
|
|
4254
|
+
}
|
|
4255
|
+
|
|
4075
4256
|
//#endregion
|
|
4076
4257
|
//#region src/lib/context/consumers.ts
|
|
4077
4258
|
function handleHistoryEvent(event) {
|
|
@@ -4190,6 +4371,7 @@ function toHistoryResponse(entryData) {
|
|
|
4190
4371
|
function registerContextConsumers(manager) {
|
|
4191
4372
|
manager.on("change", handleHistoryEvent);
|
|
4192
4373
|
manager.on("change", handleTuiEvent);
|
|
4374
|
+
manager.on("change", handleErrorPersistence);
|
|
4193
4375
|
}
|
|
4194
4376
|
|
|
4195
4377
|
//#endregion
|
|
@@ -4709,13 +4891,13 @@ const getTokenCount = async (payload, model) => {
|
|
|
4709
4891
|
*/
|
|
4710
4892
|
/**
|
|
4711
4893
|
* Log helpful debugging information when a 413 error occurs.
|
|
4712
|
-
*
|
|
4894
|
+
*
|
|
4895
|
+
* @param precomputedBytes - Optional pre-computed payload byte size to avoid redundant JSON.stringify
|
|
4713
4896
|
*/
|
|
4714
|
-
async function logPayloadSizeInfo(payload, model) {
|
|
4897
|
+
async function logPayloadSizeInfo(payload, model, precomputedBytes) {
|
|
4715
4898
|
const messageCount = payload.messages.length;
|
|
4716
|
-
const bodySize = JSON.stringify(payload).length;
|
|
4899
|
+
const bodySize = precomputedBytes ?? JSON.stringify(payload).length;
|
|
4717
4900
|
const bodySizeKB = bytesToKB(bodySize);
|
|
4718
|
-
onRequestTooLarge(bodySize);
|
|
4719
4901
|
let imageCount = 0;
|
|
4720
4902
|
let largeMessages = 0;
|
|
4721
4903
|
let totalImageSize = 0;
|
|
@@ -4797,7 +4979,7 @@ async function executeRequestPipeline(opts) {
|
|
|
4797
4979
|
try {
|
|
4798
4980
|
const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
|
|
4799
4981
|
totalQueueWaitMs += queueWaitMs;
|
|
4800
|
-
requestContext?.addQueueWaitMs(
|
|
4982
|
+
requestContext?.addQueueWaitMs(queueWaitMs);
|
|
4801
4983
|
return {
|
|
4802
4984
|
response,
|
|
4803
4985
|
effectivePayload,
|
|
@@ -5105,8 +5287,7 @@ function buildResponsesResponseData(acc, fallbackModel) {
|
|
|
5105
5287
|
/**
|
|
5106
5288
|
* Auto-truncate retry strategy.
|
|
5107
5289
|
*
|
|
5108
|
-
* Handles
|
|
5109
|
-
* message payload and retrying.
|
|
5290
|
+
* Handles token limit errors by truncating the message payload and retrying.
|
|
5110
5291
|
*/
|
|
5111
5292
|
/**
|
|
5112
5293
|
* Create an auto-truncate retry strategy.
|
|
@@ -5134,26 +5315,44 @@ function createAutoTruncateStrategy(opts) {
|
|
|
5134
5315
|
action: "abort",
|
|
5135
5316
|
error
|
|
5136
5317
|
};
|
|
5137
|
-
const
|
|
5138
|
-
const
|
|
5139
|
-
|
|
5140
|
-
|
|
5141
|
-
|
|
5142
|
-
|
|
5318
|
+
const payloadJson = JSON.stringify(currentPayload);
|
|
5319
|
+
const estimatedTokens = Math.ceil(payloadJson.length / 4);
|
|
5320
|
+
const parsed = tryParseAndLearnLimit(rawError, model.id, true, estimatedTokens);
|
|
5321
|
+
if (!parsed) {
|
|
5322
|
+
if (rawError.status === 413) {
|
|
5323
|
+
consola.info(`[${label}] Attempt ${attempt + 1}/${maxRetries + 1}: 413 Body too large, retrying with truncation...`);
|
|
5324
|
+
const truncateResult = await truncate(originalPayload, model, { checkTokenLimit: true });
|
|
5325
|
+
if (!truncateResult.wasTruncated) return {
|
|
5326
|
+
action: "abort",
|
|
5327
|
+
error
|
|
5328
|
+
};
|
|
5329
|
+
const sanitizeResult = resanitize(truncateResult.payload);
|
|
5330
|
+
return {
|
|
5331
|
+
action: "retry",
|
|
5332
|
+
payload: sanitizeResult.payload,
|
|
5333
|
+
meta: {
|
|
5334
|
+
truncateResult,
|
|
5335
|
+
sanitization: sanitizeResult.stats ?? {
|
|
5336
|
+
totalBlocksRemoved: sanitizeResult.removedCount,
|
|
5337
|
+
systemReminderRemovals: sanitizeResult.systemReminderRemovals
|
|
5338
|
+
},
|
|
5339
|
+
attempt: attempt + 1
|
|
5340
|
+
}
|
|
5341
|
+
};
|
|
5342
|
+
}
|
|
5343
|
+
return {
|
|
5344
|
+
action: "abort",
|
|
5345
|
+
error
|
|
5346
|
+
};
|
|
5347
|
+
}
|
|
5143
5348
|
let targetTokenLimit;
|
|
5144
|
-
|
|
5145
|
-
if (parsed.type === "token_limit" && parsed.limit) {
|
|
5349
|
+
if (parsed.limit) {
|
|
5146
5350
|
targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
5147
5351
|
consola.info(`[${label}] Attempt ${attempt + 1}/${maxRetries + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
|
|
5148
|
-
} else if (parsed.type === "body_too_large") {
|
|
5149
|
-
targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
|
|
5150
|
-
consola.info(`[${label}] Attempt ${attempt + 1}/${maxRetries + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
|
|
5151
5352
|
}
|
|
5152
5353
|
const truncateResult = await truncate(originalPayload, model, {
|
|
5153
5354
|
checkTokenLimit: true,
|
|
5154
|
-
|
|
5155
|
-
targetTokenLimit,
|
|
5156
|
-
targetByteLimitBytes
|
|
5355
|
+
targetTokenLimit
|
|
5157
5356
|
});
|
|
5158
5357
|
if (!truncateResult.wasTruncated) return {
|
|
5159
5358
|
action: "abort",
|
|
@@ -5576,12 +5775,13 @@ function sanitizeMessageParamContent(msg) {
|
|
|
5576
5775
|
*/
|
|
5577
5776
|
function removeAnthropicSystemReminders(messages) {
|
|
5578
5777
|
let modifiedCount = 0;
|
|
5778
|
+
const result = messages.map((msg) => {
|
|
5779
|
+
const sanitized = sanitizeMessageParamContent(msg);
|
|
5780
|
+
if (sanitized !== msg) modifiedCount++;
|
|
5781
|
+
return sanitized;
|
|
5782
|
+
});
|
|
5579
5783
|
return {
|
|
5580
|
-
messages: messages
|
|
5581
|
-
const sanitized = sanitizeMessageParamContent(msg);
|
|
5582
|
-
if (sanitized !== msg) modifiedCount++;
|
|
5583
|
-
return sanitized;
|
|
5584
|
-
}),
|
|
5784
|
+
messages: modifiedCount === 0 ? messages : result,
|
|
5585
5785
|
modifiedCount
|
|
5586
5786
|
};
|
|
5587
5787
|
}
|
|
@@ -6180,11 +6380,11 @@ function convertServerToolsToCustom(tools) {
|
|
|
6180
6380
|
* Auto-truncate module for Anthropic-style messages.
|
|
6181
6381
|
*
|
|
6182
6382
|
* This module handles automatic truncation of Anthropic message format
|
|
6183
|
-
* when it exceeds token
|
|
6383
|
+
* when it exceeds token limits.
|
|
6184
6384
|
*
|
|
6185
6385
|
* Key features:
|
|
6186
6386
|
* - Binary search for optimal truncation point
|
|
6187
|
-
* -
|
|
6387
|
+
* - Token limit enforcement with learned calibration
|
|
6188
6388
|
* - Preserves system messages
|
|
6189
6389
|
* - Filters orphaned tool_result and tool_use messages
|
|
6190
6390
|
* - Smart compression of old tool_result content (e.g., Read tool results)
|
|
@@ -6315,15 +6515,6 @@ async function countTotalInputTokens(payload, model) {
|
|
|
6315
6515
|
}
|
|
6316
6516
|
return total;
|
|
6317
6517
|
}
|
|
6318
|
-
/** Get byte size of a message (memoized to avoid redundant JSON.stringify) */
|
|
6319
|
-
const messageBytesCache$1 = /* @__PURE__ */ new WeakMap();
|
|
6320
|
-
function getMessageBytes$1(msg) {
|
|
6321
|
-
let cached = messageBytesCache$1.get(msg);
|
|
6322
|
-
if (cached !== void 0) return cached;
|
|
6323
|
-
cached = JSON.stringify(msg).length;
|
|
6324
|
-
messageBytesCache$1.set(msg, cached);
|
|
6325
|
-
return cached;
|
|
6326
|
-
}
|
|
6327
6518
|
/**
|
|
6328
6519
|
* Strip thinking/redacted_thinking blocks from old assistant messages.
|
|
6329
6520
|
*
|
|
@@ -6377,26 +6568,20 @@ function compressToolResultBlock(block) {
|
|
|
6377
6568
|
}
|
|
6378
6569
|
/**
|
|
6379
6570
|
* Smart compression strategy:
|
|
6380
|
-
* 1. Calculate tokens
|
|
6571
|
+
* 1. Calculate tokens from the end until reaching preservePercent of limit
|
|
6381
6572
|
* 2. Messages before that threshold get their tool_results compressed
|
|
6382
6573
|
* 3. Returns compressed messages and stats
|
|
6383
6574
|
*
|
|
6384
6575
|
* @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
|
|
6385
6576
|
*/
|
|
6386
|
-
function smartCompressToolResults$1(messages, tokenLimit,
|
|
6577
|
+
function smartCompressToolResults$1(messages, tokenLimit, preservePercent) {
|
|
6387
6578
|
const n = messages.length;
|
|
6388
6579
|
const cumTokens = Array.from({ length: n + 1 }, () => 0);
|
|
6389
|
-
|
|
6390
|
-
for (let i = n - 1; i >= 0; i--) {
|
|
6391
|
-
const msg = messages[i];
|
|
6392
|
-
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
|
|
6393
|
-
cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
|
|
6394
|
-
}
|
|
6580
|
+
for (let i = n - 1; i >= 0; i--) cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(messages[i]);
|
|
6395
6581
|
const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
|
|
6396
|
-
const preserveByteLimit = Math.floor(byteLimit * preservePercent);
|
|
6397
6582
|
let thresholdIndex = n;
|
|
6398
6583
|
for (let i = n - 1; i >= 0; i--) {
|
|
6399
|
-
if (cumTokens[i] > preserveTokenLimit
|
|
6584
|
+
if (cumTokens[i] > preserveTokenLimit) {
|
|
6400
6585
|
thresholdIndex = i + 1;
|
|
6401
6586
|
break;
|
|
6402
6587
|
}
|
|
@@ -6448,40 +6633,35 @@ function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePer
|
|
|
6448
6633
|
};
|
|
6449
6634
|
}
|
|
6450
6635
|
/** Default fallback for when model capabilities are not available */
|
|
6451
|
-
const DEFAULT_CONTEXT_WINDOW = 2e5;
|
|
6452
|
-
|
|
6453
|
-
|
|
6454
|
-
|
|
6455
|
-
|
|
6456
|
-
|
|
6457
|
-
|
|
6458
|
-
return
|
|
6459
|
-
|
|
6460
|
-
|
|
6461
|
-
|
|
6636
|
+
const DEFAULT_CONTEXT_WINDOW$1 = 2e5;
|
|
6637
|
+
/**
|
|
6638
|
+
* Calculate the effective token limit for auto-truncate.
|
|
6639
|
+
* Uses explicit target if provided, otherwise learned limits with calibration,
|
|
6640
|
+
* otherwise model capabilities with safety margin.
|
|
6641
|
+
*/
|
|
6642
|
+
function calculateTokenLimit$1(model, config) {
|
|
6643
|
+
if (config.targetTokenLimit !== void 0) return config.targetTokenLimit;
|
|
6644
|
+
const learned = getLearnedLimits(model.id);
|
|
6645
|
+
if (learned) {
|
|
6646
|
+
const margin = computeSafetyMargin(learned.sampleCount);
|
|
6647
|
+
return Math.floor(learned.tokenLimit * (1 - margin));
|
|
6648
|
+
}
|
|
6649
|
+
const rawTokenLimit = model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW$1;
|
|
6650
|
+
return Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
|
|
6462
6651
|
}
|
|
6463
6652
|
function findOptimalPreserveIndex$1(params) {
|
|
6464
|
-
const { messages,
|
|
6653
|
+
const { messages, systemTokens, tokenLimit } = params;
|
|
6465
6654
|
if (messages.length === 0) return 0;
|
|
6466
|
-
const markerBytes = 200;
|
|
6467
6655
|
const availableTokens = tokenLimit - systemTokens - 50;
|
|
6468
|
-
|
|
6469
|
-
if (checkTokenLimit && availableTokens <= 0 || checkByteLimit && availableBytes <= 0) return messages.length;
|
|
6656
|
+
if (availableTokens <= 0) return messages.length;
|
|
6470
6657
|
const n = messages.length;
|
|
6471
6658
|
const cumTokens = Array.from({ length: n + 1 }, () => 0);
|
|
6472
|
-
|
|
6473
|
-
for (let i = n - 1; i >= 0; i--) {
|
|
6474
|
-
const msg = messages[i];
|
|
6475
|
-
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
|
|
6476
|
-
cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
|
|
6477
|
-
}
|
|
6659
|
+
for (let i = n - 1; i >= 0; i--) cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(messages[i]);
|
|
6478
6660
|
let left = 0;
|
|
6479
6661
|
let right = n;
|
|
6480
6662
|
while (left < right) {
|
|
6481
6663
|
const mid = left + right >>> 1;
|
|
6482
|
-
|
|
6483
|
-
const bytesFit = !checkByteLimit || cumBytes[mid] <= availableBytes;
|
|
6484
|
-
if (tokensFit && bytesFit) right = mid;
|
|
6664
|
+
if (cumTokens[mid] <= availableTokens) right = mid;
|
|
6485
6665
|
else left = mid + 1;
|
|
6486
6666
|
}
|
|
6487
6667
|
return left;
|
|
@@ -6572,36 +6752,28 @@ async function autoTruncateAnthropic(payload, model, config = {}) {
|
|
|
6572
6752
|
...DEFAULT_AUTO_TRUNCATE_CONFIG,
|
|
6573
6753
|
...config
|
|
6574
6754
|
};
|
|
6575
|
-
const
|
|
6755
|
+
const tokenLimit = calculateTokenLimit$1(model, cfg);
|
|
6576
6756
|
const fixedTokens = await countFixedTokens(payload, model);
|
|
6577
|
-
const originalBytes = JSON.stringify(payload).length;
|
|
6578
6757
|
const originalTokens = fixedTokens + await countMessagesTokens(payload.messages, model);
|
|
6579
|
-
if (originalTokens <= tokenLimit
|
|
6758
|
+
if (originalTokens <= tokenLimit) return buildResult({
|
|
6580
6759
|
payload,
|
|
6581
6760
|
wasTruncated: false,
|
|
6582
6761
|
originalTokens,
|
|
6583
6762
|
compactedTokens: originalTokens,
|
|
6584
6763
|
removedMessageCount: 0
|
|
6585
6764
|
});
|
|
6586
|
-
const exceedsTokens = originalTokens > tokenLimit;
|
|
6587
|
-
const exceedsBytes = originalBytes > byteLimit;
|
|
6588
6765
|
const { messages: thinkingStripped, strippedCount: thinkingStrippedCount } = stripThinkingBlocks(payload.messages, 4);
|
|
6589
6766
|
let workingMessages = thinkingStripped;
|
|
6590
6767
|
if (thinkingStrippedCount > 0) {
|
|
6591
|
-
const strippedPayload = {
|
|
6592
|
-
...payload,
|
|
6593
|
-
messages: workingMessages
|
|
6594
|
-
};
|
|
6595
|
-
const strippedBytes = JSON.stringify(strippedPayload).length;
|
|
6596
6768
|
const strippedTokens = fixedTokens + await countMessagesTokens(workingMessages, model);
|
|
6597
|
-
if (strippedTokens <= tokenLimit
|
|
6598
|
-
let reason = "tokens";
|
|
6599
|
-
if (exceedsTokens && exceedsBytes) reason = "tokens+size";
|
|
6600
|
-
else if (exceedsBytes) reason = "size";
|
|
6769
|
+
if (strippedTokens <= tokenLimit) {
|
|
6601
6770
|
const elapsedMs = Math.round(performance.now() - startTime);
|
|
6602
|
-
consola.info(`[AutoTruncate:Anthropic]
|
|
6771
|
+
consola.info(`[AutoTruncate:Anthropic] tokens: ${originalTokens}→${strippedTokens} (stripped ${thinkingStrippedCount} thinking blocks) [${elapsedMs}ms]`);
|
|
6603
6772
|
return buildResult({
|
|
6604
|
-
payload:
|
|
6773
|
+
payload: {
|
|
6774
|
+
...payload,
|
|
6775
|
+
messages: workingMessages
|
|
6776
|
+
},
|
|
6605
6777
|
wasTruncated: true,
|
|
6606
6778
|
originalTokens,
|
|
6607
6779
|
compactedTokens: strippedTokens,
|
|
@@ -6611,47 +6783,37 @@ async function autoTruncateAnthropic(payload, model, config = {}) {
|
|
|
6611
6783
|
}
|
|
6612
6784
|
let compressedCount = 0;
|
|
6613
6785
|
if (state.compressToolResultsBeforeTruncate) {
|
|
6614
|
-
const compressionResult = smartCompressToolResults$1(workingMessages, tokenLimit,
|
|
6786
|
+
const compressionResult = smartCompressToolResults$1(workingMessages, tokenLimit, cfg.preserveRecentPercent);
|
|
6615
6787
|
workingMessages = compressionResult.messages;
|
|
6616
6788
|
compressedCount = compressionResult.compressedCount;
|
|
6617
|
-
const compressedPayload = {
|
|
6618
|
-
...payload,
|
|
6619
|
-
messages: workingMessages
|
|
6620
|
-
};
|
|
6621
|
-
const compressedBytes = JSON.stringify(compressedPayload).length;
|
|
6622
6789
|
const compressedTokens = fixedTokens + await countMessagesTokens(workingMessages, model);
|
|
6623
|
-
if (compressedTokens <= tokenLimit
|
|
6624
|
-
let reason = "tokens";
|
|
6625
|
-
if (exceedsTokens && exceedsBytes) reason = "tokens+size";
|
|
6626
|
-
else if (exceedsBytes) reason = "size";
|
|
6790
|
+
if (compressedTokens <= tokenLimit) {
|
|
6627
6791
|
const elapsedMs = Math.round(performance.now() - startTime);
|
|
6628
|
-
consola.info(`[AutoTruncate:Anthropic]
|
|
6792
|
+
consola.info(`[AutoTruncate:Anthropic] tokens: ${originalTokens}→${compressedTokens} (compressed ${compressedCount} tool_results) [${elapsedMs}ms]`);
|
|
6629
6793
|
return buildResult({
|
|
6630
|
-
payload: addCompressionNotice$1(
|
|
6794
|
+
payload: addCompressionNotice$1({
|
|
6795
|
+
...payload,
|
|
6796
|
+
messages: workingMessages
|
|
6797
|
+
}, compressedCount),
|
|
6631
6798
|
wasTruncated: true,
|
|
6632
6799
|
originalTokens,
|
|
6633
6800
|
compactedTokens: compressedTokens + (Math.ceil(150 / 4) + 4),
|
|
6634
6801
|
removedMessageCount: 0
|
|
6635
6802
|
});
|
|
6636
6803
|
}
|
|
6637
|
-
const allCompression = smartCompressToolResults$1(workingMessages, tokenLimit,
|
|
6804
|
+
const allCompression = smartCompressToolResults$1(workingMessages, tokenLimit, 0);
|
|
6638
6805
|
if (allCompression.compressedCount > 0) {
|
|
6639
6806
|
workingMessages = allCompression.messages;
|
|
6640
6807
|
compressedCount += allCompression.compressedCount;
|
|
6641
|
-
const allCompressedPayload = {
|
|
6642
|
-
...payload,
|
|
6643
|
-
messages: workingMessages
|
|
6644
|
-
};
|
|
6645
|
-
const allCompressedBytes = JSON.stringify(allCompressedPayload).length;
|
|
6646
6808
|
const allCompressedTokens = fixedTokens + await countMessagesTokens(workingMessages, model);
|
|
6647
|
-
if (allCompressedTokens <= tokenLimit
|
|
6648
|
-
let reason = "tokens";
|
|
6649
|
-
if (exceedsTokens && exceedsBytes) reason = "tokens+size";
|
|
6650
|
-
else if (exceedsBytes) reason = "size";
|
|
6809
|
+
if (allCompressedTokens <= tokenLimit) {
|
|
6651
6810
|
const elapsedMs = Math.round(performance.now() - startTime);
|
|
6652
|
-
consola.info(`[AutoTruncate:Anthropic]
|
|
6811
|
+
consola.info(`[AutoTruncate:Anthropic] tokens: ${originalTokens}→${allCompressedTokens} (compressed ${compressedCount} tool_results, including recent) [${elapsedMs}ms]`);
|
|
6653
6812
|
return buildResult({
|
|
6654
|
-
payload: addCompressionNotice$1(
|
|
6813
|
+
payload: addCompressionNotice$1({
|
|
6814
|
+
...payload,
|
|
6815
|
+
messages: workingMessages
|
|
6816
|
+
}, compressedCount),
|
|
6655
6817
|
wasTruncated: true,
|
|
6656
6818
|
originalTokens,
|
|
6657
6819
|
compactedTokens: allCompressedTokens + (Math.ceil(150 / 4) + 4),
|
|
@@ -6660,23 +6822,11 @@ async function autoTruncateAnthropic(payload, model, config = {}) {
|
|
|
6660
6822
|
}
|
|
6661
6823
|
}
|
|
6662
6824
|
}
|
|
6663
|
-
const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
|
|
6664
6825
|
const systemTokens = await countSystemTokens(payload.system, model);
|
|
6665
|
-
const messagesBytes = workingMessages.reduce((sum, msg) => sum + getMessageBytes$1(msg) + 1, 0) + 2;
|
|
6666
|
-
const payloadOverhead = JSON.stringify({
|
|
6667
|
-
...payload,
|
|
6668
|
-
messages: workingMessages
|
|
6669
|
-
}).length - messagesBytes - systemBytes;
|
|
6670
|
-
consola.debug(`[AutoTruncate:Anthropic] overhead=${bytesToKB(payloadOverhead)}KB, system=${bytesToKB(systemBytes)}KB`);
|
|
6671
6826
|
const preserveIndex = findOptimalPreserveIndex$1({
|
|
6672
6827
|
messages: workingMessages,
|
|
6673
|
-
systemBytes,
|
|
6674
6828
|
systemTokens,
|
|
6675
|
-
|
|
6676
|
-
tokenLimit,
|
|
6677
|
-
byteLimit,
|
|
6678
|
-
checkTokenLimit: cfg.checkTokenLimit,
|
|
6679
|
-
checkByteLimit: cfg.checkByteLimit
|
|
6829
|
+
tokenLimit
|
|
6680
6830
|
});
|
|
6681
6831
|
if (preserveIndex >= workingMessages.length) {
|
|
6682
6832
|
consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
|
|
@@ -6724,17 +6874,14 @@ async function autoTruncateAnthropic(payload, model, config = {}) {
|
|
|
6724
6874
|
const newBytes = JSON.stringify(newPayload).length;
|
|
6725
6875
|
const newMsgTokens = await countMessagesTokens(newMessages, model);
|
|
6726
6876
|
const newTokens = (newSystem !== payload.system ? await countSystemTokens(newSystem, model) : systemTokens) + (fixedTokens - await countSystemTokens(payload.system, model)) + newMsgTokens;
|
|
6727
|
-
let reason = "tokens";
|
|
6728
|
-
if (exceedsTokens && exceedsBytes) reason = "tokens+size";
|
|
6729
|
-
else if (exceedsBytes) reason = "size";
|
|
6730
6877
|
const actions = [];
|
|
6731
6878
|
if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
|
|
6732
6879
|
if (thinkingStrippedCount > 0) actions.push(`stripped ${thinkingStrippedCount} thinking blocks`);
|
|
6733
6880
|
if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
|
|
6734
6881
|
const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
|
|
6735
6882
|
const elapsedMs = Math.round(performance.now() - startTime);
|
|
6736
|
-
consola.info(`[AutoTruncate:Anthropic]
|
|
6737
|
-
if (
|
|
6883
|
+
consola.info(`[AutoTruncate:Anthropic] tokens: ${originalTokens}→${newTokens}, ${bytesToKB(newBytes)}KB${actionInfo} [${elapsedMs}ms]`);
|
|
6884
|
+
if (newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over token limit (${newTokens} > ${tokenLimit})`);
|
|
6738
6885
|
return buildResult({
|
|
6739
6886
|
payload: newPayload,
|
|
6740
6887
|
wasTruncated: true,
|
|
@@ -6744,32 +6891,43 @@ async function autoTruncateAnthropic(payload, model, config = {}) {
|
|
|
6744
6891
|
});
|
|
6745
6892
|
}
|
|
6746
6893
|
/**
|
|
6747
|
-
* Check if payload needs compaction.
|
|
6894
|
+
* Check if payload needs compaction based on learned model limits.
|
|
6895
|
+
* Returns early with `needed: false` when no limits are known for the model.
|
|
6748
6896
|
*/
|
|
6749
6897
|
async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
|
|
6750
6898
|
const cfg = {
|
|
6751
6899
|
...DEFAULT_AUTO_TRUNCATE_CONFIG,
|
|
6752
6900
|
...config
|
|
6753
6901
|
};
|
|
6754
|
-
const
|
|
6755
|
-
|
|
6756
|
-
|
|
6902
|
+
const learned = getLearnedLimits(model.id);
|
|
6903
|
+
if (!learned && cfg.targetTokenLimit === void 0) return {
|
|
6904
|
+
needed: false,
|
|
6905
|
+
currentTokens: 0,
|
|
6906
|
+
tokenLimit: 0
|
|
6907
|
+
};
|
|
6908
|
+
const tokenLimit = calculateTokenLimit$1(model, cfg);
|
|
6909
|
+
const rawTokens = await countTotalTokens(payload, model);
|
|
6910
|
+
const currentTokens = learned && learned.sampleCount > 0 ? calibrate(model.id, rawTokens) : rawTokens;
|
|
6757
6911
|
const exceedsTokens = cfg.checkTokenLimit && currentTokens > tokenLimit;
|
|
6758
|
-
const exceedsBytes = cfg.checkByteLimit && currentBytes > byteLimit;
|
|
6759
|
-
let reason;
|
|
6760
|
-
if (exceedsTokens && exceedsBytes) reason = "both";
|
|
6761
|
-
else if (exceedsTokens) reason = "tokens";
|
|
6762
|
-
else if (exceedsBytes) reason = "bytes";
|
|
6763
6912
|
return {
|
|
6764
|
-
needed: exceedsTokens
|
|
6913
|
+
needed: exceedsTokens,
|
|
6765
6914
|
currentTokens,
|
|
6766
6915
|
tokenLimit,
|
|
6767
|
-
|
|
6768
|
-
byteLimit,
|
|
6769
|
-
reason
|
|
6916
|
+
reason: exceedsTokens ? "tokens" : void 0
|
|
6770
6917
|
};
|
|
6771
6918
|
}
|
|
6772
6919
|
|
|
6920
|
+
//#endregion
|
|
6921
|
+
//#region src/lib/fetch-utils.ts
|
|
6922
|
+
/**
|
|
6923
|
+
* Create an AbortSignal for fetch timeout if configured.
|
|
6924
|
+
* Controls the time from request start to receiving response headers.
|
|
6925
|
+
* Returns undefined if fetchTimeout is 0 (disabled).
|
|
6926
|
+
*/
|
|
6927
|
+
function createFetchSignal() {
|
|
6928
|
+
return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
|
|
6929
|
+
}
|
|
6930
|
+
|
|
6773
6931
|
//#endregion
|
|
6774
6932
|
//#region src/lib/anthropic/features.ts
|
|
6775
6933
|
/**
|
|
@@ -7126,7 +7284,7 @@ async function createAnthropicMessages(payload) {
|
|
|
7126
7284
|
}
|
|
7127
7285
|
}
|
|
7128
7286
|
consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
|
|
7129
|
-
const fetchSignal =
|
|
7287
|
+
const fetchSignal = createFetchSignal();
|
|
7130
7288
|
const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
|
|
7131
7289
|
method: "POST",
|
|
7132
7290
|
headers,
|
|
@@ -7365,7 +7523,7 @@ function raceIteratorNext(promise, opts) {
|
|
|
7365
7523
|
* Returns a decision with reason so callers can log/display the routing rationale.
|
|
7366
7524
|
*/
|
|
7367
7525
|
function supportsDirectAnthropicApi(modelId) {
|
|
7368
|
-
const model = state.
|
|
7526
|
+
const model = state.modelIndex.get(modelId);
|
|
7369
7527
|
if (model?.vendor !== "Anthropic") return {
|
|
7370
7528
|
supported: false,
|
|
7371
7529
|
reason: `vendor is "${model?.vendor ?? "unknown"}", not Anthropic`
|
|
@@ -7387,6 +7545,12 @@ function supportsDirectAnthropicApi(modelId) {
|
|
|
7387
7545
|
async function handleAnthropicMessagesCompletion(c, anthropicPayload, options) {
|
|
7388
7546
|
if (anthropicPayload.system) anthropicPayload.system = await processAnthropicSystem(anthropicPayload.system);
|
|
7389
7547
|
const tuiLogId = c.get("tuiLogId");
|
|
7548
|
+
const routingDecision = supportsDirectAnthropicApi(anthropicPayload.model);
|
|
7549
|
+
if (!routingDecision.supported) {
|
|
7550
|
+
const msg = `Model "${anthropicPayload.model}" does not support /v1/messages: ${routingDecision.reason}`;
|
|
7551
|
+
throw new HTTPError(msg, 400, msg);
|
|
7552
|
+
}
|
|
7553
|
+
consola.debug(`[AnthropicRouting] ${anthropicPayload.model}: ${routingDecision.reason}`);
|
|
7390
7554
|
const reqCtx = getRequestContextManager().create({
|
|
7391
7555
|
endpoint: "anthropic",
|
|
7392
7556
|
tuiLogId
|
|
@@ -7409,17 +7573,11 @@ async function handleAnthropicMessagesCompletion(c, anthropicPayload, options) {
|
|
|
7409
7573
|
strippedReadTagCount: preprocessed.strippedReadTagCount,
|
|
7410
7574
|
dedupedToolCallCount: preprocessed.dedupedToolCallCount
|
|
7411
7575
|
});
|
|
7412
|
-
const routingDecision = supportsDirectAnthropicApi(anthropicPayload.model);
|
|
7413
|
-
if (!routingDecision.supported) {
|
|
7414
|
-
const msg = `Model "${anthropicPayload.model}" does not support /v1/messages: ${routingDecision.reason}`;
|
|
7415
|
-
throw new HTTPError(msg, 400, msg);
|
|
7416
|
-
}
|
|
7417
|
-
consola.debug(`[AnthropicRouting] ${anthropicPayload.model}: ${routingDecision.reason}`);
|
|
7418
7576
|
return handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx);
|
|
7419
7577
|
}
|
|
7420
7578
|
async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
|
|
7421
7579
|
consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
|
|
7422
|
-
const selectedModel = state.
|
|
7580
|
+
const selectedModel = state.modelIndex.get(anthropicPayload.model);
|
|
7423
7581
|
const { payload: initialSanitized, stats: sanitizationStats } = sanitizeAnthropicMessages(anthropicPayload);
|
|
7424
7582
|
reqCtx.addSanitizationInfo(toSanitizationInfo(sanitizationStats));
|
|
7425
7583
|
const hasPreprocessing = reqCtx.preprocessInfo ? reqCtx.preprocessInfo.dedupedToolCallCount > 0 || reqCtx.preprocessInfo.strippedReadTagCount > 0 : false;
|
|
@@ -7527,8 +7685,8 @@ function combineAbortSignals(...signals) {
|
|
|
7527
7685
|
async function* processAnthropicStream(response, acc, clientAbortSignal) {
|
|
7528
7686
|
const idleTimeoutMs = state.streamIdleTimeout * 1e3;
|
|
7529
7687
|
const iterator = response[Symbol.asyncIterator]();
|
|
7688
|
+
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
|
|
7530
7689
|
for (;;) {
|
|
7531
|
-
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
|
|
7532
7690
|
const result = await raceIteratorNext(iterator.next(), {
|
|
7533
7691
|
idleTimeoutMs,
|
|
7534
7692
|
abortSignal
|
|
@@ -7597,7 +7755,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
|
|
|
7597
7755
|
await stream.writeSSE({
|
|
7598
7756
|
data: rawEvent.data ?? "",
|
|
7599
7757
|
event: rawEvent.event,
|
|
7600
|
-
id: String(rawEvent.id),
|
|
7758
|
+
id: rawEvent.id != null ? String(rawEvent.id) : void 0,
|
|
7601
7759
|
retry: rawEvent.retry
|
|
7602
7760
|
});
|
|
7603
7761
|
}
|
|
@@ -7761,26 +7919,31 @@ function extractOpenAISystemMessages(messages) {
|
|
|
7761
7919
|
//#region src/lib/openai/auto-truncate.ts
|
|
7762
7920
|
/**
|
|
7763
7921
|
* Auto-truncate module: Automatically truncates conversation history
|
|
7764
|
-
* when it exceeds token
|
|
7922
|
+
* when it exceeds token limits (OpenAI format).
|
|
7765
7923
|
*
|
|
7766
7924
|
* Key features:
|
|
7767
7925
|
* - Binary search for optimal truncation point
|
|
7768
|
-
* -
|
|
7926
|
+
* - Token limit enforcement with learned calibration
|
|
7769
7927
|
* - Preserves system messages
|
|
7770
7928
|
* - Filters orphaned tool_result and tool_use messages
|
|
7771
|
-
* - Dynamic byte limit adjustment on 413 errors
|
|
7772
7929
|
* - Optional smart compression of old tool_result content
|
|
7773
7930
|
*/
|
|
7774
|
-
|
|
7775
|
-
|
|
7776
|
-
|
|
7777
|
-
|
|
7778
|
-
|
|
7779
|
-
|
|
7780
|
-
|
|
7781
|
-
|
|
7782
|
-
|
|
7783
|
-
|
|
7931
|
+
/** Default fallback for when model capabilities are not available */
|
|
7932
|
+
const DEFAULT_CONTEXT_WINDOW = 128e3;
|
|
7933
|
+
/**
|
|
7934
|
+
* Calculate the effective token limit for auto-truncate.
|
|
7935
|
+
* Uses explicit target if provided, otherwise learned limits with calibration,
|
|
7936
|
+
* otherwise model capabilities with safety margin.
|
|
7937
|
+
*/
|
|
7938
|
+
function calculateTokenLimit(model, config) {
|
|
7939
|
+
if (config.targetTokenLimit !== void 0) return config.targetTokenLimit;
|
|
7940
|
+
const learned = getLearnedLimits(model.id);
|
|
7941
|
+
if (learned) {
|
|
7942
|
+
const margin = computeSafetyMargin(learned.sampleCount);
|
|
7943
|
+
return Math.floor(learned.tokenLimit * (1 - margin));
|
|
7944
|
+
}
|
|
7945
|
+
const rawTokenLimit = model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
|
|
7946
|
+
return Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
|
|
7784
7947
|
}
|
|
7785
7948
|
/** Estimate tokens for a single message (fast approximation) */
|
|
7786
7949
|
function estimateMessageTokens(msg) {
|
|
@@ -7793,28 +7956,12 @@ function estimateMessageTokens(msg) {
|
|
|
7793
7956
|
if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
|
|
7794
7957
|
return Math.ceil(charCount / 4) + 10;
|
|
7795
7958
|
}
|
|
7796
|
-
/**
|
|
7797
|
-
const messageBytesCache = /* @__PURE__ */ new WeakMap();
|
|
7798
|
-
function getMessageBytes(msg) {
|
|
7799
|
-
let cached = messageBytesCache.get(msg);
|
|
7800
|
-
if (cached !== void 0) return cached;
|
|
7801
|
-
cached = JSON.stringify(msg).length;
|
|
7802
|
-
messageBytesCache.set(msg, cached);
|
|
7803
|
-
return cached;
|
|
7804
|
-
}
|
|
7805
|
-
/** Calculate cumulative token and byte sums from the end of the message array */
|
|
7959
|
+
/** Calculate cumulative token sums from the end of the message array */
|
|
7806
7960
|
function calculateCumulativeSums(messages) {
|
|
7807
7961
|
const n = messages.length;
|
|
7808
7962
|
const cumTokens = Array.from({ length: n + 1 }).fill(0);
|
|
7809
|
-
|
|
7810
|
-
|
|
7811
|
-
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(messages[i]);
|
|
7812
|
-
cumBytes[i] = cumBytes[i + 1] + getMessageBytes(messages[i]) + 1;
|
|
7813
|
-
}
|
|
7814
|
-
return {
|
|
7815
|
-
cumTokens,
|
|
7816
|
-
cumBytes
|
|
7817
|
-
};
|
|
7963
|
+
for (let i = n - 1; i >= 0; i--) cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(messages[i]);
|
|
7964
|
+
return { cumTokens };
|
|
7818
7965
|
}
|
|
7819
7966
|
/**
|
|
7820
7967
|
* Clean up orphaned tool messages and ensure valid conversation start.
|
|
@@ -7833,20 +7980,19 @@ function cleanupMessages(messages) {
|
|
|
7833
7980
|
}
|
|
7834
7981
|
/**
|
|
7835
7982
|
* Smart compression strategy for OpenAI format:
|
|
7836
|
-
* 1. Calculate tokens
|
|
7983
|
+
* 1. Calculate tokens from the end until reaching preservePercent of limit
|
|
7837
7984
|
* 2. Messages before that threshold get their tool content compressed
|
|
7838
7985
|
* 3. Returns compressed messages and stats
|
|
7839
7986
|
*
|
|
7840
7987
|
* @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
|
|
7841
7988
|
*/
|
|
7842
|
-
function smartCompressToolResults(messages, tokenLimit,
|
|
7989
|
+
function smartCompressToolResults(messages, tokenLimit, preservePercent) {
|
|
7843
7990
|
const n = messages.length;
|
|
7844
|
-
const { cumTokens
|
|
7991
|
+
const { cumTokens } = calculateCumulativeSums(messages);
|
|
7845
7992
|
const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
|
|
7846
|
-
const preserveByteLimit = Math.floor(byteLimit * preservePercent);
|
|
7847
7993
|
let thresholdIndex = n;
|
|
7848
7994
|
for (let i = n - 1; i >= 0; i--) {
|
|
7849
|
-
if (cumTokens[i] > preserveTokenLimit
|
|
7995
|
+
if (cumTokens[i] > preserveTokenLimit) {
|
|
7850
7996
|
thresholdIndex = i + 1;
|
|
7851
7997
|
break;
|
|
7852
7998
|
}
|
|
@@ -7882,21 +8028,17 @@ function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePerce
|
|
|
7882
8028
|
* Returns the smallest index where the preserved portion fits within limits.
|
|
7883
8029
|
*/
|
|
7884
8030
|
function findOptimalPreserveIndex(params) {
|
|
7885
|
-
const { messages,
|
|
8031
|
+
const { messages, systemTokens, tokenLimit } = params;
|
|
7886
8032
|
if (messages.length === 0) return 0;
|
|
7887
|
-
const markerBytes = 200;
|
|
7888
8033
|
const availableTokens = tokenLimit - systemTokens - 50;
|
|
7889
|
-
|
|
7890
|
-
if (checkTokenLimit && availableTokens <= 0 || checkByteLimit && availableBytes <= 0) return messages.length;
|
|
8034
|
+
if (availableTokens <= 0) return messages.length;
|
|
7891
8035
|
const n = messages.length;
|
|
7892
|
-
const { cumTokens
|
|
8036
|
+
const { cumTokens } = calculateCumulativeSums(messages);
|
|
7893
8037
|
let left = 0;
|
|
7894
8038
|
let right = n;
|
|
7895
8039
|
while (left < right) {
|
|
7896
8040
|
const mid = left + right >>> 1;
|
|
7897
|
-
|
|
7898
|
-
const bytesFit = !checkByteLimit || cumBytes[mid] <= availableBytes;
|
|
7899
|
-
if (tokensFit && bytesFit) right = mid;
|
|
8041
|
+
if (cumTokens[mid] <= availableTokens) right = mid;
|
|
7900
8042
|
else left = mid + 1;
|
|
7901
8043
|
}
|
|
7902
8044
|
return left;
|
|
@@ -7981,11 +8123,6 @@ function buildTimedResult(ctx, result) {
|
|
|
7981
8123
|
processingTimeMs: Math.round(performance.now() - ctx.startTime)
|
|
7982
8124
|
};
|
|
7983
8125
|
}
|
|
7984
|
-
function getReasonLabel(exceedsTokens, exceedsBytes) {
|
|
7985
|
-
if (exceedsTokens && exceedsBytes) return "tokens+size";
|
|
7986
|
-
if (exceedsBytes) return "size";
|
|
7987
|
-
return "tokens";
|
|
7988
|
-
}
|
|
7989
8126
|
/**
|
|
7990
8127
|
* Step 1: Try compressing tool results to fit within limits.
|
|
7991
8128
|
* First compresses old tool results, then all if needed.
|
|
@@ -7996,7 +8133,7 @@ async function tryCompressToolResults(ctx) {
|
|
|
7996
8133
|
workingMessages: ctx.payload.messages,
|
|
7997
8134
|
compressedCount: 0
|
|
7998
8135
|
};
|
|
7999
|
-
const compressionResult = smartCompressToolResults(ctx.payload.messages, ctx.tokenLimit, ctx.
|
|
8136
|
+
const compressionResult = smartCompressToolResults(ctx.payload.messages, ctx.tokenLimit, ctx.cfg.preserveRecentPercent);
|
|
8000
8137
|
let workingMessages = compressionResult.messages;
|
|
8001
8138
|
let compressedCount = compressionResult.compressedCount;
|
|
8002
8139
|
const compressedPayload = {
|
|
@@ -8005,10 +8142,9 @@ async function tryCompressToolResults(ctx) {
|
|
|
8005
8142
|
};
|
|
8006
8143
|
const compressedBytes = JSON.stringify(compressedPayload).length;
|
|
8007
8144
|
const compressedTokenCount = await getTokenCount(compressedPayload, ctx.model);
|
|
8008
|
-
if (compressedTokenCount.input <= ctx.tokenLimit
|
|
8009
|
-
const reason = getReasonLabel(ctx.exceedsTokens, ctx.exceedsBytes);
|
|
8145
|
+
if (compressedTokenCount.input <= ctx.tokenLimit) {
|
|
8010
8146
|
const elapsedMs = Math.round(performance.now() - ctx.startTime);
|
|
8011
|
-
consola.info(`[AutoTruncate:OpenAI]
|
|
8147
|
+
consola.info(`[AutoTruncate:OpenAI] tokens: ${ctx.originalTokens}→${compressedTokenCount.input}, ${bytesToKB(ctx.originalBytes)}→${bytesToKB(compressedBytes)}KB (compressed ${compressedCount} tool_results) [${elapsedMs}ms]`);
|
|
8012
8148
|
const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
|
|
8013
8149
|
const noticeTokenOverhead = Math.ceil(150 / 4) + 10;
|
|
8014
8150
|
return {
|
|
@@ -8023,7 +8159,7 @@ async function tryCompressToolResults(ctx) {
|
|
|
8023
8159
|
})
|
|
8024
8160
|
};
|
|
8025
8161
|
}
|
|
8026
|
-
const allCompression = smartCompressToolResults(workingMessages, ctx.tokenLimit,
|
|
8162
|
+
const allCompression = smartCompressToolResults(workingMessages, ctx.tokenLimit, 0);
|
|
8027
8163
|
if (allCompression.compressedCount > 0) {
|
|
8028
8164
|
workingMessages = allCompression.messages;
|
|
8029
8165
|
compressedCount += allCompression.compressedCount;
|
|
@@ -8033,10 +8169,9 @@ async function tryCompressToolResults(ctx) {
|
|
|
8033
8169
|
};
|
|
8034
8170
|
const allCompressedBytes = JSON.stringify(allCompressedPayload).length;
|
|
8035
8171
|
const allCompressedTokenCount = await getTokenCount(allCompressedPayload, ctx.model);
|
|
8036
|
-
if (allCompressedTokenCount.input <= ctx.tokenLimit
|
|
8037
|
-
const reason = getReasonLabel(ctx.exceedsTokens, ctx.exceedsBytes);
|
|
8172
|
+
if (allCompressedTokenCount.input <= ctx.tokenLimit) {
|
|
8038
8173
|
const elapsedMs = Math.round(performance.now() - ctx.startTime);
|
|
8039
|
-
consola.info(`[AutoTruncate:OpenAI]
|
|
8174
|
+
consola.info(`[AutoTruncate:OpenAI] tokens: ${ctx.originalTokens}→${allCompressedTokenCount.input}, ${bytesToKB(ctx.originalBytes)}→${bytesToKB(allCompressedBytes)}KB (compressed ${compressedCount} tool_results, including recent) [${elapsedMs}ms]`);
|
|
8040
8175
|
const noticePayload = addCompressionNotice(allCompressedPayload, compressedCount);
|
|
8041
8176
|
const noticeTokenOverhead = Math.ceil(150 / 4) + 10;
|
|
8042
8177
|
return {
|
|
@@ -8063,23 +8198,10 @@ async function tryCompressToolResults(ctx) {
|
|
|
8063
8198
|
*/
|
|
8064
8199
|
async function truncateByMessageRemoval(ctx, workingMessages, compressedCount) {
|
|
8065
8200
|
const { systemMessages, conversationMessages } = extractOpenAISystemMessages(workingMessages);
|
|
8066
|
-
const messagesBytes = workingMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0) + 1;
|
|
8067
|
-
const payloadOverhead = JSON.stringify({
|
|
8068
|
-
...ctx.payload,
|
|
8069
|
-
messages: workingMessages
|
|
8070
|
-
}).length - messagesBytes;
|
|
8071
|
-
const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
|
|
8072
|
-
const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
8073
|
-
consola.debug(`[AutoTruncate:OpenAI] overhead=${bytesToKB(payloadOverhead)}KB, system=${systemMessages.length} msgs (${bytesToKB(systemBytes)}KB)`);
|
|
8074
8201
|
const preserveIndex = findOptimalPreserveIndex({
|
|
8075
8202
|
messages: conversationMessages,
|
|
8076
|
-
|
|
8077
|
-
|
|
8078
|
-
payloadOverhead,
|
|
8079
|
-
tokenLimit: ctx.tokenLimit,
|
|
8080
|
-
byteLimit: ctx.byteLimit,
|
|
8081
|
-
checkTokenLimit: ctx.cfg.checkTokenLimit,
|
|
8082
|
-
checkByteLimit: ctx.cfg.checkByteLimit
|
|
8203
|
+
systemTokens: systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0),
|
|
8204
|
+
tokenLimit: ctx.tokenLimit
|
|
8083
8205
|
});
|
|
8084
8206
|
if (preserveIndex >= conversationMessages.length) {
|
|
8085
8207
|
consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
|
|
@@ -8124,14 +8246,13 @@ async function truncateByMessageRemoval(ctx, workingMessages, compressedCount) {
|
|
|
8124
8246
|
};
|
|
8125
8247
|
const newBytes = JSON.stringify(newPayload).length;
|
|
8126
8248
|
const newTokenCount = await getTokenCount(newPayload, ctx.model);
|
|
8127
|
-
const reason = getReasonLabel(ctx.exceedsTokens, ctx.exceedsBytes);
|
|
8128
8249
|
const actions = [];
|
|
8129
8250
|
if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
|
|
8130
8251
|
if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
|
|
8131
8252
|
const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
|
|
8132
8253
|
const elapsedMs = Math.round(performance.now() - ctx.startTime);
|
|
8133
|
-
consola.info(`[AutoTruncate:OpenAI]
|
|
8134
|
-
if (
|
|
8254
|
+
consola.info(`[AutoTruncate:OpenAI] tokens: ${ctx.originalTokens}→${newTokenCount.input}, ${bytesToKB(ctx.originalBytes)}→${bytesToKB(newBytes)}KB${actionInfo} [${elapsedMs}ms]`);
|
|
8255
|
+
if (newTokenCount.input > ctx.tokenLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over token limit (${newTokenCount.input} > ${ctx.tokenLimit})`);
|
|
8135
8256
|
return buildTimedResult(ctx, {
|
|
8136
8257
|
payload: newPayload,
|
|
8137
8258
|
wasTruncated: true,
|
|
@@ -8155,7 +8276,7 @@ async function autoTruncateOpenAI(payload, model, config = {}) {
|
|
|
8155
8276
|
...DEFAULT_AUTO_TRUNCATE_CONFIG,
|
|
8156
8277
|
...config
|
|
8157
8278
|
};
|
|
8158
|
-
const
|
|
8279
|
+
const tokenLimit = calculateTokenLimit(model, cfg);
|
|
8159
8280
|
const originalBytes = JSON.stringify(payload).length;
|
|
8160
8281
|
const originalTokens = (await getTokenCount(payload, model)).input;
|
|
8161
8282
|
const ctx = {
|
|
@@ -8163,14 +8284,11 @@ async function autoTruncateOpenAI(payload, model, config = {}) {
|
|
|
8163
8284
|
model,
|
|
8164
8285
|
cfg,
|
|
8165
8286
|
tokenLimit,
|
|
8166
|
-
byteLimit,
|
|
8167
8287
|
originalTokens,
|
|
8168
8288
|
originalBytes,
|
|
8169
|
-
exceedsTokens: originalTokens > tokenLimit,
|
|
8170
|
-
exceedsBytes: originalBytes > byteLimit,
|
|
8171
8289
|
startTime
|
|
8172
8290
|
};
|
|
8173
|
-
if (
|
|
8291
|
+
if (originalTokens <= tokenLimit) return buildTimedResult(ctx, {
|
|
8174
8292
|
payload,
|
|
8175
8293
|
wasTruncated: false,
|
|
8176
8294
|
originalTokens,
|
|
@@ -8201,7 +8319,7 @@ const createChatCompletions = async (payload) => {
|
|
|
8201
8319
|
...copilotHeaders(state, enableVision),
|
|
8202
8320
|
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
8203
8321
|
};
|
|
8204
|
-
const fetchSignal =
|
|
8322
|
+
const fetchSignal = createFetchSignal();
|
|
8205
8323
|
const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
|
|
8206
8324
|
method: "POST",
|
|
8207
8325
|
headers,
|
|
@@ -8372,7 +8490,7 @@ async function handleCompletion(c) {
|
|
|
8372
8490
|
consola.debug(`Model name resolved: ${clientModel} → ${resolvedModel}`);
|
|
8373
8491
|
originalPayload.model = resolvedModel;
|
|
8374
8492
|
}
|
|
8375
|
-
const selectedModel = state.
|
|
8493
|
+
const selectedModel = state.modelIndex.get(originalPayload.model);
|
|
8376
8494
|
if (!isEndpointSupported(selectedModel, ENDPOINT.CHAT_COMPLETIONS)) {
|
|
8377
8495
|
const msg = `Model "${originalPayload.model}" does not support the ${ENDPOINT.CHAT_COMPLETIONS} endpoint`;
|
|
8378
8496
|
throw new HTTPError(msg, 400, msg);
|
|
@@ -8524,8 +8642,8 @@ async function handleStreamingResponse(opts) {
|
|
|
8524
8642
|
acc.content += marker;
|
|
8525
8643
|
}
|
|
8526
8644
|
const iterator = response[Symbol.asyncIterator]();
|
|
8645
|
+
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
|
|
8527
8646
|
for (;;) {
|
|
8528
|
-
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
|
|
8529
8647
|
const result = await raceIteratorNext(iterator.next(), {
|
|
8530
8648
|
idleTimeoutMs,
|
|
8531
8649
|
abortSignal
|
|
@@ -8545,7 +8663,7 @@ async function handleStreamingResponse(opts) {
|
|
|
8545
8663
|
await stream.writeSSE({
|
|
8546
8664
|
data: rawEvent.data ?? "",
|
|
8547
8665
|
event: rawEvent.event,
|
|
8548
|
-
id: String(rawEvent.id),
|
|
8666
|
+
id: rawEvent.id != null ? String(rawEvent.id) : void 0,
|
|
8549
8667
|
retry: rawEvent.retry
|
|
8550
8668
|
});
|
|
8551
8669
|
}
|
|
@@ -8630,16 +8748,13 @@ async function handleCountTokens(c) {
|
|
|
8630
8748
|
const anthropicPayload = await c.req.json();
|
|
8631
8749
|
anthropicPayload.model = resolveModelName(anthropicPayload.model);
|
|
8632
8750
|
if (tuiLogId) tuiLogger.updateRequest(tuiLogId, { model: anthropicPayload.model });
|
|
8633
|
-
const selectedModel = state.
|
|
8751
|
+
const selectedModel = state.modelIndex.get(anthropicPayload.model);
|
|
8634
8752
|
if (!selectedModel) {
|
|
8635
8753
|
consola.warn(`[count_tokens] Model "${anthropicPayload.model}" not found, returning input_tokens=1`);
|
|
8636
8754
|
return c.json({ input_tokens: 1 });
|
|
8637
8755
|
}
|
|
8638
8756
|
if (state.autoTruncate && hasKnownLimits(selectedModel.id)) {
|
|
8639
|
-
const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel, {
|
|
8640
|
-
checkTokenLimit: true,
|
|
8641
|
-
checkByteLimit: true
|
|
8642
|
-
});
|
|
8757
|
+
const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel, { checkTokenLimit: true });
|
|
8643
8758
|
if (truncateCheck.needed) {
|
|
8644
8759
|
const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
|
|
8645
8760
|
const inflatedTokens = Math.floor(contextWindow * .95);
|
|
@@ -8716,7 +8831,7 @@ modelRoutes.get("/:model", async (c) => {
|
|
|
8716
8831
|
try {
|
|
8717
8832
|
if (!state.models) await cacheModels();
|
|
8718
8833
|
const modelId = c.req.param("model");
|
|
8719
|
-
const model = state.
|
|
8834
|
+
const model = state.modelIndex.get(modelId);
|
|
8720
8835
|
if (!model) return c.json({ error: {
|
|
8721
8836
|
message: `The model '${modelId}' does not exist`,
|
|
8722
8837
|
type: "invalid_request_error",
|
|
@@ -8740,7 +8855,7 @@ const createResponses = async (payload) => {
|
|
|
8740
8855
|
...copilotHeaders(state, enableVision),
|
|
8741
8856
|
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
8742
8857
|
};
|
|
8743
|
-
const fetchSignal =
|
|
8858
|
+
const fetchSignal = createFetchSignal();
|
|
8744
8859
|
const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
|
|
8745
8860
|
method: "POST",
|
|
8746
8861
|
headers,
|
|
@@ -8880,8 +8995,7 @@ async function handleResponsesCompletion(c) {
|
|
|
8880
8995
|
consola.debug(`Model name resolved: ${clientModel} → ${resolvedModel}`);
|
|
8881
8996
|
payload.model = resolvedModel;
|
|
8882
8997
|
}
|
|
8883
|
-
|
|
8884
|
-
if (!isEndpointSupported(selectedModel, ENDPOINT.RESPONSES)) {
|
|
8998
|
+
if (!isEndpointSupported(state.modelIndex.get(payload.model), ENDPOINT.RESPONSES)) {
|
|
8885
8999
|
const msg = `Model "${payload.model}" does not support the ${ENDPOINT.RESPONSES} endpoint`;
|
|
8886
9000
|
throw new HTTPError(msg, 400, msg);
|
|
8887
9001
|
}
|
|
@@ -8911,10 +9025,33 @@ async function handleResponsesCompletion(c) {
|
|
|
8911
9025
|
/** Pass through to Copilot /responses endpoint directly */
|
|
8912
9026
|
async function handleDirectResponses(opts) {
|
|
8913
9027
|
const { c, payload, reqCtx } = opts;
|
|
8914
|
-
const
|
|
8915
|
-
|
|
9028
|
+
const adapter = {
|
|
9029
|
+
format: "openai-responses",
|
|
9030
|
+
sanitize: (p) => ({
|
|
9031
|
+
payload: p,
|
|
9032
|
+
removedCount: 0,
|
|
9033
|
+
systemReminderRemovals: 0
|
|
9034
|
+
}),
|
|
9035
|
+
execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p)),
|
|
9036
|
+
logPayloadSize: (p) => {
|
|
9037
|
+
const count = typeof p.input === "string" ? 1 : p.input.length;
|
|
9038
|
+
consola.debug(`Responses payload: ${count} input item(s), model: ${p.model}`);
|
|
9039
|
+
}
|
|
9040
|
+
};
|
|
9041
|
+
const strategies = [createTokenRefreshStrategy()];
|
|
9042
|
+
const selectedModel = state.modelIndex.get(payload.model);
|
|
8916
9043
|
try {
|
|
8917
|
-
const
|
|
9044
|
+
const pipelineResult = await executeRequestPipeline({
|
|
9045
|
+
adapter,
|
|
9046
|
+
strategies,
|
|
9047
|
+
payload,
|
|
9048
|
+
originalPayload: payload,
|
|
9049
|
+
model: selectedModel,
|
|
9050
|
+
maxRetries: 1,
|
|
9051
|
+
requestContext: reqCtx
|
|
9052
|
+
});
|
|
9053
|
+
const response = pipelineResult.response;
|
|
9054
|
+
reqCtx.addQueueWaitMs(pipelineResult.queueWaitMs);
|
|
8918
9055
|
if (!payload.stream) {
|
|
8919
9056
|
const responsesResponse = response;
|
|
8920
9057
|
const content = responsesOutputToContent(responsesResponse.output);
|
|
@@ -8942,8 +9079,8 @@ async function handleDirectResponses(opts) {
|
|
|
8942
9079
|
let eventsIn = 0;
|
|
8943
9080
|
try {
|
|
8944
9081
|
const iterator = response[Symbol.asyncIterator]();
|
|
9082
|
+
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
|
|
8945
9083
|
for (;;) {
|
|
8946
|
-
const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
|
|
8947
9084
|
const result = await raceIteratorNext(iterator.next(), {
|
|
8948
9085
|
idleTimeoutMs,
|
|
8949
9086
|
abortSignal
|
|
@@ -8989,7 +9126,13 @@ async function handleDirectResponses(opts) {
|
|
|
8989
9126
|
* Handles POST /responses and POST /v1/responses.
|
|
8990
9127
|
*/
|
|
8991
9128
|
const responsesRoutes = new Hono();
|
|
8992
|
-
responsesRoutes.post("/",
|
|
9129
|
+
responsesRoutes.post("/", async (c) => {
|
|
9130
|
+
try {
|
|
9131
|
+
return await handleResponsesCompletion(c);
|
|
9132
|
+
} catch (error) {
|
|
9133
|
+
return forwardError(c, error);
|
|
9134
|
+
}
|
|
9135
|
+
});
|
|
8993
9136
|
|
|
8994
9137
|
//#endregion
|
|
8995
9138
|
//#region src/routes/token/route.ts
|
|
@@ -9078,20 +9221,39 @@ registerRoutes(server);
|
|
|
9078
9221
|
function formatLimit(value) {
|
|
9079
9222
|
return value ? `${Math.round(value / 1e3)}k` : "?";
|
|
9080
9223
|
}
|
|
9224
|
+
/**
|
|
9225
|
+
* Format a model as 3 lines: main info, features, and supported endpoints.
|
|
9226
|
+
*
|
|
9227
|
+
* Example output:
|
|
9228
|
+
* - claude-opus-4.6-1m Anthropic ctx:1000k prp: 936k out: 64k
|
|
9229
|
+
* features: adaptive-thinking, thinking, streaming, vision, tool-calls
|
|
9230
|
+
* endpoints: messages, completions
|
|
9231
|
+
*/
|
|
9081
9232
|
function formatModelInfo(model) {
|
|
9082
9233
|
const limits = model.capabilities?.limits;
|
|
9083
9234
|
const supports = model.capabilities?.supports;
|
|
9084
9235
|
const contextK = formatLimit(limits?.max_context_window_tokens);
|
|
9085
9236
|
const promptK = formatLimit(limits?.max_prompt_tokens);
|
|
9086
9237
|
const outputK = formatLimit(limits?.max_output_tokens);
|
|
9238
|
+
const mainLine = ` - ${model.id.length > 28 ? `${model.id.slice(0, 25)}...` : model.id.padEnd(28)} ${model.vendor.padEnd(13)} ctx:${contextK.padStart(5)} prp:${promptK.padStart(5)} out:${outputK.padStart(5)}`;
|
|
9087
9239
|
const features = [
|
|
9088
9240
|
...Object.entries(supports ?? {}).filter(([, value]) => value === true).map(([key]) => key.replaceAll("_", "-")),
|
|
9089
9241
|
supports?.max_thinking_budget && "thinking",
|
|
9090
9242
|
model.capabilities?.type === "embeddings" && "embeddings",
|
|
9091
9243
|
model.preview && "preview"
|
|
9092
9244
|
].filter(Boolean).join(", ");
|
|
9093
|
-
const
|
|
9094
|
-
|
|
9245
|
+
const featLine = features ? pc.dim(` features: ${features}`) : "";
|
|
9246
|
+
const endpoints = formatEndpoints(model.supported_endpoints);
|
|
9247
|
+
return [
|
|
9248
|
+
mainLine,
|
|
9249
|
+
featLine,
|
|
9250
|
+
pc.dim(` endpoints: ${endpoints}`)
|
|
9251
|
+
].filter(Boolean).join("\n");
|
|
9252
|
+
}
|
|
9253
|
+
/** Format endpoint paths as short display names */
|
|
9254
|
+
function formatEndpoints(endpoints) {
|
|
9255
|
+
if (!endpoints || endpoints.length === 0) return "(legacy)";
|
|
9256
|
+
return endpoints.map((e) => e.replace(/^\/(v1\/|chat\/)?/, "")).join(", ");
|
|
9095
9257
|
}
|
|
9096
9258
|
/** Parse an integer from a string, returning a default if the result is NaN. */
|
|
9097
9259
|
function parseIntOrDefault(value, defaultValue) {
|
|
@@ -9165,6 +9327,7 @@ async function runServer(options) {
|
|
|
9165
9327
|
consola.warn("Failed to fetch models from Copilot API:", error instanceof Error ? error.message : error);
|
|
9166
9328
|
}
|
|
9167
9329
|
consola.info(`Available models:\n${state.models?.data.map((m) => formatModelInfo(m)).join("\n")}`);
|
|
9330
|
+
await loadPersistedLimits();
|
|
9168
9331
|
const availableIds = state.models?.data.map((m) => m.id) ?? [];
|
|
9169
9332
|
const overrideLines = Object.entries(state.modelOverrides).map(([from, to]) => {
|
|
9170
9333
|
const resolved = resolveModelName(from);
|