@hsupu/copilot-api 0.7.10 → 0.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/main.js +1763 -348
- package/dist/main.js.map +1 -1
- package/package.json +5 -1
package/dist/main.js
CHANGED
|
@@ -46,7 +46,11 @@ const state = {
|
|
|
46
46
|
accountType: "individual",
|
|
47
47
|
manualApprove: false,
|
|
48
48
|
showToken: false,
|
|
49
|
-
|
|
49
|
+
verbose: false,
|
|
50
|
+
autoTruncate: true,
|
|
51
|
+
compressToolResults: false,
|
|
52
|
+
redirectAnthropic: false,
|
|
53
|
+
rewriteAnthropicTools: true
|
|
50
54
|
};
|
|
51
55
|
|
|
52
56
|
//#endregion
|
|
@@ -90,27 +94,78 @@ const GITHUB_BASE_URL = "https://github.com";
|
|
|
90
94
|
const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
|
|
91
95
|
const GITHUB_APP_SCOPES = ["read:user"].join(" ");
|
|
92
96
|
|
|
97
|
+
//#endregion
|
|
98
|
+
//#region src/lib/auto-truncate-common.ts
|
|
99
|
+
const DEFAULT_AUTO_TRUNCATE_CONFIG = {
|
|
100
|
+
safetyMarginPercent: 2,
|
|
101
|
+
maxRequestBodyBytes: 510 * 1024,
|
|
102
|
+
preserveRecentPercent: .7
|
|
103
|
+
};
|
|
104
|
+
/** Dynamic byte limit that adjusts based on 413 errors */
|
|
105
|
+
let dynamicByteLimit = null;
|
|
106
|
+
/**
|
|
107
|
+
* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
|
|
108
|
+
*/
|
|
109
|
+
function onRequestTooLarge(failingBytes) {
|
|
110
|
+
const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
|
|
111
|
+
dynamicByteLimit = newLimit;
|
|
112
|
+
consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
|
|
113
|
+
}
|
|
114
|
+
/** Get the current effective byte limit */
|
|
115
|
+
function getEffectiveByteLimitBytes() {
|
|
116
|
+
return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
|
|
117
|
+
}
|
|
118
|
+
/** Dynamic token limits per model, adjusted based on token limit errors */
|
|
119
|
+
const dynamicTokenLimits = /* @__PURE__ */ new Map();
|
|
120
|
+
/**
|
|
121
|
+
* Called when a token limit error (400) occurs.
|
|
122
|
+
* Adjusts the token limit for the specific model to 95% of the reported limit.
|
|
123
|
+
*/
|
|
124
|
+
function onTokenLimitExceeded(modelId, reportedLimit) {
|
|
125
|
+
const newLimit = Math.floor(reportedLimit * .95);
|
|
126
|
+
const previous = dynamicTokenLimits.get(modelId);
|
|
127
|
+
if (!previous || newLimit < previous) {
|
|
128
|
+
dynamicTokenLimits.set(modelId, newLimit);
|
|
129
|
+
consola.info(`[AutoTruncate] Adjusted token limit for ${modelId}: ${reportedLimit} reported → ${newLimit} effective`);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Get the effective token limit for a model.
|
|
134
|
+
* Returns the dynamic limit if set, otherwise null to use model capabilities.
|
|
135
|
+
*/
|
|
136
|
+
function getEffectiveTokenLimit(modelId) {
|
|
137
|
+
return dynamicTokenLimits.get(modelId) ?? null;
|
|
138
|
+
}
|
|
139
|
+
|
|
93
140
|
//#endregion
|
|
94
141
|
//#region src/lib/error.ts
|
|
95
142
|
var HTTPError = class HTTPError extends Error {
|
|
96
143
|
status;
|
|
97
144
|
responseText;
|
|
98
|
-
|
|
145
|
+
/** Model ID that caused the error (if known) */
|
|
146
|
+
modelId;
|
|
147
|
+
constructor(message, status, responseText, modelId) {
|
|
99
148
|
super(message);
|
|
100
149
|
this.status = status;
|
|
101
150
|
this.responseText = responseText;
|
|
151
|
+
this.modelId = modelId;
|
|
102
152
|
}
|
|
103
|
-
static async fromResponse(message, response) {
|
|
153
|
+
static async fromResponse(message, response, modelId) {
|
|
104
154
|
const text = await response.text();
|
|
105
|
-
return new HTTPError(message, response.status, text);
|
|
155
|
+
return new HTTPError(message, response.status, text, modelId);
|
|
106
156
|
}
|
|
107
157
|
};
|
|
108
158
|
/** Parse token limit info from error message */
|
|
109
159
|
function parseTokenLimitError(message) {
|
|
110
|
-
const
|
|
111
|
-
if (
|
|
112
|
-
current: Number.parseInt(
|
|
113
|
-
limit: Number.parseInt(
|
|
160
|
+
const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
|
|
161
|
+
if (openaiMatch) return {
|
|
162
|
+
current: Number.parseInt(openaiMatch[1], 10),
|
|
163
|
+
limit: Number.parseInt(openaiMatch[2], 10)
|
|
164
|
+
};
|
|
165
|
+
const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
|
|
166
|
+
if (anthropicMatch) return {
|
|
167
|
+
current: Number.parseInt(anthropicMatch[1], 10),
|
|
168
|
+
limit: Number.parseInt(anthropicMatch[2], 10)
|
|
114
169
|
};
|
|
115
170
|
return null;
|
|
116
171
|
}
|
|
@@ -147,11 +202,10 @@ function formatRateLimitError(copilotMessage) {
|
|
|
147
202
|
};
|
|
148
203
|
}
|
|
149
204
|
function forwardError(c, error) {
|
|
150
|
-
consola.error("Error occurred:", error);
|
|
151
205
|
if (error instanceof HTTPError) {
|
|
152
206
|
if (error.status === 413) {
|
|
153
207
|
const formattedError = formatRequestTooLargeError();
|
|
154
|
-
consola.
|
|
208
|
+
consola.warn(`HTTP 413: Request too large`);
|
|
155
209
|
return c.json(formattedError, 413);
|
|
156
210
|
}
|
|
157
211
|
let errorJson;
|
|
@@ -160,26 +214,38 @@ function forwardError(c, error) {
|
|
|
160
214
|
} catch {
|
|
161
215
|
errorJson = error.responseText;
|
|
162
216
|
}
|
|
163
|
-
consola.error("HTTP error:", errorJson);
|
|
164
217
|
const copilotError = errorJson;
|
|
165
218
|
if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
|
|
166
219
|
const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
|
|
167
220
|
if (tokenInfo) {
|
|
221
|
+
if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
|
|
222
|
+
const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
|
|
223
|
+
consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
|
|
224
|
+
return c.json(formattedError, 400);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
const anthropicError = errorJson;
|
|
228
|
+
if (anthropicError.error?.type === "invalid_request_error") {
|
|
229
|
+
const tokenInfo = parseTokenLimitError(anthropicError.error.message ?? "");
|
|
230
|
+
if (tokenInfo) {
|
|
231
|
+
if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
|
|
168
232
|
const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
|
|
169
|
-
consola.
|
|
233
|
+
consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
|
|
170
234
|
return c.json(formattedError, 400);
|
|
171
235
|
}
|
|
172
236
|
}
|
|
173
237
|
if (error.status === 429 || copilotError.error?.code === "rate_limited") {
|
|
174
238
|
const formattedError = formatRateLimitError(copilotError.error?.message);
|
|
175
|
-
consola.
|
|
239
|
+
consola.warn(`HTTP 429: Rate limit exceeded`);
|
|
176
240
|
return c.json(formattedError, 429);
|
|
177
241
|
}
|
|
242
|
+
consola.error(`HTTP ${error.status}:`, errorJson);
|
|
178
243
|
return c.json({ error: {
|
|
179
244
|
message: error.responseText,
|
|
180
245
|
type: "error"
|
|
181
246
|
} }, error.status);
|
|
182
247
|
}
|
|
248
|
+
consola.error("Unexpected error:", error);
|
|
183
249
|
return c.json({ error: {
|
|
184
250
|
message: error.message,
|
|
185
251
|
type: "error"
|
|
@@ -305,6 +371,7 @@ async function pollAccessToken(deviceCode) {
|
|
|
305
371
|
//#region src/lib/token.ts
|
|
306
372
|
const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
|
|
307
373
|
const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
|
|
374
|
+
let copilotTokenRefreshTimer = null;
|
|
308
375
|
/**
|
|
309
376
|
* Refresh the Copilot token with exponential backoff retry.
|
|
310
377
|
* Returns the new token on success, or null if all retries fail.
|
|
@@ -323,20 +390,34 @@ async function refreshCopilotTokenWithRetry(maxRetries = 3) {
|
|
|
323
390
|
consola.error("All token refresh attempts failed:", lastError);
|
|
324
391
|
return null;
|
|
325
392
|
}
|
|
393
|
+
/**
|
|
394
|
+
* Clear any existing token refresh timer.
|
|
395
|
+
* Call this before setting up a new timer or during cleanup.
|
|
396
|
+
*/
|
|
397
|
+
function clearCopilotTokenRefresh() {
|
|
398
|
+
if (copilotTokenRefreshTimer) {
|
|
399
|
+
clearInterval(copilotTokenRefreshTimer);
|
|
400
|
+
copilotTokenRefreshTimer = null;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
326
403
|
const setupCopilotToken = async () => {
|
|
327
404
|
const { token, refresh_in } = await getCopilotToken();
|
|
328
405
|
state.copilotToken = token;
|
|
329
406
|
consola.debug("GitHub Copilot Token fetched successfully!");
|
|
330
407
|
if (state.showToken) consola.info("Copilot token:", token);
|
|
331
|
-
const refreshInterval = (refresh_in - 60) * 1e3;
|
|
332
|
-
|
|
408
|
+
const refreshInterval = Math.max((refresh_in - 60) * 1e3, 60 * 1e3);
|
|
409
|
+
clearCopilotTokenRefresh();
|
|
410
|
+
copilotTokenRefreshTimer = setInterval(() => {
|
|
333
411
|
consola.debug("Refreshing Copilot token");
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
412
|
+
refreshCopilotTokenWithRetry().then((newToken) => {
|
|
413
|
+
if (newToken) {
|
|
414
|
+
state.copilotToken = newToken;
|
|
415
|
+
consola.debug("Copilot token refreshed");
|
|
416
|
+
if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
|
|
417
|
+
} else consola.error("Failed to refresh Copilot token after retries, using existing token");
|
|
418
|
+
}).catch((error) => {
|
|
419
|
+
consola.error("Unexpected error during token refresh:", error);
|
|
420
|
+
});
|
|
340
421
|
}, refreshInterval);
|
|
341
422
|
};
|
|
342
423
|
async function setupGitHubToken(options) {
|
|
@@ -480,9 +561,23 @@ async function checkTokenExists() {
|
|
|
480
561
|
return false;
|
|
481
562
|
}
|
|
482
563
|
}
|
|
483
|
-
async function
|
|
564
|
+
async function getAccountInfo() {
|
|
565
|
+
try {
|
|
566
|
+
await ensurePaths();
|
|
567
|
+
await setupGitHubToken();
|
|
568
|
+
if (!state.githubToken) return null;
|
|
569
|
+
const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
|
|
570
|
+
return {
|
|
571
|
+
user,
|
|
572
|
+
copilot
|
|
573
|
+
};
|
|
574
|
+
} catch {
|
|
575
|
+
return null;
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
async function getDebugInfo(includeAccount) {
|
|
484
579
|
const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
|
|
485
|
-
|
|
580
|
+
const info = {
|
|
486
581
|
version: version$1,
|
|
487
582
|
runtime: getRuntimeInfo(),
|
|
488
583
|
paths: {
|
|
@@ -491,9 +586,14 @@ async function getDebugInfo() {
|
|
|
491
586
|
},
|
|
492
587
|
tokenExists
|
|
493
588
|
};
|
|
589
|
+
if (includeAccount && tokenExists) {
|
|
590
|
+
const account = await getAccountInfo();
|
|
591
|
+
if (account) info.account = account;
|
|
592
|
+
}
|
|
593
|
+
return info;
|
|
494
594
|
}
|
|
495
595
|
function printDebugInfoPlain(info) {
|
|
496
|
-
|
|
596
|
+
let output = `copilot-api debug
|
|
497
597
|
|
|
498
598
|
Version: ${info.version}
|
|
499
599
|
Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
|
|
@@ -502,19 +602,24 @@ Paths:
|
|
|
502
602
|
- APP_DIR: ${info.paths.APP_DIR}
|
|
503
603
|
- GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
|
|
504
604
|
|
|
505
|
-
Token exists: ${info.tokenExists ? "Yes" : "No"}
|
|
605
|
+
Token exists: ${info.tokenExists ? "Yes" : "No"}`;
|
|
606
|
+
if (info.account) output += `
|
|
607
|
+
|
|
608
|
+
Account Info:
|
|
609
|
+
${JSON.stringify(info.account, null, 2)}`;
|
|
610
|
+
consola.info(output);
|
|
506
611
|
}
|
|
507
612
|
function printDebugInfoJson(info) {
|
|
508
613
|
console.log(JSON.stringify(info, null, 2));
|
|
509
614
|
}
|
|
510
615
|
async function runDebug(options) {
|
|
511
|
-
const debugInfo = await getDebugInfo();
|
|
512
|
-
if (options.json) printDebugInfoJson(debugInfo);
|
|
513
|
-
else printDebugInfoPlain(debugInfo);
|
|
616
|
+
const debugInfo$1 = await getDebugInfo(true);
|
|
617
|
+
if (options.json) printDebugInfoJson(debugInfo$1);
|
|
618
|
+
else printDebugInfoPlain(debugInfo$1);
|
|
514
619
|
}
|
|
515
|
-
const
|
|
620
|
+
const debugInfo = defineCommand({
|
|
516
621
|
meta: {
|
|
517
|
-
name: "
|
|
622
|
+
name: "info",
|
|
518
623
|
description: "Print debug information about the application"
|
|
519
624
|
},
|
|
520
625
|
args: { json: {
|
|
@@ -526,6 +631,48 @@ const debug = defineCommand({
|
|
|
526
631
|
return runDebug({ json: args.json });
|
|
527
632
|
}
|
|
528
633
|
});
|
|
634
|
+
const debugModels = defineCommand({
|
|
635
|
+
meta: {
|
|
636
|
+
name: "models",
|
|
637
|
+
description: "Fetch and display raw model data from Copilot API"
|
|
638
|
+
},
|
|
639
|
+
args: {
|
|
640
|
+
"account-type": {
|
|
641
|
+
type: "string",
|
|
642
|
+
alias: "a",
|
|
643
|
+
default: "individual",
|
|
644
|
+
description: "The type of GitHub account (individual, business, enterprise)"
|
|
645
|
+
},
|
|
646
|
+
"github-token": {
|
|
647
|
+
type: "string",
|
|
648
|
+
alias: "g",
|
|
649
|
+
description: "GitHub token to use (skips interactive auth)"
|
|
650
|
+
}
|
|
651
|
+
},
|
|
652
|
+
async run({ args }) {
|
|
653
|
+
state.accountType = args["account-type"];
|
|
654
|
+
await ensurePaths();
|
|
655
|
+
if (args["github-token"]) {
|
|
656
|
+
state.githubToken = args["github-token"];
|
|
657
|
+
consola.info("Using provided GitHub token");
|
|
658
|
+
} else await setupGitHubToken();
|
|
659
|
+
const { token } = await getCopilotToken();
|
|
660
|
+
state.copilotToken = token;
|
|
661
|
+
consola.info("Fetching models from Copilot API...");
|
|
662
|
+
const models = await getModels();
|
|
663
|
+
console.log(JSON.stringify(models, null, 2));
|
|
664
|
+
}
|
|
665
|
+
});
|
|
666
|
+
const debug = defineCommand({
|
|
667
|
+
meta: {
|
|
668
|
+
name: "debug",
|
|
669
|
+
description: "Debug commands for troubleshooting"
|
|
670
|
+
},
|
|
671
|
+
subCommands: {
|
|
672
|
+
info: debugInfo,
|
|
673
|
+
models: debugModels
|
|
674
|
+
}
|
|
675
|
+
});
|
|
529
676
|
|
|
530
677
|
//#endregion
|
|
531
678
|
//#region src/logout.ts
|
|
@@ -552,7 +699,7 @@ const logout = defineCommand({
|
|
|
552
699
|
});
|
|
553
700
|
|
|
554
701
|
//#endregion
|
|
555
|
-
//#region src/patch-claude.ts
|
|
702
|
+
//#region src/patch-claude-code.ts
|
|
556
703
|
const SUPPORTED_VERSIONS = {
|
|
557
704
|
v2a: {
|
|
558
705
|
min: "2.0.0",
|
|
@@ -872,7 +1019,7 @@ const patchClaude = defineCommand({
|
|
|
872
1019
|
//#endregion
|
|
873
1020
|
//#region package.json
|
|
874
1021
|
var name = "@hsupu/copilot-api";
|
|
875
|
-
var version = "0.7.
|
|
1022
|
+
var version = "0.7.12";
|
|
876
1023
|
var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
|
|
877
1024
|
var keywords = [
|
|
878
1025
|
"proxy",
|
|
@@ -900,11 +1047,15 @@ var scripts = {
|
|
|
900
1047
|
"prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
|
|
901
1048
|
"release": "bumpp && npm publish --access public",
|
|
902
1049
|
"start": "NODE_ENV=production bun run ./src/main.ts",
|
|
1050
|
+
"test": "bun test tests/*.test.ts",
|
|
1051
|
+
"test:all": "bun test tests/*.test.ts && bun test tests/integration/",
|
|
1052
|
+
"test:integration": "bun test tests/integration/",
|
|
903
1053
|
"typecheck": "tsc"
|
|
904
1054
|
};
|
|
905
1055
|
var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
|
|
906
1056
|
var lint_staged = { "*": "bun run lint --fix" };
|
|
907
1057
|
var dependencies = {
|
|
1058
|
+
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
908
1059
|
"citty": "^0.1.6",
|
|
909
1060
|
"clipboardy": "^5.0.0",
|
|
910
1061
|
"consola": "^3.4.2",
|
|
@@ -951,7 +1102,7 @@ var package_default = {
|
|
|
951
1102
|
|
|
952
1103
|
//#endregion
|
|
953
1104
|
//#region src/lib/adaptive-rate-limiter.ts
|
|
954
|
-
const DEFAULT_CONFIG
|
|
1105
|
+
const DEFAULT_CONFIG = {
|
|
955
1106
|
baseRetryIntervalSeconds: 10,
|
|
956
1107
|
maxRetryIntervalSeconds: 120,
|
|
957
1108
|
requestIntervalSeconds: 10,
|
|
@@ -980,7 +1131,7 @@ var AdaptiveRateLimiter = class {
|
|
|
980
1131
|
recoveryStepIndex = 0;
|
|
981
1132
|
constructor(config = {}) {
|
|
982
1133
|
this.config = {
|
|
983
|
-
...DEFAULT_CONFIG
|
|
1134
|
+
...DEFAULT_CONFIG,
|
|
984
1135
|
...config
|
|
985
1136
|
};
|
|
986
1137
|
}
|
|
@@ -1222,12 +1373,12 @@ let rateLimiterInstance = null;
|
|
|
1222
1373
|
*/
|
|
1223
1374
|
function initAdaptiveRateLimiter(config = {}) {
|
|
1224
1375
|
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
1225
|
-
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG
|
|
1226
|
-
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG
|
|
1227
|
-
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG
|
|
1228
|
-
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG
|
|
1229
|
-
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG
|
|
1230
|
-
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG
|
|
1376
|
+
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
|
|
1377
|
+
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
|
|
1378
|
+
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
|
|
1379
|
+
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
|
|
1380
|
+
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
|
|
1381
|
+
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
|
|
1231
1382
|
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
1232
1383
|
}
|
|
1233
1384
|
/**
|
|
@@ -1848,6 +1999,7 @@ var RequestTracker = class {
|
|
|
1848
1999
|
requests = /* @__PURE__ */ new Map();
|
|
1849
2000
|
renderer = null;
|
|
1850
2001
|
completedQueue = [];
|
|
2002
|
+
completedTimeouts = /* @__PURE__ */ new Map();
|
|
1851
2003
|
historySize = 5;
|
|
1852
2004
|
completedDisplayMs = 2e3;
|
|
1853
2005
|
setRenderer(renderer) {
|
|
@@ -1907,11 +2059,22 @@ var RequestTracker = class {
|
|
|
1907
2059
|
this.renderer?.onRequestComplete(request);
|
|
1908
2060
|
this.requests.delete(id);
|
|
1909
2061
|
this.completedQueue.push(request);
|
|
1910
|
-
while (this.completedQueue.length > this.historySize)
|
|
1911
|
-
|
|
2062
|
+
while (this.completedQueue.length > this.historySize) {
|
|
2063
|
+
const removed = this.completedQueue.shift();
|
|
2064
|
+
if (removed) {
|
|
2065
|
+
const timeoutId$1 = this.completedTimeouts.get(removed.id);
|
|
2066
|
+
if (timeoutId$1) {
|
|
2067
|
+
clearTimeout(timeoutId$1);
|
|
2068
|
+
this.completedTimeouts.delete(removed.id);
|
|
2069
|
+
}
|
|
2070
|
+
}
|
|
2071
|
+
}
|
|
2072
|
+
const timeoutId = setTimeout(() => {
|
|
1912
2073
|
const idx = this.completedQueue.indexOf(request);
|
|
1913
2074
|
if (idx !== -1) this.completedQueue.splice(idx, 1);
|
|
2075
|
+
this.completedTimeouts.delete(id);
|
|
1914
2076
|
}, this.completedDisplayMs);
|
|
2077
|
+
this.completedTimeouts.set(id, timeoutId);
|
|
1915
2078
|
}
|
|
1916
2079
|
/**
|
|
1917
2080
|
* Mark request as failed with error
|
|
@@ -1946,11 +2109,13 @@ var RequestTracker = class {
|
|
|
1946
2109
|
return this.requests.get(id);
|
|
1947
2110
|
}
|
|
1948
2111
|
/**
|
|
1949
|
-
* Clear all tracked requests
|
|
2112
|
+
* Clear all tracked requests and pending timeouts
|
|
1950
2113
|
*/
|
|
1951
2114
|
clear() {
|
|
1952
2115
|
this.requests.clear();
|
|
1953
2116
|
this.completedQueue = [];
|
|
2117
|
+
for (const timeoutId of this.completedTimeouts.values()) clearTimeout(timeoutId);
|
|
2118
|
+
this.completedTimeouts.clear();
|
|
1954
2119
|
}
|
|
1955
2120
|
};
|
|
1956
2121
|
const requestTracker = new RequestTracker();
|
|
@@ -2101,6 +2266,14 @@ const getTokenizerFromModel = (model) => {
|
|
|
2101
2266
|
return model.capabilities?.tokenizer || "o200k_base";
|
|
2102
2267
|
};
|
|
2103
2268
|
/**
|
|
2269
|
+
* Count tokens in a text string using the model's tokenizer.
|
|
2270
|
+
* This is a simple wrapper for counting tokens in plain text.
|
|
2271
|
+
*/
|
|
2272
|
+
const countTextTokens = async (text, model) => {
|
|
2273
|
+
const tokenizer = getTokenizerFromModel(model);
|
|
2274
|
+
return (await getEncodeChatFunction(tokenizer)).encode(text).length;
|
|
2275
|
+
};
|
|
2276
|
+
/**
|
|
2104
2277
|
* Get model-specific constants for token calculation.
|
|
2105
2278
|
* These values are empirically determined based on OpenAI's function calling token overhead.
|
|
2106
2279
|
* - funcInit: Tokens for initializing a function definition
|
|
@@ -2206,7 +2379,9 @@ const numTokensForTools = (tools, encoder, constants) => {
|
|
|
2206
2379
|
return funcTokenCount;
|
|
2207
2380
|
};
|
|
2208
2381
|
/**
|
|
2209
|
-
* Calculate the token count of messages
|
|
2382
|
+
* Calculate the token count of messages.
|
|
2383
|
+
* Uses the tokenizer specified by the GitHub Copilot API model info.
|
|
2384
|
+
* All models (including Claude) use GPT tokenizers (o200k_base or cl100k_base).
|
|
2210
2385
|
*/
|
|
2211
2386
|
const getTokenCount = async (payload, model) => {
|
|
2212
2387
|
const tokenizer = getTokenizerFromModel(model);
|
|
@@ -2225,32 +2400,18 @@ const getTokenCount = async (payload, model) => {
|
|
|
2225
2400
|
};
|
|
2226
2401
|
|
|
2227
2402
|
//#endregion
|
|
2228
|
-
//#region src/lib/auto-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
maxRequestBodyBytes: 500 * 1024
|
|
2232
|
-
};
|
|
2233
|
-
/** Dynamic byte limit that adjusts based on 413 errors */
|
|
2234
|
-
let dynamicByteLimit = null;
|
|
2235
|
-
/**
|
|
2236
|
-
* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
|
|
2237
|
-
*/
|
|
2238
|
-
function onRequestTooLarge(failingBytes) {
|
|
2239
|
-
const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
|
|
2240
|
-
dynamicByteLimit = newLimit;
|
|
2241
|
-
consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
|
|
2242
|
-
}
|
|
2243
|
-
function calculateLimits(model, config) {
|
|
2244
|
-
const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2403
|
+
//#region src/lib/auto-truncate-openai.ts
|
|
2404
|
+
function calculateLimits$1(model, config) {
|
|
2405
|
+
const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2245
2406
|
const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
|
|
2246
|
-
const byteLimit =
|
|
2407
|
+
const byteLimit = getEffectiveByteLimitBytes();
|
|
2247
2408
|
return {
|
|
2248
2409
|
tokenLimit,
|
|
2249
2410
|
byteLimit
|
|
2250
2411
|
};
|
|
2251
2412
|
}
|
|
2252
2413
|
/** Estimate tokens for a single message (fast approximation) */
|
|
2253
|
-
function estimateMessageTokens(msg) {
|
|
2414
|
+
function estimateMessageTokens$1(msg) {
|
|
2254
2415
|
let charCount = 0;
|
|
2255
2416
|
if (typeof msg.content === "string") charCount = msg.content.length;
|
|
2256
2417
|
else if (Array.isArray(msg.content)) {
|
|
@@ -2261,7 +2422,7 @@ function estimateMessageTokens(msg) {
|
|
|
2261
2422
|
return Math.ceil(charCount / 4) + 10;
|
|
2262
2423
|
}
|
|
2263
2424
|
/** Get byte size of a message */
|
|
2264
|
-
function getMessageBytes(msg) {
|
|
2425
|
+
function getMessageBytes$1(msg) {
|
|
2265
2426
|
return JSON.stringify(msg).length;
|
|
2266
2427
|
}
|
|
2267
2428
|
/** Extract system/developer messages from the beginning */
|
|
@@ -2283,7 +2444,7 @@ function getToolCallIds(msg) {
|
|
|
2283
2444
|
return [];
|
|
2284
2445
|
}
|
|
2285
2446
|
/** Filter orphaned tool_result messages */
|
|
2286
|
-
function filterOrphanedToolResults(messages) {
|
|
2447
|
+
function filterOrphanedToolResults$1(messages) {
|
|
2287
2448
|
const toolUseIds = /* @__PURE__ */ new Set();
|
|
2288
2449
|
for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
|
|
2289
2450
|
let removedCount = 0;
|
|
@@ -2294,22 +2455,127 @@ function filterOrphanedToolResults(messages) {
|
|
|
2294
2455
|
}
|
|
2295
2456
|
return true;
|
|
2296
2457
|
});
|
|
2297
|
-
if (removedCount > 0) consola.debug(`
|
|
2458
|
+
if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_result`);
|
|
2298
2459
|
return filtered;
|
|
2299
2460
|
}
|
|
2461
|
+
/** Get tool_result IDs from all tool messages */
|
|
2462
|
+
function getToolResultIds$1(messages) {
|
|
2463
|
+
const ids = /* @__PURE__ */ new Set();
|
|
2464
|
+
for (const msg of messages) if (msg.role === "tool" && msg.tool_call_id) ids.add(msg.tool_call_id);
|
|
2465
|
+
return ids;
|
|
2466
|
+
}
|
|
2467
|
+
/** Filter orphaned tool_use messages (those without matching tool_result) */
|
|
2468
|
+
function filterOrphanedToolUse$1(messages) {
|
|
2469
|
+
const toolResultIds = getToolResultIds$1(messages);
|
|
2470
|
+
const result = [];
|
|
2471
|
+
let removedCount = 0;
|
|
2472
|
+
for (const msg of messages) {
|
|
2473
|
+
if (msg.role === "assistant" && msg.tool_calls) {
|
|
2474
|
+
const filteredToolCalls = msg.tool_calls.filter((tc) => {
|
|
2475
|
+
if (!toolResultIds.has(tc.id)) {
|
|
2476
|
+
removedCount++;
|
|
2477
|
+
return false;
|
|
2478
|
+
}
|
|
2479
|
+
return true;
|
|
2480
|
+
});
|
|
2481
|
+
if (filteredToolCalls.length === 0) {
|
|
2482
|
+
if (msg.content) result.push({
|
|
2483
|
+
...msg,
|
|
2484
|
+
tool_calls: void 0
|
|
2485
|
+
});
|
|
2486
|
+
continue;
|
|
2487
|
+
}
|
|
2488
|
+
result.push({
|
|
2489
|
+
...msg,
|
|
2490
|
+
tool_calls: filteredToolCalls
|
|
2491
|
+
});
|
|
2492
|
+
continue;
|
|
2493
|
+
}
|
|
2494
|
+
result.push(msg);
|
|
2495
|
+
}
|
|
2496
|
+
if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_use`);
|
|
2497
|
+
return result;
|
|
2498
|
+
}
|
|
2300
2499
|
/** Ensure messages start with a user message */
|
|
2301
|
-
function ensureStartsWithUser(messages) {
|
|
2500
|
+
function ensureStartsWithUser$1(messages) {
|
|
2302
2501
|
let startIndex = 0;
|
|
2303
2502
|
while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
|
|
2304
|
-
if (startIndex > 0) consola.debug(`
|
|
2503
|
+
if (startIndex > 0) consola.debug(`[AutoTruncate:OpenAI] Skipped ${startIndex} leading non-user messages`);
|
|
2305
2504
|
return messages.slice(startIndex);
|
|
2306
2505
|
}
|
|
2506
|
+
/** Threshold for large tool message content (bytes) */
|
|
2507
|
+
const LARGE_TOOL_RESULT_THRESHOLD$1 = 1e4;
|
|
2508
|
+
/** Maximum length for compressed tool_result summary */
|
|
2509
|
+
const COMPRESSED_SUMMARY_LENGTH$1 = 500;
|
|
2510
|
+
/**
|
|
2511
|
+
* Compress a large tool message content to a summary.
|
|
2512
|
+
* Keeps the first and last portions with a note about truncation.
|
|
2513
|
+
*/
|
|
2514
|
+
function compressToolResultContent$1(content) {
|
|
2515
|
+
if (content.length <= LARGE_TOOL_RESULT_THRESHOLD$1) return content;
|
|
2516
|
+
const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH$1 / 2);
|
|
2517
|
+
const start$1 = content.slice(0, halfLen);
|
|
2518
|
+
const end = content.slice(-halfLen);
|
|
2519
|
+
const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH$1;
|
|
2520
|
+
return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
|
|
2521
|
+
}
|
|
2522
|
+
/**
|
|
2523
|
+
* Smart compression strategy for OpenAI format:
|
|
2524
|
+
* 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
|
|
2525
|
+
* 2. Messages before that threshold get their tool content compressed
|
|
2526
|
+
* 3. Returns compressed messages and stats
|
|
2527
|
+
*
|
|
2528
|
+
* @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
|
|
2529
|
+
*/
|
|
2530
|
+
function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePercent) {
|
|
2531
|
+
const n = messages.length;
|
|
2532
|
+
const cumTokens = Array.from({ length: n + 1 }, () => 0);
|
|
2533
|
+
const cumBytes = Array.from({ length: n + 1 }, () => 0);
|
|
2534
|
+
for (let i = n - 1; i >= 0; i--) {
|
|
2535
|
+
const msg = messages[i];
|
|
2536
|
+
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
|
|
2537
|
+
cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
|
|
2538
|
+
}
|
|
2539
|
+
const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
|
|
2540
|
+
const preserveByteLimit = Math.floor(byteLimit * preservePercent);
|
|
2541
|
+
let thresholdIndex = n;
|
|
2542
|
+
for (let i = n - 1; i >= 0; i--) {
|
|
2543
|
+
if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
|
|
2544
|
+
thresholdIndex = i + 1;
|
|
2545
|
+
break;
|
|
2546
|
+
}
|
|
2547
|
+
thresholdIndex = i;
|
|
2548
|
+
}
|
|
2549
|
+
if (thresholdIndex >= n) return {
|
|
2550
|
+
messages,
|
|
2551
|
+
compressedCount: 0,
|
|
2552
|
+
compressThresholdIndex: n
|
|
2553
|
+
};
|
|
2554
|
+
const result = [];
|
|
2555
|
+
let compressedCount = 0;
|
|
2556
|
+
for (const [i, msg] of messages.entries()) {
|
|
2557
|
+
if (i < thresholdIndex && msg.role === "tool" && typeof msg.content === "string" && msg.content.length > LARGE_TOOL_RESULT_THRESHOLD$1) {
|
|
2558
|
+
compressedCount++;
|
|
2559
|
+
result.push({
|
|
2560
|
+
...msg,
|
|
2561
|
+
content: compressToolResultContent$1(msg.content)
|
|
2562
|
+
});
|
|
2563
|
+
continue;
|
|
2564
|
+
}
|
|
2565
|
+
result.push(msg);
|
|
2566
|
+
}
|
|
2567
|
+
return {
|
|
2568
|
+
messages: result,
|
|
2569
|
+
compressedCount,
|
|
2570
|
+
compressThresholdIndex: thresholdIndex
|
|
2571
|
+
};
|
|
2572
|
+
}
|
|
2307
2573
|
/**
|
|
2308
2574
|
* Find the optimal index from which to preserve messages.
|
|
2309
2575
|
* Uses binary search with pre-calculated cumulative sums.
|
|
2310
2576
|
* Returns the smallest index where the preserved portion fits within limits.
|
|
2311
2577
|
*/
|
|
2312
|
-
function findOptimalPreserveIndex(params) {
|
|
2578
|
+
function findOptimalPreserveIndex$1(params) {
|
|
2313
2579
|
const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
|
|
2314
2580
|
if (messages.length === 0) return 0;
|
|
2315
2581
|
const markerBytes = 200;
|
|
@@ -2321,8 +2587,8 @@ function findOptimalPreserveIndex(params) {
|
|
|
2321
2587
|
const cumBytes = Array.from({ length: n + 1 }, () => 0);
|
|
2322
2588
|
for (let i = n - 1; i >= 0; i--) {
|
|
2323
2589
|
const msg = messages[i];
|
|
2324
|
-
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
|
|
2325
|
-
cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
|
|
2590
|
+
cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
|
|
2591
|
+
cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
|
|
2326
2592
|
}
|
|
2327
2593
|
let left = 0;
|
|
2328
2594
|
let right = n;
|
|
@@ -2336,12 +2602,12 @@ function findOptimalPreserveIndex(params) {
|
|
|
2336
2602
|
/**
|
|
2337
2603
|
* Check if payload needs compaction based on model limits or byte size.
|
|
2338
2604
|
*/
|
|
2339
|
-
async function
|
|
2605
|
+
async function checkNeedsCompactionOpenAI(payload, model, config = {}) {
|
|
2340
2606
|
const cfg = {
|
|
2341
|
-
...
|
|
2607
|
+
...DEFAULT_AUTO_TRUNCATE_CONFIG,
|
|
2342
2608
|
...config
|
|
2343
2609
|
};
|
|
2344
|
-
const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
|
|
2610
|
+
const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
|
|
2345
2611
|
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
2346
2612
|
const currentBytes = JSON.stringify(payload).length;
|
|
2347
2613
|
const exceedsTokens = currentTokens > tokenLimit;
|
|
@@ -2359,23 +2625,90 @@ async function checkNeedsCompaction(payload, model, config = {}) {
|
|
|
2359
2625
|
reason
|
|
2360
2626
|
};
|
|
2361
2627
|
}
|
|
2362
|
-
/**
|
|
2363
|
-
|
|
2628
|
+
/**
|
|
2629
|
+
* Generate a summary of removed messages for context.
|
|
2630
|
+
* Extracts key information like tool calls and topics.
|
|
2631
|
+
*/
|
|
2632
|
+
function generateRemovedMessagesSummary$1(removedMessages) {
|
|
2633
|
+
const toolCalls = [];
|
|
2634
|
+
let userMessageCount = 0;
|
|
2635
|
+
let assistantMessageCount = 0;
|
|
2636
|
+
for (const msg of removedMessages) {
|
|
2637
|
+
if (msg.role === "user") userMessageCount++;
|
|
2638
|
+
else if (msg.role === "assistant") assistantMessageCount++;
|
|
2639
|
+
if (msg.tool_calls) {
|
|
2640
|
+
for (const tc of msg.tool_calls) if (tc.function.name) toolCalls.push(tc.function.name);
|
|
2641
|
+
}
|
|
2642
|
+
}
|
|
2643
|
+
const parts = [];
|
|
2644
|
+
if (userMessageCount > 0 || assistantMessageCount > 0) {
|
|
2645
|
+
const breakdown = [];
|
|
2646
|
+
if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
|
|
2647
|
+
if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
|
|
2648
|
+
parts.push(`Messages: ${breakdown.join(", ")}`);
|
|
2649
|
+
}
|
|
2650
|
+
if (toolCalls.length > 0) {
|
|
2651
|
+
const uniqueTools = [...new Set(toolCalls)];
|
|
2652
|
+
const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
|
|
2653
|
+
parts.push(`Tools used: ${displayTools.join(", ")}`);
|
|
2654
|
+
}
|
|
2655
|
+
return parts.join(". ");
|
|
2656
|
+
}
|
|
2657
|
+
/**
|
|
2658
|
+
* Add a compression notice to the system message.
|
|
2659
|
+
* Informs the model that some tool content has been compressed.
|
|
2660
|
+
*/
|
|
2661
|
+
function addCompressionNotice$1(payload, compressedCount) {
|
|
2662
|
+
const notice = `\n\n[CONTEXT NOTE]\n${compressedCount} large tool results have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]`;
|
|
2663
|
+
const messages = [...payload.messages];
|
|
2664
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
2665
|
+
const msg = messages[i];
|
|
2666
|
+
if (msg.role === "system" || msg.role === "developer") {
|
|
2667
|
+
if (typeof msg.content === "string") messages[i] = {
|
|
2668
|
+
...msg,
|
|
2669
|
+
content: msg.content + notice
|
|
2670
|
+
};
|
|
2671
|
+
break;
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2674
|
+
return {
|
|
2675
|
+
...payload,
|
|
2676
|
+
messages
|
|
2677
|
+
};
|
|
2678
|
+
}
|
|
2679
|
+
/**
|
|
2680
|
+
* Create truncation context to append to system messages.
|
|
2681
|
+
*/
|
|
2682
|
+
function createTruncationSystemContext$1(removedCount, compressedCount, summary) {
|
|
2683
|
+
let context = `\n\n[CONVERSATION CONTEXT]\n`;
|
|
2684
|
+
if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
|
|
2685
|
+
if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
|
|
2686
|
+
if (summary) context += `Summary of removed content: ${summary}\n`;
|
|
2687
|
+
context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
|
|
2688
|
+
return context;
|
|
2689
|
+
}
|
|
2690
|
+
/** Create a truncation marker message (fallback when no system message) */
|
|
2691
|
+
function createTruncationMarker$2(removedCount, compressedCount, summary) {
|
|
2692
|
+
const parts = [];
|
|
2693
|
+
if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
|
|
2694
|
+
if (compressedCount > 0) parts.push(`${compressedCount} tool results compressed`);
|
|
2695
|
+
let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
|
|
2696
|
+
if (summary) content += `\n[Summary: ${summary}]`;
|
|
2364
2697
|
return {
|
|
2365
2698
|
role: "user",
|
|
2366
|
-
content
|
|
2699
|
+
content
|
|
2367
2700
|
};
|
|
2368
2701
|
}
|
|
2369
2702
|
/**
|
|
2370
|
-
* Perform auto-
|
|
2703
|
+
* Perform auto-truncation on a payload that exceeds limits.
|
|
2371
2704
|
* Uses binary search to find the optimal truncation point.
|
|
2372
2705
|
*/
|
|
2373
|
-
async function
|
|
2706
|
+
async function autoTruncateOpenAI(payload, model, config = {}) {
|
|
2374
2707
|
const cfg = {
|
|
2375
|
-
...
|
|
2708
|
+
...DEFAULT_AUTO_TRUNCATE_CONFIG,
|
|
2376
2709
|
...config
|
|
2377
2710
|
};
|
|
2378
|
-
const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
|
|
2711
|
+
const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
|
|
2379
2712
|
const originalBytes = JSON.stringify(payload).length;
|
|
2380
2713
|
const originalTokens = (await getTokenCount(payload, model)).input;
|
|
2381
2714
|
if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
|
|
@@ -2387,18 +2720,44 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2387
2720
|
};
|
|
2388
2721
|
const exceedsTokens = originalTokens > tokenLimit;
|
|
2389
2722
|
const exceedsBytes = originalBytes > byteLimit;
|
|
2390
|
-
let
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2723
|
+
let workingMessages = payload.messages;
|
|
2724
|
+
let compressedCount = 0;
|
|
2725
|
+
if (state.compressToolResults) {
|
|
2726
|
+
const compressionResult = smartCompressToolResults$1(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
|
|
2727
|
+
workingMessages = compressionResult.messages;
|
|
2728
|
+
compressedCount = compressionResult.compressedCount;
|
|
2729
|
+
const compressedPayload = {
|
|
2730
|
+
...payload,
|
|
2731
|
+
messages: workingMessages
|
|
2732
|
+
};
|
|
2733
|
+
const compressedBytes = JSON.stringify(compressedPayload).length;
|
|
2734
|
+
const compressedTokenCount = await getTokenCount(compressedPayload, model);
|
|
2735
|
+
if (compressedTokenCount.input <= tokenLimit && compressedBytes <= byteLimit) {
|
|
2736
|
+
let reason$1 = "tokens";
|
|
2737
|
+
if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
|
|
2738
|
+
else if (exceedsBytes) reason$1 = "size";
|
|
2739
|
+
consola.info(`[AutoTruncate:OpenAI] ${reason$1}: ${originalTokens}→${compressedTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
|
|
2740
|
+
const noticePayload = addCompressionNotice$1(compressedPayload, compressedCount);
|
|
2741
|
+
const noticeTokenCount = await getTokenCount(noticePayload, model);
|
|
2742
|
+
return {
|
|
2743
|
+
payload: noticePayload,
|
|
2744
|
+
wasCompacted: true,
|
|
2745
|
+
originalTokens,
|
|
2746
|
+
compactedTokens: noticeTokenCount.input,
|
|
2747
|
+
removedMessageCount: 0
|
|
2748
|
+
};
|
|
2749
|
+
}
|
|
2750
|
+
}
|
|
2751
|
+
const { systemMessages, conversationMessages } = extractSystemMessages(workingMessages);
|
|
2752
|
+
const messagesJson = JSON.stringify(workingMessages);
|
|
2753
|
+
const payloadOverhead = JSON.stringify({
|
|
2754
|
+
...payload,
|
|
2755
|
+
messages: workingMessages
|
|
2756
|
+
}).length - messagesJson.length;
|
|
2757
|
+
const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes$1(m) + 1, 0);
|
|
2758
|
+
const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens$1(m), 0);
|
|
2759
|
+
consola.debug(`[AutoTruncate:OpenAI] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
|
|
2760
|
+
const preserveIndex = findOptimalPreserveIndex$1({
|
|
2402
2761
|
messages: conversationMessages,
|
|
2403
2762
|
systemBytes,
|
|
2404
2763
|
systemTokens,
|
|
@@ -2407,7 +2766,7 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2407
2766
|
byteLimit
|
|
2408
2767
|
});
|
|
2409
2768
|
if (preserveIndex === 0) {
|
|
2410
|
-
consola.warn("
|
|
2769
|
+
consola.warn("[AutoTruncate:OpenAI] Cannot truncate, system messages too large");
|
|
2411
2770
|
return {
|
|
2412
2771
|
payload,
|
|
2413
2772
|
wasCompacted: false,
|
|
@@ -2417,7 +2776,7 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2417
2776
|
};
|
|
2418
2777
|
}
|
|
2419
2778
|
if (preserveIndex >= conversationMessages.length) {
|
|
2420
|
-
consola.warn("
|
|
2779
|
+
consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
|
|
2421
2780
|
return {
|
|
2422
2781
|
payload,
|
|
2423
2782
|
wasCompacted: false,
|
|
@@ -2427,11 +2786,13 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2427
2786
|
};
|
|
2428
2787
|
}
|
|
2429
2788
|
let preserved = conversationMessages.slice(preserveIndex);
|
|
2430
|
-
preserved = filterOrphanedToolResults(preserved);
|
|
2431
|
-
preserved =
|
|
2432
|
-
preserved =
|
|
2789
|
+
preserved = filterOrphanedToolResults$1(preserved);
|
|
2790
|
+
preserved = filterOrphanedToolUse$1(preserved);
|
|
2791
|
+
preserved = ensureStartsWithUser$1(preserved);
|
|
2792
|
+
preserved = filterOrphanedToolResults$1(preserved);
|
|
2793
|
+
preserved = filterOrphanedToolUse$1(preserved);
|
|
2433
2794
|
if (preserved.length === 0) {
|
|
2434
|
-
consola.warn("
|
|
2795
|
+
consola.warn("[AutoTruncate:OpenAI] All messages filtered out after cleanup");
|
|
2435
2796
|
return {
|
|
2436
2797
|
payload,
|
|
2437
2798
|
wasCompacted: false,
|
|
@@ -2440,20 +2801,36 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2440
2801
|
removedMessageCount: 0
|
|
2441
2802
|
};
|
|
2442
2803
|
}
|
|
2804
|
+
const removedMessages = conversationMessages.slice(0, preserveIndex);
|
|
2443
2805
|
const removedCount = conversationMessages.length - preserved.length;
|
|
2444
|
-
const
|
|
2806
|
+
const summary = generateRemovedMessagesSummary$1(removedMessages);
|
|
2807
|
+
let newSystemMessages = systemMessages;
|
|
2808
|
+
let newMessages = preserved;
|
|
2809
|
+
if (systemMessages.length > 0) {
|
|
2810
|
+
const truncationContext = createTruncationSystemContext$1(removedCount, compressedCount, summary);
|
|
2811
|
+
const lastSystemIdx = systemMessages.length - 1;
|
|
2812
|
+
const lastSystem = systemMessages[lastSystemIdx];
|
|
2813
|
+
const updatedSystem = {
|
|
2814
|
+
...lastSystem,
|
|
2815
|
+
content: typeof lastSystem.content === "string" ? lastSystem.content + truncationContext : lastSystem.content
|
|
2816
|
+
};
|
|
2817
|
+
newSystemMessages = [...systemMessages.slice(0, lastSystemIdx), updatedSystem];
|
|
2818
|
+
} else newMessages = [createTruncationMarker$2(removedCount, compressedCount, summary), ...preserved];
|
|
2445
2819
|
const newPayload = {
|
|
2446
2820
|
...payload,
|
|
2447
|
-
messages: [
|
|
2448
|
-
...systemMessages,
|
|
2449
|
-
marker,
|
|
2450
|
-
...preserved
|
|
2451
|
-
]
|
|
2821
|
+
messages: [...newSystemMessages, ...newMessages]
|
|
2452
2822
|
};
|
|
2453
2823
|
const newBytes = JSON.stringify(newPayload).length;
|
|
2454
2824
|
const newTokenCount = await getTokenCount(newPayload, model);
|
|
2455
|
-
|
|
2456
|
-
if (
|
|
2825
|
+
let reason = "tokens";
|
|
2826
|
+
if (exceedsTokens && exceedsBytes) reason = "tokens+size";
|
|
2827
|
+
else if (exceedsBytes) reason = "size";
|
|
2828
|
+
const actions = [];
|
|
2829
|
+
if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
|
|
2830
|
+
if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
|
|
2831
|
+
const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
|
|
2832
|
+
consola.info(`[AutoTruncate:OpenAI] ${reason}: ${originalTokens}→${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
|
|
2833
|
+
if (newBytes > byteLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
|
|
2457
2834
|
return {
|
|
2458
2835
|
payload: newPayload,
|
|
2459
2836
|
wasCompacted: true,
|
|
@@ -2463,13 +2840,13 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2463
2840
|
};
|
|
2464
2841
|
}
|
|
2465
2842
|
/**
|
|
2466
|
-
* Create a marker to prepend to responses indicating auto-
|
|
2843
|
+
* Create a marker to prepend to responses indicating auto-truncation occurred.
|
|
2467
2844
|
*/
|
|
2468
|
-
function
|
|
2845
|
+
function createTruncationResponseMarkerOpenAI(result) {
|
|
2469
2846
|
if (!result.wasCompacted) return "";
|
|
2470
2847
|
const reduction = result.originalTokens - result.compactedTokens;
|
|
2471
2848
|
const percentage = Math.round(reduction / result.originalTokens * 100);
|
|
2472
|
-
return `\n\n---\n[Auto-
|
|
2849
|
+
return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
2473
2850
|
}
|
|
2474
2851
|
|
|
2475
2852
|
//#endregion
|
|
@@ -2489,7 +2866,7 @@ const createChatCompletions = async (payload) => {
|
|
|
2489
2866
|
});
|
|
2490
2867
|
if (!response.ok) {
|
|
2491
2868
|
consola.error("Failed to create chat completions", response);
|
|
2492
|
-
throw await HTTPError.fromResponse("Failed to create chat completions", response);
|
|
2869
|
+
throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
|
|
2493
2870
|
}
|
|
2494
2871
|
if (payload.stream) return events(response);
|
|
2495
2872
|
return await response.json();
|
|
@@ -2539,6 +2916,18 @@ function failTracking(trackingId, error) {
|
|
|
2539
2916
|
if (!trackingId) return;
|
|
2540
2917
|
requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
|
|
2541
2918
|
}
|
|
2919
|
+
/**
|
|
2920
|
+
* Create a marker to prepend to responses indicating auto-truncation occurred.
|
|
2921
|
+
* Works with both OpenAI and Anthropic truncate results.
|
|
2922
|
+
*/
|
|
2923
|
+
function createTruncationMarker(result) {
|
|
2924
|
+
if (!result.wasCompacted) return "";
|
|
2925
|
+
const { originalTokens, compactedTokens, removedMessageCount } = result;
|
|
2926
|
+
if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
|
|
2927
|
+
const reduction = originalTokens - compactedTokens;
|
|
2928
|
+
const percentage = Math.round(reduction / originalTokens * 100);
|
|
2929
|
+
return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
|
|
2930
|
+
}
|
|
2542
2931
|
/** Record streaming error to history (works with any accumulator type) */
|
|
2543
2932
|
function recordStreamError(opts) {
|
|
2544
2933
|
const { acc, fallbackModel, ctx, error } = opts;
|
|
@@ -2557,37 +2946,37 @@ function recordStreamError(opts) {
|
|
|
2557
2946
|
function isNonStreaming(response) {
|
|
2558
2947
|
return Object.hasOwn(response, "choices");
|
|
2559
2948
|
}
|
|
2560
|
-
/** Build final payload with auto-
|
|
2949
|
+
/** Build final payload with auto-truncate if needed */
|
|
2561
2950
|
async function buildFinalPayload(payload, model) {
|
|
2562
|
-
if (!state.
|
|
2563
|
-
if (state.
|
|
2951
|
+
if (!state.autoTruncate || !model) {
|
|
2952
|
+
if (state.autoTruncate && !model) consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
|
|
2564
2953
|
return {
|
|
2565
2954
|
finalPayload: payload,
|
|
2566
|
-
|
|
2955
|
+
truncateResult: null
|
|
2567
2956
|
};
|
|
2568
2957
|
}
|
|
2569
2958
|
try {
|
|
2570
|
-
const check = await
|
|
2571
|
-
consola.debug(`Auto-
|
|
2959
|
+
const check = await checkNeedsCompactionOpenAI(payload, model);
|
|
2960
|
+
consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
|
|
2572
2961
|
if (!check.needed) return {
|
|
2573
2962
|
finalPayload: payload,
|
|
2574
|
-
|
|
2963
|
+
truncateResult: null
|
|
2575
2964
|
};
|
|
2576
2965
|
let reasonText;
|
|
2577
2966
|
if (check.reason === "both") reasonText = "tokens and size";
|
|
2578
2967
|
else if (check.reason === "bytes") reasonText = "size";
|
|
2579
2968
|
else reasonText = "tokens";
|
|
2580
|
-
consola.info(`Auto-
|
|
2581
|
-
const
|
|
2969
|
+
consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
|
|
2970
|
+
const truncateResult = await autoTruncateOpenAI(payload, model);
|
|
2582
2971
|
return {
|
|
2583
|
-
finalPayload:
|
|
2584
|
-
|
|
2972
|
+
finalPayload: truncateResult.payload,
|
|
2973
|
+
truncateResult
|
|
2585
2974
|
};
|
|
2586
2975
|
} catch (error) {
|
|
2587
|
-
consola.warn("Auto-
|
|
2976
|
+
consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
|
|
2588
2977
|
return {
|
|
2589
2978
|
finalPayload: payload,
|
|
2590
|
-
|
|
2979
|
+
truncateResult: null
|
|
2591
2980
|
};
|
|
2592
2981
|
}
|
|
2593
2982
|
}
|
|
@@ -2631,7 +3020,7 @@ async function logPayloadSizeInfo(payload, model) {
|
|
|
2631
3020
|
if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
|
|
2632
3021
|
consola.info("");
|
|
2633
3022
|
consola.info(" Suggestions:");
|
|
2634
|
-
if (!state.
|
|
3023
|
+
if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
|
|
2635
3024
|
if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
|
|
2636
3025
|
consola.info(" • Start a new conversation with /clear or /reset");
|
|
2637
3026
|
consola.info(" • Reduce conversation history by deleting old messages");
|
|
@@ -2663,8 +3052,8 @@ async function handleCompletion$1(c) {
|
|
|
2663
3052
|
};
|
|
2664
3053
|
const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
|
|
2665
3054
|
await logTokenCount(originalPayload, selectedModel);
|
|
2666
|
-
const { finalPayload,
|
|
2667
|
-
if (
|
|
3055
|
+
const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
|
|
3056
|
+
if (truncateResult) ctx.truncateResult = truncateResult;
|
|
2668
3057
|
const payload = isNullish(finalPayload.max_tokens) ? {
|
|
2669
3058
|
...finalPayload,
|
|
2670
3059
|
max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
|
|
@@ -2717,8 +3106,8 @@ async function logTokenCount(payload, selectedModel) {
|
|
|
2717
3106
|
function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
2718
3107
|
consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
|
|
2719
3108
|
let response = originalResponse;
|
|
2720
|
-
if (ctx.
|
|
2721
|
-
const marker =
|
|
3109
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
|
|
3110
|
+
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
2722
3111
|
response = {
|
|
2723
3112
|
...response,
|
|
2724
3113
|
choices: response.choices.map((choice$1, i) => i === 0 ? {
|
|
@@ -2786,8 +3175,8 @@ async function handleStreamingResponse$1(opts) {
|
|
|
2786
3175
|
const { stream, response, payload, ctx } = opts;
|
|
2787
3176
|
const acc = createStreamAccumulator();
|
|
2788
3177
|
try {
|
|
2789
|
-
if (ctx.
|
|
2790
|
-
const marker =
|
|
3178
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
3179
|
+
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
2791
3180
|
const markerChunk = {
|
|
2792
3181
|
id: `compact-marker-${Date.now()}`,
|
|
2793
3182
|
object: "chat.completion.chunk",
|
|
@@ -4070,53 +4459,624 @@ historyRoutes.get("/", (c) => {
|
|
|
4070
4459
|
});
|
|
4071
4460
|
|
|
4072
4461
|
//#endregion
|
|
4073
|
-
//#region src/
|
|
4074
|
-
function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
4075
|
-
if (finishReason === null) return null;
|
|
4076
|
-
return {
|
|
4077
|
-
stop: "end_turn",
|
|
4078
|
-
length: "max_tokens",
|
|
4079
|
-
tool_calls: "tool_use",
|
|
4080
|
-
content_filter: "end_turn"
|
|
4081
|
-
}[finishReason];
|
|
4082
|
-
}
|
|
4083
|
-
|
|
4084
|
-
//#endregion
|
|
4085
|
-
//#region src/routes/messages/non-stream-translation.ts
|
|
4086
|
-
const OPENAI_TOOL_NAME_LIMIT = 64;
|
|
4462
|
+
//#region src/lib/auto-truncate-anthropic.ts
|
|
4087
4463
|
/**
|
|
4088
|
-
*
|
|
4089
|
-
* This handles edge cases where conversation history may be incomplete:
|
|
4090
|
-
* - Session interruptions where tool execution was cut off
|
|
4091
|
-
* - Previous request failures
|
|
4092
|
-
* - Client sending truncated history
|
|
4093
|
-
*
|
|
4094
|
-
* Adding placeholder responses prevents API errors and maintains protocol compliance.
|
|
4464
|
+
* Convert Anthropic message content to text for token counting.
|
|
4095
4465
|
*/
|
|
4096
|
-
function
|
|
4097
|
-
|
|
4098
|
-
|
|
4099
|
-
|
|
4100
|
-
|
|
4101
|
-
|
|
4102
|
-
|
|
4103
|
-
|
|
4104
|
-
|
|
4105
|
-
|
|
4106
|
-
|
|
4107
|
-
|
|
4466
|
+
function contentToText(content) {
|
|
4467
|
+
if (typeof content === "string") return content;
|
|
4468
|
+
const parts = [];
|
|
4469
|
+
for (const block of content) switch (block.type) {
|
|
4470
|
+
case "text":
|
|
4471
|
+
parts.push(block.text);
|
|
4472
|
+
break;
|
|
4473
|
+
case "tool_use":
|
|
4474
|
+
parts.push(`[tool_use: ${block.name}]`, JSON.stringify(block.input));
|
|
4475
|
+
break;
|
|
4476
|
+
case "tool_result":
|
|
4477
|
+
if (typeof block.content === "string") parts.push(block.content);
|
|
4478
|
+
else if (Array.isArray(block.content)) {
|
|
4479
|
+
for (const inner of block.content) if (inner.type === "text") parts.push(inner.text);
|
|
4108
4480
|
}
|
|
4109
|
-
|
|
4110
|
-
|
|
4111
|
-
|
|
4112
|
-
|
|
4113
|
-
|
|
4114
|
-
|
|
4481
|
+
break;
|
|
4482
|
+
case "thinking":
|
|
4483
|
+
parts.push(block.thinking);
|
|
4484
|
+
break;
|
|
4485
|
+
default: break;
|
|
4486
|
+
}
|
|
4487
|
+
return parts.join("\n");
|
|
4488
|
+
}
|
|
4489
|
+
/**
|
|
4490
|
+
* Estimate tokens for a message (fast, synchronous).
|
|
4491
|
+
* Uses ~4 chars per token approximation for internal calculations.
|
|
4492
|
+
* The final result is verified with the accurate tokenizer.
|
|
4493
|
+
*/
|
|
4494
|
+
function estimateMessageTokens(msg) {
|
|
4495
|
+
const text = contentToText(msg.content);
|
|
4496
|
+
return Math.ceil(text.length / 4) + 4;
|
|
4497
|
+
}
|
|
4498
|
+
/**
|
|
4499
|
+
* Count tokens for an Anthropic message using the model's tokenizer.
|
|
4500
|
+
*/
|
|
4501
|
+
async function countMessageTokens(msg, model) {
|
|
4502
|
+
const text = contentToText(msg.content);
|
|
4503
|
+
return await countTextTokens(text, model) + 4;
|
|
4504
|
+
}
|
|
4505
|
+
/**
|
|
4506
|
+
* Count tokens for system prompt.
|
|
4507
|
+
*/
|
|
4508
|
+
async function countSystemTokens(system, model) {
|
|
4509
|
+
if (!system) return 0;
|
|
4510
|
+
if (typeof system === "string") return await countTextTokens(system, model) + 4;
|
|
4511
|
+
const text = system.map((block) => block.text).join("\n");
|
|
4512
|
+
return await countTextTokens(text, model) + 4;
|
|
4513
|
+
}
|
|
4514
|
+
/**
|
|
4515
|
+
* Count total tokens for the payload using the model's tokenizer.
|
|
4516
|
+
*/
|
|
4517
|
+
async function countTotalTokens(payload, model) {
|
|
4518
|
+
let total = await countSystemTokens(payload.system, model);
|
|
4519
|
+
for (const msg of payload.messages) total += await countMessageTokens(msg, model);
|
|
4520
|
+
if (payload.tools) {
|
|
4521
|
+
const toolsText = JSON.stringify(payload.tools);
|
|
4522
|
+
total += await countTextTokens(toolsText, model);
|
|
4523
|
+
}
|
|
4524
|
+
return total;
|
|
4525
|
+
}
|
|
4526
|
+
function getMessageBytes(msg) {
|
|
4527
|
+
return JSON.stringify(msg).length;
|
|
4528
|
+
}
|
|
4529
|
+
/**
|
|
4530
|
+
* Get tool_use IDs from an assistant message.
|
|
4531
|
+
*/
|
|
4532
|
+
function getToolUseIds(msg) {
|
|
4533
|
+
if (msg.role !== "assistant") return [];
|
|
4534
|
+
if (typeof msg.content === "string") return [];
|
|
4535
|
+
const ids = [];
|
|
4536
|
+
for (const block of msg.content) if (block.type === "tool_use") ids.push(block.id);
|
|
4537
|
+
return ids;
|
|
4538
|
+
}
|
|
4539
|
+
/**
|
|
4540
|
+
* Get tool_result IDs from a user message.
|
|
4541
|
+
*/
|
|
4542
|
+
function getToolResultIds(msg) {
|
|
4543
|
+
if (msg.role !== "user") return [];
|
|
4544
|
+
if (typeof msg.content === "string") return [];
|
|
4545
|
+
const ids = [];
|
|
4546
|
+
for (const block of msg.content) if (block.type === "tool_result") ids.push(block.tool_use_id);
|
|
4547
|
+
return ids;
|
|
4548
|
+
}
|
|
4549
|
+
/**
|
|
4550
|
+
* Filter orphaned tool_result messages (those without matching tool_use).
|
|
4551
|
+
*/
|
|
4552
|
+
function filterOrphanedToolResults(messages) {
|
|
4553
|
+
const toolUseIds = /* @__PURE__ */ new Set();
|
|
4554
|
+
for (const msg of messages) for (const id of getToolUseIds(msg)) toolUseIds.add(id);
|
|
4555
|
+
const result = [];
|
|
4556
|
+
let removedCount = 0;
|
|
4557
|
+
for (const msg of messages) {
|
|
4558
|
+
if (msg.role === "user" && typeof msg.content !== "string") {
|
|
4559
|
+
if (getToolResultIds(msg).some((id) => !toolUseIds.has(id))) {
|
|
4560
|
+
const filteredContent = msg.content.filter((block) => {
|
|
4561
|
+
if (block.type === "tool_result" && !toolUseIds.has(block.tool_use_id)) {
|
|
4562
|
+
removedCount++;
|
|
4563
|
+
return false;
|
|
4564
|
+
}
|
|
4565
|
+
return true;
|
|
4115
4566
|
});
|
|
4567
|
+
if (filteredContent.length === 0) continue;
|
|
4568
|
+
result.push({
|
|
4569
|
+
...msg,
|
|
4570
|
+
content: filteredContent
|
|
4571
|
+
});
|
|
4572
|
+
continue;
|
|
4116
4573
|
}
|
|
4117
4574
|
}
|
|
4575
|
+
result.push(msg);
|
|
4118
4576
|
}
|
|
4119
|
-
|
|
4577
|
+
if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_result`);
|
|
4578
|
+
return result;
|
|
4579
|
+
}
|
|
4580
|
+
/**
|
|
4581
|
+
* Filter orphaned tool_use messages (those without matching tool_result).
|
|
4582
|
+
* In Anthropic API, every tool_use must have a corresponding tool_result.
|
|
4583
|
+
*/
|
|
4584
|
+
function filterOrphanedToolUse(messages) {
|
|
4585
|
+
const toolResultIds = /* @__PURE__ */ new Set();
|
|
4586
|
+
for (const msg of messages) for (const id of getToolResultIds(msg)) toolResultIds.add(id);
|
|
4587
|
+
const result = [];
|
|
4588
|
+
let removedCount = 0;
|
|
4589
|
+
for (const msg of messages) {
|
|
4590
|
+
if (msg.role === "assistant" && typeof msg.content !== "string") {
|
|
4591
|
+
if (getToolUseIds(msg).some((id) => !toolResultIds.has(id))) {
|
|
4592
|
+
const filteredContent = msg.content.filter((block) => {
|
|
4593
|
+
if (block.type === "tool_use" && !toolResultIds.has(block.id)) {
|
|
4594
|
+
removedCount++;
|
|
4595
|
+
return false;
|
|
4596
|
+
}
|
|
4597
|
+
return true;
|
|
4598
|
+
});
|
|
4599
|
+
if (filteredContent.length === 0) continue;
|
|
4600
|
+
result.push({
|
|
4601
|
+
...msg,
|
|
4602
|
+
content: filteredContent
|
|
4603
|
+
});
|
|
4604
|
+
continue;
|
|
4605
|
+
}
|
|
4606
|
+
}
|
|
4607
|
+
result.push(msg);
|
|
4608
|
+
}
|
|
4609
|
+
if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_use`);
|
|
4610
|
+
return result;
|
|
4611
|
+
}
|
|
4612
|
+
/**
|
|
4613
|
+
* Ensure messages start with a user message.
|
|
4614
|
+
*/
|
|
4615
|
+
function ensureStartsWithUser(messages) {
|
|
4616
|
+
let startIndex = 0;
|
|
4617
|
+
while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
|
|
4618
|
+
if (startIndex > 0) consola.debug(`[AutoTruncate:Anthropic] Skipped ${startIndex} leading non-user messages`);
|
|
4619
|
+
return messages.slice(startIndex);
|
|
4620
|
+
}
|
|
4621
|
+
/** Threshold for large tool_result content (bytes) */
|
|
4622
|
+
const LARGE_TOOL_RESULT_THRESHOLD = 1e4;
|
|
4623
|
+
/** Maximum length for compressed tool_result summary */
|
|
4624
|
+
const COMPRESSED_SUMMARY_LENGTH = 500;
|
|
4625
|
+
/**
|
|
4626
|
+
* Compress a large tool_result content to a summary.
|
|
4627
|
+
* Keeps the first and last portions with a note about truncation.
|
|
4628
|
+
*/
|
|
4629
|
+
function compressToolResultContent(content) {
|
|
4630
|
+
if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
|
|
4631
|
+
const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
|
|
4632
|
+
const start$1 = content.slice(0, halfLen);
|
|
4633
|
+
const end = content.slice(-halfLen);
|
|
4634
|
+
const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
|
|
4635
|
+
return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
|
|
4636
|
+
}
|
|
4637
|
+
/**
|
|
4638
|
+
* Compress a tool_result block in an Anthropic message.
|
|
4639
|
+
*/
|
|
4640
|
+
function compressToolResultBlock(block) {
|
|
4641
|
+
if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) return {
|
|
4642
|
+
...block,
|
|
4643
|
+
content: compressToolResultContent(block.content)
|
|
4644
|
+
};
|
|
4645
|
+
return block;
|
|
4646
|
+
}
|
|
4647
|
+
/**
 * Smart compression strategy:
 * walk backwards over the conversation accumulating token/byte cost, keep the
 * most recent messages (up to preservePercent of each limit) untouched, and
 * compress oversized string tool_result blocks in everything older.
 *
 * @param preservePercent - fraction of the context to keep uncompressed (0.0-1.0)
 * @returns compressed message list plus stats about what was changed
 */
function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePercent) {
  const count = messages.length;
  // Suffix sums: cost of messages[i..end] in tokens / bytes (+1 byte separator each).
  const suffixTokens = new Array(count + 1).fill(0);
  const suffixBytes = new Array(count + 1).fill(0);
  for (let idx = count - 1; idx >= 0; idx--) {
    const current = messages[idx];
    suffixTokens[idx] = suffixTokens[idx + 1] + estimateMessageTokens(current);
    suffixBytes[idx] = suffixBytes[idx + 1] + getMessageBytes(current) + 1;
  }
  const tokenBudget = Math.floor(tokenLimit * preservePercent);
  const byteBudget = Math.floor(byteLimit * preservePercent);
  // Earliest index whose suffix still fits both preserve budgets.
  let thresholdIndex = count;
  for (let idx = count - 1; idx >= 0; idx--) {
    if (suffixTokens[idx] > tokenBudget || suffixBytes[idx] > byteBudget) {
      thresholdIndex = idx + 1;
      break;
    }
    thresholdIndex = idx;
  }
  // Everything already fits inside the preserve window: nothing to compress.
  if (thresholdIndex >= count) return {
    messages,
    compressedCount: 0,
    compressThresholdIndex: count
  };
  const output = [];
  let compressedCount = 0;
  for (const [idx, current] of messages.entries()) {
    const eligible = idx < thresholdIndex && current.role === "user" && Array.isArray(current.content);
    const hasLargeResult = eligible && current.content.some((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD);
    if (hasLargeResult) {
      const rewritten = current.content.map((block) => {
        const isLarge = block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD;
        if (!isLarge) return block;
        compressedCount++;
        return compressToolResultBlock(block);
      });
      output.push({ ...current, content: rewritten });
      continue;
    }
    output.push(current);
  }
  return {
    messages: output,
    compressedCount,
    compressThresholdIndex: thresholdIndex
  };
}
|
|
4706
|
+
/** Default fallback for when model capabilities are not available */
const DEFAULT_CONTEXT_WINDOW = 200000;
/**
 * Derive the effective token and byte limits for a model.
 * Token limit: explicit override, else the model's advertised context window,
 * else its prompt-token cap, else the default — reduced by the safety margin.
 * Byte limit: whatever the dynamic 413-adjusted limit currently is.
 */
function calculateLimits(model, config) {
  const limits = model.capabilities?.limits;
  const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? limits?.max_context_window_tokens ?? limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
  // Shave off a safety margin so estimates that run slightly hot still fit.
  const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
  return {
    tokenLimit,
    byteLimit: getEffectiveByteLimitBytes()
  };
}
|
|
4717
|
+
/**
 * Binary-search the earliest message index whose suffix (that message and
 * everything after it) fits within both the token and byte budgets after
 * subtracting system prompt, payload overhead, and a truncation-marker
 * allowance. Returns messages.length when nothing fits at all.
 */
function findOptimalPreserveIndex(params) {
  const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
  const total = messages.length;
  if (total === 0) return 0;
  const markerBytes = 200;
  const tokenBudget = tokenLimit - systemTokens - 50;
  const byteBudget = byteLimit - payloadOverhead - systemBytes - markerBytes;
  if (tokenBudget <= 0 || byteBudget <= 0) return total;
  // Suffix sums: cost of messages[i..end] (+1 byte separator each).
  const suffixTokens = new Array(total + 1).fill(0);
  const suffixBytes = new Array(total + 1).fill(0);
  for (let idx = total - 1; idx >= 0; idx--) {
    const current = messages[idx];
    suffixTokens[idx] = suffixTokens[idx + 1] + estimateMessageTokens(current);
    suffixBytes[idx] = suffixBytes[idx + 1] + getMessageBytes(current) + 1;
  }
  // Suffix cost never increases as idx grows, so binary search is valid.
  let lo = 0;
  let hi = total;
  while (lo < hi) {
    const mid = (lo + hi) >>> 1;
    if (suffixTokens[mid] <= tokenBudget && suffixBytes[mid] <= byteBudget) hi = mid;
    else lo = mid + 1;
  }
  return lo;
}
|
|
4741
|
+
/**
 * Generate a summary of removed messages for context.
 * Reports how many user/assistant messages were dropped and which tool
 * names appeared in tool_use blocks (first five, deduplicated).
 */
function generateRemovedMessagesSummary(removedMessages) {
  let userMessageCount = 0;
  let assistantMessageCount = 0;
  const toolCalls = [];
  for (const msg of removedMessages) {
    if (msg.role === "user") userMessageCount++;
    else assistantMessageCount++;
    if (!Array.isArray(msg.content)) continue;
    for (const block of msg.content) {
      if (block.type === "tool_use") toolCalls.push(block.name);
    }
  }
  const parts = [];
  if (userMessageCount > 0 || assistantMessageCount > 0) {
    const breakdown = [];
    if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
    if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
    parts.push(`Messages: ${breakdown.join(", ")}`);
  }
  if (toolCalls.length > 0) {
    const uniqueTools = [...new Set(toolCalls)];
    // Cap the list at five names; fold the rest into a "+N more" suffix.
    const shown = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
    parts.push(`Tools used: ${shown.join(", ")}`);
  }
  return parts.join(". ");
}
|
|
4770
|
+
/**
 * Add a compression notice to the system prompt.
 * Informs the model that some tool_result content has been compressed.
 * Handles string, array, and absent `system` forms without mutating payload.
 */
function addCompressionNotice(payload, compressedCount) {
  const notice = `[CONTEXT NOTE]\n${compressedCount} large tool_result blocks have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]\n\n`;
  const { system } = payload;
  if (typeof system === "string") return { ...payload, system: notice + system };
  if (Array.isArray(system)) {
    return { ...payload, system: [{ type: "text", text: notice }, ...system] };
  }
  // No (or non-standard) system prompt: the notice becomes the whole prompt.
  return { ...payload, system: notice };
}
|
|
4788
|
+
/**
 * Create truncation context to prepend to system prompt.
 * Each condition contributes one line; the closing guidance is always present.
 */
function createTruncationSystemContext(removedCount, compressedCount, summary) {
  const lines = [`[CONVERSATION CONTEXT]\n`];
  if (removedCount > 0) lines.push(`${removedCount} earlier messages have been removed due to context window limits.\n`);
  if (compressedCount > 0) lines.push(`${compressedCount} large tool_result blocks have been compressed.\n`);
  if (summary) lines.push(`Summary of removed content: ${summary}\n`);
  lines.push("If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]\n\n");
  return lines.join("");
}
|
|
4799
|
+
/**
 * Create a truncation marker message (fallback when no system prompt).
 * Returns a synthetic user message describing what was removed/compressed.
 */
function createTruncationMarker$1(removedCount, compressedCount, summary) {
  const pieces = [];
  if (removedCount > 0) pieces.push(`${removedCount} earlier messages removed`);
  if (compressedCount > 0) pieces.push(`${compressedCount} tool_result blocks compressed`);
  const header = `[CONTEXT MODIFIED: ${pieces.join(", ")} to fit context limits]`;
  return {
    role: "user",
    content: summary ? `${header}\n[Summary: ${summary}]` : header
  };
}
|
|
4813
|
+
/**
 * Perform auto-truncation on an Anthropic payload that exceeds limits.
 *
 * Pipeline:
 *   1. If the payload already fits both limits, return it untouched.
 *   2. If enabled, compress large tool_result blocks; return early if that
 *      alone brings the payload under the limits.
 *   3. Otherwise drop the oldest messages (keeping a consistent tool_use /
 *      tool_result sequence) and prepend a truncation notice.
 *
 * @param payload - Anthropic-style request body ({ messages, system, ... })
 * @param model - model descriptor used for token counting and limits
 * @param config - partial override of DEFAULT_AUTO_TRUNCATE_CONFIG
 * @returns { payload, wasCompacted, originalTokens, compactedTokens, removedMessageCount }
 */
async function autoTruncateAnthropic(payload, model, config = {}) {
  const cfg = {
    ...DEFAULT_AUTO_TRUNCATE_CONFIG,
    ...config
  };
  const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
  const originalTokens = await countTotalTokens(payload, model);
  // Fast path: nothing exceeds the limits, pass through unchanged.
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
    payload,
    wasCompacted: false,
    originalTokens,
    compactedTokens: originalTokens,
    removedMessageCount: 0
  };
  const exceedsTokens = originalTokens > tokenLimit;
  const exceedsBytes = originalBytes > byteLimit;
  let workingMessages = payload.messages;
  let compressedCount = 0;
  // Stage 1: try compressing large tool_results before removing anything.
  if (state.compressToolResults) {
    const compressionResult = smartCompressToolResults(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
    workingMessages = compressionResult.messages;
    compressedCount = compressionResult.compressedCount;
    const compressedPayload = {
      ...payload,
      messages: workingMessages
    };
    const compressedBytes = JSON.stringify(compressedPayload).length;
    const compressedTokens = await countTotalTokens(compressedPayload, model);
    // Compression alone was enough — notify the model and return.
    if (compressedTokens <= tokenLimit && compressedBytes <= byteLimit) {
      let reason$1 = "tokens";
      if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
      else if (exceedsBytes) reason$1 = "size";
      consola.info(`[AutoTruncate:Anthropic] ${reason$1}: ${originalTokens}→${compressedTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
      const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
      return {
        payload: noticePayload,
        wasCompacted: true,
        originalTokens,
        // Re-count after adding the notice, since it adds tokens of its own.
        compactedTokens: await countTotalTokens(noticePayload, model),
        removedMessageCount: 0
      };
    }
  }
  // Stage 2: compute fixed costs so the preserve-index search can budget correctly.
  const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
  const systemTokens = await countSystemTokens(payload.system, model);
  const messagesJson = JSON.stringify(workingMessages);
  // Bytes of everything in the payload other than the messages array itself.
  const payloadOverhead = JSON.stringify({
    ...payload,
    messages: workingMessages
  }).length - messagesJson.length;
  consola.debug(`[AutoTruncate:Anthropic] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${Math.round(systemBytes / 1024)}KB`);
  const preserveIndex = findOptimalPreserveIndex({
    messages: workingMessages,
    systemBytes,
    systemTokens,
    payloadOverhead,
    tokenLimit,
    byteLimit
  });
  // preserveIndex === 0 means every message already fits; truncation can't help
  // because the fixed costs (system prompt etc.) are what's over budget.
  if (preserveIndex === 0) {
    consola.warn("[AutoTruncate:Anthropic] Cannot truncate, system messages too large");
    return {
      payload,
      wasCompacted: false,
      originalTokens,
      compactedTokens: originalTokens,
      removedMessageCount: 0
    };
  }
  // preserveIndex past the end means no suffix fits: bail rather than send nothing.
  if (preserveIndex >= workingMessages.length) {
    consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
    return {
      payload,
      wasCompacted: false,
      originalTokens,
      compactedTokens: originalTokens,
      removedMessageCount: 0
    };
  }
  let preserved = workingMessages.slice(preserveIndex);
  // Repair the tool-call protocol twice: ensureStartsWithUser can re-orphan
  // blocks that the first filtering pass left consistent.
  preserved = filterOrphanedToolResults(preserved);
  preserved = filterOrphanedToolUse(preserved);
  preserved = ensureStartsWithUser(preserved);
  preserved = filterOrphanedToolResults(preserved);
  preserved = filterOrphanedToolUse(preserved);
  if (preserved.length === 0) {
    consola.warn("[AutoTruncate:Anthropic] All messages filtered out after cleanup");
    return {
      payload,
      wasCompacted: false,
      originalTokens,
      compactedTokens: originalTokens,
      removedMessageCount: 0
    };
  }
  // NOTE(review): summary is built from the original payload.messages prefix,
  // while removedCount is measured against workingMessages (post-compression)
  // — confirm this asymmetry is intentional.
  const removedMessages = payload.messages.slice(0, preserveIndex);
  const removedCount = workingMessages.length - preserved.length;
  const summary = generateRemovedMessagesSummary(removedMessages);
  let newSystem = payload.system;
  let newMessages = preserved;
  // Prefer announcing the truncation in the system prompt; fall back to a
  // synthetic user message only when there is no system prompt at all.
  if (payload.system !== void 0) {
    const truncationContext = createTruncationSystemContext(removedCount, compressedCount, summary);
    if (typeof payload.system === "string") newSystem = truncationContext + payload.system;
    else if (Array.isArray(payload.system)) newSystem = [{
      type: "text",
      text: truncationContext
    }, ...payload.system];
  } else newMessages = [createTruncationMarker$1(removedCount, compressedCount, summary), ...preserved];
  const newPayload = {
    ...payload,
    system: newSystem,
    messages: newMessages
  };
  const newBytes = JSON.stringify(newPayload).length;
  const newTokens = await countTotalTokens(newPayload, model);
  let reason = "tokens";
  if (exceedsTokens && exceedsBytes) reason = "tokens+size";
  else if (exceedsBytes) reason = "size";
  const actions = [];
  if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
  if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
  const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
  consola.info(`[AutoTruncate:Anthropic] ${reason}: ${originalTokens}→${newTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
  // Best effort: the result may still be over limit (e.g. one huge message remains).
  if (newBytes > byteLimit || newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over limit (${newTokens} tokens, ${Math.round(newBytes / 1024)}KB)`);
  return {
    payload: newPayload,
    wasCompacted: true,
    originalTokens,
    compactedTokens: newTokens,
    removedMessageCount: removedCount
  };
}
|
|
4949
|
+
/**
 * Check if payload needs compaction.
 * Compares the payload's current token and byte footprint against the
 * model's effective limits and reports which dimension (if any) is over.
 */
async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
  const cfg = {
    ...DEFAULT_AUTO_TRUNCATE_CONFIG,
    ...config
  };
  const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
  const currentTokens = await countTotalTokens(payload, model);
  const currentBytes = JSON.stringify(payload).length;
  const exceedsTokens = currentTokens > tokenLimit;
  const exceedsBytes = currentBytes > byteLimit;
  // reason stays undefined when neither limit is exceeded.
  let reason;
  if (exceedsTokens) reason = exceedsBytes ? "both" : "tokens";
  else if (exceedsBytes) reason = "bytes";
  return {
    needed: exceedsTokens || exceedsBytes,
    currentTokens,
    tokenLimit,
    currentBytes,
    byteLimit,
    reason
  };
}
|
|
4975
|
+
|
|
4976
|
+
//#endregion
|
|
4977
|
+
//#region src/routes/messages/message-utils.ts
|
|
4978
|
+
/**
 * Convert Anthropic-style messages to the internal representation.
 * String content passes through; block arrays are mapped block-by-block:
 * tool_use input is JSON-stringified, tool_result content is flattened to a
 * single string, and unknown block types keep only their `type` tag.
 */
function convertAnthropicMessages(messages) {
  const convertBlock = (block) => {
    switch (block.type) {
      case "text":
        return { type: "text", text: block.text };
      case "tool_use":
        return {
          type: "tool_use",
          id: block.id,
          name: block.name,
          input: JSON.stringify(block.input)
        };
      case "tool_result": {
        // Non-string content becomes one line per block; non-text blocks
        // are represented by a "[type]" placeholder.
        const resultContent = typeof block.content === "string"
          ? block.content
          : block.content.map((c) => (c.type === "text" ? c.text : `[${c.type}]`)).join("\n");
        return {
          type: "tool_result",
          tool_use_id: block.tool_use_id,
          content: resultContent
        };
      }
      default:
        return { type: block.type };
    }
  };
  return messages.map((msg) => {
    if (typeof msg.content === "string") return { role: msg.role, content: msg.content };
    return { role: msg.role, content: msg.content.map(convertBlock) };
  });
}
|
|
5011
|
+
/**
 * Flatten a system prompt (string or array of text blocks) to one string.
 * Falsy input — including the empty string — yields undefined.
 */
function extractSystemPrompt(system) {
  if (!system) return undefined;
  return typeof system === "string" ? system : system.map((block) => block.text).join("\n");
}
|
|
5016
|
+
/**
 * Extract tool_use blocks from loosely-typed content.
 * Only objects carrying type === "tool_use" plus id/name/input keys qualify;
 * id and name are coerced to strings, input is JSON-stringified.
 * Returns undefined when no tool calls are present.
 */
function extractToolCallsFromContent(content) {
  const tools = [];
  for (const block of content) {
    const isToolUse = typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block;
    if (!isToolUse) continue;
    tools.push({
      id: String(block.id),
      name: String(block.name),
      input: JSON.stringify(block.input)
    });
  }
  return tools.length === 0 ? undefined : tools;
}
|
|
5025
|
+
/**
 * Extract tool_use blocks from well-typed Anthropic content.
 * Input is JSON-stringified; returns undefined when none are present.
 */
function extractToolCallsFromAnthropicContent(content) {
  const tools = content
    .filter((block) => block.type === "tool_use")
    .map((block) => ({
      id: block.id,
      name: block.name,
      input: JSON.stringify(block.input)
    }));
  return tools.length > 0 ? tools : undefined;
}
|
|
5034
|
+
/**
 * Map an OpenAI finish_reason to the Anthropic stop_reason vocabulary.
 * null stays null; unrecognized reasons yield undefined.
 */
function mapOpenAIStopReasonToAnthropic(finishReason) {
  if (finishReason === null) return null;
  switch (finishReason) {
    case "stop": return "end_turn";
    case "length": return "max_tokens";
    case "tool_calls": return "tool_use";
    case "content_filter": return "end_turn";
    default: return undefined;
  }
}
|
|
5043
|
+
|
|
5044
|
+
//#endregion
|
|
5045
|
+
//#region src/routes/messages/non-stream-translation.ts
|
|
5046
|
+
const OPENAI_TOOL_NAME_LIMIT = 64;
|
|
5047
|
+
/**
 * Ensure all tool_use blocks have corresponding tool_result responses.
 * This handles edge cases where conversation history may be incomplete:
 * - Session interruptions where tool execution was cut off
 * - Previous request failures
 * - Client sending truncated history
 *
 * Adding placeholder responses prevents API errors and maintains protocol compliance.
 */
function fixMessageSequence(messages) {
  const repaired = [];
  for (let idx = 0; idx < messages.length; idx++) {
    const message = messages[idx];
    repaired.push(message);
    const toolCalls = message.role === "assistant" ? message.tool_calls : undefined;
    if (!toolCalls || toolCalls.length === 0) continue;
    // Collect the ids answered by the run of tool messages that follows.
    const answered = new Set();
    let cursor = idx + 1;
    while (cursor < messages.length && messages[cursor].role === "tool") {
      const toolMessage = messages[cursor];
      if (toolMessage.tool_call_id) answered.add(toolMessage.tool_call_id);
      cursor++;
    }
    // Synthesize placeholders for any call left without a response.
    for (const call of toolCalls) {
      if (answered.has(call.id)) continue;
      consola.debug(`Adding placeholder tool_result for ${call.id}`);
      repaired.push({
        role: "tool",
        tool_call_id: call.id,
        content: "Tool execution was interrupted or failed."
      });
    }
  }
  return repaired;
}
|
|
4121
5081
|
function translateToOpenAI(payload) {
|
|
4122
5082
|
const toolNameMapping = {
|
|
@@ -4140,19 +5100,50 @@ function translateToOpenAI(payload) {
|
|
|
4140
5100
|
toolNameMapping
|
|
4141
5101
|
};
|
|
4142
5102
|
}
|
|
5103
|
+
/**
 * Find the latest available model matching a family prefix.
 * Searches state.models for models whose id starts with the given prefix
 * and returns the one with the highest extracted version number.
 *
 * @param familyPrefix - e.g., "claude-opus", "claude-sonnet", "claude-haiku"
 * @param fallback - fallback model ID if no match found
 */
function findLatestModel(familyPrefix, fallback) {
  const models = state.models?.data;
  if (!models || models.length === 0) return fallback;
  const candidates = models.filter((m) => m.id.startsWith(familyPrefix));
  if (candidates.length === 0) return fallback;
  // Descending by version: highest version wins.
  candidates.sort((a, b) => extractVersion(b.id, familyPrefix) - extractVersion(a.id, familyPrefix));
  return candidates[0].id;
}
|
|
5122
|
+
/**
 * Extract numeric version from model ID.
 * e.g., "claude-opus-4.5" with prefix "claude-opus" -> 4.5
 * Returns 0 when no leading number follows the prefix separator.
 */
function extractVersion(modelId, prefix) {
  // Skip the prefix plus one separator character (the "-" after it).
  const suffix = modelId.slice(prefix.length + 1);
  const match = /^(\d+(?:\.\d+)?)/.exec(suffix);
  return match ? Number.parseFloat(match[1]) : 0;
}
|
|
4143
5130
|
/**
 * Translate client-facing model names to available Copilot model ids.
 * Bare family aliases ("opus"/"sonnet"/"haiku") resolve to the newest
 * available model of that family; dated ids map to their dotted form.
 * Unrecognized names pass through unchanged.
 */
function translateModelName(model) {
  const aliasMap = {
    opus: "claude-opus",
    sonnet: "claude-sonnet",
    haiku: "claude-haiku"
  };
  const familyPrefix = aliasMap[model];
  if (familyPrefix) {
    // Fall back to the family's 4.5 release when no model list is loaded.
    return findLatestModel(familyPrefix, `${familyPrefix}-4.5`);
  }
  if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
  if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
  if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
  if (/^claude-opus-4-\d+$/.test(model)) return findLatestModel("claude-opus", "claude-opus-4.5");
  if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
  if (/^claude-haiku-3-5-\d+$/.test(model)) return findLatestModel("claude-haiku", "claude-haiku-4.5");
  return model;
}
|
|
4158
5149
|
function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
|
|
@@ -4160,7 +5151,7 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
|
|
|
4160
5151
|
const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
|
|
4161
5152
|
return [...systemMessages, ...otherMessages];
|
|
4162
5153
|
}
|
|
4163
|
-
const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
|
|
5154
|
+
const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
|
|
4164
5155
|
/**
|
|
4165
5156
|
* Filter out reserved keywords from system prompt text.
|
|
4166
5157
|
* Copilot API rejects requests containing these keywords.
|
|
@@ -4284,7 +5275,7 @@ function translateAnthropicToolsToOpenAI(anthropicTools, toolNameMapping) {
|
|
|
4284
5275
|
function: {
|
|
4285
5276
|
name: getTruncatedToolName(tool.name, toolNameMapping),
|
|
4286
5277
|
description: tool.description,
|
|
4287
|
-
parameters: tool.input_schema
|
|
5278
|
+
parameters: tool.input_schema ?? {}
|
|
4288
5279
|
}
|
|
4289
5280
|
}));
|
|
4290
5281
|
}
|
|
@@ -4385,7 +5376,13 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
|
|
|
4385
5376
|
//#endregion
|
|
4386
5377
|
//#region src/routes/messages/count-tokens-handler.ts
|
|
4387
5378
|
/**
|
|
4388
|
-
* Handles token counting for Anthropic messages
|
|
5379
|
+
* Handles token counting for Anthropic messages.
|
|
5380
|
+
*
|
|
5381
|
+
* For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
|
|
5382
|
+
* For other models, uses GPT tokenizers with appropriate buffers.
|
|
5383
|
+
*
|
|
5384
|
+
* When auto-truncate is enabled and the request would exceed limits,
|
|
5385
|
+
* returns an inflated token count to trigger Claude Code's auto-compact mechanism.
|
|
4389
5386
|
*/
|
|
4390
5387
|
async function handleCountTokens(c) {
|
|
4391
5388
|
try {
|
|
@@ -4397,6 +5394,16 @@ async function handleCountTokens(c) {
|
|
|
4397
5394
|
consola.warn("Model not found, returning default token count");
|
|
4398
5395
|
return c.json({ input_tokens: 1 });
|
|
4399
5396
|
}
|
|
5397
|
+
if (state.autoTruncate) {
|
|
5398
|
+
const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
|
|
5399
|
+
if (truncateCheck.needed) {
|
|
5400
|
+
const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
|
|
5401
|
+
const inflatedTokens = Math.floor(contextWindow * .95);
|
|
5402
|
+
consola.debug(`[count_tokens] Would trigger auto-truncate: ${truncateCheck.currentTokens} tokens > ${truncateCheck.tokenLimit}, returning inflated count: ${inflatedTokens}`);
|
|
5403
|
+
return c.json({ input_tokens: inflatedTokens });
|
|
5404
|
+
}
|
|
5405
|
+
}
|
|
5406
|
+
const tokenizerName = selectedModel.capabilities?.tokenizer ?? "o200k_base";
|
|
4400
5407
|
const tokenCount = await getTokenCount(openAIPayload, selectedModel);
|
|
4401
5408
|
if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
|
|
4402
5409
|
let mcpToolExist = false;
|
|
@@ -4407,9 +5414,8 @@ async function handleCountTokens(c) {
|
|
|
4407
5414
|
}
|
|
4408
5415
|
}
|
|
4409
5416
|
let finalTokenCount = tokenCount.input + tokenCount.output;
|
|
4410
|
-
if (anthropicPayload.model.startsWith("
|
|
4411
|
-
|
|
4412
|
-
consola.debug("Token count:", finalTokenCount);
|
|
5417
|
+
if (!(selectedModel.vendor === "Anthropic")) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
|
|
5418
|
+
consola.debug(`Token count: ${finalTokenCount} (tokenizer: ${tokenizerName})`);
|
|
4413
5419
|
return c.json({ input_tokens: finalTokenCount });
|
|
4414
5420
|
} catch (error) {
|
|
4415
5421
|
consola.error("Error counting tokens:", error);
|
|
@@ -4417,6 +5423,262 @@ async function handleCountTokens(c) {
|
|
|
4417
5423
|
}
|
|
4418
5424
|
}
|
|
4419
5425
|
|
|
5426
|
+
//#endregion
|
|
5427
|
+
//#region src/services/copilot/create-anthropic-messages.ts
|
|
5428
|
+
/**
 * Fields that are supported by Copilot's Anthropic API endpoint.
 * Any other fields in the incoming request will be stripped.
 */
const COPILOT_SUPPORTED_FIELDS = new Set([
  "model",
  "messages",
  "max_tokens",
  "system",
  "metadata",
  "stop_sequences",
  "stream",
  "temperature",
  "top_p",
  "top_k",
  "tools",
  "tool_choice",
  "thinking",
  "service_tier"
]);
/**
 * Filter payload to only include fields supported by Copilot's Anthropic API.
 * This prevents errors like "Extra inputs are not permitted" for unsupported
 * fields like `output_config`.
 *
 * Also converts server-side tools (web_search, etc.) to custom tools.
 */
function filterPayloadForCopilot(payload) {
  const filtered = {};
  const dropped = [];
  for (const [key, value] of Object.entries(payload)) {
    if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
    else dropped.push(key);
  }
  if (dropped.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${dropped.join(", ")}`);
  if (filtered.tools) filtered.tools = convertServerToolsToCustom(filtered.tools);
  return filtered;
}
|
|
5464
|
+
/**
 * Adjust max_tokens if thinking is enabled.
 * According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
 * max_tokens = thinking_budget + response_tokens
 */
function adjustMaxTokensForThinking(payload) {
  const budgetTokens = payload.thinking?.budget_tokens;
  if (!budgetTokens) return payload;
  if (payload.max_tokens > budgetTokens) return payload;
  // Leave room for the response on top of the thinking budget (capped at 16384).
  const newMaxTokens = budgetTokens + Math.min(16384, budgetTokens);
  consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budgetTokens})`);
  return {
    ...payload,
    max_tokens: newMaxTokens
  };
}
|
|
5484
|
+
/**
 * Create messages using Anthropic-style API directly.
 * This bypasses the OpenAI translation layer for Anthropic models.
 *
 * @param payload - Anthropic-style request body
 * @returns an SSE event stream when payload.stream is set, otherwise the parsed JSON body
 * @throws Error when no Copilot token is present; HTTPError on non-OK responses
 */
async function createAnthropicMessages(payload) {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  let filteredPayload = filterPayloadForCopilot(payload);
  filteredPayload = adjustMaxTokensForThinking(filteredPayload);
  // Vision support is only requested when a message carries an image block.
  const enableVision = filteredPayload.messages.some((msg) => {
    if (typeof msg.content === "string") return false;
    return msg.content.some((block) => block.type === "image");
  });
  // Any assistant message in the history implies an agent-driven follow-up call.
  const isAgentCall = filteredPayload.messages.some((msg) => msg.role === "assistant");
  const headers = {
    ...copilotHeaders(state, enableVision),
    "X-Initiator": isAgentCall ? "agent" : "user",
    "anthropic-version": "2023-06-01"
  };
  consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
  const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
    method: "POST",
    headers,
    body: JSON.stringify(filteredPayload)
  });
  if (!response.ok) {
    // Log a compact request shape (no message bodies) to aid debugging.
    consola.debug("Request failed:", {
      model: filteredPayload.model,
      max_tokens: filteredPayload.max_tokens,
      stream: filteredPayload.stream,
      tools: filteredPayload.tools?.map((t) => ({
        name: t.name,
        type: t.type
      })),
      thinking: filteredPayload.thinking,
      messageCount: filteredPayload.messages.length
    });
    throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, filteredPayload.model);
  }
  // NOTE(review): stream flag is read from the original payload rather than
  // filteredPayload — "stream" is in the supported-field set so they should
  // match, but confirm this is intentional.
  if (payload.stream) return events(response);
  return await response.json();
}
|
|
5525
|
+
// Replacement definitions for Anthropic server-side tools. Keys are matched
// by prefix against a tool's `type` (see getServerToolPrefix); each entry
// supplies the description/input_schema used when the tool is rewritten as a
// client-side custom tool. convertServerToolsToCustom also reads optional
// `remove`/`removalReason` flags, which none of these entries currently set.
const SERVER_TOOL_CONFIGS = {
	web_search: {
		description: "Search the web for current information. Returns web search results that can help answer questions about recent events, current data, or information that may have changed since your knowledge cutoff.",
		input_schema: {
			type: "object",
			properties: { query: {
				type: "string",
				description: "The search query"
			} },
			required: ["query"]
		}
	},
	web_fetch: {
		description: "Fetch content from a URL. NOTE: This is a client-side tool - the client must fetch the URL and return the content.",
		input_schema: {
			type: "object",
			properties: { url: {
				type: "string",
				description: "The URL to fetch"
			} },
			required: ["url"]
		}
	},
	code_execution: {
		description: "Execute code in a sandbox. NOTE: This is a client-side tool - the client must execute the code.",
		input_schema: {
			type: "object",
			properties: {
				code: {
					type: "string",
					description: "The code to execute"
				},
				language: {
					type: "string",
					description: "The programming language"
				}
			},
			required: ["code"]
		}
	},
	computer: {
		description: "Control computer desktop. NOTE: This is a client-side tool - the client must handle computer control.",
		input_schema: {
			type: "object",
			properties: { action: {
				type: "string",
				description: "The action to perform"
			} },
			required: ["action"]
		}
	}
};
|
|
5577
|
+
/**
 * Check if a tool is a server-side tool that needs conversion.
 * Matches the tool's `type` against known config keys by prefix;
 * returns the matching prefix, or null for plain custom tools.
 */
function getServerToolPrefix(tool) {
  if (!tool.type) return null;
  for (const prefix of Object.keys(SERVER_TOOL_CONFIGS)) {
    if (tool.type.startsWith(prefix)) return prefix;
  }
  return null;
}
|
|
5586
|
+
/**
|
|
5587
|
+
* Convert server-side tools to custom tools, or pass them through unchanged.
|
|
5588
|
+
* This allows them to be passed to the API and handled by the client.
|
|
5589
|
+
*
|
|
5590
|
+
* Note: Server-side tools are only converted if state.rewriteAnthropicTools is enabled.
|
|
5591
|
+
*/
|
|
5592
|
+
function convertServerToolsToCustom(tools) {
|
|
5593
|
+
if (!tools) return;
|
|
5594
|
+
const result = [];
|
|
5595
|
+
for (const tool of tools) {
|
|
5596
|
+
const serverToolPrefix = getServerToolPrefix(tool);
|
|
5597
|
+
if (serverToolPrefix) {
|
|
5598
|
+
const config = SERVER_TOOL_CONFIGS[serverToolPrefix];
|
|
5599
|
+
if (!state.rewriteAnthropicTools) {
|
|
5600
|
+
consola.debug(`[DirectAnthropic] Passing ${serverToolPrefix} through unchanged (use --rewrite-anthropic-tools to convert)`);
|
|
5601
|
+
result.push(tool);
|
|
5602
|
+
continue;
|
|
5603
|
+
}
|
|
5604
|
+
if (config.remove) {
|
|
5605
|
+
consola.warn(`[DirectAnthropic] Removing unsupported server tool: ${tool.name}. Reason: ${config.removalReason}`);
|
|
5606
|
+
continue;
|
|
5607
|
+
}
|
|
5608
|
+
consola.debug(`[DirectAnthropic] Converting server tool to custom: ${tool.name} (type: ${tool.type})`);
|
|
5609
|
+
result.push({
|
|
5610
|
+
name: tool.name,
|
|
5611
|
+
description: config.description,
|
|
5612
|
+
input_schema: config.input_schema
|
|
5613
|
+
});
|
|
5614
|
+
} else result.push(tool);
|
|
5615
|
+
}
|
|
5616
|
+
return result.length > 0 ? result : void 0;
|
|
5617
|
+
}
|
|
5618
|
+
/**
|
|
5619
|
+
* Check if a model supports direct Anthropic API.
|
|
5620
|
+
* Returns true if redirect is disabled (direct API is on) and the model is from Anthropic vendor.
|
|
5621
|
+
*/
|
|
5622
|
+
function supportsDirectAnthropicApi(modelId) {
|
|
5623
|
+
if (state.redirectAnthropic) return false;
|
|
5624
|
+
return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
|
|
5625
|
+
}
|
|
5626
|
+
|
|
5627
|
+
//#endregion
|
|
5628
|
+
//#region src/routes/messages/stream-accumulator.ts
|
|
5629
|
+
function createAnthropicStreamAccumulator() {
|
|
5630
|
+
return {
|
|
5631
|
+
model: "",
|
|
5632
|
+
inputTokens: 0,
|
|
5633
|
+
outputTokens: 0,
|
|
5634
|
+
stopReason: "",
|
|
5635
|
+
content: "",
|
|
5636
|
+
toolCalls: [],
|
|
5637
|
+
currentToolCall: null
|
|
5638
|
+
};
|
|
5639
|
+
}
|
|
5640
|
+
function processAnthropicEvent(event, acc) {
|
|
5641
|
+
switch (event.type) {
|
|
5642
|
+
case "content_block_delta":
|
|
5643
|
+
handleContentBlockDelta(event.delta, acc);
|
|
5644
|
+
break;
|
|
5645
|
+
case "content_block_start":
|
|
5646
|
+
handleContentBlockStart(event.content_block, acc);
|
|
5647
|
+
break;
|
|
5648
|
+
case "content_block_stop":
|
|
5649
|
+
handleContentBlockStop(acc);
|
|
5650
|
+
break;
|
|
5651
|
+
case "message_delta":
|
|
5652
|
+
handleMessageDelta(event.delta, event.usage, acc);
|
|
5653
|
+
break;
|
|
5654
|
+
default: break;
|
|
5655
|
+
}
|
|
5656
|
+
}
|
|
5657
|
+
function handleContentBlockDelta(delta, acc) {
|
|
5658
|
+
if (delta.type === "text_delta") acc.content += delta.text;
|
|
5659
|
+
else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
|
|
5660
|
+
}
|
|
5661
|
+
function handleContentBlockStart(block, acc) {
|
|
5662
|
+
if (block.type === "tool_use") acc.currentToolCall = {
|
|
5663
|
+
id: block.id,
|
|
5664
|
+
name: block.name,
|
|
5665
|
+
input: ""
|
|
5666
|
+
};
|
|
5667
|
+
}
|
|
5668
|
+
function handleContentBlockStop(acc) {
|
|
5669
|
+
if (acc.currentToolCall) {
|
|
5670
|
+
acc.toolCalls.push(acc.currentToolCall);
|
|
5671
|
+
acc.currentToolCall = null;
|
|
5672
|
+
}
|
|
5673
|
+
}
|
|
5674
|
+
function handleMessageDelta(delta, usage, acc) {
|
|
5675
|
+
if (delta.stop_reason) acc.stopReason = delta.stop_reason;
|
|
5676
|
+
if (usage) {
|
|
5677
|
+
acc.inputTokens = usage.input_tokens ?? 0;
|
|
5678
|
+
acc.outputTokens = usage.output_tokens;
|
|
5679
|
+
}
|
|
5680
|
+
}
|
|
5681
|
+
|
|
4420
5682
|
//#endregion
|
|
4421
5683
|
//#region src/routes/messages/stream-translation.ts
|
|
4422
5684
|
function isToolBlockOpen(state$1) {
|
|
@@ -4522,68 +5784,244 @@ function translateChunkToAnthropicEvents(chunk, state$1, toolNameMapping) {
|
|
|
4522
5784
|
});
|
|
4523
5785
|
}
|
|
4524
5786
|
}
|
|
4525
|
-
if (choice.finish_reason) {
|
|
4526
|
-
if (state$1.contentBlockOpen) {
|
|
4527
|
-
events$1.push({
|
|
4528
|
-
type: "content_block_stop",
|
|
4529
|
-
index: state$1.contentBlockIndex
|
|
5787
|
+
if (choice.finish_reason) {
|
|
5788
|
+
if (state$1.contentBlockOpen) {
|
|
5789
|
+
events$1.push({
|
|
5790
|
+
type: "content_block_stop",
|
|
5791
|
+
index: state$1.contentBlockIndex
|
|
5792
|
+
});
|
|
5793
|
+
state$1.contentBlockOpen = false;
|
|
5794
|
+
}
|
|
5795
|
+
events$1.push({
|
|
5796
|
+
type: "message_delta",
|
|
5797
|
+
delta: {
|
|
5798
|
+
stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
|
|
5799
|
+
stop_sequence: null
|
|
5800
|
+
},
|
|
5801
|
+
usage: {
|
|
5802
|
+
input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
5803
|
+
output_tokens: chunk.usage?.completion_tokens ?? 0,
|
|
5804
|
+
...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
|
|
5805
|
+
}
|
|
5806
|
+
}, { type: "message_stop" });
|
|
5807
|
+
}
|
|
5808
|
+
return events$1;
|
|
5809
|
+
}
|
|
5810
|
+
function translateErrorToAnthropicErrorEvent() {
|
|
5811
|
+
return {
|
|
5812
|
+
type: "error",
|
|
5813
|
+
error: {
|
|
5814
|
+
type: "api_error",
|
|
5815
|
+
message: "An unexpected error occurred during streaming."
|
|
5816
|
+
}
|
|
5817
|
+
};
|
|
5818
|
+
}
|
|
5819
|
+
|
|
5820
|
+
//#endregion
|
|
5821
|
+
//#region src/routes/messages/direct-anthropic-handler.ts
|
|
5822
|
+
/**
|
|
5823
|
+
* Handle completion using direct Anthropic API (no translation needed)
|
|
5824
|
+
*/
|
|
5825
|
+
async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
|
|
5826
|
+
consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
|
|
5827
|
+
const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
|
|
5828
|
+
let effectivePayload = anthropicPayload;
|
|
5829
|
+
let truncateResult;
|
|
5830
|
+
if (state.autoTruncate && selectedModel) {
|
|
5831
|
+
const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
|
|
5832
|
+
consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
|
|
5833
|
+
if (check.needed) try {
|
|
5834
|
+
truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
|
|
5835
|
+
if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
|
|
5836
|
+
} catch (error) {
|
|
5837
|
+
consola.warn("[Anthropic] Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
|
|
5838
|
+
}
|
|
5839
|
+
} else if (state.autoTruncate && !selectedModel) consola.debug(`[Anthropic] Model '${anthropicPayload.model}' not found, skipping auto-truncate`);
|
|
5840
|
+
if (state.manualApprove) await awaitApproval();
|
|
5841
|
+
try {
|
|
5842
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
|
|
5843
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
5844
|
+
if (Symbol.asyncIterator in response) {
|
|
5845
|
+
consola.debug("Streaming response from Copilot (direct Anthropic)");
|
|
5846
|
+
updateTrackerStatus(ctx.trackingId, "streaming");
|
|
5847
|
+
return streamSSE(c, async (stream) => {
|
|
5848
|
+
await handleDirectAnthropicStreamingResponse({
|
|
5849
|
+
stream,
|
|
5850
|
+
response,
|
|
5851
|
+
anthropicPayload: effectivePayload,
|
|
5852
|
+
ctx
|
|
5853
|
+
});
|
|
5854
|
+
});
|
|
5855
|
+
}
|
|
5856
|
+
return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
|
|
5857
|
+
} catch (error) {
|
|
5858
|
+
if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
|
|
5859
|
+
recordErrorResponse(ctx, anthropicPayload.model, error);
|
|
5860
|
+
throw error;
|
|
5861
|
+
}
|
|
5862
|
+
}
|
|
5863
|
+
/**
|
|
5864
|
+
* Log payload size info for debugging 413 errors
|
|
5865
|
+
*/
|
|
5866
|
+
function logPayloadSizeInfoAnthropic(payload, model) {
|
|
5867
|
+
const payloadSize = JSON.stringify(payload).length;
|
|
5868
|
+
const messageCount = payload.messages.length;
|
|
5869
|
+
const toolCount = payload.tools?.length ?? 0;
|
|
5870
|
+
const systemSize = payload.system ? JSON.stringify(payload.system).length : 0;
|
|
5871
|
+
consola.info(`[Anthropic 413] Payload size: ${Math.round(payloadSize / 1024)}KB, messages: ${messageCount}, tools: ${toolCount}, system: ${Math.round(systemSize / 1024)}KB`);
|
|
5872
|
+
if (model?.capabilities?.limits) {
|
|
5873
|
+
const limits = model.capabilities.limits;
|
|
5874
|
+
consola.info(`[Anthropic 413] Model limits: context=${limits.max_context_window_tokens}, prompt=${limits.max_prompt_tokens}, output=${limits.max_output_tokens}`);
|
|
5875
|
+
}
|
|
5876
|
+
if (!state.autoTruncate) consola.info("[Anthropic 413] Consider enabling --auto-truncate to automatically reduce payload size");
|
|
5877
|
+
}
|
|
5878
|
+
/**
|
|
5879
|
+
* Handle non-streaming direct Anthropic response
|
|
5880
|
+
*/
|
|
5881
|
+
function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
|
|
5882
|
+
consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
|
|
5883
|
+
recordResponse(ctx.historyId, {
|
|
5884
|
+
success: true,
|
|
5885
|
+
model: response.model,
|
|
5886
|
+
usage: response.usage,
|
|
5887
|
+
stop_reason: response.stop_reason ?? void 0,
|
|
5888
|
+
content: {
|
|
5889
|
+
role: "assistant",
|
|
5890
|
+
content: response.content.map((block) => {
|
|
5891
|
+
switch (block.type) {
|
|
5892
|
+
case "text": return {
|
|
5893
|
+
type: "text",
|
|
5894
|
+
text: block.text
|
|
5895
|
+
};
|
|
5896
|
+
case "tool_use": return {
|
|
5897
|
+
type: "tool_use",
|
|
5898
|
+
id: block.id,
|
|
5899
|
+
name: block.name,
|
|
5900
|
+
input: JSON.stringify(block.input)
|
|
5901
|
+
};
|
|
5902
|
+
case "thinking": return {
|
|
5903
|
+
type: "thinking",
|
|
5904
|
+
thinking: block.thinking
|
|
5905
|
+
};
|
|
5906
|
+
default: return { type: block.type };
|
|
5907
|
+
}
|
|
5908
|
+
})
|
|
5909
|
+
},
|
|
5910
|
+
toolCalls: extractToolCallsFromAnthropicContent(response.content)
|
|
5911
|
+
}, Date.now() - ctx.startTime);
|
|
5912
|
+
if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
|
|
5913
|
+
inputTokens: response.usage.input_tokens,
|
|
5914
|
+
outputTokens: response.usage.output_tokens,
|
|
5915
|
+
queueWaitMs: ctx.queueWaitMs
|
|
5916
|
+
});
|
|
5917
|
+
let finalResponse = response;
|
|
5918
|
+
if (state.verbose && truncateResult?.wasCompacted) {
|
|
5919
|
+
const marker = createTruncationMarker(truncateResult);
|
|
5920
|
+
finalResponse = prependMarkerToAnthropicResponse$1(response, marker);
|
|
5921
|
+
}
|
|
5922
|
+
return c.json(finalResponse);
|
|
5923
|
+
}
|
|
5924
|
+
/**
|
|
5925
|
+
* Prepend marker to Anthropic response content (at the beginning of first text block)
|
|
5926
|
+
*/
|
|
5927
|
+
function prependMarkerToAnthropicResponse$1(response, marker) {
|
|
5928
|
+
if (!marker) return response;
|
|
5929
|
+
const content = [...response.content];
|
|
5930
|
+
const firstTextIndex = content.findIndex((block) => block.type === "text");
|
|
5931
|
+
if (firstTextIndex !== -1) {
|
|
5932
|
+
const textBlock = content[firstTextIndex];
|
|
5933
|
+
if (textBlock.type === "text") content[firstTextIndex] = {
|
|
5934
|
+
...textBlock,
|
|
5935
|
+
text: marker + textBlock.text
|
|
5936
|
+
};
|
|
5937
|
+
} else content.unshift({
|
|
5938
|
+
type: "text",
|
|
5939
|
+
text: marker
|
|
5940
|
+
});
|
|
5941
|
+
return {
|
|
5942
|
+
...response,
|
|
5943
|
+
content
|
|
5944
|
+
};
|
|
5945
|
+
}
|
|
5946
|
+
/**
|
|
5947
|
+
* Handle streaming direct Anthropic response (passthrough SSE events)
|
|
5948
|
+
*/
|
|
5949
|
+
async function handleDirectAnthropicStreamingResponse(opts) {
|
|
5950
|
+
const { stream, response, anthropicPayload, ctx } = opts;
|
|
5951
|
+
const acc = createAnthropicStreamAccumulator();
|
|
5952
|
+
try {
|
|
5953
|
+
for await (const rawEvent of response) {
|
|
5954
|
+
consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
|
|
5955
|
+
if (rawEvent.data === "[DONE]") break;
|
|
5956
|
+
if (!rawEvent.data) continue;
|
|
5957
|
+
let event;
|
|
5958
|
+
try {
|
|
5959
|
+
event = JSON.parse(rawEvent.data);
|
|
5960
|
+
} catch (parseError) {
|
|
5961
|
+
consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
|
|
5962
|
+
continue;
|
|
5963
|
+
}
|
|
5964
|
+
processAnthropicEvent(event, acc);
|
|
5965
|
+
await stream.writeSSE({
|
|
5966
|
+
event: rawEvent.event || event.type,
|
|
5967
|
+
data: rawEvent.data
|
|
4530
5968
|
});
|
|
4531
|
-
state$1.contentBlockOpen = false;
|
|
4532
5969
|
}
|
|
4533
|
-
|
|
4534
|
-
|
|
4535
|
-
|
|
4536
|
-
|
|
4537
|
-
|
|
4538
|
-
|
|
4539
|
-
|
|
4540
|
-
|
|
4541
|
-
|
|
4542
|
-
|
|
4543
|
-
|
|
4544
|
-
|
|
5970
|
+
recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
|
|
5971
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
5972
|
+
} catch (error) {
|
|
5973
|
+
consola.error("Direct Anthropic stream error:", error);
|
|
5974
|
+
recordStreamError({
|
|
5975
|
+
acc,
|
|
5976
|
+
fallbackModel: anthropicPayload.model,
|
|
5977
|
+
ctx,
|
|
5978
|
+
error
|
|
5979
|
+
});
|
|
5980
|
+
failTracking(ctx.trackingId, error);
|
|
5981
|
+
const errorEvent = translateErrorToAnthropicErrorEvent();
|
|
5982
|
+
await stream.writeSSE({
|
|
5983
|
+
event: errorEvent.type,
|
|
5984
|
+
data: JSON.stringify(errorEvent)
|
|
5985
|
+
});
|
|
4545
5986
|
}
|
|
4546
|
-
return events$1;
|
|
4547
5987
|
}
|
|
4548
|
-
function
|
|
4549
|
-
|
|
4550
|
-
|
|
4551
|
-
|
|
4552
|
-
|
|
4553
|
-
|
|
4554
|
-
|
|
4555
|
-
|
|
5988
|
+
function recordStreamingResponse$1(acc, fallbackModel, ctx) {
|
|
5989
|
+
const contentBlocks = [];
|
|
5990
|
+
if (acc.content) contentBlocks.push({
|
|
5991
|
+
type: "text",
|
|
5992
|
+
text: acc.content
|
|
5993
|
+
});
|
|
5994
|
+
for (const tc of acc.toolCalls) contentBlocks.push({
|
|
5995
|
+
type: "tool_use",
|
|
5996
|
+
...tc
|
|
5997
|
+
});
|
|
5998
|
+
recordResponse(ctx.historyId, {
|
|
5999
|
+
success: true,
|
|
6000
|
+
model: acc.model || fallbackModel,
|
|
6001
|
+
usage: {
|
|
6002
|
+
input_tokens: acc.inputTokens,
|
|
6003
|
+
output_tokens: acc.outputTokens
|
|
6004
|
+
},
|
|
6005
|
+
stop_reason: acc.stopReason || void 0,
|
|
6006
|
+
content: contentBlocks.length > 0 ? {
|
|
6007
|
+
role: "assistant",
|
|
6008
|
+
content: contentBlocks
|
|
6009
|
+
} : null,
|
|
6010
|
+
toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
|
|
6011
|
+
}, Date.now() - ctx.startTime);
|
|
4556
6012
|
}
|
|
4557
6013
|
|
|
4558
6014
|
//#endregion
|
|
4559
|
-
//#region src/routes/messages/handler.ts
|
|
4560
|
-
|
|
4561
|
-
|
|
4562
|
-
|
|
4563
|
-
|
|
4564
|
-
const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
|
|
4565
|
-
updateTrackerModel(trackingId, anthropicPayload.model);
|
|
4566
|
-
const ctx = {
|
|
4567
|
-
historyId: recordRequest("anthropic", {
|
|
4568
|
-
model: anthropicPayload.model,
|
|
4569
|
-
messages: convertAnthropicMessages(anthropicPayload.messages),
|
|
4570
|
-
stream: anthropicPayload.stream ?? false,
|
|
4571
|
-
tools: anthropicPayload.tools?.map((t) => ({
|
|
4572
|
-
name: t.name,
|
|
4573
|
-
description: t.description
|
|
4574
|
-
})),
|
|
4575
|
-
max_tokens: anthropicPayload.max_tokens,
|
|
4576
|
-
temperature: anthropicPayload.temperature,
|
|
4577
|
-
system: extractSystemPrompt(anthropicPayload.system)
|
|
4578
|
-
}),
|
|
4579
|
-
trackingId,
|
|
4580
|
-
startTime
|
|
4581
|
-
};
|
|
6015
|
+
//#region src/routes/messages/translated-handler.ts
|
|
6016
|
+
/**
|
|
6017
|
+
* Handle completion using OpenAI translation path (legacy)
|
|
6018
|
+
*/
|
|
6019
|
+
async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
|
|
4582
6020
|
const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
|
|
4583
6021
|
consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
|
|
4584
6022
|
const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
|
|
4585
|
-
const { finalPayload: openAIPayload,
|
|
4586
|
-
if (
|
|
6023
|
+
const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
|
|
6024
|
+
if (truncateResult) ctx.truncateResult = truncateResult;
|
|
4587
6025
|
if (state.manualApprove) await awaitApproval();
|
|
4588
6026
|
try {
|
|
4589
6027
|
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
|
|
@@ -4595,7 +6033,7 @@ async function handleCompletion(c) {
|
|
|
4595
6033
|
ctx
|
|
4596
6034
|
});
|
|
4597
6035
|
consola.debug("Streaming response from Copilot");
|
|
4598
|
-
updateTrackerStatus(trackingId, "streaming");
|
|
6036
|
+
updateTrackerStatus(ctx.trackingId, "streaming");
|
|
4599
6037
|
return streamSSE(c, async (stream) => {
|
|
4600
6038
|
await handleStreamingResponse({
|
|
4601
6039
|
stream,
|
|
@@ -4616,8 +6054,8 @@ function handleNonStreamingResponse(opts) {
|
|
|
4616
6054
|
consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
|
|
4617
6055
|
let anthropicResponse = translateToAnthropic(response, toolNameMapping);
|
|
4618
6056
|
consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
|
|
4619
|
-
if (ctx.
|
|
4620
|
-
const marker =
|
|
6057
|
+
if (state.verbose && ctx.truncateResult?.wasCompacted) {
|
|
6058
|
+
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
4621
6059
|
anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
|
|
4622
6060
|
}
|
|
4623
6061
|
recordResponse(ctx.historyId, {
|
|
@@ -4668,17 +6106,6 @@ function prependMarkerToAnthropicResponse(response, marker) {
|
|
|
4668
6106
|
content
|
|
4669
6107
|
};
|
|
4670
6108
|
}
|
|
4671
|
-
function createAnthropicStreamAccumulator() {
|
|
4672
|
-
return {
|
|
4673
|
-
model: "",
|
|
4674
|
-
inputTokens: 0,
|
|
4675
|
-
outputTokens: 0,
|
|
4676
|
-
stopReason: "",
|
|
4677
|
-
content: "",
|
|
4678
|
-
toolCalls: [],
|
|
4679
|
-
currentToolCall: null
|
|
4680
|
-
};
|
|
4681
|
-
}
|
|
4682
6109
|
async function handleStreamingResponse(opts) {
|
|
4683
6110
|
const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
|
|
4684
6111
|
const streamState = {
|
|
@@ -4689,9 +6116,9 @@ async function handleStreamingResponse(opts) {
|
|
|
4689
6116
|
};
|
|
4690
6117
|
const acc = createAnthropicStreamAccumulator();
|
|
4691
6118
|
try {
|
|
4692
|
-
if (ctx.
|
|
4693
|
-
const marker =
|
|
4694
|
-
await
|
|
6119
|
+
if (ctx.truncateResult?.wasCompacted) {
|
|
6120
|
+
const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
|
|
6121
|
+
await sendTruncationMarkerEvent(stream, streamState, marker);
|
|
4695
6122
|
acc.content += marker;
|
|
4696
6123
|
}
|
|
4697
6124
|
await processStreamChunks({
|
|
@@ -4719,7 +6146,7 @@ async function handleStreamingResponse(opts) {
|
|
|
4719
6146
|
});
|
|
4720
6147
|
}
|
|
4721
6148
|
}
|
|
4722
|
-
async function
|
|
6149
|
+
async function sendTruncationMarkerEvent(stream, streamState, marker) {
|
|
4723
6150
|
const blockStartEvent = {
|
|
4724
6151
|
type: "content_block_start",
|
|
4725
6152
|
index: streamState.contentBlockIndex,
|
|
@@ -4779,47 +6206,6 @@ async function processStreamChunks(opts) {
|
|
|
4779
6206
|
}
|
|
4780
6207
|
}
|
|
4781
6208
|
}
|
|
4782
|
-
function processAnthropicEvent(event, acc) {
|
|
4783
|
-
switch (event.type) {
|
|
4784
|
-
case "content_block_delta":
|
|
4785
|
-
handleContentBlockDelta(event.delta, acc);
|
|
4786
|
-
break;
|
|
4787
|
-
case "content_block_start":
|
|
4788
|
-
handleContentBlockStart(event.content_block, acc);
|
|
4789
|
-
break;
|
|
4790
|
-
case "content_block_stop":
|
|
4791
|
-
handleContentBlockStop(acc);
|
|
4792
|
-
break;
|
|
4793
|
-
case "message_delta":
|
|
4794
|
-
handleMessageDelta(event.delta, event.usage, acc);
|
|
4795
|
-
break;
|
|
4796
|
-
default: break;
|
|
4797
|
-
}
|
|
4798
|
-
}
|
|
4799
|
-
function handleContentBlockDelta(delta, acc) {
|
|
4800
|
-
if (delta.type === "text_delta") acc.content += delta.text;
|
|
4801
|
-
else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
|
|
4802
|
-
}
|
|
4803
|
-
function handleContentBlockStart(block, acc) {
|
|
4804
|
-
if (block.type === "tool_use") acc.currentToolCall = {
|
|
4805
|
-
id: block.id,
|
|
4806
|
-
name: block.name,
|
|
4807
|
-
input: ""
|
|
4808
|
-
};
|
|
4809
|
-
}
|
|
4810
|
-
function handleContentBlockStop(acc) {
|
|
4811
|
-
if (acc.currentToolCall) {
|
|
4812
|
-
acc.toolCalls.push(acc.currentToolCall);
|
|
4813
|
-
acc.currentToolCall = null;
|
|
4814
|
-
}
|
|
4815
|
-
}
|
|
4816
|
-
function handleMessageDelta(delta, usage, acc) {
|
|
4817
|
-
if (delta.stop_reason) acc.stopReason = delta.stop_reason;
|
|
4818
|
-
if (usage) {
|
|
4819
|
-
acc.inputTokens = usage.input_tokens ?? 0;
|
|
4820
|
-
acc.outputTokens = usage.output_tokens;
|
|
4821
|
-
}
|
|
4822
|
-
}
|
|
4823
6209
|
function recordStreamingResponse(acc, fallbackModel, ctx) {
|
|
4824
6210
|
const contentBlocks = [];
|
|
4825
6211
|
if (acc.content) contentBlocks.push({
|
|
@@ -4845,52 +6231,51 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
|
|
|
4845
6231
|
toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
|
|
4846
6232
|
}, Date.now() - ctx.startTime);
|
|
4847
6233
|
}
|
|
4848
|
-
|
|
4849
|
-
|
|
4850
|
-
|
|
4851
|
-
|
|
4852
|
-
|
|
4853
|
-
|
|
4854
|
-
|
|
4855
|
-
|
|
4856
|
-
|
|
4857
|
-
|
|
4858
|
-
|
|
4859
|
-
|
|
4860
|
-
|
|
4861
|
-
|
|
4862
|
-
|
|
4863
|
-
|
|
4864
|
-
|
|
4865
|
-
|
|
4866
|
-
|
|
4867
|
-
|
|
4868
|
-
|
|
4869
|
-
|
|
4870
|
-
|
|
4871
|
-
|
|
4872
|
-
|
|
4873
|
-
|
|
4874
|
-
|
|
4875
|
-
|
|
4876
|
-
|
|
4877
|
-
content
|
|
4878
|
-
};
|
|
4879
|
-
});
|
|
4880
|
-
}
|
|
4881
|
-
function extractSystemPrompt(system) {
|
|
4882
|
-
if (!system) return void 0;
|
|
4883
|
-
if (typeof system === "string") return system;
|
|
4884
|
-
return system.map((block) => block.text).join("\n");
|
|
6234
|
+
|
|
6235
|
+
//#endregion
|
|
6236
|
+
//#region src/routes/messages/handler.ts
|
|
6237
|
+
async function handleCompletion(c) {
|
|
6238
|
+
const anthropicPayload = await c.req.json();
|
|
6239
|
+
consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
|
|
6240
|
+
logToolInfo(anthropicPayload);
|
|
6241
|
+
const useDirectAnthropicApi = supportsDirectAnthropicApi(anthropicPayload.model);
|
|
6242
|
+
const trackingId = c.get("trackingId");
|
|
6243
|
+
const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
|
|
6244
|
+
updateTrackerModel(trackingId, anthropicPayload.model);
|
|
6245
|
+
const ctx = {
|
|
6246
|
+
historyId: recordRequest("anthropic", {
|
|
6247
|
+
model: anthropicPayload.model,
|
|
6248
|
+
messages: convertAnthropicMessages(anthropicPayload.messages),
|
|
6249
|
+
stream: anthropicPayload.stream ?? false,
|
|
6250
|
+
tools: anthropicPayload.tools?.map((t) => ({
|
|
6251
|
+
name: t.name,
|
|
6252
|
+
description: t.description
|
|
6253
|
+
})),
|
|
6254
|
+
max_tokens: anthropicPayload.max_tokens,
|
|
6255
|
+
temperature: anthropicPayload.temperature,
|
|
6256
|
+
system: extractSystemPrompt(anthropicPayload.system)
|
|
6257
|
+
}),
|
|
6258
|
+
trackingId,
|
|
6259
|
+
startTime
|
|
6260
|
+
};
|
|
6261
|
+
if (useDirectAnthropicApi) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
|
|
6262
|
+
return handleTranslatedCompletion(c, anthropicPayload, ctx);
|
|
4885
6263
|
}
|
|
4886
|
-
|
|
4887
|
-
|
|
4888
|
-
|
|
4889
|
-
|
|
4890
|
-
|
|
4891
|
-
|
|
4892
|
-
|
|
4893
|
-
|
|
6264
|
+
/**
|
|
6265
|
+
* Log tool-related information for debugging
|
|
6266
|
+
*/
|
|
6267
|
+
function logToolInfo(anthropicPayload) {
|
|
6268
|
+
if (anthropicPayload.tools?.length) {
|
|
6269
|
+
const toolInfo = anthropicPayload.tools.map((t) => ({
|
|
6270
|
+
name: t.name,
|
|
6271
|
+
type: t.type ?? "(custom)"
|
|
6272
|
+
}));
|
|
6273
|
+
consola.debug(`[Tools] Defined tools:`, JSON.stringify(toolInfo));
|
|
6274
|
+
}
|
|
6275
|
+
for (const msg of anthropicPayload.messages) if (typeof msg.content !== "string") for (const block of msg.content) {
|
|
6276
|
+
if (block.type === "tool_use") consola.debug(`[Tools] tool_use in message: ${block.name} (id: ${block.id})`);
|
|
6277
|
+
if (block.type === "tool_result") consola.debug(`[Tools] tool_result in message: id=${block.tool_use_id}, is_error=${block.is_error ?? false}`);
|
|
6278
|
+
}
|
|
4894
6279
|
}
|
|
4895
6280
|
|
|
4896
6281
|
//#endregion
|
|
@@ -5004,13 +6389,18 @@ server.route("/history", historyRoutes);
|
|
|
5004
6389
|
|
|
5005
6390
|
//#endregion
|
|
5006
6391
|
//#region src/start.ts
|
|
6392
|
+
/** Format limit values as "Xk" or "?" if not available */
|
|
6393
|
+
function formatLimit(value) {
|
|
6394
|
+
return value ? `${Math.round(value / 1e3)}k` : "?";
|
|
6395
|
+
}
|
|
5007
6396
|
function formatModelInfo(model) {
|
|
5008
6397
|
const limits = model.capabilities?.limits;
|
|
5009
|
-
const contextK = limits?.
|
|
5010
|
-
const
|
|
6398
|
+
const contextK = formatLimit(limits?.max_context_window_tokens);
|
|
6399
|
+
const promptK = formatLimit(limits?.max_prompt_tokens);
|
|
6400
|
+
const outputK = formatLimit(limits?.max_output_tokens);
|
|
5011
6401
|
const features = [model.capabilities?.supports?.tool_calls && "tools", model.preview && "preview"].filter(Boolean).join(", ");
|
|
5012
6402
|
const featureStr = features ? ` (${features})` : "";
|
|
5013
|
-
return ` - ${model.id.
|
|
6403
|
+
return ` - ${model.id.length > 30 ? `${model.id.slice(0, 27)}...` : model.id.padEnd(30)} ctx:${contextK.padStart(5)} in:${promptK.padStart(5)} out:${outputK.padStart(4)}` + featureStr;
|
|
5014
6404
|
}
|
|
5015
6405
|
async function runServer(options) {
|
|
5016
6406
|
consola.info(`copilot-api v${package_default.version}`);
|
|
@@ -5018,12 +6408,16 @@ async function runServer(options) {
|
|
|
5018
6408
|
if (options.verbose) {
|
|
5019
6409
|
consola.level = 5;
|
|
5020
6410
|
consola.info("Verbose logging enabled");
|
|
6411
|
+
state.verbose = true;
|
|
5021
6412
|
}
|
|
5022
6413
|
state.accountType = options.accountType;
|
|
5023
6414
|
if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
|
|
5024
6415
|
state.manualApprove = options.manual;
|
|
5025
6416
|
state.showToken = options.showToken;
|
|
5026
|
-
state.
|
|
6417
|
+
state.autoTruncate = options.autoTruncate;
|
|
6418
|
+
state.compressToolResults = options.compressToolResults;
|
|
6419
|
+
state.redirectAnthropic = options.redirectAnthropic;
|
|
6420
|
+
state.rewriteAnthropicTools = options.rewriteAnthropicTools;
|
|
5027
6421
|
if (options.rateLimit) initAdaptiveRateLimiter({
|
|
5028
6422
|
baseRetryIntervalSeconds: options.retryInterval,
|
|
5029
6423
|
requestIntervalSeconds: options.requestInterval,
|
|
@@ -5031,7 +6425,10 @@ async function runServer(options) {
|
|
|
5031
6425
|
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
5032
6426
|
});
|
|
5033
6427
|
else consola.info("Rate limiting disabled");
|
|
5034
|
-
if (!options.
|
|
6428
|
+
if (!options.autoTruncate) consola.info("Auto-truncate disabled");
|
|
6429
|
+
if (options.compressToolResults) consola.info("Tool result compression enabled");
|
|
6430
|
+
if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
|
|
6431
|
+
if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
|
|
5035
6432
|
initHistory(options.history, options.historyLimit);
|
|
5036
6433
|
if (options.history) {
|
|
5037
6434
|
const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
|
|
@@ -5173,10 +6570,25 @@ const start = defineCommand({
|
|
|
5173
6570
|
default: "1000",
|
|
5174
6571
|
description: "Maximum number of history entries to keep in memory (0 = unlimited)"
|
|
5175
6572
|
},
|
|
5176
|
-
"no-auto-
|
|
6573
|
+
"no-auto-truncate": {
|
|
6574
|
+
type: "boolean",
|
|
6575
|
+
default: false,
|
|
6576
|
+
description: "Disable automatic conversation history truncation when exceeding limits"
|
|
6577
|
+
},
|
|
6578
|
+
"compress-tool-results": {
|
|
6579
|
+
type: "boolean",
|
|
6580
|
+
default: false,
|
|
6581
|
+
description: "Compress old tool_result content before truncating messages (may lose context details)"
|
|
6582
|
+
},
|
|
6583
|
+
"redirect-anthropic": {
|
|
6584
|
+
type: "boolean",
|
|
6585
|
+
default: false,
|
|
6586
|
+
description: "Redirect Anthropic models through OpenAI translation (instead of direct API)"
|
|
6587
|
+
},
|
|
6588
|
+
"no-rewrite-anthropic-tools": {
|
|
5177
6589
|
type: "boolean",
|
|
5178
6590
|
default: false,
|
|
5179
|
-
description: "
|
|
6591
|
+
description: "Don't rewrite Anthropic server-side tools (web_search, etc.) to custom tool format"
|
|
5180
6592
|
}
|
|
5181
6593
|
},
|
|
5182
6594
|
run({ args }) {
|
|
@@ -5197,7 +6609,10 @@ const start = defineCommand({
|
|
|
5197
6609
|
proxyEnv: args["proxy-env"],
|
|
5198
6610
|
history: !args["no-history"],
|
|
5199
6611
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
5200
|
-
|
|
6612
|
+
autoTruncate: !args["no-auto-truncate"],
|
|
6613
|
+
compressToolResults: args["compress-tool-results"],
|
|
6614
|
+
redirectAnthropic: args["redirect-anthropic"],
|
|
6615
|
+
rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"]
|
|
5201
6616
|
});
|
|
5202
6617
|
}
|
|
5203
6618
|
});
|