@hsupu/copilot-api 0.7.11 → 0.7.12
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/main.js +1468 -408
- package/dist/main.js.map +1 -1
- package/package.json +4 -1
package/dist/main.js
CHANGED
@@ -17,7 +17,6 @@ import pc from "picocolors";
 import { Hono } from "hono";
 import { cors } from "hono/cors";
 import { streamSSE } from "hono/streaming";
-import { countTokens } from "@anthropic-ai/tokenizer";
 import { events } from "fetch-event-stream";
 
 //#region src/lib/paths.ts
@@ -49,7 +48,9 @@ const state = {
 showToken: false,
 verbose: false,
 autoTruncate: true,
-
+compressToolResults: false,
+redirectAnthropic: false,
+rewriteAnthropicTools: true
 };
 
 //#endregion
@@ -93,27 +94,78 @@ const GITHUB_BASE_URL = "https://github.com";
 const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
 const GITHUB_APP_SCOPES = ["read:user"].join(" ");
 
+//#endregion
+//#region src/lib/auto-truncate-common.ts
+const DEFAULT_AUTO_TRUNCATE_CONFIG = {
+safetyMarginPercent: 2,
+maxRequestBodyBytes: 510 * 1024,
+preserveRecentPercent: .7
+};
+/** Dynamic byte limit that adjusts based on 413 errors */
+let dynamicByteLimit = null;
+/**
+* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
+*/
+function onRequestTooLarge(failingBytes) {
+const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+dynamicByteLimit = newLimit;
+consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
+}
+/** Get the current effective byte limit */
+function getEffectiveByteLimitBytes() {
+return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
+}
+/** Dynamic token limits per model, adjusted based on token limit errors */
+const dynamicTokenLimits = /* @__PURE__ */ new Map();
+/**
+* Called when a token limit error (400) occurs.
+* Adjusts the token limit for the specific model to 95% of the reported limit.
+*/
+function onTokenLimitExceeded(modelId, reportedLimit) {
+const newLimit = Math.floor(reportedLimit * .95);
+const previous = dynamicTokenLimits.get(modelId);
+if (!previous || newLimit < previous) {
+dynamicTokenLimits.set(modelId, newLimit);
+consola.info(`[AutoTruncate] Adjusted token limit for ${modelId}: ${reportedLimit} reported → ${newLimit} effective`);
+}
+}
+/**
+* Get the effective token limit for a model.
+* Returns the dynamic limit if set, otherwise null to use model capabilities.
+*/
+function getEffectiveTokenLimit(modelId) {
+return dynamicTokenLimits.get(modelId) ?? null;
+}
+
 //#endregion
 //#region src/lib/error.ts
 var HTTPError = class HTTPError extends Error {
 status;
 responseText;
-constructor(message, status, responseText) {
+/** Model ID that caused the error (if known) */
+modelId;
+constructor(message, status, responseText, modelId) {
 super(message);
 this.status = status;
 this.responseText = responseText;
+this.modelId = modelId;
 }
-static async fromResponse(message, response) {
+static async fromResponse(message, response, modelId) {
 const text = await response.text();
-return new HTTPError(message, response.status, text);
+return new HTTPError(message, response.status, text, modelId);
 }
 };
 /** Parse token limit info from error message */
 function parseTokenLimitError(message) {
-const
-if (
-current: Number.parseInt(
-limit: Number.parseInt(
+const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+if (openaiMatch) return {
+current: Number.parseInt(openaiMatch[1], 10),
+limit: Number.parseInt(openaiMatch[2], 10)
+};
+const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
+if (anthropicMatch) return {
+current: Number.parseInt(anthropicMatch[1], 10),
+limit: Number.parseInt(anthropicMatch[2], 10)
 };
 return null;
 }
@@ -150,11 +202,10 @@ function formatRateLimitError(copilotMessage) {
 };
 }
 function forwardError(c, error) {
-consola.error("Error occurred:", error);
 if (error instanceof HTTPError) {
 if (error.status === 413) {
 const formattedError = formatRequestTooLargeError();
-consola.
+consola.warn(`HTTP 413: Request too large`);
 return c.json(formattedError, 413);
 }
 let errorJson;
@@ -163,26 +214,38 @@ function forwardError(c, error) {
 } catch {
 errorJson = error.responseText;
 }
-consola.error("HTTP error:", errorJson);
 const copilotError = errorJson;
 if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
 const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
 if (tokenInfo) {
+if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
+const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
+return c.json(formattedError, 400);
+}
+}
+const anthropicError = errorJson;
+if (anthropicError.error?.type === "invalid_request_error") {
+const tokenInfo = parseTokenLimitError(anthropicError.error.message ?? "");
+if (tokenInfo) {
+if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
 const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
-consola.
+consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
 return c.json(formattedError, 400);
 }
 }
 if (error.status === 429 || copilotError.error?.code === "rate_limited") {
 const formattedError = formatRateLimitError(copilotError.error?.message);
-consola.
+consola.warn(`HTTP 429: Rate limit exceeded`);
 return c.json(formattedError, 429);
 }
+consola.error(`HTTP ${error.status}:`, errorJson);
 return c.json({ error: {
 message: error.responseText,
 type: "error"
 } }, error.status);
 }
+consola.error("Unexpected error:", error);
 return c.json({ error: {
 message: error.message,
 type: "error"
@@ -308,6 +371,7 @@ async function pollAccessToken(deviceCode) {
 //#region src/lib/token.ts
 const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
 const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
+let copilotTokenRefreshTimer = null;
 /**
 * Refresh the Copilot token with exponential backoff retry.
 * Returns the new token on success, or null if all retries fail.
@@ -326,20 +390,34 @@ async function refreshCopilotTokenWithRetry(maxRetries = 3) {
 consola.error("All token refresh attempts failed:", lastError);
 return null;
 }
+/**
+* Clear any existing token refresh timer.
+* Call this before setting up a new timer or during cleanup.
+*/
+function clearCopilotTokenRefresh() {
+if (copilotTokenRefreshTimer) {
+clearInterval(copilotTokenRefreshTimer);
+copilotTokenRefreshTimer = null;
+}
+}
 const setupCopilotToken = async () => {
 const { token, refresh_in } = await getCopilotToken();
 state.copilotToken = token;
 consola.debug("GitHub Copilot Token fetched successfully!");
 if (state.showToken) consola.info("Copilot token:", token);
-const refreshInterval = (refresh_in - 60) * 1e3;
-
+const refreshInterval = Math.max((refresh_in - 60) * 1e3, 60 * 1e3);
+clearCopilotTokenRefresh();
+copilotTokenRefreshTimer = setInterval(() => {
 consola.debug("Refreshing Copilot token");
-
-
-
-
-
-
+refreshCopilotTokenWithRetry().then((newToken) => {
+if (newToken) {
+state.copilotToken = newToken;
+consola.debug("Copilot token refreshed");
+if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
+} else consola.error("Failed to refresh Copilot token after retries, using existing token");
+}).catch((error) => {
+consola.error("Unexpected error during token refresh:", error);
+});
 }, refreshInterval);
 };
 async function setupGitHubToken(options) {
@@ -621,7 +699,7 @@ const logout = defineCommand({
 });
 
 //#endregion
-//#region src/patch-claude.ts
+//#region src/patch-claude-code.ts
 const SUPPORTED_VERSIONS = {
 v2a: {
 min: "2.0.0",
@@ -941,7 +1019,7 @@ const patchClaude = defineCommand({
 //#endregion
 //#region package.json
 var name = "@hsupu/copilot-api";
-var version = "0.7.11";
+var version = "0.7.12";
 var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
 var keywords = [
 "proxy",
@@ -969,6 +1047,9 @@ var scripts = {
 "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
 "release": "bumpp && npm publish --access public",
 "start": "NODE_ENV=production bun run ./src/main.ts",
+"test": "bun test tests/*.test.ts",
+"test:all": "bun test tests/*.test.ts && bun test tests/integration/",
+"test:integration": "bun test tests/integration/",
 "typecheck": "tsc"
 };
 var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
@@ -1021,7 +1102,7 @@ var package_default = {
 
 //#endregion
 //#region src/lib/adaptive-rate-limiter.ts
-const DEFAULT_CONFIG
+const DEFAULT_CONFIG = {
 baseRetryIntervalSeconds: 10,
 maxRetryIntervalSeconds: 120,
 requestIntervalSeconds: 10,
@@ -1050,7 +1131,7 @@ var AdaptiveRateLimiter = class {
 recoveryStepIndex = 0;
 constructor(config = {}) {
 this.config = {
-...DEFAULT_CONFIG
+...DEFAULT_CONFIG,
 ...config
 };
 }
@@ -1292,12 +1373,12 @@ let rateLimiterInstance = null;
 */
 function initAdaptiveRateLimiter(config = {}) {
 rateLimiterInstance = new AdaptiveRateLimiter(config);
-const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG
-const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG
-const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG
-const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG
-const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG
-const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG
+const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
+const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
+const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
+const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
+const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
+const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
 consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
 }
 /**
@@ -1918,6 +1999,7 @@ var RequestTracker = class {
 requests = /* @__PURE__ */ new Map();
 renderer = null;
 completedQueue = [];
+completedTimeouts = /* @__PURE__ */ new Map();
 historySize = 5;
 completedDisplayMs = 2e3;
 setRenderer(renderer) {
@@ -1977,11 +2059,22 @@ var RequestTracker = class {
 this.renderer?.onRequestComplete(request);
 this.requests.delete(id);
 this.completedQueue.push(request);
-while (this.completedQueue.length > this.historySize)
-
+while (this.completedQueue.length > this.historySize) {
+const removed = this.completedQueue.shift();
+if (removed) {
+const timeoutId$1 = this.completedTimeouts.get(removed.id);
+if (timeoutId$1) {
+clearTimeout(timeoutId$1);
+this.completedTimeouts.delete(removed.id);
+}
+}
+}
+const timeoutId = setTimeout(() => {
 const idx = this.completedQueue.indexOf(request);
 if (idx !== -1) this.completedQueue.splice(idx, 1);
+this.completedTimeouts.delete(id);
 }, this.completedDisplayMs);
+this.completedTimeouts.set(id, timeoutId);
 }
 /**
 * Mark request as failed with error
@@ -2016,11 +2109,13 @@ var RequestTracker = class {
 return this.requests.get(id);
 }
 /**
-* Clear all tracked requests
+* Clear all tracked requests and pending timeouts
 */
 clear() {
 this.requests.clear();
 this.completedQueue = [];
+for (const timeoutId of this.completedTimeouts.values()) clearTimeout(timeoutId);
+this.completedTimeouts.clear();
 }
 };
 const requestTracker = new RequestTracker();
@@ -2171,6 +2266,14 @@ const getTokenizerFromModel = (model) => {
 return model.capabilities?.tokenizer || "o200k_base";
 };
 /**
+* Count tokens in a text string using the model's tokenizer.
+* This is a simple wrapper for counting tokens in plain text.
+*/
+const countTextTokens = async (text, model) => {
+const tokenizer = getTokenizerFromModel(model);
+return (await getEncodeChatFunction(tokenizer)).encode(text).length;
+};
+/**
 * Get model-specific constants for token calculation.
 * These values are empirically determined based on OpenAI's function calling token overhead.
 * - funcInit: Tokens for initializing a function definition
@@ -2276,61 +2379,11 @@ const numTokensForTools = (tools, encoder, constants) => {
 return funcTokenCount;
 };
 /**
-* Check if a model is an Anthropic model
-*/
-function isAnthropicModel(model) {
-return model.vendor === "Anthropic";
-}
-/**
-* Convert a message to plain text for Anthropic tokenizer
-*/
-function messageToText(message) {
-const parts = [];
-parts.push(`${message.role}:`);
-if (typeof message.content === "string") parts.push(message.content);
-else if (Array.isArray(message.content)) {
-for (const part of message.content) if ("text" in part && part.text) parts.push(part.text);
-else if (part.type === "image_url") parts.push("[image]");
-}
-if (message.tool_calls) for (const tc of message.tool_calls) parts.push(JSON.stringify(tc));
-if ("tool_call_id" in message && message.tool_call_id) parts.push(`tool_call_id:${message.tool_call_id}`);
-return parts.join("\n");
-}
-/**
-* Convert tools to text for Anthropic tokenizer
-*/
-function toolsToText(tools) {
-return tools.map((tool) => JSON.stringify(tool)).join("\n");
-}
-/**
-* Calculate token count using Anthropic's official tokenizer
-*/
-function getAnthropicTokenCount(payload) {
-const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
-const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
-const inputText = inputMessages.map((msg) => messageToText(msg)).join("\n\n");
-const outputText = outputMessages.map((msg) => messageToText(msg)).join("\n\n");
-let inputTokens = countTokens(inputText);
-let outputTokens = countTokens(outputText);
-if (payload.tools && payload.tools.length > 0) {
-const toolsText = toolsToText(payload.tools);
-inputTokens += countTokens(toolsText);
-}
-inputTokens += inputMessages.length * 3;
-outputTokens += outputMessages.length * 3;
-inputTokens += 3;
-return {
-input: inputTokens,
-output: outputTokens
-};
-}
-/**
 * Calculate the token count of messages.
-* Uses
-*
+* Uses the tokenizer specified by the GitHub Copilot API model info.
+* All models (including Claude) use GPT tokenizers (o200k_base or cl100k_base).
 */
 const getTokenCount = async (payload, model) => {
-if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
 const tokenizer = getTokenizerFromModel(model);
 const encoder = await getEncodeChatFunction(tokenizer);
 const simplifiedMessages = payload.messages;
@@ -2347,32 +2400,18 @@ const getTokenCount = async (payload, model) => {
 };
 
 //#endregion
-//#region src/lib/auto-truncate.ts
-
-
-maxRequestBodyBytes: 510 * 1024
-};
-/** Dynamic byte limit that adjusts based on 413 errors */
-let dynamicByteLimit = null;
-/**
-* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
-*/
-function onRequestTooLarge(failingBytes) {
-const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
-dynamicByteLimit = newLimit;
-consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
-}
-function calculateLimits(model, config) {
-const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+//#region src/lib/auto-truncate-openai.ts
+function calculateLimits$1(model, config) {
+const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
 const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
-const byteLimit =
+const byteLimit = getEffectiveByteLimitBytes();
 return {
 tokenLimit,
 byteLimit
 };
 }
 /** Estimate tokens for a single message (fast approximation) */
-function estimateMessageTokens(msg) {
+function estimateMessageTokens$1(msg) {
 let charCount = 0;
 if (typeof msg.content === "string") charCount = msg.content.length;
 else if (Array.isArray(msg.content)) {
@@ -2383,7 +2422,7 @@ function estimateMessageTokens(msg) {
 return Math.ceil(charCount / 4) + 10;
 }
 /** Get byte size of a message */
-function getMessageBytes(msg) {
+function getMessageBytes$1(msg) {
 return JSON.stringify(msg).length;
 }
 /** Extract system/developer messages from the beginning */
@@ -2405,7 +2444,7 @@ function getToolCallIds(msg) {
 return [];
 }
 /** Filter orphaned tool_result messages */
-function filterOrphanedToolResults(messages) {
+function filterOrphanedToolResults$1(messages) {
 const toolUseIds = /* @__PURE__ */ new Set();
 for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
 let removedCount = 0;
@@ -2416,22 +2455,127 @@ function filterOrphanedToolResults(messages) {
 }
 return true;
 });
-if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
+if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_result`);
 return filtered;
 }
+/** Get tool_result IDs from all tool messages */
+function getToolResultIds$1(messages) {
+const ids = /* @__PURE__ */ new Set();
+for (const msg of messages) if (msg.role === "tool" && msg.tool_call_id) ids.add(msg.tool_call_id);
+return ids;
+}
+/** Filter orphaned tool_use messages (those without matching tool_result) */
+function filterOrphanedToolUse$1(messages) {
+const toolResultIds = getToolResultIds$1(messages);
+const result = [];
+let removedCount = 0;
+for (const msg of messages) {
+if (msg.role === "assistant" && msg.tool_calls) {
+const filteredToolCalls = msg.tool_calls.filter((tc) => {
+if (!toolResultIds.has(tc.id)) {
+removedCount++;
+return false;
+}
+return true;
+});
+if (filteredToolCalls.length === 0) {
+if (msg.content) result.push({
+...msg,
+tool_calls: void 0
+});
+continue;
+}
+result.push({
+...msg,
+tool_calls: filteredToolCalls
+});
+continue;
+}
+result.push(msg);
+}
+if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_use`);
+return result;
+}
 /** Ensure messages start with a user message */
-function ensureStartsWithUser(messages) {
+function ensureStartsWithUser$1(messages) {
 let startIndex = 0;
 while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
-if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
+if (startIndex > 0) consola.debug(`[AutoTruncate:OpenAI] Skipped ${startIndex} leading non-user messages`);
 return messages.slice(startIndex);
 }
+/** Threshold for large tool message content (bytes) */
+const LARGE_TOOL_RESULT_THRESHOLD$1 = 1e4;
+/** Maximum length for compressed tool_result summary */
+const COMPRESSED_SUMMARY_LENGTH$1 = 500;
+/**
+* Compress a large tool message content to a summary.
+* Keeps the first and last portions with a note about truncation.
+*/
+function compressToolResultContent$1(content) {
+if (content.length <= LARGE_TOOL_RESULT_THRESHOLD$1) return content;
+const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH$1 / 2);
+const start$1 = content.slice(0, halfLen);
+const end = content.slice(-halfLen);
+const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH$1;
+return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
+}
+/**
+* Smart compression strategy for OpenAI format:
+* 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
+* 2. Messages before that threshold get their tool content compressed
+* 3. Returns compressed messages and stats
+*
+* @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
+*/
+function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePercent) {
+const n = messages.length;
+const cumTokens = Array.from({ length: n + 1 }, () => 0);
+const cumBytes = Array.from({ length: n + 1 }, () => 0);
+for (let i = n - 1; i >= 0; i--) {
+const msg = messages[i];
+cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
+}
+const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
+const preserveByteLimit = Math.floor(byteLimit * preservePercent);
+let thresholdIndex = n;
+for (let i = n - 1; i >= 0; i--) {
+if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
+thresholdIndex = i + 1;
+break;
+}
+thresholdIndex = i;
+}
+if (thresholdIndex >= n) return {
+messages,
+compressedCount: 0,
+compressThresholdIndex: n
+};
+const result = [];
+let compressedCount = 0;
+for (const [i, msg] of messages.entries()) {
+if (i < thresholdIndex && msg.role === "tool" && typeof msg.content === "string" && msg.content.length > LARGE_TOOL_RESULT_THRESHOLD$1) {
+compressedCount++;
+result.push({
+...msg,
+content: compressToolResultContent$1(msg.content)
+});
+continue;
+}
+result.push(msg);
+}
+return {
+messages: result,
+compressedCount,
+compressThresholdIndex: thresholdIndex
+};
+}
 /**
 * Find the optimal index from which to preserve messages.
 * Uses binary search with pre-calculated cumulative sums.
 * Returns the smallest index where the preserved portion fits within limits.
 */
-function findOptimalPreserveIndex(params) {
+function findOptimalPreserveIndex$1(params) {
 const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
 if (messages.length === 0) return 0;
 const markerBytes = 200;
@@ -2443,8 +2587,8 @@ function findOptimalPreserveIndex(params) {
 const cumBytes = Array.from({ length: n + 1 }, () => 0);
 for (let i = n - 1; i >= 0; i--) {
 const msg = messages[i];
-cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
-cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
 }
 let left = 0;
 let right = n;
@@ -2458,12 +2602,12 @@ function findOptimalPreserveIndex(params) {
 /**
 * Check if payload needs compaction based on model limits or byte size.
 */
-async function
+async function checkNeedsCompactionOpenAI(payload, model, config = {}) {
 const cfg = {
-...
+...DEFAULT_AUTO_TRUNCATE_CONFIG,
 ...config
 };
-const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
 const currentTokens = (await getTokenCount(payload, model)).input;
 const currentBytes = JSON.stringify(payload).length;
 const exceedsTokens = currentTokens > tokenLimit;
@@ -2481,23 +2625,90 @@ async function checkNeedsCompaction(payload, model, config = {}) {
 reason
 };
 }
-/**
-
+/**
+* Generate a summary of removed messages for context.
+* Extracts key information like tool calls and topics.
+*/
+function generateRemovedMessagesSummary$1(removedMessages) {
+const toolCalls = [];
+let userMessageCount = 0;
+let assistantMessageCount = 0;
+for (const msg of removedMessages) {
+if (msg.role === "user") userMessageCount++;
+else if (msg.role === "assistant") assistantMessageCount++;
+if (msg.tool_calls) {
+for (const tc of msg.tool_calls) if (tc.function.name) toolCalls.push(tc.function.name);
+}
+}
+const parts = [];
+if (userMessageCount > 0 || assistantMessageCount > 0) {
+const breakdown = [];
+if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
+if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
+parts.push(`Messages: ${breakdown.join(", ")}`);
+}
+if (toolCalls.length > 0) {
+const uniqueTools = [...new Set(toolCalls)];
+const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
+parts.push(`Tools used: ${displayTools.join(", ")}`);
+}
+return parts.join(". ");
+}
+/**
+* Add a compression notice to the system message.
+* Informs the model that some tool content has been compressed.
+*/
+function addCompressionNotice$1(payload, compressedCount) {
+const notice = `\n\n[CONTEXT NOTE]\n${compressedCount} large tool results have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]`;
+const messages = [...payload.messages];
+for (let i = messages.length - 1; i >= 0; i--) {
+const msg = messages[i];
+if (msg.role === "system" || msg.role === "developer") {
+if (typeof msg.content === "string") messages[i] = {
+...msg,
+content: msg.content + notice
+};
+break;
+}
+}
+return {
+...payload,
+messages
+};
+}
+/**
+* Create truncation context to append to system messages.
+*/
+function createTruncationSystemContext$1(removedCount, compressedCount, summary) {
+let context = `\n\n[CONVERSATION CONTEXT]\n`;
+if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
+if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
+if (summary) context += `Summary of removed content: ${summary}\n`;
+context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
+return context;
+}
+/** Create a truncation marker message (fallback when no system message) */
+function createTruncationMarker$2(removedCount, compressedCount, summary) {
+const parts = [];
+if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
+if (compressedCount > 0) parts.push(`${compressedCount} tool results compressed`);
+let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
+if (summary) content += `\n[Summary: ${summary}]`;
 return {
 role: "user",
-content
+content
 };
 }
 /**
 * Perform auto-truncation on a payload that exceeds limits.
 * Uses binary search to find the optimal truncation point.
 */
-async function
+async function autoTruncateOpenAI(payload, model, config = {}) {
 const cfg = {
-...
+...DEFAULT_AUTO_TRUNCATE_CONFIG,
 ...config
 };
-const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
 const originalBytes = JSON.stringify(payload).length;
 const originalTokens = (await getTokenCount(payload, model)).input;
 if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
@@ -2509,18 +2720,44 @@ async function autoTruncate(payload, model, config = {}) {
 };
 const exceedsTokens = originalTokens > tokenLimit;
 const exceedsBytes = originalBytes > byteLimit;
-let
-
-
-
-
-
-
-
-
-
-
-
+let workingMessages = payload.messages;
+let compressedCount = 0;
+if (state.compressToolResults) {
+const compressionResult = smartCompressToolResults$1(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
+workingMessages = compressionResult.messages;
+compressedCount = compressionResult.compressedCount;
+const compressedPayload = {
+...payload,
+messages: workingMessages
+};
+const compressedBytes = JSON.stringify(compressedPayload).length;
+const compressedTokenCount = await getTokenCount(compressedPayload, model);
+if (compressedTokenCount.input <= tokenLimit && compressedBytes <= byteLimit) {
+let reason$1 = "tokens";
+if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
+else if (exceedsBytes) reason$1 = "size";
+consola.info(`[AutoTruncate:OpenAI] ${reason$1}: ${originalTokens}→${compressedTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
+const noticePayload = addCompressionNotice$1(compressedPayload, compressedCount);
+const noticeTokenCount = await getTokenCount(noticePayload, model);
+return {
+payload: noticePayload,
+wasCompacted: true,
+originalTokens,
+compactedTokens: noticeTokenCount.input,
+removedMessageCount: 0
+};
+}
+}
+const { systemMessages, conversationMessages } = extractSystemMessages(workingMessages);
+const messagesJson = JSON.stringify(workingMessages);
+const payloadOverhead = JSON.stringify({
+...payload,
+messages: workingMessages
+}).length - messagesJson.length;
+const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes$1(m) + 1, 0);
+const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens$1(m), 0);
+consola.debug(`[AutoTruncate:OpenAI] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+const preserveIndex = findOptimalPreserveIndex$1({
 messages: conversationMessages,
 systemBytes,
 systemTokens,
@@ -2529,7 +2766,7 @@ async function autoTruncate(payload, model, config = {}) {
 byteLimit
 });
 if (preserveIndex === 0) {
-consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
+consola.warn("[AutoTruncate:OpenAI] Cannot truncate, system messages too large");
 return {
 payload,
 wasCompacted: false,
@@ -2539,7 +2776,7 @@ async function autoTruncate(payload, model, config = {}) {
 };
 }
 if (preserveIndex >= conversationMessages.length) {
-consola.warn("[AutoTruncate] Would need to remove all messages");
+consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
 return {
 payload,
 wasCompacted: false,
@@ -2549,11 +2786,13 @@ async function autoTruncate(payload, model, config = {}) {
 };
 }
 let preserved = conversationMessages.slice(preserveIndex);
-preserved = filterOrphanedToolResults(preserved);
-preserved =
-preserved =
+preserved = filterOrphanedToolResults$1(preserved);
+preserved = filterOrphanedToolUse$1(preserved);
+preserved = ensureStartsWithUser$1(preserved);
+preserved = filterOrphanedToolResults$1(preserved);
+preserved = filterOrphanedToolUse$1(preserved);
 if (preserved.length === 0) {
-consola.warn("[AutoTruncate] All messages filtered out after cleanup");
+consola.warn("[AutoTruncate:OpenAI] All messages filtered out after cleanup");
 return {
 payload,
 wasCompacted: false,
@@ -2562,20 +2801,36 @@ async function autoTruncate(payload, model, config = {}) {
 removedMessageCount: 0
 };
 }
+const removedMessages = conversationMessages.slice(0, preserveIndex);
 const removedCount = conversationMessages.length - preserved.length;
-const
+const summary = generateRemovedMessagesSummary$1(removedMessages);
+let newSystemMessages = systemMessages;
+let newMessages = preserved;
+if (systemMessages.length > 0) {
+const truncationContext = createTruncationSystemContext$1(removedCount, compressedCount, summary);
+const lastSystemIdx = systemMessages.length - 1;
+const lastSystem = systemMessages[lastSystemIdx];
+const updatedSystem = {
+...lastSystem,
+content: typeof lastSystem.content === "string" ? lastSystem.content + truncationContext : lastSystem.content
+};
+newSystemMessages = [...systemMessages.slice(0, lastSystemIdx), updatedSystem];
+} else newMessages = [createTruncationMarker$2(removedCount, compressedCount, summary), ...preserved];
 const newPayload = {
 ...payload,
-messages: [
-...systemMessages,
-marker,
-...preserved
-]
+messages: [...newSystemMessages, ...newMessages]
 };
 const newBytes = JSON.stringify(newPayload).length;
 const newTokenCount = await getTokenCount(newPayload, model);
-
-if (
+let reason = "tokens";
+if (exceedsTokens && exceedsBytes) reason = "tokens+size";
+else if (exceedsBytes) reason = "size";
+const actions = [];
+if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
+if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
+const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
+consola.info(`[AutoTruncate:OpenAI] ${reason}: ${originalTokens}→${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
+if (newBytes > byteLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
 return {
 payload: newPayload,
 wasCompacted: true,
@@ -2587,7 +2842,7 @@ async function autoTruncate(payload, model, config = {}) {
 /**
 * Create a marker to prepend to responses indicating auto-truncation occurred.
 */
-function
+function createTruncationResponseMarkerOpenAI(result) {
 if (!result.wasCompacted) return "";
 const reduction = result.originalTokens - result.compactedTokens;
 const percentage = Math.round(reduction / result.originalTokens * 100);
@@ -2611,7 +2866,7 @@ const createChatCompletions = async (payload) => {
 });
 if (!response.ok) {
 consola.error("Failed to create chat completions", response);
-throw await HTTPError.fromResponse("Failed to create chat completions", response);
+throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
 }
 if (payload.stream) return events(response);
 return await response.json();
@@ -2661,6 +2916,18 @@ function failTracking(trackingId, error) {
 if (!trackingId) return;
 requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
 }
+/**
+* Create a marker to prepend to responses indicating auto-truncation occurred.
+* Works with both OpenAI and Anthropic truncate results.
+*/
+function createTruncationMarker(result) {
+if (!result.wasCompacted) return "";
+const { originalTokens, compactedTokens, removedMessageCount } = result;
+if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
+const reduction = originalTokens - compactedTokens;
+const percentage = Math.round(reduction / originalTokens * 100);
+return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
+}
 /** Record streaming error to history (works with any accumulator type) */
 function recordStreamError(opts) {
 const { acc, fallbackModel, ctx, error } = opts;
@@ -2689,7 +2956,7 @@ async function buildFinalPayload(payload, model) {
 };
 }
 try {
-const check = await
+const check = await checkNeedsCompactionOpenAI(payload, model);
 consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
 if (!check.needed) return {
 finalPayload: payload,
@@ -2700,7 +2967,7 @@ async function buildFinalPayload(payload, model) {
 else if (check.reason === "bytes") reasonText = "size";
 else reasonText = "tokens";
 consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
-const truncateResult = await
+const truncateResult = await autoTruncateOpenAI(payload, model);
 return {
 finalPayload: truncateResult.payload,
 truncateResult
@@ -2840,7 +3107,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
 let response = originalResponse;
 if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
-const marker =
+const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
 response = {
 ...response,
 choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2909,7 +3176,7 @@ async function handleStreamingResponse$1(opts) {
 const acc = createStreamAccumulator();
 try {
 if (state.verbose && ctx.truncateResult?.wasCompacted) {
-const marker =
+const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
 const markerChunk = {
 id: `compact-marker-${Date.now()}`,
 object: "chat.completion.chunk",
@@ -4192,97 +4459,699 @@ historyRoutes.get("/", (c) => {
|
|
|
4192
4459
|
});
|
|
4193
4460
|
|
|
4194
4461
|
//#endregion
|
|
4195
|
-
//#region src/
|
|
4196
|
-
function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
4197
|
-
if (finishReason === null) return null;
|
|
4198
|
-
return {
|
|
4199
|
-
stop: "end_turn",
|
|
4200
|
-
length: "max_tokens",
|
|
4201
|
-
tool_calls: "tool_use",
|
|
4202
|
-
content_filter: "end_turn"
|
|
4203
|
-
}[finishReason];
|
|
4204
|
-
}
|
|
4205
|
-
|
|
4206
|
-
//#endregion
|
|
4207
|
-
//#region src/routes/messages/non-stream-translation.ts
|
|
4208
|
-
const OPENAI_TOOL_NAME_LIMIT = 64;
|
|
4462
|
+
//#region src/lib/auto-truncate-anthropic.ts
|
|
4209
4463
|
/**
|
|
4210
|
-
*
|
|
4211
|
-
* This handles edge cases where conversation history may be incomplete:
|
|
4212
|
-
* - Session interruptions where tool execution was cut off
|
|
4213
|
-
* - Previous request failures
|
|
4214
|
-
* - Client sending truncated history
|
|
4215
|
-
*
|
|
4216
|
-
* Adding placeholder responses prevents API errors and maintains protocol compliance.
|
|
4464
|
+
* Convert Anthropic message content to text for token counting.
|
|
4217
4465
|
*/
|
|
4218
|
-
function
|
|
4219
|
-
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
|
|
4223
|
-
|
|
4224
|
-
|
|
4225
|
-
|
|
4226
|
-
|
|
4227
|
-
|
|
4228
|
-
|
|
4229
|
-
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
|
|
4233
|
-
fixedMessages.push({
|
|
4234
|
-
role: "tool",
|
|
4235
|
-
tool_call_id: toolCall.id,
|
|
4236
|
-
content: "Tool execution was interrupted or failed."
|
|
4237
|
-
});
|
|
4466
|
+
function contentToText(content) {
|
|
4467
|
+
if (typeof content === "string") return content;
|
|
4468
|
+
const parts = [];
|
|
4469
|
+
for (const block of content) switch (block.type) {
|
|
4470
|
+
case "text":
|
|
4471
|
+
parts.push(block.text);
|
|
4472
|
+
break;
|
|
4473
|
+
case "tool_use":
|
|
4474
|
+
parts.push(`[tool_use: ${block.name}]`, JSON.stringify(block.input));
|
|
4475
|
+
break;
|
|
4476
|
+
case "tool_result":
|
|
4477
|
+
if (typeof block.content === "string") parts.push(block.content);
|
|
4478
|
+
else if (Array.isArray(block.content)) {
|
|
4479
|
+
for (const inner of block.content) if (inner.type === "text") parts.push(inner.text);
|
|
4238
4480
|
}
|
|
4239
|
-
|
|
4481
|
+
break;
|
|
4482
|
+
case "thinking":
|
|
4483
|
+
parts.push(block.thinking);
|
|
4484
|
+
break;
|
|
4485
|
+
default: break;
|
|
4240
4486
|
}
|
|
4241
|
-
return
|
|
4487
|
+
return parts.join("\n");
|
|
4242
4488
|
}
|
|
4243
|
-
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
|
|
4250
|
-
|
|
4251
|
-
model: translateModelName(payload.model),
|
|
4252
|
-
messages: fixMessageSequence(messages),
|
|
4253
|
-
max_tokens: payload.max_tokens,
|
|
4254
|
-
stop: payload.stop_sequences,
|
|
4255
|
-
stream: payload.stream,
|
|
4256
|
-
temperature: payload.temperature,
|
|
4257
|
-
top_p: payload.top_p,
|
|
4258
|
-
user: payload.metadata?.user_id,
|
|
4259
|
-
tools: translateAnthropicToolsToOpenAI(payload.tools, toolNameMapping),
|
|
4260
|
-
tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice, toolNameMapping)
|
|
4261
|
-
},
|
|
4262
|
-
toolNameMapping
|
|
4263
|
-
};
|
|
4489
|
+
/**
|
|
4490
|
+
* Estimate tokens for a message (fast, synchronous).
|
|
4491
|
+
* Uses ~4 chars per token approximation for internal calculations.
|
|
4492
|
+
* The final result is verified with the accurate tokenizer.
|
|
4493
|
+
*/
|
|
4494
|
+
function estimateMessageTokens(msg) {
|
|
4495
|
+
const text = contentToText(msg.content);
|
|
4496
|
+
return Math.ceil(text.length / 4) + 4;
|
|
4264
4497
|
}
|
|
4265
|
-
|
|
4266
|
-
|
|
4267
|
-
|
|
4268
|
-
|
|
4269
|
-
|
|
4270
|
-
|
|
4271
|
-
if (shortNameMap[model]) return shortNameMap[model];
|
|
4272
|
-
if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
|
|
4273
|
-
if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
|
|
4274
|
-
if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
|
|
4275
|
-
if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
|
|
4276
|
-
if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
|
|
4277
|
-
if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
|
|
4278
|
-
return model;
|
|
4498
|
+
/**
|
|
4499
|
+
* Count tokens for an Anthropic message using the model's tokenizer.
|
|
4500
|
+
*/
|
|
4501
|
+
async function countMessageTokens(msg, model) {
|
|
4502
|
+
const text = contentToText(msg.content);
|
|
4503
|
+
return await countTextTokens(text, model) + 4;
|
|
4279
4504
|
}
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4505
|
+
/**
|
|
4506
|
+
* Count tokens for system prompt.
|
|
4507
|
+
*/
|
|
4508
|
+
async function countSystemTokens(system, model) {
|
|
4509
|
+
if (!system) return 0;
|
|
4510
|
+
if (typeof system === "string") return await countTextTokens(system, model) + 4;
|
|
4511
|
+
const text = system.map((block) => block.text).join("\n");
|
|
4512
|
+
return await countTextTokens(text, model) + 4;
|
|
4284
4513
|
}
|
|
4285
|
-
|
|
4514
|
+
/**
|
|
4515
|
+
* Count total tokens for the payload using the model's tokenizer.
|
|
4516
|
+
*/
|
|
4517
|
+
async function countTotalTokens(payload, model) {
|
|
4518
|
+
let total = await countSystemTokens(payload.system, model);
|
|
4519
|
+
for (const msg of payload.messages) total += await countMessageTokens(msg, model);
|
|
4520
|
+
if (payload.tools) {
|
|
4521
|
+
const toolsText = JSON.stringify(payload.tools);
|
|
4522
|
+
+     total += await countTextTokens(toolsText, model);
+   }
+   return total;
+ }
+ function getMessageBytes(msg) {
+   return JSON.stringify(msg).length;
+ }
+ /**
+  * Get tool_use IDs from an assistant message.
+  */
+ function getToolUseIds(msg) {
+   if (msg.role !== "assistant") return [];
+   if (typeof msg.content === "string") return [];
+   const ids = [];
+   for (const block of msg.content) if (block.type === "tool_use") ids.push(block.id);
+   return ids;
+ }
+ /**
+  * Get tool_result IDs from a user message.
+  */
+ function getToolResultIds(msg) {
+   if (msg.role !== "user") return [];
+   if (typeof msg.content === "string") return [];
+   const ids = [];
+   for (const block of msg.content) if (block.type === "tool_result") ids.push(block.tool_use_id);
+   return ids;
+ }
+ /**
+  * Filter orphaned tool_result messages (those without matching tool_use).
+  */
+ function filterOrphanedToolResults(messages) {
+   const toolUseIds = /* @__PURE__ */ new Set();
+   for (const msg of messages) for (const id of getToolUseIds(msg)) toolUseIds.add(id);
+   const result = [];
+   let removedCount = 0;
+   for (const msg of messages) {
+     if (msg.role === "user" && typeof msg.content !== "string") {
+       if (getToolResultIds(msg).some((id) => !toolUseIds.has(id))) {
+         const filteredContent = msg.content.filter((block) => {
+           if (block.type === "tool_result" && !toolUseIds.has(block.tool_use_id)) {
+             removedCount++;
+             return false;
+           }
+           return true;
+         });
+         if (filteredContent.length === 0) continue;
+         result.push({
+           ...msg,
+           content: filteredContent
+         });
+         continue;
+       }
+     }
+     result.push(msg);
+   }
+   if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_result`);
+   return result;
+ }
+ /**
+  * Filter orphaned tool_use messages (those without matching tool_result).
+  * In Anthropic API, every tool_use must have a corresponding tool_result.
+  */
+ function filterOrphanedToolUse(messages) {
+   const toolResultIds = /* @__PURE__ */ new Set();
+   for (const msg of messages) for (const id of getToolResultIds(msg)) toolResultIds.add(id);
+   const result = [];
+   let removedCount = 0;
+   for (const msg of messages) {
+     if (msg.role === "assistant" && typeof msg.content !== "string") {
+       if (getToolUseIds(msg).some((id) => !toolResultIds.has(id))) {
+         const filteredContent = msg.content.filter((block) => {
+           if (block.type === "tool_use" && !toolResultIds.has(block.id)) {
+             removedCount++;
+             return false;
+           }
+           return true;
+         });
+         if (filteredContent.length === 0) continue;
+         result.push({
+           ...msg,
+           content: filteredContent
+         });
+         continue;
+       }
+     }
+     result.push(msg);
+   }
+   if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_use`);
+   return result;
+ }
+ /**
+  * Ensure messages start with a user message.
+  */
+ function ensureStartsWithUser(messages) {
+   let startIndex = 0;
+   while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
+   if (startIndex > 0) consola.debug(`[AutoTruncate:Anthropic] Skipped ${startIndex} leading non-user messages`);
+   return messages.slice(startIndex);
+ }
+ /** Threshold for large tool_result content (bytes) */
+ const LARGE_TOOL_RESULT_THRESHOLD = 1e4;
+ /** Maximum length for compressed tool_result summary */
+ const COMPRESSED_SUMMARY_LENGTH = 500;
+ /**
+  * Compress a large tool_result content to a summary.
+  * Keeps the first and last portions with a note about truncation.
+  */
+ function compressToolResultContent(content) {
+   if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
+   const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
+   const start$1 = content.slice(0, halfLen);
+   const end = content.slice(-halfLen);
+   const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
+   return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
+ }
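For intuition, a sketch of what the compressor above does to an oversized result (values computed from the constants above, not captured from the package):

// Hypothetical input: a 12,000-character tool_result (over the 10,000 threshold).
const small = compressToolResultContent("x".repeat(12e3));
// Keeps the first 250 and last 250 characters (COMPRESSED_SUMMARY_LENGTH / 2 each)
// around the marker "[... 11,500 characters omitted for brevity ...]".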
+ /**
+  * Compress a tool_result block in an Anthropic message.
+  */
+ function compressToolResultBlock(block) {
+   if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) return {
+     ...block,
+     content: compressToolResultContent(block.content)
+   };
+   return block;
+ }
+ /**
+  * Smart compression strategy:
+  * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
+  * 2. Messages before that threshold get their tool_results compressed
+  * 3. Returns compressed messages and stats
+  *
+  * @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
+  */
+ function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePercent) {
+   const n = messages.length;
+   const cumTokens = Array.from({ length: n + 1 }, () => 0);
+   const cumBytes = Array.from({ length: n + 1 }, () => 0);
+   for (let i = n - 1; i >= 0; i--) {
+     const msg = messages[i];
+     cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+     cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+   }
+   const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
+   const preserveByteLimit = Math.floor(byteLimit * preservePercent);
+   let thresholdIndex = n;
+   for (let i = n - 1; i >= 0; i--) {
+     if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
+       thresholdIndex = i + 1;
+       break;
+     }
+     thresholdIndex = i;
+   }
+   if (thresholdIndex >= n) return {
+     messages,
+     compressedCount: 0,
+     compressThresholdIndex: n
+   };
+   const result = [];
+   let compressedCount = 0;
+   for (const [i, msg] of messages.entries()) {
+     if (i < thresholdIndex && msg.role === "user" && Array.isArray(msg.content)) {
+       if (msg.content.some((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD)) {
+         const compressedContent = msg.content.map((block) => {
+           if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) {
+             compressedCount++;
+             return compressToolResultBlock(block);
+           }
+           return block;
+         });
+         result.push({
+           ...msg,
+           content: compressedContent
+         });
+         continue;
+       }
+     }
+     result.push(msg);
+   }
+   return {
+     messages: result,
+     compressedCount,
+     compressThresholdIndex: thresholdIndex
+   };
+ }
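The suffix sums make the threshold scan linear: cumTokens[i] is the cost of messages i..n-1, so the loop settles on the earliest message whose suffix still fits inside preservePercent of the budget. A worked sketch with invented numbers:

// Hypothetical: tokenLimit = 1000, preservePercent = 0.7 → preserveTokenLimit = 700.
// Per-message estimates [400, 300, 200, 100] give suffix sums
// cumTokens = [1000, 600, 300, 100, 0].
// cumTokens[0] = 1000 > 700 → thresholdIndex = 1: only message 0 is old enough
// to have its large tool_results compressed; messages 1..3 stay untouched.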
+ /** Default fallback for when model capabilities are not available */
+ const DEFAULT_CONTEXT_WINDOW = 2e5;
+ function calculateLimits(model, config) {
+   const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
+   const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
+   const byteLimit = getEffectiveByteLimitBytes();
+   return {
+     tokenLimit,
+     byteLimit
+   };
+ }
+ function findOptimalPreserveIndex(params) {
+   const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
+   if (messages.length === 0) return 0;
+   const markerBytes = 200;
+   const availableTokens = tokenLimit - systemTokens - 50;
+   const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
+   if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
+   const n = messages.length;
+   const cumTokens = Array.from({ length: n + 1 }, () => 0);
+   const cumBytes = Array.from({ length: n + 1 }, () => 0);
+   for (let i = n - 1; i >= 0; i--) {
+     const msg = messages[i];
+     cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+     cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+   }
+   let left = 0;
+   let right = n;
+   while (left < right) {
+     const mid = left + right >>> 1;
+     if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
+     else left = mid + 1;
+   }
+   return left;
+ }
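Because the suffix sums are non-increasing in i, "the suffix starting at mid fits both budgets" is a monotone predicate, which is what lets findOptimalPreserveIndex binary-search for the smallest viable cut point. A sketch with invented numbers:

// Hypothetical: cumTokens = [900, 500, 200, 0], availableTokens = 600 (bytes ignored).
// fits(0) is false (900 > 600), fits(1) is true (500 <= 600),
// so the search returns 1: drop messages[0], preserve messages[1..].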
+ /**
+  * Generate a summary of removed messages for context.
+  * Extracts key information like tool calls and topics.
+  */
+ function generateRemovedMessagesSummary(removedMessages) {
+   const toolCalls = [];
+   let userMessageCount = 0;
+   let assistantMessageCount = 0;
+   for (const msg of removedMessages) {
+     if (msg.role === "user") userMessageCount++;
+     else assistantMessageCount++;
+     if (Array.isArray(msg.content)) {
+       for (const block of msg.content) if (block.type === "tool_use") toolCalls.push(block.name);
+     }
+   }
+   const parts = [];
+   if (userMessageCount > 0 || assistantMessageCount > 0) {
+     const breakdown = [];
+     if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
+     if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
+     parts.push(`Messages: ${breakdown.join(", ")}`);
+   }
+   if (toolCalls.length > 0) {
+     const uniqueTools = [...new Set(toolCalls)];
+     const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
+     parts.push(`Tools used: ${displayTools.join(", ")}`);
+   }
+   return parts.join(". ");
+ }
+ /**
+  * Add a compression notice to the system prompt.
+  * Informs the model that some tool_result content has been compressed.
+  */
+ function addCompressionNotice(payload, compressedCount) {
+   const notice = `[CONTEXT NOTE]\n${compressedCount} large tool_result blocks have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]\n\n`;
+   let newSystem;
+   if (typeof payload.system === "string") newSystem = notice + payload.system;
+   else if (Array.isArray(payload.system)) newSystem = [{
+     type: "text",
+     text: notice
+   }, ...payload.system];
+   else newSystem = notice;
+   return {
+     ...payload,
+     system: newSystem
+   };
+ }
+ /**
+  * Create truncation context to prepend to system prompt.
+  */
+ function createTruncationSystemContext(removedCount, compressedCount, summary) {
+   let context = `[CONVERSATION CONTEXT]\n`;
+   if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
+   if (compressedCount > 0) context += `${compressedCount} large tool_result blocks have been compressed.\n`;
+   if (summary) context += `Summary of removed content: ${summary}\n`;
+   context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]\n\n";
+   return context;
+ }
+ /**
+  * Create a truncation marker message (fallback when no system prompt).
+  */
+ function createTruncationMarker$1(removedCount, compressedCount, summary) {
+   const parts = [];
+   if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
+   if (compressedCount > 0) parts.push(`${compressedCount} tool_result blocks compressed`);
+   let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
+   if (summary) content += `\n[Summary: ${summary}]`;
+   return {
+     role: "user",
+     content
+   };
+ }
+ /**
+  * Perform auto-truncation on an Anthropic payload that exceeds limits.
+  */
+ async function autoTruncateAnthropic(payload, model, config = {}) {
+   const cfg = {
+     ...DEFAULT_AUTO_TRUNCATE_CONFIG,
+     ...config
+   };
+   const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+   const originalBytes = JSON.stringify(payload).length;
+   const originalTokens = await countTotalTokens(payload, model);
+   if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
+     payload,
+     wasCompacted: false,
+     originalTokens,
+     compactedTokens: originalTokens,
+     removedMessageCount: 0
+   };
+   const exceedsTokens = originalTokens > tokenLimit;
+   const exceedsBytes = originalBytes > byteLimit;
+   let workingMessages = payload.messages;
+   let compressedCount = 0;
+   if (state.compressToolResults) {
+     const compressionResult = smartCompressToolResults(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
+     workingMessages = compressionResult.messages;
+     compressedCount = compressionResult.compressedCount;
+     const compressedPayload = {
+       ...payload,
+       messages: workingMessages
+     };
+     const compressedBytes = JSON.stringify(compressedPayload).length;
+     const compressedTokens = await countTotalTokens(compressedPayload, model);
+     if (compressedTokens <= tokenLimit && compressedBytes <= byteLimit) {
+       let reason$1 = "tokens";
+       if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
+       else if (exceedsBytes) reason$1 = "size";
+       consola.info(`[AutoTruncate:Anthropic] ${reason$1}: ${originalTokens}→${compressedTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
+       const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
+       return {
+         payload: noticePayload,
+         wasCompacted: true,
+         originalTokens,
+         compactedTokens: await countTotalTokens(noticePayload, model),
+         removedMessageCount: 0
+       };
+     }
+   }
+   const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
+   const systemTokens = await countSystemTokens(payload.system, model);
+   const messagesJson = JSON.stringify(workingMessages);
+   const payloadOverhead = JSON.stringify({
+     ...payload,
+     messages: workingMessages
+   }).length - messagesJson.length;
+   consola.debug(`[AutoTruncate:Anthropic] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${Math.round(systemBytes / 1024)}KB`);
+   const preserveIndex = findOptimalPreserveIndex({
+     messages: workingMessages,
+     systemBytes,
+     systemTokens,
+     payloadOverhead,
+     tokenLimit,
+     byteLimit
+   });
+   if (preserveIndex === 0) {
+     consola.warn("[AutoTruncate:Anthropic] Cannot truncate, system messages too large");
+     return {
+       payload,
+       wasCompacted: false,
+       originalTokens,
+       compactedTokens: originalTokens,
+       removedMessageCount: 0
+     };
+   }
+   if (preserveIndex >= workingMessages.length) {
+     consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
+     return {
+       payload,
+       wasCompacted: false,
+       originalTokens,
+       compactedTokens: originalTokens,
+       removedMessageCount: 0
+     };
+   }
+   let preserved = workingMessages.slice(preserveIndex);
+   preserved = filterOrphanedToolResults(preserved);
+   preserved = filterOrphanedToolUse(preserved);
+   preserved = ensureStartsWithUser(preserved);
+   preserved = filterOrphanedToolResults(preserved);
+   preserved = filterOrphanedToolUse(preserved);
+   if (preserved.length === 0) {
+     consola.warn("[AutoTruncate:Anthropic] All messages filtered out after cleanup");
+     return {
+       payload,
+       wasCompacted: false,
+       originalTokens,
+       compactedTokens: originalTokens,
+       removedMessageCount: 0
+     };
+   }
+   const removedMessages = payload.messages.slice(0, preserveIndex);
+   const removedCount = workingMessages.length - preserved.length;
+   const summary = generateRemovedMessagesSummary(removedMessages);
+   let newSystem = payload.system;
+   let newMessages = preserved;
+   if (payload.system !== void 0) {
+     const truncationContext = createTruncationSystemContext(removedCount, compressedCount, summary);
+     if (typeof payload.system === "string") newSystem = truncationContext + payload.system;
+     else if (Array.isArray(payload.system)) newSystem = [{
+       type: "text",
+       text: truncationContext
+     }, ...payload.system];
+   } else newMessages = [createTruncationMarker$1(removedCount, compressedCount, summary), ...preserved];
+   const newPayload = {
+     ...payload,
+     system: newSystem,
+     messages: newMessages
+   };
+   const newBytes = JSON.stringify(newPayload).length;
+   const newTokens = await countTotalTokens(newPayload, model);
+   let reason = "tokens";
+   if (exceedsTokens && exceedsBytes) reason = "tokens+size";
+   else if (exceedsBytes) reason = "size";
+   const actions = [];
+   if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
+   if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
+   const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
+   consola.info(`[AutoTruncate:Anthropic] ${reason}: ${originalTokens}→${newTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
+   if (newBytes > byteLimit || newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over limit (${newTokens} tokens, ${Math.round(newBytes / 1024)}KB)`);
+   return {
+     payload: newPayload,
+     wasCompacted: true,
+     originalTokens,
+     compactedTokens: newTokens,
+     removedMessageCount: removedCount
+   };
+ }
+ /**
+  * Check if payload needs compaction.
+  */
+ async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
+   const cfg = {
+     ...DEFAULT_AUTO_TRUNCATE_CONFIG,
+     ...config
+   };
+   const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+   const currentTokens = await countTotalTokens(payload, model);
+   const currentBytes = JSON.stringify(payload).length;
+   const exceedsTokens = currentTokens > tokenLimit;
+   const exceedsBytes = currentBytes > byteLimit;
+   let reason;
+   if (exceedsTokens && exceedsBytes) reason = "both";
+   else if (exceedsTokens) reason = "tokens";
+   else if (exceedsBytes) reason = "bytes";
+   return {
+     needed: exceedsTokens || exceedsBytes,
+     currentTokens,
+     tokenLimit,
+     currentBytes,
+     byteLimit,
+     reason
+   };
+ }
+
+ //#endregion
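A minimal sketch of how these two exports are meant to compose, assuming a caller that has already resolved the model entry (this wiring is illustrative, not code from the bundle):

// Hypothetical caller:
const model = state.models?.data.find((m) => m.id === payload.model);
const check = await checkNeedsCompactionAnthropic(payload, model);
if (check.needed) {
  // check.reason is "tokens", "bytes", or "both"; autoTruncateAnthropic then
  // compresses and/or drops oldest messages until the payload fits the limits
  // computed by calculateLimits(model, cfg).
  const result = await autoTruncateAnthropic(payload, model);
  if (result.wasCompacted) payload = result.payload;
}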
+ //#region src/routes/messages/message-utils.ts
+ function convertAnthropicMessages(messages) {
+   return messages.map((msg) => {
+     if (typeof msg.content === "string") return {
+       role: msg.role,
+       content: msg.content
+     };
+     const content = msg.content.map((block) => {
+       if (block.type === "text") return {
+         type: "text",
+         text: block.text
+       };
+       if (block.type === "tool_use") return {
+         type: "tool_use",
+         id: block.id,
+         name: block.name,
+         input: JSON.stringify(block.input)
+       };
+       if (block.type === "tool_result") {
+         const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
+         return {
+           type: "tool_result",
+           tool_use_id: block.tool_use_id,
+           content: resultContent
+         };
+       }
+       return { type: block.type };
+     });
+     return {
+       role: msg.role,
+       content
+     };
+   });
+ }
+ function extractSystemPrompt(system) {
+   if (!system) return void 0;
+   if (typeof system === "string") return system;
+   return system.map((block) => block.text).join("\n");
+ }
+ function extractToolCallsFromContent(content) {
+   const tools = [];
+   for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
+     id: String(block.id),
+     name: String(block.name),
+     input: JSON.stringify(block.input)
+   });
+   return tools.length > 0 ? tools : void 0;
+ }
+ function extractToolCallsFromAnthropicContent(content) {
+   const tools = [];
+   for (const block of content) if (block.type === "tool_use") tools.push({
+     id: block.id,
+     name: block.name,
+     input: JSON.stringify(block.input)
+   });
+   return tools.length > 0 ? tools : void 0;
+ }
+ function mapOpenAIStopReasonToAnthropic(finishReason) {
+   if (finishReason === null) return null;
+   return {
+     stop: "end_turn",
+     length: "max_tokens",
+     tool_calls: "tool_use",
+     content_filter: "end_turn"
+   }[finishReason];
+ }
+
+ //#endregion
+ //#region src/routes/messages/non-stream-translation.ts
+ const OPENAI_TOOL_NAME_LIMIT = 64;
+ /**
+  * Ensure all tool_use blocks have corresponding tool_result responses.
+  * This handles edge cases where conversation history may be incomplete:
+  * - Session interruptions where tool execution was cut off
+  * - Previous request failures
+  * - Client sending truncated history
+  *
+  * Adding placeholder responses prevents API errors and maintains protocol compliance.
+  */
+ function fixMessageSequence(messages) {
+   const fixedMessages = [];
+   for (let i = 0; i < messages.length; i++) {
+     const message = messages[i];
+     fixedMessages.push(message);
+     if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
+       const foundToolResponses = /* @__PURE__ */ new Set();
+       let j = i + 1;
+       while (j < messages.length && messages[j].role === "tool") {
+         const toolMessage = messages[j];
+         if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
+         j++;
+       }
+       for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
+         consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
+         fixedMessages.push({
+           role: "tool",
+           tool_call_id: toolCall.id,
+           content: "Tool execution was interrupted or failed."
+         });
+       }
+     }
+   }
+   return fixedMessages;
+ }
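A before/after sketch of fixMessageSequence on an interrupted tool call (message shapes are illustrative):

// in:  [{ role: "assistant", tool_calls: [{ id: "call_1", ... }] }]
// out: [{ role: "assistant", tool_calls: [{ id: "call_1", ... }] },
//       { role: "tool", tool_call_id: "call_1",
//         content: "Tool execution was interrupted or failed." }]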
+ function translateToOpenAI(payload) {
+   const toolNameMapping = {
+     truncatedToOriginal: /* @__PURE__ */ new Map(),
+     originalToTruncated: /* @__PURE__ */ new Map()
+   };
+   const messages = translateAnthropicMessagesToOpenAI(payload.messages, payload.system, toolNameMapping);
+   return {
+     payload: {
+       model: translateModelName(payload.model),
+       messages: fixMessageSequence(messages),
+       max_tokens: payload.max_tokens,
+       stop: payload.stop_sequences,
+       stream: payload.stream,
+       temperature: payload.temperature,
+       top_p: payload.top_p,
+       user: payload.metadata?.user_id,
+       tools: translateAnthropicToolsToOpenAI(payload.tools, toolNameMapping),
+       tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice, toolNameMapping)
+     },
+     toolNameMapping
+   };
+ }
+ /**
+  * Find the latest available model matching a family prefix.
+  * Searches state.models for models starting with the given prefix
+  * and returns the one with the highest version number.
+  *
+  * @param familyPrefix - e.g., "claude-opus", "claude-sonnet", "claude-haiku"
+  * @param fallback - fallback model ID if no match found
+  */
+ function findLatestModel(familyPrefix, fallback) {
+   const models = state.models?.data;
+   if (!models || models.length === 0) return fallback;
+   const candidates = models.filter((m) => m.id.startsWith(familyPrefix));
+   if (candidates.length === 0) return fallback;
+   candidates.sort((a, b) => {
+     const versionA = extractVersion(a.id, familyPrefix);
+     return extractVersion(b.id, familyPrefix) - versionA;
+   });
+   return candidates[0].id;
+ }
+ /**
+  * Extract numeric version from model ID.
+  * e.g., "claude-opus-4.5" with prefix "claude-opus" -> 4.5
+  */
+ function extractVersion(modelId, prefix) {
+   const match = modelId.slice(prefix.length + 1).match(/^(\d+(?:\.\d+)?)/);
+   return match ? Number.parseFloat(match[1]) : 0;
+ }
+ function translateModelName(model) {
+   const aliasMap = {
+     opus: "claude-opus",
+     sonnet: "claude-sonnet",
+     haiku: "claude-haiku"
+   };
+   if (aliasMap[model]) {
+     const familyPrefix = aliasMap[model];
+     const fallback = `${familyPrefix}-4.5`;
+     return findLatestModel(familyPrefix, fallback);
+   }
+   if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
+   if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
+   if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
+   if (/^claude-opus-4-\d+$/.test(model)) return findLatestModel("claude-opus", "claude-opus-4.5");
+   if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
+   if (/^claude-haiku-3-5-\d+$/.test(model)) return findLatestModel("claude-haiku", "claude-haiku-4.5");
+   return model;
+ }
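Illustrative resolutions (the dated ID and the contents of state.models here are hypothetical):

// translateModelName("opus")                       → latest "claude-opus-*" in state.models,
//                                                    falling back to "claude-opus-4.5"
// translateModelName("claude-sonnet-4-5-20250929") → "claude-sonnet-4.5" (dated ID collapsed)
// translateModelName("some-other-model")           → "some-other-model" (passed through)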
+ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
+   const systemMessages = handleSystemPrompt(system);
+   const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
+   return [...systemMessages, ...otherMessages];
+ }
+ const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
  /**
   * Filter out reserved keywords from system prompt text.
   * Copilot API rejects requests containing these keywords.
@@ -4406,7 +5275,7 @@ function translateAnthropicToolsToOpenAI(anthropicTools, toolNameMapping) {
    function: {
      name: getTruncatedToolName(tool.name, toolNameMapping),
      description: tool.description,
-     parameters: tool.input_schema
+     parameters: tool.input_schema ?? {}
    }
  }));
  }
@@ -4511,6 +5380,9 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
   *
   * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
   * For other models, uses GPT tokenizers with appropriate buffers.
+  *
+  * When auto-truncate is enabled and the request would exceed limits,
+  * returns an inflated token count to trigger Claude Code's auto-compact mechanism.
   */
  async function handleCountTokens(c) {
    try {
@@ -4522,7 +5394,16 @@ async function handleCountTokens(c) {
      consola.warn("Model not found, returning default token count");
      return c.json({ input_tokens: 1 });
    }
-
+   if (state.autoTruncate) {
+     const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
+     if (truncateCheck.needed) {
+       const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
+       const inflatedTokens = Math.floor(contextWindow * .95);
+       consola.debug(`[count_tokens] Would trigger auto-truncate: ${truncateCheck.currentTokens} tokens > ${truncateCheck.tokenLimit}, returning inflated count: ${inflatedTokens}`);
+       return c.json({ input_tokens: inflatedTokens });
+     }
+   }
+   const tokenizerName = selectedModel.capabilities?.tokenizer ?? "o200k_base";
    const tokenCount = await getTokenCount(openAIPayload, selectedModel);
    if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
      let mcpToolExist = false;
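The inflation arithmetic, spelled out (the 200k window is a hypothetical value):

// contextWindow = 200000 → inflatedTokens = Math.floor(200000 * 0.95) = 190000.
// Reporting ~190k input tokens pushes Claude Code over its own auto-compact
// threshold, so the client summarizes its history itself instead of sending
// a request this proxy would then have to truncate.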
@@ -4533,8 +5414,8 @@ async function handleCountTokens(c) {
    }
    }
    let finalTokenCount = tokenCount.input + tokenCount.output;
-   if (!
-   consola.debug(`Token count: ${finalTokenCount} (
+   if (!(selectedModel.vendor === "Anthropic")) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
+   consola.debug(`Token count: ${finalTokenCount} (tokenizer: ${tokenizerName})`);
    return c.json({ input_tokens: finalTokenCount });
  } catch (error) {
    consola.error("Error counting tokens:", error);
@@ -4568,6 +5449,8 @@ const COPILOT_SUPPORTED_FIELDS = new Set([
   * Filter payload to only include fields supported by Copilot's Anthropic API.
   * This prevents errors like "Extra inputs are not permitted" for unsupported
   * fields like `output_config`.
+  *
+  * Also converts server-side tools (web_search, etc.) to custom tools.
   */
  function filterPayloadForCopilot(payload) {
    const filtered = {};
@@ -4575,6 +5458,7 @@ function filterPayloadForCopilot(payload) {
    for (const [key, value] of Object.entries(payload)) if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
    else unsupportedFields.push(key);
    if (unsupportedFields.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${unsupportedFields.join(", ")}`);
+   if (filtered.tools) filtered.tools = convertServerToolsToCustom(filtered.tools);
    return filtered;
  }
  /**
@@ -4615,26 +5499,184 @@ async function createAnthropicMessages(payload) {
    "X-Initiator": isAgentCall ? "agent" : "user",
    "anthropic-version": "2023-06-01"
  };
- consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
- const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
-   method: "POST",
-   headers,
-   body: JSON.stringify(filteredPayload)
- });
- if (!response.ok) {
-   consola.
-
+ consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
+   method: "POST",
+   headers,
+   body: JSON.stringify(filteredPayload)
+ });
+ if (!response.ok) {
+   consola.debug("Request failed:", {
+     model: filteredPayload.model,
+     max_tokens: filteredPayload.max_tokens,
+     stream: filteredPayload.stream,
+     tools: filteredPayload.tools?.map((t) => ({
+       name: t.name,
+       type: t.type
+     })),
+     thinking: filteredPayload.thinking,
+     messageCount: filteredPayload.messages.length
+   });
+   throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, filteredPayload.model);
+ }
+ if (payload.stream) return events(response);
+ return await response.json();
+ }
+ const SERVER_TOOL_CONFIGS = {
+   web_search: {
+     description: "Search the web for current information. Returns web search results that can help answer questions about recent events, current data, or information that may have changed since your knowledge cutoff.",
+     input_schema: {
+       type: "object",
+       properties: { query: {
+         type: "string",
+         description: "The search query"
+       } },
+       required: ["query"]
+     }
+   },
+   web_fetch: {
+     description: "Fetch content from a URL. NOTE: This is a client-side tool - the client must fetch the URL and return the content.",
+     input_schema: {
+       type: "object",
+       properties: { url: {
+         type: "string",
+         description: "The URL to fetch"
+       } },
+       required: ["url"]
+     }
+   },
+   code_execution: {
+     description: "Execute code in a sandbox. NOTE: This is a client-side tool - the client must execute the code.",
+     input_schema: {
+       type: "object",
+       properties: {
+         code: {
+           type: "string",
+           description: "The code to execute"
+         },
+         language: {
+           type: "string",
+           description: "The programming language"
+         }
+       },
+       required: ["code"]
+     }
+   },
+   computer: {
+     description: "Control computer desktop. NOTE: This is a client-side tool - the client must handle computer control.",
+     input_schema: {
+       type: "object",
+       properties: { action: {
+         type: "string",
+         description: "The action to perform"
+       } },
+       required: ["action"]
+     }
+   }
+ };
+ /**
+  * Check if a tool is a server-side tool that needs conversion.
+  */
+ function getServerToolPrefix(tool) {
+   if (tool.type) {
+     for (const prefix of Object.keys(SERVER_TOOL_CONFIGS)) if (tool.type.startsWith(prefix)) return prefix;
+   }
+   return null;
+ }
+ /**
+  * Convert server-side tools to custom tools, or pass them through unchanged.
+  * This allows them to be passed to the API and handled by the client.
+  *
+  * Note: Server-side tools are only converted if state.rewriteAnthropicTools is enabled.
+  */
+ function convertServerToolsToCustom(tools) {
+   if (!tools) return;
+   const result = [];
+   for (const tool of tools) {
+     const serverToolPrefix = getServerToolPrefix(tool);
+     if (serverToolPrefix) {
+       const config = SERVER_TOOL_CONFIGS[serverToolPrefix];
+       if (!state.rewriteAnthropicTools) {
+         consola.debug(`[DirectAnthropic] Passing ${serverToolPrefix} through unchanged (use --rewrite-anthropic-tools to convert)`);
+         result.push(tool);
+         continue;
+       }
+       if (config.remove) {
+         consola.warn(`[DirectAnthropic] Removing unsupported server tool: ${tool.name}. Reason: ${config.removalReason}`);
+         continue;
+       }
+       consola.debug(`[DirectAnthropic] Converting server tool to custom: ${tool.name} (type: ${tool.type})`);
+       result.push({
+         name: tool.name,
+         description: config.description,
+         input_schema: config.input_schema
+       });
+     } else result.push(tool);
+   }
+   return result.length > 0 ? result : void 0;
+ }
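A sketch of the conversion with rewriting enabled (the versioned type string is a hypothetical example of what the prefix match accepts):

// in:  [{ type: "web_search_20250305", name: "web_search" }]
// out: [{ name: "web_search",
//         description: SERVER_TOOL_CONFIGS.web_search.description,
//         input_schema: SERVER_TOOL_CONFIGS.web_search.input_schema }]
// getServerToolPrefix matches on the "web_search" prefix of tool.type.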
+ /**
+  * Check if a model supports direct Anthropic API.
+  * Returns true if redirect is disabled (direct API is on) and the model is from Anthropic vendor.
+  */
+ function supportsDirectAnthropicApi(modelId) {
+   if (state.redirectAnthropic) return false;
+   return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
+ }
+
+ //#endregion
+ //#region src/routes/messages/stream-accumulator.ts
+ function createAnthropicStreamAccumulator() {
+   return {
+     model: "",
+     inputTokens: 0,
+     outputTokens: 0,
+     stopReason: "",
+     content: "",
+     toolCalls: [],
+     currentToolCall: null
+   };
+ }
+ function processAnthropicEvent(event, acc) {
+   switch (event.type) {
+     case "content_block_delta":
+       handleContentBlockDelta(event.delta, acc);
+       break;
+     case "content_block_start":
+       handleContentBlockStart(event.content_block, acc);
+       break;
+     case "content_block_stop":
+       handleContentBlockStop(acc);
+       break;
+     case "message_delta":
+       handleMessageDelta(event.delta, event.usage, acc);
+       break;
+     default: break;
+   }
+ }
+ function handleContentBlockDelta(delta, acc) {
+   if (delta.type === "text_delta") acc.content += delta.text;
+   else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
+ }
+ function handleContentBlockStart(block, acc) {
+   if (block.type === "tool_use") acc.currentToolCall = {
+     id: block.id,
+     name: block.name,
+     input: ""
+   };
+ }
+ function handleContentBlockStop(acc) {
+   if (acc.currentToolCall) {
+     acc.toolCalls.push(acc.currentToolCall);
+     acc.currentToolCall = null;
    }
- if (payload.stream) return events(response);
- return await response.json();
  }
- [6 removed lines; content not shown in the source diff]
- return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
+ function handleMessageDelta(delta, usage, acc) {
+   if (delta.stop_reason) acc.stopReason = delta.stop_reason;
+   if (usage) {
+     acc.inputTokens = usage.input_tokens ?? 0;
+     acc.outputTokens = usage.output_tokens;
+   }
  }

  //#endregion
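How the accumulator is meant to be driven while SSE events are passed through (a sketch; the upstreamEvents variable is assumed):

const acc = createAnthropicStreamAccumulator();
for await (const rawEvent of upstreamEvents) {
  if (rawEvent.data) processAnthropicEvent(JSON.parse(rawEvent.data), acc);
  // ...forward rawEvent to the client unchanged...
}
// acc.content, acc.toolCalls, acc.inputTokens/outputTokens, and acc.stopReason
// now summarize the stream for history recording.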
@@ -4776,40 +5818,28 @@ function translateErrorToAnthropicErrorEvent() {
  }

  //#endregion
- //#region src/routes/messages/handler.ts
- async function handleCompletion(c) {
-   const anthropicPayload = await c.req.json();
-   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
-   const trackingId = c.get("trackingId");
-   const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
-   updateTrackerModel(trackingId, anthropicPayload.model);
-   const ctx = {
-     historyId: recordRequest("anthropic", {
-       model: anthropicPayload.model,
-       messages: convertAnthropicMessages(anthropicPayload.messages),
-       stream: anthropicPayload.stream ?? false,
-       tools: anthropicPayload.tools?.map((t) => ({
-         name: t.name,
-         description: t.description
-       })),
-       max_tokens: anthropicPayload.max_tokens,
-       temperature: anthropicPayload.temperature,
-       system: extractSystemPrompt(anthropicPayload.system)
-     }),
-     trackingId,
-     startTime
-   };
-   if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
-   return handleTranslatedCompletion(c, anthropicPayload, ctx);
- }
+ //#region src/routes/messages/direct-anthropic-handler.ts
  /**
   * Handle completion using direct Anthropic API (no translation needed)
   */
  async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
    consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
+   const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
+   let effectivePayload = anthropicPayload;
+   let truncateResult;
+   if (state.autoTruncate && selectedModel) {
+     const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
+     consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
+     if (check.needed) try {
+       truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
+       if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
+     } catch (error) {
+       consola.warn("[Anthropic] Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
+     }
+   } else if (state.autoTruncate && !selectedModel) consola.debug(`[Anthropic] Model '${anthropicPayload.model}' not found, skipping auto-truncate`);
    if (state.manualApprove) await awaitApproval();
    try {
-     const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(
+     const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
      ctx.queueWaitMs = queueWaitMs;
      if (Symbol.asyncIterator in response) {
        consola.debug("Streaming response from Copilot (direct Anthropic)");
@@ -4818,21 +5848,37 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
        await handleDirectAnthropicStreamingResponse({
          stream,
          response,
-         anthropicPayload,
+         anthropicPayload: effectivePayload,
          ctx
        });
      });
    }
-   return handleDirectAnthropicNonStreamingResponse(c, response, ctx);
+   return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
  } catch (error) {
+   if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
    recordErrorResponse(ctx, anthropicPayload.model, error);
    throw error;
  }
  }
  /**
+  * Log payload size info for debugging 413 errors
+  */
+ function logPayloadSizeInfoAnthropic(payload, model) {
+   const payloadSize = JSON.stringify(payload).length;
+   const messageCount = payload.messages.length;
+   const toolCount = payload.tools?.length ?? 0;
+   const systemSize = payload.system ? JSON.stringify(payload.system).length : 0;
+   consola.info(`[Anthropic 413] Payload size: ${Math.round(payloadSize / 1024)}KB, messages: ${messageCount}, tools: ${toolCount}, system: ${Math.round(systemSize / 1024)}KB`);
+   if (model?.capabilities?.limits) {
+     const limits = model.capabilities.limits;
+     consola.info(`[Anthropic 413] Model limits: context=${limits.max_context_window_tokens}, prompt=${limits.max_prompt_tokens}, output=${limits.max_output_tokens}`);
+   }
+   if (!state.autoTruncate) consola.info("[Anthropic 413] Consider enabling --auto-truncate to automatically reduce payload size");
+ }
+ /**
   * Handle non-streaming direct Anthropic response
   */
- function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
+ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
    consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
    recordResponse(ctx.historyId, {
      success: true,
@@ -4868,7 +5914,34 @@ function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
      outputTokens: response.usage.output_tokens,
      queueWaitMs: ctx.queueWaitMs
    });
-
+   let finalResponse = response;
+   if (state.verbose && truncateResult?.wasCompacted) {
+     const marker = createTruncationMarker(truncateResult);
+     finalResponse = prependMarkerToAnthropicResponse$1(response, marker);
+   }
+   return c.json(finalResponse);
+ }
+ /**
+  * Prepend marker to Anthropic response content (at the beginning of first text block)
+  */
+ function prependMarkerToAnthropicResponse$1(response, marker) {
+   if (!marker) return response;
+   const content = [...response.content];
+   const firstTextIndex = content.findIndex((block) => block.type === "text");
+   if (firstTextIndex !== -1) {
+     const textBlock = content[firstTextIndex];
+     if (textBlock.type === "text") content[firstTextIndex] = {
+       ...textBlock,
+       text: marker + textBlock.text
+     };
+   } else content.unshift({
+     type: "text",
+     text: marker
+   });
+   return {
+     ...response,
+     content
+   };
+ }
  /**
   * Handle streaming direct Anthropic response (passthrough SSE events)
@@ -4894,7 +5967,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
      data: rawEvent.data
    });
  }
- recordStreamingResponse(acc, anthropicPayload.model, ctx);
+ recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
  } catch (error) {
    consola.error("Direct Anthropic stream error:", error);
@@ -4912,6 +5985,34 @@ async function handleDirectAnthropicStreamingResponse(opts) {
    });
  }
  }
+ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
+   const contentBlocks = [];
+   if (acc.content) contentBlocks.push({
+     type: "text",
+     text: acc.content
+   });
+   for (const tc of acc.toolCalls) contentBlocks.push({
+     type: "tool_use",
+     ...tc
+   });
+   recordResponse(ctx.historyId, {
+     success: true,
+     model: acc.model || fallbackModel,
+     usage: {
+       input_tokens: acc.inputTokens,
+       output_tokens: acc.outputTokens
+     },
+     stop_reason: acc.stopReason || void 0,
+     content: contentBlocks.length > 0 ? {
+       role: "assistant",
+       content: contentBlocks
+     } : null,
+     toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
+   }, Date.now() - ctx.startTime);
+ }
+
+ //#endregion
+ //#region src/routes/messages/translated-handler.ts
  /**
   * Handle completion using OpenAI translation path (legacy)
   */
@@ -4954,7 +6055,7 @@ function handleNonStreamingResponse(opts) {
    let anthropicResponse = translateToAnthropic(response, toolNameMapping);
    consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
    if (state.verbose && ctx.truncateResult?.wasCompacted) {
-     const marker =
+     const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
      anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
    }
    recordResponse(ctx.historyId, {
@@ -5005,17 +6106,6 @@ function prependMarkerToAnthropicResponse(response, marker) {
    content
  };
  }
- function createAnthropicStreamAccumulator() {
-   return {
-     model: "",
-     inputTokens: 0,
-     outputTokens: 0,
-     stopReason: "",
-     content: "",
-     toolCalls: [],
-     currentToolCall: null
-   };
- }
  async function handleStreamingResponse(opts) {
    const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
    const streamState = {
@@ -5027,7 +6117,7 @@ async function handleStreamingResponse(opts) {
    const acc = createAnthropicStreamAccumulator();
    try {
      if (ctx.truncateResult?.wasCompacted) {
-       const marker =
+       const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
        await sendTruncationMarkerEvent(stream, streamState, marker);
        acc.content += marker;
      }
@@ -5116,47 +6206,6 @@ async function processStreamChunks(opts) {
    }
  }
  }
- function processAnthropicEvent(event, acc) {
-   switch (event.type) {
-     case "content_block_delta":
-       handleContentBlockDelta(event.delta, acc);
-       break;
-     case "content_block_start":
-       handleContentBlockStart(event.content_block, acc);
-       break;
-     case "content_block_stop":
-       handleContentBlockStop(acc);
-       break;
-     case "message_delta":
-       handleMessageDelta(event.delta, event.usage, acc);
-       break;
-     default: break;
-   }
- }
- function handleContentBlockDelta(delta, acc) {
-   if (delta.type === "text_delta") acc.content += delta.text;
-   else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
- }
- function handleContentBlockStart(block, acc) {
-   if (block.type === "tool_use") acc.currentToolCall = {
-     id: block.id,
-     name: block.name,
-     input: ""
-   };
- }
- function handleContentBlockStop(acc) {
-   if (acc.currentToolCall) {
-     acc.toolCalls.push(acc.currentToolCall);
-     acc.currentToolCall = null;
-   }
- }
- function handleMessageDelta(delta, usage, acc) {
-   if (delta.stop_reason) acc.stopReason = delta.stop_reason;
-   if (usage) {
-     acc.inputTokens = usage.input_tokens ?? 0;
-     acc.outputTokens = usage.output_tokens;
-   }
- }
  function recordStreamingResponse(acc, fallbackModel, ctx) {
    const contentBlocks = [];
    if (acc.content) contentBlocks.push({
@@ -5182,61 +6231,51 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
    toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
  }, Date.now() - ctx.startTime);
  }
- [29 removed lines; content not shown in the source diff]
- content
- };
- });
- }
- function extractSystemPrompt(system) {
-   if (!system) return void 0;
-   if (typeof system === "string") return system;
-   return system.map((block) => block.text).join("\n");
- }
- function extractToolCallsFromContent(content) {
-   const tools = [];
-   for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
-     id: String(block.id),
-     name: String(block.name),
-     input: JSON.stringify(block.input)
-   });
-   return tools.length > 0 ? tools : void 0;
+
+ //#endregion
+ //#region src/routes/messages/handler.ts
+ async function handleCompletion(c) {
+   const anthropicPayload = await c.req.json();
+   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
+   logToolInfo(anthropicPayload);
+   const useDirectAnthropicApi = supportsDirectAnthropicApi(anthropicPayload.model);
+   const trackingId = c.get("trackingId");
+   const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
+   updateTrackerModel(trackingId, anthropicPayload.model);
+   const ctx = {
+     historyId: recordRequest("anthropic", {
+       model: anthropicPayload.model,
+       messages: convertAnthropicMessages(anthropicPayload.messages),
+       stream: anthropicPayload.stream ?? false,
+       tools: anthropicPayload.tools?.map((t) => ({
+         name: t.name,
+         description: t.description
+       })),
+       max_tokens: anthropicPayload.max_tokens,
+       temperature: anthropicPayload.temperature,
+       system: extractSystemPrompt(anthropicPayload.system)
+     }),
+     trackingId,
+     startTime
+   };
+   if (useDirectAnthropicApi) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
+   return handleTranslatedCompletion(c, anthropicPayload, ctx);
  }
- [8 removed lines; content not shown in the source diff]
+ /**
+  * Log tool-related information for debugging
+  */
+ function logToolInfo(anthropicPayload) {
+   if (anthropicPayload.tools?.length) {
+     const toolInfo = anthropicPayload.tools.map((t) => ({
+       name: t.name,
+       type: t.type ?? "(custom)"
+     }));
+     consola.debug(`[Tools] Defined tools:`, JSON.stringify(toolInfo));
+   }
+   for (const msg of anthropicPayload.messages) if (typeof msg.content !== "string") for (const block of msg.content) {
+     if (block.type === "tool_use") consola.debug(`[Tools] tool_use in message: ${block.name} (id: ${block.id})`);
+     if (block.type === "tool_result") consola.debug(`[Tools] tool_result in message: id=${block.tool_use_id}, is_error=${block.is_error ?? false}`);
+   }
  }

  //#endregion
@@ -5350,13 +6389,18 @@ server.route("/history", historyRoutes);
5350  6389
5351  6390     //#endregion
5352  6391     //#region src/start.ts
      6392  +  /** Format limit values as "Xk" or "?" if not available */
      6393  +  function formatLimit(value) {
      6394  +  return value ? `${Math.round(value / 1e3)}k` : "?";
      6395  +  }
5353  6396     function formatModelInfo(model) {
5354  6397     const limits = model.capabilities?.limits;
5355        -  const contextK = limits?.
5356        -  const
      6398  +  const contextK = formatLimit(limits?.max_context_window_tokens);
      6399  +  const promptK = formatLimit(limits?.max_prompt_tokens);
      6400  +  const outputK = formatLimit(limits?.max_output_tokens);
5357  6401     const features = [model.capabilities?.supports?.tool_calls && "tools", model.preview && "preview"].filter(Boolean).join(", ");
5358  6402     const featureStr = features ? ` (${features})` : "";
5359        -  return ` - ${model.id.
      6403  +  return ` - ${model.id.length > 30 ? `${model.id.slice(0, 27)}...` : model.id.padEnd(30)} ctx:${contextK.padStart(5)} in:${promptK.padStart(5)} out:${outputK.padStart(4)}` + featureStr;
5360  6404     }
5361  6405     async function runServer(options) {
5362  6406     consola.info(`copilot-api v${package_default.version}`);
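For reference, formatLimit rounds to the nearest thousand and falls back to "?" when a limit is missing; the inputs below are illustrative (real values come from model.capabilities.limits):

// Illustrative inputs only.
formatLimit(128000); // "128k"
formatLimit(16384);  // "16k" (Math.round(16384 / 1e3) === 16)
formatLimit(void 0); // "?"   (also for 0, since 0 is falsy)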
@@ -5371,7 +6415,9 @@ async function runServer(options) {
5371  6415     state.manualApprove = options.manual;
5372  6416     state.showToken = options.showToken;
5373  6417     state.autoTruncate = options.autoTruncate;
5374        -  state.
      6418  +  state.compressToolResults = options.compressToolResults;
      6419  +  state.redirectAnthropic = options.redirectAnthropic;
      6420  +  state.rewriteAnthropicTools = options.rewriteAnthropicTools;
5375  6421     if (options.rateLimit) initAdaptiveRateLimiter({
5376  6422     baseRetryIntervalSeconds: options.retryInterval,
5377  6423     requestIntervalSeconds: options.requestInterval,
@@ -5380,7 +6426,9 @@ async function runServer(options) {
5380  6426     });
5381  6427     else consola.info("Rate limiting disabled");
5382  6428     if (!options.autoTruncate) consola.info("Auto-truncate disabled");
5383        -  if (
      6429  +  if (options.compressToolResults) consola.info("Tool result compression enabled");
      6430  +  if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
      6431  +  if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
5384  6432     initHistory(options.history, options.historyLimit);
5385  6433     if (options.history) {
5386  6434     const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5527,10 +6575,20 @@ const start = defineCommand({
5527  6575     default: false,
5528  6576     description: "Disable automatic conversation history truncation when exceeding limits"
5529  6577     },
5530        -  "
      6578  +  "compress-tool-results": {
      6579  +  type: "boolean",
      6580  +  default: false,
      6581  +  description: "Compress old tool_result content before truncating messages (may lose context details)"
      6582  +  },
      6583  +  "redirect-anthropic": {
      6584  +  type: "boolean",
      6585  +  default: false,
      6586  +  description: "Redirect Anthropic models through OpenAI translation (instead of direct API)"
      6587  +  },
      6588  +  "no-rewrite-anthropic-tools": {
5531  6589     type: "boolean",
5532  6590     default: false,
5533        -  description: "
      6591  +  description: "Don't rewrite Anthropic server-side tools (web_search, etc.) to custom tool format"
5534  6592     }
5535  6593     },
5536  6594     run({ args }) {
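The --compress-tool-results description implies a pre-truncation pass that shrinks old tool_result blocks so whole messages survive longer under the byte and token limits. That pass is not visible in this excerpt; a minimal sketch under that assumption (all names below are illustrative, not the package's API):

// Hypothetical sketch only: shorten tool_result content in all but the
// most recent messages before auto-truncation starts dropping messages.
const MAX_KEPT_CHARS = 2e3;
function compressOldToolResults(messages, preserveRecent = 4) {
	const cutoff = Math.max(0, messages.length - preserveRecent);
	return messages.map((msg, i) => {
		if (i >= cutoff || typeof msg.content === "string") return msg;
		const content = msg.content.map((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > MAX_KEPT_CHARS ? { ...block, content: `${block.content.slice(0, MAX_KEPT_CHARS)} [compressed]` } : block);
		return { ...msg, content };
	});
}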
@@ -5552,7 +6610,9 @@ const start = defineCommand({
5552  6610     history: !args["no-history"],
5553  6611     historyLimit: Number.parseInt(args["history-limit"], 10),
5554  6612     autoTruncate: !args["no-auto-truncate"],
5555        -
      6613  +  compressToolResults: args["compress-tool-results"],
      6614  +  redirectAnthropic: args["redirect-anthropic"],
      6615  +  rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"]
5556  6616     });
5557  6617     }
5558  6618     });
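Note how the run() body inverts the no- prefixed flag, matching the existing no-history and no-auto-truncate pattern, so the positive option name stays true by default. Assuming the CLI framework's usual subcommand wiring, the mapping works out as:

// Flag → option mapping derived from the run() body above:
//   --compress-tool-results      → compressToolResults: true    (default false)
//   --redirect-anthropic         → redirectAnthropic: true      (default false)
//   --no-rewrite-anthropic-tools → rewriteAnthropicTools: false (default true, since !false)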