@hsupu/copilot-api 0.7.11 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -17,7 +17,6 @@ import pc from "picocolors";
  import { Hono } from "hono";
  import { cors } from "hono/cors";
  import { streamSSE } from "hono/streaming";
- import { countTokens } from "@anthropic-ai/tokenizer";
  import { events } from "fetch-event-stream";

  //#region src/lib/paths.ts
@@ -49,7 +48,9 @@ const state = {
  showToken: false,
  verbose: false,
  autoTruncate: true,
- directAnthropicApi: true
+ compressToolResults: false,
+ redirectAnthropic: false,
+ rewriteAnthropicTools: true
  };

  //#endregion
@@ -93,27 +94,78 @@ const GITHUB_BASE_URL = "https://github.com";
93
94
  const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
94
95
  const GITHUB_APP_SCOPES = ["read:user"].join(" ");
95
96
 
97
+ //#endregion
98
+ //#region src/lib/auto-truncate-common.ts
99
+ const DEFAULT_AUTO_TRUNCATE_CONFIG = {
100
+ safetyMarginPercent: 2,
101
+ maxRequestBodyBytes: 510 * 1024,
102
+ preserveRecentPercent: .7
103
+ };
104
+ /** Dynamic byte limit that adjusts based on 413 errors */
105
+ let dynamicByteLimit = null;
106
+ /**
107
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
108
+ */
109
+ function onRequestTooLarge(failingBytes) {
110
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
111
+ dynamicByteLimit = newLimit;
112
+ consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
113
+ }
114
+ /** Get the current effective byte limit */
115
+ function getEffectiveByteLimitBytes() {
116
+ return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
117
+ }
118
+ /** Dynamic token limits per model, adjusted based on token limit errors */
119
+ const dynamicTokenLimits = /* @__PURE__ */ new Map();
120
+ /**
121
+ * Called when a token limit error (400) occurs.
122
+ * Adjusts the token limit for the specific model to 95% of the reported limit.
123
+ */
124
+ function onTokenLimitExceeded(modelId, reportedLimit) {
125
+ const newLimit = Math.floor(reportedLimit * .95);
126
+ const previous = dynamicTokenLimits.get(modelId);
127
+ if (!previous || newLimit < previous) {
128
+ dynamicTokenLimits.set(modelId, newLimit);
129
+ consola.info(`[AutoTruncate] Adjusted token limit for ${modelId}: ${reportedLimit} reported → ${newLimit} effective`);
130
+ }
131
+ }
132
+ /**
133
+ * Get the effective token limit for a model.
134
+ * Returns the dynamic limit if set, otherwise null to use model capabilities.
135
+ */
136
+ function getEffectiveTokenLimit(modelId) {
137
+ return dynamicTokenLimits.get(modelId) ?? null;
138
+ }
139
+
96
140
  //#endregion
97
141
  //#region src/lib/error.ts
98
142
  var HTTPError = class HTTPError extends Error {
99
143
  status;
100
144
  responseText;
101
- constructor(message, status, responseText) {
145
+ /** Model ID that caused the error (if known) */
146
+ modelId;
147
+ constructor(message, status, responseText, modelId) {
102
148
  super(message);
103
149
  this.status = status;
104
150
  this.responseText = responseText;
151
+ this.modelId = modelId;
105
152
  }
106
- static async fromResponse(message, response) {
153
+ static async fromResponse(message, response, modelId) {
107
154
  const text = await response.text();
108
- return new HTTPError(message, response.status, text);
155
+ return new HTTPError(message, response.status, text, modelId);
109
156
  }
110
157
  };
111
158
  /** Parse token limit info from error message */
112
159
  function parseTokenLimitError(message) {
113
- const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
114
- if (match) return {
115
- current: Number.parseInt(match[1], 10),
116
- limit: Number.parseInt(match[2], 10)
160
+ const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
161
+ if (openaiMatch) return {
162
+ current: Number.parseInt(openaiMatch[1], 10),
163
+ limit: Number.parseInt(openaiMatch[2], 10)
164
+ };
165
+ const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
166
+ if (anthropicMatch) return {
167
+ current: Number.parseInt(anthropicMatch[1], 10),
168
+ limit: Number.parseInt(anthropicMatch[2], 10)
117
169
  };
118
170
  return null;
119
171
  }
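The new src/lib/auto-truncate-common.ts keeps two pieces of adaptive state: a global byte limit that drops to 90% of whatever request size last drew a 413, and a per-model token limit set to 95% of the limit reported in a 400; parseTokenLimitError now recognizes both the Copilot/OpenAI wording and the Anthropic wording. A minimal standalone sketch of that behavior (the names mirror the bundled code, but nothing here is exported by the package):

    // Sketch of the adaptive-limit bookkeeping added in 0.7.12 (not a public API).
    let dynamicByteLimit = null;
    const dynamicTokenLimits = new Map();

    function onRequestTooLarge(failingBytes) {
      // Back off to 90% of the failing size, but never below 100 KB.
      dynamicByteLimit = Math.max(Math.floor(failingBytes * 0.9), 100 * 1024);
    }

    function onTokenLimitExceeded(modelId, reportedLimit) {
      // Only ever tighten the per-model limit (95% of what the upstream reported).
      const newLimit = Math.floor(reportedLimit * 0.95);
      const previous = dynamicTokenLimits.get(modelId);
      if (!previous || newLimit < previous) dynamicTokenLimits.set(modelId, newLimit);
    }

    // Both upstream error formats are parsed with the same helper.
    function parseTokenLimitError(message) {
      const openai = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
      if (openai) return { current: Number(openai[1]), limit: Number(openai[2]) };
      const anthropic = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
      if (anthropic) return { current: Number(anthropic[1]), limit: Number(anthropic[2]) };
      return null;
    }

    onRequestTooLarge(600 * 1024);                    // byte limit becomes 540 KB
    onTokenLimitExceeded("claude-sonnet-4.5",
      parseTokenLimitError("prompt is too long: 215000 tokens > 200000 maximum").limit);
    console.log(dynamicByteLimit, dynamicTokenLimits.get("claude-sonnet-4.5")); // 552960 190000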
@@ -150,11 +202,10 @@ function formatRateLimitError(copilotMessage) {
150
202
  };
151
203
  }
152
204
  function forwardError(c, error) {
153
- consola.error("Error occurred:", error);
154
205
  if (error instanceof HTTPError) {
155
206
  if (error.status === 413) {
156
207
  const formattedError = formatRequestTooLargeError();
157
- consola.debug("Returning formatted 413 error:", formattedError);
208
+ consola.warn(`HTTP 413: Request too large`);
158
209
  return c.json(formattedError, 413);
159
210
  }
160
211
  let errorJson;
@@ -163,26 +214,38 @@ function forwardError(c, error) {
163
214
  } catch {
164
215
  errorJson = error.responseText;
165
216
  }
166
- consola.error("HTTP error:", errorJson);
167
217
  const copilotError = errorJson;
168
218
  if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
169
219
  const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
170
220
  if (tokenInfo) {
221
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
222
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
223
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
224
+ return c.json(formattedError, 400);
225
+ }
226
+ }
227
+ const anthropicError = errorJson;
228
+ if (anthropicError.error?.type === "invalid_request_error") {
229
+ const tokenInfo = parseTokenLimitError(anthropicError.error.message ?? "");
230
+ if (tokenInfo) {
231
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
171
232
  const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
172
- consola.debug("Returning formatted token limit error:", formattedError);
233
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
173
234
  return c.json(formattedError, 400);
174
235
  }
175
236
  }
176
237
  if (error.status === 429 || copilotError.error?.code === "rate_limited") {
177
238
  const formattedError = formatRateLimitError(copilotError.error?.message);
178
- consola.debug("Returning formatted rate limit error:", formattedError);
239
+ consola.warn(`HTTP 429: Rate limit exceeded`);
179
240
  return c.json(formattedError, 429);
180
241
  }
242
+ consola.error(`HTTP ${error.status}:`, errorJson);
181
243
  return c.json({ error: {
182
244
  message: error.responseText,
183
245
  type: "error"
184
246
  } }, error.status);
185
247
  }
248
+ consola.error("Unexpected error:", error);
186
249
  return c.json({ error: {
187
250
  message: error.message,
188
251
  type: "error"
@@ -308,6 +371,7 @@ async function pollAccessToken(deviceCode) {
308
371
  //#region src/lib/token.ts
309
372
  const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
310
373
  const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
374
+ let copilotTokenRefreshTimer = null;
311
375
  /**
312
376
  * Refresh the Copilot token with exponential backoff retry.
313
377
  * Returns the new token on success, or null if all retries fail.
@@ -326,20 +390,34 @@ async function refreshCopilotTokenWithRetry(maxRetries = 3) {
326
390
  consola.error("All token refresh attempts failed:", lastError);
327
391
  return null;
328
392
  }
393
+ /**
394
+ * Clear any existing token refresh timer.
395
+ * Call this before setting up a new timer or during cleanup.
396
+ */
397
+ function clearCopilotTokenRefresh() {
398
+ if (copilotTokenRefreshTimer) {
399
+ clearInterval(copilotTokenRefreshTimer);
400
+ copilotTokenRefreshTimer = null;
401
+ }
402
+ }
329
403
  const setupCopilotToken = async () => {
330
404
  const { token, refresh_in } = await getCopilotToken();
331
405
  state.copilotToken = token;
332
406
  consola.debug("GitHub Copilot Token fetched successfully!");
333
407
  if (state.showToken) consola.info("Copilot token:", token);
334
- const refreshInterval = (refresh_in - 60) * 1e3;
335
- setInterval(async () => {
408
+ const refreshInterval = Math.max((refresh_in - 60) * 1e3, 60 * 1e3);
409
+ clearCopilotTokenRefresh();
410
+ copilotTokenRefreshTimer = setInterval(() => {
336
411
  consola.debug("Refreshing Copilot token");
337
- const newToken = await refreshCopilotTokenWithRetry();
338
- if (newToken) {
339
- state.copilotToken = newToken;
340
- consola.debug("Copilot token refreshed");
341
- if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
342
- } else consola.error("Failed to refresh Copilot token after retries, using existing token");
412
+ refreshCopilotTokenWithRetry().then((newToken) => {
413
+ if (newToken) {
414
+ state.copilotToken = newToken;
415
+ consola.debug("Copilot token refreshed");
416
+ if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
417
+ } else consola.error("Failed to refresh Copilot token after retries, using existing token");
418
+ }).catch((error) => {
419
+ consola.error("Unexpected error during token refresh:", error);
420
+ });
343
421
  }, refreshInterval);
344
422
  };
345
423
  async function setupGitHubToken(options) {
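setupCopilotToken previously created a fresh setInterval on every call and awaited the refresh inside the callback; 0.7.12 keeps a single module-level timer, clears it before re-arming, floors the interval at 60 seconds, and handles the async refresh with an explicit .catch so a rejected retry can no longer surface as an unhandled rejection. A minimal sketch of the same pattern, where refreshToken is a hypothetical stand-in for refreshCopilotTokenWithRetry:

    // Sketch of the re-armable refresh timer.
    let refreshTimer = null;

    function scheduleRefresh(refreshInSeconds, refreshToken, onToken) {
      const intervalMs = Math.max((refreshInSeconds - 60) * 1000, 60 * 1000); // never below 1 minute
      if (refreshTimer) clearInterval(refreshTimer);                          // avoid stacking timers
      refreshTimer = setInterval(() => {
        refreshToken()
          .then((token) => { if (token) onToken(token); })
          .catch((error) => console.error("token refresh failed:", error));   // no unhandled rejection
      }, intervalMs);
    }

    // e.g. refresh_in = 1500s → refresh every 1440s; refresh_in = 30s → still every 60s
    scheduleRefresh(1500, async () => "new-token", (t) => console.log("got", t));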
@@ -621,7 +699,7 @@ const logout = defineCommand({
  });

  //#endregion
- //#region src/patch-claude.ts
+ //#region src/patch-claude-code.ts
  const SUPPORTED_VERSIONS = {
  v2a: {
  min: "2.0.0",
@@ -941,7 +1019,7 @@ const patchClaude = defineCommand({
  //#endregion
  //#region package.json
  var name = "@hsupu/copilot-api";
- var version = "0.7.11";
+ var version = "0.7.12";
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
  var keywords = [
  "proxy",
@@ -969,6 +1047,9 @@ var scripts = {
  "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
  "release": "bumpp && npm publish --access public",
  "start": "NODE_ENV=production bun run ./src/main.ts",
+ "test": "bun test tests/*.test.ts",
+ "test:all": "bun test tests/*.test.ts && bun test tests/integration/",
+ "test:integration": "bun test tests/integration/",
  "typecheck": "tsc"
  };
  var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
@@ -1021,7 +1102,7 @@ var package_default = {
1021
1102
 
1022
1103
  //#endregion
1023
1104
  //#region src/lib/adaptive-rate-limiter.ts
1024
- const DEFAULT_CONFIG$1 = {
1105
+ const DEFAULT_CONFIG = {
1025
1106
  baseRetryIntervalSeconds: 10,
1026
1107
  maxRetryIntervalSeconds: 120,
1027
1108
  requestIntervalSeconds: 10,
@@ -1050,7 +1131,7 @@ var AdaptiveRateLimiter = class {
1050
1131
  recoveryStepIndex = 0;
1051
1132
  constructor(config = {}) {
1052
1133
  this.config = {
1053
- ...DEFAULT_CONFIG$1,
1134
+ ...DEFAULT_CONFIG,
1054
1135
  ...config
1055
1136
  };
1056
1137
  }
@@ -1292,12 +1373,12 @@ let rateLimiterInstance = null;
1292
1373
  */
1293
1374
  function initAdaptiveRateLimiter(config = {}) {
1294
1375
  rateLimiterInstance = new AdaptiveRateLimiter(config);
1295
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
1296
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
1297
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
1298
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
1299
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
1300
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
1376
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
1377
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
1378
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
1379
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
1380
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
1381
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
1301
1382
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
1302
1383
  }
1303
1384
  /**
@@ -1918,6 +1999,7 @@ var RequestTracker = class {
1918
1999
  requests = /* @__PURE__ */ new Map();
1919
2000
  renderer = null;
1920
2001
  completedQueue = [];
2002
+ completedTimeouts = /* @__PURE__ */ new Map();
1921
2003
  historySize = 5;
1922
2004
  completedDisplayMs = 2e3;
1923
2005
  setRenderer(renderer) {
@@ -1977,11 +2059,22 @@ var RequestTracker = class {
1977
2059
  this.renderer?.onRequestComplete(request);
1978
2060
  this.requests.delete(id);
1979
2061
  this.completedQueue.push(request);
1980
- while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
1981
- setTimeout(() => {
2062
+ while (this.completedQueue.length > this.historySize) {
2063
+ const removed = this.completedQueue.shift();
2064
+ if (removed) {
2065
+ const timeoutId$1 = this.completedTimeouts.get(removed.id);
2066
+ if (timeoutId$1) {
2067
+ clearTimeout(timeoutId$1);
2068
+ this.completedTimeouts.delete(removed.id);
2069
+ }
2070
+ }
2071
+ }
2072
+ const timeoutId = setTimeout(() => {
1982
2073
  const idx = this.completedQueue.indexOf(request);
1983
2074
  if (idx !== -1) this.completedQueue.splice(idx, 1);
2075
+ this.completedTimeouts.delete(id);
1984
2076
  }, this.completedDisplayMs);
2077
+ this.completedTimeouts.set(id, timeoutId);
1985
2078
  }
1986
2079
  /**
1987
2080
  * Mark request as failed with error
@@ -2016,11 +2109,13 @@ var RequestTracker = class {
2016
2109
  return this.requests.get(id);
2017
2110
  }
2018
2111
  /**
2019
- * Clear all tracked requests
2112
+ * Clear all tracked requests and pending timeouts
2020
2113
  */
2021
2114
  clear() {
2022
2115
  this.requests.clear();
2023
2116
  this.completedQueue = [];
2117
+ for (const timeoutId of this.completedTimeouts.values()) clearTimeout(timeoutId);
2118
+ this.completedTimeouts.clear();
2024
2119
  }
2025
2120
  };
2026
2121
  const requestTracker = new RequestTracker();
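Each completed request used to schedule a bare setTimeout that later dropped it from completedQueue; those timers leaked whenever an entry was evicted early by historySize or when clear() ran. The new completedTimeouts map ties every timer to its request id so both paths can cancel it. A compact sketch of that bookkeeping, assuming the same historySize and completedDisplayMs values:

    // Sketch: expiring history entries without leaking timers.
    class CompletedHistory {
      queue = [];
      timeouts = new Map();
      historySize = 5;
      displayMs = 2000;

      push(entry) {
        this.queue.push(entry);
        while (this.queue.length > this.historySize) {
          const evicted = this.queue.shift();
          const t = evicted && this.timeouts.get(evicted.id);
          if (t) { clearTimeout(t); this.timeouts.delete(evicted.id); } // cancel the orphaned timer
        }
        const t = setTimeout(() => {
          const idx = this.queue.indexOf(entry);
          if (idx !== -1) this.queue.splice(idx, 1);
          this.timeouts.delete(entry.id);
        }, this.displayMs);
        this.timeouts.set(entry.id, t);
      }

      clear() {
        this.queue = [];
        for (const t of this.timeouts.values()) clearTimeout(t); // nothing fires after clear()
        this.timeouts.clear();
      }
    }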
@@ -2171,6 +2266,14 @@ const getTokenizerFromModel = (model) => {
2171
2266
  return model.capabilities?.tokenizer || "o200k_base";
2172
2267
  };
2173
2268
  /**
2269
+ * Count tokens in a text string using the model's tokenizer.
2270
+ * This is a simple wrapper for counting tokens in plain text.
2271
+ */
2272
+ const countTextTokens = async (text, model) => {
2273
+ const tokenizer = getTokenizerFromModel(model);
2274
+ return (await getEncodeChatFunction(tokenizer)).encode(text).length;
2275
+ };
2276
+ /**
2174
2277
  * Get model-specific constants for token calculation.
2175
2278
  * These values are empirically determined based on OpenAI's function calling token overhead.
2176
2279
  * - funcInit: Tokens for initializing a function definition
@@ -2276,61 +2379,11 @@ const numTokensForTools = (tools, encoder, constants) => {
2276
2379
  return funcTokenCount;
2277
2380
  };
2278
2381
  /**
2279
- * Check if a model is an Anthropic model
2280
- */
2281
- function isAnthropicModel(model) {
2282
- return model.vendor === "Anthropic";
2283
- }
2284
- /**
2285
- * Convert a message to plain text for Anthropic tokenizer
2286
- */
2287
- function messageToText(message) {
2288
- const parts = [];
2289
- parts.push(`${message.role}:`);
2290
- if (typeof message.content === "string") parts.push(message.content);
2291
- else if (Array.isArray(message.content)) {
2292
- for (const part of message.content) if ("text" in part && part.text) parts.push(part.text);
2293
- else if (part.type === "image_url") parts.push("[image]");
2294
- }
2295
- if (message.tool_calls) for (const tc of message.tool_calls) parts.push(JSON.stringify(tc));
2296
- if ("tool_call_id" in message && message.tool_call_id) parts.push(`tool_call_id:${message.tool_call_id}`);
2297
- return parts.join("\n");
2298
- }
2299
- /**
2300
- * Convert tools to text for Anthropic tokenizer
2301
- */
2302
- function toolsToText(tools) {
2303
- return tools.map((tool) => JSON.stringify(tool)).join("\n");
2304
- }
2305
- /**
2306
- * Calculate token count using Anthropic's official tokenizer
2307
- */
2308
- function getAnthropicTokenCount(payload) {
2309
- const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
2310
- const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
2311
- const inputText = inputMessages.map((msg) => messageToText(msg)).join("\n\n");
2312
- const outputText = outputMessages.map((msg) => messageToText(msg)).join("\n\n");
2313
- let inputTokens = countTokens(inputText);
2314
- let outputTokens = countTokens(outputText);
2315
- if (payload.tools && payload.tools.length > 0) {
2316
- const toolsText = toolsToText(payload.tools);
2317
- inputTokens += countTokens(toolsText);
2318
- }
2319
- inputTokens += inputMessages.length * 3;
2320
- outputTokens += outputMessages.length * 3;
2321
- inputTokens += 3;
2322
- return {
2323
- input: inputTokens,
2324
- output: outputTokens
2325
- };
2326
- }
2327
- /**
2328
2382
  * Calculate the token count of messages.
2329
- * Uses Anthropic's official tokenizer for Anthropic models,
2330
- * and GPT tokenizers for other models.
2383
+ * Uses the tokenizer specified by the GitHub Copilot API model info.
2384
+ * All models (including Claude) use GPT tokenizers (o200k_base or cl100k_base).
2331
2385
  */
2332
2386
  const getTokenCount = async (payload, model) => {
2333
- if (isAnthropicModel(model)) return getAnthropicTokenCount(payload);
2334
2387
  const tokenizer = getTokenizerFromModel(model);
2335
2388
  const encoder = await getEncodeChatFunction(tokenizer);
2336
2389
  const simplifiedMessages = payload.messages;
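With the @anthropic-ai/tokenizer dependency removed, getTokenCount no longer special-cases Claude models; every model is counted with the tokenizer name reported by the Copilot models endpoint (falling back to o200k_base), and the new countTextTokens helper reuses that path for plain strings. A small sketch of the selection logic; getEncodeChatFunction is the bundle's internal tokenizer loader and is assumed here rather than shown:

    // Sketch: tokenizer choice now comes only from Copilot model metadata.
    const getTokenizerFromModel = (model) => model.capabilities?.tokenizer || "o200k_base";

    // Mirrors the new countTextTokens wrapper; getEncodeChatFunction is internal, not a public API.
    async function countTextTokensSketch(text, model, getEncodeChatFunction) {
      const tokenizer = getTokenizerFromModel(model);          // e.g. "o200k_base" or "cl100k_base"
      const encoder = await getEncodeChatFunction(tokenizer);
      return encoder.encode(text).length;
    }

    getTokenizerFromModel({ id: "claude-sonnet-4.5", capabilities: { tokenizer: "o200k_base" } });
    // → "o200k_base": Claude models are now counted with the GPT tokenizer like everything else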
@@ -2347,32 +2400,18 @@ const getTokenCount = async (payload, model) => {
2347
2400
  };
2348
2401
 
2349
2402
  //#endregion
2350
- //#region src/lib/auto-truncate.ts
2351
- const DEFAULT_CONFIG = {
2352
- safetyMarginPercent: 2,
2353
- maxRequestBodyBytes: 510 * 1024
2354
- };
2355
- /** Dynamic byte limit that adjusts based on 413 errors */
2356
- let dynamicByteLimit = null;
2357
- /**
2358
- * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
2359
- */
2360
- function onRequestTooLarge(failingBytes) {
2361
- const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2362
- dynamicByteLimit = newLimit;
2363
- consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
2364
- }
2365
- function calculateLimits(model, config) {
2366
- const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2403
+ //#region src/lib/auto-truncate-openai.ts
2404
+ function calculateLimits$1(model, config) {
2405
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2367
2406
  const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
2368
- const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
2407
+ const byteLimit = getEffectiveByteLimitBytes();
2369
2408
  return {
2370
2409
  tokenLimit,
2371
2410
  byteLimit
2372
2411
  };
2373
2412
  }
2374
2413
  /** Estimate tokens for a single message (fast approximation) */
2375
- function estimateMessageTokens(msg) {
2414
+ function estimateMessageTokens$1(msg) {
2376
2415
  let charCount = 0;
2377
2416
  if (typeof msg.content === "string") charCount = msg.content.length;
2378
2417
  else if (Array.isArray(msg.content)) {
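calculateLimits now prefers the dynamically learned per-model limit, then max_context_window_tokens, then max_prompt_tokens, then a 128k fallback, and always trims the result by the 2% safety margin; the byte ceiling is whatever getEffectiveByteLimitBytes currently reports (510 KB until a 413 lowers it). A worked sketch with illustrative capability numbers:

    // Sketch of the limit resolution order, with made-up inputs.
    function effectiveLimits(model, { dynamicTokenLimit = null, dynamicByteLimit = null } = {}) {
      const raw =
        dynamicTokenLimit ??
        model.capabilities?.limits?.max_context_window_tokens ??
        model.capabilities?.limits?.max_prompt_tokens ??
        128000;
      return {
        tokenLimit: Math.floor(raw * (1 - 2 / 100)),   // 2% safety margin
        byteLimit: dynamicByteLimit ?? 510 * 1024,     // 510 KB default body cap
      };
    }

    effectiveLimits({ capabilities: { limits: { max_context_window_tokens: 200000 } } });
    // → { tokenLimit: 196000, byteLimit: 522240 }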
@@ -2383,7 +2422,7 @@ function estimateMessageTokens(msg) {
2383
2422
  return Math.ceil(charCount / 4) + 10;
2384
2423
  }
2385
2424
  /** Get byte size of a message */
2386
- function getMessageBytes(msg) {
2425
+ function getMessageBytes$1(msg) {
2387
2426
  return JSON.stringify(msg).length;
2388
2427
  }
2389
2428
  /** Extract system/developer messages from the beginning */
@@ -2405,7 +2444,7 @@ function getToolCallIds(msg) {
2405
2444
  return [];
2406
2445
  }
2407
2446
  /** Filter orphaned tool_result messages */
2408
- function filterOrphanedToolResults(messages) {
2447
+ function filterOrphanedToolResults$1(messages) {
2409
2448
  const toolUseIds = /* @__PURE__ */ new Set();
2410
2449
  for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
2411
2450
  let removedCount = 0;
@@ -2416,22 +2455,127 @@ function filterOrphanedToolResults(messages) {
2416
2455
  }
2417
2456
  return true;
2418
2457
  });
2419
- if (removedCount > 0) consola.debug(`[AutoTruncate] Filtered ${removedCount} orphaned tool_result`);
2458
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_result`);
2420
2459
  return filtered;
2421
2460
  }
2461
+ /** Get tool_result IDs from all tool messages */
2462
+ function getToolResultIds$1(messages) {
2463
+ const ids = /* @__PURE__ */ new Set();
2464
+ for (const msg of messages) if (msg.role === "tool" && msg.tool_call_id) ids.add(msg.tool_call_id);
2465
+ return ids;
2466
+ }
2467
+ /** Filter orphaned tool_use messages (those without matching tool_result) */
2468
+ function filterOrphanedToolUse$1(messages) {
2469
+ const toolResultIds = getToolResultIds$1(messages);
2470
+ const result = [];
2471
+ let removedCount = 0;
2472
+ for (const msg of messages) {
2473
+ if (msg.role === "assistant" && msg.tool_calls) {
2474
+ const filteredToolCalls = msg.tool_calls.filter((tc) => {
2475
+ if (!toolResultIds.has(tc.id)) {
2476
+ removedCount++;
2477
+ return false;
2478
+ }
2479
+ return true;
2480
+ });
2481
+ if (filteredToolCalls.length === 0) {
2482
+ if (msg.content) result.push({
2483
+ ...msg,
2484
+ tool_calls: void 0
2485
+ });
2486
+ continue;
2487
+ }
2488
+ result.push({
2489
+ ...msg,
2490
+ tool_calls: filteredToolCalls
2491
+ });
2492
+ continue;
2493
+ }
2494
+ result.push(msg);
2495
+ }
2496
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_use`);
2497
+ return result;
2498
+ }
2422
2499
  /** Ensure messages start with a user message */
2423
- function ensureStartsWithUser(messages) {
2500
+ function ensureStartsWithUser$1(messages) {
2424
2501
  let startIndex = 0;
2425
2502
  while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
2426
- if (startIndex > 0) consola.debug(`[AutoTruncate] Skipped ${startIndex} leading non-user messages`);
2503
+ if (startIndex > 0) consola.debug(`[AutoTruncate:OpenAI] Skipped ${startIndex} leading non-user messages`);
2427
2504
  return messages.slice(startIndex);
2428
2505
  }
2506
+ /** Threshold for large tool message content (bytes) */
2507
+ const LARGE_TOOL_RESULT_THRESHOLD$1 = 1e4;
2508
+ /** Maximum length for compressed tool_result summary */
2509
+ const COMPRESSED_SUMMARY_LENGTH$1 = 500;
2510
+ /**
2511
+ * Compress a large tool message content to a summary.
2512
+ * Keeps the first and last portions with a note about truncation.
2513
+ */
2514
+ function compressToolResultContent$1(content) {
2515
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD$1) return content;
2516
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH$1 / 2);
2517
+ const start$1 = content.slice(0, halfLen);
2518
+ const end = content.slice(-halfLen);
2519
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH$1;
2520
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
2521
+ }
2522
+ /**
2523
+ * Smart compression strategy for OpenAI format:
2524
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
2525
+ * 2. Messages before that threshold get their tool content compressed
2526
+ * 3. Returns compressed messages and stats
2527
+ *
2528
+ * @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
2529
+ */
2530
+ function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePercent) {
2531
+ const n = messages.length;
2532
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
2533
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
2534
+ for (let i = n - 1; i >= 0; i--) {
2535
+ const msg = messages[i];
2536
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
2537
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
2538
+ }
2539
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
2540
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
2541
+ let thresholdIndex = n;
2542
+ for (let i = n - 1; i >= 0; i--) {
2543
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
2544
+ thresholdIndex = i + 1;
2545
+ break;
2546
+ }
2547
+ thresholdIndex = i;
2548
+ }
2549
+ if (thresholdIndex >= n) return {
2550
+ messages,
2551
+ compressedCount: 0,
2552
+ compressThresholdIndex: n
2553
+ };
2554
+ const result = [];
2555
+ let compressedCount = 0;
2556
+ for (const [i, msg] of messages.entries()) {
2557
+ if (i < thresholdIndex && msg.role === "tool" && typeof msg.content === "string" && msg.content.length > LARGE_TOOL_RESULT_THRESHOLD$1) {
2558
+ compressedCount++;
2559
+ result.push({
2560
+ ...msg,
2561
+ content: compressToolResultContent$1(msg.content)
2562
+ });
2563
+ continue;
2564
+ }
2565
+ result.push(msg);
2566
+ }
2567
+ return {
2568
+ messages: result,
2569
+ compressedCount,
2570
+ compressThresholdIndex: thresholdIndex
2571
+ };
2572
+ }
2429
2573
  /**
2430
2574
  * Find the optimal index from which to preserve messages.
2431
2575
  * Uses binary search with pre-calculated cumulative sums.
2432
2576
  * Returns the smallest index where the preserved portion fits within limits.
2433
2577
  */
2434
- function findOptimalPreserveIndex(params) {
2578
+ function findOptimalPreserveIndex$1(params) {
2435
2579
  const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
2436
2580
  if (messages.length === 0) return 0;
2437
2581
  const markerBytes = 200;
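compressToolResultContent only touches tool output larger than 10,000 characters and replaces the middle with an omission marker, keeping the first and last 250 characters (half of the 500-character summary budget each); smartCompressToolResults applies it only to messages older than the most recent preserveRecentPercent (70% by default) of the budget. A tiny sketch of the string transform:

    // Sketch of the tool_result compression applied before any messages are dropped.
    const LARGE_TOOL_RESULT_THRESHOLD = 10000;   // chars
    const COMPRESSED_SUMMARY_LENGTH = 500;       // chars kept (250 head + 250 tail)

    function compressToolResultContent(content) {
      if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
      const half = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
      const removed = content.length - COMPRESSED_SUMMARY_LENGTH;
      return `${content.slice(0, half)}\n\n[... ${removed.toLocaleString()} characters omitted for brevity ...]\n\n${content.slice(-half)}`;
    }

    const big = "x".repeat(50000);
    console.log(compressToolResultContent(big).length); // ≈ 550 characters instead of 50,000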
@@ -2443,8 +2587,8 @@ function findOptimalPreserveIndex(params) {
2443
2587
  const cumBytes = Array.from({ length: n + 1 }, () => 0);
2444
2588
  for (let i = n - 1; i >= 0; i--) {
2445
2589
  const msg = messages[i];
2446
- cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
2447
- cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
2590
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
2591
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
2448
2592
  }
2449
2593
  let left = 0;
2450
2594
  let right = n;
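findOptimalPreserveIndex builds suffix cumulative sums of estimated tokens and bytes, then binary-searches for the smallest starting index whose suffix still fits under both budgets after reserving room for the system messages, a roughly 200-byte marker, and 50 tokens of slack. The loop body sits outside this hunk, so the search below is a plausible reconstruction of that idea rather than the bundled code:

    // Sketch: smallest index i such that the suffix messages[i..] fits both budgets.
    function findPreserveIndex(messages, estimateTokens, availableTokens, availableBytes) {
      const n = messages.length;
      const cumTokens = new Array(n + 1).fill(0);
      const cumBytes = new Array(n + 1).fill(0);
      for (let i = n - 1; i >= 0; i--) {
        cumTokens[i] = cumTokens[i + 1] + estimateTokens(messages[i]);
        cumBytes[i] = cumBytes[i + 1] + JSON.stringify(messages[i]).length + 1;
      }
      let left = 0;
      let right = n;
      while (left < right) {
        const mid = (left + right) >> 1;
        // Suffix sums shrink as i grows, so "fits" is monotone and binary search applies.
        const fits = cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes;
        if (fits) right = mid; else left = mid + 1;
      }
      return left; // 0 keeps everything, n would mean even an empty suffix does not help
    }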
@@ -2458,12 +2602,12 @@ function findOptimalPreserveIndex(params) {
2458
2602
  /**
2459
2603
  * Check if payload needs compaction based on model limits or byte size.
2460
2604
  */
2461
- async function checkNeedsCompaction(payload, model, config = {}) {
2605
+ async function checkNeedsCompactionOpenAI(payload, model, config = {}) {
2462
2606
  const cfg = {
2463
- ...DEFAULT_CONFIG,
2607
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
2464
2608
  ...config
2465
2609
  };
2466
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2610
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
2467
2611
  const currentTokens = (await getTokenCount(payload, model)).input;
2468
2612
  const currentBytes = JSON.stringify(payload).length;
2469
2613
  const exceedsTokens = currentTokens > tokenLimit;
@@ -2481,23 +2625,90 @@ async function checkNeedsCompaction(payload, model, config = {}) {
2481
2625
  reason
2482
2626
  };
2483
2627
  }
2484
- /** Create a truncation marker message */
2485
- function createTruncationMarker(removedCount) {
2628
+ /**
2629
+ * Generate a summary of removed messages for context.
2630
+ * Extracts key information like tool calls and topics.
2631
+ */
2632
+ function generateRemovedMessagesSummary$1(removedMessages) {
2633
+ const toolCalls = [];
2634
+ let userMessageCount = 0;
2635
+ let assistantMessageCount = 0;
2636
+ for (const msg of removedMessages) {
2637
+ if (msg.role === "user") userMessageCount++;
2638
+ else if (msg.role === "assistant") assistantMessageCount++;
2639
+ if (msg.tool_calls) {
2640
+ for (const tc of msg.tool_calls) if (tc.function.name) toolCalls.push(tc.function.name);
2641
+ }
2642
+ }
2643
+ const parts = [];
2644
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
2645
+ const breakdown = [];
2646
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
2647
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
2648
+ parts.push(`Messages: ${breakdown.join(", ")}`);
2649
+ }
2650
+ if (toolCalls.length > 0) {
2651
+ const uniqueTools = [...new Set(toolCalls)];
2652
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
2653
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
2654
+ }
2655
+ return parts.join(". ");
2656
+ }
2657
+ /**
2658
+ * Add a compression notice to the system message.
2659
+ * Informs the model that some tool content has been compressed.
2660
+ */
2661
+ function addCompressionNotice$1(payload, compressedCount) {
2662
+ const notice = `\n\n[CONTEXT NOTE]\n${compressedCount} large tool results have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]`;
2663
+ const messages = [...payload.messages];
2664
+ for (let i = messages.length - 1; i >= 0; i--) {
2665
+ const msg = messages[i];
2666
+ if (msg.role === "system" || msg.role === "developer") {
2667
+ if (typeof msg.content === "string") messages[i] = {
2668
+ ...msg,
2669
+ content: msg.content + notice
2670
+ };
2671
+ break;
2672
+ }
2673
+ }
2674
+ return {
2675
+ ...payload,
2676
+ messages
2677
+ };
2678
+ }
2679
+ /**
2680
+ * Create truncation context to append to system messages.
2681
+ */
2682
+ function createTruncationSystemContext$1(removedCount, compressedCount, summary) {
2683
+ let context = `\n\n[CONVERSATION CONTEXT]\n`;
2684
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
2685
+ if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
2686
+ if (summary) context += `Summary of removed content: ${summary}\n`;
2687
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
2688
+ return context;
2689
+ }
2690
+ /** Create a truncation marker message (fallback when no system message) */
2691
+ function createTruncationMarker$2(removedCount, compressedCount, summary) {
2692
+ const parts = [];
2693
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
2694
+ if (compressedCount > 0) parts.push(`${compressedCount} tool results compressed`);
2695
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
2696
+ if (summary) content += `\n[Summary: ${summary}]`;
2486
2697
  return {
2487
2698
  role: "user",
2488
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
2699
+ content
2489
2700
  };
2490
2701
  }
2491
2702
  /**
2492
2703
  * Perform auto-truncation on a payload that exceeds limits.
2493
2704
  * Uses binary search to find the optimal truncation point.
2494
2705
  */
2495
- async function autoTruncate(payload, model, config = {}) {
2706
+ async function autoTruncateOpenAI(payload, model, config = {}) {
2496
2707
  const cfg = {
2497
- ...DEFAULT_CONFIG,
2708
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
2498
2709
  ...config
2499
2710
  };
2500
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
2711
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
2501
2712
  const originalBytes = JSON.stringify(payload).length;
2502
2713
  const originalTokens = (await getTokenCount(payload, model)).input;
2503
2714
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
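When something was removed or compressed, the new code prefers to append a bracketed context note to the last system/developer message (createTruncationSystemContext$1 and addCompressionNotice$1) and only falls back to injecting a synthetic user message when no system message exists. A sketch of what the appended note looks like for a hypothetical truncation:

    // Sketch: the note appended to the system prompt after truncation/compression.
    function truncationSystemContext(removedCount, compressedCount, summary) {
      let context = `\n\n[CONVERSATION CONTEXT]\n`;
      if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
      if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
      if (summary) context += `Summary of removed content: ${summary}\n`;
      return context + "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
    }

    // Illustrative numbers and summary text only.
    console.log(truncationSystemContext(12, 3, "Messages: 5 user, 7 assistant. Tools used: Read, Bash, Grep"));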
@@ -2509,18 +2720,44 @@ async function autoTruncate(payload, model, config = {}) {
2509
2720
  };
2510
2721
  const exceedsTokens = originalTokens > tokenLimit;
2511
2722
  const exceedsBytes = originalBytes > byteLimit;
2512
- let reason;
2513
- if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2514
- else if (exceedsBytes) reason = "size";
2515
- else reason = "tokens";
2516
- consola.info(`[AutoTruncate] Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
2517
- const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
2518
- const messagesJson = JSON.stringify(payload.messages);
2519
- const payloadOverhead = originalBytes - messagesJson.length;
2520
- const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
2521
- const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2522
- consola.debug(`[AutoTruncate] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2523
- const preserveIndex = findOptimalPreserveIndex({
2723
+ let workingMessages = payload.messages;
2724
+ let compressedCount = 0;
2725
+ if (state.compressToolResults) {
2726
+ const compressionResult = smartCompressToolResults$1(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
2727
+ workingMessages = compressionResult.messages;
2728
+ compressedCount = compressionResult.compressedCount;
2729
+ const compressedPayload = {
2730
+ ...payload,
2731
+ messages: workingMessages
2732
+ };
2733
+ const compressedBytes = JSON.stringify(compressedPayload).length;
2734
+ const compressedTokenCount = await getTokenCount(compressedPayload, model);
2735
+ if (compressedTokenCount.input <= tokenLimit && compressedBytes <= byteLimit) {
2736
+ let reason$1 = "tokens";
2737
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
2738
+ else if (exceedsBytes) reason$1 = "size";
2739
+ consola.info(`[AutoTruncate:OpenAI] ${reason$1}: ${originalTokens}→${compressedTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
2740
+ const noticePayload = addCompressionNotice$1(compressedPayload, compressedCount);
2741
+ const noticeTokenCount = await getTokenCount(noticePayload, model);
2742
+ return {
2743
+ payload: noticePayload,
2744
+ wasCompacted: true,
2745
+ originalTokens,
2746
+ compactedTokens: noticeTokenCount.input,
2747
+ removedMessageCount: 0
2748
+ };
2749
+ }
2750
+ }
2751
+ const { systemMessages, conversationMessages } = extractSystemMessages(workingMessages);
2752
+ const messagesJson = JSON.stringify(workingMessages);
2753
+ const payloadOverhead = JSON.stringify({
2754
+ ...payload,
2755
+ messages: workingMessages
2756
+ }).length - messagesJson.length;
2757
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes$1(m) + 1, 0);
2758
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens$1(m), 0);
2759
+ consola.debug(`[AutoTruncate:OpenAI] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
2760
+ const preserveIndex = findOptimalPreserveIndex$1({
2524
2761
  messages: conversationMessages,
2525
2762
  systemBytes,
2526
2763
  systemTokens,
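Inside autoTruncateOpenAI the order of operations is now: if compressToolResults is enabled, compress old tool output first; if the compressed payload already fits, attach the compression notice and return without dropping anything; only otherwise fall back to the preserve-index truncation below. A condensed sketch of that decision, where the helpers are passed in and truncateByPreserveIndex is a made-up name for the removal path:

    // Sketch of the compress-before-truncate flow (helper names mirror the bundle where shown).
    async function autoTruncateSketch(payload, model, state, cfg, helpers) {
      const { tokenLimit, byteLimit } = helpers.calculateLimits(model, cfg);
      if (state.compressToolResults) {
        const { messages, compressedCount } = helpers.smartCompressToolResults(
          payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
        const compressed = { ...payload, messages };
        const tokens = (await helpers.getTokenCount(compressed, model)).input;
        if (tokens <= tokenLimit && JSON.stringify(compressed).length <= byteLimit) {
          return helpers.addCompressionNotice(compressed, compressedCount); // nothing dropped
        }
      }
      return helpers.truncateByPreserveIndex(payload, model, cfg);          // fallback: drop oldest messages
    }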
@@ -2529,7 +2766,7 @@ async function autoTruncate(payload, model, config = {}) {
2529
2766
  byteLimit
2530
2767
  });
2531
2768
  if (preserveIndex === 0) {
2532
- consola.warn("[AutoTruncate] Cannot truncate, system messages too large");
2769
+ consola.warn("[AutoTruncate:OpenAI] Cannot truncate, system messages too large");
2533
2770
  return {
2534
2771
  payload,
2535
2772
  wasCompacted: false,
@@ -2539,7 +2776,7 @@ async function autoTruncate(payload, model, config = {}) {
2539
2776
  };
2540
2777
  }
2541
2778
  if (preserveIndex >= conversationMessages.length) {
2542
- consola.warn("[AutoTruncate] Would need to remove all messages");
2779
+ consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
2543
2780
  return {
2544
2781
  payload,
2545
2782
  wasCompacted: false,
@@ -2549,11 +2786,13 @@ async function autoTruncate(payload, model, config = {}) {
2549
2786
  };
2550
2787
  }
2551
2788
  let preserved = conversationMessages.slice(preserveIndex);
2552
- preserved = filterOrphanedToolResults(preserved);
2553
- preserved = ensureStartsWithUser(preserved);
2554
- preserved = filterOrphanedToolResults(preserved);
2789
+ preserved = filterOrphanedToolResults$1(preserved);
2790
+ preserved = filterOrphanedToolUse$1(preserved);
2791
+ preserved = ensureStartsWithUser$1(preserved);
2792
+ preserved = filterOrphanedToolResults$1(preserved);
2793
+ preserved = filterOrphanedToolUse$1(preserved);
2555
2794
  if (preserved.length === 0) {
2556
- consola.warn("[AutoTruncate] All messages filtered out after cleanup");
2795
+ consola.warn("[AutoTruncate:OpenAI] All messages filtered out after cleanup");
2557
2796
  return {
2558
2797
  payload,
2559
2798
  wasCompacted: false,
@@ -2562,20 +2801,36 @@ async function autoTruncate(payload, model, config = {}) {
2562
2801
  removedMessageCount: 0
2563
2802
  };
2564
2803
  }
2804
+ const removedMessages = conversationMessages.slice(0, preserveIndex);
2565
2805
  const removedCount = conversationMessages.length - preserved.length;
2566
- const marker = createTruncationMarker(removedCount);
2806
+ const summary = generateRemovedMessagesSummary$1(removedMessages);
2807
+ let newSystemMessages = systemMessages;
2808
+ let newMessages = preserved;
2809
+ if (systemMessages.length > 0) {
2810
+ const truncationContext = createTruncationSystemContext$1(removedCount, compressedCount, summary);
2811
+ const lastSystemIdx = systemMessages.length - 1;
2812
+ const lastSystem = systemMessages[lastSystemIdx];
2813
+ const updatedSystem = {
2814
+ ...lastSystem,
2815
+ content: typeof lastSystem.content === "string" ? lastSystem.content + truncationContext : lastSystem.content
2816
+ };
2817
+ newSystemMessages = [...systemMessages.slice(0, lastSystemIdx), updatedSystem];
2818
+ } else newMessages = [createTruncationMarker$2(removedCount, compressedCount, summary), ...preserved];
2567
2819
  const newPayload = {
2568
2820
  ...payload,
2569
- messages: [
2570
- ...systemMessages,
2571
- marker,
2572
- ...preserved
2573
- ]
2821
+ messages: [...newSystemMessages, ...newMessages]
2574
2822
  };
2575
2823
  const newBytes = JSON.stringify(newPayload).length;
2576
2824
  const newTokenCount = await getTokenCount(newPayload, model);
2577
- consola.info(`[AutoTruncate] ${originalTokens} ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
2578
- if (newBytes > byteLimit) consola.warn(`[AutoTruncate] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2825
+ let reason = "tokens";
2826
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
2827
+ else if (exceedsBytes) reason = "size";
2828
+ const actions = [];
2829
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
2830
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
2831
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
2832
+ consola.info(`[AutoTruncate:OpenAI] ${reason}: ${originalTokens}→${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
2833
+ if (newBytes > byteLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
2579
2834
  return {
2580
2835
  payload: newPayload,
2581
2836
  wasCompacted: true,
@@ -2587,7 +2842,7 @@ async function autoTruncate(payload, model, config = {}) {
2587
2842
  /**
2588
2843
  * Create a marker to prepend to responses indicating auto-truncation occurred.
2589
2844
  */
2590
- function createTruncationResponseMarker(result) {
2845
+ function createTruncationResponseMarkerOpenAI(result) {
2591
2846
  if (!result.wasCompacted) return "";
2592
2847
  const reduction = result.originalTokens - result.compactedTokens;
2593
2848
  const percentage = Math.round(reduction / result.originalTokens * 100);
@@ -2611,7 +2866,7 @@ const createChatCompletions = async (payload) => {
2611
2866
  });
2612
2867
  if (!response.ok) {
2613
2868
  consola.error("Failed to create chat completions", response);
2614
- throw await HTTPError.fromResponse("Failed to create chat completions", response);
2869
+ throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
2615
2870
  }
2616
2871
  if (payload.stream) return events(response);
2617
2872
  return await response.json();
@@ -2661,6 +2916,18 @@ function failTracking(trackingId, error) {
2661
2916
  if (!trackingId) return;
2662
2917
  requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
2663
2918
  }
2919
+ /**
2920
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
2921
+ * Works with both OpenAI and Anthropic truncate results.
2922
+ */
2923
+ function createTruncationMarker(result) {
2924
+ if (!result.wasCompacted) return "";
2925
+ const { originalTokens, compactedTokens, removedMessageCount } = result;
2926
+ if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
2927
+ const reduction = originalTokens - compactedTokens;
2928
+ const percentage = Math.round(reduction / originalTokens * 100);
2929
+ return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
2930
+ }
2664
2931
  /** Record streaming error to history (works with any accumulator type) */
2665
2932
  function recordStreamError(opts) {
2666
2933
  const { acc, fallbackModel, ctx, error } = opts;
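The new shared createTruncationMarker works for both the OpenAI and Anthropic truncate results and degrades to a generic notice when the counts are missing. With numbers filled in, the marker emitted when state.verbose is set looks like this:

    // Sketch: the marker prepended to responses after auto-truncation.
    function truncationMarker({ wasCompacted, originalTokens, compactedTokens, removedMessageCount }) {
      if (!wasCompacted) return "";
      if (originalTokens === undefined || compactedTokens === undefined || removedMessageCount === undefined)
        return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
      const reduction = originalTokens - compactedTokens;
      const percentage = Math.round((reduction / originalTokens) * 100);
      return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
    }

    truncationMarker({ wasCompacted: true, originalTokens: 150000, compactedTokens: 90000, removedMessageCount: 12 });
    // → "...[Auto-truncated: 12 messages removed, 150000 → 90000 tokens (40% reduction)]"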
@@ -2689,7 +2956,7 @@ async function buildFinalPayload(payload, model) {
2689
2956
  };
2690
2957
  }
2691
2958
  try {
2692
- const check = await checkNeedsCompaction(payload, model);
2959
+ const check = await checkNeedsCompactionOpenAI(payload, model);
2693
2960
  consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2694
2961
  if (!check.needed) return {
2695
2962
  finalPayload: payload,
@@ -2700,7 +2967,7 @@ async function buildFinalPayload(payload, model) {
2700
2967
  else if (check.reason === "bytes") reasonText = "size";
2701
2968
  else reasonText = "tokens";
2702
2969
  consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
2703
- const truncateResult = await autoTruncate(payload, model);
2970
+ const truncateResult = await autoTruncateOpenAI(payload, model);
2704
2971
  return {
2705
2972
  finalPayload: truncateResult.payload,
2706
2973
  truncateResult
@@ -2840,7 +3107,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2840
3107
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
2841
3108
  let response = originalResponse;
2842
3109
  if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
2843
- const marker = createTruncationResponseMarker(ctx.truncateResult);
3110
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
2844
3111
  response = {
2845
3112
  ...response,
2846
3113
  choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2909,7 +3176,7 @@ async function handleStreamingResponse$1(opts) {
2909
3176
  const acc = createStreamAccumulator();
2910
3177
  try {
2911
3178
  if (state.verbose && ctx.truncateResult?.wasCompacted) {
2912
- const marker = createTruncationResponseMarker(ctx.truncateResult);
3179
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
2913
3180
  const markerChunk = {
2914
3181
  id: `compact-marker-${Date.now()}`,
2915
3182
  object: "chat.completion.chunk",
@@ -4192,97 +4459,699 @@ historyRoutes.get("/", (c) => {
4192
4459
  });
4193
4460
 
4194
4461
  //#endregion
4195
- //#region src/routes/messages/utils.ts
4196
- function mapOpenAIStopReasonToAnthropic(finishReason) {
4197
- if (finishReason === null) return null;
4198
- return {
4199
- stop: "end_turn",
4200
- length: "max_tokens",
4201
- tool_calls: "tool_use",
4202
- content_filter: "end_turn"
4203
- }[finishReason];
4204
- }
4205
-
4206
- //#endregion
4207
- //#region src/routes/messages/non-stream-translation.ts
4208
- const OPENAI_TOOL_NAME_LIMIT = 64;
4462
+ //#region src/lib/auto-truncate-anthropic.ts
4209
4463
  /**
4210
- * Ensure all tool_use blocks have corresponding tool_result responses.
4211
- * This handles edge cases where conversation history may be incomplete:
4212
- * - Session interruptions where tool execution was cut off
4213
- * - Previous request failures
4214
- * - Client sending truncated history
4215
- *
4216
- * Adding placeholder responses prevents API errors and maintains protocol compliance.
4464
+ * Convert Anthropic message content to text for token counting.
4217
4465
  */
4218
- function fixMessageSequence(messages) {
4219
- const fixedMessages = [];
4220
- for (let i = 0; i < messages.length; i++) {
4221
- const message = messages[i];
4222
- fixedMessages.push(message);
4223
- if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
4224
- const foundToolResponses = /* @__PURE__ */ new Set();
4225
- let j = i + 1;
4226
- while (j < messages.length && messages[j].role === "tool") {
4227
- const toolMessage = messages[j];
4228
- if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
4229
- j++;
4230
- }
4231
- for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
4232
- consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
4233
- fixedMessages.push({
4234
- role: "tool",
4235
- tool_call_id: toolCall.id,
4236
- content: "Tool execution was interrupted or failed."
4237
- });
4466
+ function contentToText(content) {
4467
+ if (typeof content === "string") return content;
4468
+ const parts = [];
4469
+ for (const block of content) switch (block.type) {
4470
+ case "text":
4471
+ parts.push(block.text);
4472
+ break;
4473
+ case "tool_use":
4474
+ parts.push(`[tool_use: ${block.name}]`, JSON.stringify(block.input));
4475
+ break;
4476
+ case "tool_result":
4477
+ if (typeof block.content === "string") parts.push(block.content);
4478
+ else if (Array.isArray(block.content)) {
4479
+ for (const inner of block.content) if (inner.type === "text") parts.push(inner.text);
4238
4480
  }
4239
- }
4481
+ break;
4482
+ case "thinking":
4483
+ parts.push(block.thinking);
4484
+ break;
4485
+ default: break;
4240
4486
  }
4241
- return fixedMessages;
4487
+ return parts.join("\n");
4242
4488
  }
4243
- function translateToOpenAI(payload) {
4244
- const toolNameMapping = {
4245
- truncatedToOriginal: /* @__PURE__ */ new Map(),
4246
- originalToTruncated: /* @__PURE__ */ new Map()
4247
- };
4248
- const messages = translateAnthropicMessagesToOpenAI(payload.messages, payload.system, toolNameMapping);
4249
- return {
4250
- payload: {
4251
- model: translateModelName(payload.model),
4252
- messages: fixMessageSequence(messages),
4253
- max_tokens: payload.max_tokens,
4254
- stop: payload.stop_sequences,
4255
- stream: payload.stream,
4256
- temperature: payload.temperature,
4257
- top_p: payload.top_p,
4258
- user: payload.metadata?.user_id,
4259
- tools: translateAnthropicToolsToOpenAI(payload.tools, toolNameMapping),
4260
- tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice, toolNameMapping)
4261
- },
4262
- toolNameMapping
4263
- };
4489
+ /**
4490
+ * Estimate tokens for a message (fast, synchronous).
4491
+ * Uses ~4 chars per token approximation for internal calculations.
4492
+ * The final result is verified with the accurate tokenizer.
4493
+ */
4494
+ function estimateMessageTokens(msg) {
4495
+ const text = contentToText(msg.content);
4496
+ return Math.ceil(text.length / 4) + 4;
4264
4497
  }
4265
- function translateModelName(model) {
4266
- const shortNameMap = {
4267
- opus: "claude-opus-4.5",
4268
- sonnet: "claude-sonnet-4.5",
4269
- haiku: "claude-haiku-4.5"
4270
- };
4271
- if (shortNameMap[model]) return shortNameMap[model];
4272
- if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
4273
- if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
4274
- if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
4275
- if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
4276
- if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
4277
- if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
4278
- return model;
4498
+ /**
4499
+ * Count tokens for an Anthropic message using the model's tokenizer.
4500
+ */
4501
+ async function countMessageTokens(msg, model) {
4502
+ const text = contentToText(msg.content);
4503
+ return await countTextTokens(text, model) + 4;
4279
4504
  }
4280
- function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
4281
- const systemMessages = handleSystemPrompt(system);
4282
- const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4283
- return [...systemMessages, ...otherMessages];
4505
+ /**
4506
+ * Count tokens for system prompt.
4507
+ */
4508
+ async function countSystemTokens(system, model) {
4509
+ if (!system) return 0;
4510
+ if (typeof system === "string") return await countTextTokens(system, model) + 4;
4511
+ const text = system.map((block) => block.text).join("\n");
4512
+ return await countTextTokens(text, model) + 4;
4284
4513
  }
4285
- const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
4514
+ /**
4515
+ * Count total tokens for the payload using the model's tokenizer.
4516
+ */
4517
+ async function countTotalTokens(payload, model) {
4518
+ let total = await countSystemTokens(payload.system, model);
4519
+ for (const msg of payload.messages) total += await countMessageTokens(msg, model);
4520
+ if (payload.tools) {
4521
+ const toolsText = JSON.stringify(payload.tools);
4522
+ total += await countTextTokens(toolsText, model);
4523
+ }
4524
+ return total;
4525
+ }
4526
+ function getMessageBytes(msg) {
4527
+ return JSON.stringify(msg).length;
4528
+ }
4529
+ /**
4530
+ * Get tool_use IDs from an assistant message.
4531
+ */
4532
+ function getToolUseIds(msg) {
4533
+ if (msg.role !== "assistant") return [];
4534
+ if (typeof msg.content === "string") return [];
4535
+ const ids = [];
4536
+ for (const block of msg.content) if (block.type === "tool_use") ids.push(block.id);
4537
+ return ids;
4538
+ }
4539
+ /**
4540
+ * Get tool_result IDs from a user message.
4541
+ */
4542
+ function getToolResultIds(msg) {
4543
+ if (msg.role !== "user") return [];
4544
+ if (typeof msg.content === "string") return [];
4545
+ const ids = [];
4546
+ for (const block of msg.content) if (block.type === "tool_result") ids.push(block.tool_use_id);
4547
+ return ids;
4548
+ }
4549
+ /**
4550
+ * Filter orphaned tool_result messages (those without matching tool_use).
4551
+ */
4552
+ function filterOrphanedToolResults(messages) {
4553
+ const toolUseIds = /* @__PURE__ */ new Set();
4554
+ for (const msg of messages) for (const id of getToolUseIds(msg)) toolUseIds.add(id);
4555
+ const result = [];
4556
+ let removedCount = 0;
4557
+ for (const msg of messages) {
4558
+ if (msg.role === "user" && typeof msg.content !== "string") {
4559
+ if (getToolResultIds(msg).some((id) => !toolUseIds.has(id))) {
4560
+ const filteredContent = msg.content.filter((block) => {
4561
+ if (block.type === "tool_result" && !toolUseIds.has(block.tool_use_id)) {
4562
+ removedCount++;
4563
+ return false;
4564
+ }
4565
+ return true;
4566
+ });
4567
+ if (filteredContent.length === 0) continue;
4568
+ result.push({
4569
+ ...msg,
4570
+ content: filteredContent
4571
+ });
4572
+ continue;
4573
+ }
4574
+ }
4575
+ result.push(msg);
4576
+ }
4577
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_result`);
4578
+ return result;
4579
+ }
4580
+ /**
4581
+ * Filter orphaned tool_use messages (those without matching tool_result).
4582
+ * In Anthropic API, every tool_use must have a corresponding tool_result.
4583
+ */
4584
+ function filterOrphanedToolUse(messages) {
4585
+ const toolResultIds = /* @__PURE__ */ new Set();
4586
+ for (const msg of messages) for (const id of getToolResultIds(msg)) toolResultIds.add(id);
4587
+ const result = [];
4588
+ let removedCount = 0;
4589
+ for (const msg of messages) {
4590
+ if (msg.role === "assistant" && typeof msg.content !== "string") {
4591
+ if (getToolUseIds(msg).some((id) => !toolResultIds.has(id))) {
4592
+ const filteredContent = msg.content.filter((block) => {
4593
+ if (block.type === "tool_use" && !toolResultIds.has(block.id)) {
4594
+ removedCount++;
4595
+ return false;
4596
+ }
4597
+ return true;
4598
+ });
4599
+ if (filteredContent.length === 0) continue;
4600
+ result.push({
4601
+ ...msg,
4602
+ content: filteredContent
4603
+ });
4604
+ continue;
4605
+ }
4606
+ }
4607
+ result.push(msg);
4608
+ }
4609
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_use`);
4610
+ return result;
4611
+ }
4612
+ /**
4613
+ * Ensure messages start with a user message.
4614
+ */
4615
+ function ensureStartsWithUser(messages) {
4616
+ let startIndex = 0;
4617
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
4618
+ if (startIndex > 0) consola.debug(`[AutoTruncate:Anthropic] Skipped ${startIndex} leading non-user messages`);
4619
+ return messages.slice(startIndex);
4620
+ }
4621
+ /** Threshold for large tool_result content (bytes) */
4622
+ const LARGE_TOOL_RESULT_THRESHOLD = 1e4;
4623
+ /** Maximum length for compressed tool_result summary */
4624
+ const COMPRESSED_SUMMARY_LENGTH = 500;
4625
+ /**
4626
+ * Compress a large tool_result content to a summary.
4627
+ * Keeps the first and last portions with a note about truncation.
4628
+ */
4629
+ function compressToolResultContent(content) {
4630
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
4631
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
4632
+ const start$1 = content.slice(0, halfLen);
4633
+ const end = content.slice(-halfLen);
4634
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
4635
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
4636
+ }
4637
+ /**
4638
+ * Compress a tool_result block in an Anthropic message.
4639
+ */
4640
+ function compressToolResultBlock(block) {
4641
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) return {
4642
+ ...block,
4643
+ content: compressToolResultContent(block.content)
4644
+ };
4645
+ return block;
4646
+ }
4647
+ /**
4648
+ * Smart compression strategy:
4649
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
4650
+ * 2. Messages before that threshold get their tool_results compressed
4651
+ * 3. Returns compressed messages and stats
4652
+ *
4653
+ * @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
4654
+ */
4655
+ function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePercent) {
4656
+ const n = messages.length;
4657
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4658
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4659
+ for (let i = n - 1; i >= 0; i--) {
4660
+ const msg = messages[i];
4661
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4662
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4663
+ }
4664
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
4665
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
4666
+ let thresholdIndex = n;
4667
+ for (let i = n - 1; i >= 0; i--) {
4668
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
4669
+ thresholdIndex = i + 1;
4670
+ break;
4671
+ }
4672
+ thresholdIndex = i;
4673
+ }
4674
+ if (thresholdIndex >= n) return {
4675
+ messages,
4676
+ compressedCount: 0,
4677
+ compressThresholdIndex: n
4678
+ };
4679
+ const result = [];
4680
+ let compressedCount = 0;
4681
+ for (const [i, msg] of messages.entries()) {
4682
+ if (i < thresholdIndex && msg.role === "user" && Array.isArray(msg.content)) {
4683
+ if (msg.content.some((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD)) {
4684
+ const compressedContent = msg.content.map((block) => {
4685
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) {
4686
+ compressedCount++;
4687
+ return compressToolResultBlock(block);
4688
+ }
4689
+ return block;
4690
+ });
4691
+ result.push({
4692
+ ...msg,
4693
+ content: compressedContent
4694
+ });
4695
+ continue;
4696
+ }
4697
+ }
4698
+ result.push(msg);
4699
+ }
4700
+ return {
4701
+ messages: result,
4702
+ compressedCount,
4703
+ compressThresholdIndex: thresholdIndex
4704
+ };
4705
+ }
4706
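The threshold scan above walks the conversation from newest to oldest over suffix sums, so only the older part of the history is touched. With illustrative numbers:

    // tokenLimit = 100_000, preservePercent = 0.7 → preserveTokenLimit = 70_000
    // Suppose cumTokens first exceeds 70_000 at index 3 (counting from the start):
    // thresholdIndex = 4, so messages[0..3] may have their large tool_results compressed
    // while messages[4..] are preserved verbatim.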
+ /** Default fallback for when model capabilities are not available */
4707
+ const DEFAULT_CONTEXT_WINDOW = 2e5;
4708
+ function calculateLimits(model, config) {
4709
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
4710
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
4711
+ const byteLimit = getEffectiveByteLimitBytes();
4712
+ return {
4713
+ tokenLimit,
4714
+ byteLimit
4715
+ };
4716
+ }
4717
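Worked numbers for calculateLimits, assuming the defaults above and no dynamic adjustments from earlier 413 or token-limit errors:

    // rawTokenLimit = 200_000   (model context window, or the 2e5 fallback)
    // tokenLimit    = floor(200_000 * (1 - 2 / 100)) = 196_000
    // byteLimit     = 510 * 1024 = 522_240 bytes (unless a previous 413 lowered it)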
+ function findOptimalPreserveIndex(params) {
4718
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
4719
+ if (messages.length === 0) return 0;
4720
+ const markerBytes = 200;
4721
+ const availableTokens = tokenLimit - systemTokens - 50;
4722
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
4723
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
4724
+ const n = messages.length;
4725
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4726
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4727
+ for (let i = n - 1; i >= 0; i--) {
4728
+ const msg = messages[i];
4729
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4730
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4731
+ }
4732
+ let left = 0;
4733
+ let right = n;
4734
+ while (left < right) {
4735
+ const mid = left + right >>> 1;
4736
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
4737
+ else left = mid + 1;
4738
+ }
4739
+ return left;
4740
+ }
4741
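Because cumTokens and cumBytes are suffix sums, both arrays are non-increasing as the index grows, so the binary search above finds the smallest start index whose tail still fits the budget, i.e. the longest recent run of messages that can be preserved. A small illustration with made-up sums:

    // cumTokens = [120, 80, 45, 20, 0]  (4 messages), availableTokens = 50,
    // byte budget large enough to ignore.
    // The search returns 2: messages[2..] (45 tokens) fit, messages[1..] (80) do not.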
+ /**
4742
+ * Generate a summary of removed messages for context.
4743
+ * Extracts key information like tool calls and topics.
4744
+ */
4745
+ function generateRemovedMessagesSummary(removedMessages) {
4746
+ const toolCalls = [];
4747
+ let userMessageCount = 0;
4748
+ let assistantMessageCount = 0;
4749
+ for (const msg of removedMessages) {
4750
+ if (msg.role === "user") userMessageCount++;
4751
+ else assistantMessageCount++;
4752
+ if (Array.isArray(msg.content)) {
4753
+ for (const block of msg.content) if (block.type === "tool_use") toolCalls.push(block.name);
4754
+ }
4755
+ }
4756
+ const parts = [];
4757
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
4758
+ const breakdown = [];
4759
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
4760
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
4761
+ parts.push(`Messages: ${breakdown.join(", ")}`);
4762
+ }
4763
+ if (toolCalls.length > 0) {
4764
+ const uniqueTools = [...new Set(toolCalls)];
4765
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
4766
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
4767
+ }
4768
+ return parts.join(". ");
4769
+ }
4770
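For example (counts and tool names are illustrative), removing three user and four assistant messages that had called Read, Bash and Grep would produce:

    // "Messages: 3 user, 4 assistant. Tools used: Read, Bash, Grep"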
+ /**
4771
+ * Add a compression notice to the system prompt.
4772
+ * Informs the model that some tool_result content has been compressed.
4773
+ */
4774
+ function addCompressionNotice(payload, compressedCount) {
4775
+ const notice = `[CONTEXT NOTE]\n${compressedCount} large tool_result blocks have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]\n\n`;
4776
+ let newSystem;
4777
+ if (typeof payload.system === "string") newSystem = notice + payload.system;
4778
+ else if (Array.isArray(payload.system)) newSystem = [{
4779
+ type: "text",
4780
+ text: notice
4781
+ }, ...payload.system];
4782
+ else newSystem = notice;
4783
+ return {
4784
+ ...payload,
4785
+ system: newSystem
4786
+ };
4787
+ }
4788
+ /**
4789
+ * Create truncation context to prepend to system prompt.
4790
+ */
4791
+ function createTruncationSystemContext(removedCount, compressedCount, summary) {
4792
+ let context = `[CONVERSATION CONTEXT]\n`;
4793
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
4794
+ if (compressedCount > 0) context += `${compressedCount} large tool_result blocks have been compressed.\n`;
4795
+ if (summary) context += `Summary of removed content: ${summary}\n`;
4796
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]\n\n";
4797
+ return context;
4798
+ }
4799
+ /**
4800
+ * Create a truncation marker message (fallback when no system prompt).
4801
+ */
4802
+ function createTruncationMarker$1(removedCount, compressedCount, summary) {
4803
+ const parts = [];
4804
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
4805
+ if (compressedCount > 0) parts.push(`${compressedCount} tool_result blocks compressed`);
4806
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
4807
+ if (summary) content += `\n[Summary: ${summary}]`;
4808
+ return {
4809
+ role: "user",
4810
+ content
4811
+ };
4812
+ }
4813
+ /**
4814
+ * Perform auto-truncation on an Anthropic payload that exceeds limits.
4815
+ */
4816
+ async function autoTruncateAnthropic(payload, model, config = {}) {
4817
+ const cfg = {
4818
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4819
+ ...config
4820
+ };
4821
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4822
+ const originalBytes = JSON.stringify(payload).length;
4823
+ const originalTokens = await countTotalTokens(payload, model);
4824
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
4825
+ payload,
4826
+ wasCompacted: false,
4827
+ originalTokens,
4828
+ compactedTokens: originalTokens,
4829
+ removedMessageCount: 0
4830
+ };
4831
+ const exceedsTokens = originalTokens > tokenLimit;
4832
+ const exceedsBytes = originalBytes > byteLimit;
4833
+ let workingMessages = payload.messages;
4834
+ let compressedCount = 0;
4835
+ if (state.compressToolResults) {
4836
+ const compressionResult = smartCompressToolResults(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
4837
+ workingMessages = compressionResult.messages;
4838
+ compressedCount = compressionResult.compressedCount;
4839
+ const compressedPayload = {
4840
+ ...payload,
4841
+ messages: workingMessages
4842
+ };
4843
+ const compressedBytes = JSON.stringify(compressedPayload).length;
4844
+ const compressedTokens = await countTotalTokens(compressedPayload, model);
4845
+ if (compressedTokens <= tokenLimit && compressedBytes <= byteLimit) {
4846
+ let reason$1 = "tokens";
4847
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
4848
+ else if (exceedsBytes) reason$1 = "size";
4849
+ consola.info(`[AutoTruncate:Anthropic] ${reason$1}: ${originalTokens}→${compressedTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
4850
+ const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
4851
+ return {
4852
+ payload: noticePayload,
4853
+ wasCompacted: true,
4854
+ originalTokens,
4855
+ compactedTokens: await countTotalTokens(noticePayload, model),
4856
+ removedMessageCount: 0
4857
+ };
4858
+ }
4859
+ }
4860
+ const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
4861
+ const systemTokens = await countSystemTokens(payload.system, model);
4862
+ const messagesJson = JSON.stringify(workingMessages);
4863
+ const payloadOverhead = JSON.stringify({
4864
+ ...payload,
4865
+ messages: workingMessages
4866
+ }).length - messagesJson.length;
4867
+ consola.debug(`[AutoTruncate:Anthropic] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${Math.round(systemBytes / 1024)}KB`);
4868
+ const preserveIndex = findOptimalPreserveIndex({
4869
+ messages: workingMessages,
4870
+ systemBytes,
4871
+ systemTokens,
4872
+ payloadOverhead,
4873
+ tokenLimit,
4874
+ byteLimit
4875
+ });
4876
+ if (preserveIndex === 0) {
4877
+ consola.warn("[AutoTruncate:Anthropic] Cannot truncate, system messages too large");
4878
+ return {
4879
+ payload,
4880
+ wasCompacted: false,
4881
+ originalTokens,
4882
+ compactedTokens: originalTokens,
4883
+ removedMessageCount: 0
4884
+ };
4885
+ }
4886
+ if (preserveIndex >= workingMessages.length) {
4887
+ consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
4888
+ return {
4889
+ payload,
4890
+ wasCompacted: false,
4891
+ originalTokens,
4892
+ compactedTokens: originalTokens,
4893
+ removedMessageCount: 0
4894
+ };
4895
+ }
4896
+ let preserved = workingMessages.slice(preserveIndex);
4897
+ preserved = filterOrphanedToolResults(preserved);
4898
+ preserved = filterOrphanedToolUse(preserved);
4899
+ preserved = ensureStartsWithUser(preserved);
4900
+ preserved = filterOrphanedToolResults(preserved);
4901
+ preserved = filterOrphanedToolUse(preserved);
4902
+ if (preserved.length === 0) {
4903
+ consola.warn("[AutoTruncate:Anthropic] All messages filtered out after cleanup");
4904
+ return {
4905
+ payload,
4906
+ wasCompacted: false,
4907
+ originalTokens,
4908
+ compactedTokens: originalTokens,
4909
+ removedMessageCount: 0
4910
+ };
4911
+ }
4912
+ const removedMessages = payload.messages.slice(0, preserveIndex);
4913
+ const removedCount = workingMessages.length - preserved.length;
4914
+ const summary = generateRemovedMessagesSummary(removedMessages);
4915
+ let newSystem = payload.system;
4916
+ let newMessages = preserved;
4917
+ if (payload.system !== void 0) {
4918
+ const truncationContext = createTruncationSystemContext(removedCount, compressedCount, summary);
4919
+ if (typeof payload.system === "string") newSystem = truncationContext + payload.system;
4920
+ else if (Array.isArray(payload.system)) newSystem = [{
4921
+ type: "text",
4922
+ text: truncationContext
4923
+ }, ...payload.system];
4924
+ } else newMessages = [createTruncationMarker$1(removedCount, compressedCount, summary), ...preserved];
4925
+ const newPayload = {
4926
+ ...payload,
4927
+ system: newSystem,
4928
+ messages: newMessages
4929
+ };
4930
+ const newBytes = JSON.stringify(newPayload).length;
4931
+ const newTokens = await countTotalTokens(newPayload, model);
4932
+ let reason = "tokens";
4933
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
4934
+ else if (exceedsBytes) reason = "size";
4935
+ const actions = [];
4936
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
4937
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
4938
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
4939
+ consola.info(`[AutoTruncate:Anthropic] ${reason}: ${originalTokens}→${newTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
4940
+ if (newBytes > byteLimit || newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over limit (${newTokens} tokens, ${Math.round(newBytes / 1024)}KB)`);
4941
+ return {
4942
+ payload: newPayload,
4943
+ wasCompacted: true,
4944
+ originalTokens,
4945
+ compactedTokens: newTokens,
4946
+ removedMessageCount: removedCount
4947
+ };
4948
+ }
4949
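Taken together, autoTruncateAnthropic runs in this order: optional smart compression of old tool_results (only when --compress-tool-results is set), a search for the longest recent suffix of messages that fits, orphan and ordering cleanup, and finally a truncation notice prepended to the system prompt (or injected as a user message when there is no system prompt). A sketch of how a caller consumes the result, with `payload` and `model` standing in for real values:

    // Sketch only; `payload` is an Anthropic /v1/messages body and `model`
    // an entry from state.models.data.
    const result = await autoTruncateAnthropic(payload, model);
    const effectivePayload = result.wasCompacted ? result.payload : payload;
    consola.debug(`${result.originalTokens} → ${result.compactedTokens} tokens, ` +
      `${result.removedMessageCount} messages removed`);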
+ /**
4950
+ * Check if payload needs compaction.
4951
+ */
4952
+ async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
4953
+ const cfg = {
4954
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4955
+ ...config
4956
+ };
4957
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4958
+ const currentTokens = await countTotalTokens(payload, model);
4959
+ const currentBytes = JSON.stringify(payload).length;
4960
+ const exceedsTokens = currentTokens > tokenLimit;
4961
+ const exceedsBytes = currentBytes > byteLimit;
4962
+ let reason;
4963
+ if (exceedsTokens && exceedsBytes) reason = "both";
4964
+ else if (exceedsTokens) reason = "tokens";
4965
+ else if (exceedsBytes) reason = "bytes";
4966
+ return {
4967
+ needed: exceedsTokens || exceedsBytes,
4968
+ currentTokens,
4969
+ tokenLimit,
4970
+ currentBytes,
4971
+ byteLimit,
4972
+ reason
4973
+ };
4974
+ }
4975
+
4976
+ //#endregion
4977
+ //#region src/routes/messages/message-utils.ts
4978
+ function convertAnthropicMessages(messages) {
4979
+ return messages.map((msg) => {
4980
+ if (typeof msg.content === "string") return {
4981
+ role: msg.role,
4982
+ content: msg.content
4983
+ };
4984
+ const content = msg.content.map((block) => {
4985
+ if (block.type === "text") return {
4986
+ type: "text",
4987
+ text: block.text
4988
+ };
4989
+ if (block.type === "tool_use") return {
4990
+ type: "tool_use",
4991
+ id: block.id,
4992
+ name: block.name,
4993
+ input: JSON.stringify(block.input)
4994
+ };
4995
+ if (block.type === "tool_result") {
4996
+ const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
4997
+ return {
4998
+ type: "tool_result",
4999
+ tool_use_id: block.tool_use_id,
5000
+ content: resultContent
5001
+ };
5002
+ }
5003
+ return { type: block.type };
5004
+ });
5005
+ return {
5006
+ role: msg.role,
5007
+ content
5008
+ };
5009
+ });
5010
+ }
5011
+ function extractSystemPrompt(system) {
5012
+ if (!system) return void 0;
5013
+ if (typeof system === "string") return system;
5014
+ return system.map((block) => block.text).join("\n");
5015
+ }
5016
+ function extractToolCallsFromContent(content) {
5017
+ const tools = [];
5018
+ for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
5019
+ id: String(block.id),
5020
+ name: String(block.name),
5021
+ input: JSON.stringify(block.input)
5022
+ });
5023
+ return tools.length > 0 ? tools : void 0;
5024
+ }
5025
+ function extractToolCallsFromAnthropicContent(content) {
5026
+ const tools = [];
5027
+ for (const block of content) if (block.type === "tool_use") tools.push({
5028
+ id: block.id,
5029
+ name: block.name,
5030
+ input: JSON.stringify(block.input)
5031
+ });
5032
+ return tools.length > 0 ? tools : void 0;
5033
+ }
5034
+ function mapOpenAIStopReasonToAnthropic(finishReason) {
5035
+ if (finishReason === null) return null;
5036
+ return {
5037
+ stop: "end_turn",
5038
+ length: "max_tokens",
5039
+ tool_calls: "tool_use",
5040
+ content_filter: "end_turn"
5041
+ }[finishReason];
5042
+ }
5043
+
5044
+ //#endregion
5045
+ //#region src/routes/messages/non-stream-translation.ts
5046
+ const OPENAI_TOOL_NAME_LIMIT = 64;
5047
+ /**
5048
+ * Ensure all tool_use blocks have corresponding tool_result responses.
5049
+ * This handles edge cases where conversation history may be incomplete:
5050
+ * - Session interruptions where tool execution was cut off
5051
+ * - Previous request failures
5052
+ * - Client sending truncated history
5053
+ *
5054
+ * Adding placeholder responses prevents API errors and maintains protocol compliance.
5055
+ */
5056
+ function fixMessageSequence(messages) {
5057
+ const fixedMessages = [];
5058
+ for (let i = 0; i < messages.length; i++) {
5059
+ const message = messages[i];
5060
+ fixedMessages.push(message);
5061
+ if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
5062
+ const foundToolResponses = /* @__PURE__ */ new Set();
5063
+ let j = i + 1;
5064
+ while (j < messages.length && messages[j].role === "tool") {
5065
+ const toolMessage = messages[j];
5066
+ if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
5067
+ j++;
5068
+ }
5069
+ for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
5070
+ consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
5071
+ fixedMessages.push({
5072
+ role: "tool",
5073
+ tool_call_id: toolCall.id,
5074
+ content: "Tool execution was interrupted or failed."
5075
+ });
5076
+ }
5077
+ }
5078
+ }
5079
+ return fixedMessages;
5080
+ }
5081
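A minimal illustration of the repair above (ids are made up): an assistant tool call with no following tool message gets a synthetic placeholder so the downstream API does not reject the history.

    // Before:
    // [{ role: "assistant", tool_calls: [{ id: "call_1", ... }] },
    //  { role: "user", content: "continue" }]
    // After fixMessageSequence:
    // [{ role: "assistant", tool_calls: [{ id: "call_1", ... }] },
    //  { role: "tool", tool_call_id: "call_1",
    //    content: "Tool execution was interrupted or failed." },
    //  { role: "user", content: "continue" }]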
+ function translateToOpenAI(payload) {
5082
+ const toolNameMapping = {
5083
+ truncatedToOriginal: /* @__PURE__ */ new Map(),
5084
+ originalToTruncated: /* @__PURE__ */ new Map()
5085
+ };
5086
+ const messages = translateAnthropicMessagesToOpenAI(payload.messages, payload.system, toolNameMapping);
5087
+ return {
5088
+ payload: {
5089
+ model: translateModelName(payload.model),
5090
+ messages: fixMessageSequence(messages),
5091
+ max_tokens: payload.max_tokens,
5092
+ stop: payload.stop_sequences,
5093
+ stream: payload.stream,
5094
+ temperature: payload.temperature,
5095
+ top_p: payload.top_p,
5096
+ user: payload.metadata?.user_id,
5097
+ tools: translateAnthropicToolsToOpenAI(payload.tools, toolNameMapping),
5098
+ tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice, toolNameMapping)
5099
+ },
5100
+ toolNameMapping
5101
+ };
5102
+ }
5103
+ /**
5104
+ * Find the latest available model matching a family prefix.
5105
+ * Searches state.models for models starting with the given prefix
5106
+ * and returns the one with the highest version number.
5107
+ *
5108
+ * @param familyPrefix - e.g., "claude-opus", "claude-sonnet", "claude-haiku"
5109
+ * @param fallback - fallback model ID if no match found
5110
+ */
5111
+ function findLatestModel(familyPrefix, fallback) {
5112
+ const models = state.models?.data;
5113
+ if (!models || models.length === 0) return fallback;
5114
+ const candidates = models.filter((m) => m.id.startsWith(familyPrefix));
5115
+ if (candidates.length === 0) return fallback;
5116
+ candidates.sort((a, b) => {
5117
+ const versionA = extractVersion(a.id, familyPrefix);
5118
+ return extractVersion(b.id, familyPrefix) - versionA;
5119
+ });
5120
+ return candidates[0].id;
5121
+ }
5122
+ /**
5123
+ * Extract numeric version from model ID.
5124
+ * e.g., "claude-opus-4.5" with prefix "claude-opus" -> 4.5
5125
+ */
5126
+ function extractVersion(modelId, prefix) {
5127
+ const match = modelId.slice(prefix.length + 1).match(/^(\d+(?:\.\d+)?)/);
5128
+ return match ? Number.parseFloat(match[1]) : 0;
5129
+ }
5130
+ function translateModelName(model) {
5131
+ const aliasMap = {
5132
+ opus: "claude-opus",
5133
+ sonnet: "claude-sonnet",
5134
+ haiku: "claude-haiku"
5135
+ };
5136
+ if (aliasMap[model]) {
5137
+ const familyPrefix = aliasMap[model];
5138
+ const fallback = `${familyPrefix}-4.5`;
5139
+ return findLatestModel(familyPrefix, fallback);
5140
+ }
5141
+ if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
5142
+ if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
5143
+ if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
5144
+ if (/^claude-opus-4-\d+$/.test(model)) return findLatestModel("claude-opus", "claude-opus-4.5");
5145
+ if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
5146
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return findLatestModel("claude-haiku", "claude-haiku-4.5");
5147
+ return model;
5148
+ }
5149
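Examples of the alias and version handling above (model ids are illustrative and depend on what the /models endpoint actually returns): bare aliases resolve to the highest-versioned matching id in state.models, and dated ids collapse to their dotted form.

    // With state.models listing claude-opus-4 and claude-opus-4.5:
    // translateModelName("opus")                       → "claude-opus-4.5"
    // translateModelName("claude-sonnet-4-5-20250929") → "claude-sonnet-4.5"
    // extractVersion("claude-opus-4.5", "claude-opus") → 4.5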
+ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
5150
+ const systemMessages = handleSystemPrompt(system);
5151
+ const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
5152
+ return [...systemMessages, ...otherMessages];
5153
+ }
5154
+ const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
4286
5155
  /**
4287
5156
  * Filter out reserved keywords from system prompt text.
4288
5157
  * Copilot API rejects requests containing these keywords.
@@ -4406,7 +5275,7 @@ function translateAnthropicToolsToOpenAI(anthropicTools, toolNameMapping) {
4406
5275
  function: {
4407
5276
  name: getTruncatedToolName(tool.name, toolNameMapping),
4408
5277
  description: tool.description,
4409
- parameters: tool.input_schema
5278
+ parameters: tool.input_schema ?? {}
4410
5279
  }
4411
5280
  }));
4412
5281
  }
@@ -4511,6 +5380,9 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
4511
5380
  *
4512
5381
  * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
4513
5382
  * For other models, uses GPT tokenizers with appropriate buffers.
5383
+ *
5384
+ * When auto-truncate is enabled and the request would exceed limits,
5385
+ * returns an inflated token count to trigger Claude Code's auto-compact mechanism.
4514
5386
  */
4515
5387
  async function handleCountTokens(c) {
4516
5388
  try {
@@ -4522,7 +5394,16 @@ async function handleCountTokens(c) {
4522
5394
  consola.warn("Model not found, returning default token count");
4523
5395
  return c.json({ input_tokens: 1 });
4524
5396
  }
4525
- const isAnthropicModel$1 = selectedModel.vendor === "Anthropic";
5397
+ if (state.autoTruncate) {
5398
+ const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5399
+ if (truncateCheck.needed) {
5400
+ const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
5401
+ const inflatedTokens = Math.floor(contextWindow * .95);
5402
+ consola.debug(`[count_tokens] Would trigger auto-truncate: ${truncateCheck.currentTokens} tokens > ${truncateCheck.tokenLimit}, returning inflated count: ${inflatedTokens}`);
5403
+ return c.json({ input_tokens: inflatedTokens });
5404
+ }
5405
+ }
5406
+ const tokenizerName = selectedModel.capabilities?.tokenizer ?? "o200k_base";
4526
5407
  const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4527
5408
  if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4528
5409
  let mcpToolExist = false;
@@ -4533,8 +5414,8 @@ async function handleCountTokens(c) {
4533
5414
  }
4534
5415
  }
4535
5416
  let finalTokenCount = tokenCount.input + tokenCount.output;
4536
- if (!isAnthropicModel$1) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
4537
- consola.debug(`Token count: ${finalTokenCount} (${isAnthropicModel$1 ? "Anthropic tokenizer" : "GPT tokenizer"})`);
5417
+ if (!(selectedModel.vendor === "Anthropic")) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
5418
+ consola.debug(`Token count: ${finalTokenCount} (tokenizer: ${tokenizerName})`);
4538
5419
  return c.json({ input_tokens: finalTokenCount });
4539
5420
  } catch (error) {
4540
5421
  consola.error("Error counting tokens:", error);
@@ -4568,6 +5449,8 @@ const COPILOT_SUPPORTED_FIELDS = new Set([
4568
5449
  * Filter payload to only include fields supported by Copilot's Anthropic API.
4569
5450
  * This prevents errors like "Extra inputs are not permitted" for unsupported
4570
5451
  * fields like `output_config`.
5452
+ *
5453
+ * Also converts server-side tools (web_search, etc.) to custom tools.
4571
5454
  */
4572
5455
  function filterPayloadForCopilot(payload) {
4573
5456
  const filtered = {};
@@ -4575,6 +5458,7 @@ function filterPayloadForCopilot(payload) {
4575
5458
  for (const [key, value] of Object.entries(payload)) if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
4576
5459
  else unsupportedFields.push(key);
4577
5460
  if (unsupportedFields.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${unsupportedFields.join(", ")}`);
5461
+ if (filtered.tools) filtered.tools = convertServerToolsToCustom(filtered.tools);
4578
5462
  return filtered;
4579
5463
  }
4580
5464
  /**
@@ -4615,26 +5499,184 @@ async function createAnthropicMessages(payload) {
4615
5499
  "X-Initiator": isAgentCall ? "agent" : "user",
4616
5500
  "anthropic-version": "2023-06-01"
4617
5501
  };
4618
- consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
4619
- const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
4620
- method: "POST",
4621
- headers,
4622
- body: JSON.stringify(filteredPayload)
4623
- });
4624
- if (!response.ok) {
4625
- consola.error("Failed to create Anthropic messages", response);
4626
- throw await HTTPError.fromResponse("Failed to create Anthropic messages", response);
5502
+ consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
5503
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
5504
+ method: "POST",
5505
+ headers,
5506
+ body: JSON.stringify(filteredPayload)
5507
+ });
5508
+ if (!response.ok) {
5509
+ consola.debug("Request failed:", {
5510
+ model: filteredPayload.model,
5511
+ max_tokens: filteredPayload.max_tokens,
5512
+ stream: filteredPayload.stream,
5513
+ tools: filteredPayload.tools?.map((t) => ({
5514
+ name: t.name,
5515
+ type: t.type
5516
+ })),
5517
+ thinking: filteredPayload.thinking,
5518
+ messageCount: filteredPayload.messages.length
5519
+ });
5520
+ throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, filteredPayload.model);
5521
+ }
5522
+ if (payload.stream) return events(response);
5523
+ return await response.json();
5524
+ }
5525
+ const SERVER_TOOL_CONFIGS = {
5526
+ web_search: {
5527
+ description: "Search the web for current information. Returns web search results that can help answer questions about recent events, current data, or information that may have changed since your knowledge cutoff.",
5528
+ input_schema: {
5529
+ type: "object",
5530
+ properties: { query: {
5531
+ type: "string",
5532
+ description: "The search query"
5533
+ } },
5534
+ required: ["query"]
5535
+ }
5536
+ },
5537
+ web_fetch: {
5538
+ description: "Fetch content from a URL. NOTE: This is a client-side tool - the client must fetch the URL and return the content.",
5539
+ input_schema: {
5540
+ type: "object",
5541
+ properties: { url: {
5542
+ type: "string",
5543
+ description: "The URL to fetch"
5544
+ } },
5545
+ required: ["url"]
5546
+ }
5547
+ },
5548
+ code_execution: {
5549
+ description: "Execute code in a sandbox. NOTE: This is a client-side tool - the client must execute the code.",
5550
+ input_schema: {
5551
+ type: "object",
5552
+ properties: {
5553
+ code: {
5554
+ type: "string",
5555
+ description: "The code to execute"
5556
+ },
5557
+ language: {
5558
+ type: "string",
5559
+ description: "The programming language"
5560
+ }
5561
+ },
5562
+ required: ["code"]
5563
+ }
5564
+ },
5565
+ computer: {
5566
+ description: "Control computer desktop. NOTE: This is a client-side tool - the client must handle computer control.",
5567
+ input_schema: {
5568
+ type: "object",
5569
+ properties: { action: {
5570
+ type: "string",
5571
+ description: "The action to perform"
5572
+ } },
5573
+ required: ["action"]
5574
+ }
5575
+ }
5576
+ };
5577
+ /**
5578
+ * Check if a tool is a server-side tool that needs conversion.
5579
+ */
5580
+ function getServerToolPrefix(tool) {
5581
+ if (tool.type) {
5582
+ for (const prefix of Object.keys(SERVER_TOOL_CONFIGS)) if (tool.type.startsWith(prefix)) return prefix;
5583
+ }
5584
+ return null;
5585
+ }
5586
+ /**
5587
+ * Convert server-side tools to custom tools, or pass them through unchanged.
5588
+ * This allows them to be passed to the API and handled by the client.
5589
+ *
5590
+ * Note: Server-side tools are only converted if state.rewriteAnthropicTools is enabled.
5591
+ */
5592
+ function convertServerToolsToCustom(tools) {
5593
+ if (!tools) return;
5594
+ const result = [];
5595
+ for (const tool of tools) {
5596
+ const serverToolPrefix = getServerToolPrefix(tool);
5597
+ if (serverToolPrefix) {
5598
+ const config = SERVER_TOOL_CONFIGS[serverToolPrefix];
5599
+ if (!state.rewriteAnthropicTools) {
5600
+ consola.debug(`[DirectAnthropic] Passing ${serverToolPrefix} through unchanged (use --rewrite-anthropic-tools to convert)`);
5601
+ result.push(tool);
5602
+ continue;
5603
+ }
5604
+ if (config.remove) {
5605
+ consola.warn(`[DirectAnthropic] Removing unsupported server tool: ${tool.name}. Reason: ${config.removalReason}`);
5606
+ continue;
5607
+ }
5608
+ consola.debug(`[DirectAnthropic] Converting server tool to custom: ${tool.name} (type: ${tool.type})`);
5609
+ result.push({
5610
+ name: tool.name,
5611
+ description: config.description,
5612
+ input_schema: config.input_schema
5613
+ });
5614
+ } else result.push(tool);
5615
+ }
5616
+ return result.length > 0 ? result : void 0;
5617
+ }
5618
+ /**
5619
+ * Check if a model supports direct Anthropic API.
5620
+ * Returns true if redirect is disabled (direct API is on) and the model is from Anthropic vendor.
5621
+ */
5622
+ function supportsDirectAnthropicApi(modelId) {
5623
+ if (state.redirectAnthropic) return false;
5624
+ return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
5625
+ }
5626
+
5627
+ //#endregion
5628
+ //#region src/routes/messages/stream-accumulator.ts
5629
+ function createAnthropicStreamAccumulator() {
5630
+ return {
5631
+ model: "",
5632
+ inputTokens: 0,
5633
+ outputTokens: 0,
5634
+ stopReason: "",
5635
+ content: "",
5636
+ toolCalls: [],
5637
+ currentToolCall: null
5638
+ };
5639
+ }
5640
+ function processAnthropicEvent(event, acc) {
5641
+ switch (event.type) {
5642
+ case "content_block_delta":
5643
+ handleContentBlockDelta(event.delta, acc);
5644
+ break;
5645
+ case "content_block_start":
5646
+ handleContentBlockStart(event.content_block, acc);
5647
+ break;
5648
+ case "content_block_stop":
5649
+ handleContentBlockStop(acc);
5650
+ break;
5651
+ case "message_delta":
5652
+ handleMessageDelta(event.delta, event.usage, acc);
5653
+ break;
5654
+ default: break;
5655
+ }
5656
+ }
5657
+ function handleContentBlockDelta(delta, acc) {
5658
+ if (delta.type === "text_delta") acc.content += delta.text;
5659
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
5660
+ }
5661
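The accumulator simply concatenates streamed JSON fragments; the tool input only becomes valid JSON once the content block stops. Fragments below are illustrative:

    // input_json_delta fragments: '{"que', 'ry": "weat', 'her"}'
    // acc.currentToolCall.input after concatenation: '{"query": "weather"}'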
+ function handleContentBlockStart(block, acc) {
5662
+ if (block.type === "tool_use") acc.currentToolCall = {
5663
+ id: block.id,
5664
+ name: block.name,
5665
+ input: ""
5666
+ };
5667
+ }
5668
+ function handleContentBlockStop(acc) {
5669
+ if (acc.currentToolCall) {
5670
+ acc.toolCalls.push(acc.currentToolCall);
5671
+ acc.currentToolCall = null;
4627
5672
  }
4628
- if (payload.stream) return events(response);
4629
- return await response.json();
4630
5673
  }
4631
- /**
4632
- * Check if a model supports direct Anthropic API.
4633
- * Returns true if direct Anthropic API is enabled and the model is from Anthropic vendor.
4634
- */
4635
- function supportsDirectAnthropicApi(modelId) {
4636
- if (!state.directAnthropicApi) return false;
4637
- return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
5674
+ function handleMessageDelta(delta, usage, acc) {
5675
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
5676
+ if (usage) {
5677
+ acc.inputTokens = usage.input_tokens ?? 0;
5678
+ acc.outputTokens = usage.output_tokens;
5679
+ }
4638
5680
  }
4639
5681
 
4640
5682
  //#endregion
@@ -4776,40 +5818,28 @@ function translateErrorToAnthropicErrorEvent() {
4776
5818
  }
4777
5819
 
4778
5820
  //#endregion
4779
- //#region src/routes/messages/handler.ts
4780
- async function handleCompletion(c) {
4781
- const anthropicPayload = await c.req.json();
4782
- consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
4783
- const trackingId = c.get("trackingId");
4784
- const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
4785
- updateTrackerModel(trackingId, anthropicPayload.model);
4786
- const ctx = {
4787
- historyId: recordRequest("anthropic", {
4788
- model: anthropicPayload.model,
4789
- messages: convertAnthropicMessages(anthropicPayload.messages),
4790
- stream: anthropicPayload.stream ?? false,
4791
- tools: anthropicPayload.tools?.map((t) => ({
4792
- name: t.name,
4793
- description: t.description
4794
- })),
4795
- max_tokens: anthropicPayload.max_tokens,
4796
- temperature: anthropicPayload.temperature,
4797
- system: extractSystemPrompt(anthropicPayload.system)
4798
- }),
4799
- trackingId,
4800
- startTime
4801
- };
4802
- if (supportsDirectAnthropicApi(anthropicPayload.model)) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
4803
- return handleTranslatedCompletion(c, anthropicPayload, ctx);
4804
- }
5821
+ //#region src/routes/messages/direct-anthropic-handler.ts
4805
5822
  /**
4806
5823
  * Handle completion using direct Anthropic API (no translation needed)
4807
5824
  */
4808
5825
  async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
4809
5826
  consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
5827
+ const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
5828
+ let effectivePayload = anthropicPayload;
5829
+ let truncateResult;
5830
+ if (state.autoTruncate && selectedModel) {
5831
+ const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5832
+ consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
5833
+ if (check.needed) try {
5834
+ truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
5835
+ if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
5836
+ } catch (error) {
5837
+ consola.warn("[Anthropic] Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
5838
+ }
5839
+ } else if (state.autoTruncate && !selectedModel) consola.debug(`[Anthropic] Model '${anthropicPayload.model}' not found, skipping auto-truncate`);
4810
5840
  if (state.manualApprove) await awaitApproval();
4811
5841
  try {
4812
- const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(anthropicPayload));
5842
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
4813
5843
  ctx.queueWaitMs = queueWaitMs;
4814
5844
  if (Symbol.asyncIterator in response) {
4815
5845
  consola.debug("Streaming response from Copilot (direct Anthropic)");
@@ -4818,21 +5848,37 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
4818
5848
  await handleDirectAnthropicStreamingResponse({
4819
5849
  stream,
4820
5850
  response,
4821
- anthropicPayload,
5851
+ anthropicPayload: effectivePayload,
4822
5852
  ctx
4823
5853
  });
4824
5854
  });
4825
5855
  }
4826
- return handleDirectAnthropicNonStreamingResponse(c, response, ctx);
5856
+ return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
4827
5857
  } catch (error) {
5858
+ if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
4828
5859
  recordErrorResponse(ctx, anthropicPayload.model, error);
4829
5860
  throw error;
4830
5861
  }
4831
5862
  }
4832
5863
  /**
5864
+ * Log payload size info for debugging 413 errors
5865
+ */
5866
+ function logPayloadSizeInfoAnthropic(payload, model) {
5867
+ const payloadSize = JSON.stringify(payload).length;
5868
+ const messageCount = payload.messages.length;
5869
+ const toolCount = payload.tools?.length ?? 0;
5870
+ const systemSize = payload.system ? JSON.stringify(payload.system).length : 0;
5871
+ consola.info(`[Anthropic 413] Payload size: ${Math.round(payloadSize / 1024)}KB, messages: ${messageCount}, tools: ${toolCount}, system: ${Math.round(systemSize / 1024)}KB`);
5872
+ if (model?.capabilities?.limits) {
5873
+ const limits = model.capabilities.limits;
5874
+ consola.info(`[Anthropic 413] Model limits: context=${limits.max_context_window_tokens}, prompt=${limits.max_prompt_tokens}, output=${limits.max_output_tokens}`);
5875
+ }
5876
+ if (!state.autoTruncate) consola.info("[Anthropic 413] Consider enabling --auto-truncate to automatically reduce payload size");
5877
+ }
5878
+ /**
4833
5879
  * Handle non-streaming direct Anthropic response
4834
5880
  */
4835
- function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
5881
+ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
4836
5882
  consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
4837
5883
  recordResponse(ctx.historyId, {
4838
5884
  success: true,
@@ -4868,7 +5914,34 @@ function handleDirectAnthropicNonStreamingResponse(c, response, ctx) {
4868
5914
  outputTokens: response.usage.output_tokens,
4869
5915
  queueWaitMs: ctx.queueWaitMs
4870
5916
  });
4871
- return c.json(response);
5917
+ let finalResponse = response;
5918
+ if (state.verbose && truncateResult?.wasCompacted) {
5919
+ const marker = createTruncationMarker(truncateResult);
5920
+ finalResponse = prependMarkerToAnthropicResponse$1(response, marker);
5921
+ }
5922
+ return c.json(finalResponse);
5923
+ }
5924
+ /**
5925
+ * Prepend marker to Anthropic response content (at the beginning of first text block)
5926
+ */
5927
+ function prependMarkerToAnthropicResponse$1(response, marker) {
5928
+ if (!marker) return response;
5929
+ const content = [...response.content];
5930
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
5931
+ if (firstTextIndex !== -1) {
5932
+ const textBlock = content[firstTextIndex];
5933
+ if (textBlock.type === "text") content[firstTextIndex] = {
5934
+ ...textBlock,
5935
+ text: marker + textBlock.text
5936
+ };
5937
+ } else content.unshift({
5938
+ type: "text",
5939
+ text: marker
5940
+ });
5941
+ return {
5942
+ ...response,
5943
+ content
5944
+ };
4872
5945
  }
4873
5946
  /**
4874
5947
  * Handle streaming direct Anthropic response (passthrough SSE events)
@@ -4894,7 +5967,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
4894
5967
  data: rawEvent.data
4895
5968
  });
4896
5969
  }
4897
- recordStreamingResponse(acc, anthropicPayload.model, ctx);
5970
+ recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
4898
5971
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
4899
5972
  } catch (error) {
4900
5973
  consola.error("Direct Anthropic stream error:", error);
@@ -4912,6 +5985,34 @@ async function handleDirectAnthropicStreamingResponse(opts) {
4912
5985
  });
4913
5986
  }
4914
5987
  }
5988
+ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
5989
+ const contentBlocks = [];
5990
+ if (acc.content) contentBlocks.push({
5991
+ type: "text",
5992
+ text: acc.content
5993
+ });
5994
+ for (const tc of acc.toolCalls) contentBlocks.push({
5995
+ type: "tool_use",
5996
+ ...tc
5997
+ });
5998
+ recordResponse(ctx.historyId, {
5999
+ success: true,
6000
+ model: acc.model || fallbackModel,
6001
+ usage: {
6002
+ input_tokens: acc.inputTokens,
6003
+ output_tokens: acc.outputTokens
6004
+ },
6005
+ stop_reason: acc.stopReason || void 0,
6006
+ content: contentBlocks.length > 0 ? {
6007
+ role: "assistant",
6008
+ content: contentBlocks
6009
+ } : null,
6010
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
6011
+ }, Date.now() - ctx.startTime);
6012
+ }
6013
+
6014
+ //#endregion
6015
+ //#region src/routes/messages/translated-handler.ts
4915
6016
  /**
4916
6017
  * Handle completion using OpenAI translation path (legacy)
4917
6018
  */
@@ -4954,7 +6055,7 @@ function handleNonStreamingResponse(opts) {
4954
6055
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4955
6056
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4956
6057
  if (state.verbose && ctx.truncateResult?.wasCompacted) {
4957
- const marker = createTruncationResponseMarker(ctx.truncateResult);
6058
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
4958
6059
  anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4959
6060
  }
4960
6061
  recordResponse(ctx.historyId, {
@@ -5005,17 +6106,6 @@ function prependMarkerToAnthropicResponse(response, marker) {
5005
6106
  content
5006
6107
  };
5007
6108
  }
5008
- function createAnthropicStreamAccumulator() {
5009
- return {
5010
- model: "",
5011
- inputTokens: 0,
5012
- outputTokens: 0,
5013
- stopReason: "",
5014
- content: "",
5015
- toolCalls: [],
5016
- currentToolCall: null
5017
- };
5018
- }
5019
6109
  async function handleStreamingResponse(opts) {
5020
6110
  const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
5021
6111
  const streamState = {
@@ -5027,7 +6117,7 @@ async function handleStreamingResponse(opts) {
5027
6117
  const acc = createAnthropicStreamAccumulator();
5028
6118
  try {
5029
6119
  if (ctx.truncateResult?.wasCompacted) {
5030
- const marker = createTruncationResponseMarker(ctx.truncateResult);
6120
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
5031
6121
  await sendTruncationMarkerEvent(stream, streamState, marker);
5032
6122
  acc.content += marker;
5033
6123
  }
@@ -5116,47 +6206,6 @@ async function processStreamChunks(opts) {
5116
6206
  }
5117
6207
  }
5118
6208
  }
5119
- function processAnthropicEvent(event, acc) {
5120
- switch (event.type) {
5121
- case "content_block_delta":
5122
- handleContentBlockDelta(event.delta, acc);
5123
- break;
5124
- case "content_block_start":
5125
- handleContentBlockStart(event.content_block, acc);
5126
- break;
5127
- case "content_block_stop":
5128
- handleContentBlockStop(acc);
5129
- break;
5130
- case "message_delta":
5131
- handleMessageDelta(event.delta, event.usage, acc);
5132
- break;
5133
- default: break;
5134
- }
5135
- }
5136
- function handleContentBlockDelta(delta, acc) {
5137
- if (delta.type === "text_delta") acc.content += delta.text;
5138
- else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
5139
- }
5140
- function handleContentBlockStart(block, acc) {
5141
- if (block.type === "tool_use") acc.currentToolCall = {
5142
- id: block.id,
5143
- name: block.name,
5144
- input: ""
5145
- };
5146
- }
5147
- function handleContentBlockStop(acc) {
5148
- if (acc.currentToolCall) {
5149
- acc.toolCalls.push(acc.currentToolCall);
5150
- acc.currentToolCall = null;
5151
- }
5152
- }
5153
- function handleMessageDelta(delta, usage, acc) {
5154
- if (delta.stop_reason) acc.stopReason = delta.stop_reason;
5155
- if (usage) {
5156
- acc.inputTokens = usage.input_tokens ?? 0;
5157
- acc.outputTokens = usage.output_tokens;
5158
- }
5159
- }
5160
6209
  function recordStreamingResponse(acc, fallbackModel, ctx) {
5161
6210
  const contentBlocks = [];
5162
6211
  if (acc.content) contentBlocks.push({
@@ -5182,61 +6231,51 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
5182
6231
  toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
5183
6232
  }, Date.now() - ctx.startTime);
5184
6233
  }
5185
- function convertAnthropicMessages(messages) {
5186
- return messages.map((msg) => {
5187
- if (typeof msg.content === "string") return {
5188
- role: msg.role,
5189
- content: msg.content
5190
- };
5191
- const content = msg.content.map((block) => {
5192
- if (block.type === "text") return {
5193
- type: "text",
5194
- text: block.text
5195
- };
5196
- if (block.type === "tool_use") return {
5197
- type: "tool_use",
5198
- id: block.id,
5199
- name: block.name,
5200
- input: JSON.stringify(block.input)
5201
- };
5202
- if (block.type === "tool_result") {
5203
- const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
5204
- return {
5205
- type: "tool_result",
5206
- tool_use_id: block.tool_use_id,
5207
- content: resultContent
5208
- };
5209
- }
5210
- return { type: block.type };
5211
- });
5212
- return {
5213
- role: msg.role,
5214
- content
5215
- };
5216
- });
5217
- }
5218
- function extractSystemPrompt(system) {
5219
- if (!system) return void 0;
5220
- if (typeof system === "string") return system;
5221
- return system.map((block) => block.text).join("\n");
5222
- }
5223
- function extractToolCallsFromContent(content) {
5224
- const tools = [];
5225
- for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
5226
- id: String(block.id),
5227
- name: String(block.name),
5228
- input: JSON.stringify(block.input)
5229
- });
5230
- return tools.length > 0 ? tools : void 0;
6234
+
6235
+ //#endregion
6236
+ //#region src/routes/messages/handler.ts
6237
+ async function handleCompletion(c) {
6238
+ const anthropicPayload = await c.req.json();
6239
+ consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
6240
+ logToolInfo(anthropicPayload);
6241
+ const useDirectAnthropicApi = supportsDirectAnthropicApi(anthropicPayload.model);
6242
+ const trackingId = c.get("trackingId");
6243
+ const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
6244
+ updateTrackerModel(trackingId, anthropicPayload.model);
6245
+ const ctx = {
6246
+ historyId: recordRequest("anthropic", {
6247
+ model: anthropicPayload.model,
6248
+ messages: convertAnthropicMessages(anthropicPayload.messages),
6249
+ stream: anthropicPayload.stream ?? false,
6250
+ tools: anthropicPayload.tools?.map((t) => ({
6251
+ name: t.name,
6252
+ description: t.description
6253
+ })),
6254
+ max_tokens: anthropicPayload.max_tokens,
6255
+ temperature: anthropicPayload.temperature,
6256
+ system: extractSystemPrompt(anthropicPayload.system)
6257
+ }),
6258
+ trackingId,
6259
+ startTime
6260
+ };
6261
+ if (useDirectAnthropicApi) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
6262
+ return handleTranslatedCompletion(c, anthropicPayload, ctx);
5231
6263
  }
5232
- function extractToolCallsFromAnthropicContent(content) {
5233
- const tools = [];
5234
- for (const block of content) if (block.type === "tool_use") tools.push({
5235
- id: block.id,
5236
- name: block.name,
5237
- input: JSON.stringify(block.input)
5238
- });
5239
- return tools.length > 0 ? tools : void 0;
6264
+ /**
6265
+ * Log tool-related information for debugging
6266
+ */
6267
+ function logToolInfo(anthropicPayload) {
6268
+ if (anthropicPayload.tools?.length) {
6269
+ const toolInfo = anthropicPayload.tools.map((t) => ({
6270
+ name: t.name,
6271
+ type: t.type ?? "(custom)"
6272
+ }));
6273
+ consola.debug(`[Tools] Defined tools:`, JSON.stringify(toolInfo));
6274
+ }
6275
+ for (const msg of anthropicPayload.messages) if (typeof msg.content !== "string") for (const block of msg.content) {
6276
+ if (block.type === "tool_use") consola.debug(`[Tools] tool_use in message: ${block.name} (id: ${block.id})`);
6277
+ if (block.type === "tool_result") consola.debug(`[Tools] tool_result in message: id=${block.tool_use_id}, is_error=${block.is_error ?? false}`);
6278
+ }
5240
6279
  }
5241
6280
 
5242
6281
  //#endregion
@@ -5350,13 +6389,18 @@ server.route("/history", historyRoutes);
5350
6389
 
5351
6390
  //#endregion
5352
6391
  //#region src/start.ts
6392
+ /** Format limit values as "Xk" or "?" if not available */
6393
+ function formatLimit(value) {
6394
+ return value ? `${Math.round(value / 1e3)}k` : "?";
6395
+ }
5353
6396
  function formatModelInfo(model) {
5354
6397
  const limits = model.capabilities?.limits;
5355
- const contextK = limits?.max_prompt_tokens ? `${Math.round(limits.max_prompt_tokens / 1e3)}k` : "?";
5356
- const outputK = limits?.max_output_tokens ? `${Math.round(limits.max_output_tokens / 1e3)}k` : "?";
6398
+ const contextK = formatLimit(limits?.max_context_window_tokens);
6399
+ const promptK = formatLimit(limits?.max_prompt_tokens);
6400
+ const outputK = formatLimit(limits?.max_output_tokens);
5357
6401
  const features = [model.capabilities?.supports?.tool_calls && "tools", model.preview && "preview"].filter(Boolean).join(", ");
5358
6402
  const featureStr = features ? ` (${features})` : "";
5359
- return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
6403
+ return ` - ${model.id.length > 30 ? `${model.id.slice(0, 27)}...` : model.id.padEnd(30)} ctx:${contextK.padStart(5)} in:${promptK.padStart(5)} out:${outputK.padStart(4)}` + featureStr;
5360
6404
  }
5361
6405
  async function runServer(options) {
5362
6406
  consola.info(`copilot-api v${package_default.version}`);
@@ -5371,7 +6415,9 @@ async function runServer(options) {
5371
6415
  state.manualApprove = options.manual;
5372
6416
  state.showToken = options.showToken;
5373
6417
  state.autoTruncate = options.autoTruncate;
5374
- state.directAnthropicApi = options.directAnthropicApi;
6418
+ state.compressToolResults = options.compressToolResults;
6419
+ state.redirectAnthropic = options.redirectAnthropic;
6420
+ state.rewriteAnthropicTools = options.rewriteAnthropicTools;
5375
6421
  if (options.rateLimit) initAdaptiveRateLimiter({
5376
6422
  baseRetryIntervalSeconds: options.retryInterval,
5377
6423
  requestIntervalSeconds: options.requestInterval,
@@ -5380,7 +6426,9 @@ async function runServer(options) {
5380
6426
  });
5381
6427
  else consola.info("Rate limiting disabled");
5382
6428
  if (!options.autoTruncate) consola.info("Auto-truncate disabled");
5383
- if (!options.directAnthropicApi) consola.info("Direct Anthropic API disabled (using OpenAI translation)");
6429
+ if (options.compressToolResults) consola.info("Tool result compression enabled");
6430
+ if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
6431
+ if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
5384
6432
  initHistory(options.history, options.historyLimit);
5385
6433
  if (options.history) {
5386
6434
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5527,10 +6575,20 @@ const start = defineCommand({
5527
6575
  default: false,
5528
6576
  description: "Disable automatic conversation history truncation when exceeding limits"
5529
6577
  },
5530
- "no-direct-anthropic": {
6578
+ "compress-tool-results": {
6579
+ type: "boolean",
6580
+ default: false,
6581
+ description: "Compress old tool_result content before truncating messages (may lose context details)"
6582
+ },
6583
+ "redirect-anthropic": {
6584
+ type: "boolean",
6585
+ default: false,
6586
+ description: "Redirect Anthropic models through OpenAI translation (instead of direct API)"
6587
+ },
6588
+ "no-rewrite-anthropic-tools": {
5531
6589
  type: "boolean",
5532
6590
  default: false,
5533
- description: "Disable direct Anthropic API for Anthropic models (use OpenAI translation instead)"
6591
+ description: "Don't rewrite Anthropic server-side tools (web_search, etc.) to custom tool format"
5534
6592
  }
5535
6593
  },
5536
6594
  run({ args }) {
@@ -5552,7 +6610,9 @@ const start = defineCommand({
5552
6610
  history: !args["no-history"],
5553
6611
  historyLimit: Number.parseInt(args["history-limit"], 10),
5554
6612
  autoTruncate: !args["no-auto-truncate"],
5555
- directAnthropicApi: !args["no-direct-anthropic"]
6613
+ compressToolResults: args["compress-tool-results"],
6614
+ redirectAnthropic: args["redirect-anthropic"],
6615
+ rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"]
5556
6616
  });
5557
6617
  }
5558
6618
  });
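Flag-to-state mapping after this change, as derived from the run() handler above (defaults in parentheses):

    // --compress-tool-results      → state.compressToolResults = true    (default false)
    // --redirect-anthropic         → state.redirectAnthropic = true      (default false)
    // --no-rewrite-anthropic-tools → state.rewriteAnthropicTools = false (default true)
    // --no-auto-truncate           → state.autoTruncate = false          (default true)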