@hsupu/copilot-api 0.7.10 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,11 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
- autoCompact: true
+ verbose: false,
+ autoTruncate: true,
+ compressToolResults: false,
+ redirectAnthropic: false,
+ rewriteAnthropicTools: true
  };
 
  //#endregion
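0.7.12 replaces the single `autoCompact` switch with five feature flags. A rough sketch of the shape these defaults imply (the interface is hypothetical; `dist/main.js` keeps `state` as a plain untyped object):

```ts
// Hypothetical typing of the new flags, inferred from this diff.
interface TruncationFlags {
  verbose: boolean;              // when true, prepend truncation markers to responses
  autoTruncate: boolean;         // successor to the removed autoCompact flag
  compressToolResults: boolean;  // compress large tool_result blocks before dropping messages
  redirectAnthropic: boolean;
  rewriteAnthropicTools: boolean;
}

const defaults: TruncationFlags = {
  verbose: false,
  autoTruncate: true,
  compressToolResults: false,
  redirectAnthropic: false,
  rewriteAnthropicTools: true,
};
```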
@@ -90,27 +94,78 @@ const GITHUB_BASE_URL = "https://github.com";
  const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
  const GITHUB_APP_SCOPES = ["read:user"].join(" ");
 
+ //#endregion
+ //#region src/lib/auto-truncate-common.ts
+ const DEFAULT_AUTO_TRUNCATE_CONFIG = {
+ safetyMarginPercent: 2,
+ maxRequestBodyBytes: 510 * 1024,
+ preserveRecentPercent: .7
+ };
+ /** Dynamic byte limit that adjusts based on 413 errors */
+ let dynamicByteLimit = null;
+ /**
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
+ */
+ function onRequestTooLarge(failingBytes) {
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+ dynamicByteLimit = newLimit;
+ consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
+ }
+ /** Get the current effective byte limit */
+ function getEffectiveByteLimitBytes() {
+ return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
+ }
+ /** Dynamic token limits per model, adjusted based on token limit errors */
+ const dynamicTokenLimits = /* @__PURE__ */ new Map();
+ /**
+ * Called when a token limit error (400) occurs.
+ * Adjusts the token limit for the specific model to 95% of the reported limit.
+ */
+ function onTokenLimitExceeded(modelId, reportedLimit) {
+ const newLimit = Math.floor(reportedLimit * .95);
+ const previous = dynamicTokenLimits.get(modelId);
+ if (!previous || newLimit < previous) {
+ dynamicTokenLimits.set(modelId, newLimit);
+ consola.info(`[AutoTruncate] Adjusted token limit for ${modelId}: ${reportedLimit} reported → ${newLimit} effective`);
+ }
+ }
+ /**
+ * Get the effective token limit for a model.
+ * Returns the dynamic limit if set, otherwise null to use model capabilities.
+ */
+ function getEffectiveTokenLimit(modelId) {
+ return dynamicTokenLimits.get(modelId) ?? null;
+ }
+
  //#endregion
  //#region src/lib/error.ts
  var HTTPError = class HTTPError extends Error {
  status;
  responseText;
- constructor(message, status, responseText) {
+ /** Model ID that caused the error (if known) */
+ modelId;
+ constructor(message, status, responseText, modelId) {
  super(message);
  this.status = status;
  this.responseText = responseText;
+ this.modelId = modelId;
  }
- static async fromResponse(message, response) {
+ static async fromResponse(message, response, modelId) {
  const text = await response.text();
- return new HTTPError(message, response.status, text);
+ return new HTTPError(message, response.status, text, modelId);
  }
  };
  /** Parse token limit info from error message */
  function parseTokenLimitError(message) {
- const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
- if (match) return {
- current: Number.parseInt(match[1], 10),
- limit: Number.parseInt(match[2], 10)
+ const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+ if (openaiMatch) return {
+ current: Number.parseInt(openaiMatch[1], 10),
+ limit: Number.parseInt(openaiMatch[2], 10)
+ };
+ const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
+ if (anthropicMatch) return {
+ current: Number.parseInt(anthropicMatch[1], 10),
+ limit: Number.parseInt(anthropicMatch[2], 10)
  };
  return null;
  }
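`parseTokenLimitError` now recognizes both upstream phrasings: OpenAI-style ("prompt token count of N exceeds the limit of M") and Anthropic-style ("prompt is too long: N tokens > M maximum"). A self-contained check of the two regexes from the diff (the example numbers are illustrative):

```ts
function parseTokenLimitError(message: string): { current: number; limit: number } | null {
  const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
  if (openaiMatch) {
    return { current: Number.parseInt(openaiMatch[1], 10), limit: Number.parseInt(openaiMatch[2], 10) };
  }
  const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
  if (anthropicMatch) {
    return { current: Number.parseInt(anthropicMatch[1], 10), limit: Number.parseInt(anthropicMatch[2], 10) };
  }
  return null;
}

parseTokenLimitError("prompt token count of 131072 exceeds the limit of 128000");
// → { current: 131072, limit: 128000 }
parseTokenLimitError("prompt is too long: 210000 tokens > 200000 maximum");
// → { current: 210000, limit: 200000 }
```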
@@ -147,11 +202,10 @@ function formatRateLimitError(copilotMessage) {
  };
  }
  function forwardError(c, error) {
- consola.error("Error occurred:", error);
  if (error instanceof HTTPError) {
  if (error.status === 413) {
  const formattedError = formatRequestTooLargeError();
- consola.debug("Returning formatted 413 error:", formattedError);
+ consola.warn(`HTTP 413: Request too large`);
  return c.json(formattedError, 413);
  }
  let errorJson;
@@ -160,26 +214,38 @@ function forwardError(c, error) {
  } catch {
  errorJson = error.responseText;
  }
- consola.error("HTTP error:", errorJson);
  const copilotError = errorJson;
  if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
  const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
  if (tokenInfo) {
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
+ return c.json(formattedError, 400);
+ }
+ }
+ const anthropicError = errorJson;
+ if (anthropicError.error?.type === "invalid_request_error") {
+ const tokenInfo = parseTokenLimitError(anthropicError.error.message ?? "");
+ if (tokenInfo) {
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
  const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
- consola.debug("Returning formatted token limit error:", formattedError);
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
  return c.json(formattedError, 400);
  }
  }
  if (error.status === 429 || copilotError.error?.code === "rate_limited") {
  const formattedError = formatRateLimitError(copilotError.error?.message);
- consola.debug("Returning formatted rate limit error:", formattedError);
+ consola.warn(`HTTP 429: Rate limit exceeded`);
  return c.json(formattedError, 429);
  }
+ consola.error(`HTTP ${error.status}:`, errorJson);
  return c.json({ error: {
  message: error.responseText,
  type: "error"
  } }, error.status);
  }
+ consola.error("Unexpected error:", error);
  return c.json({ error: {
  message: error.message,
  type: "error"
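`forwardError` now feeds both error shapes back into the limits from `auto-truncate-common.ts`, closing a feedback loop: a 413 lowers the byte budget via `onRequestTooLarge`, a token-limit 400 lowers the per-model token budget via `onTokenLimitExceeded`, and later requests are truncated against the tightened values. A minimal sketch of the byte half of that loop (names from the diff; the numbers are illustrative):

```ts
let dynamicByteLimit: number | null = null;
const DEFAULT_MAX_BODY_BYTES = 510 * 1024;

function onRequestTooLarge(failingBytes: number): void {
  // 90% of the failing size, clamped to at least 100KB.
  dynamicByteLimit = Math.max(Math.floor(failingBytes * 0.9), 100 * 1024);
}

function getEffectiveByteLimitBytes(): number {
  return dynamicByteLimit ?? DEFAULT_MAX_BODY_BYTES;
}

// One 413 tightens the budget for every request that follows:
onRequestTooLarge(520 * 1024); // upstream rejected a ~520KB body
getEffectiveByteLimitBytes();  // 479232 (~468KB), down from the 522240 default
```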
@@ -305,6 +371,7 @@ async function pollAccessToken(deviceCode) {
  //#region src/lib/token.ts
  const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
  const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
+ let copilotTokenRefreshTimer = null;
  /**
  * Refresh the Copilot token with exponential backoff retry.
  * Returns the new token on success, or null if all retries fail.
@@ -323,20 +390,34 @@ async function refreshCopilotTokenWithRetry(maxRetries = 3) {
  consola.error("All token refresh attempts failed:", lastError);
  return null;
  }
+ /**
+ * Clear any existing token refresh timer.
+ * Call this before setting up a new timer or during cleanup.
+ */
+ function clearCopilotTokenRefresh() {
+ if (copilotTokenRefreshTimer) {
+ clearInterval(copilotTokenRefreshTimer);
+ copilotTokenRefreshTimer = null;
+ }
+ }
  const setupCopilotToken = async () => {
  const { token, refresh_in } = await getCopilotToken();
  state.copilotToken = token;
  consola.debug("GitHub Copilot Token fetched successfully!");
  if (state.showToken) consola.info("Copilot token:", token);
- const refreshInterval = (refresh_in - 60) * 1e3;
- setInterval(async () => {
+ const refreshInterval = Math.max((refresh_in - 60) * 1e3, 60 * 1e3);
+ clearCopilotTokenRefresh();
+ copilotTokenRefreshTimer = setInterval(() => {
  consola.debug("Refreshing Copilot token");
- const newToken = await refreshCopilotTokenWithRetry();
- if (newToken) {
- state.copilotToken = newToken;
- consola.debug("Copilot token refreshed");
- if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
- } else consola.error("Failed to refresh Copilot token after retries, using existing token");
+ refreshCopilotTokenWithRetry().then((newToken) => {
+ if (newToken) {
+ state.copilotToken = newToken;
+ consola.debug("Copilot token refreshed");
+ if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
+ } else consola.error("Failed to refresh Copilot token after retries, using existing token");
+ }).catch((error) => {
+ consola.error("Unexpected error during token refresh:", error);
+ });
  }, refreshInterval);
  };
  async function setupGitHubToken(options) {
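Three fixes land in `setupCopilotToken`: the refresh interval is floored at 60 seconds (a small or negative `refresh_in` could previously produce a pathological interval), the `async` interval callback is replaced with an explicit `.then/.catch` chain so a failed refresh cannot surface as an unhandled rejection, and the timer handle is stored so repeated setup calls no longer stack intervals. The pattern in isolation (the refresh function here is a hypothetical stub):

```ts
let timer: ReturnType<typeof setInterval> | null = null;

function clearRefresh(): void {
  if (timer) {
    clearInterval(timer);
    timer = null;
  }
}

function setupRefresh(refreshInSeconds: number, refresh: () => Promise<void>): void {
  // Floor at 60s so a tiny or negative refresh_in cannot busy-loop the proxy.
  const intervalMs = Math.max((refreshInSeconds - 60) * 1000, 60 * 1000);
  clearRefresh(); // calling setup twice no longer leaks a second interval
  timer = setInterval(() => {
    // Explicit .catch: a rejected refresh is logged instead of becoming
    // an unhandled promise rejection inside setInterval.
    refresh().catch((error) => console.error("refresh failed:", error));
  }, intervalMs);
}
```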
@@ -480,9 +561,23 @@ async function checkTokenExists() {
  return false;
  }
  }
- async function getDebugInfo() {
+ async function getAccountInfo() {
+ try {
+ await ensurePaths();
+ await setupGitHubToken();
+ if (!state.githubToken) return null;
+ const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
+ return {
+ user,
+ copilot
+ };
+ } catch {
+ return null;
+ }
+ }
+ async function getDebugInfo(includeAccount) {
  const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
- return {
+ const info = {
  version: version$1,
  runtime: getRuntimeInfo(),
  paths: {
@@ -491,9 +586,14 @@
  },
  tokenExists
  };
+ if (includeAccount && tokenExists) {
+ const account = await getAccountInfo();
+ if (account) info.account = account;
+ }
+ return info;
  }
  function printDebugInfoPlain(info) {
- consola.info(`copilot-api debug
+ let output = `copilot-api debug
 
  Version: ${info.version}
  Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
@@ -502,19 +602,24 @@ Paths:
  - APP_DIR: ${info.paths.APP_DIR}
  - GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
 
- Token exists: ${info.tokenExists ? "Yes" : "No"}`);
+ Token exists: ${info.tokenExists ? "Yes" : "No"}`;
+ if (info.account) output += `
+
+ Account Info:
+ ${JSON.stringify(info.account, null, 2)}`;
+ consola.info(output);
  }
  function printDebugInfoJson(info) {
  console.log(JSON.stringify(info, null, 2));
  }
  async function runDebug(options) {
- const debugInfo = await getDebugInfo();
- if (options.json) printDebugInfoJson(debugInfo);
- else printDebugInfoPlain(debugInfo);
+ const debugInfo$1 = await getDebugInfo(true);
+ if (options.json) printDebugInfoJson(debugInfo$1);
+ else printDebugInfoPlain(debugInfo$1);
  }
- const debug = defineCommand({
+ const debugInfo = defineCommand({
  meta: {
- name: "debug",
+ name: "info",
  description: "Print debug information about the application"
  },
  args: { json: {
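`getDebugInfo` gains an `includeAccount` flag; when a token exists and both lookups succeed, the result carries an `account` object with the GitHub user and Copilot usage. A hedged usage sketch (field names inferred from the diff):

```ts
// Hypothetical caller of the new signature.
const info = await getDebugInfo(true);
console.log(info.version, info.tokenExists);
if (info.account) {
  // Present only when includeAccount was true, a token exists,
  // and getGitHubUser()/getCopilotUsage() both succeeded.
  console.log(info.account.user, info.account.copilot);
}
```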
@@ -526,6 +631,48 @@ const debug = defineCommand({
  return runDebug({ json: args.json });
  }
  });
+ const debugModels = defineCommand({
+ meta: {
+ name: "models",
+ description: "Fetch and display raw model data from Copilot API"
+ },
+ args: {
+ "account-type": {
+ type: "string",
+ alias: "a",
+ default: "individual",
+ description: "The type of GitHub account (individual, business, enterprise)"
+ },
+ "github-token": {
+ type: "string",
+ alias: "g",
+ description: "GitHub token to use (skips interactive auth)"
+ }
+ },
+ async run({ args }) {
+ state.accountType = args["account-type"];
+ await ensurePaths();
+ if (args["github-token"]) {
+ state.githubToken = args["github-token"];
+ consola.info("Using provided GitHub token");
+ } else await setupGitHubToken();
+ const { token } = await getCopilotToken();
+ state.copilotToken = token;
+ consola.info("Fetching models from Copilot API...");
+ const models = await getModels();
+ console.log(JSON.stringify(models, null, 2));
+ }
+ });
+ const debug = defineCommand({
+ meta: {
+ name: "debug",
+ description: "Debug commands for troubleshooting"
+ },
+ subCommands: {
+ info: debugInfo,
+ models: debugModels
+ }
+ });
 
  //#endregion
  //#region src/logout.ts
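The flat `debug` command becomes a citty command group: `debug info` prints the report above and `debug models` dumps raw model data. A minimal standalone sketch of the same pattern, using citty's real `defineCommand`/`runMain` API (the command bodies are illustrative):

```ts
import { defineCommand, runMain } from "citty";

const info = defineCommand({
  meta: { name: "info", description: "Print debug information" },
  run() {
    console.log("debug info");
  },
});

const debug = defineCommand({
  meta: { name: "debug", description: "Debug commands for troubleshooting" },
  // citty routes `mycli debug info` to the matching subcommand.
  subCommands: { info },
});

runMain(debug);
```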
@@ -552,7 +699,7 @@ const logout = defineCommand({
  });
 
  //#endregion
- //#region src/patch-claude.ts
+ //#region src/patch-claude-code.ts
  const SUPPORTED_VERSIONS = {
  v2a: {
  min: "2.0.0",
@@ -872,7 +1019,7 @@ const patchClaude = defineCommand({
  //#endregion
  //#region package.json
  var name = "@hsupu/copilot-api";
- var version = "0.7.10";
+ var version = "0.7.12";
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
  var keywords = [
  "proxy",
@@ -900,11 +1047,15 @@ var scripts = {
  "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
  "release": "bumpp && npm publish --access public",
  "start": "NODE_ENV=production bun run ./src/main.ts",
+ "test": "bun test tests/*.test.ts",
+ "test:all": "bun test tests/*.test.ts && bun test tests/integration/",
+ "test:integration": "bun test tests/integration/",
  "typecheck": "tsc"
  };
  var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
  var lint_staged = { "*": "bun run lint --fix" };
  var dependencies = {
+ "@anthropic-ai/tokenizer": "^0.0.4",
  "citty": "^0.1.6",
  "clipboardy": "^5.0.0",
  "consola": "^3.4.2",
@@ -951,7 +1102,7 @@ var package_default = {
 
  //#endregion
  //#region src/lib/adaptive-rate-limiter.ts
- const DEFAULT_CONFIG$1 = {
+ const DEFAULT_CONFIG = {
  baseRetryIntervalSeconds: 10,
  maxRetryIntervalSeconds: 120,
  requestIntervalSeconds: 10,
@@ -980,7 +1131,7 @@ var AdaptiveRateLimiter = class {
  recoveryStepIndex = 0;
  constructor(config = {}) {
  this.config = {
- ...DEFAULT_CONFIG$1,
+ ...DEFAULT_CONFIG,
  ...config
  };
  }
@@ -1222,12 +1373,12 @@ let rateLimiterInstance = null;
  */
  function initAdaptiveRateLimiter(config = {}) {
  rateLimiterInstance = new AdaptiveRateLimiter(config);
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
  }
  /**
@@ -1848,6 +1999,7 @@ var RequestTracker = class {
  requests = /* @__PURE__ */ new Map();
  renderer = null;
  completedQueue = [];
+ completedTimeouts = /* @__PURE__ */ new Map();
  historySize = 5;
  completedDisplayMs = 2e3;
  setRenderer(renderer) {
@@ -1907,11 +2059,22 @@ var RequestTracker = class {
  this.renderer?.onRequestComplete(request);
  this.requests.delete(id);
  this.completedQueue.push(request);
- while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
- setTimeout(() => {
+ while (this.completedQueue.length > this.historySize) {
+ const removed = this.completedQueue.shift();
+ if (removed) {
+ const timeoutId$1 = this.completedTimeouts.get(removed.id);
+ if (timeoutId$1) {
+ clearTimeout(timeoutId$1);
+ this.completedTimeouts.delete(removed.id);
+ }
+ }
+ }
+ const timeoutId = setTimeout(() => {
  const idx = this.completedQueue.indexOf(request);
  if (idx !== -1) this.completedQueue.splice(idx, 1);
+ this.completedTimeouts.delete(id);
  }, this.completedDisplayMs);
+ this.completedTimeouts.set(id, timeoutId);
  }
  /**
  * Mark request as failed with error
@@ -1946,11 +2109,13 @@ var RequestTracker = class {
  return this.requests.get(id);
  }
  /**
- * Clear all tracked requests
+ * Clear all tracked requests and pending timeouts
  */
  clear() {
  this.requests.clear();
  this.completedQueue = [];
+ for (const timeoutId of this.completedTimeouts.values()) clearTimeout(timeoutId);
+ this.completedTimeouts.clear();
  }
  };
  const requestTracker = new RequestTracker();
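`RequestTracker` previously left a live `setTimeout` behind for every completed request that was evicted early or cleared. The fix keys each pending timeout by request id so that both the history-size eviction and `clear()` can cancel it. The bookkeeping pattern in isolation (a sketch, not the tracker itself):

```ts
const pending = new Map<string, ReturnType<typeof setTimeout>>();

function scheduleRemoval(id: string, onExpire: () => void, delayMs: number): void {
  const timeoutId = setTimeout(() => {
    pending.delete(id); // fired: forget the handle
    onExpire();
  }, delayMs);
  pending.set(id, timeoutId);
}

function evict(id: string): void {
  const timeoutId = pending.get(id);
  if (timeoutId) {
    clearTimeout(timeoutId); // evicted early: cancel instead of leaking
    pending.delete(id);
  }
}

function clearAll(): void {
  for (const timeoutId of pending.values()) clearTimeout(timeoutId);
  pending.clear();
}
```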
@@ -2101,6 +2266,14 @@ const getTokenizerFromModel = (model) => {
  return model.capabilities?.tokenizer || "o200k_base";
  };
  /**
+ * Count tokens in a text string using the model's tokenizer.
+ * This is a simple wrapper for counting tokens in plain text.
+ */
+ const countTextTokens = async (text, model) => {
+ const tokenizer = getTokenizerFromModel(model);
+ return (await getEncodeChatFunction(tokenizer)).encode(text).length;
+ };
+ /**
  * Get model-specific constants for token calculation.
  * These values are empirically determined based on OpenAI's function calling token overhead.
  * - funcInit: Tokens for initializing a function definition
@@ -2206,7 +2379,9 @@ const numTokensForTools = (tools, encoder, constants) => {
  return funcTokenCount;
  };
  /**
- * Calculate the token count of messages, supporting multiple GPT encoders
+ * Calculate the token count of messages.
+ * Uses the tokenizer specified by the GitHub Copilot API model info.
+ * All models (including Claude) use GPT tokenizers (o200k_base or cl100k_base).
  */
  const getTokenCount = async (payload, model) => {
  const tokenizer = getTokenizerFromModel(model);
@@ -2225,32 +2400,18 @@ const getTokenCount = async (payload, model) => {
  };
 
  //#endregion
- //#region src/lib/auto-compact.ts
- const DEFAULT_CONFIG = {
- safetyMarginPercent: 2,
- maxRequestBodyBytes: 500 * 1024
- };
- /** Dynamic byte limit that adjusts based on 413 errors */
- let dynamicByteLimit = null;
- /**
- * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
- */
- function onRequestTooLarge(failingBytes) {
- const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
- dynamicByteLimit = newLimit;
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
- }
- function calculateLimits(model, config) {
- const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+ //#region src/lib/auto-truncate-openai.ts
+ function calculateLimits$1(model, config) {
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
  const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
- const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
+ const byteLimit = getEffectiveByteLimitBytes();
  return {
  tokenLimit,
  byteLimit
  };
  }
  /** Estimate tokens for a single message (fast approximation) */
- function estimateMessageTokens(msg) {
+ function estimateMessageTokens$1(msg) {
  let charCount = 0;
  if (typeof msg.content === "string") charCount = msg.content.length;
  else if (Array.isArray(msg.content)) {
@@ -2261,7 +2422,7 @@ function estimateMessageTokens(msg) {
  return Math.ceil(charCount / 4) + 10;
  }
  /** Get byte size of a message */
- function getMessageBytes(msg) {
+ function getMessageBytes$1(msg) {
  return JSON.stringify(msg).length;
  }
  /** Extract system/developer messages from the beginning */
@@ -2283,7 +2444,7 @@ function getToolCallIds(msg) {
  return [];
  }
  /** Filter orphaned tool_result messages */
- function filterOrphanedToolResults(messages) {
+ function filterOrphanedToolResults$1(messages) {
  const toolUseIds = /* @__PURE__ */ new Set();
  for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
  let removedCount = 0;
@@ -2294,22 +2455,127 @@ function filterOrphanedToolResults(messages) {
  }
  return true;
  });
- if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_result`);
  return filtered;
  }
+ /** Get tool_result IDs from all tool messages */
+ function getToolResultIds$1(messages) {
+ const ids = /* @__PURE__ */ new Set();
+ for (const msg of messages) if (msg.role === "tool" && msg.tool_call_id) ids.add(msg.tool_call_id);
+ return ids;
+ }
+ /** Filter orphaned tool_use messages (those without matching tool_result) */
+ function filterOrphanedToolUse$1(messages) {
+ const toolResultIds = getToolResultIds$1(messages);
+ const result = [];
+ let removedCount = 0;
+ for (const msg of messages) {
+ if (msg.role === "assistant" && msg.tool_calls) {
+ const filteredToolCalls = msg.tool_calls.filter((tc) => {
+ if (!toolResultIds.has(tc.id)) {
+ removedCount++;
+ return false;
+ }
+ return true;
+ });
+ if (filteredToolCalls.length === 0) {
+ if (msg.content) result.push({
+ ...msg,
+ tool_calls: void 0
+ });
+ continue;
+ }
+ result.push({
+ ...msg,
+ tool_calls: filteredToolCalls
+ });
+ continue;
+ }
+ result.push(msg);
+ }
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_use`);
+ return result;
+ }
  /** Ensure messages start with a user message */
- function ensureStartsWithUser(messages) {
+ function ensureStartsWithUser$1(messages) {
  let startIndex = 0;
  while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
- if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+ if (startIndex > 0) consola.debug(`[AutoTruncate:OpenAI] Skipped ${startIndex} leading non-user messages`);
  return messages.slice(startIndex);
  }
+ /** Threshold for large tool message content (bytes) */
+ const LARGE_TOOL_RESULT_THRESHOLD$1 = 1e4;
+ /** Maximum length for compressed tool_result summary */
+ const COMPRESSED_SUMMARY_LENGTH$1 = 500;
+ /**
+ * Compress a large tool message content to a summary.
+ * Keeps the first and last portions with a note about truncation.
+ */
+ function compressToolResultContent$1(content) {
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD$1) return content;
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH$1 / 2);
+ const start$1 = content.slice(0, halfLen);
+ const end = content.slice(-halfLen);
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH$1;
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
+ }
+ /**
+ * Smart compression strategy for OpenAI format:
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
+ * 2. Messages before that threshold get their tool content compressed
+ * 3. Returns compressed messages and stats
+ *
+ * @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
+ */
+ function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePercent) {
+ const n = messages.length;
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
+ for (let i = n - 1; i >= 0; i--) {
+ const msg = messages[i];
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
+ }
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
+ let thresholdIndex = n;
+ for (let i = n - 1; i >= 0; i--) {
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
+ thresholdIndex = i + 1;
+ break;
+ }
+ thresholdIndex = i;
+ }
+ if (thresholdIndex >= n) return {
+ messages,
+ compressedCount: 0,
+ compressThresholdIndex: n
+ };
+ const result = [];
+ let compressedCount = 0;
+ for (const [i, msg] of messages.entries()) {
+ if (i < thresholdIndex && msg.role === "tool" && typeof msg.content === "string" && msg.content.length > LARGE_TOOL_RESULT_THRESHOLD$1) {
+ compressedCount++;
+ result.push({
+ ...msg,
+ content: compressToolResultContent$1(msg.content)
+ });
+ continue;
+ }
+ result.push(msg);
+ }
+ return {
+ messages: result,
+ compressedCount,
+ compressThresholdIndex: thresholdIndex
+ };
+ }
  /**
  * Find the optimal index from which to preserve messages.
  * Uses binary search with pre-calculated cumulative sums.
  * Returns the smallest index where the preserved portion fits within limits.
  */
- function findOptimalPreserveIndex(params) {
+ function findOptimalPreserveIndex$1(params) {
  const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
  if (messages.length === 0) return 0;
  const markerBytes = 200;
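`compressToolResultContent$1` keeps the first and last 250 characters (half of `COMPRESSED_SUMMARY_LENGTH$1`) of any tool result longer than 10,000 characters and splices in an omission note; `smartCompressToolResults$1` applies it only to messages older than the most recent `preserveRecentPercent` slice of the budget. The compression helper in isolation, with a hypothetical input:

```ts
const LARGE_TOOL_RESULT_THRESHOLD = 10_000;
const COMPRESSED_SUMMARY_LENGTH = 500;

function compressToolResultContent(content: string): string {
  if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
  const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2); // 250 chars per side
  const start = content.slice(0, halfLen);
  const end = content.slice(-halfLen);
  const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
  return `${start}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
}

// A 50,000-char tool result collapses to 500 chars plus the omission note:
const compressed = compressToolResultContent("x".repeat(50_000));
// contains "[... 49,500 characters omitted for brevity ...]"
```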
@@ -2321,8 +2587,8 @@ function findOptimalPreserveIndex(params) {
  const cumBytes = Array.from({ length: n + 1 }, () => 0);
  for (let i = n - 1; i >= 0; i--) {
  const msg = messages[i];
- cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
- cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
  }
  let left = 0;
  let right = n;
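The suffix sums built here make "what does the tail starting at index i cost" an O(1) lookup, which is what lets `findOptimalPreserveIndex$1` binary-search for the smallest preserve index whose tail still fits. Because suffix costs only shrink as i grows, the fits-or-not predicate is monotonic. A sketch of the search over such sums (costs and limit are made up):

```ts
// cum[i] = cost of messages[i..n-1]; cum[n] = 0, built right-to-left as in the diff.
function smallestFittingIndex(cum: number[], limit: number): number {
  const n = cum.length - 1;
  let left = 0;
  let right = n; // invariant: the tail starting at `right` fits (cum[n] = 0)
  while (left < right) {
    const mid = (left + right) >> 1;
    if (cum[mid] <= limit) right = mid; // tail fits: try keeping more history
    else left = mid + 1;                // too big: drop more from the front
  }
  return left;
}

const costs = [40, 10, 25, 5];  // per-message token estimates
const cum = [80, 40, 30, 5, 0]; // suffix sums over costs
smallestFittingIndex(cum, 35);  // → 2: preserve messages[2..3]
```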
@@ -2336,12 +2602,12 @@
  /**
  * Check if payload needs compaction based on model limits or byte size.
  */
- async function checkNeedsCompaction(payload, model, config = {}) {
+ async function checkNeedsCompactionOpenAI(payload, model, config = {}) {
  const cfg = {
- ...DEFAULT_CONFIG,
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
  ...config
  };
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
  const currentTokens = (await getTokenCount(payload, model)).input;
  const currentBytes = JSON.stringify(payload).length;
  const exceedsTokens = currentTokens > tokenLimit;
@@ -2359,23 +2625,90 @@ async function checkNeedsCompaction(payload, model, config = {}) {
  reason
  };
  }
- /** Create a truncation marker message */
- function createTruncationMarker(removedCount) {
+ /**
+ * Generate a summary of removed messages for context.
+ * Extracts key information like tool calls and topics.
+ */
+ function generateRemovedMessagesSummary$1(removedMessages) {
+ const toolCalls = [];
+ let userMessageCount = 0;
+ let assistantMessageCount = 0;
+ for (const msg of removedMessages) {
+ if (msg.role === "user") userMessageCount++;
+ else if (msg.role === "assistant") assistantMessageCount++;
+ if (msg.tool_calls) {
+ for (const tc of msg.tool_calls) if (tc.function.name) toolCalls.push(tc.function.name);
+ }
+ }
+ const parts = [];
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
+ const breakdown = [];
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
+ parts.push(`Messages: ${breakdown.join(", ")}`);
+ }
+ if (toolCalls.length > 0) {
+ const uniqueTools = [...new Set(toolCalls)];
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
+ }
+ return parts.join(". ");
+ }
+ /**
+ * Add a compression notice to the system message.
+ * Informs the model that some tool content has been compressed.
+ */
+ function addCompressionNotice$1(payload, compressedCount) {
+ const notice = `\n\n[CONTEXT NOTE]\n${compressedCount} large tool results have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]`;
+ const messages = [...payload.messages];
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msg = messages[i];
+ if (msg.role === "system" || msg.role === "developer") {
+ if (typeof msg.content === "string") messages[i] = {
+ ...msg,
+ content: msg.content + notice
+ };
+ break;
+ }
+ }
+ return {
+ ...payload,
+ messages
+ };
+ }
+ /**
+ * Create truncation context to append to system messages.
+ */
+ function createTruncationSystemContext$1(removedCount, compressedCount, summary) {
+ let context = `\n\n[CONVERSATION CONTEXT]\n`;
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
+ if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
+ if (summary) context += `Summary of removed content: ${summary}\n`;
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
+ return context;
+ }
+ /** Create a truncation marker message (fallback when no system message) */
+ function createTruncationMarker$2(removedCount, compressedCount, summary) {
+ const parts = [];
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
+ if (compressedCount > 0) parts.push(`${compressedCount} tool results compressed`);
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
+ if (summary) content += `\n[Summary: ${summary}]`;
  return {
  role: "user",
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
+ content
  };
  }
  /**
- * Perform auto-compaction on a payload that exceeds limits.
+ * Perform auto-truncation on a payload that exceeds limits.
  * Uses binary search to find the optimal truncation point.
  */
- async function autoCompact(payload, model, config = {}) {
+ async function autoTruncateOpenAI(payload, model, config = {}) {
  const cfg = {
- ...DEFAULT_CONFIG,
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
  ...config
  };
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
  const originalTokens = (await getTokenCount(payload, model)).input;
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
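Instead of the old anonymous `[CONTEXT TRUNCATED]` marker, the truncation path now summarizes what was dropped. Given hypothetical removed history with 3 user turns, 4 assistant turns, and seven distinct tools called, `generateRemovedMessagesSummary$1` produces:

```ts
// Illustrative output only; the format comes from the function above.
// "Messages: 3 user, 4 assistant. Tools used: read_file, grep, bash, edit_file, web_search, +2 more"
// Distinct tool names beyond the first five are collapsed into "+N more".
```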
@@ -2387,18 +2720,44 @@ async function autoCompact(payload, model, config = {}) {
  };
  const exceedsTokens = originalTokens > tokenLimit;
  const exceedsBytes = originalBytes > byteLimit;
- let reason;
- if (exceedsTokens && exceedsBytes) reason = "tokens and size";
- else if (exceedsBytes) reason = "size";
- else reason = "tokens";
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
- const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
- const messagesJson = JSON.stringify(payload.messages);
- const payloadOverhead = originalBytes - messagesJson.length;
- const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
- const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
- consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
- const preserveIndex = findOptimalPreserveIndex({
+ let workingMessages = payload.messages;
+ let compressedCount = 0;
+ if (state.compressToolResults) {
+ const compressionResult = smartCompressToolResults$1(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
+ workingMessages = compressionResult.messages;
+ compressedCount = compressionResult.compressedCount;
+ const compressedPayload = {
+ ...payload,
+ messages: workingMessages
+ };
+ const compressedBytes = JSON.stringify(compressedPayload).length;
+ const compressedTokenCount = await getTokenCount(compressedPayload, model);
+ if (compressedTokenCount.input <= tokenLimit && compressedBytes <= byteLimit) {
+ let reason$1 = "tokens";
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
+ else if (exceedsBytes) reason$1 = "size";
+ consola.info(`[AutoTruncate:OpenAI] ${reason$1}: ${originalTokens}→${compressedTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
+ const noticePayload = addCompressionNotice$1(compressedPayload, compressedCount);
+ const noticeTokenCount = await getTokenCount(noticePayload, model);
+ return {
+ payload: noticePayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: noticeTokenCount.input,
+ removedMessageCount: 0
+ };
+ }
+ }
+ const { systemMessages, conversationMessages } = extractSystemMessages(workingMessages);
+ const messagesJson = JSON.stringify(workingMessages);
+ const payloadOverhead = JSON.stringify({
+ ...payload,
+ messages: workingMessages
+ }).length - messagesJson.length;
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes$1(m) + 1, 0);
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens$1(m), 0);
+ consola.debug(`[AutoTruncate:OpenAI] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+ const preserveIndex = findOptimalPreserveIndex$1({
  messages: conversationMessages,
  systemBytes,
  systemTokens,
@@ -2407,7 +2766,7 @@ async function autoCompact(payload, model, config = {}) {
  byteLimit
  });
  if (preserveIndex === 0) {
- consola.warn("Auto-compact: Cannot truncate, system messages too large");
+ consola.warn("[AutoTruncate:OpenAI] Cannot truncate, system messages too large");
  return {
  payload,
  wasCompacted: false,
@@ -2417,7 +2776,7 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  if (preserveIndex >= conversationMessages.length) {
- consola.warn("Auto-compact: Would need to remove all messages");
+ consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
  return {
  payload,
  wasCompacted: false,
@@ -2427,11 +2786,13 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  let preserved = conversationMessages.slice(preserveIndex);
- preserved = filterOrphanedToolResults(preserved);
- preserved = ensureStartsWithUser(preserved);
- preserved = filterOrphanedToolResults(preserved);
+ preserved = filterOrphanedToolResults$1(preserved);
+ preserved = filterOrphanedToolUse$1(preserved);
+ preserved = ensureStartsWithUser$1(preserved);
+ preserved = filterOrphanedToolResults$1(preserved);
+ preserved = filterOrphanedToolUse$1(preserved);
  if (preserved.length === 0) {
- consola.warn("Auto-compact: All messages filtered out after cleanup");
+ consola.warn("[AutoTruncate:OpenAI] All messages filtered out after cleanup");
  return {
  payload,
  wasCompacted: false,
@@ -2440,20 +2801,36 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
+ const removedMessages = conversationMessages.slice(0, preserveIndex);
  const removedCount = conversationMessages.length - preserved.length;
- const marker = createTruncationMarker(removedCount);
+ const summary = generateRemovedMessagesSummary$1(removedMessages);
+ let newSystemMessages = systemMessages;
+ let newMessages = preserved;
+ if (systemMessages.length > 0) {
+ const truncationContext = createTruncationSystemContext$1(removedCount, compressedCount, summary);
+ const lastSystemIdx = systemMessages.length - 1;
+ const lastSystem = systemMessages[lastSystemIdx];
+ const updatedSystem = {
+ ...lastSystem,
+ content: typeof lastSystem.content === "string" ? lastSystem.content + truncationContext : lastSystem.content
+ };
+ newSystemMessages = [...systemMessages.slice(0, lastSystemIdx), updatedSystem];
+ } else newMessages = [createTruncationMarker$2(removedCount, compressedCount, summary), ...preserved];
  const newPayload = {
  ...payload,
- messages: [
- ...systemMessages,
- marker,
- ...preserved
- ]
+ messages: [...newSystemMessages, ...newMessages]
  };
  const newBytes = JSON.stringify(newPayload).length;
  const newTokenCount = await getTokenCount(newPayload, model);
- consola.info(`Auto-compact: ${originalTokens} ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
- if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
+ let reason = "tokens";
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
+ else if (exceedsBytes) reason = "size";
+ const actions = [];
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
+ consola.info(`[AutoTruncate:OpenAI] ${reason}: ${originalTokens}→${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
+ if (newBytes > byteLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
  return {
  payload: newPayload,
  wasCompacted: true,
@@ -2463,13 +2840,13 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  /**
- * Create a marker to prepend to responses indicating auto-compaction occurred.
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
  */
- function createCompactionMarker(result) {
+ function createTruncationResponseMarkerOpenAI(result) {
  if (!result.wasCompacted) return "";
  const reduction = result.originalTokens - result.compactedTokens;
  const percentage = Math.round(reduction / result.originalTokens * 100);
- return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+ return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
  }
 
  //#endregion
@@ -2489,7 +2866,7 @@ const createChatCompletions = async (payload) => {
  });
  if (!response.ok) {
  consola.error("Failed to create chat completions", response);
- throw await HTTPError.fromResponse("Failed to create chat completions", response);
+ throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
  }
  if (payload.stream) return events(response);
  return await response.json();
@@ -2539,6 +2916,18 @@ function failTracking(trackingId, error) {
  if (!trackingId) return;
  requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
  }
+ /**
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
+ * Works with both OpenAI and Anthropic truncate results.
+ */
+ function createTruncationMarker(result) {
+ if (!result.wasCompacted) return "";
+ const { originalTokens, compactedTokens, removedMessageCount } = result;
+ if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
+ const reduction = originalTokens - compactedTokens;
+ const percentage = Math.round(reduction / originalTokens * 100);
+ return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
+ }
  /** Record streaming error to history (works with any accumulator type) */
  function recordStreamError(opts) {
  const { acc, fallbackModel, ctx, error } = opts;
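The shared `createTruncationMarker` also guards against partially populated results (the Anthropic path may omit token counts) before doing the percentage math. For a hypothetical fully populated result:

```ts
const result = {
  wasCompacted: true,
  originalTokens: 150_000,
  compactedTokens: 96_000,
  removedMessageCount: 42,
};
const reduction = result.originalTokens - result.compactedTokens;         // 54000
const percentage = Math.round((reduction / result.originalTokens) * 100); // 36
// → "[Auto-truncated: 42 messages removed, 150000 → 96000 tokens (36% reduction)]"
```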
@@ -2557,37 +2946,37 @@ function recordStreamError(opts) {
  function isNonStreaming(response) {
  return Object.hasOwn(response, "choices");
  }
- /** Build final payload with auto-compact if needed */
+ /** Build final payload with auto-truncate if needed */
  async function buildFinalPayload(payload, model) {
- if (!state.autoCompact || !model) {
- if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+ if (!state.autoTruncate || !model) {
+ if (state.autoTruncate && !model) consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
  return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  }
  try {
- const check = await checkNeedsCompaction(payload, model);
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
+ const check = await checkNeedsCompactionOpenAI(payload, model);
+ consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
  if (!check.needed) return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  let reasonText;
  if (check.reason === "both") reasonText = "tokens and size";
  else if (check.reason === "bytes") reasonText = "size";
  else reasonText = "tokens";
- consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
- const compactResult = await autoCompact(payload, model);
+ consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
+ const truncateResult = await autoTruncateOpenAI(payload, model);
  return {
- finalPayload: compactResult.payload,
- compactResult
+ finalPayload: truncateResult.payload,
+ truncateResult
  };
  } catch (error) {
- consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
+ consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
  return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  }
  }
@@ -2631,7 +3020,7 @@ async function logPayloadSizeInfo(payload, model) {
  if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
  consola.info("");
  consola.info(" Suggestions:");
- if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
+ if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
  if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
  consola.info(" • Start a new conversation with /clear or /reset");
  consola.info(" • Reduce conversation history by deleting old messages");
@@ -2663,8 +3052,8 @@ async function handleCompletion$1(c) {
  };
  const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
  await logTokenCount(originalPayload, selectedModel);
- const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
- if (compactResult) ctx.compactResult = compactResult;
+ const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
+ if (truncateResult) ctx.truncateResult = truncateResult;
  const payload = isNullish(finalPayload.max_tokens) ? {
  ...finalPayload,
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
@@ -2717,8 +3106,8 @@ async function logTokenCount(payload, selectedModel) {
  function handleNonStreamingResponse$1(c, originalResponse, ctx) {
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
  let response = originalResponse;
- if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
- const marker = createCompactionMarker(ctx.compactResult);
+ if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
  response = {
  ...response,
  choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2786,8 +3175,8 @@ async function handleStreamingResponse$1(opts) {
  const { stream, response, payload, ctx } = opts;
  const acc = createStreamAccumulator();
  try {
- if (ctx.compactResult?.wasCompacted) {
- const marker = createCompactionMarker(ctx.compactResult);
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
  const markerChunk = {
  id: `compact-marker-${Date.now()}`,
  object: "chat.completion.chunk",
@@ -4070,53 +4459,624 @@ historyRoutes.get("/", (c) => {
  });
 
  //#endregion
- //#region src/routes/messages/utils.ts
- function mapOpenAIStopReasonToAnthropic(finishReason) {
- if (finishReason === null) return null;
- return {
- stop: "end_turn",
- length: "max_tokens",
- tool_calls: "tool_use",
- content_filter: "end_turn"
- }[finishReason];
- }
-
- //#endregion
- //#region src/routes/messages/non-stream-translation.ts
- const OPENAI_TOOL_NAME_LIMIT = 64;
+ //#region src/lib/auto-truncate-anthropic.ts
  /**
- * Ensure all tool_use blocks have corresponding tool_result responses.
- * This handles edge cases where conversation history may be incomplete:
- * - Session interruptions where tool execution was cut off
- * - Previous request failures
- * - Client sending truncated history
- *
- * Adding placeholder responses prevents API errors and maintains protocol compliance.
+ * Convert Anthropic message content to text for token counting.
  */
- function fixMessageSequence(messages) {
- const fixedMessages = [];
- for (let i = 0; i < messages.length; i++) {
- const message = messages[i];
- fixedMessages.push(message);
- if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
- const foundToolResponses = /* @__PURE__ */ new Set();
- let j = i + 1;
- while (j < messages.length && messages[j].role === "tool") {
- const toolMessage = messages[j];
- if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
- j++;
+ function contentToText(content) {
+ if (typeof content === "string") return content;
+ const parts = [];
+ for (const block of content) switch (block.type) {
+ case "text":
+ parts.push(block.text);
+ break;
+ case "tool_use":
+ parts.push(`[tool_use: ${block.name}]`, JSON.stringify(block.input));
+ break;
+ case "tool_result":
+ if (typeof block.content === "string") parts.push(block.content);
+ else if (Array.isArray(block.content)) {
+ for (const inner of block.content) if (inner.type === "text") parts.push(inner.text);
  }
- for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
- consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
- fixedMessages.push({
- role: "tool",
- tool_call_id: toolCall.id,
- content: "Tool execution was interrupted or failed."
+ break;
+ case "thinking":
+ parts.push(block.thinking);
+ break;
+ default: break;
+ }
+ return parts.join("\n");
+ }
+ /**
+ * Estimate tokens for a message (fast, synchronous).
+ * Uses ~4 chars per token approximation for internal calculations.
+ * The final result is verified with the accurate tokenizer.
+ */
+ function estimateMessageTokens(msg) {
+ const text = contentToText(msg.content);
+ return Math.ceil(text.length / 4) + 4;
+ }
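The Anthropic-side estimator is deliberately crude: length/4 plus a 4-token per-message overhead. That is acceptable here because it only steers the truncation search; the final payload is re-checked with the accurate tokenizer. Roughly:

```ts
// The chars/4 heuristic from above, checked on illustrative inputs.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4) + 4;
}

estimateTokens("ls -la /tmp");      // ceil(11 / 4) + 4 = 7
estimateTokens("x".repeat(10_000)); // 2504 — close enough to steer truncation
```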
4498
+ /**
4499
+ * Count tokens for an Anthropic message using the model's tokenizer.
4500
+ */
4501
+ async function countMessageTokens(msg, model) {
4502
+ const text = contentToText(msg.content);
4503
+ return await countTextTokens(text, model) + 4;
4504
+ }
4505
+ /**
4506
+ * Count tokens for system prompt.
4507
+ */
4508
+ async function countSystemTokens(system, model) {
4509
+ if (!system) return 0;
4510
+ if (typeof system === "string") return await countTextTokens(system, model) + 4;
4511
+ const text = system.map((block) => block.text).join("\n");
4512
+ return await countTextTokens(text, model) + 4;
4513
+ }
4514
+ /**
4515
+ * Count total tokens for the payload using the model's tokenizer.
4516
+ */
4517
+ async function countTotalTokens(payload, model) {
4518
+ let total = await countSystemTokens(payload.system, model);
4519
+ for (const msg of payload.messages) total += await countMessageTokens(msg, model);
4520
+ if (payload.tools) {
4521
+ const toolsText = JSON.stringify(payload.tools);
4522
+ total += await countTextTokens(toolsText, model);
4523
+ }
4524
+ return total;
4525
+ }
4526
+ function getMessageBytes(msg) {
4527
+ return JSON.stringify(msg).length;
4528
+ }
4529
+ /**
4530
+ * Get tool_use IDs from an assistant message.
4531
+ */
4532
+ function getToolUseIds(msg) {
4533
+ if (msg.role !== "assistant") return [];
4534
+ if (typeof msg.content === "string") return [];
4535
+ const ids = [];
4536
+ for (const block of msg.content) if (block.type === "tool_use") ids.push(block.id);
4537
+ return ids;
4538
+ }
4539
+ /**
4540
+ * Get tool_result IDs from a user message.
4541
+ */
4542
+ function getToolResultIds(msg) {
4543
+ if (msg.role !== "user") return [];
4544
+ if (typeof msg.content === "string") return [];
4545
+ const ids = [];
4546
+ for (const block of msg.content) if (block.type === "tool_result") ids.push(block.tool_use_id);
4547
+ return ids;
4548
+ }
4549
+ /**
4550
+ * Filter orphaned tool_result messages (those without matching tool_use).
4551
+ */
4552
+ function filterOrphanedToolResults(messages) {
4553
+ const toolUseIds = /* @__PURE__ */ new Set();
4554
+ for (const msg of messages) for (const id of getToolUseIds(msg)) toolUseIds.add(id);
4555
+ const result = [];
4556
+ let removedCount = 0;
4557
+ for (const msg of messages) {
4558
+ if (msg.role === "user" && typeof msg.content !== "string") {
4559
+ if (getToolResultIds(msg).some((id) => !toolUseIds.has(id))) {
4560
+ const filteredContent = msg.content.filter((block) => {
4561
+ if (block.type === "tool_result" && !toolUseIds.has(block.tool_use_id)) {
4562
+ removedCount++;
4563
+ return false;
4564
+ }
4565
+ return true;
4115
4566
  });
4567
+ if (filteredContent.length === 0) continue;
4568
+ result.push({
4569
+ ...msg,
4570
+ content: filteredContent
4571
+ });
4572
+ continue;
4116
4573
  }
4117
4574
  }
4575
+ result.push(msg);
4118
4576
  }
4119
- return fixedMessages;
4577
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_result`);
4578
+ return result;
4579
+ }
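
A toy illustration, using simplified message shapes, of the pairing rule this filter enforces: a tool_result block survives only if some assistant message in the history contains a tool_use with the same id.

    const history = [
      { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "read_file", input: {} }] },
      { role: "user", content: [
        { type: "tool_result", tool_use_id: "call_1", content: "file body" }, // kept: matches call_1
        { type: "tool_result", tool_use_id: "call_0", content: "stale" }      // dropped: no matching tool_use
      ] }
    ];
    // filterOrphanedToolResults(history) keeps the first result and drops the second;
    // a user message whose blocks are all filtered out is removed entirely.
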
4580
+ /**
4581
+ * Filter orphaned tool_use messages (those without matching tool_result).
4582
+ * In Anthropic API, every tool_use must have a corresponding tool_result.
4583
+ */
4584
+ function filterOrphanedToolUse(messages) {
4585
+ const toolResultIds = /* @__PURE__ */ new Set();
4586
+ for (const msg of messages) for (const id of getToolResultIds(msg)) toolResultIds.add(id);
4587
+ const result = [];
4588
+ let removedCount = 0;
4589
+ for (const msg of messages) {
4590
+ if (msg.role === "assistant" && typeof msg.content !== "string") {
4591
+ if (getToolUseIds(msg).some((id) => !toolResultIds.has(id))) {
4592
+ const filteredContent = msg.content.filter((block) => {
4593
+ if (block.type === "tool_use" && !toolResultIds.has(block.id)) {
4594
+ removedCount++;
4595
+ return false;
4596
+ }
4597
+ return true;
4598
+ });
4599
+ if (filteredContent.length === 0) continue;
4600
+ result.push({
4601
+ ...msg,
4602
+ content: filteredContent
4603
+ });
4604
+ continue;
4605
+ }
4606
+ }
4607
+ result.push(msg);
4608
+ }
4609
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_use`);
4610
+ return result;
4611
+ }
4612
+ /**
4613
+ * Ensure messages start with a user message.
4614
+ */
4615
+ function ensureStartsWithUser(messages) {
4616
+ let startIndex = 0;
4617
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
4618
+ if (startIndex > 0) consola.debug(`[AutoTruncate:Anthropic] Skipped ${startIndex} leading non-user messages`);
4619
+ return messages.slice(startIndex);
4620
+ }
4621
+ /** Threshold above which tool_result content counts as large (characters) */
4622
+ const LARGE_TOOL_RESULT_THRESHOLD = 1e4;
4623
+ /** Maximum length for compressed tool_result summary */
4624
+ const COMPRESSED_SUMMARY_LENGTH = 500;
4625
+ /**
4626
+ * Compress large tool_result content into a summary.
4627
+ * Keeps the first and last portions with a note about truncation.
4628
+ */
4629
+ function compressToolResultContent(content) {
4630
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
4631
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
4632
+ const start$1 = content.slice(0, halfLen);
4633
+ const end = content.slice(-halfLen);
4634
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
4635
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
4636
+ }
4637
+ /**
4638
+ * Compress a tool_result block in an Anthropic message.
4639
+ */
4640
+ function compressToolResultBlock(block) {
4641
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) return {
4642
+ ...block,
4643
+ content: compressToolResultContent(block.content)
4644
+ };
4645
+ return block;
4646
+ }
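
The arithmetic behind the compression, worked on a hypothetical size: with COMPRESSED_SUMMARY_LENGTH = 500, a 12,000-character tool_result keeps its first and last 250 characters around an omission marker.

    const content = "x".repeat(12_000);
    const halfLen = Math.floor(500 / 2);    // 250
    const head = content.slice(0, halfLen); // first 250 characters
    const tail = content.slice(-halfLen);   // last 250 characters
    const removed = content.length - 500;   // 11,500 characters reported in the marker
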
4647
+ /**
4648
+ * Smart compression strategy:
4649
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
4650
+ * 2. Messages before that threshold get their tool_results compressed
4651
+ * 3. Returns compressed messages and stats
4652
+ *
4653
+ * @param preservePercent - Fraction of the context to preserve uncompressed (0.0-1.0)
4654
+ */
4655
+ function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePercent) {
4656
+ const n = messages.length;
4657
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4658
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4659
+ for (let i = n - 1; i >= 0; i--) {
4660
+ const msg = messages[i];
4661
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4662
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4663
+ }
4664
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
4665
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
4666
+ let thresholdIndex = n;
4667
+ for (let i = n - 1; i >= 0; i--) {
4668
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
4669
+ thresholdIndex = i + 1;
4670
+ break;
4671
+ }
4672
+ thresholdIndex = i;
4673
+ }
4674
+ if (thresholdIndex >= n) return {
4675
+ messages,
4676
+ compressedCount: 0,
4677
+ compressThresholdIndex: n
4678
+ };
4679
+ const result = [];
4680
+ let compressedCount = 0;
4681
+ for (const [i, msg] of messages.entries()) {
4682
+ if (i < thresholdIndex && msg.role === "user" && Array.isArray(msg.content)) {
4683
+ if (msg.content.some((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD)) {
4684
+ const compressedContent = msg.content.map((block) => {
4685
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) {
4686
+ compressedCount++;
4687
+ return compressToolResultBlock(block);
4688
+ }
4689
+ return block;
4690
+ });
4691
+ result.push({
4692
+ ...msg,
4693
+ content: compressedContent
4694
+ });
4695
+ continue;
4696
+ }
4697
+ }
4698
+ result.push(msg);
4699
+ }
4700
+ return {
4701
+ messages: result,
4702
+ compressedCount,
4703
+ compressThresholdIndex: thresholdIndex
4704
+ };
4705
+ }
4706
+ /** Default context window, used when model capabilities are not available */
4707
+ const DEFAULT_CONTEXT_WINDOW = 2e5;
4708
+ function calculateLimits(model, config) {
4709
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
4710
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
4711
+ const byteLimit = getEffectiveByteLimitBytes();
4712
+ return {
4713
+ tokenLimit,
4714
+ byteLimit
4715
+ };
4716
+ }
4717
+ function findOptimalPreserveIndex(params) {
4718
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
4719
+ if (messages.length === 0) return 0;
4720
+ const markerBytes = 200;
4721
+ const availableTokens = tokenLimit - systemTokens - 50;
4722
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
4723
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
4724
+ const n = messages.length;
4725
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4726
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4727
+ for (let i = n - 1; i >= 0; i--) {
4728
+ const msg = messages[i];
4729
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4730
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4731
+ }
4732
+ let left = 0;
4733
+ let right = n;
4734
+ while (left < right) {
4735
+ const mid = left + right >>> 1;
4736
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
4737
+ else left = mid + 1;
4738
+ }
4739
+ return left;
4740
+ }
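
A sketch of the search above on made-up numbers: cumTokens[i] is the cost of the suffix messages[i..], which only shrinks as i grows, so a binary search finds the smallest index whose suffix fits the budget.

    // Four messages costing [900, 300, 200, 100] tokens against a 550-token budget.
    // Suffix sums: cumTokens = [1500, 600, 300, 100, 0].
    const cumTokens = [1500, 600, 300, 100, 0];
    const budget = 550;
    let left = 0, right = 4;
    while (left < right) {
      const mid = (left + right) >>> 1;
      if (cumTokens[mid] <= budget) right = mid;
      else left = mid + 1;
    }
    // left === 2: messages 0-1 are dropped, messages 2-3 are preserved.
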
4741
+ /**
4742
+ * Generate a summary of removed messages for context.
4743
+ * Extracts key information such as message counts and the tools that were called.
4744
+ */
4745
+ function generateRemovedMessagesSummary(removedMessages) {
4746
+ const toolCalls = [];
4747
+ let userMessageCount = 0;
4748
+ let assistantMessageCount = 0;
4749
+ for (const msg of removedMessages) {
4750
+ if (msg.role === "user") userMessageCount++;
4751
+ else assistantMessageCount++;
4752
+ if (Array.isArray(msg.content)) {
4753
+ for (const block of msg.content) if (block.type === "tool_use") toolCalls.push(block.name);
4754
+ }
4755
+ }
4756
+ const parts = [];
4757
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
4758
+ const breakdown = [];
4759
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
4760
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
4761
+ parts.push(`Messages: ${breakdown.join(", ")}`);
4762
+ }
4763
+ if (toolCalls.length > 0) {
4764
+ const uniqueTools = [...new Set(toolCalls)];
4765
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
4766
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
4767
+ }
4768
+ return parts.join(". ");
4769
+ }
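
For a hypothetical batch of removed messages (3 user, 2 assistant, six distinct tools invoked), the summary string would read:

    // "Messages: 3 user, 2 assistant. Tools used: read_file, grep, bash, edit, ls, +1 more"
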
4770
+ /**
4771
+ * Add a compression notice to the system prompt.
4772
+ * Informs the model that some tool_result content has been compressed.
4773
+ */
4774
+ function addCompressionNotice(payload, compressedCount) {
4775
+ const notice = `[CONTEXT NOTE]\n${compressedCount} large tool_result blocks have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]\n\n`;
4776
+ let newSystem;
4777
+ if (typeof payload.system === "string") newSystem = notice + payload.system;
4778
+ else if (Array.isArray(payload.system)) newSystem = [{
4779
+ type: "text",
4780
+ text: notice
4781
+ }, ...payload.system];
4782
+ else newSystem = notice;
4783
+ return {
4784
+ ...payload,
4785
+ system: newSystem
4786
+ };
4787
+ }
4788
+ /**
4789
+ * Create truncation context to prepend to system prompt.
4790
+ */
4791
+ function createTruncationSystemContext(removedCount, compressedCount, summary) {
4792
+ let context = `[CONVERSATION CONTEXT]\n`;
4793
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
4794
+ if (compressedCount > 0) context += `${compressedCount} large tool_result blocks have been compressed.\n`;
4795
+ if (summary) context += `Summary of removed content: ${summary}\n`;
4796
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]\n\n";
4797
+ return context;
4798
+ }
4799
+ /**
4800
+ * Create a truncation marker message (fallback when no system prompt).
4801
+ */
4802
+ function createTruncationMarker$1(removedCount, compressedCount, summary) {
4803
+ const parts = [];
4804
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
4805
+ if (compressedCount > 0) parts.push(`${compressedCount} tool_result blocks compressed`);
4806
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
4807
+ if (summary) content += `\n[Summary: ${summary}]`;
4808
+ return {
4809
+ role: "user",
4810
+ content
4811
+ };
4812
+ }
4813
+ /**
4814
+ * Perform auto-truncation on an Anthropic payload that exceeds limits.
4815
+ */
4816
+ async function autoTruncateAnthropic(payload, model, config = {}) {
4817
+ const cfg = {
4818
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4819
+ ...config
4820
+ };
4821
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4822
+ const originalBytes = JSON.stringify(payload).length;
4823
+ const originalTokens = await countTotalTokens(payload, model);
4824
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
4825
+ payload,
4826
+ wasCompacted: false,
4827
+ originalTokens,
4828
+ compactedTokens: originalTokens,
4829
+ removedMessageCount: 0
4830
+ };
4831
+ const exceedsTokens = originalTokens > tokenLimit;
4832
+ const exceedsBytes = originalBytes > byteLimit;
4833
+ let workingMessages = payload.messages;
4834
+ let compressedCount = 0;
4835
+ if (state.compressToolResults) {
4836
+ const compressionResult = smartCompressToolResults(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
4837
+ workingMessages = compressionResult.messages;
4838
+ compressedCount = compressionResult.compressedCount;
4839
+ const compressedPayload = {
4840
+ ...payload,
4841
+ messages: workingMessages
4842
+ };
4843
+ const compressedBytes = JSON.stringify(compressedPayload).length;
4844
+ const compressedTokens = await countTotalTokens(compressedPayload, model);
4845
+ if (compressedTokens <= tokenLimit && compressedBytes <= byteLimit) {
4846
+ let reason$1 = "tokens";
4847
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
4848
+ else if (exceedsBytes) reason$1 = "size";
4849
+ consola.info(`[AutoTruncate:Anthropic] ${reason$1}: ${originalTokens}→${compressedTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
4850
+ const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
4851
+ return {
4852
+ payload: noticePayload,
4853
+ wasCompacted: true,
4854
+ originalTokens,
4855
+ compactedTokens: await countTotalTokens(noticePayload, model),
4856
+ removedMessageCount: 0
4857
+ };
4858
+ }
4859
+ }
4860
+ const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
4861
+ const systemTokens = await countSystemTokens(payload.system, model);
4862
+ const messagesJson = JSON.stringify(workingMessages);
4863
+ const payloadOverhead = JSON.stringify({
4864
+ ...payload,
4865
+ messages: workingMessages
4866
+ }).length - messagesJson.length;
4867
+ consola.debug(`[AutoTruncate:Anthropic] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${Math.round(systemBytes / 1024)}KB`);
4868
+ const preserveIndex = findOptimalPreserveIndex({
4869
+ messages: workingMessages,
4870
+ systemBytes,
4871
+ systemTokens,
4872
+ payloadOverhead,
4873
+ tokenLimit,
4874
+ byteLimit
4875
+ });
4876
+ if (preserveIndex === 0) {
4877
+ consola.warn("[AutoTruncate:Anthropic] Cannot truncate, system messages too large");
4878
+ return {
4879
+ payload,
4880
+ wasCompacted: false,
4881
+ originalTokens,
4882
+ compactedTokens: originalTokens,
4883
+ removedMessageCount: 0
4884
+ };
4885
+ }
4886
+ if (preserveIndex >= workingMessages.length) {
4887
+ consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
4888
+ return {
4889
+ payload,
4890
+ wasCompacted: false,
4891
+ originalTokens,
4892
+ compactedTokens: originalTokens,
4893
+ removedMessageCount: 0
4894
+ };
4895
+ }
4896
+ let preserved = workingMessages.slice(preserveIndex);
4897
+ preserved = filterOrphanedToolResults(preserved);
4898
+ preserved = filterOrphanedToolUse(preserved);
4899
+ preserved = ensureStartsWithUser(preserved);
4900
+ preserved = filterOrphanedToolResults(preserved);
4901
+ preserved = filterOrphanedToolUse(preserved);
4902
+ if (preserved.length === 0) {
4903
+ consola.warn("[AutoTruncate:Anthropic] All messages filtered out after cleanup");
4904
+ return {
4905
+ payload,
4906
+ wasCompacted: false,
4907
+ originalTokens,
4908
+ compactedTokens: originalTokens,
4909
+ removedMessageCount: 0
4910
+ };
4911
+ }
4912
+ const removedMessages = payload.messages.slice(0, preserveIndex);
4913
+ const removedCount = workingMessages.length - preserved.length;
4914
+ const summary = generateRemovedMessagesSummary(removedMessages);
4915
+ let newSystem = payload.system;
4916
+ let newMessages = preserved;
4917
+ if (payload.system !== void 0) {
4918
+ const truncationContext = createTruncationSystemContext(removedCount, compressedCount, summary);
4919
+ if (typeof payload.system === "string") newSystem = truncationContext + payload.system;
4920
+ else if (Array.isArray(payload.system)) newSystem = [{
4921
+ type: "text",
4922
+ text: truncationContext
4923
+ }, ...payload.system];
4924
+ } else newMessages = [createTruncationMarker$1(removedCount, compressedCount, summary), ...preserved];
4925
+ const newPayload = {
4926
+ ...payload,
4927
+ system: newSystem,
4928
+ messages: newMessages
4929
+ };
4930
+ const newBytes = JSON.stringify(newPayload).length;
4931
+ const newTokens = await countTotalTokens(newPayload, model);
4932
+ let reason = "tokens";
4933
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
4934
+ else if (exceedsBytes) reason = "size";
4935
+ const actions = [];
4936
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
4937
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
4938
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
4939
+ consola.info(`[AutoTruncate:Anthropic] ${reason}: ${originalTokens}→${newTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
4940
+ if (newBytes > byteLimit || newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over limit (${newTokens} tokens, ${Math.round(newBytes / 1024)}KB)`);
4941
+ return {
4942
+ payload: newPayload,
4943
+ wasCompacted: true,
4944
+ originalTokens,
4945
+ compactedTokens: newTokens,
4946
+ removedMessageCount: removedCount
4947
+ };
4948
+ }
4949
+ /**
4950
+ * Check if payload needs compaction.
4951
+ */
4952
+ async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
4953
+ const cfg = {
4954
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4955
+ ...config
4956
+ };
4957
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4958
+ const currentTokens = await countTotalTokens(payload, model);
4959
+ const currentBytes = JSON.stringify(payload).length;
4960
+ const exceedsTokens = currentTokens > tokenLimit;
4961
+ const exceedsBytes = currentBytes > byteLimit;
4962
+ let reason;
4963
+ if (exceedsTokens && exceedsBytes) reason = "both";
4964
+ else if (exceedsTokens) reason = "tokens";
4965
+ else if (exceedsBytes) reason = "bytes";
4966
+ return {
4967
+ needed: exceedsTokens || exceedsBytes,
4968
+ currentTokens,
4969
+ tokenLimit,
4970
+ currentBytes,
4971
+ byteLimit,
4972
+ reason
4973
+ };
4974
+ }
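
A hedged usage sketch of the check-then-truncate flow, assuming payload and model objects shaped like the ones this bundle passes around (the direct-Anthropic handler later in this diff follows the same pattern):

    let effective = payload;
    const check = await checkNeedsCompactionAnthropic(payload, model);
    if (check.needed) { // check.reason is "tokens", "bytes", or "both"
      const result = await autoTruncateAnthropic(payload, model);
      if (result.wasCompacted) effective = result.payload;
    }
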
4975
+
4976
+ //#endregion
4977
+ //#region src/routes/messages/message-utils.ts
4978
+ function convertAnthropicMessages(messages) {
4979
+ return messages.map((msg) => {
4980
+ if (typeof msg.content === "string") return {
4981
+ role: msg.role,
4982
+ content: msg.content
4983
+ };
4984
+ const content = msg.content.map((block) => {
4985
+ if (block.type === "text") return {
4986
+ type: "text",
4987
+ text: block.text
4988
+ };
4989
+ if (block.type === "tool_use") return {
4990
+ type: "tool_use",
4991
+ id: block.id,
4992
+ name: block.name,
4993
+ input: JSON.stringify(block.input)
4994
+ };
4995
+ if (block.type === "tool_result") {
4996
+ const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
4997
+ return {
4998
+ type: "tool_result",
4999
+ tool_use_id: block.tool_use_id,
5000
+ content: resultContent
5001
+ };
5002
+ }
5003
+ return { type: block.type };
5004
+ });
5005
+ return {
5006
+ role: msg.role,
5007
+ content
5008
+ };
5009
+ });
5010
+ }
5011
+ function extractSystemPrompt(system) {
5012
+ if (!system) return void 0;
5013
+ if (typeof system === "string") return system;
5014
+ return system.map((block) => block.text).join("\n");
5015
+ }
5016
+ function extractToolCallsFromContent(content) {
5017
+ const tools = [];
5018
+ for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
5019
+ id: String(block.id),
5020
+ name: String(block.name),
5021
+ input: JSON.stringify(block.input)
5022
+ });
5023
+ return tools.length > 0 ? tools : void 0;
5024
+ }
5025
+ function extractToolCallsFromAnthropicContent(content) {
5026
+ const tools = [];
5027
+ for (const block of content) if (block.type === "tool_use") tools.push({
5028
+ id: block.id,
5029
+ name: block.name,
5030
+ input: JSON.stringify(block.input)
5031
+ });
5032
+ return tools.length > 0 ? tools : void 0;
5033
+ }
5034
+ function mapOpenAIStopReasonToAnthropic(finishReason) {
5035
+ if (finishReason === null) return null;
5036
+ return {
5037
+ stop: "end_turn",
5038
+ length: "max_tokens",
5039
+ tool_calls: "tool_use",
5040
+ content_filter: "end_turn"
5041
+ }[finishReason];
5042
+ }
5043
+
5044
+ //#endregion
5045
+ //#region src/routes/messages/non-stream-translation.ts
5046
+ const OPENAI_TOOL_NAME_LIMIT = 64;
5047
+ /**
5048
+ * Ensure all tool_use blocks have corresponding tool_result responses.
5049
+ * This handles edge cases where conversation history may be incomplete:
5050
+ * - Session interruptions where tool execution was cut off
5051
+ * - Previous request failures
5052
+ * - Client sending truncated history
5053
+ *
5054
+ * Adding placeholder responses prevents API errors and maintains protocol compliance.
5055
+ */
5056
+ function fixMessageSequence(messages) {
5057
+ const fixedMessages = [];
5058
+ for (let i = 0; i < messages.length; i++) {
5059
+ const message = messages[i];
5060
+ fixedMessages.push(message);
5061
+ if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
5062
+ const foundToolResponses = /* @__PURE__ */ new Set();
5063
+ let j = i + 1;
5064
+ while (j < messages.length && messages[j].role === "tool") {
5065
+ const toolMessage = messages[j];
5066
+ if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
5067
+ j++;
5068
+ }
5069
+ for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
5070
+ consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
5071
+ fixedMessages.push({
5072
+ role: "tool",
5073
+ tool_call_id: toolCall.id,
5074
+ content: "Tool execution was interrupted or failed."
5075
+ });
5076
+ }
5077
+ }
5078
+ }
5079
+ return fixedMessages;
4120
5080
  }
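
A toy before/after for fixMessageSequence, with simplified OpenAI-style messages: the dangling tool call gets a placeholder tool message appended right after the assistant turn.

    const input = [
      { role: "assistant", content: null, tool_calls: [{ id: "call_9", type: "function", function: { name: "grep", arguments: "{}" } }] },
      { role: "user", content: "continue" } // no tool message answered call_9
    ];
    // fixMessageSequence(input) inserts, after the assistant turn:
    // { role: "tool", tool_call_id: "call_9", content: "Tool execution was interrupted or failed." }
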
4121
5081
  function translateToOpenAI(payload) {
4122
5082
  const toolNameMapping = {
@@ -4140,19 +5100,50 @@ function translateToOpenAI(payload) {
4140
5100
  toolNameMapping
4141
5101
  };
4142
5102
  }
5103
+ /**
5104
+ * Find the latest available model matching a family prefix.
5105
+ * Searches state.models for models starting with the given prefix
5106
+ * and returns the one with the highest version number.
5107
+ *
5108
+ * @param familyPrefix - e.g., "claude-opus", "claude-sonnet", "claude-haiku"
5109
+ * @param fallback - fallback model ID if no match found
5110
+ */
5111
+ function findLatestModel(familyPrefix, fallback) {
5112
+ const models = state.models?.data;
5113
+ if (!models || models.length === 0) return fallback;
5114
+ const candidates = models.filter((m) => m.id.startsWith(familyPrefix));
5115
+ if (candidates.length === 0) return fallback;
5116
+ candidates.sort((a, b) => {
5117
+ const versionA = extractVersion(a.id, familyPrefix);
5118
+ return extractVersion(b.id, familyPrefix) - versionA;
5119
+ });
5120
+ return candidates[0].id;
5121
+ }
5122
+ /**
5123
+ * Extract numeric version from model ID.
5124
+ * e.g., "claude-opus-4.5" with prefix "claude-opus" -> 4.5
5125
+ */
5126
+ function extractVersion(modelId, prefix) {
5127
+ const match = modelId.slice(prefix.length + 1).match(/^(\d+(?:\.\d+)?)/);
5128
+ return match ? Number.parseFloat(match[1]) : 0;
5129
+ }
4143
5130
  function translateModelName(model) {
4144
- const shortNameMap = {
4145
- opus: "claude-opus-4.5",
4146
- sonnet: "claude-sonnet-4.5",
4147
- haiku: "claude-haiku-4.5"
5131
+ const aliasMap = {
5132
+ opus: "claude-opus",
5133
+ sonnet: "claude-sonnet",
5134
+ haiku: "claude-haiku"
4148
5135
  };
4149
- if (shortNameMap[model]) return shortNameMap[model];
5136
+ if (aliasMap[model]) {
5137
+ const familyPrefix = aliasMap[model];
5138
+ const fallback = `${familyPrefix}-4.5`;
5139
+ return findLatestModel(familyPrefix, fallback);
5140
+ }
4150
5141
  if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
4151
5142
  if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
4152
5143
  if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
4153
- if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
5144
+ if (/^claude-opus-4-\d+$/.test(model)) return findLatestModel("claude-opus", "claude-opus-4.5");
4154
5145
  if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
4155
- if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
5146
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return findLatestModel("claude-haiku", "claude-haiku-4.5");
4156
5147
  return model;
4157
5148
  }
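
The effect of the new alias resolution, assuming state.models lists claude-opus-4.1 and claude-opus-4.5:

    translateModelName("opus");                       // "claude-opus-4.5" (highest version wins)
    translateModelName("claude-sonnet-4-5-20250929"); // "claude-sonnet-4.5" (date-suffixed form normalized)
    translateModelName("gpt-4.1");                    // returned unchanged
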
4158
5149
  function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
@@ -4160,7 +5151,7 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
4160
5151
  const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4161
5152
  return [...systemMessages, ...otherMessages];
4162
5153
  }
4163
- const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
5154
+ const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
4164
5155
  /**
4165
5156
  * Filter out reserved keywords from system prompt text.
4166
5157
  * Copilot API rejects requests containing these keywords.
@@ -4284,7 +5275,7 @@ function translateAnthropicToolsToOpenAI(anthropicTools, toolNameMapping) {
4284
5275
  function: {
4285
5276
  name: getTruncatedToolName(tool.name, toolNameMapping),
4286
5277
  description: tool.description,
4287
- parameters: tool.input_schema
5278
+ parameters: tool.input_schema ?? {}
4288
5279
  }
4289
5280
  }));
4290
5281
  }
@@ -4385,7 +5376,13 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
4385
5376
  //#endregion
4386
5377
  //#region src/routes/messages/count-tokens-handler.ts
4387
5378
  /**
4388
- * Handles token counting for Anthropic messages
5379
+ * Handles token counting for Anthropic messages.
5380
+ *
5381
+ * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
5382
+ * For other models, uses GPT tokenizers with appropriate buffers.
5383
+ *
5384
+ * When auto-truncate is enabled and the request would exceed limits,
5385
+ * returns an inflated token count to trigger Claude Code's auto-compact mechanism.
4389
5386
  */
4390
5387
  async function handleCountTokens(c) {
4391
5388
  try {
@@ -4397,6 +5394,16 @@ async function handleCountTokens(c) {
4397
5394
  consola.warn("Model not found, returning default token count");
4398
5395
  return c.json({ input_tokens: 1 });
4399
5396
  }
5397
+ if (state.autoTruncate) {
5398
+ const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5399
+ if (truncateCheck.needed) {
5400
+ const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
5401
+ const inflatedTokens = Math.floor(contextWindow * .95);
5402
+ consola.debug(`[count_tokens] Would trigger auto-truncate: ${truncateCheck.currentTokens} tokens > ${truncateCheck.tokenLimit}, returning inflated count: ${inflatedTokens}`);
5403
+ return c.json({ input_tokens: inflatedTokens });
5404
+ }
5405
+ }
5406
+ const tokenizerName = selectedModel.capabilities?.tokenizer ?? "o200k_base";
4400
5407
  const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4401
5408
  if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4402
5409
  let mcpToolExist = false;
@@ -4407,9 +5414,8 @@ async function handleCountTokens(c) {
4407
5414
  }
4408
5415
  }
4409
5416
  let finalTokenCount = tokenCount.input + tokenCount.output;
4410
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
4411
- else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
4412
- consola.debug("Token count:", finalTokenCount);
5417
+ if (!(selectedModel.vendor === "Anthropic")) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
5418
+ consola.debug(`Token count: ${finalTokenCount} (tokenizer: ${tokenizerName})`);
4413
5419
  return c.json({ input_tokens: finalTokenCount });
4414
5420
  } catch (error) {
4415
5421
  consola.error("Error counting tokens:", error);
@@ -4417,6 +5423,262 @@ async function handleCountTokens(c) {
4417
5423
  }
4418
5424
  }
4419
5425
 
5426
+ //#endregion
5427
+ //#region src/services/copilot/create-anthropic-messages.ts
5428
+ /**
5429
+ * Fields that are supported by Copilot's Anthropic API endpoint.
5430
+ * Any other fields in the incoming request will be stripped.
5431
+ */
5432
+ const COPILOT_SUPPORTED_FIELDS = new Set([
5433
+ "model",
5434
+ "messages",
5435
+ "max_tokens",
5436
+ "system",
5437
+ "metadata",
5438
+ "stop_sequences",
5439
+ "stream",
5440
+ "temperature",
5441
+ "top_p",
5442
+ "top_k",
5443
+ "tools",
5444
+ "tool_choice",
5445
+ "thinking",
5446
+ "service_tier"
5447
+ ]);
5448
+ /**
5449
+ * Filter payload to only include fields supported by Copilot's Anthropic API.
5450
+ * This prevents errors like "Extra inputs are not permitted" for unsupported
5451
+ * fields like `output_config`.
5452
+ *
5453
+ * Also converts server-side tools (web_search, etc.) to custom tools.
5454
+ */
5455
+ function filterPayloadForCopilot(payload) {
5456
+ const filtered = {};
5457
+ const unsupportedFields = [];
5458
+ for (const [key, value] of Object.entries(payload)) if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
5459
+ else unsupportedFields.push(key);
5460
+ if (unsupportedFields.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${unsupportedFields.join(", ")}`);
5461
+ if (filtered.tools) filtered.tools = convertServerToolsToCustom(filtered.tools);
5462
+ return filtered;
5463
+ }
5464
+ /**
5465
+ * Adjust max_tokens if thinking is enabled.
5466
+ * According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
5467
+ * max_tokens = thinking_budget + response_tokens
5468
+ */
5469
+ function adjustMaxTokensForThinking(payload) {
5470
+ const thinking = payload.thinking;
5471
+ if (!thinking) return payload;
5472
+ const budgetTokens = thinking.budget_tokens;
5473
+ if (!budgetTokens) return payload;
5474
+ if (payload.max_tokens <= budgetTokens) {
5475
+ const newMaxTokens = budgetTokens + Math.min(16384, budgetTokens);
5476
+ consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budgetTokens})`);
5477
+ return {
5478
+ ...payload,
5479
+ max_tokens: newMaxTokens
5480
+ };
5481
+ }
5482
+ return payload;
5483
+ }
5484
+ /**
5485
+ * Create messages using Anthropic-style API directly.
5486
+ * This bypasses the OpenAI translation layer for Anthropic models.
5487
+ */
5488
+ async function createAnthropicMessages(payload) {
5489
+ if (!state.copilotToken) throw new Error("Copilot token not found");
5490
+ let filteredPayload = filterPayloadForCopilot(payload);
5491
+ filteredPayload = adjustMaxTokensForThinking(filteredPayload);
5492
+ const enableVision = filteredPayload.messages.some((msg) => {
5493
+ if (typeof msg.content === "string") return false;
5494
+ return msg.content.some((block) => block.type === "image");
5495
+ });
5496
+ const isAgentCall = filteredPayload.messages.some((msg) => msg.role === "assistant");
5497
+ const headers = {
5498
+ ...copilotHeaders(state, enableVision),
5499
+ "X-Initiator": isAgentCall ? "agent" : "user",
5500
+ "anthropic-version": "2023-06-01"
5501
+ };
5502
+ consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
5503
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
5504
+ method: "POST",
5505
+ headers,
5506
+ body: JSON.stringify(filteredPayload)
5507
+ });
5508
+ if (!response.ok) {
5509
+ consola.debug("Request failed:", {
5510
+ model: filteredPayload.model,
5511
+ max_tokens: filteredPayload.max_tokens,
5512
+ stream: filteredPayload.stream,
5513
+ tools: filteredPayload.tools?.map((t) => ({
5514
+ name: t.name,
5515
+ type: t.type
5516
+ })),
5517
+ thinking: filteredPayload.thinking,
5518
+ messageCount: filteredPayload.messages.length
5519
+ });
5520
+ throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, filteredPayload.model);
5521
+ }
5522
+ if (payload.stream) return events(response);
5523
+ return await response.json();
5524
+ }
5525
+ const SERVER_TOOL_CONFIGS = {
5526
+ web_search: {
5527
+ description: "Search the web for current information. Returns web search results that can help answer questions about recent events, current data, or information that may have changed since your knowledge cutoff.",
5528
+ input_schema: {
5529
+ type: "object",
5530
+ properties: { query: {
5531
+ type: "string",
5532
+ description: "The search query"
5533
+ } },
5534
+ required: ["query"]
5535
+ }
5536
+ },
5537
+ web_fetch: {
5538
+ description: "Fetch content from a URL. NOTE: This is a client-side tool - the client must fetch the URL and return the content.",
5539
+ input_schema: {
5540
+ type: "object",
5541
+ properties: { url: {
5542
+ type: "string",
5543
+ description: "The URL to fetch"
5544
+ } },
5545
+ required: ["url"]
5546
+ }
5547
+ },
5548
+ code_execution: {
5549
+ description: "Execute code in a sandbox. NOTE: This is a client-side tool - the client must execute the code.",
5550
+ input_schema: {
5551
+ type: "object",
5552
+ properties: {
5553
+ code: {
5554
+ type: "string",
5555
+ description: "The code to execute"
5556
+ },
5557
+ language: {
5558
+ type: "string",
5559
+ description: "The programming language"
5560
+ }
5561
+ },
5562
+ required: ["code"]
5563
+ }
5564
+ },
5565
+ computer: {
5566
+ description: "Control computer desktop. NOTE: This is a client-side tool - the client must handle computer control.",
5567
+ input_schema: {
5568
+ type: "object",
5569
+ properties: { action: {
5570
+ type: "string",
5571
+ description: "The action to perform"
5572
+ } },
5573
+ required: ["action"]
5574
+ }
5575
+ }
5576
+ };
5577
+ /**
5578
+ * Check if a tool is a server-side tool that needs conversion.
5579
+ */
5580
+ function getServerToolPrefix(tool) {
5581
+ if (tool.type) {
5582
+ for (const prefix of Object.keys(SERVER_TOOL_CONFIGS)) if (tool.type.startsWith(prefix)) return prefix;
5583
+ }
5584
+ return null;
5585
+ }
5586
+ /**
5587
+ * Convert server-side tools to custom tools, or pass them through unchanged.
5588
+ * This allows them to be passed to the API and handled by the client.
5589
+ *
5590
+ * Note: Server-side tools are only converted if state.rewriteAnthropicTools is enabled.
5591
+ */
5592
+ function convertServerToolsToCustom(tools) {
5593
+ if (!tools) return;
5594
+ const result = [];
5595
+ for (const tool of tools) {
5596
+ const serverToolPrefix = getServerToolPrefix(tool);
5597
+ if (serverToolPrefix) {
5598
+ const config = SERVER_TOOL_CONFIGS[serverToolPrefix];
5599
+ if (!state.rewriteAnthropicTools) {
5600
+ consola.debug(`[DirectAnthropic] Passing ${serverToolPrefix} through unchanged (use --rewrite-anthropic-tools to convert)`);
5601
+ result.push(tool);
5602
+ continue;
5603
+ }
5604
+ if (config.remove) {
5605
+ consola.warn(`[DirectAnthropic] Removing unsupported server tool: ${tool.name}. Reason: ${config.removalReason}`);
5606
+ continue;
5607
+ }
5608
+ consola.debug(`[DirectAnthropic] Converting server tool to custom: ${tool.name} (type: ${tool.type})`);
5609
+ result.push({
5610
+ name: tool.name,
5611
+ description: config.description,
5612
+ input_schema: config.input_schema
5613
+ });
5614
+ } else result.push(tool);
5615
+ }
5616
+ return result.length > 0 ? result : void 0;
5617
+ }
5618
+ /**
5619
+ * Check if a model supports direct Anthropic API.
5620
+ * Returns true if redirect is disabled (direct API is on) and the model is from Anthropic vendor.
5621
+ */
5622
+ function supportsDirectAnthropicApi(modelId) {
5623
+ if (state.redirectAnthropic) return false;
5624
+ return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
5625
+ }
5626
+
5627
+ //#endregion
5628
+ //#region src/routes/messages/stream-accumulator.ts
5629
+ function createAnthropicStreamAccumulator() {
5630
+ return {
5631
+ model: "",
5632
+ inputTokens: 0,
5633
+ outputTokens: 0,
5634
+ stopReason: "",
5635
+ content: "",
5636
+ toolCalls: [],
5637
+ currentToolCall: null
5638
+ };
5639
+ }
5640
+ function processAnthropicEvent(event, acc) {
5641
+ switch (event.type) {
5642
+ case "content_block_delta":
5643
+ handleContentBlockDelta(event.delta, acc);
5644
+ break;
5645
+ case "content_block_start":
5646
+ handleContentBlockStart(event.content_block, acc);
5647
+ break;
5648
+ case "content_block_stop":
5649
+ handleContentBlockStop(acc);
5650
+ break;
5651
+ case "message_delta":
5652
+ handleMessageDelta(event.delta, event.usage, acc);
5653
+ break;
5654
+ default: break;
5655
+ }
5656
+ }
5657
+ function handleContentBlockDelta(delta, acc) {
5658
+ if (delta.type === "text_delta") acc.content += delta.text;
5659
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
5660
+ }
5661
+ function handleContentBlockStart(block, acc) {
5662
+ if (block.type === "tool_use") acc.currentToolCall = {
5663
+ id: block.id,
5664
+ name: block.name,
5665
+ input: ""
5666
+ };
5667
+ }
5668
+ function handleContentBlockStop(acc) {
5669
+ if (acc.currentToolCall) {
5670
+ acc.toolCalls.push(acc.currentToolCall);
5671
+ acc.currentToolCall = null;
5672
+ }
5673
+ }
5674
+ function handleMessageDelta(delta, usage, acc) {
5675
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
5676
+ if (usage) {
5677
+ acc.inputTokens = usage.input_tokens ?? 0;
5678
+ acc.outputTokens = usage.output_tokens;
5679
+ }
5680
+ }
5681
+
4420
5682
  //#endregion
4421
5683
  //#region src/routes/messages/stream-translation.ts
4422
5684
  function isToolBlockOpen(state$1) {
@@ -4522,68 +5784,244 @@ function translateChunkToAnthropicEvents(chunk, state$1, toolNameMapping) {
4522
5784
  });
4523
5785
  }
4524
5786
  }
4525
- if (choice.finish_reason) {
4526
- if (state$1.contentBlockOpen) {
4527
- events$1.push({
4528
- type: "content_block_stop",
4529
- index: state$1.contentBlockIndex
5787
+ if (choice.finish_reason) {
5788
+ if (state$1.contentBlockOpen) {
5789
+ events$1.push({
5790
+ type: "content_block_stop",
5791
+ index: state$1.contentBlockIndex
5792
+ });
5793
+ state$1.contentBlockOpen = false;
5794
+ }
5795
+ events$1.push({
5796
+ type: "message_delta",
5797
+ delta: {
5798
+ stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
5799
+ stop_sequence: null
5800
+ },
5801
+ usage: {
5802
+ input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
5803
+ output_tokens: chunk.usage?.completion_tokens ?? 0,
5804
+ ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
5805
+ }
5806
+ }, { type: "message_stop" });
5807
+ }
5808
+ return events$1;
5809
+ }
5810
+ function translateErrorToAnthropicErrorEvent() {
5811
+ return {
5812
+ type: "error",
5813
+ error: {
5814
+ type: "api_error",
5815
+ message: "An unexpected error occurred during streaming."
5816
+ }
5817
+ };
5818
+ }
5819
+
5820
+ //#endregion
5821
+ //#region src/routes/messages/direct-anthropic-handler.ts
5822
+ /**
5823
+ * Handle completion using direct Anthropic API (no translation needed)
5824
+ */
5825
+ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
5826
+ consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
5827
+ const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
5828
+ let effectivePayload = anthropicPayload;
5829
+ let truncateResult;
5830
+ if (state.autoTruncate && selectedModel) {
5831
+ const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5832
+ consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
5833
+ if (check.needed) try {
5834
+ truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
5835
+ if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
5836
+ } catch (error) {
5837
+ consola.warn("[Anthropic] Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
5838
+ }
5839
+ } else if (state.autoTruncate && !selectedModel) consola.debug(`[Anthropic] Model '${anthropicPayload.model}' not found, skipping auto-truncate`);
5840
+ if (state.manualApprove) await awaitApproval();
5841
+ try {
5842
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
5843
+ ctx.queueWaitMs = queueWaitMs;
5844
+ if (Symbol.asyncIterator in response) {
5845
+ consola.debug("Streaming response from Copilot (direct Anthropic)");
5846
+ updateTrackerStatus(ctx.trackingId, "streaming");
5847
+ return streamSSE(c, async (stream) => {
5848
+ await handleDirectAnthropicStreamingResponse({
5849
+ stream,
5850
+ response,
5851
+ anthropicPayload: effectivePayload,
5852
+ ctx
5853
+ });
5854
+ });
5855
+ }
5856
+ return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
5857
+ } catch (error) {
5858
+ if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
5859
+ recordErrorResponse(ctx, anthropicPayload.model, error);
5860
+ throw error;
5861
+ }
5862
+ }
5863
+ /**
5864
+ * Log payload size info for debugging 413 errors
5865
+ */
5866
+ function logPayloadSizeInfoAnthropic(payload, model) {
5867
+ const payloadSize = JSON.stringify(payload).length;
5868
+ const messageCount = payload.messages.length;
5869
+ const toolCount = payload.tools?.length ?? 0;
5870
+ const systemSize = payload.system ? JSON.stringify(payload.system).length : 0;
5871
+ consola.info(`[Anthropic 413] Payload size: ${Math.round(payloadSize / 1024)}KB, messages: ${messageCount}, tools: ${toolCount}, system: ${Math.round(systemSize / 1024)}KB`);
5872
+ if (model?.capabilities?.limits) {
5873
+ const limits = model.capabilities.limits;
5874
+ consola.info(`[Anthropic 413] Model limits: context=${limits.max_context_window_tokens}, prompt=${limits.max_prompt_tokens}, output=${limits.max_output_tokens}`);
5875
+ }
5876
+ if (!state.autoTruncate) consola.info("[Anthropic 413] Consider enabling --auto-truncate to automatically reduce payload size");
5877
+ }
5878
+ /**
5879
+ * Handle non-streaming direct Anthropic response
5880
+ */
5881
+ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
5882
+ consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
5883
+ recordResponse(ctx.historyId, {
5884
+ success: true,
5885
+ model: response.model,
5886
+ usage: response.usage,
5887
+ stop_reason: response.stop_reason ?? void 0,
5888
+ content: {
5889
+ role: "assistant",
5890
+ content: response.content.map((block) => {
5891
+ switch (block.type) {
5892
+ case "text": return {
5893
+ type: "text",
5894
+ text: block.text
5895
+ };
5896
+ case "tool_use": return {
5897
+ type: "tool_use",
5898
+ id: block.id,
5899
+ name: block.name,
5900
+ input: JSON.stringify(block.input)
5901
+ };
5902
+ case "thinking": return {
5903
+ type: "thinking",
5904
+ thinking: block.thinking
5905
+ };
5906
+ default: return { type: block.type };
5907
+ }
5908
+ })
5909
+ },
5910
+ toolCalls: extractToolCallsFromAnthropicContent(response.content)
5911
+ }, Date.now() - ctx.startTime);
5912
+ if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
5913
+ inputTokens: response.usage.input_tokens,
5914
+ outputTokens: response.usage.output_tokens,
5915
+ queueWaitMs: ctx.queueWaitMs
5916
+ });
5917
+ let finalResponse = response;
5918
+ if (state.verbose && truncateResult?.wasCompacted) {
5919
+ const marker = createTruncationMarker(truncateResult);
5920
+ finalResponse = prependMarkerToAnthropicResponse$1(response, marker);
5921
+ }
5922
+ return c.json(finalResponse);
5923
+ }
5924
+ /**
5925
+ * Prepend marker to Anthropic response content (at the beginning of first text block)
5926
+ */
5927
+ function prependMarkerToAnthropicResponse$1(response, marker) {
5928
+ if (!marker) return response;
5929
+ const content = [...response.content];
5930
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
5931
+ if (firstTextIndex !== -1) {
5932
+ const textBlock = content[firstTextIndex];
5933
+ if (textBlock.type === "text") content[firstTextIndex] = {
5934
+ ...textBlock,
5935
+ text: marker + textBlock.text
5936
+ };
5937
+ } else content.unshift({
5938
+ type: "text",
5939
+ text: marker
5940
+ });
5941
+ return {
5942
+ ...response,
5943
+ content
5944
+ };
5945
+ }
5946
+ /**
5947
+ * Handle streaming direct Anthropic response (passthrough SSE events)
5948
+ */
5949
+ async function handleDirectAnthropicStreamingResponse(opts) {
5950
+ const { stream, response, anthropicPayload, ctx } = opts;
5951
+ const acc = createAnthropicStreamAccumulator();
5952
+ try {
5953
+ for await (const rawEvent of response) {
5954
+ consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
5955
+ if (rawEvent.data === "[DONE]") break;
5956
+ if (!rawEvent.data) continue;
5957
+ let event;
5958
+ try {
5959
+ event = JSON.parse(rawEvent.data);
5960
+ } catch (parseError) {
5961
+ consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
5962
+ continue;
5963
+ }
5964
+ processAnthropicEvent(event, acc);
5965
+ await stream.writeSSE({
5966
+ event: rawEvent.event || event.type,
5967
+ data: rawEvent.data
4530
5968
  });
4531
- state$1.contentBlockOpen = false;
4532
5969
  }
4533
- events$1.push({
4534
- type: "message_delta",
4535
- delta: {
4536
- stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
4537
- stop_sequence: null
4538
- },
4539
- usage: {
4540
- input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
4541
- output_tokens: chunk.usage?.completion_tokens ?? 0,
4542
- ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
4543
- }
4544
- }, { type: "message_stop" });
5970
+ recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
5971
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
5972
+ } catch (error) {
5973
+ consola.error("Direct Anthropic stream error:", error);
5974
+ recordStreamError({
5975
+ acc,
5976
+ fallbackModel: anthropicPayload.model,
5977
+ ctx,
5978
+ error
5979
+ });
5980
+ failTracking(ctx.trackingId, error);
5981
+ const errorEvent = translateErrorToAnthropicErrorEvent();
5982
+ await stream.writeSSE({
5983
+ event: errorEvent.type,
5984
+ data: JSON.stringify(errorEvent)
5985
+ });
4545
5986
  }
4546
- return events$1;
4547
5987
  }
4548
- function translateErrorToAnthropicErrorEvent() {
4549
- return {
4550
- type: "error",
4551
- error: {
4552
- type: "api_error",
4553
- message: "An unexpected error occurred during streaming."
4554
- }
4555
- };
5988
+ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
5989
+ const contentBlocks = [];
5990
+ if (acc.content) contentBlocks.push({
5991
+ type: "text",
5992
+ text: acc.content
5993
+ });
5994
+ for (const tc of acc.toolCalls) contentBlocks.push({
5995
+ type: "tool_use",
5996
+ ...tc
5997
+ });
5998
+ recordResponse(ctx.historyId, {
5999
+ success: true,
6000
+ model: acc.model || fallbackModel,
6001
+ usage: {
6002
+ input_tokens: acc.inputTokens,
6003
+ output_tokens: acc.outputTokens
6004
+ },
6005
+ stop_reason: acc.stopReason || void 0,
6006
+ content: contentBlocks.length > 0 ? {
6007
+ role: "assistant",
6008
+ content: contentBlocks
6009
+ } : null,
6010
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
6011
+ }, Date.now() - ctx.startTime);
4556
6012
  }
4557
6013
 
4558
6014
  //#endregion
4559
- //#region src/routes/messages/handler.ts
4560
- async function handleCompletion(c) {
4561
- const anthropicPayload = await c.req.json();
4562
- consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
4563
- const trackingId = c.get("trackingId");
4564
- const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
4565
- updateTrackerModel(trackingId, anthropicPayload.model);
4566
- const ctx = {
4567
- historyId: recordRequest("anthropic", {
4568
- model: anthropicPayload.model,
4569
- messages: convertAnthropicMessages(anthropicPayload.messages),
4570
- stream: anthropicPayload.stream ?? false,
4571
- tools: anthropicPayload.tools?.map((t) => ({
4572
- name: t.name,
4573
- description: t.description
4574
- })),
4575
- max_tokens: anthropicPayload.max_tokens,
4576
- temperature: anthropicPayload.temperature,
4577
- system: extractSystemPrompt(anthropicPayload.system)
4578
- }),
4579
- trackingId,
4580
- startTime
4581
- };
6015
+ //#region src/routes/messages/translated-handler.ts
6016
+ /**
6017
+ * Handle completion using OpenAI translation path (legacy)
6018
+ */
6019
+ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
4582
6020
  const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
4583
6021
  consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
4584
6022
  const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
4585
- const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
4586
- if (compactResult) ctx.compactResult = compactResult;
6023
+ const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
6024
+ if (truncateResult) ctx.truncateResult = truncateResult;
4587
6025
  if (state.manualApprove) await awaitApproval();
4588
6026
  try {
4589
6027
  const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
@@ -4595,7 +6033,7 @@ async function handleCompletion(c) {
4595
6033
  ctx
4596
6034
  });
4597
6035
  consola.debug("Streaming response from Copilot");
4598
- updateTrackerStatus(trackingId, "streaming");
6036
+ updateTrackerStatus(ctx.trackingId, "streaming");
4599
6037
  return streamSSE(c, async (stream) => {
4600
6038
  await handleStreamingResponse({
4601
6039
  stream,
@@ -4616,8 +6054,8 @@ function handleNonStreamingResponse(opts) {
4616
6054
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
4617
6055
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4618
6056
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4619
- if (ctx.compactResult?.wasCompacted) {
4620
- const marker = createCompactionMarker(ctx.compactResult);
6057
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
6058
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
4621
6059
  anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4622
6060
  }
4623
6061
  recordResponse(ctx.historyId, {
@@ -4668,17 +6106,6 @@ function prependMarkerToAnthropicResponse(response, marker) {
4668
6106
  content
4669
6107
  };
4670
6108
  }
4671
- function createAnthropicStreamAccumulator() {
4672
- return {
4673
- model: "",
4674
- inputTokens: 0,
4675
- outputTokens: 0,
4676
- stopReason: "",
4677
- content: "",
4678
- toolCalls: [],
4679
- currentToolCall: null
4680
- };
4681
- }
4682
6109
  async function handleStreamingResponse(opts) {
4683
6110
  const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
4684
6111
  const streamState = {
@@ -4689,9 +6116,9 @@ async function handleStreamingResponse(opts) {
4689
6116
  };
4690
6117
  const acc = createAnthropicStreamAccumulator();
4691
6118
  try {
4692
- if (ctx.compactResult?.wasCompacted) {
4693
- const marker = createCompactionMarker(ctx.compactResult);
4694
- await sendCompactionMarkerEvent(stream, streamState, marker);
6119
+ if (ctx.truncateResult?.wasCompacted) {
6120
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
6121
+ await sendTruncationMarkerEvent(stream, streamState, marker);
4695
6122
  acc.content += marker;
4696
6123
  }
4697
6124
  await processStreamChunks({
@@ -4719,7 +6146,7 @@ async function handleStreamingResponse(opts) {
4719
6146
  });
4720
6147
  }
4721
6148
  }
4722
- async function sendCompactionMarkerEvent(stream, streamState, marker) {
6149
+ async function sendTruncationMarkerEvent(stream, streamState, marker) {
4723
6150
  const blockStartEvent = {
4724
6151
  type: "content_block_start",
4725
6152
  index: streamState.contentBlockIndex,
@@ -4779,47 +6206,6 @@ async function processStreamChunks(opts) {
4779
6206
  }
4780
6207
  }
4781
6208
  }
4782
- function processAnthropicEvent(event, acc) {
4783
- switch (event.type) {
4784
- case "content_block_delta":
4785
- handleContentBlockDelta(event.delta, acc);
4786
- break;
4787
- case "content_block_start":
4788
- handleContentBlockStart(event.content_block, acc);
4789
- break;
4790
- case "content_block_stop":
4791
- handleContentBlockStop(acc);
4792
- break;
4793
- case "message_delta":
4794
- handleMessageDelta(event.delta, event.usage, acc);
4795
- break;
4796
- default: break;
4797
- }
4798
- }
4799
- function handleContentBlockDelta(delta, acc) {
4800
- if (delta.type === "text_delta") acc.content += delta.text;
4801
- else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
4802
- }
4803
- function handleContentBlockStart(block, acc) {
4804
- if (block.type === "tool_use") acc.currentToolCall = {
4805
- id: block.id,
4806
- name: block.name,
4807
- input: ""
4808
- };
4809
- }
4810
- function handleContentBlockStop(acc) {
4811
- if (acc.currentToolCall) {
4812
- acc.toolCalls.push(acc.currentToolCall);
4813
- acc.currentToolCall = null;
4814
- }
4815
- }
4816
- function handleMessageDelta(delta, usage, acc) {
4817
- if (delta.stop_reason) acc.stopReason = delta.stop_reason;
4818
- if (usage) {
4819
- acc.inputTokens = usage.input_tokens ?? 0;
4820
- acc.outputTokens = usage.output_tokens;
4821
- }
4822
- }
4823
6209
  function recordStreamingResponse(acc, fallbackModel, ctx) {
4824
6210
  const contentBlocks = [];
4825
6211
  if (acc.content) contentBlocks.push({
@@ -4845,52 +6231,51 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
  toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
  }, Date.now() - ctx.startTime);
  }
- function convertAnthropicMessages(messages) {
- return messages.map((msg) => {
- if (typeof msg.content === "string") return {
- role: msg.role,
- content: msg.content
- };
- const content = msg.content.map((block) => {
- if (block.type === "text") return {
- type: "text",
- text: block.text
- };
- if (block.type === "tool_use") return {
- type: "tool_use",
- id: block.id,
- name: block.name,
- input: JSON.stringify(block.input)
- };
- if (block.type === "tool_result") {
- const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
- return {
- type: "tool_result",
- tool_use_id: block.tool_use_id,
- content: resultContent
- };
- }
- return { type: block.type };
- });
- return {
- role: msg.role,
- content
- };
- });
- }
- function extractSystemPrompt(system) {
- if (!system) return void 0;
- if (typeof system === "string") return system;
- return system.map((block) => block.text).join("\n");
+
+ //#endregion
+ //#region src/routes/messages/handler.ts
+ async function handleCompletion(c) {
+ const anthropicPayload = await c.req.json();
+ consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
+ logToolInfo(anthropicPayload);
+ const useDirectAnthropicApi = supportsDirectAnthropicApi(anthropicPayload.model);
+ const trackingId = c.get("trackingId");
+ const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
+ updateTrackerModel(trackingId, anthropicPayload.model);
+ const ctx = {
+ historyId: recordRequest("anthropic", {
+ model: anthropicPayload.model,
+ messages: convertAnthropicMessages(anthropicPayload.messages),
+ stream: anthropicPayload.stream ?? false,
+ tools: anthropicPayload.tools?.map((t) => ({
+ name: t.name,
+ description: t.description
+ })),
+ max_tokens: anthropicPayload.max_tokens,
+ temperature: anthropicPayload.temperature,
+ system: extractSystemPrompt(anthropicPayload.system)
+ }),
+ trackingId,
+ startTime
+ };
+ if (useDirectAnthropicApi) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
+ return handleTranslatedCompletion(c, anthropicPayload, ctx);
  }
- function extractToolCallsFromContent(content) {
- const tools = [];
- for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
- id: String(block.id),
- name: String(block.name),
- input: JSON.stringify(block.input)
- });
- return tools.length > 0 ? tools : void 0;
+ /**
+ * Log tool-related information for debugging
+ */
+ function logToolInfo(anthropicPayload) {
+ if (anthropicPayload.tools?.length) {
+ const toolInfo = anthropicPayload.tools.map((t) => ({
+ name: t.name,
+ type: t.type ?? "(custom)"
+ }));
+ consola.debug(`[Tools] Defined tools:`, JSON.stringify(toolInfo));
+ }
+ for (const msg of anthropicPayload.messages) if (typeof msg.content !== "string") for (const block of msg.content) {
+ if (block.type === "tool_use") consola.debug(`[Tools] tool_use in message: ${block.name} (id: ${block.id})`);
+ if (block.type === "tool_result") consola.debug(`[Tools] tool_result in message: id=${block.tool_use_id}, is_error=${block.is_error ?? false}`);
+ }
  }

  //#endregion
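
The handleCompletion handler added above parses the Anthropic-format payload, records it in request history, then dispatches: models that support the direct Anthropic API go to handleDirectAnthropicCompletion, everything else through the OpenAI translation path. A hedged client-side example of exercising such a route; the port and path here are assumptions based on common copilot-api setups, not taken from this diff:

// Hypothetical request against the proxy's Anthropic-compatible route.
const res = await fetch("http://localhost:4141/v1/messages", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    model: "claude-sonnet-4", // illustrative model id
    max_tokens: 256,
    stream: false,
    messages: [{ role: "user", content: "ping" }]
  })
});
console.log(res.status, await res.json());
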
@@ -5004,13 +6389,18 @@ server.route("/history", historyRoutes);

  //#endregion
  //#region src/start.ts
+ /** Format limit values as "Xk" or "?" if not available */
+ function formatLimit(value) {
+ return value ? `${Math.round(value / 1e3)}k` : "?";
+ }
  function formatModelInfo(model) {
  const limits = model.capabilities?.limits;
- const contextK = limits?.max_prompt_tokens ? `${Math.round(limits.max_prompt_tokens / 1e3)}k` : "?";
- const outputK = limits?.max_output_tokens ? `${Math.round(limits.max_output_tokens / 1e3)}k` : "?";
+ const contextK = formatLimit(limits?.max_context_window_tokens);
+ const promptK = formatLimit(limits?.max_prompt_tokens);
+ const outputK = formatLimit(limits?.max_output_tokens);
  const features = [model.capabilities?.supports?.tool_calls && "tools", model.preview && "preview"].filter(Boolean).join(", ");
  const featureStr = features ? ` (${features})` : "";
- return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
+ return ` - ${model.id.length > 30 ? `${model.id.slice(0, 27)}...` : model.id.padEnd(30)} ctx:${contextK.padStart(5)} in:${promptK.padStart(5)} out:${outputK.padStart(4)}` + featureStr;
  }
  async function runServer(options) {
  consola.info(`copilot-api v${package_default.version}`);
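
With this change, formatModelInfo reports the context window, prompt, and output limits separately through the shared formatLimit helper, and model ids longer than 30 characters are truncated with an ellipsis instead of overflowing the column. A quick standalone check of formatLimit's rounding (sample values are illustrative):

// formatLimit as defined above; inputs are illustrative.
const formatLimit = (value) => (value ? `${Math.round(value / 1e3)}k` : "?");
console.log(formatLimit(128e3));  // "128k"
console.log(formatLimit(16384));  // "16k" (16.384 rounds to 16)
console.log(formatLimit(void 0)); // "?" — missing limits render as a placeholder
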
@@ -5018,12 +6408,16 @@ async function runServer(options) {
  if (options.verbose) {
  consola.level = 5;
  consola.info("Verbose logging enabled");
+ state.verbose = true;
  }
  state.accountType = options.accountType;
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
  state.manualApprove = options.manual;
  state.showToken = options.showToken;
- state.autoCompact = options.autoCompact;
+ state.autoTruncate = options.autoTruncate;
+ state.compressToolResults = options.compressToolResults;
+ state.redirectAnthropic = options.redirectAnthropic;
+ state.rewriteAnthropicTools = options.rewriteAnthropicTools;
  if (options.rateLimit) initAdaptiveRateLimiter({
  baseRetryIntervalSeconds: options.retryInterval,
  requestIntervalSeconds: options.requestInterval,
@@ -5031,7 +6425,10 @@ async function runServer(options) {
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
  });
  else consola.info("Rate limiting disabled");
- if (!options.autoCompact) consola.info("Auto-compact disabled");
+ if (!options.autoTruncate) consola.info("Auto-truncate disabled");
+ if (options.compressToolResults) consola.info("Tool result compression enabled");
+ if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
+ if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
  initHistory(options.history, options.historyLimit);
  if (options.history) {
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
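
runServer only prints a notice when a flag deviates from its default, so a default configuration stays quiet at startup. The same notice logic in isolation (messages copied from the code above; consola is the logger the package already uses):

import { consola } from "consola";

// Standalone mirror of the startup notices above.
function logFlagNotices(options) {
  if (!options.autoTruncate) consola.info("Auto-truncate disabled");
  if (options.compressToolResults) consola.info("Tool result compression enabled");
  if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
  if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
}

// Prints only "Tool result compression enabled":
logFlagNotices({ autoTruncate: true, compressToolResults: true, redirectAnthropic: false, rewriteAnthropicTools: true });
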
@@ -5173,10 +6570,25 @@ const start = defineCommand({
  default: "1000",
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
  },
- "no-auto-compact": {
+ "no-auto-truncate": {
+ type: "boolean",
+ default: false,
+ description: "Disable automatic conversation history truncation when exceeding limits"
+ },
+ "compress-tool-results": {
+ type: "boolean",
+ default: false,
+ description: "Compress old tool_result content before truncating messages (may lose context details)"
+ },
+ "redirect-anthropic": {
+ type: "boolean",
+ default: false,
+ description: "Redirect Anthropic models through OpenAI translation (instead of direct API)"
+ },
+ "no-rewrite-anthropic-tools": {
  type: "boolean",
  default: false,
- description: "Disable automatic conversation history compression when exceeding limits"
+ description: "Don't rewrite Anthropic server-side tools (web_search, etc.) to custom tool format"
  }
  },
  run({ args }) {
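
The old no-auto-compact switch is replaced by four flags. The two negative flags (no-auto-truncate, no-rewrite-anthropic-tools) default to false, so truncation and server-side tool rewriting stay enabled unless explicitly turned off. A sketch of the args object such a citty-style parser would hand to run(); the command-line spelling is an assumption based on the flag names, so check the package's bin entry for the exact binary name:

// e.g. `copilot-api start --compress-tool-results --no-auto-truncate`
// would parse to roughly:
const args = {
  "no-auto-truncate": true,
  "compress-tool-results": true,
  "redirect-anthropic": false,
  "no-rewrite-anthropic-tools": false
};
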
@@ -5197,7 +6609,10 @@ const start = defineCommand({
  proxyEnv: args["proxy-env"],
  history: !args["no-history"],
  historyLimit: Number.parseInt(args["history-limit"], 10),
- autoCompact: !args["no-auto-compact"]
+ autoTruncate: !args["no-auto-truncate"],
+ compressToolResults: args["compress-tool-results"],
+ redirectAnthropic: args["redirect-anthropic"],
+ rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"]
  });
  }
  });
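
Note how run() negates the no-* flags when building the runServer options, which is what makes the positive behavior the default. A self-contained check of that mapping (the args literal is illustrative, matching a flagless command line):

// Default command line: no flags passed.
const args = {
  "no-auto-truncate": false,
  "compress-tool-results": false,
  "redirect-anthropic": false,
  "no-rewrite-anthropic-tools": false
};
const options = {
  autoTruncate: !args["no-auto-truncate"],                   // true: truncation on by default
  compressToolResults: args["compress-tool-results"],        // false: compression off by default
  redirectAnthropic: args["redirect-anthropic"],             // false: direct Anthropic API by default
  rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"] // true: rewriting on by default
};
console.log(options);
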