@hsupu/copilot-api 0.7.10 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,11 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
- autoCompact: true
+ verbose: false,
+ autoTruncate: true,
+ compressToolResults: false,
+ redirectAnthropic: false,
+ rewriteAnthropicTools: true
  };
 
  //#endregion
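0.7.12 replaces the single `autoCompact` switch with five feature flags. A rough sketch of the shape these defaults imply (the interface is hypothetical; `dist/main.js` keeps `state` as a plain untyped object):

```ts
// Hypothetical typing of the new flags, inferred from this diff.
interface TruncationFlags {
  verbose: boolean;              // when true, prepend truncation markers to responses
  autoTruncate: boolean;         // successor to the removed autoCompact flag
  compressToolResults: boolean;  // compress large tool_result blocks before dropping messages
  redirectAnthropic: boolean;
  rewriteAnthropicTools: boolean;
}

const defaults: TruncationFlags = {
  verbose: false,
  autoTruncate: true,
  compressToolResults: false,
  redirectAnthropic: false,
  rewriteAnthropicTools: true,
};
```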
@@ -90,27 +94,78 @@ const GITHUB_BASE_URL = "https://github.com";
  const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
  const GITHUB_APP_SCOPES = ["read:user"].join(" ");
 
+ //#endregion
+ //#region src/lib/auto-truncate-common.ts
+ const DEFAULT_AUTO_TRUNCATE_CONFIG = {
+ safetyMarginPercent: 2,
+ maxRequestBodyBytes: 510 * 1024,
+ preserveRecentPercent: .7
+ };
+ /** Dynamic byte limit that adjusts based on 413 errors */
+ let dynamicByteLimit = null;
+ /**
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
+ */
+ function onRequestTooLarge(failingBytes) {
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+ dynamicByteLimit = newLimit;
+ consola.info(`[AutoTruncate] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
+ }
+ /** Get the current effective byte limit */
+ function getEffectiveByteLimitBytes() {
+ return dynamicByteLimit ?? DEFAULT_AUTO_TRUNCATE_CONFIG.maxRequestBodyBytes;
+ }
+ /** Dynamic token limits per model, adjusted based on token limit errors */
+ const dynamicTokenLimits = /* @__PURE__ */ new Map();
+ /**
+ * Called when a token limit error (400) occurs.
+ * Adjusts the token limit for the specific model to 95% of the reported limit.
+ */
+ function onTokenLimitExceeded(modelId, reportedLimit) {
+ const newLimit = Math.floor(reportedLimit * .95);
+ const previous = dynamicTokenLimits.get(modelId);
+ if (!previous || newLimit < previous) {
+ dynamicTokenLimits.set(modelId, newLimit);
+ consola.info(`[AutoTruncate] Adjusted token limit for ${modelId}: ${reportedLimit} reported → ${newLimit} effective`);
+ }
+ }
+ /**
+ * Get the effective token limit for a model.
+ * Returns the dynamic limit if set, otherwise null to use model capabilities.
+ */
+ function getEffectiveTokenLimit(modelId) {
+ return dynamicTokenLimits.get(modelId) ?? null;
+ }
+
  //#endregion
  //#region src/lib/error.ts
  var HTTPError = class HTTPError extends Error {
  status;
  responseText;
- constructor(message, status, responseText) {
+ /** Model ID that caused the error (if known) */
+ modelId;
+ constructor(message, status, responseText, modelId) {
  super(message);
  this.status = status;
  this.responseText = responseText;
+ this.modelId = modelId;
  }
- static async fromResponse(message, response) {
+ static async fromResponse(message, response, modelId) {
  const text = await response.text();
- return new HTTPError(message, response.status, text);
+ return new HTTPError(message, response.status, text, modelId);
  }
  };
  /** Parse token limit info from error message */
  function parseTokenLimitError(message) {
- const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
- if (match) return {
- current: Number.parseInt(match[1], 10),
- limit: Number.parseInt(match[2], 10)
+ const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+ if (openaiMatch) return {
+ current: Number.parseInt(openaiMatch[1], 10),
+ limit: Number.parseInt(openaiMatch[2], 10)
+ };
+ const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
+ if (anthropicMatch) return {
+ current: Number.parseInt(anthropicMatch[1], 10),
+ limit: Number.parseInt(anthropicMatch[2], 10)
  };
  return null;
  }
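`parseTokenLimitError` now recognizes both upstream phrasings: OpenAI-style ("prompt token count of N exceeds the limit of M") and Anthropic-style ("prompt is too long: N tokens > M maximum"). A self-contained check of the two regexes from the diff (the example numbers are illustrative):

```ts
function parseTokenLimitError(message: string): { current: number; limit: number } | null {
  const openaiMatch = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
  if (openaiMatch) {
    return { current: Number.parseInt(openaiMatch[1], 10), limit: Number.parseInt(openaiMatch[2], 10) };
  }
  const anthropicMatch = message.match(/prompt is too long: (\d+) tokens > (\d+) maximum/);
  if (anthropicMatch) {
    return { current: Number.parseInt(anthropicMatch[1], 10), limit: Number.parseInt(anthropicMatch[2], 10) };
  }
  return null;
}

parseTokenLimitError("prompt token count of 131072 exceeds the limit of 128000");
// → { current: 131072, limit: 128000 }
parseTokenLimitError("prompt is too long: 210000 tokens > 200000 maximum");
// → { current: 210000, limit: 200000 }
```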
@@ -147,11 +202,10 @@ function formatRateLimitError(copilotMessage) {
  };
  }
  function forwardError(c, error) {
- consola.error("Error occurred:", error);
  if (error instanceof HTTPError) {
  if (error.status === 413) {
  const formattedError = formatRequestTooLargeError();
- consola.debug("Returning formatted 413 error:", formattedError);
+ consola.warn(`HTTP 413: Request too large`);
  return c.json(formattedError, 413);
  }
  let errorJson;
@@ -160,26 +214,38 @@ function forwardError(c, error) {
  } catch {
  errorJson = error.responseText;
  }
- consola.error("HTTP error:", errorJson);
  const copilotError = errorJson;
  if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
  const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
  if (tokenInfo) {
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
+ return c.json(formattedError, 400);
+ }
+ }
+ const anthropicError = errorJson;
+ if (anthropicError.error?.type === "invalid_request_error") {
+ const tokenInfo = parseTokenLimitError(anthropicError.error.message ?? "");
+ if (tokenInfo) {
+ if (error.modelId) onTokenLimitExceeded(error.modelId, tokenInfo.limit);
  const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
- consola.debug("Returning formatted token limit error:", formattedError);
+ consola.warn(`HTTP ${error.status}: Token limit exceeded (${tokenInfo.current} > ${tokenInfo.limit})`);
  return c.json(formattedError, 400);
  }
  }
  if (error.status === 429 || copilotError.error?.code === "rate_limited") {
  const formattedError = formatRateLimitError(copilotError.error?.message);
- consola.debug("Returning formatted rate limit error:", formattedError);
+ consola.warn(`HTTP 429: Rate limit exceeded`);
  return c.json(formattedError, 429);
  }
+ consola.error(`HTTP ${error.status}:`, errorJson);
  return c.json({ error: {
  message: error.responseText,
  type: "error"
  } }, error.status);
  }
+ consola.error("Unexpected error:", error);
  return c.json({ error: {
  message: error.message,
  type: "error"
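`forwardError` now feeds both error shapes back into the limits from `auto-truncate-common.ts`, closing a feedback loop: a 413 lowers the byte budget via `onRequestTooLarge`, a token-limit 400 lowers the per-model token budget via `onTokenLimitExceeded`, and later requests are truncated against the tightened values. A minimal sketch of the byte half of that loop (names from the diff; the numbers are illustrative):

```ts
let dynamicByteLimit: number | null = null;
const DEFAULT_MAX_BODY_BYTES = 510 * 1024;

function onRequestTooLarge(failingBytes: number): void {
  // 90% of the failing size, clamped to at least 100KB.
  dynamicByteLimit = Math.max(Math.floor(failingBytes * 0.9), 100 * 1024);
}

function getEffectiveByteLimitBytes(): number {
  return dynamicByteLimit ?? DEFAULT_MAX_BODY_BYTES;
}

// One 413 tightens the budget for every request that follows:
onRequestTooLarge(520 * 1024); // upstream rejected a ~520KB body
getEffectiveByteLimitBytes();  // 479232 (~468KB), down from the 522240 default
```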
@@ -305,6 +371,7 @@ async function pollAccessToken(deviceCode) {
  //#region src/lib/token.ts
  const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
  const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
+ let copilotTokenRefreshTimer = null;
  /**
  * Refresh the Copilot token with exponential backoff retry.
  * Returns the new token on success, or null if all retries fail.
@@ -323,20 +390,34 @@ async function refreshCopilotTokenWithRetry(maxRetries = 3) {
  consola.error("All token refresh attempts failed:", lastError);
  return null;
  }
+ /**
+ * Clear any existing token refresh timer.
+ * Call this before setting up a new timer or during cleanup.
+ */
+ function clearCopilotTokenRefresh() {
+ if (copilotTokenRefreshTimer) {
+ clearInterval(copilotTokenRefreshTimer);
+ copilotTokenRefreshTimer = null;
+ }
+ }
  const setupCopilotToken = async () => {
  const { token, refresh_in } = await getCopilotToken();
  state.copilotToken = token;
  consola.debug("GitHub Copilot Token fetched successfully!");
  if (state.showToken) consola.info("Copilot token:", token);
- const refreshInterval = (refresh_in - 60) * 1e3;
- setInterval(async () => {
+ const refreshInterval = Math.max((refresh_in - 60) * 1e3, 60 * 1e3);
+ clearCopilotTokenRefresh();
+ copilotTokenRefreshTimer = setInterval(() => {
  consola.debug("Refreshing Copilot token");
- const newToken = await refreshCopilotTokenWithRetry();
- if (newToken) {
- state.copilotToken = newToken;
- consola.debug("Copilot token refreshed");
- if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
- } else consola.error("Failed to refresh Copilot token after retries, using existing token");
+ refreshCopilotTokenWithRetry().then((newToken) => {
+ if (newToken) {
+ state.copilotToken = newToken;
+ consola.debug("Copilot token refreshed");
+ if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
+ } else consola.error("Failed to refresh Copilot token after retries, using existing token");
+ }).catch((error) => {
+ consola.error("Unexpected error during token refresh:", error);
+ });
  }, refreshInterval);
  };
  async function setupGitHubToken(options) {
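Three fixes land in `setupCopilotToken`: the refresh interval is floored at 60 seconds (a small or negative `refresh_in` could previously produce a pathological interval), the `async` interval callback is replaced with an explicit `.then/.catch` chain so a failed refresh cannot surface as an unhandled rejection, and the timer handle is stored so repeated setup calls no longer stack intervals. The pattern in isolation (the refresh function here is a hypothetical stub):

```ts
let timer: ReturnType<typeof setInterval> | null = null;

function clearRefresh(): void {
  if (timer) {
    clearInterval(timer);
    timer = null;
  }
}

function setupRefresh(refreshInSeconds: number, refresh: () => Promise<void>): void {
  // Floor at 60s so a tiny or negative refresh_in cannot busy-loop the proxy.
  const intervalMs = Math.max((refreshInSeconds - 60) * 1000, 60 * 1000);
  clearRefresh(); // calling setup twice no longer leaks a second interval
  timer = setInterval(() => {
    // Explicit .catch: a rejected refresh is logged instead of becoming
    // an unhandled promise rejection inside setInterval.
    refresh().catch((error) => console.error("refresh failed:", error));
  }, intervalMs);
}
```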
@@ -480,9 +561,23 @@ async function checkTokenExists() {
  return false;
  }
  }
- async function getDebugInfo() {
+ async function getAccountInfo() {
+ try {
+ await ensurePaths();
+ await setupGitHubToken();
+ if (!state.githubToken) return null;
+ const [user, copilot] = await Promise.all([getGitHubUser(), getCopilotUsage()]);
+ return {
+ user,
+ copilot
+ };
+ } catch {
+ return null;
+ }
+ }
+ async function getDebugInfo(includeAccount) {
  const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
- return {
+ const info = {
  version: version$1,
  runtime: getRuntimeInfo(),
  paths: {
@@ -491,9 +586,14 @@
  },
  tokenExists
  };
+ if (includeAccount && tokenExists) {
+ const account = await getAccountInfo();
+ if (account) info.account = account;
+ }
+ return info;
  }
  function printDebugInfoPlain(info) {
- consola.info(`copilot-api debug
+ let output = `copilot-api debug
 
  Version: ${info.version}
  Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch})
@@ -502,19 +602,24 @@ Paths:
  - APP_DIR: ${info.paths.APP_DIR}
  - GITHUB_TOKEN_PATH: ${info.paths.GITHUB_TOKEN_PATH}
 
- Token exists: ${info.tokenExists ? "Yes" : "No"}`);
+ Token exists: ${info.tokenExists ? "Yes" : "No"}`;
+ if (info.account) output += `
+
+ Account Info:
+ ${JSON.stringify(info.account, null, 2)}`;
+ consola.info(output);
  }
  function printDebugInfoJson(info) {
  console.log(JSON.stringify(info, null, 2));
  }
  async function runDebug(options) {
- const debugInfo = await getDebugInfo();
- if (options.json) printDebugInfoJson(debugInfo);
- else printDebugInfoPlain(debugInfo);
+ const debugInfo$1 = await getDebugInfo(true);
+ if (options.json) printDebugInfoJson(debugInfo$1);
+ else printDebugInfoPlain(debugInfo$1);
  }
- const debug = defineCommand({
+ const debugInfo = defineCommand({
  meta: {
- name: "debug",
+ name: "info",
  description: "Print debug information about the application"
  },
  args: { json: {
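`getDebugInfo` gains an `includeAccount` flag; when a token exists and both lookups succeed, the result carries an `account` object with the GitHub user and Copilot usage. A hedged usage sketch (field names inferred from the diff):

```ts
// Hypothetical caller of the new signature.
const info = await getDebugInfo(true);
console.log(info.version, info.tokenExists);
if (info.account) {
  // Present only when includeAccount was true, a token exists,
  // and getGitHubUser()/getCopilotUsage() both succeeded.
  console.log(info.account.user, info.account.copilot);
}
```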
@@ -526,6 +631,48 @@ const debug = defineCommand({
  return runDebug({ json: args.json });
  }
  });
+ const debugModels = defineCommand({
+ meta: {
+ name: "models",
+ description: "Fetch and display raw model data from Copilot API"
+ },
+ args: {
+ "account-type": {
+ type: "string",
+ alias: "a",
+ default: "individual",
+ description: "The type of GitHub account (individual, business, enterprise)"
+ },
+ "github-token": {
+ type: "string",
+ alias: "g",
+ description: "GitHub token to use (skips interactive auth)"
+ }
+ },
+ async run({ args }) {
+ state.accountType = args["account-type"];
+ await ensurePaths();
+ if (args["github-token"]) {
+ state.githubToken = args["github-token"];
+ consola.info("Using provided GitHub token");
+ } else await setupGitHubToken();
+ const { token } = await getCopilotToken();
+ state.copilotToken = token;
+ consola.info("Fetching models from Copilot API...");
+ const models = await getModels();
+ console.log(JSON.stringify(models, null, 2));
+ }
+ });
+ const debug = defineCommand({
+ meta: {
+ name: "debug",
+ description: "Debug commands for troubleshooting"
+ },
+ subCommands: {
+ info: debugInfo,
+ models: debugModels
+ }
+ });
 
  //#endregion
  //#region src/logout.ts
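The flat `debug` command becomes a citty command group: `debug info` prints the report above and `debug models` dumps raw model data. A minimal standalone sketch of the same pattern, using citty's real `defineCommand`/`runMain` API (the command bodies are illustrative):

```ts
import { defineCommand, runMain } from "citty";

const info = defineCommand({
  meta: { name: "info", description: "Print debug information" },
  run() {
    console.log("debug info");
  },
});

const debug = defineCommand({
  meta: { name: "debug", description: "Debug commands for troubleshooting" },
  // citty routes `mycli debug info` to the matching subcommand.
  subCommands: { info },
});

runMain(debug);
```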
@@ -552,7 +699,7 @@ const logout = defineCommand({
  });
 
  //#endregion
- //#region src/patch-claude.ts
+ //#region src/patch-claude-code.ts
  const SUPPORTED_VERSIONS = {
  v2a: {
  min: "2.0.0",
@@ -872,7 +1019,7 @@ const patchClaude = defineCommand({
  //#endregion
  //#region package.json
  var name = "@hsupu/copilot-api";
- var version = "0.7.10";
+ var version = "0.7.12";
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
  var keywords = [
  "proxy",
@@ -900,11 +1047,15 @@ var scripts = {
  "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
  "release": "bumpp && npm publish --access public",
  "start": "NODE_ENV=production bun run ./src/main.ts",
+ "test": "bun test tests/*.test.ts",
+ "test:all": "bun test tests/*.test.ts && bun test tests/integration/",
+ "test:integration": "bun test tests/integration/",
  "typecheck": "tsc"
  };
  var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
  var lint_staged = { "*": "bun run lint --fix" };
  var dependencies = {
+ "@anthropic-ai/tokenizer": "^0.0.4",
  "citty": "^0.1.6",
  "clipboardy": "^5.0.0",
  "consola": "^3.4.2",
@@ -951,7 +1102,7 @@ var package_default = {
 
  //#endregion
  //#region src/lib/adaptive-rate-limiter.ts
- const DEFAULT_CONFIG$1 = {
+ const DEFAULT_CONFIG = {
  baseRetryIntervalSeconds: 10,
  maxRetryIntervalSeconds: 120,
  requestIntervalSeconds: 10,
@@ -980,7 +1131,7 @@ var AdaptiveRateLimiter = class {
  recoveryStepIndex = 0;
  constructor(config = {}) {
  this.config = {
- ...DEFAULT_CONFIG$1,
+ ...DEFAULT_CONFIG,
  ...config
  };
  }
@@ -1222,12 +1373,12 @@ let rateLimiterInstance = null;
  */
  function initAdaptiveRateLimiter(config = {}) {
  rateLimiterInstance = new AdaptiveRateLimiter(config);
- const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
- const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
- const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
- const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
- const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
- const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
+ const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
+ const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
+ const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
+ const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
+ const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
+ const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
  }
  /**
@@ -1848,6 +1999,7 @@ var RequestTracker = class {
  requests = /* @__PURE__ */ new Map();
  renderer = null;
  completedQueue = [];
+ completedTimeouts = /* @__PURE__ */ new Map();
  historySize = 5;
  completedDisplayMs = 2e3;
  setRenderer(renderer) {
@@ -1907,11 +2059,22 @@ var RequestTracker = class {
  this.renderer?.onRequestComplete(request);
  this.requests.delete(id);
  this.completedQueue.push(request);
- while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
- setTimeout(() => {
+ while (this.completedQueue.length > this.historySize) {
+ const removed = this.completedQueue.shift();
+ if (removed) {
+ const timeoutId$1 = this.completedTimeouts.get(removed.id);
+ if (timeoutId$1) {
+ clearTimeout(timeoutId$1);
+ this.completedTimeouts.delete(removed.id);
+ }
+ }
+ }
+ const timeoutId = setTimeout(() => {
  const idx = this.completedQueue.indexOf(request);
  if (idx !== -1) this.completedQueue.splice(idx, 1);
+ this.completedTimeouts.delete(id);
  }, this.completedDisplayMs);
+ this.completedTimeouts.set(id, timeoutId);
  }
  /**
  * Mark request as failed with error
@@ -1946,11 +2109,13 @@ var RequestTracker = class {
  return this.requests.get(id);
  }
  /**
- * Clear all tracked requests
+ * Clear all tracked requests and pending timeouts
  */
  clear() {
  this.requests.clear();
  this.completedQueue = [];
+ for (const timeoutId of this.completedTimeouts.values()) clearTimeout(timeoutId);
+ this.completedTimeouts.clear();
  }
  };
  const requestTracker = new RequestTracker();
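`RequestTracker` previously left a live `setTimeout` behind for every completed request that was evicted early or cleared. The fix keys each pending timeout by request id so that both the history-size eviction and `clear()` can cancel it. The bookkeeping pattern in isolation (a sketch, not the tracker itself):

```ts
const pending = new Map<string, ReturnType<typeof setTimeout>>();

function scheduleRemoval(id: string, onExpire: () => void, delayMs: number): void {
  const timeoutId = setTimeout(() => {
    pending.delete(id); // fired: forget the handle
    onExpire();
  }, delayMs);
  pending.set(id, timeoutId);
}

function evict(id: string): void {
  const timeoutId = pending.get(id);
  if (timeoutId) {
    clearTimeout(timeoutId); // evicted early: cancel instead of leaking
    pending.delete(id);
  }
}

function clearAll(): void {
  for (const timeoutId of pending.values()) clearTimeout(timeoutId);
  pending.clear();
}
```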
@@ -2101,6 +2266,14 @@ const getTokenizerFromModel = (model) => {
  return model.capabilities?.tokenizer || "o200k_base";
  };
  /**
+ * Count tokens in a text string using the model's tokenizer.
+ * This is a simple wrapper for counting tokens in plain text.
+ */
+ const countTextTokens = async (text, model) => {
+ const tokenizer = getTokenizerFromModel(model);
+ return (await getEncodeChatFunction(tokenizer)).encode(text).length;
+ };
+ /**
  * Get model-specific constants for token calculation.
  * These values are empirically determined based on OpenAI's function calling token overhead.
  * - funcInit: Tokens for initializing a function definition
@@ -2206,7 +2379,9 @@ const numTokensForTools = (tools, encoder, constants) => {
  return funcTokenCount;
  };
  /**
- * Calculate the token count of messages, supporting multiple GPT encoders
+ * Calculate the token count of messages.
+ * Uses the tokenizer specified by the GitHub Copilot API model info.
+ * All models (including Claude) use GPT tokenizers (o200k_base or cl100k_base).
  */
  const getTokenCount = async (payload, model) => {
  const tokenizer = getTokenizerFromModel(model);
@@ -2225,32 +2400,18 @@ const getTokenCount = async (payload, model) => {
  };
 
  //#endregion
- //#region src/lib/auto-compact.ts
- const DEFAULT_CONFIG = {
- safetyMarginPercent: 2,
- maxRequestBodyBytes: 500 * 1024
- };
- /** Dynamic byte limit that adjusts based on 413 errors */
- let dynamicByteLimit = null;
- /**
- * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
- */
- function onRequestTooLarge(failingBytes) {
- const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
- dynamicByteLimit = newLimit;
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
- }
- function calculateLimits(model, config) {
- const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+ //#region src/lib/auto-truncate-openai.ts
+ function calculateLimits$1(model, config) {
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
  const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
- const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
+ const byteLimit = getEffectiveByteLimitBytes();
  return {
  tokenLimit,
  byteLimit
  };
  }
  /** Estimate tokens for a single message (fast approximation) */
- function estimateMessageTokens(msg) {
+ function estimateMessageTokens$1(msg) {
  let charCount = 0;
  if (typeof msg.content === "string") charCount = msg.content.length;
  else if (Array.isArray(msg.content)) {
@@ -2261,7 +2422,7 @@ function estimateMessageTokens(msg) {
  return Math.ceil(charCount / 4) + 10;
  }
  /** Get byte size of a message */
- function getMessageBytes(msg) {
+ function getMessageBytes$1(msg) {
  return JSON.stringify(msg).length;
  }
  /** Extract system/developer messages from the beginning */
@@ -2283,7 +2444,7 @@ function getToolCallIds(msg) {
  return [];
  }
  /** Filter orphaned tool_result messages */
- function filterOrphanedToolResults(messages) {
+ function filterOrphanedToolResults$1(messages) {
  const toolUseIds = /* @__PURE__ */ new Set();
  for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
  let removedCount = 0;
@@ -2294,22 +2455,127 @@ function filterOrphanedToolResults(messages) {
  }
  return true;
  });
- if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_result`);
  return filtered;
  }
+ /** Get tool_result IDs from all tool messages */
+ function getToolResultIds$1(messages) {
+ const ids = /* @__PURE__ */ new Set();
+ for (const msg of messages) if (msg.role === "tool" && msg.tool_call_id) ids.add(msg.tool_call_id);
+ return ids;
+ }
+ /** Filter orphaned tool_use messages (those without matching tool_result) */
+ function filterOrphanedToolUse$1(messages) {
+ const toolResultIds = getToolResultIds$1(messages);
+ const result = [];
+ let removedCount = 0;
+ for (const msg of messages) {
+ if (msg.role === "assistant" && msg.tool_calls) {
+ const filteredToolCalls = msg.tool_calls.filter((tc) => {
+ if (!toolResultIds.has(tc.id)) {
+ removedCount++;
+ return false;
+ }
+ return true;
+ });
+ if (filteredToolCalls.length === 0) {
+ if (msg.content) result.push({
+ ...msg,
+ tool_calls: void 0
+ });
+ continue;
+ }
+ result.push({
+ ...msg,
+ tool_calls: filteredToolCalls
+ });
+ continue;
+ }
+ result.push(msg);
+ }
+ if (removedCount > 0) consola.debug(`[AutoTruncate:OpenAI] Filtered ${removedCount} orphaned tool_use`);
+ return result;
+ }
  /** Ensure messages start with a user message */
- function ensureStartsWithUser(messages) {
+ function ensureStartsWithUser$1(messages) {
  let startIndex = 0;
  while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
- if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+ if (startIndex > 0) consola.debug(`[AutoTruncate:OpenAI] Skipped ${startIndex} leading non-user messages`);
  return messages.slice(startIndex);
  }
+ /** Threshold for large tool message content (bytes) */
+ const LARGE_TOOL_RESULT_THRESHOLD$1 = 1e4;
+ /** Maximum length for compressed tool_result summary */
+ const COMPRESSED_SUMMARY_LENGTH$1 = 500;
+ /**
+ * Compress a large tool message content to a summary.
+ * Keeps the first and last portions with a note about truncation.
+ */
+ function compressToolResultContent$1(content) {
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD$1) return content;
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH$1 / 2);
+ const start$1 = content.slice(0, halfLen);
+ const end = content.slice(-halfLen);
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH$1;
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
+ }
+ /**
+ * Smart compression strategy for OpenAI format:
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
+ * 2. Messages before that threshold get their tool content compressed
+ * 3. Returns compressed messages and stats
+ *
+ * @param preservePercent - Percentage of context to preserve uncompressed (0.0-1.0)
+ */
+ function smartCompressToolResults$1(messages, tokenLimit, byteLimit, preservePercent) {
+ const n = messages.length;
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
+ for (let i = n - 1; i >= 0; i--) {
+ const msg = messages[i];
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
+ }
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
+ let thresholdIndex = n;
+ for (let i = n - 1; i >= 0; i--) {
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
+ thresholdIndex = i + 1;
+ break;
+ }
+ thresholdIndex = i;
+ }
+ if (thresholdIndex >= n) return {
+ messages,
+ compressedCount: 0,
+ compressThresholdIndex: n
+ };
+ const result = [];
+ let compressedCount = 0;
+ for (const [i, msg] of messages.entries()) {
+ if (i < thresholdIndex && msg.role === "tool" && typeof msg.content === "string" && msg.content.length > LARGE_TOOL_RESULT_THRESHOLD$1) {
+ compressedCount++;
+ result.push({
+ ...msg,
+ content: compressToolResultContent$1(msg.content)
+ });
+ continue;
+ }
+ result.push(msg);
+ }
+ return {
+ messages: result,
+ compressedCount,
+ compressThresholdIndex: thresholdIndex
+ };
+ }
  /**
  * Find the optimal index from which to preserve messages.
  * Uses binary search with pre-calculated cumulative sums.
  * Returns the smallest index where the preserved portion fits within limits.
  */
- function findOptimalPreserveIndex(params) {
+ function findOptimalPreserveIndex$1(params) {
  const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
  if (messages.length === 0) return 0;
  const markerBytes = 200;
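`compressToolResultContent$1` keeps the first and last 250 characters (half of `COMPRESSED_SUMMARY_LENGTH$1`) of any tool result longer than 10,000 characters and splices in an omission note; `smartCompressToolResults$1` applies it only to messages older than the most recent `preserveRecentPercent` slice of the budget. The compression helper in isolation, with a hypothetical input:

```ts
const LARGE_TOOL_RESULT_THRESHOLD = 10_000;
const COMPRESSED_SUMMARY_LENGTH = 500;

function compressToolResultContent(content: string): string {
  if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
  const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2); // 250 chars per side
  const start = content.slice(0, halfLen);
  const end = content.slice(-halfLen);
  const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
  return `${start}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
}

// A 50,000-char tool result collapses to 500 chars plus the omission note:
const compressed = compressToolResultContent("x".repeat(50_000));
// contains "[... 49,500 characters omitted for brevity ...]"
```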
@@ -2321,8 +2587,8 @@ function findOptimalPreserveIndex(params) {
  const cumBytes = Array.from({ length: n + 1 }, () => 0);
  for (let i = n - 1; i >= 0; i--) {
  const msg = messages[i];
- cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
- cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens$1(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes$1(msg) + 1;
  }
  let left = 0;
  let right = n;
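The suffix sums built here make "what does the tail starting at index i cost" an O(1) lookup, which is what lets `findOptimalPreserveIndex$1` binary-search for the smallest preserve index whose tail still fits. Because suffix costs only shrink as i grows, the fits-or-not predicate is monotonic. A sketch of the search over such sums (costs and limit are made up):

```ts
// cum[i] = cost of messages[i..n-1]; cum[n] = 0, built right-to-left as in the diff.
function smallestFittingIndex(cum: number[], limit: number): number {
  const n = cum.length - 1;
  let left = 0;
  let right = n; // invariant: the tail starting at `right` fits (cum[n] = 0)
  while (left < right) {
    const mid = (left + right) >> 1;
    if (cum[mid] <= limit) right = mid; // tail fits: try keeping more history
    else left = mid + 1;                // too big: drop more from the front
  }
  return left;
}

const costs = [40, 10, 25, 5];  // per-message token estimates
const cum = [80, 40, 30, 5, 0]; // suffix sums over costs
smallestFittingIndex(cum, 35);  // → 2: preserve messages[2..3]
```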
@@ -2336,12 +2602,12 @@
  /**
  * Check if payload needs compaction based on model limits or byte size.
  */
- async function checkNeedsCompaction(payload, model, config = {}) {
+ async function checkNeedsCompactionOpenAI(payload, model, config = {}) {
  const cfg = {
- ...DEFAULT_CONFIG,
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
  ...config
  };
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
  const currentTokens = (await getTokenCount(payload, model)).input;
  const currentBytes = JSON.stringify(payload).length;
  const exceedsTokens = currentTokens > tokenLimit;
@@ -2359,23 +2625,90 @@ async function checkNeedsCompaction(payload, model, config = {}) {
  reason
  };
  }
- /** Create a truncation marker message */
- function createTruncationMarker(removedCount) {
+ /**
+ * Generate a summary of removed messages for context.
+ * Extracts key information like tool calls and topics.
+ */
+ function generateRemovedMessagesSummary$1(removedMessages) {
+ const toolCalls = [];
+ let userMessageCount = 0;
+ let assistantMessageCount = 0;
+ for (const msg of removedMessages) {
+ if (msg.role === "user") userMessageCount++;
+ else if (msg.role === "assistant") assistantMessageCount++;
+ if (msg.tool_calls) {
+ for (const tc of msg.tool_calls) if (tc.function.name) toolCalls.push(tc.function.name);
+ }
+ }
+ const parts = [];
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
+ const breakdown = [];
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
+ parts.push(`Messages: ${breakdown.join(", ")}`);
+ }
+ if (toolCalls.length > 0) {
+ const uniqueTools = [...new Set(toolCalls)];
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
+ }
+ return parts.join(". ");
+ }
+ /**
+ * Add a compression notice to the system message.
+ * Informs the model that some tool content has been compressed.
+ */
+ function addCompressionNotice$1(payload, compressedCount) {
+ const notice = `\n\n[CONTEXT NOTE]\n${compressedCount} large tool results have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]`;
+ const messages = [...payload.messages];
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msg = messages[i];
+ if (msg.role === "system" || msg.role === "developer") {
+ if (typeof msg.content === "string") messages[i] = {
+ ...msg,
+ content: msg.content + notice
+ };
+ break;
+ }
+ }
+ return {
+ ...payload,
+ messages
+ };
+ }
+ /**
+ * Create truncation context to append to system messages.
+ */
+ function createTruncationSystemContext$1(removedCount, compressedCount, summary) {
+ let context = `\n\n[CONVERSATION CONTEXT]\n`;
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
+ if (compressedCount > 0) context += `${compressedCount} large tool results have been compressed.\n`;
+ if (summary) context += `Summary of removed content: ${summary}\n`;
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]";
+ return context;
+ }
+ /** Create a truncation marker message (fallback when no system message) */
+ function createTruncationMarker$2(removedCount, compressedCount, summary) {
+ const parts = [];
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
+ if (compressedCount > 0) parts.push(`${compressedCount} tool results compressed`);
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
+ if (summary) content += `\n[Summary: ${summary}]`;
  return {
  role: "user",
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
+ content
  };
  }
  /**
- * Perform auto-compaction on a payload that exceeds limits.
+ * Perform auto-truncation on a payload that exceeds limits.
  * Uses binary search to find the optimal truncation point.
  */
- async function autoCompact(payload, model, config = {}) {
+ async function autoTruncateOpenAI(payload, model, config = {}) {
  const cfg = {
- ...DEFAULT_CONFIG,
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
  ...config
  };
- const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const { tokenLimit, byteLimit } = calculateLimits$1(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
  const originalTokens = (await getTokenCount(payload, model)).input;
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
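Instead of the old anonymous `[CONTEXT TRUNCATED]` marker, the truncation path now summarizes what was dropped. Given hypothetical removed history with 3 user turns, 4 assistant turns, and seven distinct tools called, `generateRemovedMessagesSummary$1` produces:

```ts
// Illustrative output only; the format comes from the function above.
// "Messages: 3 user, 4 assistant. Tools used: read_file, grep, bash, edit_file, web_search, +2 more"
// Distinct tool names beyond the first five are collapsed into "+N more".
```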
@@ -2387,18 +2720,44 @@ async function autoCompact(payload, model, config = {}) {
  };
  const exceedsTokens = originalTokens > tokenLimit;
  const exceedsBytes = originalBytes > byteLimit;
- let reason;
- if (exceedsTokens && exceedsBytes) reason = "tokens and size";
- else if (exceedsBytes) reason = "size";
- else reason = "tokens";
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
- const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
- const messagesJson = JSON.stringify(payload.messages);
- const payloadOverhead = originalBytes - messagesJson.length;
- const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
- const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
- consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
- const preserveIndex = findOptimalPreserveIndex({
+ let workingMessages = payload.messages;
+ let compressedCount = 0;
+ if (state.compressToolResults) {
+ const compressionResult = smartCompressToolResults$1(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
+ workingMessages = compressionResult.messages;
+ compressedCount = compressionResult.compressedCount;
+ const compressedPayload = {
+ ...payload,
+ messages: workingMessages
+ };
+ const compressedBytes = JSON.stringify(compressedPayload).length;
+ const compressedTokenCount = await getTokenCount(compressedPayload, model);
+ if (compressedTokenCount.input <= tokenLimit && compressedBytes <= byteLimit) {
+ let reason$1 = "tokens";
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
+ else if (exceedsBytes) reason$1 = "size";
+ consola.info(`[AutoTruncate:OpenAI] ${reason$1}: ${originalTokens}→${compressedTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
+ const noticePayload = addCompressionNotice$1(compressedPayload, compressedCount);
+ const noticeTokenCount = await getTokenCount(noticePayload, model);
+ return {
+ payload: noticePayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: noticeTokenCount.input,
+ removedMessageCount: 0
+ };
+ }
+ }
+ const { systemMessages, conversationMessages } = extractSystemMessages(workingMessages);
+ const messagesJson = JSON.stringify(workingMessages);
+ const payloadOverhead = JSON.stringify({
+ ...payload,
+ messages: workingMessages
+ }).length - messagesJson.length;
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes$1(m) + 1, 0);
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens$1(m), 0);
+ consola.debug(`[AutoTruncate:OpenAI] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+ const preserveIndex = findOptimalPreserveIndex$1({
  messages: conversationMessages,
  systemBytes,
  systemTokens,
@@ -2407,7 +2766,7 @@ async function autoCompact(payload, model, config = {}) {
  byteLimit
  });
  if (preserveIndex === 0) {
- consola.warn("Auto-compact: Cannot truncate, system messages too large");
+ consola.warn("[AutoTruncate:OpenAI] Cannot truncate, system messages too large");
  return {
  payload,
  wasCompacted: false,
@@ -2417,7 +2776,7 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  if (preserveIndex >= conversationMessages.length) {
- consola.warn("Auto-compact: Would need to remove all messages");
+ consola.warn("[AutoTruncate:OpenAI] Would need to remove all messages");
  return {
  payload,
  wasCompacted: false,
@@ -2427,11 +2786,13 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  let preserved = conversationMessages.slice(preserveIndex);
- preserved = filterOrphanedToolResults(preserved);
- preserved = ensureStartsWithUser(preserved);
- preserved = filterOrphanedToolResults(preserved);
+ preserved = filterOrphanedToolResults$1(preserved);
+ preserved = filterOrphanedToolUse$1(preserved);
+ preserved = ensureStartsWithUser$1(preserved);
+ preserved = filterOrphanedToolResults$1(preserved);
+ preserved = filterOrphanedToolUse$1(preserved);
  if (preserved.length === 0) {
- consola.warn("Auto-compact: All messages filtered out after cleanup");
+ consola.warn("[AutoTruncate:OpenAI] All messages filtered out after cleanup");
  return {
  payload,
  wasCompacted: false,
@@ -2440,20 +2801,36 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
+ const removedMessages = conversationMessages.slice(0, preserveIndex);
  const removedCount = conversationMessages.length - preserved.length;
- const marker = createTruncationMarker(removedCount);
+ const summary = generateRemovedMessagesSummary$1(removedMessages);
+ let newSystemMessages = systemMessages;
+ let newMessages = preserved;
+ if (systemMessages.length > 0) {
+ const truncationContext = createTruncationSystemContext$1(removedCount, compressedCount, summary);
+ const lastSystemIdx = systemMessages.length - 1;
+ const lastSystem = systemMessages[lastSystemIdx];
+ const updatedSystem = {
+ ...lastSystem,
+ content: typeof lastSystem.content === "string" ? lastSystem.content + truncationContext : lastSystem.content
+ };
+ newSystemMessages = [...systemMessages.slice(0, lastSystemIdx), updatedSystem];
+ } else newMessages = [createTruncationMarker$2(removedCount, compressedCount, summary), ...preserved];
  const newPayload = {
  ...payload,
- messages: [
- ...systemMessages,
- marker,
- ...preserved
- ]
+ messages: [...newSystemMessages, ...newMessages]
  };
  const newBytes = JSON.stringify(newPayload).length;
  const newTokenCount = await getTokenCount(newPayload, model);
- consola.info(`Auto-compact: ${originalTokens} ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
- if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
+ let reason = "tokens";
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
+ else if (exceedsBytes) reason = "size";
+ const actions = [];
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
+ consola.info(`[AutoTruncate:OpenAI] ${reason}: ${originalTokens}→${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
+ if (newBytes > byteLimit) consola.warn(`[AutoTruncate:OpenAI] Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
  return {
  payload: newPayload,
  wasCompacted: true,
@@ -2463,13 +2840,13 @@ async function autoCompact(payload, model, config = {}) {
  };
  }
  /**
- * Create a marker to prepend to responses indicating auto-compaction occurred.
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
  */
- function createCompactionMarker(result) {
+ function createTruncationResponseMarkerOpenAI(result) {
  if (!result.wasCompacted) return "";
  const reduction = result.originalTokens - result.compactedTokens;
  const percentage = Math.round(reduction / result.originalTokens * 100);
- return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+ return `\n\n---\n[Auto-truncated: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
  }
 
  //#endregion
@@ -2489,7 +2866,7 @@ const createChatCompletions = async (payload) => {
  });
  if (!response.ok) {
  consola.error("Failed to create chat completions", response);
- throw await HTTPError.fromResponse("Failed to create chat completions", response);
+ throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
  }
  if (payload.stream) return events(response);
  return await response.json();
@@ -2539,6 +2916,18 @@ function failTracking(trackingId, error) {
  if (!trackingId) return;
  requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
  }
+ /**
+ * Create a marker to prepend to responses indicating auto-truncation occurred.
+ * Works with both OpenAI and Anthropic truncate results.
+ */
+ function createTruncationMarker(result) {
+ if (!result.wasCompacted) return "";
+ const { originalTokens, compactedTokens, removedMessageCount } = result;
+ if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
+ const reduction = originalTokens - compactedTokens;
+ const percentage = Math.round(reduction / originalTokens * 100);
+ return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
+ }
  /** Record streaming error to history (works with any accumulator type) */
  function recordStreamError(opts) {
  const { acc, fallbackModel, ctx, error } = opts;
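The shared `createTruncationMarker` also guards against partially populated results (the Anthropic path may omit token counts) before doing the percentage math. For a hypothetical fully populated result:

```ts
const result = {
  wasCompacted: true,
  originalTokens: 150_000,
  compactedTokens: 96_000,
  removedMessageCount: 42,
};
const reduction = result.originalTokens - result.compactedTokens;         // 54000
const percentage = Math.round((reduction / result.originalTokens) * 100); // 36
// → "[Auto-truncated: 42 messages removed, 150000 → 96000 tokens (36% reduction)]"
```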
@@ -2557,37 +2946,37 @@ function recordStreamError(opts) {
  function isNonStreaming(response) {
  return Object.hasOwn(response, "choices");
  }
- /** Build final payload with auto-compact if needed */
+ /** Build final payload with auto-truncate if needed */
  async function buildFinalPayload(payload, model) {
- if (!state.autoCompact || !model) {
- if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+ if (!state.autoTruncate || !model) {
+ if (state.autoTruncate && !model) consola.warn(`Auto-truncate: Model '${payload.model}' not found in cached models, skipping`);
  return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  }
  try {
- const check = await checkNeedsCompaction(payload, model);
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
+ const check = await checkNeedsCompactionOpenAI(payload, model);
+ consola.debug(`Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
  if (!check.needed) return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  let reasonText;
  if (check.reason === "both") reasonText = "tokens and size";
  else if (check.reason === "bytes") reasonText = "size";
  else reasonText = "tokens";
- consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
- const compactResult = await autoCompact(payload, model);
+ consola.info(`Auto-truncate triggered: exceeds ${reasonText} limit`);
+ const truncateResult = await autoTruncateOpenAI(payload, model);
  return {
- finalPayload: compactResult.payload,
- compactResult
+ finalPayload: truncateResult.payload,
+ truncateResult
  };
  } catch (error) {
- consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
+ consola.warn("Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
  return {
  finalPayload: payload,
- compactResult: null
+ truncateResult: null
  };
  }
  }
@@ -2631,7 +3020,7 @@ async function logPayloadSizeInfo(payload, model) {
  if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
  consola.info("");
  consola.info(" Suggestions:");
- if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
+ if (!state.autoTruncate) consola.info(" • Enable --auto-truncate to automatically truncate history");
  if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
  consola.info(" • Start a new conversation with /clear or /reset");
  consola.info(" • Reduce conversation history by deleting old messages");
@@ -2663,8 +3052,8 @@ async function handleCompletion$1(c) {
  };
  const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
  await logTokenCount(originalPayload, selectedModel);
- const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
- if (compactResult) ctx.compactResult = compactResult;
+ const { finalPayload, truncateResult } = await buildFinalPayload(originalPayload, selectedModel);
+ if (truncateResult) ctx.truncateResult = truncateResult;
  const payload = isNullish(finalPayload.max_tokens) ? {
  ...finalPayload,
  max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
@@ -2717,8 +3106,8 @@ async function logTokenCount(payload, selectedModel) {
  function handleNonStreamingResponse$1(c, originalResponse, ctx) {
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
  let response = originalResponse;
- if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
- const marker = createCompactionMarker(ctx.compactResult);
+ if (state.verbose && ctx.truncateResult?.wasCompacted && response.choices[0]?.message.content) {
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
  response = {
  ...response,
  choices: response.choices.map((choice$1, i) => i === 0 ? {
@@ -2786,8 +3175,8 @@ async function handleStreamingResponse$1(opts) {
  const { stream, response, payload, ctx } = opts;
  const acc = createStreamAccumulator();
  try {
- if (ctx.compactResult?.wasCompacted) {
- const marker = createCompactionMarker(ctx.compactResult);
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
  const markerChunk = {
  id: `compact-marker-${Date.now()}`,
  object: "chat.completion.chunk",
@@ -4070,53 +4459,624 @@ historyRoutes.get("/", (c) => {
  });
 
  //#endregion
- //#region src/routes/messages/utils.ts
- function mapOpenAIStopReasonToAnthropic(finishReason) {
- if (finishReason === null) return null;
- return {
- stop: "end_turn",
- length: "max_tokens",
- tool_calls: "tool_use",
- content_filter: "end_turn"
- }[finishReason];
- }
-
- //#endregion
- //#region src/routes/messages/non-stream-translation.ts
- const OPENAI_TOOL_NAME_LIMIT = 64;
+ //#region src/lib/auto-truncate-anthropic.ts
  /**
- * Ensure all tool_use blocks have corresponding tool_result responses.
- * This handles edge cases where conversation history may be incomplete:
- * - Session interruptions where tool execution was cut off
- * - Previous request failures
- * - Client sending truncated history
- *
- * Adding placeholder responses prevents API errors and maintains protocol compliance.
+ * Convert Anthropic message content to text for token counting.
  */
- function fixMessageSequence(messages) {
- const fixedMessages = [];
- for (let i = 0; i < messages.length; i++) {
- const message = messages[i];
- fixedMessages.push(message);
- if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
- const foundToolResponses = /* @__PURE__ */ new Set();
- let j = i + 1;
- while (j < messages.length && messages[j].role === "tool") {
- const toolMessage = messages[j];
- if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
- j++;
+ function contentToText(content) {
+ if (typeof content === "string") return content;
+ const parts = [];
+ for (const block of content) switch (block.type) {
+ case "text":
+ parts.push(block.text);
+ break;
+ case "tool_use":
+ parts.push(`[tool_use: ${block.name}]`, JSON.stringify(block.input));
+ break;
+ case "tool_result":
+ if (typeof block.content === "string") parts.push(block.content);
+ else if (Array.isArray(block.content)) {
+ for (const inner of block.content) if (inner.type === "text") parts.push(inner.text);
  }
- for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
- consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
- fixedMessages.push({
- role: "tool",
- tool_call_id: toolCall.id,
- content: "Tool execution was interrupted or failed."
+ break;
+ case "thinking":
+ parts.push(block.thinking);
+ break;
+ default: break;
+ }
+ return parts.join("\n");
+ }
+ /**
+ * Estimate tokens for a message (fast, synchronous).
+ * Uses ~4 chars per token approximation for internal calculations.
+ * The final result is verified with the accurate tokenizer.
+ */
+ function estimateMessageTokens(msg) {
+ const text = contentToText(msg.content);
+ return Math.ceil(text.length / 4) + 4;
+ }
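The Anthropic-side estimator is deliberately crude: length/4 plus a 4-token per-message overhead. That is acceptable here because it only steers the truncation search; the final payload is re-checked with the accurate tokenizer. Roughly:

```ts
// The chars/4 heuristic from above, checked on illustrative inputs.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4) + 4;
}

estimateTokens("ls -la /tmp");      // ceil(11 / 4) + 4 = 7
estimateTokens("x".repeat(10_000)); // 2504 — close enough to steer truncation
```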
4498
+ /**
4499
+ * Count tokens for an Anthropic message using the model's tokenizer.
4500
+ */
4501
+ async function countMessageTokens(msg, model) {
4502
+ const text = contentToText(msg.content);
4503
+ return await countTextTokens(text, model) + 4;
4504
+ }
4505
+ /**
4506
+ * Count tokens for system prompt.
4507
+ */
4508
+ async function countSystemTokens(system, model) {
4509
+ if (!system) return 0;
4510
+ if (typeof system === "string") return await countTextTokens(system, model) + 4;
4511
+ const text = system.map((block) => block.text).join("\n");
4512
+ return await countTextTokens(text, model) + 4;
4513
+ }
4514
+ /**
4515
+ * Count total tokens for the payload using the model's tokenizer.
4516
+ */
4517
+ async function countTotalTokens(payload, model) {
4518
+ let total = await countSystemTokens(payload.system, model);
4519
+ for (const msg of payload.messages) total += await countMessageTokens(msg, model);
4520
+ if (payload.tools) {
4521
+ const toolsText = JSON.stringify(payload.tools);
4522
+ total += await countTextTokens(toolsText, model);
4523
+ }
4524
+ return total;
4525
+ }
4526
+ function getMessageBytes(msg) {
4527
+ return JSON.stringify(msg).length;
4528
+ }
4529
+ /**
4530
+ * Get tool_use IDs from an assistant message.
4531
+ */
4532
+ function getToolUseIds(msg) {
4533
+ if (msg.role !== "assistant") return [];
4534
+ if (typeof msg.content === "string") return [];
4535
+ const ids = [];
4536
+ for (const block of msg.content) if (block.type === "tool_use") ids.push(block.id);
4537
+ return ids;
4538
+ }
4539
+ /**
4540
+ * Get tool_result IDs from a user message.
4541
+ */
4542
+ function getToolResultIds(msg) {
4543
+ if (msg.role !== "user") return [];
4544
+ if (typeof msg.content === "string") return [];
4545
+ const ids = [];
4546
+ for (const block of msg.content) if (block.type === "tool_result") ids.push(block.tool_use_id);
4547
+ return ids;
4548
+ }
4549
+ /**
4550
+ * Filter orphaned tool_result messages (those without matching tool_use).
4551
+ */
4552
+ function filterOrphanedToolResults(messages) {
4553
+ const toolUseIds = /* @__PURE__ */ new Set();
4554
+ for (const msg of messages) for (const id of getToolUseIds(msg)) toolUseIds.add(id);
4555
+ const result = [];
4556
+ let removedCount = 0;
4557
+ for (const msg of messages) {
4558
+ if (msg.role === "user" && typeof msg.content !== "string") {
4559
+ if (getToolResultIds(msg).some((id) => !toolUseIds.has(id))) {
4560
+ const filteredContent = msg.content.filter((block) => {
4561
+ if (block.type === "tool_result" && !toolUseIds.has(block.tool_use_id)) {
4562
+ removedCount++;
4563
+ return false;
4564
+ }
4565
+ return true;
4115
4566
  });
4567
+ if (filteredContent.length === 0) continue;
4568
+ result.push({
4569
+ ...msg,
4570
+ content: filteredContent
4571
+ });
4572
+ continue;
4116
4573
  }
4117
4574
  }
4575
+ result.push(msg);
4118
4576
  }
4119
- return fixedMessages;
4577
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_result`);
4578
+ return result;
4579
+ }
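
A toy illustration, using simplified message shapes, of the pairing rule this filter enforces: a tool_result block survives only if some assistant message in the history contains a tool_use with the same id.

    const history = [
      { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "read_file", input: {} }] },
      { role: "user", content: [
        { type: "tool_result", tool_use_id: "call_1", content: "file body" }, // kept: matches call_1
        { type: "tool_result", tool_use_id: "call_0", content: "stale" }      // dropped: no matching tool_use
      ] }
    ];
    // filterOrphanedToolResults(history) keeps the first result and drops the second;
    // a user message whose blocks are all filtered out is removed entirely.
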
4580
+ /**
4581
+ * Filter orphaned tool_use messages (those without matching tool_result).
4582
+ * In Anthropic API, every tool_use must have a corresponding tool_result.
4583
+ */
4584
+ function filterOrphanedToolUse(messages) {
4585
+ const toolResultIds = /* @__PURE__ */ new Set();
4586
+ for (const msg of messages) for (const id of getToolResultIds(msg)) toolResultIds.add(id);
4587
+ const result = [];
4588
+ let removedCount = 0;
4589
+ for (const msg of messages) {
4590
+ if (msg.role === "assistant" && typeof msg.content !== "string") {
4591
+ if (getToolUseIds(msg).some((id) => !toolResultIds.has(id))) {
4592
+ const filteredContent = msg.content.filter((block) => {
4593
+ if (block.type === "tool_use" && !toolResultIds.has(block.id)) {
4594
+ removedCount++;
4595
+ return false;
4596
+ }
4597
+ return true;
4598
+ });
4599
+ if (filteredContent.length === 0) continue;
4600
+ result.push({
4601
+ ...msg,
4602
+ content: filteredContent
4603
+ });
4604
+ continue;
4605
+ }
4606
+ }
4607
+ result.push(msg);
4608
+ }
4609
+ if (removedCount > 0) consola.debug(`[AutoTruncate:Anthropic] Filtered ${removedCount} orphaned tool_use`);
4610
+ return result;
4611
+ }
4612
+ /**
4613
+ * Ensure messages start with a user message.
4614
+ */
4615
+ function ensureStartsWithUser(messages) {
4616
+ let startIndex = 0;
4617
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
4618
+ if (startIndex > 0) consola.debug(`[AutoTruncate:Anthropic] Skipped ${startIndex} leading non-user messages`);
4619
+ return messages.slice(startIndex);
4620
+ }
4621
+ /** Threshold above which tool_result content counts as large (characters) */
4622
+ const LARGE_TOOL_RESULT_THRESHOLD = 1e4;
4623
+ /** Maximum length for compressed tool_result summary */
4624
+ const COMPRESSED_SUMMARY_LENGTH = 500;
4625
+ /**
4626
+ * Compress large tool_result content into a summary.
4627
+ * Keeps the first and last portions with a note about truncation.
4628
+ */
4629
+ function compressToolResultContent(content) {
4630
+ if (content.length <= LARGE_TOOL_RESULT_THRESHOLD) return content;
4631
+ const halfLen = Math.floor(COMPRESSED_SUMMARY_LENGTH / 2);
4632
+ const start$1 = content.slice(0, halfLen);
4633
+ const end = content.slice(-halfLen);
4634
+ const removedChars = content.length - COMPRESSED_SUMMARY_LENGTH;
4635
+ return `${start$1}\n\n[... ${removedChars.toLocaleString()} characters omitted for brevity ...]\n\n${end}`;
4636
+ }
4637
+ /**
4638
+ * Compress a tool_result block in an Anthropic message.
4639
+ */
4640
+ function compressToolResultBlock(block) {
4641
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) return {
4642
+ ...block,
4643
+ content: compressToolResultContent(block.content)
4644
+ };
4645
+ return block;
4646
+ }
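
The arithmetic behind the compression, worked on a hypothetical size: with COMPRESSED_SUMMARY_LENGTH = 500, a 12,000-character tool_result keeps its first and last 250 characters around an omission marker.

    const content = "x".repeat(12_000);
    const halfLen = Math.floor(500 / 2);    // 250
    const head = content.slice(0, halfLen); // first 250 characters
    const tail = content.slice(-halfLen);   // last 250 characters
    const removed = content.length - 500;   // 11,500 characters reported in the marker
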
4647
+ /**
4648
+ * Smart compression strategy:
4649
+ * 1. Calculate tokens/bytes from the end until reaching preservePercent of limit
4650
+ * 2. Messages before that threshold get their tool_results compressed
4651
+ * 3. Returns compressed messages and stats
4652
+ *
4653
+ * @param preservePercent - Fraction of the context to preserve uncompressed (0.0-1.0)
4654
+ */
4655
+ function smartCompressToolResults(messages, tokenLimit, byteLimit, preservePercent) {
4656
+ const n = messages.length;
4657
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4658
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4659
+ for (let i = n - 1; i >= 0; i--) {
4660
+ const msg = messages[i];
4661
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4662
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4663
+ }
4664
+ const preserveTokenLimit = Math.floor(tokenLimit * preservePercent);
4665
+ const preserveByteLimit = Math.floor(byteLimit * preservePercent);
4666
+ let thresholdIndex = n;
4667
+ for (let i = n - 1; i >= 0; i--) {
4668
+ if (cumTokens[i] > preserveTokenLimit || cumBytes[i] > preserveByteLimit) {
4669
+ thresholdIndex = i + 1;
4670
+ break;
4671
+ }
4672
+ thresholdIndex = i;
4673
+ }
4674
+ if (thresholdIndex >= n) return {
4675
+ messages,
4676
+ compressedCount: 0,
4677
+ compressThresholdIndex: n
4678
+ };
4679
+ const result = [];
4680
+ let compressedCount = 0;
4681
+ for (const [i, msg] of messages.entries()) {
4682
+ if (i < thresholdIndex && msg.role === "user" && Array.isArray(msg.content)) {
4683
+ if (msg.content.some((block) => block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD)) {
4684
+ const compressedContent = msg.content.map((block) => {
4685
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.length > LARGE_TOOL_RESULT_THRESHOLD) {
4686
+ compressedCount++;
4687
+ return compressToolResultBlock(block);
4688
+ }
4689
+ return block;
4690
+ });
4691
+ result.push({
4692
+ ...msg,
4693
+ content: compressedContent
4694
+ });
4695
+ continue;
4696
+ }
4697
+ }
4698
+ result.push(msg);
4699
+ }
4700
+ return {
4701
+ messages: result,
4702
+ compressedCount,
4703
+ compressThresholdIndex: thresholdIndex
4704
+ };
4705
+ }
4706
+ /** Default context window, used when model capabilities are not available */
4707
+ const DEFAULT_CONTEXT_WINDOW = 2e5;
4708
+ function calculateLimits(model, config) {
4709
+ const rawTokenLimit = getEffectiveTokenLimit(model.id) ?? model.capabilities?.limits?.max_context_window_tokens ?? model.capabilities?.limits?.max_prompt_tokens ?? DEFAULT_CONTEXT_WINDOW;
4710
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
4711
+ const byteLimit = getEffectiveByteLimitBytes();
4712
+ return {
4713
+ tokenLimit,
4714
+ byteLimit
4715
+ };
4716
+ }
4717
+ function findOptimalPreserveIndex(params) {
4718
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
4719
+ if (messages.length === 0) return 0;
4720
+ const markerBytes = 200;
4721
+ const availableTokens = tokenLimit - systemTokens - 50;
4722
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
4723
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
4724
+ const n = messages.length;
4725
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
4726
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
4727
+ for (let i = n - 1; i >= 0; i--) {
4728
+ const msg = messages[i];
4729
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
4730
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
4731
+ }
4732
+ let left = 0;
4733
+ let right = n;
4734
+ while (left < right) {
4735
+ const mid = left + right >>> 1;
4736
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
4737
+ else left = mid + 1;
4738
+ }
4739
+ return left;
4740
+ }
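
A sketch of the search above on made-up numbers: cumTokens[i] is the cost of the suffix messages[i..], which only shrinks as i grows, so a binary search finds the smallest index whose suffix fits the budget.

    // Four messages costing [900, 300, 200, 100] tokens against a 550-token budget.
    // Suffix sums: cumTokens = [1500, 600, 300, 100, 0].
    const cumTokens = [1500, 600, 300, 100, 0];
    const budget = 550;
    let left = 0, right = 4;
    while (left < right) {
      const mid = (left + right) >>> 1;
      if (cumTokens[mid] <= budget) right = mid;
      else left = mid + 1;
    }
    // left === 2: messages 0-1 are dropped, messages 2-3 are preserved.
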
4741
+ /**
4742
+ * Generate a summary of removed messages for context.
4743
+ * Extracts key information such as message counts and the tools that were called.
4744
+ */
4745
+ function generateRemovedMessagesSummary(removedMessages) {
4746
+ const toolCalls = [];
4747
+ let userMessageCount = 0;
4748
+ let assistantMessageCount = 0;
4749
+ for (const msg of removedMessages) {
4750
+ if (msg.role === "user") userMessageCount++;
4751
+ else assistantMessageCount++;
4752
+ if (Array.isArray(msg.content)) {
4753
+ for (const block of msg.content) if (block.type === "tool_use") toolCalls.push(block.name);
4754
+ }
4755
+ }
4756
+ const parts = [];
4757
+ if (userMessageCount > 0 || assistantMessageCount > 0) {
4758
+ const breakdown = [];
4759
+ if (userMessageCount > 0) breakdown.push(`${userMessageCount} user`);
4760
+ if (assistantMessageCount > 0) breakdown.push(`${assistantMessageCount} assistant`);
4761
+ parts.push(`Messages: ${breakdown.join(", ")}`);
4762
+ }
4763
+ if (toolCalls.length > 0) {
4764
+ const uniqueTools = [...new Set(toolCalls)];
4765
+ const displayTools = uniqueTools.length > 5 ? [...uniqueTools.slice(0, 5), `+${uniqueTools.length - 5} more`] : uniqueTools;
4766
+ parts.push(`Tools used: ${displayTools.join(", ")}`);
4767
+ }
4768
+ return parts.join(". ");
4769
+ }
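
For a hypothetical batch of removed messages (3 user, 2 assistant, six distinct tools invoked), the summary string would read:

    // "Messages: 3 user, 2 assistant. Tools used: read_file, grep, bash, edit, ls, +1 more"
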
4770
+ /**
4771
+ * Add a compression notice to the system prompt.
4772
+ * Informs the model that some tool_result content has been compressed.
4773
+ */
4774
+ function addCompressionNotice(payload, compressedCount) {
4775
+ const notice = `[CONTEXT NOTE]\n${compressedCount} large tool_result blocks have been compressed to reduce context size.\nThe compressed results show the beginning and end of the content with an omission marker.\nIf you need the full content, you can re-read the file or re-run the tool.\n[END NOTE]\n\n`;
4776
+ let newSystem;
4777
+ if (typeof payload.system === "string") newSystem = notice + payload.system;
4778
+ else if (Array.isArray(payload.system)) newSystem = [{
4779
+ type: "text",
4780
+ text: notice
4781
+ }, ...payload.system];
4782
+ else newSystem = notice;
4783
+ return {
4784
+ ...payload,
4785
+ system: newSystem
4786
+ };
4787
+ }
4788
+ /**
4789
+ * Create truncation context to prepend to system prompt.
4790
+ */
4791
+ function createTruncationSystemContext(removedCount, compressedCount, summary) {
4792
+ let context = `[CONVERSATION CONTEXT]\n`;
4793
+ if (removedCount > 0) context += `${removedCount} earlier messages have been removed due to context window limits.\n`;
4794
+ if (compressedCount > 0) context += `${compressedCount} large tool_result blocks have been compressed.\n`;
4795
+ if (summary) context += `Summary of removed content: ${summary}\n`;
4796
+ context += "If you need earlier context, ask the user or check available tools for conversation history access.\n[END CONTEXT]\n\n";
4797
+ return context;
4798
+ }
4799
+ /**
4800
+ * Create a truncation marker message (fallback when no system prompt).
4801
+ */
4802
+ function createTruncationMarker$1(removedCount, compressedCount, summary) {
4803
+ const parts = [];
4804
+ if (removedCount > 0) parts.push(`${removedCount} earlier messages removed`);
4805
+ if (compressedCount > 0) parts.push(`${compressedCount} tool_result blocks compressed`);
4806
+ let content = `[CONTEXT MODIFIED: ${parts.join(", ")} to fit context limits]`;
4807
+ if (summary) content += `\n[Summary: ${summary}]`;
4808
+ return {
4809
+ role: "user",
4810
+ content
4811
+ };
4812
+ }
4813
+ /**
4814
+ * Perform auto-truncation on an Anthropic payload that exceeds limits.
4815
+ */
4816
+ async function autoTruncateAnthropic(payload, model, config = {}) {
4817
+ const cfg = {
4818
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4819
+ ...config
4820
+ };
4821
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4822
+ const originalBytes = JSON.stringify(payload).length;
4823
+ const originalTokens = await countTotalTokens(payload, model);
4824
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
4825
+ payload,
4826
+ wasCompacted: false,
4827
+ originalTokens,
4828
+ compactedTokens: originalTokens,
4829
+ removedMessageCount: 0
4830
+ };
4831
+ const exceedsTokens = originalTokens > tokenLimit;
4832
+ const exceedsBytes = originalBytes > byteLimit;
4833
+ let workingMessages = payload.messages;
4834
+ let compressedCount = 0;
4835
+ if (state.compressToolResults) {
4836
+ const compressionResult = smartCompressToolResults(payload.messages, tokenLimit, byteLimit, cfg.preserveRecentPercent);
4837
+ workingMessages = compressionResult.messages;
4838
+ compressedCount = compressionResult.compressedCount;
4839
+ const compressedPayload = {
4840
+ ...payload,
4841
+ messages: workingMessages
4842
+ };
4843
+ const compressedBytes = JSON.stringify(compressedPayload).length;
4844
+ const compressedTokens = await countTotalTokens(compressedPayload, model);
4845
+ if (compressedTokens <= tokenLimit && compressedBytes <= byteLimit) {
4846
+ let reason$1 = "tokens";
4847
+ if (exceedsTokens && exceedsBytes) reason$1 = "tokens+size";
4848
+ else if (exceedsBytes) reason$1 = "size";
4849
+ consola.info(`[AutoTruncate:Anthropic] ${reason$1}: ${originalTokens}→${compressedTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(compressedBytes / 1024)}KB (compressed ${compressedCount} tool_results)`);
4850
+ const noticePayload = addCompressionNotice(compressedPayload, compressedCount);
4851
+ return {
4852
+ payload: noticePayload,
4853
+ wasCompacted: true,
4854
+ originalTokens,
4855
+ compactedTokens: await countTotalTokens(noticePayload, model),
4856
+ removedMessageCount: 0
4857
+ };
4858
+ }
4859
+ }
4860
+ const systemBytes = payload.system ? JSON.stringify(payload.system).length : 0;
4861
+ const systemTokens = await countSystemTokens(payload.system, model);
4862
+ const messagesJson = JSON.stringify(workingMessages);
4863
+ const payloadOverhead = JSON.stringify({
4864
+ ...payload,
4865
+ messages: workingMessages
4866
+ }).length - messagesJson.length;
4867
+ consola.debug(`[AutoTruncate:Anthropic] overhead=${Math.round(payloadOverhead / 1024)}KB, system=${Math.round(systemBytes / 1024)}KB`);
4868
+ const preserveIndex = findOptimalPreserveIndex({
4869
+ messages: workingMessages,
4870
+ systemBytes,
4871
+ systemTokens,
4872
+ payloadOverhead,
4873
+ tokenLimit,
4874
+ byteLimit
4875
+ });
4876
+ if (preserveIndex === 0) {
4877
+ consola.warn("[AutoTruncate:Anthropic] Cannot truncate, system messages too large");
4878
+ return {
4879
+ payload,
4880
+ wasCompacted: false,
4881
+ originalTokens,
4882
+ compactedTokens: originalTokens,
4883
+ removedMessageCount: 0
4884
+ };
4885
+ }
4886
+ if (preserveIndex >= workingMessages.length) {
4887
+ consola.warn("[AutoTruncate:Anthropic] Would need to remove all messages");
4888
+ return {
4889
+ payload,
4890
+ wasCompacted: false,
4891
+ originalTokens,
4892
+ compactedTokens: originalTokens,
4893
+ removedMessageCount: 0
4894
+ };
4895
+ }
4896
+ let preserved = workingMessages.slice(preserveIndex);
4897
+ preserved = filterOrphanedToolResults(preserved);
4898
+ preserved = filterOrphanedToolUse(preserved);
4899
+ preserved = ensureStartsWithUser(preserved);
4900
+ preserved = filterOrphanedToolResults(preserved);
4901
+ preserved = filterOrphanedToolUse(preserved);
4902
+ if (preserved.length === 0) {
4903
+ consola.warn("[AutoTruncate:Anthropic] All messages filtered out after cleanup");
4904
+ return {
4905
+ payload,
4906
+ wasCompacted: false,
4907
+ originalTokens,
4908
+ compactedTokens: originalTokens,
4909
+ removedMessageCount: 0
4910
+ };
4911
+ }
4912
+ const removedMessages = payload.messages.slice(0, preserveIndex);
4913
+ const removedCount = workingMessages.length - preserved.length;
4914
+ const summary = generateRemovedMessagesSummary(removedMessages);
4915
+ let newSystem = payload.system;
4916
+ let newMessages = preserved;
4917
+ if (payload.system !== void 0) {
4918
+ const truncationContext = createTruncationSystemContext(removedCount, compressedCount, summary);
4919
+ if (typeof payload.system === "string") newSystem = truncationContext + payload.system;
4920
+ else if (Array.isArray(payload.system)) newSystem = [{
4921
+ type: "text",
4922
+ text: truncationContext
4923
+ }, ...payload.system];
4924
+ } else newMessages = [createTruncationMarker$1(removedCount, compressedCount, summary), ...preserved];
4925
+ const newPayload = {
4926
+ ...payload,
4927
+ system: newSystem,
4928
+ messages: newMessages
4929
+ };
4930
+ const newBytes = JSON.stringify(newPayload).length;
4931
+ const newTokens = await countTotalTokens(newPayload, model);
4932
+ let reason = "tokens";
4933
+ if (exceedsTokens && exceedsBytes) reason = "tokens+size";
4934
+ else if (exceedsBytes) reason = "size";
4935
+ const actions = [];
4936
+ if (removedCount > 0) actions.push(`removed ${removedCount} msgs`);
4937
+ if (compressedCount > 0) actions.push(`compressed ${compressedCount} tool_results`);
4938
+ const actionInfo = actions.length > 0 ? ` (${actions.join(", ")})` : "";
4939
+ consola.info(`[AutoTruncate:Anthropic] ${reason}: ${originalTokens}→${newTokens} tokens, ${Math.round(originalBytes / 1024)}→${Math.round(newBytes / 1024)}KB${actionInfo}`);
4940
+ if (newBytes > byteLimit || newTokens > tokenLimit) consola.warn(`[AutoTruncate:Anthropic] Result still over limit (${newTokens} tokens, ${Math.round(newBytes / 1024)}KB)`);
4941
+ return {
4942
+ payload: newPayload,
4943
+ wasCompacted: true,
4944
+ originalTokens,
4945
+ compactedTokens: newTokens,
4946
+ removedMessageCount: removedCount
4947
+ };
4948
+ }
4949
+ /**
4950
+ * Check if payload needs compaction.
4951
+ */
4952
+ async function checkNeedsCompactionAnthropic(payload, model, config = {}) {
4953
+ const cfg = {
4954
+ ...DEFAULT_AUTO_TRUNCATE_CONFIG,
4955
+ ...config
4956
+ };
4957
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
4958
+ const currentTokens = await countTotalTokens(payload, model);
4959
+ const currentBytes = JSON.stringify(payload).length;
4960
+ const exceedsTokens = currentTokens > tokenLimit;
4961
+ const exceedsBytes = currentBytes > byteLimit;
4962
+ let reason;
4963
+ if (exceedsTokens && exceedsBytes) reason = "both";
4964
+ else if (exceedsTokens) reason = "tokens";
4965
+ else if (exceedsBytes) reason = "bytes";
4966
+ return {
4967
+ needed: exceedsTokens || exceedsBytes,
4968
+ currentTokens,
4969
+ tokenLimit,
4970
+ currentBytes,
4971
+ byteLimit,
4972
+ reason
4973
+ };
4974
+ }
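
A hedged usage sketch of the check-then-truncate flow, assuming payload and model objects shaped like the ones this bundle passes around (the direct-Anthropic handler later in this diff follows the same pattern):

    let effective = payload;
    const check = await checkNeedsCompactionAnthropic(payload, model);
    if (check.needed) { // check.reason is "tokens", "bytes", or "both"
      const result = await autoTruncateAnthropic(payload, model);
      if (result.wasCompacted) effective = result.payload;
    }
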
4975
+
4976
+ //#endregion
4977
+ //#region src/routes/messages/message-utils.ts
4978
+ function convertAnthropicMessages(messages) {
4979
+ return messages.map((msg) => {
4980
+ if (typeof msg.content === "string") return {
4981
+ role: msg.role,
4982
+ content: msg.content
4983
+ };
4984
+ const content = msg.content.map((block) => {
4985
+ if (block.type === "text") return {
4986
+ type: "text",
4987
+ text: block.text
4988
+ };
4989
+ if (block.type === "tool_use") return {
4990
+ type: "tool_use",
4991
+ id: block.id,
4992
+ name: block.name,
4993
+ input: JSON.stringify(block.input)
4994
+ };
4995
+ if (block.type === "tool_result") {
4996
+ const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
4997
+ return {
4998
+ type: "tool_result",
4999
+ tool_use_id: block.tool_use_id,
5000
+ content: resultContent
5001
+ };
5002
+ }
5003
+ return { type: block.type };
5004
+ });
5005
+ return {
5006
+ role: msg.role,
5007
+ content
5008
+ };
5009
+ });
5010
+ }
5011
+ function extractSystemPrompt(system) {
5012
+ if (!system) return void 0;
5013
+ if (typeof system === "string") return system;
5014
+ return system.map((block) => block.text).join("\n");
5015
+ }
5016
+ function extractToolCallsFromContent(content) {
5017
+ const tools = [];
5018
+ for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
5019
+ id: String(block.id),
5020
+ name: String(block.name),
5021
+ input: JSON.stringify(block.input)
5022
+ });
5023
+ return tools.length > 0 ? tools : void 0;
5024
+ }
5025
+ function extractToolCallsFromAnthropicContent(content) {
5026
+ const tools = [];
5027
+ for (const block of content) if (block.type === "tool_use") tools.push({
5028
+ id: block.id,
5029
+ name: block.name,
5030
+ input: JSON.stringify(block.input)
5031
+ });
5032
+ return tools.length > 0 ? tools : void 0;
5033
+ }
5034
+ function mapOpenAIStopReasonToAnthropic(finishReason) {
5035
+ if (finishReason === null) return null;
5036
+ return {
5037
+ stop: "end_turn",
5038
+ length: "max_tokens",
5039
+ tool_calls: "tool_use",
5040
+ content_filter: "end_turn"
5041
+ }[finishReason];
5042
+ }
5043
+
5044
+ //#endregion
5045
+ //#region src/routes/messages/non-stream-translation.ts
5046
+ const OPENAI_TOOL_NAME_LIMIT = 64;
5047
+ /**
5048
+ * Ensure all tool_use blocks have corresponding tool_result responses.
5049
+ * This handles edge cases where conversation history may be incomplete:
5050
+ * - Session interruptions where tool execution was cut off
5051
+ * - Previous request failures
5052
+ * - Client sending truncated history
5053
+ *
5054
+ * Adding placeholder responses prevents API errors and maintains protocol compliance.
5055
+ */
5056
+ function fixMessageSequence(messages) {
5057
+ const fixedMessages = [];
5058
+ for (let i = 0; i < messages.length; i++) {
5059
+ const message = messages[i];
5060
+ fixedMessages.push(message);
5061
+ if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) {
5062
+ const foundToolResponses = /* @__PURE__ */ new Set();
5063
+ let j = i + 1;
5064
+ while (j < messages.length && messages[j].role === "tool") {
5065
+ const toolMessage = messages[j];
5066
+ if (toolMessage.tool_call_id) foundToolResponses.add(toolMessage.tool_call_id);
5067
+ j++;
5068
+ }
5069
+ for (const toolCall of message.tool_calls) if (!foundToolResponses.has(toolCall.id)) {
5070
+ consola.debug(`Adding placeholder tool_result for ${toolCall.id}`);
5071
+ fixedMessages.push({
5072
+ role: "tool",
5073
+ tool_call_id: toolCall.id,
5074
+ content: "Tool execution was interrupted or failed."
5075
+ });
5076
+ }
5077
+ }
5078
+ }
5079
+ return fixedMessages;
4120
5080
  }
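
A toy before/after for fixMessageSequence, with simplified OpenAI-style messages: the dangling tool call gets a placeholder tool message appended right after the assistant turn.

    const input = [
      { role: "assistant", content: null, tool_calls: [{ id: "call_9", type: "function", function: { name: "grep", arguments: "{}" } }] },
      { role: "user", content: "continue" } // no tool message answered call_9
    ];
    // fixMessageSequence(input) inserts, after the assistant turn:
    // { role: "tool", tool_call_id: "call_9", content: "Tool execution was interrupted or failed." }
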
4121
5081
  function translateToOpenAI(payload) {
4122
5082
  const toolNameMapping = {
@@ -4140,19 +5100,50 @@ function translateToOpenAI(payload) {
4140
5100
  toolNameMapping
4141
5101
  };
4142
5102
  }
5103
+ /**
5104
+ * Find the latest available model matching a family prefix.
5105
+ * Searches state.models for models starting with the given prefix
5106
+ * and returns the one with the highest version number.
5107
+ *
5108
+ * @param familyPrefix - e.g., "claude-opus", "claude-sonnet", "claude-haiku"
5109
+ * @param fallback - fallback model ID if no match found
5110
+ */
5111
+ function findLatestModel(familyPrefix, fallback) {
5112
+ const models = state.models?.data;
5113
+ if (!models || models.length === 0) return fallback;
5114
+ const candidates = models.filter((m) => m.id.startsWith(familyPrefix));
5115
+ if (candidates.length === 0) return fallback;
5116
+ candidates.sort((a, b) => {
5117
+ const versionA = extractVersion(a.id, familyPrefix);
5118
+ return extractVersion(b.id, familyPrefix) - versionA;
5119
+ });
5120
+ return candidates[0].id;
5121
+ }
5122
+ /**
5123
+ * Extract numeric version from model ID.
5124
+ * e.g., "claude-opus-4.5" with prefix "claude-opus" -> 4.5
5125
+ */
5126
+ function extractVersion(modelId, prefix) {
5127
+ const match = modelId.slice(prefix.length + 1).match(/^(\d+(?:\.\d+)?)/);
5128
+ return match ? Number.parseFloat(match[1]) : 0;
5129
+ }
4143
5130
  function translateModelName(model) {
4144
- const shortNameMap = {
4145
- opus: "claude-opus-4.5",
4146
- sonnet: "claude-sonnet-4.5",
4147
- haiku: "claude-haiku-4.5"
5131
+ const aliasMap = {
5132
+ opus: "claude-opus",
5133
+ sonnet: "claude-sonnet",
5134
+ haiku: "claude-haiku"
4148
5135
  };
4149
- if (shortNameMap[model]) return shortNameMap[model];
5136
+ if (aliasMap[model]) {
5137
+ const familyPrefix = aliasMap[model];
5138
+ const fallback = `${familyPrefix}-4.5`;
5139
+ return findLatestModel(familyPrefix, fallback);
5140
+ }
4150
5141
  if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
4151
5142
  if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
4152
5143
  if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
4153
- if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
5144
+ if (/^claude-opus-4-\d+$/.test(model)) return findLatestModel("claude-opus", "claude-opus-4.5");
4154
5145
  if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
4155
- if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
5146
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return findLatestModel("claude-haiku", "claude-haiku-4.5");
4156
5147
  return model;
4157
5148
  }
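
The effect of the new alias resolution, assuming state.models lists claude-opus-4.1 and claude-opus-4.5:

    translateModelName("opus");                       // "claude-opus-4.5" (highest version wins)
    translateModelName("claude-sonnet-4-5-20250929"); // "claude-sonnet-4.5" (date-suffixed form normalized)
    translateModelName("gpt-4.1");                    // returned unchanged
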
4158
5149
  function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
@@ -4160,7 +5151,7 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
4160
5151
  const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
4161
5152
  return [...systemMessages, ...otherMessages];
4162
5153
  }
4163
- const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
5154
+ const RESERVED_KEYWORDS = ["x-anthropic-billing-header", "x-anthropic-billing"];
4164
5155
  /**
4165
5156
  * Filter out reserved keywords from system prompt text.
4166
5157
  * Copilot API rejects requests containing these keywords.
@@ -4284,7 +5275,7 @@ function translateAnthropicToolsToOpenAI(anthropicTools, toolNameMapping) {
4284
5275
  function: {
4285
5276
  name: getTruncatedToolName(tool.name, toolNameMapping),
4286
5277
  description: tool.description,
4287
- parameters: tool.input_schema
5278
+ parameters: tool.input_schema ?? {}
4288
5279
  }
4289
5280
  }));
4290
5281
  }
@@ -4385,7 +5376,13 @@ function getAnthropicToolUseBlocks(toolCalls, toolNameMapping) {
4385
5376
  //#endregion
4386
5377
  //#region src/routes/messages/count-tokens-handler.ts
4387
5378
  /**
4388
- * Handles token counting for Anthropic messages
5379
+ * Handles token counting for Anthropic messages.
5380
+ *
5381
+ * For Anthropic models (vendor === "Anthropic"), uses the official Anthropic tokenizer.
5382
+ * For other models, uses GPT tokenizers with appropriate buffers.
5383
+ *
5384
+ * When auto-truncate is enabled and the request would exceed limits,
5385
+ * returns an inflated token count to trigger Claude Code's auto-compact mechanism.
4389
5386
  */
4390
5387
  async function handleCountTokens(c) {
4391
5388
  try {
@@ -4397,6 +5394,16 @@ async function handleCountTokens(c) {
4397
5394
  consola.warn("Model not found, returning default token count");
4398
5395
  return c.json({ input_tokens: 1 });
4399
5396
  }
5397
+ if (state.autoTruncate) {
5398
+ const truncateCheck = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5399
+ if (truncateCheck.needed) {
5400
+ const contextWindow = selectedModel.capabilities?.limits?.max_context_window_tokens ?? 2e5;
5401
+ const inflatedTokens = Math.floor(contextWindow * .95);
5402
+ consola.debug(`[count_tokens] Would trigger auto-truncate: ${truncateCheck.currentTokens} tokens > ${truncateCheck.tokenLimit}, returning inflated count: ${inflatedTokens}`);
5403
+ return c.json({ input_tokens: inflatedTokens });
5404
+ }
5405
+ }
5406
+ const tokenizerName = selectedModel.capabilities?.tokenizer ?? "o200k_base";
4400
5407
  const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4401
5408
  if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4402
5409
  let mcpToolExist = false;
@@ -4407,9 +5414,8 @@ async function handleCountTokens(c) {
4407
5414
  }
4408
5415
  }
4409
5416
  let finalTokenCount = tokenCount.input + tokenCount.output;
4410
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
4411
- else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
4412
- consola.debug("Token count:", finalTokenCount);
5417
+ if (!(selectedModel.vendor === "Anthropic")) finalTokenCount = anthropicPayload.model.startsWith("grok") ? Math.round(finalTokenCount * 1.03) : Math.round(finalTokenCount * 1.05);
5418
+ consola.debug(`Token count: ${finalTokenCount} (tokenizer: ${tokenizerName})`);
4413
5419
  return c.json({ input_tokens: finalTokenCount });
4414
5420
  } catch (error) {
4415
5421
  consola.error("Error counting tokens:", error);
@@ -4417,6 +5423,262 @@ async function handleCountTokens(c) {
4417
5423
  }
4418
5424
  }
4419
5425
 
5426
+ //#endregion
5427
+ //#region src/services/copilot/create-anthropic-messages.ts
5428
+ /**
5429
+ * Fields that are supported by Copilot's Anthropic API endpoint.
5430
+ * Any other fields in the incoming request will be stripped.
5431
+ */
5432
+ const COPILOT_SUPPORTED_FIELDS = new Set([
5433
+ "model",
5434
+ "messages",
5435
+ "max_tokens",
5436
+ "system",
5437
+ "metadata",
5438
+ "stop_sequences",
5439
+ "stream",
5440
+ "temperature",
5441
+ "top_p",
5442
+ "top_k",
5443
+ "tools",
5444
+ "tool_choice",
5445
+ "thinking",
5446
+ "service_tier"
5447
+ ]);
5448
+ /**
5449
+ * Filter payload to only include fields supported by Copilot's Anthropic API.
5450
+ * This prevents errors like "Extra inputs are not permitted" for unsupported
5451
+ * fields like `output_config`.
5452
+ *
5453
+ * Also converts server-side tools (web_search, etc.) to custom tools.
5454
+ */
5455
+ function filterPayloadForCopilot(payload) {
5456
+ const filtered = {};
5457
+ const unsupportedFields = [];
5458
+ for (const [key, value] of Object.entries(payload)) if (COPILOT_SUPPORTED_FIELDS.has(key)) filtered[key] = value;
5459
+ else unsupportedFields.push(key);
5460
+ if (unsupportedFields.length > 0) consola.debug(`[DirectAnthropic] Filtered unsupported fields: ${unsupportedFields.join(", ")}`);
5461
+ if (filtered.tools) filtered.tools = convertServerToolsToCustom(filtered.tools);
5462
+ return filtered;
5463
+ }
5464
+ /**
5465
+ * Adjust max_tokens if thinking is enabled.
5466
+ * According to Anthropic docs, max_tokens must be greater than thinking.budget_tokens.
5467
+ * max_tokens = thinking_budget + response_tokens
5468
+ */
5469
+ function adjustMaxTokensForThinking(payload) {
5470
+ const thinking = payload.thinking;
5471
+ if (!thinking) return payload;
5472
+ const budgetTokens = thinking.budget_tokens;
5473
+ if (!budgetTokens) return payload;
5474
+ if (payload.max_tokens <= budgetTokens) {
5475
+ const newMaxTokens = budgetTokens + Math.min(16384, budgetTokens);
5476
+ consola.debug(`[DirectAnthropic] Adjusted max_tokens: ${payload.max_tokens} → ${newMaxTokens} (thinking.budget_tokens=${budgetTokens})`);
5477
+ return {
5478
+ ...payload,
5479
+ max_tokens: newMaxTokens
5480
+ };
5481
+ }
5482
+ return payload;
5483
+ }
5484
+ /**
5485
+ * Create messages using Anthropic-style API directly.
5486
+ * This bypasses the OpenAI translation layer for Anthropic models.
5487
+ */
5488
+ async function createAnthropicMessages(payload) {
5489
+ if (!state.copilotToken) throw new Error("Copilot token not found");
5490
+ let filteredPayload = filterPayloadForCopilot(payload);
5491
+ filteredPayload = adjustMaxTokensForThinking(filteredPayload);
5492
+ const enableVision = filteredPayload.messages.some((msg) => {
5493
+ if (typeof msg.content === "string") return false;
5494
+ return msg.content.some((block) => block.type === "image");
5495
+ });
5496
+ const isAgentCall = filteredPayload.messages.some((msg) => msg.role === "assistant");
5497
+ const headers = {
5498
+ ...copilotHeaders(state, enableVision),
5499
+ "X-Initiator": isAgentCall ? "agent" : "user",
5500
+ "anthropic-version": "2023-06-01"
5501
+ };
5502
+ consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
5503
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
5504
+ method: "POST",
5505
+ headers,
5506
+ body: JSON.stringify(filteredPayload)
5507
+ });
5508
+ if (!response.ok) {
5509
+ consola.debug("Request failed:", {
5510
+ model: filteredPayload.model,
5511
+ max_tokens: filteredPayload.max_tokens,
5512
+ stream: filteredPayload.stream,
5513
+ tools: filteredPayload.tools?.map((t) => ({
5514
+ name: t.name,
5515
+ type: t.type
5516
+ })),
5517
+ thinking: filteredPayload.thinking,
5518
+ messageCount: filteredPayload.messages.length
5519
+ });
5520
+ throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, filteredPayload.model);
5521
+ }
5522
+ if (payload.stream) return events(response);
5523
+ return await response.json();
5524
+ }
5525
+ const SERVER_TOOL_CONFIGS = {
5526
+ web_search: {
5527
+ description: "Search the web for current information. Returns web search results that can help answer questions about recent events, current data, or information that may have changed since your knowledge cutoff.",
5528
+ input_schema: {
5529
+ type: "object",
5530
+ properties: { query: {
5531
+ type: "string",
5532
+ description: "The search query"
5533
+ } },
5534
+ required: ["query"]
5535
+ }
5536
+ },
5537
+ web_fetch: {
5538
+ description: "Fetch content from a URL. NOTE: This is a client-side tool - the client must fetch the URL and return the content.",
5539
+ input_schema: {
5540
+ type: "object",
5541
+ properties: { url: {
5542
+ type: "string",
5543
+ description: "The URL to fetch"
5544
+ } },
5545
+ required: ["url"]
5546
+ }
5547
+ },
5548
+ code_execution: {
5549
+ description: "Execute code in a sandbox. NOTE: This is a client-side tool - the client must execute the code.",
5550
+ input_schema: {
5551
+ type: "object",
5552
+ properties: {
5553
+ code: {
5554
+ type: "string",
5555
+ description: "The code to execute"
5556
+ },
5557
+ language: {
5558
+ type: "string",
5559
+ description: "The programming language"
5560
+ }
5561
+ },
5562
+ required: ["code"]
5563
+ }
5564
+ },
5565
+ computer: {
5566
+ description: "Control computer desktop. NOTE: This is a client-side tool - the client must handle computer control.",
5567
+ input_schema: {
5568
+ type: "object",
5569
+ properties: { action: {
5570
+ type: "string",
5571
+ description: "The action to perform"
5572
+ } },
5573
+ required: ["action"]
5574
+ }
5575
+ }
5576
+ };
5577
+ /**
5578
+ * Check if a tool is a server-side tool that needs conversion.
5579
+ */
5580
+ function getServerToolPrefix(tool) {
5581
+ if (tool.type) {
5582
+ for (const prefix of Object.keys(SERVER_TOOL_CONFIGS)) if (tool.type.startsWith(prefix)) return prefix;
5583
+ }
5584
+ return null;
5585
+ }
5586
+ /**
5587
+ * Convert server-side tools to custom tools, or pass them through unchanged.
5588
+ * This allows them to be passed to the API and handled by the client.
5589
+ *
5590
+ * Note: Server-side tools are only converted if state.rewriteAnthropicTools is enabled.
5591
+ */
5592
+ function convertServerToolsToCustom(tools) {
5593
+ if (!tools) return;
5594
+ const result = [];
5595
+ for (const tool of tools) {
5596
+ const serverToolPrefix = getServerToolPrefix(tool);
5597
+ if (serverToolPrefix) {
5598
+ const config = SERVER_TOOL_CONFIGS[serverToolPrefix];
5599
+ if (!state.rewriteAnthropicTools) {
5600
+ consola.debug(`[DirectAnthropic] Passing ${serverToolPrefix} through unchanged (use --rewrite-anthropic-tools to convert)`);
5601
+ result.push(tool);
5602
+ continue;
5603
+ }
5604
+ if (config.remove) {
5605
+ consola.warn(`[DirectAnthropic] Removing unsupported server tool: ${tool.name}. Reason: ${config.removalReason}`);
5606
+ continue;
5607
+ }
5608
+ consola.debug(`[DirectAnthropic] Converting server tool to custom: ${tool.name} (type: ${tool.type})`);
5609
+ result.push({
5610
+ name: tool.name,
5611
+ description: config.description,
5612
+ input_schema: config.input_schema
5613
+ });
5614
+ } else result.push(tool);
5615
+ }
5616
+ return result.length > 0 ? result : void 0;
5617
+ }
5618
+ /**
5619
+ * Check if a model supports direct Anthropic API.
5620
+ * Returns true if redirect is disabled (direct API is on) and the model is from Anthropic vendor.
5621
+ */
5622
+ function supportsDirectAnthropicApi(modelId) {
5623
+ if (state.redirectAnthropic) return false;
5624
+ return (state.models?.data.find((m) => m.id === modelId))?.vendor === "Anthropic";
5625
+ }
5626
+
5627
+ //#endregion
5628
+ //#region src/routes/messages/stream-accumulator.ts
5629
+ function createAnthropicStreamAccumulator() {
5630
+ return {
5631
+ model: "",
5632
+ inputTokens: 0,
5633
+ outputTokens: 0,
5634
+ stopReason: "",
5635
+ content: "",
5636
+ toolCalls: [],
5637
+ currentToolCall: null
5638
+ };
5639
+ }
5640
+ function processAnthropicEvent(event, acc) {
5641
+ switch (event.type) {
5642
+ case "content_block_delta":
5643
+ handleContentBlockDelta(event.delta, acc);
5644
+ break;
5645
+ case "content_block_start":
5646
+ handleContentBlockStart(event.content_block, acc);
5647
+ break;
5648
+ case "content_block_stop":
5649
+ handleContentBlockStop(acc);
5650
+ break;
5651
+ case "message_delta":
5652
+ handleMessageDelta(event.delta, event.usage, acc);
5653
+ break;
5654
+ default: break;
5655
+ }
5656
+ }
5657
+ function handleContentBlockDelta(delta, acc) {
5658
+ if (delta.type === "text_delta") acc.content += delta.text;
5659
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
5660
+ }
5661
+ function handleContentBlockStart(block, acc) {
5662
+ if (block.type === "tool_use") acc.currentToolCall = {
5663
+ id: block.id,
5664
+ name: block.name,
5665
+ input: ""
5666
+ };
5667
+ }
5668
+ function handleContentBlockStop(acc) {
5669
+ if (acc.currentToolCall) {
5670
+ acc.toolCalls.push(acc.currentToolCall);
5671
+ acc.currentToolCall = null;
5672
+ }
5673
+ }
5674
+ function handleMessageDelta(delta, usage, acc) {
5675
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
5676
+ if (usage) {
5677
+ acc.inputTokens = usage.input_tokens ?? 0;
5678
+ acc.outputTokens = usage.output_tokens;
5679
+ }
5680
+ }
5681
+
4420
5682
  //#endregion
4421
5683
  //#region src/routes/messages/stream-translation.ts
4422
5684
  function isToolBlockOpen(state$1) {
@@ -4522,68 +5784,244 @@ function translateChunkToAnthropicEvents(chunk, state$1, toolNameMapping) {
4522
5784
  });
4523
5785
  }
4524
5786
  }
4525
- if (choice.finish_reason) {
4526
- if (state$1.contentBlockOpen) {
4527
- events$1.push({
4528
- type: "content_block_stop",
4529
- index: state$1.contentBlockIndex
5787
+ if (choice.finish_reason) {
5788
+ if (state$1.contentBlockOpen) {
5789
+ events$1.push({
5790
+ type: "content_block_stop",
5791
+ index: state$1.contentBlockIndex
5792
+ });
5793
+ state$1.contentBlockOpen = false;
5794
+ }
5795
+ events$1.push({
5796
+ type: "message_delta",
5797
+ delta: {
5798
+ stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
5799
+ stop_sequence: null
5800
+ },
5801
+ usage: {
5802
+ input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
5803
+ output_tokens: chunk.usage?.completion_tokens ?? 0,
5804
+ ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
5805
+ }
5806
+ }, { type: "message_stop" });
5807
+ }
5808
+ return events$1;
5809
+ }
5810
+ function translateErrorToAnthropicErrorEvent() {
5811
+ return {
5812
+ type: "error",
5813
+ error: {
5814
+ type: "api_error",
5815
+ message: "An unexpected error occurred during streaming."
5816
+ }
5817
+ };
5818
+ }
5819
+
5820
+ //#endregion
5821
+ //#region src/routes/messages/direct-anthropic-handler.ts
5822
+ /**
5823
+ * Handle completion using direct Anthropic API (no translation needed)
5824
+ */
5825
+ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
5826
+ consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
5827
+ const selectedModel = state.models?.data.find((m) => m.id === anthropicPayload.model);
5828
+ let effectivePayload = anthropicPayload;
5829
+ let truncateResult;
5830
+ if (state.autoTruncate && selectedModel) {
5831
+ const check = await checkNeedsCompactionAnthropic(anthropicPayload, selectedModel);
5832
+ consola.debug(`[Anthropic] Auto-truncate check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
5833
+ if (check.needed) try {
5834
+ truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel);
5835
+ if (truncateResult.wasCompacted) effectivePayload = truncateResult.payload;
5836
+ } catch (error) {
5837
+ consola.warn("[Anthropic] Auto-truncate failed, proceeding with original payload:", error instanceof Error ? error.message : error);
5838
+ }
5839
+ } else if (state.autoTruncate && !selectedModel) consola.debug(`[Anthropic] Model '${anthropicPayload.model}' not found, skipping auto-truncate`);
5840
+ if (state.manualApprove) await awaitApproval();
5841
+ try {
5842
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
5843
+ ctx.queueWaitMs = queueWaitMs;
5844
+ if (Symbol.asyncIterator in response) {
5845
+ consola.debug("Streaming response from Copilot (direct Anthropic)");
5846
+ updateTrackerStatus(ctx.trackingId, "streaming");
5847
+ return streamSSE(c, async (stream) => {
5848
+ await handleDirectAnthropicStreamingResponse({
5849
+ stream,
5850
+ response,
5851
+ anthropicPayload: effectivePayload,
5852
+ ctx
5853
+ });
5854
+ });
5855
+ }
5856
+ return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
5857
+ } catch (error) {
5858
+ if (error instanceof HTTPError && error.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
5859
+ recordErrorResponse(ctx, anthropicPayload.model, error);
5860
+ throw error;
5861
+ }
5862
+ }
5863
+ /**
5864
+ * Log payload size info for debugging 413 errors
5865
+ */
5866
+ function logPayloadSizeInfoAnthropic(payload, model) {
5867
+ const payloadSize = JSON.stringify(payload).length;
5868
+ const messageCount = payload.messages.length;
5869
+ const toolCount = payload.tools?.length ?? 0;
5870
+ const systemSize = payload.system ? JSON.stringify(payload.system).length : 0;
5871
+ consola.info(`[Anthropic 413] Payload size: ${Math.round(payloadSize / 1024)}KB, messages: ${messageCount}, tools: ${toolCount}, system: ${Math.round(systemSize / 1024)}KB`);
5872
+ if (model?.capabilities?.limits) {
5873
+ const limits = model.capabilities.limits;
5874
+ consola.info(`[Anthropic 413] Model limits: context=${limits.max_context_window_tokens}, prompt=${limits.max_prompt_tokens}, output=${limits.max_output_tokens}`);
5875
+ }
5876
+ if (!state.autoTruncate) consola.info("[Anthropic 413] Consider enabling --auto-truncate to automatically reduce payload size");
5877
+ }
5878
+ /**
5879
+ * Handle non-streaming direct Anthropic response
5880
+ */
5881
+ function handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult) {
5882
+ consola.debug("Non-streaming response from Copilot (direct Anthropic):", JSON.stringify(response).slice(-400));
5883
+ recordResponse(ctx.historyId, {
5884
+ success: true,
5885
+ model: response.model,
5886
+ usage: response.usage,
5887
+ stop_reason: response.stop_reason ?? void 0,
5888
+ content: {
5889
+ role: "assistant",
5890
+ content: response.content.map((block) => {
5891
+ switch (block.type) {
5892
+ case "text": return {
5893
+ type: "text",
5894
+ text: block.text
5895
+ };
5896
+ case "tool_use": return {
5897
+ type: "tool_use",
5898
+ id: block.id,
5899
+ name: block.name,
5900
+ input: JSON.stringify(block.input)
5901
+ };
5902
+ case "thinking": return {
5903
+ type: "thinking",
5904
+ thinking: block.thinking
5905
+ };
5906
+ default: return { type: block.type };
5907
+ }
5908
+ })
5909
+ },
5910
+ toolCalls: extractToolCallsFromAnthropicContent(response.content)
5911
+ }, Date.now() - ctx.startTime);
5912
+ if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
5913
+ inputTokens: response.usage.input_tokens,
5914
+ outputTokens: response.usage.output_tokens,
5915
+ queueWaitMs: ctx.queueWaitMs
5916
+ });
5917
+ let finalResponse = response;
5918
+ if (state.verbose && truncateResult?.wasCompacted) {
5919
+ const marker = createTruncationMarker(truncateResult);
5920
+ finalResponse = prependMarkerToAnthropicResponse$1(response, marker);
5921
+ }
5922
+ return c.json(finalResponse);
5923
+ }
5924
+ /**
5925
+ * Prepend marker to Anthropic response content (at the beginning of first text block)
5926
+ */
5927
+ function prependMarkerToAnthropicResponse$1(response, marker) {
5928
+ if (!marker) return response;
5929
+ const content = [...response.content];
5930
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
5931
+ if (firstTextIndex !== -1) {
5932
+ const textBlock = content[firstTextIndex];
5933
+ if (textBlock.type === "text") content[firstTextIndex] = {
5934
+ ...textBlock,
5935
+ text: marker + textBlock.text
5936
+ };
5937
+ } else content.unshift({
5938
+ type: "text",
5939
+ text: marker
5940
+ });
5941
+ return {
5942
+ ...response,
5943
+ content
5944
+ };
5945
+ }
5946
+ /**
5947
+ * Handle streaming direct Anthropic response (passthrough SSE events)
5948
+ */
5949
+ async function handleDirectAnthropicStreamingResponse(opts) {
5950
+ const { stream, response, anthropicPayload, ctx } = opts;
5951
+ const acc = createAnthropicStreamAccumulator();
5952
+ try {
5953
+ for await (const rawEvent of response) {
5954
+ consola.debug("Direct Anthropic raw stream event:", JSON.stringify(rawEvent));
5955
+ if (rawEvent.data === "[DONE]") break;
5956
+ if (!rawEvent.data) continue;
5957
+ let event;
5958
+ try {
5959
+ event = JSON.parse(rawEvent.data);
5960
+ } catch (parseError) {
5961
+ consola.error("Failed to parse Anthropic stream event:", parseError, rawEvent.data);
5962
+ continue;
5963
+ }
5964
+ processAnthropicEvent(event, acc);
5965
+ await stream.writeSSE({
5966
+ event: rawEvent.event || event.type,
5967
+ data: rawEvent.data
4530
5968
  });
4531
- state$1.contentBlockOpen = false;
4532
5969
  }
4533
- events$1.push({
4534
- type: "message_delta",
4535
- delta: {
4536
- stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
4537
- stop_sequence: null
4538
- },
4539
- usage: {
4540
- input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
4541
- output_tokens: chunk.usage?.completion_tokens ?? 0,
4542
- ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
4543
- }
4544
- }, { type: "message_stop" });
5970
+ recordStreamingResponse$1(acc, anthropicPayload.model, ctx);
5971
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
5972
+ } catch (error) {
5973
+ consola.error("Direct Anthropic stream error:", error);
5974
+ recordStreamError({
5975
+ acc,
5976
+ fallbackModel: anthropicPayload.model,
5977
+ ctx,
5978
+ error
5979
+ });
5980
+ failTracking(ctx.trackingId, error);
5981
+ const errorEvent = translateErrorToAnthropicErrorEvent();
5982
+ await stream.writeSSE({
5983
+ event: errorEvent.type,
5984
+ data: JSON.stringify(errorEvent)
5985
+ });
4545
5986
  }
4546
- return events$1;
4547
5987
  }
4548
- function translateErrorToAnthropicErrorEvent() {
4549
- return {
4550
- type: "error",
4551
- error: {
4552
- type: "api_error",
4553
- message: "An unexpected error occurred during streaming."
4554
- }
4555
- };
5988
+ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
5989
+ const contentBlocks = [];
5990
+ if (acc.content) contentBlocks.push({
5991
+ type: "text",
5992
+ text: acc.content
5993
+ });
5994
+ for (const tc of acc.toolCalls) contentBlocks.push({
5995
+ type: "tool_use",
5996
+ ...tc
5997
+ });
5998
+ recordResponse(ctx.historyId, {
5999
+ success: true,
6000
+ model: acc.model || fallbackModel,
6001
+ usage: {
6002
+ input_tokens: acc.inputTokens,
6003
+ output_tokens: acc.outputTokens
6004
+ },
6005
+ stop_reason: acc.stopReason || void 0,
6006
+ content: contentBlocks.length > 0 ? {
6007
+ role: "assistant",
6008
+ content: contentBlocks
6009
+ } : null,
6010
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
6011
+ }, Date.now() - ctx.startTime);
4556
6012
  }
4557
6013
 
4558
6014
  //#endregion
4559
- //#region src/routes/messages/handler.ts
4560
- async function handleCompletion(c) {
4561
- const anthropicPayload = await c.req.json();
4562
- consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
4563
- const trackingId = c.get("trackingId");
4564
- const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
4565
- updateTrackerModel(trackingId, anthropicPayload.model);
4566
- const ctx = {
4567
- historyId: recordRequest("anthropic", {
4568
- model: anthropicPayload.model,
4569
- messages: convertAnthropicMessages(anthropicPayload.messages),
4570
- stream: anthropicPayload.stream ?? false,
4571
- tools: anthropicPayload.tools?.map((t) => ({
4572
- name: t.name,
4573
- description: t.description
4574
- })),
4575
- max_tokens: anthropicPayload.max_tokens,
4576
- temperature: anthropicPayload.temperature,
4577
- system: extractSystemPrompt(anthropicPayload.system)
4578
- }),
4579
- trackingId,
4580
- startTime
4581
- };
6015
+ //#region src/routes/messages/translated-handler.ts
6016
+ /**
6017
+ * Handle completion using OpenAI translation path (legacy)
6018
+ */
6019
+ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
4582
6020
  const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
4583
6021
  consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
4584
6022
  const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
4585
- const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
4586
- if (compactResult) ctx.compactResult = compactResult;
6023
+ const { finalPayload: openAIPayload, truncateResult } = await buildFinalPayload(translatedPayload, selectedModel);
6024
+ if (truncateResult) ctx.truncateResult = truncateResult;
4587
6025
  if (state.manualApprove) await awaitApproval();
4588
6026
  try {
4589
6027
  const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
@@ -4595,7 +6033,7 @@ async function handleCompletion(c) {
4595
6033
  ctx
4596
6034
  });
4597
6035
  consola.debug("Streaming response from Copilot");
4598
- updateTrackerStatus(trackingId, "streaming");
6036
+ updateTrackerStatus(ctx.trackingId, "streaming");
4599
6037
  return streamSSE(c, async (stream) => {
4600
6038
  await handleStreamingResponse({
4601
6039
  stream,
@@ -4616,8 +6054,8 @@ function handleNonStreamingResponse(opts) {
4616
6054
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
4617
6055
  let anthropicResponse = translateToAnthropic(response, toolNameMapping);
4618
6056
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4619
- if (ctx.compactResult?.wasCompacted) {
4620
- const marker = createCompactionMarker(ctx.compactResult);
6057
+ if (state.verbose && ctx.truncateResult?.wasCompacted) {
6058
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
4621
6059
  anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4622
6060
  }
4623
6061
  recordResponse(ctx.historyId, {
@@ -4668,17 +6106,6 @@ function prependMarkerToAnthropicResponse(response, marker) {
4668
6106
  content
4669
6107
  };
4670
6108
  }
4671
- function createAnthropicStreamAccumulator() {
4672
- return {
4673
- model: "",
4674
- inputTokens: 0,
4675
- outputTokens: 0,
4676
- stopReason: "",
4677
- content: "",
4678
- toolCalls: [],
4679
- currentToolCall: null
4680
- };
4681
- }
4682
6109
  async function handleStreamingResponse(opts) {
4683
6110
  const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
4684
6111
  const streamState = {
@@ -4689,9 +6116,9 @@ async function handleStreamingResponse(opts) {
4689
6116
  };
4690
6117
  const acc = createAnthropicStreamAccumulator();
4691
6118
  try {
4692
- if (ctx.compactResult?.wasCompacted) {
4693
- const marker = createCompactionMarker(ctx.compactResult);
4694
- await sendCompactionMarkerEvent(stream, streamState, marker);
6119
+ if (ctx.truncateResult?.wasCompacted) {
6120
+ const marker = createTruncationResponseMarkerOpenAI(ctx.truncateResult);
6121
+ await sendTruncationMarkerEvent(stream, streamState, marker);
4695
6122
  acc.content += marker;
4696
6123
  }
4697
6124
  await processStreamChunks({
@@ -4719,7 +6146,7 @@ async function handleStreamingResponse(opts) {
4719
6146
  });
4720
6147
  }
4721
6148
  }
4722
- async function sendCompactionMarkerEvent(stream, streamState, marker) {
6149
+ async function sendTruncationMarkerEvent(stream, streamState, marker) {
4723
6150
  const blockStartEvent = {
4724
6151
  type: "content_block_start",
4725
6152
  index: streamState.contentBlockIndex,
@@ -4779,47 +6206,6 @@ async function processStreamChunks(opts) {
4779
6206
  }
4780
6207
  }
4781
6208
  }
4782
- function processAnthropicEvent(event, acc) {
4783
- switch (event.type) {
4784
- case "content_block_delta":
4785
- handleContentBlockDelta(event.delta, acc);
4786
- break;
4787
- case "content_block_start":
4788
- handleContentBlockStart(event.content_block, acc);
4789
- break;
4790
- case "content_block_stop":
4791
- handleContentBlockStop(acc);
4792
- break;
4793
- case "message_delta":
4794
- handleMessageDelta(event.delta, event.usage, acc);
4795
- break;
4796
- default: break;
4797
- }
4798
- }
4799
- function handleContentBlockDelta(delta, acc) {
4800
- if (delta.type === "text_delta") acc.content += delta.text;
4801
- else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
4802
- }
4803
- function handleContentBlockStart(block, acc) {
4804
- if (block.type === "tool_use") acc.currentToolCall = {
4805
- id: block.id,
4806
- name: block.name,
4807
- input: ""
4808
- };
4809
- }
4810
- function handleContentBlockStop(acc) {
4811
- if (acc.currentToolCall) {
4812
- acc.toolCalls.push(acc.currentToolCall);
4813
- acc.currentToolCall = null;
4814
- }
4815
- }
4816
- function handleMessageDelta(delta, usage, acc) {
4817
- if (delta.stop_reason) acc.stopReason = delta.stop_reason;
4818
- if (usage) {
4819
- acc.inputTokens = usage.input_tokens ?? 0;
4820
- acc.outputTokens = usage.output_tokens;
4821
- }
4822
- }
4823
6209
  function recordStreamingResponse(acc, fallbackModel, ctx) {
4824
6210
  const contentBlocks = [];
4825
6211
  if (acc.content) contentBlocks.push({
@@ -4845,52 +6231,51 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
  toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
  }, Date.now() - ctx.startTime);
  }
- function convertAnthropicMessages(messages) {
- return messages.map((msg) => {
- if (typeof msg.content === "string") return {
- role: msg.role,
- content: msg.content
- };
- const content = msg.content.map((block) => {
- if (block.type === "text") return {
- type: "text",
- text: block.text
- };
- if (block.type === "tool_use") return {
- type: "tool_use",
- id: block.id,
- name: block.name,
- input: JSON.stringify(block.input)
- };
- if (block.type === "tool_result") {
- const resultContent = typeof block.content === "string" ? block.content : block.content.map((c) => c.type === "text" ? c.text : `[${c.type}]`).join("\n");
- return {
- type: "tool_result",
- tool_use_id: block.tool_use_id,
- content: resultContent
- };
- }
- return { type: block.type };
- });
- return {
- role: msg.role,
- content
- };
- });
- }
- function extractSystemPrompt(system) {
- if (!system) return void 0;
- if (typeof system === "string") return system;
- return system.map((block) => block.text).join("\n");
+
+ //#endregion
+ //#region src/routes/messages/handler.ts
+ async function handleCompletion(c) {
+ const anthropicPayload = await c.req.json();
+ consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
+ logToolInfo(anthropicPayload);
+ const useDirectAnthropicApi = supportsDirectAnthropicApi(anthropicPayload.model);
+ const trackingId = c.get("trackingId");
+ const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
+ updateTrackerModel(trackingId, anthropicPayload.model);
+ const ctx = {
+ historyId: recordRequest("anthropic", {
+ model: anthropicPayload.model,
+ messages: convertAnthropicMessages(anthropicPayload.messages),
+ stream: anthropicPayload.stream ?? false,
+ tools: anthropicPayload.tools?.map((t) => ({
+ name: t.name,
+ description: t.description
+ })),
+ max_tokens: anthropicPayload.max_tokens,
+ temperature: anthropicPayload.temperature,
+ system: extractSystemPrompt(anthropicPayload.system)
+ }),
+ trackingId,
+ startTime
+ };
+ if (useDirectAnthropicApi) return handleDirectAnthropicCompletion(c, anthropicPayload, ctx);
+ return handleTranslatedCompletion(c, anthropicPayload, ctx);
  }
- function extractToolCallsFromContent(content) {
- const tools = [];
- for (const block of content) if (typeof block === "object" && block !== null && "type" in block && block.type === "tool_use" && "id" in block && "name" in block && "input" in block) tools.push({
- id: String(block.id),
- name: String(block.name),
- input: JSON.stringify(block.input)
- });
- return tools.length > 0 ? tools : void 0;
+ /**
+ * Log tool-related information for debugging
+ */
+ function logToolInfo(anthropicPayload) {
+ if (anthropicPayload.tools?.length) {
+ const toolInfo = anthropicPayload.tools.map((t) => ({
+ name: t.name,
+ type: t.type ?? "(custom)"
+ }));
+ consola.debug(`[Tools] Defined tools:`, JSON.stringify(toolInfo));
+ }
+ for (const msg of anthropicPayload.messages) if (typeof msg.content !== "string") for (const block of msg.content) {
+ if (block.type === "tool_use") consola.debug(`[Tools] tool_use in message: ${block.name} (id: ${block.id})`);
+ if (block.type === "tool_result") consola.debug(`[Tools] tool_result in message: id=${block.tool_use_id}, is_error=${block.is_error ?? false}`);
+ }
  }

  //#endregion
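
The handleCompletion handler added above parses the Anthropic-format payload, records it in request history, then dispatches: models that support the direct Anthropic API go to handleDirectAnthropicCompletion, everything else through the OpenAI translation path. A hedged client-side example of exercising such a route; the port and path here are assumptions based on common copilot-api setups, not taken from this diff:

// Hypothetical request against the proxy's Anthropic-compatible route.
const res = await fetch("http://localhost:4141/v1/messages", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    model: "claude-sonnet-4", // illustrative model id
    max_tokens: 256,
    stream: false,
    messages: [{ role: "user", content: "ping" }]
  })
});
console.log(res.status, await res.json());
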
@@ -5004,13 +6389,18 @@ server.route("/history", historyRoutes);

  //#endregion
  //#region src/start.ts
+ /** Format limit values as "Xk" or "?" if not available */
+ function formatLimit(value) {
+ return value ? `${Math.round(value / 1e3)}k` : "?";
+ }
  function formatModelInfo(model) {
  const limits = model.capabilities?.limits;
- const contextK = limits?.max_prompt_tokens ? `${Math.round(limits.max_prompt_tokens / 1e3)}k` : "?";
- const outputK = limits?.max_output_tokens ? `${Math.round(limits.max_output_tokens / 1e3)}k` : "?";
+ const contextK = formatLimit(limits?.max_context_window_tokens);
+ const promptK = formatLimit(limits?.max_prompt_tokens);
+ const outputK = formatLimit(limits?.max_output_tokens);
  const features = [model.capabilities?.supports?.tool_calls && "tools", model.preview && "preview"].filter(Boolean).join(", ");
  const featureStr = features ? ` (${features})` : "";
- return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
+ return ` - ${model.id.length > 30 ? `${model.id.slice(0, 27)}...` : model.id.padEnd(30)} ctx:${contextK.padStart(5)} in:${promptK.padStart(5)} out:${outputK.padStart(4)}` + featureStr;
  }
  async function runServer(options) {
  consola.info(`copilot-api v${package_default.version}`);
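
With this change, formatModelInfo reports the context window, prompt, and output limits separately through the shared formatLimit helper, and model ids longer than 30 characters are truncated with an ellipsis instead of overflowing the column. A quick standalone check of formatLimit's rounding (sample values are illustrative):

// formatLimit as defined above; inputs are illustrative.
const formatLimit = (value) => (value ? `${Math.round(value / 1e3)}k` : "?");
console.log(formatLimit(128e3));  // "128k"
console.log(formatLimit(16384));  // "16k" (16.384 rounds to 16)
console.log(formatLimit(void 0)); // "?" — missing limits render as a placeholder
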
@@ -5018,12 +6408,16 @@ async function runServer(options) {
  if (options.verbose) {
  consola.level = 5;
  consola.info("Verbose logging enabled");
+ state.verbose = true;
  }
  state.accountType = options.accountType;
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
  state.manualApprove = options.manual;
  state.showToken = options.showToken;
- state.autoCompact = options.autoCompact;
+ state.autoTruncate = options.autoTruncate;
+ state.compressToolResults = options.compressToolResults;
+ state.redirectAnthropic = options.redirectAnthropic;
+ state.rewriteAnthropicTools = options.rewriteAnthropicTools;
  if (options.rateLimit) initAdaptiveRateLimiter({
  baseRetryIntervalSeconds: options.retryInterval,
  requestIntervalSeconds: options.requestInterval,
@@ -5031,7 +6425,10 @@ async function runServer(options) {
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
  });
  else consola.info("Rate limiting disabled");
- if (!options.autoCompact) consola.info("Auto-compact disabled");
+ if (!options.autoTruncate) consola.info("Auto-truncate disabled");
+ if (options.compressToolResults) consola.info("Tool result compression enabled");
+ if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
+ if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
  initHistory(options.history, options.historyLimit);
  if (options.history) {
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
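
runServer only prints a notice when a flag deviates from its default, so a default configuration stays quiet at startup. The same notice logic in isolation (messages copied from the code above; consola is the logger the package already uses):

import { consola } from "consola";

// Standalone mirror of the startup notices above.
function logFlagNotices(options) {
  if (!options.autoTruncate) consola.info("Auto-truncate disabled");
  if (options.compressToolResults) consola.info("Tool result compression enabled");
  if (options.redirectAnthropic) consola.info("Anthropic API redirect enabled (using OpenAI translation)");
  if (!options.rewriteAnthropicTools) consola.info("Anthropic server-side tools rewrite disabled (passing through unchanged)");
}

// Prints only "Tool result compression enabled":
logFlagNotices({ autoTruncate: true, compressToolResults: true, redirectAnthropic: false, rewriteAnthropicTools: true });
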
@@ -5173,10 +6570,25 @@ const start = defineCommand({
  default: "1000",
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
  },
- "no-auto-compact": {
+ "no-auto-truncate": {
+ type: "boolean",
+ default: false,
+ description: "Disable automatic conversation history truncation when exceeding limits"
+ },
+ "compress-tool-results": {
+ type: "boolean",
+ default: false,
+ description: "Compress old tool_result content before truncating messages (may lose context details)"
+ },
+ "redirect-anthropic": {
+ type: "boolean",
+ default: false,
+ description: "Redirect Anthropic models through OpenAI translation (instead of direct API)"
+ },
+ "no-rewrite-anthropic-tools": {
  type: "boolean",
  default: false,
- description: "Disable automatic conversation history compression when exceeding limits"
+ description: "Don't rewrite Anthropic server-side tools (web_search, etc.) to custom tool format"
  }
  },
  run({ args }) {
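
The old no-auto-compact switch is replaced by four flags. The two negative flags (no-auto-truncate, no-rewrite-anthropic-tools) default to false, so truncation and server-side tool rewriting stay enabled unless explicitly turned off. A sketch of the args object such a citty-style parser would hand to run(); the command-line spelling is an assumption based on the flag names, so check the package's bin entry for the exact binary name:

// e.g. `copilot-api start --compress-tool-results --no-auto-truncate`
// would parse to roughly:
const args = {
  "no-auto-truncate": true,
  "compress-tool-results": true,
  "redirect-anthropic": false,
  "no-rewrite-anthropic-tools": false
};
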
@@ -5197,7 +6609,10 @@ const start = defineCommand({
  proxyEnv: args["proxy-env"],
  history: !args["no-history"],
  historyLimit: Number.parseInt(args["history-limit"], 10),
- autoCompact: !args["no-auto-compact"]
+ autoTruncate: !args["no-auto-truncate"],
+ compressToolResults: args["compress-tool-results"],
+ redirectAnthropic: args["redirect-anthropic"],
+ rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"]
  });
  }
  });
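
Note how run() negates the no-* flags when building the runServer options, which is what makes the positive behavior the default. A self-contained check of that mapping (the args literal is illustrative, matching a flagless command line):

// Default command line: no flags passed.
const args = {
  "no-auto-truncate": false,
  "compress-tool-results": false,
  "redirect-anthropic": false,
  "no-rewrite-anthropic-tools": false
};
const options = {
  autoTruncate: !args["no-auto-truncate"],                   // true: truncation on by default
  compressToolResults: args["compress-tool-results"],        // false: compression off by default
  redirectAnthropic: args["redirect-anthropic"],             // false: direct Anthropic API by default
  rewriteAnthropicTools: !args["no-rewrite-anthropic-tools"] // true: rewriting on by default
};
console.log(options);
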