npm - @hsupu/copilot-api - Versions diffs - 0.7.4 → 0.7.6 - Mend

@hsupu/copilot-api 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/main.js CHANGED Viewed

@@ -3,8 +3,9 @@ import { defineCommand, runMain } from "citty";
 import consola from "consola";
 import fs from "node:fs/promises";
 import os from "node:os";
-import path from "node:path";
+import path, { dirname, join } from "node:path";
 import { randomUUID } from "node:crypto";
+import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
 import clipboard from "clipboardy";
 import { serve } from "srvx";
 import invariant from "tiny-invariant";
@@ -44,7 +45,6 @@ async function ensureFile(filePath) {
 const state = {
 	accountType: "individual",
 	manualApprove: false,
-	rateLimitWait: false,
 	showToken: false,
 	autoCompact: false
 };
@@ -136,7 +136,17 @@ function formatRequestTooLargeError() {
 		}
 	};
 }
-async function forwardError(c, error) {
+/**
+* Build the Anthropic-compatible error payload for a rate limit (429) response.
+* Uses the message supplied by Copilot when present, otherwise a generic one.
+*/
+function formatRateLimitError(copilotMessage) {
+	const fallbackMessage = "You have exceeded your rate limit. Please try again later.";
+	const error = {
+		type: "rate_limit_error",
+		message: copilotMessage ?? fallbackMessage
+	};
+	return {
+		type: "error",
+		error
+	};
+}
+function forwardError(c, error) {
 	consola.error("Error occurred:", error);
 	if (error instanceof HTTPError) {
 		if (error.status === 413) {
@@ -160,6 +170,11 @@ async function forwardError(c, error) {
 				return c.json(formattedError, 400);
 			}
 		}
+		if (error.status === 429 || copilotError.error?.code === "rate_limited") {
+			const formattedError = formatRateLimitError(copilotError.error?.message);
+			consola.debug("Returning formatted rate limit error:", formattedError);
+			return c.json(formattedError, 429);
+		}
 		return c.json({ error: {
 			message: error.responseText,
 			type: "error"
@@ -290,6 +305,24 @@ async function pollAccessToken(deviceCode) {
 //#region src/lib/token.ts
 const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
 const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
+/**
+* Refresh the Copilot token with exponential backoff retry.
+*
+* Calls getCopilotToken() up to `maxRetries` times. Between failed attempts it
+* waits 1s, 2s, 4s, ... (capped at 30s). No wait happens after the final
+* attempt, since there is nothing left to retry.
+*
+* @param maxRetries Maximum number of attempts (default 3).
+* @returns The new token on success, or null if all attempts fail.
+*/
+async function refreshCopilotTokenWithRetry(maxRetries = 3) {
+	let lastError = null;
+	for (let attempt = 0; attempt < maxRetries; attempt++) {
+		try {
+			const { token } = await getCopilotToken();
+			return token;
+		} catch (error) {
+			lastError = error;
+			if (attempt === maxRetries - 1) {
+				consola.warn(`Token refresh attempt ${attempt + 1}/${maxRetries} failed`);
+				break;
+			}
+			const delay = Math.min(1e3 * 2 ** attempt, 3e4);
+			consola.warn(`Token refresh attempt ${attempt + 1}/${maxRetries} failed, retrying in ${delay}ms`);
+			await new Promise((resolve) => setTimeout(resolve, delay));
+		}
+	}
+	consola.error("All token refresh attempts failed:", lastError);
+	return null;
+}
 const setupCopilotToken = async () => {
 	const { token, refresh_in } = await getCopilotToken();
 	state.copilotToken = token;
@@ -298,14 +331,12 @@ const setupCopilotToken = async () => {
 	const refreshInterval = (refresh_in - 60) * 1e3;
 	setInterval(async () => {
 		consola.debug("Refreshing Copilot token");
-		try {
-			const { token: token$1 } = await getCopilotToken();
-			state.copilotToken = token$1;
+		const newToken = await refreshCopilotTokenWithRetry();
+		if (newToken) {
+			state.copilotToken = newToken;
 			consola.debug("Copilot token refreshed");
-			if (state.showToken) consola.info("Refreshed Copilot token:", token$1);
-		} catch (error) {
-			consola.error("Failed to refresh Copilot token (will retry on next interval):", error);
-		}
+			if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
+		} else consola.error("Failed to refresh Copilot token after retries, using existing token");
 	}, refreshInterval);
 };
 async function setupGitHubToken(options) {
@@ -520,6 +551,567 @@ const logout = defineCommand({
 	}
 });
+//#endregion
+//#region src/patch-claude.ts
+/**
+* Inclusive Claude Code version ranges that the patcher knows how to handle.
+* Versions in `v2a` are patched via the function pattern, versions in `v2b`
+* via the variable pattern (see getPatternTypeForVersion).
+*/
+const SUPPORTED_VERSIONS = {
+	v2a: {
+		min: "2.0.0",
+		max: "2.1.10"
+	},
+	v2b: {
+		min: "2.1.11",
+		max: "2.1.12"
+	}
+};
+/**
+* Regular expressions matched against the contents of cli.js.
+* - funcOriginal: the `HR` function with the unpatched `return 200000`.
+* - funcPatched: the same function with any numeric return value.
+* - variable: the `var BS9=<number>` assignment; group 1 captures the number.
+* NOTE(review): `HR` and `BS9` look like minified identifiers, so these are
+* presumably tied to specific builds - confirm when adding new versions.
+*/
+const PATTERNS = {
+	funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
+	funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
+	variable: /var BS9=(\d+)/
+};
+/**
+* Split a dotted version string into its numeric components.
+* Components that do not parse as an integer are treated as 0.
+*/
+function parseVersion(version) {
+	const parts = [];
+	for (const piece of version.split(".")) {
+		const value = Number.parseInt(piece, 10);
+		parts.push(value || 0);
+	}
+	return parts;
+}
+/**
+* Compare two dotted version strings component by component.
+* Missing components count as 0, so "2.1" equals "2.1.0".
+* Returns: -1 if a < b, 0 if a == b, 1 if a > b
+*/
+function compareVersions(a, b) {
+	const left = parseVersion(a);
+	const right = parseVersion(b);
+	const count = Math.max(left.length, right.length);
+	for (let idx = 0; idx < count; idx++) {
+		const diff = (left[idx] || 0) - (right[idx] || 0);
+		if (diff !== 0) return diff < 0 ? -1 : 1;
+	}
+	return 0;
+}
+/**
+* Map a Claude Code version to the patch pattern it needs.
+* Returns "func" for the v2a range, "variable" for the v2b range, or null
+* when the version falls outside both ranges.
+*/
+function getPatternTypeForVersion(version) {
+	const isWithin = ({ min, max }) => compareVersions(version, min) >= 0 && compareVersions(version, max) <= 0;
+	if (isWithin(SUPPORTED_VERSIONS.v2a)) return "func";
+	if (isWithin(SUPPORTED_VERSIONS.v2b)) return "variable";
+	return null;
+}
+/**
+* Render the supported version ranges as "min-max, min-max" for error messages.
+*/
+function getSupportedRangeString() {
+	const { v2a, v2b } = SUPPORTED_VERSIONS;
+	return [v2a, v2b].map(({ min, max }) => `${min}-${max}`).join(", ");
+}
+/**
+* Read the Claude Code version from the package.json that sits next to cli.js.
+* Returns null when the file is missing, unreadable, not valid JSON, or has
+* no string `version` field.
+*/
+function getClaudeCodeVersion(cliPath) {
+	try {
+		const manifestPath = join(dirname(cliPath), "package.json");
+		if (!existsSync(manifestPath)) return null;
+		const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
+		const isObject = typeof manifest === "object" && manifest !== null;
+		if (!isObject || !("version" in manifest)) return null;
+		return typeof manifest.version === "string" ? manifest.version : null;
+	} catch {
+		return null;
+	}
+}
+/**
+* Collect every existing Claude Code cli.js under a Volta home directory.
+* Checks the dedicated package image first, then each entry under tools/image/node.
+*/
+function findInVoltaTools(voltaHome) {
+	const cliSuffix = [
+		"lib",
+		"node_modules",
+		"@anthropic-ai",
+		"claude-code",
+		"cli.js"
+	];
+	const found = [];
+	const packageCli = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", ...cliSuffix);
+	if (existsSync(packageCli)) found.push(packageCli);
+	const nodeImagesDir = join(voltaHome, "tools", "image", "node");
+	if (!existsSync(nodeImagesDir)) return found;
+	try {
+		for (const nodeVersion of readdirSync(nodeImagesDir)) {
+			const candidate = join(nodeImagesDir, nodeVersion, ...cliSuffix);
+			if (existsSync(candidate)) found.push(candidate);
+		}
+	} catch {}
+	return found;
+}
+/**
+* Find Claude Code CLI path by checking common locations.
+*
+* Search order: Volta tool images, the npm prefix from `npm_config_prefix`,
+* well-known global node_modules directories, then Bun's global install.
+*
+* @returns The first candidate cli.js that exists, or null when none does.
+*/
+function findClaudeCodePath() {
+	const possiblePaths = [];
+	// Fall back to os.homedir() when HOME is unset; an empty string would make
+	// the joins below resolve relative to the current working directory.
+	const home = process.env.HOME || os.homedir();
+	const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
+	if (existsSync(voltaHome)) possiblePaths.push(...findInVoltaTools(voltaHome));
+	const npmPrefix = process.env.npm_config_prefix;
+	if (npmPrefix) possiblePaths.push(join(npmPrefix, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
+	const globalPaths = [
+		join(home, ".npm-global", "lib", "node_modules"),
+		"/usr/local/lib/node_modules",
+		"/usr/lib/node_modules"
+	];
+	for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
+	const bunGlobal = join(home, ".bun", "install", "global");
+	if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
+	return possiblePaths.find((p) => existsSync(p)) ?? null;
+}
+/**
+* Extract the context limit currently present in the cli.js contents.
+* The variable pattern is tried first, then the function pattern.
+* Returns null when neither pattern is found.
+*/
+function getCurrentLimit(content) {
+	const variableHit = content.match(PATTERNS.variable);
+	if (variableHit) return Number.parseInt(variableHit[1], 10);
+	const functionHit = content.match(PATTERNS.funcPatched);
+	if (!functionHit) return null;
+	const trailingReturn = functionHit[0].match(/return (\d+)\}$/);
+	if (!trailingReturn) return null;
+	return Number.parseInt(trailingReturn[1], 10);
+}
+/**
+* Determine whether the Claude Code install at cliPath can be patched.
+* Returns { supported, version, patternType } and, when unsupported, an
+* `error` message explaining why.
+*/
+function checkVersionSupport(cliPath) {
+	const unsupported = (detectedVersion, error) => ({
+		supported: false,
+		version: detectedVersion,
+		patternType: null,
+		error
+	});
+	const version = getClaudeCodeVersion(cliPath);
+	if (!version) return unsupported(null, "Could not detect Claude Code version");
+	const patternType = getPatternTypeForVersion(version);
+	if (patternType) return {
+		supported: true,
+		version,
+		patternType
+	};
+	return unsupported(version, `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`);
+}
+/**
+* Patch Claude Code to use a different context limit.
+*
+* @param cliPath Path to Claude Code's cli.js.
+* @param newLimit Context limit to write into the file.
+* @returns true when the file already has `newLimit` or was rewritten with it;
+*          false when the version is unsupported or the expected pattern is
+*          not present in the file (in which case nothing is written).
+*/
+function patchClaudeCode(cliPath, newLimit) {
+	const content = readFileSync(cliPath, "utf8");
+	const versionCheck = checkVersionSupport(cliPath);
+	if (!versionCheck.supported) {
+		consola.error(versionCheck.error);
+		return false;
+	}
+	consola.info(`Claude Code version: ${versionCheck.version}`);
+	if (getCurrentLimit(content) === newLimit) {
+		consola.info(`Already patched with limit ${newLimit}`);
+		return true;
+	}
+	let pattern;
+	let replacement;
+	if (versionCheck.patternType === "variable") {
+		pattern = PATTERNS.variable;
+		replacement = `var BS9=${newLimit}`;
+	} else {
+		pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
+		replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
+	}
+	// Without a match, replace() would be a no-op and the caller would report a
+	// successful patch even though the file is unchanged.
+	if (!pattern.test(content)) return false;
+	writeFileSync(cliPath, content.replace(pattern, replacement));
+	return true;
+}
+/**
+* Restore Claude Code to original 200k limit.
+*
+* @param cliPath Path to Claude Code's cli.js.
+* @returns true when the file is already at 200000 or was rewritten to it;
+*          false when the version is unsupported or the expected pattern is
+*          not present in the file (in which case nothing is written).
+*/
+function restoreClaudeCode(cliPath) {
+	const content = readFileSync(cliPath, "utf8");
+	const versionCheck = checkVersionSupport(cliPath);
+	if (!versionCheck.supported) {
+		consola.error(versionCheck.error);
+		return false;
+	}
+	consola.info(`Claude Code version: ${versionCheck.version}`);
+	if (getCurrentLimit(content) === 2e5) {
+		consola.info("Already at original 200000 limit");
+		return true;
+	}
+	const useVariable = versionCheck.patternType === "variable";
+	const pattern = useVariable ? PATTERNS.variable : PATTERNS.funcPatched;
+	const replacement = useVariable ? "var BS9=200000" : "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}";
+	// Without a match, replace() would be a no-op and the caller would report a
+	// successful restore even though the file is unchanged.
+	if (!pattern.test(content)) return false;
+	writeFileSync(cliPath, content.replace(pattern, replacement));
+	return true;
+}
+/**
+* Log the detected Claude Code version (when available) and whether the
+* given limit is the original 200000, a patched value, or undetectable (null).
+*/
+function showStatus(cliPath, currentLimit) {
+	const version = getClaudeCodeVersion(cliPath);
+	if (version) consola.info(`Claude Code version: ${version}`);
+	if (currentLimit === null) {
+		consola.warn("Could not detect current limit - CLI may have been updated");
+		consola.info("Look for the BS9 variable or HR function pattern in cli.js");
+		return;
+	}
+	const summary = currentLimit === 2e5 ? "Status: Original (200k context window)" : `Status: Patched (${currentLimit} context window)`;
+	consola.info(summary);
+}
+/**
+* `patch-claude` command: show, change, or restore the context window limit
+* stored in Claude Code's cli.js.
+*
+* Flow: resolve the cli.js path (--path or auto-detection), read the current
+* limit, then either print the status (--status), restore the original value
+* (--restore), or write the requested --limit. Exits with code 1 when the file
+* cannot be found, the limit is invalid, or patching/restoring fails.
+*/
+const patchClaude = defineCommand({
+	meta: {
+		name: "patch-claude",
+		description: "Patch Claude Code's context window limit to match Copilot's limits"
+	},
+	args: {
+		limit: {
+			alias: "l",
+			type: "string",
+			default: "128000",
+			description: "Context window limit in tokens (default: 128000 for Copilot)"
+		},
+		restore: {
+			alias: "r",
+			type: "boolean",
+			default: false,
+			description: "Restore original 200k limit"
+		},
+		path: {
+			alias: "p",
+			type: "string",
+			description: "Path to Claude Code cli.js (auto-detected if not specified)"
+		},
+		status: {
+			alias: "s",
+			type: "boolean",
+			default: false,
+			description: "Show current patch status without modifying"
+		}
+	},
+	run({ args }) {
+		const cliPath = args.path || findClaudeCodePath();
+		if (!cliPath) {
+			consola.error("Could not find Claude Code installation");
+			consola.info("Searched in: volta, npm global, bun global");
+			consola.info("Use --path to specify the path to cli.js manually");
+			process.exit(1);
+		}
+		if (!existsSync(cliPath)) {
+			consola.error(`File not found: ${cliPath}`);
+			process.exit(1);
+		}
+		consola.info(`Claude Code path: ${cliPath}`);
+		const content = readFileSync(cliPath, "utf8");
+		const currentLimit = getCurrentLimit(content);
+		if (args.status) {
+			showStatus(cliPath, currentLimit);
+			return;
+		}
+		if (args.restore) {
+			if (restoreClaudeCode(cliPath)) consola.success("Restored to original 200k limit");
+			else {
+				consola.error("Failed to restore - pattern not found");
+				consola.info("Claude Code may have been updated to a new version");
+				process.exit(1);
+			}
+			return;
+		}
+		const limit = Number.parseInt(args.limit, 10);
+		if (Number.isNaN(limit) || limit < 1e3) {
+			consola.error("Invalid limit value. Must be a number >= 1000");
+			process.exit(1);
+		}
+		if (patchClaudeCode(cliPath, limit)) {
+			// Report the limit that was actually in the file rather than assuming
+			// the original 200000; stay quiet when the value did not change.
+			if (currentLimit !== limit) consola.success(`Patched context window: ${currentLimit ?? "unknown"} → ${limit}`);
+			consola.info("Note: You may need to re-run this after Claude Code updates");
+		} else {
+			consola.error("Failed to patch - pattern not found");
+			consola.info("Claude Code may have been updated to a new version");
+			consola.info("Check the cli.js for the HR function pattern");
+			process.exit(1);
+		}
+	}
+});
+//#endregion
+//#region src/lib/adaptive-rate-limiter.ts
+/**
+* Default settings for AdaptiveRateLimiter; any key can be overridden through
+* the constructor's config object.
+*/
+const DEFAULT_CONFIG$1 = {
+	// Base of the exponential backoff (seconds) used for retries after a 429.
+	baseRetryIntervalSeconds: 10,
+	// Upper bound (seconds) on the exponential backoff interval.
+	maxRetryIntervalSeconds: 120,
+	// Spacing (seconds) between queued requests that have not been retried yet.
+	requestIntervalSeconds: 10,
+	// Minutes since the last rate limit after which recovery is attempted.
+	recoveryTimeoutMinutes: 10,
+	// Number of consecutive queued successes that also triggers recovery.
+	consecutiveSuccessesForRecovery: 5,
+	// Per-step request spacing (seconds) while in recovering mode.
+	gradualRecoverySteps: [
+		5,
+		2,
+		1,
+		0
+	]
+};
+/**
+* Adaptive rate limiter that switches between normal, rate-limited, and recovering modes
+* based on API responses.
+*
+* - normal: requests run immediately; a 429 switches to rate-limited mode and
+*   the failed request is queued for retry.
+* - rate-limited: requests are queued and processed one at a time with a delay
+*   between them (server Retry-After when provided, otherwise exponential backoff).
+* - recovering: entered after enough consecutive successes or after the recovery
+*   timeout; requests run directly but spaced by the gradualRecoverySteps
+*   intervals until the last step completes, then the mode returns to normal.
+*/
+var AdaptiveRateLimiter = class {
+	config;
+	mode = "normal";
+	queue = [];
+	processing = false;
+	rateLimitedAt = null;
+	consecutiveSuccesses = 0;
+	lastRequestTime = 0;
+	/** Current step in gradual recovery (index into gradualRecoverySteps) */
+	recoveryStepIndex = 0;
+	/**
+	* @param config Partial configuration; missing keys fall back to DEFAULT_CONFIG$1.
+	*/
+	constructor(config = {}) {
+		this.config = {
+			...DEFAULT_CONFIG$1,
+			...config
+		};
+	}
+	/**
+	* Execute a request with adaptive rate limiting.
+	* Returns a promise that resolves to `{ result, queueWaitMs }` when the request succeeds.
+	* The request will be retried automatically on 429 errors; other errors are rethrown.
+	*/
+	async execute(fn) {
+		if (this.mode === "normal") return this.executeInNormalMode(fn);
+		if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
+		return this.enqueue(fn);
+	}
+	/**
+	* Check if an error is a rate limit error (429) and extract Retry-After if available.
+	* An error also counts as a rate limit when its JSON `responseText` carries
+	* `error.code === "rate_limited"`.
+	*/
+	isRateLimitError(error) {
+		if (error && typeof error === "object") {
+			if ("status" in error && error.status === 429) return {
+				isRateLimit: true,
+				retryAfter: this.extractRetryAfter(error)
+			};
+			if ("responseText" in error && typeof error.responseText === "string") try {
+				const parsed = JSON.parse(error.responseText);
+				if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
+			} catch {}
+		}
+		return { isRateLimit: false };
+	}
+	/**
+	* Extract Retry-After value from error response.
+	* Looks for a numeric `retry_after` at the top level of the JSON body or
+	* under `error`; returns undefined when neither is present or the body is not JSON.
+	*/
+	extractRetryAfter(error) {
+		if (!error || typeof error !== "object") return void 0;
+		if ("responseText" in error && typeof error.responseText === "string") try {
+			const parsed = JSON.parse(error.responseText);
+			if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
+			if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
+		} catch {}
+	}
+	/**
+	* Execute in normal mode - full speed
+	*/
+	async executeInNormalMode(fn) {
+		try {
+			return {
+				result: await fn(),
+				queueWaitMs: 0
+			};
+		} catch (error) {
+			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+			if (isRateLimit) {
+				this.enterRateLimitedMode();
+				// Normal mode does not track request times; record this failure so the
+				// queued retry is delayed instead of firing immediately after the 429.
+				this.lastRequestTime = Date.now();
+				return this.enqueue(fn, retryAfter);
+			}
+			throw error;
+		}
+	}
+	/**
+	* Execute in recovering mode - gradual speedup.
+	* Waits until the current recovery step's interval has passed since the last
+	* request, then runs `fn`. Success advances the step; a 429 returns to
+	* rate-limited mode and queues the request.
+	*/
+	async executeInRecoveringMode(fn) {
+		const startTime = Date.now();
+		const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
+		if (currentInterval > 0) {
+			const elapsedMs = Date.now() - this.lastRequestTime;
+			const requiredMs = currentInterval * 1e3;
+			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+				const waitMs = requiredMs - elapsedMs;
+				await this.sleep(waitMs);
+			}
+		}
+		this.lastRequestTime = Date.now();
+		try {
+			const result = await fn();
+			this.recoveryStepIndex++;
+			if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
+			else {
+				const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
+				consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
+			}
+			const queueWaitMs = Date.now() - startTime;
+			return {
+				result,
+				queueWaitMs
+			};
+		} catch (error) {
+			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+			if (isRateLimit) {
+				consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
+				this.enterRateLimitedMode();
+				return this.enqueue(fn, retryAfter);
+			}
+			throw error;
+		}
+	}
+	/**
+	* Enter rate-limited mode (no-op when already rate-limited)
+	*/
+	enterRateLimitedMode() {
+		if (this.mode === "rate-limited") return;
+		this.mode = "rate-limited";
+		this.rateLimitedAt = Date.now();
+		this.consecutiveSuccesses = 0;
+		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
+	}
+	/**
+	* Check if we should try to recover to normal mode: either enough
+	* consecutive successes, or the recovery timeout has elapsed since the last 429.
+	*/
+	shouldAttemptRecovery() {
+		if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
+			consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
+			return true;
+		}
+		if (this.rateLimitedAt) {
+			const elapsed = Date.now() - this.rateLimitedAt;
+			const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
+			if (elapsed >= timeout) {
+				consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
+				return true;
+			}
+		}
+		return false;
+	}
+	/**
+	* Start gradual recovery mode
+	*/
+	startGradualRecovery() {
+		this.mode = "recovering";
+		this.recoveryStepIndex = 0;
+		this.rateLimitedAt = null;
+		this.consecutiveSuccesses = 0;
+		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
+		consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
+	}
+	/**
+	* Complete recovery to normal mode
+	*/
+	completeRecovery() {
+		this.mode = "normal";
+		this.recoveryStepIndex = 0;
+		consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
+	}
+	/**
+	* Enqueue a request for later execution.
+	* `retryAfterSeconds`, when given, is the server-provided delay to honor
+	* before the request's next attempt.
+	*/
+	enqueue(fn, retryAfterSeconds) {
+		return new Promise((resolve, reject) => {
+			const request = {
+				execute: fn,
+				resolve,
+				reject,
+				retryCount: 0,
+				retryAfterSeconds,
+				enqueuedAt: Date.now()
+			};
+			this.queue.push(request);
+			if (this.queue.length > 1) {
+				const position = this.queue.length;
+				const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
+				consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
+			}
+			this.processQueue();
+		});
+	}
+	/**
+	* Calculate retry interval (seconds): the server-provided value when it is
+	* positive, otherwise exponential backoff capped at maxRetryIntervalSeconds.
+	*/
+	calculateRetryInterval(request) {
+		if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
+		const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
+		return Math.min(backoff, this.config.maxRetryIntervalSeconds);
+	}
+	/**
+	* Process the queue one request at a time.
+	* Only one loop runs at a time (guarded by `processing`). A request that
+	* fails with a rate limit stays at the head of the queue and is retried;
+	* any other failure rejects that request and moves on.
+	*/
+	async processQueue() {
+		if (this.processing) return;
+		this.processing = true;
+		try {
+			while (this.queue.length > 0) {
+				const request = this.queue[0];
+				if (this.shouldAttemptRecovery()) this.startGradualRecovery();
+				const elapsedMs = Date.now() - this.lastRequestTime;
+				// Honor a server-provided Retry-After on the first queued attempt too,
+				// not only once the request has already been retried.
+				const hasServerDelay = request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0;
+				const useRetryInterval = request.retryCount > 0 || hasServerDelay;
+				const requiredMs = (useRetryInterval ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
+				if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+					const waitMs = requiredMs - elapsedMs;
+					const waitSec = Math.ceil(waitMs / 1e3);
+					consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
+					await this.sleep(waitMs);
+				}
+				this.lastRequestTime = Date.now();
+				try {
+					const result = await request.execute();
+					this.queue.shift();
+					this.consecutiveSuccesses++;
+					request.retryAfterSeconds = void 0;
+					const queueWaitMs = Date.now() - request.enqueuedAt;
+					request.resolve({
+						result,
+						queueWaitMs
+					});
+					if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
+				} catch (error) {
+					const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+					if (isRateLimit) {
+						request.retryCount++;
+						request.retryAfterSeconds = retryAfter;
+						this.consecutiveSuccesses = 0;
+						this.rateLimitedAt = Date.now();
+						const nextInterval = this.calculateRetryInterval(request);
+						const source = retryAfter ? "server Retry-After" : "exponential backoff";
+						consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
+					} else {
+						this.queue.shift();
+						request.reject(error);
+					}
+				}
+			}
+		} finally {
+			// Always release the guard so a later enqueue can restart processing,
+			// even if something above throws unexpectedly.
+			this.processing = false;
+		}
+	}
+	/** Resolve after `ms` milliseconds. */
+	sleep(ms) {
+		return new Promise((resolve) => setTimeout(resolve, ms));
+	}
+	/**
+	* Get current status for debugging/monitoring
+	*/
+	getStatus() {
+		return {
+			mode: this.mode,
+			queueLength: this.queue.length,
+			consecutiveSuccesses: this.consecutiveSuccesses,
+			rateLimitedAt: this.rateLimitedAt
+		};
+	}
+};
+/** Module-wide limiter; stays null until initAdaptiveRateLimiter() is called. */
+let rateLimiterInstance = null;
+/**
+* Create the shared adaptive rate limiter from `config` and log the
+* effective settings (explicit values, falling back to the defaults).
+*/
+function initAdaptiveRateLimiter(config = {}) {
+	rateLimiterInstance = new AdaptiveRateLimiter(config);
+	const effective = (key) => config[key] ?? DEFAULT_CONFIG$1[key];
+	const baseRetry = effective("baseRetryIntervalSeconds");
+	const maxRetry = effective("maxRetryIntervalSeconds");
+	const interval = effective("requestIntervalSeconds");
+	const recovery = effective("recoveryTimeoutMinutes");
+	const successes = effective("consecutiveSuccessesForRecovery");
+	const steps = effective("gradualRecoverySteps");
+	consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
+}
+/**
+* Run `fn` through the shared adaptive rate limiter.
+* When no limiter has been initialized, `fn` runs immediately.
+* Resolves to `{ result, queueWaitMs }` in both cases.
+*/
+async function executeWithAdaptiveRateLimit(fn) {
+	if (rateLimiterInstance) return rateLimiterInstance.execute(fn);
+	const result = await fn();
+	return {
+		result,
+		queueWaitMs: 0
+	};
+}
 //#endregion
 //#region src/lib/history.ts
 function generateId$1() {
@@ -771,44 +1363,74 @@ function exportHistory(format = "json") {
 //#endregion
 //#region src/lib/proxy.ts
+/**
+* Custom dispatcher that routes requests through proxies based on environment variables.
+* Extends Agent to properly inherit the Dispatcher interface.
+*
+* Requests whose origin has no proxy URL are dispatched by the base Agent
+* (`super.dispatch`); all others go through a ProxyAgent that is created once
+* per proxy URL and cached in `proxies`.
+*/
+var ProxyDispatcher = class extends Agent {
+	/** ProxyAgent instances keyed by proxy URL. */
+	proxies = /* @__PURE__ */ new Map();
+	/**
+	* Dispatch a request either directly or via the proxy selected for its origin.
+	* Any error raised while resolving the proxy falls back to a direct dispatch.
+	* NOTE(review): the catch also covers the dispatch calls inside the try, so a
+	* synchronous throw from them would trigger a second dispatch - confirm this is intended.
+	*/
+	dispatch(options, handler) {
+		try {
+			const origin = this.getOriginUrl(options.origin);
+			const proxyUrl = this.getProxyUrl(origin);
+			if (!proxyUrl) {
+				consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
+				return super.dispatch(options, handler);
+			}
+			const agent = this.getOrCreateProxyAgent(proxyUrl);
+			consola.debug(`HTTP proxy route: ${origin.hostname} via ${this.formatProxyLabel(proxyUrl)}`);
+			return agent.dispatch(options, handler);
+		} catch {
+			return super.dispatch(options, handler);
+		}
+	}
+	/** Normalize an origin given as a string into a URL object; other values pass through. */
+	getOriginUrl(origin) {
+		return typeof origin === "string" ? new URL(origin) : origin;
+	}
+	/** Return the proxy URL for `origin` from getProxyForUrl, or undefined when it yields an empty value. */
+	getProxyUrl(origin) {
+		const raw = getProxyForUrl(origin.toString());
+		return raw && raw.length > 0 ? raw : void 0;
+	}
+	/** Return the cached ProxyAgent for `proxyUrl`, creating and caching it on first use. */
+	getOrCreateProxyAgent(proxyUrl) {
+		let agent = this.proxies.get(proxyUrl);
+		if (!agent) {
+			agent = new ProxyAgent(proxyUrl);
+			this.proxies.set(proxyUrl, agent);
+		}
+		return agent;
+	}
+	/** Build a log label containing only protocol and host of the proxy URL; falls back to the raw string if it does not parse. */
+	formatProxyLabel(proxyUrl) {
+		try {
+			const u = new URL(proxyUrl);
+			return `${u.protocol}//${u.host}`;
+		} catch {
+			return proxyUrl;
+		}
+	}
+	/** Close the base agent first, then every cached proxy agent, and empty the cache. */
+	async close() {
+		await super.close();
+		await Promise.all([...this.proxies.values()].map((p) => p.close()));
+		this.proxies.clear();
+	}
+	/**
+	* Destroy every cached proxy agent and then the base agent, forwarding the
+	* error and/or callback arguments in whichever form the caller used.
+	* NOTE(review): when a callback is supplied it is passed to each proxy agent
+	* and to super.destroy, so it is presumably invoked more than once - confirm.
+	*/
+	destroy(errOrCallback, callback) {
+		for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
+		else if (callback) agent.destroy(errOrCallback ?? null, callback);
+		else agent.destroy(errOrCallback ?? null).catch(() => {});
+		this.proxies.clear();
+		if (typeof errOrCallback === "function") {
+			super.destroy(errOrCallback);
+			return;
+		} else if (callback) {
+			super.destroy(errOrCallback ?? null, callback);
+			return;
+		} else return super.destroy(errOrCallback ?? null);
+	}
+};
 function initProxyFromEnv() {
 	if (typeof Bun !== "undefined") return;
 	try {
-		const direct = new Agent();
-		const proxies = /* @__PURE__ */ new Map();
-		setGlobalDispatcher({
-			dispatch(options, handler) {
-				try {
-					const origin = typeof options.origin === "string" ? new URL(options.origin) : options.origin;
-					const raw = getProxyForUrl(origin.toString());
-					const proxyUrl = raw && raw.length > 0 ? raw : void 0;
-					if (!proxyUrl) {
-						consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
-						return direct.dispatch(options, handler);
-					}
-					let agent = proxies.get(proxyUrl);
-					if (!agent) {
-						agent = new ProxyAgent(proxyUrl);
-						proxies.set(proxyUrl, agent);
-					}
-					let label = proxyUrl;
-					try {
-						const u = new URL(proxyUrl);
-						label = `${u.protocol}//${u.host}`;
-					} catch {}
-					consola.debug(`HTTP proxy route: ${origin.hostname} via ${label}`);
-					return agent.dispatch(options, handler);
-				} catch {
-					return direct.dispatch(options, handler);
-				}
-			},
-			close() {
-				return direct.close();
-			},
-			destroy() {
-				return direct.destroy();
-			}
-		});
+		const dispatcher = new ProxyDispatcher();
+		setGlobalDispatcher(dispatcher);
 		consola.debug("HTTP proxy configured from environment (per-URL)");
 	} catch (err) {
 		consola.debug("Proxy setup skipped:", err);
@@ -894,23 +1516,66 @@ function formatTokens(input, output) {
 * Console renderer that shows request lifecycle with apt-get style footer
 *
 * Log format:
-* - Start: [....] METHOD /path model-name
-* - Streaming: [<-->] METHOD /path model-name streaming...
-* - Complete: [ OK ] METHOD /path 200 1.2s 1.5K/500 model-name
+* - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
+* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
+* - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
+* - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
+*
+* Color scheme for completed requests:
+* - Prefix: green (success) / red (error)
+* - Time: dim
+* - Method: cyan
+* - Path: white
+* - Model: magenta
+* - Status: green (success) / red (error)
+* - Duration: yellow
+* - Tokens: blue
 *
 * Features:
-* - /history API requests are displayed in gray (dim)
-* - Sticky footer shows active request count, updated in-place on the last line
-* - Footer disappears when all requests complete
+* - Start lines only shown in debug mode (--verbose)
+* - Streaming lines are dim (less important)
+* - /history API requests are always dim
+* - Sticky footer shows active request count
+* - Intercepts consola output to properly handle footer
 */
 var ConsoleRenderer = class {
 	activeRequests = /* @__PURE__ */ new Map();
 	showActive;
 	footerVisible = false;
 	isTTY;
+	originalReporters = [];
 	constructor(options) {
 		this.showActive = options?.showActive ?? true;
 		this.isTTY = process.stdout.isTTY;
+		this.installConsolaReporter();
+	}
+	/**
+	* Install a custom consola reporter that coordinates with footer.
+	*
+	* The existing reporters are saved in `originalReporters` and replaced by a
+	* single reporter that clears the footer, writes the message to stdout, and
+	* re-renders the footer. Error arguments are printed with their stack (or
+	* name and message) because JSON.stringify would render them as "{}";
+	* values that cannot be serialized fall back to String().
+	*/
+	installConsolaReporter() {
+		this.originalReporters = [...consola.options.reporters];
+		const formatArg = (arg) => {
+			if (typeof arg === "string") return arg;
+			if (arg instanceof Error) return arg.stack ?? `${arg.name}: ${arg.message}`;
+			try {
+				return JSON.stringify(arg) ?? String(arg);
+			} catch {
+				// Circular structures and BigInt values make JSON.stringify throw.
+				return String(arg);
+			}
+		};
+		consola.setReporters([{ log: (logObj) => {
+			this.clearFooterForLog();
+			const message = logObj.args.map(formatArg).join(" ");
+			const prefix = this.getLogPrefix(logObj.type);
+			if (prefix) process.stdout.write(`${prefix} ${message}\n`);
+			else process.stdout.write(`${message}\n`);
+			this.renderFooter();
+		} }]);
+	}
+	/**
+	* Return the colored symbol shown before a log line of the given type,
+	* or an empty string for types without a dedicated symbol.
+	*/
+	getLogPrefix(type) {
+		if (type === "error" || type === "fatal") return pc.red("✖");
+		if (type === "warn") return pc.yellow("⚠");
+		if (type === "info") return pc.cyan("ℹ");
+		if (type === "success") return pc.green("✔");
+		if (type === "debug") return pc.gray("●");
+		return "";
+	}
 	/**
 	* Get footer text based on active request count
@@ -946,25 +1611,52 @@ var ConsoleRenderer = class {
 		}
 	}
 	/**
+	* Format a complete log line with colored parts
+	*/
+	formatLogLine(parts) {
+		const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
+		// Dim lines are rendered as one uniformly dimmed string.
+		if (isDim) {
+			const pieces = [`${prefix} ${time} ${method} ${path$1}`];
+			if (model) pieces.push(` ${model}`);
+			if (extra) pieces.push(` ${extra}`);
+			return pc.dim(pieces.join(""));
+		}
+		// Prefix and status share one color: red for errors, green otherwise.
+		const paintOutcome = (text) => isError ? pc.red(text) : pc.green(text);
+		const segments = [`${paintOutcome(prefix)} ${pc.dim(time)} ${pc.cyan(method)} ${pc.white(path$1)}`];
+		if (model) segments.push(pc.magenta(` ${model}`));
+		if (status !== void 0) segments.push(` ${paintOutcome(String(status))}`);
+		if (duration) segments.push(` ${pc.yellow(duration)}`);
+		if (queueWait) segments.push(` ${pc.dim(`(queued ${queueWait})`)}`);
+		if (tokens) segments.push(` ${pc.blue(tokens)}`);
+		// `extra` is appended without a separating space, as the caller supplies its own.
+		if (extra) segments.push(isError ? pc.red(extra) : extra);
+		return segments.join("");
+	}
+	/**
 	* Print a log line with proper footer handling
-	* 1. Clear footer if visible
-	* 2. Print log with newline
-	* 3. Re-render footer on new line (no newline after footer)
 	*/
-	printLog(message, isGray = false) {
+	printLog(message) {
 		this.clearFooterForLog();
-		if (isGray) consola.log(pc.dim(message));
-		else consola.log(message);
+		process.stdout.write(message + "\n");
 		this.renderFooter();
 	}
 	onRequestStart(request) {
 		this.activeRequests.set(request.id, request);
-		if (this.showActive) {
-			const time = formatTime();
-			const modelInfo = request.model ? ` ${request.model}` : "";
-			const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
-			const message = `${time} [....] ${request.method} ${request.path}${modelInfo}${queueInfo}`;
-			this.printLog(message, request.isHistoryAccess);
+		if (this.showActive && consola.level >= 5) {
+			const message = this.formatLogLine({
+				prefix: "[....]",
+				time: formatTime(),
+				method: request.method,
+				path: request.path,
+				model: request.model,
+				extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
+				isDim: true
+			});
+			this.printLog(message);
 		}
 	}
 	onRequestUpdate(id, update) {
@@ -972,28 +1664,39 @@ var ConsoleRenderer = class {
 		if (!request) return;
 		Object.assign(request, update);
 		if (this.showActive && update.status === "streaming") {
-			const time = formatTime();
-			const modelInfo = request.model ? ` ${request.model}` : "";
-			const message = `${time} [<-->] ${request.method} ${request.path}${modelInfo} streaming...`;
-			this.printLog(message, request.isHistoryAccess);
+			const message = this.formatLogLine({
+				prefix: "[<-->]",
+				time: formatTime(),
+				method: request.method,
+				path: request.path,
+				model: request.model,
+				extra: "streaming...",
+				isDim: true
+			});
+			this.printLog(message);
 		}
 	}
 	onRequestComplete(request) {
 		this.activeRequests.delete(request.id);
-		const time = formatTime();
 		const status = request.statusCode ?? 0;
-		const duration = formatDuration(request.durationMs ?? 0);
-		const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
-		const modelInfo = request.model ? ` ${request.model}` : "";
 		const isError = request.status === "error" || status >= 400;
-		const prefix = isError ? "[FAIL]" : "[ OK ]";
-		const tokensPart = tokens ? ` ${tokens}` : "";
-		let content = `${time} ${prefix} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
-		if (isError) {
-			const errorInfo = request.error ? `: ${request.error}` : "";
-			content += errorInfo;
-		}
-		this.printLog(content, request.isHistoryAccess);
+		const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
+		const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
+		const message = this.formatLogLine({
+			prefix: isError ? "[FAIL]" : "[ OK ]",
+			time: formatTime(),
+			method: request.method,
+			path: request.path,
+			model: request.model,
+			status,
+			duration: formatDuration(request.durationMs ?? 0),
+			queueWait,
+			tokens,
+			extra: isError && request.error ? `: ${request.error}` : void 0,
+			isError,
+			isDim: request.isHistoryAccess
+		});
+		this.printLog(message);
 	}
 	destroy() {
 		if (this.footerVisible && this.isTTY) {
@@ -1001,6 +1704,7 @@ var ConsoleRenderer = class {
 			this.footerVisible = false;
 		}
 		this.activeRequests.clear();
+		if (this.originalReporters.length > 0) consola.setReporters(this.originalReporters);
 	}
 };
@@ -1392,14 +2096,14 @@ const getTokenCount = async (payload, model) => {
 //#endregion
 //#region src/lib/auto-compact.ts
 const DEFAULT_CONFIG = {
-	targetTokens: 1e5,
-	safetyMarginPercent: 10
+	targetTokens: 12e4,
+	safetyMarginPercent: 2
 };
 /**
 * Check if payload needs compaction based on model limits.
 * Uses a safety margin to account for token counting differences.
 */
-async function checkNeedsCompaction(payload, model, safetyMarginPercent = 10) {
+async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
 	const currentTokens = (await getTokenCount(payload, model)).input;
 	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
 	const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
@@ -1442,6 +2146,13 @@ function extractSystemMessages(messages) {
 	};
 }
 /**
+* Extract tool_use ids from assistant messages with tool_calls.
+*/
+function getToolUseIds(message) {
+	// Only assistant turns that actually carry tool_calls contribute ids.
+	const hasToolCalls = message.role === "assistant" && Boolean(message.tool_calls);
+	if (!hasToolCalls) return [];
+	return message.tool_calls.map((call) => call.id);
+}
+/**
 * Find messages to keep from the end to stay under target tokens.
 * Returns the starting index of messages to preserve.
 */
@@ -1456,6 +2167,41 @@ function findPreserveIndex(messages, targetTokens, systemTokens) {
 	return 0;
 }
 /**
+* Filter out orphaned tool_result messages that don't have a matching tool_use
+* in the preserved message list. This prevents API errors when truncation
+* separates tool_use/tool_result pairs.
+*/
+function filterOrphanedToolResults(messages) {
+	// Pass 1: gather every tool_use id still provided by an assistant message in this list.
+	const knownToolUseIds = new Set(messages.flatMap((message) => getToolUseIds(message)));
+	// Pass 2: a tool message is an orphan when it names a tool_call_id nobody provides.
+	const isOrphan = (message) => message.role === "tool" && Boolean(message.tool_call_id) && !knownToolUseIds.has(message.tool_call_id);
+	const kept = messages.filter((message) => !isOrphan(message));
+	const dropped = messages.length - kept.length;
+	if (dropped > 0) consola.info(`Auto-compact: Removed ${dropped} orphaned tool_result message(s) without matching tool_use`);
+	return kept;
+}
+/**
+* Ensure the message list starts with a user message.
+* If it starts with assistant or tool messages, skip them until we find a user message.
+* This is required because OpenAI API expects conversations to start with user messages
+* (after system messages).
+*/
+function ensureStartsWithUser(messages) {
+	// Position of the first user turn; with no user turn at all, every message counts as leading.
+	const firstUserIndex = messages.findIndex((message) => message.role === "user");
+	const skipped = firstUserIndex === -1 ? messages.length : firstUserIndex;
+	if (skipped > 0) consola.info(`Auto-compact: Skipped ${skipped} leading non-user message(s) to ensure valid sequence`);
+	return messages.slice(skipped);
+}
+/**
 * Calculate estimated tokens for system messages.
 */
 function estimateSystemTokens(systemMessages) {
@@ -1473,6 +2219,7 @@ function createTruncationMarker(removedCount) {
 /**
 * Perform auto-compaction on a payload that exceeds token limits.
 * This uses simple truncation - no LLM calls required.
+* Uses iterative approach with decreasing target tokens until under limit.
 */
 async function autoCompact(payload, model, config = {}) {
 	const cfg = {
@@ -1493,8 +2240,49 @@ async function autoCompact(payload, model, config = {}) {
 	const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
 	const systemTokens = estimateSystemTokens(systemMessages);
 	consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
-	const effectiveTarget = Math.min(cfg.targetTokens, limit);
-	const preserveIndex = findPreserveIndex(remainingMessages, effectiveTarget, systemTokens);
+	const MAX_ITERATIONS = 5;
+	const MIN_TARGET = 2e4;
+	let currentTarget = Math.min(cfg.targetTokens, limit);
+	let lastResult = null;
+	for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
+		const result = await tryCompactWithTarget({
+			payload,
+			model,
+			systemMessages,
+			remainingMessages,
+			systemTokens,
+			targetTokens: currentTarget,
+			limit,
+			originalTokens
+		});
+		if (!result.wasCompacted) return result;
+		lastResult = result;
+		if (result.compactedTokens <= limit) {
+			consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
+			return result;
+		}
+		consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
+		currentTarget = Math.floor(currentTarget * .7);
+		if (currentTarget < MIN_TARGET) {
+			consola.error("Auto-compact: Cannot reduce further, target too low");
+			return result;
+		}
+	}
+	consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
+	return lastResult ?? {
+		payload,
+		wasCompacted: false,
+		originalTokens,
+		compactedTokens: originalTokens,
+		removedMessageCount: 0
+	};
+}
+/**
+* Helper to attempt compaction with a specific target token count.
+*/
+async function tryCompactWithTarget(opts) {
+	const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
+	const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
 	if (preserveIndex === 0) {
 		consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
 		return {
@@ -1506,8 +2294,21 @@ async function autoCompact(payload, model, config = {}) {
 		};
 	}
 	const removedMessages = remainingMessages.slice(0, preserveIndex);
-	const preservedMessages = remainingMessages.slice(preserveIndex);
-	consola.info(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
+	let preservedMessages = remainingMessages.slice(preserveIndex);
+	preservedMessages = filterOrphanedToolResults(preservedMessages);
+	preservedMessages = ensureStartsWithUser(preservedMessages);
+	preservedMessages = filterOrphanedToolResults(preservedMessages);
+	if (preservedMessages.length === 0) {
+		consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
+		return {
+			payload,
+			wasCompacted: false,
+			originalTokens,
+			compactedTokens: originalTokens,
+			removedMessageCount: 0
+		};
+	}
+	consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
 	const truncationMarker = createTruncationMarker(removedMessages.length);
 	const newPayload = {
 		...payload,
@@ -1518,136 +2319,192 @@ async function autoCompact(payload, model, config = {}) {
 		]
 	};
 	const newTokenCount = await getTokenCount(newPayload, model);
-	consola.info(`Auto-compact: Reduced from ${originalTokens} to ${newTokenCount.input} tokens`);
-	if (newTokenCount.input > limit) {
-		consola.warn(`Auto-compact: Still over limit (${newTokenCount.input} > ${limit}), trying more aggressive truncation`);
-		const aggressiveTarget = Math.floor(effectiveTarget * .7);
-		if (aggressiveTarget < 2e4) {
-			consola.error("Auto-compact: Cannot reduce further, target too low");
-			return {
-				payload: newPayload,
-				wasCompacted: true,
-				originalTokens,
-				compactedTokens: newTokenCount.input,
-				removedMessageCount: removedMessages.length
-			};
-		}
-		return autoCompact(payload, model, {
-			...cfg,
-			targetTokens: aggressiveTarget
-		});
+	return {
+		payload: newPayload,
+		wasCompacted: true,
+		originalTokens,
+		compactedTokens: newTokenCount.input,
+		removedMessageCount: removedMessages.length
+	};
+}
+/**
+* Build the footer text appended to a response after auto-compaction.
+*
+* @param result - Compaction outcome; reads wasCompacted, originalTokens,
+*   compactedTokens and removedMessageCount.
+* @returns Empty string when nothing was compacted, otherwise a marker block
+*   summarising the removed message count and the token reduction.
+*/
+function createCompactionMarker(result) {
+	if (!result.wasCompacted) return "";
+	const { originalTokens, compactedTokens, removedMessageCount } = result;
+	const reduction = originalTokens - compactedTokens;
+	// Guard the division: a zero original count would otherwise render "NaN% reduction".
+	const percentage = originalTokens > 0 ? Math.round(reduction / originalTokens * 100) : 0;
+	return `\n\n---\n[Auto-compacted: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${percentage}% reduction)]`;
+}
+//#endregion
+//#region src/services/copilot/create-chat-completions.ts
+const createChatCompletions = async (payload) => {
+	if (!state.copilotToken) throw new Error("Copilot token not found");
+	// Vision is requested as soon as any message carries an image_url content part.
+	const hasImagePart = (message) => typeof message.content !== "string" && message.content?.some((part) => part.type === "image_url");
+	const enableVision = payload.messages.some(hasImagePart);
+	// Any assistant/tool turn in the history marks the call as agent-initiated.
+	const isAgentCall = payload.messages.some(({ role }) => role === "assistant" || role === "tool");
+	const headers = {
+		...copilotHeaders(state, enableVision),
+		"X-Initiator": isAgentCall ? "agent" : "user"
+	};
+	const endpoint = `${copilotBaseUrl(state)}/chat/completions`;
+	const response = await fetch(endpoint, {
+		method: "POST",
+		headers,
+		body: JSON.stringify(payload)
+	});
+	// Success: hand back an event stream for streaming requests, parsed JSON otherwise.
+	if (response.ok) return payload.stream ? events(response) : await response.json();
+	consola.error("Failed to create chat completions", response);
+	throw await HTTPError.fromResponse("Failed to create chat completions", response);
+};
+//#endregion
+//#region src/routes/shared.ts
+/** Set the model on the tracked request when a tracking id is given and the request exists */
+function updateTrackerModel(trackingId, model) {
+	const tracked = trackingId ? requestTracker.getRequest(trackingId) : void 0;
+	if (tracked) tracked.model = model;
+}
+/** Push a new status to the request tracker; does nothing without a tracking id */
+function updateTrackerStatus(trackingId, status) {
+	if (trackingId) requestTracker.updateRequest(trackingId, { status });
+}
+/** Store a failed response (zero usage, no content) in history together with the elapsed time */
+function recordErrorResponse(ctx, model, error) {
+	const failure = {
+		success: false,
+		model,
+		usage: {
+			input_tokens: 0,
+			output_tokens: 0
+		},
+		// Non-Error throwables carry no usable message, so a fixed label is recorded instead.
+		error: error instanceof Error ? error.message : "Unknown error",
+		content: null
+	};
+	recordResponse(ctx.historyId, failure, Date.now() - ctx.startTime);
+}
+/** Finish TUI tracking for a request: store usage and queue wait, then complete it with status 200 */
+function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
+	if (!trackingId) return;
+	const usage = {
+		inputTokens,
+		outputTokens
+	};
+	// The update carries the queue wait; the completion call receives usage only.
+	requestTracker.updateRequest(trackingId, {
+		...usage,
+		queueWaitMs
+	});
+	requestTracker.completeRequest(trackingId, 200, usage);
+}
+/** Mark the tracked request as failed, using the error's message when one is available */
+function failTracking(trackingId, error) {
+	if (trackingId) {
+		const reason = error instanceof Error ? error.message : "Stream error";
+		requestTracker.failRequest(trackingId, reason);
+	}
+}
+/** Store a failed streaming response in history; accepts any accumulator that exposes `model` */
+function recordStreamError(opts) {
+	const { acc, fallbackModel, ctx, error } = opts;
+	const failure = {
+		success: false,
+		// Prefer the model seen on the stream; fall back to the requested one.
+		model: acc.model || fallbackModel,
+		usage: {
+			input_tokens: 0,
+			output_tokens: 0
+		},
+		error: error instanceof Error ? error.message : "Stream error",
+		content: null
+	};
+	recordResponse(ctx.historyId, failure, Date.now() - ctx.startTime);
+}
+/** Type guard: a response is non-streaming when it has its own `choices` property */
+function isNonStreaming(response) {
+	const ownsChoices = Object.hasOwn(response, "choices");
+	return ownsChoices;
+}
+/** Build final payload with auto-compact if needed */
+async function buildFinalPayload(payload, model) {
+	if (!state.autoCompact || !model) {
+		if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+		return {
+			finalPayload: payload,
+			compactResult: null
+		};
+	}
+	try {
+		const check = await checkNeedsCompaction(payload, model);
+		consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+		if (!check.needed) return {
+			finalPayload: payload,
+			compactResult: null
+		};
+		consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+		const compactResult = await autoCompact(payload, model);
+		return {
+			finalPayload: compactResult.payload,
+			compactResult
+		};
+	} catch (error) {
+		consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
+		return {
+			finalPayload: payload,
+			compactResult: null
+		};
 	}
-	return {
-		payload: newPayload,
-		wasCompacted: true,
-		originalTokens,
-		compactedTokens: newTokenCount.input,
-		removedMessageCount: removedMessages.length
-	};
 }
 /**
-* Create a marker to append to responses indicating auto-compaction occurred.
+* Log helpful debugging information when a 413 error occurs.
 */
-function createCompactionMarker(result) {
-	if (!result.wasCompacted) return "";
-	const reduction = result.originalTokens - result.compactedTokens;
-	const percentage = Math.round(reduction / result.originalTokens * 100);
-	return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
-}
-//#endregion
-//#region src/lib/queue.ts
-var RequestQueue = class {
-	queue = [];
-	processing = false;
-	lastRequestTime = 0;
-	async enqueue(execute, rateLimitSeconds) {
-		return new Promise((resolve, reject) => {
-			this.queue.push({
-				execute,
-				resolve,
-				reject
-			});
-			if (this.queue.length > 1) {
-				const position = this.queue.length;
-				const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
-				(waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
-			}
-			this.processQueue(rateLimitSeconds);
-		});
-	}
-	async processQueue(rateLimitSeconds) {
-		if (this.processing) return;
-		this.processing = true;
-		while (this.queue.length > 0) {
-			const elapsedMs = Date.now() - this.lastRequestTime;
-			const requiredMs = rateLimitSeconds * 1e3;
-			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
-				const waitMs = requiredMs - elapsedMs;
-				const waitSec = Math.ceil(waitMs / 1e3);
-				(waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
-				await new Promise((resolve) => setTimeout(resolve, waitMs));
-			}
-			const request = this.queue.shift();
-			if (!request) break;
-			this.lastRequestTime = Date.now();
-			try {
-				const result = await request.execute();
-				request.resolve(result);
-			} catch (error) {
-				request.reject(error);
+async function logPayloadSizeInfo(payload, model) {
+	const messageCount = payload.messages.length;
+	const bodySize = JSON.stringify(payload).length;
+	const bodySizeKB = Math.round(bodySize / 1024);
+	let imageCount = 0;
+	let largeMessages = 0;
+	let totalImageSize = 0;
+	for (const msg of payload.messages) {
+		if (Array.isArray(msg.content)) {
+			for (const part of msg.content) if (part.type === "image_url") {
+				imageCount++;
+				if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
 			}
 		}
-		this.processing = false;
+		if ((typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content).length) > 5e4) largeMessages++;
 	}
-	get length() {
-		return this.queue.length;
+	consola.info("");
+	consola.info("╭─────────────────────────────────────────────────────────╮");
+	consola.info("│           413 Request Entity Too Large                  │");
+	consola.info("╰─────────────────────────────────────────────────────────╯");
+	consola.info("");
+	consola.info(`  Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
+	consola.info(`  Message count: ${messageCount}`);
+	if (model) try {
+		const tokenCount = await getTokenCount(payload, model);
+		const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+		consola.info(`  Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
+	} catch {}
+	if (imageCount > 0) {
+		const imageSizeKB = Math.round(totalImageSize / 1024);
+		consola.info(`  Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
 	}
-};
-const requestQueue = new RequestQueue();
-/**
-* Execute a request with rate limiting via queue.
-* Requests are queued and processed sequentially at the configured rate.
-*/
-async function executeWithRateLimit(state$1, execute) {
-	if (state$1.rateLimitSeconds === void 0) return execute();
-	return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+	if (largeMessages > 0) consola.info(`  Large messages (>50KB): ${largeMessages}`);
+	consola.info("");
+	consola.info("  Suggestions:");
+	if (!state.autoCompact) consola.info("    • Enable --auto-compact to automatically truncate history");
+	if (imageCount > 0) consola.info("    • Remove or resize large images in the conversation");
+	consola.info("    • Start a new conversation with /clear or /reset");
+	consola.info("    • Reduce conversation history by deleting old messages");
+	consola.info("");
 }
-//#endregion
-//#region src/services/copilot/create-chat-completions.ts
-const createChatCompletions = async (payload) => {
-	if (!state.copilotToken) throw new Error("Copilot token not found");
-	const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
-	const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
-	const headers = {
-		...copilotHeaders(state, enableVision),
-		"X-Initiator": isAgentCall ? "agent" : "user"
-	};
-	const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
-		method: "POST",
-		headers,
-		body: JSON.stringify(payload)
-	});
-	if (!response.ok) {
-		consola.error("Failed to create chat completions", response);
-		throw await HTTPError.fromResponse("Failed to create chat completions", response);
-	}
-	if (payload.stream) return events(response);
-	return await response.json();
-};
 //#endregion
 //#region src/routes/chat-completions/handler.ts
-function getModelMaxOutputTokens(model) {
-	return model?.capabilities?.limits?.max_output_tokens;
-}
 async function handleCompletion$1(c) {
 	const originalPayload = await c.req.json();
 	consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
 	const trackingId = c.get("trackingId");
 	const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
-	updateTrackerModel$1(trackingId, originalPayload.model);
+	updateTrackerModel(trackingId, originalPayload.model);
 	const ctx = {
 		historyId: recordRequest("openai", {
 			model: originalPayload.model,
@@ -1665,19 +2522,33 @@ async function handleCompletion$1(c) {
 	};
 	const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
 	await logTokenCount(originalPayload, selectedModel);
-	const { finalPayload, compactResult } = await buildFinalPayload$1(originalPayload, selectedModel);
+	const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
 	if (compactResult) ctx.compactResult = compactResult;
 	const payload = isNullish(finalPayload.max_tokens) ? {
 		...finalPayload,
-		max_tokens: getModelMaxOutputTokens(selectedModel)
+		max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
 	} : finalPayload;
 	if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
 	if (state.manualApprove) await awaitApproval();
+	return executeRequest({
+		c,
+		payload,
+		selectedModel,
+		ctx,
+		trackingId
+	});
+}
+/**
+* Execute the API call with enhanced error handling for 413 errors.
+*/
+async function executeRequest(opts) {
+	const { c, payload, selectedModel, ctx, trackingId } = opts;
 	try {
-		const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
-		if (isNonStreaming$1(response)) return handleNonStreamingResponse$1(c, response, ctx);
+		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
+		ctx.queueWaitMs = queueWaitMs;
+		if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
 		consola.debug("Streaming response");
-		updateTrackerStatus$1(trackingId, "streaming");
+		updateTrackerStatus(trackingId, "streaming");
 		return streamSSE(c, async (stream) => {
 			await handleStreamingResponse$1({
 				stream,
@@ -1687,39 +2558,11 @@ async function handleCompletion$1(c) {
 			});
 		});
 	} catch (error) {
-		recordErrorResponse$1(ctx, payload.model, error);
+		if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
+		recordErrorResponse(ctx, payload.model, error);
 		throw error;
 	}
 }
-async function buildFinalPayload$1(payload, model) {
-	if (!state.autoCompact || !model) {
-		if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
-		return {
-			finalPayload: payload,
-			compactResult: null
-		};
-	}
-	try {
-		const check = await checkNeedsCompaction(payload, model);
-		consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
-		if (!check.needed) return {
-			finalPayload: payload,
-			compactResult: null
-		};
-		consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
-		const compactResult = await autoCompact(payload, model);
-		return {
-			finalPayload: compactResult.payload,
-			compactResult
-		};
-	} catch (error) {
-		consola.warn("Auto-compact failed, proceeding with original payload:", error);
-		return {
-			finalPayload: payload,
-			compactResult: null
-		};
-	}
-}
 async function logTokenCount(payload, selectedModel) {
 	try {
 		if (selectedModel) {
@@ -1730,27 +2573,6 @@ async function logTokenCount(payload, selectedModel) {
 		consola.debug("Failed to calculate token count:", error);
 	}
 }
-function updateTrackerModel$1(trackingId, model) {
-	if (!trackingId) return;
-	const request = requestTracker.getRequest(trackingId);
-	if (request) request.model = model;
-}
-function updateTrackerStatus$1(trackingId, status) {
-	if (!trackingId) return;
-	requestTracker.updateRequest(trackingId, { status });
-}
-function recordErrorResponse$1(ctx, model, error) {
-	recordResponse(ctx.historyId, {
-		success: false,
-		model,
-		usage: {
-			input_tokens: 0,
-			output_tokens: 0
-		},
-		error: error instanceof Error ? error.message : "Unknown error",
-		content: null
-	}, Date.now() - ctx.startTime);
-}
 function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 	consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
 	let response = originalResponse;
@@ -1782,7 +2604,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 	}, Date.now() - ctx.startTime);
 	if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
 		inputTokens: usage.prompt_tokens,
-		outputTokens: usage.completion_tokens
+		outputTokens: usage.completion_tokens,
+		queueWaitMs: ctx.queueWaitMs
 	});
 	return c.json(response);
 }
@@ -1848,7 +2671,7 @@ async function handleStreamingResponse$1(opts) {
 			acc.content += marker;
 		}
 		recordStreamSuccess(acc, payload.model, ctx);
-		completeTracking$1(ctx.trackingId, acc.inputTokens, acc.outputTokens);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 	} catch (error) {
 		recordStreamError({
 			acc,
@@ -1856,7 +2679,7 @@ async function handleStreamingResponse$1(opts) {
 			ctx,
 			error
 		});
-		failTracking$1(ctx.trackingId, error);
+		failTracking(ctx.trackingId, error);
 		throw error;
 	}
 }
@@ -1864,42 +2687,31 @@ function parseStreamChunk(chunk, acc) {
 	if (!chunk.data || chunk.data === "[DONE]") return;
 	try {
 		const parsed = JSON.parse(chunk.data);
-		accumulateModel(parsed, acc);
-		accumulateUsage(parsed, acc);
-		accumulateChoice(parsed.choices[0], acc);
-	} catch {}
-}
-function accumulateModel(parsed, acc) {
-	if (parsed.model && !acc.model) acc.model = parsed.model;
-}
-function accumulateUsage(parsed, acc) {
-	if (parsed.usage) {
-		acc.inputTokens = parsed.usage.prompt_tokens;
-		acc.outputTokens = parsed.usage.completion_tokens;
-	}
-}
-function accumulateChoice(choice, acc) {
-	if (!choice) return;
-	if (choice.delta.content) acc.content += choice.delta.content;
-	if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
-	if (choice.finish_reason) acc.finishReason = choice.finish_reason;
-}
-function accumulateToolCalls(toolCalls, acc) {
-	if (!toolCalls) return;
-	for (const tc of toolCalls) {
-		const idx = tc.index;
-		if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
-			id: tc.id ?? "",
-			name: tc.function?.name ?? "",
-			arguments: ""
-		});
-		const item = acc.toolCallMap.get(idx);
-		if (item) {
-			if (tc.id) item.id = tc.id;
-			if (tc.function?.name) item.name = tc.function.name;
-			if (tc.function?.arguments) item.arguments += tc.function.arguments;
+		if (parsed.model && !acc.model) acc.model = parsed.model;
+		if (parsed.usage) {
+			acc.inputTokens = parsed.usage.prompt_tokens;
+			acc.outputTokens = parsed.usage.completion_tokens;
 		}
-	}
+		const choice = parsed.choices[0];
+		if (choice) {
+			if (choice.delta.content) acc.content += choice.delta.content;
+			if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
+				const idx = tc.index;
+				if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
+					id: tc.id ?? "",
+					name: tc.function?.name ?? "",
+					arguments: ""
+				});
+				const item = acc.toolCallMap.get(idx);
+				if (item) {
+					if (tc.id) item.id = tc.id;
+					if (tc.function?.name) item.name = tc.function.name;
+					if (tc.function?.arguments) item.arguments += tc.function.arguments;
+				}
+			}
+			if (choice.finish_reason) acc.finishReason = choice.finish_reason;
+		}
+	} catch {}
 }
 function recordStreamSuccess(acc, fallbackModel, ctx) {
 	for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
@@ -1931,35 +2743,6 @@ function recordStreamSuccess(acc, fallbackModel, ctx) {
 		})) : void 0
 	}, Date.now() - ctx.startTime);
 }
-function recordStreamError(opts) {
-	const { acc, fallbackModel, ctx, error } = opts;
-	recordResponse(ctx.historyId, {
-		success: false,
-		model: acc.model || fallbackModel,
-		usage: {
-			input_tokens: 0,
-			output_tokens: 0
-		},
-		error: error instanceof Error ? error.message : "Stream error",
-		content: null
-	}, Date.now() - ctx.startTime);
-}
-function completeTracking$1(trackingId, inputTokens, outputTokens) {
-	if (!trackingId) return;
-	requestTracker.updateRequest(trackingId, {
-		inputTokens,
-		outputTokens
-	});
-	requestTracker.completeRequest(trackingId, 200, {
-		inputTokens,
-		outputTokens
-	});
-}
-function failTracking$1(trackingId, error) {
-	if (!trackingId) return;
-	requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
-}
-const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
 function convertOpenAIMessages(messages) {
 	return messages.map((msg) => {
 		const result = {
@@ -1987,7 +2770,7 @@ completionRoutes.post("/", async (c) => {
 	try {
 		return await handleCompletion$1(c);
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
@@ -2013,7 +2796,7 @@ embeddingRoutes.post("/", async (c) => {
 		const response = await createEmbeddings(payload);
 		return c.json(response);
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
@@ -3160,6 +3943,15 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
 //#endregion
 //#region src/routes/messages/non-stream-translation.ts
 const OPENAI_TOOL_NAME_LIMIT = 64;
+/**
+* Ensure all tool_use blocks have corresponding tool_result responses.
+* This handles edge cases where conversation history may be incomplete:
+* - Session interruptions where tool execution was cut off
+* - Previous request failures
+* - Client sending truncated history
+*
+* Adding placeholder responses prevents API errors and maintains protocol compliance.
+*/
 function fixMessageSequence(messages) {
 	const fixedMessages = [];
 	for (let i = 0; i < messages.length; i++) {
@@ -3318,7 +4110,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
 	for (let i = 0; i < originalName.length; i++) {
 		const char = originalName.codePointAt(i) ?? 0;
 		hash = (hash << 5) - hash + char;
-		hash = hash & hash;
+		hash = Math.trunc(hash);
 	}
 	const hashSuffix = Math.abs(hash).toString(36).slice(0, 8);
 	const truncatedName = originalName.slice(0, OPENAI_TOOL_NAME_LIMIT - 9) + "_" + hashSuffix;
@@ -3636,7 +4428,8 @@ async function handleCompletion(c) {
 	if (compactResult) ctx.compactResult = compactResult;
 	if (state.manualApprove) await awaitApproval();
 	try {
-		const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
+		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
+		ctx.queueWaitMs = queueWaitMs;
 		if (isNonStreaming(response)) return handleNonStreamingResponse({
 			c,
 			response,
@@ -3655,60 +4448,11 @@ async function handleCompletion(c) {
 			});
 		});
 	} catch (error) {
+		if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(openAIPayload, selectedModel);
 		recordErrorResponse(ctx, anthropicPayload.model, error);
 		throw error;
 	}
 }
-function updateTrackerModel(trackingId, model) {
-	if (!trackingId) return;
-	const request = requestTracker.getRequest(trackingId);
-	if (request) request.model = model;
-}
-async function buildFinalPayload(payload, model) {
-	if (!state.autoCompact || !model) {
-		if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
-		return {
-			finalPayload: payload,
-			compactResult: null
-		};
-	}
-	try {
-		const check = await checkNeedsCompaction(payload, model);
-		consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
-		if (!check.needed) return {
-			finalPayload: payload,
-			compactResult: null
-		};
-		consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
-		const compactResult = await autoCompact(payload, model);
-		return {
-			finalPayload: compactResult.payload,
-			compactResult
-		};
-	} catch (error) {
-		consola.warn("Auto-compact failed, proceeding with original payload:", error);
-		return {
-			finalPayload: payload,
-			compactResult: null
-		};
-	}
-}
-function updateTrackerStatus(trackingId, status) {
-	if (!trackingId) return;
-	requestTracker.updateRequest(trackingId, { status });
-}
-function recordErrorResponse(ctx, model, error) {
-	recordResponse(ctx.historyId, {
-		success: false,
-		model,
-		usage: {
-			input_tokens: 0,
-			output_tokens: 0
-		},
-		error: error instanceof Error ? error.message : "Unknown error",
-		content: null
-	}, Date.now() - ctx.startTime);
-}
 function handleNonStreamingResponse(opts) {
 	const { c, response, toolNameMapping, ctx } = opts;
 	consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
@@ -3743,7 +4487,8 @@ function handleNonStreamingResponse(opts) {
 	}, Date.now() - ctx.startTime);
 	if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
 		inputTokens: anthropicResponse.usage.input_tokens,
-		outputTokens: anthropicResponse.usage.output_tokens
+		outputTokens: anthropicResponse.usage.output_tokens,
+		queueWaitMs: ctx.queueWaitMs
 	});
 	return c.json(anthropicResponse);
 }
@@ -3799,10 +4544,10 @@ async function handleStreamingResponse(opts) {
 			acc.content += marker;
 		}
 		recordStreamingResponse(acc, anthropicPayload.model, ctx);
-		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
+		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 	} catch (error) {
 		consola.error("Stream error:", error);
-		recordStreamingError({
+		recordStreamError({
 			acc,
 			fallbackModel: anthropicPayload.model,
 			ctx,
@@ -3942,34 +4687,6 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
 		toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
 	}, Date.now() - ctx.startTime);
 }
-function recordStreamingError(opts) {
-	const { acc, fallbackModel, ctx, error } = opts;
-	recordResponse(ctx.historyId, {
-		success: false,
-		model: acc.model || fallbackModel,
-		usage: {
-			input_tokens: 0,
-			output_tokens: 0
-		},
-		error: error instanceof Error ? error.message : "Stream error",
-		content: null
-	}, Date.now() - ctx.startTime);
-}
-function completeTracking(trackingId, inputTokens, outputTokens) {
-	if (!trackingId) return;
-	requestTracker.updateRequest(trackingId, {
-		inputTokens,
-		outputTokens
-	});
-	requestTracker.completeRequest(trackingId, 200, {
-		inputTokens,
-		outputTokens
-	});
-}
-function failTracking(trackingId, error) {
-	if (!trackingId) return;
-	requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
-}
 function convertAnthropicMessages(messages) {
 	return messages.map((msg) => {
 		if (typeof msg.content === "string") return {
@@ -4017,7 +4734,6 @@ function extractToolCallsFromContent(content) {
 	});
 	return tools.length > 0 ? tools : void 0;
 }
-const isNonStreaming = (response) => Object.hasOwn(response, "choices");
 //#endregion
 //#region src/routes/messages/route.ts
@@ -4026,14 +4742,14 @@ messageRoutes.post("/", async (c) => {
 	try {
 		return await handleCompletion(c);
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
 messageRoutes.post("/count_tokens", async (c) => {
 	try {
 		return await handleCountTokens(c);
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
@@ -4072,18 +4788,18 @@ modelRoutes.get("/", async (c) => {
 			has_more: false
 		});
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
 //#endregion
 //#region src/routes/token/route.ts
 const tokenRoute = new Hono();
-tokenRoute.get("/", async (c) => {
+tokenRoute.get("/", (c) => {
 	try {
 		return c.json({ token: state.copilotToken });
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
@@ -4095,7 +4811,7 @@ usageRoute.get("/", async (c) => {
 		const usage = await getCopilotUsage();
 		return c.json(usage);
 	} catch (error) {
-		return await forwardError(c, error);
+		return forwardError(c, error);
 	}
 });
@@ -4147,10 +4863,15 @@ async function runServer(options) {
 	state.accountType = options.accountType;
 	if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
 	state.manualApprove = options.manual;
-	state.rateLimitSeconds = options.rateLimit;
-	state.rateLimitWait = options.rateLimitWait;
 	state.showToken = options.showToken;
 	state.autoCompact = options.autoCompact;
+	if (options.rateLimit) initAdaptiveRateLimiter({
+		baseRetryIntervalSeconds: options.retryInterval,
+		requestIntervalSeconds: options.requestInterval,
+		recoveryTimeoutMinutes: options.recoveryTimeout,
+		consecutiveSuccessesForRecovery: options.consecutiveSuccesses
+	});
+	else consola.info("Rate limiting disabled");
 	if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
 	initHistory(options.history, options.historyLimit);
 	if (options.history) {
@@ -4237,16 +4958,30 @@ const start = defineCommand({
 			default: false,
 			description: "Enable manual request approval"
 		},
-		"rate-limit": {
-			alias: "r",
-			type: "string",
-			description: "Rate limit in seconds between requests"
-		},
-		wait: {
-			alias: "w",
+		"no-rate-limit": {
 			type: "boolean",
 			default: false,
-			description: "Wait instead of error when rate limit is hit. Has no effect if rate limit is not set"
+			description: "Disable adaptive rate limiting"
+		},
+		"retry-interval": {
+			type: "string",
+			default: "10",
+			description: "Seconds to wait before retrying after rate limit error (default: 10)"
+		},
+		"request-interval": {
+			type: "string",
+			default: "10",
+			description: "Seconds between requests in rate-limited mode (default: 10)"
+		},
+		"recovery-timeout": {
+			type: "string",
+			default: "10",
+			description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
+		},
+		"consecutive-successes": {
+			type: "string",
+			default: "5",
+			description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
 		},
 		"github-token": {
 			alias: "g",
@@ -4269,10 +5004,10 @@ const start = defineCommand({
 			default: false,
 			description: "Initialize proxy from environment variables"
 		},
-		history: {
+		"no-history": {
 			type: "boolean",
 			default: false,
-			description: "Enable request history recording and Web UI at /history"
+			description: "Disable request history recording and Web UI"
 		},
 		"history-limit": {
 			type: "string",
@@ -4286,21 +5021,22 @@ const start = defineCommand({
 		}
 	},
 	run({ args }) {
-		const rateLimitRaw = args["rate-limit"];
-		const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
 		return runServer({
 			port: Number.parseInt(args.port, 10),
 			host: args.host,
 			verbose: args.verbose,
 			accountType: args["account-type"],
 			manual: args.manual,
-			rateLimit,
-			rateLimitWait: args.wait,
+			rateLimit: !args["no-rate-limit"],
+			retryInterval: Number.parseInt(args["retry-interval"], 10),
+			requestInterval: Number.parseInt(args["request-interval"], 10),
+			recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
+			consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
 			githubToken: args["github-token"],
 			claudeCode: args["claude-code"],
 			showToken: args["show-token"],
 			proxyEnv: args["proxy-env"],
-			history: args.history,
+			history: !args["no-history"],
 			historyLimit: Number.parseInt(args["history-limit"], 10),
 			autoCompact: args["auto-compact"]
 		});
@@ -4320,7 +5056,8 @@ const main = defineCommand({
 		logout,
 		start,
 		"check-usage": checkUsage,
-		debug
+		debug,
+		"patch-claude": patchClaude
 	}
 });
 await runMain(main);