npm - @hsupu/copilot-api - Versions diffs - 0.7.17-beta.0 → 0.7.17 - Mend

@hsupu/copilot-api 0.7.17-beta.0 → 0.7.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/main.mjs CHANGED Viewed

@@ -17,7 +17,7 @@ import { trimTrailingSlash } from "hono/trailing-slash";
 import { streamSSE } from "hono/streaming";
 import { events } from "fetch-event-stream";
-//#region src/lib/paths.ts
+//#region src/lib/config/paths.ts
 const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api");
 const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token");
 const PATHS = {
@@ -55,7 +55,7 @@ const state = {
 };
 //#endregion
-//#region src/lib/api-config.ts
+//#region src/lib/config/api.ts
 const standardHeaders = () => ({
 	"content-type": "application/json",
 	accept: "application/json"
@@ -618,6 +618,118 @@ function forwardError(c, error) {
 		type: "error"
 	} }, 500);
 }
+/**
+* Classify a raw error into a structured ApiError.
+* Used by the pipeline to route errors to appropriate RetryStrategies.
+*
+* HTTPError instances are delegated to classifyHTTPError. Every other value is
+* reported with status 0: a TypeError whose message contains "fetch" becomes
+* "network_error", and anything else falls back to "bad_request".
+*
+* @param error - Any thrown value; non-Error values are stringified for the message.
+* @returns An object with type, status, message and the original value under raw.
+*/
+function classifyError(error) {
+	if (error instanceof HTTPError) return classifyHTTPError(error);
+	if (error instanceof TypeError && error.message.includes("fetch")) return { // matched purely on the message text containing "fetch"
+		type: "network_error",
+		status: 0,
+		message: error.message,
+		raw: error
+	};
+	if (error instanceof Error) return { // any other Error: no HTTP status is available, so 0 is used
+		type: "bad_request",
+		status: 0,
+		message: error.message,
+		raw: error
+	};
+	return { // non-Error throwables (strings, plain objects, ...) are stringified
+		type: "bad_request",
+		status: 0,
+		message: String(error),
+		raw: error
+	};
+}
+function classifyHTTPError(error) { // Map an HTTPError (status, responseText, message) to a typed ApiError; checks run in the order written below.
+	const { status, responseText, message } = error;
+	if (status === 429) return { // retryAfter comes from the JSON body only and may be undefined
+		type: "rate_limited",
+		status,
+		message,
+		retryAfter: extractRetryAfterFromBody(responseText),
+		raw: error
+	};
+	if (status === 413) return {
+		type: "payload_too_large",
+		status,
+		message,
+		raw: error
+	};
+	if (status >= 500) return { // every 5xx (and above) is treated as a server error
+		type: "server_error",
+		status,
+		message,
+		raw: error
+	};
+	if (status === 401 || status === 403) return {
+		type: "auth_expired",
+		status,
+		message,
+		raw: error
+	};
+	if (status === 400) { // 400 bodies are inspected: token-limit details first, then an embedded rate_limited code
+		const tokenLimit = tryExtractTokenLimit(responseText);
+		if (tokenLimit) return {
+			type: "token_limit",
+			status,
+			message,
+			tokenLimit: tokenLimit.limit,
+			tokenCurrent: tokenLimit.current,
+			raw: error
+		};
+		if (isRateLimitedInBody(responseText)) return {
+			type: "rate_limited",
+			status,
+			message,
+			retryAfter: extractRetryAfterFromBody(responseText),
+			raw: error
+		};
+	}
+	return { // fallback: a 400 that matched nothing above, or any other status not handled earlier
+		type: "bad_request",
+		status,
+		message,
+		raw: error
+	};
+}
+/**
+* Extract retry_after from JSON response body.
+* The top-level retry_after is checked first, then error.retry_after; only
+* values of type number are accepted.
+*
+* @param responseText - Raw response body, expected to be JSON text.
+* @returns The numeric retry_after, or undefined when the body is not valid
+*          JSON or carries no numeric retry_after in either location.
+*/
+function extractRetryAfterFromBody(responseText) {
+	try {
+		const parsed = JSON.parse(responseText);
+		if (parsed && typeof parsed === "object") {
+			if ("retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
+			if ("error" in parsed) {
+				const err = parsed.error;
+				if (err && typeof err === "object" && "retry_after" in err && typeof err.retry_after === "number") return err.retry_after;
+			}
+		}
+	} catch {} // unparseable body: fall through and return undefined
+}
+/**
+* Check if response body contains rate_limited code.
+*
+* @param responseText - Raw response body, expected to be JSON text.
+* @returns true only when the body parses as JSON and error.code is exactly
+*          "rate_limited"; false otherwise, including for invalid JSON.
+*/
+function isRateLimitedInBody(responseText) {
+	try {
+		const parsed = JSON.parse(responseText);
+		if (parsed && typeof parsed === "object" && "error" in parsed) {
+			const err = parsed.error;
+			if (err && typeof err === "object" && "code" in err) return err.code === "rate_limited";
+		}
+	} catch {} // invalid JSON is treated the same as "no rate-limit code"
+	return false;
+}
+/**
+* Try to extract token limit info from response body.
+* When the body is JSON with a string error.message, that message is handed to
+* parseTokenLimitError (defined outside this view) and its result is returned
+* as-is; classifyHTTPError reads .limit and .current from a truthy result.
+*
+* @param responseText - Raw response body, expected to be JSON text.
+* @returns Whatever parseTokenLimitError returns, or null when the body is not
+*          valid JSON or has no string error.message.
+*/
+function tryExtractTokenLimit(responseText) {
+	try {
+		const parsed = JSON.parse(responseText);
+		if (parsed && typeof parsed === "object" && "error" in parsed) {
+			const err = parsed.error;
+			if (err && typeof err === "object" && "message" in err && typeof err.message === "string") return parseTokenLimitError(err.message);
+		}
+	} catch {} // NOTE(review): this also hides anything parseTokenLimitError might throw, not only JSON errors
+	return null;
+}
 //#endregion
 //#region src/services/github/get-copilot-token.ts
@@ -1444,7 +1556,7 @@ const debug = defineCommand({
 });
 //#endregion
-//#region src/lib/history-ws.ts
+//#region src/lib/history/ws.ts
 const clients = /* @__PURE__ */ new Set();
 function addClient(ws) {
 	clients.add(ws);
@@ -1496,144 +1608,432 @@ function notifyEntryUpdated(entry) {
 }
 //#endregion
-//#region src/lib/shutdown.ts
-let serverInstance = null;
-let _isShuttingDown = false;
-let shutdownResolve = null;
-/** Drain timeouts based on active request types */
-const THINKING_DRAIN_TIMEOUT_MS = 18e4;
-const NORMAL_DRAIN_TIMEOUT_MS = 6e4;
-const MIN_DRAIN_TIMEOUT_MS = 5e3;
-const DRAIN_POLL_INTERVAL_MS = 500;
-const DRAIN_PROGRESS_INTERVAL_MS = 5e3;
-/** Check if the server is in shutdown state (used by middleware to reject new requests) */
-function getIsShuttingDown() {
-	return _isShuttingDown;
-}
-/**
-* Returns a promise that resolves when the server is shut down via signal.
-* Used by runServer() to keep the async function alive until shutdown.
-*/
-function waitForShutdown() {
-	return new Promise((resolve) => {
-		shutdownResolve = resolve;
-	});
+//#region src/lib/history/store.ts
+function formatLocalTimestamp(ts) {
+	const d = new Date(ts);
+	return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
 }
-/** Store the server instance for shutdown */
-function setServerInstance(server) {
-	serverInstance = server;
+const historyState = {
+	enabled: false,
+	entries: [],
+	sessions: /* @__PURE__ */ new Map(),
+	currentSessionId: "",
+	maxEntries: 200
+};
+function initHistory(enabled, maxEntries) {
+	historyState.enabled = enabled;
+	historyState.maxEntries = maxEntries;
+	historyState.entries = [];
+	historyState.sessions = /* @__PURE__ */ new Map();
+	historyState.currentSessionId = enabled ? generateId() : "";
 }
-/**
-* Compute drain timeout based on currently active requests.
-* Thinking requests get more time because they can take 120s+.
-*/
-function computeDrainTimeout() {
-	const active = requestTracker.getActiveRequests();
-	if (active.length === 0) return MIN_DRAIN_TIMEOUT_MS;
-	return active.some((r) => r.tags?.some((t) => t.startsWith("thinking:"))) ? THINKING_DRAIN_TIMEOUT_MS : NORMAL_DRAIN_TIMEOUT_MS;
+function isHistoryEnabled() {
+	return historyState.enabled;
 }
-/** Log a summary of active requests during drain */
-function logActiveRequestsSummary(requests) {
+function getCurrentSession(endpoint) {
+	if (historyState.currentSessionId) {
+		const session = historyState.sessions.get(historyState.currentSessionId);
+		if (session) {
+			session.lastActivity = Date.now();
+			return historyState.currentSessionId;
+		}
+	}
 	const now = Date.now();
-	const lines = requests.map((req) => {
-		const age = Math.round((now - req.startTime) / 1e3);
-		const model = req.model || "unknown";
-		const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
-		return `  ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
+	const sessionId = generateId();
+	historyState.currentSessionId = sessionId;
+	historyState.sessions.set(sessionId, {
+		id: sessionId,
+		startTime: now,
+		lastActivity: now,
+		requestCount: 0,
+		totalInputTokens: 0,
+		totalOutputTokens: 0,
+		models: [],
+		endpoint
 	});
-	consola.info(`Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`);
+	return sessionId;
 }
-/**
-* Wait for all active requests to complete, with periodic progress logging.
-* Returns "drained" when all requests finish, "timeout" if deadline is reached.
-*/
-async function drainActiveRequests(timeoutMs) {
-	const deadline = Date.now() + timeoutMs;
-	let lastProgressLog = 0;
-	while (Date.now() < deadline) {
-		const active = requestTracker.getActiveRequests();
-		if (active.length === 0) return "drained";
-		const now = Date.now();
-		if (now - lastProgressLog >= DRAIN_PROGRESS_INTERVAL_MS) {
-			lastProgressLog = now;
-			logActiveRequestsSummary(active);
+function recordRequest(endpoint, request) {
+	if (!historyState.enabled) return "";
+	const sessionId = getCurrentSession(endpoint);
+	const session = historyState.sessions.get(sessionId);
+	if (!session) return "";
+	const entry = {
+		id: generateId(),
+		sessionId,
+		timestamp: Date.now(),
+		endpoint,
+		request: {
+			model: request.model,
+			messages: request.messages,
+			stream: request.stream,
+			tools: request.tools,
+			max_tokens: request.max_tokens,
+			temperature: request.temperature,
+			system: request.system
 		}
-		await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_INTERVAL_MS));
-	}
-	return "timeout";
-}
-/** Perform graceful shutdown */
-async function gracefulShutdown(signal) {
-	_isShuttingDown = true;
-	consola.info(`Received ${signal}, shutting down gracefully...`);
-	stopTokenRefresh();
-	const wsClients = getClientCount();
-	if (wsClients > 0) {
-		closeAllClients();
-		consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
+	};
+	historyState.entries.push(entry);
+	session.requestCount++;
+	if (!session.models.includes(request.model)) session.models.push(request.model);
+	if (request.tools && request.tools.length > 0) {
+		if (!session.toolsUsed) session.toolsUsed = [];
+		for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
 	}
-	if (serverInstance) {
-		const activeCount = requestTracker.getActiveRequests().length;
-		const drainTimeout = computeDrainTimeout();
-		if (activeCount > 0) {
-			consola.info(`Draining ${activeCount} active request(s), timeout ${drainTimeout / 1e3}s`);
-			if (await drainActiveRequests(drainTimeout) === "timeout") {
-				const remaining = requestTracker.getActiveRequests();
-				consola.warn(`Drain timeout, force-closing ${remaining.length} remaining request(s)`);
-			} else consola.info("All requests completed");
-		}
-		try {
-			await serverInstance.close(true);
-		} catch (error) {
-			consola.error("Error closing server:", error);
+	while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
+		const removed = historyState.entries.shift();
+		if (removed) {
+			if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
 		}
 	}
-	consola.info("Shutdown complete");
-	shutdownResolve?.();
+	notifyEntryAdded(entry);
+	return entry.id;
 }
-/** Setup process signal handlers for graceful shutdown */
-function setupShutdownHandlers() {
-	const handler = (signal) => {
-		if (_isShuttingDown) {
-			consola.warn("Second signal received, forcing immediate exit");
-			process.exit(1);
+function recordResponse(id, response, durationMs) {
+	if (!historyState.enabled || !id) return;
+	const entry = historyState.entries.find((e) => e.id === id);
+	if (entry) {
+		entry.response = response;
+		entry.durationMs = durationMs;
+		const session = historyState.sessions.get(entry.sessionId);
+		if (session) {
+			session.totalInputTokens += response.usage.input_tokens;
+			session.totalOutputTokens += response.usage.output_tokens;
+			session.lastActivity = Date.now();
 		}
-		gracefulShutdown(signal);
-	};
-	process.on("SIGINT", () => handler("SIGINT"));
-	process.on("SIGTERM", () => handler("SIGTERM"));
+		notifyEntryUpdated(entry);
+	}
 }
-//#endregion
-//#region src/lib/tui/tracker.ts
-var RequestTracker = class {
-	requests = /* @__PURE__ */ new Map();
-	renderer = null;
-	completedQueue = [];
-	completedTimeouts = /* @__PURE__ */ new Map();
-	historySize = 5;
-	completedDisplayMs = 2e3;
-	setRenderer(renderer) {
-		this.renderer = renderer;
+function recordRewrites(id, rewrites) {
+	if (!historyState.enabled || !id) return;
+	const entry = historyState.entries.find((e) => e.id === id);
+	if (entry) {
+		entry.rewrites = rewrites;
+		if (rewrites.truncation) entry.truncation = rewrites.truncation;
+		notifyEntryUpdated(entry);
 	}
-	setOptions(options) {
-		if (options.historySize !== void 0) this.historySize = options.historySize;
-		if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
+}
+function getHistory(options = {}) {
+	const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
+	let filtered = [...historyState.entries];
+	if (sessionId) filtered = filtered.filter((e) => e.sessionId === sessionId);
+	if (model) {
+		const modelLower = model.toLowerCase();
+		filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
 	}
-	/**
-	* Start tracking a new request
-	* Returns the tracking ID
-	*/
-	startRequest(options) {
-		const id = generateId();
-		const request = {
-			id,
-			method: options.method,
-			path: options.path,
-			model: options.model,
-			startTime: Date.now(),
-			status: "executing",
-			isHistoryAccess: options.isHistoryAccess
+	if (endpoint) filtered = filtered.filter((e) => e.endpoint === endpoint);
+	if (success !== void 0) filtered = filtered.filter((e) => e.response?.success === success);
+	if (from) filtered = filtered.filter((e) => e.timestamp >= from);
+	if (to) filtered = filtered.filter((e) => e.timestamp <= to);
+	if (search) {
+		const searchLower = search.toLowerCase();
+		filtered = filtered.filter((e) => {
+			if (e.request.model.toLowerCase().includes(searchLower) || e.response?.model && e.response.model.toLowerCase().includes(searchLower)) return true;
+			if (e.response?.error && e.response.error.toLowerCase().includes(searchLower)) return true;
+			if (e.request.system?.toLowerCase().includes(searchLower)) return true;
+			if (e.request.messages.some((m) => {
+				if (typeof m.content === "string") return m.content.toLowerCase().includes(searchLower);
+				if (Array.isArray(m.content)) return m.content.some((c) => {
+					if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
+					if (c.type === "tool_use") {
+						const name = c.name;
+						if (name && name.toLowerCase().includes(searchLower)) return true;
+						if (c.input) {
+							if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
+						}
+					}
+					if (c.type === "tool_result" && c.content) {
+						if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
+					}
+					if (c.type === "thinking") {
+						const thinking = c.thinking;
+						if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
+					}
+					return false;
+				});
+				return false;
+			})) return true;
+			if (e.response?.content) {
+				const rc = e.response.content;
+				if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
+				if (Array.isArray(rc.content)) {
+					if (rc.content.some((c) => {
+						if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
+						if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
+						if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
+						return false;
+					})) return true;
+				}
+			}
+			if (e.response?.toolCalls?.some((t) => t.name.toLowerCase().includes(searchLower))) return true;
+			return false;
+		});
+	}
+	filtered.sort((a, b) => b.timestamp - a.timestamp);
+	const total = filtered.length;
+	const totalPages = Math.ceil(total / limit);
+	const start = (page - 1) * limit;
+	return {
+		entries: filtered.slice(start, start + limit),
+		total,
+		page,
+		limit,
+		totalPages
+	};
+}
+function getEntry(id) { // Return the stored history entry with this id, or undefined when none matches (linear scan).
+	return historyState.entries.find((e) => e.id === id);
+}
+function getSessions() { // List every stored session, most recently active first, together with the session count.
+	const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity); // sorts a fresh array; the Map itself is untouched
+	return {
+		sessions,
+		total: sessions.length
+	};
+}
+function getSession(id) { // Look up one session by id in historyState.sessions; undefined when it does not exist.
+	return historyState.sessions.get(id);
+}
+function getSessionEntries(sessionId) { // Entries belonging to one session, oldest first (ascending timestamp); empty array when none match.
+	return historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
+}
+function clearHistory() { // Drop all entries and sessions and rotate the current session id.
+	historyState.entries = [];
+	historyState.sessions = /* @__PURE__ */ new Map();
+	historyState.currentSessionId = generateId(); // only the id is set here; no session object is registered for it in this function
+}
+function deleteSession(sessionId) { // Remove a session and all of its entries; returns false when the session id is unknown, true otherwise.
+	if (!historyState.sessions.has(sessionId)) return false;
+	historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
+	historyState.sessions.delete(sessionId);
+	if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId(); // deleting the active session rotates to a fresh id
+	return true;
+}
+function getStats() { // Aggregate counters over every stored history entry: totals, per-model/endpoint distribution, hourly activity.
+	const entries = historyState.entries;
+	const modelDist = {};
+	const endpointDist = {};
+	const hourlyActivity = {};
+	let totalInput = 0;
+	let totalOutput = 0;
+	let totalDuration = 0;
+	let durationCount = 0;
+	let successCount = 0;
+	let failCount = 0;
+	for (const entry of entries) {
+		const model = entry.response?.model || entry.request.model; // prefer the model reported by the response, else the requested one
+		modelDist[model] = (modelDist[model] || 0) + 1;
+		endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
+		const d = new Date(entry.timestamp);
+		const hour = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}T${String(d.getHours()).padStart(2, "0")}`; // local-time bucket key, YYYY-MM-DDTHH
+		hourlyActivity[hour] = (hourlyActivity[hour] || 0) + 1;
+		if (entry.response) { // entries still waiting for a response count toward totalRequests but not success/fail
+			if (entry.response.success) successCount++;
+			else failCount++;
+			totalInput += entry.response.usage.input_tokens; // NOTE(review): assumes every recorded response has a usage object; this throws if it is missing - confirm with the recorders
+			totalOutput += entry.response.usage.output_tokens;
+		}
+		if (entry.durationMs) { // a missing or zero duration is left out of the average
+			totalDuration += entry.durationMs;
+			durationCount++;
+		}
+	}
+	const recentActivity = Object.entries(hourlyActivity).sort(([a], [b]) => a.localeCompare(b)).slice(-24).map(([hour, count]) => ({ // zero-padded keys sort chronologically; keeps the last 24 buckets that saw traffic, not the last 24 clock hours
+		hour,
+		count
+	}));
+	return {
+		totalRequests: entries.length,
+		successfulRequests: successCount,
+		failedRequests: failCount,
+		totalInputTokens: totalInput,
+		totalOutputTokens: totalOutput,
+		averageDurationMs: durationCount > 0 ? totalDuration / durationCount : 0, // 0 when no entry has a duration yet
+		modelDistribution: modelDist,
+		endpointDistribution: endpointDist,
+		recentActivity,
+		activeSessions: historyState.sessions.size // number of sessions currently held in the store
+	};
+}
+function exportHistory(format = "json") { // Serialize the history: pretty-printed JSON by default, CSV text for any other format value.
+	if (format === "json") return JSON.stringify({
+		sessions: Array.from(historyState.sessions.values()),
+		entries: historyState.entries
+	}, null, 2);
+	const headers = [ // CSV column names; rows below must keep the same order
+		"id",
+		"session_id",
+		"timestamp",
+		"endpoint",
+		"request_model",
+		"message_count",
+		"stream",
+		"success",
+		"response_model",
+		"input_tokens",
+		"output_tokens",
+		"duration_ms",
+		"stop_reason",
+		"error"
+	];
+	const rows = historyState.entries.map((e) => [ // one row per entry; fields of a not-yet-recorded response become empty strings
+		e.id,
+		e.sessionId,
+		formatLocalTimestamp(e.timestamp),
+		e.endpoint,
+		e.request.model,
+		e.request.messages.length,
+		e.request.stream,
+		e.response?.success ?? "",
+		e.response?.model ?? "",
+		e.response?.usage.input_tokens ?? "",
+		e.response?.usage.output_tokens ?? "",
+		e.durationMs ?? "",
+		e.response?.stop_reason ?? "",
+		e.response?.error ?? ""
+	]);
+	return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n"); // NOTE(review): values are joined without CSV quoting or escaping, so a comma, double quote or newline in a field (e.g. the error text) corrupts the row
+}
+//#endregion
+//#region src/lib/shutdown.ts
+let serverInstance = null; // assigned by setServerInstance(); closed in gracefulShutdown()
+let _isShuttingDown = false; // set to true at the start of gracefulShutdown()
+let shutdownResolve = null; // resolver of the promise handed out by waitForShutdown()
+/** Drain timeouts based on active request types */
+const THINKING_DRAIN_TIMEOUT_MS = 18e4; // 180 s, used when any active request carries a "thinking:" tag
+const NORMAL_DRAIN_TIMEOUT_MS = 6e4; // 60 s, used when requests are active but none is tagged as thinking
+const MIN_DRAIN_TIMEOUT_MS = 5e3; // 5 s, returned when no request is active
+const DRAIN_POLL_INTERVAL_MS = 500; // sleep between two checks of the active-request list
+const DRAIN_PROGRESS_INTERVAL_MS = 5e3; // minimum gap between progress log lines while draining
+/**
+* Check if the server is in shutdown state (used by middleware to reject new requests).
+* Returns the module-level _isShuttingDown flag, which gracefulShutdown() sets to true.
+*/
+function getIsShuttingDown() {
+	return _isShuttingDown;
+}
+/**
+* Returns a promise that resolves when the server is shut down via signal.
+* Used by runServer() to keep the async function alive until shutdown.
+*
+* Each call overwrites shutdownResolve, so only the promise from the most
+* recent call is resolved when gracefulShutdown() finishes.
+* NOTE(review): a promise requested after gracefulShutdown() has already
+* completed is never resolved, because the resolver is invoked only once there.
+*/
+function waitForShutdown() {
+	return new Promise((resolve) => {
+		shutdownResolve = resolve;
+	});
+}
+/**
+* Store the server instance for shutdown.
+* gracefulShutdown() skips draining and closing entirely while this is unset.
+*/
+function setServerInstance(server) {
+	serverInstance = server;
+}
+/**
+* Compute drain timeout based on currently active requests.
+* Thinking requests get more time because they can take 120s+.
+*
+* @returns MIN_DRAIN_TIMEOUT_MS when nothing is active, THINKING_DRAIN_TIMEOUT_MS
+*          when at least one active request has a tag starting with "thinking:",
+*          otherwise NORMAL_DRAIN_TIMEOUT_MS (all in milliseconds).
+*/
+function computeDrainTimeout() {
+	const active = requestTracker.getActiveRequests();
+	if (active.length === 0) return MIN_DRAIN_TIMEOUT_MS;
+	return active.some((r) => r.tags?.some((t) => t.startsWith("thinking:"))) ? THINKING_DRAIN_TIMEOUT_MS : NORMAL_DRAIN_TIMEOUT_MS; // requests without tags never count as thinking
+}
+/**
+* Log a summary of active requests during drain.
+* Emits a single consola.info message with one indented line per request:
+* method, path, model (or "unknown"), status, age in whole seconds and any tags.
+*
+* @param requests - Active request records as returned by requestTracker.getActiveRequests().
+*/
+function logActiveRequestsSummary(requests) {
+	const now = Date.now();
+	const lines = requests.map((req) => {
+		const age = Math.round((now - req.startTime) / 1e3); // elapsed time since the request started, in seconds
+		const model = req.model || "unknown";
+		const tags = req.tags?.length ? ` [${req.tags.join(", ")}]` : "";
+		return `  ${req.method} ${req.path} ${model} (${req.status}, ${age}s)${tags}`;
+	});
+	consola.info(`Waiting for ${requests.length} active request(s):\n${lines.join("\n")}`);
+}
+/**
+* Wait for all active requests to complete, with periodic progress logging.
+* Returns "drained" when all requests finish, "timeout" if deadline is reached.
+*
+* The active list is polled every DRAIN_POLL_INTERVAL_MS, so the function can
+* return up to one poll interval after the deadline has passed.
+*
+* @param timeoutMs - Maximum time to wait, in milliseconds.
+*/
+async function drainActiveRequests(timeoutMs) {
+	const deadline = Date.now() + timeoutMs;
+	let lastProgressLog = 0; // 0 makes the first loop iteration log immediately
+	while (Date.now() < deadline) {
+		const active = requestTracker.getActiveRequests();
+		if (active.length === 0) return "drained";
+		const now = Date.now();
+		if (now - lastProgressLog >= DRAIN_PROGRESS_INTERVAL_MS) {
+			lastProgressLog = now;
+			logActiveRequestsSummary(active);
+		}
+		await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_INTERVAL_MS));
+	}
+	return "timeout";
+}
+/**
+* Perform graceful shutdown.
+* Order: mark shutdown, stop token refresh, disconnect WebSocket clients, drain
+* active requests (bounded by computeDrainTimeout), close the server, then
+* resolve the promise handed out by waitForShutdown().
+*
+* @param signal - Name of the signal that triggered the shutdown; used for logging only.
+*/
+async function gracefulShutdown(signal) {
+	_isShuttingDown = true;
+	consola.info(`Received ${signal}, shutting down gracefully...`);
+	stopTokenRefresh();
+	const wsClients = getClientCount(); // read before closing so the log reports how many were connected
+	if (wsClients > 0) {
+		closeAllClients();
+		consola.info(`Disconnected ${wsClients} WebSocket client(s)`);
+	}
+	if (serverInstance) { // without a registered server there is nothing to drain or close
+		const activeCount = requestTracker.getActiveRequests().length;
+		const drainTimeout = computeDrainTimeout();
+		if (activeCount > 0) {
+			consola.info(`Draining ${activeCount} active request(s), timeout ${drainTimeout / 1e3}s`);
+			if (await drainActiveRequests(drainTimeout) === "timeout") {
+				const remaining = requestTracker.getActiveRequests();
+				consola.warn(`Drain timeout, force-closing ${remaining.length} remaining request(s)`);
+			} else consola.info("All requests completed");
+		}
+		try {
+			await serverInstance.close(true); // presumably "true" forces open connections closed - confirm against the server library
+		} catch (error) {
+			consola.error("Error closing server:", error); // a close failure is logged and shutdown still completes
+		}
+	}
+	consola.info("Shutdown complete");
+	shutdownResolve?.(); // no-op when waitForShutdown() was never called
+}
+/**
+* Setup process signal handlers for graceful shutdown.
+* The first SIGINT/SIGTERM starts gracefulShutdown(); a further signal while
+* shutdown is in progress exits the process immediately with code 1.
+*/
+function setupShutdownHandlers() {
+	const handler = (signal) => {
+		if (_isShuttingDown) {
+			consola.warn("Second signal received, forcing immediate exit");
+			process.exit(1);
+		}
+		gracefulShutdown(signal); // NOTE(review): the returned promise is neither awaited nor caught, so a rejection inside gracefulShutdown would surface as an unhandled rejection
+	};
+	process.on("SIGINT", () => handler("SIGINT"));
+	process.on("SIGTERM", () => handler("SIGTERM"));
+}
+//#endregion
+//#region src/lib/tui/tracker.ts
+var RequestTracker = class {
+	requests = /* @__PURE__ */ new Map();
+	renderer = null;
+	completedQueue = [];
+	completedTimeouts = /* @__PURE__ */ new Map();
+	historySize = 5;
+	completedDisplayMs = 2e3;
+	setRenderer(renderer) {
+		this.renderer = renderer;
+	}
+	setOptions(options) {
+		if (options.historySize !== void 0) this.historySize = options.historySize;
+		if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
+	}
+	/**
+	* Start tracking a new request
+	* Returns the tracking ID
+	*/
+	startRequest(options) {
+		const id = generateId();
+		const request = {
+			id,
+			method: options.method,
+			path: options.path,
+			model: options.model,
+			startTime: Date.now(),
+			status: "executing",
+			isHistoryAccess: options.isHistoryAccess
 		};
 		this.requests.set(id, request);
 		this.renderer?.onRequestStart(request);
@@ -2293,7 +2693,7 @@ const setupClaudeCode = defineCommand({
 //#endregion
 //#region package.json
-var version = "0.7.17-beta.0";
+var version = "0.7.17";
 //#endregion
 //#region src/lib/adaptive-rate-limiter.ts
@@ -2324,558 +2724,270 @@ var AdaptiveRateLimiter = class {
 	lastRequestTime = 0;
 	/** Current step in gradual recovery (index into gradualRecoverySteps) */
 	recoveryStepIndex = 0;
-	constructor(config = {}) {
-		this.config = {
-			...DEFAULT_CONFIG,
-			...config
-		};
-	}
-	/**
-	* Execute a request with adaptive rate limiting.
-	* Returns a promise that resolves when the request succeeds.
-	* The request will be retried automatically on 429 errors.
-	*/
-	async execute(fn) {
-		if (this.mode === "normal") return this.executeInNormalMode(fn);
-		if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
-		return this.enqueue(fn);
-	}
-	/**
-	* Check if an error is a rate limit error (429) and extract Retry-After if available
-	*/
-	isRateLimitError(error) {
-		if (error && typeof error === "object") {
-			if ("status" in error && error.status === 429) return {
-				isRateLimit: true,
-				retryAfter: this.extractRetryAfter(error)
-			};
-			if ("responseText" in error && typeof error.responseText === "string") try {
-				const parsed = JSON.parse(error.responseText);
-				if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
-			} catch {}
-		}
-		return { isRateLimit: false };
-	}
-	/**
-	* Extract Retry-After value from error response
-	*/
-	extractRetryAfter(error) {
-		if (!error || typeof error !== "object") return void 0;
-		if ("responseText" in error && typeof error.responseText === "string") try {
-			const parsed = JSON.parse(error.responseText);
-			if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
-			if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
-		} catch {}
-	}
-	/**
-	* Execute in normal mode - full speed
-	*/
-	async executeInNormalMode(fn) {
-		try {
-			return {
-				result: await fn(),
-				queueWaitMs: 0
-			};
-		} catch (error) {
-			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
-			if (isRateLimit) {
-				this.enterRateLimitedMode();
-				return this.enqueue(fn, retryAfter);
-			}
-			throw error;
-		}
-	}
-	/**
-	* Execute in recovering mode - gradual speedup
-	*/
-	async executeInRecoveringMode(fn) {
-		const startTime = Date.now();
-		const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
-		if (currentInterval > 0) {
-			const elapsedMs = Date.now() - this.lastRequestTime;
-			const requiredMs = currentInterval * 1e3;
-			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
-				const waitMs = requiredMs - elapsedMs;
-				await this.sleep(waitMs);
-			}
-		}
-		this.lastRequestTime = Date.now();
-		try {
-			const result = await fn();
-			this.recoveryStepIndex++;
-			if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
-			else {
-				const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
-				consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
-			}
-			return {
-				result,
-				queueWaitMs: Date.now() - startTime
-			};
-		} catch (error) {
-			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
-			if (isRateLimit) {
-				consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
-				this.enterRateLimitedMode();
-				return this.enqueue(fn, retryAfter);
-			}
-			throw error;
-		}
+	constructor(config = {}) {
+		this.config = {
+			...DEFAULT_CONFIG,
+			...config
+		};
 	}
 	/**
-	* Enter rate-limited mode
+	* Execute a request with adaptive rate limiting.
+	* Returns a promise that resolves when the request succeeds.
+	* The request will be retried automatically on 429 errors.
 	*/
-	enterRateLimitedMode() {
-		if (this.mode === "rate-limited") return;
-		this.mode = "rate-limited";
-		this.rateLimitedAt = Date.now();
-		this.consecutiveSuccesses = 0;
-		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
+	async execute(fn) {
+		if (this.mode === "normal") return this.executeInNormalMode(fn);
+		if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
+		return this.enqueue(fn);
 	}
 	/**
-	* Check if we should try to recover to normal mode
+	* Check if an error is a rate limit error (429) and extract Retry-After if available
 	*/
-	shouldAttemptRecovery() {
-		if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
-			consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting ramp-up.`);
-			return true;
-		}
-		if (this.rateLimitedAt) {
-			if (Date.now() - this.rateLimitedAt >= this.config.recoveryTimeoutMinutes * 60 * 1e3) {
-				consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting ramp-up.`);
-				return true;
-			}
+	isRateLimitError(error) {
+		if (error && typeof error === "object") {
+			if ("status" in error && error.status === 429) return {
+				isRateLimit: true,
+				retryAfter: this.extractRetryAfter(error)
+			};
+			if ("responseText" in error && typeof error.responseText === "string") try {
+				const parsed = JSON.parse(error.responseText);
+				if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
+			} catch {}
 		}
-		return false;
-	}
-	/**
-	* Start gradual recovery mode
-	*/
-	startGradualRecovery() {
-		this.mode = "recovering";
-		this.recoveryStepIndex = 0;
-		this.rateLimitedAt = null;
-		this.consecutiveSuccesses = 0;
-		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
-		consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
+		return { isRateLimit: false };
 	}
 	/**
-	* Complete recovery to normal mode
+	* Extract Retry-After value from error response
 	*/
-	completeRecovery() {
-		this.mode = "normal";
-		this.recoveryStepIndex = 0;
-		consola.success("[RateLimiter] Exiting rate-limited mode.");
+	extractRetryAfter(error) {
+		if (!error || typeof error !== "object") return void 0;
+		if ("responseText" in error && typeof error.responseText === "string") try {
+			const parsed = JSON.parse(error.responseText);
+			if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
+			if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
+		} catch {}
 	}
 	/**
-	* Enqueue a request for later execution
+	* Execute in normal mode - full speed
 	*/
-	enqueue(fn, retryAfterSeconds) {
-		return new Promise((resolve, reject) => {
-			const request = {
-				execute: fn,
-				resolve,
-				reject,
-				retryCount: 0,
-				retryAfterSeconds,
-				enqueuedAt: Date.now()
+	async executeInNormalMode(fn) {
+		try {
+			return {
+				result: await fn(),
+				queueWaitMs: 0
 			};
-			this.queue.push(request);
-			if (this.queue.length > 1) {
-				const position = this.queue.length;
-				const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
-				consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
-			}
-			this.processQueue();
-		});
-	}
-	/**
-	* Calculate retry interval with exponential backoff
-	*/
-	calculateRetryInterval(request) {
-		if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
-		const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
-		return Math.min(backoff, this.config.maxRetryIntervalSeconds);
-	}
-	/**
-	* Process the queue
-	*/
-	async processQueue() {
-		if (this.processing) return;
-		this.processing = true;
-		while (this.queue.length > 0) {
-			const request = this.queue[0];
-			if (this.shouldAttemptRecovery()) this.startGradualRecovery();
-			const elapsedMs = Date.now() - this.lastRequestTime;
-			const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
-			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
-				const waitMs = requiredMs - elapsedMs;
-				const waitSec = Math.ceil(waitMs / 1e3);
-				consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
-				await this.sleep(waitMs);
-			}
-			this.lastRequestTime = Date.now();
-			try {
-				const result = await request.execute();
-				this.queue.shift();
-				this.consecutiveSuccesses++;
-				request.retryAfterSeconds = void 0;
-				const queueWaitMs = Date.now() - request.enqueuedAt;
-				request.resolve({
-					result,
-					queueWaitMs
-				});
-				if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
-			} catch (error) {
-				const { isRateLimit, retryAfter } = this.isRateLimitError(error);
-				if (isRateLimit) {
-					request.retryCount++;
-					request.retryAfterSeconds = retryAfter;
-					this.consecutiveSuccesses = 0;
-					this.rateLimitedAt = Date.now();
-					const nextInterval = this.calculateRetryInterval(request);
-					const source = retryAfter ? "server Retry-After" : "exponential backoff";
-					consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
-				} else {
-					this.queue.shift();
-					request.reject(error);
-				}
+		} catch (error) {
+			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+			if (isRateLimit) {
+				this.enterRateLimitedMode();
+				return this.enqueue(fn, retryAfter);
 			}
+			throw error;
 		}
-		this.processing = false;
-	}
-	sleep(ms) {
-		return new Promise((resolve) => setTimeout(resolve, ms));
-	}
-	/**
-	* Get current status for debugging/monitoring
-	*/
-	getStatus() {
-		return {
-			mode: this.mode,
-			queueLength: this.queue.length,
-			consecutiveSuccesses: this.consecutiveSuccesses,
-			rateLimitedAt: this.rateLimitedAt
-		};
-	}
-};
-let rateLimiterInstance = null;
-/**
-* Initialize the adaptive rate limiter with configuration
-*/
-function initAdaptiveRateLimiter(config = {}) {
-	rateLimiterInstance = new AdaptiveRateLimiter(config);
-	const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
-	const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
-	const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
-	const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
-	const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
-	const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
-	consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
-}
-/**
-* Execute a request with adaptive rate limiting.
-* If rate limiter is not initialized, executes immediately.
-* Returns the result along with queue wait time.
-*/
-async function executeWithAdaptiveRateLimit(fn) {
-	if (!rateLimiterInstance) return {
-		result: await fn(),
-		queueWaitMs: 0
-	};
-	return rateLimiterInstance.execute(fn);
-}
-//#endregion
-//#region src/lib/history.ts
-function formatLocalTimestamp(ts) {
-	const d = new Date(ts);
-	return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")} ${String(d.getHours()).padStart(2, "0")}:${String(d.getMinutes()).padStart(2, "0")}:${String(d.getSeconds()).padStart(2, "0")}`;
-}
-const historyState = {
-	enabled: false,
-	entries: [],
-	sessions: /* @__PURE__ */ new Map(),
-	currentSessionId: "",
-	maxEntries: 200
-};
-function initHistory(enabled, maxEntries) {
-	historyState.enabled = enabled;
-	historyState.maxEntries = maxEntries;
-	historyState.entries = [];
-	historyState.sessions = /* @__PURE__ */ new Map();
-	historyState.currentSessionId = enabled ? generateId() : "";
-}
-function isHistoryEnabled() {
-	return historyState.enabled;
-}
-function getCurrentSession(endpoint) {
-	if (historyState.currentSessionId) {
-		const session = historyState.sessions.get(historyState.currentSessionId);
-		if (session) {
-			session.lastActivity = Date.now();
-			return historyState.currentSessionId;
-		}
-	}
-	const now = Date.now();
-	const sessionId = generateId();
-	historyState.currentSessionId = sessionId;
-	historyState.sessions.set(sessionId, {
-		id: sessionId,
-		startTime: now,
-		lastActivity: now,
-		requestCount: 0,
-		totalInputTokens: 0,
-		totalOutputTokens: 0,
-		models: [],
-		endpoint
-	});
-	return sessionId;
-}
-function recordRequest(endpoint, request) {
-	if (!historyState.enabled) return "";
-	const sessionId = getCurrentSession(endpoint);
-	const session = historyState.sessions.get(sessionId);
-	if (!session) return "";
-	const entry = {
-		id: generateId(),
-		sessionId,
-		timestamp: Date.now(),
-		endpoint,
-		request: {
-			model: request.model,
-			messages: request.messages,
-			stream: request.stream,
-			tools: request.tools,
-			max_tokens: request.max_tokens,
-			temperature: request.temperature,
-			system: request.system
-		}
-	};
-	historyState.entries.push(entry);
-	session.requestCount++;
-	if (!session.models.includes(request.model)) session.models.push(request.model);
-	if (request.tools && request.tools.length > 0) {
-		if (!session.toolsUsed) session.toolsUsed = [];
-		for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
 	}
-	while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
-		const removed = historyState.entries.shift();
-		if (removed) {
-			if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
+	/**
+	* Execute in recovering mode - gradual speedup
+	*/
+	async executeInRecoveringMode(fn) {
+		const startTime = Date.now();
+		const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
+		if (currentInterval > 0) {
+			const elapsedMs = Date.now() - this.lastRequestTime;
+			const requiredMs = currentInterval * 1e3;
+			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+				const waitMs = requiredMs - elapsedMs;
+				await this.sleep(waitMs);
+			}
+		}
+		this.lastRequestTime = Date.now();
+		try {
+			const result = await fn();
+			this.recoveryStepIndex++;
+			if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
+			else {
+				const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
+				consola.info(`[RateLimiter] Ramp-up step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
+			}
+			return {
+				result,
+				queueWaitMs: Date.now() - startTime
+			};
+		} catch (error) {
+			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+			if (isRateLimit) {
+				consola.warn("[RateLimiter] Hit rate limit during ramp-up, returning to rate-limited mode");
+				this.enterRateLimitedMode();
+				return this.enqueue(fn, retryAfter);
+			}
+			throw error;
 		}
 	}
-	notifyEntryAdded(entry);
-	return entry.id;
-}
-function recordResponse(id, response, durationMs) {
-	if (!historyState.enabled || !id) return;
-	const entry = historyState.entries.find((e) => e.id === id);
-	if (entry) {
-		entry.response = response;
-		entry.durationMs = durationMs;
-		const session = historyState.sessions.get(entry.sessionId);
-		if (session) {
-			session.totalInputTokens += response.usage.input_tokens;
-			session.totalOutputTokens += response.usage.output_tokens;
-			session.lastActivity = Date.now();
+	/**
+	* Enter rate-limited mode
+	*/
+	enterRateLimitedMode() {
+		if (this.mode === "rate-limited") return;
+		this.mode = "rate-limited";
+		this.rateLimitedAt = Date.now();
+		this.consecutiveSuccesses = 0;
+		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
+	}
+	/**
+	* Check if we should try to recover to normal mode
+	*/
+	shouldAttemptRecovery() {
+		if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
+			consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting ramp-up.`);
+			return true;
 		}
-		notifyEntryUpdated(entry);
+		if (this.rateLimitedAt) {
+			if (Date.now() - this.rateLimitedAt >= this.config.recoveryTimeoutMinutes * 60 * 1e3) {
+				consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting ramp-up.`);
+				return true;
+			}
+		}
+		return false;
 	}
-}
-function recordRewrites(id, rewrites) {
-	if (!historyState.enabled || !id) return;
-	const entry = historyState.entries.find((e) => e.id === id);
-	if (entry) {
-		entry.rewrites = rewrites;
-		if (rewrites.truncation) entry.truncation = rewrites.truncation;
-		notifyEntryUpdated(entry);
+	/**
+	* Start gradual recovery mode
+	*/
+	startGradualRecovery() {
+		this.mode = "recovering";
+		this.recoveryStepIndex = 0;
+		this.rateLimitedAt = null;
+		this.consecutiveSuccesses = 0;
+		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
+		consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
 	}
-}
-function getHistory(options = {}) {
-	const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
-	let filtered = [...historyState.entries];
-	if (sessionId) filtered = filtered.filter((e) => e.sessionId === sessionId);
-	if (model) {
-		const modelLower = model.toLowerCase();
-		filtered = filtered.filter((e) => e.request.model.toLowerCase().includes(modelLower) || e.response?.model.toLowerCase().includes(modelLower));
+	/**
+	* Complete recovery to normal mode
+	*/
+	completeRecovery() {
+		this.mode = "normal";
+		this.recoveryStepIndex = 0;
+		consola.success("[RateLimiter] Exiting rate-limited mode.");
 	}
-	if (endpoint) filtered = filtered.filter((e) => e.endpoint === endpoint);
-	if (success !== void 0) filtered = filtered.filter((e) => e.response?.success === success);
-	if (from) filtered = filtered.filter((e) => e.timestamp >= from);
-	if (to) filtered = filtered.filter((e) => e.timestamp <= to);
-	if (search) {
-		const searchLower = search.toLowerCase();
-		filtered = filtered.filter((e) => {
-			if (e.request.model.toLowerCase().includes(searchLower) || e.response?.model && e.response.model.toLowerCase().includes(searchLower)) return true;
-			if (e.response?.error && e.response.error.toLowerCase().includes(searchLower)) return true;
-			if (e.request.system?.toLowerCase().includes(searchLower)) return true;
-			if (e.request.messages.some((m) => {
-				if (typeof m.content === "string") return m.content.toLowerCase().includes(searchLower);
-				if (Array.isArray(m.content)) return m.content.some((c) => {
-					if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
-					if (c.type === "tool_use") {
-						const name = c.name;
-						if (name && name.toLowerCase().includes(searchLower)) return true;
-						if (c.input) {
-							if ((typeof c.input === "string" ? c.input : JSON.stringify(c.input)).toLowerCase().includes(searchLower)) return true;
-						}
-					}
-					if (c.type === "tool_result" && c.content) {
-						if ((typeof c.content === "string" ? c.content : JSON.stringify(c.content)).toLowerCase().includes(searchLower)) return true;
-					}
-					if (c.type === "thinking") {
-						const thinking = c.thinking;
-						if (thinking && thinking.toLowerCase().includes(searchLower)) return true;
-					}
-					return false;
-				});
-				return false;
-			})) return true;
-			if (e.response?.content) {
-				const rc = e.response.content;
-				if (typeof rc.content === "string" && rc.content.toLowerCase().includes(searchLower)) return true;
-				if (Array.isArray(rc.content)) {
-					if (rc.content.some((c) => {
-						if (c.text && c.text.toLowerCase().includes(searchLower)) return true;
-						if (c.type === "tool_use" && c.name && c.name.toLowerCase().includes(searchLower)) return true;
-						if (c.type === "thinking" && c.thinking && c.thinking.toLowerCase().includes(searchLower)) return true;
-						return false;
-					})) return true;
-				}
+	/**
+	* Enqueue a request for later execution
+	*/
+	enqueue(fn, retryAfterSeconds) {
+		return new Promise((resolve, reject) => {
+			const request = {
+				execute: fn,
+				resolve,
+				reject,
+				retryCount: 0,
+				retryAfterSeconds,
+				enqueuedAt: Date.now()
+			};
+			this.queue.push(request);
+			if (this.queue.length > 1) {
+				const position = this.queue.length;
+				const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
+				consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
 			}
-			if (e.response?.toolCalls?.some((t) => t.name.toLowerCase().includes(searchLower))) return true;
-			return false;
+			this.processQueue();
 		});
 	}
-	filtered.sort((a, b) => b.timestamp - a.timestamp);
-	const total = filtered.length;
-	const totalPages = Math.ceil(total / limit);
-	const start = (page - 1) * limit;
-	return {
-		entries: filtered.slice(start, start + limit),
-		total,
-		page,
-		limit,
-		totalPages
-	};
-}
-function getEntry(id) {
-	return historyState.entries.find((e) => e.id === id);
-}
-function getSessions() {
-	const sessions = Array.from(historyState.sessions.values()).sort((a, b) => b.lastActivity - a.lastActivity);
-	return {
-		sessions,
-		total: sessions.length
-	};
-}
-function getSession(id) {
-	return historyState.sessions.get(id);
-}
-function getSessionEntries(sessionId) {
-	return historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
-}
-function clearHistory() {
-	historyState.entries = [];
-	historyState.sessions = /* @__PURE__ */ new Map();
-	historyState.currentSessionId = generateId();
-}
-function deleteSession(sessionId) {
-	if (!historyState.sessions.has(sessionId)) return false;
-	historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
-	historyState.sessions.delete(sessionId);
-	if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
-	return true;
-}
-function getStats() {
-	const entries = historyState.entries;
-	const modelDist = {};
-	const endpointDist = {};
-	const hourlyActivity = {};
-	let totalInput = 0;
-	let totalOutput = 0;
-	let totalDuration = 0;
-	let durationCount = 0;
-	let successCount = 0;
-	let failCount = 0;
-	for (const entry of entries) {
-		const model = entry.response?.model || entry.request.model;
-		modelDist[model] = (modelDist[model] || 0) + 1;
-		endpointDist[entry.endpoint] = (endpointDist[entry.endpoint] || 0) + 1;
-		const d = new Date(entry.timestamp);
-		const hour = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}T${String(d.getHours()).padStart(2, "0")}`;
-		hourlyActivity[hour] = (hourlyActivity[hour] || 0) + 1;
-		if (entry.response) {
-			if (entry.response.success) successCount++;
-			else failCount++;
-			totalInput += entry.response.usage.input_tokens;
-			totalOutput += entry.response.usage.output_tokens;
-		}
-		if (entry.durationMs) {
-			totalDuration += entry.durationMs;
-			durationCount++;
+	/**
+	* Calculate retry interval with exponential backoff
+	*/
+	calculateRetryInterval(request) {
+		if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
+		const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
+		return Math.min(backoff, this.config.maxRetryIntervalSeconds);
+	}
+	/**
+	* Process the queue
+	*/
+	async processQueue() {
+		if (this.processing) return;
+		this.processing = true;
+		while (this.queue.length > 0) {
+			const request = this.queue[0];
+			if (this.shouldAttemptRecovery()) this.startGradualRecovery();
+			const elapsedMs = Date.now() - this.lastRequestTime;
+			const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
+			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+				const waitMs = requiredMs - elapsedMs;
+				const waitSec = Math.ceil(waitMs / 1e3);
+				consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
+				await this.sleep(waitMs);
+			}
+			this.lastRequestTime = Date.now();
+			try {
+				const result = await request.execute();
+				this.queue.shift();
+				this.consecutiveSuccesses++;
+				request.retryAfterSeconds = void 0;
+				const queueWaitMs = Date.now() - request.enqueuedAt;
+				request.resolve({
+					result,
+					queueWaitMs
+				});
+				if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for ramp-up)`);
+			} catch (error) {
+				const { isRateLimit, retryAfter } = this.isRateLimitError(error);
+				if (isRateLimit) {
+					request.retryCount++;
+					request.retryAfterSeconds = retryAfter;
+					this.consecutiveSuccesses = 0;
+					this.rateLimitedAt = Date.now();
+					const nextInterval = this.calculateRetryInterval(request);
+					const source = retryAfter ? "server Retry-After" : "exponential backoff";
+					consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
+				} else {
+					this.queue.shift();
+					request.reject(error);
+				}
+			}
 		}
+		this.processing = false;
 	}
-	const recentActivity = Object.entries(hourlyActivity).sort(([a], [b]) => a.localeCompare(b)).slice(-24).map(([hour, count]) => ({
-		hour,
-		count
-	}));
-	return {
-		totalRequests: entries.length,
-		successfulRequests: successCount,
-		failedRequests: failCount,
-		totalInputTokens: totalInput,
-		totalOutputTokens: totalOutput,
-		averageDurationMs: durationCount > 0 ? totalDuration / durationCount : 0,
-		modelDistribution: modelDist,
-		endpointDistribution: endpointDist,
-		recentActivity,
-		activeSessions: historyState.sessions.size
-	};
+	sleep(ms) {
+		return new Promise((resolve) => setTimeout(resolve, ms));
+	}
+	/**
+	* Get current status for debugging/monitoring
+	*/
+	getStatus() {
+		return {
+			mode: this.mode,
+			queueLength: this.queue.length,
+			consecutiveSuccesses: this.consecutiveSuccesses,
+			rateLimitedAt: this.rateLimitedAt
+		};
+	}
+};
+let rateLimiterInstance = null;
+/**
+* Initialize the adaptive rate limiter with configuration
+*/
+function initAdaptiveRateLimiter(config = {}) {
+	rateLimiterInstance = new AdaptiveRateLimiter(config);
+	const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG.baseRetryIntervalSeconds;
+	const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG.maxRetryIntervalSeconds;
+	const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG.requestIntervalSeconds;
+	const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG.recoveryTimeoutMinutes;
+	const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG.consecutiveSuccessesForRecovery;
+	const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG.gradualRecoverySteps;
+	consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
 }
-function exportHistory(format = "json") {
-	if (format === "json") return JSON.stringify({
-		sessions: Array.from(historyState.sessions.values()),
-		entries: historyState.entries
-	}, null, 2);
-	const headers = [
-		"id",
-		"session_id",
-		"timestamp",
-		"endpoint",
-		"request_model",
-		"message_count",
-		"stream",
-		"success",
-		"response_model",
-		"input_tokens",
-		"output_tokens",
-		"duration_ms",
-		"stop_reason",
-		"error"
-	];
-	const rows = historyState.entries.map((e) => [
-		e.id,
-		e.sessionId,
-		formatLocalTimestamp(e.timestamp),
-		e.endpoint,
-		e.request.model,
-		e.request.messages.length,
-		e.request.stream,
-		e.response?.success ?? "",
-		e.response?.model ?? "",
-		e.response?.usage.input_tokens ?? "",
-		e.response?.usage.output_tokens ?? "",
-		e.durationMs ?? "",
-		e.response?.stop_reason ?? "",
-		e.response?.error ?? ""
-	]);
-	return [headers.join(","), ...rows.map((r) => r.join(","))].join("\n");
+/**
+* Execute a request with adaptive rate limiting.
+* If rate limiter is not initialized, executes immediately.
+* Returns the result along with queue wait time.
+*/
+async function executeWithAdaptiveRateLimit(fn) {
+	if (!rateLimiterInstance) return {
+		result: await fn(),
+		queueWaitMs: 0
+	};
+	return rateLimiterInstance.execute(fn);
 }
 //#endregion
-//#region src/lib/proxy.ts
+//#region src/lib/config/proxy.ts
 /**
 * Custom dispatcher that routes requests through proxies based on environment variables.
 * Extends Agent to properly inherit the Dispatcher interface.
@@ -3700,7 +3812,7 @@ function sanitizeOpenAIMessages(payload) {
 }
 //#endregion
-//#region src/lib/tokenizer.ts
+//#region src/lib/models/tokenizer.ts
 const ENCODING_MAP = {
 	o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
 	cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
@@ -4278,7 +4390,7 @@ function createTruncationResponseMarkerOpenAI(result) {
 }
 //#endregion
-//#region src/lib/model-resolver.ts
+//#region src/lib/models/resolver.ts
 /**
 * Unified model name resolution and normalization.
 *
@@ -4397,11 +4509,77 @@ const createChatCompletions = async (payload) => {
 };
 //#endregion
-//#region src/routes/shared.ts
+//#region src/routes/shared/payload.ts
+/**
+* Payload utilities for request handlers.
+*/
+/** Build final payload with sanitization (no pre-truncation — truncation is now reactive) */
+function buildFinalPayload(payload, _model) {
+	const { payload: sanitizedPayload, removedCount: sanitizeRemovedCount, systemReminderRemovals } = sanitizeOpenAIMessages(payload);
+	return {
+		finalPayload: sanitizedPayload,
+		truncateResult: null,
+		sanitizeRemovedCount,
+		systemReminderRemovals
+	};
+}
 /**
-* Shared utilities for request handlers.
-* Contains common functions used by both OpenAI and Anthropic message handlers.
+* Log helpful debugging information when a 413 error occurs.
+* Also adjusts the dynamic byte limit for future requests.
 */
+async function logPayloadSizeInfo(payload, model) {
+	const messageCount = payload.messages.length;
+	const bodySize = JSON.stringify(payload).length;
+	const bodySizeKB = bytesToKB(bodySize);
+	onRequestTooLarge(bodySize);
+	let imageCount = 0;
+	let largeMessages = 0;
+	let totalImageSize = 0;
+	for (const msg of payload.messages) {
+		if (Array.isArray(msg.content)) {
+			for (const part of msg.content) if (part.type === "image_url") {
+				imageCount++;
+				if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
+			}
+		}
+		if ((typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content).length) > 5e4) largeMessages++;
+	}
+	consola.info("");
+	consola.info("╭─────────────────────────────────────────────────────────╮");
+	consola.info("│           413 Request Entity Too Large                  │");
+	consola.info("╰─────────────────────────────────────────────────────────╯");
+	consola.info("");
+	consola.info(`  Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
+	consola.info(`  Message count: ${messageCount}`);
+	if (model) try {
+		const tokenCount = await getTokenCount(payload, model);
+		const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+		consola.info(`  Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
+	} catch (error) {
+		consola.debug("Token count estimation failed:", error);
+	}
+	if (imageCount > 0) {
+		const imageSizeKB = bytesToKB(totalImageSize);
+		consola.info(`  Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
+	}
+	if (largeMessages > 0) consola.info(`  Large messages (>50KB): ${largeMessages}`);
+	consola.info("");
+	consola.info("  Suggestions:");
+	if (imageCount > 0) consola.info("    • Remove or resize large images in the conversation");
+	consola.info("    • Start a new conversation with /clear or /reset");
+	consola.info("    • Reduce conversation history by deleting old messages");
+	consola.info("");
+}
+//#endregion
+//#region src/routes/shared/response.ts
+/** Type guard for non-streaming responses */
+function isNonStreaming(response) {
+	return Object.hasOwn(response, "choices");
+}
+//#endregion
+//#region src/routes/shared/tracking.ts
 /** Helper to update tracker model */
 function updateTrackerModel(trackingId, model) {
 	if (!trackingId) return;
@@ -4413,40 +4591,6 @@ function updateTrackerStatus(trackingId, status) {
 	if (!trackingId) return;
 	requestTracker.updateRequest(trackingId, { status });
 }
-/** Record error response to history, preserving full error details for debugging */
-function recordErrorResponse(ctx, model, error) {
-	const errorMessage = getErrorMessage(error);
-	let content = null;
-	if (error instanceof Error && "responseText" in error && typeof error.responseText === "string") {
-		const responseText = error.responseText;
-		const status = "status" in error ? error.status : void 0;
-		if (responseText) {
-			let formattedBody;
-			try {
-				formattedBody = JSON.stringify(JSON.parse(responseText), null, 2);
-			} catch {
-				formattedBody = responseText;
-			}
-			content = {
-				role: "assistant",
-				content: [{
-					type: "text",
-					text: `[API Error Response${status ? ` - HTTP ${status}` : ""}]\n\n${formattedBody}`
-				}]
-			};
-		}
-	}
-	recordResponse(ctx.historyId, {
-		success: false,
-		model,
-		usage: {
-			input_tokens: 0,
-			output_tokens: 0
-		},
-		error: errorMessage,
-		content
-	}, Date.now() - ctx.startTime);
-}
 /** Complete TUI tracking */
 function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
 	if (!trackingId) return;
@@ -4465,16 +4609,39 @@ function failTracking(trackingId, error) {
 	if (!trackingId) return;
 	requestTracker.failRequest(trackingId, getErrorMessage(error, "Stream error"));
 }
-/**
-* Create a marker to prepend to responses indicating auto-truncation occurred.
-* Works with both OpenAI and Anthropic truncate results.
-*/
-function createTruncationMarker$1(result) {
-	if (!result.wasCompacted) return "";
-	const { originalTokens, compactedTokens, removedMessageCount } = result;
-	if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
-	const reduction = originalTokens - compactedTokens;
-	return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${Math.round(reduction / originalTokens * 100)}% reduction)]`;
+/** Record error response to history, preserving full error details for debugging */
+function recordErrorResponse(ctx, model, error) {
+	const errorMessage = getErrorMessage(error);
+	let content = null;
+	if (error instanceof Error && "responseText" in error && typeof error.responseText === "string") {
+		const responseText = error.responseText;
+		const status = "status" in error ? error.status : void 0;
+		if (responseText) {
+			let formattedBody;
+			try {
+				formattedBody = JSON.stringify(JSON.parse(responseText), null, 2);
+			} catch {
+				formattedBody = responseText;
+			}
+			content = {
+				role: "assistant",
+				content: [{
+					type: "text",
+					text: `[API Error Response${status ? ` - HTTP ${status}` : ""}]\n\n${formattedBody}`
+				}]
+			};
+		}
+	}
+	recordResponse(ctx.historyId, {
+		success: false,
+		model,
+		usage: {
+			input_tokens: 0,
+			output_tokens: 0
+		},
+		error: errorMessage,
+		content
+	}, Date.now() - ctx.startTime);
 }
 /** Record streaming error to history, preserving any data accumulated before the error */
 function recordStreamError(opts) {
@@ -4496,66 +4663,168 @@ function recordStreamError(opts) {
 		} : null
 	}, Date.now() - ctx.startTime);
 }
-/** Type guard for non-streaming responses */
-function isNonStreaming(response) {
-	return Object.hasOwn(response, "choices");
-}
-/** Build final payload with sanitization (no pre-truncation — truncation is now reactive) */
-function buildFinalPayload(payload, _model) {
-	const { payload: sanitizedPayload, removedCount: sanitizeRemovedCount, systemReminderRemovals } = sanitizeOpenAIMessages(payload);
-	return {
-		finalPayload: sanitizedPayload,
-		truncateResult: null,
-		sanitizeRemovedCount,
-		systemReminderRemovals
-	};
+//#endregion
+//#region src/routes/shared/truncation.ts
+/**
+* Create a marker to prepend to responses indicating auto-truncation occurred.
+* Works with both OpenAI and Anthropic truncate results.
+*
+* @param result - Truncate result; reads `wasCompacted`, `originalTokens`,
+*   `compactedTokens` and `removedMessageCount`
+* @returns "" when `wasCompacted` is falsy; a generic marker when any of the three
+*   counters is undefined; otherwise a marker with the removed-message count, the
+*   before/after token counts and the rounded percentage reduction
+*
+* NOTE(review): the marker text starts with a blank line and `---`, which reads like a
+* trailing footer; confirm whether callers really prepend it.
+* NOTE(review): an `originalTokens` of 0 makes the percentage NaN or -Infinity.
+*/
+function createTruncationMarker$1(result) {
+	if (!result.wasCompacted) return "";
+	const { originalTokens, compactedTokens, removedMessageCount } = result;
+	if (originalTokens === void 0 || compactedTokens === void 0 || removedMessageCount === void 0) return `\n\n---\n[Auto-truncated: conversation history was reduced to fit context limits]`;
+	const reduction = originalTokens - compactedTokens;
+	return `\n\n---\n[Auto-truncated: ${removedMessageCount} messages removed, ${originalTokens} → ${compactedTokens} tokens (${Math.round(reduction / originalTokens * 100)}% reduction)]`;
 }
+//#endregion
+//#region src/routes/shared/pipeline.ts
 /**
-* Log helpful debugging information when a 413 error occurs.
-* Also adjusts the dynamic byte limit for future requests.
+* Request execution pipeline with pluggable retry strategies.
+*
+* Unifies the retry loop pattern shared by direct-anthropic-handler,
+* translated-handler, and the completions handler.
 */
-async function logPayloadSizeInfo(payload, model) {
-	const messageCount = payload.messages.length;
-	const bodySize = JSON.stringify(payload).length;
-	const bodySizeKB = bytesToKB(bodySize);
-	onRequestTooLarge(bodySize);
-	let imageCount = 0;
-	let largeMessages = 0;
-	let totalImageSize = 0;
-	for (const msg of payload.messages) {
-		if (Array.isArray(msg.content)) {
-			for (const part of msg.content) if (part.type === "image_url") {
-				imageCount++;
-				if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
+/**
+* Execute a request through the pipeline with retry strategies.
+*
+* Flow:
+* 1. Execute API call with the current payload
+* 2. On success → return response
+* 3. On failure → classify error → find first matching strategy → handle
+*    - retry → use new payload, loop back to step 1
+*    - abort or no strategy → throw error
+*
+* Only the first strategy whose `canHandle` returns true is consulted for a given
+* failure; if it does not ask for a retry, or throws, no later strategy is tried.
+* At most `maxRetries + 1` calls to `adapter.execute` are made.
+*
+* @param opts.adapter - Provides `execute(payload)` resolving to `{ result, queueWaitMs }`
+*   and `logPayloadSize(payload)`
+* @param opts.strategies - Ordered list of `{ name, canHandle, handle }` strategies
+* @param opts.payload - Payload used for the first attempt
+* @param opts.originalPayload - Passed through to strategies in the retry context
+* @param opts.model - Passed through to strategies in the retry context
+* @param opts.maxRetries - Retry budget (default 3)
+* @param opts.onBeforeAttempt - Optional hook called with `(attempt, payload)` before each attempt
+* @param opts.onRetry - Optional hook called with `(attempt, strategyName, newPayload, meta)`
+* @returns `{ response, effectivePayload, queueWaitMs, totalRetries }` from the successful attempt
+* @throws The last error from `adapter.execute` (or `Error("Unknown error")` if it was not an Error)
+*
+* NOTE(review): `adapter.logPayloadSize` is awaited outside any try/catch before the
+* rethrow, so a rejection there would replace the original error.
+*/
+async function executeRequestPipeline(opts) {
+	const { adapter, strategies, originalPayload, model, maxRetries = 3, onBeforeAttempt, onRetry } = opts;
+	let effectivePayload = opts.payload;
+	let lastError = null;
+	let totalQueueWaitMs = 0;
+	for (let attempt = 0; attempt <= maxRetries; attempt++) {
+		onBeforeAttempt?.(attempt, effectivePayload);
+		try {
+			const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
+			totalQueueWaitMs += queueWaitMs;
+			return {
+				response,
+				effectivePayload,
+				queueWaitMs: totalQueueWaitMs,
+				totalRetries: attempt
+			};
+		} catch (error) {
+			lastError = error;
+			if (attempt >= maxRetries) break; // retry budget exhausted: skip strategies, rethrow below
+			const apiError = classifyError(error);
+			let handled = false;
+			for (const strategy of strategies) {
+				if (!strategy.canHandle(apiError)) continue;
+				const retryContext = {
+					attempt,
+					originalPayload,
+					model,
+					maxRetries
+				};
+				try {
+					const action = await strategy.handle(apiError, effectivePayload, retryContext);
+					if (action.action === "retry") {
+						consola.debug(`[Pipeline] Strategy "${strategy.name}" requests retry (attempt ${attempt + 1}/${maxRetries + 1})`);
+						if (action.waitMs && action.waitMs > 0) totalQueueWaitMs += action.waitMs; // accounting only; no delay is performed here
+						effectivePayload = action.payload;
+						onRetry?.(attempt, strategy.name, action.payload, action.meta);
+						handled = true;
+						break;
+					}
+					break; // non-retry action: stop without consulting later strategies
+				} catch (strategyError) {
+					consola.warn(`[Pipeline] Strategy "${strategy.name}" failed on attempt ${attempt + 1}:`, strategyError instanceof Error ? strategyError.message : strategyError);
+					break;
+				}
 			}
+			if (!handled) break;
 		}
-		if ((typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content).length) > 5e4) largeMessages++;
-	}
-	consola.info("");
-	consola.info("╭─────────────────────────────────────────────────────────╮");
-	consola.info("│           413 Request Entity Too Large                  │");
-	consola.info("╰─────────────────────────────────────────────────────────╯");
-	consola.info("");
-	consola.info(`  Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
-	consola.info(`  Message count: ${messageCount}`);
-	if (model) try {
-		const tokenCount = await getTokenCount(payload, model);
-		const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-		consola.info(`  Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
-	} catch (error) {
-		consola.debug("Token count estimation failed:", error);
 	}
-	if (imageCount > 0) {
-		const imageSizeKB = bytesToKB(totalImageSize);
-		consola.info(`  Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
+	if (lastError) {
+		if (classifyError(lastError).type === "payload_too_large") await adapter.logPayloadSize(effectivePayload);
+		throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
 	}
-	if (largeMessages > 0) consola.info(`  Large messages (>50KB): ${largeMessages}`);
-	consola.info("");
-	consola.info("  Suggestions:");
-	if (imageCount > 0) consola.info("    • Remove or resize large images in the conversation");
-	consola.info("    • Start a new conversation with /clear or /reset");
-	consola.info("    • Reduce conversation history by deleting old messages");
-	consola.info("");
+	throw new Error("Unexpected state in pipeline retry loop");
+}
+//#endregion
+//#region src/routes/shared/strategies/auto-truncate.ts
+/**
+* Auto-truncate retry strategy.
+*
+* Handles 413 (body too large) and token limit errors by truncating the
+* message payload and retrying.
+*/
+/**
+* Create an auto-truncate retry strategy.
+*
+* All inputs are properties of the single `opts` argument:
+*
+* @param opts.truncate - Format-specific truncation function
+* @param opts.resanitize - Format-specific re-sanitization after truncation
+* @param opts.isEnabled - Check if auto-truncate is enabled (typically reads state.autoTruncate)
+* @param opts.label - Prefix used in the retry log lines
+* @returns Strategy object with `name`, `canHandle(error)` and async `handle(error, currentPayload, context)`
+*
+* `canHandle` accepts "payload_too_large" and "token_limit" errors while `isEnabled()` is true.
+* `handle` returns `{ action: "abort", error }` when there is no model, when `error.raw` is
+* not an HTTPError, when tryParseAndLearnLimit() yields nothing, or when truncation did not
+* compact the payload. Otherwise it returns `{ action: "retry", payload, meta }` with the
+* re-sanitized truncated payload.
+*/
+function createAutoTruncateStrategy(opts) {
+	const { truncate, resanitize, isEnabled, label } = opts;
+	return {
+		name: "auto-truncate",
+		canHandle(error) {
+			if (!isEnabled()) return false;
+			return error.type === "payload_too_large" || error.type === "token_limit";
+		},
+		async handle(error, currentPayload, context) {
+			const { attempt, originalPayload, model, maxRetries } = context;
+			if (!model) return {
+				action: "abort",
+				error
+			};
+			const rawError = error.raw;
+			if (!(rawError instanceof HTTPError)) return {
+				action: "abort",
+				error
+			};
+			const payloadBytes = JSON.stringify(currentPayload).length; // NOTE(review): string length (UTF-16 code units), not an encoded byte count; confirm this is intended
+			const parsed = tryParseAndLearnLimit(rawError, model.id, payloadBytes);
+			if (!parsed) return {
+				action: "abort",
+				error
+			};
+			let targetTokenLimit;
+			let targetByteLimitBytes;
+			if (parsed.type === "token_limit" && parsed.limit) {
+				targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
+				consola.info(`[${label}] Attempt ${attempt + 1}/${maxRetries + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
+			} else if (parsed.type === "body_too_large") {
+				targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
+				consola.info(`[${label}] Attempt ${attempt + 1}/${maxRetries + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
+			}
+			const truncateResult = await truncate(originalPayload, model, { // truncates from originalPayload, not currentPayload
+				checkTokenLimit: true,
+				checkByteLimit: true,
+				targetTokenLimit,
+				targetByteLimitBytes
+			});
+			if (!truncateResult.wasCompacted) return {
+				action: "abort",
+				error
+			};
+			const sanitizeResult = resanitize(truncateResult.payload);
+			return {
+				action: "retry",
+				payload: sanitizeResult.payload,
+				meta: {
+					truncateResult,
+					sanitization: {
+						removedCount: sanitizeResult.removedCount,
+						systemReminderRemovals: sanitizeResult.systemReminderRemovals
+					},
+					attempt: attempt + 1
+				}
+			};
+		}
+	};
 }
 //#endregion
@@ -4606,19 +4875,46 @@ async function handleCompletion$1(c) {
 	return executeRequest({
 		c,
 		payload,
+		originalPayload,
 		selectedModel,
 		ctx,
 		trackingId
 	});
 }
 /**
-* Execute the API call with enhanced error handling for 413 errors.
+* Execute the API call with reactive retry pipeline.
+* Handles 413 and token limit errors with auto-truncation.
 */
 async function executeRequest(opts) {
-	const { c, payload, selectedModel, ctx, trackingId } = opts;
+	const { c, payload, originalPayload, selectedModel, ctx, trackingId } = opts;
+	const adapter = {
+		format: "openai",
+		sanitize: (p) => sanitizeOpenAIMessages(p),
+		execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
+		logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
+	};
+	const strategies = [createAutoTruncateStrategy({
+		truncate: (p, model, truncOpts) => autoTruncateOpenAI(p, model, truncOpts),
+		resanitize: (p) => sanitizeOpenAIMessages(p),
+		isEnabled: () => state.autoTruncate,
+		label: "Completions"
+	})];
 	try {
-		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
-		ctx.queueWaitMs = queueWaitMs;
+		const result = await executeRequestPipeline({
+			adapter,
+			strategies,
+			payload,
+			originalPayload,
+			model: selectedModel,
+			maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
+			onRetry: (attempt, _strategyName, _newPayload, meta) => {
+				const retryTruncateResult = meta?.truncateResult;
+				if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
+				if (trackingId) requestTracker.updateRequest(trackingId, { tags: ["compact", `retry-${attempt + 1}`] });
+			}
+		});
+		ctx.queueWaitMs = result.queueWaitMs;
+		const response = result.response;
 		if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
 		consola.debug("Streaming response");
 		updateTrackerStatus(trackingId, "streaming");
@@ -4631,7 +4927,6 @@ async function executeRequest(opts) {
 			});
 		});
 	} catch (error) {
-		if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
 		recordErrorResponse(ctx, payload.model, error);
 		throw error;
 	}
@@ -5715,7 +6010,7 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
 }
 //#endregion
-//#region src/routes/messages/non-stream-translation.ts
+//#region src/lib/translation/non-stream.ts
 const OPENAI_TOOL_NAME_LIMIT = 64;
 /**
 * Ensure all tool_use blocks have corresponding tool_result responses,
@@ -6650,7 +6945,47 @@ function handleMessageDelta(delta, usage, acc) {
 }
 //#endregion
-//#region src/routes/messages/stream-translation.ts
+//#region src/lib/translation/message-mapping.ts
+/**
+* Check if two messages likely correspond to the same original message.
+* Used by buildMessageMapping to handle cases where sanitization removes
+* content blocks within a message (changing its shape) or removes entire messages.
+*
+* Rules, in order:
+* - roles must be equal;
+* - two string contents match when either one starts with the first 100 characters of the other;
+* - otherwise only the first content block of each side is compared: the types must be equal,
+*   `tool_use` blocks must share `id`, `tool_result` blocks must share `tool_use_id`,
+*   and any other equal type is accepted.
+*
+* @param orig - Message from the original list (`role`, `content`)
+* @param rewritten - Message from the rewritten list (`role`, `content`)
+* @returns true when the two messages are considered the same message
+*/
+function messagesMatch(orig, rewritten) {
+	if (orig.role !== rewritten.role) return false;
+	if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
+	const origBlocks = Array.isArray(orig.content) ? orig.content : [];
+	const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
+	if (origBlocks.length === 0 || rwBlocks.length === 0) return true; // a non-array or empty side has no first block to compare: accept on role alone
+	const ob = origBlocks[0];
+	const rb = rwBlocks[0];
+	if (ob.type !== rb.type) return false;
+	if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
+	if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
+	return true;
+}
+/**
+* Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
+* Uses a two-pointer approach since rewritten messages maintain the same relative
+* order as originals (all transformations are deletions, never reorderings).
+*
+* @param original - Original message list
+* @param rewritten - Rewritten message list
+* @returns Array with one entry per rewritten message: the index of the matched
+*   original message, or -1 when no match was found
+*
+* NOTE(review): the original-side pointer only moves forward. If one rewritten message
+* matches nothing, the pointer runs to the end of `original`, so that message and every
+* later rewritten message are mapped to -1.
+*/
+function buildMessageMapping(original, rewritten) {
+	const mapping = [];
+	let origIdx = 0;
+	for (const element of rewritten) while (origIdx < original.length) {
+		if (messagesMatch(original[origIdx], element)) {
+			mapping.push(origIdx);
+			origIdx++;
+			break;
+		}
+		origIdx++;
+	}
+	while (mapping.length < rewritten.length) mapping.push(-1); // pad unmatched tail entries
+	return mapping;
+}
+//#endregion
+//#region src/lib/translation/stream.ts
 function isToolBlockOpen(state) {
 	if (!state.contentBlockOpen) return false;
 	return Object.values(state.toolCalls).some((tc) => tc.anthropicBlockIndex === state.contentBlockIndex);
@@ -6823,12 +7158,57 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
 		if (initialSanitized.thinking && initialSanitized.thinking.type !== "disabled") tags.push(`thinking:${initialSanitized.thinking.type}`);
 		if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
 	}
-	let effectivePayload = initialSanitized;
+	const adapter = {
+		format: "anthropic",
+		sanitize: (p) => sanitizeAnthropicMessages(p),
+		execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p)),
+		logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
+	};
+	const strategies = [createAutoTruncateStrategy({
+		truncate: (p, model, opts) => autoTruncateAnthropic(p, model, opts),
+		resanitize: (p) => sanitizeAnthropicMessages(p),
+		isEnabled: () => state.autoTruncate,
+		label: "Anthropic"
+	})];
 	let truncateResult;
-	let lastError = null;
-	for (let attempt = 0; attempt <= MAX_AUTO_TRUNCATE_RETRIES; attempt++) try {
-		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createAnthropicMessages(effectivePayload));
-		ctx.queueWaitMs = queueWaitMs;
+	try {
+		const result = await executeRequestPipeline({
+			adapter,
+			strategies,
+			payload: initialSanitized,
+			originalPayload: anthropicPayload,
+			model: selectedModel,
+			maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
+			onRetry: (_attempt, _strategyName, newPayload, meta) => {
+				const retryTruncateResult = meta?.truncateResult;
+				if (retryTruncateResult) truncateResult = retryTruncateResult;
+				const retrySanitization = meta?.sanitization;
+				const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, newPayload.messages);
+				recordRewrites(ctx.historyId, {
+					truncation: retryTruncateResult ? {
+						removedMessageCount: retryTruncateResult.removedMessageCount,
+						originalTokens: retryTruncateResult.originalTokens,
+						compactedTokens: retryTruncateResult.compactedTokens,
+						processingTimeMs: retryTruncateResult.processingTimeMs
+					} : void 0,
+					sanitization: retrySanitization && (retrySanitization.removedCount > 0 || retrySanitization.systemReminderRemovals > 0) ? {
+						removedBlockCount: retrySanitization.removedCount,
+						systemReminderRemovals: retrySanitization.systemReminderRemovals
+					} : void 0,
+					rewrittenMessages: convertAnthropicMessages(newPayload.messages),
+					rewrittenSystem: typeof newPayload.system === "string" ? newPayload.system : void 0,
+					messageMapping: retryMessageMapping
+				});
+				if (ctx.trackingId) {
+					const retryTags = ["compact", `retry-${meta?.attempt ?? 1}`];
+					if (newPayload.thinking && newPayload.thinking.type !== "disabled") retryTags.push(`thinking:${newPayload.thinking.type}`);
+					requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
+				}
+			}
+		});
+		ctx.queueWaitMs = result.queueWaitMs;
+		const response = result.response;
+		const effectivePayload = result.effectivePayload;
 		if (Symbol.asyncIterator in response) {
 			consola.debug("Streaming response from Copilot (direct Anthropic)");
 			updateTrackerStatus(ctx.trackingId, "streaming");
@@ -6843,67 +7223,9 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, ctx) {
 		}
 		return handleDirectAnthropicNonStreamingResponse(c, response, ctx, truncateResult);
 	} catch (error) {
-		lastError = error;
-		if (state.autoTruncate && error instanceof HTTPError && selectedModel && attempt < MAX_AUTO_TRUNCATE_RETRIES) {
-			const payloadBytes = JSON.stringify(effectivePayload).length;
-			const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
-			if (parsed) {
-				let targetTokenLimit;
-				let targetByteLimitBytes;
-				if (parsed.type === "token_limit" && parsed.limit) {
-					targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
-					consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
-				} else if (parsed.type === "body_too_large") {
-					targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
-					consola.info(`[Anthropic] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
-				}
-				try {
-					truncateResult = await autoTruncateAnthropic(anthropicPayload, selectedModel, {
-						checkTokenLimit: true,
-						checkByteLimit: true,
-						targetTokenLimit,
-						targetByteLimitBytes
-					});
-					if (truncateResult.wasCompacted) {
-						const { payload: retrySanitized, removedCount: retryOrphanedRemovals, systemReminderRemovals: retrySystemRemovals } = sanitizeAnthropicMessages(truncateResult.payload);
-						effectivePayload = retrySanitized;
-						const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, effectivePayload.messages);
-						recordRewrites(ctx.historyId, {
-							truncation: {
-								removedMessageCount: truncateResult.removedMessageCount,
-								originalTokens: truncateResult.originalTokens,
-								compactedTokens: truncateResult.compactedTokens,
-								processingTimeMs: truncateResult.processingTimeMs
-							},
-							sanitization: retryOrphanedRemovals > 0 || retrySystemRemovals > 0 ? {
-								removedBlockCount: retryOrphanedRemovals,
-								systemReminderRemovals: retrySystemRemovals
-							} : void 0,
-							rewrittenMessages: convertAnthropicMessages(effectivePayload.messages),
-							rewrittenSystem: typeof effectivePayload.system === "string" ? effectivePayload.system : void 0,
-							messageMapping: retryMessageMapping
-						});
-						if (ctx.trackingId) {
-							const retryTags = ["compact", `retry-${attempt + 1}`];
-							if (effectivePayload.thinking && effectivePayload.thinking.type !== "disabled") retryTags.push(`thinking:${effectivePayload.thinking.type}`);
-							requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
-						}
-						continue;
-					} else break;
-				} catch (truncateError) {
-					consola.warn(`[Anthropic] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
-					break;
-				}
-			}
-		}
-		break;
-	}
-	if (lastError) {
-		if (lastError instanceof HTTPError && lastError.status === 413) logPayloadSizeInfoAnthropic(effectivePayload, selectedModel);
-		recordErrorResponse(ctx, anthropicPayload.model, lastError);
-		throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
+		recordErrorResponse(ctx, anthropicPayload.model, error);
+		throw error;
 	}
-	throw new Error("Unexpected state in retry loop");
 }
 /**
 * Log payload size info for debugging 413 errors
@@ -7078,43 +7400,6 @@ function recordStreamingResponse$1(acc, fallbackModel, ctx) {
 		toolCalls
 	}, Date.now() - ctx.startTime);
 }
-/**
-* Check if two messages likely correspond to the same original message.
-* Used by buildMessageMapping to handle cases where sanitization removes
-* content blocks within a message (changing its shape) or removes entire messages.
-*/
-function messagesMatch(orig, rewritten) {
-	if (orig.role !== rewritten.role) return false;
-	if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
-	const origBlocks = Array.isArray(orig.content) ? orig.content : [];
-	const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
-	if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
-	const ob = origBlocks[0];
-	const rb = rwBlocks[0];
-	if (ob.type !== rb.type) return false;
-	if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
-	if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
-	return true;
-}
-/**
-* Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
-* Uses a two-pointer approach since rewritten messages maintain the same relative
-* order as originals (all transformations are deletions, never reorderings).
-*/
-function buildMessageMapping(original, rewritten) {
-	const mapping = [];
-	let origIdx = 0;
-	for (const element of rewritten) while (origIdx < original.length) {
-		if (messagesMatch(original[origIdx], element)) {
-			mapping.push(origIdx);
-			origIdx++;
-			break;
-		}
-		origIdx++;
-	}
-	while (mapping.length < rewritten.length) mapping.push(-1);
-	return mapping;
-}
 //#endregion
 //#region src/routes/messages/translated-handler.ts
@@ -7152,11 +7437,38 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
 		if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") tags.push(`thinking:${anthropicPayload.thinking.type}`);
 		if (tags.length > 0) requestTracker.updateRequest(ctx.trackingId, { tags });
 	}
-	let effectivePayload = initialOpenAIPayload;
-	let lastError = null;
-	for (let attempt = 0; attempt <= MAX_AUTO_TRUNCATE_RETRIES; attempt++) try {
-		const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(effectivePayload));
-		ctx.queueWaitMs = queueWaitMs;
+	const adapter = {
+		format: "openai",
+		sanitize: (p) => sanitizeOpenAIMessages(p),
+		execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p)),
+		logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
+	};
+	const strategies = [createAutoTruncateStrategy({
+		truncate: (p, model, opts) => autoTruncateOpenAI(p, model, opts),
+		resanitize: (p) => sanitizeOpenAIMessages(p),
+		isEnabled: () => state.autoTruncate,
+		label: "Translated"
+	})];
+	try {
+		const result = await executeRequestPipeline({
+			adapter,
+			strategies,
+			payload: initialOpenAIPayload,
+			originalPayload: translatedPayload,
+			model: selectedModel,
+			maxRetries: MAX_AUTO_TRUNCATE_RETRIES,
+			onRetry: (attempt, _strategyName, _newPayload, meta) => {
+				const retryTruncateResult = meta?.truncateResult;
+				if (retryTruncateResult) ctx.truncateResult = retryTruncateResult;
+				if (ctx.trackingId) {
+					const retryTags = ["compact", `retry-${attempt + 1}`];
+					if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
+					requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
+				}
+			}
+		});
+		ctx.queueWaitMs = result.queueWaitMs;
+		const response = result.response;
 		if (isNonStreaming(response)) return handleNonStreamingResponse({
 			c,
 			response,
@@ -7175,52 +7487,9 @@ async function handleTranslatedCompletion(c, anthropicPayload, ctx) {
 			});
 		});
 	} catch (error) {
-		lastError = error;
-		if (state.autoTruncate && error instanceof HTTPError && selectedModel && attempt < MAX_AUTO_TRUNCATE_RETRIES) {
-			const payloadBytes = JSON.stringify(effectivePayload).length;
-			const parsed = tryParseAndLearnLimit(error, selectedModel.id, payloadBytes);
-			if (parsed) {
-				let targetTokenLimit;
-				let targetByteLimitBytes;
-				if (parsed.type === "token_limit" && parsed.limit) {
-					targetTokenLimit = Math.floor(parsed.limit * AUTO_TRUNCATE_RETRY_FACTOR);
-					consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Token limit error (${parsed.current}>${parsed.limit}), retrying with limit ${targetTokenLimit}...`);
-				} else if (parsed.type === "body_too_large") {
-					targetByteLimitBytes = Math.floor(payloadBytes * AUTO_TRUNCATE_RETRY_FACTOR);
-					consola.info(`[Translated] Attempt ${attempt + 1}/${MAX_AUTO_TRUNCATE_RETRIES + 1}: Body too large (${bytesToKB(payloadBytes)}KB), retrying with limit ${bytesToKB(targetByteLimitBytes)}KB...`);
-				}
-				try {
-					const retryTruncateResult = await autoTruncateOpenAI(translatedPayload, selectedModel, {
-						checkTokenLimit: true,
-						checkByteLimit: true,
-						targetTokenLimit,
-						targetByteLimitBytes
-					});
-					if (retryTruncateResult.wasCompacted) {
-						const { payload: retrySanitized } = sanitizeOpenAIMessages(retryTruncateResult.payload);
-						effectivePayload = retrySanitized;
-						ctx.truncateResult = retryTruncateResult;
-						if (ctx.trackingId) {
-							const retryTags = ["compact", `retry-${attempt + 1}`];
-							if (anthropicPayload.thinking && anthropicPayload.thinking.type !== "disabled") retryTags.push(`thinking:${anthropicPayload.thinking.type}`);
-							requestTracker.updateRequest(ctx.trackingId, { tags: retryTags });
-						}
-						continue;
-					} else break;
-				} catch (truncateError) {
-					consola.warn(`[Translated] Auto-truncate failed on attempt ${attempt + 1}:`, truncateError instanceof Error ? truncateError.message : truncateError);
-					break;
-				}
-			}
-		}
-		break;
-	}
-	if (lastError) {
-		if (lastError instanceof HTTPError && lastError.status === 413) await logPayloadSizeInfo(effectivePayload, selectedModel);
-		recordErrorResponse(ctx, anthropicPayload.model, lastError);
-		throw lastError instanceof Error ? lastError : /* @__PURE__ */ new Error("Unknown error");
+		recordErrorResponse(ctx, anthropicPayload.model, error);
+		throw error;
 	}
-	throw new Error("Unexpected state in retry loop");
 }
 function handleNonStreamingResponse(opts) {
 	const { c, response, toolNameMapping, ctx } = opts;
@@ -7596,6 +7865,25 @@ usageRoute.get("/", async (c) => {
 	}
 });
+//#endregion
+//#region src/routes/index.ts
+/**
+* Register all API routes on the given Hono app.
+*
+* Chat completions, models and embeddings are mounted twice: at the bare path and
+* under `/v1`. Messages are mounted only at `/v1/messages`. Usage, token, event
+* logging and history are mounted only at their bare paths.
+*
+* @param app - App instance whose `route(path, subApp)` method is called for each mount
+*/
+function registerRoutes(app) {
+	app.route("/chat/completions", completionRoutes);
+	app.route("/models", modelRoutes);
+	app.route("/embeddings", embeddingRoutes);
+	app.route("/usage", usageRoute);
+	app.route("/token", tokenRoute);
+	app.route("/v1/chat/completions", completionRoutes);
+	app.route("/v1/models", modelRoutes);
+	app.route("/v1/embeddings", embeddingRoutes);
+	app.route("/v1/messages", messageRoutes);
+	app.route("/api/event_logging", eventLoggingRoutes);
+	app.route("/history", historyRoutes);
+}
 //#endregion
 //#region src/server.ts
 const server = new Hono();
@@ -7622,17 +7910,7 @@ server.get("/health", (c) => {
 		}
 	}, healthy ? 200 : 503);
 });
-server.route("/chat/completions", completionRoutes);
-server.route("/models", modelRoutes);
-server.route("/embeddings", embeddingRoutes);
-server.route("/usage", usageRoute);
-server.route("/token", tokenRoute);
-server.route("/v1/chat/completions", completionRoutes);
-server.route("/v1/models", modelRoutes);
-server.route("/v1/embeddings", embeddingRoutes);
-server.route("/v1/messages", messageRoutes);
-server.route("/api/event_logging", eventLoggingRoutes);
-server.route("/history", historyRoutes);
+registerRoutes(server);
 //#endregion
 //#region src/start.ts