@hsupu/copilot-api 0.7.0 → 0.8.0

package/dist/main.js CHANGED
@@ -12,9 +12,11 @@ import { getProxyForUrl } from "proxy-from-env";
  import { Agent, ProxyAgent, setGlobalDispatcher } from "undici";
  import { execSync } from "node:child_process";
  import process$1 from "node:process";
+ import { Box, Text, render, useInput, useStdout } from "ink";
+ import React, { useEffect, useState } from "react";
+ import { Fragment, jsx, jsxs } from "react/jsx-runtime";
  import { Hono } from "hono";
  import { cors } from "hono/cors";
- import { logger } from "hono/logger";
  import { streamSSE } from "hono/streaming";
  import { events } from "fetch-event-stream";

@@ -45,7 +47,8 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  rateLimitWait: false,
- showToken: false
+ showToken: false,
+ autoCompact: false
  };

  //#endregion
@@ -104,6 +107,27 @@ var HTTPError = class HTTPError extends Error {
  return new HTTPError(message, response.status, text);
  }
  };
+ /** Parse token limit info from error message */
+ function parseTokenLimitError(message) {
+ const match = message.match(/prompt token count of (\d+) exceeds the limit of (\d+)/);
+ if (match) return {
+ current: Number.parseInt(match[1], 10),
+ limit: Number.parseInt(match[2], 10)
+ };
+ return null;
+ }
+ /** Format Anthropic-compatible error for token limit exceeded */
+ function formatTokenLimitError(current, limit) {
+ const excess = current - limit;
+ const percentage = Math.round(excess / limit * 100);
+ return {
+ type: "error",
+ error: {
+ type: "invalid_request_error",
+ message: `prompt is too long: ${current} tokens > ${limit} maximum (${excess} tokens over, ${percentage}% excess)`
+ }
+ };
+ }
  async function forwardError(c, error) {
  consola.error("Error occurred:", error);
  if (error instanceof HTTPError) {
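A quick illustration of what the two new helpers above produce. The sample numbers here are hypothetical, not taken from a real response:

// Hypothetical example (not from a real Copilot response):
// parseTokenLimitError("prompt token count of 150000 exceeds the limit of 128000")
//   -> { current: 150000, limit: 128000 }
// formatTokenLimitError(150000, 128000) ->
//   { type: "error", error: { type: "invalid_request_error",
//     message: "prompt is too long: 150000 tokens > 128000 maximum (22000 tokens over, 17% excess)" } }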
@@ -114,6 +138,15 @@ async function forwardError(c, error) {
  errorJson = error.responseText;
  }
  consola.error("HTTP error:", errorJson);
+ const copilotError = errorJson;
+ if (copilotError.error?.code === "model_max_prompt_tokens_exceeded") {
+ const tokenInfo = parseTokenLimitError(copilotError.error.message ?? "");
+ if (tokenInfo) {
+ const formattedError = formatTokenLimitError(tokenInfo.current, tokenInfo.limit);
+ consola.info("Returning formatted token limit error:", formattedError);
+ return c.json(formattedError, 400);
+ }
+ }
  return c.json({ error: {
  message: error.responseText,
  type: "error"
@@ -476,7 +509,7 @@ const logout = defineCommand({

  //#endregion
  //#region src/lib/history.ts
- function generateId() {
+ function generateId$1() {
  return Date.now().toString(36) + Math.random().toString(36).slice(2, 9);
  }
  const historyState = {
@@ -492,7 +525,7 @@ function initHistory(enabled, maxEntries) {
  historyState.maxEntries = maxEntries;
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = enabled ? generateId() : "";
+ historyState.currentSessionId = enabled ? generateId$1() : "";
  }
  function isHistoryEnabled() {
  return historyState.enabled;
@@ -506,7 +539,7 @@ function getCurrentSession(endpoint) {
  return historyState.currentSessionId;
  }
  }
- const sessionId = generateId();
+ const sessionId = generateId$1();
  historyState.currentSessionId = sessionId;
  historyState.sessions.set(sessionId, {
  id: sessionId,
@@ -526,7 +559,7 @@ function recordRequest(endpoint, request) {
  const session = historyState.sessions.get(sessionId);
  if (!session) return "";
  const entry = {
- id: generateId(),
+ id: generateId$1(),
  sessionId,
  timestamp: Date.now(),
  endpoint,
@@ -543,7 +576,11 @@ function recordRequest(endpoint, request) {
  historyState.entries.push(entry);
  session.requestCount++;
  if (!session.models.includes(request.model)) session.models.push(request.model);
- while (historyState.entries.length > historyState.maxEntries) {
+ if (request.tools && request.tools.length > 0) {
+ if (!session.toolsUsed) session.toolsUsed = [];
+ for (const tool of request.tools) if (!session.toolsUsed.includes(tool.name)) session.toolsUsed.push(tool.name);
+ }
+ while (historyState.maxEntries > 0 && historyState.entries.length > historyState.maxEntries) {
  const removed = historyState.entries.shift();
  if (removed) {
  if (historyState.entries.filter((e) => e.sessionId === removed.sessionId).length === 0) historyState.sessions.delete(removed.sessionId);
@@ -622,13 +659,13 @@ function getSessionEntries(sessionId) {
  function clearHistory() {
  historyState.entries = [];
  historyState.sessions = /* @__PURE__ */ new Map();
- historyState.currentSessionId = generateId();
+ historyState.currentSessionId = generateId$1();
  }
  function deleteSession(sessionId) {
  if (!historyState.sessions.has(sessionId)) return false;
  historyState.entries = historyState.entries.filter((e) => e.sessionId !== sessionId);
  historyState.sessions.delete(sessionId);
- if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId();
+ if (historyState.currentSessionId === sessionId) historyState.currentSessionId = generateId$1();
  return true;
  }
  function getStats() {
@@ -800,16 +837,16 @@ function generateEnvScript(envVars, commandToRun = "") {
  let commandBlock;
  switch (shell) {
  case "powershell":
- commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replace(/"/g, "`\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `$env:${key} = "${value.replaceAll("\"", "`\"")}"`).join("; ");
  break;
  case "cmd":
  commandBlock = filteredEnvVars.map(([key, value]) => `set ${key}=${value}`).join(" & ");
  break;
  case "fish":
- commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replace(/"/g, "\\\"")}"`).join("; ");
+ commandBlock = filteredEnvVars.map(([key, value]) => `set -gx ${key} "${value.replaceAll("\"", String.raw`\"`)}"`).join("; ");
  break;
  default: {
- const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replace(/"/g, "\\\"")}"`).join(" ");
+ const assignments = filteredEnvVars.map(([key, value]) => `${key}="${value.replaceAll("\"", String.raw`\"`)}"`).join(" ");
  commandBlock = filteredEnvVars.length > 0 ? `export ${assignments}` : "";
  break;
  }
@@ -819,68 +856,580 @@ function generateEnvScript(envVars, commandToRun = "") {
  }

  //#endregion
- //#region src/lib/approval.ts
- const awaitApproval = async () => {
- if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ //#region src/lib/tui/console-renderer.ts
+ function formatDuration$1(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber$1(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens$1(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber$1(input)}/${formatNumber$1(output)}`;
+ }
+ /**
+ * Console renderer that shows request lifecycle
+ * Start: METHOD /path model-name
+ * Complete: METHOD /path 200 1.2s 1.5K/500 model-name
+ */
+ var ConsoleRenderer = class {
+ activeRequests = /* @__PURE__ */ new Map();
+ showActive;
+ constructor(options) {
+ this.showActive = options?.showActive ?? true;
+ }
+ onRequestStart(request) {
+ this.activeRequests.set(request.id, request);
+ if (this.showActive) {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
+ consola.log(`[....] ${request.method} ${request.path}${modelInfo}${queueInfo}`);
+ }
+ }
+ onRequestUpdate(id, update) {
+ const request = this.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ if (this.showActive && update.status === "streaming") {
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ consola.log(`[<-->] ${request.method} ${request.path}${modelInfo} streaming...`);
+ }
+ }
+ onRequestComplete(request) {
+ this.activeRequests.delete(request.id);
+ const status = request.statusCode ?? 0;
+ const duration = formatDuration$1(request.durationMs ?? 0);
+ const tokens = request.model ? formatTokens$1(request.inputTokens, request.outputTokens) : "";
+ const modelInfo = request.model ? ` ${request.model}` : "";
+ const isError = request.status === "error" || status >= 400;
+ const prefix = isError ? "[FAIL]" : "[ OK ]";
+ const tokensPart = tokens ? ` ${tokens}` : "";
+ const content = `${prefix} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
+ if (isError) {
+ const errorInfo = request.error ? `: ${request.error}` : "";
+ consola.log(content + errorInfo);
+ } else consola.log(content);
+ }
+ destroy() {
+ this.activeRequests.clear();
+ }
  };

  //#endregion
- //#region src/lib/queue.ts
- var RequestQueue = class {
- queue = [];
- processing = false;
- lastRequestTime = 0;
- async enqueue(execute, rateLimitSeconds) {
- return new Promise((resolve, reject) => {
- this.queue.push({
- execute,
- resolve,
- reject
- });
- if (this.queue.length > 1) {
- const waitTime = Math.ceil((this.queue.length - 1) * rateLimitSeconds);
- consola.info(`Request queued. Position: ${this.queue.length}, estimated wait: ${waitTime}s`);
- }
- this.processQueue(rateLimitSeconds);
+ //#region src/lib/tui/fullscreen-renderer.tsx
+ const tuiState = {
+ activeRequests: /* @__PURE__ */ new Map(),
+ completedRequests: [],
+ errorRequests: []
+ };
+ const listeners = [];
+ function notifyListeners() {
+ for (const listener of listeners) listener();
+ }
+ function formatDuration(ms) {
+ if (ms < 1e3) return `${ms}ms`;
+ return `${(ms / 1e3).toFixed(1)}s`;
+ }
+ function formatNumber(n) {
+ if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
+ if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
+ return String(n);
+ }
+ function formatTokens(input, output) {
+ if (input === void 0 || output === void 0) return "-";
+ return `${formatNumber(input)}/${formatNumber(output)}`;
+ }
+ function getElapsedTime(startTime) {
+ return formatDuration(Date.now() - startTime);
+ }
+ function TabHeader({ currentTab, counts }) {
+ const tabs = [
+ {
+ key: "active",
+ label: "Active",
+ count: counts.active
+ },
+ {
+ key: "completed",
+ label: "Completed",
+ count: counts.completed
+ },
+ {
+ key: "errors",
+ label: "Errors",
+ count: counts.errors
+ }
+ ];
+ return /* @__PURE__ */ jsxs(Box, {
+ borderStyle: "single",
+ paddingX: 1,
+ children: [tabs.map((tab, idx) => /* @__PURE__ */ jsxs(React.Fragment, { children: [idx > 0 && /* @__PURE__ */ jsx(Text, { children: " │ " }), /* @__PURE__ */ jsxs(Text, {
+ bold: currentTab === tab.key,
+ color: currentTab === tab.key ? "cyan" : void 0,
+ inverse: currentTab === tab.key,
+ children: [
+ " ",
+ "[",
+ idx + 1,
+ "] ",
+ tab.label,
+ " (",
+ tab.count,
+ ")",
+ " "
+ ]
+ })] }, tab.key)), /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: " │ Press 1/2/3 to switch tabs, q to quit"
+ })]
+ });
+ }
+ function getStatusColor(status) {
+ if (status === "streaming") return "yellow";
+ if (status === "queued") return "gray";
+ return "blue";
+ }
+ function getStatusIcon(status) {
+ if (status === "streaming") return "⟳";
+ if (status === "queued") return "◷";
+ return "●";
+ }
+ function ActiveRequestRow({ request }) {
+ const [, setTick] = useState(0);
+ useEffect(() => {
+ const interval = setInterval(() => setTick((t) => t + 1), 1e3);
+ return () => clearInterval(interval);
+ }, []);
+ const statusColor = getStatusColor(request.status);
+ const statusIcon = getStatusIcon(request.status);
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: statusColor,
+ children: [statusIcon, " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [getElapsedTime(request.startTime), " "]
+ }),
+ request.queuePosition !== void 0 && request.queuePosition > 0 && /* @__PURE__ */ jsxs(Text, {
+ color: "gray",
+ children: [
+ "[queue #",
+ request.queuePosition,
+ "] "
+ ]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function CompletedRequestRow({ request }) {
+ const isError = request.status === "error" || (request.statusCode ?? 0) >= 400;
+ return /* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [isError ? "✗" : "✓", " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: isError ? "red" : "green",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [formatTokens(request.inputTokens, request.outputTokens), " "] }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] });
+ }
+ function ErrorRequestRow({ request }) {
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ children: [/* @__PURE__ */ jsxs(Box, { children: [
+ /* @__PURE__ */ jsx(Text, {
+ color: "red",
+ children: "✗ "
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ bold: true,
+ children: request.method
+ }),
+ /* @__PURE__ */ jsxs(Text, { children: [
+ " ",
+ request.path,
+ " "
+ ] }),
+ /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ children: [request.statusCode ?? "-", " "]
+ }),
+ /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [formatDuration(request.durationMs ?? 0), " "]
+ }),
+ /* @__PURE__ */ jsx(Text, {
+ color: "magenta",
+ children: request.model
+ })
+ ] }), request.error && /* @__PURE__ */ jsx(Box, {
+ marginLeft: 2,
+ children: /* @__PURE__ */ jsxs(Text, {
+ color: "red",
+ dimColor: true,
+ children: ["└─ ", request.error]
+ })
+ })]
+ });
+ }
+ function ContentPanel({ currentTab, activeList, completedList, errorList, contentHeight }) {
+ if (currentTab === "active") {
+ if (activeList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No active requests"
  });
+ return /* @__PURE__ */ jsx(Fragment, { children: activeList.slice(0, contentHeight).map((req) => /* @__PURE__ */ jsx(ActiveRequestRow, { request: req }, req.id)) });
  }
- async processQueue(rateLimitSeconds) {
- if (this.processing) return;
- this.processing = true;
- while (this.queue.length > 0) {
- const elapsedMs = Date.now() - this.lastRequestTime;
- const requiredMs = rateLimitSeconds * 1e3;
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
- const waitMs = requiredMs - elapsedMs;
- consola.debug(`Rate limit: waiting ${Math.ceil(waitMs / 1e3)}s`);
- await new Promise((resolve) => setTimeout(resolve, waitMs));
- }
- const request = this.queue.shift();
- if (!request) break;
- this.lastRequestTime = Date.now();
- try {
- const result = await request.execute();
- request.resolve(result);
- } catch (error) {
- request.reject(error);
- }
+ if (currentTab === "completed") {
+ if (completedList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No completed requests"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: completedList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(CompletedRequestRow, { request: req }, req.id)) });
+ }
+ if (errorList.length === 0) return /* @__PURE__ */ jsx(Text, {
+ dimColor: true,
+ children: "No errors"
+ });
+ return /* @__PURE__ */ jsx(Fragment, { children: errorList.slice(-contentHeight).reverse().map((req) => /* @__PURE__ */ jsx(ErrorRequestRow, { request: req }, req.id)) });
+ }
+ function TuiApp() {
+ const [currentTab, setCurrentTab] = useState("active");
+ const [, forceUpdate] = useState(0);
+ const { stdout } = useStdout();
+ useEffect(() => {
+ const listener = () => forceUpdate((n) => n + 1);
+ listeners.push(listener);
+ return () => {
+ const idx = listeners.indexOf(listener);
+ if (idx !== -1) listeners.splice(idx, 1);
+ };
+ }, []);
+ useInput((input, key) => {
+ switch (input) {
+ case "1":
+ setCurrentTab("active");
+ break;
+ case "2":
+ setCurrentTab("completed");
+ break;
+ case "3":
+ setCurrentTab("errors");
+ break;
+ default: if (input === "q" || key.ctrl && input === "c") process.exit(0);
  }
- this.processing = false;
+ });
+ const activeList = Array.from(tuiState.activeRequests.values());
+ const completedList = tuiState.completedRequests;
+ const errorList = tuiState.errorRequests;
+ const counts = {
+ active: activeList.length,
+ completed: completedList.length,
+ errors: errorList.length
+ };
+ const terminalHeight = stdout.rows || 24;
+ const contentHeight = terminalHeight - 3 - 1 - 2;
+ return /* @__PURE__ */ jsxs(Box, {
+ flexDirection: "column",
+ height: terminalHeight,
+ children: [
+ /* @__PURE__ */ jsx(TabHeader, {
+ currentTab,
+ counts
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ flexDirection: "column",
+ height: contentHeight,
+ borderStyle: "single",
+ paddingX: 1,
+ overflow: "hidden",
+ children: /* @__PURE__ */ jsx(ContentPanel, {
+ currentTab,
+ activeList,
+ completedList,
+ errorList,
+ contentHeight
+ })
+ }),
+ /* @__PURE__ */ jsx(Box, {
+ paddingX: 1,
+ children: /* @__PURE__ */ jsxs(Text, {
+ dimColor: true,
+ children: [
+ "copilot-api │ Active: ",
+ counts.active,
+ " │ Completed: ",
+ counts.completed,
+ " ",
+ "│ Errors: ",
+ counts.errors
+ ]
+ })
+ })
+ ]
+ });
+ }
+ /**
+ * Fullscreen TUI renderer using Ink
+ * Provides interactive terminal interface with tabs
+ */
+ var FullscreenRenderer = class {
+ inkInstance = null;
+ maxHistory = 100;
+ constructor(options) {
+ if (options?.maxHistory !== void 0) this.maxHistory = options.maxHistory;
  }
- get length() {
- return this.queue.length;
+ start() {
+ if (this.inkInstance) return;
+ this.inkInstance = render(/* @__PURE__ */ jsx(TuiApp, {}), {});
+ }
+ onRequestStart(request) {
+ tuiState.activeRequests.set(request.id, { ...request });
+ notifyListeners();
+ }
+ onRequestUpdate(id, update) {
+ const request = tuiState.activeRequests.get(id);
+ if (!request) return;
+ Object.assign(request, update);
+ notifyListeners();
+ }
+ onRequestComplete(request) {
+ tuiState.activeRequests.delete(request.id);
+ if (request.status === "error" || (request.statusCode ?? 0) >= 400) {
+ tuiState.errorRequests.push({ ...request });
+ while (tuiState.errorRequests.length > this.maxHistory) tuiState.errorRequests.shift();
+ }
+ tuiState.completedRequests.push({ ...request });
+ while (tuiState.completedRequests.length > this.maxHistory) tuiState.completedRequests.shift();
+ notifyListeners();
+ }
+ destroy() {
+ if (this.inkInstance) {
+ this.inkInstance.unmount();
+ this.inkInstance = null;
+ }
+ tuiState.activeRequests.clear();
+ tuiState.completedRequests = [];
+ tuiState.errorRequests = [];
  }
  };
- const requestQueue = new RequestQueue();
+
+ //#endregion
+ //#region src/lib/tui/tracker.ts
+ function generateId() {
+ return Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
+ }
+ var RequestTracker = class {
+ requests = /* @__PURE__ */ new Map();
+ renderer = null;
+ completedQueue = [];
+ historySize = 5;
+ completedDisplayMs = 2e3;
+ setRenderer(renderer) {
+ this.renderer = renderer;
+ }
+ setOptions(options) {
+ if (options.historySize !== void 0) this.historySize = options.historySize;
+ if (options.completedDisplayMs !== void 0) this.completedDisplayMs = options.completedDisplayMs;
+ }
+ /**
+ * Start tracking a new request
+ * Returns the tracking ID
+ */
+ startRequest(method, path$1, model) {
+ const id = generateId();
+ const request = {
+ id,
+ method,
+ path: path$1,
+ model,
+ startTime: Date.now(),
+ status: "executing"
+ };
+ this.requests.set(id, request);
+ this.renderer?.onRequestStart(request);
+ return id;
+ }
+ /**
+ * Update request status
+ */
+ updateRequest(id, update) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ if (update.status !== void 0) request.status = update.status;
+ if (update.statusCode !== void 0) request.statusCode = update.statusCode;
+ if (update.durationMs !== void 0) request.durationMs = update.durationMs;
+ if (update.inputTokens !== void 0) request.inputTokens = update.inputTokens;
+ if (update.outputTokens !== void 0) request.outputTokens = update.outputTokens;
+ if (update.error !== void 0) request.error = update.error;
+ if (update.queuePosition !== void 0) request.queuePosition = update.queuePosition;
+ this.renderer?.onRequestUpdate(id, update);
+ }
+ /**
+ * Mark request as completed
+ */
+ completeRequest(id, statusCode, usage) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = statusCode >= 200 && statusCode < 400 ? "completed" : "error";
+ request.statusCode = statusCode;
+ request.durationMs = Date.now() - request.startTime;
+ if (usage) {
+ request.inputTokens = usage.inputTokens;
+ request.outputTokens = usage.outputTokens;
+ }
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ setTimeout(() => {
+ const idx = this.completedQueue.indexOf(request);
+ if (idx !== -1) this.completedQueue.splice(idx, 1);
+ }, this.completedDisplayMs);
+ }
+ /**
+ * Mark request as failed with error
+ */
+ failRequest(id, error) {
+ const request = this.requests.get(id);
+ if (!request) return;
+ request.status = "error";
+ request.error = error;
+ request.durationMs = Date.now() - request.startTime;
+ this.renderer?.onRequestComplete(request);
+ this.requests.delete(id);
+ this.completedQueue.push(request);
+ while (this.completedQueue.length > this.historySize) this.completedQueue.shift();
+ }
+ /**
+ * Get all active requests
+ */
+ getActiveRequests() {
+ return Array.from(this.requests.values());
+ }
+ /**
+ * Get recently completed requests
+ */
+ getCompletedRequests() {
+ return [...this.completedQueue];
+ }
+ /**
+ * Get request by ID
+ */
+ getRequest(id) {
+ return this.requests.get(id);
+ }
+ /**
+ * Clear all tracked requests
+ */
+ clear() {
+ this.requests.clear();
+ this.completedQueue = [];
+ }
+ };
+ const requestTracker = new RequestTracker();
+
+ //#endregion
+ //#region src/lib/tui/middleware.ts
  /**
- * Execute a request with rate limiting via queue.
- * Requests are queued and processed sequentially at the configured rate.
+ * Custom logger middleware that tracks requests through the TUI system
+ * Shows single-line output: METHOD /path 200 1.2s 1.5K/500 model-name
+ *
+ * For streaming responses (SSE), the handler is responsible for calling
+ * completeRequest after the stream finishes.
  */
- async function executeWithRateLimit(state$1, execute) {
- if (state$1.rateLimitSeconds === void 0) return execute();
- return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ function tuiLogger() {
+ return async (c, next) => {
+ const method = c.req.method;
+ const path$1 = c.req.path;
+ const trackingId = requestTracker.startRequest(method, path$1, "");
+ c.set("trackingId", trackingId);
+ try {
+ await next();
+ if ((c.res.headers.get("content-type") ?? "").includes("text/event-stream")) return;
+ const status = c.res.status;
+ const inputTokens = c.res.headers.get("x-input-tokens");
+ const outputTokens = c.res.headers.get("x-output-tokens");
+ const model = c.res.headers.get("x-model");
+ if (model) {
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ requestTracker.completeRequest(trackingId, status, inputTokens && outputTokens ? {
+ inputTokens: Number.parseInt(inputTokens, 10),
+ outputTokens: Number.parseInt(outputTokens, 10)
+ } : void 0);
+ } catch (error) {
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Unknown error");
+ throw error;
+ }
+ };
+ }
+
+ //#endregion
+ //#region src/lib/tui/index.ts
+ /**
+ * Initialize the TUI system
+ * @param options.mode - "console" for simple log output (default), "fullscreen" for interactive TUI
+ */
+ function initTui(options) {
+ const enabled = options?.enabled ?? process.stdout.isTTY;
+ const mode = options?.mode ?? "console";
+ if (enabled) if (mode === "fullscreen") {
+ const renderer = new FullscreenRenderer({ maxHistory: options?.historySize ?? 100 });
+ requestTracker.setRenderer(renderer);
+ renderer.start();
+ } else {
+ const renderer = new ConsoleRenderer();
+ requestTracker.setRenderer(renderer);
+ }
+ if (options?.historySize !== void 0 || options?.completedDisplayMs !== void 0) requestTracker.setOptions({
+ historySize: options.historySize,
+ completedDisplayMs: options.completedDisplayMs
+ });
  }

+ //#endregion
+ //#region src/lib/approval.ts
+ const awaitApproval = async () => {
+ if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", 403, JSON.stringify({ message: "Request rejected" }));
+ };
+
  //#endregion
  //#region src/lib/tokenizer.ts
  const ENCODING_MAP = {
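A minimal sketch of how the new TUI pieces above fit together, assuming a Hono app like the one in this bundle. The wiring itself is illustrative, not the package's documented API:

// Illustrative wiring only:
const app = new Hono();
app.use(tuiLogger()); // every request gets tracked via requestTracker
initTui({ mode: "console" }); // or { mode: "fullscreen", historySize: 100 }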
@@ -1085,6 +1634,229 @@ const getTokenCount = async (payload, model) => {
  };
  };

+ //#endregion
+ //#region src/lib/auto-compact.ts
+ const DEFAULT_CONFIG = {
+ targetTokens: 1e5,
+ safetyMarginPercent: 10
+ };
+ /**
+ * Check if payload needs compaction based on model limits.
+ * Uses a safety margin to account for token counting differences.
+ */
+ async function checkNeedsCompaction(payload, model, safetyMarginPercent = 10) {
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
+ return {
+ needed: currentTokens > limit,
+ currentTokens,
+ limit
+ };
+ }
+ /**
+ * Calculate approximate token count for a single message.
+ * This is a fast estimation for splitting decisions.
+ */
+ function estimateMessageTokens(message) {
+ let text = "";
+ if (typeof message.content === "string") text = message.content;
+ else if (Array.isArray(message.content)) {
+ for (const part of message.content) if (part.type === "text") text += part.text;
+ else if ("image_url" in part) text += part.image_url.url;
+ }
+ if (message.tool_calls) text += JSON.stringify(message.tool_calls);
+ return Math.ceil(text.length / 4) + 10;
+ }
+ /**
+ * Extract system messages from the beginning of the message list.
+ */
+ function extractSystemMessages(messages) {
+ const systemMessages = [];
+ let i = 0;
+ while (i < messages.length) {
+ const msg = messages[i];
+ if (msg.role === "system" || msg.role === "developer") {
+ systemMessages.push(msg);
+ i++;
+ } else break;
+ }
+ return {
+ systemMessages,
+ remainingMessages: messages.slice(i)
+ };
+ }
+ /**
+ * Find messages to keep from the end to stay under target tokens.
+ * Returns the starting index of messages to preserve.
+ */
+ function findPreserveIndex(messages, targetTokens, systemTokens) {
+ const availableTokens = targetTokens - systemTokens - 500;
+ let accumulatedTokens = 0;
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msgTokens = estimateMessageTokens(messages[i]);
+ if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
+ accumulatedTokens += msgTokens;
+ }
+ return 0;
+ }
+ /**
+ * Calculate estimated tokens for system messages.
+ */
+ function estimateSystemTokens(systemMessages) {
+ return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+ }
+ /**
+ * Create a truncation marker message.
+ */
+ function createTruncationMarker(removedCount) {
+ return {
+ role: "user",
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+ };
+ }
+ /**
+ * Perform auto-compaction on a payload that exceeds token limits.
+ * This uses simple truncation - no LLM calls required.
+ */
+ async function autoCompact(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const originalTokens = (await getTokenCount(payload, model)).input;
+ const rawLimit = model.capabilities.limits.max_prompt_tokens ?? 128e3;
+ const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ if (originalTokens <= limit) return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
+ const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
+ const systemTokens = estimateSystemTokens(systemMessages);
+ consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
+ const effectiveTarget = Math.min(cfg.targetTokens, limit);
+ const preserveIndex = findPreserveIndex(remainingMessages, effectiveTarget, systemTokens);
+ if (preserveIndex === 0) {
+ consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+ return {
+ payload,
+ wasCompacted: false,
+ originalTokens,
+ compactedTokens: originalTokens,
+ removedMessageCount: 0
+ };
+ }
+ const removedMessages = remainingMessages.slice(0, preserveIndex);
+ const preservedMessages = remainingMessages.slice(preserveIndex);
+ consola.info(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
+ const truncationMarker = createTruncationMarker(removedMessages.length);
+ const newPayload = {
+ ...payload,
+ messages: [
+ ...systemMessages,
+ truncationMarker,
+ ...preservedMessages
+ ]
+ };
+ const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: Reduced from ${originalTokens} to ${newTokenCount.input} tokens`);
+ if (newTokenCount.input > limit) {
+ consola.warn(`Auto-compact: Still over limit (${newTokenCount.input} > ${limit}), trying more aggressive truncation`);
+ const aggressiveTarget = Math.floor(effectiveTarget * .7);
+ if (aggressiveTarget < 2e4) {
+ consola.error("Auto-compact: Cannot reduce further, target too low");
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ return autoCompact(payload, model, {
+ ...cfg,
+ targetTokens: aggressiveTarget
+ });
+ }
+ return {
+ payload: newPayload,
+ wasCompacted: true,
+ originalTokens,
+ compactedTokens: newTokenCount.input,
+ removedMessageCount: removedMessages.length
+ };
+ }
+ /**
+ * Create a marker to append to responses indicating auto-compaction occurred.
+ */
+ function createCompactionMarker(result) {
+ if (!result.wasCompacted) return "";
+ const reduction = result.originalTokens - result.compactedTokens;
+ const percentage = Math.round(reduction / result.originalTokens * 100);
+ return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
+ }
+
+ //#endregion
+ //#region src/lib/queue.ts
+ var RequestQueue = class {
+ queue = [];
+ processing = false;
+ lastRequestTime = 0;
+ async enqueue(execute, rateLimitSeconds) {
+ return new Promise((resolve, reject) => {
+ this.queue.push({
+ execute,
+ resolve,
+ reject
+ });
+ if (this.queue.length > 1) {
+ const waitTime = Math.ceil((this.queue.length - 1) * rateLimitSeconds);
+ consola.info(`Request queued. Position: ${this.queue.length}, estimated wait: ${waitTime}s`);
+ }
+ this.processQueue(rateLimitSeconds);
+ });
+ }
+ async processQueue(rateLimitSeconds) {
+ if (this.processing) return;
+ this.processing = true;
+ while (this.queue.length > 0) {
+ const elapsedMs = Date.now() - this.lastRequestTime;
+ const requiredMs = rateLimitSeconds * 1e3;
+ if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
+ const waitMs = requiredMs - elapsedMs;
+ consola.debug(`Rate limit: waiting ${Math.ceil(waitMs / 1e3)}s`);
+ await new Promise((resolve) => setTimeout(resolve, waitMs));
+ }
+ const request = this.queue.shift();
+ if (!request) break;
+ this.lastRequestTime = Date.now();
+ try {
+ const result = await request.execute();
+ request.resolve(result);
+ } catch (error) {
+ request.reject(error);
+ }
+ }
+ this.processing = false;
+ }
+ get length() {
+ return this.queue.length;
+ }
+ };
+ const requestQueue = new RequestQueue();
+ /**
+ * Execute a request with rate limiting via queue.
+ * Requests are queued and processed sequentially at the configured rate.
+ */
+ async function executeWithRateLimit(state$1, execute) {
+ if (state$1.rateLimitSeconds === void 0) return execute();
+ return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
+ }
+
  //#endregion
  //#region src/services/copilot/create-chat-completions.ts
  const createChatCompletions = async (payload) => {
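Worked numbers for the auto-compact thresholds introduced above, using DEFAULT_CONFIG's 10% margin. The 128000-token figure is just the code's fallback value, not a specific model's limit:

// limit = Math.floor(128000 * (1 - 10 / 100)) // 115200
// effectiveTarget = Math.min(1e5, 115200) // 100000
// estimateMessageTokens for a 2000-character message: Math.ceil(2000 / 4) + 10 = 510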
@@ -1112,20 +1884,83 @@ const createChatCompletions = async (payload) => {
  //#region src/routes/chat-completions/handler.ts
  async function handleCompletion$1(c) {
  const startTime = Date.now();
- let payload = await c.req.json();
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
- const historyId = recordRequest("openai", {
- model: payload.model,
- messages: convertOpenAIMessages(payload.messages),
- stream: payload.stream ?? false,
- tools: payload.tools?.map((t) => ({
- name: t.function.name,
- description: t.function.description
- })),
- max_tokens: payload.max_tokens ?? void 0,
- temperature: payload.temperature ?? void 0
- });
- const selectedModel = state.models?.data.find((model) => model.id === payload.model);
+ const originalPayload = await c.req.json();
+ consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
+ const trackingId = c.get("trackingId");
+ updateTrackerModel$1(trackingId, originalPayload.model);
+ const ctx = {
+ historyId: recordRequest("openai", {
+ model: originalPayload.model,
+ messages: convertOpenAIMessages(originalPayload.messages),
+ stream: originalPayload.stream ?? false,
+ tools: originalPayload.tools?.map((t) => ({
+ name: t.function.name,
+ description: t.function.description
+ })),
+ max_tokens: originalPayload.max_tokens ?? void 0,
+ temperature: originalPayload.temperature ?? void 0
+ }),
+ trackingId,
+ startTime
+ };
+ const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
+ await logTokenCount(originalPayload, selectedModel);
+ const { finalPayload, compactResult } = await buildFinalPayload$1(originalPayload, selectedModel);
+ if (compactResult) ctx.compactResult = compactResult;
+ const payload = isNullish(finalPayload.max_tokens) ? {
+ ...finalPayload,
+ max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ } : finalPayload;
+ if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
+ if (state.manualApprove) await awaitApproval();
+ try {
+ const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
+ if (isNonStreaming$1(response)) return handleNonStreamingResponse$1(c, response, ctx);
+ consola.debug("Streaming response");
+ updateTrackerStatus$1(trackingId, "streaming");
+ return streamSSE(c, async (stream) => {
+ await handleStreamingResponse$1({
+ stream,
+ response,
+ payload,
+ ctx
+ });
+ });
+ } catch (error) {
+ recordErrorResponse$1(ctx, payload.model, error);
+ throw error;
+ }
+ }
+ async function buildFinalPayload$1(payload, model) {
+ if (!state.autoCompact || !model) {
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ try {
+ const check = await checkNeedsCompaction(payload, model);
+ consola.info(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+ if (!check.needed) return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+ const compactResult = await autoCompact(payload, model);
+ return {
+ finalPayload: compactResult.payload,
+ compactResult
+ };
+ } catch (error) {
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
+ return {
+ finalPayload: payload,
+ compactResult: null
+ };
+ }
+ }
+ async function logTokenCount(payload, selectedModel) {
  try {
  if (selectedModel) {
  const tokenCount = await getTokenCount(payload, selectedModel);
@@ -1134,146 +1969,236 @@ async function handleCompletion$1(c) {
  } catch (error) {
  consola.warn("Failed to calculate token count:", error);
  }
- if (state.manualApprove) await awaitApproval();
- if (isNullish(payload.max_tokens)) {
- payload = {
- ...payload,
- max_tokens: selectedModel?.capabilities.limits.max_output_tokens
+ }
+ function updateTrackerModel$1(trackingId, model) {
+ if (!trackingId) return;
+ const request = requestTracker.getRequest(trackingId);
+ if (request) request.model = model;
+ }
+ function updateTrackerStatus$1(trackingId, status) {
+ if (!trackingId) return;
+ requestTracker.updateRequest(trackingId, { status });
+ }
+ function recordErrorResponse$1(ctx, model, error) {
+ recordResponse(ctx.historyId, {
+ success: false,
+ model,
+ usage: {
+ input_tokens: 0,
+ output_tokens: 0
+ },
+ error: error instanceof Error ? error.message : "Unknown error",
+ content: null
+ }, Date.now() - ctx.startTime);
+ }
+ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
+ consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
+ let response = originalResponse;
+ if (ctx.compactResult?.wasCompacted && response.choices[0]?.message.content) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ response = {
+ ...response,
+ choices: response.choices.map((choice$1, i) => i === 0 ? {
+ ...choice$1,
+ message: {
+ ...choice$1.message,
+ content: (choice$1.message.content ?? "") + marker
+ }
+ } : choice$1)
  };
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
  }
+ const choice = response.choices[0];
+ const usage = response.usage;
+ recordResponse(ctx.historyId, {
+ success: true,
+ model: response.model,
+ usage: {
+ input_tokens: usage?.prompt_tokens ?? 0,
+ output_tokens: usage?.completion_tokens ?? 0
+ },
+ stop_reason: choice.finish_reason,
+ content: buildResponseContent(choice),
+ toolCalls: extractToolCalls(choice)
+ }, Date.now() - ctx.startTime);
+ if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
+ inputTokens: usage.prompt_tokens,
+ outputTokens: usage.completion_tokens
+ });
+ return c.json(response);
+ }
+ function buildResponseContent(choice) {
+ return {
+ role: choice.message.role,
+ content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
+ tool_calls: choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ type: tc.type,
+ function: {
+ name: tc.function.name,
+ arguments: tc.function.arguments
+ }
+ }))
+ };
+ }
+ function extractToolCalls(choice) {
+ return choice.message.tool_calls?.map((tc) => ({
+ id: tc.id,
+ name: tc.function.name,
+ input: tc.function.arguments
+ }));
+ }
+ function createStreamAccumulator() {
+ return {
+ model: "",
+ inputTokens: 0,
+ outputTokens: 0,
+ finishReason: "",
+ content: "",
+ toolCalls: [],
+ toolCallMap: /* @__PURE__ */ new Map()
+ };
+ }
+ async function handleStreamingResponse$1(opts) {
+ const { stream, response, payload, ctx } = opts;
+ const acc = createStreamAccumulator();
  try {
- const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
- if (isNonStreaming$1(response)) {
- consola.debug("Non-streaming response:", JSON.stringify(response));
- const choice = response.choices[0];
- recordResponse(historyId, {
- success: true,
- model: response.model,
- usage: {
- input_tokens: response.usage?.prompt_tokens ?? 0,
- output_tokens: response.usage?.completion_tokens ?? 0
- },
- stop_reason: choice?.finish_reason ?? void 0,
- content: choice?.message ? {
- role: choice.message.role,
- content: typeof choice.message.content === "string" ? choice.message.content : JSON.stringify(choice.message.content),
- tool_calls: choice.message.tool_calls?.map((tc) => ({
- id: tc.id,
- type: tc.type,
- function: {
- name: tc.function.name,
- arguments: tc.function.arguments
- }
- }))
- } : null,
- toolCalls: choice?.message?.tool_calls?.map((tc) => ({
- id: tc.id,
- name: tc.function.name,
- input: tc.function.arguments
- }))
- }, Date.now() - startTime);
- return c.json(response);
+ for await (const chunk of response) {
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
+ parseStreamChunk(chunk, acc);
+ await stream.writeSSE(chunk);
  }
- consola.debug("Streaming response");
- return streamSSE(c, async (stream) => {
- let streamModel = "";
- let streamInputTokens = 0;
- let streamOutputTokens = 0;
- let streamFinishReason = "";
- let streamContent = "";
- const streamToolCalls = [];
- const toolCallAccumulators = /* @__PURE__ */ new Map();
- try {
- for await (const chunk of response) {
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
- if (chunk.data && chunk.data !== "[DONE]") try {
- const parsed = JSON.parse(chunk.data);
- if (parsed.model && !streamModel) streamModel = parsed.model;
- if (parsed.usage) {
- streamInputTokens = parsed.usage.prompt_tokens;
- streamOutputTokens = parsed.usage.completion_tokens;
- }
- const choice = parsed.choices[0];
- if (choice?.delta?.content) streamContent += choice.delta.content;
- if (choice?.delta?.tool_calls) for (const tc of choice.delta.tool_calls) {
- const idx = tc.index;
- if (!toolCallAccumulators.has(idx)) toolCallAccumulators.set(idx, {
- id: tc.id || "",
- name: tc.function?.name || "",
- arguments: ""
- });
- const acc = toolCallAccumulators.get(idx);
- if (acc) {
- if (tc.id) acc.id = tc.id;
- if (tc.function?.name) acc.name = tc.function.name;
- if (tc.function?.arguments) acc.arguments += tc.function.arguments;
- }
- }
- if (choice?.finish_reason) streamFinishReason = choice.finish_reason;
- } catch {}
- await stream.writeSSE(chunk);
- }
- for (const tc of toolCallAccumulators.values()) if (tc.id && tc.name) streamToolCalls.push({
- id: tc.id,
- name: tc.name,
- arguments: tc.arguments
- });
- const toolCallsForContent = streamToolCalls.map((tc) => ({
- id: tc.id,
- type: "function",
- function: {
- name: tc.name,
- arguments: tc.arguments
- }
- }));
- recordResponse(historyId, {
- success: true,
- model: streamModel || payload.model,
- usage: {
- input_tokens: streamInputTokens,
- output_tokens: streamOutputTokens
- },
- stop_reason: streamFinishReason || void 0,
- content: {
- role: "assistant",
- content: streamContent || void 0,
- tool_calls: toolCallsForContent.length > 0 ? toolCallsForContent : void 0
- },
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
- id: tc.id,
- name: tc.name,
- input: tc.arguments
- })) : void 0
- }, Date.now() - startTime);
- } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: streamModel || payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Stream error",
- content: null
- }, Date.now() - startTime);
- throw error;
- }
- });
+ if (ctx.compactResult?.wasCompacted) {
+ const marker = createCompactionMarker(ctx.compactResult);
+ const markerChunk = {
+ id: `compact-marker-${Date.now()}`,
+ object: "chat.completion.chunk",
+ created: Math.floor(Date.now() / 1e3),
+ model: acc.model || payload.model,
+ choices: [{
+ index: 0,
+ delta: { content: marker },
+ finish_reason: null,
+ logprobs: null
+ }]
+ };
+ await stream.writeSSE({
+ data: JSON.stringify(markerChunk),
+ event: "message"
+ });
+ acc.content += marker;
+ }
+ recordStreamSuccess(acc, payload.model, ctx);
+ completeTracking$1(ctx.trackingId, acc.inputTokens, acc.outputTokens);
  } catch (error) {
- recordResponse(historyId, {
- success: false,
- model: payload.model,
- usage: {
- input_tokens: 0,
- output_tokens: 0
- },
- error: error instanceof Error ? error.message : "Unknown error",
- content: null
- }, Date.now() - startTime);
+ recordStreamError({
+ acc,
+ fallbackModel: payload.model,
+ ctx,
+ error
+ });
+ failTracking$1(ctx.trackingId, error);
  throw error;
  }
  }
+ function parseStreamChunk(chunk, acc) {
+ if (!chunk.data || chunk.data === "[DONE]") return;
+ try {
+ const parsed = JSON.parse(chunk.data);
+ accumulateModel(parsed, acc);
+ accumulateUsage(parsed, acc);
+ accumulateChoice(parsed.choices[0], acc);
+ } catch {}
+ }
+ function accumulateModel(parsed, acc) {
+ if (parsed.model && !acc.model) acc.model = parsed.model;
+ }
+ function accumulateUsage(parsed, acc) {
+ if (parsed.usage) {
+ acc.inputTokens = parsed.usage.prompt_tokens;
+ acc.outputTokens = parsed.usage.completion_tokens;
+ }
+ }
+ function accumulateChoice(choice, acc) {
+ if (!choice) return;
+ if (choice.delta.content) acc.content += choice.delta.content;
+ if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
+ }
+ function accumulateToolCalls(toolCalls, acc) {
+ if (!toolCalls) return;
+ for (const tc of toolCalls) {
+ const idx = tc.index;
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
+ id: tc.id ?? "",
+ name: tc.function?.name ?? "",
+ arguments: ""
+ });
+ const item = acc.toolCallMap.get(idx);
+ if (item) {
+ if (tc.id) item.id = tc.id;
+ if (tc.function?.name) item.name = tc.function.name;
+ if (tc.function?.arguments) item.arguments += tc.function.arguments;
+ }
+ }
+ }
+ function recordStreamSuccess(acc, fallbackModel, ctx) {
+ for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
+ const toolCalls = acc.toolCalls.map((tc) => ({
+ id: tc.id,
+ type: "function",
+ function: {
+ name: tc.name,
+ arguments: tc.arguments
+ }
+ }));
+ recordResponse(ctx.historyId, {
+ success: true,
+ model: acc.model || fallbackModel,
+ usage: {
+ input_tokens: acc.inputTokens,
+ output_tokens: acc.outputTokens
+ },
+ stop_reason: acc.finishReason || void 0,
+ content: {
+ role: "assistant",
+ content: acc.content,
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
+ },
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls.map((tc) => ({
+ id: tc.id,
+ name: tc.name,
+ input: tc.arguments
+ })) : void 0
+ }, Date.now() - ctx.startTime);
+ }
+ function recordStreamError(opts) {
+ const { acc, fallbackModel, ctx, error } = opts;
+ recordResponse(ctx.historyId, {
+ success: false,
+ model: acc.model || fallbackModel,
+ usage: {
+ input_tokens: 0,
+ output_tokens: 0
+ },
+ error: error instanceof Error ? error.message : "Stream error",
+ content: null
+ }, Date.now() - ctx.startTime);
+ }
+ function completeTracking$1(trackingId, inputTokens, outputTokens) {
+ if (!trackingId) return;
+ requestTracker.updateRequest(trackingId, {
+ inputTokens,
+ outputTokens
+ });
+ requestTracker.completeRequest(trackingId, 200, {
+ inputTokens,
+ outputTokens
+ });
+ }
+ function failTracking$1(trackingId, error) {
+ if (!trackingId) return;
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
+ }
  const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
  function convertOpenAIMessages(messages) {
  return messages.map((msg) => {
@@ -1461,6 +2386,78 @@ function getContentText(content) {
1461
2386
  return JSON.stringify(content, null, 2);
1462
2387
  }
1463
2388
 
2389
+ // Extract real user text, skipping system tags like <system-reminder>, <ide_opened_file>, etc.
2390
+ function extractRealUserText(content) {
2391
+ if (!content) return '';
2392
+ let text = '';
2393
+ if (typeof content === 'string') {
2394
+ text = content;
2395
+ } else if (Array.isArray(content)) {
2396
+ text = content
2397
+ .filter(c => c.type === 'text' && c.text)
2398
+ .map(c => c.text)
2399
+ .join('\\n');
2400
+ }
2401
+ if (!text) return '';
2402
+
2403
+ // Remove system tags and their content
2404
+ const systemTags = [
2405
+ 'system-reminder',
2406
+ 'ide_opened_file',
2407
+ 'ide_selection',
2408
+ 'ide_visible_files',
2409
+ 'ide_diagnostics',
2410
+ 'ide_cursor_position',
2411
+ 'user-prompt-submit-hook',
2412
+ 'antml:function_calls',
2413
+ 'antml:invoke',
2414
+ 'antml:parameter'
2415
+ ];
2416
+
2417
+ let cleaned = text;
2418
+ for (const tag of systemTags) {
2419
+ // Remove <tag>...</tag> blocks (including multiline)
2420
+ const regex = new RegExp('<' + tag + '[^>]*>[\\\\s\\\\S]*?</' + tag + '>', 'gi');
2421
+ cleaned = cleaned.replace(regex, '');
2422
+ // Remove self-closing <tag ... /> or <tag ...>content without closing
2423
+ const selfClosingRegex = new RegExp('<' + tag + '[^>]*/>', 'gi');
2424
+ cleaned = cleaned.replace(selfClosingRegex, '');
2425
+ }
2426
+
2427
+ // Trim whitespace and return
2428
+ return cleaned.trim();
2429
+ }
2430
+
2431
+ // Get preview text from assistant message content
2432
+ function getAssistantPreview(content) {
2433
+ if (!content) return '';
2434
+ if (typeof content === 'string') {
2435
+ const text = content.trim();
2436
+ if (text.length > 0) {
2437
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2438
+ }
2439
+ return '';
2440
+ }
2441
+ if (Array.isArray(content)) {
2442
+ // First try to get text content
2443
+ const textParts = content.filter(c => c.type === 'text' && c.text).map(c => c.text);
2444
+ if (textParts.length > 0) {
2445
+ const text = textParts.join('\\n').trim();
2446
+ if (text.length > 0) {
2447
+ return text.length > 80 ? text.slice(0, 80) + '...' : text;
2448
+ }
2449
+ }
2450
+ // If no text, show tool_use info
2451
+ const toolUses = content.filter(c => c.type === 'tool_use');
2452
+ if (toolUses.length === 1) {
2453
+ return '[tool_use: ' + toolUses[0].name + ']';
2454
+ } else if (toolUses.length > 1) {
2455
+ return '[' + toolUses.length + ' tool_uses]';
2456
+ }
2457
+ }
2458
+ return '';
2459
+ }
2460
+
1464
2461
  function formatContentForDisplay(content) {
1465
2462
  if (!content) return { summary: '', raw: 'null' };
1466
2463
  if (typeof content === 'string') return { summary: content, raw: JSON.stringify(content) };
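A self-contained sketch of one pass of the tag-stripping loop in extractRealUserText above. Note the source shows doubled backslashes ([\\s\\S]) because the dashboard script is embedded in a template literal; standalone JavaScript needs only one level of escaping:

  // Strip one system tag and its content, as the loop above does per tag.
  const stripTag = (text, tag) =>
    text.replace(new RegExp("<" + tag + "[^>]*>[\\s\\S]*?</" + tag + ">", "gi"), "").trim();

  console.log(stripTag("<system-reminder>internal note</system-reminder>\nFix the failing test", "system-reminder"));
  // -> "Fix the failing test"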
@@ -1516,6 +2513,7 @@ async function loadSessions() {
1516
2513
  for (const s of data.sessions) {
1517
2514
  const isActive = currentSessionId === s.id;
1518
2515
  const shortId = s.id.slice(0, 8);
2516
+ const toolCount = s.toolsUsed ? s.toolsUsed.length : 0;
1519
2517
  html += \`
1520
2518
  <div class="session-item\${isActive ? ' active' : ''}" onclick="selectSession('\${s.id}')">
1521
2519
  <div class="session-meta">
@@ -1526,6 +2524,7 @@ async function loadSessions() {
1526
2524
  <span style="color:var(--text-dim);font-family:monospace;font-size:10px;">\${shortId}</span>
1527
2525
  <span>\${s.requestCount} req</span>
1528
2526
  <span>\${formatNumber(s.totalInputTokens + s.totalOutputTokens)} tok</span>
2527
+ \${toolCount > 0 ? '<span class="badge tool">' + toolCount + ' tool' + (toolCount > 1 ? 's' : '') + '</span>' : ''}
1529
2528
  <span class="badge \${s.endpoint}">\${s.endpoint}</span>
1530
2529
  </div>
1531
2530
  </div>
@@ -1584,6 +2583,37 @@ async function loadEntries() {
1584
2583
  const tokens = e.response ? formatNumber(e.response.usage.input_tokens) + '/' + formatNumber(e.response.usage.output_tokens) : '-';
1585
2584
  const shortId = e.id.slice(0, 8);
1586
2585
 
2586
+ // Get preview: show meaningful context about the request
2587
+ let lastUserMsg = '';
2588
+ const messages = e.request.messages;
2589
+ const lastMsg = messages[messages.length - 1];
2590
+
2591
+ // Last user message: may wrap tool_results (fall back to the previous assistant turn) or carry real text
2592
+ if (lastMsg && lastMsg.role === 'user') {
2593
+ const content = lastMsg.content;
2594
+ if (Array.isArray(content) && content.length > 0 && content[0].type === 'tool_result') {
2595
+ // This is a tool_result response - look for previous assistant message
2596
+ const prevMsg = messages.length >= 2 ? messages[messages.length - 2] : null;
2597
+ if (prevMsg && prevMsg.role === 'assistant') {
2598
+ lastUserMsg = getAssistantPreview(prevMsg.content);
2599
+ }
2600
+ // If no meaningful preview from assistant, show tool_result count
2601
+ if (!lastUserMsg) {
2602
+ const toolResults = content.filter(c => c.type === 'tool_result');
2603
+ lastUserMsg = '[' + toolResults.length + ' tool_result' + (toolResults.length > 1 ? 's' : '') + ']';
2604
+ }
2605
+ } else {
2606
+ // Regular user message, extract real text
2607
+ const realText = extractRealUserText(lastMsg.content);
2608
+ if (realText.length > 0) {
2609
+ lastUserMsg = realText.slice(0, 80);
2610
+ if (realText.length > 80) lastUserMsg += '...';
2611
+ }
2612
+ }
2613
+ } else if (lastMsg && lastMsg.role === 'assistant') {
2614
+ lastUserMsg = getAssistantPreview(lastMsg.content);
2615
+ }
2616
+
1587
2617
  html += \`
1588
2618
  <div class="entry-item\${isSelected ? ' selected' : ''}" onclick="showDetail('\${e.id}')">
1589
2619
  <div class="entry-header">
@@ -1596,6 +2626,7 @@ async function loadEntries() {
1596
2626
  <span class="entry-tokens">\${tokens}</span>
1597
2627
  <span class="entry-duration">\${formatDuration(e.durationMs)}</span>
1598
2628
  </div>
2629
+ \${lastUserMsg ? '<div class="entry-preview">' + escapeHtml(lastUserMsg) + '</div>' : ''}
1599
2630
  </div>
1600
2631
  \`;
1601
2632
  }
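To illustrate the fallback order in the preview logic above, consider a hypothetical message tail (values invented):

  // Last message is a tool_result wrapper, so it carries no user text...
  const tail = [
    { role: "assistant", content: [{ type: "tool_use", name: "read_file" }] },
    { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1" }] }
  ];
  // ...the preview falls back to the preceding assistant turn, rendering
  // "[tool_use: read_file]"; if that yields nothing, "[1 tool_result]" is shown.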
@@ -1655,7 +2686,7 @@ async function showDetail(id) {
1655
2686
  <div class="info-item"><div class="info-label">Duration</div><div class="info-value">\${formatDuration(entry.durationMs)}</div></div>
1656
2687
  <div class="info-item"><div class="info-label">Stop Reason</div><div class="info-value">\${entry.response.stop_reason || '-'}</div></div>
1657
2688
  </div>
1658
- \${entry.response.error ? '<div style="color:var(--error);margin-top:8px;">Error: ' + entry.response.error + '</div>' : ''}
2689
+ \${entry.response.error ? '<div class="error-detail"><div class="error-label">Error Details</div><pre class="error-content">' + escapeHtml(entry.response.error) + '</pre></div>' : ''}
1659
2690
  </div>
1660
2691
  \`;
1661
2692
  }
@@ -2023,6 +3054,14 @@ input::placeholder { color: var(--text-dim); }
2023
3054
  .entry-model { font-weight: 500; flex: 1; }
2024
3055
  .entry-tokens { font-size: 11px; color: var(--text-dim); }
2025
3056
  .entry-duration { font-size: 11px; color: var(--text-dim); min-width: 50px; text-align: right; }
3057
+ .entry-preview {
3058
+ padding: 0 16px 8px 16px;
3059
+ font-size: 11px;
3060
+ color: var(--text-muted);
3061
+ overflow: hidden;
3062
+ text-overflow: ellipsis;
3063
+ white-space: nowrap;
3064
+ }
2026
3065
 
2027
3066
  /* Badges */
2028
3067
  .badge {
@@ -2038,6 +3077,7 @@ input::placeholder { color: var(--text-dim); }
2038
3077
  .badge.anthropic { background: rgba(163, 113, 247, 0.15); color: var(--purple); }
2039
3078
  .badge.openai { background: rgba(210, 153, 34, 0.15); color: var(--warning); }
2040
3079
  .badge.stream { background: rgba(57, 197, 207, 0.15); color: var(--cyan); }
3080
+ .badge.tool { background: rgba(88, 166, 255, 0.15); color: var(--primary); }
2041
3081
 
2042
3082
  /* Detail panel */
2043
3083
  .detail-panel {
@@ -2133,6 +3173,32 @@ input::placeholder { color: var(--text-dim); }
2133
3173
  .info-label { font-size: 11px; color: var(--text-muted); }
2134
3174
  .info-value { font-weight: 500; }
2135
3175
 
3176
+ /* Error detail display */
3177
+ .error-detail {
3178
+ margin-top: 12px;
3179
+ padding: 12px;
3180
+ background: rgba(248, 81, 73, 0.1);
3181
+ border: 1px solid rgba(248, 81, 73, 0.3);
3182
+ border-radius: 6px;
3183
+ }
3184
+ .error-label {
3185
+ font-size: 11px;
3186
+ color: var(--error);
3187
+ font-weight: 600;
3188
+ margin-bottom: 8px;
3189
+ text-transform: uppercase;
3190
+ }
3191
+ .error-content {
3192
+ margin: 0;
3193
+ font-family: 'SF Mono', Monaco, 'Courier New', monospace;
3194
+ font-size: 12px;
3195
+ color: var(--error);
3196
+ white-space: pre-wrap;
3197
+ word-break: break-word;
3198
+ max-height: 300px;
3199
+ overflow-y: auto;
3200
+ }
3201
+
2136
3202
  /* Empty state */
2137
3203
  .empty-state {
2138
3204
  text-align: center;
@@ -2388,12 +3454,12 @@ function translateModelName(model) {
2388
3454
  haiku: "claude-haiku-4.5"
2389
3455
  };
2390
3456
  if (shortNameMap[model]) return shortNameMap[model];
2391
- if (model.match(/^claude-sonnet-4-5-\d+$/)) return "claude-sonnet-4.5";
2392
- if (model.match(/^claude-sonnet-4-\d+$/)) return "claude-sonnet-4";
2393
- if (model.match(/^claude-opus-4-5-\d+$/)) return "claude-opus-4.5";
2394
- if (model.match(/^claude-opus-4-\d+$/)) return "claude-opus-4.5";
2395
- if (model.match(/^claude-haiku-4-5-\d+$/)) return "claude-haiku-4.5";
2396
- if (model.match(/^claude-haiku-3-5-\d+$/)) return "claude-haiku-4.5";
3457
+ if (/^claude-sonnet-4-5-\d+$/.test(model)) return "claude-sonnet-4.5";
3458
+ if (/^claude-sonnet-4-\d+$/.test(model)) return "claude-sonnet-4";
3459
+ if (/^claude-opus-4-5-\d+$/.test(model)) return "claude-opus-4.5";
3460
+ if (/^claude-opus-4-\d+$/.test(model)) return "claude-opus-4.5";
3461
+ if (/^claude-haiku-4-5-\d+$/.test(model)) return "claude-haiku-4.5";
3462
+ if (/^claude-haiku-3-5-\d+$/.test(model)) return "claude-haiku-4.5";
2397
3463
  return model;
2398
3464
  }
2399
3465
  function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameMapping) {
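Expected behavior of the rewritten patterns, using hypothetical dated ids that happen to match them (the opus-4 and haiku-3-5 branches intentionally map to the newer tier):

  // translateModelName("claude-sonnet-4-5-20250929") -> "claude-sonnet-4.5"
  // translateModelName("claude-opus-4-1")            -> "claude-opus-4.5"  (upgrade mapping)
  // translateModelName("claude-haiku-3-5-20241022")  -> "claude-haiku-4.5" (upgrade mapping)
  // translateModelName("gpt-4o")                     -> "gpt-4o"           (no pattern matches; passes through)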
@@ -2490,7 +3556,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
2490
3556
  if (existingTruncated) return existingTruncated;
2491
3557
  let hash = 0;
2492
3558
  for (let i = 0; i < originalName.length; i++) {
2493
- const char = originalName.charCodeAt(i);
3559
+ const char = originalName.codePointAt(i) ?? 0;
2494
3560
  hash = (hash << 5) - hash + char;
2495
3561
  hash = hash & hash;
2496
3562
  }
@@ -2527,8 +3593,9 @@ function translateAnthropicToolChoiceToOpenAI(anthropicToolChoice, toolNameMappi
2527
3593
  default: return;
2528
3594
  }
2529
3595
  }
2530
- function translateToAnthropic(response, toolNameMapping) {
2531
- if (response.choices.length === 0) return {
3596
+ /** Create empty response for edge case of no choices */
3597
+ function createEmptyResponse(response) {
3598
+ return {
2532
3599
  id: response.id,
2533
3600
  type: "message",
2534
3601
  role: "assistant",
@@ -2541,6 +3608,18 @@ function translateToAnthropic(response, toolNameMapping) {
2541
3608
  output_tokens: response.usage?.completion_tokens ?? 0
2542
3609
  }
2543
3610
  };
3611
+ }
3612
+ /** Build usage object from response */
3613
+ function buildUsageObject(response) {
3614
+ const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens;
3615
+ return {
3616
+ input_tokens: (response.usage?.prompt_tokens ?? 0) - (cachedTokens ?? 0),
3617
+ output_tokens: response.usage?.completion_tokens ?? 0,
3618
+ ...cachedTokens !== void 0 && { cache_read_input_tokens: cachedTokens }
3619
+ };
3620
+ }
3621
+ function translateToAnthropic(response, toolNameMapping) {
3622
+ if (response.choices.length === 0) return createEmptyResponse(response);
2544
3623
  const allTextBlocks = [];
2545
3624
  const allToolUseBlocks = [];
2546
3625
  let stopReason = null;
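A worked example of buildUsageObject's arithmetic, with invented token counts:

  const usage = { prompt_tokens: 1200, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 800 } };
  const cached = usage.prompt_tokens_details?.cached_tokens;
  console.log({
    input_tokens: usage.prompt_tokens - (cached ?? 0),               // 400 (uncached portion only)
    output_tokens: usage.completion_tokens,                          // 50
    ...(cached !== undefined && { cache_read_input_tokens: cached }) // 800; omitted when details are absent
  });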
@@ -2560,11 +3639,7 @@ function translateToAnthropic(response, toolNameMapping) {
2560
3639
  content: [...allTextBlocks, ...allToolUseBlocks],
2561
3640
  stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
2562
3641
  stop_sequence: null,
2563
- usage: {
2564
- input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
2565
- output_tokens: response.usage?.completion_tokens ?? 0,
2566
- ...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
2567
- }
3642
+ usage: buildUsageObject(response)
2568
3643
  };
2569
3644
  }
2570
3645
  function getAnthropicTextBlocks(messageContent) {
@@ -2776,175 +3851,365 @@ async function handleCompletion(c) {
2776
3851
  const startTime = Date.now();
2777
3852
  const anthropicPayload = await c.req.json();
2778
3853
  consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
2779
- const historyId = recordRequest("anthropic", {
2780
- model: anthropicPayload.model,
2781
- messages: convertAnthropicMessages(anthropicPayload.messages),
2782
- stream: anthropicPayload.stream ?? false,
2783
- tools: anthropicPayload.tools?.map((t) => ({
2784
- name: t.name,
2785
- description: t.description
2786
- })),
2787
- max_tokens: anthropicPayload.max_tokens,
2788
- temperature: anthropicPayload.temperature,
2789
- system: extractSystemPrompt(anthropicPayload.system)
2790
- });
2791
- const { payload: openAIPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
2792
- consola.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
3854
+ const trackingId = c.get("trackingId");
3855
+ updateTrackerModel(trackingId, anthropicPayload.model);
3856
+ const ctx = {
3857
+ historyId: recordRequest("anthropic", {
3858
+ model: anthropicPayload.model,
3859
+ messages: convertAnthropicMessages(anthropicPayload.messages),
3860
+ stream: anthropicPayload.stream ?? false,
3861
+ tools: anthropicPayload.tools?.map((t) => ({
3862
+ name: t.name,
3863
+ description: t.description
3864
+ })),
3865
+ max_tokens: anthropicPayload.max_tokens,
3866
+ temperature: anthropicPayload.temperature,
3867
+ system: extractSystemPrompt(anthropicPayload.system)
3868
+ }),
3869
+ trackingId,
3870
+ startTime
3871
+ };
3872
+ const { payload: translatedPayload, toolNameMapping } = translateToOpenAI(anthropicPayload);
3873
+ consola.debug("Translated OpenAI request payload:", JSON.stringify(translatedPayload));
3874
+ const selectedModel = state.models?.data.find((model) => model.id === translatedPayload.model);
3875
+ const { finalPayload: openAIPayload, compactResult } = await buildFinalPayload(translatedPayload, selectedModel);
3876
+ if (compactResult) ctx.compactResult = compactResult;
2793
3877
  if (state.manualApprove) await awaitApproval();
2794
3878
  try {
2795
3879
  const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
2796
- if (isNonStreaming(response)) {
2797
- consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
2798
- const anthropicResponse = translateToAnthropic(response, toolNameMapping);
2799
- consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
2800
- recordResponse(historyId, {
2801
- success: true,
2802
- model: anthropicResponse.model,
2803
- usage: anthropicResponse.usage,
2804
- stop_reason: anthropicResponse.stop_reason ?? void 0,
2805
- content: {
2806
- role: "assistant",
2807
- content: anthropicResponse.content.map((block) => {
2808
- if (block.type === "text") return {
2809
- type: "text",
2810
- text: block.text
2811
- };
2812
- if (block.type === "tool_use") return {
2813
- type: "tool_use",
2814
- id: block.id,
2815
- name: block.name,
2816
- input: JSON.stringify(block.input)
2817
- };
2818
- return { type: block.type };
2819
- })
2820
- },
2821
- toolCalls: extractToolCallsFromContent(anthropicResponse.content)
2822
- }, Date.now() - startTime);
2823
- return c.json(anthropicResponse);
2824
- }
3880
+ if (isNonStreaming(response)) return handleNonStreamingResponse({
3881
+ c,
3882
+ response,
3883
+ toolNameMapping,
3884
+ ctx
3885
+ });
2825
3886
  consola.debug("Streaming response from Copilot");
3887
+ updateTrackerStatus(trackingId, "streaming");
2826
3888
  return streamSSE(c, async (stream) => {
2827
- const streamState = {
2828
- messageStartSent: false,
2829
- contentBlockIndex: 0,
2830
- contentBlockOpen: false,
2831
- toolCalls: {}
2832
- };
2833
- let streamModel = "";
2834
- let streamInputTokens = 0;
2835
- let streamOutputTokens = 0;
2836
- let streamStopReason = "";
2837
- let streamContent = "";
2838
- const streamToolCalls = [];
2839
- let currentToolCall = null;
2840
- try {
2841
- for await (const rawEvent of response) {
2842
- consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
2843
- if (rawEvent.data === "[DONE]") break;
2844
- if (!rawEvent.data) continue;
2845
- let chunk;
2846
- try {
2847
- chunk = JSON.parse(rawEvent.data);
2848
- } catch (parseError) {
2849
- consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
2850
- continue;
2851
- }
2852
- if (chunk.model && !streamModel) streamModel = chunk.model;
2853
- const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
2854
- for (const event of events$1) {
2855
- consola.debug("Translated Anthropic event:", JSON.stringify(event));
2856
- switch (event.type) {
2857
- case "content_block_delta":
2858
- if ("text" in event.delta) streamContent += event.delta.text;
2859
- else if ("partial_json" in event.delta && currentToolCall) currentToolCall.input += event.delta.partial_json;
2860
- break;
2861
- case "content_block_start":
2862
- if (event.content_block.type === "tool_use") currentToolCall = {
2863
- id: event.content_block.id,
2864
- name: event.content_block.name,
2865
- input: ""
2866
- };
2867
- break;
2868
- case "content_block_stop":
2869
- if (currentToolCall) {
2870
- streamToolCalls.push(currentToolCall);
2871
- currentToolCall = null;
2872
- }
2873
- break;
2874
- case "message_delta":
2875
- if (event.delta.stop_reason) streamStopReason = event.delta.stop_reason;
2876
- if (event.usage) {
2877
- streamInputTokens = event.usage.input_tokens ?? 0;
2878
- streamOutputTokens = event.usage.output_tokens;
2879
- }
2880
- break;
2881
- }
2882
- await stream.writeSSE({
2883
- event: event.type,
2884
- data: JSON.stringify(event)
2885
- });
2886
- }
2887
- }
2888
- const contentBlocks = [];
2889
- if (streamContent) contentBlocks.push({
3889
+ await handleStreamingResponse({
3890
+ stream,
3891
+ response,
3892
+ toolNameMapping,
3893
+ anthropicPayload,
3894
+ ctx
3895
+ });
3896
+ });
3897
+ } catch (error) {
3898
+ recordErrorResponse(ctx, anthropicPayload.model, error);
3899
+ throw error;
3900
+ }
3901
+ }
3902
+ function updateTrackerModel(trackingId, model) {
3903
+ if (!trackingId) return;
3904
+ const request = requestTracker.getRequest(trackingId);
3905
+ if (request) request.model = model;
3906
+ }
3907
+ async function buildFinalPayload(payload, model) {
3908
+ if (!state.autoCompact || !model) {
3909
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
3910
+ return {
3911
+ finalPayload: payload,
3912
+ compactResult: null
3913
+ };
3914
+ }
3915
+ try {
3916
+ const check = await checkNeedsCompaction(payload, model);
3917
+ consola.info(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
3918
+ if (!check.needed) return {
3919
+ finalPayload: payload,
3920
+ compactResult: null
3921
+ };
3922
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
3923
+ const compactResult = await autoCompact(payload, model);
3924
+ return {
3925
+ finalPayload: compactResult.payload,
3926
+ compactResult
3927
+ };
3928
+ } catch (error) {
3929
+ consola.warn("Auto-compact failed, proceeding with original payload:", error);
3930
+ return {
3931
+ finalPayload: payload,
3932
+ compactResult: null
3933
+ };
3934
+ }
3935
+ }
3936
+ function updateTrackerStatus(trackingId, status) {
3937
+ if (!trackingId) return;
3938
+ requestTracker.updateRequest(trackingId, { status });
3939
+ }
3940
+ function recordErrorResponse(ctx, model, error) {
3941
+ recordResponse(ctx.historyId, {
3942
+ success: false,
3943
+ model,
3944
+ usage: {
3945
+ input_tokens: 0,
3946
+ output_tokens: 0
3947
+ },
3948
+ error: error instanceof Error ? error.message : "Unknown error",
3949
+ content: null
3950
+ }, Date.now() - ctx.startTime);
3951
+ }
3952
+ function handleNonStreamingResponse(opts) {
3953
+ const { c, response, toolNameMapping, ctx } = opts;
3954
+ consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
3955
+ let anthropicResponse = translateToAnthropic(response, toolNameMapping);
3956
+ consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
3957
+ if (ctx.compactResult?.wasCompacted) {
3958
+ const marker = createCompactionMarker(ctx.compactResult);
3959
+ anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
3960
+ }
3961
+ recordResponse(ctx.historyId, {
3962
+ success: true,
3963
+ model: anthropicResponse.model,
3964
+ usage: anthropicResponse.usage,
3965
+ stop_reason: anthropicResponse.stop_reason ?? void 0,
3966
+ content: {
3967
+ role: "assistant",
3968
+ content: anthropicResponse.content.map((block) => {
3969
+ if (block.type === "text") return {
2890
3970
  type: "text",
2891
- text: streamContent
2892
- });
2893
- for (const tc of streamToolCalls) contentBlocks.push({
3971
+ text: block.text
3972
+ };
3973
+ if (block.type === "tool_use") return {
2894
3974
  type: "tool_use",
2895
- ...tc
2896
- });
2897
- recordResponse(historyId, {
2898
- success: true,
2899
- model: streamModel || anthropicPayload.model,
2900
- usage: {
2901
- input_tokens: streamInputTokens,
2902
- output_tokens: streamOutputTokens
2903
- },
2904
- stop_reason: streamStopReason || void 0,
2905
- content: contentBlocks.length > 0 ? {
2906
- role: "assistant",
2907
- content: contentBlocks
2908
- } : null,
2909
- toolCalls: streamToolCalls.length > 0 ? streamToolCalls.map((tc) => ({
2910
- id: tc.id,
2911
- name: tc.name,
2912
- input: tc.input
2913
- })) : void 0
2914
- }, Date.now() - startTime);
2915
- } catch (error) {
2916
- consola.error("Stream error:", error);
2917
- recordResponse(historyId, {
2918
- success: false,
2919
- model: streamModel || anthropicPayload.model,
2920
- usage: {
2921
- input_tokens: 0,
2922
- output_tokens: 0
2923
- },
2924
- error: error instanceof Error ? error.message : "Stream error",
2925
- content: null
2926
- }, Date.now() - startTime);
2927
- const errorEvent = translateErrorToAnthropicErrorEvent();
2928
- await stream.writeSSE({
2929
- event: errorEvent.type,
2930
- data: JSON.stringify(errorEvent)
2931
- });
2932
- }
3975
+ id: block.id,
3976
+ name: block.name,
3977
+ input: JSON.stringify(block.input)
3978
+ };
3979
+ return { type: block.type };
3980
+ })
3981
+ },
3982
+ toolCalls: extractToolCallsFromContent(anthropicResponse.content)
3983
+ }, Date.now() - ctx.startTime);
3984
+ if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
3985
+ inputTokens: anthropicResponse.usage.input_tokens,
3986
+ outputTokens: anthropicResponse.usage.output_tokens
3987
+ });
3988
+ return c.json(anthropicResponse);
3989
+ }
3990
+ function appendMarkerToAnthropicResponse(response, marker) {
3991
+ const content = [...response.content];
3992
+ const lastTextIndex = content.findLastIndex((block) => block.type === "text");
3993
+ if (lastTextIndex !== -1) {
3994
+ const textBlock = content[lastTextIndex];
3995
+ if (textBlock.type === "text") content[lastTextIndex] = {
3996
+ ...textBlock,
3997
+ text: textBlock.text + marker
3998
+ };
3999
+ } else content.push({
4000
+ type: "text",
4001
+ text: marker
4002
+ });
4003
+ return {
4004
+ ...response,
4005
+ content
4006
+ };
4007
+ }
4008
+ function createAnthropicStreamAccumulator() {
4009
+ return {
4010
+ model: "",
4011
+ inputTokens: 0,
4012
+ outputTokens: 0,
4013
+ stopReason: "",
4014
+ content: "",
4015
+ toolCalls: [],
4016
+ currentToolCall: null
4017
+ };
4018
+ }
4019
+ async function handleStreamingResponse(opts) {
4020
+ const { stream, response, toolNameMapping, anthropicPayload, ctx } = opts;
4021
+ const streamState = {
4022
+ messageStartSent: false,
4023
+ contentBlockIndex: 0,
4024
+ contentBlockOpen: false,
4025
+ toolCalls: {}
4026
+ };
4027
+ const acc = createAnthropicStreamAccumulator();
4028
+ try {
4029
+ await processStreamChunks({
4030
+ stream,
4031
+ response,
4032
+ toolNameMapping,
4033
+ streamState,
4034
+ acc
2933
4035
  });
4036
+ if (ctx.compactResult?.wasCompacted) {
4037
+ const marker = createCompactionMarker(ctx.compactResult);
4038
+ await sendCompactionMarkerEvent(stream, streamState, marker);
4039
+ acc.content += marker;
4040
+ }
4041
+ recordStreamingResponse(acc, anthropicPayload.model, ctx);
4042
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
2934
4043
  } catch (error) {
2935
- recordResponse(historyId, {
2936
- success: false,
2937
- model: anthropicPayload.model,
2938
- usage: {
2939
- input_tokens: 0,
2940
- output_tokens: 0
2941
- },
2942
- error: error instanceof Error ? error.message : "Unknown error",
2943
- content: null
2944
- }, Date.now() - startTime);
2945
- throw error;
4044
+ consola.error("Stream error:", error);
4045
+ recordStreamingError({
4046
+ acc,
4047
+ fallbackModel: anthropicPayload.model,
4048
+ ctx,
4049
+ error
4050
+ });
4051
+ failTracking(ctx.trackingId, error);
4052
+ const errorEvent = translateErrorToAnthropicErrorEvent();
4053
+ await stream.writeSSE({
4054
+ event: errorEvent.type,
4055
+ data: JSON.stringify(errorEvent)
4056
+ });
4057
+ }
4058
+ }
4059
+ async function sendCompactionMarkerEvent(stream, streamState, marker) {
4060
+ const blockStartEvent = {
4061
+ type: "content_block_start",
4062
+ index: streamState.contentBlockIndex,
4063
+ content_block: {
4064
+ type: "text",
4065
+ text: ""
4066
+ }
4067
+ };
4068
+ await stream.writeSSE({
4069
+ event: "content_block_start",
4070
+ data: JSON.stringify(blockStartEvent)
4071
+ });
4072
+ const deltaEvent = {
4073
+ type: "content_block_delta",
4074
+ index: streamState.contentBlockIndex,
4075
+ delta: {
4076
+ type: "text_delta",
4077
+ text: marker
4078
+ }
4079
+ };
4080
+ await stream.writeSSE({
4081
+ event: "content_block_delta",
4082
+ data: JSON.stringify(deltaEvent)
4083
+ });
4084
+ const blockStopEvent = {
4085
+ type: "content_block_stop",
4086
+ index: streamState.contentBlockIndex
4087
+ };
4088
+ await stream.writeSSE({
4089
+ event: "content_block_stop",
4090
+ data: JSON.stringify(blockStopEvent)
4091
+ });
4092
+ streamState.contentBlockIndex++;
4093
+ }
4094
+ async function processStreamChunks(opts) {
4095
+ const { stream, response, toolNameMapping, streamState, acc } = opts;
4096
+ for await (const rawEvent of response) {
4097
+ consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
4098
+ if (rawEvent.data === "[DONE]") break;
4099
+ if (!rawEvent.data) continue;
4100
+ let chunk;
4101
+ try {
4102
+ chunk = JSON.parse(rawEvent.data);
4103
+ } catch (parseError) {
4104
+ consola.error("Failed to parse stream chunk:", parseError, rawEvent.data);
4105
+ continue;
4106
+ }
4107
+ if (chunk.model && !acc.model) acc.model = chunk.model;
4108
+ const events$1 = translateChunkToAnthropicEvents(chunk, streamState, toolNameMapping);
4109
+ for (const event of events$1) {
4110
+ consola.debug("Translated Anthropic event:", JSON.stringify(event));
4111
+ processAnthropicEvent(event, acc);
4112
+ await stream.writeSSE({
4113
+ event: event.type,
4114
+ data: JSON.stringify(event)
4115
+ });
4116
+ }
2946
4117
  }
2947
4118
  }
4119
+ function processAnthropicEvent(event, acc) {
4120
+ switch (event.type) {
4121
+ case "content_block_delta":
4122
+ handleContentBlockDelta(event.delta, acc);
4123
+ break;
4124
+ case "content_block_start":
4125
+ handleContentBlockStart(event.content_block, acc);
4126
+ break;
4127
+ case "content_block_stop":
4128
+ handleContentBlockStop(acc);
4129
+ break;
4130
+ case "message_delta":
4131
+ handleMessageDelta(event.delta, event.usage, acc);
4132
+ break;
4133
+ default: break;
4134
+ }
4135
+ }
4136
+ function handleContentBlockDelta(delta, acc) {
4137
+ if (delta.type === "text_delta") acc.content += delta.text;
4138
+ else if (delta.type === "input_json_delta" && acc.currentToolCall) acc.currentToolCall.input += delta.partial_json;
4139
+ }
4140
+ function handleContentBlockStart(block, acc) {
4141
+ if (block.type === "tool_use") acc.currentToolCall = {
4142
+ id: block.id,
4143
+ name: block.name,
4144
+ input: ""
4145
+ };
4146
+ }
4147
+ function handleContentBlockStop(acc) {
4148
+ if (acc.currentToolCall) {
4149
+ acc.toolCalls.push(acc.currentToolCall);
4150
+ acc.currentToolCall = null;
4151
+ }
4152
+ }
4153
+ function handleMessageDelta(delta, usage, acc) {
4154
+ if (delta.stop_reason) acc.stopReason = delta.stop_reason;
4155
+ if (usage) {
4156
+ acc.inputTokens = usage.input_tokens ?? 0;
4157
+ acc.outputTokens = usage.output_tokens;
4158
+ }
4159
+ }
4160
+ function recordStreamingResponse(acc, fallbackModel, ctx) {
4161
+ const contentBlocks = [];
4162
+ if (acc.content) contentBlocks.push({
4163
+ type: "text",
4164
+ text: acc.content
4165
+ });
4166
+ for (const tc of acc.toolCalls) contentBlocks.push({
4167
+ type: "tool_use",
4168
+ ...tc
4169
+ });
4170
+ recordResponse(ctx.historyId, {
4171
+ success: true,
4172
+ model: acc.model || fallbackModel,
4173
+ usage: {
4174
+ input_tokens: acc.inputTokens,
4175
+ output_tokens: acc.outputTokens
4176
+ },
4177
+ stop_reason: acc.stopReason || void 0,
4178
+ content: contentBlocks.length > 0 ? {
4179
+ role: "assistant",
4180
+ content: contentBlocks
4181
+ } : null,
4182
+ toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
4183
+ }, Date.now() - ctx.startTime);
4184
+ }
4185
+ function recordStreamingError(opts) {
4186
+ const { acc, fallbackModel, ctx, error } = opts;
4187
+ recordResponse(ctx.historyId, {
4188
+ success: false,
4189
+ model: acc.model || fallbackModel,
4190
+ usage: {
4191
+ input_tokens: 0,
4192
+ output_tokens: 0
4193
+ },
4194
+ error: error instanceof Error ? error.message : "Stream error",
4195
+ content: null
4196
+ }, Date.now() - ctx.startTime);
4197
+ }
4198
+ function completeTracking(trackingId, inputTokens, outputTokens) {
4199
+ if (!trackingId) return;
4200
+ requestTracker.updateRequest(trackingId, {
4201
+ inputTokens,
4202
+ outputTokens
4203
+ });
4204
+ requestTracker.completeRequest(trackingId, 200, {
4205
+ inputTokens,
4206
+ outputTokens
4207
+ });
4208
+ }
4209
+ function failTracking(trackingId, error) {
4210
+ if (!trackingId) return;
4211
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
4212
+ }
2948
4213
  function convertAnthropicMessages(messages) {
2949
4214
  return messages.map((msg) => {
2950
4215
  if (typeof msg.content === "string") return {
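For reference, when buildFinalPayload actually compacted the conversation (ctx.compactResult.wasCompacted), sendCompactionMarkerEvent above emits the marker as a standard Anthropic text block, i.e. three SSE events. The index and marker text below are placeholders; the real string comes from createCompactionMarker, which is defined outside this hunk:

  event: content_block_start
  data: {"type":"content_block_start","index":2,"content_block":{"type":"text","text":""}}

  event: content_block_delta
  data: {"type":"content_block_delta","index":2,"delta":{"type":"text_delta","text":"<compaction marker>"}}

  event: content_block_stop
  data: {"type":"content_block_stop","index":2}

Anthropic-protocol clients therefore render the marker as ordinary assistant text at the end of the stream.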
@@ -3025,7 +4290,21 @@ modelRoutes.get("/", async (c) => {
3025
4290
  created: 0,
3026
4291
  created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
3027
4292
  owned_by: model.vendor,
3028
- display_name: model.name
4293
+ display_name: model.name,
4294
+ capabilities: {
4295
+ family: model.capabilities.family,
4296
+ type: model.capabilities.type,
4297
+ tokenizer: model.capabilities.tokenizer,
4298
+ limits: {
4299
+ max_context_window_tokens: model.capabilities.limits.max_context_window_tokens,
4300
+ max_output_tokens: model.capabilities.limits.max_output_tokens,
4301
+ max_prompt_tokens: model.capabilities.limits.max_prompt_tokens
4302
+ },
4303
+ supports: {
4304
+ tool_calls: model.capabilities.supports.tool_calls,
4305
+ parallel_tool_calls: model.capabilities.supports.parallel_tool_calls
4306
+ }
4307
+ }
3029
4308
  }));
3030
4309
  return c.json({
3031
4310
  object: "list",
@@ -3063,7 +4342,7 @@ usageRoute.get("/", async (c) => {
3063
4342
  //#endregion
3064
4343
  //#region src/server.ts
3065
4344
  const server = new Hono();
3066
- server.use(logger());
4345
+ server.use(tuiLogger());
3067
4346
  server.use(cors());
3068
4347
  server.get("/", (c) => c.text("Server running"));
3069
4348
  server.get("/health", (c) => {
@@ -3103,8 +4382,17 @@ async function runServer(options) {
3103
4382
  state.rateLimitSeconds = options.rateLimit;
3104
4383
  state.rateLimitWait = options.rateLimitWait;
3105
4384
  state.showToken = options.showToken;
4385
+ state.autoCompact = options.autoCompact;
4386
+ if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
3106
4387
  initHistory(options.history, options.historyLimit);
3107
- if (options.history) consola.info(`History recording enabled (max ${options.historyLimit} entries)`);
4388
+ if (options.history) {
4389
+ const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
4390
+ consola.info(`History recording enabled (${limitText} entries)`);
4391
+ }
4392
+ initTui({
4393
+ enabled: true,
4394
+ mode: options.tui
4395
+ });
3108
4396
  await ensurePaths();
3109
4397
  await cacheVSCodeVersion();
3110
4398
  if (options.githubToken) {
@@ -3224,7 +4512,17 @@ const start = defineCommand({
3224
4512
  "history-limit": {
3225
4513
  type: "string",
3226
4514
  default: "1000",
3227
- description: "Maximum number of history entries to keep in memory"
4515
+ description: "Maximum number of history entries to keep in memory (0 = unlimited)"
4516
+ },
4517
+ tui: {
4518
+ type: "string",
4519
+ default: "console",
4520
+ description: "TUI mode: 'console' for simple log output, 'fullscreen' for interactive terminal UI with tabs"
4521
+ },
4522
+ "auto-compact": {
4523
+ type: "boolean",
4524
+ default: false,
4525
+ description: "Automatically compress conversation history when exceeding model token limits"
3228
4526
  }
3229
4527
  },
3230
4528
  run({ args }) {
@@ -3243,7 +4541,9 @@ const start = defineCommand({
3243
4541
  showToken: args["show-token"],
3244
4542
  proxyEnv: args["proxy-env"],
3245
4543
  history: args.history,
3246
- historyLimit: Number.parseInt(args["history-limit"], 10)
4544
+ historyLimit: Number.parseInt(args["history-limit"], 10),
4545
+ tui: args.tui,
4546
+ autoCompact: args["auto-compact"]
3247
4547
  });
3248
4548
  }
3249
4549
  });
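Taken together, the new flags can be exercised like this (the executable name is assumed from the package name; substitute whatever alias the CLI is installed under):

  copilot-api start --tui fullscreen --auto-compact --history-limit 0

Here --tui fullscreen selects the interactive terminal UI instead of plain console logging, --auto-compact turns on the context-compression path in handleCompletion, and --history-limit 0 now means an unlimited number of history entries.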