npm - @hsupu/copilot-api - Versions diffs - 0.7.23 → 0.8.1-beta.1 - Mend

@hsupu/copilot-api 0.7.23 → 0.8.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/config.example.yaml +4 -0
package/dist/dist-8vhX0s4a.mjs +429 -0
package/dist/dist-8vhX0s4a.mjs.map +1 -0
package/dist/main.mjs +884 -248
package/dist/main.mjs.map +1 -1
package/package.json +1 -3
package/ui/history-v3/dist/assets/DashboardPage-B1uByGH4.js +1 -0
package/ui/history-v3/dist/assets/DashboardPage-CabImjCg.css +1 -0
package/ui/history-v3/dist/assets/HistoryPage-CcuFI_7q.js +3 -0
package/ui/history-v3/dist/assets/HistoryPage-MBmz_wrp.css +1 -0
package/ui/history-v3/dist/assets/LogsPage-CTboGme2.js +1 -0
package/ui/history-v3/dist/assets/LogsPage-DnfMvMyI.css +1 -0
package/ui/history-v3/dist/assets/ModelsPage-BCijEIH1.css +1 -0
package/ui/history-v3/dist/assets/ModelsPage-B_sjc_hg.js +1 -0
package/ui/history-v3/dist/assets/ProgressBar-BQxTnwIj.js +1 -0
package/ui/history-v3/dist/assets/ProgressBar-B_0VSeE9.css +1 -0
package/ui/history-v3/dist/assets/UsagePage-Nx_clyfV.css +1 -0
package/ui/history-v3/dist/assets/UsagePage-Nxy64EGo.js +1 -0
package/ui/history-v3/dist/assets/index-D5HcAJB-.css +1 -0
package/ui/history-v3/dist/assets/index-DOp9l2lW.js +2 -0
package/ui/history-v3/dist/assets/useFormatters-BBIZmSf2.js +1 -0
package/ui/history-v3/dist/assets/{vendor-BGG3lVOP.js → vendor-BJ2Uq5vY.js} +1 -1
package/ui/history-v3/dist/assets/vue-CYcm9SLm.js +1 -0
package/ui/history-v3/dist/index.html +3 -4
package/ui/history-v1/index.html +0 -149
package/ui/history-v1/script.js +0 -1799
package/ui/history-v1/styles.css +0 -1467
package/ui/history-v3/dist/assets/index-CaOzq3V0.js +0 -3
package/ui/history-v3/dist/assets/index-Dfh3zN1X.css +0 -1
package/ui/history-v3/dist/assets/vue-CJ6HbDRX.js +0 -1

package/dist/main.mjs CHANGED

@@ -14,7 +14,6 @@ import { randomBytes, randomUUID } from "node:crypto";
 import pc from "picocolors";
 import { existsSync, promises, readFileSync, readdirSync } from "node:fs";
 import invariant from "tiny-invariant";
-import { serve } from "srvx";
 import { Hono } from "hono";
 import { events } from "fetch-event-stream";
 import { cors } from "hono/cors";
@@ -22,6 +21,12 @@ import { trimTrailingSlash } from "hono/trailing-slash";
 import { streamSSE } from "hono/streaming";
 //#region src/lib/state.ts
+/** Epoch ms when the server started (set once in runServer) */
+let serverStartTime = 0;
+/** Set the server start time (called once from runServer) */
+function setServerStartTime(ts) {
+	serverStartTime = ts;
+}
 /**
 * Rebuild model lookup indexes from state.models.
 * Called by cacheModels() in production; call directly in tests after setting state.models.
@@ -42,6 +47,7 @@ const state = {
 	compressToolResultsBeforeTruncate: true,
 	contextEditingMode: "off",
 	stripServerTools: false,
+	immutableThinkingMessages: false,
 	dedupToolCalls: false,
 	fetchTimeout: 300,
 	historyLimit: 200,
@@ -77,16 +83,15 @@ function generateId(randomLength = 7) {
 }
 //#endregion
-//#region src/lib/history/ws.ts
-/**
-* WebSocket support for History API.
-* Enables real-time updates when new requests are recorded.
-*/
-/** Track connected WebSocket clients */
-const clients = /* @__PURE__ */ new Set();
-/** Register a new WebSocket client and send connection confirmation */
+//#region src/lib/ws/index.ts
+/** Connected clients indexed by their raw WebSocket instance */
+const clients = /* @__PURE__ */ new Map();
+/** Register a new WebSocket client (starts with no topic subscriptions = receive all) */
 function addClient(ws) {
-	clients.add(ws);
+	clients.set(ws, {
+		ws,
+		topics: /* @__PURE__ */ new Set()
+	});
 	const msg = {
 		type: "connected",
 		data: { clientCount: clients.size },
@@ -104,19 +109,41 @@ function getClientCount() {
 }
 /** Close all connected WebSocket clients */
 function closeAllClients() {
-	for (const client of clients) try {
-		client.close(1001, "Server shutting down");
+	for (const { ws } of clients.values()) try {
+		ws.close(1001, "Server shutting down");
 	} catch {}
 	clients.clear();
 }
-function broadcast(message) {
+/** Handle an incoming message from a client (topic subscription) */
+function handleClientMessage(ws, data) {
+	try {
+		const parsed = JSON.parse(data);
+		if (!isSubscribeMessage(parsed)) return;
+		const client = clients.get(ws);
+		if (!client) return;
+		client.topics = new Set(parsed.topics);
+		consola.debug(`[WS] Client subscribed to topics: [${[...client.topics].join(", ")}]`);
+	} catch {}
+}
+/**
+* Broadcast a message to clients subscribed to a specific topic.
+*
+* - Clients with no subscriptions (empty topics) receive the message (wildcard).
+* - Clients subscribed to the given topic receive the message.
+* - Clients subscribed to other topics (but not this one) are skipped.
+*/
+function broadcast(message, topic) {
+	if (clients.size === 0) return;
 	const data = JSON.stringify(message);
-	for (const client of clients) try {
-		if (client.readyState === WebSocket.OPEN) client.send(data);
-		else clients.delete(client);
-	} catch (error) {
-		consola.debug("WebSocket send failed, removing client:", error);
-		clients.delete(client);
+	for (const [rawWs, client] of clients) {
+		if (client.topics.size > 0 && !client.topics.has(topic)) continue;
+		try {
+			if (rawWs.readyState === WebSocket.OPEN) rawWs.send(data);
+			else clients.delete(rawWs);
+		} catch (error) {
+			consola.debug("WebSocket send failed, removing client:", error);
+			clients.delete(rawWs);
+		}
 	}
 }
 /** Called when a new entry is recorded */
@@ -126,7 +153,7 @@ function notifyEntryAdded(summary) {
 		type: "entry_added",
 		data: summary,
 		timestamp: Date.now()
-	});
+	}, "history");
 }
 /** Called when an entry is updated (e.g., response received) */
 function notifyEntryUpdated(summary) {
@@ -135,7 +162,7 @@ function notifyEntryUpdated(summary) {
 		type: "entry_updated",
 		data: summary,
 		timestamp: Date.now()
-	});
+	}, "history");
 }
 /** Called when stats change */
 function notifyStatsUpdated(stats) {
@@ -144,7 +171,7 @@ function notifyStatsUpdated(stats) {
 		type: "stats_updated",
 		data: stats,
 		timestamp: Date.now()
-	});
+	}, "history");
 }
 /** Called when all history is cleared */
 function notifyHistoryCleared() {
@@ -153,7 +180,7 @@ function notifyHistoryCleared() {
 		type: "history_cleared",
 		data: null,
 		timestamp: Date.now()
-	});
+	}, "history");
 }
 /** Called when a session is deleted */
 function notifySessionDeleted(sessionId) {
@@ -162,7 +189,65 @@ function notifySessionDeleted(sessionId) {
 		type: "session_deleted",
 		data: { sessionId },
 		timestamp: Date.now()
-	});
+	}, "history");
+}
+/** Called when active request state changes (topic: "requests") */
+function notifyActiveRequestChanged(data) {
+	if (clients.size === 0) return;
+	broadcast({
+		type: "active_request_changed",
+		data,
+		timestamp: Date.now()
+	}, "requests");
+}
+/** Called when rate limiter state changes (topic: "status") */
+function notifyRateLimiterChanged(data) {
+	if (clients.size === 0) return;
+	broadcast({
+		type: "rate_limiter_changed",
+		data,
+		timestamp: Date.now()
+	}, "status");
+}
+/** Called when shutdown phase changes (topic: "status") */
+function notifyShutdownPhaseChanged(data) {
+	if (clients.size === 0) return;
+	broadcast({
+		type: "shutdown_phase_changed",
+		data,
+		timestamp: Date.now()
+	}, "status");
+}
+/**
+* Initialize the global WebSocket endpoint at `/ws`.
+* Registers the route on the root Hono app using the shared WebSocket adapter.
+*
+* @param rootApp - The root Hono app instance
+* @param upgradeWs - Shared WebSocket upgrade function from createWebSocketAdapter
+*/
+function initWebSocket(rootApp, upgradeWs) {
+	rootApp.get("/ws", upgradeWs(() => ({
+		onOpen(_event, ws) {
+			addClient(ws.raw);
+		},
+		onClose(_event, ws) {
+			removeClient(ws.raw);
+		},
+		onMessage(event, ws) {
+			const raw = typeof event.data === "string" ? event.data : String(event.data);
+			handleClientMessage(ws.raw, raw);
+		},
+		onError(event, ws) {
+			consola.debug("WebSocket error:", event);
+			removeClient(ws.raw);
+		}
+	})));
+}
+/** Type guard for subscribe messages from the client */
+function isSubscribeMessage(value) {
+	if (typeof value !== "object" || value === null) return false;
+	const msg = value;
+	return msg.type === "subscribe" && Array.isArray(msg.topics);
 }
 //#endregion
@@ -449,7 +534,9 @@ function updateEntry(id, update) {
 	if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
 	if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
 	if (update.sseEvents) entry.sseEvents = update.sseEvents;
-	if (update.httpHeaders) entry.httpHeaders = update.httpHeaders;
+	if (update.effectiveRequest) entry.effectiveRequest = update.effectiveRequest;
+	if (update.wireRequest) entry.wireRequest = update.wireRequest;
+	if (update.attempts) entry.attempts = update.attempts;
 	if (update.response) {
 		const session = historyState.sessions.get(entry.sessionId);
 		if (session) {
@@ -468,13 +555,12 @@ function getEntry(id) {
 	return entryIndex.get(id) ?? historyState.entries.find((e) => e.id === id);
 }
 /**
-* Efficient summary-only query for list views. Filters and paginates using
-* the lightweight summaryIndex instead of full entries.
-* Search matches against the pre-computed `searchText` field — O(n) string
-* includes instead of O(n*m*b) deep content block traversal.
+* Efficient summary-only query for list views with cursor-based pagination.
+* Filters using the lightweight summaryIndex. Search matches against the
+* pre-computed `searchText` field.
 */
 function getHistorySummaries(options = {}) {
-	const { page = 1, limit = 50, model, endpoint, success, from, to, search, sessionId } = options;
+	const { cursor, limit = 50, direction = "older", model, endpoint, success, from, to, search, sessionId } = options;
 	let summaries = Array.from(summaryIndex.values());
 	if (sessionId) summaries = summaries.filter((s) => s.sessionId === sessionId);
 	if (model) {
@@ -495,16 +581,19 @@ function getHistorySummaries(options = {}) {
 			return s.searchText.includes(needle);
 		});
 	}
-	summaries.sort((a, b) => b.timestamp - a.timestamp);
+	summaries.sort((a, b) => b.timestamp - a.timestamp || b.id.localeCompare(a.id));
 	const total = summaries.length;
-	const totalPages = Math.ceil(total / limit);
-	const start = (page - 1) * limit;
+	let startIdx = 0;
+	if (cursor) {
+		const cursorIdx = summaries.findIndex((s) => s.id === cursor);
+		if (cursorIdx !== -1) startIdx = direction === "older" ? cursorIdx + 1 : Math.max(0, cursorIdx - limit);
+	}
+	const entries = summaries.slice(startIdx, startIdx + limit);
 	return {
-		entries: summaries.slice(start, start + limit),
+		entries,
 		total,
-		page,
-		limit,
-		totalPages
+		nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
+		prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
 	};
 }
 function getSessions() {
@@ -518,17 +607,20 @@ function getSession(id) {
 	return historyState.sessions.get(id);
 }
 function getSessionEntries(sessionId, options = {}) {
-	const { page = 1, limit = 50 } = options;
+	const { cursor, limit = 50 } = options;
 	const all = historyState.entries.filter((e) => e.sessionId === sessionId).sort((a, b) => a.timestamp - b.timestamp);
 	const total = all.length;
-	const totalPages = Math.max(1, Math.ceil(total / limit));
-	const start = (page - 1) * limit;
+	let startIdx = 0;
+	if (cursor) {
+		const cursorIdx = all.findIndex((e) => e.id === cursor);
+		if (cursorIdx !== -1) startIdx = cursorIdx + 1;
+	}
+	const entries = all.slice(startIdx, startIdx + limit);
 	return {
-		entries: all.slice(start, start + limit),
+		entries,
 		total,
-		page,
-		limit,
-		totalPages
+		nextCursor: startIdx + limit < total ? entries.at(-1)?.id ?? null : null,
+		prevCursor: startIdx > 0 ? entries[0]?.id ?? null : null
 	};
 }
 function clearHistory() {
@@ -758,6 +850,16 @@ function stopMemoryPressureMonitor() {
 		timer = null;
 	}
 }
+/** Get memory pressure diagnostics */
+function getMemoryPressureStats() {
+	const { heapUsed } = process.memoryUsage();
+	return {
+		totalEvictedCount,
+		currentMaxEntries: historyState.maxEntries,
+		heapUsedMB: Math.round(heapUsed / 1024 / 1024),
+		heapLimitMB: resolvedHeapLimit ? Math.round(resolvedHeapLimit / 1024 / 1024) : null
+	};
+}
 //#endregion
 //#region src/lib/config/paths.ts
@@ -882,6 +984,7 @@ async function applyConfigToState() {
 	if (config.anthropic) {
 		const a = config.anthropic;
 		if (a.strip_server_tools !== void 0) state.stripServerTools = a.strip_server_tools;
+		if (a.immutable_thinking_messages !== void 0) state.immutableThinkingMessages = a.immutable_thinking_messages;
 		if (a.dedup_tool_calls !== void 0) state.dedupToolCalls = a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls;
 		if (a.strip_read_tool_result_tags !== void 0) state.stripReadToolResultTags = a.strip_read_tool_result_tags;
 		if (a.context_editing !== void 0) state.contextEditingMode = a.context_editing;
@@ -2791,6 +2894,12 @@ const checkUsage = defineCommand({
 //#endregion
 //#region src/lib/fetch-utils.ts
+const SENSITIVE_HEADER_NAMES = new Set([
+	"authorization",
+	"proxy-authorization",
+	"x-api-key",
+	"api-key"
+]);
 /**
 * Create an AbortSignal for fetch timeout if configured.
 * Controls the time from request start to receiving response headers.
@@ -2805,9 +2914,13 @@ function createFetchSignal() {
 * so headers are captured even for error responses.
 */
 function captureHttpHeaders(capture, requestHeaders, response) {
-	capture.request = { ...requestHeaders };
+	capture.request = sanitizeHeadersForHistory(requestHeaders);
 	capture.response = Object.fromEntries(response.headers.entries());
 }
+/** Return a copy of headers safe to persist in history/error artifacts. */
+function sanitizeHeadersForHistory(headers) {
+	return Object.fromEntries(Object.entries(headers).map(([name, value]) => [name, SENSITIVE_HEADER_NAMES.has(name.toLowerCase()) ? "***" : value]));
+}
 //#endregion
 //#region src/lib/models/client.ts
@@ -3100,10 +3213,18 @@ var AdaptiveRateLimiter = class {
 	*/
 	enterRateLimitedMode() {
 		if (this.mode === "rate-limited") return;
+		const previousMode = this.mode;
 		this.mode = "rate-limited";
 		this.rateLimitedAt = Date.now();
 		this.consecutiveSuccesses = 0;
 		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
+		notifyRateLimiterChanged({
+			mode: this.mode,
+			previousMode,
+			queueLength: this.queue.length,
+			consecutiveSuccesses: this.consecutiveSuccesses,
+			rateLimitedAt: this.rateLimitedAt
+		});
 	}
 	/**
 	* Check if we should try to recover to normal mode
@@ -3125,20 +3246,36 @@ var AdaptiveRateLimiter = class {
 	* Start gradual recovery mode
 	*/
 	startGradualRecovery() {
+		const previousMode = this.mode;
 		this.mode = "recovering";
 		this.recoveryStepIndex = 0;
 		this.rateLimitedAt = null;
 		this.consecutiveSuccesses = 0;
 		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
 		consola.info(`[RateLimiter] Starting ramp-up (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
+		notifyRateLimiterChanged({
+			mode: this.mode,
+			previousMode,
+			queueLength: this.queue.length,
+			consecutiveSuccesses: this.consecutiveSuccesses,
+			rateLimitedAt: this.rateLimitedAt
+		});
 	}
 	/**
 	* Complete recovery to normal mode
 	*/
 	completeRecovery() {
+		const previousMode = this.mode;
 		this.mode = "normal";
 		this.recoveryStepIndex = 0;
 		consola.success("[RateLimiter] Exiting rate-limited mode.");
+		notifyRateLimiterChanged({
+			mode: this.mode,
+			previousMode,
+			queueLength: this.queue.length,
+			consecutiveSuccesses: this.consecutiveSuccesses,
+			rateLimitedAt: this.rateLimitedAt
+		});
 	}
 	/**
 	* Enqueue a request for later execution
@@ -3256,6 +3393,10 @@ var AdaptiveRateLimiter = class {
 			rateLimitedAt: this.rateLimitedAt
 		};
 	}
+	/** Get the effective configuration */
+	getConfig() {
+		return { ...this.config };
+	}
 };
 /** Singleton instance */
 let rateLimiterInstance = null;
@@ -3490,10 +3631,8 @@ function createRequestContext(opts) {
 	let _originalRequest = null;
 	let _response = null;
 	let _pipelineInfo = null;
-	let _preprocessInfo = null;
 	let _sseEvents = null;
 	let _httpHeaders = null;
-	const _sanitizationHistory = [];
 	let _queueWaitMs = 0;
 	const _attempts = [];
 	/** Guard: once complete() or fail() is called, subsequent calls are no-ops */
@@ -3526,9 +3665,6 @@ function createRequestContext(opts) {
 		get pipelineInfo() {
 			return _pipelineInfo;
 		},
-		get preprocessInfo() {
-			return _preprocessInfo;
-		},
 		get httpHeaders() {
 			return _httpHeaders;
 		},
@@ -3549,18 +3685,8 @@ function createRequestContext(opts) {
 				field: "originalRequest"
 			});
 		},
-		setPreprocessInfo(info) {
-			_preprocessInfo = info;
-		},
-		addSanitizationInfo(info) {
-			_sanitizationHistory.push(info);
-		},
 		setPipelineInfo(info) {
-			_pipelineInfo = {
-				..._preprocessInfo && { preprocessing: _preprocessInfo },
-				..._sanitizationHistory.length > 0 && { sanitization: _sanitizationHistory },
-				...info
-			};
+			_pipelineInfo = info;
 			emit({
 				type: "updated",
 				context: ctx,
@@ -3580,6 +3706,7 @@ function createRequestContext(opts) {
 			const attempt = {
 				index: _attempts.length,
 				effectiveRequest: null,
+				wireRequest: null,
 				response: null,
 				error: null,
 				strategy: attemptOpts.strategy,
@@ -3603,6 +3730,10 @@ function createRequestContext(opts) {
 			const attempt = ctx.currentAttempt;
 			if (attempt) attempt.effectiveRequest = req;
 		},
+		setAttemptWireRequest(req) {
+			const attempt = ctx.currentAttempt;
+			if (attempt) attempt.wireRequest = req;
+		},
 		setAttemptResponse(response) {
 			const attempt = ctx.currentAttempt;
 			if (attempt) {
@@ -3643,24 +3774,6 @@ function createRequestContext(opts) {
 				entry: ctx.toHistoryEntry()
 			});
 		},
-		completeFromStream(acc) {
-			const response = {
-				success: true,
-				model: acc.model,
-				usage: {
-					input_tokens: acc.inputTokens,
-					output_tokens: acc.outputTokens,
-					...acc.cacheReadTokens > 0 && { cache_read_input_tokens: acc.cacheReadTokens },
-					...acc.cacheCreationTokens > 0 && { cache_creation_input_tokens: acc.cacheCreationTokens }
-				},
-				content: acc.contentBlocks.length > 0 ? {
-					role: "assistant",
-					content: acc.contentBlocks
-				} : null,
-				stop_reason: acc.stopReason || void 0
-			};
-			ctx.complete(response);
-		},
 		fail(model, error) {
 			if (settled) return;
 			settled = true;
@@ -3688,6 +3801,7 @@ function createRequestContext(opts) {
 			});
 		},
 		toHistoryEntry() {
+			const p = _originalRequest?.payload;
 			const entry = {
 				id,
 				endpoint: opts.endpoint,
@@ -3698,7 +3812,10 @@ function createRequestContext(opts) {
 					messages: _originalRequest?.messages,
 					stream: _originalRequest?.stream,
 					tools: _originalRequest?.tools,
-					system: _originalRequest?.system
+					system: _originalRequest?.system,
+					max_tokens: typeof p?.max_tokens === "number" ? p.max_tokens : void 0,
+					temperature: typeof p?.temperature === "number" ? p.temperature : void 0,
+					thinking: p?.thinking ?? void 0
 				}
 			};
 			if (_response) entry.response = _response;
@@ -3707,12 +3824,38 @@ function createRequestContext(opts) {
 			if (_pipelineInfo) entry.pipelineInfo = _pipelineInfo;
 			if (_sseEvents) entry.sseEvents = _sseEvents;
 			if (_httpHeaders) entry.httpHeaders = _httpHeaders;
-			if (_attempts.length > 1) entry.attempts = _attempts.map((a) => ({
+			const finalAttempt = _attempts.at(-1);
+			if (finalAttempt?.effectiveRequest) {
+				const ep = finalAttempt.effectiveRequest;
+				entry.effectiveRequest = {
+					model: ep.model,
+					format: ep.format,
+					messageCount: ep.messages.length,
+					messages: ep.messages,
+					system: ep.payload?.system,
+					payload: ep.payload
+				};
+			}
+			if (finalAttempt?.wireRequest) {
+				const wp = finalAttempt.wireRequest;
+				entry.wireRequest = {
+					model: wp.model,
+					format: wp.format,
+					messageCount: wp.messages.length,
+					messages: wp.messages,
+					system: wp.payload?.system,
+					payload: wp.payload,
+					headers: wp.headers
+				};
+			}
+			if (_attempts.length > 0) entry.attempts = _attempts.map((a) => ({
 				index: a.index,
 				strategy: a.strategy,
 				durationMs: a.durationMs,
 				error: a.error?.message,
-				truncation: a.truncation
+				truncation: a.truncation,
+				sanitization: a.sanitization,
+				effectiveMessageCount: a.effectiveRequest?.messages?.length
 			}));
 			return entry;
 		}
@@ -3779,12 +3922,19 @@ function createRequestContextManager() {
 		const { type, context } = rawEvent;
 		switch (type) {
 			case "state_changed":
-				if (rawEvent.previousState) emit({
-					type: "state_changed",
-					context,
-					previousState: rawEvent.previousState,
-					meta: rawEvent.meta
-				});
+				if (rawEvent.previousState) {
+					emit({
+						type: "state_changed",
+						context,
+						previousState: rawEvent.previousState,
+						meta: rawEvent.meta
+					});
+					notifyActiveRequestChanged({
+						action: "state_changed",
+						request: summarizeContext(context),
+						activeCount: activeContexts.size
+					});
+				}
 				break;
 			case "updated":
 				if (rawEvent.field) emit({
@@ -3800,6 +3950,11 @@ function createRequestContextManager() {
 					entry: rawEvent.entry
 				});
 				activeContexts.delete(context.id);
+				notifyActiveRequestChanged({
+					action: "completed",
+					requestId: context.id,
+					activeCount: activeContexts.size
+				});
 				break;
 			case "failed":
 				if (rawEvent.entry) emit({
@@ -3808,10 +3963,30 @@ function createRequestContextManager() {
 					entry: rawEvent.entry
 				});
 				activeContexts.delete(context.id);
+				notifyActiveRequestChanged({
+					action: "failed",
+					requestId: context.id,
+					activeCount: activeContexts.size
+				});
 				break;
 			default: break;
 		}
 	}
+	/** Build a lightweight summary of a context for WS broadcast */
+	function summarizeContext(ctx) {
+		return {
+			id: ctx.id,
+			endpoint: ctx.endpoint,
+			state: ctx.state,
+			startTime: ctx.startTime,
+			durationMs: ctx.durationMs,
+			model: ctx.originalRequest?.model,
+			stream: ctx.originalRequest?.stream,
+			attemptCount: ctx.attempts.length,
+			currentStrategy: ctx.currentAttempt?.strategy,
+			queueWaitMs: ctx.queueWaitMs
+		};
+	}
 	return {
 		create(opts) {
 			const ctx = createRequestContext({
@@ -3824,6 +3999,11 @@ function createRequestContextManager() {
 				type: "created",
 				context: ctx
 			});
+			notifyActiveRequestChanged({
+				action: "created",
+				request: summarizeContext(ctx),
+				activeCount: activeContexts.size
+			});
 			return ctx;
 		},
 		get(id) {
@@ -3857,10 +4037,26 @@ let serverInstance = null;
 let _isShuttingDown = false;
 let shutdownResolve = null;
 let shutdownAbortController = null;
+let shutdownDrainAbortController = null;
+let shutdownPhase = "idle";
+let shutdownPromise = null;
+/** Transition shutdown phase and broadcast via WebSocket */
+function setPhase(phase) {
+	const prev = shutdownPhase;
+	shutdownPhase = phase;
+	if (prev !== phase) notifyShutdownPhaseChanged({
+		phase,
+		previousPhase: prev
+	});
+}
 /** Check if the server is in shutdown state (used by middleware to reject new requests) */
 function getIsShuttingDown() {
 	return _isShuttingDown;
 }
+/** Get the current shutdown phase */
+function getShutdownPhase() {
+	return shutdownPhase;
+}
 /**
 * Get the shutdown abort signal.
 * Returns undefined before shutdown starts. During Phase 1–2 the signal is
@@ -3900,9 +4096,11 @@ function formatActiveRequestsSummary(requests) {
 async function drainActiveRequests(timeoutMs, tracker, opts) {
 	const pollInterval = opts?.pollIntervalMs ?? DRAIN_POLL_INTERVAL_MS;
 	const progressInterval = opts?.progressIntervalMs ?? DRAIN_PROGRESS_INTERVAL_MS;
+	const abortSignal = opts?.abortSignal;
 	const deadline = Date.now() + timeoutMs;
 	let lastProgressLog = 0;
 	while (Date.now() < deadline) {
+		if (abortSignal?.aborted) return "aborted";
 		const active = tracker.getActiveRequests();
 		if (active.length === 0) return "drained";
 		const now = Date.now();
@@ -3910,7 +4108,23 @@ async function drainActiveRequests(timeoutMs, tracker, opts) {
 			lastProgressLog = now;
 			consola.info(formatActiveRequestsSummary(active));
 		}
-		await new Promise((resolve) => setTimeout(resolve, pollInterval));
+		if (await new Promise((resolve) => {
+			let settled = false;
+			let onAbort;
+			const finish = (value) => {
+				if (settled) return;
+				settled = true;
+				if (abortSignal && onAbort) abortSignal.removeEventListener("abort", onAbort);
+				resolve(value);
+			};
+			const timeoutId = setTimeout(() => finish("timer"), pollInterval);
+			if (!abortSignal) return;
+			onAbort = () => {
+				clearTimeout(timeoutId);
+				finish("aborted");
+			};
+			abortSignal.addEventListener("abort", onAbort, { once: true });
+		}) === "aborted") return "aborted";
 	}
 	return "timeout";
 }
@@ -3935,6 +4149,7 @@ async function gracefulShutdown(signal, deps) {
 	};
 	_isShuttingDown = true;
 	shutdownAbortController = new AbortController();
+	setPhase("phase1");
 	consola.info(`Received ${signal}, shutting down gracefully...`);
 	try {
 		(deps?.contextManager ?? getRequestContextManager()).stopReaper();
@@ -3959,8 +4174,13 @@ async function gracefulShutdown(signal, deps) {
 	const activeCount = tracker.getActiveRequests().length;
 	if (activeCount > 0) {
 		consola.info(`Phase 2: Waiting up to ${gracefulWaitMs / 1e3}s for ${activeCount} active request(s)...`);
+		setPhase("phase2");
+		shutdownDrainAbortController = new AbortController();
 		try {
-			if (await drainActiveRequests(gracefulWaitMs, tracker, drainOpts) === "drained") {
+			if (await drainActiveRequests(gracefulWaitMs, tracker, {
+				...drainOpts,
+				abortSignal: shutdownDrainAbortController.signal
+			}) === "drained") {
 				consola.info("All requests completed naturally");
 				finalize(tracker);
 				return;
@@ -3970,9 +4190,14 @@ async function gracefulShutdown(signal, deps) {
 		}
 		const remaining = tracker.getActiveRequests().length;
 		consola.info(`Phase 3: Sending abort signal to ${remaining} remaining request(s), waiting up to ${abortWaitMs / 1e3}s...`);
+		setPhase("phase3");
+		shutdownDrainAbortController = new AbortController();
 		shutdownAbortController.abort();
 		try {
-			if (await drainActiveRequests(abortWaitMs, tracker, drainOpts) === "drained") {
+			if (await drainActiveRequests(abortWaitMs, tracker, {
+				...drainOpts,
+				abortSignal: shutdownDrainAbortController.signal
+			}) === "drained") {
 				consola.info("All requests completed after abort signal");
 				finalize(tracker);
 				return;
@@ -3980,6 +4205,7 @@ async function gracefulShutdown(signal, deps) {
 		} catch (error) {
 			consola.error("Error during Phase 3 drain:", error);
 		}
+		setPhase("phase4");
 		const forceRemaining = tracker.getActiveRequests().length;
 		consola.warn(`Phase 4: Force-closing ${forceRemaining} remaining request(s)`);
 		if (server) try {
@@ -3992,22 +4218,41 @@ async function gracefulShutdown(signal, deps) {
 }
 /** Final cleanup after drain/force-close */
 function finalize(tracker) {
+	setPhase("finalized");
+	shutdownDrainAbortController = null;
 	tracker.destroy();
 	consola.info("Shutdown complete");
 	shutdownResolve?.();
 }
+function handleShutdownSignal(signal, opts) {
+	const shutdownFn = opts?.gracefulShutdownFn ?? ((shutdownSignal) => gracefulShutdown(shutdownSignal));
+	const exitFn = opts?.exitFn ?? ((code) => process.exit(code));
+	if (_isShuttingDown) {
+		if (shutdownPhase === "phase2") {
+			consola.warn("Second signal received, escalating shutdown to abort active requests");
+			shutdownDrainAbortController?.abort();
+			return shutdownPromise ?? void 0;
+		}
+		if (shutdownPhase === "phase3") {
+			consola.warn("Additional signal received, escalating shutdown to force-close remaining requests");
+			shutdownDrainAbortController?.abort();
+			return shutdownPromise ?? void 0;
+		}
+		consola.warn("Additional signal received during forced shutdown, exiting immediately");
+		exitFn(1);
+		return shutdownPromise ?? void 0;
+	}
+	shutdownPromise = shutdownFn(signal).catch((error) => {
+		consola.error("Fatal error during shutdown:", error);
+		shutdownResolve?.();
+		exitFn(1);
+	});
+	return shutdownPromise;
+}
 /** Setup process signal handlers for graceful shutdown */
 function setupShutdownHandlers() {
 	const handler = (signal) => {
-		if (_isShuttingDown) {
-			consola.warn("Second signal received, forcing immediate exit");
-			process.exit(1);
-		}
-		gracefulShutdown(signal).catch((error) => {
-			consola.error("Fatal error during shutdown:", error);
-			shutdownResolve?.();
-			process.exit(1);
-		});
+		handleShutdownSignal(signal);
 	};
 	process.on("SIGINT", () => handler("SIGINT"));
 	process.on("SIGTERM", () => handler("SIGTERM"));
@@ -4780,9 +5025,56 @@ const setupClaudeCode = defineCommand({
 	}
 });
+//#endregion
+//#region src/lib/serve.ts
+/** Start the HTTP server and return a ServerInstance. */
+async function startServer(options) {
+	if (typeof globalThis.Bun !== "undefined") return startBunServer(options);
+	return startNodeServer(options);
+}
+async function startNodeServer(options) {
+	const { createAdaptorServer } = await import("./dist-8vhX0s4a.mjs");
+	const nodeServer = createAdaptorServer({ fetch: options.fetch });
+	await new Promise((resolve, reject) => {
+		nodeServer.once("error", reject);
+		nodeServer.listen({
+			port: options.port,
+			host: options.hostname,
+			exclusive: false
+		}, () => {
+			nodeServer.removeListener("error", reject);
+			resolve();
+		});
+	});
+	return {
+		nodeServer,
+		close(force) {
+			return new Promise((resolve, reject) => {
+				if (force && "closeAllConnections" in nodeServer) nodeServer.closeAllConnections();
+				nodeServer.close((err) => err ? reject(err) : resolve());
+			});
+		}
+	};
+}
+async function startBunServer(options) {
+	const bunServer = Bun.serve({
+		fetch(request, server) {
+			return options.fetch(request, { server });
+		},
+		port: options.port,
+		hostname: options.hostname,
+		idleTimeout: 255,
+		...options.bunWebSocket ? { websocket: options.bunWebSocket } : {}
+	});
+	return { close(force) {
+		bunServer.stop(force ?? false);
+		return Promise.resolve();
+	} };
+}
 //#endregion
 //#region package.json
-var version = "0.7.23";
+var version = "0.8.1-beta.1";
 //#endregion
 //#region src/lib/context/error-persistence.ts
@@ -4799,6 +5091,8 @@ var version = "0.7.23";
 * Files:
 *   - meta.json:       structured metadata (timestamp, endpoint, model, error, attempts)
 *   - request.json:    full request payload (messages capped at 50 for size)
+*   - effective-request.json: logical request after sanitize/truncate/retry
+*   - wire-request.json: final outbound HTTP payload + headers sent upstream
 *   - response.txt:    raw upstream response body (if available)
 *   - sse-events.json: recorded SSE events (if streaming request failed mid-stream)
 */
@@ -4823,6 +5117,14 @@ async function writeErrorEntry(entry) {
 			messageCount: entry.request.messages?.length,
 			toolCount: entry.request.tools?.length
 		},
+		effective: entry.effectiveRequest ? {
+			model: entry.effectiveRequest.model,
+			messageCount: entry.effectiveRequest.messageCount
+		} : void 0,
+		wire: entry.wireRequest ? {
+			model: entry.wireRequest.model,
+			messageCount: entry.wireRequest.messageCount
+		} : void 0,
 		response: entry.response ? {
 			success: entry.response.success,
 			model: entry.response.model,
@@ -4844,6 +5146,8 @@ async function writeErrorEntry(entry) {
 	}
 	if (entry.response?.responseText) files.push(["response.txt", entry.response.responseText]);
 	if (entry.sseEvents?.length) files.push(["sse-events.json", JSON.stringify(entry.sseEvents, null, 2)]);
+	if (entry.effectiveRequest) files.push(["effective-request.json", JSON.stringify(entry.effectiveRequest.payload ?? entry.effectiveRequest, null, 2)]);
+	if (entry.wireRequest) files.push(["wire-request.json", JSON.stringify(entry.wireRequest, null, 2)]);
 	const id = randomBytes(4).toString("hex");
 	const dirPath = path$1.join(PATHS.ERROR_DIR, `${formatTimestamp()}_${id}`);
 	await fs$1.mkdir(dirPath, { recursive: true });
@@ -4861,34 +5165,26 @@ function formatTimestamp() {
 function handleHistoryEvent(event) {
 	if (!isHistoryEnabled()) return;
 	switch (event.type) {
-		case "created": {
-			const ctx = event.context;
-			const sessionId = getCurrentSession(ctx.endpoint);
-			insertEntry({
-				id: ctx.id,
-				sessionId,
-				timestamp: ctx.startTime,
-				endpoint: ctx.endpoint,
-				request: {
-					model: ctx.originalRequest?.model,
-					messages: ctx.originalRequest?.messages,
-					stream: ctx.originalRequest?.stream,
-					tools: ctx.originalRequest?.tools,
-					system: ctx.originalRequest?.system
-				}
-			});
-			break;
-		}
+		case "created": break;
 		case "updated":
-			if (event.field === "originalRequest" && event.context.originalRequest) {
+			if (event.field === "originalRequest") {
 				const orig = event.context.originalRequest;
-				updateEntry(event.context.id, { request: {
-					model: orig.model,
-					messages: orig.messages,
-					stream: orig.stream,
-					tools: orig.tools,
-					system: orig.system
-				} });
+				if (!orig) break;
+				const ctx = event.context;
+				const sessionId = getCurrentSession(ctx.endpoint);
+				insertEntry({
+					id: ctx.id,
+					sessionId,
+					timestamp: ctx.startTime,
+					endpoint: ctx.endpoint,
+					request: {
+						model: orig.model,
+						messages: orig.messages,
+						stream: orig.stream,
+						tools: orig.tools,
+						system: orig.system
+					}
+				});
 			}
 			if (event.field === "pipelineInfo" && event.context.pipelineInfo) updateEntry(event.context.id, { pipelineInfo: event.context.pipelineInfo });
 			break;
@@ -4900,7 +5196,24 @@ function handleHistoryEvent(event) {
 				response,
 				durationMs: entryData.durationMs,
 				sseEvents: entryData.sseEvents,
-				httpHeaders: entryData.httpHeaders
+				...entryData.effectiveRequest && { effectiveRequest: {
+					model: entryData.effectiveRequest.model,
+					format: entryData.effectiveRequest.format,
+					messageCount: entryData.effectiveRequest.messageCount,
+					messages: entryData.effectiveRequest.messages,
+					system: entryData.effectiveRequest.system,
+					payload: entryData.effectiveRequest.payload
+				} },
+				...entryData.wireRequest && { wireRequest: {
+					model: entryData.wireRequest.model,
+					format: entryData.wireRequest.format,
+					messageCount: entryData.wireRequest.messageCount,
+					messages: entryData.wireRequest.messages,
+					system: entryData.wireRequest.system,
+					payload: entryData.wireRequest.payload,
+					headers: entryData.wireRequest.headers ?? entryData.httpHeaders?.request
+				} },
+				...entryData.attempts && { attempts: entryData.attempts }
 			});
 			break;
 		}
@@ -4971,7 +5284,10 @@ function toHistoryResponse(entryData) {
 		},
 		stop_reason: r.stop_reason,
 		error: r.error,
-		content: r.content
+		status: r.status,
+		content: r.content,
+		rawBody: r.responseText,
+		headers: entryData.httpHeaders?.response
 	};
 }
 function registerContextConsumers(manager) {
@@ -4986,6 +5302,7 @@ const ENDPOINT = {
 	MESSAGES: "/v1/messages",
 	CHAT_COMPLETIONS: "/chat/completions",
 	RESPONSES: "/responses",
+	WS_RESPONSES: "ws:/responses",
 	EMBEDDINGS: "/v1/embeddings"
 };
 /** Capability type → default endpoints for legacy models without `supported_endpoints` */
@@ -5016,9 +5333,16 @@ function isEndpointSupported(model, endpoint) {
 	if (!model?.supported_endpoints) return true;
 	return model.supported_endpoints.includes(endpoint);
 }
+/**
+* Report whether a model can be reached through the Responses API over at
+* least one transport: HTTP (`/responses`) or WebSocket (`ws:/responses`).
+*/
+function isResponsesSupported(model) {
+	const transports = [ENDPOINT.RESPONSES, ENDPOINT.WS_RESPONSES];
+	return transports.some((endpoint) => isEndpointSupported(model, endpoint));
+}
 //#endregion
-//#region src/lib/ws.ts
+//#region src/lib/ws-adapter.ts
 /** Create a shared WebSocket adapter for the given Hono app */
 async function createWebSocketAdapter(app) {
 	if (typeof globalThis.Bun !== "undefined") {
@@ -5039,8 +5363,9 @@ function handleGetEntries(c) {
 	if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
 	const query = c.req.query();
 	const result = getHistorySummaries({
-		page: query.page ? Number.parseInt(query.page, 10) : void 0,
+		cursor: query.cursor || void 0,
 		limit: query.limit ? Number.parseInt(query.limit, 10) : void 0,
+		direction: query.direction || void 0,
 		model: query.model || void 0,
 		endpoint: query.endpoint,
 		success: query.success ? query.success === "true" : void 0,
@@ -5096,7 +5421,7 @@ function handleGetSession(c) {
 	if (!session) return c.json({ error: "Session not found" }, 404);
 	const query = c.req.query();
 	const result = getSessionEntries(id, {
-		page: query.page ? Number.parseInt(query.page, 10) : void 0,
+		cursor: query.cursor || void 0,
 		limit: query.limit ? Number.parseInt(query.limit, 10) : void 0
 	});
 	return c.json({
@@ -5155,7 +5480,10 @@ function initHistoryWebSocket(rootApp, upgradeWs) {
 		onClose(_event, ws) {
 			removeClient(ws.raw);
 		},
-		onMessage(_event, _ws) {},
+		onMessage(event, ws) {
+			const raw = typeof event.data === "string" ? event.data : String(event.data);
+			handleClientMessage(ws.raw, raw);
+		},
 		onError(event, ws) {
 			consola.debug("WebSocket error:", event);
 			removeClient(ws.raw);
@@ -5465,9 +5793,21 @@ async function executeRequestPipeline(opts) {
 	let effectivePayload = opts.payload;
 	let lastError = null;
 	let totalQueueWaitMs = 0;
+	let lastStrategyName;
 	for (let attempt = 0; attempt <= maxRetries; attempt++) {
+		requestContext?.beginAttempt({ strategy: attempt > 0 ? lastStrategyName : void 0 });
+		lastStrategyName = void 0;
+		if (requestContext) {
+			const p = effectivePayload;
+			requestContext.setAttemptEffectiveRequest({
+				model: typeof p.model === "string" ? p.model : "",
+				resolvedModel: model,
+				messages: Array.isArray(p.messages) ? p.messages : [],
+				payload: effectivePayload,
+				format: adapter.format
+			});
+		}
 		onBeforeAttempt?.(attempt, effectivePayload);
-		requestContext?.beginAttempt({ strategy: attempt > 0 ? "retry" : void 0 });
 		requestContext?.transition("executing");
 		try {
 			const { result: response, queueWaitMs } = await adapter.execute(effectivePayload);
@@ -5501,6 +5841,8 @@ async function executeRequestPipeline(opts) {
 							totalQueueWaitMs += action.waitMs;
 							requestContext?.addQueueWaitMs(action.waitMs);
 						}
+						if (action.meta?.sanitization && requestContext) requestContext.setAttemptSanitization(action.meta.sanitization);
+						lastStrategyName = strategy.name;
 						effectivePayload = action.payload;
 						onRetry?.(attempt, strategy.name, action.payload, action.meta);
 						handled = true;
@@ -5872,33 +6214,45 @@ async function processResponsesInstructions(instructions, model) {
 //#endregion
 //#region src/lib/openai/responses-client.ts
+/**
+* Build the outbound payload and headers for a Copilot `/responses` call.
+*
+* The payload is passed through unchanged as `wire`. Headers are the result
+* of `copilotHeaders` plus an `X-Initiator` header.
+*
+* - Vision is requested only when the input has vision content and the
+*   resolved model does not explicitly declare `vision: false`.
+* - The request counts as an agent call when `input` is an array containing
+*   an assistant-role item, a `function_call` or a `function_call_output`.
+*
+* @param payload - Responses API request payload.
+* @param opts - Optional; `resolvedModel` supplies capabilities and request headers.
+* @returns `{ wire, headers }`.
+*/
+function prepareResponsesRequest(payload, opts) {
+	const wire = payload;
+	const resolvedModel = opts?.resolvedModel;
+	const enableVision = hasVisionContent(wire.input);
+	const isAgentItem = (item) => item.role === "assistant" || item.type === "function_call" || item.type === "function_call_output";
+	const isAgentCall = Array.isArray(wire.input) && wire.input.some((item) => isAgentItem(item));
+	const modelSupportsVision = resolvedModel?.capabilities?.supports?.vision !== false;
+	const baseHeaders = copilotHeaders(state, {
+		vision: enableVision && modelSupportsVision,
+		modelRequestHeaders: resolvedModel?.request_headers,
+		intent: isAgentCall ? "conversation-agent" : "conversation-panel"
+	});
+	const headers = {
+		...baseHeaders,
+		"X-Initiator": isAgentCall ? "agent" : "user"
+	};
+	return { wire, headers };
+}
 /** Call Copilot /responses endpoint */
 const createResponses = async (payload, opts) => {
 	if (!state.copilotToken) throw new Error("Copilot token not found");
-	const enableVision = hasVisionContent(payload.input);
-	const isAgentCall = Array.isArray(payload.input) && payload.input.some((item) => item.role === "assistant" || item.type === "function_call" || item.type === "function_call_output");
-	const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
-	const headers = {
-		...copilotHeaders(state, {
-			vision: enableVision && modelSupportsVision,
-			modelRequestHeaders: opts?.resolvedModel?.request_headers,
-			intent: isAgentCall ? "conversation-agent" : "conversation-panel"
-		}),
-		"X-Initiator": isAgentCall ? "agent" : "user"
-	};
+	const prepared = prepareResponsesRequest(payload, opts);
+	opts?.onPrepared?.({
+		wire: prepared.wire,
+		headers: sanitizeHeadersForHistory(prepared.headers)
+	});
+	const { wire, headers } = prepared;
 	const fetchSignal = createFetchSignal();
 	const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
 		method: "POST",
 		headers,
-		body: JSON.stringify(payload),
+		body: JSON.stringify(wire),
 		signal: fetchSignal
 	});
 	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
 	if (!response.ok) {
 		consola.error("Failed to create responses", response);
-		throw await HTTPError.fromResponse("Failed to create responses", response, payload.model);
+		throw await HTTPError.fromResponse("Failed to create responses", response, wire.model);
 	}
-	if (payload.stream) return events(response);
+	if (wire.stream) return events(response);
 	return await response.json();
 };
 /** Check if the input contains any image content */
@@ -6010,7 +6364,7 @@ function createTokenRefreshStrategy() {
 * centralizes that configuration to avoid duplication.
 */
 /** Create the FormatAdapter for Responses API pipeline execution */
-function createResponsesAdapter(selectedModel, headersCapture) {
+function createResponsesAdapter(selectedModel, headersCapture, onPrepared) {
 	return {
 		format: "openai-responses",
 		sanitize: (p) => ({
@@ -6020,7 +6374,16 @@ function createResponsesAdapter(selectedModel, headersCapture) {
 		}),
 		execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
 			resolvedModel: selectedModel,
-			headersCapture
+			headersCapture,
+			onPrepared: ({ wire, headers }) => {
+				onPrepared?.({
+					model: typeof wire.model === "string" ? wire.model : p.model,
+					messages: [],
+					payload: wire,
+					headers,
+					format: "openai-responses"
+				});
+			}
 		})),
 		logPayloadSize: (p) => {
 			const count = typeof p.input === "string" ? 1 : p.input.length;
@@ -6118,7 +6481,7 @@ async function handleResponseCreate(ws, rawPayload) {
 	const resolvedModel = resolveModelName(requestedModel);
 	payload.model = resolvedModel;
 	const selectedModel = state.modelIndex.get(resolvedModel);
-	if (!isEndpointSupported(selectedModel, ENDPOINT.RESPONSES)) {
+	if (!isResponsesSupported(selectedModel)) {
 		sendErrorAndClose(ws, `Model "${resolvedModel}" does not support the Responses API`, "invalid_request_error");
 		return;
 	}
@@ -6146,7 +6509,9 @@ async function handleResponseCreate(ws, rawPayload) {
 		clientModel: requestedModel
 	});
 	const headersCapture = {};
-	const adapter = createResponsesAdapter(selectedModel, headersCapture);
+	const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
+		reqCtx.setAttemptWireRequest(wireRequest);
+	});
 	const strategies = createResponsesStrategies();
 	try {
 		const pipelineResult = await executeRequestPipeline({
@@ -6155,15 +6520,16 @@ async function handleResponseCreate(ws, rawPayload) {
 			payload,
 			originalPayload: payload,
 			model: selectedModel,
-			maxRetries: 1
+			maxRetries: 1,
+			requestContext: reqCtx
 		});
 		reqCtx.setHttpHeaders(headersCapture);
 		const iterator = pipelineResult.response[Symbol.asyncIterator]();
 		const acc = createResponsesStreamAccumulator();
 		const idleTimeoutMs = state.streamIdleTimeout > 0 ? state.streamIdleTimeout * 1e3 : 0;
-		const shutdownSignal = getShutdownSignal();
 		let eventsReceived = 0;
 		while (true) {
+			const shutdownSignal = getShutdownSignal();
 			const result = await raceIteratorNext(iterator.next(), {
 				idleTimeoutMs,
 				abortSignal: shutdownSignal ?? void 0
@@ -6954,32 +7320,44 @@ function createTruncationResponseMarkerOpenAI(result) {
 //#endregion
 //#region src/lib/openai/client.ts
-const createChatCompletions = async (payload, opts) => {
-	if (!state.copilotToken) throw new Error("Copilot token not found");
-	const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
-	const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
+function prepareChatCompletionsRequest(payload, opts) {
+	const wire = payload;
+	const enableVision = wire.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
+	const isAgentCall = wire.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
 	const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
-	const headers = {
-		...copilotHeaders(state, {
-			vision: enableVision && modelSupportsVision,
-			modelRequestHeaders: opts?.resolvedModel?.request_headers,
-			intent: isAgentCall ? "conversation-agent" : "conversation-panel"
-		}),
-		"X-Initiator": isAgentCall ? "agent" : "user"
+	return {
+		wire,
+		headers: {
+			...copilotHeaders(state, {
+				vision: enableVision && modelSupportsVision,
+				modelRequestHeaders: opts?.resolvedModel?.request_headers,
+				intent: isAgentCall ? "conversation-agent" : "conversation-panel"
+			}),
+			"X-Initiator": isAgentCall ? "agent" : "user"
+		}
 	};
+}
+const createChatCompletions = async (payload, opts) => {
+	if (!state.copilotToken) throw new Error("Copilot token not found");
+	const prepared = prepareChatCompletionsRequest(payload, opts);
+	opts?.onPrepared?.({
+		wire: prepared.wire,
+		headers: sanitizeHeadersForHistory(prepared.headers)
+	});
+	const { wire, headers } = prepared;
 	const fetchSignal = createFetchSignal();
 	const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
 		method: "POST",
 		headers,
-		body: JSON.stringify(payload),
+		body: JSON.stringify(wire),
 		signal: fetchSignal
 	});
 	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
 	if (!response.ok) {
 		consola.error("Failed to create chat completions", response);
-		throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
+		throw await HTTPError.fromResponse("Failed to create chat completions", response, wire.model);
 	}
-	if (payload.stream) return events(response);
+	if (wire.stream) return events(response);
 	return await response.json();
 };
@@ -7364,7 +7742,16 @@ async function executeRequest(opts) {
 		sanitize: (p) => sanitizeOpenAIMessages(p),
 		execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
 			resolvedModel: selectedModel,
-			headersCapture
+			headersCapture,
+			onPrepared: ({ wire, headers }) => {
+				reqCtx.setAttemptWireRequest({
+					model: typeof wire.model === "string" ? wire.model : payload.model,
+					messages: Array.isArray(wire.messages) ? wire.messages : [],
+					payload: wire,
+					headers,
+					format: "openai-chat-completions"
+				});
+			}
 		})),
 		logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
 	};
@@ -7476,8 +7863,8 @@ async function handleStreamingResponse(opts) {
 			acc.rawContent += marker;
 		}
 		const iterator = response[Symbol.asyncIterator]();
-		const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
 		for (;;) {
+			const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
 			const result = await raceIteratorNext(iterator.next(), {
 				idleTimeoutMs,
 				abortSignal
@@ -7528,6 +7915,48 @@ chatCompletionRoutes.post("/", async (c) => {
 	}
 });
+//#endregion
+//#region src/routes/config/route.ts
+/**
+* Read-only snapshot of the runtime configuration currently in effect.
+*
+* `GET /` responds with selected fields of the shared `state` as JSON.
+* System prompt overrides are reported only as a count, rewrite rules go
+* through `serializeRewriteSystemReminders`, and a missing rate limiter
+* config is reported as `null`.
+*/
+const configRoutes = new Hono();
+configRoutes.get("/", (c) => {
+	const snapshot = {
+		autoTruncate: state.autoTruncate,
+		compressToolResultsBeforeTruncate: state.compressToolResultsBeforeTruncate,
+		stripServerTools: state.stripServerTools,
+		immutableThinkingMessages: state.immutableThinkingMessages,
+		dedupToolCalls: state.dedupToolCalls,
+		contextEditingMode: state.contextEditingMode,
+		rewriteSystemReminders: serializeRewriteSystemReminders(state.rewriteSystemReminders),
+		stripReadToolResultTags: state.stripReadToolResultTags,
+		systemPromptOverridesCount: state.systemPromptOverrides.length,
+		normalizeResponsesCallIds: state.normalizeResponsesCallIds,
+		fetchTimeout: state.fetchTimeout,
+		streamIdleTimeout: state.streamIdleTimeout,
+		staleRequestMaxAge: state.staleRequestMaxAge,
+		shutdownGracefulWait: state.shutdownGracefulWait,
+		shutdownAbortWait: state.shutdownAbortWait,
+		historyLimit: state.historyLimit,
+		historyMinEntries: state.historyMinEntries,
+		modelOverrides: state.modelOverrides,
+		rateLimiter: state.adaptiveRateLimitConfig ?? null
+	};
+	return c.json(snapshot);
+});
+/**
+* Convert the `rewriteSystemReminders` setting into a JSON-friendly value.
+*
+* A boolean setting is returned unchanged. Otherwise every rule is mapped to
+* a plain object: a RegExp `from` becomes its `source` string, `to` is
+* copied, `method` is included only when truthy, and a `modelPattern` RegExp
+* is emitted as its `source` under the `model` key.
+*/
+function serializeRewriteSystemReminders(value) {
+	if (typeof value === "boolean") return value;
+	return value.map((rule) => {
+		const { from, to, method, modelPattern } = rule;
+		const serialized = {
+			from: from instanceof RegExp ? from.source : from,
+			to
+		};
+		if (method) serialized.method = method;
+		if (modelPattern) serialized.model = modelPattern.source;
+		return serialized;
+	});
+}
 //#endregion
 //#region src/lib/openai/embeddings.ts
 const createEmbeddings = async (payload) => {
@@ -7565,6 +7994,25 @@ eventLoggingRoutes.post("/batch", (c) => {
 	return c.text("OK", 200);
 });
+//#endregion
+//#region src/routes/logs/route.ts
+/**
+* Live log endpoint — recent EntrySummary snapshot for the log viewer page.
+*
+* Returns the most recent entries (newest first, capped at `limit`).
+* After initial load, the web client subscribes to the existing /history/ws
+* WebSocket for real-time `entry_added` / `entry_updated` events.
+*
+* The `limit` query parameter is truncated to an integer and clamped to
+* 1..500. A missing, non-numeric, zero or negative value falls back to 100,
+* so the history query never receives a negative or fractional limit.
+*/
+const logsRoutes = new Hono();
+logsRoutes.get("/", (c) => {
+	if (!isHistoryEnabled()) return c.json({ error: "History recording is not enabled" }, 400);
+	const DEFAULT_LIMIT = 100;
+	const MAX_LIMIT = 500;
+	// Number(undefined) and Number("abc") are NaN; NaN >= 1 is false, so both use the default.
+	const requested = Math.trunc(Number(c.req.query("limit")));
+	const limit = requested >= 1 ? Math.min(requested, MAX_LIMIT) : DEFAULT_LIMIT;
+	const result = getHistorySummaries({ limit });
+	return c.json({
+		entries: result.entries,
+		total: result.total
+	});
+});
 //#endregion
 //#region src/types/api/anthropic.ts
 /** Type guard for server-side tool result blocks (web_search, tool_search, code_execution, etc.) */
@@ -7574,6 +8022,28 @@ function isServerToolResultBlock(block) {
 	return type !== "tool_result" && type.endsWith("_tool_result") && "tool_use_id" in block;
 }
+//#endregion
+//#region src/lib/anthropic/thinking-immutability.ts
+/**
+* Whether an assistant message carries thinking content.
+*
+* True only when the message role is `assistant`, its content is an array,
+* and at least one block has type `thinking` or `redacted_thinking`.
+* Messages with string content, or from any other role, yield `false`.
+*/
+function hasThinkingSignatureBlocks(msg) {
+	if (msg.role !== "assistant" || !Array.isArray(msg.content)) return false;
+	const isThinkingBlock = (block) => block.type === "thinking" || block.type === "redacted_thinking";
+	return msg.content.some((block) => isThinkingBlock(block));
+}
+/**
+* Strong preservation check for assistant messages that contain thinking blocks.
+*
+* Evaluates to a truthy value only when `state.immutableThinkingMessages` is
+* enabled and the message passes `hasThinkingSignatureBlocks`. Callers use
+* this to leave the whole assistant message untouched by rewrite passes.
+*/
+function isImmutableThinkingAssistantMessage(msg) {
+	const { immutableThinkingMessages: strongPreservation } = state;
+	return strongPreservation && hasThinkingSignatureBlocks(msg);
+}
 //#endregion
 //#region src/lib/anthropic/sanitize.ts
 /**
@@ -7676,6 +8146,7 @@ function sanitizeMessageParamContent(msg) {
 			content: blocks
 		} : msg;
 	}
+	if (isImmutableThinkingAssistantMessage(msg)) return msg;
 	const { blocks, modified } = sanitizeTextBlocksInArray(msg.content, (b) => b.type === "text" && "text" in b ? b.text : void 0, (b, text) => ({
 		...b,
 		text
@@ -7738,7 +8209,7 @@ function sanitizeAnthropicSystemPrompt(system) {
 function filterEmptyAnthropicTextBlocks(messages) {
 	return messages.map((msg) => {
 		if (typeof msg.content === "string") return msg;
-		if (msg.role === "assistant" && msg.content.some((b) => b.type === "thinking" || b.type === "redacted_thinking")) return msg;
+		if (msg.role === "assistant" && hasThinkingSignatureBlocks(msg)) return msg;
 		const filtered = msg.content.filter((block) => {
 			if (block.type === "text" && "text" in block) return block.text.trim() !== "";
 			return true;
@@ -7805,6 +8276,10 @@ function processToolBlocks(messages, tools) {
 			continue;
 		}
 		if (msg.role === "assistant") {
+			if (isImmutableThinkingAssistantMessage(msg)) {
+				result.push(msg);
+				continue;
+			}
 			const newContent = [];
 			let modified = false;
 			for (const block of msg.content) if (block.type === "tool_use") {
@@ -7945,7 +8420,7 @@ function deduplicateToolCalls(messages, mode = "input") {
 	const protectedIds = /* @__PURE__ */ new Set();
 	for (const msg of messages) {
 		if (msg.role !== "assistant" || typeof msg.content === "string") continue;
-		if (!msg.content.some((b) => b.type === "thinking" || b.type === "redacted_thinking")) continue;
+		if (!hasThinkingSignatureBlocks(msg)) continue;
 		for (const block of msg.content) if (block.type === "tool_use") protectedIds.add(block.id);
 	}
 	const removedIds = /* @__PURE__ */ new Set();
@@ -7989,6 +8464,10 @@ function deduplicateToolCalls(messages, mode = "input") {
 	for (const msg of filtered) {
 		const prev = merged.at(-1);
 		if (prev && prev.role === msg.role) {
+			if (prev.role === "assistant" && (isImmutableThinkingAssistantMessage(prev) || isImmutableThinkingAssistantMessage(msg))) {
+				merged.push(msg);
+				continue;
+			}
 			const prevContent = typeof prev.content === "string" ? [{
 				type: "text",
 				text: prev.content
@@ -8361,6 +8840,7 @@ function stripThinkingBlocks(messages, preserveRecentCount) {
 	return {
 		messages: messages.map((msg, i) => {
 			if (i >= stripBefore || msg.role !== "assistant" || !Array.isArray(msg.content)) return msg;
+			if (isImmutableThinkingAssistantMessage(msg)) return msg;
 			if (!msg.content.some((block) => block.type === "thinking" || block.type === "redacted_thinking")) return msg;
 			const filtered = msg.content.filter((block) => {
 				if (block.type === "thinking" || block.type === "redacted_thinking") {
@@ -8802,6 +9282,28 @@ async function handleCountTokens(c) {
 	}
 }
+//#endregion
+//#region src/lib/anthropic/feature-negotiation.ts
+/** Lifetime of an "unsupported" mark, in milliseconds (10 minutes). */
+const NEGOTIATION_TTL_MS = 10 * 60 * 1e3;
+/** Cache key -> epoch-millisecond timestamp at which the mark expires. */
+const unsupportedFeatures = /* @__PURE__ */ new Map();
+/**
+* Build the cache key for one model/feature pair.
+*
+* The key combines the current Copilot base URL, the fixed
+* `anthropic-messages` segment, the normalized model id and the feature
+* name, separated by `|`.
+*/
+function makeKey(modelId, feature) {
+	const baseUrl = copilotBaseUrl(state);
+	const normalizedModel = normalizeForMatching(modelId);
+	return `${baseUrl}|anthropic-messages|${normalizedModel}|${feature}`;
+}
+/** Whether the given expiry timestamp (epoch milliseconds) is still in the future. */
+function isFresh(expiresAt) {
+	const now = Date.now();
+	return now < expiresAt;
+}
+/**
+* Record that `feature` is unsupported for `modelId`.
+*
+* The mark expires `NEGOTIATION_TTL_MS` milliseconds after this call;
+* marking again overwrites the previous expiry.
+*/
+function markAnthropicFeatureUnsupported(modelId, feature) {
+	const cacheKey = makeKey(modelId, feature);
+	const expiresAt = Date.now() + NEGOTIATION_TTL_MS;
+	unsupportedFeatures.set(cacheKey, expiresAt);
+}
+/**
+* Whether `feature` is currently marked as unsupported for `modelId`.
+*
+* Returns `true` while a mark exists and has not expired. An expired mark is
+* removed from the cache on lookup and `false` is returned.
+*/
+function isAnthropicFeatureUnsupported(modelId, feature) {
+	const cacheKey = makeKey(modelId, feature);
+	const expiry = unsupportedFeatures.get(cacheKey);
+	if (expiry && isFresh(expiry)) return true;
+	// Stale entry: drop it so the feature can be negotiated again.
+	if (expiry) unsupportedFeatures.delete(cacheKey);
+	return false;
+}
 //#endregion
 //#region src/lib/anthropic/features.ts
 /**
@@ -8857,11 +9359,11 @@ function modelHasAdaptiveThinking(resolvedModel) {
 * The resolvedModel parameter provides model metadata for capability-based
 * decisions. When unavailable, falls back to name-based detection.
 */
-function buildAnthropicBetaHeaders(modelId, resolvedModel) {
+function buildAnthropicBetaHeaders(modelId, resolvedModel, opts) {
 	const headers = {};
 	const betaFeatures = [];
 	if (!modelHasAdaptiveThinking(resolvedModel)) betaFeatures.push("interleaved-thinking-2025-05-14");
-	if (isContextEditingEnabled(modelId)) betaFeatures.push("context-management-2025-06-27");
+	if (!opts?.disableContextManagement && isContextEditingEnabled(modelId)) betaFeatures.push("context-management-2025-06-27");
 	if (modelSupportsToolSearch(modelId)) betaFeatures.push("advanced-tool-use-2025-11-20");
 	if (betaFeatures.length > 0) headers["anthropic-beta"] = betaFeatures.join(",");
 	return headers;
@@ -9197,11 +9699,44 @@ function adjustThinkingBudget(wire) {
 */
 async function createAnthropicMessages(payload, opts) {
 	if (!state.copilotToken) throw new Error("Copilot token not found");
+	const { wire, headers } = prepareAnthropicRequest(payload, opts);
+	// Report the prepared request with sanitized headers before it is sent.
+	opts?.onPrepared?.({
+		wire,
+		headers: sanitizeHeadersForHistory(headers)
+	});
+	const { model, messages, tools, thinking } = wire;
+	consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
+	const fetchSignal = createFetchSignal();
+	const endpoint = `${copilotBaseUrl(state)}/v1/messages`;
+	const response = await fetch(endpoint, {
+		method: "POST",
+		headers,
+		body: JSON.stringify(wire),
+		signal: fetchSignal
+	});
+	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
+	if (response.ok) {
+		// Streaming requests yield parsed events; others yield the JSON body.
+		if (payload.stream) return events(response);
+		return await response.json();
+	}
+	consola.debug("Request failed:", {
+		model,
+		max_tokens: wire.max_tokens,
+		stream: wire.stream,
+		toolCount: tools?.length ?? 0,
+		thinking,
+		messageCount: messages.length
+	});
+	throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, model);
+}
+function prepareAnthropicRequest(payload, opts) {
 	const wire = buildWirePayload(payload);
 	adjustThinkingBudget(wire);
 	const model = wire.model;
 	const messages = wire.messages;
-	const tools = wire.tools;
 	const thinking = wire.thinking;
 	const enableVision = messages.some((msg) => {
 		if (typeof msg.content === "string") return false;
@@ -9209,6 +9744,8 @@ async function createAnthropicMessages(payload, opts) {
 	});
 	const isAgentCall = messages.some((msg) => msg.role === "assistant");
 	const modelSupportsVision = opts?.resolvedModel?.capabilities?.supports?.vision !== false;
+	const contextManagementDisabled = wire.context_management === null || isAnthropicFeatureUnsupported(model, "context_management");
+	if (contextManagementDisabled) delete wire.context_management;
 	const headers = {
 		...copilotHeaders(state, {
 			vision: enableVision && modelSupportsVision,
@@ -9217,9 +9754,9 @@ async function createAnthropicMessages(payload, opts) {
 		}),
 		"X-Initiator": isAgentCall ? "agent" : "user",
 		"anthropic-version": "2023-06-01",
-		...buildAnthropicBetaHeaders(model, opts?.resolvedModel)
+		...buildAnthropicBetaHeaders(model, opts?.resolvedModel, { disableContextManagement: contextManagementDisabled })
 	};
-	if (!wire.context_management && isContextEditingEnabled(model)) {
+	if (!contextManagementDisabled && !("context_management" in wire) && isContextEditingEnabled(model)) {
 		const hasThinking = Boolean(thinking && thinking.type !== "disabled");
 		const contextManagement = buildContextManagement(state.contextEditingMode, hasThinking);
 		if (contextManagement) {
@@ -9227,28 +9764,10 @@ async function createAnthropicMessages(payload, opts) {
 			consola.debug("[DirectAnthropic] Added context_management:", JSON.stringify(contextManagement));
 		}
 	}
-	consola.debug("Sending direct Anthropic request to Copilot /v1/messages");
-	const fetchSignal = createFetchSignal();
-	const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
-		method: "POST",
-		headers,
-		body: JSON.stringify(wire),
-		signal: fetchSignal
-	});
-	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
-	if (!response.ok) {
-		consola.debug("Request failed:", {
-			model,
-			max_tokens: wire.max_tokens,
-			stream: wire.stream,
-			toolCount: tools?.length ?? 0,
-			thinking,
-			messageCount: messages.length
-		});
-		throw await HTTPError.fromResponse("Failed to create Anthropic messages", response, model);
-	}
-	if (payload.stream) return events(response);
-	return await response.json();
+	return {
+		wire,
+		headers
+	};
 }
 //#endregion
@@ -9605,11 +10124,11 @@ function supportsDirectAnthropicApi(modelId) {
 * and the shutdown abort signal — so a stalled upstream connection can be
 * interrupted by either mechanism without waiting for the next event.
 */
-async function* processAnthropicStream(response, acc, clientAbortSignal) {
+async function* processAnthropicStream(response, acc, clientAbortSignal, shutdownSignalProvider = getShutdownSignal) {
 	const idleTimeoutMs = state.streamIdleTimeout * 1e3;
 	const iterator = response[Symbol.asyncIterator]();
-	const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbortSignal);
 	for (;;) {
+		const abortSignal = combineAbortSignals(shutdownSignalProvider(), clientAbortSignal);
 		const result = await raceIteratorNext(iterator.next(), {
 			idleTimeoutMs,
 			abortSignal
@@ -9759,6 +10278,48 @@ function createStreamRepetitionChecker(label, config) {
 	};
 }
+//#endregion
+//#region src/lib/request/strategies/context-management-retry.ts
+/** Case-insensitive match for the error text that rejects the `context_management` field. */
+const EXTRA_INPUTS_PATTERN = /context_management:\s*Extra inputs are not permitted/i;
+/** Whether `message` contains the "context_management: Extra inputs are not permitted" text. */
+function parseContextManagementExtraInputsError(message) {
+	return EXTRA_INPUTS_PATTERN.exec(message) !== null;
+}
+/**
+* Find the text to inspect for the context-management rejection.
+*
+* Order of preference:
+*   1. `error.message`, when it already matches the rejection pattern.
+*   2. `error.message` from the JSON in `error.raw.responseText`.
+*   3. The raw `responseText` itself, when it is not valid JSON or the parsed
+*      body has no error message.
+*
+* @returns The message string, or `null` when `error.raw` carries no string
+*          `responseText`.
+*/
+function extractErrorMessage(error) {
+	if (parseContextManagementExtraInputsError(error.message)) return error.message;
+	const raw = error.raw;
+	const hasResponseText = Boolean(raw) && typeof raw === "object" && "responseText" in raw && typeof raw.responseText === "string";
+	if (!hasResponseText) return null;
+	const body = raw.responseText;
+	try {
+		const parsed = JSON.parse(body);
+		return parsed.error?.message ?? body;
+	} catch {
+		// Not JSON (or not an object): fall back to the raw text.
+		return body;
+	}
+}
+/**
+* Retry strategy for requests rejected because `context_management` is not
+* accepted.
+*
+* - `canHandle` accepts only `bad_request` errors with status 400 whose
+*   extracted message matches the "Extra inputs are not permitted" pattern.
+* - `handle` marks the feature as unsupported for the payload's model, then
+*   retries with `context_management: null`. It aborts with the original
+*   error when the field was already `null`.
+*/
+function createContextManagementRetryStrategy() {
+	return {
+		name: "context-management-retry",
+		canHandle(error) {
+			const isBadRequest = error.type === "bad_request" && error.status === 400;
+			if (!isBadRequest) return false;
+			const message = extractErrorMessage(error);
+			return Boolean(message) && parseContextManagementExtraInputsError(message);
+		},
+		handle(error, currentPayload, _context) {
+			markAnthropicFeatureUnsupported(currentPayload.model, "context_management");
+			const alreadyDisabled = currentPayload.context_management === null;
+			const outcome = alreadyDisabled ? {
+				action: "abort",
+				error
+			} : {
+				action: "retry",
+				payload: {
+					...currentPayload,
+					context_management: null
+				},
+				meta: { disabledContextManagement: true }
+			};
+			return Promise.resolve(outcome);
+		}
+	};
+}
 //#endregion
 //#region src/lib/request/strategies/deferred-tool-retry.ts
 /**
@@ -9903,23 +10464,22 @@ async function handleMessages(c) {
 	});
 	const preprocessed = preprocessAnthropicMessages(anthropicPayload.messages);
 	anthropicPayload.messages = preprocessed.messages;
-	reqCtx.setPreprocessInfo({
+	return handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx, {
 		strippedReadTagCount: preprocessed.strippedReadTagCount,
 		dedupedToolCallCount: preprocessed.dedupedToolCallCount
 	});
-	return handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx);
 }
-async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
+async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx, preprocessInfo) {
 	consola.debug("Using direct Anthropic API path for model:", anthropicPayload.model);
 	const selectedModel = state.modelIndex.get(anthropicPayload.model);
 	const { payload: initialSanitized, stats: sanitizationStats } = sanitizeAnthropicMessages(preprocessTools(anthropicPayload));
-	reqCtx.addSanitizationInfo(toSanitizationInfo(sanitizationStats));
-	const hasPreprocessing = reqCtx.preprocessInfo ? reqCtx.preprocessInfo.dedupedToolCallCount > 0 || reqCtx.preprocessInfo.strippedReadTagCount > 0 : false;
+	const initialSanitizationInfo = toSanitizationInfo(sanitizationStats);
+	const hasPreprocessing = preprocessInfo.dedupedToolCallCount > 0 || preprocessInfo.strippedReadTagCount > 0;
 	if (sanitizationStats.totalBlocksRemoved > 0 || sanitizationStats.systemReminderRemovals > 0 || sanitizationStats.fixedNameCount > 0 || hasPreprocessing) {
 		const messageMapping = buildMessageMapping(anthropicPayload.messages, initialSanitized.messages);
 		reqCtx.setPipelineInfo({
-			rewrittenMessages: initialSanitized.messages,
-			rewrittenSystem: typeof initialSanitized.system === "string" ? initialSanitized.system : void 0,
+			preprocessing: preprocessInfo,
+			sanitization: [initialSanitizationInfo],
 			messageMapping
 		});
 	}
@@ -9934,13 +10494,23 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
 		sanitize: (p) => sanitizeAnthropicMessages(preprocessTools(p)),
 		execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, {
 			resolvedModel: selectedModel,
-			headersCapture
+			headersCapture,
+			onPrepared: ({ wire, headers }) => {
+				reqCtx.setAttemptWireRequest({
+					model: typeof wire.model === "string" ? wire.model : anthropicPayload.model,
+					messages: Array.isArray(wire.messages) ? wire.messages : [],
+					payload: wire,
+					headers,
+					format: "anthropic-messages"
+				});
+			}
 		})),
 		logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
 	};
 	const strategies = [
 		createNetworkRetryStrategy(),
 		createTokenRefreshStrategy(),
+		createContextManagementRetryStrategy(),
 		createDeferredToolRetryStrategy(),
 		createAutoTruncateStrategy({
 			truncate: (p, model, opts) => autoTruncateAnthropic(p, model, opts),
@@ -9963,17 +10533,18 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
 				const retryTruncateResult = meta?.truncateResult;
 				if (retryTruncateResult) truncateResult = retryTruncateResult;
 				const retrySanitization = meta?.sanitization;
-				if (retrySanitization) reqCtx.addSanitizationInfo(toSanitizationInfo(retrySanitization));
+				const allSanitization = [initialSanitizationInfo, ...retrySanitization ? [toSanitizationInfo(retrySanitization)] : []];
 				const retryMessageMapping = buildMessageMapping(anthropicPayload.messages, newPayload.messages);
 				reqCtx.setPipelineInfo({
+					preprocessing: preprocessInfo,
+					sanitization: allSanitization,
 					truncation: retryTruncateResult ? {
+						wasTruncated: true,
 						removedMessageCount: retryTruncateResult.removedMessageCount,
 						originalTokens: retryTruncateResult.originalTokens,
 						compactedTokens: retryTruncateResult.compactedTokens,
 						processingTimeMs: retryTruncateResult.processingTimeMs
 					} : void 0,
-					rewrittenMessages: newPayload.messages,
-					rewrittenSystem: typeof newPayload.system === "string" ? newPayload.system : void 0,
 					messageMapping: retryMessageMapping
 				});
 				if (reqCtx.tuiLogId) {
@@ -10158,10 +10729,22 @@ function formatModel(model) {
 		capabilities: model.capabilities
 	};
 }
+function formatModelDetail(model) {
+	return {
+		...formatModel(model),
+		version: model.version,
+		preview: model.preview,
+		model_picker_enabled: model.model_picker_enabled,
+		model_picker_category: model.model_picker_category,
+		supported_endpoints: model.supported_endpoints,
+		billing: model.billing
+	};
+}
 modelsRoutes.get("/", async (c) => {
 	try {
 		if (!state.models) await cacheModels();
-		const models = state.models?.data.map((m) => formatModel(m));
+		const formatter = c.req.query("detail") === "true" ? formatModelDetail : formatModel;
+		const models = state.models?.data.map((m) => formatter(m));
 		return c.json({
 			object: "list",
 			data: models,
@@ -10182,7 +10765,7 @@ modelsRoutes.get("/:model", async (c) => {
 			param: "model",
 			code: "model_not_found"
 		} }, 404);
-		return c.json(formatModel(model));
+		return c.json(formatModelDetail(model));
 	} catch (error) {
 		return forwardError(c, error);
 	}
@@ -10199,7 +10782,7 @@ async function handleResponses(c) {
 		consola.debug(`Model name resolved: ${clientModel} → ${resolvedModel}`);
 		payload.model = resolvedModel;
 	}
-	if (!isEndpointSupported(state.modelIndex.get(payload.model), ENDPOINT.RESPONSES)) {
+	if (!isResponsesSupported(state.modelIndex.get(payload.model))) {
 		const msg = `Model "${payload.model}" does not support the ${ENDPOINT.RESPONSES} endpoint`;
 		throw new HTTPError(msg, 400, msg);
 	}
@@ -10233,7 +10816,9 @@ async function handleDirectResponses(opts) {
 	const { c, payload, reqCtx } = opts;
 	const selectedModel = state.modelIndex.get(payload.model);
 	const headersCapture = {};
-	const adapter = createResponsesAdapter(selectedModel, headersCapture);
+	const adapter = createResponsesAdapter(selectedModel, headersCapture, (wireRequest) => {
+		reqCtx.setAttemptWireRequest(wireRequest);
+	});
 	const strategies = createResponsesStrategies();
 	try {
 		const pipelineResult = await executeRequestPipeline({
@@ -10247,7 +10832,6 @@ async function handleDirectResponses(opts) {
 		});
 		reqCtx.setHttpHeaders(headersCapture);
 		const response = pipelineResult.response;
-		reqCtx.addQueueWaitMs(pipelineResult.queueWaitMs);
 		if (!payload.stream) {
 			const responsesResponse = response;
 			const content = responsesOutputToContent(responsesResponse.output);
@@ -10276,8 +10860,8 @@ async function handleDirectResponses(opts) {
 			let eventsIn = 0;
 			try {
 				const iterator = response[Symbol.asyncIterator]();
-				const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
 				for (;;) {
+					const abortSignal = combineAbortSignals(getShutdownSignal(), clientAbort.signal);
 					const result = await raceIteratorNext(iterator.next(), {
 						idleTimeoutMs,
 						abortSignal
@@ -10340,23 +10924,82 @@ responsesRoutes.post("/", async (c) => {
 });
 //#endregion
-//#region src/routes/token/route.ts
-const tokenRoutes = new Hono();
-tokenRoutes.get("/", (c) => {
+//#region src/routes/status/route.ts
+/**
+* Aggregated server status endpoint.
+* Returns health, auth, quota, rate limiter, memory, shutdown, and model counts
+* in a single request.
+*/
+const statusRoutes = new Hono();
+statusRoutes.get("/", async (c) => {
+	const now = Date.now();
+	const limiter = getAdaptiveRateLimiter();
+	const limiterStatus = limiter?.getStatus();
+	const memStats = getMemoryPressureStats();
+	let activeCount = 0;
 	try {
-		return c.json({ token: state.copilotToken });
-	} catch (error) {
-		return forwardError(c, error);
-	}
+		activeCount = getRequestContextManager().activeCount;
+	} catch {}
+	let quota = null;
+	try {
+		const usage = await getCopilotUsage();
+		quota = {
+			plan: usage.copilot_plan,
+			resetDate: usage.quota_reset_date,
+			chat: usage.quota_snapshots.chat,
+			completions: usage.quota_snapshots.completions,
+			premiumInteractions: usage.quota_snapshots.premium_interactions
+		};
+	} catch {}
+	return c.json({
+		status: getIsShuttingDown() ? "shutting_down" : state.copilotToken && state.githubToken ? "healthy" : "unhealthy",
+		uptime: serverStartTime > 0 ? Math.floor((now - serverStartTime) / 1e3) : 0,
+		version,
+		auth: {
+			accountType: state.accountType,
+			tokenSource: state.tokenInfo?.source ?? null,
+			tokenExpiresAt: state.tokenInfo?.expiresAt ?? null,
+			copilotTokenExpiresAt: state.copilotTokenInfo ? state.copilotTokenInfo.expiresAt * 1e3 : null
+		},
+		quota,
+		activeRequests: { count: activeCount },
+		rateLimiter: limiterStatus ? {
+			...limiterStatus,
+			config: limiter.getConfig()
+		} : null,
+		memory: {
+			heapUsedMB: memStats.heapUsedMB,
+			heapLimitMB: memStats.heapLimitMB,
+			historyEntryCount: historyState.entries.length,
+			historyMaxEntries: memStats.currentMaxEntries,
+			totalEvictedCount: memStats.totalEvictedCount
+		},
+		shutdown: { phase: getShutdownPhase() },
+		models: {
+			totalCount: state.models?.data.length ?? 0,
+			availableCount: state.modelIds.size
+		}
+	});
 });
 //#endregion
-//#region src/routes/usage/route.ts
-const usageRoutes = new Hono();
-usageRoutes.get("/", async (c) => {
+//#region src/routes/token/route.ts
+const tokenRoutes = new Hono();
+tokenRoutes.get("/", (c) => {
 	try {
-		const usage = await getCopilotUsage();
-		return c.json(usage);
+		return c.json({
+			github: state.tokenInfo ? {
+				token: state.tokenInfo.token,
+				source: state.tokenInfo.source,
+				expiresAt: state.tokenInfo.expiresAt ?? null,
+				refreshable: state.tokenInfo.refreshable
+			} : null,
+			copilot: state.copilotTokenInfo ? {
+				token: state.copilotTokenInfo.token,
+				expiresAt: state.copilotTokenInfo.expiresAt,
+				refreshIn: state.copilotTokenInfo.refreshIn
+			} : null
+		});
 	} catch (error) {
 		return forwardError(c, error);
 	}
@@ -10378,8 +11021,10 @@ function registerRoutes(app) {
 	app.route("/v1/responses", responsesRoutes);
 	app.route("/v1/messages", messagesRoutes);
 	app.route("/api/event_logging", eventLoggingRoutes);
-	app.route("/usage", usageRoutes);
-	app.route("/token", tokenRoutes);
+	app.route("/api/status", statusRoutes);
+	app.route("/api/tokens", tokenRoutes);
+	app.route("/api/config", configRoutes);
+	app.route("/api/logs", logsRoutes);
 	app.route("/history", historyRoutes);
 }
@@ -10518,6 +11163,7 @@ async function runServer(options) {
 	else off("[model_overrides]", "Model overrides");
 	if (state.dedupToolCalls) on("[anthropic.dedup_tool_calls]", "Dedup tool calls", `mode: ${state.dedupToolCalls}`);
 	else off("[anthropic.dedup_tool_calls]", "Dedup tool calls");
+	toggle(state.immutableThinkingMessages, "[anthropic.immutable_thinking_messages]", "Immutable thinking messages");
 	toggle(state.stripReadToolResultTags, "[anthropic.strip_read_tool_result_tags]", "Strip Read tool result tags");
 	if (state.rewriteSystemReminders === true) on("[anthropic.rewrite_system_reminders]", "Rewrite system reminders", "remove all");
 	else if (state.rewriteSystemReminders === false) off("[anthropic.rewrite_system_reminders]", "Rewrite system reminders");
@@ -10567,39 +11213,29 @@ async function runServer(options) {
 	}).join("\n");
 	if (overrideLines) consola.info(`Model overrides:\n${overrideLines}`);
 	const serverUrl = `http://${options.host ?? "localhost"}:${options.port}`;
-	if (typeof globalThis.Bun !== "undefined") server.use("*", async (c, next) => {
-		const runtime = c.req.raw.runtime;
-		if (runtime?.bun?.server) c.env = { server: runtime.bun.server };
-		await next();
-	});
 	const wsAdapter = await createWebSocketAdapter(server);
+	initWebSocket(server, wsAdapter.upgradeWebSocket);
 	initHistoryWebSocket(server, wsAdapter.upgradeWebSocket);
 	initResponsesWebSocket(server, wsAdapter.upgradeWebSocket);
 	consola.box(`Web UI:\n🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage\n📜 History UI:   ${serverUrl}/history`);
 	const bunWebSocket = typeof globalThis.Bun !== "undefined" ? (await import("hono/bun")).websocket : void 0;
 	let serverInstance;
 	try {
-		serverInstance = serve({
+		serverInstance = await startServer({
 			fetch: server.fetch,
 			port: options.port,
 			hostname: options.host,
-			reusePort: true,
-			gracefulShutdown: false,
-			bun: {
-				idleTimeout: 255,
-				...bunWebSocket && { websocket: bunWebSocket }
-			}
+			bunWebSocket
 		});
 	} catch (error) {
 		consola.error(`Failed to start server on port ${options.port}. Is the port already in use?`, error);
 		process.exit(1);
 	}
+	consola.info(`Listening on ${serverUrl}`);
+	setServerStartTime(Date.now());
 	setServerInstance(serverInstance);
 	setupShutdownHandlers();
-	if (wsAdapter.injectWebSocket) {
-		const nodeServer = serverInstance.node?.server;
-		if (nodeServer && "on" in nodeServer) wsAdapter.injectWebSocket(nodeServer);
-	}
+	if (wsAdapter.injectWebSocket && serverInstance.nodeServer) wsAdapter.injectWebSocket(serverInstance.nodeServer);
 	await waitForShutdown();
 }
 const start = defineCommand({