npm - @hsupu/copilot-api - Versions diffs - 0.7.20 → 0.7.22 - Mend

@hsupu/copilot-api 0.7.20 → 0.7.22

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (8)

package/config.example.yaml +82 -52
package/dist/main.mjs +543 -403
package/dist/main.mjs.map +1 -1
package/package.json +1 -1
package/ui/history-v3/dist/assets/index-CaOzq3V0.js +3 -0
package/ui/history-v3/dist/assets/{index-CMA0Arxs.css → index-Dfh3zN1X.css} +1 -1
package/ui/history-v3/dist/index.html +2 -2
package/ui/history-v3/dist/assets/index-DS5mAk0y.js +0 -3

package/dist/main.mjs (changed)

@@ -21,6 +21,47 @@ import { cors } from "hono/cors";
 import { trimTrailingSlash } from "hono/trailing-slash";
 import { streamSSE } from "hono/streaming";
+//#region src/lib/state.ts
+/**
+* Rebuild model lookup indexes from state.models.
+* Called by cacheModels() in production; call directly in tests after setting state.models.
+*/
+function rebuildModelIndex() {
+	const data = state.models?.data ?? [];
+	state.modelIndex = new Map(data.map((m) => [m.id, m]));
+	state.modelIds = new Set(data.map((m) => m.id));
+}
+const DEFAULT_MODEL_OVERRIDES = {
+	opus: "claude-opus-4.6",
+	sonnet: "claude-sonnet-4.6",
+	haiku: "claude-haiku-4.5"
+};
+const state = {
+	accountType: "individual",
+	autoTruncate: true,
+	compressToolResultsBeforeTruncate: true,
+	contextEditingMode: "off",
+	stripServerTools: false,
+	dedupToolCalls: false,
+	fetchTimeout: 300,
+	historyLimit: 200,
+	historyMinEntries: 50,
+	modelIds: /* @__PURE__ */ new Set(),
+	modelIndex: /* @__PURE__ */ new Map(),
+	modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
+	rewriteSystemReminders: false,
+	showGitHubToken: false,
+	shutdownAbortWait: 120,
+	shutdownGracefulWait: 60,
+	staleRequestMaxAge: 600,
+	streamIdleTimeout: 300,
+	systemPromptOverrides: [],
+	stripReadToolResultTags: false,
+	normalizeResponsesCallIds: false,
+	verbose: false
+};
+//#endregion
 //#region src/lib/utils.ts
 const sleep = (ms) => new Promise((resolve) => {
 	setTimeout(resolve, ms);
@@ -408,6 +449,7 @@ function updateEntry(id, update) {
 	if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
 	if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
 	if (update.sseEvents) entry.sseEvents = update.sseEvents;
+	if (update.httpHeaders) entry.httpHeaders = update.httpHeaders;
 	if (update.response) {
 		const session = historyState.sessions.get(entry.sessionId);
 		if (session) {
@@ -619,45 +661,6 @@ function exportHistory(format = "json") {
 	return [headers.join(","), ...rows.map((r) => r.map((v) => escapeCsvValue(v)).join(","))].join("\n");
 }
-//#endregion
-//#region src/lib/state.ts
-/**
-* Rebuild model lookup indexes from state.models.
-* Called by cacheModels() in production; call directly in tests after setting state.models.
-*/
-function rebuildModelIndex() {
-	const data = state.models?.data ?? [];
-	state.modelIndex = new Map(data.map((m) => [m.id, m]));
-	state.modelIds = new Set(data.map((m) => m.id));
-}
-const DEFAULT_MODEL_OVERRIDES = {
-	opus: "claude-opus-4.6",
-	sonnet: "claude-sonnet-4.6",
-	haiku: "claude-haiku-4.5"
-};
-const state = {
-	accountType: "individual",
-	autoTruncate: true,
-	compressToolResultsBeforeTruncate: true,
-	stripServerTools: false,
-	dedupToolCalls: false,
-	fetchTimeout: 300,
-	historyLimit: 200,
-	historyMinEntries: 50,
-	modelIds: /* @__PURE__ */ new Set(),
-	modelIndex: /* @__PURE__ */ new Map(),
-	modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
-	rewriteSystemReminders: false,
-	showGitHubToken: false,
-	shutdownAbortWait: 120,
-	shutdownGracefulWait: 60,
-	staleRequestMaxAge: 600,
-	streamIdleTimeout: 300,
-	systemPromptOverrides: [],
-	stripReadToolResultTags: false,
-	verbose: false
-};
 //#endregion
 //#region src/lib/history/memory-pressure.ts
 /**
@@ -746,7 +749,7 @@ function startMemoryPressureMonitor() {
 			consola.error("[memory] Error in memory pressure check:", error);
 		});
 	}, CHECK_INTERVAL_MS);
-	if (timer && "unref" in timer) timer.unref();
+	if ("unref" in timer) timer.unref();
 }
 /** Stop the memory pressure monitor */
 function stopMemoryPressureMonitor() {
@@ -772,12 +775,15 @@ async function ensurePaths() {
 	await ensureFile(PATHS.GITHUB_TOKEN_PATH);
 }
 async function ensureFile(filePath) {
+	const isWindows = process.platform === "win32";
 	try {
 		await fs.access(filePath, fs.constants.W_OK);
-		if (((await fs.stat(filePath)).mode & 511) !== 384) await fs.chmod(filePath, 384);
+		if (!isWindows) {
+			if (((await fs.stat(filePath)).mode & 511) !== 384) await fs.chmod(filePath, 384);
+		}
 	} catch {
 		await fs.writeFile(filePath, "");
-		await fs.chmod(filePath, 384);
+		if (!isWindows) await fs.chmod(filePath, 384);
 	}
 }
@@ -878,6 +884,7 @@ async function applyConfigToState() {
 		if (a.strip_server_tools !== void 0) state.stripServerTools = a.strip_server_tools;
 		if (a.dedup_tool_calls !== void 0) state.dedupToolCalls = a.dedup_tool_calls === true ? "input" : a.dedup_tool_calls;
 		if (a.strip_read_tool_result_tags !== void 0) state.stripReadToolResultTags = a.strip_read_tool_result_tags;
+		if (a.context_editing !== void 0) state.contextEditingMode = a.context_editing;
 		if (a.rewrite_system_reminders !== void 0) {
 			if (typeof a.rewrite_system_reminders === "boolean") state.rewriteSystemReminders = a.rewrite_system_reminders;
 			else if (Array.isArray(a.rewrite_system_reminders)) state.rewriteSystemReminders = compileRewriteRules(a.rewrite_system_reminders);
@@ -905,6 +912,8 @@ async function applyConfigToState() {
 	if (config.fetch_timeout !== void 0) state.fetchTimeout = config.fetch_timeout;
 	if (config.stream_idle_timeout !== void 0) state.streamIdleTimeout = config.stream_idle_timeout;
 	if (config.stale_request_max_age !== void 0) state.staleRequestMaxAge = config.stale_request_max_age;
+	const responsesConfig = config["openai-responses"];
+	if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) state.normalizeResponsesCallIds = responsesConfig.normalize_call_ids;
 	const currentMtime = getConfigMtimeMs();
 	if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
 	hasApplied = true;
@@ -1076,97 +1085,6 @@ function initProxyBun(options) {
 	consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(options.url)}`);
 }
-//#endregion
-//#region src/lib/copilot-api.ts
-const standardHeaders = () => ({
-	"content-type": "application/json",
-	accept: "application/json"
-});
-const COPILOT_VERSION = "0.38.0";
-const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
-const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
-/** Copilot Chat API version (for chat/completions requests) */
-const COPILOT_API_VERSION = "2025-05-01";
-/** Copilot internal API version (for token & usage endpoints) */
-const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
-/** GitHub public API version (for /user, repos, etc.) */
-const GITHUB_API_VERSION = "2022-11-28";
-/**
-* Session-level interaction ID.
-* Used to correlate all requests within a single server session.
-* Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
-*/
-const INTERACTION_ID = randomUUID();
-const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
-const copilotHeaders = (state, opts) => {
-	const headers = {
-		Authorization: `Bearer ${state.copilotToken}`,
-		"content-type": standardHeaders()["content-type"],
-		"copilot-integration-id": "vscode-chat",
-		"editor-version": `vscode/${state.vsCodeVersion}`,
-		"editor-plugin-version": EDITOR_PLUGIN_VERSION,
-		"user-agent": USER_AGENT,
-		"openai-intent": opts?.intent ?? "conversation-panel",
-		"x-github-api-version": COPILOT_API_VERSION,
-		"x-request-id": randomUUID(),
-		"X-Interaction-Id": INTERACTION_ID,
-		"x-vscode-user-agent-library-version": "electron-fetch"
-	};
-	if (opts?.vision) headers["copilot-vision-request"] = "true";
-	if (opts?.modelRequestHeaders) {
-		const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
-		for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
-	}
-	return headers;
-};
-const GITHUB_API_BASE_URL = "https://api.github.com";
-const githubHeaders = (state) => ({
-	...standardHeaders(),
-	authorization: `token ${state.githubToken}`,
-	"editor-version": `vscode/${state.vsCodeVersion}`,
-	"editor-plugin-version": EDITOR_PLUGIN_VERSION,
-	"user-agent": USER_AGENT,
-	"x-github-api-version": GITHUB_API_VERSION,
-	"x-vscode-user-agent-library-version": "electron-fetch"
-});
-const GITHUB_BASE_URL = "https://github.com";
-const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
-const GITHUB_APP_SCOPES = ["read:user"].join(" ");
-/** Fallback VSCode version when GitHub API is unavailable */
-const VSCODE_VERSION_FALLBACK = "1.104.3";
-/** GitHub API endpoint for latest VSCode release */
-const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
-/** Fetch the latest VSCode version and cache in global state */
-async function cacheVSCodeVersion() {
-	const response = await getVSCodeVersion();
-	state.vsCodeVersion = response;
-	consola.info(`Using VSCode version: ${response}`);
-}
-/** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
-async function getVSCodeVersion() {
-	const controller = new AbortController();
-	const timeout = setTimeout(() => {
-		controller.abort();
-	}, 5e3);
-	try {
-		const response = await fetch(VSCODE_RELEASE_URL, {
-			signal: controller.signal,
-			headers: {
-				Accept: "application/vnd.github.v3+json",
-				"User-Agent": "copilot-api"
-			}
-		});
-		if (!response.ok) return VSCODE_VERSION_FALLBACK;
-		const version = (await response.json()).tag_name;
-		if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
-		return VSCODE_VERSION_FALLBACK;
-	} catch {
-		return VSCODE_VERSION_FALLBACK;
-	} finally {
-		clearTimeout(timeout);
-	}
-}
 //#endregion
 //#region src/lib/sanitize-system-reminder.ts
 /**
@@ -2009,63 +1927,162 @@ function getErrorMessage(error, fallback = "Unknown error") {
 }
 //#endregion
-//#region src/lib/token/copilot-client.ts
-/** Copilot API client — token and usage */
-const getCopilotToken = async () => {
-	const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, { headers: {
-		...githubHeaders(state),
-		"x-github-api-version": COPILOT_INTERNAL_API_VERSION
-	} });
-	if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot token", response);
-	return await response.json();
-};
-const getCopilotUsage = async () => {
-	const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, { headers: {
-		...githubHeaders(state),
-		"x-github-api-version": COPILOT_INTERNAL_API_VERSION
-	} });
-	if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot usage", response);
-	return await response.json();
-};
-//#endregion
-//#region src/lib/token/copilot-token-manager.ts
+//#region src/lib/copilot-api.ts
+const standardHeaders = () => ({
+	"content-type": "application/json",
+	accept: "application/json"
+});
+const COPILOT_VERSION = "0.38.0";
+const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
+const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
+/** Copilot Chat API version (for chat/completions requests) */
+const COPILOT_API_VERSION = "2025-05-01";
+/** Copilot internal API version (for token & usage endpoints) */
+const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
+/** GitHub public API version (for /user, repos, etc.) */
+const GITHUB_API_VERSION = "2022-11-28";
 /**
-* Manages Copilot token lifecycle including automatic refresh.
-* Depends on GitHubTokenManager for authentication.
-*
-* All refresh paths (scheduled + on-demand via 401) go through `refresh()`,
-* which deduplicates concurrent callers and reschedules the next refresh based
-* on the server's `refresh_in` value.
+* Session-level interaction ID.
+* Used to correlate all requests within a single server session.
+* Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
 */
-var CopilotTokenManager = class {
-	githubTokenManager;
-	currentToken = null;
-	refreshTimeout = null;
-	minRefreshIntervalMs;
-	maxRetries;
-	/** Shared promise to prevent concurrent refresh attempts */
-	refreshInFlight = null;
-	constructor(options) {
-		this.githubTokenManager = options.githubTokenManager;
-		this.minRefreshIntervalMs = (options.minRefreshIntervalSeconds ?? 60) * 1e3;
-		this.maxRetries = options.maxRetries ?? 3;
-	}
-	/**
-	* Get the current Copilot token info.
-	*/
-	getCurrentToken() {
-		return this.currentToken;
-	}
-	/**
-	* Initialize the Copilot token and start automatic refresh.
-	*/
-	async initialize() {
-		const tokenInfo = await this.fetchCopilotToken();
-		state.copilotToken = tokenInfo.token;
-		consola.debug("GitHub Copilot Token fetched successfully!");
-		this.scheduleRefresh(tokenInfo.refreshIn);
-		return tokenInfo;
+const INTERACTION_ID = randomUUID();
+const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
+const copilotHeaders = (state, opts) => {
+	const headers = {
+		Authorization: `Bearer ${state.copilotToken}`,
+		"content-type": standardHeaders()["content-type"],
+		"copilot-integration-id": "vscode-chat",
+		"editor-version": `vscode/${state.vsCodeVersion}`,
+		"editor-plugin-version": EDITOR_PLUGIN_VERSION,
+		"user-agent": USER_AGENT,
+		"openai-intent": opts?.intent ?? "conversation-panel",
+		"x-github-api-version": COPILOT_API_VERSION,
+		"x-request-id": randomUUID(),
+		"X-Interaction-Id": INTERACTION_ID,
+		"x-vscode-user-agent-library-version": "electron-fetch"
+	};
+	if (opts?.vision) headers["copilot-vision-request"] = "true";
+	if (opts?.modelRequestHeaders) {
+		const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
+		for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
+	}
+	return headers;
+};
+const GITHUB_API_BASE_URL = "https://api.github.com";
+const githubHeaders = (state) => ({
+	...standardHeaders(),
+	authorization: `token ${state.githubToken}`,
+	"editor-version": `vscode/${state.vsCodeVersion}`,
+	"editor-plugin-version": EDITOR_PLUGIN_VERSION,
+	"user-agent": USER_AGENT,
+	"x-github-api-version": GITHUB_API_VERSION,
+	"x-vscode-user-agent-library-version": "electron-fetch"
+});
+const GITHUB_BASE_URL = "https://github.com";
+const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
+const GITHUB_APP_SCOPES = ["read:user"].join(" ");
+/** Fallback VSCode version when GitHub API is unavailable */
+const VSCODE_VERSION_FALLBACK = "1.104.3";
+/** GitHub API endpoint for latest VSCode release */
+const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
+/** Fetch the latest VSCode version and cache in global state */
+async function cacheVSCodeVersion() {
+	const response = await getVSCodeVersion();
+	state.vsCodeVersion = response;
+	consola.info(`Using VSCode version: ${response}`);
+}
+/** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
+async function getVSCodeVersion() {
+	const controller = new AbortController();
+	const timeout = setTimeout(() => {
+		controller.abort();
+	}, 5e3);
+	try {
+		const response = await fetch(VSCODE_RELEASE_URL, {
+			signal: controller.signal,
+			headers: {
+				Accept: "application/vnd.github.v3+json",
+				"User-Agent": "copilot-api"
+			}
+		});
+		if (!response.ok) return VSCODE_VERSION_FALLBACK;
+		const version = (await response.json()).tag_name;
+		if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
+		return VSCODE_VERSION_FALLBACK;
+	} catch {
+		return VSCODE_VERSION_FALLBACK;
+	} finally {
+		clearTimeout(timeout);
+	}
+}
+//#endregion
+//#region src/lib/token/copilot-client.ts
+/** Copilot API client — token and usage */
+const getCopilotToken = async () => {
+	const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, {
+		headers: {
+			...githubHeaders(state),
+			"x-github-api-version": COPILOT_INTERNAL_API_VERSION
+		},
+		signal: AbortSignal.timeout(15e3)
+	});
+	if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot token", response);
+	return await response.json();
+};
+const getCopilotUsage = async () => {
+	const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, {
+		headers: {
+			...githubHeaders(state),
+			"x-github-api-version": COPILOT_INTERNAL_API_VERSION
+		},
+		signal: AbortSignal.timeout(15e3)
+	});
+	if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot usage", response);
+	return await response.json();
+};
+//#endregion
+//#region src/lib/token/copilot-token-manager.ts
+/**
+* Manages Copilot token lifecycle including automatic refresh.
+* Depends on GitHubTokenManager for authentication.
+*
+* All refresh paths (scheduled + on-demand via 401) go through `refresh()`,
+* which deduplicates concurrent callers and reschedules the next refresh based
+* on the server's `refresh_in` value.
+*/
+var CopilotTokenManager = class {
+	githubTokenManager;
+	currentToken = null;
+	refreshTimeout = null;
+	minRefreshIntervalMs;
+	maxRetries;
+	/** Shared promise to prevent concurrent refresh attempts */
+	refreshInFlight = null;
+	/** Set when a refresh attempt fails; cleared on next success */
+	_refreshNeeded = false;
+	constructor(options) {
+		this.githubTokenManager = options.githubTokenManager;
+		this.minRefreshIntervalMs = (options.minRefreshIntervalSeconds ?? 60) * 1e3;
+		this.maxRetries = options.maxRetries ?? 3;
+	}
+	/**
+	* Get the current Copilot token info.
+	*/
+	getCurrentToken() {
+		return this.currentToken;
+	}
+	/**
+	* Initialize the Copilot token and start automatic refresh.
+	*/
+	async initialize() {
+		const tokenInfo = await this.fetchCopilotToken();
+		state.copilotToken = tokenInfo.token;
+		consola.debug("GitHub Copilot Token fetched successfully!");
+		this.scheduleRefresh(tokenInfo.refreshIn);
+		return tokenInfo;
 	}
 	/**
 	* Fetch a new Copilot token from the API.
@@ -2100,10 +2117,12 @@ var CopilotTokenManager = class {
 				}
 			}
 			const delay = Math.min(1e3 * 2 ** attempt, 3e4);
-			consola.warn(`Token refresh attempt ${attempt + 1}/${this.maxRetries} failed, retrying in ${delay}ms`);
+			const reason = error instanceof Error ? formatErrorWithCause(error) : String(error);
+			consola.warn(`Token refresh attempt ${attempt + 1}/${this.maxRetries} failed: ${reason}, retrying in ${delay}ms`);
 			await new Promise((resolve) => setTimeout(resolve, delay));
 		}
-		consola.error("All token refresh attempts failed:", lastError);
+		const reason = lastError instanceof Error ? formatErrorWithCause(lastError) : String(lastError);
+		consola.error(`All token refresh attempts failed: ${reason}`);
 		return null;
 	}
 	/**
@@ -2166,10 +2185,12 @@ var CopilotTokenManager = class {
 		}
 		this.refreshInFlight = this.fetchTokenWithRetry().then((tokenInfo) => {
 			if (tokenInfo) {
+				this._refreshNeeded = false;
 				state.copilotToken = tokenInfo.token;
 				this.scheduleRefresh(tokenInfo.refreshIn);
 				consola.verbose(`[CopilotToken] Token refreshed (next refresh_in=${tokenInfo.refreshIn}s)`);
 			} else {
+				this._refreshNeeded = true;
 				consola.error("[CopilotToken] Token refresh failed, keeping existing token");
 				this.scheduleRefresh(300);
 			}
@@ -2180,6 +2201,16 @@ var CopilotTokenManager = class {
 		return this.refreshInFlight;
 	}
 	/**
+	* Proactively ensure the token is valid before sending a request.
+	* Triggers a refresh if the token is expired/expiring or the last
+	* refresh attempt failed. Concurrent callers share the same in-flight
+	* refresh via `refresh()`.
+	*/
+	async ensureValidToken() {
+		if (!this.isExpiredOrExpiring() && !this._refreshNeeded) return;
+		await this.refresh();
+	}
+	/**
 	* Check if the current token is expired or about to expire.
 	*/
 	isExpiredOrExpiring(marginSeconds = 60) {
@@ -2651,6 +2682,14 @@ function getCopilotTokenManager() {
 function stopTokenRefresh() {
 	copilotTokenManager?.stopAutoRefresh();
 }
+/**
+* Proactively ensure the Copilot token is valid.
+* Triggers a refresh if the token is expired/expiring or the last
+* background refresh failed. No-op if the manager is not initialized.
+*/
+async function ensureValidCopilotToken() {
+	await copilotTokenManager?.ensureValidToken();
+}
 //#endregion
 //#region src/auth.ts
@@ -2760,6 +2799,15 @@ const checkUsage = defineCommand({
 function createFetchSignal() {
 	return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
 }
+/**
+* Populate a HeadersCapture object with request and response headers.
+* Should be called immediately after fetch(), before !response.ok check,
+* so headers are captured even for error responses.
+*/
+function captureHttpHeaders(capture, requestHeaders, response) {
+	capture.request = { ...requestHeaders };
+	capture.response = Object.fromEntries(response.headers.entries());
+}
 //#endregion
 //#region src/lib/models/client.ts
@@ -3444,6 +3492,7 @@ function createRequestContext(opts) {
 	let _pipelineInfo = null;
 	let _preprocessInfo = null;
 	let _sseEvents = null;
+	let _httpHeaders = null;
 	const _sanitizationHistory = [];
 	let _queueWaitMs = 0;
 	const _attempts = [];
@@ -3480,6 +3529,9 @@ function createRequestContext(opts) {
 		get preprocessInfo() {
 			return _preprocessInfo;
 		},
+		get httpHeaders() {
+			return _httpHeaders;
+		},
 		get attempts() {
 			return _attempts;
 		},
@@ -3518,6 +3570,12 @@ function createRequestContext(opts) {
 		setSseEvents(events) {
 			_sseEvents = events.length > 0 ? events : null;
 		},
+		setHttpHeaders(capture) {
+			if (capture.request && capture.response) _httpHeaders = {
+				request: capture.request,
+				response: capture.response
+			};
+		},
 		beginAttempt(attemptOpts) {
 			const attempt = {
 				index: _attempts.length,
@@ -3648,6 +3706,7 @@ function createRequestContext(opts) {
 			if (lastTruncation) entry.truncation = lastTruncation;
 			if (_pipelineInfo) entry.pipelineInfo = _pipelineInfo;
 			if (_sseEvents) entry.sseEvents = _sseEvents;
+			if (_httpHeaders) entry.httpHeaders = _httpHeaders;
 			if (_attempts.length > 1) entry.attempts = _attempts.map((a) => ({
 				index: a.index,
 				strategy: a.strategy,
@@ -4723,7 +4782,7 @@ const setupClaudeCode = defineCommand({
 //#endregion
 //#region package.json
-var version = "0.7.20";
+var version = "0.7.22";
 //#endregion
 //#region src/lib/context/error-persistence.ts
@@ -4840,7 +4899,8 @@ function handleHistoryEvent(event) {
 			updateEntry(entryData.id, {
 				response,
 				durationMs: entryData.durationMs,
-				sseEvents: entryData.sseEvents
+				sseEvents: entryData.sseEvents,
+				httpHeaders: entryData.httpHeaders
 			});
 			break;
 		}
@@ -4957,6 +5017,22 @@ function isEndpointSupported(model, endpoint) {
 	return model.supported_endpoints.includes(endpoint);
 }
+//#endregion
+//#region src/lib/ws.ts
+/** Create a shared WebSocket adapter for the given Hono app */
+async function createWebSocketAdapter(app) {
+	if (typeof globalThis.Bun !== "undefined") {
+		const { upgradeWebSocket } = await import("hono/bun");
+		return { upgradeWebSocket };
+	}
+	const { createNodeWebSocket } = await import("@hono/node-ws");
+	const nodeWs = createNodeWebSocket({ app });
+	return {
+		upgradeWebSocket: nodeWs.upgradeWebSocket,
+		injectWebSocket: (server) => nodeWs.injectWebSocket(server)
+	};
+}
 //#endregion
 //#region src/routes/history/api.ts
 function handleGetEntries(c) {
@@ -5066,25 +5142,12 @@ historyRoutes.get("/api/sessions/:id", handleGetSession);
 historyRoutes.delete("/api/sessions/:id", handleDeleteSession);
 /**
 * Initialize WebSocket support for history real-time updates.
-* Registers the /ws route on historyRoutes using the appropriate WebSocket
-* adapter for the current runtime (hono/bun for Bun, @hono/node-ws for Node.js).
+* Registers the /history/ws route on the root app using the shared WebSocket adapter.
 *
-* @param rootApp - The root Hono app instance (needed by @hono/node-ws to match upgrade requests)
-* @returns An `injectWebSocket` function that must be called with the Node.js HTTP server
-* after the server is created. Returns `undefined` under Bun (no injection needed).
+* @param rootApp - The root Hono app instance
+* @param upgradeWs - Shared WebSocket upgrade function from createWebSocketAdapter
 */
-async function initHistoryWebSocket(rootApp) {
-	let upgradeWs;
-	let injectFn;
-	if (typeof globalThis.Bun !== "undefined") {
-		const { upgradeWebSocket } = await import("hono/bun");
-		upgradeWs = upgradeWebSocket;
-	} else {
-		const { createNodeWebSocket } = await import("@hono/node-ws");
-		const nodeWs = createNodeWebSocket({ app: rootApp });
-		upgradeWs = nodeWs.upgradeWebSocket;
-		injectFn = (server) => nodeWs.injectWebSocket(server);
-	}
+function initHistoryWebSocket(rootApp, upgradeWs) {
 	rootApp.get("/history/ws", upgradeWs(() => ({
 		onOpen(_event, ws) {
 			addClient(ws.raw);
@@ -5098,7 +5161,6 @@ async function initHistoryWebSocket(rootApp) {
 			removeClient(ws.raw);
 		}
 	})));
-	return injectFn;
 }
 /**
 * Resolve a UI directory that exists at runtime.
@@ -5831,6 +5893,7 @@ const createResponses = async (payload, opts) => {
 		body: JSON.stringify(payload),
 		signal: fetchSignal
 	});
+	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
 	if (!response.ok) {
 		consola.error("Failed to create responses", response);
 		throw await HTTPError.fromResponse("Failed to create responses", response, payload.model);
@@ -5947,15 +6010,20 @@ function createTokenRefreshStrategy() {
 * centralizes that configuration to avoid duplication.
 */
 /** Create the FormatAdapter for Responses API pipeline execution */
-function createResponsesAdapter(selectedModel) {
+function createResponsesAdapter(selectedModel, headersCapture) {
 	return {
 		format: "openai-responses",
-		sanitize: (p) => ({
-			payload: p,
-			blocksRemoved: 0,
-			systemReminderRemovals: 0
-		}),
-		execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, { resolvedModel: selectedModel })),
+		sanitize: (p) => {
+			return {
+				payload: state.normalizeResponsesCallIds ? normalizeCallIds(p) : p,
+				blocksRemoved: 0,
+				systemReminderRemovals: 0
+			};
+		},
+		execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
+			resolvedModel: selectedModel,
+			headersCapture
+		})),
 		logPayloadSize: (p) => {
 			const count = typeof p.input === "string" ? 1 : p.input.length;
 			consola.debug(`Responses payload: ${count} input item(s), model: ${p.model}`);
@@ -5966,6 +6034,36 @@ function createResponsesAdapter(selectedModel) {
 function createResponsesStrategies() {
 	return [createNetworkRetryStrategy(), createTokenRefreshStrategy()];
 }
+const CALL_PREFIX = "call_";
+const FC_PREFIX = "fc_";
+/**
+* Normalize function call IDs in Responses API input.
+* Converts Chat Completions format `call_xxx` IDs to Responses format `fc_xxx` IDs
+* on `function_call` and `function_call_output` items.
+*/
+function normalizeCallIds(payload) {
+	if (typeof payload.input === "string") return payload;
+	let count = 0;
+	const normalizedInput = payload.input.map((item) => {
+		if (item.type !== "function_call" && item.type !== "function_call_output") return item;
+		const newItem = { ...item };
+		if (newItem.id?.startsWith(CALL_PREFIX)) {
+			newItem.id = FC_PREFIX + newItem.id.slice(5);
+			count++;
+		}
+		if (newItem.call_id?.startsWith(CALL_PREFIX)) {
+			newItem.call_id = FC_PREFIX + newItem.call_id.slice(5);
+			count++;
+		}
+		return newItem;
+	});
+	if (count === 0) return payload;
+	consola.debug(`[responses] Normalized ${count} call ID(s) (call_ → fc_)`);
+	return {
+		...payload,
+		input: normalizedInput
+	};
+}
 //#endregion
 //#region src/routes/responses/ws.ts
@@ -6047,17 +6145,20 @@ async function handleResponseCreate(ws, payload) {
 		model: resolvedModel,
 		clientModel: requestedModel
 	});
-	const adapter = createResponsesAdapter(selectedModel);
+	const headersCapture = {};
+	const adapter = createResponsesAdapter(selectedModel, headersCapture);
 	const strategies = createResponsesStrategies();
 	try {
-		const iterator = (await executeRequestPipeline({
+		const pipelineResult = await executeRequestPipeline({
 			adapter,
 			strategies,
 			payload,
 			originalPayload: payload,
 			model: selectedModel,
 			maxRetries: 1
-		})).response[Symbol.asyncIterator]();
+		});
+		reqCtx.setHttpHeaders(headersCapture);
+		const iterator = pipelineResult.response[Symbol.asyncIterator]();
 		const acc = createResponsesStreamAccumulator();
 		const idleTimeoutMs = state.streamIdleTimeout > 0 ? state.streamIdleTimeout * 1e3 : 0;
 		const shutdownSignal = getShutdownSignal();
@@ -6085,6 +6186,7 @@ async function handleResponseCreate(ws, payload) {
 		reqCtx.complete(responseData);
 		ws.close(1e3, "done");
 	} catch (error) {
+		reqCtx.setHttpHeaders(headersCapture);
 		reqCtx.fail(resolvedModel, error);
 		const message = error instanceof Error ? error.message : String(error);
 		consola.error(`[WS] Responses API error: ${message}`);
@@ -6095,25 +6197,13 @@ async function handleResponseCreate(ws, payload) {
 * Initialize WebSocket routes for the Responses API.
 *
 * Registers GET /v1/responses and GET /responses on the root Hono app
-* with WebSocket upgrade handling. Follows the same pattern as
-* initHistoryWebSocket in src/routes/history/route.ts.
+* with WebSocket upgrade handling. Uses the shared WebSocket adapter
+* to avoid multiple upgrade listeners on the same HTTP server.
 *
-* @returns An inject function for Node.js HTTP server (undefined for Bun)
+* @param rootApp - The root Hono app instance
+* @param upgradeWs - Shared WebSocket upgrade function from createWebSocketAdapter
 */
-async function initResponsesWebSocket(rootApp) {
-	let upgradeWs;
-	let injectFn;
-	if (typeof globalThis.Bun !== "undefined") {
-		const { upgradeWebSocket } = await import("hono/bun");
-		upgradeWs = upgradeWebSocket;
-	} else {
-		const { createNodeWebSocket } = await import("@hono/node-ws");
-		const nodeWs = createNodeWebSocket({ app: rootApp });
-		upgradeWs = nodeWs.upgradeWebSocket;
-		injectFn = (server) => {
-			nodeWs.injectWebSocket(server);
-		};
-	}
+function initResponsesWebSocket(rootApp, upgradeWs) {
 	const wsHandler = upgradeWs(() => ({
 		onOpen(_event, _ws) {
 			consola.debug("[WS] Responses API WebSocket connected");
@@ -6147,7 +6237,6 @@ async function initResponsesWebSocket(rootApp) {
 	rootApp.get("/v1/responses", wsHandler);
 	rootApp.get("/responses", wsHandler);
 	consola.debug("[WS] Responses API WebSocket routes registered");
-	return injectFn;
 }
 //#endregion
@@ -6885,6 +6974,7 @@ const createChatCompletions = async (payload, opts) => {
 		body: JSON.stringify(payload),
 		signal: fetchSignal
 	});
+	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
 	if (!response.ok) {
 		consola.error("Failed to create chat completions", response);
 		throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
@@ -6984,14 +7074,14 @@ function sanitizeOpenAIMessages(payload) {
 			content: filtered
 		};
 	});
-	const removedCount = originalCount - messages.length;
-	if (removedCount > 0) consola.info(`[Sanitizer:OpenAI] Filtered ${removedCount} orphaned tool messages`);
+	const blocksRemoved = originalCount - messages.length;
+	if (blocksRemoved > 0) consola.info(`[Sanitizer:OpenAI] Filtered ${blocksRemoved} orphaned tool messages`);
 	return {
 		payload: {
 			...payload,
 			messages: allMessages
 		},
-		blocksRemoved: removedCount,
+		blocksRemoved,
 		systemReminderRemovals
 	};
 }
@@ -7268,10 +7358,14 @@ async function handleChatCompletion(c) {
 */
 async function executeRequest(opts) {
 	const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
+	const headersCapture = {};
 	const adapter = {
 		format: "openai-chat-completions",
 		sanitize: (p) => sanitizeOpenAIMessages(p),
-		execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, { resolvedModel: selectedModel })),
+		execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
+			resolvedModel: selectedModel,
+			headersCapture
+		})),
 		logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
 	};
 	const strategies = [
@@ -7286,7 +7380,7 @@ async function executeRequest(opts) {
 	];
 	let truncateResult;
 	try {
-		const response = (await executeRequestPipeline({
+		const result = await executeRequestPipeline({
 			adapter,
 			strategies,
 			payload,
@@ -7299,7 +7393,9 @@ async function executeRequest(opts) {
 				if (retryTruncateResult) truncateResult = retryTruncateResult;
 				if (reqCtx.tuiLogId) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags: ["truncated", `retry-${attempt + 1}`] });
 			}
-		})).response;
+		});
+		reqCtx.setHttpHeaders(headersCapture);
+		const response = result.response;
 		if (isNonStreaming(response)) return handleNonStreamingResponse(c, response, reqCtx, truncateResult);
 		consola.debug("Streaming response");
 		reqCtx.transition("streaming");
@@ -7316,6 +7412,7 @@ async function executeRequest(opts) {
 			});
 		});
 	} catch (error) {
+		reqCtx.setHttpHeaders(headersCapture);
 		reqCtx.fail(payload.model, error);
 		throw error;
 	}
@@ -8718,6 +8815,14 @@ function modelSupportsContextEditing(modelId) {
 	return normalized.startsWith("claude-haiku-4-5") || normalized.startsWith("claude-sonnet-4-5") || normalized.startsWith("claude-sonnet-4") || normalized.startsWith("claude-opus-4-5") || normalized.startsWith("claude-opus-4-6") || normalized.startsWith("claude-opus-4-1") || normalized.startsWith("claude-opus-4");
 }
 /**
+* Check if context editing is enabled for a model.
+* Requires both model support AND config mode != 'off'.
+* Mirrors VSCode Copilot Chat's isAnthropicContextEditingEnabled().
+*/
+function isContextEditingEnabled(modelId) {
+	return modelSupportsContextEditing(modelId) && state.contextEditingMode !== "off";
+}
+/**
 * Tool search is supported by:
 * - Claude Opus 4.5/4.6
 */
@@ -8756,7 +8861,7 @@ function buildAnthropicBetaHeaders(modelId, resolvedModel) {
 	const headers = {};
 	const betaFeatures = [];
 	if (!modelHasAdaptiveThinking(resolvedModel)) betaFeatures.push("interleaved-thinking-2025-05-14");
-	if (modelSupportsContextEditing(modelId)) betaFeatures.push("context-management-2025-06-27");
+	if (isContextEditingEnabled(modelId)) betaFeatures.push("context-management-2025-06-27");
 	if (modelSupportsToolSearch(modelId)) betaFeatures.push("advanced-tool-use-2025-11-20");
 	if (betaFeatures.length > 0) headers["anthropic-beta"] = betaFeatures.join(",");
 	return headers;
@@ -8767,22 +8872,28 @@ function buildAnthropicBetaHeaders(modelId, resolvedModel) {
 * From anthropic.ts:270-329 (buildContextManagement + getContextManagementFromConfig):
 * - clear_thinking: keep last N thinking turns
 * - clear_tool_uses: triggered by input_tokens threshold, keep last N tool uses
+*
+* Only builds edits matching the requested mode:
+* - "off" → undefined (no context management)
+* - "clear-thinking" → clear_thinking only (if thinking is enabled)
+* - "clear-tooluse" → clear_tool_uses only
+* - "clear-both" → both edits
 */
-function buildContextManagement(modelId, hasThinking) {
-	if (!modelSupportsContextEditing(modelId)) return;
+function buildContextManagement(mode, hasThinking) {
+	if (mode === "off") return;
 	const triggerType = "input_tokens";
 	const triggerValue = 1e5;
 	const keepCount = 3;
 	const thinkingKeepTurns = 1;
 	const edits = [];
-	if (hasThinking) edits.push({
+	if ((mode === "clear-thinking" || mode === "clear-both") && hasThinking) edits.push({
 		type: "clear_thinking_20251015",
 		keep: {
 			type: "thinking_turns",
 			value: Math.max(1, thinkingKeepTurns)
 		}
 	});
-	edits.push({
+	if (mode === "clear-tooluse" || mode === "clear-both") edits.push({
 		type: "clear_tool_uses_20250919",
 		trigger: {
 			type: triggerType,
@@ -8793,7 +8904,7 @@ function buildContextManagement(modelId, hasThinking) {
 			value: keepCount
 		}
 	});
-	return { edits };
+	return edits.length > 0 ? { edits } : void 0;
 }
 //#endregion
@@ -9108,8 +9219,9 @@ async function createAnthropicMessages(payload, opts) {
 		"anthropic-version": "2023-06-01",
 		...buildAnthropicBetaHeaders(model, opts?.resolvedModel)
 	};
-	if (!wire.context_management) {
-		const contextManagement = buildContextManagement(model, Boolean(thinking && thinking.type !== "disabled"));
+	if (!wire.context_management && isContextEditingEnabled(model)) {
+		const hasThinking = Boolean(thinking && thinking.type !== "disabled");
+		const contextManagement = buildContextManagement(state.contextEditingMode, hasThinking);
 		if (contextManagement) {
 			wire.context_management = contextManagement;
 			consola.debug("[DirectAnthropic] Added context_management:", JSON.stringify(contextManagement));
@@ -9123,6 +9235,7 @@ async function createAnthropicMessages(payload, opts) {
 		body: JSON.stringify(wire),
 		signal: fetchSignal
 	});
+	if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
 	if (!response.ok) {
 		consola.debug("Request failed:", {
 			model,
@@ -9138,6 +9251,161 @@ async function createAnthropicMessages(payload, opts) {
 	return await response.json();
 }
+//#endregion
+//#region src/lib/anthropic/message-mapping.ts
+/**
+* Check if two messages likely correspond to the same original message.
+* Used by buildMessageMapping to handle cases where sanitization removes
+* content blocks within a message (changing its shape) or removes entire messages.
+*/
+function messagesMatch(orig, rewritten) {
+	if (orig.role !== rewritten.role) return false;
+	if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
+	const origBlocks = Array.isArray(orig.content) ? orig.content : [];
+	const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
+	if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
+	const ob = origBlocks[0];
+	const rb = rwBlocks[0];
+	if (ob.type !== rb.type) return false;
+	if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
+	if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
+	return true;
+}
+/**
+* Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
+* Uses a two-pointer approach since rewritten messages maintain the same relative
+* order as originals (all transformations are deletions, never reorderings).
+*/
+function buildMessageMapping(original, rewritten) {
+	const mapping = [];
+	let origIdx = 0;
+	for (const element of rewritten) while (origIdx < original.length) {
+		if (messagesMatch(original[origIdx], element)) {
+			mapping.push(origIdx);
+			origIdx++;
+			break;
+		}
+		origIdx++;
+	}
+	while (mapping.length < rewritten.length) mapping.push(-1);
+	return mapping;
+}
+//#endregion
+//#region src/lib/anthropic/server-tool-filter.ts
+/**
+* Server tool block filter for Anthropic SSE streams and non-streaming responses.
+*
+* Always active — matching vscode-copilot-chat behavior, which intercepts
+* server_tool_use and *_tool_result blocks unconditionally. These are server-side
+* artifacts (e.g. tool_search injected by copilot-api, web_search) that clients
+* don't expect and most SDKs can't validate.
+*
+* Also provides logging for server tool blocks (called before filtering,
+* so information is never lost even when blocks are stripped).
+*/
+/** Check if a block type is a server-side tool result (ends with _tool_result, but not plain tool_result) */
+function isServerToolResultType(type) {
+	return type !== "tool_result" && type.endsWith("_tool_result");
+}
+/**
+* Check if a content block is a server-side tool block.
+* Matches `server_tool_use` (any name) and all server tool result types
+* (web_search_tool_result, tool_search_tool_result, code_execution_tool_result, etc.).
+*/
+function isServerToolBlock(block) {
+	if (block.type === "server_tool_use") return true;
+	return isServerToolResultType(block.type);
+}
+/**
+* Log a single server tool block (server_tool_use or *_tool_result).
+* No-op for non-server-tool blocks — safe to call unconditionally.
+*
+* Called before filtering, so information is never lost even when blocks are stripped.
+*/
+function logServerToolBlock(block) {
+	if (block.type === "server_tool_use") {
+		consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
+		return;
+	}
+	if (!isServerToolResultType(block.type)) return;
+	const content = block.content;
+	if (!content) return;
+	const contentType = content.type;
+	if (contentType === "tool_search_tool_search_result") {
+		const toolNames = content.tool_references?.map((r) => r.tool_name).filter(Boolean) ?? [];
+		consola.debug(`[ServerTool] tool_search result: discovered ${toolNames.length} tools${toolNames.length > 0 ? ` [${toolNames.join(", ")}]` : ""}`);
+	} else if (contentType === "tool_search_tool_result_error") consola.warn(`[ServerTool] tool_search error: ${content.error_code}`);
+	else consola.debug(`[ServerTool] ${block.type}: ${contentType ?? "unknown"}`);
+}
+/**
+* Log all server tool blocks from a non-streaming response content array.
+* Must be called before filterServerToolBlocksFromResponse() to preserve info.
+*/
+function logServerToolBlocks(content) {
+	for (const block of content) logServerToolBlock(block);
+}
+/**
+* Filters server tool blocks from the SSE stream before forwarding to the client.
+* Handles index remapping so block indices remain dense/sequential after filtering.
+*
+* Always active — matching vscode-copilot-chat behavior, which intercepts
+* server_tool_use and *_tool_result blocks unconditionally. These are server-side
+* artifacts (e.g. tool_search injected by copilot-api, web_search) that clients
+* don't expect and most SDKs can't validate.
+*/
+function createServerToolBlockFilter() {
+	const filteredIndices = /* @__PURE__ */ new Set();
+	const clientIndexMap = /* @__PURE__ */ new Map();
+	let nextClientIndex = 0;
+	function getClientIndex(apiIndex) {
+		let idx = clientIndexMap.get(apiIndex);
+		if (idx === void 0) {
+			idx = nextClientIndex++;
+			clientIndexMap.set(apiIndex, idx);
+		}
+		return idx;
+	}
+	return { rewriteEvent(parsed, rawData) {
+		if (!parsed) return rawData;
+		if (parsed.type === "content_block_start") {
+			const block = parsed.content_block;
+			if (isServerToolBlock(block)) {
+				filteredIndices.add(parsed.index);
+				return null;
+			}
+			if (filteredIndices.size === 0) {
+				getClientIndex(parsed.index);
+				return rawData;
+			}
+			const clientIndex = getClientIndex(parsed.index);
+			if (clientIndex === parsed.index) return rawData;
+			const obj = JSON.parse(rawData);
+			obj.index = clientIndex;
+			return JSON.stringify(obj);
+		}
+		if (parsed.type === "content_block_delta" || parsed.type === "content_block_stop") {
+			if (filteredIndices.has(parsed.index)) return null;
+			if (filteredIndices.size === 0) return rawData;
+			const clientIndex = getClientIndex(parsed.index);
+			if (clientIndex === parsed.index) return rawData;
+			const obj = JSON.parse(rawData);
+			obj.index = clientIndex;
+			return JSON.stringify(obj);
+		}
+		return rawData;
+	} };
+}
+/** Filter server tool blocks from a non-streaming response */
+function filterServerToolBlocksFromResponse(response) {
+	const filtered = response.content.filter((block) => !isServerToolBlock(block));
+	if (filtered.length === response.content.length) return response;
+	return {
+		...response,
+		content: filtered
+	};
+}
 //#endregion
 //#region src/lib/anthropic/stream-accumulator.ts
 /**
@@ -9260,10 +9528,6 @@ function handleContentBlockStart(index, block, acc) {
 	}
 	acc.contentBlocks[index] = newBlock;
 }
-/** Check if a block type is a server-side tool result (ends with _tool_result, but not plain tool_result) */
-function isServerToolResultType(type) {
-	return type !== "tool_result" && type.endsWith("_tool_result");
-}
 function handleContentBlockDelta(index, delta, acc, copilotAnnotations) {
 	const block = acc.contentBlocks[index];
 	if (!block) return;
@@ -9313,7 +9577,7 @@ function handleMessageDelta(delta, usage, acc) {
 }
 //#endregion
-//#region src/lib/anthropic/handlers.ts
+//#region src/lib/anthropic/sse.ts
 /**
 * Check if a model supports direct Anthropic API.
 * Returns a decision with reason so callers can log/display the routing rationale.
@@ -9374,46 +9638,6 @@ async function* processAnthropicStream(response, acc, clientAbortSignal) {
 	}
 }
-//#endregion
-//#region src/lib/anthropic/message-mapping.ts
-/**
-* Check if two messages likely correspond to the same original message.
-* Used by buildMessageMapping to handle cases where sanitization removes
-* content blocks within a message (changing its shape) or removes entire messages.
-*/
-function messagesMatch(orig, rewritten) {
-	if (orig.role !== rewritten.role) return false;
-	if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
-	const origBlocks = Array.isArray(orig.content) ? orig.content : [];
-	const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
-	if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
-	const ob = origBlocks[0];
-	const rb = rwBlocks[0];
-	if (ob.type !== rb.type) return false;
-	if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
-	if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
-	return true;
-}
-/**
-* Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
-* Uses a two-pointer approach since rewritten messages maintain the same relative
-* order as originals (all transformations are deletions, never reorderings).
-*/
-function buildMessageMapping(original, rewritten) {
-	const mapping = [];
-	let origIdx = 0;
-	for (const element of rewritten) while (origIdx < original.length) {
-		if (messagesMatch(original[origIdx], element)) {
-			mapping.push(origIdx);
-			origIdx++;
-			break;
-		}
-		origIdx++;
-	}
-	while (mapping.length < rewritten.length) mapping.push(-1);
-	return mapping;
-}
 //#endregion
 //#region src/lib/repetition-detector.ts
 /**
@@ -9704,10 +9928,14 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
 		if (initialSanitized.thinking && initialSanitized.thinking.type !== "disabled") tags.push(`thinking:${initialSanitized.thinking.type}`);
 		if (tags.length > 0) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags });
 	}
+	const headersCapture = {};
 	const adapter = {
 		format: "anthropic-messages",
 		sanitize: (p) => sanitizeAnthropicMessages(preprocessTools(p)),
-		execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, { resolvedModel: selectedModel })),
+		execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, {
+			resolvedModel: selectedModel,
+			headersCapture
+		})),
 		logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
 	};
 	const strategies = [
@@ -9755,6 +9983,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
 				}
 			}
 		});
+		reqCtx.setHttpHeaders(headersCapture);
 		const response = result.response;
 		const effectivePayload = result.effectivePayload;
 		if (Symbol.asyncIterator in response) {
@@ -9774,6 +10003,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
 		}
 		return handleDirectAnthropicNonStreamingResponse(c, response, reqCtx, truncateResult);
 	} catch (error) {
+		reqCtx.setHttpHeaders(headersCapture);
 		reqCtx.fail(anthropicPayload.model, error);
 		throw error;
 	}
@@ -9789,7 +10019,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
 	let eventsIn = 0;
 	let currentBlockType = "";
 	let firstEventLogged = false;
-	const serverToolFilter = state.stripServerTools ? createServerToolBlockFilter() : null;
+	const serverToolFilter = createServerToolBlockFilter();
 	try {
 		for await (const { raw: rawEvent, parsed } of processAnthropicStream(response, acc, clientAbortSignal)) {
 			const dataLen = rawEvent.data?.length ?? 0;
@@ -9809,8 +10039,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
 				currentBlockType = parsed.content_block.type;
 				consola.debug(`[Stream] Block #${parsed.index} start: ${currentBlockType} at +${Date.now() - streamStartMs}ms`);
 				const block = parsed.content_block;
-				if (block.type === "server_tool_use") consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
-				else if (block.type !== "tool_result" && block.type.endsWith("_tool_result")) logServerToolResult(block);
+				logServerToolBlock(block);
 			} else if (parsed?.type === "content_block_stop") {
 				const offset = Date.now() - streamStartMs;
 				consola.debug(`[Stream] Block #${parsed.index} stop (${currentBlockType}) at +${offset}ms, cumulative ↓${bytesIn}B ${eventsIn}ev`);
@@ -9825,7 +10054,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
 				const delta = parsed.delta;
 				if (delta.type === "text_delta" && delta.text) checkRepetition(delta.text);
 			}
-			const forwardData = serverToolFilter ? serverToolFilter.rewriteEvent(parsed, rawEvent.data ?? "") : rawEvent.data ?? "";
+			const forwardData = serverToolFilter.rewriteEvent(parsed, rawEvent.data ?? "");
 			if (forwardData === null) continue;
 			await stream.writeSSE({
 				data: forwardData,
@@ -9880,31 +10109,9 @@ function handleDirectAnthropicNonStreamingResponse(c, response, reqCtx, truncate
 	let finalResponse = response;
 	if (state.verbose && truncateResult?.wasTruncated) finalResponse = prependMarkerToResponse(response, createTruncationMarker$1(truncateResult));
 	logServerToolBlocks(finalResponse.content);
-	if (state.stripServerTools) finalResponse = filterServerToolBlocksFromResponse(finalResponse);
+	finalResponse = filterServerToolBlocksFromResponse(finalResponse);
 	return c.json(finalResponse);
 }
-/**
-* Log information extracted from a server tool result block.
-* Called before filtering, so information is never lost even when blocks are stripped.
-*/
-function logServerToolResult(block) {
-	const content = block.content;
-	if (!content) return;
-	const contentType = content.type;
-	if (contentType === "tool_search_tool_search_result") {
-		const toolNames = content.tool_references?.map((r) => r.tool_name).filter(Boolean) ?? [];
-		consola.debug(`[ServerTool] tool_search result: discovered ${toolNames.length} tools${toolNames.length > 0 ? ` [${toolNames.join(", ")}]` : ""}`);
-	} else if (contentType === "tool_search_tool_result_error") consola.warn(`[ServerTool] tool_search error: ${content.error_code}`);
-	else consola.debug(`[ServerTool] ${block.type}: ${contentType ?? "unknown"}`);
-}
-/**
-* Log server tool blocks from a non-streaming response.
-* Must be called before filterServerToolBlocksFromResponse() to preserve info.
-*/
-function logServerToolBlocks(content) {
-	for (const block of content) if (block.type === "server_tool_use") consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
-	else if (block.type !== "tool_result" && block.type.endsWith("_tool_result")) logServerToolResult(block);
-}
 /** Convert SanitizationStats to the format expected by rewrites */
 function toSanitizationInfo(stats) {
 	return {
@@ -9916,75 +10123,6 @@ function toSanitizationInfo(stats) {
 		systemReminderRemovals: stats.systemReminderRemovals
 	};
 }
-/**
-* Check if a content block is a server-side tool block.
-* Matches `server_tool_use` (any name) and all server tool result types
-* (web_search_tool_result, tool_search_tool_result, code_execution_tool_result, etc.).
-*/
-function isServerToolBlock(block) {
-	if (block.type === "server_tool_use") return true;
-	return block.type !== "tool_result" && block.type.endsWith("_tool_result");
-}
-/**
-* Filters server tool blocks from the SSE stream before forwarding to the client.
-* Handles index remapping so block indices remain dense/sequential after filtering.
-*
-* Only active when stripServerTools is enabled — in that mode, server tools
-* were stripped from the request, so any server_tool_use blocks in the response
-* are unexpected artifacts. When disabled (default), server tool blocks are
-* transparently forwarded per Anthropic protocol.
-*/
-function createServerToolBlockFilter() {
-	const filteredIndices = /* @__PURE__ */ new Set();
-	const clientIndexMap = /* @__PURE__ */ new Map();
-	let nextClientIndex = 0;
-	function getClientIndex(apiIndex) {
-		let idx = clientIndexMap.get(apiIndex);
-		if (idx === void 0) {
-			idx = nextClientIndex++;
-			clientIndexMap.set(apiIndex, idx);
-		}
-		return idx;
-	}
-	return { rewriteEvent(parsed, rawData) {
-		if (!parsed) return rawData;
-		if (parsed.type === "content_block_start") {
-			const block = parsed.content_block;
-			if (isServerToolBlock(block)) {
-				filteredIndices.add(parsed.index);
-				return null;
-			}
-			if (filteredIndices.size === 0) {
-				getClientIndex(parsed.index);
-				return rawData;
-			}
-			const clientIndex = getClientIndex(parsed.index);
-			if (clientIndex === parsed.index) return rawData;
-			const obj = JSON.parse(rawData);
-			obj.index = clientIndex;
-			return JSON.stringify(obj);
-		}
-		if (parsed.type === "content_block_delta" || parsed.type === "content_block_stop") {
-			if (filteredIndices.has(parsed.index)) return null;
-			if (filteredIndices.size === 0) return rawData;
-			const clientIndex = getClientIndex(parsed.index);
-			if (clientIndex === parsed.index) return rawData;
-			const obj = JSON.parse(rawData);
-			obj.index = clientIndex;
-			return JSON.stringify(obj);
-		}
-		return rawData;
-	} };
-}
-/** Filter server tool blocks from a non-streaming response */
-function filterServerToolBlocksFromResponse(response) {
-	const filtered = response.content.filter((block) => !isServerToolBlock(block));
-	if (filtered.length === response.content.length) return response;
-	return {
-		...response,
-		content: filtered
-	};
-}
 //#endregion
 //#region src/routes/messages/route.ts
@@ -10093,7 +10231,8 @@ async function handleResponses(c) {
 async function handleDirectResponses(opts) {
 	const { c, payload, reqCtx } = opts;
 	const selectedModel = state.modelIndex.get(payload.model);
-	const adapter = createResponsesAdapter(selectedModel);
+	const headersCapture = {};
+	const adapter = createResponsesAdapter(selectedModel, headersCapture);
 	const strategies = createResponsesStrategies();
 	try {
 		const pipelineResult = await executeRequestPipeline({
@@ -10105,6 +10244,7 @@ async function handleDirectResponses(opts) {
 			maxRetries: 1,
 			requestContext: reqCtx
 		});
+		reqCtx.setHttpHeaders(headersCapture);
 		const response = pipelineResult.response;
 		reqCtx.addQueueWaitMs(pipelineResult.queueWaitMs);
 		if (!payload.stream) {
@@ -10177,6 +10317,7 @@ async function handleDirectResponses(opts) {
 			}
 		});
 	} catch (error) {
+		reqCtx.setHttpHeaders(headersCapture);
 		reqCtx.fail(payload.model, error);
 		throw error;
 	}
@@ -10259,6 +10400,7 @@ server.notFound((c) => {
 });
 server.use(async (_c, next) => {
 	await applyConfigToState();
+	await ensureValidCopilotToken();
 	await next();
 });
 server.use(tuiMiddleware());
@@ -10343,6 +10485,7 @@ async function runServer(options) {
 	state.showGitHubToken = options.showGitHubToken;
 	state.autoTruncate = options.autoTruncate;
 	await ensurePaths();
+	consola.info(`Data directory: ${PATHS.APP_DIR}`);
 	const config = await applyConfigToState();
 	const proxyUrl = options.proxy ?? config.proxy;
 	initProxy({
@@ -10428,8 +10571,9 @@ async function runServer(options) {
 		if (runtime?.bun?.server) c.env = { server: runtime.bun.server };
 		await next();
 	});
-	const injectHistoryWs = await initHistoryWebSocket(server);
-	const injectResponsesWs = await initResponsesWebSocket(server);
+	const wsAdapter = await createWebSocketAdapter(server);
+	initHistoryWebSocket(server, wsAdapter.upgradeWebSocket);
+	initResponsesWebSocket(server, wsAdapter.upgradeWebSocket);
 	consola.box(`Web UI:\n🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage\n📜 History UI:   ${serverUrl}/history`);
 	const bunWebSocket = typeof globalThis.Bun !== "undefined" ? (await import("hono/bun")).websocket : void 0;
 	let serverInstance;
@@ -10451,13 +10595,9 @@ async function runServer(options) {
 	}
 	setServerInstance(serverInstance);
 	setupShutdownHandlers();
-	if (injectHistoryWs || injectResponsesWs) {
+	if (wsAdapter.injectWebSocket) {
 		const nodeServer = serverInstance.node?.server;
-		if (nodeServer && "on" in nodeServer) {
-			const ns = nodeServer;
-			injectHistoryWs?.(ns);
-			injectResponsesWs?.(ns);
-		}
+		if (nodeServer && "on" in nodeServer) wsAdapter.injectWebSocket(nodeServer);
 	}
 	await waitForShutdown();
 }