npm - @hsupu/copilot-api - Versions diffs - 0.7.8 → 0.7.10 - Mend

@hsupu/copilot-api 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/main.js CHANGED

@@ -46,7 +46,7 @@ const state = {
 	accountType: "individual",
 	manualApprove: false,
 	showToken: false,
-	autoCompact: false
+	autoCompact: true
 };
 //#endregion
@@ -558,15 +558,12 @@ const SUPPORTED_VERSIONS = {
 		min: "2.0.0",
 		max: "2.1.10"
 	},
-	v2b: {
-		min: "2.1.11",
-		max: "2.1.12"
-	}
+	v2b: { min: "2.1.11" }
 };
 const PATTERNS = {
 	funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
 	funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
-	variable: /var BS9=(\d+)/
+	variable: /var ([A-Za-z_$]\w*)=(\d+)(?=,\w+=20000,)/
 };
 /**
 * Parse semver version string to comparable parts
@@ -592,14 +589,14 @@ function compareVersions(a, b) {
 }
 function getPatternTypeForVersion(version$1) {
 	if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
-	if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
+	if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0) return "variable";
 	return null;
 }
 /**
 * Get supported version range string for error messages
 */
 function getSupportedRangeString() {
-	return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}-${SUPPORTED_VERSIONS.v2b.max}`;
+	return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}+`;
 }
 /**
 * Get Claude Code version from package.json
@@ -632,9 +629,9 @@ function findInVoltaTools(voltaHome) {
 	return paths;
 }
 /**
-* Find Claude Code CLI path by checking common locations
+* Find all Claude Code CLI paths by checking common locations
 */
-function findClaudeCodePath() {
+function findAllClaudeCodePaths() {
 	const possiblePaths = [];
 	const home = process.env.HOME || "";
 	const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
@@ -649,22 +646,41 @@ function findClaudeCodePath() {
 	for (const base of globalPaths) possiblePaths.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
 	const bunGlobal = join(home, ".bun", "install", "global");
 	if (existsSync(bunGlobal)) possiblePaths.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
-	return possiblePaths.find((p) => existsSync(p)) ?? null;
+	return [...new Set(possiblePaths.filter((p) => existsSync(p)))];
 }
 /**
-* Get current context limit from Claude Code
+* Get installation info for a CLI path
 */
-function getCurrentLimit(content) {
+function getInstallationInfo(cliPath) {
+	const version$1 = getClaudeCodeVersion(cliPath);
+	const content = readFileSync(cliPath, "utf8");
+	const limit = getCurrentLimit(content);
+	return {
+		path: cliPath,
+		version: version$1,
+		limit
+	};
+}
+function getCurrentLimitInfo(content) {
 	const varMatch = content.match(PATTERNS.variable);
-	if (varMatch) return Number.parseInt(varMatch[1], 10);
+	if (varMatch) return {
+		limit: Number.parseInt(varMatch[2], 10),
+		varName: varMatch[1]
+	};
 	const funcMatch = content.match(PATTERNS.funcPatched);
 	if (funcMatch) {
 		const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
-		return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
+		return limitMatch ? { limit: Number.parseInt(limitMatch[1], 10) } : null;
 	}
 	return null;
 }
 /**
+* Get current context limit from Claude Code (legacy wrapper)
+*/
+function getCurrentLimit(content) {
+	return getCurrentLimitInfo(content)?.limit ?? null;
+}
+/**
 * Check if Claude Code version is supported for patching
 */
 function checkVersionSupport(cliPath) {
@@ -696,22 +712,25 @@ function patchClaudeCode(cliPath, newLimit) {
 	const versionCheck = checkVersionSupport(cliPath);
 	if (!versionCheck.supported) {
 		consola.error(versionCheck.error);
-		return false;
+		return "failed";
 	}
 	consola.info(`Claude Code version: ${versionCheck.version}`);
-	if (getCurrentLimit(content) === newLimit) {
-		consola.info(`Already patched with limit ${newLimit}`);
-		return true;
-	}
+	const limitInfo = getCurrentLimitInfo(content);
+	if (limitInfo?.limit === newLimit) return "already_patched";
 	let newContent;
-	if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
-	else {
+	if (versionCheck.patternType === "variable") {
+		if (!limitInfo?.varName) {
+			consola.error("Could not detect variable name for patching");
+			return "failed";
+		}
+		newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=${newLimit}`);
+	} else {
 		const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
 		const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
 		newContent = content.replace(pattern, replacement);
 	}
 	writeFileSync(cliPath, newContent);
-	return true;
+	return "success";
 }
 /**
 * Restore Claude Code to original 200k limit
@@ -724,13 +743,19 @@ function restoreClaudeCode(cliPath) {
 		return false;
 	}
 	consola.info(`Claude Code version: ${versionCheck.version}`);
-	if (getCurrentLimit(content) === 2e5) {
+	const limitInfo = getCurrentLimitInfo(content);
+	if (limitInfo?.limit === 2e5) {
 		consola.info("Already at original 200000 limit");
 		return true;
 	}
 	let newContent;
-	if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
-	else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
+	if (versionCheck.patternType === "variable") {
+		if (!limitInfo?.varName) {
+			consola.error("Could not detect variable name for restoring");
+			return false;
+		}
+		newContent = content.replace(PATTERNS.variable, `var ${limitInfo.varName}=200000`);
+	} else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
 	writeFileSync(cliPath, newContent);
 	return true;
 }
@@ -739,7 +764,7 @@ function showStatus(cliPath, currentLimit) {
 	if (version$1) consola.info(`Claude Code version: ${version$1}`);
 	if (currentLimit === null) {
 		consola.warn("Could not detect current limit - CLI may have been updated");
-		consola.info("Look for the BS9 variable or HR function pattern in cli.js");
+		consola.info("Look for a variable like 'var XXX=200000' followed by ',YYY=20000,' in cli.js");
 	} else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
 	else consola.info(`Status: Patched (${currentLimit} context window)`);
 }
@@ -773,17 +798,42 @@ const patchClaude = defineCommand({
 			description: "Show current patch status without modifying"
 		}
 	},
-	run({ args }) {
-		const cliPath = args.path || findClaudeCodePath();
-		if (!cliPath) {
-			consola.error("Could not find Claude Code installation");
-			consola.info("Searched in: volta, npm global, bun global");
-			consola.info("Use --path to specify the path to cli.js manually");
-			process.exit(1);
-		}
-		if (!existsSync(cliPath)) {
-			consola.error(`File not found: ${cliPath}`);
-			process.exit(1);
+	async run({ args }) {
+		let cliPath;
+		if (args.path) {
+			cliPath = args.path;
+			if (!existsSync(cliPath)) {
+				consola.error(`File not found: ${cliPath}`);
+				process.exit(1);
+			}
+		} else {
+			const installations = findAllClaudeCodePaths();
+			if (installations.length === 0) {
+				consola.error("Could not find Claude Code installation");
+				consola.info("Searched in: volta, npm global, bun global");
+				consola.info("Use --path to specify the path to cli.js manually");
+				process.exit(1);
+			}
+			if (installations.length === 1) cliPath = installations[0];
+			else {
+				consola.info(`Found ${installations.length} Claude Code installations:`);
+				const options = installations.map((path$1) => {
+					const info = getInstallationInfo(path$1);
+					let status = "unknown";
+					if (info.limit === 2e5) status = "original";
+					else if (info.limit) status = `patched: ${info.limit}`;
+					return {
+						label: `v${info.version ?? "?"} (${status}) - ${path$1}`,
+						value: path$1
+					};
+				});
+				const selected = await consola.prompt("Select installation to patch:", {
+					type: "select",
+					options
+				});
+				if (typeof selected === "symbol") process.exit(0);
+				cliPath = selected;
+			}
 		}
 		consola.info(`Claude Code path: ${cliPath}`);
 		const content = readFileSync(cliPath, "utf8");
@@ -806,13 +856,14 @@ const patchClaude = defineCommand({
 			consola.error("Invalid limit value. Must be a number >= 1000");
 			process.exit(1);
 		}
-		if (patchClaudeCode(cliPath, limit)) {
-			consola.success(`Patched context window: 200000 → ${limit}`);
+		const result = patchClaudeCode(cliPath, limit);
+		if (result === "success") {
+			consola.success(`Patched context window: ${currentLimit ?? 2e5} → ${limit}`);
 			consola.info("Note: You may need to re-run this after Claude Code updates");
-		} else {
+		} else if (result === "already_patched") consola.success(`Already patched with limit ${limit}`);
+		else {
 			consola.error("Failed to patch - pattern not found");
 			consola.info("Claude Code may have been updated to a new version");
-			consola.info("Check the cli.js for the HR function pattern");
 			process.exit(1);
 		}
 	}
@@ -821,7 +872,7 @@ const patchClaude = defineCommand({
 //#endregion
 //#region package.json
 var name = "@hsupu/copilot-api";
-var version = "0.7.8";
+var version = "0.7.10";
 var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
 var keywords = [
 	"proxy",
@@ -2176,188 +2227,157 @@ const getTokenCount = async (payload, model) => {
 //#endregion
 //#region src/lib/auto-compact.ts
 const DEFAULT_CONFIG = {
-	targetTokens: 12e4,
 	safetyMarginPercent: 2,
 	maxRequestBodyBytes: 500 * 1024
 };
+/** Dynamic byte limit that adjusts based on 413 errors */
+let dynamicByteLimit = null;
 /**
-* Dynamic byte limit that adjusts based on 413 errors.
-* Starts at 500KB and can be adjusted when 413 errors are encountered.
-*/
-let dynamicByteLimitOverride = null;
-/**
-* Called when a 413 error is encountered with a specific payload size.
-* Adjusts the dynamic byte limit to 90% of the failing size.
+* Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
 */
 function onRequestTooLarge(failingBytes) {
 	const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
-	dynamicByteLimitOverride = newLimit;
-	consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+	dynamicByteLimit = newLimit;
+	consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed → ${Math.round(newLimit / 1024)}KB`);
 }
-/**
-* Check if payload needs compaction based on model limits OR request body size.
-* Uses a safety margin to account for token counting differences.
-*/
-async function checkNeedsCompaction(payload, model, config = {}) {
-	const cfg = {
-		...DEFAULT_CONFIG,
-		...config
-	};
-	const currentTokens = (await getTokenCount(payload, model)).input;
-	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
-	const currentBytes = JSON.stringify(payload).length;
-	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
-	const exceedsTokens = currentTokens > tokenLimit;
-	const exceedsBytes = currentBytes > byteLimit;
-	let reason;
-	if (exceedsTokens && exceedsBytes) reason = "both";
-	else if (exceedsTokens) reason = "tokens";
-	else if (exceedsBytes) reason = "bytes";
+function calculateLimits(model, config) {
+	const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+	const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
+	const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
 	return {
-		needed: exceedsTokens || exceedsBytes,
-		currentTokens,
 		tokenLimit,
-		currentBytes,
-		byteLimit,
-		reason
+		byteLimit
 	};
 }
-/**
-* Calculate approximate token count for a single message.
-* This is a fast estimation for splitting decisions.
-*/
-function estimateMessageTokens(message) {
-	let text = "";
-	if (typeof message.content === "string") text = message.content;
-	else if (Array.isArray(message.content)) {
-		for (const part of message.content) if (part.type === "text") text += part.text;
-		else if ("image_url" in part) text += part.image_url.url;
+/** Estimate tokens for a single message (fast approximation) */
+function estimateMessageTokens(msg) {
+	let charCount = 0;
+	if (typeof msg.content === "string") charCount = msg.content.length;
+	else if (Array.isArray(msg.content)) {
+		for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
+		else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
 	}
-	if (message.tool_calls) text += JSON.stringify(message.tool_calls);
-	return Math.ceil(text.length / 4) + 10;
+	if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
+	return Math.ceil(charCount / 4) + 10;
 }
-/**
-* Extract system messages from the beginning of the message list.
-*/
+/** Get byte size of a message */
+function getMessageBytes(msg) {
+	return JSON.stringify(msg).length;
+}
+/** Extract system/developer messages from the beginning */
 function extractSystemMessages(messages) {
-	const systemMessages = [];
-	let i = 0;
-	while (i < messages.length) {
-		const msg = messages[i];
-		if (msg.role === "system" || msg.role === "developer") {
-			systemMessages.push(msg);
-			i++;
-		} else break;
+	let splitIndex = 0;
+	while (splitIndex < messages.length) {
+		const role = messages[splitIndex].role;
+		if (role !== "system" && role !== "developer") break;
+		splitIndex++;
 	}
 	return {
-		systemMessages,
-		remainingMessages: messages.slice(i)
+		systemMessages: messages.slice(0, splitIndex),
+		conversationMessages: messages.slice(splitIndex)
 	};
 }
-/**
-* Extract tool_use ids from assistant messages with tool_calls.
-*/
-function getToolUseIds(message) {
-	if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
+/** Get tool_use IDs from an assistant message */
+function getToolCallIds(msg) {
+	if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
 	return [];
 }
-/**
-* Estimate the byte size of a message (for binary search).
-*/
-function estimateMessageBytes(message) {
-	return JSON.stringify(message).length;
+/** Filter orphaned tool_result messages */
+function filterOrphanedToolResults(messages) {
+	const toolUseIds = /* @__PURE__ */ new Set();
+	for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
+	let removedCount = 0;
+	const filtered = messages.filter((msg) => {
+		if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
+			removedCount++;
+			return false;
+		}
+		return true;
+	});
+	if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+	return filtered;
+}
+/** Ensure messages start with a user message */
+function ensureStartsWithUser(messages) {
+	let startIndex = 0;
+	while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
+	if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+	return messages.slice(startIndex);
 }
 /**
-* Find the optimal preserve index using binary search.
-* This finds the point where we keep as many messages as possible
-* while staying under both token and byte limits.
+* Find the optimal index from which to preserve messages.
+* Uses binary search with pre-calculated cumulative sums.
+* Returns the smallest index where the preserved portion fits within limits.
 */
-function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
+function findOptimalPreserveIndex(params) {
+	const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
 	if (messages.length === 0) return 0;
-	const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
-	const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
-	const markerOverhead = 200;
-	const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
-	const availableBytes = targetBytes - systemBytes - markerOverhead;
-	const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
-	const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
-	for (let i = messages.length - 1; i >= 0; i--) {
+	const markerBytes = 200;
+	const availableTokens = tokenLimit - systemTokens - 50;
+	const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
+	if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
+	const n = messages.length;
+	const cumTokens = Array.from({ length: n + 1 }, () => 0);
+	const cumBytes = Array.from({ length: n + 1 }, () => 0);
+	for (let i = n - 1; i >= 0; i--) {
 		const msg = messages[i];
-		cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
-		cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
+		cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+		cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
 	}
 	let left = 0;
-	let right = messages.length;
+	let right = n;
 	while (left < right) {
-		const mid = Math.floor((left + right) / 2);
-		const tokensFromMid = cumulativeTokens[mid];
-		const bytesFromMid = cumulativeBytes[mid];
-		if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
+		const mid = left + right >>> 1;
+		if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
 		else left = mid + 1;
 	}
 	return left;
 }
 /**
-* Filter out orphaned tool_result messages that don't have a matching tool_use
-* in the preserved message list. This prevents API errors when truncation
-* separates tool_use/tool_result pairs.
-*/
-function filterOrphanedToolResults(messages) {
-	const availableToolUseIds = /* @__PURE__ */ new Set();
-	for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
-	const filteredMessages = [];
-	let removedCount = 0;
-	for (const msg of messages) {
-		if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
-			removedCount++;
-			continue;
-		}
-		filteredMessages.push(msg);
-	}
-	if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
-	return filteredMessages;
-}
-/**
-* Ensure the message list starts with a user message.
-* If it starts with assistant or tool messages, skip them until we find a user message.
-* This is required because OpenAI API expects conversations to start with user messages
-* (after system messages).
+* Check if payload needs compaction based on model limits or byte size.
 */
-function ensureStartsWithUser(messages) {
-	let startIndex = 0;
-	while (startIndex < messages.length) {
-		if (messages[startIndex].role === "user") break;
-		startIndex++;
-	}
-	if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
-	return messages.slice(startIndex);
+async function checkNeedsCompaction(payload, model, config = {}) {
+	const cfg = {
+		...DEFAULT_CONFIG,
+		...config
+	};
+	const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+	const currentTokens = (await getTokenCount(payload, model)).input;
+	const currentBytes = JSON.stringify(payload).length;
+	const exceedsTokens = currentTokens > tokenLimit;
+	const exceedsBytes = currentBytes > byteLimit;
+	let reason;
+	if (exceedsTokens && exceedsBytes) reason = "both";
+	else if (exceedsTokens) reason = "tokens";
+	else if (exceedsBytes) reason = "bytes";
+	return {
+		needed: exceedsTokens || exceedsBytes,
+		currentTokens,
+		tokenLimit,
+		currentBytes,
+		byteLimit,
+		reason
+	};
 }
-/**
-* Create a truncation marker message.
-*/
+/** Create a truncation marker message */
 function createTruncationMarker(removedCount) {
 	return {
 		role: "user",
-		content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+		content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
 	};
 }
 /**
-* Perform auto-compaction on a payload that exceeds token or size limits.
-* This uses simple truncation with binary search - no LLM calls required.
-* The algorithm finds the optimal truncation point to maximize preserved messages
-* while staying under both token and byte limits.
+* Perform auto-compaction on a payload that exceeds limits.
+* Uses binary search to find the optimal truncation point.
 */
 async function autoCompact(payload, model, config = {}) {
 	const cfg = {
 		...DEFAULT_CONFIG,
 		...config
 	};
-	const originalTokens = (await getTokenCount(payload, model)).input;
-	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+	const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
 	const originalBytes = JSON.stringify(payload).length;
-	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+	const originalTokens = (await getTokenCount(payload, model)).input;
 	if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
 		payload,
 		wasCompacted: false,
@@ -2371,12 +2391,23 @@ async function autoCompact(payload, model, config = {}) {
 	if (exceedsTokens && exceedsBytes) reason = "tokens and size";
 	else if (exceedsBytes) reason = "size";
 	else reason = "tokens";
-	consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
-	const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
-	consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
-	const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
+	consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
+	const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
+	const messagesJson = JSON.stringify(payload.messages);
+	const payloadOverhead = originalBytes - messagesJson.length;
+	const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
+	const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+	consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+	const preserveIndex = findOptimalPreserveIndex({
+		messages: conversationMessages,
+		systemBytes,
+		systemTokens,
+		payloadOverhead,
+		tokenLimit,
+		byteLimit
+	});
 	if (preserveIndex === 0) {
-		consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
+		consola.warn("Auto-compact: Cannot truncate, system messages too large");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2385,8 +2416,8 @@ async function autoCompact(payload, model, config = {}) {
 			removedMessageCount: 0
 		};
 	}
-	if (preserveIndex >= remainingMessages.length) {
-		consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
+	if (preserveIndex >= conversationMessages.length) {
+		consola.warn("Auto-compact: Would need to remove all messages");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2395,13 +2426,12 @@ async function autoCompact(payload, model, config = {}) {
 			removedMessageCount: 0
 		};
 	}
-	const removedMessages = remainingMessages.slice(0, preserveIndex);
-	let preservedMessages = remainingMessages.slice(preserveIndex);
-	preservedMessages = filterOrphanedToolResults(preservedMessages);
-	preservedMessages = ensureStartsWithUser(preservedMessages);
-	preservedMessages = filterOrphanedToolResults(preservedMessages);
-	if (preservedMessages.length === 0) {
-		consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
+	let preserved = conversationMessages.slice(preserveIndex);
+	preserved = filterOrphanedToolResults(preserved);
+	preserved = ensureStartsWithUser(preserved);
+	preserved = filterOrphanedToolResults(preserved);
+	if (preserved.length === 0) {
+		consola.warn("Auto-compact: All messages filtered out after cleanup");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2410,29 +2440,30 @@ async function autoCompact(payload, model, config = {}) {
 			removedMessageCount: 0
 		};
 	}
-	consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
-	const truncationMarker = createTruncationMarker(removedMessages.length);
+	const removedCount = conversationMessages.length - preserved.length;
+	const marker = createTruncationMarker(removedCount);
 	const newPayload = {
 		...payload,
 		messages: [
 			...systemMessages,
-			truncationMarker,
-			...preservedMessages
+			marker,
+			...preserved
 		]
 	};
-	const newTokenCount = await getTokenCount(newPayload, model);
 	const newBytes = JSON.stringify(newPayload).length;
-	consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
+	const newTokenCount = await getTokenCount(newPayload, model);
+	consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
+	if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
 	return {
 		payload: newPayload,
 		wasCompacted: true,
 		originalTokens,
 		compactedTokens: newTokenCount.input,
-		removedMessageCount: removedMessages.length
+		removedMessageCount: removedCount
 	};
 }
 /**
-* Create a marker to append to responses indicating auto-compaction occurred.
+* Create a marker to prepend to responses indicating auto-compaction occurred.
 */
 function createCompactionMarker(result) {
 	if (!result.wasCompacted) return "";
@@ -4129,16 +4160,33 @@ function translateAnthropicMessagesToOpenAI(anthropicMessages, system, toolNameM
 	const otherMessages = anthropicMessages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, toolNameMapping));
 	return [...systemMessages, ...otherMessages];
 }
+const RESERVED_KEYWORDS = ["x-anthropic-billing-header"];
+/**
+* Filter out reserved keywords from system prompt text.
+* Copilot API rejects requests containing these keywords.
+* Removes the entire line containing the keyword to keep the prompt clean.
+*/
+function filterReservedKeywords(text) {
+	let filtered = text;
+	for (const keyword of RESERVED_KEYWORDS) if (text.includes(keyword)) {
+		consola.debug(`[Reserved Keyword] Removing line containing "${keyword}"`);
+		filtered = filtered.split("\n").filter((line) => !line.includes(keyword)).join("\n");
+	}
+	return filtered;
+}
 function handleSystemPrompt(system) {
 	if (!system) return [];
 	if (typeof system === "string") return [{
 		role: "system",
-		content: system
-	}];
-	else return [{
-		role: "system",
-		content: system.map((block) => block.text).join("\n\n")
+		content: filterReservedKeywords(system)
 	}];
+	else {
+		const systemText = system.map((block) => block.text).join("\n\n");
+		return [{
+			role: "system",
+			content: filterReservedKeywords(systemText)
+		}];
+	}
 }
 function handleUserMessage(message) {
 	const newMessages = [];
@@ -4983,7 +5031,7 @@ async function runServer(options) {
 		consecutiveSuccessesForRecovery: options.consecutiveSuccesses
 	});
 	else consola.info("Rate limiting disabled");
-	if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
+	if (!options.autoCompact) consola.info("Auto-compact disabled");
 	initHistory(options.history, options.historyLimit);
 	if (options.history) {
 		const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5125,10 +5173,10 @@ const start = defineCommand({
 			default: "1000",
 			description: "Maximum number of history entries to keep in memory (0 = unlimited)"
 		},
-		"auto-compact": {
+		"no-auto-compact": {
 			type: "boolean",
 			default: false,
-			description: "Automatically compress conversation history when exceeding model token limits"
+			description: "Disable automatic conversation history compression when exceeding limits"
 		}
 	},
 	run({ args }) {
@@ -5149,7 +5197,7 @@ const start = defineCommand({
 			proxyEnv: args["proxy-env"],
 			history: !args["no-history"],
 			historyLimit: Number.parseInt(args["history-limit"], 10),
-			autoCompact: args["auto-compact"]
+			autoCompact: !args["no-auto-compact"]
 		});
 	}
 });