npm - @hsupu/copilot-api - Versions diffs - 0.7.6 → 0.7.8 - Mend

@hsupu/copilot-api 0.7.6 → 0.7.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/main.js CHANGED Viewed

@@ -246,8 +246,8 @@ async function getVSCodeVersion() {
 			}
 		});
 		if (!response.ok) return FALLBACK;
-		const version = (await response.json()).tag_name;
-		if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
+		const version$1 = (await response.json()).tag_name;
+		if (version$1 && /^\d+\.\d+\.\d+$/.test(version$1)) return version$1;
 		return FALLBACK;
 	} catch {
 		return FALLBACK;
@@ -434,13 +434,13 @@ const checkUsage = defineCommand({
 			const premiumUsed = premiumTotal - premium.remaining;
 			const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
 			const premiumPercentRemaining = premium.percent_remaining;
-			function summarizeQuota(name, snap) {
-				if (!snap) return `${name}: N/A`;
+			function summarizeQuota(name$1, snap) {
+				if (!snap) return `${name$1}: N/A`;
 				const total = snap.entitlement;
 				const used = total - snap.remaining;
 				const percentUsed = total > 0 ? used / total * 100 : 0;
 				const percentRemaining = snap.percent_remaining;
-				return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
+				return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
 			}
 			const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
 			const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -481,9 +481,9 @@ async function checkTokenExists() {
 	}
 }
 async function getDebugInfo() {
-	const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
+	const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
 	return {
-		version,
+		version: version$1,
 		runtime: getRuntimeInfo(),
 		paths: {
 			APP_DIR: PATHS.APP_DIR,
@@ -571,8 +571,8 @@ const PATTERNS = {
 /**
 * Parse semver version string to comparable parts
 */
-function parseVersion(version) {
-	return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
+function parseVersion(version$1) {
+	return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
 }
 /**
 * Compare two semver versions
@@ -590,9 +590,9 @@ function compareVersions(a, b) {
 	}
 	return 0;
 }
-function getPatternTypeForVersion(version) {
-	if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
-	if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
+function getPatternTypeForVersion(version$1) {
+	if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
+	if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
 	return null;
 }
 /**
@@ -624,8 +624,8 @@ function findInVoltaTools(voltaHome) {
 	if (existsSync(packagesPath)) paths.push(packagesPath);
 	const toolsDir = join(voltaHome, "tools", "image", "node");
 	if (existsSync(toolsDir)) try {
-		for (const version of readdirSync(toolsDir)) {
-			const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
+		for (const version$1 of readdirSync(toolsDir)) {
+			const claudePath = join(toolsDir, version$1, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
 			if (existsSync(claudePath)) paths.push(claudePath);
 		}
 	} catch {}
@@ -668,23 +668,23 @@ function getCurrentLimit(content) {
 * Check if Claude Code version is supported for patching
 */
 function checkVersionSupport(cliPath) {
-	const version = getClaudeCodeVersion(cliPath);
-	if (!version) return {
+	const version$1 = getClaudeCodeVersion(cliPath);
+	if (!version$1) return {
 		supported: false,
 		version: null,
 		patternType: null,
 		error: "Could not detect Claude Code version"
 	};
-	const patternType = getPatternTypeForVersion(version);
+	const patternType = getPatternTypeForVersion(version$1);
 	if (!patternType) return {
 		supported: false,
-		version,
+		version: version$1,
 		patternType: null,
-		error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
+		error: `Version ${version$1} is not supported. Supported: ${getSupportedRangeString()}`
 	};
 	return {
 		supported: true,
-		version,
+		version: version$1,
 		patternType
 	};
 }
@@ -735,8 +735,8 @@ function restoreClaudeCode(cliPath) {
 	return true;
 }
 function showStatus(cliPath, currentLimit) {
-	const version = getClaudeCodeVersion(cliPath);
-	if (version) consola.info(`Claude Code version: ${version}`);
+	const version$1 = getClaudeCodeVersion(cliPath);
+	if (version$1) consola.info(`Claude Code version: ${version$1}`);
 	if (currentLimit === null) {
 		consola.warn("Could not detect current limit - CLI may have been updated");
 		consola.info("Look for the BS9 variable or HR function pattern in cli.js");
@@ -818,6 +818,86 @@ const patchClaude = defineCommand({
 	}
 });
+//#endregion
+//#region package.json
+var name = "@hsupu/copilot-api";
+var version = "0.7.8";
+var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
+var keywords = [
+	"proxy",
+	"github-copilot",
+	"openai-compatible",
+	"anthropic-compatible"
+];
+var homepage = "https://github.com/puxu-msft/copilot-api-js";
+var bugs = "https://github.com/puxu-msft/copilot-api-js/issues";
+var repository = {
+	"type": "git",
+	"url": "git+https://github.com/puxu-msft/copilot-api-js.git"
+};
+var author = "hsupu";
+var type = "module";
+var bin = { "copilot-api": "dist/main.js" };
+var files = ["dist"];
+var scripts = {
+	"build": "npx tsdown",
+	"dev": "bun run --watch ./src/main.ts",
+	"knip": "knip-bun",
+	"lint": "eslint --cache",
+	"lint:all": "eslint --cache .",
+	"prepack": "npm run build",
+	"prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
+	"release": "bumpp && npm publish --access public",
+	"start": "NODE_ENV=production bun run ./src/main.ts",
+	"typecheck": "tsc"
+};
+var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
+var lint_staged = { "*": "bun run lint --fix" };
+var dependencies = {
+	"citty": "^0.1.6",
+	"clipboardy": "^5.0.0",
+	"consola": "^3.4.2",
+	"fetch-event-stream": "^0.1.5",
+	"gpt-tokenizer": "^3.0.1",
+	"hono": "^4.9.9",
+	"picocolors": "^1.1.1",
+	"proxy-from-env": "^1.1.0",
+	"srvx": "^0.8.9",
+	"tiny-invariant": "^1.3.3",
+	"undici": "^7.16.0"
+};
+var devDependencies = {
+	"@echristian/eslint-config": "^0.0.54",
+	"@types/bun": "^1.2.23",
+	"@types/proxy-from-env": "^1.0.4",
+	"bumpp": "^10.2.3",
+	"eslint": "^9.37.0",
+	"knip": "^5.64.1",
+	"lint-staged": "^16.2.3",
+	"prettier-plugin-packagejson": "^2.5.19",
+	"simple-git-hooks": "^2.13.1",
+	"tsdown": "^0.15.6",
+	"typescript": "^5.9.3"
+};
+var package_default = {
+	name,
+	version,
+	description,
+	keywords,
+	homepage,
+	bugs,
+	repository,
+	author,
+	type,
+	bin,
+	files,
+	scripts,
+	"simple-git-hooks": simple_git_hooks,
+	"lint-staged": lint_staged,
+	dependencies,
+	devDependencies
+};
 //#endregion
 //#region src/lib/adaptive-rate-limiter.ts
 const DEFAULT_CONFIG$1 = {
@@ -1566,8 +1646,8 @@ var ConsoleRenderer = class {
 	/**
 	* Get log prefix based on log type
 	*/
-	getLogPrefix(type) {
-		switch (type) {
+	getLogPrefix(type$1) {
+		switch (type$1) {
 			case "error":
 			case "fatal": return pc.red("✖");
 			case "warn": return pc.yellow("⚠");
@@ -2097,20 +2177,50 @@ const getTokenCount = async (payload, model) => {
 //#region src/lib/auto-compact.ts
 const DEFAULT_CONFIG = {
 	targetTokens: 12e4,
-	safetyMarginPercent: 2
+	safetyMarginPercent: 2,
+	maxRequestBodyBytes: 500 * 1024
 };
 /**
-* Check if payload needs compaction based on model limits.
+* Dynamic byte limit that adjusts based on 413 errors.
+* Starts at 500KB and can be adjusted when 413 errors are encountered.
+*/
+let dynamicByteLimitOverride = null;
+/**
+* Called when a 413 error is encountered with a specific payload size.
+* Adjusts the dynamic byte limit to 90% of the failing size.
+*/
+function onRequestTooLarge(failingBytes) {
+	const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+	dynamicByteLimitOverride = newLimit;
+	consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+}
+/**
+* Check if payload needs compaction based on model limits OR request body size.
 * Uses a safety margin to account for token counting differences.
 */
-async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
+async function checkNeedsCompaction(payload, model, config = {}) {
+	const cfg = {
+		...DEFAULT_CONFIG,
+		...config
+	};
 	const currentTokens = (await getTokenCount(payload, model)).input;
 	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
+	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+	const currentBytes = JSON.stringify(payload).length;
+	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+	const exceedsTokens = currentTokens > tokenLimit;
+	const exceedsBytes = currentBytes > byteLimit;
+	let reason;
+	if (exceedsTokens && exceedsBytes) reason = "both";
+	else if (exceedsTokens) reason = "tokens";
+	else if (exceedsBytes) reason = "bytes";
 	return {
-		needed: currentTokens > limit,
+		needed: exceedsTokens || exceedsBytes,
 		currentTokens,
-		limit
+		tokenLimit,
+		currentBytes,
+		byteLimit,
+		reason
 	};
 }
 /**
@@ -2153,18 +2263,40 @@ function getToolUseIds(message) {
 	return [];
 }
 /**
-* Find messages to keep from the end to stay under target tokens.
-* Returns the starting index of messages to preserve.
+* Estimate the byte size of a message (for binary search).
 */
-function findPreserveIndex(messages, targetTokens, systemTokens) {
-	const availableTokens = targetTokens - systemTokens - 500;
-	let accumulatedTokens = 0;
+function estimateMessageBytes(message) {
+	return JSON.stringify(message).length;
+}
+/**
+* Find the optimal preserve index using binary search.
+* This finds the point where we keep as many messages as possible
+* while staying under both token and byte limits.
+*/
+function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
+	if (messages.length === 0) return 0;
+	const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+	const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
+	const markerOverhead = 200;
+	const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
+	const availableBytes = targetBytes - systemBytes - markerOverhead;
+	const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
+	const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
 	for (let i = messages.length - 1; i >= 0; i--) {
-		const msgTokens = estimateMessageTokens(messages[i]);
-		if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
-		accumulatedTokens += msgTokens;
+		const msg = messages[i];
+		cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
+		cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
 	}
-	return 0;
+	let left = 0;
+	let right = messages.length;
+	while (left < right) {
+		const mid = Math.floor((left + right) / 2);
+		const tokensFromMid = cumulativeTokens[mid];
+		const bytesFromMid = cumulativeBytes[mid];
+		if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
+		else left = mid + 1;
+	}
+	return left;
 }
 /**
 * Filter out orphaned tool_result messages that don't have a matching tool_use
@@ -2202,12 +2334,6 @@ function ensureStartsWithUser(messages) {
 	return messages.slice(startIndex);
 }
 /**
-* Calculate estimated tokens for system messages.
-*/
-function estimateSystemTokens(systemMessages) {
-	return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
-}
-/**
 * Create a truncation marker message.
 */
 function createTruncationMarker(removedCount) {
@@ -2217,9 +2343,10 @@ function createTruncationMarker(removedCount) {
 	};
 }
 /**
-* Perform auto-compaction on a payload that exceeds token limits.
-* This uses simple truncation - no LLM calls required.
-* Uses iterative approach with decreasing target tokens until under limit.
+* Perform auto-compaction on a payload that exceeds token or size limits.
+* This uses simple truncation with binary search - no LLM calls required.
+* The algorithm finds the optimal truncation point to maximize preserved messages
+* while staying under both token and byte limits.
 */
 async function autoCompact(payload, model, config = {}) {
 	const cfg = {
@@ -2228,63 +2355,38 @@ async function autoCompact(payload, model, config = {}) {
 	};
 	const originalTokens = (await getTokenCount(payload, model)).input;
 	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
-	if (originalTokens <= limit) return {
+	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+	const originalBytes = JSON.stringify(payload).length;
+	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+	if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
 		payload,
 		wasCompacted: false,
 		originalTokens,
 		compactedTokens: originalTokens,
 		removedMessageCount: 0
 	};
-	consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
+	const exceedsTokens = originalTokens > tokenLimit;
+	const exceedsBytes = originalBytes > byteLimit;
+	let reason;
+	if (exceedsTokens && exceedsBytes) reason = "tokens and size";
+	else if (exceedsBytes) reason = "size";
+	else reason = "tokens";
+	consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
 	const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
-	const systemTokens = estimateSystemTokens(systemMessages);
-	consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
-	const MAX_ITERATIONS = 5;
-	const MIN_TARGET = 2e4;
-	let currentTarget = Math.min(cfg.targetTokens, limit);
-	let lastResult = null;
-	for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
-		const result = await tryCompactWithTarget({
+	consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
+	const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
+	if (preserveIndex === 0) {
+		consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
+		return {
 			payload,
-			model,
-			systemMessages,
-			remainingMessages,
-			systemTokens,
-			targetTokens: currentTarget,
-			limit,
-			originalTokens
-		});
-		if (!result.wasCompacted) return result;
-		lastResult = result;
-		if (result.compactedTokens <= limit) {
-			consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
-			return result;
-		}
-		consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
-		currentTarget = Math.floor(currentTarget * .7);
-		if (currentTarget < MIN_TARGET) {
-			consola.error("Auto-compact: Cannot reduce further, target too low");
-			return result;
-		}
+			wasCompacted: false,
+			originalTokens,
+			compactedTokens: originalTokens,
+			removedMessageCount: 0
+		};
 	}
-	consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
-	return lastResult ?? {
-		payload,
-		wasCompacted: false,
-		originalTokens,
-		compactedTokens: originalTokens,
-		removedMessageCount: 0
-	};
-}
-/**
-* Helper to attempt compaction with a specific target token count.
-*/
-async function tryCompactWithTarget(opts) {
-	const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
-	const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
-	if (preserveIndex === 0) {
-		consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
+	if (preserveIndex >= remainingMessages.length) {
+		consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
 		return {
 			payload,
 			wasCompacted: false,
@@ -2319,6 +2421,8 @@ async function tryCompactWithTarget(opts) {
 		]
 	};
 	const newTokenCount = await getTokenCount(newPayload, model);
+	const newBytes = JSON.stringify(newPayload).length;
+	consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
 	return {
 		payload: newPayload,
 		wasCompacted: true,
@@ -2433,12 +2537,16 @@ async function buildFinalPayload(payload, model) {
 	}
 	try {
 		const check = await checkNeedsCompaction(payload, model);
-		consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+		consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
 		if (!check.needed) return {
 			finalPayload: payload,
 			compactResult: null
 		};
-		consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+		let reasonText;
+		if (check.reason === "both") reasonText = "tokens and size";
+		else if (check.reason === "bytes") reasonText = "size";
+		else reasonText = "tokens";
+		consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
 		const compactResult = await autoCompact(payload, model);
 		return {
 			finalPayload: compactResult.payload,
@@ -2454,11 +2562,13 @@ async function buildFinalPayload(payload, model) {
 }
 /**
 * Log helpful debugging information when a 413 error occurs.
+* Also adjusts the dynamic byte limit for future requests.
 */
 async function logPayloadSizeInfo(payload, model) {
 	const messageCount = payload.messages.length;
 	const bodySize = JSON.stringify(payload).length;
 	const bodySizeKB = Math.round(bodySize / 1024);
+	onRequestTooLarge(bodySize);
 	let imageCount = 0;
 	let largeMessages = 0;
 	let totalImageSize = 0;
@@ -2584,7 +2694,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
 				...choice$1,
 				message: {
 					...choice$1.message,
-					content: (choice$1.message.content ?? "") + marker
+					content: marker + (choice$1.message.content ?? "")
 				}
 			} : choice$1)
 		};
@@ -2645,18 +2755,13 @@ async function handleStreamingResponse$1(opts) {
 	const { stream, response, payload, ctx } = opts;
 	const acc = createStreamAccumulator();
 	try {
-		for await (const chunk of response) {
-			consola.debug("Streaming chunk:", JSON.stringify(chunk));
-			parseStreamChunk(chunk, acc);
-			await stream.writeSSE(chunk);
-		}
 		if (ctx.compactResult?.wasCompacted) {
 			const marker = createCompactionMarker(ctx.compactResult);
 			const markerChunk = {
 				id: `compact-marker-${Date.now()}`,
 				object: "chat.completion.chunk",
 				created: Math.floor(Date.now() / 1e3),
-				model: acc.model || payload.model,
+				model: payload.model,
 				choices: [{
 					index: 0,
 					delta: { content: marker },
@@ -2670,6 +2775,11 @@ async function handleStreamingResponse$1(opts) {
 			});
 			acc.content += marker;
 		}
+		for await (const chunk of response) {
+			consola.debug("Streaming chunk:", JSON.stringify(chunk));
+			parseStreamChunk(chunk, acc);
+			await stream.writeSSE(chunk);
+		}
 		recordStreamSuccess(acc, payload.model, ctx);
 		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 	} catch (error) {
@@ -4460,7 +4570,7 @@ function handleNonStreamingResponse(opts) {
 	consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
 	if (ctx.compactResult?.wasCompacted) {
 		const marker = createCompactionMarker(ctx.compactResult);
-		anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
+		anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
 	}
 	recordResponse(ctx.historyId, {
 		success: true,
@@ -4492,16 +4602,16 @@ function handleNonStreamingResponse(opts) {
 	});
 	return c.json(anthropicResponse);
 }
-function appendMarkerToAnthropicResponse(response, marker) {
+function prependMarkerToAnthropicResponse(response, marker) {
 	const content = [...response.content];
-	const lastTextIndex = content.findLastIndex((block) => block.type === "text");
-	if (lastTextIndex !== -1) {
-		const textBlock = content[lastTextIndex];
-		if (textBlock.type === "text") content[lastTextIndex] = {
+	const firstTextIndex = content.findIndex((block) => block.type === "text");
+	if (firstTextIndex !== -1) {
+		const textBlock = content[firstTextIndex];
+		if (textBlock.type === "text") content[firstTextIndex] = {
 			...textBlock,
-			text: textBlock.text + marker
+			text: marker + textBlock.text
 		};
-	} else content.push({
+	} else content.unshift({
 		type: "text",
 		text: marker
 	});
@@ -4531,6 +4641,11 @@ async function handleStreamingResponse(opts) {
 	};
 	const acc = createAnthropicStreamAccumulator();
 	try {
+		if (ctx.compactResult?.wasCompacted) {
+			const marker = createCompactionMarker(ctx.compactResult);
+			await sendCompactionMarkerEvent(stream, streamState, marker);
+			acc.content += marker;
+		}
 		await processStreamChunks({
 			stream,
 			response,
@@ -4538,11 +4653,6 @@ async function handleStreamingResponse(opts) {
 			streamState,
 			acc
 		});
-		if (ctx.compactResult?.wasCompacted) {
-			const marker = createCompactionMarker(ctx.compactResult);
-			await sendCompactionMarkerEvent(stream, streamState, marker);
-			acc.content += marker;
-		}
 		recordStreamingResponse(acc, anthropicPayload.model, ctx);
 		completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
 	} catch (error) {
@@ -4855,6 +4965,7 @@ function formatModelInfo(model) {
 	return `  - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
 }
 async function runServer(options) {
+	consola.info(`copilot-api v${package_default.version}`);
 	if (options.proxyEnv) initProxyFromEnv();
 	if (options.verbose) {
 		consola.level = 5;