@hsupu/copilot-api 0.7.8 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,7 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
- autoCompact: false
+ autoCompact: true
  };

  //#endregion
@@ -821,7 +821,7 @@ const patchClaude = defineCommand({
  //#endregion
  //#region package.json
  var name = "@hsupu/copilot-api";
- var version = "0.7.8";
+ var version = "0.7.9";
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
  var keywords = [
  "proxy",
@@ -2176,188 +2176,157 @@ const getTokenCount = async (payload, model) => {
  //#endregion
  //#region src/lib/auto-compact.ts
  const DEFAULT_CONFIG = {
- targetTokens: 12e4,
  safetyMarginPercent: 2,
  maxRequestBodyBytes: 500 * 1024
  };
+ /** Dynamic byte limit that adjusts based on 413 errors */
+ let dynamicByteLimit = null;
  /**
- * Dynamic byte limit that adjusts based on 413 errors.
- * Starts at 500KB and can be adjusted when 413 errors are encountered.
- */
- let dynamicByteLimitOverride = null;
- /**
- * Called when a 413 error is encountered with a specific payload size.
- * Adjusts the dynamic byte limit to 90% of the failing size.
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
  */
  function onRequestTooLarge(failingBytes) {
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
- dynamicByteLimitOverride = newLimit;
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+ dynamicByteLimit = newLimit;
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed ${Math.round(newLimit / 1024)}KB`);
  }
- /**
- * Check if payload needs compaction based on model limits OR request body size.
- * Uses a safety margin to account for token counting differences.
- */
- async function checkNeedsCompaction(payload, model, config = {}) {
- const cfg = {
- ...DEFAULT_CONFIG,
- ...config
- };
- const currentTokens = (await getTokenCount(payload, model)).input;
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
- const currentBytes = JSON.stringify(payload).length;
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
- const exceedsTokens = currentTokens > tokenLimit;
- const exceedsBytes = currentBytes > byteLimit;
- let reason;
- if (exceedsTokens && exceedsBytes) reason = "both";
- else if (exceedsTokens) reason = "tokens";
- else if (exceedsBytes) reason = "bytes";
+ function calculateLimits(model, config) {
+ const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
+ const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
  return {
- needed: exceedsTokens || exceedsBytes,
- currentTokens,
  tokenLimit,
- currentBytes,
- byteLimit,
- reason
+ byteLimit
  };
  }
- /**
- * Calculate approximate token count for a single message.
- * This is a fast estimation for splitting decisions.
- */
- function estimateMessageTokens(message) {
- let text = "";
- if (typeof message.content === "string") text = message.content;
- else if (Array.isArray(message.content)) {
- for (const part of message.content) if (part.type === "text") text += part.text;
- else if ("image_url" in part) text += part.image_url.url;
+ /** Estimate tokens for a single message (fast approximation) */
+ function estimateMessageTokens(msg) {
+ let charCount = 0;
+ if (typeof msg.content === "string") charCount = msg.content.length;
+ else if (Array.isArray(msg.content)) {
+ for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
+ else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
  }
- if (message.tool_calls) text += JSON.stringify(message.tool_calls);
- return Math.ceil(text.length / 4) + 10;
+ if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
+ return Math.ceil(charCount / 4) + 10;
  }
- /**
- * Extract system messages from the beginning of the message list.
- */
+ /** Get byte size of a message */
+ function getMessageBytes(msg) {
+ return JSON.stringify(msg).length;
+ }
+ /** Extract system/developer messages from the beginning */
  function extractSystemMessages(messages) {
- const systemMessages = [];
- let i = 0;
- while (i < messages.length) {
- const msg = messages[i];
- if (msg.role === "system" || msg.role === "developer") {
- systemMessages.push(msg);
- i++;
- } else break;
+ let splitIndex = 0;
+ while (splitIndex < messages.length) {
+ const role = messages[splitIndex].role;
+ if (role !== "system" && role !== "developer") break;
+ splitIndex++;
  }
  return {
- systemMessages,
- remainingMessages: messages.slice(i)
+ systemMessages: messages.slice(0, splitIndex),
+ conversationMessages: messages.slice(splitIndex)
  };
  }
- /**
- * Extract tool_use ids from assistant messages with tool_calls.
- */
- function getToolUseIds(message) {
- if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
+ /** Get tool_use IDs from an assistant message */
+ function getToolCallIds(msg) {
+ if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
  return [];
  }
- /**
- * Estimate the byte size of a message (for binary search).
- */
- function estimateMessageBytes(message) {
- return JSON.stringify(message).length;
+ /** Filter orphaned tool_result messages */
+ function filterOrphanedToolResults(messages) {
+ const toolUseIds = /* @__PURE__ */ new Set();
+ for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
+ let removedCount = 0;
+ const filtered = messages.filter((msg) => {
+ if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
+ removedCount++;
+ return false;
+ }
+ return true;
+ });
+ if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+ return filtered;
+ }
+ /** Ensure messages start with a user message */
+ function ensureStartsWithUser(messages) {
+ let startIndex = 0;
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
+ if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+ return messages.slice(startIndex);
  }
  /**
- * Find the optimal preserve index using binary search.
- * This finds the point where we keep as many messages as possible
- * while staying under both token and byte limits.
+ * Find the optimal index from which to preserve messages.
+ * Uses binary search with pre-calculated cumulative sums.
+ * Returns the smallest index where the preserved portion fits within limits.
  */
- function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
+ function findOptimalPreserveIndex(params) {
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
  if (messages.length === 0) return 0;
- const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
- const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
- const markerOverhead = 200;
- const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
- const availableBytes = targetBytes - systemBytes - markerOverhead;
- const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
- const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
- for (let i = messages.length - 1; i >= 0; i--) {
+ const markerBytes = 200;
+ const availableTokens = tokenLimit - systemTokens - 50;
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
+ const n = messages.length;
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
+ for (let i = n - 1; i >= 0; i--) {
  const msg = messages[i];
- cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
- cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
  }
  let left = 0;
- let right = messages.length;
+ let right = n;
  while (left < right) {
- const mid = Math.floor((left + right) / 2);
- const tokensFromMid = cumulativeTokens[mid];
- const bytesFromMid = cumulativeBytes[mid];
- if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
+ const mid = left + right >>> 1;
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
  else left = mid + 1;
  }
  return left;
  }
  /**
- * Filter out orphaned tool_result messages that don't have a matching tool_use
- * in the preserved message list. This prevents API errors when truncation
- * separates tool_use/tool_result pairs.
+ * Check if payload needs compaction based on model limits or byte size.
  */
- function filterOrphanedToolResults(messages) {
- const availableToolUseIds = /* @__PURE__ */ new Set();
- for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
- const filteredMessages = [];
- let removedCount = 0;
- for (const msg of messages) {
- if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
- removedCount++;
- continue;
- }
- filteredMessages.push(msg);
- }
- if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
- return filteredMessages;
- }
- /**
- * Ensure the message list starts with a user message.
- * If it starts with assistant or tool messages, skip them until we find a user message.
- * This is required because OpenAI API expects conversations to start with user messages
- * (after system messages).
- */
- function ensureStartsWithUser(messages) {
- let startIndex = 0;
- while (startIndex < messages.length) {
- if (messages[startIndex].role === "user") break;
- startIndex++;
- }
- if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
- return messages.slice(startIndex);
+ async function checkNeedsCompaction(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const currentBytes = JSON.stringify(payload).length;
+ const exceedsTokens = currentTokens > tokenLimit;
+ const exceedsBytes = currentBytes > byteLimit;
+ let reason;
+ if (exceedsTokens && exceedsBytes) reason = "both";
+ else if (exceedsTokens) reason = "tokens";
+ else if (exceedsBytes) reason = "bytes";
+ return {
+ needed: exceedsTokens || exceedsBytes,
+ currentTokens,
+ tokenLimit,
+ currentBytes,
+ byteLimit,
+ reason
+ };
  }
- /**
- * Create a truncation marker message.
- */
+ /** Create a truncation marker message */
  function createTruncationMarker(removedCount) {
  return {
  role: "user",
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
  };
  }
  /**
- * Perform auto-compaction on a payload that exceeds token or size limits.
- * This uses simple truncation with binary search - no LLM calls required.
- * The algorithm finds the optimal truncation point to maximize preserved messages
- * while staying under both token and byte limits.
+ * Perform auto-compaction on a payload that exceeds limits.
+ * Uses binary search to find the optimal truncation point.
  */
  async function autoCompact(payload, model, config = {}) {
  const cfg = {
  ...DEFAULT_CONFIG,
  ...config
  };
- const originalTokens = (await getTokenCount(payload, model)).input;
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+ const originalTokens = (await getTokenCount(payload, model)).input;
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
  payload,
  wasCompacted: false,
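The rewritten findOptimalPreserveIndex in the hunk above precomputes suffix sums: cumTokens[i] and cumBytes[i] cover messages i..n-1, so both are non-increasing in i, which makes "the suffix starting at mid fits" a monotone predicate and the binary search valid. A minimal standalone sketch of that idea, with a simplified message shape and the same chars/4 token heuristic (not the package's exact code):

// Standalone sketch of the suffix-sum + binary-search technique above.
interface SketchMsg { content: string }

function sketchTokens(m: SketchMsg): number {
  return Math.ceil(m.content.length / 4) + 10; // rough chars/4 heuristic
}

function sketchPreserveIndex(msgs: SketchMsg[], maxTokens: number, maxBytes: number): number {
  const n = msgs.length;
  const cumTokens = new Array<number>(n + 1).fill(0);
  const cumBytes = new Array<number>(n + 1).fill(0);
  // cum*[i] = cost of the suffix msgs[i..n-1]; non-increasing in i.
  for (let i = n - 1; i >= 0; i--) {
    cumTokens[i] = cumTokens[i + 1] + sketchTokens(msgs[i]);
    cumBytes[i] = cumBytes[i + 1] + JSON.stringify(msgs[i]).length + 1; // +1: array comma
  }
  let left = 0;
  let right = n; // n means "preserve nothing"
  while (left < right) {
    const mid = (left + right) >>> 1;
    // Monotonicity lets binary search find the leftmost fitting index,
    // i.e. the cut that preserves the most messages.
    if (cumTokens[mid] <= maxTokens && cumBytes[mid] <= maxBytes) right = mid;
    else left = mid + 1;
  }
  return left;
}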
@@ -2371,12 +2340,23 @@ async function autoCompact(payload, model, config = {}) {
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
  else if (exceedsBytes) reason = "size";
  else reason = "tokens";
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
- const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
- consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
- const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
+ const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
+ const messagesJson = JSON.stringify(payload.messages);
+ const payloadOverhead = originalBytes - messagesJson.length;
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+ consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+ const preserveIndex = findOptimalPreserveIndex({
+ messages: conversationMessages,
+ systemBytes,
+ systemTokens,
+ payloadOverhead,
+ tokenLimit,
+ byteLimit
+ });
  if (preserveIndex === 0) {
- consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
+ consola.warn("Auto-compact: Cannot truncate, system messages too large");
  return {
  payload,
  wasCompacted: false,
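The payloadOverhead term introduced above closes a gap in the old byte check: the request body contains more than messages (model name, tool definitions, sampling options, and so on), and that envelope previously consumed byte budget unaccounted for. A hedged illustration of the arithmetic (field values are made up):

// Illustration only: overhead = whole body minus the messages array.
const examplePayload = {
  model: "example-model", // made-up values, for arithmetic only
  temperature: 0.2,
  messages: [{ role: "user", content: "hi" }],
};
const originalBytes = JSON.stringify(examplePayload).length;          // whole body
const messagesBytes = JSON.stringify(examplePayload.messages).length; // just messages
const payloadOverhead = originalBytes - messagesBytes;                // envelope cost
// findOptimalPreserveIndex subtracts this from availableBytes, so a
// large tools definition now shrinks the room left for conversation.
console.log({ originalBytes, messagesBytes, payloadOverhead });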
@@ -2385,8 +2365,8 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- if (preserveIndex >= remainingMessages.length) {
- consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
+ if (preserveIndex >= conversationMessages.length) {
+ consola.warn("Auto-compact: Would need to remove all messages");
  return {
  payload,
  wasCompacted: false,
@@ -2395,13 +2375,12 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- const removedMessages = remainingMessages.slice(0, preserveIndex);
- let preservedMessages = remainingMessages.slice(preserveIndex);
- preservedMessages = filterOrphanedToolResults(preservedMessages);
- preservedMessages = ensureStartsWithUser(preservedMessages);
- preservedMessages = filterOrphanedToolResults(preservedMessages);
- if (preservedMessages.length === 0) {
- consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
+ let preserved = conversationMessages.slice(preserveIndex);
+ preserved = filterOrphanedToolResults(preserved);
+ preserved = ensureStartsWithUser(preserved);
+ preserved = filterOrphanedToolResults(preserved);
+ if (preserved.length === 0) {
+ consola.warn("Auto-compact: All messages filtered out after cleanup");
  return {
  payload,
  wasCompacted: false,
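The three-step cleanup order above is deliberate: the first filterOrphanedToolResults drops tool results whose tool_use fell before the cut, ensureStartsWithUser then trims to a user-first sequence, and the filter runs a second time as a safety net because that trim can itself remove an assistant message whose tool_calls matched surviving results. A small input/output sketch of what the first pass catches (simplified shapes, not the package's types):

// What filterOrphanedToolResults removes right after a cut (sketch).
type SketchM =
  | { role: "assistant"; tool_calls?: { id: string }[] }
  | { role: "tool"; tool_call_id: string }
  | { role: "user"; content: string };

const afterCut: SketchM[] = [
  { role: "tool", tool_call_id: "call_0" }, // orphan: its tool_use fell before the cut
  { role: "assistant", tool_calls: [{ id: "call_1" }] },
  { role: "tool", tool_call_id: "call_1" }, // kept: matching tool_use survives
  { role: "user", content: "continue" },
];
// Filtering drops only call_0's result; an unmatched tool message
// would otherwise be rejected by the upstream API.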
@@ -2410,29 +2389,30 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
- const truncationMarker = createTruncationMarker(removedMessages.length);
+ const removedCount = conversationMessages.length - preserved.length;
+ const marker = createTruncationMarker(removedCount);
  const newPayload = {
  ...payload,
  messages: [
  ...systemMessages,
- truncationMarker,
- ...preservedMessages
+ marker,
+ ...preserved
  ]
  };
- const newTokenCount = await getTokenCount(newPayload, model);
  const newBytes = JSON.stringify(newPayload).length;
- consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
+ const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
+ if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
  return {
  payload: newPayload,
  wasCompacted: true,
  originalTokens,
  compactedTokens: newTokenCount.input,
- removedMessageCount: removedMessages.length
+ removedMessageCount: removedCount
  };
  }
  /**
- * Create a marker to append to responses indicating auto-compaction occurred.
+ * Create a marker to prepend to responses indicating auto-compaction occurred.
  */
  function createCompactionMarker(result) {
  if (!result.wasCompacted) return "";
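Taken together, dynamicByteLimit, onRequestTooLarge and the compaction entry points form a feedback loop: a 413 response lowers the shared byte limit to 90% of the failing size (floored at 100KB), so the next compaction pass works under a tighter budget. A sketch of how a request handler might wire this up; sendUpstream is a hypothetical transport helper and the flow is an assumption, not code from the package:

// Hypothetical retry wiring around the functions above (sketch only).
async function sendWithAutoCompact(payload: any, model: any) {
  let body = payload;
  const check = await checkNeedsCompaction(body, model);
  if (check.needed) body = (await autoCompact(body, model)).payload;
  let res = await sendUpstream(body); // assumed helper, not in this diff
  if (res.status === 413) {
    onRequestTooLarge(JSON.stringify(body).length); // limit := 90% of failing size
    body = (await autoCompact(body, model)).payload; // recompact under the new limit
    res = await sendUpstream(body);
  }
  return res;
}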
@@ -4983,7 +4963,7 @@ async function runServer(options) {
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
  });
  else consola.info("Rate limiting disabled");
- if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
+ if (!options.autoCompact) consola.info("Auto-compact disabled");
  initHistory(options.history, options.historyLimit);
  if (options.history) {
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5125,10 +5105,10 @@ const start = defineCommand({
  default: "1000",
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
  },
- "auto-compact": {
+ "no-auto-compact": {
  type: "boolean",
  default: false,
- description: "Automatically compress conversation history when exceeding model token limits"
+ description: "Disable automatic conversation history compression when exceeding limits"
  }
  },
  run({ args }) {
@@ -5149,7 +5129,7 @@ const start = defineCommand({
  proxyEnv: args["proxy-env"],
  history: !args["no-history"],
  historyLimit: Number.parseInt(args["history-limit"], 10),
- autoCompact: args["auto-compact"]
+ autoCompact: !args["no-auto-compact"]
  });
  }
  });
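Net effect of 0.7.9: auto-compact flips from opt-in to opt-out. The state default becomes true, the old --auto-compact flag is replaced by --no-auto-compact (still defaulting to false), and run() negates it, so the feature is on unless explicitly disabled. The negation is the whole mechanism (invocation lines below assume the CLI binary is named copilot-api):

// Flag semantics after the change (sketch):
//   copilot-api start                    → args["no-auto-compact"] = false → autoCompact = true
//   copilot-api start --no-auto-compact  → args["no-auto-compact"] = true  → autoCompact = false
const autoCompact = !args["no-auto-compact"];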