npm - @hsupu/copilot-api - Versions diffs - 0.7.6 → 0.7.7 - Mend

@hsupu/copilot-api 0.7.6 → 0.7.7

This diff shows the differences between the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (3)

package/dist/main.js (changed)

@@ -2097,20 +2097,50 @@ const getTokenCount = async (payload, model) => {
 //#region src/lib/auto-compact.ts
 const DEFAULT_CONFIG = {
 	targetTokens: 12e4,
-	safetyMarginPercent: 2
+	safetyMarginPercent: 2,
+	maxRequestBodyBytes: 500 * 1024
 };
 /**
-* Check if payload needs compaction based on model limits.
+* Dynamic byte limit that adjusts based on 413 errors.
+* Starts at 500KB and can be adjusted when 413 errors are encountered.
+*/
+let dynamicByteLimitOverride = null;
+/**
+* Called when a 413 error is encountered with a specific payload size.
+* Adjusts the dynamic byte limit to 90% of the failing size.
+*/
+function onRequestTooLarge(failingBytes) {
+	const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
+	dynamicByteLimitOverride = newLimit;
+	consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+}
+/**
+* Check if payload needs compaction based on model limits OR request body size.
 * Uses a safety margin to account for token counting differences.
 */
-async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
+async function checkNeedsCompaction(payload, model, config = {}) {
+	const cfg = {
+		...DEFAULT_CONFIG,
+		...config
+	};
 	const currentTokens = (await getTokenCount(payload, model)).input;
 	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
+	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+	const currentBytes = JSON.stringify(payload).length;
+	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+	const exceedsTokens = currentTokens > tokenLimit;
+	const exceedsBytes = currentBytes > byteLimit;
+	let reason;
+	if (exceedsTokens && exceedsBytes) reason = "both";
+	else if (exceedsTokens) reason = "tokens";
+	else if (exceedsBytes) reason = "bytes";
 	return {
-		needed: currentTokens > limit,
+		needed: exceedsTokens || exceedsBytes,
 		currentTokens,
-		limit
+		tokenLimit,
+		currentBytes,
+		byteLimit,
+		reason
 	};
 }
 /**
@@ -2217,7 +2247,7 @@ function createTruncationMarker(removedCount) {
 	};
 }
 /**
-* Perform auto-compaction on a payload that exceeds token limits.
+* Perform auto-compaction on a payload that exceeds token or size limits.
 * This uses simple truncation - no LLM calls required.
 * Uses iterative approach with decreasing target tokens until under limit.
 */
@@ -2228,21 +2258,29 @@ async function autoCompact(payload, model, config = {}) {
 	};
 	const originalTokens = (await getTokenCount(payload, model)).input;
 	const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
-	const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
-	if (originalTokens <= limit) return {
+	const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+	const originalBytes = JSON.stringify(payload).length;
+	const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+	if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
 		payload,
 		wasCompacted: false,
 		originalTokens,
 		compactedTokens: originalTokens,
 		removedMessageCount: 0
 	};
-	consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
+	const exceedsTokens = originalTokens > tokenLimit;
+	const exceedsBytes = originalBytes > byteLimit;
+	let reason;
+	if (exceedsTokens && exceedsBytes) reason = "tokens and size";
+	else if (exceedsBytes) reason = "size";
+	else reason = "tokens";
+	consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
 	const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
 	const systemTokens = estimateSystemTokens(systemMessages);
 	consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
 	const MAX_ITERATIONS = 5;
 	const MIN_TARGET = 2e4;
-	let currentTarget = Math.min(cfg.targetTokens, limit);
+	let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
 	let lastResult = null;
 	for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
 		const result = await tryCompactWithTarget({
@@ -2252,16 +2290,21 @@ async function autoCompact(payload, model, config = {}) {
 			remainingMessages,
 			systemTokens,
 			targetTokens: currentTarget,
-			limit,
+			limit: tokenLimit,
 			originalTokens
 		});
 		if (!result.wasCompacted) return result;
 		lastResult = result;
-		if (result.compactedTokens <= limit) {
-			consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
+		const resultBytes = JSON.stringify(result.payload).length;
+		const underTokenLimit = result.compactedTokens <= tokenLimit;
+		const underByteLimit = resultBytes <= byteLimit;
+		if (underTokenLimit && underByteLimit) {
+			consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
 			return result;
 		}
-		consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
+		const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
+		const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
+		consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
 		currentTarget = Math.floor(currentTarget * .7);
 		if (currentTarget < MIN_TARGET) {
 			consola.error("Auto-compact: Cannot reduce further, target too low");
@@ -2433,12 +2476,16 @@ async function buildFinalPayload(payload, model) {
 	}
 	try {
 		const check = await checkNeedsCompaction(payload, model);
-		consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
+		consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
 		if (!check.needed) return {
 			finalPayload: payload,
 			compactResult: null
 		};
-		consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
+		let reasonText;
+		if (check.reason === "both") reasonText = "tokens and size";
+		else if (check.reason === "bytes") reasonText = "size";
+		else reasonText = "tokens";
+		consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
 		const compactResult = await autoCompact(payload, model);
 		return {
 			finalPayload: compactResult.payload,
@@ -2454,11 +2501,13 @@ async function buildFinalPayload(payload, model) {
 }
 /**
 * Log helpful debugging information when a 413 error occurs.
+* Also adjusts the dynamic byte limit for future requests.
 */
 async function logPayloadSizeInfo(payload, model) {
 	const messageCount = payload.messages.length;
 	const bodySize = JSON.stringify(payload).length;
 	const bodySizeKB = Math.round(bodySize / 1024);
+	onRequestTooLarge(bodySize);
 	let imageCount = 0;
 	let largeMessages = 0;
 	let totalImageSize = 0;