npm - opencodekit - Versions diffs - 0.18.16 → 0.18.17 - Mend

opencodekit 0.18.16 → 0.18.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.js +1 -1
package/dist/template/.opencode/memory.db +0 -0
package/dist/template/.opencode/memory.db-shm +0 -0
package/dist/template/.opencode/memory.db-wal +0 -0
package/dist/template/.opencode/plugin/copilot-auth.ts +304 -8
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
 //#endregion
 //#region package.json
-var version = "0.18.16";
+var version = "0.18.17";
 //#endregion
 //#region src/utils/license.ts

package/dist/template/.opencode/memory.db CHANGED

Binary file

package/dist/template/.opencode/memory.db-shm CHANGED

Binary file

package/dist/template/.opencode/memory.db-wal CHANGED

Binary file

package/dist/template/.opencode/plugin/copilot-auth.ts CHANGED

@@ -89,11 +89,31 @@ const RATE_LIMIT_CONFIG = {
 	maxFallbacks: 4, // Max model fallback switches per request
 };
+// Local request shaping to smooth bursts before they hit Copilot limits
+const REQUEST_SHAPING_CONFIG = {
+	tokensPerSecond: 1,
+	burstCapacity: 2,
+	maxQueueDelayMs: 15000,
+};
+const CIRCUIT_BREAKER_CONFIG = {
+	maxInlineWaitMs: 30000,
+	maxRecoveryCycles: 3,
+};
 // Per-model rate limit state (in-memory, resets on restart)
 interface RateLimitEntry {
 	rateLimitedUntil: number; // Unix timestamp (ms)
 }
 const rateLimitState = new Map<string, RateLimitEntry>();
+const familyCircuitBreakerState = new Map<string, number>();
+interface TokenBucketState {
+	tokens: number;
+	lastRefillAt: number;
+}
+const modelTokenBuckets = new Map<string, TokenBucketState>();
+const modelQueueTail = new Map<string, Promise<void>>();
 // Model fallback chains: same-family alternatives when a model is rate-limited
 const MODEL_FALLBACK_CHAINS: Record<string, string[]> = {
@@ -146,6 +166,144 @@ function isModelRateLimited(model: string): boolean {
 	return true;
 }
+function getRateLimitRemainingMs(model: string): number | null {
+	const entry = rateLimitState.get(model);
+	if (!entry) return null;
+	const remaining = entry.rateLimitedUntil - Date.now();
+	if (remaining <= 0) {
+		rateLimitState.delete(model);
+		return null;
+	}
+	return remaining;
+}
+function getModelFamily(model: string): string[] {
+	const family = new Set<string>([
+		model,
+		...(MODEL_FALLBACK_CHAINS[model] || []),
+	]);
+	return [...family];
+}
+function getFamilyCircuitKey(model: string): string {
+	return getModelFamily(model).sort().join("|");
+}
+function getFamilyCircuitRemainingMs(model: string): number {
+	const key = getFamilyCircuitKey(model);
+	const until = familyCircuitBreakerState.get(key);
+	if (!until) return 0;
+	const remaining = until - Date.now();
+	if (remaining <= 0) {
+		familyCircuitBreakerState.delete(key);
+		return 0;
+	}
+	return remaining;
+}
+function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
+	const key = getFamilyCircuitKey(model);
+	familyCircuitBreakerState.set(
+		key,
+		Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
+	);
+}
+function getFamilyMaxCooldownRemainingMs(model: string): number {
+	let maxRemaining = 0;
+	for (const candidate of getModelFamily(model)) {
+		const remaining = getRateLimitRemainingMs(candidate) ?? 0;
+		if (remaining > maxRemaining) maxRemaining = remaining;
+	}
+	return maxRemaining;
+}
+function isEntireModelFamilyCoolingDown(model: string): boolean {
+	const family = getModelFamily(model);
+	return (
+		family.length > 0 &&
+		family.every((candidate) => isModelRateLimited(candidate))
+	);
+}
+function formatRetryAfter(seconds: number): string {
+	if (seconds < 60) return `${seconds}s`;
+	const mins = Math.floor(seconds / 60);
+	const secs = seconds % 60;
+	return secs > 0 ? `${mins}m ${secs}s` : `${mins}m`;
+}
+async function shapeRequestForModel(model: string): Promise<void> {
+	if (!model) return;
+	const previousTail = modelQueueTail.get(model) ?? Promise.resolve();
+	let releaseQueue: (() => void) | undefined;
+	const currentGate = new Promise<void>((resolve) => {
+		releaseQueue = resolve;
+	});
+	const currentTail = previousTail.then(() => currentGate);
+	modelQueueTail.set(model, currentTail);
+	let queueTimeout: ReturnType<typeof setTimeout> | undefined;
+	try {
+		await Promise.race([
+			previousTail,
+			new Promise<void>((_, reject) => {
+				queueTimeout = setTimeout(() => {
+					reject(
+						new Error(
+							`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(REQUEST_SHAPING_CONFIG.maxQueueDelayMs / 1000))}.`,
+						),
+					);
+				}, REQUEST_SHAPING_CONFIG.maxQueueDelayMs);
+			}),
+		]);
+		const now = Date.now();
+		const bucket = modelTokenBuckets.get(model) ?? {
+			tokens: REQUEST_SHAPING_CONFIG.burstCapacity,
+			lastRefillAt: now,
+		};
+		const elapsedMs = Math.max(0, now - bucket.lastRefillAt);
+		const refillTokens =
+			(elapsedMs / 1000) * REQUEST_SHAPING_CONFIG.tokensPerSecond;
+		bucket.tokens = Math.min(
+			REQUEST_SHAPING_CONFIG.burstCapacity,
+			bucket.tokens + refillTokens,
+		);
+		bucket.lastRefillAt = now;
+		if (bucket.tokens < 1) {
+			const deficit = 1 - bucket.tokens;
+			const waitMs = Math.ceil(
+				(deficit / REQUEST_SHAPING_CONFIG.tokensPerSecond) * 1000,
+			);
+			if (waitMs > REQUEST_SHAPING_CONFIG.maxQueueDelayMs) {
+				throw new Error(
+					`[Copilot] Local request queue saturated for ${model}. Retry in ${formatRetryAfter(Math.ceil(waitMs / 1000))}.`,
+				);
+			}
+			log("info", `Local request shaping wait for ${model}`, {
+				wait_ms: waitMs,
+			});
+			await sleep(waitMs);
+			bucket.tokens = 0;
+			bucket.lastRefillAt = Date.now();
+		} else {
+			bucket.tokens -= 1;
+		}
+		modelTokenBuckets.set(model, bucket);
+	} finally {
+		if (queueTimeout) clearTimeout(queueTimeout);
+		releaseQueue?.();
+		if (modelQueueTail.get(model) === currentTail) {
+			modelQueueTail.delete(model);
+		}
+	}
+}
 function markModelRateLimited(model: string, cooldownMs: number): void {
 	rateLimitState.set(model, {
 		rateLimitedUntil: Date.now() + cooldownMs,
@@ -160,11 +318,16 @@ function markModelRateLimited(model: string, cooldownMs: number): void {
  * Find the next available fallback model in the same family.
  * Skips models that are themselves rate-limited.
  */
-function getNextFallbackModel(model: string): string | null {
+function getNextFallbackModel(
+	model: string,
+	attemptedModels: Set<string>,
+): string | null {
 	const chain = MODEL_FALLBACK_CHAINS[model];
 	if (!chain) return null;
 	for (const fallback of chain) {
-		if (!isModelRateLimited(fallback)) return fallback;
+		if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
+			return fallback;
+		}
 	}
 	return null;
 }
@@ -530,8 +693,33 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 						// Pre-flight: if current model is already known rate-limited, switch to fallback
 						let activeFinalInit: RequestInit = finalInit;
+						const attemptedModels = new Set<string>();
+						if (currentModel) attemptedModels.add(currentModel);
+						const requestedModel = currentModel;
+						if (currentModel) {
+							const circuitRemainingMs =
+								getFamilyCircuitRemainingMs(currentModel);
+							if (circuitRemainingMs > 0) {
+								if (
+									circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+								) {
+									log(
+										"info",
+										`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
+									);
+									await sleep(circuitRemainingMs);
+								} else {
+									throw new Error(
+										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
+									);
+								}
+							}
+						}
 						if (currentModel && isModelRateLimited(currentModel)) {
-							const fallback = getNextFallbackModel(currentModel);
+							const fallback = getNextFallbackModel(
+								currentModel,
+								attemptedModels,
+							);
 							if (fallback) {
 								log(
 									"info",
@@ -540,6 +728,26 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 								activeFinalInit =
 									swapModelInBody(finalInit, fallback) || finalInit;
 								currentModel = fallback;
+								attemptedModels.add(fallback);
+							} else {
+								const familyCooldownMs =
+									getFamilyMaxCooldownRemainingMs(currentModel);
+								openFamilyCircuitBreaker(currentModel, familyCooldownMs);
+								if (
+									familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+								) {
+									log(
+										"info",
+										`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
+									);
+									await sleep(familyCooldownMs);
+									attemptedModels.clear();
+									if (currentModel) attemptedModels.add(currentModel);
+								} else {
+									throw new Error(
+										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
+									);
+								}
 							}
 						}
@@ -547,12 +755,20 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 						let lastError: Error | undefined;
 						let fallbacksUsed = 0;
 						let attempt = 0;
+						let recoveryCyclesUsed = 0;
 						while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
 							try {
+								if (currentModel) {
+									await shapeRequestForModel(currentModel);
+								}
 								const response = await fetch(input, activeFinalInit);
 								if (response.status === 429) {
+									try {
+										await response.body?.cancel();
+									} catch {}
 									// Parse Retry-After header for server-suggested cooldown
 									const retryAfterMs = parseRetryAfter(response);
 									const cooldownMs =
@@ -568,7 +784,10 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 										currentModel &&
 										fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
 									) {
-										const fallback = getNextFallbackModel(currentModel);
+										const fallback = getNextFallbackModel(
+											currentModel,
+											attemptedModels,
+										);
 										if (fallback) {
 											log(
 												"warn",
@@ -583,6 +802,7 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 												swapModelInBody(activeFinalInit, fallback) ||
 												activeFinalInit;
 											currentModel = fallback;
+											attemptedModels.add(fallback);
 											fallbacksUsed++;
 											continue; // Retry immediately with new model, no delay
 										}
@@ -590,10 +810,41 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 									// No fallback available — use exponential backoff on same model
 									if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
-										const delay =
-											retryAfterMs != null
-												? Math.min(retryAfterMs, RATE_LIMIT_CONFIG.maxDelayMs)
-												: calculateRetryDelay(attempt);
+										if (
+											currentModel &&
+											isEntireModelFamilyCoolingDown(currentModel)
+										) {
+											const familyCooldownMs =
+												getFamilyMaxCooldownRemainingMs(currentModel);
+											openFamilyCircuitBreaker(currentModel, familyCooldownMs);
+											if (
+												familyCooldownMs <=
+												CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
+											) {
+												log(
+													"info",
+													`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
+												);
+												await sleep(familyCooldownMs);
+												attemptedModels.clear();
+												if (currentModel) attemptedModels.add(currentModel);
+												attempt++;
+												continue;
+											}
+											throw new Error(
+												`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
+											);
+										}
+										const modelCooldownMs = currentModel
+											? getRateLimitRemainingMs(currentModel)
+											: null;
+										const delay = Math.min(
+											modelCooldownMs ??
+												retryAfterMs ??
+												calculateRetryDelay(attempt),
+											RATE_LIMIT_CONFIG.maxDelayMs,
+										);
 										log(
 											"warn",
 											`Rate limited (429), no fallback available, waiting ${delay}ms`,
@@ -605,11 +856,47 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 											},
 										);
 										await sleep(delay);
+										attemptedModels.clear();
+										if (currentModel) attemptedModels.add(currentModel);
 										attempt++;
 										continue;
 									}
 									// Exhausted retries and fallbacks
+									if (currentModel) {
+										const familyCooldownMs =
+											getFamilyMaxCooldownRemainingMs(currentModel);
+										const recoveryDelayMs =
+											familyCooldownMs > 0
+												? Math.min(
+														familyCooldownMs,
+														CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
+													)
+												: calculateRetryDelay(0);
+										if (
+											recoveryDelayMs > 0 &&
+											recoveryCyclesUsed <
+												CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
+										) {
+											recoveryCyclesUsed++;
+											log(
+												"info",
+												`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
+											);
+											await sleep(recoveryDelayMs);
+											attempt = 0;
+											fallbacksUsed = 0;
+											if (requestedModel) {
+												currentModel = requestedModel;
+												activeFinalInit =
+													swapModelInBody(finalInit, requestedModel) ||
+													finalInit;
+											}
+											attemptedModels.clear();
+											if (currentModel) attemptedModels.add(currentModel);
+											continue;
+										}
+									}
 									throw new Error(
 										`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
 									);
@@ -621,6 +908,15 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 							} catch (error) {
 								lastError = error as Error;
+								if (
+									lastError.message.includes(
+										"All fallback models cooling down",
+									) ||
+									lastError.message.includes("Local request queue saturated")
+								) {
+									throw lastError;
+								}
 								// Network errors might be transient, retry
 								if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
 									const delay = calculateRetryDelay(attempt);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
 	"name": "opencodekit",
-	"version": "0.18.16",
+	"version": "0.18.17",
 	"description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
 	"keywords": [
 		"agents",