npm - opencodekit - Versions diffs - 0.18.25 → 0.18.26 - Mend

opencodekit 0.18.25 → 0.18.26

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/index.js +1 -1
package/dist/template/.opencode/dcp.jsonc +81 -70
package/dist/template/.opencode/memory.db +0 -0
package/dist/template/.opencode/memory.db-shm +0 -0
package/dist/template/.opencode/memory.db-wal +0 -0
package/dist/template/.opencode/package.json +1 -1
package/dist/template/.opencode/plugin/copilot-auth.ts +111 -451
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -20,7 +20,7 @@ var __require = /* @__PURE__ */ createRequire(import.meta.url);
 //#endregion
 //#region package.json
-var version = "0.18.25";
+var version = "0.18.26";
 //#endregion
 //#region src/utils/license.ts

package/dist/template/.opencode/dcp.jsonc CHANGED Viewed

@@ -1,72 +1,83 @@
 {
-	"$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json",
-	"enabled": true,
-	"debug": false,
-	// "minimal" shows prune activity without noise; "detailed" shows token counts
-	"pruneNotification": "detailed",
-	// "chat" (in-conversation) or "toast" (system notification)
-	"pruneNotificationType": "toast",
-	// Commands: /dcp context, /dcp stats, /dcp sweep, /dcp compress, /dcp decompress, /dcp recompress
-	"commands": {
-		"enabled": true,
-		// Protect these from /dcp sweep
-		"protectedTools": ["observation", "memory-update", "memory-search"]
-	},
-	// Manual mode: disables autonomous context management
-	"manualMode": {
-		"enabled": false,
-		"automaticStrategies": true
-	},
-	"turnProtection": {
-		"enabled": true,
-		"turns": 4
-	},
-	// Protected file patterns - never auto-prune reads of these files
-	"protectedFilePatterns": [
-		"**/.env*",
-		"**/AGENTS.md",
-		"**/.opencode/**",
-		"**/.beads/**",
-		"**/package.json",
-		"**/tsconfig.json",
-		"**/biome.json"
-	],
-	"compress": {
-		// v3.0.0: single compress tool replaces the old 3-tool system
-		"permission": "allow",
-		"showCompression": false,
-		"maxContextLimit": "80%",
-		"minContextLimit": 50000,
-		"nudgeFrequency": 5,
-		"iterationNudgeThreshold": 15,
-		"nudgeForce": "soft",
-		"flatSchema": false,
-		"protectUserMessages": true,
-		// v3.0.0 auto-protects: task, skill, todowrite, todoread, compress, batch, plan_enter, plan_exit
-		// Only list additional tools here
-		"protectedTools": ["write", "edit", "memory-*", "observation", "tilth_*"]
-	},
-	// Experimental features
-	"experimental": {
-		"allowSubAgents": true,
-		"customPrompts": false
-	},
-	// Auto strategies
-	"strategies": {
-		// Dedup = zero LLM cost, high impact - always enable
-		"deduplication": {
-			"enabled": true,
-			"protectedTools": []
-		},
-		// Supersede writes = zero cost, removes redundant write inputs after read
-		"supersedeWrites": {
-			"enabled": true
-		},
-		// Purge error inputs after N turns
-		"purgeErrors": {
-			"enabled": true,
-			"turns": 4,
-			"protectedTools": []
-		}
-	}
+  "$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json",
+  "enabled": true,
+  "debug": false,
+  // "off" | "minimal" | "detailed" — keep minimal for low-noise dev flow
+  "pruneNotification": "minimal",
+  // "chat" (in-conversation) or "toast" (system notification)
+  "pruneNotificationType": "toast",
+  // Slash commands: /dcp context, /dcp stats, /dcp sweep, /dcp compress, /dcp decompress, /dcp recompress
+  "commands": {
+    "enabled": true,
+    // Additional tools to protect from /dcp sweep (supports glob wildcards)
+    "protectedTools": ["observation", "memory-*"],
+  },
+  // Manual mode: disables autonomous context management
+  "manualMode": {
+    "enabled": false,
+    "automaticStrategies": true,
+  },
+  // Protect recent tool outputs from pruning
+  "turnProtection": {
+    "enabled": false,
+    "turns": 4,
+  },
+  // Glob patterns for files that should never be auto-pruned
+  // Keep tight: broad patterns reduce DCP effectiveness
+  "protectedFilePatterns": [
+    "**/.env*",
+    "**/AGENTS.md",
+    "**/.opencode/**",
+    "**/.beads/**",
+    "**/package.json",
+    "**/tsconfig.json",
+  ],
+  // Unified context compression tool (v3.1.0)
+  "compress": {
+    // "range" (stable) compresses spans into block summaries
+    // "message" (experimental) compresses individual raw messages
+    "mode": "message",
+    // "allow" (no prompt) | "ask" (prompt) | "deny" (tool not registered)
+    "permission": "allow",
+    "showCompression": false,
+    // v3.1.0: active summary tokens extend effective maxContextLimit
+    "summaryBuffer": true,
+    // Soft upper threshold: above this, strong compression nudges fire
+    // Accepts number or "X%" of model context window
+    "maxContextLimit": "80%",
+    // Soft lower threshold: below this, turn/iteration reminders are off
+    "minContextLimit": "35%",
+    // How often context-limit nudge fires above maxContextLimit (1 = every fetch)
+    "nudgeFrequency": 5,
+    // Messages since last user message before adding compression reminders
+    "iterationNudgeThreshold": 15,
+    // "strong" = more likely to compress, "soft" = less likely
+    "nudgeForce": "soft",
+    // Keep user messages compressible to avoid permanent context growth
+    "protectUserMessages": false,
+    // Auto-protected by DCP: task, skill, todowrite, todoread, compress, batch, plan_enter, plan_exit, write, edit
+    // Only list ADDITIONAL tools whose outputs should be appended to compression summaries
+    "protectedTools": ["observation", "memory-*", "tilth_*"],
+  },
+  // Experimental features
+  "experimental": {
+    // Allow DCP processing in subagent sessions (default: false)
+    "allowSubAgents": false,
+    // Enable user-editable prompt overrides under dcp-prompts directories
+    "customPrompts": false,
+  },
+  // Automatic pruning strategies (zero LLM cost)
+  "strategies": {
+    // Removes duplicate tool calls (same tool + same arguments), keeps most recent
+    "deduplication": {
+      "enabled": true,
+      "protectedTools": [],
+    },
+    // Prunes inputs from errored tool calls after N turns (error messages preserved)
+    "purgeErrors": {
+      "enabled": true,
+      "turns": 4,
+      "protectedTools": [],
+    },
+  },
 }

package/dist/template/.opencode/memory.db CHANGED Viewed

Binary file

package/dist/template/.opencode/memory.db-shm CHANGED Viewed

Binary file

package/dist/template/.opencode/memory.db-wal CHANGED Viewed

Binary file

package/dist/template/.opencode/package.json CHANGED Viewed

@@ -11,7 +11,7 @@
     "type-check": "tsc --noEmit"
   },
   "dependencies": {
-    "@opencode-ai/plugin": "1.3.0"
+    "@opencode-ai/plugin": "1.3.2"
   },
   "devDependencies": {
     "@types/node": "^25.3.0",

package/dist/template/.opencode/plugin/copilot-auth.ts CHANGED Viewed

@@ -96,11 +96,8 @@ const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
 // Rate limit handling configuration
 const RATE_LIMIT_CONFIG = {
-	maxRetries: 3,
-	baseDelayMs: 2000, // Start with 2 seconds
 	maxDelayMs: 60000, // Cap at 60 seconds
 	defaultCooldownMs: 60000, // Default cooldown when Retry-After header is missing
-	maxFallbacks: 4, // Max model fallback switches per request
 };
 // Local request shaping to smooth bursts before they hit Copilot limits
@@ -110,11 +107,6 @@ const REQUEST_SHAPING_CONFIG = {
 	maxQueueDelayMs: 15000,
 };
-const CIRCUIT_BREAKER_CONFIG = {
-	maxInlineWaitMs: 30000,
-	maxRecoveryCycles: 3,
-};
 // Per-model rate limit state (in-memory, resets on restart)
 interface RateLimitEntry {
 	rateLimitedUntil: number; // Unix timestamp (ms)
@@ -170,14 +162,14 @@ function parseRetryAfter(response: Response): number | null {
 	return null;
 }
-function isModelRateLimited(model: string): boolean {
-	const entry = rateLimitState.get(model);
-	if (!entry) return false;
-	if (Date.now() >= entry.rateLimitedUntil) {
-		rateLimitState.delete(model);
-		return false;
-	}
-	return true;
+function clampCooldownMs(
+	value: number | null | undefined,
+	fallbackMs = 0,
+): number {
+	return Math.min(
+		Math.max(value ?? fallbackMs, 0),
+		RATE_LIMIT_CONFIG.maxDelayMs,
+	);
 }
 function getRateLimitRemainingMs(model: string): number | null {
@@ -217,10 +209,7 @@ function getFamilyCircuitRemainingMs(model: string): number {
 function openFamilyCircuitBreaker(model: string, cooldownMs: number): void {
 	const key = getFamilyCircuitKey(model);
-	familyCircuitBreakerState.set(
-		key,
-		Date.now() + Math.min(cooldownMs, RATE_LIMIT_CONFIG.maxDelayMs),
-	);
+	familyCircuitBreakerState.set(key, Date.now() + clampCooldownMs(cooldownMs));
 }
 function getFamilyMaxCooldownRemainingMs(model: string): number {
@@ -232,14 +221,6 @@ function getFamilyMaxCooldownRemainingMs(model: string): number {
 	return maxRemaining;
 }
-function isEntireModelFamilyCoolingDown(model: string): boolean {
-	const family = getModelFamily(model);
-	return (
-		family.length > 0 &&
-		family.every((candidate) => isModelRateLimited(candidate))
-	);
-}
 function formatRetryAfter(seconds: number): string {
 	if (seconds < 60) return `${seconds}s`;
 	const mins = Math.floor(seconds / 60);
@@ -319,50 +300,16 @@ async function shapeRequestForModel(model: string): Promise<void> {
 }
 function markModelRateLimited(model: string, cooldownMs: number): void {
+	const boundedCooldownMs = clampCooldownMs(cooldownMs);
 	rateLimitState.set(model, {
-		rateLimitedUntil: Date.now() + cooldownMs,
+		rateLimitedUntil: Date.now() + boundedCooldownMs,
 	});
 	log(
 		"info",
-		`Marked ${model} as rate-limited for ${Math.round(cooldownMs / 1000)}s`,
+		`Marked ${model} as rate-limited for ${Math.round(boundedCooldownMs / 1000)}s`,
 	);
 }
-/**
- * Find the next available fallback model in the same family.
- * Skips models that are themselves rate-limited.
- */
-function getNextFallbackModel(
-	model: string,
-	attemptedModels: Set<string>,
-): string | null {
-	const chain = MODEL_FALLBACK_CHAINS[model];
-	if (!chain) return null;
-	for (const fallback of chain) {
-		if (!attemptedModels.has(fallback) && !isModelRateLimited(fallback)) {
-			return fallback;
-		}
-	}
-	return null;
-}
-/**
- * Swap the model field in a fetch RequestInit body.
- */
-function swapModelInBody(
-	init: RequestInit | undefined,
-	newModel: string,
-): RequestInit | undefined {
-	if (!init?.body || typeof init.body !== "string") return init;
-	try {
-		const body = JSON.parse(init.body);
-		body.model = newModel;
-		return { ...init, body: JSON.stringify(body) };
-	} catch {
-		return init;
-	}
-}
 // Maximum length for item IDs in the OpenAI Responses API
 const MAX_RESPONSE_API_ID_LENGTH = 64;
 // OpenAI Responses API only allows: letters, numbers, underscores, dashes
@@ -402,13 +349,18 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
 	const cleanCore = rawCore.replace(INVALID_ID_CHARS, "_").replace(/_+$/g, "");
 	// Check if any sanitization is actually needed
-	const needsSanitization = forcedPrefix || hasInvalidIdChars(rawCore) ||
+	const needsSanitization =
+		forcedPrefix ||
+		hasInvalidIdChars(rawCore) ||
 		id.length > MAX_RESPONSE_API_ID_LENGTH;
 	if (!needsSanitization) return id;
 	// If result fits within length and core is non-empty, use cleaned core directly
-	if (cleanCore.length > 0 && (prefix.length + cleanCore.length) <= MAX_RESPONSE_API_ID_LENGTH) {
+	if (
+		cleanCore.length > 0 &&
+		prefix.length + cleanCore.length <= MAX_RESPONSE_API_ID_LENGTH
+	) {
 		return `${prefix}${cleanCore}`;
 	}
@@ -422,7 +374,10 @@ function sanitizeResponseId(id: string, forcedPrefix?: string): string {
 		MAX_RESPONSE_API_ID_LENGTH - prefix.length - hashStr.length - 1;
 	const middle = cleanCore.slice(0, Math.max(0, maxMiddleLen));
 	// Format: prefix + middle + "_" + hash (ensure total <= 64)
-	const result = `${prefix}${middle}_${hashStr}`.slice(0, MAX_RESPONSE_API_ID_LENGTH);
+	const result = `${prefix}${middle}_${hashStr}`.slice(
+		0,
+		MAX_RESPONSE_API_ID_LENGTH,
+	);
 	// Strip trailing underscores from truncation
 	return result.replace(/_+$/, "");
 }
@@ -480,7 +435,11 @@ function sanitizeResponseInputIds(input: any[]): any[] {
 		// Check for wrong prefix (e.g., function_call with "h_" instead of "fc_")
 		const expectedPrefix = getExpectedPrefix(item);
-		if (expectedPrefix && typeof item.id === "string" && !idRemap.has(item.id)) {
+		if (
+			expectedPrefix &&
+			typeof item.id === "string" &&
+			!idRemap.has(item.id)
+		) {
 			const newId = sanitizeResponseId(item.id, expectedPrefix);
 			if (newId !== item.id) {
 				idRemap.set(item.id, newId);
@@ -530,26 +489,16 @@ function sanitizeResponseInputIds(input: any[]): any[] {
 		if (typeof sanitized.id === "string" && idRemap.has(sanitized.id)) {
 			sanitized.id = idRemap.get(sanitized.id);
 		}
-		if (typeof sanitized.call_id === "string" && idRemap.has(sanitized.call_id)) {
+		if (
+			typeof sanitized.call_id === "string" &&
+			idRemap.has(sanitized.call_id)
+		) {
 			sanitized.call_id = idRemap.get(sanitized.call_id);
 		}
 		return sanitized;
 	});
 }
-/**
- * Retries: 2s, 4s, 8s (with jitter)
- */
-function calculateRetryDelay(attempt: number): number {
-	const exponentialDelay = RATE_LIMIT_CONFIG.baseDelayMs * 2 ** attempt;
-	const jitter = Math.random() * 1000; // Add 0-1s random jitter
-	const delay = Math.min(
-		exponentialDelay + jitter,
-		RATE_LIMIT_CONFIG.maxDelayMs,
-	);
-	return Math.round(delay);
-}
 export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 	// Initialize logger with the SDK client
 	setLogger(sdk);
@@ -742,10 +691,14 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 									])
 									.filter(Boolean);
 								if (rawIds.length > 0) {
-									log("debug", "[ID-SANITIZE] Raw input IDs before sanitization", {
-										ids: rawIds,
-										count: rawIds.length,
-									});
+									log(
+										"debug",
+										"[ID-SANITIZE] Raw input IDs before sanitization",
+										{
+											ids: rawIds,
+											count: rawIds.length,
+										},
+									);
 								}
 								// Sanitize IDs from Copilot backend:
@@ -753,7 +706,9 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 								// 2. Excessive length — Copilot returns 400+ char IDs (max is 64)
 								const sanitizedInput = sanitizeResponseInputIds(body.input);
 								const refDiffers = sanitizedInput !== body.input;
-								const jsonDiffers = refDiffers && JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
+								const jsonDiffers =
+									refDiffers &&
+									JSON.stringify(sanitizedInput) !== JSON.stringify(body.input);
 								const inputWasSanitized = refDiffers && jsonDiffers;
 								log("debug", "[ID-SANITIZE] Sanitization result", {
@@ -764,26 +719,40 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 								if (inputWasSanitized) {
 									const fixes = body.input
-										.map((item: any, i: number) => ({ item, i, si: sanitizedInput[i] }))
-										.filter(({ item, si }: any) =>
-											item && si && (item.id !== si.id || item.call_id !== si.call_id),
+										.map((item: any, i: number) => ({
+											item,
+											i,
+											si: sanitizedInput[i],
+										}))
+										.filter(
+											({ item, si }: any) =>
+												item &&
+												si &&
+												(item.id !== si.id || item.call_id !== si.call_id),
 										);
-									log("info", "[ID-SANITIZE] Fixed IDs in Responses API input", {
-										items_fixed: fixes.length,
-										fixes: fixes.map(({ item, si }: any) => ({
-											type: item.type,
-											old_id: item.id,
-											new_id: si?.id,
-											old_call_id: item.call_id,
-											new_call_id: si?.call_id,
-										})),
-									});
+									log(
+										"info",
+										"[ID-SANITIZE] Fixed IDs in Responses API input",
+										{
+											items_fixed: fixes.length,
+											fixes: fixes.map(({ item, si }: any) => ({
+												type: item.type,
+												old_id: item.id,
+												new_id: si?.id,
+												old_call_id: item.call_id,
+												new_call_id: si?.call_id,
+											})),
+										},
+									);
 									modifiedBody = {
 										...(modifiedBody || body),
 										input: sanitizedInput,
 									};
 								} else {
-									log("debug", "[ID-SANITIZE] No sanitization needed — all IDs valid");
+									log(
+										"debug",
+										"[ID-SANITIZE] No sanitization needed — all IDs valid",
+									);
 								}
 								isAgentCall = (sanitizedInput || body.input).some(
@@ -856,369 +825,60 @@ export const CopilotAuthPlugin: Plugin = async ({ client: sdk }) => {
 							currentModel = bodyObj?.model || "";
 						} catch {}
-						// Pre-flight: if current model is already known rate-limited, switch to fallback
-						let activeFinalInit: RequestInit = finalInit;
-						const attemptedModels = new Set<string>();
-						if (currentModel) attemptedModels.add(currentModel);
-						const requestedModel = currentModel;
+						// Pre-flight: fail fast if current model family is cooling down
+						const activeFinalInit: RequestInit = finalInit;
 						if (currentModel) {
-							const circuitRemainingMs =
-								getFamilyCircuitRemainingMs(currentModel);
-							if (circuitRemainingMs > 0) {
-								if (
-									circuitRemainingMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
-								) {
-									log(
-										"info",
-										`Family circuit open for ${currentModel}, waiting ${circuitRemainingMs}ms`,
-									);
-									await sleep(circuitRemainingMs);
-								} else {
-									throw new Error(
-										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(circuitRemainingMs / 1000))}.`,
-									);
-								}
-							}
-						}
-						if (currentModel && isModelRateLimited(currentModel)) {
-							const fallback = getNextFallbackModel(
-								currentModel,
-								attemptedModels,
+							const familyCooldownMs = Math.max(
+								getFamilyCircuitRemainingMs(currentModel),
+								getFamilyMaxCooldownRemainingMs(currentModel),
 							);
-							if (fallback) {
-								log(
-									"info",
-									`Model ${currentModel} is rate-limited, pre-switching to ${fallback}`,
+							if (familyCooldownMs > 0) {
+								throw new Error(
+									`[Copilot] Rate limited: all fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
 								);
-								activeFinalInit =
-									swapModelInBody(finalInit, fallback) || finalInit;
-								currentModel = fallback;
-								attemptedModels.add(fallback);
-							} else {
-								const familyCooldownMs =
-									getFamilyMaxCooldownRemainingMs(currentModel);
-								openFamilyCircuitBreaker(currentModel, familyCooldownMs);
-								if (
-									familyCooldownMs <= CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
-								) {
-									log(
-										"info",
-										`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms`,
-									);
-									await sleep(familyCooldownMs);
-									attemptedModels.clear();
-									if (currentModel) attemptedModels.add(currentModel);
-								} else {
-									throw new Error(
-										`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
-									);
-								}
 							}
 						}
-						// Retry logic with model fallback and exponential backoff for rate limiting
-						let lastError: Error | undefined;
-						let fallbacksUsed = 0;
-						let attempt = 0;
-						let recoveryCyclesUsed = 0;
-						let attempted400Recovery = false;
-						while (attempt <= RATE_LIMIT_CONFIG.maxRetries) {
-							try {
-								if (currentModel) {
-									await shapeRequestForModel(currentModel);
-								}
-								const response = await fetch(input, activeFinalInit);
-								if (response.status === 429) {
-									try {
-										await response.body?.cancel();
-									} catch {}
-									// Parse Retry-After header for server-suggested cooldown
-									const retryAfterMs = parseRetryAfter(response);
-									const cooldownMs =
-										retryAfterMs ?? RATE_LIMIT_CONFIG.defaultCooldownMs;
-									// Mark this model as rate-limited
-									if (currentModel) {
-										markModelRateLimited(currentModel, cooldownMs);
-									}
-									// Try fallback model (doesn't count against retry budget)
-									if (
-										currentModel &&
-										fallbacksUsed < RATE_LIMIT_CONFIG.maxFallbacks
-									) {
-										const fallback = getNextFallbackModel(
-											currentModel,
-											attemptedModels,
-										);
-										if (fallback) {
-											log(
-												"warn",
-												`Rate limited on ${currentModel}, switching to ${fallback}`,
-												{
-													retry_after_ms: retryAfterMs,
-													cooldown_ms: cooldownMs,
-													fallbacks_used: fallbacksUsed + 1,
-												},
-											);
-											activeFinalInit =
-												swapModelInBody(activeFinalInit, fallback) ||
-												activeFinalInit;
-											currentModel = fallback;
-											attemptedModels.add(fallback);
-											fallbacksUsed++;
-											continue; // Retry immediately with new model, no delay
-										}
-									}
-									// No fallback available — use exponential backoff on same model
-									if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
-										if (
-											currentModel &&
-											isEntireModelFamilyCoolingDown(currentModel)
-										) {
-											const familyCooldownMs =
-												getFamilyMaxCooldownRemainingMs(currentModel);
-											openFamilyCircuitBreaker(currentModel, familyCooldownMs);
-											if (
-												familyCooldownMs <=
-												CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs
-											) {
-												log(
-													"info",
-													`All fallback models cooling down for ${currentModel}, waiting ${familyCooldownMs}ms before retry`,
-												);
-												await sleep(familyCooldownMs);
-												attemptedModels.clear();
-												if (currentModel) attemptedModels.add(currentModel);
-												attempt++;
-												continue;
-											}
-											throw new Error(
-												`[Copilot] All fallback models cooling down. Retry in ${formatRetryAfter(Math.ceil(familyCooldownMs / 1000))}.`,
-											);
-										}
-										const modelCooldownMs = currentModel
-											? getRateLimitRemainingMs(currentModel)
-											: null;
-										const delay = Math.min(
-											modelCooldownMs ??
-												retryAfterMs ??
-												calculateRetryDelay(attempt),
-											RATE_LIMIT_CONFIG.maxDelayMs,
-										);
-										log(
-											"warn",
-											`Rate limited (429), no fallback available, waiting ${delay}ms`,
-											{
-												delay_ms: delay,
-												attempt: attempt + 1,
-												max_retries: RATE_LIMIT_CONFIG.maxRetries,
-												fallbacks_exhausted: true,
-											},
-										);
-										await sleep(delay);
-										attemptedModels.clear();
-										if (currentModel) attemptedModels.add(currentModel);
-										attempt++;
-										continue;
-									}
-									// Exhausted retries and fallbacks
-									if (currentModel) {
-										const familyCooldownMs =
-											getFamilyMaxCooldownRemainingMs(currentModel);
-										const recoveryDelayMs =
-											familyCooldownMs > 0
-												? Math.min(
-														familyCooldownMs,
-														CIRCUIT_BREAKER_CONFIG.maxInlineWaitMs,
-													)
-												: calculateRetryDelay(0);
-										if (
-											recoveryDelayMs > 0 &&
-											recoveryCyclesUsed <
-												CIRCUIT_BREAKER_CONFIG.maxRecoveryCycles
-										) {
-											recoveryCyclesUsed++;
-											log(
-												"info",
-												`Rate-limit budget exhausted for ${currentModel}, waiting ${recoveryDelayMs}ms for recovery cycle ${recoveryCyclesUsed}`,
-											);
-											await sleep(recoveryDelayMs);
-											attempt = 0;
-											fallbacksUsed = 0;
-											if (requestedModel) {
-												currentModel = requestedModel;
-												activeFinalInit =
-													swapModelInBody(finalInit, requestedModel) ||
-													finalInit;
-											}
-											attemptedModels.clear();
-											if (currentModel) attemptedModels.add(currentModel);
-											continue;
-										}
-									}
-									throw new Error(
-										`[Copilot] Rate limited. Tried ${fallbacksUsed} fallback model(s) and ${attempt} retries. Model: ${currentModel}`,
-									);
-								}
-								// Handle 400 Bad Request with auto-recovery
-								if (response.status === 400 && !attempted400Recovery) {
-									let errorDetail = "Bad Request";
-									try {
-										const clonedResponse = response.clone();
-										const errorBody = await clonedResponse.json();
-										errorDetail =
-											errorBody?.error?.message ||
-											errorBody?.message ||
-											"Bad Request";
-									} catch {}
-									log(
-										"warn",
-										`[400-RECOVERY] Bad Request from Copilot API`,
-										{
-											model: currentModel,
-											error_detail: errorDetail,
-											attempt,
-										},
-									);
-									// Check for recoverable 400 causes
-									const isThinkingBlockError =
-										/thinking.?block|invalid.*signature|reasoning.*invalid/i.test(
-											errorDetail,
-										);
-									const isIdError =
-										/invalid.*\bid\b|item.*\bid\b|unknown.*\bid\b|malformed.*\bid\b/i.test(
-											errorDetail,
-										);
-									if (isThinkingBlockError || isIdError) {
-										let bodyObj: any;
-										try {
-											bodyObj =
-												typeof activeFinalInit.body === "string"
-													? JSON.parse(activeFinalInit.body)
-													: activeFinalInit.body;
-										} catch {
-											// Can't parse body — not recoverable
-											log(
-												"warn",
-												`[400-RECOVERY] Cannot parse request body, giving up`,
-											);
-											return response;
-										}
-										// Cancel original response body only after confirming we can recover
-										try {
-											await response.body?.cancel();
-										} catch {}
-										if (isThinkingBlockError && bodyObj?.messages) {
-											// Strip ALL thinking/reasoning content aggressively
-											bodyObj.messages = bodyObj.messages.map(
-												(msg: any) => {
-													if (msg.role !== "assistant") return msg;
-													const {
-														reasoning_text: _rt,
-														reasoning_opaque: _ro,
-														...cleaned
-													} = msg;
-													if (Array.isArray(cleaned.content)) {
-														cleaned.content = cleaned.content.filter(
-															(part: any) => part.type !== "thinking",
-														);
-														if (cleaned.content.length === 0)
-															cleaned.content = null;
-													}
-													return cleaned;
-												},
-											);
-											delete bodyObj.thinking_budget;
-											recovered = true;
-											log(
-												"info",
-												`[400-RECOVERY] Stripped all thinking/reasoning content for retry`,
-											);
-										}
-										if (isIdError && bodyObj?.input) {
-											bodyObj.input = sanitizeResponseInputIds(
-												bodyObj.input,
-											);
-											recovered = true;
-											log(
-												"info",
-												`[400-RECOVERY] Re-sanitized Responses API IDs for retry`,
-											);
-										}
+						try {
+							if (currentModel) {
+								await shapeRequestForModel(currentModel);
+							}
+							const response = await fetch(input, activeFinalInit);
-										if (recovered) {
-											attempted400Recovery = true;
-											activeFinalInit = {
-												...activeFinalInit,
-												body: JSON.stringify(bodyObj),
-											};
-											attempt++;
-											continue;
-										}
-									}
+							if (response.status === 429) {
+								try {
+									await response.body?.cancel();
+								} catch {}
-									// Not recoverable — log detail and return original response
-									log(
-										"warn",
-										`[400-RECOVERY] Non-recoverable 400: ${errorDetail}`,
-									);
-								}
+								const retryAfterMs = parseRetryAfter(response);
+								const cooldownMs = clampCooldownMs(
+									retryAfterMs,
+									RATE_LIMIT_CONFIG.defaultCooldownMs,
+								);
-								// Response transformation is handled by the custom SDK at
-								// .opencode/plugin/sdk/copilot/
-								return response;
-							} catch (error) {
-								lastError = error as Error;
-								if (
-									lastError.message.includes(
-										"All fallback models cooling down",
-									) ||
-									lastError.message.includes("Local request queue saturated")
-								) {
-									throw lastError;
+								if (currentModel) {
+									markModelRateLimited(currentModel, cooldownMs);
+									openFamilyCircuitBreaker(currentModel, cooldownMs);
 								}
-								// Network errors might be transient, retry
-								if (attempt < RATE_LIMIT_CONFIG.maxRetries) {
-									const delay = calculateRetryDelay(attempt);
-									log("warn", `Request failed, retrying`, {
-										delay_ms: delay,
-										attempt: attempt + 1,
-										max_retries: RATE_LIMIT_CONFIG.maxRetries,
-										error: lastError.message,
-									});
-									await sleep(delay);
-									attempt++;
-									continue;
-								}
-								throw error;
+								throw new Error(
+									`[Copilot] Rate limited: ${currentModel || "model"} cooling down. Retry in ${formatRetryAfter(Math.ceil(cooldownMs / 1000))}.`,
+								);
 							}
-						}
-						// Exhausted all retries
-						if (lastError) {
-							throw new Error(
-								`[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded. Last error: ${lastError.message}`,
-							);
+							// Response transformation is handled by the custom SDK at
+							// .opencode/plugin/sdk/copilot/
+							return response;
+						} catch (error) {
+							const lastError = error as Error;
+							if (
+								lastError.message.includes("Rate limited") ||
+								lastError.message.includes("Local request queue saturated")
+							) {
+								throw lastError;
+							}
+							throw error;
 						}
-						throw new Error(
-							`[Copilot] Max retries (${RATE_LIMIT_CONFIG.maxRetries}) exceeded`,
-						);
 					},
 				};
 			},

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "opencodekit",
-	"version": "0.18.25",
+	"version": "0.18.26",
 	"description": "CLI tool for bootstrapping and managing OpenCodeKit projects",
 	"keywords": [
 		"agents",