npm - @link-assistant/hive-mind - Versions diffs - 1.56.5 → 1.56.7 - Mend

@link-assistant/hive-mind 1.56.5 → 1.56.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/CHANGELOG.md +13 -0
package/package.json +2 -2
package/src/agent.lib.mjs +31 -4
package/src/auto-iteration-limits.lib.mjs +33 -0
package/src/claude.lib.mjs +9 -4
package/src/codex.lib.mjs +47 -5
package/src/hive.config.lib.mjs +1 -1
package/src/hive.mjs +3 -0
package/src/models/index.mjs +17 -0
package/src/opencode.lib.mjs +28 -6
package/src/option-suggestions.lib.mjs +1 -0
package/src/solve.auto-continue.lib.mjs +14 -0
package/src/solve.auto-merge.lib.mjs +91 -24
package/src/solve.config.lib.mjs +25 -3
package/src/solve.error-handlers.lib.mjs +1 -1
package/src/solve.execution.lib.mjs +1 -1
package/src/solve.mjs +12 -15
package/src/solve.pre-pr-failure-notifier.lib.mjs +1 -1
package/src/solve.results.lib.mjs +14 -8
package/src/solve.watch.lib.mjs +14 -9
package/src/telegram-bot.mjs +32 -42
package/src/telegram-solve-command.lib.mjs +58 -0
package/src/tool-retry.lib.mjs +118 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # @link-assistant/hive-mind
+## 1.56.7
+### Patch Changes
+- 37c895c: Retry capacity-related tool failures with exponential backoff and support fallback models for Codex, Claude, OpenCode, and Agent resumes.
+- 16f341d: Limit automatic restart/resume loops to five iterations by default and avoid pre-restart branch sync when local merge state must be resolved by the AI session.
+## 1.56.6
+### Patch Changes
+- e4037e1: Support Telegram solve and hive commands when options are placed before the GitHub URL.
 ## 1.56.5
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.56.5",
+  "version": "1.56.7",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",
@@ -15,7 +15,7 @@
     "hive-telegram-bot": "./src/telegram-bot.mjs"
   },
   "scripts": {
-    "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
+    "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-auto-restart-limits-1664.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
     "test:queue": "node tests/solve-queue.test.mjs",
     "test:limits-display": "node tests/limits-display.test.mjs",
     "test:usage-limit": "node tests/test-usage-limit.mjs",

package/src/agent.lib.mjs CHANGED Viewed

@@ -15,13 +15,14 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import Decimal from 'decimal.js-light';
 import { agentModels, defaultModels, freeToBaseModelMap } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability, getAgentPlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
 import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage } from './agent-token-usage.lib.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 export { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage };
@@ -410,10 +411,9 @@ export const executeAgent = async params => {
 };
 export const executeAgentCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $ } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $, waitForRetryDelay = waitWithCountdown } = params;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -421,7 +421,7 @@ export const executeAgentCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing Agent:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -470,6 +470,11 @@ export const executeAgentCommand = async params => {
       agentArgs += ' --verbose';
     }
+    if (argv.resume) {
+      await log(`🔄 Resuming from session: ${argv.resume}`);
+      agentArgs += ` --resume ${argv.resume} --no-fork`;
+    }
     // Agent supports stdin in both plain text and JSON format
     // We'll combine system and user prompts into a single message
     const combinedPrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
@@ -783,6 +788,28 @@ export const executeAgentCommand = async params => {
       }
       if (exitCode !== 0 || outputError.detected) {
+        const retryableError = classifyRetryableError(outputError.match || streamingErrorMessage || lastMessage || fullOutput);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'agent', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Build JSON error structure for consistent error reporting
         const errorInfo = {
           type: 'error',

package/src/auto-iteration-limits.lib.mjs ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env node
+export const DEFAULT_AUTO_ITERATION_LIMIT = 5;
+export const normalizeAutoIterationLimit = (value, fallback = DEFAULT_AUTO_ITERATION_LIMIT) => {
+  if (value === 0 || value === '0') return 0;
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 1) return fallback;
+  return Math.floor(parsed);
+};
+export const normalizeAutoIterationCounter = value => {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 0) return 0;
+  return Math.floor(parsed);
+};
+export const hasReachedAutoIterationLimit = (completedIterations, maxIterations) => {
+  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
+  if (normalizedMax === 0) return false;
+  return normalizeAutoIterationCounter(completedIterations) >= normalizedMax;
+};
+export const formatAutoIterationLimit = maxIterations => {
+  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
+  return normalizedMax === 0 ? 'unlimited' : `${normalizedMax}`;
+};
+export const shouldSyncBeforeRestart = ({ hasUncommittedChanges }) => !hasUncommittedChanges;

package/src/claude.lib.mjs CHANGED Viewed

@@ -24,6 +24,7 @@ import { buildMcpConfigWithoutPlaywright } from './playwright-mcp.lib.mjs';
 import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
 import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
 import { fetchModelInfo } from './model-info.lib.mjs';
+import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
 export { availableModels }; // Re-export for backward compatibility
 export { fetchModelInfo };
 const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -1148,8 +1149,9 @@ export const executeClaudeCommand = async params => {
       // Issue #817: Stop bidirectional mode monitoring and collect queued feedback
       queuedFeedback = await finalizeBidirectionalHandler(bidirectionalHandler, log);
+      const retryableLastError = classifyRetryableError(lastMessage);
       // Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
-      const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
+      const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || retryableLastError.isRetryable || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
       if ((commandFailed || isTransientError) && isTransientError) {
         // Issue #1472/#1475: Startup/activity timeout → 30s–2min backoff; #1353: Request timeout → 5min–1hr; general → 2min–30min
         const isTimeoutRetry = isStartupTimeout || isActivityTimeout;
@@ -1178,7 +1180,7 @@ export const executeClaudeCommand = async params => {
         }
         if (retryCount < maxRetries) {
           const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
-          const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
+          const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : retryableLastError.label || (isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
           const notRetryableHint = apiMarkedNotRetryable ? ' (API says not retryable — will stop early if no progress)' : '';
           const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
           const retryMode = isStartupTimeout ? ' (fresh start)' : ' (session preserved)';
@@ -1199,6 +1201,7 @@ export const executeClaudeCommand = async params => {
           }
           // Activity timeout preserves session (work was started), startup timeout does not (no session created)
           if (!isStartupTimeout && sessionId && !argv.resume) argv.resume = sessionId;
+          await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: retryableLastError.message || lastMessage });
           await waitWithCountdown(delay, log);
           await log('\n🔄 Retrying now...');
           retryCount++;
@@ -1375,11 +1378,12 @@ export const executeClaudeCommand = async params => {
         operation: 'run_claude_command',
       });
       const errorStr = error.message || error.toString();
+      const retryableException = classifyRetryableError(errorStr);
       // Issue #1331: Unified handler for all transient API errors in exception block
       // Issue #1353: Also handle "Request timed out" in exception block
       // (Overloaded, 503, Internal Server Error, Request timed out) - all with session preservation
       const isTimeoutException = errorStr === 'Request timed out' || errorStr.includes('Request timed out');
-      const isTransientException = isTimeoutException || (errorStr.includes('API Error: 500') && (errorStr.includes('Overloaded') || errorStr.includes('Internal server error'))) || (errorStr.includes('API Error: 529') && (errorStr.includes('overloaded_error') || errorStr.includes('Overloaded'))) || (errorStr.includes('api_error') && errorStr.includes('Overloaded')) || (errorStr.includes('overloaded_error') && errorStr.includes('Overloaded')) || errorStr.includes('API Error: 503') || (errorStr.includes('503') && (errorStr.includes('upstream connect error') || errorStr.includes('remote connection failure')));
+      const isTransientException = isTimeoutException || retryableException.isRetryable;
       if (isTransientException) {
         // Issue #1353: Use timeout-specific backoff for request timeouts
         const maxRetries = isTimeoutException ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
@@ -1387,9 +1391,10 @@ export const executeClaudeCommand = async params => {
         const maxDelay = isTimeoutException ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
         if (retryCount < maxRetries) {
           const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
-          const errorLabel = isTimeoutException ? 'Request timeout' : errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
+          const errorLabel = isTimeoutException ? 'Request timeout' : retryableException.label || (errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
           await log(`\n⚠️ ${errorLabel} in exception. Retry ${retryCount + 1}/${maxRetries} in ${Math.round(delay / 60000)} min (session preserved)...`, { level: 'warning' });
           if (sessionId && !argv.resume) argv.resume = sessionId;
+          await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: errorStr });
           await waitWithCountdown(delay, log);
           await log('\n🔄 Retrying now...');
           retryCount++;

package/src/codex.lib.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { mapModelToId, resolveCodexReasoningEffort } from './codex.options.lib.mjs';
@@ -24,6 +24,7 @@ import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
 import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs';
 import { fetchModelInfo } from './model-info.lib.mjs';
 import { defaultModels } from './models/index.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import Decimal from 'decimal.js-light';
 const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -648,12 +649,11 @@ export const executeCodex = async params => {
 };
 export const executeCodexCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing, waitForRetryDelay = waitWithCountdown } = params;
   const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -661,7 +661,7 @@ export const executeCodexCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing Codex:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -711,7 +711,7 @@ export const executeCodexCommand = async params => {
     let codexArgs = 'exec';
     if (isResumeMode) {
       await log(`🔄 Resuming from session: ${argv.resume}`);
-      codexArgs += ` resume ${shellQuote(argv.resume)}`;
+      codexArgs += ` resume ${shellQuote(argv.resume)} --model ${shellQuote(mappedModel)}`;
     } else {
       codexArgs += ` --model ${shellQuote(mappedModel)}`;
     }
@@ -930,6 +930,7 @@ export const executeCodexCommand = async params => {
       const codexErrorSummary = getCodexErrorEventSummary(codexJsonState);
       if (codexErrorSummary.hasError) {
         const limitInfo = detectUsageLimit(codexErrorSummary.message || lastMessage);
+        const retryableError = classifyRetryableError(codexErrorSummary.message || lastMessage);
         if (limitInfo.isUsageLimit) {
           limitReached = true;
           limitResetTime = limitInfo.resetTime;
@@ -944,6 +945,25 @@ export const executeCodexCommand = async params => {
           for (const line of messageLines) {
             await log(line, { level: 'warning' });
           }
+        } else if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
         } else {
           await log(`\n\n❌ Codex emitted error event: ${codexErrorSummary.message}`, { level: 'error' });
           await log(`   Error events: item=${codexErrorSummary.counts.item}, turn=${codexErrorSummary.counts.turn}, stream=${codexErrorSummary.counts.stream}`, { level: 'error' });
@@ -971,6 +991,28 @@ export const executeCodexCommand = async params => {
       }
       if (exitCode !== 0) {
+        const retryableError = classifyRetryableError(lastMessage);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Check for usage limit errors first (more specific)
         const limitInfo = detectUsageLimit(lastMessage);
         if (limitInfo.isUsageLimit) {

package/src/hive.config.lib.mjs CHANGED Viewed

@@ -12,7 +12,7 @@ const HIVE_ONLY_OPTION_NAMES = new Set(['monitor-tag', 'all-issues', 'skip-issue
 // Solve-only options that should NOT be registered in hive
 // (they are internal to solve and not meaningful when passed from hive)
-const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type']);
+const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type', 'auto-resume-iteration']);
 // Options that hive defines with different defaults/descriptions than solve.
 // These are registered manually in hive config to preserve hive-specific behavior.

package/src/hive.mjs CHANGED Viewed

@@ -464,6 +464,9 @@ if (isRunningDirectly) {
     // Validate model names EARLY (simple string check, always runs)
     const tool = argv.tool || 'claude';
     await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
+    if (argv.fallbackModel) {
+      await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
+    }
     if (argv.planModel) {
       if (tool !== 'claude') {
         await log(`❌ --plan-model is only supported with --tool claude (current tool: ${tool})`, { level: 'error' });

package/src/models/index.mjs CHANGED Viewed

@@ -905,6 +905,23 @@ export const resolveModelId = (requestedModel, tool) => {
   }
 };
+export const defaultFallbackModels = {
+  claude: {
+    'claude-opus-4-7': 'opus-4-6',
+  },
+  codex: {
+    'gpt-5.5': 'gpt-5.4',
+  },
+};
+export const resolveDefaultFallbackModel = (tool, model) => {
+  if (!model) return null;
+  const toolName = (tool || 'claude').toString().toLowerCase();
+  const resolvedModel = resolveModelId(model, toolName);
+  return defaultFallbackModels[toolName]?.[resolvedModel] || null;
+};
 /**
  * Fetch model info and build the complete model information string for PR comments.
  * Uses actual models from CLI JSON output when available.

package/src/opencode.lib.mjs CHANGED Viewed

@@ -15,13 +15,14 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { opencodeModels, defaultModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability, getOpenCodePlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
 import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage as parseOpenCodeTokenUsage } from './agent-token-usage.lib.mjs';
 import { calculateAgentPricing } from './agent.lib.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 export { parseOpenCodeTokenUsage };
@@ -184,10 +185,9 @@ export const executeOpenCode = async params => {
 };
 export const executeOpenCodeCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $ } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $, waitForRetryDelay = waitWithCountdown } = params;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -195,7 +195,7 @@ export const executeOpenCodeCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing OpenCode:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -265,7 +265,7 @@ export const executeOpenCodeCommand = async params => {
     if (argv.resume) {
       await log(`🔄 Resuming from session: ${argv.resume}`);
-      opencodeArgs = `run --format json --resume ${argv.resume} --model ${mappedModel}`;
+      opencodeArgs = `run --format json --session ${argv.resume} --model ${mappedModel}`;
     }
     // For OpenCode, we pass the prompt via stdin
@@ -301,7 +301,7 @@ export const executeOpenCodeCommand = async params => {
           cwd: tempDir,
           mirror: false,
           env: opencodeEnv,
-        })`cat ${promptFile} | ${opencodePath} run --format json --resume ${argv.resume} --model ${mappedModel}`;
+        })`cat ${promptFile} | ${opencodePath} run --format json --session ${argv.resume} --model ${mappedModel}`;
       } else {
         execCommand = $({
           cwd: tempDir,
@@ -470,6 +470,28 @@ export const executeOpenCodeCommand = async params => {
       }
       if (exitCode !== 0) {
+        const retryableError = classifyRetryableError(allOutput || lastMessage);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'opencode', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Check for usage limit errors first (more specific)
         const limitInfo = detectUsageLimit(lastMessage);
         if (limitInfo.isUsageLimit) {

package/src/option-suggestions.lib.mjs CHANGED Viewed

@@ -203,6 +203,7 @@ const KNOWN_OPTION_NAMES = [
   'allow-to-push-to-contributors-pull-requests-as-maintainer',
   'prefix-fork-name-with-owner-name',
   'auto-restart-max-iterations',
+  'auto-resume-max-iterations',
   'auto-continue-only-on-new-comments',
   'auto-restart-on-limit-reset',
   'auto-restart-on-non-updated-pull-request-description',

package/src/solve.auto-continue.lib.mjs CHANGED Viewed

@@ -48,6 +48,7 @@ const { extractLinkedIssueNumber } = githubLinking;
 // Import configuration
 import { autoContinue, limitReset } from './config.lib.mjs';
+import { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationCounter, normalizeAutoIterationLimit } from './auto-iteration-limits.lib.mjs';
 // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
 const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
@@ -79,6 +80,15 @@ const formatWaitTime = ms => {
 // See: https://github.com/link-assistant/hive-mind/issues/1152
 export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, shouldAttachLogs, tempDir = null, isRestart = false) => {
   try {
+    const maxAutoResumeIterations = normalizeAutoIterationLimit(argv.autoResumeMaxIterations);
+    const currentAutoResumeIteration = normalizeAutoIterationCounter(argv.autoResumeIteration);
+    if (hasReachedAutoIterationLimit(currentAutoResumeIteration, maxAutoResumeIterations)) {
+      await log(`\n⚠️  Auto-${isRestart ? 'restart' : 'resume'} limit reached: ${currentAutoResumeIteration}/${formatAutoIterationLimit(maxAutoResumeIterations)}`);
+      await safeExit(1, `Auto-${isRestart ? 'restart' : 'resume'} limit reached`);
+    }
+    const nextAutoResumeIteration = currentAutoResumeIteration + 1;
     const resetTime = global.limitResetTime;
     const timezone = global.limitTimezone || null;
     const baseWaitMs = calculateWaitTime(resetTime);
@@ -125,6 +135,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     const actionType = isRestart ? 'Restarting' : 'Resuming';
     await log(`\n✅ Limit reset time reached (+ ${bufferMinutes} min buffer + ${jitterSeconds}s jitter)! ${actionType} session...`);
     await log(`   Current time: ${new Date().toLocaleTimeString()}`);
+    await log(`   Auto-${isRestart ? 'restart' : 'resume'} iteration: ${maxAutoResumeIterations === 0 ? nextAutoResumeIteration : `${nextAutoResumeIteration}/${maxAutoResumeIterations}`}`);
     // Recursively call the solve script
     // For resume: use --resume with session ID to maintain context
@@ -153,6 +164,8 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     if (argv.autoRestartOnLimitReset) {
       resumeArgs.push('--auto-restart-on-limit-reset');
     }
+    resumeArgs.push('--auto-resume-iteration', String(nextAutoResumeIteration));
+    resumeArgs.push('--auto-resume-max-iterations', String(maxAutoResumeIterations));
     // Pass session type for proper comment differentiation
     // See: https://github.com/link-assistant/hive-mind/issues/1152
@@ -162,6 +175,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     // Preserve other flags from original invocation
     if (argv.tool && argv.tool !== 'claude') resumeArgs.push('--tool', argv.tool);
     if (argv.model !== 'sonnet') resumeArgs.push('--model', argv.model);
+    if (argv.fallbackModel) resumeArgs.push('--fallback-model', argv.fallbackModel);
     if (argv.verbose) resumeArgs.push('--verbose');
     if (argv.fork) resumeArgs.push('--fork');
     if (shouldAttachLogs) resumeArgs.push('--attach-logs');