npm - @link-assistant/hive-mind - Versions diffs - 1.56.6 → 1.56.7 - Mend

@link-assistant/hive-mind 1.56.6 → 1.56.7

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (21)

package/CHANGELOG.md +7 -0
package/package.json +2 -2
package/src/agent.lib.mjs +31 -4
package/src/auto-iteration-limits.lib.mjs +33 -0
package/src/claude.lib.mjs +9 -4
package/src/codex.lib.mjs +47 -5
package/src/hive.config.lib.mjs +1 -1
package/src/hive.mjs +3 -0
package/src/models/index.mjs +17 -0
package/src/opencode.lib.mjs +28 -6
package/src/option-suggestions.lib.mjs +1 -0
package/src/solve.auto-continue.lib.mjs +14 -0
package/src/solve.auto-merge.lib.mjs +91 -24
package/src/solve.config.lib.mjs +25 -3
package/src/solve.error-handlers.lib.mjs +1 -1
package/src/solve.execution.lib.mjs +1 -1
package/src/solve.mjs +12 -15
package/src/solve.pre-pr-failure-notifier.lib.mjs +1 -1
package/src/solve.results.lib.mjs +14 -8
package/src/solve.watch.lib.mjs +14 -9
package/src/tool-retry.lib.mjs +118 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 # @link-assistant/hive-mind
+## 1.56.7
+### Patch Changes
+- 37c895c: Retry capacity-related tool failures with exponential backoff and support fallback models for Codex, Claude, OpenCode, and Agent resumes.
+- 16f341d: Limit automatic restart/resume loops to five iterations by default and avoid pre-restart branch sync when local merge state must be resolved by the AI session.
 ## 1.56.6
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.56.6",
+  "version": "1.56.7",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",
@@ -15,7 +15,7 @@
     "hive-telegram-bot": "./src/telegram-bot.mjs"
   },
   "scripts": {
-    "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
+    "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-auto-restart-limits-1664.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
     "test:queue": "node tests/solve-queue.test.mjs",
     "test:limits-display": "node tests/limits-display.test.mjs",
     "test:usage-limit": "node tests/test-usage-limit.mjs",

package/src/agent.lib.mjs CHANGED Viewed

@@ -15,13 +15,14 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import Decimal from 'decimal.js-light';
 import { agentModels, defaultModels, freeToBaseModelMap } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability, getAgentPlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
 import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage } from './agent-token-usage.lib.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 export { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage };
@@ -410,10 +411,9 @@ export const executeAgent = async params => {
 };
 export const executeAgentCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $ } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $, waitForRetryDelay = waitWithCountdown } = params;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -421,7 +421,7 @@ export const executeAgentCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing Agent:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -470,6 +470,11 @@ export const executeAgentCommand = async params => {
       agentArgs += ' --verbose';
     }
+    if (argv.resume) {
+      await log(`🔄 Resuming from session: ${argv.resume}`);
+      agentArgs += ` --resume ${argv.resume} --no-fork`;
+    }
     // Agent supports stdin in both plain text and JSON format
     // We'll combine system and user prompts into a single message
     const combinedPrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
@@ -783,6 +788,28 @@ export const executeAgentCommand = async params => {
       }
       if (exitCode !== 0 || outputError.detected) {
+        const retryableError = classifyRetryableError(outputError.match || streamingErrorMessage || lastMessage || fullOutput);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'agent', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Build JSON error structure for consistent error reporting
         const errorInfo = {
           type: 'error',

package/src/auto-iteration-limits.lib.mjs ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env node
+export const DEFAULT_AUTO_ITERATION_LIMIT = 5;
+export const normalizeAutoIterationLimit = (value, fallback = DEFAULT_AUTO_ITERATION_LIMIT) => {
+  if (value === 0 || value === '0') return 0;
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 1) return fallback;
+  return Math.floor(parsed);
+};
+export const normalizeAutoIterationCounter = value => {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 0) return 0;
+  return Math.floor(parsed);
+};
+export const hasReachedAutoIterationLimit = (completedIterations, maxIterations) => {
+  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
+  if (normalizedMax === 0) return false;
+  return normalizeAutoIterationCounter(completedIterations) >= normalizedMax;
+};
+export const formatAutoIterationLimit = maxIterations => {
+  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
+  return normalizedMax === 0 ? 'unlimited' : `${normalizedMax}`;
+};
+export const shouldSyncBeforeRestart = ({ hasUncommittedChanges }) => !hasUncommittedChanges;

package/src/claude.lib.mjs CHANGED Viewed

@@ -24,6 +24,7 @@ import { buildMcpConfigWithoutPlaywright } from './playwright-mcp.lib.mjs';
 import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
 import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
 import { fetchModelInfo } from './model-info.lib.mjs';
+import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
 export { availableModels }; // Re-export for backward compatibility
 export { fetchModelInfo };
 const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -1148,8 +1149,9 @@ export const executeClaudeCommand = async params => {
       // Issue #817: Stop bidirectional mode monitoring and collect queued feedback
       queuedFeedback = await finalizeBidirectionalHandler(bidirectionalHandler, log);
+      const retryableLastError = classifyRetryableError(lastMessage);
       // Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
-      const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
+      const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || retryableLastError.isRetryable || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
       if ((commandFailed || isTransientError) && isTransientError) {
         // Issue #1472/#1475: Startup/activity timeout → 30s–2min backoff; #1353: Request timeout → 5min–1hr; general → 2min–30min
         const isTimeoutRetry = isStartupTimeout || isActivityTimeout;
@@ -1178,7 +1180,7 @@ export const executeClaudeCommand = async params => {
         }
         if (retryCount < maxRetries) {
           const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
-          const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
+          const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : retryableLastError.label || (isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
           const notRetryableHint = apiMarkedNotRetryable ? ' (API says not retryable — will stop early if no progress)' : '';
           const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
           const retryMode = isStartupTimeout ? ' (fresh start)' : ' (session preserved)';
@@ -1199,6 +1201,7 @@ export const executeClaudeCommand = async params => {
           }
           // Activity timeout preserves session (work was started), startup timeout does not (no session created)
           if (!isStartupTimeout && sessionId && !argv.resume) argv.resume = sessionId;
+          await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: retryableLastError.message || lastMessage });
           await waitWithCountdown(delay, log);
           await log('\n🔄 Retrying now...');
           retryCount++;
@@ -1375,11 +1378,12 @@ export const executeClaudeCommand = async params => {
         operation: 'run_claude_command',
       });
       const errorStr = error.message || error.toString();
+      const retryableException = classifyRetryableError(errorStr);
       // Issue #1331: Unified handler for all transient API errors in exception block
       // Issue #1353: Also handle "Request timed out" in exception block
       // (Overloaded, 503, Internal Server Error, Request timed out) - all with session preservation
       const isTimeoutException = errorStr === 'Request timed out' || errorStr.includes('Request timed out');
-      const isTransientException = isTimeoutException || (errorStr.includes('API Error: 500') && (errorStr.includes('Overloaded') || errorStr.includes('Internal server error'))) || (errorStr.includes('API Error: 529') && (errorStr.includes('overloaded_error') || errorStr.includes('Overloaded'))) || (errorStr.includes('api_error') && errorStr.includes('Overloaded')) || (errorStr.includes('overloaded_error') && errorStr.includes('Overloaded')) || errorStr.includes('API Error: 503') || (errorStr.includes('503') && (errorStr.includes('upstream connect error') || errorStr.includes('remote connection failure')));
+      const isTransientException = isTimeoutException || retryableException.isRetryable;
       if (isTransientException) {
         // Issue #1353: Use timeout-specific backoff for request timeouts
         const maxRetries = isTimeoutException ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
@@ -1387,9 +1391,10 @@ export const executeClaudeCommand = async params => {
         const maxDelay = isTimeoutException ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
         if (retryCount < maxRetries) {
           const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
-          const errorLabel = isTimeoutException ? 'Request timeout' : errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
+          const errorLabel = isTimeoutException ? 'Request timeout' : retryableException.label || (errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
           await log(`\n⚠️ ${errorLabel} in exception. Retry ${retryCount + 1}/${maxRetries} in ${Math.round(delay / 60000)} min (session preserved)...`, { level: 'warning' });
           if (sessionId && !argv.resume) argv.resume = sessionId;
+          await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: errorStr });
           await waitWithCountdown(delay, log);
           await log('\n🔄 Retrying now...');
           retryCount++;

package/src/codex.lib.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { mapModelToId, resolveCodexReasoningEffort } from './codex.options.lib.mjs';
@@ -24,6 +24,7 @@ import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
 import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs';
 import { fetchModelInfo } from './model-info.lib.mjs';
 import { defaultModels } from './models/index.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import Decimal from 'decimal.js-light';
 const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -648,12 +649,11 @@ export const executeCodex = async params => {
 };
 export const executeCodexCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing, waitForRetryDelay = waitWithCountdown } = params;
   const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -661,7 +661,7 @@ export const executeCodexCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing Codex:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -711,7 +711,7 @@ export const executeCodexCommand = async params => {
     let codexArgs = 'exec';
     if (isResumeMode) {
       await log(`🔄 Resuming from session: ${argv.resume}`);
-      codexArgs += ` resume ${shellQuote(argv.resume)}`;
+      codexArgs += ` resume ${shellQuote(argv.resume)} --model ${shellQuote(mappedModel)}`;
     } else {
       codexArgs += ` --model ${shellQuote(mappedModel)}`;
     }
@@ -930,6 +930,7 @@ export const executeCodexCommand = async params => {
       const codexErrorSummary = getCodexErrorEventSummary(codexJsonState);
       if (codexErrorSummary.hasError) {
         const limitInfo = detectUsageLimit(codexErrorSummary.message || lastMessage);
+        const retryableError = classifyRetryableError(codexErrorSummary.message || lastMessage);
         if (limitInfo.isUsageLimit) {
           limitReached = true;
           limitResetTime = limitInfo.resetTime;
@@ -944,6 +945,25 @@ export const executeCodexCommand = async params => {
           for (const line of messageLines) {
             await log(line, { level: 'warning' });
           }
+        } else if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
         } else {
           await log(`\n\n❌ Codex emitted error event: ${codexErrorSummary.message}`, { level: 'error' });
           await log(`   Error events: item=${codexErrorSummary.counts.item}, turn=${codexErrorSummary.counts.turn}, stream=${codexErrorSummary.counts.stream}`, { level: 'error' });
@@ -971,6 +991,28 @@ export const executeCodexCommand = async params => {
       }
       if (exitCode !== 0) {
+        const retryableError = classifyRetryableError(lastMessage);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Check for usage limit errors first (more specific)
         const limitInfo = detectUsageLimit(lastMessage);
         if (limitInfo.isUsageLimit) {

package/src/hive.config.lib.mjs CHANGED Viewed

@@ -12,7 +12,7 @@ const HIVE_ONLY_OPTION_NAMES = new Set(['monitor-tag', 'all-issues', 'skip-issue
 // Solve-only options that should NOT be registered in hive
 // (they are internal to solve and not meaningful when passed from hive)
-const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type']);
+const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type', 'auto-resume-iteration']);
 // Options that hive defines with different defaults/descriptions than solve.
 // These are registered manually in hive config to preserve hive-specific behavior.

package/src/hive.mjs CHANGED Viewed

@@ -464,6 +464,9 @@ if (isRunningDirectly) {
     // Validate model names EARLY (simple string check, always runs)
     const tool = argv.tool || 'claude';
     await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
+    if (argv.fallbackModel) {
+      await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
+    }
     if (argv.planModel) {
       if (tool !== 'claude') {
         await log(`❌ --plan-model is only supported with --tool claude (current tool: ${tool})`, { level: 'error' });

package/src/models/index.mjs CHANGED Viewed

@@ -905,6 +905,23 @@ export const resolveModelId = (requestedModel, tool) => {
   }
 };
+export const defaultFallbackModels = {
+  claude: {
+    'claude-opus-4-7': 'opus-4-6',
+  },
+  codex: {
+    'gpt-5.5': 'gpt-5.4',
+  },
+};
+export const resolveDefaultFallbackModel = (tool, model) => {
+  if (!model) return null;
+  const toolName = (tool || 'claude').toString().toLowerCase();
+  const resolvedModel = resolveModelId(model, toolName);
+  return defaultFallbackModels[toolName]?.[resolvedModel] || null;
+};
 /**
  * Fetch model info and build the complete model information string for PR comments.
  * Uses actual models from CLI JSON output when available.

package/src/opencode.lib.mjs CHANGED Viewed

@@ -15,13 +15,14 @@ const os = (await use('os')).default;
 // Import log from general lib
 import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
-import { timeouts } from './config.lib.mjs';
+import { timeouts, retryLimits } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { opencodeModels, defaultModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability, getOpenCodePlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
 import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage as parseOpenCodeTokenUsage } from './agent-token-usage.lib.mjs';
 import { calculateAgentPricing } from './agent.lib.mjs';
+import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 export { parseOpenCodeTokenUsage };
@@ -184,10 +185,9 @@ export const executeOpenCode = async params => {
 };
 export const executeOpenCodeCommand = async params => {
-  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $ } = params;
+  const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $, waitForRetryDelay = waitWithCountdown } = params;
   // Retry configuration
-  const maxRetries = 3;
   let retryCount = 0;
   const executeWithRetry = async () => {
@@ -195,7 +195,7 @@ export const executeOpenCodeCommand = async params => {
     if (retryCount === 0) {
       await log(`\n${formatAligned('🤖', 'Executing OpenCode:', argv.model.toUpperCase())}`);
     } else {
-      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
+      await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
     }
     if (argv.verbose) {
@@ -265,7 +265,7 @@ export const executeOpenCodeCommand = async params => {
     if (argv.resume) {
       await log(`🔄 Resuming from session: ${argv.resume}`);
-      opencodeArgs = `run --format json --resume ${argv.resume} --model ${mappedModel}`;
+      opencodeArgs = `run --format json --session ${argv.resume} --model ${mappedModel}`;
     }
     // For OpenCode, we pass the prompt via stdin
@@ -301,7 +301,7 @@ export const executeOpenCodeCommand = async params => {
           cwd: tempDir,
           mirror: false,
           env: opencodeEnv,
-        })`cat ${promptFile} | ${opencodePath} run --format json --resume ${argv.resume} --model ${mappedModel}`;
+        })`cat ${promptFile} | ${opencodePath} run --format json --session ${argv.resume} --model ${mappedModel}`;
       } else {
         execCommand = $({
           cwd: tempDir,
@@ -470,6 +470,28 @@ export const executeOpenCodeCommand = async params => {
       }
       if (exitCode !== 0) {
+        const retryableError = classifyRetryableError(allOutput || lastMessage);
+        if (retryableError.isRetryable) {
+          const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
+          const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
+          if (retryCount < maxRetries) {
+            const delay = getRetryDelayMs({
+              retryCount,
+              initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
+              maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
+            });
+            const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
+            await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
+            if (sessionId && !argv.resume) argv.resume = sessionId;
+            await maybeSwitchToFallbackModel({ tool: 'opencode', argv, log, errorMessage: retryableError.message });
+            await waitForRetryDelay(delay, log);
+            await log('\n🔄 Retrying now...');
+            retryCount++;
+            return await executeWithRetry();
+          }
+          await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
+        }
         // Check for usage limit errors first (more specific)
         const limitInfo = detectUsageLimit(lastMessage);
         if (limitInfo.isUsageLimit) {

package/src/option-suggestions.lib.mjs CHANGED Viewed

@@ -203,6 +203,7 @@ const KNOWN_OPTION_NAMES = [
   'allow-to-push-to-contributors-pull-requests-as-maintainer',
   'prefix-fork-name-with-owner-name',
   'auto-restart-max-iterations',
+  'auto-resume-max-iterations',
   'auto-continue-only-on-new-comments',
   'auto-restart-on-limit-reset',
   'auto-restart-on-non-updated-pull-request-description',

package/src/solve.auto-continue.lib.mjs CHANGED Viewed

@@ -48,6 +48,7 @@ const { extractLinkedIssueNumber } = githubLinking;
 // Import configuration
 import { autoContinue, limitReset } from './config.lib.mjs';
+import { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationCounter, normalizeAutoIterationLimit } from './auto-iteration-limits.lib.mjs';
 // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
 const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
@@ -79,6 +80,15 @@ const formatWaitTime = ms => {
 // See: https://github.com/link-assistant/hive-mind/issues/1152
 export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, shouldAttachLogs, tempDir = null, isRestart = false) => {
   try {
+    const maxAutoResumeIterations = normalizeAutoIterationLimit(argv.autoResumeMaxIterations);
+    const currentAutoResumeIteration = normalizeAutoIterationCounter(argv.autoResumeIteration);
+    if (hasReachedAutoIterationLimit(currentAutoResumeIteration, maxAutoResumeIterations)) {
+      await log(`\n⚠️  Auto-${isRestart ? 'restart' : 'resume'} limit reached: ${currentAutoResumeIteration}/${formatAutoIterationLimit(maxAutoResumeIterations)}`);
+      await safeExit(1, `Auto-${isRestart ? 'restart' : 'resume'} limit reached`);
+    }
+    const nextAutoResumeIteration = currentAutoResumeIteration + 1;
     const resetTime = global.limitResetTime;
     const timezone = global.limitTimezone || null;
     const baseWaitMs = calculateWaitTime(resetTime);
@@ -125,6 +135,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     const actionType = isRestart ? 'Restarting' : 'Resuming';
     await log(`\n✅ Limit reset time reached (+ ${bufferMinutes} min buffer + ${jitterSeconds}s jitter)! ${actionType} session...`);
     await log(`   Current time: ${new Date().toLocaleTimeString()}`);
+    await log(`   Auto-${isRestart ? 'restart' : 'resume'} iteration: ${maxAutoResumeIterations === 0 ? nextAutoResumeIteration : `${nextAutoResumeIteration}/${maxAutoResumeIterations}`}`);
     // Recursively call the solve script
     // For resume: use --resume with session ID to maintain context
@@ -153,6 +164,8 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     if (argv.autoRestartOnLimitReset) {
       resumeArgs.push('--auto-restart-on-limit-reset');
     }
+    resumeArgs.push('--auto-resume-iteration', String(nextAutoResumeIteration));
+    resumeArgs.push('--auto-resume-max-iterations', String(maxAutoResumeIterations));
     // Pass session type for proper comment differentiation
     // See: https://github.com/link-assistant/hive-mind/issues/1152
@@ -162,6 +175,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
     // Preserve other flags from original invocation
     if (argv.tool && argv.tool !== 'claude') resumeArgs.push('--tool', argv.tool);
     if (argv.model !== 'sonnet') resumeArgs.push('--model', argv.model);
+    if (argv.fallbackModel) resumeArgs.push('--fallback-model', argv.fallbackModel);
     if (argv.verbose) resumeArgs.push('--verbose');
     if (argv.fork) resumeArgs.push('--fork');
     if (shouldAttachLogs) resumeArgs.push('--attach-logs');

package/src/solve.auto-merge.lib.mjs CHANGED Viewed

@@ -60,6 +60,7 @@ const { READY_TO_MERGE_MARKER, AUTO_RESTART_MARKER, AUTO_MERGED_MARKER, postTrac
 // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
 const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
+const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationLimit, shouldSyncBeforeRestart } = await import('./auto-iteration-limits.lib.mjs');
 /**
  * Main function: Watch and restart until PR becomes mergeable
@@ -73,6 +74,8 @@ export const watchUntilMergeable = async params => {
   const MIN_CI_CHECK_INTERVAL_SECONDS = 120;
   const watchInterval = Math.max(rawWatchInterval, MIN_CI_CHECK_INTERVAL_SECONDS);
   const isAutoMerge = argv.autoMerge || false;
+  const maxAutoRestartIterations = normalizeAutoIterationLimit(argv.autoRestartMaxIterations);
+  const maxAutoResumeIterations = normalizeAutoIterationLimit(argv.autoResumeMaxIterations);
   // Issue #1503/#1573/#1612: repo-wide action gating is opt-in strict mode.
   // The config default may be bypassed when this module is reused directly, so normalize here.
   const waitForAllRepoActionsFlag = argv.waitForAllActionsInRepositoryBeforeMergeable ?? argv['wait-for-all-actions-in-repository-before-mergeable'] ?? argv.waitForAllActionsInRepositoryBeforeMergable ?? argv['wait-for-all-actions-in-repository-before-mergable'] ?? false;
@@ -83,6 +86,7 @@ export const watchUntilMergeable = async params => {
   // Issue #1323: Track actual AI restarts separately from check cycle iterations
   let restartCount = 0;
+  let limitResumeCount = 0;
   // Issue #1371: In-memory dedup for "Ready to merge" comment (per-session, not all-time)
   let readyToMergeCommentPosted = false;
@@ -102,6 +106,8 @@ export const watchUntilMergeable = async params => {
   await log(formatAligned('', 'Mode:', isAutoMerge ? 'Auto-merge (will merge when ready)' : 'Auto-restart-until-mergeable (will NOT auto-merge)', 2));
   await log(formatAligned('', 'Checking interval:', `${watchInterval} seconds (minimum: ${MIN_CI_CHECK_INTERVAL_SECONDS}s)`, 2));
   await log(formatAligned('', 'Initial cooldown:', `${INITIAL_COOLDOWN_SECONDS} seconds`, 2));
+  await log(formatAligned('', 'Max restart iterations:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
+  await log(formatAligned('', 'Max limit resumes:', formatAutoIterationLimit(maxAutoResumeIterations), 2));
   await log(formatAligned('', 'Wait for all repo actions:', waitForAllRepoActionsFlag ? 'Yes (strict repo-wide safety)' : 'No (PR-scoped CI only)', 2));
   await log(formatAligned('', 'Stop conditions:', 'PR merged, PR closed, or becomes mergeable', 2));
   await log(formatAligned('', 'Restart triggers:', 'New non-bot comments, CI failures, merge conflicts', 2));
@@ -480,20 +486,85 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
       }
       if (shouldRestart) {
-        // Issue #1323: Increment restart count (actual AI executions, not check cycles)
-        restartCount++;
+        if (hasReachedAutoIterationLimit(restartCount, maxAutoRestartIterations)) {
+          await log('');
+          await log(formatAligned('⚠️', 'AUTO-RESTART LIMIT REACHED', `Stopping after ${restartCount} restart iteration${restartCount !== 1 ? 's' : ''}`));
+          await log(formatAligned('', 'Configured limit:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
+          await log(formatAligned('', 'Remaining blockers:', restartReason, 2));
+          await log('');
+          try {
+            const limitComment = `## ⚠️ Auto-restart limit reached
+Hive Mind stopped auto-restart-until-mergeable after ${restartCount} restart iteration${restartCount !== 1 ? 's' : ''}.
+**Configured limit:** ${formatAutoIterationLimit(maxAutoRestartIterations)}
+**Remaining reason:** ${restartReason}
+No further AI sessions will be started automatically for this run. Please review the remaining blockers manually or rerun with a higher \`--auto-restart-max-iterations\` value.
+---
+*Auto-restart-until-mergeable stopped by the safety limit.*`;
+            await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: limitComment });
+          } catch (commentError) {
+            reportError(commentError, {
+              context: 'post_auto_restart_limit_comment',
+              owner,
+              repo,
+              prNumber,
+              operation: 'comment_on_pr',
+            });
+            await log(formatAligned('', '⚠️  Could not post auto-restart limit comment to PR', '', 2));
+          }
+          return { success: false, reason: 'auto_restart_limit_reached', latestSessionId, latestAnthropicCost };
+        }
         // Add standard instructions for auto-restart-until-mergeable mode using shared utility
         feedbackLines.push(...buildAutoRestartInstructions());
+        // Get PR merge state status
+        const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
+        const mergeStateStatus = prStateResult.code === 0 ? prStateResult.stdout.toString().trim() : null;
+        // Issue #1572: Sync clean local branches with remote before restarting to avoid push failures.
+        // Issue #1664: Do not run git pull over an unfinished merge or other uncommitted state.
+        // The tool must see that state and either commit, continue, abort, or otherwise resolve it.
+        const effectiveBranch = prBranch || branchName;
+        if (shouldSyncBeforeRestart({ hasUncommittedChanges })) {
+          const pullResult = await $({ cwd: tempDir })`git pull origin ${effectiveBranch} 2>&1`;
+          if (pullResult.code === 0) {
+            await log(formatAligned('🔄', 'Synced:', `Local branch ${effectiveBranch} updated from remote`));
+          } else {
+            const pullOutput = `${pullResult.stdout || ''}${pullResult.stderr || ''}`.trim() || 'no output';
+            const pullLeftLocalChanges = await checkForUncommittedChanges(tempDir, argv);
+            if (pullLeftLocalChanges && /CONFLICT|MERGE_HEAD|unmerged|Automatic merge failed|not concluded your merge/i.test(pullOutput)) {
+              await log(formatAligned('⚠️', 'Sync produced merge state:', 'Proceeding with AI restart to resolve it', 2));
+              feedbackLines.push('');
+              feedbackLines.push('⚠️ Branch sync encountered an unfinished merge or conflicts:');
+              feedbackLines.push(pullOutput);
+              feedbackLines.push('');
+              feedbackLines.push('Please resolve the merge state before finishing.');
+            } else {
+              throw new Error(`git pull failed (code ${pullResult.code}): ${pullOutput}`);
+            }
+          }
+        } else {
+          await log(formatAligned('↪️', 'Skipping branch sync:', 'Local uncommitted/merge state must be resolved by the AI session', 2));
+        }
+        // Issue #1323: Increment restart count only when a tool execution is about to start.
+        restartCount++;
         await log(formatAligned('🔄', 'RESTART TRIGGERED:', restartReason));
-        await log(formatAligned('', 'Restart iteration:', `${restartCount}`, 2));
+        await log(formatAligned('', 'Restart iteration:', maxAutoRestartIterations === 0 ? `${restartCount}` : `${restartCount}/${maxAutoRestartIterations}`, 2));
         await log('');
-        // Post a comment to PR about the restart
-        // Issue #1356: Include restart count for tracking and add deduplication
+        // Post a comment to PR about the restart after preflight succeeds, so every
+        // posted restart notification corresponds to an actual tool session.
         try {
-          const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} triggered (iteration ${restartCount})\n\n**Reason:** ${restartReason}\n\nStarting new session to address the issues.\n\n---\n*Auto-restart-until-mergeable mode is active. Will continue until PR becomes mergeable.*`;
+          const limitText = maxAutoRestartIterations === 0 ? 'No automatic restart limit is configured.' : `This run will stop after ${maxAutoRestartIterations} restart iteration${maxAutoRestartIterations !== 1 ? 's' : ''}.`;
+          const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} triggered (iteration ${restartCount})\n\n**Reason:** ${restartReason}\n\nStarting new session to address the issues.\n\n---\n*Auto-restart-until-mergeable mode is active. ${limitText}*`;
           // Issue #1625: Track so this doesn't falsely count as an AI-authored comment
           await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: commentBody });
           await log(formatAligned('', '💬 Posted auto-restart notification to PR', '', 2));
@@ -508,20 +579,6 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
           await log(formatAligned('', '⚠️  Could not post comment to PR', '', 2));
         }
-        // Get PR merge state status
-        const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
-        const mergeStateStatus = prStateResult.code === 0 ? prStateResult.stdout.toString().trim() : null;
-        // Issue #1572: Sync local branch with remote before restarting to avoid push failures.
-        // Without this, the restarted session works on stale local state and can't push.
-        const effectiveBranch = prBranch || branchName;
-        const pullResult = await $({ cwd: tempDir })`git pull origin ${effectiveBranch} 2>&1`;
-        if (pullResult.code === 0) {
-          await log(formatAligned('🔄', 'Synced:', `Local branch ${effectiveBranch} updated from remote`));
-        } else {
-          throw new Error(`git pull failed (code ${pullResult.code}): ${pullResult.stdout || pullResult.stderr || 'no output'}`);
-        }
         // Execute the AI tool using shared utility
         await log(formatAligned('🔄', 'Restarting:', `Running ${argv.tool.toUpperCase()} to address issues...`));
@@ -545,6 +602,15 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
           // Issue #1570: Always post a GitHub comment to notify the user about the delay
           // and when exactly execution will be resumed, so the user doesn't think the process is stuck.
           if (isUsageLimitReached(toolResult)) {
+            if (hasReachedAutoIterationLimit(limitResumeCount, maxAutoResumeIterations)) {
+              await log('');
+              await log(formatAligned('⚠️', 'AUTO-RESUME LIMIT REACHED', `Stopping after ${limitResumeCount} limit-reset continuation${limitResumeCount !== 1 ? 's' : ''}`));
+              await log(formatAligned('', 'Configured limit:', formatAutoIterationLimit(maxAutoResumeIterations), 2));
+              await log('');
+              return { success: false, reason: 'auto_resume_limit_reached', latestSessionId, latestAnthropicCost };
+            }
+            limitResumeCount++;
             const resumeSessionId = toolResult.sessionId;
             const resetTime = toolResult.limitResetTime;
             const baseWaitMs = resetTime ? calculateWaitTime(resetTime) : 0;
@@ -567,6 +633,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
             await log(formatAligned('', 'Reset time:', resetTime || 'Unknown', 2));
             await log(formatAligned('', 'Waiting:', `${waitMinutes} min (reset + ${bufferMinutes} min buffer + ${jitterSeconds}s jitter)`, 2));
             await log(formatAligned('', 'Resume at:', resumeTimeUTC, 2));
+            await log(formatAligned('', 'Auto-resume iteration:', maxAutoResumeIterations === 0 ? `${limitResumeCount}` : `${limitResumeCount}/${maxAutoResumeIterations}`, 2));
             await log(formatAligned('', 'Action:', 'Posting GitHub comment and waiting for limit reset', 2));
             if (resumeSessionId) {
               await log(formatAligned('', 'Session ID:', resumeSessionId, 2));
@@ -598,7 +665,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
                     toolName: `Anthropic ${(argv.tool || 'claude').charAt(0).toUpperCase() + (argv.tool || 'claude').slice(1)} Code`,
                     isAutoResumeEnabled: true,
                     autoResumeMode: 'restart',
-                    requestedModel: argv.model,
+                    requestedModel: argv.originalModel || argv.model,
                     tool: argv.tool || 'claude',
                     publicPricingEstimate: toolResult.publicPricingEstimate,
                     pricingInfo: toolResult.pricingInfo,
@@ -676,7 +743,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
                         errorMessage: `${argv.tool.toUpperCase()} execution failed after limit reset`,
                         sessionId: latestSessionId,
                         tempDir,
-                        requestedModel: argv.model,
+                        requestedModel: argv.originalModel || argv.model,
                         tool: argv.tool || 'claude',
                       });
                     }
@@ -726,7 +793,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
                   errorMessage: `${argv.tool.toUpperCase()} execution failed`,
                   sessionId: latestSessionId,
                   tempDir,
-                  requestedModel: argv.model,
+                  requestedModel: argv.originalModel || argv.model,
                   tool: argv.tool || 'claude',
                 });
               }
@@ -791,7 +858,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
                   publicPricingEstimate: toolResult.publicPricingEstimate,
                   pricingInfo: toolResult.pricingInfo,
                   // Issue #1225: Pass model and tool info for PR comments
-                  requestedModel: argv.model,
+                  requestedModel: argv.originalModel || argv.model,
                   tool: argv.tool || 'claude',
                   // Issue #1508: Include budget stats (context/token/cost) for auto-restart log
                   resultModelUsage: toolResult.resultModelUsage || null,

package/src/solve.config.lib.mjs CHANGED Viewed

@@ -8,7 +8,7 @@
 // This approach was adopted per issue #482 feedback to minimize custom code maintenance
 import { enhanceErrorMessage, detectMalformedFlags } from './option-suggestions.lib.mjs';
-import { defaultModels, buildModelOptionDescription, resolveRuntimeDefaultModel } from './models/index.mjs';
+import { defaultModels, buildModelOptionDescription, resolveDefaultFallbackModel, resolveRuntimeDefaultModel } from './models/index.mjs';
 import { validateBranchName } from './solve.branch.lib.mjs';
 // Re-export for use by telegram-bot.mjs (avoids extra import lines there)
@@ -173,8 +173,19 @@ export const SOLVE_OPTION_DEFINITIONS = {
   },
   'auto-restart-max-iterations': {
     type: 'number',
-    description: 'Maximum number of auto-restart iterations when uncommitted changes are detected (default: 3)',
-    default: 3,
+    description: 'Maximum number of auto-restart iterations before stopping (default: 5, 0 = unlimited)',
+    default: 5,
+  },
+  'auto-resume-max-iterations': {
+    type: 'number',
+    description: 'Maximum number of automatic resume/restart continuations after usage-limit resets (default: 5, 0 = unlimited)',
+    default: 5,
+  },
+  'auto-resume-iteration': {
+    type: 'number',
+    description: 'Internal: current automatic resume/restart continuation count',
+    default: 0,
+    hidden: true,
   },
   'auto-merge': {
     type: 'boolean',
@@ -248,6 +259,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
     description: 'Maximum thinking budget for calculating --think level mappings (default: 31999 for Claude Code). Values: off=0, low=max/4, medium=max/2, high=max*3/4, max=max.',
     default: 31999,
   },
+  'fallback-model': {
+    type: 'string',
+    description: 'Fallback model to switch to on model capacity/overload errors. When supported, retries resume the same session with this model. Defaults: claude opus/opus-4-7 -> opus-4-6; codex gpt-5.5 -> gpt-5.4; all others unset.',
+    default: undefined,
+  },
   'show-thinking-content': {
     type: 'boolean',
     description: 'Show thinking content in Claude responses. Opus 4.7 omits thinking content by default; this option opts in to receive summarized thinking blocks. Disabled by default. Only affects --tool claude.',
@@ -616,6 +632,7 @@ export const parseArguments = async (yargs, hideBin) => {
   // Yargs doesn't properly handle dynamic defaults based on other arguments,
   // so we need to handle this manually after parsing
   const modelExplicitlyProvided = rawArgs.includes('--model') || rawArgs.includes('-m') || rawArgs.includes('--worker-model');
+  const fallbackModelExplicitlyProvided = rawArgs.includes('--fallback-model');
   const planModelExplicitlyProvided = rawArgs.includes('--plan-model');
   // --plan flag expansion (Issue #1223)
@@ -681,6 +698,11 @@ export const parseArguments = async (yargs, hideBin) => {
     argv.model = await resolveRuntimeDefaultModel(argv.tool);
   }
+  if (argv.tool && !fallbackModelExplicitlyProvided) {
+    const defaultFallbackModel = resolveDefaultFallbackModel(argv.tool, argv.model);
+    argv.fallbackModel = defaultFallbackModel || undefined;
+  }
   // Validate mutual exclusivity of --claude-file and --gitkeep-file
   // Check if both are explicitly enabled (user passed both --claude-file and --gitkeep-file)
   if (argv.claudeFile && argv.gitkeepFile) {

package/src/solve.error-handlers.lib.mjs CHANGED Viewed

@@ -65,7 +65,7 @@ export const handleFailure = async options => {
           verbose: argv.verbose,
           errorMessage: cleanErrorMessage(error),
           // Issue #1225: Pass model and tool info for PR comments
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
         });
         if (logUploadSuccess) {

package/src/solve.execution.lib.mjs CHANGED Viewed

@@ -195,7 +195,7 @@ export const handleExecutionError = async (error, shouldAttachLogs, owner, repo,
           verbose: argv.verbose || false,
           errorMessage: cleanErrorMessage(error),
           // Issue #1225: Pass model and tool info for PR comments
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
         });

package/src/solve.mjs CHANGED Viewed

@@ -32,17 +32,13 @@ const results = await import('./solve.results.lib.mjs');
 const { cleanupClaudeFile, showSessionSummary, verifyResults, buildClaudeResumeCommand, buildSolveResumeCommand, checkForAiCreatedComments, attachSolutionSummary, verifyPullRequestIssueLinkAfterAutoRestart } = results;
 const claudeLib = await import('./claude.lib.mjs');
 const { executeClaude, checkPlaywrightMcpAvailability } = claudeLib;
 const githubLinking = await import('./github-linking.lib.mjs');
 const { extractLinkedIssueNumber } = githubLinking;
 const usageLimitLib = await import('./usage-limit.lib.mjs');
 const { formatResetTimeWithRelative } = usageLimitLib;
 const errorHandlers = await import('./solve.error-handlers.lib.mjs');
 const { createUncaughtExceptionHandler, createUnhandledRejectionHandler, handleMainExecutionError, handleNoPrAvailableError } = errorHandlers;
 const { notifyIssueAboutPrePullRequestFailure } = await import('./solve.pre-pr-failure-notifier.lib.mjs');
 const watchLib = await import('./solve.watch.lib.mjs');
 const { startWatchMode } = watchLib;
 const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
@@ -62,7 +58,6 @@ const { postTrackedComment, USAGE_LIMIT_REACHED_MARKER } = await import('./tool-
 const { prepareFeedbackAndTimestamps, checkUncommittedChanges, checkForkActions } = await import('./solve.preparation.lib.mjs');
 const { validateAndExitOnInvalidModel } = await import('./models/index.mjs');
 const { autoAcceptInviteForRepo } = await import('./solve.accept-invite.lib.mjs');
 // Initialize log file early (before argument parsing) to capture all output
 const logFile = await initializeLogFile(null);
 // Log version and raw command IMMEDIATELY after log file initialization
@@ -183,6 +178,8 @@ if (!(await validateContinueOnlyOnFeedback(argv, isPrUrl, isIssueUrl))) {
 // Validate model name EARLY - always runs regardless of --skip-tool-connection-check
 const tool = argv.tool || 'claude';
 await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
+if (argv.fallbackModel) await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
+argv.originalModel ||= argv.model;
 // Validate --plan-model if provided (Issue #1223)
 if (argv.planModel) {
@@ -912,7 +909,7 @@ try {
           await log(`   ${claudeResumeCmd}`);
           await log('');
         } else if (argv.url) {
-          const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForResume, model: argv.model, tempDir });
+          const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForResume, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
           await log(`💡 To continue this ${toolForResume} session with solve:`);
           await log('');
           await log(`   ${solveResumeCmd}`);
@@ -926,7 +923,7 @@ try {
         try {
           // Build Claude CLI resume command
           const tool = argv.tool || 'claude';
-          const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
+          const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
           const logUploadSuccess = await attachLogToGitHub({
             logFile: getLogFile(),
             targetType: 'pr',
@@ -942,7 +939,7 @@ try {
             toolName: getToolDisplayName(argv.tool),
             resumeCommand,
             sessionId,
-            requestedModel: argv.model,
+            requestedModel: argv.originalModel || argv.model,
             tool: argv.tool || 'claude',
             // Issue #1454: Pass resultModelUsage for accurate multi-model display
             resultModelUsage,
@@ -964,7 +961,7 @@ try {
           const resetTime = global.limitResetTime;
           // Build Claude CLI resume command
           const tool = argv.tool || 'claude';
-          const resumeCmd = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
+          const resumeCmd = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
           const resumeSection = resumeCmd ? `To resume after the limit resets, use:\n\`\`\`bash\n${resumeCmd}\n\`\`\`` : `Session ID: \`${sessionId}\``;
           // Format the reset time with relative time and UTC conversion if available
           const timezone = global.limitTimezone || null;
@@ -992,7 +989,7 @@ try {
           try {
             // Build Claude CLI resume command (only for logging, not shown to users when auto-resume is enabled)
             const tool = argv.tool || 'claude';
-            const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
+            const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
             const logUploadSuccess = await attachLogToGitHub({
               logFile: getLogFile(),
               targetType: 'pr',
@@ -1012,7 +1009,7 @@ try {
               // See: https://github.com/link-assistant/hive-mind/issues/1152
               isAutoResumeEnabled: true,
               autoResumeMode: limitContinueMode,
-              requestedModel: argv.model,
+              requestedModel: argv.originalModel || argv.model,
               tool: argv.tool || 'claude',
               // Issue #1454: Pass resultModelUsage for accurate multi-model display
               resultModelUsage,
@@ -1081,7 +1078,7 @@ try {
       await log(`   ${claudeResumeCmd}`);
       await log('');
     } else if (sessionId && argv.url) {
-      const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForFailure, model: argv.model, tempDir });
+      const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForFailure, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
       await log('');
       await log(`💡 To continue this ${toolForFailure} session with solve:`);
       await log('');
@@ -1101,7 +1098,7 @@ try {
       try {
         // Build Claude CLI resume command
         const tool = argv.tool || 'claude';
-        const resumeCommand = sessionId ? (tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir })) : null;
+        const resumeCommand = sessionId ? (tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir })) : null;
         const logUploadSuccess = await attachLogToGitHub({
           logFile: getLogFile(),
           targetType: logTargetType,
@@ -1120,7 +1117,7 @@ try {
           sessionId,
           // If not a usage limit case, fall back to generic failure format
           errorMessage: limitReached ? undefined : `${argv.tool.toUpperCase()} execution failed`,
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
           // Issue #1454: Pass resultModelUsage for accurate multi-model display
           resultModelUsage,
@@ -1383,7 +1380,7 @@ try {
           sessionId,
           tempDir,
           anthropicTotalCostUSD,
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
           // Issue #1454: Pass resultModelUsage for accurate multi-model display
           resultModelUsage,

package/src/solve.pre-pr-failure-notifier.lib.mjs CHANGED Viewed

@@ -88,7 +88,7 @@ export async function notifyIssueAboutPrePullRequestFailure(options) {
         sanitizeLogContent,
         verbose: argv.verbose,
         errorMessage: `The solver stopped before creating a pull request.\n\nReason: ${reason || 'Unknown error'}`,
-        requestedModel: argv.model,
+        requestedModel: argv.originalModel || argv.model,
         tool: argv.tool || 'claude',
       });
       if (uploaded) {

package/src/solve.results.lib.mjs CHANGED Viewed

@@ -47,12 +47,13 @@ export const { buildClaudeResumeCommand, buildClaudeInitialCommand } = claudeCom
  * @param {string} options.sessionId - The session ID to resume
  * @param {string|null} [options.tool] - Tool name (codex, opencode, agent)
  * @param {string|null} [options.model] - Model name to preserve
+ * @param {string|null} [options.fallbackModel] - Explicit fallback model to preserve
  * @param {string|null} [options.tempDir] - Working directory to preserve
  * @param {string} [options.nodePath] - Node binary path
  * @param {string} [options.scriptPath] - solve.mjs path
  * @returns {string}
  */
-export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, model = null, tempDir = null, nodePath = process.argv[0], scriptPath = process.argv[1] }) => {
+export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, model = null, fallbackModel = null, tempDir = null, nodePath = process.argv[0], scriptPath = process.argv[1] }) => {
   const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
   const args = [shellQuote(scriptPath), shellQuote(issueUrl), '--resume', shellQuote(sessionId)];
@@ -65,6 +66,10 @@ export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, mode
     args.push('--model', shellQuote(model));
   }
+  if (fallbackModel) {
+    args.push('--fallback-model', shellQuote(fallbackModel));
+  }
   if (tempDir) {
     args.push('--working-directory', shellQuote(tempDir));
   }
@@ -566,7 +571,7 @@ export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl
       await log(`   ${claudeResumeCmd}`);
       await log('');
     } else if (issueUrl) {
-      const solveResumeCmd = buildSolveResumeCommand({ issueUrl, sessionId, tool, model: argv.model, tempDir });
+      const solveResumeCmd = buildSolveResumeCommand({ issueUrl, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
       await log('');
       await log(`💡 To continue this ${tool} session with solve:`);
       await log('');
@@ -577,11 +582,12 @@ export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl
     if (limitReached) {
       await log('⏰ LIMIT REACHED DETECTED!');
-      if (argv.autoResumeOnLimitReset && global.limitResetTime) {
-        await log(`\n🔄 AUTO-RESUME ON LIMIT RESET ENABLED - Will resume at ${global.limitResetTime}`);
+      if ((argv.autoResumeOnLimitReset || argv.autoRestartOnLimitReset) && global.limitResetTime) {
+        const isRestart = !!argv.autoRestartOnLimitReset;
+        await log(`\n🔄 AUTO-${isRestart ? 'RESTART' : 'RESUME'} ON LIMIT RESET ENABLED - Will ${isRestart ? 'restart' : 'resume'} at ${global.limitResetTime}`);
         // Pass tempDir to ensure resumed session uses the same working directory
         // This is critical for Claude Code session resume to work correctly
-        await autoContinueWhenLimitResets(issueUrl, sessionId, argv, shouldAttachLogs, tempDir);
+        await autoContinueWhenLimitResets(issueUrl, sessionId, argv, shouldAttachLogs, tempDir, isRestart);
       } else {
         if (global.limitResetTime) {
           await log(`\n⏰ Limit resets at: ${global.limitResetTime}`);
@@ -823,7 +829,7 @@ Fixes ${issueRef}
             // Issue #1152: Pass sessionType for differentiated log comments
             sessionType,
             // Issue #1225: Pass model and tool info for PR comments
-            requestedModel: argv.model,
+            requestedModel: argv.originalModel || argv.model,
             tool: argv.tool || 'claude',
             // Issue #1454: Pass resultModelUsage for accurate multi-model display
             resultModelUsage,
@@ -909,7 +915,7 @@ Fixes ${issueRef}
           // Issue #1152: Pass sessionType for differentiated log comments
           sessionType,
           // Issue #1225: Pass model and tool info for issue comments
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
           // Issue #1454: Pass resultModelUsage for accurate multi-model display
           resultModelUsage,
@@ -1000,7 +1006,7 @@ export const handleExecutionError = async (error, shouldAttachLogs, owner, repo,
           verbose: argv.verbose || false,
           errorMessage: cleanErrorMessage(error),
           // Issue #1225: Pass model and tool info for PR comments
-          requestedModel: argv.model,
+          requestedModel: argv.originalModel || argv.model,
           tool: argv.tool || 'claude',
         });

package/src/solve.watch.lib.mjs CHANGED Viewed

@@ -39,6 +39,7 @@ const { checkPRMerged, checkForUncommittedChanges, getUncommittedChangesDetails,
 // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
 const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
+const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationLimit } = await import('./auto-iteration-limits.lib.mjs');
 // Issue #1625: Central marker constants + tracked comment posting
 const toolComments = await import('./tool-comments.lib.mjs');
@@ -52,7 +53,7 @@ export const watchForFeedback = async params => {
   const watchInterval = argv.watchInterval || 60; // seconds
   const isTemporaryWatch = argv.temporaryWatch || false;
-  const maxAutoRestartIterations = argv.autoRestartMaxIterations || 3;
+  const maxAutoRestartIterations = normalizeAutoIterationLimit(argv.autoRestartMaxIterations);
   // Track latest session data across all iterations for accurate pricing
   let latestSessionId = null;
@@ -75,7 +76,7 @@ export const watchForFeedback = async params => {
     await log(formatAligned('', 'Monitoring PR:', `#${prNumber}`, 2));
     await log(formatAligned('', 'Mode:', 'Auto-restart (NOT --watch mode)', 2));
     await log(formatAligned('', 'Stop conditions:', 'All changes committed OR PR merged OR max iterations reached', 2));
-    await log(formatAligned('', 'Max iterations:', `${maxAutoRestartIterations}`, 2));
+    await log(formatAligned('', 'Max iterations:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
     await log(formatAligned('', 'Note:', 'No wait time between iterations in auto-restart mode', 2));
   } else {
     await log(formatAligned('👁️', 'WATCH MODE ACTIVATED', ''));
@@ -117,7 +118,7 @@ export const watchForFeedback = async params => {
       }
       // Check if we've reached max iterations
-      if (autoRestartCount >= maxAutoRestartIterations) {
+      if (hasReachedAutoIterationLimit(autoRestartCount, maxAutoRestartIterations)) {
         await log('');
         await log(formatAligned('⚠️', 'MAX ITERATIONS REACHED', `Exiting auto-restart mode after ${autoRestartCount} iterations`));
         await log(formatAligned('', 'Some uncommitted changes may remain', '', 2));
@@ -188,7 +189,7 @@ export const watchForFeedback = async params => {
           // Post a comment to PR about auto-restart
           if (prNumber) {
             try {
-              const remainingIterations = maxAutoRestartIterations - autoRestartCount;
+              const remainingIterations = maxAutoRestartIterations === 0 ? null : maxAutoRestartIterations - autoRestartCount;
               // Get uncommitted files list for the comment
               let uncommittedFilesList = '';
@@ -196,7 +197,9 @@ export const watchForFeedback = async params => {
                 uncommittedFilesList = '\n\n**Uncommitted files:**\n```\n' + changes.join('\n') + '\n```';
               }
-              const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} ${autoRestartCount}/${maxAutoRestartIterations}\n\nDetected uncommitted changes from previous run. Starting new session to review and commit or discard them.${uncommittedFilesList}\n\n---\n*Auto-restart will stop after changes are committed or discarded, or after ${remainingIterations} more iteration${remainingIterations !== 1 ? 's' : ''}. Please wait until working session will end and give your feedback.*`;
+              const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
+              const stopText = remainingIterations === null ? 'Auto-restart is configured with no iteration limit.' : `Auto-restart will stop after changes are committed or discarded, or after ${remainingIterations} more iteration${remainingIterations !== 1 ? 's' : ''}.`;
+              const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} ${iterationLabel}\n\nDetected uncommitted changes from previous run. Starting new session to review and commit or discard them.${uncommittedFilesList}\n\n---\n*${stopText} Please wait until working session will end and give your feedback.*`;
               // Issue #1625: Track so this doesn't falsely count as AI-authored.
               await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: commentBody });
               await log(formatAligned('', '💬 Posted auto-restart notification to PR', '', 2));
@@ -283,7 +286,8 @@ export const watchForFeedback = async params => {
               const logFile = getLogFile();
               if (logFile) {
                 // Use "Auto-restart X/Y Failure Log" format to distinguish from success logs
-                const customTitle = `⚠️ Auto-restart ${autoRestartCount}/${maxAutoRestartIterations} Failure Log`;
+                const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
+                const customTitle = `⚠️ Auto-restart ${iterationLabel} Failure Log`;
                 const logUploadSuccess = await attachLogToGitHub({
                   logFile,
                   targetType: 'pr',
@@ -306,7 +310,7 @@ export const watchForFeedback = async params => {
                   isUsageLimit: toolResult.limitReached,
                   limitResetTime: toolResult.limitResetTime,
                   // Issue #1225: Pass model and tool info for PR comments
-                  requestedModel: argv.model,
+                  requestedModel: argv.originalModel || argv.model,
                   tool: argv.tool || 'claude',
                   // Issue #1508: Pass model usage for failure log (cost info per model)
                   resultModelUsage: toolResult.resultModelUsage || null,
@@ -372,7 +376,8 @@ export const watchForFeedback = async params => {
               const logFile = getLogFile();
               if (logFile) {
                 // Use "Auto-restart X/Y Log" format as requested in issue #1107
-                const customTitle = `🔄 Auto-restart ${autoRestartCount}/${maxAutoRestartIterations} Log`;
+                const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
+                const customTitle = `🔄 Auto-restart ${iterationLabel} Log`;
                 const logUploadSuccess = await attachLogToGitHub({
                   logFile,
                   targetType: 'pr',
@@ -391,7 +396,7 @@ export const watchForFeedback = async params => {
                   publicPricingEstimate: toolResult.publicPricingEstimate,
                   pricingInfo: toolResult.pricingInfo,
                   // Issue #1225: Pass model and tool info for PR comments
-                  requestedModel: argv.model,
+                  requestedModel: argv.originalModel || argv.model,
                   tool: argv.tool || 'claude',
                   // Issue #1508: Include budget stats (context/token/cost) for auto-restart log
                   resultModelUsage: toolResult.resultModelUsage || null,

package/src/tool-retry.lib.mjs ADDED Viewed

@@ -0,0 +1,118 @@
+#!/usr/bin/env node
+import { retryLimits } from './config.lib.mjs';
+import { resolveDefaultFallbackModel, resolveModelId } from './models/index.mjs';
+/**
+ * Coerce an arbitrary error value into a plain string message.
+ *
+ * Strings pass through unchanged, null/undefined become '', objects yield
+ * `error.message` or `message` when those are strings, and anything else is
+ * JSON-serialized with `String(value)` as the last resort.
+ *
+ * @param {unknown} value - Error payload (string, Error, response-like object, ...).
+ * @returns {string} Always a string, so callers can safely use string methods on it.
+ */
+const normalizeMessage = value => {
+  if (value === null || value === undefined) return '';
+  if (typeof value === 'string') return value;
+  if (typeof value?.error?.message === 'string') return value.error.message;
+  if (typeof value?.message === 'string') return value.message;
+  try {
+    // JSON.stringify returns undefined (not a string) for functions and symbols;
+    // fall back to String() so the result is never undefined.
+    return JSON.stringify(value) ?? String(value);
+  } catch {
+    // JSON.stringify throws on circular structures and BigInt values.
+    return String(value);
+  }
+};
+/**
+ * Build a case-insensitive comparison key for a model identifier.
+ *
+ * The value is trimmed, lower-cased and stripped of a trailing `[1m]` suffix
+ * (presumably a model-variant marker; confirm against the model registry) so
+ * that two spellings of the same model compare equal.
+ *
+ * @param {unknown} value - Model name or id; falsy values yield ''.
+ * @returns {string} Normalized key, or '' when no value was given.
+ */
+const normalizeModelKey = value => {
+  if (!value) return '';
+  return (
+    String(value)
+      // Trim first: the suffix pattern is anchored to the end of the string,
+      // so trailing whitespace would otherwise keep it from matching.
+      .trim()
+      .toLowerCase()
+      .replace(/\[1m\]$/, '')
+      // Trim again to drop whitespace that sat between the name and the suffix.
+      .trim()
+  );
+};
+// Ordered classification rules; the first rule whose predicate matches the
+// lower-cased message wins, so the order here is significant.
+const RETRYABLE_ERROR_RULES = [
+  {
+    label: 'Model capacity error',
+    isCapacity: true,
+    matches: text => text.includes('selected model is at capacity') || (text.includes('at capacity') && text.includes('try a different model')),
+  },
+  {
+    label: 'API overload',
+    isCapacity: true,
+    // 'overloaded_error' contains 'overloaded', so one substring test covers both spellings.
+    matches: text => text.includes('overloaded'),
+  },
+  {
+    label: 'Request timeout',
+    isCapacity: false,
+    matches: text => text.includes('request timed out'),
+  },
+  {
+    label: '503 network error',
+    isCapacity: false,
+    matches: text => text.includes('api error: 503') || (text.includes('503') && (text.includes('upstream connect error') || text.includes('remote connection failure'))),
+  },
+  {
+    label: 'Internal server error (500)',
+    isCapacity: false,
+    matches: text => text.includes('internal server error') || text.includes('api error: 500'),
+  },
+];
+/**
+ * Classify an error value as retryable or not by matching known substrings
+ * in its (case-insensitive) message.
+ *
+ * @param {unknown} value - Error payload; converted to text by normalizeMessage.
+ * @returns {{message: string, isRetryable: boolean, isCapacity: boolean, label: string|null}}
+ *   The normalized message, whether a retry rule matched, whether that rule is
+ *   a capacity/overload rule, and the rule's label (null when nothing matched).
+ */
+export const classifyRetryableError = value => {
+  const message = normalizeMessage(value);
+  const lower = message.toLowerCase();
+  const matchedRule = RETRYABLE_ERROR_RULES.find(rule => rule.matches(lower));
+  if (!matchedRule) {
+    return { message, isRetryable: false, isCapacity: false, label: null };
+  }
+  return { message, isRetryable: true, isCapacity: matchedRule.isCapacity, label: matchedRule.label };
+};
+/**
+ * Compute the exponential-backoff delay for a retry attempt:
+ * `initialDelayMs * retryBackoffMultiplier ** retryCount`, capped at `maxDelayMs`.
+ *
+ * @param {object} [options]
+ * @param {number} [options.retryCount=0] - Zero-based retry attempt. Defaults to 0 so
+ *   that calling without it yields the initial delay instead of NaN.
+ * @param {number} [options.initialDelayMs] - Base delay; defaults to retryLimits.initialTransientErrorDelayMs.
+ * @param {number} [options.maxDelayMs] - Upper bound; defaults to retryLimits.maxTransientErrorDelayMs.
+ * @returns {number} Delay in milliseconds.
+ */
+export const getRetryDelayMs = ({ retryCount = 0, initialDelayMs = retryLimits.initialTransientErrorDelayMs, maxDelayMs = retryLimits.maxTransientErrorDelayMs } = {}) => {
+  return Math.min(initialDelayMs * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelayMs);
+};
+/**
+ * Sleep for `delayMs`, reporting the remaining time once per minute for waits
+ * longer than 60 seconds.
+ *
+ * @param {number} delayMs - How long to wait, in milliseconds.
+ * @param {Function} [log] - Optional (possibly async) logger called with a progress
+ *   string. When it is not a function the wait is silent.
+ * @returns {Promise<void>} Resolves after the delay has elapsed.
+ */
+export const waitWithCountdown = async (delayMs, log) => {
+  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
+  if (delayMs <= 60000 || typeof log !== 'function') {
+    await sleep(delayMs);
+    return;
+  }
+  let remaining = delayMs;
+  const timer = setInterval(() => {
+    remaining -= 60000;
+    if (remaining <= 0) return;
+    // Progress output is best-effort: a throwing or rejecting logger must not
+    // surface as an unhandled rejection and abort the retry wait.
+    Promise.resolve()
+      .then(() => log(`⏳ ${Math.round(remaining / 60000)} min remaining...`))
+      .catch(() => {});
+  }, 60000);
+  try {
+    await sleep(delayMs);
+  } finally {
+    clearInterval(timer);
+  }
+};
+/**
+ * Pick the fallback model: an explicitly configured one wins; otherwise the
+ * result of resolveDefaultFallbackModel(tool, currentModel) is returned.
+ *
+ * @param {object} [options]
+ * @param {string} [options.tool] - Tool name forwarded to the default resolver.
+ * @param {string} [options.currentModel] - Model currently in use, forwarded to the default resolver.
+ * @param {string} [options.configuredFallbackModel] - Explicit fallback; used as-is when truthy.
+ * @returns {*} The configured fallback, or whatever the default resolver returns.
+ */
+export const resolveConfiguredFallbackModel = ({ tool, currentModel, configuredFallbackModel = undefined } = {}) => configuredFallbackModel || resolveDefaultFallbackModel(tool, currentModel);
+/**
+ * Switch `argv.model` to the fallback model when the given error is classified
+ * as a capacity error and a distinct fallback model is available.
+ *
+ * On a switch this mutates `argv`: `argv.model` becomes the fallback and
+ * `argv.fallbackModel` is filled in when it was not already set.
+ *
+ * @param {object} [options]
+ * @param {string} [options.tool] - Tool name used to resolve model ids and the default fallback.
+ * @param {object} [options.argv] - Parsed CLI arguments; `model` and `fallbackModel` are read and may be written.
+ * @param {Function} [options.log] - Optional async logger; called only when it is a function.
+ * @param {unknown} [options.errorMessage] - Error payload passed to classifyRetryableError.
+ * @returns {Promise<{switched: boolean, fallbackModel: *, previousModel?: string, reason: string|null}>}
+ *   `previousModel` is present only when a switch happened.
+ */
+export const maybeSwitchToFallbackModel = async ({ tool, argv, log, errorMessage } = {}) => {
+  const activeModel = argv?.model;
+  const fallbackModel = resolveConfiguredFallbackModel({
+    tool,
+    currentModel: activeModel,
+    configuredFallbackModel: argv?.fallbackModel,
+  });
+  const { isCapacity, label: reason } = classifyRetryableError(errorMessage);
+  const declined = { switched: false, fallbackModel, reason };
+
+  // Only capacity errors with both a current and a fallback model qualify.
+  if (!(fallbackModel && isCapacity && activeModel)) {
+    return declined;
+  }
+
+  // Compare resolved, normalized ids so two spellings of one model do not count as a switch.
+  const activeKey = normalizeModelKey(resolveModelId(activeModel, tool));
+  const fallbackKey = normalizeModelKey(resolveModelId(fallbackModel, tool));
+  if (!fallbackKey || activeKey === fallbackKey) {
+    return declined;
+  }
+
+  argv.model = fallbackModel;
+  if (!argv.fallbackModel) {
+    argv.fallbackModel = fallbackModel;
+  }
+  if (typeof log === 'function') {
+    await log(`🔀 Switching to fallback model: ${activeModel} -> ${fallbackModel}`, { level: 'warning' });
+  }
+  return {
+    switched: true,
+    fallbackModel,
+    previousModel: activeModel,
+    reason,
+  };
+};
+// Default export: the same five functions that this module also exports by name,
+// bundled into one object.
+export default {
+  classifyRetryableError,
+  getRetryDelayMs,
+  waitWithCountdown,
+  resolveConfiguredFallbackModel,
+  maybeSwitchToFallbackModel,
+};