@link-assistant/hive-mind 1.56.6 → 1.56.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.56.7
4
+
5
+ ### Patch Changes
6
+
7
+ - 37c895c: Retry capacity-related tool failures with exponential backoff and support fallback models for Codex, Claude, OpenCode, and Agent resumes.
8
+ - 16f341d: Limit automatic restart/resume loops to five iterations by default and avoid pre-restart branch sync when local merge state must be resolved by the AI session.
9
+
3
10
  ## 1.56.6
4
11
 
5
12
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.56.6",
3
+ "version": "1.56.7",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -15,7 +15,7 @@
15
15
  "hive-telegram-bot": "./src/telegram-bot.mjs"
16
16
  },
17
17
  "scripts": {
18
- "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
18
+ "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-auto-restart-limits-1664.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
19
19
  "test:queue": "node tests/solve-queue.test.mjs",
20
20
  "test:limits-display": "node tests/limits-display.test.mjs",
21
21
  "test:usage-limit": "node tests/test-usage-limit.mjs",
package/src/agent.lib.mjs CHANGED
@@ -15,13 +15,14 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import Decimal from 'decimal.js-light';
22
22
  import { agentModels, defaultModels, freeToBaseModelMap } from './models/index.mjs';
23
23
  import { checkPlaywrightMcpPackageAvailability, getAgentPlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
24
24
  import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage } from './agent-token-usage.lib.mjs';
25
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
25
26
 
26
27
  export { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage };
27
28
 
@@ -410,10 +411,9 @@ export const executeAgent = async params => {
410
411
  };
411
412
 
412
413
  export const executeAgentCommand = async params => {
413
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $ } = params;
414
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $, waitForRetryDelay = waitWithCountdown } = params;
414
415
 
415
416
  // Retry configuration
416
- const maxRetries = 3;
417
417
  let retryCount = 0;
418
418
 
419
419
  const executeWithRetry = async () => {
@@ -421,7 +421,7 @@ export const executeAgentCommand = async params => {
421
421
  if (retryCount === 0) {
422
422
  await log(`\n${formatAligned('🤖', 'Executing Agent:', argv.model.toUpperCase())}`);
423
423
  } else {
424
- await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
424
+ await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
425
425
  }
426
426
 
427
427
  if (argv.verbose) {
@@ -470,6 +470,11 @@ export const executeAgentCommand = async params => {
470
470
  agentArgs += ' --verbose';
471
471
  }
472
472
 
473
+ if (argv.resume) {
474
+ await log(`🔄 Resuming from session: ${argv.resume}`);
475
+ agentArgs += ` --resume ${argv.resume} --no-fork`;
476
+ }
477
+
473
478
  // Agent supports stdin in both plain text and JSON format
474
479
  // We'll combine system and user prompts into a single message
475
480
  const combinedPrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
@@ -783,6 +788,28 @@ export const executeAgentCommand = async params => {
783
788
  }
784
789
 
785
790
  if (exitCode !== 0 || outputError.detected) {
791
+ const retryableError = classifyRetryableError(outputError.match || streamingErrorMessage || lastMessage || fullOutput);
792
+ if (retryableError.isRetryable) {
793
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
794
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
795
+ if (retryCount < maxRetries) {
796
+ const delay = getRetryDelayMs({
797
+ retryCount,
798
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
799
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
800
+ });
801
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
802
+ await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
803
+ if (sessionId && !argv.resume) argv.resume = sessionId;
804
+ await maybeSwitchToFallbackModel({ tool: 'agent', argv, log, errorMessage: retryableError.message });
805
+ await waitForRetryDelay(delay, log);
806
+ await log('\n🔄 Retrying now...');
807
+ retryCount++;
808
+ return await executeWithRetry();
809
+ }
810
+ await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
811
+ }
812
+
786
813
  // Build JSON error structure for consistent error reporting
787
814
  const errorInfo = {
788
815
  type: 'error',
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env node
2
+
3
// Default cap on automatic iterations; it is the default `fallback` of normalizeAutoIterationLimit below.
+ export const DEFAULT_AUTO_ITERATION_LIMIT = 5;
4
+
5
// Normalizes a raw iteration limit into 0, a positive integer, or `fallback`.
//   value    - raw limit; anything Number() can coerce (number, numeric string, ...).
//   fallback - returned unchanged when `value` is unusable; defaults to DEFAULT_AUTO_ITERATION_LIMIT.
// Returns 0 for an explicit zero, Math.floor(Number(value)) for values >= 1, otherwise `fallback`.
// A result of 0 is treated as "no limit" by hasReachedAutoIterationLimit and is printed as
// 'unlimited' by formatAutoIterationLimit.
+ export const normalizeAutoIterationLimit = (value, fallback = DEFAULT_AUTO_ITERATION_LIMIT) => {
// Checked before coercion: only the number 0 or the exact string '0' yields 0. Other inputs that
// Number() maps to 0 (for example '' or null) fall through and end up as `fallback`.
6
+ if (value === 0 || value === '0') return 0;
7
+
8
+ const parsed = Number(value);
// NaN, +/-Infinity, negatives and fractions below 1 are all rejected in favour of `fallback`.
9
+ if (!Number.isFinite(parsed) || parsed < 1) return fallback;
10
+
11
+ return Math.floor(parsed);
12
+ };
13
+
14
// Normalizes a raw iteration counter into a non-negative integer.
//   value - raw counter; coerced with Number().
// Returns 0 when the coerced value is not finite or is negative, otherwise the value rounded down.
+ export const normalizeAutoIterationCounter = value => {
15
+ const parsed = Number(value);
16
+ if (!Number.isFinite(parsed) || parsed < 0) return 0;
17
+
18
+ return Math.floor(parsed);
19
+ };
20
+
21
// Reports whether the number of completed iterations has reached the configured maximum.
//   completedIterations - raw counter, normalized with normalizeAutoIterationCounter.
//   maxIterations       - raw limit, normalized with normalizeAutoIterationLimit (default fallback).
// Returns false whenever the normalized limit is 0; otherwise true once counter >= limit.
+ export const hasReachedAutoIterationLimit = (completedIterations, maxIterations) => {
22
+ const normalizedMax = normalizeAutoIterationLimit(maxIterations);
// A normalized limit of 0 disables the check entirely.
23
+ if (normalizedMax === 0) return false;
24
+
25
+ return normalizeAutoIterationCounter(completedIterations) >= normalizedMax;
26
+ };
27
+
28
// Formats an iteration limit for display.
//   maxIterations - raw limit, normalized with normalizeAutoIterationLimit (default fallback).
// Returns the string 'unlimited' when the normalized limit is 0, otherwise the limit as a string.
+ export const formatAutoIterationLimit = maxIterations => {
29
+ const normalizedMax = normalizeAutoIterationLimit(maxIterations);
30
+ return normalizedMax === 0 ? 'unlimited' : `${normalizedMax}`;
31
+ };
32
+
33
// Returns the logical negation of `hasUncommittedChanges` taken from the options object:
// true when it is falsy, false when it is truthy.
// NOTE(review): presumably gates the pre-restart branch sync mentioned in the changelog — confirm at the call site.
+ export const shouldSyncBeforeRestart = ({ hasUncommittedChanges }) => !hasUncommittedChanges;
@@ -24,6 +24,7 @@ import { buildMcpConfigWithoutPlaywright } from './playwright-mcp.lib.mjs';
24
24
  import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
25
25
  import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
26
26
  import { fetchModelInfo } from './model-info.lib.mjs';
27
+ import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
27
28
  export { availableModels }; // Re-export for backward compatibility
28
29
  export { fetchModelInfo };
29
30
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -1148,8 +1149,9 @@ export const executeClaudeCommand = async params => {
1148
1149
 
1149
1150
  // Issue #817: Stop bidirectional mode monitoring and collect queued feedback
1150
1151
  queuedFeedback = await finalizeBidirectionalHandler(bidirectionalHandler, log);
1152
+ const retryableLastError = classifyRetryableError(lastMessage);
1151
1153
  // Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
1152
- const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1154
+ const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || retryableLastError.isRetryable || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1153
1155
  if ((commandFailed || isTransientError) && isTransientError) {
1154
1156
  // Issue #1472/#1475: Startup/activity timeout → 30s–2min backoff; #1353: Request timeout → 5min–1hr; general → 2min–30min
1155
1157
  const isTimeoutRetry = isStartupTimeout || isActivityTimeout;
@@ -1178,7 +1180,7 @@ export const executeClaudeCommand = async params => {
1178
1180
  }
1179
1181
  if (retryCount < maxRetries) {
1180
1182
  const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
1181
- const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1183
+ const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : retryableLastError.label || (isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
1182
1184
  const notRetryableHint = apiMarkedNotRetryable ? ' (API says not retryable — will stop early if no progress)' : '';
1183
1185
  const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
1184
1186
  const retryMode = isStartupTimeout ? ' (fresh start)' : ' (session preserved)';
@@ -1199,6 +1201,7 @@ export const executeClaudeCommand = async params => {
1199
1201
  }
1200
1202
  // Activity timeout preserves session (work was started), startup timeout does not (no session created)
1201
1203
  if (!isStartupTimeout && sessionId && !argv.resume) argv.resume = sessionId;
1204
+ await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: retryableLastError.message || lastMessage });
1202
1205
  await waitWithCountdown(delay, log);
1203
1206
  await log('\n🔄 Retrying now...');
1204
1207
  retryCount++;
@@ -1375,11 +1378,12 @@ export const executeClaudeCommand = async params => {
1375
1378
  operation: 'run_claude_command',
1376
1379
  });
1377
1380
  const errorStr = error.message || error.toString();
1381
+ const retryableException = classifyRetryableError(errorStr);
1378
1382
  // Issue #1331: Unified handler for all transient API errors in exception block
1379
1383
  // Issue #1353: Also handle "Request timed out" in exception block
1380
1384
  // (Overloaded, 503, Internal Server Error, Request timed out) - all with session preservation
1381
1385
  const isTimeoutException = errorStr === 'Request timed out' || errorStr.includes('Request timed out');
1382
- const isTransientException = isTimeoutException || (errorStr.includes('API Error: 500') && (errorStr.includes('Overloaded') || errorStr.includes('Internal server error'))) || (errorStr.includes('API Error: 529') && (errorStr.includes('overloaded_error') || errorStr.includes('Overloaded'))) || (errorStr.includes('api_error') && errorStr.includes('Overloaded')) || (errorStr.includes('overloaded_error') && errorStr.includes('Overloaded')) || errorStr.includes('API Error: 503') || (errorStr.includes('503') && (errorStr.includes('upstream connect error') || errorStr.includes('remote connection failure')));
1386
+ const isTransientException = isTimeoutException || retryableException.isRetryable;
1383
1387
  if (isTransientException) {
1384
1388
  // Issue #1353: Use timeout-specific backoff for request timeouts
1385
1389
  const maxRetries = isTimeoutException ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
@@ -1387,9 +1391,10 @@ export const executeClaudeCommand = async params => {
1387
1391
  const maxDelay = isTimeoutException ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
1388
1392
  if (retryCount < maxRetries) {
1389
1393
  const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
1390
- const errorLabel = isTimeoutException ? 'Request timeout' : errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1394
+ const errorLabel = isTimeoutException ? 'Request timeout' : retryableException.label || (errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
1391
1395
  await log(`\n⚠️ ${errorLabel} in exception. Retry ${retryCount + 1}/${maxRetries} in ${Math.round(delay / 60000)} min (session preserved)...`, { level: 'warning' });
1392
1396
  if (sessionId && !argv.resume) argv.resume = sessionId;
1397
+ await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: errorStr });
1393
1398
  await waitWithCountdown(delay, log);
1394
1399
  await log('\n🔄 Retrying now...');
1395
1400
  retryCount++;
package/src/codex.lib.mjs CHANGED
@@ -15,7 +15,7 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import { mapModelToId, resolveCodexReasoningEffort } from './codex.options.lib.mjs';
@@ -24,6 +24,7 @@ import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
24
24
  import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs';
25
25
  import { fetchModelInfo } from './model-info.lib.mjs';
26
26
  import { defaultModels } from './models/index.mjs';
27
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
27
28
  import Decimal from 'decimal.js-light';
28
29
 
29
30
  const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -648,12 +649,11 @@ export const executeCodex = async params => {
648
649
  };
649
650
 
650
651
  export const executeCodexCommand = async params => {
651
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing } = params;
652
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing, waitForRetryDelay = waitWithCountdown } = params;
652
653
 
653
654
  const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
654
655
 
655
656
  // Retry configuration
656
- const maxRetries = 3;
657
657
  let retryCount = 0;
658
658
 
659
659
  const executeWithRetry = async () => {
@@ -661,7 +661,7 @@ export const executeCodexCommand = async params => {
661
661
  if (retryCount === 0) {
662
662
  await log(`\n${formatAligned('🤖', 'Executing Codex:', argv.model.toUpperCase())}`);
663
663
  } else {
664
- await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
664
+ await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
665
665
  }
666
666
 
667
667
  if (argv.verbose) {
@@ -711,7 +711,7 @@ export const executeCodexCommand = async params => {
711
711
  let codexArgs = 'exec';
712
712
  if (isResumeMode) {
713
713
  await log(`🔄 Resuming from session: ${argv.resume}`);
714
- codexArgs += ` resume ${shellQuote(argv.resume)}`;
714
+ codexArgs += ` resume ${shellQuote(argv.resume)} --model ${shellQuote(mappedModel)}`;
715
715
  } else {
716
716
  codexArgs += ` --model ${shellQuote(mappedModel)}`;
717
717
  }
@@ -930,6 +930,7 @@ export const executeCodexCommand = async params => {
930
930
  const codexErrorSummary = getCodexErrorEventSummary(codexJsonState);
931
931
  if (codexErrorSummary.hasError) {
932
932
  const limitInfo = detectUsageLimit(codexErrorSummary.message || lastMessage);
933
+ const retryableError = classifyRetryableError(codexErrorSummary.message || lastMessage);
933
934
  if (limitInfo.isUsageLimit) {
934
935
  limitReached = true;
935
936
  limitResetTime = limitInfo.resetTime;
@@ -944,6 +945,25 @@ export const executeCodexCommand = async params => {
944
945
  for (const line of messageLines) {
945
946
  await log(line, { level: 'warning' });
946
947
  }
948
+ } else if (retryableError.isRetryable) {
949
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
950
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
951
+ if (retryCount < maxRetries) {
952
+ const delay = getRetryDelayMs({
953
+ retryCount,
954
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
955
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
956
+ });
957
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
958
+ await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
959
+ if (sessionId && !argv.resume) argv.resume = sessionId;
960
+ await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
961
+ await waitForRetryDelay(delay, log);
962
+ await log('\n🔄 Retrying now...');
963
+ retryCount++;
964
+ return await executeWithRetry();
965
+ }
966
+ await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
947
967
  } else {
948
968
  await log(`\n\n❌ Codex emitted error event: ${codexErrorSummary.message}`, { level: 'error' });
949
969
  await log(` Error events: item=${codexErrorSummary.counts.item}, turn=${codexErrorSummary.counts.turn}, stream=${codexErrorSummary.counts.stream}`, { level: 'error' });
@@ -971,6 +991,28 @@ export const executeCodexCommand = async params => {
971
991
  }
972
992
 
973
993
  if (exitCode !== 0) {
994
+ const retryableError = classifyRetryableError(lastMessage);
995
+ if (retryableError.isRetryable) {
996
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
997
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
998
+ if (retryCount < maxRetries) {
999
+ const delay = getRetryDelayMs({
1000
+ retryCount,
1001
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
1002
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
1003
+ });
1004
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
1005
+ await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
1006
+ if (sessionId && !argv.resume) argv.resume = sessionId;
1007
+ await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
1008
+ await waitForRetryDelay(delay, log);
1009
+ await log('\n🔄 Retrying now...');
1010
+ retryCount++;
1011
+ return await executeWithRetry();
1012
+ }
1013
+ await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
1014
+ }
1015
+
974
1016
  // Check for usage limit errors first (more specific)
975
1017
  const limitInfo = detectUsageLimit(lastMessage);
976
1018
  if (limitInfo.isUsageLimit) {
@@ -12,7 +12,7 @@ const HIVE_ONLY_OPTION_NAMES = new Set(['monitor-tag', 'all-issues', 'skip-issue
12
12
 
13
13
  // Solve-only options that should NOT be registered in hive
14
14
  // (they are internal to solve and not meaningful when passed from hive)
15
- const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type']);
15
+ const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type', 'auto-resume-iteration']);
16
16
 
17
17
  // Options that hive defines with different defaults/descriptions than solve.
18
18
  // These are registered manually in hive config to preserve hive-specific behavior.
package/src/hive.mjs CHANGED
@@ -464,6 +464,9 @@ if (isRunningDirectly) {
464
464
  // Validate model names EARLY (simple string check, always runs)
465
465
  const tool = argv.tool || 'claude';
466
466
  await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
467
+ if (argv.fallbackModel) {
468
+ await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
469
+ }
467
470
  if (argv.planModel) {
468
471
  if (tool !== 'claude') {
469
472
  await log(`❌ --plan-model is only supported with --tool claude (current tool: ${tool})`, { level: 'error' });
@@ -905,6 +905,23 @@ export const resolveModelId = (requestedModel, tool) => {
905
905
  }
906
906
  };
907
907
 
908
// Built-in fallback table: tool name -> { model -> fallback model }.
// resolveDefaultFallbackModel looks entries up by lower-cased tool name and by the value that
// resolveModelId returns for the requested model, so the inner keys must match that resolved form.
// Only the `claude` and `codex` tools have entries here.
+ export const defaultFallbackModels = {
909
+ claude: {
910
+ 'claude-opus-4-7': 'opus-4-6',
911
+ },
912
+ codex: {
913
+ 'gpt-5.5': 'gpt-5.4',
914
+ },
915
+ };
916
+
917
// Looks up the built-in fallback model for a tool/model pair in defaultFallbackModels.
//   tool  - tool name; a falsy value is treated as 'claude', and the name is lower-cased before lookup.
//   model - requested model; a falsy value short-circuits to null.
// Returns the fallback model string, or null when the tool or the resolved model has no entry.
+ export const resolveDefaultFallbackModel = (tool, model) => {
918
+ if (!model) return null;
919
+
920
+ const toolName = (tool || 'claude').toString().toLowerCase();
// The table is keyed by whatever resolveModelId returns, not by the raw requested name.
921
+ const resolvedModel = resolveModelId(model, toolName);
// Optional chaining covers tools without a table entry; `|| null` turns a missing model into null.
922
+ return defaultFallbackModels[toolName]?.[resolvedModel] || null;
923
+ };
924
+
908
925
  /**
909
926
  * Fetch model info and build the complete model information string for PR comments.
910
927
  * Uses actual models from CLI JSON output when available.
@@ -15,13 +15,14 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import { opencodeModels, defaultModels } from './models/index.mjs';
22
22
  import { checkPlaywrightMcpPackageAvailability, getOpenCodePlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
23
23
  import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage as parseOpenCodeTokenUsage } from './agent-token-usage.lib.mjs';
24
24
  import { calculateAgentPricing } from './agent.lib.mjs';
25
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
25
26
 
26
27
  export { parseOpenCodeTokenUsage };
27
28
 
@@ -184,10 +185,9 @@ export const executeOpenCode = async params => {
184
185
  };
185
186
 
186
187
  export const executeOpenCodeCommand = async params => {
187
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $ } = params;
188
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $, waitForRetryDelay = waitWithCountdown } = params;
188
189
 
189
190
  // Retry configuration
190
- const maxRetries = 3;
191
191
  let retryCount = 0;
192
192
 
193
193
  const executeWithRetry = async () => {
@@ -195,7 +195,7 @@ export const executeOpenCodeCommand = async params => {
195
195
  if (retryCount === 0) {
196
196
  await log(`\n${formatAligned('🤖', 'Executing OpenCode:', argv.model.toUpperCase())}`);
197
197
  } else {
198
- await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
198
+ await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
199
199
  }
200
200
 
201
201
  if (argv.verbose) {
@@ -265,7 +265,7 @@ export const executeOpenCodeCommand = async params => {
265
265
 
266
266
  if (argv.resume) {
267
267
  await log(`🔄 Resuming from session: ${argv.resume}`);
268
- opencodeArgs = `run --format json --resume ${argv.resume} --model ${mappedModel}`;
268
+ opencodeArgs = `run --format json --session ${argv.resume} --model ${mappedModel}`;
269
269
  }
270
270
 
271
271
  // For OpenCode, we pass the prompt via stdin
@@ -301,7 +301,7 @@ export const executeOpenCodeCommand = async params => {
301
301
  cwd: tempDir,
302
302
  mirror: false,
303
303
  env: opencodeEnv,
304
- })`cat ${promptFile} | ${opencodePath} run --format json --resume ${argv.resume} --model ${mappedModel}`;
304
+ })`cat ${promptFile} | ${opencodePath} run --format json --session ${argv.resume} --model ${mappedModel}`;
305
305
  } else {
306
306
  execCommand = $({
307
307
  cwd: tempDir,
@@ -470,6 +470,28 @@ export const executeOpenCodeCommand = async params => {
470
470
  }
471
471
 
472
472
  if (exitCode !== 0) {
473
+ const retryableError = classifyRetryableError(allOutput || lastMessage);
474
+ if (retryableError.isRetryable) {
475
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
476
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
477
+ if (retryCount < maxRetries) {
478
+ const delay = getRetryDelayMs({
479
+ retryCount,
480
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
481
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
482
+ });
483
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
484
+ await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
485
+ if (sessionId && !argv.resume) argv.resume = sessionId;
486
+ await maybeSwitchToFallbackModel({ tool: 'opencode', argv, log, errorMessage: retryableError.message });
487
+ await waitForRetryDelay(delay, log);
488
+ await log('\n🔄 Retrying now...');
489
+ retryCount++;
490
+ return await executeWithRetry();
491
+ }
492
+ await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
493
+ }
494
+
473
495
  // Check for usage limit errors first (more specific)
474
496
  const limitInfo = detectUsageLimit(lastMessage);
475
497
  if (limitInfo.isUsageLimit) {
@@ -203,6 +203,7 @@ const KNOWN_OPTION_NAMES = [
203
203
  'allow-to-push-to-contributors-pull-requests-as-maintainer',
204
204
  'prefix-fork-name-with-owner-name',
205
205
  'auto-restart-max-iterations',
206
+ 'auto-resume-max-iterations',
206
207
  'auto-continue-only-on-new-comments',
207
208
  'auto-restart-on-limit-reset',
208
209
  'auto-restart-on-non-updated-pull-request-description',
@@ -48,6 +48,7 @@ const { extractLinkedIssueNumber } = githubLinking;
48
48
 
49
49
  // Import configuration
50
50
  import { autoContinue, limitReset } from './config.lib.mjs';
51
+ import { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationCounter, normalizeAutoIterationLimit } from './auto-iteration-limits.lib.mjs';
51
52
 
52
53
  // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
53
54
  const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
@@ -79,6 +80,15 @@ const formatWaitTime = ms => {
79
80
  // See: https://github.com/link-assistant/hive-mind/issues/1152
80
81
  export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, shouldAttachLogs, tempDir = null, isRestart = false) => {
81
82
  try {
83
+ const maxAutoResumeIterations = normalizeAutoIterationLimit(argv.autoResumeMaxIterations);
84
+ const currentAutoResumeIteration = normalizeAutoIterationCounter(argv.autoResumeIteration);
85
+
86
+ if (hasReachedAutoIterationLimit(currentAutoResumeIteration, maxAutoResumeIterations)) {
87
+ await log(`\n⚠️ Auto-${isRestart ? 'restart' : 'resume'} limit reached: ${currentAutoResumeIteration}/${formatAutoIterationLimit(maxAutoResumeIterations)}`);
88
+ await safeExit(1, `Auto-${isRestart ? 'restart' : 'resume'} limit reached`);
89
+ }
90
+
91
+ const nextAutoResumeIteration = currentAutoResumeIteration + 1;
82
92
  const resetTime = global.limitResetTime;
83
93
  const timezone = global.limitTimezone || null;
84
94
  const baseWaitMs = calculateWaitTime(resetTime);
@@ -125,6 +135,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
125
135
  const actionType = isRestart ? 'Restarting' : 'Resuming';
126
136
  await log(`\n✅ Limit reset time reached (+ ${bufferMinutes} min buffer + ${jitterSeconds}s jitter)! ${actionType} session...`);
127
137
  await log(` Current time: ${new Date().toLocaleTimeString()}`);
138
+ await log(` Auto-${isRestart ? 'restart' : 'resume'} iteration: ${maxAutoResumeIterations === 0 ? nextAutoResumeIteration : `${nextAutoResumeIteration}/${maxAutoResumeIterations}`}`);
128
139
 
129
140
  // Recursively call the solve script
130
141
  // For resume: use --resume with session ID to maintain context
@@ -153,6 +164,8 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
153
164
  if (argv.autoRestartOnLimitReset) {
154
165
  resumeArgs.push('--auto-restart-on-limit-reset');
155
166
  }
167
+ resumeArgs.push('--auto-resume-iteration', String(nextAutoResumeIteration));
168
+ resumeArgs.push('--auto-resume-max-iterations', String(maxAutoResumeIterations));
156
169
 
157
170
  // Pass session type for proper comment differentiation
158
171
  // See: https://github.com/link-assistant/hive-mind/issues/1152
@@ -162,6 +175,7 @@ export const autoContinueWhenLimitResets = async (issueUrl, sessionId, argv, sho
162
175
  // Preserve other flags from original invocation
163
176
  if (argv.tool && argv.tool !== 'claude') resumeArgs.push('--tool', argv.tool);
164
177
  if (argv.model !== 'sonnet') resumeArgs.push('--model', argv.model);
178
+ if (argv.fallbackModel) resumeArgs.push('--fallback-model', argv.fallbackModel);
165
179
  if (argv.verbose) resumeArgs.push('--verbose');
166
180
  if (argv.fork) resumeArgs.push('--fork');
167
181
  if (shouldAttachLogs) resumeArgs.push('--attach-logs');
@@ -60,6 +60,7 @@ const { READY_TO_MERGE_MARKER, AUTO_RESTART_MARKER, AUTO_MERGED_MARKER, postTrac
60
60
 
61
61
  // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
62
62
  const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
63
+ const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationLimit, shouldSyncBeforeRestart } = await import('./auto-iteration-limits.lib.mjs');
63
64
 
64
65
  /**
65
66
  * Main function: Watch and restart until PR becomes mergeable
@@ -73,6 +74,8 @@ export const watchUntilMergeable = async params => {
73
74
  const MIN_CI_CHECK_INTERVAL_SECONDS = 120;
74
75
  const watchInterval = Math.max(rawWatchInterval, MIN_CI_CHECK_INTERVAL_SECONDS);
75
76
  const isAutoMerge = argv.autoMerge || false;
77
+ const maxAutoRestartIterations = normalizeAutoIterationLimit(argv.autoRestartMaxIterations);
78
+ const maxAutoResumeIterations = normalizeAutoIterationLimit(argv.autoResumeMaxIterations);
76
79
  // Issue #1503/#1573/#1612: repo-wide action gating is opt-in strict mode.
77
80
  // The config default may be bypassed when this module is reused directly, so normalize here.
78
81
  const waitForAllRepoActionsFlag = argv.waitForAllActionsInRepositoryBeforeMergeable ?? argv['wait-for-all-actions-in-repository-before-mergeable'] ?? argv.waitForAllActionsInRepositoryBeforeMergable ?? argv['wait-for-all-actions-in-repository-before-mergable'] ?? false;
@@ -83,6 +86,7 @@ export const watchUntilMergeable = async params => {
83
86
 
84
87
  // Issue #1323: Track actual AI restarts separately from check cycle iterations
85
88
  let restartCount = 0;
89
+ let limitResumeCount = 0;
86
90
 
87
91
  // Issue #1371: In-memory dedup for "Ready to merge" comment (per-session, not all-time)
88
92
  let readyToMergeCommentPosted = false;
@@ -102,6 +106,8 @@ export const watchUntilMergeable = async params => {
102
106
  await log(formatAligned('', 'Mode:', isAutoMerge ? 'Auto-merge (will merge when ready)' : 'Auto-restart-until-mergeable (will NOT auto-merge)', 2));
103
107
  await log(formatAligned('', 'Checking interval:', `${watchInterval} seconds (minimum: ${MIN_CI_CHECK_INTERVAL_SECONDS}s)`, 2));
104
108
  await log(formatAligned('', 'Initial cooldown:', `${INITIAL_COOLDOWN_SECONDS} seconds`, 2));
109
+ await log(formatAligned('', 'Max restart iterations:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
110
+ await log(formatAligned('', 'Max limit resumes:', formatAutoIterationLimit(maxAutoResumeIterations), 2));
105
111
  await log(formatAligned('', 'Wait for all repo actions:', waitForAllRepoActionsFlag ? 'Yes (strict repo-wide safety)' : 'No (PR-scoped CI only)', 2));
106
112
  await log(formatAligned('', 'Stop conditions:', 'PR merged, PR closed, or becomes mergeable', 2));
107
113
  await log(formatAligned('', 'Restart triggers:', 'New non-bot comments, CI failures, merge conflicts', 2));
@@ -480,20 +486,85 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
480
486
  }
481
487
 
482
488
  if (shouldRestart) {
483
- // Issue #1323: Increment restart count (actual AI executions, not check cycles)
484
- restartCount++;
489
+ if (hasReachedAutoIterationLimit(restartCount, maxAutoRestartIterations)) {
490
+ await log('');
491
+ await log(formatAligned('⚠️', 'AUTO-RESTART LIMIT REACHED', `Stopping after ${restartCount} restart iteration${restartCount !== 1 ? 's' : ''}`));
492
+ await log(formatAligned('', 'Configured limit:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
493
+ await log(formatAligned('', 'Remaining blockers:', restartReason, 2));
494
+ await log('');
495
+
496
+ try {
497
+ const limitComment = `## ⚠️ Auto-restart limit reached
498
+
499
+ Hive Mind stopped auto-restart-until-mergeable after ${restartCount} restart iteration${restartCount !== 1 ? 's' : ''}.
500
+
501
+ **Configured limit:** ${formatAutoIterationLimit(maxAutoRestartIterations)}
502
+ **Remaining reason:** ${restartReason}
503
+
504
+ No further AI sessions will be started automatically for this run. Please review the remaining blockers manually or rerun with a higher \`--auto-restart-max-iterations\` value.
505
+
506
+ ---
507
+ *Auto-restart-until-mergeable stopped by the safety limit.*`;
508
+ await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: limitComment });
509
+ } catch (commentError) {
510
+ reportError(commentError, {
511
+ context: 'post_auto_restart_limit_comment',
512
+ owner,
513
+ repo,
514
+ prNumber,
515
+ operation: 'comment_on_pr',
516
+ });
517
+ await log(formatAligned('', '⚠️ Could not post auto-restart limit comment to PR', '', 2));
518
+ }
519
+
520
+ return { success: false, reason: 'auto_restart_limit_reached', latestSessionId, latestAnthropicCost };
521
+ }
485
522
 
486
523
  // Add standard instructions for auto-restart-until-mergeable mode using shared utility
487
524
  feedbackLines.push(...buildAutoRestartInstructions());
488
525
 
526
+ // Get PR merge state status
527
+ const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
528
+ const mergeStateStatus = prStateResult.code === 0 ? prStateResult.stdout.toString().trim() : null;
529
+
530
+ // Issue #1572: Sync clean local branches with remote before restarting to avoid push failures.
531
+ // Issue #1664: Do not run git pull over an unfinished merge or other uncommitted state.
532
+ // The tool must see that state and either commit, continue, abort, or otherwise resolve it.
533
+ const effectiveBranch = prBranch || branchName;
534
+ if (shouldSyncBeforeRestart({ hasUncommittedChanges })) {
535
+ const pullResult = await $({ cwd: tempDir })`git pull origin ${effectiveBranch} 2>&1`;
536
+ if (pullResult.code === 0) {
537
+ await log(formatAligned('🔄', 'Synced:', `Local branch ${effectiveBranch} updated from remote`));
538
+ } else {
539
+ const pullOutput = `${pullResult.stdout || ''}${pullResult.stderr || ''}`.trim() || 'no output';
540
+ const pullLeftLocalChanges = await checkForUncommittedChanges(tempDir, argv);
541
+ if (pullLeftLocalChanges && /CONFLICT|MERGE_HEAD|unmerged|Automatic merge failed|not concluded your merge/i.test(pullOutput)) {
542
+ await log(formatAligned('⚠️', 'Sync produced merge state:', 'Proceeding with AI restart to resolve it', 2));
543
+ feedbackLines.push('');
544
+ feedbackLines.push('⚠️ Branch sync encountered an unfinished merge or conflicts:');
545
+ feedbackLines.push(pullOutput);
546
+ feedbackLines.push('');
547
+ feedbackLines.push('Please resolve the merge state before finishing.');
548
+ } else {
549
+ throw new Error(`git pull failed (code ${pullResult.code}): ${pullOutput}`);
550
+ }
551
+ }
552
+ } else {
553
+ await log(formatAligned('↪️', 'Skipping branch sync:', 'Local uncommitted/merge state must be resolved by the AI session', 2));
554
+ }
555
+
556
+ // Issue #1323: Increment restart count only when a tool execution is about to start.
557
+ restartCount++;
558
+
489
559
  await log(formatAligned('🔄', 'RESTART TRIGGERED:', restartReason));
490
- await log(formatAligned('', 'Restart iteration:', `${restartCount}`, 2));
560
+ await log(formatAligned('', 'Restart iteration:', maxAutoRestartIterations === 0 ? `${restartCount}` : `${restartCount}/${maxAutoRestartIterations}`, 2));
491
561
  await log('');
492
562
 
493
- // Post a comment to PR about the restart
494
- // Issue #1356: Include restart count for tracking and add deduplication
563
+ // Post a comment to PR about the restart after preflight succeeds, so every
564
+ // posted restart notification corresponds to an actual tool session.
495
565
  try {
496
- const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} triggered (iteration ${restartCount})\n\n**Reason:** ${restartReason}\n\nStarting new session to address the issues.\n\n---\n*Auto-restart-until-mergeable mode is active. Will continue until PR becomes mergeable.*`;
566
+ const limitText = maxAutoRestartIterations === 0 ? 'No automatic restart limit is configured.' : `This run will stop after ${maxAutoRestartIterations} restart iteration${maxAutoRestartIterations !== 1 ? 's' : ''}.`;
567
+ const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} triggered (iteration ${restartCount})\n\n**Reason:** ${restartReason}\n\nStarting new session to address the issues.\n\n---\n*Auto-restart-until-mergeable mode is active. ${limitText}*`;
497
568
  // Issue #1625: Track so this doesn't falsely count as an AI-authored comment
498
569
  await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: commentBody });
499
570
  await log(formatAligned('', '💬 Posted auto-restart notification to PR', '', 2));
@@ -508,20 +579,6 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
508
579
  await log(formatAligned('', '⚠️ Could not post comment to PR', '', 2));
509
580
  }
510
581
 
511
- // Get PR merge state status
512
- const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
513
- const mergeStateStatus = prStateResult.code === 0 ? prStateResult.stdout.toString().trim() : null;
514
-
515
- // Issue #1572: Sync local branch with remote before restarting to avoid push failures.
516
- // Without this, the restarted session works on stale local state and can't push.
517
- const effectiveBranch = prBranch || branchName;
518
- const pullResult = await $({ cwd: tempDir })`git pull origin ${effectiveBranch} 2>&1`;
519
- if (pullResult.code === 0) {
520
- await log(formatAligned('🔄', 'Synced:', `Local branch ${effectiveBranch} updated from remote`));
521
- } else {
522
- throw new Error(`git pull failed (code ${pullResult.code}): ${pullResult.stdout || pullResult.stderr || 'no output'}`);
523
- }
524
-
525
582
  // Execute the AI tool using shared utility
526
583
  await log(formatAligned('🔄', 'Restarting:', `Running ${argv.tool.toUpperCase()} to address issues...`));
527
584
 
@@ -545,6 +602,15 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
545
602
  // Issue #1570: Always post a GitHub comment to notify the user about the delay
546
603
  // and when exactly execution will be resumed, so the user doesn't think the process is stuck.
547
604
  if (isUsageLimitReached(toolResult)) {
605
+ if (hasReachedAutoIterationLimit(limitResumeCount, maxAutoResumeIterations)) {
606
+ await log('');
607
+ await log(formatAligned('⚠️', 'AUTO-RESUME LIMIT REACHED', `Stopping after ${limitResumeCount} limit-reset continuation${limitResumeCount !== 1 ? 's' : ''}`));
608
+ await log(formatAligned('', 'Configured limit:', formatAutoIterationLimit(maxAutoResumeIterations), 2));
609
+ await log('');
610
+ return { success: false, reason: 'auto_resume_limit_reached', latestSessionId, latestAnthropicCost };
611
+ }
612
+
613
+ limitResumeCount++;
548
614
  const resumeSessionId = toolResult.sessionId;
549
615
  const resetTime = toolResult.limitResetTime;
550
616
  const baseWaitMs = resetTime ? calculateWaitTime(resetTime) : 0;
@@ -567,6 +633,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
567
633
  await log(formatAligned('', 'Reset time:', resetTime || 'Unknown', 2));
568
634
  await log(formatAligned('', 'Waiting:', `${waitMinutes} min (reset + ${bufferMinutes} min buffer + ${jitterSeconds}s jitter)`, 2));
569
635
  await log(formatAligned('', 'Resume at:', resumeTimeUTC, 2));
636
+ await log(formatAligned('', 'Auto-resume iteration:', maxAutoResumeIterations === 0 ? `${limitResumeCount}` : `${limitResumeCount}/${maxAutoResumeIterations}`, 2));
570
637
  await log(formatAligned('', 'Action:', 'Posting GitHub comment and waiting for limit reset', 2));
571
638
  if (resumeSessionId) {
572
639
  await log(formatAligned('', 'Session ID:', resumeSessionId, 2));
@@ -598,7 +665,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
598
665
  toolName: `Anthropic ${(argv.tool || 'claude').charAt(0).toUpperCase() + (argv.tool || 'claude').slice(1)} Code`,
599
666
  isAutoResumeEnabled: true,
600
667
  autoResumeMode: 'restart',
601
- requestedModel: argv.model,
668
+ requestedModel: argv.originalModel || argv.model,
602
669
  tool: argv.tool || 'claude',
603
670
  publicPricingEstimate: toolResult.publicPricingEstimate,
604
671
  pricingInfo: toolResult.pricingInfo,
@@ -676,7 +743,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
676
743
  errorMessage: `${argv.tool.toUpperCase()} execution failed after limit reset`,
677
744
  sessionId: latestSessionId,
678
745
  tempDir,
679
- requestedModel: argv.model,
746
+ requestedModel: argv.originalModel || argv.model,
680
747
  tool: argv.tool || 'claude',
681
748
  });
682
749
  }
@@ -726,7 +793,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
726
793
  errorMessage: `${argv.tool.toUpperCase()} execution failed`,
727
794
  sessionId: latestSessionId,
728
795
  tempDir,
729
- requestedModel: argv.model,
796
+ requestedModel: argv.originalModel || argv.model,
730
797
  tool: argv.tool || 'claude',
731
798
  });
732
799
  }
@@ -791,7 +858,7 @@ Once the billing issue is resolved, you can re-run the CI checks or push a new c
791
858
  publicPricingEstimate: toolResult.publicPricingEstimate,
792
859
  pricingInfo: toolResult.pricingInfo,
793
860
  // Issue #1225: Pass model and tool info for PR comments
794
- requestedModel: argv.model,
861
+ requestedModel: argv.originalModel || argv.model,
795
862
  tool: argv.tool || 'claude',
796
863
  // Issue #1508: Include budget stats (context/token/cost) for auto-restart log
797
864
  resultModelUsage: toolResult.resultModelUsage || null,
@@ -8,7 +8,7 @@
8
8
  // This approach was adopted per issue #482 feedback to minimize custom code maintenance
9
9
 
10
10
  import { enhanceErrorMessage, detectMalformedFlags } from './option-suggestions.lib.mjs';
11
- import { defaultModels, buildModelOptionDescription, resolveRuntimeDefaultModel } from './models/index.mjs';
11
+ import { defaultModels, buildModelOptionDescription, resolveDefaultFallbackModel, resolveRuntimeDefaultModel } from './models/index.mjs';
12
12
  import { validateBranchName } from './solve.branch.lib.mjs';
13
13
 
14
14
  // Re-export for use by telegram-bot.mjs (avoids extra import lines there)
@@ -173,8 +173,19 @@ export const SOLVE_OPTION_DEFINITIONS = {
173
173
  },
174
174
  'auto-restart-max-iterations': {
175
175
  type: 'number',
176
- description: 'Maximum number of auto-restart iterations when uncommitted changes are detected (default: 3)',
177
- default: 3,
176
+ description: 'Maximum number of auto-restart iterations before stopping (default: 5, 0 = unlimited)',
177
+ default: 5,
178
+ },
179
+ 'auto-resume-max-iterations': {
180
+ type: 'number',
181
+ description: 'Maximum number of automatic resume/restart continuations after usage-limit resets (default: 5, 0 = unlimited)',
182
+ default: 5,
183
+ },
184
+ 'auto-resume-iteration': {
185
+ type: 'number',
186
+ description: 'Internal: current automatic resume/restart continuation count',
187
+ default: 0,
188
+ hidden: true,
178
189
  },
179
190
  'auto-merge': {
180
191
  type: 'boolean',
@@ -248,6 +259,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
248
259
  description: 'Maximum thinking budget for calculating --think level mappings (default: 31999 for Claude Code). Values: off=0, low=max/4, medium=max/2, high=max*3/4, max=max.',
249
260
  default: 31999,
250
261
  },
262
+ 'fallback-model': {
263
+ type: 'string',
264
+ description: 'Fallback model to switch to on model capacity/overload errors. When supported, retries resume the same session with this model. Defaults: claude opus/opus-4-7 -> opus-4-6; codex gpt-5.5 -> gpt-5.4; all others unset.',
265
+ default: undefined,
266
+ },
251
267
  'show-thinking-content': {
252
268
  type: 'boolean',
253
269
  description: 'Show thinking content in Claude responses. Opus 4.7 omits thinking content by default; this option opts in to receive summarized thinking blocks. Disabled by default. Only affects --tool claude.',
@@ -616,6 +632,7 @@ export const parseArguments = async (yargs, hideBin) => {
616
632
  // Yargs doesn't properly handle dynamic defaults based on other arguments,
617
633
  // so we need to handle this manually after parsing
618
634
  const modelExplicitlyProvided = rawArgs.includes('--model') || rawArgs.includes('-m') || rawArgs.includes('--worker-model');
635
+ const fallbackModelExplicitlyProvided = rawArgs.includes('--fallback-model');
619
636
  const planModelExplicitlyProvided = rawArgs.includes('--plan-model');
620
637
 
621
638
  // --plan flag expansion (Issue #1223)
@@ -681,6 +698,11 @@ export const parseArguments = async (yargs, hideBin) => {
681
698
  argv.model = await resolveRuntimeDefaultModel(argv.tool);
682
699
  }
683
700
 
701
+ if (argv.tool && !fallbackModelExplicitlyProvided) {
702
+ const defaultFallbackModel = resolveDefaultFallbackModel(argv.tool, argv.model);
703
+ argv.fallbackModel = defaultFallbackModel || undefined;
704
+ }
705
+
684
706
  // Validate mutual exclusivity of --claude-file and --gitkeep-file
685
707
  // Check if both are explicitly enabled (user passed both --claude-file and --gitkeep-file)
686
708
  if (argv.claudeFile && argv.gitkeepFile) {
@@ -65,7 +65,7 @@ export const handleFailure = async options => {
65
65
  verbose: argv.verbose,
66
66
  errorMessage: cleanErrorMessage(error),
67
67
  // Issue #1225: Pass model and tool info for PR comments
68
- requestedModel: argv.model,
68
+ requestedModel: argv.originalModel || argv.model,
69
69
  tool: argv.tool || 'claude',
70
70
  });
71
71
  if (logUploadSuccess) {
@@ -195,7 +195,7 @@ export const handleExecutionError = async (error, shouldAttachLogs, owner, repo,
195
195
  verbose: argv.verbose || false,
196
196
  errorMessage: cleanErrorMessage(error),
197
197
  // Issue #1225: Pass model and tool info for PR comments
198
- requestedModel: argv.model,
198
+ requestedModel: argv.originalModel || argv.model,
199
199
  tool: argv.tool || 'claude',
200
200
  });
201
201
 
package/src/solve.mjs CHANGED
@@ -32,17 +32,13 @@ const results = await import('./solve.results.lib.mjs');
32
32
  const { cleanupClaudeFile, showSessionSummary, verifyResults, buildClaudeResumeCommand, buildSolveResumeCommand, checkForAiCreatedComments, attachSolutionSummary, verifyPullRequestIssueLinkAfterAutoRestart } = results;
33
33
  const claudeLib = await import('./claude.lib.mjs');
34
34
  const { executeClaude, checkPlaywrightMcpAvailability } = claudeLib;
35
-
36
35
  const githubLinking = await import('./github-linking.lib.mjs');
37
36
  const { extractLinkedIssueNumber } = githubLinking;
38
-
39
37
  const usageLimitLib = await import('./usage-limit.lib.mjs');
40
38
  const { formatResetTimeWithRelative } = usageLimitLib;
41
-
42
39
  const errorHandlers = await import('./solve.error-handlers.lib.mjs');
43
40
  const { createUncaughtExceptionHandler, createUnhandledRejectionHandler, handleMainExecutionError, handleNoPrAvailableError } = errorHandlers;
44
41
  const { notifyIssueAboutPrePullRequestFailure } = await import('./solve.pre-pr-failure-notifier.lib.mjs');
45
-
46
42
  const watchLib = await import('./solve.watch.lib.mjs');
47
43
  const { startWatchMode } = watchLib;
48
44
  const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
@@ -62,7 +58,6 @@ const { postTrackedComment, USAGE_LIMIT_REACHED_MARKER } = await import('./tool-
62
58
  const { prepareFeedbackAndTimestamps, checkUncommittedChanges, checkForkActions } = await import('./solve.preparation.lib.mjs');
63
59
  const { validateAndExitOnInvalidModel } = await import('./models/index.mjs');
64
60
  const { autoAcceptInviteForRepo } = await import('./solve.accept-invite.lib.mjs');
65
-
66
61
  // Initialize log file early (before argument parsing) to capture all output
67
62
  const logFile = await initializeLogFile(null);
68
63
  // Log version and raw command IMMEDIATELY after log file initialization
@@ -183,6 +178,8 @@ if (!(await validateContinueOnlyOnFeedback(argv, isPrUrl, isIssueUrl))) {
183
178
  // Validate model name EARLY - always runs regardless of --skip-tool-connection-check
184
179
  const tool = argv.tool || 'claude';
185
180
  await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
181
+ if (argv.fallbackModel) await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
182
+ argv.originalModel ||= argv.model;
186
183
 
187
184
  // Validate --plan-model if provided (Issue #1223)
188
185
  if (argv.planModel) {
@@ -912,7 +909,7 @@ try {
912
909
  await log(` ${claudeResumeCmd}`);
913
910
  await log('');
914
911
  } else if (argv.url) {
915
- const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForResume, model: argv.model, tempDir });
912
+ const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForResume, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
916
913
  await log(`💡 To continue this ${toolForResume} session with solve:`);
917
914
  await log('');
918
915
  await log(` ${solveResumeCmd}`);
@@ -926,7 +923,7 @@ try {
926
923
  try {
927
924
  // Build Claude CLI resume command
928
925
  const tool = argv.tool || 'claude';
929
- const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
926
+ const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
930
927
  const logUploadSuccess = await attachLogToGitHub({
931
928
  logFile: getLogFile(),
932
929
  targetType: 'pr',
@@ -942,7 +939,7 @@ try {
942
939
  toolName: getToolDisplayName(argv.tool),
943
940
  resumeCommand,
944
941
  sessionId,
945
- requestedModel: argv.model,
942
+ requestedModel: argv.originalModel || argv.model,
946
943
  tool: argv.tool || 'claude',
947
944
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
948
945
  resultModelUsage,
@@ -964,7 +961,7 @@ try {
964
961
  const resetTime = global.limitResetTime;
965
962
  // Build Claude CLI resume command
966
963
  const tool = argv.tool || 'claude';
967
- const resumeCmd = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
964
+ const resumeCmd = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
968
965
  const resumeSection = resumeCmd ? `To resume after the limit resets, use:\n\`\`\`bash\n${resumeCmd}\n\`\`\`` : `Session ID: \`${sessionId}\``;
969
966
  // Format the reset time with relative time and UTC conversion if available
970
967
  const timezone = global.limitTimezone || null;
@@ -992,7 +989,7 @@ try {
992
989
  try {
993
990
  // Build Claude CLI resume command (only for logging, not shown to users when auto-resume is enabled)
994
991
  const tool = argv.tool || 'claude';
995
- const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir }) : null;
992
+ const resumeCommand = tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : sessionId ? buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir }) : null;
996
993
  const logUploadSuccess = await attachLogToGitHub({
997
994
  logFile: getLogFile(),
998
995
  targetType: 'pr',
@@ -1012,7 +1009,7 @@ try {
1012
1009
  // See: https://github.com/link-assistant/hive-mind/issues/1152
1013
1010
  isAutoResumeEnabled: true,
1014
1011
  autoResumeMode: limitContinueMode,
1015
- requestedModel: argv.model,
1012
+ requestedModel: argv.originalModel || argv.model,
1016
1013
  tool: argv.tool || 'claude',
1017
1014
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
1018
1015
  resultModelUsage,
@@ -1081,7 +1078,7 @@ try {
1081
1078
  await log(` ${claudeResumeCmd}`);
1082
1079
  await log('');
1083
1080
  } else if (sessionId && argv.url) {
1084
- const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForFailure, model: argv.model, tempDir });
1081
+ const solveResumeCmd = buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool: toolForFailure, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
1085
1082
  await log('');
1086
1083
  await log(`💡 To continue this ${toolForFailure} session with solve:`);
1087
1084
  await log('');
@@ -1101,7 +1098,7 @@ try {
1101
1098
  try {
1102
1099
  // Build Claude CLI resume command
1103
1100
  const tool = argv.tool || 'claude';
1104
- const resumeCommand = sessionId ? (tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, tempDir })) : null;
1101
+ const resumeCommand = sessionId ? (tool === 'claude' ? buildClaudeResumeCommand({ tempDir, sessionId, model: argv.model }) : buildSolveResumeCommand({ issueUrl: argv.url, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir })) : null;
1105
1102
  const logUploadSuccess = await attachLogToGitHub({
1106
1103
  logFile: getLogFile(),
1107
1104
  targetType: logTargetType,
@@ -1120,7 +1117,7 @@ try {
1120
1117
  sessionId,
1121
1118
  // If not a usage limit case, fall back to generic failure format
1122
1119
  errorMessage: limitReached ? undefined : `${argv.tool.toUpperCase()} execution failed`,
1123
- requestedModel: argv.model,
1120
+ requestedModel: argv.originalModel || argv.model,
1124
1121
  tool: argv.tool || 'claude',
1125
1122
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
1126
1123
  resultModelUsage,
@@ -1383,7 +1380,7 @@ try {
1383
1380
  sessionId,
1384
1381
  tempDir,
1385
1382
  anthropicTotalCostUSD,
1386
- requestedModel: argv.model,
1383
+ requestedModel: argv.originalModel || argv.model,
1387
1384
  tool: argv.tool || 'claude',
1388
1385
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
1389
1386
  resultModelUsage,
@@ -88,7 +88,7 @@ export async function notifyIssueAboutPrePullRequestFailure(options) {
88
88
  sanitizeLogContent,
89
89
  verbose: argv.verbose,
90
90
  errorMessage: `The solver stopped before creating a pull request.\n\nReason: ${reason || 'Unknown error'}`,
91
- requestedModel: argv.model,
91
+ requestedModel: argv.originalModel || argv.model,
92
92
  tool: argv.tool || 'claude',
93
93
  });
94
94
  if (uploaded) {
@@ -47,12 +47,13 @@ export const { buildClaudeResumeCommand, buildClaudeInitialCommand } = claudeCom
47
47
  * @param {string} options.sessionId - The session ID to resume
48
48
  * @param {string|null} [options.tool] - Tool name (codex, opencode, agent)
49
49
  * @param {string|null} [options.model] - Model name to preserve
50
+ * @param {string|null} [options.fallbackModel] - Explicit fallback model to preserve
50
51
  * @param {string|null} [options.tempDir] - Working directory to preserve
51
52
  * @param {string} [options.nodePath] - Node binary path
52
53
  * @param {string} [options.scriptPath] - solve.mjs path
53
54
  * @returns {string}
54
55
  */
55
- export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, model = null, tempDir = null, nodePath = process.argv[0], scriptPath = process.argv[1] }) => {
56
+ export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, model = null, fallbackModel = null, tempDir = null, nodePath = process.argv[0], scriptPath = process.argv[1] }) => {
56
57
  const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
57
58
 
58
59
  const args = [shellQuote(scriptPath), shellQuote(issueUrl), '--resume', shellQuote(sessionId)];
@@ -65,6 +66,10 @@ export const buildSolveResumeCommand = ({ issueUrl, sessionId, tool = null, mode
65
66
  args.push('--model', shellQuote(model));
66
67
  }
67
68
 
69
+ if (fallbackModel) {
70
+ args.push('--fallback-model', shellQuote(fallbackModel));
71
+ }
72
+
68
73
  if (tempDir) {
69
74
  args.push('--working-directory', shellQuote(tempDir));
70
75
  }
@@ -566,7 +571,7 @@ export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl
566
571
  await log(` ${claudeResumeCmd}`);
567
572
  await log('');
568
573
  } else if (issueUrl) {
569
- const solveResumeCmd = buildSolveResumeCommand({ issueUrl, sessionId, tool, model: argv.model, tempDir });
574
+ const solveResumeCmd = buildSolveResumeCommand({ issueUrl, sessionId, tool, model: argv.model, fallbackModel: argv.fallbackModel, tempDir });
570
575
  await log('');
571
576
  await log(`💡 To continue this ${tool} session with solve:`);
572
577
  await log('');
@@ -577,11 +582,12 @@ export const showSessionSummary = async (sessionId, limitReached, argv, issueUrl
577
582
  if (limitReached) {
578
583
  await log('⏰ LIMIT REACHED DETECTED!');
579
584
 
580
- if (argv.autoResumeOnLimitReset && global.limitResetTime) {
581
- await log(`\n🔄 AUTO-RESUME ON LIMIT RESET ENABLED - Will resume at ${global.limitResetTime}`);
585
+ if ((argv.autoResumeOnLimitReset || argv.autoRestartOnLimitReset) && global.limitResetTime) {
586
+ const isRestart = !!argv.autoRestartOnLimitReset;
587
+ await log(`\n🔄 AUTO-${isRestart ? 'RESTART' : 'RESUME'} ON LIMIT RESET ENABLED - Will ${isRestart ? 'restart' : 'resume'} at ${global.limitResetTime}`);
582
588
  // Pass tempDir to ensure resumed session uses the same working directory
583
589
  // This is critical for Claude Code session resume to work correctly
584
- await autoContinueWhenLimitResets(issueUrl, sessionId, argv, shouldAttachLogs, tempDir);
590
+ await autoContinueWhenLimitResets(issueUrl, sessionId, argv, shouldAttachLogs, tempDir, isRestart);
585
591
  } else {
586
592
  if (global.limitResetTime) {
587
593
  await log(`\n⏰ Limit resets at: ${global.limitResetTime}`);
@@ -823,7 +829,7 @@ Fixes ${issueRef}
823
829
  // Issue #1152: Pass sessionType for differentiated log comments
824
830
  sessionType,
825
831
  // Issue #1225: Pass model and tool info for PR comments
826
- requestedModel: argv.model,
832
+ requestedModel: argv.originalModel || argv.model,
827
833
  tool: argv.tool || 'claude',
828
834
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
829
835
  resultModelUsage,
@@ -909,7 +915,7 @@ Fixes ${issueRef}
909
915
  // Issue #1152: Pass sessionType for differentiated log comments
910
916
  sessionType,
911
917
  // Issue #1225: Pass model and tool info for issue comments
912
- requestedModel: argv.model,
918
+ requestedModel: argv.originalModel || argv.model,
913
919
  tool: argv.tool || 'claude',
914
920
  // Issue #1454: Pass resultModelUsage for accurate multi-model display
915
921
  resultModelUsage,
@@ -1000,7 +1006,7 @@ export const handleExecutionError = async (error, shouldAttachLogs, owner, repo,
1000
1006
  verbose: argv.verbose || false,
1001
1007
  errorMessage: cleanErrorMessage(error),
1002
1008
  // Issue #1225: Pass model and tool info for PR comments
1003
- requestedModel: argv.model,
1009
+ requestedModel: argv.originalModel || argv.model,
1004
1010
  tool: argv.tool || 'claude',
1005
1011
  });
1006
1012
 
@@ -39,6 +39,7 @@ const { checkPRMerged, checkForUncommittedChanges, getUncommittedChangesDetails,
39
39
 
40
40
  // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
41
41
  const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
42
+ const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationLimit } = await import('./auto-iteration-limits.lib.mjs');
42
43
 
43
44
  // Issue #1625: Central marker constants + tracked comment posting
44
45
  const toolComments = await import('./tool-comments.lib.mjs');
@@ -52,7 +53,7 @@ export const watchForFeedback = async params => {
52
53
 
53
54
  const watchInterval = argv.watchInterval || 60; // seconds
54
55
  const isTemporaryWatch = argv.temporaryWatch || false;
55
- const maxAutoRestartIterations = argv.autoRestartMaxIterations || 3;
56
+ const maxAutoRestartIterations = normalizeAutoIterationLimit(argv.autoRestartMaxIterations);
56
57
 
57
58
  // Track latest session data across all iterations for accurate pricing
58
59
  let latestSessionId = null;
@@ -75,7 +76,7 @@ export const watchForFeedback = async params => {
75
76
  await log(formatAligned('', 'Monitoring PR:', `#${prNumber}`, 2));
76
77
  await log(formatAligned('', 'Mode:', 'Auto-restart (NOT --watch mode)', 2));
77
78
  await log(formatAligned('', 'Stop conditions:', 'All changes committed OR PR merged OR max iterations reached', 2));
78
- await log(formatAligned('', 'Max iterations:', `${maxAutoRestartIterations}`, 2));
79
+ await log(formatAligned('', 'Max iterations:', formatAutoIterationLimit(maxAutoRestartIterations), 2));
79
80
  await log(formatAligned('', 'Note:', 'No wait time between iterations in auto-restart mode', 2));
80
81
  } else {
81
82
  await log(formatAligned('👁️', 'WATCH MODE ACTIVATED', ''));
@@ -117,7 +118,7 @@ export const watchForFeedback = async params => {
117
118
  }
118
119
 
119
120
  // Check if we've reached max iterations
120
- if (autoRestartCount >= maxAutoRestartIterations) {
121
+ if (hasReachedAutoIterationLimit(autoRestartCount, maxAutoRestartIterations)) {
121
122
  await log('');
122
123
  await log(formatAligned('⚠️', 'MAX ITERATIONS REACHED', `Exiting auto-restart mode after ${autoRestartCount} iterations`));
123
124
  await log(formatAligned('', 'Some uncommitted changes may remain', '', 2));
@@ -188,7 +189,7 @@ export const watchForFeedback = async params => {
188
189
  // Post a comment to PR about auto-restart
189
190
  if (prNumber) {
190
191
  try {
191
- const remainingIterations = maxAutoRestartIterations - autoRestartCount;
192
+ const remainingIterations = maxAutoRestartIterations === 0 ? null : maxAutoRestartIterations - autoRestartCount;
192
193
 
193
194
  // Get uncommitted files list for the comment
194
195
  let uncommittedFilesList = '';
@@ -196,7 +197,9 @@ export const watchForFeedback = async params => {
196
197
  uncommittedFilesList = '\n\n**Uncommitted files:**\n```\n' + changes.join('\n') + '\n```';
197
198
  }
198
199
 
199
- const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} ${autoRestartCount}/${maxAutoRestartIterations}\n\nDetected uncommitted changes from previous run. Starting new session to review and commit or discard them.${uncommittedFilesList}\n\n---\n*Auto-restart will stop after changes are committed or discarded, or after ${remainingIterations} more iteration${remainingIterations !== 1 ? 's' : ''}. Please wait until working session will end and give your feedback.*`;
200
+ const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
201
+ const stopText = remainingIterations === null ? 'Auto-restart is configured with no iteration limit.' : `Auto-restart will stop after changes are committed or discarded, or after ${remainingIterations} more iteration${remainingIterations !== 1 ? 's' : ''}.`;
202
+ const commentBody = `## 🔄 ${AUTO_RESTART_MARKER} ${iterationLabel}\n\nDetected uncommitted changes from previous run. Starting new session to review and commit or discard them.${uncommittedFilesList}\n\n---\n*${stopText} Please wait until working session will end and give your feedback.*`;
200
203
  // Issue #1625: Track so this doesn't falsely count as AI-authored.
201
204
  await postTrackedComment({ $, owner, repo, targetNumber: prNumber, body: commentBody });
202
205
  await log(formatAligned('', '💬 Posted auto-restart notification to PR', '', 2));
@@ -283,7 +286,8 @@ export const watchForFeedback = async params => {
283
286
  const logFile = getLogFile();
284
287
  if (logFile) {
285
288
  // Use "Auto-restart X/Y Failure Log" format to distinguish from success logs
286
- const customTitle = `⚠️ Auto-restart ${autoRestartCount}/${maxAutoRestartIterations} Failure Log`;
289
+ const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
290
+ const customTitle = `⚠️ Auto-restart ${iterationLabel} Failure Log`;
287
291
  const logUploadSuccess = await attachLogToGitHub({
288
292
  logFile,
289
293
  targetType: 'pr',
@@ -306,7 +310,7 @@ export const watchForFeedback = async params => {
306
310
  isUsageLimit: toolResult.limitReached,
307
311
  limitResetTime: toolResult.limitResetTime,
308
312
  // Issue #1225: Pass model and tool info for PR comments
309
- requestedModel: argv.model,
313
+ requestedModel: argv.originalModel || argv.model,
310
314
  tool: argv.tool || 'claude',
311
315
  // Issue #1508: Pass model usage for failure log (cost info per model)
312
316
  resultModelUsage: toolResult.resultModelUsage || null,
@@ -372,7 +376,8 @@ export const watchForFeedback = async params => {
372
376
  const logFile = getLogFile();
373
377
  if (logFile) {
374
378
  // Use "Auto-restart X/Y Log" format as requested in issue #1107
375
- const customTitle = `🔄 Auto-restart ${autoRestartCount}/${maxAutoRestartIterations} Log`;
379
+ const iterationLabel = maxAutoRestartIterations === 0 ? `${autoRestartCount}` : `${autoRestartCount}/${maxAutoRestartIterations}`;
380
+ const customTitle = `🔄 Auto-restart ${iterationLabel} Log`;
376
381
  const logUploadSuccess = await attachLogToGitHub({
377
382
  logFile,
378
383
  targetType: 'pr',
@@ -391,7 +396,7 @@ export const watchForFeedback = async params => {
391
396
  publicPricingEstimate: toolResult.publicPricingEstimate,
392
397
  pricingInfo: toolResult.pricingInfo,
393
398
  // Issue #1225: Pass model and tool info for PR comments
394
- requestedModel: argv.model,
399
+ requestedModel: argv.originalModel || argv.model,
395
400
  tool: argv.tool || 'claude',
396
401
  // Issue #1508: Include budget stats (context/token/cost) for auto-restart log
397
402
  resultModelUsage: toolResult.resultModelUsage || null,
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { retryLimits } from './config.lib.mjs';
4
+ import { resolveDefaultFallbackModel, resolveModelId } from './models/index.mjs';
5
+
6
/**
 * Coerce an arbitrary error-like value into a plain message string.
 *
 * Resolution order:
 *   1. `null` / `undefined`            -> `''`
 *   2. a string                        -> returned unchanged
 *   3. `value.error.message` (string)  -> that message
 *   4. `value.message` (string)        -> that message
 *   5. anything else                   -> JSON rendering, or `String(value)`
 *
 * @param {*} value - Error object, API payload, string, or any other value.
 * @returns {string} Always a string (never `undefined`), so callers can call
 *   string methods on the result without guarding.
 */
const normalizeMessage = value => {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value?.error?.message === 'string') return value.error.message;
  if (typeof value?.message === 'string') return value.message;
  try {
    // JSON.stringify returns undefined (it does not throw) for functions,
    // symbols and objects whose toJSON() yields undefined; fall back to
    // String() so the result is still a string.
    return JSON.stringify(value) ?? String(value);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(value);
  }
};
17
+
18
/**
 * Build a comparison key for a model identifier.
 *
 * The key is lower-cased, has surrounding whitespace removed, and has a
 * trailing `[1m]` suffix stripped, so two spellings of the same model compare
 * equal.
 *
 * @param {*} value - Model name or id; any falsy value yields `''`.
 * @returns {string} Normalized key, or `''` when no model was given.
 */
const normalizeModelKey = value => {
  if (!value) return '';
  return (
    String(value)
      // Trim first: the suffix regex is anchored with `$`, so trailing
      // whitespace would otherwise keep `[1m]` from being removed.
      .trim()
      .toLowerCase()
      .replace(/\[1m\]$/i, '')
      // Trim again for inputs such as "opus [1m]".
      .trim()
  );
};
25
+
26
/**
 * Classify an error value as retryable or not, based on substrings of its
 * lower-cased message.
 *
 * @param {*} value - Error-like value; it is turned into text by `normalizeMessage`.
 * @returns {{message: string, isRetryable: boolean, isCapacity: boolean, label: (string|null)}}
 *   `message` is the normalized text. `label` names the matched category and
 *   is `null` when nothing matched. `isCapacity` is true only for the model
 *   capacity and API overload categories.
 */
export const classifyRetryableError = value => {
  const message = normalizeMessage(value);
  const haystack = message.toLowerCase();
  const has = needle => haystack.includes(needle);

  // Rules are evaluated in order; the first one that matches wins.
  const rules = [
    {
      label: 'Model capacity error',
      isCapacity: true,
      matches: () => has('selected model is at capacity') || (has('at capacity') && has('try a different model')),
    },
    {
      label: 'API overload',
      isCapacity: true,
      matches: () => has('overloaded') || has('overloaded_error'),
    },
    {
      label: 'Request timeout',
      isCapacity: false,
      matches: () => has('request timed out'),
    },
    {
      label: '503 network error',
      isCapacity: false,
      matches: () => has('api error: 503') || (has('503') && (has('upstream connect error') || has('remote connection failure'))),
    },
    {
      label: 'Internal server error (500)',
      isCapacity: false,
      matches: () => has('internal server error') || has('api error: 500'),
    },
  ];

  const matched = rules.find(rule => rule.matches());
  if (matched === undefined) {
    return { message, isRetryable: false, isCapacity: false, label: null };
  }
  return { message, isRetryable: true, isCapacity: matched.isCapacity, label: matched.label };
};
52
+
53
/**
 * Compute the exponential-backoff delay for a retry attempt.
 *
 * The delay is `initialDelayMs * retryLimits.retryBackoffMultiplier ** retryCount`,
 * capped at `maxDelayMs`.
 *
 * @param {object} [options]
 * @param {number} [options.retryCount=0] - Retries already attempted. Defaults
 *   to 0 so that a call with no arguments returns the initial delay instead of
 *   NaN (`Math.pow(x, undefined)` is NaN).
 * @param {number} [options.initialDelayMs] - Base delay; defaults to
 *   `retryLimits.initialTransientErrorDelayMs`.
 * @param {number} [options.maxDelayMs] - Upper bound; defaults to
 *   `retryLimits.maxTransientErrorDelayMs`.
 * @returns {number} Delay in milliseconds.
 */
export const getRetryDelayMs = ({ retryCount = 0, initialDelayMs = retryLimits.initialTransientErrorDelayMs, maxDelayMs = retryLimits.maxTransientErrorDelayMs } = {}) => {
  const backoffDelayMs = initialDelayMs * Math.pow(retryLimits.retryBackoffMultiplier, retryCount);
  return Math.min(backoffDelayMs, maxDelayMs);
};
56
+
57
/**
 * Sleep for `delayMs` milliseconds. For waits longer than one minute, report
 * the remaining time once per minute through `log`.
 *
 * Progress reporting is best-effort: a missing logger is skipped, and a logger
 * that throws or rejects never aborts the wait or surfaces as an unhandled
 * promise rejection.
 *
 * @param {number} delayMs - Total time to wait, in milliseconds.
 * @param {Function} [log] - Logger called with one message string; it may
 *   return a promise.
 * @returns {Promise<void>} Resolves once the full delay has elapsed.
 */
export const waitWithCountdown = async (delayMs, log) => {
  const TICK_MS = 60000;
  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

  // Short waits need no countdown.
  if (delayMs <= TICK_MS) {
    await sleep(delayMs);
    return;
  }

  let remaining = delayMs;
  const timer = setInterval(() => {
    remaining -= TICK_MS;
    if (remaining <= 0 || typeof log !== 'function') return;

    const message = `⏳ ${Math.round(remaining / 60000)} min remaining...`;
    // Timer callbacks have no caller to propagate to, so an uncaught rejection
    // here would become an unhandled rejection. Progress output is optional,
    // so a failing logger is deliberately ignored.
    Promise.resolve()
      .then(() => log(message))
      .catch(() => {});
  }, TICK_MS);

  try {
    await sleep(delayMs);
  } finally {
    // Always stop the ticker so it cannot keep the process alive.
    clearInterval(timer);
  }
};
72
+
73
/**
 * Pick the fallback model: an explicitly configured fallback takes precedence;
 * otherwise the choice is delegated to `resolveDefaultFallbackModel`.
 *
 * @param {object} [options]
 * @param {string} [options.tool] - Tool name passed to the default resolver.
 * @param {string} [options.currentModel] - Model currently in use, passed to the default resolver.
 * @param {string} [options.configuredFallbackModel] - Explicit fallback; returned as-is when truthy.
 * @returns {*} The configured fallback, or whatever `resolveDefaultFallbackModel` returns.
 */
export const resolveConfiguredFallbackModel = ({ tool, currentModel, configuredFallbackModel = undefined } = {}) => {
  const hasExplicitFallback = Boolean(configuredFallbackModel);
  return hasExplicitFallback ? configuredFallbackModel : resolveDefaultFallbackModel(tool, currentModel);
};
77
+
78
/**
 * Switch `argv.model` to the fallback model when the given error is a
 * capacity-type failure and a different fallback model is available.
 *
 * No switch happens when there is no fallback model, the error is not
 * classified as a capacity error, `argv.model` is unset, or the current and
 * fallback models resolve to the same normalized model key.
 *
 * Side effects on a switch: `argv.model` is overwritten with the fallback,
 * `argv.fallbackModel` is filled in when it was empty, and a warning is logged.
 *
 * @param {object} [options]
 * @param {string} [options.tool] - Tool name used for model resolution.
 * @param {object} [options.argv] - Parsed CLI arguments; mutated in place on a switch.
 * @param {Function} [options.log] - Optional async logger `(message, { level })`.
 * @param {*} [options.errorMessage] - Error value or text to classify.
 * @returns {Promise<{switched: boolean, fallbackModel: *, previousModel?: string, reason: (string|null)}>}
 *   `previousModel` is present only when `switched` is true; `reason` is the
 *   classification label of the error.
 */
export const maybeSwitchToFallbackModel = async ({ tool, argv, log, errorMessage } = {}) => {
  const fallbackModel = resolveConfiguredFallbackModel({
    tool,
    currentModel: argv?.model,
    configuredFallbackModel: argv?.fallbackModel,
  });

  const { isCapacity, label: reason } = classifyRetryableError(errorMessage);
  const notSwitched = () => ({ switched: false, fallbackModel, reason });

  if (!fallbackModel || !isCapacity || !argv?.model) {
    return notSwitched();
  }

  // Compare resolved ids so an alias of the current model is not treated as a
  // different fallback.
  const currentResolvedModel = normalizeModelKey(resolveModelId(argv.model, tool));
  const fallbackResolvedModel = normalizeModelKey(resolveModelId(fallbackModel, tool));
  if (!fallbackResolvedModel || currentResolvedModel === fallbackResolvedModel) {
    return notSwitched();
  }

  const previousModel = argv.model;
  argv.model = fallbackModel;
  // Store a defaulted fallback on argv so later readers of argv.fallbackModel see it.
  if (!argv.fallbackModel) argv.fallbackModel = fallbackModel;

  if (typeof log === 'function') {
    await log(`🔀 Switching to fallback model: ${previousModel} -> ${fallbackModel}`, { level: 'warning' });
  }

  return {
    switched: true,
    fallbackModel,
    previousModel,
    reason,
  };
};
111
+
112
+ export default {
113
+ classifyRetryableError,
114
+ getRetryDelayMs,
115
+ waitWithCountdown,
116
+ resolveConfiguredFallbackModel,
117
+ maybeSwitchToFallbackModel,
118
+ };