@link-assistant/hive-mind 1.56.6 → 1.56.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.56.8
4
+
5
+ ### Patch Changes
6
+
7
+ - 05a3e42: Fix CI/CD change detection for pull request synchronize events so metadata-only updates skip expensive test jobs while still reporting completed checks.
8
+ - c12f99d: Fix screen-isolated solve monitoring so completed `$ --status` sessions no longer block duplicate commands, queued status displays executing isolation sessions, and Telegram start messages stay in an executing state until completion.
9
+
10
+ ## 1.56.7
11
+
12
+ ### Patch Changes
13
+
14
+ - 37c895c: Retry capacity-related tool failures with exponential backoff and support fallback models for Codex, Claude, OpenCode, and Agent resumes.
15
+ - 16f341d: Limit automatic restart/resume loops to five iterations by default and avoid pre-restart branch sync when local merge state must be resolved by the AI session.
16
+
3
17
  ## 1.56.6
4
18
 
5
19
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.56.6",
3
+ "version": "1.56.8",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -15,7 +15,7 @@
15
15
  "hive-telegram-bot": "./src/telegram-bot.mjs"
16
16
  },
17
17
  "scripts": {
18
- "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
18
+ "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-codex-support.mjs && node tests/test-build-cost-info-string.mjs && node tests/test-claude-code-install-method.mjs && node tests/test-claude-quiet-config.mjs && node tests/test-configure-claude-bin.mjs && node tests/test-docker-release-order.mjs && node tests/test-docker-box-migration.mjs && node tests/test-hive-screens.mjs && node tests/test-issue-1616-pr-issue-link-preservation.mjs && node tests/test-pre-pr-failure-notifier-1640.mjs && node tests/test-ready-to-merge-pagination-1645.mjs && node tests/test-require-gh-paginate-rule.mjs && node tests/test-auto-restart-limits-1664.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-telegram-bot-command-aliases.mjs && node tests/test-telegram-options-before-url.mjs && node tests/test-telegram-bot-configuration-isolation-links-notation.mjs && node tests/test-extract-isolation-from-args.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-issue-1670-screen-status-monitoring.mjs && node tests/test-telegram-bot-launcher.mjs",
19
19
  "test:queue": "node tests/solve-queue.test.mjs",
20
20
  "test:limits-display": "node tests/limits-display.test.mjs",
21
21
  "test:usage-limit": "node tests/test-usage-limit.mjs",
package/src/agent.lib.mjs CHANGED
@@ -15,13 +15,14 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import Decimal from 'decimal.js-light';
22
22
  import { agentModels, defaultModels, freeToBaseModelMap } from './models/index.mjs';
23
23
  import { checkPlaywrightMcpPackageAvailability, getAgentPlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
24
24
  import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage } from './agent-token-usage.lib.mjs';
25
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
25
26
 
26
27
  export { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage };
27
28
 
@@ -410,10 +411,9 @@ export const executeAgent = async params => {
410
411
  };
411
412
 
412
413
  export const executeAgentCommand = async params => {
413
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $ } = params;
414
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, agentPath, $, waitForRetryDelay = waitWithCountdown } = params;
414
415
 
415
416
  // Retry configuration
416
- const maxRetries = 3;
417
417
  let retryCount = 0;
418
418
 
419
419
  const executeWithRetry = async () => {
@@ -421,7 +421,7 @@ export const executeAgentCommand = async params => {
421
421
  if (retryCount === 0) {
422
422
  await log(`\n${formatAligned('šŸ¤–', 'Executing Agent:', argv.model.toUpperCase())}`);
423
423
  } else {
424
- await log(`\n${formatAligned('šŸ”„', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
424
+ await log(`\n${formatAligned('šŸ”„', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
425
425
  }
426
426
 
427
427
  if (argv.verbose) {
@@ -470,6 +470,11 @@ export const executeAgentCommand = async params => {
470
470
  agentArgs += ' --verbose';
471
471
  }
472
472
 
473
+ if (argv.resume) {
474
+ await log(`šŸ”„ Resuming from session: ${argv.resume}`);
475
+ agentArgs += ` --resume ${argv.resume} --no-fork`;
476
+ }
477
+
473
478
  // Agent supports stdin in both plain text and JSON format
474
479
  // We'll combine system and user prompts into a single message
475
480
  const combinedPrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
@@ -783,6 +788,28 @@ export const executeAgentCommand = async params => {
783
788
  }
784
789
 
785
790
  if (exitCode !== 0 || outputError.detected) {
791
+ const retryableError = classifyRetryableError(outputError.match || streamingErrorMessage || lastMessage || fullOutput);
792
+ if (retryableError.isRetryable) {
793
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
794
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
795
+ if (retryCount < maxRetries) {
796
+ const delay = getRetryDelayMs({
797
+ retryCount,
798
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
799
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
800
+ });
801
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
802
+ await log(`\nāš ļø ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
803
+ if (sessionId && !argv.resume) argv.resume = sessionId;
804
+ await maybeSwitchToFallbackModel({ tool: 'agent', argv, log, errorMessage: retryableError.message });
805
+ await waitForRetryDelay(delay, log);
806
+ await log('\nšŸ”„ Retrying now...');
807
+ retryCount++;
808
+ return await executeWithRetry();
809
+ }
810
+ await log(`\n\nāŒ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
811
+ }
812
+
786
813
  // Build JSON error structure for consistent error reporting
787
814
  const errorInfo = {
788
815
  type: 'error',
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env node
2
+
3
/** Iteration cap used when no explicit limit is configured. */
export const DEFAULT_AUTO_ITERATION_LIMIT = 5;

/**
 * Normalize a configured automatic-iteration limit.
 *
 * A limit of zero means "unlimited" (see `formatAutoIterationLimit`). Every numeric
 * spelling of zero supplied as a string (`'0'`, `' 0 '`, `'0.0'`) is honored, not
 * just the exact literal `'0'`, so a padded CLI/env value is not silently replaced
 * by the fallback.
 *
 * @param {unknown} value - Raw limit, typically a number or numeric string.
 * @param {number} [fallback=DEFAULT_AUTO_ITERATION_LIMIT] - Returned unchanged when
 *   `value` is missing, non-numeric, non-finite, negative, or between 0 and 1.
 * @returns {number} `0` for unlimited, a positive integer limit, or `fallback`.
 */
export const normalizeAutoIterationLimit = (value, fallback = DEFAULT_AUTO_ITERATION_LIMIT) => {
  if (value === 0) return 0;
  // Blank strings also coerce to 0 via Number(), but they mean "not set" rather
  // than "unlimited", so only non-blank strings may select the unlimited mode.
  if (typeof value === 'string' && value.trim() !== '' && Number(value) === 0) return 0;

  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed < 1) return fallback;

  // Fractional limits are rounded down to a whole number of iterations.
  return Math.floor(parsed);
};

/**
 * Normalize a completed-iteration counter to a non-negative integer.
 *
 * @param {unknown} value - Raw counter, typically a number or numeric string.
 * @returns {number} The counter rounded down, or `0` when it is non-finite or negative.
 */
export const normalizeAutoIterationCounter = value => {
  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed < 0) return 0;

  return Math.floor(parsed);
};

/**
 * Tell whether the completed iteration count has hit the configured limit.
 *
 * @param {unknown} completedIterations - Iterations finished so far.
 * @param {unknown} maxIterations - Configured limit; `0` disables the limit.
 * @returns {boolean} `true` once the normalized counter is at or above the normalized
 *   limit; always `false` when the limit is unlimited.
 */
export const hasReachedAutoIterationLimit = (completedIterations, maxIterations) => {
  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
  if (normalizedMax === 0) return false;

  return normalizeAutoIterationCounter(completedIterations) >= normalizedMax;
};

/**
 * Render a limit for display.
 *
 * @param {unknown} maxIterations - Configured limit.
 * @returns {string} `'unlimited'` for a zero limit, otherwise the normalized limit as text.
 */
export const formatAutoIterationLimit = maxIterations => {
  const normalizedMax = normalizeAutoIterationLimit(maxIterations);
  return normalizedMax === 0 ? 'unlimited' : `${normalizedMax}`;
};

/**
 * Decide whether a sync may run before a restart.
 *
 * @param {{hasUncommittedChanges?: boolean}} [state] - Working-tree state. Defaults to an
 *   empty object so a call without arguments no longer throws while destructuring.
 * @returns {boolean} `true` only when `hasUncommittedChanges` is falsy.
 */
export const shouldSyncBeforeRestart = ({ hasUncommittedChanges } = {}) => !hasUncommittedChanges;
@@ -24,6 +24,7 @@ import { buildMcpConfigWithoutPlaywright } from './playwright-mcp.lib.mjs';
24
24
  import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
25
25
  import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
26
26
  import { fetchModelInfo } from './model-info.lib.mjs';
27
+ import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
27
28
  export { availableModels }; // Re-export for backward compatibility
28
29
  export { fetchModelInfo };
29
30
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -1148,8 +1149,9 @@ export const executeClaudeCommand = async params => {
1148
1149
 
1149
1150
  // Issue #817: Stop bidirectional mode monitoring and collect queued feedback
1150
1151
  queuedFeedback = await finalizeBidirectionalHandler(bidirectionalHandler, log);
1152
+ const retryableLastError = classifyRetryableError(lastMessage);
1151
1153
  // Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
1152
- const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1154
+ const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || retryableLastError.isRetryable || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1153
1155
  if ((commandFailed || isTransientError) && isTransientError) {
1154
1156
  // Issue #1472/#1475: Startup/activity timeout → 30s–2min backoff; #1353: Request timeout → 5min–1hr; general → 2min–30min
1155
1157
  const isTimeoutRetry = isStartupTimeout || isActivityTimeout;
@@ -1178,7 +1180,7 @@ export const executeClaudeCommand = async params => {
1178
1180
  }
1179
1181
  if (retryCount < maxRetries) {
1180
1182
  const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
1181
- const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1183
+ const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : retryableLastError.label || (isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
1182
1184
  const notRetryableHint = apiMarkedNotRetryable ? ' (API says not retryable — will stop early if no progress)' : '';
1183
1185
  const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
1184
1186
  const retryMode = isStartupTimeout ? ' (fresh start)' : ' (session preserved)';
@@ -1199,6 +1201,7 @@ export const executeClaudeCommand = async params => {
1199
1201
  }
1200
1202
  // Activity timeout preserves session (work was started), startup timeout does not (no session created)
1201
1203
  if (!isStartupTimeout && sessionId && !argv.resume) argv.resume = sessionId;
1204
+ await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: retryableLastError.message || lastMessage });
1202
1205
  await waitWithCountdown(delay, log);
1203
1206
  await log('\nšŸ”„ Retrying now...');
1204
1207
  retryCount++;
@@ -1375,11 +1378,12 @@ export const executeClaudeCommand = async params => {
1375
1378
  operation: 'run_claude_command',
1376
1379
  });
1377
1380
  const errorStr = error.message || error.toString();
1381
+ const retryableException = classifyRetryableError(errorStr);
1378
1382
  // Issue #1331: Unified handler for all transient API errors in exception block
1379
1383
  // Issue #1353: Also handle "Request timed out" in exception block
1380
1384
  // (Overloaded, 503, Internal Server Error, Request timed out) - all with session preservation
1381
1385
  const isTimeoutException = errorStr === 'Request timed out' || errorStr.includes('Request timed out');
1382
- const isTransientException = isTimeoutException || (errorStr.includes('API Error: 500') && (errorStr.includes('Overloaded') || errorStr.includes('Internal server error'))) || (errorStr.includes('API Error: 529') && (errorStr.includes('overloaded_error') || errorStr.includes('Overloaded'))) || (errorStr.includes('api_error') && errorStr.includes('Overloaded')) || (errorStr.includes('overloaded_error') && errorStr.includes('Overloaded')) || errorStr.includes('API Error: 503') || (errorStr.includes('503') && (errorStr.includes('upstream connect error') || errorStr.includes('remote connection failure')));
1386
+ const isTransientException = isTimeoutException || retryableException.isRetryable;
1383
1387
  if (isTransientException) {
1384
1388
  // Issue #1353: Use timeout-specific backoff for request timeouts
1385
1389
  const maxRetries = isTimeoutException ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
@@ -1387,9 +1391,10 @@ export const executeClaudeCommand = async params => {
1387
1391
  const maxDelay = isTimeoutException ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
1388
1392
  if (retryCount < maxRetries) {
1389
1393
  const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
1390
- const errorLabel = isTimeoutException ? 'Request timeout' : errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1394
+ const errorLabel = isTimeoutException ? 'Request timeout' : retryableException.label || (errorStr.includes('Overloaded') ? `API overload (${errorStr.includes('529') ? '529' : '500'})` : errorStr.includes('Internal server error') ? 'Internal server error (500)' : '503 network error');
1391
1395
  await log(`\nāš ļø ${errorLabel} in exception. Retry ${retryCount + 1}/${maxRetries} in ${Math.round(delay / 60000)} min (session preserved)...`, { level: 'warning' });
1392
1396
  if (sessionId && !argv.resume) argv.resume = sessionId;
1397
+ await maybeSwitchToFallbackModel({ tool: 'claude', argv, log, errorMessage: errorStr });
1393
1398
  await waitWithCountdown(delay, log);
1394
1399
  await log('\nšŸ”„ Retrying now...');
1395
1400
  retryCount++;
package/src/codex.lib.mjs CHANGED
@@ -15,7 +15,7 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import { mapModelToId, resolveCodexReasoningEffort } from './codex.options.lib.mjs';
@@ -24,6 +24,7 @@ import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
24
24
  import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs';
25
25
  import { fetchModelInfo } from './model-info.lib.mjs';
26
26
  import { defaultModels } from './models/index.mjs';
27
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
27
28
  import Decimal from 'decimal.js-light';
28
29
 
29
30
  const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -648,12 +649,11 @@ export const executeCodex = async params => {
648
649
  };
649
650
 
650
651
  export const executeCodexCommand = async params => {
651
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing } = params;
652
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, codexPath, $, owner, repo, prNumber, calculatePricing = calculateCodexPricing, waitForRetryDelay = waitWithCountdown } = params;
652
653
 
653
654
  const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
654
655
 
655
656
  // Retry configuration
656
- const maxRetries = 3;
657
657
  let retryCount = 0;
658
658
 
659
659
  const executeWithRetry = async () => {
@@ -661,7 +661,7 @@ export const executeCodexCommand = async params => {
661
661
  if (retryCount === 0) {
662
662
  await log(`\n${formatAligned('šŸ¤–', 'Executing Codex:', argv.model.toUpperCase())}`);
663
663
  } else {
664
- await log(`\n${formatAligned('šŸ”„', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
664
+ await log(`\n${formatAligned('šŸ”„', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
665
665
  }
666
666
 
667
667
  if (argv.verbose) {
@@ -711,7 +711,7 @@ export const executeCodexCommand = async params => {
711
711
  let codexArgs = 'exec';
712
712
  if (isResumeMode) {
713
713
  await log(`šŸ”„ Resuming from session: ${argv.resume}`);
714
- codexArgs += ` resume ${shellQuote(argv.resume)}`;
714
+ codexArgs += ` resume ${shellQuote(argv.resume)} --model ${shellQuote(mappedModel)}`;
715
715
  } else {
716
716
  codexArgs += ` --model ${shellQuote(mappedModel)}`;
717
717
  }
@@ -930,6 +930,7 @@ export const executeCodexCommand = async params => {
930
930
  const codexErrorSummary = getCodexErrorEventSummary(codexJsonState);
931
931
  if (codexErrorSummary.hasError) {
932
932
  const limitInfo = detectUsageLimit(codexErrorSummary.message || lastMessage);
933
+ const retryableError = classifyRetryableError(codexErrorSummary.message || lastMessage);
933
934
  if (limitInfo.isUsageLimit) {
934
935
  limitReached = true;
935
936
  limitResetTime = limitInfo.resetTime;
@@ -944,6 +945,25 @@ export const executeCodexCommand = async params => {
944
945
  for (const line of messageLines) {
945
946
  await log(line, { level: 'warning' });
946
947
  }
948
+ } else if (retryableError.isRetryable) {
949
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
950
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
951
+ if (retryCount < maxRetries) {
952
+ const delay = getRetryDelayMs({
953
+ retryCount,
954
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
955
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
956
+ });
957
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
958
+ await log(`\nāš ļø ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
959
+ if (sessionId && !argv.resume) argv.resume = sessionId;
960
+ await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
961
+ await waitForRetryDelay(delay, log);
962
+ await log('\nšŸ”„ Retrying now...');
963
+ retryCount++;
964
+ return await executeWithRetry();
965
+ }
966
+ await log(`\n\nāŒ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
947
967
  } else {
948
968
  await log(`\n\nāŒ Codex emitted error event: ${codexErrorSummary.message}`, { level: 'error' });
949
969
  await log(` Error events: item=${codexErrorSummary.counts.item}, turn=${codexErrorSummary.counts.turn}, stream=${codexErrorSummary.counts.stream}`, { level: 'error' });
@@ -971,6 +991,28 @@ export const executeCodexCommand = async params => {
971
991
  }
972
992
 
973
993
  if (exitCode !== 0) {
994
+ const retryableError = classifyRetryableError(lastMessage);
995
+ if (retryableError.isRetryable) {
996
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
997
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
998
+ if (retryCount < maxRetries) {
999
+ const delay = getRetryDelayMs({
1000
+ retryCount,
1001
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
1002
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
1003
+ });
1004
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
1005
+ await log(`\nāš ļø ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
1006
+ if (sessionId && !argv.resume) argv.resume = sessionId;
1007
+ await maybeSwitchToFallbackModel({ tool: 'codex', argv, log, errorMessage: retryableError.message });
1008
+ await waitForRetryDelay(delay, log);
1009
+ await log('\nšŸ”„ Retrying now...');
1010
+ retryCount++;
1011
+ return await executeWithRetry();
1012
+ }
1013
+ await log(`\n\nāŒ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
1014
+ }
1015
+
974
1016
  // Check for usage limit errors first (more specific)
975
1017
  const limitInfo = detectUsageLimit(lastMessage);
976
1018
  if (limitInfo.isUsageLimit) {
@@ -12,7 +12,7 @@ const HIVE_ONLY_OPTION_NAMES = new Set(['monitor-tag', 'all-issues', 'skip-issue
12
12
 
13
13
  // Solve-only options that should NOT be registered in hive
14
14
  // (they are internal to solve and not meaningful when passed from hive)
15
- const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type']);
15
+ const SOLVE_ONLY_OPTION_NAMES = new Set(['resume', 'working-directory', 'only-prepare-command', 'session-type', 'auto-resume-iteration']);
16
16
 
17
17
  // Options that hive defines with different defaults/descriptions than solve.
18
18
  // These are registered manually in hive config to preserve hive-specific behavior.
package/src/hive.mjs CHANGED
@@ -464,6 +464,9 @@ if (isRunningDirectly) {
464
464
  // Validate model names EARLY (simple string check, always runs)
465
465
  const tool = argv.tool || 'claude';
466
466
  await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
467
+ if (argv.fallbackModel) {
468
+ await validateAndExitOnInvalidModel(argv.fallbackModel, tool, safeExit);
469
+ }
467
470
  if (argv.planModel) {
468
471
  if (tool !== 'claude') {
469
472
  await log(`āŒ --plan-model is only supported with --tool claude (current tool: ${tool})`, { level: 'error' });
@@ -22,6 +22,8 @@ const { $ } = await use('command-stream');
22
22
 
23
23
  // Valid isolation backends
24
24
  const VALID_ISOLATION_BACKENDS = ['screen', 'tmux', 'docker'];
25
+ const RUNNING_SESSION_STATUSES = new Set(['executing', 'running']);
26
+ const TERMINAL_SESSION_STATUSES = new Set(['executed', 'completed', 'failed', 'cancelled', 'canceled', 'error']);
25
27
 
26
28
  /**
27
29
  * Generate a UUID v4 for unique session identification
@@ -31,6 +33,76 @@ export function generateSessionId() {
31
33
  return crypto.randomUUID();
32
34
  }
33
35
 
36
/**
 * Parse output from `$ --status <session>`.
 *
 * start-command versions used in the wild may return JSON when
 * `--output-format json` is supported, or human-readable key/value text.
 * Keep the parser tolerant so completion monitoring survives either format.
 *
 * Handling of edge cases:
 * - Empty/falsy output, and JSON that carries no record (`null`, `[]`), are
 *   reported as a non-existent session instead of `exists: true`.
 * - An exit code that is missing or not a finite number is reported as `null`,
 *   never `NaN`.
 * - `status` is trimmed and lowercased in both the JSON and the text format.
 *
 * @param {string} output - Raw stdout from `$ --status`
 * @returns {{exists: boolean, uuid: string|null, status: string|null, exitCode: number|null, startTime: string|null, endTime: string|null, currentTime: string|null, raw: string}}
 */
export function parseSessionStatusOutput(output) {
  // Falsy input means "no output"; truthy non-strings are stringified rather than throwing.
  const raw = output ? String(output).trim() : '';
  const notFound = { exists: false, uuid: null, status: null, exitCode: null, startTime: null, endTime: null, currentTime: null, raw };
  if (!raw) {
    return notFound;
  }

  // Convert a raw exit code to a finite number, or null when it is absent/unparseable.
  const toExitCode = value => {
    if (value === undefined || value === null || value === '') return null;
    const code = Number(value);
    return Number.isFinite(code) ? code : null;
  };

  let parsed;
  let isJson = true;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: handled by the text parser below.
    isJson = false;
  }

  if (isJson) {
    // A JSON list is treated as a list of records; only the first one is used.
    const data = Array.isArray(parsed) ? parsed[0] : parsed;
    if (data === null || data === undefined) {
      // `null`, `[]` or `[null]`: valid JSON, but there is no session record in it.
      return notFound;
    }
    if (typeof data === 'object' && !Array.isArray(data)) {
      return {
        exists: true,
        uuid: data.uuid || null,
        status: typeof data.status === 'string' ? data.status.trim().toLowerCase() || null : null,
        exitCode: toExitCode(data.exitCode),
        startTime: data.startTime || null,
        endTime: data.endTime || null,
        currentTime: data.currentTime || null,
        raw,
      };
    }
    // Bare JSON primitives (e.g. an all-digit line) fall through to text parsing.
  }

  // First non-blank line that contains no space; used as the uuid when no
  // explicit `uuid` field is present.
  const firstLine =
    raw
      .split('\n')
      .find(line => line.trim() && !line.includes(' '))
      ?.trim() || null;

  // Read a `name value` / `name "value"` line, case-insensitively, anywhere in the text.
  const readField = name => {
    const match = raw.match(new RegExp(`^\\s*${name}\\s+"?([^"\\n]+)"?\\s*$`, 'mi'));
    return match ? match[1].trim() : null;
  };

  const status = readField('status')?.toLowerCase() || null;

  return {
    exists: Boolean(status || firstLine),
    uuid: readField('uuid') || firstLine,
    status,
    exitCode: toExitCode(readField('exitCode')),
    startTime: readField('startTime'),
    endTime: readField('endTime'),
    currentTime: readField('currentTime'),
    raw,
  };
}
93
+
94
/**
 * Check whether a status value is one of the in-progress session statuses.
 *
 * @param {unknown} status - Status value; falsy values are treated as an empty string.
 * @returns {boolean} `true` when the lowercased status is in `RUNNING_SESSION_STATUSES`.
 */
export function isExecutingSessionStatus(status) {
  const normalizedStatus = String(status || '').toLowerCase();
  return RUNNING_SESSION_STATUSES.has(normalizedStatus);
}
97
+
98
/**
 * Check whether a status value is one of the finished session statuses.
 *
 * @param {unknown} status - Status value; falsy values are treated as an empty string.
 * @returns {boolean} `true` when the lowercased status is in `TERMINAL_SESSION_STATUSES`.
 */
export function isTerminalSessionStatus(status) {
  const normalizedStatus = String(status || '').toLowerCase();
  return TERMINAL_SESSION_STATUSES.has(normalizedStatus);
}
101
+
102
/**
 * Decide whether a status query result is unusable.
 *
 * @param {{exists?: boolean, status?: string|null}|null|undefined} statusResult - Parsed status query result.
 * @returns {boolean} `true` when the result is missing, does not exist, or has no status.
 */
export function shouldFallbackToScreenStatus(statusResult) {
  const hasUsableStatus = Boolean(statusResult?.exists && statusResult?.status);
  return !hasUsableStatus;
}
105
+
34
106
  /**
35
107
  * Find the `$` CLI binary path
36
108
  * @returns {Promise<string|null>} Path to `$` binary or null
@@ -133,7 +205,7 @@ export async function executeWithIsolation(command, args, options = {}) {
133
205
  *
134
206
  * @param {string} sessionId - UUID of the session to check
135
207
  * @param {boolean} [verbose] - Enable verbose logging
136
- * @returns {Promise<{exists: boolean, status: string|null, exitCode: number|null, raw: string}>}
208
+ * @returns {Promise<{exists: boolean, uuid: string|null, status: string|null, exitCode: number|null, startTime: string|null, endTime: string|null, currentTime: string|null, raw: string}>}
137
209
  */
138
210
  export async function querySessionStatus(sessionId, verbose = false) {
139
211
  const binPath = await findStartCommandBinary();
@@ -141,7 +213,7 @@ export async function querySessionStatus(sessionId, verbose = false) {
141
213
  if (verbose) {
142
214
  console.log('[VERBOSE] isolation-runner: Cannot query status - $ binary not found');
143
215
  }
144
- return { exists: false, status: null, exitCode: null, raw: '' };
216
+ return { exists: false, uuid: null, status: null, exitCode: null, startTime: null, endTime: null, currentTime: null, raw: '' };
145
217
  }
146
218
 
147
219
  try {
@@ -153,30 +225,12 @@ export async function querySessionStatus(sessionId, verbose = false) {
153
225
  console.log(`[VERBOSE] isolation-runner: Status query result: ${stdout.substring(0, 300)}`);
154
226
  }
155
227
 
156
- try {
157
- const data = JSON.parse(stdout);
158
- return {
159
- exists: true,
160
- status: data.status || null,
161
- exitCode: data.exitCode !== undefined ? data.exitCode : null,
162
- raw: stdout,
163
- };
164
- } catch {
165
- // If JSON parsing fails, try text-based detection
166
- const isExecuting = stdout.includes('executing');
167
- const isExecuted = stdout.includes('executed');
168
- return {
169
- exists: isExecuting || isExecuted,
170
- status: isExecuting ? 'executing' : isExecuted ? 'executed' : null,
171
- exitCode: null,
172
- raw: stdout,
173
- };
174
- }
228
+ return parseSessionStatusOutput(stdout);
175
229
  } catch (error) {
176
230
  if (verbose) {
177
231
  console.log(`[VERBOSE] isolation-runner: Status query error: ${error.message}`);
178
232
  }
179
- return { exists: false, status: null, exitCode: null, raw: '' };
233
+ return { exists: false, uuid: null, status: null, exitCode: null, startTime: null, endTime: null, currentTime: null, raw: '' };
180
234
  }
181
235
  }
182
236
 
@@ -222,16 +276,21 @@ export async function isSessionRunning(sessionId, options = {}) {
222
276
  const { backend, verbose = false } = opts;
223
277
 
224
278
  const result = await querySessionStatus(sessionId, verbose);
225
- if (result.exists && result.status === 'executing') {
226
- return true;
279
+ if (result.exists && result.status) {
280
+ if (isExecutingSessionStatus(result.status)) {
281
+ return true;
282
+ }
283
+ if (isTerminalSessionStatus(result.status)) {
284
+ return false;
285
+ }
227
286
  }
228
287
 
229
288
  // Fallback: for screen backend, check screen -ls directly.
230
- // This works around start-command bugs where:
289
+ // Only use this when $ --status has no usable record. This works around
290
+ // older start-command bugs where:
231
291
  // 1. $ --status can't find session by --session name (only by internal UUID)
232
- // 2. $ --status reports "executed" immediately for --detached screen sessions
233
292
  // See: https://github.com/link-assistant/hive-mind/issues/1545
234
- if (backend === 'screen') {
293
+ if (backend === 'screen' && shouldFallbackToScreenStatus(result)) {
235
294
  const screenRunning = await checkScreenSessionRunning(sessionId, verbose);
236
295
  if (screenRunning && verbose) {
237
296
  console.log(`[VERBOSE] isolation-runner: $ --status says not running, but screen -ls confirms session '${sessionId}' is still active`);
@@ -905,6 +905,23 @@ export const resolveModelId = (requestedModel, tool) => {
905
905
  }
906
906
  };
907
907
 
908
+ export const defaultFallbackModels = {
909
+ claude: {
910
+ 'claude-opus-4-7': 'opus-4-6',
911
+ },
912
+ codex: {
913
+ 'gpt-5.5': 'gpt-5.4',
914
+ },
915
+ };
916
+
917
+ export const resolveDefaultFallbackModel = (tool, model) => {
918
+ if (!model) return null;
919
+
920
+ const toolName = (tool || 'claude').toString().toLowerCase();
921
+ const resolvedModel = resolveModelId(model, toolName);
922
+ return defaultFallbackModels[toolName]?.[resolvedModel] || null;
923
+ };
924
+
908
925
  /**
909
926
  * Fetch model info and build the complete model information string for PR comments.
910
927
  * Uses actual models from CLI JSON output when available.
@@ -15,13 +15,14 @@ const os = (await use('os')).default;
15
15
  // Import log from general lib
16
16
  import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
- import { timeouts } from './config.lib.mjs';
18
+ import { timeouts, retryLimits } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
20
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
21
21
  import { opencodeModels, defaultModels } from './models/index.mjs';
22
22
  import { checkPlaywrightMcpPackageAvailability, getOpenCodePlaywrightMcpDisableEnv } from './playwright-mcp.lib.mjs';
23
23
  import { createAgentTokenUsage, accumulateAgentStepFinishUsage, parseAgentTokenUsage as parseOpenCodeTokenUsage } from './agent-token-usage.lib.mjs';
24
24
  import { calculateAgentPricing } from './agent.lib.mjs';
25
+ import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
25
26
 
26
27
  export { parseOpenCodeTokenUsage };
27
28
 
@@ -184,10 +185,9 @@ export const executeOpenCode = async params => {
184
185
  };
185
186
 
186
187
  export const executeOpenCodeCommand = async params => {
187
- const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $ } = params;
188
+ const { tempDir, branchName, prompt, systemPrompt, argv, log, formatAligned, getResourceSnapshot, forkedRepo, feedbackLines, opencodePath, $, waitForRetryDelay = waitWithCountdown } = params;
188
189
 
189
190
  // Retry configuration
190
- const maxRetries = 3;
191
191
  let retryCount = 0;
192
192
 
193
193
  const executeWithRetry = async () => {
@@ -195,7 +195,7 @@ export const executeOpenCodeCommand = async params => {
195
195
  if (retryCount === 0) {
196
196
  await log(`\n${formatAligned('🤖', 'Executing OpenCode:', argv.model.toUpperCase())}`);
197
197
  } else {
198
- await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${maxRetries}`)}`);
198
+ await log(`\n${formatAligned('🔄', 'Retry attempt:', `${retryCount}/${retryLimits.maxTransientErrorRetries}`)}`);
199
199
  }
200
200
 
201
201
  if (argv.verbose) {
@@ -265,7 +265,7 @@ export const executeOpenCodeCommand = async params => {
265
265
 
266
266
  if (argv.resume) {
267
267
  await log(`🔄 Resuming from session: ${argv.resume}`);
268
- opencodeArgs = `run --format json --resume ${argv.resume} --model ${mappedModel}`;
268
+ opencodeArgs = `run --format json --session ${argv.resume} --model ${mappedModel}`;
269
269
  }
270
270
 
271
271
  // For OpenCode, we pass the prompt via stdin
@@ -301,7 +301,7 @@ export const executeOpenCodeCommand = async params => {
301
301
  cwd: tempDir,
302
302
  mirror: false,
303
303
  env: opencodeEnv,
304
- })`cat ${promptFile} | ${opencodePath} run --format json --resume ${argv.resume} --model ${mappedModel}`;
304
+ })`cat ${promptFile} | ${opencodePath} run --format json --session ${argv.resume} --model ${mappedModel}`;
305
305
  } else {
306
306
  execCommand = $({
307
307
  cwd: tempDir,
@@ -470,6 +470,28 @@ export const executeOpenCodeCommand = async params => {
470
470
  }
471
471
 
472
472
  if (exitCode !== 0) {
473
+ const retryableError = classifyRetryableError(allOutput || lastMessage);
474
+ if (retryableError.isRetryable) {
475
+ const isRequestTimeoutRetry = retryableError.label === 'Request timeout';
476
+ const maxRetries = isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
477
+ if (retryCount < maxRetries) {
478
+ const delay = getRetryDelayMs({
479
+ retryCount,
480
+ initialDelayMs: isRequestTimeoutRetry ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs,
481
+ maxDelayMs: isRequestTimeoutRetry ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs,
482
+ });
483
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
484
+ await log(`\n⚠️ ${retryableError.label} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${sessionId ? ' (session preserved)' : ''}...`, { level: 'warning' });
485
+ if (sessionId && !argv.resume) argv.resume = sessionId;
486
+ await maybeSwitchToFallbackModel({ tool: 'opencode', argv, log, errorMessage: retryableError.message });
487
+ await waitForRetryDelay(delay, log);
488
+ await log('\n🔄 Retrying now...');
489
+ retryCount++;
490
+ return await executeWithRetry();
491
+ }
492
+ await log(`\n\n❌ ${retryableError.label} persisted after ${maxRetries} retries`, { level: 'error' });
493
+ }
494
+
473
495
  // Check for usage limit errors first (more specific)
474
496
  const limitInfo = detectUsageLimit(lastMessage);
475
497
  if (limitInfo.isUsageLimit) {
@@ -203,6 +203,7 @@ const KNOWN_OPTION_NAMES = [
203
203
  'allow-to-push-to-contributors-pull-requests-as-maintainer',
204
204
  'prefix-fork-name-with-owner-name',
205
205
  'auto-restart-max-iterations',
206
+ 'auto-resume-max-iterations',
206
207
  'auto-continue-only-on-new-comments',
207
208
  'auto-restart-on-limit-reset',
208
209
  'auto-restart-on-non-updated-pull-request-description',