npm - @link-assistant/hive-mind - Versions diffs - 1.74.11 → 1.75.0 - Mend

@link-assistant/hive-mind 1.74.11 → 1.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/CHANGELOG.md +48 -0
package/package.json +1 -1
package/src/option-suggestions.lib.mjs +4 -0
package/src/solve.config.lib.mjs +29 -0
package/src/solve.keep-working.detect.lib.mjs +223 -0
package/src/solve.keep-working.lib.mjs +285 -0
package/src/solve.mjs +13 -13
package/src/tool-retry.lib.mjs +14 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,53 @@
 # @link-assistant/hive-mind
+## 1.75.0
+### Minor Changes
+- d2adf6b: feat(solve): experimental `--keep-working-until-all-requirements-are-fully-done` (#1883)
+  Add an experimental `solve` option that, after the main run (and any `--finalize`
+  pass), scans three cheap sources — the pull request description, the AI solution
+  summary, and the added lines of changed markdown documents — for strong
+  indicators of deferred work ("out of scope", "future work", "follow-up PR",
+  "deferred", "delayed", "TODO"/"TBD", etc.) using ~14 regular expressions. When
+  indicators are found it auto-restarts the AI tool with the concrete detected
+  reasons plus a verbatim reinforcement prompt, and repeats until the scan is clean
+  or the restart limit is reached.
+  Limit semantics:
+  - `--keep-working-until-all-requirements-are-fully-done` (bare) → 5 restarts
+  - `... 3` → an explicit count
+  - `... forever` / `unlimited` / `infinite` / `0` → no limit (with a hard cap of 3
+    consecutive errors as a safety net)
+  Aliases: `--keep-going-until-all-requirements-are-fully-done`, `--keep-working`,
+  `--keep-going`.
+  Detection lives in a pure, network-free module
+  (`src/solve.keep-working.detect.lib.mjs`) for full unit-test coverage;
+  orchestration lives in `src/solve.keep-working.lib.mjs`. A deep case study is
+  compiled under `docs/case-studies/issue-1883/`.
+## 1.74.12
+### Patch Changes
+- e921b34: fix(retry): treat "socket connection was closed unexpectedly" as a transient, retryable error (#1881)
+  The Claude/Codex CLI surfaces transient network disconnects (the Anthropic SDK's
+  underlying `fetch()` socket dropping mid-stream) as a synthetic error:
+  `API Error: The socket connection was closed unexpectedly.` Previously
+  `classifyRetryableError()` did not recognise this family of errors, so a single
+  dropped socket aborted the entire solve session (exit code 1, zero retries) and
+  discarded all in-progress work. These socket/connection drops
+  (`socket connection was closed unexpectedly`, `socket hang up`, `ECONNRESET`,
+  `connection reset`, `Connection error`, `fetch failed`, `network connection lost`)
+  are now classified as retryable, so the session is retried with `--resume`
+  (context preserved) via the existing exponential-backoff path. Because
+  `classifyRetryableError` is the shared classifier, the fix covers the Claude,
+  Codex and Agent execution loops at once.
 ## 1.74.11
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.74.11",
+  "version": "1.75.0",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/option-suggestions.lib.mjs CHANGED Viewed

@@ -221,6 +221,10 @@ const KNOWN_OPTION_NAMES = [
   'prompt-ensure-all-requirements-are-met',
   'finalize',
   'finalize-model',
+  'keep-working-until-all-requirements-are-fully-done',
+  'keep-going-until-all-requirements-are-fully-done',
+  'keep-working',
+  'keep-going',
 ];
 /**

package/src/solve.config.lib.mjs CHANGED Viewed

@@ -586,6 +586,12 @@ export const SOLVE_OPTION_DEFINITIONS = {
     description: '[EXPERIMENTAL] Model to use for --finalize iterations. Defaults to the same model as --model.',
     default: undefined,
   },
+  'keep-working-until-all-requirements-are-fully-done': {
+    type: 'string',
+    description: '[EXPERIMENTAL] After the main solve completes, scan the pull request description, the AI solution summary and changed markdown documents for strong indicators of deferred/delayed/out-of-scope work (e.g. "future work", "out of scope", "deferred", "follow-up PR", "TODO") and automatically restart the AI tool to finish everything in this single pull request. Accepts a number of restarts (default: 5), or "forever"/"unlimited" to remove the limit. Bare flag means the default of 5.',
+    alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
+    default: undefined,
+  },
   'working-session-live-progress': {
     type: 'string',
     description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
@@ -836,6 +842,29 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
     }
   }
+  // --keep-working-until-all-requirements-are-fully-done normalization
+  // Issue #1883: the flag accepts a number of restarts, the keywords
+  // "forever"/"unlimited"/"infinite", or no value (bare flag => default of 5).
+  // We canonicalize the bare-flag / empty-string case here so downstream
+  // detection (normalizeKeepWorkingLimit) sees a meaningful value. Final
+  // numeric normalization happens at runtime in solve.keep-working.lib.mjs.
+  {
+    const keepWorkingAliases = ['--keep-working-until-all-requirements-are-fully-done', '--keep-going-until-all-requirements-are-fully-done', '--keep-working', '--keep-going'];
+    const keepWorkingProvided = keepWorkingAliases.some(alias => hasRawOption(rawArgs, alias));
+    if (keepWorkingProvided) {
+      const raw = argv.keepWorkingUntilAllRequirementsAreFullyDone;
+      // Trim BEFORE the emptiness check: a whitespace-only value would otherwise
+      // be stored as '' and later read as "disabled" by normalizeKeepWorkingLimit,
+      // even though the user did pass the flag.
+      const current = typeof raw === 'string' ? raw.trim() : raw;
+      if (current === true || current === '' || current === undefined || current === null) {
+        // Bare flag (no value) -> yargs may yield true or an empty string; treat as
+        // the default count (mirrors DEFAULT_KEEP_WORKING_LIMIT in
+        // solve.keep-working.detect.lib.mjs).
+        argv.keepWorkingUntilAllRequirementsAreFullyDone = 5;
+      } else {
+        // Strings are stored trimmed; any other value is passed through unchanged.
+        argv.keepWorkingUntilAllRequirementsAreFullyDone = current;
+      }
+    }
+    // Not provided: nothing to do. The option's default is already undefined,
+    // which keeps the feature disabled.
+  }
   // --working-session-live-progress normalization
   // When passed as --working-session-live-progress (no value), yargs gives true for string type
   // Normalize: true → "comment", validate known values

package/src/solve.keep-working.detect.lib.mjs ADDED Viewed

@@ -0,0 +1,223 @@
+#!/usr/bin/env node
+/**
+ * Pure detection + normalization helpers for the keep-working feature.
+ *
+ * This module intentionally has NO use-m / command-stream / network imports so
+ * it can be unit-tested in isolation (mirroring auto-iteration-limits.lib.mjs).
+ * The orchestration lives in solve.keep-working.lib.mjs.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
+ */
+/**
+ * The default number of auto-restarts when the feature is enabled without an
+ * explicit count. It is also the default `fallback` argument of
+ * normalizeKeepWorkingLimit, so invalid flag values resolve to it as well.
+ */
+export const DEFAULT_KEEP_WORKING_LIMIT = 5;
+/**
+ * The reinforcement prompt appended to every keep-working restart, in addition
+ * to the concrete detected reasons. Taken verbatim from issue #1883, so the
+ * wording is intentionally left exactly as written there.
+ * buildKeepWorkingFeedback emits it as the last non-empty feedback line.
+ */
+export const KEEP_WORKING_PROMPT = 'Please plan and execute everything in this single pull request, you have unlimited time and context, as context auto-compacts and you can continue indefinitely, until it is each and every requirement fully addressed, and everything is totally done.';
+/**
+ * Strong indicators that work was deferred / delayed / left for a future pull
+ * request. These intentionally favour recall over precision: when the user
+ * enables --keep-working-until-all-requirements-are-fully-done they explicitly
+ * want the AI to keep going, so we accept some false positives (issue #1883).
+ *
+ * Each entry has a human-readable `label` (shown to the user / AI as the reason
+ * for the restart) and a `pattern` (a global, case-insensitive RegExp).
+ *
+ * Because every pattern carries the `g` flag, the RegExp objects keep
+ * `lastIndex` state between exec()/test() calls. Any code that reuses them must
+ * reset `lastIndex` first, as detectDeferredWork does.
+ *
+ * IMPORTANT: keep these patterns anchored on deferral semantics so the
+ * reinforcement prompt itself ("until it is each and every requirement fully
+ * addressed") does NOT match and cause an infinite restart loop.
+ */
+export const DEFERRED_WORK_PATTERNS = [
+  { label: 'out of scope', pattern: /\b(?:out[\s-]of[\s-]scope|beyond\s+the\s+scope|outside\s+the\s+scope|not\s+(?:in|within)\s+(?:the\s+)?scope)\b/gi },
+  { label: 'future work', pattern: /\bfuture\s+(?:work|improvements?|enhancements?|iterations?|steps?|considerations?)\b/gi },
+  { label: 'future / separate / follow-up pull request', pattern: /\b(?:in\s+a\s+|a\s+)?(?:future|separate|subsequent|later|next|follow[\s-]?up|another)\s+(?:pull\s+request|pr|mr|merge\s+request|change(?:set)?|commit)\b/gi },
+  { label: 'follow-up work', pattern: /\bfollow[\s-]?up(?:\s+(?:work|task|item|pr|pull\s+request|issue))?\b/gi },
+  // The trailing negative lookahead rejects a match that is immediately
+  // followed by "to the caller" (e.g. "defers to the caller").
+  { label: 'deferred', pattern: /\bdefer(?:red|ring|s)?\b(?!\s+to\s+the\s+caller)/gi },
+  { label: 'delayed / postponed', pattern: /\b(?:delayed|postponed|postpone|deprioriti[sz]ed)\b/gi },
+  { label: 'planned for later / another pull request', pattern: /\bplanned\s+for\s+(?:a\s+)?(?:future|later|the\s+next|another|separate|subsequent)\b/gi },
+  { label: 'left / leaving for later', pattern: /\ble(?:ft|aving|ave)\s+(?:it\s+|this\s+|that\s+|them\s+)?(?:for\s+(?:later|now|the\s+future)|as\s+(?:a\s+)?(?:future|follow[\s-]?up))/gi },
+  { label: 'will be addressed later / separately', pattern: /\b(?:will|to)\s+be\s+(?:addressed|handled|implemented|done|tackled|covered|completed|fixed)\s+(?:later|separately|in\s+(?:a\s+)?(?:future|subsequent|separate|follow[\s-]?up|another|the\s+next))/gi },
+  { label: 'not implemented yet', pattern: /\bnot\s+(?:yet\s+)?(?:implemented|done|completed|finished|addressed|supported|covered)(?:\s+yet)?\b/gi },
+  // Also matches "to do" / "to-do" / "todos" written with a space, a hyphen or
+  // no separator at all.
+  { label: 'to be implemented / TBD', pattern: /\b(?:to\s+be\s+(?:implemented|done|added|determined|decided)|tbd|to[\s-]?dos?|fixme)\b/gi },
+  { label: 'remaining work / not covered', pattern: /\b(?:remaining\s+(?:work|tasks?|items?)|not\s+covered\s+(?:here|in\s+this\s+(?:pr|pull\s+request|change))|won['’]?t\s+(?:be\s+)?(?:covered|implemented|addressed|done)(?:\s+here)?)\b/gi },
+  { label: 'tracked separately / in a separate issue', pattern: /\btrack(?:ed|ing)?\s+(?:this\s+|it\s+|them\s+|separately\s+)?(?:in\s+)?(?:a\s+)?(?:separate|new|future|follow[\s-]?up)\s+(?:issue|ticket|task)\b/gi },
+  { label: 'for now / as a stopgap / temporary', pattern: /\b(?:for\s+now|as\s+a\s+(?:stop[\s-]?gap|temporary\s+measure|first\s+step)|in\s+the\s+meantime)\b/gi },
+];
+// Lower-cased keywords that request an unlimited number of restarts.
+const UNLIMITED_KEYWORDS = new Set(['forever', 'unlimited', 'infinite', 'infinity', 'inf', 'no-limit', 'nolimit', 'none', 'always']);
+/**
+ * Tells whether a raw flag value asks for an unlimited number of restarts.
+ *
+ * Numbers qualify when they are `Infinity` or `0`; strings qualify when, after
+ * trimming and lower-casing, they are `'0'` or one of UNLIMITED_KEYWORDS.
+ * Every other type yields false.
+ *
+ * @param {*} value - raw flag value
+ * @returns {boolean}
+ */
+export const isUnlimitedKeepWorking = value => {
+  switch (typeof value) {
+    case 'number':
+      return value === Infinity || value === 0;
+    case 'string': {
+      const keyword = value.trim().toLowerCase();
+      return keyword === '0' || UNLIMITED_KEYWORDS.has(keyword);
+    }
+    default:
+      return false;
+  }
+};
+/**
+ * Turn the raw --keep-working-until-all-requirements-are-fully-done value into
+ * a numeric restart limit.
+ *
+ * Resolution order:
+ *  1. undefined / null / false / "" -> 0 (feature disabled)
+ *  2. boolean true (flag without value) -> `fallback`
+ *  3. anything isUnlimitedKeepWorking accepts ("forever", "0", 0, ...) -> Infinity
+ *  4. a finite number / numeric string >= 1 -> Math.floor(value)
+ *  5. everything else (NaN, negative, fractions below 1) -> `fallback`
+ *
+ * @param {*} value
+ * @param {number} [fallback=DEFAULT_KEEP_WORKING_LIMIT]
+ * @returns {number} numeric limit (Infinity for unlimited, 0 when disabled)
+ */
+export const normalizeKeepWorkingLimit = (value, fallback = DEFAULT_KEEP_WORKING_LIMIT) => {
+  // Values that mean "the flag was not supplied".
+  const disabledValues = [undefined, null, false, ''];
+  if (disabledValues.includes(value)) return 0;
+
+  // Bare flag: no explicit count was given.
+  if (value === true) return fallback;
+
+  if (isUnlimitedKeepWorking(value)) return Infinity;
+
+  const count = Number(value);
+  return Number.isFinite(count) && count >= 1 ? Math.floor(count) : fallback;
+};
+/**
+ * Render a restart limit for log output.
+ * @param {number} limit - numeric limit; Infinity stands for "no limit"
+ * @returns {string} 'unlimited' for Infinity, otherwise the limit as text
+ */
+export const formatKeepWorkingLimit = limit => {
+  if (limit === Infinity) {
+    return 'unlimited';
+  }
+  return `${limit}`;
+};
+/**
+ * Scan one block of text for deferred-work indicators.
+ *
+ * Every DEFERRED_WORK_PATTERNS entry is run over the text; each hit is reported
+ * with a whitespace-collapsed snippet covering up to 40 characters on either
+ * side of the match. Hits with the same label and (case-insensitive) snippet
+ * are reported once.
+ *
+ * @param {string} text - the text to scan
+ * @param {string} [source='text'] - a label describing where the text came from
+ * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
+ */
+export const detectDeferredWork = (text, source = 'text') => {
+  if (typeof text !== 'string' || text.length === 0) return [];
+  const contextChars = 40;
+  const hits = [];
+  const reported = new Set();
+  for (const entry of DEFERRED_WORK_PATTERNS) {
+    const { label, pattern: regex } = entry;
+    // The shared regexes are global, so start every scan from the beginning.
+    regex.lastIndex = 0;
+    for (let found = regex.exec(text); found !== null; found = regex.exec(text)) {
+      const phrase = found[0];
+      const from = Math.max(0, found.index - contextChars);
+      const to = Math.min(text.length, found.index + phrase.length + contextChars);
+      const snippet = text.slice(from, to).replace(/\s+/g, ' ').trim();
+      const dedupeKey = `${label}::${snippet.toLowerCase()}`;
+      if (!reported.has(dedupeKey)) {
+        reported.add(dedupeKey);
+        hits.push({ label, match: phrase, snippet, source });
+      }
+      // An empty match would never advance on its own; step past it.
+      if (regex.lastIndex === found.index) regex.lastIndex += 1;
+    }
+    // Leave the shared regex clean for the next caller.
+    regex.lastIndex = 0;
+  }
+  return hits;
+};
+/**
+ * Feed every configured source through detectDeferredWork and concatenate the
+ * results, preserving source order.
+ *
+ * @param {Array<{source: string, text: string}>} sources - may be null/undefined
+ * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
+ */
+export const detectDeferredWorkInSources = sources => {
+  const combined = [];
+  for (const entry of sources || []) {
+    const hits = detectDeferredWork(entry.text, entry.source);
+    combined.push(...hits);
+  }
+  return combined;
+};
+/**
+ * Pull the added lines out of a unified diff patch.
+ *
+ * A line counts as added when it starts with "+" but not with "+++"; the
+ * leading "+" is removed from each kept line.
+ *
+ * @param {string} patch
+ * @returns {string} the added lines joined with "\n" ('' for empty or non-string input)
+ */
+export const extractAddedLinesFromPatch = patch => {
+  if (typeof patch !== 'string' || patch === '') return '';
+  const added = [];
+  for (const row of patch.split('\n')) {
+    if (!row.startsWith('+') || row.startsWith('+++')) continue;
+    added.push(row.slice(1));
+  }
+  return added.join('\n');
+};
+/**
+ * Assemble the feedback lines handed to the AI tool for one keep-working
+ * restart: a banner with the restart counter, up to 15 detected reasons (plus
+ * an overflow line when more exist), the "no future pull request" instructions
+ * and finally KEEP_WORKING_PROMPT.
+ *
+ * @param {Array<{label, snippet, source}>} detections - may be null/undefined
+ * @param {number} iteration - 1-based restart counter shown in the banner
+ * @param {number} limit - restart limit (Infinity is rendered as "unlimited")
+ * @returns {string[]}
+ */
+export const buildKeepWorkingFeedback = (detections, iteration, limit) => {
+  const limitLabel = formatKeepWorkingLimit(limit);
+  const divider = '='.repeat(60);
+  const all = detections || [];
+  // Cap the listed reasons at 15 so the prompt stays focused.
+  const visible = all.slice(0, 15);
+  const hiddenCount = all.length - visible.length;
+
+  const banner = ['', divider, `🔁 KEEP WORKING UNTIL ALL REQUIREMENTS ARE FULLY DONE (restart ${iteration}/${limitLabel}):`, divider, '', 'It looks like some work was deferred, delayed or planned for a future pull request.', 'The following strong indicators of unfinished / deferred work were detected:', ''];
+  const reasons = visible.map(item => `  • [${item.label}] in ${item.source}: "${item.snippet}"`);
+  const overflow = hiddenCount > 0 ? [`  • ...and ${hiddenCount} more indicator(s)`] : [];
+  const instructions = ['', 'There is NO future pull request. This is the single pull request where everything must be done.', 'Do not defer, delay or postpone anything. Remove any "future work" / "out of scope" / "TODO" / "follow-up" notes by actually implementing them now.', '', KEEP_WORKING_PROMPT, ''];
+
+  return [...banner, ...reasons, ...overflow, ...instructions];
+};
+// Default export: the same constants and helpers that are exported by name
+// above, bundled into a single object.
+export default {
+  DEFAULT_KEEP_WORKING_LIMIT,
+  KEEP_WORKING_PROMPT,
+  DEFERRED_WORK_PATTERNS,
+  isUnlimitedKeepWorking,
+  normalizeKeepWorkingLimit,
+  formatKeepWorkingLimit,
+  detectDeferredWork,
+  detectDeferredWorkInSources,
+  extractAddedLinesFromPatch,
+  buildKeepWorkingFeedback,
+};

package/src/solve.keep-working.lib.mjs ADDED Viewed

@@ -0,0 +1,285 @@
+#!/usr/bin/env node
+/**
+ * Keep-working-until-done module for solve.mjs
+ *
+ * [EXPERIMENTAL] When --keep-working-until-all-requirements-are-fully-done is
+ * enabled, after the main solve (and any other post-processing) completes, this
+ * module scans the pull request description, the AI working-session/solution
+ * summary, and the markdown documents changed by the pull request for strong
+ * indicators that the AI deferred, delayed or postponed work to a future pull
+ * request / iteration (e.g. "out of scope", "future work", "deferred",
+ * "follow-up PR", "TODO", ...).
+ *
+ * When such indicators are found, it automatically restarts the AI tool with a
+ * prompt instructing it to finish everything in this single pull request, in
+ * addition to the concrete detected reasons. It keeps restarting until no
+ * indicators remain or until the configured restart limit is reached.
+ *
+ * By default the restart limit is 5. The limit can be set to a custom number,
+ * or to "forever" / "unlimited" / "infinite" / 0 to remove the limit entirely.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
+ */
+// Check if use is already defined globally (when imported from solve.mjs)
+// If not, fetch it (when running standalone)
+// SECURITY NOTE(review): the standalone path downloads JavaScript from
+// unpkg.com at module-load time and passes it to eval(). The URL carries no
+// version pin, so whatever that URL serves is executed with this process's
+// privileges.
+if (typeof globalThis.use === 'undefined') {
+  globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
+}
+const use = globalThis.use;
+// Use command-stream for consistent $ behavior across runtimes
+const { $: __rawDollar$ } = await use('command-stream');
+const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
+const $ = wrapDollarWithGhRetry(__rawDollar$);
+// Import shared library functions
+const lib = await import('./lib.mjs');
+const { log, cleanErrorMessage } = lib;
+// Import shared restart utilities
+const restartShared = await import('./solve.restart-shared.lib.mjs');
+const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;
+const sentryLib = await import('./sentry.lib.mjs');
+const { reportError } = sentryLib;
+// Pure detection + normalization helpers live in a separate, network-free
+// module so they can be unit-tested in isolation (issue #1883).
+const detectLib = await import('./solve.keep-working.detect.lib.mjs');
+const { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, extractAddedLinesFromPatch, buildKeepWorkingFeedback } = detectLib;
+// Re-export the pure helpers so existing importers of this module keep working.
+export { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, buildKeepWorkingFeedback };
+/**
+ * Gather the texts that are scanned for deferred-work indicators, in this
+ * order:
+ *   1. The pull request description (body), fetched with `gh api`.
+ *   2. The AI working-session / solution summary (passed in-memory).
+ *   3. The added lines of every non-removed markdown file (.md / .markdown /
+ *      .mdx) changed by the pull request, fetched with `gh api`.
+ *
+ * Empty or whitespace-only texts are left out. A failing `gh` call is reported
+ * through reportError and that source is omitted; the function itself
+ * still resolves.
+ *
+ * @param {object} params
+ * @param {string} params.owner
+ * @param {string} params.repo
+ * @param {string|number} params.prNumber
+ * @param {string} [params.resultSummary]
+ * @returns {Promise<Array<{source: string, text: string}>>}
+ */
+export const collectDeferredWorkSources = async ({ owner, repo, prNumber, resultSummary }) => {
+  const collected = [];
+
+  // 1. Pull request description
+  try {
+    const response = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.body // ""'`;
+    const description = response.code === 0 ? response.stdout.toString() : '';
+    if (description.trim()) {
+      collected.push({ source: 'pull request description', text: description });
+    }
+  } catch (error) {
+    reportError(error, { context: 'keep_working_collect_pr_body', owner, repo, prNumber, operation: 'fetch_pr_body' });
+  }
+
+  // 2. AI working-session / solution summary (in-memory, no token cost)
+  const hasSummary = typeof resultSummary === 'string' && resultSummary.trim() !== '';
+  if (hasSummary) {
+    collected.push({ source: 'AI solution summary', text: resultSummary });
+  }
+
+  // 3. Changed markdown documents (scan only added lines from the diff)
+  try {
+    const listing = await $`gh api repos/${owner}/${repo}/pulls/${prNumber}/files --paginate`;
+    if (listing.code === 0) {
+      const markdownName = /\.(md|markdown|mdx)$/i;
+      const changedFiles = JSON.parse(listing.stdout.toString() || '[]');
+      for (const entry of changedFiles) {
+        const filename = entry.filename || '';
+        const isLiveMarkdown = markdownName.test(filename) && entry.status !== 'removed';
+        if (!isLiveMarkdown) continue;
+        const addedText = extractAddedLinesFromPatch(entry.patch);
+        if (addedText && addedText.trim()) {
+          collected.push({ source: `changed markdown document ${filename}`, text: addedText });
+        }
+      }
+    }
+  } catch (error) {
+    reportError(error, { context: 'keep_working_collect_md_files', owner, repo, prNumber, operation: 'fetch_pr_files' });
+  }
+
+  return collected;
+};
+/**
+ * Runs keep-working restart iterations after the main solve.
+ *
+ * Each pass re-collects the scan sources and runs the deferred-work detector.
+ * While indicators remain and the restart limit allows, the AI tool is
+ * re-invoked through executeToolIteration with the detections as feedback.
+ * The loop ends when the scan is clean, the limit is reached, source
+ * collection throws, a usage limit is hit, or MAX_CONSECUTIVE_ERRORS API
+ * errors occur in a row.
+ *
+ * @param {object} params
+ * @param {string} params.issueUrl
+ * @param {string} params.owner
+ * @param {string} params.repo
+ * @param {string|number} params.issueNumber
+ * @param {string|number} params.prNumber
+ * @param {string} params.branchName
+ * @param {string} params.tempDir
+ * @param {string} [params.workspaceTmpDir]
+ * @param {object} params.argv - CLI arguments
+ * @param {function} params.cleanupClaudeFile - cleanup function
+ * @param {string} [params.resultSummary] - AI solution summary from the last session
+ * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
+ *   null when the feature is disabled, no PR number is known, or no restart
+ *   was performed; otherwise the latest session / pricing fields reported by
+ *   the restarts (a field stays undefined when no restart reported it).
+ */
+export const runKeepWorkingUntilDone = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
+  const limit = normalizeKeepWorkingLimit(argv.keepWorkingUntilAllRequirementsAreFullyDone);
+  if (!limit || !prNumber) {
+    return null;
+  }
+  // Log at most 10 detections so the output stays readable.
+  const logDetections = async detections => {
+    for (const detection of detections.slice(0, 10)) {
+      await log(`   • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
+    }
+  };
+  await log('');
+  await log(`🔁 KEEP-WORKING: Scanning for deferred / delayed / out-of-scope work (limit: ${formatKeepWorkingLimit(limit)} restart(s))`);
+  await log('   Sources: pull request description, AI solution summary, changed markdown documents');
+  await log('');
+  // Get PR merge state status for the iterations
+  let currentMergeStateStatus = null;
+  try {
+    // NOTE(review): the REST pull-request payload documents `mergeable_state`;
+    // `mergeStateStatus` is the GraphQL field name. Confirm this query
+    // actually yields a value.
+    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
+    if (prStateResult.code === 0) {
+      // jq prints the literal text "null" for a missing field; keep a real
+      // null instead of forwarding the string "null" (or an empty string).
+      const mergeState = prStateResult.stdout.toString().trim();
+      currentMergeStateStatus = mergeState && mergeState !== 'null' ? mergeState : null;
+    }
+  } catch {
+    // Ignore errors getting merge state
+  }
+  let sessionId;
+  let anthropicTotalCostUSD;
+  let publicPricingEstimate;
+  let pricingInfo;
+  let lastResultSummary = resultSummary;
+  let consecutiveErrors = 0;
+  // Hard safety cap even in "unlimited" mode, to avoid spinning forever on
+  // repeated failures (issue #1883: "limit it with 5 auto-restarts ... in case
+  // of errors"). Only consecutive errors count toward this cap.
+  const MAX_CONSECUTIVE_ERRORS = 3;
+  let iteration = 0;
+  while (true) {
+    // Gather and scan sources fresh on every iteration.
+    let sources = [];
+    try {
+      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
+    } catch (error) {
+      reportError(error, { context: 'keep_working_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
+      await log(`⚠️  KEEP-WORKING: Could not collect sources: ${cleanErrorMessage(error)}`, { level: 'warning' });
+      break;
+    }
+    const detections = detectDeferredWorkInSources(sources);
+    if (detections.length === 0) {
+      if (iteration === 0) {
+        await log('✅ KEEP-WORKING: No deferred / delayed / out-of-scope work detected. Nothing to restart for.');
+      } else {
+        await log(`✅ KEEP-WORKING: No more deferred work detected after ${iteration} restart(s). All requirements appear to be fully done.`);
+      }
+      break;
+    }
+    if (iteration >= limit) {
+      await log(`🛑 KEEP-WORKING: Reached restart limit (${formatKeepWorkingLimit(limit)}) but ${detections.length} deferred-work indicator(s) still detected.`);
+      await log('   Stopping to avoid an unbounded loop. Increase the limit (or use "forever"/"unlimited") to keep going.');
+      await logDetections(detections);
+      break;
+    }
+    iteration++;
+    await log('');
+    await log(`🔁 KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)}: ${detections.length} deferred-work indicator(s) detected, restarting...`);
+    await logDetections(detections);
+    // Issue #1572 pattern: sync local branch with remote before each iteration
+    try {
+      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
+      if (pullResult.code === 0) {
+        await log(`   Synced local branch ${branchName} from remote`, { verbose: true });
+      } else {
+        await log(`   Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
+      }
+    } catch (error) {
+      reportError(error, { context: 'keep_working_git_pull', branchName, operation: 'git_pull' });
+      await log(`   Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
+    }
+    const feedbackLines = buildKeepWorkingFeedback(detections, iteration, limit);
+    const iterationResult = await executeToolIteration({
+      issueUrl,
+      owner,
+      repo,
+      issueNumber,
+      prNumber,
+      branchName,
+      tempDir,
+      workspaceTmpDir,
+      mergeStateStatus: currentMergeStateStatus,
+      feedbackLines,
+      argv: {
+        ...argv,
+        // Reinforce the "finish everything now" guidance in the system prompt.
+        promptEnsureAllRequirementsAreMet: true,
+        // Prevent recursive keep-working inside the restart iteration.
+        // Use `false`, not `0`: normalizeKeepWorkingLimit maps 0 to Infinity
+        // (unlimited), whereas false is its explicit "disabled" value.
+        keepWorkingUntilAllRequirementsAreFullyDone: false,
+      },
+    });
+    // Update session data from the restart.
+    if (iterationResult) {
+      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
+      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
+      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
+      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
+      // The next scan reads the summary produced by this restart.
+      if (iterationResult.result) lastResultSummary = iterationResult.result;
+    }
+    // Issue #1883: cap consecutive errors so we don't spin forever (especially
+    // important in "unlimited" mode).
+    if (isUsageLimitReached(iterationResult)) {
+      await log('🛑 KEEP-WORKING: Usage limit reached during restart. Stopping keep-working loop.');
+      break;
+    }
+    if (isApiError(iterationResult)) {
+      consecutiveErrors++;
+      await log(`⚠️  KEEP-WORKING: API error during restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
+      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
+        await log('🛑 KEEP-WORKING: Too many consecutive errors. Stopping keep-working loop.');
+        break;
+      }
+    } else {
+      consecutiveErrors = 0;
+    }
+    await log(`✅ KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)} complete`);
+    await log('');
+  }
+  // Clean up CLAUDE.md/.gitkeep after restarts
+  try {
+    await cleanupClaudeFile(tempDir, branchName, null, argv);
+  } catch (error) {
+    reportError(error, { context: 'keep_working_cleanup', branchName, operation: 'cleanup_claude_file' });
+  }
+  if (iteration === 0) return null;
+  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
+};
+// Default export: the orchestration entry points plus the detection helpers
+// re-exported by name above. extractAddedLinesFromPatch is used internally by
+// this module but is not part of this object.
+export default {
+  DEFAULT_KEEP_WORKING_LIMIT,
+  KEEP_WORKING_PROMPT,
+  DEFERRED_WORK_PATTERNS,
+  isUnlimitedKeepWorking,
+  normalizeKeepWorkingLimit,
+  formatKeepWorkingLimit,
+  detectDeferredWork,
+  detectDeferredWorkInSources,
+  collectDeferredWorkSources,
+  buildKeepWorkingFeedback,
+  runKeepWorkingUntilDone,
+};

package/src/solve.mjs CHANGED Viewed

@@ -45,6 +45,7 @@ const watchLib = await import('./solve.watch.lib.mjs');
 const { startWatchMode } = watchLib;
 const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
 const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
+const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
 const exitHandler = await import('./exit-handler.lib.mjs');
 const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
 const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
@@ -852,6 +853,14 @@ try {
   let resultModelUsage = toolResult.resultModelUsage || null;
   let streamTokenUsage = toolResult.streamTokenUsage || null;
   let subAgentCalls = toolResult.subAgentCalls || null; // Issue #1590
+  // Fold the session / pricing fields returned by a restart pass into the
+  // running values. A missing result, or a falsy field, leaves the current
+  // value untouched.
+  const applyRestartResult = result => {
+    if (!result) return;
+    if (result.sessionId) sessionId = result.sessionId;
+    if (result.anthropicTotalCostUSD) anthropicTotalCostUSD = result.anthropicTotalCostUSD;
+    if (result.publicPricingEstimate) publicPricingEstimate = result.publicPricingEstimate;
+    if (result.pricingInfo) pricingInfo = result.pricingInfo;
+  };
   limitReached = toolResult.limitReached;
   cleanupContext.limitReached = limitReached;
@@ -1249,12 +1258,7 @@ try {
     });
     // Update session data from restart
-    if (restartResult) {
-      if (restartResult.sessionId) sessionId = restartResult.sessionId;
-      if (restartResult.anthropicTotalCostUSD) anthropicTotalCostUSD = restartResult.anthropicTotalCostUSD;
-      if (restartResult.publicPricingEstimate) publicPricingEstimate = restartResult.publicPricingEstimate;
-      if (restartResult.pricingInfo) pricingInfo = restartResult.pricingInfo;
-    }
+    applyRestartResult(restartResult);
     // Clean up CLAUDE.md/.gitkeep again after restart
     await cleanupClaudeFile(tempDir, branchName, null, argv);
@@ -1268,13 +1272,9 @@ try {
   }
   // Issue #1383: --finalize
-  const autoEnsureResult = await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile });
-  if (autoEnsureResult) {
-    if (autoEnsureResult.sessionId) sessionId = autoEnsureResult.sessionId;
-    if (autoEnsureResult.anthropicTotalCostUSD) anthropicTotalCostUSD = autoEnsureResult.anthropicTotalCostUSD;
-    if (autoEnsureResult.publicPricingEstimate) publicPricingEstimate = autoEnsureResult.publicPricingEstimate;
-    if (autoEnsureResult.pricingInfo) pricingInfo = autoEnsureResult.pricingInfo;
-  }
+  applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
+  // Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
+  applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
   // Start watch mode if enabled OR if we need to handle uncommitted changes
   if (argv.verbose) {

package/src/tool-retry.lib.mjs CHANGED Viewed

@@ -43,6 +43,20 @@ export const classifyRetryableError = value => {
     return { message, isRetryable: true, isCapacity: false, label: 'Stream disconnected before completion' };
   }
+  // Issue #1881: Transient socket / network disconnects from the SDK's underlying fetch.
+  // When the HTTP(S)/streaming socket drops mid-request, the Claude/Codex CLI surfaces a
+  // synthetic assistant message such as:
+  //   "API Error: The socket connection was closed unexpectedly. For more information,
+  //    pass `verbose: true` in the second argument to fetch()"
+  // These are network-level failures (QUIC/TCP resets, idle-socket teardown, proxy/VPN
+  // interruptions, undici socket hang-ups), not request-content errors, so they are safe
+  // to retry with the session preserved (--resume). Without this branch the whole solve
+  // session aborts on a single dropped socket.
+  // Upstream: anthropics/claude-code#48837, #51107, #54287, #60133.
+  if (lower.includes('socket connection was closed unexpectedly') || lower.includes('socket hang up') || lower.includes('econnreset') || lower.includes('connection reset') || lower.includes('network connection lost') || lower.includes('connection error') || lower.includes('fetch failed')) {
+    return { message, isRetryable: true, isCapacity: false, label: 'Socket/connection closed unexpectedly' };
+  }
   // Issue #1834: Corrupted extended-thinking blocks. When extended thinking is combined with tool
   // use, Claude Code can persist a thinking block to the session transcript with the `thinking`
   // text emptied to "" while retaining the original `signature`. On resume/continue the block is