@link-assistant/hive-mind 1.74.11 → 1.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,53 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.75.0
4
+
5
+ ### Minor Changes
6
+
7
+ - d2adf6b: feat(solve): experimental `--keep-working-until-all-requirements-are-fully-done` (#1883)
8
+
9
+ Add an experimental `solve` option that, after the main run (and any `--finalize`
10
+ pass), scans three cheap sources — the pull request description, the AI solution
11
+ summary, and the added lines of changed markdown documents — for strong
12
+ indicators of deferred work ("out of scope", "future work", "follow-up PR",
13
+ "deferred", "delayed", "TODO"/"TBD", etc.) using ~14 regular expressions. When
14
+ indicators are found it auto-restarts the AI tool with the concrete detected
15
+ reasons plus a verbatim reinforcement prompt, and repeats until the scan is clean
16
+ or the restart limit is reached.
17
+
18
+ Limit semantics:
19
+ - `--keep-working-until-all-requirements-are-fully-done` (bare) → 5 restarts
20
+ - `... 3` → an explicit count
21
+ - `... forever` / `unlimited` / `infinite` / `0` → no limit (with a hard cap of 3
22
+ consecutive errors as a safety net)
23
+
24
+ Aliases: `--keep-going-until-all-requirements-are-fully-done`, `--keep-working`,
25
+ `--keep-going`.
26
+
27
+ Detection lives in a pure, network-free module
28
+ (`src/solve.keep-working.detect.lib.mjs`) for full unit-test coverage;
29
+ orchestration lives in `src/solve.keep-working.lib.mjs`. A deep case study is
30
+ compiled under `docs/case-studies/issue-1883/`.
31
+
32
+ ## 1.74.12
33
+
34
+ ### Patch Changes
35
+
36
+ - e921b34: fix(retry): treat "socket connection was closed unexpectedly" as a transient, retryable error (#1881)
37
+
38
+ The Claude/Codex CLI surfaces transient network disconnects (the Anthropic SDK's
39
+ underlying `fetch()` socket dropping mid-stream) as a synthetic error:
40
+ `API Error: The socket connection was closed unexpectedly.` Previously
41
+ `classifyRetryableError()` did not recognise this family of errors, so a single
42
+ dropped socket aborted the entire solve session (exit code 1, zero retries) and
43
+ discarded all in-progress work. These socket/connection drops
44
+ (`socket connection was closed unexpectedly`, `socket hang up`, `ECONNRESET`,
45
+ `connection reset`, `Connection error`, `fetch failed`, `network connection lost`)
46
+ are now classified as retryable, so the session is retried with `--resume`
47
+ (context preserved) via the existing exponential-backoff path. Because
48
+ `classifyRetryableError` is the shared classifier, the fix covers the Claude,
49
+ Codex and Agent execution loops at once.
50
+
3
51
  ## 1.74.11
4
52
 
5
53
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.74.11",
3
+ "version": "1.75.0",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -221,6 +221,10 @@ const KNOWN_OPTION_NAMES = [
221
221
  'prompt-ensure-all-requirements-are-met',
222
222
  'finalize',
223
223
  'finalize-model',
224
+ 'keep-working-until-all-requirements-are-fully-done',
225
+ 'keep-going-until-all-requirements-are-fully-done',
226
+ 'keep-working',
227
+ 'keep-going',
224
228
  ];
225
229
 
226
230
  /**
@@ -586,6 +586,12 @@ export const SOLVE_OPTION_DEFINITIONS = {
586
586
  description: '[EXPERIMENTAL] Model to use for --finalize iterations. Defaults to the same model as --model.',
587
587
  default: undefined,
588
588
  },
589
+ 'keep-working-until-all-requirements-are-fully-done': {
590
+ type: 'string',
591
+ description: '[EXPERIMENTAL] After the main solve completes, scan the pull request description, the AI solution summary and changed markdown documents for strong indicators of deferred/delayed/out-of-scope work (e.g. "future work", "out of scope", "deferred", "follow-up PR", "TODO") and automatically restart the AI tool to finish everything in this single pull request. Accepts a number of restarts (default: 5), or "forever"/"unlimited" to remove the limit. Bare flag means the default of 5.',
592
+ alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
593
+ default: undefined,
594
+ },
589
595
  'working-session-live-progress': {
590
596
  type: 'string',
591
597
  description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
@@ -836,6 +842,29 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
836
842
  }
837
843
  }
838
844
 
845
+ // --keep-working-until-all-requirements-are-fully-done normalization
846
+ // Issue #1883: the flag accepts a number of restarts, the keywords
847
+ // "forever"/"unlimited"/"infinite", or no value (bare flag => default of 5).
848
+ // We canonicalize the bare-flag / empty-string case here so downstream
849
+ // detection (normalizeKeepWorkingLimit) sees a meaningful value. Final
850
+ // numeric normalization happens at runtime in solve.keep-working.lib.mjs.
851
+ {
852
+ const keepWorkingAliases = ['--keep-working-until-all-requirements-are-fully-done', '--keep-going-until-all-requirements-are-fully-done', '--keep-working', '--keep-going'];
853
+ const keepWorkingProvided = keepWorkingAliases.some(alias => hasRawOption(rawArgs, alias));
854
+ if (keepWorkingProvided) {
855
+ const current = argv.keepWorkingUntilAllRequirementsAreFullyDone;
856
+ // Bare flag (no value) -> yargs may yield true or an empty string; treat as default count.
857
+ if (current === true || current === '' || current === undefined || current === null) {
858
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = 5;
859
+ } else if (typeof current === 'string') {
860
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = current.trim();
861
+ }
862
+ } else if (argv.keepWorkingUntilAllRequirementsAreFullyDone === undefined) {
863
+ // Not provided: keep it disabled (do not coerce the string-type default).
864
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = undefined;
865
+ }
866
+ }
867
+
839
868
  // --working-session-live-progress normalization
840
869
  // When passed as --working-session-live-progress (no value), yargs gives true for string type
841
870
  // Normalize: true → "comment", validate known values
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Pure detection + normalization helpers for the keep-working feature.
5
+ *
6
+ * This module intentionally has NO use-m / command-stream / network imports so
7
+ * it can be unit-tested in isolation (mirroring auto-iteration-limits.lib.mjs).
8
+ * The orchestration lives in solve.keep-working.lib.mjs.
9
+ *
10
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
11
+ */
12
+
13
/**
 * The default number of auto-restarts when the feature is enabled without an
 * explicit count.
 *
 * It is also the default `fallback` argument of normalizeKeepWorkingLimit, so
 * it is what a bare flag (`true`) or an unparseable count resolves to.
 */
export const DEFAULT_KEEP_WORKING_LIMIT = 5;

/**
 * The reinforcement prompt appended to every keep-working restart, in addition
 * to the concrete detected reasons. Taken verbatim from issue #1883.
 *
 * Do not "fix" the wording: the text is a runtime string sent to the AI tool,
 * and buildKeepWorkingFeedback pushes it as the last non-empty feedback line.
 */
export const KEEP_WORKING_PROMPT = 'Please plan and execute everything in this single pull request, you have unlimited time and context, as context auto-compacts and you can continue indefinitely, until it is each and every requirement fully addressed, and everything is totally done.';
24
+
25
/**
 * Strong indicators that work was deferred / delayed / left for a future pull
 * request. These intentionally favour recall over precision: when the user
 * enables --keep-working-until-all-requirements-are-fully-done they explicitly
 * want the AI to keep going, so we accept some false positives (issue #1883).
 *
 * Each entry has a human-readable `label` (shown to the user / AI as the reason
 * for the restart) and a `pattern` (a global, case-insensitive RegExp).
 *
 * Notes for maintainers:
 * - The RegExp objects are shared and carry the `g` flag, so they keep
 *   `lastIndex` state between uses; detectDeferredWork resets `lastIndex`
 *   before scanning with each pattern.
 * - Entries may overlap on purpose (for example "follow-up PR" is matched both
 *   by the "follow-up pull request" entry and by the "follow-up work" entry),
 *   so one phrase can yield more than one detection with different labels.
 *
 * IMPORTANT: keep these patterns anchored on deferral semantics so the
 * reinforcement prompt itself ("until it is each and every requirement fully
 * addressed") does NOT match and cause an infinite restart loop.
 */
export const DEFERRED_WORK_PATTERNS = [
  { label: 'out of scope', pattern: /\b(?:out[\s-]of[\s-]scope|beyond\s+the\s+scope|outside\s+the\s+scope|not\s+(?:in|within)\s+(?:the\s+)?scope)\b/gi },
  { label: 'future work', pattern: /\bfuture\s+(?:work|improvements?|enhancements?|iterations?|steps?|considerations?)\b/gi },
  { label: 'future / separate / follow-up pull request', pattern: /\b(?:in\s+a\s+|a\s+)?(?:future|separate|subsequent|later|next|follow[\s-]?up|another)\s+(?:pull\s+request|pr|mr|merge\s+request|change(?:set)?|commit)\b/gi },
  // Also matches a bare "follow-up" / "followup" because the trailing noun group is optional.
  { label: 'follow-up work', pattern: /\bfollow[\s-]?up(?:\s+(?:work|task|item|pr|pull\s+request|issue))?\b/gi },
  // The negative lookahead excludes the phrase "defer(s/red/ring) to the caller".
  { label: 'deferred', pattern: /\bdefer(?:red|ring|s)?\b(?!\s+to\s+the\s+caller)/gi },
  { label: 'delayed / postponed', pattern: /\b(?:delayed|postponed|postpone|deprioriti[sz]ed)\b/gi },
  { label: 'planned for later / another pull request', pattern: /\bplanned\s+for\s+(?:a\s+)?(?:future|later|the\s+next|another|separate|subsequent)\b/gi },
  { label: 'left / leaving for later', pattern: /\ble(?:ft|aving|ave)\s+(?:it\s+|this\s+|that\s+|them\s+)?(?:for\s+(?:later|now|the\s+future)|as\s+(?:a\s+)?(?:future|follow[\s-]?up))/gi },
  { label: 'will be addressed later / separately', pattern: /\b(?:will|to)\s+be\s+(?:addressed|handled|implemented|done|tackled|covered|completed|fixed)\s+(?:later|separately|in\s+(?:a\s+)?(?:future|subsequent|separate|follow[\s-]?up|another|the\s+next))/gi },
  { label: 'not implemented yet', pattern: /\bnot\s+(?:yet\s+)?(?:implemented|done|completed|finished|addressed|supported|covered)(?:\s+yet)?\b/gi },
  // Covers "to be implemented", "TBD", "TODO" / "to-do" / "to do" (singular or plural) and "FIXME".
  { label: 'to be implemented / TBD', pattern: /\b(?:to\s+be\s+(?:implemented|done|added|determined|decided)|tbd|to[\s-]?dos?|fixme)\b/gi },
  // Accepts both the ASCII and the typographic apostrophe in "won't".
  { label: 'remaining work / not covered', pattern: /\b(?:remaining\s+(?:work|tasks?|items?)|not\s+covered\s+(?:here|in\s+this\s+(?:pr|pull\s+request|change))|won['’]?t\s+(?:be\s+)?(?:covered|implemented|addressed|done)(?:\s+here)?)\b/gi },
  { label: 'tracked separately / in a separate issue', pattern: /\btrack(?:ed|ing)?\s+(?:this\s+|it\s+|them\s+|separately\s+)?(?:in\s+)?(?:a\s+)?(?:separate|new|future|follow[\s-]?up)\s+(?:issue|ticket|task)\b/gi },
  { label: 'for now / as a stopgap / temporary', pattern: /\b(?:for\s+now|as\s+a\s+(?:stop[\s-]?gap|temporary\s+measure|first\s+step)|in\s+the\s+meantime)\b/gi },
];
54
+
55
// Keywords (compared after trimming and lower-casing) that mean "no restart limit".
const UNLIMITED_KEYWORDS = new Set(['forever', 'unlimited', 'infinite', 'infinity', 'inf', 'no-limit', 'nolimit', 'none', 'always']);

/**
 * Tells whether a raw flag value asks for an unlimited number of restarts.
 *
 * Unlimited means: the number `Infinity`, the number `0`, the string "0", or
 * one of the UNLIMITED_KEYWORDS (case-insensitive, surrounding whitespace
 * ignored). Every other value, of any type, is not unlimited.
 *
 * @param {*} value - raw value as received from the CLI parser
 * @returns {boolean} true when the value requests "no limit"
 */
export const isUnlimitedKeepWorking = value => {
  switch (typeof value) {
    case 'number':
      return value === Infinity || value === 0;
    case 'string': {
      const keyword = value.trim().toLowerCase();
      return keyword === '0' || UNLIMITED_KEYWORDS.has(keyword);
    }
    default:
      return false;
  }
};
72
+
73
/**
 * Turn the raw --keep-working-until-all-requirements-are-fully-done value into
 * a numeric restart limit.
 *
 * Resolution order:
 *   1. undefined / null / false / ""                 -> 0 (feature disabled)
 *   2. boolean true (flag given without a value)     -> fallback
 *   3. "forever" / "unlimited" / "infinite" / "0" / 0 -> Infinity (no limit)
 *   4. a finite number or numeric string >= 1        -> Math.floor(value)
 *   5. anything else (NaN, negatives, fractions < 1) -> fallback
 *
 * @param {*} value - raw flag value
 * @param {number} [fallback=DEFAULT_KEEP_WORKING_LIMIT] - limit used for a bare flag or an invalid count
 * @returns {number} numeric limit (Infinity for unlimited, 0 when disabled)
 */
export const normalizeKeepWorkingLimit = (value, fallback = DEFAULT_KEEP_WORKING_LIMIT) => {
  // Values that mean "the flag was not supplied at all".
  const disabledValues = [undefined, null, false, ''];
  if (disabledValues.includes(value)) return 0;

  // Bare flag without an explicit count.
  if (value === true) return fallback;

  // Unlimited keywords / 0.
  if (isUnlimitedKeepWorking(value)) return Infinity;

  const count = Number(value);
  const isUsableCount = Number.isFinite(count) && count >= 1;
  return isUsableCount ? Math.floor(count) : fallback;
};
104
+
105
/**
 * Render a restart limit for log output.
 *
 * @param {number} limit - numeric limit as produced by normalizeKeepWorkingLimit
 * @returns {string} "unlimited" for Infinity, otherwise the limit as a string
 */
export const formatKeepWorkingLimit = limit => {
  if (limit === Infinity) {
    return 'unlimited';
  }
  return `${limit}`;
};
111
+
112
/**
 * Find deferred-work indicators inside one block of text.
 *
 * Every entry of DEFERRED_WORK_PATTERNS is applied to the whole text. Each hit
 * is reported with up to 40 characters of context on either side, with runs of
 * whitespace collapsed to a single space. Hits that produce the same label and
 * the same (case-insensitive) snippet are reported once.
 *
 * @param {string} text - the text to scan
 * @param {string} [source='text'] - a label describing where the text came from
 * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
 *   detections in pattern order, then in order of appearance; empty for
 *   non-string or empty input
 */
export const detectDeferredWork = (text, source = 'text') => {
  if (typeof text !== 'string' || text.length === 0) return [];

  const CONTEXT_CHARS = 40;
  const found = [];
  const alreadyReported = new Set();

  for (const { label, pattern } of DEFERRED_WORK_PATTERNS) {
    // The patterns are shared global RegExp objects; start every scan from the beginning.
    pattern.lastIndex = 0;

    for (const hit of text.matchAll(pattern)) {
      const [matchedText] = hit;
      const from = Math.max(0, hit.index - CONTEXT_CHARS);
      const to = Math.min(text.length, hit.index + matchedText.length + CONTEXT_CHARS);
      const snippet = text.slice(from, to).replace(/\s+/g, ' ').trim();

      // One report per identical (label + snippet) pair within this source.
      const dedupeKey = `${label}::${snippet.toLowerCase()}`;
      if (alreadyReported.has(dedupeKey)) continue;

      alreadyReported.add(dedupeKey);
      found.push({ label, match: matchedText, snippet, source });
    }
  }

  return found;
};
151
+
152
/**
 * Scan every given source with detectDeferredWork and concatenate the results.
 *
 * @param {Array<{source: string, text: string}>} sources - labelled text blocks; a missing list is treated as empty
 * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
 *   all detections, in the order of the sources
 */
export const detectDeferredWorkInSources = sources => {
  const entries = sources || [];
  return entries.flatMap(({ source, text }) => detectDeferredWork(text, source));
};
166
+
167
/**
 * Extract the added lines (lines beginning with "+") from a unified diff patch.
 *
 * File headers ("+++ b/file") are not content and are skipped. Inside a hunk,
 * however, a line starting with "+++" is a genuine added line whose content
 * starts with "++" (for example a TOML front-matter fence "+++" shows up as
 * "++++"). To tell the two apart, the line counts announced by each
 * "@@ -a,b +c,d @@" header are tracked: while a hunk still has lines left,
 * every "+" line is content. Outside of any hunk (or for input without hunk
 * headers) "+" lines are kept unless they start with "+++".
 *
 * @param {string} patch - unified diff text, e.g. a per-file patch with hunks only
 * @returns {string} the added lines without their leading "+", joined by "\n";
 *   an empty string for empty or non-string input
 */
export const extractAddedLinesFromPatch = patch => {
  if (!patch || typeof patch !== 'string') return '';

  // Captures the old and new line counts; a missing count means 1.
  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;
  const added = [];
  let oldRemaining = 0;
  let newRemaining = 0;

  for (const line of patch.split('\n')) {
    // Hunk content is always prefixed with " ", "+", "-" or "\", so a line
    // starting with "@@ " or "diff --git " can only be a header.
    const header = hunkHeader.exec(line);
    if (header) {
      oldRemaining = header[1] === undefined ? 1 : Number(header[1]);
      newRemaining = header[2] === undefined ? 1 : Number(header[2]);
      continue;
    }
    if (line.startsWith('diff --git ')) {
      oldRemaining = 0;
      newRemaining = 0;
      continue;
    }

    const insideHunk = oldRemaining > 0 || newRemaining > 0;
    if (insideHunk) {
      if (line.startsWith('+')) {
        added.push(line.slice(1));
        newRemaining--;
      } else if (line.startsWith('-')) {
        oldRemaining--;
      } else if (!line.startsWith('\\')) {
        // Context line (present on both sides). "\ No newline at end of file"
        // markers are skipped above because they count for neither side.
        oldRemaining--;
        newRemaining--;
      }
      continue;
    }

    // Outside a hunk: keep "+" lines, but never "+++ <file>" headers.
    if (line.startsWith('+') && !line.startsWith('+++')) {
      added.push(line.slice(1));
    }
  }

  return added.join('\n');
};
180
+
181
/**
 * Compose the feedback lines handed to the AI tool for one keep-working restart.
 *
 * The result consists of a banner naming the restart number and limit, up to
 * 15 detected indicators (with a "...and N more" line when there are more),
 * the "no future pull request" instructions and finally KEEP_WORKING_PROMPT.
 *
 * @param {Array<{label, snippet, source}>} detections - detected indicators; a missing list is treated as empty
 * @param {number} iteration - 1-based number of the restart being started
 * @param {number} limit - restart limit (Infinity is rendered as "unlimited")
 * @returns {string[]} feedback lines, in display order
 */
export const buildKeepWorkingFeedback = (detections, iteration, limit) => {
  const MAX_SHOWN = 15;
  const everything = detections || [];
  // Show only the first few reasons to keep the prompt focused.
  const visible = everything.slice(0, MAX_SHOWN);
  const hiddenCount = everything.length - visible.length;

  const banner = '='.repeat(60);
  const limitLabel = formatKeepWorkingLimit(limit);

  const reasonLines = visible.map(detection => ` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
  const overflowLines = hiddenCount > 0 ? [` • ...and ${hiddenCount} more indicator(s)`] : [];

  return [
    '',
    banner,
    `🔁 KEEP WORKING UNTIL ALL REQUIREMENTS ARE FULLY DONE (restart ${iteration}/${limitLabel}):`,
    banner,
    '',
    'It looks like some work was deferred, delayed or planned for a future pull request.',
    'The following strong indicators of unfinished / deferred work were detected:',
    '',
    ...reasonLines,
    ...overflowLines,
    '',
    'There is NO future pull request. This is the single pull request where everything must be done.',
    'Do not defer, delay or postpone anything. Remove any "future work" / "out of scope" / "TODO" / "follow-up" notes by actually implementing them now.',
    '',
    KEEP_WORKING_PROMPT,
    '',
  ];
};
211
+
212
+ export default {
213
+ DEFAULT_KEEP_WORKING_LIMIT,
214
+ KEEP_WORKING_PROMPT,
215
+ DEFERRED_WORK_PATTERNS,
216
+ isUnlimitedKeepWorking,
217
+ normalizeKeepWorkingLimit,
218
+ formatKeepWorkingLimit,
219
+ detectDeferredWork,
220
+ detectDeferredWorkInSources,
221
+ extractAddedLinesFromPatch,
222
+ buildKeepWorkingFeedback,
223
+ };
@@ -0,0 +1,285 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Keep-working-until-done module for solve.mjs
5
+ *
6
+ * [EXPERIMENTAL] When --keep-working-until-all-requirements-are-fully-done is
7
+ * enabled, after the main solve (and any other post-processing) completes, this
8
+ * module scans the pull request description, the AI working-session/solution
9
+ * summary, and the markdown documents changed by the pull request for strong
10
+ * indicators that the AI deferred, delayed or postponed work to a future pull
11
+ * request / iteration (e.g. "out of scope", "future work", "deferred",
12
+ * "follow-up PR", "TODO", ...).
13
+ *
14
+ * When such indicators are found, it automatically restarts the AI tool with a
15
+ * prompt instructing it to finish everything in this single pull request, in
16
+ * addition to the concrete detected reasons. It keeps restarting until no
17
+ * indicators remain or until the configured restart limit is reached.
18
+ *
19
+ * By default the restart limit is 5. The limit can be set to a custom number,
20
+ * or to "forever" / "unlimited" / "infinite" / 0 to remove the limit entirely.
21
+ *
22
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
23
+ */
24
+
25
+ // Check if use is already defined globally (when imported from solve.mjs)
26
+ // If not, fetch it (when running standalone)
27
+ if (typeof globalThis.use === 'undefined') {
28
+ globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
29
+ }
30
+ const use = globalThis.use;
31
+
32
+ // Use command-stream for consistent $ behavior across runtimes
33
+ const { $: __rawDollar$ } = await use('command-stream');
34
+ const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
35
+ const $ = wrapDollarWithGhRetry(__rawDollar$);
36
+
37
+ // Import shared library functions
38
+ const lib = await import('./lib.mjs');
39
+ const { log, cleanErrorMessage } = lib;
40
+
41
+ // Import shared restart utilities
42
+ const restartShared = await import('./solve.restart-shared.lib.mjs');
43
+ const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;
44
+
45
+ const sentryLib = await import('./sentry.lib.mjs');
46
+ const { reportError } = sentryLib;
47
+
48
+ // Pure detection + normalization helpers live in a separate, network-free
49
+ // module so they can be unit-tested in isolation (issue #1883).
50
+ const detectLib = await import('./solve.keep-working.detect.lib.mjs');
51
+ const { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, extractAddedLinesFromPatch, buildKeepWorkingFeedback } = detectLib;
52
+
53
+ // Re-export the pure helpers so existing importers of this module keep working.
54
+ export { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, buildKeepWorkingFeedback };
55
+
56
/**
 * Gather the text blocks that are scanned for deferred-work indicators:
 *   1. The pull request description (body), fetched with `gh api`.
 *   2. The AI working-session / solution summary (passed in-memory).
 *   3. The added lines of every changed (not removed) markdown file of the
 *      pull request, fetched with `gh api ... /files --paginate`.
 *
 * Failures of either `gh` call are reported through reportError and do not
 * abort the collection; the affected source is simply left out.
 *
 * @param {object} params
 * @param {string} params.owner - repository owner
 * @param {string} params.repo - repository name
 * @param {string|number} params.prNumber - pull request number
 * @param {string} [params.resultSummary] - AI solution summary, if any
 * @returns {Promise<Array<{source: string, text: string}>>} non-blank sources, in the order listed above
 */
export const collectDeferredWorkSources = async ({ owner, repo, prNumber, resultSummary }) => {
  const MARKDOWN_FILE = /\.(md|markdown|mdx)$/i;
  const collected = [];

  // 1. Pull request description
  try {
    const descriptionResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.body // ""'`;
    if (descriptionResult.code === 0) {
      const description = descriptionResult.stdout.toString();
      const isBlank = !description || !description.trim();
      if (!isBlank) {
        collected.push({ source: 'pull request description', text: description });
      }
    }
  } catch (error) {
    reportError(error, { context: 'keep_working_collect_pr_body', owner, repo, prNumber, operation: 'fetch_pr_body' });
  }

  // 2. AI working-session / solution summary (in-memory, no token cost)
  const hasSummary = typeof resultSummary === 'string' && resultSummary.trim() !== '';
  if (hasSummary) {
    collected.push({ source: 'AI solution summary', text: resultSummary });
  }

  // 3. Changed markdown documents (scan only added lines from the diff)
  try {
    const listing = await $`gh api repos/${owner}/${repo}/pulls/${prNumber}/files --paginate`;
    if (listing.code === 0) {
      const changedFiles = JSON.parse(listing.stdout.toString() || '[]');
      for (const file of changedFiles) {
        const filename = file.filename || '';
        const isSkipped = !MARKDOWN_FILE.test(filename) || file.status === 'removed';
        if (isSkipped) continue;

        const addedText = extractAddedLinesFromPatch(file.patch);
        if (!addedText || !addedText.trim()) continue;

        collected.push({ source: `changed markdown document ${filename}`, text: addedText });
      }
    }
  } catch (error) {
    reportError(error, { context: 'keep_working_collect_md_files', owner, repo, prNumber, operation: 'fetch_pr_files' });
  }

  return collected;
};
107
+
108
/**
 * Runs keep-working restart iterations after the main solve.
 *
 * Each pass of the loop collects the scan sources afresh, runs the deferred-work
 * detector over them and, while indicators remain and the restart limit has not
 * been reached, restarts the AI tool through executeToolIteration with the
 * detected reasons as feedback. The loop ends when the scan is clean, the limit
 * is reached, the usage limit is hit, too many consecutive API errors occurred,
 * or the sources could not be collected.
 *
 * Does nothing (returns null) when the normalized limit is 0 (feature disabled)
 * or when there is no pull request number.
 *
 * @param {object} params
 * @param {string} params.issueUrl
 * @param {string} params.owner
 * @param {string} params.repo
 * @param {string|number} params.issueNumber
 * @param {string|number} params.prNumber
 * @param {string} params.branchName
 * @param {string} params.tempDir
 * @param {string} [params.workspaceTmpDir]
 * @param {object} params.argv - CLI arguments
 * @param {function} params.cleanupClaudeFile - cleanup function
 * @param {string} [params.resultSummary] - AI solution summary from the last session
 * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
 *   session data of the most recent restart that reported each field, or null
 *   when no restart was performed
 */
export const runKeepWorkingUntilDone = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
  const limit = normalizeKeepWorkingLimit(argv.keepWorkingUntilAllRequirementsAreFullyDone);
  // limit === 0 means the feature is disabled; without a PR there is nothing to scan.
  if (!limit || !prNumber) {
    return null;
  }

  await log('');
  await log(`🔁 KEEP-WORKING: Scanning for deferred / delayed / out-of-scope work (limit: ${formatKeepWorkingLimit(limit)} restart(s))`);
  await log(' Sources: pull request description, AI solution summary, changed markdown documents');
  await log('');

  // Get PR merge state status for the iterations (fetched once, reused for every restart).
  // NOTE(review): the REST pull-request payload documents a `mergeable_state`
  // field; confirm that `.mergeStateStatus` resolves to a real value here and
  // does not always print "null".
  let currentMergeStateStatus = null;
  try {
    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
    if (prStateResult.code === 0) {
      currentMergeStateStatus = prStateResult.stdout.toString().trim();
    }
  } catch {
    // Ignore errors getting merge state
  }

  // Session data of the latest restart; each field stays undefined until a restart reports it.
  let sessionId;
  let anthropicTotalCostUSD;
  let publicPricingEstimate;
  let pricingInfo;
  // Summary scanned as the "AI solution summary" source; replaced whenever a restart returns a new one.
  let lastResultSummary = resultSummary;
  let consecutiveErrors = 0;
  // Hard safety cap that also applies in "unlimited" mode, to avoid spinning
  // forever on repeated failures (issue #1883). Only consecutive API errors
  // count toward this cap; a restart without an API error resets the counter.
  const MAX_CONSECUTIVE_ERRORS = 3;

  // Number of restarts started so far.
  let iteration = 0;
  while (true) {
    // Gather and scan sources fresh on every iteration.
    let sources = [];
    try {
      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
    } catch (error) {
      reportError(error, { context: 'keep_working_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
      await log(`⚠️ KEEP-WORKING: Could not collect sources: ${cleanErrorMessage(error)}`, { level: 'warning' });
      break;
    }

    const detections = detectDeferredWorkInSources(sources);

    // Clean scan: stop, with a message that depends on whether any restart happened.
    if (detections.length === 0) {
      if (iteration === 0) {
        await log('✅ KEEP-WORKING: No deferred / delayed / out-of-scope work detected. Nothing to restart for.');
      } else {
        await log(`✅ KEEP-WORKING: No more deferred work detected after ${iteration} restart(s). All requirements appear to be fully done.`);
      }
      break;
    }

    // Limit reached while indicators remain (never true when limit is Infinity).
    if (iteration >= limit) {
      await log(`🛑 KEEP-WORKING: Reached restart limit (${formatKeepWorkingLimit(limit)}) but ${detections.length} deferred-work indicator(s) still detected.`);
      await log(' Stopping to avoid an unbounded loop. Increase the limit (or use "forever"/"unlimited") to keep going.');
      for (const detection of detections.slice(0, 10)) {
        await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
      }
      break;
    }

    iteration++;
    await log('');
    await log(`🔁 KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)}: ${detections.length} deferred-work indicator(s) detected, restarting...`);
    // Only the first 10 indicators are logged; the feedback builder applies its own cap.
    for (const detection of detections.slice(0, 10)) {
      await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
    }

    // Issue #1572 pattern: sync local branch with remote before each iteration.
    // A failed pull is only a warning; the restart continues with the local state.
    try {
      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
      if (pullResult.code === 0) {
        await log(` Synced local branch ${branchName} from remote`, { verbose: true });
      } else {
        await log(` Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
      }
    } catch (error) {
      reportError(error, { context: 'keep_working_git_pull', branchName, operation: 'git_pull' });
      await log(` Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    const feedbackLines = buildKeepWorkingFeedback(detections, iteration, limit);

    const iterationResult = await executeToolIteration({
      issueUrl,
      owner,
      repo,
      issueNumber,
      prNumber,
      branchName,
      tempDir,
      workspaceTmpDir,
      mergeStateStatus: currentMergeStateStatus,
      feedbackLines,
      argv: {
        ...argv,
        // Reinforce the "finish everything now" guidance in the system prompt.
        promptEnsureAllRequirementsAreMet: true,
        // Prevent recursive keep-working inside the restart iteration.
        keepWorkingUntilAllRequirementsAreFullyDone: 0,
      },
    });

    // Update session data from the restart; falsy fields keep the previous value.
    if (iterationResult) {
      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
      if (iterationResult.result) lastResultSummary = iterationResult.result;
    }

    // A usage limit ends the loop immediately, regardless of the restart limit.
    if (isUsageLimitReached(iterationResult)) {
      await log('🛑 KEEP-WORKING: Usage limit reached during restart. Stopping keep-working loop.');
      break;
    }
    // Issue #1883: cap consecutive errors so we don't spin forever (especially
    // important in "unlimited" mode).
    if (isApiError(iterationResult)) {
      consecutiveErrors++;
      await log(`⚠️ KEEP-WORKING: API error during restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
        await log('🛑 KEEP-WORKING: Too many consecutive errors. Stopping keep-working loop.');
        break;
      }
    } else {
      consecutiveErrors = 0;
    }

    await log(`✅ KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)} complete`);
    await log('');
  }

  // Clean up CLAUDE.md/.gitkeep after restarts (runs on every exit path of the
  // loop, including when no restart was started); failures are only reported.
  try {
    await cleanupClaudeFile(tempDir, branchName, null, argv);
  } catch (error) {
    reportError(error, { context: 'keep_working_cleanup', branchName, operation: 'cleanup_claude_file' });
  }

  // No restart happened: signal "nothing to apply" to the caller.
  if (iteration === 0) return null;
  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
};
272
+
273
+ export default {
274
+ DEFAULT_KEEP_WORKING_LIMIT,
275
+ KEEP_WORKING_PROMPT,
276
+ DEFERRED_WORK_PATTERNS,
277
+ isUnlimitedKeepWorking,
278
+ normalizeKeepWorkingLimit,
279
+ formatKeepWorkingLimit,
280
+ detectDeferredWork,
281
+ detectDeferredWorkInSources,
282
+ collectDeferredWorkSources,
283
+ buildKeepWorkingFeedback,
284
+ runKeepWorkingUntilDone,
285
+ };
package/src/solve.mjs CHANGED
@@ -45,6 +45,7 @@ const watchLib = await import('./solve.watch.lib.mjs');
45
45
  const { startWatchMode } = watchLib;
46
46
  const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
47
47
  const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
48
+ const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
48
49
  const exitHandler = await import('./exit-handler.lib.mjs');
49
50
  const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
50
51
  const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
@@ -852,6 +853,14 @@ try {
852
853
  let resultModelUsage = toolResult.resultModelUsage || null;
853
854
  let streamTokenUsage = toolResult.streamTokenUsage || null;
854
855
  let subAgentCalls = toolResult.subAgentCalls || null; // Issue #1590
856
+
857
// Copy the session data of a restart into the surrounding session variables.
// A missing result, or a falsy field, leaves the current value untouched.
const applyRestartResult = result => {
  if (!result) {
    return;
  }
  if (result.sessionId) sessionId = result.sessionId;
  if (result.anthropicTotalCostUSD) anthropicTotalCostUSD = result.anthropicTotalCostUSD;
  if (result.publicPricingEstimate) publicPricingEstimate = result.publicPricingEstimate;
  if (result.pricingInfo) pricingInfo = result.pricingInfo;
};
855
864
  limitReached = toolResult.limitReached;
856
865
  cleanupContext.limitReached = limitReached;
857
866
 
@@ -1249,12 +1258,7 @@ try {
1249
1258
  });
1250
1259
 
1251
1260
  // Update session data from restart
1252
- if (restartResult) {
1253
- if (restartResult.sessionId) sessionId = restartResult.sessionId;
1254
- if (restartResult.anthropicTotalCostUSD) anthropicTotalCostUSD = restartResult.anthropicTotalCostUSD;
1255
- if (restartResult.publicPricingEstimate) publicPricingEstimate = restartResult.publicPricingEstimate;
1256
- if (restartResult.pricingInfo) pricingInfo = restartResult.pricingInfo;
1257
- }
1261
+ applyRestartResult(restartResult);
1258
1262
 
1259
1263
  // Clean up CLAUDE.md/.gitkeep again after restart
1260
1264
  await cleanupClaudeFile(tempDir, branchName, null, argv);
@@ -1268,13 +1272,9 @@ try {
1268
1272
  }
1269
1273
 
1270
1274
  // Issue #1383: --finalize
1271
- const autoEnsureResult = await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile });
1272
- if (autoEnsureResult) {
1273
- if (autoEnsureResult.sessionId) sessionId = autoEnsureResult.sessionId;
1274
- if (autoEnsureResult.anthropicTotalCostUSD) anthropicTotalCostUSD = autoEnsureResult.anthropicTotalCostUSD;
1275
- if (autoEnsureResult.publicPricingEstimate) publicPricingEstimate = autoEnsureResult.publicPricingEstimate;
1276
- if (autoEnsureResult.pricingInfo) pricingInfo = autoEnsureResult.pricingInfo;
1277
- }
1275
+ applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
1276
+ // Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
1277
+ applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
1278
1278
 
1279
1279
  // Start watch mode if enabled OR if we need to handle uncommitted changes
1280
1280
  if (argv.verbose) {
@@ -43,6 +43,20 @@ export const classifyRetryableError = value => {
43
43
  return { message, isRetryable: true, isCapacity: false, label: 'Stream disconnected before completion' };
44
44
  }
45
45
 
46
+ // Issue #1881: Transient socket / network disconnects from the SDK's underlying fetch.
47
+ // When the HTTP(S)/streaming socket drops mid-request, the Claude/Codex CLI surfaces a
48
+ // synthetic assistant message such as:
49
+ // "API Error: The socket connection was closed unexpectedly. For more information,
50
+ // pass `verbose: true` in the second argument to fetch()"
51
+ // These are network-level failures (QUIC/TCP resets, idle-socket teardown, proxy/VPN
52
+ // interruptions, undici socket hang-ups), not request-content errors, so they are safe
53
+ // to retry with the session preserved (--resume). Without this branch the whole solve
54
+ // session aborts on a single dropped socket.
55
+ // Upstream: anthropics/claude-code#48837, #51107, #54287, #60133.
56
+ if (lower.includes('socket connection was closed unexpectedly') || lower.includes('socket hang up') || lower.includes('econnreset') || lower.includes('connection reset') || lower.includes('network connection lost') || lower.includes('connection error') || lower.includes('fetch failed')) {
57
+ return { message, isRetryable: true, isCapacity: false, label: 'Socket/connection closed unexpectedly' };
58
+ }
59
+
46
60
  // Issue #1834: Corrupted extended-thinking blocks. When extended thinking is combined with tool
47
61
  // use, Claude Code can persist a thinking block to the session transcript with the `thinking`
48
62
  // text emptied to "" while retaining the original `signature`. On resume/continue the block is