@link-assistant/hive-mind 1.74.12 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,55 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.76.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 80c56fa: Add experimental `--use-handoff` HANDOFF.md continuity **Agent Skill** (issue
8
+ #1877). When enabled, Hive Mind deploys a real `SKILL.md` (the Agent Skills open
9
+ standard created by Anthropic) into the session working directory for both tools
10
+ natively — `.claude/skills/handoff/SKILL.md` for `--tool claude` and
11
+ `.agents/skills/handoff/SKILL.md` for `--tool codex` — so the very same skill
12
+ teaches each tool to read `HANDOFF.md` (repository root) first when present and
13
+ keep it updated with task, current state, decisions, next steps, gotchas, and
14
+ critical files. A minimal activation nudge in the system prompt ensures the
15
+ read-at-session-start behavior fires reliably. Because each Hive Mind working
16
+ session runs in an ephemeral working directory cloned from the PR branch, the
17
+ handoff file is committed to the branch — making it the shared cross-session,
18
+ cross-tool memory so Claude and Codex can continue each other's work in a single
19
+ pull request. The deployed `SKILL.md` is tooling (re-deployed every session) and
20
+ is kept out of the target repository via `.git/info/exclude`, so it never appears
21
+ in the PR. Disabled by default; auto-forwarded by `hive`. Includes a case study
22
+ in `docs/case-studies/issue-1877/` and tests in `tests/handoff-prompt.test.mjs`.
23
+
24
+ ## 1.75.0
25
+
26
+ ### Minor Changes
27
+
28
+ - d2adf6b: feat(solve): experimental `--keep-working-until-all-requirements-are-fully-done` (#1883)
29
+
30
+ Add an experimental `solve` option that, after the main run (and any `--finalize`
31
+ pass), scans three cheap sources — the pull request description, the AI solution
32
+ summary, and the added lines of changed markdown documents — for strong
33
+ indicators of deferred work ("out of scope", "future work", "follow-up PR",
34
+ "deferred", "delayed", "TODO"/"TBD", etc.) using ~14 regular expressions. When
35
+ indicators are found it auto-restarts the AI tool with the concrete detected
36
+ reasons plus a verbatim reinforcement prompt, and repeats until the scan is clean
37
+ or the restart limit is reached.
38
+
39
+ Limit semantics:
40
+ - `--keep-working-until-all-requirements-are-fully-done` (bare) → 5 restarts
41
+ - `... 3` → an explicit count
42
+ - `... forever` / `unlimited` / `infinite` / `0` → no limit (with a hard cap of 3
43
+ consecutive errors as a safety net)
44
+
45
+ Aliases: `--keep-going-until-all-requirements-are-fully-done`, `--keep-working`,
46
+ `--keep-going`.
47
+
48
+ Detection lives in a pure, network-free module
49
+ (`src/solve.keep-working.detect.lib.mjs`) for full unit-test coverage;
50
+ orchestration lives in `src/solve.keep-working.lib.mjs`. A deep case study is
51
+ compiled under `docs/case-studies/issue-1883/`.
52
+
3
53
  ## 1.74.12
4
54
 
5
55
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.74.12",
3
+ "version": "1.76.0",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -28,6 +28,7 @@ import { fetchModelInfo } from './model-info.lib.mjs';
28
28
  import { classifyRetryableError, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
29
29
  import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
30
30
  import { withAgentsMdAsClaudeMd } from './agents-md-claude-support.lib.mjs';
31
+ import { deployHandoffSkill } from './handoff-skill.lib.mjs'; // Issue #1877
31
32
  import { createThinkingBlockRecovery } from './claude.thinking-block-recovery.lib.mjs'; // Issue #1834 (PR #1835 feedback)
32
33
  export { availableModels, fetchModelInfo }; // Re-export for backward compatibility
33
34
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log, argv = null) => {
@@ -353,6 +354,10 @@ export const executeClaude = async params => {
353
354
  const escapedPrompt = prompt.replace(/"/g, '\\"').replace(/\$/g, '\\$');
354
355
  const escapedSystemPrompt = systemPrompt.replace(/"/g, '\\"').replace(/\$/g, '\\$');
355
356
 
357
+ // Issue #1877: deploy the experimental HANDOFF.md Agent Skill so Claude loads
358
+ // it natively from .claude/skills/handoff/SKILL.md (no-op unless --use-handoff).
359
+ await deployHandoffSkill({ tempDir, argv, log, $ });
360
+
356
361
  return await withAgentsMdAsClaudeMd({ tempDir, branchName, argv, prompt, fs, path, $, log, formatAligned }, () =>
357
362
  executeClaudeCommand({
358
363
  tempDir,
@@ -4,6 +4,7 @@
4
4
  */
5
5
 
6
6
  import { getArchitectureCareSubPrompt } from './architecture-care.prompts.lib.mjs';
7
+ import { getHandoffSubPrompt } from './handoff.prompts.lib.mjs';
7
8
  import { getExperimentsExamplesSubPrompt } from './experiments-examples.prompts.lib.mjs';
8
9
  import { primaryModelNames } from './models/index.mjs';
9
10
  import { getThinkingPromptInstruction } from './thinking-prompt.lib.mjs';
@@ -338,7 +339,7 @@ Visual UI work and screenshots.
338
339
  - When the fix is visual, include side-by-side or sequential comparison of before/after states in the PR description.
339
340
  - When possible, create automated visual regression tests to prevent the UI bug from recurring.`
340
341
  : ''
341
- }${ciExamples}${getArchitectureCareSubPrompt(argv)}${buildWorkLanguageDirective()}`;
342
+ }${ciExamples}${getArchitectureCareSubPrompt(argv)}${getHandoffSubPrompt(argv)}${buildWorkLanguageDirective()}`;
342
343
  };
343
344
 
344
345
  // Export all functions as default object too
package/src/codex.lib.mjs CHANGED
@@ -29,6 +29,7 @@ import { defaultModels } from './models/index.mjs';
29
29
  import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
30
30
  import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
31
31
  import { getCumulativeContextInputTokens } from './context-fill.lib.mjs';
32
+ import { deployHandoffSkill } from './handoff-skill.lib.mjs'; // Issue #1877
32
33
  import Decimal from 'decimal.js-light';
33
34
 
34
35
  const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -661,6 +662,10 @@ export const executeCodex = async params => {
661
662
  }
662
663
  }
663
664
 
665
+ // Issue #1877: deploy the experimental HANDOFF.md Agent Skill so Codex loads
666
+ // it natively from .agents/skills/handoff/SKILL.md (no-op unless --use-handoff).
667
+ await deployHandoffSkill({ tempDir, argv, log, $ });
668
+
664
669
  // Execute the Codex command
665
670
  return await executeCodexCommand({
666
671
  tempDir,
@@ -4,6 +4,7 @@
4
4
  */
5
5
 
6
6
  import { getArchitectureCareSubPrompt } from './architecture-care.prompts.lib.mjs';
7
+ import { getHandoffSubPrompt } from './handoff.prompts.lib.mjs';
7
8
  import { getExperimentsExamplesSubPrompt } from './experiments-examples.prompts.lib.mjs';
8
9
  import { getThinkingPromptInstruction } from './thinking-prompt.lib.mjs';
9
10
  import { buildWorkLanguageDirective } from './work-language.prompts.lib.mjs';
@@ -306,7 +307,7 @@ Visual UI work and screenshots.
306
307
  - When the fix is visual, include side-by-side or sequential comparison of before/after states in the PR description.
307
308
  - When possible, create automated visual regression tests to prevent the UI bug from recurring.`
308
309
  : ''
309
- }${ciExamples}${getArchitectureCareSubPrompt(argv)}${buildWorkLanguageDirective()}`;
310
+ }${ciExamples}${getArchitectureCareSubPrompt(argv)}${getHandoffSubPrompt(argv)}${buildWorkLanguageDirective()}`;
310
311
  };
311
312
 
312
313
  // Export all functions as default object too
@@ -0,0 +1,256 @@
1
+ /**
2
+ * HANDOFF.md Agent Skill deployment (issue #1877)
3
+ *
4
+ * Writes the canonical handoff `SKILL.md` (built by handoff.prompts.lib.mjs)
5
+ * into the session working directory so the AI tool loads it natively as an
6
+ * Agent Skill, instead of relying on an injected prompt.
7
+ *
8
+ * Both supported tools read the Agent Skills standard, but from different
9
+ * hardcoded project directories (neither tool exposes a setting or env var to
10
+ * point at a custom/shared folder):
11
+ * - Claude Code: .claude/skills/<name>/SKILL.md
12
+ * - Codex: .agents/skills/<name>/SKILL.md
13
+ *
14
+ * To answer "can both CLIs use the SAME folder?": there is no native shared
15
+ * location, so we make one ourselves. The SKILL.md is written exactly ONCE into
16
+ * a single real directory (the Claude path, `.claude/skills/handoff/`), and the
17
+ * Codex path (`.agents/skills/handoff`) is a relative **symlink** pointing at
18
+ * that one real directory. Both tools therefore read byte-for-byte the same
19
+ * file from a single source of truth on disk — not two copies that could drift.
20
+ * If the filesystem cannot create a symlink (e.g. Windows without privilege),
21
+ * we fall back to writing a real second copy so the feature still works.
22
+ *
23
+ * The deployed skill is tool configuration, not project state, so it is:
24
+ * - re-deployed every session by hive-mind (each session clones fresh), and
25
+ * - excluded from git via `.git/info/exclude` (a local, never-committed
26
+ * ignore) so it never pollutes the pull request or the "uncommitted
27
+ * changes" checks. Only the HANDOFF.md the tool produces is committed.
28
+ */
29
+
30
+ // Fetch use-m if not available (matches the rest of src/*.lib.mjs).
31
+ if (typeof globalThis.use === 'undefined') {
32
+ globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
33
+ }
34
+ const fs = (await use('fs')).promises;
35
+ const path = (await use('path')).default;
36
+
37
+ import { buildHandoffSkillFile, HANDOFF_SKILL_NAME } from './handoff.prompts.lib.mjs';
38
+
39
+ const noopLog = async () => {};
40
+
41
+ const SKILL_FILE = 'SKILL.md';
42
+
43
+ /**
44
+ * The single real skill directory the SKILL.md is written into. Claude Code
45
+ * reads it directly; Codex reaches the same files through a symlink (below).
46
+ * @type {string} Relative path (joined onto the session working directory by the deploy code); built with path.join, so it uses the platform separator.
47
+ */
48
+ export const HANDOFF_PRIMARY_SKILL_DIR = path.join('.claude', 'skills', HANDOFF_SKILL_NAME);
49
+
50
+ /**
51
+ * Additional skill directories that should resolve to the same SKILL.md. Each
52
+ * is created as a symlink to HANDOFF_PRIMARY_SKILL_DIR (one source of truth),
53
+ * falling back to a real copy only if symlinking is unsupported.
54
+ * @type {readonly string[]} Frozen array of relative paths; do not mutate.
55
+ */
56
+ export const HANDOFF_LINKED_SKILL_DIRS = Object.freeze([
57
+ path.join('.agents', 'skills', HANDOFF_SKILL_NAME), // Codex
58
+ ]);
59
+
60
+ /**
61
+ * All skill directories the deployment touches (primary + links). Kept for the
62
+ * git-exclude bookkeeping and for callers/tests that enumerate every location.
63
+ * @type {readonly string[]} Frozen array: the primary directory first, then every linked directory.
64
+ */
65
+ export const HANDOFF_SKILL_DIRS = Object.freeze([HANDOFF_PRIMARY_SKILL_DIR, ...HANDOFF_LINKED_SKILL_DIRS]);
66
+
/**
 * Report whether `relPath` is tracked by git inside `tempDir`.
 *
 * Acts as a guard so the deployment never overwrites a file the target
 * repository tracks itself. A missing command runner, a non-zero exit code and
 * any error raised while running git all count as "not tracked".
 *
 * @param {Object} params
 * @param {Function|null} params.$ - Command runner; the check is skipped when falsy.
 * @param {string} params.tempDir - Working directory git is executed in.
 * @param {string} params.relPath - Path to look up, relative to `tempDir`.
 * @returns {Promise<boolean>} `true` only when `git ls-files --error-unmatch` exits with code 0.
 */
const isTracked = async ({ $, tempDir, relPath }) => {
  if ($) {
    try {
      const { code: exitCode } = await $({ cwd: tempDir })`git ls-files --error-unmatch ${relPath} 2>/dev/null`;
      return exitCode === 0;
    } catch {
      // Running git failed: report the path as untracked below.
    }
  }
  return false;
};
80
+
/**
 * Locate the repository-local git exclude file for `tempDir`.
 *
 * Asks git itself (`git rev-parse --git-path info/exclude`) so worktrees are
 * honored. A relative answer is joined onto `tempDir`; an absolute answer is
 * returned unchanged. When there is no runner, git fails, or git prints
 * nothing, the conventional `<tempDir>/.git/info/exclude` is returned.
 *
 * @param {Object} params
 * @param {Function|null} params.$ - Command runner; optional.
 * @param {string} params.tempDir - The repo working directory.
 * @returns {Promise<string>} Path of the exclude file (it may not exist yet).
 */
const resolveExcludePath = async ({ $, tempDir }) => {
  // Built lazily so the join only happens on the paths that actually need it.
  const conventionalPath = () => path.join(tempDir, '.git', 'info', 'exclude');
  if (!$) return conventionalPath();

  try {
    const outcome = await $({ cwd: tempDir })`git rev-parse --git-path info/exclude 2>/dev/null`;
    const reported = (outcome.stdout || '').toString().trim();
    if (outcome.code === 0 && reported) {
      if (path.isAbsolute(reported)) return reported;
      return path.join(tempDir, reported);
    }
  } catch {
    // git could not answer: use the conventional location below.
  }
  return conventionalPath();
};
99
+
/**
 * Add the skill directories to the local git exclude file (idempotent) so the
 * deployed SKILL.md files (real directory and symlink alike) stay invisible to
 * git.
 *
 * Entries are written WITHOUT a trailing slash so they match both a real
 * directory and a directory symlink (git would not match a symlink against a
 * `dir/` pattern).
 *
 * The new block is APPENDED rather than rewriting the whole file. Content that
 * could not be read (any read error is treated as "empty"), or that changed
 * between the read and the write, is therefore never overwritten.
 *
 * @param {Object} params
 * @param {Function|null} params.$ - Command runner forwarded to resolveExcludePath.
 * @param {string} params.tempDir - The repo working directory.
 * @param {Function} params.log - Async logger `(message, options)`.
 * @returns {Promise<boolean>} `false` when the exclude file's parent directory
 *   is missing (nothing is written); `true` when every entry is present afterwards.
 * @throws Propagates file-system errors raised while appending.
 */
const updateGitExclude = async ({ $, tempDir, log }) => {
  const excludePath = await resolveExcludePath({ $, tempDir });
  // Only touch the exclude file if its parent (.git/info) exists — i.e. this is
  // a real git working dir. Avoid creating a stray `.git/` in non-git dirs.
  try {
    await fs.access(path.dirname(excludePath));
  } catch {
    await log(' Handoff skill: no .git/info directory; skipping git-exclude update', { verbose: true });
    return false;
  }

  let existing = '';
  try {
    existing = await fs.readFile(excludePath, 'utf8');
  } catch {
    // Missing or unreadable file: treat as empty. Appending (below) keeps this
    // safe even when the file exists but could not be read.
    existing = '';
  }

  // Git patterns always use forward slashes; the leading slash anchors each
  // entry at the repository root.
  const entries = HANDOFF_SKILL_DIRS.map(dir => `/${dir.split(path.sep).join('/')}`);
  const knownLines = new Set(existing.split(/\r?\n/));
  const missing = entries.filter(entry => !knownLines.has(entry));
  if (missing.length === 0) return true;

  const header = '# hive-mind --use-handoff: experimental HANDOFF.md Agent Skill (issue #1877)';
  // Start on a fresh line when the existing content lacks a final newline.
  const prefix = existing.length > 0 && !existing.endsWith('\n') ? '\n' : '';
  const headerLine = existing.includes(header) ? '' : `${header}\n`;
  await fs.appendFile(excludePath, `${prefix}${headerLine}${missing.join('\n')}\n`, 'utf8');
  return true;
};
136
+
/**
 * Materialize the one real SKILL.md inside the primary skill directory.
 *
 * Missing parent directories are created; an existing SKILL.md is overwritten.
 *
 * @param {Object} params
 * @param {string} params.tempDir - The repo working directory.
 * @param {string} params.content - Full SKILL.md text to write.
 * @returns {Promise<string>} Path of the primary skill directory (`tempDir` joined with the primary relative dir).
 */
const writeRealSkill = async ({ tempDir, content }) => {
  const skillDir = path.join(tempDir, HANDOFF_PRIMARY_SKILL_DIR);
  await fs.mkdir(skillDir, { recursive: true });

  const skillFile = path.join(skillDir, SKILL_FILE);
  await fs.writeFile(skillFile, content, 'utf8');

  return skillDir;
};
146
+
/**
 * Make `relLinkDir` resolve to the same files as the primary skill directory.
 * Prefers a relative symlink (single source of truth); if symlinking is not
 * supported, falls back to writing a real copy of the SKILL.md.
 *
 * Reconciliation of a pre-existing entry:
 *   - symlink already pointing at the primary dir -> left as is;
 *   - any other symlink or non-directory entry     -> removed, then recreated;
 *   - real directory (earlier copy fallback)       -> SKILL.md refreshed in place.
 *
 * Only "entry does not exist" (ENOENT) is treated as "nothing to reconcile".
 * Every other failure during reconciliation is rethrown: continuing after, for
 * example, a failed removal of a stale symlink would make the copy fallback
 * write SKILL.md THROUGH that stale link into an unrelated directory.
 *
 * @param {Object} params
 * @param {string} params.tempDir - The repo working directory.
 * @param {string} params.relLinkDir - Directory to create, relative to `tempDir`.
 * @param {string} params.primaryAbsDir - The real skill directory to point at.
 * @param {string} params.content - SKILL.md text used for the copy fallback.
 * @returns {Promise<'symlink'|'copy'>} How the directory ended up being provided.
 * @throws Propagates file-system errors other than a missing entry or a failed symlink attempt.
 */
const linkOrCopySkill = async ({ tempDir, relLinkDir, primaryAbsDir, content }) => {
  const absLinkDir = path.join(tempDir, relLinkDir);
  const parent = path.dirname(absLinkDir);
  await fs.mkdir(parent, { recursive: true });
  // Relative target keeps the link valid regardless of where the working dir lives.
  const relTarget = path.relative(parent, primaryAbsDir);

  // Reconcile any pre-existing entry (e.g. from a prior session re-deploy).
  let existingEntry = null;
  try {
    existingEntry = await fs.lstat(absLinkDir);
  } catch (error) {
    // Nothing there yet is the normal case; anything else is a real problem.
    if (!error || error.code !== 'ENOENT') throw error;
  }

  if (existingEntry) {
    if (existingEntry.isSymbolicLink()) {
      const current = await fs.readlink(absLinkDir);
      if (current === relTarget) return 'symlink'; // already correct
      await fs.rm(absLinkDir, { recursive: true, force: true });
    } else if (existingEntry.isDirectory()) {
      // A real directory is already there (prior copy fallback). Refresh the
      // copy in place rather than replacing the directory.
      await fs.writeFile(path.join(absLinkDir, SKILL_FILE), content, 'utf8');
      return 'copy';
    } else {
      await fs.rm(absLinkDir, { force: true });
    }
  }

  try {
    await fs.symlink(relTarget, absLinkDir, 'dir');
    return 'symlink';
  } catch {
    // Symlinks unavailable (e.g. Windows without privilege): keep the feature
    // working with a second real copy.
    await fs.mkdir(absLinkDir, { recursive: true });
    await fs.writeFile(path.join(absLinkDir, SKILL_FILE), content, 'utf8');
    return 'copy';
  }
};
188
+
/**
 * Deploy the handoff SKILL.md into the session working directory.
 *
 * Steps (all skipped unless `argv.useHandoff` is truthy):
 *   1. write the real SKILL.md into the primary skill directory, unless the
 *      repository tracks that file itself;
 *   2. make every linked skill directory resolve to the primary one (symlink,
 *      or a copy when symlinking fails), again skipping tracked files;
 *   3. register the directories in the local git exclude file.
 *
 * Deployment is best-effort: a failure to link a secondary directory or to
 * update the git exclude file is logged and does not throw, so it cannot abort
 * the surrounding tool run. Only a failure to write the primary SKILL.md stops
 * the deployment (reported as `reason: 'write-failed'`).
 *
 * @param {Object} params
 * @param {string} params.tempDir - The repo working directory.
 * @param {Object} params.argv - Parsed CLI args (uses argv.useHandoff).
 * @param {Function} [params.log] - Logger.
 * @param {Function} [params.$] - Command runner (for git checks); optional.
 * @returns {Promise<{deployed: boolean, reason?: string, paths: string[], shared: boolean}>}
 *   `paths` lists the relative SKILL.md paths that were provided; `shared` is
 *   `false` as soon as one linked directory was copied or could not be linked.
 */
export const deployHandoffSkill = async ({ tempDir, argv, log = noopLog, $ = null } = {}) => {
  if (!argv || !argv.useHandoff) {
    return { deployed: false, reason: 'disabled', paths: [], shared: false };
  }
  if (!tempDir) {
    return { deployed: false, reason: 'no-temp-dir', paths: [], shared: false };
  }

  const content = buildHandoffSkillFile();
  const written = [];
  let allShared = true;

  // 1. Write the single real SKILL.md (unless the repo tracks it itself).
  const primaryRelFile = path.join(HANDOFF_PRIMARY_SKILL_DIR, SKILL_FILE);
  let primaryAbsDir = path.join(tempDir, HANDOFF_PRIMARY_SKILL_DIR);
  if (await isTracked({ $, tempDir, relPath: primaryRelFile })) {
    await log(` Handoff skill: ${primaryRelFile} is tracked by the repo; leaving it untouched`, { verbose: true });
  } else {
    try {
      primaryAbsDir = await writeRealSkill({ tempDir, content });
      written.push(primaryRelFile);
    } catch (error) {
      await log(` Handoff skill: failed to deploy ${primaryRelFile}: ${error.message}`, { verbose: true });
      return { deployed: false, reason: 'write-failed', paths: [], shared: false };
    }
  }

  // 2. Point every other tool's skill dir at that same real directory.
  for (const relLinkDir of HANDOFF_LINKED_SKILL_DIRS) {
    const relFile = path.join(relLinkDir, SKILL_FILE);
    if (await isTracked({ $, tempDir, relPath: relFile })) {
      await log(` Handoff skill: ${relFile} is tracked by the repo; leaving it untouched`, { verbose: true });
      continue;
    }
    try {
      const mode = await linkOrCopySkill({ tempDir, relLinkDir, primaryAbsDir, content });
      if (mode !== 'symlink') allShared = false;
      written.push(relFile);
    } catch (error) {
      // A directory that could not be provided is, by definition, not shared.
      allShared = false;
      await log(` Handoff skill: failed to link ${relFile}: ${error.message}`, { verbose: true });
    }
  }

  if (written.length > 0) {
    // 3. Keep the deployed files out of git. Best-effort: the skill is already
    // on disk, so an unwritable exclude file must not abort the session.
    try {
      await updateGitExclude({ $, tempDir, log });
    } catch (error) {
      await log(` Handoff skill: failed to update git exclude: ${error.message}`, { verbose: true });
    }
    const how = allShared ? 'one shared folder via symlink' : 'copied (symlink unsupported)';
    await log(` Handoff skill deployed (--use-handoff, ${how}): ${written.join(', ')}`, { verbose: true });
  }

  return { deployed: written.length > 0, paths: written, shared: allShared };
};
250
+
251
+ export default { // Default-export object bundling the same bindings that are also exported by name.
252
+ HANDOFF_PRIMARY_SKILL_DIR,
253
+ HANDOFF_LINKED_SKILL_DIRS,
254
+ HANDOFF_SKILL_DIRS,
255
+ deployHandoffSkill,
256
+ };
@@ -0,0 +1,158 @@
1
+ /**
2
+ * HANDOFF.md support — Agent Skill (issue #1877)
3
+ *
4
+ * Instead of injecting a bespoke sub-prompt, this module ships a real
5
+ * **Agent Skill** (https://agentskills.io) — a `SKILL.md` document with YAML
6
+ * frontmatter — that teaches the AI tool to read and maintain a HANDOFF.md file
7
+ * in the repository root. The Agent Skills format is an open standard (created
8
+ * by Anthropic) that BOTH supported tools load natively:
9
+ * - Claude Code discovers project skills from `.claude/skills/<name>/SKILL.md`.
10
+ * - Codex discovers project skills from `.agents/skills/<name>/SKILL.md`.
11
+ * The exact same `SKILL.md` works for both, so "same skill, same way" is
12
+ * satisfied by a single canonical file rather than a tool-specific prompt.
13
+ * Because neither tool lets you redirect its skills folder, the deployment
14
+ * writes that file ONCE and symlinks the second tool's path to it, so both
15
+ * tools literally read the same folder (see handoff-skill.lib.mjs).
16
+ *
17
+ * The skill is deployed into the session working directory by
18
+ * `handoff-skill.lib.mjs` (gated behind the experimental --use-handoff flag).
19
+ * This module only builds the canonical text; the deployment module writes it.
20
+ *
21
+ * Goal: cross-session AND cross-tool continuity — a session driven by one tool
22
+ * (e.g. Claude) can be continued by another tool (e.g. Codex) inside the same
23
+ * pull request, because the HANDOFF.md state travels with the branch.
24
+ *
25
+ * Design rationale specific to hive-mind:
26
+ * - Each working session runs in an ephemeral temp working directory that is
27
+ * cloned fresh from the pull request branch. The ONLY state that persists
28
+ * between sessions (and between different tools) is what is committed to the
29
+ * branch. Therefore, unlike the general "disposable temp-dir handoff"
30
+ * convention, the handoff file here MUST be committed to the PR branch so
31
+ * the next session/tool can read it. We keep a single active HANDOFF.md per
32
+ * branch to avoid ambiguity.
33
+ * - The skill file itself (SKILL.md) is tool configuration, not project state,
34
+ * so it is re-deployed each session by hive-mind and is NOT committed to the
35
+ * target repository (see handoff-skill.lib.mjs).
36
+ */
37
+
38
+ /**
39
+ * The default handoff file name (repository root, relative path). Default `fileName` of every builder in this module.
40
+ * @type {string}
41
+ */
42
+ export const HANDOFF_FILE_NAME = 'HANDOFF.md';
43
+
44
+ /**
45
+ * The skill directory / invocation name (Agent Skills standard). Also the default `name` written into the SKILL.md frontmatter.
46
+ * @type {string}
47
+ */
48
+ export const HANDOFF_SKILL_NAME = 'handoff';
49
+
50
+ /**
51
+ * The skill description used in the SKILL.md frontmatter. Front-loads the key
52
+ * use case and trigger words so the tool can match the skill implicitly. Emitted on the `description:` line by buildHandoffSkillFile.
53
+ * @type {string}
54
+ */
55
+ export const HANDOFF_SKILL_DESCRIPTION = "Maintain a HANDOFF.md continuity document in the repository root so any session can continue a previous session's work — even across different AI tools (Claude and Codex) in the same pull request. Use when starting, resuming, or finishing work on a long-running task, issue, or pull request.";
56
+
57
+ /**
58
+ * Build the canonical handoff skill instructions (the markdown body that follows
59
+ * the YAML frontmatter in SKILL.md). This is tool-agnostic and identical for
60
+ * Claude and Codex. The returned text has no trailing newline.
61
+ * NOTE(review): the top-level heading is the literal "HANDOFF.md continuity skill"; only the other mentions follow `fileName` — confirm this is intended for custom file names.
62
+ * @param {Object} [options]
63
+ * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name, interpolated into the body text and the "How to write" heading.
64
+ * @returns {string} The markdown instructions body.
65
+ */
66
+ export const buildHandoffSkillBody = ({ fileName = HANDOFF_FILE_NAME } = {}) => {
67
+ return `# HANDOFF.md continuity skill
68
+
69
+ ${fileName} is a single shared handoff document in the repository root that lets any session continue the work of any previous session, even when a different AI tool (for example Claude and Codex) is used. It travels with the pull request branch, so it is the cross-tool, cross-session memory for this PR.
70
+
71
+ ## When to use this skill
72
+
73
+ - When you start a working session, read ${fileName} first if it exists. Treat its "Next steps" section as your immediate starting point and honor the decisions and constraints it records before exploring anything else.
74
+ - When ${fileName} does not exist yet and the task is non-trivial, create it early so an interrupted session can always be resumed.
75
+ - When you make meaningful progress, update ${fileName} so it always reflects the current truth. Keep exactly one active ${fileName} per pull request branch (do not create per-session copies).
76
+ - When all requirements are fully met and the work is complete, record that completion at the top of ${fileName} (or delete the file) so the next session knows there is nothing left to continue.
77
+
78
+ ## How to write ${fileName}
79
+
80
+ - Keep it concise and tool-agnostic: describe state by referencing file paths, function names, branch, and commit SHAs rather than tool-specific commands, so the next tool (Claude or Codex) can act on it directly. Prefer pointers to existing artifacts over duplicating their content.
81
+ - Include these sections:
82
+ 1. **Task** — the issue/PR being solved and the goal.
83
+ 2. **Current state** — what is done and verified.
84
+ 3. **Decisions** — key choices made and why (so they are not re-litigated).
85
+ 4. **Next steps** — the concrete, ordered actions the next session should take.
86
+ 5. **Gotchas** — known pitfalls, failing checks, or constraints.
87
+ 6. **Critical files** — the important paths and what each is for.
88
+ - When you record next steps, make them specific and actionable (a path, a function, a command to run) instead of vague goals, and remove items as they are completed.
89
+
90
+ ## Committing and safety
91
+
92
+ - When you finish a step that changes the state, commit ${fileName} together with the related code changes so the handoff stays in sync with the branch and is never lost if the session is interrupted.
93
+ - Never include secrets, tokens, API keys, passwords, or personal data in ${fileName} — it is committed to the repository.`;
94
+ };
95
+
/**
 * Render a value as a single-line YAML scalar for the SKILL.md frontmatter.
 *
 * Text that is safe as a YAML plain scalar is returned unchanged, so the
 * default name/description produce exactly the same file as before. Anything
 * that would break or change the frontmatter (empty text, surrounding
 * whitespace, line breaks, a leading YAML indicator, `: `, a trailing `:`, or
 * ` #`) is emitted as a double-quoted scalar via JSON.stringify — JSON strings
 * are valid YAML double-quoted scalars.
 *
 * @param {*} value - Value to render (converted with String()).
 * @returns {string} Plain or double-quoted YAML scalar.
 */
const toYamlFrontmatterScalar = value => {
  const text = String(value);
  const isPlainSafe =
    text !== '' &&
    text === text.trim() &&
    !/[\r\n]/.test(text) &&
    !/^[-?:,[\]{}#&*!|>'"%@`]/.test(text) &&
    !/:\s|:$|\s#/.test(text);
  return isPlainSafe ? text : JSON.stringify(text);
};

/**
 * Build a complete SKILL.md document (Agent Skills standard): YAML frontmatter
 * with `name` and `description`, followed by the instructions body. This exact
 * file is deployed verbatim for both Claude (.claude/skills/handoff/SKILL.md)
 * and Codex (.agents/skills/handoff/SKILL.md).
 *
 * `name` and `description` are quoted only when they could not be written as a
 * YAML plain scalar, so custom values containing `: `, ` #` or line breaks no
 * longer produce invalid frontmatter.
 *
 * @param {Object} [options]
 * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name.
 * @param {string} [options.name=HANDOFF_SKILL_NAME] - Skill name (frontmatter).
 * @param {string} [options.description=HANDOFF_SKILL_DESCRIPTION] - Skill description.
 * @returns {string} The full SKILL.md content (ends with a newline).
 */
export const buildHandoffSkillFile = ({ fileName = HANDOFF_FILE_NAME, name = HANDOFF_SKILL_NAME, description = HANDOFF_SKILL_DESCRIPTION } = {}) => {
  return `---
name: ${toYamlFrontmatterScalar(name)}
description: ${toYamlFrontmatterScalar(description)}
---

${buildHandoffSkillBody({ fileName })}
`;
};
117
+
118
+ /**
119
+ * Build a minimal activation nudge for the system prompt. The full procedure
120
+ * lives in the deployed SKILL.md (loaded natively by the tool); this short
121
+ * pointer only ensures the read-at-session-start behavior reliably fires, since
122
+ * that is triggered by session lifecycle rather than by a task description.
123
+ * NOTE(review): the title line is the literal "HANDOFF.md continuity skill" and the `.claude/skills/` / `.agents/skills/` prefixes are spelled out here rather than derived from `fileName` or shared constants — confirm this is intended.
124
+ * @param {Object} [options]
125
+ * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name.
126
+ * @param {string} [options.name=HANDOFF_SKILL_NAME] - Skill name; also used as the directory name in the two skill paths mentioned.
127
+ * @returns {string} The activation nudge; it starts with a newline and has no trailing newline.
128
+ */
129
+ export const buildHandoffSubPrompt = ({ fileName = HANDOFF_FILE_NAME, name = HANDOFF_SKILL_NAME } = {}) => {
130
+ return `
131
+ HANDOFF.md continuity skill (experimental, --use-handoff).
132
+ - A reusable "${name}" Agent Skill is installed in this workspace (.claude/skills/${name}/ for Claude, .agents/skills/${name}/ for Codex). It defines how to read and maintain ${fileName} so any session can continue the work of a previous one — even across tools (Claude and Codex) in the same pull request.
133
+ - At the start of this session, use the ${name} skill: if ${fileName} exists in the repository root, read it first and continue from its "Next steps". Create or update ${fileName} as you make progress and commit it to the pull request branch.`;
134
+ };
135
+
/**
 * Return the handoff activation nudge for the system prompt, or nothing.
 *
 * @param {Object} argv - Parsed command line arguments (reads `argv.useHandoff`).
 * @returns {string} The nudge built with default options when `argv.useHandoff`
 *   is truthy; an empty string when `argv` is missing or the flag is off.
 */
export const getHandoffSubPrompt = argv => {
  const handoffEnabled = Boolean(argv && argv.useHandoff);
  return handoffEnabled ? buildHandoffSubPrompt() : '';
};
148
+
149
+ // Default export bundles every named export of this module — the three constants as well as the builder functions (mirrors architecture-care module)
150
+ export default {
151
+ HANDOFF_FILE_NAME,
152
+ HANDOFF_SKILL_NAME,
153
+ HANDOFF_SKILL_DESCRIPTION,
154
+ buildHandoffSkillBody,
155
+ buildHandoffSkillFile,
156
+ buildHandoffSubPrompt,
157
+ getHandoffSubPrompt,
158
+ };
@@ -189,6 +189,7 @@ const KNOWN_OPTION_NAMES = [
189
189
  'prompt-issue-reporting',
190
190
  'prompt-architecture-care',
191
191
  'prompt-case-studies',
192
+ 'use-handoff',
192
193
  'prompt-playwright-mcp',
193
194
  'prompt-check-sibling-pull-requests',
194
195
  'enable-workspaces',
@@ -221,6 +222,10 @@ const KNOWN_OPTION_NAMES = [
221
222
  'prompt-ensure-all-requirements-are-met',
222
223
  'finalize',
223
224
  'finalize-model',
225
+ 'keep-working-until-all-requirements-are-fully-done',
226
+ 'keep-going-until-all-requirements-are-fully-done',
227
+ 'keep-working',
228
+ 'keep-going',
224
229
  ];
225
230
 
226
231
  /**
@@ -480,6 +480,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
480
480
  description: 'Create comprehensive case study documentation for the issue including logs, analysis, timeline, root cause investigation, and proposed solutions. Organizes findings into ./docs/case-studies/issue-{id}/ directory. Supported for --tool claude and --tool codex.',
481
481
  default: false,
482
482
  },
483
+ 'use-handoff': {
484
+ type: 'boolean',
485
+ description: '[EXPERIMENTAL] Enable the HANDOFF.md continuity Agent Skill so a session can continue the work of a previous session — even when a different AI tool is used (e.g. Claude and Codex continuing each other in the same pull request). A real SKILL.md (the open Agent Skills standard) is deployed into the working directory so each tool loads it natively (.claude/skills/handoff/ for Claude, .agents/skills/handoff/ for Codex). The AI reads HANDOFF.md (repository root) first when present and keeps it updated with task, current state, decisions, next steps, gotchas, and critical files. HANDOFF.md is committed to the PR branch so it persists across the ephemeral per-session working directories; the SKILL.md itself is re-deployed each session and git-excluded so it never pollutes the PR. The same skill file is used identically for --tool claude and --tool codex. Disabled by default (issue #1877).',
486
+ default: false,
487
+ },
483
488
  'prompt-playwright-mcp': {
484
489
  type: 'boolean',
485
490
  description: 'Enable Playwright MCP browser automation hints in system prompt (enabled by default, only takes effect if Playwright MCP is installed). Use --no-prompt-playwright-mcp to disable. Supported for --tool claude, --tool codex, --tool opencode, --tool agent, --tool qwen, and --tool gemini.',
@@ -586,6 +591,12 @@ export const SOLVE_OPTION_DEFINITIONS = {
586
591
  description: '[EXPERIMENTAL] Model to use for --finalize iterations. Defaults to the same model as --model.',
587
592
  default: undefined,
588
593
  },
594
+ 'keep-working-until-all-requirements-are-fully-done': {
595
+ type: 'string',
596
+ description: '[EXPERIMENTAL] After the main solve completes, scan the pull request description, the AI solution summary and changed markdown documents for strong indicators of deferred/delayed/out-of-scope work (e.g. "future work", "out of scope", "deferred", "follow-up PR", "TODO") and automatically restart the AI tool to finish everything in this single pull request. Accepts a number of restarts (default: 5), or "forever"/"unlimited" to remove the limit. Bare flag means the default of 5.',
597
+ alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
598
+ default: undefined,
599
+ },
589
600
  'working-session-live-progress': {
590
601
  type: 'string',
591
602
  description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
@@ -836,6 +847,29 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
836
847
  }
837
848
  }
838
849
 
850
+ // --keep-working-until-all-requirements-are-fully-done normalization
851
+ // Issue #1883: the flag accepts a number of restarts, the keywords
852
+ // "forever"/"unlimited"/"infinite", or no value (bare flag => default of 5).
853
+ // We canonicalize the bare-flag / empty-string case here so downstream
854
+ // detection (normalizeKeepWorkingLimit) sees a meaningful value. Final
855
+ // numeric normalization happens at runtime in solve.keep-working.lib.mjs.
856
+ {
857
+ const keepWorkingAliases = ['--keep-working-until-all-requirements-are-fully-done', '--keep-going-until-all-requirements-are-fully-done', '--keep-working', '--keep-going'];
858
+ const keepWorkingProvided = keepWorkingAliases.some(alias => hasRawOption(rawArgs, alias));
859
+ if (keepWorkingProvided) {
860
+ const current = argv.keepWorkingUntilAllRequirementsAreFullyDone;
861
+ // Bare flag (no value) -> yargs may yield true or an empty string; treat as default count.
862
+ if (current === true || current === '' || current === undefined || current === null) {
863
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = 5;
864
+ } else if (typeof current === 'string') {
865
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = current.trim();
866
+ }
867
+ } else if (argv.keepWorkingUntilAllRequirementsAreFullyDone === undefined) {
868
+ // Not provided: keep it disabled (do not coerce the string-type default).
869
+ argv.keepWorkingUntilAllRequirementsAreFullyDone = undefined;
870
+ }
871
+ }
872
+
839
873
  // --working-session-live-progress normalization
840
874
  // When passed as --working-session-live-progress (no value), yargs gives true for string type
841
875
  // Normalize: true → "comment", validate known values
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Pure detection + normalization helpers for the keep-working feature.
5
+ *
6
+ * This module intentionally has NO use-m / command-stream / network imports so
7
+ * it can be unit-tested in isolation (mirroring auto-iteration-limits.lib.mjs).
8
+ * The orchestration lives in solve.keep-working.lib.mjs.
9
+ *
10
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
11
+ */
12
+
13
+ /**
14
+ * The default number of auto-restarts when the feature is enabled without an
15
+ * explicit count.
16
+ */
17
+ export const DEFAULT_KEEP_WORKING_LIMIT = 5;
18
+
19
+ /**
20
+ * The reinforcement prompt appended to every keep-working restart, in addition
21
+ * to the concrete detected reasons. Taken verbatim from issue #1883.
22
+ */
23
+ export const KEEP_WORKING_PROMPT = 'Please plan and execute everything in this single pull request, you have unlimited time and context, as context auto-compacts and you can continue indefinitely, until it is each and every requirement fully addressed, and everything is totally done.';
24
+
25
/**
 * Catalogue of textual signals that work was deferred, delayed or pushed to a
 * later pull request. Recall is deliberately preferred over precision: a user
 * who turns on --keep-working-until-all-requirements-are-fully-done wants the
 * AI to keep going, so a few false positives are acceptable (issue #1883).
 *
 * Every entry is an object with:
 *   - `label`   human-readable reason reported to the user / AI
 *   - `pattern` a global, case-insensitive RegExp
 *
 * IMPORTANT: the expressions must stay tied to deferral wording so that the
 * reinforcement prompt itself ("until it is each and every requirement fully
 * addressed") never matches, which would cause an endless restart loop.
 */
export const DEFERRED_WORK_PATTERNS = [
  ['out of scope', /\b(?:out[\s-]of[\s-]scope|beyond\s+the\s+scope|outside\s+the\s+scope|not\s+(?:in|within)\s+(?:the\s+)?scope)\b/gi],
  ['future work', /\bfuture\s+(?:work|improvements?|enhancements?|iterations?|steps?|considerations?)\b/gi],
  ['future / separate / follow-up pull request', /\b(?:in\s+a\s+|a\s+)?(?:future|separate|subsequent|later|next|follow[\s-]?up|another)\s+(?:pull\s+request|pr|mr|merge\s+request|change(?:set)?|commit)\b/gi],
  ['follow-up work', /\bfollow[\s-]?up(?:\s+(?:work|task|item|pr|pull\s+request|issue))?\b/gi],
  ['deferred', /\bdefer(?:red|ring|s)?\b(?!\s+to\s+the\s+caller)/gi],
  ['delayed / postponed', /\b(?:delayed|postponed|postpone|deprioriti[sz]ed)\b/gi],
  ['planned for later / another pull request', /\bplanned\s+for\s+(?:a\s+)?(?:future|later|the\s+next|another|separate|subsequent)\b/gi],
  ['left / leaving for later', /\ble(?:ft|aving|ave)\s+(?:it\s+|this\s+|that\s+|them\s+)?(?:for\s+(?:later|now|the\s+future)|as\s+(?:a\s+)?(?:future|follow[\s-]?up))/gi],
  ['will be addressed later / separately', /\b(?:will|to)\s+be\s+(?:addressed|handled|implemented|done|tackled|covered|completed|fixed)\s+(?:later|separately|in\s+(?:a\s+)?(?:future|subsequent|separate|follow[\s-]?up|another|the\s+next))/gi],
  ['not implemented yet', /\bnot\s+(?:yet\s+)?(?:implemented|done|completed|finished|addressed|supported|covered)(?:\s+yet)?\b/gi],
  ['to be implemented / TBD', /\b(?:to\s+be\s+(?:implemented|done|added|determined|decided)|tbd|to[\s-]?dos?|fixme)\b/gi],
  ['remaining work / not covered', /\b(?:remaining\s+(?:work|tasks?|items?)|not\s+covered\s+(?:here|in\s+this\s+(?:pr|pull\s+request|change))|won['’]?t\s+(?:be\s+)?(?:covered|implemented|addressed|done)(?:\s+here)?)\b/gi],
  ['tracked separately / in a separate issue', /\btrack(?:ed|ing)?\s+(?:this\s+|it\s+|them\s+|separately\s+)?(?:in\s+)?(?:a\s+)?(?:separate|new|future|follow[\s-]?up)\s+(?:issue|ticket|task)\b/gi],
  ['for now / as a stopgap / temporary', /\b(?:for\s+now|as\s+a\s+(?:stop[\s-]?gap|temporary\s+measure|first\s+step)|in\s+the\s+meantime)\b/gi],
].map(([label, pattern]) => ({ label, pattern }));
54
+
55
// Lower-case keywords that all mean "no restart limit".
const UNLIMITED_KEYWORDS = new Set(['forever', 'unlimited', 'infinite', 'infinity', 'inf', 'no-limit', 'nolimit', 'none', 'always']);

/**
 * Tells whether a raw flag value asks for an unlimited number of restarts.
 *
 * Numbers qualify when they are `Infinity` or `0`; strings qualify when,
 * after trimming and lower-casing, they are "0" or one of the unlimited
 * keywords. Every other value does not qualify.
 *
 * @param {*} value
 * @returns {boolean}
 */
export const isUnlimitedKeepWorking = value => {
  switch (typeof value) {
    case 'number':
      return value === Infinity || value === 0;
    case 'string': {
      const keyword = value.trim().toLowerCase();
      return keyword === '0' || UNLIMITED_KEYWORDS.has(keyword);
    }
    default:
      return false;
  }
};
72
+
73
/**
 * Turn the raw --keep-working-until-all-requirements-are-fully-done value
 * into a numeric restart limit.
 *
 * Mapping, checked in this order:
 *   - undefined / null / false / ""          -> 0 (feature disabled)
 *   - true (flag given without a value)      -> `fallback`
 *   - unlimited keyword, "0" or 0            -> Infinity (no limit)
 *   - finite number / numeric string >= 1    -> Math.floor(value)
 *   - anything else                          -> `fallback`
 *
 * @param {*} value
 * @param {number} [fallback=DEFAULT_KEEP_WORKING_LIMIT]
 * @returns {number} numeric limit (Infinity for unlimited, 0 when disabled)
 */
export const normalizeKeepWorkingLimit = (value, fallback = DEFAULT_KEEP_WORKING_LIMIT) => {
  const disabledValues = [undefined, null, false, ''];
  if (disabledValues.includes(value)) return 0;

  // A bare flag carries no count, so the fallback applies.
  if (value === true) return fallback;

  if (isUnlimitedKeepWorking(value)) return Infinity;

  const count = Number(value);
  return Number.isFinite(count) && count >= 1 ? Math.floor(count) : fallback;
};
104
+
105
/**
 * Render a restart limit for log output: "unlimited" for Infinity, otherwise
 * the limit converted to a string.
 * @param {number} limit
 * @returns {string}
 */
export const formatKeepWorkingLimit = limit => {
  if (limit === Infinity) return 'unlimited';
  return `${limit}`;
};
111
+
112
/**
 * Look through one piece of text for deferred-work indicators.
 *
 * Every pattern in DEFERRED_WORK_PATTERNS is run over the whole text. Each hit
 * is reported with up to 40 characters of context on either side, with
 * whitespace runs collapsed. Hits that share both label and (case-insensitive)
 * snippet are reported once.
 *
 * @param {string} text - the text to scan
 * @param {string} [source='text'] - a label describing where the text came from
 * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
 */
export const detectDeferredWork = (text, source = 'text') => {
  if (typeof text !== 'string' || text.length === 0) return [];

  const CONTEXT_CHARS = 40;
  const found = [];
  const reported = new Set();

  DEFERRED_WORK_PATTERNS.forEach(({ label, pattern }) => {
    // The regexes are global and shared between calls, so start from a clean cursor.
    pattern.lastIndex = 0;

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const [matchedText] = hit;
      const from = Math.max(0, hit.index - CONTEXT_CHARS);
      const to = Math.min(text.length, hit.index + matchedText.length + CONTEXT_CHARS);
      const snippet = text.slice(from, to).replace(/\s+/g, ' ').trim();

      const key = `${label}::${snippet.toLowerCase()}`;
      if (!reported.has(key)) {
        reported.add(key);
        found.push({ label, match: matchedText, snippet, source });
      }

      // An empty match would not advance the cursor; step over it to avoid looping forever.
      if (pattern.lastIndex === hit.index) pattern.lastIndex += 1;
    }

    // Leave the shared regex in a neutral state for the next caller.
    pattern.lastIndex = 0;
  });

  return found;
};
151
+
152
/**
 * Feed every given source through detectDeferredWork and gather all hits, in
 * source order, into one flat list. A missing `sources` value yields [].
 *
 * @param {Array<{source: string, text: string}>} sources
 * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
 */
export const detectDeferredWorkInSources = sources => {
  const collected = [];
  for (const entry of sources || []) {
    const { source, text } = entry;
    for (const hit of detectDeferredWork(text, source)) {
      collected.push(hit);
    }
  }
  return collected;
};
166
+
167
/**
 * Pull the added lines out of a unified diff patch.
 *
 * A line counts as added when it starts with "+" but not with "+++"; the
 * leading "+" is dropped and the kept lines are re-joined with newlines.
 * Anything that is not a non-empty string yields "".
 *
 * @param {string} patch
 * @returns {string}
 */
export const extractAddedLinesFromPatch = patch => {
  if (typeof patch !== 'string' || patch === '') return '';

  const added = [];
  for (const line of patch.split('\n')) {
    const isAddition = line.startsWith('+') && !line.startsWith('+++');
    if (isAddition) added.push(line.slice(1));
  }
  return added.join('\n');
};
180
+
181
/**
 * Compose the feedback lines handed to the AI for one keep-working restart.
 *
 * The result is a banner naming the restart number and limit, then one bullet
 * per detection (at most 15, with a "...and N more" line for the rest), then
 * the fixed "finish everything now" instructions ending with
 * KEEP_WORKING_PROMPT.
 *
 * @param {Array<{label, snippet, source}>} detections
 * @param {number} iteration
 * @param {number} limit
 * @returns {string[]}
 */
export const buildKeepWorkingFeedback = (detections, iteration, limit) => {
  const limitLabel = formatKeepWorkingLimit(limit);
  const rule = '='.repeat(60);

  // Cap the listed reasons so the prompt stays focused.
  const MAX_SHOWN = 15;
  const all = detections || [];
  const visible = all.slice(0, MAX_SHOWN);
  const hiddenCount = all.length - visible.length;

  const header = ['', rule, `🔁 KEEP WORKING UNTIL ALL REQUIREMENTS ARE FULLY DONE (restart ${iteration}/${limitLabel}):`, rule, '', 'It looks like some work was deferred, delayed or planned for a future pull request.', 'The following strong indicators of unfinished / deferred work were detected:', ''];

  const bullets = visible.map(({ label, source, snippet }) => ` • [${label}] in ${source}: "${snippet}"`);

  const overflow = hiddenCount > 0 ? [` • ...and ${hiddenCount} more indicator(s)`] : [];

  const footer = ['', 'There is NO future pull request. This is the single pull request where everything must be done.', 'Do not defer, delay or postpone anything. Remove any "future work" / "out of scope" / "TODO" / "follow-up" notes by actually implementing them now.', '', KEEP_WORKING_PROMPT, ''];

  return [...header, ...bullets, ...overflow, ...footer];
};
211
+
212
+ export default {
213
+ DEFAULT_KEEP_WORKING_LIMIT,
214
+ KEEP_WORKING_PROMPT,
215
+ DEFERRED_WORK_PATTERNS,
216
+ isUnlimitedKeepWorking,
217
+ normalizeKeepWorkingLimit,
218
+ formatKeepWorkingLimit,
219
+ detectDeferredWork,
220
+ detectDeferredWorkInSources,
221
+ extractAddedLinesFromPatch,
222
+ buildKeepWorkingFeedback,
223
+ };
@@ -0,0 +1,285 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Keep-working-until-done module for solve.mjs
5
+ *
6
+ * [EXPERIMENTAL] When --keep-working-until-all-requirements-are-fully-done is
7
+ * enabled, after the main solve (and any other post-processing) completes, this
8
+ * module scans the pull request description, the AI working-session/solution
9
+ * summary, and the markdown documents changed by the pull request for strong
10
+ * indicators that the AI deferred, delayed or postponed work to a future pull
11
+ * request / iteration (e.g. "out of scope", "future work", "deferred",
12
+ * "follow-up PR", "TODO", ...).
13
+ *
14
+ * When such indicators are found, it automatically restarts the AI tool with a
15
+ * prompt instructing it to finish everything in this single pull request, in
16
+ * addition to the concrete detected reasons. It keeps restarting until no
17
+ * indicators remain or until the configured restart limit is reached.
18
+ *
19
+ * By default the restart limit is 5. The limit can be set to a custom number,
20
+ * or to "forever" / "unlimited" / "infinite" / 0 to remove the limit entirely.
21
+ *
22
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
23
+ */
24
+
25
+ // Check if use is already defined globally (when imported from solve.mjs)
26
+ // If not, fetch it (when running standalone)
27
+ if (typeof globalThis.use === 'undefined') {
28
+ globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
29
+ }
30
+ const use = globalThis.use;
31
+
32
+ // Use command-stream for consistent $ behavior across runtimes
33
+ const { $: __rawDollar$ } = await use('command-stream');
34
+ const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
35
+ const $ = wrapDollarWithGhRetry(__rawDollar$);
36
+
37
+ // Import shared library functions
38
+ const lib = await import('./lib.mjs');
39
+ const { log, cleanErrorMessage } = lib;
40
+
41
+ // Import shared restart utilities
42
+ const restartShared = await import('./solve.restart-shared.lib.mjs');
43
+ const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;
44
+
45
+ const sentryLib = await import('./sentry.lib.mjs');
46
+ const { reportError } = sentryLib;
47
+
48
+ // Pure detection + normalization helpers live in a separate, network-free
49
+ // module so they can be unit-tested in isolation (issue #1883).
50
+ const detectLib = await import('./solve.keep-working.detect.lib.mjs');
51
+ const { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, extractAddedLinesFromPatch, buildKeepWorkingFeedback } = detectLib;
52
+
53
+ // Re-export the pure helpers so existing importers of this module keep working.
54
+ export { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, buildKeepWorkingFeedback };
55
+
56
/**
 * Gather the texts that will be scanned for deferred-work indicators, in this
 * order:
 *   1. the pull request description (body), fetched with `gh api`;
 *   2. the AI working-session / solution summary passed in by the caller;
 *   3. the added lines of every non-removed markdown file changed by the pull
 *      request (.md / .markdown / .mdx), fetched with `gh api`.
 *
 * Blank texts are left out. A failure while fetching one source is reported
 * through reportError and does not stop the remaining sources from being
 * collected.
 *
 * @param {object} params
 * @returns {Promise<Array<{source: string, text: string}>>}
 */
export const collectDeferredWorkSources = async ({ owner, repo, prNumber, resultSummary }) => {
  const collected = [];

  // 1. Pull request description
  try {
    const response = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.body // ""'`;
    const description = response.code === 0 ? response.stdout.toString() : '';
    if (description && description.trim()) {
      collected.push({ source: 'pull request description', text: description });
    }
  } catch (error) {
    reportError(error, { context: 'keep_working_collect_pr_body', owner, repo, prNumber, operation: 'fetch_pr_body' });
  }

  // 2. AI working-session / solution summary (already in memory, costs no tokens)
  const hasSummary = typeof resultSummary === 'string' && Boolean(resultSummary.trim());
  if (hasSummary) {
    collected.push({ source: 'AI solution summary', text: resultSummary });
  }

  // 3. Changed markdown documents (only the lines the diff adds)
  try {
    const listing = await $`gh api repos/${owner}/${repo}/pulls/${prNumber}/files --paginate`;
    if (listing.code === 0) {
      const markdownName = /\.(md|markdown|mdx)$/i;
      const changedFiles = JSON.parse(listing.stdout.toString() || '[]');
      for (const entry of changedFiles) {
        const filename = entry.filename || '';
        const isLiveMarkdown = markdownName.test(filename) && entry.status !== 'removed';
        if (!isLiveMarkdown) continue;

        const addedText = extractAddedLinesFromPatch(entry.patch);
        if (addedText && addedText.trim()) {
          collected.push({ source: `changed markdown document ${filename}`, text: addedText });
        }
      }
    }
  } catch (error) {
    reportError(error, { context: 'keep_working_collect_md_files', owner, repo, prNumber, operation: 'fetch_pr_files' });
  }

  return collected;
};
107
+
108
/**
 * Runs keep-working restart iterations after the main solve.
 *
 * Each pass re-collects the scan sources, runs the deferred-work detector and,
 * while indicators remain and the restart limit has not been reached, syncs
 * the local branch and restarts the AI tool with feedback listing the detected
 * reasons. The loop also stops on a usage limit or after
 * MAX_CONSECUTIVE_ERRORS consecutive API errors.
 *
 * @param {object} params
 * @param {string} params.issueUrl
 * @param {string} params.owner
 * @param {string} params.repo
 * @param {string|number} params.issueNumber
 * @param {string|number} params.prNumber
 * @param {string} params.branchName
 * @param {string} params.tempDir
 * @param {string} [params.workspaceTmpDir]
 * @param {object} params.argv - CLI arguments
 * @param {function} params.cleanupClaudeFile - cleanup function
 * @param {string} [params.resultSummary] - AI solution summary from the last session
 * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
 *   null when the feature is disabled, there is no pull request, or no restart
 *   was executed; otherwise the session data gathered from the restarts.
 */
export const runKeepWorkingUntilDone = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
  const limit = normalizeKeepWorkingLimit(argv.keepWorkingUntilAllRequirementsAreFullyDone);
  if (!limit || !prNumber) {
    return null;
  }

  await log('');
  await log(`🔁 KEEP-WORKING: Scanning for deferred / delayed / out-of-scope work (limit: ${formatKeepWorkingLimit(limit)} restart(s))`);
  await log(' Sources: pull request description, AI solution summary, changed markdown documents');
  await log('');

  // Get PR merge state status for the iterations
  // NOTE(review): the REST pull request object appears to expose `mergeable_state`,
  // while `mergeStateStatus` is the GraphQL field name — confirm this jq filter
  // does not always yield "null".
  let currentMergeStateStatus = null;
  try {
    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
    if (prStateResult.code === 0) {
      currentMergeStateStatus = prStateResult.stdout.toString().trim();
    }
  } catch {
    // Ignore errors getting merge state
  }

  let sessionId;
  let anthropicTotalCostUSD;
  let publicPricingEstimate;
  let pricingInfo;
  let lastResultSummary = resultSummary;
  let consecutiveErrors = 0;
  // Hard safety cap even in "unlimited" mode, to avoid spinning forever on
  // repeated failures (issue #1883: "limit it with 5 auto-restarts ... in case
  // of errors"). Only consecutive errors count toward this cap.
  const MAX_CONSECUTIVE_ERRORS = 3;

  let iteration = 0;
  while (true) {
    // Gather and scan sources fresh on every iteration.
    let sources = [];
    try {
      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
    } catch (error) {
      reportError(error, { context: 'keep_working_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
      await log(`⚠️ KEEP-WORKING: Could not collect sources: ${cleanErrorMessage(error)}`, { level: 'warning' });
      break;
    }

    const detections = detectDeferredWorkInSources(sources);

    if (detections.length === 0) {
      if (iteration === 0) {
        await log('✅ KEEP-WORKING: No deferred / delayed / out-of-scope work detected. Nothing to restart for.');
      } else {
        await log(`✅ KEEP-WORKING: No more deferred work detected after ${iteration} restart(s). All requirements appear to be fully done.`);
      }
      break;
    }

    if (iteration >= limit) {
      await log(`🛑 KEEP-WORKING: Reached restart limit (${formatKeepWorkingLimit(limit)}) but ${detections.length} deferred-work indicator(s) still detected.`);
      await log(' Stopping to avoid an unbounded loop. Increase the limit (or use "forever"/"unlimited") to keep going.');
      for (const detection of detections.slice(0, 10)) {
        await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
      }
      break;
    }

    iteration++;
    await log('');
    await log(`🔁 KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)}: ${detections.length} deferred-work indicator(s) detected, restarting...`);
    for (const detection of detections.slice(0, 10)) {
      await log(` • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
    }

    // Issue #1572 pattern: sync local branch with remote before each iteration
    try {
      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
      if (pullResult.code === 0) {
        await log(` Synced local branch ${branchName} from remote`, { verbose: true });
      } else {
        await log(` Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
      }
    } catch (error) {
      reportError(error, { context: 'keep_working_git_pull', branchName, operation: 'git_pull' });
      await log(` Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
    }

    const feedbackLines = buildKeepWorkingFeedback(detections, iteration, limit);

    const iterationResult = await executeToolIteration({
      issueUrl,
      owner,
      repo,
      issueNumber,
      prNumber,
      branchName,
      tempDir,
      workspaceTmpDir,
      mergeStateStatus: currentMergeStateStatus,
      feedbackLines,
      argv: {
        ...argv,
        // Reinforce the "finish everything now" guidance in the system prompt.
        promptEnsureAllRequirementsAreMet: true,
        // Prevent recursive keep-working inside the restart iteration.
        // Must be `false`, not 0: normalizeKeepWorkingLimit treats 0 as
        // "unlimited" (Infinity), which would enable the feature instead of
        // disabling it.
        keepWorkingUntilAllRequirementsAreFullyDone: false,
      },
    });

    // Update session data from the restart.
    if (iterationResult) {
      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
      // The newest summary replaces the previous one as a scan source for the next pass.
      if (iterationResult.result) lastResultSummary = iterationResult.result;
    }

    // Issue #1883: cap consecutive errors so we don't spin forever (especially
    // important in "unlimited" mode).
    if (isUsageLimitReached(iterationResult)) {
      await log('🛑 KEEP-WORKING: Usage limit reached during restart. Stopping keep-working loop.');
      break;
    }
    if (isApiError(iterationResult)) {
      consecutiveErrors++;
      await log(`⚠️ KEEP-WORKING: API error during restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
        await log('🛑 KEEP-WORKING: Too many consecutive errors. Stopping keep-working loop.');
        break;
      }
    } else {
      consecutiveErrors = 0;
    }

    await log(`✅ KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)} complete`);
    await log('');
  }

  // Clean up CLAUDE.md/.gitkeep after restarts
  try {
    await cleanupClaudeFile(tempDir, branchName, null, argv);
  } catch (error) {
    reportError(error, { context: 'keep_working_cleanup', branchName, operation: 'cleanup_claude_file' });
  }

  if (iteration === 0) return null;
  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
};
272
+
273
+ export default {
274
+ DEFAULT_KEEP_WORKING_LIMIT,
275
+ KEEP_WORKING_PROMPT,
276
+ DEFERRED_WORK_PATTERNS,
277
+ isUnlimitedKeepWorking,
278
+ normalizeKeepWorkingLimit,
279
+ formatKeepWorkingLimit,
280
+ detectDeferredWork,
281
+ detectDeferredWorkInSources,
282
+ collectDeferredWorkSources,
283
+ buildKeepWorkingFeedback,
284
+ runKeepWorkingUntilDone,
285
+ };
package/src/solve.mjs CHANGED
@@ -45,6 +45,7 @@ const watchLib = await import('./solve.watch.lib.mjs');
45
45
  const { startWatchMode } = watchLib;
46
46
  const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
47
47
  const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
48
+ const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
48
49
  const exitHandler = await import('./exit-handler.lib.mjs');
49
50
  const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
50
51
  const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
@@ -852,6 +853,14 @@ try {
852
853
  let resultModelUsage = toolResult.resultModelUsage || null;
853
854
  let streamTokenUsage = toolResult.streamTokenUsage || null;
854
855
  let subAgentCalls = toolResult.subAgentCalls || null; // Issue #1590
856
+
857
// Fold the session data returned by a restart-style step into the running
// values; fields that are missing or falsy on the result keep their current value.
const applyRestartResult = result => {
  if (result) {
    if (result.sessionId) sessionId = result.sessionId;
    if (result.anthropicTotalCostUSD) anthropicTotalCostUSD = result.anthropicTotalCostUSD;
    if (result.publicPricingEstimate) publicPricingEstimate = result.publicPricingEstimate;
    if (result.pricingInfo) pricingInfo = result.pricingInfo;
  }
};
855
864
  limitReached = toolResult.limitReached;
856
865
  cleanupContext.limitReached = limitReached;
857
866
 
@@ -1249,12 +1258,7 @@ try {
1249
1258
  });
1250
1259
 
1251
1260
  // Update session data from restart
1252
- if (restartResult) {
1253
- if (restartResult.sessionId) sessionId = restartResult.sessionId;
1254
- if (restartResult.anthropicTotalCostUSD) anthropicTotalCostUSD = restartResult.anthropicTotalCostUSD;
1255
- if (restartResult.publicPricingEstimate) publicPricingEstimate = restartResult.publicPricingEstimate;
1256
- if (restartResult.pricingInfo) pricingInfo = restartResult.pricingInfo;
1257
- }
1261
+ applyRestartResult(restartResult);
1258
1262
 
1259
1263
  // Clean up CLAUDE.md/.gitkeep again after restart
1260
1264
  await cleanupClaudeFile(tempDir, branchName, null, argv);
@@ -1268,13 +1272,9 @@ try {
1268
1272
  }
1269
1273
 
1270
1274
  // Issue #1383: --finalize
1271
- const autoEnsureResult = await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile });
1272
- if (autoEnsureResult) {
1273
- if (autoEnsureResult.sessionId) sessionId = autoEnsureResult.sessionId;
1274
- if (autoEnsureResult.anthropicTotalCostUSD) anthropicTotalCostUSD = autoEnsureResult.anthropicTotalCostUSD;
1275
- if (autoEnsureResult.publicPricingEstimate) publicPricingEstimate = autoEnsureResult.publicPricingEstimate;
1276
- if (autoEnsureResult.pricingInfo) pricingInfo = autoEnsureResult.pricingInfo;
1277
- }
1275
+ applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
1276
+ // Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
1277
+ applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
1278
1278
 
1279
1279
  // Start watch mode if enabled OR if we need to handle uncommitted changes
1280
1280
  if (argv.verbose) {