npm - @link-assistant/hive-mind - Versions diffs - 1.74.12 → 1.76.0 - Mend

@link-assistant/hive-mind 1.74.12 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/CHANGELOG.md +50 -0
package/package.json +1 -1
package/src/claude.lib.mjs +5 -0
package/src/claude.prompts.lib.mjs +2 -1
package/src/codex.lib.mjs +5 -0
package/src/codex.prompts.lib.mjs +2 -1
package/src/handoff-skill.lib.mjs +256 -0
package/src/handoff.prompts.lib.mjs +158 -0
package/src/option-suggestions.lib.mjs +5 -0
package/src/solve.config.lib.mjs +34 -0
package/src/solve.keep-working.detect.lib.mjs +223 -0
package/src/solve.keep-working.lib.mjs +285 -0
package/src/solve.mjs +13 -13

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,55 @@
 # @link-assistant/hive-mind
+## 1.76.0
+### Minor Changes
+- 80c56fa: Add experimental `--use-handoff` HANDOFF.md continuity **Agent Skill** (issue
+  #1877). When enabled, Hive Mind deploys a real `SKILL.md` (the Agent Skills open
+  standard created by Anthropic) into the session working directory for both tools
+  natively — `.claude/skills/handoff/SKILL.md` for `--tool claude` and
+  `.agents/skills/handoff/SKILL.md` for `--tool codex` — so the very same skill
+  teaches each tool to read `HANDOFF.md` (repository root) first when present and
+  keep it updated with task, current state, decisions, next steps, gotchas, and
+  critical files. A minimal activation nudge in the system prompt ensures the
+  read-at-session-start behavior fires reliably. Because each Hive Mind working
+  session runs in an ephemeral working directory cloned from the PR branch, the
+  handoff file is committed to the branch — making it the shared cross-session,
+  cross-tool memory so Claude and Codex can continue each other's work in a single
+  pull request. The deployed `SKILL.md` is tooling (re-deployed every session) and
+  is kept out of the target repository via `.git/info/exclude`, so it never appears
+  in the PR. Disabled by default; auto-forwarded by `hive`. Includes a case study
+  in `docs/case-studies/issue-1877/` and tests in `tests/handoff-prompt.test.mjs`.
+## 1.75.0
+### Minor Changes
+- d2adf6b: feat(solve): experimental `--keep-working-until-all-requirements-are-fully-done` (#1883)
+  Add an experimental `solve` option that, after the main run (and any `--finalize`
+  pass), scans three cheap sources — the pull request description, the AI solution
+  summary, and the added lines of changed markdown documents — for strong
+  indicators of deferred work ("out of scope", "future work", "follow-up PR",
+  "deferred", "delayed", "TODO"/"TBD", etc.) using ~14 regular expressions. When
+  indicators are found it auto-restarts the AI tool with the concrete detected
+  reasons plus a verbatim reinforcement prompt, and repeats until the scan is clean
+  or the restart limit is reached.
+  Limit semantics:
+  - `--keep-working-until-all-requirements-are-fully-done` (bare) → 5 restarts
+  - `... 3` → an explicit count
+  - `... forever` / `unlimited` / `infinite` / `0` → no limit (with a hard cap of 3
+    consecutive errors as a safety net)
+  Aliases: `--keep-going-until-all-requirements-are-fully-done`, `--keep-working`,
+  `--keep-going`.
+  Detection lives in a pure, network-free module
+  (`src/solve.keep-working.detect.lib.mjs`) for full unit-test coverage;
+  orchestration lives in `src/solve.keep-working.lib.mjs`. A deep case study is
+  compiled under `docs/case-studies/issue-1883/`.
 ## 1.74.12
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.74.12",
+  "version": "1.76.0",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/claude.lib.mjs CHANGED Viewed

@@ -28,6 +28,7 @@ import { fetchModelInfo } from './model-info.lib.mjs';
 import { classifyRetryableError, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
 import { withAgentsMdAsClaudeMd } from './agents-md-claude-support.lib.mjs';
+import { deployHandoffSkill } from './handoff-skill.lib.mjs'; // Issue #1877
 import { createThinkingBlockRecovery } from './claude.thinking-block-recovery.lib.mjs'; // Issue #1834 (PR #1835 feedback)
 export { availableModels, fetchModelInfo }; // Re-export for backward compatibility
 const showResumeCommand = async (sessionId, tempDir, claudePath, model, log, argv = null) => {
@@ -353,6 +354,10 @@ export const executeClaude = async params => {
   const escapedPrompt = prompt.replace(/"/g, '\\"').replace(/\$/g, '\\$');
   const escapedSystemPrompt = systemPrompt.replace(/"/g, '\\"').replace(/\$/g, '\\$');
+  // Issue #1877: deploy the experimental HANDOFF.md Agent Skill so Claude loads
+  // it natively from .claude/skills/handoff/SKILL.md (no-op unless --use-handoff).
+  await deployHandoffSkill({ tempDir, argv, log, $ });
   return await withAgentsMdAsClaudeMd({ tempDir, branchName, argv, prompt, fs, path, $, log, formatAligned }, () =>
     executeClaudeCommand({
       tempDir,

package/src/claude.prompts.lib.mjs CHANGED Viewed

@@ -4,6 +4,7 @@
  */
 import { getArchitectureCareSubPrompt } from './architecture-care.prompts.lib.mjs';
+import { getHandoffSubPrompt } from './handoff.prompts.lib.mjs';
 import { getExperimentsExamplesSubPrompt } from './experiments-examples.prompts.lib.mjs';
 import { primaryModelNames } from './models/index.mjs';
 import { getThinkingPromptInstruction } from './thinking-prompt.lib.mjs';
@@ -338,7 +339,7 @@ Visual UI work and screenshots.
    - When the fix is visual, include side-by-side or sequential comparison of before/after states in the PR description.
    - When possible, create automated visual regression tests to prevent the UI bug from recurring.`
        : ''
-   }${ciExamples}${getArchitectureCareSubPrompt(argv)}${buildWorkLanguageDirective()}`;
+   }${ciExamples}${getArchitectureCareSubPrompt(argv)}${getHandoffSubPrompt(argv)}${buildWorkLanguageDirective()}`;
 };
 // Export all functions as default object too

package/src/codex.lib.mjs CHANGED Viewed

@@ -29,6 +29,7 @@ import { defaultModels } from './models/index.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
 import { getCumulativeContextInputTokens } from './context-fill.lib.mjs';
+import { deployHandoffSkill } from './handoff-skill.lib.mjs'; // Issue #1877
 import Decimal from 'decimal.js-light';
 const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -661,6 +662,10 @@ export const executeCodex = async params => {
     }
   }
+  // Issue #1877: deploy the experimental HANDOFF.md Agent Skill so Codex loads
+  // it natively from .agents/skills/handoff/SKILL.md (no-op unless --use-handoff).
+  await deployHandoffSkill({ tempDir, argv, log, $ });
   // Execute the Codex command
   return await executeCodexCommand({
     tempDir,

package/src/codex.prompts.lib.mjs CHANGED Viewed

@@ -4,6 +4,7 @@
  */
 import { getArchitectureCareSubPrompt } from './architecture-care.prompts.lib.mjs';
+import { getHandoffSubPrompt } from './handoff.prompts.lib.mjs';
 import { getExperimentsExamplesSubPrompt } from './experiments-examples.prompts.lib.mjs';
 import { getThinkingPromptInstruction } from './thinking-prompt.lib.mjs';
 import { buildWorkLanguageDirective } from './work-language.prompts.lib.mjs';
@@ -306,7 +307,7 @@ Visual UI work and screenshots.
    - When the fix is visual, include side-by-side or sequential comparison of before/after states in the PR description.
    - When possible, create automated visual regression tests to prevent the UI bug from recurring.`
        : ''
-   }${ciExamples}${getArchitectureCareSubPrompt(argv)}${buildWorkLanguageDirective()}`;
+   }${ciExamples}${getArchitectureCareSubPrompt(argv)}${getHandoffSubPrompt(argv)}${buildWorkLanguageDirective()}`;
 };
 // Export all functions as default object too

package/src/handoff-skill.lib.mjs ADDED Viewed

@@ -0,0 +1,256 @@
+/**
+ * HANDOFF.md Agent Skill deployment (issue #1877)
+ *
+ * Writes the canonical handoff `SKILL.md` (built by handoff.prompts.lib.mjs)
+ * into the session working directory so the AI tool loads it natively as an
+ * Agent Skill, instead of relying on an injected prompt.
+ *
+ * Both supported tools read the Agent Skills standard, but from different
+ * hardcoded project directories (neither tool exposes a setting or env var to
+ * point at a custom/shared folder):
+ *   - Claude Code:  .claude/skills/<name>/SKILL.md
+ *   - Codex:        .agents/skills/<name>/SKILL.md
+ *
+ * To answer "can both CLIs use the SAME folder?": there is no native shared
+ * location, so we make one ourselves. The SKILL.md is written exactly ONCE into
+ * a single real directory (the Claude path, `.claude/skills/handoff/`), and the
+ * Codex path (`.agents/skills/handoff`) is a relative **symlink** pointing at
+ * that one real directory. Both tools therefore read byte-for-byte the same
+ * file from a single source of truth on disk — not two copies that could drift.
+ * If the filesystem cannot create a symlink (e.g. Windows without privilege),
+ * we fall back to writing a real second copy so the feature still works.
+ *
+ * The deployed skill is tool configuration, not project state, so it is:
+ *   - re-deployed every session by hive-mind (each session clones fresh), and
+ *   - excluded from git via `.git/info/exclude` (a local, never-committed
+ *     ignore) so it never pollutes the pull request or the "uncommitted
+ *     changes" checks. Only the HANDOFF.md the tool produces is committed.
+ */
+// Fetch use-m if not available (matches the rest of src/*.lib.mjs).
+// NOTE(review): this evals JavaScript downloaded from unpkg while the module
+// is being loaded, with no version pin or integrity check in the URL — it
+// needs network access and trusts whatever the CDN returns. Left unchanged
+// because it is the shared bootstrap convention; revisit together with the
+// other src/*.lib.mjs files rather than in this module alone.
+if (typeof globalThis.use === 'undefined') {
+  globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
+}
+const fs = (await use('fs')).promises;
+const path = (await use('path')).default;
+import { buildHandoffSkillFile, HANDOFF_SKILL_NAME } from './handoff.prompts.lib.mjs';
+const noopLog = async () => {}; // default `log` for deployHandoffSkill when the caller passes none
+const SKILL_FILE = 'SKILL.md'; // file name written inside every skill directory
+/**
+ * The single real skill directory the SKILL.md is written into. Claude Code
+ * reads it directly; Codex reaches the same files through a symlink (below).
+ * The value is a relative path; the deployment joins it onto the session
+ * working directory (`tempDir`).
+ * @type {string}
+ */
+export const HANDOFF_PRIMARY_SKILL_DIR = path.join('.claude', 'skills', HANDOFF_SKILL_NAME);
+/**
+ * Additional skill directories that should resolve to the same SKILL.md. Each
+ * is created as a symlink to HANDOFF_PRIMARY_SKILL_DIR (one source of truth),
+ * falling back to a real copy only if symlinking is unsupported.
+ * Entries are relative paths, like HANDOFF_PRIMARY_SKILL_DIR. The array is
+ * frozen, so it cannot be extended at runtime.
+ * @type {ReadonlyArray<string>}
+ */
+export const HANDOFF_LINKED_SKILL_DIRS = Object.freeze([
+  path.join('.agents', 'skills', HANDOFF_SKILL_NAME), // Codex
+]);
+/**
+ * All skill directories the deployment touches (primary + links). Kept for the
+ * git-exclude bookkeeping and for callers/tests that enumerate every location.
+ * The primary directory is always the first element; the array is frozen.
+ * @type {ReadonlyArray<string>}
+ */
+export const HANDOFF_SKILL_DIRS = Object.freeze([HANDOFF_PRIMARY_SKILL_DIR, ...HANDOFF_LINKED_SKILL_DIRS]);
+/**
+ * Report whether `relPath` is tracked by git inside `tempDir`, so the
+ * deployment never overwrites a file the target repository ships itself.
+ *
+ * @param {Object} params
+ * @param {Function} [params.$] - Command runner; without one the answer is `false`.
+ * @param {string} params.tempDir - Working directory the git command runs in.
+ * @param {string} params.relPath - Path to check, relative to `tempDir`.
+ * @returns {Promise<boolean>} `true` only when `git ls-files --error-unmatch`
+ *   exits with code 0; any thrown error counts as "not tracked".
+ */
+const isTracked = async ({ $, tempDir, relPath }) => {
+  let tracked = false;
+  if ($) {
+    try {
+      const { code } = await $({ cwd: tempDir })`git ls-files --error-unmatch ${relPath} 2>/dev/null`;
+      tracked = code === 0;
+    } catch {
+      tracked = false;
+    }
+  }
+  return tracked;
+};
+/**
+ * Locate the local git exclude file for `tempDir`.
+ *
+ * Asks git (`git rev-parse --git-path info/exclude`) so worktrees resolve to
+ * the right place; a relative answer is joined onto `tempDir`. When there is
+ * no command runner, the command fails, or it prints nothing, the
+ * conventional `<tempDir>/.git/info/exclude` is returned instead.
+ *
+ * @param {Object} params
+ * @param {Function} [params.$] - Command runner; optional.
+ * @param {string} params.tempDir - The repo working directory.
+ * @returns {Promise<string>} Path of the exclude file (it may not exist).
+ */
+const resolveExcludePath = async ({ $, tempDir }) => {
+  const conventional = () => path.join(tempDir, '.git', 'info', 'exclude');
+  if (!$) return conventional();
+  try {
+    const outcome = await $({ cwd: tempDir })`git rev-parse --git-path info/exclude 2>/dev/null`;
+    const reported = (outcome.stdout || '').toString().trim();
+    if (outcome.code !== 0 || !reported) return conventional();
+    if (path.isAbsolute(reported)) return reported;
+    return path.join(tempDir, reported);
+  } catch {
+    // git could not be queried; use the conventional location
+    return conventional();
+  }
+};
+/**
+ * Make sure every handoff skill directory is listed in the local git exclude
+ * file, so the deployed SKILL.md (real directory and symlink alike) stays
+ * invisible to git. Safe to call repeatedly: entries already present are not
+ * added again.
+ *
+ * Patterns are written as `/<dir>` with forward slashes and WITHOUT a trailing
+ * slash, so they match both a real directory and a directory symlink (git
+ * would not match a symlink against a `dir/` pattern).
+ *
+ * @param {Object} params
+ * @param {Function} [params.$] - Command runner used to locate the exclude file.
+ * @param {string} params.tempDir - The repo working directory.
+ * @param {Function} params.log - Logger (called when the update is skipped).
+ * @returns {Promise<boolean>} `false` when the exclude file's parent directory
+ *   does not exist (not a git working dir); `true` otherwise.
+ */
+const updateGitExclude = async ({ $, tempDir, log }) => {
+  const excludeFile = await resolveExcludePath({ $, tempDir });
+  // Never create a stray `.git/` in a directory that is not a git checkout:
+  // the file is only touched when its parent directory already exists.
+  try {
+    await fs.access(path.dirname(excludeFile));
+  } catch {
+    await log('   Handoff skill: no .git/info directory; skipping git-exclude update', { verbose: true });
+    return false;
+  }
+  let current;
+  try {
+    current = await fs.readFile(excludeFile, 'utf8');
+  } catch {
+    // An unreadable or missing exclude file is treated as empty.
+    current = '';
+  }
+  const alreadyListed = new Set(current.split(/\r?\n/));
+  const toAdd = [];
+  for (const dir of HANDOFF_SKILL_DIRS) {
+    const pattern = `/${dir.split(path.sep).join('/')}`;
+    if (!alreadyListed.has(pattern)) toAdd.push(pattern);
+  }
+  if (toAdd.length === 0) return true;
+  const header = '# hive-mind --use-handoff: experimental HANDOFF.md Agent Skill (issue #1877)';
+  const pieces = [];
+  // Start on a fresh line when the existing content lacks a final newline.
+  if (current.length > 0 && !current.endsWith('\n')) pieces.push('\n');
+  if (!current.includes(header)) pieces.push(`${header}\n`);
+  pieces.push(`${toAdd.join('\n')}\n`);
+  await fs.writeFile(excludeFile, current + pieces.join(''), 'utf8');
+  return true;
+};
+/**
+ * Create the primary skill directory under `tempDir` (including missing
+ * parents) and write the SKILL.md into it.
+ *
+ * @param {Object} params
+ * @param {string} params.tempDir - The repo working directory.
+ * @param {string} params.content - Full SKILL.md text.
+ * @returns {Promise<string>} Path of the primary skill directory.
+ */
+const writeRealSkill = async ({ tempDir, content }) => {
+  const skillDir = path.join(tempDir, HANDOFF_PRIMARY_SKILL_DIR);
+  const skillFile = path.join(skillDir, SKILL_FILE);
+  await fs.mkdir(skillDir, { recursive: true });
+  await fs.writeFile(skillFile, content, 'utf8');
+  return skillDir;
+};
+/**
+ * Make `relLinkDir` resolve to the same files as the primary skill directory.
+ * Prefers a relative symlink (single source of truth); if the symlink cannot
+ * be created, falls back to writing a real copy of the SKILL.md.
+ *
+ * A pre-existing entry at the link path is reconciled first:
+ *   - a symlink already pointing at the primary directory is left alone;
+ *   - any other symlink is removed and recreated;
+ *   - a real directory gets its SKILL.md rewritten in place (copy mode);
+ *   - anything else (e.g. a regular file) is removed.
+ * Only a missing entry (ENOENT) counts as "nothing to reconcile". Any other
+ * failure rejects, so the caller can log it; otherwise the copy fallback could
+ * end up writing through a stale link that could not be removed.
+ *
+ * @param {Object} params
+ * @param {string} params.tempDir - Working directory `relLinkDir` is relative to.
+ * @param {string} params.relLinkDir - Skill directory to create, relative to `tempDir`.
+ * @param {string} params.primaryAbsDir - Path of the real skill directory (the link target).
+ * @param {string} params.content - SKILL.md text used whenever a copy is written.
+ * @returns {Promise<'symlink'|'copy'>}
+ */
+const linkOrCopySkill = async ({ tempDir, relLinkDir, primaryAbsDir, content }) => {
+  const absLinkDir = path.join(tempDir, relLinkDir);
+  const parent = path.dirname(absLinkDir);
+  await fs.mkdir(parent, { recursive: true });
+  // Relative target keeps the link valid wherever the working dir lives.
+  const relTarget = path.relative(parent, primaryAbsDir);
+  // Reconcile any pre-existing entry (e.g. from a prior session re-deploy).
+  let st = null;
+  try {
+    st = await fs.lstat(absLinkDir);
+  } catch (error) {
+    // Nothing there yet is expected; everything else is a real failure.
+    if (error?.code !== 'ENOENT') throw error;
+  }
+  if (st?.isSymbolicLink()) {
+    const current = await fs.readlink(absLinkDir);
+    if (current === relTarget) return 'symlink'; // already correct
+    await fs.rm(absLinkDir, { recursive: true, force: true });
+  } else if (st?.isDirectory()) {
+    // A real directory is already there (prior copy fallback). Refresh the
+    // copy in place rather than replacing the directory.
+    await fs.writeFile(path.join(absLinkDir, SKILL_FILE), content, 'utf8');
+    return 'copy';
+  } else if (st) {
+    await fs.rm(absLinkDir, { force: true });
+  }
+  try {
+    await fs.symlink(relTarget, absLinkDir, 'dir');
+    return 'symlink';
+  } catch {
+    // Symlinks unavailable (e.g. Windows without privilege): write a copy.
+    await fs.mkdir(absLinkDir, { recursive: true });
+    await fs.writeFile(path.join(absLinkDir, SKILL_FILE), content, 'utf8');
+    return 'copy';
+  }
+};
+/**
+ * Deploy the handoff SKILL.md into the session working directory.
+ *
+ * Deployment is best-effort and never rejects for filesystem or git-exclude
+ * problems: each failure is logged (verbose) and reflected in the result.
+ * Files the target repository tracks itself are left untouched.
+ *
+ * @param {Object} params
+ * @param {string} params.tempDir - The repo working directory.
+ * @param {Object} params.argv - Parsed CLI args (uses argv.useHandoff).
+ * @param {Function} [params.log] - Logger.
+ * @param {Function} [params.$] - Command runner (for git checks); optional.
+ * @returns {Promise<{deployed: boolean, reason?: string, paths: string[], shared: boolean}>}
+ *   `paths` lists the SKILL.md files written or linked (relative to tempDir);
+ *   `shared` is false when a linked directory had to be copied or could not be
+ *   created at all; `reason` is set only when nothing was attempted or the
+ *   primary write failed ('disabled' | 'no-temp-dir' | 'write-failed').
+ */
+export const deployHandoffSkill = async ({ tempDir, argv, log = noopLog, $ = null } = {}) => {
+  if (!argv || !argv.useHandoff) {
+    return { deployed: false, reason: 'disabled', paths: [], shared: false };
+  }
+  if (!tempDir) {
+    return { deployed: false, reason: 'no-temp-dir', paths: [], shared: false };
+  }
+  const content = buildHandoffSkillFile();
+  const written = [];
+  let allShared = true;
+  let linkFailed = false;
+  // 1. Write the single real SKILL.md (unless the repo tracks it itself).
+  const primaryRelFile = path.join(HANDOFF_PRIMARY_SKILL_DIR, SKILL_FILE);
+  let primaryAbsDir = path.join(tempDir, HANDOFF_PRIMARY_SKILL_DIR);
+  if (await isTracked({ $, tempDir, relPath: primaryRelFile })) {
+    await log(`   Handoff skill: ${primaryRelFile} is tracked by the repo; leaving it untouched`, { verbose: true });
+  } else {
+    try {
+      primaryAbsDir = await writeRealSkill({ tempDir, content });
+      written.push(primaryRelFile);
+    } catch (error) {
+      await log(`   Handoff skill: failed to deploy ${primaryRelFile}: ${error.message}`, { verbose: true });
+      return { deployed: false, reason: 'write-failed', paths: [], shared: false };
+    }
+  }
+  // 2. Point every other tool's skill dir at that same real directory.
+  for (const relLinkDir of HANDOFF_LINKED_SKILL_DIRS) {
+    const relFile = path.join(relLinkDir, SKILL_FILE);
+    if (await isTracked({ $, tempDir, relPath: relFile })) {
+      await log(`   Handoff skill: ${relFile} is tracked by the repo; leaving it untouched`, { verbose: true });
+      continue;
+    }
+    try {
+      const mode = await linkOrCopySkill({ tempDir, relLinkDir, primaryAbsDir, content });
+      if (mode !== 'symlink') allShared = false;
+      written.push(relFile);
+    } catch (error) {
+      // A directory that could be neither linked nor copied is not shared.
+      allShared = false;
+      linkFailed = true;
+      await log(`   Handoff skill: failed to link ${relFile}: ${error.message}`, { verbose: true });
+    }
+  }
+  if (written.length > 0) {
+    // The exclude update is bookkeeping: if it fails the skill is still
+    // deployed (the files may then show up as untracked), so log and go on
+    // instead of aborting the whole tool run.
+    try {
+      await updateGitExclude({ $, tempDir, log });
+    } catch (error) {
+      await log(`   Handoff skill: failed to update git exclude: ${error.message}`, { verbose: true });
+    }
+    let how = 'one shared folder via symlink';
+    if (linkFailed) {
+      how = 'partial (a linked skill dir could not be created)';
+    } else if (!allShared) {
+      how = 'copied (symlink unsupported)';
+    }
+    await log(`   Handoff skill deployed (--use-handoff, ${how}): ${written.join(', ')}`, { verbose: true });
+  }
+  return { deployed: written.length > 0, paths: written, shared: allShared };
+};
+export default { // bundles the same bindings that are exported by name above
+  HANDOFF_PRIMARY_SKILL_DIR,
+  HANDOFF_LINKED_SKILL_DIRS,
+  HANDOFF_SKILL_DIRS,
+  deployHandoffSkill,
+};

package/src/handoff.prompts.lib.mjs ADDED Viewed

@@ -0,0 +1,158 @@
+/**
+ * HANDOFF.md support — Agent Skill (issue #1877)
+ *
+ * Instead of injecting a bespoke sub-prompt, this module ships a real
+ * **Agent Skill** (https://agentskills.io) — a `SKILL.md` document with YAML
+ * frontmatter — that teaches the AI tool to read and maintain a HANDOFF.md file
+ * in the repository root. The Agent Skills format is an open standard (created
+ * by Anthropic) that BOTH supported tools load natively:
+ *   - Claude Code discovers project skills from `.claude/skills/<name>/SKILL.md`.
+ *   - Codex discovers project skills from `.agents/skills/<name>/SKILL.md`.
+ * The exact same `SKILL.md` works for both, so "same skill, same way" is
+ * satisfied by a single canonical file rather than a tool-specific prompt.
+ * Because neither tool lets you redirect its skills folder, the deployment
+ * writes that file ONCE and symlinks the second tool's path to it, so both
+ * tools literally read the same folder (see handoff-skill.lib.mjs).
+ *
+ * The skill is deployed into the session working directory by
+ * `handoff-skill.lib.mjs` (gated behind the experimental --use-handoff flag).
+ * This module only builds the canonical text; the deployment module writes it.
+ *
+ * Goal: cross-session AND cross-tool continuity — a session driven by one tool
+ * (e.g. Claude) can be continued by another tool (e.g. Codex) inside the same
+ * pull request, because the HANDOFF.md state travels with the branch.
+ *
+ * Design rationale specific to hive-mind:
+ *   - Each working session runs in an ephemeral temp working directory that is
+ *     cloned fresh from the pull request branch. The ONLY state that persists
+ *     between sessions (and between different tools) is what is committed to the
+ *     branch. Therefore, unlike the general "disposable temp-dir handoff"
+ *     convention, the handoff file here MUST be committed to the PR branch so
+ *     the next session/tool can read it. We keep a single active HANDOFF.md per
+ *     branch to avoid ambiguity.
+ *   - The skill file itself (SKILL.md) is tool configuration, not project state,
+ *     so it is re-deployed each session by hive-mind and is NOT committed to the
+ *     target repository (see handoff-skill.lib.mjs).
+ */
+/**
+ * The default handoff file name (repository root, relative path).
+ * Used as the default `fileName` by the builder functions in this module.
+ * @type {string}
+ */
+export const HANDOFF_FILE_NAME = 'HANDOFF.md';
+/**
+ * The skill directory / invocation name (Agent Skills standard).
+ * Also the default `name` written to the SKILL.md frontmatter.
+ * @type {string}
+ */
+export const HANDOFF_SKILL_NAME = 'handoff';
+/**
+ * The skill description used in the SKILL.md frontmatter. Front-loads the key
+ * use case and trigger words so the tool can match the skill implicitly.
+ * buildHandoffSkillFile emits it on a single `description:` frontmatter line,
+ * so keep it one line of text.
+ * @type {string}
+ */
+export const HANDOFF_SKILL_DESCRIPTION = "Maintain a HANDOFF.md continuity document in the repository root so any session can continue a previous session's work — even across different AI tools (Claude and Codex) in the same pull request. Use when starting, resuming, or finishing work on a long-running task, issue, or pull request.";
+/**
+ * Produce the markdown instructions that follow the YAML frontmatter in
+ * SKILL.md. The text is tool-agnostic — the same body is used for Claude and
+ * Codex — and refers to the handoff document by `fileName` throughout.
+ *
+ * @param {Object} [options]
+ * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name.
+ * @returns {string} The markdown instructions body (no trailing newline).
+ */
+export const buildHandoffSkillBody = ({ fileName = HANDOFF_FILE_NAME } = {}) => {
+  const overview = [
+    `# HANDOFF.md continuity skill`,
+    `${fileName} is a single shared handoff document in the repository root that lets any session continue the work of any previous session, even when a different AI tool (for example Claude and Codex) is used. It travels with the pull request branch, so it is the cross-tool, cross-session memory for this PR.`,
+  ];
+  const whenToUse = [
+    `## When to use this skill`,
+    `- When you start a working session, read ${fileName} first if it exists. Treat its "Next steps" section as your immediate starting point and honor the decisions and constraints it records before exploring anything else.`,
+    `- When ${fileName} does not exist yet and the task is non-trivial, create it early so an interrupted session can always be resumed.`,
+    `- When you make meaningful progress, update ${fileName} so it always reflects the current truth. Keep exactly one active ${fileName} per pull request branch (do not create per-session copies).`,
+    `- When all requirements are fully met and the work is complete, record that completion at the top of ${fileName} (or delete the file) so the next session knows there is nothing left to continue.`,
+  ];
+  const howToWrite = [
+    `## How to write ${fileName}`,
+    `- Keep it concise and tool-agnostic: describe state by referencing file paths, function names, branch, and commit SHAs rather than tool-specific commands, so the next tool (Claude or Codex) can act on it directly. Prefer pointers to existing artifacts over duplicating their content.`,
+    `- Include these sections:`,
+    `  1. **Task** — the issue/PR being solved and the goal.`,
+    `  2. **Current state** — what is done and verified.`,
+    `  3. **Decisions** — key choices made and why (so they are not re-litigated).`,
+    `  4. **Next steps** — the concrete, ordered actions the next session should take.`,
+    `  5. **Gotchas** — known pitfalls, failing checks, or constraints.`,
+    `  6. **Critical files** — the important paths and what each is for.`,
+    `- When you record next steps, make them specific and actionable (a path, a function, a command to run) instead of vague goals, and remove items as they are completed.`,
+  ];
+  const committing = [
+    `## Committing and safety`,
+    `- When you finish a step that changes the state, commit ${fileName} together with the related code changes so the handoff stays in sync with the branch and is never lost if the session is interrupted.`,
+    `- Never include secrets, tokens, API keys, passwords, or personal data in ${fileName} — it is committed to the repository.`,
+  ];
+  return [...overview, ...whenToUse, ...howToWrite, ...committing].join('\n');
+};
+/**
+ * Render a value as a single-line YAML scalar for the SKILL.md frontmatter.
+ * Text that is safe as a YAML plain scalar is returned unchanged (this covers
+ * the default name and description). Anything that would break or change the
+ * meaning of the frontmatter — an embedded `: ` or ` #`, a line break or tab,
+ * a leading indicator character, a trailing colon/space, an empty string, or
+ * a bare boolean/null/number — is emitted as a double-quoted scalar instead
+ * (JSON string syntax is valid YAML).
+ */
+const toYamlScalar = value => {
+  const text = String(value);
+  const startsPlain = /^[A-Za-z0-9]/.test(text);
+  const endsPlain = !/[\s:]$/.test(text);
+  const hasBreaker = /[\r\n\t]|: | #/.test(text);
+  const looksTyped = /^(?:true|false|null|yes|no|on|off|[-+]?[\d.]+(?:e[-+]?\d+)?)$/i.test(text);
+  return startsPlain && endsPlain && !hasBreaker && !looksTyped ? text : JSON.stringify(text);
+};
+/**
+ * Build a complete SKILL.md document (Agent Skills standard): YAML frontmatter
+ * with `name` and `description`, followed by the instructions body. This exact
+ * file is deployed verbatim for both Claude (.claude/skills/handoff/SKILL.md)
+ * and Codex (.agents/skills/handoff/SKILL.md).
+ *
+ * Custom `name` / `description` values are quoted when they would not survive
+ * as plain YAML scalars, so the frontmatter always stays two well-formed keys.
+ * With the defaults the output is unchanged.
+ *
+ * @param {Object} [options]
+ * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name.
+ * @param {string} [options.name=HANDOFF_SKILL_NAME] - Skill name (frontmatter).
+ * @param {string} [options.description=HANDOFF_SKILL_DESCRIPTION] - Skill description.
+ * @returns {string} The full SKILL.md content (ends with a newline).
+ */
+export const buildHandoffSkillFile = ({ fileName = HANDOFF_FILE_NAME, name = HANDOFF_SKILL_NAME, description = HANDOFF_SKILL_DESCRIPTION } = {}) => {
+  return `---
+name: ${toYamlScalar(name)}
+description: ${toYamlScalar(description)}
+---
+${buildHandoffSkillBody({ fileName })}
+`;
+};
+/**
+ * Build the short system-prompt pointer that activates the handoff skill.
+ * The full procedure lives in the deployed SKILL.md (loaded natively by the
+ * tool); this text only makes sure the read-at-session-start behavior fires,
+ * because that is triggered by session lifecycle rather than by a task
+ * description.
+ *
+ * @param {Object} [options]
+ * @param {string} [options.fileName=HANDOFF_FILE_NAME] - Handoff file name.
+ * @param {string} [options.name=HANDOFF_SKILL_NAME] - Skill name.
+ * @returns {string} The activation nudge; it starts with a newline so it can
+ *   be appended directly to a preceding prompt section.
+ */
+export const buildHandoffSubPrompt = ({ fileName = HANDOFF_FILE_NAME, name = HANDOFF_SKILL_NAME } = {}) => {
+  const heading = `HANDOFF.md continuity skill (experimental, --use-handoff).`;
+  const installedNote = `   - A reusable "${name}" Agent Skill is installed in this workspace (.claude/skills/${name}/ for Claude, .agents/skills/${name}/ for Codex). It defines how to read and maintain ${fileName} so any session can continue the work of a previous one — even across tools (Claude and Codex) in the same pull request.`;
+  const sessionStartNote = `   - At the start of this session, use the ${name} skill: if ${fileName} exists in the repository root, read it first and continue from its "Next steps". Create or update ${fileName} as you make progress and commit it to the pull request branch.`;
+  // The leading empty element yields the newline the prompt section starts with.
+  return ['', heading, installedNote, sessionStartNote].join('\n');
+};
+/**
+ * Return the handoff activation nudge when `--use-handoff` is on.
+ *
+ * @param {Object} argv - Parsed command line arguments (reads argv.useHandoff).
+ * @returns {string} The sub-prompt built with default names, or an empty
+ *   string when argv is missing or the flag is falsy.
+ */
+export const getHandoffSubPrompt = argv => (argv?.useHandoff ? buildHandoffSubPrompt() : '');
+// Export all constants and functions as default object too (mirrors architecture-care module); every named export of this module is included.
+export default {
+  HANDOFF_FILE_NAME,
+  HANDOFF_SKILL_NAME,
+  HANDOFF_SKILL_DESCRIPTION,
+  buildHandoffSkillBody,
+  buildHandoffSkillFile,
+  buildHandoffSubPrompt,
+  getHandoffSubPrompt,
+};

package/src/option-suggestions.lib.mjs CHANGED Viewed

@@ -189,6 +189,7 @@ const KNOWN_OPTION_NAMES = [
   'prompt-issue-reporting',
   'prompt-architecture-care',
   'prompt-case-studies',
+  'use-handoff',
   'prompt-playwright-mcp',
   'prompt-check-sibling-pull-requests',
   'enable-workspaces',
@@ -221,6 +222,10 @@ const KNOWN_OPTION_NAMES = [
   'prompt-ensure-all-requirements-are-met',
   'finalize',
   'finalize-model',
+  'keep-working-until-all-requirements-are-fully-done',
+  'keep-going-until-all-requirements-are-fully-done',
+  'keep-working',
+  'keep-going',
 ];
 /**

package/src/solve.config.lib.mjs CHANGED Viewed

@@ -480,6 +480,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
     description: 'Create comprehensive case study documentation for the issue including logs, analysis, timeline, root cause investigation, and proposed solutions. Organizes findings into ./docs/case-studies/issue-{id}/ directory. Supported for --tool claude and --tool codex.',
     default: false,
   },
+  'use-handoff': {
+    type: 'boolean',
+    description: '[EXPERIMENTAL] Enable the HANDOFF.md continuity Agent Skill so a session can continue the work of a previous session — even when a different AI tool is used (e.g. Claude and Codex continuing each other in the same pull request). A real SKILL.md (the open Agent Skills standard) is deployed into the working directory so each tool loads it natively (.claude/skills/handoff/ for Claude, .agents/skills/handoff/ for Codex). The AI reads HANDOFF.md (repository root) first when present and keeps it updated with task, current state, decisions, next steps, gotchas, and critical files. HANDOFF.md is committed to the PR branch so it persists across the ephemeral per-session working directories; the SKILL.md itself is re-deployed each session and git-excluded so it never pollutes the PR. The same skill file is used identically for --tool claude and --tool codex. Disabled by default (issue #1877).',
+    default: false,
+  },
   'prompt-playwright-mcp': {
     type: 'boolean',
     description: 'Enable Playwright MCP browser automation hints in system prompt (enabled by default, only takes effect if Playwright MCP is installed). Use --no-prompt-playwright-mcp to disable. Supported for --tool claude, --tool codex, --tool opencode, --tool agent, --tool qwen, and --tool gemini.',
@@ -586,6 +591,12 @@ export const SOLVE_OPTION_DEFINITIONS = {
     description: '[EXPERIMENTAL] Model to use for --finalize iterations. Defaults to the same model as --model.',
     default: undefined,
   },
+  'keep-working-until-all-requirements-are-fully-done': {
+    type: 'string',
+    description: '[EXPERIMENTAL] After the main solve completes, scan the pull request description, the AI solution summary and changed markdown documents for strong indicators of deferred/delayed/out-of-scope work (e.g. "future work", "out of scope", "deferred", "follow-up PR", "TODO") and automatically restart the AI tool to finish everything in this single pull request. Accepts a number of restarts (default: 5), or "forever"/"unlimited" to remove the limit. Bare flag means the default of 5.',
+    alias: ['keep-going-until-all-requirements-are-fully-done', 'keep-working', 'keep-going'],
+    default: undefined,
+  },
   'working-session-live-progress': {
     type: 'string',
     description: '[EXPERIMENTAL] Enable live progress monitoring. Accepts "comment" (default, updates a per-session PR comment) or "pr" (updates PR description). Plain --working-session-live-progress means "comment". Works with or without --interactive-mode.',
@@ -836,6 +847,29 @@ export const parseArguments = async (yargs = getLinoYargsFactory(), hideBinFn =
     }
   }
+  // --keep-working-until-all-requirements-are-fully-done normalization
+  // Issue #1883: the flag accepts a number of restarts, the keywords
+  // "forever"/"unlimited"/"infinite", or no value (bare flag => default of 5).
+  // We canonicalize the bare-flag / empty-string case here so downstream
+  // detection (normalizeKeepWorkingLimit) sees a meaningful value. Final
+  // numeric normalization happens at runtime in solve.keep-working.lib.mjs.
+  {
+    const keepWorkingAliases = ['--keep-working-until-all-requirements-are-fully-done', '--keep-going-until-all-requirements-are-fully-done', '--keep-working', '--keep-going'];
+    const keepWorkingProvided = keepWorkingAliases.some(alias => hasRawOption(rawArgs, alias));
+    if (keepWorkingProvided) {
+      const current = argv.keepWorkingUntilAllRequirementsAreFullyDone;
+      // Bare flag (no value) -> yargs may yield true or an empty string; treat as default count.
+      if (current === true || current === '' || current === undefined || current === null) {
+        argv.keepWorkingUntilAllRequirementsAreFullyDone = 5;
+      } else if (typeof current === 'string') {
+        argv.keepWorkingUntilAllRequirementsAreFullyDone = current.trim();
+      }
+    } else if (argv.keepWorkingUntilAllRequirementsAreFullyDone === undefined) {
+      // Not provided: keep it disabled (do not coerce the string-type default).
+      argv.keepWorkingUntilAllRequirementsAreFullyDone = undefined;
+    }
+  }
   // --working-session-live-progress normalization
   // When passed as --working-session-live-progress (no value), yargs gives true for string type
   // Normalize: true → "comment", validate known values

package/src/solve.keep-working.detect.lib.mjs ADDED Viewed

@@ -0,0 +1,223 @@
+#!/usr/bin/env node
+/**
+ * Pure detection + normalization helpers for the keep-working feature.
+ *
+ * This module intentionally has NO use-m / command-stream / network imports so
+ * it can be unit-tested in isolation (mirroring auto-iteration-limits.lib.mjs).
+ * The orchestration lives in solve.keep-working.lib.mjs.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
+ */
+/**
+ * The default number of auto-restarts when the feature is enabled without an
+ * explicit count.
+ *
+ * Also the default `fallback` argument of normalizeKeepWorkingLimit, so it is
+ * what an unparsable flag value resolves to.
+ */
+export const DEFAULT_KEEP_WORKING_LIMIT = 5;
+/**
+ * The reinforcement prompt appended to every keep-working restart, in addition
+ * to the concrete detected reasons. Taken verbatim from issue #1883.
+ *
+ * buildKeepWorkingFeedback pushes it as the last non-empty feedback line. The
+ * wording is a runtime string: do not "fix" its grammar without checking it
+ * against DEFERRED_WORK_PATTERNS (see the IMPORTANT note on that constant).
+ */
+export const KEEP_WORKING_PROMPT = 'Please plan and execute everything in this single pull request, you have unlimited time and context, as context auto-compacts and you can continue indefinitely, until it is each and every requirement fully addressed, and everything is totally done.';
+/**
+ * Strong indicators that work was deferred / delayed / left for a future pull
+ * request. These intentionally favour recall over precision: when the user
+ * enables --keep-working-until-all-requirements-are-fully-done they explicitly
+ * want the AI to keep going, so we accept some false positives (issue #1883).
+ *
+ * Each entry has a human-readable `label` (shown to the user / AI as the reason
+ * for the restart) and a `pattern` (a global, case-insensitive RegExp).
+ *
+ * IMPORTANT: keep these patterns anchored on deferral semantics so the
+ * reinforcement prompt itself ("until it is each and every requirement fully
+ * addressed") does NOT match and cause an infinite restart loop.
+ *
+ * Usage notes:
+ *  - detectDeferredWork walks this list in order and runs every pattern over
+ *    the whole text, so entries are independent: a phrase such as
+ *    "follow-up PR" satisfies both the pull-request entry and the follow-up
+ *    entry and is reported once per label.
+ *  - Because every pattern carries the `g` flag, `exec` is stateful through
+ *    `lastIndex`. detectDeferredWork resets `lastIndex` to 0 before and after
+ *    using a pattern; any other consumer of these shared objects must do the
+ *    same.
+ */
+export const DEFERRED_WORK_PATTERNS = [
+  { label: 'out of scope', pattern: /\b(?:out[\s-]of[\s-]scope|beyond\s+the\s+scope|outside\s+the\s+scope|not\s+(?:in|within)\s+(?:the\s+)?scope)\b/gi },
+  { label: 'future work', pattern: /\bfuture\s+(?:work|improvements?|enhancements?|iterations?|steps?|considerations?)\b/gi },
+  { label: 'future / separate / follow-up pull request', pattern: /\b(?:in\s+a\s+|a\s+)?(?:future|separate|subsequent|later|next|follow[\s-]?up|another)\s+(?:pull\s+request|pr|mr|merge\s+request|change(?:set)?|commit)\b/gi },
+  { label: 'follow-up work', pattern: /\bfollow[\s-]?up(?:\s+(?:work|task|item|pr|pull\s+request|issue))?\b/gi },
+  { label: 'deferred', pattern: /\bdefer(?:red|ring|s)?\b(?!\s+to\s+the\s+caller)/gi },
+  { label: 'delayed / postponed', pattern: /\b(?:delayed|postponed|postpone|deprioriti[sz]ed)\b/gi },
+  { label: 'planned for later / another pull request', pattern: /\bplanned\s+for\s+(?:a\s+)?(?:future|later|the\s+next|another|separate|subsequent)\b/gi },
+  { label: 'left / leaving for later', pattern: /\ble(?:ft|aving|ave)\s+(?:it\s+|this\s+|that\s+|them\s+)?(?:for\s+(?:later|now|the\s+future)|as\s+(?:a\s+)?(?:future|follow[\s-]?up))/gi },
+  { label: 'will be addressed later / separately', pattern: /\b(?:will|to)\s+be\s+(?:addressed|handled|implemented|done|tackled|covered|completed|fixed)\s+(?:later|separately|in\s+(?:a\s+)?(?:future|subsequent|separate|follow[\s-]?up|another|the\s+next))/gi },
+  { label: 'not implemented yet', pattern: /\bnot\s+(?:yet\s+)?(?:implemented|done|completed|finished|addressed|supported|covered)(?:\s+yet)?\b/gi },
+  { label: 'to be implemented / TBD', pattern: /\b(?:to\s+be\s+(?:implemented|done|added|determined|decided)|tbd|to[\s-]?dos?|fixme)\b/gi },
+  { label: 'remaining work / not covered', pattern: /\b(?:remaining\s+(?:work|tasks?|items?)|not\s+covered\s+(?:here|in\s+this\s+(?:pr|pull\s+request|change))|won['’]?t\s+(?:be\s+)?(?:covered|implemented|addressed|done)(?:\s+here)?)\b/gi },
+  { label: 'tracked separately / in a separate issue', pattern: /\btrack(?:ed|ing)?\s+(?:this\s+|it\s+|them\s+|separately\s+)?(?:in\s+)?(?:a\s+)?(?:separate|new|future|follow[\s-]?up)\s+(?:issue|ticket|task)\b/gi },
+  { label: 'for now / as a stopgap / temporary', pattern: /\b(?:for\s+now|as\s+a\s+(?:stop[\s-]?gap|temporary\s+measure|first\s+step)|in\s+the\s+meantime)\b/gi },
+];
+const UNLIMITED_KEYWORDS = new Set(['forever', 'unlimited', 'infinite', 'infinity', 'inf', 'no-limit', 'nolimit', 'none', 'always']);
+/**
+ * Returns true when a raw flag value requests an unlimited number of restarts.
+ * @param {*} value
+ * @returns {boolean}
+ */
+export const isUnlimitedKeepWorking = value => {
+  if (value === Infinity) return true;
+  if (typeof value === 'number') return value === 0;
+  if (typeof value === 'string') {
+    const normalized = value.trim().toLowerCase();
+    if (UNLIMITED_KEYWORDS.has(normalized)) return true;
+    if (normalized === '0') return true;
+  }
+  return false;
+};
+/**
+ * Normalize the --keep-working-until-all-requirements-are-fully-done flag value
+ * into a numeric restart limit.
+ *
+ *  - boolean true (flag without value) -> DEFAULT_KEEP_WORKING_LIMIT (5)
+ *  - "forever" / "unlimited" / "infinite" / "0" / 0 -> Infinity (no limit)
+ *  - a positive number / numeric string -> floor(value)
+ *  - anything invalid -> DEFAULT_KEEP_WORKING_LIMIT (5)
+ *  - falsy (undefined / null / false / "") -> 0 (feature disabled)
+ *
+ * @param {*} value
+ * @param {number} [fallback=DEFAULT_KEEP_WORKING_LIMIT]
+ * @returns {number} numeric limit (Infinity for unlimited, 0 when disabled)
+ */
+export const normalizeKeepWorkingLimit = (value, fallback = DEFAULT_KEEP_WORKING_LIMIT) => {
+  // Disabled
+  if (value === undefined || value === null || value === false || value === '') {
+    return 0;
+  }
+  // Flag provided without a value
+  if (value === true) return fallback;
+  // Unlimited keywords / 0
+  if (isUnlimitedKeepWorking(value)) return Infinity;
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 1) return fallback;
+  return Math.floor(parsed);
+};
+/**
+ * Human readable description of the limit for logs.
+ * @param {number} limit
+ * @returns {string}
+ */
+export const formatKeepWorkingLimit = limit => (limit === Infinity ? 'unlimited' : `${limit}`);
+/**
+ * Scan a single block of text for deferred-work indicators.
+ *
+ * Every entry of DEFERRED_WORK_PATTERNS is run over the whole text, so one
+ * phrase can be reported under several labels. Each hit carries a snippet: up
+ * to 40 characters of context on either side of the match, with whitespace
+ * runs collapsed to a single space. Hits whose label and lower-cased snippet
+ * are identical are reported only once per call.
+ *
+ * The patterns are shared module-level objects with the `g` flag; their
+ * `lastIndex` is reset to 0 before and after use, so statement order inside
+ * the loop matters.
+ *
+ * @param {string} text - the text to scan
+ * @param {string} [source='text'] - a label describing where the text came from
+ * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
+ *   detections in pattern order, then match order; an empty array when `text`
+ *   is empty or not a string
+ */
+export const detectDeferredWork = (text, source = 'text') => {
+  if (!text || typeof text !== 'string') return [];
+  const detections = [];
+  const seen = new Set();
+  for (const { label, pattern } of DEFERRED_WORK_PATTERNS) {
+    // Reset lastIndex because patterns are global and reused across calls.
+    pattern.lastIndex = 0;
+    let match;
+    while ((match = pattern.exec(text)) !== null) {
+      const matchedText = match[0];
+      // Build a short snippet around the match for context.
+      const start = Math.max(0, match.index - 40);
+      const end = Math.min(text.length, match.index + matchedText.length + 40);
+      const snippet = text.slice(start, end).replace(/\s+/g, ' ').trim();
+      // De-duplicate identical (label + snippet) hits within a single source.
+      const key = `${label}::${snippet.toLowerCase()}`;
+      if (!seen.has(key)) {
+        seen.add(key);
+        detections.push({ label, match: matchedText, snippet, source });
+      }
+      // Guard against zero-length matches causing an infinite loop.
+      if (pattern.lastIndex === match.index) pattern.lastIndex++;
+    }
+    pattern.lastIndex = 0;
+  }
+  return detections;
+};
+/**
+ * Run all configured sources through the detector and return a flat list of
+ * detections.
+ *
+ * @param {Array<{source: string, text: string}>} sources
+ * @returns {Array<{label: string, match: string, snippet: string, source: string}>}
+ */
+export const detectDeferredWorkInSources = sources => {
+  const detections = [];
+  for (const { source, text } of sources || []) {
+    detections.push(...detectDeferredWork(text, source));
+  }
+  return detections;
+};
+/**
+ * Extract the added lines (lines beginning with "+") from a unified diff patch.
+ * @param {string} patch
+ * @returns {string}
+ */
+export const extractAddedLinesFromPatch = patch => {
+  if (!patch || typeof patch !== 'string') return '';
+  return patch
+    .split('\n')
+    .filter(line => line.startsWith('+') && !line.startsWith('+++'))
+    .map(line => line.slice(1))
+    .join('\n');
+};
+/**
+ * Build the feedback lines for a keep-working restart iteration.
+ *
+ * @param {Array<{label, snippet, source}>} detections
+ * @param {number} iteration
+ * @param {number} limit
+ * @returns {string[]}
+ */
+export const buildKeepWorkingFeedback = (detections, iteration, limit) => {
+  const limitLabel = formatKeepWorkingLimit(limit);
+  const lines = ['', '='.repeat(60), `🔁 KEEP WORKING UNTIL ALL REQUIREMENTS ARE FULLY DONE (restart ${iteration}/${limitLabel}):`, '='.repeat(60), '', 'It looks like some work was deferred, delayed or planned for a future pull request.', 'The following strong indicators of unfinished / deferred work were detected:', ''];
+  // Show up to 15 distinct detected reasons to keep the prompt focused.
+  const shown = (detections || []).slice(0, 15);
+  for (const detection of shown) {
+    lines.push(`  • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
+  }
+  if ((detections || []).length > shown.length) {
+    lines.push(`  • ...and ${detections.length - shown.length} more indicator(s)`);
+  }
+  lines.push('');
+  lines.push('There is NO future pull request. This is the single pull request where everything must be done.');
+  lines.push('Do not defer, delay or postpone anything. Remove any "future work" / "out of scope" / "TODO" / "follow-up" notes by actually implementing them now.');
+  lines.push('');
+  lines.push(KEEP_WORKING_PROMPT);
+  lines.push('');
+  return lines;
+};
+export default {
+  DEFAULT_KEEP_WORKING_LIMIT,
+  KEEP_WORKING_PROMPT,
+  DEFERRED_WORK_PATTERNS,
+  isUnlimitedKeepWorking,
+  normalizeKeepWorkingLimit,
+  formatKeepWorkingLimit,
+  detectDeferredWork,
+  detectDeferredWorkInSources,
+  extractAddedLinesFromPatch,
+  buildKeepWorkingFeedback,
+};

package/src/solve.keep-working.lib.mjs ADDED Viewed

@@ -0,0 +1,285 @@
+#!/usr/bin/env node
+/**
+ * Keep-working-until-done module for solve.mjs
+ *
+ * [EXPERIMENTAL] When --keep-working-until-all-requirements-are-fully-done is
+ * enabled, after the main solve (and any other post-processing) completes, this
+ * module scans the pull request description, the AI working-session/solution
+ * summary, and the markdown documents changed by the pull request for strong
+ * indicators that the AI deferred, delayed or postponed work to a future pull
+ * request / iteration (e.g. "out of scope", "future work", "deferred",
+ * "follow-up PR", "TODO", ...).
+ *
+ * When such indicators are found, it automatically restarts the AI tool with a
+ * prompt instructing it to finish everything in this single pull request, in
+ * addition to the concrete detected reasons. It keeps restarting until no
+ * indicators remain or until the configured restart limit is reached.
+ *
+ * By default the restart limit is 5. The limit can be set to a custom number,
+ * or to "forever" / "unlimited" / "infinite" / 0 to remove the limit entirely.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1883
+ */
+// Check if use is already defined globally (when imported from solve.mjs)
+// If not, fetch it (when running standalone)
+if (typeof globalThis.use === 'undefined') {
+  globalThis.use = (await eval(await (await fetch('https://unpkg.com/use-m/use.js')).text())).use;
+}
+const use = globalThis.use;
+// Use command-stream for consistent $ behavior across runtimes
+const { $: __rawDollar$ } = await use('command-stream');
+const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
+const $ = wrapDollarWithGhRetry(__rawDollar$);
+// Import shared library functions
+const lib = await import('./lib.mjs');
+const { log, cleanErrorMessage } = lib;
+// Import shared restart utilities
+const restartShared = await import('./solve.restart-shared.lib.mjs');
+const { executeToolIteration, isApiError, isUsageLimitReached } = restartShared;
+const sentryLib = await import('./sentry.lib.mjs');
+const { reportError } = sentryLib;
+// Pure detection + normalization helpers live in a separate, network-free
+// module so they can be unit-tested in isolation (issue #1883).
+const detectLib = await import('./solve.keep-working.detect.lib.mjs');
+const { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, extractAddedLinesFromPatch, buildKeepWorkingFeedback } = detectLib;
+// Re-export the pure helpers so existing importers of this module keep working.
+export { DEFAULT_KEEP_WORKING_LIMIT, KEEP_WORKING_PROMPT, DEFERRED_WORK_PATTERNS, isUnlimitedKeepWorking, normalizeKeepWorkingLimit, formatKeepWorkingLimit, detectDeferredWork, detectDeferredWorkInSources, buildKeepWorkingFeedback };
+/**
+ * Collect the text sources to scan for deferred-work indicators:
+ *   1. The pull request description (body).
+ *   2. The AI working-session / solution summary (passed in-memory).
+ *   3. The markdown documents changed by the pull request (added lines only).
+ *
+ * @param {object} params
+ * @returns {Promise<Array<{source: string, text: string}>>}
+ */
+export const collectDeferredWorkSources = async ({ owner, repo, prNumber, resultSummary }) => {
+  const sources = [];
+  // 1. Pull request description
+  try {
+    const prResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.body // ""'`;
+    if (prResult.code === 0) {
+      const body = prResult.stdout.toString();
+      if (body && body.trim()) {
+        sources.push({ source: 'pull request description', text: body });
+      }
+    }
+  } catch (error) {
+    reportError(error, { context: 'keep_working_collect_pr_body', owner, repo, prNumber, operation: 'fetch_pr_body' });
+  }
+  // 2. AI working-session / solution summary (in-memory, no token cost)
+  if (resultSummary && typeof resultSummary === 'string' && resultSummary.trim()) {
+    sources.push({ source: 'AI solution summary', text: resultSummary });
+  }
+  // 3. Changed markdown documents (scan only added lines from the diff)
+  try {
+    const filesResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber}/files --paginate`;
+    if (filesResult.code === 0) {
+      const files = JSON.parse(filesResult.stdout.toString() || '[]');
+      for (const file of files) {
+        const filename = file.filename || '';
+        if (!/\.(md|markdown|mdx)$/i.test(filename)) continue;
+        if (file.status === 'removed') continue;
+        const addedText = extractAddedLinesFromPatch(file.patch);
+        if (addedText && addedText.trim()) {
+          sources.push({ source: `changed markdown document ${filename}`, text: addedText });
+        }
+      }
+    }
+  } catch (error) {
+    reportError(error, { context: 'keep_working_collect_md_files', owner, repo, prNumber, operation: 'fetch_pr_files' });
+  }
+  return sources;
+};
+/**
+ * Runs keep-working restart iterations after the main solve.
+ *
+ * @param {object} params
+ * @param {string} params.issueUrl
+ * @param {string} params.owner
+ * @param {string} params.repo
+ * @param {string|number} params.issueNumber
+ * @param {string|number} params.prNumber
+ * @param {string} params.branchName
+ * @param {string} params.tempDir
+ * @param {string} [params.workspaceTmpDir]
+ * @param {object} params.argv - CLI arguments
+ * @param {function} params.cleanupClaudeFile - cleanup function
+ * @param {string} [params.resultSummary] - AI solution summary from the last session
+ * @returns {Promise<{sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo}|null>}
+ */
+export const runKeepWorkingUntilDone = async ({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }) => {
+  const limit = normalizeKeepWorkingLimit(argv.keepWorkingUntilAllRequirementsAreFullyDone);
+  if (!limit || !prNumber) {
+    return null;
+  }
+  await log('');
+  await log(`🔁 KEEP-WORKING: Scanning for deferred / delayed / out-of-scope work (limit: ${formatKeepWorkingLimit(limit)} restart(s))`);
+  await log('   Sources: pull request description, AI solution summary, changed markdown documents');
+  await log('');
+  // Get PR merge state status for the iterations
+  let currentMergeStateStatus = null;
+  try {
+    const prStateResult = await $`gh api repos/${owner}/${repo}/pulls/${prNumber} --jq '.mergeStateStatus'`;
+    if (prStateResult.code === 0) {
+      currentMergeStateStatus = prStateResult.stdout.toString().trim();
+    }
+  } catch {
+    // Ignore errors getting merge state
+  }
+  let sessionId;
+  let anthropicTotalCostUSD;
+  let publicPricingEstimate;
+  let pricingInfo;
+  let lastResultSummary = resultSummary;
+  let consecutiveErrors = 0;
+  // Hard safety cap even in "unlimited" mode, to avoid spinning forever on
+  // repeated failures (issue #1883: "limit it with 5 auto-restarts ... in case
+  // of errors"). Only consecutive errors count toward this cap.
+  const MAX_CONSECUTIVE_ERRORS = 3;
+  let iteration = 0;
+  while (true) {
+    // Gather and scan sources fresh on every iteration.
+    let sources = [];
+    try {
+      sources = await collectDeferredWorkSources({ owner, repo, prNumber, resultSummary: lastResultSummary });
+    } catch (error) {
+      reportError(error, { context: 'keep_working_collect_sources', owner, repo, prNumber, operation: 'collect_sources' });
+      await log(`⚠️  KEEP-WORKING: Could not collect sources: ${cleanErrorMessage(error)}`, { level: 'warning' });
+      break;
+    }
+    const detections = detectDeferredWorkInSources(sources);
+    if (detections.length === 0) {
+      if (iteration === 0) {
+        await log('✅ KEEP-WORKING: No deferred / delayed / out-of-scope work detected. Nothing to restart for.');
+      } else {
+        await log(`✅ KEEP-WORKING: No more deferred work detected after ${iteration} restart(s). All requirements appear to be fully done.`);
+      }
+      break;
+    }
+    if (iteration >= limit) {
+      await log(`🛑 KEEP-WORKING: Reached restart limit (${formatKeepWorkingLimit(limit)}) but ${detections.length} deferred-work indicator(s) still detected.`);
+      await log('   Stopping to avoid an unbounded loop. Increase the limit (or use "forever"/"unlimited") to keep going.');
+      for (const detection of detections.slice(0, 10)) {
+        await log(`   • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
+      }
+      break;
+    }
+    iteration++;
+    await log('');
+    await log(`🔁 KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)}: ${detections.length} deferred-work indicator(s) detected, restarting...`);
+    for (const detection of detections.slice(0, 10)) {
+      await log(`   • [${detection.label}] in ${detection.source}: "${detection.snippet}"`);
+    }
+    // Issue #1572 pattern: sync local branch with remote before each iteration
+    try {
+      const pullResult = await $({ cwd: tempDir })`git pull origin ${branchName} 2>&1`;
+      if (pullResult.code === 0) {
+        await log(`   Synced local branch ${branchName} from remote`, { verbose: true });
+      } else {
+        await log(`   Warning: git pull failed (code ${pullResult.code}); continuing with local state`, { level: 'warning' });
+      }
+    } catch (error) {
+      reportError(error, { context: 'keep_working_git_pull', branchName, operation: 'git_pull' });
+      await log(`   Warning: git pull error: ${cleanErrorMessage(error)}`, { level: 'warning' });
+    }
+    const feedbackLines = buildKeepWorkingFeedback(detections, iteration, limit);
+    const iterationResult = await executeToolIteration({
+      issueUrl,
+      owner,
+      repo,
+      issueNumber,
+      prNumber,
+      branchName,
+      tempDir,
+      workspaceTmpDir,
+      mergeStateStatus: currentMergeStateStatus,
+      feedbackLines,
+      argv: {
+        ...argv,
+        // Reinforce the "finish everything now" guidance in the system prompt.
+        promptEnsureAllRequirementsAreMet: true,
+        // Prevent recursive keep-working inside the restart iteration.
+        keepWorkingUntilAllRequirementsAreFullyDone: 0,
+      },
+    });
+    // Update session data from the restart.
+    if (iterationResult) {
+      if (iterationResult.sessionId) sessionId = iterationResult.sessionId;
+      if (iterationResult.anthropicTotalCostUSD) anthropicTotalCostUSD = iterationResult.anthropicTotalCostUSD;
+      if (iterationResult.publicPricingEstimate) publicPricingEstimate = iterationResult.publicPricingEstimate;
+      if (iterationResult.pricingInfo) pricingInfo = iterationResult.pricingInfo;
+      if (iterationResult.result) lastResultSummary = iterationResult.result;
+    }
+    // Issue #1883: cap consecutive errors so we don't spin forever (especially
+    // important in "unlimited" mode).
+    if (isUsageLimitReached(iterationResult)) {
+      await log('🛑 KEEP-WORKING: Usage limit reached during restart. Stopping keep-working loop.');
+      break;
+    }
+    if (isApiError(iterationResult)) {
+      consecutiveErrors++;
+      await log(`⚠️  KEEP-WORKING: API error during restart (${consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS} consecutive).`, { level: 'warning' });
+      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
+        await log('🛑 KEEP-WORKING: Too many consecutive errors. Stopping keep-working loop.');
+        break;
+      }
+    } else {
+      consecutiveErrors = 0;
+    }
+    await log(`✅ KEEP-WORKING iteration ${iteration}/${formatKeepWorkingLimit(limit)} complete`);
+    await log('');
+  }
+  // Clean up CLAUDE.md/.gitkeep after restarts
+  try {
+    await cleanupClaudeFile(tempDir, branchName, null, argv);
+  } catch (error) {
+    reportError(error, { context: 'keep_working_cleanup', branchName, operation: 'cleanup_claude_file' });
+  }
+  if (iteration === 0) return null;
+  return { sessionId, anthropicTotalCostUSD, publicPricingEstimate, pricingInfo };
+};
+export default {
+  DEFAULT_KEEP_WORKING_LIMIT,
+  KEEP_WORKING_PROMPT,
+  DEFERRED_WORK_PATTERNS,
+  isUnlimitedKeepWorking,
+  normalizeKeepWorkingLimit,
+  formatKeepWorkingLimit,
+  detectDeferredWork,
+  detectDeferredWorkInSources,
+  collectDeferredWorkSources,
+  buildKeepWorkingFeedback,
+  runKeepWorkingUntilDone,
+};

package/src/solve.mjs CHANGED Viewed

@@ -45,6 +45,7 @@ const watchLib = await import('./solve.watch.lib.mjs');
 const { startWatchMode } = watchLib;
 const { startAutoRestartUntilMergeable } = await import('./solve.auto-merge.lib.mjs');
 const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs');
+const { runKeepWorkingUntilDone } = await import('./solve.keep-working.lib.mjs');
 const exitHandler = await import('./exit-handler.lib.mjs');
 const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
 const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
@@ -852,6 +853,14 @@ try {
   let resultModelUsage = toolResult.resultModelUsage || null;
   let streamTokenUsage = toolResult.streamTokenUsage || null;
   let subAgentCalls = toolResult.subAgentCalls || null; // Issue #1590
+  const applyRestartResult = result => { // Fold the result of a restart / post-processing run into the session variables of the enclosing scope.
+    if (!result) return; // The runners return null when they did nothing.
+    sessionId = result.sessionId || sessionId; // `||` keeps the current value whenever the result field is missing or falsy.
+    anthropicTotalCostUSD = result.anthropicTotalCostUSD || anthropicTotalCostUSD;
+    publicPricingEstimate = result.publicPricingEstimate || publicPricingEstimate;
+    pricingInfo = result.pricingInfo || pricingInfo;
+  };
   limitReached = toolResult.limitReached;
   cleanupContext.limitReached = limitReached;
@@ -1249,12 +1258,7 @@ try {
     });
     // Update session data from restart
-    if (restartResult) {
-      if (restartResult.sessionId) sessionId = restartResult.sessionId;
-      if (restartResult.anthropicTotalCostUSD) anthropicTotalCostUSD = restartResult.anthropicTotalCostUSD;
-      if (restartResult.publicPricingEstimate) publicPricingEstimate = restartResult.publicPricingEstimate;
-      if (restartResult.pricingInfo) pricingInfo = restartResult.pricingInfo;
-    }
+    applyRestartResult(restartResult);
     // Clean up CLAUDE.md/.gitkeep again after restart
     await cleanupClaudeFile(tempDir, branchName, null, argv);
@@ -1268,13 +1272,9 @@ try {
   }
   // Issue #1383: --finalize
-  const autoEnsureResult = await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile });
-  if (autoEnsureResult) {
-    if (autoEnsureResult.sessionId) sessionId = autoEnsureResult.sessionId;
-    if (autoEnsureResult.anthropicTotalCostUSD) anthropicTotalCostUSD = autoEnsureResult.anthropicTotalCostUSD;
-    if (autoEnsureResult.publicPricingEstimate) publicPricingEstimate = autoEnsureResult.publicPricingEstimate;
-    if (autoEnsureResult.pricingInfo) pricingInfo = autoEnsureResult.pricingInfo;
-  }
+  applyRestartResult(await runAutoEnsureRequirements({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, argv, cleanupClaudeFile }));
+  // Issue #1883: --keep-working-until-all-requirements-are-fully-done (detect deferred work and auto-restart until done)
+  applyRestartResult(await runKeepWorkingUntilDone({ issueUrl, owner, repo, issueNumber, prNumber, branchName, tempDir, workspaceTmpDir, argv, cleanupClaudeFile, resultSummary }));
   // Start watch mode if enabled OR if we need to handle uncommitted changes
   if (argv.verbose) {