npm - claude-code-session-manager - Versions diffs - 0.23.0 → 0.25.0 - Mend

claude-code-session-manager 0.23.0 → 0.25.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/assets/{TiptapBody-Bj0_gFeB.js → TiptapBody-BIuH7h34.js} +1 -1
package/dist/assets/index-Cu9X6oyA.css +32 -0
package/dist/assets/{index-DqCaosIl.js → index-H0IXEKiC.js} +600 -600
package/dist/index.html +2 -2
package/package.json +2 -3
package/src/main/__tests__/runVerify.test.cjs +276 -0
package/src/main/runVerify.cjs +89 -6
package/src/main/scheduler.cjs +122 -20
package/src/main/webRemote.cjs +20 -4
package/dist/assets/index-Dq3KViBt.css +0 -32

package/dist/index.html CHANGED Viewed

@@ -7,10 +7,10 @@
     <link rel="preconnect" href="https://fonts.googleapis.com">
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
     <link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,500;0,6..72,600;0,6..72,700;1,6..72,400&family=Geist:wght@300;400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap" rel="stylesheet">
-    <script type="module" crossorigin src="./assets/index-DqCaosIl.js"></script>
+    <script type="module" crossorigin src="./assets/index-H0IXEKiC.js"></script>
     <link rel="modulepreload" crossorigin href="./assets/monaco-editor-BW5C4Iv1.js">
     <link rel="stylesheet" crossorigin href="./assets/monaco-editor-BTnBOi8r.css">
-    <link rel="stylesheet" crossorigin href="./assets/index-Dq3KViBt.css">
+    <link rel="stylesheet" crossorigin href="./assets/index-Cu9X6oyA.css">
   </head>
   <body class="bg-bg text-fg font-sans antialiased">
     <div id="root"></div>

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-session-manager",
-  "version": "0.23.0",
+  "version": "0.25.0",
   "description": "Local cockpit for the Claude Code CLI — multi-tab terminal, full config surface, scheduler, voice dictation, and live observability.",
   "type": "module",
   "main": "src/main/index.cjs",
@@ -63,8 +63,7 @@
     "url": "https://github.com/StanislavBG/claude-code-session-manager/issues"
   },
   "publishConfig": {
-    "access": "public",
-    "provenance": true
+    "access": "public"
   },
   "engines": {
     "node": ">=18"

package/src/main/__tests__/runVerify.test.cjs CHANGED Viewed

@@ -584,3 +584,279 @@ test('isHarnessToolError detects wrapper and "No such tool available"', () => {
   assert.equal(isHarnessToolError('ModuleNotFoundError: No module named x'), false);
   assert.equal(isHarnessToolError(''), false);
 });
+// ─── SCHEDULER_VERDICT sentinel override tests ────────────────────────────────
+/**
+ * Build the event list for a run log in which a Bash tool_result contains a
+ * Python Traceback + KeyError and the final result event optionally carries a
+ * sentinel line.
+ *
+ * @param {string|null} sentinelLine  Line appended on its own line after
+ *   'All work done.' in the result text (e.g. 'SCHEDULER_VERDICT: PASS').
+ *   Any falsy value leaves the result text as just 'All work done.'.
+ * @returns {object[]} Three events in order: an assistant tool_use, the user
+ *   tool_result answering it, and a result event with subtype 'success'.
+ */
+function tracebackRunEvents(sentinelLine) {
+  const resultText = sentinelLine
+    ? `All work done.\n${sentinelLine}`
+    : 'All work done.';
+  return [
+    {
+      type: 'assistant',
+      message: {
+        role: 'assistant',
+        content: [{
+          type: 'tool_use',
+          id: 'toolu_tv_001',
+          name: 'Bash',
+          input: { command: 'pytest', description: 'Run acceptance tests' },
+        }],
+      },
+    },
+    {
+      type: 'user',
+      message: {
+        role: 'user',
+        content: [{
+          type: 'tool_result',
+          tool_use_id: 'toolu_tv_001', // same id as the tool_use above, pairing call and result
+          content: [
+            '=== TDD red phase ===',
+            'Traceback (most recent call last):',
+            '  File "test_foo.py", line 5, in test_bar',
+            "KeyError: 'missing_key'",
+          ].join('\n'),
+          is_error: false, // the tool call is not flagged as failed; only its output text looks like an error
+        }],
+      },
+    },
+    { type: 'result', subtype: 'success', result: resultText },
+  ];
+}
+// (a) sentinel PASS + committedDuringRun:true + Traceback+Error → clean
+test('sentinel PASS + committedDuringRun:true + Traceback → clean (override)', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '77-sentinel-override-pass';
+    writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
+    const prdPath = writePrd(tmp, slug, '# Sentinel override test');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'running' },
+      allJobs: [],
+      committedDuringRun: true,
+    });
+    assert.equal(verdict.verdict, 'clean', `expected clean, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, null);
+    assert.ok(verdict.reason.includes('SCHEDULER_VERDICT: PASS'), `reason should mention sentinel: ${verdict.reason}`);
+    // Sidecar should record the sentinel and override
+    const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
+    assert.equal(sidecar.sentinel, 'pass');
+    assert.ok(sidecar.sentinelOverride, 'sidecar should record sentinelOverride');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (b) no sentinel + Traceback+Error → transcript_errors (unchanged baseline)
+test('no sentinel + Traceback+Error + committedDuringRun:true → transcript_errors (no override)', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '77-no-sentinel-baseline';
+    writeLog(tmp, slug, tracebackRunEvents(null));
+    const prdPath = writePrd(tmp, slug, '# No sentinel baseline');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'running' },
+      allJobs: [],
+      committedDuringRun: true,
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `expected transcript_errors without sentinel, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (c) sentinel PASS + committedDuringRun:false → stays transcript_errors (commit not confirmed)
+test('sentinel PASS + committedDuringRun:false → transcript_errors (commit unconfirmed)', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '77-sentinel-no-commit';
+    writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
+    const prdPath = writePrd(tmp, slug, '# Sentinel without commit');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'running' },
+      allJobs: [],
+      committedDuringRun: false,
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `PASS without commit must not override, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (d) sentinel FAIL → never clean (even with committedDuringRun:true)
+test('sentinel FAIL + committedDuringRun:true → transcript_errors (FAIL never overrides)', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '77-sentinel-fail';
+    writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
+    const prdPath = writePrd(tmp, slug, '# Sentinel FAIL');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'running' },
+      allJobs: [],
+      committedDuringRun: true,
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must not override to clean, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// ─── pre-sentinel heal (allowPreSentinelHeal) ─────────────────────────────────
+// (f) allowPreSentinelHeal=true + committed + no sentinel → clean
+test('pre-sentinel heal: committed + no sentinel + allowPreSentinelHeal → clean', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '86-pre-sentinel-heal-pass';
+    writeLog(tmp, slug, tracebackRunEvents(null)); // no SCHEDULER_VERDICT line
+    const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'needs_review' },
+      allJobs: [],
+      committedDuringRun: true,
+      allowPreSentinelHeal: true,
+    });
+    assert.equal(verdict.verdict, 'clean', `expected clean via pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, null);
+    assert.ok(verdict.reason.includes('pre-sentinel heal'), `reason should mention pre-sentinel heal: ${verdict.reason}`);
+    const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
+    assert.ok(sidecar.preSentinelHeal, 'sidecar should record preSentinelHeal');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (g) allowPreSentinelHeal=true + committed + FAIL sentinel → stays transcript_errors
+test('pre-sentinel heal: committed + SCHEDULER_VERDICT: FAIL + allowPreSentinelHeal → transcript_errors', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '86-pre-sentinel-heal-fail';
+    writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
+    const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by FAIL');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'needs_review' },
+      allJobs: [],
+      committedDuringRun: true,
+      allowPreSentinelHeal: true,
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (h) allowPreSentinelHeal=true + NOT committed + no sentinel → stays transcript_errors
+test('pre-sentinel heal: not committed + no sentinel + allowPreSentinelHeal → transcript_errors', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '86-pre-sentinel-heal-uncommitted';
+    writeLog(tmp, slug, tracebackRunEvents(null));
+    const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by no commit');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'needs_review' },
+      allJobs: [],
+      committedDuringRun: false,
+      allowPreSentinelHeal: true,
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `no commit must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (i) allowPreSentinelHeal=false (default) + committed + no sentinel → baseline unchanged
+test('no sentinel + committed + allowPreSentinelHeal=false (default) → transcript_errors', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '86-pre-sentinel-heal-disabled';
+    writeLog(tmp, slug, tracebackRunEvents(null));
+    const prdPath = writePrd(tmp, slug, '# allowPreSentinelHeal disabled by default');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'needs_review' },
+      allJobs: [],
+      committedDuringRun: true,
+      // allowPreSentinelHeal defaults to false
+    });
+    assert.equal(verdict.verdict, 'transcript_errors', `must not heal without allowPreSentinelHeal, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally {
+    rmdir(tmp);
+  }
+});
+// (e) halt + sentinel PASS → still halt (override must not apply to halt)
+test('halt result + sentinel PASS + committedDuringRun:true → still halt', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '77-halt-sentinel-pass';
+    const logEvents = [
+      {
+        type: 'assistant',
+        message: {
+          role: 'assistant',
+          content: [{
+            type: 'tool_use',
+            id: 'toolu_halt_001',
+            name: 'Bash',
+            input: { command: 'check deps', description: 'Check prerequisites' },
+          }],
+        },
+      },
+      {
+        type: 'user',
+        message: {
+          role: 'user',
+          content: [{
+            type: 'tool_result',
+            tool_use_id: 'toolu_halt_001',
+            content: 'dep not ready',
+            is_error: false,
+          }],
+        },
+      },
+      {
+        type: 'result',
+        subtype: 'success',
+        result: 'HALT: prerequisite not met\nSCHEDULER_VERDICT: PASS',
+      },
+    ];
+    writeLog(tmp, slug, logEvents);
+    const prdPath = writePrd(tmp, slug, '# Halt with sentinel');
+    const verdict = await verifyRun({
+      runDir: tmp,
+      prdPath,
+      queueEntry: { slug, status: 'running' },
+      allJobs: [],
+      committedDuringRun: true,
+    });
+    assert.equal(verdict.verdict, 'halt', `halt must survive even with PASS sentinel, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'pending');
+  } finally {
+    rmdir(tmp);
+  }
+});

package/src/main/runVerify.cjs CHANGED Viewed

@@ -414,6 +414,37 @@ function checkDeps(queueEntry, allJobs, prdBody) {
   return { ok: true };
 }
+// ─── sentinel scanner ─────────────────────────────────────────────────────────
+/**
+ * Scan for a `SCHEDULER_VERDICT: PASS|FAIL` sentinel line in the run output.
+ *
+ * Checks `resultEvent.resultText` first (the agent's final message). Only when
+ * that yields no match, or `resultEvent` is falsy, does it fall back to the
+ * content of the last entry in `events` whose `kind` is 'tool_result'.
+ *
+ * The pattern is anchored to line-start (multiline flag) so prose mentioning
+ * the string in mid-sentence does not match. It is case-sensitive, allows
+ * optional whitespace after the colon, and requires a word boundary after
+ * PASS/FAIL. Within one text the FIRST matching line wins.
+ *
+ * NOTE(review): because the first match wins, a text containing an early PASS
+ * line followed by a later FAIL line yields 'pass'. The job prompt asks for
+ * the sentinel as the last line — confirm first-match is the intended policy.
+ * NOTE(review): `.match` is called directly on `resultEvent.resultText` and on
+ * `lastToolResult.content`, so both are assumed to be strings; a truthy
+ * resultEvent without a string resultText would throw — verify against parseLog.
+ *
+ * @param {object|null} resultEvent  Parsed result event, or a falsy value when
+ *                                   the run produced none.
+ * @param {object[]} events          Parsed log events; only entries with
+ *                                   `kind === 'tool_result'` are considered.
+ * @returns {'pass'|'fail'|null}     Lower-cased sentinel value, or null when no
+ *                                   sentinel line was found in either place.
+ */
+function scanSentinel(resultEvent, events) {
+  const RE = /^SCHEDULER_VERDICT:\s*(PASS|FAIL)\b/m;
+  if (resultEvent) {
+    const m = resultEvent.resultText.match(RE);
+    if (m) return m[1].toLowerCase();
+  }
+  let lastToolResult = null;
+  for (const ev of events) {
+    if (ev.kind === 'tool_result') lastToolResult = ev; // keep overwriting so only the final tool_result survives
+  }
+  if (lastToolResult && lastToolResult.content) {
+    const m = lastToolResult.content.match(RE);
+    if (m) return m[1].toLowerCase();
+  }
+  return null;
+}
 // ─── main verifier ────────────────────────────────────────────────────────────
 /**
@@ -422,13 +453,23 @@ function checkDeps(queueEntry, allJobs, prdBody) {
  * escalate to 'needs_review'.
  *
  * @param {object}   params
- * @param {string}   params.runDir      Absolute path to the run directory.
- * @param {string}   params.prdPath     Absolute path to the PRD .md file.
- * @param {object}   params.queueEntry  The queue.json entry for this job.
- * @param {object[]} [params.allJobs]   All entries from queue.json (dep checks).
+ * @param {string}   params.runDir            Absolute path to the run directory.
+ * @param {string}   params.prdPath           Absolute path to the PRD .md file.
+ * @param {object}   params.queueEntry        The queue.json entry for this job.
+ * @param {object[]} [params.allJobs]         All entries from queue.json (dep checks).
+ * @param {boolean}  [params.committedDuringRun] True when HEAD moved during the run,
+ *                                            confirming the job's commit landed.
+ *                                            Default false for back-compat.
+ * @param {boolean}  [params.allowPreSentinelHeal] When true, a commit-in-window
+ *                                            with no SCHEDULER_VERDICT: FAIL is
+ *                                            sufficient to override weak verdicts
+ *                                            (transcript_errors/verify_unavailable)
+ *                                            even without a PASS sentinel. Only
+ *                                            set by the boot reverify self-heal
+ *                                            pass for pre-sentinel legacy runs.
  * @returns {Promise<{verdict:string, reason:string, downgradeTo:string|null}>}
  */
-async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
+async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [], committedDuringRun = false, allowPreSentinelHeal = false }) {
   const { slug } = queueEntry;
   const logPath = path.join(runDir, `${slug}.log`);
   const verdictsPath = path.join(runDir, `${slug}.verdicts.json`);
@@ -568,7 +609,12 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
       }
     }
-    const extras = annotations.length ? { annotations } : undefined;
+    // Scan for the SCHEDULER_VERDICT sentinel emitted by the finish protocol.
+    const sentinel = scanSentinel(resultEvent, events);
+    const sentinelFields = sentinel ? { sentinel } : {};
+    const extras = (annotations.length || sentinel)
+      ? { ...(annotations.length ? { annotations } : {}), ...sentinelFields }
+      : undefined;
     if (issues.length === 0) {
       const reason = annotations.length
@@ -580,6 +626,42 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
     // Pick highest-priority issue (transcript_errors > verify_unavailable).
     issues.sort((a, b) => b.priority - a.priority);
     const top = issues[0];
+    // Sentinel override: SCHEDULER_VERDICT: PASS + a commit that landed during
+    // the run is authoritative evidence the job succeeded. Suppresses incidental
+    // transcript noise (grep results with "Error", TDD red-phase reproductions,
+    // Traceback in debug output) for the two weakest verdict classes.
+    // MUST NOT apply to halt or deps_unmet — those keep their existing semantics.
+    if (
+      sentinel === 'pass'
+      && committedDuringRun
+      && (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
+    ) {
+      return conclude('clean',
+        `SCHEDULER_VERDICT: PASS + commit landed overrides ${top.verdict}`,
+        null,
+        { ...(annotations.length ? { annotations } : {}), sentinel, sentinelOverride: top.verdict },
+      );
+    }
+    // Pre-sentinel heal: job predates SCHEDULER_VERDICT emission. A commit in
+    // the run window with no explicit FAIL sentinel is weak but sufficient to
+    // override the two weakest verdict classes during the self-heal pass.
+    // Only applies when the caller opts in (allowPreSentinelHeal=true) — live
+    // runs never set this, so only the boot reverify self-heal uses it.
+    if (
+      allowPreSentinelHeal
+      && committedDuringRun
+      && sentinel !== 'fail'
+      && (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
+    ) {
+      return conclude('clean',
+        `pre-sentinel heal: committed in run window, no SCHEDULER_VERDICT: FAIL, overrides ${top.verdict}`,
+        null,
+        { ...(annotations.length ? { annotations } : {}), preSentinelHeal: top.verdict },
+      );
+    }
     return conclude(top.verdict, top.reason, 'needs_review', extras);
   } catch (e) {
@@ -601,4 +683,5 @@ module.exports = {
   parsePrdBodyDepFragments,
   checkDeps,
   parseLog,
+  scanSentinel,
 };

package/src/main/scheduler.cjs CHANGED Viewed

@@ -91,6 +91,13 @@ const RESULT_TAIL_BYTES = 8 * 1024;
 const IDLE_OUTPUT_KILL_MS = 20 * 60_000;
 const IDLE_CHECK_INTERVAL_MS = 60_000;
+// Boot reconciliation: a job left 'running' by an app restart/crash whose log
+// shows neither success nor a real failure result was merely interrupted — the
+// host died, the PRD didn't. Re-queue it up to this many times before giving up
+// and marking it failed, so a restart self-recovers instead of needing a manual
+// flip + a wasted fix-plan investigation.
+const ORPHAN_REQUEUE_CAP = 2;
 // Appended to every scheduled job prompt so the queue can be RELIED ON to finish
 // work to a consistent bar: review → security-review → verify → commit. Enforced
 // centrally here (not per-PRD) so it applies to every current and future PRD.
@@ -116,6 +123,17 @@ sequence. Do not stop before the commit lands; committing is part of the job.
    pass. Do not assume npm; use whatever the target project uses.
 4. COMMIT — stage and commit ALL changes with a clear conventional message:
    \`git add -A && git commit -m "<type>(<scope>): <summary>"\`.
+5. VERDICT SENTINEL — as the LAST LINE of your final result text, emit exactly
+   one of these two lines (no trailing text after it):
+     SCHEDULER_VERDICT: PASS
+     SCHEDULER_VERDICT: FAIL <one-line reason>
+   Print PASS only when the AC gate is green AND the commit from step 4 landed.
+   Print FAIL (and exit 1) if the AC gate was red or the commit could not land.
+   NEVER print PASS on a red AC gate — a lying PASS turns the verifier from a
+   false-failure catcher into a silent-failure shipper. A truthful PASS + a
+   landed commit lets the verifier override incidental transcript noise (grep
+   results containing "Error", a TDD red-test run early in the session, debug
+   Tracebacks) so those do not false-trip a needs_review downgrade.
 A job that exits with uncommitted changes is treated as INCOMPLETE and flagged
 for review. Do NOT add work beyond the acceptance criteria — this protocol is the
@@ -169,6 +187,25 @@ function gitHead(cwd) {
   });
 }
+// Resolves true if `git log` in cwd lists ≥1 commit between startedAt and finishedAt
+// (60s slack added to the end only; end = now when finishedAt is absent). Used by the self-heal pass to derive committedDuringRun
+// from the recorded run window — the live commit-guard uses gitHead() instead.
+// Missing cwd/startedAt, or any git error/timeout → false (no override, job stays as-is). Not fully throw-proof: see NOTE below.
+function committedInWindow(cwd, startedAt, finishedAt) {
+  return new Promise((resolve) => {
+    if (!cwd || !startedAt) { resolve(false); return; }
+    const until = finishedAt
+      ? new Date(Date.parse(finishedAt) + 60_000).toISOString() // NOTE(review): an unparsable finishedAt gives NaN, toISOString() throws RangeError, and the promise rejects — confirm callers pass valid timestamps
+      : new Date().toISOString();
+    execFile(
+      'git',
+      ['-C', cwd, 'log', '--format=%H', `--since=${startedAt}`, `--until=${until}`],
+      { timeout: 10_000, windowsHide: true }, // give up after 10s rather than stall the caller
+      (err, stdout) => { resolve(!err && String(stdout || '').trim().length > 0); }, // any non-empty output means at least one commit hash was printed
+    );
+  });
+}
 const ROOT = path.join(os.homedir(), '.claude', 'session-manager', 'scheduled-plans');
 const PRDS_DIR = path.join(ROOT, 'prds');
 const RUNS_DIR = path.join(ROOT, 'runs');
@@ -1224,6 +1261,12 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
     // Called outside mutate() so the queue lock is not held during I/O.
     let verifyResult = null;
     if (res.exitCode === 0 && !res.rateLimited) {
+      // Detect whether the job self-committed by comparing HEAD before/after.
+      // Used by the sentinel override: SCHEDULER_VERDICT: PASS + a landed
+      // commit together override incidental transcript noise verdicts.
+      const headAtExit = await gitHead(guardCwd);
+      const committedDuringRun = !!(guardHeadBefore && headAtExit && guardHeadBefore !== headAtExit);
       const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
       const stateForDeps = await readQueue();
       verifyResult = await verifyRun({
@@ -1231,6 +1274,7 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
         prdPath,
         queueEntry: job,
         allJobs: stateForDeps.jobs,
+        committedDuringRun,
       }).catch((e) => ({
         verdict: 'verify_unavailable',
         reason: `verifier threw: ${e?.message ?? String(e)}`,
@@ -1688,14 +1732,30 @@ async function reverifyNeedsReview() {
   const snap = await readQueue();
   const candidates = snap.jobs.filter(isRescanCandidate);
   const healed = [];
+  const leftForReview = [];
   for (const job of candidates) {
     const runDir = path.join(RUNS_DIR, job.runId);
     const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
+    // Derive committedDuringRun from the recorded run window. The live
+    // commit-guard uses gitHead() (before/after HEAD diff); here the run is
+    // already over so we query git log filtered to [startedAt, finishedAt+60s].
+    const committedDuringRun = await committedInWindow(job.cwd, job.startedAt, job.finishedAt);
     let v = null;
     try {
-      v = await verifyRun({ runDir, prdPath, queueEntry: job, allJobs: snap.jobs });
-    } catch { continue; } // unreadable log etc. — leave for human review
-    if (v && v.verdict === 'clean') healed.push(job.slug);
+      v = await verifyRun({
+        runDir,
+        prdPath,
+        queueEntry: job,
+        allJobs: snap.jobs,
+        committedDuringRun,
+        allowPreSentinelHeal: true,
+      });
+    } catch { leftForReview.push({ slug: job.slug, reason: 'verifyRun threw' }); continue; }
+    if (v && v.verdict === 'clean') {
+      healed.push(job.slug);
+    } else {
+      leftForReview.push({ slug: job.slug, reason: v ? `${v.verdict}: ${v.reason}` : 'null verdict' });
+    }
   }
   if (healed.length) {
     const healSet = new Set(healed);
@@ -1711,7 +1771,11 @@ async function reverifyNeedsReview() {
     console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
     await broadcast();
   }
-  return { rescanned: candidates.length, healed };
+  if (leftForReview.length) {
+    const detail = leftForReview.map((e) => `${e.slug} (${e.reason})`).join(', ');
+    console.log(`[scheduler] boot reverify: left for review: ${detail}`);
+  }
+  return { rescanned: candidates.length, healed, leftForReview };
 }
 function registerScheduleHandlers() {
@@ -1981,24 +2045,52 @@ async function init() {
   }
   await mutate((state) => {
     for (const j of state.jobs) {
-      if (j.status === 'running') {
-        const pid = j.runtime?.pid;
-        let killNote = '';
-        if (pid) {
-          const result = killOrphanClaudePid(pid);
-          killNote = ` (orphan pid=${pid}: ${result})`;
-          if (result === 'killed') {
-            console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
-          }
+      if (j.status !== 'running') continue;
+      const pid = j.runtime?.pid;
+      let killNote = '';
+      if (pid) {
+        const result = killOrphanClaudePid(pid);
+        killNote = ` (orphan pid=${pid}: ${result})`;
+        if (result === 'killed') {
+          console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
         }
-        const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
-        const success = outcome === 'success';
-        j.status = success ? 'completed' : 'failed';
-        j.exitCode = success ? 0 : (j.exitCode ?? 1);
-        j.error = success ? null : `orphaned: app restarted while running${killNote}`;
+      }
+      const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
+      if (outcome === 'success') {
+        // Job finished cleanly before the crash — keep the win.
+        j.status = 'completed';
+        j.exitCode = 0;
+        j.error = null;
         j.finishedAt = new Date().toISOString();
         delete j.runtime;
-        console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → ${j.status}`);
+        console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=success → completed`);
+      } else if (outcome === 'failed') {
+        // The log carries a real failure result event — a genuine failure, keep it.
+        j.status = 'failed';
+        j.exitCode = j.exitCode ?? 1;
+        j.error = `orphaned: app restarted while running${killNote}`;
+        j.finishedAt = new Date().toISOString();
+        delete j.runtime;
+        console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=failed → failed`);
+      } else {
+        // no_result / unknown: the run was interrupted (host died / app restarted)
+        // with NO evidence it failed on its own merits. Punishing the PRD here is
+        // the wrong call — it demands a manual flip and burns an Opus fix-plan on a
+        // job that never actually failed. Re-queue it (bounded) so an app restart
+        // self-recovers. Mirrors the transient-kill auto-retry on the live path.
+        const tries = j.orphanRetries ?? 0;
+        if (tries < ORPHAN_REQUEUE_CAP) {
+          resetJobFields(j, `orphaned: app restarted mid-run, re-queued (attempt ${tries + 1}/${ORPHAN_REQUEUE_CAP})${killNote}`);
+          j.orphanRetries = tries + 1;
+          console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → re-queued (${tries + 1}/${ORPHAN_REQUEUE_CAP})`);
+        } else {
+          j.status = 'failed';
+          j.exitCode = j.exitCode ?? 1;
+          j.error = `orphaned: app restarted while running, exhausted ${ORPHAN_REQUEUE_CAP} re-queue attempts${killNote}`;
+          j.finishedAt = new Date().toISOString();
+          delete j.runtime;
+          console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → failed (orphan retries exhausted)`);
+        }
       }
     }
   });
@@ -2024,7 +2116,17 @@ async function init() {
   // Refresh next-reset every 10 minutes — billing window can shift if usage
   // resets early or the auth token rotates. Tracked so re-init doesn't leak.
   if (rescheduleInterval) clearInterval(rescheduleInterval);
-  rescheduleInterval = setInterval(() => { rescheduleTimer().catch(() => {}); }, 10 * 60_000);
+  rescheduleInterval = setInterval(() => {
+    rescheduleTimer().catch(() => {});
+    // Periodic self-heal: re-run the verifier over stale needs_review jobs so a
+    // job whose work actually landed (committed in-window, no FAIL sentinel)
+    // auto-clears WITHOUT waiting for the next app restart. Cheap-guarded — the
+    // log scan only runs when something is actually flagged.
+    const s = readQueueSync();
+    if (s.jobs.some((j) => j.status === 'needs_review')) {
+      reverifyNeedsReview().catch(() => {});
+    }
+  }, 10 * 60_000);
   // Self-rescheduling poll loop with exponential backoff. Replaces the
   // old fixed-interval pollTimer + initialPollTimeout.