claude-code-session-manager 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.html CHANGED
@@ -7,10 +7,10 @@
7
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
8
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
9
  <link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,500;0,6..72,600;0,6..72,700;1,6..72,400&family=Geist:wght@300;400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap" rel="stylesheet">
10
- <script type="module" crossorigin src="./assets/index-DqCaosIl.js"></script>
10
+ <script type="module" crossorigin src="./assets/index-H0IXEKiC.js"></script>
11
11
  <link rel="modulepreload" crossorigin href="./assets/monaco-editor-BW5C4Iv1.js">
12
12
  <link rel="stylesheet" crossorigin href="./assets/monaco-editor-BTnBOi8r.css">
13
- <link rel="stylesheet" crossorigin href="./assets/index-Dq3KViBt.css">
13
+ <link rel="stylesheet" crossorigin href="./assets/index-Cu9X6oyA.css">
14
14
  </head>
15
15
  <body class="bg-bg text-fg font-sans antialiased">
16
16
  <div id="root"></div>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-session-manager",
3
- "version": "0.23.0",
3
+ "version": "0.25.0",
4
4
  "description": "Local cockpit for the Claude Code CLI — multi-tab terminal, full config surface, scheduler, voice dictation, and live observability.",
5
5
  "type": "module",
6
6
  "main": "src/main/index.cjs",
@@ -63,8 +63,7 @@
63
63
  "url": "https://github.com/StanislavBG/claude-code-session-manager/issues"
64
64
  },
65
65
  "publishConfig": {
66
- "access": "public",
67
- "provenance": true
66
+ "access": "public"
68
67
  },
69
68
  "engines": {
70
69
  "node": ">=18"
@@ -584,3 +584,279 @@ test('isHarnessToolError detects wrapper and "No such tool available"', () => {
584
584
  assert.equal(isHarnessToolError('ModuleNotFoundError: No module named x'), false);
585
585
  assert.equal(isHarnessToolError(''), false);
586
586
  });
587
+
588
+ // ─── SCHEDULER_VERDICT sentinel override tests ────────────────────────────────
589
+
/**
 * Fixture builder for the SCHEDULER_VERDICT tests.
 *
 * Produces a three-event log: an assistant `tool_use` (Bash / pytest), the
 * matching user `tool_result` whose text contains a Traceback and a KeyError
 * while `is_error` is false, and a final success `result` event.
 *
 * @param {string|null} sentinelLine Line appended to the result text after
 *   "All work done."; any falsy value leaves the result text without a sentinel.
 * @returns {object[]} The three log events, in order.
 */
function tracebackRunEvents(sentinelLine) {
  const toolUseId = 'toolu_tv_001';

  // Tool output that looks like a failure even though the call itself succeeded.
  const toolOutput = [
    '=== TDD red phase ===',
    'Traceback (most recent call last):',
    ' File "test_foo.py", line 5, in test_bar',
    "KeyError: 'missing_key'",
  ].join('\n');

  let resultText = 'All work done.';
  if (sentinelLine) {
    resultText = `All work done.\n${sentinelLine}`;
  }

  const assistantEvent = {
    type: 'assistant',
    message: {
      role: 'assistant',
      content: [{
        type: 'tool_use',
        id: toolUseId,
        name: 'Bash',
        input: { command: 'pytest', description: 'Run acceptance tests' },
      }],
    },
  };

  const toolResultEvent = {
    type: 'user',
    message: {
      role: 'user',
      content: [{
        type: 'tool_result',
        tool_use_id: toolUseId,
        content: toolOutput,
        is_error: false,
      }],
    },
  };

  const finalEvent = { type: 'result', subtype: 'success', result: resultText };

  return [assistantEvent, toolResultEvent, finalEvent];
}
629
+
630
+ // (a) sentinel PASS + committedDuringRun:true + Traceback+Error → clean
631
+ test('sentinel PASS + committedDuringRun:true + Traceback → clean (override)', async () => {
632
+ const tmp = makeTmpDir();
633
+ try {
634
+ const slug = '77-sentinel-override-pass';
635
+ writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
636
+ const prdPath = writePrd(tmp, slug, '# Sentinel override test');
637
+ const verdict = await verifyRun({
638
+ runDir: tmp,
639
+ prdPath,
640
+ queueEntry: { slug, status: 'running' },
641
+ allJobs: [],
642
+ committedDuringRun: true,
643
+ });
644
+ assert.equal(verdict.verdict, 'clean', `expected clean, got ${verdict.verdict}: ${verdict.reason}`);
645
+ assert.equal(verdict.downgradeTo, null);
646
+ assert.ok(verdict.reason.includes('SCHEDULER_VERDICT: PASS'), `reason should mention sentinel: ${verdict.reason}`);
647
+ // Sidecar should record the sentinel and override
648
+ const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
649
+ assert.equal(sidecar.sentinel, 'pass');
650
+ assert.ok(sidecar.sentinelOverride, 'sidecar should record sentinelOverride');
651
+ } finally {
652
+ rmdir(tmp);
653
+ }
654
+ });
655
+
656
+ // (b) no sentinel + Traceback+Error → transcript_errors (unchanged baseline)
657
+ test('no sentinel + Traceback+Error + committedDuringRun:true → transcript_errors (no override)', async () => {
658
+ const tmp = makeTmpDir();
659
+ try {
660
+ const slug = '77-no-sentinel-baseline';
661
+ writeLog(tmp, slug, tracebackRunEvents(null));
662
+ const prdPath = writePrd(tmp, slug, '# No sentinel baseline');
663
+ const verdict = await verifyRun({
664
+ runDir: tmp,
665
+ prdPath,
666
+ queueEntry: { slug, status: 'running' },
667
+ allJobs: [],
668
+ committedDuringRun: true,
669
+ });
670
+ assert.equal(verdict.verdict, 'transcript_errors', `expected transcript_errors without sentinel, got ${verdict.verdict}: ${verdict.reason}`);
671
+ assert.equal(verdict.downgradeTo, 'needs_review');
672
+ } finally {
673
+ rmdir(tmp);
674
+ }
675
+ });
676
+
677
+ // (c) sentinel PASS + committedDuringRun:false → stays transcript_errors (commit not confirmed)
678
+ test('sentinel PASS + committedDuringRun:false → transcript_errors (commit unconfirmed)', async () => {
679
+ const tmp = makeTmpDir();
680
+ try {
681
+ const slug = '77-sentinel-no-commit';
682
+ writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
683
+ const prdPath = writePrd(tmp, slug, '# Sentinel without commit');
684
+ const verdict = await verifyRun({
685
+ runDir: tmp,
686
+ prdPath,
687
+ queueEntry: { slug, status: 'running' },
688
+ allJobs: [],
689
+ committedDuringRun: false,
690
+ });
691
+ assert.equal(verdict.verdict, 'transcript_errors', `PASS without commit must not override, got ${verdict.verdict}: ${verdict.reason}`);
692
+ assert.equal(verdict.downgradeTo, 'needs_review');
693
+ } finally {
694
+ rmdir(tmp);
695
+ }
696
+ });
697
+
698
+ // (d) sentinel FAIL → never clean (even with committedDuringRun:true)
699
+ test('sentinel FAIL + committedDuringRun:true → transcript_errors (FAIL never overrides)', async () => {
700
+ const tmp = makeTmpDir();
701
+ try {
702
+ const slug = '77-sentinel-fail';
703
+ writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
704
+ const prdPath = writePrd(tmp, slug, '# Sentinel FAIL');
705
+ const verdict = await verifyRun({
706
+ runDir: tmp,
707
+ prdPath,
708
+ queueEntry: { slug, status: 'running' },
709
+ allJobs: [],
710
+ committedDuringRun: true,
711
+ });
712
+ assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must not override to clean, got ${verdict.verdict}: ${verdict.reason}`);
713
+ assert.equal(verdict.downgradeTo, 'needs_review');
714
+ } finally {
715
+ rmdir(tmp);
716
+ }
717
+ });
718
+
719
+ // ─── pre-sentinel heal (allowPreSentinelHeal) ─────────────────────────────────
720
+
721
+ // (f) allowPreSentinelHeal=true + committed + no sentinel → clean
722
+ test('pre-sentinel heal: committed + no sentinel + allowPreSentinelHeal → clean', async () => {
723
+ const tmp = makeTmpDir();
724
+ try {
725
+ const slug = '86-pre-sentinel-heal-pass';
726
+ writeLog(tmp, slug, tracebackRunEvents(null)); // no SCHEDULER_VERDICT line
727
+ const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal');
728
+ const verdict = await verifyRun({
729
+ runDir: tmp,
730
+ prdPath,
731
+ queueEntry: { slug, status: 'needs_review' },
732
+ allJobs: [],
733
+ committedDuringRun: true,
734
+ allowPreSentinelHeal: true,
735
+ });
736
+ assert.equal(verdict.verdict, 'clean', `expected clean via pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
737
+ assert.equal(verdict.downgradeTo, null);
738
+ assert.ok(verdict.reason.includes('pre-sentinel heal'), `reason should mention pre-sentinel heal: ${verdict.reason}`);
739
+ const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
740
+ assert.ok(sidecar.preSentinelHeal, 'sidecar should record preSentinelHeal');
741
+ } finally {
742
+ rmdir(tmp);
743
+ }
744
+ });
745
+
746
+ // (g) allowPreSentinelHeal=true + committed + FAIL sentinel → stays transcript_errors
747
+ test('pre-sentinel heal: committed + SCHEDULER_VERDICT: FAIL + allowPreSentinelHeal → transcript_errors', async () => {
748
+ const tmp = makeTmpDir();
749
+ try {
750
+ const slug = '86-pre-sentinel-heal-fail';
751
+ writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
752
+ const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by FAIL');
753
+ const verdict = await verifyRun({
754
+ runDir: tmp,
755
+ prdPath,
756
+ queueEntry: { slug, status: 'needs_review' },
757
+ allJobs: [],
758
+ committedDuringRun: true,
759
+ allowPreSentinelHeal: true,
760
+ });
761
+ assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
762
+ assert.equal(verdict.downgradeTo, 'needs_review');
763
+ } finally {
764
+ rmdir(tmp);
765
+ }
766
+ });
767
+
768
+ // (h) allowPreSentinelHeal=true + NOT committed + no sentinel → stays transcript_errors
769
+ test('pre-sentinel heal: not committed + no sentinel + allowPreSentinelHeal → transcript_errors', async () => {
770
+ const tmp = makeTmpDir();
771
+ try {
772
+ const slug = '86-pre-sentinel-heal-uncommitted';
773
+ writeLog(tmp, slug, tracebackRunEvents(null));
774
+ const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by no commit');
775
+ const verdict = await verifyRun({
776
+ runDir: tmp,
777
+ prdPath,
778
+ queueEntry: { slug, status: 'needs_review' },
779
+ allJobs: [],
780
+ committedDuringRun: false,
781
+ allowPreSentinelHeal: true,
782
+ });
783
+ assert.equal(verdict.verdict, 'transcript_errors', `no commit must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
784
+ assert.equal(verdict.downgradeTo, 'needs_review');
785
+ } finally {
786
+ rmdir(tmp);
787
+ }
788
+ });
789
+
790
+ // (i) allowPreSentinelHeal=false (default) + committed + no sentinel → baseline unchanged
791
+ test('no sentinel + committed + allowPreSentinelHeal=false (default) → transcript_errors', async () => {
792
+ const tmp = makeTmpDir();
793
+ try {
794
+ const slug = '86-pre-sentinel-heal-disabled';
795
+ writeLog(tmp, slug, tracebackRunEvents(null));
796
+ const prdPath = writePrd(tmp, slug, '# allowPreSentinelHeal disabled by default');
797
+ const verdict = await verifyRun({
798
+ runDir: tmp,
799
+ prdPath,
800
+ queueEntry: { slug, status: 'needs_review' },
801
+ allJobs: [],
802
+ committedDuringRun: true,
803
+ // allowPreSentinelHeal defaults to false
804
+ });
805
+ assert.equal(verdict.verdict, 'transcript_errors', `must not heal without allowPreSentinelHeal, got ${verdict.verdict}: ${verdict.reason}`);
806
+ assert.equal(verdict.downgradeTo, 'needs_review');
807
+ } finally {
808
+ rmdir(tmp);
809
+ }
810
+ });
811
+
812
+ // (e) halt + sentinel PASS → still halt (override must not apply to halt)
813
+ test('halt result + sentinel PASS + committedDuringRun:true → still halt', async () => {
814
+ const tmp = makeTmpDir();
815
+ try {
816
+ const slug = '77-halt-sentinel-pass';
817
+ const logEvents = [
818
+ {
819
+ type: 'assistant',
820
+ message: {
821
+ role: 'assistant',
822
+ content: [{
823
+ type: 'tool_use',
824
+ id: 'toolu_halt_001',
825
+ name: 'Bash',
826
+ input: { command: 'check deps', description: 'Check prerequisites' },
827
+ }],
828
+ },
829
+ },
830
+ {
831
+ type: 'user',
832
+ message: {
833
+ role: 'user',
834
+ content: [{
835
+ type: 'tool_result',
836
+ tool_use_id: 'toolu_halt_001',
837
+ content: 'dep not ready',
838
+ is_error: false,
839
+ }],
840
+ },
841
+ },
842
+ {
843
+ type: 'result',
844
+ subtype: 'success',
845
+ result: 'HALT: prerequisite not met\nSCHEDULER_VERDICT: PASS',
846
+ },
847
+ ];
848
+ writeLog(tmp, slug, logEvents);
849
+ const prdPath = writePrd(tmp, slug, '# Halt with sentinel');
850
+ const verdict = await verifyRun({
851
+ runDir: tmp,
852
+ prdPath,
853
+ queueEntry: { slug, status: 'running' },
854
+ allJobs: [],
855
+ committedDuringRun: true,
856
+ });
857
+ assert.equal(verdict.verdict, 'halt', `halt must survive even with PASS sentinel, got ${verdict.verdict}: ${verdict.reason}`);
858
+ assert.equal(verdict.downgradeTo, 'pending');
859
+ } finally {
860
+ rmdir(tmp);
861
+ }
862
+ });
@@ -414,6 +414,37 @@ function checkDeps(queueEntry, allJobs, prdBody) {
414
414
  return { ok: true };
415
415
  }
416
416
 
417
+ // ─── sentinel scanner ─────────────────────────────────────────────────────────
418
+
/**
 * Scan the run output for a `SCHEDULER_VERDICT: PASS|FAIL` sentinel line.
 *
 * Checks `resultEvent.resultText` first (the agent's final message). Only if
 * no sentinel is found there does it fall back to the content of the last
 * event whose `kind` is 'tool_result'. The pattern is anchored to line-start
 * (multiline), so prose mentioning the string mid-sentence does not match;
 * within one text the first matching line wins.
 *
 * Text that is missing or not a string (absent `resultText`, structured
 * tool_result content, no event list) is treated as "no sentinel" instead of
 * throwing, so a malformed log cannot abort the scan.
 *
 * @param {{resultText?: unknown}|null|undefined} resultEvent Final result event, if any.
 * @param {Iterable<{kind?: string, content?: unknown}>|null|undefined} events Parsed log events.
 * @returns {'pass'|'fail'|null} Lower-cased verdict, or null when no sentinel line exists.
 */
function scanSentinel(resultEvent, events) {
  const RE = /^SCHEDULER_VERDICT:\s*(PASS|FAIL)\b/m;

  // Only strings can carry a sentinel; anything else means "not found".
  const verdictIn = (text) => {
    if (typeof text !== 'string') return null;
    const m = text.match(RE);
    return m ? m[1].toLowerCase() : null;
  };

  const fromResult = verdictIn(resultEvent?.resultText);
  if (fromResult) return fromResult;

  // Fallback: the last tool_result in the log.
  let lastToolResult = null;
  for (const ev of events ?? []) {
    if (ev?.kind === 'tool_result') lastToolResult = ev;
  }

  return verdictIn(lastToolResult?.content);
}
447
+
417
448
  // ─── main verifier ────────────────────────────────────────────────────────────
418
449
 
419
450
  /**
@@ -422,13 +453,23 @@ function checkDeps(queueEntry, allJobs, prdBody) {
422
453
  * escalate to 'needs_review'.
423
454
  *
424
455
  * @param {object} params
425
- * @param {string} params.runDir Absolute path to the run directory.
426
- * @param {string} params.prdPath Absolute path to the PRD .md file.
427
- * @param {object} params.queueEntry The queue.json entry for this job.
428
- * @param {object[]} [params.allJobs] All entries from queue.json (dep checks).
456
+ * @param {string} params.runDir Absolute path to the run directory.
457
+ * @param {string} params.prdPath Absolute path to the PRD .md file.
458
+ * @param {object} params.queueEntry The queue.json entry for this job.
459
+ * @param {object[]} [params.allJobs] All entries from queue.json (dep checks).
460
+ * @param {boolean} [params.committedDuringRun] True when HEAD moved during the run,
461
+ * confirming the job's commit landed.
462
+ * Default false for back-compat.
463
+ * @param {boolean} [params.allowPreSentinelHeal] When true, a commit-in-window
464
+ * with no SCHEDULER_VERDICT: FAIL is
465
+ * sufficient to override weak verdicts
466
+ * (transcript_errors/verify_unavailable)
467
+ * even without a PASS sentinel. Only
468
+ * set by the boot reverify self-heal
469
+ * pass for pre-sentinel legacy runs.
429
470
  * @returns {Promise<{verdict:string, reason:string, downgradeTo:string|null}>}
430
471
  */
431
- async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
472
+ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [], committedDuringRun = false, allowPreSentinelHeal = false }) {
432
473
  const { slug } = queueEntry;
433
474
  const logPath = path.join(runDir, `${slug}.log`);
434
475
  const verdictsPath = path.join(runDir, `${slug}.verdicts.json`);
@@ -568,7 +609,12 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
568
609
  }
569
610
  }
570
611
 
571
- const extras = annotations.length ? { annotations } : undefined;
612
+ // Scan for the SCHEDULER_VERDICT sentinel emitted by the finish protocol.
613
+ const sentinel = scanSentinel(resultEvent, events);
614
+ const sentinelFields = sentinel ? { sentinel } : {};
615
+ const extras = (annotations.length || sentinel)
616
+ ? { ...(annotations.length ? { annotations } : {}), ...sentinelFields }
617
+ : undefined;
572
618
 
573
619
  if (issues.length === 0) {
574
620
  const reason = annotations.length
@@ -580,6 +626,42 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
580
626
  // Pick highest-priority issue (transcript_errors > verify_unavailable).
581
627
  issues.sort((a, b) => b.priority - a.priority);
582
628
  const top = issues[0];
629
+
630
+ // Sentinel override: SCHEDULER_VERDICT: PASS + a commit that landed during
631
+ // the run is authoritative evidence the job succeeded. Suppresses incidental
632
+ // transcript noise (grep results with "Error", TDD red-phase reproductions,
633
+ // Traceback in debug output) for the two weakest verdict classes.
634
+ // MUST NOT apply to halt or deps_unmet — those keep their existing semantics.
635
+ if (
636
+ sentinel === 'pass'
637
+ && committedDuringRun
638
+ && (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
639
+ ) {
640
+ return conclude('clean',
641
+ `SCHEDULER_VERDICT: PASS + commit landed overrides ${top.verdict}`,
642
+ null,
643
+ { ...(annotations.length ? { annotations } : {}), sentinel, sentinelOverride: top.verdict },
644
+ );
645
+ }
646
+
647
+ // Pre-sentinel heal: job predates SCHEDULER_VERDICT emission. A commit in
648
+ // the run window with no explicit FAIL sentinel is weak but sufficient to
649
+ // override the two weakest verdict classes during the self-heal pass.
650
+ // Only applies when the caller opts in (allowPreSentinelHeal=true) — live
651
+ // runs never set this, so only the boot reverify self-heal uses it.
652
+ if (
653
+ allowPreSentinelHeal
654
+ && committedDuringRun
655
+ && sentinel !== 'fail'
656
+ && (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
657
+ ) {
658
+ return conclude('clean',
659
+ `pre-sentinel heal: committed in run window, no SCHEDULER_VERDICT: FAIL, overrides ${top.verdict}`,
660
+ null,
661
+ { ...(annotations.length ? { annotations } : {}), preSentinelHeal: top.verdict },
662
+ );
663
+ }
664
+
583
665
  return conclude(top.verdict, top.reason, 'needs_review', extras);
584
666
 
585
667
  } catch (e) {
@@ -601,4 +683,5 @@ module.exports = {
601
683
  parsePrdBodyDepFragments,
602
684
  checkDeps,
603
685
  parseLog,
686
+ scanSentinel,
604
687
  };
@@ -91,6 +91,13 @@ const RESULT_TAIL_BYTES = 8 * 1024;
91
91
  const IDLE_OUTPUT_KILL_MS = 20 * 60_000;
92
92
  const IDLE_CHECK_INTERVAL_MS = 60_000;
93
93
 
94
+ // Boot reconciliation: a job left 'running' by an app restart/crash whose log
95
+ // shows neither success nor a real failure result was merely interrupted — the
96
+ // host died, the PRD didn't. Re-queue it up to this many times before giving up
97
+ // and marking it failed, so a restart self-recovers instead of needing a manual
98
+ // flip + a wasted fix-plan investigation.
99
+ const ORPHAN_REQUEUE_CAP = 2;
100
+
94
101
  // Appended to every scheduled job prompt so the queue can be RELIED ON to finish
95
102
  // work to a consistent bar: review → security-review → verify → commit. Enforced
96
103
  // centrally here (not per-PRD) so it applies to every current and future PRD.
@@ -116,6 +123,17 @@ sequence. Do not stop before the commit lands; committing is part of the job.
116
123
  pass. Do not assume npm; use whatever the target project uses.
117
124
  4. COMMIT — stage and commit ALL changes with a clear conventional message:
118
125
  \`git add -A && git commit -m "<type>(<scope>): <summary>"\`.
126
+ 5. VERDICT SENTINEL — as the LAST LINE of your final result text, emit exactly
127
+ one of these two lines (no trailing text after it):
128
+ SCHEDULER_VERDICT: PASS
129
+ SCHEDULER_VERDICT: FAIL <one-line reason>
130
+ Print PASS only when the AC gate is green AND the commit from step 4 landed.
131
+ Print FAIL (and exit 1) if the AC gate was red or the commit could not land.
132
+ NEVER print PASS on a red AC gate — a lying PASS turns the verifier from a
133
+ false-failure catcher into a silent-failure shipper. A truthful PASS + a
134
+ landed commit lets the verifier override incidental transcript noise (grep
135
+ results containing "Error", a TDD red-test run early in the session, debug
136
+ Tracebacks) so those do not false-trip a needs_review downgrade.
119
137
 
120
138
  A job that exits with uncommitted changes is treated as INCOMPLETE and flagged
121
139
  for review. Do NOT add work beyond the acceptance criteria — this protocol is the
@@ -169,6 +187,25 @@ function gitHead(cwd) {
169
187
  });
170
188
  }
171
189
 
// Resolves true if ≥1 commit landed in cwd between startedAt and finishedAt
// (with 60s slack on the upper bound; "now" is used when finishedAt is absent).
// Used by the self-heal pass to derive committedDuringRun from the recorded
// run window — the live commit-guard uses gitHead() instead.
// Never throws or rejects: a missing cwd/startedAt, an unparseable finishedAt,
// a spawn failure, or any git error all resolve false (no override, job stays
// as-is).
function committedInWindow(cwd, startedAt, finishedAt) {
  return new Promise((resolve) => {
    if (!cwd || !startedAt) { resolve(false); return; }

    let until;
    if (finishedAt) {
      const finishedMs = Date.parse(finishedAt);
      // new Date(NaN).toISOString() throws RangeError, which would reject this
      // promise; an unreadable window end is treated as "no evidence" instead.
      if (Number.isNaN(finishedMs)) { resolve(false); return; }
      until = new Date(finishedMs + 60_000).toISOString();
    } else {
      until = new Date().toISOString();
    }

    try {
      execFile(
        'git',
        ['-C', cwd, 'log', '--format=%H', `--since=${startedAt}`, `--until=${until}`],
        { timeout: 10_000, windowsHide: true },
        (err, stdout) => { resolve(!err && String(stdout || '').trim().length > 0); },
      );
    } catch {
      // execFile can throw synchronously on invalid arguments; honor the
      // never-throws contract.
      resolve(false);
    }
  });
}
208
+
172
209
  const ROOT = path.join(os.homedir(), '.claude', 'session-manager', 'scheduled-plans');
173
210
  const PRDS_DIR = path.join(ROOT, 'prds');
174
211
  const RUNS_DIR = path.join(ROOT, 'runs');
@@ -1224,6 +1261,12 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1224
1261
  // Called outside mutate() so the queue lock is not held during I/O.
1225
1262
  let verifyResult = null;
1226
1263
  if (res.exitCode === 0 && !res.rateLimited) {
1264
+ // Detect whether the job self-committed by comparing HEAD before/after.
1265
+ // Used by the sentinel override: SCHEDULER_VERDICT: PASS + a landed
1266
+ // commit together override incidental transcript noise verdicts.
1267
+ const headAtExit = await gitHead(guardCwd);
1268
+ const committedDuringRun = !!(guardHeadBefore && headAtExit && guardHeadBefore !== headAtExit);
1269
+
1227
1270
  const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
1228
1271
  const stateForDeps = await readQueue();
1229
1272
  verifyResult = await verifyRun({
@@ -1231,6 +1274,7 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1231
1274
  prdPath,
1232
1275
  queueEntry: job,
1233
1276
  allJobs: stateForDeps.jobs,
1277
+ committedDuringRun,
1234
1278
  }).catch((e) => ({
1235
1279
  verdict: 'verify_unavailable',
1236
1280
  reason: `verifier threw: ${e?.message ?? String(e)}`,
@@ -1688,14 +1732,30 @@ async function reverifyNeedsReview() {
1688
1732
  const snap = await readQueue();
1689
1733
  const candidates = snap.jobs.filter(isRescanCandidate);
1690
1734
  const healed = [];
1735
+ const leftForReview = [];
1691
1736
  for (const job of candidates) {
1692
1737
  const runDir = path.join(RUNS_DIR, job.runId);
1693
1738
  const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
1739
+ // Derive committedDuringRun from the recorded run window. The live
1740
+ // commit-guard uses gitHead() (before/after HEAD diff); here the run is
1741
+ // already over so we query git log filtered to [startedAt, finishedAt+60s].
1742
+ const committedDuringRun = await committedInWindow(job.cwd, job.startedAt, job.finishedAt);
1694
1743
  let v = null;
1695
1744
  try {
1696
- v = await verifyRun({ runDir, prdPath, queueEntry: job, allJobs: snap.jobs });
1697
- } catch { continue; } // unreadable log etc. — leave for human review
1698
- if (v && v.verdict === 'clean') healed.push(job.slug);
1745
+ v = await verifyRun({
1746
+ runDir,
1747
+ prdPath,
1748
+ queueEntry: job,
1749
+ allJobs: snap.jobs,
1750
+ committedDuringRun,
1751
+ allowPreSentinelHeal: true,
1752
+ });
1753
+ } catch { leftForReview.push({ slug: job.slug, reason: 'verifyRun threw' }); continue; }
1754
+ if (v && v.verdict === 'clean') {
1755
+ healed.push(job.slug);
1756
+ } else {
1757
+ leftForReview.push({ slug: job.slug, reason: v ? `${v.verdict}: ${v.reason}` : 'null verdict' });
1758
+ }
1699
1759
  }
1700
1760
  if (healed.length) {
1701
1761
  const healSet = new Set(healed);
@@ -1711,7 +1771,11 @@ async function reverifyNeedsReview() {
1711
1771
  console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
1712
1772
  await broadcast();
1713
1773
  }
1714
- return { rescanned: candidates.length, healed };
1774
+ if (leftForReview.length) {
1775
+ const detail = leftForReview.map((e) => `${e.slug} (${e.reason})`).join(', ');
1776
+ console.log(`[scheduler] boot reverify: left for review: ${detail}`);
1777
+ }
1778
+ return { rescanned: candidates.length, healed, leftForReview };
1715
1779
  }
1716
1780
 
1717
1781
  function registerScheduleHandlers() {
@@ -1981,24 +2045,52 @@ async function init() {
1981
2045
  }
1982
2046
  await mutate((state) => {
1983
2047
  for (const j of state.jobs) {
1984
- if (j.status === 'running') {
1985
- const pid = j.runtime?.pid;
1986
- let killNote = '';
1987
- if (pid) {
1988
- const result = killOrphanClaudePid(pid);
1989
- killNote = ` (orphan pid=${pid}: ${result})`;
1990
- if (result === 'killed') {
1991
- console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
1992
- }
2048
+ if (j.status !== 'running') continue;
2049
+ const pid = j.runtime?.pid;
2050
+ let killNote = '';
2051
+ if (pid) {
2052
+ const result = killOrphanClaudePid(pid);
2053
+ killNote = ` (orphan pid=${pid}: ${result})`;
2054
+ if (result === 'killed') {
2055
+ console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
1993
2056
  }
1994
- const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
1995
- const success = outcome === 'success';
1996
- j.status = success ? 'completed' : 'failed';
1997
- j.exitCode = success ? 0 : (j.exitCode ?? 1);
1998
- j.error = success ? null : `orphaned: app restarted while running${killNote}`;
2057
+ }
2058
+ const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
2059
+ if (outcome === 'success') {
2060
+ // Job finished cleanly before the crash — keep the win.
2061
+ j.status = 'completed';
2062
+ j.exitCode = 0;
2063
+ j.error = null;
1999
2064
  j.finishedAt = new Date().toISOString();
2000
2065
  delete j.runtime;
2001
- console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome}${j.status}`);
2066
+ console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=success → completed`);
2067
+ } else if (outcome === 'failed') {
2068
+ // The log carries a real failure result event — a genuine failure, keep it.
2069
+ j.status = 'failed';
2070
+ j.exitCode = j.exitCode ?? 1;
2071
+ j.error = `orphaned: app restarted while running${killNote}`;
2072
+ j.finishedAt = new Date().toISOString();
2073
+ delete j.runtime;
2074
+ console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=failed → failed`);
2075
+ } else {
2076
+ // no_result / unknown: the run was interrupted (host died / app restarted)
2077
+ // with NO evidence it failed on its own merits. Punishing the PRD here is
2078
+ // the wrong call — it demands a manual flip and burns an Opus fix-plan on a
2079
+ // job that never actually failed. Re-queue it (bounded) so an app restart
2080
+ // self-recovers. Mirrors the transient-kill auto-retry on the live path.
2081
+ const tries = j.orphanRetries ?? 0;
2082
+ if (tries < ORPHAN_REQUEUE_CAP) {
2083
+ resetJobFields(j, `orphaned: app restarted mid-run, re-queued (attempt ${tries + 1}/${ORPHAN_REQUEUE_CAP})${killNote}`);
2084
+ j.orphanRetries = tries + 1;
2085
+ console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → re-queued (${tries + 1}/${ORPHAN_REQUEUE_CAP})`);
2086
+ } else {
2087
+ j.status = 'failed';
2088
+ j.exitCode = j.exitCode ?? 1;
2089
+ j.error = `orphaned: app restarted while running, exhausted ${ORPHAN_REQUEUE_CAP} re-queue attempts${killNote}`;
2090
+ j.finishedAt = new Date().toISOString();
2091
+ delete j.runtime;
2092
+ console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → failed (orphan retries exhausted)`);
2093
+ }
2002
2094
  }
2003
2095
  }
2004
2096
  });
@@ -2024,7 +2116,17 @@ async function init() {
2024
2116
  // Refresh next-reset every 10 minutes — billing window can shift if usage
2025
2117
  // resets early or the auth token rotates. Tracked so re-init doesn't leak.
2026
2118
  if (rescheduleInterval) clearInterval(rescheduleInterval);
2027
- rescheduleInterval = setInterval(() => { rescheduleTimer().catch(() => {}); }, 10 * 60_000);
2119
+ rescheduleInterval = setInterval(() => {
2120
+ rescheduleTimer().catch(() => {});
2121
+ // Periodic self-heal: re-run the verifier over stale needs_review jobs so a
2122
+ // job whose work actually landed (committed in-window, no FAIL sentinel)
2123
+ // auto-clears WITHOUT waiting for the next app restart. Cheap-guarded — the
2124
+ // log scan only runs when something is actually flagged.
2125
+ const s = readQueueSync();
2126
+ if (s.jobs.some((j) => j.status === 'needs_review')) {
2127
+ reverifyNeedsReview().catch(() => {});
2128
+ }
2129
+ }, 10 * 60_000);
2028
2130
 
2029
2131
  // Self-rescheduling poll loop with exponential backoff. Replaces the
2030
2132
  // old fixed-interval pollTimer + initialPollTimeout.