claude-code-session-manager 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{TiptapBody-Bj0_gFeB.js → TiptapBody-BIuH7h34.js} +1 -1
- package/dist/assets/index-Cu9X6oyA.css +32 -0
- package/dist/assets/{index-DqCaosIl.js → index-H0IXEKiC.js} +600 -600
- package/dist/index.html +2 -2
- package/package.json +2 -3
- package/src/main/__tests__/runVerify.test.cjs +276 -0
- package/src/main/runVerify.cjs +89 -6
- package/src/main/scheduler.cjs +122 -20
- package/src/main/webRemote.cjs +20 -4
- package/dist/assets/index-Dq3KViBt.css +0 -32
package/dist/index.html
CHANGED
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
<link rel="preconnect" href="https://fonts.googleapis.com">
|
|
8
8
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
9
9
|
<link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,500;0,6..72,600;0,6..72,700;1,6..72,400&family=Geist:wght@300;400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap" rel="stylesheet">
|
|
10
|
-
<script type="module" crossorigin src="./assets/index-DqCaosIl.js"></script>
|
|
10
|
+
<script type="module" crossorigin src="./assets/index-H0IXEKiC.js"></script>
|
|
11
11
|
<link rel="modulepreload" crossorigin href="./assets/monaco-editor-BW5C4Iv1.js">
|
|
12
12
|
<link rel="stylesheet" crossorigin href="./assets/monaco-editor-BTnBOi8r.css">
|
|
13
|
-
<link rel="stylesheet" crossorigin href="./assets/index-Dq3KViBt.css">
|
|
13
|
+
<link rel="stylesheet" crossorigin href="./assets/index-Cu9X6oyA.css">
|
|
14
14
|
</head>
|
|
15
15
|
<body class="bg-bg text-fg font-sans antialiased">
|
|
16
16
|
<div id="root"></div>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-session-manager",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Local cockpit for the Claude Code CLI — multi-tab terminal, full config surface, scheduler, voice dictation, and live observability.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/main/index.cjs",
|
|
@@ -63,8 +63,7 @@
|
|
|
63
63
|
"url": "https://github.com/StanislavBG/claude-code-session-manager/issues"
|
|
64
64
|
},
|
|
65
65
|
"publishConfig": {
|
|
66
|
-
"access": "public"
|
|
67
|
-
"provenance": true
|
|
66
|
+
"access": "public"
|
|
68
67
|
},
|
|
69
68
|
"engines": {
|
|
70
69
|
"node": ">=18"
|
|
@@ -584,3 +584,279 @@ test('isHarnessToolError detects wrapper and "No such tool available"', () => {
|
|
|
584
584
|
assert.equal(isHarnessToolError('ModuleNotFoundError: No module named x'), false);
|
|
585
585
|
assert.equal(isHarnessToolError(''), false);
|
|
586
586
|
});
|
|
587
|
+
|
|
588
|
+
// ─── SCHEDULER_VERDICT sentinel override tests ────────────────────────────────
|
|
589
|
+
|
|
590
|
+
/** Build a log where a tool_result contains a Traceback+KeyError and the
|
|
591
|
+
* result event optionally contains the sentinel line. */
|
|
592
|
+
function tracebackRunEvents(sentinelLine) {
|
|
593
|
+
const resultText = sentinelLine
|
|
594
|
+
? `All work done.\n${sentinelLine}`
|
|
595
|
+
: 'All work done.';
|
|
596
|
+
return [
|
|
597
|
+
{
|
|
598
|
+
type: 'assistant',
|
|
599
|
+
message: {
|
|
600
|
+
role: 'assistant',
|
|
601
|
+
content: [{
|
|
602
|
+
type: 'tool_use',
|
|
603
|
+
id: 'toolu_tv_001',
|
|
604
|
+
name: 'Bash',
|
|
605
|
+
input: { command: 'pytest', description: 'Run acceptance tests' },
|
|
606
|
+
}],
|
|
607
|
+
},
|
|
608
|
+
},
|
|
609
|
+
{
|
|
610
|
+
type: 'user',
|
|
611
|
+
message: {
|
|
612
|
+
role: 'user',
|
|
613
|
+
content: [{
|
|
614
|
+
type: 'tool_result',
|
|
615
|
+
tool_use_id: 'toolu_tv_001',
|
|
616
|
+
content: [
|
|
617
|
+
'=== TDD red phase ===',
|
|
618
|
+
'Traceback (most recent call last):',
|
|
619
|
+
' File "test_foo.py", line 5, in test_bar',
|
|
620
|
+
"KeyError: 'missing_key'",
|
|
621
|
+
].join('\n'),
|
|
622
|
+
is_error: false,
|
|
623
|
+
}],
|
|
624
|
+
},
|
|
625
|
+
},
|
|
626
|
+
{ type: 'result', subtype: 'success', result: resultText },
|
|
627
|
+
];
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
// (a) sentinel PASS + committedDuringRun:true + Traceback+Error → clean
|
|
631
|
+
test('sentinel PASS + committedDuringRun:true + Traceback → clean (override)', async () => {
|
|
632
|
+
const tmp = makeTmpDir();
|
|
633
|
+
try {
|
|
634
|
+
const slug = '77-sentinel-override-pass';
|
|
635
|
+
writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
|
|
636
|
+
const prdPath = writePrd(tmp, slug, '# Sentinel override test');
|
|
637
|
+
const verdict = await verifyRun({
|
|
638
|
+
runDir: tmp,
|
|
639
|
+
prdPath,
|
|
640
|
+
queueEntry: { slug, status: 'running' },
|
|
641
|
+
allJobs: [],
|
|
642
|
+
committedDuringRun: true,
|
|
643
|
+
});
|
|
644
|
+
assert.equal(verdict.verdict, 'clean', `expected clean, got ${verdict.verdict}: ${verdict.reason}`);
|
|
645
|
+
assert.equal(verdict.downgradeTo, null);
|
|
646
|
+
assert.ok(verdict.reason.includes('SCHEDULER_VERDICT: PASS'), `reason should mention sentinel: ${verdict.reason}`);
|
|
647
|
+
// Sidecar should record the sentinel and override
|
|
648
|
+
const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
|
|
649
|
+
assert.equal(sidecar.sentinel, 'pass');
|
|
650
|
+
assert.ok(sidecar.sentinelOverride, 'sidecar should record sentinelOverride');
|
|
651
|
+
} finally {
|
|
652
|
+
rmdir(tmp);
|
|
653
|
+
}
|
|
654
|
+
});
|
|
655
|
+
|
|
656
|
+
// (b) no sentinel + Traceback+Error → transcript_errors (unchanged baseline)
|
|
657
|
+
test('no sentinel + Traceback+Error + committedDuringRun:true → transcript_errors (no override)', async () => {
|
|
658
|
+
const tmp = makeTmpDir();
|
|
659
|
+
try {
|
|
660
|
+
const slug = '77-no-sentinel-baseline';
|
|
661
|
+
writeLog(tmp, slug, tracebackRunEvents(null));
|
|
662
|
+
const prdPath = writePrd(tmp, slug, '# No sentinel baseline');
|
|
663
|
+
const verdict = await verifyRun({
|
|
664
|
+
runDir: tmp,
|
|
665
|
+
prdPath,
|
|
666
|
+
queueEntry: { slug, status: 'running' },
|
|
667
|
+
allJobs: [],
|
|
668
|
+
committedDuringRun: true,
|
|
669
|
+
});
|
|
670
|
+
assert.equal(verdict.verdict, 'transcript_errors', `expected transcript_errors without sentinel, got ${verdict.verdict}: ${verdict.reason}`);
|
|
671
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
672
|
+
} finally {
|
|
673
|
+
rmdir(tmp);
|
|
674
|
+
}
|
|
675
|
+
});
|
|
676
|
+
|
|
677
|
+
// (c) sentinel PASS + committedDuringRun:false → stays transcript_errors (commit not confirmed)
|
|
678
|
+
test('sentinel PASS + committedDuringRun:false → transcript_errors (commit unconfirmed)', async () => {
|
|
679
|
+
const tmp = makeTmpDir();
|
|
680
|
+
try {
|
|
681
|
+
const slug = '77-sentinel-no-commit';
|
|
682
|
+
writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: PASS'));
|
|
683
|
+
const prdPath = writePrd(tmp, slug, '# Sentinel without commit');
|
|
684
|
+
const verdict = await verifyRun({
|
|
685
|
+
runDir: tmp,
|
|
686
|
+
prdPath,
|
|
687
|
+
queueEntry: { slug, status: 'running' },
|
|
688
|
+
allJobs: [],
|
|
689
|
+
committedDuringRun: false,
|
|
690
|
+
});
|
|
691
|
+
assert.equal(verdict.verdict, 'transcript_errors', `PASS without commit must not override, got ${verdict.verdict}: ${verdict.reason}`);
|
|
692
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
693
|
+
} finally {
|
|
694
|
+
rmdir(tmp);
|
|
695
|
+
}
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
// (d) sentinel FAIL → never clean (even with committedDuringRun:true)
|
|
699
|
+
test('sentinel FAIL + committedDuringRun:true → transcript_errors (FAIL never overrides)', async () => {
|
|
700
|
+
const tmp = makeTmpDir();
|
|
701
|
+
try {
|
|
702
|
+
const slug = '77-sentinel-fail';
|
|
703
|
+
writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
|
|
704
|
+
const prdPath = writePrd(tmp, slug, '# Sentinel FAIL');
|
|
705
|
+
const verdict = await verifyRun({
|
|
706
|
+
runDir: tmp,
|
|
707
|
+
prdPath,
|
|
708
|
+
queueEntry: { slug, status: 'running' },
|
|
709
|
+
allJobs: [],
|
|
710
|
+
committedDuringRun: true,
|
|
711
|
+
});
|
|
712
|
+
assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must not override to clean, got ${verdict.verdict}: ${verdict.reason}`);
|
|
713
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
714
|
+
} finally {
|
|
715
|
+
rmdir(tmp);
|
|
716
|
+
}
|
|
717
|
+
});
|
|
718
|
+
|
|
719
|
+
// ─── pre-sentinel heal (allowPreSentinelHeal) ─────────────────────────────────
|
|
720
|
+
|
|
721
|
+
// (f) allowPreSentinelHeal=true + committed + no sentinel → clean
|
|
722
|
+
test('pre-sentinel heal: committed + no sentinel + allowPreSentinelHeal → clean', async () => {
|
|
723
|
+
const tmp = makeTmpDir();
|
|
724
|
+
try {
|
|
725
|
+
const slug = '86-pre-sentinel-heal-pass';
|
|
726
|
+
writeLog(tmp, slug, tracebackRunEvents(null)); // no SCHEDULER_VERDICT line
|
|
727
|
+
const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal');
|
|
728
|
+
const verdict = await verifyRun({
|
|
729
|
+
runDir: tmp,
|
|
730
|
+
prdPath,
|
|
731
|
+
queueEntry: { slug, status: 'needs_review' },
|
|
732
|
+
allJobs: [],
|
|
733
|
+
committedDuringRun: true,
|
|
734
|
+
allowPreSentinelHeal: true,
|
|
735
|
+
});
|
|
736
|
+
assert.equal(verdict.verdict, 'clean', `expected clean via pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
|
|
737
|
+
assert.equal(verdict.downgradeTo, null);
|
|
738
|
+
assert.ok(verdict.reason.includes('pre-sentinel heal'), `reason should mention pre-sentinel heal: ${verdict.reason}`);
|
|
739
|
+
const sidecar = JSON.parse(fs.readFileSync(path.join(tmp, `${slug}.verdicts.json`), 'utf8'));
|
|
740
|
+
assert.ok(sidecar.preSentinelHeal, 'sidecar should record preSentinelHeal');
|
|
741
|
+
} finally {
|
|
742
|
+
rmdir(tmp);
|
|
743
|
+
}
|
|
744
|
+
});
|
|
745
|
+
|
|
746
|
+
// (g) allowPreSentinelHeal=true + committed + FAIL sentinel → stays transcript_errors
|
|
747
|
+
test('pre-sentinel heal: committed + SCHEDULER_VERDICT: FAIL + allowPreSentinelHeal → transcript_errors', async () => {
|
|
748
|
+
const tmp = makeTmpDir();
|
|
749
|
+
try {
|
|
750
|
+
const slug = '86-pre-sentinel-heal-fail';
|
|
751
|
+
writeLog(tmp, slug, tracebackRunEvents('SCHEDULER_VERDICT: FAIL AC gate was red'));
|
|
752
|
+
const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by FAIL');
|
|
753
|
+
const verdict = await verifyRun({
|
|
754
|
+
runDir: tmp,
|
|
755
|
+
prdPath,
|
|
756
|
+
queueEntry: { slug, status: 'needs_review' },
|
|
757
|
+
allJobs: [],
|
|
758
|
+
committedDuringRun: true,
|
|
759
|
+
allowPreSentinelHeal: true,
|
|
760
|
+
});
|
|
761
|
+
assert.equal(verdict.verdict, 'transcript_errors', `FAIL sentinel must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
|
|
762
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
763
|
+
} finally {
|
|
764
|
+
rmdir(tmp);
|
|
765
|
+
}
|
|
766
|
+
});
|
|
767
|
+
|
|
768
|
+
// (h) allowPreSentinelHeal=true + NOT committed + no sentinel → stays transcript_errors
|
|
769
|
+
test('pre-sentinel heal: not committed + no sentinel + allowPreSentinelHeal → transcript_errors', async () => {
|
|
770
|
+
const tmp = makeTmpDir();
|
|
771
|
+
try {
|
|
772
|
+
const slug = '86-pre-sentinel-heal-uncommitted';
|
|
773
|
+
writeLog(tmp, slug, tracebackRunEvents(null));
|
|
774
|
+
const prdPath = writePrd(tmp, slug, '# Pre-sentinel heal blocked by no commit');
|
|
775
|
+
const verdict = await verifyRun({
|
|
776
|
+
runDir: tmp,
|
|
777
|
+
prdPath,
|
|
778
|
+
queueEntry: { slug, status: 'needs_review' },
|
|
779
|
+
allJobs: [],
|
|
780
|
+
committedDuringRun: false,
|
|
781
|
+
allowPreSentinelHeal: true,
|
|
782
|
+
});
|
|
783
|
+
assert.equal(verdict.verdict, 'transcript_errors', `no commit must block pre-sentinel heal, got ${verdict.verdict}: ${verdict.reason}`);
|
|
784
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
785
|
+
} finally {
|
|
786
|
+
rmdir(tmp);
|
|
787
|
+
}
|
|
788
|
+
});
|
|
789
|
+
|
|
790
|
+
// (i) allowPreSentinelHeal=false (default) + committed + no sentinel → baseline unchanged
|
|
791
|
+
test('no sentinel + committed + allowPreSentinelHeal=false (default) → transcript_errors', async () => {
|
|
792
|
+
const tmp = makeTmpDir();
|
|
793
|
+
try {
|
|
794
|
+
const slug = '86-pre-sentinel-heal-disabled';
|
|
795
|
+
writeLog(tmp, slug, tracebackRunEvents(null));
|
|
796
|
+
const prdPath = writePrd(tmp, slug, '# allowPreSentinelHeal disabled by default');
|
|
797
|
+
const verdict = await verifyRun({
|
|
798
|
+
runDir: tmp,
|
|
799
|
+
prdPath,
|
|
800
|
+
queueEntry: { slug, status: 'needs_review' },
|
|
801
|
+
allJobs: [],
|
|
802
|
+
committedDuringRun: true,
|
|
803
|
+
// allowPreSentinelHeal defaults to false
|
|
804
|
+
});
|
|
805
|
+
assert.equal(verdict.verdict, 'transcript_errors', `must not heal without allowPreSentinelHeal, got ${verdict.verdict}: ${verdict.reason}`);
|
|
806
|
+
assert.equal(verdict.downgradeTo, 'needs_review');
|
|
807
|
+
} finally {
|
|
808
|
+
rmdir(tmp);
|
|
809
|
+
}
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
// (e) halt + sentinel PASS → still halt (override must not apply to halt)
|
|
813
|
+
test('halt result + sentinel PASS + committedDuringRun:true → still halt', async () => {
|
|
814
|
+
const tmp = makeTmpDir();
|
|
815
|
+
try {
|
|
816
|
+
const slug = '77-halt-sentinel-pass';
|
|
817
|
+
const logEvents = [
|
|
818
|
+
{
|
|
819
|
+
type: 'assistant',
|
|
820
|
+
message: {
|
|
821
|
+
role: 'assistant',
|
|
822
|
+
content: [{
|
|
823
|
+
type: 'tool_use',
|
|
824
|
+
id: 'toolu_halt_001',
|
|
825
|
+
name: 'Bash',
|
|
826
|
+
input: { command: 'check deps', description: 'Check prerequisites' },
|
|
827
|
+
}],
|
|
828
|
+
},
|
|
829
|
+
},
|
|
830
|
+
{
|
|
831
|
+
type: 'user',
|
|
832
|
+
message: {
|
|
833
|
+
role: 'user',
|
|
834
|
+
content: [{
|
|
835
|
+
type: 'tool_result',
|
|
836
|
+
tool_use_id: 'toolu_halt_001',
|
|
837
|
+
content: 'dep not ready',
|
|
838
|
+
is_error: false,
|
|
839
|
+
}],
|
|
840
|
+
},
|
|
841
|
+
},
|
|
842
|
+
{
|
|
843
|
+
type: 'result',
|
|
844
|
+
subtype: 'success',
|
|
845
|
+
result: 'HALT: prerequisite not met\nSCHEDULER_VERDICT: PASS',
|
|
846
|
+
},
|
|
847
|
+
];
|
|
848
|
+
writeLog(tmp, slug, logEvents);
|
|
849
|
+
const prdPath = writePrd(tmp, slug, '# Halt with sentinel');
|
|
850
|
+
const verdict = await verifyRun({
|
|
851
|
+
runDir: tmp,
|
|
852
|
+
prdPath,
|
|
853
|
+
queueEntry: { slug, status: 'running' },
|
|
854
|
+
allJobs: [],
|
|
855
|
+
committedDuringRun: true,
|
|
856
|
+
});
|
|
857
|
+
assert.equal(verdict.verdict, 'halt', `halt must survive even with PASS sentinel, got ${verdict.verdict}: ${verdict.reason}`);
|
|
858
|
+
assert.equal(verdict.downgradeTo, 'pending');
|
|
859
|
+
} finally {
|
|
860
|
+
rmdir(tmp);
|
|
861
|
+
}
|
|
862
|
+
});
|
package/src/main/runVerify.cjs
CHANGED
|
@@ -414,6 +414,37 @@ function checkDeps(queueEntry, allJobs, prdBody) {
|
|
|
414
414
|
return { ok: true };
|
|
415
415
|
}
|
|
416
416
|
|
|
417
|
+
// ─── sentinel scanner ─────────────────────────────────────────────────────────
|
|
418
|
+
|
|
419
|
+
/**
 * Scan for a `SCHEDULER_VERDICT: PASS|FAIL` sentinel line in the run output.
 *
 * Checks `resultEvent.resultText` first (the agent's final message), then the
 * content of the last `tool_result` event. The pattern is anchored to
 * line-start so prose mentioning the string mid-sentence does not match.
 *
 * When a text contains several sentinel lines, the LAST one wins: the finish
 * protocol asks for the sentinel as the final line, so an earlier (quoted or
 * superseded) PASS must never mask a closing FAIL.
 *
 * Missing or non-string text (no `resultText`, array-form tool_result content,
 * absent `events`) is treated as "no sentinel" instead of throwing.
 *
 * @param {{resultText?: unknown}|null|undefined} resultEvent Parsed result event, if any.
 * @param {Array<{kind?: string, content?: unknown}>|null|undefined} events Parsed log events.
 * @returns {'pass'|'fail'|null} Lower-cased verdict, or null when no sentinel was found.
 */
function scanSentinel(resultEvent, events) {
  // Returns the verdict of the last sentinel line in `text`, or null.
  const lastSentinelIn = (text) => {
    if (typeof text !== 'string' || text.length === 0) return null;
    let verdict = null;
    // Fresh /g regex per call — no shared lastIndex state between scans.
    for (const m of text.matchAll(/^SCHEDULER_VERDICT:\s*(PASS|FAIL)\b/gm)) {
      verdict = m[1].toLowerCase();
    }
    return verdict;
  };

  // The agent's final message is the primary source.
  const fromResult = lastSentinelIn(resultEvent?.resultText);
  if (fromResult) return fromResult;

  // Fallback: only the LAST tool_result is consulted, not every one.
  let lastToolResult = null;
  for (const ev of Array.isArray(events) ? events : []) {
    if (ev?.kind === 'tool_result') lastToolResult = ev;
  }
  return lastSentinelIn(lastToolResult?.content);
}
|
|
447
|
+
|
|
417
448
|
// ─── main verifier ────────────────────────────────────────────────────────────
|
|
418
449
|
|
|
419
450
|
/**
|
|
@@ -422,13 +453,23 @@ function checkDeps(queueEntry, allJobs, prdBody) {
|
|
|
422
453
|
* escalate to 'needs_review'.
|
|
423
454
|
*
|
|
424
455
|
* @param {object} params
|
|
425
|
-
* @param {string} params.runDir
|
|
426
|
-
* @param {string} params.prdPath
|
|
427
|
-
* @param {object} params.queueEntry
|
|
428
|
-
* @param {object[]} [params.allJobs]
|
|
456
|
+
* @param {string} params.runDir Absolute path to the run directory.
|
|
457
|
+
* @param {string} params.prdPath Absolute path to the PRD .md file.
|
|
458
|
+
* @param {object} params.queueEntry The queue.json entry for this job.
|
|
459
|
+
* @param {object[]} [params.allJobs] All entries from queue.json (dep checks).
|
|
460
|
+
* @param {boolean} [params.committedDuringRun] True when HEAD moved during the run,
|
|
461
|
+
* confirming the job's commit landed.
|
|
462
|
+
* Default false for back-compat.
|
|
463
|
+
* @param {boolean} [params.allowPreSentinelHeal] When true, a commit-in-window
|
|
464
|
+
* with no SCHEDULER_VERDICT: FAIL is
|
|
465
|
+
* sufficient to override weak verdicts
|
|
466
|
+
* (transcript_errors/verify_unavailable)
|
|
467
|
+
* even without a PASS sentinel. Only
|
|
468
|
+
* set by the boot reverify self-heal
|
|
469
|
+
* pass for pre-sentinel legacy runs.
|
|
429
470
|
* @returns {Promise<{verdict:string, reason:string, downgradeTo:string|null}>}
|
|
430
471
|
*/
|
|
431
|
-
async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
|
|
472
|
+
async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [], committedDuringRun = false, allowPreSentinelHeal = false }) {
|
|
432
473
|
const { slug } = queueEntry;
|
|
433
474
|
const logPath = path.join(runDir, `${slug}.log`);
|
|
434
475
|
const verdictsPath = path.join(runDir, `${slug}.verdicts.json`);
|
|
@@ -568,7 +609,12 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
|
|
|
568
609
|
}
|
|
569
610
|
}
|
|
570
611
|
|
|
571
|
-
|
|
612
|
+
// Scan for the SCHEDULER_VERDICT sentinel emitted by the finish protocol.
|
|
613
|
+
const sentinel = scanSentinel(resultEvent, events);
|
|
614
|
+
const sentinelFields = sentinel ? { sentinel } : {};
|
|
615
|
+
const extras = (annotations.length || sentinel)
|
|
616
|
+
? { ...(annotations.length ? { annotations } : {}), ...sentinelFields }
|
|
617
|
+
: undefined;
|
|
572
618
|
|
|
573
619
|
if (issues.length === 0) {
|
|
574
620
|
const reason = annotations.length
|
|
@@ -580,6 +626,42 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
|
|
|
580
626
|
// Pick highest-priority issue (transcript_errors > verify_unavailable).
|
|
581
627
|
issues.sort((a, b) => b.priority - a.priority);
|
|
582
628
|
const top = issues[0];
|
|
629
|
+
|
|
630
|
+
// Sentinel override: SCHEDULER_VERDICT: PASS + a commit that landed during
|
|
631
|
+
// the run is authoritative evidence the job succeeded. Suppresses incidental
|
|
632
|
+
// transcript noise (grep results with "Error", TDD red-phase reproductions,
|
|
633
|
+
// Traceback in debug output) for the two weakest verdict classes.
|
|
634
|
+
// MUST NOT apply to halt or deps_unmet — those keep their existing semantics.
|
|
635
|
+
if (
|
|
636
|
+
sentinel === 'pass'
|
|
637
|
+
&& committedDuringRun
|
|
638
|
+
&& (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
|
|
639
|
+
) {
|
|
640
|
+
return conclude('clean',
|
|
641
|
+
`SCHEDULER_VERDICT: PASS + commit landed overrides ${top.verdict}`,
|
|
642
|
+
null,
|
|
643
|
+
{ ...(annotations.length ? { annotations } : {}), sentinel, sentinelOverride: top.verdict },
|
|
644
|
+
);
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
// Pre-sentinel heal: job predates SCHEDULER_VERDICT emission. A commit in
|
|
648
|
+
// the run window with no explicit FAIL sentinel is weak but sufficient to
|
|
649
|
+
// override the two weakest verdict classes during the self-heal pass.
|
|
650
|
+
// Only applies when the caller opts in (allowPreSentinelHeal=true) — live
|
|
651
|
+
// runs never set this, so only the boot reverify self-heal uses it.
|
|
652
|
+
if (
|
|
653
|
+
allowPreSentinelHeal
|
|
654
|
+
&& committedDuringRun
|
|
655
|
+
&& sentinel !== 'fail'
|
|
656
|
+
&& (top.verdict === 'transcript_errors' || top.verdict === 'verify_unavailable')
|
|
657
|
+
) {
|
|
658
|
+
return conclude('clean',
|
|
659
|
+
`pre-sentinel heal: committed in run window, no SCHEDULER_VERDICT: FAIL, overrides ${top.verdict}`,
|
|
660
|
+
null,
|
|
661
|
+
{ ...(annotations.length ? { annotations } : {}), preSentinelHeal: top.verdict },
|
|
662
|
+
);
|
|
663
|
+
}
|
|
664
|
+
|
|
583
665
|
return conclude(top.verdict, top.reason, 'needs_review', extras);
|
|
584
666
|
|
|
585
667
|
} catch (e) {
|
|
@@ -601,4 +683,5 @@ module.exports = {
|
|
|
601
683
|
parsePrdBodyDepFragments,
|
|
602
684
|
checkDeps,
|
|
603
685
|
parseLog,
|
|
686
|
+
scanSentinel,
|
|
604
687
|
};
|
package/src/main/scheduler.cjs
CHANGED
|
@@ -91,6 +91,13 @@ const RESULT_TAIL_BYTES = 8 * 1024;
|
|
|
91
91
|
const IDLE_OUTPUT_KILL_MS = 20 * 60_000;
|
|
92
92
|
const IDLE_CHECK_INTERVAL_MS = 60_000;
|
|
93
93
|
|
|
94
|
+
// Boot reconciliation: a job left 'running' by an app restart/crash whose log
|
|
95
|
+
// shows neither success nor a real failure result was merely interrupted — the
|
|
96
|
+
// host died, the PRD didn't. Re-queue it up to this many times before giving up
|
|
97
|
+
// and marking it failed, so a restart self-recovers instead of needing a manual
|
|
98
|
+
// flip + a wasted fix-plan investigation.
|
|
99
|
+
const ORPHAN_REQUEUE_CAP = 2;
|
|
100
|
+
|
|
94
101
|
// Appended to every scheduled job prompt so the queue can be RELIED ON to finish
|
|
95
102
|
// work to a consistent bar: review → security-review → verify → commit. Enforced
|
|
96
103
|
// centrally here (not per-PRD) so it applies to every current and future PRD.
|
|
@@ -116,6 +123,17 @@ sequence. Do not stop before the commit lands; committing is part of the job.
|
|
|
116
123
|
pass. Do not assume npm; use whatever the target project uses.
|
|
117
124
|
4. COMMIT — stage and commit ALL changes with a clear conventional message:
|
|
118
125
|
\`git add -A && git commit -m "<type>(<scope>): <summary>"\`.
|
|
126
|
+
5. VERDICT SENTINEL — as the LAST LINE of your final result text, emit exactly
|
|
127
|
+
one of these two lines (no trailing text after it):
|
|
128
|
+
SCHEDULER_VERDICT: PASS
|
|
129
|
+
SCHEDULER_VERDICT: FAIL <one-line reason>
|
|
130
|
+
Print PASS only when the AC gate is green AND the commit from step 4 landed.
|
|
131
|
+
Print FAIL (and exit 1) if the AC gate was red or the commit could not land.
|
|
132
|
+
NEVER print PASS on a red AC gate — a lying PASS turns the verifier from a
|
|
133
|
+
false-failure catcher into a silent-failure shipper. A truthful PASS + a
|
|
134
|
+
landed commit lets the verifier override incidental transcript noise (grep
|
|
135
|
+
results containing "Error", a TDD red-test run early in the session, debug
|
|
136
|
+
Tracebacks) so those do not false-trip a needs_review downgrade.
|
|
119
137
|
|
|
120
138
|
A job that exits with uncommitted changes is treated as INCOMPLETE and flagged
|
|
121
139
|
for review. Do NOT add work beyond the acceptance criteria — this protocol is the
|
|
@@ -169,6 +187,25 @@ function gitHead(cwd) {
|
|
|
169
187
|
});
|
|
170
188
|
}
|
|
171
189
|
|
|
190
|
+
/**
 * Resolves true if at least one commit landed in `cwd` between `startedAt` and
 * `finishedAt` (with 60s slack after `finishedAt`). Used by the self-heal pass
 * to derive committedDuringRun from the recorded run window — the live
 * commit-guard uses gitHead() instead.
 *
 * Never throws and never rejects: a missing cwd/startedAt, an unparseable
 * `finishedAt`, a git failure, or a synchronous spawn error all resolve false
 * (no override, job stays as-is).
 *
 * @param {string} cwd Working directory passed to `git -C`.
 * @param {string} startedAt Run start timestamp, passed to `git log --since`.
 * @param {string} [finishedAt] Run end timestamp; when absent the window ends now.
 * @returns {Promise<boolean>}
 */
function committedInWindow(cwd, startedAt, finishedAt) {
  return new Promise((resolve) => {
    try {
      if (!cwd || !startedAt) { resolve(false); return; }

      let until;
      if (finishedAt) {
        const finishedMs = Date.parse(finishedAt);
        // new Date(NaN).toISOString() throws RangeError; an unreadable end
        // timestamp means the window is unknown, so do not claim a commit.
        if (Number.isNaN(finishedMs)) { resolve(false); return; }
        until = new Date(finishedMs + 60_000).toISOString();
      } else {
        until = new Date().toISOString();
      }

      execFile(
        'git',
        ['-C', cwd, 'log', '--format=%H', `--since=${startedAt}`, `--until=${until}`],
        { timeout: 10_000, windowsHide: true },
        (err, stdout) => { resolve(!err && String(stdout || '').trim().length > 0); },
      );
    } catch {
      // Deliberate best-effort: any synchronous failure (bad spawn args,
      // out-of-range date) is reported as "no commit confirmed".
      resolve(false);
    }
  });
}
|
|
208
|
+
|
|
172
209
|
const ROOT = path.join(os.homedir(), '.claude', 'session-manager', 'scheduled-plans');
|
|
173
210
|
const PRDS_DIR = path.join(ROOT, 'prds');
|
|
174
211
|
const RUNS_DIR = path.join(ROOT, 'runs');
|
|
@@ -1224,6 +1261,12 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
|
|
|
1224
1261
|
// Called outside mutate() so the queue lock is not held during I/O.
|
|
1225
1262
|
let verifyResult = null;
|
|
1226
1263
|
if (res.exitCode === 0 && !res.rateLimited) {
|
|
1264
|
+
// Detect whether the job self-committed by comparing HEAD before/after.
|
|
1265
|
+
// Used by the sentinel override: SCHEDULER_VERDICT: PASS + a landed
|
|
1266
|
+
// commit together override incidental transcript noise verdicts.
|
|
1267
|
+
const headAtExit = await gitHead(guardCwd);
|
|
1268
|
+
const committedDuringRun = !!(guardHeadBefore && headAtExit && guardHeadBefore !== headAtExit);
|
|
1269
|
+
|
|
1227
1270
|
const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
|
|
1228
1271
|
const stateForDeps = await readQueue();
|
|
1229
1272
|
verifyResult = await verifyRun({
|
|
@@ -1231,6 +1274,7 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
|
|
|
1231
1274
|
prdPath,
|
|
1232
1275
|
queueEntry: job,
|
|
1233
1276
|
allJobs: stateForDeps.jobs,
|
|
1277
|
+
committedDuringRun,
|
|
1234
1278
|
}).catch((e) => ({
|
|
1235
1279
|
verdict: 'verify_unavailable',
|
|
1236
1280
|
reason: `verifier threw: ${e?.message ?? String(e)}`,
|
|
@@ -1688,14 +1732,30 @@ async function reverifyNeedsReview() {
|
|
|
1688
1732
|
const snap = await readQueue();
|
|
1689
1733
|
const candidates = snap.jobs.filter(isRescanCandidate);
|
|
1690
1734
|
const healed = [];
|
|
1735
|
+
const leftForReview = [];
|
|
1691
1736
|
for (const job of candidates) {
|
|
1692
1737
|
const runDir = path.join(RUNS_DIR, job.runId);
|
|
1693
1738
|
const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
|
|
1739
|
+
// Derive committedDuringRun from the recorded run window. The live
|
|
1740
|
+
// commit-guard uses gitHead() (before/after HEAD diff); here the run is
|
|
1741
|
+
// already over so we query git log filtered to [startedAt, finishedAt+60s].
|
|
1742
|
+
const committedDuringRun = await committedInWindow(job.cwd, job.startedAt, job.finishedAt);
|
|
1694
1743
|
let v = null;
|
|
1695
1744
|
try {
|
|
1696
|
-
v = await verifyRun({
|
|
1697
|
-
|
|
1698
|
-
|
|
1745
|
+
v = await verifyRun({
|
|
1746
|
+
runDir,
|
|
1747
|
+
prdPath,
|
|
1748
|
+
queueEntry: job,
|
|
1749
|
+
allJobs: snap.jobs,
|
|
1750
|
+
committedDuringRun,
|
|
1751
|
+
allowPreSentinelHeal: true,
|
|
1752
|
+
});
|
|
1753
|
+
} catch { leftForReview.push({ slug: job.slug, reason: 'verifyRun threw' }); continue; }
|
|
1754
|
+
if (v && v.verdict === 'clean') {
|
|
1755
|
+
healed.push(job.slug);
|
|
1756
|
+
} else {
|
|
1757
|
+
leftForReview.push({ slug: job.slug, reason: v ? `${v.verdict}: ${v.reason}` : 'null verdict' });
|
|
1758
|
+
}
|
|
1699
1759
|
}
|
|
1700
1760
|
if (healed.length) {
|
|
1701
1761
|
const healSet = new Set(healed);
|
|
@@ -1711,7 +1771,11 @@ async function reverifyNeedsReview() {
|
|
|
1711
1771
|
console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
|
|
1712
1772
|
await broadcast();
|
|
1713
1773
|
}
|
|
1714
|
-
|
|
1774
|
+
if (leftForReview.length) {
|
|
1775
|
+
const detail = leftForReview.map((e) => `${e.slug} (${e.reason})`).join(', ');
|
|
1776
|
+
console.log(`[scheduler] boot reverify: left for review: ${detail}`);
|
|
1777
|
+
}
|
|
1778
|
+
return { rescanned: candidates.length, healed, leftForReview };
|
|
1715
1779
|
}
|
|
1716
1780
|
|
|
1717
1781
|
function registerScheduleHandlers() {
|
|
@@ -1981,24 +2045,52 @@ async function init() {
|
|
|
1981
2045
|
}
|
|
1982
2046
|
await mutate((state) => {
|
|
1983
2047
|
for (const j of state.jobs) {
|
|
1984
|
-
if (j.status
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
}
|
|
2048
|
+
if (j.status !== 'running') continue;
|
|
2049
|
+
const pid = j.runtime?.pid;
|
|
2050
|
+
let killNote = '';
|
|
2051
|
+
if (pid) {
|
|
2052
|
+
const result = killOrphanClaudePid(pid);
|
|
2053
|
+
killNote = ` (orphan pid=${pid}: ${result})`;
|
|
2054
|
+
if (result === 'killed') {
|
|
2055
|
+
console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
|
|
1993
2056
|
}
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
j.
|
|
2057
|
+
}
|
|
2058
|
+
const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
|
|
2059
|
+
if (outcome === 'success') {
|
|
2060
|
+
// Job finished cleanly before the crash — keep the win.
|
|
2061
|
+
j.status = 'completed';
|
|
2062
|
+
j.exitCode = 0;
|
|
2063
|
+
j.error = null;
|
|
1999
2064
|
j.finishedAt = new Date().toISOString();
|
|
2000
2065
|
delete j.runtime;
|
|
2001
|
-
console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome
|
|
2066
|
+
console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=success → completed`);
|
|
2067
|
+
} else if (outcome === 'failed') {
|
|
2068
|
+
// The log carries a real failure result event — a genuine failure, keep it.
|
|
2069
|
+
j.status = 'failed';
|
|
2070
|
+
j.exitCode = j.exitCode ?? 1;
|
|
2071
|
+
j.error = `orphaned: app restarted while running${killNote}`;
|
|
2072
|
+
j.finishedAt = new Date().toISOString();
|
|
2073
|
+
delete j.runtime;
|
|
2074
|
+
console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=failed → failed`);
|
|
2075
|
+
} else {
|
|
2076
|
+
// no_result / unknown: the run was interrupted (host died / app restarted)
|
|
2077
|
+
// with NO evidence it failed on its own merits. Punishing the PRD here is
|
|
2078
|
+
// the wrong call — it demands a manual flip and burns an Opus fix-plan on a
|
|
2079
|
+
// job that never actually failed. Re-queue it (bounded) so an app restart
|
|
2080
|
+
// self-recovers. Mirrors the transient-kill auto-retry on the live path.
|
|
2081
|
+
const tries = j.orphanRetries ?? 0;
|
|
2082
|
+
if (tries < ORPHAN_REQUEUE_CAP) {
|
|
2083
|
+
resetJobFields(j, `orphaned: app restarted mid-run, re-queued (attempt ${tries + 1}/${ORPHAN_REQUEUE_CAP})${killNote}`);
|
|
2084
|
+
j.orphanRetries = tries + 1;
|
|
2085
|
+
console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → re-queued (${tries + 1}/${ORPHAN_REQUEUE_CAP})`);
|
|
2086
|
+
} else {
|
|
2087
|
+
j.status = 'failed';
|
|
2088
|
+
j.exitCode = j.exitCode ?? 1;
|
|
2089
|
+
j.error = `orphaned: app restarted while running, exhausted ${ORPHAN_REQUEUE_CAP} re-queue attempts${killNote}`;
|
|
2090
|
+
j.finishedAt = new Date().toISOString();
|
|
2091
|
+
delete j.runtime;
|
|
2092
|
+
console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → failed (orphan retries exhausted)`);
|
|
2093
|
+
}
|
|
2002
2094
|
}
|
|
2003
2095
|
}
|
|
2004
2096
|
});
|
|
@@ -2024,7 +2116,17 @@ async function init() {
|
|
|
2024
2116
|
// Refresh next-reset every 10 minutes — billing window can shift if usage
|
|
2025
2117
|
// resets early or the auth token rotates. Tracked so re-init doesn't leak.
|
|
2026
2118
|
if (rescheduleInterval) clearInterval(rescheduleInterval);
|
|
2027
|
-
rescheduleInterval = setInterval(() => {
|
|
2119
|
+
rescheduleInterval = setInterval(() => {
|
|
2120
|
+
rescheduleTimer().catch(() => {});
|
|
2121
|
+
// Periodic self-heal: re-run the verifier over stale needs_review jobs so a
|
|
2122
|
+
// job whose work actually landed (committed in-window, no FAIL sentinel)
|
|
2123
|
+
// auto-clears WITHOUT waiting for the next app restart. Cheap-guarded — the
|
|
2124
|
+
// log scan only runs when something is actually flagged.
|
|
2125
|
+
const s = readQueueSync();
|
|
2126
|
+
if (s.jobs.some((j) => j.status === 'needs_review')) {
|
|
2127
|
+
reverifyNeedsReview().catch(() => {});
|
|
2128
|
+
}
|
|
2129
|
+
}, 10 * 60_000);
|
|
2028
2130
|
|
|
2029
2131
|
// Self-rescheduling poll loop with exponential backoff. Replaces the
|
|
2030
2132
|
// old fixed-interval pollTimer + initialPollTimeout.
|