claude-code-session-manager 0.21.3 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.html CHANGED
@@ -7,10 +7,10 @@
7
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
8
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
9
  <link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,500;0,6..72,600;0,6..72,700;1,6..72,400&family=Geist:wght@300;400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap" rel="stylesheet">
10
- <script type="module" crossorigin src="./assets/index-DO3ROR11.js"></script>
10
+ <script type="module" crossorigin src="./assets/index-JOeKcfuw.js"></script>
11
11
  <link rel="modulepreload" crossorigin href="./assets/monaco-editor-BW5C4Iv1.js">
12
12
  <link rel="stylesheet" crossorigin href="./assets/monaco-editor-BTnBOi8r.css">
13
- <link rel="stylesheet" crossorigin href="./assets/index-DeQI4oVI.css">
13
+ <link rel="stylesheet" crossorigin href="./assets/index-BWn4BuSW.css">
14
14
  </head>
15
15
  <body class="bg-bg text-fg font-sans antialiased">
16
16
  <div id="root"></div>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-session-manager",
3
- "version": "0.21.3",
3
+ "version": "0.22.0",
4
4
  "description": "Local cockpit for the Claude Code CLI — multi-tab terminal, full config surface, scheduler, voice dictation, and live observability.",
5
5
  "type": "module",
6
6
  "main": "src/main/index.cjs",
@@ -389,8 +389,8 @@ test('FAIL recovered within 30 events → clean', async () => {
389
389
 
390
390
  // ─── fixtures: feedback 2026-06-10-01 — quoted-error false positives ─────────
391
391
 
392
- /** Build a one-Bash-call log: tool_use → tool_result(content) → success result. */
393
- function bashRunEvents(content, { toolName = 'Bash' } = {}) {
392
+ /** Build a one-Bash-call log: tool_use → tool_result(content) → result event. */
393
+ function bashRunEvents(content, { toolName = 'Bash', resultSubtype = 'success' } = {}) {
394
394
  return [
395
395
  {
396
396
  type: 'assistant',
@@ -416,7 +416,7 @@ function bashRunEvents(content, { toolName = 'Bash' } = {}) {
416
416
  }],
417
417
  },
418
418
  },
419
- { type: 'result', subtype: 'success', result: 'All acceptance criteria verified.' },
419
+ { type: 'result', subtype: resultSubtype, result: 'All acceptance criteria verified.' },
420
420
  ];
421
421
  }
422
422
 
@@ -435,7 +435,7 @@ test('feedback 01: reviewer prose mentioning ImportError mid-sentence → clean'
435
435
  } finally { rmdir(tmp); }
436
436
  });
437
437
 
438
- test('feedback 01: real line-anchored ModuleNotFoundError, no recovery → verify_unavailable', async () => {
438
+ test('feedback 01: real line-anchored ModuleNotFoundError in a FAILED run → verify_unavailable/needs_review', async () => {
439
439
  const tmp = makeTmpDir();
440
440
  try {
441
441
  const slug = '25-real-import-error';
@@ -446,11 +446,71 @@ test('feedback 01: real line-anchored ModuleNotFoundError, no recovery → verif
446
446
  ' from playwright.sync_api import sync_playwright',
447
447
  "ModuleNotFoundError: No module named 'playwright'",
448
448
  ].join('\n');
449
- writeLog(tmp, slug, bashRunEvents(out));
449
+ // Run did NOT succeed: the missing dependency was never resolved, so the
450
+ // "couldn't verify" signal must still escalate to a human.
451
+ writeLog(tmp, slug, bashRunEvents(out, { resultSubtype: 'error_during_execution' }));
450
452
  const prdPath = writePrd(tmp, slug, '# Real failure');
451
453
  const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
452
- // Traceback detector outranks (priority 2 > 1) but either way it must NOT be clean.
453
- assert.notEqual(verdict.verdict, 'clean', 'real interpreter error must still flag');
454
+ assert.equal(verdict.verdict, 'verify_unavailable', `unresolved missing-dep must flag, got ${verdict.verdict}: ${verdict.reason}`);
455
+ assert.equal(verdict.downgradeTo, 'needs_review');
456
+ } finally { rmdir(tmp); }
457
+ });
458
+
459
+ // ─── feedback 2026-06-10 addendum — recovered env-probe false positives ──────
460
+ //
461
+ // Setup probes (interpreter/venv search) that surface ModuleNotFoundError but
462
+ // the run still reaches result:success are the missing-dependency class, not a
463
+ // real failure. They must NOT downgrade — only annotate. But a Traceback ending
464
+ // in a real logic exception (KeyError/AssertionError) still hard-flags, even on
465
+ // "success", preserving the 2026-05-23 false-PASS guard.
466
+
467
+ test('addendum: Traceback→ModuleNotFoundError in a SUCCEEDED run → clean (annotated, not downgraded)', async () => {
468
+ const tmp = makeTmpDir();
469
+ try {
470
+ const slug = '26-self-billbot-shared-lib';
471
+ const probe = [
472
+ 'Exit code 1',
473
+ 'Traceback (most recent call last):',
474
+ ' File "/home/bilko/Self/.claude/skills/snopud-bill/download_bill.py", line 42, in <module>',
475
+ ' from playwright.sync_api import TimeoutError as PWTimeout',
476
+ "ModuleNotFoundError: No module named 'playwright'",
477
+ ].join('\n');
478
+ writeLog(tmp, slug, bashRunEvents(probe)); // resultSubtype defaults to success
479
+ const prdPath = writePrd(tmp, slug, '# Shared lib');
480
+ const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
481
+ assert.equal(verdict.verdict, 'clean', `recovered env probe must not downgrade, got ${verdict.verdict}: ${verdict.reason}`);
482
+ assert.equal(verdict.downgradeTo, null);
483
+ assert.ok(Array.isArray(verdict.annotations) && verdict.annotations.length === 1, 'should record one annotation');
484
+ assert.equal(verdict.annotations[0].verdict, 'verify_unavailable');
485
+ } finally { rmdir(tmp); }
486
+ });
487
+
488
+ test('addendum: bare Import/ModuleNotFound probe (no traceback) in SUCCEEDED run → clean/annotated', async () => {
489
+ const tmp = makeTmpDir();
490
+ try {
491
+ const slug = '26-self-parser-tests';
492
+ writeLog(tmp, slug, bashRunEvents("ModuleNotFoundError: No module named 'conftest'"));
493
+ const prdPath = writePrd(tmp, slug, '# Parser tests');
494
+ const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
495
+ assert.equal(verdict.verdict, 'clean', `got ${verdict.verdict}: ${verdict.reason}`);
496
+ assert.ok(Array.isArray(verdict.annotations) && verdict.annotations.length === 1);
497
+ } finally { rmdir(tmp); }
498
+ });
499
+
500
+ test('addendum: Traceback→KeyError (real logic failure) on "success" → still transcript_errors/needs_review', async () => {
501
+ const tmp = makeTmpDir();
502
+ try {
503
+ const slug = '26-real-logic-failure';
504
+ const out = [
505
+ '=== contract.json panels.sentiment ===',
506
+ 'Traceback (most recent call last):',
507
+ ' File "<string>", line 1, in <module>',
508
+ "KeyError: 'panels.sentiment'",
509
+ ].join('\n');
510
+ writeLog(tmp, slug, bashRunEvents(out)); // success result — must NOT rescue a real failure
511
+ const prdPath = writePrd(tmp, slug, '# Logic failure');
512
+ const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
513
+ assert.equal(verdict.verdict, 'transcript_errors', `real logic Traceback must still flag, got ${verdict.verdict}: ${verdict.reason}`);
454
514
  assert.equal(verdict.downgradeTo, 'needs_review');
455
515
  } finally { rmdir(tmp); }
456
516
  });
@@ -487,3 +547,40 @@ test('feedback 01: quoted "Traceback..." line (leading quote) → clean', async
487
547
  assert.equal(verdict.verdict, 'clean', `quoted traceback prose must not flag, got ${verdict.verdict}: ${verdict.reason}`);
488
548
  } finally { rmdir(tmp); }
489
549
  });
550
+
551
+ // ─── harness tool errors (feedback follow-up 2026-06-10) ─────────────────────
552
+
553
+ test('harness tool error (<tool_use_error>) in final 20% → clean', async () => {
554
+ const tmp = makeTmpDir();
555
+ try {
556
+ const slug = '58-harness-tool-error';
557
+ // Pad with benign events so the error lands in the final 20%, then a
558
+ // successful result — mirrors the real 58-web-remote-correctness-batch run.
559
+ const events = [];
560
+ for (let k = 0; k < 8; k++) {
561
+ events.push({ type: 'assistant', message: { role: 'assistant', content: [
562
+ { type: 'tool_use', id: `t${k}`, name: 'Read', input: { description: `read ${k}` } }] } });
563
+ events.push({ type: 'user', message: { role: 'user', content: [
564
+ { type: 'tool_result', tool_use_id: `t${k}`, content: 'ok', is_error: false }] } });
565
+ }
566
+ events.push({ type: 'assistant', message: { role: 'assistant', content: [
567
+ { type: 'tool_use', id: 'tbad', name: 'bash', input: { description: 'run tests' } }] } });
568
+ events.push({ type: 'user', message: { role: 'user', content: [
569
+ { type: 'tool_result', tool_use_id: 'tbad',
570
+ content: '<tool_use_error>Error: No such tool available: bash</tool_use_error>', is_error: true }] } });
571
+ events.push({ type: 'result', subtype: 'success', result: 'All acceptance criteria verified.' });
572
+
573
+ writeLog(tmp, slug, events);
574
+ const prdPath = writePrd(tmp, slug, '# Correctness batch');
575
+ const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
576
+ assert.equal(verdict.verdict, 'clean', `harness tool error must not flag, got ${verdict.verdict}: ${verdict.reason}`);
577
+ } finally { rmdir(tmp); }
578
+ });
579
+
580
+ test('isHarnessToolError detects wrapper and "No such tool available"', () => {
581
+ const { isHarnessToolError } = require('../runVerify.cjs');
582
+ assert.equal(isHarnessToolError('<tool_use_error>Error: No such tool available: bash</tool_use_error>'), true);
583
+ assert.equal(isHarnessToolError('No such tool available: Foo'), true);
584
+ assert.equal(isHarnessToolError('ModuleNotFoundError: No module named x'), false);
585
+ assert.equal(isHarnessToolError(''), false);
586
+ });
@@ -50,6 +50,19 @@ const VERDICTS_SCHEMA_VERSION = 1;
50
50
  * 2. Traceback + Error within 10 lines (Python exception)
51
51
  * 3. ModuleNotFoundError / ImportError (missing venv / broken deps)
52
52
  */
53
+ /**
54
+ * True when a tool_result content is a Claude Code harness tool error rather
55
+ * than task output — emitted when the model calls a tool that doesn't exist or
56
+ * isn't allowed (e.g. `<tool_use_error>Error: No such tool available: bash`).
57
+ * The harness rejects the call; the model recovers by retrying with a valid
58
+ * tool. Never a task failure, so the verifier must not downgrade on it.
59
+ */
60
+ function isHarnessToolError(content) {
61
+ if (typeof content !== 'string' || !content) return false;
62
+ return content.includes('<tool_use_error>')
63
+ || /\bNo such tool available\b/.test(content);
64
+ }
65
+
53
66
  function detectPattern(content) {
54
67
  if (typeof content !== 'string' || !content) return null;
55
68
 
@@ -61,11 +74,25 @@ function detectPattern(content) {
61
74
  // (2) Python Traceback + exception line within next 10 lines. Both anchored
62
75
  // to line starts: reviewer prose quoting "will crash with ImportError" or
63
76
  // embedding "...Error:" mid-sentence must not match (feedback 2026-06-10-01).
77
+ //
78
+ // The TERMINATING exception decides the class: a Traceback ending in
79
+ // ModuleNotFoundError/ImportError is the missing-dependency class ("the
80
+ // verification couldn't run", same as detector 3), NOT a logic failure — so
81
+ // it routes through the weaker verify_unavailable path (env-recovery escape
82
+ // hatch + success demotion). A Traceback ending in any other exception
83
+ // (KeyError, AssertionError, …) stays transcript_errors — that is the real
84
+ // false-PASS class the verifier exists to catch (2026-05-23 incident).
85
+ // (feedback 2026-06-10 addendum: interpreter-search setup probes that ended
86
+ // in ModuleNotFoundError were 3/3 false positives.)
64
87
  const lines = content.split('\n');
65
88
  for (let i = 0; i < lines.length; i++) {
66
89
  if (/^\s*Traceback \(most recent call last\):/.test(lines[i])) {
67
90
  for (let j = i + 1; j < Math.min(i + 11, lines.length); j++) {
68
- if (/^\s*[A-Za-z_][\w.]*(?:Error|Exception)\s*:/.test(lines[j])) {
91
+ const m = lines[j].match(/^\s*([A-Za-z_][\w.]*(?:Error|Exception))\s*:/);
92
+ if (m) {
93
+ if (m[1] === 'ModuleNotFoundError' || m[1] === 'ImportError') {
94
+ return { verdict: 'verify_unavailable', pattern: `Traceback → ${m[1]}` };
95
+ }
69
96
  return { verdict: 'transcript_errors', pattern: 'Traceback + Error within 10 lines' };
70
97
  }
71
98
  }
@@ -417,7 +444,7 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
417
444
  ...(extras ?? {}),
418
445
  };
419
446
  try { fs.writeFileSync(verdictsPath, JSON.stringify(record, null, 2)); } catch { /* best-effort */ }
420
- return { verdict, reason, downgradeTo };
447
+ return { verdict, reason, downgradeTo, ...(extras ?? {}) };
421
448
  }
422
449
 
423
450
  try {
@@ -467,11 +494,24 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
467
494
  const total = events.length;
468
495
  const last20pctStart = Math.floor(total * 0.8);
469
496
  const issues = [];
497
+ // Non-blocking notes: signals worth recording but not strong enough to
498
+ // downgrade (e.g. a missing-dependency probe in a run that still succeeded).
499
+ const annotations = [];
500
+ const runSucceeded = !!resultEvent && resultEvent.subtype === 'success';
470
501
 
471
502
  for (let i = 0; i < events.length; i++) {
472
503
  const ev = events[i];
473
504
  if (ev.kind !== 'tool_result') continue;
474
505
 
506
+ // Harness tool errors (`<tool_use_error>…`) are emitted when the model
507
+ // requests a tool that isn't available — e.g. a wrong-case name like
508
+ // "bash" instead of "Bash", or a tool outside the allowlist. The harness
509
+ // rejects the call and the model retries with a valid tool; the task is
510
+ // unaffected. These are never task failures, so they are exempt from both
511
+ // the is_error scan and the content pattern scan (false-positive class
512
+ // seen in 58-web-remote-correctness-batch, 2026-06-10).
513
+ if (isHarnessToolError(ev.content)) continue;
514
+
475
515
  // is_error:true in the final 20% of the transcript.
476
516
  if (ev.isError && i >= last20pctStart) {
477
517
  const desc = toolUseDesc(events, ev.toolUseId);
@@ -502,11 +542,19 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
502
542
  // ModuleNotFoundError/ImportError: first check for pip/uv install in
503
543
  // the next ≤5 tool_use calls (the agent may have self-healed).
504
544
  if (!hasInstallRecovery(events, ev.seq) && !isSelfRecovered(events, ev.seq, desc)) {
505
- issues.push({
506
- verdict: 'verify_unavailable',
507
- reason: `${hit.pattern} at event ${i}, no install recovery found`,
508
- priority: 1,
509
- });
545
+ const note = `${hit.pattern} at event ${i}, no install recovery found`;
546
+ if (runSucceeded) {
547
+ // "Verification couldn't run" is the weakest signal. When the run
548
+ // still reached a genuine result:success, the agent resolved its
549
+ // environment (often an interpreter/venv search the recovery
550
+ // heuristics above don't model) and finished — record it as an
551
+ // annotation, do NOT downgrade. transcript_errors (real logic/test
552
+ // failures) are never demoted this way, so the false-PASS guard is
553
+ // intact. (feedback 2026-06-10 addendum.)
554
+ annotations.push({ verdict: 'verify_unavailable', reason: note });
555
+ } else {
556
+ issues.push({ verdict: 'verify_unavailable', reason: note, priority: 1 });
557
+ }
510
558
  }
511
559
  } else {
512
560
  // transcript_errors (FAIL/FATAL/Traceback): self-recovery escape hatch.
@@ -520,14 +568,19 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
520
568
  }
521
569
  }
522
570
 
571
+ const extras = annotations.length ? { annotations } : undefined;
572
+
523
573
  if (issues.length === 0) {
524
- return conclude('clean', 'no issues detected', null);
574
+ const reason = annotations.length
575
+ ? `no blocking issues (${annotations.length} annotation(s): ${annotations.map((a) => a.reason).join('; ')})`
576
+ : 'no issues detected';
577
+ return conclude('clean', reason, null, extras);
525
578
  }
526
579
 
527
580
  // Pick highest-priority issue (transcript_errors > verify_unavailable).
528
581
  issues.sort((a, b) => b.priority - a.priority);
529
582
  const top = issues[0];
530
- return conclude(top.verdict, top.reason, 'needs_review');
583
+ return conclude(top.verdict, top.reason, 'needs_review', extras);
531
584
 
532
585
  } catch (e) {
533
586
  return conclude(
@@ -542,6 +595,7 @@ module.exports = {
542
595
  verifyRun,
543
596
  // Exposed for unit tests.
544
597
  detectPattern,
598
+ isHarnessToolError,
545
599
  toolUseName,
546
600
  extractSoakFromBody,
547
601
  parsePrdBodyDepFragments,
@@ -1257,7 +1257,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1257
1257
  // (e.g. an interactive session editing the same repo), not the job's
1258
1258
  // unsaved work — so skip rather than false-flag a completed job.
1259
1259
  // Non-git cwds resolve to null and are skipped (the guard is best-effort).
1260
- if (res.exitCode === 0 && !res.rateLimited && (!verifyResult || verifyResult.verdict === 'clean')) {
1260
+ //
1261
+ // Runs even when a transcript-pattern verdict already fired: the commit-guard
1262
+ // is a MATERIALLY-CHECKABLE signal (real git state) and outranks pattern hits.
1263
+ // Skipped only when the job is about to re-fire (HALT / deps_unmet → pending),
1264
+ // where working-tree state is irrelevant. When both fire, the uncommitted
1265
+ // verdict owns the needs_review reason and the pattern hit is demoted to an
1266
+ // annotation, so a real "finish protocol incomplete" is distinguishable from
1267
+ // transcript noise in the queue (feedback 2026-06-10 addendum).
1268
+ const guardWillRefire = verifyResult && verifyResult.downgradeTo === 'pending';
1269
+ if (res.exitCode === 0 && !res.rateLimited && !guardWillRefire) {
1261
1270
  const after = await uncommittedChanges(guardCwd);
1262
1271
  if (after && after.length > 0) {
1263
1272
  const baseSet = new Set(guardBaseline || []);
@@ -1270,10 +1279,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1270
1279
  const jobSelfCommitted = guardHeadBefore && guardHeadAfter && guardHeadAfter !== guardHeadBefore;
1271
1280
  if (newlyDirty.length > 0 && !siblingRunning && !jobSelfCommitted) {
1272
1281
  const sample = newlyDirty.slice(0, 3).join(', ');
1282
+ // Carry any prior transcript verdict + its annotations forward as notes.
1283
+ const carried = [...(verifyResult?.annotations ?? [])];
1284
+ if (verifyResult && verifyResult.verdict !== 'clean') {
1285
+ carried.push({ verdict: verifyResult.verdict, reason: verifyResult.reason });
1286
+ }
1273
1287
  verifyResult = {
1274
1288
  verdict: 'uncommitted_changes',
1275
1289
  reason: `finish protocol incomplete: ${newlyDirty.length} uncommitted file(s) left in working tree (e.g. ${sample})`,
1276
1290
  downgradeTo: 'needs_review',
1291
+ annotations: carried.length ? carried : undefined,
1277
1292
  };
1278
1293
  console.log(`[scheduler] commit-guard: ${job.slug} left ${newlyDirty.length} files uncommitted → needs_review`);
1279
1294
  }
@@ -1316,6 +1331,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1316
1331
  } else {
1317
1332
  delete s.jobs[i2].verifierVerdict;
1318
1333
  }
1334
+ // Non-blocking notes (e.g. a recovered missing-dependency probe, or a
1335
+ // pattern hit demoted because a materially-checkable verdict outranked
1336
+ // it) — surfaced even on completed jobs so the signal isn't lost.
1337
+ if (verifyResult?.annotations && verifyResult.annotations.length) {
1338
+ s.jobs[i2].verifierAnnotations = verifyResult.annotations.map(
1339
+ (a) => `${a.verdict}: ${a.reason}`,
1340
+ );
1341
+ } else {
1342
+ delete s.jobs[i2].verifierAnnotations;
1343
+ }
1319
1344
  delete s.jobs[i2].runtime;
1320
1345
 
1321
1346
  if (effectiveStatus === 'failed') {
@@ -1629,6 +1654,66 @@ function selectHistoryJobs(jobs, limit) {
1629
1654
  .slice(0, cap);
1630
1655
  }
1631
1656
 
1657
+ // Transcript-scan verdicts that re-running verifyRun can re-evaluate. NOT
1658
+ // 'uncommitted_changes' — that comes from the git commit-guard, which verifyRun
1659
+ // does not inspect, so re-scanning it would always return 'clean' and wrongly
1660
+ // heal a genuinely-unfinished job.
1661
+ const RESCANNABLE_VERDICTS = new Set(['transcript_errors', 'verify_unavailable']);
1662
+
1663
+ /**
1664
+ * Pure predicate: is this job eligible for the boot re-verify self-heal? Only
1665
+ * needs_review jobs with a run log AND a transcript-scan verdict. Crucially
1666
+ * EXCLUDES 'uncommitted_changes' (git commit-guard) — verifyRun can't see git,
1667
+ * so re-scanning it would falsely heal an unfinished job. Exported for tests.
1668
+ */
1669
+ function isRescanCandidate(job) {
1670
+ return !!job
1671
+ && job.status === 'needs_review'
1672
+ && !!job.runId
1673
+ && RESCANNABLE_VERDICTS.has(job.verifierVerdict);
1674
+ }
1675
+
1676
+ /**
1677
+ * Self-healing pass over needs_review jobs. The verifier runs in-process, so a
1678
+ * fix to runVerify.cjs only takes effect for jobs verified AFTER an app
1679
+ * restart — jobs flagged by the old (buggy) verifier stay stuck in needs_review
1680
+ * forever. On boot we re-run the CURRENT verifier over every transcript-scan
1681
+ * needs_review job and auto-complete the ones that now pass clean, so verifier
1682
+ * improvements retroactively clear their own false positives (2026-06-10:
1683
+ * anchored ImportError detectors + harness-tool-error exemption healed 8 jobs).
1684
+ *
1685
+ * @returns {Promise<{rescanned:number, healed:string[]}>}
1686
+ */
1687
+ async function reverifyNeedsReview() {
1688
+ const snap = await readQueue();
1689
+ const candidates = snap.jobs.filter(isRescanCandidate);
1690
+ const healed = [];
1691
+ for (const job of candidates) {
1692
+ const runDir = path.join(RUNS_DIR, job.runId);
1693
+ const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
1694
+ let v = null;
1695
+ try {
1696
+ v = await verifyRun({ runDir, prdPath, queueEntry: job, allJobs: snap.jobs });
1697
+ } catch { continue; } // unreadable log etc. — leave for human review
1698
+ if (v && v.verdict === 'clean') healed.push(job.slug);
1699
+ }
1700
+ if (healed.length) {
1701
+ const healSet = new Set(healed);
1702
+ await mutate((s) => {
1703
+ for (const j of s.jobs) {
1704
+ if (j.status === 'needs_review' && healSet.has(j.slug)) {
1705
+ j.status = 'completed';
1706
+ j.error = null;
1707
+ delete j.verifierVerdict;
1708
+ }
1709
+ }
1710
+ });
1711
+ console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
1712
+ await broadcast();
1713
+ }
1714
+ return { rescanned: candidates.length, healed };
1715
+ }
1716
+
1632
1717
  function registerScheduleHandlers() {
1633
1718
  ensureDirs();
1634
1719
  supervisor.registerHandlers();
@@ -1666,6 +1751,13 @@ function registerScheduleHandlers() {
1666
1751
  };
1667
1752
  });
1668
1753
 
1754
+ ipcMain.handle('schedule:reverify-needs-review', async () => {
1755
+ // Manual trigger for the boot self-heal pass — re-scan needs_review jobs
1756
+ // with the current verifier and auto-complete the ones that now pass clean.
1757
+ const result = await reverifyNeedsReview();
1758
+ return { ok: true, ...result };
1759
+ });
1760
+
1669
1761
  ipcMain.handle('schedule:force-tick', async () => {
1670
1762
  // Bypass the billing-poll gate entirely — fire pending jobs immediately regardless of meter state.
1671
1763
  // Clears any existing pause first (same semantics as run-now).
@@ -1921,6 +2013,13 @@ async function init() {
1921
2013
  await setPaused(boot.paused.reason, boot.paused.resumeAt);
1922
2014
  }
1923
2015
 
2016
+ // Self-heal stale needs_review flags using the current verifier (see
2017
+ // reverifyNeedsReview). Runs once on boot so a shipped verifier fix clears
2018
+ // its own historical false positives without manual retagging.
2019
+ await reverifyNeedsReview().catch((e) => {
2020
+ console.error(`[scheduler] boot reverify failed: ${e?.message ?? e}`);
2021
+ });
2022
+
1924
2023
  await rescheduleTimer();
1925
2024
  // Refresh next-reset every 10 minutes — billing window can shift if usage
1926
2025
  // resets early or the auth token rotates. Tracked so re-init doesn't leak.
@@ -2089,4 +2188,4 @@ const remote = {
2089
2188
  },
2090
2189
  };
2091
2190
 
2092
- module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs, pollRecoveryClearSource, memoryLimitedBatchSize };
2191
+ module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs, pollRecoveryClearSource, memoryLimitedBatchSize, reverifyNeedsReview, isRescanCandidate };
@@ -589,9 +589,13 @@ async function tailLines(filePath, fromOffset) {
589
589
  const len = stat.size - start;
590
590
  const buf = Buffer.alloc(len);
591
591
  await fd.read(buf, 0, len, start);
592
- const parts = buf.toString('utf8').split('\n').filter(Boolean);
592
+ // Shift before filter: the fragment may be an empty string (when the
593
+ // buffer starts with '\n', completing the previous partial line). If we
594
+ // filter(Boolean) first, the empty fragment disappears and shift() would
595
+ // remove the first valid line instead.
596
+ const parts = buf.toString('utf8').split('\n');
593
597
  if (dropFirst && parts.length) parts.shift();
594
- return { lines: parts, size: stat.size, inode: stat.ino };
598
+ return { lines: parts.filter(Boolean), size: stat.size, inode: stat.ino };
595
599
  } finally {
596
600
  await fd.close();
597
601
  }