npm - claude-code-session-manager - Versions diffs - 0.21.3 → 0.22.0 - Mend

claude-code-session-manager 0.21.3 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/assets/{TiptapBody-PdmsfUCQ.js → TiptapBody-DEFQ0d2H.js} +1 -1
package/dist/assets/index-BWn4BuSW.css +32 -0
package/dist/assets/{index-DO3ROR11.js → index-JOeKcfuw.js} +455 -456
package/dist/index.html +2 -2
package/package.json +1 -1
package/src/main/__tests__/runVerify.test.cjs +104 -7
package/src/main/runVerify.cjs +63 -9
package/src/main/scheduler.cjs +101 -2
package/src/main/webRemote.cjs +6 -2
package/dist/assets/index-DeQI4oVI.css +0 -32

package/dist/index.html CHANGED Viewed

@@ -7,10 +7,10 @@
     <link rel="preconnect" href="https://fonts.googleapis.com">
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
     <link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,500;0,6..72,600;0,6..72,700;1,6..72,400&family=Geist:wght@300;400;500;600;700&family=IBM+Plex+Mono:wght@400;500;600&display=swap" rel="stylesheet">
-    <script type="module" crossorigin src="./assets/index-DO3ROR11.js"></script>
+    <script type="module" crossorigin src="./assets/index-JOeKcfuw.js"></script>
     <link rel="modulepreload" crossorigin href="./assets/monaco-editor-BW5C4Iv1.js">
     <link rel="stylesheet" crossorigin href="./assets/monaco-editor-BTnBOi8r.css">
-    <link rel="stylesheet" crossorigin href="./assets/index-DeQI4oVI.css">
+    <link rel="stylesheet" crossorigin href="./assets/index-BWn4BuSW.css">
   </head>
   <body class="bg-bg text-fg font-sans antialiased">
     <div id="root"></div>

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-session-manager",
-  "version": "0.21.3",
+  "version": "0.22.0",
   "description": "Local cockpit for the Claude Code CLI — multi-tab terminal, full config surface, scheduler, voice dictation, and live observability.",
   "type": "module",
   "main": "src/main/index.cjs",

package/src/main/__tests__/runVerify.test.cjs CHANGED Viewed

@@ -389,8 +389,8 @@ test('FAIL recovered within 30 events → clean', async () => {
 // ─── fixtures: feedback 2026-06-10-01 — quoted-error false positives ─────────
-/** Build a one-Bash-call log: tool_use → tool_result(content) → success result. */
-function bashRunEvents(content, { toolName = 'Bash' } = {}) {
+/** Build a one-Bash-call log: tool_use → tool_result(content) → result event. */
+function bashRunEvents(content, { toolName = 'Bash', resultSubtype = 'success' } = {}) {
   return [
     {
       type: 'assistant',
@@ -416,7 +416,7 @@ function bashRunEvents(content, { toolName = 'Bash' } = {}) {
         }],
       },
     },
-    { type: 'result', subtype: 'success', result: 'All acceptance criteria verified.' },
+    { type: 'result', subtype: resultSubtype, result: 'All acceptance criteria verified.' },
   ];
 }
@@ -435,7 +435,7 @@ test('feedback 01: reviewer prose mentioning ImportError mid-sentence → clean'
   } finally { rmdir(tmp); }
 });
-test('feedback 01: real line-anchored ModuleNotFoundError, no recovery → verify_unavailable', async () => {
+test('feedback 01: real line-anchored ModuleNotFoundError in a FAILED run → verify_unavailable/needs_review', async () => {
   const tmp = makeTmpDir();
   try {
     const slug = '25-real-import-error';
@@ -446,11 +446,71 @@ test('feedback 01: real line-anchored ModuleNotFoundError, no recovery → verif
       '    from playwright.sync_api import sync_playwright',
       "ModuleNotFoundError: No module named 'playwright'",
     ].join('\n');
-    writeLog(tmp, slug, bashRunEvents(out));
+    // Run did NOT succeed: the missing dependency was never resolved, so the
+    // "couldn't verify" signal must still escalate to a human.
+    writeLog(tmp, slug, bashRunEvents(out, { resultSubtype: 'error_during_execution' }));
     const prdPath = writePrd(tmp, slug, '# Real failure');
     const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
-    // Traceback detector outranks (priority 2 > 1) but either way it must NOT be clean.
-    assert.notEqual(verdict.verdict, 'clean', 'real interpreter error must still flag');
+    assert.equal(verdict.verdict, 'verify_unavailable', `unresolved missing-dep must flag, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, 'needs_review');
+  } finally { rmdir(tmp); }
+});
+// ─── feedback 2026-06-10 addendum — recovered env-probe false positives ──────
+//
+// Setup probes (interpreter/venv search) that surface ModuleNotFoundError but
+// the run still reaches result:success are the missing-dependency class, not a
+// real failure. They must NOT downgrade — only annotate. But a Traceback ending
+// in a real logic exception (KeyError/AssertionError) still hard-flags, even on
+// "success", preserving the 2026-05-23 false-PASS guard.
+test('addendum: Traceback→ModuleNotFoundError in a SUCCEEDED run → clean (annotated, not downgraded)', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '26-self-billbot-shared-lib';
+    const probe = [
+      'Exit code 1',
+      'Traceback (most recent call last):',
+      '  File "/home/bilko/Self/.claude/skills/snopud-bill/download_bill.py", line 42, in <module>',
+      '    from playwright.sync_api import TimeoutError as PWTimeout',
+      "ModuleNotFoundError: No module named 'playwright'",
+    ].join('\n');
+    writeLog(tmp, slug, bashRunEvents(probe)); // resultSubtype defaults to success
+    const prdPath = writePrd(tmp, slug, '# Shared lib');
+    const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
+    assert.equal(verdict.verdict, 'clean', `recovered env probe must not downgrade, got ${verdict.verdict}: ${verdict.reason}`);
+    assert.equal(verdict.downgradeTo, null);
+    assert.ok(Array.isArray(verdict.annotations) && verdict.annotations.length === 1, 'should record one annotation');
+    assert.equal(verdict.annotations[0].verdict, 'verify_unavailable');
+  } finally { rmdir(tmp); }
+});
+test('addendum: bare Import/ModuleNotFound probe (no traceback) in SUCCEEDED run → clean/annotated', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '26-self-parser-tests';
+    writeLog(tmp, slug, bashRunEvents("ModuleNotFoundError: No module named 'conftest'"));
+    const prdPath = writePrd(tmp, slug, '# Parser tests');
+    const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
+    assert.equal(verdict.verdict, 'clean', `got ${verdict.verdict}: ${verdict.reason}`);
+    assert.ok(Array.isArray(verdict.annotations) && verdict.annotations.length === 1);
+  } finally { rmdir(tmp); }
+});
+test('addendum: Traceback→KeyError (real logic failure) on "success" → still transcript_errors/needs_review', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '26-real-logic-failure';
+    const out = [
+      '=== contract.json panels.sentiment ===',
+      'Traceback (most recent call last):',
+      '  File "<string>", line 1, in <module>',
+      "KeyError: 'panels.sentiment'",
+    ].join('\n');
+    writeLog(tmp, slug, bashRunEvents(out)); // success result — must NOT rescue a real failure
+    const prdPath = writePrd(tmp, slug, '# Logic failure');
+    const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
+    assert.equal(verdict.verdict, 'transcript_errors', `real logic Traceback must still flag, got ${verdict.verdict}: ${verdict.reason}`);
     assert.equal(verdict.downgradeTo, 'needs_review');
   } finally { rmdir(tmp); }
 });
@@ -487,3 +547,40 @@ test('feedback 01: quoted "Traceback..." line (leading quote) → clean', async
     assert.equal(verdict.verdict, 'clean', `quoted traceback prose must not flag, got ${verdict.verdict}: ${verdict.reason}`);
   } finally { rmdir(tmp); }
 });
+// ─── harness tool errors (feedback follow-up 2026-06-10) ─────────────────────
+test('harness tool error (<tool_use_error>) in final 20% → clean', async () => {
+  const tmp = makeTmpDir();
+  try {
+    const slug = '58-harness-tool-error';
+    // Pad with benign events so the error lands in the final 20%, then a
+    // successful result — mirrors the real 58-web-remote-correctness-batch run.
+    const events = [];
+    for (let k = 0; k < 8; k++) {
+      events.push({ type: 'assistant', message: { role: 'assistant', content: [
+        { type: 'tool_use', id: `t${k}`, name: 'Read', input: { description: `read ${k}` } }] } });
+      events.push({ type: 'user', message: { role: 'user', content: [
+        { type: 'tool_result', tool_use_id: `t${k}`, content: 'ok', is_error: false }] } });
+    }
+    events.push({ type: 'assistant', message: { role: 'assistant', content: [
+      { type: 'tool_use', id: 'tbad', name: 'bash', input: { description: 'run tests' } }] } });
+    events.push({ type: 'user', message: { role: 'user', content: [
+      { type: 'tool_result', tool_use_id: 'tbad',
+        content: '<tool_use_error>Error: No such tool available: bash</tool_use_error>', is_error: true }] } });
+    events.push({ type: 'result', subtype: 'success', result: 'All acceptance criteria verified.' });
+    writeLog(tmp, slug, events);
+    const prdPath = writePrd(tmp, slug, '# Correctness batch');
+    const verdict = await verifyRun({ runDir: tmp, prdPath, queueEntry: { slug, status: 'running' }, allJobs: [] });
+    assert.equal(verdict.verdict, 'clean', `harness tool error must not flag, got ${verdict.verdict}: ${verdict.reason}`);
+  } finally { rmdir(tmp); }
+});
+test('isHarnessToolError detects wrapper and "No such tool available"', () => {
+  const { isHarnessToolError } = require('../runVerify.cjs');
+  assert.equal(isHarnessToolError('<tool_use_error>Error: No such tool available: bash</tool_use_error>'), true);
+  assert.equal(isHarnessToolError('No such tool available: Foo'), true);
+  assert.equal(isHarnessToolError('ModuleNotFoundError: No module named x'), false);
+  assert.equal(isHarnessToolError(''), false);
+});

package/src/main/runVerify.cjs CHANGED Viewed

@@ -50,6 +50,19 @@ const VERDICTS_SCHEMA_VERSION = 1;
  *   2. Traceback + Error within 10 lines (Python exception)
  *   3. ModuleNotFoundError / ImportError (missing venv / broken deps)
  */
+/**
+ * True when a tool_result content is a Claude Code harness tool error rather
+ * than task output — emitted when the model calls a tool that doesn't exist or
+ * isn't allowed (e.g. `<tool_use_error>Error: No such tool available: bash`).
+ * The harness rejects the call; the model recovers by retrying with a valid
+ * tool. Never a task failure, so the verifier must not downgrade on it.
+ */
+function isHarnessToolError(content) {
+  if (typeof content !== 'string' || !content) return false;
+  return content.includes('<tool_use_error>')
+    || /\bNo such tool available\b/.test(content);
+}
 function detectPattern(content) {
   if (typeof content !== 'string' || !content) return null;
@@ -61,11 +74,25 @@ function detectPattern(content) {
   // (2) Python Traceback + exception line within next 10 lines. Both anchored
   // to line starts: reviewer prose quoting "will crash with ImportError" or
   // embedding "...Error:" mid-sentence must not match (feedback 2026-06-10-01).
+  //
+  // The TERMINATING exception decides the class: a Traceback ending in
+  // ModuleNotFoundError/ImportError is the missing-dependency class ("the
+  // verification couldn't run", same as detector 3), NOT a logic failure — so
+  // it routes through the weaker verify_unavailable path (env-recovery escape
+  // hatch + success demotion). A Traceback ending in any other exception
+  // (KeyError, AssertionError, …) stays transcript_errors — that is the real
+  // false-PASS class the verifier exists to catch (2026-05-23 incident).
+  // (feedback 2026-06-10 addendum: interpreter-search setup probes that ended
+  // in ModuleNotFoundError were 3/3 false positives.)
   const lines = content.split('\n');
   for (let i = 0; i < lines.length; i++) {
     if (/^\s*Traceback \(most recent call last\):/.test(lines[i])) {
       for (let j = i + 1; j < Math.min(i + 11, lines.length); j++) {
-        if (/^\s*[A-Za-z_][\w.]*(?:Error|Exception)\s*:/.test(lines[j])) {
+        const m = lines[j].match(/^\s*([A-Za-z_][\w.]*(?:Error|Exception))\s*:/);
+        if (m) {
+          if (m[1] === 'ModuleNotFoundError' || m[1] === 'ImportError') {
+            return { verdict: 'verify_unavailable', pattern: `Traceback → ${m[1]}` };
+          }
           return { verdict: 'transcript_errors', pattern: 'Traceback + Error within 10 lines' };
         }
       }
@@ -417,7 +444,7 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
       ...(extras ?? {}),
     };
     try { fs.writeFileSync(verdictsPath, JSON.stringify(record, null, 2)); } catch { /* best-effort */ }
-    return { verdict, reason, downgradeTo };
+    return { verdict, reason, downgradeTo, ...(extras ?? {}) };
   }
   try {
@@ -467,11 +494,24 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
     const total = events.length;
     const last20pctStart = Math.floor(total * 0.8);
     const issues = [];
+    // Non-blocking notes: signals worth recording but not strong enough to
+    // downgrade (e.g. a missing-dependency probe in a run that still succeeded).
+    const annotations = [];
+    const runSucceeded = !!resultEvent && resultEvent.subtype === 'success';
     for (let i = 0; i < events.length; i++) {
       const ev = events[i];
       if (ev.kind !== 'tool_result') continue;
+      // Harness tool errors (`<tool_use_error>…`) are emitted when the model
+      // requests a tool that isn't available — e.g. a wrong-case name like
+      // "bash" instead of "Bash", or a tool outside the allowlist. The harness
+      // rejects the call and the model retries with a valid tool; the task is
+      // unaffected. These are never task failures, so they are exempt from both
+      // the is_error scan and the content pattern scan (false-positive class
+      // seen in 58-web-remote-correctness-batch, 2026-06-10).
+      if (isHarnessToolError(ev.content)) continue;
       // is_error:true in the final 20% of the transcript.
       if (ev.isError && i >= last20pctStart) {
         const desc = toolUseDesc(events, ev.toolUseId);
@@ -502,11 +542,19 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
         // ModuleNotFoundError/ImportError: first check for pip/uv install in
         // the next ≤5 tool_use calls (the agent may have self-healed).
         if (!hasInstallRecovery(events, ev.seq) && !isSelfRecovered(events, ev.seq, desc)) {
-          issues.push({
-            verdict: 'verify_unavailable',
-            reason: `${hit.pattern} at event ${i}, no install recovery found`,
-            priority: 1,
-          });
+          const note = `${hit.pattern} at event ${i}, no install recovery found`;
+          if (runSucceeded) {
+            // "Verification couldn't run" is the weakest signal. When the run
+            // still reached a genuine result:success, the agent resolved its
+            // environment (often an interpreter/venv search the recovery
+            // heuristics above don't model) and finished — record it as an
+            // annotation, do NOT downgrade. transcript_errors (real logic/test
+            // failures) are never demoted this way, so the false-PASS guard is
+            // intact. (feedback 2026-06-10 addendum.)
+            annotations.push({ verdict: 'verify_unavailable', reason: note });
+          } else {
+            issues.push({ verdict: 'verify_unavailable', reason: note, priority: 1 });
+          }
         }
       } else {
         // transcript_errors (FAIL/FATAL/Traceback): self-recovery escape hatch.
@@ -520,14 +568,19 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
       }
     }
+    const extras = annotations.length ? { annotations } : undefined;
     if (issues.length === 0) {
-      return conclude('clean', 'no issues detected', null);
+      const reason = annotations.length
+        ? `no blocking issues (${annotations.length} annotation(s): ${annotations.map((a) => a.reason).join('; ')})`
+        : 'no issues detected';
+      return conclude('clean', reason, null, extras);
     }
     // Pick highest-priority issue (transcript_errors > verify_unavailable).
     issues.sort((a, b) => b.priority - a.priority);
     const top = issues[0];
-    return conclude(top.verdict, top.reason, 'needs_review');
+    return conclude(top.verdict, top.reason, 'needs_review', extras);
   } catch (e) {
     return conclude(
@@ -542,6 +595,7 @@ module.exports = {
   verifyRun,
   // Exposed for unit tests.
   detectPattern,
+  isHarnessToolError,
   toolUseName,
   extractSoakFromBody,
   parsePrdBodyDepFragments,

package/src/main/scheduler.cjs CHANGED Viewed

@@ -1257,7 +1257,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
     //     (e.g. an interactive session editing the same repo), not the job's
     //     unsaved work — so skip rather than false-flag a completed job.
     // Non-git cwds resolve to null and are skipped (the guard is best-effort).
-    if (res.exitCode === 0 && !res.rateLimited && (!verifyResult || verifyResult.verdict === 'clean')) {
+    //
+    // Runs even when a transcript-pattern verdict already fired: the commit-guard
+    // is a MATERIALLY-CHECKABLE signal (real git state) and outranks pattern hits.
+    // On a zero-exit, non-rate-limited run it is skipped only when the job is about
+    // to re-fire (HALT / deps_unmet → pending), where working-tree state is moot.
+    // When both fire, the uncommitted verdict owns the needs_review reason and the
+    // pattern hit becomes an annotation, so a real "finish protocol incomplete" is
+    // distinguishable from transcript noise in the queue (feedback 2026-06-10 addendum).
+    const guardWillRefire = verifyResult && verifyResult.downgradeTo === 'pending';
+    if (res.exitCode === 0 && !res.rateLimited && !guardWillRefire) {
       const after = await uncommittedChanges(guardCwd);
       if (after && after.length > 0) {
         const baseSet = new Set(guardBaseline || []);
@@ -1270,10 +1279,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
         const jobSelfCommitted = guardHeadBefore && guardHeadAfter && guardHeadAfter !== guardHeadBefore;
         if (newlyDirty.length > 0 && !siblingRunning && !jobSelfCommitted) {
           const sample = newlyDirty.slice(0, 3).join(', ');
+          // Carry any prior transcript verdict + its annotations forward as notes.
+          const carried = [...(verifyResult?.annotations ?? [])];
+          if (verifyResult && verifyResult.verdict !== 'clean') {
+            carried.push({ verdict: verifyResult.verdict, reason: verifyResult.reason });
+          }
           verifyResult = {
             verdict: 'uncommitted_changes',
             reason: `finish protocol incomplete: ${newlyDirty.length} uncommitted file(s) left in working tree (e.g. ${sample})`,
             downgradeTo: 'needs_review',
+            annotations: carried.length ? carried : undefined,
           };
           console.log(`[scheduler] commit-guard: ${job.slug} left ${newlyDirty.length} files uncommitted → needs_review`);
         }
@@ -1316,6 +1331,16 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
           } else {
             delete s.jobs[i2].verifierVerdict;
           }
+          // Non-blocking notes (e.g. a recovered missing-dependency probe, or a
+          // pattern hit demoted because a materially-checkable verdict outranked
+          // it) — surfaced even on completed jobs so the signal isn't lost.
+          if (verifyResult?.annotations && verifyResult.annotations.length) {
+            s.jobs[i2].verifierAnnotations = verifyResult.annotations.map(
+              (a) => `${a.verdict}: ${a.reason}`,
+            );
+          } else {
+            delete s.jobs[i2].verifierAnnotations;
+          }
           delete s.jobs[i2].runtime;
           if (effectiveStatus === 'failed') {
@@ -1629,6 +1654,66 @@ function selectHistoryJobs(jobs, limit) {
     .slice(0, cap);
 }
+// Transcript-scan verdicts that re-running verifyRun can re-evaluate. NOT
+// 'uncommitted_changes' — that comes from the git commit-guard, which verifyRun
+// does not inspect, so re-scanning it would always return 'clean' and wrongly
+// heal a genuinely-unfinished job.
+const RESCANNABLE_VERDICTS = new Set(['transcript_errors', 'verify_unavailable']);
+/**
+ * Pure predicate: is this job eligible for the boot re-verify self-heal? Only
+ * needs_review jobs with a runId AND a transcript-scan verdict. Crucially
+ * EXCLUDES 'uncommitted_changes' (git commit-guard) — verifyRun can't see git,
+ * so re-scanning it would falsely heal an unfinished job. Exported for tests.
+ */
+function isRescanCandidate(job) {
+  return !!job
+    && job.status === 'needs_review'
+    && !!job.runId
+    && RESCANNABLE_VERDICTS.has(job.verifierVerdict);
+}
+/**
+ * Self-healing pass over needs_review jobs. The verifier runs in-process, so a
+ * fix to runVerify.cjs only takes effect for jobs verified AFTER an app
+ * restart — jobs flagged by the old (buggy) verifier stay stuck in needs_review
+ * forever. On boot we re-run the CURRENT verifier over every transcript-scan
+ * needs_review job and auto-complete the ones that now pass clean, so verifier
+ * improvements retroactively clear their own false positives (2026-06-10:
+ * anchored ImportError detectors + harness-tool-error exemption healed 8 jobs).
+ *
+ * @returns {Promise<{rescanned:number, healed:string[]}>}
+ */
+async function reverifyNeedsReview() {
+  const snap = await readQueue();
+  const candidates = snap.jobs.filter(isRescanCandidate);
+  const healed = [];
+  for (const job of candidates) {
+    const runDir = path.join(RUNS_DIR, job.runId);
+    const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
+    let v = null;
+    try {
+      v = await verifyRun({ runDir, prdPath, queueEntry: job, allJobs: snap.jobs });
+    } catch { continue; } // unreadable log etc. — leave for human review
+    if (v && v.verdict === 'clean') healed.push(job.slug);
+  }
+  if (healed.length) {
+    const healSet = new Set(healed);
+    await mutate((s) => {
+      for (const j of s.jobs) {
+        if (j.status === 'needs_review' && healSet.has(j.slug)) {
+          j.status = 'completed';
+          j.error = null;
+          delete j.verifierVerdict;
+        }
+      }
+    });
+    console.log(`[scheduler] boot reverify: healed ${healed.length} stale needs_review → completed (${healed.join(', ')})`);
+    await broadcast();
+  }
+  return { rescanned: candidates.length, healed };
+}
 function registerScheduleHandlers() {
   ensureDirs();
   supervisor.registerHandlers();
@@ -1666,6 +1751,13 @@ function registerScheduleHandlers() {
     };
   });
+  ipcMain.handle('schedule:reverify-needs-review', async () => {
+    // Manual trigger for the boot self-heal pass — re-scan needs_review jobs
+    // with the current verifier and auto-complete the ones that now pass clean.
+    const result = await reverifyNeedsReview();
+    return { ok: true, ...result };
+  });
   ipcMain.handle('schedule:force-tick', async () => {
     // Bypass the billing-poll gate entirely — fire pending jobs immediately regardless of meter state.
     // Clears any existing pause first (same semantics as run-now).
@@ -1921,6 +2013,13 @@ async function init() {
     await setPaused(boot.paused.reason, boot.paused.resumeAt);
   }
+  // Self-heal stale needs_review flags using the current verifier (see
+  // reverifyNeedsReview). Runs once on boot so a shipped verifier fix clears
+  // its own historical false positives without manual retagging.
+  await reverifyNeedsReview().catch((e) => {
+    console.error(`[scheduler] boot reverify failed: ${e?.message ?? e}`);
+  });
   await rescheduleTimer();
   // Refresh next-reset every 10 minutes — billing window can shift if usage
   // resets early or the auth token rotates. Tracked so re-init doesn't leak.
@@ -2089,4 +2188,4 @@ const remote = {
   },
 };
-module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs, pollRecoveryClearSource, memoryLimitedBatchSize };
+module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs, pollRecoveryClearSource, memoryLimitedBatchSize, reverifyNeedsReview, isRescanCandidate };

package/src/main/webRemote.cjs CHANGED Viewed

@@ -589,9 +589,13 @@ async function tailLines(filePath, fromOffset) {
     const len = stat.size - start;
     const buf = Buffer.alloc(len);
     await fd.read(buf, 0, len, start);
-    const parts = buf.toString('utf8').split('\n').filter(Boolean);
+    // Shift before filter: when dropFirst is set, the first element is the tail
+    // of the previous partial line, and it may be an empty string (the buffer
+    // starts with '\n'). If we filter(Boolean) first, that empty fragment
+    // disappears and shift() would remove the first valid line instead.
+    const parts = buf.toString('utf8').split('\n');
     if (dropFirst && parts.length) parts.shift();
-    return { lines: parts, size: stat.size, inode: stat.ino };
+    return { lines: parts.filter(Boolean), size: stat.size, inode: stat.ino };
   } finally {
     await fd.close();
   }