npm - claude-code-session-manager - Versions diffs - 0.19.0 → 0.20.1 - Mend

claude-code-session-manager 0.19.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/src/main/scheduler.cjs CHANGED

@@ -45,12 +45,14 @@ const fsp = require('node:fs/promises');
 const path = require('node:path');
 const os = require('node:os');
 const { randomUUID } = require('node:crypto');
+const { execFile } = require('node:child_process');
 const { ipcMain } = require('electron');
 const billing = require('./usage.cjs');
 const { cleanChildEnv } = require('./lib/cleanEnv.cjs');
 const supervisor = require('./supervisor.cjs');
 const { resolveClaudeBin } = require('./lib/claudeBin.cjs');
 const { readTail } = require('./lib/fileTail.cjs');
+const { claudePidAlive, classifyRunOutcome } = require('./lib/reaperHelpers.cjs');
 const { openLog, withChildAndLog } = require('./lib/childWithLog.cjs');
 const { sendIfAlive } = require('./lib/sendToRenderer.cjs');
 const prdParser = require('./scheduler/prdParser.cjs');
@@ -62,6 +64,7 @@ const {
   USAGE_REFRESH_INTERVAL_MS,
   MAX_JOB_DURATION_MS,
 } = require('./lib/schedulerConfig.cjs');
+const { pickForProject, pickNextBatch, DEFAULT_PROJECT_CWD } = require('./lib/schedulerBatch.cjs');
 const MAX_INVESTIGATION_DURATION_MS = 30 * 60_000;
@@ -88,6 +91,68 @@ const RESULT_TAIL_BYTES = 8 * 1024;
 const IDLE_OUTPUT_KILL_MS = 20 * 60_000;
 const IDLE_CHECK_INTERVAL_MS = 60_000;
+// Appended to every scheduled job prompt so the queue can be RELIED ON to finish
+// work to a consistent bar: review → security-review → verify → commit. Enforced
+// centrally here (not per-PRD) so it applies to every current and future PRD.
+// The commit step is also backstopped by the post-run commit guard below: a
+// clean exit that leaves uncommitted changes is downgraded to needs_review.
+const FINISH_PROTOCOL = `
+---
+# SCHEDULER FINISH PROTOCOL (mandatory — runs AFTER the work above)
+Once every acceptance-criteria line above is satisfied, finish in this EXACT
+sequence. Do not stop before the commit lands; committing is part of the job.
+1. CODE REVIEW — run \`/code-review --fix\` on your changes and apply the fixes it
+   surfaces (correctness first). For any finding you judge a false positive, say
+   why in your result; do not silently skip it. If \`/code-review\` is not
+   available in this environment, do an equivalent careful self-review instead.
+2. SECURITY REVIEW — run \`/security-review\` and address every finding (or
+   justify it). If unavailable, self-review the diff for injection, secrets,
+   path traversal, and unsafe input handling.
+3. VERIFY — run the project's OWN check commands (typecheck / lint / tests — the
+   project's CLAUDE.md names them; infer from the repo if not) and make them
+   pass. Do not assume npm; use whatever the target project uses.
+4. COMMIT — stage and commit ALL changes with a clear conventional message:
+   \`git add -A && git commit -m "<type>(<scope>): <summary>"\`.
+A job that exits with uncommitted changes is treated as INCOMPLETE and flagged
+for review. Do NOT add work beyond the acceptance criteria — this protocol is the
+only post-AC work. If a review finding can't be fixed within scope, commit what
+you have, describe the finding in the commit body, and note the follow-up in your
+final result.`;
+// Parse `git status --porcelain` output into an array of changed-path strings.
+// Pure; exported for unit tests. Each line is "XY<space>PATH" (2 status chars +
+// a space), so the path starts at index 3. Rename lines ("R  a -> b") keep the
+// whole "a -> b" tail as one entry. Null/undefined/empty stdout yields [].
+function parsePorcelain(stdout) {
+  return String(stdout || '') // falsy stdout (null, undefined, '') becomes ''
+    .split('\n')
+    .filter((l) => l.length > 0) // drop the empty piece after the final newline
+    .map((l) => l.slice(3)) // strip the "XY " status prefix; the rest is kept verbatim
+    .filter(Boolean); // drop lines that had nothing after the prefix
+}
+// Resolve to the paths that `git -C <cwd> status --porcelain` reports (parsed by
+// parsePorcelain), or to null when cwd is falsy or execFile reports any error
+// (not a work tree, git missing, 10 s timeout). Such errors resolve null, not reject.
+function uncommittedChanges(cwd) {
+  return new Promise((resolve) => {
+    if (!cwd) { resolve(null); return; } // no directory to inspect → guard does not apply
+    execFile( // argv form: the arguments are passed to git directly, not through a shell
+      'git',
+      ['-C', cwd, 'status', '--porcelain'],
+      { timeout: 10_000, windowsHide: true }, // give up after 10 s; no console window on Windows
+      (err, stdout) => {
+        if (err) { resolve(null); return; } // not a repo / git missing / timeout → skip
+        resolve(parsePorcelain(stdout));
+      },
+    );
+  });
+}
 const ROOT = path.join(os.homedir(), '.claude', 'session-manager', 'scheduled-plans');
 const PRDS_DIR = path.join(ROOT, 'prds');
 const RUNS_DIR = path.join(ROOT, 'runs');
@@ -96,7 +161,7 @@ const QUEUE_PATH = path.join(ROOT, 'queue.json');
 const SCHEDULER_STATE_PATH = path.join(os.homedir(), '.claude', 'session-manager', 'scheduler-state.json');
 const HEARTBEAT_PATH = path.join(os.homedir(), '.claude', 'session-manager', 'scheduler-heartbeat.log');
 const HEARTBEAT_MAX_BYTES = 1024 * 1024;
-const DEFAULT_PROJECT_CWD = path.join(os.homedir(), 'Projects', 'session-manager');
+// DEFAULT_PROJECT_CWD imported from lib/schedulerBatch.cjs (single source of truth).
 const ENV_CAP = process.env.SM_SCHEDULER_MAX_CONCURRENCY
   ? Math.max(1, Math.min(20, parseInt(process.env.SM_SCHEDULER_MAX_CONCURRENCY, 10) || 4))
@@ -677,7 +742,9 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
   const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
   try {
     const parsed = await parsePrd(prdPath);
-    prompt = parsed.body;
+    // Centrally enforce the review → security-review → verify → commit finish
+    // sequence on every job, regardless of what the PRD body says.
+    prompt = parsed.body + FINISH_PROTOCOL;
   } catch (e) {
     safeLog(`[scheduler] failed to read PRD: ${e?.message}\n`);
     closeFd();
@@ -877,121 +944,10 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
   });
 }
-/**
- * Pick the next batch of jobs to spawn this tick.
- *
- * Rules:
- *   1. Find the lowest parallelGroup that has pending jobs not already in
- *      runningSet.
- *   2. If that group has jobs in runningSet (i.e., we're mid-group), backfill
- *      up to (cap - runningSet.size) more from the SAME group.
- *   3. If the current group has NO jobs in runningSet (new group), and there
- *      are still jobs from an earlier group in runningSet, do nothing — wait
- *      for the earlier group to drain before advancing.
- *   4. **Late-arrival**: if a lower-numbered (higher-priority) PRD reconciles
- *      AFTER a higher-numbered group was already picked, fire the late-arrival
- *      immediately in parallel with the active group rather than starving it
- *      until the active group drains. This handles the reconcile-race where
- *      a PRD file lands on disk between two pickNextBatch invocations.
- *   5. A singleton group (unique NN, no other jobs share it) runs alone;
- *      no bleed into adjacent groups.
- *
- * Returns array of job objects to spawn. O(N) where N = pending.length.
- */
-function pickNextBatch(allJobs, running, cap) {
-  const pending = allJobs.filter((j) => j.status === 'pending' && !running.has(j.slug));
-  if (pending.length === 0) return [];
-  // Lowest pending group (computed up-front so the failure gate can compare).
-  const lowestPendingGroup = pending.reduce(
-    (min, j) => Math.min(min, j.parallelGroup ?? 99),
-    Infinity,
-  );
-  // Cross-group failure gate: refuse to advance past a group with failed jobs.
-  // Without this, a failed foundation PRD (e.g. 03-doc-editor-foundation
-  // crashed with a NUL-byte spawn error on 2026-05-21) doesn't stop later
-  // groups (04, 05, 06...) from running and silently corrupting the project
-  // state. The user can re-queue the failed job (pending) or archive it to
-  // unblock the gate, but the default is to halt until the failure is
-  // acknowledged.
-  const blockingFailures = allJobs.filter((j) =>
-    (j.status === 'failed' || j.status === 'needs_review') &&
-    (j.parallelGroup ?? 99) < lowestPendingGroup,
-  );
-  if (blockingFailures.length > 0) {
-    const slugs = blockingFailures.map((j) => j.slug).join(', ');
-    console.log(`[scheduler] failure-gate: holding g${lowestPendingGroup} — ${blockingFailures.length} failed job(s) in earlier groups [${slugs}]. Reset to pending or archive to unblock.`);
-    return [];
-  }
-  // Groups with at least one job in flight: either tracked in runningSet
-  // (this process spawned it) or still marked 'running' in queue.json
-  // (persisted from a previous session that hasn't been orphan-reset yet).
-  const activeGroups = new Set();
-  for (const slug of running) {
-    const job = allJobs.find((j) => j.slug === slug);
-    if (job) activeGroups.add(job.parallelGroup ?? 99);
-  }
-  for (const j of allJobs) {
-    if (j.status === 'running' && !running.has(j.slug)) {
-      activeGroups.add(j.parallelGroup ?? 99);
-    }
-  }
-  // Total slots consumed: in-process spawns + queue.json running count.
-  const queueRunningCount = allJobs.filter((j) => j.status === 'running').length;
-  const effectiveRunning = Math.max(running.size, queueRunningCount);
-  // (lowestPendingGroup was computed up-front for the failure-gate check.)
-  if (activeGroups.size > 0) {
-    const lowestActive = Math.min(...activeGroups);
-    if (lowestPendingGroup > lowestActive) {
-      // Earlier group still running — wait for it to drain before advancing.
-      console.log(`[scheduler] concurrency: g${lowestActive} in flight, holding g${lowestPendingGroup}`);
-      return [];
-    }
-    if (lowestPendingGroup < lowestActive) {
-      // Late-arrival: a lower-numbered (higher-priority) PRD reconciled AFTER
-      // a higher-numbered group was already picked. Without this branch the
-      // pending PRD starves until the active group drains — the bug observed
-      // on 2026-05-10 where 118-studio-add-wave2-games (g118) was held while
-      // the g130 hardening trio ran. Honor priority: fire the late-arrival
-      // now, in parallel with the active group. (Strict serial group
-      // ordering still applies between groups that were both present at the
-      // time of picking; this only handles the reconcile-race edge case.)
-      const slots = cap - effectiveRunning;
-      if (slots <= 0) {
-        console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots for late-arrival g${lowestPendingGroup}`);
-        return [];
-      }
-      const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestPendingGroup).slice(0, slots);
-      console.log(`[scheduler] concurrency: firing late-arrival g${lowestPendingGroup} (${batch.length} job(s)) alongside active g${lowestActive}`);
-      return batch;
-    }
-    // Backfill slots remaining in the current group.
-    const slots = cap - effectiveRunning;
-    if (slots <= 0) {
-      console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots`);
-      return [];
-    }
-    const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestActive).slice(0, slots);
-    if (batch.length > 0) {
-      console.log(`[scheduler] concurrency: backfilling ${batch.length} into g${lowestActive} (${effectiveRunning}/${cap} running)`);
-    }
-    return batch;
-  }
-  // No active group — start the next group fresh.
-  const slots = cap - effectiveRunning;
-  if (slots <= 0) {
-    console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots`);
-    return [];
-  }
-  const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestPendingGroup).slice(0, slots);
-  console.log(`[scheduler] concurrency: starting g${lowestPendingGroup} with ${batch.length} job(s) (cap ${cap})`);
-  return batch;
-}
+// pickNextBatch and pickForProject are defined in lib/schedulerBatch.cjs and
+// required at the top of this file. Group-ordering gates are evaluated per
+// project (keyed by cwd) so jobs in different repos run concurrently up to
+// the cap; within one project, sequential-group semantics are preserved.
 /**
  * Recognize fix-plan slugs (NN-fix-...) so we don't recurse on a fix-plan that
@@ -1177,6 +1133,11 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
     });
     await broadcast();
+    // Commit-guard baseline: snapshot the working tree BEFORE the run so the
+    // post-run check flags only paths THIS job left dirty, not pre-existing WIP.
+    const guardCwd = job.cwd || defaultCwd;
+    const guardBaseline = await uncommittedChanges(guardCwd);
     const res = await executeJob(job, runDir, defaultCwd, async (pid, sessionId, cwd) => {
       await mutate((s) => {
         const idx = s.jobs.findIndex((x) => x.slug === job.slug);
@@ -1220,6 +1181,36 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
       }
     }
+    // Commit guard: a clean exit that left NEW uncommitted changes means the
+    // finish protocol's COMMIT step did not run. Surface it as needs_review
+    // instead of letting it masquerade as 'completed' (the PRD 03/04
+    // left-uncommitted incident). Two false-positive defenses:
+    //   - baseline DELTA: only files dirtied during THIS run count, so
+    //     pre-existing user WIP is excluded; and
+    //   - sibling skip: if another job is concurrently writing the same repo,
+    //     working-tree dirt can't be attributed to this job, so skip the guard.
+    // Non-git cwds resolve to null and are skipped (the guard is best-effort).
+    if (res.exitCode === 0 && !res.rateLimited && (!verifyResult || verifyResult.verdict === 'clean')) {
+      const after = await uncommittedChanges(guardCwd);
+      if (after && after.length > 0) {
+        const baseSet = new Set(guardBaseline || []);
+        const newlyDirty = after.filter((p) => !baseSet.has(p));
+        const guardState = await readQueue().catch(() => ({ jobs: [] }));
+        const siblingRunning = (guardState.jobs || []).some(
+          (j) => j.slug !== job.slug && j.status === 'running' && (j.cwd || defaultCwd) === guardCwd,
+        );
+        if (newlyDirty.length > 0 && !siblingRunning) {
+          const sample = newlyDirty.slice(0, 3).join(', ');
+          verifyResult = {
+            verdict: 'uncommitted_changes',
+            reason: `finish protocol incomplete: ${newlyDirty.length} uncommitted file(s) left in working tree (e.g. ${sample})`,
+            downgradeTo: 'needs_review',
+          };
+          console.log(`[scheduler] commit-guard: ${job.slug} left ${newlyDirty.length} files uncommitted → needs_review`);
+        }
+      }
+    }
     let actuallyFailed = false;
     let failedJobSnapshot = null;
     await mutate((s) => {
@@ -1385,10 +1376,60 @@ async function maybeLaunchWhenAvailable(state) {
   tickQueue().catch((e) => console.error('[scheduler] tickQueue error', e));
 }
+// ---------- dead-process reaper ----------
+/**
+ * Finalize queue jobs still marked 'running' whose recorded pid fails
+ * claudePidAlive: 'completed' if classifyRunOutcome(log) is 'success', otherwise
+ * 'failed'. Jobs without runtime.pid (spawn may be mid-flight) or with a live pid
+ * are skipped; returns early when runningSet is empty. Errors are caught and
+ * logged, never thrown. Called from pollLoop; exported for unit tests.
+ */
+async function reapDeadRunningJobs() {
+  try {
+    if (runningSet.size === 0) return; // fast path: no in-flight jobs
+    const state = await readQueue();
+    const dead = [];
+    for (const j of state.jobs) {
+      if (j.status !== 'running') continue;
+      const pid = j.runtime?.pid;
+      if (!pid) continue; // spawn may be mid-flight; give it a cycle
+      if (claudePidAlive(pid)) continue; // process still alive → leave the job alone
+      const logPath = j.runId
+        ? path.join(RUNS_DIR, j.runId, `${j.slug}.log`)
+        : null;
+      const outcome = logPath ? classifyRunOutcome(logPath) : 'unknown'; // no runId → no log to inspect
+      dead.push({ slug: j.slug, pid, outcome });
+    }
+    if (dead.length === 0) return;
+    await mutate((s) => { // apply the verdicts collected above against the current state
+      for (const { slug, pid, outcome } of dead) {
+        const idx = s.jobs.findIndex((x) => x.slug === slug);
+        if (idx < 0 || s.jobs[idx].status !== 'running') continue; // race guard
+        const success = outcome === 'success';
+        s.jobs[idx].status = success ? 'completed' : 'failed';
+        s.jobs[idx].exitCode = success ? 0 : (s.jobs[idx].exitCode ?? 1); // keep an already-recorded exit code, else 1
+        s.jobs[idx].finishedAt = new Date().toISOString();
+        s.jobs[idx].error = success ? null : `reaped: process gone, no success result in log (${outcome})`;
+        delete s.jobs[idx].runtime;
+        runningSet.delete(slug); // stop tracking the job as in flight
+        console.log(`[scheduler] reaped dead job slug=${slug} pid=${pid} outcome=${outcome}`);
+      }
+    });
+    await broadcast();
+    tickQueue().catch(() => {}); // fire-and-forget; a tickQueue rejection is deliberately ignored
+  } catch (e) {
+    console.warn('[scheduler] reapDeadRunningJobs error', e?.message);
+  }
+}
 // ---------- poll loop with exponential backoff ----------
 async function pollLoop() {
   try {
+    await reapDeadRunningJobs().catch(() => {});
     const r = await billing.fetchUsage();
     if (r.kind === 'ok') {
@@ -1736,11 +1777,22 @@ async function init() {
   loadSchedulerState();
   bootedAt = Date.now();
-  // Boot reconciliation: mark any job that was 'running' when the app died as
-  // 'failed', AND kill its detached claude child if still alive. Without the
-  // kill step the child keeps running as a zombie writing to the project on
-  // its own schedule, which is exactly what happened on 2026-05-21 (PID 78230
-  // writing PRD 05's output while the scheduler thought the job was orphaned).
+  // Boot reconciliation: finalize any job that was 'running' when the app died.
+  // Check the run log first — a job that emitted result/success before the crash
+  // should be marked 'completed', not 'failed', so it doesn't wedge the queue
+  // via the failure-gate. Also kill any still-live orphan claude child to prevent
+  // it from continuing to write to the project unsupervised (2026-05-21 incident).
+  //
+  // classifyRunOutcome calls readTail → fs.readFileSync (up to 64 KB per job).
+  // Pre-compute all outcomes BEFORE entering the mutate lock so the synchronous
+  // reads (which still block the event loop) never run inside the mutateTail chain.
+  const bootSnap = readQueueSync();
+  const bootOutcomes = new Map();
+  for (const j of bootSnap.jobs) {
+    if (j.status !== 'running') continue;
+    const logPath = j.runId ? path.join(RUNS_DIR, j.runId, `${j.slug}.log`) : null;
+    bootOutcomes.set(j.slug, logPath ? classifyRunOutcome(logPath) : 'unknown');
+  }
   await mutate((state) => {
     for (const j of state.jobs) {
       if (j.status === 'running') {
@@ -1753,10 +1805,14 @@ async function init() {
             console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
           }
         }
-        j.status = 'failed';
-        j.error = `orphaned: app restarted while running${killNote}`;
+        const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
+        const success = outcome === 'success';
+        j.status = success ? 'completed' : 'failed';
+        j.exitCode = success ? 0 : (j.exitCode ?? 1);
+        j.error = success ? null : `orphaned: app restarted while running${killNote}`;
         j.finishedAt = new Date().toISOString();
         delete j.runtime;
+        console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → ${j.status}`);
       }
     }
   });
@@ -1833,4 +1889,97 @@ async function init() {
   }
 }
-module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs };
+// remote — callable from webRemote.cjs without going through IPC.
+const remote = {
+  async getState() { // reads the queue, reconciles it, writes it back, returns the state payload
+    const state = await readQueue();
+    await reconcile(state);
+    await writeQueue(state);
+    return buildScheduleStatePayload(state, { withPaths: true });
+  },
+  async readPrd(slug) { // → { ok: true, text } or { ok: false, error }
+    const filePath = safeSlugPath(slug);
+    if (!filePath) return { ok: false, error: 'invalid slug' };
+    try {
+      // realpath resolves symlinks; re-check boundary to block a rogue agent job
+      // that places a symlink inside PRDS_DIR pointing outside the safe root.
+      const real = await fsp.realpath(filePath);
+      if (!real.startsWith(PRDS_DIR + path.sep)) {
+        return { ok: false, error: 'invalid slug' };
+      }
+      const text = await fsp.readFile(real, 'utf8');
+      return { ok: true, text };
+    } catch (e) {
+      return { ok: false, error: e?.message };
+    }
+  },
+  async readLog(slug, runId) { // → { ok: true, text } or { ok: false, error }
+    const logPath = path.resolve(path.join(RUNS_DIR, runId, `${slug}.log`)); // NOTE(review): path.join throws on a non-string runId, outside the try → rejects; confirm callers validate
+    if (!logPath.startsWith(RUNS_DIR + path.sep)) { // lexical check: rejects ".." escapes before touching the disk
+      return { ok: false, error: 'invalid slug or runId' };
+    }
+    try {
+      // realpath resolves symlinks; re-check boundary to block a rogue agent job
+      // that places a symlink inside RUNS_DIR pointing outside the safe root.
+      const real = await fsp.realpath(logPath);
+      if (!real.startsWith(RUNS_DIR + path.sep)) {
+        return { ok: false, error: 'invalid slug or runId' };
+      }
+      const text = await fsp.readFile(real, 'utf8');
+      return { ok: true, text };
+    } catch (e) {
+      return { ok: false, error: e?.message };
+    }
+  },
+  async writePrd(slug, body) { // → { ok: true, bytesWritten } or { ok: false, error }
+    const resolved = safeSlugPath(slug); // NOTE(review): no realpath boundary re-check here, unlike readPrd — confirm symlinks are handled elsewhere
+    if (!resolved) return { ok: false, error: 'invalid slug' };
+    try {
+      await config.writeTextAtomic(resolved, body);
+      const stat = await fsp.stat(resolved); // size on disk after the write
+      return { ok: true, bytesWritten: stat.size };
+    } catch (e) {
+      return { ok: false, error: e?.message ?? 'write failed' };
+    }
+  },
+  async resetJob(slug) { // → { ok: true } or { ok: false, error: 'invalid slug' | 'not found' }
+    if (!safeSlugPath(slug)) return { ok: false, error: 'invalid slug' }; // safeSlugPath is used only to validate the slug here
+    const found = await mutate((state) => {
+      const idx = state.jobs.findIndex((j) => j.slug === slug);
+      if (idx < 0) return false;
+      resetJobFields(state.jobs[idx]);
+      return true;
+    });
+    if (!found) return { ok: false, error: 'not found' };
+    await broadcast();
+    return { ok: true };
+  },
+  async runNow() { // clears the pause, starts runDueJobs, and returns without waiting for it
+    await clearPause('run-now');
+    runDueJobs().catch((e) => logs.writeLine({ // not awaited: failures are logged, never surfaced to the caller
+      level: 'error', scope: 'scheduler',
+      message: 'runDueJobs error (remote:run-now)', meta: { error: e?.message },
+    }));
+    return { ok: true };
+  },
+  async setConfig(partial) { // → { ok: true, config } with the merged config
+    const cfg = await mutate((state) => {
+      const { supervisor: supPartial, ...rest } = partial; // NOTE(review): throws if partial is null/undefined — confirm callers always pass an object
+      state.config = { ...state.config, ...rest }; // top-level keys overwrite
+      if (supPartial !== undefined) {
+        state.config.supervisor = { ...(state.config.supervisor ?? {}), ...supPartial }; // supervisor is merged key-by-key instead of replaced
+      }
+      return state.config;
+    });
+    await rescheduleTimer();
+    return { ok: true, config: cfg };
+  },
+};
+module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs };