claude-code-session-manager 0.19.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist/assets/{TiptapBody-CO4q65kH.js → TiptapBody-Db7_uXrI.js} +1 -1
  2. package/dist/assets/{cssMode-0tbceX4i.js → cssMode-DFKJhhi6.js} +1 -1
  3. package/dist/assets/{freemarker2-Dv8wl_HH.js → freemarker2-DUat8x8o.js} +1 -1
  4. package/dist/assets/{handlebars-MzrjkW3b.js → handlebars-B2C1qhAI.js} +1 -1
  5. package/dist/assets/{html-C0YEYUHk.js → html-khtg0DVs.js} +1 -1
  6. package/dist/assets/{htmlMode-Bf9ccIo3.js → htmlMode-Jmhs-vfl.js} +1 -1
  7. package/dist/assets/{index-BsSklu93.css → index-BkkBX1z7.css} +1 -1
  8. package/dist/assets/{index-BXeFi7dA.js → index-pqnuXM14.js} +634 -624
  9. package/dist/assets/{javascript-BZhQgLYg.js → javascript-i1CXbgg4.js} +1 -1
  10. package/dist/assets/{jsonMode-XkKuSIs5.js → jsonMode-DXZaj-kR.js} +1 -1
  11. package/dist/assets/{liquid-B6fnroVU.js → liquid-Ds7jUF53.js} +1 -1
  12. package/dist/assets/{lspLanguageFeatures-BAIq7N4N.js → lspLanguageFeatures-B_15vO6X.js} +1 -1
  13. package/dist/assets/{mdx-DzH38OXA.js → mdx-DgrrLgTE.js} +1 -1
  14. package/dist/assets/{python-ak0De5ar.js → python-Cff3tPw3.js} +1 -1
  15. package/dist/assets/{razor-DC-IpQpX.js → razor-DlyG7FmM.js} +1 -1
  16. package/dist/assets/{tsMode-DaZCqNuS.js → tsMode-DRmmmttS.js} +1 -1
  17. package/dist/assets/{typescript-D5YkmMgh.js → typescript-DQFL2T1p.js} +1 -1
  18. package/dist/assets/{whisperWorker-CcsPqZUS.js → whisperWorker-Dbia1OpC.js} +15 -15
  19. package/dist/assets/{xml-8idHpw2C.js → xml-CwsJEzdU.js} +1 -1
  20. package/dist/assets/{yaml-Dm8NKlcv.js → yaml-BDsDjf-y.js} +1 -1
  21. package/dist/index.html +2 -2
  22. package/package.json +5 -2
  23. package/src/main/health.cjs +216 -0
  24. package/src/main/historyAggregator.cjs +15 -9
  25. package/src/main/index.cjs +7 -2
  26. package/src/main/ipcSchemas.cjs +43 -0
  27. package/src/main/kg.cjs +0 -0
  28. package/src/main/lib/reaperHelpers.cjs +67 -0
  29. package/src/main/lib/schedulerBatch.cjs +212 -0
  30. package/src/main/lib/schedulerConfig.cjs +9 -1
  31. package/src/main/scheduler.cjs +274 -125
  32. package/src/main/webRemote.cjs +916 -0
  33. package/src/preload/api.d.ts +78 -15
  34. package/src/preload/index.cjs +41 -8
  35. package/src/main/projectSkills.cjs +0 -124
@@ -45,12 +45,14 @@ const fsp = require('node:fs/promises');
45
45
  const path = require('node:path');
46
46
  const os = require('node:os');
47
47
  const { randomUUID } = require('node:crypto');
48
+ const { execFile } = require('node:child_process');
48
49
  const { ipcMain } = require('electron');
49
50
  const billing = require('./usage.cjs');
50
51
  const { cleanChildEnv } = require('./lib/cleanEnv.cjs');
51
52
  const supervisor = require('./supervisor.cjs');
52
53
  const { resolveClaudeBin } = require('./lib/claudeBin.cjs');
53
54
  const { readTail } = require('./lib/fileTail.cjs');
55
+ const { claudePidAlive, classifyRunOutcome } = require('./lib/reaperHelpers.cjs');
54
56
  const { openLog, withChildAndLog } = require('./lib/childWithLog.cjs');
55
57
  const { sendIfAlive } = require('./lib/sendToRenderer.cjs');
56
58
  const prdParser = require('./scheduler/prdParser.cjs');
@@ -62,6 +64,7 @@ const {
62
64
  USAGE_REFRESH_INTERVAL_MS,
63
65
  MAX_JOB_DURATION_MS,
64
66
  } = require('./lib/schedulerConfig.cjs');
67
+ const { pickForProject, pickNextBatch, DEFAULT_PROJECT_CWD } = require('./lib/schedulerBatch.cjs');
65
68
 
66
69
  const MAX_INVESTIGATION_DURATION_MS = 30 * 60_000;
67
70
 
@@ -88,6 +91,68 @@ const RESULT_TAIL_BYTES = 8 * 1024;
88
91
  const IDLE_OUTPUT_KILL_MS = 20 * 60_000;
89
92
  const IDLE_CHECK_INTERVAL_MS = 60_000;
90
93
 
94
+ // Appended to every scheduled job prompt so the queue can be RELIED ON to finish
95
+ // work to a consistent bar: review → security-review → verify → commit. Enforced
96
+ // centrally here (not per-PRD) so it applies to every current and future PRD.
97
+ // The commit step is also backstopped by the post-run commit guard below: a
98
+ // clean exit that leaves uncommitted changes is downgraded to needs_review.
99
+ const FINISH_PROTOCOL = `
100
+
101
+ ---
102
+ # SCHEDULER FINISH PROTOCOL (mandatory — runs AFTER the work above)
103
+
104
+ Once every acceptance-criteria line above is satisfied, finish in this EXACT
105
+ sequence. Do not stop before the commit lands; committing is part of the job.
106
+
107
+ 1. CODE REVIEW — run \`/code-review --fix\` on your changes and apply the fixes it
108
+ surfaces (correctness first). For any finding you judge a false positive, say
109
+ why in your result; do not silently skip it. If \`/code-review\` is not
110
+ available in this environment, do an equivalent careful self-review instead.
111
+ 2. SECURITY REVIEW — run \`/security-review\` and address every finding (or
112
+ justify it). If unavailable, self-review the diff for injection, secrets,
113
+ path traversal, and unsafe input handling.
114
+ 3. VERIFY — run the project's OWN check commands (typecheck / lint / tests — the
115
+ project's CLAUDE.md names them; infer from the repo if not) and make them
116
+ pass. Do not assume npm; use whatever the target project uses.
117
+ 4. COMMIT — stage and commit ALL changes with a clear conventional message:
118
+ \`git add -A && git commit -m "<type>(<scope>): <summary>"\`.
119
+
120
+ A job that exits with uncommitted changes is treated as INCOMPLETE and flagged
121
+ for review. Do NOT add work beyond the acceptance criteria — this protocol is the
122
+ only post-AC work. If a review finding can't be fixed within scope, commit what
123
+ you have, describe the finding in the commit body, and note the follow-up in your
124
+ final result.`;
125
+
126
+ // Parse `git status --porcelain` output into a list of changed paths. Pure and
127
+ // exported for unit testing. Each porcelain line is "XY<space>PATH" (2 status
128
+ // chars + space), so the path starts at index 3; rename lines ("R a -> b")
129
+ // keep the "a -> b" tail, which is fine for a human-facing dirty-file list.
130
+ function parsePorcelain(stdout) {
131
+ return String(stdout || '')
132
+ .split('\n')
133
+ .filter((l) => l.length > 0)
134
+ .map((l) => l.slice(3))
135
+ .filter(Boolean);
136
+ }
137
+
138
+ // Return the list of uncommitted paths in cwd, or null when the guard does not
139
+ // apply (cwd is not a git work tree, git is missing, or the call errors). Never
140
+ // throws — a guard failure must not fail an otherwise-successful job.
141
+ function uncommittedChanges(cwd) {
142
+ return new Promise((resolve) => {
143
+ if (!cwd) { resolve(null); return; }
144
+ execFile(
145
+ 'git',
146
+ ['-C', cwd, 'status', '--porcelain'],
147
+ { timeout: 10_000, windowsHide: true },
148
+ (err, stdout) => {
149
+ if (err) { resolve(null); return; } // not a repo / git missing → skip
150
+ resolve(parsePorcelain(stdout));
151
+ },
152
+ );
153
+ });
154
+ }
155
+
91
156
  const ROOT = path.join(os.homedir(), '.claude', 'session-manager', 'scheduled-plans');
92
157
  const PRDS_DIR = path.join(ROOT, 'prds');
93
158
  const RUNS_DIR = path.join(ROOT, 'runs');
@@ -96,7 +161,7 @@ const QUEUE_PATH = path.join(ROOT, 'queue.json');
96
161
  const SCHEDULER_STATE_PATH = path.join(os.homedir(), '.claude', 'session-manager', 'scheduler-state.json');
97
162
  const HEARTBEAT_PATH = path.join(os.homedir(), '.claude', 'session-manager', 'scheduler-heartbeat.log');
98
163
  const HEARTBEAT_MAX_BYTES = 1024 * 1024;
99
- const DEFAULT_PROJECT_CWD = path.join(os.homedir(), 'Projects', 'session-manager');
164
+ // DEFAULT_PROJECT_CWD imported from lib/schedulerBatch.cjs (single source of truth).
100
165
 
101
166
  const ENV_CAP = process.env.SM_SCHEDULER_MAX_CONCURRENCY
102
167
  ? Math.max(1, Math.min(20, parseInt(process.env.SM_SCHEDULER_MAX_CONCURRENCY, 10) || 4))
@@ -677,7 +742,9 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
677
742
  const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
678
743
  try {
679
744
  const parsed = await parsePrd(prdPath);
680
- prompt = parsed.body;
745
+ // Centrally enforce the review → security-review → verify → commit finish
746
+ // sequence on every job, regardless of what the PRD body says.
747
+ prompt = parsed.body + FINISH_PROTOCOL;
681
748
  } catch (e) {
682
749
  safeLog(`[scheduler] failed to read PRD: ${e?.message}\n`);
683
750
  closeFd();
@@ -877,121 +944,10 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
877
944
  });
878
945
  }
879
946
 
880
- /**
881
- * Pick the next batch of jobs to spawn this tick.
882
- *
883
- * Rules:
884
- * 1. Find the lowest parallelGroup that has pending jobs not already in
885
- * runningSet.
886
- * 2. If that group has jobs in runningSet (i.e., we're mid-group), backfill
887
- * up to (cap - runningSet.size) more from the SAME group.
888
- * 3. If the current group has NO jobs in runningSet (new group), and there
889
- * are still jobs from an earlier group in runningSet, do nothing — wait
890
- * for the earlier group to drain before advancing.
891
- * 4. **Late-arrival**: if a lower-numbered (higher-priority) PRD reconciles
892
- * AFTER a higher-numbered group was already picked, fire the late-arrival
893
- * immediately in parallel with the active group rather than starving it
894
- * until the active group drains. This handles the reconcile-race where
895
- * a PRD file lands on disk between two pickNextBatch invocations.
896
- * 5. A singleton group (unique NN, no other jobs share it) runs alone;
897
- * no bleed into adjacent groups.
898
- *
899
- * Returns array of job objects to spawn. O(N) where N = pending.length.
900
- */
901
- function pickNextBatch(allJobs, running, cap) {
902
- const pending = allJobs.filter((j) => j.status === 'pending' && !running.has(j.slug));
903
- if (pending.length === 0) return [];
904
-
905
- // Lowest pending group (computed up-front so the failure gate can compare).
906
- const lowestPendingGroup = pending.reduce(
907
- (min, j) => Math.min(min, j.parallelGroup ?? 99),
908
- Infinity,
909
- );
910
-
911
- // Cross-group failure gate: refuse to advance past a group with failed jobs.
912
- // Without this, a failed foundation PRD (e.g. 03-doc-editor-foundation
913
- // crashed with a NUL-byte spawn error on 2026-05-21) doesn't stop later
914
- // groups (04, 05, 06...) from running and silently corrupting the project
915
- // state. The user can re-queue the failed job (pending) or archive it to
916
- // unblock the gate, but the default is to halt until the failure is
917
- // acknowledged.
918
- const blockingFailures = allJobs.filter((j) =>
919
- (j.status === 'failed' || j.status === 'needs_review') &&
920
- (j.parallelGroup ?? 99) < lowestPendingGroup,
921
- );
922
- if (blockingFailures.length > 0) {
923
- const slugs = blockingFailures.map((j) => j.slug).join(', ');
924
- console.log(`[scheduler] failure-gate: holding g${lowestPendingGroup} — ${blockingFailures.length} failed job(s) in earlier groups [${slugs}]. Reset to pending or archive to unblock.`);
925
- return [];
926
- }
927
-
928
- // Groups with at least one job in flight: either tracked in runningSet
929
- // (this process spawned it) or still marked 'running' in queue.json
930
- // (persisted from a previous session that hasn't been orphan-reset yet).
931
- const activeGroups = new Set();
932
- for (const slug of running) {
933
- const job = allJobs.find((j) => j.slug === slug);
934
- if (job) activeGroups.add(job.parallelGroup ?? 99);
935
- }
936
- for (const j of allJobs) {
937
- if (j.status === 'running' && !running.has(j.slug)) {
938
- activeGroups.add(j.parallelGroup ?? 99);
939
- }
940
- }
941
- // Total slots consumed: in-process spawns + queue.json running count.
942
- const queueRunningCount = allJobs.filter((j) => j.status === 'running').length;
943
- const effectiveRunning = Math.max(running.size, queueRunningCount);
944
-
945
- // (lowestPendingGroup was computed up-front for the failure-gate check.)
946
-
947
- if (activeGroups.size > 0) {
948
- const lowestActive = Math.min(...activeGroups);
949
- if (lowestPendingGroup > lowestActive) {
950
- // Earlier group still running — wait for it to drain before advancing.
951
- console.log(`[scheduler] concurrency: g${lowestActive} in flight, holding g${lowestPendingGroup}`);
952
- return [];
953
- }
954
- if (lowestPendingGroup < lowestActive) {
955
- // Late-arrival: a lower-numbered (higher-priority) PRD reconciled AFTER
956
- // a higher-numbered group was already picked. Without this branch the
957
- // pending PRD starves until the active group drains — the bug observed
958
- // on 2026-05-10 where 118-studio-add-wave2-games (g118) was held while
959
- // the g130 hardening trio ran. Honor priority: fire the late-arrival
960
- // now, in parallel with the active group. (Strict serial group
961
- // ordering still applies between groups that were both present at the
962
- // time of picking; this only handles the reconcile-race edge case.)
963
- const slots = cap - effectiveRunning;
964
- if (slots <= 0) {
965
- console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots for late-arrival g${lowestPendingGroup}`);
966
- return [];
967
- }
968
- const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestPendingGroup).slice(0, slots);
969
- console.log(`[scheduler] concurrency: firing late-arrival g${lowestPendingGroup} (${batch.length} job(s)) alongside active g${lowestActive}`);
970
- return batch;
971
- }
972
- // Backfill slots remaining in the current group.
973
- const slots = cap - effectiveRunning;
974
- if (slots <= 0) {
975
- console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots`);
976
- return [];
977
- }
978
- const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestActive).slice(0, slots);
979
- if (batch.length > 0) {
980
- console.log(`[scheduler] concurrency: backfilling ${batch.length} into g${lowestActive} (${effectiveRunning}/${cap} running)`);
981
- }
982
- return batch;
983
- }
984
-
985
- // No active group — start the next group fresh.
986
- const slots = cap - effectiveRunning;
987
- if (slots <= 0) {
988
- console.log(`[scheduler] concurrency: cap ${cap} reached (${effectiveRunning} running), no slots`);
989
- return [];
990
- }
991
- const batch = pending.filter((j) => (j.parallelGroup ?? 99) === lowestPendingGroup).slice(0, slots);
992
- console.log(`[scheduler] concurrency: starting g${lowestPendingGroup} with ${batch.length} job(s) (cap ${cap})`);
993
- return batch;
994
- }
947
+ // pickNextBatch and pickForProject are defined in lib/schedulerBatch.cjs and
948
+ // required at the top of this file. Group-ordering gates are evaluated per
949
+ // project (keyed by cwd) so jobs in different repos run concurrently up to
950
+ // the cap; within one project, sequential-group semantics are preserved.
995
951
 
996
952
  /**
997
953
  * Recognize fix-plan slugs (NN-fix-...) so we don't recurse on a fix-plan that
@@ -1177,6 +1133,11 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1177
1133
  });
1178
1134
  await broadcast();
1179
1135
 
1136
+ // Commit-guard baseline: snapshot the working tree BEFORE the run so the
1137
+ // post-run check flags only paths THIS job left dirty, not pre-existing WIP.
1138
+ const guardCwd = job.cwd || defaultCwd;
1139
+ const guardBaseline = await uncommittedChanges(guardCwd);
1140
+
1180
1141
  const res = await executeJob(job, runDir, defaultCwd, async (pid, sessionId, cwd) => {
1181
1142
  await mutate((s) => {
1182
1143
  const idx = s.jobs.findIndex((x) => x.slug === job.slug);
@@ -1220,6 +1181,36 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1220
1181
  }
1221
1182
  }
1222
1183
 
1184
+ // Commit guard: a clean exit that left NEW uncommitted changes means the
1185
+ // finish protocol's COMMIT step did not run. Surface it as needs_review
1186
+ // instead of letting it masquerade as 'completed' (the PRD 03/04
1187
+ // left-uncommitted incident). Two false-positive defenses:
1188
+ // - baseline DELTA: only files dirtied during THIS run count, so
1189
+ // pre-existing user WIP is excluded; and
1190
+ // - sibling skip: if another job is concurrently writing the same repo,
1191
+ // working-tree dirt can't be attributed to this job, so skip the guard.
1192
+ // Non-git cwds resolve to null and are skipped (the guard is best-effort).
1193
+ if (res.exitCode === 0 && !res.rateLimited && (!verifyResult || verifyResult.verdict === 'clean')) {
1194
+ const after = await uncommittedChanges(guardCwd);
1195
+ if (after && after.length > 0) {
1196
+ const baseSet = new Set(guardBaseline || []);
1197
+ const newlyDirty = after.filter((p) => !baseSet.has(p));
1198
+ const guardState = await readQueue().catch(() => ({ jobs: [] }));
1199
+ const siblingRunning = (guardState.jobs || []).some(
1200
+ (j) => j.slug !== job.slug && j.status === 'running' && (j.cwd || defaultCwd) === guardCwd,
1201
+ );
1202
+ if (newlyDirty.length > 0 && !siblingRunning) {
1203
+ const sample = newlyDirty.slice(0, 3).join(', ');
1204
+ verifyResult = {
1205
+ verdict: 'uncommitted_changes',
1206
+ reason: `finish protocol incomplete: ${newlyDirty.length} uncommitted file(s) left in working tree (e.g. ${sample})`,
1207
+ downgradeTo: 'needs_review',
1208
+ };
1209
+ console.log(`[scheduler] commit-guard: ${job.slug} left ${newlyDirty.length} files uncommitted → needs_review`);
1210
+ }
1211
+ }
1212
+ }
1213
+
1223
1214
  let actuallyFailed = false;
1224
1215
  let failedJobSnapshot = null;
1225
1216
  await mutate((s) => {
@@ -1385,10 +1376,60 @@ async function maybeLaunchWhenAvailable(state) {
1385
1376
  tickQueue().catch((e) => console.error('[scheduler] tickQueue error', e));
1386
1377
  }
1387
1378
 
1379
+ // ---------- dead-process reaper ----------
1380
+
1381
+ /**
1382
+ * Scan running jobs, identify those whose claude process is provably dead, and
1383
+ * finalize them to completed/failed by reading the run log. Called once per
1384
+ * poll cycle. Conservative: a job with no runtime.pid yet (spawn mid-flight)
1385
+ * is always skipped. A job whose pid is alive (claudePidAlive) is always skipped.
1386
+ * Exported so unit tests can invoke it directly.
1387
+ */
1388
+ async function reapDeadRunningJobs() {
1389
+ try {
1390
+ if (runningSet.size === 0) return; // fast path: no in-flight jobs
1391
+ const state = await readQueue();
1392
+ const dead = [];
1393
+ for (const j of state.jobs) {
1394
+ if (j.status !== 'running') continue;
1395
+ const pid = j.runtime?.pid;
1396
+ if (!pid) continue; // spawn may be mid-flight; give it a cycle
1397
+ if (claudePidAlive(pid)) continue;
1398
+ const logPath = j.runId
1399
+ ? path.join(RUNS_DIR, j.runId, `${j.slug}.log`)
1400
+ : null;
1401
+ const outcome = logPath ? classifyRunOutcome(logPath) : 'unknown';
1402
+ dead.push({ slug: j.slug, pid, outcome });
1403
+ }
1404
+ if (dead.length === 0) return;
1405
+
1406
+ await mutate((s) => {
1407
+ for (const { slug, pid, outcome } of dead) {
1408
+ const idx = s.jobs.findIndex((x) => x.slug === slug);
1409
+ if (idx < 0 || s.jobs[idx].status !== 'running') continue; // race guard
1410
+ const success = outcome === 'success';
1411
+ s.jobs[idx].status = success ? 'completed' : 'failed';
1412
+ s.jobs[idx].exitCode = success ? 0 : (s.jobs[idx].exitCode ?? 1);
1413
+ s.jobs[idx].finishedAt = new Date().toISOString();
1414
+ s.jobs[idx].error = success ? null : `reaped: process gone, no success result in log (${outcome})`;
1415
+ delete s.jobs[idx].runtime;
1416
+ runningSet.delete(slug);
1417
+ console.log(`[scheduler] reaped dead job slug=${slug} pid=${pid} outcome=${outcome}`);
1418
+ }
1419
+ });
1420
+
1421
+ await broadcast();
1422
+ tickQueue().catch(() => {});
1423
+ } catch (e) {
1424
+ console.warn('[scheduler] reapDeadRunningJobs error', e?.message);
1425
+ }
1426
+ }
1427
+
1388
1428
  // ---------- poll loop with exponential backoff ----------
1389
1429
 
1390
1430
  async function pollLoop() {
1391
1431
  try {
1432
+ await reapDeadRunningJobs().catch(() => {});
1392
1433
  const r = await billing.fetchUsage();
1393
1434
 
1394
1435
  if (r.kind === 'ok') {
@@ -1736,11 +1777,22 @@ async function init() {
1736
1777
  loadSchedulerState();
1737
1778
  bootedAt = Date.now();
1738
1779
 
1739
- // Boot reconciliation: mark any job that was 'running' when the app died as
1740
- // 'failed', AND kill its detached claude child if still alive. Without the
1741
- // kill step the child keeps running as a zombie writing to the project on
1742
- // its own schedule, which is exactly what happened on 2026-05-21 (PID 78230
1743
- // writing PRD 05's output while the scheduler thought the job was orphaned).
1780
+ // Boot reconciliation: finalize any job that was 'running' when the app died.
1781
+ // Check the run log first: a job that emitted result/success before the crash
1782
+ // should be marked 'completed', not 'failed', so it doesn't wedge the queue
1783
+ // via the failure-gate. Also kill any still-live orphan claude child to prevent
1784
+ // it from continuing to write to the project unsupervised (2026-05-21 incident).
1785
+ //
1786
+ // classifyRunOutcome calls readTail → fs.readFileSync (up to 64 KB per job).
1787
+ // Pre-compute all outcomes BEFORE entering the mutate lock so the blocking I/O
1788
+ // does not run while the mutateTail chain is held during startup (the synchronous read still blocks the event loop briefly wherever it runs).
1789
+ const bootSnap = readQueueSync();
1790
+ const bootOutcomes = new Map();
1791
+ for (const j of bootSnap.jobs) {
1792
+ if (j.status !== 'running') continue;
1793
+ const logPath = j.runId ? path.join(RUNS_DIR, j.runId, `${j.slug}.log`) : null;
1794
+ bootOutcomes.set(j.slug, logPath ? classifyRunOutcome(logPath) : 'unknown');
1795
+ }
1744
1796
  await mutate((state) => {
1745
1797
  for (const j of state.jobs) {
1746
1798
  if (j.status === 'running') {
@@ -1753,10 +1805,14 @@ async function init() {
1753
1805
  console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
1754
1806
  }
1755
1807
  }
1756
- j.status = 'failed';
1757
- j.error = `orphaned: app restarted while running${killNote}`;
1808
+ const outcome = bootOutcomes.get(j.slug) ?? 'unknown';
1809
+ const success = outcome === 'success';
1810
+ j.status = success ? 'completed' : 'failed';
1811
+ j.exitCode = success ? 0 : (j.exitCode ?? 1);
1812
+ j.error = success ? null : `orphaned: app restarted while running${killNote}`;
1758
1813
  j.finishedAt = new Date().toISOString();
1759
1814
  delete j.runtime;
1815
+ console.log(`[scheduler] boot reconcile: slug=${j.slug} outcome=${outcome} → ${j.status}`);
1760
1816
  }
1761
1817
  }
1762
1818
  });
@@ -1833,4 +1889,97 @@ async function init() {
1833
1889
  }
1834
1890
  }
1835
1891
 
1836
- module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs };
1892
+ // `remote`: scheduler operations callable from webRemote.cjs without going through IPC.
1893
+ const remote = {
1894
+ async getState() {
1895
+ const state = await readQueue();
1896
+ await reconcile(state);
1897
+ await writeQueue(state);
1898
+ return buildScheduleStatePayload(state, { withPaths: true });
1899
+ },
1900
+
1901
+ async readPrd(slug) {
1902
+ const filePath = safeSlugPath(slug);
1903
+ if (!filePath) return { ok: false, error: 'invalid slug' };
1904
+ try {
1905
+ // realpath resolves symlinks; re-check boundary to block a rogue agent job
1906
+ // that places a symlink inside PRDS_DIR pointing outside the safe root.
1907
+ const real = await fsp.realpath(filePath);
1908
+ if (!real.startsWith(PRDS_DIR + path.sep)) {
1909
+ return { ok: false, error: 'invalid slug' };
1910
+ }
1911
+ const text = await fsp.readFile(real, 'utf8');
1912
+ return { ok: true, text };
1913
+ } catch (e) {
1914
+ return { ok: false, error: e?.message };
1915
+ }
1916
+ },
1917
+
1918
+ async readLog(slug, runId) {
1919
+ const logPath = path.resolve(path.join(RUNS_DIR, runId, `${slug}.log`));
1920
+ if (!logPath.startsWith(RUNS_DIR + path.sep)) {
1921
+ return { ok: false, error: 'invalid slug or runId' };
1922
+ }
1923
+ try {
1924
+ // realpath resolves symlinks; re-check boundary to block a rogue agent job
1925
+ // that places a symlink inside RUNS_DIR pointing outside the safe root.
1926
+ const real = await fsp.realpath(logPath);
1927
+ if (!real.startsWith(RUNS_DIR + path.sep)) {
1928
+ return { ok: false, error: 'invalid slug or runId' };
1929
+ }
1930
+ const text = await fsp.readFile(real, 'utf8');
1931
+ return { ok: true, text };
1932
+ } catch (e) {
1933
+ return { ok: false, error: e?.message };
1934
+ }
1935
+ },
1936
+
1937
+ async writePrd(slug, body) {
1938
+ const resolved = safeSlugPath(slug);
1939
+ if (!resolved) return { ok: false, error: 'invalid slug' };
1940
+ try {
1941
+ await config.writeTextAtomic(resolved, body);
1942
+ const stat = await fsp.stat(resolved);
1943
+ return { ok: true, bytesWritten: stat.size };
1944
+ } catch (e) {
1945
+ return { ok: false, error: e?.message ?? 'write failed' };
1946
+ }
1947
+ },
1948
+
1949
+ async resetJob(slug) {
1950
+ if (!safeSlugPath(slug)) return { ok: false, error: 'invalid slug' };
1951
+ const found = await mutate((state) => {
1952
+ const idx = state.jobs.findIndex((j) => j.slug === slug);
1953
+ if (idx < 0) return false;
1954
+ resetJobFields(state.jobs[idx]);
1955
+ return true;
1956
+ });
1957
+ if (!found) return { ok: false, error: 'not found' };
1958
+ await broadcast();
1959
+ return { ok: true };
1960
+ },
1961
+
1962
+ async runNow() {
1963
+ await clearPause('run-now');
1964
+ runDueJobs().catch((e) => logs.writeLine({
1965
+ level: 'error', scope: 'scheduler',
1966
+ message: 'runDueJobs error (remote:run-now)', meta: { error: e?.message },
1967
+ }));
1968
+ return { ok: true };
1969
+ },
1970
+
1971
+ async setConfig(partial) {
1972
+ const cfg = await mutate((state) => {
1973
+ const { supervisor: supPartial, ...rest } = partial;
1974
+ state.config = { ...state.config, ...rest };
1975
+ if (supPartial !== undefined) {
1976
+ state.config.supervisor = { ...(state.config.supervisor ?? {}), ...supPartial };
1977
+ }
1978
+ return state.config;
1979
+ });
1980
+ await rescheduleTimer();
1981
+ return { ok: true, config: cfg };
1982
+ },
1983
+ };
1984
+
1985
+ module.exports = { registerScheduleHandlers, attachWindow, init, ROOT, PRDS_DIR, selectHistoryJobs, parsePorcelain, FINISH_PROTOCOL, remote, pickNextBatch, pickForProject, reapDeadRunningJobs };