claude-code-session-manager 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/assets/TiptapBody-CAJSNRPs.js +189 -0
  2. package/dist/assets/{cssMode-DyodRfD-.js → cssMode-o7rZCrm4.js} +1 -1
  3. package/dist/assets/{freemarker2-D1H1ixRK.js → freemarker2-CgmCS5Wh.js} +1 -1
  4. package/dist/assets/{handlebars-wnlxpTlt.js → handlebars-BcPLqhPv.js} +1 -1
  5. package/dist/assets/{html-Dv_oA_OQ.js → html-CC9xWnC3.js} +1 -1
  6. package/dist/assets/{htmlMode-DGXsu2-V.js → htmlMode-DEgCqH7k.js} +1 -1
  7. package/dist/assets/{index-oiSqLrkZ.js → index-C7ljEoqc.js} +1223 -1192
  8. package/dist/assets/{index-CcRP2nIC.css → index-CH3K1pkS.css} +1 -1
  9. package/dist/assets/{javascript-CxejmYhM.js → javascript-CjwqkQrn.js} +1 -1
  10. package/dist/assets/{jsonMode-ztPfF7kI.js → jsonMode-BYTLu76d.js} +4 -4
  11. package/dist/assets/{liquid-DvtfrYeo.js → liquid-wbQUuJwT.js} +1 -1
  12. package/dist/assets/{lspLanguageFeatures-mIBTKOZq.js → lspLanguageFeatures-BJGMI7Xu.js} +1 -1
  13. package/dist/assets/{mdx-DTebMWEJ.js → mdx-DcDstgPF.js} +1 -1
  14. package/dist/assets/{python-zea5QgfT.js → python-B96yyM_5.js} +1 -1
  15. package/dist/assets/{razor-DODk3om_.js → razor-C7aRIxIE.js} +1 -1
  16. package/dist/assets/{tsMode-BQGo_Gc8.js → tsMode-B3UYlGaL.js} +1 -1
  17. package/dist/assets/{typescript-Cfo1NBg6.js → typescript-CV587TvC.js} +1 -1
  18. package/dist/assets/{xml-D1RKIHcE.js → xml-PWUJecBf.js} +1 -1
  19. package/dist/assets/{yaml-B8MoJlND.js → yaml-D8bBNHE4.js} +1 -1
  20. package/dist/index.html +2 -2
  21. package/package.json +5 -1
  22. package/src/main/agentMemory.cjs +267 -0
  23. package/src/main/docEditor.cjs +92 -0
  24. package/src/main/files.cjs +346 -0
  25. package/src/main/git.cjs +333 -0
  26. package/src/main/historyAggregator.cjs +70 -0
  27. package/src/main/index.cjs +66 -0
  28. package/src/main/ipcSchemas.cjs +75 -0
  29. package/src/main/projectSkills.cjs +124 -0
  30. package/src/main/scheduler.cjs +155 -11
  31. package/src/main/superagent.cjs +202 -0
  32. package/src/main/transcripts.cjs +8 -1
  33. package/src/preload/api.d.ts +215 -0
  34. package/src/preload/index.cjs +54 -0
@@ -105,6 +105,34 @@ async function parseJSONL(filePath, stat) {
105
105
  return acc;
106
106
  }
107
107
 
108
/**
 * Lightweight per-file meta: { firstTs, lastTs, inputTokens, outputTokens, skipped }.
 * Powers the `history:list-conversations` IPC used by the Overview detailed-
 * stats panel. Single pass over the file's lines; only timestamps and usage
 * blocks are read.
 *
 * @param {string} filePath - JSONL transcript to scan.
 * @param {{ size: number }} stat - stat result for `filePath`; only `size` is read.
 * @returns {Promise<{firstTs: *, lastTs: *, inputTokens: number, outputTokens: number, skipped: boolean}>}
 *   `skipped` is true when the file is larger than MAX_FILE_BYTES (it is then
 *   not read at all). An unreadable file resolves to the zeroed meta instead
 *   of rejecting. `firstTs`/`lastTs` are the first and last timestamps in file
 *   order, not the min/max.
 */
async function parseConversationMeta(filePath, stat) {
  const meta = { firstTs: null, lastTs: null, inputTokens: 0, outputTokens: 0, skipped: false };
  if (stat.size > MAX_FILE_BYTES) { meta.skipped = true; return meta; }

  let text;
  try { text = await fsp.readFile(filePath, 'utf8'); } catch { return meta; }

  // First numeric candidate wins; anything non-numeric contributes 0.
  const firstNumber = (...candidates) => candidates.find((v) => typeof v === 'number') ?? 0;

  for (const raw of text.split('\n')) {
    const line = raw.trim();
    if (!line) continue;

    let obj;
    try { obj = JSON.parse(line); } catch { continue; }
    // A line may be valid JSON without being a record (`null`, `42`, `"x"`).
    // Property access on null would throw and reject the whole scan.
    if (obj === null || typeof obj !== 'object') continue;

    const ts = obj.ts ?? obj.timestamp;
    if (ts) {
      if (meta.firstTs === null) meta.firstTs = ts;
      meta.lastTs = ts;
    }

    const usage = obj.usage ?? obj.message?.usage;
    if (usage && typeof usage === 'object') {
      // NOTE(review): Anthropic API usage objects use snake_case keys; the
      // camelCase spelling is kept first for backward compatibility. Confirm
      // which spelling the transcripts on disk actually carry.
      meta.inputTokens += firstNumber(usage.inputTokens, usage.input_tokens);
      meta.outputTokens += firstNumber(usage.outputTokens, usage.output_tokens);
    }
  }
  return meta;
}
135
+
108
136
  function registerHistoryAggregatorHandlers() {
109
137
  ipcMain.handle('history:aggregate', async (_e, rawReq) => {
110
138
  // Wire the historyAggregate schema (previously defined but never used).
@@ -208,6 +236,48 @@ function registerHistoryAggregatorHandlers() {
208
236
  const scannedMs = Date.now() - t0;
209
237
  return { rows, partial, scannedMs, skippedLargeFiles };
210
238
  });
239
+
240
/**
 * Per-conversation metadata: one row per JSONL with derived duration +
 * token totals. Used by the Overview detailed-stats panel to compute
 * hourly/daily distribution + top-projects.
 *
 * Resolves to `{ conversations, scannedMs }`. Each row is
 * `{ timestamp, projectFolder, stats: { duration?, estimatedTokens } }`;
 * `duration` (ms) is present only when both the first and last timestamps
 * parse to a finite time. Unreadable directories/files are skipped.
 */
ipcMain.handle('history:list-conversations', async () => {
  const t0 = Date.now();
  const conversations = [];

  let projectEntries;
  try {
    projectEntries = await fsp.readdir(PROJECTS_DIR, { withFileTypes: true });
  } catch {
    // No projects directory (or unreadable): report an empty listing.
    return { conversations: [], scannedMs: Date.now() - t0 };
  }

  for (const ent of projectEntries) {
    if (!ent.isDirectory()) continue;
    const projectDir = path.join(PROJECTS_DIR, ent.name);
    // Directory names encode the project path with '-' in place of '/'.
    // The decoding is lossy for paths that contain a literal '-'.
    const projectFolder = '/' + ent.name.replace(/-/g, '/');

    let files;
    try { files = await fsp.readdir(projectDir, { withFileTypes: true }); } catch { continue; }

    for (const f of files) {
      if (!f.isFile() || !f.name.endsWith('.jsonl')) continue;
      const filePath = path.join(projectDir, f.name);

      let stat;
      try { stat = await fsp.stat(filePath); } catch { continue; }

      // One malformed transcript must not abort the whole listing.
      let meta;
      try { meta = await parseConversationMeta(filePath, stat); } catch { continue; }

      // Fall back to the file's mtime when the transcript has no timestamps.
      const firstTs = meta.firstTs || new Date(stat.mtimeMs).toISOString();

      // Date.parse yields NaN for unparseable input; Math.max(0, NaN) is NaN,
      // so only emit a duration when the difference is a finite number.
      const elapsedMs = meta.firstTs && meta.lastTs
        ? Date.parse(meta.lastTs) - Date.parse(meta.firstTs)
        : NaN;

      conversations.push({
        timestamp: firstTs,
        projectFolder,
        stats: {
          ...(Number.isFinite(elapsedMs) ? { duration: Math.max(0, elapsedMs) } : {}),
          estimatedTokens: meta.inputTokens + meta.outputTokens,
        },
      });
    }
  }
  return { conversations, scannedMs: Date.now() - t0 };
});
211
281
  }
212
282
 
213
283
  module.exports = { registerHistoryAggregatorHandlers };
@@ -24,6 +24,12 @@ const otel = require('./otel.cjs');
24
24
  const otelSettings = require('./otelSettings.cjs');
25
25
  const { registerHistoryAggregatorHandlers } = require('./historyAggregator.cjs');
26
26
  const memoryTool = require('./memoryTool.cjs');
27
+ const agentMemory = require('./agentMemory.cjs');
28
+ const { registerDocEditorHandlers } = require('./docEditor.cjs');
29
+ const git = require('./git.cjs');
30
+ const superagent = require('./superagent.cjs');
31
+ const { registerProjectSkillsHandlers } = require('./projectSkills.cjs');
32
+ const filesIpc = require('./files.cjs');
27
33
  const { resolveClaudeBin } = require('./lib/claudeBin.cjs');
28
34
  const { assertCwdInsideHome } = require('./lib/insideHome.cjs');
29
35
 
@@ -183,6 +189,7 @@ async function rebootApp() {
183
189
  scheduler.attachWindow(mainWindow);
184
190
  watchers.attachWindow(mainWindow);
185
191
  pluginInstall.attachWindow(mainWindow);
192
+ superagent.attachWindow(mainWindow);
186
193
  rebooting = false;
187
194
  return;
188
195
  }
@@ -241,6 +248,20 @@ function createWindow() {
241
248
  mainWindow.show();
242
249
  });
243
250
 
251
+ // Native right-click menu — Copy / Paste / Select All everywhere. Roles
252
+ // hook into Electron's built-in clipboard/selection plumbing, which xterm.js
253
+ // (and Monaco, and Tiptap) all participate in via the standard DOM
254
+ // selection API, so this single block covers Terminal + Doc Editor + plain
255
+ // text inputs without per-component wiring.
256
+ mainWindow.webContents.on('context-menu', (_e, params) => {
257
+ const items = [];
258
+ if (params.editFlags.canCopy) items.push({ label: 'Copy', role: 'copy' });
259
+ if (params.editFlags.canPaste) items.push({ label: 'Paste', role: 'paste' });
260
+ if (items.length) items.push({ type: 'separator' });
261
+ items.push({ label: 'Select All', role: 'selectAll' });
262
+ Menu.buildFromTemplate(items).popup({ window: mainWindow });
263
+ });
264
+
244
265
  const distIndex = path.join(__dirname, '..', '..', 'dist', 'index.html');
245
266
  const useDevServer = process.env.SM_DEV === '1';
246
267
  if (useDevServer) {
@@ -522,6 +543,44 @@ ipcMain.handle('app:open-in-editor', async (_e, payload) => {
522
543
  return { ok: false, error: 'no editor found' };
523
544
  });
524
545
 
546
/**
 * Open a web URL in the user's default browser.
 *
 * Payload: `{ url }` (validated by `schemas.openExternal`).
 * Resolves to `{ ok: true }`, or `{ ok: false, error }` when the URL is
 * malformed or is not http/https.
 */
ipcMain.handle('app:open-external', async (_e, payload) => {
  // URL filter mirrors setWindowOpenHandler at line ~631: without it, the
  // renderer could be tricked into asking shell.openExternal to launch
  // `file:///etc/passwd`, `javascript:…`, or `mailto:…`. Stick to web URLs.
  const { url } = schemas.openExternal.parse(payload);

  // Parse instead of prefix-matching: rejects strings that merely start with
  // "http://" but are not valid URLs, and treats the scheme case-insensitively.
  let target;
  try {
    target = new URL(url);
  } catch {
    return { ok: false, error: 'invalid URL' };
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return { ok: false, error: 'only http/https URLs are allowed' };
  }

  // Hand over the normalized form so what was validated is what gets opened.
  await shell.openExternal(target.href);
  return { ok: true };
});
557
+
558
/**
 * Open a specific file (with optional line:col) in the user's editor.
 * Distinct from app:open-in-editor above which opens a project root.
 *
 * Payload: `{ path, line?, col?, editor? }` (validated by
 * `schemas.openFileInEditor`). A relative `path` is resolved against the home
 * directory. Throws when the path fails `checkInsideHome`; otherwise resolves
 * to `{ ok: true, editor }` or `{ ok: false, error }`.
 */
ipcMain.handle('app:open-file-in-editor', async (_e, payload) => {
  const { path: p, line, col, editor } = schemas.openFileInEditor.parse(payload);
  const home = os.homedir();
  const abs = path.isAbsolute(p) ? p : path.resolve(home, p);
  const err = checkInsideHome(abs);
  if (err) throw new Error(err);
  try { await fsp.access(abs); } catch { return { ok: false, error: `file not found: ${abs}` }; }

  // NOTE(review): an explicit `editor` comes from the renderer and is spawned
  // as-is (subject only to findCommand) — confirm that is the intended trust level.
  const candidates = (editor && editor !== 'auto')
    ? [editor]
    : [process.env.VISUAL, process.env.EDITOR, 'code', 'cursor', 'subl', 'nano'].filter(Boolean);

  for (const cmd of candidates) {
    if (!findCommand(cmd)) continue;
    // GUI editors that support the goto-line flag (code/cursor/subl) get
    // `-g file:line:col`; everything else falls back to opening the file alone.
    // Match on the basename so an absolute path such as /usr/local/bin/code
    // (common in $VISUAL/$EDITOR) still gets line:col handling.
    // NOTE(review): confirm that `subl` accepts `-g`; it documents the
    // `file:line:col` suffix, but the flag itself was not verified here.
    const supportsGoto = /^(code|cursor|subl)$/.test(path.basename(cmd));
    const target = (supportsGoto && line) ? `${abs}:${line}${col ? `:${col}` : ''}` : abs;
    const args = supportsGoto ? ['-g', target] : [abs];
    const child = spawn(cmd, args, { detached: true, stdio: 'ignore', env: cleanChildEnv() });
    // Detach fully so the editor outlives this process's event loop.
    child.unref();
    return { ok: true, editor: cmd };
  }
  return { ok: false, error: 'no editor found' };
});
583
+
525
584
  ipcMain.handle('app:open-in-finder', async (_e, payload) => {
526
585
  const { cwd } = schemas.openInFinder.parse(payload);
527
586
  const err = checkInsideHome(cwd);
@@ -577,6 +636,12 @@ queueOps.registerQueueOpsHandlers();
577
636
  registerHistoryAggregatorHandlers();
578
637
  pluginInstall.registerPluginInstallHandlers();
579
638
  memoryTool.registerMemoryHandlers();
639
+ agentMemory.registerAgentMemoryHandlers();
640
+ registerDocEditorHandlers();
641
+ git.register(ipcMain);
642
+ superagent.registerSuperAgentHandlers();
643
+ registerProjectSkillsHandlers();
644
+ filesIpc.registerFilesHandlers();
580
645
 
581
646
  // OTEL telemetry export (opt-in via ~/.config/session-manager/otel.json).
582
647
  ipcMain.handle('otel:get-config', async () => otelSettings.load());
@@ -788,6 +853,7 @@ app.whenReady().then(async () => {
788
853
  scheduler.attachWindow(mainWindow);
789
854
  watchers.attachWindow(mainWindow);
790
855
  pluginInstall.attachWindow(mainWindow);
856
+ superagent.attachWindow(mainWindow);
791
857
  scheduler.init().catch((e) => {
792
858
  logs.writeLine({ scope: 'scheduler', level: 'error', message: 'init failed', meta: { error: e?.message } });
793
859
  });
@@ -135,6 +135,17 @@ const openInEditor = z.object({
135
135
  editor: z.string().max(256).nullable().optional(),
136
136
  });
137
137
 
138
+ const openExternal = z.object({
139
+ url: z.string().min(1).max(4096),
140
+ });
141
+
142
+ const openFileInEditor = z.object({
143
+ path: z.string().min(1).max(4096),
144
+ line: z.number().int().positive().optional(),
145
+ col: z.number().int().positive().optional(),
146
+ editor: z.string().max(256).nullable().optional(),
147
+ });
148
+
138
149
  const openInFinder = z.object({
139
150
  cwd: z.string().min(1).max(4096),
140
151
  });
@@ -201,6 +212,36 @@ const memoryCreate = z.object({
201
212
  description: z.string().max(2048).optional(),
202
213
  }).strict();
203
214
 
215
+ // ──────────────────────────────────────────── Per-subagent memory
216
+ // Distinct from the workspace-scoped Memory tool: agentMemory is keyed by
217
+ // subagent name (the .md filename in ~/.claude/agents/, e.g. "code-reviewer"),
218
+ // not by cwd. Storage lives at ~/.claude/session-manager/agent-memory/<agentId>.json.
219
+ // Regex caps must stay in lockstep with agentMemory.cjs AGENT_ID_RE / ENTRY_ID_RE.
220
+ const AGENT_MEMORY_ID_RE = /^[A-Za-z0-9._-]{1,128}$/;
221
+ const AGENT_MEMORY_CATEGORY = z.enum(['command', 'preference', 'pattern', 'failure', 'workflow']);
222
+ const AGENT_MEMORY_MAX_BODY = 1024 * 1024; // 1 MiB — must match MAX_BODY_BYTES in agentMemory.cjs
223
+
224
+ const agentMemoryList = z.object({
225
+ agentId: z.string().regex(AGENT_MEMORY_ID_RE),
226
+ }).strict();
227
+
228
+ const agentMemoryGet = z.object({
229
+ agentId: z.string().regex(AGENT_MEMORY_ID_RE),
230
+ entryId: z.string().regex(AGENT_MEMORY_ID_RE),
231
+ }).strict();
232
+
233
+ const agentMemorySet = z.object({
234
+ agentId: z.string().regex(AGENT_MEMORY_ID_RE),
235
+ entryId: z.string().regex(AGENT_MEMORY_ID_RE),
236
+ body: z.string().max(AGENT_MEMORY_MAX_BODY),
237
+ category: AGENT_MEMORY_CATEGORY.optional(),
238
+ }).strict();
239
+
240
+ const agentMemoryDelete = z.object({
241
+ agentId: z.string().regex(AGENT_MEMORY_ID_RE),
242
+ entryId: z.string().regex(AGENT_MEMORY_ID_RE),
243
+ }).strict();
244
+
204
245
  // ──────────────────────────────────────────── History
205
246
  const DATE_YYYY_MM_DD = /^\d{4}-\d{2}-\d{2}$/;
206
247
 
@@ -272,6 +313,16 @@ const appGitBranch = z.object({
272
313
  cwd: z.string().min(1).max(4096),
273
314
  }).passthrough();
274
315
 
316
+ // git:status / git:file-status — see src/main/git.cjs. cwd is validatePath'd
317
+ // inside the handler (allowedRoots = home), so the schema only enforces shape.
318
+ const gitStatus = z.object({
319
+ cwd: z.string().min(1).max(4096),
320
+ }).passthrough();
321
+
322
+ const gitFileStatus = z.object({
323
+ cwd: z.string().min(1).max(4096),
324
+ }).passthrough();
325
+
275
326
  // Plugin install: mirrors pluginInstall.cjs SLUG_RE + length cap. Defense in
276
327
  // depth — install() re-checks; the schema rejects earlier.
277
328
  const PLUGIN_SLUG_RE = /^[a-z0-9\-/]+$/;
@@ -279,6 +330,20 @@ const pluginsInstall = z.object({
279
330
  slug: z.string().regex(PLUGIN_SLUG_RE).min(1).max(128),
280
331
  }).passthrough();
281
332
 
333
+ // SuperAgent — "boss" run that writes a structured prompt to the active
334
+ // tab's PTY. Bounds match the inline schemas in superagent.cjs; centralizing
335
+ // here so the schema is the boundary fence rather than each handler.
336
+ const superagentStart = z.object({
337
+ tabId: z.string().min(1).max(128),
338
+ prompt: z.string().min(1).max(8 * 1024),
339
+ specialistCount: z.number().int().min(1).max(8),
340
+ depth: z.enum(['quick', 'standard', 'deep']),
341
+ }).strict();
342
+
343
+ const superagentTabId = z.object({
344
+ tabId: z.string().min(1).max(128),
345
+ }).strict();
346
+
282
347
  /**
283
348
  * Wrap an IPC handler with schema validation. Returns a new handler that
284
349
  * parses the payload before calling the original. On invalid payload throws
@@ -319,6 +384,8 @@ module.exports = {
319
384
  scheduleRetagPrd,
320
385
  setConfigSchema,
321
386
  openInEditor,
387
+ openExternal,
388
+ openFileInEditor,
322
389
  openInFinder,
323
390
  openInTerminal,
324
391
  archiveProject,
@@ -329,12 +396,20 @@ module.exports = {
329
396
  voiceSetRecording,
330
397
  appTestFireHook,
331
398
  appGitBranch,
399
+ gitStatus,
400
+ gitFileStatus,
332
401
  pluginsInstall,
402
+ superagentStart,
403
+ superagentTabId,
333
404
  memoryList,
334
405
  memoryRead,
335
406
  memoryWrite,
336
407
  memoryDelete,
337
408
  memoryCreate,
409
+ agentMemoryList,
410
+ agentMemoryGet,
411
+ agentMemorySet,
412
+ agentMemoryDelete,
338
413
  watchersAdd,
339
414
  watchersList,
340
415
  watchersRemove,
@@ -0,0 +1,124 @@
1
+ /**
2
+ * ProjectSkills — per-project skill enable/disable state.
3
+ *
4
+ * Storage: <cwd>/.claude/project-skills.json
5
+ * Format: { skills: Array<{ skillId: string; enabled: boolean }>, schemaVersion: 1 }
6
+ *
7
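+ * Reads return only the entries stored in the project-local config; this module does
8
+ * not itself enumerate <cwd>/.claude/skills/ or <home>/.claude/skills/. Skills not listed in
9
+ * the JSON are meant to default to `enabled: true` (i.e., opt-out per project); that default is not applied here, so the consumer presumably merges it (verify against the caller).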
10
+ *
11
+ * IPC:
12
+ * - project-skills:get(cwd) -> SkillState[]
13
+ * - project-skills:set(cwd, skillId, enabled) -> { ok: boolean }
14
+ *
15
+ * Atomic writes go through config.cjs::writeJson. cwd is validated via
16
+ * validatePath which constrains it to allowedRoots (home + registered project
17
+ * dirs).
18
+ */
19
+
20
+ const { ipcMain } = require('electron');
21
+ const path = require('node:path');
22
+ const { z } = require('zod');
23
+ const { readJson, writeJson, addAllowedRoot } = require('./config.cjs');
24
+
25
+ const SCHEMA_VERSION = 1;
26
+
27
/**
 * Location of the per-project skill-state file for a project directory.
 *
 * @param {string} cwd - project root.
 * @returns {string} `<cwd>/.claude/project-skills.json`
 */
function projectSkillsPath(cwd) {
  const segments = [cwd, '.claude', 'project-skills.json'];
  return path.join(...segments);
}
30
+
31
/**
 * Read `<cwd>/.claude/project-skills.json` and normalize it.
 *
 * A missing file, or one whose content is not an object, yields an empty
 * record. Malformed entries are dropped without error, and when a skillId
 * appears more than once only its first occurrence is kept. The returned
 * `schemaVersion` is always the current SCHEMA_VERSION, whatever the file says.
 *
 * @param {string} cwd - project root.
 * @returns {Promise<{skills: Array<{skillId: string, enabled: boolean}>, schemaVersion: number}>}
 */
async function loadRecord(cwd) {
  const result = await readJson(projectSkillsPath(cwd));
  const usable = result.exists && result.data && typeof result.data === 'object';
  if (!usable) {
    return { skills: [], schemaVersion: SCHEMA_VERSION };
  }

  const rawSkills = Array.isArray(result.data.skills) ? result.data.skills : [];

  // Map keeps insertion order, so first-seen entries win and order is preserved.
  const bySkillId = new Map();
  for (const entry of rawSkills) {
    const wellFormed = Boolean(entry)
      && typeof entry.skillId === 'string'
      && typeof entry.enabled === 'boolean';
    if (wellFormed && !bySkillId.has(entry.skillId)) {
      bySkillId.set(entry.skillId, entry.enabled);
    }
  }

  const skills = Array.from(bySkillId, ([skillId, enabled]) => ({ skillId, enabled }));
  return { skills, schemaVersion: SCHEMA_VERSION };
}
54
+
55
/**
 * Persist a record to `<cwd>/.claude/project-skills.json` via writeJson.
 * The file is stamped with the current SCHEMA_VERSION and a `savedAt`
 * epoch-millisecond timestamp.
 *
 * @param {string} cwd - project root.
 * @param {{skills: Array<{skillId: string, enabled: boolean}>}} record
 * @returns {Promise<*>} whatever writeJson resolves to.
 */
async function saveRecord(cwd, record) {
  const target = projectSkillsPath(cwd);
  const { skills } = record;
  return writeJson(target, {
    schemaVersion: SCHEMA_VERSION,
    skills,
    savedAt: Date.now(),
  });
}
64
+
65
/**
 * Stored skill enable-states for a project.
 *
 * NOTE(review): `cwd` is registered as an allowed root before any check is
 * visible in this function — confirm that callers (or addAllowedRoot itself)
 * constrain it.
 *
 * @param {string} cwd - project root.
 * @returns {Promise<Array<{skillId: string, enabled: boolean}>>}
 */
async function getProjectSkills(cwd) {
  // Register cwd so writeJson is permitted under <cwd>/.claude.
  addAllowedRoot(cwd);
  const { skills } = await loadRecord(cwd);
  return skills;
}
72
+
73
/**
 * Insert or replace one skill's enabled flag, then save the record.
 *
 * @param {string} cwd - project root.
 * @param {string} skillId - skill to update.
 * @param {boolean} enabled - new state.
 * @returns {Promise<{ok: true}>}
 */
async function setProjectSkill(cwd, skillId, enabled) {
  addAllowedRoot(cwd);
  const record = await loadRecord(cwd);
  const entry = { skillId, enabled };
  const position = record.skills.findIndex((existing) => existing.skillId === skillId);
  if (position < 0) {
    record.skills.push(entry);
  } else {
    record.skills[position] = entry;
  }
  await saveRecord(cwd, record);
  return { ok: true };
}
86
+
87
+ // ──────────────────────────────────────────── IPC schemas
88
+ const projectSkillsCwd = z.object({
89
+ cwd: z.string().min(1).max(4096),
90
+ });
91
+
92
+ const projectSkillsSet = z.object({
93
+ cwd: z.string().min(1).max(4096),
94
+ skillId: z.string().min(1).max(256),
95
+ enabled: z.boolean(),
96
+ });
97
+
98
/**
 * Wrap a handler so its IPC payload is parsed by `schema` first.
 *
 * The returned function ignores the IPC event argument, passes the parsed
 * payload to `handler`, and returns the handler's result. A failing
 * `schema.parse` throws synchronously and the handler is not called.
 *
 * @param {{parse: Function}} schema - object exposing `parse(payload)`.
 * @param {Function} handler - receives the parsed payload.
 * @returns {Function} `(event, payload) => handler(parsedPayload)`
 */
function validated(schema, handler) {
  function validatedHandler(_event, payload) {
    return handler(schema.parse(payload));
  }
  return validatedHandler;
}
104
+
105
/**
 * Register the `project-skills:get` and `project-skills:set` IPC handlers.
 * Each payload is schema-validated before reaching the implementation.
 */
function registerProjectSkillsHandlers() {
  const onGet = validated(projectSkillsCwd, (req) => getProjectSkills(req.cwd));
  const onSet = validated(projectSkillsSet, (req) =>
    setProjectSkill(req.cwd, req.skillId, req.enabled));

  ipcMain.handle('project-skills:get', onGet);
  ipcMain.handle('project-skills:set', onSet);
}
117
+
118
+ module.exports = {
119
+ registerProjectSkillsHandlers,
120
+ // Exported for tests / direct use.
121
+ getProjectSkills,
122
+ setProjectSkill,
123
+ projectSkillsPath,
124
+ };
@@ -280,6 +280,81 @@ async function listPrdFiles() {
280
280
  return prdParser.listPrdFiles(PRDS_DIR);
281
281
  }
282
282
 
283
/**
 * Read the command line of a live process.
 * Tries /proc/<pid>/cmdline first (Linux), then `ps -p <pid> -o command=`.
 *
 * @param {number} pid
 * @returns {string|null} the command line, or null when neither source works.
 */
function readPidCmdline(pid) {
  try {
    // /proc separates argv elements with NUL bytes.
    return fs.readFileSync(`/proc/${pid}/cmdline`, 'utf8').replace(/\0/g, ' ');
  } catch {
    // No procfs, or the entry is unreadable: fall through to ps.
  }
  try {
    // execFileSync with an argv array: no shell is involved.
    const { execFileSync } = require('node:child_process');
    const out = execFileSync('ps', ['-p', String(pid), '-o', 'command='], {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    return out.trim();
  } catch {
    return null;
  }
}

/**
 * Send `signal` to the process group led by `pid`, falling back to the single
 * pid when the group signal fails. Failures of both are ignored (the process
 * may have died between checks).
 */
function signalGroupOrPid(pid, signal) {
  try {
    process.kill(-pid, signal);
  } catch {
    try { process.kill(pid, signal); } catch { /* race: died between checks */ }
  }
}

/**
 * Best-effort kill of a child claude PID that the previous app instance spawned
 * but never reaped. Used by init() to clean up the orphan tree on boot.
 *
 * Safety:
 * - PID-recycling: between app death and this call, another process may have
 *   reused the PID. The process's command line is read and a signal is only
 *   sent when it contains the word "claude" (a loose match: any command line
 *   mentioning "claude" qualifies, not just one starting with the claude bin).
 * - Detached process group: the signal goes to -pid (the group) first, then
 *   to the single pid if that fails.
 * - Returns synchronously after issuing SIGTERM; a SIGKILL follow-up is
 *   scheduled 5s later. It re-checks that the pid is alive and that its
 *   command line still matches, so a pid recycled in the meantime is left alone.
 *
 * @param {number} pid
 * @returns {'killed'|'gone'|'mismatch'|'unknown'}
 *   'killed' (cmdline matched + SIGTERM sent), 'gone' (pid invalid or not
 *   signalable), 'mismatch' (pid alive but cmdline doesn't mention claude),
 *   'unknown' (couldn't read cmdline — the pid is left alone).
 */
function killOrphanClaudePid(pid) {
  // Never touch pid 0/1 or negative values (those address process groups).
  if (!Number.isInteger(pid) || pid <= 1) return 'gone';
  // Signal 0 only probes for existence/permission.
  try { process.kill(pid, 0); } catch { return 'gone'; }

  const cmdline = readPidCmdline(pid);
  if (cmdline === null) return 'unknown';
  if (!/\bclaude\b/.test(cmdline)) return 'mismatch';

  signalGroupOrPid(pid, 'SIGTERM');

  const followUp = setTimeout(() => {
    try { process.kill(pid, 0); } catch { return; /* already gone */ }
    const current = readPidCmdline(pid);
    if (current === null || !/\bclaude\b/.test(current)) return;
    signalGroupOrPid(pid, 'SIGKILL');
  }, 5000);
  // Do not keep the event loop alive just for the follow-up.
  followUp.unref?.();
  return 'killed';
}
323
+
324
/**
 * Validate that a string is safe to pass as a child_process.spawn argv element.
 *
 * Node.js rejects argv strings containing NUL bytes with a cryptic error:
 *   "The argument 'args[1]' must be a string without null bytes. Received '...'"
 * That message truncates the offending string, so once it lands in the
 * queue.json `error` field the actual byte is hard to find. This validator
 * catches the problem pre-spawn and reports the source label, the offset and
 * the surrounding context.
 *
 * Also flags other ASCII control bytes (< 0x20 except TAB/LF/CR), since they
 * are virtually always a typo or copy-paste artifact in a markdown PRD body.
 *
 * @param {string} body - prompt text about to be passed to spawn.
 * @param {string} srcLabel - where the text came from; used in the message only.
 * @returns {{ok: true} | {ok: false, error: string}}
 *   Only the first offending character is reported. The offset is the UTF-8
 *   byte offset within `body`. A non-string `body` is reported as an error
 *   rather than throwing.
 */
function validatePromptForSpawn(body, srcLabel) {
  if (typeof body !== 'string') {
    return {
      ok: false,
      error: `PRD body from ${srcLabel} is not a string (got ${body === null ? 'null' : typeof body}); refusing to spawn.`,
    };
  }

  // Everything below 0x20 except TAB (0x09), LF (0x0A) and CR (0x0D).
  const hit = /[\x00-\x08\x0B\x0C\x0E-\x1F]/.exec(body);
  if (!hit) return { ok: true };

  const index = hit.index;
  const code = body.charCodeAt(index);
  const hex = code.toString(16).padStart(2, '0');

  // String indices count UTF-16 units; convert so the reported number really
  // is a byte offset for text containing non-ASCII characters.
  const byteOffset = Buffer.byteLength(body.slice(0, index), 'utf8');

  // Up to 20 characters either side, with every control char made visible.
  const ctx = body
    .slice(Math.max(0, index - 20), Math.min(body.length, index + 20))
    .replace(/[\x00-\x1F]/g, (c) => `\\x${c.charCodeAt(0).toString(16).padStart(2, '0')}`);

  return {
    ok: false,
    error: `PRD body contains control char 0x${hex} at byte offset ${byteOffset} in ${srcLabel} (context: "${ctx}"). child_process.spawn would reject this with a truncated error message; remove the control char and re-queue.`,
  };
}
357
+
283
358
  // ---------- queue reconciliation ----------
284
359
 
285
360
  /**
@@ -607,8 +682,9 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
607
682
 
608
683
  // Read full PRD body fresh from disk (queue stored only the preview).
609
684
  let prompt;
685
+ const prdPath = path.join(PRDS_DIR, `${job.slug}.md`);
610
686
  try {
611
- const parsed = await parsePrd(path.join(PRDS_DIR, `${job.slug}.md`));
687
+ const parsed = await parsePrd(prdPath);
612
688
  prompt = parsed.body;
613
689
  } catch (e) {
614
690
  safeLog(`[scheduler] failed to read PRD: ${e?.message}\n`);
@@ -616,6 +692,14 @@ async function executeJob(job, runDir, defaultCwd, onPid) {
616
692
  return { exitCode: -1, durationMs: 0, error: e?.message };
617
693
  }
618
694
 
695
+ const promptCheck = validatePromptForSpawn(prompt, prdPath);
696
+ if (!promptCheck.ok) {
697
+ safeLog(`[scheduler] ${promptCheck.error}\n`);
698
+ closeFd();
699
+ atomicWriteJsonSync(metaPath, { slug: job.slug, cwd, sessionId, exitCode: -1, error: promptCheck.error, startedAt, finishedAt: Date.now(), durationMs: 0 });
700
+ return { exitCode: -1, durationMs: 0, error: promptCheck.error, sessionId };
701
+ }
702
+
619
703
  return await new Promise((resolve) => {
620
704
  const claudeBin = resolveClaudeBin();
621
705
  // Strip Claude Code env and secrets that leak in when session-manager is
@@ -822,6 +906,28 @@ function pickNextBatch(allJobs, running, cap) {
822
906
  const pending = allJobs.filter((j) => j.status === 'pending' && !running.has(j.slug));
823
907
  if (pending.length === 0) return [];
824
908
 
909
+ // Lowest pending group (computed up-front so the failure gate can compare).
910
+ const lowestPendingGroup = pending.reduce(
911
+ (min, j) => Math.min(min, j.parallelGroup ?? 99),
912
+ Infinity,
913
+ );
914
+
915
+ // Cross-group failure gate: refuse to advance past a group with failed jobs.
916
+ // Without this, a failed foundation PRD (e.g. 03-doc-editor-foundation
917
+ // crashed with a NUL-byte spawn error on 2026-05-21) doesn't stop later
918
+ // groups (04, 05, 06...) from running and silently corrupting the project
919
+ // state. The user can re-queue the failed job (pending) or archive it to
920
+ // unblock the gate, but the default is to halt until the failure is
921
+ // acknowledged.
922
+ const blockingFailures = allJobs.filter((j) =>
923
+ j.status === 'failed' && (j.parallelGroup ?? 99) < lowestPendingGroup,
924
+ );
925
+ if (blockingFailures.length > 0) {
926
+ const slugs = blockingFailures.map((j) => j.slug).join(', ');
927
+ console.log(`[scheduler] failure-gate: holding g${lowestPendingGroup} — ${blockingFailures.length} failed job(s) in earlier groups [${slugs}]. Reset to pending or archive to unblock.`);
928
+ return [];
929
+ }
930
+
825
931
  // Groups with at least one job in flight: either tracked in runningSet
826
932
  // (this process spawned it) or still marked 'running' in queue.json
827
933
  // (persisted from a previous session that hasn't been orphan-reset yet).
@@ -839,11 +945,7 @@ function pickNextBatch(allJobs, running, cap) {
839
945
  const queueRunningCount = allJobs.filter((j) => j.status === 'running').length;
840
946
  const effectiveRunning = Math.max(running.size, queueRunningCount);
841
947
 
842
- // Lowest pending group.
843
- const lowestPendingGroup = pending.reduce(
844
- (min, j) => Math.min(min, j.parallelGroup ?? 99),
845
- Infinity,
846
- );
948
+ // (lowestPendingGroup was computed up-front for the failure-gate check.)
847
949
 
848
950
  if (activeGroups.size > 0) {
849
951
  const lowestActive = Math.min(...activeGroups);
@@ -1002,6 +1104,12 @@ DO NOT attempt the fix. ONLY write the file. When the file exists, exit immediat
1002
1104
 
1003
1105
  const claudeBin = resolveClaudeBin();
1004
1106
  const childEnv = cleanChildEnv();
1107
+ const investigationPromptCheck = validatePromptForSpawn(prompt, `<investigation prompt for ${failedJob.slug}>`);
1108
+ if (!investigationPromptCheck.ok) {
1109
+ try { fs.writeSync(fd, `\n[scheduler] ${investigationPromptCheck.error}\n`); } catch { /* */ }
1110
+ try { fs.closeSync(fd); } catch { /* */ }
1111
+ return;
1112
+ }
1005
1113
  let child;
1006
1114
  try {
1007
1115
  child = spawn(claudeBin, [
@@ -1103,9 +1211,33 @@ async function spawnJob(job, runId, runDir, defaultCwd) {
1103
1211
  await broadcast();
1104
1212
 
1105
1213
  if (actuallyFailed && failedJobSnapshot) {
1106
- spawnInvestigation(failedJobSnapshot, runDir).catch((e) => {
1107
- console.error('[scheduler] spawnInvestigation error', job.slug, e);
1108
- });
1214
+ // Transient-failure detector: SIGTERM/SIGKILL within 30s = almost
1215
+ // always external kill (user-initiated app restart, OOM-kill, manual
1216
+ // process kill). The PRD itself didn't fail; the run was interrupted
1217
+ // before it could do meaningful work. Spawning an Opus investigator on
1218
+ // these is wasted tokens AND pollutes the queue with redundant fix-PRDs
1219
+ // (real example 2026-05-21: 07-agent-view-robot-rename-lasttool got
1220
+ // SIGTERMed at 10s by an app restart, the rename had already been done
1221
+ // anyway, and the auto-generated fix-PRD just sat in queue.json as
1222
+ // noise). Auto-retry up to 2x before falling through to investigation.
1223
+ const ec = failedJobSnapshot.exitCode;
1224
+ const transient = (ec === 143 || ec === 137) && res.durationMs < 30_000;
1225
+ const retries = failedJobSnapshot.transientRetries ?? 0;
1226
+ if (transient && retries < 2) {
1227
+ console.log(`[scheduler] transient failure (exit=${ec} dur=${res.durationMs}ms) — auto-retry ${retries + 1}/2 for ${job.slug}`);
1228
+ await mutate((s) => {
1229
+ const i = s.jobs.findIndex((x) => x.slug === job.slug);
1230
+ if (i >= 0) {
1231
+ resetJobFields(s.jobs[i], null);
1232
+ s.jobs[i].transientRetries = retries + 1;
1233
+ }
1234
+ });
1235
+ await broadcast();
1236
+ } else {
1237
+ spawnInvestigation(failedJobSnapshot, runDir).catch((e) => {
1238
+ console.error('[scheduler] spawnInvestigation error', job.slug, e);
1239
+ });
1240
+ }
1109
1241
  }
1110
1242
  } catch (e) {
1111
1243
  console.error('[scheduler] spawnJob error', job.slug, e);
@@ -1529,12 +1661,24 @@ async function init() {
1529
1661
  bootedAt = Date.now();
1530
1662
 
1531
1663
  // Boot reconciliation: mark any job that was 'running' when the app died as
1532
- // 'failed'. mutate() creates queue.json from defaults if it doesn't exist.
1664
+ // 'failed', AND kill its detached claude child if still alive. Without the
1665
+ // kill step the child keeps running as a zombie writing to the project on
1666
+ // its own schedule, which is exactly what happened on 2026-05-21 (PID 78230
1667
+ // writing PRD 05's output while the scheduler thought the job was orphaned).
1533
1668
  await mutate((state) => {
1534
1669
  for (const j of state.jobs) {
1535
1670
  if (j.status === 'running') {
1671
+ const pid = j.runtime?.pid;
1672
+ let killNote = '';
1673
+ if (pid) {
1674
+ const result = killOrphanClaudePid(pid);
1675
+ killNote = ` (orphan pid=${pid}: ${result})`;
1676
+ if (result === 'killed') {
1677
+ console.log(`[scheduler] boot: SIGTERM'd orphan claude pid=${pid} for ${j.slug}`);
1678
+ }
1679
+ }
1536
1680
  j.status = 'failed';
1537
- j.error = 'orphaned: app restarted while running';
1681
+ j.error = `orphaned: app restarted while running${killNote}`;
1538
1682
  j.finishedAt = new Date().toISOString();
1539
1683
  delete j.runtime;
1540
1684
  }