npm - claude-code-session-manager - Versions diffs - 0.21.2 → 0.21.4 - Mend

claude-code-session-manager 0.21.2 → 0.21.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/bin/cli.cjs +5 -0
package/dist/assets/{TiptapBody-CepFtp62.js → TiptapBody-CZLSQ6pj.js} +2 -2
package/dist/assets/cssMode-DfqZGMQs.js +1 -0
package/dist/assets/{freemarker2-DqQlU_4i.js → freemarker2-XTPYh37h.js} +1 -1
package/dist/assets/handlebars-DKUF5VyH.js +1 -0
package/dist/assets/html-uqoqsIeI.js +1 -0
package/dist/assets/htmlMode-aMTQs1su.js +1 -0
package/dist/assets/index-BUrrcj7x.js +3525 -0
package/dist/assets/index-DeQI4oVI.css +32 -0
package/dist/assets/javascript-BVxRZMds.js +1 -0
package/dist/assets/{jsonMode-CFEryxme.js → jsonMode-D04xP2s5.js} +4 -4
package/dist/assets/liquid-BkQHTH2P.js +1 -0
package/dist/assets/lspLanguageFeatures-By9uLznH.js +4 -0
package/dist/assets/mdx-Du1IlbjV.js +1 -0
package/dist/assets/{index-CrE67_1W.css → monaco-editor-BTnBOi8r.css} +1 -32
package/dist/assets/monaco-editor-BW5C4Iv1.js +908 -0
package/dist/assets/python-DSlImqXd.js +1 -0
package/dist/assets/razor-BmUVyvSK.js +1 -0
package/dist/assets/{tsMode-CNLm8WAZ.js → tsMode-Btj0TTH7.js} +1 -1
package/dist/assets/typescript-Bzelq9vO.js +1 -0
package/dist/assets/xml-Whd9EaSd.js +1 -0
package/dist/assets/yaml-QYf0-IN8.js +1 -0
package/dist/index.html +4 -2
package/package.json +1 -1
package/src/main/__tests__/runVerify.test.cjs +138 -0
package/src/main/config.cjs +36 -4
package/src/main/historyAggregator.cjs +400 -149
package/src/main/index.cjs +8 -0
package/src/main/ipcSchemas.cjs +42 -13
package/src/main/kg.cjs +87 -30
package/src/main/lib/credentials.cjs +7 -0
package/src/main/lib/e2eStateMachine.cjs +39 -0
package/src/main/runVerify.cjs +51 -5
package/src/main/scheduler/prdParser.cjs +16 -1
package/src/main/scheduler.cjs +171 -13
package/src/main/transcripts.cjs +141 -19
package/src/main/usageMatrix.cjs +7 -3
package/src/main/webRemote.cjs +196 -31
package/src/preload/api.d.ts +40 -0
package/src/preload/index.cjs +7 -0
package/dist/assets/cssMode-8hR_Zezu.js +0 -1
package/dist/assets/handlebars-Ts2NzFcS.js +0 -1
package/dist/assets/html-QjLxt2p6.js +0 -1
package/dist/assets/htmlMode-Dst38sy3.js +0 -1
package/dist/assets/index-XKsJ4Pk3.js +0 -4431
package/dist/assets/javascript-CNxLjNGz.js +0 -1
package/dist/assets/liquid-BBfKLTB_.js +0 -1
package/dist/assets/lspLanguageFeatures-BNyh7ouG.js +0 -4
package/dist/assets/mdx-SaTyS1xC.js +0 -1
package/dist/assets/python-C84TNhMd.js +0 -1
package/dist/assets/razor-BaVJM3L8.js +0 -1
package/dist/assets/typescript-BdrDpzPy.js +0 -1
package/dist/assets/xml-CHJ3Xjjj.js +0 -1
package/dist/assets/yaml-Cg2-K8t3.js +0 -1

package/src/main/ipcSchemas.cjs CHANGED Viewed

@@ -394,35 +394,64 @@ function validated(schema, handler) {
 }
 // ──────────────────────────────────────────── Web Remote command allowlist
-// Single source of truth — imported by webRemote.cjs and by the unit test.
-// Only these type strings will ever reach a handler; all others are silently
-// dropped without leaking error details back to the relay (ADR §6.2).
-const ALLOWED_COMMANDS = new Set([
+// Commands are split into three tiers:
+//   READ_COMMANDS      — return data; allowed when remoteEnabled=true.
+//   SAS_GATED_READS    — return sensitive user data (sessions, PRDs, logs,
+//                        transcript summaries); additionally require
+//                        _e2eAuthenticated=true (SAS confirmed by user).
+//                        A compromised relay cannot exfiltrate this data from
+//                        a session that has not been SAS-confirmed.
+//   MUTATE_COMMANDS    — write files, spawn processes, or mutate persisted
+//                        state; gated behind remoteControlEnabled=true AND
+//                        _e2eAuthenticated=true.
+// ALLOWED_COMMANDS is the union, kept for existing import compatibility.
+//
+// Ungated READ_COMMANDS (justify each):
+//   cmd:app:version      — exposes only the app semver string; no user data.
+//   cmd:session:unsubscribe — teardown lifecycle; returns nothing sensitive.
+const READ_COMMANDS = new Set([
+  'cmd:app:version',
+  // v2 mobile: unsubscribe is a teardown lifecycle call with no data payload.
+  'cmd:session:unsubscribe',
+]);
+// Sensitive reads — return user data; require SAS confirmation same as MUTATE.
+const SAS_GATED_READS = new Set([
   'cmd:sessions:load',
+  'cmd:schedule:state',
+  'cmd:schedule:read-prd',
+  'cmd:schedule:read-log',
+  'cmd:history:aggregate',
+  // subscribe initiates a live stream of session state/summary — sensitive.
+  'cmd:session:subscribe',
+]);
+const MUTATE_COMMANDS = new Set([
   'cmd:sessions:save',
   'cmd:pty:spawn',
   'cmd:pty:write',
-  'cmd:pty:resize',
+  // pty:kill terminates a live session; pty:resize drives the geometry of the
+  // user's interactive PTY — both write live process state, so they are gated
+  // behind remoteControlEnabled + SAS like every other mutation. A read-only
+  // mobile mirror has no business killing or resizing the desktop's session.
   'cmd:pty:kill',
-  'cmd:schedule:state',
-  'cmd:schedule:read-prd',
-  'cmd:schedule:read-log',
+  'cmd:pty:resize',
   'cmd:schedule:write-prd',
   'cmd:schedule:reset-job',
   'cmd:schedule:run-now',
   'cmd:schedule:set-config',
-  'cmd:history:aggregate',
-  'cmd:app:version',
-  // v2 mobile: per-session live state + summary push (ARCHITECTURE-V2-MOBILE.md §3)
-  'cmd:session:subscribe',
-  'cmd:session:unsubscribe',
 ]);
+const ALLOWED_COMMANDS = new Set([...READ_COMMANDS, ...SAS_GATED_READS, ...MUTATE_COMMANDS]);
 module.exports = {
   // Centralized slug regex — used by scheduler.cjs and queueOps.cjs for
   // direct test()/match() containment checks alongside the zod parses.
   SCHEDULE_SLUG_RE,
   SCHEDULE_RUN_ID_RE,
+  READ_COMMANDS,
+  SAS_GATED_READS,
+  MUTATE_COMMANDS,
   ALLOWED_COMMANDS,
   schemas: {
     webRemotePair,

package/src/main/kg.cjs CHANGED Viewed

@@ -39,16 +39,24 @@ const path = require('node:path');
 const os = require('node:os');
 const { resolveClaudeBin } = require('./lib/claudeBin.cjs');
 const { encodeCwd } = require('./lib/encodeCwd.cjs');
+const { writeJson } = require('./config.cjs');
 const HOME = os.homedir();
 const KG_DIR = path.join(HOME, '.claude', 'knowledge-log');
 const LOG_PATH = path.join(KG_DIR, 'prompts.jsonl');
 const GRAPHS_DIR = path.join(KG_DIR, 'graphs');
 const INGEST_STATE_PATH = path.join(KG_DIR, 'ingest-state.json');
+const PROMPT_INDEX_PATH = path.join(KG_DIR, 'prompt-index.json');
 const BATCH = 20;                 // prompts per extraction call (also a per-project cap)
 const KNOWN_VOCAB = 200;          // top node names pre-seeded for dedup-at-extraction
 const MAX_TAIL_BYTES = 8 * 1024 * 1024;   // bound bytes scanned per ingest run
 const MAX_EXTRACTIONS_PER_RUN = 30;       // bound claude calls per run (cost/time)
+// Coalescing window before an auto-ingest after new prompts land. Units never
+// mix projects, and a project switch in the log closes the current batch — so
+// with concurrent sessions a short window yields 1-2-prompt batches and one
+// claude spawn each (~1.2K extraction runs in one 48h period). A long window
+// lets prompts accumulate into fuller batches; the KG tab tolerates the lag.
+const WATCH_COALESCE_MS = 5 * 60_000;
 const ENTITY_TYPES = ['project', 'feature', 'tool', 'tech', 'concept', 'goal', 'person'];
@@ -137,11 +145,7 @@ async function loadGraphFor(cwd) {
 }
 async function saveGraph(g) {
-  await fsp.mkdir(GRAPHS_DIR, { recursive: true });
-  const p = graphPath(g.cwd);
-  const tmp = `${p}.tmp`;
-  await fsp.writeFile(tmp, JSON.stringify(g, null, 2));
-  await fsp.rename(tmp, p);   // atomic
+  await writeJson(graphPath(g.cwd), g);
 }
 async function loadIngestState() {
@@ -152,10 +156,20 @@ async function loadIngestState() {
 }
 async function saveIngestState(s) {
-  await fsp.mkdir(KG_DIR, { recursive: true });
-  const tmp = `${INGEST_STATE_PATH}.tmp`;
-  await fsp.writeFile(tmp, JSON.stringify(s, null, 2));
-  await fsp.rename(tmp, INGEST_STATE_PATH);
+  await writeJson(INGEST_STATE_PATH, s);
+}
+/**
+ * Read the per-project prompt-count sidecar from PROMPT_INDEX_PATH.
+ * Shape: { [encodedCwd]: { count: number, cwd: string } }
+ *
+ * Returns null on ANY failure — a missing file, an unreadable file, or
+ * content that is not valid JSON all land in the same bare catch. The parsed
+ * value is returned as-is; its shape is not validated here.
+ *
+ * listProjects() and getState() treat null as the signal to rebuild the index
+ * from the full prompt log (the one-time migration scan).
+ * NOTE(review): ingest() substitutes {} for null and then saves the index, so
+ * if ingest commits a batch before either reader runs, the file exists and
+ * the migration scan is skipped — confirm that ordering is acceptable.
+ *
+ * @returns {Promise<object|null>} Parsed index, or null if it could not be read.
+ */
+async function readPromptIndex() {
+  try { return JSON.parse(await fsp.readFile(PROMPT_INDEX_PATH, 'utf8')); }
+  catch { return null; }
+}
+async function savePromptIndex(idx) {
+  await writeJson(PROMPT_INDEX_PATH, idx);
 }
 /** Canonical dedup key: lowercase, strip leading article, collapse whitespace. */
@@ -337,6 +351,7 @@ async function ingest() {
   broadcast('kg:ingest-progress', { phase: 'start', ingesting: true });
   try {
     const st = await loadIngestState();
+    const promptIdx = await readPromptIndex() ?? {};
     let stat;
     try { stat = await fsp.stat(LOG_PATH); }
     catch { broadcast('kg:ingest-progress', { phase: 'done', ingesting: false, added: 0 }); return { ok: true, added: 0, note: 'no log yet' }; }
@@ -423,6 +438,14 @@ async function ingest() {
         st.lastOffset += u.bytes;
         st.lastTs = u.entries[u.entries.length - 1].ts || st.lastTs;
         st.updatedAt = new Date().toISOString();
+        // Write index before advancing watermark: if we crash between these two
+        // writes, the watermark hasn't moved, so the batch will be re-processed
+        // (the index count may end up slightly high) rather than the watermark
+        // advancing past a batch whose index entry was never written.
+        if (!promptIdx[u.enc]) promptIdx[u.enc] = { count: 0, cwd: u.cwd };
+        promptIdx[u.enc].count += u.entries.length;
+        promptIdx[u.enc].cwd = u.cwd;
+        await savePromptIndex(promptIdx);
         await saveIngestState(st);
         if (extractions >= MAX_EXTRACTIONS_PER_RUN) { capped = true; break; }
         continue;
@@ -435,12 +458,19 @@ async function ingest() {
       g.updatedAt = new Date().toISOString();
       // Commit this batch: graph first (so a crash can't advance the watermark
-      // past unsaved work), then the watermark.
+      // past unsaved work), then the sidecar index, then the watermark.
       await saveGraph(g);
       st.lastOffset += u.bytes;
       st.promptCount += u.entries.length;
       st.lastTs = batchTs;
       st.updatedAt = new Date().toISOString();
+      // Write index before advancing watermark so a crash between the two
+      // leaves the watermark un-advanced (re-processable) rather than
+      // advancing past a batch whose index entry was never committed.
+      if (!promptIdx[u.enc]) promptIdx[u.enc] = { count: 0, cwd: u.cwd };
+      promptIdx[u.enc].count += u.entries.length;
+      promptIdx[u.enc].cwd = u.cwd;
+      await savePromptIndex(promptIdx);
       await saveIngestState(st);
       committedPrompts += u.entries.length;
@@ -473,25 +503,29 @@ async function ingest() {
 /** Enumerate projects seen in the log, enriched with per-project graph stats. */
 async function listProjects() {
-  const prompts = await readAllPrompts();
-  const byEnc = new Map();
-  for (const p of prompts) {
-    if (!p.cwd) continue;
-    const enc = encodeCwd(p.cwd);
-    let e = byEnc.get(enc);
-    if (!e) { e = { cwd: p.cwd, enc, total: 0 }; byEnc.set(enc, e); }
-    e.total++;
-    e.cwd = p.cwd; // keep most recent spelling
+  let idx = await readPromptIndex();
+  if (idx === null) {
+    // One-time migration: build sidecar from the full log.
+    idx = {};
+    const prompts = await readAllPrompts();
+    for (const p of prompts) {
+      if (!p.cwd) continue;
+      const enc = encodeCwd(p.cwd);
+      if (!idx[enc]) idx[enc] = { count: 0, cwd: p.cwd };
+      idx[enc].count++;
+      idx[enc].cwd = p.cwd;
+    }
+    await savePromptIndex(idx).catch(() => {});
   }
   const out = [];
-  for (const e of byEnc.values()) {
-    const g = await loadGraphFor(e.cwd);
+  for (const [enc, entry] of Object.entries(idx)) {
+    const g = await loadGraphFor(entry.cwd);
     out.push({
-      cwd: e.cwd,
-      label: shortLabel(e.cwd),
-      total: e.total,
+      cwd: entry.cwd,
+      label: shortLabel(entry.cwd),
+      total: entry.count,
       processed: g.promptCount || 0,
-      pending: Math.max(0, e.total - (g.promptCount || 0)),
+      pending: Math.max(0, entry.count - (g.promptCount || 0)),
       nodes: g.nodes.length,
       edges: g.edges.length,
       lastIngest: g.updatedAt,
@@ -510,7 +544,24 @@ async function getState(cwd) {
   const target = cwd || await defaultCwd();
   const enc = encodeCwd(target);
   const g = await loadGraphFor(target);
-  const prompts = (await readAllPrompts()).filter((p) => encodeCwd(p.cwd) === enc);
+  let idx = await readPromptIndex();
+  let totalPrompts;
+  if (idx === null) {
+    // One-time migration fallback — build from full log.
+    idx = {};
+    const prompts = await readAllPrompts();
+    for (const p of prompts) {
+      if (!p.cwd) continue;
+      const e2 = encodeCwd(p.cwd);
+      if (!idx[e2]) idx[e2] = { count: 0, cwd: p.cwd };
+      idx[e2].count++;
+      idx[e2].cwd = p.cwd;
+    }
+    await savePromptIndex(idx).catch(() => {});
+    totalPrompts = idx[enc]?.count ?? 0;
+  } else {
+    totalPrompts = idx[enc]?.count ?? 0;
+  }
   return {
     cwd: target,
     label: shortLabel(target),
@@ -518,8 +569,8 @@ async function getState(cwd) {
     edges: g.edges,
     status: {
       promptCount: g.promptCount || 0,
-      totalPrompts: prompts.length,
-      pending: Math.max(0, prompts.length - (g.promptCount || 0)),
+      totalPrompts,
+      pending: Math.max(0, totalPrompts - (g.promptCount || 0)),
       lastIngest: g.updatedAt,
       ingesting,
       logPath: LOG_PATH,
@@ -584,8 +635,14 @@ function init(opts = {}) {
     fs.mkdirSync(KG_DIR, { recursive: true });
     fs.watch(KG_DIR, (_evt, file) => {
       if (file && file !== 'prompts.jsonl') return;
-      if (watchTimer) clearTimeout(watchTimer);
-      watchTimer = setTimeout(() => { ingest().catch(() => {}); }, 8_000);
+      // Leading-edge coalesce: first new prompt arms the timer; later prompts
+      // ride along instead of resetting it, so busy periods can't starve
+      // ingest and every run sees a full window's worth of prompts.
+      if (watchTimer) return;
+      watchTimer = setTimeout(() => {
+        watchTimer = null;
+        ingest().catch(() => {});
+      }, WATCH_COALESCE_MS);
     });
   } catch { /* watch is best-effort */ }
 }

package/src/main/lib/credentials.cjs CHANGED Viewed

@@ -168,6 +168,13 @@ async function refreshIfNeeded(forceRefresh = false) {
   }
   if (alreadyExpired) {
+    // Re-read from disk in case credentials were externally refreshed (e.g. via
+    // `claude login`) between our initial read and the failed OAuth attempt.
+    const recheckCr = await readCredentials();
+    if (recheckCr.kind === 'ok' && !isExpired(recheckCr.creds)) {
+      appendRefreshLog({ event: 'externally_refreshed_ok', recheckExpiresAt: recheckCr.creds.expiresAt ?? null });
+      return { kind: 'ok', creds: recheckCr.creds };
+    }
     const ms = expiresAtMs(creds);
     appendRefreshLog({ event: 'auth_failed_expired', expiredAtMs: ms });
     return {

package/src/main/lib/e2eStateMachine.cjs ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * Pure E2E session state machine for the web-remote relay.
+ * No Electron, no I/O — importable in unit tests.
+ *
+ * State transitions (confirmSas below implements only pending_sas → authenticated):
+ *   idle        → pending_sas   : successful deriveSessionKey + deriveSas
+ *   idle        → failed        : crypto derivation error
+ *   pending_sas → authenticated : user confirms SAS
+ *   pending_sas → failed        : deriveSas threw after sessionKey succeeded
+ *   any         → idle          : disconnect / reset
+ */
+/**
+ * Build an E2E state record. Called with no arguments it yields the initial
+ * record: state 'idle' with no session key and no pending SAS.
+ * No validation is performed; the arguments are stored as given.
+ *
+ * @param {string} [state='idle'] - State name (see the transition table above).
+ * @param {Buffer|null} [sessionKey=null] - Derived session key, if any.
+ * @param {string|null} [pendingSas=null] - SAS awaiting user confirmation, if any.
+ * @returns {{ state: string, sessionKey: Buffer|null, pendingSas: string|null }}
+ */
+function makeState(state = 'idle', sessionKey = null, pendingSas = null) {
+  return { state, sessionKey, pendingSas };
+}
+/**
+ * Attempt to confirm the SAS.  Pure — does not mutate; returns the next state.
+ *
+ * Only a record in state 'pending_sas' can be confirmed. On success the result
+ * carries a fresh 'authenticated' record that keeps the session key and clears
+ * pendingSas. In any other state the result is { ok: false, error, next }, where
+ * `next` is the very same object that was passed in and `error` is one of:
+ *   'no_e2e_session'        — state was 'idle'
+ *   'e2e_failed'            — state was 'failed'
+ *   'already_authenticated' — state was 'authenticated'
+ *   'unexpected_state'      — the lookup in errorMap produced null/undefined
+ *
+ * NOTE(review): errorMap is a plain object literal, so a state string equal to
+ * an inherited property name (e.g. 'constructor') resolves to that inherited
+ * value instead of falling back to 'unexpected_state'.
+ *
+ * @param {{ state: string, sessionKey: Buffer|null, pendingSas: string|null }} e2eState
+ * @returns {{ ok: boolean, error?: string, next: { state: string, sessionKey: Buffer|null, pendingSas: string|null } }}
+ */
+function confirmSas(e2eState) {
+  if (e2eState.state !== 'pending_sas') {
+    const errorMap = {
+      idle: 'no_e2e_session',
+      failed: 'e2e_failed',
+      authenticated: 'already_authenticated',
+    };
+    const error = errorMap[e2eState.state] ?? 'unexpected_state';
+    return { ok: false, error, next: e2eState };
+  }
+  return {
+    ok: true,
+    next: makeState('authenticated', e2eState.sessionKey, null),
+  };
+}
+module.exports = { makeState, confirmSas };

package/src/main/runVerify.cjs CHANGED Viewed

@@ -50,6 +50,19 @@ const VERDICTS_SCHEMA_VERSION = 1;
  *   2. Traceback + Error within 10 lines (Python exception)
  *   3. ModuleNotFoundError / ImportError (missing venv / broken deps)
  */
+/**
+ * True when a tool_result content is a Claude Code harness tool error rather
+ * than task output — emitted when the model calls a tool that doesn't exist or
+ * isn't allowed (e.g. `<tool_use_error>Error: No such tool available: bash`).
+ * The harness rejects the call; the model recovers by retrying with a valid
+ * tool. Never a task failure, so the verifier must not downgrade on it.
+ *
+ * Detection is a plain substring test for `<tool_use_error>`, or a
+ * word-bounded, case-sensitive match of "No such tool available", anywhere in
+ * the text. Non-string or empty content returns false.
+ * NOTE(review): because neither check is anchored, tool output that merely
+ * quotes one of these markers is also treated as a harness error and is
+ * skipped by the verifyRun scan — confirm this is acceptable.
+ *
+ * @param {*} content - tool_result content; only non-empty strings can match.
+ * @returns {boolean} Whether the content looks like a harness tool error.
+ */
+function isHarnessToolError(content) {
+  if (typeof content !== 'string' || !content) return false;
+  return content.includes('<tool_use_error>')
+    || /\bNo such tool available\b/.test(content);
+}
 function detectPattern(content) {
   if (typeof content !== 'string' || !content) return null;
@@ -58,20 +71,24 @@ function detectPattern(content) {
     return { verdict: 'transcript_errors', pattern: 'FAIL/FATAL at line start' };
   }
-  // (2) Python Traceback + Error line within next 10 lines.
+  // (2) Python Traceback + exception line within next 10 lines. Both anchored
+  // to line starts: reviewer prose quoting "will crash with ImportError" or
+  // embedding "...Error:" mid-sentence must not match (feedback 2026-06-10-01).
   const lines = content.split('\n');
   for (let i = 0; i < lines.length; i++) {
-    if (lines[i].includes('Traceback (most recent call last):')) {
+    if (/^\s*Traceback \(most recent call last\):/.test(lines[i])) {
       for (let j = i + 1; j < Math.min(i + 11, lines.length); j++) {
-        if (lines[j].includes('Error:')) {
+        if (/^\s*[A-Za-z_][\w.]*(?:Error|Exception)\s*:/.test(lines[j])) {
           return { verdict: 'transcript_errors', pattern: 'Traceback + Error within 10 lines' };
         }
       }
     }
   }
-  // (3) Import / module errors (verification was skipped).
-  if (content.includes('ModuleNotFoundError') || content.includes('ImportError')) {
+  // (3) Import / module errors (verification was skipped). Line-anchored:
+  // real interpreter output starts the line with the exception name
+  // ("ModuleNotFoundError: No module named 'x'"); prose never does.
+  if (/^\s*(?:ModuleNotFoundError|ImportError)\s*(?::|$)/m.test(content)) {
     return { verdict: 'verify_unavailable', pattern: 'ModuleNotFoundError/ImportError' };
   }
@@ -195,6 +212,18 @@ function toolUseDesc(events, toolUseId) {
   return '';
 }
+/**
+ * Return the tool name of the tool_use that produced a given tool_result.
+ * Returns '' if not found.
+ *
+ * Scans `events` front to back and stops at the first event whose kind is
+ * 'tool_use' and whose toolUseId strictly equals the argument. A matching
+ * event whose toolName is null/undefined also yields ''. A falsy toolUseId
+ * short-circuits to '' without scanning.
+ *
+ * @param {Array<object>} events - Transcript events; reads kind, toolUseId and toolName.
+ * @param {string} [toolUseId] - Id taken from the tool_result event.
+ * @returns {string} The tool name, or '' when there is no usable match.
+ */
+function toolUseName(events, toolUseId) {
+  if (!toolUseId) return '';
+  for (const ev of events) {
+    if (ev.kind === 'tool_use' && ev.toolUseId === toolUseId) return ev.toolName ?? '';
+  }
+  return '';
+}
 /**
  * Check whether the next ≤5 tool_use calls after `fromSeq` include a package
  * install command (pip install, pip3 install, uv sync, uv pip install).
@@ -456,6 +485,15 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
       const ev = events[i];
       if (ev.kind !== 'tool_result') continue;
+      // Harness tool errors (`<tool_use_error>…`) are emitted when the model
+      // requests a tool that isn't available — e.g. a wrong-case name like
+      // "bash" instead of "Bash", or a tool outside the allowlist. The harness
+      // rejects the call and the model retries with a valid tool; the task is
+      // unaffected. These are never task failures, so they are exempt from both
+      // the is_error scan and the content pattern scan (false-positive class
+      // seen in 58-web-remote-correctness-batch, 2026-06-10).
+      if (isHarnessToolError(ev.content)) continue;
       // is_error:true in the final 20% of the transcript.
       if (ev.isError && i >= last20pctStart) {
         const desc = toolUseDesc(events, ev.toolUseId);
@@ -471,6 +509,12 @@ async function verifyRun({ runDir, prdPath, queueEntry, allJobs = [] }) {
       if (!ev.content) continue;
+      // Subagent (Task) results are structured prose — review findings that
+      // *describe* exceptions ("will crash with ImportError") are the dominant
+      // false-positive source (feedback 2026-06-10-01). Real runtime errors
+      // surface through Bash/test tool_results, which are still scanned.
+      if (toolUseName(events, ev.toolUseId) === 'Task') continue;
       const hit = detectPattern(ev.content);
       if (!hit) continue;
@@ -520,6 +564,8 @@ module.exports = {
   verifyRun,
   // Exposed for unit tests.
   detectPattern,
+  isHarnessToolError,
+  toolUseName,
   extractSoakFromBody,
   parsePrdBodyDepFragments,
   checkDeps,

package/src/main/scheduler/prdParser.cjs CHANGED Viewed

@@ -15,9 +15,24 @@
 const fs = require('node:fs');
 const fsp = require('node:fs/promises');
+const os = require('node:os');
 const path = require('node:path');
 const { splitFrontmatter } = require('../lib/prdFrontmatter.cjs');
+/**
+ * Expand a PRD `cwd` value to an absolute path.
+ * - `~/...` or `~` alone → absolute under os.homedir()
+ * - Already-absolute paths pass through unchanged.
+ * - Bare relative paths → joined onto os.homedir().
+ * null/empty returns null (caller falls back to defaultCwd).
+ *
+ * Only the two tilde forms above are expanded: `~user/...` is not special and
+ * falls through to the relative branch (joined under the home directory).
+ * Results built with path.join are normalised by it; absolute inputs are
+ * returned verbatim.
+ * NOTE(review): a truthy non-string value would throw on startsWith — this
+ * assumes frontmatter values are strings; confirm against splitFrontmatter.
+ *
+ * @param {string|null|undefined} cwd - Raw `cwd` from the PRD frontmatter.
+ * @returns {string|null} Expanded path, or null when cwd is falsy.
+ */
+function expandCwd(cwd) {
+  if (!cwd) return null;
+  if (cwd === '~' || cwd.startsWith('~/')) return path.join(os.homedir(), cwd.slice(1));
+  if (path.isAbsolute(cwd)) return cwd;
+  return path.join(os.homedir(), cwd);
+}
 // Hard cap to keep one malformed PRD (e.g. a binary blob accidentally renamed
 // .md) from wedging the main thread. PRDs are PRDs, not media files; 1 MB is
 // already ~25k lines and well beyond any legitimate authored doc.
@@ -46,7 +61,7 @@ async function parsePrdRaw(filePath) {
     slug: base,
     path: filePath,
     title: fm.title || base,
-    cwd: fm.cwd || null,
+    cwd: expandCwd(fm.cwd || null),
     estimateMinutes: fm.estimateMinutes ? Number(fm.estimateMinutes) || null : null,
     parallelGroup: (fm.parallelGroup ? Number(fm.parallelGroup) || null : null) ?? groupFromName ?? 99,
     body: body.trim(),