moflo 4.10.4 → 4.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/.claude/agents/analysis/analyze-code-quality.md +14 -0
  2. package/.claude/agents/analysis/code-analyzer.md +14 -0
  3. package/.claude/agents/architecture/system-design/arch-system-design.md +14 -0
  4. package/.claude/agents/base-template-generator.md +14 -0
  5. package/.claude/agents/core/coder.md +14 -0
  6. package/.claude/agents/core/planner.md +14 -0
  7. package/.claude/agents/core/researcher.md +14 -0
  8. package/.claude/agents/core/reviewer.md +14 -0
  9. package/.claude/agents/core/tester.md +14 -0
  10. package/.claude/agents/custom/test-long-runner.md +14 -0
  11. package/.claude/agents/development/dev-backend-api.md +14 -0
  12. package/.claude/agents/development/dev-database.md +13 -0
  13. package/.claude/agents/development/dev-frontend.md +13 -0
  14. package/.claude/agents/devops/ci-cd/ops-cicd-github.md +14 -0
  15. package/.claude/agents/documentation/api-docs/docs-api-openapi.md +14 -0
  16. package/.claude/agents/security/security-auditor.md +13 -0
  17. package/.claude/guidance/shipped/moflo-claude-swarm-cohesion.md +5 -3
  18. package/.claude/guidance/shipped/moflo-cli-reference.md +17 -31
  19. package/.claude/guidance/shipped/moflo-task-icons.md +10 -6
  20. package/.claude/guidance/shipped/moflo-yaml-reference.md +1 -1
  21. package/.claude/helpers/gate.cjs +101 -1
  22. package/.claude/helpers/subagent-bootstrap.json +1 -1
  23. package/.claude/helpers/subagent-start.cjs +1 -1
  24. package/bin/gate.cjs +101 -1
  25. package/bin/lib/daemon-recycler.mjs +203 -0
  26. package/bin/session-start-launcher.mjs +173 -77
  27. package/dist/src/cli/commands/daemon.js +43 -18
  28. package/dist/src/cli/commands/retire.js +22 -17
  29. package/dist/src/cli/init/helpers-generator.js +36 -1
  30. package/dist/src/cli/init/settings-generator.js +5 -2
  31. package/dist/src/cli/services/hook-block-hash.js +9 -3
  32. package/dist/src/cli/services/subagent-bootstrap.js +1 -1
  33. package/dist/src/cli/version.js +1 -1
  34. package/package.json +2 -2
  35. package/retired-files.json +305 -112
package/bin/gate.cjs CHANGED
@@ -82,6 +82,53 @@ var command = process.argv[2];
82
82
 
83
83
  var EXEMPT = ['.claude/', '.claude\\', 'CLAUDE.md', 'MEMORY.md', 'workflow-state', 'node_modules', 'moflo.yaml'];
84
84
  var DANGEROUS = ['rm -rf /', 'format c:', 'del /s /q c:\\', ':(){:|:&};:', 'mkfs.', '> /dev/sda'];
85
+
86
+ // #1132 — Bash memory-first gate.
87
+ //
88
+ // CREDIT: the legacy detector that marks the gate satisfied when Claude
89
+ // manually invokes a memory-search CLI (flo-search, the moflo MCP search via
90
+ // shell, etc.). Preserved verbatim from the pre-#1132 behaviour so existing
91
+ // recipes keep crediting the gate.
92
+ var CREDIT_MEMORY_SEARCH_RE = /semantic-search|memory search|memory retrieve|memory-search/;
93
+ // BLOCK: read-like Bash commands that bypass the existing check-before-read /
94
+ // check-before-scan gates by going through the shell. Anchored to the start of
95
+ // the line so subcommands inside pipelines or `npm install grep` don't trip.
96
+ // Covers POSIX read/search tools, Windows cmd `type`, and PowerShell readers.
97
// Case-insensitive detector for Bash commands whose LEADING command reads or
// searches files. Every arm is anchored with `^\s*`, so only the first command
// on the line is inspected (pipeline filters such as `npm test | grep FAIL`
// never match).
var READ_LIKE_BASH_RE = (function buildReadLikeBashRe() {
  // POSIX pagers / dumpers.
  var posixReaders = '^\\s*(?:cat|head|tail|less|more|bat|xxd|od|hexdump)\\b';
  // POSIX search tools.
  var posixSearchers = '^\\s*(?:grep|rg|ag|fgrep|egrep|find|fd)\\b';
  // `sed -n` is the print-only form of sed.
  var sedPrint = '^\\s*sed\\s+-n\\b';
  // awk, unless a heredoc (`<<`) appears later on the line.
  var awkNoHeredoc = '^\\s*awk\\s+(?!.*<<)';
  // Windows cmd `type <path>`. The argument must contain a slash, backslash or
  // dot so the shell-builtin lookup form (`type ls`) is not matched; there is
  // no `$` anchor, so a piped `type src\foo.ts | grep x` still matches.
  // Extension-less names such as `type Makefile` pass through by design.
  var windowsType = '^\\s*type\\s+\\S*[\\\\/.]';
  // PowerShell readers / searchers and their aliases.
  var powershellReaders = '^\\s*(?:Get-Content|gc|Select-String|sls)\\b';

  var arms = [
    posixReaders,
    posixSearchers,
    sedPrint,
    awkNoHeredoc,
    windowsType,
    powershellReaders,
  ];
  return new RegExp(arms.join('|'), 'i');
}());
112
+ // CARVE-OUT: commands that LOOK read-like but are operational. Anchored to the
113
+ // LEADING command — the pipe-filter case (`npm test | grep FAIL`) is already
114
+ // handled by READ_LIKE's `^\s*` anchor never matching the leading `npm`, so
115
+ // there is intentionally no pipe arm here: catching the leading command lets
116
+ // `grep -r TODO src/ | head -5` reach the BLOCK exit (which it must, that's
117
+ // the gap the ticket exists to close). #1132.
118
// Commands that look read-like but are operational (writes, tooling, network).
// Each arm is anchored to the leading command of the line.
//
// Case-insensitive to agree with READ_LIKE_BASH_RE: a command that is detected
// as read-like regardless of case must also be exemptable regardless of case
// (`CAT <<EOF` is a heredoc write, not a read).
//
// SYNC: any copy of this table shipped elsewhere (helpers generator, synced
// helper scripts) needs the same change.
var BASH_CARVE_OUT_RE = new RegExp([
  '^\\s*(npm|npx|pnpm|yarn|bun|node|deno|tsx|ts-node)\\s',
  '^\\s*(git|gh|hub)\\s',
  '^\\s*(docker|kubectl|helm|terraform)\\s',
  '^\\s*(curl|wget|http|fetch)\\s',
  '^\\s*(jq|yq|xq)\\s',
  '^\\s*(echo|printf|true|false|sleep|test|\\[)\\s',
  // Heredoc / here-string: cat is producing content, not reading a file.
  '^\\s*cat\\s+(<<|<<<)',
  // `cat ... > file` (or `>>`, `&>`) before any pipe: cat is WRITING a file.
  // The redirect must be a stdout redirect:
  //   - `(?<!\s2>?)` rejects `2>` and `2>>` (stderr only — the file content
  //     still reaches the caller, e.g. `cat src/foo.ts 2>/dev/null`);
  //   - `(?!&)` rejects fd duplication such as `2>&1` / `>&2`.
  // The lazy `[^|]*?` lets a later genuine redirect on the same command
  // (`cat a 2>/dev/null > out.txt`) still qualify.
  '^\\s*cat\\s+[^|]*?(?<!\\s2>?)>(?!&)',
  '^\\s*tee\\b',
  // Lazy `.+?` instead of `.+\s` to avoid catastrophic backtracking on long
  // `find` commands that lack a `-delete` / `-exec rm` suffix.
  '^\\s*find\\s+.+?-(delete|exec\\s+rm)\\b',
].join('|'), 'i');
85
132
  var DIRECTIVE_RE = /^(yes|no|yeah|yep|nope|sure|ok|okay|correct|right|exactly|perfect)\b/i;
86
133
  var TASK_RE = /\b(fix|bug|error|implement|add|create|build|write|refactor|debug|test|feature|issue|security|optimi)\b/i;
87
134
 
@@ -146,6 +193,29 @@ function classifyNamespaceHint(promptText) {
146
193
  return '';
147
194
  }
148
195
 
196
+ // #1132 — command-shape namespace classifier for the bash-BLOCK message.
197
+ // Used when the prompt-derived `lastNamespaceHint` is empty (e.g. subagents,
198
+ // which never see the user prompt) so the block message still routes to a
199
+ // useful namespace rather than the generic "pick one of five" list. Returns a
200
+ // full sentence in the same shape as classifyNamespaceHint so the BLOCK arm
201
+ // can write either source's hint without branching on format.
202
+ //
203
+ // SYNC: duplicated verbatim in src/cli/init/helpers-generator.ts.
204
/**
 * Derive a memory-namespace hint from the shape of a Bash command.
 *
 * Only the LEADING command and its own arguments are considered: the argument
 * scan stops at the first `|`, so a doc-looking filename that appears later in
 * a pipeline (`cat src/app.ts | grep notes.md`) does not produce a docs hint.
 *
 * SYNC: the same classifier is duplicated in src/cli/init/helpers-generator.ts
 * — keep both copies in step when changing these patterns.
 *
 * @param {string} cmd Raw Bash command line; non-strings are treated as empty.
 * @returns {string} A full "Memory namespace hint: ..." sentence, or '' when
 *   the command shape suggests no particular namespace.
 */
function classifyBashNamespaceHint(cmd) {
  var text = typeof cmd === 'string' ? cmd : '';

  // Search-like tools — the user is hunting for a symbol/file, code-map wins.
  if (/^\s*(?:grep|rg|ag|fgrep|egrep|find|fd|Select-String|sls)\b/i.test(text)) {
    return 'Memory namespace hint: use "code-map" for codebase navigation.';
  }

  // Reading a .md / RST / TXT, or a well-known doc file — guidance/learnings win.
  // `[^|]*` (not `\S*`) so flag-prefixed forms like `head -50 README.md` match,
  // while staying inside the leading command's own argument list.
  var readsDocByExtension =
    /^\s*(?:cat|head|tail|less|more|bat|type|Get-Content|gc)\b[^|]*\.(?:md|mdx|rst|txt)\b/i;
  var readsDocByName =
    /^\s*(?:cat|head|tail|less|more|bat|type|Get-Content|gc)\b[^|]*\b(?:README|CLAUDE|CHANGELOG|CONTRIBUTING|LICENSE)\b/i;
  if (readsDocByExtension.test(text) || readsDocByName.test(text)) {
    return 'Memory namespace hint: search "guidance" and "learnings" for project rules and decisions.';
  }

  return '';
}
218
+
149
219
  // Apply per-prompt state reset shared by `prompt-reminder` (full) and
150
220
  // `prompt-state-reset` (defensive safety-net, no emission). Idempotent — both
151
221
  // UserPromptSubmit hooks can run it without compounding any field. Caller
@@ -402,11 +472,41 @@ switch (command) {
402
472
  break;
403
473
  }
404
474
  case 'check-bash-memory': {
475
+ // #1132 — preserve CREDIT side-effect AND add a BLOCK arm for read-like
476
+ // Bash commands. Wired as PreToolUse[Bash] (was PostToolUse before #1132)
477
+ // so process.exit(2) actually prevents the read from reaching the shell.
405
478
  var cmd = process.env.TOOL_INPUT_command || '';
406
- if (/semantic-search|memory search|memory retrieve|memory-search/.test(cmd)) {
479
+
480
+ // 1) CREDIT — preserved behavior. A real memory-search invocation flips
481
+ // the gate flag so subsequent Read/Grep/Glob within this prompt pass.
482
+ if (CREDIT_MEMORY_SEARCH_RE.test(cmd)) {
407
483
  var s = readState();
408
484
  if (markMemorySearched(s)) writeState(s);
485
+ break;
409
486
  }
487
+
488
+ // 2) BLOCK — new behavior. Cheap regex checks come BEFORE readState() so
489
+ // the overwhelming majority of Bash invocations (git/npm/curl/echo/etc.)
490
+ // never touch the filesystem. Order: config flag → command-shape regexes
491
+ // → state read → memory gate.
492
+ if (!config.memory_first) break;
493
+ if (!READ_LIKE_BASH_RE.test(cmd)) break;
494
+ if (BASH_CARVE_OUT_RE.test(cmd)) break;
495
+ var s2 = readState();
496
+ if (!s2.memoryRequired || isMemorySearchedFor(s2)) break;
497
+ // Hint precedence: prompt-derived classification (set by applyPromptStateReset
498
+ // from the user prompt text) → command-shape classification (works for
499
+ // subagents that never saw the user prompt). Either source returns a full
500
+ // "Memory namespace hint: ..." sentence so the BLOCK message stays uniform.
501
+ var hint = s2.lastNamespaceHint || classifyBashNamespaceHint(cmd) || '';
502
+ process.stderr.write(
503
+ 'BLOCKED: Search memory before reading files via Bash.\n' +
504
+ 'Example: mcp__moflo__memory_search { query: "<topic>", namespace: "<one of: guidance | code-map | patterns | learnings | tests>" }\n' +
505
+ (hint ? hint + '\n' : '') +
506
+ 'On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n' +
507
+ 'Disable per-gate via moflo.yaml: gates: memory_first: false\n'
508
+ );
509
+ process.exit(2);
410
510
  break;
411
511
  }
412
512
  case 'check-task-transition': {
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Detached recycler for §2a of session-start-launcher.mjs.
4
+ *
5
+ * The launcher used to inline the kill-and-restart synchronously, which kept
6
+ * up to 500ms of liveness-polling in the foreground — fine on Linux, but on
7
+ * Windows under the SessionStart hook's 3000ms timeout it eroded the budget
8
+ * that's supposed to be spent on real work. Per the launcher's contract
9
+ * ("spawns background tasks via spawn(detached + unref) and exits
10
+ * immediately"), the daemon recycle belongs in a detached worker.
11
+ *
12
+ * Invocation (from §2a, via fireAndForget):
13
+ * node bin/lib/daemon-recycler.mjs <projectRoot> <pid> <installedVersion>
14
+ *
15
+ * Steps:
16
+ * 1. Force-kill <pid> (Windows: taskkill /F /T, Unix: SIGKILL). Skip
17
+ * graceful — by this point the launcher has already decided the daemon
18
+ * is running stale code and its shutdown handlers are stale too.
19
+ * 2. Poll liveness up to 5s. Unlink the lockfile only once the PID is gone,
20
+ * so a surviving daemon can't re-attach to the unlinked path.
21
+ * 3. Spawn `node node_modules/moflo/bin/cli.js daemon start --quiet`
22
+ * detached + unref so this recycler can exit immediately.
23
+ *
24
+ * Output is intentionally silent — there's no parent to read it. Failures are
25
+ * surfaced via `.moflo/daemon-recycle.last.json` for `flo doctor` to read.
26
+ */
27
+
28
+ import { spawn, execFileSync } from 'node:child_process';
29
+ import { existsSync, openSync, closeSync, unlinkSync, writeFileSync, readFileSync } from 'node:fs';
30
+ import { resolve, join } from 'node:path';
31
+
32
// Positional CLI arguments: <projectRoot> <pid> [installedVersion].
const projectRootArg = process.argv[2];
const pidArg = process.argv[3];
const installedVersion = process.argv[4];

// Both the project root and the PID are mandatory. Nothing captures this
// process's stderr, so a missing argument just ends the run with exit code 2.
if (!(projectRootArg && pidArg)) {
  process.exit(2);
}

const projectRoot = resolve(projectRootArg);
const pid = Number.parseInt(pidArg, 10);
// Path of the daemon's own lock file inside the project's `.moflo` directory.
const lockFile = join(projectRoot, '.moflo', 'daemon.lock');
43
+
44
+ // EPERM means "exists but owned by another user" — treat as alive (matches
45
+ // launcher's isDaemonPidAlive contract). ESRCH means "no such process" — dead.
46
+ //
47
+ // Linux zombie handling: on Linux, `kill(pid, 0)` succeeds for zombie processes
48
+ // (exited but not yet reaped). A zombie can't write to the DB or hold locks, so
49
+ // treating it as alive exhausts the 5s kill budget polling a corpse. Read
50
+ // /proc/<pid>/stat and treat 'Z' as dead — same logic the launcher uses (#1083).
51
/**
 * Probe whether process `p` should be considered alive.
 *
 * Uses signal 0 as an existence check. A probe failure counts as "alive" only
 * when its code is EPERM. On Linux a successful probe is refined by reading
 * /proc/<pid>/stat: state 'Z' (zombie) or a missing stat file means dead; any
 * other read failure keeps the signal-0 verdict.
 *
 * @param {number} p Process id to probe.
 * @returns {boolean} Whether the process is considered alive.
 */
function isAlive(p) {
  if (!p || p <= 0) return false;

  try {
    process.kill(p, 0);
  } catch (probeErr) {
    return probeErr && probeErr.code === 'EPERM';
  }

  if (process.platform !== 'linux') return true;

  let procStat;
  try {
    procStat = readFileSync(`/proc/${p}/stat`, 'utf-8');
  } catch (readErr) {
    // ENOENT: the process vanished between the probe and the read — dead.
    // Anything else: /proc unavailable — keep the kill(0) verdict (alive).
    return !(readErr && readErr.code === 'ENOENT');
  }

  // The state letter sits two characters past the LAST ')' — the command name
  // in parentheses may itself contain ')' characters.
  const commEnd = procStat.lastIndexOf(')');
  const isZombie = commEnd !== -1 && procStat.charAt(commEnd + 2) === 'Z';
  return !isZombie;
}
70
+
71
/**
 * Block the current thread for roughly `ms` milliseconds.
 *
 * Waits on a private one-element shared Int32Array whose value never changes
 * and that nothing notifies, so Atomics.wait returns only on timeout.
 *
 * @param {number} ms Timeout in milliseconds.
 */
function sleepSyncMs(ms) {
  const cell = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT));
  Atomics.wait(cell, 0, 0, ms);
}
75
+
76
/**
 * Persist the result of this recycle run to
 * `<projectRoot>/.moflo/daemon-recycle.last.json` (pretty-printed JSON).
 *
 * Best-effort: any failure while building or writing the record is swallowed.
 *
 * @param {string} status Short outcome code (e.g. 'ok', 'kill-failed').
 * @param {string} detail Human-readable explanation.
 */
function writeOutcome(status, detail) {
  try {
    const record = {
      status,
      detail,
      pid,
      installedVersion: installedVersion ?? null,
      completedAt: new Date().toISOString(),
    };
    const outcomePath = join(projectRoot, '.moflo', 'daemon-recycle.last.json');
    writeFileSync(outcomePath, JSON.stringify(record, null, 2));
  } catch { /* best-effort — doctor reads this file optionally */ }
}
94
+
95
+ // ── 0. Single-recycler advisory lock ────────────────────────────────────────
96
+ // Two session starts within the same second can both fire §2a, both detect
97
+ // behind, both spawn this recycler against the same PID. Without the lock,
98
+ // both call `daemon start` and race for daemon-lock acquisition — only one
99
+ // daemon wins but the other wastes a spawn cycle. Use O_EXCL on a sentinel
100
+ // file so the second invocation exits early.
101
// Sentinel file giving at most one recycler at a time. Created with O_EXCL and
// stamped with the holder's PID so a later recycler can tell a live holder
// from a lock orphaned by a hard kill (SIGKILL / power loss), where the 'exit'
// cleanup below never ran. Without that check an orphaned lock would make
// every future recycler bail out as 'already-running'.
const recycleLock = join(projectRoot, '.moflo', 'recycle.lock');
let lockFd;
let lockAcquired = false;

// At most two attempts: the second runs only after a provably stale lock was
// removed on the first.
for (let attempt = 0; attempt < 2 && !lockAcquired; attempt += 1) {
  try {
    lockFd = openSync(recycleLock, 'wx'); // O_CREAT | O_EXCL
    lockAcquired = true;
    try { writeFileSync(lockFd, String(process.pid)); } catch { /* PID stamp is advisory */ }
  } catch (err) {
    if (!err || err.code !== 'EEXIST') {
      // Unexpected — proceed without the lock rather than blocking the recycle.
      break;
    }

    // Someone created the lock first. Reclaim it only when the recorded holder
    // is verifiably gone; an empty or unreadable lock (holder mid-write, or
    // written by a recycler that does not stamp its PID) is treated as held.
    let holderPid = Number.NaN;
    try {
      holderPid = Number.parseInt(readFileSync(recycleLock, 'utf-8').trim(), 10);
    } catch { /* unreadable — assume the holder is live */ }
    const holderIsGone = Number.isInteger(holderPid) && holderPid > 0
      // Our own PID in an existing lock can only be a leftover from PID reuse.
      && (holderPid === process.pid || !isAlive(holderPid));

    if (attempt === 0 && holderIsGone) {
      try { unlinkSync(recycleLock); } catch { /* another recycler cleaned it up */ }
      continue;
    }

    // Another recycler is mid-flight. Bail — it will handle the kill.
    writeOutcome('already-running', `another recycler holds ${recycleLock}`);
    process.exit(0);
  }
}

// Release the advisory lock on every exit path that runs 'exit' handlers,
// including process.exit(). Idempotent: a no-op when the lock was not acquired.
process.on('exit', () => {
  if (!lockAcquired) return;
  try { closeSync(lockFd); } catch { /* already closed */ }
  try { unlinkSync(recycleLock); } catch { /* already gone */ }
});
123
+
124
+ // ── 1. Force-kill ───────────────────────────────────────────────────────────
125
+ // EPERM on the kill attempt means the daemon is owned by another user. Can't
126
+ // kill it. Don't proceed to unlink + restart — that'd resurrect a fresh daemon
127
+ // alongside the foreign-owned one, double-writing the DB.
128
// Force-kill the target PID, but only when it parsed to a positive finite
// number and currently probes as alive. Windows uses `taskkill /F /T` with a
// 5s timeout; every other platform sends SIGKILL.
let killBlockedByEperm = false;
const shouldSignal = Number.isFinite(pid) && pid > 0 && isAlive(pid);
if (shouldSignal) {
  try {
    if (process.platform !== 'win32') {
      process.kill(pid, 'SIGKILL');
    } else {
      execFileSync('taskkill', ['/F', '/T', '/PID', String(pid)], { windowsHide: true, timeout: 5000 });
    }
  } catch (killErr) {
    // Only a permission failure is remembered; any other error (for example
    // the process already being gone) is left to the liveness poll.
    const code = killErr ? killErr.code : undefined;
    killBlockedByEperm = code === 'EPERM' || code === 'EACCES';
  }
}

// A permission failure means the daemon cannot be killed from here. Record it
// and stop, so the lock is not unlinked and no replacement daemon is spawned.
if (killBlockedByEperm) {
  writeOutcome('kill-permission-denied', `PID ${pid} owned by another user — leaving daemon alive, not spawning replacement`);
  process.exit(1);
}
148
+
149
+ // ── 2. Wait for death, then unlink the lockfile ─────────────────────────────
150
// Poll every 100ms, for at most 5 seconds, until the PID no longer probes as
// alive. The first probe happens immediately, before any sleep.
const deadline = Date.now() + 5000;
let killed = !isAlive(pid);
for (; !killed && Date.now() < deadline; killed = !isAlive(pid)) {
  sleepSyncMs(100);
}

// Still alive after the window: record the failure and stop with exit code 1.
if (!killed) {
  writeOutcome('kill-failed', `PID ${pid} survived 5s force-kill window`);
  process.exit(1);
}
161
+
162
+ // Only unlink once we know nothing's holding the lock file's old identity.
163
+ // A surviving daemon would re-write a lockfile with its stale PID + version
164
+ // and defeat the whole purpose of the recycle.
165
// Remove the daemon lock file now that the old PID is gone. If the lock
// already names a different numeric PID, another daemon wrote it: record
// 'lock-changed' and exit 0 without touching it. An unreadable or malformed
// lock is unlinked. Any failure in this step is non-fatal.
try {
  if (existsSync(lockFile)) {
    let recordedPid;
    try {
      recordedPid = JSON.parse(readFileSync(lockFile, 'utf-8'))?.pid;
    } catch {
      // Unreadable / malformed — fall through and unlink.
      recordedPid = undefined;
    }
    const ownedBySomeoneElse = typeof recordedPid === 'number' && recordedPid !== pid;
    if (ownedBySomeoneElse) {
      writeOutcome('lock-changed', `another daemon (PID ${recordedPid}) wrote the lock; leaving it alone`);
      process.exit(0);
    }
    unlinkSync(lockFile);
  }
} catch { /* non-fatal */ }
179
+
180
+ // ── 3. Spawn fresh daemon, detached + unref ─────────────────────────────────
181
// Start a fresh daemon via the project's installed moflo CLI, detached and
// unref'd so this recycler can exit immediately.
const cliPath = join(projectRoot, 'node_modules', 'moflo', 'bin', 'cli.js');
if (existsSync(cliPath)) {
  try {
    // Launch with the Node binary that is running this script rather than
    // resolving `node` through PATH. A PATH miss would only surface as an
    // asynchronous 'error' event — after 'ok' had already been recorded and
    // this process had exited — so it could never be reported.
    const child = spawn(process.execPath, [cliPath, 'daemon', 'start', '--quiet'], {
      cwd: projectRoot,
      stdio: 'ignore',
      detached: true,
      shell: false,
      windowsHide: true,
    });
    child.unref();
    // 'ok' means the spawn was requested, not that the daemon is confirmed up.
    writeOutcome('ok', 'fresh daemon spawn requested');
  } catch (err) {
    writeOutcome('spawn-failed', err && err.message ? err.message : String(err));
    process.exit(1);
  }
} else {
  writeOutcome('cli-missing', `node_modules/moflo/bin/cli.js not present at ${cliPath}`);
  process.exit(1);
}

// Recycler's job is done. Exit fast.
process.exit(0);
@@ -432,41 +432,49 @@ function stopDaemon(lockFile) {
432
432
 
433
433
  let killed = false;
434
434
  if (stalePid !== null && isDaemonPidAlive(stalePid)) {
435
- // Graceful signal — platform-aware. On Windows, `process.kill(pid, 'SIGTERM')`
436
- // silently force-kills (skipping the daemon's shutdown handlers that flush
437
- // sql.js + release lock cleanly), so use bare `taskkill` (no /F) for a
438
- // close-event signal.
439
- try {
440
- if (process.platform === 'win32') {
441
- execFileSync('taskkill', ['/PID', String(stalePid)], { windowsHide: true, timeout: 5000 });
442
- } else {
443
- process.kill(stalePid, 'SIGTERM');
444
- }
445
- } catch { /* signal/spawn failed fall through to liveness poll + force */ }
446
-
447
- // Poll for death up to 3s. The daemon's shutdown handler does a final
448
- // sql.js dump + lock release, which under load can take ~1s.
449
- const gracefulDeadline = Date.now() + 3000;
450
- while (Date.now() < gracefulDeadline) {
451
- if (!isDaemonPidAlive(stalePid)) { killed = true; break; }
452
- sleepSyncMs(100);
453
- }
454
-
455
- // Force-kill if still alive.
456
- if (!killed) {
435
+ // Platform-split shutdown. On Linux/macOS, SIGTERM lets the daemon's
436
+ // shutdown handler run a final sql.js dump + lock release before we
437
+ // escalate.
438
+ //
439
+ // On Windows there is no SIGTERM equivalent for our headless detached
440
+ // Node daemon — `taskkill /PID` (no /F) sends a window-close message
441
+ // that a non-GUI process can't receive and always fails with the visible
442
+ // error 'process can only be terminated forcefully'. The prior
443
+ // implementation invoked it anyway, swallowed the error, then polled
444
+ // alive for 3s before escalating — exactly the time-waste that pushed
445
+ // §3's stopDaemon past the 3000ms SessionStart hook timeout. Go
446
+ // straight to /F /T (tree-kill, in case a worker child outlived its
447
+ // parent) on Win.
448
+ if (process.platform === 'win32') {
457
449
  try {
458
- if (process.platform === 'win32') {
459
- execFileSync('taskkill', ['/F', '/T', '/PID', String(stalePid)], { windowsHide: true, timeout: 5000 });
460
- } else {
461
- process.kill(stalePid, 'SIGKILL');
462
- }
463
- } catch { /* dead or unreachable */ }
464
- // Short grace period for OS reap.
450
+ execFileSync('taskkill', ['/F', '/T', '/PID', String(stalePid)], { windowsHide: true, timeout: 5000 });
451
+ } catch { /* dead or unreachable liveness poll below confirms */ }
452
+ // Short grace period for OS reap (typically ~ms).
465
453
  const forceDeadline = Date.now() + 1000;
466
454
  while (Date.now() < forceDeadline) {
467
455
  if (!isDaemonPidAlive(stalePid)) { killed = true; break; }
468
456
  sleepSyncMs(100);
469
457
  }
458
+ } else {
459
+ try { process.kill(stalePid, 'SIGTERM'); } catch { /* signal failed — escalate below */ }
460
+
461
+ // Poll for death up to 3s. The daemon's shutdown handler does a final
462
+ // sql.js dump + lock release, which under load can take ~1s.
463
+ const gracefulDeadline = Date.now() + 3000;
464
+ while (Date.now() < gracefulDeadline) {
465
+ if (!isDaemonPidAlive(stalePid)) { killed = true; break; }
466
+ sleepSyncMs(100);
467
+ }
468
+
469
+ // Force-kill if still alive.
470
+ if (!killed) {
471
+ try { process.kill(stalePid, 'SIGKILL'); } catch { /* dead or unreachable */ }
472
+ const forceDeadline = Date.now() + 1000;
473
+ while (Date.now() < forceDeadline) {
474
+ if (!isDaemonPidAlive(stalePid)) { killed = true; break; }
475
+ sleepSyncMs(100);
476
+ }
477
+ }
470
478
  }
471
479
 
472
480
  if (!killed) {
@@ -499,6 +507,42 @@ function recycleDaemon(lockFile, label) {
499
507
  return true;
500
508
  }
501
509
 
510
+ // Numeric semver compare. Returns -1 / 0 / +1 for a vs b. Treats missing
511
+ // segments as 0 so '4.10' < '4.10.4'. Strips pre-release tags ('1.2.3-beta'
512
+ // compares as '1.2.3') — close enough for "is the daemon's version behind
513
+ // the installed package's version", which is all §2a needs.
514
/**
 * Numeric version comparison.
 *
 * Normalisation before comparing:
 *   - surrounding whitespace and a leading `v`/`V` are dropped
 *     ('v4.10.6' compares as '4.10.6');
 *   - everything from the first `-` (pre-release) or `+` (build metadata) on
 *     is ignored ('1.2.3-beta' and '1.2.3+build.5' both compare as '1.2.3');
 *   - missing or non-numeric segments count as 0 ('4.10' < '4.10.4').
 *
 * This is not full SemVer precedence (pre-release ordering is not modelled).
 *
 * @param {string} a Left-hand version; null/undefined compare as '0'.
 * @param {string} b Right-hand version; null/undefined compare as '0'.
 * @returns {-1|0|1} -1 when a < b, 0 when equal, 1 when a > b.
 */
function compareVersionsSemver(a, b) {
  const toSegments = (version) => String(version ?? '')
    .trim()
    .replace(/^v/i, '')
    .split(/[-+]/)[0]
    .split('.')
    .map((part) => {
      const n = Number.parseInt(part, 10);
      return Number.isFinite(n) ? n : 0;
    });

  const left = toSegments(a);
  const right = toSegments(b);
  const length = Math.max(left.length, right.length);
  for (let i = 0; i < length; i += 1) {
    const l = left[i] ?? 0;
    const r = right[i] ?? 0;
    if (l !== r) return l < r ? -1 : 1;
  }
  return 0;
}
530
+
531
+ // Resolve `bin/lib/daemon-recycler.mjs` across the three places it can live:
532
+ // 1. node_modules/moflo/bin/lib/ (consumer install, always present)
533
+ // 2. .claude/scripts/lib/ (synced copy in consumer/dogfood projects)
534
+ // 3. bin/lib/ (dogfood source tree)
535
+ // Returns null when not found — §2a falls back to inline force-kill in that
536
+ // case, which is the pre-recycler behavior.
537
/**
 * Locate `daemon-recycler.mjs` under the project root.
 *
 * Candidate locations are checked in priority order and the first one that
 * exists on disk wins.
 *
 * @returns {string|null} Absolute path of the first existing candidate, or
 *   null when none exists.
 */
function resolveDaemonRecyclerPath() {
  const relativeCandidates = [
    'node_modules/moflo/bin/lib/daemon-recycler.mjs',
    '.claude/scripts/lib/daemon-recycler.mjs',
    'bin/lib/daemon-recycler.mjs',
  ];
  for (const relativePath of relativeCandidates) {
    const candidate = resolve(projectRoot, relativePath);
    if (existsSync(candidate)) return candidate;
  }
  return null;
}
545
+
502
546
  // ── 2. Reset workflow state for new session ──────────────────────────────────
503
547
  const stateDir = resolve(projectRoot, '.claude');
504
548
  const stateFile = resolve(stateDir, 'workflow-state.json');
@@ -514,6 +558,84 @@ try {
514
558
  // Non-fatal - workflow gate will use defaults
515
559
  }
516
560
 
561
+ // ── 2a. Recycle daemon when behind installed version (#1054 follow-up) ──────
562
+ // Promoted from §3a-pre to run BEFORE §3's file-sync work. The launcher has
563
+ // a 3000ms SessionStart hook timeout (src/cli/services/hook-block-hash.ts);
564
+ // §0c (DB repair) + §3 (file-sync, manifest, cherry-pick) + stopDaemon's
565
+ // up-to-4s graceful poll routinely exceeds it on upgrade sessions, killing
566
+ // the launcher mid-§3. Result: §3a-pre never ran on the very sessions that
567
+ // needed it, leaving a stale-version daemon alive after `npm install moflo`
568
+ // + Claude restart — `📊 ?` in the statusline (this bug's tell).
569
+ //
570
+ // Semver-BEHIND only — a downgrade-test daemon ahead of installed is left
571
+ // alone. Pre-#1054 daemons (no `version` field in the lock) are treated as
572
+ // behind because by construction they predate version publishing.
573
+ //
574
+ // Force-kill skips the graceful poll: a stale-code daemon's flush handlers
575
+ // are themselves stale, and losing one in-flight flush beats running past
576
+ // the hook timeout. fireAndForget the fresh `daemon start` so spawn returns
577
+ // immediately and the launcher can move on to §3.
578
try {
  const mofloPkgPath = resolve(projectRoot, 'node_modules/moflo/package.json');
  const lockFile = resolve(projectRoot, '.moflo', 'daemon.lock');

  // One read attempt per file; a failed read simply leaves the value unset.
  let installedVersion;
  try {
    installedVersion = JSON.parse(readFileSync(mofloPkgPath, 'utf-8')).version;
  } catch { /* node_modules/moflo absent — fresh consumer or fatal, nothing §2a can do */ }

  let lockReadOk = false;
  let daemonVersion;
  let daemonPid;
  try {
    const lock = JSON.parse(readFileSync(lockFile, 'utf-8'));
    lockReadOk = true;
    // Only well-typed fields are trusted; anything else stays undefined.
    daemonVersion = typeof lock?.version === 'string' ? lock.version : undefined;
    daemonPid = typeof lock?.pid === 'number' && lock.pid > 0 ? lock.pid : undefined;
  } catch { /* no lock or corrupt — no daemon to recycle, skip the block below */ }

  // "Behind" needs a known installed version and a readable lock; a lock with
  // no version string counts as behind.
  const daemonIsBehind = Boolean(installedVersion) && lockReadOk
    && (!daemonVersion || compareVersionsSemver(daemonVersion, installedVersion) < 0);

  if (daemonIsBehind) {
    const observed = daemonVersion ?? '<pre-1054 / unknown>';
    const recyclerPath = resolveDaemonRecyclerPath();
    const hasUsablePid = Boolean(daemonPid) && daemonPid > 0;

    if (!recyclerPath) {
      // Recycler script not found in any known location: warn and leave the
      // daemon running for this session.
      emitWarning(
        `daemon-behind recycle: bin/lib/daemon-recycler.mjs not resolvable — ` +
        `daemon v${observed} stays alive this session, will recycle on the next`,
      );
    } else if (hasUsablePid) {
      // Hand kill + wait + restart to the detached recycler so this block
      // returns immediately.
      fireAndForget(
        'node',
        [recyclerPath, projectRoot, String(daemonPid), installedVersion],
        'daemon-behind-recycle',
      );
      emitMutation(
        'recycled stale daemon',
        `behind: daemon v${observed} → installed v${installedVersion}`,
      );
    } else {
      // Readable lock without a usable PID: remove it so it cannot block a
      // fresh daemon start.
      try { unlinkSync(lockFile); } catch { /* non-fatal */ }
      emitMutation('cleared malformed daemon lock', `version field: ${observed}`);
    }
  }
} catch (err) {
  emitWarning(`daemon-behind check failed: ${errMessage(err)}`);
}
638
+
517
639
  // ── 3. Auto-sync scripts and helpers on version change ───────────────────────
518
640
  // Controlled by `auto_update.enabled` in moflo.yaml (default: true).
519
641
  // When moflo is upgraded (npm install), scripts and helpers may be stale.
@@ -931,12 +1053,14 @@ try {
931
1053
  // prune when the consumer's file matches a known-shipped hash —
932
1054
  // customized files (different hash) get preserved with a one-line
933
1055
  // notice the user can act on.
1056
+ let prunedRetiredPaths = new Set();
934
1057
  try {
935
1058
  const retiredManifestPath = resolve(
936
1059
  projectRoot,
937
1060
  'node_modules/moflo/retired-files.json',
938
1061
  );
939
1062
  const report = applyRetiredPrune(projectRoot, retiredManifestPath);
1063
+ prunedRetiredPaths = new Set(report.pruned);
940
1064
  if (report.pruned.length > 0) {
941
1065
  emitMutation(
942
1066
  'pruned retired shipped files',
@@ -988,10 +1112,23 @@ try {
988
1112
 
989
1113
  // Manifest reflects synced files immediately; version stamp is deferred
990
1114
  // to 3g so an aborted launcher re-runs upgrade detection (#730).
1115
+ //
1116
+ // Exclude paths that `applyRetiredPrune` just deleted from disk —
1117
+ // recording a non-existent file in `installed-files.json` triggers
1118
+ // false drift detection on the next launcher run (`manifestDrifted`
1119
+ // flips true because the recorded path doesn't exist), which spuriously
1120
+ // re-fires the cherry-pick + manifest-sync pipeline. Widening
1121
+ // `retired-files.json`'s hash window in #1133 exposed this — more
1122
+ // legitimate matches → more pruned files → guaranteed false drift on
1123
+ // the next launcher and re-imported legacy rows (knowledge namespace
1124
+ // came back even though the migration deleted it).
1125
+ const persistedManifest = prunedRetiredPaths.size > 0
1126
+ ? currentManifest.filter((e) => !prunedRetiredPaths.has(e.path))
1127
+ : currentManifest;
991
1128
  try {
992
1129
  const cfDir = resolve(projectRoot, '.moflo');
993
1130
  if (!existsSync(cfDir)) mkdirSync(cfDir, { recursive: true });
994
- writeFileSync(manifestPath, JSON.stringify(currentManifest, null, 2));
1131
+ writeFileSync(manifestPath, JSON.stringify(persistedManifest, null, 2));
995
1132
  pendingVersionStampWrite = { path: versionStampPath, version: installedVersion };
996
1133
  } catch (err) {
997
1134
  // #854: manifest write must surface — without it the next launcher
@@ -1009,53 +1146,12 @@ try {
1009
1146
  emitWarning(`upgrade section failed (${errMessage(err)})`);
1010
1147
  }
1011
1148
 
1012
- // ── 3a-pre. Recycle daemons started before the current moflo install ────────
1013
- // The version-bump block above only fires when `installedVersion !== cachedVersion`.
1014
- // That misses the common case where a user upgraded moflo, ran ONE session
1015
- // (which bumped the stamp + recycled the daemon), then on a subsequent session
1016
- // the version stamp matches but the daemon they started long-ago is still
1017
- // holding stale module cache from a pre-collapse moflo image. The
1018
- // `[neural-tools] @moflo/embeddings not resolvable` spam (#639) is the
1019
- // observable symptom of exactly this: a daemon running pre-#592 code that no
1020
- // longer exists in source, calling a require helper that prints the warning
1021
- // every time `neural_predict` / `neural_patterns` fires.
1022
- //
1023
- // Fix (epic #1054): compare the daemon-lock's reported moflo `version` against
1024
- // the installed `node_modules/moflo/package.json` version. If they differ —
1025
- // or the lock predates #1054 and has no `version` field at all — recycle the
1026
- // daemon. This is exact (not a heuristic margin like the prior mtime-based
1027
- // check) and named explicitly so the doctor's Daemon Version Skew check
1028
- // (#1059) can share the diagnosis.
1029
- //
1030
- // Pre-#1054 daemons have no `version` in their lock payload — treated as a
1031
- // mismatch by definition because by construction they were launched before
1032
- // version publishing existed.
1033
- try {
1034
- const mofloPkgPathForRecycle = resolve(projectRoot, 'node_modules/moflo/package.json');
1035
- const lockFile = resolve(projectRoot, '.moflo', 'daemon.lock');
1036
- // Cheap stat first — if either file is gone, no skew check is possible.
1037
- if (existsSync(mofloPkgPathForRecycle) && existsSync(lockFile)) {
1038
- const installedVersion = JSON.parse(readFileSync(mofloPkgPathForRecycle, 'utf-8')).version;
1039
- let daemonVersion;
1040
- try {
1041
- const lock = JSON.parse(readFileSync(lockFile, 'utf-8'));
1042
- if (typeof lock?.version === 'string') daemonVersion = lock.version;
1043
- } catch { /* corrupt lock — recycleDaemon will unlink it */ }
1044
- if (daemonVersion !== installedVersion) {
1045
- if (recycleDaemon(lockFile, 'daemon-version-skew')) {
1046
- const observed = daemonVersion ?? '<pre-1054 / unknown>';
1047
- emitMutation(
1048
- 'recycled stale daemon',
1049
- `version skew: installed ${installedVersion}, daemon ${observed}`,
1050
- );
1051
- }
1052
- }
1053
- }
1054
- } catch (err) {
1055
- // Non-fatal; surface via emitWarning per feedback_no_layered_workarounds —
1056
- // no silent catch on the upgrade path (#854).
1057
- emitWarning(`daemon version-skew check failed: ${errMessage(err)}`);
1058
- }
1149
+ // ── 3a-pre. (removed) Daemon-version-skew recycle moved to §2a. ─────────────
1150
+ // The previous version of this block ran AFTER §3's heavy file-sync work,
1151
+ // which routinely exceeded the 3000ms SessionStart hook timeout and was
1152
+ // killed before reaching this point. §2a now runs early and force-kills the
1153
+ // stale daemon before §3 can starve out. Don't restore §3a-pre — keep the
1154
+ // recycle in one place so the two paths can't drift.
1059
1155
 
1060
1156
  // ── 3a. Auto-migrate settings.json (npx flo → node helpers, PATH setup) ────
1061
1157
  // Existing users may have stale settings.json with `npx flo` hooks that break