moflo 4.9.1 → 4.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ import { spawn, execFileSync } from 'child_process';
11
11
  import { existsSync, readFileSync, writeFileSync, copyFileSync, unlinkSync, readdirSync, mkdirSync, statSync } from 'fs';
12
12
  import { resolve, dirname, join } from 'path';
13
13
  import { fileURLToPath } from 'url';
14
- import { migrateClaudeFlowToMoflo, migrateMemoryDbToMoflo, mofloDir } from './lib/moflo-paths.mjs';
14
+ import { mofloDir } from './lib/moflo-paths.mjs';
15
15
  import { repairMemoryDbIfCorrupt } from './lib/db-repair.mjs';
16
16
 
17
17
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -60,6 +60,48 @@ function emitMutation(action, details) {
60
60
  }
61
61
  }
62
62
 
63
+ // Stderr counterpart to emitMutation for non-fatal failures (#854). Every
64
+ // previously-bare `catch {}` in the upgrade flow is routed through here so
65
+ // partial-migration regressions can never go silent again. The inner try
66
+ // guards against a broken stderr pipe — writing the failure itself must
67
+ // never throw, otherwise a fast session-end would surface as a crash.
68
+ function emitWarning(message) {
69
+ try {
70
+ process.stderr.write(`moflo: ${message}\n`);
71
+ } catch { /* stderr write must not throw */ }
72
+ }
73
+ function errMessage(err) {
74
+ return err && err.message ? err.message : String(err);
75
+ }
76
+
77
+ // Manifest schema (#854 hardening). Originally `string[]`; now `{path,size}[]`
78
+ // so the launcher can detect *content* drift, not just *missing-file* drift.
79
+ // Reading accepts both forms — a legacy v1 manifest is reported via
80
+ // `isLegacy=true` so the drift check forces one re-sync to migrate to v2,
81
+ // closing the failure mode where a v4.9.2 launcher writes a stamp+manifest
82
+ // in stage 1 of an upgrade and the v4.9.3 launcher (with this fix) sees
83
+ // `installedVersion === cachedVersion` + no file-missing drift, then skips
84
+ // section 3 leaving stale `gate.cjs` etc. stuck.
85
+ function readInstallManifest(manifestPath) {
86
+ let raw;
87
+ try { raw = readFileSync(manifestPath, 'utf-8'); } catch { return { entries: [], isLegacy: false }; }
88
+ let parsed;
89
+ try { parsed = JSON.parse(raw); } catch { return { entries: [], isLegacy: false }; }
90
+ if (!Array.isArray(parsed)) return { entries: [], isLegacy: false };
91
+ let isLegacy = false;
92
+ const entries = [];
93
+ for (const item of parsed) {
94
+ if (typeof item === 'string') {
95
+ isLegacy = true;
96
+ entries.push({ path: item, size: null });
97
+ } else if (item && typeof item === 'object' && typeof item.path === 'string') {
98
+ entries.push({ path: item.path, size: typeof item.size === 'number' ? item.size : null });
99
+ }
100
+ // malformed entries are silently dropped — not surfaceable, never written by us
101
+ }
102
+ return { entries, isLegacy };
103
+ }
104
+
63
105
  const plural = (n, word) => `${n} ${word}${n === 1 ? '' : 's'}`;
64
106
 
65
107
  // Captured inside the upgrade/drift branch so the post-spawn notice writer
@@ -116,56 +158,21 @@ try {
116
158
  unlinkSync(join(mofloDir(projectRoot), 'upgrade-notice.json'));
117
159
  } catch { /* non-fatal — file usually doesn't exist */ }
118
160
 
119
- // ── 0. LEGACY state migration (#699, #735) ──────────────────────────────────
120
- // Consumers upgrading from older moflo builds (inherited from upstream Ruflo)
121
- // get a one-time auto-migration of LEGACY `.claude-flow/` → `.moflo/` so claim
122
- // files, models cache, metrics, and the version stamp survive the rename.
123
- // The migration helper is idempotent — see bin/lib/moflo-paths.mjs for the
124
- // algorithm.
161
+ // ── 0. Legacy whole-DB / directory migrations have been retired (#851) ─────
162
+ // LEGACY-V2: Pre-#851 the launcher renamed `.claude-flow/` → `.moflo/` and
163
+ // byte-copied `.swarm/memory.db` → `.moflo/moflo.db` on every session start.
164
+ // Both blocks ran silently against a daemon that was still holding the old
165
+ // paths in memory, leaving consumers with ghost runtime files reappearing
166
+ // in legacy dirs and a `.gitignore` deletion that exposed 30+ daemon-state
167
+ // files for commit. See docs/moflo-4.9.1-upgrade-experience-2026-05-02.md
168
+ // for the full UX failure report.
125
169
  //
126
- // Staged removal contract (#735):
127
- // 1. THIS release ships Phase 1 (writers redirected to `.moflo/`) + Phase 2 // LEGACY
128
- // (this migration call moves stragglers + warns on collisions).
129
- // 2. The release AFTER Phase 1 is steady-state should hard-delete any
130
- // remaining empty `.claude-flow/` directory — until then, the helper // LEGACY
131
- // drops the dir naturally once everything's been moved.
132
- // LEGACY: every emit below stops firing once `.claude-flow/` is gone.
133
- try {
134
- const cfMigration = migrateClaudeFlowToMoflo(projectRoot);
135
- if (cfMigration?.migrated) {
136
- const count = cfMigration.movedCount ?? 0;
137
- emitMutation(`migrated ${plural(count, 'entry')} from legacy .claude-flow/`); // LEGACY
138
- }
139
- // Surface collisions so users notice that BOTH locations now hold the same
140
- // subdir name (most often `models/` after a partial pre-#735 migration).
141
- // Manual cleanup is needed — moflo refuses to silently choose.
142
- if ((cfMigration?.collisions?.length ?? 0) > 0) {
143
- const collisionMsg = 'kept legacy .claude-flow/ entries to avoid clobbering .moflo/'; // LEGACY
144
- emitMutation(collisionMsg, `collisions: ${cfMigration.collisions.join(', ')}`);
145
- }
146
- } catch {
147
- // Non-fatal — anything left behind by the migration just means it runs
148
- // again next session. Better to keep launching than to block on it.
149
- }
150
-
151
- // ── 0b. LEGACY memory DB relocation (#727) ──────────────────────────────────
152
- // Run BEFORE long-lived sql.js consumers (MCP server, daemon) — see the
153
- // `migrateMemoryDbToMoflo` JSDoc for the copy-verify-delete contract and
154
- // the sql.js write-back hazard.
155
- try {
156
- const dbMigration = migrateMemoryDbToMoflo(projectRoot);
157
- if (dbMigration?.migrated) {
158
- const detail = dbMigration.hnswMoved
159
- ? '.swarm/memory.db → .moflo/moflo.db (with hnsw.index)'
160
- : '.swarm/memory.db → .moflo/moflo.db';
161
- emitMutation('relocated memory db', detail);
162
- if (dbMigration.reason === 'rename-failed') {
163
- emitMutation('legacy .swarm/memory.db remains', 'rename to .bak failed — flo doctor will warn');
164
- }
165
- }
166
- } catch {
167
- // Non-fatal — failed migration leaves both DBs in place; next session retries.
168
- }
170
+ // The version-bump-gated cherry-pick now lives inside section 3 (which is
171
+ // already the gate on the `.moflo/moflo-version` stamp). It stops the daemon
172
+ // first, then `INSERT OR IGNORE`s only the user-authored `learnings` /
173
+ // `knowledge` namespaces — every other DB row is derived and rebuilds via
174
+ // the indexers. LEGACY-V2 directories (`.swarm/`, `.claude-flow/`) are left
175
+ // in place as recovery sources; users delete them at their leisure.
169
176
 
170
177
  // ── 0c. Memory DB index repair (#743) ───────────────────────────────────────
171
178
  // The .moflo/moflo.db SQLite file accumulates index corruption ("row N missing
@@ -215,16 +222,14 @@ function fireAndForget(cmd, args, label) {
215
222
  }
216
223
  }
217
224
 
218
- // Stop the daemon recorded in `lockFile` (if any) and start a fresh one. Used
219
- // from two recycle paths in this launcher: (a) the version-bump branch when
220
- // installed moflo just changed, and (b) the stale-daemon branch when the
221
- // running daemon predates the current install by a meaningful margin.
225
+ // Stop the daemon recorded in `lockFile` (if any) without restarting. Used by
226
+ // the upgrade flow before any DB work — the daemon must not be holding old
227
+ // path resolution in memory, and a concurrent sql.js flush would clobber the
228
+ // cherry-picked rows. Returns true when a live PID was actually killed.
222
229
  //
223
- // Reads the lock, SIGTERMs the recorded PID, removes the lock, and fires a
224
- // `daemon start --quiet` against `node_modules/moflo/bin/cli.js`. Every
225
- // failure mode (no lock, dead PID, missing CLI) is silently absorbed — the
226
- // recycle is best-effort and must never block session start.
227
- function recycleDaemon(lockFile, label) {
230
+ // Section 4's `hooks.mjs session-start` spawn is responsible for starting a
231
+ // fresh daemon under the current code; this function intentionally does not.
232
+ function stopDaemon(lockFile) {
228
233
  if (!existsSync(lockFile)) return false;
229
234
  let stalePid = null;
230
235
  try {
@@ -235,16 +240,20 @@ function recycleDaemon(lockFile, label) {
235
240
  try { process.kill(stalePid, 'SIGTERM'); } catch { /* already dead */ }
236
241
  }
237
242
  try { unlinkSync(lockFile); } catch { /* non-fatal */ }
238
- // Respawn only if a live daemon was actually recorded — no point starting
239
- // one when there wasn't one before.
240
- if (stalePid !== null) {
241
- const localCliPath = resolve(projectRoot, 'node_modules/moflo/bin/cli.js');
242
- if (existsSync(localCliPath)) {
243
- fireAndForget('node', [localCliPath, 'daemon', 'start', '--quiet'], label);
244
- }
245
- return true;
243
+ return stalePid !== null;
244
+ }
245
+
246
+ // Stop-and-restart helper for the stale-daemon branch (section 3a-pre). The
247
+ // version-bump branch uses stopDaemon directly + relies on section 4 for the
248
+ // fresh start.
249
+ function recycleDaemon(lockFile, label) {
250
+ const stopped = stopDaemon(lockFile);
251
+ if (!stopped) return false;
252
+ const localCliPath = resolve(projectRoot, 'node_modules/moflo/bin/cli.js');
253
+ if (existsSync(localCliPath)) {
254
+ fireAndForget('node', [localCliPath, 'daemon', 'start', '--quiet'], label);
246
255
  }
247
- return false;
256
+ return true;
248
257
  }
249
258
 
250
259
  // ── 2. Reset workflow state for new session ──────────────────────────────────
@@ -279,7 +288,11 @@ try {
279
288
  if (scriptsMatch) autoUpdateConfig.scripts = scriptsMatch[1] === 'true';
280
289
  if (helpersMatch) autoUpdateConfig.helpers = helpersMatch[1] === 'true';
281
290
  }
282
- } catch { /* non-fatal — use defaults (all true) */ }
291
+ } catch (err) {
292
+ // Defaults (all true) keep the upgrade flow alive but the user should
293
+ // see when their moflo.yaml fails to parse (#854).
294
+ emitWarning(`moflo.yaml parse failed (${errMessage(err)}) — using defaults`);
295
+ }
283
296
 
284
297
  try {
285
298
  const mofloPkgPath = resolve(projectRoot, 'node_modules/moflo/package.json');
@@ -289,18 +302,27 @@ try {
289
302
  let cachedVersion = '';
290
303
  try { cachedVersion = readFileSync(versionStampPath, 'utf-8').trim(); } catch {}
291
304
 
292
- // Drift healing: re-sync if any previously-installed file is missing, even
293
- // when version stamp matches. Guards against out-of-band deletions (manual
294
- // rm, botched merges, dedup commits, etc.) that would otherwise silently
295
- // leave .claude/scripts/ incomplete until the next moflo upgrade.
305
+ // Drift healing: re-sync if any previously-installed file is missing
306
+ // OR has drifted in size since we last wrote it. Guards against:
307
+ // - out-of-band deletions (manual rm, botched merges, dedup commits)
308
+ // - stale-content drift (a prior partial migration left the file at
309
+ // pre-upgrade content even though it still exists — #854)
310
+ // - legacy v1 manifests written by an older launcher (force one
311
+ // re-sync to migrate to v2 so subsequent runs can size-check)
296
312
  const manifestPath = resolve(projectRoot, '.moflo', 'installed-files.json');
297
- let manifestDrifted = false;
298
- try {
299
- const prev = JSON.parse(readFileSync(manifestPath, 'utf-8'));
300
- if (Array.isArray(prev)) {
301
- manifestDrifted = prev.some(f => !existsSync(resolve(projectRoot, f)));
313
+ const { entries: priorEntries, isLegacy: manifestIsLegacy } = readInstallManifest(manifestPath);
314
+ let manifestDrifted = manifestIsLegacy;
315
+ if (!manifestDrifted) {
316
+ for (const { path: rel, size } of priorEntries) {
317
+ const abs = resolve(projectRoot, rel);
318
+ if (!existsSync(abs)) { manifestDrifted = true; break; }
319
+ if (size !== null) {
320
+ try {
321
+ if (statSync(abs).size !== size) { manifestDrifted = true; break; }
322
+ } catch { manifestDrifted = true; break; }
323
+ }
302
324
  }
303
- } catch { /* no manifest yet — version check handles first install */ }
325
+ }
304
326
 
305
327
  if (installedVersion !== cachedVersion || manifestDrifted) {
306
328
  if (installedVersion !== cachedVersion) {
@@ -326,6 +348,63 @@ try {
326
348
  // migration). See #738 — section 3f flips this to a 2-min "completed"
327
349
  // badge once work finishes (TTL rationale at the constants above).
328
350
  writeUpgradeNotice('in-progress');
351
+
352
+ // Stop the daemon BEFORE any DB writes (#851). It was started under the
353
+ // previous moflo image and holds old path resolution + module cache in
354
+ // memory; a concurrent sql.js flush would clobber the cherry-picked
355
+ // rows below, and old-path writes would resurrect ghost files in legacy
356
+ // dirs. Section 4's `hooks.mjs session-start` spawns a fresh daemon
357
+ // under the current code once 3g writes the version stamp.
358
+ const upgradeDaemonLock = resolve(projectRoot, '.moflo', 'daemon.lock');
359
+ if (stopDaemon(upgradeDaemonLock)) {
360
+ emitMutation('stopped daemon for upgrade', 'will restart fresh after upgrade work');
361
+ }
362
+
363
+ // Cherry-pick durable rows from any legacy DBs (#851). Replaces the
364
+ // pre-#851 full-DB byte-copy migration. The service is idempotent
365
+ // (INSERT OR IGNORE on UNIQUE(namespace, key)) so an aborted launcher
366
+ // re-runs cleanly without duplicate rows. Sources are read-only —
367
+ // .swarm/memory.db is preserved as a recovery source.
368
+ try {
369
+ const cherryPickPaths = [
370
+ resolve(projectRoot, 'node_modules/moflo/dist/src/cli/services/cherry-pick-learnings.js'),
371
+ resolve(projectRoot, 'dist/src/cli/services/cherry-pick-learnings.js'),
372
+ ];
373
+ const cherryPickPath = cherryPickPaths.find((p) => existsSync(p));
374
+ if (cherryPickPath) {
375
+ const mod = await import(`file://${cherryPickPath.replace(/\\/g, '/')}`);
376
+ if (typeof mod.cherryPickLearningsFromLegacy === 'function') {
377
+ const result = await mod.cherryPickLearningsFromLegacy({ projectRoot });
378
+ if (result.copied > 0) {
379
+ emitMutation(
380
+ 'copied learnings forward',
381
+ `${plural(result.copied, 'learning/knowledge entry')} cherry-picked from legacy db`,
382
+ );
383
+ }
384
+ // LEGACY-V2: One-time hint that legacy dirs are recoverable.
385
+ // Only emit when the user actually has legacy state — silent
386
+ // fast-path for fresh installs and consumers who already cleaned
387
+ // up. The legacy dirs are intentionally never auto-deleted; they
388
+ // exist as recovery sources for the cherry-pick (#851).
389
+ const hasLegacy =
390
+ existsSync(resolve(projectRoot, '.swarm', 'memory.db')) || // LEGACY-V2
391
+ existsSync(resolve(projectRoot, '.swarm', 'memory.db.bak')) || // LEGACY-V2
392
+ existsSync(resolve(projectRoot, '.claude-flow')); // LEGACY-V2
393
+ if (hasLegacy) {
394
+ emitMutation(
395
+ 'legacy .swarm/ + .claude-flow/ left in place', // LEGACY-V2
396
+ 'safe to delete — derived data rebuilds on demand',
397
+ );
398
+ }
399
+ }
400
+ }
401
+ } catch (err) {
402
+ try {
403
+ const msg = err && err.message ? err.message : String(err);
404
+ process.stderr.write(`cherry-pick learnings skipped: ${msg}\n`);
405
+ } catch { /* stderr write must not throw */ }
406
+ }
407
+
329
408
  const binDir = resolve(projectRoot, 'node_modules/moflo/bin');
330
409
 
331
410
  // ── Manifest-based auto-update ──────────────────────────────────────
@@ -342,23 +421,86 @@ try {
342
421
  // 3. That's it — cleanup is automatic on the next upgrade
343
422
  // ────────────────────────────────────────────────────────────────────
344
423
 
345
- // Load the previous manifest so we can diff after syncing
346
- let previousManifest = [];
347
- try { previousManifest = JSON.parse(readFileSync(manifestPath, 'utf-8')); } catch { /* ok */ }
424
+ // Load the previous manifest so we can diff after syncing.
425
+ // Both v1 (string[]) and v2 ({path,size}[]) are normalized to entries
426
+ // by readInstallManifest — the cleanup loop only needs the path field.
427
+ const { entries: previousManifest } = readInstallManifest(manifestPath);
348
428
 
349
429
  // Track every file we install this round
350
430
  const currentManifest = [];
431
+ // Per-file copy failures used to be invisible (#854): a Windows file
432
+ // lock / AV real-time scan / concurrent helper invocation would EBUSY
433
+ // the copy, the bare catch swallowed it, and the file stayed at its
434
+ // pre-upgrade content forever because it was never recorded in the
435
+ // manifest. Surface failures on stderr — Claude Code captures
436
+ // session-start stderr as additionalContext so the user sees them too.
437
+ const syncFailures = [];
438
+
439
+ // Standard retry with exponential backoff + circuit breaker for the
440
+ // transient error class (EBUSY / EPERM / EACCES — Windows file lock,
441
+ // AV real-time scan, concurrent helper invocation). Hard errors
442
+ // (ENOENT, etc.) fall through immediately. Once 5 distinct files have
443
+ // exhausted retries the circuit opens and the tail of the sync runs
444
+ // with maxAttempts=1 so a sick host (AV mid-scan over node_modules)
445
+ // doesn't compound the wall-clock cost. Async setTimeout — never
446
+ // busy-wait in a session-start hook (CPU pinning during EBUSY backoff
447
+ // is the worst possible response when the OS is the bottleneck).
448
+ const TRANSIENT_CODES = new Set(['EBUSY', 'EPERM', 'EACCES']);
449
+ const RETRY_BACKOFF_MS = [50, 200, 800];
450
+ const CIRCUIT_BREAK_THRESHOLD = 5;
451
+ let circuitOpen = false;
452
+ const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
453
+ async function syncWithRetry(operation) {
454
+ const maxAttempts = circuitOpen ? 1 : RETRY_BACKOFF_MS.length + 1;
455
+ let lastErr = null;
456
+ let lastCode = null;
457
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
458
+ if (attempt > 0) await sleep(RETRY_BACKOFF_MS[attempt - 1]);
459
+ try {
460
+ operation();
461
+ return { ok: true };
462
+ } catch (err) {
463
+ lastErr = err;
464
+ lastCode = err && err.code ? err.code : null;
465
+ if (!TRANSIENT_CODES.has(lastCode)) break;
466
+ }
467
+ }
468
+ if (!circuitOpen && syncFailures.length + 1 >= CIRCUIT_BREAK_THRESHOLD) {
469
+ circuitOpen = true;
470
+ }
471
+ return { ok: false, err: lastErr, code: lastCode };
472
+ }
351
473
 
352
- /** Copy src → dest if src exists, record in manifest. */
353
- function syncFile(src, dest, manifestKey) {
354
- if (existsSync(src)) {
355
- try { copyFileSync(src, dest); currentManifest.push(manifestKey); } catch { /* non-fatal */ }
474
+ /** Copy src → dest if src exists, record `{path, size}` in manifest.
475
+ * Retries the transient error class with backoff (#854); failures land
476
+ * in syncFailures for the post-block stderr summary. The recorded size
477
+ * is read from the just-written destination so a subsequent launcher
478
+ * can detect content drift via size mismatch. */
479
+ function recordManifestEntry(manifestKey, dest) {
480
+ let size = null;
481
+ try { size = statSync(dest).size; } catch { /* size left null — drift check still works on file-existence */ }
482
+ currentManifest.push({ path: manifestKey, size });
483
+ }
484
+ async function syncFile(src, dest, manifestKey) {
485
+ if (!existsSync(src)) return;
486
+ const result = await syncWithRetry(() => copyFileSync(src, dest));
487
+ if (result.ok) {
488
+ recordManifestEntry(manifestKey, dest);
489
+ return;
356
490
  }
491
+ const tail = TRANSIENT_CODES.has(result.code)
492
+ ? ` (retried ${RETRY_BACKOFF_MS.length}× after ${result.code}${circuitOpen ? '; circuit open' : ''})`
493
+ : '';
494
+ syncFailures.push({ key: manifestKey, message: `${errMessage(result.err)}${tail}` });
357
495
  }
358
496
 
359
497
  // Version changed — sync scripts from bin/
360
498
  if (autoUpdateConfig.scripts) {
361
499
  const scriptsDir = resolve(projectRoot, '.claude/scripts');
500
+ // Ensure the destination dir exists — first-install consumers may
501
+ // not have it yet, in which case every copyFileSync below would
502
+ // silently ENOENT (#854).
503
+ if (!existsSync(scriptsDir)) mkdirSync(scriptsDir, { recursive: true });
362
504
  const scriptFiles = [
363
505
  'hooks.mjs', 'session-start-launcher.mjs', 'index-guidance.mjs',
364
506
  'build-embeddings.mjs', 'generate-code-map.mjs', 'semantic-search.mjs',
@@ -366,7 +508,7 @@ try {
366
508
  'setup-project.mjs', 'run-migrations.mjs',
367
509
  ];
368
510
  for (const file of scriptFiles) {
369
- syncFile(resolve(binDir, file), resolve(scriptsDir, file), `.claude/scripts/${file}`);
511
+ await syncFile(resolve(binDir, file), resolve(scriptsDir, file), `.claude/scripts/${file}`);
370
512
  }
371
513
 
372
514
  // Sync lib/ subdirectory (process-manager.mjs, registry-cleanup.cjs, etc.)
@@ -377,7 +519,7 @@ try {
377
519
  if (existsSync(libSrcDir)) {
378
520
  if (!existsSync(libDestDir)) mkdirSync(libDestDir, { recursive: true });
379
521
  for (const file of readdirSync(libSrcDir)) {
380
- syncFile(resolve(libSrcDir, file), resolve(libDestDir, file), `.claude/scripts/lib/${file}`);
522
+ await syncFile(resolve(libSrcDir, file), resolve(libDestDir, file), `.claude/scripts/lib/${file}`);
381
523
  }
382
524
  }
383
525
 
@@ -391,7 +533,8 @@ try {
391
533
  let migrationEntries;
392
534
  try {
393
535
  migrationEntries = readdirSync(migrationsSrcDir, { recursive: true, withFileTypes: true });
394
- } catch {
536
+ } catch (err) {
537
+ emitWarning(`migrations source readdir failed (${errMessage(err)})`);
395
538
  migrationEntries = [];
396
539
  }
397
540
  for (const entry of migrationEntries) {
@@ -400,8 +543,10 @@ try {
400
543
  const absSrc = resolve(parent, entry.name);
401
544
  const rel = absSrc.slice(migrationsSrcDir.length + 1).split(/[\\/]/).join('/');
402
545
  const absDest = resolve(migrationsDestDir, rel);
403
- try { mkdirSync(dirname(absDest), { recursive: true }); } catch { /* non-fatal */ }
404
- syncFile(absSrc, absDest, `.claude/scripts/migrations/${rel}`);
546
+ try { mkdirSync(dirname(absDest), { recursive: true }); } catch (err) {
547
+ emitWarning(`migrations subdir mkdir failed for ${rel} (${errMessage(err)})`);
548
+ }
549
+ await syncFile(absSrc, absDest, `.claude/scripts/migrations/${rel}`);
405
550
  }
406
551
  }
407
552
  }
@@ -416,7 +561,7 @@ try {
416
561
  'gate.cjs', 'gate-hook.mjs', 'prompt-hook.mjs', 'hook-handler.cjs',
417
562
  ];
418
563
  for (const file of binHelperFiles) {
419
- syncFile(resolve(binDir, file), resolve(helpersDir, file), `.claude/helpers/${file}`);
564
+ await syncFile(resolve(binDir, file), resolve(helpersDir, file), `.claude/helpers/${file}`);
420
565
  }
421
566
 
422
567
  // Other helpers from .claude/helpers/ and CLI .claude/helpers/
@@ -434,7 +579,19 @@ try {
434
579
  for (const srcDir of helperSources) {
435
580
  const src = resolve(srcDir, file);
436
581
  if (existsSync(src)) {
437
- try { copyFileSync(src, dest); currentManifest.push(`.claude/helpers/${file}`); } catch { /* non-fatal */ }
582
+ const inlineResult = await syncWithRetry(() => copyFileSync(src, dest));
583
+ if (inlineResult.ok) {
584
+ recordManifestEntry(`.claude/helpers/${file}`, dest);
585
+ } else {
586
+ const code = inlineResult.code;
587
+ const tail = TRANSIENT_CODES.has(code)
588
+ ? ` (retried ${RETRY_BACKOFF_MS.length}× after ${code}${circuitOpen ? '; circuit open' : ''})`
589
+ : '';
590
+ syncFailures.push({
591
+ key: `.claude/helpers/${file}`,
592
+ message: `${errMessage(inlineResult.err)}${tail}`,
593
+ });
594
+ }
438
595
  break; // first source wins
439
596
  }
440
597
  }
@@ -453,26 +610,31 @@ try {
453
610
  const dest = resolve(guidanceDir, file);
454
611
  const content = readFileSync(src, 'utf-8');
455
612
  writeFileSync(dest, sessionStartMirrorHeader(file) + content);
456
- currentManifest.push(`.claude/guidance/${file}`);
613
+ recordManifestEntry(`.claude/guidance/${file}`, dest);
457
614
  }
458
- } catch { /* non-fatal */ }
615
+ } catch (err) {
616
+ emitWarning(`shipped guidance sync failed (${errMessage(err)})`);
617
+ }
459
618
  }
460
619
 
461
620
  // ── Clean up files we installed previously but no longer ship ──
462
621
  // Only remove files that are in the OLD manifest but NOT in the new one.
463
622
  // This ensures we never delete user-created or runtime-generated files.
623
+ // Both v1 (string) and v2 ({path,size}) old entries are normalized to
624
+ // entries by readInstallManifest; we only need the path for cleanup.
464
625
  let removedFiles = 0;
465
626
  if (previousManifest.length > 0) {
466
- const currentSet = new Set(currentManifest);
467
- for (const rel of previousManifest) {
468
- if (!currentSet.has(rel)) {
469
- const abs = resolve(projectRoot, rel);
470
- try {
471
- if (existsSync(abs)) {
472
- unlinkSync(abs);
473
- removedFiles++;
474
- }
475
- } catch { /* non-fatal */ }
627
+ const currentSet = new Set(currentManifest.map((e) => e.path));
628
+ for (const { path: rel } of previousManifest) {
629
+ if (currentSet.has(rel)) continue;
630
+ const abs = resolve(projectRoot, rel);
631
+ try {
632
+ if (existsSync(abs)) {
633
+ unlinkSync(abs);
634
+ removedFiles++;
635
+ }
636
+ } catch (err) {
637
+ emitWarning(`cleanup unlink failed for ${rel} (${errMessage(err)})`);
476
638
  }
477
639
  }
478
640
  }
@@ -480,21 +642,22 @@ try {
480
642
  emitMutation('cleaned up retired files', `${removedFiles} removed`);
481
643
  }
482
644
 
483
- // Recycle the running daemon — its in-process module cache holds the
484
- // previous moflo image. After an upgrade that cache is stale, which
485
- // shows up as warnings from removed code paths (e.g. the
486
- // `[neural-tools] @moflo/embeddings not resolvable` spam from #639,
487
- // emitted by pre-#592 collapse code that no longer exists in source)
488
- // and means freshly-disabled workers keep running.
489
- //
490
- // `daemon.autoStart` only governs the cold-start case (no daemon
491
- // existed); here a daemon was actually running, so replacing it with a
492
- // current-code copy is the desired behaviour regardless of that flag.
493
- try {
494
- if (recycleDaemon(resolve(projectRoot, '.moflo', 'daemon.lock'), 'daemon-recycle')) {
495
- emitMutation('recycled daemon', 'load fresh moflo code');
496
- }
497
- } catch { /* non-fatal — daemon recycle is best-effort */ }
645
+ // The daemon was already stopped above so the lock file is gone and
646
+ // there's no live PID to recycle here. Section 4's `hooks.mjs
647
+ // session-start` will spawn a fresh daemon under the current moflo
648
+ // image once 3g writes the version stamp.
649
+
650
+ // Surface per-file copy failures so the user / Claude can see what
651
+ // didn't sync (#854). The file isn't in the manifest either, so the
652
+ // next-upgrade cleanup pass can never reconcile it on its own —
653
+ // direct the user at `flo doctor --fix` as the compensating healer.
654
+ if (syncFailures.length > 0) {
655
+ const sample = syncFailures.slice(0, 5).map((f) => ` - ${f.key}: ${f.message}`).join('\n');
656
+ const more = syncFailures.length > 5 ? `\n …and ${syncFailures.length - 5} more` : '';
657
+ emitWarning(
658
+ `${plural(syncFailures.length, 'file')} failed to sync during upgrade — run 'flo doctor --fix' to repair:\n${sample}${more}`,
659
+ );
660
+ }
498
661
 
499
662
  // Manifest reflects synced files immediately; version stamp is deferred
500
663
  // to 3g so an aborted launcher re-runs upgrade detection (#730).
@@ -503,11 +666,19 @@ try {
503
666
  if (!existsSync(cfDir)) mkdirSync(cfDir, { recursive: true });
504
667
  writeFileSync(manifestPath, JSON.stringify(currentManifest, null, 2));
505
668
  pendingVersionStampWrite = { path: versionStampPath, version: installedVersion };
506
- } catch {}
669
+ } catch (err) {
670
+ // #854: manifest write must surface — without it the next launcher
671
+ // can't tell what was installed and the version stamp never gets
672
+ // queued for 3g.
673
+ emitWarning(`manifest write failed (${errMessage(err)})`);
674
+ }
507
675
  }
508
676
  }
509
- } catch {
510
- // Non-fatal — scripts will still work, just may be stale
677
+ } catch (err) {
678
+ // #854: bare catches here hid upgrade regressions across multiple 4.8.x
679
+ // bumps. We keep the catch so a single throw doesn't crash the launcher,
680
+ // but we never silence it.
681
+ emitWarning(`upgrade section failed (${errMessage(err)})`);
511
682
  }
512
683
 
513
684
  // ── 3a-pre. Recycle daemons started before the current moflo install ────────
@@ -654,14 +825,19 @@ try {
654
825
  settingsChanges.push(`repaired ${plural(repaired.length, 'hook wiring')}`);
655
826
  }
656
827
  }
657
- } catch { /* non-fatal — doctor can still fix later */ }
828
+ } catch (err) {
829
+ emitWarning(`hook-wiring repair skipped (${errMessage(err)})`);
830
+ }
658
831
 
659
832
  if (dirty) {
660
833
  writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
661
834
  emitMutation('updated .claude/settings.json', settingsChanges.join(', '));
662
835
  }
663
836
  }
664
- } catch { /* non-fatal — stale hooks won't block session, just emit warnings */ }
837
+ } catch (err) {
838
+ // #854: silent fail-loop hid hook breakage — surface so the user can act.
839
+ emitWarning(`settings.json migration failed (${errMessage(err)})`);
840
+ }
665
841
 
666
842
  // ── 3b. Ensure shipped guidance files exist (even without version change) ──
667
843
  // Subagents need these files on disk for direct reads without memory search.
@@ -706,7 +882,9 @@ try {
706
882
  }
707
883
  }
708
884
  }
709
- } catch { /* non-fatal */ }
885
+ } catch (err) {
886
+ emitWarning(`shipped guidance restore failed (${errMessage(err)})`);
887
+ }
710
888
 
711
889
  // ── 3b-714. Retire legacy `.swarm/vector-stats.json` parallel write (#714) ─
712
890
  // `.moflo/vector-stats.json` is canonical post-#699; pre-#714 builds also
@@ -896,11 +1074,15 @@ if (upgradeNoticeContext) {
896
1074
 
897
1075
  // ── 3g. Commit deferred version stamp (#730) ────────────────────────────────
898
1076
  // Written LAST so an abort above leaves the stamp unchanged and the next
899
- // launcher re-detects the upgrade.
1077
+ // launcher re-detects the upgrade. Failure here is surfaced (#854) so a
1078
+ // permanently-broken stamp write (filesystem permissions, AV holds) doesn't
1079
+ // silently strand consumers in re-detect-on-every-session loops.
900
1080
  if (pendingVersionStampWrite) {
901
1081
  try {
902
1082
  writeFileSync(pendingVersionStampWrite.path, pendingVersionStampWrite.version);
903
- } catch { /* non-fatal — next launcher re-detects + retries the upgrade */ }
1083
+ } catch (err) {
1084
+ emitWarning(`version stamp write failed (${errMessage(err)}) — next launcher will re-detect the upgrade`);
1085
+ }
904
1086
  }
905
1087
 
906
1088
  // Bypasses emitMutation — framing, not a mutation, so it must not inflate the count.