moflo 4.9.37 → 4.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.claude/guidance/shipped/moflo-memory-protocol.md +5 -1
  2. package/.claude/guidance/shipped/moflo-memorydb-maintenance.md +22 -11
  3. package/.claude/guidance/shipped/moflo-root-cause-discipline.md +47 -0
  4. package/.claude/helpers/statusline.cjs +69 -33
  5. package/.claude/helpers/subagent-bootstrap.json +1 -1
  6. package/.claude/helpers/subagent-start.cjs +1 -1
  7. package/bin/build-embeddings.mjs +6 -20
  8. package/bin/cli.js +5 -0
  9. package/bin/generate-code-map.mjs +4 -24
  10. package/bin/hooks.mjs +3 -12
  11. package/bin/index-all.mjs +3 -13
  12. package/bin/index-guidance.mjs +36 -85
  13. package/bin/index-patterns.mjs +6 -24
  14. package/bin/index-tests.mjs +4 -23
  15. package/bin/lib/db-repair.mjs +358 -62
  16. package/bin/lib/get-backend.mjs +306 -0
  17. package/bin/lib/incremental-write.mjs +27 -7
  18. package/bin/lib/moflo-paths.mjs +64 -4
  19. package/bin/lib/suppress-sqlite-warning.mjs +57 -0
  20. package/bin/migrations/knowledge-purge.mjs +7 -8
  21. package/bin/migrations/knowledge-to-learnings.mjs +7 -9
  22. package/bin/migrations/purge-doc-entries.mjs +7 -8
  23. package/bin/migrations/strip-context-preambles.mjs +4 -6
  24. package/bin/run-migrations.mjs +1 -10
  25. package/bin/semantic-search.mjs +7 -18
  26. package/bin/session-start-launcher.mjs +144 -108
  27. package/bin/simplify-classify.cjs +38 -17
  28. package/dist/src/cli/commands/daemon.js +38 -11
  29. package/dist/src/cli/commands/doctor-checks-config.js +60 -0
  30. package/dist/src/cli/commands/doctor-checks-coverage-truth.js +136 -0
  31. package/dist/src/cli/commands/doctor-checks-memory-access.js +146 -86
  32. package/dist/src/cli/commands/doctor-checks-memory.js +13 -18
  33. package/dist/src/cli/commands/doctor-checks-version-skew.js +94 -0
  34. package/dist/src/cli/commands/doctor-checks-writers-audit.js +170 -0
  35. package/dist/src/cli/commands/doctor-embedding-hygiene.js +3 -15
  36. package/dist/src/cli/commands/doctor-fixes.js +87 -0
  37. package/dist/src/cli/commands/doctor-registry.js +24 -1
  38. package/dist/src/cli/commands/doctor.js +1 -1
  39. package/dist/src/cli/commands/embeddings.js +17 -22
  40. package/dist/src/cli/commands/memory.js +13 -23
  41. package/dist/src/cli/embeddings/persistent-cache.js +44 -83
  42. package/dist/src/cli/init/moflo-init.js +40 -0
  43. package/dist/src/cli/mcp-tools/memory-tools.js +10 -3
  44. package/dist/src/cli/memory/bridge-core.js +256 -30
  45. package/dist/src/cli/memory/bridge-embedder.js +84 -3
  46. package/dist/src/cli/memory/bridge-entries.js +70 -6
  47. package/dist/src/cli/memory/controller-registry.js +7 -2
  48. package/dist/src/cli/memory/controllers/batch-operations.js +5 -1
  49. package/dist/src/cli/memory/controllers/hierarchical-memory.js +7 -2
  50. package/dist/src/cli/memory/controllers/mutation-guard.js +22 -2
  51. package/dist/src/cli/memory/daemon-backend.js +400 -0
  52. package/dist/src/cli/memory/daemon-write-client.js +192 -15
  53. package/dist/src/cli/memory/database-provider.js +57 -40
  54. package/dist/src/cli/memory/hnsw-persistence.js +6 -8
  55. package/dist/src/cli/memory/index.js +0 -1
  56. package/dist/src/cli/memory/memory-bridge.js +40 -8
  57. package/dist/src/cli/memory/memory-initializer.js +271 -211
  58. package/dist/src/cli/memory/rvf-migration.js +25 -11
  59. package/dist/src/cli/memory/sqlite-backend.js +573 -0
  60. package/dist/src/cli/memory/suppress-sqlite-warning.js +49 -0
  61. package/dist/src/cli/services/cherry-pick-learnings.js +32 -21
  62. package/dist/src/cli/services/daemon-dashboard.js +13 -1
  63. package/dist/src/cli/services/daemon-lock.js +58 -1
  64. package/dist/src/cli/services/daemon-memory-rpc.js +245 -10
  65. package/dist/src/cli/services/embeddings-migration.js +9 -12
  66. package/dist/src/cli/services/ephemeral-namespace-purge.js +21 -16
  67. package/dist/src/cli/services/learning-service.js +12 -20
  68. package/dist/src/cli/services/memory-db-integrity-repair.js +119 -0
  69. package/dist/src/cli/services/project-root.js +69 -9
  70. package/dist/src/cli/services/soft-delete-purge.js +6 -11
  71. package/dist/src/cli/services/sqljs-migration-store.js +4 -1
  72. package/dist/src/cli/services/subagent-bootstrap.js +1 -1
  73. package/dist/src/cli/shared/events/event-store.js +26 -55
  74. package/dist/src/cli/version.js +1 -1
  75. package/package.json +2 -4
  76. package/dist/src/cli/memory/sqljs-backend.js +0 -643
@@ -9,44 +9,44 @@ import * as fs from 'fs';
9
9
  import * as crypto from 'crypto';
10
10
  import { atomicWriteFileSync } from '../services/atomic-file-write.js';
11
11
  import { legacyMemoryDbPath, memoryDbPath, MOFLO_DIR, } from '../services/moflo-paths.js';
12
+ import { findProjectRoot } from '../services/project-root.js';
12
13
  // When run via npx, CWD may be node_modules/moflo — walk up to find actual project
13
14
  let _projectRoot;
14
15
  /**
15
16
  * Reset the cached project root. Tests that change `process.cwd()` or
16
17
  * `process.env.CLAUDE_PROJECT_DIR` between cases must call this to avoid
17
18
  * leaking state across tests.
19
+ *
20
+ * Also drops the bridge-coherence cursor (#1058) so a test that re-points the
21
+ * project root doesn't inherit a stale mtime anchor from the previous root.
18
22
  */
19
23
  export function _resetProjectRootForTest() {
20
24
  _projectRoot = undefined;
25
+ lastSeenMtimeMs = null;
21
26
  }
27
+ /**
28
+ * Test seam (#1058): peek at the bridge-coherence cursor. Production callers
29
+ * never invoke this; tests assert that own writes update the anchor and that
30
+ * another writer's mtime bump triggers reload.
31
+ */
32
+ export function _getBridgeCoherenceCursorForTest() {
33
+ return lastSeenMtimeMs;
34
+ }
35
+ /**
36
+ * Resolve the bridge's project root.
37
+ *
38
+ * Delegates to the canonical resolver in `src/cli/services/project-root.ts`
39
+ * (twin: `bin/lib/moflo-paths.mjs:findProjectRoot()`). The bridge keeps a
40
+ * module-level cache so the hot path (every withDb call) doesn't redo the
41
+ * stat sweep. Tests reset via {@link _resetProjectRootForTest}.
42
+ *
43
+ * If you find yourself wanting to inline a custom walk here, STOP — every
44
+ * divergent walk creates a new path-mismatch bug class (see #1057 / #1058).
45
+ */
22
46
  function getProjectRoot() {
23
47
  if (_projectRoot)
24
48
  return _projectRoot;
25
- if (process.env.CLAUDE_PROJECT_DIR) {
26
- _projectRoot = process.env.CLAUDE_PROJECT_DIR;
27
- return _projectRoot;
28
- }
29
- let dir = process.cwd();
30
- const root = path.parse(dir).root;
31
- while (dir !== root) {
32
- // `.moflo/moflo.db` is the canonical post-#727 marker. Older consumers
33
- // mid-migration may still only have `.swarm/memory.db`; recognise both
34
- // so the bridge can find the project root either way.
35
- if (fs.existsSync(memoryDbPath(dir)) || fs.existsSync(legacyMemoryDbPath(dir))) {
36
- _projectRoot = dir;
37
- return _projectRoot;
38
- }
39
- if (fs.existsSync(path.join(dir, 'CLAUDE.md')) && fs.existsSync(path.join(dir, 'package.json'))) {
40
- _projectRoot = dir;
41
- return _projectRoot;
42
- }
43
- if (path.basename(dir) === 'node_modules') {
44
- dir = path.dirname(dir);
45
- continue;
46
- }
47
- dir = path.dirname(dir);
48
- }
49
- _projectRoot = process.cwd();
49
+ _projectRoot = findProjectRoot();
50
50
  return _projectRoot;
51
51
  }
52
52
  import { ControllerRegistry } from './controller-registry.js';
@@ -57,6 +57,20 @@ let registryPromise = null;
57
57
  let resolvedRegistry = null;
58
58
  let lastBridgeError = null;
59
59
  const schemaInitialized = new WeakSet();
60
+ /**
61
+ * Last-known disk mtime for the bridge's dbPath. Anchors the bridge-coherence
62
+ * check (story #1058 / epic #1054): when another process writes to disk, its
63
+ * persist bumps mtime past this value; the next withDb call shuts the bridge
64
+ * down so getRegistry re-reads fresh from disk.
65
+ *
66
+ * Set after every successful persist (own writes; no self-invalidation) and
67
+ * after every successful registry init (anchor to load-time disk state).
68
+ * Reset to null when the bridge is shut down so the next init re-anchors.
69
+ *
70
+ * Module-level because the bridge itself is process-wide singleton state —
71
+ * matches the existing `registryPromise` lifecycle.
72
+ */
73
+ let lastSeenMtimeMs = null;
60
74
  /** Controllers every moflodb_* MCP tool assumes are present when the bridge is available. */
61
75
  export const REQUIRED_BRIDGE_CONTROLLERS = Object.freeze([
62
76
  'hierarchicalMemory',
@@ -81,6 +95,61 @@ export function logBridgeError(context, err, opts) {
81
95
  const msg = errorDetail(err);
82
96
  console.error(`[moflo] ${context}: ${msg}`);
83
97
  }
98
+ /**
99
+ * Treats an error as a SQLITE_BUSY lock-contention failure if either the
100
+ * error code or message indicates it. Belt-and-suspenders around node:sqlite,
101
+ * whose surface intermittently surfaces busy-conflicts as either `code:
102
+ * 'SQLITE_BUSY'` or a plain `Error: database is locked`. We match both.
103
+ */
104
+ function isBusyError(err) {
105
+ if (!err || typeof err !== 'object')
106
+ return false;
107
+ const e = err;
108
+ if (e.code === 'SQLITE_BUSY' || e.code === 'SQLITE_BUSY_SNAPSHOT' || e.code === 'SQLITE_BUSY_RECOVERY')
109
+ return true;
110
+ return typeof e.message === 'string' && /database is locked|SQLITE_BUSY/i.test(e.message);
111
+ }
112
+ // Exponential backoff with jitter. Total ceiling ≈ 1.55s of waiting (50 +
113
+ // 100 + 200 + 400 + 800), plus the work itself. Sized so a typical short
114
+ // indexer write (a few rows in auto-commit) finishes before we give up,
115
+ // without ballooning bridge latency on a really stuck DB. See #1098.
116
+ const BRIDGE_BUSY_RETRY_DELAYS_MS = [50, 100, 200, 400, 800];
117
+ /**
118
+ * Run `fn` with a jittered exponential-backoff retry on SQLITE_BUSY errors.
119
+ *
120
+ * Why this exists: in CI the bridge's parallel doctor-subcheck workload hit
121
+ * "database is locked" 5–7 times in a 5ms window while the configured
122
+ * `busy_timeout=15000ms` should have been retrying for full seconds (#1098).
123
+ * The hypothesis-in-flight is that `node:sqlite`'s `db.prepare()` bypasses
124
+ * the engine-level `busy_handler`, so the busy_timeout pragma never engages
125
+ * for the bridge's prepare-heavy call patterns. Until that's confirmed
126
+ * (#1098 follow-up — local repro), an explicit retry here is the only
127
+ * guard between the consumer and a hard fail.
128
+ *
129
+ * Jitter scatters parallel retries so the workload doesn't thunder back
130
+ * onto the same lock at the same instant.
131
+ */
132
+ async function withBusyRetry(fn) {
133
+ let lastErr = null;
134
+ for (let attempt = 0; attempt <= BRIDGE_BUSY_RETRY_DELAYS_MS.length; attempt++) {
135
+ if (attempt > 0) {
136
+ const base = BRIDGE_BUSY_RETRY_DELAYS_MS[attempt - 1];
137
+ const jitter = base * (Math.random() * 0.5 - 0.25); // ±25%
138
+ const delay = Math.max(0, Math.round(base + jitter));
139
+ await new Promise((resolve) => setTimeout(resolve, delay));
140
+ }
141
+ try {
142
+ return await fn();
143
+ }
144
+ catch (err) {
145
+ lastErr = err;
146
+ if (!isBusyError(err))
147
+ throw err;
148
+ // Loop continues — backoff applied at top of next iteration.
149
+ }
150
+ }
151
+ throw lastErr;
152
+ }
84
153
  /**
85
154
  * Resolve the on-disk DB path the bridge should read/write.
86
155
  *
@@ -215,7 +284,34 @@ export function persistBridgeDb(db, dbPath) {
215
284
  return;
216
285
  try {
217
286
  fs.mkdirSync(path.dirname(target), { recursive: true });
218
- atomicWriteFileSync(target, db.export());
287
+ // Phase 4 (#1083) — node:sqlite-backed handles persist incrementally via
288
+ // WAL; `save()` on the factory adapter is a no-op for them. Doing the
289
+ // sql.js-style `export()` + `atomicWriteFileSync` against a node:sqlite
290
+ // handle would CLOBBER the WAL writes (the catastrophic case the epic
291
+ // was killing). Route through `save()` when the handle is the factory
292
+ // shape; only fall back to the legacy export-and-write for raw sql.js.
293
+ if (db && db.kind === 'node-sqlite' && typeof db.save === 'function') {
294
+ db.save();
295
+ }
296
+ else {
297
+ atomicWriteFileSync(target, db.export());
298
+ }
299
+ // Anchor the bridge-coherence cursor to the post-persist mtime so our own
300
+ // write doesn't trigger a self-invalidation on the next withDb call.
301
+ // Under WAL the write lands in `-wal` (not the main file), so include
302
+ // its mtime — must match the read side of checkBridgeCoherence or self-
303
+ // writes self-invalidate (#1098).
304
+ try {
305
+ let anchored = fs.statSync(target).mtimeMs;
306
+ try {
307
+ const walStat = fs.statSync(`${target}-wal`);
308
+ if (walStat.mtimeMs > anchored)
309
+ anchored = walStat.mtimeMs;
310
+ }
311
+ catch { /* no WAL sidecar — main mtime is authoritative */ }
312
+ lastSeenMtimeMs = anchored;
313
+ }
314
+ catch { /* tolerate; coherence check re-anchors on next read */ }
219
315
  }
220
316
  catch (err) {
221
317
  logBridgeError('bridge persist failed', err, { alwaysLog: true });
@@ -280,20 +376,138 @@ export function getDb(registry) {
280
376
  }
281
377
  return { db, mofloDb };
282
378
  }
379
+ /**
380
+ * Bridge coherence check (story #1058 / epic #1054 — read-side symmetry to
381
+ * #981 single-writer). sql.js holds an in-memory DB snapshot per process and
382
+ * never re-reads disk after init, so any long-lived process — daemon or
383
+ * not — returns stale rows when another writer has touched the file since
384
+ * this process loaded its snapshot.
385
+ *
386
+ * Solution: stat the dbPath before every bridge op; if the mtime has advanced
387
+ * past our last-known value, another writer has touched the file — drop the
388
+ * bridge so `getRegistry` re-loads from disk on the next call.
389
+ *
390
+ * The daemon participates in this check too. #1058 originally exempted the
391
+ * daemon under "daemon is the sole writer", but that assumption breaks every
392
+ * session-start: `bin/index-guidance.mjs`, migration runners, and repair
393
+ * tools all write directly to `.moflo/moflo.db` while the daemon is up
394
+ * (epic #1057 calls these out as in-scope writers to coordinate). Without
395
+ * the daemon doing the check, daemon-routed MCP reads served the pre-init
396
+ * snapshot indefinitely, hiding the indexer's chunks from `memory_search` /
397
+ * `memory_get_neighbors` until the daemon process restarted (#1073, smoke).
398
+ *
399
+ * Self-invalidation is still suppressed: `persistBridgeDb` anchors
400
+ * `lastSeenMtimeMs` to the post-write mtime, so the daemon's own writes never
401
+ * trip the reload. External writers — whose touches advance mtime past the
402
+ * anchor — do.
403
+ */
404
+ async function checkBridgeCoherence(dbPath) {
405
+ // No registry yet → nothing to invalidate; first init will anchor the cursor.
406
+ if (!registryPromise)
407
+ return;
408
+ const target = dbPath ? path.resolve(dbPath) : getDbPath();
409
+ if (target === ':memory:')
410
+ return;
411
+ // Under WAL (Phase 5 / #1083), commits land in the `-wal` sidecar first —
412
+ // the main DB file's mtime ONLY advances on checkpoint, which may be many
413
+ // writes apart. Statting just the main file misses every external WAL
414
+ // write between checkpoints, leaving the bridge with a stale in-memory
415
+ // snapshot indefinitely. That's the failure mode in #1098 / #1073 smoke
416
+ // where doctor's seed-via-openDaemonDatabase then bridge-via-MCP couldn't
417
+ // see its own freshly-written rows. Stat both files and use whichever is
418
+ // most recent. Mirrors the same fix in `refreshVectorStatsCache`.
419
+ let mtimeMs = 0;
420
+ try {
421
+ mtimeMs = fs.statSync(target).mtimeMs;
422
+ }
423
+ catch {
424
+ // File missing or unreadable — fall through. Downstream withDb surfaces
425
+ // the error; we don't synthesize a coherence event from a stat failure.
426
+ return;
427
+ }
428
+ try {
429
+ const walStat = fs.statSync(`${target}-wal`);
430
+ if (walStat.mtimeMs > mtimeMs)
431
+ mtimeMs = walStat.mtimeMs;
432
+ }
433
+ catch { /* no WAL sidecar yet — main mtime is authoritative */ }
434
+ if (lastSeenMtimeMs == null) {
435
+ // First op after init — anchor and proceed.
436
+ lastSeenMtimeMs = mtimeMs;
437
+ return;
438
+ }
439
+ if (mtimeMs > lastSeenMtimeMs) {
440
+ // Another process wrote since we loaded. Drop the bridge so the next
441
+ // `getRegistry` call re-initializes from fresh disk. Reset the cursor;
442
+ // the post-reload anchor (after `getRegistry` succeeds) re-sets it.
443
+ await shutdownBridge();
444
+ lastSeenMtimeMs = null;
445
+ }
446
+ }
283
447
  /**
284
448
  * Resolve registry + db, run fn, return null on any unexpected failure so
285
449
  * the caller falls back to raw sql.js. Errors are logged to stderr —
286
450
  * silently swallowing them previously masked real bugs in bridge-entries.ts.
451
+ *
452
+ * Bridge coherence (#1058): every entry through this gate checks whether the
453
+ * dbPath's mtime has advanced past our last-known value; if so, the bridge is
454
+ * torn down so the next op reads fresh disk state. Daemon participates in the
455
+ * check; its own writes anchor `lastSeenMtimeMs` via `persistBridgeDb` so
456
+ * self-fire is suppressed.
287
457
  */
288
458
  export async function withDb(dbPath, fn) {
459
+ await checkBridgeCoherence(dbPath);
289
460
  const registry = await getRegistry(dbPath);
290
461
  if (!registry)
291
462
  return null;
292
463
  const ctx = getDb(registry);
293
464
  if (!ctx)
294
465
  return null;
466
+ // Anchor the coherence cursor to load-time disk state once the registry is
467
+ // resolved. The post-init read of `mofloDb.database` reflects the bytes
468
+ // that were on disk when `openSqlJsDatabase` ran; pin the matching mtime so
469
+ // a subsequent unrelated process write triggers reload, not a self-fire.
470
+ // Include `-wal` since WAL writes don't bump the main file mtime (#1098).
471
+ const target = dbPath ? path.resolve(dbPath) : getDbPath();
472
+ if (lastSeenMtimeMs == null && target !== ':memory:') {
473
+ try {
474
+ let anchor = fs.statSync(target).mtimeMs;
475
+ try {
476
+ const walStat = fs.statSync(`${target}-wal`);
477
+ if (walStat.mtimeMs > anchor)
478
+ anchor = walStat.mtimeMs;
479
+ }
480
+ catch { /* no WAL sidecar — main mtime is authoritative */ }
481
+ lastSeenMtimeMs = anchor;
482
+ }
483
+ catch { /* file may not exist yet — first persist will anchor */ }
484
+ }
295
485
  try {
296
- return await fn(ctx, registry);
486
+ const result = await withBusyRetry(() => fn(ctx, registry));
487
+ // Re-anchor the coherence cursor to the post-op mtime so internal
488
+ // bridge writes that happen AFTER persistBridgeDb (attestation log,
489
+ // bumpAccessCounts, cache invalidation row updates, etc.) don't
490
+ // look like external writes on the next withDb call. Without this
491
+ // re-anchor, the next call's checkBridgeCoherence sees the
492
+ // attestation-advanced -wal mtime, tears down the registry, and
493
+ // any test-injected stubs (cache.set, etc.) get reset — exactly
494
+ // the failure mode in `bridge-entries.test.ts` #994 after the
495
+ // WAL-coherence fix (49f91a01a). External writes still get
496
+ // detected at the START of the next withDb call.
497
+ if (target !== ':memory:') {
498
+ try {
499
+ let anchor = fs.statSync(target).mtimeMs;
500
+ try {
501
+ const walStat = fs.statSync(`${target}-wal`);
502
+ if (walStat.mtimeMs > anchor)
503
+ anchor = walStat.mtimeMs;
504
+ }
505
+ catch { /* no WAL sidecar */ }
506
+ lastSeenMtimeMs = anchor;
507
+ }
508
+ catch { /* tolerate; coherence check re-anchors on next read */ }
509
+ }
510
+ return result;
297
511
  }
298
512
  catch (err) {
299
513
  logBridgeError('bridge operation failed', err);
@@ -313,6 +527,9 @@ export async function shutdownBridge() {
313
527
  const registry = await registryPromise;
314
528
  registryPromise = null;
315
529
  resolvedRegistry = null;
530
+ // Drop the coherence cursor too — the next init will re-anchor against
531
+ // whatever's on disk by then.
532
+ lastSeenMtimeMs = null;
316
533
  if (registry) {
317
534
  try {
318
535
  await registry.shutdown();
@@ -388,9 +605,12 @@ export function refreshVectorStatsCache(dbPathOverride) {
388
605
  const existing = readExistingVectorStats(root);
389
606
  // Mtime short-circuit (#639 perf): refreshVectorStatsCache fires on every
390
607
  // bridge store/delete. When the on-disk DB hasn't changed since we last
391
- // wrote the cache (the common case mid-session — bridge writes don't
392
- // touch the file until persist), running 3 COUNT queries is wasted work.
393
- // Skip the rest entirely.
608
+ // wrote the cache, running 3 COUNT queries is wasted work — skip the rest.
609
+ //
610
+ // Phase 4 (#1083) flipped the engine to node:sqlite + WAL: every commit
611
+ // lands in the `-wal` sidecar (mtime advances there), not the main file.
612
+ // Stat both so a write to either invalidates the cache. The `-shm` file
613
+ // is not load-bearing — it tracks WAL readers, not committed writes.
394
614
  let dbMtimeMs = 0;
395
615
  let dbSizeKB = 0;
396
616
  try {
@@ -399,6 +619,12 @@ export function refreshVectorStatsCache(dbPathOverride) {
399
619
  dbSizeKB = Math.floor(stat.size / 1024);
400
620
  }
401
621
  catch { /* file may not exist */ }
622
+ try {
623
+ const walStat = fs.statSync(`${dbFile}-wal`);
624
+ if (walStat.mtimeMs > dbMtimeMs)
625
+ dbMtimeMs = walStat.mtimeMs;
626
+ }
627
+ catch { /* no WAL sidecar — fine, dbMtimeMs already covers it */ }
402
628
  if (existing &&
403
629
  typeof existing.updatedAt === 'number' &&
404
630
  typeof existing.vectorCount === 'number' &&
@@ -54,9 +54,14 @@ export const EMBEDDING_MODEL_LEGACY_DEFAULT = 'local';
54
54
  * - `epic-state` — Epic progress (epic-N, story-M) written by commands/epic.ts
55
55
  * - `test-bridge-fix` — Single 2026-04-23 row left over from a one-off test
56
56
  *
57
+ * Membership is also extended by {@link EPHEMERAL_NAMESPACE_PREFIXES} for
58
+ * dynamic-name namespaces (e.g. `doctor-memprobe-<persona>`). Most callers
59
+ * should use {@link isEphemeralNamespace} which checks both sets.
60
+ *
57
61
  * See story #729 for the source-trace and rationale. The session-start
58
- * launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES} — a strict
59
- * subset that *excludes* `tasklist`, because the dashboard's Flo Runs tab
62
+ * launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES} +
63
+ * {@link PURGE_ON_SESSION_START_PREFIXES} — a strict subset that *excludes*
64
+ * `tasklist`, because the dashboard's Flo Runs tab
60
65
  * (`daemon-dashboard.ts handleSpells`) reads tasklist; purging it on every
61
66
  * session would empty the tab between sessions (#968).
62
67
  */
@@ -66,6 +71,26 @@ export const EPHEMERAL_NAMESPACES = new Set([
66
71
  'epic-state',
67
72
  'test-bridge-fix',
68
73
  ]);
74
+ /**
75
+ * Prefix patterns that extend {@link EPHEMERAL_NAMESPACES} for namespaces
76
+ * whose suffix is generated at runtime. Any namespace beginning with one of
77
+ * these prefixes is treated as ephemeral (skips embedding).
78
+ *
79
+ * NOTE — design distinction from {@link PURGE_ON_SESSION_START_PREFIXES}:
80
+ * a namespace can be auto-purgeable WITHOUT being skip-embed. For example,
81
+ * `doctor-memprobe-<persona>` rows are intentionally purged on every
82
+ * session start (the cleanup is best-effort and accumulates across
83
+ * sessions) but MUST still get embeddings — the probe's whole purpose is
84
+ * to validate the embedder is wired (`Memory Access Functional` check
85
+ * asserts `hasEmbedding=true`). Skipping embedding for those rows breaks
86
+ * the doctor check. Put a prefix here only when both properties apply.
87
+ *
88
+ * Currently empty — there's no namespace today that needs both skip-embed
89
+ * AND prefix-match. Kept as an explicit export so the bridge embedder's
90
+ * call site is uniform and future skip-embed prefixes have an obvious
91
+ * home.
92
+ */
93
+ export const EPHEMERAL_NAMESPACE_PREFIXES = new Set([]);
69
94
  /**
70
95
  * Subset of {@link EPHEMERAL_NAMESPACES} that the session-start launcher
71
96
  * hard-purges via `services/ephemeral-namespace-purge.ts`. Excludes
@@ -77,6 +102,62 @@ export const PURGE_ON_SESSION_START_NAMESPACES = new Set([
77
102
  'epic-state',
78
103
  'test-bridge-fix',
79
104
  ]);
105
+ /**
106
+ * Prefix patterns purged alongside {@link PURGE_ON_SESSION_START_NAMESPACES}
107
+ * by the session-start launcher.
108
+ *
109
+ * Members:
110
+ * - `doctor-memprobe-` — `flo healer`'s `Memory Access` round-trip probe
111
+ * writes a sentinel into `doctor-memprobe-<persona>` (persona is one of
112
+ * `subagent`, `swarm-agent`, `hive-mind-worker`, plus test variants).
113
+ * - `doctor-neighbors-` — `flo healer`'s neighbor-traversal probe creates a
114
+ * fresh `doctor-neighbors-<timestamp>` namespace for each run and seeds
115
+ * three chunk rows. Unlike memprobe (fixed personas), every healer run
116
+ * spawns a NEW namespace, so namespace pollution grows linearly with
117
+ * healer-run count if cleanup races fail.
118
+ *
119
+ * Both probes register an explicit cleanup via `safeDelete`, but the
120
+ * cleanup is best-effort and silently swallows failures (e.g. daemon
121
+ * races, MCP transport errors) — so rows accumulate across consumer
122
+ * sessions. Auto-purging matches the pattern for
123
+ * `hive-mind`/`epic-state`/`test-bridge-fix`. These rows MUST still get
124
+ * embeddings (see {@link EPHEMERAL_NAMESPACE_PREFIXES} for why) — only
125
+ * their persistence across sessions is curtailed.
126
+ */
127
+ export const PURGE_ON_SESSION_START_PREFIXES = new Set([
128
+ 'doctor-memprobe-',
129
+ 'doctor-neighbors-',
130
+ ]);
131
+ /**
132
+ * Return `true` if a namespace is ephemeral — either an exact member of
133
+ * {@link EPHEMERAL_NAMESPACES} or one whose name begins with a prefix in
134
+ * {@link EPHEMERAL_NAMESPACE_PREFIXES}. Callers checking embedding-skip
135
+ * behavior should use this helper rather than `.has()` on the Set directly.
136
+ */
137
+ export function isEphemeralNamespace(namespace) {
138
+ if (EPHEMERAL_NAMESPACES.has(namespace))
139
+ return true;
140
+ for (const prefix of EPHEMERAL_NAMESPACE_PREFIXES) {
141
+ if (namespace.startsWith(prefix))
142
+ return true;
143
+ }
144
+ return false;
145
+ }
146
+ /**
147
+ * Return `true` if a namespace should be hard-purged on session start —
148
+ * either an exact member of {@link PURGE_ON_SESSION_START_NAMESPACES} or one
149
+ * whose name begins with a prefix in
150
+ * {@link PURGE_ON_SESSION_START_PREFIXES}.
151
+ */
152
+ export function shouldPurgeOnSessionStart(namespace) {
153
+ if (PURGE_ON_SESSION_START_NAMESPACES.has(namespace))
154
+ return true;
155
+ for (const prefix of PURGE_ON_SESSION_START_PREFIXES) {
156
+ if (namespace.startsWith(prefix))
157
+ return true;
158
+ }
159
+ return false;
160
+ }
80
161
  /**
81
162
  * Maximum number of `tasklist` rows kept across session restarts. The
82
163
  * session-start retention pass deletes oldest rows beyond this cap, so the
@@ -140,7 +221,7 @@ export async function resolveBridgeEmbedding(value, precomputed, generateEmbeddi
140
221
  // Ephemeral namespaces (run-tracking, never user knowledge) skip embeddings
141
222
  // unconditionally — even precomputed vectors are dropped. Result row has
142
223
  // `embedding IS NULL` and `embedding_model IS NULL`. See #729.
143
- if (namespace && EPHEMERAL_NAMESPACES.has(namespace)) {
224
+ if (namespace && isEphemeralNamespace(namespace)) {
144
225
  return { ok: true, json: null, dimensions: 0, model: null };
145
226
  }
146
227
  const wantsEmbedding = generateEmbeddingFlag !== false && value.length > 0;
@@ -30,6 +30,19 @@ function makeEntryCacheKey(namespace, key) {
30
30
  const safeKey = String(key).replace(/:/g, '_');
31
31
  return `entry:${safeNs}:${safeKey}`;
32
32
  }
33
+ /** Normalise `metadata` for the `metadata` TEXT column; `undefined` → `'{}'` (#1064). */
34
+ export function serialiseMetadata(metadata) {
35
+ if (metadata == null)
36
+ return '{}';
37
+ if (typeof metadata === 'string')
38
+ return metadata;
39
+ try {
40
+ return JSON.stringify(metadata);
41
+ }
42
+ catch {
43
+ return '{}';
44
+ }
45
+ }
33
46
  function bm25Score(queryTerms, docContent, avgDocLength, docCount, termDocFreqs) {
34
47
  const k1 = 1.2;
35
48
  const b = 0.75;
@@ -83,9 +96,29 @@ async function cacheInvalidate(registry, cacheKey) {
83
96
  async function guardValidate(registry, operation, params, options) {
84
97
  const guard = registry.get('mutationGuard');
85
98
  if (!guard)
86
- return { allowed: true };
99
+ return { allowed: true, commit: null };
87
100
  const result = guard.validate({ operation, params, timestamp: Date.now(), bypassDedupe: options?.bypassDedupe });
88
- return { allowed: result?.allowed === true, reason: result?.reason };
101
+ const allowed = result?.allowed === true;
102
+ return {
103
+ allowed,
104
+ reason: result?.reason,
105
+ commit: allowed && result?.token ? { guard, token: result.token } : null,
106
+ };
107
+ }
108
+ /**
109
+ * Confirm a previously-validated mutation. Idempotent and null-safe so
110
+ * call sites can fire it from a `finally`-style success branch without
111
+ * extra null checking. After commit, the mutation lands in MutationGuard's
112
+ * dedupe buffer so subsequent identical writes within the window are
113
+ * correctly rejected.
114
+ */
115
+ function guardCommit(handle) {
116
+ if (!handle)
117
+ return;
118
+ try {
119
+ handle.guard.commit(handle.token);
120
+ }
121
+ catch { /* commit failure is non-fatal — recording is observability-grade */ }
89
122
  }
90
123
  async function logAttestation(registry, operation, entryId, metadata) {
91
124
  const attestation = registry.get('attestationLog');
@@ -198,12 +231,13 @@ export async function bridgeStoreEntry(options) {
198
231
  tags, metadata, created_at, updated_at, expires_at, status
199
232
  ) VALUES (?, ?, ?, ?, 'semantic', ?, ?, ?, ?, ?, ?, ?, ?, 'active')`;
200
233
  // sql.js Statement.run takes an array of bindings — not varargs.
234
+ const metadataJson = serialiseMetadata(options.metadata);
201
235
  const stmt = ctx.db.prepare(insertSql);
202
236
  stmt.run([
203
237
  id, key, namespace, value,
204
238
  embeddingJson, dimensions || null, model,
205
239
  tags.length > 0 ? JSON.stringify(tags) : null,
206
- '{}',
240
+ metadataJson,
207
241
  now, now,
208
242
  ttl ? now + (ttl * 1000) : null,
209
243
  ]);
@@ -226,7 +260,16 @@ export async function bridgeStoreEntry(options) {
226
260
  const cacheKey = makeEntryCacheKey(namespace, key);
227
261
  let cached = true;
228
262
  try {
229
- await cacheSet(registry, cacheKey, { id, key, namespace, content: value, embedding: embeddingJson });
263
+ // #1064 — include metadata in the cache value so a subsequent
264
+ // bridgeGetEntry cache-hit returns the same shape as a fresh disk read.
265
+ // Without this, chunk-row producers writing through the chokepoint would
266
+ // get `{}` back from cache and the full metadata from disk — exactly the
267
+ // divergence the cache is supposed to mask.
268
+ await cacheSet(registry, cacheKey, {
269
+ id, key, namespace, content: value,
270
+ embedding: embeddingJson,
271
+ metadata: metadataJson,
272
+ });
230
273
  }
231
274
  catch (err) {
232
275
  cached = false;
@@ -249,6 +292,11 @@ export async function bridgeStoreEntry(options) {
249
292
  logBridgeError('post-persist stats refresh failed', err);
250
293
  }
251
294
  }
295
+ // Commit the MutationGuard recording NOW that the row is durable on
296
+ // disk + cache + attestation log. Order: persist before commit so a
297
+ // SQLITE_BUSY mid-write doesn't leave a stale dedupe entry that would
298
+ // reject the withDb retry as a "duplicate" (#1098).
299
+ guardCommit(guardResult.commit);
252
300
  return {
253
301
  success: true,
254
302
  id,
@@ -316,13 +364,14 @@ export async function bridgeStoreEntries(items, dbPath) {
316
364
  embedding, embedding_dimensions, embedding_model,
317
365
  tags, metadata, created_at, updated_at, expires_at, status
318
366
  ) VALUES (?, ?, ?, ?, 'semantic', ?, ?, ?, ?, ?, ?, ?, ?, 'active')`;
367
+ const metadataJson = serialiseMetadata(opts.metadata);
319
368
  try {
320
369
  const stmt = ctx.db.prepare(insertSql);
321
370
  stmt.run([
322
371
  id, key, namespace, value,
323
372
  embeddingJson, dimensions || null, model,
324
373
  tags.length > 0 ? JSON.stringify(tags) : null,
325
- '{}',
374
+ metadataJson,
326
375
  now, now,
327
376
  ttl ? now + (ttl * 1000) : null,
328
377
  ]);
@@ -337,7 +386,12 @@ export async function bridgeStoreEntries(items, dbPath) {
337
386
  anyEmbedded = true;
338
387
  deferredBookkeeping.push({
339
388
  cacheKey: makeEntryCacheKey(namespace, key),
340
- cacheValue: { id, key, namespace, content: value, embedding: embeddingJson },
389
+ // #1064 keep cache shape in sync with disk (see single-store path).
390
+ cacheValue: {
391
+ id, key, namespace, content: value,
392
+ embedding: embeddingJson,
393
+ metadata: metadataJson,
394
+ },
341
395
  entryId: id,
342
396
  entryKey: key,
343
397
  namespace,
@@ -389,6 +443,11 @@ export async function bridgeStoreEntries(items, dbPath) {
389
443
  logBridgeError('post-persist stats refresh failed', err);
390
444
  }
391
445
  }
446
+ // Commit the bulk-store mutation in the dedupe buffer (#1098). At least
447
+ // one row reached disk, which is sufficient to record the bulk op —
448
+ // partial-batch persist failure is already reflected per-item via the
449
+ // results array.
450
+ guardCommit(guardResult.commit);
392
451
  return results;
393
452
  });
394
453
  }
@@ -663,6 +722,11 @@ export async function bridgeDeleteEntry(options) {
663
722
  // Non-fatal — count is informational
664
723
  }
665
724
  refreshVectorStatsCache();
725
+ // Commit the delete mutation in the dedupe buffer (#1098). The row is
726
+ // gone from disk and the cache is invalidated, so this is the safe
727
+ // point to record — a SQLITE_BUSY mid-DELETE earlier would have been caught
728
+ // in the try/catch above and never reached here.
729
+ guardCommit(guardResult.commit);
666
730
  return {
667
731
  success: true,
668
732
  deleted: true,
@@ -12,7 +12,7 @@
12
12
  */
13
13
  import { EventEmitter } from 'node:events';
14
14
  import * as path from 'node:path';
15
- import { openSqlJsDatabase } from './sqljs-backend.js';
15
+ import { openDaemonDatabase } from './daemon-backend.js';
16
16
  import { CONTROLLER_SPECS } from './controller-specs.js';
17
17
  import { errorDetail } from '../shared/utils/error-detail.js';
18
18
  // ===== Initialization Levels =====
@@ -213,7 +213,12 @@ export class ControllerRegistry extends EventEmitter {
213
213
  return;
214
214
  }
215
215
  }
216
- const database = await openSqlJsDatabase(dbPath, config.wasmPath);
216
+ // Phase 4 (#1083) — open via the node:sqlite-backed adapter (shape-
217
+ // compatible with sql.js Statement API). Eliminates the cross-process
218
+ // clobber between `bin/` writers (node:sqlite + WAL) and the daemon's
219
+ // bridge (was sql.js readFileSync). `config.wasmPath` is now unused —
220
+ // node:sqlite is built into Node 22+; Phase 5 removes the field.
221
+ const database = openDaemonDatabase(dbPath);
217
222
  this.mofloDb = { database, close: async () => database.close() };
218
223
  this.emit('mofloDb:initialized');
219
224
  }
@@ -31,7 +31,11 @@ export class BatchOperations {
31
31
  }
32
32
  const ids = [];
33
33
  const now = Date.now();
34
- this.db.run('BEGIN TRANSACTION');
34
+ // BEGIN IMMEDIATE acquires RESERVED upfront — busy_handler is consulted
35
+ // for the lock acquisition, so concurrent writers wait out PRAGMA
36
+ // busy_timeout instead of fail-fasting on the SHARED→RESERVED upgrade
37
+ // (#1099: the deferred-BEGIN trap surfaced in #1098 CI).
38
+ this.db.run('BEGIN IMMEDIATE');
35
39
  try {
36
40
  const stmt = this.db.prepare(`INSERT INTO ${EPISODES_TABLE} (id, key, content, metadata, embedding, ts)
37
41
  VALUES (?, ?, ?, ?, ?, ?)`);