moflo 4.9.37 → 4.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/guidance/shipped/moflo-memory-protocol.md +5 -1
- package/.claude/guidance/shipped/moflo-memorydb-maintenance.md +22 -11
- package/.claude/guidance/shipped/moflo-root-cause-discipline.md +47 -0
- package/.claude/helpers/statusline.cjs +69 -33
- package/.claude/helpers/subagent-bootstrap.json +1 -1
- package/.claude/helpers/subagent-start.cjs +1 -1
- package/bin/build-embeddings.mjs +6 -20
- package/bin/cli.js +5 -0
- package/bin/generate-code-map.mjs +4 -24
- package/bin/hooks.mjs +3 -12
- package/bin/index-all.mjs +3 -13
- package/bin/index-guidance.mjs +36 -85
- package/bin/index-patterns.mjs +6 -24
- package/bin/index-tests.mjs +4 -23
- package/bin/lib/db-repair.mjs +358 -62
- package/bin/lib/get-backend.mjs +306 -0
- package/bin/lib/incremental-write.mjs +27 -7
- package/bin/lib/moflo-paths.mjs +64 -4
- package/bin/lib/suppress-sqlite-warning.mjs +57 -0
- package/bin/migrations/knowledge-purge.mjs +7 -8
- package/bin/migrations/knowledge-to-learnings.mjs +7 -9
- package/bin/migrations/purge-doc-entries.mjs +7 -8
- package/bin/migrations/strip-context-preambles.mjs +4 -6
- package/bin/run-migrations.mjs +1 -10
- package/bin/semantic-search.mjs +7 -18
- package/bin/session-start-launcher.mjs +144 -108
- package/bin/simplify-classify.cjs +38 -17
- package/dist/src/cli/commands/daemon.js +38 -11
- package/dist/src/cli/commands/doctor-checks-config.js +60 -0
- package/dist/src/cli/commands/doctor-checks-coverage-truth.js +136 -0
- package/dist/src/cli/commands/doctor-checks-memory-access.js +146 -86
- package/dist/src/cli/commands/doctor-checks-memory.js +13 -18
- package/dist/src/cli/commands/doctor-checks-version-skew.js +94 -0
- package/dist/src/cli/commands/doctor-checks-writers-audit.js +170 -0
- package/dist/src/cli/commands/doctor-embedding-hygiene.js +3 -15
- package/dist/src/cli/commands/doctor-fixes.js +87 -0
- package/dist/src/cli/commands/doctor-registry.js +24 -1
- package/dist/src/cli/commands/doctor.js +1 -1
- package/dist/src/cli/commands/embeddings.js +17 -22
- package/dist/src/cli/commands/memory.js +13 -23
- package/dist/src/cli/embeddings/persistent-cache.js +44 -83
- package/dist/src/cli/init/moflo-init.js +40 -0
- package/dist/src/cli/mcp-tools/memory-tools.js +10 -3
- package/dist/src/cli/memory/bridge-core.js +256 -30
- package/dist/src/cli/memory/bridge-embedder.js +84 -3
- package/dist/src/cli/memory/bridge-entries.js +70 -6
- package/dist/src/cli/memory/controller-registry.js +7 -2
- package/dist/src/cli/memory/controllers/batch-operations.js +5 -1
- package/dist/src/cli/memory/controllers/hierarchical-memory.js +7 -2
- package/dist/src/cli/memory/controllers/mutation-guard.js +22 -2
- package/dist/src/cli/memory/daemon-backend.js +400 -0
- package/dist/src/cli/memory/daemon-write-client.js +192 -15
- package/dist/src/cli/memory/database-provider.js +57 -40
- package/dist/src/cli/memory/hnsw-persistence.js +6 -8
- package/dist/src/cli/memory/index.js +0 -1
- package/dist/src/cli/memory/memory-bridge.js +40 -8
- package/dist/src/cli/memory/memory-initializer.js +271 -211
- package/dist/src/cli/memory/rvf-migration.js +25 -11
- package/dist/src/cli/memory/sqlite-backend.js +573 -0
- package/dist/src/cli/memory/suppress-sqlite-warning.js +49 -0
- package/dist/src/cli/services/cherry-pick-learnings.js +32 -21
- package/dist/src/cli/services/daemon-dashboard.js +13 -1
- package/dist/src/cli/services/daemon-lock.js +58 -1
- package/dist/src/cli/services/daemon-memory-rpc.js +245 -10
- package/dist/src/cli/services/embeddings-migration.js +9 -12
- package/dist/src/cli/services/ephemeral-namespace-purge.js +21 -16
- package/dist/src/cli/services/learning-service.js +12 -20
- package/dist/src/cli/services/memory-db-integrity-repair.js +119 -0
- package/dist/src/cli/services/project-root.js +69 -9
- package/dist/src/cli/services/soft-delete-purge.js +6 -11
- package/dist/src/cli/services/sqljs-migration-store.js +4 -1
- package/dist/src/cli/services/subagent-bootstrap.js +1 -1
- package/dist/src/cli/shared/events/event-store.js +26 -55
- package/dist/src/cli/version.js +1 -1
- package/package.json +2 -4
- package/dist/src/cli/memory/sqljs-backend.js +0 -643
|
@@ -9,44 +9,44 @@ import * as fs from 'fs';
|
|
|
9
9
|
import * as crypto from 'crypto';
|
|
10
10
|
import { atomicWriteFileSync } from '../services/atomic-file-write.js';
|
|
11
11
|
import { legacyMemoryDbPath, memoryDbPath, MOFLO_DIR, } from '../services/moflo-paths.js';
|
|
12
|
+
import { findProjectRoot } from '../services/project-root.js';
|
|
12
13
|
// When run via npx, CWD may be node_modules/moflo — walk up to find actual project
|
|
13
14
|
let _projectRoot;
|
|
14
15
|
/**
|
|
15
16
|
* Reset the cached project root. Tests that change `process.cwd()` or
|
|
16
17
|
* `process.env.CLAUDE_PROJECT_DIR` between cases must call this to avoid
|
|
17
18
|
* leaking state across tests.
|
|
19
|
+
*
|
|
20
|
+
* Also drops the bridge-coherence cursor (#1058) so a test that re-points the
|
|
21
|
+
* project root doesn't inherit a stale mtime anchor from the previous root.
|
|
18
22
|
*/
|
|
19
23
|
export function _resetProjectRootForTest() {
|
|
20
24
|
_projectRoot = undefined;
|
|
25
|
+
lastSeenMtimeMs = null;
|
|
21
26
|
}
|
|
27
|
+
/**
|
|
28
|
+
* Test seam (#1058): peek at the bridge-coherence cursor. Production callers
|
|
29
|
+
* never invoke this; tests assert that own writes update the anchor and that
|
|
30
|
+
* another writer's mtime bump triggers reload.
|
|
31
|
+
*/
|
|
32
|
+
export function _getBridgeCoherenceCursorForTest() {
|
|
33
|
+
return lastSeenMtimeMs;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Resolve the bridge's project root.
|
|
37
|
+
*
|
|
38
|
+
* Delegates to the canonical resolver in `src/cli/services/project-root.ts`
|
|
39
|
+
* (twin: `bin/lib/moflo-paths.mjs:findProjectRoot()`). The bridge keeps a
|
|
40
|
+
* module-level cache so the hot path (every withDb call) doesn't redo the
|
|
41
|
+
* stat sweep. Tests reset via {@link _resetProjectRootForTest}.
|
|
42
|
+
*
|
|
43
|
+
* If you find yourself wanting to inline a custom walk here, STOP — every
|
|
44
|
+
* divergent walk creates a new path-mismatch bug class (see #1057 / #1058).
|
|
45
|
+
*/
|
|
22
46
|
function getProjectRoot() {
|
|
23
47
|
if (_projectRoot)
|
|
24
48
|
return _projectRoot;
|
|
25
|
-
|
|
26
|
-
_projectRoot = process.env.CLAUDE_PROJECT_DIR;
|
|
27
|
-
return _projectRoot;
|
|
28
|
-
}
|
|
29
|
-
let dir = process.cwd();
|
|
30
|
-
const root = path.parse(dir).root;
|
|
31
|
-
while (dir !== root) {
|
|
32
|
-
// `.moflo/moflo.db` is the canonical post-#727 marker. Older consumers
|
|
33
|
-
// mid-migration may still only have `.swarm/memory.db`; recognise both
|
|
34
|
-
// so the bridge can find the project root either way.
|
|
35
|
-
if (fs.existsSync(memoryDbPath(dir)) || fs.existsSync(legacyMemoryDbPath(dir))) {
|
|
36
|
-
_projectRoot = dir;
|
|
37
|
-
return _projectRoot;
|
|
38
|
-
}
|
|
39
|
-
if (fs.existsSync(path.join(dir, 'CLAUDE.md')) && fs.existsSync(path.join(dir, 'package.json'))) {
|
|
40
|
-
_projectRoot = dir;
|
|
41
|
-
return _projectRoot;
|
|
42
|
-
}
|
|
43
|
-
if (path.basename(dir) === 'node_modules') {
|
|
44
|
-
dir = path.dirname(dir);
|
|
45
|
-
continue;
|
|
46
|
-
}
|
|
47
|
-
dir = path.dirname(dir);
|
|
48
|
-
}
|
|
49
|
-
_projectRoot = process.cwd();
|
|
49
|
+
_projectRoot = findProjectRoot();
|
|
50
50
|
return _projectRoot;
|
|
51
51
|
}
|
|
52
52
|
import { ControllerRegistry } from './controller-registry.js';
|
|
@@ -57,6 +57,20 @@ let registryPromise = null;
|
|
|
57
57
|
let resolvedRegistry = null;
|
|
58
58
|
let lastBridgeError = null;
|
|
59
59
|
const schemaInitialized = new WeakSet();
|
|
60
|
+
/**
|
|
61
|
+
* Last-known disk mtime for the bridge's dbPath. Anchors the bridge-coherence
|
|
62
|
+
* check (story #1058 / epic #1054): when another process writes to disk, its
|
|
63
|
+
* persist bumps mtime past this value; the next withDb call shuts the bridge
|
|
64
|
+
* down so getRegistry re-reads fresh from disk.
|
|
65
|
+
*
|
|
66
|
+
* Set after every successful persist (own writes; no self-invalidation) and
|
|
67
|
+
* after every successful registry init (anchor to load-time disk state).
|
|
68
|
+
* Reset to null when the bridge is shut down so the next init re-anchors.
|
|
69
|
+
*
|
|
70
|
+
* Module-level because the bridge itself is process-wide singleton state —
|
|
71
|
+
* matches the existing `registryPromise` lifecycle.
|
|
72
|
+
*/
|
|
73
|
+
let lastSeenMtimeMs = null;
|
|
60
74
|
/** Controllers every moflodb_* MCP tool assumes are present when the bridge is available. */
|
|
61
75
|
export const REQUIRED_BRIDGE_CONTROLLERS = Object.freeze([
|
|
62
76
|
'hierarchicalMemory',
|
|
@@ -81,6 +95,61 @@ export function logBridgeError(context, err, opts) {
|
|
|
81
95
|
const msg = errorDetail(err);
|
|
82
96
|
console.error(`[moflo] ${context}: ${msg}`);
|
|
83
97
|
}
|
|
98
|
+
/**
|
|
99
|
+
* Treats an error as a SQLITE_BUSY lock-contention failure if either the
|
|
100
|
+
* error code or message indicates it. Belt-and-suspenders around node:sqlite,
|
|
101
|
+
* which intermittently reports busy conflicts as either `code:
|
|
102
|
+
* 'SQLITE_BUSY'` or a plain `Error: database is locked`. We match both.
|
|
103
|
+
*/
|
|
104
|
+
function isBusyError(err) {
|
|
105
|
+
if (!err || typeof err !== 'object')
|
|
106
|
+
return false;
|
|
107
|
+
const e = err;
|
|
108
|
+
if (e.code === 'SQLITE_BUSY' || e.code === 'SQLITE_BUSY_SNAPSHOT' || e.code === 'SQLITE_BUSY_RECOVERY')
|
|
109
|
+
return true;
|
|
110
|
+
return typeof e.message === 'string' && /database is locked|SQLITE_BUSY/i.test(e.message);
|
|
111
|
+
}
|
|
112
|
+
// Exponential backoff with ±25% jitter. Nominal wait ≈ 1.55s in total (50 +
|
|
113
|
+
// 100 + 200 + 400 + 800), plus the work itself. Sized so a typical short
|
|
114
|
+
// indexer write (a few rows in auto-commit) finishes before we give up,
|
|
115
|
+
// without ballooning bridge latency on a really stuck DB. See #1098.
|
|
116
|
+
const BRIDGE_BUSY_RETRY_DELAYS_MS = [50, 100, 200, 400, 800];
|
|
117
|
+
/**
|
|
118
|
+
* Run `fn` with a jittered exponential-backoff retry on SQLITE_BUSY errors.
|
|
119
|
+
*
|
|
120
|
+
* Why this exists: in CI the bridge's parallel doctor-subcheck workload hit
|
|
121
|
+
* "database is locked" 5–7 times in a 5ms window while the configured
|
|
122
|
+
* `busy_timeout=15000ms` should have been retrying for full seconds (#1098).
|
|
123
|
+
* The hypothesis-in-flight is that `node:sqlite`'s `db.prepare()` bypasses
|
|
124
|
+
* the engine-level `busy_handler`, so the busy_timeout pragma never engages
|
|
125
|
+
* for the bridge's prepare-heavy call patterns. Until that's confirmed
|
|
126
|
+
* (#1098 follow-up — local repro), an explicit retry here is the only
|
|
127
|
+
* guard between the consumer and a hard fail.
|
|
128
|
+
*
|
|
129
|
+
* Jitter scatters parallel retries so the workload doesn't thunder back
|
|
130
|
+
* onto the same lock at the same instant.
|
|
131
|
+
*/
|
|
132
|
+
async function withBusyRetry(fn) {
|
|
133
|
+
let lastErr = null;
|
|
134
|
+
for (let attempt = 0; attempt <= BRIDGE_BUSY_RETRY_DELAYS_MS.length; attempt++) {
|
|
135
|
+
if (attempt > 0) {
|
|
136
|
+
const base = BRIDGE_BUSY_RETRY_DELAYS_MS[attempt - 1];
|
|
137
|
+
const jitter = base * (Math.random() * 0.5 - 0.25); // ±25%
|
|
138
|
+
const delay = Math.max(0, Math.round(base + jitter));
|
|
139
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
140
|
+
}
|
|
141
|
+
try {
|
|
142
|
+
return await fn();
|
|
143
|
+
}
|
|
144
|
+
catch (err) {
|
|
145
|
+
lastErr = err;
|
|
146
|
+
if (!isBusyError(err))
|
|
147
|
+
throw err;
|
|
148
|
+
// Loop continues — backoff applied at top of next iteration.
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
throw lastErr;
|
|
152
|
+
}
|
|
84
153
|
/**
|
|
85
154
|
* Resolve the on-disk DB path the bridge should read/write.
|
|
86
155
|
*
|
|
@@ -215,7 +284,34 @@ export function persistBridgeDb(db, dbPath) {
|
|
|
215
284
|
return;
|
|
216
285
|
try {
|
|
217
286
|
fs.mkdirSync(path.dirname(target), { recursive: true });
|
|
218
|
-
|
|
287
|
+
// Phase 4 (#1083) — node:sqlite-backed handles persist incrementally via
|
|
288
|
+
// WAL; `save()` on the factory adapter is a no-op for them. Doing the
|
|
289
|
+
// sql.js-style `export()` + `atomicWriteFileSync` against a node:sqlite
|
|
290
|
+
// handle would CLOBBER the WAL writes (the catastrophic case the epic
|
|
291
|
+
// was killing). Route through `save()` when the handle is the factory
|
|
292
|
+
// shape; only fall back to the legacy export-and-write for raw sql.js.
|
|
293
|
+
if (db && db.kind === 'node-sqlite' && typeof db.save === 'function') {
|
|
294
|
+
db.save();
|
|
295
|
+
}
|
|
296
|
+
else {
|
|
297
|
+
atomicWriteFileSync(target, db.export());
|
|
298
|
+
}
|
|
299
|
+
// Anchor the bridge-coherence cursor to the post-persist mtime so our own
|
|
300
|
+
// write doesn't trigger a self-invalidation on the next withDb call.
|
|
301
|
+
// Under WAL the write lands in `-wal` (not the main file), so include
|
|
302
|
+
// its mtime — must match the read side of checkBridgeCoherence or self-
|
|
303
|
+
// writes self-invalidate (#1098).
|
|
304
|
+
try {
|
|
305
|
+
let anchored = fs.statSync(target).mtimeMs;
|
|
306
|
+
try {
|
|
307
|
+
const walStat = fs.statSync(`${target}-wal`);
|
|
308
|
+
if (walStat.mtimeMs > anchored)
|
|
309
|
+
anchored = walStat.mtimeMs;
|
|
310
|
+
}
|
|
311
|
+
catch { /* no WAL sidecar — main mtime is authoritative */ }
|
|
312
|
+
lastSeenMtimeMs = anchored;
|
|
313
|
+
}
|
|
314
|
+
catch { /* tolerate; coherence check re-anchors on next read */ }
|
|
219
315
|
}
|
|
220
316
|
catch (err) {
|
|
221
317
|
logBridgeError('bridge persist failed', err, { alwaysLog: true });
|
|
@@ -280,20 +376,138 @@ export function getDb(registry) {
|
|
|
280
376
|
}
|
|
281
377
|
return { db, mofloDb };
|
|
282
378
|
}
|
|
379
|
+
/**
|
|
380
|
+
* Bridge coherence check (story #1058 / epic #1054 — read-side symmetry to
|
|
381
|
+
* #981 single-writer). sql.js holds an in-memory DB snapshot per process and
|
|
382
|
+
* never re-reads disk after init, so any long-lived process — daemon or
|
|
383
|
+
* not — returns stale rows when another writer has touched the file since
|
|
384
|
+
* this process loaded its snapshot.
|
|
385
|
+
*
|
|
386
|
+
* Solution: stat the dbPath before every bridge op; if the mtime has advanced
|
|
387
|
+
* past our last-known value, another writer has touched the file — drop the
|
|
388
|
+
* bridge so `getRegistry` re-loads from disk on the next call.
|
|
389
|
+
*
|
|
390
|
+
* The daemon participates in this check too. #1058 originally exempted the
|
|
391
|
+
* daemon under "daemon is the sole writer", but that assumption breaks every
|
|
392
|
+
* session-start: `bin/index-guidance.mjs`, migration runners, and repair
|
|
393
|
+
* tools all write directly to `.moflo/moflo.db` while the daemon is up
|
|
394
|
+
* (epic #1057 calls these out as in-scope writers to coordinate). Without
|
|
395
|
+
* the daemon doing the check, daemon-routed MCP reads served the pre-init
|
|
396
|
+
* snapshot indefinitely, hiding the indexer's chunks from `memory_search` /
|
|
397
|
+
* `memory_get_neighbors` until the daemon process restarted (#1073, smoke).
|
|
398
|
+
*
|
|
399
|
+
* Self-invalidation is still suppressed: `persistBridgeDb` anchors
|
|
400
|
+
* `lastSeenMtimeMs` to the post-write mtime, so the daemon's own writes never
|
|
401
|
+
* trip the reload. External writers — whose touches advance mtime past the
|
|
402
|
+
* anchor — do.
|
|
403
|
+
*/
|
|
404
|
+
async function checkBridgeCoherence(dbPath) {
|
|
405
|
+
// No registry yet → nothing to invalidate; first init will anchor the cursor.
|
|
406
|
+
if (!registryPromise)
|
|
407
|
+
return;
|
|
408
|
+
const target = dbPath ? path.resolve(dbPath) : getDbPath();
|
|
409
|
+
if (target === ':memory:')
|
|
410
|
+
return;
|
|
411
|
+
// Under WAL (Phase 5 / #1083), commits land in the `-wal` sidecar first —
|
|
412
|
+
// the main DB file's mtime ONLY advances on checkpoint, which may be many
|
|
413
|
+
// writes apart. Statting just the main file misses every external WAL
|
|
414
|
+
// write between checkpoints, leaving the bridge with a stale in-memory
|
|
415
|
+
// snapshot indefinitely. That's the failure mode in #1098 / #1073 smoke
|
|
416
|
+
// where doctor's seed-via-openDaemonDatabase then bridge-via-MCP couldn't
|
|
417
|
+
// see its own freshly-written rows. Stat both files and use whichever is
|
|
418
|
+
// most recent. Mirrors the same fix in `refreshVectorStatsCache`.
|
|
419
|
+
let mtimeMs = 0;
|
|
420
|
+
try {
|
|
421
|
+
mtimeMs = fs.statSync(target).mtimeMs;
|
|
422
|
+
}
|
|
423
|
+
catch {
|
|
424
|
+
// File missing or unreadable — fall through. Downstream withDb surfaces
|
|
425
|
+
// the error; we don't synthesize a coherence event from a stat failure.
|
|
426
|
+
return;
|
|
427
|
+
}
|
|
428
|
+
try {
|
|
429
|
+
const walStat = fs.statSync(`${target}-wal`);
|
|
430
|
+
if (walStat.mtimeMs > mtimeMs)
|
|
431
|
+
mtimeMs = walStat.mtimeMs;
|
|
432
|
+
}
|
|
433
|
+
catch { /* no WAL sidecar yet — main mtime is authoritative */ }
|
|
434
|
+
if (lastSeenMtimeMs == null) {
|
|
435
|
+
// First op after init — anchor and proceed.
|
|
436
|
+
lastSeenMtimeMs = mtimeMs;
|
|
437
|
+
return;
|
|
438
|
+
}
|
|
439
|
+
if (mtimeMs > lastSeenMtimeMs) {
|
|
440
|
+
// Another process wrote since we loaded. Drop the bridge so the next
|
|
441
|
+
// `getRegistry` call re-initializes from fresh disk. Reset the cursor;
|
|
442
|
+
// the post-reload anchor (after `getRegistry` succeeds) re-sets it.
|
|
443
|
+
await shutdownBridge();
|
|
444
|
+
lastSeenMtimeMs = null;
|
|
445
|
+
}
|
|
446
|
+
}
|
|
283
447
|
/**
|
|
284
448
|
* Resolve registry + db, run fn, return null on any unexpected failure so
|
|
285
449
|
* the caller falls back to raw sql.js. Errors are logged to stderr —
|
|
286
450
|
* silently swallowing them previously masked real bugs in bridge-entries.ts.
|
|
451
|
+
*
|
|
452
|
+
* Bridge coherence (#1058): every entry through this gate checks whether the
|
|
453
|
+
* dbPath's mtime has advanced past our last-known value; if so, the bridge is
|
|
454
|
+
* torn down so the next op reads fresh disk state. Daemon participates in the
|
|
455
|
+
* check; its own writes anchor `lastSeenMtimeMs` via `persistBridgeDb` so
|
|
456
|
+
* self-fire is suppressed.
|
|
287
457
|
*/
|
|
288
458
|
export async function withDb(dbPath, fn) {
|
|
459
|
+
await checkBridgeCoherence(dbPath);
|
|
289
460
|
const registry = await getRegistry(dbPath);
|
|
290
461
|
if (!registry)
|
|
291
462
|
return null;
|
|
292
463
|
const ctx = getDb(registry);
|
|
293
464
|
if (!ctx)
|
|
294
465
|
return null;
|
|
466
|
+
// Anchor the coherence cursor to load-time disk state once the registry is
|
|
467
|
+
// resolved. The post-init read of `mofloDb.database` reflects the bytes
|
|
468
|
+
// that were on disk when `openSqlJsDatabase` ran; pin the matching mtime so
|
|
469
|
+
// a subsequent unrelated process write triggers reload, not a self-fire.
|
|
470
|
+
// Include `-wal` since WAL writes don't bump the main file mtime (#1098).
|
|
471
|
+
const target = dbPath ? path.resolve(dbPath) : getDbPath();
|
|
472
|
+
if (lastSeenMtimeMs == null && target !== ':memory:') {
|
|
473
|
+
try {
|
|
474
|
+
let anchor = fs.statSync(target).mtimeMs;
|
|
475
|
+
try {
|
|
476
|
+
const walStat = fs.statSync(`${target}-wal`);
|
|
477
|
+
if (walStat.mtimeMs > anchor)
|
|
478
|
+
anchor = walStat.mtimeMs;
|
|
479
|
+
}
|
|
480
|
+
catch { /* no WAL sidecar — main mtime is authoritative */ }
|
|
481
|
+
lastSeenMtimeMs = anchor;
|
|
482
|
+
}
|
|
483
|
+
catch { /* file may not exist yet — first persist will anchor */ }
|
|
484
|
+
}
|
|
295
485
|
try {
|
|
296
|
-
|
|
486
|
+
const result = await withBusyRetry(() => fn(ctx, registry));
|
|
487
|
+
// Re-anchor the coherence cursor to the post-op mtime so internal
|
|
488
|
+
// bridge writes that happen AFTER persistBridgeDb (attestation log,
|
|
489
|
+
// bumpAccessCounts, cache invalidation row updates, etc.) don't
|
|
490
|
+
// look like external writes on the next withDb call. Without this
|
|
491
|
+
// re-anchor, the next call's checkBridgeCoherence sees the
|
|
492
|
+
// attestation-advanced -wal mtime, tears down the registry, and
|
|
493
|
+
// any test-injected stubs (cache.set, etc.) get reset — exactly
|
|
494
|
+
// the failure mode in `bridge-entries.test.ts` #994 after the
|
|
495
|
+
// WAL-coherence fix (49f91a01a). External writes still get
|
|
496
|
+
// detected at the START of the next withDb call.
|
|
497
|
+
if (target !== ':memory:') {
|
|
498
|
+
try {
|
|
499
|
+
let anchor = fs.statSync(target).mtimeMs;
|
|
500
|
+
try {
|
|
501
|
+
const walStat = fs.statSync(`${target}-wal`);
|
|
502
|
+
if (walStat.mtimeMs > anchor)
|
|
503
|
+
anchor = walStat.mtimeMs;
|
|
504
|
+
}
|
|
505
|
+
catch { /* no WAL sidecar */ }
|
|
506
|
+
lastSeenMtimeMs = anchor;
|
|
507
|
+
}
|
|
508
|
+
catch { /* tolerate; coherence check re-anchors on next read */ }
|
|
509
|
+
}
|
|
510
|
+
return result;
|
|
297
511
|
}
|
|
298
512
|
catch (err) {
|
|
299
513
|
logBridgeError('bridge operation failed', err);
|
|
@@ -313,6 +527,9 @@ export async function shutdownBridge() {
|
|
|
313
527
|
const registry = await registryPromise;
|
|
314
528
|
registryPromise = null;
|
|
315
529
|
resolvedRegistry = null;
|
|
530
|
+
// Drop the coherence cursor too — the next init will re-anchor against
|
|
531
|
+
// whatever's on disk by then.
|
|
532
|
+
lastSeenMtimeMs = null;
|
|
316
533
|
if (registry) {
|
|
317
534
|
try {
|
|
318
535
|
await registry.shutdown();
|
|
@@ -388,9 +605,12 @@ export function refreshVectorStatsCache(dbPathOverride) {
|
|
|
388
605
|
const existing = readExistingVectorStats(root);
|
|
389
606
|
// Mtime short-circuit (#639 perf): refreshVectorStatsCache fires on every
|
|
390
607
|
// bridge store/delete. When the on-disk DB hasn't changed since we last
|
|
391
|
-
// wrote the cache
|
|
392
|
-
//
|
|
393
|
-
//
|
|
608
|
+
// wrote the cache, running 3 COUNT queries is wasted work — skip the rest.
|
|
609
|
+
//
|
|
610
|
+
// Phase 4 (#1083) flipped the engine to node:sqlite + WAL: every commit
|
|
611
|
+
// lands in the `-wal` sidecar (mtime advances there), not the main file.
|
|
612
|
+
// Stat both so a write to either invalidates the cache. The `-shm` file
|
|
613
|
+
// is not load-bearing — it tracks WAL readers, not committed writes.
|
|
394
614
|
let dbMtimeMs = 0;
|
|
395
615
|
let dbSizeKB = 0;
|
|
396
616
|
try {
|
|
@@ -399,6 +619,12 @@ export function refreshVectorStatsCache(dbPathOverride) {
|
|
|
399
619
|
dbSizeKB = Math.floor(stat.size / 1024);
|
|
400
620
|
}
|
|
401
621
|
catch { /* file may not exist */ }
|
|
622
|
+
try {
|
|
623
|
+
const walStat = fs.statSync(`${dbFile}-wal`);
|
|
624
|
+
if (walStat.mtimeMs > dbMtimeMs)
|
|
625
|
+
dbMtimeMs = walStat.mtimeMs;
|
|
626
|
+
}
|
|
627
|
+
catch { /* no WAL sidecar — fine, dbMtimeMs already covers it */ }
|
|
402
628
|
if (existing &&
|
|
403
629
|
typeof existing.updatedAt === 'number' &&
|
|
404
630
|
typeof existing.vectorCount === 'number' &&
|
|
@@ -54,9 +54,14 @@ export const EMBEDDING_MODEL_LEGACY_DEFAULT = 'local';
|
|
|
54
54
|
* - `epic-state` — Epic progress (epic-N, story-M) written by commands/epic.ts
|
|
55
55
|
* - `test-bridge-fix` — Single 2026-04-23 row left over from a one-off test
|
|
56
56
|
*
|
|
57
|
+
* Membership is also extended by {@link EPHEMERAL_NAMESPACE_PREFIXES} for
|
|
58
|
+
* dynamic-name namespaces (none today — that set is currently empty). Most callers
|
|
59
|
+
* should use {@link isEphemeralNamespace} which checks both sets.
|
|
60
|
+
*
|
|
57
61
|
* See story #729 for the source-trace and rationale. The session-start
|
|
58
|
-
* launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES}
|
|
59
|
-
* subset that *excludes*
|
|
62
|
+
* launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES} +
|
|
63
|
+
* {@link PURGE_ON_SESSION_START_PREFIXES} — a strict subset that *excludes*
|
|
64
|
+
* `tasklist`, because the dashboard's Flo Runs tab
|
|
60
65
|
* (`daemon-dashboard.ts handleSpells`) reads tasklist; purging it on every
|
|
61
66
|
* session would empty the tab between sessions (#968).
|
|
62
67
|
*/
|
|
@@ -66,6 +71,26 @@ export const EPHEMERAL_NAMESPACES = new Set([
|
|
|
66
71
|
'epic-state',
|
|
67
72
|
'test-bridge-fix',
|
|
68
73
|
]);
|
|
74
|
+
/**
|
|
75
|
+
* Prefix patterns that extend {@link EPHEMERAL_NAMESPACES} for namespaces
|
|
76
|
+
* whose suffix is generated at runtime. Any namespace beginning with one of
|
|
77
|
+
* these prefixes is treated as ephemeral (skips embedding).
|
|
78
|
+
*
|
|
79
|
+
* NOTE — design distinction from {@link PURGE_ON_SESSION_START_PREFIXES}:
|
|
80
|
+
* a namespace can be auto-purgeable WITHOUT being skip-embed. For example,
|
|
81
|
+
* `doctor-memprobe-<persona>` rows are intentionally purged on every
|
|
82
|
+
* session start (the cleanup is best-effort and accumulates across
|
|
83
|
+
* sessions) but MUST still get embeddings — the probe's whole purpose is
|
|
84
|
+
* to validate the embedder is wired (`Memory Access Functional` check
|
|
85
|
+
* asserts `hasEmbedding=true`). Skipping embedding for those rows breaks
|
|
86
|
+
* the doctor check. Put a prefix here only when both properties apply.
|
|
87
|
+
*
|
|
88
|
+
* Currently empty — there's no namespace today that needs both skip-embed
|
|
89
|
+
* AND prefix-match. Kept as an explicit export so the bridge embedder's
|
|
90
|
+
* call site is uniform and future skip-embed prefixes have an obvious
|
|
91
|
+
* home.
|
|
92
|
+
*/
|
|
93
|
+
export const EPHEMERAL_NAMESPACE_PREFIXES = new Set([]);
|
|
69
94
|
/**
|
|
70
95
|
* Subset of {@link EPHEMERAL_NAMESPACES} that the session-start launcher
|
|
71
96
|
* hard-purges via `services/ephemeral-namespace-purge.ts`. Excludes
|
|
@@ -77,6 +102,62 @@ export const PURGE_ON_SESSION_START_NAMESPACES = new Set([
|
|
|
77
102
|
'epic-state',
|
|
78
103
|
'test-bridge-fix',
|
|
79
104
|
]);
|
|
105
|
+
/**
|
|
106
|
+
* Prefix patterns purged alongside {@link PURGE_ON_SESSION_START_NAMESPACES}
|
|
107
|
+
* by the session-start launcher.
|
|
108
|
+
*
|
|
109
|
+
* Members:
|
|
110
|
+
* - `doctor-memprobe-` — `flo healer`'s `Memory Access` round-trip probe
|
|
111
|
+
* writes a sentinel into `doctor-memprobe-<persona>` (persona is one of
|
|
112
|
+
* `subagent`, `swarm-agent`, `hive-mind-worker`, plus test variants).
|
|
113
|
+
* - `doctor-neighbors-` — `flo healer`'s neighbor-traversal probe creates a
|
|
114
|
+
* fresh `doctor-neighbors-<timestamp>` namespace for each run and seeds
|
|
115
|
+
* three chunk rows. Unlike memprobe (fixed personas), every healer run
|
|
116
|
+
* spawns a NEW namespace, so namespace pollution grows linearly with
|
|
117
|
+
* healer-run count if cleanup races fail.
|
|
118
|
+
*
|
|
119
|
+
* Both probes register an explicit cleanup via `safeDelete`, but the
|
|
120
|
+
* cleanup is best-effort and silently swallows failures (e.g. daemon
|
|
121
|
+
* races, MCP transport errors) — so rows accumulate across consumer
|
|
122
|
+
* sessions. Auto-purging matches the pattern for
|
|
123
|
+
* `hive-mind`/`epic-state`/`test-bridge-fix`. These rows MUST still get
|
|
124
|
+
* embeddings (see {@link EPHEMERAL_NAMESPACE_PREFIXES} for why) — only
|
|
125
|
+
* their persistence across sessions is curtailed.
|
|
126
|
+
*/
|
|
127
|
+
export const PURGE_ON_SESSION_START_PREFIXES = new Set([
|
|
128
|
+
'doctor-memprobe-',
|
|
129
|
+
'doctor-neighbors-',
|
|
130
|
+
]);
|
|
131
|
+
/**
|
|
132
|
+
* Return `true` if a namespace is ephemeral — either an exact member of
|
|
133
|
+
* {@link EPHEMERAL_NAMESPACES} or one whose name begins with a prefix in
|
|
134
|
+
* {@link EPHEMERAL_NAMESPACE_PREFIXES}. Callers checking embedding-skip
|
|
135
|
+
* behavior should use this helper rather than `.has()` on the Set directly.
|
|
136
|
+
*/
|
|
137
|
+
export function isEphemeralNamespace(namespace) {
|
|
138
|
+
if (EPHEMERAL_NAMESPACES.has(namespace))
|
|
139
|
+
return true;
|
|
140
|
+
for (const prefix of EPHEMERAL_NAMESPACE_PREFIXES) {
|
|
141
|
+
if (namespace.startsWith(prefix))
|
|
142
|
+
return true;
|
|
143
|
+
}
|
|
144
|
+
return false;
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Return `true` if a namespace should be hard-purged on session start —
|
|
148
|
+
* either an exact member of {@link PURGE_ON_SESSION_START_NAMESPACES} or one
|
|
149
|
+
* whose name begins with a prefix in
|
|
150
|
+
* {@link PURGE_ON_SESSION_START_PREFIXES}.
|
|
151
|
+
*/
|
|
152
|
+
export function shouldPurgeOnSessionStart(namespace) {
|
|
153
|
+
if (PURGE_ON_SESSION_START_NAMESPACES.has(namespace))
|
|
154
|
+
return true;
|
|
155
|
+
for (const prefix of PURGE_ON_SESSION_START_PREFIXES) {
|
|
156
|
+
if (namespace.startsWith(prefix))
|
|
157
|
+
return true;
|
|
158
|
+
}
|
|
159
|
+
return false;
|
|
160
|
+
}
|
|
80
161
|
/**
|
|
81
162
|
* Maximum number of `tasklist` rows kept across session restarts. The
|
|
82
163
|
* session-start retention pass deletes oldest rows beyond this cap, so the
|
|
@@ -140,7 +221,7 @@ export async function resolveBridgeEmbedding(value, precomputed, generateEmbeddi
|
|
|
140
221
|
// Ephemeral namespaces (run-tracking, never user knowledge) skip embeddings
|
|
141
222
|
// unconditionally — even precomputed vectors are dropped. Result row has
|
|
142
223
|
// `embedding IS NULL` and `embedding_model IS NULL`. See #729.
|
|
143
|
-
if (namespace &&
|
|
224
|
+
if (namespace && isEphemeralNamespace(namespace)) {
|
|
144
225
|
return { ok: true, json: null, dimensions: 0, model: null };
|
|
145
226
|
}
|
|
146
227
|
const wantsEmbedding = generateEmbeddingFlag !== false && value.length > 0;
|
|
@@ -30,6 +30,19 @@ function makeEntryCacheKey(namespace, key) {
|
|
|
30
30
|
const safeKey = String(key).replace(/:/g, '_');
|
|
31
31
|
return `entry:${safeNs}:${safeKey}`;
|
|
32
32
|
}
|
|
33
|
+
/** Normalise `metadata` for the `metadata` TEXT column; `null`/`undefined` → `'{}'` (#1064). */
|
|
34
|
+
export function serialiseMetadata(metadata) {
|
|
35
|
+
if (metadata == null)
|
|
36
|
+
return '{}';
|
|
37
|
+
if (typeof metadata === 'string')
|
|
38
|
+
return metadata;
|
|
39
|
+
try {
|
|
40
|
+
return JSON.stringify(metadata);
|
|
41
|
+
}
|
|
42
|
+
catch {
|
|
43
|
+
return '{}';
|
|
44
|
+
}
|
|
45
|
+
}
|
|
33
46
|
function bm25Score(queryTerms, docContent, avgDocLength, docCount, termDocFreqs) {
|
|
34
47
|
const k1 = 1.2;
|
|
35
48
|
const b = 0.75;
|
|
@@ -83,9 +96,29 @@ async function cacheInvalidate(registry, cacheKey) {
|
|
|
83
96
|
async function guardValidate(registry, operation, params, options) {
|
|
84
97
|
const guard = registry.get('mutationGuard');
|
|
85
98
|
if (!guard)
|
|
86
|
-
return { allowed: true };
|
|
99
|
+
return { allowed: true, commit: null };
|
|
87
100
|
const result = guard.validate({ operation, params, timestamp: Date.now(), bypassDedupe: options?.bypassDedupe });
|
|
88
|
-
|
|
101
|
+
const allowed = result?.allowed === true;
|
|
102
|
+
return {
|
|
103
|
+
allowed,
|
|
104
|
+
reason: result?.reason,
|
|
105
|
+
commit: allowed && result?.token ? { guard, token: result.token } : null,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
/**
 * Confirm a previously-validated mutation by calling
 * `handle.guard.commit(handle.token)`.
 *
 * Null-safe and non-throwing, so call sites can invoke it on their success
 * path without extra checks: a missing handle is a no-op, and any commit
 * failure is swallowed, whether thrown synchronously or delivered as a
 * rejected promise. Per the call-site comments in this file, committing is
 * what records the mutation in the guard's dedupe buffer, which is why it
 * runs only after the write has persisted.
 *
 * @param {{ guard: { commit(token: any): any }, token: any } | null | undefined} handle
 *   The `commit` handle returned by `guardValidate`, or null/undefined.
 * @returns {void}
 */
function guardCommit(handle) {
    if (!handle)
        return;
    try {
        const pending = handle.guard.commit(handle.token);
        // An async guard reports failure by rejecting; attach a no-op handler
        // so "non-fatal" also holds there instead of surfacing as an
        // unhandled rejection.
        if (typeof pending?.then === 'function')
            pending.then(undefined, () => { });
    }
    catch { /* commit failure is non-fatal — recording is observability-grade */ }
}
|
|
90
123
|
async function logAttestation(registry, operation, entryId, metadata) {
|
|
91
124
|
const attestation = registry.get('attestationLog');
|
|
@@ -198,12 +231,13 @@ export async function bridgeStoreEntry(options) {
|
|
|
198
231
|
tags, metadata, created_at, updated_at, expires_at, status
|
|
199
232
|
) VALUES (?, ?, ?, ?, 'semantic', ?, ?, ?, ?, ?, ?, ?, ?, 'active')`;
|
|
200
233
|
// sql.js Statement.run takes an array of bindings — not varargs.
|
|
234
|
+
const metadataJson = serialiseMetadata(options.metadata);
|
|
201
235
|
const stmt = ctx.db.prepare(insertSql);
|
|
202
236
|
stmt.run([
|
|
203
237
|
id, key, namespace, value,
|
|
204
238
|
embeddingJson, dimensions || null, model,
|
|
205
239
|
tags.length > 0 ? JSON.stringify(tags) : null,
|
|
206
|
-
|
|
240
|
+
metadataJson,
|
|
207
241
|
now, now,
|
|
208
242
|
ttl ? now + (ttl * 1000) : null,
|
|
209
243
|
]);
|
|
@@ -226,7 +260,16 @@ export async function bridgeStoreEntry(options) {
|
|
|
226
260
|
const cacheKey = makeEntryCacheKey(namespace, key);
|
|
227
261
|
let cached = true;
|
|
228
262
|
try {
|
|
229
|
-
|
|
263
|
+
// #1064 — include metadata in the cache value so a subsequent
|
|
264
|
+
// bridgeGetEntry cache-hit returns the same shape as a fresh disk read.
|
|
265
|
+
// Without this, chunk-row producers writing through the chokepoint would
|
|
266
|
+
// get `{}` back from cache and the full metadata from disk — exactly the
|
|
267
|
+
// divergence the cache is supposed to mask.
|
|
268
|
+
await cacheSet(registry, cacheKey, {
|
|
269
|
+
id, key, namespace, content: value,
|
|
270
|
+
embedding: embeddingJson,
|
|
271
|
+
metadata: metadataJson,
|
|
272
|
+
});
|
|
230
273
|
}
|
|
231
274
|
catch (err) {
|
|
232
275
|
cached = false;
|
|
@@ -249,6 +292,11 @@ export async function bridgeStoreEntry(options) {
|
|
|
249
292
|
logBridgeError('post-persist stats refresh failed', err);
|
|
250
293
|
}
|
|
251
294
|
}
|
|
295
|
+
// Commit the MutationGuard recording NOW that the row is durable on
|
|
296
|
+
// disk + cache + attestation log. Order: persist before commit so a
|
|
297
|
+
// SQLITE_BUSY mid-write doesn't leave a stale dedupe entry that would
|
|
298
|
+
// reject the withDb retry as a "duplicate" (#1098).
|
|
299
|
+
guardCommit(guardResult.commit);
|
|
252
300
|
return {
|
|
253
301
|
success: true,
|
|
254
302
|
id,
|
|
@@ -316,13 +364,14 @@ export async function bridgeStoreEntries(items, dbPath) {
|
|
|
316
364
|
embedding, embedding_dimensions, embedding_model,
|
|
317
365
|
tags, metadata, created_at, updated_at, expires_at, status
|
|
318
366
|
) VALUES (?, ?, ?, ?, 'semantic', ?, ?, ?, ?, ?, ?, ?, ?, 'active')`;
|
|
367
|
+
const metadataJson = serialiseMetadata(opts.metadata);
|
|
319
368
|
try {
|
|
320
369
|
const stmt = ctx.db.prepare(insertSql);
|
|
321
370
|
stmt.run([
|
|
322
371
|
id, key, namespace, value,
|
|
323
372
|
embeddingJson, dimensions || null, model,
|
|
324
373
|
tags.length > 0 ? JSON.stringify(tags) : null,
|
|
325
|
-
|
|
374
|
+
metadataJson,
|
|
326
375
|
now, now,
|
|
327
376
|
ttl ? now + (ttl * 1000) : null,
|
|
328
377
|
]);
|
|
@@ -337,7 +386,12 @@ export async function bridgeStoreEntries(items, dbPath) {
|
|
|
337
386
|
anyEmbedded = true;
|
|
338
387
|
deferredBookkeeping.push({
|
|
339
388
|
cacheKey: makeEntryCacheKey(namespace, key),
|
|
340
|
-
|
|
389
|
+
// #1064 — keep cache shape in sync with disk (see single-store path).
|
|
390
|
+
cacheValue: {
|
|
391
|
+
id, key, namespace, content: value,
|
|
392
|
+
embedding: embeddingJson,
|
|
393
|
+
metadata: metadataJson,
|
|
394
|
+
},
|
|
341
395
|
entryId: id,
|
|
342
396
|
entryKey: key,
|
|
343
397
|
namespace,
|
|
@@ -389,6 +443,11 @@ export async function bridgeStoreEntries(items, dbPath) {
|
|
|
389
443
|
logBridgeError('post-persist stats refresh failed', err);
|
|
390
444
|
}
|
|
391
445
|
}
|
|
446
|
+
// Commit the bulk-store mutation in the dedupe buffer (#1098). At least
|
|
447
|
+
// one row reached disk, which is sufficient to record the bulk op —
|
|
448
|
+
// partial-batch persist failure is already reflected per-item via the
|
|
449
|
+
// results array.
|
|
450
|
+
guardCommit(guardResult.commit);
|
|
392
451
|
return results;
|
|
393
452
|
});
|
|
394
453
|
}
|
|
@@ -663,6 +722,11 @@ export async function bridgeDeleteEntry(options) {
|
|
|
663
722
|
// Non-fatal — count is informational
|
|
664
723
|
}
|
|
665
724
|
refreshVectorStatsCache();
|
|
725
|
+
// Commit the delete mutation in the dedupe buffer (#1098). The row is
|
|
726
|
+
// gone from disk and the cache is invalidated, so this is the safe
|
|
727
|
+
// point to record — a SQLITE_BUSY mid-DELETE earlier would have been caught
|
|
728
|
+
// in the try/catch above and never reached here.
|
|
729
|
+
guardCommit(guardResult.commit);
|
|
666
730
|
return {
|
|
667
731
|
success: true,
|
|
668
732
|
deleted: true,
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { EventEmitter } from 'node:events';
|
|
14
14
|
import * as path from 'node:path';
|
|
15
|
-
import {
|
|
15
|
+
import { openDaemonDatabase } from './daemon-backend.js';
|
|
16
16
|
import { CONTROLLER_SPECS } from './controller-specs.js';
|
|
17
17
|
import { errorDetail } from '../shared/utils/error-detail.js';
|
|
18
18
|
// ===== Initialization Levels =====
|
|
@@ -213,7 +213,12 @@ export class ControllerRegistry extends EventEmitter {
|
|
|
213
213
|
return;
|
|
214
214
|
}
|
|
215
215
|
}
|
|
216
|
-
|
|
216
|
+
// Phase 4 (#1083) — open via the node:sqlite-backed adapter (shape-
|
|
217
|
+
// compatible with sql.js Statement API). Eliminates the cross-process
|
|
218
|
+
// clobber between `bin/` writers (node:sqlite + WAL) and the daemon's
|
|
219
|
+
// bridge (was sql.js readFileSync). `config.wasmPath` is now unused —
|
|
220
|
+
// node:sqlite is built into Node 22+; Phase 5 removes the field.
|
|
221
|
+
const database = openDaemonDatabase(dbPath);
|
|
217
222
|
this.mofloDb = { database, close: async () => database.close() };
|
|
218
223
|
this.emit('mofloDb:initialized');
|
|
219
224
|
}
|
|
@@ -31,7 +31,11 @@ export class BatchOperations {
|
|
|
31
31
|
}
|
|
32
32
|
const ids = [];
|
|
33
33
|
const now = Date.now();
|
|
34
|
-
|
|
34
|
+
// BEGIN IMMEDIATE acquires RESERVED upfront — busy_handler is consulted
|
|
35
|
+
// for the lock acquisition, so concurrent writers wait out PRAGMA
|
|
36
|
+
// busy_timeout instead of fail-fasting on the SHARED→RESERVED upgrade
|
|
37
|
+
// (#1099: the deferred-BEGIN trap surfaced in #1098 CI).
|
|
38
|
+
this.db.run('BEGIN IMMEDIATE');
|
|
35
39
|
try {
|
|
36
40
|
const stmt = this.db.prepare(`INSERT INTO ${EPISODES_TABLE} (id, key, content, metadata, embedding, ts)
|
|
37
41
|
VALUES (?, ?, ?, ?, ?, ?)`);
|