moflo 4.10.0 → 4.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/lib/db-repair.mjs +358 -41
- package/bin/session-start-launcher.mjs +42 -6
- package/dist/src/cli/commands/doctor-checks-config.js +60 -0
- package/dist/src/cli/commands/doctor-fixes.js +57 -0
- package/dist/src/cli/commands/doctor-registry.js +10 -1
- package/dist/src/cli/memory/bridge-embedder.js +84 -3
- package/dist/src/cli/memory/memory-initializer.js +2 -2
- package/dist/src/cli/services/ephemeral-namespace-purge.js +15 -5
- package/dist/src/cli/services/memory-db-integrity-repair.js +119 -0
- package/dist/src/cli/version.js +1 -1
- package/package.json +2 -2
package/bin/lib/db-repair.mjs
CHANGED
|
@@ -1,32 +1,54 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Memory-DB integrity check +
|
|
2
|
+
* Memory-DB integrity check + tiered repair (#743, #1090-followup).
|
|
3
3
|
*
|
|
4
|
-
* The `.moflo/moflo.db` SQLite file
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* concurrent writes (
|
|
4
|
+
* The `.moflo/moflo.db` SQLite file picks up corruption in two distinct modes:
|
|
5
|
+
*
|
|
6
|
+
* 1. **Index drift** — `row N missing from sqlite_autoindex_memory_entries_1`.
|
|
7
|
+
* Row data is intact; only the unique-key b-tree is wrong. Trigger: sql.js's
|
|
8
|
+
* whole-file dump-on-flush racing with concurrent writes (#714, #743 —
|
|
9
|
+
* fixed for new installs by Phase 5 / #1084 which removed sql.js entirely).
|
|
10
|
+
* **REINDEX** rebuilds the index from canonical row data.
|
|
11
|
+
*
|
|
12
|
+
* 2. **Table b-tree corruption** — `Tree N page M cell K: Rowid X out of
|
|
13
|
+
* order`, where Tree N is a TABLE root page (not just an index). Row data
|
|
14
|
+
* is partly intact, but page ordering is broken. Triggers we've seen:
|
|
15
|
+
* - sql.js → node:sqlite migration: an old 4.9.x sql.js daemon flushes its
|
|
16
|
+
* full-file dump OVER a WAL frame that the new 4.10 backend has already
|
|
17
|
+
* written, leaving WAL referencing pages that no longer exist in main.
|
|
18
|
+
* - Concurrent multi-process writes when the daemon was disabled (#981).
|
|
19
|
+
* **REINDEX cannot fix this** — the table itself is broken. Recovery path:
|
|
20
|
+
* a) `VACUUM INTO` a fresh file (single-shot rebuild; fails fast if
|
|
21
|
+
* iteration hits an unreadable page),
|
|
22
|
+
* b) row-level salvage — chunked `SELECT rowid > ?` per table, catching
|
|
23
|
+
* per-chunk errors and skipping past corrupt page ranges,
|
|
24
|
+
* c) atomic swap with .corrupt.<TS> backup retained for forensics.
|
|
25
|
+
*
|
|
26
|
+
* 3. **Unrecoverable** — header damage, encrypted-by-malware, etc. We can't
|
|
27
|
+
* fix this; surface a clear failure and let the user decide between manual
|
|
28
|
+
* `flo memory rebuild-index` (destructive) and offline recovery tools.
|
|
9
29
|
*
|
|
10
30
|
* Symptoms when uncorrected:
|
|
11
31
|
* - `index-guidance.mjs` and `index-patterns.mjs` fail mid-write with
|
|
12
32
|
* `database disk image is malformed`, leaving partial state.
|
|
13
33
|
* - The ephemeral-namespace purge (#729) fails silently, so hive-mind /
|
|
14
34
|
* tasklist / epic-state / test-bridge-fix rows accumulate.
|
|
15
|
-
* - Vector counts in the statusline stay inflated
|
|
16
|
-
*
|
|
35
|
+
* - Vector counts in the statusline stay inflated.
|
|
36
|
+
* - Healer's deep checks throw with "database disk image is malformed",
|
|
37
|
+
* surfacing as the synthetic 'Check' failure (doctor.ts:214).
|
|
17
38
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* report; manual `flo memory rebuild-index` is the fallback.
|
|
22
|
-
*
|
|
23
|
-
* MUST run BEFORE any long-lived sql.js consumer (MCP server, daemon) opens
|
|
24
|
-
* the DB and BEFORE the embeddings migration / soft-delete purge / ephemeral
|
|
25
|
-
* purge — those all swallow corruption errors and silently no-op.
|
|
39
|
+
* MUST run BEFORE any long-lived consumer (MCP server, daemon) opens the DB
|
|
40
|
+
* and BEFORE the embeddings migration / soft-delete purge / ephemeral purge —
|
|
41
|
+
* those all swallow corruption errors and silently no-op.
|
|
26
42
|
*/
|
|
27
|
-
import { existsSync } from 'node:fs';
|
|
43
|
+
import { existsSync, renameSync, unlinkSync } from 'node:fs';
|
|
28
44
|
import { memoryDbPath } from './moflo-paths.mjs';
|
|
29
45
|
import { openBackend } from './get-backend.mjs';
|
|
46
|
+
import './suppress-sqlite-warning.mjs';
|
|
47
|
+
// Resolve node:sqlite once at module load — get-backend.mjs has already
|
|
48
|
+
// loaded it by this point, so the dynamic import is a cache hit. Avoids
|
|
49
|
+
// three independent `await import('node:sqlite')` calls inside the repair
|
|
50
|
+
// functions (style cleanup; was producing no functional difference).
|
|
51
|
+
const { DatabaseSync } = await import('node:sqlite');
|
|
30
52
|
|
|
31
53
|
function isOk(execResult) {
|
|
32
54
|
const rows = execResult?.[0]?.values ?? [];
|
|
@@ -38,42 +60,337 @@ function corruptionCount(execResult) {
|
|
|
38
60
|
}
|
|
39
61
|
|
|
40
62
|
/**
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
63
|
+
* Open `.moflo/moflo.db` raw via node:sqlite in readonly mode and run
|
|
64
|
+
* `PRAGMA integrity_check`. Bypasses {@link openBackend} because that path
|
|
65
|
+
* sets `journal_mode=WAL`, `busy_timeout`, and `synchronous=NORMAL` on every
|
|
66
|
+
* non-readonly open — those PRAGMAs can themselves throw against a corrupt
|
|
67
|
+
* file, and the pre-#1090 code path caught those throws and reported the DB
|
|
68
|
+
* as healthy. Readonly + no PRAGMAs = the probe always reaches the
|
|
69
|
+
* `integrity_check` call regardless of file health.
|
|
70
|
+
*
|
|
71
|
+
* Exported so the TS doctor check (`checkMemoryDbIntegrity` in
|
|
72
|
+
* `src/cli/commands/doctor-checks-config.ts`) can call into the same
|
|
73
|
+
* implementation instead of re-deriving the readonly-no-PRAGMAs probe.
|
|
74
|
+
*
|
|
75
|
+
* @param {string} dbPath
|
|
76
|
+
* @returns {Promise<{ ok: boolean, errors: number, openFailed?: boolean }>}
|
|
77
|
+
*/
|
|
78
|
+
export async function probeIntegrityRaw(dbPath) {
|
|
79
|
+
let db;
|
|
80
|
+
try {
|
|
81
|
+
db = new DatabaseSync(dbPath, { readOnly: true });
|
|
82
|
+
} catch {
|
|
83
|
+
return { ok: false, errors: 0, openFailed: true };
|
|
84
|
+
}
|
|
85
|
+
try {
|
|
86
|
+
const rows = db.prepare('PRAGMA integrity_check').all();
|
|
87
|
+
if (rows.length === 1 && String(rows[0]?.integrity_check ?? '').toLowerCase() === 'ok') {
|
|
88
|
+
return { ok: true, errors: 0 };
|
|
89
|
+
}
|
|
90
|
+
return { ok: false, errors: rows.length };
|
|
91
|
+
} catch {
|
|
92
|
+
return { ok: false, errors: 0, openFailed: true };
|
|
93
|
+
} finally {
|
|
94
|
+
try { db.close(); } catch { /* already-dead handle */ }
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Tier-2 recovery: `VACUUM INTO` a fresh file. Single SQLite call that
|
|
100
|
+
* iterates every row of every table and writes them to a brand-new database
|
|
101
|
+
* with rebuilt indexes. Fails fast if iteration hits an unreadable page —
|
|
102
|
+
* caller falls back to row-level salvage.
|
|
103
|
+
*
|
|
104
|
+
* @param {string} srcPath
|
|
105
|
+
* @param {string} dstPath
|
|
106
|
+
* @returns {Promise<{ ok: boolean, error?: string }>}
|
|
107
|
+
*/
|
|
108
|
+
async function tryVacuumInto(srcPath, dstPath) {
|
|
109
|
+
try { if (existsSync(dstPath)) unlinkSync(dstPath); } catch { /* best effort */ }
|
|
110
|
+
let db;
|
|
111
|
+
try {
|
|
112
|
+
// Open writable (not readonly) — VACUUM needs to checkpoint WAL first.
|
|
113
|
+
// Skip our standard WAL pragmas (they can throw on corrupt files); SQLite
|
|
114
|
+
// applies its defaults which are sufficient for VACUUM INTO.
|
|
115
|
+
db = new DatabaseSync(srcPath);
|
|
116
|
+
} catch (err) {
|
|
117
|
+
return { ok: false, error: err?.message ?? 'open failed' };
|
|
118
|
+
}
|
|
119
|
+
try {
|
|
120
|
+
try { db.exec('PRAGMA wal_checkpoint(TRUNCATE)'); } catch { /* corrupt WAL ok */ }
|
|
121
|
+
db.exec(`VACUUM INTO '${dstPath.replace(/'/g, "''")}'`);
|
|
122
|
+
return { ok: true };
|
|
123
|
+
} catch (err) {
|
|
124
|
+
return { ok: false, error: err?.message ?? 'vacuum failed' };
|
|
125
|
+
} finally {
|
|
126
|
+
try { db.close(); } catch { /* */ }
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Tier-3 recovery: row-level salvage. Iterate each non-empty table in
|
|
132
|
+
* `rowid > ?` chunks; on any chunk-read failure, skip past that chunk's
|
|
133
|
+
* rowid range and continue. Per-table loss stats returned so the caller can
|
|
134
|
+
* surface what was preserved vs lost.
|
|
135
|
+
*
|
|
136
|
+
* Schema is copied verbatim from `sqlite_master.sql` so triggers/indexes/views
|
|
137
|
+
* are preserved alongside tables. `INSERT OR IGNORE` handles unique-key
|
|
138
|
+
* collisions from any duplicate-rowid corruption mode.
|
|
139
|
+
*
|
|
140
|
+
* @param {string} srcPath
|
|
141
|
+
* @param {string} dstPath
|
|
142
|
+
* @returns {Promise<{
|
|
143
|
+
* ok: boolean,
|
|
144
|
+
* error?: string,
|
|
145
|
+
* lossStats?: Record<string, { read: number, written: number, errors: number }>,
|
|
146
|
+
* }>}
|
|
147
|
+
*/
|
|
148
|
+
async function trySalvageRowByRow(srcPath, dstPath) {
|
|
149
|
+
try { if (existsSync(dstPath)) unlinkSync(dstPath); } catch { /* */ }
|
|
150
|
+
|
|
151
|
+
let src;
|
|
152
|
+
try {
|
|
153
|
+
src = new DatabaseSync(srcPath, { readOnly: true });
|
|
154
|
+
} catch (err) {
|
|
155
|
+
return { ok: false, error: err?.message ?? 'src open failed' };
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Open dst defensively. If this throws (e.g. permissions, dst path in a
|
|
159
|
+
// dir we can't create, or a concurrent lock on dstPath), keep the
|
|
160
|
+
// "never throws" contract by returning the failure shape — otherwise the
|
|
161
|
+
// open exception would escape past `repairMemoryDbIfCorrupt` and block
|
|
162
|
+
// session start, which is the failure mode this whole module exists to
|
|
163
|
+
// prevent.
|
|
164
|
+
let dst;
|
|
165
|
+
try {
|
|
166
|
+
dst = new DatabaseSync(dstPath);
|
|
167
|
+
} catch (err) {
|
|
168
|
+
try { src.close(); } catch { /* */ }
|
|
169
|
+
return { ok: false, error: err?.message ?? 'dst open failed' };
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
const lossStats = {};
|
|
173
|
+
const CHUNK = 500;
|
|
174
|
+
|
|
175
|
+
try {
|
|
176
|
+
// Copy schema. Order matters: tables first (else indexes/triggers/views
|
|
177
|
+
// reference nonexistent tables), then everything else. sqlite_* objects
|
|
178
|
+
// (sqlite_sequence, sqlite_autoindex_*) are created implicitly by SQLite.
|
|
179
|
+
const schemaRows = src
|
|
180
|
+
.prepare(
|
|
181
|
+
"SELECT type, name, tbl_name, sql FROM sqlite_master " +
|
|
182
|
+
"WHERE sql IS NOT NULL ORDER BY CASE type " +
|
|
183
|
+
"WHEN 'table' THEN 1 WHEN 'index' THEN 2 WHEN 'view' THEN 3 ELSE 4 END",
|
|
184
|
+
)
|
|
185
|
+
.all();
|
|
186
|
+
for (const s of schemaRows) {
|
|
187
|
+
if (String(s.name).startsWith('sqlite_')) continue;
|
|
188
|
+
try { dst.exec(s.sql + ';'); } catch { /* malformed schema row — skip */ }
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Salvage rows table-by-table.
|
|
192
|
+
const tables = src
|
|
193
|
+
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
|
|
194
|
+
.all();
|
|
195
|
+
|
|
196
|
+
for (const t of tables) {
|
|
197
|
+
const name = String(t.name);
|
|
198
|
+
lossStats[name] = { read: 0, written: 0, errors: 0 };
|
|
199
|
+
|
|
200
|
+
const cols = src.prepare(`PRAGMA table_info('${name.replace(/'/g, "''")}')`).all();
|
|
201
|
+
if (cols.length === 0) continue;
|
|
202
|
+
const colList = cols.map((c) => '"' + String(c.name).replace(/"/g, '""') + '"').join(',');
|
|
203
|
+
const placeholders = cols.map(() => '?').join(',');
|
|
204
|
+
const insert = dst.prepare(
|
|
205
|
+
`INSERT OR IGNORE INTO "${name.replace(/"/g, '""')}" (${colList}) VALUES (${placeholders})`,
|
|
206
|
+
);
|
|
207
|
+
|
|
208
|
+
let lastRowid = 0;
|
|
209
|
+
let safetyCap = 0;
|
|
210
|
+
const MAX_ITERATIONS = 100_000;
|
|
211
|
+
|
|
212
|
+
while (safetyCap++ < MAX_ITERATIONS) {
|
|
213
|
+
let rows;
|
|
214
|
+
try {
|
|
215
|
+
rows = src
|
|
216
|
+
.prepare(
|
|
217
|
+
`SELECT rowid as __rid, * FROM "${name.replace(/"/g, '""')}" ` +
|
|
218
|
+
`WHERE rowid > ? ORDER BY rowid LIMIT ${CHUNK}`,
|
|
219
|
+
)
|
|
220
|
+
.all(lastRowid);
|
|
221
|
+
} catch {
|
|
222
|
+
lossStats[name].errors++;
|
|
223
|
+
lastRowid += CHUNK;
|
|
224
|
+
continue;
|
|
225
|
+
}
|
|
226
|
+
if (!rows || rows.length === 0) break;
|
|
227
|
+
lossStats[name].read += rows.length;
|
|
228
|
+
for (const r of rows) {
|
|
229
|
+
try {
|
|
230
|
+
insert.run(...cols.map((c) => r[c.name]));
|
|
231
|
+
lossStats[name].written++;
|
|
232
|
+
} catch {
|
|
233
|
+
lossStats[name].errors++;
|
|
234
|
+
}
|
|
235
|
+
lastRowid = Number(r.__rid);
|
|
236
|
+
}
|
|
237
|
+
if (rows.length < CHUNK) break;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// Verify the recovered file. If integrity_check still fails, the
|
|
242
|
+
// salvage didn't actually produce a clean file — surface as failure
|
|
243
|
+
// (caller will keep the corrupted original in place).
|
|
244
|
+
const checkRows = dst.prepare('PRAGMA integrity_check').all();
|
|
245
|
+
const recoveredOk =
|
|
246
|
+
checkRows.length === 1 &&
|
|
247
|
+
String(checkRows[0]?.integrity_check ?? '').toLowerCase() === 'ok';
|
|
248
|
+
if (!recoveredOk) {
|
|
249
|
+
return { ok: false, error: 'recovered file failed integrity_check', lossStats };
|
|
250
|
+
}
|
|
251
|
+
return { ok: true, lossStats };
|
|
252
|
+
} catch (err) {
|
|
253
|
+
return { ok: false, error: err?.message ?? 'salvage failed' };
|
|
254
|
+
} finally {
|
|
255
|
+
try { src.close(); } catch { /* */ }
|
|
256
|
+
try { dst.close(); } catch { /* */ }
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/**
|
|
261
|
+
* Atomically swap a freshly recovered DB into the canonical path, keeping the
|
|
262
|
+
* corrupted original (+ its WAL/SHM sidecars if present) under `.corrupt.<TS>`
|
|
263
|
+
* suffixes for forensics. Caller must guarantee no live writer holds the
|
|
264
|
+
* canonical file open before invoking this — see `stopWritersBeforeRepair`
|
|
265
|
+
* for the daemon-coordinated entry point.
|
|
266
|
+
*
|
|
267
|
+
* @param {string} canonicalPath
|
|
268
|
+
* @param {string} recoveredPath
|
|
269
|
+
* @returns {{ ok: boolean, error?: string, corruptSuffix: string }}
|
|
270
|
+
*/
|
|
271
|
+
function atomicSwap(canonicalPath, recoveredPath) {
  const ts = new Date().toISOString().replace(/[:.]/g, '-').replace(/Z$/, '');
  const corruptSuffix = `.corrupt.${ts}`;

  // Every rename that succeeded, as [originalPath, backupPath], so a failure
  // later in the sequence can be undone instead of leaving the canonical
  // path empty.
  const moved = [];

  try {
    if (existsSync(canonicalPath)) {
      const backupPath = canonicalPath + corruptSuffix;
      renameSync(canonicalPath, backupPath);
      moved.push([canonicalPath, backupPath]);
    }

    // WAL/SHM sidecars travel with the corrupt original. Moving them is best
    // effort: a failure here does not abort the swap.
    for (const sidecar of [canonicalPath + '-wal', canonicalPath + '-shm']) {
      if (!existsSync(sidecar)) continue;
      try {
        renameSync(sidecar, sidecar + corruptSuffix);
        moved.push([sidecar, sidecar + corruptSuffix]);
      } catch { /* not always present */ }
    }

    renameSync(recoveredPath, canonicalPath);
    return { ok: true, corruptSuffix };
  } catch (err) {
    // Roll back in reverse order so the original DB (and its sidecars) are
    // back at their canonical paths when the swap could not complete.
    for (const [originalPath, backupPath] of moved.reverse()) {
      try {
        if (!existsSync(originalPath)) renameSync(backupPath, originalPath);
      } catch { /* best effort — backup stays under the .corrupt suffix */ }
    }
    return { ok: false, error: err?.message ?? 'swap failed', corruptSuffix };
  }
}
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Probe the memory DB for corruption and run a tiered repair if found:
|
|
295
|
+
*
|
|
296
|
+
* - Tier 1: `REINDEX` in place (index-only corruption — #743).
|
|
297
|
+
* - Tier 2: `VACUUM INTO` fresh file + atomic swap (table b-tree corruption).
|
|
298
|
+
* - Tier 3: row-level salvage + atomic swap (deep corruption with partial
|
|
299
|
+
* row loss).
|
|
300
|
+
*
|
|
301
|
+
* Returns a structured result:
|
|
302
|
+
* - `{ repaired: false, errors: 0 }` — healthy or absent.
|
|
303
|
+
* - `{ repaired: true, errors: N, tier: 'reindex' }` — Tier 1 worked.
|
|
304
|
+
* - `{ repaired: true, errors: N, tier: 'vacuum', corruptBackup }` — Tier 2.
|
|
305
|
+
* - `{ repaired: true, errors: N, tier: 'salvage', corruptBackup, lossStats }`
|
|
306
|
+
* — Tier 3 (partial row loss possible; see `lossStats`).
|
|
307
|
+
* - `{ repaired: false, errors: N, persistent: true }` — nothing worked;
|
|
308
|
+
* manual recovery needed.
|
|
47
309
|
*
|
|
48
310
|
* Never throws; any internal failure becomes `{ repaired: false, errors: 0 }`
|
|
49
311
|
* so a probe failure cannot block session start.
|
|
312
|
+
*
|
|
313
|
+
* @param {string} projectRoot
|
|
314
|
+
* @returns {Promise<{
|
|
315
|
+
* repaired: boolean,
|
|
316
|
+
* errors: number,
|
|
317
|
+
* tier?: 'reindex' | 'vacuum' | 'salvage',
|
|
318
|
+
* persistent?: boolean,
|
|
319
|
+
* corruptBackup?: string,
|
|
320
|
+
* lossStats?: Record<string, { read: number, written: number, errors: number }>,
|
|
321
|
+
* }>}
|
|
50
322
|
*/
|
|
51
323
|
export async function repairMemoryDbIfCorrupt(projectRoot) {
  const dbPath = memoryDbPath(projectRoot);
  if (!existsSync(dbPath)) return { repaired: false, errors: 0 };

  // Step 1 — defensive readonly probe (no WAL-setup PRAGMAs, so it cannot
  // throw on them against a corrupt file). A healthy result ends the
  // cascade here.
  const probe = await probeIntegrityRaw(dbPath);
  if (probe.ok) return { repaired: false, errors: 0 };

  const errors = probe.errors;

  // Step 2 — Tier 1: REINDEX via the existing backend path. Fast for the
  // common index-drift mode and preserves the file in place. Skipped when
  // the raw probe could not read the file at all (`openFailed`); that case
  // goes straight to the rebuild tiers below.
  if (!probe.openFailed) {
    try {
      const db = await openBackend(projectRoot, { create: false });
      try {
        db.run('REINDEX');
        const after = db.exec('PRAGMA integrity_check');
        if (isOk(after)) {
          db.save();
          return { repaired: true, errors, tier: 'reindex' };
        }
      } finally {
        try { db.close(); } catch { /* */ }
      }
    } catch {
      // REINDEX path failed (often because openBackend's WAL pragmas throw
      // on a corrupt file). Fall through to deeper recovery.
    }
  }

  // Step 3 — Tier 2: VACUUM INTO a fresh file.
  const recoveredPath = dbPath + '.recovered';
  const vacuum = await tryVacuumInto(dbPath, recoveredPath);
  if (vacuum.ok) {
    const recoveredProbe = await probeIntegrityRaw(recoveredPath);
    if (recoveredProbe.ok) {
      const swap = atomicSwap(dbPath, recoveredPath);
      if (swap.ok) {
        return {
          repaired: true,
          // Report the violation count from the ORIGINAL file. The recovered
          // file's probe is `ok` in this branch, so it carries no count.
          errors,
          tier: 'vacuum',
          corruptBackup: dbPath + swap.corruptSuffix,
        };
      }
    }
    // Rebuilt file was unusable or could not be swapped in — discard it.
    try { unlinkSync(recoveredPath); } catch { /* */ }
  }

  // Step 4 — Tier 3: row-level salvage.
  const salvage = await trySalvageRowByRow(dbPath, recoveredPath);
  if (salvage.ok) {
    const swap = atomicSwap(dbPath, recoveredPath);
    if (swap.ok) {
      return {
        repaired: true,
        errors,
        tier: 'salvage',
        corruptBackup: dbPath + swap.corruptSuffix,
        lossStats: salvage.lossStats,
      };
    }
    try { unlinkSync(recoveredPath); } catch { /* */ }
  } else {
    try { if (existsSync(recoveredPath)) unlinkSync(recoveredPath); } catch { /* */ }
  }

  // Step 5 — give up.
  return { repaired: false, errors, persistent: true };
}
|
|
@@ -268,15 +268,51 @@ try {
|
|
|
268
268
|
try {
|
|
269
269
|
const repair = await repairMemoryDbIfCorrupt(projectRoot);
|
|
270
270
|
if (repair?.repaired) {
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
)
|
|
271
|
+
// Three recovery tiers, three messages. Tier surfaces what level of
|
|
272
|
+
// damage the DB had so the user (and any downstream telemetry) knows
|
|
273
|
+
// whether row data was lost. See bin/lib/db-repair.mjs for the cascade.
|
|
274
|
+
if (repair.tier === 'reindex') {
|
|
275
|
+
emitMutation(
|
|
276
|
+
'repaired memory db index',
|
|
277
|
+
`${plural(repair.errors, 'index error')} fixed via REINDEX`,
|
|
278
|
+
);
|
|
279
|
+
} else if (repair.tier === 'vacuum') {
|
|
280
|
+
emitMutation(
|
|
281
|
+
'rebuilt memory db',
|
|
282
|
+
`${plural(repair.errors, 'integrity violation')} fixed via VACUUM INTO; corrupt original kept at ${repair.corruptBackup ?? '.moflo/moflo.db.corrupt.*'}`,
|
|
283
|
+
);
|
|
284
|
+
} else if (repair.tier === 'salvage') {
|
|
285
|
+
// Row-level salvage may have dropped rows; summarise loss so the
|
|
286
|
+
// user sees what's gone before downstream consumers (indexer,
|
|
287
|
+
// embeddings) re-process the survivors.
|
|
288
|
+
let lossSummary = '';
|
|
289
|
+
if (repair.lossStats) {
|
|
290
|
+
const losses = Object.entries(repair.lossStats)
|
|
291
|
+
.map(([tbl, s]) => {
|
|
292
|
+
const lost = Math.max(0, s.read - s.written);
|
|
293
|
+
return lost > 0 ? `${tbl} ${s.written}/${s.read}` : null;
|
|
294
|
+
})
|
|
295
|
+
.filter(Boolean);
|
|
296
|
+
if (losses.length > 0) lossSummary = ` (rows preserved: ${losses.join(', ')})`;
|
|
297
|
+
}
|
|
298
|
+
emitMutation(
|
|
299
|
+
'salvaged memory db',
|
|
300
|
+
`${plural(repair.errors, 'integrity violation')} recovered via row-level salvage${lossSummary}; corrupt original kept at ${repair.corruptBackup ?? '.moflo/moflo.db.corrupt.*'}`,
|
|
301
|
+
);
|
|
302
|
+
} else {
|
|
303
|
+
// Older db-repair without a `tier` field — fall back to legacy text.
|
|
304
|
+
emitMutation(
|
|
305
|
+
'repaired memory db',
|
|
306
|
+
`${plural(repair.errors, 'integrity violation')} fixed`,
|
|
307
|
+
);
|
|
308
|
+
}
|
|
275
309
|
} else if (repair?.persistent) {
|
|
276
310
|
// Surface to stderr — Claude additionalContext + the user both see this.
|
|
277
|
-
//
|
|
311
|
+
// Every recovery tier exhausted; user options are destructive only.
|
|
278
312
|
process.stderr.write(
|
|
279
|
-
`moflo: memory db has ${plural(repair.errors, '
|
|
313
|
+
`moflo: memory db has ${plural(repair.errors, 'integrity violation')} ` +
|
|
314
|
+
`that REINDEX / VACUUM INTO / row-level salvage could not fix — ` +
|
|
315
|
+
`run 'flo memory rebuild-index' (destructive) or restore from backup\n`,
|
|
280
316
|
);
|
|
281
317
|
}
|
|
282
318
|
} catch {
|
|
@@ -8,6 +8,7 @@ import { join } from 'path';
|
|
|
8
8
|
import os from 'os';
|
|
9
9
|
import { getDaemonLockHolder } from '../services/daemon-lock.js';
|
|
10
10
|
import { legacyMemoryDbPath, memoryDbCandidatePaths, memoryDbPath, } from '../services/moflo-paths.js';
|
|
11
|
+
import { probeDbIntegrity } from '../services/memory-db-integrity-repair.js';
|
|
11
12
|
import { errorDetail } from '../shared/utils/error-detail.js';
|
|
12
13
|
export async function checkConfigFile() {
|
|
13
14
|
// JSON configs (parse-validated). LEGACY-CONFIG: `.claude-flow.json` and
|
|
@@ -131,6 +132,65 @@ export async function checkMemoryDatabase() {
|
|
|
131
132
|
}
|
|
132
133
|
return { name: 'Memory Database', status: 'warn', message: 'Not initialized', fix: 'claude-flow memory configure --backend hybrid' };
|
|
133
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Tier-1 corruption probe for `.moflo/moflo.db`. Runs `PRAGMA integrity_check`
|
|
137
|
+
* via a raw node:sqlite readonly handle — bypasses `openBackend` because that
|
|
138
|
+
* path sets WAL pragmas on open and those throw on deeply-corrupt files,
|
|
139
|
+
* masking the real failure as a generic "Check" error (doctor.ts:214).
|
|
140
|
+
*
|
|
141
|
+
* Owns the corruption signal so downstream checks (Embeddings, Semantic
|
|
142
|
+
* Quality, Memory Access Functional, etc.) don't end up doing it implicitly
|
|
143
|
+
* via their own swallow-all error paths. The companion fix in
|
|
144
|
+
* doctor-fixes.ts coordinates daemon stop + tiered repair via the JS-side
|
|
145
|
+
* `repairMemoryDbIfCorrupt` (bin/lib/db-repair.mjs).
|
|
146
|
+
*
|
|
147
|
+
* Status semantics:
|
|
148
|
+
* - `pass` — DB absent OR `integrity_check` returns 'ok'.
|
|
149
|
+
* - `fail` — corruption detected. `fix` field points at the healer's
|
|
150
|
+
* auto-recovery path (which runs REINDEX → VACUUM INTO → row-level
|
|
151
|
+
* salvage in order of escalation).
|
|
152
|
+
* - `fail` is also returned when the probe itself cannot run (broken
|
|
153
|
+
* install or failed module load), so that case is never downgraded to `warn`.
|
|
154
|
+
*/
|
|
155
|
+
export async function checkMemoryDbIntegrity(cwd = process.cwd()) {
    const name = 'Memory DB Integrity';
    const fix = 'flo healer --fix -c memory-db-integrity';

    // No file means nothing to probe; report a pass.
    const dbPath = memoryDbPath(cwd);
    if (!existsSync(dbPath)) {
        return { name, status: 'pass', message: 'DB absent (no integrity probe needed)' };
    }

    try {
        // The probe's result carries `ok`, `errors` and (optionally) `openFailed`.
        const probe = await probeDbIntegrity(dbPath);
        if (probe.ok) {
            return { name, status: 'pass', message: 'PRAGMA integrity_check: ok' };
        }

        let message;
        if (probe.openFailed) {
            message = 'Unable to probe DB (readonly open failed — likely deep corruption)';
        }
        else {
            message = `${probe.errors} integrity violation(s) detected`;
        }
        return { name, status: 'fail', message, fix };
    }
    catch (e) {
        // The probe call itself threw. This is reported as a hard failure,
        // not a warning, so it stays visible in the summary.
        return {
            name,
            status: 'fail',
            message: `Integrity probe unavailable: ${errorDetail(e)}`,
            fix,
        };
    }
}
|
|
134
194
|
/**
|
|
135
195
|
* Standard MCP-config search paths: home (Claude Desktop on macOS/Linux),
|
|
136
196
|
* XDG config dir, project-local `.mcp.json`, and APPDATA on Windows.
|
|
@@ -243,6 +243,63 @@ export async function autoFixCheck(check) {
|
|
|
243
243
|
return false;
|
|
244
244
|
}
|
|
245
245
|
},
|
|
246
|
+
// Tiered recovery for `.moflo/moflo.db` corruption (REINDEX → VACUUM
|
|
247
|
+
// INTO → row-level salvage). The TS service stops the daemon
|
|
248
|
+
// automatically (cross-platform via `process.kill('SIGTERM')`) so the
|
|
249
|
+
// atomic swap doesn't race a live writer; we restart it via the
|
|
250
|
+
// existing `npx moflo daemon start` shorthand after. The MCP server,
|
|
251
|
+
// started by Claude Code outside our process tree, isn't stopped here —
|
|
252
|
+
// explicit user guidance covers that case at the end.
|
|
253
|
+
'Memory DB Integrity': async () => {
|
|
254
|
+
try {
|
|
255
|
+
const { repairMemoryDbIntegrity } = await import('../services/memory-db-integrity-repair.js');
|
|
256
|
+
const result = await repairMemoryDbIntegrity(process.cwd());
|
|
257
|
+
if (result.repaired) {
|
|
258
|
+
const tierLabel = result.tier === 'reindex' ? 'REINDEX (index rebuild)'
|
|
259
|
+
: result.tier === 'vacuum' ? 'VACUUM INTO (fresh-file rebuild)'
|
|
260
|
+
: result.tier === 'salvage' ? 'row-level salvage'
|
|
261
|
+
: 'repaired';
|
|
262
|
+
output.writeln(output.dim(` Recovered via ${tierLabel}.`));
|
|
263
|
+
if (result.corruptBackup) {
|
|
264
|
+
output.writeln(output.dim(` Pre-repair backup retained: ${result.corruptBackup}`));
|
|
265
|
+
}
|
|
266
|
+
if (result.lossStats) {
|
|
267
|
+
for (const [tbl, s] of Object.entries(result.lossStats)) {
|
|
268
|
+
if (s.read > 0) {
|
|
269
|
+
const lost = Math.max(0, s.read - s.written);
|
|
270
|
+
if (lost > 0) {
|
|
271
|
+
output.writeln(output.warning(` ${tbl}: ${s.written}/${s.read} rows preserved (lost ${lost} across ${s.errors} unreadable chunk(s))`));
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
output.writeln(output.dim(' Embeddings for lost rows will be regenerated on next index pass — run `npx moflo embeddings init` to force.'));
|
|
276
|
+
}
|
|
277
|
+
// Restart the daemon if we stopped it. The launcher's own
|
|
278
|
+
// section-4 spawn handles this on next session-start, but a
|
|
279
|
+
// mid-session healer call shouldn't leave the daemon down.
|
|
280
|
+
if (result.daemonStopped) {
|
|
281
|
+
output.writeln(output.dim(' Restarting daemon...'));
|
|
282
|
+
await runFixCommand('npx moflo daemon start');
|
|
283
|
+
}
|
|
284
|
+
// Cross-platform note for the MCP server (out-of-tree, can't
|
|
285
|
+
// SIGTERM). On Windows the swap would have failed if MCP was
|
|
286
|
+
// holding the file; on POSIX the swap succeeds but MCP keeps
|
|
287
|
+
// reading the stale inode until restart. Either way: restart
|
|
288
|
+
// Claude Code to fully apply.
|
|
289
|
+
output.writeln(output.dim(' Restart Claude Code so the MCP server re-opens the recovered DB.'));
|
|
290
|
+
return true;
|
|
291
|
+
}
|
|
292
|
+
if (result.persistent) {
|
|
293
|
+
output.writeln(output.warning(' Corruption survived every recovery tier. Manual options: ' +
|
|
294
|
+
'`npx moflo memory rebuild-index` (destructive) or restore from a known-good backup.'));
|
|
295
|
+
}
|
|
296
|
+
return false;
|
|
297
|
+
}
|
|
298
|
+
catch (e) {
|
|
299
|
+
output.writeln(output.warning(` Repair failed: ${errorDetail(e)}`));
|
|
300
|
+
return false;
|
|
301
|
+
}
|
|
302
|
+
},
|
|
246
303
|
'Status Line': async () => {
|
|
247
304
|
const settingsPath = join(process.cwd(), '.claude', 'settings.json');
|
|
248
305
|
if (!existsSync(settingsPath))
|
|
@@ -12,7 +12,7 @@ import { checkWritersAudit } from './doctor-checks-writers-audit.js';
|
|
|
12
12
|
import { checkSwarmFunctional, checkHiveMindFunctional, } from './doctor-checks-swarm.js';
|
|
13
13
|
import { checkMemoryAccessFunctional } from './doctor-checks-memory-access.js';
|
|
14
14
|
import { checkBuildTools, checkClaudeCode, checkDiskSpace, checkGit, checkGitRepo, checkNodeVersion, checkNpmVersion, } from './doctor-checks-runtime.js';
|
|
15
|
-
import { checkConfigFile, checkDaemonStatus, checkDaemonWriteRouting, checkMcpServers, checkMemoryDatabase, checkMofloYamlCompliance, checkStatusLine, checkTestDirs, } from './doctor-checks-config.js';
|
|
15
|
+
import { checkConfigFile, checkDaemonStatus, checkDaemonWriteRouting, checkMcpServers, checkMemoryDatabase, checkMemoryDbIntegrity, checkMofloYamlCompliance, checkStatusLine, checkTestDirs, } from './doctor-checks-config.js';
|
|
16
16
|
import { checkSpellEngine, checkSandboxTier } from './doctor-checks-platform.js';
|
|
17
17
|
import { checkEmbeddings, checkSemanticQuality, } from './doctor-checks-memory.js';
|
|
18
18
|
import { checkIntelligence } from './doctor-checks-intelligence.js';
|
|
@@ -40,6 +40,12 @@ export const allChecks = [
|
|
|
40
40
|
checkDaemonWriteRouting,
|
|
41
41
|
checkWritersAudit,
|
|
42
42
|
checkMemoryDatabase,
|
|
43
|
+
// Owns the corruption signal so downstream checks (Embeddings, Semantic
|
|
44
|
+
// Quality, Memory Access Functional) don't surface it as the synthetic
|
|
45
|
+
// "Check" failure (doctor.ts:214). MUST run after checkMemoryDatabase
|
|
46
|
+
// (which confirms the file exists) and before any check that opens the
|
|
47
|
+
// DB via openBackend.
|
|
48
|
+
checkMemoryDbIntegrity,
|
|
43
49
|
checkEmbeddings,
|
|
44
50
|
checkEmbeddingHygiene,
|
|
45
51
|
checkEmbeddingCoverageTruth,
|
|
@@ -91,6 +97,9 @@ export const componentMap = {
|
|
|
91
97
|
'writers-audit': checkWritersAudit,
|
|
92
98
|
'writers': checkWritersAudit,
|
|
93
99
|
'memory': checkMemoryDatabase,
|
|
100
|
+
'memory-db-integrity': checkMemoryDbIntegrity,
|
|
101
|
+
'integrity': checkMemoryDbIntegrity,
|
|
102
|
+
'memory-integrity': checkMemoryDbIntegrity,
|
|
94
103
|
'embeddings': checkEmbeddings,
|
|
95
104
|
'embedding-hygiene': checkEmbeddingHygiene,
|
|
96
105
|
'embedding-coverage': checkEmbeddingCoverageTruth,
|
|
@@ -54,9 +54,14 @@ export const EMBEDDING_MODEL_LEGACY_DEFAULT = 'local';
|
|
|
54
54
|
* - `epic-state` — Epic progress (epic-N, story-M) written by commands/epic.ts
|
|
55
55
|
* - `test-bridge-fix` — Single 2026-04-23 row left over from a one-off test
|
|
56
56
|
*
|
|
57
|
+
* Membership is also extended by {@link EPHEMERAL_NAMESPACE_PREFIXES} for
|
|
58
|
+
* dynamic-name namespaces (e.g. `doctor-memprobe-<persona>`). Most callers
|
|
59
|
+
* should use {@link isEphemeralNamespace} which checks both sets.
|
|
60
|
+
*
|
|
57
61
|
* See story #729 for the source-trace and rationale. The session-start
|
|
58
|
-
* launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES}
|
|
59
|
-
* subset that *excludes*
|
|
62
|
+
* launcher only purges {@link PURGE_ON_SESSION_START_NAMESPACES} +
|
|
63
|
+
* {@link PURGE_ON_SESSION_START_PREFIXES} — a strict subset that *excludes*
|
|
64
|
+
* `tasklist`, because the dashboard's Flo Runs tab
|
|
60
65
|
* (`daemon-dashboard.ts handleSpells`) reads tasklist; purging it on every
|
|
61
66
|
* session would empty the tab between sessions (#968).
|
|
62
67
|
*/
|
|
@@ -66,6 +71,26 @@ export const EPHEMERAL_NAMESPACES = new Set([
|
|
|
66
71
|
'epic-state',
|
|
67
72
|
'test-bridge-fix',
|
|
68
73
|
]);
|
|
74
|
+
/**
 * Runtime-suffixed counterpart to {@link EPHEMERAL_NAMESPACES}: a namespace
 * whose name starts with any entry in this set counts as ephemeral and
 * therefore skips embedding.
 *
 * Do not confuse this with {@link PURGE_ON_SESSION_START_PREFIXES}. Being
 * auto-purged and being skip-embed are independent properties:
 * `doctor-memprobe-<persona>` rows are purged on every session start, yet
 * they MUST still be embedded, because the `Memory Access Functional` check
 * asserts `hasEmbedding=true` on them. Register a prefix here only when the
 * namespace is both prefix-matched AND skip-embed.
 *
 * Deliberately empty today. It stays exported so the bridge embedder's call
 * site is uniform and future skip-embed prefixes have an obvious home.
 */
export const EPHEMERAL_NAMESPACE_PREFIXES = new Set();
|
|
69
94
|
/**
|
|
70
95
|
* Subset of {@link EPHEMERAL_NAMESPACES} that the session-start launcher
|
|
71
96
|
* hard-purges via `services/ephemeral-namespace-purge.ts`. Excludes
|
|
@@ -77,6 +102,62 @@ export const PURGE_ON_SESSION_START_NAMESPACES = new Set([
|
|
|
77
102
|
'epic-state',
|
|
78
103
|
'test-bridge-fix',
|
|
79
104
|
]);
|
|
105
|
+
/**
 * Namespace prefixes that the session-start launcher purges in addition to
 * the exact names in {@link PURGE_ON_SESSION_START_NAMESPACES}.
 *
 * Entries:
 *   - `doctor-memprobe-` — the `Memory Access` round-trip probe run by
 *     `flo healer` stores a sentinel in `doctor-memprobe-<persona>`, where
 *     persona is `subagent`, `swarm-agent`, `hive-mind-worker`, or a test
 *     variant.
 *   - `doctor-neighbors-` — the neighbor-traversal probe run by `flo healer`
 *     seeds three chunk rows into a brand-new
 *     `doctor-neighbors-<timestamp>` namespace on each run. Because every
 *     run creates a NEW namespace (memprobe reuses fixed personas), leftover
 *     namespaces grow linearly with the number of healer runs whenever
 *     cleanup loses a race.
 *
 * Each probe does register its own `safeDelete` cleanup, but that cleanup is
 * best-effort and silently swallows failures (daemon races, MCP transport
 * errors, ...), so rows pile up across consumer sessions. Purging them
 * automatically mirrors how `hive-mind`/`epic-state`/`test-bridge-fix` are
 * handled. Only cross-session persistence is curtailed: these rows MUST
 * still receive embeddings, which is why the prefixes are not listed in
 * {@link EPHEMERAL_NAMESPACE_PREFIXES}.
 */
export const PURGE_ON_SESSION_START_PREFIXES = new Set(['doctor-memprobe-', 'doctor-neighbors-']);
|
|
131
|
+
/**
 * Report whether a namespace is ephemeral: either an exact member of
 * {@link EPHEMERAL_NAMESPACES}, or a string that begins with one of the
 * prefixes in {@link EPHEMERAL_NAMESPACE_PREFIXES}. Callers deciding whether
 * to skip embedding should use this helper instead of calling `.has()` on
 * the Set directly, so prefix-matched namespaces are honoured too.
 *
 * @param {unknown} namespace - Namespace to classify. Non-string values
 *   (e.g. `undefined` when a caller omits the namespace) can only match by
 *   exact Set membership and never by prefix.
 * @returns {boolean} `true` when the namespace is ephemeral.
 */
export function isEphemeralNamespace(namespace) {
    if (EPHEMERAL_NAMESPACES.has(namespace))
        return true;
    // Prefix matching is only defined for strings. Without this guard a
    // missing namespace would raise a TypeError on `.startsWith` as soon as
    // the prefix set gains its first entry.
    if (typeof namespace !== 'string')
        return false;
    for (const prefix of EPHEMERAL_NAMESPACE_PREFIXES) {
        if (namespace.startsWith(prefix))
            return true;
    }
    return false;
}
|
|
146
|
+
/**
 * Report whether a namespace is hard-purged on session start: either an
 * exact member of {@link PURGE_ON_SESSION_START_NAMESPACES}, or a string
 * that begins with one of the prefixes in
 * {@link PURGE_ON_SESSION_START_PREFIXES}.
 *
 * @param {unknown} namespace - Namespace to classify. Non-string values can
 *   only match by exact Set membership and never by prefix.
 * @returns {boolean} `true` when rows in the namespace should be purged.
 */
export function shouldPurgeOnSessionStart(namespace) {
    if (PURGE_ON_SESSION_START_NAMESPACES.has(namespace))
        return true;
    // The prefix set is non-empty, so a non-string namespace (undefined,
    // null, number) would otherwise throw a TypeError on `.startsWith`.
    if (typeof namespace !== 'string')
        return false;
    for (const prefix of PURGE_ON_SESSION_START_PREFIXES) {
        if (namespace.startsWith(prefix))
            return true;
    }
    return false;
}
|
|
80
161
|
/**
|
|
81
162
|
* Maximum number of `tasklist` rows kept across session restarts. The
|
|
82
163
|
* session-start retention pass deletes oldest rows beyond this cap, so the
|
|
@@ -140,7 +221,7 @@ export async function resolveBridgeEmbedding(value, precomputed, generateEmbeddi
|
|
|
140
221
|
// Ephemeral namespaces (run-tracking, never user knowledge) skip embeddings
|
|
141
222
|
// unconditionally — even precomputed vectors are dropped. Result row has
|
|
142
223
|
// `embedding IS NULL` and `embedding_model IS NULL`. See #729.
|
|
143
|
-
if (namespace &&
|
|
224
|
+
if (namespace && isEphemeralNamespace(namespace)) {
|
|
144
225
|
return { ok: true, json: null, dimensions: 0, model: null };
|
|
145
226
|
}
|
|
146
227
|
const wantsEmbedding = generateEmbeddingFlag !== false && value.length > 0;
|
|
@@ -20,7 +20,7 @@ import * as path from 'path';
|
|
|
20
20
|
import { formatEmbeddingError } from './embedding-errors.js';
|
|
21
21
|
import { HnswLite } from './hnsw-lite.js';
|
|
22
22
|
import { tryLoadHnswSidecar } from './hnsw-persistence.js';
|
|
23
|
-
import { EMBEDDING_MODEL_OPT_OUT,
|
|
23
|
+
import { EMBEDDING_MODEL_OPT_OUT, getBridgeEmbedder, isEphemeralNamespace } from './bridge-embedder.js';
|
|
24
24
|
import { parseEmbeddingJson, toFloat32 } from './controllers/_shared.js';
|
|
25
25
|
import { writeVectorStatsJson } from './bridge-core.js';
|
|
26
26
|
import { serialiseMetadata } from './bridge-entries.js';
|
|
@@ -1619,7 +1619,7 @@ export async function storeEntry(options) {
|
|
|
1619
1619
|
let embeddingJson = null;
|
|
1620
1620
|
let embeddingDimensions = null;
|
|
1621
1621
|
let embeddingModel = EMBEDDING_MODEL_OPT_OUT;
|
|
1622
|
-
const isEphemeralNs =
|
|
1622
|
+
const isEphemeralNs = isEphemeralNamespace(namespace);
|
|
1623
1623
|
if (isEphemeralNs) {
|
|
1624
1624
|
embeddingModel = null;
|
|
1625
1625
|
}
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
* @module cli/services/ephemeral-namespace-purge
|
|
28
28
|
*/
|
|
29
29
|
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
30
|
-
import { PURGE_ON_SESSION_START_NAMESPACES, TASKLIST_RETENTION_CAP, } from '../memory/bridge-embedder.js';
|
|
30
|
+
import { PURGE_ON_SESSION_START_NAMESPACES, PURGE_ON_SESSION_START_PREFIXES, TASKLIST_RETENTION_CAP, } from '../memory/bridge-embedder.js';
|
|
31
31
|
import { memoryDbPath } from './moflo-paths.js';
|
|
32
32
|
import { openDaemonDatabase } from '../memory/daemon-backend.js';
|
|
33
33
|
/**
|
|
@@ -56,18 +56,28 @@ export async function purgeEphemeralNamespaces(options = {}) {
|
|
|
56
56
|
// Single COUNT pass to gate both DELETEs — a clean DB is the steady
|
|
57
57
|
// state and we don't want two no-op DELETEs (with their query-planner
|
|
58
58
|
// overhead) on every session start.
|
|
59
|
+
//
|
|
60
|
+
// Match shape: exact namespace IN (...) OR namespace LIKE 'prefix-%'.
|
|
61
|
+
// The prefix clause covers runtime-suffixed namespaces like
|
|
62
|
+
// `doctor-memprobe-<persona>` whose set of suffixes isn't known upfront.
|
|
59
63
|
const namespaces = Array.from(PURGE_ON_SESSION_START_NAMESPACES);
|
|
64
|
+
const prefixes = Array.from(PURGE_ON_SESSION_START_PREFIXES);
|
|
60
65
|
const cap = options.tasklistRetentionCap ?? TASKLIST_RETENTION_CAP;
|
|
61
|
-
const
|
|
66
|
+
const exactClause = namespaces.length
|
|
67
|
+
? `namespace IN (${namespaces.map(() => '?').join(', ')})`
|
|
68
|
+
: '0';
|
|
69
|
+
const prefixClause = prefixes.map(() => 'namespace LIKE ?').join(' OR ');
|
|
70
|
+
const purgeWhere = prefixClause ? `(${exactClause} OR ${prefixClause})` : exactClause;
|
|
71
|
+
const purgeBindings = [...namespaces, ...prefixes.map((p) => `${p}%`)];
|
|
62
72
|
const countRows = db.exec(`SELECT
|
|
63
|
-
(SELECT COUNT(*) FROM memory_entries WHERE
|
|
64
|
-
(SELECT COUNT(*) FROM memory_entries WHERE namespace = 'tasklist') AS tasklistTotal`,
|
|
73
|
+
(SELECT COUNT(*) FROM memory_entries WHERE ${purgeWhere}) AS purgeable,
|
|
74
|
+
(SELECT COUNT(*) FROM memory_entries WHERE namespace = 'tasklist') AS tasklistTotal`, purgeBindings);
|
|
65
75
|
const counts = countRows[0]?.values?.[0] ?? [0, 0];
|
|
66
76
|
const purgeable = Number(counts[0] ?? 0);
|
|
67
77
|
const tasklistTotal = Number(counts[1] ?? 0);
|
|
68
78
|
let purged = 0;
|
|
69
79
|
if (purgeable > 0) {
|
|
70
|
-
db.run(`DELETE FROM memory_entries WHERE
|
|
80
|
+
db.run(`DELETE FROM memory_entries WHERE ${purgeWhere}`, purgeBindings);
|
|
71
81
|
purged = db.getRowsModified?.() ?? 0;
|
|
72
82
|
}
|
|
73
83
|
let trimmed = 0;
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TS bridge for `flo healer --fix -c memory-db-integrity` and any other
|
|
3
|
+
* caller that wants the tiered repair (REINDEX → VACUUM INTO → row-level
|
|
4
|
+
* salvage) implemented in {@link
|
|
5
|
+
* "../../../bin/lib/db-repair.mjs".repairMemoryDbIfCorrupt} but with the
|
|
6
|
+
* caller-side daemon coordination that the launcher path gets for free.
|
|
7
|
+
*
|
|
8
|
+
* The launcher (bin/session-start-launcher.mjs § 0c) runs the same repair
|
|
9
|
+
* at session start after the daemon is already stopped. A mid-session
|
|
10
|
+
* healer call needs to stop the daemon itself first — a live writer would
|
|
11
|
+
* race the atomic swap on Windows (EBUSY on `renameSync`) and leak
|
|
12
|
+
* corruption back through stale POSIX inodes elsewhere.
|
|
13
|
+
*
|
|
14
|
+
* Cross-platform notes:
|
|
15
|
+
* - `process.kill(pid, 'SIGTERM')` maps to `TerminateProcess` on Windows
|
|
16
|
+
* (Node maps every signal name to immediate termination on win32);
|
|
17
|
+
* behaves like POSIX SIGTERM on Linux/macOS. Either way the daemon
|
|
18
|
+
 *     is signalled to exit before we touch the DB file.
|
|
19
|
+
* - Path resolution uses `import.meta.url` so dist/ and bin/ stay
|
|
20
|
+
* siblings whether moflo is running from a dogfood checkout or from a
|
|
21
|
+
* consumer's `node_modules/moflo/` install.
|
|
22
|
+
* - The MCP server (spawned by Claude Code per `.mcp.json`, not by moflo)
|
|
23
|
+
* is out of our process tree and cannot be stopped here. We surface
|
|
24
|
+
* explicit guidance to restart Claude Code in the caller's UX.
|
|
25
|
+
*/
|
|
26
|
+
import { existsSync, unlinkSync } from 'node:fs';
|
|
27
|
+
import { join, resolve } from 'node:path';
|
|
28
|
+
import { pathToFileURL } from 'node:url';
|
|
29
|
+
import { getDaemonLockHolder, getDaemonLockPayload } from './daemon-lock.js';
|
|
30
|
+
import { findMofloPackageRoot } from './moflo-require.js';
|
|
31
|
+
/**
 * Dynamically import `bin/lib/db-repair.mjs` from the moflo package root.
 *
 * The module is located by walking up to the package root rather than by a
 * fixed relative URL, so the same code resolves correctly from:
 *   - the dogfood TS source tree (vitest),
 *   - the compiled `dist/src/cli/services/` output (CLI runtime), and
 *   - a consumer install under `node_modules/moflo/`.
 * An earlier `new URL('../../../../bin/lib/...', import.meta.url)` approach
 * only matched the dist depth; the source tree is one level shallower, so
 * vitest failed with "Cannot find module".
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} When the moflo package root cannot be found.
 */
async function loadJsDbRepairModule() {
    const packageRoot = findMofloPackageRoot();
    if (packageRoot) {
        // `import()` needs a file:// URL (not a bare path) to work on Windows.
        const moduleHref = pathToFileURL(join(packageRoot, 'bin', 'lib', 'db-repair.mjs')).href;
        return (await import(moduleHref));
    }
    throw new Error('moflo package root not found — cannot locate bin/lib/db-repair.mjs');
}
|
|
52
|
+
/**
 * Check `.moflo/moflo.db` for corruption via the shared JS repair module.
 *
 * This is a thin wrapper: it loads `bin/lib/db-repair.mjs` and forwards to
 * its `probeIntegrityRaw`, which is the single source of truth for the
 * probe (a readonly raw-DatabaseSync open without WAL pragmas, bypassing
 * the openBackend path that itself throws on corrupt files). TS-side
 * callers — currently the `checkMemoryDbIntegrity` doctor check — should go
 * through this function instead of re-implementing the probe.
 *
 * @param {string} dbPath - Path of the database file to probe.
 * @returns {Promise<*>} Whatever `probeIntegrityRaw` returns for `dbPath`.
 */
export async function probeDbIntegrity(dbPath) {
    const repairModule = await loadJsDbRepairModule();
    const probeOutcome = repairModule.probeIntegrityRaw(dbPath);
    return probeOutcome;
}
|
|
67
|
+
/**
 * Ask the daemon recorded in the project's daemon lock to terminate, give
 * it a short bounded window to actually exit, then clear the lockfile.
 *
 * `process.kill(pid, 'SIGTERM')` only *delivers* the signal; on POSIX the
 * target may still be running (and still hold the DB open) when the call
 * returns. Because the caller swaps the DB file right after this function,
 * we poll with signal `0` (a platform-independent existence test) until the
 * process is gone or the wait budget is spent. On Windows the signal maps
 * to an immediate terminate, so the loop normally ends on the first probe.
 *
 * @param {string} projectRoot - Absolute project root containing `.moflo/`.
 * @returns {boolean} `true` if a signal was delivered to a live daemon;
 *   `false` when there is no usable PID or the signal could not be sent
 *   (ESRCH: already dead, EPERM: not ours to stop).
 */
function stopDaemon(projectRoot) {
    const payload = getDaemonLockPayload(projectRoot);
    if (!payload?.pid || payload.pid <= 0)
        return false;
    const { pid } = payload;
    try {
        process.kill(pid, 'SIGTERM');
    }
    catch {
        // ESRCH (already dead) or EPERM — treat both as "nothing to stop".
        return false;
    }
    // Bounded synchronous wait so this helper stays synchronous for its
    // caller. Atomics.wait on a private buffer is used purely as a sleep.
    const exitWaitMs = 3000;
    const pollIntervalMs = 50;
    const sleepCell = new Int32Array(new SharedArrayBuffer(4));
    const deadline = Date.now() + exitWaitMs;
    while (Date.now() < deadline) {
        try {
            process.kill(pid, 0);
        }
        catch (err) {
            // Anything but EPERM (exists, but not signalable) means the
            // process is gone — stop waiting.
            if (err?.code !== 'EPERM')
                break;
        }
        Atomics.wait(sleepCell, 0, 0, pollIntervalMs);
    }
    // Best-effort lock removal; a leftover lock must not fail the repair.
    const lockFile = join(projectRoot, '.moflo', 'daemon.lock');
    try {
        if (existsSync(lockFile))
            unlinkSync(lockFile);
    }
    catch { /* best-effort cleanup */ }
    return true;
}
|
|
92
|
+
/**
 * Run the tiered repair against the project's `.moflo/moflo.db`.
 *
 * By default the daemon is stopped first when a lock holder is present, so
 * the atomic swap performed by the repair does not race a live writer. Pass
 * `stopDaemonFirst: false` to skip that step — the launcher path does this
 * because its own daemon-stop already ran before § 0c.
 *
 * Never throws. Every step, including daemon-lock inspection and loading
 * the repair module, runs inside the guard. Any failure is reported as
 * `{ repaired: false, errors: 0, persistent: true, daemonStopped, error }`,
 * where `error` carries the failure message so callers can tell "repair
 * could not run" apart from "corruption survived the repair".
 *
 * @param {string} [projectRoot=process.cwd()] - Project root directory.
 * @param {{ stopDaemonFirst?: boolean }} [options] - Behaviour switches.
 * @returns {Promise<object>} The result of `repairMemoryDbIfCorrupt` plus
 *   `daemonStopped`; on internal failure, the fallback shape above.
 */
export async function repairMemoryDbIntegrity(projectRoot = process.cwd(), options = {}) {
    let daemonStopped = false;
    try {
        const root = resolve(projectRoot);
        const stopFirst = options?.stopDaemonFirst !== false;
        if (stopFirst && getDaemonLockHolder(root) !== null) {
            daemonStopped = stopDaemon(root);
        }
        const mod = await loadJsDbRepairModule();
        const result = await mod.repairMemoryDbIfCorrupt(root);
        return { ...result, daemonStopped };
    }
    catch (err) {
        // Keep the documented fallback shape; `error` is additive.
        const error = err instanceof Error ? err.message : String(err);
        return { repaired: false, errors: 0, persistent: true, daemonStopped, error };
    }
}
|
|
119
|
+
//# sourceMappingURL=memory-db-integrity-repair.js.map
|
package/dist/src/cli/version.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "moflo",
|
|
3
|
-
"version": "4.10.
|
|
3
|
+
"version": "4.10.1",
|
|
4
4
|
"description": "MoFlo — AI agent orchestration for Claude Code. A standalone, opinionated toolkit with semantic memory, learned routing, gates, spells, and the /flo issue-execution skill.",
|
|
5
5
|
"main": "dist/src/cli/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -95,7 +95,7 @@
|
|
|
95
95
|
"@typescript-eslint/eslint-plugin": "^7.18.0",
|
|
96
96
|
"@typescript-eslint/parser": "^7.18.0",
|
|
97
97
|
"eslint": "^8.0.0",
|
|
98
|
-
"moflo": "^4.
|
|
98
|
+
"moflo": "^4.10.0",
|
|
99
99
|
"tsx": "^4.21.0",
|
|
100
100
|
"typescript": "^5.9.3",
|
|
101
101
|
"vitest": "^4.0.0"
|