moflo 4.10.0 → 4.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/healer/SKILL.md +3 -1
- package/bin/lib/db-repair.mjs +358 -41
- package/bin/session-start-launcher.mjs +42 -6
- package/dist/src/cli/commands/doctor-checks-config.js +60 -0
- package/dist/src/cli/commands/doctor-checks-memory-access.js +27 -1
- package/dist/src/cli/commands/doctor-embedding-hygiene.js +48 -12
- package/dist/src/cli/commands/doctor-fixes.js +57 -0
- package/dist/src/cli/commands/doctor-registry.js +10 -1
- package/dist/src/cli/commands/doctor-render.js +118 -74
- package/dist/src/cli/commands/doctor.js +70 -25
- package/dist/src/cli/memory/bridge-core.js +36 -0
- package/dist/src/cli/memory/bridge-embedder.js +84 -3
- package/dist/src/cli/memory/memory-initializer.js +2 -2
- package/dist/src/cli/services/ephemeral-namespace-purge.js +15 -5
- package/dist/src/cli/services/memory-db-integrity-repair.js +119 -0
- package/dist/src/cli/version.js +1 -1
- package/package.json +2 -2
|
@@ -16,6 +16,13 @@
|
|
|
16
16
|
* vectors. The Story-2 self-healing migration converges every active
|
|
17
17
|
* row on the canonical label; this check verifies it actually did.
|
|
18
18
|
*
|
|
19
|
+
* Story #729 carve-out: ephemeral-namespace rows (tasklist, hive-mind,
|
|
20
|
+
* epic-state, test-bridge-fix, plus EPHEMERAL_NAMESPACE_PREFIXES) are
|
|
21
|
+
* intentionally written with `embedding IS NULL AND embedding_model IS
|
|
22
|
+
* NULL`. They are excluded from the count so they don't trip branch (4)
|
|
23
|
+
* "unrecognised embedding_model" on every publish — see bridge-embedder.ts
|
|
24
|
+
* for the writer-side rationale.
|
|
25
|
+
*
|
|
19
26
|
* Lives next to the doctor command rather than in `doctor.ts` to keep that
|
|
20
27
|
* file under the 500-line decomposition target.
|
|
21
28
|
*
|
|
@@ -26,6 +33,7 @@ import { existsSync } from 'fs';
|
|
|
26
33
|
import { CANONICAL_EMBEDDING_MODEL } from '../embeddings/migration/types.js';
|
|
27
34
|
import { memoryDbCandidatePaths } from '../services/moflo-paths.js';
|
|
28
35
|
import { openDaemonDatabase } from '../memory/daemon-backend.js';
|
|
36
|
+
import { EPHEMERAL_NAMESPACES, EPHEMERAL_NAMESPACE_PREFIXES, } from '../memory/bridge-embedder.js';
|
|
29
37
|
/**
|
|
30
38
|
* Known neural-model labels that all share the all-MiniLM-L6-v2 384-dim
|
|
31
39
|
* vector space. The Story-2 migration retags any of these to the
|
|
@@ -155,23 +163,51 @@ async function loadModelGroups(dbPath) {
|
|
|
155
163
|
}
|
|
156
164
|
if (!hasSchema)
|
|
157
165
|
return [];
|
|
158
|
-
|
|
159
|
-
|
|
166
|
+
// Story #729: ephemeral-namespace rows (tasklist, hive-mind, epic-state,
|
|
167
|
+
// …) are intentionally written with `embedding IS NULL AND
|
|
168
|
+
// embedding_model IS NULL`. Without this exclusion every spell run that
|
|
169
|
+
// logs to `tasklist` re-trips branch (4) "unrecognised embedding_model"
|
|
170
|
+
// on the next publish, even though the writer is doing the right thing.
|
|
171
|
+
const ephemeralNames = [...EPHEMERAL_NAMESPACES];
|
|
172
|
+
const ephemeralPrefixes = [...EPHEMERAL_NAMESPACE_PREFIXES];
|
|
173
|
+
const matchClauses = [];
|
|
174
|
+
const params = [];
|
|
175
|
+
if (ephemeralNames.length > 0) {
|
|
176
|
+
matchClauses.push(`namespace IN (${ephemeralNames.map(() => '?').join(', ')})`);
|
|
177
|
+
params.push(...ephemeralNames);
|
|
178
|
+
}
|
|
179
|
+
for (const prefix of ephemeralPrefixes) {
|
|
180
|
+
matchClauses.push(`namespace LIKE ?`);
|
|
181
|
+
params.push(`${prefix}%`);
|
|
182
|
+
}
|
|
183
|
+
const ephemeralExclusion = matchClauses.length > 0
|
|
184
|
+
? `AND NOT (embedding IS NULL AND embedding_model IS NULL AND (${matchClauses.join(' OR ')}))`
|
|
185
|
+
: '';
|
|
186
|
+
const sql = `SELECT
|
|
160
187
|
COALESCE(embedding_model, 'NULL') AS model,
|
|
161
188
|
COUNT(*) AS n,
|
|
162
189
|
SUM(CASE WHEN embedding IS NULL THEN 1 ELSE 0 END) AS null_count
|
|
163
190
|
FROM memory_entries
|
|
164
191
|
WHERE status = 'active'
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
const
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
192
|
+
${ephemeralExclusion}
|
|
193
|
+
GROUP BY model`;
|
|
194
|
+
const groups = [];
|
|
195
|
+
const stmt = db.prepare(sql);
|
|
196
|
+
try {
|
|
197
|
+
stmt.bind(params);
|
|
198
|
+
while (stmt.step()) {
|
|
199
|
+
const row = stmt.get();
|
|
200
|
+
if (Array.isArray(row)) {
|
|
201
|
+
groups.push({
|
|
202
|
+
model: String(row[0]),
|
|
203
|
+
count: Number(row[1]),
|
|
204
|
+
hasNullEmbedding: Number(row[2]) > 0,
|
|
205
|
+
});
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
finally {
|
|
210
|
+
stmt.free();
|
|
175
211
|
}
|
|
176
212
|
return groups;
|
|
177
213
|
}
|
|
@@ -243,6 +243,63 @@ export async function autoFixCheck(check) {
|
|
|
243
243
|
return false;
|
|
244
244
|
}
|
|
245
245
|
},
|
|
246
|
+
// Tiered recovery for `.moflo/moflo.db` corruption (REINDEX → VACUUM
|
|
247
|
+
// INTO → row-level salvage). The TS service stops the daemon
|
|
248
|
+
// automatically (cross-platform via `process.kill('SIGTERM')`) so the
|
|
249
|
+
// atomic swap doesn't race a live writer; we restart it via the
|
|
250
|
+
// existing `npx moflo daemon start` shorthand after. The MCP server,
|
|
251
|
+
// started by Claude Code outside our process tree, isn't stopped here —
|
|
252
|
+
// explicit user guidance covers that case at the end.
|
|
253
|
+
'Memory DB Integrity': async () => {
|
|
254
|
+
try {
|
|
255
|
+
const { repairMemoryDbIntegrity } = await import('../services/memory-db-integrity-repair.js');
|
|
256
|
+
const result = await repairMemoryDbIntegrity(process.cwd());
|
|
257
|
+
if (result.repaired) {
|
|
258
|
+
const tierLabel = result.tier === 'reindex' ? 'REINDEX (index rebuild)'
|
|
259
|
+
: result.tier === 'vacuum' ? 'VACUUM INTO (fresh-file rebuild)'
|
|
260
|
+
: result.tier === 'salvage' ? 'row-level salvage'
|
|
261
|
+
: 'repaired';
|
|
262
|
+
output.writeln(output.dim(` Recovered via ${tierLabel}.`));
|
|
263
|
+
if (result.corruptBackup) {
|
|
264
|
+
output.writeln(output.dim(` Pre-repair backup retained: ${result.corruptBackup}`));
|
|
265
|
+
}
|
|
266
|
+
if (result.lossStats) {
|
|
267
|
+
for (const [tbl, s] of Object.entries(result.lossStats)) {
|
|
268
|
+
if (s.read > 0) {
|
|
269
|
+
const lost = Math.max(0, s.read - s.written);
|
|
270
|
+
if (lost > 0) {
|
|
271
|
+
output.writeln(output.warning(` ${tbl}: ${s.written}/${s.read} rows preserved (lost ${lost} across ${s.errors} unreadable chunk(s))`));
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
output.writeln(output.dim(' Embeddings for lost rows will be regenerated on next index pass — run `npx moflo embeddings init` to force.'));
|
|
276
|
+
}
|
|
277
|
+
// Restart the daemon if we stopped it. The launcher's own
|
|
278
|
+
// section-4 spawn handles this on next session-start, but a
|
|
279
|
+
// mid-session healer call shouldn't leave the daemon down.
|
|
280
|
+
if (result.daemonStopped) {
|
|
281
|
+
output.writeln(output.dim(' Restarting daemon...'));
|
|
282
|
+
await runFixCommand('npx moflo daemon start');
|
|
283
|
+
}
|
|
284
|
+
// Cross-platform note for the MCP server (out-of-tree, can't
|
|
285
|
+
// SIGTERM). On Windows the swap would have failed if MCP was
|
|
286
|
+
// holding the file; on POSIX the swap succeeds but MCP keeps
|
|
287
|
+
// reading the stale inode until restart. Either way: restart
|
|
288
|
+
// Claude Code to fully apply.
|
|
289
|
+
output.writeln(output.dim(' Restart Claude Code so the MCP server re-opens the recovered DB.'));
|
|
290
|
+
return true;
|
|
291
|
+
}
|
|
292
|
+
if (result.persistent) {
|
|
293
|
+
output.writeln(output.warning(' Corruption survived every recovery tier. Manual options: ' +
|
|
294
|
+
'`npx moflo memory rebuild-index` (destructive) or restore from a known-good backup.'));
|
|
295
|
+
}
|
|
296
|
+
return false;
|
|
297
|
+
}
|
|
298
|
+
catch (e) {
|
|
299
|
+
output.writeln(output.warning(` Repair failed: ${errorDetail(e)}`));
|
|
300
|
+
return false;
|
|
301
|
+
}
|
|
302
|
+
},
|
|
246
303
|
'Status Line': async () => {
|
|
247
304
|
const settingsPath = join(process.cwd(), '.claude', 'settings.json');
|
|
248
305
|
if (!existsSync(settingsPath))
|
|
@@ -12,7 +12,7 @@ import { checkWritersAudit } from './doctor-checks-writers-audit.js';
|
|
|
12
12
|
import { checkSwarmFunctional, checkHiveMindFunctional, } from './doctor-checks-swarm.js';
|
|
13
13
|
import { checkMemoryAccessFunctional } from './doctor-checks-memory-access.js';
|
|
14
14
|
import { checkBuildTools, checkClaudeCode, checkDiskSpace, checkGit, checkGitRepo, checkNodeVersion, checkNpmVersion, } from './doctor-checks-runtime.js';
|
|
15
|
-
import { checkConfigFile, checkDaemonStatus, checkDaemonWriteRouting, checkMcpServers, checkMemoryDatabase, checkMofloYamlCompliance, checkStatusLine, checkTestDirs, } from './doctor-checks-config.js';
|
|
15
|
+
import { checkConfigFile, checkDaemonStatus, checkDaemonWriteRouting, checkMcpServers, checkMemoryDatabase, checkMemoryDbIntegrity, checkMofloYamlCompliance, checkStatusLine, checkTestDirs, } from './doctor-checks-config.js';
|
|
16
16
|
import { checkSpellEngine, checkSandboxTier } from './doctor-checks-platform.js';
|
|
17
17
|
import { checkEmbeddings, checkSemanticQuality, } from './doctor-checks-memory.js';
|
|
18
18
|
import { checkIntelligence } from './doctor-checks-intelligence.js';
|
|
@@ -40,6 +40,12 @@ export const allChecks = [
|
|
|
40
40
|
checkDaemonWriteRouting,
|
|
41
41
|
checkWritersAudit,
|
|
42
42
|
checkMemoryDatabase,
|
|
43
|
+
// Owns the corruption signal so downstream checks (Embeddings, Semantic
|
|
44
|
+
// Quality, Memory Access Functional) don't surface it as the synthetic
|
|
45
|
+
// "Check" failure (doctor.ts:214). MUST run after checkMemoryDatabase
|
|
46
|
+
// (which confirms the file exists) and before any check that opens the
|
|
47
|
+
// DB via openBackend.
|
|
48
|
+
checkMemoryDbIntegrity,
|
|
43
49
|
checkEmbeddings,
|
|
44
50
|
checkEmbeddingHygiene,
|
|
45
51
|
checkEmbeddingCoverageTruth,
|
|
@@ -91,6 +97,9 @@ export const componentMap = {
|
|
|
91
97
|
'writers-audit': checkWritersAudit,
|
|
92
98
|
'writers': checkWritersAudit,
|
|
93
99
|
'memory': checkMemoryDatabase,
|
|
100
|
+
'memory-db-integrity': checkMemoryDbIntegrity,
|
|
101
|
+
'integrity': checkMemoryDbIntegrity,
|
|
102
|
+
'memory-integrity': checkMemoryDbIntegrity,
|
|
94
103
|
'embeddings': checkEmbeddings,
|
|
95
104
|
'embedding-hygiene': checkEmbeddingHygiene,
|
|
96
105
|
'embedding-coverage': checkEmbeddingCoverageTruth,
|
|
@@ -19,66 +19,95 @@ function tally(results) {
|
|
|
19
19
|
failed: results.filter(r => r.status === 'fail').length,
|
|
20
20
|
};
|
|
21
21
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Run the kill-zombies scan, with optional rendering. Issue #1122: in JSON
|
|
24
|
+
* mode the prose banner would corrupt the single-document contract, so the
|
|
25
|
+
* caller passes `silent: true` and surfaces the structured result inside the
|
|
26
|
+
* JSON payload instead.
|
|
27
|
+
*/
|
|
28
|
+
export async function runKillZombies(opts = {}) {
|
|
29
|
+
const silent = !!opts.silent;
|
|
30
|
+
if (!silent) {
|
|
31
|
+
output.writeln(output.bold('Zombie Process Scan'));
|
|
32
|
+
output.writeln();
|
|
33
|
+
}
|
|
25
34
|
const registryKilled = killTrackedProcesses();
|
|
26
|
-
if (registryKilled > 0) {
|
|
35
|
+
if (!silent && registryKilled > 0) {
|
|
27
36
|
output.writeln(output.success(` Killed ${registryKilled} tracked background process(es) from registry`));
|
|
28
37
|
}
|
|
29
38
|
// Single OS-level scan + kill — the previous flow scanned twice.
|
|
30
39
|
const result = await findZombieProcesses(true);
|
|
31
40
|
const found = result.details.length;
|
|
32
|
-
if (
|
|
33
|
-
if (
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
else {
|
|
38
|
-
output.writeln(output.warning(` Found ${found} additional orphaned process(es):`));
|
|
39
|
-
for (const d of result.details) {
|
|
40
|
-
output.writeln(output.dim(` ${formatZombieDetail(d)}`));
|
|
41
|
-
}
|
|
42
|
-
if (result.killed > 0) {
|
|
43
|
-
output.writeln(output.success(` Killed ${result.killed} zombie process(es)`));
|
|
41
|
+
if (!silent) {
|
|
42
|
+
if (found === 0) {
|
|
43
|
+
if (registryKilled === 0) {
|
|
44
|
+
output.writeln(output.success(' No orphaned moflo processes found'));
|
|
45
|
+
}
|
|
44
46
|
}
|
|
45
|
-
|
|
46
|
-
output.writeln(output.warning(` ${found
|
|
47
|
+
else {
|
|
48
|
+
output.writeln(output.warning(` Found ${found} additional orphaned process(es):`));
|
|
49
|
+
for (const d of result.details) {
|
|
50
|
+
output.writeln(output.dim(` ${formatZombieDetail(d)}`));
|
|
51
|
+
}
|
|
52
|
+
if (result.killed > 0) {
|
|
53
|
+
output.writeln(output.success(` Killed ${result.killed} zombie process(es)`));
|
|
54
|
+
}
|
|
55
|
+
if (result.killed < found) {
|
|
56
|
+
output.writeln(output.warning(` ${found - result.killed} process(es) could not be killed`));
|
|
57
|
+
}
|
|
47
58
|
}
|
|
59
|
+
output.writeln();
|
|
60
|
+
output.writeln(output.dim('─'.repeat(50)));
|
|
61
|
+
output.writeln();
|
|
48
62
|
}
|
|
49
|
-
|
|
50
|
-
output.writeln(output.dim('─'.repeat(50)));
|
|
51
|
-
output.writeln();
|
|
63
|
+
return { registryKilled, found, killed: result.killed, details: result.details };
|
|
52
64
|
}
|
|
53
65
|
/**
|
|
54
66
|
* Issue #818: machine-readable output. Emits a single JSON document with
|
|
55
67
|
* per-check fields (and any FunctionalCheckDetail entries from the swarm/
|
|
56
|
-
* hive checks) and exits with the right code.
|
|
57
|
-
*
|
|
58
|
-
*
|
|
68
|
+
* hive checks) and exits with the right code.
|
|
69
|
+
*
|
|
70
|
+
* Issue #1122: action flags (`--fix`, `--install`, `--kill-zombies`) now run
|
|
71
|
+
* before this is called and their outcomes are passed in so automation can
|
|
72
|
+
* tell what changed without re-parsing prose. `results` reflects post-fix
|
|
73
|
+
* state when `fixesApplied` includes any successful fix.
|
|
59
74
|
*/
|
|
60
|
-
export function emitJsonOutput({ results, strict, allowWarnList }) {
|
|
75
|
+
export function emitJsonOutput({ results, strict, allowWarnList, fixesApplied, zombieScan, claudeCodeInstall, }) {
|
|
61
76
|
const { passed, warnings, failed } = tally(results);
|
|
62
77
|
const allowSet = new Set(allowWarnList);
|
|
63
78
|
const strictWarningFailures = strict
|
|
64
79
|
? results.filter(r => r.status === 'warn' && !allowSet.has(r.name)).map(r => r.name)
|
|
65
80
|
: [];
|
|
66
81
|
const exitCode = failed > 0 || strictWarningFailures.length > 0 ? 1 : 0;
|
|
67
|
-
|
|
82
|
+
const payload = {
|
|
68
83
|
summary: { passed, warnings, failed },
|
|
69
84
|
strict: strict ? { strictMode: true, warningsTriggeringFail: strictWarningFailures } : { strictMode: false },
|
|
70
85
|
results,
|
|
71
|
-
}
|
|
86
|
+
};
|
|
87
|
+
if (fixesApplied !== undefined)
|
|
88
|
+
payload.fixesApplied = fixesApplied;
|
|
89
|
+
if (zombieScan !== undefined)
|
|
90
|
+
payload.zombieScan = zombieScan;
|
|
91
|
+
if (claudeCodeInstall !== undefined)
|
|
92
|
+
payload.claudeCodeInstall = claudeCodeInstall;
|
|
93
|
+
process.stdout.write(JSON.stringify(payload, null, 2) + '\n');
|
|
72
94
|
return { success: exitCode === 0, exitCode, data: { passed, warnings, failed, results } };
|
|
73
95
|
}
|
|
74
|
-
/**
|
|
75
|
-
|
|
96
|
+
/**
|
|
97
|
+
* Re-runs Claude Code CLI install + check if --install was passed and the
|
|
98
|
+
* prior result wasn't pass. Issue #1122: accepts `{silent}` so the JSON path
|
|
99
|
+
* runs the install without writing prose to the stubbed stdout, and
|
|
100
|
+
* returns a structured outcome for inclusion in the JSON document.
|
|
101
|
+
*/
|
|
102
|
+
export async function maybeAutoInstallClaudeCode(results, fixes, opts = {}) {
|
|
103
|
+
const silent = !!opts.silent;
|
|
76
104
|
const claudeCodeResult = results.find(r => r.name === 'Claude Code CLI');
|
|
77
|
-
if (!claudeCodeResult || claudeCodeResult.status === 'pass')
|
|
78
|
-
return;
|
|
105
|
+
if (!claudeCodeResult || claudeCodeResult.status === 'pass') {
|
|
106
|
+
return { attempted: false, installed: false };
|
|
107
|
+
}
|
|
79
108
|
const installed = await installClaudeCode();
|
|
80
109
|
if (!installed)
|
|
81
|
-
return;
|
|
110
|
+
return { attempted: true, installed: false };
|
|
82
111
|
const newCheck = await checkClaudeCode();
|
|
83
112
|
const idx = results.findIndex(r => r.name === 'Claude Code CLI');
|
|
84
113
|
if (idx !== -1) {
|
|
@@ -88,7 +117,9 @@ export async function maybeAutoInstallClaudeCode(results, fixes) {
|
|
|
88
117
|
fixes.splice(fixIdx, 1);
|
|
89
118
|
}
|
|
90
119
|
}
|
|
91
|
-
|
|
120
|
+
if (!silent)
|
|
121
|
+
output.writeln(formatCheck(newCheck));
|
|
122
|
+
return { attempted: true, installed: true, postCheck: newCheck };
|
|
92
123
|
}
|
|
93
124
|
export function renderSummary(results) {
|
|
94
125
|
const counts = tally(results);
|
|
@@ -103,62 +134,75 @@ export function renderSummary(results) {
|
|
|
103
134
|
output.writeln(`Summary: ${summaryParts.join(', ')}`);
|
|
104
135
|
return counts;
|
|
105
136
|
}
|
|
106
|
-
/**
|
|
107
|
-
|
|
137
|
+
/**
|
|
138
|
+
* Auto-fix loop, including the post-fix re-run. Mutates `results` and `fixes`
|
|
139
|
+
* in place when fixes succeed and returns a structured outcome.
|
|
140
|
+
*
|
|
141
|
+
* Issue #1122: accepts `{silent}` so the JSON path can run the same fix work
|
|
142
|
+
* without writing prose to a stubbed stdout, and emit `fixesApplied` +
|
|
143
|
+
* post-fix `results` from the returned data.
|
|
144
|
+
*/
|
|
145
|
+
export async function runAutoFix(results, fixes, checksToRun, opts = {}) {
|
|
146
|
+
const silent = !!opts.silent;
|
|
108
147
|
if (fixes.length === 0)
|
|
109
|
-
return;
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
148
|
+
return { fixesApplied: [], reEvaluated: null };
|
|
149
|
+
if (!silent) {
|
|
150
|
+
output.writeln();
|
|
151
|
+
output.writeln(output.bold('Auto-fixing issues...'));
|
|
152
|
+
output.writeln();
|
|
153
|
+
}
|
|
113
154
|
const fixableResults = results.filter(r => r.fix && (r.status === 'fail' || r.status === 'warn'));
|
|
114
|
-
|
|
115
|
-
const unfixed = [];
|
|
155
|
+
const fixesApplied = [];
|
|
116
156
|
for (const check of fixableResults) {
|
|
117
157
|
const success = await autoFixCheck(check);
|
|
118
|
-
|
|
119
|
-
|
|
158
|
+
fixesApplied.push({ name: check.name, applied: success });
|
|
159
|
+
}
|
|
160
|
+
const fixed = fixesApplied.filter(f => f.applied).length;
|
|
161
|
+
const unfixed = fixesApplied.filter(f => !f.applied);
|
|
162
|
+
if (!silent) {
|
|
163
|
+
if (fixed > 0) {
|
|
164
|
+
output.writeln();
|
|
165
|
+
output.writeln(output.success(`Auto-fixed ${fixed} issue${fixed > 1 ? 's' : ''}`));
|
|
120
166
|
}
|
|
121
|
-
|
|
122
|
-
|
|
167
|
+
if (unfixed.length > 0) {
|
|
168
|
+
output.writeln();
|
|
169
|
+
output.writeln(output.bold('Manual fixes needed:'));
|
|
170
|
+
const fixByName = new Map(fixableResults.map(r => [r.name, r.fix ?? '']));
|
|
171
|
+
for (const f of unfixed) {
|
|
172
|
+
output.writeln(output.dim(` ${f.name}: ${fixByName.get(f.name) ?? ''}`));
|
|
173
|
+
}
|
|
123
174
|
}
|
|
124
175
|
}
|
|
125
|
-
if (fixed
|
|
176
|
+
if (fixed === 0)
|
|
177
|
+
return { fixesApplied, reEvaluated: null };
|
|
178
|
+
const reSettled = await Promise.allSettled(checksToRun.map(check => check()));
|
|
179
|
+
const reEvaluated = reSettled.map((sr) => sr.status === 'fulfilled'
|
|
180
|
+
? sr.value
|
|
181
|
+
: { name: 'Check', status: 'fail', message: sr.reason?.message ?? 'Unknown error' });
|
|
182
|
+
if (!silent) {
|
|
126
183
|
output.writeln();
|
|
127
|
-
output.writeln(output.
|
|
128
|
-
}
|
|
129
|
-
if (unfixed.length > 0) {
|
|
184
|
+
output.writeln(output.dim('Re-checking...'));
|
|
130
185
|
output.writeln();
|
|
131
|
-
|
|
132
|
-
for (const
|
|
133
|
-
output.writeln(
|
|
134
|
-
|
|
135
|
-
}
|
|
136
|
-
if (fixed === 0)
|
|
137
|
-
return;
|
|
138
|
-
output.writeln();
|
|
139
|
-
output.writeln(output.dim('Re-checking...'));
|
|
140
|
-
output.writeln();
|
|
141
|
-
const reResults = await Promise.allSettled(checksToRun.map(check => check()));
|
|
142
|
-
let rePassed = 0, reWarnings = 0, reFailed = 0;
|
|
143
|
-
for (const sr of reResults) {
|
|
144
|
-
if (sr.status === 'fulfilled') {
|
|
145
|
-
output.writeln(formatCheck(sr.value));
|
|
146
|
-
if (sr.value.status === 'pass')
|
|
186
|
+
let rePassed = 0, reWarnings = 0, reFailed = 0;
|
|
187
|
+
for (const r of reEvaluated) {
|
|
188
|
+
output.writeln(formatCheck(r));
|
|
189
|
+
if (r.status === 'pass')
|
|
147
190
|
rePassed++;
|
|
148
|
-
else if (
|
|
191
|
+
else if (r.status === 'warn')
|
|
149
192
|
reWarnings++;
|
|
150
193
|
else
|
|
151
194
|
reFailed++;
|
|
152
195
|
}
|
|
196
|
+
output.writeln();
|
|
197
|
+
output.writeln(output.dim('─'.repeat(50)));
|
|
198
|
+
const reSummary = [
|
|
199
|
+
output.success(`${rePassed} passed`),
|
|
200
|
+
reWarnings > 0 ? output.warning(`${reWarnings} warnings`) : null,
|
|
201
|
+
reFailed > 0 ? output.error(`${reFailed} failed`) : null,
|
|
202
|
+
].filter(Boolean);
|
|
203
|
+
output.writeln(`After fix: ${reSummary.join(', ')}`);
|
|
153
204
|
}
|
|
154
|
-
|
|
155
|
-
output.writeln(output.dim('─'.repeat(50)));
|
|
156
|
-
const reSummary = [
|
|
157
|
-
output.success(`${rePassed} passed`),
|
|
158
|
-
reWarnings > 0 ? output.warning(`${reWarnings} warnings`) : null,
|
|
159
|
-
reFailed > 0 ? output.error(`${reFailed} failed`) : null,
|
|
160
|
-
].filter(Boolean);
|
|
161
|
-
output.writeln(`After fix: ${reSummary.join(', ')}`);
|
|
205
|
+
return { fixesApplied, reEvaluated };
|
|
162
206
|
}
|
|
163
207
|
/**
|
|
164
208
|
* Build the final CommandResult based on pass/warn/fail counts and --strict
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { output } from '../output.js';
|
|
14
14
|
import { allChecks, componentMap, zombieScanCheck } from './doctor-registry.js';
|
|
15
|
-
import { emitJsonOutput, finalize, formatCheck, maybeAutoInstallClaudeCode, renderSummary, runAutoFix,
|
|
15
|
+
import { emitJsonOutput, finalize, formatCheck, maybeAutoInstallClaudeCode, renderSummary, runAutoFix, runKillZombies, } from './doctor-render.js';
|
|
16
16
|
import { checkEmbeddings } from './doctor-checks-memory.js';
|
|
17
17
|
import { checkMofloYamlCompliance } from './doctor-checks-config.js';
|
|
18
18
|
// Re-export for tests + external consumers (#639 stale-vector-stats test
|
|
@@ -125,24 +125,21 @@ export const doctorCommand = {
|
|
|
125
125
|
output.writeln(output.warning('--allow-warn requires --strict; ignoring (warnings are tolerated by default).'));
|
|
126
126
|
output.writeln();
|
|
127
127
|
}
|
|
128
|
-
if (killZombies) {
|
|
129
|
-
await runKillZombiesBanner();
|
|
130
|
-
}
|
|
131
128
|
const checksToRun = component && componentMap[component]
|
|
132
129
|
? [componentMap[component]]
|
|
133
130
|
: allChecks;
|
|
134
131
|
const results = [];
|
|
135
132
|
const fixes = [];
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
: output.createSpinner({ text: 'Running health checks in parallel...', spinner: 'dots' });
|
|
140
|
-
spinner?.start();
|
|
133
|
+
let zombieScan;
|
|
134
|
+
let claudeCodeInstall;
|
|
135
|
+
let fixesApplied;
|
|
141
136
|
// Issue #818: in --json mode, several deep checks (spell engine probe,
|
|
142
137
|
// mcp-spell bridge, etc.) write `[spell] ...` log lines straight to
|
|
143
138
|
// stdout — that breaks the single-JSON-document contract. Capture and
|
|
144
|
-
// discard stdout writes while checks run; restore
|
|
145
|
-
// throw can't leave the process with a stubbed stdout.
|
|
139
|
+
// discard stdout writes while checks AND post-check actions run; restore
|
|
140
|
+
// in `finally` so a throw can't leave the process with a stubbed stdout.
|
|
141
|
+
// Issue #1122: extended to wrap zombie-kill banner, --install, and
|
|
142
|
+
// --fix work so each runs on the JSON path with prose suppressed.
|
|
146
143
|
const realStdoutWrite = process.stdout.write.bind(process.stdout);
|
|
147
144
|
const restoreStdout = () => {
|
|
148
145
|
if (jsonOutput) {
|
|
@@ -153,7 +150,18 @@ export const doctorCommand = {
|
|
|
153
150
|
process.stdout.write =
|
|
154
151
|
(..._args) => true;
|
|
155
152
|
}
|
|
153
|
+
// OPTIMIZATION: Run all checks in parallel for 3-5x faster execution
|
|
154
|
+
const spinner = jsonOutput
|
|
155
|
+
? null
|
|
156
|
+
: output.createSpinner({ text: 'Running health checks in parallel...', spinner: 'dots' });
|
|
156
157
|
try {
|
|
158
|
+
// Issue #1122: kill-zombies prose used to write BEFORE the JSON
|
|
159
|
+
// suppression activated, corrupting the JSON document. Now runs
|
|
160
|
+
// under suppression and feeds a structured result into the payload.
|
|
161
|
+
if (killZombies) {
|
|
162
|
+
zombieScan = await runKillZombies({ silent: jsonOutput });
|
|
163
|
+
}
|
|
164
|
+
spinner?.start();
|
|
157
165
|
let checkResults;
|
|
158
166
|
try {
|
|
159
167
|
checkResults = await Promise.allSettled(checksToRun.map(check => check()));
|
|
@@ -174,7 +182,6 @@ export const doctorCommand = {
|
|
|
174
182
|
}
|
|
175
183
|
finally {
|
|
176
184
|
spinner?.stop();
|
|
177
|
-
restoreStdout();
|
|
178
185
|
}
|
|
179
186
|
for (const settledResult of checkResults) {
|
|
180
187
|
if (settledResult.status === 'fulfilled') {
|
|
@@ -197,26 +204,64 @@ export const doctorCommand = {
|
|
|
197
204
|
output.writeln(formatCheck(errorResult));
|
|
198
205
|
}
|
|
199
206
|
}
|
|
207
|
+
// Issue #1122: action flags must run on BOTH the JSON path and the
|
|
208
|
+
// formatted path. Previously the JSON branch early-returned before
|
|
209
|
+
// any of this ran, so `--json --fix` (and `--json --install`) silently
|
|
210
|
+
// no-op'd. Now they execute under stdout suppression and their
|
|
211
|
+
// outcomes feed the JSON payload below.
|
|
212
|
+
if (autoInstall) {
|
|
213
|
+
claudeCodeInstall = await maybeAutoInstallClaudeCode(results, fixes, { silent: jsonOutput });
|
|
214
|
+
}
|
|
215
|
+
if (!jsonOutput)
|
|
216
|
+
renderSummary(results);
|
|
217
|
+
if (showFix && fixes.length > 0) {
|
|
218
|
+
const outcome = await runAutoFix(results, fixes, checksToRun, { silent: jsonOutput });
|
|
219
|
+
fixesApplied = outcome.fixesApplied;
|
|
220
|
+
// Replace `results` with post-fix state so JSON consumers see the
|
|
221
|
+
// re-evaluated truth, not the pre-fix snapshot. Mirror the #992
|
|
222
|
+
// post-parallel zombie-scan append so the post-fix shape matches
|
|
223
|
+
// pre-fix shape (otherwise `--json --fix` silently drops the
|
|
224
|
+
// Zombie Processes entry from the JSON `results[]`).
|
|
225
|
+
if (outcome.reEvaluated) {
|
|
226
|
+
const finalChecks = [...outcome.reEvaluated];
|
|
227
|
+
if (!component) {
|
|
228
|
+
try {
|
|
229
|
+
finalChecks.push(await zombieScanCheck());
|
|
230
|
+
}
|
|
231
|
+
catch (reason) {
|
|
232
|
+
finalChecks.push({
|
|
233
|
+
name: 'Zombie Processes',
|
|
234
|
+
status: 'fail',
|
|
235
|
+
message: reason?.message ?? 'Unknown error',
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
results.length = 0;
|
|
240
|
+
results.push(...finalChecks);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
else if (fixes.length > 0 && !showFix && !jsonOutput) {
|
|
244
|
+
output.writeln();
|
|
245
|
+
output.writeln(output.dim(`Run with --fix to auto-fix ${fixes.length} issue${fixes.length > 1 ? 's' : ''}`));
|
|
246
|
+
}
|
|
200
247
|
}
|
|
201
248
|
catch {
|
|
202
249
|
spinner?.stop();
|
|
203
|
-
restoreStdout();
|
|
204
250
|
if (!jsonOutput)
|
|
205
251
|
output.writeln(output.error('Failed to run health checks'));
|
|
206
252
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
}
|
|
210
|
-
if (autoInstall) {
|
|
211
|
-
await maybeAutoInstallClaudeCode(results, fixes);
|
|
212
|
-
}
|
|
213
|
-
renderSummary(results);
|
|
214
|
-
if (showFix && fixes.length > 0) {
|
|
215
|
-
await runAutoFix(results, fixes, checksToRun);
|
|
253
|
+
finally {
|
|
254
|
+
restoreStdout();
|
|
216
255
|
}
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
256
|
+
if (jsonOutput) {
|
|
257
|
+
return emitJsonOutput({
|
|
258
|
+
results,
|
|
259
|
+
strict,
|
|
260
|
+
allowWarnList,
|
|
261
|
+
fixesApplied,
|
|
262
|
+
zombieScan,
|
|
263
|
+
claudeCodeInstall,
|
|
264
|
+
});
|
|
220
265
|
}
|
|
221
266
|
return finalize({ results, strict, allowWarnList });
|
|
222
267
|
},
|
|
@@ -95,6 +95,27 @@ export function logBridgeError(context, err, opts) {
|
|
|
95
95
|
const msg = errorDetail(err);
|
|
96
96
|
console.error(`[moflo] ${context}: ${msg}`);
|
|
97
97
|
}
|
|
98
|
+
/**
|
|
99
|
+
* Recognises the node:sqlite "operation on closed handle" error shape.
|
|
100
|
+
*
|
|
101
|
+
* #1123 — A concurrent `withDb` call's `checkBridgeCoherence` can fire
|
|
102
|
+
* `shutdownBridge()` between our `getDb(registry)` and `fn(ctx, registry)`,
|
|
103
|
+
* closing the underlying `DatabaseSync`. Our previously-captured `ctx.db`
|
|
104
|
+
* then throws `ERR_INVALID_STATE: database is not open` on the next op.
|
|
105
|
+
*
|
|
106
|
+
* The operation hadn't started its mutation yet, so a single retry against a
|
|
107
|
+
* fresh registry is safe (matches the `withBusyRetry` shape for SQLITE_BUSY).
|
|
108
|
+
* Bounded to one retry so a *genuinely* broken DB still surfaces — we don't
|
|
109
|
+
* want to mask a registry that can't be re-acquired.
|
|
110
|
+
*/
|
|
111
|
+
function isStaleHandleError(err) {
|
|
112
|
+
if (!err || typeof err !== 'object')
|
|
113
|
+
return false;
|
|
114
|
+
const e = err;
|
|
115
|
+
if (e.code === 'ERR_INVALID_STATE')
|
|
116
|
+
return true;
|
|
117
|
+
return typeof e.message === 'string' && /database is not open/i.test(e.message);
|
|
118
|
+
}
|
|
98
119
|
/**
|
|
99
120
|
* Treats an error as a SQLITE_BUSY lock-contention failure if either the
|
|
100
121
|
* error code or message indicates it. Belt-and-suspenders around node:sqlite,
|
|
@@ -456,6 +477,9 @@ async function checkBridgeCoherence(dbPath) {
|
|
|
456
477
|
* self-fire is suppressed.
|
|
457
478
|
*/
|
|
458
479
|
export async function withDb(dbPath, fn) {
|
|
480
|
+
return withDbInner(dbPath, fn, 0);
|
|
481
|
+
}
|
|
482
|
+
async function withDbInner(dbPath, fn, attempt) {
|
|
459
483
|
await checkBridgeCoherence(dbPath);
|
|
460
484
|
const registry = await getRegistry(dbPath);
|
|
461
485
|
if (!registry)
|
|
@@ -510,6 +534,18 @@ export async function withDb(dbPath, fn) {
|
|
|
510
534
|
return result;
|
|
511
535
|
}
|
|
512
536
|
catch (err) {
|
|
537
|
+
// #1123 — stale-handle race: a concurrent withDb's coherence check tore
|
|
538
|
+
// the registry down between our getDb() and fn() execution, closing the
|
|
539
|
+
// underlying DatabaseSync. Drop the dead handle and retry once against a
|
|
540
|
+
// freshly-acquired registry. The first attempt threw BEFORE its mutation
|
|
541
|
+
// landed (node:sqlite errors at prepare/exec time, not mid-statement), so
|
|
542
|
+
// a retry is idempotent. Bounded to one retry so a genuinely-unrecoverable
|
|
543
|
+
// bridge (e.g. corrupt file, missing module) still surfaces as a null
|
|
544
|
+
// return + logged error, not an infinite loop.
|
|
545
|
+
if (attempt === 0 && isStaleHandleError(err)) {
|
|
546
|
+
await shutdownBridge();
|
|
547
|
+
return await withDbInner(dbPath, fn, attempt + 1);
|
|
548
|
+
}
|
|
513
549
|
logBridgeError('bridge operation failed', err);
|
|
514
550
|
return null;
|
|
515
551
|
}
|