neurain 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/LICENSE +57 -0
  3. package/README.md +205 -0
  4. package/SECURITY.md +22 -0
  5. package/bin/neurain.mjs +7 -0
  6. package/docs/comparison-mem0.en.md +22 -0
  7. package/docs/connect-claude.en.md +48 -0
  8. package/docs/connect-claude.kr.md +51 -0
  9. package/docs/connect-codex.en.md +38 -0
  10. package/docs/connect-codex.kr.md +40 -0
  11. package/docs/connect-gemini.en.md +71 -0
  12. package/docs/connect-gemini.kr.md +71 -0
  13. package/docs/connect-runtime.en.md +61 -0
  14. package/docs/connect-runtime.kr.md +61 -0
  15. package/docs/development-status.en.md +157 -0
  16. package/docs/development-status.kr.md +157 -0
  17. package/docs/knowledge-os.en.md +105 -0
  18. package/docs/knowledge-os.kr.md +106 -0
  19. package/docs/pricing.en.md +14 -0
  20. package/docs/privacy-and-data-flow.en.md +25 -0
  21. package/docs/public-saas-readiness.en.md +39 -0
  22. package/docs/quickstart.en.md +64 -0
  23. package/docs/quickstart.kr.md +64 -0
  24. package/docs/release-checklist.en.md +38 -0
  25. package/docs/safety.en.md +36 -0
  26. package/docs/self-improvement-90-roadmap.en.md +429 -0
  27. package/docs/self-improvement-90-roadmap.kr.md +429 -0
  28. package/docs/self-improving-workflows.en.md +163 -0
  29. package/docs/self-improving-workflows.kr.md +163 -0
  30. package/docs/support.en.md +17 -0
  31. package/docs/troubleshooting.en.md +35 -0
  32. package/package.json +36 -0
  33. package/src/cli.mjs +261 -0
  34. package/src/core/adopt.mjs +304 -0
  35. package/src/core/answer_eval.mjs +450 -0
  36. package/src/core/capabilities.mjs +217 -0
  37. package/src/core/capture_durable.mjs +181 -0
  38. package/src/core/classify.mjs +237 -0
  39. package/src/core/compile_desk.mjs +324 -0
  40. package/src/core/complete.mjs +108 -0
  41. package/src/core/config.mjs +142 -0
  42. package/src/core/connect.mjs +355 -0
  43. package/src/core/curator.mjs +351 -0
  44. package/src/core/daemon.mjs +536 -0
  45. package/src/core/digest.mjs +155 -0
  46. package/src/core/doctor.mjs +115 -0
  47. package/src/core/durable.mjs +96 -0
  48. package/src/core/envelope.mjs +97 -0
  49. package/src/core/flush.mjs +190 -0
  50. package/src/core/fs.mjs +121 -0
  51. package/src/core/init.mjs +194 -0
  52. package/src/core/journal.mjs +269 -0
  53. package/src/core/labels.mjs +117 -0
  54. package/src/core/lessons.mjs +793 -0
  55. package/src/core/lifecycle.mjs +1138 -0
  56. package/src/core/link_check.mjs +180 -0
  57. package/src/core/live_cases.mjs +221 -0
  58. package/src/core/onboard.mjs +175 -0
  59. package/src/core/plan_receipt.mjs +177 -0
  60. package/src/core/plan_writeback.mjs +176 -0
  61. package/src/core/queue.mjs +62 -0
  62. package/src/core/queue_archive.mjs +87 -0
  63. package/src/core/queue_model.mjs +161 -0
  64. package/src/core/queue_write.mjs +28 -0
  65. package/src/core/recall.mjs +1802 -0
  66. package/src/core/recall_bench.mjs +275 -0
  67. package/src/core/recall_corpus.mjs +152 -0
  68. package/src/core/recall_facts.mjs +233 -0
  69. package/src/core/recall_intel.mjs +233 -0
  70. package/src/core/recall_lexical.mjs +269 -0
  71. package/src/core/recap.mjs +78 -0
  72. package/src/core/review_queue.mjs +131 -0
  73. package/src/core/review_worker.mjs +284 -0
  74. package/src/core/route.mjs +73 -0
  75. package/src/core/safety.mjs +57 -0
  76. package/src/core/scheduler.mjs +697 -0
  77. package/src/core/search.mjs +54 -0
  78. package/src/core/secret_scan.mjs +143 -0
  79. package/src/core/semantic.mjs +187 -0
  80. package/src/core/source_digest.mjs +56 -0
  81. package/src/core/source_digest_gen.mjs +311 -0
  82. package/src/core/stage.mjs +105 -0
  83. package/src/core/status.mjs +175 -0
  84. package/src/core/vault_state.mjs +115 -0
  85. package/src/core/watch.mjs +282 -0
  86. package/src/core/wiki_log.mjs +29 -0
  87. package/src/core/wrap.mjs +62 -0
  88. package/src/mcp/server.mjs +865 -0
  89. package/templates/starter-vault/README.md +9 -0
@@ -0,0 +1,1802 @@
1
+ import fs from 'node:fs';
2
+ import os from 'node:os';
3
+ import path from 'node:path';
4
+ import { absPath, compactStamp, ensureDir, isTextFile, readText, relPath, safeResolve, sha256, timestamp, walkFiles } from './fs.mjs';
5
+ import { appendJournalEvent, listJournalEvents, verifyJournal } from './journal.mjs';
6
+ import { inferSensitivityFromPath } from './safety.mjs';
7
+ import { alternativeForm, getProvider, tokenize } from './semantic.mjs';
8
+ import { recallConfig } from './config.mjs';
9
+ import { createSensitivityResolver } from './labels.mjs';
10
+ import { kindForPath, listRecallMarkdownFiles, recallConfigErrors, resolveAreaDir, safeToIndex, scopeForArea, scopeForPath, titleForText } from './recall_corpus.mjs';
11
+ import { buildLexicalContext, lexicalSearchWithContext } from './recall_lexical.mjs';
12
+ import { benchRecall, scorecardRecall } from './recall_bench.mjs';
13
+
14
// Root-relative location of the SQLite recall index file.
const dbRel = '00_system/neurain/recall.sqlite';
// Root-relative directory that receives one JSON receipt per recall rebuild.
const receiptDirRel = 'output/receipts/recall';
16
+
17
/**
 * CLI dispatcher for the `recall` command family.
 *
 * The first positional in `args._` selects the subcommand (a missing one means
 * `status`); the next positional, then `args.root`, then the current working
 * directory is used as the root.
 *
 * @param {object} args Parsed CLI arguments; `args._` holds the positionals.
 * @returns {Promise<*>} Whatever the selected renderer returns.
 * @throws {Error} When the subcommand is not recognised.
 */
export async function recallCommand(args) {
  const [subcommand, ...rest] = args._;
  const root = absPath(rest[0] || args.root || process.cwd());
  // Search-style subcommands get the positionals that follow the root.
  const withQueryPositionals = () => ({ ...args, _: rest.slice(1) });

  switch (subcommand || 'status') {
    case 'status':
      return renderRecallStatus(root, args);
    case 'rebuild':
      return renderRecallRebuild(root, args);
    case 'search':
      return renderRecallSearch(root, withQueryPositionals());
    case 'semantic-search':
      return renderRecallSemanticSearch(root, withQueryPositionals());
    case 'hybrid-search':
      return renderRecallHybridSearch(root, withQueryPositionals());
    case 'lexical-search':
      return renderRecallLexicalSearch(root, withQueryPositionals());
    case 'live-eval':
      return renderRecallLiveEval(root, args);
    case 'verify':
      return renderRecallVerify(root, args);
    case 'eval':
      return renderRecallEval(root, args);
    case 'bench':
      return renderRecallBench(root, args);
    case 'scorecard':
      return renderRecallScorecard(root, args);
    default:
      throw new Error(`Unknown recall command: ${subcommand}. Use "recall status", "recall rebuild", "recall search", "recall semantic-search", "recall hybrid-search", "recall lexical-search", "recall bench", "recall scorecard", "recall verify", or "recall eval".`);
  }
}
33
+
34
/**
 * Report the state of the SQLite recall index without writing anything.
 *
 * When SQLite is unavailable or the index file does not exist, the default
 * payload (zero rows, no metadata) is returned as-is. Otherwise the index is
 * opened read-only and its row count, per-host counts and metadata are read.
 * A failure while reading flips `ok` to false and records the error message;
 * fields already populated before the failure are kept.
 *
 * @param {string} root Root directory that contains the index.
 * @returns {Promise<object>} Status payload.
 */
export async function recallStatus(root) {
  const sqlite = await sqliteCapability();
  const dbPath = safeResolve(root, dbRel);
  const dbExists = fs.existsSync(dbPath);
  const payload = {
    ok: true,
    command: 'recall status',
    root,
    durable_write: false,
    sqlite_available: sqlite.available,
    fts5_available: false,
    db_path: dbRel,
    db_exists: dbExists,
    markdown_fallback: true,
    row_count: 0,
    host_breakdown: {},
    last_rebuild_at: null,
    source_manifest_hash: null,
    error: sqlite.error || null,
  };
  if (!(sqlite.available && dbExists)) return payload;

  let db;
  try {
    db = new sqlite.DatabaseSync(sqliteReadOnlyLocation(dbPath), { readOnly: true });
    const firstRow = (sql) => db.prepare(sql).get();

    const ftsFlag = firstRow("SELECT value FROM recall_meta WHERE key = 'fts5_available'")?.value;
    payload.fts5_available = ftsFlag === 'true';

    const total = firstRow('SELECT COUNT(*) AS count FROM recall_docs')?.count;
    payload.row_count = Number(total || 0);

    const perHost = db.prepare('SELECT host, COUNT(*) AS count FROM recall_docs GROUP BY host ORDER BY host').all();
    payload.host_breakdown = Object.fromEntries(perHost.map((row) => [row.host, Number(row.count || 0)]));

    payload.last_rebuild_at = firstRow("SELECT value FROM recall_meta WHERE key = 'rebuilt_at'")?.value || null;
    payload.source_manifest_hash = firstRow("SELECT value FROM recall_meta WHERE key = 'source_manifest_hash'")?.value || null;
  } catch (failure) {
    payload.ok = false;
    payload.error = failure.message;
  } finally {
    // Close the handle whether or not the reads succeeded.
    db?.close();
  }
  return payload;
}
71
+
72
/**
 * Rebuild the SQLite recall index from the collected recall documents.
 *
 * The index is built into a temporary file next to the target, swapped into
 * place, and a JSON receipt describing the rebuild is written (exclusive
 * create). With `dryRun`, or when SQLite is unavailable, nothing is written and
 * only the summary is returned.
 *
 * @param {string} root Root directory to index.
 * @param {{ dryRun?: boolean }} [options]
 * @returns {Promise<object>} Rebuild summary (also the receipt content).
 */
export async function rebuildRecall(root, { dryRun = false } = {}) {
  const sqlite = await sqliteCapability();
  const recallCfg = recallConfig(root);
  const docs = collectRecallDocs(root, { recallCfg });
  const configErrors = recallConfigErrors(recallCfg);

  // The manifest hash covers identity, path, content hash and sensitivity of every doc.
  const manifestEntries = docs.map((doc) => ({
    doc_id: doc.doc_id,
    path: doc.path,
    source_hash: doc.source_hash,
    sensitivity: doc.sensitivity,
  }));
  const manifestHash = sha256(stableJson(manifestEntries));

  const receiptName = `${compactStamp()}-${process.pid}-${Date.now()}-${manifestHash.slice(0, 12)}-recall-rebuild.json`;
  const receiptRel = `${receiptDirRel}/${receiptName}`;
  const willWrite = Boolean(!dryRun && sqlite.available);

  const result = {
    ok: Boolean(dryRun || sqlite.available),
    command: 'recall rebuild',
    generated_at: timestamp(),
    dry_run: Boolean(dryRun),
    durable_write: willWrite,
    sqlite_available: sqlite.available,
    fts5_available: false,
    db_path: dbRel,
    receipt_path: willWrite ? receiptRel : null,
    indexed_count: docs.length,
    source_manifest_hash: manifestHash,
    markdown_canonical: true,
    rebuildable_cache: true,
    excluded_policy: 'private paths, unsafe summaries, raw source bodies, and generated folders are excluded',
    config_errors: configErrors,
    error: sqlite.error || null,
  };
  if (!willWrite) return result;

  const dbPath = safeResolve(root, dbRel);
  const tmpPath = `${dbPath}.${process.pid}.${Date.now()}.tmp`;
  try {
    ensureDir(path.dirname(dbPath));
    buildSqliteIndex(sqlite.DatabaseSync, tmpPath, docs, manifestHash);
    replaceSqliteFile(dbPath, tmpPath);
    result.fts5_available = true;

    const receiptAbs = safeResolve(root, receiptRel);
    ensureDir(path.dirname(receiptAbs));
    // 'wx' refuses to overwrite an existing receipt.
    const receiptBody = `${JSON.stringify(result, null, 2)}\n`;
    fs.writeFileSync(receiptAbs, receiptBody, { encoding: 'utf8', flag: 'wx' });
  } finally {
    cleanupSqliteSidecars(tmpPath);
  }
  return result;
}
119
+
120
/**
 * Exact-token search against the SQLite FTS5 recall index.
 *
 * Falls back to `fallbackRecallSearch` when SQLite is unavailable, the index
 * file is missing, or the query fails (unless `fallback` is false).
 *
 * @param {string} root Root directory that contains the index.
 * @param {string} query Search text; must not be blank.
 * @param {object} [options]
 * @param {number|string} [options.top=10] Maximum rows; normalised to an integer in [1, 50].
 * @param {string} [options.host=''] Restrict to this host; empty means any host.
 * @param {boolean} [options.fallback=true] Allow the non-SQLite fallback search.
 * @param {string} [options.scope=''] Restrict to this scope; empty means any scope.
 * @returns {Promise<object>} Search payload with a `results` array.
 * @throws {Error} When the query is blank.
 */
export async function searchRecall(root, query, { top = 10, host = '', fallback = true, scope = '' } = {}) {
  const sqlite = await sqliteCapability();
  const dbPath = safeResolve(root, dbRel);
  const payload = {
    ok: true,
    command: 'recall search',
    root,
    durable_write: false,
    query: String(query || ''),
    top: Number(top || 10),
    host: String(host || ''),
    scope: String(scope || ''),
    sqlite_available: sqlite.available,
    db_exists: fs.existsSync(dbPath),
    source: 'sqlite_fts5',
    results: [],
    fallback_used: false,
    error: sqlite.error || null,
  };
  if (!payload.query.trim()) throw new Error('Recall search requires a query.');
  if (!sqlite.available || !fs.existsSync(dbPath)) {
    if (!fallback) return payload;
    return fallbackRecallSearch(root, payload);
  }
  let db;
  try {
    db = new sqlite.DatabaseSync(sqliteReadOnlyLocation(dbPath), { readOnly: true });
    const ftsQuery = toFtsQuery(payload.query);
    if (!ftsQuery) return payload;
    // LIMIT must be bound to an integer: a non-numeric `top` (NaN) or a
    // fractional one would otherwise make the query itself fail and push a
    // perfectly valid search into the error/fallback path.
    const requestedTop = Number(top || 10);
    const limit = Number.isNaN(requestedTop)
      ? 10
      : Math.trunc(Math.max(1, Math.min(requestedTop, 50)));
    const hostFilter = String(host || '');
    const scopeFilter = String(scope || '');
    const rows = db.prepare(`
      SELECT d.path, d.kind, d.host, d.scope, d.sensitivity, d.title, d.snippet, d.source_hash, bm25(recall_fts) AS rank
      FROM recall_fts
      JOIN recall_docs d ON d.doc_id = recall_fts.doc_id
      WHERE recall_fts MATCH ?
      AND (? = '' OR d.host = ?)
      AND (? = '' OR d.scope = ?)
      ORDER BY rank ASC, d.path ASC
      LIMIT ?
    `).all(ftsQuery, hostFilter, hostFilter, scopeFilter, scopeFilter, limit);
    payload.results = rows.map((row) => ({
      path: row.path,
      kind: row.kind,
      host: row.host,
      scope: row.scope,
      sensitivity: row.sensitivity,
      title: row.title,
      snippet: row.snippet,
      source_hash: row.source_hash,
      // Rows are ordered by ascending rank; negating it makes a larger score the better match.
      score: Number((-Number(row.rank || 0)).toFixed(3)),
    }));
  } catch (error) {
    payload.ok = false;
    payload.error = error.message;
    if (fallback) return fallbackRecallSearch(root, payload);
  } finally {
    db?.close();
  }
  return payload;
}
181
+
182
+ // E22: deterministic local lexical-semantic search over the canonical markdown
183
+ // corpus. No SQLite required (markdown stays canonical, the default provider
184
+ // needs no generated index), no model calls, no external calls. Private and
185
+ // unsafe docs are excluded exactly like the exact-token path.
186
/**
 * Score every collected recall document against the query with the selected
 * provider and return the best matches at or above `minScore`.
 *
 * Documents whose sensitivity is `private` are dropped before scoring, as are
 * documents that do not match the optional host/scope filters.
 *
 * @param {string} root Root directory to search.
 * @param {string} query Search text; must not be blank.
 * @param {object} [options]
 * @param {number|string} [options.top=10] Maximum rows; normalised to an integer in [1, 50].
 * @param {string} [options.host=''] Restrict to this host; empty means any host.
 * @param {string} [options.provider='local-lexical'] Provider name passed to `getProvider`.
 * @param {number} [options.minScore=0.34] Score floor, clamped to [0, 1]; non-finite values use 0.34.
 * @param {string} [options.scope=''] Restrict to this scope; empty means any scope.
 * @returns {Promise<object>} Search payload with a `results` array.
 * @throws {Error} When the query is blank.
 */
export async function semanticSearchRecall(root, query, { top = 10, host = '', provider = 'local-lexical', minScore = 0.34, scope = '' } = {}) {
  const prov = getProvider(provider);
  const text = String(query || '');
  if (!text.trim()) throw new Error('Recall semantic search requires a query.');
  // A non-numeric `top` used to yield NaN here, and `slice(0, NaN)` then
  // silently returned no results; fall back to the default of 10 instead and
  // always report an integer.
  const requestedTop = Number(top || 10);
  const limit = Number.isNaN(requestedTop)
    ? 10
    : Math.trunc(Math.max(1, Math.min(requestedTop, 50)));
  const hostFilter = String(host || '');
  const scopeFilter = String(scope || '');
  const floor = Number.isFinite(Number(minScore)) ? Math.max(0, Math.min(Number(minScore), 1)) : 0.34;
  const docs = collectRecallDocs(root)
    .filter((doc) => doc.sensitivity !== 'private')
    .filter((doc) => !hostFilter || doc.host === hostFilter)
    .filter((doc) => !scopeFilter || doc.scope === scopeFilter);
  const scored = docs
    .map((doc) => {
      const scoredDoc = prov.score(text, `${doc.title} ${doc.body}`);
      return { doc, score: Number(scoredDoc.score || 0), matched_terms: scoredDoc.matched_terms || [] };
    })
    .filter((item) => item.score >= floor)
    // Deterministic ranking: score desc, then a stable content tie-break (snippet, then
    // path) so equal-score results order the same way on every run regardless of random
    // event ids in the path.
    .sort((a, b) => b.score - a.score
      || String(a.doc.snippet || '').localeCompare(String(b.doc.snippet || ''))
      || String(a.doc.path || '').localeCompare(String(b.doc.path || '')))
    .slice(0, limit);
  return {
    ok: true,
    command: 'recall semantic-search',
    root,
    durable_write: false,
    model_calls: Boolean(prov.model_call),
    external_tool_calls: Boolean(prov.external_call),
    query: text,
    top: limit,
    host: hostFilter,
    scope: scopeFilter,
    semantic_used: true,
    embedding_provider: prov.name,
    provider_kind: prov.kind,
    provider_requires_index: Boolean(prov.requires_index),
    llm_locked: Boolean(prov.llm_locked),
    min_score: floor,
    results: scored.map(({ doc, score, matched_terms }) => ({
      path: doc.path,
      kind: doc.kind,
      host: doc.host,
      scope: doc.scope,
      sensitivity: doc.sensitivity,
      title: doc.title,
      snippet: doc.snippet,
      source_hash: doc.source_hash,
      score: Number(score.toFixed(4)),
      matched_terms,
    })),
  };
}
242
+
243
// E23 follow-up: hybrid recall = exact-token UNION local lexical-semantic. Live-eval on real
// content showed pure semantic can recall LESS than exact-token (it lacks the rarity/idf
// weighting BM25 exact-token has, so it discriminates worse on real corpora with overlapping
// vocabulary). Used as a SUPPLEMENT, semantic can only add paraphrase hits and never removes
// an exact hit. `top` is the candidate depth for each branch, so the returned union can contain
// up to `2 * top` rows in the legacy exact+semantic mode and up to `3 * top` rows when the
// routed lexical branch is enabled (see `result_cap` in the payload). This is the robust recall
// a real user should use. Read-only.
/**
 * @param {string} root Root directory to search.
 * @param {string} query Search text; must not be blank.
 * @param {object} [options]
 * @param {number|string} [options.top=10] Per-branch depth; normalised to an integer in [1, 50].
 * @param {string} [options.host=''] Restrict to this host; empty means any host.
 * @param {string} [options.provider='local-lexical'] Provider for the semantic branch.
 * @param {number} [options.minScore=0.34] Score floor for the semantic branch.
 * @param {string} [options.area=''] Area to scope the search to.
 * @param {string|boolean} [options.routing='auto'] 'on'/true, 'off'/false, or config/registry driven.
 * @returns {Promise<object>} Search payload with a merged `results` array.
 * @throws {Error} When the query is blank.
 */
export async function hybridSearchRecall(root, query, { top = 10, host = '', provider = 'local-lexical', minScore = 0.34, area = '', routing = 'auto' } = {}) {
  const text = String(query || '');
  if (!text.trim()) throw new Error('Recall hybrid search requires a query.');
  // A non-numeric `top` used to leak NaN into `top` and `result_cap`; fall back
  // to the default of 10 and always report an integer.
  const requestedTop = Number(top || 10);
  const limit = Number.isNaN(requestedTop)
    ? 10
    : Math.trunc(Math.max(1, Math.min(requestedTop, 50)));
  const recallCfg = recallConfig(root);
  const areaDir = resolveAreaDir(root, area, recallCfg);

  // The engine index is a durable artifact readable over MCP, so private is
  // excluded at collection time and never resurrected by a query-time flag.
  // Explicitly scoping a private area therefore returns nothing (the vault keeps
  // its own private-area lookup until a later wave designs an opt-in).
  if (areaDir && createSensitivityResolver(root, recallCfg).isPrivateArea(areaDir)) {
    return {
      ok: true,
      command: 'recall hybrid-search',
      root,
      durable_write: false,
      model_calls: false,
      external_tool_calls: false,
      query: text,
      top: limit,
      host: String(host || ''),
      area: String(area || ''),
      area_dir: areaDir,
      private_area_excluded: true,
      strategy: 'private_area_excluded',
      semantic_used: false,
      results: [],
      note: 'Area is marked private; private content is excluded from the recall index by design.',
    };
  }

  const scope = scopeForArea(areaDir);
  const routedEnabled = decideRouting(routing, areaDir, root, recallCfg);
  const exact = await searchRecall(root, text, { top: limit, host, scope });
  const semantic = await semanticSearchRecall(root, text, { top: limit, host, provider, minScore, scope });

  if (!routedEnabled) {
    // Legacy two-branch union: exact hits first, then semantic-only additions.
    const merged = mergeHybridResults(exact.results, semantic.results);
    return {
      ok: true,
      command: 'recall hybrid-search',
      root,
      durable_write: false,
      model_calls: Boolean(semantic.model_calls),
      external_tool_calls: Boolean(semantic.external_tool_calls),
      query: text,
      top: limit,
      host: String(host || ''),
      area: String(area || ''),
      area_dir: areaDir,
      strategy: 'exact_token_union_semantic',
      routed_used: false,
      semantic_used: true,
      embedding_provider: semantic.embedding_provider,
      exact_result_count: exact.results.length,
      semantic_result_count: semantic.results.length,
      semantic_only_added: merged.filter((item) => item.matched_by === 'semantic').length,
      result_cap: limit * 2,
      results: merged,
    };
  }

  // Routed three-branch union: lexical ranker first, then exact, then semantic.
  const lexicalCtx = buildLexicalContext(root, { area: areaDir, recallCfg });
  const lexical = lexicalSearchWithContext(lexicalCtx, text, { top: limit });
  const merged = mergeRoutedHybridResults(lexical.results, exact.results, semantic.results);
  return {
    ok: true,
    command: 'recall hybrid-search',
    root,
    durable_write: false,
    model_calls: Boolean(semantic.model_calls),
    external_tool_calls: Boolean(semantic.external_tool_calls),
    query: text,
    top: limit,
    host: String(host || ''),
    area: String(area || ''),
    area_dir: areaDir,
    strategy: 'routed_lexical_union_exact_union_semantic',
    routed_used: true,
    semantic_used: true,
    embedding_provider: semantic.embedding_provider,
    lexical_result_count: lexical.results.length,
    exact_result_count: exact.results.length,
    semantic_result_count: semantic.results.length,
    semantic_only_added: merged.filter((item) => item.matched_by === 'semantic').length,
    result_cap: limit * 3,
    results: merged,
  };
}
340
+
341
/**
 * Union exact and semantic results, de-duplicated by `path`.
 *
 * Exact rows come first; a semantic row is added only when its path has not
 * been seen yet. Every row is copied and tagged with `matched_by`.
 *
 * @param {object[]} [exactResults]
 * @param {object[]} [semanticResults]
 * @returns {object[]} Merged rows in first-seen order.
 */
function mergeHybridResults(exactResults = [], semanticResults = []) {
  const firstByPath = new Map();
  for (const row of exactResults) {
    if (!firstByPath.has(row.path)) {
      firstByPath.set(row.path, { ...row, matched_by: 'exact' });
    }
  }
  for (const row of semanticResults) {
    if (!firstByPath.has(row.path)) {
      firstByPath.set(row.path, { ...row, matched_by: 'semantic', matched_terms: row.matched_terms });
    }
  }
  // Map iteration follows insertion order, i.e. exact rows then semantic additions.
  return [...firstByPath.values()];
}
356
+
357
+ // Routed 3-branch union with branch precedence (no cross-branch score
358
+ // normalization): the routed lexical ranker first (its top-N IS the merged
359
+ // top-N when it returns >= top), then exact-token FTS5 catches, then semantic
360
+ // paraphrase catches. Dedupe by path; semantic only ever ADDS.
361
/**
 * Union lexical, exact and semantic results, de-duplicated by `path`.
 *
 * Branch order is the precedence: lexical rows first, then exact rows whose
 * path is new, then semantic rows whose path is new. Every row is copied and
 * tagged with the branch that contributed it in `matched_by`.
 *
 * @param {object[]} [lexicalResults]
 * @param {object[]} [exactResults]
 * @param {object[]} [semanticResults]
 * @returns {object[]} Merged rows in first-seen order.
 */
function mergeRoutedHybridResults(lexicalResults = [], exactResults = [], semanticResults = []) {
  const firstByPath = new Map();
  const plainBranches = [
    ['lexical', lexicalResults],
    ['exact', exactResults],
  ];
  for (const [branch, rows] of plainBranches) {
    for (const row of rows) {
      if (!firstByPath.has(row.path)) {
        firstByPath.set(row.path, { ...row, matched_by: branch });
      }
    }
  }
  for (const row of semanticResults) {
    if (!firstByPath.has(row.path)) {
      firstByPath.set(row.path, { ...row, matched_by: 'semantic', matched_terms: row.matched_terms });
    }
  }
  return [...firstByPath.values()];
}
381
+
382
/**
 * Tell whether the registry file named by `recallCfg.intel.registry` (resolved
 * against `root`) declares at least one area.
 *
 * Any failure to read or parse the file counts as "no areas".
 *
 * @param {string} root Root directory.
 * @param {object} recallCfg Recall configuration; `intel.registry` is the registry path.
 * @returns {boolean}
 */
function registryHasAreas(root, recallCfg) {
  try {
    const registryFile = path.join(root, recallCfg.intel.registry);
    const registry = JSON.parse(fs.readFileSync(registryFile, 'utf8'));
    const areas = registry?.areas;
    if (!areas) return false;
    return Object.keys(areas).length > 0;
  } catch {
    // Missing file, bad JSON or missing config all mean there is nothing to route by.
    return false;
  }
}
390
+
391
+ // Routing auto-enables only when there is something to route by: an explicit
392
+ // --area, a registry with at least one area, or an explicit config flag. A bare
393
+ // init-ed vault (empty-areas registry) keeps the legacy exact-union-semantic
394
+ // behavior so existing back-compat is preserved.
395
/**
 * Decide whether the routed lexical branch is used.
 *
 * Precedence: an explicit `routing` of 'on'/true or 'off'/false wins, then a
 * boolean `recallCfg.routing.enabled`, and otherwise routing is enabled when an
 * area directory was resolved or the registry declares at least one area.
 *
 * @param {string|boolean} routing Caller override.
 * @param {string} areaDir Resolved area directory, if any.
 * @param {string} root Root directory.
 * @param {object} recallCfg Recall configuration.
 * @returns {boolean}
 */
function decideRouting(routing, areaDir, root, recallCfg) {
  switch (routing) {
    case 'on':
    case true:
      return true;
    case 'off':
    case false:
      return false;
    default:
      break;
  }
  const { enabled } = recallCfg.routing;
  if (typeof enabled === 'boolean') return enabled;
  if (areaDir) return true;
  return registryHasAreas(root, recallCfg);
}
402
+
403
/**
 * Check that the SQLite recall index matches the documents currently collected
 * for `root`, by comparing the stored manifest hash and row count with freshly
 * computed values. Read-only.
 *
 * @param {string} root Root directory to verify.
 * @returns {Promise<object>} Verification payload; `ok` requires an existing
 *   index whose hash and row count both match.
 */
export async function verifyRecall(root) {
  const status = await recallStatus(root);
  const docs = collectRecallDocs(root);
  const manifest = docs.map((doc) => {
    const { doc_id, source_hash, sensitivity } = doc;
    return { doc_id, path: doc.path, source_hash, sensitivity };
  });
  const expectedHash = sha256(stableJson(manifest));
  const hashMatches = status.source_manifest_hash === expectedHash;
  const countMatches = status.row_count === docs.length;
  return {
    ok: Boolean(status.db_exists && hashMatches && countMatches),
    command: 'recall verify',
    root,
    durable_write: false,
    db_path: dbRel,
    db_exists: status.db_exists,
    indexed_count: status.row_count,
    expected_count: docs.length,
    source_manifest_hash: status.source_manifest_hash,
    expected_manifest_hash: expectedHash,
    markdown_fallback: true,
    mismatch: !hashMatches || !countMatches,
  };
}
427
+
428
/**
 * Evaluate host-scoped recall against the journal of `root`.
 *
 * Dispatches to the case-file evaluator when `caseFile` is given and to the
 * synthetic evaluator when `fixtureSize` is positive. Otherwise every safe
 * journal event (up to `maxCases`) is searched under its own host (hit and
 * source-support checks) and under up to three other hosts (isolation check),
 * and every unsafe event (up to `unsafeProbeLimit`) is probed under all hosts
 * to detect leakage.
 *
 * @param {string} root Root directory to evaluate.
 * @param {object} [options]
 * @param {number} [options.top=5] Search depth, clamped to [1, 10].
 * @param {number} [options.minCases=2] Minimum evaluated cases required for `ok`.
 * @param {number} [options.maxCases=50] Maximum safe events evaluated, clamped to [1, 500].
 * @param {number} [options.fixtureSize=0] When positive, run the synthetic evaluation instead.
 * @param {number} [options.privateProbeCount=20] Forwarded to the synthetic evaluation.
 * @param {number} [options.unsafeProbeLimit=50] Maximum unsafe events probed, clamped to [0, 1000].
 * @param {string} [options.caseFile=''] When set, run the case-file evaluation instead.
 * @returns {Promise<object>} Evaluation payload; `missing_evidence` explains a failed gate.
 */
export async function evaluateCrossHostRecall(root, {
  top = 5,
  minCases = 2,
  maxCases = 50,
  fixtureSize = 0,
  privateProbeCount = 20,
  unsafeProbeLimit = 50,
  caseFile = '',
} = {}) {
  if (caseFile) {
    return evaluateRecallCaseFile(root, {
      top,
      minCases,
      caseFile,
    });
  }
  if (Number(fixtureSize || 0) > 0) {
    return evaluateSyntheticCrossHostRecall(root, {
      top,
      minCases: Math.max(Number(minCases || 2), Number(fixtureSize || 0)),
      fixtureSize: Number(fixtureSize || 0),
      privateProbeCount: Number(privateProbeCount || 20),
    });
  }
  const requiredCases = Number(minCases || 2);
  const journalIntegrity = verifyJournal(root);
  const allEvents = listJournalEvents(root, { limit: 1000 }).events;
  const events = allEvents
    .filter((event) => event.sensitivity !== 'private')
    .filter((event) => event.prompt_context_allowed !== false)
    .filter((event) => event.safety?.indexing_allowed !== false && event.safety?.cross_host_allowed !== false)
    .filter((event) => event.host && event.summary && safeToIndex(event.summary));
  const hosts = [...new Set(events.map((event) => event.host))].sort();
  const cases = [];
  const limit = Math.max(1, Math.min(Number(top || 5), 10));
  // A result belongs to an event when its path contains the event id.
  const matchesEvent = (event) => (item) => String(item.path || '').includes(event.event_id);
  for (const event of events.slice(0, Math.max(1, Math.min(Number(maxCases || 50), 500)))) {
    const query = queryForEvent(event);
    if (!query) continue;
    const sameHost = await searchRecall(root, query, { top: limit, host: event.host });
    const ownResults = sameHost.results.filter(matchesEvent(event));
    const hit = ownResults.length > 0;
    const sourceSupported = ownResults.some((item) => resultSupportsQuery(item, query));
    const otherHosts = hosts.filter((host) => host !== event.host);
    const isolationChecks = [];
    for (const otherHost of otherHosts.slice(0, 3)) {
      const other = await searchRecall(root, query, { top: limit, host: otherHost });
      isolationChecks.push({
        host: otherHost,
        leaked: other.results.some(matchesEvent(event)),
      });
    }
    cases.push({
      event_id: event.event_id,
      host: event.host,
      type: event.type,
      query,
      hit_at_top: hit,
      source_supported: sourceSupported,
      same_host_result_count: sameHost.results.length,
      fallback_used: Boolean(sameHost.fallback_used),
      isolated_from_other_hosts: isolationChecks.every((item) => !item.leaked),
      isolation_checks: isolationChecks,
    });
  }
  const evaluated = cases.length;
  const hits = cases.filter((item) => item.hit_at_top).length;
  const isolated = cases.filter((item) => item.isolated_from_other_hosts).length;
  const supported = cases.filter((item) => item.source_supported).length;
  const privateLeakageFindings = [];
  const unsafeEvents = allEvents
    .filter((event) => event.host && event.summary)
    .filter((event) => (
      event.sensitivity === 'private' ||
      event.prompt_context_allowed === false ||
      event.safety?.indexing_allowed === false ||
      event.safety?.cross_host_allowed === false
    ));
  const requestedUnsafeLimit = Number(unsafeProbeLimit);
  const unsafeLimit = Math.max(0, Math.min(Number.isFinite(requestedUnsafeLimit) ? requestedUnsafeLimit : 50, 1000));
  for (const event of unsafeEvents.slice(0, unsafeLimit)) {
    const query = queryForEvent(event);
    if (!query) continue;
    for (const host of hosts) {
      const result = await searchRecall(root, query, { top: limit, host });
      if (result.results.some(matchesEvent(event))) {
        privateLeakageFindings.push({ event_id: event.event_id, leaked_in_host: host });
        // One finding per unsafe event is enough.
        break;
      }
    }
  }
  const hitAtTop = evaluated ? hits / evaluated : 0;
  const isolationRate = evaluated ? isolated / evaluated : 0;
  const sourceSupportRate = evaluated ? supported / evaluated : 0;
  // Explain the first failed gate. Isolation and leakage failures used to make
  // `ok` false while leaving this field null; they are now reported after the
  // pre-existing reasons, whose wording and order are unchanged.
  const missingEvidence = !journalIntegrity.ok
    ? 'Journal integrity check failed. Run neurain journal verify before trusting cross-host recall eval.'
    : evaluated < requiredCases
      ? `Need at least ${requiredCases} safe journal event case(s) with host tags.`
      : sourceSupportRate < 0.95
        ? 'Recall hits did not provide enough source-supporting snippets.'
        : isolationRate < 1
          ? 'Recall hits leaked under other host filters.'
          : privateLeakageFindings.length > 0
            ? 'Private or unsafe journal events were returned by recall search.'
            : null;
  return {
    ok: journalIntegrity.ok && evaluated >= requiredCases && hitAtTop >= 0.9 && sourceSupportRate >= 0.95 && isolationRate === 1 && privateLeakageFindings.length === 0,
    command: 'recall eval',
    root,
    durable_write: false,
    eval_type: 'cross_host_recall',
    journal_integrity_ok: journalIntegrity.ok,
    journal_error_count: journalIntegrity.error_count,
    min_cases: requiredCases,
    top: limit,
    host_count: hosts.length,
    hosts,
    evaluated_cases: evaluated,
    hit_at_top: Number(hitAtTop.toFixed(3)),
    source_support_rate: Number(sourceSupportRate.toFixed(3)),
    host_isolation_rate: Number(isolationRate.toFixed(3)),
    private_leakage_findings: privateLeakageFindings,
    cases,
    metric_scope: 'real_root_host_scoped_event_recall',
    semantic_recall_quality_evaluated: false,
    missing_evidence: missingEvidence,
  };
}
550
+
551
+ async function evaluateRecallCaseFile(root, {
552
+ top = 5,
553
+ minCases = 1,
554
+ caseFile = '',
555
+ } = {}) {
556
+ const beforeTarget = snapshotTargetWriteSurface(root);
557
+ const loaded = loadRecallEvalCaseFile(root, caseFile);
558
+ const limit = Math.max(1, Math.min(Number(top || 5), 10));
559
+ const cases = [];
560
+ const privateLeakageFindings = [];
561
+ const journalIntegrity = verifyJournal(root);
562
+ for (const item of loaded.cases) {
563
+ const query = String(item.query || '').trim();
564
+ if (!query) {
565
+ cases.push({
566
+ id: item.id || '',
567
+ query,
568
+ host: item.host || '',
569
+ hit_at_top: false,
570
+ source_supported: false,
571
+ isolated_from_wrong_hosts: true,
572
+ error: 'Missing query.',
573
+ });
574
+ continue;
575
+ }
576
+ const host = String(item.host || '');
577
+ const expectedPathContains = normalizeStringList(item.expected_path_contains);
578
+ const expectedTerms = normalizeStringList(item.expected_terms);
579
+ const forbiddenPathContains = normalizeStringList(item.forbidden_path_contains);
580
+ const wrongHosts = normalizeStringList(item.wrong_hosts);
581
+ const sameHost = await searchRecall(root, query, { top: limit, host });
582
+ const hitResults = expectedPathContains.length
583
+ ? sameHost.results.filter((result) => resultMatchesAnyPath(result, expectedPathContains))
584
+ : sameHost.results;
585
+ const leakageFindings = [];
586
+ for (const result of sameHost.results) {
587
+ const resultPath = String(result.path || '');
588
+ if (result.sensitivity === 'private') {
589
+ leakageFindings.push({ id: item.id || '', path: resultPath, reason: 'private result returned' });
590
+ }
591
+ for (const marker of forbiddenPathContains) {
592
+ if (resultPath.includes(marker)) {
593
+ leakageFindings.push({ id: item.id || '', path: resultPath, reason: `forbidden path marker: ${marker}` });
594
+ }
595
+ }
596
+ }
597
+ privateLeakageFindings.push(...leakageFindings);
598
+ const isolationChecks = [];
599
+ for (const wrongHost of wrongHosts.slice(0, 10)) {
600
+ const wrong = await searchRecall(root, query, { top: limit, host: wrongHost });
601
+ isolationChecks.push({
602
+ host: wrongHost,
603
+ leaked: expectedPathContains.length
604
+ ? wrong.results.some((result) => resultMatchesAnyPath(result, expectedPathContains))
605
+ : wrong.results.length > 0,
606
+ });
607
+ }
608
+ const supportTerms = expectedTerms.length ? expectedTerms : normalizeStringList(query.match(/[\p{L}\p{N}_-]+/gu) || []);
609
+ cases.push({
610
+ id: item.id || '',
611
+ query,
612
+ host,
613
+ expected_path_contains: expectedPathContains,
614
+ expected_terms: supportTerms,
615
+ hit_at_top: hitResults.length > 0,
616
+ source_supported: hitResults.some((result) => resultSupportsTerms(result, supportTerms)),
617
+ same_host_result_count: sameHost.results.length,
618
+ fallback_used: Boolean(sameHost.fallback_used),
619
+ isolated_from_wrong_hosts: isolationChecks.every((check) => !check.leaked),
620
+ isolation_checks: isolationChecks,
621
+ leakage_findings: leakageFindings,
622
+ });
623
+ }
624
+ const afterTarget = snapshotTargetWriteSurface(root);
625
+ const targetUntouched = stableJson(beforeTarget) === stableJson(afterTarget);
626
+ const evaluated = cases.length;
627
+ const hits = cases.filter((item) => item.hit_at_top).length;
628
+ const supported = cases.filter((item) => item.source_supported).length;
629
+ const isolated = cases.filter((item) => item.isolated_from_wrong_hosts).length;
630
+ const hitAtTop = evaluated ? hits / evaluated : 0;
631
+ const sourceSupportRate = evaluated ? supported / evaluated : 0;
632
+ const isolationRate = evaluated ? isolated / evaluated : 0;
633
+ const requiredCases = Number(minCases || 1);
634
+ const missingEvidence = !journalIntegrity.ok
635
+ ? 'Journal integrity check failed. Run neurain journal verify before trusting reviewed recall eval.'
636
+ : evaluated < requiredCases
637
+ ? `Need at least ${requiredCases} reviewed recall case(s).`
638
+ : hitAtTop < 0.9
639
+ ? 'Reviewed recall cases did not meet Hit@top gate.'
640
+ : sourceSupportRate < 0.95
641
+ ? 'Reviewed recall cases did not provide enough source-supporting snippets.'
642
+ : isolationRate < 1
643
+ ? 'Reviewed recall cases leaked expected hits under wrong host filters.'
644
+ : privateLeakageFindings.length > 0
645
+ ? 'Reviewed recall cases returned private or forbidden paths.'
646
+ : !targetUntouched
647
+ ? 'Recall case-file eval touched the target root.'
648
+ : null;
649
+ return {
650
+ ok: Boolean(journalIntegrity.ok && evaluated >= requiredCases && hitAtTop >= 0.9 && sourceSupportRate >= 0.95 && isolationRate === 1 && privateLeakageFindings.length === 0 && targetUntouched),
651
+ command: 'recall eval',
652
+ root,
653
+ durable_write: false,
654
+ model_calls: false,
655
+ external_tool_calls: false,
656
+ eval_type: 'cross_host_recall_cases',
657
+ metric_scope: 'reviewed_recall_case_file',
658
+ reviewed_recall_quality_evaluated: true,
659
+ semantic_recall_quality_evaluated: false,
660
+ case_file: loaded.rel,
661
+ journal_integrity_ok: journalIntegrity.ok,
662
+ journal_error_count: journalIntegrity.error_count,
663
+ min_cases: requiredCases,
664
+ top: limit,
665
+ host_count: new Set(cases.map((item) => item.host).filter(Boolean)).size,
666
+ hosts: [...new Set(cases.map((item) => item.host).filter(Boolean))].sort(),
667
+ evaluated_cases: evaluated,
668
+ hit_at_top: Number(hitAtTop.toFixed(3)),
669
+ source_support_rate: Number(sourceSupportRate.toFixed(3)),
670
+ host_isolation_rate: Number(isolationRate.toFixed(3)),
671
+ private_leakage_findings: privateLeakageFindings,
672
+ target_root_untouched: targetUntouched,
673
+ cases,
674
+ missing_evidence: missingEvidence,
675
+ };
676
+ }
677
+
678
/**
 * Runs the cross-host recall evaluation against a throwaway synthetic fixture.
 *
 * A temporary root is created under the OS temp directory and seeded with
 * `wiki/memory.md`, `safeCount` host-tagged recall events (hosts assigned
 * round-robin) and `privateCount` private-scoped events. The recall index is
 * rebuilt on that fixture and `evaluateCrossHostRecall` is run against it. The
 * target root's write surface is snapshotted before and after so the payload
 * can report whether the evaluation touched it.
 *
 * @param {string} targetRoot - Root reported in the payload and checked for writes.
 * @param {object} [options]
 * @param {number} [options.top=5] - Result window forwarded to the evaluation.
 * @param {number} [options.minCases=100] - Minimum case count forwarded to the evaluation.
 * @param {number} [options.fixtureSize=100] - Requested safe events (clamped to 1..500).
 * @param {number} [options.privateProbeCount=20] - Requested private events (clamped to 0..200).
 * @returns {Promise<object>} The evaluation payload extended with fixture metadata.
 */
async function evaluateSyntheticCrossHostRecall(targetRoot, {
  top = 5,
  minCases = 100,
  fixtureSize = 100,
  privateProbeCount = 20,
} = {}) {
  const safeCount = Math.max(1, Math.min(Number(fixtureSize || 100), 500));
  const privateCount = Math.max(0, Math.min(Number(privateProbeCount || 20), 200));
  const hosts = ['codex', 'claude', 'runtime', 'gemini'];
  // Snapshot BEFORE creating the temp fixture: the snapshot reads the target
  // root and can throw, and nothing outside the try/finally below would remove
  // the temp directory if it did.
  const beforeTarget = snapshotTargetWriteSurface(targetRoot);
  const fixtureRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'neurain-recall-eval-'));
  let payload;
  try {
    ensureDir(safeResolve(fixtureRoot, 'wiki'));
    fs.writeFileSync(
      safeResolve(fixtureRoot, 'wiki/memory.md'),
      '# Synthetic Recall Fixture\n\nGenerated fixture for cross-host recall evaluation.\n',
      'utf8'
    );
    // Safe events: every summary carries host+serial specific tokens.
    for (let index = 0; index < safeCount; index += 1) {
      const host = hosts[index % hosts.length];
      const serial = String(index + 1).padStart(3, '0');
      appendJournalEvent(fixtureRoot, {
        type: 'recall',
        host,
        source: 'wiki/memory.md',
        summary: `topic${host}${serial} marker${host}${serial} proof${host}${serial} recall${host}${serial} preserved for host scoped continuity.`,
        confirm: '1건 저장 진행',
      });
    }
    // Private probes: events written with a private scope and a private source path.
    for (let index = 0; index < privateCount; index += 1) {
      const host = hosts[index % hosts.length];
      const serial = String(index + 1).padStart(3, '0');
      appendJournalEvent(fixtureRoot, {
        type: 'recall',
        host,
        scope: 'private:synthetic-recall-eval',
        source: '10_areas/_private/current/private-area-brief.md',
        summary: `privatetopic${host}${serial} privatemarker${host}${serial} should not index across hosts.`,
        confirm: '1건 저장 진행',
      });
    }
    const rebuild = await rebuildRecall(fixtureRoot, { dryRun: false });
    const evaluated = await evaluateCrossHostRecall(fixtureRoot, {
      top,
      minCases,
      maxCases: safeCount,
      fixtureSize: 0,
      privateProbeCount,
      unsafeProbeLimit: privateCount,
    });
    const afterTarget = snapshotTargetWriteSurface(targetRoot);
    const targetUntouched = stableJson(beforeTarget) === stableJson(afterTarget);
    payload = {
      ...evaluated,
      root: targetRoot,
      eval_type: 'cross_host_recall_fixture',
      metric_scope: 'synthetic_exact_token_host_filter_regression',
      semantic_recall_quality_evaluated: false,
      fixture: {
        generated: true,
        safe_cases: safeCount,
        private_probe_cases: privateCount,
        hosts,
        target_root_untouched: targetUntouched,
        temp_root_retained: null,
        rebuild_indexed_count: rebuild.indexed_count,
        rebuild_manifest_hash: rebuild.source_manifest_hash,
      },
      ok: Boolean(evaluated.ok && targetUntouched),
      missing_evidence: evaluated.missing_evidence,
    };
  } finally {
    // Always remove the fixture, including when the evaluation throws.
    fs.rmSync(fixtureRoot, { recursive: true, force: true });
  }
  // Cleanup is verified after the fact and folded into the overall verdict.
  payload.fixture.temp_root_retained = fs.existsSync(fixtureRoot);
  payload.ok = Boolean(payload.ok && !payload.fixture.temp_root_retained);
  return payload;
}
757
+
758
+ // E22: semantic recall quality eval. Proves the local lexical-semantic layer
759
+ // recalls paraphrased / morphological / synonym queries that pure exact-token
760
+ // recall misses, while preserving host isolation, private exclusion, no-answer
761
+ // abstention, rebuild equivalence, and target-root non-write. Each concept pair
762
+ // is [stored_word, query_word]: both map to the same synonym canon but share no
763
+ // surface token, so the query never exact-matches the stored doc.
764
// Concept pairs used by the synthetic semantic fixture, one pair per row:
// [word written into the stored doc, word used in the query].
const SEMANTIC_CONCEPT_PAIRS = [
  ['resolved', 'fixed'],
  ['defect', 'bug'],
  ['authentication', 'login'],
  ['removed', 'delete'],
  ['created', 'add'],
  ['modified', 'change'],
  ['rapid', 'fast'],
  ['documentation', 'docs'],
  ['configuration', 'config'],
  ['verified', 'test'],
  ['reverted', 'rollback'],
  ['inspected', 'review'],
  ['handoff', 'session'],
  ['retrieval', 'recall'],
  ['insight', 'lesson'],
  ['automobile', 'car'],
  ['large', 'big'],
  ['minor', 'small'],
  ['launched', 'start'],
  ['terminated', 'stop'],
  ['secure', 'safe'],
  ['customer', 'user'],
  ['roadmap', 'plan'],
  ['compiled', 'build'],
];
772
// Probe queries for which the semantic eval expects an empty result list.
const SEMANTIC_NO_ANSWER_PROBES = [
  'galaxy nebula asteroid comet',
  'pineapple umbrella violin pottery',
  'glacier volcano tundra savanna',
  'origami carousel lantern hammock',
];
776
+
777
/**
 * Enumerates up to `count` k-element index combinations drawn from
 * `0..poolSize-1`, in lexicographic order.
 *
 * @param {number} count - Maximum number of combinations to return.
 * @param {number} poolSize - Size of the index pool.
 * @param {number} k - Number of indices per combination.
 * @returns {number[][]} Combinations as ascending index arrays. Empty when `k`
 *   is not a non-negative integer or exceeds `poolSize`, since no valid
 *   combination exists for those inputs.
 */
function lexicalSemanticCombos(count, poolSize, k) {
  // Without this guard the seed combination [0..k-1] would be emitted even when
  // it contains indices outside the pool.
  if (!Number.isInteger(k) || k < 0 || k > poolSize) return [];
  const out = [];
  const idx = Array.from({ length: k }, (_, i) => i);
  while (out.length < count) {
    out.push(idx.slice());
    // Find the rightmost position that has not reached its maximum value.
    let i = k - 1;
    while (i >= 0 && idx[i] === poolSize - k + i) i -= 1;
    if (i < 0) break; // every position is maxed out: enumeration is complete
    idx[i] += 1;
    // Reset everything to the right to the smallest ascending continuation.
    for (let j = i + 1; j < k; j += 1) idx[j] = idx[j - 1] + 1;
  }
  return out;
}
790
+
791
/**
 * Rounds a rate to three decimal places.
 *
 * @param {*} value - Value to round; falsy values (including NaN) count as 0.
 * @returns {number} The value formatted with `toFixed(3)` and converted back to a number.
 */
function roundRate3(value) {
  const numeric = Number(value || 0);
  const fixed = numeric.toFixed(3);
  return Number(fixed);
}
794
+
795
+ // E23: live-content recall coverage. Read-only. Measures, on a REAL folder's actual
796
+ // indexed content (not a synthetic fixture), how much of that content is recallable when
797
+ // queried with auto-derived paraphrases (synonym-swapped real terms). Reports semantic vs
798
+ // exact-token coverage and the lift, plus per-kind coverage, so a real user can see whether
799
+ // the current recall is good enough on their own material and what it misses. The output is
800
+ // metrics-only (no content is stored or returned), so it is safe to run on a private vault.
801
+ // Honest scope: queries are auto-derived, not human-judged relevance; this is a real-content
802
+ // step, not a substitute for external user walkthroughs.
803
/**
 * Measures recall coverage on the documents collected from `root`.
 *
 * For each sampled eligible document a query is derived from it, then the
 * semantic, exact-token and hybrid result lists are checked for that
 * document's path. Only aggregate metrics are returned.
 *
 * @param {string} root - Folder whose recall documents are evaluated.
 * @param {object} [options]
 * @param {number} [options.top=5] - Result window (clamped to 1..10).
 * @param {number} [options.sampleSize=60] - Max eligible docs sampled (clamped to 1..500).
 * @param {string} [options.provider='local-lexical'] - Semantic provider name.
 * @param {number} [options.minScore=0.34] - Minimum score passed to the semantic search.
 * @returns {Promise<object>} Coverage metrics payload.
 */
export async function evaluateLiveRecall(root, { top = 5, sampleSize = 60, provider = 'local-lexical', minScore = 0.34 } = {}) {
  const prov = getProvider(provider);
  const recallCfg = recallConfig(root);
  const docs = collectRecallDocs(root, { recallCfg });
  const recallableKinds = ['wiki', 'handoff', 'session', 'product', 'lessons', 'log'];
  const eligible = docs.filter((doc) => /^event:/.test(doc.kind) || recallableKinds.includes(doc.kind));
  const sampleCap = Math.max(1, Math.min(Number(sampleSize || 60), 500));
  const sample = eligible.slice(0, sampleCap);
  const limit = Math.max(1, Math.min(Number(top || 5), 10));
  // Measure the same hybrid the recall command returns: the routed 3-branch
  // merge when routing applies, otherwise the legacy 2-branch merge. The
  // lexical context is built once and reused for every sampled document.
  const routed = decideRouting('auto', '', root, recallCfg);
  const lexicalCtx = routed ? buildLexicalContext(root, { recallCfg }) : null;
  const containsPath = (results, wantedPath) => results.some((result) => result.path === wantedPath);

  const cases = [];
  for (const doc of sample) {
    const paraphrase = liveParaphraseQuery(doc);
    const query = paraphrase.query;
    if (!query) continue;
    const semantic = await semanticSearchRecall(root, query, { top: limit, host: '', provider, minScore });
    const exact = await searchRecall(root, query, { top: limit, host: '' });
    const semanticHit = containsPath(semantic.results, doc.path);
    const exactHit = containsPath(exact.results, doc.path);
    const lexical = lexicalCtx ? lexicalSearchWithContext(lexicalCtx, query, { top: limit }) : null;
    let hybridResults;
    if (lexical) {
      hybridResults = mergeRoutedHybridResults(lexical.results, exact.results, semantic.results);
    } else {
      hybridResults = mergeHybridResults(exact.results, semantic.results);
    }
    cases.push({
      kind: doc.kind,
      paraphrased: paraphrase.swapped,
      semantic_hit: semanticHit,
      exact_hit: exactHit,
      hybrid_hit: containsPath(hybridResults, doc.path),
    });
  }

  const evaluated = cases.length;
  const paraphrasable = cases.filter((entry) => entry.paraphrased);
  // Share of entries in `list` whose `key` flag is truthy (0 for an empty list).
  const cover = (list, key) => {
    if (!list.length) return 0;
    return list.filter((entry) => entry[key]).length / list.length;
  };

  const perKind = {};
  for (const entry of cases) {
    if (!perKind[entry.kind]) perKind[entry.kind] = { cases: 0, semantic_hits: 0 };
    const bucket = perKind[entry.kind];
    bucket.cases += 1;
    if (entry.semantic_hit) bucket.semantic_hits += 1;
  }
  const perKindCoverage = Object.fromEntries(
    Object.entries(perKind).map(([kind, value]) => [
      kind,
      { cases: value.cases, semantic_coverage: roundRate3(value.semantic_hits / value.cases) },
    ])
  );

  const semanticCoverage = cover(cases, 'semantic_hit');
  const exactCoverage = cover(cases, 'exact_hit');
  const hybridCoverage = cover(cases, 'hybrid_hit');
  const paraphraseSemantic = cover(paraphrasable, 'semantic_hit');
  const paraphraseExact = cover(paraphrasable, 'exact_hit');

  return {
    // The gate uses HYBRID coverage compared against exact-token coverage;
    // pure semantic coverage is reported alongside for transparency.
    ok: evaluated > 0 && hybridCoverage >= exactCoverage && prov.model_call === false,
    command: 'recall live-eval',
    root,
    durable_write: false,
    model_calls: Boolean(prov.model_call),
    external_tool_calls: Boolean(prov.external_call),
    eval_type: 'live_content_recall_coverage',
    metric_scope: 'real_folder_content_recall_coverage_auto_paraphrase',
    semantic_recall_quality_evaluated: true,
    live_evidence: true,
    human_judged: false,
    embedding_provider: prov.name,
    llm_locked: Boolean(prov.llm_locked),
    corpus_doc_count: docs.length,
    recall_eligible_count: eligible.length,
    evaluated_cases: evaluated,
    paraphrasable_cases: paraphrasable.length,
    top: limit,
    semantic_coverage: roundRate3(semanticCoverage),
    exact_token_coverage: roundRate3(exactCoverage),
    hybrid_coverage: roundRate3(hybridCoverage),
    hybrid_lift_over_exact: roundRate3(hybridCoverage - exactCoverage),
    routed_recall: routed,
    recommended_strategy: 'hybrid',
    coverage_lift: roundRate3(semanticCoverage - exactCoverage),
    paraphrase_semantic_coverage: roundRate3(paraphraseSemantic),
    paraphrase_exact_coverage: roundRate3(paraphraseExact),
    paraphrase_lift: roundRate3(paraphraseSemantic - paraphraseExact),
    per_kind_coverage: perKindCoverage,
    content_stored: false,
    note: 'Real-folder content recall coverage with auto-derived paraphrase queries. This is a real-content measurement (not a synthetic fixture and not human-judged relevance); it does not replace external user walkthroughs. Output is metrics-only with no stored content. On real corpora the pure semantic-only coverage can be LOWER than exact-token because the local lexical-semantic layer lacks the rarity weighting exact-token BM25 has (this regression was found by running live-eval on real content). The recommended strategy is hybrid (exact-token union semantic), whose coverage is always at least exact-token coverage plus the paraphrase catches; the per-term semantic win is validated separately by the E22 semantic eval.',
  };
}
887
+
888
/**
 * Derives a query from a document's title and snippet.
 *
 * Takes the first six distinct tokens of at least three characters and
 * replaces each with `alternativeForm(token)` when one is returned.
 *
 * @param {{title?: string, snippet?: string}} doc - Document to derive the query from.
 * @returns {{query: string, swapped: boolean}} The space-joined query and whether
 *   any token was replaced; `query` is empty when fewer than two tokens qualify.
 */
function liveParaphraseQuery(doc) {
  const MAX_TERMS = 6;
  const MIN_TERM_LENGTH = 3;
  const tokens = tokenize(`${doc.title || ''} ${doc.snippet || ''}`);
  const unique = new Set();
  const chosen = [];
  for (const token of tokens) {
    const usable = token.length >= MIN_TERM_LENGTH && !unique.has(token);
    if (!usable) continue;
    unique.add(token);
    chosen.push(token);
    if (chosen.length >= MAX_TERMS) break;
  }
  if (chosen.length < 2) return { query: '', swapped: false };

  let swapped = false;
  const queryTerms = [];
  for (const token of chosen) {
    const alt = alternativeForm(token);
    if (alt) {
      swapped = true;
      queryTerms.push(alt);
    } else {
      queryTerms.push(token);
    }
  }
  return { query: queryTerms.join(' '), swapped };
}
907
+
908
/**
 * Entry point for the semantic recall evaluation.
 *
 * Dispatches to the case-file evaluation when `caseFile` is set, otherwise to
 * the synthetic fixture evaluation.
 *
 * @param {string} root - Root the evaluation is run for.
 * @param {object} [options]
 * @param {number} [options.top=5]
 * @param {number} [options.minCases=50]
 * @param {number} [options.fixtureSize=60] - Used by the synthetic evaluation only.
 * @param {string} [options.caseFile=''] - Case file path; selects the case-file evaluation.
 * @param {string} [options.provider='local-lexical']
 * @param {number} [options.minScore=0.34]
 * @returns {Promise<object>} The payload produced by the selected evaluation.
 */
export async function evaluateSemanticRecall(root, {
  top = 5,
  minCases = 50,
  fixtureSize = 60,
  caseFile = '',
  provider = 'local-lexical',
  minScore = 0.34,
} = {}) {
  const shared = { top, minCases, provider, minScore };
  return caseFile
    ? evaluateSemanticRecallCaseFile(root, { ...shared, caseFile })
    : evaluateSyntheticSemanticRecall(root, { ...shared, fixtureSize });
}
919
+
920
/**
 * Synthetic semantic recall evaluation.
 *
 * Builds a temporary fixture whose documents contain the "stored" word of
 * several concept pairs, queries it with the paired "query" words, and gates on
 * hit rate, improvement over the exact-token baseline, source support, host
 * isolation, private-result exclusion, no-answer abstention, rebuild
 * equivalence, target-root non-write, provider call flags and temp cleanup.
 *
 * @param {string} targetRoot - Root reported in the payload and checked for writes.
 * @param {object} options
 * @param {number} options.top - Result window (clamped to 1..10).
 * @param {number} options.minCases - Minimum evaluated cases for the gate (default 50 when falsy).
 * @param {number} options.fixtureSize - Requested doc count (clamped to 4..500; 60 when falsy or NaN).
 * @param {string} options.provider - Semantic provider name.
 * @param {number} options.minScore - Minimum score passed to the semantic search.
 * @returns {Promise<object>} Evaluation payload including the individual gates.
 */
async function evaluateSyntheticSemanticRecall(targetRoot, { top, minCases, fixtureSize, provider, minScore }) {
  const hosts = ['codex', 'claude', 'runtime', 'gemini'];
  // A non-numeric fixtureSize would make the clamp yield NaN, generating no
  // documents at all and crashing later on docMeta[0]; fall back to the default.
  const requestedSize = Number(fixtureSize || 60);
  const safeCount = Math.max(4, Math.min(Number.isNaN(requestedSize) ? 60 : requestedSize, 500));
  const combos = lexicalSemanticCombos(safeCount, SEMANTIC_CONCEPT_PAIRS.length, 4);
  const limit = Math.max(1, Math.min(Number(top || 5), 10));
  // Snapshot BEFORE creating the temp fixture: the snapshot reads the target
  // root and can throw, and nothing outside the try/finally below would remove
  // the temp directory if it did.
  const beforeTarget = snapshotTargetWriteSurface(targetRoot);
  const fixtureRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'neurain-semantic-recall-eval-'));
  let payload;
  try {
    ensureDir(safeResolve(fixtureRoot, 'wiki'));
    fs.writeFileSync(safeResolve(fixtureRoot, 'wiki/memory.md'), '# Semantic Recall Fixture\n\nGenerated fixture for semantic recall evaluation.\n', 'utf8');
    const docMeta = [];
    for (let i = 0; i < combos.length; i += 1) {
      const host = hosts[i % hosts.length];
      const serial = String(i + 1).padStart(3, '0');
      const docWords = combos[i].map((idx) => SEMANTIC_CONCEPT_PAIRS[idx][0]);
      const queryWords = combos[i].map((idx) => SEMANTIC_CONCEPT_PAIRS[idx][1]);
      // Neutral filler tokens (entryref/closingref) are deliberately NOT in any synonym
      // group, so only the docWords carry concept canons and the template never pollutes the
      // concept space. Each query is strongly discriminative to its own doc; where equal-score
      // ties occur, the per-doc serial embedded in the snippet anchors a fully deterministic
      // tie-break (independent of the random event id in the path), so the source lands at a
      // stable in-window rank on every run and Hit@5 is run-invariant. Note: a few queries tie
      // and the source can sit at the edge of the top-K window, so widening the tie group or
      // shrinking top below 5 would need the fixture revisited (tracked as future hardening).
      const emit = appendJournalEvent(fixtureRoot, {
        type: 'recall',
        host,
        source: 'wiki/memory.md',
        summary: `entryref${serial} ${docWords.join(' ')} closingref${serial}.`,
        confirm: '1건 저장 진행',
      });
      docMeta.push({ event_id: emit.event.event_id, host, queryWords });
    }
    const privateIds = [];
    const privateCount = Math.max(4, Math.min(Math.round(safeCount * 0.2), 200));
    for (let i = 0; i < privateCount; i += 1) {
      const host = hosts[i % hosts.length];
      const serial = String(i + 1).padStart(3, '0');
      const emit = appendJournalEvent(fixtureRoot, {
        type: 'recall',
        host,
        scope: 'private:semantic-eval',
        source: '10_areas/_private/current/private-area-brief.md',
        summary: `privateentry${host}${serial} confidential sensitive secure record should not surface across hosts.`,
        confirm: '1건 저장 진행',
      });
      privateIds.push(emit.event.event_id);
    }
    const rebuild = await rebuildRecall(fixtureRoot, { dryRun: false });

    // Per-document cases: semantic hit, exact-token baseline, support, isolation.
    const cases = [];
    for (const meta of docMeta) {
      const query = meta.queryWords.join(' ');
      const semantic = await semanticSearchRecall(fixtureRoot, query, { top: limit, host: meta.host, provider, minScore });
      const semanticHit = semantic.results.some((item) => String(item.path || '').includes(meta.event_id));
      const exact = await searchRecall(fixtureRoot, query, { top: limit, host: meta.host });
      const exactHit = exact.results.some((item) => String(item.path || '').includes(meta.event_id));
      const sourceSupported = semantic.results
        .filter((item) => String(item.path || '').includes(meta.event_id))
        .some((item) => (item.matched_terms || []).length >= 2);
      const wrongHost = hosts.find((host) => host !== meta.host);
      const wrong = await semanticSearchRecall(fixtureRoot, query, { top: limit, host: wrongHost, provider, minScore });
      const isolated = !wrong.results.some((item) => String(item.path || '').includes(meta.event_id));
      cases.push({ event_id: meta.event_id, host: meta.host, query, semantic_hit: semanticHit, exact_hit: exactHit, source_supported: sourceSupported, isolated_from_wrong_host: isolated });
    }

    // Private leakage probe. The query does not depend on the private id, so it
    // is run once per host and the result paths are reused for every id.
    const probePathsByHost = [];
    for (const host of hosts) {
      const result = await semanticSearchRecall(fixtureRoot, 'confidential sensitive secure record', { top: limit, host, provider, minScore });
      probePathsByHost.push({ host, paths: result.results.map((item) => String(item.path || '')) });
    }
    const privateLeakageFindings = [];
    for (const privateId of privateIds) {
      // Report only the first host (in `hosts` order) that surfaces the event.
      const leak = probePathsByHost.find((entry) => entry.paths.some((resultPath) => resultPath.includes(privateId)));
      if (leak) privateLeakageFindings.push({ event_id: privateId, leaked_in_host: leak.host });
    }

    let noAnswerCorrect = 0;
    for (const probe of SEMANTIC_NO_ANSWER_PROBES) {
      const result = await semanticSearchRecall(fixtureRoot, probe, { top: limit, provider, minScore });
      if (result.results.length === 0) noAnswerCorrect += 1;
    }

    // Rebuild equivalence: the same query must return the same paths, in the
    // same order, before and after a second rebuild.
    const probeQuery = docMeta[0].queryWords.join(' ');
    const beforeRebuild = await semanticSearchRecall(fixtureRoot, probeQuery, { top: limit, host: docMeta[0].host, provider, minScore });
    await rebuildRecall(fixtureRoot, { dryRun: false });
    const afterRebuild = await semanticSearchRecall(fixtureRoot, probeQuery, { top: limit, host: docMeta[0].host, provider, minScore });
    const rebuildEquivalent = stableJson(beforeRebuild.results.map((item) => item.path)) === stableJson(afterRebuild.results.map((item) => item.path));

    const afterTarget = snapshotTargetWriteSurface(targetRoot);
    const targetUntouched = stableJson(beforeTarget) === stableJson(afterTarget);
    const evaluated = cases.length;
    const semanticHitRate = evaluated ? cases.filter((item) => item.semantic_hit).length / evaluated : 0;
    const exactHitRate = evaluated ? cases.filter((item) => item.exact_hit).length / evaluated : 0;
    const sourceSupportRate = evaluated ? cases.filter((item) => item.source_supported).length / evaluated : 0;
    const isolationRate = evaluated ? cases.filter((item) => item.isolated_from_wrong_host).length / evaluated : 0;
    const noAnswerRate = SEMANTIC_NO_ANSWER_PROBES.length ? noAnswerCorrect / SEMANTIC_NO_ANSWER_PROBES.length : 1;
    const prov = getProvider(provider);
    const gates = {
      enough_cases: evaluated >= Number(minCases || 50),
      semantic_hit_at_top: semanticHitRate >= 0.9,
      semantic_beats_exact_baseline: semanticHitRate >= exactHitRate + 0.2,
      source_support: sourceSupportRate >= 0.9,
      host_isolation: isolationRate === 1,
      no_private_leakage: privateLeakageFindings.length === 0,
      no_answer_abstention: noAnswerRate === 1,
      rebuild_equivalent: rebuildEquivalent,
      target_root_untouched: targetUntouched,
      no_model_calls: prov.model_call === false,
      no_external_calls: prov.external_call === false,
    };
    payload = {
      ok: Object.values(gates).every(Boolean),
      command: 'recall eval',
      root: targetRoot,
      durable_write: false,
      model_calls: Boolean(prov.model_call),
      external_tool_calls: Boolean(prov.external_call),
      eval_type: 'semantic_recall_fixture',
      metric_scope: 'synthetic_lexical_semantic_recall_quality_regression',
      semantic_recall_quality_evaluated: true,
      embedding_provider: prov.name,
      provider_kind: prov.kind,
      provider_requires_index: Boolean(prov.requires_index),
      llm_locked: Boolean(prov.llm_locked),
      min_cases: Number(minCases || 50),
      top: limit,
      host_count: hosts.length,
      hosts,
      evaluated_cases: evaluated,
      semantic_hit_at_top: roundRate3(semanticHitRate),
      exact_token_baseline_hit_at_top: roundRate3(exactHitRate),
      semantic_improvement: roundRate3(semanticHitRate - exactHitRate),
      source_support_rate: roundRate3(sourceSupportRate),
      host_isolation_rate: roundRate3(isolationRate),
      private_leakage_findings: privateLeakageFindings,
      no_answer_abstention_rate: roundRate3(noAnswerRate),
      rebuild_equivalent: rebuildEquivalent,
      target_root_untouched: targetUntouched,
      gates,
      fixture: {
        generated: true,
        safe_cases: docMeta.length,
        private_probe_cases: privateCount,
        hosts,
        target_root_untouched: targetUntouched,
        temp_root_retained: null,
        rebuild_indexed_count: rebuild.indexed_count,
      },
      failed_cases: cases.filter((item) => !item.semantic_hit || !item.isolated_from_wrong_host).map((item) => ({ event_id: item.event_id, semantic_hit: item.semantic_hit, isolated: item.isolated_from_wrong_host })),
    };
  } finally {
    // Always remove the fixture, including when the evaluation throws.
    fs.rmSync(fixtureRoot, { recursive: true, force: true });
  }
  // Cleanup is verified after the fact and folded into the gates and verdict.
  payload.fixture.temp_root_retained = fs.existsSync(fixtureRoot);
  payload.temp_root_cleanup_verified = !payload.fixture.temp_root_retained;
  payload.gates.temp_root_cleanup = !payload.fixture.temp_root_retained;
  payload.ok = Boolean(payload.ok && !payload.fixture.temp_root_retained);
  return payload;
}
1081
+
1082
/**
 * Evaluates semantic recall against the cases listed in a case file.
 *
 * Each case is searched with `semanticSearchRecall`; results are checked for
 * the expected paths (or for emptiness when `expect_no_answer` is set), for
 * private or forbidden paths, and for leakage under each listed wrong host
 * (at most ten per case).
 *
 * @param {string} root - Root searched and checked for writes.
 * @param {object} options
 * @param {number} options.top - Result window (clamped to 1..10).
 * @param {number} options.minCases - Minimum evaluated cases (1 when falsy).
 * @param {string} options.caseFile - Case file path passed to `loadRecallEvalCaseFile`.
 * @param {string} options.provider - Semantic provider name.
 * @param {number} options.minScore - Minimum score passed to the semantic search.
 * @returns {Promise<object>} Evaluation payload with per-case details.
 */
async function evaluateSemanticRecallCaseFile(root, { top, minCases, caseFile, provider, minScore }) {
  const beforeTarget = snapshotTargetWriteSurface(root);
  const loaded = loadRecallEvalCaseFile(root, caseFile);
  const limit = Math.max(1, Math.min(Number(top || 5), 10));
  const searchAs = (query, host) => semanticSearchRecall(root, query, { top: limit, host, provider, minScore });
  const cases = [];
  const privateLeakageFindings = [];

  for (const item of loaded.cases) {
    const query = String(item.query || '').trim();
    const host = String(item.host || '');
    const expectedPathContains = normalizeStringList(item.expected_path_contains);
    const expectedTerms = normalizeStringList(item.expected_terms);
    const forbiddenPathContains = normalizeStringList(item.forbidden_path_contains);
    const wrongHosts = normalizeStringList(item.wrong_hosts);
    const expectNoAnswer = Boolean(item.expect_no_answer);
    const caseId = item.id || '';
    const hasExpectedPaths = expectedPathContains.length > 0;

    if (!query) {
      // A case without a query is recorded as a miss rather than searched.
      cases.push({ id: caseId, query, host, semantic_hit: false, source_supported: false, isolated_from_wrong_hosts: true, error: 'Missing query.' });
      continue;
    }

    const semantic = await searchAs(query, host);
    const hitResults = hasExpectedPaths
      ? semantic.results.filter((result) => resultMatchesAnyPath(result, expectedPathContains))
      : semantic.results;

    for (const result of semantic.results) {
      const resultPath = String(result.path || '');
      if (result.sensitivity === 'private') {
        privateLeakageFindings.push({ id: caseId, path: resultPath, reason: 'private result returned' });
      }
      for (const marker of forbiddenPathContains) {
        if (!resultPath.includes(marker)) continue;
        privateLeakageFindings.push({ id: caseId, path: resultPath, reason: `forbidden path marker: ${marker}` });
      }
    }

    const isolationChecks = [];
    for (const wrongHost of wrongHosts.slice(0, 10)) {
      const wrong = await searchAs(query, wrongHost);
      let leaked;
      if (hasExpectedPaths) {
        leaked = wrong.results.some((result) => resultMatchesAnyPath(result, expectedPathContains));
      } else {
        leaked = wrong.results.length > 0;
      }
      isolationChecks.push({ host: wrongHost, leaked });
    }

    // Without explicit expected terms, the query's own word tokens are used.
    const supportTerms = expectedTerms.length ? expectedTerms : normalizeStringList(query.match(/[\p{L}\p{N}_-]+/gu) || []);
    const semanticHit = expectNoAnswer ? semantic.results.length === 0 : hitResults.length > 0;
    let sourceSupported = true;
    if (!expectNoAnswer) {
      sourceSupported = hitResults.some((result) => resultSupportsTerms(result, supportTerms) || (result.matched_terms || []).length >= 1);
    }
    cases.push({
      id: caseId,
      query,
      host,
      expect_no_answer: expectNoAnswer,
      semantic_hit: semanticHit,
      source_supported: sourceSupported,
      isolated_from_wrong_hosts: isolationChecks.every((check) => !check.leaked),
      isolation_checks: isolationChecks,
    });
  }

  const afterTarget = snapshotTargetWriteSurface(root);
  const targetUntouched = stableJson(beforeTarget) === stableJson(afterTarget);
  const evaluated = cases.length;
  // Share of cases whose `flag` is truthy (0 when there are no cases).
  const rateOf = (flag) => (evaluated ? cases.filter((entry) => entry[flag]).length / evaluated : 0);
  const semanticHitRate = rateOf('semantic_hit');
  const sourceSupportRate = rateOf('source_supported');
  const isolationRate = rateOf('isolated_from_wrong_hosts');
  const requiredCases = Number(minCases || 1);
  const prov = getProvider(provider);
  const passed = evaluated >= requiredCases
    && semanticHitRate >= 0.9
    && sourceSupportRate >= 0.9
    && isolationRate === 1
    && privateLeakageFindings.length === 0
    && targetUntouched;
  const distinctHosts = new Set(cases.map((entry) => entry.host).filter(Boolean));

  return {
    ok: Boolean(passed),
    command: 'recall eval',
    root,
    durable_write: false,
    model_calls: Boolean(prov.model_call),
    external_tool_calls: Boolean(prov.external_call),
    eval_type: 'semantic_recall_cases',
    metric_scope: 'reviewed_lexical_semantic_recall_case_file',
    semantic_recall_quality_evaluated: true,
    reviewed_semantic_recall_quality_evaluated: true,
    embedding_provider: prov.name,
    llm_locked: Boolean(prov.llm_locked),
    case_file: loaded.rel,
    min_cases: requiredCases,
    top: limit,
    host_count: distinctHosts.size,
    hosts: [...distinctHosts].sort(),
    evaluated_cases: evaluated,
    semantic_hit_at_top: roundRate3(semanticHitRate),
    source_support_rate: roundRate3(sourceSupportRate),
    host_isolation_rate: roundRate3(isolationRate),
    private_leakage_findings: privateLeakageFindings,
    target_root_untouched: targetUntouched,
    cases,
  };
}
1167
+
1168
/**
 * Fingerprints a fixed list of paths under `root` so two snapshots can be
 * compared to detect writes.
 *
 * @param {string} root - Root the tracked relative paths are resolved against.
 * @returns {Object<string, {exists: boolean, hash: string, type?: string}>}
 *   One entry per tracked relative path. Files are hashed by content;
 *   directories by the sorted list of `relative-path:content-hash` lines.
 */
function snapshotTargetWriteSurface(root) {
  const trackedPaths = [
    '00_system/neurain/events.ndjson',
    '00_system/neurain/recall.sqlite',
    '00_system/neurain/recall.sqlite-wal',
    '00_system/neurain/recall.sqlite-shm',
    'output/receipts/events',
    'output/receipts/recall',
  ];

  const fingerprint = (rel) => {
    let abs;
    try {
      abs = safeResolve(root, rel);
    } catch {
      // A path that cannot be resolved is recorded, not treated as fatal.
      return { exists: false, hash: 'invalid-root' };
    }
    if (!fs.existsSync(abs)) return { exists: false, hash: '' };
    if (!fs.statSync(abs).isDirectory()) {
      return { exists: true, type: 'file', hash: sha256(fs.readFileSync(abs)) };
    }
    const entries = walkFiles(abs, { includeRaw: false, maxFiles: 10000 })
      .map((file) => `${relPath(abs, file)}:${sha256(fs.readFileSync(file))}`)
      .sort();
    return { exists: true, type: 'directory', hash: sha256(entries.join('\n')) };
  };

  return Object.fromEntries(trackedPaths.map((rel) => [rel, fingerprint(rel)]));
}
1202
+
1203
/**
 * Checks whether every word token of `query` occurs (case-insensitively, as a
 * substring) in the result's title or snippet.
 *
 * @param {{title?: string, snippet?: string}} item - Search result.
 * @param {string} query - Query text.
 * @returns {boolean} False when the query has no word tokens.
 */
function resultSupportsQuery(item, query) {
  const haystack = `${item.title || ''} ${item.snippet || ''}`.toLowerCase();
  const tokens = String(query || '').toLowerCase().match(/[\p{L}\p{N}_-]+/gu);
  if (!tokens || tokens.length === 0) return false;
  for (const token of tokens) {
    if (!haystack.includes(token)) return false;
  }
  return true;
}
1208
+
1209
/**
 * Checks whether every term occurs (case-insensitively, as a substring) in the
 * result's title or snippet.
 *
 * @param {{title?: string, snippet?: string}} item - Search result.
 * @param {*} terms - Terms, normalized through `normalizeStringList`.
 * @returns {boolean} False when no terms remain after normalization.
 */
function resultSupportsTerms(item, terms) {
  const haystack = `${item.title || ''} ${item.snippet || ''}`.toLowerCase();
  const wanted = normalizeStringList(terms).map((term) => term.toLowerCase());
  if (wanted.length === 0) return false;
  for (const term of wanted) {
    if (!haystack.includes(term)) return false;
  }
  return true;
}
1214
+
1215
/**
 * Checks whether the result's path contains at least one of the matchers.
 *
 * @param {{path?: string}} item - Search result.
 * @param {*} matchers - Path fragments, normalized through `normalizeStringList`.
 * @returns {boolean} True on the first matcher found as a substring of the path.
 */
function resultMatchesAnyPath(item, matchers) {
  const candidatePath = String(item.path || '');
  for (const fragment of normalizeStringList(matchers)) {
    if (candidatePath.includes(fragment)) return true;
  }
  return false;
}
1219
+
1220
/**
 * Normalizes a scalar or an array into a list of trimmed, non-empty strings.
 *
 * @param {*} value - An array, a single value, or null/undefined.
 * @returns {string[]} Trimmed strings. Null/undefined entries and entries that
 *   are blank after trimming are dropped; other values (including `0` and
 *   `false`) are stringified, so array entries are treated exactly like a
 *   single scalar value.
 */
function normalizeStringList(value) {
  if (value === null || value === undefined) return [];
  const items = Array.isArray(value) ? value : [value];
  return items
    // Only null/undefined mean "absent"; `item || ''` would also discard 0 and false.
    .map((item) => (item === null || item === undefined ? '' : String(item).trim()))
    .filter(Boolean);
}
1225
+
1226
/**
 * Loads a recall evaluation case file located under `root`.
 *
 * @param {string} root - Root the case file path is resolved against.
 * @param {string} caseFile - Case file path.
 * @returns {{rel: string, cases: Array}} The root-relative path and the case list.
 * @throws {Error} When the file does not exist, is not valid JSON, or is
 *   neither an array nor an object with a `cases` array.
 */
function loadRecallEvalCaseFile(root, caseFile) {
  const abs = safeResolve(root, caseFile);
  const rel = relPath(root, abs);
  if (!fs.existsSync(abs)) throw new Error(`Recall eval case file not found: ${caseFile}`);
  let parsed;
  try {
    parsed = JSON.parse(readText(abs, '{}'));
  } catch (error) {
    throw new Error(`Recall eval case file is not valid JSON: ${error.message}`, { cause: error });
  }
  // `null` is valid JSON; guard the property read so it reaches the shape
  // error below instead of failing with a TypeError.
  const cases = Array.isArray(parsed) ? parsed : parsed && parsed.cases;
  if (!Array.isArray(cases)) throw new Error('Recall eval case file must be an array or an object with a cases array.');
  return { rel, cases };
}
1240
+
1241
/**
 * Renders the recall status either as JSON or as a Markdown text report.
 *
 * @param {string} root - Root passed to `recallStatus`.
 * @param {{json?: boolean}} args - Parsed CLI arguments.
 * @returns {Promise<{json: true, payload: object} | {text: string}>}
 */
function renderRecallStatus(root, args) {
  const yesNo = (flag) => (flag ? 'yes' : 'no');
  const toText = (payload) => {
    const lines = [];
    lines.push('# Neurain recall status');
    lines.push('');
    lines.push(`- Root: ${root}`);
    lines.push(`- SQLite available: ${yesNo(payload.sqlite_available)}`);
    lines.push(`- DB exists: ${yesNo(payload.db_exists)}`);
    lines.push(`- Rows: ${payload.row_count}`);
    lines.push('- Markdown fallback: yes');
    return lines.join('\n');
  };
  return recallStatus(root).then((payload) => {
    if (args.json) return { json: true, payload };
    return { text: toText(payload) };
  });
}
1254
+
1255
/**
 * Run `rebuildRecall` and render its result for the CLI.
 *
 * @param {string} root - Workspace root passed to `rebuildRecall`.
 * @param {object} args - Parsed CLI arguments; reads `dry-run` and `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text summary.
 */
function renderRecallRebuild(root, args) {
  const options = { dryRun: Boolean(args['dry-run']) };
  return rebuildRecall(root, options).then((payload) => {
    if (args.json) return { json: true, payload };
    const yesNo = (flag) => (flag ? 'yes' : 'no');
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = [
      `# Neurain recall rebuild${payload.dry_run ? ' [dry-run]' : ''}`,
      `- Root: ${root}`,
      `- OK: ${yesNo(payload.ok)}`,
      `- Indexed docs: ${payload.indexed_count}`,
      `- Durable write: ${yesNo(payload.durable_write)}`,
    ];
    if (payload.receipt_path) lines.push(`- Receipt: ${payload.receipt_path}`);
    return { text: lines.join('\n') };
  });
}
1268
+
1269
/**
 * Run `searchRecall` and render the matches for the CLI.
 *
 * The query comes from `args.query`, falling back to the positional arguments
 * joined with spaces. The scope is derived from `args.area`.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `query`, `_`, `area`, `top`, `host`, `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text listing.
 */
function renderRecallSearch(root, args) {
  const query = args.query || args._.join(' ');
  const areaDir = resolveAreaDir(root, args.area || '');
  const options = {
    top: Number(args.top || 10),
    host: args.host || '',
    scope: scopeForArea(areaDir),
  };
  return searchRecall(root, query, options).then((payload) => {
    if (args.json) return { json: true, payload };
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = ['# Neurain recall search', `- Query: ${query}`, `- Source: ${payload.source}`];
    for (const item of payload.results) {
      lines.push(`- ${item.path}: ${item.title || item.snippet}`);
    }
    if (!payload.results.length) lines.push('No matches found.');
    return { text: lines.join('\n') };
  });
}
1283
+
1284
/**
 * Run `verifyRecall` and render its result for the CLI.
 *
 * @param {string} root - Workspace root passed to `verifyRecall`.
 * @param {{json?: boolean}} args - Parsed CLI arguments.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text summary.
 */
function renderRecallVerify(root, args) {
  const present = (payload) => {
    if (args.json) return { json: true, payload };
    const okLabel = payload.ok ? 'yes' : 'no';
    const lines = [
      '# Neurain recall verify',
      '',
      `- OK: ${okLabel}`,
      `- Indexed: ${payload.indexed_count}`,
      `- Expected: ${payload.expected_count}`,
    ];
    return { text: lines.join('\n') };
  };
  return verifyRecall(root).then(present);
}
1295
+
1296
/**
 * Run `semanticSearchRecall` and render the matches for the CLI.
 *
 * Defaults: top 10, provider `local-lexical`, minimum score 0.34. The scope is
 * derived from `args.area`.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `query`, `_`, `area`,
 *   `top`, `host`, `provider`, `min-score`, `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text listing.
 */
function renderRecallSemanticSearch(root, args) {
  const query = args.query || args._.join(' ');
  const scope = scopeForArea(resolveAreaDir(root, args.area || ''));
  const rawMinScore = args['min-score'];
  const options = {
    top: Number(args.top || 10),
    host: args.host || '',
    provider: args.provider || 'local-lexical',
    minScore: rawMinScore !== undefined ? Number(rawMinScore) : 0.34,
    scope,
  };
  return semanticSearchRecall(root, query, options).then((payload) => {
    if (args.json) return { json: true, payload };
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = [
      '# Neurain recall semantic-search',
      `- Query: ${query}`,
      `- Provider: ${payload.embedding_provider} (${payload.provider_kind})`,
      `- Model call: ${payload.model_calls ? 'yes' : 'no'}`,
    ];
    for (const item of payload.results) {
      lines.push(`- ${item.path} [${item.score}]: ${item.title || item.snippet}`);
    }
    if (!payload.results.length) lines.push('No semantic matches found.');
    return { text: lines.join('\n') };
  });
}
1317
+
1318
/**
 * Run `hybridSearchRecall` (exact + semantic) and render the matches for the CLI.
 *
 * Defaults: top 10, provider `local-lexical`, minimum score 0.34, routing
 * `auto`. The raw `args.area` value is forwarded as `area`.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `query`, `_`, `area`,
 *   `top`, `host`, `provider`, `min-score`, `routing`, `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text listing.
 */
function renderRecallHybridSearch(root, args) {
  const query = args.query || args._.join(' ');
  const rawMinScore = args['min-score'];
  const options = {
    top: Number(args.top || 10),
    host: args.host || '',
    provider: args.provider || 'local-lexical',
    minScore: rawMinScore !== undefined ? Number(rawMinScore) : 0.34,
    area: args.area || '',
    routing: args.routing || 'auto',
  };
  return hybridSearchRecall(root, query, options).then((payload) => {
    if (args.json) return { json: true, payload };
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = [
      '# Neurain recall hybrid-search (exact + semantic)',
      `- Query: ${query}`,
      `- Strategy: ${payload.strategy} | model call: ${payload.model_calls ? 'yes' : 'no'}`,
      `- Exact results: ${payload.exact_result_count} | semantic added: ${payload.semantic_only_added}`,
    ];
    for (const item of payload.results) {
      lines.push(`- [${item.matched_by}] ${item.path}: ${item.title || item.snippet}`);
    }
    if (!payload.results.length) lines.push('No matches found.');
    return { text: lines.join('\n') };
  });
}
1339
+
1340
/**
 * Lexical-only search: exposes the routed lexical branch directly (the vault's
 * neurain-search equivalent), with area-relative output in --area mode. This is
 * the clean target for the vault search shim: same scorer, same fields. Hybrid
 * remains the recommended runtime recall; this is the BM25+boosts view.
 *
 * Runs synchronously, unlike the promise-based recall renderers.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `query`, `_`, `area`,
 *   `top`, `max-per-layer`, `include-queue`, `json`.
 * @returns {{json: true, payload: object} | {text: string}} The payload when
 *   `args.json` is set, otherwise a text listing with per-result signals.
 * @throws {Error} When the query is empty or whitespace-only.
 */
function renderRecallLexicalSearch(root, args) {
  const query = args.query || args._.join(' ');
  if (String(query).trim() === '') throw new Error('Recall lexical-search requires a query.');

  const recallCfg = recallConfig(root);
  const areaDir = resolveAreaDir(root, args.area || '', recallCfg);
  const context = buildLexicalContext(root, { area: areaDir, recallCfg });
  const found = lexicalSearchWithContext(context, query, {
    top: Number(args.top || 10),
    maxPerLayer: Number(args['max-per-layer'] || 3),
    includeQueue: Boolean(args['include-queue']),
  });

  // In --area mode, report paths relative to the area directory.
  const areaPrefix = areaDir ? `${recallCfg.areas_dir}/${areaDir}/` : '';
  const results = found.results.map((item) => {
    if (areaPrefix && item.path.startsWith(areaPrefix)) {
      return { ...item, path: item.path.slice(areaPrefix.length) };
    }
    return item;
  });

  const payload = {
    ok: true,
    command: 'recall lexical-search',
    root,
    durable_write: false,
    query: String(query),
    top: found.top,
    area: String(args.area || ''),
    area_dir: areaDir,
    results,
  };
  if (args.json) return { json: true, payload };

  // Signals are listed strongest first as "name +value".
  const describeSignals = (signals) => {
    if (!signals || Object.keys(signals).length === 0) return '';
    return Object.entries(signals)
      .sort((a, b) => b[1] - a[1])
      .map(([k, v]) => `${k} +${v}`)
      .join(', ');
  };

  // Only non-empty lines are emitted, so there is no blank line after the heading.
  const lines = ['# Neurain recall lexical-search'];
  for (const item of results) {
    const why = describeSignals(item.signals);
    lines.push(`- ${item.path}: score ${item.score}, ${item.snippet}${why ? `\n why: ${why}` : ''}`);
  }
  if (!results.length) lines.push('No matches found.');
  return { text: lines.join('\n') };
}
1382
+
1383
/**
 * Run `evaluateLiveRecall` and render the coverage report for the CLI.
 *
 * Defaults: top 5, sample size 60, provider `local-lexical`, minimum score 0.34.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `top`, `sample-size`,
 *   `provider`, `min-score`, `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text report.
 */
function renderRecallLiveEval(root, args) {
  const rawMinScore = args['min-score'];
  const options = {
    top: Number(args.top || 5),
    sampleSize: Number(args['sample-size'] || 60),
    provider: args.provider || 'local-lexical',
    minScore: rawMinScore !== undefined ? Number(rawMinScore) : 0.34,
  };
  const yesNo = (flag) => (flag ? 'yes' : 'no');
  const toText = (payload) => {
    const perKindLines = Object.entries(payload.per_kind_coverage)
      .map(([kind, value]) => ` - ${kind}: ${value.semantic_coverage} (${value.cases} items)`);
    const lines = [
      '# Neurain recall live-eval (real-folder content)',
      '',
      `- OK: ${yesNo(payload.ok)}`,
      `- Corpus docs: ${payload.corpus_doc_count} (recall-eligible: ${payload.recall_eligible_count})`,
      `- Evaluated real items: ${payload.evaluated_cases} (paraphrasable: ${payload.paraphrasable_cases})`,
      `- Hybrid coverage (exact + semantic, recommended): ${payload.hybrid_coverage} (always >= exact; lift over exact ${payload.hybrid_lift_over_exact})`,
      `- Exact-token coverage: ${payload.exact_token_coverage}`,
      `- Semantic-only coverage: ${payload.semantic_coverage} (can be lower than exact on real corpora, since the lexical-semantic layer lacks exact-token rarity weighting; this is why hybrid is recommended)`,
      `- Paraphrase-only: semantic ${payload.paraphrase_semantic_coverage} vs exact ${payload.paraphrase_exact_coverage}`,
      `- Provider: ${payload.embedding_provider} | model call: ${yesNo(payload.model_calls)} | content stored: ${yesNo(payload.content_stored)}`,
      `- Human-judged relevance: ${payload.human_judged ? 'yes' : 'no (auto-derived paraphrase queries)'}`,
      '',
      'Per-kind semantic coverage:',
      ...perKindLines,
      '',
      payload.note,
    ];
    return lines.join('\n');
  };
  return evaluateLiveRecall(root, options).then((payload) => {
    if (args.json) return { json: true, payload };
    return { text: toText(payload) };
  });
}
1410
+
1411
/**
 * Format a semantic recall eval payload as a text report.
 *
 * Optional metrics (exact-token baseline, semantic improvement, no-answer
 * abstention, rebuild equivalence, temp-root cleanup) are listed only when the
 * corresponding payload field is not `undefined`.
 *
 * @param {object} payload - Eval result; `private_leakage_findings` must have a `length`.
 * @returns {string} Newline-joined report with no blank lines.
 */
function renderSemanticEvalText(payload) {
  const yesNo = (flag) => (flag ? 'yes' : 'no');
  const isSet = (field) => payload[field] !== undefined;

  const lines = [
    '# Neurain recall eval (semantic)',
    `- OK: ${yesNo(payload.ok)}`,
    `- Type: ${payload.eval_type}`,
    `- Provider: ${payload.embedding_provider}`,
    `- Cases: ${payload.evaluated_cases}`,
    `- Semantic Hit@${payload.top}: ${payload.semantic_hit_at_top}`,
  ];
  if (isSet('exact_token_baseline_hit_at_top')) {
    lines.push(`- Exact-token baseline Hit@${payload.top}: ${payload.exact_token_baseline_hit_at_top}`);
  }
  if (isSet('semantic_improvement')) {
    lines.push(`- Semantic improvement: ${payload.semantic_improvement}`);
  }
  lines.push(
    `- Source support: ${payload.source_support_rate}`,
    `- Host isolation: ${payload.host_isolation_rate}`,
    `- Private leakage findings: ${payload.private_leakage_findings.length}`,
  );
  if (isSet('no_answer_abstention_rate')) {
    lines.push(`- No-answer abstention: ${payload.no_answer_abstention_rate}`);
  }
  if (isSet('rebuild_equivalent')) {
    lines.push(`- Rebuild equivalent: ${yesNo(payload.rebuild_equivalent)}`);
  }
  lines.push(`- Target root untouched: ${yesNo(payload.target_root_untouched)}`);
  if (isSet('temp_root_cleanup_verified')) {
    lines.push(`- Temp root cleanup verified: ${yesNo(payload.temp_root_cleanup_verified)}`);
  }
  return lines.join('\n');
}
1431
+
1432
/**
 * Run a recall eval and render it for the CLI.
 *
 * With `args.semantic` the semantic eval runs (`evaluateSemanticRecall`, text
 * via `renderSemanticEvalText`); otherwise the cross-host eval runs
 * (`evaluateCrossHostRecall`). The minimum case count defaults to 1 when a
 * case file is given, else 50 (semantic) or 2 (cross-host).
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `semantic`, `case-file`,
 *   `top`, `min-cases`, `max-cases`, `fixture-size`, `cases`, `private-probes`,
 *   `provider`, `min-score`, `json`.
 * @returns {Promise<{json: true, payload: object} | {text: string}>} The raw
 *   payload when `args.json` is set, otherwise a text report.
 */
function renderRecallEval(root, args) {
  const caseFile = args['case-file'] || '';
  const wrap = (toText) => (payload) => (args.json ? { json: true, payload } : { text: toText(payload) });

  if (args.semantic) {
    const rawMinScore = args['min-score'];
    const semanticOptions = {
      top: Number(args.top || 5),
      minCases: Number(args['min-cases'] || (caseFile ? 1 : 50)),
      fixtureSize: Number(args['fixture-size'] || 60),
      caseFile,
      provider: args.provider || 'local-lexical',
      minScore: rawMinScore !== undefined ? Number(rawMinScore) : 0.34,
    };
    return evaluateSemanticRecall(root, semanticOptions).then(wrap(renderSemanticEvalText));
  }

  const fixtureSize = Number(args['fixture-size'] || args.cases || 0);
  const crossHostOptions = {
    top: Number(args.top || 5),
    minCases: Number(args['min-cases'] || (caseFile ? 1 : 2)),
    maxCases: Number(args['max-cases'] || fixtureSize || 50),
    fixtureSize,
    privateProbeCount: Number(args['private-probes'] || 20),
    caseFile,
  };
  const crossHostText = (payload) => {
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = [
      '# Neurain recall eval',
      `- OK: ${payload.ok ? 'yes' : 'no'}`,
      `- Type: ${payload.eval_type}`,
      `- Hosts: ${payload.host_count}`,
      `- Cases: ${payload.evaluated_cases}`,
      `- Journal integrity: ${payload.journal_integrity_ok ? 'ok' : 'failed'}`,
      `- Hit@${payload.top}: ${payload.hit_at_top}`,
      `- Source support: ${payload.source_support_rate}`,
      `- Host isolation: ${payload.host_isolation_rate}`,
      `- Private leakage findings: ${payload.private_leakage_findings.length}`,
    ];
    if (payload.case_file) lines.push(`- Case file: ${payload.case_file}`);
    if (payload.fixture?.generated) lines.push(`- Fixture safe cases: ${payload.fixture.safe_cases}`);
    if (payload.missing_evidence) lines.push(`- Missing evidence: ${payload.missing_evidence}`);
    return lines.join('\n');
  };
  return evaluateCrossHostRecall(root, crossHostOptions).then(wrap(crossHostText));
}
1472
+
1473
/**
 * Run `benchRecall` and render the result for the CLI.
 *
 * Three text layouts exist: a "no suites" notice, a single-case drill-down
 * (when the payload carries `case`), and the per-suite summary with the
 * overall line and baseline gate.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `suites`, `suite-dir`,
 *   `area`, `top`, `baseline`, `matcher`, `max-per-layer`, `routing`, `case`,
 *   `explain`, `json`.
 * @returns {{json: true, payload: object} | {text: string}} The payload when
 *   `args.json` is set, otherwise the text report.
 */
function renderRecallBench(root, args) {
  // Optional numeric flags stay undefined when absent.
  const optionalNumber = (raw) => (raw !== undefined ? Number(raw) : undefined);
  const payload = benchRecall(root, {
    suites: args.suites || args['suite-dir'] || '',
    area: args.area || '',
    top: Number(args.top || 5),
    baseline: optionalNumber(args.baseline),
    matcher: args.matcher || 'strict',
    maxPerLayer: optionalNumber(args['max-per-layer']),
    routing: args.routing || 'auto',
    caseId: args.case || '',
    explain: Boolean(args.explain),
  });
  if (args.json) return { json: true, payload };
  if (payload.reason === 'no_suites') {
    return { text: `# Neurain recall bench\n\n- No suites found. ${payload.note || ''}` };
  }

  const yesNo = (flag) => (flag ? 'yes' : 'no');

  if (payload.case) {
    const detail = payload.case;
    // Only non-empty lines are emitted, so there is no blank line after the heading.
    const lines = [
      `# Neurain recall bench [case ${detail.id}]`,
      `- OK: ${yesNo(payload.ok)}`,
      `- Question: ${detail.question}`,
      `- Source hit (strict/loose): ${yesNo(detail.source_ok_strict)} / ${yesNo(detail.source_ok_loose)}`,
      `- Entity hit: ${yesNo(detail.entity_ok)}`,
      `- Expected in corpus: ${JSON.stringify(detail.expected_in_corpus)}`,
    ];
    if (detail.missing_expected.length) lines.push(`- Missing: ${detail.missing_expected.join(', ')}`);
    for (const r of detail.top_results || []) {
      lines.push(` [${r.layer}] ${r.path} (score ${r.score}) ${JSON.stringify(r.signals)}`);
    }
    return { text: lines.join('\n') };
  }

  const pct = (x) => `${(x * 100).toFixed(1)}%`;
  const passFail = (flag) => (flag ? 'PASS' : 'FAIL');
  const { overall, gate } = payload;
  const suiteLines = payload.suites.map(
    (r) => `- ${r.suite}: src ${pct(r.source_recall)} (loose ${pct(r.source_recall_loose)}) / ent ${pct(r.entity_recall)} ${passFail(r.passed)} (${r.cases})`,
  );
  const overallLine = `- OVERALL: src ${pct(overall.source_recall)} (loose ${pct(overall.source_recall_loose)}) / ent ${pct(overall.entity_recall)} (${overall.cases} cases)`;
  const gateLine = gate
    ? `- GATE: ${pct(gate.value)} vs baseline ${pct(gate.baseline)} -> ${passFail(gate.passed)}`
    : '- GATE: (no baseline set)';
  const summary = [
    `# Neurain recall bench (area=${payload.area || 'all'}, top-${payload.top}, ${payload.strategy})`,
    '',
    ...suiteLines,
    overallLine,
    gateLine,
    `- ok: ${yesNo(payload.ok)}`,
  ];
  return { text: summary.join('\n') };
}
1515
+
1516
/**
 * Run `scorecardRecall` and render the metrics for the CLI.
 *
 * @param {string} root - Workspace root.
 * @param {object} args - Parsed CLI arguments; reads `suites`, `suite-dir`,
 *   `area`, `top`, `matcher`, `max-per-layer`, `json`.
 * @returns {{json: true, payload: object} | {text: string}} The payload when
 *   `args.json` is set, otherwise the text scorecard.
 */
function renderRecallScorecard(root, args) {
  const rawMaxPerLayer = args['max-per-layer'];
  const options = {
    suites: args.suites || args['suite-dir'] || '',
    area: args.area || '',
    top: Number(args.top || 5),
    matcher: args.matcher || 'strict',
    maxPerLayer: rawMaxPerLayer !== undefined ? Number(rawMaxPerLayer) : undefined,
  };
  const payload = scorecardRecall(root, options);

  if (args.json) return { json: true, payload };
  if (payload.reason === 'no_suites') {
    return { text: '# Neurain recall scorecard\n\n- No suites found.' };
  }

  const { top } = payload;
  const lines = [
    `# Neurain recall scorecard (area=${payload.area || 'all'}, top-${top})`,
    '',
    `- Cases: ${payload.cases}`,
    `- Hit@${top}: ${payload.hit_at_k}`,
    `- R@${top}: ${payload.recall_at_k}`,
    `- MRR: ${payload.mrr}`,
    `- Entity-R: ${payload.entity_recall}`,
    `- Latency: avg ${payload.latency_ms_avg}ms / p95 ${payload.latency_ms_p95}ms`,
  ];
  return { text: lines.join('\n') };
}
1539
+
1540
/**
 * Probe whether the built-in `node:sqlite` module can be loaded.
 *
 * Never rejects: an import failure is reported through the result object.
 *
 * @returns {Promise<{available: boolean, DatabaseSync: Function|null, error: string|null}>}
 *   The `DatabaseSync` export on success, otherwise the import error message.
 */
async function sqliteCapability() {
  let sqliteModule;
  try {
    sqliteModule = await import('node:sqlite');
  } catch (error) {
    return { available: false, DatabaseSync: null, error: error.message };
  }
  return { available: true, DatabaseSync: sqliteModule.DatabaseSync, error: null };
}
1548
+
1549
/**
 * Create a fresh recall index database at `file` and fill it with `docs`.
 *
 * The schema is created with plain `CREATE TABLE` statements. All document,
 * FTS and metadata rows are inserted inside a single transaction, after which
 * the WAL is checkpointed with TRUNCATE. The database is always closed and its
 * `-wal`/`-shm` files cleaned up, whether or not the build succeeded.
 *
 * @param {Function} DatabaseSync - Database constructor (the `node:sqlite` export).
 * @param {string} file - Path of the database file to create.
 * @param {Array<object>} docs - Documents with `doc_id`, `path`, `kind`, `host`,
 *   `scope`, `sensitivity`, `source_hash`, `title`, `snippet` and `body`.
 * @param {string} manifestHash - Stored under the `source_manifest_hash` meta key.
 * @throws Re-throws any database error after attempting a ROLLBACK.
 */
function buildSqliteIndex(DatabaseSync, file, docs, manifestHash) {
  const db = new DatabaseSync(file);
  try {
    db.exec(`
      PRAGMA journal_mode = WAL;
      PRAGMA synchronous = NORMAL;
      CREATE TABLE recall_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
      CREATE TABLE recall_docs (
      doc_id TEXT PRIMARY KEY,
      path TEXT NOT NULL,
      kind TEXT NOT NULL,
      host TEXT NOT NULL,
      scope TEXT NOT NULL,
      sensitivity TEXT NOT NULL,
      source_hash TEXT NOT NULL,
      title TEXT NOT NULL,
      snippet TEXT NOT NULL,
      indexed_at TEXT NOT NULL
      );
      CREATE VIRTUAL TABLE recall_fts USING fts5(doc_id UNINDEXED, title, body, tokenize = 'unicode61');
      CREATE INDEX recall_docs_path_idx ON recall_docs(path);
      CREATE INDEX recall_docs_host_idx ON recall_docs(host);
    `);
    const docStatement = db.prepare(`
      INSERT INTO recall_docs (doc_id, path, kind, host, scope, sensitivity, source_hash, title, snippet, indexed_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    const ftsStatement = db.prepare('INSERT INTO recall_fts (doc_id, title, body) VALUES (?, ?, ?)');
    // One timestamp is shared by every row and by the `rebuilt_at` meta entry.
    const indexedAt = timestamp();

    db.exec('BEGIN');
    for (const doc of docs) {
      const docRow = [
        doc.doc_id, doc.path, doc.kind, doc.host, doc.scope,
        doc.sensitivity, doc.source_hash, doc.title, doc.snippet, indexedAt,
      ];
      docStatement.run(...docRow);
      ftsStatement.run(doc.doc_id, doc.title, doc.body);
    }
    const metaStatement = db.prepare('INSERT INTO recall_meta (key, value) VALUES (?, ?)');
    const metaRows = [
      ['rebuilt_at', indexedAt],
      ['source_manifest_hash', manifestHash],
      ['fts5_available', 'true'],
      ['markdown_canonical', 'true'],
      ['row_count', String(docs.length)],
    ];
    for (const [key, value] of metaRows) {
      metaStatement.run(key, value);
    }
    db.exec('COMMIT');
    db.exec('PRAGMA wal_checkpoint(TRUNCATE)');
  } catch (error) {
    // Best effort: ROLLBACK itself fails when no transaction is open.
    try { db.exec('ROLLBACK'); } catch {}
    throw error;
  } finally {
    db.close();
    cleanupSqliteAuxFiles(file);
  }
}
1599
+
1600
/**
 * Gather every recall document (markdown, journal events, receipts) for `root`.
 *
 * Documents are de-duplicated by `doc_id` (the last one collected wins) and
 * returned sorted by path.
 *
 * @param {string} root - Workspace root.
 * @param {{recallCfg?: object}} [options] - `recallCfg` defaults to `recallConfig(root)`.
 * @returns {Array<object>} Unique documents ordered by `path`.
 */
function collectRecallDocs(root, { recallCfg = recallConfig(root) } = {}) {
  const markdownDocs = collectMarkdownDocs(root, recallCfg);
  const eventDocs = collectEventDocs(root);
  const receiptDocs = collectReceiptDocs(root);
  const byId = new Map(
    [...markdownDocs, ...eventDocs, ...receiptDocs].map((doc) => [doc.doc_id, doc]),
  );
  const ordered = Array.from(byId.values());
  ordered.sort((a, b) => a.path.localeCompare(b.path));
  return ordered;
}
1610
+
1611
/**
 * Build recall documents from the markdown files selected by
 * `listRecallMarkdownFiles`.
 *
 * W-A corpus expansion: the markdown corpus now includes the general area
 * knowledge class (10_areas/<area>/**.md), hubs, and the area registry, not
 * just current/log/product. Private exclusion is decided at collection time by
 * the label resolver (per-file frontmatter + area baseline + boundary path
 * markers), which fixes the old substring gate that dropped `..._tokenomics/`
 * because the path contained `token`. config.recall.include/exclude extend the
 * whitelist.
 *
 * @param {string} root - Workspace root.
 * @param {object} [recallCfg] - Recall configuration; defaults to `recallConfig(root)`.
 * @returns {Array<object>} One document per listed file, with host `markdown`.
 */
function collectMarkdownDocs(root, recallCfg = recallConfig(root)) {
  const toDoc = (entry) => {
    const { rel, text, sensitivity } = entry;
    return docFromText({
      path: rel,
      kind: kindForPath(rel),
      host: 'markdown',
      scope: scopeForPath(rel),
      sensitivity,
      title: titleForText(text, rel),
      body: text,
    });
  };
  return listRecallMarkdownFiles(root, recallCfg).map(toDoc);
}
1628
+
1629
/**
 * Build recall documents from journal events (at most 1000 are requested).
 *
 * An event is skipped when it is marked private, disallows prompt context,
 * indexing or cross-host use, or when its summary fails `safeToIndex`.
 *
 * @param {string} root - Workspace root passed to `listJournalEvents`.
 * @returns {Array<object>} One document per indexable event, sensitivity `internal`.
 */
function collectEventDocs(root) {
  const events = listJournalEvents(root, { limit: 1000 }).events || [];

  // Checks run in this order and stop at the first failure.
  const isIndexable = (event) => event.sensitivity !== 'private'
    && event.prompt_context_allowed !== false
    && event.safety?.indexing_allowed !== false
    && event.safety?.cross_host_allowed !== false
    && safeToIndex(event.summary);

  const docs = [];
  for (const event of events) {
    if (!isIndexable(event)) continue;
    const sourceLines = (event.source_ids || []).join('\n');
    docs.push(docFromText({
      path: `${event.receipt_path || '00_system/neurain/events.ndjson'}#${event.event_id}`,
      kind: `event:${event.type}`,
      host: event.host || 'cli',
      scope: event.scope || 'global',
      sensitivity: 'internal',
      title: `${event.type} event`,
      body: `${event.type}\n${event.summary}\n${sourceLines}`,
    }));
  }
  return docs;
}
1646
+
1647
/**
 * Build recall documents from the JSON receipts under `output/receipts`.
 *
 * A receipt is skipped when it is not a `.json` file, lives under
 * `receiptDirRel` (defined elsewhere in this file), has a private path, is not
 * valid JSON, does not contain a JSON object, is marked private, references a
 * private source id, or when its sanitized body fails `safeToIndex`.
 *
 * @param {string} root - Workspace root.
 * @returns {Array<object>} One document per indexable receipt, sensitivity `internal`.
 */
function collectReceiptDocs(root) {
  const dir = safeResolve(root, 'output/receipts');
  if (!fs.existsSync(dir)) return [];
  const docs = [];
  for (const file of walkFiles(dir, { includeRaw: false, maxFiles: 10000 })) {
    const rel = `output/receipts/${relPath(dir, file)}`;
    if (!/\.json$/i.test(rel)) continue;
    if (rel.startsWith(`${receiptDirRel}/`)) continue;
    if (inferSensitivityFromPath(rel) === 'private') continue;
    let parsed;
    try {
      parsed = JSON.parse(readText(file, '{}'));
    } catch {
      // Unparseable receipts are ignored rather than failing the whole build.
      continue;
    }
    // Valid JSON that is not an object (`null`, a number, a list, ...) cannot be
    // a receipt. Without this guard a file containing `null` throws on the
    // property reads below and aborts collection for every other receipt.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) continue;
    if (parsed.sensitivity === 'private') continue;
    const sourceIds = [
      ...(Array.isArray(parsed.source_ids) ? parsed.source_ids : []),
      ...(Array.isArray(parsed.candidate?.source_ids) ? parsed.candidate.source_ids : []),
    ];
    if (sourceIds.some((source) => inferSensitivityFromPath(source) === 'private')) continue;
    // Only the allow-listed fields assembled by receiptBody are indexed.
    const safeBody = receiptBody(parsed);
    if (!safeToIndex(safeBody)) continue;
    docs.push(docFromText({
      path: rel,
      kind: `receipt:${String(parsed.command || 'unknown').replace(/\s+/g, '-')}`,
      host: 'receipt',
      scope: parsed.scope || parsed.candidate?.scope || 'global',
      sensitivity: 'internal',
      title: `${parsed.command || 'receipt'} receipt`,
      body: safeBody,
    }));
  }
  return docs;
}
1682
+
1683
/**
 * Build a recall document record from raw text plus metadata.
 *
 * The body is whitespace-collapsed and trimmed. `source_hash` is the sha256 of
 * "path\nbody", and `doc_id` is `recall-` plus the first 16 characters of the
 * sha256 of "path:source_hash". The title is capped at 180 characters (falling
 * back to the path's basename) and the snippet at 220.
 *
 * @param {object} fields - `path`, `kind`, `host`, `scope`, `sensitivity`, `title`, `body`.
 * @returns {object} Document with `doc_id`, `path`, `kind`, `host`, `scope`,
 *   `sensitivity`, `source_hash`, `title`, `snippet` and `body`.
 */
function docFromText({ path: rel, kind, host, scope, sensitivity, title, body }) {
  const flattened = String(body || '').replace(/\s+/g, ' ').trim();
  const contentHash = sha256(`${rel}\n${flattened}`);
  const idDigest = sha256(`${rel}:${contentHash}`);
  // `path` here is the module: the `path` field was destructured as `rel`.
  const displayTitle = String(title || path.basename(rel));
  return {
    doc_id: `recall-${idDigest.slice(0, 16)}`,
    path: rel,
    kind,
    host,
    scope,
    sensitivity,
    source_hash: contentHash,
    title: displayTitle.slice(0, 180),
    snippet: flattened.slice(0, 220),
    body: flattened,
  };
}
1699
+
1700
/**
 * Serialize the allow-listed fields of a receipt into the text that gets indexed.
 *
 * Only the fields named below are copied, missing values become empty strings,
 * and `writes` entries are filtered through `safeReceiptWriteValue`. The result
 * is encoded with `stableJson`.
 *
 * @param {object} receipt - Parsed receipt object.
 * @returns {string} Encoded body.
 */
function receiptBody(receipt) {
  return stableJson({
    command: receipt.command || '',
    generated_at: receipt.generated_at || receipt.created_at || '',
    event_id: receipt.event_id || '',
    candidate_id: receipt.candidate_id || '',
    candidate_hash: receipt.candidate_hash || '',
    registry_path: receipt.registry_path || '',
    planned_changes: Array.isArray(receipt.planned_changes)
      // `item?.` mirrors safeReceiptWriteValue: a null entry in the list yields
      // empty fields instead of a TypeError.
      ? receipt.planned_changes.map((item) => ({
        title: item?.title || '',
        from_status: item?.from_status || '',
        to_status: item?.to_status || '',
        reason: item?.reason || '',
      }))
      : [],
    writes: Array.isArray(receipt.writes) ? receipt.writes.map(safeReceiptWriteValue).filter(Boolean) : [],
  });
}
1719
+
1720
/**
 * Reduce one receipt `writes` entry to a string that may be indexed.
 *
 * The entry's `path` is preferred, falling back to its `action`. An empty
 * string is returned when the path is classified private or when the chosen
 * value fails `safeToIndex`.
 *
 * @param {{path?: string, action?: string}|null|undefined} item - Write entry.
 * @returns {string} The path or action, or '' when it must not be indexed.
 */
function safeReceiptWriteValue(item) {
  const writtenPath = String(item?.path || '');
  const isPrivatePath = writtenPath !== '' && inferSensitivityFromPath(writtenPath) === 'private';
  if (isPrivatePath) return '';
  const candidate = writtenPath || String(item?.action || '');
  return safeToIndex(candidate) ? candidate : '';
}
1727
+
1728
/**
 * Turn free text into a full-text query of quoted terms joined with `OR`.
 *
 * The text is lowercased and split into runs of letters, digits, `_` and `-`;
 * only the first 8 terms are used.
 *
 * @param {*} query - Query text; falsy values produce an empty string.
 * @returns {string} For example `"alpha" OR "beta"`, or '' when there are no terms.
 */
function toFtsQuery(query) {
  const tokens = String(query || '').toLowerCase().match(/[\p{L}\p{N}_-]+/gu) || [];
  const quoted = [];
  for (const token of tokens.slice(0, 8)) {
    // Double any embedded quote so the term stays a single quoted string.
    const escaped = token.replace(/"/g, '""');
    quoted.push(`"${escaped}"`);
  }
  return quoted.join(' OR ');
}
1732
+
1733
/**
 * Derive a short search query from an event's summary.
 *
 * The summary is lowercased and tokenized; tokens shorter than 4 characters
 * and a fixed set of stop words are dropped, and the first 4 remaining tokens
 * are joined with spaces.
 *
 * @param {{summary?: string}} event - Event whose summary is used.
 * @returns {string} Up to four space-separated terms, or '' when none qualify.
 */
function queryForEvent(event) {
  const stopWords = new Set(['after', 'with', 'from', 'that', 'this', 'event', 'recall', 'memory', 'safe']);
  const words = String(event.summary || '').toLowerCase().match(/[\p{L}\p{N}_-]+/gu) || [];
  const picked = [];
  for (const word of words) {
    if (word.length < 4 || stopWords.has(word)) continue;
    picked.push(word);
    if (picked.length === 4) break;
  }
  return picked.join(' ');
}
1738
+
1739
/**
 * Search the recall corpus directly when the SQLite index cannot be used.
 *
 * The query is split on whitespace. Each term scores 10 when it occurs in a
 * document's body and 15 when it occurs in the title (case-insensitive
 * substring match). Documents can be restricted to an exact `host` and/or
 * `scope`; ties are ordered by path.
 *
 * @param {string} root - Workspace root passed to `collectRecallDocs`.
 * @param {object} payload - Search request; reads `query`, `host`, `scope`, `top`.
 * @returns {object} The payload extended with `ok`, `source: 'markdown_fallback'`,
 *   `fallback_used` and the scored `results`.
 */
function fallbackRecallSearch(root, payload) {
  // Coerce first: a missing or non-string query yields no terms (and so no
  // results) instead of a TypeError on `.toLowerCase()`.
  const terms = String(payload.query || '').toLowerCase().split(/\s+/).filter(Boolean);
  const host = String(payload.host || '');
  const scope = String(payload.scope || '');
  const results = collectRecallDocs(root)
    .filter((doc) => !host || doc.host === host)
    .filter((doc) => !scope || doc.scope === scope)
    .map((doc) => {
      // Lowercase once per document rather than once per term.
      const body = doc.body.toLowerCase();
      const title = doc.title.toLowerCase();
      const score = terms.reduce(
        (total, term) => total + (body.includes(term) ? 10 : 0) + (title.includes(term) ? 15 : 0),
        0,
      );
      return { doc, score };
    })
    .filter((item) => item.score > 0)
    .sort((a, b) => b.score - a.score || a.doc.path.localeCompare(b.doc.path))
    .slice(0, payload.top)
    .map(({ doc, score }) => ({
      path: doc.path,
      kind: doc.kind,
      host: doc.host,
      scope: doc.scope,
      sensitivity: doc.sensitivity,
      title: doc.title,
      snippet: doc.snippet,
      source_hash: doc.source_hash,
      score,
    }));
  return {
    ...payload,
    ok: true,
    source: 'markdown_fallback',
    fallback_used: true,
    results,
  };
}
1772
+
1773
/**
 * Swap a freshly built database file into place.
 *
 * The existing target and its `-wal`/`-shm` files are removed first, then the
 * temp file is renamed onto the target. Auxiliary files are cleared for the
 * temp path before the rename and for the target path after it.
 *
 * @param {string} target - Final database path.
 * @param {string} tmp - Path of the newly built database.
 */
function replaceSqliteFile(target, tmp) {
  // Remove the old database together with its sidecar files.
  cleanupSqliteSidecars(target);

  // Drop the temp database's own auxiliary files before moving it.
  cleanupSqliteAuxFiles(tmp);
  fs.renameSync(tmp, target);

  // Clear any auxiliary files left next to the new target.
  cleanupSqliteAuxFiles(target);
}
1779
+
1780
/**
 * Build a `file:` URI that opens a database read-only and immutable.
 *
 * Only `%`, `?`, `#` and space are percent-encoded in the path.
 *
 * @param {string} file - Database path.
 * @returns {string} `file:<encoded path>?mode=ro&immutable=1`.
 */
function sqliteReadOnlyLocation(file) {
  const escapes = { '%': '%25', '?': '%3F', '#': '%23', ' ': '%20' };
  // A single pass is enough: the inserted `%` signs are never revisited.
  const encodedPath = String(file).replace(/[%?# ]/g, (ch) => escapes[ch]);
  return `file:${encodedPath}?mode=ro&immutable=1`;
}
1783
+
1784
/**
 * Delete a database file together with its `-wal` and `-shm` files.
 *
 * Best effort: missing files and removal errors are ignored.
 *
 * @param {string} file - Database path.
 */
function cleanupSqliteSidecars(file) {
  ['', '-wal', '-shm'].forEach((suffix) => {
    try {
      fs.rmSync(`${file}${suffix}`, { force: true });
    } catch {}
  });
}
1789
+
1790
/**
 * Delete the `-wal` and `-shm` files next to a database, leaving the database
 * file itself in place.
 *
 * Best effort: missing files and removal errors are ignored.
 *
 * @param {string} file - Database path.
 */
function cleanupSqliteAuxFiles(file) {
  ['-wal', '-shm'].forEach((suffix) => {
    try {
      fs.rmSync(`${file}${suffix}`, { force: true });
    } catch {}
  });
}
1795
+
1796
/**
 * Serialize a value as JSON with object keys sorted, so equal data always
 * produces the same string.
 *
 * Values that `JSON.stringify` cannot represent (`undefined`, functions,
 * symbols) follow its rules: they are omitted from objects and written as
 * `null` inside arrays, which keeps the output valid JSON. Output for data
 * without such values is unchanged.
 *
 * @param {*} value - Value to serialize.
 * @returns {string|undefined} The JSON text; `undefined` only when the top-level
 *   value itself is not representable, as with `JSON.stringify`.
 */
function stableJson(value) {
  if (Array.isArray(value)) {
    // Array.from also visits holes in sparse arrays, so they become `null`.
    const items = Array.from(value, (item) => stableJson(item) ?? 'null');
    return `[${items.join(',')}]`;
  }
  if (value && typeof value === 'object') {
    const members = [];
    for (const key of Object.keys(value).sort()) {
      const encoded = stableJson(value[key]);
      // Skip unrepresentable members instead of emitting `"key":undefined`.
      if (encoded !== undefined) members.push(`${JSON.stringify(key)}:${encoded}`);
    }
    return `{${members.join(',')}}`;
  }
  return JSON.stringify(value);
}