moflo 4.9.36 → 4.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,18 @@
24
24
 
25
25
  **Search `tests` when looking for test coverage** of a function, module, or behavior — it indexes the test tree separately so you can pinpoint specs without grepping the whole repo.
26
26
 
27
+ ### Traverse Chunks, Don't Bulk-Retrieve
28
+
29
+ Search returns chunked guidance with a compact `navigation` crumb (`parentDoc`, `prevChunk`, `nextChunk`, `chunkTitle`). Use it:
30
+
31
+ | Want | Use |
32
+ |------|-----|
33
+ | Adjacent / sibling / hierarchical context | `mcp__moflo__memory_get_neighbors` |
34
+ | Full content of one chunk | `mcp__moflo__memory_retrieve` (returns full nav for further traversal) |
35
+ | Whole source doc | `Read` `parentPath` from any chunk's nav |
36
+
37
+ Full protocol: `.claude/guidance/moflo-memory-protocol.md`. Don't retrieve every search hit blindly.
38
+
27
39
  ### Tool Selection (MCP-first)
28
40
 
29
41
  | Tool | Purpose |
@@ -0,0 +1,30 @@
1
+ # MoFlo Memory Protocol — Search, Traverse, Retrieve
2
+
3
+ **Purpose:** How to use moflo's chunked memory effectively. Search returns navigable chunks — traverse the chunk graph, do not bulk-retrieve every hit.
4
+
5
+ ---
6
+
7
+ ## Decision Table
8
+
9
+ | You want | Use | Why |
10
+ |----------|-----|-----|
11
+ | Find an entry-point | `mcp__moflo__memory_search` | Returns chunk hits with compact `navigation` (parentDoc, prev/next, chunkTitle) |
12
+ | Adjacent context (1 chunk over) | `mcp__moflo__memory_get_neighbors` `{ key, include: ['prev','next'] }` | One round-trip, returns shaped entries with full nav |
13
+ | Same-section peers (h2/h3 family) | `memory_get_neighbors` `{ include: ['siblings'] }` or `['parent','children']` | Hierarchical traversal — cheaper than re-searching |
14
+ | Full content of one chunk | `mcp__moflo__memory_retrieve` `{ key }` | Returns full nav object for further traversal |
15
+ | Whole source doc when truly needed | `Read` `parentPath` from any chunk's nav | Disk read is cheaper than re-indexed `doc-*` |
16
+
17
+ ## Anti-Patterns
18
+
19
+ | Don't | Do instead |
20
+ |-------|-----------|
21
+ | Retrieve every search hit blindly | Read the search snippet + `navigation`; retrieve or traverse only the chunks you need |
22
+ | Open the source file when a chunk would do | Stay in the chunk graph; `Read` `parentPath` only for the rare full-doc case |
23
+ | Search again for a key you already have | `memory_retrieve` or `memory_get_neighbors` directly |
24
+
25
+ ---
26
+
27
+ ## See Also
28
+
29
+ - `.claude/guidance/moflo-agent-rules.md` § Memory-First Protocol — namespaces, query examples, MCP-first tool selection
30
+ - `.claude/guidance/moflo-memory-strategy.md` — How chunking, embeddings, and the RAG index work
@@ -18,6 +18,10 @@ CLI fallback when MCP is unavailable: `npx flo memory search --query "..." --nam
18
18
 
19
19
  The full namespace reference, query examples by domain, and tool catalog live in `.claude/guidance/moflo-agent-rules.md` § Memory-First Protocol — read that next.
20
20
 
21
+ ### Traverse, don't bulk-retrieve
22
+
23
+ Search hits carry a compact `navigation` crumb. For adjacent/sibling/hierarchical context, call `mcp__moflo__memory_get_neighbors` (one round-trip) instead of retrieving every hit. Full protocol: `.claude/guidance/moflo-memory-protocol.md`.
24
+
21
25
  ---
22
26
 
23
27
  ## Step 2: Apply Universal Agent Rules
@@ -307,7 +307,7 @@ switch (command) {
307
307
  process.stdout.write('REMINDER: Use TaskCreate before spawning agents. Task tool is blocked until then.\n');
308
308
  }
309
309
  if (config.memory_first && s.memoryRequired && !s.memorySearched) {
310
- process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents.\n');
310
+ process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
311
311
  }
312
312
  if (s.lastNamespaceHint) {
313
313
  // Per-actor single-shot. Each session_id gets the hint at most once per
@@ -376,7 +376,7 @@ switch (command) {
376
376
  if (!s.memoryRequired || isMemorySearchedFor(s)) break;
377
377
  var target = (process.env.TOOL_INPUT_pattern || '') + ' ' + (process.env.TOOL_INPUT_path || '');
378
378
  if (EXEMPT.some(function(p) { return target.indexOf(p) >= 0; })) break;
379
- process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search.\n');
379
+ process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
380
380
  process.exit(2);
381
381
  }
382
382
  case 'check-before-read': {
@@ -386,7 +386,7 @@ switch (command) {
386
386
  var fp = process.env.TOOL_INPUT_file_path || '';
387
387
  var isGuidance = fp.indexOf('.claude/guidance/') >= 0 || fp.indexOf('.claude\\guidance\\') >= 0;
388
388
  if (!isGuidance && EXEMPT.some(function(p) { return fp.indexOf(p) >= 0; })) break;
389
- process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search.\n');
389
+ process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
390
390
  process.exit(2);
391
391
  }
392
392
  case 'record-task-created': {
@@ -1,3 +1,3 @@
1
1
  {
2
- "directive": "MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol."
2
+ "directive": "MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol. When search returns chunk hits, traverse via mcp__moflo__memory_get_neighbors before retrieving — see `.claude/guidance/moflo-memory-protocol.md`."
3
3
  }
@@ -22,7 +22,7 @@ const path = require('path');
22
22
  // Defense-in-depth copy of the canonical directive in subagent-bootstrap.json.
23
23
  // Kept as a single-line literal so the parity test in tests/bin/subagent-start.test.ts
24
24
  // can verify it matches the JSON via plain substring containment.
25
- const FALLBACK_DIRECTIVE = 'MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol.';
25
+ const FALLBACK_DIRECTIVE = 'MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol. When search returns chunk hits, traverse via mcp__moflo__memory_get_neighbors before retrieving — see `.claude/guidance/moflo-memory-protocol.md`.';
26
26
 
27
27
  function loadDirective() {
28
28
  const jsonPath = path.join(__dirname, 'subagent-bootstrap.json');
@@ -121,6 +121,14 @@ mcp__moflo__memory_stats — { namespace: "learnings" }
121
121
 
122
122
  Flag empty `learnings` as `info` (project hasn't accumulated decisions yet — fine for new projects). Flag empty `guidance` as `warn` (no indexed guidance means semantic search is degraded).
123
123
 
124
+ **Legacy `doc-*` residue (#1053 S4)** — moflo retired whole-document indexing in favor of chunk-only RAG. The `purge-doc-entries` migration runs on session-start; if any `doc-*` rows linger, the migration did not take effect (it ran with no DB present, it errored, or the installed version predates the migration).
125
+
126
+ ```
127
+ mcp__moflo__memory_search — { query: "doc-", namespace: "guidance", threshold: 0, limit: 5 }
128
+ ```
129
+
130
+ If any returned `key` starts with `doc-`, flag `info`: "legacy doc-* rows present — `purge-doc-entries` migration did not run; fixable via `flo healer --fix` or manual `node node_modules/moflo/bin/run-migrations.mjs`".
131
+
124
132
  ### 1i. Hooks & MCP Wiring
125
133
 
126
134
  Read `.claude/settings.json`. Check:
package/bin/gate.cjs CHANGED
@@ -307,7 +307,7 @@ switch (command) {
307
307
  process.stdout.write('REMINDER: Use TaskCreate before spawning agents. Task tool is blocked until then.\n');
308
308
  }
309
309
  if (config.memory_first && s.memoryRequired && !s.memorySearched) {
310
- process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents.\n');
310
+ process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
311
311
  }
312
312
  if (s.lastNamespaceHint) {
313
313
  // Per-actor single-shot. Each session_id gets the hint at most once per
@@ -376,7 +376,7 @@ switch (command) {
376
376
  if (!s.memoryRequired || isMemorySearchedFor(s)) break;
377
377
  var target = (process.env.TOOL_INPUT_pattern || '') + ' ' + (process.env.TOOL_INPUT_path || '');
378
378
  if (EXEMPT.some(function(p) { return target.indexOf(p) >= 0; })) break;
379
- process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search.\n');
379
+ process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
380
380
  process.exit(2);
381
381
  }
382
382
  case 'check-before-read': {
@@ -386,7 +386,7 @@ switch (command) {
386
386
  var fp = process.env.TOOL_INPUT_file_path || '';
387
387
  var isGuidance = fp.indexOf('.claude/guidance/') >= 0 || fp.indexOf('.claude\\guidance\\') >= 0;
388
388
  if (!isGuidance && EXEMPT.some(function(p) { return fp.indexOf(p) >= 0; })) break;
389
- process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search.\n');
389
+ process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
390
390
  process.exit(2);
391
391
  }
392
392
  case 'record-task-created': {
@@ -272,6 +272,22 @@ function getEntryHash(db, key) {
272
272
  return null;
273
273
  }
274
274
 
/**
 * Look up the file-level content hash recorded on a document's first chunk.
 *
 * #1053 S4: doc-* entries are retired, so the doc-level skip-if-unchanged
 * check reads `docContentHash` from the metadata of the `${chunkPrefix}-0`
 * row (every chunk carries it).
 *
 * @param {object} db - Database handle exposing `prepare()`; the returned
 *   statement must support bind/step/getAsObject/free.
 * @param {string} chunkPrefix - Key prefix shared by the document's chunks.
 * @returns {string|null} The stored hash, or null when chunk-0 is missing,
 *   its metadata cannot be parsed, or it carries no `docContentHash`.
 */
function getDocHashFromChunkZero(db, chunkPrefix) {
  const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
  let entry = null;
  try {
    stmt.bind([`${chunkPrefix}-0`, NAMESPACE]);
    if (stmt.step()) entry = stmt.getAsObject();
  } finally {
    // Release the statement even when bind/step throws.
    stmt.free();
  }

  if (!entry?.metadata) return null;

  try {
    // Rows written before docContentHash existed have no such field; report
    // them as "no hash" (null) rather than leaking undefined to the caller.
    return JSON.parse(entry.metadata)?.docContentHash ?? null;
  } catch {
    // Unparseable metadata is treated as "no hash" so the caller re-indexes.
    return null;
  }
}
290
+
275
291
  /**
276
292
  * Extract overlapping context from adjacent text
277
293
  * @param {string} text - The text to extract from
@@ -536,9 +552,10 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
536
552
  const content = readFileSync(filePath, 'utf-8');
537
553
  const contentHash = hashContent(content);
538
554
 
539
- // Check if content changed (skip if same hash unless --force)
555
+ // Check if content changed (skip if same hash unless --force).
556
+ // #1053 S4: doc-* retired — read docContentHash off chunk-0 instead.
540
557
  if (!force) {
541
- const existingHash = getEntryHash(db, docKey);
558
+ const existingHash = getDocHashFromChunkZero(db, chunkPrefix);
542
559
  if (existingHash === contentHash) {
543
560
  return { docKey, status: 'unchanged', chunks: 0 };
544
561
  }
@@ -547,25 +564,19 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
547
564
  const stats = statSync(filePath);
548
565
  const relativePath = '/' + relative(projectRoot, filePath).replace(/\\/g, '/');
549
566
 
550
- // Delete old chunks for this file before re-indexing
567
+ // Delete old chunks for this file before re-indexing.
568
+ // #1053 S4: also delete any legacy doc-* row (one-time cleanup if a
569
+ // pre-S4 install left one behind for this prefix).
551
570
  deleteByPrefix(db, chunkPrefix);
571
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, docKey]);
552
572
 
553
- // 1. Store full document
554
- const docMetadata = {
555
- ...extraMetadata,
556
- type: 'document',
557
- filePath: relativePath,
558
- fileSize: stats.size,
559
- lastModified: stats.mtime.toISOString(),
560
- contentHash,
561
- indexedAt: new Date().toISOString(),
562
- ragVersion: '2.0', // Mark as full RAG indexed
563
- };
564
-
565
- storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document', ...extraTags]);
566
- debug(`Stored document: ${docKey}`);
573
+ // #1053 S4: Chunker no longer writes doc-* entries. Audit found zero
574
+ // production readers — they duplicated chunk semantic territory and
575
+ // ate ~13% of search slots without unique signal. parentDoc on chunks
576
+ // remains as an identifier/grouping label; callers needing the source
577
+ // file Read parentPath, per shipped/moflo-memory-protocol.md.
567
578
 
568
- // 2. Chunk and store semantic pieces with full RAG linking
579
+ // Chunk and store semantic pieces with full RAG linking
569
580
  const chunks = chunkMarkdown(content, fileName);
570
581
 
571
582
  if (chunks.length === 0) {
@@ -576,14 +587,6 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
576
587
  const hierarchy = buildHierarchy(chunks, chunkPrefix);
577
588
  const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
578
589
 
579
- // Update document with children references
580
- const docChildrenMeta = {
581
- ...docMetadata,
582
- children: siblings,
583
- chunkCount: chunks.length,
584
- };
585
- storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document', ...extraTags]);
586
-
587
590
  for (let i = 0; i < chunks.length; i++) {
588
591
  const chunk = chunks[i];
589
592
  const chunkKey = `${chunkPrefix}-${i}`;
@@ -592,13 +595,11 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
592
595
  const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
593
596
  const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
594
597
 
595
- // Extract overlapping context from adjacent chunks
596
- const contextBefore = i > 0
597
- ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
598
- : null;
599
- const contextAfter = i < chunks.length - 1
600
- ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
601
- : null;
598
+ // #1053 S5: dropped extractOverlapContext + preamble wrapping. The
599
+ // preambles were a workaround for missing traversal — once memory_get_neighbors
600
+ // is wired (S2), prevChunk/nextChunk metadata + a real call is the
601
+ // alternative path. Saved ~25-30% bloat per chunk on disk and in
602
+ // embeddings.
602
603
 
603
604
  // Get hierarchical relationships
604
605
  const hierInfo = hierarchy[chunkKey];
@@ -608,9 +609,13 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
608
609
  type: 'chunk',
609
610
  ragVersion: '2.0',
610
611
 
611
- // Document relationship
612
+ // Document relationship — parentDoc is an identifier/grouping label
613
+ // only after #1053 S4; the actual source doc is at parentPath.
614
+ // docContentHash is the file-level hash, used by the skip-if-unchanged
615
+ // check (the chunker reads it off chunk-0 to decide whether to re-index).
612
616
  parentDoc: docKey,
613
617
  parentPath: relativePath,
618
+ docContentHash: contentHash,
614
619
 
615
620
  // Sequential navigation (forward/backward links)
616
621
  chunkIndex: i,
@@ -632,30 +637,14 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
632
637
  isPart: chunk.isPart || false,
633
638
  partNum: chunk.partNum || null,
634
639
 
635
- // Overlapping context for continuity
636
- contextOverlapPercent: overlapPercent,
637
- hasContextBefore: !!contextBefore,
638
- hasContextAfter: !!contextAfter,
639
-
640
640
  // Content metadata
641
641
  contentLength: chunk.content.length,
642
642
  contentHash: hashContent(chunk.content),
643
643
  indexedAt: new Date().toISOString(),
644
644
  };
645
645
 
646
- // Build searchable content with title context
647
- // Include overlap context for better retrieval
648
- let searchableContent = `# ${chunk.title}\n\n`;
649
-
650
- if (contextBefore) {
651
- searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
652
- }
653
-
654
- searchableContent += chunk.content;
655
-
656
- if (contextAfter) {
657
- searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
658
- }
646
+ // #1053 S5: title heading + chunk body. No prev/next preamble.
647
+ const searchableContent = `# ${chunk.title}\n\n${chunk.content}`;
659
648
 
660
649
  // Store chunk with full metadata
661
650
  storeEntry(
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Migration: hard-delete every legacy `doc-*` whole-document entry, in
3
+ * every namespace. The chunker no longer writes these (#1053 S4) — audit
4
+ * found zero production readers, they duplicated chunk semantic territory,
5
+ * and they ate ~13% of search slots on every query without unique signal.
6
+ *
7
+ * Idempotent: re-runs are no-ops because there will be no `doc-*` rows left.
8
+ *
9
+ * @module bin/migrations/purge-doc-entries
10
+ */
11
+
12
+ import { existsSync, readFileSync, writeFileSync } from 'fs';
13
+ import { mofloResolveURL } from '../lib/moflo-resolve.mjs';
14
+ import { memoryDbPath } from '../lib/moflo-paths.mjs';
15
+
export const name = 'purge-doc-entries';

/**
 * Delete every `memory_entries` row whose key starts with `doc-` and persist
 * the database back to disk when anything was removed.
 *
 * @param {string} projectRoot - Project root used to locate the memory DB.
 * @returns {Promise<{purged:number}>} Number of rows removed (0 when the DB
 *   file does not exist or holds no `doc-*` rows).
 */
export async function run(projectRoot) {
  const dbPath = memoryDbPath(projectRoot);
  if (!existsSync(dbPath)) return { purged: 0 };

  // Lazy-load sql.js — keeps the manifest-stamped no-op path off the WASM
  // init cost (~30ms cold).
  const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
  const SQL = await initSqlJs();
  const db = new SQL.Database(readFileSync(dbPath));

  try {
    // Scope: every namespace, since both `flo memory index-guidance` and
    // `bin/index-guidance.mjs` historically wrote doc-* across whatever
    // namespace the entry was scoped to (default for guidance: `guidance`).
    // Conservative — match the `doc-` prefix only, never sweep user-stored
    // keys that merely start with "doc".
    const countStmt = db.prepare(`SELECT COUNT(*) AS cnt FROM memory_entries WHERE key LIKE 'doc-%'`);
    let beforeCount;
    try {
      countStmt.step();
      beforeCount = Number(countStmt.getAsObject().cnt ?? 0);
    } finally {
      countStmt.free();
    }

    // Nothing to purge: leave the file on disk untouched.
    if (beforeCount === 0) return { purged: 0 };

    db.run(`DELETE FROM memory_entries WHERE key LIKE 'doc-%'`);
    // Prefer the engine's modified-row count; fall back to the pre-delete
    // count when the handle does not expose getRowsModified.
    const purged = db.getRowsModified?.() ?? beforeCount;

    if (purged > 0) writeFileSync(dbPath, Buffer.from(db.export()));
    return { purged };
  } finally {
    // Always release the in-memory database, including when a query, the
    // export, or the write throws.
    db.close();
  }
}
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Migration: strip the legacy `[Context from previous section:]` /
3
+ * `[Context from next section:]` preamble blocks from every existing chunk
4
+ * (#1053 S5). The chunker no longer writes them — they were a workaround for
5
+ * missing traversal, and once memory_get_neighbors is wired (S2),
6
+ * prevChunk/nextChunk metadata + a real call is the alternative path.
7
+ *
8
+ * For every chunk whose content carries a preamble marker:
9
+ * 1. Strip the preamble block(s) in place
10
+ * 2. NULL the embedding column so build-embeddings regenerates it from the
11
+ * cleaned content on the next indexer pass
12
+ *
13
+ * Idempotent: chunks already in the new shape (no preamble markers) are
14
+ * untouched.
15
+ *
16
+ * @module bin/migrations/strip-context-preambles
17
+ */
18
+
19
+ import { existsSync, readFileSync, writeFileSync } from 'fs';
20
+ import { mofloResolveURL } from '../lib/moflo-resolve.mjs';
21
+ import { memoryDbPath } from '../lib/moflo-paths.mjs';
22
+
export const name = 'strip-context-preambles';
// Run after purge-doc-entries (which itself has order=0 default). Explicit
// ordering keeps this independent of fs sort order.
export const order = 20;

// Validated against real chunks; the back-to-back `---` runs that earlier
// drafts mishandled are absorbed by the trailing `(?:---\n\n)*` / leading
// `(?:\n\n---)+` greediness.
const PREV_PREAMBLE = /\[Context from previous section:\][\s\S]*?\n\n---\n\n(?:---\n\n)*/g;
const NEXT_PREAMBLE = /(?:\n\n---)+\n\n\[Context from next section:\][\s\S]*$/g;

/**
 * Remove the legacy previous-/next-section preamble blocks from chunk content.
 *
 * The previous-section block is removed from its marker through the first
 * `---` separator (plus any immediately repeated separators); the
 * next-section block is removed from its leading separator(s) to the end of
 * the string.
 *
 * @param {string} content - Stored chunk content.
 * @returns {string} Content without preamble blocks; identical to the input
 *   when no marker is present.
 */
function strip(content) {
  // String.prototype.replace restarts a global regex from index 0 on every
  // call, so no manual lastIndex reset is needed here.
  return content.replace(PREV_PREAMBLE, '').replace(NEXT_PREAMBLE, '');
}
41
+
/**
 * Strip legacy context preambles from every active `chunk-*` row and NULL the
 * embedding of each row that changed, then persist the database when at
 * least one row was rewritten.
 *
 * @param {string} projectRoot - Project root used to locate the memory DB.
 * @returns {Promise<{stripped:number, untouched:number}>} Count of rows
 *   rewritten and rows left as-is (both 0 when the DB file does not exist or
 *   holds no matching rows).
 */
export async function run(projectRoot) {
  const dbPath = memoryDbPath(projectRoot);
  if (!existsSync(dbPath)) return { stripped: 0, untouched: 0 };

  const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
  const SQL = await initSqlJs();
  const db = new SQL.Database(readFileSync(dbPath));

  try {
    // Only chunks can carry the preamble — the chunker is the only writer of
    // those markers. Filter on key prefix to keep the LIKE selective; manual
    // memory entries containing the literal string are extremely unlikely and
    // the strip is a no-op for them anyway.
    const rows = [];
    const stmt = db.prepare(
      `SELECT id, content FROM memory_entries WHERE key LIKE 'chunk-%' AND status = 'active'`,
    );
    try {
      while (stmt.step()) rows.push(stmt.getAsObject());
    } finally {
      stmt.free();
    }

    if (rows.length === 0) return { stripped: 0, untouched: 0 };

    let stripped = 0;
    let untouched = 0;
    const update = db.prepare(`UPDATE memory_entries SET content = ?, embedding = NULL WHERE id = ?`);
    try {
      for (const row of rows) {
        const original = String(row.content || '');
        // Plain substring scan for either marker before running the regexes;
        // on an idempotent re-run no chunk has a marker, so the regex work is
        // skipped entirely.
        const hasMarker =
          original.includes('[Context from previous section:]') ||
          original.includes('[Context from next section:]');
        const cleaned = hasMarker ? strip(original) : original;
        if (cleaned === original) {
          untouched++;
          continue;
        }
        // Embedding is cleared together with the content rewrite so it no
        // longer reflects the stripped preamble text.
        update.run([cleaned, row.id]);
        stripped++;
      }
    } finally {
      update.free();
    }

    if (stripped > 0) writeFileSync(dbPath, Buffer.from(db.export()));
    return { stripped, untouched };
  } finally {
    // Always release the in-memory database, including when a query, the
    // export, or the write throws.
    db.close();
  }
}
@@ -164,6 +164,7 @@ async function semanticSearch(queryText, options = {}) {
164
164
  preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
165
165
  type: metadata.type || 'unknown',
166
166
  parentDoc: metadata.parentDoc || null,
167
+ parentPath: metadata.parentPath || null,
167
168
  chunkTitle: metadata.chunkTitle || null,
168
169
  });
169
170
  } catch (err) {
@@ -262,7 +263,9 @@ async function main() {
262
263
  console.log(` Key: ${top.key}`);
263
264
  console.log(` Score: ${top.score.toFixed(4)}`);
264
265
  if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
265
- if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
266
+ // #1053 S4: doc-* retired — parentPath is the actionable source location.
267
+ if (top.parentPath) console.log(` Parent: ${top.parentPath}`);
268
+ else if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
266
269
  console.log(` Preview: ${top.preview}...`);
267
270
  } catch (err) {
268
271
  console.error(`[semantic-search] Error: ${err.message}`);
@@ -1605,6 +1605,8 @@ function runMigrationsAndAnnounce(runnerPath) {
1605
1605
  const labels = {
1606
1606
  'knowledge-to-learnings': 'consolidated knowledge → learnings',
1607
1607
  'knowledge-purge': 'removed legacy knowledge namespace rows',
1608
+ 'purge-doc-entries': 'pruned legacy doc-* rows (chunk-only RAG, #1053)',
1609
+ 'strip-context-preambles': 'stripped chunk preambles; embeddings will rebuild on next index pass (#1053)',
1608
1610
  };
1609
1611
 
1610
1612
  for (const line of raw.split('\n')) {