moflo 4.9.35 → 4.9.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/guidance/shipped/moflo-agent-rules.md +12 -0
- package/.claude/guidance/shipped/moflo-memory-protocol.md +30 -0
- package/.claude/guidance/shipped/moflo-subagents.md +4 -0
- package/.claude/helpers/gate.cjs +3 -3
- package/.claude/helpers/subagent-bootstrap.json +1 -1
- package/.claude/helpers/subagent-start.cjs +1 -1
- package/.claude/skills/eldar/SKILL.md +8 -0
- package/bin/gate.cjs +3 -3
- package/bin/index-guidance.mjs +41 -52
- package/bin/migrations/purge-doc-entries.mjs +53 -0
- package/bin/migrations/strip-context-preambles.mjs +97 -0
- package/bin/semantic-search.mjs +4 -1
- package/bin/session-start-launcher.mjs +2 -0
- package/dist/src/cli/commands/doctor-checks-memory-access.js +179 -0
- package/dist/src/cli/commands/memory.js +41 -52
- package/dist/src/cli/init/claudemd-generator.js +4 -0
- package/dist/src/cli/init/moflo-init.js +13 -5
- package/dist/src/cli/mcp-tools/memory-tools.js +169 -31
- package/dist/src/cli/memory/auto-memory-bridge.js +8 -11
- package/dist/src/cli/memory/bridge-entries.js +6 -2
- package/dist/src/cli/memory/memory-initializer.js +17 -11
- package/dist/src/cli/services/claude-stats.js +2 -16
- package/dist/src/cli/services/subagent-bootstrap.js +1 -1
- package/dist/src/cli/shared/utils/claude-projects-path.js +32 -0
- package/dist/src/cli/version.js +1 -1
- package/package.json +2 -2
|
@@ -24,6 +24,18 @@
|
|
|
24
24
|
|
|
25
25
|
**Search `tests` when looking for test coverage** of a function, module, or behavior — it indexes the test tree separately so you can pinpoint specs without grepping the whole repo.
|
|
26
26
|
|
|
27
|
+
### Traverse Chunks, Don't Bulk-Retrieve
|
|
28
|
+
|
|
29
|
+
Search returns chunked guidance with a compact `navigation` crumb (`parentDoc`, `prevChunk`, `nextChunk`, `chunkTitle`). Use it:
|
|
30
|
+
|
|
31
|
+
| Want | Use |
|
|
32
|
+
|------|-----|
|
|
33
|
+
| Adjacent / sibling / hierarchical context | `mcp__moflo__memory_get_neighbors` |
|
|
34
|
+
| Full content of one chunk | `mcp__moflo__memory_retrieve` (returns full nav for further traversal) |
|
|
35
|
+
| Whole source doc | `Read` `parentPath` from any chunk's nav |
|
|
36
|
+
|
|
37
|
+
Full protocol: `.claude/guidance/moflo-memory-protocol.md`. Don't retrieve every search hit blindly.
|
|
38
|
+
|
|
27
39
|
### Tool Selection (MCP-first)
|
|
28
40
|
|
|
29
41
|
| Tool | Purpose |
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# MoFlo Memory Protocol — Search, Traverse, Retrieve
|
|
2
|
+
|
|
3
|
+
**Purpose:** How to use moflo's chunked memory effectively. Search returns navigable chunks — traverse the chunk graph, do not bulk-retrieve every hit.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Decision Table
|
|
8
|
+
|
|
9
|
+
| You want | Use | Why |
|
|
10
|
+
|----------|-----|-----|
|
|
11
|
+
| Find an entry-point | `mcp__moflo__memory_search` | Returns chunk hits with compact `navigation` (parentDoc, prev/next, chunkTitle) |
|
|
12
|
+
| Adjacent context (1 chunk over) | `mcp__moflo__memory_get_neighbors` `{ key, include: ['prev','next'] }` | One round-trip, returns shaped entries with full nav |
|
|
13
|
+
| Same-section peers (h2/h3 family) | `mcp__moflo__memory_get_neighbors` `{ key, include: ['siblings'] }` or `{ key, include: ['parent','children'] }` | Hierarchical traversal — cheaper than re-searching |
|
|
14
|
+
| Full content of one chunk | `mcp__moflo__memory_retrieve` `{ key }` | Returns full nav object for further traversal |
|
|
15
|
+
| Whole source doc when truly needed | `Read` `parentPath` from any chunk's nav | Disk read is cheaper than re-indexed `doc-*` |
|
|
16
|
+
|
|
17
|
+
## Anti-Patterns
|
|
18
|
+
|
|
19
|
+
| Don't | Do instead |
|
|
20
|
+
|-------|-----------|
|
|
21
|
+
| Retrieve every search hit blindly | Read the search snippet + `navigation`; retrieve or traverse only the chunks you need |
|
|
22
|
+
| Open the source file when a chunk would do | Stay in the chunk graph; `Read` `parentPath` only for the rare full-doc case |
|
|
23
|
+
| Search again for a key you already have | `memory_retrieve` or `memory_get_neighbors` directly |
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## See Also
|
|
28
|
+
|
|
29
|
+
- `.claude/guidance/moflo-agent-rules.md` § Memory-First Protocol — namespaces, query examples, MCP-first tool selection
|
|
30
|
+
- `.claude/guidance/moflo-memory-strategy.md` — How chunking, embeddings, and the RAG index work
|
|
@@ -18,6 +18,10 @@ CLI fallback when MCP is unavailable: `npx flo memory search --query "..." --nam
|
|
|
18
18
|
|
|
19
19
|
The full namespace reference, query examples by domain, and tool catalog live in `.claude/guidance/moflo-agent-rules.md` § Memory-First Protocol — read that next.
|
|
20
20
|
|
|
21
|
+
### Traverse, don't bulk-retrieve
|
|
22
|
+
|
|
23
|
+
Search hits carry a compact `navigation` crumb. For adjacent/sibling/hierarchical context, call `mcp__moflo__memory_get_neighbors` (one round-trip) instead of retrieving every hit. Full protocol: `.claude/guidance/moflo-memory-protocol.md`.
|
|
24
|
+
|
|
21
25
|
---
|
|
22
26
|
|
|
23
27
|
## Step 2: Apply Universal Agent Rules
|
package/.claude/helpers/gate.cjs
CHANGED
|
@@ -307,7 +307,7 @@ switch (command) {
|
|
|
307
307
|
process.stdout.write('REMINDER: Use TaskCreate before spawning agents. Task tool is blocked until then.\n');
|
|
308
308
|
}
|
|
309
309
|
if (config.memory_first && s.memoryRequired && !s.memorySearched) {
|
|
310
|
-
process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents
|
|
310
|
+
process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
311
311
|
}
|
|
312
312
|
if (s.lastNamespaceHint) {
|
|
313
313
|
// Per-actor single-shot. Each session_id gets the hint at most once per
|
|
@@ -376,7 +376,7 @@ switch (command) {
|
|
|
376
376
|
if (!s.memoryRequired || isMemorySearchedFor(s)) break;
|
|
377
377
|
var target = (process.env.TOOL_INPUT_pattern || '') + ' ' + (process.env.TOOL_INPUT_path || '');
|
|
378
378
|
if (EXEMPT.some(function(p) { return target.indexOf(p) >= 0; })) break;
|
|
379
|
-
process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search
|
|
379
|
+
process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
380
380
|
process.exit(2);
|
|
381
381
|
}
|
|
382
382
|
case 'check-before-read': {
|
|
@@ -386,7 +386,7 @@ switch (command) {
|
|
|
386
386
|
var fp = process.env.TOOL_INPUT_file_path || '';
|
|
387
387
|
var isGuidance = fp.indexOf('.claude/guidance/') >= 0 || fp.indexOf('.claude\\guidance\\') >= 0;
|
|
388
388
|
if (!isGuidance && EXEMPT.some(function(p) { return fp.indexOf(p) >= 0; })) break;
|
|
389
|
-
process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search
|
|
389
|
+
process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
390
390
|
process.exit(2);
|
|
391
391
|
}
|
|
392
392
|
case 'record-task-created': {
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
{
|
|
2
|
-
"directive": "MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol."
|
|
2
|
+
"directive": "MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol. When search returns chunk hits, traverse via mcp__moflo__memory_get_neighbors before retrieving — see `.claude/guidance/moflo-memory-protocol.md`."
|
|
3
3
|
}
|
|
@@ -22,7 +22,7 @@ const path = require('path');
|
|
|
22
22
|
// Defense-in-depth copy of the canonical directive in subagent-bootstrap.json.
|
|
23
23
|
// Kept as a single-line literal so the parity test in tests/bin/subagent-start.test.ts
|
|
24
24
|
// can verify it matches the JSON via plain substring containment.
|
|
25
|
-
const FALLBACK_DIRECTIVE = 'MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol.';
|
|
25
|
+
const FALLBACK_DIRECTIVE = 'MANDATORY FIRST ACTION: Your very first tool call MUST be mcp__moflo__memory_search (any query, any namespace). The memory-first gate WILL BLOCK all Glob, Grep, and Read calls until you do this. After memory search, follow `.claude/guidance/moflo-subagents.md` protocol. When search returns chunk hits, traverse via mcp__moflo__memory_get_neighbors before retrieving — see `.claude/guidance/moflo-memory-protocol.md`.';
|
|
26
26
|
|
|
27
27
|
function loadDirective() {
|
|
28
28
|
const jsonPath = path.join(__dirname, 'subagent-bootstrap.json');
|
|
@@ -121,6 +121,14 @@ mcp__moflo__memory_stats — { namespace: "learnings" }
|
|
|
121
121
|
|
|
122
122
|
Flag empty `learnings` as `info` (project hasn't accumulated decisions yet — fine for new projects). Flag empty `guidance` as `warn` (no indexed guidance means semantic search is degraded).
|
|
123
123
|
|
|
124
|
+
**Legacy `doc-*` residue (#1053 S4)** — moflo retired whole-document indexing in favor of chunk-only RAG. The `purge-doc-entries` migration runs on session-start; if any `doc-*` rows linger, the migration did not complete (it ran before the memory DB existed, it errored, or the installed moflo version predates the migration).
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
mcp__moflo__memory_search — { query: "doc-", namespace: "guidance", threshold: 0, limit: 5 }
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
If any returned `key` starts with `doc-`, flag `info`: "legacy doc-* rows present — `purge-doc-entries` migration did not run; fixable via `flo healer --fix` or manual `node node_modules/moflo/bin/run-migrations.mjs`".
|
|
131
|
+
|
|
124
132
|
### 1i. Hooks & MCP Wiring
|
|
125
133
|
|
|
126
134
|
Read `.claude/settings.json`. Check:
|
package/bin/gate.cjs
CHANGED
|
@@ -307,7 +307,7 @@ switch (command) {
|
|
|
307
307
|
process.stdout.write('REMINDER: Use TaskCreate before spawning agents. Task tool is blocked until then.\n');
|
|
308
308
|
}
|
|
309
309
|
if (config.memory_first && s.memoryRequired && !s.memorySearched) {
|
|
310
|
-
process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents
|
|
310
|
+
process.stdout.write('REMINDER: Search memory (mcp__moflo__memory_search) before spawning agents. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
311
311
|
}
|
|
312
312
|
if (s.lastNamespaceHint) {
|
|
313
313
|
// Per-actor single-shot. Each session_id gets the hint at most once per
|
|
@@ -376,7 +376,7 @@ switch (command) {
|
|
|
376
376
|
if (!s.memoryRequired || isMemorySearchedFor(s)) break;
|
|
377
377
|
var target = (process.env.TOOL_INPUT_pattern || '') + ' ' + (process.env.TOOL_INPUT_path || '');
|
|
378
378
|
if (EXEMPT.some(function(p) { return target.indexOf(p) >= 0; })) break;
|
|
379
|
-
process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search
|
|
379
|
+
process.stderr.write('BLOCKED: Search memory before exploring files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
380
380
|
process.exit(2);
|
|
381
381
|
}
|
|
382
382
|
case 'check-before-read': {
|
|
@@ -386,7 +386,7 @@ switch (command) {
|
|
|
386
386
|
var fp = process.env.TOOL_INPUT_file_path || '';
|
|
387
387
|
var isGuidance = fp.indexOf('.claude/guidance/') >= 0 || fp.indexOf('.claude\\guidance\\') >= 0;
|
|
388
388
|
if (!isGuidance && EXEMPT.some(function(p) { return fp.indexOf(p) >= 0; })) break;
|
|
389
|
-
process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search
|
|
389
|
+
process.stderr.write('BLOCKED: Search memory before reading files. Use mcp__moflo__memory_search. On chunk hits, traverse via mcp__moflo__memory_get_neighbors — see .claude/guidance/moflo-memory-protocol.md\n');
|
|
390
390
|
process.exit(2);
|
|
391
391
|
}
|
|
392
392
|
case 'record-task-created': {
|
package/bin/index-guidance.mjs
CHANGED
|
@@ -272,6 +272,22 @@ function getEntryHash(db, key) {
|
|
|
272
272
|
return null;
|
|
273
273
|
}
|
|
274
274
|
|
|
275
|
+
// #1053 S4: doc-* entries retired. Doc-level skip check now reads
// docContentHash off chunk-0 (every chunk carries it).
/**
 * Look up the file-level content hash recorded on the first chunk of a document.
 *
 * @param {object} db - sql.js-style database handle (must expose `prepare`).
 * @param {string} chunkPrefix - Key prefix shared by the document's chunks;
 *   the row queried is `${chunkPrefix}-0` in the module's NAMESPACE.
 * @returns {string|null} The stored `docContentHash`, or `null` when the row
 *   is missing, has no metadata, the metadata is not valid JSON, or the
 *   metadata carries no `docContentHash`.
 */
function getDocHashFromChunkZero(db, chunkPrefix) {
  const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
  let rawMetadata = null;
  try {
    stmt.bind([`${chunkPrefix}-0`, NAMESPACE]);
    if (stmt.step()) rawMetadata = stmt.getAsObject()?.metadata ?? null;
  } finally {
    // Release the statement even when bind/step throws.
    stmt.free();
  }

  if (!rawMetadata) return null;

  try {
    // Normalise "field absent" to null so callers always see string-or-null.
    return JSON.parse(rawMetadata)?.docContentHash ?? null;
  } catch {
    // Unparseable metadata is treated the same as "no hash recorded".
    return null;
  }
}
|
|
290
|
+
|
|
275
291
|
/**
|
|
276
292
|
* Extract overlapping context from adjacent text
|
|
277
293
|
* @param {string} text - The text to extract from
|
|
@@ -536,9 +552,10 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
536
552
|
const content = readFileSync(filePath, 'utf-8');
|
|
537
553
|
const contentHash = hashContent(content);
|
|
538
554
|
|
|
539
|
-
// Check if content changed (skip if same hash unless --force)
|
|
555
|
+
// Check if content changed (skip if same hash unless --force).
|
|
556
|
+
// #1053 S4: doc-* retired — read docContentHash off chunk-0 instead.
|
|
540
557
|
if (!force) {
|
|
541
|
-
const existingHash =
|
|
558
|
+
const existingHash = getDocHashFromChunkZero(db, chunkPrefix);
|
|
542
559
|
if (existingHash === contentHash) {
|
|
543
560
|
return { docKey, status: 'unchanged', chunks: 0 };
|
|
544
561
|
}
|
|
@@ -547,25 +564,19 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
547
564
|
const stats = statSync(filePath);
|
|
548
565
|
const relativePath = '/' + relative(projectRoot, filePath).replace(/\\/g, '/');
|
|
549
566
|
|
|
550
|
-
// Delete old chunks for this file before re-indexing
|
|
567
|
+
// Delete old chunks for this file before re-indexing.
|
|
568
|
+
// #1053 S4: also delete any legacy doc-* row (one-time cleanup if a
|
|
569
|
+
// pre-S4 install left one behind for this prefix).
|
|
551
570
|
deleteByPrefix(db, chunkPrefix);
|
|
571
|
+
db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, docKey]);
|
|
552
572
|
|
|
553
|
-
//
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
fileSize: stats.size,
|
|
559
|
-
lastModified: stats.mtime.toISOString(),
|
|
560
|
-
contentHash,
|
|
561
|
-
indexedAt: new Date().toISOString(),
|
|
562
|
-
ragVersion: '2.0', // Mark as full RAG indexed
|
|
563
|
-
};
|
|
564
|
-
|
|
565
|
-
storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document', ...extraTags]);
|
|
566
|
-
debug(`Stored document: ${docKey}`);
|
|
573
|
+
// #1053 S4: Chunker no longer writes doc-* entries. Audit found zero
|
|
574
|
+
// production readers — they duplicated chunk semantic territory and
|
|
575
|
+
// ate ~13% of search slots without unique signal. parentDoc on chunks
|
|
576
|
+
// remains as an identifier/grouping label; callers needing the source
|
|
577
|
+
// file Read parentPath, per shipped/moflo-memory-protocol.md.
|
|
567
578
|
|
|
568
|
-
//
|
|
579
|
+
// Chunk and store semantic pieces with full RAG linking
|
|
569
580
|
const chunks = chunkMarkdown(content, fileName);
|
|
570
581
|
|
|
571
582
|
if (chunks.length === 0) {
|
|
@@ -576,14 +587,6 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
576
587
|
const hierarchy = buildHierarchy(chunks, chunkPrefix);
|
|
577
588
|
const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
|
|
578
589
|
|
|
579
|
-
// Update document with children references
|
|
580
|
-
const docChildrenMeta = {
|
|
581
|
-
...docMetadata,
|
|
582
|
-
children: siblings,
|
|
583
|
-
chunkCount: chunks.length,
|
|
584
|
-
};
|
|
585
|
-
storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document', ...extraTags]);
|
|
586
|
-
|
|
587
590
|
for (let i = 0; i < chunks.length; i++) {
|
|
588
591
|
const chunk = chunks[i];
|
|
589
592
|
const chunkKey = `${chunkPrefix}-${i}`;
|
|
@@ -592,13 +595,11 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
592
595
|
const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
|
|
593
596
|
const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
|
|
594
597
|
|
|
595
|
-
//
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
|
|
601
|
-
: null;
|
|
598
|
+
// #1053 S5: dropped extractOverlapContext + preamble wrapping. The
|
|
599
|
+
// preambles were a workaround for missing traversal — once memory_get_neighbors
|
|
600
|
+
// is wired (S2), prevChunk/nextChunk metadata + a real call is the
|
|
601
|
+
// alternative path. Saved ~25-30% bloat per chunk on disk and in
|
|
602
|
+
// embeddings.
|
|
602
603
|
|
|
603
604
|
// Get hierarchical relationships
|
|
604
605
|
const hierInfo = hierarchy[chunkKey];
|
|
@@ -608,9 +609,13 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
608
609
|
type: 'chunk',
|
|
609
610
|
ragVersion: '2.0',
|
|
610
611
|
|
|
611
|
-
// Document relationship
|
|
612
|
+
// Document relationship — parentDoc is an identifier/grouping label
|
|
613
|
+
// only after #1053 S4; the actual source doc is at parentPath.
|
|
614
|
+
// docContentHash is the file-level hash, used by the skip-if-unchanged
|
|
615
|
+
// check (the chunker reads it off chunk-0 to decide whether to re-index).
|
|
612
616
|
parentDoc: docKey,
|
|
613
617
|
parentPath: relativePath,
|
|
618
|
+
docContentHash: contentHash,
|
|
614
619
|
|
|
615
620
|
// Sequential navigation (forward/backward links)
|
|
616
621
|
chunkIndex: i,
|
|
@@ -632,30 +637,14 @@ function indexFile(db, filePath, keyPrefix, options = {}) {
|
|
|
632
637
|
isPart: chunk.isPart || false,
|
|
633
638
|
partNum: chunk.partNum || null,
|
|
634
639
|
|
|
635
|
-
// Overlapping context for continuity
|
|
636
|
-
contextOverlapPercent: overlapPercent,
|
|
637
|
-
hasContextBefore: !!contextBefore,
|
|
638
|
-
hasContextAfter: !!contextAfter,
|
|
639
|
-
|
|
640
640
|
// Content metadata
|
|
641
641
|
contentLength: chunk.content.length,
|
|
642
642
|
contentHash: hashContent(chunk.content),
|
|
643
643
|
indexedAt: new Date().toISOString(),
|
|
644
644
|
};
|
|
645
645
|
|
|
646
|
-
//
|
|
647
|
-
|
|
648
|
-
let searchableContent = `# ${chunk.title}\n\n`;
|
|
649
|
-
|
|
650
|
-
if (contextBefore) {
|
|
651
|
-
searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
searchableContent += chunk.content;
|
|
655
|
-
|
|
656
|
-
if (contextAfter) {
|
|
657
|
-
searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
|
|
658
|
-
}
|
|
646
|
+
// #1053 S5: title heading + chunk body. No prev/next preamble.
|
|
647
|
+
const searchableContent = `# ${chunk.title}\n\n${chunk.content}`;
|
|
659
648
|
|
|
660
649
|
// Store chunk with full metadata
|
|
661
650
|
storeEntry(
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration: hard-delete every legacy `doc-*` whole-document entry from the
|
|
3
|
+
* memory DB, across every namespace. The chunker no longer writes these (#1053 S4) — audit
|
|
4
|
+
* found zero production readers, they duplicated chunk semantic territory,
|
|
5
|
+
* and they ate ~13% of search slots on every query without unique signal.
|
|
6
|
+
*
|
|
7
|
+
* Idempotent: re-runs are no-ops because there will be no `doc-*` rows left.
|
|
8
|
+
*
|
|
9
|
+
* @module bin/migrations/purge-doc-entries
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { existsSync, readFileSync, writeFileSync } from 'fs';
|
|
13
|
+
import { mofloResolveURL } from '../lib/moflo-resolve.mjs';
|
|
14
|
+
import { memoryDbPath } from '../lib/moflo-paths.mjs';
|
|
15
|
+
|
|
16
|
+
export const name = 'purge-doc-entries';
|
|
17
|
+
|
|
18
|
+
/**
 * Delete every row whose key matches `doc-%` from the project's memory DB.
 *
 * Returns `{ purged: 0 }` without loading sql.js when the DB file does not
 * exist, and without rewriting the file when no matching rows are found.
 * The DB file is rewritten only when rows were actually deleted.
 *
 * @param {string} projectRoot
 * @returns {Promise<{purged:number}>}
 */
export async function run(projectRoot) {
  const dbPath = memoryDbPath(projectRoot);
  if (!existsSync(dbPath)) return { purged: 0 };

  // Lazy-load sql.js — keeps the manifest-stamped no-op path off the WASM
  // init cost (~30ms cold).
  const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
  const SQL = await initSqlJs();
  const db = new SQL.Database(readFileSync(dbPath));

  try {
    // Scope: every namespace (no namespace filter), since both
    // `flo memory index-guidance` and `bin/index-guidance.mjs` historically
    // wrote doc-* across whatever namespace the entry was scoped to.
    // The pattern requires the hyphen, so keys such as `docs` or `document`
    // are left alone.
    // NOTE(review): SQLite's LIKE is case-insensitive for ASCII by default,
    // so a user key such as `DOC-foo` would also match — confirm acceptable.
    const countStmt = db.prepare(`SELECT COUNT(*) AS cnt FROM memory_entries WHERE key LIKE 'doc-%'`);
    let beforeCount;
    try {
      countStmt.step();
      beforeCount = Number(countStmt.getAsObject().cnt ?? 0);
    } finally {
      countStmt.free();
    }

    if (beforeCount === 0) return { purged: 0 };

    db.run(`DELETE FROM memory_entries WHERE key LIKE 'doc-%'`);
    // Fall back to the pre-delete count if the binding lacks getRowsModified.
    const purged = db.getRowsModified?.() ?? beforeCount;

    if (purged > 0) writeFileSync(dbPath, Buffer.from(db.export()));
    return { purged };
  } finally {
    // Always release the in-memory database, including on error paths.
    db.close();
  }
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration: strip the legacy `[Context from previous section:]` /
|
|
3
|
+
* `[Context from next section:]` preamble blocks from every existing chunk
|
|
4
|
+
* (#1053 S5). The chunker no longer writes them — they were a workaround for
|
|
5
|
+
* missing traversal, and once memory_get_neighbors is wired (S2),
|
|
6
|
+
* prevChunk/nextChunk metadata + a real call is the alternative path.
|
|
7
|
+
*
|
|
8
|
+
* For every chunk whose content carries a preamble marker:
|
|
9
|
+
* 1. Strip the preamble block(s) in place
|
|
10
|
+
* 2. NULL the embedding column so build-embeddings regenerates it from the
|
|
11
|
+
* cleaned content on the next indexer pass
|
|
12
|
+
*
|
|
13
|
+
* Idempotent: chunks already in the new shape (no preamble markers) are
|
|
14
|
+
* untouched.
|
|
15
|
+
*
|
|
16
|
+
* @module bin/migrations/strip-context-preambles
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { existsSync, readFileSync, writeFileSync } from 'fs';
|
|
20
|
+
import { mofloResolveURL } from '../lib/moflo-resolve.mjs';
|
|
21
|
+
import { memoryDbPath } from '../lib/moflo-paths.mjs';
|
|
22
|
+
|
|
23
|
+
export const name = 'strip-context-preambles';
|
|
24
|
+
// Run after purge-doc-entries (which itself has order=0 default). Explicit
|
|
25
|
+
// ordering keeps this independent of fs sort order.
|
|
26
|
+
export const order = 20;
|
|
27
|
+
|
|
28
|
+
// Legacy preamble patterns, validated against real chunks.
// PREV: from the "previous section" marker, lazily up to the first
// blank-line-delimited `---` separator, then any immediately following
// `---\n\n` runs (absorbs back-to-back separators).
// NEXT: one or more `\n\n---` separators followed by the "next section"
// marker, through the end of the string.
const PREV_PREAMBLE = /\[Context from previous section:\][\s\S]*?\n\n---\n\n(?:---\n\n)*/g;
const NEXT_PREAMBLE = /(?:\n\n---)+\n\n\[Context from next section:\][\s\S]*$/g;

/**
 * Remove the legacy previous/next context preamble blocks from chunk content.
 * Content without either marker is returned unchanged.
 *
 * @param {string} content
 * @returns {string}
 */
function strip(content) {
  let cleaned = content;
  for (const pattern of [PREV_PREAMBLE, NEXT_PREAMBLE]) {
    // Global regexes are stateful; start each pass from the beginning.
    pattern.lastIndex = 0;
    cleaned = cleaned.replace(pattern, '');
  }
  return cleaned;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Strip legacy context preambles from active `chunk-*` rows in the project's
 * memory DB, clearing each rewritten row's embedding (per the module header,
 * so it is regenerated from the cleaned content on the next indexer pass).
 *
 * Returns zero counts without loading sql.js when the DB file does not exist.
 * The DB file is rewritten only when at least one row was changed.
 *
 * @param {string} projectRoot
 * @returns {Promise<{stripped:number, untouched:number}>}
 */
export async function run(projectRoot) {
  const dbPath = memoryDbPath(projectRoot);
  if (!existsSync(dbPath)) return { stripped: 0, untouched: 0 };

  const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
  const SQL = await initSqlJs();
  const db = new SQL.Database(readFileSync(dbPath));

  try {
    // Only chunks can carry the preamble — the chunker is the only writer of
    // those markers. Filter on key prefix to keep the LIKE selective; manual
    // memory entries containing the literal string are extremely unlikely and
    // the strip is a no-op for them anyway.
    const rows = [];
    const select = db.prepare(
      `SELECT id, content FROM memory_entries WHERE key LIKE 'chunk-%' AND status = 'active'`,
    );
    try {
      while (select.step()) rows.push(select.getAsObject());
    } finally {
      select.free();
    }

    if (rows.length === 0) return { stripped: 0, untouched: 0 };

    let stripped = 0;
    let untouched = 0;
    const update = db.prepare(`UPDATE memory_entries SET content = ?, embedding = NULL WHERE id = ?`);
    try {
      for (const row of rows) {
        const original = String(row.content || '');
        // Cheap substring check before running the regexes — skips chunks
        // with no marker, which is the common case on an idempotent re-run.
        const hasMarker =
          original.includes('[Context from previous section:]') ||
          original.includes('[Context from next section:]');
        const cleaned = hasMarker ? strip(original) : original;
        if (cleaned === original) {
          untouched++;
          continue;
        }
        update.run([cleaned, row.id]);
        stripped++;
      }
    } finally {
      update.free();
    }

    if (stripped > 0) writeFileSync(dbPath, Buffer.from(db.export()));
    return { stripped, untouched };
  } finally {
    // Always release the in-memory database, including on error paths.
    db.close();
  }
}
|
package/bin/semantic-search.mjs
CHANGED
|
@@ -164,6 +164,7 @@ async function semanticSearch(queryText, options = {}) {
|
|
|
164
164
|
preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
|
|
165
165
|
type: metadata.type || 'unknown',
|
|
166
166
|
parentDoc: metadata.parentDoc || null,
|
|
167
|
+
parentPath: metadata.parentPath || null,
|
|
167
168
|
chunkTitle: metadata.chunkTitle || null,
|
|
168
169
|
});
|
|
169
170
|
} catch (err) {
|
|
@@ -262,7 +263,9 @@ async function main() {
|
|
|
262
263
|
console.log(` Key: ${top.key}`);
|
|
263
264
|
console.log(` Score: ${top.score.toFixed(4)}`);
|
|
264
265
|
if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
|
|
265
|
-
|
|
266
|
+
// #1053 S4: doc-* retired — parentPath is the actionable source location.
|
|
267
|
+
if (top.parentPath) console.log(` Parent: ${top.parentPath}`);
|
|
268
|
+
else if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
|
|
266
269
|
console.log(` Preview: ${top.preview}...`);
|
|
267
270
|
} catch (err) {
|
|
268
271
|
console.error(`[semantic-search] Error: ${err.message}`);
|
|
@@ -1605,6 +1605,8 @@ function runMigrationsAndAnnounce(runnerPath) {
|
|
|
1605
1605
|
const labels = {
|
|
1606
1606
|
'knowledge-to-learnings': 'consolidated knowledge → learnings',
|
|
1607
1607
|
'knowledge-purge': 'removed legacy knowledge namespace rows',
|
|
1608
|
+
'purge-doc-entries': 'pruned legacy doc-* rows (chunk-only RAG, #1053)',
|
|
1609
|
+
'strip-context-preambles': 'stripped chunk preambles; embeddings will rebuild on next index pass (#1053)',
|
|
1608
1610
|
};
|
|
1609
1611
|
|
|
1610
1612
|
for (const line of raw.split('\n')) {
|