moflo 4.8.32 → 4.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/bin/generate-code-map.mjs +955 -955
  2. package/bin/index-guidance.mjs +905 -905
  3. package/bin/index-tests.mjs +728 -728
  4. package/bin/setup-project.mjs +252 -252
  5. package/package.json +10 -5
  6. package/src/@claude-flow/cli/dist/src/commands/doctor.js +1339 -1107
  7. package/src/@claude-flow/cli/dist/src/index.js +2 -18
  8. package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +17 -0
  9. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
  10. package/src/@claude-flow/cli/dist/src/version.js +6 -0
  11. package/src/@claude-flow/cli/package.json +1 -1
  12. package/src/@claude-flow/neural/README.md +260 -0
  13. package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
  14. package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
  15. package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
  16. package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
  17. package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
  18. package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
  19. package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
  20. package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
  21. package/src/@claude-flow/neural/dist/application/index.js +7 -0
  22. package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
  23. package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
  24. package/src/@claude-flow/neural/dist/domain/index.js +8 -0
  25. package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
  26. package/src/@claude-flow/neural/dist/index.js +201 -0
  27. package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
  28. package/src/@claude-flow/neural/dist/modes/base.js +77 -0
  29. package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
  30. package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
  31. package/src/@claude-flow/neural/dist/modes/index.js +13 -0
  32. package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
  33. package/src/@claude-flow/neural/dist/modes/research.js +389 -0
  34. package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
  35. package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
  36. package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
  37. package/src/@claude-flow/neural/dist/sona-integration.js +326 -0
  38. package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
  39. package/src/@claude-flow/neural/dist/types.js +11 -0
  40. package/src/@claude-flow/neural/package.json +26 -0
@@ -1,906 +1,906 @@
1
1
  #!/usr/bin/env node
2
- /**
3
- * Index guidance files into claude-flow memory with full RAG linked segments
4
- *
5
- * Strategy:
6
- * - Full documents stored as `doc-{name}` for complete retrieval
7
- * - Semantic chunks stored as `chunk-{name}-{n}` for precise search
8
- * - FULL RAG LINKING:
9
- * - parentDoc: link to full document
10
- * - prevChunk/nextChunk: forward/backward navigation
11
- * - siblings: all chunk keys from same document
12
- * - children: sub-chunks for hierarchical headers (h2 -> h3)
13
- * - contextBefore/contextAfter: overlapping text for context continuity
14
- * - Chunking based on markdown headers (## and ###) for natural boundaries
15
- * - After indexing, generates embeddings for semantic search (HNSW)
16
- *
17
- * Usage:
18
- * node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
19
- * npx flo-index --force # Force reindex all
20
- * npx flo-index --file X # Index specific file
21
- * npx flo-index --no-embeddings # Skip embedding generation
22
- * npx flo-index --overlap 20 # Set context overlap % (default: 20)
23
- */
24
-
25
- import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
26
- import { resolve, dirname, basename, extname } from 'path';
27
- import { fileURLToPath } from 'url';
28
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
29
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
30
-
31
-
32
- const __dirname = dirname(fileURLToPath(import.meta.url));
33
-
34
/**
 * Walk upward from the current working directory until a directory
 * containing a package.json is found; that directory is treated as the
 * project root. Falls back to process.cwd() if none is found.
 *
 * @returns {string} Absolute path of the project root.
 */
function findProjectRoot() {
  const fsRoot = resolve(process.cwd(), '/');
  for (let dir = process.cwd(); dir !== fsRoot; dir = dirname(dir)) {
    if (existsSync(resolve(dir, 'package.json'))) return dir;
  }
  return process.cwd();
}
43
-
44
- const projectRoot = findProjectRoot();
45
-
46
- // Locate the moflo package root (for bundled guidance that ships with moflo)
47
- const mofloRoot = resolve(__dirname, '..');
48
-
49
- const NAMESPACE = 'guidance';
50
- const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
51
-
52
- // ============================================================================
53
- // Load guidance directories from moflo.yaml, falling back to defaults
54
- // ============================================================================
55
-
56
/**
 * Resolve the list of guidance directories to index.
 *
 * Sources, in order:
 *   1. User config: `guidance.directories` from moflo.yaml (regex-extracted,
 *      no YAML parser needed) or moflo.config.json; defaults to
 *      ['.claude/guidance', 'docs/guides'] when absent/unparseable.
 *   2. moflo's own bundled guidance shipped inside the package.
 *
 * Each entry becomes { path, prefix[, absolute] }; `prefix` is a sanitized
 * slug derived from the path so files with the same name in different
 * directories get distinct memory keys.
 *
 * @returns {Array<{path: string, prefix: string, absolute?: boolean}>}
 */
function loadGuidanceDirs() {
  const dirs = [];

  // 1. Read moflo.yaml / moflo.config.json for user-configured directories
  let configDirs = null;
  const yamlPath = resolve(projectRoot, 'moflo.yaml');
  const jsonPath = resolve(projectRoot, 'moflo.config.json');

  // NOTE: yaml takes precedence; moflo.config.json is only consulted when
  // moflo.yaml does not exist (else-if, not a merge).
  if (existsSync(yamlPath)) {
    try {
      const content = readFileSync(yamlPath, 'utf-8');
      // Simple YAML array extraction — avoids needing js-yaml at runtime
      // Matches: guidance:\n  directories:\n    - .claude/guidance\n    - docs/guides
      const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
      if (guidanceBlock) {
        const items = guidanceBlock[1].match(/-\s+(.+)/g);
        if (items && items.length > 0) {
          configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
        }
      }
    } catch { /* ignore parse errors, fall through to defaults */ }
  } else if (existsSync(jsonPath)) {
    try {
      const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
      if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
        configDirs = raw.guidance.directories;
      }
    } catch { /* ignore parse errors */ }
  }

  // Use config dirs or fall back to defaults
  // Each directory gets a unique prefix derived from its path to avoid key collisions
  // when multiple directories contain files with the same name.
  const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
  for (const d of userDirs) {
    // Normalize Windows separators, strip/abbreviate well-known path heads,
    // then slugify; empty result falls back to 'guidance'.
    const prefix = d.replace(/\\/g, '/')
      .replace(/^\.claude\//, '')
      .replace(/^back-office\/api\/\.claude\//, 'bo-api-')
      .replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
      .replace(/[^a-zA-Z0-9-]/g, '-')
      .replace(/-+/g, '-')
      .replace(/^-|-$/g, '') || 'guidance';
    dirs.push({ path: d, prefix });
  }

  // 2. Include moflo's own bundled guidance (ships with the package)
  //    Only when running inside a consumer project (not moflo itself)
  //    Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
  const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
  const bundledGuidanceDir = existsSync(bundledShippedDir)
    ? bundledShippedDir
    : resolve(mofloRoot, '.claude/guidance');
  const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
  // Skip when the "bundled" dir is actually the project's own guidance dir
  // (i.e. we are running inside the moflo repo itself).
  if (
    existsSync(bundledGuidanceDir) &&
    resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
    resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
  ) {
    dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
  }

  // 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
  //    Indexing them wastes vectors and creates duplicate keys across subprojects.

  return dirs;
}
122
-
123
- const GUIDANCE_DIRS = loadGuidanceDirs();
124
-
125
// Chunking config - optimized for Claude's retrieval (all sizes in characters)
const MIN_CHUNK_SIZE = 50; // Chunks shorter than this are discarded by chunkMarkdown()
const MAX_CHUNK_SIZE = 4000; // Chunks longer than this are split on paragraph boundaries
const FORCE_CHUNK_THRESHOLD = 6000; // Force section-split when a file exceeds this yet produced < 3 chunks
const DEFAULT_OVERLAP_PERCENT = 20; // % of each neighbour copied into contextBefore/contextAfter
130
-
131
// Parse args (flat flag scan; value flags read the following argv slot)
const args = process.argv.slice(2);
const force = args.includes('--force'); // reindex even when the content hash is unchanged
const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
const verbose = args.includes('--verbose') || args.includes('-v');
const skipEmbeddings = args.includes('--no-embeddings');
// NOTE(review): `--overlap 0` falls back to the default because 0 is falsy
// with `||` — presumably intended (0 overlap is never useful), but confirm.
const overlapPercent = args.includes('--overlap')
  ? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
  : DEFAULT_OVERLAP_PERCENT;
140
-
141
/** Print a status line to stdout, tagged with the script name. */
function log(message) {
  console.log(['[index-guidance]', message].join(' '));
}
144
-
145
/** Like log(), but only emits when the --verbose / -v flag is set. */
function debug(message) {
  if (!verbose) return;
  console.log(`[index-guidance] ${message}`);
}
148
-
149
/** Create the directory that will hold the memory DB, if it is missing. */
function ensureDbDir() {
  const parent = dirname(DB_PATH);
  if (existsSync(parent)) return;
  mkdirSync(parent, { recursive: true });
}
155
-
156
/**
 * Open (or create) the sql.js database backing claude-flow memory.
 *
 * sql.js is an in-memory WASM SQLite: the whole file is loaded into a
 * buffer here, and mutations are only persisted when saveDb() writes the
 * exported image back to DB_PATH.
 *
 * @returns {Promise<object>} A sql.js Database with the schema ensured.
 */
async function getDb() {
  ensureDbDir();
  const SQL = await initSqlJs();
  let db;
  if (existsSync(DB_PATH)) {
    const buffer = readFileSync(DB_PATH);
    db = new SQL.Database(buffer);
  } else {
    db = new SQL.Database();
  }

  // Ensure table exists with unique constraint.
  // UNIQUE(namespace, key) is what lets storeEntry() use
  // INSERT OR REPLACE as an upsert keyed on (namespace, key).
  db.run(`
    CREATE TABLE IF NOT EXISTS memory_entries (
      id TEXT PRIMARY KEY,
      key TEXT NOT NULL,
      namespace TEXT DEFAULT 'default',
      content TEXT NOT NULL,
      type TEXT DEFAULT 'semantic',
      embedding TEXT,
      embedding_model TEXT DEFAULT 'local',
      embedding_dimensions INTEGER,
      tags TEXT,
      metadata TEXT,
      owner_id TEXT,
      created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
      updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
      expires_at INTEGER,
      last_accessed_at INTEGER,
      access_count INTEGER DEFAULT 0,
      status TEXT DEFAULT 'active',
      UNIQUE(namespace, key)
    )
  `);

  db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
  db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);

  return db;
}
196
-
197
/** Persist the in-memory sql.js database image back to DB_PATH. */
function saveDb(db) {
  writeFileSync(DB_PATH, Buffer.from(db.export()));
}
201
-
202
/**
 * Generate a loosely-unique row id: `mem_<epoch-ms>_<random base36>`.
 *
 * Fix: `String.prototype.substr` is deprecated — `slice(2, 11)` is the
 * exact equivalent of `substr(2, 9)`. Date.now() gives coarse ordering;
 * the random suffix disambiguates ids minted in the same millisecond.
 *
 * @returns {string} A new id string.
 */
function generateId() {
  return `mem_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}
205
-
206
/**
 * Cheap 32-bit string hash (Java-style h*31+c), returned as a hex string.
 * Used only for change detection, not security.
 *
 * @param {string} content - Text to hash.
 * @returns {string} Hex representation of the signed 32-bit hash.
 */
function hashContent(content) {
  let acc = 0;
  for (let i = 0; i < content.length; i++) {
    // (acc << 5) - acc === acc * 31 under 32-bit truncation; `| 0`
    // performs the same ToInt32 clamp as the classic `hash & hash`.
    acc = (acc * 31 + content.charCodeAt(i)) | 0;
  }
  return acc.toString(16);
}
215
-
216
/**
 * Upsert one memory entry into the guidance namespace.
 * UNIQUE(namespace, key) makes INSERT OR REPLACE overwrite any existing
 * row for the same key.
 *
 * @param {object} db - sql.js database handle.
 * @param {string} key - Memory key (doc-* or chunk-*).
 * @param {string} content - Entry body.
 * @param {object} [metadata] - JSON-serialized into the metadata column.
 * @param {string[]} [tags] - JSON-serialized into the tags column.
 * @returns {boolean} Always true.
 */
function storeEntry(db, key, content, metadata = {}, tags = []) {
  const timestamp = Date.now();
  db.run(`
    INSERT OR REPLACE INTO memory_entries
    (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
  `, [
    generateId(),
    key,
    NAMESPACE,
    content,
    JSON.stringify(metadata),
    JSON.stringify(tags),
    timestamp,
    timestamp,
  ]);

  return true;
}
230
-
231
/**
 * Delete every entry in NAMESPACE whose key starts with `prefix`.
 *
 * Fix: `_` and `%` are SQL LIKE wildcards, so a prefix containing an
 * underscore (e.g. derived from a file named `my_doc.md`) previously also
 * matched unrelated keys ("chunk-g-my_doc" matched "chunk-g-myXdoc"),
 * deleting too much. Escape wildcard characters and match them literally
 * via an explicit ESCAPE clause.
 *
 * @param {object} db - sql.js database handle.
 * @param {string} prefix - Literal key prefix to delete.
 */
function deleteByPrefix(db, prefix) {
  const literal = prefix.replace(/[\\%_]/g, (ch) => `\\${ch}`);
  db.run(
    `DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ? ESCAPE '\\'`,
    [NAMESPACE, `${literal}%`]
  );
}
234
-
235
/**
 * Look up the stored contentHash for a key, used for change detection.
 *
 * @param {object} db - sql.js database handle.
 * @param {string} key - Memory key to look up.
 * @returns {string|null|undefined} The stored hash, or null when the row
 *   is missing / metadata is unparseable (undefined if metadata parses
 *   but has no contentHash field).
 */
function getEntryHash(db, key) {
  const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
  stmt.bind([key, NAMESPACE]);
  const row = stmt.step() ? stmt.getAsObject() : null;
  stmt.free();

  if (!row?.metadata) return null;
  try {
    return JSON.parse(row.metadata).contentHash;
  } catch {
    return null;
  }
}
248
-
249
/**
 * Pull an overlapping slice of context from a neighbouring chunk's text,
 * preferring to cut at a paragraph or sentence boundary near the target
 * length so the overlap reads naturally.
 *
 * @param {string} text - Source text to borrow context from.
 * @param {number} percent - Portion of the text (by length) to take.
 * @param {string} position - 'start' takes the head; anything else the tail.
 * @returns {string} Trimmed context slice, or '' when not worthwhile.
 */
function extractOverlapContext(text, percent, position) {
  if (!text || percent <= 0) return '';

  const targetLength = Math.floor(text.length * (percent / 100));
  if (targetLength < 20) return ''; // Too short to be useful

  if (position === 'start') {
    // Head of the text: look for a break slightly around the target length.
    const paraBreak = text.indexOf('\n\n', targetLength - 50);
    const sentenceBreak = text.indexOf('. ', targetLength - 30);
    let cutoff = targetLength;
    if (paraBreak > 0 && paraBreak < targetLength + 100) {
      cutoff = paraBreak;
    } else if (sentenceBreak > 0 && sentenceBreak < targetLength + 50) {
      cutoff = sentenceBreak + 1;
    }
    return text.substring(0, cutoff).trim();
  }

  // Tail of the text: same idea, scanning backwards from the split point.
  const splitPoint = text.length - targetLength;
  const paraBreak = text.lastIndexOf('\n\n', splitPoint + 50);
  const sentenceBreak = text.lastIndexOf('. ', splitPoint + 30);
  let begin = splitPoint;
  if (paraBreak > 0 && paraBreak > splitPoint - 100) {
    begin = paraBreak + 2;
  } else if (sentenceBreak > 0 && sentenceBreak > splitPoint - 50) {
    begin = sentenceBreak + 2;
  }
  return text.substring(begin).trim();
}
290
-
291
/**
 * Split markdown content into semantic chunks based on headers.
 *
 * Three phases:
 *   1. Split on ## / ### headers (chunks below MIN_CHUNK_SIZE are dropped).
 *   2. Split any chunk above MAX_CHUNK_SIZE into "(part N)" pieces at
 *      paragraph boundaries.
 *   3. If the file is large (> FORCE_CHUNK_THRESHOLD) but produced fewer
 *      than 3 chunks, force-split on `---` rules / ## headers / lines and
 *      regroup into ~2500-char sections.
 *
 * @param {string} content - Raw markdown.
 * @param {string} fileName - Used as fallback title for preamble chunks.
 * @returns {Array<{title: string, content: string, level: number, headerLine: number}>}
 */
function chunkMarkdown(content, fileName) {
  const lines = content.split('\n');
  const chunks = [];
  // Level 0 = preamble before the first header; title falls back to fileName.
  let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    // Strip CRLF carriage returns for Windows compatibility
    const line = lines[lineNum].replace(/\r$/, '');

    // Check for headers (## and ###)
    const h2Match = line.match(/^## (.+)$/);
    const h3Match = line.match(/^### (.+)$/);

    if (h2Match || h3Match) {
      // Save current chunk if it has content
      if (currentChunk.content.length > 0) {
        const chunkContent = currentChunk.content.join('\n').trim();
        if (chunkContent.length >= MIN_CHUNK_SIZE) {
          chunks.push({
            title: currentChunk.title,
            content: chunkContent,
            level: currentChunk.level,
            headerLine: currentChunk.headerLine
          });
        }
      }

      // Start new chunk (header line itself is kept in the chunk body)
      currentChunk = {
        title: h2Match ? h2Match[1] : h3Match[1],
        content: [line],
        level: h2Match ? 2 : 3,
        headerLine: lineNum
      };
    } else {
      currentChunk.content.push(line);
    }
  }

  // Don't forget the last chunk
  if (currentChunk.content.length > 0) {
    const chunkContent = currentChunk.content.join('\n').trim();
    if (chunkContent.length >= MIN_CHUNK_SIZE) {
      chunks.push({
        title: currentChunk.title,
        content: chunkContent,
        level: currentChunk.level,
        headerLine: currentChunk.headerLine
      });
    }
  }

  // Handle chunks that are too large - split by paragraphs
  const finalChunks = [];
  for (const chunk of chunks) {
    if (chunk.content.length > MAX_CHUNK_SIZE) {
      const paragraphs = chunk.content.split(/\n\n+/);
      let currentPart = [];
      let currentLength = 0;
      let partNum = 1;

      for (const para of paragraphs) {
        // Flush the accumulated part before it would exceed MAX_CHUNK_SIZE
        // (a single oversized paragraph is kept whole).
        if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
          finalChunks.push({
            title: `${chunk.title} (part ${partNum})`,
            content: currentPart.join('\n\n'),
            level: chunk.level,
            headerLine: chunk.headerLine,
            isPart: true,
            partNum
          });
          currentPart = [para];
          currentLength = para.length;
          partNum++;
        } else {
          currentPart.push(para);
          currentLength += para.length;
        }
      }

      // Trailing part: only labelled "(part N)" when a split actually happened.
      if (currentPart.length > 0) {
        finalChunks.push({
          title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
          content: currentPart.join('\n\n'),
          level: chunk.level,
          headerLine: chunk.headerLine,
          isPart: partNum > 1,
          partNum: partNum > 1 ? partNum : undefined
        });
      }
    } else {
      finalChunks.push(chunk);
    }
  }

  // FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
  const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
  if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
    debug(`  Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
    const allContent = finalChunks.map(c => c.content).join('\n\n');

    // Split on --- horizontal rules first, then on ## headers, then on paragraphs
    const TARGET_CHUNK_SIZE = 2500;
    const rawSections = allContent.split(/\n---+\n/);
    let sections = [];

    for (const raw of rawSections) {
      // Further split on ## headers if section is too large
      if (raw.length > TARGET_CHUNK_SIZE) {
        const headerSplit = raw.split(/\n(?=## )/);
        for (const hSect of headerSplit) {
          if (hSect.length > TARGET_CHUNK_SIZE) {
            // Split very long sections on single newlines as last resort
            const lines = hSect.split('\n');
            let chunk = '';
            for (const line of lines) {
              if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
                sections.push(chunk.trim());
                chunk = line;
              } else {
                chunk += (chunk ? '\n' : '') + line;
              }
            }
            // Sections below ~30 chars are noise and dropped.
            if (chunk.trim().length > 30) sections.push(chunk.trim());
          } else if (hSect.trim().length > 30) {
            sections.push(hSect.trim());
          }
        }
      } else if (raw.trim().length > 30) {
        sections.push(raw.trim());
      }
    }

    // Now group sections into chunks of roughly TARGET_CHUNK_SIZE
    const forcedChunks = [];
    let currentGroup = [];
    let currentLength = 0;
    let groupNum = 1;

    // Emit the accumulated group as one forced chunk; title comes from the
    // group's first header line when present, else a numbered fallback.
    const flushGroup = () => {
      if (currentGroup.length === 0) return;
      const firstLine = currentGroup[0].split('\n')[0].trim();
      const title = firstLine.startsWith('#')
        ? firstLine.replace(/^#+\s*/, '').slice(0, 60)
        : `${fileName} Section ${groupNum}`;

      forcedChunks.push({
        title,
        content: currentGroup.join('\n\n'),
        level: 2,
        headerLine: 0,
        isForced: true,
        forceNum: groupNum
      });
      groupNum++;
      currentGroup = [];
      currentLength = 0;
    };

    for (const section of sections) {
      if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
        flushGroup();
      }
      currentGroup.push(section);
      currentLength += section.length;
    }
    flushGroup();

    // Always use force-chunked results if we got multiple chunks
    if (forcedChunks.length >= 2) {
      debug(`  Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
      return forcedChunks;
    }
  }

  return finalChunks;
}
472
-
473
/**
 * Build hierarchical relationships between chunks: each H2 chunk becomes
 * the parent of every H3 chunk that follows it (until the next H2).
 *
 * @param {Array<{level: number}>} chunks - Chunks in document order.
 * @param {string} chunkPrefix - Key prefix; chunk i gets key `${chunkPrefix}-${i}`.
 * @returns {Object<string, {parent: string|null, children: string[]}>}
 */
function buildHierarchy(chunks, chunkPrefix) {
  const hierarchy = {};
  let lastH2Key = null;

  chunks.forEach((chunk, index) => {
    const key = `${chunkPrefix}-${index}`;
    hierarchy[key] = { parent: null, children: [] };

    if (chunk.level === 2) {
      lastH2Key = key;
    } else if (chunk.level === 3 && lastH2Key !== null) {
      // H3 under the most recent H2: link both directions.
      hierarchy[key].parent = lastH2Key;
      hierarchy[lastH2Key].children.push(key);
    }
  });

  return hierarchy;
}
501
-
502
/**
 * Index one markdown file: store the full document under `doc-{prefix}-{name}`
 * plus linked semantic chunks under `chunk-{prefix}-{name}-{i}` with full
 * RAG metadata (prev/next, siblings, h2→h3 hierarchy, overlap context).
 *
 * @param {object} db - sql.js database handle.
 * @param {string} filePath - Absolute path of the markdown file.
 * @param {string} keyPrefix - Directory-derived prefix for memory keys.
 * @returns {{docKey: string, status: 'indexed'|'unchanged'|'error', chunks: number, error?: string}}
 */
function indexFile(db, filePath, keyPrefix) {
  const fileName = basename(filePath, extname(filePath));
  const docKey = `doc-${keyPrefix}-${fileName}`;
  const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;

  try {
    const content = readFileSync(filePath, 'utf-8');
    const contentHash = hashContent(content);

    // Check if content changed (skip if same hash unless --force)
    if (!force) {
      const existingHash = getEntryHash(db, docKey);
      if (existingHash === contentHash) {
        return { docKey, status: 'unchanged', chunks: 0 };
      }
    }

    const stats = statSync(filePath);
    // Project-relative path with forward slashes (Windows-safe).
    const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');

    // Delete old chunks for this file before re-indexing
    deleteByPrefix(db, chunkPrefix);

    // 1. Store full document
    const docMetadata = {
      type: 'document',
      filePath: relativePath,
      fileSize: stats.size,
      lastModified: stats.mtime.toISOString(),
      contentHash,
      indexedAt: new Date().toISOString(),
      ragVersion: '2.0', // Mark as full RAG indexed
    };

    storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
    debug(`Stored document: ${docKey}`);

    // 2. Chunk and store semantic pieces with full RAG linking
    const chunks = chunkMarkdown(content, fileName);

    if (chunks.length === 0) {
      return { docKey, status: 'indexed', chunks: 0 };
    }

    // Build hierarchy and sibling list
    const hierarchy = buildHierarchy(chunks, chunkPrefix);
    const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);

    // Update document with children references.
    // NOTE(review): this second storeEntry replaces the row written above
    // (INSERT OR REPLACE on the same key) and assigns a fresh id — chunk
    // keys are only known after chunking, so the double write looks
    // intentional; confirm downstream nothing relies on stable doc ids.
    const docChildrenMeta = {
      ...docMetadata,
      children: siblings,
      chunkCount: chunks.length,
    };
    storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document']);

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      const chunkKey = `${chunkPrefix}-${i}`;

      // Build prev/next links
      const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
      const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;

      // Extract overlapping context from adjacent chunks
      const contextBefore = i > 0
        ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
        : null;
      const contextAfter = i < chunks.length - 1
        ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
        : null;

      // Get hierarchical relationships
      const hierInfo = hierarchy[chunkKey];

      const chunkMetadata = {
        type: 'chunk',
        ragVersion: '2.0',

        // Document relationship
        parentDoc: docKey,
        parentPath: relativePath,

        // Sequential navigation (forward/backward links)
        chunkIndex: i,
        totalChunks: chunks.length,
        prevChunk,
        nextChunk,

        // Sibling awareness
        siblings,

        // Hierarchical relationships (h2 -> h3)
        hierarchicalParent: hierInfo.parent,
        hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,

        // Chunk info
        chunkTitle: chunk.title,
        headerLevel: chunk.level,
        headerLine: chunk.headerLine,
        isPart: chunk.isPart || false,
        partNum: chunk.partNum || null,

        // Overlapping context for continuity
        contextOverlapPercent: overlapPercent,
        hasContextBefore: !!contextBefore,
        hasContextAfter: !!contextAfter,

        // Content metadata
        contentLength: chunk.content.length,
        contentHash: hashContent(chunk.content),
        indexedAt: new Date().toISOString(),
      };

      // Build searchable content with title context
      // Include overlap context for better retrieval
      let searchableContent = `# ${chunk.title}\n\n`;

      if (contextBefore) {
        searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
      }

      searchableContent += chunk.content;

      if (contextAfter) {
        searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
      }

      // Store chunk with full metadata
      storeEntry(
        db,
        chunkKey,
        searchableContent,
        chunkMetadata,
        [keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
      );

      debug(`  Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
    }

    return { docKey, status: 'indexed', chunks: chunks.length };
  } catch (err) {
    // Indexing is best-effort per file: report the error, keep going.
    return { docKey, status: 'error', error: err.message, chunks: 0 };
  }
}
647
-
648
/**
 * Recursively collect all .md files under a directory, depth-first in
 * readdir order. Skips build/VCS directories and CLAUDE.md (Claude loads
 * that into context automatically, so indexing it would duplicate vectors).
 *
 * @param {string} dir - Root directory to scan.
 * @returns {string[]} Absolute paths of matching markdown files.
 */
function walkMdFiles(dir) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
  const SKIP_FILES = new Set(['CLAUDE.md']);
  const collected = [];

  const visit = (current) => {
    if (!existsSync(current)) return;
    for (const entry of readdirSync(current, { withFileTypes: true })) {
      const fullPath = resolve(current, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) visit(fullPath);
        continue;
      }
      const isMarkdown = entry.isFile() && entry.name.endsWith('.md');
      if (isMarkdown && !SKIP_FILES.has(entry.name)) collected.push(fullPath);
    }
  };

  visit(dir);
  return collected;
}
672
-
673
/**
 * Index every markdown file under one configured guidance directory.
 *
 * @param {object} db - sql.js database handle.
 * @param {{path: string, prefix: string, absolute?: boolean, fileFilter?: string[]}} dirConfig
 *   - `absolute`: path is already absolute (bundled dirs); else resolved
 *     against projectRoot. `fileFilter`: optional basename whitelist.
 * @returns {Array<object>} One indexFile() result per file (empty if the
 *   directory does not exist).
 */
function indexDirectory(db, dirConfig) {
  const dirPath = dirConfig.absolute ? dirConfig.path : resolve(projectRoot, dirConfig.path);

  if (!existsSync(dirPath)) {
    if (verbose) debug(`Directory not found: ${dirConfig.path}`);
    return [];
  }

  let files = walkMdFiles(dirPath);
  if (dirConfig.fileFilter) {
    files = files.filter((f) => dirConfig.fileFilter.includes(basename(f)));
  }

  return files.map((filePath) => indexFile(db, filePath, dirConfig.prefix));
}
694
-
695
/**
 * Remove stale entries for files that no longer exist on disk.
 * Uses the set of docKeys seen during the current indexing run to determine
 * which entries are stale, rather than reconstructing file paths from keys
 * (which breaks for files in subdirectories).
 *
 * @param {object} db - sql.js database handle.
 * @param {Set<string>} currentDocKeys - doc-* keys observed this run.
 * @returns {number} Total number of rows removed.
 */
function cleanStaleEntries(db, currentDocKeys) {
  const docsStmt = db.prepare(
    `SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
  );
  docsStmt.bind([NAMESPACE]);
  const docs = [];
  while (docsStmt.step()) docs.push(docsStmt.getAsObject());
  docsStmt.free();

  let staleCount = 0;

  for (const { key } of docs) {
    // If this doc key was seen during the current indexing run, it's not stale
    if (currentDocKeys.has(key)) continue;

    const chunkPrefix = key.replace('doc-', 'chunk-');
    // Count-before/after diff is used to report how many rows the two
    // DELETEs removed in total.
    // NOTE(review): NAMESPACE is interpolated directly into the COUNT SQL —
    // safe only because it is a module-level constant; keep it that way.
    // Also, `_` in chunkPrefix acts as a LIKE wildcard here — confirm
    // whether prefixes can contain underscores (filenames often do).
    const countBefore = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
    const countAfter = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
    const removed = countBefore - countAfter;
    if (removed > 0) {
      log(`  Removed ${removed} stale entries for deleted file: ${key}`);
      staleCount += removed;
    }
  }

  // Also clean any orphaned entries not matching doc-/chunk- patterns
  const orphanStmt = db.prepare(
    `SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
  );
  orphanStmt.bind([NAMESPACE]);
  const orphans = [];
  while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
  orphanStmt.free();
  for (const { key } of orphans) {
    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
    staleCount++;
    log(`  Removed orphan entry: ${key}`);
  }

  return staleCount;
}
744
-
745
- // Main
746
- console.log('');
747
- log('Indexing guidance files with FULL RAG linked segments...');
748
- log(` Context overlap: ${overlapPercent}%`);
749
- log(` Directories (${GUIDANCE_DIRS.length}):`);
750
- for (const d of GUIDANCE_DIRS) {
751
- const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
752
- const exists = existsSync(dirPath);
753
- log(` ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
754
- }
755
- console.log('');
756
-
757
- const db = await getDb();
758
- let docsIndexed = 0;
759
- let chunksIndexed = 0;
760
- let unchanged = 0;
761
- let errors = 0;
762
- const currentDocKeys = new Set();
763
-
764
- if (specificFile) {
765
- // Index single file
766
- const filePath = resolve(projectRoot, specificFile);
767
- if (!existsSync(filePath)) {
768
- log(`File not found: ${specificFile}`);
769
- process.exit(1);
770
- }
771
-
772
- let prefix = 'docs';
773
- if (specificFile.includes('.claude/guidance/')) {
774
- prefix = 'guidance';
775
- }
776
-
777
- const result = indexFile(db, filePath, prefix);
778
- log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);
779
-
780
- if (result.status === 'indexed') {
781
- docsIndexed++;
782
- chunksIndexed += result.chunks;
783
- } else if (result.status === 'unchanged') {
784
- unchanged++;
785
- } else {
786
- errors++;
787
- }
788
- } else {
789
- // Index all directories
790
- for (const dir of GUIDANCE_DIRS) {
791
- log(`Scanning ${dir.path}/...`);
792
- const results = indexDirectory(db, dir);
793
-
794
- for (const result of results) {
795
- if (result.status === 'indexed' || result.status === 'unchanged') {
796
- currentDocKeys.add(result.docKey);
797
- }
798
- if (result.status === 'indexed') {
799
- log(` ✅ ${result.docKey} (${result.chunks} chunks)`);
800
- docsIndexed++;
801
- chunksIndexed += result.chunks;
802
- } else if (result.status === 'unchanged') {
803
- unchanged++;
804
- } else {
805
- log(` ❌ ${result.docKey}: ${result.error}`);
806
- errors++;
807
- }
808
- }
809
- }
810
- }
811
-
812
- // Clean stale entries for deleted files (unless indexing a specific file)
813
- let staleRemoved = 0;
814
- if (!specificFile) {
815
- log('Cleaning stale entries for deleted files...');
816
- staleRemoved = cleanStaleEntries(db, currentDocKeys);
817
- if (staleRemoved === 0) {
818
- log(' No stale entries found');
819
- }
820
- }
821
-
822
- // Write changes back to disk
823
- if (docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0) saveDb(db);
824
-
825
- // Check for entries missing embeddings (e.g. prior background run failed)
826
- let missingEmbeddings = 0;
827
- {
828
- const stmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ? AND (embedding IS NULL OR embedding = '')`);
829
- stmt.bind([NAMESPACE]);
830
- if (stmt.step()) missingEmbeddings = stmt.getAsObject().cnt;
831
- stmt.free();
832
- }
833
- db.close();
834
-
835
- console.log('');
836
- log('═══════════════════════════════════════════════════════════');
837
- log(' FULL RAG INDEXING COMPLETE');
838
- log('═══════════════════════════════════════════════════════════');
839
- log(` Documents indexed: ${docsIndexed}`);
840
- log(` Chunks created: ${chunksIndexed}`);
841
- log(` Unchanged: ${unchanged}`);
842
- log(` Stale removed: ${staleRemoved}`);
843
- log(` Errors: ${errors}`);
844
- log('');
845
- log(' RAG Features Enabled:');
846
- log(` • Forward/backward links (prevChunk/nextChunk)`);
847
- log(` • Sibling awareness (all chunks from same doc)`);
848
- log(` • Hierarchical links (h2 -> h3 parent/children)`);
849
- log(` • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
850
- log('═══════════════════════════════════════════════════════════');
851
-
852
- // Generate embeddings for new entries or backfill missing ones
853
- // Runs in BACKGROUND to avoid blocking startup
854
- const needsEmbeddings = (docsIndexed > 0 || chunksIndexed > 0 || missingEmbeddings > 0);
855
- if (!skipEmbeddings && needsEmbeddings) {
856
- if (missingEmbeddings > 0 && docsIndexed === 0 && chunksIndexed === 0) {
857
- log(`${missingEmbeddings} entries missing embeddings — backfilling...`);
858
- }
859
- console.log('');
860
- log('Spawning embedding generation in background...');
861
-
862
- const { spawn } = await import('child_process');
863
-
864
- // Look for build-embeddings script in multiple locations:
865
- // 1. Shipped with moflo (node_modules/moflo/bin/)
866
- // 2. Project-local (.claude/scripts/)
867
- const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
868
- const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
869
- const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;
870
-
871
- if (existsSync(embeddingScript)) {
872
- const embeddingArgs = ['--namespace', NAMESPACE];
873
-
874
- // Create log file for background process output
875
- const logDir = resolve(projectRoot, '.swarm/logs');
876
- if (!existsSync(logDir)) {
877
- mkdirSync(logDir, { recursive: true });
878
- }
879
- const logFile = resolve(logDir, 'embeddings.log');
880
- const { openSync } = await import('fs');
881
- const out = openSync(logFile, 'a');
882
- const err = openSync(logFile, 'a');
883
-
884
- // Spawn in background - don't wait for completion
885
- const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
886
- stdio: ['ignore', out, err],
887
- cwd: projectRoot,
888
- detached: true,
889
- windowsHide: true // Suppress command windows on Windows
890
- });
891
- proc.unref(); // Allow parent to exit independently
892
-
893
- log(`Background embedding started (PID: ${proc.pid})`);
894
- log(`Log file: .swarm/logs/embeddings.log`);
895
- } else {
896
- log('⚠️ Embedding script not found, skipping embedding generation');
897
- }
898
- } else if (skipEmbeddings) {
899
- log('Skipping embedding generation (--no-embeddings)');
900
- } else {
901
- log('No new content indexed, skipping embedding generation');
902
- }
903
-
904
- if (errors > 0) {
905
- process.exit(1);
906
- }
2
+ /**
3
+ * Index guidance files into claude-flow memory with full RAG linked segments
4
+ *
5
+ * Strategy:
6
+ * - Full documents stored as `doc-{name}` for complete retrieval
7
+ * - Semantic chunks stored as `chunk-{name}-{n}` for precise search
8
+ * - FULL RAG LINKING:
9
+ * - parentDoc: link to full document
10
+ * - prevChunk/nextChunk: forward/backward navigation
11
+ * - siblings: all chunk keys from same document
12
+ * - children: sub-chunks for hierarchical headers (h2 -> h3)
13
+ * - contextBefore/contextAfter: overlapping text for context continuity
14
+ * - Chunking based on markdown headers (## and ###) for natural boundaries
15
+ * - After indexing, generates embeddings for semantic search (HNSW)
16
+ *
17
+ * Usage:
18
+ * node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
19
+ * npx flo-index --force # Force reindex all
20
+ * npx flo-index --file X # Index specific file
21
+ * npx flo-index --no-embeddings # Skip embedding generation
22
+ * npx flo-index --overlap 20 # Set context overlap % (default: 15)
23
+ */
24
+
25
+ import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
26
+ import { resolve, dirname, basename, extname } from 'path';
27
+ import { fileURLToPath } from 'url';
28
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
29
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
30
+
31
+
32
+ const __dirname = dirname(fileURLToPath(import.meta.url));
33
+
34
+ function findProjectRoot() {
35
+ let dir = process.cwd();
36
+ const root = resolve(dir, '/');
37
+ while (dir !== root) {
38
+ if (existsSync(resolve(dir, 'package.json'))) return dir;
39
+ dir = dirname(dir);
40
+ }
41
+ return process.cwd();
42
+ }
43
+
44
+ const projectRoot = findProjectRoot();
45
+
46
+ // Locate the moflo package root (for bundled guidance that ships with moflo)
47
+ const mofloRoot = resolve(__dirname, '..');
48
+
49
+ const NAMESPACE = 'guidance';
50
+ const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
51
+
52
+ // ============================================================================
53
+ // Load guidance directories from moflo.yaml, falling back to defaults
54
+ // ============================================================================
55
+
56
+ function loadGuidanceDirs() {
57
+ const dirs = [];
58
+
59
+ // 1. Read moflo.yaml / moflo.config.json for user-configured directories
60
+ let configDirs = null;
61
+ const yamlPath = resolve(projectRoot, 'moflo.yaml');
62
+ const jsonPath = resolve(projectRoot, 'moflo.config.json');
63
+
64
+ if (existsSync(yamlPath)) {
65
+ try {
66
+ const content = readFileSync(yamlPath, 'utf-8');
67
+ // Simple YAML array extraction — avoids needing js-yaml at runtime
68
+ // Matches: guidance:\n directories:\n - .claude/guidance\n - docs/guides
69
+ const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
70
+ if (guidanceBlock) {
71
+ const items = guidanceBlock[1].match(/-\s+(.+)/g);
72
+ if (items && items.length > 0) {
73
+ configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
74
+ }
75
+ }
76
+ } catch { /* ignore parse errors, fall through to defaults */ }
77
+ } else if (existsSync(jsonPath)) {
78
+ try {
79
+ const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
80
+ if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
81
+ configDirs = raw.guidance.directories;
82
+ }
83
+ } catch { /* ignore parse errors */ }
84
+ }
85
+
86
+ // Use config dirs or fall back to defaults
87
+ // Each directory gets a unique prefix derived from its path to avoid key collisions
88
+ // when multiple directories contain files with the same name.
89
+ const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
90
+ for (const d of userDirs) {
91
+ const prefix = d.replace(/\\/g, '/')
92
+ .replace(/^\.claude\//, '')
93
+ .replace(/^back-office\/api\/\.claude\//, 'bo-api-')
94
+ .replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
95
+ .replace(/[^a-zA-Z0-9-]/g, '-')
96
+ .replace(/-+/g, '-')
97
+ .replace(/^-|-$/g, '') || 'guidance';
98
+ dirs.push({ path: d, prefix });
99
+ }
100
+
101
+ // 2. Include moflo's own bundled guidance (ships with the package)
102
+ // Only when running inside a consumer project (not moflo itself)
103
+ // Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
104
+ const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
105
+ const bundledGuidanceDir = existsSync(bundledShippedDir)
106
+ ? bundledShippedDir
107
+ : resolve(mofloRoot, '.claude/guidance');
108
+ const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
109
+ if (
110
+ existsSync(bundledGuidanceDir) &&
111
+ resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
112
+ resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
113
+ ) {
114
+ dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
115
+ }
116
+
117
+ // 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
118
+ // Indexing them wastes vectors and creates duplicate keys across subprojects.
119
+
120
+ return dirs;
121
+ }
122
+
123
+ const GUIDANCE_DIRS = loadGuidanceDirs();
124
+
125
+ // Chunking config - optimized for Claude's retrieval
126
+ const MIN_CHUNK_SIZE = 50; // Lower minimum to avoid mega-chunks
127
+ const MAX_CHUNK_SIZE = 4000; // Larger chunks for code-heavy docs (fits context better)
128
+ const FORCE_CHUNK_THRESHOLD = 6000; // Force paragraph-split if file > this and < 3 chunks
129
+ const DEFAULT_OVERLAP_PERCENT = 20; // Increased context overlap for better continuity
130
+
131
+ // Parse args
132
+ const args = process.argv.slice(2);
133
+ const force = args.includes('--force');
134
+ const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
135
+ const verbose = args.includes('--verbose') || args.includes('-v');
136
+ const skipEmbeddings = args.includes('--no-embeddings');
137
+ const overlapPercent = args.includes('--overlap')
138
+ ? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
139
+ : DEFAULT_OVERLAP_PERCENT;
140
+
141
+ function log(msg) {
142
+ console.log(`[index-guidance] ${msg}`);
143
+ }
144
+
145
+ function debug(msg) {
146
+ if (verbose) console.log(`[index-guidance] ${msg}`);
147
+ }
148
+
149
+ function ensureDbDir() {
150
+ const dir = dirname(DB_PATH);
151
+ if (!existsSync(dir)) {
152
+ mkdirSync(dir, { recursive: true });
153
+ }
154
+ }
155
+
156
+ async function getDb() {
157
+ ensureDbDir();
158
+ const SQL = await initSqlJs();
159
+ let db;
160
+ if (existsSync(DB_PATH)) {
161
+ const buffer = readFileSync(DB_PATH);
162
+ db = new SQL.Database(buffer);
163
+ } else {
164
+ db = new SQL.Database();
165
+ }
166
+
167
+ // Ensure table exists with unique constraint
168
+ db.run(`
169
+ CREATE TABLE IF NOT EXISTS memory_entries (
170
+ id TEXT PRIMARY KEY,
171
+ key TEXT NOT NULL,
172
+ namespace TEXT DEFAULT 'default',
173
+ content TEXT NOT NULL,
174
+ type TEXT DEFAULT 'semantic',
175
+ embedding TEXT,
176
+ embedding_model TEXT DEFAULT 'local',
177
+ embedding_dimensions INTEGER,
178
+ tags TEXT,
179
+ metadata TEXT,
180
+ owner_id TEXT,
181
+ created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
182
+ updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
183
+ expires_at INTEGER,
184
+ last_accessed_at INTEGER,
185
+ access_count INTEGER DEFAULT 0,
186
+ status TEXT DEFAULT 'active',
187
+ UNIQUE(namespace, key)
188
+ )
189
+ `);
190
+
191
+ db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
192
+ db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);
193
+
194
+ return db;
195
+ }
196
+
197
+ function saveDb(db) {
198
+ const data = db.export();
199
+ writeFileSync(DB_PATH, Buffer.from(data));
200
+ }
201
+
202
+ function generateId() {
203
+ return `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
204
+ }
205
+
206
+ function hashContent(content) {
207
+ let hash = 0;
208
+ for (let i = 0; i < content.length; i++) {
209
+ const char = content.charCodeAt(i);
210
+ hash = ((hash << 5) - hash) + char;
211
+ hash = hash & hash;
212
+ }
213
+ return hash.toString(16);
214
+ }
215
+
216
+ function storeEntry(db, key, content, metadata = {}, tags = []) {
217
+ const now = Date.now();
218
+ const id = generateId();
219
+ const metaJson = JSON.stringify(metadata);
220
+ const tagsJson = JSON.stringify(tags);
221
+
222
+ db.run(`
223
+ INSERT OR REPLACE INTO memory_entries
224
+ (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
225
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
226
+ `, [id, key, NAMESPACE, content, metaJson, tagsJson, now, now]);
227
+
228
+ return true;
229
+ }
230
+
231
+ function deleteByPrefix(db, prefix) {
232
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${prefix}%`]);
233
+ }
234
+
235
+ function getEntryHash(db, key) {
236
+ const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
237
+ stmt.bind([key, NAMESPACE]);
238
+ const entry = stmt.step() ? stmt.getAsObject() : null;
239
+ stmt.free();
240
+ if (entry?.metadata) {
241
+ try {
242
+ const meta = JSON.parse(entry.metadata);
243
+ return meta.contentHash;
244
+ } catch { /* ignore */ }
245
+ }
246
+ return null;
247
+ }
248
+
249
+ /**
250
+ * Extract overlapping context from adjacent text
251
+ * @param {string} text - The text to extract from
252
+ * @param {number} percent - Percentage of text to extract
253
+ * @param {string} position - 'start' or 'end'
254
+ * @returns {string} - The extracted context
255
+ */
256
+ function extractOverlapContext(text, percent, position) {
257
+ if (!text || percent <= 0) return '';
258
+
259
+ const targetLength = Math.floor(text.length * (percent / 100));
260
+ if (targetLength < 20) return ''; // Too short to be useful
261
+
262
+ if (position === 'start') {
263
+ // Get first N% of text, try to break at sentence/paragraph
264
+ let end = targetLength;
265
+ const nextPara = text.indexOf('\n\n', targetLength - 50);
266
+ const nextSentence = text.indexOf('. ', targetLength - 30);
267
+
268
+ if (nextPara > 0 && nextPara < targetLength + 100) {
269
+ end = nextPara;
270
+ } else if (nextSentence > 0 && nextSentence < targetLength + 50) {
271
+ end = nextSentence + 1;
272
+ }
273
+
274
+ return text.substring(0, end).trim();
275
+ } else {
276
+ // Get last N% of text, try to break at sentence/paragraph
277
+ let start = text.length - targetLength;
278
+ const prevPara = text.lastIndexOf('\n\n', start + 50);
279
+ const prevSentence = text.lastIndexOf('. ', start + 30);
280
+
281
+ if (prevPara > 0 && prevPara > start - 100) {
282
+ start = prevPara + 2;
283
+ } else if (prevSentence > 0 && prevSentence > start - 50) {
284
+ start = prevSentence + 2;
285
+ }
286
+
287
+ return text.substring(start).trim();
288
+ }
289
+ }
290
+
291
+ /**
292
+ * Split markdown content into semantic chunks based on headers
293
+ * Returns array of { title, content, level, headerLine }
294
+ */
295
+ function chunkMarkdown(content, fileName) {
296
+ const lines = content.split('\n');
297
+ const chunks = [];
298
+ let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };
299
+
300
+ for (let lineNum = 0; lineNum < lines.length; lineNum++) {
301
+ // Strip CRLF carriage returns for Windows compatibility
302
+ const line = lines[lineNum].replace(/\r$/, '');
303
+
304
+ // Check for headers (## and ###)
305
+ const h2Match = line.match(/^## (.+)$/);
306
+ const h3Match = line.match(/^### (.+)$/);
307
+
308
+ if (h2Match || h3Match) {
309
+ // Save current chunk if it has content
310
+ if (currentChunk.content.length > 0) {
311
+ const chunkContent = currentChunk.content.join('\n').trim();
312
+ if (chunkContent.length >= MIN_CHUNK_SIZE) {
313
+ chunks.push({
314
+ title: currentChunk.title,
315
+ content: chunkContent,
316
+ level: currentChunk.level,
317
+ headerLine: currentChunk.headerLine
318
+ });
319
+ }
320
+ }
321
+
322
+ // Start new chunk
323
+ currentChunk = {
324
+ title: h2Match ? h2Match[1] : h3Match[1],
325
+ content: [line],
326
+ level: h2Match ? 2 : 3,
327
+ headerLine: lineNum
328
+ };
329
+ } else {
330
+ currentChunk.content.push(line);
331
+ }
332
+ }
333
+
334
+ // Don't forget the last chunk
335
+ if (currentChunk.content.length > 0) {
336
+ const chunkContent = currentChunk.content.join('\n').trim();
337
+ if (chunkContent.length >= MIN_CHUNK_SIZE) {
338
+ chunks.push({
339
+ title: currentChunk.title,
340
+ content: chunkContent,
341
+ level: currentChunk.level,
342
+ headerLine: currentChunk.headerLine
343
+ });
344
+ }
345
+ }
346
+
347
+ // Handle chunks that are too large - split by paragraphs
348
+ const finalChunks = [];
349
+ for (const chunk of chunks) {
350
+ if (chunk.content.length > MAX_CHUNK_SIZE) {
351
+ const paragraphs = chunk.content.split(/\n\n+/);
352
+ let currentPart = [];
353
+ let currentLength = 0;
354
+ let partNum = 1;
355
+
356
+ for (const para of paragraphs) {
357
+ if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
358
+ finalChunks.push({
359
+ title: `${chunk.title} (part ${partNum})`,
360
+ content: currentPart.join('\n\n'),
361
+ level: chunk.level,
362
+ headerLine: chunk.headerLine,
363
+ isPart: true,
364
+ partNum
365
+ });
366
+ currentPart = [para];
367
+ currentLength = para.length;
368
+ partNum++;
369
+ } else {
370
+ currentPart.push(para);
371
+ currentLength += para.length;
372
+ }
373
+ }
374
+
375
+ if (currentPart.length > 0) {
376
+ finalChunks.push({
377
+ title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
378
+ content: currentPart.join('\n\n'),
379
+ level: chunk.level,
380
+ headerLine: chunk.headerLine,
381
+ isPart: partNum > 1,
382
+ partNum: partNum > 1 ? partNum : undefined
383
+ });
384
+ }
385
+ } else {
386
+ finalChunks.push(chunk);
387
+ }
388
+ }
389
+
390
+ // FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
391
+ const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
392
+ if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
393
+ debug(` Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
394
+ const allContent = finalChunks.map(c => c.content).join('\n\n');
395
+
396
+ // Split on --- horizontal rules first, then on ## headers, then on paragraphs
397
+ const TARGET_CHUNK_SIZE = 2500;
398
+ const rawSections = allContent.split(/\n---+\n/);
399
+ let sections = [];
400
+
401
+ for (const raw of rawSections) {
402
+ // Further split on ## headers if section is too large
403
+ if (raw.length > TARGET_CHUNK_SIZE) {
404
+ const headerSplit = raw.split(/\n(?=## )/);
405
+ for (const hSect of headerSplit) {
406
+ if (hSect.length > TARGET_CHUNK_SIZE) {
407
+ // Split very long sections on single newlines as last resort
408
+ const lines = hSect.split('\n');
409
+ let chunk = '';
410
+ for (const line of lines) {
411
+ if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
412
+ sections.push(chunk.trim());
413
+ chunk = line;
414
+ } else {
415
+ chunk += (chunk ? '\n' : '') + line;
416
+ }
417
+ }
418
+ if (chunk.trim().length > 30) sections.push(chunk.trim());
419
+ } else if (hSect.trim().length > 30) {
420
+ sections.push(hSect.trim());
421
+ }
422
+ }
423
+ } else if (raw.trim().length > 30) {
424
+ sections.push(raw.trim());
425
+ }
426
+ }
427
+
428
+ // Now group sections into chunks
429
+ const forcedChunks = [];
430
+ let currentGroup = [];
431
+ let currentLength = 0;
432
+ let groupNum = 1;
433
+
434
+ const flushGroup = () => {
435
+ if (currentGroup.length === 0) return;
436
+ const firstLine = currentGroup[0].split('\n')[0].trim();
437
+ const title = firstLine.startsWith('#')
438
+ ? firstLine.replace(/^#+\s*/, '').slice(0, 60)
439
+ : `${fileName} Section ${groupNum}`;
440
+
441
+ forcedChunks.push({
442
+ title,
443
+ content: currentGroup.join('\n\n'),
444
+ level: 2,
445
+ headerLine: 0,
446
+ isForced: true,
447
+ forceNum: groupNum
448
+ });
449
+ groupNum++;
450
+ currentGroup = [];
451
+ currentLength = 0;
452
+ };
453
+
454
+ for (const section of sections) {
455
+ if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
456
+ flushGroup();
457
+ }
458
+ currentGroup.push(section);
459
+ currentLength += section.length;
460
+ }
461
+ flushGroup();
462
+
463
+ // Always use force-chunked results if we got multiple chunks
464
+ if (forcedChunks.length >= 2) {
465
+ debug(` Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
466
+ return forcedChunks;
467
+ }
468
+ }
469
+
470
+ return finalChunks;
471
+ }
472
+
473
+ /**
474
+ * Build hierarchical relationships between chunks
475
+ * H2 chunks are parents of subsequent H3 chunks
476
+ */
477
+ function buildHierarchy(chunks, chunkPrefix) {
478
+ const hierarchy = {};
479
+ let currentH2Index = null;
480
+
481
+ for (let i = 0; i < chunks.length; i++) {
482
+ const chunk = chunks[i];
483
+ const chunkKey = `${chunkPrefix}-${i}`;
484
+
485
+ hierarchy[chunkKey] = {
486
+ parent: null,
487
+ children: []
488
+ };
489
+
490
+ if (chunk.level === 2) {
491
+ currentH2Index = i;
492
+ } else if (chunk.level === 3 && currentH2Index !== null) {
493
+ const parentKey = `${chunkPrefix}-${currentH2Index}`;
494
+ hierarchy[chunkKey].parent = parentKey;
495
+ hierarchy[parentKey].children.push(chunkKey);
496
+ }
497
+ }
498
+
499
+ return hierarchy;
500
+ }
501
+
502
+ function indexFile(db, filePath, keyPrefix) {
503
+ const fileName = basename(filePath, extname(filePath));
504
+ const docKey = `doc-${keyPrefix}-${fileName}`;
505
+ const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;
506
+
507
+ try {
508
+ const content = readFileSync(filePath, 'utf-8');
509
+ const contentHash = hashContent(content);
510
+
511
+ // Check if content changed (skip if same hash unless --force)
512
+ if (!force) {
513
+ const existingHash = getEntryHash(db, docKey);
514
+ if (existingHash === contentHash) {
515
+ return { docKey, status: 'unchanged', chunks: 0 };
516
+ }
517
+ }
518
+
519
+ const stats = statSync(filePath);
520
+ const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');
521
+
522
+ // Delete old chunks for this file before re-indexing
523
+ deleteByPrefix(db, chunkPrefix);
524
+
525
+ // 1. Store full document
526
+ const docMetadata = {
527
+ type: 'document',
528
+ filePath: relativePath,
529
+ fileSize: stats.size,
530
+ lastModified: stats.mtime.toISOString(),
531
+ contentHash,
532
+ indexedAt: new Date().toISOString(),
533
+ ragVersion: '2.0', // Mark as full RAG indexed
534
+ };
535
+
536
+ storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
537
+ debug(`Stored document: ${docKey}`);
538
+
539
+ // 2. Chunk and store semantic pieces with full RAG linking
540
+ const chunks = chunkMarkdown(content, fileName);
541
+
542
+ if (chunks.length === 0) {
543
+ return { docKey, status: 'indexed', chunks: 0 };
544
+ }
545
+
546
+ // Build hierarchy and sibling list
547
+ const hierarchy = buildHierarchy(chunks, chunkPrefix);
548
+ const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
549
+
550
+ // Update document with children references
551
+ const docChildrenMeta = {
552
+ ...docMetadata,
553
+ children: siblings,
554
+ chunkCount: chunks.length,
555
+ };
556
+ storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document']);
557
+
558
+ for (let i = 0; i < chunks.length; i++) {
559
+ const chunk = chunks[i];
560
+ const chunkKey = `${chunkPrefix}-${i}`;
561
+
562
+ // Build prev/next links
563
+ const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
564
+ const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
565
+
566
+ // Extract overlapping context from adjacent chunks
567
+ const contextBefore = i > 0
568
+ ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
569
+ : null;
570
+ const contextAfter = i < chunks.length - 1
571
+ ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
572
+ : null;
573
+
574
+ // Get hierarchical relationships
575
+ const hierInfo = hierarchy[chunkKey];
576
+
577
+ const chunkMetadata = {
578
+ type: 'chunk',
579
+ ragVersion: '2.0',
580
+
581
+ // Document relationship
582
+ parentDoc: docKey,
583
+ parentPath: relativePath,
584
+
585
+ // Sequential navigation (forward/backward links)
586
+ chunkIndex: i,
587
+ totalChunks: chunks.length,
588
+ prevChunk,
589
+ nextChunk,
590
+
591
+ // Sibling awareness
592
+ siblings,
593
+
594
+ // Hierarchical relationships (h2 -> h3)
595
+ hierarchicalParent: hierInfo.parent,
596
+ hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,
597
+
598
+ // Chunk info
599
+ chunkTitle: chunk.title,
600
+ headerLevel: chunk.level,
601
+ headerLine: chunk.headerLine,
602
+ isPart: chunk.isPart || false,
603
+ partNum: chunk.partNum || null,
604
+
605
+ // Overlapping context for continuity
606
+ contextOverlapPercent: overlapPercent,
607
+ hasContextBefore: !!contextBefore,
608
+ hasContextAfter: !!contextAfter,
609
+
610
+ // Content metadata
611
+ contentLength: chunk.content.length,
612
+ contentHash: hashContent(chunk.content),
613
+ indexedAt: new Date().toISOString(),
614
+ };
615
+
616
+ // Build searchable content with title context
617
+ // Include overlap context for better retrieval
618
+ let searchableContent = `# ${chunk.title}\n\n`;
619
+
620
+ if (contextBefore) {
621
+ searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
622
+ }
623
+
624
+ searchableContent += chunk.content;
625
+
626
+ if (contextAfter) {
627
+ searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
628
+ }
629
+
630
+ // Store chunk with full metadata
631
+ storeEntry(
632
+ db,
633
+ chunkKey,
634
+ searchableContent,
635
+ chunkMetadata,
636
+ [keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
637
+ );
638
+
639
+ debug(` Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
640
+ }
641
+
642
+ return { docKey, status: 'indexed', chunks: chunks.length };
643
+ } catch (err) {
644
+ return { docKey, status: 'error', error: err.message, chunks: 0 };
645
+ }
646
+ }
647
+
648
+ /**
649
+ * Recursively collect all .md files under a directory.
650
+ * Skips node_modules, .git, and other non-content directories.
651
+ */
652
+ function walkMdFiles(dir) {
653
+ const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
654
+ // CLAUDE.md is loaded into context by Claude automatically — skip to avoid duplicate vectors
655
+ const SKIP_FILES = new Set(['CLAUDE.md']);
656
+ const files = [];
657
+
658
+ function walk(current) {
659
+ if (!existsSync(current)) return;
660
+ for (const entry of readdirSync(current, { withFileTypes: true })) {
661
+ if (entry.isDirectory()) {
662
+ if (!SKIP_DIRS.has(entry.name)) walk(resolve(current, entry.name));
663
+ } else if (entry.isFile() && entry.name.endsWith('.md') && !SKIP_FILES.has(entry.name)) {
664
+ files.push(resolve(current, entry.name));
665
+ }
666
+ }
667
+ }
668
+
669
+ walk(dir);
670
+ return files;
671
+ }
672
+
673
+ function indexDirectory(db, dirConfig) {
674
+ const dirPath = dirConfig.absolute ? dirConfig.path : resolve(projectRoot, dirConfig.path);
675
+ const results = [];
676
+
677
+ if (!existsSync(dirPath)) {
678
+ if (verbose) debug(`Directory not found: ${dirConfig.path}`);
679
+ return results;
680
+ }
681
+
682
+ const allMdFiles = walkMdFiles(dirPath);
683
+ const filtered = dirConfig.fileFilter
684
+ ? allMdFiles.filter(f => dirConfig.fileFilter.includes(basename(f)))
685
+ : allMdFiles;
686
+
687
+ for (const filePath of filtered) {
688
+ const result = indexFile(db, filePath, dirConfig.prefix);
689
+ results.push(result);
690
+ }
691
+
692
+ return results;
693
+ }
694
+
695
+ /**
696
+ * Remove stale entries for files that no longer exist on disk.
697
+ * Uses the set of docKeys seen during the current indexing run to determine
698
+ * which entries are stale, rather than reconstructing file paths from keys
699
+ * (which breaks for files in subdirectories).
700
+ */
701
+ function cleanStaleEntries(db, currentDocKeys) {
702
+ const docsStmt = db.prepare(
703
+ `SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
704
+ );
705
+ docsStmt.bind([NAMESPACE]);
706
+ const docs = [];
707
+ while (docsStmt.step()) docs.push(docsStmt.getAsObject());
708
+ docsStmt.free();
709
+
710
+ let staleCount = 0;
711
+
712
+ for (const { key } of docs) {
713
+ // If this doc key was seen during the current indexing run, it's not stale
714
+ if (currentDocKeys.has(key)) continue;
715
+
716
+ const chunkPrefix = key.replace('doc-', 'chunk-');
717
+ const countBefore = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
718
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
719
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
720
+ const countAfter = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
721
+ const removed = countBefore - countAfter;
722
+ if (removed > 0) {
723
+ log(` Removed ${removed} stale entries for deleted file: ${key}`);
724
+ staleCount += removed;
725
+ }
726
+ }
727
+
728
+ // Also clean any orphaned entries not matching doc-/chunk- patterns
729
+ const orphanStmt = db.prepare(
730
+ `SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
731
+ );
732
+ orphanStmt.bind([NAMESPACE]);
733
+ const orphans = [];
734
+ while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
735
+ orphanStmt.free();
736
+ for (const { key } of orphans) {
737
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
738
+ staleCount++;
739
+ log(` Removed orphan entry: ${key}`);
740
+ }
741
+
742
+ return staleCount;
743
+ }
744
+
745
+ // Main
746
+ console.log('');
747
+ log('Indexing guidance files with FULL RAG linked segments...');
748
+ log(` Context overlap: ${overlapPercent}%`);
749
+ log(` Directories (${GUIDANCE_DIRS.length}):`);
750
+ for (const d of GUIDANCE_DIRS) {
751
+ const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
752
+ const exists = existsSync(dirPath);
753
+ log(` ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
754
+ }
755
+ console.log('');
756

// Run the indexing pass. Either a single explicitly-requested file is
// indexed, or every configured guidance directory is scanned. Counters
// feed the summary report and the embedding-backfill decision below.
const db = await getDb();
let docsIndexed = 0;
let chunksIndexed = 0;
let unchanged = 0;
let errors = 0;
// Doc keys seen this run; used afterwards to prune deleted files.
const currentDocKeys = new Set();

if (specificFile) {
  // Index single file
  const filePath = resolve(projectRoot, specificFile);
  if (!existsSync(filePath)) {
    log(`File not found: ${specificFile}`);
    process.exit(1);
  }

  // FIX: normalize separators so the guidance check also matches
  // Windows-style paths (".claude\guidance\..."); the previous bare
  // includes('.claude/guidance/') silently fell back to 'docs' there.
  let prefix = 'docs';
  if (specificFile.replaceAll('\\', '/').includes('.claude/guidance/')) {
    prefix = 'guidance';
  }

  const result = indexFile(db, filePath, prefix);
  log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);

  if (result.status === 'indexed') {
    docsIndexed++;
    chunksIndexed += result.chunks;
  } else if (result.status === 'unchanged') {
    unchanged++;
  } else {
    errors++;
  }
} else {
  // Index all directories
  for (const dir of GUIDANCE_DIRS) {
    log(`Scanning ${dir.path}/...`);
    const results = indexDirectory(db, dir);

    for (const result of results) {
      // Both indexed and unchanged docs still exist on disk, so both
      // count as "current" for stale-entry cleanup.
      if (result.status === 'indexed' || result.status === 'unchanged') {
        currentDocKeys.add(result.docKey);
      }
      if (result.status === 'indexed') {
        log(`  ✅ ${result.docKey} (${result.chunks} chunks)`);
        docsIndexed++;
        chunksIndexed += result.chunks;
      } else if (result.status === 'unchanged') {
        unchanged++;
      } else {
        log(`  ❌ ${result.docKey}: ${result.error}`);
        errors++;
      }
    }
  }
}

// Prune index entries whose backing files no longer exist. Skipped when
// a single file was requested, because currentDocKeys would then only
// contain that one file and everything else would look stale.
let staleRemoved = 0;
if (!specificFile) {
  log('Cleaning stale entries for deleted files...');
  staleRemoved = cleanStaleEntries(db, currentDocKeys);
  if (staleRemoved === 0) {
    log('  No stale entries found');
  }
}

// Persist the database only when this run actually changed something.
const hasChanges = docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0;
if (hasChanges) saveDb(db);

// Count entries still lacking an embedding (e.g. a prior background run
// died before finishing) so we know whether to backfill below.
let missingEmbeddings = 0;
{
  const countStmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ? AND (embedding IS NULL OR embedding = '')`);
  countStmt.bind([NAMESPACE]);
  if (countStmt.step()) {
    missingEmbeddings = countStmt.getAsObject().cnt;
  }
  countStmt.free();
}
db.close();

// Final summary: per-run counters plus a reminder of the RAG linking
// features carried by the freshly built index. The horizontal rule is
// hoisted into a const so the banner stays visually consistent.
const rule = '═══════════════════════════════════════════════════════════';
console.log('');
log(rule);
log('  FULL RAG INDEXING COMPLETE');
log(rule);
log(`  Documents indexed: ${docsIndexed}`);
log(`  Chunks created:    ${chunksIndexed}`);
log(`  Unchanged:         ${unchanged}`);
log(`  Stale removed:     ${staleRemoved}`);
log(`  Errors:            ${errors}`);
log('');
log('  RAG Features Enabled:');
log(`  • Forward/backward links (prevChunk/nextChunk)`);
log(`  • Sibling awareness (all chunks from same doc)`);
log(`  • Hierarchical links (h2 -> h3 parent/children)`);
log(`  • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
log(rule);

// Generate embeddings for new entries or backfill missing ones.
// Runs in BACKGROUND (detached, unref'd) to avoid blocking startup.
const needsEmbeddings = (docsIndexed > 0 || chunksIndexed > 0 || missingEmbeddings > 0);
if (!skipEmbeddings && needsEmbeddings) {
  if (missingEmbeddings > 0 && docsIndexed === 0 && chunksIndexed === 0) {
    log(`${missingEmbeddings} entries missing embeddings — backfilling...`);
  }
  console.log('');
  log('Spawning embedding generation in background...');

  const { spawn } = await import('child_process');

  // Look for build-embeddings script in multiple locations:
  // 1. Shipped with moflo (node_modules/moflo/bin/)
  // 2. Project-local (.claude/scripts/)
  const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
  const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
  const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;

  if (existsSync(embeddingScript)) {
    const embeddingArgs = ['--namespace', NAMESPACE];

    // Create log file for background process output
    const logDir = resolve(projectRoot, '.swarm/logs');
    if (!existsSync(logDir)) {
      mkdirSync(logDir, { recursive: true });
    }
    const logFile = resolve(logDir, 'embeddings.log');
    const { openSync, closeSync } = await import('fs');
    // FIX: open ONE append-mode fd and share it for stdout and stderr —
    // O_APPEND keeps interleaved writes atomic — instead of opening two
    // fds that were never closed in the parent (fd leak).
    const out = openSync(logFile, 'a');

    // Spawn in background - don't wait for completion
    const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
      stdio: ['ignore', out, out],
      cwd: projectRoot,
      detached: true,
      windowsHide: true // Suppress command windows on Windows
    });
    proc.unref(); // Allow parent to exit independently
    // spawn() dup'd the fd into the child; release the parent's copy.
    closeSync(out);

    log(`Background embedding started (PID: ${proc.pid})`);
    log(`Log file: .swarm/logs/embeddings.log`);
  } else {
    log('⚠️ Embedding script not found, skipping embedding generation');
  }
} else if (skipEmbeddings) {
  log('Skipping embedding generation (--no-embeddings)');
} else {
  log('No new content indexed, skipping embedding generation');
}

// Non-zero exit if any document failed to index, so callers/CI notice.
if (errors > 0) {
  process.exit(1);
}