moflo 4.8.21 → 4.8.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. package/.claude/agents/browser/browser-agent.yaml +182 -182
  2. package/.claude/agents/core/coder.md +265 -265
  3. package/.claude/agents/core/planner.md +167 -167
  4. package/.claude/agents/core/researcher.md +189 -189
  5. package/.claude/agents/core/reviewer.md +325 -325
  6. package/.claude/agents/core/tester.md +318 -318
  7. package/.claude/agents/database-specialist.yaml +21 -21
  8. package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
  9. package/.claude/agents/dual-mode/codex-worker.md +211 -211
  10. package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
  11. package/.claude/agents/github/code-review-swarm.md +537 -537
  12. package/.claude/agents/github/github-modes.md +172 -172
  13. package/.claude/agents/github/issue-tracker.md +318 -318
  14. package/.claude/agents/github/multi-repo-swarm.md +552 -552
  15. package/.claude/agents/github/pr-manager.md +190 -190
  16. package/.claude/agents/github/project-board-sync.md +508 -508
  17. package/.claude/agents/github/release-manager.md +366 -366
  18. package/.claude/agents/github/release-swarm.md +582 -582
  19. package/.claude/agents/github/repo-architect.md +397 -397
  20. package/.claude/agents/github/swarm-issue.md +572 -572
  21. package/.claude/agents/github/swarm-pr.md +427 -427
  22. package/.claude/agents/github/sync-coordinator.md +451 -451
  23. package/.claude/agents/github/workflow-automation.md +634 -634
  24. package/.claude/agents/goal/code-goal-planner.md +445 -445
  25. package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
  26. package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
  27. package/.claude/agents/hive-mind/scout-explorer.md +241 -241
  28. package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
  29. package/.claude/agents/hive-mind/worker-specialist.md +216 -216
  30. package/.claude/agents/index.yaml +17 -17
  31. package/.claude/agents/neural/safla-neural.md +73 -73
  32. package/.claude/agents/project-coordinator.yaml +15 -15
  33. package/.claude/agents/python-specialist.yaml +21 -21
  34. package/.claude/agents/reasoning/goal-planner.md +72 -72
  35. package/.claude/agents/security-auditor.yaml +20 -20
  36. package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
  37. package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
  38. package/.claude/agents/swarm/mesh-coordinator.md +391 -391
  39. package/.claude/agents/templates/migration-plan.md +745 -745
  40. package/.claude/agents/typescript-specialist.yaml +21 -21
  41. package/.claude/checkpoints/1767754460.json +8 -8
  42. package/.claude/commands/agents/agent-spawning.md +28 -28
  43. package/.claude/commands/github/github-modes.md +146 -146
  44. package/.claude/commands/github/github-swarm.md +121 -121
  45. package/.claude/commands/github/issue-tracker.md +291 -291
  46. package/.claude/commands/github/pr-manager.md +169 -169
  47. package/.claude/commands/github/release-manager.md +337 -337
  48. package/.claude/commands/github/repo-architect.md +366 -366
  49. package/.claude/commands/github/sync-coordinator.md +300 -300
  50. package/.claude/commands/memory/neural.md +47 -47
  51. package/.claude/commands/sparc/analyzer.md +51 -51
  52. package/.claude/commands/sparc/architect.md +53 -53
  53. package/.claude/commands/sparc/ask.md +97 -97
  54. package/.claude/commands/sparc/batch-executor.md +54 -54
  55. package/.claude/commands/sparc/code.md +89 -89
  56. package/.claude/commands/sparc/coder.md +54 -54
  57. package/.claude/commands/sparc/debug.md +83 -83
  58. package/.claude/commands/sparc/debugger.md +54 -54
  59. package/.claude/commands/sparc/designer.md +53 -53
  60. package/.claude/commands/sparc/devops.md +109 -109
  61. package/.claude/commands/sparc/docs-writer.md +80 -80
  62. package/.claude/commands/sparc/documenter.md +54 -54
  63. package/.claude/commands/sparc/innovator.md +54 -54
  64. package/.claude/commands/sparc/integration.md +83 -83
  65. package/.claude/commands/sparc/mcp.md +117 -117
  66. package/.claude/commands/sparc/memory-manager.md +54 -54
  67. package/.claude/commands/sparc/optimizer.md +54 -54
  68. package/.claude/commands/sparc/orchestrator.md +131 -131
  69. package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
  70. package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
  71. package/.claude/commands/sparc/researcher.md +54 -54
  72. package/.claude/commands/sparc/reviewer.md +54 -54
  73. package/.claude/commands/sparc/security-review.md +80 -80
  74. package/.claude/commands/sparc/sparc-modes.md +174 -174
  75. package/.claude/commands/sparc/sparc.md +111 -111
  76. package/.claude/commands/sparc/spec-pseudocode.md +80 -80
  77. package/.claude/commands/sparc/supabase-admin.md +348 -348
  78. package/.claude/commands/sparc/swarm-coordinator.md +54 -54
  79. package/.claude/commands/sparc/tdd.md +54 -54
  80. package/.claude/commands/sparc/tester.md +54 -54
  81. package/.claude/commands/sparc/tutorial.md +79 -79
  82. package/.claude/commands/sparc/workflow-manager.md +54 -54
  83. package/.claude/commands/sparc.md +166 -166
  84. package/.claude/commands/swarm/analysis.md +95 -95
  85. package/.claude/commands/swarm/development.md +96 -96
  86. package/.claude/commands/swarm/examples.md +168 -168
  87. package/.claude/commands/swarm/maintenance.md +102 -102
  88. package/.claude/commands/swarm/optimization.md +117 -117
  89. package/.claude/commands/swarm/research.md +136 -136
  90. package/.claude/commands/swarm/testing.md +131 -131
  91. package/.claude/commands/workflows/development.md +77 -77
  92. package/.claude/commands/workflows/research.md +62 -62
  93. package/.claude/guidance/moflo-bootstrap.md +126 -126
  94. package/.claude/guidance/shipped/agent-bootstrap.md +126 -126
  95. package/.claude/guidance/shipped/guidance-memory-strategy.md +262 -262
  96. package/.claude/guidance/shipped/memory-strategy.md +204 -204
  97. package/.claude/guidance/shipped/moflo.md +668 -653
  98. package/.claude/guidance/shipped/task-swarm-integration.md +441 -441
  99. package/.claude/helpers/intelligence.cjs +207 -207
  100. package/.claude/helpers/statusline.cjs +851 -851
  101. package/.claude/settings.local.json +18 -0
  102. package/.claude/skills/fl/SKILL.md +583 -583
  103. package/.claude/skills/flo/SKILL.md +583 -583
  104. package/.claude/skills/github-code-review/SKILL.md +1140 -1140
  105. package/.claude/skills/github-multi-repo/SKILL.md +874 -874
  106. package/.claude/skills/github-project-management/SKILL.md +1277 -1277
  107. package/.claude/skills/github-release-management/SKILL.md +1081 -1081
  108. package/.claude/skills/github-workflow-automation/SKILL.md +1065 -1065
  109. package/.claude/skills/hive-mind-advanced/SKILL.md +712 -712
  110. package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
  111. package/.claude/skills/performance-analysis/SKILL.md +563 -563
  112. package/.claude/skills/sparc-methodology/SKILL.md +1115 -1115
  113. package/.claude/skills/swarm-advanced/SKILL.md +973 -973
  114. package/.claude/workflow-state.json +4 -4
  115. package/LICENSE +21 -21
  116. package/README.md +685 -685
  117. package/bin/cli.js +0 -0
  118. package/bin/gate-hook.mjs +50 -50
  119. package/bin/gate.cjs +138 -138
  120. package/bin/generate-code-map.mjs +775 -775
  121. package/bin/hook-handler.cjs +83 -83
  122. package/bin/hooks.mjs +656 -656
  123. package/bin/index-guidance.mjs +892 -892
  124. package/bin/index-tests.mjs +709 -709
  125. package/bin/lib/process-manager.mjs +243 -243
  126. package/bin/lib/registry-cleanup.cjs +41 -41
  127. package/bin/prompt-hook.mjs +72 -72
  128. package/bin/semantic-search.mjs +472 -472
  129. package/bin/session-start-launcher.mjs +238 -238
  130. package/bin/setup-project.mjs +250 -250
  131. package/package.json +123 -123
  132. package/src/@claude-flow/cli/README.md +452 -452
  133. package/src/@claude-flow/cli/bin/cli.js +180 -180
  134. package/src/@claude-flow/cli/bin/preinstall.cjs +2 -2
  135. package/src/@claude-flow/cli/dist/src/commands/completions.js +409 -409
  136. package/src/@claude-flow/cli/dist/src/commands/doctor.js +5 -1
  137. package/src/@claude-flow/cli/dist/src/commands/embeddings.js +25 -25
  138. package/src/@claude-flow/cli/dist/src/commands/github.js +61 -61
  139. package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
  140. package/src/@claude-flow/cli/dist/src/commands/hooks.js +9 -9
  141. package/src/@claude-flow/cli/dist/src/commands/ruvector/import.js +14 -14
  142. package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.js +624 -624
  143. package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +3 -0
  144. package/src/@claude-flow/cli/dist/src/config/moflo-config.js +101 -91
  145. package/src/@claude-flow/cli/dist/src/index.d.ts +5 -0
  146. package/src/@claude-flow/cli/dist/src/index.js +44 -0
  147. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -29
  148. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +43 -43
  149. package/src/@claude-flow/cli/dist/src/init/executor.js +453 -453
  150. package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +482 -482
  151. package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -30
  152. package/src/@claude-flow/cli/dist/src/init/moflo-init.js +140 -140
  153. package/src/@claude-flow/cli/dist/src/init/statusline-generator.js +876 -876
  154. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +371 -371
  155. package/src/@claude-flow/cli/dist/src/runtime/headless.js +28 -28
  156. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +197 -0
  157. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.js +584 -0
  158. package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +14 -0
  159. package/src/@claude-flow/cli/dist/src/services/daemon-lock.js +1 -1
  160. package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.js +84 -84
  161. package/src/@claude-flow/cli/package.json +1 -1
  162. package/src/@claude-flow/guidance/README.md +1195 -1195
  163. package/src/@claude-flow/guidance/package.json +198 -198
  164. package/src/@claude-flow/memory/README.md +587 -587
  165. package/src/@claude-flow/memory/dist/agent-memory-scope.test.js +4 -7
  166. package/src/@claude-flow/memory/dist/agentdb-backend.d.ts +2 -0
  167. package/src/@claude-flow/memory/dist/agentdb-backend.js +28 -26
  168. package/src/@claude-flow/memory/dist/auto-memory-bridge.test.js +36 -39
  169. package/src/@claude-flow/memory/dist/benchmark.test.js +1 -1
  170. package/src/@claude-flow/memory/dist/controller-registry.test.js +43 -0
  171. package/src/@claude-flow/memory/dist/database-provider.d.ts +2 -2
  172. package/src/@claude-flow/memory/dist/database-provider.js +6 -3
  173. package/src/@claude-flow/memory/dist/database-provider.test.js +1 -3
  174. package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +245 -0
  175. package/src/@claude-flow/memory/dist/hybrid-backend.js +569 -0
  176. package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +8 -0
  177. package/src/@claude-flow/memory/dist/hybrid-backend.test.js +320 -0
  178. package/src/@claude-flow/memory/dist/index.d.ts +3 -0
  179. package/src/@claude-flow/memory/dist/index.js +3 -0
  180. package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +121 -0
  181. package/src/@claude-flow/memory/dist/sqlite-backend.js +572 -0
  182. package/src/@claude-flow/memory/dist/sqljs-backend.d.ts +4 -3
  183. package/src/@claude-flow/memory/dist/sqljs-backend.js +31 -30
  184. package/src/@claude-flow/memory/package.json +44 -44
  185. package/src/@claude-flow/shared/README.md +323 -323
  186. package/src/@claude-flow/shared/dist/core/config/defaults.js +1 -1
  187. package/src/@claude-flow/shared/dist/core/config/loader.js +1 -1
  188. package/src/@claude-flow/shared/dist/core/config/schema.js +1 -1
  189. package/src/@claude-flow/shared/dist/events/event-store.js +34 -50
  190. package/src/@claude-flow/shared/dist/events/event-store.test.js +4 -8
  191. package/src/@claude-flow/shared/dist/hooks/executor.js +4 -7
  192. package/src/@claude-flow/shared/dist/hooks/safety/file-organization.js +1 -1
  193. package/src/@claude-flow/shared/dist/hooks/safety/git-commit.js +3 -3
  194. package/src/@claude-flow/shared/dist/hooks/verify-exports.test.js +6 -6
  195. package/src/@claude-flow/shared/dist/utils/secure-logger.js +1 -1
  196. package/src/README.md +493 -493
  197. package/src/@claude-flow/guidance/dist/adversarial.d.ts +0 -284
  198. package/src/@claude-flow/guidance/dist/adversarial.js +0 -572
  199. package/src/@claude-flow/guidance/dist/analyzer.d.ts +0 -530
  200. package/src/@claude-flow/guidance/dist/analyzer.js +0 -2518
  201. package/src/@claude-flow/guidance/dist/artifacts.d.ts +0 -283
  202. package/src/@claude-flow/guidance/dist/artifacts.js +0 -356
  203. package/src/@claude-flow/guidance/dist/authority.d.ts +0 -290
  204. package/src/@claude-flow/guidance/dist/authority.js +0 -558
  205. package/src/@claude-flow/guidance/dist/capabilities.d.ts +0 -209
  206. package/src/@claude-flow/guidance/dist/capabilities.js +0 -485
  207. package/src/@claude-flow/guidance/dist/coherence.d.ts +0 -233
  208. package/src/@claude-flow/guidance/dist/coherence.js +0 -372
  209. package/src/@claude-flow/guidance/dist/compiler.d.ts +0 -87
  210. package/src/@claude-flow/guidance/dist/compiler.js +0 -419
  211. package/src/@claude-flow/guidance/dist/conformance-kit.d.ts +0 -225
  212. package/src/@claude-flow/guidance/dist/conformance-kit.js +0 -629
  213. package/src/@claude-flow/guidance/dist/continue-gate.d.ts +0 -214
  214. package/src/@claude-flow/guidance/dist/continue-gate.js +0 -353
  215. package/src/@claude-flow/guidance/dist/crypto-utils.d.ts +0 -17
  216. package/src/@claude-flow/guidance/dist/crypto-utils.js +0 -24
  217. package/src/@claude-flow/guidance/dist/evolution.d.ts +0 -282
  218. package/src/@claude-flow/guidance/dist/evolution.js +0 -500
  219. package/src/@claude-flow/guidance/dist/gates.d.ts +0 -79
  220. package/src/@claude-flow/guidance/dist/gates.js +0 -302
  221. package/src/@claude-flow/guidance/dist/gateway.d.ts +0 -206
  222. package/src/@claude-flow/guidance/dist/gateway.js +0 -452
  223. package/src/@claude-flow/guidance/dist/generators.d.ts +0 -153
  224. package/src/@claude-flow/guidance/dist/generators.js +0 -682
  225. package/src/@claude-flow/guidance/dist/headless.d.ts +0 -177
  226. package/src/@claude-flow/guidance/dist/headless.js +0 -342
  227. package/src/@claude-flow/guidance/dist/hooks.d.ts +0 -109
  228. package/src/@claude-flow/guidance/dist/hooks.js +0 -347
  229. package/src/@claude-flow/guidance/dist/index.d.ts +0 -205
  230. package/src/@claude-flow/guidance/dist/index.js +0 -321
  231. package/src/@claude-flow/guidance/dist/ledger.d.ts +0 -162
  232. package/src/@claude-flow/guidance/dist/ledger.js +0 -375
  233. package/src/@claude-flow/guidance/dist/manifest-validator.d.ts +0 -289
  234. package/src/@claude-flow/guidance/dist/manifest-validator.js +0 -838
  235. package/src/@claude-flow/guidance/dist/memory-gate.d.ts +0 -222
  236. package/src/@claude-flow/guidance/dist/memory-gate.js +0 -382
  237. package/src/@claude-flow/guidance/dist/meta-governance.d.ts +0 -265
  238. package/src/@claude-flow/guidance/dist/meta-governance.js +0 -348
  239. package/src/@claude-flow/guidance/dist/optimizer.d.ts +0 -104
  240. package/src/@claude-flow/guidance/dist/optimizer.js +0 -329
  241. package/src/@claude-flow/guidance/dist/persistence.d.ts +0 -189
  242. package/src/@claude-flow/guidance/dist/persistence.js +0 -464
  243. package/src/@claude-flow/guidance/dist/proof.d.ts +0 -185
  244. package/src/@claude-flow/guidance/dist/proof.js +0 -238
  245. package/src/@claude-flow/guidance/dist/retriever.d.ts +0 -116
  246. package/src/@claude-flow/guidance/dist/retriever.js +0 -394
  247. package/src/@claude-flow/guidance/dist/ruvbot-integration.d.ts +0 -370
  248. package/src/@claude-flow/guidance/dist/ruvbot-integration.js +0 -738
  249. package/src/@claude-flow/guidance/dist/temporal.d.ts +0 -426
  250. package/src/@claude-flow/guidance/dist/temporal.js +0 -658
  251. package/src/@claude-flow/guidance/dist/trust.d.ts +0 -283
  252. package/src/@claude-flow/guidance/dist/trust.js +0 -473
  253. package/src/@claude-flow/guidance/dist/truth-anchors.d.ts +0 -276
  254. package/src/@claude-flow/guidance/dist/truth-anchors.js +0 -488
  255. package/src/@claude-flow/guidance/dist/types.d.ts +0 -378
  256. package/src/@claude-flow/guidance/dist/types.js +0 -10
  257. package/src/@claude-flow/guidance/dist/uncertainty.d.ts +0 -372
  258. package/src/@claude-flow/guidance/dist/uncertainty.js +0 -619
  259. package/src/@claude-flow/guidance/dist/wasm-kernel.d.ts +0 -48
  260. package/src/@claude-flow/guidance/dist/wasm-kernel.js +0 -158
@@ -1,893 +1,893 @@
1
1
  #!/usr/bin/env node
2
- /**
3
- * Index guidance files into claude-flow memory with full RAG linked segments
4
- *
5
- * Strategy:
6
- * - Full documents stored as `doc-{name}` for complete retrieval
7
- * - Semantic chunks stored as `chunk-{name}-{n}` for precise search
8
- * - FULL RAG LINKING:
9
- * - parentDoc: link to full document
10
- * - prevChunk/nextChunk: forward/backward navigation
11
- * - siblings: all chunk keys from same document
12
- * - children: sub-chunks for hierarchical headers (h2 -> h3)
13
- * - contextBefore/contextAfter: overlapping text for context continuity
14
- * - Chunking based on markdown headers (## and ###) for natural boundaries
15
- * - After indexing, generates embeddings for semantic search (HNSW)
16
- *
17
- * Usage:
18
- * node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
19
- * npx flo-index --force # Force reindex all
20
- * npx flo-index --file X # Index specific file
21
- * npx flo-index --no-embeddings # Skip embedding generation
22
- * npx flo-index --overlap 20 # Set context overlap % (default: 15)
23
- */
24
-
25
- import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
26
- import { resolve, dirname, basename, extname } from 'path';
27
- import { fileURLToPath } from 'url';
28
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
29
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
30
-
31
-
32
- const __dirname = dirname(fileURLToPath(import.meta.url));
33
-
34
- function findProjectRoot() {
35
- let dir = process.cwd();
36
- const root = resolve(dir, '/');
37
- while (dir !== root) {
38
- if (existsSync(resolve(dir, 'package.json'))) return dir;
39
- dir = dirname(dir);
40
- }
41
- return process.cwd();
42
- }
43
-
44
- const projectRoot = findProjectRoot();
45
-
46
- // Locate the moflo package root (for bundled guidance that ships with moflo)
47
- const mofloRoot = resolve(__dirname, '..');
48
-
49
- const NAMESPACE = 'guidance';
50
- const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
51
-
52
- // ============================================================================
53
- // Load guidance directories from moflo.yaml, falling back to defaults
54
- // ============================================================================
55
-
56
- function loadGuidanceDirs() {
57
- const dirs = [];
58
-
59
- // 1. Read moflo.yaml / moflo.config.json for user-configured directories
60
- let configDirs = null;
61
- const yamlPath = resolve(projectRoot, 'moflo.yaml');
62
- const jsonPath = resolve(projectRoot, 'moflo.config.json');
63
-
64
- if (existsSync(yamlPath)) {
65
- try {
66
- const content = readFileSync(yamlPath, 'utf-8');
67
- // Simple YAML array extraction — avoids needing js-yaml at runtime
68
- // Matches: guidance:\n directories:\n - .claude/guidance\n - docs/guides
69
- const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
70
- if (guidanceBlock) {
71
- const items = guidanceBlock[1].match(/-\s+(.+)/g);
72
- if (items && items.length > 0) {
73
- configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
74
- }
75
- }
76
- } catch { /* ignore parse errors, fall through to defaults */ }
77
- } else if (existsSync(jsonPath)) {
78
- try {
79
- const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
80
- if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
81
- configDirs = raw.guidance.directories;
82
- }
83
- } catch { /* ignore parse errors */ }
84
- }
85
-
86
- // Use config dirs or fall back to defaults
87
- // Each directory gets a unique prefix derived from its path to avoid key collisions
88
- // when multiple directories contain files with the same name.
89
- const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
90
- for (const d of userDirs) {
91
- const prefix = d.replace(/\\/g, '/')
92
- .replace(/^\.claude\//, '')
93
- .replace(/^back-office\/api\/\.claude\//, 'bo-api-')
94
- .replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
95
- .replace(/[^a-zA-Z0-9-]/g, '-')
96
- .replace(/-+/g, '-')
97
- .replace(/^-|-$/g, '') || 'guidance';
98
- dirs.push({ path: d, prefix });
99
- }
100
-
101
- // 2. Include moflo's own bundled guidance (ships with the package)
102
- // Only when running inside a consumer project (not moflo itself)
103
- // Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
104
- const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
105
- const bundledGuidanceDir = existsSync(bundledShippedDir)
106
- ? bundledShippedDir
107
- : resolve(mofloRoot, '.claude/guidance');
108
- const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
109
- if (
110
- existsSync(bundledGuidanceDir) &&
111
- resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
112
- resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
113
- ) {
114
- dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
115
- }
116
-
117
- // 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
118
- // Indexing them wastes vectors and creates duplicate keys across subprojects.
119
-
120
- return dirs;
121
- }
122
-
123
- const GUIDANCE_DIRS = loadGuidanceDirs();
124
-
125
- // Chunking config - optimized for Claude's retrieval
126
- const MIN_CHUNK_SIZE = 50; // Lower minimum to avoid mega-chunks
127
- const MAX_CHUNK_SIZE = 4000; // Larger chunks for code-heavy docs (fits context better)
128
- const FORCE_CHUNK_THRESHOLD = 6000; // Force paragraph-split if file > this and < 3 chunks
129
- const DEFAULT_OVERLAP_PERCENT = 20; // Increased context overlap for better continuity
130
-
131
- // Parse args
132
- const args = process.argv.slice(2);
133
- const force = args.includes('--force');
134
- const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
135
- const verbose = args.includes('--verbose') || args.includes('-v');
136
- const skipEmbeddings = args.includes('--no-embeddings');
137
- const overlapPercent = args.includes('--overlap')
138
- ? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
139
- : DEFAULT_OVERLAP_PERCENT;
140
-
141
- function log(msg) {
142
- console.log(`[index-guidance] ${msg}`);
143
- }
144
-
145
- function debug(msg) {
146
- if (verbose) console.log(`[index-guidance] ${msg}`);
147
- }
148
-
149
- function ensureDbDir() {
150
- const dir = dirname(DB_PATH);
151
- if (!existsSync(dir)) {
152
- mkdirSync(dir, { recursive: true });
153
- }
154
- }
155
-
156
- async function getDb() {
157
- ensureDbDir();
158
- const SQL = await initSqlJs();
159
- let db;
160
- if (existsSync(DB_PATH)) {
161
- const buffer = readFileSync(DB_PATH);
162
- db = new SQL.Database(buffer);
163
- } else {
164
- db = new SQL.Database();
165
- }
166
-
167
- // Ensure table exists with unique constraint
168
- db.run(`
169
- CREATE TABLE IF NOT EXISTS memory_entries (
170
- id TEXT PRIMARY KEY,
171
- key TEXT NOT NULL,
172
- namespace TEXT DEFAULT 'default',
173
- content TEXT NOT NULL,
174
- type TEXT DEFAULT 'semantic',
175
- embedding TEXT,
176
- embedding_model TEXT DEFAULT 'local',
177
- embedding_dimensions INTEGER,
178
- tags TEXT,
179
- metadata TEXT,
180
- owner_id TEXT,
181
- created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
182
- updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
183
- expires_at INTEGER,
184
- last_accessed_at INTEGER,
185
- access_count INTEGER DEFAULT 0,
186
- status TEXT DEFAULT 'active',
187
- UNIQUE(namespace, key)
188
- )
189
- `);
190
-
191
- db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
192
- db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);
193
-
194
- return db;
195
- }
196
-
197
- function saveDb(db) {
198
- const data = db.export();
199
- writeFileSync(DB_PATH, Buffer.from(data));
200
- }
201
-
202
- function generateId() {
203
- return `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
204
- }
205
-
206
- function hashContent(content) {
207
- let hash = 0;
208
- for (let i = 0; i < content.length; i++) {
209
- const char = content.charCodeAt(i);
210
- hash = ((hash << 5) - hash) + char;
211
- hash = hash & hash;
212
- }
213
- return hash.toString(16);
214
- }
215
-
216
- function storeEntry(db, key, content, metadata = {}, tags = []) {
217
- const now = Date.now();
218
- const id = generateId();
219
- const metaJson = JSON.stringify(metadata);
220
- const tagsJson = JSON.stringify(tags);
221
-
222
- db.run(`
223
- INSERT OR REPLACE INTO memory_entries
224
- (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
225
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
226
- `, [id, key, NAMESPACE, content, metaJson, tagsJson, now, now]);
227
-
228
- return true;
229
- }
230
-
231
- function deleteByPrefix(db, prefix) {
232
- db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${prefix}%`]);
233
- }
234
-
235
- function getEntryHash(db, key) {
236
- const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
237
- stmt.bind([key, NAMESPACE]);
238
- const entry = stmt.step() ? stmt.getAsObject() : null;
239
- stmt.free();
240
- if (entry?.metadata) {
241
- try {
242
- const meta = JSON.parse(entry.metadata);
243
- return meta.contentHash;
244
- } catch { /* ignore */ }
245
- }
246
- return null;
247
- }
248
-
249
- /**
250
- * Extract overlapping context from adjacent text
251
- * @param {string} text - The text to extract from
252
- * @param {number} percent - Percentage of text to extract
253
- * @param {string} position - 'start' or 'end'
254
- * @returns {string} - The extracted context
255
- */
256
- function extractOverlapContext(text, percent, position) {
257
- if (!text || percent <= 0) return '';
258
-
259
- const targetLength = Math.floor(text.length * (percent / 100));
260
- if (targetLength < 20) return ''; // Too short to be useful
261
-
262
- if (position === 'start') {
263
- // Get first N% of text, try to break at sentence/paragraph
264
- let end = targetLength;
265
- const nextPara = text.indexOf('\n\n', targetLength - 50);
266
- const nextSentence = text.indexOf('. ', targetLength - 30);
267
-
268
- if (nextPara > 0 && nextPara < targetLength + 100) {
269
- end = nextPara;
270
- } else if (nextSentence > 0 && nextSentence < targetLength + 50) {
271
- end = nextSentence + 1;
272
- }
273
-
274
- return text.substring(0, end).trim();
275
- } else {
276
- // Get last N% of text, try to break at sentence/paragraph
277
- let start = text.length - targetLength;
278
- const prevPara = text.lastIndexOf('\n\n', start + 50);
279
- const prevSentence = text.lastIndexOf('. ', start + 30);
280
-
281
- if (prevPara > 0 && prevPara > start - 100) {
282
- start = prevPara + 2;
283
- } else if (prevSentence > 0 && prevSentence > start - 50) {
284
- start = prevSentence + 2;
285
- }
286
-
287
- return text.substring(start).trim();
288
- }
289
- }
290
-
291
- /**
292
- * Split markdown content into semantic chunks based on headers
293
- * Returns array of { title, content, level, headerLine }
294
- */
295
- function chunkMarkdown(content, fileName) {
296
- const lines = content.split('\n');
297
- const chunks = [];
298
- let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };
299
-
300
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
301
- // Strip CRLF carriage returns for Windows compatibility
302
- const line = lines[lineNum].replace(/\r$/, '');
303
-
304
- // Check for headers (## and ###)
305
- const h2Match = line.match(/^## (.+)$/);
306
- const h3Match = line.match(/^### (.+)$/);
307
-
308
- if (h2Match || h3Match) {
309
- // Save current chunk if it has content
310
- if (currentChunk.content.length > 0) {
311
- const chunkContent = currentChunk.content.join('\n').trim();
312
- if (chunkContent.length >= MIN_CHUNK_SIZE) {
313
- chunks.push({
314
- title: currentChunk.title,
315
- content: chunkContent,
316
- level: currentChunk.level,
317
- headerLine: currentChunk.headerLine
318
- });
319
- }
320
- }
321
-
322
- // Start new chunk
323
- currentChunk = {
324
- title: h2Match ? h2Match[1] : h3Match[1],
325
- content: [line],
326
- level: h2Match ? 2 : 3,
327
- headerLine: lineNum
328
- };
329
- } else {
330
- currentChunk.content.push(line);
331
- }
332
- }
333
-
334
- // Don't forget the last chunk
335
- if (currentChunk.content.length > 0) {
336
- const chunkContent = currentChunk.content.join('\n').trim();
337
- if (chunkContent.length >= MIN_CHUNK_SIZE) {
338
- chunks.push({
339
- title: currentChunk.title,
340
- content: chunkContent,
341
- level: currentChunk.level,
342
- headerLine: currentChunk.headerLine
343
- });
344
- }
345
- }
346
-
347
- // Handle chunks that are too large - split by paragraphs
348
- const finalChunks = [];
349
- for (const chunk of chunks) {
350
- if (chunk.content.length > MAX_CHUNK_SIZE) {
351
- const paragraphs = chunk.content.split(/\n\n+/);
352
- let currentPart = [];
353
- let currentLength = 0;
354
- let partNum = 1;
355
-
356
- for (const para of paragraphs) {
357
- if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
358
- finalChunks.push({
359
- title: `${chunk.title} (part ${partNum})`,
360
- content: currentPart.join('\n\n'),
361
- level: chunk.level,
362
- headerLine: chunk.headerLine,
363
- isPart: true,
364
- partNum
365
- });
366
- currentPart = [para];
367
- currentLength = para.length;
368
- partNum++;
369
- } else {
370
- currentPart.push(para);
371
- currentLength += para.length;
372
- }
373
- }
374
-
375
- if (currentPart.length > 0) {
376
- finalChunks.push({
377
- title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
378
- content: currentPart.join('\n\n'),
379
- level: chunk.level,
380
- headerLine: chunk.headerLine,
381
- isPart: partNum > 1,
382
- partNum: partNum > 1 ? partNum : undefined
383
- });
384
- }
385
- } else {
386
- finalChunks.push(chunk);
387
- }
388
- }
389
-
390
- // FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
391
- const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
392
- if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
393
- debug(` Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
394
- const allContent = finalChunks.map(c => c.content).join('\n\n');
395
-
396
- // Split on --- horizontal rules first, then on ## headers, then on paragraphs
397
- const TARGET_CHUNK_SIZE = 2500;
398
- const rawSections = allContent.split(/\n---+\n/);
399
- let sections = [];
400
-
401
- for (const raw of rawSections) {
402
- // Further split on ## headers if section is too large
403
- if (raw.length > TARGET_CHUNK_SIZE) {
404
- const headerSplit = raw.split(/\n(?=## )/);
405
- for (const hSect of headerSplit) {
406
- if (hSect.length > TARGET_CHUNK_SIZE) {
407
- // Split very long sections on single newlines as last resort
408
- const lines = hSect.split('\n');
409
- let chunk = '';
410
- for (const line of lines) {
411
- if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
412
- sections.push(chunk.trim());
413
- chunk = line;
414
- } else {
415
- chunk += (chunk ? '\n' : '') + line;
416
- }
417
- }
418
- if (chunk.trim().length > 30) sections.push(chunk.trim());
419
- } else if (hSect.trim().length > 30) {
420
- sections.push(hSect.trim());
421
- }
422
- }
423
- } else if (raw.trim().length > 30) {
424
- sections.push(raw.trim());
425
- }
426
- }
427
-
428
- // Now group sections into chunks
429
- const forcedChunks = [];
430
- let currentGroup = [];
431
- let currentLength = 0;
432
- let groupNum = 1;
433
-
434
- const flushGroup = () => {
435
- if (currentGroup.length === 0) return;
436
- const firstLine = currentGroup[0].split('\n')[0].trim();
437
- const title = firstLine.startsWith('#')
438
- ? firstLine.replace(/^#+\s*/, '').slice(0, 60)
439
- : `${fileName} Section ${groupNum}`;
440
-
441
- forcedChunks.push({
442
- title,
443
- content: currentGroup.join('\n\n'),
444
- level: 2,
445
- headerLine: 0,
446
- isForced: true,
447
- forceNum: groupNum
448
- });
449
- groupNum++;
450
- currentGroup = [];
451
- currentLength = 0;
452
- };
453
-
454
- for (const section of sections) {
455
- if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
456
- flushGroup();
457
- }
458
- currentGroup.push(section);
459
- currentLength += section.length;
460
- }
461
- flushGroup();
462
-
463
- // Always use force-chunked results if we got multiple chunks
464
- if (forcedChunks.length >= 2) {
465
- debug(` Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
466
- return forcedChunks;
467
- }
468
- }
469
-
470
- return finalChunks;
471
- }
472
-
473
- /**
474
- * Build hierarchical relationships between chunks
475
- * H2 chunks are parents of subsequent H3 chunks
476
- */
477
- function buildHierarchy(chunks, chunkPrefix) {
478
- const hierarchy = {};
479
- let currentH2Index = null;
480
-
481
- for (let i = 0; i < chunks.length; i++) {
482
- const chunk = chunks[i];
483
- const chunkKey = `${chunkPrefix}-${i}`;
484
-
485
- hierarchy[chunkKey] = {
486
- parent: null,
487
- children: []
488
- };
489
-
490
- if (chunk.level === 2) {
491
- currentH2Index = i;
492
- } else if (chunk.level === 3 && currentH2Index !== null) {
493
- const parentKey = `${chunkPrefix}-${currentH2Index}`;
494
- hierarchy[chunkKey].parent = parentKey;
495
- hierarchy[parentKey].children.push(chunkKey);
496
- }
497
- }
498
-
499
- return hierarchy;
500
- }
501
-
502
- function indexFile(db, filePath, keyPrefix) {
503
- const fileName = basename(filePath, extname(filePath));
504
- const docKey = `doc-${keyPrefix}-${fileName}`;
505
- const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;
506
-
507
- try {
508
- const content = readFileSync(filePath, 'utf-8');
509
- const contentHash = hashContent(content);
510
-
511
- // Check if content changed (skip if same hash unless --force)
512
- if (!force) {
513
- const existingHash = getEntryHash(db, docKey);
514
- if (existingHash === contentHash) {
515
- return { docKey, status: 'unchanged', chunks: 0 };
516
- }
517
- }
518
-
519
- const stats = statSync(filePath);
520
- const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');
521
-
522
- // Delete old chunks for this file before re-indexing
523
- deleteByPrefix(db, chunkPrefix);
524
-
525
- // 1. Store full document
526
- const docMetadata = {
527
- type: 'document',
528
- filePath: relativePath,
529
- fileSize: stats.size,
530
- lastModified: stats.mtime.toISOString(),
531
- contentHash,
532
- indexedAt: new Date().toISOString(),
533
- ragVersion: '2.0', // Mark as full RAG indexed
534
- };
535
-
536
- storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
537
- debug(`Stored document: ${docKey}`);
538
-
539
- // 2. Chunk and store semantic pieces with full RAG linking
540
- const chunks = chunkMarkdown(content, fileName);
541
-
542
- if (chunks.length === 0) {
543
- return { docKey, status: 'indexed', chunks: 0 };
544
- }
545
-
546
- // Build hierarchy and sibling list
547
- const hierarchy = buildHierarchy(chunks, chunkPrefix);
548
- const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
549
-
550
- // Update document with children references
551
- const docChildrenMeta = {
552
- ...docMetadata,
553
- children: siblings,
554
- chunkCount: chunks.length,
555
- };
556
- storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document']);
557
-
558
- for (let i = 0; i < chunks.length; i++) {
559
- const chunk = chunks[i];
560
- const chunkKey = `${chunkPrefix}-${i}`;
561
-
562
- // Build prev/next links
563
- const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
564
- const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
565
-
566
- // Extract overlapping context from adjacent chunks
567
- const contextBefore = i > 0
568
- ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
569
- : null;
570
- const contextAfter = i < chunks.length - 1
571
- ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
572
- : null;
573
-
574
- // Get hierarchical relationships
575
- const hierInfo = hierarchy[chunkKey];
576
-
577
- const chunkMetadata = {
578
- type: 'chunk',
579
- ragVersion: '2.0',
580
-
581
- // Document relationship
582
- parentDoc: docKey,
583
- parentPath: relativePath,
584
-
585
- // Sequential navigation (forward/backward links)
586
- chunkIndex: i,
587
- totalChunks: chunks.length,
588
- prevChunk,
589
- nextChunk,
590
-
591
- // Sibling awareness
592
- siblings,
593
-
594
- // Hierarchical relationships (h2 -> h3)
595
- hierarchicalParent: hierInfo.parent,
596
- hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,
597
-
598
- // Chunk info
599
- chunkTitle: chunk.title,
600
- headerLevel: chunk.level,
601
- headerLine: chunk.headerLine,
602
- isPart: chunk.isPart || false,
603
- partNum: chunk.partNum || null,
604
-
605
- // Overlapping context for continuity
606
- contextOverlapPercent: overlapPercent,
607
- hasContextBefore: !!contextBefore,
608
- hasContextAfter: !!contextAfter,
609
-
610
- // Content metadata
611
- contentLength: chunk.content.length,
612
- contentHash: hashContent(chunk.content),
613
- indexedAt: new Date().toISOString(),
614
- };
615
-
616
- // Build searchable content with title context
617
- // Include overlap context for better retrieval
618
- let searchableContent = `# ${chunk.title}\n\n`;
619
-
620
- if (contextBefore) {
621
- searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
622
- }
623
-
624
- searchableContent += chunk.content;
625
-
626
- if (contextAfter) {
627
- searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
628
- }
629
-
630
- // Store chunk with full metadata
631
- storeEntry(
632
- db,
633
- chunkKey,
634
- searchableContent,
635
- chunkMetadata,
636
- [keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
637
- );
638
-
639
- debug(` Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
640
- }
641
-
642
- return { docKey, status: 'indexed', chunks: chunks.length };
643
- } catch (err) {
644
- return { docKey, status: 'error', error: err.message, chunks: 0 };
645
- }
646
- }
647
-
648
- /**
649
- * Recursively collect all .md files under a directory.
650
- * Skips node_modules, .git, and other non-content directories.
651
- */
652
- function walkMdFiles(dir) {
653
- const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
654
- // CLAUDE.md is loaded into context by Claude automatically — skip to avoid duplicate vectors
655
- const SKIP_FILES = new Set(['CLAUDE.md']);
656
- const files = [];
657
-
658
- function walk(current) {
659
- if (!existsSync(current)) return;
660
- for (const entry of readdirSync(current, { withFileTypes: true })) {
661
- if (entry.isDirectory()) {
662
- if (!SKIP_DIRS.has(entry.name)) walk(resolve(current, entry.name));
663
- } else if (entry.isFile() && entry.name.endsWith('.md') && !SKIP_FILES.has(entry.name)) {
664
- files.push(resolve(current, entry.name));
665
- }
666
- }
667
- }
668
-
669
- walk(dir);
670
- return files;
671
- }
672
-
673
- function indexDirectory(db, dirConfig) {
674
- const dirPath = dirConfig.absolute ? dirConfig.path : resolve(projectRoot, dirConfig.path);
675
- const results = [];
676
-
677
- if (!existsSync(dirPath)) {
678
- if (verbose) debug(`Directory not found: ${dirConfig.path}`);
679
- return results;
680
- }
681
-
682
- const allMdFiles = walkMdFiles(dirPath);
683
- const filtered = dirConfig.fileFilter
684
- ? allMdFiles.filter(f => dirConfig.fileFilter.includes(basename(f)))
685
- : allMdFiles;
686
-
687
- for (const filePath of filtered) {
688
- const result = indexFile(db, filePath, dirConfig.prefix);
689
- results.push(result);
690
- }
691
-
692
- return results;
693
- }
694
-
695
- /**
696
- * Remove stale entries for files that no longer exist on disk.
697
- * Uses the set of docKeys seen during the current indexing run to determine
698
- * which entries are stale, rather than reconstructing file paths from keys
699
- * (which breaks for files in subdirectories).
700
- */
701
- function cleanStaleEntries(db, currentDocKeys) {
702
- const docsStmt = db.prepare(
703
- `SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
704
- );
705
- docsStmt.bind([NAMESPACE]);
706
- const docs = [];
707
- while (docsStmt.step()) docs.push(docsStmt.getAsObject());
708
- docsStmt.free();
709
-
710
- let staleCount = 0;
711
-
712
- for (const { key } of docs) {
713
- // If this doc key was seen during the current indexing run, it's not stale
714
- if (currentDocKeys.has(key)) continue;
715
-
716
- const chunkPrefix = key.replace('doc-', 'chunk-');
717
- const countBefore = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
718
- db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
719
- db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
720
- const countAfter = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
721
- const removed = countBefore - countAfter;
722
- if (removed > 0) {
723
- log(` Removed ${removed} stale entries for deleted file: ${key}`);
724
- staleCount += removed;
725
- }
726
- }
727
-
728
- // Also clean any orphaned entries not matching doc-/chunk- patterns
729
- const orphanStmt = db.prepare(
730
- `SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
731
- );
732
- orphanStmt.bind([NAMESPACE]);
733
- const orphans = [];
734
- while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
735
- orphanStmt.free();
736
- for (const { key } of orphans) {
737
- db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
738
- staleCount++;
739
- log(` Removed orphan entry: ${key}`);
740
- }
741
-
742
- return staleCount;
743
- }
744
-
745
- // Main
746
- console.log('');
747
- log('Indexing guidance files with FULL RAG linked segments...');
748
- log(` Context overlap: ${overlapPercent}%`);
749
- log(` Directories (${GUIDANCE_DIRS.length}):`);
750
- for (const d of GUIDANCE_DIRS) {
751
- const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
752
- const exists = existsSync(dirPath);
753
- log(` ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
754
- }
755
- console.log('');
756
-
757
- const db = await getDb();
758
- let docsIndexed = 0;
759
- let chunksIndexed = 0;
760
- let unchanged = 0;
761
- let errors = 0;
762
- const currentDocKeys = new Set();
763
-
764
- if (specificFile) {
765
- // Index single file
766
- const filePath = resolve(projectRoot, specificFile);
767
- if (!existsSync(filePath)) {
768
- log(`File not found: ${specificFile}`);
769
- process.exit(1);
770
- }
771
-
772
- let prefix = 'docs';
773
- if (specificFile.includes('.claude/guidance/')) {
774
- prefix = 'guidance';
775
- }
776
-
777
- const result = indexFile(db, filePath, prefix);
778
- log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);
779
-
780
- if (result.status === 'indexed') {
781
- docsIndexed++;
782
- chunksIndexed += result.chunks;
783
- } else if (result.status === 'unchanged') {
784
- unchanged++;
785
- } else {
786
- errors++;
787
- }
788
- } else {
789
- // Index all directories
790
- for (const dir of GUIDANCE_DIRS) {
791
- log(`Scanning ${dir.path}/...`);
792
- const results = indexDirectory(db, dir);
793
-
794
- for (const result of results) {
795
- if (result.status === 'indexed' || result.status === 'unchanged') {
796
- currentDocKeys.add(result.docKey);
797
- }
798
- if (result.status === 'indexed') {
799
- log(` ✅ ${result.docKey} (${result.chunks} chunks)`);
800
- docsIndexed++;
801
- chunksIndexed += result.chunks;
802
- } else if (result.status === 'unchanged') {
803
- unchanged++;
804
- } else {
805
- log(` ❌ ${result.docKey}: ${result.error}`);
806
- errors++;
807
- }
808
- }
809
- }
810
- }
811
-
812
- // Clean stale entries for deleted files (unless indexing a specific file)
813
- let staleRemoved = 0;
814
- if (!specificFile) {
815
- log('Cleaning stale entries for deleted files...');
816
- staleRemoved = cleanStaleEntries(db, currentDocKeys);
817
- if (staleRemoved === 0) {
818
- log(' No stale entries found');
819
- }
820
- }
821
-
822
- // Write changes back to disk and close
823
- if (docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0) saveDb(db);
824
- db.close();
825
-
826
- console.log('');
827
- log('═══════════════════════════════════════════════════════════');
828
- log(' FULL RAG INDEXING COMPLETE');
829
- log('═══════════════════════════════════════════════════════════');
830
- log(` Documents indexed: ${docsIndexed}`);
831
- log(` Chunks created: ${chunksIndexed}`);
832
- log(` Unchanged: ${unchanged}`);
833
- log(` Stale removed: ${staleRemoved}`);
834
- log(` Errors: ${errors}`);
835
- log('');
836
- log(' RAG Features Enabled:');
837
- log(` • Forward/backward links (prevChunk/nextChunk)`);
838
- log(` • Sibling awareness (all chunks from same doc)`);
839
- log(` • Hierarchical links (h2 -> h3 parent/children)`);
840
- log(` • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
841
- log('═══════════════════════════════════════════════════════════');
842
-
843
- // Generate embeddings for new entries (unless skipped or nothing changed)
844
- // Runs in BACKGROUND to avoid blocking startup
845
- if (!skipEmbeddings && (docsIndexed > 0 || chunksIndexed > 0)) {
846
- console.log('');
847
- log('Spawning embedding generation in background...');
848
-
849
- const { spawn } = await import('child_process');
850
-
851
- // Look for build-embeddings script in multiple locations:
852
- // 1. Shipped with moflo (node_modules/moflo/bin/)
853
- // 2. Project-local (.claude/scripts/)
854
- const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
855
- const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
856
- const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;
857
-
858
- if (existsSync(embeddingScript)) {
859
- const embeddingArgs = ['--namespace', NAMESPACE];
860
-
861
- // Create log file for background process output
862
- const logDir = resolve(projectRoot, '.swarm/logs');
863
- if (!existsSync(logDir)) {
864
- mkdirSync(logDir, { recursive: true });
865
- }
866
- const logFile = resolve(logDir, 'embeddings.log');
867
- const { openSync } = await import('fs');
868
- const out = openSync(logFile, 'a');
869
- const err = openSync(logFile, 'a');
870
-
871
- // Spawn in background - don't wait for completion
872
- const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
873
- stdio: ['ignore', out, err],
874
- cwd: projectRoot,
875
- detached: true,
876
- windowsHide: true // Suppress command windows on Windows
877
- });
878
- proc.unref(); // Allow parent to exit independently
879
-
880
- log(`Background embedding started (PID: ${proc.pid})`);
881
- log(`Log file: .swarm/logs/embeddings.log`);
882
- } else {
883
- log('⚠️ Embedding script not found, skipping embedding generation');
884
- }
885
- } else if (skipEmbeddings) {
886
- log('Skipping embedding generation (--no-embeddings)');
887
- } else {
888
- log('No new content indexed, skipping embedding generation');
889
- }
890
-
891
- if (errors > 0) {
892
- process.exit(1);
893
- }
2
+ /**
3
+ * Index guidance files into claude-flow memory with full RAG linked segments
4
+ *
5
+ * Strategy:
6
+ * - Full documents stored as `doc-{name}` for complete retrieval
7
+ * - Semantic chunks stored as `chunk-{name}-{n}` for precise search
8
+ * - FULL RAG LINKING:
9
+ * - parentDoc: link to full document
10
+ * - prevChunk/nextChunk: forward/backward navigation
11
+ * - siblings: all chunk keys from same document
12
+ * - children: sub-chunks for hierarchical headers (h2 -> h3)
13
+ * - contextBefore/contextAfter: overlapping text for context continuity
14
+ * - Chunking based on markdown headers (## and ###) for natural boundaries
15
+ * - After indexing, generates embeddings for semantic search (HNSW)
16
+ *
17
+ * Usage:
18
+ * node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
19
+ * npx flo-index --force # Force reindex all
20
+ * npx flo-index --file X # Index specific file
21
+ * npx flo-index --no-embeddings # Skip embedding generation
22
+ * npx flo-index --overlap 20 # Set context overlap % (default: 15)
23
+ */
24
+
25
+ import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
26
+ import { resolve, dirname, basename, extname } from 'path';
27
+ import { fileURLToPath } from 'url';
28
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
29
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
30
+
31
+
32
+ const __dirname = dirname(fileURLToPath(import.meta.url));
33
+
34
+ function findProjectRoot() {
35
+ let dir = process.cwd();
36
+ const root = resolve(dir, '/');
37
+ while (dir !== root) {
38
+ if (existsSync(resolve(dir, 'package.json'))) return dir;
39
+ dir = dirname(dir);
40
+ }
41
+ return process.cwd();
42
+ }
43
+
44
+ const projectRoot = findProjectRoot();
45
+
46
+ // Locate the moflo package root (for bundled guidance that ships with moflo)
47
+ const mofloRoot = resolve(__dirname, '..');
48
+
49
+ const NAMESPACE = 'guidance';
50
+ const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
51
+
52
+ // ============================================================================
53
+ // Load guidance directories from moflo.yaml, falling back to defaults
54
+ // ============================================================================
55
+
56
+ function loadGuidanceDirs() {
57
+ const dirs = [];
58
+
59
+ // 1. Read moflo.yaml / moflo.config.json for user-configured directories
60
+ let configDirs = null;
61
+ const yamlPath = resolve(projectRoot, 'moflo.yaml');
62
+ const jsonPath = resolve(projectRoot, 'moflo.config.json');
63
+
64
+ if (existsSync(yamlPath)) {
65
+ try {
66
+ const content = readFileSync(yamlPath, 'utf-8');
67
+ // Simple YAML array extraction — avoids needing js-yaml at runtime
68
+ // Matches: guidance:\n directories:\n - .claude/guidance\n - docs/guides
69
+ const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
70
+ if (guidanceBlock) {
71
+ const items = guidanceBlock[1].match(/-\s+(.+)/g);
72
+ if (items && items.length > 0) {
73
+ configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
74
+ }
75
+ }
76
+ } catch { /* ignore parse errors, fall through to defaults */ }
77
+ } else if (existsSync(jsonPath)) {
78
+ try {
79
+ const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
80
+ if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
81
+ configDirs = raw.guidance.directories;
82
+ }
83
+ } catch { /* ignore parse errors */ }
84
+ }
85
+
86
+ // Use config dirs or fall back to defaults
87
+ // Each directory gets a unique prefix derived from its path to avoid key collisions
88
+ // when multiple directories contain files with the same name.
89
+ const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
90
+ for (const d of userDirs) {
91
+ const prefix = d.replace(/\\/g, '/')
92
+ .replace(/^\.claude\//, '')
93
+ .replace(/^back-office\/api\/\.claude\//, 'bo-api-')
94
+ .replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
95
+ .replace(/[^a-zA-Z0-9-]/g, '-')
96
+ .replace(/-+/g, '-')
97
+ .replace(/^-|-$/g, '') || 'guidance';
98
+ dirs.push({ path: d, prefix });
99
+ }
100
+
101
+ // 2. Include moflo's own bundled guidance (ships with the package)
102
+ // Only when running inside a consumer project (not moflo itself)
103
+ // Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
104
+ const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
105
+ const bundledGuidanceDir = existsSync(bundledShippedDir)
106
+ ? bundledShippedDir
107
+ : resolve(mofloRoot, '.claude/guidance');
108
+ const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
109
+ if (
110
+ existsSync(bundledGuidanceDir) &&
111
+ resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
112
+ resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
113
+ ) {
114
+ dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
115
+ }
116
+
117
+ // 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
118
+ // Indexing them wastes vectors and creates duplicate keys across subprojects.
119
+
120
+ return dirs;
121
+ }
122
+
123
+ const GUIDANCE_DIRS = loadGuidanceDirs();
124
+
125
+ // Chunking config - optimized for Claude's retrieval
126
+ const MIN_CHUNK_SIZE = 50; // Lower minimum to avoid mega-chunks
127
+ const MAX_CHUNK_SIZE = 4000; // Larger chunks for code-heavy docs (fits context better)
128
+ const FORCE_CHUNK_THRESHOLD = 6000; // Force paragraph-split if file > this and < 3 chunks
129
+ const DEFAULT_OVERLAP_PERCENT = 20; // Increased context overlap for better continuity
130
+
131
+ // Parse args
132
+ const args = process.argv.slice(2);
133
+ const force = args.includes('--force');
134
+ const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
135
+ const verbose = args.includes('--verbose') || args.includes('-v');
136
+ const skipEmbeddings = args.includes('--no-embeddings');
137
+ const overlapPercent = args.includes('--overlap')
138
+ ? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
139
+ : DEFAULT_OVERLAP_PERCENT;
140
+
141
+ function log(msg) {
142
+ console.log(`[index-guidance] ${msg}`);
143
+ }
144
+
145
+ function debug(msg) {
146
+ if (verbose) console.log(`[index-guidance] ${msg}`);
147
+ }
148
+
149
+ function ensureDbDir() {
150
+ const dir = dirname(DB_PATH);
151
+ if (!existsSync(dir)) {
152
+ mkdirSync(dir, { recursive: true });
153
+ }
154
+ }
155
+
156
+ async function getDb() {
157
+ ensureDbDir();
158
+ const SQL = await initSqlJs();
159
+ let db;
160
+ if (existsSync(DB_PATH)) {
161
+ const buffer = readFileSync(DB_PATH);
162
+ db = new SQL.Database(buffer);
163
+ } else {
164
+ db = new SQL.Database();
165
+ }
166
+
167
+ // Ensure table exists with unique constraint
168
+ db.run(`
169
+ CREATE TABLE IF NOT EXISTS memory_entries (
170
+ id TEXT PRIMARY KEY,
171
+ key TEXT NOT NULL,
172
+ namespace TEXT DEFAULT 'default',
173
+ content TEXT NOT NULL,
174
+ type TEXT DEFAULT 'semantic',
175
+ embedding TEXT,
176
+ embedding_model TEXT DEFAULT 'local',
177
+ embedding_dimensions INTEGER,
178
+ tags TEXT,
179
+ metadata TEXT,
180
+ owner_id TEXT,
181
+ created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
182
+ updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
183
+ expires_at INTEGER,
184
+ last_accessed_at INTEGER,
185
+ access_count INTEGER DEFAULT 0,
186
+ status TEXT DEFAULT 'active',
187
+ UNIQUE(namespace, key)
188
+ )
189
+ `);
190
+
191
+ db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
192
+ db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);
193
+
194
+ return db;
195
+ }
196
+
197
+ function saveDb(db) {
198
+ const data = db.export();
199
+ writeFileSync(DB_PATH, Buffer.from(data));
200
+ }
201
+
202
+ function generateId() {
203
+ return `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
204
+ }
205
+
206
+ function hashContent(content) {
207
+ let hash = 0;
208
+ for (let i = 0; i < content.length; i++) {
209
+ const char = content.charCodeAt(i);
210
+ hash = ((hash << 5) - hash) + char;
211
+ hash = hash & hash;
212
+ }
213
+ return hash.toString(16);
214
+ }
215
+
216
+ function storeEntry(db, key, content, metadata = {}, tags = []) {
217
+ const now = Date.now();
218
+ const id = generateId();
219
+ const metaJson = JSON.stringify(metadata);
220
+ const tagsJson = JSON.stringify(tags);
221
+
222
+ db.run(`
223
+ INSERT OR REPLACE INTO memory_entries
224
+ (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
225
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
226
+ `, [id, key, NAMESPACE, content, metaJson, tagsJson, now, now]);
227
+
228
+ return true;
229
+ }
230
+
231
+ function deleteByPrefix(db, prefix) {
232
+ db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${prefix}%`]);
233
+ }
234
+
235
+ function getEntryHash(db, key) {
236
+ const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
237
+ stmt.bind([key, NAMESPACE]);
238
+ const entry = stmt.step() ? stmt.getAsObject() : null;
239
+ stmt.free();
240
+ if (entry?.metadata) {
241
+ try {
242
+ const meta = JSON.parse(entry.metadata);
243
+ return meta.contentHash;
244
+ } catch { /* ignore */ }
245
+ }
246
+ return null;
247
+ }
248
+
249
+ /**
250
+ * Extract overlapping context from adjacent text
251
+ * @param {string} text - The text to extract from
252
+ * @param {number} percent - Percentage of text to extract
253
+ * @param {string} position - 'start' or 'end'
254
+ * @returns {string} - The extracted context
255
+ */
256
+ function extractOverlapContext(text, percent, position) {
257
+ if (!text || percent <= 0) return '';
258
+
259
+ const targetLength = Math.floor(text.length * (percent / 100));
260
+ if (targetLength < 20) return ''; // Too short to be useful
261
+
262
+ if (position === 'start') {
263
+ // Get first N% of text, try to break at sentence/paragraph
264
+ let end = targetLength;
265
+ const nextPara = text.indexOf('\n\n', targetLength - 50);
266
+ const nextSentence = text.indexOf('. ', targetLength - 30);
267
+
268
+ if (nextPara > 0 && nextPara < targetLength + 100) {
269
+ end = nextPara;
270
+ } else if (nextSentence > 0 && nextSentence < targetLength + 50) {
271
+ end = nextSentence + 1;
272
+ }
273
+
274
+ return text.substring(0, end).trim();
275
+ } else {
276
+ // Get last N% of text, try to break at sentence/paragraph
277
+ let start = text.length - targetLength;
278
+ const prevPara = text.lastIndexOf('\n\n', start + 50);
279
+ const prevSentence = text.lastIndexOf('. ', start + 30);
280
+
281
+ if (prevPara > 0 && prevPara > start - 100) {
282
+ start = prevPara + 2;
283
+ } else if (prevSentence > 0 && prevSentence > start - 50) {
284
+ start = prevSentence + 2;
285
+ }
286
+
287
+ return text.substring(start).trim();
288
+ }
289
+ }
290
+
291
+ /**
292
+ * Split markdown content into semantic chunks based on headers
293
+ * Returns array of { title, content, level, headerLine }
294
+ */
295
+ function chunkMarkdown(content, fileName) {
296
+ const lines = content.split('\n');
297
+ const chunks = [];
298
+ let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };
299
+
300
+ for (let lineNum = 0; lineNum < lines.length; lineNum++) {
301
+ // Strip CRLF carriage returns for Windows compatibility
302
+ const line = lines[lineNum].replace(/\r$/, '');
303
+
304
+ // Check for headers (## and ###)
305
+ const h2Match = line.match(/^## (.+)$/);
306
+ const h3Match = line.match(/^### (.+)$/);
307
+
308
+ if (h2Match || h3Match) {
309
+ // Save current chunk if it has content
310
+ if (currentChunk.content.length > 0) {
311
+ const chunkContent = currentChunk.content.join('\n').trim();
312
+ if (chunkContent.length >= MIN_CHUNK_SIZE) {
313
+ chunks.push({
314
+ title: currentChunk.title,
315
+ content: chunkContent,
316
+ level: currentChunk.level,
317
+ headerLine: currentChunk.headerLine
318
+ });
319
+ }
320
+ }
321
+
322
+ // Start new chunk
323
+ currentChunk = {
324
+ title: h2Match ? h2Match[1] : h3Match[1],
325
+ content: [line],
326
+ level: h2Match ? 2 : 3,
327
+ headerLine: lineNum
328
+ };
329
+ } else {
330
+ currentChunk.content.push(line);
331
+ }
332
+ }
333
+
334
+ // Don't forget the last chunk
335
+ if (currentChunk.content.length > 0) {
336
+ const chunkContent = currentChunk.content.join('\n').trim();
337
+ if (chunkContent.length >= MIN_CHUNK_SIZE) {
338
+ chunks.push({
339
+ title: currentChunk.title,
340
+ content: chunkContent,
341
+ level: currentChunk.level,
342
+ headerLine: currentChunk.headerLine
343
+ });
344
+ }
345
+ }
346
+
347
+ // Handle chunks that are too large - split by paragraphs
348
+ const finalChunks = [];
349
+ for (const chunk of chunks) {
350
+ if (chunk.content.length > MAX_CHUNK_SIZE) {
351
+ const paragraphs = chunk.content.split(/\n\n+/);
352
+ let currentPart = [];
353
+ let currentLength = 0;
354
+ let partNum = 1;
355
+
356
+ for (const para of paragraphs) {
357
+ if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
358
+ finalChunks.push({
359
+ title: `${chunk.title} (part ${partNum})`,
360
+ content: currentPart.join('\n\n'),
361
+ level: chunk.level,
362
+ headerLine: chunk.headerLine,
363
+ isPart: true,
364
+ partNum
365
+ });
366
+ currentPart = [para];
367
+ currentLength = para.length;
368
+ partNum++;
369
+ } else {
370
+ currentPart.push(para);
371
+ currentLength += para.length;
372
+ }
373
+ }
374
+
375
+ if (currentPart.length > 0) {
376
+ finalChunks.push({
377
+ title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
378
+ content: currentPart.join('\n\n'),
379
+ level: chunk.level,
380
+ headerLine: chunk.headerLine,
381
+ isPart: partNum > 1,
382
+ partNum: partNum > 1 ? partNum : undefined
383
+ });
384
+ }
385
+ } else {
386
+ finalChunks.push(chunk);
387
+ }
388
+ }
389
+
390
+ // FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
391
+ const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
392
+ if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
393
+ debug(` Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
394
+ const allContent = finalChunks.map(c => c.content).join('\n\n');
395
+
396
+ // Split on --- horizontal rules first, then on ## headers, then on paragraphs
397
+ const TARGET_CHUNK_SIZE = 2500;
398
+ const rawSections = allContent.split(/\n---+\n/);
399
+ let sections = [];
400
+
401
+ for (const raw of rawSections) {
402
+ // Further split on ## headers if section is too large
403
+ if (raw.length > TARGET_CHUNK_SIZE) {
404
+ const headerSplit = raw.split(/\n(?=## )/);
405
+ for (const hSect of headerSplit) {
406
+ if (hSect.length > TARGET_CHUNK_SIZE) {
407
+ // Split very long sections on single newlines as last resort
408
+ const lines = hSect.split('\n');
409
+ let chunk = '';
410
+ for (const line of lines) {
411
+ if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
412
+ sections.push(chunk.trim());
413
+ chunk = line;
414
+ } else {
415
+ chunk += (chunk ? '\n' : '') + line;
416
+ }
417
+ }
418
+ if (chunk.trim().length > 30) sections.push(chunk.trim());
419
+ } else if (hSect.trim().length > 30) {
420
+ sections.push(hSect.trim());
421
+ }
422
+ }
423
+ } else if (raw.trim().length > 30) {
424
+ sections.push(raw.trim());
425
+ }
426
+ }
427
+
428
+ // Now group sections into chunks
429
+ const forcedChunks = [];
430
+ let currentGroup = [];
431
+ let currentLength = 0;
432
+ let groupNum = 1;
433
+
434
+ const flushGroup = () => {
435
+ if (currentGroup.length === 0) return;
436
+ const firstLine = currentGroup[0].split('\n')[0].trim();
437
+ const title = firstLine.startsWith('#')
438
+ ? firstLine.replace(/^#+\s*/, '').slice(0, 60)
439
+ : `${fileName} Section ${groupNum}`;
440
+
441
+ forcedChunks.push({
442
+ title,
443
+ content: currentGroup.join('\n\n'),
444
+ level: 2,
445
+ headerLine: 0,
446
+ isForced: true,
447
+ forceNum: groupNum
448
+ });
449
+ groupNum++;
450
+ currentGroup = [];
451
+ currentLength = 0;
452
+ };
453
+
454
+ for (const section of sections) {
455
+ if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
456
+ flushGroup();
457
+ }
458
+ currentGroup.push(section);
459
+ currentLength += section.length;
460
+ }
461
+ flushGroup();
462
+
463
+ // Always use force-chunked results if we got multiple chunks
464
+ if (forcedChunks.length >= 2) {
465
+ debug(` Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
466
+ return forcedChunks;
467
+ }
468
+ }
469
+
470
+ return finalChunks;
471
+ }
472
+
473
/**
 * Build hierarchical parent/child relationships between chunks.
 * Each H2 chunk becomes the parent of every H3 chunk that follows it,
 * up to the next H2. Returns a map keyed by chunk key
 * (`${chunkPrefix}-${index}`) of `{ parent, children }` records.
 */
function buildHierarchy(chunks, chunkPrefix) {
  const keyFor = (index) => `${chunkPrefix}-${index}`;
  const hierarchy = {};
  let lastH2 = null;

  chunks.forEach((chunk, index) => {
    const key = keyFor(index);
    hierarchy[key] = { parent: null, children: [] };

    if (chunk.level === 2) {
      // New H2 section: subsequent H3 chunks attach here
      lastH2 = index;
    } else if (chunk.level === 3 && lastH2 !== null) {
      const parentKey = keyFor(lastH2);
      hierarchy[key].parent = parentKey;
      hierarchy[parentKey].children.push(key);
    }
  });

  return hierarchy;
}
501
+
502
/**
 * Index a single markdown file into the memory database.
 * Stores the full document under `doc-<prefix>-<name>` plus one linked
 * "RAG chunk" entry per semantic section, with prev/next/sibling/hierarchy
 * metadata and overlapping context extracted from adjacent chunks.
 *
 * Fix over the previous version: the document entry was written twice
 * (once with base metadata, then immediately overwritten with children
 * metadata). We now chunk first and write the document exactly once.
 *
 * @param {object} db - open sql.js database handle
 * @param {string} filePath - absolute path of the markdown file
 * @param {string} keyPrefix - namespace prefix (e.g. 'docs', 'guidance')
 * @returns {{docKey: string, status: ('indexed'|'unchanged'|'error'), chunks: number, error?: string}}
 */
function indexFile(db, filePath, keyPrefix) {
  const fileName = basename(filePath, extname(filePath));
  const docKey = `doc-${keyPrefix}-${fileName}`;
  const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;

  try {
    const content = readFileSync(filePath, 'utf-8');
    const contentHash = hashContent(content);

    // Skip unchanged files (same content hash) unless --force was given
    if (!force) {
      const existingHash = getEntryHash(db, docKey);
      if (existingHash === contentHash) {
        return { docKey, status: 'unchanged', chunks: 0 };
      }
    }

    const stats = statSync(filePath);
    const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');

    // Delete old chunks for this file before re-indexing
    deleteByPrefix(db, chunkPrefix);

    // Chunk FIRST so the document entry can be written once, already
    // carrying its children references when there are any chunks.
    const chunks = chunkMarkdown(content, fileName);
    const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);

    const docMetadata = {
      type: 'document',
      filePath: relativePath,
      fileSize: stats.size,
      lastModified: stats.mtime.toISOString(),
      contentHash,
      indexedAt: new Date().toISOString(),
      ragVersion: '2.0', // Mark as full RAG indexed
    };

    if (chunks.length > 0) {
      docMetadata.children = siblings;
      docMetadata.chunkCount = chunks.length;
    }

    // 1. Store full document (single write)
    storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
    debug(`Stored document: ${docKey}`);

    if (chunks.length === 0) {
      return { docKey, status: 'indexed', chunks: 0 };
    }

    // 2. Store semantic chunks with full RAG linking.
    // H2 -> H3 parent/child relationships across the chunk sequence.
    const hierarchy = buildHierarchy(chunks, chunkPrefix);

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      const chunkKey = `${chunkPrefix}-${i}`;

      // Sequential navigation (forward/backward links)
      const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
      const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;

      // Overlapping context from adjacent chunks for retrieval continuity
      const contextBefore = i > 0
        ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
        : null;
      const contextAfter = i < chunks.length - 1
        ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
        : null;

      // Get hierarchical relationships
      const hierInfo = hierarchy[chunkKey];

      const chunkMetadata = {
        type: 'chunk',
        ragVersion: '2.0',

        // Document relationship
        parentDoc: docKey,
        parentPath: relativePath,

        // Sequential navigation (forward/backward links)
        chunkIndex: i,
        totalChunks: chunks.length,
        prevChunk,
        nextChunk,

        // Sibling awareness
        siblings,

        // Hierarchical relationships (h2 -> h3)
        hierarchicalParent: hierInfo.parent,
        hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,

        // Chunk info
        chunkTitle: chunk.title,
        headerLevel: chunk.level,
        headerLine: chunk.headerLine,
        isPart: chunk.isPart || false,
        partNum: chunk.partNum || null,

        // Overlapping context for continuity
        contextOverlapPercent: overlapPercent,
        hasContextBefore: !!contextBefore,
        hasContextAfter: !!contextAfter,

        // Content metadata
        contentLength: chunk.content.length,
        contentHash: hashContent(chunk.content),
        indexedAt: new Date().toISOString(),
      };

      // Build searchable content: title, optional neighbor context, body.
      // Including overlap context improves retrieval at chunk boundaries.
      let searchableContent = `# ${chunk.title}\n\n`;

      if (contextBefore) {
        searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
      }

      searchableContent += chunk.content;

      if (contextAfter) {
        searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
      }

      // Store chunk with full metadata
      storeEntry(
        db,
        chunkKey,
        searchableContent,
        chunkMetadata,
        [keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
      );

      debug(` Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
    }

    return { docKey, status: 'indexed', chunks: chunks.length };
  } catch (err) {
    // Surface the failure to the caller instead of throwing mid-run
    return { docKey, status: 'error', error: err.message, chunks: 0 };
  }
}
647
+
648
/**
 * Recursively collect all .md files under a directory.
 * Skips node_modules, .git, and other non-content directories.
 * CLAUDE.md is excluded because Claude already loads it into context
 * automatically — indexing it would create duplicate vectors.
 */
function walkMdFiles(dir) {
  const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
  const SKIP_FILES = new Set(['CLAUDE.md']);
  const found = [];

  const visit = (current) => {
    if (!existsSync(current)) return;
    for (const entry of readdirSync(current, { withFileTypes: true })) {
      const fullPath = resolve(current, entry.name);
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) visit(fullPath);
        continue;
      }
      if (entry.isFile() && entry.name.endsWith('.md') && !SKIP_FILES.has(entry.name)) {
        found.push(fullPath);
      }
    }
  };

  visit(dir);
  return found;
}
672
+
673
/**
 * Index every markdown file under one configured guidance directory.
 * Honors the optional `fileFilter` whitelist of base names; returns the
 * per-file results from indexFile, or an empty list when the directory
 * does not exist.
 */
function indexDirectory(db, dirConfig) {
  const { absolute, path, fileFilter, prefix } = dirConfig;
  const dirPath = absolute ? path : resolve(projectRoot, path);

  if (!existsSync(dirPath)) {
    if (verbose) debug(`Directory not found: ${path}`);
    return [];
  }

  let candidates = walkMdFiles(dirPath);
  if (fileFilter) {
    // Whitelist by base name when a filter is configured
    candidates = candidates.filter((file) => fileFilter.includes(basename(file)));
  }

  return candidates.map((filePath) => indexFile(db, filePath, prefix));
}
694
+
695
/**
 * Remove stale entries for files that no longer exist on disk.
 * Uses the set of docKeys seen during the current indexing run to determine
 * which entries are stale, rather than reconstructing file paths from keys
 * (which breaks for files in subdirectories).
 *
 * Fix over the previous version: the removed-row count was obtained by
 * counting the ENTIRE namespace before and after each delete, using raw
 * SQL with NAMESPACE string-interpolated into the query (unlike every
 * other query here, which binds parameters). We now run one parameterized
 * COUNT over exactly the rows about to be deleted.
 *
 * @param {object} db - open sql.js database handle
 * @param {Set<string>} currentDocKeys - doc keys seen during this run
 * @returns {number} total number of entries removed
 */
function cleanStaleEntries(db, currentDocKeys) {
  const docsStmt = db.prepare(
    `SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
  );
  docsStmt.bind([NAMESPACE]);
  const docs = [];
  while (docsStmt.step()) docs.push(docsStmt.getAsObject());
  docsStmt.free();

  let staleCount = 0;

  for (const { key } of docs) {
    // If this doc key was seen during the current indexing run, it's not stale
    if (currentDocKeys.has(key)) continue;

    const chunkPrefix = key.replace('doc-', 'chunk-');

    // Count exactly the rows about to be deleted (doc entry + its chunks)
    const countStmt = db.prepare(
      `SELECT COUNT(*) AS cnt FROM memory_entries WHERE namespace = ? AND (key = ? OR key LIKE ?)`
    );
    countStmt.bind([NAMESPACE, key, `${chunkPrefix}%`]);
    countStmt.step();
    const removed = countStmt.getAsObject().cnt || 0;
    countStmt.free();

    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);

    if (removed > 0) {
      log(` Removed ${removed} stale entries for deleted file: ${key}`);
      staleCount += removed;
    }
  }

  // Also clean any orphaned entries not matching doc-/chunk- patterns
  const orphanStmt = db.prepare(
    `SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
  );
  orphanStmt.bind([NAMESPACE]);
  const orphans = [];
  while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
  orphanStmt.free();

  for (const { key } of orphans) {
    db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
    staleCount++;
    log(` Removed orphan entry: ${key}`);
  }

  return staleCount;
}
744
+
745
// ── Main ─────────────────────────────────────────────────────────────
// Entry sequence: banner → index (one file or all configured dirs) →
// stale cleanup → persist DB → summary → background embedding spawn.

// Banner: show overlap setting and which guidance directories exist
console.log('');
log('Indexing guidance files with FULL RAG linked segments...');
log(` Context overlap: ${overlapPercent}%`);
log(` Directories (${GUIDANCE_DIRS.length}):`);
for (const d of GUIDANCE_DIRS) {
  const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
  const exists = existsSync(dirPath);
  log(` ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
}
console.log('');

const db = await getDb();
// Run counters, reported in the summary block below
let docsIndexed = 0;
let chunksIndexed = 0;
let unchanged = 0;
let errors = 0;
// Doc keys seen this run — used by cleanStaleEntries to spot deletions
const currentDocKeys = new Set();

if (specificFile) {
  // Single-file mode: index just the requested path
  const filePath = resolve(projectRoot, specificFile);
  if (!existsSync(filePath)) {
    log(`File not found: ${specificFile}`);
    process.exit(1);
  }

  // Files under .claude/guidance/ get the 'guidance' prefix; others 'docs'
  let prefix = 'docs';
  if (specificFile.includes('.claude/guidance/')) {
    prefix = 'guidance';
  }

  const result = indexFile(db, filePath, prefix);
  log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);

  if (result.status === 'indexed') {
    docsIndexed++;
    chunksIndexed += result.chunks;
  } else if (result.status === 'unchanged') {
    unchanged++;
  } else {
    errors++;
  }
} else {
  // Index all directories
  for (const dir of GUIDANCE_DIRS) {
    log(`Scanning ${dir.path}/...`);
    const results = indexDirectory(db, dir);

    for (const result of results) {
      // Both indexed and unchanged docs are "live" for stale detection
      if (result.status === 'indexed' || result.status === 'unchanged') {
        currentDocKeys.add(result.docKey);
      }
      if (result.status === 'indexed') {
        log(` ✅ ${result.docKey} (${result.chunks} chunks)`);
        docsIndexed++;
        chunksIndexed += result.chunks;
      } else if (result.status === 'unchanged') {
        unchanged++;
      } else {
        log(` ❌ ${result.docKey}: ${result.error}`);
        errors++;
      }
    }
  }
}

// Clean stale entries for deleted files (unless indexing a specific file,
// in which case currentDocKeys would be incomplete and everything else
// would look stale)
let staleRemoved = 0;
if (!specificFile) {
  log('Cleaning stale entries for deleted files...');
  staleRemoved = cleanStaleEntries(db, currentDocKeys);
  if (staleRemoved === 0) {
    log(' No stale entries found');
  }
}

// Write changes back to disk only if something actually changed, then close
if (docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0) saveDb(db);
db.close();

// Summary report
console.log('');
log('═══════════════════════════════════════════════════════════');
log(' FULL RAG INDEXING COMPLETE');
log('═══════════════════════════════════════════════════════════');
log(` Documents indexed: ${docsIndexed}`);
log(` Chunks created: ${chunksIndexed}`);
log(` Unchanged: ${unchanged}`);
log(` Stale removed: ${staleRemoved}`);
log(` Errors: ${errors}`);
log('');
log(' RAG Features Enabled:');
log(` • Forward/backward links (prevChunk/nextChunk)`);
log(` • Sibling awareness (all chunks from same doc)`);
log(` • Hierarchical links (h2 -> h3 parent/children)`);
log(` • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
log('═══════════════════════════════════════════════════════════');

// Generate embeddings for new entries (unless skipped or nothing changed)
// Runs in BACKGROUND to avoid blocking startup
if (!skipEmbeddings && (docsIndexed > 0 || chunksIndexed > 0)) {
  console.log('');
  log('Spawning embedding generation in background...');

  const { spawn } = await import('child_process');

  // Look for build-embeddings script in multiple locations:
  // 1. Shipped with moflo (node_modules/moflo/bin/)
  // 2. Project-local (.claude/scripts/)
  const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
  const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
  const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;

  if (existsSync(embeddingScript)) {
    const embeddingArgs = ['--namespace', NAMESPACE];

    // Create log file for background process output
    const logDir = resolve(projectRoot, '.swarm/logs');
    if (!existsSync(logDir)) {
      mkdirSync(logDir, { recursive: true });
    }
    const logFile = resolve(logDir, 'embeddings.log');
    const { openSync } = await import('fs');
    // NOTE(review): these fds are never closed by this process — they are
    // inherited by the detached child; confirm that is intentional
    const out = openSync(logFile, 'a');
    const err = openSync(logFile, 'a');

    // Spawn in background - don't wait for completion
    const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
      stdio: ['ignore', out, err],
      cwd: projectRoot,
      detached: true,
      windowsHide: true // Suppress command windows on Windows
    });
    proc.unref(); // Allow parent to exit independently

    log(`Background embedding started (PID: ${proc.pid})`);
    log(`Log file: .swarm/logs/embeddings.log`);
  } else {
    log('⚠️ Embedding script not found, skipping embedding generation');
  }
} else if (skipEmbeddings) {
  log('Skipping embedding generation (--no-embeddings)');
} else {
  log('No new content indexed, skipping embedding generation');
}

// Non-zero exit signals at least one file failed to index
if (errors > 0) {
  process.exit(1);
}