moflo 4.8.21 → 4.8.22
This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/.claude/agents/browser/browser-agent.yaml +182 -182
- package/.claude/agents/core/coder.md +265 -265
- package/.claude/agents/core/planner.md +167 -167
- package/.claude/agents/core/researcher.md +189 -189
- package/.claude/agents/core/reviewer.md +325 -325
- package/.claude/agents/core/tester.md +318 -318
- package/.claude/agents/database-specialist.yaml +21 -21
- package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
- package/.claude/agents/dual-mode/codex-worker.md +211 -211
- package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
- package/.claude/agents/github/code-review-swarm.md +537 -537
- package/.claude/agents/github/github-modes.md +172 -172
- package/.claude/agents/github/issue-tracker.md +318 -318
- package/.claude/agents/github/multi-repo-swarm.md +552 -552
- package/.claude/agents/github/pr-manager.md +190 -190
- package/.claude/agents/github/project-board-sync.md +508 -508
- package/.claude/agents/github/release-manager.md +366 -366
- package/.claude/agents/github/release-swarm.md +582 -582
- package/.claude/agents/github/repo-architect.md +397 -397
- package/.claude/agents/github/swarm-issue.md +572 -572
- package/.claude/agents/github/swarm-pr.md +427 -427
- package/.claude/agents/github/sync-coordinator.md +451 -451
- package/.claude/agents/github/workflow-automation.md +634 -634
- package/.claude/agents/goal/code-goal-planner.md +445 -445
- package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
- package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
- package/.claude/agents/hive-mind/scout-explorer.md +241 -241
- package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
- package/.claude/agents/hive-mind/worker-specialist.md +216 -216
- package/.claude/agents/index.yaml +17 -17
- package/.claude/agents/neural/safla-neural.md +73 -73
- package/.claude/agents/project-coordinator.yaml +15 -15
- package/.claude/agents/python-specialist.yaml +21 -21
- package/.claude/agents/reasoning/goal-planner.md +72 -72
- package/.claude/agents/security-auditor.yaml +20 -20
- package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
- package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
- package/.claude/agents/swarm/mesh-coordinator.md +391 -391
- package/.claude/agents/templates/migration-plan.md +745 -745
- package/.claude/agents/typescript-specialist.yaml +21 -21
- package/.claude/checkpoints/1767754460.json +8 -8
- package/.claude/commands/agents/agent-spawning.md +28 -28
- package/.claude/commands/github/github-modes.md +146 -146
- package/.claude/commands/github/github-swarm.md +121 -121
- package/.claude/commands/github/issue-tracker.md +291 -291
- package/.claude/commands/github/pr-manager.md +169 -169
- package/.claude/commands/github/release-manager.md +337 -337
- package/.claude/commands/github/repo-architect.md +366 -366
- package/.claude/commands/github/sync-coordinator.md +300 -300
- package/.claude/commands/memory/neural.md +47 -47
- package/.claude/commands/sparc/analyzer.md +51 -51
- package/.claude/commands/sparc/architect.md +53 -53
- package/.claude/commands/sparc/ask.md +97 -97
- package/.claude/commands/sparc/batch-executor.md +54 -54
- package/.claude/commands/sparc/code.md +89 -89
- package/.claude/commands/sparc/coder.md +54 -54
- package/.claude/commands/sparc/debug.md +83 -83
- package/.claude/commands/sparc/debugger.md +54 -54
- package/.claude/commands/sparc/designer.md +53 -53
- package/.claude/commands/sparc/devops.md +109 -109
- package/.claude/commands/sparc/docs-writer.md +80 -80
- package/.claude/commands/sparc/documenter.md +54 -54
- package/.claude/commands/sparc/innovator.md +54 -54
- package/.claude/commands/sparc/integration.md +83 -83
- package/.claude/commands/sparc/mcp.md +117 -117
- package/.claude/commands/sparc/memory-manager.md +54 -54
- package/.claude/commands/sparc/optimizer.md +54 -54
- package/.claude/commands/sparc/orchestrator.md +131 -131
- package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
- package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
- package/.claude/commands/sparc/researcher.md +54 -54
- package/.claude/commands/sparc/reviewer.md +54 -54
- package/.claude/commands/sparc/security-review.md +80 -80
- package/.claude/commands/sparc/sparc-modes.md +174 -174
- package/.claude/commands/sparc/sparc.md +111 -111
- package/.claude/commands/sparc/spec-pseudocode.md +80 -80
- package/.claude/commands/sparc/supabase-admin.md +348 -348
- package/.claude/commands/sparc/swarm-coordinator.md +54 -54
- package/.claude/commands/sparc/tdd.md +54 -54
- package/.claude/commands/sparc/tester.md +54 -54
- package/.claude/commands/sparc/tutorial.md +79 -79
- package/.claude/commands/sparc/workflow-manager.md +54 -54
- package/.claude/commands/sparc.md +166 -166
- package/.claude/commands/swarm/analysis.md +95 -95
- package/.claude/commands/swarm/development.md +96 -96
- package/.claude/commands/swarm/examples.md +168 -168
- package/.claude/commands/swarm/maintenance.md +102 -102
- package/.claude/commands/swarm/optimization.md +117 -117
- package/.claude/commands/swarm/research.md +136 -136
- package/.claude/commands/swarm/testing.md +131 -131
- package/.claude/commands/workflows/development.md +77 -77
- package/.claude/commands/workflows/research.md +62 -62
- package/.claude/guidance/moflo-bootstrap.md +126 -126
- package/.claude/guidance/shipped/agent-bootstrap.md +126 -126
- package/.claude/guidance/shipped/guidance-memory-strategy.md +262 -262
- package/.claude/guidance/shipped/memory-strategy.md +204 -204
- package/.claude/guidance/shipped/moflo.md +668 -653
- package/.claude/guidance/shipped/task-swarm-integration.md +441 -441
- package/.claude/helpers/intelligence.cjs +207 -207
- package/.claude/helpers/statusline.cjs +851 -851
- package/.claude/settings.local.json +18 -0
- package/.claude/skills/fl/SKILL.md +583 -583
- package/.claude/skills/flo/SKILL.md +583 -583
- package/.claude/skills/github-code-review/SKILL.md +1140 -1140
- package/.claude/skills/github-multi-repo/SKILL.md +874 -874
- package/.claude/skills/github-project-management/SKILL.md +1277 -1277
- package/.claude/skills/github-release-management/SKILL.md +1081 -1081
- package/.claude/skills/github-workflow-automation/SKILL.md +1065 -1065
- package/.claude/skills/hive-mind-advanced/SKILL.md +712 -712
- package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
- package/.claude/skills/performance-analysis/SKILL.md +563 -563
- package/.claude/skills/sparc-methodology/SKILL.md +1115 -1115
- package/.claude/skills/swarm-advanced/SKILL.md +973 -973
- package/.claude/workflow-state.json +4 -4
- package/LICENSE +21 -21
- package/README.md +685 -685
- package/bin/cli.js +0 -0
- package/bin/gate-hook.mjs +50 -50
- package/bin/gate.cjs +138 -138
- package/bin/generate-code-map.mjs +775 -775
- package/bin/hook-handler.cjs +83 -83
- package/bin/hooks.mjs +656 -656
- package/bin/index-guidance.mjs +892 -892
- package/bin/index-tests.mjs +709 -709
- package/bin/lib/process-manager.mjs +243 -243
- package/bin/lib/registry-cleanup.cjs +41 -41
- package/bin/prompt-hook.mjs +72 -72
- package/bin/semantic-search.mjs +472 -472
- package/bin/session-start-launcher.mjs +238 -238
- package/bin/setup-project.mjs +250 -250
- package/package.json +123 -123
- package/src/@claude-flow/cli/README.md +452 -452
- package/src/@claude-flow/cli/bin/cli.js +180 -180
- package/src/@claude-flow/cli/bin/preinstall.cjs +2 -2
- package/src/@claude-flow/cli/dist/src/commands/completions.js +409 -409
- package/src/@claude-flow/cli/dist/src/commands/doctor.js +5 -1
- package/src/@claude-flow/cli/dist/src/commands/embeddings.js +25 -25
- package/src/@claude-flow/cli/dist/src/commands/github.js +61 -61
- package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
- package/src/@claude-flow/cli/dist/src/commands/hooks.js +9 -9
- package/src/@claude-flow/cli/dist/src/commands/ruvector/import.js +14 -14
- package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.js +624 -624
- package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +3 -0
- package/src/@claude-flow/cli/dist/src/config/moflo-config.js +101 -91
- package/src/@claude-flow/cli/dist/src/index.d.ts +5 -0
- package/src/@claude-flow/cli/dist/src/index.js +44 -0
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -29
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +43 -43
- package/src/@claude-flow/cli/dist/src/init/executor.js +453 -453
- package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +482 -482
- package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -30
- package/src/@claude-flow/cli/dist/src/init/moflo-init.js +140 -140
- package/src/@claude-flow/cli/dist/src/init/statusline-generator.js +876 -876
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +371 -371
- package/src/@claude-flow/cli/dist/src/runtime/headless.js +28 -28
- package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +197 -0
- package/src/@claude-flow/cli/dist/src/services/container-worker-pool.js +584 -0
- package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +14 -0
- package/src/@claude-flow/cli/dist/src/services/daemon-lock.js +1 -1
- package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.js +84 -84
- package/src/@claude-flow/cli/package.json +1 -1
- package/src/@claude-flow/guidance/README.md +1195 -1195
- package/src/@claude-flow/guidance/package.json +198 -198
- package/src/@claude-flow/memory/README.md +587 -587
- package/src/@claude-flow/memory/dist/agent-memory-scope.test.js +4 -7
- package/src/@claude-flow/memory/dist/agentdb-backend.d.ts +2 -0
- package/src/@claude-flow/memory/dist/agentdb-backend.js +28 -26
- package/src/@claude-flow/memory/dist/auto-memory-bridge.test.js +36 -39
- package/src/@claude-flow/memory/dist/benchmark.test.js +1 -1
- package/src/@claude-flow/memory/dist/controller-registry.test.js +43 -0
- package/src/@claude-flow/memory/dist/database-provider.d.ts +2 -2
- package/src/@claude-flow/memory/dist/database-provider.js +6 -3
- package/src/@claude-flow/memory/dist/database-provider.test.js +1 -3
- package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +245 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.js +569 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +8 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.test.js +320 -0
- package/src/@claude-flow/memory/dist/index.d.ts +3 -0
- package/src/@claude-flow/memory/dist/index.js +3 -0
- package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +121 -0
- package/src/@claude-flow/memory/dist/sqlite-backend.js +572 -0
- package/src/@claude-flow/memory/dist/sqljs-backend.d.ts +4 -3
- package/src/@claude-flow/memory/dist/sqljs-backend.js +31 -30
- package/src/@claude-flow/memory/package.json +44 -44
- package/src/@claude-flow/shared/README.md +323 -323
- package/src/@claude-flow/shared/dist/core/config/defaults.js +1 -1
- package/src/@claude-flow/shared/dist/core/config/loader.js +1 -1
- package/src/@claude-flow/shared/dist/core/config/schema.js +1 -1
- package/src/@claude-flow/shared/dist/events/event-store.js +34 -50
- package/src/@claude-flow/shared/dist/events/event-store.test.js +4 -8
- package/src/@claude-flow/shared/dist/hooks/executor.js +4 -7
- package/src/@claude-flow/shared/dist/hooks/safety/file-organization.js +1 -1
- package/src/@claude-flow/shared/dist/hooks/safety/git-commit.js +3 -3
- package/src/@claude-flow/shared/dist/hooks/verify-exports.test.js +6 -6
- package/src/@claude-flow/shared/dist/utils/secure-logger.js +1 -1
- package/src/README.md +493 -493
- package/src/@claude-flow/guidance/dist/adversarial.d.ts +0 -284
- package/src/@claude-flow/guidance/dist/adversarial.js +0 -572
- package/src/@claude-flow/guidance/dist/analyzer.d.ts +0 -530
- package/src/@claude-flow/guidance/dist/analyzer.js +0 -2518
- package/src/@claude-flow/guidance/dist/artifacts.d.ts +0 -283
- package/src/@claude-flow/guidance/dist/artifacts.js +0 -356
- package/src/@claude-flow/guidance/dist/authority.d.ts +0 -290
- package/src/@claude-flow/guidance/dist/authority.js +0 -558
- package/src/@claude-flow/guidance/dist/capabilities.d.ts +0 -209
- package/src/@claude-flow/guidance/dist/capabilities.js +0 -485
- package/src/@claude-flow/guidance/dist/coherence.d.ts +0 -233
- package/src/@claude-flow/guidance/dist/coherence.js +0 -372
- package/src/@claude-flow/guidance/dist/compiler.d.ts +0 -87
- package/src/@claude-flow/guidance/dist/compiler.js +0 -419
- package/src/@claude-flow/guidance/dist/conformance-kit.d.ts +0 -225
- package/src/@claude-flow/guidance/dist/conformance-kit.js +0 -629
- package/src/@claude-flow/guidance/dist/continue-gate.d.ts +0 -214
- package/src/@claude-flow/guidance/dist/continue-gate.js +0 -353
- package/src/@claude-flow/guidance/dist/crypto-utils.d.ts +0 -17
- package/src/@claude-flow/guidance/dist/crypto-utils.js +0 -24
- package/src/@claude-flow/guidance/dist/evolution.d.ts +0 -282
- package/src/@claude-flow/guidance/dist/evolution.js +0 -500
- package/src/@claude-flow/guidance/dist/gates.d.ts +0 -79
- package/src/@claude-flow/guidance/dist/gates.js +0 -302
- package/src/@claude-flow/guidance/dist/gateway.d.ts +0 -206
- package/src/@claude-flow/guidance/dist/gateway.js +0 -452
- package/src/@claude-flow/guidance/dist/generators.d.ts +0 -153
- package/src/@claude-flow/guidance/dist/generators.js +0 -682
- package/src/@claude-flow/guidance/dist/headless.d.ts +0 -177
- package/src/@claude-flow/guidance/dist/headless.js +0 -342
- package/src/@claude-flow/guidance/dist/hooks.d.ts +0 -109
- package/src/@claude-flow/guidance/dist/hooks.js +0 -347
- package/src/@claude-flow/guidance/dist/index.d.ts +0 -205
- package/src/@claude-flow/guidance/dist/index.js +0 -321
- package/src/@claude-flow/guidance/dist/ledger.d.ts +0 -162
- package/src/@claude-flow/guidance/dist/ledger.js +0 -375
- package/src/@claude-flow/guidance/dist/manifest-validator.d.ts +0 -289
- package/src/@claude-flow/guidance/dist/manifest-validator.js +0 -838
- package/src/@claude-flow/guidance/dist/memory-gate.d.ts +0 -222
- package/src/@claude-flow/guidance/dist/memory-gate.js +0 -382
- package/src/@claude-flow/guidance/dist/meta-governance.d.ts +0 -265
- package/src/@claude-flow/guidance/dist/meta-governance.js +0 -348
- package/src/@claude-flow/guidance/dist/optimizer.d.ts +0 -104
- package/src/@claude-flow/guidance/dist/optimizer.js +0 -329
- package/src/@claude-flow/guidance/dist/persistence.d.ts +0 -189
- package/src/@claude-flow/guidance/dist/persistence.js +0 -464
- package/src/@claude-flow/guidance/dist/proof.d.ts +0 -185
- package/src/@claude-flow/guidance/dist/proof.js +0 -238
- package/src/@claude-flow/guidance/dist/retriever.d.ts +0 -116
- package/src/@claude-flow/guidance/dist/retriever.js +0 -394
- package/src/@claude-flow/guidance/dist/ruvbot-integration.d.ts +0 -370
- package/src/@claude-flow/guidance/dist/ruvbot-integration.js +0 -738
- package/src/@claude-flow/guidance/dist/temporal.d.ts +0 -426
- package/src/@claude-flow/guidance/dist/temporal.js +0 -658
- package/src/@claude-flow/guidance/dist/trust.d.ts +0 -283
- package/src/@claude-flow/guidance/dist/trust.js +0 -473
- package/src/@claude-flow/guidance/dist/truth-anchors.d.ts +0 -276
- package/src/@claude-flow/guidance/dist/truth-anchors.js +0 -488
- package/src/@claude-flow/guidance/dist/types.d.ts +0 -378
- package/src/@claude-flow/guidance/dist/types.js +0 -10
- package/src/@claude-flow/guidance/dist/uncertainty.d.ts +0 -372
- package/src/@claude-flow/guidance/dist/uncertainty.js +0 -619
- package/src/@claude-flow/guidance/dist/wasm-kernel.d.ts +0 -48
- package/src/@claude-flow/guidance/dist/wasm-kernel.js +0 -158
package/bin/index-guidance.mjs
CHANGED
|
@@ -1,893 +1,893 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Index guidance files into claude-flow memory with full RAG linked segments
|
|
4
|
-
*
|
|
5
|
-
* Strategy:
|
|
6
|
-
* - Full documents stored as `doc-{name}` for complete retrieval
|
|
7
|
-
* - Semantic chunks stored as `chunk-{name}-{n}` for precise search
|
|
8
|
-
* - FULL RAG LINKING:
|
|
9
|
-
* - parentDoc: link to full document
|
|
10
|
-
* - prevChunk/nextChunk: forward/backward navigation
|
|
11
|
-
* - siblings: all chunk keys from same document
|
|
12
|
-
* - children: sub-chunks for hierarchical headers (h2 -> h3)
|
|
13
|
-
* - contextBefore/contextAfter: overlapping text for context continuity
|
|
14
|
-
* - Chunking based on markdown headers (## and ###) for natural boundaries
|
|
15
|
-
* - After indexing, generates embeddings for semantic search (HNSW)
|
|
16
|
-
*
|
|
17
|
-
* Usage:
|
|
18
|
-
* node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
|
|
19
|
-
* npx flo-index --force # Force reindex all
|
|
20
|
-
* npx flo-index --file X # Index specific file
|
|
21
|
-
* npx flo-index --no-embeddings # Skip embedding generation
|
|
22
|
-
* npx flo-index --overlap 20 # Set context overlap % (default: 15)
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
|
|
26
|
-
import { resolve, dirname, basename, extname } from 'path';
|
|
27
|
-
import { fileURLToPath } from 'url';
|
|
28
|
-
import { mofloResolveURL } from './lib/moflo-resolve.mjs';
|
|
29
|
-
const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
33
|
-
|
|
34
|
-
function findProjectRoot() {
|
|
35
|
-
let dir = process.cwd();
|
|
36
|
-
const root = resolve(dir, '/');
|
|
37
|
-
while (dir !== root) {
|
|
38
|
-
if (existsSync(resolve(dir, 'package.json'))) return dir;
|
|
39
|
-
dir = dirname(dir);
|
|
40
|
-
}
|
|
41
|
-
return process.cwd();
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
const projectRoot = findProjectRoot();
|
|
45
|
-
|
|
46
|
-
// Locate the moflo package root (for bundled guidance that ships with moflo)
|
|
47
|
-
const mofloRoot = resolve(__dirname, '..');
|
|
48
|
-
|
|
49
|
-
const NAMESPACE = 'guidance';
|
|
50
|
-
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
|
|
51
|
-
|
|
52
|
-
// ============================================================================
|
|
53
|
-
// Load guidance directories from moflo.yaml, falling back to defaults
|
|
54
|
-
// ============================================================================
|
|
55
|
-
|
|
56
|
-
function loadGuidanceDirs() {
|
|
57
|
-
const dirs = [];
|
|
58
|
-
|
|
59
|
-
// 1. Read moflo.yaml / moflo.config.json for user-configured directories
|
|
60
|
-
let configDirs = null;
|
|
61
|
-
const yamlPath = resolve(projectRoot, 'moflo.yaml');
|
|
62
|
-
const jsonPath = resolve(projectRoot, 'moflo.config.json');
|
|
63
|
-
|
|
64
|
-
if (existsSync(yamlPath)) {
|
|
65
|
-
try {
|
|
66
|
-
const content = readFileSync(yamlPath, 'utf-8');
|
|
67
|
-
// Simple YAML array extraction — avoids needing js-yaml at runtime
|
|
68
|
-
// Matches: guidance:\n directories:\n - .claude/guidance\n - docs/guides
|
|
69
|
-
const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
|
|
70
|
-
if (guidanceBlock) {
|
|
71
|
-
const items = guidanceBlock[1].match(/-\s+(.+)/g);
|
|
72
|
-
if (items && items.length > 0) {
|
|
73
|
-
configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
} catch { /* ignore parse errors, fall through to defaults */ }
|
|
77
|
-
} else if (existsSync(jsonPath)) {
|
|
78
|
-
try {
|
|
79
|
-
const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
|
|
80
|
-
if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
|
|
81
|
-
configDirs = raw.guidance.directories;
|
|
82
|
-
}
|
|
83
|
-
} catch { /* ignore parse errors */ }
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
// Use config dirs or fall back to defaults
|
|
87
|
-
// Each directory gets a unique prefix derived from its path to avoid key collisions
|
|
88
|
-
// when multiple directories contain files with the same name.
|
|
89
|
-
const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
|
|
90
|
-
for (const d of userDirs) {
|
|
91
|
-
const prefix = d.replace(/\\/g, '/')
|
|
92
|
-
.replace(/^\.claude\//, '')
|
|
93
|
-
.replace(/^back-office\/api\/\.claude\//, 'bo-api-')
|
|
94
|
-
.replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
|
|
95
|
-
.replace(/[^a-zA-Z0-9-]/g, '-')
|
|
96
|
-
.replace(/-+/g, '-')
|
|
97
|
-
.replace(/^-|-$/g, '') || 'guidance';
|
|
98
|
-
dirs.push({ path: d, prefix });
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
// 2. Include moflo's own bundled guidance (ships with the package)
|
|
102
|
-
// Only when running inside a consumer project (not moflo itself)
|
|
103
|
-
// Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
|
|
104
|
-
const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
|
|
105
|
-
const bundledGuidanceDir = existsSync(bundledShippedDir)
|
|
106
|
-
? bundledShippedDir
|
|
107
|
-
: resolve(mofloRoot, '.claude/guidance');
|
|
108
|
-
const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
|
|
109
|
-
if (
|
|
110
|
-
existsSync(bundledGuidanceDir) &&
|
|
111
|
-
resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
|
|
112
|
-
resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
|
|
113
|
-
) {
|
|
114
|
-
dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
|
|
118
|
-
// Indexing them wastes vectors and creates duplicate keys across subprojects.
|
|
119
|
-
|
|
120
|
-
return dirs;
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
const GUIDANCE_DIRS = loadGuidanceDirs();
|
|
124
|
-
|
|
125
|
-
// Chunking config - optimized for Claude's retrieval
|
|
126
|
-
const MIN_CHUNK_SIZE = 50; // Lower minimum to avoid mega-chunks
|
|
127
|
-
const MAX_CHUNK_SIZE = 4000; // Larger chunks for code-heavy docs (fits context better)
|
|
128
|
-
const FORCE_CHUNK_THRESHOLD = 6000; // Force paragraph-split if file > this and < 3 chunks
|
|
129
|
-
const DEFAULT_OVERLAP_PERCENT = 20; // Increased context overlap for better continuity
|
|
130
|
-
|
|
131
|
-
// Parse args
|
|
132
|
-
const args = process.argv.slice(2);
|
|
133
|
-
const force = args.includes('--force');
|
|
134
|
-
const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
|
|
135
|
-
const verbose = args.includes('--verbose') || args.includes('-v');
|
|
136
|
-
const skipEmbeddings = args.includes('--no-embeddings');
|
|
137
|
-
const overlapPercent = args.includes('--overlap')
|
|
138
|
-
? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
|
|
139
|
-
: DEFAULT_OVERLAP_PERCENT;
|
|
140
|
-
|
|
141
|
-
function log(msg) {
|
|
142
|
-
console.log(`[index-guidance] ${msg}`);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
function debug(msg) {
|
|
146
|
-
if (verbose) console.log(`[index-guidance] ${msg}`);
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
function ensureDbDir() {
|
|
150
|
-
const dir = dirname(DB_PATH);
|
|
151
|
-
if (!existsSync(dir)) {
|
|
152
|
-
mkdirSync(dir, { recursive: true });
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
async function getDb() {
|
|
157
|
-
ensureDbDir();
|
|
158
|
-
const SQL = await initSqlJs();
|
|
159
|
-
let db;
|
|
160
|
-
if (existsSync(DB_PATH)) {
|
|
161
|
-
const buffer = readFileSync(DB_PATH);
|
|
162
|
-
db = new SQL.Database(buffer);
|
|
163
|
-
} else {
|
|
164
|
-
db = new SQL.Database();
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// Ensure table exists with unique constraint
|
|
168
|
-
db.run(`
|
|
169
|
-
CREATE TABLE IF NOT EXISTS memory_entries (
|
|
170
|
-
id TEXT PRIMARY KEY,
|
|
171
|
-
key TEXT NOT NULL,
|
|
172
|
-
namespace TEXT DEFAULT 'default',
|
|
173
|
-
content TEXT NOT NULL,
|
|
174
|
-
type TEXT DEFAULT 'semantic',
|
|
175
|
-
embedding TEXT,
|
|
176
|
-
embedding_model TEXT DEFAULT 'local',
|
|
177
|
-
embedding_dimensions INTEGER,
|
|
178
|
-
tags TEXT,
|
|
179
|
-
metadata TEXT,
|
|
180
|
-
owner_id TEXT,
|
|
181
|
-
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
|
|
182
|
-
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
|
|
183
|
-
expires_at INTEGER,
|
|
184
|
-
last_accessed_at INTEGER,
|
|
185
|
-
access_count INTEGER DEFAULT 0,
|
|
186
|
-
status TEXT DEFAULT 'active',
|
|
187
|
-
UNIQUE(namespace, key)
|
|
188
|
-
)
|
|
189
|
-
`);
|
|
190
|
-
|
|
191
|
-
db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
|
|
192
|
-
db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);
|
|
193
|
-
|
|
194
|
-
return db;
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
function saveDb(db) {
|
|
198
|
-
const data = db.export();
|
|
199
|
-
writeFileSync(DB_PATH, Buffer.from(data));
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
function generateId() {
|
|
203
|
-
return `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
function hashContent(content) {
|
|
207
|
-
let hash = 0;
|
|
208
|
-
for (let i = 0; i < content.length; i++) {
|
|
209
|
-
const char = content.charCodeAt(i);
|
|
210
|
-
hash = ((hash << 5) - hash) + char;
|
|
211
|
-
hash = hash & hash;
|
|
212
|
-
}
|
|
213
|
-
return hash.toString(16);
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
function storeEntry(db, key, content, metadata = {}, tags = []) {
|
|
217
|
-
const now = Date.now();
|
|
218
|
-
const id = generateId();
|
|
219
|
-
const metaJson = JSON.stringify(metadata);
|
|
220
|
-
const tagsJson = JSON.stringify(tags);
|
|
221
|
-
|
|
222
|
-
db.run(`
|
|
223
|
-
INSERT OR REPLACE INTO memory_entries
|
|
224
|
-
(id, key, namespace, content, metadata, tags, created_at, updated_at, status)
|
|
225
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
|
|
226
|
-
`, [id, key, NAMESPACE, content, metaJson, tagsJson, now, now]);
|
|
227
|
-
|
|
228
|
-
return true;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
function deleteByPrefix(db, prefix) {
|
|
232
|
-
db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${prefix}%`]);
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
function getEntryHash(db, key) {
|
|
236
|
-
const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
|
|
237
|
-
stmt.bind([key, NAMESPACE]);
|
|
238
|
-
const entry = stmt.step() ? stmt.getAsObject() : null;
|
|
239
|
-
stmt.free();
|
|
240
|
-
if (entry?.metadata) {
|
|
241
|
-
try {
|
|
242
|
-
const meta = JSON.parse(entry.metadata);
|
|
243
|
-
return meta.contentHash;
|
|
244
|
-
} catch { /* ignore */ }
|
|
245
|
-
}
|
|
246
|
-
return null;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
/**
|
|
250
|
-
* Extract overlapping context from adjacent text
|
|
251
|
-
* @param {string} text - The text to extract from
|
|
252
|
-
* @param {number} percent - Percentage of text to extract
|
|
253
|
-
* @param {string} position - 'start' or 'end'
|
|
254
|
-
* @returns {string} - The extracted context
|
|
255
|
-
*/
|
|
256
|
-
function extractOverlapContext(text, percent, position) {
|
|
257
|
-
if (!text || percent <= 0) return '';
|
|
258
|
-
|
|
259
|
-
const targetLength = Math.floor(text.length * (percent / 100));
|
|
260
|
-
if (targetLength < 20) return ''; // Too short to be useful
|
|
261
|
-
|
|
262
|
-
if (position === 'start') {
|
|
263
|
-
// Get first N% of text, try to break at sentence/paragraph
|
|
264
|
-
let end = targetLength;
|
|
265
|
-
const nextPara = text.indexOf('\n\n', targetLength - 50);
|
|
266
|
-
const nextSentence = text.indexOf('. ', targetLength - 30);
|
|
267
|
-
|
|
268
|
-
if (nextPara > 0 && nextPara < targetLength + 100) {
|
|
269
|
-
end = nextPara;
|
|
270
|
-
} else if (nextSentence > 0 && nextSentence < targetLength + 50) {
|
|
271
|
-
end = nextSentence + 1;
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
return text.substring(0, end).trim();
|
|
275
|
-
} else {
|
|
276
|
-
// Get last N% of text, try to break at sentence/paragraph
|
|
277
|
-
let start = text.length - targetLength;
|
|
278
|
-
const prevPara = text.lastIndexOf('\n\n', start + 50);
|
|
279
|
-
const prevSentence = text.lastIndexOf('. ', start + 30);
|
|
280
|
-
|
|
281
|
-
if (prevPara > 0 && prevPara > start - 100) {
|
|
282
|
-
start = prevPara + 2;
|
|
283
|
-
} else if (prevSentence > 0 && prevSentence > start - 50) {
|
|
284
|
-
start = prevSentence + 2;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
return text.substring(start).trim();
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
/**
|
|
292
|
-
* Split markdown content into semantic chunks based on headers
|
|
293
|
-
* Returns array of { title, content, level, headerLine }
|
|
294
|
-
*/
|
|
295
|
-
function chunkMarkdown(content, fileName) {
|
|
296
|
-
const lines = content.split('\n');
|
|
297
|
-
const chunks = [];
|
|
298
|
-
let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };
|
|
299
|
-
|
|
300
|
-
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
|
|
301
|
-
// Strip CRLF carriage returns for Windows compatibility
|
|
302
|
-
const line = lines[lineNum].replace(/\r$/, '');
|
|
303
|
-
|
|
304
|
-
// Check for headers (## and ###)
|
|
305
|
-
const h2Match = line.match(/^## (.+)$/);
|
|
306
|
-
const h3Match = line.match(/^### (.+)$/);
|
|
307
|
-
|
|
308
|
-
if (h2Match || h3Match) {
|
|
309
|
-
// Save current chunk if it has content
|
|
310
|
-
if (currentChunk.content.length > 0) {
|
|
311
|
-
const chunkContent = currentChunk.content.join('\n').trim();
|
|
312
|
-
if (chunkContent.length >= MIN_CHUNK_SIZE) {
|
|
313
|
-
chunks.push({
|
|
314
|
-
title: currentChunk.title,
|
|
315
|
-
content: chunkContent,
|
|
316
|
-
level: currentChunk.level,
|
|
317
|
-
headerLine: currentChunk.headerLine
|
|
318
|
-
});
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
// Start new chunk
|
|
323
|
-
currentChunk = {
|
|
324
|
-
title: h2Match ? h2Match[1] : h3Match[1],
|
|
325
|
-
content: [line],
|
|
326
|
-
level: h2Match ? 2 : 3,
|
|
327
|
-
headerLine: lineNum
|
|
328
|
-
};
|
|
329
|
-
} else {
|
|
330
|
-
currentChunk.content.push(line);
|
|
331
|
-
}
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
// Don't forget the last chunk
|
|
335
|
-
if (currentChunk.content.length > 0) {
|
|
336
|
-
const chunkContent = currentChunk.content.join('\n').trim();
|
|
337
|
-
if (chunkContent.length >= MIN_CHUNK_SIZE) {
|
|
338
|
-
chunks.push({
|
|
339
|
-
title: currentChunk.title,
|
|
340
|
-
content: chunkContent,
|
|
341
|
-
level: currentChunk.level,
|
|
342
|
-
headerLine: currentChunk.headerLine
|
|
343
|
-
});
|
|
344
|
-
}
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// Handle chunks that are too large - split by paragraphs
|
|
348
|
-
const finalChunks = [];
|
|
349
|
-
for (const chunk of chunks) {
|
|
350
|
-
if (chunk.content.length > MAX_CHUNK_SIZE) {
|
|
351
|
-
const paragraphs = chunk.content.split(/\n\n+/);
|
|
352
|
-
let currentPart = [];
|
|
353
|
-
let currentLength = 0;
|
|
354
|
-
let partNum = 1;
|
|
355
|
-
|
|
356
|
-
for (const para of paragraphs) {
|
|
357
|
-
if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
|
|
358
|
-
finalChunks.push({
|
|
359
|
-
title: `${chunk.title} (part ${partNum})`,
|
|
360
|
-
content: currentPart.join('\n\n'),
|
|
361
|
-
level: chunk.level,
|
|
362
|
-
headerLine: chunk.headerLine,
|
|
363
|
-
isPart: true,
|
|
364
|
-
partNum
|
|
365
|
-
});
|
|
366
|
-
currentPart = [para];
|
|
367
|
-
currentLength = para.length;
|
|
368
|
-
partNum++;
|
|
369
|
-
} else {
|
|
370
|
-
currentPart.push(para);
|
|
371
|
-
currentLength += para.length;
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
if (currentPart.length > 0) {
|
|
376
|
-
finalChunks.push({
|
|
377
|
-
title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
|
|
378
|
-
content: currentPart.join('\n\n'),
|
|
379
|
-
level: chunk.level,
|
|
380
|
-
headerLine: chunk.headerLine,
|
|
381
|
-
isPart: partNum > 1,
|
|
382
|
-
partNum: partNum > 1 ? partNum : undefined
|
|
383
|
-
});
|
|
384
|
-
}
|
|
385
|
-
} else {
|
|
386
|
-
finalChunks.push(chunk);
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
// FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
|
|
391
|
-
const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
|
|
392
|
-
if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
|
|
393
|
-
debug(` Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
|
|
394
|
-
const allContent = finalChunks.map(c => c.content).join('\n\n');
|
|
395
|
-
|
|
396
|
-
// Split on --- horizontal rules first, then on ## headers, then on paragraphs
|
|
397
|
-
const TARGET_CHUNK_SIZE = 2500;
|
|
398
|
-
const rawSections = allContent.split(/\n---+\n/);
|
|
399
|
-
let sections = [];
|
|
400
|
-
|
|
401
|
-
for (const raw of rawSections) {
|
|
402
|
-
// Further split on ## headers if section is too large
|
|
403
|
-
if (raw.length > TARGET_CHUNK_SIZE) {
|
|
404
|
-
const headerSplit = raw.split(/\n(?=## )/);
|
|
405
|
-
for (const hSect of headerSplit) {
|
|
406
|
-
if (hSect.length > TARGET_CHUNK_SIZE) {
|
|
407
|
-
// Split very long sections on single newlines as last resort
|
|
408
|
-
const lines = hSect.split('\n');
|
|
409
|
-
let chunk = '';
|
|
410
|
-
for (const line of lines) {
|
|
411
|
-
if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
|
|
412
|
-
sections.push(chunk.trim());
|
|
413
|
-
chunk = line;
|
|
414
|
-
} else {
|
|
415
|
-
chunk += (chunk ? '\n' : '') + line;
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
if (chunk.trim().length > 30) sections.push(chunk.trim());
|
|
419
|
-
} else if (hSect.trim().length > 30) {
|
|
420
|
-
sections.push(hSect.trim());
|
|
421
|
-
}
|
|
422
|
-
}
|
|
423
|
-
} else if (raw.trim().length > 30) {
|
|
424
|
-
sections.push(raw.trim());
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
// Now group sections into chunks
|
|
429
|
-
const forcedChunks = [];
|
|
430
|
-
let currentGroup = [];
|
|
431
|
-
let currentLength = 0;
|
|
432
|
-
let groupNum = 1;
|
|
433
|
-
|
|
434
|
-
const flushGroup = () => {
|
|
435
|
-
if (currentGroup.length === 0) return;
|
|
436
|
-
const firstLine = currentGroup[0].split('\n')[0].trim();
|
|
437
|
-
const title = firstLine.startsWith('#')
|
|
438
|
-
? firstLine.replace(/^#+\s*/, '').slice(0, 60)
|
|
439
|
-
: `${fileName} Section ${groupNum}`;
|
|
440
|
-
|
|
441
|
-
forcedChunks.push({
|
|
442
|
-
title,
|
|
443
|
-
content: currentGroup.join('\n\n'),
|
|
444
|
-
level: 2,
|
|
445
|
-
headerLine: 0,
|
|
446
|
-
isForced: true,
|
|
447
|
-
forceNum: groupNum
|
|
448
|
-
});
|
|
449
|
-
groupNum++;
|
|
450
|
-
currentGroup = [];
|
|
451
|
-
currentLength = 0;
|
|
452
|
-
};
|
|
453
|
-
|
|
454
|
-
for (const section of sections) {
|
|
455
|
-
if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
|
|
456
|
-
flushGroup();
|
|
457
|
-
}
|
|
458
|
-
currentGroup.push(section);
|
|
459
|
-
currentLength += section.length;
|
|
460
|
-
}
|
|
461
|
-
flushGroup();
|
|
462
|
-
|
|
463
|
-
// Always use force-chunked results if we got multiple chunks
|
|
464
|
-
if (forcedChunks.length >= 2) {
|
|
465
|
-
debug(` Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
|
|
466
|
-
return forcedChunks;
|
|
467
|
-
}
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
return finalChunks;
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
/**
|
|
474
|
-
* Build hierarchical relationships between chunks
|
|
475
|
-
* H2 chunks are parents of subsequent H3 chunks
|
|
476
|
-
*/
|
|
477
|
-
function buildHierarchy(chunks, chunkPrefix) {
|
|
478
|
-
const hierarchy = {};
|
|
479
|
-
let currentH2Index = null;
|
|
480
|
-
|
|
481
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
482
|
-
const chunk = chunks[i];
|
|
483
|
-
const chunkKey = `${chunkPrefix}-${i}`;
|
|
484
|
-
|
|
485
|
-
hierarchy[chunkKey] = {
|
|
486
|
-
parent: null,
|
|
487
|
-
children: []
|
|
488
|
-
};
|
|
489
|
-
|
|
490
|
-
if (chunk.level === 2) {
|
|
491
|
-
currentH2Index = i;
|
|
492
|
-
} else if (chunk.level === 3 && currentH2Index !== null) {
|
|
493
|
-
const parentKey = `${chunkPrefix}-${currentH2Index}`;
|
|
494
|
-
hierarchy[chunkKey].parent = parentKey;
|
|
495
|
-
hierarchy[parentKey].children.push(chunkKey);
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
return hierarchy;
|
|
500
|
-
}
|
|
501
|
-
|
|
502
|
-
function indexFile(db, filePath, keyPrefix) {
|
|
503
|
-
const fileName = basename(filePath, extname(filePath));
|
|
504
|
-
const docKey = `doc-${keyPrefix}-${fileName}`;
|
|
505
|
-
const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;
|
|
506
|
-
|
|
507
|
-
try {
|
|
508
|
-
const content = readFileSync(filePath, 'utf-8');
|
|
509
|
-
const contentHash = hashContent(content);
|
|
510
|
-
|
|
511
|
-
// Check if content changed (skip if same hash unless --force)
|
|
512
|
-
if (!force) {
|
|
513
|
-
const existingHash = getEntryHash(db, docKey);
|
|
514
|
-
if (existingHash === contentHash) {
|
|
515
|
-
return { docKey, status: 'unchanged', chunks: 0 };
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
const stats = statSync(filePath);
|
|
520
|
-
const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');
|
|
521
|
-
|
|
522
|
-
// Delete old chunks for this file before re-indexing
|
|
523
|
-
deleteByPrefix(db, chunkPrefix);
|
|
524
|
-
|
|
525
|
-
// 1. Store full document
|
|
526
|
-
const docMetadata = {
|
|
527
|
-
type: 'document',
|
|
528
|
-
filePath: relativePath,
|
|
529
|
-
fileSize: stats.size,
|
|
530
|
-
lastModified: stats.mtime.toISOString(),
|
|
531
|
-
contentHash,
|
|
532
|
-
indexedAt: new Date().toISOString(),
|
|
533
|
-
ragVersion: '2.0', // Mark as full RAG indexed
|
|
534
|
-
};
|
|
535
|
-
|
|
536
|
-
storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
|
|
537
|
-
debug(`Stored document: ${docKey}`);
|
|
538
|
-
|
|
539
|
-
// 2. Chunk and store semantic pieces with full RAG linking
|
|
540
|
-
const chunks = chunkMarkdown(content, fileName);
|
|
541
|
-
|
|
542
|
-
if (chunks.length === 0) {
|
|
543
|
-
return { docKey, status: 'indexed', chunks: 0 };
|
|
544
|
-
}
|
|
545
|
-
|
|
546
|
-
// Build hierarchy and sibling list
|
|
547
|
-
const hierarchy = buildHierarchy(chunks, chunkPrefix);
|
|
548
|
-
const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
|
|
549
|
-
|
|
550
|
-
// Update document with children references
|
|
551
|
-
const docChildrenMeta = {
|
|
552
|
-
...docMetadata,
|
|
553
|
-
children: siblings,
|
|
554
|
-
chunkCount: chunks.length,
|
|
555
|
-
};
|
|
556
|
-
storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document']);
|
|
557
|
-
|
|
558
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
559
|
-
const chunk = chunks[i];
|
|
560
|
-
const chunkKey = `${chunkPrefix}-${i}`;
|
|
561
|
-
|
|
562
|
-
// Build prev/next links
|
|
563
|
-
const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
|
|
564
|
-
const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
|
|
565
|
-
|
|
566
|
-
// Extract overlapping context from adjacent chunks
|
|
567
|
-
const contextBefore = i > 0
|
|
568
|
-
? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
|
|
569
|
-
: null;
|
|
570
|
-
const contextAfter = i < chunks.length - 1
|
|
571
|
-
? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
|
|
572
|
-
: null;
|
|
573
|
-
|
|
574
|
-
// Get hierarchical relationships
|
|
575
|
-
const hierInfo = hierarchy[chunkKey];
|
|
576
|
-
|
|
577
|
-
const chunkMetadata = {
|
|
578
|
-
type: 'chunk',
|
|
579
|
-
ragVersion: '2.0',
|
|
580
|
-
|
|
581
|
-
// Document relationship
|
|
582
|
-
parentDoc: docKey,
|
|
583
|
-
parentPath: relativePath,
|
|
584
|
-
|
|
585
|
-
// Sequential navigation (forward/backward links)
|
|
586
|
-
chunkIndex: i,
|
|
587
|
-
totalChunks: chunks.length,
|
|
588
|
-
prevChunk,
|
|
589
|
-
nextChunk,
|
|
590
|
-
|
|
591
|
-
// Sibling awareness
|
|
592
|
-
siblings,
|
|
593
|
-
|
|
594
|
-
// Hierarchical relationships (h2 -> h3)
|
|
595
|
-
hierarchicalParent: hierInfo.parent,
|
|
596
|
-
hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,
|
|
597
|
-
|
|
598
|
-
// Chunk info
|
|
599
|
-
chunkTitle: chunk.title,
|
|
600
|
-
headerLevel: chunk.level,
|
|
601
|
-
headerLine: chunk.headerLine,
|
|
602
|
-
isPart: chunk.isPart || false,
|
|
603
|
-
partNum: chunk.partNum || null,
|
|
604
|
-
|
|
605
|
-
// Overlapping context for continuity
|
|
606
|
-
contextOverlapPercent: overlapPercent,
|
|
607
|
-
hasContextBefore: !!contextBefore,
|
|
608
|
-
hasContextAfter: !!contextAfter,
|
|
609
|
-
|
|
610
|
-
// Content metadata
|
|
611
|
-
contentLength: chunk.content.length,
|
|
612
|
-
contentHash: hashContent(chunk.content),
|
|
613
|
-
indexedAt: new Date().toISOString(),
|
|
614
|
-
};
|
|
615
|
-
|
|
616
|
-
// Build searchable content with title context
|
|
617
|
-
// Include overlap context for better retrieval
|
|
618
|
-
let searchableContent = `# ${chunk.title}\n\n`;
|
|
619
|
-
|
|
620
|
-
if (contextBefore) {
|
|
621
|
-
searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
searchableContent += chunk.content;
|
|
625
|
-
|
|
626
|
-
if (contextAfter) {
|
|
627
|
-
searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
// Store chunk with full metadata
|
|
631
|
-
storeEntry(
|
|
632
|
-
db,
|
|
633
|
-
chunkKey,
|
|
634
|
-
searchableContent,
|
|
635
|
-
chunkMetadata,
|
|
636
|
-
[keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
|
|
637
|
-
);
|
|
638
|
-
|
|
639
|
-
debug(` Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
|
|
640
|
-
}
|
|
641
|
-
|
|
642
|
-
return { docKey, status: 'indexed', chunks: chunks.length };
|
|
643
|
-
} catch (err) {
|
|
644
|
-
return { docKey, status: 'error', error: err.message, chunks: 0 };
|
|
645
|
-
}
|
|
646
|
-
}
|
|
647
|
-
|
|
648
|
-
/**
|
|
649
|
-
* Recursively collect all .md files under a directory.
|
|
650
|
-
* Skips node_modules, .git, and other non-content directories.
|
|
651
|
-
*/
|
|
652
|
-
function walkMdFiles(dir) {
|
|
653
|
-
const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
|
|
654
|
-
// CLAUDE.md is loaded into context by Claude automatically — skip to avoid duplicate vectors
|
|
655
|
-
const SKIP_FILES = new Set(['CLAUDE.md']);
|
|
656
|
-
const files = [];
|
|
657
|
-
|
|
658
|
-
function walk(current) {
|
|
659
|
-
if (!existsSync(current)) return;
|
|
660
|
-
for (const entry of readdirSync(current, { withFileTypes: true })) {
|
|
661
|
-
if (entry.isDirectory()) {
|
|
662
|
-
if (!SKIP_DIRS.has(entry.name)) walk(resolve(current, entry.name));
|
|
663
|
-
} else if (entry.isFile() && entry.name.endsWith('.md') && !SKIP_FILES.has(entry.name)) {
|
|
664
|
-
files.push(resolve(current, entry.name));
|
|
665
|
-
}
|
|
666
|
-
}
|
|
667
|
-
}
|
|
668
|
-
|
|
669
|
-
walk(dir);
|
|
670
|
-
return files;
|
|
671
|
-
}
|
|
672
|
-
|
|
673
|
-
function indexDirectory(db, dirConfig) {
|
|
674
|
-
const dirPath = dirConfig.absolute ? dirConfig.path : resolve(projectRoot, dirConfig.path);
|
|
675
|
-
const results = [];
|
|
676
|
-
|
|
677
|
-
if (!existsSync(dirPath)) {
|
|
678
|
-
if (verbose) debug(`Directory not found: ${dirConfig.path}`);
|
|
679
|
-
return results;
|
|
680
|
-
}
|
|
681
|
-
|
|
682
|
-
const allMdFiles = walkMdFiles(dirPath);
|
|
683
|
-
const filtered = dirConfig.fileFilter
|
|
684
|
-
? allMdFiles.filter(f => dirConfig.fileFilter.includes(basename(f)))
|
|
685
|
-
: allMdFiles;
|
|
686
|
-
|
|
687
|
-
for (const filePath of filtered) {
|
|
688
|
-
const result = indexFile(db, filePath, dirConfig.prefix);
|
|
689
|
-
results.push(result);
|
|
690
|
-
}
|
|
691
|
-
|
|
692
|
-
return results;
|
|
693
|
-
}
|
|
694
|
-
|
|
695
|
-
/**
|
|
696
|
-
* Remove stale entries for files that no longer exist on disk.
|
|
697
|
-
* Uses the set of docKeys seen during the current indexing run to determine
|
|
698
|
-
* which entries are stale, rather than reconstructing file paths from keys
|
|
699
|
-
* (which breaks for files in subdirectories).
|
|
700
|
-
*/
|
|
701
|
-
function cleanStaleEntries(db, currentDocKeys) {
|
|
702
|
-
const docsStmt = db.prepare(
|
|
703
|
-
`SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
|
|
704
|
-
);
|
|
705
|
-
docsStmt.bind([NAMESPACE]);
|
|
706
|
-
const docs = [];
|
|
707
|
-
while (docsStmt.step()) docs.push(docsStmt.getAsObject());
|
|
708
|
-
docsStmt.free();
|
|
709
|
-
|
|
710
|
-
let staleCount = 0;
|
|
711
|
-
|
|
712
|
-
for (const { key } of docs) {
|
|
713
|
-
// If this doc key was seen during the current indexing run, it's not stale
|
|
714
|
-
if (currentDocKeys.has(key)) continue;
|
|
715
|
-
|
|
716
|
-
const chunkPrefix = key.replace('doc-', 'chunk-');
|
|
717
|
-
const countBefore = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
|
|
718
|
-
db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
|
|
719
|
-
db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
|
|
720
|
-
const countAfter = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
|
|
721
|
-
const removed = countBefore - countAfter;
|
|
722
|
-
if (removed > 0) {
|
|
723
|
-
log(` Removed ${removed} stale entries for deleted file: ${key}`);
|
|
724
|
-
staleCount += removed;
|
|
725
|
-
}
|
|
726
|
-
}
|
|
727
|
-
|
|
728
|
-
// Also clean any orphaned entries not matching doc-/chunk- patterns
|
|
729
|
-
const orphanStmt = db.prepare(
|
|
730
|
-
`SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
|
|
731
|
-
);
|
|
732
|
-
orphanStmt.bind([NAMESPACE]);
|
|
733
|
-
const orphans = [];
|
|
734
|
-
while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
|
|
735
|
-
orphanStmt.free();
|
|
736
|
-
for (const { key } of orphans) {
|
|
737
|
-
db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
|
|
738
|
-
staleCount++;
|
|
739
|
-
log(` Removed orphan entry: ${key}`);
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
return staleCount;
|
|
743
|
-
}
|
|
744
|
-
|
|
745
|
-
// Main
|
|
746
|
-
console.log('');
|
|
747
|
-
log('Indexing guidance files with FULL RAG linked segments...');
|
|
748
|
-
log(` Context overlap: ${overlapPercent}%`);
|
|
749
|
-
log(` Directories (${GUIDANCE_DIRS.length}):`);
|
|
750
|
-
for (const d of GUIDANCE_DIRS) {
|
|
751
|
-
const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
|
|
752
|
-
const exists = existsSync(dirPath);
|
|
753
|
-
log(` ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
|
|
754
|
-
}
|
|
755
|
-
console.log('');
|
|
756
|
-
|
|
757
|
-
const db = await getDb();
|
|
758
|
-
let docsIndexed = 0;
|
|
759
|
-
let chunksIndexed = 0;
|
|
760
|
-
let unchanged = 0;
|
|
761
|
-
let errors = 0;
|
|
762
|
-
const currentDocKeys = new Set();
|
|
763
|
-
|
|
764
|
-
if (specificFile) {
|
|
765
|
-
// Index single file
|
|
766
|
-
const filePath = resolve(projectRoot, specificFile);
|
|
767
|
-
if (!existsSync(filePath)) {
|
|
768
|
-
log(`File not found: ${specificFile}`);
|
|
769
|
-
process.exit(1);
|
|
770
|
-
}
|
|
771
|
-
|
|
772
|
-
let prefix = 'docs';
|
|
773
|
-
if (specificFile.includes('.claude/guidance/')) {
|
|
774
|
-
prefix = 'guidance';
|
|
775
|
-
}
|
|
776
|
-
|
|
777
|
-
const result = indexFile(db, filePath, prefix);
|
|
778
|
-
log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);
|
|
779
|
-
|
|
780
|
-
if (result.status === 'indexed') {
|
|
781
|
-
docsIndexed++;
|
|
782
|
-
chunksIndexed += result.chunks;
|
|
783
|
-
} else if (result.status === 'unchanged') {
|
|
784
|
-
unchanged++;
|
|
785
|
-
} else {
|
|
786
|
-
errors++;
|
|
787
|
-
}
|
|
788
|
-
} else {
|
|
789
|
-
// Index all directories
|
|
790
|
-
for (const dir of GUIDANCE_DIRS) {
|
|
791
|
-
log(`Scanning ${dir.path}/...`);
|
|
792
|
-
const results = indexDirectory(db, dir);
|
|
793
|
-
|
|
794
|
-
for (const result of results) {
|
|
795
|
-
if (result.status === 'indexed' || result.status === 'unchanged') {
|
|
796
|
-
currentDocKeys.add(result.docKey);
|
|
797
|
-
}
|
|
798
|
-
if (result.status === 'indexed') {
|
|
799
|
-
log(` ✅ ${result.docKey} (${result.chunks} chunks)`);
|
|
800
|
-
docsIndexed++;
|
|
801
|
-
chunksIndexed += result.chunks;
|
|
802
|
-
} else if (result.status === 'unchanged') {
|
|
803
|
-
unchanged++;
|
|
804
|
-
} else {
|
|
805
|
-
log(` ❌ ${result.docKey}: ${result.error}`);
|
|
806
|
-
errors++;
|
|
807
|
-
}
|
|
808
|
-
}
|
|
809
|
-
}
|
|
810
|
-
}
|
|
811
|
-
|
|
812
|
-
// Clean stale entries for deleted files (unless indexing a specific file)
|
|
813
|
-
let staleRemoved = 0;
|
|
814
|
-
if (!specificFile) {
|
|
815
|
-
log('Cleaning stale entries for deleted files...');
|
|
816
|
-
staleRemoved = cleanStaleEntries(db, currentDocKeys);
|
|
817
|
-
if (staleRemoved === 0) {
|
|
818
|
-
log(' No stale entries found');
|
|
819
|
-
}
|
|
820
|
-
}
|
|
821
|
-
|
|
822
|
-
// Write changes back to disk and close
|
|
823
|
-
if (docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0) saveDb(db);
|
|
824
|
-
db.close();
|
|
825
|
-
|
|
826
|
-
console.log('');
|
|
827
|
-
log('═══════════════════════════════════════════════════════════');
|
|
828
|
-
log(' FULL RAG INDEXING COMPLETE');
|
|
829
|
-
log('═══════════════════════════════════════════════════════════');
|
|
830
|
-
log(` Documents indexed: ${docsIndexed}`);
|
|
831
|
-
log(` Chunks created: ${chunksIndexed}`);
|
|
832
|
-
log(` Unchanged: ${unchanged}`);
|
|
833
|
-
log(` Stale removed: ${staleRemoved}`);
|
|
834
|
-
log(` Errors: ${errors}`);
|
|
835
|
-
log('');
|
|
836
|
-
log(' RAG Features Enabled:');
|
|
837
|
-
log(` • Forward/backward links (prevChunk/nextChunk)`);
|
|
838
|
-
log(` • Sibling awareness (all chunks from same doc)`);
|
|
839
|
-
log(` • Hierarchical links (h2 -> h3 parent/children)`);
|
|
840
|
-
log(` • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
|
|
841
|
-
log('═══════════════════════════════════════════════════════════');
|
|
842
|
-
|
|
843
|
-
// Generate embeddings for new entries (unless skipped or nothing changed)
|
|
844
|
-
// Runs in BACKGROUND to avoid blocking startup
|
|
845
|
-
if (!skipEmbeddings && (docsIndexed > 0 || chunksIndexed > 0)) {
|
|
846
|
-
console.log('');
|
|
847
|
-
log('Spawning embedding generation in background...');
|
|
848
|
-
|
|
849
|
-
const { spawn } = await import('child_process');
|
|
850
|
-
|
|
851
|
-
// Look for build-embeddings script in multiple locations:
|
|
852
|
-
// 1. Shipped with moflo (node_modules/moflo/bin/)
|
|
853
|
-
// 2. Project-local (.claude/scripts/)
|
|
854
|
-
const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
|
|
855
|
-
const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
|
|
856
|
-
const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;
|
|
857
|
-
|
|
858
|
-
if (existsSync(embeddingScript)) {
|
|
859
|
-
const embeddingArgs = ['--namespace', NAMESPACE];
|
|
860
|
-
|
|
861
|
-
// Create log file for background process output
|
|
862
|
-
const logDir = resolve(projectRoot, '.swarm/logs');
|
|
863
|
-
if (!existsSync(logDir)) {
|
|
864
|
-
mkdirSync(logDir, { recursive: true });
|
|
865
|
-
}
|
|
866
|
-
const logFile = resolve(logDir, 'embeddings.log');
|
|
867
|
-
const { openSync } = await import('fs');
|
|
868
|
-
const out = openSync(logFile, 'a');
|
|
869
|
-
const err = openSync(logFile, 'a');
|
|
870
|
-
|
|
871
|
-
// Spawn in background - don't wait for completion
|
|
872
|
-
const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
|
|
873
|
-
stdio: ['ignore', out, err],
|
|
874
|
-
cwd: projectRoot,
|
|
875
|
-
detached: true,
|
|
876
|
-
windowsHide: true // Suppress command windows on Windows
|
|
877
|
-
});
|
|
878
|
-
proc.unref(); // Allow parent to exit independently
|
|
879
|
-
|
|
880
|
-
log(`Background embedding started (PID: ${proc.pid})`);
|
|
881
|
-
log(`Log file: .swarm/logs/embeddings.log`);
|
|
882
|
-
} else {
|
|
883
|
-
log('⚠️ Embedding script not found, skipping embedding generation');
|
|
884
|
-
}
|
|
885
|
-
} else if (skipEmbeddings) {
|
|
886
|
-
log('Skipping embedding generation (--no-embeddings)');
|
|
887
|
-
} else {
|
|
888
|
-
log('No new content indexed, skipping embedding generation');
|
|
889
|
-
}
|
|
890
|
-
|
|
891
|
-
if (errors > 0) {
|
|
892
|
-
process.exit(1);
|
|
893
|
-
}
|
|
2
|
+
/**
|
|
3
|
+
* Index guidance files into claude-flow memory with full RAG linked segments
|
|
4
|
+
*
|
|
5
|
+
* Strategy:
|
|
6
|
+
* - Full documents stored as `doc-{name}` for complete retrieval
|
|
7
|
+
* - Semantic chunks stored as `chunk-{name}-{n}` for precise search
|
|
8
|
+
* - FULL RAG LINKING:
|
|
9
|
+
* - parentDoc: link to full document
|
|
10
|
+
* - prevChunk/nextChunk: forward/backward navigation
|
|
11
|
+
* - siblings: all chunk keys from same document
|
|
12
|
+
* - children: sub-chunks for hierarchical headers (h2 -> h3)
|
|
13
|
+
* - contextBefore/contextAfter: overlapping text for context continuity
|
|
14
|
+
* - Chunking based on markdown headers (## and ###) for natural boundaries
|
|
15
|
+
* - After indexing, generates embeddings for semantic search (HNSW)
|
|
16
|
+
*
|
|
17
|
+
* Usage:
|
|
18
|
+
* node node_modules/moflo/bin/index-guidance.mjs # Index all + generate embeddings
|
|
19
|
+
* npx flo-index --force # Force reindex all
|
|
20
|
+
* npx flo-index --file X # Index specific file
|
|
21
|
+
* npx flo-index --no-embeddings # Skip embedding generation
|
|
22
|
+
* npx flo-index --overlap 20 # Set context overlap % (default: 15)
|
|
23
|
+
*/
|
|
+
+ import { existsSync, readdirSync, readFileSync, statSync, mkdirSync, writeFileSync } from 'fs';
+ import { resolve, dirname, basename, extname } from 'path';
+ import { fileURLToPath } from 'url';
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
+
+
+ const __dirname = dirname(fileURLToPath(import.meta.url));
+
+ function findProjectRoot() {
+   let dir = process.cwd();
+   const root = resolve(dir, '/');
+   while (dir !== root) {
+     if (existsSync(resolve(dir, 'package.json'))) return dir;
+     dir = dirname(dir);
+   }
+   return process.cwd();
+ }
+
+ const projectRoot = findProjectRoot();
+
+ // Locate the moflo package root (for bundled guidance that ships with moflo)
+ const mofloRoot = resolve(__dirname, '..');
+
+ const NAMESPACE = 'guidance';
+ const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
+
+ // ============================================================================
+ // Load guidance directories from moflo.yaml, falling back to defaults
+ // ============================================================================
+
+ function loadGuidanceDirs() {
+   const dirs = [];
+
+   // 1. Read moflo.yaml / moflo.config.json for user-configured directories
+   let configDirs = null;
+   const yamlPath = resolve(projectRoot, 'moflo.yaml');
+   const jsonPath = resolve(projectRoot, 'moflo.config.json');
+
+   if (existsSync(yamlPath)) {
+     try {
+       const content = readFileSync(yamlPath, 'utf-8');
+       // Simple YAML array extraction — avoids needing js-yaml at runtime
+       // Matches: guidance:\n  directories:\n    - .claude/guidance\n    - docs/guides
+       const guidanceBlock = content.match(/guidance:\s*\n\s+directories:\s*\n((?:\s+-\s+.+\n?)+)/);
+       if (guidanceBlock) {
+         const items = guidanceBlock[1].match(/-\s+(.+)/g);
+         if (items && items.length > 0) {
+           configDirs = items.map(item => item.replace(/^-\s+/, '').trim());
+         }
+       }
+     } catch { /* ignore parse errors, fall through to defaults */ }
+   } else if (existsSync(jsonPath)) {
+     try {
+       const raw = JSON.parse(readFileSync(jsonPath, 'utf-8'));
+       if (raw.guidance?.directories && Array.isArray(raw.guidance.directories)) {
+         configDirs = raw.guidance.directories;
+       }
+     } catch { /* ignore parse errors */ }
+   }
+
+   // Use config dirs or fall back to defaults.
+   // Each directory gets a unique prefix derived from its path to avoid key collisions
+   // when multiple directories contain files with the same name.
+   const userDirs = configDirs || ['.claude/guidance', 'docs/guides'];
+   for (const d of userDirs) {
+     const prefix = d.replace(/\\/g, '/')
+       .replace(/^\.claude\//, '')
+       .replace(/^back-office\/api\/\.claude\//, 'bo-api-')
+       .replace(/^back-office\/ui\/\.claude\//, 'bo-ui-')
+       .replace(/[^a-zA-Z0-9-]/g, '-')
+       .replace(/-+/g, '-')
+       .replace(/^-|-$/g, '') || 'guidance';
+     dirs.push({ path: d, prefix });
+   }
+
+   // 2. Include moflo's own bundled guidance (ships with the package)
+   //    Only when running inside a consumer project (not moflo itself)
+   //    Shipped guidance lives in .claude/guidance/shipped/ — internal/ is excluded from npm
+   const bundledShippedDir = resolve(mofloRoot, '.claude/guidance/shipped');
+   const bundledGuidanceDir = existsSync(bundledShippedDir)
+     ? bundledShippedDir
+     : resolve(mofloRoot, '.claude/guidance');
+   const projectGuidanceDir = resolve(projectRoot, '.claude/guidance');
+   if (
+     existsSync(bundledGuidanceDir) &&
+     resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir) &&
+     resolve(bundledGuidanceDir) !== resolve(projectGuidanceDir, 'shipped')
+   ) {
+     dirs.push({ path: bundledGuidanceDir, prefix: 'moflo-bundled', absolute: true });
+   }
+
+   // 3. CLAUDE.md files are NOT indexed — Claude loads them into context automatically.
+   //    Indexing them wastes vectors and creates duplicate keys across subprojects.
+
+   return dirs;
+ }
+
+ const GUIDANCE_DIRS = loadGuidanceDirs();
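
The prefix derivation in `loadGuidanceDirs()` can be checked in isolation. A minimal sketch of the same replace chain (back-office rules omitted; sample paths arbitrary):

    const toPrefix = (d) => d.replace(/\\/g, '/')
      .replace(/^\.claude\//, '')
      .replace(/[^a-zA-Z0-9-]/g, '-')
      .replace(/-+/g, '-')
      .replace(/^-|-$/g, '') || 'guidance';

    toPrefix('.claude/guidance'); // -> 'guidance'
    toPrefix('docs/guides');      // -> 'docs-guides'
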
+
+ // Chunking config - optimized for Claude's retrieval
+ const MIN_CHUNK_SIZE = 50;          // Lower minimum to avoid mega-chunks
+ const MAX_CHUNK_SIZE = 4000;        // Larger chunks for code-heavy docs (fits context better)
+ const FORCE_CHUNK_THRESHOLD = 6000; // Force paragraph-split if file > this and < 3 chunks
+ const DEFAULT_OVERLAP_PERCENT = 20; // Increased context overlap for better continuity
+
+ // Parse args
+ const args = process.argv.slice(2);
+ const force = args.includes('--force');
+ const specificFile = args.includes('--file') ? args[args.indexOf('--file') + 1] : null;
+ const verbose = args.includes('--verbose') || args.includes('-v');
+ const skipEmbeddings = args.includes('--no-embeddings');
+ const overlapPercent = args.includes('--overlap')
+   ? parseInt(args[args.indexOf('--overlap') + 1], 10) || DEFAULT_OVERLAP_PERCENT
+   : DEFAULT_OVERLAP_PERCENT;
+
+ function log(msg) {
+   console.log(`[index-guidance] ${msg}`);
+ }
+
+ function debug(msg) {
+   if (verbose) console.log(`[index-guidance] ${msg}`);
+ }
+
+ function ensureDbDir() {
+   const dir = dirname(DB_PATH);
+   if (!existsSync(dir)) {
+     mkdirSync(dir, { recursive: true });
+   }
+ }
+
+ async function getDb() {
+   ensureDbDir();
+   const SQL = await initSqlJs();
+   let db;
+   if (existsSync(DB_PATH)) {
+     const buffer = readFileSync(DB_PATH);
+     db = new SQL.Database(buffer);
+   } else {
+     db = new SQL.Database();
+   }
+
+   // Ensure table exists with unique constraint
+   db.run(`
+     CREATE TABLE IF NOT EXISTS memory_entries (
+       id TEXT PRIMARY KEY,
+       key TEXT NOT NULL,
+       namespace TEXT DEFAULT 'default',
+       content TEXT NOT NULL,
+       type TEXT DEFAULT 'semantic',
+       embedding TEXT,
+       embedding_model TEXT DEFAULT 'local',
+       embedding_dimensions INTEGER,
+       tags TEXT,
+       metadata TEXT,
+       owner_id TEXT,
+       created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
+       updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
+       expires_at INTEGER,
+       last_accessed_at INTEGER,
+       access_count INTEGER DEFAULT 0,
+       status TEXT DEFAULT 'active',
+       UNIQUE(namespace, key)
+     )
+   `);
+
+   db.run(`CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`);
+   db.run(`CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`);
+
+   return db;
+ }
+
+ function saveDb(db) {
+   const data = db.export();
+   writeFileSync(DB_PATH, Buffer.from(data));
+ }
+
+ function generateId() {
+   return `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+ }
+
+ function hashContent(content) {
+   let hash = 0;
+   for (let i = 0; i < content.length; i++) {
+     const char = content.charCodeAt(i);
+     hash = ((hash << 5) - hash) + char;
+     hash = hash & hash;
+   }
+   return hash.toString(16);
+ }
+
+ function storeEntry(db, key, content, metadata = {}, tags = []) {
+   const now = Date.now();
+   const id = generateId();
+   const metaJson = JSON.stringify(metadata);
+   const tagsJson = JSON.stringify(tags);
+
+   db.run(`
+     INSERT OR REPLACE INTO memory_entries
+       (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
+   `, [id, key, NAMESPACE, content, metaJson, tagsJson, now, now]);
+
+   return true;
+ }
+
+ function deleteByPrefix(db, prefix) {
+   db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${prefix}%`]);
+ }
+
+ function getEntryHash(db, key) {
+   const stmt = db.prepare('SELECT metadata FROM memory_entries WHERE key = ? AND namespace = ?');
+   stmt.bind([key, NAMESPACE]);
+   const entry = stmt.step() ? stmt.getAsObject() : null;
+   stmt.free();
+   if (entry?.metadata) {
+     try {
+       const meta = JSON.parse(entry.metadata);
+       return meta.contentHash;
+     } catch { /* ignore */ }
+   }
+   return null;
+ }
+
+ /**
+  * Extract overlapping context from adjacent text
+  * @param {string} text - The text to extract from
+  * @param {number} percent - Percentage of text to extract
+  * @param {string} position - 'start' or 'end'
+  * @returns {string} - The extracted context
+  */
+ function extractOverlapContext(text, percent, position) {
+   if (!text || percent <= 0) return '';
+
+   const targetLength = Math.floor(text.length * (percent / 100));
+   if (targetLength < 20) return ''; // Too short to be useful
+
+   if (position === 'start') {
+     // Get first N% of text, try to break at sentence/paragraph
+     let end = targetLength;
+     const nextPara = text.indexOf('\n\n', targetLength - 50);
+     const nextSentence = text.indexOf('. ', targetLength - 30);
+
+     if (nextPara > 0 && nextPara < targetLength + 100) {
+       end = nextPara;
+     } else if (nextSentence > 0 && nextSentence < targetLength + 50) {
+       end = nextSentence + 1;
+     }
+
+     return text.substring(0, end).trim();
+   } else {
+     // Get last N% of text, try to break at sentence/paragraph
+     let start = text.length - targetLength;
+     const prevPara = text.lastIndexOf('\n\n', start + 50);
+     const prevSentence = text.lastIndexOf('. ', start + 30);
+
+     if (prevPara > 0 && prevPara > start - 100) {
+       start = prevPara + 2;
+     } else if (prevSentence > 0 && prevSentence > start - 50) {
+       start = prevSentence + 2;
+     }
+
+     return text.substring(start).trim();
+   }
+ }
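
To illustrate the boundary snapping: asked for the trailing 30% of a two-paragraph string, the function walks the cut back to the paragraph break and returns the whole second paragraph (worked example against the code above):

    const prev = 'Background details that can be dropped.\n\nKey closing point to carry forward.';
    extractOverlapContext(prev, 30, 'end');
    // -> 'Key closing point to carry forward.'
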
+
+ /**
+  * Split markdown content into semantic chunks based on headers
+  * Returns array of { title, content, level, headerLine }
+  */
+ function chunkMarkdown(content, fileName) {
+   const lines = content.split('\n');
+   const chunks = [];
+   let currentChunk = { title: fileName, content: [], level: 0, headerLine: 0 };
+
+   for (let lineNum = 0; lineNum < lines.length; lineNum++) {
+     // Strip CRLF carriage returns for Windows compatibility
+     const line = lines[lineNum].replace(/\r$/, '');
+
+     // Check for headers (## and ###)
+     const h2Match = line.match(/^## (.+)$/);
+     const h3Match = line.match(/^### (.+)$/);
+
+     if (h2Match || h3Match) {
+       // Save current chunk if it has content
+       if (currentChunk.content.length > 0) {
+         const chunkContent = currentChunk.content.join('\n').trim();
+         if (chunkContent.length >= MIN_CHUNK_SIZE) {
+           chunks.push({
+             title: currentChunk.title,
+             content: chunkContent,
+             level: currentChunk.level,
+             headerLine: currentChunk.headerLine
+           });
+         }
+       }
+
+       // Start new chunk
+       currentChunk = {
+         title: h2Match ? h2Match[1] : h3Match[1],
+         content: [line],
+         level: h2Match ? 2 : 3,
+         headerLine: lineNum
+       };
+     } else {
+       currentChunk.content.push(line);
+     }
+   }
+
+   // Don't forget the last chunk
+   if (currentChunk.content.length > 0) {
+     const chunkContent = currentChunk.content.join('\n').trim();
+     if (chunkContent.length >= MIN_CHUNK_SIZE) {
+       chunks.push({
+         title: currentChunk.title,
+         content: chunkContent,
+         level: currentChunk.level,
+         headerLine: currentChunk.headerLine
+       });
+     }
+   }
+
+   // Handle chunks that are too large - split by paragraphs
+   const finalChunks = [];
+   for (const chunk of chunks) {
+     if (chunk.content.length > MAX_CHUNK_SIZE) {
+       const paragraphs = chunk.content.split(/\n\n+/);
+       let currentPart = [];
+       let currentLength = 0;
+       let partNum = 1;
+
+       for (const para of paragraphs) {
+         if (currentLength + para.length > MAX_CHUNK_SIZE && currentPart.length > 0) {
+           finalChunks.push({
+             title: `${chunk.title} (part ${partNum})`,
+             content: currentPart.join('\n\n'),
+             level: chunk.level,
+             headerLine: chunk.headerLine,
+             isPart: true,
+             partNum
+           });
+           currentPart = [para];
+           currentLength = para.length;
+           partNum++;
+         } else {
+           currentPart.push(para);
+           currentLength += para.length;
+         }
+       }
+
+       if (currentPart.length > 0) {
+         finalChunks.push({
+           title: partNum > 1 ? `${chunk.title} (part ${partNum})` : chunk.title,
+           content: currentPart.join('\n\n'),
+           level: chunk.level,
+           headerLine: chunk.headerLine,
+           isPart: partNum > 1,
+           partNum: partNum > 1 ? partNum : undefined
+         });
+       }
+     } else {
+       finalChunks.push(chunk);
+     }
+   }
+
+   // FORCE CHUNKING: If file is large but resulted in few chunks, split by sections
+   const totalContent = finalChunks.reduce((acc, c) => acc + c.content.length, 0);
+   if (totalContent > FORCE_CHUNK_THRESHOLD && finalChunks.length < 3) {
+     debug(`  Force-chunking: ${totalContent} bytes in ${finalChunks.length} chunks - splitting by sections`);
+     const allContent = finalChunks.map(c => c.content).join('\n\n');
+
+     // Split on --- horizontal rules first, then on ## headers, then on paragraphs
+     const TARGET_CHUNK_SIZE = 2500;
+     const rawSections = allContent.split(/\n---+\n/);
+     let sections = [];
+
+     for (const raw of rawSections) {
+       // Further split on ## headers if section is too large
+       if (raw.length > TARGET_CHUNK_SIZE) {
+         const headerSplit = raw.split(/\n(?=## )/);
+         for (const hSect of headerSplit) {
+           if (hSect.length > TARGET_CHUNK_SIZE) {
+             // Split very long sections on single newlines as last resort
+             const lines = hSect.split('\n');
+             let chunk = '';
+             for (const line of lines) {
+               if (chunk.length + line.length > TARGET_CHUNK_SIZE && chunk.length > 100) {
+                 sections.push(chunk.trim());
+                 chunk = line;
+               } else {
+                 chunk += (chunk ? '\n' : '') + line;
+               }
+             }
+             if (chunk.trim().length > 30) sections.push(chunk.trim());
+           } else if (hSect.trim().length > 30) {
+             sections.push(hSect.trim());
+           }
+         }
+       } else if (raw.trim().length > 30) {
+         sections.push(raw.trim());
+       }
+     }
+
+     // Now group sections into chunks
+     const forcedChunks = [];
+     let currentGroup = [];
+     let currentLength = 0;
+     let groupNum = 1;
+
+     const flushGroup = () => {
+       if (currentGroup.length === 0) return;
+       const firstLine = currentGroup[0].split('\n')[0].trim();
+       const title = firstLine.startsWith('#')
+         ? firstLine.replace(/^#+\s*/, '').slice(0, 60)
+         : `${fileName} Section ${groupNum}`;
+
+       forcedChunks.push({
+         title,
+         content: currentGroup.join('\n\n'),
+         level: 2,
+         headerLine: 0,
+         isForced: true,
+         forceNum: groupNum
+       });
+       groupNum++;
+       currentGroup = [];
+       currentLength = 0;
+     };
+
+     for (const section of sections) {
+       if (currentLength + section.length > TARGET_CHUNK_SIZE && currentGroup.length > 0) {
+         flushGroup();
+       }
+       currentGroup.push(section);
+       currentLength += section.length;
+     }
+     flushGroup();
+
+     // Always use force-chunked results if we got multiple chunks
+     if (forcedChunks.length >= 2) {
+       debug(`  Force-chunking produced ${forcedChunks.length} chunks (was ${finalChunks.length})`);
+       return forcedChunks;
+     }
+   }
+
+   return finalChunks;
+ }
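
A sketch of the chunker's output on a tiny document (illustrative; the strings are chosen only to clear MIN_CHUNK_SIZE):

    const md = [
      '## Setup',
      'Install the tooling and configure the project before running anything.',
      '### Install',
      'Run the package manager install step and verify the lockfile is clean.',
    ].join('\n');

    const parts = chunkMarkdown(md, 'example');
    // parts[0] -> { title: 'Setup',   level: 2, headerLine: 0, ... }
    // parts[1] -> { title: 'Install', level: 3, headerLine: 2, ... }
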
+
+ /**
+  * Build hierarchical relationships between chunks
+  * H2 chunks are parents of subsequent H3 chunks
+  */
+ function buildHierarchy(chunks, chunkPrefix) {
+   const hierarchy = {};
+   let currentH2Index = null;
+
+   for (let i = 0; i < chunks.length; i++) {
+     const chunk = chunks[i];
+     const chunkKey = `${chunkPrefix}-${i}`;
+
+     hierarchy[chunkKey] = {
+       parent: null,
+       children: []
+     };
+
+     if (chunk.level === 2) {
+       currentH2Index = i;
+     } else if (chunk.level === 3 && currentH2Index !== null) {
+       const parentKey = `${chunkPrefix}-${currentH2Index}`;
+       hierarchy[chunkKey].parent = parentKey;
+       hierarchy[parentKey].children.push(chunkKey);
+     }
+   }
+
+   return hierarchy;
+ }
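
Continuing the chunking example above, buildHierarchy nests the h3 chunk under the preceding h2 chunk (the chunk prefix is hypothetical):

    const h = buildHierarchy(parts, 'chunk-guidance-example');
    // h['chunk-guidance-example-1'].parent === 'chunk-guidance-example-0'
    // h['chunk-guidance-example-0'].children -> ['chunk-guidance-example-1']
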
+
+ function indexFile(db, filePath, keyPrefix) {
+   const fileName = basename(filePath, extname(filePath));
+   const docKey = `doc-${keyPrefix}-${fileName}`;
+   const chunkPrefix = `chunk-${keyPrefix}-${fileName}`;
+
+   try {
+     const content = readFileSync(filePath, 'utf-8');
+     const contentHash = hashContent(content);
+
+     // Check if content changed (skip if same hash unless --force)
+     if (!force) {
+       const existingHash = getEntryHash(db, docKey);
+       if (existingHash === contentHash) {
+         return { docKey, status: 'unchanged', chunks: 0 };
+       }
+     }
+
+     const stats = statSync(filePath);
+     const relativePath = filePath.replace(projectRoot, '').replace(/\\/g, '/');
+
+     // Delete old chunks for this file before re-indexing
+     deleteByPrefix(db, chunkPrefix);
+
+     // 1. Store full document
+     const docMetadata = {
+       type: 'document',
+       filePath: relativePath,
+       fileSize: stats.size,
+       lastModified: stats.mtime.toISOString(),
+       contentHash,
+       indexedAt: new Date().toISOString(),
+       ragVersion: '2.0', // Mark as full RAG indexed
+     };
+
+     storeEntry(db, docKey, content, docMetadata, [keyPrefix, 'document']);
+     debug(`Stored document: ${docKey}`);
+
+     // 2. Chunk and store semantic pieces with full RAG linking
+     const chunks = chunkMarkdown(content, fileName);
+
+     if (chunks.length === 0) {
+       return { docKey, status: 'indexed', chunks: 0 };
+     }
+
+     // Build hierarchy and sibling list
+     const hierarchy = buildHierarchy(chunks, chunkPrefix);
+     const siblings = chunks.map((_, i) => `${chunkPrefix}-${i}`);
+
+     // Update document with children references
+     const docChildrenMeta = {
+       ...docMetadata,
+       children: siblings,
+       chunkCount: chunks.length,
+     };
+     storeEntry(db, docKey, content, docChildrenMeta, [keyPrefix, 'document']);
+
+     for (let i = 0; i < chunks.length; i++) {
+       const chunk = chunks[i];
+       const chunkKey = `${chunkPrefix}-${i}`;
+
+       // Build prev/next links
+       const prevChunk = i > 0 ? `${chunkPrefix}-${i - 1}` : null;
+       const nextChunk = i < chunks.length - 1 ? `${chunkPrefix}-${i + 1}` : null;
+
+       // Extract overlapping context from adjacent chunks
+       const contextBefore = i > 0
+         ? extractOverlapContext(chunks[i - 1].content, overlapPercent, 'end')
+         : null;
+       const contextAfter = i < chunks.length - 1
+         ? extractOverlapContext(chunks[i + 1].content, overlapPercent, 'start')
+         : null;
+
+       // Get hierarchical relationships
+       const hierInfo = hierarchy[chunkKey];
+
+       const chunkMetadata = {
+         type: 'chunk',
+         ragVersion: '2.0',
+
+         // Document relationship
+         parentDoc: docKey,
+         parentPath: relativePath,
+
+         // Sequential navigation (forward/backward links)
+         chunkIndex: i,
+         totalChunks: chunks.length,
+         prevChunk,
+         nextChunk,
+
+         // Sibling awareness
+         siblings,
+
+         // Hierarchical relationships (h2 -> h3)
+         hierarchicalParent: hierInfo.parent,
+         hierarchicalChildren: hierInfo.children.length > 0 ? hierInfo.children : null,
+
+         // Chunk info
+         chunkTitle: chunk.title,
+         headerLevel: chunk.level,
+         headerLine: chunk.headerLine,
+         isPart: chunk.isPart || false,
+         partNum: chunk.partNum || null,
+
+         // Overlapping context for continuity
+         contextOverlapPercent: overlapPercent,
+         hasContextBefore: !!contextBefore,
+         hasContextAfter: !!contextAfter,
+
+         // Content metadata
+         contentLength: chunk.content.length,
+         contentHash: hashContent(chunk.content),
+         indexedAt: new Date().toISOString(),
+       };
+
+       // Build searchable content with title context
+       // Include overlap context for better retrieval
+       let searchableContent = `# ${chunk.title}\n\n`;
+
+       if (contextBefore) {
+         searchableContent += `[Context from previous section:]\n${contextBefore}\n\n---\n\n`;
+       }
+
+       searchableContent += chunk.content;
+
+       if (contextAfter) {
+         searchableContent += `\n\n---\n\n[Context from next section:]\n${contextAfter}`;
+       }
+
+       // Store chunk with full metadata
+       storeEntry(
+         db,
+         chunkKey,
+         searchableContent,
+         chunkMetadata,
+         [keyPrefix, 'chunk', `level-${chunk.level}`, chunk.title.toLowerCase().replace(/[^a-z0-9]+/g, '-')]
+       );
+
+       debug(`  Stored chunk ${i}: ${chunk.title} (${chunk.content.length} chars, prev=${!!prevChunk}, next=${!!nextChunk})`);
+     }
+
+     return { docKey, status: 'indexed', chunks: chunks.length };
+   } catch (err) {
+     return { docKey, status: 'error', error: err.message, chunks: 0 };
+   }
+ }
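
Reading the links back out takes only the sql.js primitives already used above. A hedged sketch of chunk-to-chunk navigation (the chunk key is hypothetical):

    function getEntry(db, key) {
      const stmt = db.prepare('SELECT content, metadata FROM memory_entries WHERE key = ? AND namespace = ?');
      stmt.bind([key, NAMESPACE]);
      const row = stmt.step() ? stmt.getAsObject() : null;
      stmt.free();
      return row;
    }

    const hit = getEntry(db, 'chunk-guidance-testing-1'); // hypothetical key
    if (hit) {
      const meta = JSON.parse(hit.metadata);
      const next = meta.nextChunk ? getEntry(db, meta.nextChunk) : null; // forward link
      const whole = getEntry(db, meta.parentDoc);                        // full document
    }
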
+
+ /**
+  * Recursively collect all .md files under a directory.
+  * Skips node_modules, .git, and other non-content directories.
+  */
+ function walkMdFiles(dir) {
+   const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage', '.next', '.reports']);
+   // CLAUDE.md is loaded into context by Claude automatically — skip to avoid duplicate vectors
+   const SKIP_FILES = new Set(['CLAUDE.md']);
+   const files = [];
+
+   function walk(current) {
+     if (!existsSync(current)) return;
+     for (const entry of readdirSync(current, { withFileTypes: true })) {
+       if (entry.isDirectory()) {
+         if (!SKIP_DIRS.has(entry.name)) walk(resolve(current, entry.name));
+       } else if (entry.isFile() && entry.name.endsWith('.md') && !SKIP_FILES.has(entry.name)) {
+         files.push(resolve(current, entry.name));
+       }
+     }
+   }
+
+   walk(dir);
+   return files;
+ }
+
+ function indexDirectory(db, dirConfig) {
+   const dirPath = dirConfig.absolute ? dirConfig.path : resolve(projectRoot, dirConfig.path);
+   const results = [];
+
+   if (!existsSync(dirPath)) {
+     if (verbose) debug(`Directory not found: ${dirConfig.path}`);
+     return results;
+   }
+
+   const allMdFiles = walkMdFiles(dirPath);
+   const filtered = dirConfig.fileFilter
+     ? allMdFiles.filter(f => dirConfig.fileFilter.includes(basename(f)))
+     : allMdFiles;
+
+   for (const filePath of filtered) {
+     const result = indexFile(db, filePath, dirConfig.prefix);
+     results.push(result);
+   }
+
+   return results;
+ }
+
+ /**
+  * Remove stale entries for files that no longer exist on disk.
+  * Uses the set of docKeys seen during the current indexing run to determine
+  * which entries are stale, rather than reconstructing file paths from keys
+  * (which breaks for files in subdirectories).
+  */
+ function cleanStaleEntries(db, currentDocKeys) {
+   const docsStmt = db.prepare(
+     `SELECT DISTINCT key FROM memory_entries WHERE namespace = ? AND key LIKE 'doc-%'`
+   );
+   docsStmt.bind([NAMESPACE]);
+   const docs = [];
+   while (docsStmt.step()) docs.push(docsStmt.getAsObject());
+   docsStmt.free();
+
+   let staleCount = 0;
+
+   for (const { key } of docs) {
+     // If this doc key was seen during the current indexing run, it's not stale
+     if (currentDocKeys.has(key)) continue;
+
+     const chunkPrefix = key.replace('doc-', 'chunk-');
+     const countBefore = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
+     db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key LIKE ?`, [NAMESPACE, `${chunkPrefix}%`]);
+     db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
+     const countAfter = db.exec(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = '${NAMESPACE}'`)[0]?.values[0][0] || 0;
+     const removed = countBefore - countAfter;
+     if (removed > 0) {
+       log(`  Removed ${removed} stale entries for deleted file: ${key}`);
+       staleCount += removed;
+     }
+   }
+
+   // Also clean any orphaned entries not matching doc-/chunk- patterns
+   const orphanStmt = db.prepare(
+     `SELECT key FROM memory_entries WHERE namespace = ? AND key NOT LIKE 'doc-%' AND key NOT LIKE 'chunk-%'`
+   );
+   orphanStmt.bind([NAMESPACE]);
+   const orphans = [];
+   while (orphanStmt.step()) orphans.push(orphanStmt.getAsObject());
+   orphanStmt.free();
+   for (const { key } of orphans) {
+     db.run(`DELETE FROM memory_entries WHERE namespace = ? AND key = ?`, [NAMESPACE, key]);
+     staleCount++;
+     log(`  Removed orphan entry: ${key}`);
+   }
+
+   return staleCount;
+ }
+
+ // Main
+ console.log('');
+ log('Indexing guidance files with FULL RAG linked segments...');
+ log(`  Context overlap: ${overlapPercent}%`);
+ log(`  Directories (${GUIDANCE_DIRS.length}):`);
+ for (const d of GUIDANCE_DIRS) {
+   const dirPath = d.absolute ? d.path : resolve(projectRoot, d.path);
+   const exists = existsSync(dirPath);
+   log(`    ${exists ? '✓' : '✗'} ${d.absolute ? dirPath : d.path} [${d.prefix}]`);
+ }
+ console.log('');
+
+ const db = await getDb();
+ let docsIndexed = 0;
+ let chunksIndexed = 0;
+ let unchanged = 0;
+ let errors = 0;
+ const currentDocKeys = new Set();
+
+ if (specificFile) {
+   // Index single file
+   const filePath = resolve(projectRoot, specificFile);
+   if (!existsSync(filePath)) {
+     log(`File not found: ${specificFile}`);
+     process.exit(1);
+   }
+
+   let prefix = 'docs';
+   if (specificFile.includes('.claude/guidance/')) {
+     prefix = 'guidance';
+   }
+
+   const result = indexFile(db, filePath, prefix);
+   log(`${result.docKey}: ${result.status} (${result.chunks} chunks)`);
+
+   if (result.status === 'indexed') {
+     docsIndexed++;
+     chunksIndexed += result.chunks;
+   } else if (result.status === 'unchanged') {
+     unchanged++;
+   } else {
+     errors++;
+   }
+ } else {
+   // Index all directories
+   for (const dir of GUIDANCE_DIRS) {
+     log(`Scanning ${dir.path}/...`);
+     const results = indexDirectory(db, dir);
+
+     for (const result of results) {
+       if (result.status === 'indexed' || result.status === 'unchanged') {
+         currentDocKeys.add(result.docKey);
+       }
+       if (result.status === 'indexed') {
+         log(`  ✅ ${result.docKey} (${result.chunks} chunks)`);
+         docsIndexed++;
+         chunksIndexed += result.chunks;
+       } else if (result.status === 'unchanged') {
+         unchanged++;
+       } else {
+         log(`  ❌ ${result.docKey}: ${result.error}`);
+         errors++;
+       }
+     }
+   }
+ }
+
+ // Clean stale entries for deleted files (unless indexing a specific file)
+ let staleRemoved = 0;
+ if (!specificFile) {
+   log('Cleaning stale entries for deleted files...');
+   staleRemoved = cleanStaleEntries(db, currentDocKeys);
+   if (staleRemoved === 0) {
+     log('  No stale entries found');
+   }
+ }
+
+ // Write changes back to disk and close
+ if (docsIndexed > 0 || chunksIndexed > 0 || staleRemoved > 0) saveDb(db);
+ db.close();
+
+ console.log('');
+ log('═══════════════════════════════════════════════════════════');
+ log('  FULL RAG INDEXING COMPLETE');
+ log('═══════════════════════════════════════════════════════════');
+ log(`  Documents indexed: ${docsIndexed}`);
+ log(`  Chunks created: ${chunksIndexed}`);
+ log(`  Unchanged: ${unchanged}`);
+ log(`  Stale removed: ${staleRemoved}`);
+ log(`  Errors: ${errors}`);
+ log('');
+ log('  RAG Features Enabled:');
+ log(`  • Forward/backward links (prevChunk/nextChunk)`);
+ log(`  • Sibling awareness (all chunks from same doc)`);
+ log(`  • Hierarchical links (h2 -> h3 parent/children)`);
+ log(`  • Context overlap: ${overlapPercent}% (contextBefore/contextAfter)`);
+ log('═══════════════════════════════════════════════════════════');
+
+ // Generate embeddings for new entries (unless skipped or nothing changed)
+ // Runs in BACKGROUND to avoid blocking startup
+ if (!skipEmbeddings && (docsIndexed > 0 || chunksIndexed > 0)) {
+   console.log('');
+   log('Spawning embedding generation in background...');
+
+   const { spawn } = await import('child_process');
+
+   // Look for build-embeddings script in multiple locations:
+   // 1. Shipped with moflo (node_modules/moflo/bin/)
+   // 2. Project-local (.claude/scripts/)
+   const mofloScript = resolve(__dirname, 'build-embeddings.mjs');
+   const projectLocalScript = resolve(projectRoot, '.claude/scripts/build-embeddings.mjs');
+   const embeddingScript = existsSync(mofloScript) ? mofloScript : projectLocalScript;
+
+   if (existsSync(embeddingScript)) {
+     const embeddingArgs = ['--namespace', NAMESPACE];
+
+     // Create log file for background process output
+     const logDir = resolve(projectRoot, '.swarm/logs');
+     if (!existsSync(logDir)) {
+       mkdirSync(logDir, { recursive: true });
+     }
+     const logFile = resolve(logDir, 'embeddings.log');
+     const { openSync } = await import('fs');
+     const out = openSync(logFile, 'a');
+     const err = openSync(logFile, 'a');
+
+     // Spawn in background - don't wait for completion
+     const proc = spawn('node', [embeddingScript, ...embeddingArgs], {
+       stdio: ['ignore', out, err],
+       cwd: projectRoot,
+       detached: true,
+       windowsHide: true // Suppress command windows on Windows
+     });
+     proc.unref(); // Allow parent to exit independently
+
+     log(`Background embedding started (PID: ${proc.pid})`);
+     log(`Log file: .swarm/logs/embeddings.log`);
+   } else {
+     log('⚠️ Embedding script not found, skipping embedding generation');
+   }
+ } else if (skipEmbeddings) {
+   log('Skipping embedding generation (--no-embeddings)');
+ } else {
+   log('No new content indexed, skipping embedding generation');
+ }
+
+ if (errors > 0) {
+   process.exit(1);
+ }
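
After a run, the result can be spot-checked against the same database using the primitives defined above (a sketch only; the counts shown are invented and vary by project):

    const check = await getDb();
    const res = check.exec(
      `SELECT SUM(key LIKE 'doc-%') AS docs, SUM(key LIKE 'chunk-%') AS chunks
       FROM memory_entries WHERE namespace = '${NAMESPACE}'`
    );
    console.log(res[0]?.values[0]); // e.g. [ 12, 87 ]
    check.close();
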