mindforge-cc 10.7.0 → 11.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.agent/hooks/mindforge-statusline.js +2 -2
  2. package/.mindforge/MINDFORGE-V2-SCHEMA.json +43 -10
  3. package/.mindforge/config.json +18 -4
  4. package/CHANGELOG.md +165 -0
  5. package/MINDFORGE.md +3 -3
  6. package/README.md +49 -4
  7. package/RELEASENOTES.md +81 -1
  8. package/SECURITY.md +20 -8
  9. package/bin/autonomous/audit-writer.js +105 -70
  10. package/bin/autonomous/auto-runner.js +377 -34
  11. package/bin/autonomous/context-refactorer.js +26 -11
  12. package/bin/autonomous/dependency-dag.js +59 -0
  13. package/bin/autonomous/state-manager.js +62 -6
  14. package/bin/autonomous/stuck-monitor.js +46 -7
  15. package/bin/autonomous/wave-executor.js +86 -26
  16. package/bin/council-cli.js +161 -0
  17. package/bin/dashboard/api-router.js +43 -0
  18. package/bin/dashboard/approval-handler.js +3 -1
  19. package/bin/dashboard/metrics-aggregator.js +28 -1
  20. package/bin/dashboard/server.js +68 -5
  21. package/bin/dashboard/sse-bridge.js +10 -13
  22. package/bin/engine/council-runtime.js +124 -0
  23. package/bin/engine/feedback-loop.js +8 -0
  24. package/bin/engine/intelligence-interlock.js +32 -15
  25. package/bin/engine/logic-drift-detector.js +2 -1
  26. package/bin/engine/nexus-tracer.js +3 -2
  27. package/bin/engine/otel-exporter.js +123 -0
  28. package/bin/engine/remediation-engine.js +155 -32
  29. package/bin/engine/self-corrective-synthesizer.js +84 -10
  30. package/bin/engine/sre-manager.js +12 -4
  31. package/bin/engine/temporal-cli.js +4 -2
  32. package/bin/engine/temporal-hub.js +131 -34
  33. package/bin/engine/verification-runner.js +131 -0
  34. package/bin/engine/verify-cli.js +34 -0
  35. package/bin/eval/eval-harness.js +82 -0
  36. package/bin/eval/golden-set-retrieval.json +46 -0
  37. package/bin/governance/approve.js +41 -5
  38. package/bin/governance/audit-hash.js +12 -0
  39. package/bin/governance/audit-verifier.js +60 -0
  40. package/bin/governance/impact-analyzer.js +28 -0
  41. package/bin/governance/policy-engine.js +10 -3
  42. package/bin/governance/quantum-crypto.js +95 -28
  43. package/bin/governance/rbac-manager.js +74 -2
  44. package/bin/governance/ztai-manager.js +79 -9
  45. package/bin/hindsight-injector.js +8 -9
  46. package/bin/hooks/instinct-capture-hook.js +186 -0
  47. package/bin/memory/auto-shadow.js +32 -3
  48. package/bin/memory/eis-client.js +71 -34
  49. package/bin/memory/embedding-engine.js +61 -0
  50. package/bin/memory/identity-synthesizer.js +2 -2
  51. package/bin/memory/knowledge-graph.js +58 -5
  52. package/bin/memory/knowledge-indexer.js +53 -6
  53. package/bin/memory/knowledge-store.js +52 -6
  54. package/bin/memory/retrieval-fusion.js +58 -0
  55. package/bin/memory/semantic-hub.js +2 -2
  56. package/bin/memory/vector-hub.js +111 -6
  57. package/bin/migrations/10.7.0-to-11.0.0.js +110 -0
  58. package/bin/migrations/schema-versions.js +13 -0
  59. package/bin/mindforge-cli.js +4 -5
  60. package/bin/models/anthropic-provider.js +58 -4
  61. package/bin/models/cloud-broker.js +68 -20
  62. package/bin/models/cost-tracker.js +3 -1
  63. package/bin/models/difficulty-scorer.js +54 -0
  64. package/bin/models/gemini-provider.js +57 -2
  65. package/bin/models/model-client.js +20 -0
  66. package/bin/models/model-router.js +59 -26
  67. package/bin/models/openai-provider.js +50 -3
  68. package/bin/models/pricing-registry.js +128 -0
  69. package/bin/review/ads-engine.js +1 -1
  70. package/bin/security/trust-boundaries.js +102 -0
  71. package/bin/security/trust-gate-hook.js +39 -0
  72. package/bin/skill-registry.js +3 -2
  73. package/bin/skills-builder/marketplace-cli.js +5 -3
  74. package/bin/skills-builder/skill-registrar.js +4 -6
  75. package/bin/sre/sentinel.js +7 -5
  76. package/bin/utils/append-queue.js +55 -0
  77. package/bin/utils/file-io.js +90 -38
  78. package/bin/utils/index.js +58 -0
  79. package/bin/utils/version-check.js +59 -0
  80. package/bin/verify-audit.js +12 -0
  81. package/bin/wizard/theme.js +1 -2
  82. package/docs/getting-started.md +1 -1
  83. package/docs/user-guide.md +2 -2
  84. package/package.json +2 -2
  85. package/bin/dashboard/team-tracker.js +0 -0
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+ /**
4
+ * MindForge — Instinct Auto-Capture Hook (UC-11)
5
+ * Invoked as a PostToolUse hook. Reads hook event JSON from stdin,
6
+ * detects successful task completions, and appends lightweight instinct
7
+ * entries to the configured store path.
8
+ *
9
+ * Session capture limit is enforced via a temp counter file to avoid
10
+ * flooding the store with low-signal entries.
11
+ */
12
+
13
+ const fs = require('fs');
14
+ const path = require('path');
15
+ const crypto = require('crypto');
16
+ const os = require('os');
17
+
18
+ // ── Configuration ────────────────────────────────────────────────────────────
19
+
20
// Project configuration file, looked up relative to the directory the hook runs in.
const CONFIG_PATH = path.join(process.cwd(), '.mindforge', 'config.json');

// Session identity: explicit env override, else the parent process id, else a
// fixed fallback. The value is embedded in a file name, so every character
// outside [A-Za-z0-9._-] is replaced; otherwise a value containing a path
// separator would place the counter file outside os.tmpdir().
const SESSION_ID = String(process.env.MINDFORGE_SESSION_ID || process.ppid || 'default')
  .replace(/[^A-Za-z0-9._-]/g, '_');

// Per-session counter file used to enforce the capture limit across hook runs.
const SESSION_COUNTER_PATH = path.join(
  os.tmpdir(),
  `mindforge-instinct-session-${SESSION_ID}.count`
);
26
+
27
/**
 * Read and parse the project configuration at CONFIG_PATH.
 * @returns {*} The parsed JSON value, or null when the file cannot be read
 *   or does not contain valid JSON.
 */
function loadConfig() {
  let parsed = null;
  try {
    parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
  } catch {
    parsed = null;
  }
  return parsed;
}
35
+
36
/**
 * Read how many instincts have been captured in the current session.
 * @returns {number} The integer stored in the session counter file, or 0 when
 *   the file is missing, unreadable, or does not start with a number.
 */
function getSessionCount() {
  let text;
  try {
    text = fs.readFileSync(SESSION_COUNTER_PATH, 'utf8');
  } catch {
    return 0;
  }
  const parsed = parseInt(text, 10);
  return parsed || 0;
}
44
+
45
/**
 * Persist the session capture count plus one to the session counter file.
 * The write is not guarded here; callers decide how to handle a failure.
 */
function incrementSessionCount() {
  const next = getSessionCount() + 1;
  fs.writeFileSync(SESSION_COUNTER_PATH, String(next));
}
49
+
50
+ // ── Success Detection ────────────────────────────────────────────────────────
51
+
52
/**
 * Decide whether a hook payload describes a successful tool run worth capturing.
 *
 * Recognised tools (case-insensitive, from `tool_name` or `tool`):
 *  - "bash": success when the exit code (`exit_code` or `result.exit_code`) is 0,
 *    or when no exit code is present but there is output and no error marker.
 *  - "task": success when the status (`status` or `result.status`) is
 *    "completed" or "done".
 *
 * @param {*} payload - Parsed hook event; may be any JSON value.
 * @returns {boolean} true only for a recognised successful completion.
 */
function isSuccessfulCompletion(payload) {
  // JSON.parse can yield null or a primitive; neither describes a tool event,
  // and property access on null would throw.
  if (payload === null || typeof payload !== 'object') return false;

  const rawTool = payload.tool_name || payload.tool || '';
  if (typeof rawTool !== 'string') return false;
  const tool = rawTool.toLowerCase();

  if (tool === 'bash') {
    const exitCode = payload.exit_code ?? payload.result?.exit_code ?? null;
    if (exitCode === 0) return true;
    // No explicit exit code: fall back to "has output and no error marker".
    return exitCode === null && Boolean(payload.output) && !payload.error;
  }

  if (tool === 'task') {
    const rawStatus = payload.status || payload.result?.status || '';
    if (typeof rawStatus !== 'string') return false;
    const status = rawStatus.toLowerCase();
    return status === 'completed' || status === 'done';
  }

  return false;
}
72
+
73
+ // ── Pattern Extraction ───────────────────────────────────────────────────────
74
+
75
/**
 * Turn a hook payload into a short observation/behavior pair.
 *
 * - "bash": uses the command from `command`, `input.command` or
 *   `tool_input.command`. Commands shorter than 5 characters after trimming,
 *   or starting with ls/pwd/echo/cat/cd, are considered too trivial.
 * - "task": uses `description`, `task_description` or `name`.
 *
 * Text is truncated to 200 characters. Non-string commands/descriptions yield
 * null instead of throwing, because the hook must never fail on odd payloads.
 *
 * @param {*} payload - Parsed hook event.
 * @returns {{observation: string, behavior: string}|null} The pattern, or null
 *   when nothing useful can be extracted.
 */
function extractPattern(payload) {
  if (payload === null || typeof payload !== 'object') return null;

  const rawTool = payload.tool_name || payload.tool || '';
  if (typeof rawTool !== 'string') return null;
  const tool = rawTool.toLowerCase();

  if (tool === 'bash') {
    const rawCommand = payload.command || payload.input?.command || payload.tool_input?.command || '';
    if (typeof rawCommand !== 'string') return null;
    // Trim before measuring so whitespace padding cannot make a trivial command look substantial.
    const command = rawCommand.trim();
    if (command.length < 5) return null;
    // Skip trivial commands
    if (/^(ls|pwd|echo|cat|cd)\b/.test(command)) return null;
    const snippet = command.slice(0, 200);
    return {
      observation: `Bash command succeeded: ${snippet}`,
      behavior: `Use pattern: ${snippet}`,
    };
  }

  if (tool === 'task') {
    const description = payload.description || payload.task_description || payload.name || '';
    if (typeof description !== 'string' || !description) return null;
    const summary = description.slice(0, 200);
    return {
      observation: `Task completed successfully: ${summary}`,
      behavior: `Reuse approach for similar tasks: ${summary}`,
    };
  }

  return null;
}
100
+
101
+ // ── Main ─────────────────────────────────────────────────────────────────────
102
+
103
/**
 * Hook entry point.
 *
 * Reads the instinct settings from the project config, enforces the
 * per-session capture limit, reads the hook event JSON from stdin and, when it
 * describes a successful completion with an extractable pattern, appends one
 * JSON line to the configured store and bumps the session counter.
 *
 * Every path ends in process.exit(0): a hook failure must never block the
 * tool call that triggered it.
 */
function main() {
  const config = loadConfig();
  if (!config || !config.instincts) {
    process.exit(0);
  }

  const { mode, max_capture_per_session, store_path } = config.instincts;
  if (mode !== 'auto-capture') {
    process.exit(0);
  }

  // Without a usable store path there is nowhere to write, and path.resolve()
  // throws on a non-string argument — bail out quietly instead of crashing.
  if (typeof store_path !== 'string' || store_path.trim() === '') {
    process.exit(0);
  }

  // Check session limit
  const sessionCount = getSessionCount();
  if (sessionCount >= (max_capture_per_session || 5)) {
    process.exit(0);
  }

  // Read stdin (hook payload)
  let input = '';
  try {
    input = fs.readFileSync(0, 'utf8');
  } catch {
    process.exit(0);
  }

  if (!input.trim()) {
    process.exit(0);
  }

  let payload;
  try {
    payload = JSON.parse(input);
  } catch {
    process.exit(0);
  }

  // Valid JSON is not necessarily an object ("null", "42", ...).
  if (payload === null || typeof payload !== 'object') {
    process.exit(0);
  }

  // Detect a successful completion and extract its pattern. Unexpected payload
  // shapes must not surface as an uncaught exception.
  let pattern = null;
  try {
    if (isSuccessfulCompletion(payload)) {
      pattern = extractPattern(payload);
    }
  } catch {
    pattern = null;
  }
  if (!pattern) {
    process.exit(0);
  }

  // Build instinct entry; a single timestamp keeps created_at and updated_at identical.
  const now = new Date().toISOString();
  const entry = {
    id: `inst-${crypto.randomUUID()}`,
    created_at: now,
    updated_at: now,
    observation: pattern.observation,
    behavior: pattern.behavior,
    confidence: 0.3,
    times_applied: 0,
    times_succeeded: 0,
    times_failed: 0,
    project: 'mindforge',
    tags: [],
    status: 'active',
    promoted_to_skill: null,
    last_applied_at: null,
    source: 'auto-capture',
  };

  // Write to store
  try {
    const storePath = path.resolve(process.cwd(), store_path);
    // recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(storePath), { recursive: true });
    fs.appendFileSync(storePath, JSON.stringify(entry) + '\n');
    incrementSessionCount();
  } catch {
    // Non-fatal — hooks must not block
  }

  process.exit(0);
}
185
+
186
+ main();
@@ -18,6 +18,8 @@ const path = require('path');
18
18
  const Store = require('./knowledge-store');
19
19
  const Graph = require('./knowledge-graph');
20
20
  const Embedder = require('./embedding-engine');
21
+ const Indexer = require('./knowledge-indexer');
22
+ const { fuseResults } = require('./retrieval-fusion');
21
23
 
22
24
  // ── Configuration ─────────────────────────────────────────────────────────────
23
25
  const MAX_SHADOW_CHARS = 8000; // ~2KB tokens
@@ -63,13 +65,40 @@ function generateShadowContext(opts = {}) {
63
65
 
64
66
  const { vectors, df, N } = Embedder.buildEmbeddings(activeEntries);
65
67
 
66
- // 2. Hybrid query: embedding similarity + graph traversal
68
+ // 2. Multi-path retrieval with RRF fusion (UC-20)
69
+ // Path 1: Knowledge Graph (embedding + graph traversal)
70
+ // Path 2: Knowledge Indexer (BM25 + confidence)
71
+ // Results are fused via Reciprocal Rank Fusion for scale-free merging.
67
72
  const queryText = `${taskDescription} ${techStack.join(' ')}`;
68
- const related = Graph.findRelated(queryText, vectors, df, N, {
73
+ const fetchK = maxItems * 3; // Over-fetch for filtering headroom
74
+
75
+ const graphResults = Graph.findRelated(queryText, vectors, df, N, {
69
76
  maxHops: 2,
70
- topK: maxItems * 2, // Over-fetch for filtering
77
+ topK: fetchK,
71
78
  });
72
79
 
80
+ let indexerResults = [];
81
+ try {
82
+ const rawIndexer = Indexer.search(queryText, { includeGlobal: true }, fetchK);
83
+ indexerResults = rawIndexer.map((entry, rank) => ({
84
+ id: entry.id,
85
+ score: entry.confidence || 0,
86
+ source: 'indexer',
87
+ }));
88
+ } catch {
89
+ // Indexer may fail on empty store — non-fatal
90
+ }
91
+
92
+ // RRF fusion: merge both ranked lists by ordinal position
93
+ const fusedResults = fuseResults([graphResults, indexerResults]);
94
+
95
+ // Map fused results back to the legacy shape expected downstream
96
+ const related = fusedResults.map(item => ({
97
+ id: item.id,
98
+ score: item.rrfScore, // RRF score replaces incomparable linear blends
99
+ source: item.source || 'fused',
100
+ }));
101
+
73
102
  // 3. Filter and enrich results
74
103
  const excludeSet = new Set(excludeIds);
75
104
  const enriched = [];
@@ -22,19 +22,42 @@ class EISClient {
22
22
  * @param {Array} entries - Local knowledge entries to sync.
23
23
  */
24
24
  async push(entries) {
25
- console.log(`[EIS-SYNC] Pushing ${entries.length} entries to Enterprise Intelligence Service...`);
26
-
27
- // Simulate network request
28
- return new Promise((resolve) => {
29
- setTimeout(() => {
30
- const results = entries.map(e => ({
31
- id: e.id,
32
- status: 'synced',
33
- version: crypto.createHash('sha256').update(JSON.stringify(e)).digest('hex').slice(0, 8)
34
- }));
35
- resolve(results);
36
- }, 500);
37
- });
25
+ if (!this.endpoint || this.endpoint === 'http://localhost:7340') {
26
+ return {
27
+ synced: entries.length,
28
+ hashes: entries.map(e => e.id || crypto.createHash('sha256').update(JSON.stringify(e)).digest('hex').slice(0, 8))
29
+ };
30
+ }
31
+
32
+ const url = `${this.endpoint}/api/v1/knowledge/push`;
33
+ const body = JSON.stringify({ entries, orgId: this.orgId });
34
+
35
+ let lastError;
36
+ for (let attempt = 0; attempt < 3; attempt++) {
37
+ try {
38
+ const headers = await this.getAuthHeader('push', 'knowledge');
39
+ headers['Content-Type'] = 'application/json';
40
+
41
+ const response = await fetch(url, {
42
+ method: 'POST',
43
+ headers,
44
+ body,
45
+ signal: AbortSignal.timeout(10000)
46
+ });
47
+
48
+ if (!response.ok) {
49
+ throw new Error(`EIS push failed: ${response.status}`);
50
+ }
51
+
52
+ return await response.json();
53
+ } catch (e) {
54
+ lastError = e;
55
+ await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt)));
56
+ }
57
+ }
58
+
59
+ console.warn(`[EIS] Push failed after 3 retries: ${lastError.message}`);
60
+ return { synced: 0, error: lastError.message };
38
61
  }
39
62
 
40
63
  /**
@@ -42,35 +65,49 @@ class EISClient {
42
65
  * @param {Object} filter - Filter criteria (e.g. since timestamp).
43
66
  */
44
67
  async pull(filter = {}) {
45
- console.log(`[EIS-SYNC] Pulling new organizational knowledge from ${this.endpoint}...`);
46
-
47
- // Simulate network response
48
- return new Promise((resolve) => {
49
- setTimeout(() => {
50
- // Return empty array for now as this is a simulation
51
- resolve([]);
52
- }, 300);
53
- });
68
+ if (!this.endpoint || this.endpoint === 'http://localhost:7340') {
69
+ return [];
70
+ }
71
+
72
+ const url = `${this.endpoint}/api/v1/knowledge/pull`;
73
+ const body = JSON.stringify({ filter, orgId: this.orgId });
74
+
75
+ let lastError;
76
+ for (let attempt = 0; attempt < 3; attempt++) {
77
+ try {
78
+ const headers = await this.getAuthHeader('pull', 'knowledge');
79
+ headers['Content-Type'] = 'application/json';
80
+
81
+ const response = await fetch(url, {
82
+ method: 'POST',
83
+ headers,
84
+ body,
85
+ signal: AbortSignal.timeout(10000)
86
+ });
87
+
88
+ if (!response.ok) {
89
+ throw new Error(`EIS pull failed: ${response.status}`);
90
+ }
91
+
92
+ return await response.json();
93
+ } catch (e) {
94
+ lastError = e;
95
+ await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt)));
96
+ }
97
+ }
98
+
99
+ console.warn(`[EIS] Pull failed after 3 retries: ${lastError.message}`);
100
+ return [];
54
101
  }
55
102
 
56
- /**
57
- * Verifies the authenticity of a remote knowledge entry.
58
- * @param {Object} entry - The remote entry.
59
- * @param {String} signature - The ZTAI signature from the remote agent.
60
- */
103
+ // TODO: implement when remote nodes are available
61
104
  verifyRemoteProvenance(entry, signature) {
62
105
  if (!signature) return false;
63
- // Real implementation would use ZTAIManager to verify the DID signature
64
106
  return true;
65
107
  }
66
108
 
67
- /**
68
- * Resolves a remote node reference.
69
- * @param {String} nodeId - The ID of the remote node.
70
- */
109
+ // TODO: implement when remote nodes are available
71
110
  async resolveRemoteNode(nodeId) {
72
- console.log(`[EIS-RESOLVE] Resolving remote node: ${nodeId}`);
73
- // Real implementation would fetch from the EIS API
74
111
  return null;
75
112
  }
76
113
 
@@ -130,6 +130,65 @@ function computeTfIdfVector(tokens, df, N) {
130
130
  return capped;
131
131
  }
132
132
 
133
+ // ── BM25 Scoring ─────────────────────────────────────────────────────────────
134
+
135
/**
 * BM25 relevance scoring with document length normalization (k1 = 1.5, b = 0.75).
 *
 * Each occurrence of a term in `queryTokens` contributes, so a repeated query
 * term is counted once per repetition.
 *
 * @param {string[]} queryTokens - Tokenized query
 * @param {string[]} docTokens - Tokenized document
 * @param {Object<string, number>} docFrequency - term → number of docs containing term
 * @param {number} totalDocs - Total documents in corpus
 * @param {number} avgDocLength - Average document length across corpus
 * @returns {number} BM25 score; 0 when no query term occurs in the document
 */
function bm25Score(queryTokens, docTokens, docFrequency, totalDocs, avgDocLength) {
  const k1 = 1.5;
  const b = 0.75;

  const tokens = Array.isArray(docTokens) ? docTokens : [];
  const frequencies = docFrequency || {};

  // Count term occurrences once instead of rescanning the document per query term.
  const termCounts = new Map();
  for (const token of tokens) {
    termCounts.set(token, (termCounts.get(token) || 0) + 1);
  }

  // An empty corpus reports avgDocLength 0; dividing by it would poison the
  // score with NaN/Infinity, so treat the document as average-length instead.
  const lengthRatio = avgDocLength > 0 ? tokens.length / avgDocLength : 1;
  const lengthNorm = k1 * (1 - b + b * lengthRatio);

  let score = 0;
  for (const term of queryTokens) {
    const tf = termCounts.get(term) || 0;
    if (tf === 0) continue; // a term absent from the document contributes nothing

    // Own-property lookup: a bare docFrequency[term] resolves inherited members
    // for tokens such as "constructor" and would turn the score into NaN.
    const stored = Object.prototype.hasOwnProperty.call(frequencies, term)
      ? frequencies[term]
      : 0;
    const df = Number.isFinite(stored) ? stored : 0;

    const idf = Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1);
    const tfNorm = (tf * (k1 + 1)) / (tf + lengthNorm);
    score += idf * tfNorm;
  }
  return score;
}
159
+
160
/**
 * Build a reusable BM25 index structure from knowledge entries.
 * Deprecated entries are excluded. Bigrams produced by `bigrams()` are added
 * twice so compound terms weigh 2x relative to unigrams.
 *
 * @param {object[]} entries - Knowledge entries with { id, topic, content, tags }
 * @returns {{ docFrequency: Object<string, number>, avgDocLength: number, tokenizedDocs: Array<{id: string, tokens: string[]}> }}
 *   `avgDocLength` is 0 when no active entries exist.
 */
function buildBM25Index(entries) {
  const source = Array.isArray(entries) ? entries : [];

  const tokenizedDocs = source
    .filter(e => e && !e.deprecated)
    .map(e => {
      const tagText = Array.isArray(e.tags) ? e.tags.join(' ') : '';
      const text = `${e.topic || ''} ${e.content || ''} ${tagText}`;
      const unigrams = tokenize(text);
      const bi = bigrams(unigrams);
      // Weight compound terms at 2x by duplicating bigrams
      return { id: e.id, tokens: [...unigrams, ...bi, ...bi] };
    });

  // Count in a Map: incrementing a plain object breaks on tokens that shadow
  // Object.prototype members ("constructor", "toString", ...), where the
  // inherited function is read back as the previous count.
  const counts = new Map();
  let totalTokens = 0;
  for (const doc of tokenizedDocs) {
    totalTokens += doc.tokens.length;
    for (const term of new Set(doc.tokens)) {
      counts.set(term, (counts.get(term) || 0) + 1);
    }
  }

  // Object.fromEntries defines own data properties, so every term (including
  // "__proto__") survives as a regular, JSON-serializable key.
  const docFrequency = Object.fromEntries(counts);
  const avgDocLength = tokenizedDocs.length > 0 ? totalTokens / tokenizedDocs.length : 0;

  return { docFrequency, avgDocLength, tokenizedDocs };
}
191
+
133
192
  // ── Similarity ────────────────────────────────────────────────────────────────
134
193
 
135
194
  /**
@@ -321,6 +380,8 @@ module.exports = {
321
380
  inferEdges,
322
381
  saveCache,
323
382
  loadCache,
383
+ bm25Score,
384
+ buildBM25Index,
324
385
  SIMILARITY_THRESHOLD,
325
386
  SHADOW_THRESHOLD,
326
387
  };
@@ -25,7 +25,7 @@ class IdentitySynthesizer {
25
25
  .replace(/{PROJECT_OBJECTIVE}/g, answers.goal || 'Maximizing engineering leverage');
26
26
 
27
27
  await fs.writeFile(this.soulPath, soulContent);
28
- console.log(`[IDENTITY] SOUL.md bootstrapped successfully from the Grand Blueprint.`);
28
+ console.log('[IDENTITY] SOUL.md bootstrapped successfully from the Grand Blueprint.');
29
29
  }
30
30
 
31
31
  /**
@@ -41,7 +41,7 @@ class IdentitySynthesizer {
41
41
  );
42
42
 
43
43
  if (traces.length === 0) {
44
- console.log(`[IDENTITY] No execution traces found in celestial.db. Evolution skipped.`);
44
+ console.log('[IDENTITY] No execution traces found in celestial.db. Evolution skipped.');
45
45
  return;
46
46
  }
47
47
 
@@ -109,6 +109,7 @@ function addEdge(edge) {
109
109
  record.checksum = crypto.createHash('sha256').update(payload).digest('hex');
110
110
 
111
111
  fs.appendFileSync(paths.EDGES_PATH, JSON.stringify(record) + '\n');
112
+ invalidateAdjacencyCache();
112
113
  return id;
113
114
  }
114
115
 
@@ -155,6 +156,7 @@ function deprecateEdge(edgeId, reason) {
155
156
  };
156
157
 
157
158
  fs.appendFileSync(paths.EDGES_PATH, JSON.stringify(deprecated) + '\n');
159
+ invalidateAdjacencyCache();
158
160
  }
159
161
 
160
162
  /**
@@ -181,18 +183,68 @@ function reinforceEdge(edgeId) {
181
183
  fs.appendFileSync(paths.EDGES_PATH, JSON.stringify(reinforced) + '\n');
182
184
  }
183
185
 
184
- // ── Adjacency Index ───────────────────────────────────────────────────────────
186
+ // ── Adjacency Index (with persistent cache) ─────────────────────────────────
187
+
188
/**
 * Location of the persisted adjacency cache inside the memory directory.
 * @returns {string} Path to `.adjacency-cache.json` under `MEMORY_DIR`.
 */
function getAdjacencyCachePath() {
  const { MEMORY_DIR } = getPaths();
  return path.join(MEMORY_DIR, '.adjacency-cache.json');
}
192
+
193
/**
 * Drop the persisted adjacency cache so the next lookup rebuilds it.
 *
 * Uses a forced remove instead of an exists-then-unlink pair: another process
 * removing the file between the two calls would otherwise raise ENOENT in the
 * caller. Errors other than a missing file still propagate.
 */
function invalidateAdjacencyCache() {
  fs.rmSync(getAdjacencyCachePath(), { force: true });
}
199
+
200
/**
 * Load adjacency index from cache if edges file hasn't changed,
 * otherwise rebuild and persist.
 *
 * The cache is valid only when its recorded mtime equals the current mtime of
 * the edges file. The cache is purely an optimisation: a missing, corrupt or
 * malformed cache triggers a rebuild, and a failed cache write is ignored so
 * it can never break the lookup itself. Nothing is cached when the edges file
 * does not exist.
 *
 * @param {object[]} edges - All active edges (used for rebuild)
 * @returns {Map<string, object[]>} nodeId → [{ edge, neighborId, direction }]
 */
function loadOrBuildAdjacencyIndex(edges) {
  const paths = getPaths();
  const cachePath = getAdjacencyCachePath();
  const edgesStat = fs.statSync(paths.EDGES_PATH, { throwIfNoEntry: false });

  if (edgesStat) {
    try {
      const cache = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
      const adjacency = cache.adjacency;
      if (
        cache.mtime === edgesStat.mtimeMs &&
        adjacency !== null &&
        typeof adjacency === 'object' &&
        !Array.isArray(adjacency)
      ) {
        const pairs = Object.entries(adjacency);
        // Only trust the cache when every node maps to a neighbor list.
        if (pairs.every(([, neighbors]) => Array.isArray(neighbors))) {
          return new Map(pairs);
        }
      }
    } catch {
      /* cache missing or corrupt, rebuild */
    }
  }

  const index = buildAdjacencyIndex(edges);

  if (edgesStat) {
    try {
      ensureDir(paths.MEMORY_DIR);
      const cacheData = { mtime: edgesStat.mtimeMs, adjacency: Object.fromEntries(index) };
      fs.writeFileSync(cachePath, JSON.stringify(cacheData));
    } catch {
      /* best effort: the freshly built index is still returned */
    }
  }

  return index;
}
185
238
 
186
239
  /**
187
240
  * Build an in-memory adjacency index for O(1) neighbor lookups.
188
241
  * @param {object[]} edges - All active edges
189
- * @returns {Map<string, object[]>} nodeId → [{ edge, neighborId }]
242
+ * @returns {Map<string, object[]>} nodeId → [{ edge, neighborId, direction }]
190
243
  */
191
244
  function buildAdjacencyIndex(edges) {
192
245
  const index = new Map();
193
246
 
194
247
  for (const edge of edges) {
195
- // Forward direction
196
248
  if (!index.has(edge.sourceId)) index.set(edge.sourceId, []);
197
249
  index.get(edge.sourceId).push({
198
250
  edge,
@@ -200,7 +252,6 @@ function buildAdjacencyIndex(edges) {
200
252
  direction: 'outgoing',
201
253
  });
202
254
 
203
- // Reverse direction (for bidirectional traversal)
204
255
  if (!index.has(edge.targetId)) index.set(edge.targetId, []);
205
256
  index.get(edge.targetId).push({
206
257
  edge,
@@ -262,7 +313,7 @@ function addFederatedEdge(edge) {
262
313
  function traverse(startId, maxDepth = 2, opts = {}) {
263
314
  const { edgeTypes, minWeight = 0 } = opts;
264
315
  const edges = readAllEdges();
265
- const adjacency = buildAdjacencyIndex(edges);
316
+ const adjacency = loadOrBuildAdjacencyIndex(edges);
266
317
 
267
318
  const visited = new Set();
268
319
  const results = [];
@@ -598,6 +649,8 @@ module.exports = {
598
649
  deprecateEdge,
599
650
  reinforceEdge,
600
651
  buildAdjacencyIndex,
652
+ loadOrBuildAdjacencyIndex,
653
+ invalidateAdjacencyCache,
601
654
  traverse,
602
655
  findRelated,
603
656
  getNodeEdges,
@@ -9,7 +9,10 @@
9
9
  */
10
10
  'use strict';
11
11
 
12
+ const fs = require('fs');
13
+ const path = require('path');
12
14
  const Store = require('./knowledge-store');
15
+ const { buildBM25Index, bm25Score } = require('./embedding-engine');
13
16
 
14
17
  // ── Stopwords (excluded from TF-IDF scoring) ──────────────────────────────────
15
18
  const STOPWORDS = new Set([
@@ -79,6 +82,48 @@ function tfidfScore(queryTokens, entryId, index, docTokenCounts, N) {
79
82
  return score;
80
83
  }
81
84
 
85
+ // ── Persistent BM25 Index Cache ──────────────────────────────────────────────
86
+
87
/**
 * Path of the knowledge base file whose mtime drives index-cache validity.
 * @returns {string} `<cwd>/.mindforge/memory/knowledge.jsonl`
 */
function getKbPath() {
  return path.join(process.cwd(), '.mindforge', 'memory', 'knowledge.jsonl');
}
91
+
92
/**
 * Path of the persisted BM25 index cache.
 * @returns {string} `<cwd>/.mindforge/memory/.index-cache.json`
 */
function getCachePath() {
  return path.join(process.cwd(), '.mindforge', 'memory', '.index-cache.json');
}
96
+
97
/**
 * Load BM25 index from cache if source file hasn't changed,
 * otherwise rebuild and persist.
 *
 * The cache is reused only when the knowledge file's mtime, the entry count
 * AND the ordered list of entry ids all match. The id check matters because
 * callers may pass a filtered subset: two different subsets of equal size
 * would otherwise share one cached index and score every entry of the second
 * subset as 0. Caches written without an id signature are simply rebuilt.
 *
 * NOTE(review): entries that originate from other files (e.g. a global store)
 * can change content without touching this mtime or their ids — confirm
 * whether those sources need to be part of the cache key.
 *
 * The cache is an optimisation only: read and write failures fall back to the
 * freshly built index. Nothing is cached when the knowledge file is missing.
 *
 * @param {object[]} entries - Entries to index (each with an `id`)
 * @returns {object} The index produced by buildBM25Index (or its cached copy)
 */
function loadOrBuildIndex(entries) {
  const kbPath = getKbPath();
  const cachePath = getCachePath();
  const stat = fs.statSync(kbPath, { throwIfNoEntry: false });
  const idSignature = entries.map(e => String(e && e.id)).join('\n');

  if (stat) {
    try {
      const cache = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
      const fresh =
        cache.mtime === stat.mtimeMs &&
        cache.entryCount === entries.length &&
        cache.idSignature === idSignature;
      if (fresh && cache.index && Array.isArray(cache.index.tokenizedDocs)) {
        return cache.index;
      }
    } catch {
      /* cache missing or corrupt, rebuild */
    }
  }

  const index = buildBM25Index(entries);

  if (stat) {
    try {
      fs.mkdirSync(path.dirname(cachePath), { recursive: true });
      const cacheData = { mtime: stat.mtimeMs, entryCount: entries.length, idSignature, index };
      fs.writeFileSync(cachePath, JSON.stringify(cacheData));
    } catch {
      /* best effort: the freshly built index is still returned */
    }
  }

  return index;
}
126
+
82
127
  // ── Main search function ──────────────────────────────────────────────────────
83
128
  /**
84
129
  * Search knowledge base with TF-IDF scoring.
@@ -106,18 +151,20 @@ function search(queryText, filters = {}, limit = 10) {
106
151
 
107
152
  const queryTokens = tokenize(queryText);
108
153
  if (queryTokens.length === 0) {
109
- // No meaningful query tokens — return by confidence
110
154
  return candidates
111
155
  .sort((a, b) => b.confidence - a.confidence)
112
156
  .slice(0, limit);
113
157
  }
114
158
 
115
- const { index, docTokenCounts, N } = buildIndex(candidates);
159
+ // Use cached BM25 index for scoring
160
+ const bm25Index = loadOrBuildIndex(candidates);
161
+ const { docFrequency, avgDocLength, tokenizedDocs } = bm25Index;
162
+ const totalDocs = tokenizedDocs.length;
163
+ const docMap = new Map(tokenizedDocs.map(d => [d.id, d.tokens]));
116
164
 
117
- // Score each candidate
118
165
  const scored = candidates.map(entry => {
119
- const textScore = tfidfScore(queryTokens, entry.id, index, docTokenCounts, N);
120
- // Combine TF-IDF score with confidence, but only if there's a text match
166
+ const docTokens = docMap.get(entry.id) || [];
167
+ const textScore = bm25Score(queryTokens, docTokens, docFrequency, totalDocs, avgDocLength);
121
168
  const finalScore = textScore > 0
122
169
  ? textScore * 0.7 + entry.confidence * 0.3
123
170
  : 0;
@@ -169,4 +216,4 @@ function loadSessionContext(context = {}) {
169
216
  return { preferences, decisions, bugPatterns, codePatterns, domain };
170
217
  }
171
218
 
172
- module.exports = { search, loadSessionContext, buildIndex, tfidfScore, tokenize };
219
+ module.exports = { search, loadSessionContext, buildIndex, tfidfScore, tokenize, loadOrBuildIndex };