openclaw-node-harness 2.0.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/README.md +646 -3
  2. package/bin/hyperagent.mjs +419 -0
  3. package/bin/mesh-agent.js +401 -12
  4. package/bin/mesh-bridge.js +66 -1
  5. package/bin/mesh-task-daemon.js +816 -26
  6. package/bin/mesh.js +403 -1
  7. package/config/claude-settings.json +95 -0
  8. package/config/daemon.json.template +2 -1
  9. package/config/git-hooks/pre-commit +13 -0
  10. package/config/git-hooks/pre-push +12 -0
  11. package/config/harness-rules.json +174 -0
  12. package/config/plan-templates/team-bugfix.yaml +52 -0
  13. package/config/plan-templates/team-deploy.yaml +50 -0
  14. package/config/plan-templates/team-feature.yaml +71 -0
  15. package/config/roles/qa-engineer.yaml +36 -0
  16. package/config/roles/solidity-dev.yaml +51 -0
  17. package/config/roles/tech-architect.yaml +36 -0
  18. package/config/rules/framework/solidity.md +22 -0
  19. package/config/rules/framework/typescript.md +21 -0
  20. package/config/rules/framework/unity.md +21 -0
  21. package/config/rules/universal/design-docs.md +18 -0
  22. package/config/rules/universal/git-hygiene.md +18 -0
  23. package/config/rules/universal/security.md +19 -0
  24. package/config/rules/universal/test-standards.md +19 -0
  25. package/identity/DELEGATION.md +6 -6
  26. package/install.sh +293 -8
  27. package/lib/circling-parser.js +119 -0
  28. package/lib/hyperagent-store.mjs +652 -0
  29. package/lib/kanban-io.js +9 -0
  30. package/lib/mcp-knowledge/bench.mjs +118 -0
  31. package/lib/mcp-knowledge/core.mjs +528 -0
  32. package/lib/mcp-knowledge/package.json +25 -0
  33. package/lib/mcp-knowledge/server.mjs +245 -0
  34. package/lib/mcp-knowledge/test.mjs +802 -0
  35. package/lib/memory-budget.mjs +261 -0
  36. package/lib/mesh-collab.js +301 -1
  37. package/lib/mesh-harness.js +427 -0
  38. package/lib/mesh-plans.js +13 -5
  39. package/lib/mesh-tasks.js +67 -0
  40. package/lib/plan-templates.js +226 -0
  41. package/lib/pre-compression-flush.mjs +320 -0
  42. package/lib/role-loader.js +292 -0
  43. package/lib/rule-loader.js +358 -0
  44. package/lib/session-store.mjs +458 -0
  45. package/lib/transcript-parser.mjs +292 -0
  46. package/mission-control/drizzle/soul_schema_update.sql +29 -0
  47. package/mission-control/drizzle.config.ts +1 -4
  48. package/mission-control/package-lock.json +1571 -83
  49. package/mission-control/package.json +6 -2
  50. package/mission-control/scripts/gen-chronology.js +3 -3
  51. package/mission-control/scripts/import-pipeline-v2.js +0 -16
  52. package/mission-control/scripts/import-pipeline.js +0 -15
  53. package/mission-control/src/app/api/cowork/clusters/[id]/members/route.ts +117 -0
  54. package/mission-control/src/app/api/cowork/clusters/[id]/route.ts +84 -0
  55. package/mission-control/src/app/api/cowork/clusters/route.ts +141 -0
  56. package/mission-control/src/app/api/cowork/dispatch/route.ts +128 -0
  57. package/mission-control/src/app/api/cowork/events/route.ts +65 -0
  58. package/mission-control/src/app/api/cowork/intervene/route.ts +259 -0
  59. package/mission-control/src/app/api/cowork/sessions/[id]/route.ts +37 -0
  60. package/mission-control/src/app/api/cowork/sessions/route.ts +64 -0
  61. package/mission-control/src/app/api/diagnostics/route.ts +97 -0
  62. package/mission-control/src/app/api/diagnostics/test-runner/route.ts +990 -0
  63. package/mission-control/src/app/api/mesh/events/route.ts +95 -19
  64. package/mission-control/src/app/api/mesh/identity/route.ts +11 -0
  65. package/mission-control/src/app/api/mesh/tasks/[id]/route.ts +92 -0
  66. package/mission-control/src/app/api/mesh/tasks/route.ts +91 -0
  67. package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +1 -1
  68. package/mission-control/src/app/api/tasks/[id]/route.ts +90 -4
  69. package/mission-control/src/app/api/tasks/route.ts +21 -30
  70. package/mission-control/src/app/cowork/page.tsx +261 -0
  71. package/mission-control/src/app/diagnostics/page.tsx +385 -0
  72. package/mission-control/src/app/graph/page.tsx +26 -0
  73. package/mission-control/src/app/memory/page.tsx +1 -1
  74. package/mission-control/src/app/obsidian/page.tsx +36 -6
  75. package/mission-control/src/app/roadmap/page.tsx +24 -0
  76. package/mission-control/src/app/souls/page.tsx +2 -2
  77. package/mission-control/src/components/board/execution-config.tsx +431 -0
  78. package/mission-control/src/components/board/kanban-board.tsx +75 -9
  79. package/mission-control/src/components/board/kanban-column.tsx +135 -19
  80. package/mission-control/src/components/board/task-card.tsx +55 -2
  81. package/mission-control/src/components/board/unified-task-dialog.tsx +82 -4
  82. package/mission-control/src/components/cowork/cluster-card.tsx +176 -0
  83. package/mission-control/src/components/cowork/create-cluster-dialog.tsx +251 -0
  84. package/mission-control/src/components/cowork/dispatch-form.tsx +423 -0
  85. package/mission-control/src/components/cowork/role-picker.tsx +102 -0
  86. package/mission-control/src/components/cowork/session-card.tsx +284 -0
  87. package/mission-control/src/components/layout/sidebar.tsx +39 -2
  88. package/mission-control/src/lib/__tests__/daily-log.test.ts +82 -0
  89. package/mission-control/src/lib/__tests__/memory-md.test.ts +87 -0
  90. package/mission-control/src/lib/__tests__/mesh-kv-sync.test.ts +465 -0
  91. package/mission-control/src/lib/__tests__/mocks/mock-kv.ts +131 -0
  92. package/mission-control/src/lib/__tests__/status-kanban.test.ts +46 -0
  93. package/mission-control/src/lib/__tests__/task-markdown.test.ts +188 -0
  94. package/mission-control/src/lib/__tests__/wikilinks.test.ts +175 -0
  95. package/mission-control/src/lib/config.ts +58 -0
  96. package/mission-control/src/lib/db/index.ts +69 -0
  97. package/mission-control/src/lib/db/schema.ts +61 -3
  98. package/mission-control/src/lib/hooks.ts +309 -0
  99. package/mission-control/src/lib/memory/entities.ts +3 -2
  100. package/mission-control/src/lib/nats.ts +66 -1
  101. package/mission-control/src/lib/parsers/task-markdown.ts +52 -2
  102. package/mission-control/src/lib/parsers/transcript.ts +4 -4
  103. package/mission-control/src/lib/scheduler.ts +12 -11
  104. package/mission-control/src/lib/sync/mesh-kv.ts +279 -0
  105. package/mission-control/src/lib/sync/tasks.ts +23 -1
  106. package/mission-control/src/lib/task-id.ts +32 -0
  107. package/mission-control/src/lib/tts/index.ts +33 -9
  108. package/mission-control/tsconfig.json +2 -1
  109. package/mission-control/vitest.config.ts +14 -0
  110. package/package.json +15 -2
  111. package/services/service-manifest.json +1 -1
  112. package/skills/cc-godmode/references/agents.md +8 -8
  113. package/workspace-bin/memory-daemon.mjs +199 -5
  114. package/workspace-bin/session-search.mjs +204 -0
  115. package/workspace-bin/web-fetch.mjs +65 -0
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Benchmark: index real workspace files and run search queries
4
+ */
5
+ import { pipeline } from '@huggingface/transformers';
6
+ import { statSync, rmSync } from 'node:fs';
7
+
8
+ import {
9
+ scanMarkdownFiles,
10
+ chunkMarkdown,
11
+ hashContent,
12
+ initDatabase,
13
+ EMBEDDING_DIM,
14
+ MAX_CHUNK_CHARS,
15
+ INCLUDE_DIRS,
16
+ EXCLUDE_PATTERNS,
17
+ MODEL_NAME,
18
+ } from './core.mjs';
19
+ import { readFileSync } from 'node:fs';
20
+ import { createHash } from 'node:crypto';
21
+
22
+ const WORKSPACE = process.env.KNOWLEDGE_ROOT || '/Users/moltymac/.openclaw/workspace';
23
+ const DB_PATH = '/tmp/mcp-knowledge-bench.db';
24
+
25
+ // Clean previous run
26
+ try { rmSync(DB_PATH); } catch {}
27
+
28
+ // Init DB
29
+ const db = initDatabase(DB_PATH);
30
+
31
+ const files = scanMarkdownFiles(WORKSPACE, INCLUDE_DIRS, EXCLUDE_PATTERNS);
32
+ console.log(`Scanning ${files.length} files...`);
33
+
34
+ console.log('Loading embedding model...');
35
+ const extractor = await pipeline('feature-extraction', MODEL_NAME, { dtype: 'fp32' });
36
+ console.log('Model loaded.\n');
37
+
38
+ const insertDoc = db.prepare('INSERT INTO documents (path, content_hash, last_indexed, chunk_count) VALUES (?, ?, ?, ?)');
39
+ const insertChunk = db.prepare('INSERT INTO chunks (doc_path, section, text, snippet) VALUES (?, ?, ?, ?)');
40
+
41
+ let totalChunks = 0;
42
+ let totalFiles = 0;
43
+ const start = Date.now();
44
+
45
+ for (const file of files) {
46
+ let content;
47
+ try { content = readFileSync(file.path, 'utf-8'); } catch { continue; }
48
+ if (!content.trim()) continue;
49
+
50
+ const hash = hashContent(content);
51
+ const chunks = chunkMarkdown(content);
52
+ const vectors = [];
53
+ for (const c of chunks) {
54
+ const r = await extractor(c.text, { pooling: 'mean', normalize: true, truncation: true, max_length: 256 });
55
+ vectors.push(new Float32Array(r.data));
56
+ }
57
+
58
+ db.transaction(() => {
59
+ insertDoc.run(file.rel, hash, Date.now(), chunks.length);
60
+ for (let i = 0; i < chunks.length; i++) {
61
+ const snippet = chunks[i].text.slice(0, 250).replace(/\n/g, ' ');
62
+ const info = insertChunk.run(file.rel, chunks[i].section, chunks[i].text, snippet);
63
+ const vecBuf = Buffer.from(vectors[i].buffer);
64
+ db.prepare(`INSERT INTO chunk_vectors VALUES (${info.lastInsertRowid}, ?)`).run(vecBuf);
65
+ }
66
+ })();
67
+ totalChunks += chunks.length;
68
+ totalFiles++;
69
+ if (totalFiles % 50 === 0) console.log(` ${totalFiles}/${files.length} files, ${totalChunks} chunks...`);
70
+ }
71
+
72
+ const elapsed = ((Date.now() - start) / 1000).toFixed(1);
73
+ console.log(`\nIndexed: ${totalFiles} files → ${totalChunks} chunks in ${elapsed}s`);
74
+
75
+ // Search queries
76
+ console.log('\n═══ Semantic Search Tests ═══');
77
+ async function search(query) {
78
+ const t0 = Date.now();
79
+ const r = await extractor(query, { pooling: 'mean', normalize: true, truncation: true, max_length: 256 });
80
+ const buf = Buffer.from(new Float32Array(r.data).buffer);
81
+ const results = db.prepare(`
82
+ SELECT cv.distance, c.doc_path, c.section, c.snippet
83
+ FROM chunk_vectors cv JOIN chunks c ON c.id = cv.rowid
84
+ WHERE embedding MATCH ? AND k = 5 ORDER BY distance
85
+ `).all(buf);
86
+ const ms = Date.now() - t0;
87
+ return { results, ms };
88
+ }
89
+
90
+ const queries = [
91
+ 'oracle threat model GPS spoofing',
92
+ 'faction lore verdant pact nature magic',
93
+ 'smart contract mana well architecture',
94
+ 'Daedalus identity soul companion',
95
+ 'what is the mana harvesting mechanism',
96
+ 'biome oracle location verification',
97
+ ];
98
+
99
+ for (const q of queries) {
100
+ const { results, ms } = await search(q);
101
+ console.log(`\nQuery: "${q}" (${ms}ms)`);
102
+ for (const r of results.slice(0, 3)) {
103
+ const score = (1 - r.distance * r.distance / 2).toFixed(3);
104
+ console.log(` [${score}] ${r.doc_path} → ${r.section.slice(0, 60)}`);
105
+ }
106
+ }
107
+
108
+ // Stats
109
+ const { size } = statSync(DB_PATH);
110
+ console.log(`\n═══ Stats ═══`);
111
+ console.log(`DB size: ${(size / 1024 / 1024).toFixed(1)} MB`);
112
+ console.log(`Files: ${totalFiles}`);
113
+ console.log(`Chunks: ${totalChunks}`);
114
+ console.log(`Index time: ${elapsed}s`);
115
+ console.log(`Avg per file: ${(parseFloat(elapsed) * 1000 / totalFiles).toFixed(0)}ms`);
116
+
117
+ db.close();
118
+ setTimeout(() => process.exit(0), 200);
@@ -0,0 +1,528 @@
1
+ /**
2
+ * @openclaw/mcp-knowledge — core logic
3
+ *
4
+ * Scanner, chunker, embedder, indexer, search.
5
+ * Transport-agnostic: imported by server.mjs, test.mjs, bench.mjs.
6
+ */
7
+
8
+ import Database from 'better-sqlite3';
9
+ import * as sqliteVec from 'sqlite-vec';
10
+ import { pipeline } from '@huggingface/transformers';
11
+ import { createHash } from 'node:crypto';
12
+ import { readFileSync, readdirSync, statSync, existsSync } from 'node:fs';
13
+ import { join, relative, extname } from 'node:path';
14
+
15
// ─── Configuration ───────────────────────────────────────────────────────────

// Root of the markdown knowledge base; overridable for tests and benchmarks.
export const WORKSPACE = process.env.KNOWLEDGE_ROOT || process.cwd();
// SQLite database that holds documents, chunks, and chunk vectors.
export const DB_PATH = process.env.KNOWLEDGE_DB || join(WORKSPACE, '.knowledge.db');
// Sentence-embedding model id passed to @huggingface/transformers.
export const MODEL_NAME = process.env.KNOWLEDGE_MODEL || 'Xenova/all-MiniLM-L6-v2';
// Output dimensionality of the embedding model above.
export const EMBEDDING_DIM = 384;
// all-MiniLM-L6-v2 truncates at 256 word-piece tokens (~1000 chars).
// We chunk at 1800 chars; tail content beyond ~256 tokens is truncated at embed time.
// This is acceptable: headings and opening sentences carry the most semantic signal.
export const MAX_CHUNK_CHARS = 1800;
// Background re-index interval in ms; values <= 0 disable polling entirely.
export const POLL_INTERVAL_MS = parseInt(process.env.KNOWLEDGE_POLL_MS || '300000', 10);
// Max characters of a chunk stored as its preview snippet.
export const SNIPPET_LENGTH = 250;

// Default include list: files or directories, relative to WORKSPACE.
const INCLUDE_DIRS_DEFAULT = [
  'memory/',
  'projects/arcane/lore/',
  'projects/arcane/knowledge_base/',
  'projects/arcane/notes/',
  'projects/arcane/geoblar/',
  '.learnings/',
  'SOUL.md',
  'PRINCIPLES.md',
  'AGENTS.md',
  'MEMORY.md',
  'ARCHITECTURE.md',
];
// KNOWLEDGE_INCLUDE is a comma-separated override of the default include list.
export const INCLUDE_DIRS = process.env.KNOWLEDGE_INCLUDE
  ? process.env.KNOWLEDGE_INCLUDE.split(',').map(s => s.trim()).filter(Boolean)
  : INCLUDE_DIRS_DEFAULT;

// Relative paths matching any of these patterns are skipped by the scanner.
export const EXCLUDE_PATTERNS = [
  /node_modules/,
  /\.sol$/,
  /\.pdf$/,
  /\.json$/,
  /\.lock$/,
  /\.png$/,
  /\.jpg$/,
  /\.gif$/,
  /\.svg$/,
  /active-tasks\.md$/,
  /task-backlog\.md$/,
  /\.bak$/,
  /\.smart-env/,
  /\.obsidian/,
  /cache\//,
  /artifacts\//,
];
63
+
64
+ // ─── Markdown Scanner ────────────────────────────────────────────────────────
65
+
66
/**
 * Collect markdown files from the configured include list.
 *
 * Each include entry may be a single .md file or a directory (which is
 * walked recursively). Returns `{ path, rel }` records, where `rel` is
 * the path relative to `root` and is what the exclude patterns match on.
 *
 * @param {string} root - Workspace root.
 * @param {string[]} includePaths - Files/dirs relative to root.
 * @param {RegExp[]} excludePatterns - Patterns tested against relative paths.
 * @returns {{path: string, rel: string}[]}
 */
export function scanMarkdownFiles(root, includePaths, excludePatterns) {
  const found = [];
  for (const entry of includePaths) {
    const absolute = join(root, entry);
    if (!existsSync(absolute)) continue;
    const info = statSync(absolute);
    if (info.isDirectory()) {
      walkDir(absolute, root, excludePatterns, found);
      continue;
    }
    if (!info.isFile() || !absolute.endsWith('.md')) continue;
    const rel = relative(root, absolute);
    const excluded = excludePatterns.some((pattern) => pattern.test(rel));
    if (!excluded) found.push({ path: absolute, rel });
  }
  return found;
}
83
+
84
/**
 * Recursively gather .md files under `dir`, honoring exclude patterns.
 * Appends `{ path, rel }` records to `results` in place.
 */
function walkDir(dir, root, excludePatterns, results) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch {
    // Unreadable directory (permissions, race with deletion): skip silently.
    return;
  }
  for (const entry of entries) {
    const absolute = join(dir, entry.name);
    const rel = relative(root, absolute);
    const excluded = excludePatterns.some((pattern) => pattern.test(rel));
    if (excluded) continue;
    if (entry.isDirectory()) {
      walkDir(absolute, root, excludePatterns, results);
    } else if (entry.isFile() && extname(entry.name) === '.md') {
      results.push({ path: absolute, rel });
    }
  }
}
102
+
103
+ // ─── Heading-Based Chunker ───────────────────────────────────────────────────
104
+
105
/**
 * Split markdown into heading-scoped chunks for embedding.
 *
 * Each chunk is a heading line plus the body beneath it (up to the next
 * heading of any level). Chunks longer than MAX_CHUNK_CHARS are delegated
 * to splitOversized(). A document that yields no chunks (e.g. headings
 * with empty bodies) falls back to a single truncated chunk.
 *
 * @param {string} text - Raw markdown source.
 * @param {string} [filePath] - Unused here; kept for call-site symmetry.
 * @returns {{section: string, text: string}[]}
 */
export function chunkMarkdown(text, filePath) {
  const chunks = [];
  let section = '';
  let level = 0;
  let body = [];

  const emit = () => {
    const trimmed = body.join('\n').trim();
    if (!trimmed) return;
    const combined = section ? `${section}\n${trimmed}` : trimmed;
    if (combined.length > MAX_CHUNK_CHARS) {
      splitOversized(section, trimmed, level, chunks);
    } else {
      chunks.push({ section: section || '(top)', text: combined });
    }
  };

  for (const line of text.split('\n')) {
    const heading = /^(#{1,6})\s+(.+)$/.exec(line);
    if (!heading) {
      body.push(line);
      continue;
    }
    emit();
    level = heading[1].length;
    section = line.trim();
    body = [];
  }
  emit();

  if (chunks.length === 0) {
    return [{ section: '(document)', text: text.slice(0, MAX_CHUNK_CHARS) }];
  }
  return chunks;
}
138
+
139
/**
 * Break an oversized section into smaller chunks.
 *
 * First tries to split on sub-headings exactly one level deeper; pieces
 * that are still oversized recurse another level down. When no deeper
 * headings exist (or level 6 is exhausted), falls back to paragraph
 * packing via splitByParagraphs().
 *
 * @param {string} section - Heading line of the oversized chunk ('' for top).
 * @param {string} body - Section body without the heading line.
 * @param {number} level - Heading level of `section` (0 for top).
 * @param {{section: string, text: string}[]} chunks - Output accumulator.
 */
export function splitOversized(section, body, level, chunks) {
  const childLevel = level + 1;
  if (childLevel > 6 || !new RegExp(`^#{${childLevel}}\\s+.+$`, 'm').test(body)) {
    // No deeper headings to split on — pack paragraphs instead.
    splitByParagraphs(section, body, chunks);
    return;
  }

  // Hoisted out of the loop: the pattern is identical for every line.
  const childHeading = new RegExp(`^(#{${childLevel}})\\s+(.+)$`);
  let childSection = section;
  let childBody = [];

  for (const line of body.split('\n')) {
    if (!childHeading.test(line)) {
      childBody.push(line);
      continue;
    }
    const text = childBody.join('\n').trim();
    if (text) {
      const combined = childSection ? `${childSection}\n${text}` : text;
      if (combined.length <= MAX_CHUNK_CHARS) {
        chunks.push({ section: childSection || '(top)', text: combined });
      } else {
        // Still oversized: recurse one heading level deeper.
        splitOversized(childSection, text, childLevel, chunks);
      }
    }
    childSection = line.trim();
    childBody = [];
  }

  const tail = childBody.join('\n').trim();
  if (tail) {
    const combined = childSection ? `${childSection}\n${tail}` : tail;
    if (combined.length <= MAX_CHUNK_CHARS) {
      chunks.push({ section: childSection || '(top)', text: combined });
    } else {
      // Deepest split exhausted for this tail: pack paragraphs.
      splitByParagraphs(childSection, tail, chunks);
    }
  }
}
180
+
181
/**
 * Pack paragraphs into chunks no larger than MAX_CHUNK_CHARS.
 *
 * When the text splits into multiple chunks, each section label gets a
 * " [part N]" suffix; a single chunk keeps the plain section name (or
 * '(document)' when there is none).
 */
export function splitByParagraphs(section, text, chunks) {
  let pending = '';
  let parts = 0;

  for (const paragraph of text.split(/\n\n+/)) {
    const wouldOverflow = pending.length + paragraph.length + 2 > MAX_CHUNK_CHARS;
    if (wouldOverflow && pending) {
      chunks.push({ section: `${section} [part ${++parts}]`, text: pending.trim() });
      pending = '';
    }
    pending = pending ? `${pending}\n\n${paragraph}` : paragraph;
  }

  const leftover = pending.trim();
  if (leftover) {
    chunks.push({
      section: parts > 0 ? `${section} [part ${++parts}]` : (section || '(document)'),
      text: leftover,
    });
  }
}
199
+
200
+ // ─── SHA-256 Content Hashing ─────────────────────────────────────────────────
201
+
202
/** SHA-256 hex digest of `text`; used to detect changed documents. */
export function hashContent(text) {
  const digest = createHash('sha256');
  digest.update(text);
  return digest.digest('hex');
}
205
+
206
+ // ─── SQLite + sqlite-vec Storage ─────────────────────────────────────────────
207
+
208
/**
 * Open (or create) the knowledge database at `dbPath` and ensure the schema.
 *
 * Loads the sqlite-vec extension so the `vec0` virtual table is available,
 * then creates the documents/chunks/chunk_vectors/meta tables if missing.
 * chunk_vectors rows are kept in lockstep with chunks.id by the indexer
 * (vec0 tables are not covered by the FK cascade below).
 *
 * @param {string} dbPath - Filesystem path of the SQLite database.
 * @returns {import('better-sqlite3').Database} Open handle; caller closes it.
 */
export function initDatabase(dbPath) {
  const db = new Database(dbPath);
  sqliteVec.load(db);

  // WAL allows readers during indexing; FK enforcement backs the
  // chunks → documents ON DELETE CASCADE.
  db.pragma('journal_mode = WAL');
  db.pragma('foreign_keys = ON');

  db.exec(`
    CREATE TABLE IF NOT EXISTS documents (
      path TEXT PRIMARY KEY,
      content_hash TEXT NOT NULL,
      last_indexed INTEGER NOT NULL,
      chunk_count INTEGER NOT NULL DEFAULT 0
    );

    CREATE TABLE IF NOT EXISTS chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      doc_path TEXT NOT NULL,
      section TEXT NOT NULL,
      text TEXT NOT NULL,
      snippet TEXT NOT NULL,
      FOREIGN KEY (doc_path) REFERENCES documents(path) ON DELETE CASCADE
    );

    CREATE VIRTUAL TABLE IF NOT EXISTS chunk_vectors USING vec0(
      embedding float[${EMBEDDING_DIM}]
    );

    CREATE INDEX IF NOT EXISTS idx_chunks_doc_path ON chunks(doc_path);

    CREATE TABLE IF NOT EXISTS meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);

  return db;
}
246
+
247
+ // ─── Embedding Pipeline ──────────────────────────────────────────────────────
248
+
249
// Lazily-created singleton pipeline: loading the model is expensive, so it
// happens once on first use and is reused for every subsequent embed call.
let embedder = null;

/**
 * Return the shared feature-extraction pipeline, creating it on first use.
 * @throws {Error} When the model is neither cached locally nor downloadable.
 */
export async function getEmbedder() {
  if (embedder) return embedder;
  try {
    embedder = await pipeline('feature-extraction', MODEL_NAME, { dtype: 'fp32' });
  } catch (err) {
    throw new Error(
      `Embedding model not cached. Run with internet access to download ${MODEL_NAME} (~17MB, one-time).\n` +
      `Original error: ${err.message}`
    );
  }
  return embedder;
}
266
+
267
/**
 * Embed one text into a Float32Array via the shared pipeline.
 * Mean-pooled and normalized; input beyond max_length tokens is truncated.
 */
export async function embed(text) {
  const extractor = await getEmbedder();
  const output = await extractor(text, { pooling: 'mean', normalize: true, truncation: true, max_length: 256 });
  return new Float32Array(output.data);
}
272
+
273
/**
 * Embed texts one at a time (sequential awaits: one model call in flight,
 * results returned in input order).
 */
export async function embedBatch(texts) {
  const vectors = [];
  for (let i = 0; i < texts.length; i++) {
    vectors.push(await embed(texts[i]));
  }
  return vectors;
}
280
+
281
+ // ─── Indexer ─────────────────────────────────────────────────────────────────
282
+
283
/**
 * Incrementally (re)index all configured markdown under `root`.
 *
 * Strategy: compare each file's SHA-256 against the stored hash and only
 * re-chunk/re-embed files that changed (or every file when opts.force).
 * Documents no longer on disk are removed along with their chunks and
 * vectors. Records the run timestamp in the meta table.
 *
 * @param {import('better-sqlite3').Database} db - Handle from initDatabase().
 * @param {string} root - Workspace root to scan.
 * @param {{force?: boolean}} [opts] - force=true re-embeds unchanged files.
 * @returns {Promise<{indexed: number, skipped: number, deleted: number, total: number}>}
 */
export async function indexWorkspace(db, root, opts = {}) {
  const force = opts.force || false;
  const files = scanMarkdownFiles(root, INCLUDE_DIRS, EXCLUDE_PATTERNS);

  // Prepared statements reused across every file in this run.
  const getDoc = db.prepare('SELECT content_hash FROM documents WHERE path = ?');
  const deleteChunks = db.prepare('DELETE FROM chunks WHERE doc_path = ?');
  const deleteDoc = db.prepare('DELETE FROM documents WHERE path = ?');
  const insertDoc = db.prepare(
    'INSERT OR REPLACE INTO documents (path, content_hash, last_indexed, chunk_count) VALUES (?, ?, ?, ?)'
  );
  const insertChunk = db.prepare(
    'INSERT INTO chunks (doc_path, section, text, snippet) VALUES (?, ?, ?, ?)'
  );
  const deleteVec = db.prepare('DELETE FROM chunk_vectors WHERE rowid = ?');
  const getChunkIds = db.prepare('SELECT id FROM chunks WHERE doc_path = ?');

  const existingPaths = new Set(
    db.prepare('SELECT path FROM documents').all().map(r => r.path)
  );
  const currentPaths = new Set(files.map(f => f.rel));

  let indexed = 0;
  let skipped = 0;
  let deleted = 0;

  // Delete removed files. Vectors are removed explicitly per chunk id:
  // the vec0 table is not covered by the chunks → documents FK cascade.
  for (const existingPath of existingPaths) {
    if (!currentPaths.has(existingPath)) {
      const chunkIds = getChunkIds.all(existingPath);
      for (const { id } of chunkIds) {
        deleteVec.run(id);
      }
      deleteChunks.run(existingPath);
      deleteDoc.run(existingPath);
      deleted++;
    }
  }

  // Index new/changed files
  for (const file of files) {
    let content;
    try {
      content = readFileSync(file.path, 'utf-8');
    } catch {
      // File vanished or became unreadable between scan and read: skip it.
      continue;
    }

    const hash = hashContent(content);
    const existing = getDoc.get(file.rel);

    // Unchanged content (identical SHA-256) is skipped unless forced.
    if (!force && existing && existing.content_hash === hash) {
      skipped++;
      continue;
    }

    // Remove old chunks + vectors for this doc
    if (existing) {
      const oldChunkIds = getChunkIds.all(file.rel);
      for (const { id } of oldChunkIds) {
        deleteVec.run(id);
      }
      deleteChunks.run(file.rel);
    }

    // Chunk and embed. Embedding is async, so it happens before the
    // synchronous better-sqlite3 transaction below.
    const chunks = chunkMarkdown(content, file.rel);
    const texts = chunks.map(c => c.text);
    const vectors = await embedBatch(texts);

    // sqlite-vec quirk: rowid must be literal, not bound param with better-sqlite3.
    const doInsert = db.transaction(() => {
      insertDoc.run(file.rel, hash, Date.now(), chunks.length);
      for (let i = 0; i < chunks.length; i++) {
        const snippet = chunks[i].text.slice(0, SNIPPET_LENGTH).replace(/\n/g, ' ');
        const info = insertChunk.run(file.rel, chunks[i].section, chunks[i].text, snippet);
        const vecBuf = Buffer.from(vectors[i].buffer);
        db.prepare(`INSERT INTO chunk_vectors VALUES (${info.lastInsertRowid}, ?)`).run(vecBuf);
      }
    });
    doInsert();
    indexed++;
  }

  // Record when this pass finished (read back by getStats()).
  db.prepare('INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)').run(
    'last_index_time', Date.now().toString()
  );

  return { indexed, skipped, deleted, total: files.length };
}
372
+
373
+ // ─── Search Functions ────────────────────────────────────────────────────────
374
+
375
/**
 * Vector search: return the indexed chunks closest to the query embedding.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} query - Natural-language query to embed.
 * @param {number} [limit=10] - Max results (the vec0 KNN `k`).
 * @returns {Promise<{path: string, section: string, score: number, snippet: string}[]>}
 *   Sorted best-first; score is cosine similarity recovered from the L2
 *   distance of unit vectors (1 - d²/2).
 */
export async function semanticSearch(db, query, limit = 10) {
  const count = db.prepare('SELECT COUNT(*) as c FROM chunk_vectors').get().c;
  if (count === 0) return [];

  const queryVec = await embed(query);
  const vecBuf = Buffer.from(queryVec.buffer);

  // `k` is interpolated into the SQL string (sqlite-vec constraint syntax),
  // so sanitize it to a non-negative integer — never splice `limit` in raw.
  const parsed = Math.floor(Number(limit));
  const k = Number.isFinite(parsed) && parsed >= 0 ? parsed : 10;

  const results = db.prepare(`
    SELECT
      cv.rowid,
      cv.distance,
      c.doc_path,
      c.section,
      c.snippet
    FROM chunk_vectors cv
    JOIN chunks c ON c.id = cv.rowid
    WHERE embedding MATCH ? AND k = ${k}
    ORDER BY distance
  `).all(vecBuf);

  return results.map(r => ({
    path: r.doc_path,
    section: r.section,
    score: parseFloat((1 - r.distance * r.distance / 2).toFixed(4)),
    snippet: r.snippet,
  }));
}
402
+
403
/**
 * Find documents semantically related to an already-indexed document.
 *
 * Averages the document's chunk embeddings into a single unit vector,
 * runs an over-fetched KNN query with it, drops hits from the source
 * document itself, and keeps only the best-scoring chunk per related doc.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} docPath - Relative path as stored in the index.
 * @param {number} [limit=10] - Max related documents returned.
 * @returns {Promise<{path, section, score, snippet}[] | {error: string}>}
 *   NOTE: returns an error object (not a throw) for unknown docs, matching
 *   how the other search helpers report tool-level failures.
 */
export async function findRelated(db, docPath, limit = 10) {
  const chunkIds = db.prepare('SELECT id FROM chunks WHERE doc_path = ?').all(docPath);

  if (chunkIds.length === 0) {
    return { error: `Document not found in index: ${docPath}` };
  }

  const vectors = [];
  for (const { id } of chunkIds) {
    const row = db.prepare('SELECT embedding FROM chunk_vectors WHERE rowid = ?').get(id);
    if (row) {
      vectors.push(new Float32Array(row.embedding.buffer, row.embedding.byteOffset, EMBEDDING_DIM));
    }
  }

  if (vectors.length === 0) {
    return { error: `No embeddings found for: ${docPath}` };
  }

  // Average the vectors
  const avg = new Float32Array(EMBEDDING_DIM);
  for (const vec of vectors) {
    for (let i = 0; i < EMBEDDING_DIM; i++) avg[i] += vec[i];
  }
  for (let i = 0; i < EMBEDDING_DIM; i++) avg[i] /= vectors.length;

  // Normalize back to unit length so vec0 distances stay comparable.
  let norm = 0;
  for (let i = 0; i < EMBEDDING_DIM; i++) norm += avg[i] * avg[i];
  norm = Math.sqrt(norm);
  if (norm > 0) {
    for (let i = 0; i < EMBEDDING_DIM; i++) avg[i] /= norm;
  }

  const vecBuf = Buffer.from(avg.buffer);

  // `k` is interpolated into the SQL string (sqlite-vec constraint syntax),
  // so sanitize `limit` to a non-negative integer — never splice it in raw.
  // Over-fetch 3x: self-chunks and per-doc duplicates are filtered below.
  const parsed = Math.floor(Number(limit));
  const safeLimit = Number.isFinite(parsed) && parsed >= 0 ? parsed : 10;

  const rawResults = db.prepare(`
    SELECT
      cv.rowid,
      cv.distance,
      c.doc_path,
      c.section,
      c.snippet
    FROM chunk_vectors cv
    JOIN chunks c ON c.id = cv.rowid
    WHERE embedding MATCH ? AND k = ${safeLimit * 3}
    ORDER BY distance
  `).all(vecBuf);

  const results = rawResults.filter(r => r.doc_path !== docPath);

  // Deduplicate by document (keep best — smallest-distance — chunk per doc)
  const seen = new Map();
  for (const r of results) {
    if (!seen.has(r.doc_path) || r.distance < seen.get(r.doc_path).distance) {
      seen.set(r.doc_path, r);
    }
  }

  return [...seen.values()].slice(0, safeLimit).map(r => ({
    path: r.doc_path,
    section: r.section,
    score: parseFloat((1 - r.distance * r.distance / 2).toFixed(4)),
    snippet: r.snippet,
  }));
}
469
+
470
/**
 * Summarize index contents and configuration for the stats tool.
 *
 * @param {import('better-sqlite3').Database} db
 * @returns {{documents: number, chunks: number, embedding_dim: number,
 *   model: string, workspace: string, db_path: string,
 *   last_indexed: string | null}} last_indexed is an ISO timestamp, or
 *   null when no indexing pass has completed yet.
 */
export function getStats(db) {
  const docs = db.prepare('SELECT COUNT(*) as count FROM documents').get();
  const chunks = db.prepare('SELECT COUNT(*) as count FROM chunks').get();
  const lastIndex = db.prepare('SELECT value FROM meta WHERE key = ?').get('last_index_time');

  return {
    documents: docs.count,
    chunks: chunks.count,
    embedding_dim: EMBEDDING_DIM,
    model: MODEL_NAME,
    workspace: WORKSPACE,
    db_path: DB_PATH,
    // Explicit radix 10: last_index_time is stored as a decimal epoch-ms string.
    last_indexed: lastIndex ? new Date(parseInt(lastIndex.value, 10)).toISOString() : null,
  };
}
485
+
486
+ // ─── Background Polling ──────────────────────────────────────────────────────
487
+
488
/**
 * Kick off periodic background re-indexing.
 *
 * Disabled entirely when POLL_INTERVAL_MS <= 0. Poll errors are written to
 * stderr and never break the loop — the next poll is always scheduled.
 */
export function startPolling(db, root) {
  if (POLL_INTERVAL_MS <= 0) return;

  const poll = async () => {
    try {
      await indexWorkspace(db, root);
    } catch (err) {
      process.stderr.write(`[mcp-knowledge] poll error: ${err.message}\n`);
    }
    setTimeout(poll, POLL_INTERVAL_MS);
  };

  setTimeout(poll, POLL_INTERVAL_MS);
}
500
+
501
+ // ─── Engine Factory ──────────────────────────────────────────────────────────
502
+
503
/**
 * Boot a complete knowledge engine: open the DB, run an initial index,
 * start background polling, and return a handle of bound operations.
 *
 * @param {{workspace?: string, dbPath?: string}} [opts] - Overrides for the
 *   module-level WORKSPACE / DB_PATH defaults.
 * @returns {Promise<{db, search, related, reindex, stats}>}
 */
export async function createKnowledgeEngine(opts = {}) {
  const workspace = opts.workspace || WORKSPACE;
  const dbPath = opts.dbPath || DB_PATH;

  // Status goes to stderr so stdout stays clean for MCP transport framing.
  const log = (line) => process.stderr.write(line);
  log(`[mcp-knowledge] workspace: ${workspace}\n`);
  log(`[mcp-knowledge] database: ${dbPath}\n`);
  log(`[mcp-knowledge] model: ${MODEL_NAME}\n`);

  const db = initDatabase(dbPath);

  log('[mcp-knowledge] initial indexing...\n');
  const result = await indexWorkspace(db, workspace);
  log(
    `[mcp-knowledge] indexed: ${result.indexed}, skipped: ${result.skipped}, deleted: ${result.deleted}, total: ${result.total}\n`
  );

  startPolling(db, workspace);

  return {
    db,
    search: (query, limit) => semanticSearch(db, query, limit),
    related: (docPath, limit) => findRelated(db, docPath, limit),
    reindex: (force) => indexWorkspace(db, workspace, { force }),
    stats: () => getStats(db),
  };
}
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "@openclaw/mcp-knowledge",
3
+ "version": "0.1.0",
4
+ "description": "LLM-agnostic MCP server for semantic search over markdown knowledge bases",
5
+ "type": "module",
6
+ "main": "server.mjs",
7
+ "bin": {
8
+ "mcp-knowledge": "./server.mjs"
9
+ },
10
+ "scripts": {
11
+ "start": "node server.mjs",
12
+ "start:http": "KNOWLEDGE_PORT=3100 node server.mjs",
13
+ "test": "node test.mjs"
14
+ },
15
+ "dependencies": {
16
+ "@huggingface/transformers": "^3.0.0",
17
+ "@modelcontextprotocol/sdk": "^1.0.0",
18
+ "better-sqlite3": "^11.0.0",
19
+ "sqlite-vec": "^0.1.0"
20
+ },
21
+ "engines": {
22
+ "node": ">=20.0.0"
23
+ },
24
+ "license": "MIT"
25
+ }