morpheus-cli 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/README.md +63 -43
  2. package/dist/channels/discord.js +3 -6
  3. package/dist/channels/telegram.js +3 -6
  4. package/dist/cli/commands/restart.js +15 -0
  5. package/dist/cli/commands/start.js +16 -0
  6. package/dist/config/manager.js +61 -0
  7. package/dist/config/paths.js +1 -0
  8. package/dist/config/schemas.js +11 -3
  9. package/dist/http/api.js +3 -0
  10. package/dist/http/routers/link.js +239 -0
  11. package/dist/http/routers/skills.js +1 -8
  12. package/dist/runtime/apoc.js +1 -1
  13. package/dist/runtime/audit/repository.js +1 -1
  14. package/dist/runtime/link-chunker.js +214 -0
  15. package/dist/runtime/link-repository.js +301 -0
  16. package/dist/runtime/link-search.js +298 -0
  17. package/dist/runtime/link-worker.js +284 -0
  18. package/dist/runtime/link.js +295 -0
  19. package/dist/runtime/memory/sati/service.js +1 -1
  20. package/dist/runtime/neo.js +1 -1
  21. package/dist/runtime/oracle.js +81 -44
  22. package/dist/runtime/scaffold.js +4 -17
  23. package/dist/runtime/skills/__tests__/loader.test.js +7 -10
  24. package/dist/runtime/skills/__tests__/registry.test.js +2 -18
  25. package/dist/runtime/skills/__tests__/tool.test.js +55 -224
  26. package/dist/runtime/skills/index.js +1 -2
  27. package/dist/runtime/skills/loader.js +0 -2
  28. package/dist/runtime/skills/registry.js +8 -20
  29. package/dist/runtime/skills/schema.js +0 -4
  30. package/dist/runtime/skills/tool.js +42 -209
  31. package/dist/runtime/smiths/delegator.js +1 -1
  32. package/dist/runtime/smiths/registry.js +1 -1
  33. package/dist/runtime/tasks/worker.js +12 -44
  34. package/dist/runtime/trinity.js +1 -1
  35. package/dist/types/config.js +14 -0
  36. package/dist/ui/assets/AuditDashboard-93LCGHG1.js +1 -0
  37. package/dist/ui/assets/{Chat-BNtutgja.js → Chat-CK5sNcQ1.js} +8 -8
  38. package/dist/ui/assets/{Chronos-3C8RPZcl.js → Chronos-m2h--GEe.js} +1 -1
  39. package/dist/ui/assets/{ConfirmationModal-ZQPBeJ2Z.js → ConfirmationModal-Dd5pUJme.js} +1 -1
  40. package/dist/ui/assets/{Dashboard-CqkHzr2F.js → Dashboard-ODwl7d-a.js} +1 -1
  41. package/dist/ui/assets/{DeleteConfirmationModal-CioxFWn_.js → DeleteConfirmationModal-CCcojDmr.js} +1 -1
  42. package/dist/ui/assets/Documents-dWnSoxFO.js +7 -0
  43. package/dist/ui/assets/{Logs-DBVanS0O.js → Logs-Dc9Z2LBj.js} +1 -1
  44. package/dist/ui/assets/{MCPManager-vXfL3P2U.js → MCPManager-CMkb8vMn.js} +1 -1
  45. package/dist/ui/assets/{ModelPricing-DyfdunLT.js → ModelPricing-DtHPPbEQ.js} +1 -1
  46. package/dist/ui/assets/{Notifications-VL-vep6d.js → Notifications-BPvo-DWP.js} +1 -1
  47. package/dist/ui/assets/{Pagination-oTGieBLM.js → Pagination-BHZKk42X.js} +1 -1
  48. package/dist/ui/assets/{SatiMemories-jaadkW0U.js → SatiMemories-BUPu1Lxr.js} +1 -1
  49. package/dist/ui/assets/SessionAudit-CFKF4DA8.js +9 -0
  50. package/dist/ui/assets/Settings-C4JrXfsR.js +47 -0
  51. package/dist/ui/assets/{Skills-DE3zziXL.js → Skills-BUlvJgJ4.js} +1 -1
  52. package/dist/ui/assets/{Smiths-pmogN1mU.js → Smiths-CDtJdY0I.js} +1 -1
  53. package/dist/ui/assets/{Tasks-Bs8s34Jc.js → Tasks-DK_cOsNK.js} +1 -1
  54. package/dist/ui/assets/{TrinityDatabases-D7uihcdp.js → TrinityDatabases-X07by-19.js} +1 -1
  55. package/dist/ui/assets/{UsageStats-B9gePLZ0.js → UsageStats-dYcgckLq.js} +1 -1
  56. package/dist/ui/assets/{WebhookManager-B2L3rCLM.js → WebhookManager-DDw5eX2R.js} +1 -1
  57. package/dist/ui/assets/{audit-Cggeu9mM.js → audit-DZ5WLUEm.js} +1 -1
  58. package/dist/ui/assets/{chronos-D3-sWhfU.js → chronos-B_HI4mlq.js} +1 -1
  59. package/dist/ui/assets/{config-CBqRUPgn.js → config-B-YxlVrc.js} +1 -1
  60. package/dist/ui/assets/index-DVjwJ8jT.css +1 -0
  61. package/dist/ui/assets/{index-zKplfrXZ.js → index-DfJwcKqG.js} +5 -5
  62. package/dist/ui/assets/{mcp-uL1R9hyA.js → mcp-k-_pwbqA.js} +1 -1
  63. package/dist/ui/assets/{skills-jmw8yTJs.js → skills-xMXangks.js} +1 -1
  64. package/dist/ui/assets/{stats-HOms6GnM.js → stats-C4QZIv5O.js} +1 -1
  65. package/dist/ui/assets/{vendor-icons-DMd9RGvJ.js → vendor-icons-NHF9HNeN.js} +1 -1
  66. package/dist/ui/index.html +3 -3
  67. package/dist/ui/sw.js +1 -1
  68. package/package.json +3 -1
  69. package/dist/runtime/__tests__/keymaker.test.js +0 -148
  70. package/dist/runtime/keymaker.js +0 -157
  71. package/dist/ui/assets/AuditDashboard-DliJ1CX0.js +0 -1
  72. package/dist/ui/assets/SessionAudit-BsXrWlwz.js +0 -9
  73. package/dist/ui/assets/Settings-B4eezRcg.js +0 -47
  74. package/dist/ui/assets/index-D4fzIKy1.css +0 -1
@@ -0,0 +1,301 @@
1
+ import Database from 'better-sqlite3';
2
+ import fs from 'fs-extra';
3
+ import path from 'path';
4
+ import { homedir } from 'os';
5
+ import { randomUUID } from 'crypto';
6
+ import loadVecExtension from './memory/sqlite-vec.js';
7
+ import { DisplayManager } from './display.js';
8
+ // ─── Repository ──────────────────────────────────────────────────────────────
9
/** Number of float components per vector in the `embeddings` vec0 table. */
const EMBEDDING_DIM = 384;
/**
 * Maximum number of chunk ids bound into one `IN (...)` list. Bulk deletes are
 * split into batches of this size so that a document with very many chunks
 * cannot exceed SQLite's per-statement bound-parameter limit.
 */
const MAX_IDS_PER_STATEMENT = 500;

/**
 * SQLite-backed store for Link documents, their text chunks and the chunk
 * embeddings.
 *
 * Tables created by {@link LinkRepository#createSchema}:
 *  - `documents`  : one row per file (`file_path` is UNIQUE).
 *  - `chunks`     : text chunks; foreign key to `documents` with ON DELETE CASCADE.
 *  - `embeddings` : sqlite-vec `vec0` virtual table keyed by `chunk_id`. It has
 *                   no foreign key, so its rows are removed explicitly.
 *  - `chunks_fts` : FTS5 index over `chunks.content`, kept in sync by triggers.
 *
 * Use {@link LinkRepository.getInstance} for the shared instance and call
 * {@link LinkRepository#initialize} before any data method; until then every
 * data method throws `Error('DB not initialized')`.
 */
export class LinkRepository {
    /** Shared instance handed out by getInstance(); null until first use. */
    static instance = null;
    /** Open database handle, or null before initialize() / after close(). */
    db = null;
    /** Absolute path of the SQLite file backing this repository. */
    dbPath;
    /** DisplayManager singleton (not referenced by the methods of this class). */
    display = DisplayManager.getInstance();

    /**
     * @param {string} [dbPath] Database file path. Falls back to
     *   `~/.morpheus/memory/link.db` when omitted or empty.
     */
    constructor(dbPath) {
        this.dbPath = dbPath || path.join(homedir(), '.morpheus', 'memory', 'link.db');
    }

    /**
     * Returns the shared repository, creating it on first call.
     *
     * @param {string} [dbPath] Only honoured by the call that creates the
     *   instance; later calls return the existing instance unchanged.
     * @returns {LinkRepository}
     */
    static getInstance(dbPath) {
        if (!LinkRepository.instance) {
            LinkRepository.instance = new LinkRepository(dbPath);
        }
        return LinkRepository.instance;
    }

    /** Closes the shared instance's connection (if any) and forgets the instance. */
    static resetInstance() {
        // close() also clears `db`, so a retained reference to the old instance
        // reports "DB not initialized" instead of using a closed handle.
        LinkRepository.instance?.close();
        LinkRepository.instance = null;
    }

    /**
     * Opens the database, enables WAL and foreign keys, loads the sqlite-vec
     * extension and creates the schema.
     *
     * Calling it again while a connection is open is a no-op, so an existing
     * handle is never leaked. If any setup step throws, the new handle is
     * closed and `db` is reset to null before the error is rethrown.
     */
    initialize() {
        if (this.db) {
            return;
        }
        fs.ensureDirSync(path.dirname(this.dbPath));
        const db = new Database(this.dbPath, { timeout: 5000 });
        this.db = db;
        try {
            db.pragma('journal_mode = WAL');
            // Required for the ON DELETE CASCADE on chunks.document_id to apply.
            db.pragma('foreign_keys = ON');
            loadVecExtension(db);
            this.createSchema();
        }
        catch (err) {
            this.db = null;
            db.close();
            throw err;
        }
    }

    /**
     * Returns the open database handle.
     *
     * @returns {object} The handle stored in `this.db`.
     * @throws {Error} 'DB not initialized' when no connection is open.
     */
    requireDb() {
        if (!this.db) {
            throw new Error('DB not initialized');
        }
        return this.db;
    }

    /**
     * Creates tables, indexes and FTS-sync triggers if they do not exist yet.
     *
     * @throws {Error} 'DB not initialized' when no connection is open.
     */
    createSchema() {
        const db = this.requireDb();
        // Documents table
        db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        filename TEXT NOT NULL,
        file_path TEXT NOT NULL UNIQUE,
        file_hash TEXT NOT NULL,
        file_size INTEGER NOT NULL,
        status TEXT NOT NULL DEFAULT 'pending',
        error_message TEXT,
        chunk_count INTEGER DEFAULT 0,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );
    `);
        // Chunks table
        db.exec(`
      CREATE TABLE IF NOT EXISTS chunks (
        id TEXT PRIMARY KEY,
        document_id TEXT NOT NULL,
        position INTEGER NOT NULL,
        content TEXT NOT NULL,
        char_start INTEGER NOT NULL,
        char_end INTEGER NOT NULL,
        created_at TEXT NOT NULL,
        UNIQUE(document_id, position),
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      );
    `);
        // Embeddings table (sqlite-vec)
        db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
        chunk_id TEXT PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );
    `);
        // FTS5 virtual table for BM25 search
        db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
        content,
        content='chunks',
        content_rowid='rowid',
        tokenize='porter unicode61'
      );
    `);
        // Indexes
        db.exec(`
      CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
      CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(file_hash);
      CREATE INDEX IF NOT EXISTS idx_chunks_document ON chunks(document_id);
    `);
        // Triggers to keep FTS in sync
        db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;
    `);
    }

    // ─── Document CRUD ────────────────────────────────────────────────────────

    /**
     * Inserts a document in status 'pending' with a freshly generated UUID.
     *
     * @param {{filename: string, file_path: string, file_hash: string, file_size: number}} input
     * @returns {object|null} The stored document as read back by getDocument().
     * @throws {Error} 'DB not initialized' when no connection is open.
     */
    createDocument(input) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const id = randomUUID();
        db.prepare(`
      INSERT INTO documents (id, filename, file_path, file_hash, file_size, status, error_message, chunk_count, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, 'pending', NULL, 0, ?, ?)
    `).run(id, input.filename, input.file_path, input.file_hash, input.file_size, now, now);
        return this.getDocument(id);
    }

    /**
     * @param {string} id Document id.
     * @returns {object|null} The document, or null when no row matches.
     */
    getDocument(id) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE id = ?').get(id);
        return row ? this.deserializeDocument(row) : null;
    }

    /**
     * @param {string} file_path Value of the UNIQUE `file_path` column.
     * @returns {object|null} The document, or null when no row matches.
     */
    getDocumentByPath(file_path) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE file_path = ?').get(file_path);
        return row ? this.deserializeDocument(row) : null;
    }

    /**
     * @param {string} hash Value of the `file_hash` column.
     * @returns {object|null} One document with that hash, or null. The column
     *   is not unique; only a single row is returned.
     */
    getDocumentByHash(hash) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE file_hash = ?').get(hash);
        return row ? this.deserializeDocument(row) : null;
    }

    /**
     * Lists documents ordered by `updated_at`, newest first.
     *
     * @param {string} [status] When truthy, only documents with this status.
     * @returns {object[]}
     */
    listDocuments(status) {
        const db = this.requireDb();
        const params = [];
        let query = 'SELECT * FROM documents';
        if (status) {
            query += ' WHERE status = ?';
            params.push(status);
        }
        query += ' ORDER BY updated_at DESC';
        return db.prepare(query).all(...params).map((row) => this.deserializeDocument(row));
    }

    /**
     * Sets a document's status and error message and bumps `updated_at`.
     *
     * @param {string} id Document id.
     * @param {string} status New status value.
     * @param {string} [error_message] Stored as NULL when omitted.
     * @returns {object|null} The document after the update, or null if absent.
     */
    updateDocumentStatus(id, status, error_message) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        db.prepare(`
      UPDATE documents SET status = ?, error_message = ?, updated_at = ? WHERE id = ?
    `).run(status, error_message ?? null, now, id);
        return this.getDocument(id);
    }

    /**
     * Records the chunk count and marks the document 'indexed' in one update.
     *
     * @param {string} id Document id.
     * @param {number} chunk_count Number of chunks stored for the document.
     */
    updateDocumentChunkCount(id, chunk_count) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        db.prepare(`
      UPDATE documents SET chunk_count = ?, status = 'indexed', updated_at = ? WHERE id = ?
    `).run(chunk_count, now, id);
    }

    /**
     * Deletes a document together with its chunks and embeddings.
     *
     * Chunks are removed by the ON DELETE CASCADE foreign key. The `embeddings`
     * virtual table has no foreign key, so its rows are purged explicitly first;
     * both steps run in one transaction.
     *
     * @param {string} id Document id.
     * @returns {boolean} True when a document row was deleted.
     */
    deleteDocument(id) {
        const db = this.requireDb();
        return db.transaction(() => this.removeDocumentRow(db, id))();
    }

    /**
     * Same as deleteDocument(), addressing the document by its file path.
     *
     * @param {string} file_path Value of the UNIQUE `file_path` column.
     * @returns {boolean} True when a document row was deleted.
     */
    deleteDocumentByPath(file_path) {
        const db = this.requireDb();
        return db.transaction(() => {
            const row = db.prepare('SELECT id FROM documents WHERE file_path = ?').get(file_path);
            return row ? this.removeDocumentRow(db, row.id) : false;
        })();
    }

    /**
     * Purges a document's embeddings, then deletes its `documents` row.
     * Callers are expected to wrap this in a transaction.
     *
     * @param {object} db Open database handle.
     * @param {string} documentId Document id.
     * @returns {boolean} True when a document row was deleted.
     */
    removeDocumentRow(db, documentId) {
        // Must run before the document delete: the chunk rows are needed to
        // find which embeddings belong to the document.
        this.purgeEmbeddings(db, documentId);
        return db.prepare('DELETE FROM documents WHERE id = ?').run(documentId).changes > 0;
    }

    // ─── Chunk CRUD ───────────────────────────────────────────────────────────

    /**
     * Inserts one chunk with a freshly generated UUID.
     *
     * @param {{document_id: string, position: number, content: string, char_start: number, char_end: number}} input
     * @returns {object|null} The stored chunk as read back by getChunk().
     */
    createChunk(input) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const id = randomUUID();
        db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(id, input.document_id, input.position, input.content, input.char_start, input.char_end, now);
        return this.getChunk(id);
    }

    /**
     * Inserts many chunks inside a single transaction; all rows share the same
     * `created_at` timestamp and each gets its own generated UUID.
     *
     * @param {Array<{document_id: string, position: number, content: string, char_start: number, char_end: number}>} inputs
     */
    createChunks(inputs) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const stmt = db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
        const insertMany = db.transaction((items) => {
            for (const item of items) {
                stmt.run(randomUUID(), item.document_id, item.position, item.content, item.char_start, item.char_end, now);
            }
        });
        insertMany(inputs);
    }

    /**
     * @param {string} id Chunk id.
     * @returns {object|null} The chunk, or null when no row matches.
     */
    getChunk(id) {
        const row = this.requireDb().prepare('SELECT * FROM chunks WHERE id = ?').get(id);
        return row ? this.deserializeChunk(row) : null;
    }

    /**
     * @param {string} document_id Owning document id.
     * @returns {object[]} The document's chunks ordered by `position`.
     */
    getChunksByDocument(document_id) {
        const rows = this.requireDb()
            .prepare('SELECT * FROM chunks WHERE document_id = ? ORDER BY position')
            .all(document_id);
        return rows.map((row) => this.deserializeChunk(row));
    }

    /**
     * Deletes all chunk rows of a document. Embeddings are not touched here;
     * call deleteEmbeddingsByDocument() first, while the chunk rows still exist.
     *
     * @param {string} document_id Owning document id.
     */
    deleteChunksByDocument(document_id) {
        this.requireDb().prepare('DELETE FROM chunks WHERE document_id = ?').run(document_id);
    }

    // ─── Embeddings ───────────────────────────────────────────────────────────

    /**
     * Stores one embedding vector for a chunk.
     *
     * @param {string} chunk_id Chunk id used as the vec0 primary key.
     * @param {ArrayLike<number>} embedding Vector values; copied into a Float32Array.
     */
    createEmbedding(chunk_id, embedding) {
        const db = this.requireDb();
        db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `).run(chunk_id, new Float32Array(embedding));
    }

    /**
     * Stores many embeddings inside a single transaction.
     *
     * @param {Array<{chunk_id: string, embedding: ArrayLike<number>}>} items
     */
    createEmbeddings(items) {
        const db = this.requireDb();
        const stmt = db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `);
        const insertMany = db.transaction((entries) => {
            for (const entry of entries) {
                stmt.run(entry.chunk_id, new Float32Array(entry.embedding));
            }
        });
        insertMany(items);
    }

    /**
     * Deletes the embeddings of every chunk that currently belongs to a
     * document. Does nothing when the document has no chunks.
     *
     * @param {string} document_id Owning document id.
     */
    deleteEmbeddingsByDocument(document_id) {
        this.purgeEmbeddings(this.requireDb(), document_id);
    }

    /**
     * Batched embedding delete shared by the public delete methods.
     *
     * @param {object} db Open database handle.
     * @param {string} documentId Owning document id.
     */
    purgeEmbeddings(db, documentId) {
        const chunkIds = db
            .prepare('SELECT id FROM chunks WHERE document_id = ?')
            .all(documentId)
            .map((row) => row.id);
        // One placeholder is bound per id, so the list is processed in slices
        // to keep each statement's parameter count bounded.
        for (let offset = 0; offset < chunkIds.length; offset += MAX_IDS_PER_STATEMENT) {
            const batch = chunkIds.slice(offset, offset + MAX_IDS_PER_STATEMENT);
            const placeholders = batch.map(() => '?').join(',');
            db.prepare(`DELETE FROM embeddings WHERE chunk_id IN (${placeholders})`).run(...batch);
        }
    }

    // ─── Stats ─────────────────────────────────────────────────────────────────

    /**
     * @returns {{documents_total: number, documents_indexed: number, chunks_total: number}}
     *   Row counts for all documents, documents with status 'indexed', and chunks.
     */
    getStats() {
        const db = this.requireDb();
        const count = (sql) => db.prepare(sql).get().cnt;
        const documents_total = count('SELECT COUNT(*) as cnt FROM documents');
        const documents_indexed = count("SELECT COUNT(*) as cnt FROM documents WHERE status = 'indexed'");
        const chunks_total = count('SELECT COUNT(*) as cnt FROM chunks');
        return { documents_total, documents_indexed, chunks_total };
    }

    // ─── Cleanup ───────────────────────────────────────────────────────────────

    /** Closes the connection if one is open and clears `db`. Safe to call twice. */
    close() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
    }

    // ─── Deserializers ─────────────────────────────────────────────────────────

    /**
     * Maps a `documents` row to a plain object, defaulting a missing
     * `error_message` to null and a missing `chunk_count` to 0.
     *
     * @param {object} row Raw row from the `documents` table.
     * @returns {object}
     */
    deserializeDocument(row) {
        return {
            id: row.id,
            filename: row.filename,
            file_path: row.file_path,
            file_hash: row.file_hash,
            file_size: row.file_size,
            status: row.status,
            error_message: row.error_message ?? null,
            chunk_count: row.chunk_count ?? 0,
            created_at: row.created_at,
            updated_at: row.updated_at,
        };
    }

    /**
     * Maps a `chunks` row to a plain object.
     *
     * @param {object} row Raw row from the `chunks` table.
     * @returns {object}
     */
    deserializeChunk(row) {
        return {
            id: row.id,
            document_id: row.document_id,
            position: row.position,
            content: row.content,
            char_start: row.char_start,
            char_end: row.char_end,
            created_at: row.created_at,
        };
    }
}
@@ -0,0 +1,298 @@
1
+ import { LinkRepository } from './link-repository.js';
2
+ import { ConfigManager } from '../config/manager.js';
3
+ import { EmbeddingService } from './memory/embedding.service.js';
4
/**
 * LinkSearch - Hybrid search for Link documents
 *
 * Combines vector similarity search with BM25 full-text search. Each result
 * list is min-max normalized to 0-1 and the two scores are blended using the
 * `vector_weight` and `bm25_weight` values read from the Link configuration.
 */
export class LinkSearch {
    /** Shared instance handed out by getInstance(); null until first use. */
    static instance = null;
    /** LinkRepository singleton whose database handle is queried. */
    repository;
    /** Database handle copied from the repository by initialize(). */
    db = null;
    /** Embedding service used to embed query text; set by initialize(). */
    embeddingService = null;

    constructor() {
        this.repository = LinkRepository.getInstance();
    }

    /**
     * Returns the shared LinkSearch, creating it on first call.
     *
     * @returns {LinkSearch}
     */
    static getInstance() {
        if (!LinkSearch.instance) {
            LinkSearch.instance = new LinkSearch();
        }
        return LinkSearch.instance;
    }

    /** Forgets the shared instance; the next getInstance() builds a new one. */
    static resetInstance() {
        LinkSearch.instance = null;
    }

    /**
     * Captures the repository's current database handle and obtains the
     * embedding service. The handle is copied as-is, so the repository should
     * be initialized first.
     *
     * @returns {Promise<void>}
     */
    async initialize() {
        this.db = this.repository.db;
        this.embeddingService = await EmbeddingService.getInstance();
    }

    /**
     * Returns the database handle captured by initialize().
     *
     * @returns {object}
     * @throws {Error} 'LinkSearch not initialized' when no handle is set.
     */
    requireDb() {
        if (!this.db) {
            throw new Error('LinkSearch not initialized');
        }
        return this.db;
    }

    /**
     * Turns free text into a safe FTS5 MATCH expression.
     *
     * Everything except letters, numbers and whitespace is dropped, then each
     * remaining term is wrapped in double quotes. Quoting makes FTS5 read the
     * term as a string, so uppercase words such as AND, OR, NOT or NEAR typed
     * by a user are matched as text instead of being parsed as operators.
     * Terms stay space-separated (FTS5's implicit AND).
     *
     * @param {string} text Raw query text.
     * @returns {string} MATCH expression, or '' when no searchable term remains.
     */
    toFtsQuery(text) {
        return String(text ?? '')
            .replace(/[^\p{L}\p{N}\s]/gu, ' ')
            .split(/\s+/)
            .filter(Boolean)
            .map((term) => `"${term}"`)
            .join(' ');
    }

    /**
     * Perform vector similarity search using sqlite-vec.
     *
     * @param {ArrayLike<number>} queryEmbedding Query vector.
     * @param {number} limit Maximum number of rows to fetch.
     * @param {string|null} [documentId] When not undefined, restricts the
     *   search to chunks of that document.
     * @returns {object[]} Results with `score = 1 - cosine distance`, closest first.
     * @throws {Error} 'LinkSearch not initialized' when no handle is set.
     */
    vectorSearch(queryEmbedding, limit, documentId) {
        const db = this.requireDb();
        const scoped = documentId !== undefined;
        const embeddingBlob = new Float32Array(queryEmbedding);
        const params = scoped ? [embeddingBlob, documentId, limit] : [embeddingBlob, limit];
        const rows = db.prepare(`
      SELECT
        e.chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        vec_distance_cosine(e.embedding, ?) as distance
      FROM embeddings e
      JOIN chunks c ON e.chunk_id = c.id
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'${scoped ? ' AND c.document_id = ?' : ''}
      ORDER BY distance ASC
      LIMIT ?
    `).all(...params);
        // Convert distance to similarity score (1 - distance for cosine)
        return rows.map((row) => ({
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: 1 - row.distance,
        }));
    }

    /**
     * Perform BM25 full-text search using FTS5.
     *
     * @param {string} query Raw query text; sanitized by toFtsQuery().
     * @param {number} limit Maximum number of rows to fetch.
     * @param {string|null} [documentId] When not undefined, restricts the
     *   search to chunks of that document.
     * @returns {object[]} Results with `score = -bm25`, best first; empty when
     *   the query has no searchable term.
     * @throws {Error} 'LinkSearch not initialized' when no handle is set.
     */
    bm25Search(query, limit, documentId) {
        const db = this.requireDb();
        const ftsQuery = this.toFtsQuery(query);
        if (!ftsQuery) {
            return [];
        }
        const scoped = documentId !== undefined;
        const params = scoped ? [documentId, ftsQuery, limit] : [ftsQuery, limit];
        const rows = db.prepare(`
      SELECT
        c.id as chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        bm25(chunks_fts) as bm25_score
      FROM chunks_fts fts
      JOIN chunks c ON c.rowid = fts.rowid
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'${scoped ? `
        AND c.document_id = ?` : ''}
        AND chunks_fts MATCH ?
      ORDER BY bm25_score ASC
      LIMIT ?
    `).all(...params);
        // bm25() is lower (more negative) for better matches; negate so that
        // larger means better, like the vector score.
        return rows.map((row) => ({
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: -row.bm25_score,
        }));
    }

    /**
     * Normalize scores to 0-1 range using min-max scaling.
     *
     * NOTE(review): scaling is relative to the list itself — the best entry
     * always becomes 1, and when all scores are equal (including a one-element
     * list) every entry becomes 1. Thresholds applied afterwards therefore
     * compare results with each other, not against an absolute relevance.
     *
     * @param {Array<{score: number}>} results
     * @returns {Array<{score: number}>} New objects with rescaled `score`
     *   (the same array is returned when it is empty).
     */
    normalizeScores(results) {
        if (results.length === 0) {
            return results;
        }
        const scores = results.map((r) => r.score);
        const min = Math.min(...scores);
        const range = Math.max(...scores) - min;
        if (range === 0) {
            // All scores are the same
            return results.map((r) => ({ ...r, score: 1 }));
        }
        return results.map((r) => ({ ...r, score: (r.score - min) / range }));
    }

    /**
     * Normalizes both result lists, blends them per chunk and returns the top
     * entries. A chunk missing from one list contributes 0 for that list.
     *
     * @param {object[]} vectorResults Output of vectorSearch().
     * @param {object[]} bm25Results Output of bm25Search().
     * @param {number} vectorWeight Weight of the normalized vector score.
     * @param {number} bm25Weight Weight of the normalized BM25 score.
     * @param {number} limit Maximum number of results to return.
     * @param {number} threshold Minimum combined score (inclusive).
     * @returns {object[]} Results sorted by combined `score`, highest first,
     *   each carrying `vector_score` and `bm25_score`.
     */
    combineResults(vectorResults, bm25Results, vectorWeight, bm25Weight, limit, threshold) {
        const vectorMap = new Map(this.normalizeScores(vectorResults).map((r) => [r.chunk_id, r]));
        const bm25Map = new Map(this.normalizeScores(bm25Results).map((r) => [r.chunk_id, r]));
        const combined = [];
        for (const chunkId of new Set([...vectorMap.keys(), ...bm25Map.keys()])) {
            const vResult = vectorMap.get(chunkId);
            const bResult = bm25Map.get(chunkId);
            const vectorScore = vResult?.score ?? 0;
            const bm25Score = bResult?.score ?? 0;
            // Every id comes from one of the two maps, so one of them is set.
            const data = vResult || bResult;
            combined.push({
                chunk_id: chunkId,
                document_id: data.document_id,
                filename: data.filename,
                position: data.position,
                content: data.content,
                score: (vectorScore * vectorWeight) + (bm25Score * bm25Weight),
                vector_score: vectorScore,
                bm25_score: bm25Score,
            });
        }
        return combined
            .filter((r) => r.score >= threshold)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }

    /**
     * Perform hybrid search combining vector and BM25 results.
     *
     * @param {ArrayLike<number>} queryEmbedding Query vector.
     * @param {string} queryText Raw query text for the BM25 side.
     * @param {number} limit Maximum number of results to return.
     * @param {number} threshold Minimum combined score (inclusive).
     * @param {string|null} [documentId] When not undefined, restricts both
     *   searches to that document.
     * @returns {object[]} Combined results, best first.
     * @throws {Error} 'LinkSearch not initialized' when no handle is set.
     */
    hybridSearch(queryEmbedding, queryText, limit, threshold, documentId) {
        const config = ConfigManager.getInstance().getLinkConfig();
        // Fetch more than requested from each side so the merge has candidates
        // that rank well on only one of the two signals.
        const fetchLimit = limit * 3;
        const vectorResults = this.vectorSearch(queryEmbedding, fetchLimit, documentId);
        const bm25Results = this.bm25Search(queryText, fetchLimit, documentId);
        return this.combineResults(vectorResults, bm25Results, config.vector_weight, config.bm25_weight, limit, threshold);
    }

    /**
     * Search with a text query (generates embedding internally).
     *
     * @param {string} queryText Raw query text.
     * @param {number} [limit] Defaults to the configured `max_results`.
     * @param {number} [threshold] Defaults to the configured `score_threshold`.
     * @returns {Promise<object[]>} Combined results, best first.
     * @throws {Error} 'LinkSearch not initialized' when initialize() has not run.
     */
    async search(queryText, limit, threshold) {
        if (!this.embeddingService) {
            throw new Error('LinkSearch not initialized');
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        const queryEmbedding = await this.embeddingService.generate(queryText);
        return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold);
    }

    /**
     * Search within a specific document by document_id.
     * Runs vector + BM25 search filtered to chunks belonging to that document.
     *
     * @param {string} queryText Raw query text.
     * @param {string} documentId Document whose chunks are searched.
     * @param {number} [limit] Defaults to the configured `max_results`.
     * @param {number} [threshold] Defaults to the configured `score_threshold`.
     * @returns {Promise<object[]>} Combined results, best first; empty when
     *   `documentId` is null or undefined.
     * @throws {Error} 'LinkSearch not initialized' when initialize() has not run.
     */
    async searchInDocument(queryText, documentId, limit, threshold) {
        if (!this.embeddingService || !this.db) {
            throw new Error('LinkSearch not initialized');
        }
        // A missing id must never widen the search to every document.
        if (documentId == null) {
            return [];
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        const queryEmbedding = await this.embeddingService.generate(queryText);
        return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold, documentId);
    }
}