morpheus-cli 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/channels/discord.js +3 -6
- package/dist/channels/telegram.js +3 -6
- package/dist/cli/commands/restart.js +15 -0
- package/dist/cli/commands/start.js +16 -0
- package/dist/config/manager.js +61 -0
- package/dist/config/paths.js +1 -0
- package/dist/config/schemas.js +11 -3
- package/dist/http/api.js +3 -0
- package/dist/http/routers/link.js +239 -0
- package/dist/http/routers/skills.js +1 -8
- package/dist/runtime/apoc.js +1 -1
- package/dist/runtime/audit/repository.js +1 -1
- package/dist/runtime/link-chunker.js +214 -0
- package/dist/runtime/link-repository.js +301 -0
- package/dist/runtime/link-search.js +298 -0
- package/dist/runtime/link-worker.js +284 -0
- package/dist/runtime/link.js +295 -0
- package/dist/runtime/memory/sati/service.js +1 -1
- package/dist/runtime/neo.js +1 -1
- package/dist/runtime/oracle.js +81 -44
- package/dist/runtime/scaffold.js +4 -17
- package/dist/runtime/skills/__tests__/loader.test.js +7 -10
- package/dist/runtime/skills/__tests__/registry.test.js +2 -18
- package/dist/runtime/skills/__tests__/tool.test.js +55 -224
- package/dist/runtime/skills/index.js +1 -2
- package/dist/runtime/skills/loader.js +0 -2
- package/dist/runtime/skills/registry.js +8 -20
- package/dist/runtime/skills/schema.js +0 -4
- package/dist/runtime/skills/tool.js +42 -209
- package/dist/runtime/smiths/delegator.js +1 -1
- package/dist/runtime/smiths/registry.js +1 -1
- package/dist/runtime/tasks/worker.js +12 -44
- package/dist/runtime/trinity.js +1 -1
- package/dist/types/config.js +14 -0
- package/dist/ui/assets/AuditDashboard-93LCGHG1.js +1 -0
- package/dist/ui/assets/{Chat-BNtutgja.js → Chat-CK5sNcQ1.js} +8 -8
- package/dist/ui/assets/{Chronos-3C8RPZcl.js → Chronos-m2h--GEe.js} +1 -1
- package/dist/ui/assets/{ConfirmationModal-ZQPBeJ2Z.js → ConfirmationModal-Dd5pUJme.js} +1 -1
- package/dist/ui/assets/{Dashboard-CqkHzr2F.js → Dashboard-ODwl7d-a.js} +1 -1
- package/dist/ui/assets/{DeleteConfirmationModal-CioxFWn_.js → DeleteConfirmationModal-CCcojDmr.js} +1 -1
- package/dist/ui/assets/Documents-dWnSoxFO.js +7 -0
- package/dist/ui/assets/{Logs-DBVanS0O.js → Logs-Dc9Z2LBj.js} +1 -1
- package/dist/ui/assets/{MCPManager-vXfL3P2U.js → MCPManager-CMkb8vMn.js} +1 -1
- package/dist/ui/assets/{ModelPricing-DyfdunLT.js → ModelPricing-DtHPPbEQ.js} +1 -1
- package/dist/ui/assets/{Notifications-VL-vep6d.js → Notifications-BPvo-DWP.js} +1 -1
- package/dist/ui/assets/{Pagination-oTGieBLM.js → Pagination-BHZKk42X.js} +1 -1
- package/dist/ui/assets/{SatiMemories-jaadkW0U.js → SatiMemories-BUPu1Lxr.js} +1 -1
- package/dist/ui/assets/SessionAudit-CFKF4DA8.js +9 -0
- package/dist/ui/assets/Settings-C4JrXfsR.js +47 -0
- package/dist/ui/assets/{Skills-DE3zziXL.js → Skills-BUlvJgJ4.js} +1 -1
- package/dist/ui/assets/{Smiths-pmogN1mU.js → Smiths-CDtJdY0I.js} +1 -1
- package/dist/ui/assets/{Tasks-Bs8s34Jc.js → Tasks-DK_cOsNK.js} +1 -1
- package/dist/ui/assets/{TrinityDatabases-D7uihcdp.js → TrinityDatabases-X07by-19.js} +1 -1
- package/dist/ui/assets/{UsageStats-B9gePLZ0.js → UsageStats-dYcgckLq.js} +1 -1
- package/dist/ui/assets/{WebhookManager-B2L3rCLM.js → WebhookManager-DDw5eX2R.js} +1 -1
- package/dist/ui/assets/{audit-Cggeu9mM.js → audit-DZ5WLUEm.js} +1 -1
- package/dist/ui/assets/{chronos-D3-sWhfU.js → chronos-B_HI4mlq.js} +1 -1
- package/dist/ui/assets/{config-CBqRUPgn.js → config-B-YxlVrc.js} +1 -1
- package/dist/ui/assets/index-DVjwJ8jT.css +1 -0
- package/dist/ui/assets/{index-zKplfrXZ.js → index-DfJwcKqG.js} +5 -5
- package/dist/ui/assets/{mcp-uL1R9hyA.js → mcp-k-_pwbqA.js} +1 -1
- package/dist/ui/assets/{skills-jmw8yTJs.js → skills-xMXangks.js} +1 -1
- package/dist/ui/assets/{stats-HOms6GnM.js → stats-C4QZIv5O.js} +1 -1
- package/dist/ui/assets/{vendor-icons-DMd9RGvJ.js → vendor-icons-NHF9HNeN.js} +1 -1
- package/dist/ui/index.html +3 -3
- package/dist/ui/sw.js +1 -1
- package/package.json +3 -1
- package/dist/runtime/__tests__/keymaker.test.js +0 -148
- package/dist/runtime/keymaker.js +0 -157
- package/dist/ui/assets/AuditDashboard-DliJ1CX0.js +0 -1
- package/dist/ui/assets/SessionAudit-BsXrWlwz.js +0 -9
- package/dist/ui/assets/Settings-B4eezRcg.js +0 -47
- package/dist/ui/assets/index-D4fzIKy1.css +0 -1
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { homedir } from 'os';
|
|
5
|
+
import { randomUUID } from 'crypto';
|
|
6
|
+
import loadVecExtension from './memory/sqlite-vec.js';
|
|
7
|
+
import { DisplayManager } from './display.js';
|
|
8
|
+
// ─── Repository ──────────────────────────────────────────────────────────────
|
|
9
|
+
// Dimensionality of the vectors stored in the `embeddings` vec0 table.
const EMBEDDING_DIM = 384;
// Maximum number of chunk ids bound into a single `DELETE ... IN (...)`
// statement, so very large documents never approach SQLite's bound-parameter limit.
const EMBEDDING_DELETE_BATCH = 500;
/**
 * SQLite-backed persistence for Link documents, their text chunks, the chunk
 * embeddings (sqlite-vec `vec0` table) and the FTS5 index used for BM25 search.
 *
 * Exposed as a process-wide singleton through `getInstance()`. `initialize()`
 * must be called before any data method; every data method throws
 * `Error('DB not initialized')` otherwise.
 */
export class LinkRepository {
    static instance = null;
    db = null;
    dbPath;
    display = DisplayManager.getInstance();
    /**
     * @param {string} [dbPath] Database file location; falls back to
     *   `~/.morpheus/memory/link.db` when omitted or empty.
     */
    constructor(dbPath) {
        this.dbPath = dbPath || path.join(homedir(), '.morpheus', 'memory', 'link.db');
    }
    /**
     * Return the shared repository, creating it on first use.
     * `dbPath` is only honoured by the call that creates the instance.
     */
    static getInstance(dbPath) {
        if (!LinkRepository.instance) {
            LinkRepository.instance = new LinkRepository(dbPath);
        }
        return LinkRepository.instance;
    }
    /** Close the shared instance's connection (if any) and forget the instance. */
    static resetInstance() {
        // Go through close() so the handle is also nulled on the old object.
        LinkRepository.instance?.close();
        LinkRepository.instance = null;
    }
    /**
     * Open the database, load the sqlite-vec extension and create the schema.
     * Idempotent: a second call while the connection is still open is a no-op,
     * so the earlier handle is never leaked.
     */
    initialize() {
        if (this.db?.open) {
            return;
        }
        fs.ensureDirSync(path.dirname(this.dbPath));
        const db = new Database(this.dbPath, { timeout: 5000 });
        try {
            db.pragma('journal_mode = WAL');
            db.pragma('foreign_keys = ON');
            loadVecExtension(db);
            this.db = db;
            this.createSchema();
        }
        catch (err) {
            // Do not keep (or leak) a half-initialised connection.
            this.db = null;
            try {
                db.close();
            }
            catch {
                // The setup error is the one worth reporting; ignore close failures.
            }
            throw err;
        }
    }
    /**
     * Return the open database handle.
     * @throws {Error} 'DB not initialized' when `initialize()` has not run.
     */
    requireDb() {
        if (!this.db)
            throw new Error('DB not initialized');
        return this.db;
    }
    /** Create tables, indexes and FTS-sync triggers if they do not exist yet. */
    createSchema() {
        const db = this.requireDb();
        // Documents table
        db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        filename TEXT NOT NULL,
        file_path TEXT NOT NULL UNIQUE,
        file_hash TEXT NOT NULL,
        file_size INTEGER NOT NULL,
        status TEXT NOT NULL DEFAULT 'pending',
        error_message TEXT,
        chunk_count INTEGER DEFAULT 0,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );
    `);
        // Chunks table
        db.exec(`
      CREATE TABLE IF NOT EXISTS chunks (
        id TEXT PRIMARY KEY,
        document_id TEXT NOT NULL,
        position INTEGER NOT NULL,
        content TEXT NOT NULL,
        char_start INTEGER NOT NULL,
        char_end INTEGER NOT NULL,
        created_at TEXT NOT NULL,
        UNIQUE(document_id, position),
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      );
    `);
        // Embeddings table (sqlite-vec). It has no foreign key, so rows here are
        // NOT removed by the documents -> chunks cascade; see deleteDocument().
        db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
        chunk_id TEXT PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );
    `);
        // FTS5 virtual table for BM25 search (external content: chunks).
        // NOTE(review): chunks has a TEXT primary key, so the rowid this index is
        // keyed on is the implicit one, which SQLite does not guarantee to keep
        // stable across VACUUM — confirm VACUUM is never run on this database.
        db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
        content,
        content='chunks',
        content_rowid='rowid',
        tokenize='porter unicode61'
      );
    `);
        // Indexes
        db.exec(`
      CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
      CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(file_hash);
      CREATE INDEX IF NOT EXISTS idx_chunks_document ON chunks(document_id);
    `);
        // Triggers to keep FTS in sync
        db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;
    `);
    }
    // ─── Document CRUD ────────────────────────────────────────────────────────
    /**
     * Insert a new document in `pending` state.
     * @param {{filename: string, file_path: string, file_hash: string, file_size: number}} input
     * @returns the stored document record.
     */
    createDocument(input) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const id = randomUUID();
        db.prepare(`
      INSERT INTO documents (id, filename, file_path, file_hash, file_size, status, error_message, chunk_count, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, 'pending', NULL, 0, ?, ?)
    `).run(id, input.filename, input.file_path, input.file_hash, input.file_size, now, now);
        return this.getDocument(id);
    }
    /** @returns the document with this id, or `null`. */
    getDocument(id) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE id = ?').get(id);
        return row ? this.deserializeDocument(row) : null;
    }
    /** @returns the document stored for this file path, or `null`. */
    getDocumentByPath(file_path) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE file_path = ?').get(file_path);
        return row ? this.deserializeDocument(row) : null;
    }
    /** @returns one document with this content hash, or `null`. */
    getDocumentByHash(hash) {
        const row = this.requireDb().prepare('SELECT * FROM documents WHERE file_hash = ?').get(hash);
        return row ? this.deserializeDocument(row) : null;
    }
    /**
     * List documents, most recently updated first.
     * @param {string} [status] Only return documents in this status when given.
     */
    listDocuments(status) {
        const db = this.requireDb();
        let query = 'SELECT * FROM documents';
        const params = [];
        if (status) {
            query += ' WHERE status = ?';
            params.push(status);
        }
        query += ' ORDER BY updated_at DESC';
        const rows = db.prepare(query).all(...params);
        return rows.map((r) => this.deserializeDocument(r));
    }
    /**
     * Set a document's status and error message (cleared when omitted).
     * @returns the updated document, or `null` if the id does not exist.
     */
    updateDocumentStatus(id, status, error_message) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        db.prepare(`
      UPDATE documents SET status = ?, error_message = ?, updated_at = ? WHERE id = ?
    `).run(status, error_message ?? null, now, id);
        return this.getDocument(id);
    }
    /** Record the chunk count and mark the document as `indexed`. */
    updateDocumentChunkCount(id, chunk_count) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        db.prepare(`
      UPDATE documents SET chunk_count = ?, status = 'indexed', updated_at = ? WHERE id = ?
    `).run(chunk_count, now, id);
    }
    /**
     * Delete a document together with its chunks and embeddings.
     * @returns {boolean} whether a document row was removed.
     */
    deleteDocument(id) {
        const db = this.requireDb();
        const remove = db.transaction((documentId) => {
            // Embeddings live in a vec0 virtual table with no foreign key, so the
            // cascade does not reach them. They must go first, while the chunk
            // rows that identify them still exist.
            this.deleteEmbeddingsByDocument(documentId);
            // ON DELETE CASCADE removes the chunks; the chunks_ad trigger keeps FTS in sync.
            const result = db.prepare('DELETE FROM documents WHERE id = ?').run(documentId);
            return result.changes > 0;
        });
        return remove(id);
    }
    /**
     * Delete the document stored for `file_path`, including chunks and embeddings.
     * @returns {boolean} whether a document row was removed.
     */
    deleteDocumentByPath(file_path) {
        const row = this.requireDb().prepare('SELECT id FROM documents WHERE file_path = ?').get(file_path);
        return row ? this.deleteDocument(row.id) : false;
    }
    // ─── Chunk CRUD ───────────────────────────────────────────────────────────
    /**
     * Insert one chunk.
     * @param {{document_id: string, position: number, content: string, char_start: number, char_end: number}} input
     * @returns the stored chunk record.
     */
    createChunk(input) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const id = randomUUID();
        db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(id, input.document_id, input.position, input.content, input.char_start, input.char_end, now);
        return this.getChunk(id);
    }
    /** Insert many chunks atomically (all rows or none). Returns nothing. */
    createChunks(inputs) {
        const db = this.requireDb();
        const now = new Date().toISOString();
        const stmt = db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
        const insertMany = db.transaction((items) => {
            for (const input of items) {
                stmt.run(randomUUID(), input.document_id, input.position, input.content, input.char_start, input.char_end, now);
            }
        });
        insertMany(inputs);
    }
    /** @returns the chunk with this id, or `null`. */
    getChunk(id) {
        const row = this.requireDb().prepare('SELECT * FROM chunks WHERE id = ?').get(id);
        return row ? this.deserializeChunk(row) : null;
    }
    /** @returns all chunks of a document ordered by `position`. */
    getChunksByDocument(document_id) {
        const rows = this.requireDb().prepare('SELECT * FROM chunks WHERE document_id = ? ORDER BY position').all(document_id);
        return rows.map((r) => this.deserializeChunk(r));
    }
    /** Delete all chunks of a document, and the embeddings that belong to them. */
    deleteChunksByDocument(document_id) {
        const db = this.requireDb();
        db.transaction(() => {
            // Once the chunk rows are gone their embeddings can no longer be
            // located, so remove the embeddings first.
            this.deleteEmbeddingsByDocument(document_id);
            db.prepare('DELETE FROM chunks WHERE document_id = ?').run(document_id);
        })();
    }
    // ─── Embeddings ───────────────────────────────────────────────────────────
    /**
     * Store the embedding of one chunk.
     * @param {string} chunk_id
     * @param {ArrayLike<number>} embedding Vector values; stored as float32.
     */
    createEmbedding(chunk_id, embedding) {
        const db = this.requireDb();
        const embeddingBlob = new Float32Array(embedding);
        db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `).run(chunk_id, embeddingBlob);
    }
    /**
     * Store many embeddings atomically.
     * @param {Array<{chunk_id: string, embedding: ArrayLike<number>}>} items
     */
    createEmbeddings(items) {
        const db = this.requireDb();
        const stmt = db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `);
        const insertMany = db.transaction((entries) => {
            for (const item of entries) {
                stmt.run(item.chunk_id, new Float32Array(item.embedding));
            }
        });
        insertMany(items);
    }
    /**
     * Delete the embeddings of every chunk that currently belongs to a document.
     * Must run before the chunk rows themselves are deleted.
     */
    deleteEmbeddingsByDocument(document_id) {
        const db = this.requireDb();
        const chunkIds = db
            .prepare('SELECT id FROM chunks WHERE document_id = ?')
            .all(document_id)
            .map((c) => c.id);
        if (chunkIds.length === 0)
            return;
        db.transaction(() => {
            // Bounded batches keep the number of bound parameters per statement small.
            for (let start = 0; start < chunkIds.length; start += EMBEDDING_DELETE_BATCH) {
                const batch = chunkIds.slice(start, start + EMBEDDING_DELETE_BATCH);
                const placeholders = batch.map(() => '?').join(',');
                db.prepare(`DELETE FROM embeddings WHERE chunk_id IN (${placeholders})`).run(...batch);
            }
        })();
    }
    // ─── Stats ─────────────────────────────────────────────────────────────────
    /** @returns {{documents_total: number, documents_indexed: number, chunks_total: number}} */
    getStats() {
        const db = this.requireDb();
        const count = (sql) => db.prepare(sql).get().cnt;
        const documents_total = count('SELECT COUNT(*) as cnt FROM documents');
        const documents_indexed = count("SELECT COUNT(*) as cnt FROM documents WHERE status = 'indexed'");
        const chunks_total = count('SELECT COUNT(*) as cnt FROM chunks');
        return { documents_total, documents_indexed, chunks_total };
    }
    // ─── Cleanup ───────────────────────────────────────────────────────────────
    /** Close the connection, if open. Safe to call repeatedly. */
    close() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
    }
    // ─── Deserializers ─────────────────────────────────────────────────────────
    /** Map a `documents` row to a plain record, defaulting nullable columns. */
    deserializeDocument(row) {
        return {
            id: row.id,
            filename: row.filename,
            file_path: row.file_path,
            file_hash: row.file_hash,
            file_size: row.file_size,
            status: row.status,
            error_message: row.error_message ?? null,
            chunk_count: row.chunk_count ?? 0,
            created_at: row.created_at,
            updated_at: row.updated_at,
        };
    }
    /** Map a `chunks` row to a plain record. */
    deserializeChunk(row) {
        return {
            id: row.id,
            document_id: row.document_id,
            position: row.position,
            content: row.content,
            char_start: row.char_start,
            char_end: row.char_end,
            created_at: row.created_at,
        };
    }
}
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
import { LinkRepository } from './link-repository.js';
|
|
2
|
+
import { ConfigManager } from '../config/manager.js';
|
|
3
|
+
import { EmbeddingService } from './memory/embedding.service.js';
|
|
4
|
+
/**
 * LinkSearch - hybrid search over Link document chunks.
 *
 * Combines vector similarity (sqlite-vec cosine distance) with BM25 full-text
 * search (FTS5). Each result list is min-max normalised to 0..1 and the two
 * scores are blended with the `vector_weight` / `bm25_weight` values read from
 * the Link configuration.
 *
 * Exposed as a singleton; call `initialize()` after the LinkRepository has been
 * initialised and before searching.
 */
export class LinkSearch {
    static instance = null;
    repository;
    db = null;
    embeddingService = null;
    constructor() {
        this.repository = LinkRepository.getInstance();
    }
    /** Return the shared LinkSearch, creating it on first use. */
    static getInstance() {
        if (!LinkSearch.instance) {
            LinkSearch.instance = new LinkSearch();
        }
        return LinkSearch.instance;
    }
    /** Forget the shared instance (does not close the database). */
    static resetInstance() {
        LinkSearch.instance = null;
    }
    /** Capture the repository's database handle and obtain the embedding service. */
    async initialize() {
        // Get the database from the repository
        this.db = this.repository.db;
        // Initialize embedding service
        this.embeddingService = await EmbeddingService.getInstance();
    }
    /**
     * Turn free text into an FTS5 MATCH expression that cannot be misread as
     * query syntax.
     *
     * Everything except letters, digits and whitespace is dropped, then every
     * remaining token is emitted as a quoted FTS5 string. Quoting matters:
     * a bare uppercase AND / OR / NOT / NEAR is an FTS5 operator, so user text
     * such as "NOT working" would otherwise raise a syntax error. Tokens are
     * space-separated, which FTS5 treats as an implicit AND.
     *
     * @param {string} text Raw user query.
     * @returns {string} MATCH expression, or '' when no searchable token remains.
     */
    static toFtsQuery(text) {
        return text
            .replace(/[^\p{L}\p{N}\s]/gu, ' ')
            .split(/\s+/)
            .filter((token) => token.length > 0)
            .map((token) => `"${token}"`)
            .join(' ');
    }
    /**
     * Perform vector similarity search using sqlite-vec.
     *
     * @param {ArrayLike<number>} queryEmbedding Query vector.
     * @param {number} limit Maximum number of rows.
     * @param {string|null} [documentId] When provided (anything but `undefined`),
     *   only chunks of that document are considered.
     * @returns {Array} rows with `score = 1 - cosine distance`, best first.
     * @throws {Error} 'LinkSearch not initialized' when no database is attached.
     */
    vectorSearch(queryEmbedding, limit, documentId) {
        if (!this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const embeddingBlob = new Float32Array(queryEmbedding);
        const scoped = documentId !== undefined;
        const params = scoped ? [embeddingBlob, documentId, limit] : [embeddingBlob, limit];
        // Query vector similarity using cosine distance
        const rows = this.db.prepare(`
      SELECT
        e.chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        vec_distance_cosine(e.embedding, ?) as distance
      FROM embeddings e
      JOIN chunks c ON e.chunk_id = c.id
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'${scoped ? ' AND c.document_id = ?' : ''}
      ORDER BY distance ASC
      LIMIT ?
    `).all(...params);
        // Convert distance to similarity score (1 - distance for cosine)
        return rows.map((row) => ({
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: 1 - row.distance,
        }));
    }
    /**
     * Perform BM25 full-text search using FTS5.
     *
     * @param {string} query Raw user query; sanitised with `toFtsQuery`.
     * @param {number} limit Maximum number of rows.
     * @param {string|null} [documentId] When provided (anything but `undefined`),
     *   only chunks of that document are considered.
     * @returns {Array} rows with a positive-is-better `score`, best first;
     *   empty when the query has no searchable token.
     * @throws {Error} 'LinkSearch not initialized' when no database is attached.
     */
    bm25Search(query, limit, documentId) {
        if (!this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const matchExpr = LinkSearch.toFtsQuery(query);
        // Return empty results if query is empty after sanitization
        if (!matchExpr) {
            return [];
        }
        const scoped = documentId !== undefined;
        const params = scoped ? [documentId, matchExpr, limit] : [matchExpr, limit];
        const rows = this.db.prepare(`
      SELECT
        c.id as chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        bm25(chunks_fts) as bm25_score
      FROM chunks_fts fts
      JOIN chunks c ON c.rowid = fts.rowid
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'${scoped ? '\n        AND c.document_id = ?' : ''}
        AND chunks_fts MATCH ?
      ORDER BY bm25_score ASC
      LIMIT ?
    `).all(...params);
        return rows.map((row) => ({
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: -row.bm25_score, // Negate since BM25 returns negative for better matches
        }));
    }
    /**
     * Normalize scores to 0-1 range using min-max scaling.
     * When every score is identical (including a single result) all scores become 1.
     * Returns new objects; the input is not mutated.
     */
    normalizeScores(results) {
        if (results.length === 0)
            return results;
        const scores = results.map((r) => r.score);
        const min = Math.min(...scores);
        const range = Math.max(...scores) - min;
        if (range === 0) {
            // All scores are the same
            return results.map((r) => ({ ...r, score: 1 }));
        }
        return results.map((r) => ({
            ...r,
            score: (r.score - min) / range,
        }));
    }
    /**
     * Perform hybrid search combining vector and BM25 results.
     *
     * @param {ArrayLike<number>} queryEmbedding Query vector.
     * @param {string} queryText Raw query text for BM25.
     * @param {number} limit Maximum number of results returned.
     * @param {number} threshold Minimum combined score (inclusive).
     * @param {string|null} [documentId] Restrict both searches to one document
     *   when provided (anything but `undefined`).
     * @returns {Array} results sorted by combined score, best first, each with
     *   `score`, `vector_score` and `bm25_score`.
     */
    hybridSearch(queryEmbedding, queryText, limit, threshold, documentId) {
        const config = ConfigManager.getInstance().getLinkConfig();
        const vectorWeight = config.vector_weight;
        const bm25Weight = config.bm25_weight;
        // Over-fetch from both methods so the merge has enough candidates.
        const fetchLimit = limit * 3;
        const vectorHits = this.normalizeScores(this.vectorSearch(queryEmbedding, fetchLimit, documentId));
        const bm25Hits = this.normalizeScores(this.bm25Search(queryText, fetchLimit, documentId));
        const bm25ById = new Map(bm25Hits.map((r) => [r.chunk_id, r]));
        const seen = new Set();
        const combined = [];
        // A chunk found by only one method scores 0 for the other.
        const add = (hit, vectorScore, bm25Score) => {
            seen.add(hit.chunk_id);
            combined.push({
                chunk_id: hit.chunk_id,
                document_id: hit.document_id,
                filename: hit.filename,
                position: hit.position,
                content: hit.content,
                score: (vectorScore * vectorWeight) + (bm25Score * bm25Weight),
                vector_score: vectorScore,
                bm25_score: bm25Score,
            });
        };
        for (const hit of vectorHits) {
            if (!seen.has(hit.chunk_id)) {
                add(hit, hit.score, bm25ById.get(hit.chunk_id)?.score ?? 0);
            }
        }
        for (const hit of bm25Hits) {
            if (!seen.has(hit.chunk_id)) {
                add(hit, 0, hit.score);
            }
        }
        // Filter by threshold, sort by combined score, cap at limit.
        return combined
            .filter((r) => r.score >= threshold)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Search with a text query (generates embedding internally).
     * `limit` and `threshold` default to the configured `max_results` and
     * `score_threshold`.
     * @throws {Error} 'LinkSearch not initialized' before `initialize()`.
     */
    async search(queryText, limit, threshold) {
        if (!this.embeddingService) {
            throw new Error('LinkSearch not initialized');
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        // Generate embedding for the query
        const queryEmbedding = await this.embeddingService.generate(queryText);
        return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold);
    }
    /**
     * Search within a specific document by document_id.
     * Runs vector + BM25 search filtered to chunks belonging to that document.
     * `limit` and `threshold` default to the configured `max_results` and
     * `score_threshold`.
     * @throws {Error} 'LinkSearch not initialized' before `initialize()`.
     */
    async searchInDocument(queryText, documentId, limit, threshold) {
        if (!this.embeddingService || !this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        // Generate embedding for the query
        const queryEmbedding = await this.embeddingService.generate(queryText);
        // `?? null` keeps the document filter active even if the id is missing,
        // so a bad call matches nothing instead of searching every document.
        return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold, documentId ?? null);
    }
}
|