morpheus-cli 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/channels/discord.js +71 -21
- package/dist/channels/telegram.js +73 -19
- package/dist/cli/commands/restart.js +15 -0
- package/dist/cli/commands/start.js +18 -0
- package/dist/config/manager.js +61 -0
- package/dist/config/paths.js +1 -0
- package/dist/config/schemas.js +11 -3
- package/dist/http/api.js +3 -0
- package/dist/http/routers/link.js +239 -0
- package/dist/http/routers/skills.js +1 -8
- package/dist/runtime/apoc.js +1 -1
- package/dist/runtime/audit/repository.js +1 -1
- package/dist/runtime/link-chunker.js +214 -0
- package/dist/runtime/link-repository.js +301 -0
- package/dist/runtime/link-search.js +298 -0
- package/dist/runtime/link-worker.js +284 -0
- package/dist/runtime/link.js +295 -0
- package/dist/runtime/memory/sati/service.js +1 -1
- package/dist/runtime/memory/sqlite.js +52 -0
- package/dist/runtime/neo.js +1 -1
- package/dist/runtime/oracle.js +81 -44
- package/dist/runtime/scaffold.js +4 -17
- package/dist/runtime/skills/__tests__/loader.test.js +7 -10
- package/dist/runtime/skills/__tests__/registry.test.js +2 -18
- package/dist/runtime/skills/__tests__/tool.test.js +55 -224
- package/dist/runtime/skills/index.js +1 -2
- package/dist/runtime/skills/loader.js +0 -2
- package/dist/runtime/skills/registry.js +8 -20
- package/dist/runtime/skills/schema.js +0 -4
- package/dist/runtime/skills/tool.js +42 -209
- package/dist/runtime/smiths/delegator.js +1 -1
- package/dist/runtime/smiths/registry.js +1 -1
- package/dist/runtime/tasks/worker.js +12 -44
- package/dist/runtime/trinity.js +1 -1
- package/dist/types/config.js +14 -0
- package/dist/ui/assets/AuditDashboard-93LCGHG1.js +1 -0
- package/dist/ui/assets/{Chat-BNtutgja.js → Chat-CK5sNcQ1.js} +8 -8
- package/dist/ui/assets/{Chronos-3C8RPZcl.js → Chronos-m2h--GEe.js} +1 -1
- package/dist/ui/assets/{ConfirmationModal-ZQPBeJ2Z.js → ConfirmationModal-Dd5pUJme.js} +1 -1
- package/dist/ui/assets/{Dashboard-CqkHzr2F.js → Dashboard-ODwl7d-a.js} +1 -1
- package/dist/ui/assets/{DeleteConfirmationModal-CioxFWn_.js → DeleteConfirmationModal-CCcojDmr.js} +1 -1
- package/dist/ui/assets/Documents-dWnSoxFO.js +7 -0
- package/dist/ui/assets/{Logs-DBVanS0O.js → Logs-Dc9Z2LBj.js} +1 -1
- package/dist/ui/assets/{MCPManager-vXfL3P2U.js → MCPManager-CMkb8vMn.js} +1 -1
- package/dist/ui/assets/{ModelPricing-DyfdunLT.js → ModelPricing-DtHPPbEQ.js} +1 -1
- package/dist/ui/assets/{Notifications-VL-vep6d.js → Notifications-BPvo-DWP.js} +1 -1
- package/dist/ui/assets/{Pagination-oTGieBLM.js → Pagination-BHZKk42X.js} +1 -1
- package/dist/ui/assets/{SatiMemories-jaadkW0U.js → SatiMemories-BUPu1Lxr.js} +1 -1
- package/dist/ui/assets/SessionAudit-CFKF4DA8.js +9 -0
- package/dist/ui/assets/Settings-C4JrXfsR.js +47 -0
- package/dist/ui/assets/{Skills-DE3zziXL.js → Skills-BUlvJgJ4.js} +1 -1
- package/dist/ui/assets/{Smiths-pmogN1mU.js → Smiths-CDtJdY0I.js} +1 -1
- package/dist/ui/assets/{Tasks-Bs8s34Jc.js → Tasks-DK_cOsNK.js} +1 -1
- package/dist/ui/assets/{TrinityDatabases-D7uihcdp.js → TrinityDatabases-X07by-19.js} +1 -1
- package/dist/ui/assets/{UsageStats-B9gePLZ0.js → UsageStats-dYcgckLq.js} +1 -1
- package/dist/ui/assets/{WebhookManager-B2L3rCLM.js → WebhookManager-DDw5eX2R.js} +1 -1
- package/dist/ui/assets/{audit-Cggeu9mM.js → audit-DZ5WLUEm.js} +1 -1
- package/dist/ui/assets/{chronos-D3-sWhfU.js → chronos-B_HI4mlq.js} +1 -1
- package/dist/ui/assets/{config-CBqRUPgn.js → config-B-YxlVrc.js} +1 -1
- package/dist/ui/assets/index-DVjwJ8jT.css +1 -0
- package/dist/ui/assets/{index-zKplfrXZ.js → index-DfJwcKqG.js} +5 -5
- package/dist/ui/assets/{mcp-uL1R9hyA.js → mcp-k-_pwbqA.js} +1 -1
- package/dist/ui/assets/{skills-jmw8yTJs.js → skills-xMXangks.js} +1 -1
- package/dist/ui/assets/{stats-HOms6GnM.js → stats-C4QZIv5O.js} +1 -1
- package/dist/ui/assets/{vendor-icons-DMd9RGvJ.js → vendor-icons-NHF9HNeN.js} +1 -1
- package/dist/ui/index.html +3 -3
- package/dist/ui/sw.js +1 -1
- package/package.json +3 -1
- package/dist/runtime/__tests__/keymaker.test.js +0 -148
- package/dist/runtime/keymaker.js +0 -157
- package/dist/ui/assets/AuditDashboard-DliJ1CX0.js +0 -1
- package/dist/ui/assets/SessionAudit-BsXrWlwz.js +0 -9
- package/dist/ui/assets/Settings-B4eezRcg.js +0 -47
- package/dist/ui/assets/index-D4fzIKy1.css +0 -1
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
import { LinkRepository } from './link-repository.js';
|
|
2
|
+
import { ConfigManager } from '../config/manager.js';
|
|
3
|
+
import { EmbeddingService } from './memory/embedding.service.js';
|
|
4
|
+
/**
|
|
5
|
+
* LinkSearch - Hybrid search for Link documents
|
|
6
|
+
*
|
|
7
|
+
* Combines vector similarity search (80% weight) with BM25 text search (20% weight)
|
|
8
|
+
* for optimal retrieval of relevant document chunks.
|
|
9
|
+
*/
|
|
10
|
+
export class LinkSearch {
    static instance = null;
    // LinkRepository singleton providing the SQLite handle.
    repository;
    // better-sqlite3-style database handle, borrowed from the repository in initialize().
    db = null;
    // Sati EmbeddingService used to embed query text; set in initialize().
    embeddingService = null;

    constructor() {
        this.repository = LinkRepository.getInstance();
    }

    /** Lazily create and return the process-wide singleton. */
    static getInstance() {
        if (!LinkSearch.instance) {
            LinkSearch.instance = new LinkSearch();
        }
        return LinkSearch.instance;
    }

    /** Drop the singleton so the next getInstance() builds a fresh one (test hook). */
    static resetInstance() {
        LinkSearch.instance = null;
    }

    /**
     * Acquire the repository's database handle and the embedding service.
     * Must complete before any search method is called; the search methods
     * throw 'LinkSearch not initialized' otherwise.
     */
    async initialize() {
        // Get the database from the repository
        this.db = this.repository.db;
        // Initialize embedding service
        this.embeddingService = await EmbeddingService.getInstance();
    }

    /**
     * Map a raw vector-search row to a search result.
     * Cosine distance is converted to a similarity score (1 - distance).
     */
    #fromVectorRow(row) {
        return {
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: 1 - row.distance,
        };
    }

    /**
     * Map a raw BM25 row to a search result.
     * FTS5's bm25() returns more-negative values for better matches, so negate.
     */
    #fromBm25Row(row) {
        return {
            chunk_id: row.chunk_id,
            document_id: row.document_id,
            filename: row.filename,
            position: row.position,
            content: row.content,
            score: -row.bm25_score,
        };
    }

    /**
     * Sanitize free text for use with FTS5 MATCH: strip characters that could
     * break FTS5 syntax (?, *, quotes, OR, etc.), keeping only letters, numbers
     * and single spaces. May return '' (caller must handle the empty case).
     */
    #sanitizeFtsQuery(query) {
        return query
            .replace(/[^\p{L}\p{N}\s]/gu, ' ')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Perform vector similarity search using sqlite-vec.
     * @throws {Error} if initialize() has not run.
     */
    vectorSearch(queryEmbedding, limit) {
        if (!this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const embeddingBlob = new Float32Array(queryEmbedding);
        // Query vector similarity using cosine distance
        const rows = this.db.prepare(`
      SELECT
        e.chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        vec_distance_cosine(e.embedding, ?) as distance
      FROM embeddings e
      JOIN chunks c ON e.chunk_id = c.id
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'
      ORDER BY distance ASC
      LIMIT ?
    `).all(embeddingBlob, limit);
        return rows.map(row => this.#fromVectorRow(row));
    }

    /**
     * Perform BM25 full-text search using FTS5.
     * Returns [] when the sanitized query is empty.
     * @throws {Error} if initialize() has not run.
     */
    bm25Search(query, limit) {
        if (!this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const escapedQuery = this.#sanitizeFtsQuery(query);
        // Return empty results if query is empty after sanitization
        if (!escapedQuery) {
            return [];
        }
        const rows = this.db.prepare(`
      SELECT
        c.id as chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        bm25(chunks_fts) as bm25_score
      FROM chunks_fts fts
      JOIN chunks c ON c.rowid = fts.rowid
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed'
        AND chunks_fts MATCH ?
      ORDER BY bm25_score ASC
      LIMIT ?
    `).all(escapedQuery, limit);
        return rows.map(row => this.#fromBm25Row(row));
    }

    /**
     * Normalize scores to the 0-1 range using min-max scaling.
     * When all scores are equal (range 0) every result gets score 1.
     */
    normalizeScores(results) {
        if (results.length === 0)
            return results;
        const scores = results.map(r => r.score);
        const min = Math.min(...scores);
        const max = Math.max(...scores);
        const range = max - min;
        if (range === 0) {
            // All scores are the same
            return results.map(r => ({ ...r, score: 1 }));
        }
        return results.map(r => ({
            ...r,
            score: (r.score - min) / range,
        }));
    }

    /**
     * Normalize both result sets, merge them by chunk_id with a weighted
     * combination of scores, then filter by threshold, sort descending and cap
     * at `limit`. Shared by hybridSearch() and searchInDocument().
     */
    #mergeResults(vectorResults, bm25Results, vectorWeight, bm25Weight, limit, threshold) {
        const vectorMap = new Map(this.normalizeScores(vectorResults).map(r => [r.chunk_id, r]));
        const bm25Map = new Map(this.normalizeScores(bm25Results).map(r => [r.chunk_id, r]));
        const combined = [];
        for (const chunkId of new Set([...vectorMap.keys(), ...bm25Map.keys()])) {
            const vResult = vectorMap.get(chunkId);
            const bResult = bm25Map.get(chunkId);
            // A chunk missing from one method simply contributes 0 for that side.
            const vectorScore = vResult?.score ?? 0;
            const bm25Score = bResult?.score ?? 0;
            // Get the data from whichever result has it
            const data = vResult || bResult;
            if (!data)
                continue;
            combined.push({
                chunk_id: chunkId,
                document_id: data.document_id,
                filename: data.filename,
                position: data.position,
                content: data.content,
                score: (vectorScore * vectorWeight) + (bm25Score * bm25Weight),
                vector_score: vectorScore,
                bm25_score: bm25Score,
            });
        }
        return combined
            .filter(r => r.score >= threshold)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }

    /**
     * Perform hybrid search combining vector and BM25 results with the
     * configured weights.
     */
    hybridSearch(queryEmbedding, queryText, limit, threshold) {
        const config = ConfigManager.getInstance().getLinkConfig();
        // Fetch more candidates than requested so the merge has overlap to work with.
        const fetchLimit = limit * 3;
        const vectorResults = this.vectorSearch(queryEmbedding, fetchLimit);
        const bm25Results = this.bm25Search(queryText, fetchLimit);
        return this.#mergeResults(vectorResults, bm25Results, config.vector_weight, config.bm25_weight, limit, threshold);
    }

    /**
     * Search with a text query (generates the embedding internally).
     * `limit`/`threshold` default to the configured max_results/score_threshold.
     */
    async search(queryText, limit, threshold) {
        if (!this.embeddingService) {
            throw new Error('LinkSearch not initialized');
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        // Generate embedding for the query
        const queryEmbedding = await this.embeddingService.generate(queryText);
        return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold);
    }

    /**
     * Search within a specific document by document_id.
     * Runs vector + BM25 search filtered to chunks belonging to that document,
     * then merges exactly like hybridSearch().
     */
    async searchInDocument(queryText, documentId, limit, threshold) {
        if (!this.embeddingService || !this.db) {
            throw new Error('LinkSearch not initialized');
        }
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxResults = limit ?? config.max_results;
        const minThreshold = threshold ?? config.score_threshold;
        const fetchLimit = maxResults * 3;
        // Generate embedding for the query
        const queryEmbedding = await this.embeddingService.generate(queryText);
        const embeddingBlob = new Float32Array(queryEmbedding);
        // Vector search filtered by document
        const vectorRows = this.db.prepare(`
      SELECT
        e.chunk_id,
        c.document_id,
        d.filename,
        c.position,
        c.content,
        vec_distance_cosine(e.embedding, ?) as distance
      FROM embeddings e
      JOIN chunks c ON e.chunk_id = c.id
      JOIN documents d ON c.document_id = d.id
      WHERE d.status = 'indexed' AND c.document_id = ?
      ORDER BY distance ASC
      LIMIT ?
    `).all(embeddingBlob, documentId, fetchLimit);
        const vectorResults = vectorRows.map(row => this.#fromVectorRow(row));
        // BM25 search filtered by document
        const escapedQuery = this.#sanitizeFtsQuery(queryText);
        let bm25Results = [];
        if (escapedQuery) {
            const bm25Rows = this.db.prepare(`
        SELECT
          c.id as chunk_id,
          c.document_id,
          d.filename,
          c.position,
          c.content,
          bm25(chunks_fts) as bm25_score
        FROM chunks_fts fts
        JOIN chunks c ON c.rowid = fts.rowid
        JOIN documents d ON c.document_id = d.id
        WHERE d.status = 'indexed'
          AND c.document_id = ?
          AND chunks_fts MATCH ?
        ORDER BY bm25_score ASC
        LIMIT ?
      `).all(documentId, escapedQuery, fetchLimit);
            bm25Results = bm25Rows.map(row => this.#fromBm25Row(row));
        }
        return this.#mergeResults(vectorResults, bm25Results, config.vector_weight, config.bm25_weight, maxResults, minThreshold);
    }
}
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
import { homedir } from 'os';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import fs from 'fs-extra';
|
|
4
|
+
import { LinkRepository } from './link-repository.js';
|
|
5
|
+
import { LinkSearch } from './link-search.js';
|
|
6
|
+
import { hashFile, processDocument, isSupportedFormat } from './link-chunker.js';
|
|
7
|
+
import { EmbeddingService } from './memory/embedding.service.js';
|
|
8
|
+
import { ConfigManager } from '../config/manager.js';
|
|
9
|
+
import { DisplayManager } from './display.js';
|
|
10
|
+
/**
|
|
11
|
+
* LinkWorker - Background worker for document indexing
|
|
12
|
+
*
|
|
13
|
+
* Scans ~/.morpheus/docs folder, processes new/changed documents,
|
|
14
|
+
* generates embeddings, and removes deleted documents from the index.
|
|
15
|
+
*/
|
|
16
|
+
export class LinkWorker {
    static instance = null;
    // LinkRepository singleton (documents/chunks/embeddings persistence).
    repository;
    // LinkSearch singleton (held so consumers can reach search via the worker).
    search;
    // Sati EmbeddingService; lazily initialized on first tick.
    embeddingService = null;
    // setInterval handle; null when the worker is not scheduled.
    intervalId = null;
    isRunning = false;
    display = DisplayManager.getInstance();
    // Folder scanned for documents: ~/.morpheus/docs
    docsPath;

    constructor() {
        this.repository = LinkRepository.getInstance();
        this.search = LinkSearch.getInstance();
        this.docsPath = path.join(homedir(), '.morpheus', 'docs');
    }

    /** Lazily create and return the process-wide singleton. */
    static getInstance() {
        if (!LinkWorker.instance) {
            LinkWorker.instance = new LinkWorker();
        }
        return LinkWorker.instance;
    }

    /** Replace the singleton (test hook). */
    static setInstance(instance) {
        LinkWorker.instance = instance;
    }

    /** Stop and drop the singleton (test hook). */
    static resetInstance() {
        if (LinkWorker.instance) {
            LinkWorker.instance.stop();
        }
        LinkWorker.instance = null;
    }

    /**
     * Fire-and-forget a tick, converting any rejection into an error log so a
     * failed scan never produces an unhandled rejection. `context` names the
     * trigger in the log message ('initial scan' or 'tick').
     */
    #runTickSafely(context) {
        this.tick().catch(err => {
            this.display.log(`LinkWorker ${context} failed: ${err.message}`, { source: 'Link', level: 'error' });
        });
    }

    /**
     * Start the background worker with interval-based scanning.
     * Idempotent: a second call while running is a no-op.
     */
    start() {
        if (this.isRunning)
            return;
        const config = ConfigManager.getInstance().getLinkConfig();
        const intervalMs = config.scan_interval_ms;
        this.isRunning = true;
        this.display.log('LinkWorker started', { source: 'Link' });
        // Run initial scan immediately
        this.#runTickSafely('initial scan');
        // Schedule periodic scans
        this.intervalId = setInterval(() => this.#runTickSafely('tick'), intervalMs);
    }

    /** Stop the background worker and clear any pending schedule. */
    stop() {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = null;
        }
        this.isRunning = false;
        this.display.log('LinkWorker stopped', { source: 'Link' });
    }

    /**
     * Update the scan interval (hot-reload). Only takes effect while a
     * schedule is active; a stopped worker is left stopped.
     */
    updateInterval(intervalMs) {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = setInterval(() => this.#runTickSafely('tick'), intervalMs);
            this.display.log(`LinkWorker interval updated to ${intervalMs}ms`, { source: 'Link' });
        }
    }

    /**
     * Perform a single scan cycle: index new/changed files, prune deleted ones.
     * Never throws; failures are logged and counted.
     * @returns {{indexed: number, removed: number, errors: number}} cycle stats
     */
    async tick() {
        // Ensure embedding service is initialized
        if (!this.embeddingService) {
            this.embeddingService = await EmbeddingService.getInstance();
        }
        // Ensure docs folder exists
        await fs.ensureDir(this.docsPath);
        const stats = {
            indexed: 0,
            removed: 0,
            errors: 0,
        };
        try {
            // Scan for new/changed documents
            const files = await this.scanFolder();
            this.display.log(`LinkWorker found ${files.length} files`, { source: 'Link', level: 'debug' });
            // Process each file
            for (const filePath of files) {
                try {
                    const result = await this.processDocument(filePath);
                    if (result === 'indexed') {
                        stats.indexed++;
                    }
                    else if (result === 'error') {
                        stats.errors++;
                    }
                }
                catch (err) {
                    this.display.log(`Failed to process ${filePath}: ${err.message}`, { source: 'Link', level: 'error' });
                    stats.errors++;
                }
            }
            // Remove deleted documents
            stats.removed = await this.removeDeletedDocuments(files);
            if (stats.indexed > 0 || stats.removed > 0) {
                this.display.log(`LinkWorker: indexed ${stats.indexed}, removed ${stats.removed}, errors ${stats.errors}`, { source: 'Link', level: 'info' });
            }
        }
        catch (err) {
            this.display.log(`LinkWorker tick error: ${err.message}`, { source: 'Link', level: 'error' });
            stats.errors++;
        }
        return stats;
    }

    /**
     * Scan the docs folder (non-recursively) for files in a supported format.
     * @returns {Promise<string[]>} absolute paths of candidate files
     */
    async scanFolder() {
        const files = [];
        const entries = await fs.readdir(this.docsPath, { withFileTypes: true });
        for (const entry of entries) {
            if (entry.isFile()) {
                const filePath = path.join(this.docsPath, entry.name);
                if (isSupportedFormat(filePath)) {
                    files.push(filePath);
                }
            }
        }
        return files;
    }

    /**
     * Process a single document: hash it, skip if unchanged, otherwise
     * (re)parse, chunk and embed it.
     * @returns {Promise<'indexed'|'skipped'|'error'>}
     */
    async processDocument(filePath) {
        const existingDoc = this.repository.getDocumentByPath(filePath);
        // Calculate file hash
        let fileHash;
        try {
            fileHash = await hashFile(filePath);
        }
        catch (err) {
            // File might not be readable
            if (existingDoc) {
                this.repository.updateDocumentStatus(existingDoc.id, 'error', `Failed to read file: ${err.message}`);
            }
            return 'error';
        }
        // Check if document already indexed with same hash
        if (existingDoc && existingDoc.file_hash === fileHash && existingDoc.status === 'indexed') {
            return 'skipped';
        }
        // Get file stats
        const stats = await fs.stat(filePath);
        const fileSize = stats.size;
        // Check max file size
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxSizeBytes = config.max_file_size_mb * 1024 * 1024;
        if (fileSize > maxSizeBytes) {
            if (existingDoc) {
                this.repository.updateDocumentStatus(existingDoc.id, 'error', `File exceeds max size of ${config.max_file_size_mb}MB`);
            }
            return 'error';
        }
        // Create or update document record
        const filename = path.basename(filePath);
        let document;
        if (existingDoc) {
            // Update existing document - delete old chunks first
            this.repository.deleteChunksByDocument(existingDoc.id);
            this.repository.deleteEmbeddingsByDocument(existingDoc.id);
            this.repository.updateDocumentStatus(existingDoc.id, 'indexing');
            document = existingDoc;
        }
        else {
            // Create new document
            document = this.repository.createDocument({
                filename,
                file_path: filePath,
                file_hash: fileHash,
                file_size: fileSize,
            });
        }
        try {
            // Index the document
            await this.indexDocument(document.id, filePath, fileHash);
            // Update status to indexed
            const chunks = this.repository.getChunksByDocument(document.id);
            this.repository.updateDocumentChunkCount(document.id, chunks.length);
            // Fixed: source had the garbled literal `$(unknown)` here instead of
            // interpolating the file name.
            this.display.log(`Indexed document: ${filename} (${chunks.length} chunks)`, { source: 'Link', level: 'debug' });
            return 'indexed';
        }
        catch (err) {
            this.repository.updateDocumentStatus(document.id, 'error', err.message);
            return 'error';
        }
    }

    /**
     * Index a document: parse, chunk, and generate embeddings.
     * @throws {Error} if the file changes (hash mismatch) mid-processing
     */
    async indexDocument(documentId, filePath, fileHash) {
        const config = ConfigManager.getInstance().getLinkConfig();
        const chunkSize = config.chunk_size;
        // Parse and chunk the document
        const processed = await processDocument(filePath, chunkSize);
        // Verify hash matches (file might have changed during processing)
        if (processed.hash !== fileHash) {
            throw new Error('File changed during processing - hash mismatch');
        }
        // Create chunks in database
        const chunkInputs = processed.chunks.map(chunk => ({
            document_id: documentId,
            position: chunk.position,
            content: chunk.content,
            char_start: chunk.char_start,
            char_end: chunk.char_end,
        }));
        this.repository.createChunks(chunkInputs);
        // Get the created chunks with IDs
        const chunks = this.repository.getChunksByDocument(documentId);
        // Generate embeddings for each chunk
        await this.generateEmbeddings(chunks);
    }

    /**
     * Generate embeddings for chunks using Sati's EmbeddingService,
     * in batches of 50 to bound memory use, then persist them.
     */
    async generateEmbeddings(chunks) {
        if (!this.embeddingService) {
            this.embeddingService = await EmbeddingService.getInstance();
        }
        const embeddings = [];
        // Process in batches to avoid memory issues
        const batchSize = 50;
        for (let i = 0; i < chunks.length; i += batchSize) {
            const batch = chunks.slice(i, i + batchSize);
            const batchEmbeddings = await Promise.all(batch.map(async (chunk) => {
                const embedding = await this.embeddingService.generate(chunk.content);
                return { chunk_id: chunk.id, embedding };
            }));
            embeddings.push(...batchEmbeddings);
        }
        // Store embeddings in database
        this.repository.createEmbeddings(embeddings);
    }

    /**
     * Remove indexed documents whose backing file no longer exists on disk.
     * @param {string[]} existingFiles - paths found by the current scan
     * @returns {Promise<number>} count of documents removed
     */
    async removeDeletedDocuments(existingFiles) {
        const existingPaths = new Set(existingFiles);
        const documents = this.repository.listDocuments();
        let removed = 0;
        for (const doc of documents) {
            if (!existingPaths.has(doc.file_path)) {
                // Document file no longer exists - remove from index
                this.repository.deleteDocument(doc.id);
                removed++;
                this.display.log(`Removed deleted document: ${doc.filename}`, { source: 'Link', level: 'debug' });
            }
        }
        return removed;
    }
}
|