morpheus-cli 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +63 -43
  2. package/dist/channels/discord.js +71 -21
  3. package/dist/channels/telegram.js +73 -19
  4. package/dist/cli/commands/restart.js +15 -0
  5. package/dist/cli/commands/start.js +18 -0
  6. package/dist/config/manager.js +61 -0
  7. package/dist/config/paths.js +1 -0
  8. package/dist/config/schemas.js +11 -3
  9. package/dist/http/api.js +3 -0
  10. package/dist/http/routers/link.js +239 -0
  11. package/dist/http/routers/skills.js +1 -8
  12. package/dist/runtime/apoc.js +1 -1
  13. package/dist/runtime/audit/repository.js +1 -1
  14. package/dist/runtime/link-chunker.js +214 -0
  15. package/dist/runtime/link-repository.js +301 -0
  16. package/dist/runtime/link-search.js +298 -0
  17. package/dist/runtime/link-worker.js +284 -0
  18. package/dist/runtime/link.js +295 -0
  19. package/dist/runtime/memory/sati/service.js +1 -1
  20. package/dist/runtime/memory/sqlite.js +52 -0
  21. package/dist/runtime/neo.js +1 -1
  22. package/dist/runtime/oracle.js +81 -44
  23. package/dist/runtime/scaffold.js +4 -17
  24. package/dist/runtime/skills/__tests__/loader.test.js +7 -10
  25. package/dist/runtime/skills/__tests__/registry.test.js +2 -18
  26. package/dist/runtime/skills/__tests__/tool.test.js +55 -224
  27. package/dist/runtime/skills/index.js +1 -2
  28. package/dist/runtime/skills/loader.js +0 -2
  29. package/dist/runtime/skills/registry.js +8 -20
  30. package/dist/runtime/skills/schema.js +0 -4
  31. package/dist/runtime/skills/tool.js +42 -209
  32. package/dist/runtime/smiths/delegator.js +1 -1
  33. package/dist/runtime/smiths/registry.js +1 -1
  34. package/dist/runtime/tasks/worker.js +12 -44
  35. package/dist/runtime/trinity.js +1 -1
  36. package/dist/types/config.js +14 -0
  37. package/dist/ui/assets/AuditDashboard-93LCGHG1.js +1 -0
  38. package/dist/ui/assets/{Chat-BNtutgja.js → Chat-CK5sNcQ1.js} +8 -8
  39. package/dist/ui/assets/{Chronos-3C8RPZcl.js → Chronos-m2h--GEe.js} +1 -1
  40. package/dist/ui/assets/{ConfirmationModal-ZQPBeJ2Z.js → ConfirmationModal-Dd5pUJme.js} +1 -1
  41. package/dist/ui/assets/{Dashboard-CqkHzr2F.js → Dashboard-ODwl7d-a.js} +1 -1
  42. package/dist/ui/assets/{DeleteConfirmationModal-CioxFWn_.js → DeleteConfirmationModal-CCcojDmr.js} +1 -1
  43. package/dist/ui/assets/Documents-dWnSoxFO.js +7 -0
  44. package/dist/ui/assets/{Logs-DBVanS0O.js → Logs-Dc9Z2LBj.js} +1 -1
  45. package/dist/ui/assets/{MCPManager-vXfL3P2U.js → MCPManager-CMkb8vMn.js} +1 -1
  46. package/dist/ui/assets/{ModelPricing-DyfdunLT.js → ModelPricing-DtHPPbEQ.js} +1 -1
  47. package/dist/ui/assets/{Notifications-VL-vep6d.js → Notifications-BPvo-DWP.js} +1 -1
  48. package/dist/ui/assets/{Pagination-oTGieBLM.js → Pagination-BHZKk42X.js} +1 -1
  49. package/dist/ui/assets/{SatiMemories-jaadkW0U.js → SatiMemories-BUPu1Lxr.js} +1 -1
  50. package/dist/ui/assets/SessionAudit-CFKF4DA8.js +9 -0
  51. package/dist/ui/assets/Settings-C4JrXfsR.js +47 -0
  52. package/dist/ui/assets/{Skills-DE3zziXL.js → Skills-BUlvJgJ4.js} +1 -1
  53. package/dist/ui/assets/{Smiths-pmogN1mU.js → Smiths-CDtJdY0I.js} +1 -1
  54. package/dist/ui/assets/{Tasks-Bs8s34Jc.js → Tasks-DK_cOsNK.js} +1 -1
  55. package/dist/ui/assets/{TrinityDatabases-D7uihcdp.js → TrinityDatabases-X07by-19.js} +1 -1
  56. package/dist/ui/assets/{UsageStats-B9gePLZ0.js → UsageStats-dYcgckLq.js} +1 -1
  57. package/dist/ui/assets/{WebhookManager-B2L3rCLM.js → WebhookManager-DDw5eX2R.js} +1 -1
  58. package/dist/ui/assets/{audit-Cggeu9mM.js → audit-DZ5WLUEm.js} +1 -1
  59. package/dist/ui/assets/{chronos-D3-sWhfU.js → chronos-B_HI4mlq.js} +1 -1
  60. package/dist/ui/assets/{config-CBqRUPgn.js → config-B-YxlVrc.js} +1 -1
  61. package/dist/ui/assets/index-DVjwJ8jT.css +1 -0
  62. package/dist/ui/assets/{index-zKplfrXZ.js → index-DfJwcKqG.js} +5 -5
  63. package/dist/ui/assets/{mcp-uL1R9hyA.js → mcp-k-_pwbqA.js} +1 -1
  64. package/dist/ui/assets/{skills-jmw8yTJs.js → skills-xMXangks.js} +1 -1
  65. package/dist/ui/assets/{stats-HOms6GnM.js → stats-C4QZIv5O.js} +1 -1
  66. package/dist/ui/assets/{vendor-icons-DMd9RGvJ.js → vendor-icons-NHF9HNeN.js} +1 -1
  67. package/dist/ui/index.html +3 -3
  68. package/dist/ui/sw.js +1 -1
  69. package/package.json +3 -1
  70. package/dist/runtime/__tests__/keymaker.test.js +0 -148
  71. package/dist/runtime/keymaker.js +0 -157
  72. package/dist/ui/assets/AuditDashboard-DliJ1CX0.js +0 -1
  73. package/dist/ui/assets/SessionAudit-BsXrWlwz.js +0 -9
  74. package/dist/ui/assets/Settings-B4eezRcg.js +0 -47
  75. package/dist/ui/assets/index-D4fzIKy1.css +0 -1
@@ -0,0 +1,298 @@
1
+ import { LinkRepository } from './link-repository.js';
2
+ import { ConfigManager } from '../config/manager.js';
3
+ import { EmbeddingService } from './memory/embedding.service.js';
4
+ /**
5
+ * LinkSearch - Hybrid search for Link documents
6
+ *
7
+ * Combines vector similarity search (80% weight) with BM25 text search (20% weight)
8
+ * for optimal retrieval of relevant document chunks.
9
+ */
10
+ export class LinkSearch {
11
+ static instance = null;
12
+ repository;
13
+ db = null;
14
+ embeddingService = null;
15
+ constructor() {
16
+ this.repository = LinkRepository.getInstance();
17
+ }
18
+ static getInstance() {
19
+ if (!LinkSearch.instance) {
20
+ LinkSearch.instance = new LinkSearch();
21
+ }
22
+ return LinkSearch.instance;
23
+ }
24
+ static resetInstance() {
25
+ LinkSearch.instance = null;
26
+ }
27
+ async initialize() {
28
+ // Get the database from the repository
29
+ this.db = this.repository.db;
30
+ // Initialize embedding service
31
+ this.embeddingService = await EmbeddingService.getInstance();
32
+ }
33
+ /**
34
+ * Perform vector similarity search using sqlite-vec.
35
+ */
36
+ vectorSearch(queryEmbedding, limit) {
37
+ if (!this.db) {
38
+ throw new Error('LinkSearch not initialized');
39
+ }
40
+ const embeddingBlob = new Float32Array(queryEmbedding);
41
+ // Query vector similarity using cosine distance
42
+ const rows = this.db.prepare(`
43
+ SELECT
44
+ e.chunk_id,
45
+ c.document_id,
46
+ d.filename,
47
+ c.position,
48
+ c.content,
49
+ vec_distance_cosine(e.embedding, ?) as distance
50
+ FROM embeddings e
51
+ JOIN chunks c ON e.chunk_id = c.id
52
+ JOIN documents d ON c.document_id = d.id
53
+ WHERE d.status = 'indexed'
54
+ ORDER BY distance ASC
55
+ LIMIT ?
56
+ `).all(embeddingBlob, limit);
57
+ // Convert distance to similarity score (1 - distance for cosine)
58
+ return rows.map(row => ({
59
+ chunk_id: row.chunk_id,
60
+ document_id: row.document_id,
61
+ filename: row.filename,
62
+ position: row.position,
63
+ content: row.content,
64
+ score: 1 - row.distance,
65
+ }));
66
+ }
67
+ /**
68
+ * Perform BM25 full-text search using FTS5.
69
+ */
70
+ bm25Search(query, limit) {
71
+ if (!this.db) {
72
+ throw new Error('LinkSearch not initialized');
73
+ }
74
+ // Sanitize query: remove characters that could break FTS5 syntax (like ?, *, OR, etc)
75
+ // keeping only letters, numbers and spaces.
76
+ const escapedQuery = query
77
+ .replace(/[^\p{L}\p{N}\s]/gu, ' ')
78
+ .replace(/\s+/g, ' ')
79
+ .trim();
80
+ // Return empty results if query is empty after sanitization
81
+ if (!escapedQuery) {
82
+ return [];
83
+ }
84
+ const rows = this.db.prepare(`
85
+ SELECT
86
+ c.id as chunk_id,
87
+ c.document_id,
88
+ d.filename,
89
+ c.position,
90
+ c.content,
91
+ bm25(chunks_fts) as bm25_score
92
+ FROM chunks_fts fts
93
+ JOIN chunks c ON c.rowid = fts.rowid
94
+ JOIN documents d ON c.document_id = d.id
95
+ WHERE d.status = 'indexed'
96
+ AND chunks_fts MATCH ?
97
+ ORDER BY bm25_score ASC
98
+ LIMIT ?
99
+ `).all(escapedQuery, limit);
100
+ // BM25 returns negative scores for better matches, negate and normalize
101
+ return rows.map(row => ({
102
+ chunk_id: row.chunk_id,
103
+ document_id: row.document_id,
104
+ filename: row.filename,
105
+ position: row.position,
106
+ content: row.content,
107
+ score: -row.bm25_score, // Negate since BM25 returns negative for better matches
108
+ }));
109
+ }
110
+ /**
111
+ * Normalize scores to 0-1 range using min-max scaling.
112
+ */
113
+ normalizeScores(results) {
114
+ if (results.length === 0)
115
+ return results;
116
+ const scores = results.map(r => r.score);
117
+ const min = Math.min(...scores);
118
+ const max = Math.max(...scores);
119
+ const range = max - min;
120
+ if (range === 0) {
121
+ // All scores are the same
122
+ return results.map(r => ({ ...r, score: 1 }));
123
+ }
124
+ return results.map(r => ({
125
+ ...r,
126
+ score: (r.score - min) / range,
127
+ }));
128
+ }
129
+ /**
130
+ * Perform hybrid search combining vector and BM25 results.
131
+ */
132
+ hybridSearch(queryEmbedding, queryText, limit, threshold) {
133
+ const config = ConfigManager.getInstance().getLinkConfig();
134
+ const vectorWeight = config.vector_weight;
135
+ const bm25Weight = config.bm25_weight;
136
+ // Get results from both methods (fetch more for better merging)
137
+ const fetchLimit = limit * 3;
138
+ const vectorResults = this.vectorSearch(queryEmbedding, fetchLimit);
139
+ const bm25Results = this.bm25Search(queryText, fetchLimit);
140
+ // Normalize scores
141
+ const normalizedVector = this.normalizeScores(vectorResults);
142
+ const normalizedBM25 = this.normalizeScores(bm25Results);
143
+ // Create maps for quick lookup
144
+ const vectorMap = new Map(normalizedVector.map(r => [r.chunk_id, r]));
145
+ const bm25Map = new Map(normalizedBM25.map(r => [r.chunk_id, r]));
146
+ // Combine all unique chunk IDs
147
+ const allChunkIds = new Set([...vectorMap.keys(), ...bm25Map.keys()]);
148
+ // Calculate combined scores
149
+ const combined = [];
150
+ for (const chunkId of allChunkIds) {
151
+ const vResult = vectorMap.get(chunkId);
152
+ const bResult = bm25Map.get(chunkId);
153
+ const vectorScore = vResult?.score ?? 0;
154
+ const bm25Score = bResult?.score ?? 0;
155
+ // Weighted combination
156
+ const combinedScore = (vectorScore * vectorWeight) + (bm25Score * bm25Weight);
157
+ // Get the data from whichever result has it
158
+ const data = vResult || bResult;
159
+ if (!data)
160
+ continue;
161
+ combined.push({
162
+ chunk_id: chunkId,
163
+ document_id: data.document_id,
164
+ filename: data.filename,
165
+ position: data.position,
166
+ content: data.content,
167
+ score: combinedScore,
168
+ vector_score: vectorScore,
169
+ bm25_score: bm25Score,
170
+ });
171
+ }
172
+ // Sort by combined score and filter by threshold
173
+ const filtered = combined
174
+ .filter(r => r.score >= threshold)
175
+ .sort((a, b) => b.score - a.score)
176
+ .slice(0, limit);
177
+ return filtered;
178
+ }
179
+ /**
180
+ * Search with a text query (generates embedding internally).
181
+ */
182
+ async search(queryText, limit, threshold) {
183
+ if (!this.embeddingService) {
184
+ throw new Error('LinkSearch not initialized');
185
+ }
186
+ const config = ConfigManager.getInstance().getLinkConfig();
187
+ const maxResults = limit ?? config.max_results;
188
+ const minThreshold = threshold ?? config.score_threshold;
189
+ // Generate embedding for the query
190
+ const queryEmbedding = await this.embeddingService.generate(queryText);
191
+ return this.hybridSearch(queryEmbedding, queryText, maxResults, minThreshold);
192
+ }
193
+ /**
194
+ * Search within a specific document by document_id.
195
+ * Runs vector + BM25 search filtered to chunks belonging to that document.
196
+ */
197
+ async searchInDocument(queryText, documentId, limit, threshold) {
198
+ if (!this.embeddingService || !this.db) {
199
+ throw new Error('LinkSearch not initialized');
200
+ }
201
+ const config = ConfigManager.getInstance().getLinkConfig();
202
+ const maxResults = limit ?? config.max_results;
203
+ const minThreshold = threshold ?? config.score_threshold;
204
+ const vectorWeight = config.vector_weight;
205
+ const bm25Weight = config.bm25_weight;
206
+ const fetchLimit = maxResults * 3;
207
+ // Generate embedding for the query
208
+ const queryEmbedding = await this.embeddingService.generate(queryText);
209
+ const embeddingBlob = new Float32Array(queryEmbedding);
210
+ // Vector search filtered by document
211
+ const vectorRows = this.db.prepare(`
212
+ SELECT
213
+ e.chunk_id,
214
+ c.document_id,
215
+ d.filename,
216
+ c.position,
217
+ c.content,
218
+ vec_distance_cosine(e.embedding, ?) as distance
219
+ FROM embeddings e
220
+ JOIN chunks c ON e.chunk_id = c.id
221
+ JOIN documents d ON c.document_id = d.id
222
+ WHERE d.status = 'indexed' AND c.document_id = ?
223
+ ORDER BY distance ASC
224
+ LIMIT ?
225
+ `).all(embeddingBlob, documentId, fetchLimit);
226
+ const vectorResults = vectorRows.map(row => ({
227
+ chunk_id: row.chunk_id,
228
+ document_id: row.document_id,
229
+ filename: row.filename,
230
+ position: row.position,
231
+ content: row.content,
232
+ score: 1 - row.distance,
233
+ }));
234
+ // BM25 search filtered by document
235
+ const escapedQuery = queryText
236
+ .replace(/[^\p{L}\p{N}\s]/gu, ' ')
237
+ .replace(/\s+/g, ' ')
238
+ .trim();
239
+ let bm25Results = [];
240
+ if (escapedQuery) {
241
+ const bm25Rows = this.db.prepare(`
242
+ SELECT
243
+ c.id as chunk_id,
244
+ c.document_id,
245
+ d.filename,
246
+ c.position,
247
+ c.content,
248
+ bm25(chunks_fts) as bm25_score
249
+ FROM chunks_fts fts
250
+ JOIN chunks c ON c.rowid = fts.rowid
251
+ JOIN documents d ON c.document_id = d.id
252
+ WHERE d.status = 'indexed'
253
+ AND c.document_id = ?
254
+ AND chunks_fts MATCH ?
255
+ ORDER BY bm25_score ASC
256
+ LIMIT ?
257
+ `).all(documentId, escapedQuery, fetchLimit);
258
+ bm25Results = bm25Rows.map(row => ({
259
+ chunk_id: row.chunk_id,
260
+ document_id: row.document_id,
261
+ filename: row.filename,
262
+ position: row.position,
263
+ content: row.content,
264
+ score: -row.bm25_score,
265
+ }));
266
+ }
267
+ // Normalize and combine
268
+ const normalizedVector = this.normalizeScores(vectorResults);
269
+ const normalizedBM25 = this.normalizeScores(bm25Results);
270
+ const vectorMap = new Map(normalizedVector.map(r => [r.chunk_id, r]));
271
+ const bm25Map = new Map(normalizedBM25.map(r => [r.chunk_id, r]));
272
+ const allChunkIds = new Set([...vectorMap.keys(), ...bm25Map.keys()]);
273
+ const combined = [];
274
+ for (const chunkId of allChunkIds) {
275
+ const vResult = vectorMap.get(chunkId);
276
+ const bResult = bm25Map.get(chunkId);
277
+ const vectorScore = vResult?.score ?? 0;
278
+ const bm25Score = bResult?.score ?? 0;
279
+ const data = vResult || bResult;
280
+ if (!data)
281
+ continue;
282
+ combined.push({
283
+ chunk_id: chunkId,
284
+ document_id: data.document_id,
285
+ filename: data.filename,
286
+ position: data.position,
287
+ content: data.content,
288
+ score: (vectorScore * vectorWeight) + (bm25Score * bm25Weight),
289
+ vector_score: vectorScore,
290
+ bm25_score: bm25Score,
291
+ });
292
+ }
293
+ return combined
294
+ .filter(r => r.score >= minThreshold)
295
+ .sort((a, b) => b.score - a.score)
296
+ .slice(0, maxResults);
297
+ }
298
+ }
@@ -0,0 +1,284 @@
1
+ import { homedir } from 'os';
2
+ import path from 'path';
3
+ import fs from 'fs-extra';
4
+ import { LinkRepository } from './link-repository.js';
5
+ import { LinkSearch } from './link-search.js';
6
+ import { hashFile, processDocument, isSupportedFormat } from './link-chunker.js';
7
+ import { EmbeddingService } from './memory/embedding.service.js';
8
+ import { ConfigManager } from '../config/manager.js';
9
+ import { DisplayManager } from './display.js';
10
+ /**
11
+ * LinkWorker - Background worker for document indexing
12
+ *
13
+ * Scans ~/.morpheus/docs folder, processes new/changed documents,
14
+ * generates embeddings, and removes deleted documents from the index.
15
+ */
16
/**
 * LinkWorker - Background worker for document indexing.
 *
 * Scans ~/.morpheus/docs, processes new/changed documents, generates
 * embeddings, and removes deleted documents from the index.
 */
export class LinkWorker {
    static instance = null;
    repository;
    search;
    embeddingService = null;
    // Timer handle for the periodic scan; null when the worker is stopped.
    intervalId = null;
    isRunning = false;
    display = DisplayManager.getInstance();
    // Folder watched for documents: ~/.morpheus/docs
    docsPath;
    constructor() {
        this.repository = LinkRepository.getInstance();
        this.search = LinkSearch.getInstance();
        this.docsPath = path.join(homedir(), '.morpheus', 'docs');
    }
    /** Lazily create and return the process-wide singleton. */
    static getInstance() {
        if (!LinkWorker.instance) {
            LinkWorker.instance = new LinkWorker();
        }
        return LinkWorker.instance;
    }
    /** Test hook: inject a replacement instance. */
    static setInstance(instance) {
        LinkWorker.instance = instance;
    }
    /** Test hook: stop and discard the singleton. */
    static resetInstance() {
        if (LinkWorker.instance) {
            LinkWorker.instance.stop();
        }
        LinkWorker.instance = null;
    }
    /**
     * Run one tick, routing failures to the display log so a rejection never
     * escapes a timer callback. `context` names the trigger for the log line.
     */
    runTickSafely(context) {
        this.tick().catch(err => {
            this.display.log(`LinkWorker ${context} failed: ${err.message}`, { source: 'Link', level: 'error' });
        });
    }
    /**
     * Start the background worker with interval-based scanning.
     * No-op when already running.
     */
    start() {
        if (this.isRunning)
            return;
        const config = ConfigManager.getInstance().getLinkConfig();
        this.isRunning = true;
        this.display.log('LinkWorker started', { source: 'Link' });
        // Run initial scan immediately, then schedule periodic scans
        this.runTickSafely('initial scan');
        this.intervalId = setInterval(() => this.runTickSafely('tick'), config.scan_interval_ms);
    }
    /** Stop the background worker and clear the scan timer. */
    stop() {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = null;
        }
        this.isRunning = false;
        this.display.log('LinkWorker stopped', { source: 'Link' });
    }
    /** Update the scan interval (hot-reload); only applies while the timer is active. */
    updateInterval(intervalMs) {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = setInterval(() => this.runTickSafely('tick'), intervalMs);
            this.display.log(`LinkWorker interval updated to ${intervalMs}ms`, { source: 'Link' });
        }
    }
    /**
     * Perform a single scan cycle.
     * @returns {{indexed: number, removed: number, errors: number}} cycle stats.
     */
    async tick() {
        // Ensure embedding service is initialized
        if (!this.embeddingService) {
            this.embeddingService = await EmbeddingService.getInstance();
        }
        // Ensure docs folder exists
        await fs.ensureDir(this.docsPath);
        const stats = {
            indexed: 0,
            removed: 0,
            errors: 0,
        };
        try {
            // Scan for new/changed documents
            const files = await this.scanFolder();
            this.display.log(`LinkWorker found ${files.length} files`, { source: 'Link', level: 'debug' });
            for (const filePath of files) {
                try {
                    const result = await this.processDocument(filePath);
                    if (result === 'indexed') {
                        stats.indexed++;
                    }
                    else if (result === 'error') {
                        stats.errors++;
                    }
                }
                catch (err) {
                    this.display.log(`Failed to process ${filePath}: ${err.message}`, { source: 'Link', level: 'error' });
                    stats.errors++;
                }
            }
            // Remove index entries whose backing files disappeared
            stats.removed = await this.removeDeletedDocuments(files);
            if (stats.indexed > 0 || stats.removed > 0) {
                this.display.log(`LinkWorker: indexed ${stats.indexed}, removed ${stats.removed}, errors ${stats.errors}`, { source: 'Link', level: 'info' });
            }
        }
        catch (err) {
            this.display.log(`LinkWorker tick error: ${err.message}`, { source: 'Link', level: 'error' });
            stats.errors++;
        }
        return stats;
    }
    /** Scan the docs folder (top level only) for supported files. */
    async scanFolder() {
        const files = [];
        const entries = await fs.readdir(this.docsPath, { withFileTypes: true });
        for (const entry of entries) {
            if (entry.isFile()) {
                const filePath = path.join(this.docsPath, entry.name);
                if (isSupportedFormat(filePath)) {
                    files.push(filePath);
                }
            }
        }
        return files;
    }
    /**
     * Process a single document: hash check, parse, chunk, embed.
     * @returns {'indexed'|'skipped'|'error'} outcome of the attempt.
     */
    async processDocument(filePath) {
        const existingDoc = this.repository.getDocumentByPath(filePath);
        // Calculate file hash
        let fileHash;
        try {
            fileHash = await hashFile(filePath);
        }
        catch (err) {
            // File might not be readable
            if (existingDoc) {
                this.repository.updateDocumentStatus(existingDoc.id, 'error', `Failed to read file: ${err.message}`);
            }
            return 'error';
        }
        // Unchanged and already indexed: nothing to do
        if (existingDoc && existingDoc.file_hash === fileHash && existingDoc.status === 'indexed') {
            return 'skipped';
        }
        const stats = await fs.stat(filePath);
        const fileSize = stats.size;
        // Enforce the configured max file size
        const config = ConfigManager.getInstance().getLinkConfig();
        const maxSizeBytes = config.max_file_size_mb * 1024 * 1024;
        if (fileSize > maxSizeBytes) {
            if (existingDoc) {
                this.repository.updateDocumentStatus(existingDoc.id, 'error', `File exceeds max size of ${config.max_file_size_mb}MB`);
            }
            return 'error';
        }
        const filename = path.basename(filePath);
        let document;
        if (existingDoc) {
            // Re-index: purge old chunks/embeddings first.
            // NOTE(review): the stored file_hash is not explicitly refreshed here
            // for changed documents - presumably a repository call handles it;
            // confirm, otherwise changed files would be re-indexed on every scan.
            this.repository.deleteChunksByDocument(existingDoc.id);
            this.repository.deleteEmbeddingsByDocument(existingDoc.id);
            this.repository.updateDocumentStatus(existingDoc.id, 'indexing');
            document = existingDoc;
        }
        else {
            // Create new document record
            document = this.repository.createDocument({
                filename,
                file_path: filePath,
                file_hash: fileHash,
                file_size: fileSize,
            });
        }
        try {
            await this.indexDocument(document.id, filePath, fileHash);
            const chunks = this.repository.getChunksByDocument(document.id);
            this.repository.updateDocumentChunkCount(document.id, chunks.length);
            // Fix: the original emitted the literal text "$(unknown)" here
            this.display.log(`Indexed document: ${filename} (${chunks.length} chunks)`, { source: 'Link', level: 'debug' });
            return 'indexed';
        }
        catch (err) {
            this.repository.updateDocumentStatus(document.id, 'error', err.message);
            return 'error';
        }
    }
    /**
     * Index a document: parse, chunk, and generate embeddings.
     * @throws {Error} when the file changes (hash mismatch) during processing.
     */
    async indexDocument(documentId, filePath, fileHash) {
        const config = ConfigManager.getInstance().getLinkConfig();
        // Parse and chunk the document (module-level processDocument from link-chunker)
        const processed = await processDocument(filePath, config.chunk_size);
        // File might have changed while we were parsing it
        if (processed.hash !== fileHash) {
            throw new Error('File changed during processing - hash mismatch');
        }
        const chunkInputs = processed.chunks.map(chunk => ({
            document_id: documentId,
            position: chunk.position,
            content: chunk.content,
            char_start: chunk.char_start,
            char_end: chunk.char_end,
        }));
        this.repository.createChunks(chunkInputs);
        // Re-read chunks so we have their generated IDs for the embeddings
        const chunks = this.repository.getChunksByDocument(documentId);
        await this.generateEmbeddings(chunks);
    }
    /**
     * Generate embeddings for chunks using Sati's EmbeddingService,
     * batching to bound memory use.
     */
    async generateEmbeddings(chunks) {
        if (!this.embeddingService) {
            this.embeddingService = await EmbeddingService.getInstance();
        }
        const embeddings = [];
        const batchSize = 50;
        for (let i = 0; i < chunks.length; i += batchSize) {
            const batch = chunks.slice(i, i + batchSize);
            const batchEmbeddings = await Promise.all(batch.map(async (chunk) => {
                const embedding = await this.embeddingService.generate(chunk.content);
                return { chunk_id: chunk.id, embedding };
            }));
            embeddings.push(...batchEmbeddings);
        }
        // Store embeddings in database
        this.repository.createEmbeddings(embeddings);
    }
    /**
     * Remove documents whose files no longer exist in the docs folder.
     * @returns {number} how many documents were removed from the index.
     */
    async removeDeletedDocuments(existingFiles) {
        const existingPaths = new Set(existingFiles);
        let removed = 0;
        for (const doc of this.repository.listDocuments()) {
            if (!existingPaths.has(doc.file_path)) {
                // Document file no longer exists - remove from index
                this.repository.deleteDocument(doc.id);
                removed++;
                this.display.log(`Removed deleted document: ${doc.filename}`, { source: 'Link', level: 'debug' });
            }
        }
        return removed;
    }
}