@mrxkun/mcfast-mcp 4.1.10 → 4.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.js +176 -21
- package/src/memory/memory-engine.js +213 -13
- package/src/memory/stores/base-database.js +223 -0
- package/src/memory/utils/chunker.js +1 -0
- package/src/memory/utils/indexer.js +110 -4
- package/src/memory/utils/logger.js +162 -0
- package/src/memory/utils/vector-index.js +241 -0
- package/src/memory/watchers/file-watcher.js +255 -103
- package/src/tools/project_analyze.js +491 -0
- package/src/utils/audit-queue.js +1 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector Index for fast similarity search
|
|
3
|
+
* Implements HNSW-like in-memory index for embeddings
|
|
4
|
+
*
|
|
5
|
+
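* Search walks the neighbor graph greedily from a single entry point to avoid a full O(n) scan (inserts still compare against every stored vector; no O(log n) bound is guaranteed)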
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * In-memory approximate nearest-neighbor index over fixed-dimension vectors.
 *
 * The structure is a single-layer neighbor graph ("HNSW-like", layer 0 only):
 * every inserted vector is linked to its `M` closest already-stored vectors
 * (by cosine distance) and those vectors are linked back. Search is a greedy
 * traversal of that graph starting from the first vector ever inserted, so
 * results are approximate.
 */
export class VectorIndex {
    /**
     * @param {object} [options]
     * @param {number} [options.dimension=1024] Required length of every vector.
     * @param {number} [options.maxElements=10000] Stored and reported by
     *   getStats(); not enforced by this class.
     * @param {number} [options.efConstruction=200] Stored only; not used by
     *   this simplified implementation.
     * @param {number} [options.M=16] Number of nearest neighbors linked on insert.
     */
    constructor(options = {}) {
        this.dimension = options.dimension || 1024;
        this.maxElements = options.maxElements || 10000;
        this.efConstruction = options.efConstruction || 200;
        this.M = options.M || 16;

        // In-memory storage
        this.vectors = new Map(); // id -> Float32Array
        this.metadata = new Map(); // id -> metadata

        // Single-layer neighbor graph
        this.connections = new Map(); // id -> [neighbor_ids]

        // Id of the vector every search starts from (null while empty)
        this.entryPoint = null;

        // Search timing statistics (milliseconds)
        this.stats = {
            searches: 0,
            totalDuration: 0,
            avgDuration: 0
        };
    }

    /**
     * Add (or overwrite) a vector.
     *
     * @param {*} id Key for the vector; any Map key works, including 0 and ''.
     * @param {ArrayLike<number>} vector Must have exactly `dimension` entries.
     *   A Float32Array is stored by reference, anything else is copied.
     * @param {*} [metadata={}] Opaque value returned with search hits.
     * @returns {VectorIndex} this, for chaining.
     * @throws {Error} When the vector length differs from `dimension`.
     */
    add(id, vector, metadata = {}) {
        if (vector.length !== this.dimension) {
            throw new Error(`Vector dimension ${vector.length} != expected ${this.dimension}`);
        }

        const vec = vector instanceof Float32Array ? vector : new Float32Array(vector);
        this.vectors.set(id, vec);
        this.metadata.set(id, metadata);

        if (!this.connections.has(id)) {
            this.connections.set(id, []);
        }

        // Compare against null/undefined explicitly: a truthiness test would
        // treat ids such as 0 or '' as "no entry point".
        if (this.entryPoint == null) {
            this.entryPoint = id;
        }

        this._buildConnections(id, vec);

        return this;
    }

    /**
     * Link `id` to its `M` nearest stored vectors and add the reverse links.
     * Scans every stored vector, so one insert costs O(n) distance
     * computations plus a sort.
     *
     * @param {*} id Id of the vector being linked (already stored).
     * @param {Float32Array} vector The vector stored under `id`.
     */
    _buildConnections(id, vector) {
        const neighbors = [];
        for (const [otherId, otherVec] of this.vectors) {
            if (otherId === id) continue;
            neighbors.push({ id: otherId, dist: this._cosineDistance(vector, otherVec) });
        }

        neighbors.sort((a, b) => a.dist - b.dist);
        const nearest = neighbors.slice(0, this.M);

        // Merge into the existing list without duplicates so re-adding an id
        // does not repeat links it already has.
        const own = this.connections.get(id) || [];
        const linked = new Set(own);
        for (const { id: neighborId } of nearest) {
            if (!linked.has(neighborId)) {
                linked.add(neighborId);
                own.push(neighborId);
            }
        }
        this.connections.set(id, own);

        // Reverse links keep the graph traversable in both directions.
        for (const { id: neighborId } of nearest) {
            const reverse = this.connections.get(neighborId) || [];
            if (!reverse.includes(id)) {
                reverse.push(id);
                this.connections.set(neighborId, reverse);
            }
        }
    }

    /**
     * Approximate k-nearest-neighbor search by cosine distance.
     *
     * @param {ArrayLike<number>} queryVector Must have `dimension` entries.
     * @param {number} [k=5] Maximum number of hits to return.
     * @param {number} [ef=10] Accepted for API compatibility; not used by
     *   this simplified implementation.
     * @returns {Array<{id: *, similarity: number, distance: number, metadata: *}>}
     *   Hits sorted by ascending distance, each id at most once. Empty when
     *   the index is empty or `k <= 0`.
     * @throws {Error} When the index is non-empty and the query length
     *   differs from `dimension`.
     */
    search(queryVector, k = 5, ef = 10) {
        const startTime = performance.now();

        if (this.entryPoint == null || this.vectors.size === 0 || k <= 0) {
            return [];
        }

        if (queryVector.length !== this.dimension) {
            throw new Error(`Vector dimension ${queryVector.length} != expected ${this.dimension}`);
        }

        const query = queryVector instanceof Float32Array
            ? queryVector
            : new Float32Array(queryVector);

        const entry = this.entryPoint;
        const visited = new Set([entry]);

        // FIFO queue of { id, dist } walked with a cursor. The distance is
        // carried along so each vector is measured exactly once.
        const queue = [{ id: entry, dist: this._cosineDistance(query, this.vectors.get(entry)) }];

        // Best hits so far, ascending by distance, at most k long. It starts
        // empty: the entry point is added when it is dequeued, so it can
        // never appear twice.
        let results = [];
        const isPromising = dist =>
            results.length < k || dist < results[results.length - 1].dist;

        for (let head = 0; head < queue.length; head++) {
            const candidate = queue[head];

            if (isPromising(candidate.dist)) {
                results.push(candidate);
                results.sort((a, b) => a.dist - b.dist);
                results = results.slice(0, k);
            }

            for (const neighborId of this.connections.get(candidate.id) || []) {
                if (visited.has(neighborId)) continue;
                visited.add(neighborId);

                const dist = this._cosineDistance(query, this.vectors.get(neighborId));
                if (isPromising(dist)) {
                    queue.push({ id: neighborId, dist });
                }
            }
        }

        const searchResults = results.map(r => ({
            id: r.id,
            similarity: 1 - r.dist,
            distance: r.dist,
            metadata: this.metadata.get(r.id)
        }));

        const duration = performance.now() - startTime;
        this.stats.searches++;
        this.stats.totalDuration += duration;
        this.stats.avgDuration = this.stats.totalDuration / this.stats.searches;

        return searchResults;
    }

    /**
     * Cosine distance (1 - cosine similarity) over the first `a.length` entries.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} 0 for identical direction, 1 for orthogonal vectors.
     *   A zero-length vector has no direction; it is treated as similarity 0
     *   (distance 1) rather than producing NaN.
     */
    _cosineDistance(a, b) {
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;

        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }

        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        if (denominator === 0) {
            return 1;
        }
        return 1 - dotProduct / denominator;
    }

    /**
     * Add many vectors.
     *
     * @param {Iterable<{id: *, vector: ArrayLike<number>, metadata?: *}>} items
     * @returns {VectorIndex} this, for chaining.
     */
    bulkAdd(items) {
        for (const item of items) {
            this.add(item.id, item.vector, item.metadata);
        }
        return this;
    }

    /**
     * @returns {object} Search timing stats plus current size, dimension and
     *   the configured maxElements.
     */
    getStats() {
        return {
            ...this.stats,
            size: this.vectors.size,
            dimension: this.dimension,
            maxElements: this.maxElements
        };
    }

    /**
     * Remove every vector, link and statistic.
     */
    clear() {
        this.vectors.clear();
        this.metadata.clear();
        this.connections.clear();
        this.entryPoint = null;
        this.stats = { searches: 0, totalDuration: 0, avgDuration: 0 };
    }

    /**
     * Serialize vectors and metadata to a plain object keyed by id. Graph
     * links and the dimension are not stored; fromJSON rebuilds the links.
     *
     * @returns {Object<string, {vector: number[], metadata: *}>}
     */
    toJSON() {
        const data = {};
        for (const [id, vec] of this.vectors) {
            data[id] = {
                vector: Array.from(vec),
                metadata: this.metadata.get(id)
            };
        }
        return data;
    }

    /**
     * Rebuild an index from toJSON() output. Ids come back as strings
     * because they were object keys.
     *
     * @param {Object<string, {vector: number[], metadata: *}>} json
     * @param {object} [options] Constructor options. When `dimension` is
     *   omitted it is taken from the first stored vector, so a
     *   toJSON()/fromJSON() round trip works for any dimension.
     * @returns {VectorIndex}
     */
    static fromJSON(json, options = {}) {
        const entries = Object.entries(json);
        const inferredDimension = entries.length > 0
            ? entries[0][1]?.vector?.length
            : undefined;

        const index = new VectorIndex({
            ...options,
            dimension: options.dimension || inferredDimension
        });

        for (const [id, item] of entries) {
            index.add(id, item.vector, item.metadata);
        }
        return index;
    }
}
|
|
240
|
+
|
|
241
|
+
export default VectorIndex;
|
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
* File Watcher
|
|
3
3
|
* Watches for file changes and indexes them automatically
|
|
4
4
|
* Debounced by 1.5s to batch rapid changes
|
|
5
|
+
*
|
|
6
|
+
* IMPROVEMENTS v4.1.10:
|
|
7
|
+
* - Added periodic cleanup for pendingUpdates Map
|
|
8
|
+
* - Added error boundaries for async operations
|
|
9
|
+
* - Added graceful shutdown cleanup
|
|
5
10
|
*/
|
|
6
11
|
|
|
7
12
|
import chokidar from 'chokidar';
|
|
@@ -29,9 +34,13 @@ export class FileWatcher {
|
|
|
29
34
|
this.watcher = null;
|
|
30
35
|
this.pendingUpdates = new Map();
|
|
31
36
|
this.isProcessing = false;
|
|
37
|
+
this.cleanupInterval = null;
|
|
38
|
+
this.isRunning = false;
|
|
32
39
|
|
|
33
40
|
// Configuration
|
|
34
41
|
this.debounceMs = options.debounceMs || 1500;
|
|
42
|
+
this.cleanupIntervalMs = options.cleanupIntervalMs || 30000; // Cleanup every 30s
|
|
43
|
+
this.maxPendingUpdates = options.maxPendingUpdates || 1000;
|
|
35
44
|
this.ignored = options.ignored || [
|
|
36
45
|
'**/node_modules/**',
|
|
37
46
|
'**/.git/**',
|
|
@@ -46,40 +55,100 @@ export class FileWatcher {
|
|
|
46
55
|
filesAdded: 0,
|
|
47
56
|
filesChanged: 0,
|
|
48
57
|
filesDeleted: 0,
|
|
49
|
-
errors: 0
|
|
58
|
+
errors: 0,
|
|
59
|
+
totalProcessed: 0
|
|
50
60
|
};
|
|
51
61
|
}
|
|
52
62
|
|
|
53
63
|
async start() {
|
|
64
|
+
if (this.isRunning) {
|
|
65
|
+
console.error('[FileWatcher] Already running');
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
|
|
54
69
|
console.error(`[FileWatcher] Starting watcher for: ${this.projectPath}`);
|
|
55
70
|
console.error(`[FileWatcher] Debounce: ${this.debounceMs}ms`);
|
|
56
71
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
72
|
+
try {
|
|
73
|
+
this.watcher = chokidar.watch(this.projectPath, {
|
|
74
|
+
ignored: this.ignored,
|
|
75
|
+
persistent: true,
|
|
76
|
+
ignoreInitial: true,
|
|
77
|
+
awaitWriteFinish: {
|
|
78
|
+
stabilityThreshold: 300,
|
|
79
|
+
pollInterval: 100
|
|
80
|
+
}
|
|
81
|
+
});
|
|
66
82
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
83
|
+
// Bind event handlers with error handling
|
|
84
|
+
this.watcher.on('add', filePath => this.safeHandleAdd(filePath));
|
|
85
|
+
this.watcher.on('change', filePath => this.safeHandleChange(filePath));
|
|
86
|
+
this.watcher.on('unlink', filePath => this.safeHandleDelete(filePath));
|
|
87
|
+
this.watcher.on('error', error => this.handleError(error));
|
|
88
|
+
|
|
89
|
+
// Setup debounced flush
|
|
90
|
+
this.flushQueue = debounce(() => this.processQueue(), this.debounceMs);
|
|
91
|
+
|
|
92
|
+
// Wait for ready
|
|
93
|
+
await new Promise((resolve, reject) => {
|
|
94
|
+
this.watcher.once('ready', resolve);
|
|
95
|
+
this.watcher.once('error', reject);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Start periodic cleanup
|
|
99
|
+
this.startCleanup();
|
|
100
|
+
|
|
101
|
+
this.isRunning = true;
|
|
102
|
+
console.error(`[FileWatcher] Ready and watching`);
|
|
103
|
+
} catch (error) {
|
|
104
|
+
console.error(`[FileWatcher] Failed to start:`, error.message);
|
|
105
|
+
throw error;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Safe handlers with try-catch
|
|
110
|
+
safeHandleAdd(filePath) {
|
|
111
|
+
try {
|
|
112
|
+
this.handleAdd(filePath);
|
|
113
|
+
} catch (error) {
|
|
114
|
+
console.error(`[FileWatcher] Error in handleAdd:`, error.message);
|
|
115
|
+
this.stats.errors++;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
safeHandleChange(filePath) {
|
|
120
|
+
try {
|
|
121
|
+
this.handleChange(filePath);
|
|
122
|
+
} catch (error) {
|
|
123
|
+
console.error(`[FileWatcher] Error in handleChange:`, error.message);
|
|
124
|
+
this.stats.errors++;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
safeHandleDelete(filePath) {
|
|
129
|
+
try {
|
|
130
|
+
this.handleDelete(filePath);
|
|
131
|
+
} catch (error) {
|
|
132
|
+
console.error(`[FileWatcher] Error in handleDelete:`, error.message);
|
|
133
|
+
this.stats.errors++;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Periodic cleanup to prevent memory leak
|
|
138
|
+
startCleanup() {
|
|
139
|
+
if (this.cleanupInterval) {
|
|
140
|
+
clearInterval(this.cleanupInterval);
|
|
141
|
+
}
|
|
75
142
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
143
|
+
this.cleanupInterval = setInterval(() => {
|
|
144
|
+
if (this.pendingUpdates.size > this.maxPendingUpdates) {
|
|
145
|
+
console.error(`[FileWatcher] Cleanup: clearing ${this.pendingUpdates.size} pending updates`);
|
|
146
|
+
this.pendingUpdates.clear();
|
|
147
|
+
}
|
|
148
|
+
}, this.cleanupIntervalMs);
|
|
81
149
|
|
|
82
|
-
|
|
150
|
+
// Prevent interval from keeping process alive
|
|
151
|
+
this.cleanupInterval.unref();
|
|
83
152
|
}
|
|
84
153
|
|
|
85
154
|
handleAdd(filePath) {
|
|
@@ -119,7 +188,6 @@ export class FileWatcher {
|
|
|
119
188
|
|
|
120
189
|
async processQueue() {
|
|
121
190
|
if (this.isProcessing) {
|
|
122
|
-
// If already processing, debounce will call again
|
|
123
191
|
return;
|
|
124
192
|
}
|
|
125
193
|
|
|
@@ -139,14 +207,25 @@ export class FileWatcher {
|
|
|
139
207
|
|
|
140
208
|
// Process deletions first
|
|
141
209
|
for (const update of deletes) {
|
|
142
|
-
|
|
210
|
+
try {
|
|
211
|
+
await this.processDelete(update);
|
|
212
|
+
} catch (error) {
|
|
213
|
+
console.error(`[FileWatcher] Error processing delete:`, error.message);
|
|
214
|
+
this.stats.errors++;
|
|
215
|
+
}
|
|
143
216
|
}
|
|
144
217
|
|
|
145
218
|
// Process additions/changes
|
|
146
219
|
for (const update of adds) {
|
|
147
|
-
|
|
220
|
+
try {
|
|
221
|
+
await this.processFile(update);
|
|
222
|
+
} catch (error) {
|
|
223
|
+
console.error(`[FileWatcher] Error processing file:`, error.message);
|
|
224
|
+
this.stats.errors++;
|
|
225
|
+
}
|
|
148
226
|
}
|
|
149
227
|
|
|
228
|
+
this.stats.totalProcessed += updates.length;
|
|
150
229
|
console.error(`[FileWatcher] Processed ${updates.length} updates`);
|
|
151
230
|
|
|
152
231
|
} catch (error) {
|
|
@@ -165,19 +244,19 @@ export class FileWatcher {
|
|
|
165
244
|
const stats = await fs.stat(filePath).catch(() => null);
|
|
166
245
|
if (!stats || !stats.isFile()) return;
|
|
167
246
|
|
|
168
|
-
//
|
|
169
|
-
if (stats.size > 1024 * 1024) {
|
|
170
|
-
console.error(`[FileWatcher] Skipping large file: ${filePath} (${Math.round(stats.size / 1024)}KB)`);
|
|
171
|
-
return;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
// Read file content
|
|
247
|
+
// Read file content first
|
|
175
248
|
const content = await fs.readFile(filePath, 'utf-8');
|
|
176
249
|
const contentHash = crypto.createHash('md5').update(content).digest('hex');
|
|
177
250
|
|
|
251
|
+
// Handle large files (> 1MB)
|
|
252
|
+
if (stats.size > 1024 * 1024) {
|
|
253
|
+
console.error(`[FileWatcher] Large file detected: ${filePath} (${Math.round(stats.size / 1024)}KB)`);
|
|
254
|
+
return this.indexLargeFile(filePath, content, contentHash, stats);
|
|
255
|
+
}
|
|
256
|
+
|
|
178
257
|
// Check if already indexed with same hash
|
|
179
|
-
|
|
180
|
-
|
|
258
|
+
const existingFile = this.memory.codebaseDb?.getFileByPath?.(filePath);
|
|
259
|
+
if (existingFile && existingFile.content_hash === contentHash) {
|
|
181
260
|
return;
|
|
182
261
|
}
|
|
183
262
|
|
|
@@ -222,77 +301,102 @@ export class FileWatcher {
|
|
|
222
301
|
async indexMarkdownFile(filePath, content, contentHash, stats) {
|
|
223
302
|
console.error(`[FileWatcher] Indexing Markdown: ${filePath}`);
|
|
224
303
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
// Chunk the content
|
|
230
|
-
const { MarkdownChunker } = await import('../utils/markdown-chunker.js');
|
|
231
|
-
const chunker = new MarkdownChunker();
|
|
232
|
-
const chunks = chunker.chunk(content, relativePath);
|
|
233
|
-
|
|
234
|
-
// Track file
|
|
235
|
-
this.memory.memoryDb?.upsertFile?.(relativePath, contentHash, stats.mtimeMs, stats.size);
|
|
236
|
-
|
|
237
|
-
// Generate embeddings and insert chunks
|
|
238
|
-
for (const chunk of chunks) {
|
|
239
|
-
// Check cache first
|
|
240
|
-
let embedding = null;
|
|
241
|
-
const cached = this.memory.memoryDb?.getCachedEmbedding?.(chunk.contentHash);
|
|
304
|
+
try {
|
|
305
|
+
// Delete old chunks if updating
|
|
306
|
+
const relativePath = path.relative(this.projectPath, filePath);
|
|
307
|
+
this.memory.memoryDb?.deleteChunksByFile?.(relativePath);
|
|
242
308
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
embedding = await this.memory.embedder?.embedCode?.(chunk.content);
|
|
248
|
-
|
|
249
|
-
// Cache it
|
|
250
|
-
if (embedding) {
|
|
251
|
-
this.memory.memoryDb?.cacheEmbedding?.(
|
|
252
|
-
chunk.contentHash,
|
|
253
|
-
Buffer.from(new Float32Array(embedding).buffer),
|
|
254
|
-
'simple-embedder',
|
|
255
|
-
embedding.length
|
|
256
|
-
);
|
|
257
|
-
}
|
|
258
|
-
}
|
|
309
|
+
// Chunk the content
|
|
310
|
+
const { MarkdownChunker } = await import('../utils/markdown-chunker.js');
|
|
311
|
+
const chunker = new MarkdownChunker();
|
|
312
|
+
const chunks = chunker.chunk(content, relativePath);
|
|
259
313
|
|
|
260
|
-
//
|
|
261
|
-
this.memory.memoryDb?.
|
|
262
|
-
id: chunk.id,
|
|
263
|
-
file_path: chunk.filePath,
|
|
264
|
-
start_line: chunk.startLine,
|
|
265
|
-
end_line: chunk.endLine,
|
|
266
|
-
content: chunk.content,
|
|
267
|
-
content_hash: chunk.contentHash,
|
|
268
|
-
chunk_type: chunk.chunkType
|
|
269
|
-
});
|
|
314
|
+
// Track file
|
|
315
|
+
this.memory.memoryDb?.upsertFile?.(relativePath, contentHash, stats.mtimeMs, stats.size);
|
|
270
316
|
|
|
271
|
-
//
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
embedding
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
317
|
+
// Generate embeddings and insert chunks
|
|
318
|
+
for (const chunk of chunks) {
|
|
319
|
+
try {
|
|
320
|
+
// Check cache first
|
|
321
|
+
let embedding = null;
|
|
322
|
+
const cached = this.memory.memoryDb?.getCachedEmbedding?.(chunk.contentHash);
|
|
323
|
+
|
|
324
|
+
if (cached) {
|
|
325
|
+
embedding = cached.embedding;
|
|
326
|
+
} else {
|
|
327
|
+
// Generate embedding
|
|
328
|
+
embedding = await this.memory.embedder?.embedCode?.(chunk.content);
|
|
329
|
+
|
|
330
|
+
// Cache it
|
|
331
|
+
if (embedding) {
|
|
332
|
+
this.memory.memoryDb?.cacheEmbedding?.(
|
|
333
|
+
chunk.contentHash,
|
|
334
|
+
Buffer.from(new Float32Array(embedding).buffer),
|
|
335
|
+
'simple-embedder',
|
|
336
|
+
embedding.length
|
|
337
|
+
);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
// Insert chunk
|
|
342
|
+
this.memory.memoryDb?.insertChunk?.({
|
|
343
|
+
id: chunk.id,
|
|
344
|
+
file_path: chunk.filePath,
|
|
345
|
+
start_line: chunk.startLine,
|
|
346
|
+
end_line: chunk.endLine,
|
|
347
|
+
content: chunk.content,
|
|
348
|
+
content_hash: chunk.contentHash,
|
|
349
|
+
chunk_type: chunk.chunkType
|
|
350
|
+
});
|
|
351
|
+
|
|
352
|
+
// Insert embedding
|
|
353
|
+
if (embedding) {
|
|
354
|
+
this.memory.memoryDb?.insertEmbedding?.({
|
|
355
|
+
chunk_id: chunk.id,
|
|
356
|
+
embedding: Buffer.from(new Float32Array(embedding).buffer),
|
|
357
|
+
model: 'simple-embedder',
|
|
358
|
+
dimensions: embedding.length
|
|
359
|
+
});
|
|
360
|
+
}
|
|
361
|
+
} catch (chunkError) {
|
|
362
|
+
console.error(`[FileWatcher] Error indexing chunk:`, chunkError.message);
|
|
363
|
+
}
|
|
279
364
|
}
|
|
365
|
+
|
|
366
|
+
console.error(`[FileWatcher] Indexed ${chunks.length} chunks from ${filePath}`);
|
|
367
|
+
} catch (error) {
|
|
368
|
+
console.error(`[FileWatcher] Error indexing markdown ${filePath}:`, error.message);
|
|
369
|
+
this.stats.errors++;
|
|
280
370
|
}
|
|
281
|
-
|
|
282
|
-
console.error(`[FileWatcher] Indexed ${chunks.length} chunks from ${filePath}`);
|
|
283
371
|
}
|
|
284
372
|
|
|
285
|
-
async indexCodeFile(filePath, content, contentHash, stats) {
|
|
373
|
+
async indexCodeFile(filePath, content, contentHash, stats, maxRetries = 3) {
|
|
286
374
|
console.error(`[FileWatcher] Indexing code: ${filePath}`);
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
375
|
+
|
|
376
|
+
if (!this.memory.indexer) {
|
|
377
|
+
console.error(`[FileWatcher] No indexer available`);
|
|
378
|
+
return;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
290
382
|
try {
|
|
291
383
|
const indexed = await this.memory.indexer.indexFile(filePath, content);
|
|
292
|
-
await this.memory.storeIndexed(indexed);
|
|
293
|
-
|
|
384
|
+
const stored = await this.memory.storeIndexed(indexed);
|
|
385
|
+
|
|
386
|
+
if (stored) {
|
|
387
|
+
console.error(`[FileWatcher] Indexed ${indexed.facts.length} facts, ${indexed.chunks.length} chunks`);
|
|
388
|
+
return;
|
|
389
|
+
} else {
|
|
390
|
+
throw new Error('storeIndexed returned false');
|
|
391
|
+
}
|
|
294
392
|
} catch (error) {
|
|
295
|
-
|
|
393
|
+
if (attempt === maxRetries) {
|
|
394
|
+
console.error(`[FileWatcher] Failed to index after ${maxRetries} attempts:`, error.message);
|
|
395
|
+
this.stats.errors++;
|
|
396
|
+
} else {
|
|
397
|
+
console.warn(`[FileWatcher] Retry ${attempt}/${maxRetries}:`, error.message);
|
|
398
|
+
await new Promise(resolve => setTimeout(resolve, 100 * Math.pow(2, attempt - 1)));
|
|
399
|
+
}
|
|
296
400
|
}
|
|
297
401
|
}
|
|
298
402
|
}
|
|
@@ -302,25 +406,73 @@ export class FileWatcher {
|
|
|
302
406
|
return ext === '.md' || ext === '.markdown';
|
|
303
407
|
}
|
|
304
408
|
|
|
409
|
+
async indexLargeFile(filePath, content, contentHash, stats) {
|
|
410
|
+
const chunkSize = 500 * 1024;
|
|
411
|
+
try {
|
|
412
|
+
const lines = content.split('\n');
|
|
413
|
+
let chunkContent = '';
|
|
414
|
+
let chunkIndex = 0;
|
|
415
|
+
|
|
416
|
+
for (let i = 0; i < lines.length; i++) {
|
|
417
|
+
chunkContent += lines[i] + '\n';
|
|
418
|
+
|
|
419
|
+
if (chunkContent.length >= chunkSize || i === lines.length - 1) {
|
|
420
|
+
console.error(`[FileWatcher] Indexing chunk ${chunkIndex + 1} of large file`);
|
|
421
|
+
|
|
422
|
+
if (this.isMarkdownFile(filePath)) {
|
|
423
|
+
await this.indexMarkdownFile(filePath, chunkContent, contentHash + '_' + chunkIndex, stats);
|
|
424
|
+
} else {
|
|
425
|
+
await this.indexCodeFile(filePath, chunkContent, contentHash + '_' + chunkIndex, stats);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
chunkIndex++;
|
|
429
|
+
chunkContent = '';
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
console.error(`[FileWatcher] Large file indexed in ${chunkIndex} chunks`);
|
|
434
|
+
this.stats.filesChanged++;
|
|
435
|
+
|
|
436
|
+
} catch (error) {
|
|
437
|
+
console.error(`[FileWatcher] Failed to index large file:`, error.message);
|
|
438
|
+
this.stats.errors++;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
|
|
305
442
|
getStats() {
|
|
306
443
|
return {
|
|
307
444
|
...this.stats,
|
|
308
445
|
pendingUpdates: this.pendingUpdates.size,
|
|
309
|
-
isProcessing: this.isProcessing
|
|
446
|
+
isProcessing: this.isProcessing,
|
|
447
|
+
isRunning: this.isRunning
|
|
310
448
|
};
|
|
311
449
|
}
|
|
312
450
|
|
|
313
451
|
async stop() {
|
|
452
|
+
if (!this.isRunning) return;
|
|
453
|
+
|
|
454
|
+
console.error('[FileWatcher] Stopping...');
|
|
455
|
+
|
|
456
|
+
// Stop cleanup interval
|
|
457
|
+
if (this.cleanupInterval) {
|
|
458
|
+
clearInterval(this.cleanupInterval);
|
|
459
|
+
this.cleanupInterval = null;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
// Process remaining updates
|
|
463
|
+
if (this.pendingUpdates.size > 0) {
|
|
464
|
+
console.error(`[FileWatcher] Processing remaining ${this.pendingUpdates.size} updates...`);
|
|
465
|
+
await this.processQueue();
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
// Close watcher
|
|
314
469
|
if (this.watcher) {
|
|
315
|
-
// Process any remaining updates
|
|
316
|
-
if (this.pendingUpdates.size > 0) {
|
|
317
|
-
await this.processQueue();
|
|
318
|
-
}
|
|
319
|
-
|
|
320
470
|
await this.watcher.close();
|
|
321
471
|
this.watcher = null;
|
|
322
|
-
console.error(`[FileWatcher] Stopped`);
|
|
323
472
|
}
|
|
473
|
+
|
|
474
|
+
this.isRunning = false;
|
|
475
|
+
console.error('[FileWatcher] Stopped');
|
|
324
476
|
}
|
|
325
477
|
}
|
|
326
478
|
|