@grec0/memory-bank-mcp 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +420 -425
- package/dist/common/chunker.js +515 -3
- package/dist/common/embeddingService.js +51 -39
- package/dist/common/fileScanner.js +48 -29
- package/dist/common/indexManager.js +85 -46
- package/dist/common/logger.js +54 -0
- package/dist/common/vectorStore.js +47 -4
- package/dist/index.js +1 -1
- package/dist/tools/analyzeCoverage.js +66 -46
- package/dist/tools/indexCode.js +1 -0
- package/package.json +2 -1
- package/dist/common/setup.js +0 -49
- package/dist/common/utils.js +0 -215
- package/dist/operations/boardMemberships.js +0 -186
- package/dist/operations/boards.js +0 -268
- package/dist/operations/cards.js +0 -426
- package/dist/operations/comments.js +0 -249
- package/dist/operations/labels.js +0 -258
- package/dist/operations/lists.js +0 -157
- package/dist/operations/projects.js +0 -102
- package/dist/operations/tasks.js +0 -238
- package/dist/tools/board-summary.js +0 -151
- package/dist/tools/card-details.js +0 -106
- package/dist/tools/create-card-with-tasks.js +0 -81
- package/dist/tools/workflow-actions.js +0 -145
package/dist/common/embeddingService.js:

```diff
@@ -2,6 +2,7 @@
  * @fileoverview Embedding service for Memory Bank using OpenAI
  * Generates vector embeddings for code chunks
  */
+import { logger } from "./logger.js";
 import OpenAI from "openai";
 import * as crypto from "crypto";
 import * as fs from "fs";
@@ -49,7 +50,7 @@ export class EmbeddingService {
         }
     }
     /**
-     * Saves embedding cache to disk
+     * Saves embedding cache to disk using streams to handle large files
      */
     saveCache() {
         try {
@@ -58,8 +59,16 @@ export class EmbeddingService {
                 fs.mkdirSync(dir, { recursive: true });
             }
             const entries = Array.from(this.cache.values());
-            fs.
-
+            const stream = fs.createWriteStream(this.options.cachePath, { encoding: 'utf8' });
+            stream.write('[\n');
+            entries.forEach((entry, index) => {
+                const isLast = index === entries.length - 1;
+                const line = JSON.stringify(entry) + (isLast ? '' : ',\n');
+                stream.write(line);
+            });
+            stream.write('\n]');
+            stream.end();
+            console.error(`Updated embedding cache (Total: ${entries.length} entries)`);
         }
         catch (error) {
             console.error(`Warning: Could not save embedding cache: ${error}`);
```
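The streamed write avoids materializing one giant `JSON.stringify` string (and bumping into V8's string-length cap) when the cache grows large, while the on-disk format remains a plain JSON array. A minimal sketch of a compatible reader, assuming entries are keyed by a chunk id as the surrounding code suggests (`loadCacheEntries` is a hypothetical name, not a package export):

```js
import * as fs from "fs";

// Hypothetical reader for the streamed format written above: the file is
// still one valid JSON array, so a plain JSON.parse round-trips it.
// The entry shape (keyed by chunk id) is assumed, not confirmed by the diff.
async function loadCacheEntries(cachePath) {
    if (!fs.existsSync(cachePath)) return new Map();
    const raw = await fs.promises.readFile(cachePath, "utf8");
    const entries = JSON.parse(raw);
    return new Map(entries.map((entry) => [entry.id, entry]));
}
```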
```diff
@@ -136,14 +145,14 @@ export class EmbeddingService {
             // Check if it's a rate limit error
             if (error?.status === 429 || error?.code === "rate_limit_exceeded") {
                 const backoffMs = Math.pow(2, attempt) * 1000; // Exponential backoff
-
+                logger.warn(`Rate limit hit, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
                 await this.sleep(backoffMs);
                 continue;
             }
             // Check if it's a temporary error
             if (error?.status >= 500 && error?.status < 600) {
                 const backoffMs = Math.pow(2, attempt) * 1000;
-
+                logger.warn(`Server error ${error.status}, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
                 await this.sleep(backoffMs);
                 continue;
             }
```
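Both retry branches use the same schedule: `2^attempt * 1000` ms, i.e. 1 s, 2 s, 4 s across three attempts. The same policy as a standalone helper, a sketch assuming the enclosing loop runs `attempt` from 0 as the hunk implies (`withBackoff` is illustrative, not a package export):

```js
// Exponential-backoff wrapper mirroring the retry policy above:
// retry on 429/rate-limit and 5xx errors, waiting 2^attempt seconds.
async function withBackoff(fn, maxRetries = 3) {
    let lastError;
    for (let attempt = 0; attempt < maxRetries; attempt++) {
        try {
            return await fn();
        }
        catch (error) {
            lastError = error;
            const retryable = error?.status === 429 ||
                error?.code === "rate_limit_exceeded" ||
                (error?.status >= 500 && error?.status < 600);
            if (!retryable) throw error;
            const backoffMs = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s, ...
            await new Promise((resolve) => setTimeout(resolve, backoffMs));
        }
    }
    throw lastError;
}
```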
```diff
@@ -185,9 +194,10 @@ export class EmbeddingService {
     /**
      * Generates embeddings for multiple chunks in batches
      */
-    async generateBatchEmbeddings(chunks) {
+    async generateBatchEmbeddings(chunks, options = {}) {
         const results = [];
         const toGenerate = [];
+        const shouldSave = options.autoSave !== undefined ? options.autoSave : true;
         // Check cache and collect chunks that need generation
         for (let i = 0; i < chunks.length; i++) {
             const chunk = chunks[i];
@@ -204,41 +214,43 @@ export class EmbeddingService {
                 toGenerate.push({ ...chunk, index: i });
             }
         }
-        … (old lines 207-228, truncated in the diff view)
+        if (toGenerate.length > 0) {
+            logger.info(`Generating embeddings: ${toGenerate.length} new, ${chunks.length - toGenerate.length} cached`);
+            // Process in batches
+            for (let i = 0; i < toGenerate.length; i += this.options.batchSize) {
+                const batch = toGenerate.slice(i, i + this.options.batchSize);
+                const batchTexts = batch.map((item) => item.content);
+                logger.info(`Processing batch ${Math.floor(i / this.options.batchSize) + 1}/${Math.ceil(toGenerate.length / this.options.batchSize)}`);
+                try {
+                    const vectors = await this.generateBatchWithRetry(batchTexts);
+                    // Store results and cache
+                    for (let j = 0; j < batch.length; j++) {
+                        const item = batch[j];
+                        const vector = vectors[j];
+                        // Cache the result
+                        this.cacheEmbedding(item.id, item.content, vector);
+                        // Estimate tokens
+                        const tokens = Math.ceil(item.content.length / 4);
+                        results[item.index] = {
+                            chunkId: item.id,
+                            vector,
+                            model: this.options.model,
+                            tokens,
+                        };
+                    }
+                }
+                catch (error) {
+                    logger.error(`Error generating batch embeddings:`, error);
+                    throw error;
+                }
+                // Small delay between batches to avoid rate limits
+                if (i + this.options.batchSize < toGenerate.length) {
+                    await this.sleep(100);
                 }
-            }
-            catch (error) {
-                console.error(`Error generating batch embeddings: ${error}`);
-                throw error;
-            }
-            // Small delay between batches to avoid rate limits
-            if (i + this.options.batchSize < toGenerate.length) {
-                await this.sleep(100);
             }
         }
         // Save cache after batch processing
-        if (this.options.enableCache && toGenerate.length > 0) {
+        if (shouldSave && this.options.enableCache && toGenerate.length > 0) {
             this.saveCache();
         }
         return results;
```
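The new `autoSave` option lets a caller run several `generateBatchEmbeddings` calls and persist the cache once at the end, which is exactly how `indexFiles` in indexManager.js uses it below. A usage sketch; `service` stands in for a constructed `EmbeddingService`:

```js
// Sketch: defer cache persistence across several embedding calls.
// `service` is an initialized EmbeddingService (construction not shown
// in this diff); chunks carry { id, content } as indexFile feeds them.
async function embedAll(service, chunkBatches) {
    const all = [];
    for (const chunks of chunkBatches) {
        // Skip the per-call cache write...
        all.push(...await service.generateBatchEmbeddings(chunks, { autoSave: false }));
    }
    service.saveCache(); // ...and do one streamed write at the end
    return all;
}
```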
```diff
@@ -256,7 +268,7 @@ export class EmbeddingService {
             return response.data[0].embedding;
         }
         catch (error) {
-
+            logger.error(`Error generating query embedding:`, error);
             throw error;
         }
     }
@@ -268,7 +280,7 @@ export class EmbeddingService {
         if (fs.existsSync(this.options.cachePath)) {
             fs.unlinkSync(this.options.cachePath);
         }
-
+        logger.info("Embedding cache cleared");
     }
     /**
      * Gets cache statistics
```
@@ -6,6 +6,7 @@ import * as fs from "fs";
|
|
|
6
6
|
import * as path from "path";
|
|
7
7
|
import * as crypto from "crypto";
|
|
8
8
|
import ignoreLib from "ignore";
|
|
9
|
+
import { logger } from "./logger.js";
|
|
9
10
|
// Handle ignore library export
|
|
10
11
|
const ignore = typeof ignoreLib === 'function' ? ignoreLib : ignoreLib.default;
|
|
11
12
|
// Language detection by file extension
|
|
@@ -76,10 +77,10 @@ export function loadIgnorePatterns(rootPath) {
|
|
|
76
77
|
try {
|
|
77
78
|
const gitignoreContent = fs.readFileSync(gitignorePath, "utf-8");
|
|
78
79
|
ig.add(gitignoreContent);
|
|
79
|
-
|
|
80
|
+
logger.debug(`Loaded .gitignore patterns from ${gitignorePath}`);
|
|
80
81
|
}
|
|
81
82
|
catch (error) {
|
|
82
|
-
|
|
83
|
+
logger.warn(`Could not read .gitignore: ${error}`);
|
|
83
84
|
}
|
|
84
85
|
}
|
|
85
86
|
// Load .memoryignore if exists
|
|
@@ -88,19 +89,19 @@ export function loadIgnorePatterns(rootPath) {
|
|
|
88
89
|
try {
|
|
89
90
|
const memoryignoreContent = fs.readFileSync(memoryignorePath, "utf-8");
|
|
90
91
|
ig.add(memoryignoreContent);
|
|
91
|
-
|
|
92
|
+
logger.debug(`Loaded .memoryignore patterns from ${memoryignorePath}`);
|
|
92
93
|
}
|
|
93
94
|
catch (error) {
|
|
94
|
-
|
|
95
|
+
logger.warn(`Could not read .memoryignore: ${error}`);
|
|
95
96
|
}
|
|
96
97
|
}
|
|
97
98
|
return ig;
|
|
98
99
|
}
|
|
99
100
|
/**
|
|
100
|
-
* Calculates SHA-256 hash of file content
|
|
101
|
+
* Calculates SHA-256 hash of file content asynchronously
|
|
101
102
|
*/
|
|
102
|
-
export function calculateFileHash(filePath) {
|
|
103
|
-
const content = fs.
|
|
103
|
+
export async function calculateFileHash(filePath) {
|
|
104
|
+
const content = await fs.promises.readFile(filePath);
|
|
104
105
|
return crypto.createHash("sha256").update(content).digest("hex");
|
|
105
106
|
}
|
|
106
107
|
/**
|
|
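Since `calculateFileHash` is now async, call sites must `await` it, and independent files can be hashed concurrently, which the scanner below exploits. A small sketch with placeholder paths:

```js
import { calculateFileHash } from "./fileScanner.js";

// Hash several files concurrently now that the function returns a Promise.
const paths = ["src/a.js", "src/b.js", "src/c.js"]; // placeholders
const hashes = await Promise.all(paths.map((p) => calculateFileHash(p)));
console.log(Object.fromEntries(paths.map((p, i) => [p, hashes[i]])));
```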
```diff
@@ -138,17 +139,20 @@ export function isCodeFile(filePath) {
     return codeFileNames.has(basename);
 }
 /**
- * Recursively scans directory for code files
+ * Recursively scans directory for code files asynchronously
  */
-function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
+async function scanDirectoryRecursive(dirPath, rootPath, ig, options, results, concurrencyLimit = 20) {
     let entries;
     try {
-        entries = fs.
+        entries = await fs.promises.readdir(dirPath, { withFileTypes: true });
     }
     catch (error) {
-
+        logger.warn(`Could not read directory ${dirPath}: ${error}`);
         return;
     }
+    // Process subdirectories sequentially to avoid recursion explosion
+    // but process files in current directory in parallel
+    const filesToProcess = [];
     for (const entry of entries) {
         const fullPath = path.join(dirPath, entry.name);
         const relativePath = path.relative(rootPath, fullPath);
@@ -163,23 +167,35 @@ function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
         }
         if (entry.isDirectory()) {
             if (options.recursive) {
-                scanDirectoryRecursive(fullPath, rootPath, ig, options, results);
+                await scanDirectoryRecursive(fullPath, rootPath, ig, options, results, concurrencyLimit);
             }
         }
         else if (entry.isFile()) {
+            filesToProcess.push(entry);
+        }
+    }
+    // Process files in batches
+    for (let i = 0; i < filesToProcess.length; i += concurrencyLimit) {
+        const batch = filesToProcess.slice(i, i + concurrencyLimit);
+        await Promise.all(batch.map(async (entry) => {
+            const fullPath = path.join(dirPath, entry.name);
+            // Calculate relative path from PROJECT root (not just scan root)
+            // And strictly normalize to forward slashes for consistency
+            const relativePathRaw = path.relative(options.projectRoot, fullPath);
+            const relativePath = relativePathRaw.split(path.sep).join("/");
             try {
                 // Check if it's a code file
                 if (!isCodeFile(fullPath)) {
-
+                    return;
                 }
-                const stats = fs.
+                const stats = await fs.promises.stat(fullPath);
                 // Check file size limit
                 if (stats.size > options.maxFileSize) {
-
-
+                    logger.warn(`Skipping large file (${stats.size} bytes): ${relativePath}`);
+                    return;
                 }
                 // Calculate hash and collect metadata
-                const hash = calculateFileHash(fullPath);
+                const hash = await calculateFileHash(fullPath);
                 const language = detectLanguage(fullPath);
                 const extension = path.extname(fullPath);
                 results.push({
```
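The new scanner walks subdirectories one at a time but stats and hashes up to `concurrencyLimit` files per wave via sliced `Promise.all` batches. The pattern in isolation (`mapInBatches` is illustrative, not a package export):

```js
// Batched concurrency as used by the scanner: at most `limit` workers
// in flight per wave; each wave waits for its slowest item.
async function mapInBatches(items, worker, limit = 20) {
    const out = [];
    for (let i = 0; i < items.length; i += limit) {
        const batch = items.slice(i, i + limit);
        out.push(...await Promise.all(batch.map(worker)));
    }
    return out;
}
```

A work-queue pool would keep the limit saturated instead of stalling on each wave's slowest file, but the sliced form is simpler and still bounds open file descriptors.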
```diff
@@ -193,17 +209,18 @@ function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
                 });
             }
             catch (error) {
-
+                logger.warn(`Could not process file ${fullPath}: ${error}`);
             }
-        }
+        }));
     }
 }
 /**
- * Scans workspace for code files
+ * Scans workspace for code files asynchronously
  */
-export function scanFiles(options) {
+export async function scanFiles(options) {
     const fullOptions = {
         rootPath: options.rootPath,
+        projectRoot: options.projectRoot || options.rootPath,
         recursive: options.recursive !== undefined ? options.recursive : true,
         includeHidden: options.includeHidden !== undefined ? options.includeHidden : false,
         maxFileSize: options.maxFileSize || 10 * 1024 * 1024, // 10MB default
@@ -212,23 +229,23 @@ export function scanFiles(options) {
     if (!fs.existsSync(fullOptions.rootPath)) {
         throw new Error(`Root path does not exist: ${fullOptions.rootPath}`);
     }
-    const stats = fs.
+    const stats = await fs.promises.stat(fullOptions.rootPath);
     if (!stats.isDirectory()) {
         throw new Error(`Root path is not a directory: ${fullOptions.rootPath}`);
     }
-
+    logger.info(`Scanning files in: ${fullOptions.rootPath}`);
     // Load ignore patterns
     const ig = loadIgnorePatterns(fullOptions.rootPath);
     // Scan files
     const results = [];
-    scanDirectoryRecursive(fullOptions.rootPath, fullOptions.rootPath, ig, fullOptions, results);
-
+    await scanDirectoryRecursive(fullOptions.rootPath, fullOptions.rootPath, ig, fullOptions, results);
+    logger.info(`Found ${results.length} code files to index`);
     return results;
 }
 /**
  * Scans a single file and returns its metadata
  */
-export function scanSingleFile(filePath, rootPath) {
+export async function scanSingleFile(filePath, rootPath, projectRoot) {
     try {
         if (!fs.existsSync(filePath)) {
             throw new Error(`File does not exist: ${filePath}`);
@@ -240,10 +257,12 @@ export function scanSingleFile(filePath, rootPath) {
         if (!isCodeFile(filePath)) {
             return null;
         }
-        const hash = calculateFileHash(filePath);
+        const hash = await calculateFileHash(filePath);
         const language = detectLanguage(filePath);
         const extension = path.extname(filePath);
-
+        // Calculate relative path from PROJECT root
+        const relativePathRaw = path.relative(projectRoot || rootPath, filePath);
+        const relativePath = relativePathRaw.split(path.sep).join("/");
         return {
             path: relativePath,
             absolutePath: filePath,
@@ -255,7 +274,7 @@ export function scanSingleFile(filePath, rootPath) {
         };
     }
     catch (error) {
-
+        logger.error(`Error scanning file ${filePath}:`, error);
         return null;
     }
 }
```
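`scanFiles` is now async and accepts a separate `projectRoot`, so recorded paths stay relative to the project root, and forward-slashed, even when only a subdirectory is scanned. A usage sketch with placeholder paths:

```js
import { scanFiles } from "./fileScanner.js";

// Scan one subfolder but record paths relative to the repo root,
// normalized to forward slashes on every platform.
const files = await scanFiles({
    rootPath: "/repo/src",   // directory to walk (placeholder)
    projectRoot: "/repo",    // paths stored relative to this root
    recursive: true,
});
console.log(files.map((f) => f.path)); // e.g. ["src/index.js", ...]
```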
@@ -6,6 +6,8 @@ import * as fs from "fs";
|
|
|
6
6
|
import * as path from "path";
|
|
7
7
|
import { scanFiles, scanSingleFile } from "./fileScanner.js";
|
|
8
8
|
import { chunkCode } from "./chunker.js";
|
|
9
|
+
import { logger } from "./logger.js";
|
|
10
|
+
import * as crypto from "crypto";
|
|
9
11
|
/**
|
|
10
12
|
* Index manager coordinating the entire indexing pipeline
|
|
11
13
|
*/
|
|
@@ -14,12 +16,22 @@ export class IndexManager {
|
|
|
14
16
|
vectorStore;
|
|
15
17
|
metadataPath;
|
|
16
18
|
metadata;
|
|
17
|
-
|
|
19
|
+
projectRoot;
|
|
20
|
+
projectId;
|
|
21
|
+
constructor(embeddingService, vectorStore, storagePath = ".memorybank", projectRoot) {
|
|
18
22
|
this.embeddingService = embeddingService;
|
|
19
23
|
this.vectorStore = vectorStore;
|
|
20
24
|
this.metadataPath = path.join(storagePath, "index-metadata.json");
|
|
25
|
+
this.projectRoot = projectRoot || process.cwd();
|
|
26
|
+
this.projectId = this.generateProjectId(this.projectRoot);
|
|
21
27
|
this.metadata = this.loadMetadata();
|
|
22
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* Generates a unique project ID from the project root path
|
|
31
|
+
*/
|
|
32
|
+
generateProjectId(projectRoot) {
|
|
33
|
+
return crypto.createHash("sha256").update(projectRoot).digest("hex").substring(0, 16);
|
|
34
|
+
}
|
|
23
35
|
/**
|
|
24
36
|
* Loads index metadata from disk
|
|
25
37
|
*/
|
|
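The project ID is the first 16 hex characters of a SHA-256 over the project root path, giving every stored chunk a stable workspace scope. A standalone check of the derivation; note that because it is path-derived, the ID changes if the project moves on disk:

```js
import { createHash } from "crypto";

// Same derivation as generateProjectId above: deterministic,
// 16 hex chars, unique per workspace path.
function projectIdFor(projectRoot) {
    return createHash("sha256").update(projectRoot).digest("hex").substring(0, 16);
}

console.log(projectIdFor("/home/me/repo"));  // stable across runs
console.log(projectIdFor("/home/me/other")); // different path, different ID
```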
```diff
@@ -31,7 +43,7 @@ export class IndexManager {
             }
         }
         catch (error) {
-
+            logger.warn(`Could not load index metadata: ${error}`);
         }
         return {
             version: "1.0",
@@ -51,7 +63,7 @@ export class IndexManager {
             fs.writeFileSync(this.metadataPath, JSON.stringify(this.metadata, null, 2));
         }
         catch (error) {
-
+            logger.warn(`Could not save index metadata: ${error}`);
         }
     }
     /**
@@ -73,14 +85,14 @@ export class IndexManager {
     /**
      * Indexes a single file
      */
-    async indexFile(file, forceReindex = false) {
+    async indexFile(file, forceReindex = false, saveMetadata = true) {
         try {
             // Check if file needs reindexing
             if (!this.needsReindexing(file, forceReindex)) {
-
+                logger.debug(`Skipping ${file.path} (no changes)`);
                 return { chunksCreated: 0 };
             }
-
+            logger.info(`Indexing: ${file.path}`);
             // Read file content
             const content = fs.readFileSync(file.absolutePath, "utf-8");
             // Get chunk size from environment or use defaults
@@ -95,20 +107,26 @@ export class IndexManager {
                 chunkOverlap,
             });
             if (chunks.length === 0) {
-
+                logger.warn(`No chunks created for ${file.path}`);
+                return { chunksCreated: 0 };
+            }
+            logger.debug(`  Created ${chunks.length} chunks`);
+            // Filter out invalid chunks (fail-safe)
+            const validChunks = chunks.filter(c => c.content && c.content.trim().length > 0 && c.content.trim() !== "}");
+            if (validChunks.length === 0) {
+                logger.warn(`No valid chunks after filtering for ${file.path}`);
                 return { chunksCreated: 0 };
             }
-            console.error(`  Created ${chunks.length} chunks`);
             // Generate embeddings
-            const embeddingInputs =
+            const embeddingInputs = validChunks.map((chunk) => ({
                 id: chunk.id,
                 content: chunk.content,
             }));
             const embeddings = await this.embeddingService.generateBatchEmbeddings(embeddingInputs);
-
+            logger.debug(`  Generated ${embeddings.length} embeddings`);
             // Prepare chunk records for storage
             const timestamp = Date.now();
-            const chunkRecords =
+            const chunkRecords = validChunks.map((chunk, i) => ({
                 id: chunk.id,
                 vector: embeddings[i].vector,
                 filePath: chunk.filePath,
@@ -116,17 +134,18 @@ export class IndexManager {
                 startLine: chunk.startLine,
                 endLine: chunk.endLine,
                 chunkType: chunk.chunkType,
-                name: chunk.name,
+                name: chunk.name || "",
                 language: chunk.language,
                 fileHash: file.hash,
                 timestamp,
                 context: chunk.context,
+                projectId: this.projectId,
             }));
             // Delete old chunks for this file
-            await this.vectorStore.deleteChunksByFile(file.path);
+            await this.vectorStore.deleteChunksByFile(file.path, this.projectId);
             // Insert new chunks
             await this.vectorStore.insertChunks(chunkRecords);
-
+            logger.debug(`  Stored ${chunkRecords.length} chunks in vector store`);
             // Update metadata
             this.metadata.files[file.path] = {
                 hash: file.hash,
@@ -134,12 +153,14 @@ export class IndexManager {
                 chunkCount: chunks.length,
             };
             this.metadata.lastIndexed = timestamp;
-
+            if (saveMetadata) {
+                this.saveMetadata();
+            }
             return { chunksCreated: chunks.length };
         }
         catch (error) {
             const errorMsg = `Error indexing ${file.path}: ${error}`;
-
+            logger.error(errorMsg);
             return { chunksCreated: 0, error: errorMsg };
         }
     }
@@ -148,19 +169,20 @@ export class IndexManager {
      */
     async indexFiles(options) {
         const startTime = Date.now();
-
-
-
+        logger.info(`=== Starting indexing process ===`);
+        logger.info(`Root path: ${options.rootPath}`);
+        logger.info(`Force reindex: ${options.forceReindex || false}`);
         // Initialize vector store
         await this.vectorStore.initialize();
         // Scan files
-
-        const files = scanFiles({
+        logger.info(`Scanning files...`);
+        const files = await scanFiles({
             rootPath: options.rootPath,
+            projectRoot: options.projectRoot,
             recursive: options.recursive !== undefined ? options.recursive : true,
         });
         if (files.length === 0) {
-
+            logger.warn("No files found to index");
             return {
                 filesProcessed: 0,
                 chunksCreated: 0,
@@ -170,9 +192,9 @@ export class IndexManager {
         }
         // Filter files that need reindexing
         const filesToIndex = files.filter((file) => this.needsReindexing(file, options.forceReindex || false));
-
+        logger.info(`Found ${files.length} files, ${filesToIndex.length} need indexing`);
         if (filesToIndex.length === 0) {
-
+            logger.info("All files are up to date");
             return {
                 filesProcessed: 0,
                 chunksCreated: 0,
@@ -180,28 +202,45 @@ export class IndexManager {
                 duration: Date.now() - startTime,
             };
         }
-        // Index files
+        // Index files in batches
         const errors = [];
         let totalChunks = 0;
         let processedFiles = 0;
-
-
-
-        const
-
-
+        const batchSize = 5; // Concurrency limit
+        for (let i = 0; i < filesToIndex.length; i += batchSize) {
+            const batch = filesToIndex.slice(i, i + batchSize);
+            const batchNum = Math.floor(i / batchSize) + 1;
+            const totalBatches = Math.ceil(filesToIndex.length / batchSize);
+            logger.info(`Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`);
+            const batchPromises = batch.map(async (file, index) => {
+                logger.debug(`[${i + index + 1}/${filesToIndex.length}] Processing ${file.path}`);
+                return this.indexFile(file, options.forceReindex || false, false); // Don't save metadata per file
+            });
+            const results = await Promise.all(batchPromises);
+            // Process results
+            for (const result of results) {
+                if (result.error) {
+                    errors.push(result.error);
+                }
+                else {
+                    processedFiles++;
+                    totalChunks += result.chunksCreated;
+                }
             }
-
-
-
+            // Save metadata and embedding cache after each batch
+            this.saveMetadata();
+            this.embeddingService.saveCache();
+            // Small delay between batches
+            if (i + batchSize < filesToIndex.length) {
+                await new Promise(resolve => setTimeout(resolve, 100));
             }
         }
         const duration = Date.now() - startTime;
-
-
-
-
-
+        logger.info(`=== Indexing complete ===`);
+        logger.info(`Files processed: ${processedFiles}`);
+        logger.info(`Chunks created: ${totalChunks}`);
+        logger.info(`Errors: ${errors.length}`);
+        logger.info(`Duration: ${(duration / 1000).toFixed(2)}s`);
         return {
             filesProcessed: processedFiles,
             chunksCreated: totalChunks,
```
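`indexFiles` now processes five files per wave and defers both `saveMetadata()` and the embedding-cache write to once per batch rather than once per file. A usage sketch with placeholder paths; `manager` stands in for an `IndexManager` built via `createIndexManager` below:

```js
// Sketch: run a full index pass; `manager` is an IndexManager instance
// (construction not shown in this diff).
async function runIndex(manager) {
    const summary = await manager.indexFiles({
        rootPath: "/repo",      // placeholder paths
        projectRoot: "/repo",
        forceReindex: false,
    });
    console.log(`${summary.filesProcessed} files, ${summary.chunksCreated} chunks in ${summary.duration}ms`);
}
```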
```diff
@@ -212,10 +251,10 @@ export class IndexManager {
     /**
      * Re-indexes a specific file by path
      */
-    async reindexFile(filePath, rootPath) {
+    async reindexFile(filePath, rootPath, projectRoot) {
         try {
             // Scan the specific file
-            const file = scanSingleFile(filePath, rootPath);
+            const file = await scanSingleFile(filePath, rootPath, projectRoot);
             if (!file) {
                 return {
                     success: false,
@@ -310,23 +349,23 @@ export class IndexManager {
         this.saveMetadata();
         // Clear embedding cache
         this.embeddingService.clearCache();
-
+        logger.info("Index cleared");
     }
     /**
      * Removes a file from the index
      */
     async removeFile(filePath) {
         await this.vectorStore.initialize();
-        await this.vectorStore.deleteChunksByFile(filePath);
+        await this.vectorStore.deleteChunksByFile(filePath, this.projectId);
         delete this.metadata.files[filePath];
         this.saveMetadata();
-
+        logger.info(`Removed ${filePath} from index`);
     }
 }
 /**
  * Creates an index manager from environment variables
  */
-export function createIndexManager(embeddingService, vectorStore) {
+export function createIndexManager(embeddingService, vectorStore, workspaceRoot) {
     const storagePath = process.env.MEMORYBANK_STORAGE_PATH || ".memorybank";
-    return new IndexManager(embeddingService, vectorStore, storagePath);
+    return new IndexManager(embeddingService, vectorStore, storagePath, workspaceRoot);
 }
```