@grec0/memory-bank-mcp 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
  * @fileoverview Embedding service for Memory Bank using OpenAI
  * Generates vector embeddings for code chunks
  */
+ import { logger } from "./logger.js";
  import OpenAI from "openai";
  import * as crypto from "crypto";
  import * as fs from "fs";
@@ -49,7 +50,7 @@ export class EmbeddingService {
  }
  }
  /**
- * Saves embedding cache to disk
+ * Saves embedding cache to disk using streams to handle large files
  */
  saveCache() {
  try {
@@ -58,8 +59,16 @@ export class EmbeddingService {
  fs.mkdirSync(dir, { recursive: true });
  }
  const entries = Array.from(this.cache.values());
- fs.writeFileSync(this.options.cachePath, JSON.stringify(entries, null, 2));
- console.error(`Saved ${entries.length} embeddings to cache`);
+ const stream = fs.createWriteStream(this.options.cachePath, { encoding: 'utf8' });
+ stream.write('[\n');
+ entries.forEach((entry, index) => {
+ const isLast = index === entries.length - 1;
+ const line = JSON.stringify(entry) + (isLast ? '' : ',\n');
+ stream.write(line);
+ });
+ stream.write('\n]');
+ stream.end();
+ console.error(`Updated embedding cache (Total: ${entries.length} entries)`);
  }
  catch (error) {
  console.error(`Warning: Could not save embedding cache: ${error}`);
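The hunk above replaces a single JSON.stringify of the whole cache with incremental per-entry writes, so peak memory stays near one serialized entry instead of the full document. One caveat: stream.end() only schedules the flush, so the log line can run before the bytes reach disk. A minimal standalone sketch of the same pattern that also awaits the flush — writeJsonArray, filePath, and items are illustrative names, not part of the package:

    import * as fs from "fs";

    // Stream a JSON array to disk one entry at a time; resolve only
    // once the writable stream has flushed everything it buffered.
    function writeJsonArray(filePath, items) {
      return new Promise((resolve, reject) => {
        const stream = fs.createWriteStream(filePath, { encoding: "utf8" });
        stream.on("error", reject);
        stream.on("finish", resolve); // fires after end() once all writes flush
        stream.write("[\n");
        items.forEach((item, index) => {
          stream.write(JSON.stringify(item) + (index === items.length - 1 ? "" : ",\n"));
        });
        stream.write("\n]");
        stream.end();
      });
    }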
@@ -136,14 +145,14 @@ export class EmbeddingService {
  // Check if it's a rate limit error
  if (error?.status === 429 || error?.code === "rate_limit_exceeded") {
  const backoffMs = Math.pow(2, attempt) * 1000; // Exponential backoff
- console.error(`Rate limit hit, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
+ logger.warn(`Rate limit hit, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
  await this.sleep(backoffMs);
  continue;
  }
  // Check if it's a temporary error
  if (error?.status >= 500 && error?.status < 600) {
  const backoffMs = Math.pow(2, attempt) * 1000;
- console.error(`Server error ${error.status}, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
+ logger.warn(`Server error ${error.status}, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})`);
  await this.sleep(backoffMs);
  continue;
  }
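These retry branches follow textbook exponential backoff: the delay is 2^attempt seconds, so successive retries wait 1s, 2s, 4s, and so on. The enclosing retry loop is outside this hunk, so purely as a self-contained illustration of the shape (withBackoff, doCall, and maxRetries are placeholder names, not the package's API):

    // Retry a call with exponentially growing delays on retryable
    // HTTP errors (429 and 5xx), rethrowing on the final attempt.
    async function withBackoff(doCall, maxRetries = 3) {
      for (let attempt = 0; attempt < maxRetries; attempt++) {
        try {
          return await doCall();
        } catch (error) {
          const retryable =
            error?.status === 429 || (error?.status >= 500 && error?.status < 600);
          if (!retryable || attempt === maxRetries - 1) throw error;
          const backoffMs = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s, ...
          await new Promise((resolve) => setTimeout(resolve, backoffMs));
        }
      }
    }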
@@ -185,9 +194,10 @@ export class EmbeddingService {
  /**
  * Generates embeddings for multiple chunks in batches
  */
- async generateBatchEmbeddings(chunks) {
+ async generateBatchEmbeddings(chunks, options = {}) {
  const results = [];
  const toGenerate = [];
+ const shouldSave = options.autoSave !== undefined ? options.autoSave : true;
  // Check cache and collect chunks that need generation
  for (let i = 0; i < chunks.length; i++) {
  const chunk = chunks[i];
@@ -204,41 +214,43 @@ export class EmbeddingService {
  toGenerate.push({ ...chunk, index: i });
  }
  }
- console.error(`Generating embeddings: ${toGenerate.length} new, ${chunks.length - toGenerate.length} cached`);
- // Process in batches
- for (let i = 0; i < toGenerate.length; i += this.options.batchSize) {
- const batch = toGenerate.slice(i, i + this.options.batchSize);
- const batchTexts = batch.map((item) => item.content);
- console.error(`Processing batch ${Math.floor(i / this.options.batchSize) + 1}/${Math.ceil(toGenerate.length / this.options.batchSize)}`);
- try {
- const vectors = await this.generateBatchWithRetry(batchTexts);
- // Store results and cache
- for (let j = 0; j < batch.length; j++) {
- const item = batch[j];
- const vector = vectors[j];
- // Cache the result
- this.cacheEmbedding(item.id, item.content, vector);
- // Estimate tokens
- const tokens = Math.ceil(item.content.length / 4);
- results[item.index] = {
- chunkId: item.id,
- vector,
- model: this.options.model,
- tokens,
- };
+ if (toGenerate.length > 0) {
+ logger.info(`Generating embeddings: ${toGenerate.length} new, ${chunks.length - toGenerate.length} cached`);
+ // Process in batches
+ for (let i = 0; i < toGenerate.length; i += this.options.batchSize) {
+ const batch = toGenerate.slice(i, i + this.options.batchSize);
+ const batchTexts = batch.map((item) => item.content);
+ logger.info(`Processing batch ${Math.floor(i / this.options.batchSize) + 1}/${Math.ceil(toGenerate.length / this.options.batchSize)}`);
+ try {
+ const vectors = await this.generateBatchWithRetry(batchTexts);
+ // Store results and cache
+ for (let j = 0; j < batch.length; j++) {
+ const item = batch[j];
+ const vector = vectors[j];
+ // Cache the result
+ this.cacheEmbedding(item.id, item.content, vector);
+ // Estimate tokens
+ const tokens = Math.ceil(item.content.length / 4);
+ results[item.index] = {
+ chunkId: item.id,
+ vector,
+ model: this.options.model,
+ tokens,
+ };
+ }
+ }
+ catch (error) {
+ logger.error(`Error generating batch embeddings:`, error);
+ throw error;
+ }
+ // Small delay between batches to avoid rate limits
+ if (i + this.options.batchSize < toGenerate.length) {
+ await this.sleep(100);
  }
- }
- catch (error) {
- console.error(`Error generating batch embeddings: ${error}`);
- throw error;
- }
- // Small delay between batches to avoid rate limits
- if (i + this.options.batchSize < toGenerate.length) {
- await this.sleep(100);
  }
  }
  // Save cache after batch processing
- if (this.options.enableCache && toGenerate.length > 0) {
+ if (shouldSave && this.options.enableCache && toGenerate.length > 0) {
  this.saveCache();
  }
  return results;
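Combined with the saveCache change earlier, the new options parameter lets a caller suppress the per-call cache write and flush once at the end. A hypothetical call site, assuming an already-constructed EmbeddingService instance svc and prepared { id, content } inputs (svc, inputsA, and inputsB are not from the diff):

    // autoSave defaults to true, so existing callers keep the old behavior.
    const first = await svc.generateBatchEmbeddings(inputsA, { autoSave: false });
    const second = await svc.generateBatchEmbeddings(inputsB, { autoSave: false });
    svc.saveCache(); // one stream-based write instead of two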
@@ -256,7 +268,7 @@ export class EmbeddingService {
  return response.data[0].embedding;
  }
  catch (error) {
- console.error(`Error generating query embedding: ${error}`);
+ logger.error(`Error generating query embedding:`, error);
  throw error;
  }
  }
@@ -268,7 +280,7 @@ export class EmbeddingService {
  if (fs.existsSync(this.options.cachePath)) {
  fs.unlinkSync(this.options.cachePath);
  }
- console.error("Embedding cache cleared");
+ logger.info("Embedding cache cleared");
  }
  /**
  * Gets cache statistics
@@ -6,6 +6,7 @@ import * as fs from "fs";
  import * as path from "path";
  import * as crypto from "crypto";
  import ignoreLib from "ignore";
+ import { logger } from "./logger.js";
  // Handle ignore library export
  const ignore = typeof ignoreLib === 'function' ? ignoreLib : ignoreLib.default;
  // Language detection by file extension
@@ -76,10 +77,10 @@ export function loadIgnorePatterns(rootPath) {
  try {
  const gitignoreContent = fs.readFileSync(gitignorePath, "utf-8");
  ig.add(gitignoreContent);
- console.error(`Loaded .gitignore patterns from ${gitignorePath}`);
+ logger.debug(`Loaded .gitignore patterns from ${gitignorePath}`);
  }
  catch (error) {
- console.error(`Warning: Could not read .gitignore: ${error}`);
+ logger.warn(`Could not read .gitignore: ${error}`);
  }
  }
  // Load .memoryignore if exists
@@ -88,19 +89,19 @@ export function loadIgnorePatterns(rootPath) {
  try {
  const memoryignoreContent = fs.readFileSync(memoryignorePath, "utf-8");
  ig.add(memoryignoreContent);
- console.error(`Loaded .memoryignore patterns from ${memoryignorePath}`);
+ logger.debug(`Loaded .memoryignore patterns from ${memoryignorePath}`);
  }
  catch (error) {
- console.error(`Warning: Could not read .memoryignore: ${error}`);
+ logger.warn(`Could not read .memoryignore: ${error}`);
  }
  }
  return ig;
  }
  /**
- * Calculates SHA-256 hash of file content
+ * Calculates SHA-256 hash of file content asynchronously
  */
- export function calculateFileHash(filePath) {
- const content = fs.readFileSync(filePath);
+ export async function calculateFileHash(filePath) {
+ const content = await fs.promises.readFile(filePath);
  return crypto.createHash("sha256").update(content).digest("hex");
  }
  /**
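Note that calculateFileHash still reads the whole file into memory; the change only makes the read non-blocking. That is fine here because the scanner skips files over maxFileSize before hashing, but for truly large files a fully streaming variant is possible. A sketch of that alternative — explicitly not what the package does; hashFileStreaming is an illustrative name:

    import * as crypto from "crypto";
    import * as fs from "fs";

    // Hash a file chunk-by-chunk so memory use stays constant
    // regardless of file size.
    function hashFileStreaming(filePath) {
      return new Promise((resolve, reject) => {
        const hash = crypto.createHash("sha256");
        fs.createReadStream(filePath)
          .on("error", reject)
          .on("data", (chunk) => hash.update(chunk))
          .on("end", () => resolve(hash.digest("hex")));
      });
    }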
@@ -138,17 +139,20 @@ export function isCodeFile(filePath) {
  return codeFileNames.has(basename);
  }
  /**
- * Recursively scans directory for code files
+ * Recursively scans directory for code files asynchronously
  */
- function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
+ async function scanDirectoryRecursive(dirPath, rootPath, ig, options, results, concurrencyLimit = 20) {
  let entries;
  try {
- entries = fs.readdirSync(dirPath, { withFileTypes: true });
+ entries = await fs.promises.readdir(dirPath, { withFileTypes: true });
  }
  catch (error) {
- console.error(`Warning: Could not read directory ${dirPath}: ${error}`);
+ logger.warn(`Could not read directory ${dirPath}: ${error}`);
  return;
  }
+ // Process subdirectories sequentially to avoid recursion explosion
+ // but process files in current directory in parallel
+ const filesToProcess = [];
  for (const entry of entries) {
  const fullPath = path.join(dirPath, entry.name);
  const relativePath = path.relative(rootPath, fullPath);
@@ -163,23 +167,35 @@ function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
  }
  if (entry.isDirectory()) {
  if (options.recursive) {
- scanDirectoryRecursive(fullPath, rootPath, ig, options, results);
+ await scanDirectoryRecursive(fullPath, rootPath, ig, options, results, concurrencyLimit);
  }
  }
  else if (entry.isFile()) {
+ filesToProcess.push(entry);
+ }
+ }
+ // Process files in batches
+ for (let i = 0; i < filesToProcess.length; i += concurrencyLimit) {
+ const batch = filesToProcess.slice(i, i + concurrencyLimit);
+ await Promise.all(batch.map(async (entry) => {
+ const fullPath = path.join(dirPath, entry.name);
+ // Calculate relative path from PROJECT root (not just scan root)
+ // And strictly normalize to forward slashes for consistency
+ const relativePathRaw = path.relative(options.projectRoot, fullPath);
+ const relativePath = relativePathRaw.split(path.sep).join("/");
  try {
  // Check if it's a code file
  if (!isCodeFile(fullPath)) {
- continue;
+ return;
  }
- const stats = fs.statSync(fullPath);
+ const stats = await fs.promises.stat(fullPath);
  // Check file size limit
  if (stats.size > options.maxFileSize) {
- console.error(`Skipping large file (${stats.size} bytes): ${relativePath}`);
- continue;
+ logger.warn(`Skipping large file (${stats.size} bytes): ${relativePath}`);
+ return;
  }
  // Calculate hash and collect metadata
- const hash = calculateFileHash(fullPath);
+ const hash = await calculateFileHash(fullPath);
  const language = detectLanguage(fullPath);
  const extension = path.extname(fullPath);
  results.push({
@@ -193,17 +209,18 @@ function scanDirectoryRecursive(dirPath, rootPath, ig, options, results) {
  });
  }
  catch (error) {
- console.error(`Warning: Could not process file ${fullPath}: ${error}`);
+ logger.warn(`Could not process file ${fullPath}: ${error}`);
  }
- }
+ }));
  }
  }
  /**
- * Scans workspace for code files
+ * Scans workspace for code files asynchronously
  */
- export function scanFiles(options) {
+ export async function scanFiles(options) {
  const fullOptions = {
  rootPath: options.rootPath,
+ projectRoot: options.projectRoot || options.rootPath,
  recursive: options.recursive !== undefined ? options.recursive : true,
  includeHidden: options.includeHidden !== undefined ? options.includeHidden : false,
  maxFileSize: options.maxFileSize || 10 * 1024 * 1024, // 10MB default
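The two hunks above convert the scanner to a collect-then-batch pattern: directory entries are gathered into filesToProcess, then handled with Promise.all over fixed-size slices, capping concurrent open files at concurrencyLimit (default 20 per the signature above). The pattern in isolation — mapInBatches, items, worker, and limit are illustrative names:

    // Run `worker` over `items` with at most `limit` promises in flight
    // per slice; each slice waits for its slowest member before the next
    // slice starts.
    async function mapInBatches(items, worker, limit = 20) {
      const results = [];
      for (let i = 0; i < items.length; i += limit) {
        const batch = items.slice(i, i + limit);
        results.push(...(await Promise.all(batch.map(worker))));
      }
      return results;
    }

A worker pool would keep all limit slots continuously busy, but the slicing approach needs no extra machinery and keeps resource use predictable.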
@@ -212,23 +229,23 @@ export function scanFiles(options) {
  if (!fs.existsSync(fullOptions.rootPath)) {
  throw new Error(`Root path does not exist: ${fullOptions.rootPath}`);
  }
- const stats = fs.statSync(fullOptions.rootPath);
+ const stats = await fs.promises.stat(fullOptions.rootPath);
  if (!stats.isDirectory()) {
  throw new Error(`Root path is not a directory: ${fullOptions.rootPath}`);
  }
- console.error(`Scanning files in: ${fullOptions.rootPath}`);
+ logger.info(`Scanning files in: ${fullOptions.rootPath}`);
  // Load ignore patterns
  const ig = loadIgnorePatterns(fullOptions.rootPath);
  // Scan files
  const results = [];
- scanDirectoryRecursive(fullOptions.rootPath, fullOptions.rootPath, ig, fullOptions, results);
- console.error(`Found ${results.length} code files to index`);
+ await scanDirectoryRecursive(fullOptions.rootPath, fullOptions.rootPath, ig, fullOptions, results);
+ logger.info(`Found ${results.length} code files to index`);
  return results;
  }
  /**
  * Scans a single file and returns its metadata
  */
- export function scanSingleFile(filePath, rootPath) {
+ export async function scanSingleFile(filePath, rootPath, projectRoot) {
  try {
  if (!fs.existsSync(filePath)) {
  throw new Error(`File does not exist: ${filePath}`);
@@ -240,10 +257,12 @@ export function scanSingleFile(filePath, rootPath) {
  if (!isCodeFile(filePath)) {
  return null;
  }
- const hash = calculateFileHash(filePath);
+ const hash = await calculateFileHash(filePath);
  const language = detectLanguage(filePath);
  const extension = path.extname(filePath);
- const relativePath = path.relative(rootPath, filePath);
+ // Calculate relative path from PROJECT root
+ const relativePathRaw = path.relative(projectRoot || rootPath, filePath);
+ const relativePath = relativePathRaw.split(path.sep).join("/");
  return {
  path: relativePath,
  absolutePath: filePath,
@@ -255,7 +274,7 @@ export function scanSingleFile(filePath, rootPath) {
  };
  }
  catch (error) {
- console.error(`Error scanning file ${filePath}: ${error}`);
+ logger.error(`Error scanning file ${filePath}:`, error);
  return null;
  }
  }
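Both scanner entry points now derive paths relative to the project root and normalize separators, so index keys come out identical on Windows and POSIX. The normalization in isolation — toIndexKey is an illustrative name:

    import * as path from "path";

    // path.relative uses the platform separator ("\\" on Windows);
    // rejoining with "/" yields a stable, platform-independent key.
    function toIndexKey(projectRoot, fullPath) {
      return path.relative(projectRoot, fullPath).split(path.sep).join("/");
    }
    // e.g. on Windows: toIndexKey("C:\\proj", "C:\\proj\\src\\a.ts") === "src/a.ts"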
@@ -6,6 +6,8 @@ import * as fs from "fs";
  import * as path from "path";
  import { scanFiles, scanSingleFile } from "./fileScanner.js";
  import { chunkCode } from "./chunker.js";
+ import { logger } from "./logger.js";
+ import * as crypto from "crypto";
  /**
  * Index manager coordinating the entire indexing pipeline
  */
@@ -14,12 +16,22 @@ export class IndexManager {
  vectorStore;
  metadataPath;
  metadata;
- constructor(embeddingService, vectorStore, storagePath = ".memorybank") {
+ projectRoot;
+ projectId;
+ constructor(embeddingService, vectorStore, storagePath = ".memorybank", projectRoot) {
  this.embeddingService = embeddingService;
  this.vectorStore = vectorStore;
  this.metadataPath = path.join(storagePath, "index-metadata.json");
+ this.projectRoot = projectRoot || process.cwd();
+ this.projectId = this.generateProjectId(this.projectRoot);
  this.metadata = this.loadMetadata();
  }
+ /**
+ * Generates a unique project ID from the project root path
+ */
+ generateProjectId(projectRoot) {
+ return crypto.createHash("sha256").update(projectRoot).digest("hex").substring(0, 16);
+ }
  /**
  * Loads index metadata from disk
  */
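The project ID is deterministic: the same root path always hashes to the same 16 hex characters (the first 64 bits of the SHA-256), which is what lets the deleteChunksByFile calls in later hunks scope deletions per project without storing any extra state. For illustration, with a made-up path:

    import * as crypto from "crypto";

    // Identical input -> identical ID across runs and machines.
    const projectId = crypto
      .createHash("sha256")
      .update("/home/user/my-project") // hypothetical project root
      .digest("hex")
      .substring(0, 16);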
@@ -31,7 +43,7 @@ export class IndexManager {
  }
  }
  catch (error) {
- console.error(`Warning: Could not load index metadata: ${error}`);
+ logger.warn(`Could not load index metadata: ${error}`);
  }
  return {
  version: "1.0",
@@ -51,7 +63,7 @@ export class IndexManager {
  fs.writeFileSync(this.metadataPath, JSON.stringify(this.metadata, null, 2));
  }
  catch (error) {
- console.error(`Warning: Could not save index metadata: ${error}`);
+ logger.warn(`Could not save index metadata: ${error}`);
  }
  }
  /**
@@ -73,14 +85,14 @@ export class IndexManager {
  /**
  * Indexes a single file
  */
- async indexFile(file, forceReindex = false) {
+ async indexFile(file, forceReindex = false, saveMetadata = true) {
  try {
  // Check if file needs reindexing
  if (!this.needsReindexing(file, forceReindex)) {
- console.error(`Skipping ${file.path} (no changes)`);
+ logger.debug(`Skipping ${file.path} (no changes)`);
  return { chunksCreated: 0 };
  }
- console.error(`Indexing: ${file.path}`);
+ logger.info(`Indexing: ${file.path}`);
  // Read file content
  const content = fs.readFileSync(file.absolutePath, "utf-8");
  // Get chunk size from environment or use defaults
@@ -95,20 +107,26 @@ export class IndexManager {
  chunkOverlap,
  });
  if (chunks.length === 0) {
- console.error(`Warning: No chunks created for ${file.path}`);
+ logger.warn(`No chunks created for ${file.path}`);
+ return { chunksCreated: 0 };
+ }
+ logger.debug(` Created ${chunks.length} chunks`);
+ // Filter out invalid chunks (fail-safe)
+ const validChunks = chunks.filter(c => c.content && c.content.trim().length > 0 && c.content.trim() !== "}");
+ if (validChunks.length === 0) {
+ logger.warn(`No valid chunks after filtering for ${file.path}`);
  return { chunksCreated: 0 };
  }
- console.error(` Created ${chunks.length} chunks`);
  // Generate embeddings
- const embeddingInputs = chunks.map((chunk) => ({
+ const embeddingInputs = validChunks.map((chunk) => ({
  id: chunk.id,
  content: chunk.content,
  }));
  const embeddings = await this.embeddingService.generateBatchEmbeddings(embeddingInputs);
- console.error(` Generated ${embeddings.length} embeddings`);
+ logger.debug(` Generated ${embeddings.length} embeddings`);
  // Prepare chunk records for storage
  const timestamp = Date.now();
- const chunkRecords = chunks.map((chunk, i) => ({
+ const chunkRecords = validChunks.map((chunk, i) => ({
  id: chunk.id,
  vector: embeddings[i].vector,
  filePath: chunk.filePath,
@@ -116,17 +134,18 @@ export class IndexManager {
  startLine: chunk.startLine,
  endLine: chunk.endLine,
  chunkType: chunk.chunkType,
- name: chunk.name,
+ name: chunk.name || "",
  language: chunk.language,
  fileHash: file.hash,
  timestamp,
  context: chunk.context,
+ projectId: this.projectId,
  }));
  // Delete old chunks for this file
- await this.vectorStore.deleteChunksByFile(file.path);
+ await this.vectorStore.deleteChunksByFile(file.path, this.projectId);
  // Insert new chunks
  await this.vectorStore.insertChunks(chunkRecords);
- console.error(` Stored ${chunkRecords.length} chunks in vector store`);
+ logger.debug(` Stored ${chunkRecords.length} chunks in vector store`);
  // Update metadata
  this.metadata.files[file.path] = {
  hash: file.hash,
@@ -134,12 +153,14 @@ export class IndexManager {
  chunkCount: chunks.length,
  };
  this.metadata.lastIndexed = timestamp;
- this.saveMetadata();
+ if (saveMetadata) {
+ this.saveMetadata();
+ }
  return { chunksCreated: chunks.length };
  }
  catch (error) {
  const errorMsg = `Error indexing ${file.path}: ${error}`;
- console.error(errorMsg);
+ logger.error(errorMsg);
  return { chunksCreated: 0, error: errorMsg };
  }
  }
@@ -148,19 +169,20 @@ export class IndexManager {
  */
  async indexFiles(options) {
  const startTime = Date.now();
- console.error(`\n=== Starting indexing process ===`);
- console.error(`Root path: ${options.rootPath}`);
- console.error(`Force reindex: ${options.forceReindex || false}`);
+ logger.info(`=== Starting indexing process ===`);
+ logger.info(`Root path: ${options.rootPath}`);
+ logger.info(`Force reindex: ${options.forceReindex || false}`);
  // Initialize vector store
  await this.vectorStore.initialize();
  // Scan files
- console.error(`\nScanning files...`);
- const files = scanFiles({
+ logger.info(`Scanning files...`);
+ const files = await scanFiles({
  rootPath: options.rootPath,
+ projectRoot: options.projectRoot,
  recursive: options.recursive !== undefined ? options.recursive : true,
  });
  if (files.length === 0) {
- console.error("No files found to index");
+ logger.warn("No files found to index");
  return {
  filesProcessed: 0,
  chunksCreated: 0,
@@ -170,9 +192,9 @@ export class IndexManager {
  }
  // Filter files that need reindexing
  const filesToIndex = files.filter((file) => this.needsReindexing(file, options.forceReindex || false));
- console.error(`\nFound ${files.length} files, ${filesToIndex.length} need indexing`);
+ logger.info(`Found ${files.length} files, ${filesToIndex.length} need indexing`);
  if (filesToIndex.length === 0) {
- console.error("All files are up to date");
+ logger.info("All files are up to date");
  return {
  filesProcessed: 0,
  chunksCreated: 0,
@@ -180,28 +202,45 @@ export class IndexManager {
  duration: Date.now() - startTime,
  };
  }
- // Index files
+ // Index files in batches
  const errors = [];
  let totalChunks = 0;
  let processedFiles = 0;
- for (let i = 0; i < filesToIndex.length; i++) {
- const file = filesToIndex[i];
- console.error(`\n[${i + 1}/${filesToIndex.length}] Processing ${file.path}`);
- const result = await this.indexFile(file, options.forceReindex || false);
- if (result.error) {
- errors.push(result.error);
+ const batchSize = 5; // Concurrency limit
+ for (let i = 0; i < filesToIndex.length; i += batchSize) {
+ const batch = filesToIndex.slice(i, i + batchSize);
+ const batchNum = Math.floor(i / batchSize) + 1;
+ const totalBatches = Math.ceil(filesToIndex.length / batchSize);
+ logger.info(`Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`);
+ const batchPromises = batch.map(async (file, index) => {
+ logger.debug(`[${i + index + 1}/${filesToIndex.length}] Processing ${file.path}`);
+ return this.indexFile(file, options.forceReindex || false, false); // Don't save metadata per file
+ });
+ const results = await Promise.all(batchPromises);
+ // Process results
+ for (const result of results) {
+ if (result.error) {
+ errors.push(result.error);
+ }
+ else {
+ processedFiles++;
+ totalChunks += result.chunksCreated;
+ }
  }
- else {
- processedFiles++;
- totalChunks += result.chunksCreated;
+ // Save metadata and embedding cache after each batch
+ this.saveMetadata();
+ this.embeddingService.saveCache();
+ // Small delay between batches
+ if (i + batchSize < filesToIndex.length) {
+ await new Promise(resolve => setTimeout(resolve, 100));
  }
  }
  const duration = Date.now() - startTime;
- console.error(`\n=== Indexing complete ===`);
- console.error(`Files processed: ${processedFiles}`);
- console.error(`Chunks created: ${totalChunks}`);
- console.error(`Errors: ${errors.length}`);
- console.error(`Duration: ${(duration / 1000).toFixed(2)}s`);
+ logger.info(`=== Indexing complete ===`);
+ logger.info(`Files processed: ${processedFiles}`);
+ logger.info(`Chunks created: ${totalChunks}`);
+ logger.info(`Errors: ${errors.length}`);
+ logger.info(`Duration: ${(duration / 1000).toFixed(2)}s`);
  return {
  filesProcessed: processedFiles,
  chunksCreated: totalChunks,
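Checkpointing once per batch — indexFile is called with saveMetadata = false, then saveMetadata() and saveCache() run after each Promise.all — bounds what a crash can lose to at most one batch of five files. The shape of that pattern stripped of the details (processAllInBatches, processItem, and persistState are placeholders, not the package's API):

    // Defer per-item persistence inside each batch, then flush durable
    // state once per batch; a crash costs at most one batch of work.
    async function processAllInBatches(items, batchSize, processItem, persistState) {
      for (let i = 0; i < items.length; i += batchSize) {
        const batch = items.slice(i, i + batchSize);
        await Promise.all(batch.map((item) => processItem(item, { persist: false })));
        await persistState();
      }
    }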
@@ -212,10 +251,10 @@ export class IndexManager {
  /**
  * Re-indexes a specific file by path
  */
- async reindexFile(filePath, rootPath) {
+ async reindexFile(filePath, rootPath, projectRoot) {
  try {
  // Scan the specific file
- const file = scanSingleFile(filePath, rootPath);
+ const file = await scanSingleFile(filePath, rootPath, projectRoot);
  if (!file) {
  return {
  success: false,
@@ -310,23 +349,23 @@ export class IndexManager {
  this.saveMetadata();
  // Clear embedding cache
  this.embeddingService.clearCache();
- console.error("Index cleared");
+ logger.info("Index cleared");
  }
  /**
  * Removes a file from the index
  */
  async removeFile(filePath) {
  await this.vectorStore.initialize();
- await this.vectorStore.deleteChunksByFile(filePath);
+ await this.vectorStore.deleteChunksByFile(filePath, this.projectId);
  delete this.metadata.files[filePath];
  this.saveMetadata();
- console.error(`Removed ${filePath} from index`);
+ logger.info(`Removed ${filePath} from index`);
  }
  }
  /**
  * Creates an index manager from environment variables
  */
- export function createIndexManager(embeddingService, vectorStore) {
+ export function createIndexManager(embeddingService, vectorStore, workspaceRoot) {
  const storagePath = process.env.MEMORYBANK_STORAGE_PATH || ".memorybank";
- return new IndexManager(embeddingService, vectorStore, storagePath);
+ return new IndexManager(embeddingService, vectorStore, storagePath, workspaceRoot);
  }