@zilliz/claude-context-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +275 -0
  3. package/dist/context.d.ts +234 -0
  4. package/dist/context.d.ts.map +1 -0
  5. package/dist/context.js +879 -0
  6. package/dist/context.js.map +1 -0
  7. package/dist/embedding/base-embedding.d.ts +45 -0
  8. package/dist/embedding/base-embedding.d.ts.map +1 -0
  9. package/dist/embedding/base-embedding.js +36 -0
  10. package/dist/embedding/base-embedding.js.map +1 -0
  11. package/dist/embedding/gemini-embedding.d.ts +51 -0
  12. package/dist/embedding/gemini-embedding.d.ts.map +1 -0
  13. package/dist/embedding/gemini-embedding.js +143 -0
  14. package/dist/embedding/gemini-embedding.js.map +1 -0
  15. package/dist/embedding/index.d.ts +6 -0
  16. package/dist/embedding/index.d.ts.map +1 -0
  17. package/dist/embedding/index.js +24 -0
  18. package/dist/embedding/index.js.map +1 -0
  19. package/dist/embedding/ollama-embedding.d.ts +64 -0
  20. package/dist/embedding/ollama-embedding.d.ts.map +1 -0
  21. package/dist/embedding/ollama-embedding.js +205 -0
  22. package/dist/embedding/ollama-embedding.js.map +1 -0
  23. package/dist/embedding/openai-embedding.d.ts +36 -0
  24. package/dist/embedding/openai-embedding.d.ts.map +1 -0
  25. package/dist/embedding/openai-embedding.js +103 -0
  26. package/dist/embedding/openai-embedding.js.map +1 -0
  27. package/dist/embedding/voyageai-embedding.d.ts +43 -0
  28. package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
  29. package/dist/embedding/voyageai-embedding.js +223 -0
  30. package/dist/embedding/voyageai-embedding.js.map +1 -0
  31. package/dist/index.d.ts +8 -0
  32. package/dist/index.d.ts.map +1 -0
  33. package/dist/index.js +24 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/splitter/ast-splitter.d.ts +22 -0
  36. package/dist/splitter/ast-splitter.d.ts.map +1 -0
  37. package/dist/splitter/ast-splitter.js +227 -0
  38. package/dist/splitter/ast-splitter.js.map +1 -0
  39. package/dist/splitter/index.d.ts +41 -0
  40. package/dist/splitter/index.d.ts.map +1 -0
  41. package/dist/splitter/index.js +27 -0
  42. package/dist/splitter/index.js.map +1 -0
  43. package/dist/splitter/langchain-splitter.d.ts +13 -0
  44. package/dist/splitter/langchain-splitter.d.ts.map +1 -0
  45. package/dist/splitter/langchain-splitter.js +118 -0
  46. package/dist/splitter/langchain-splitter.js.map +1 -0
  47. package/dist/sync/merkle.d.ts +26 -0
  48. package/dist/sync/merkle.d.ts.map +1 -0
  49. package/dist/sync/merkle.js +112 -0
  50. package/dist/sync/merkle.js.map +1 -0
  51. package/dist/sync/synchronizer.d.ts +30 -0
  52. package/dist/sync/synchronizer.d.ts.map +1 -0
  53. package/dist/sync/synchronizer.js +339 -0
  54. package/dist/sync/synchronizer.js.map +1 -0
  55. package/dist/types.d.ts +14 -0
  56. package/dist/types.d.ts.map +1 -0
  57. package/dist/types.js +3 -0
  58. package/dist/types.js.map +1 -0
  59. package/dist/utils/env-manager.d.ts +19 -0
  60. package/dist/utils/env-manager.d.ts.map +1 -0
  61. package/dist/utils/env-manager.js +125 -0
  62. package/dist/utils/env-manager.js.map +1 -0
  63. package/dist/utils/index.d.ts +2 -0
  64. package/dist/utils/index.d.ts.map +1 -0
  65. package/dist/utils/index.js +7 -0
  66. package/dist/utils/index.js.map +1 -0
  67. package/dist/vectordb/index.d.ts +5 -0
  68. package/dist/vectordb/index.d.ts.map +1 -0
  69. package/dist/vectordb/index.js +14 -0
  70. package/dist/vectordb/index.js.map +1 -0
  71. package/dist/vectordb/milvus-restful-vectordb.d.ts +51 -0
  72. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
  73. package/dist/vectordb/milvus-restful-vectordb.js +406 -0
  74. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
  75. package/dist/vectordb/milvus-vectordb.d.ts +34 -0
  76. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
  77. package/dist/vectordb/milvus-vectordb.js +248 -0
  78. package/dist/vectordb/milvus-vectordb.js.map +1 -0
  79. package/dist/vectordb/types.d.ts +75 -0
  80. package/dist/vectordb/types.d.ts.map +1 -0
  81. package/dist/vectordb/types.js +9 -0
  82. package/dist/vectordb/types.js.map +1 -0
  83. package/dist/vectordb/zilliz-utils.d.ts +135 -0
  84. package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
  85. package/dist/vectordb/zilliz-utils.js +192 -0
  86. package/dist/vectordb/zilliz-utils.js.map +1 -0
  87. package/package.json +56 -0
@@ -0,0 +1,879 @@
1
+ "use strict";
2
// Interop helper: exposes property `k` of module object `m` on object `o` under the name `k2`
// (`k2` defaults to `k`). An already-defined `this.__createBinding` is reused when present.
// NOTE(review): this looks like compiler-emitted module interop code - confirm before hand-editing.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Replace the descriptor with an enumerable live getter when there is none, when an accessor
// belongs to a module without `__esModule`, or when a data property is writable/configurable.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Fallback used when `Object.create` is unavailable: a plain value copy instead of a getter.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Interop helper: stores `v` as the `default` property of `o`.
// Uses an enumerable defined property when `Object.create` exists, otherwise a plain assignment.
// An already-defined `this.__setModuleDefault` is reused when present.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Interop helper: returns a namespace-style object for a required module.
// Modules flagged with `__esModule` are returned unchanged; otherwise every own property except
// "default" is bound onto a fresh object and the module itself becomes that object's `default`.
// An already-defined `this.__importStar` is reused when present.
+ var __importStar = (this && this.__importStar) || (function () {
19
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// hasOwnProperty-filtered for-in fallback when that function is missing.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
// Bind every own key except "default"; "default" is set separately to the whole module below.
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.CodeContext = void 0;
37
+ const splitter_1 = require("./splitter");
38
+ const embedding_1 = require("./embedding");
39
+ const env_manager_1 = require("./utils/env-manager");
40
+ const fs = __importStar(require("fs"));
41
+ const path = __importStar(require("path"));
42
+ const crypto = __importStar(require("crypto"));
43
+ const synchronizer_1 = require("./sync/synchronizer");
44
/**
 * File extensions indexed when the caller supplies no additional ones.
 * The constructor appends config- and environment-provided extensions to this list.
 */
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    ...['.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp'],
    ...['.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm'],
    // Text and markup files
    ...['.md', '.markdown', '.ipynb'],
    // Deliberately not enabled by default:
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
53
/**
 * Ignore patterns applied to every codebase before any project- or user-supplied patterns.
 * Order is significant only for logging and de-duplication; entries are grouped by purpose.
 */
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    ...['node_modules/**', 'dist/**', 'build/**', 'out/**', 'target/**', 'coverage/**', '.nyc_output/**'],
    // IDE and editor files
    ...['.vscode/**', '.idea/**', '*.swp', '*.swo'],
    // Version control
    ...['.git/**', '.svn/**', '.hg/**'],
    // Cache directories
    ...['.cache/**', '__pycache__/**', '.pytest_cache/**'],
    // Logs and temporary files
    ...['logs/**', 'tmp/**', 'temp/**', '*.log'],
    // Environment and config files
    ...['.env', '.env.*', '*.local'],
    // Minified and bundled files
    ...['*.min.js', '*.min.css', '*.min.map', '*.bundle.js', '*.bundle.css'],
    ...['*.chunk.js', '*.vendor.js', '*.polyfills.js', '*.runtime.js'],
    // Source map files
    '*.map',
    // Bare directory names (same directories as above, without the glob suffix)
    ...['node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out'],
    ...['target', '.vscode', '.idea', '__pycache__', '.pytest_cache'],
    ...['coverage', '.nyc_output', 'logs', 'tmp', 'temp'],
];
99
+ class CodeContext {
100
+ constructor(config = {}) {
101
+ this.synchronizers = new Map();
102
+ // Initialize services
103
+ this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
104
+ apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
105
+ model: 'text-embedding-3-small',
106
+ ...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
107
+ });
108
+ if (!config.vectorDatabase) {
109
+ throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
110
+ }
111
+ this.vectorDatabase = config.vectorDatabase;
112
+ this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
113
+ // Load custom extensions from environment variables
114
+ const envCustomExtensions = this.getCustomExtensionsFromEnv();
115
+ // Combine default extensions with config extensions and env extensions
116
+ const allSupportedExtensions = [
117
+ ...DEFAULT_SUPPORTED_EXTENSIONS,
118
+ ...(config.supportedExtensions || []),
119
+ ...(config.customExtensions || []),
120
+ ...envCustomExtensions
121
+ ];
122
+ // Remove duplicates
123
+ this.supportedExtensions = [...new Set(allSupportedExtensions)];
124
+ // Load custom ignore patterns from environment variables
125
+ const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
126
+ // Start with default ignore patterns
127
+ const allIgnorePatterns = [
128
+ ...DEFAULT_IGNORE_PATTERNS,
129
+ ...(config.ignorePatterns || []),
130
+ ...(config.customIgnorePatterns || []),
131
+ ...envCustomIgnorePatterns
132
+ ];
133
+ // Remove duplicates
134
+ this.ignorePatterns = [...new Set(allIgnorePatterns)];
135
+ console.log(`🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
136
+ if (envCustomExtensions.length > 0) {
137
+ console.log(`📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
138
+ }
139
+ if (envCustomIgnorePatterns.length > 0) {
140
+ console.log(`🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
141
+ }
142
+ }
143
+ /**
144
+ * Generate collection name based on codebase path
145
+ */
146
+ getCollectionName(codebasePath) {
147
+ const normalizedPath = path.resolve(codebasePath);
148
+ const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
149
+ return `code_chunks_${hash.substring(0, 8)}`;
150
+ }
151
+ /**
152
+ * Index entire codebase
153
+ * @param codebasePath Codebase path
154
+ * @param progressCallback Optional progress callback function
155
+ * @returns Indexing statistics
156
+ */
157
+ async indexCodebase(codebasePath, progressCallback) {
158
+ console.log(`🚀 Starting to index codebase: ${codebasePath}`);
159
+ // 1. Load ignore patterns from various ignore files
160
+ await this.loadGitignorePatterns(codebasePath);
161
+ // 2. Check and prepare vector collection
162
+ progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
163
+ console.log(`Debug2: Preparing vector collection for codebase`);
164
+ await this.prepareCollection(codebasePath);
165
+ // 3. Recursively traverse codebase to get all supported files
166
+ progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
167
+ const codeFiles = await this.getCodeFiles(codebasePath);
168
+ console.log(`📁 Found ${codeFiles.length} code files`);
169
+ if (codeFiles.length === 0) {
170
+ progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 });
171
+ return { indexedFiles: 0, totalChunks: 0, status: 'completed' };
172
+ }
173
+ // 3. Process each file with streaming chunk processing
174
+ // Reserve 10% for preparation, 90% for actual indexing
175
+ const indexingStartPercentage = 10;
176
+ const indexingEndPercentage = 100;
177
+ const indexingRange = indexingEndPercentage - indexingStartPercentage;
178
+ const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
179
+ // Calculate progress percentage
180
+ const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
181
+ console.log(`📊 Processed ${fileIndex}/${totalFiles} files`);
182
+ progressCallback?.({
183
+ phase: `Processing files (${fileIndex}/${totalFiles})...`,
184
+ current: fileIndex,
185
+ total: totalFiles,
186
+ percentage: Math.round(progressPercentage)
187
+ });
188
+ });
189
+ console.log(`✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
190
+ progressCallback?.({
191
+ phase: 'Indexing complete!',
192
+ current: result.processedFiles,
193
+ total: codeFiles.length,
194
+ percentage: 100
195
+ });
196
+ return {
197
+ indexedFiles: result.processedFiles,
198
+ totalChunks: result.totalChunks,
199
+ status: result.status
200
+ };
201
+ }
202
+ async reindexByChange(codebasePath, progressCallback) {
203
+ const collectionName = this.getCollectionName(codebasePath);
204
+ const synchronizer = this.synchronizers.get(collectionName);
205
+ if (!synchronizer) {
206
+ // To be safe, let's initialize if it's not there.
207
+ const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
208
+ await newSynchronizer.initialize();
209
+ this.synchronizers.set(collectionName, newSynchronizer);
210
+ }
211
+ const currentSynchronizer = this.synchronizers.get(collectionName);
212
+ progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 });
213
+ const { added, removed, modified } = await currentSynchronizer.checkForChanges();
214
+ const totalChanges = added.length + removed.length + modified.length;
215
+ if (totalChanges === 0) {
216
+ progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
217
+ console.log('✅ No file changes detected.');
218
+ return { added: 0, removed: 0, modified: 0 };
219
+ }
220
+ console.log(`🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
221
+ let processedChanges = 0;
222
+ const updateProgress = (phase) => {
223
+ processedChanges++;
224
+ const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100);
225
+ progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage });
226
+ };
227
+ // Handle removed files
228
+ for (const file of removed) {
229
+ await this.deleteFileChunks(collectionName, file);
230
+ updateProgress(`Removed ${file}`);
231
+ }
232
+ // Handle modified files
233
+ for (const file of modified) {
234
+ await this.deleteFileChunks(collectionName, file);
235
+ updateProgress(`Deleted old chunks for ${file}`);
236
+ }
237
+ // Handle added and modified files
238
+ const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
239
+ if (filesToIndex.length > 0) {
240
+ await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
241
+ updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
242
+ });
243
+ }
244
+ console.log(`✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
245
+ progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
246
+ return { added: added.length, removed: removed.length, modified: modified.length };
247
+ }
248
+ async deleteFileChunks(collectionName, relativePath) {
249
+ const results = await this.vectorDatabase.query(collectionName, `relativePath == "${relativePath}"`, ['id']);
250
+ if (results.length > 0) {
251
+ const ids = results.map(r => r.id).filter(id => id);
252
+ if (ids.length > 0) {
253
+ await this.vectorDatabase.delete(collectionName, ids);
254
+ console.log(`Deleted ${ids.length} chunks for file ${relativePath}`);
255
+ }
256
+ }
257
+ }
258
+ /**
259
+ * Semantic search
260
+ * @param codebasePath Codebase path to search in
261
+ * @param query Search query
262
+ * @param topK Number of results to return
263
+ * @param threshold Similarity threshold
264
+ */
265
+ async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5) {
266
+ console.log(`🔍 Executing semantic search: "${query}" in ${codebasePath}`);
267
+ // 1. Generate query vector
268
+ const queryEmbedding = await this.embedding.embed(query);
269
+ // 2. Search in vector database
270
+ const searchResults = await this.vectorDatabase.search(this.getCollectionName(codebasePath), queryEmbedding.vector, { topK, threshold });
271
+ // 3. Convert to semantic search result format
272
+ const results = searchResults.map(result => ({
273
+ content: result.document.content,
274
+ relativePath: result.document.relativePath,
275
+ startLine: result.document.startLine,
276
+ endLine: result.document.endLine,
277
+ language: result.document.metadata.language || 'unknown',
278
+ score: result.score
279
+ }));
280
+ console.log(`✅ Found ${results.length} relevant results`);
281
+ return results;
282
+ }
283
+ /**
284
+ * Check if index exists for codebase
285
+ * @param codebasePath Codebase path to check
286
+ * @returns Whether index exists
287
+ */
288
+ async hasIndex(codebasePath) {
289
+ const collectionName = this.getCollectionName(codebasePath);
290
+ return await this.vectorDatabase.hasCollection(collectionName);
291
+ }
292
+ /**
293
+ * Clear index
294
+ * @param codebasePath Codebase path to clear index for
295
+ * @param progressCallback Optional progress callback function
296
+ */
297
+ async clearIndex(codebasePath, progressCallback) {
298
+ console.log(`🧹 Cleaning index data for ${codebasePath}...`);
299
+ progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
300
+ const collectionName = this.getCollectionName(codebasePath);
301
+ const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
302
+ progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
303
+ if (collectionExists) {
304
+ await this.vectorDatabase.dropCollection(collectionName);
305
+ }
306
+ // Delete snapshot file
307
+ await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
308
+ progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
309
+ console.log('✅ Index data cleaned');
310
+ }
311
+ /**
312
+ * Update ignore patterns (merges with default patterns and existing patterns)
313
+ * @param ignorePatterns Array of ignore patterns to add to defaults
314
+ */
315
+ updateIgnorePatterns(ignorePatterns) {
316
+ // Merge with default patterns and any existing custom patterns, avoiding duplicates
317
+ const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
318
+ const uniquePatterns = [];
319
+ const patternSet = new Set(mergedPatterns);
320
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
321
+ this.ignorePatterns = uniquePatterns;
322
+ console.log(`🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
323
+ }
324
+ /**
325
+ * Add custom ignore patterns (from MCP or other sources) without replacing existing ones
326
+ * @param customPatterns Array of custom ignore patterns to add
327
+ */
328
+ addCustomIgnorePatterns(customPatterns) {
329
+ if (customPatterns.length === 0)
330
+ return;
331
+ // Merge current patterns with new custom patterns, avoiding duplicates
332
+ const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
333
+ const uniquePatterns = [];
334
+ const patternSet = new Set(mergedPatterns);
335
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
336
+ this.ignorePatterns = uniquePatterns;
337
+ console.log(`🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
338
+ }
339
+ /**
340
+ * Reset ignore patterns to defaults only
341
+ */
342
+ resetIgnorePatternsToDefaults() {
343
+ this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
344
+ console.log(`🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
345
+ }
346
+ /**
347
+ * Update embedding instance
348
+ * @param embedding New embedding instance
349
+ */
350
+ updateEmbedding(embedding) {
351
+ this.embedding = embedding;
352
+ console.log(`🔄 Updated embedding provider: ${embedding.getProvider()}`);
353
+ }
354
+ /**
355
+ * Update vector database instance
356
+ * @param vectorDatabase New vector database instance
357
+ */
358
+ updateVectorDatabase(vectorDatabase) {
359
+ this.vectorDatabase = vectorDatabase;
360
+ console.log(`🔄 Updated vector database`);
361
+ }
362
+ /**
363
+ * Update splitter instance
364
+ * @param splitter New splitter instance
365
+ */
366
+ updateSplitter(splitter) {
367
+ this.codeSplitter = splitter;
368
+ console.log(`🔄 Updated splitter instance`);
369
+ }
370
+ /**
371
+ * Prepare vector collection
372
+ */
373
+ async prepareCollection(codebasePath) {
374
+ // Create new collection
375
+ console.log(`🔧 Preparing vector collection for codebase: ${codebasePath}`);
376
+ const collectionName = this.getCollectionName(codebasePath);
377
+ // For Ollama embeddings, ensure dimension is detected before creating collection
378
+ if (this.embedding.getProvider() === 'Ollama' && typeof this.embedding.initializeDimension === 'function') {
379
+ await this.embedding.initializeDimension();
380
+ }
381
+ const dimension = this.embedding.getDimension();
382
+ const dirName = path.basename(codebasePath);
383
+ await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
384
+ console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
385
+ }
386
+ /**
387
+ * Recursively get all code files in the codebase
388
+ */
389
+ async getCodeFiles(codebasePath) {
390
+ const files = [];
391
+ const traverseDirectory = async (currentPath) => {
392
+ const entries = await fs.promises.readdir(currentPath, { withFileTypes: true });
393
+ for (const entry of entries) {
394
+ const fullPath = path.join(currentPath, entry.name);
395
+ // Check if path matches ignore patterns
396
+ if (this.matchesIgnorePattern(fullPath, codebasePath)) {
397
+ continue;
398
+ }
399
+ if (entry.isDirectory()) {
400
+ await traverseDirectory(fullPath);
401
+ }
402
+ else if (entry.isFile()) {
403
+ const ext = path.extname(entry.name);
404
+ if (this.supportedExtensions.includes(ext)) {
405
+ files.push(fullPath);
406
+ }
407
+ }
408
+ }
409
+ };
410
+ await traverseDirectory(codebasePath);
411
+ return files;
412
+ }
413
+ /**
414
+ * Process a list of files with streaming chunk processing
415
+ * @param filePaths Array of file paths to process
416
+ * @param codebasePath Base path for the codebase
417
+ * @param onFileProcessed Callback called when each file is processed
418
+ * @returns Object with processed file count and total chunk count
419
+ */
420
+ async processFileList(filePaths, codebasePath, onFileProcessed) {
421
+ const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
422
+ const CHUNK_LIMIT = 450000;
423
+ console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
424
+ let chunkBuffer = [];
425
+ let processedFiles = 0;
426
+ let totalChunks = 0;
427
+ let limitReached = false;
428
+ for (let i = 0; i < filePaths.length; i++) {
429
+ const filePath = filePaths[i];
430
+ try {
431
+ const content = await fs.promises.readFile(filePath, 'utf-8');
432
+ const language = this.getLanguageFromExtension(path.extname(filePath));
433
+ const chunks = await this.codeSplitter.split(content, language, filePath);
434
+ // Log files with many chunks or large content
435
+ if (chunks.length > 50) {
436
+ console.warn(`⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
437
+ }
438
+ else if (content.length > 100000) {
439
+ console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
440
+ }
441
+ // Add chunks to buffer
442
+ for (const chunk of chunks) {
443
+ chunkBuffer.push({ chunk, codebasePath });
444
+ totalChunks++;
445
+ // Process batch when buffer reaches EMBEDDING_BATCH_SIZE
446
+ if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
447
+ try {
448
+ await this.processChunkBuffer(chunkBuffer);
449
+ }
450
+ catch (error) {
451
+ // TODO:
452
+ console.error(`❌ Failed to process chunk batch: ${error}`);
453
+ }
454
+ finally {
455
+ chunkBuffer = []; // Always clear buffer, even on failure
456
+ }
457
+ }
458
+ // Check if chunk limit is reached
459
+ if (totalChunks >= CHUNK_LIMIT) {
460
+ console.warn(`⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
461
+ limitReached = true;
462
+ break; // Exit the inner loop (over chunks)
463
+ }
464
+ }
465
+ processedFiles++;
466
+ onFileProcessed?.(filePath, i + 1, filePaths.length);
467
+ if (limitReached) {
468
+ break; // Exit the outer loop (over files)
469
+ }
470
+ }
471
+ catch (error) {
472
+ console.warn(`⚠️ Skipping file ${filePath}: ${error}`);
473
+ }
474
+ }
475
+ // Process any remaining chunks in the buffer
476
+ if (chunkBuffer.length > 0) {
477
+ console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks`);
478
+ try {
479
+ await this.processChunkBuffer(chunkBuffer);
480
+ }
481
+ catch (error) {
482
+ console.error(`❌ Failed to process final chunk batch: ${error}`);
483
+ }
484
+ }
485
+ return {
486
+ processedFiles,
487
+ totalChunks,
488
+ status: limitReached ? 'limit_reached' : 'completed'
489
+ };
490
+ }
491
+ /**
492
+ * Process accumulated chunk buffer
493
+ */
494
+ async processChunkBuffer(chunkBuffer) {
495
+ if (chunkBuffer.length === 0)
496
+ return;
497
+ // Extract chunks and ensure they all have the same codebasePath
498
+ const chunks = chunkBuffer.map(item => item.chunk);
499
+ const codebasePath = chunkBuffer[0].codebasePath;
500
+ // Estimate tokens (rough estimation: 1 token ≈ 4 characters)
501
+ const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
502
+ console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens)`);
503
+ await this.processChunkBatch(chunks, codebasePath);
504
+ }
505
+ /**
506
+ * Process a batch of chunks
507
+ */
508
+ async processChunkBatch(chunks, codebasePath) {
509
+ // Generate embedding vectors
510
+ const chunkContents = chunks.map(chunk => chunk.content);
511
+ const embeddings = await this.embedding.embedBatch(chunkContents);
512
+ // Prepare vector documents
513
+ const documents = chunks.map((chunk, index) => {
514
+ if (!chunk.metadata.filePath) {
515
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
516
+ }
517
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
518
+ const fileExtension = path.extname(chunk.metadata.filePath);
519
+ // Extract metadata that should be stored separately
520
+ const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
521
+ return {
522
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
523
+ vector: embeddings[index].vector,
524
+ content: chunk.content,
525
+ relativePath,
526
+ startLine: chunk.metadata.startLine || 0,
527
+ endLine: chunk.metadata.endLine || 0,
528
+ fileExtension,
529
+ metadata: {
530
+ ...restMetadata,
531
+ codebasePath,
532
+ language: chunk.metadata.language || 'unknown',
533
+ chunkIndex: index
534
+ }
535
+ };
536
+ });
537
+ // Store to vector database
538
+ await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
539
+ }
540
+ /**
541
+ * Get programming language based on file extension
542
+ */
543
+ getLanguageFromExtension(ext) {
544
+ const languageMap = {
545
+ '.ts': 'typescript',
546
+ '.tsx': 'typescript',
547
+ '.js': 'javascript',
548
+ '.jsx': 'javascript',
549
+ '.py': 'python',
550
+ '.java': 'java',
551
+ '.cpp': 'cpp',
552
+ '.c': 'c',
553
+ '.h': 'c',
554
+ '.hpp': 'cpp',
555
+ '.cs': 'csharp',
556
+ '.go': 'go',
557
+ '.rs': 'rust',
558
+ '.php': 'php',
559
+ '.rb': 'ruby',
560
+ '.swift': 'swift',
561
+ '.kt': 'kotlin',
562
+ '.scala': 'scala',
563
+ '.m': 'objective-c',
564
+ '.mm': 'objective-c',
565
+ '.ipynb': 'jupyter'
566
+ };
567
+ return languageMap[ext] || 'text';
568
+ }
569
+ /**
570
+ * Generate unique ID based on chunk content and location
571
+ * @param relativePath Relative path to the file
572
+ * @param startLine Start line number
573
+ * @param endLine End line number
574
+ * @param content Chunk content
575
+ * @returns Hash-based unique ID
576
+ */
577
+ generateId(relativePath, startLine, endLine, content) {
578
+ const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
579
+ const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex');
580
+ return `chunk_${hash.substring(0, 16)}`;
581
+ }
582
+ /**
583
+ * Read ignore patterns from file (e.g., .gitignore)
584
+ * @param filePath Path to the ignore file
585
+ * @returns Array of ignore patterns
586
+ */
587
+ static async getIgnorePatternsFromFile(filePath) {
588
+ try {
589
+ const content = await fs.promises.readFile(filePath, 'utf-8');
590
+ return content
591
+ .split('\n')
592
+ .map(line => line.trim())
593
+ .filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
594
+ }
595
+ catch (error) {
596
+ console.warn(`⚠️ Could not read ignore file ${filePath}: ${error}`);
597
+ return [];
598
+ }
599
+ }
600
+ /**
601
+ * Load ignore patterns from various ignore files in the codebase
602
+ * This method preserves any existing custom patterns that were added before
603
+ * @param codebasePath Path to the codebase
604
+ */
605
+ async loadGitignorePatterns(codebasePath) {
606
+ try {
607
+ let fileBasedPatterns = [];
608
+ // 1. Load .gitignore
609
+ const gitignorePath = path.join(codebasePath, '.gitignore');
610
+ const gitignorePatterns = await this.loadIgnoreFile(gitignorePath, '.gitignore');
611
+ fileBasedPatterns.push(...gitignorePatterns);
612
+ // 2. Load all .xxxignore files in codebase directory
613
+ const ignoreFiles = await this.findIgnoreFiles(codebasePath);
614
+ for (const ignoreFile of ignoreFiles) {
615
+ const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
616
+ fileBasedPatterns.push(...patterns);
617
+ }
618
+ // 3. Load global ~/.codecontext/.codecontextignore
619
+ const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
620
+ fileBasedPatterns.push(...globalIgnorePatterns);
621
+ // 4. Merge file-based patterns with existing patterns (which may include custom MCP patterns)
622
+ if (fileBasedPatterns.length > 0) {
623
+ this.addCustomIgnorePatterns(fileBasedPatterns);
624
+ console.log(`🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
625
+ }
626
+ else {
627
+ console.log('📄 No ignore files found, keeping existing patterns');
628
+ }
629
+ }
630
+ catch (error) {
631
+ console.warn(`⚠️ Failed to load ignore patterns: ${error}`);
632
+ // Continue with existing patterns on error - don't reset them
633
+ }
634
+ }
635
+ /**
636
+ * Find all .xxxignore files in the codebase directory (excluding .gitignore as it's handled separately)
637
+ * @param codebasePath Path to the codebase
638
+ * @returns Array of ignore file paths
639
+ */
640
+ async findIgnoreFiles(codebasePath) {
641
+ try {
642
+ const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
643
+ const ignoreFiles = [];
644
+ for (const entry of entries) {
645
+ if (entry.isFile() &&
646
+ entry.name.startsWith('.') &&
647
+ entry.name.endsWith('ignore') &&
648
+ entry.name !== '.gitignore') { // Exclude .gitignore as it's handled separately
649
+ ignoreFiles.push(path.join(codebasePath, entry.name));
650
+ }
651
+ }
652
+ if (ignoreFiles.length > 0) {
653
+ console.log(`📄 Found additional ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
654
+ }
655
+ return ignoreFiles;
656
+ }
657
+ catch (error) {
658
+ console.warn(`⚠️ Failed to scan for ignore files: ${error}`);
659
+ return [];
660
+ }
661
+ }
662
+ /**
663
+ * Load global ignore file from ~/.codecontext/.codecontextignore
664
+ * @returns Array of ignore patterns
665
+ */
666
+ async loadGlobalIgnoreFile() {
667
+ try {
668
+ const homeDir = require('os').homedir();
669
+ const globalIgnorePath = path.join(homeDir, '.codecontext', '.codecontextignore');
670
+ return await this.loadIgnoreFile(globalIgnorePath, 'global .codecontextignore');
671
+ }
672
+ catch (error) {
673
+ // Global ignore file is optional, don't log warnings
674
+ return [];
675
+ }
676
+ }
677
+ /**
678
+ * Load ignore patterns from a specific ignore file
679
+ * @param filePath Path to the ignore file
680
+ * @param fileName Display name for logging
681
+ * @returns Array of ignore patterns
682
+ */
683
+ async loadIgnoreFile(filePath, fileName) {
684
+ try {
685
+ await fs.promises.access(filePath);
686
+ console.log(`📄 Found ${fileName} file at: ${filePath}`);
687
+ const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(filePath);
688
+ if (ignorePatterns.length > 0) {
689
+ console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
690
+ return ignorePatterns;
691
+ }
692
+ else {
693
+ console.log(`📄 ${fileName} file found but no valid patterns detected`);
694
+ return [];
695
+ }
696
+ }
697
+ catch (error) {
698
+ if (fileName === '.gitignore' || fileName.includes('global')) {
699
+ console.log(`📄 No ${fileName} file found`);
700
+ }
701
+ return [];
702
+ }
703
+ }
704
+ /**
705
+ * Check if a path matches any ignore pattern
706
+ * @param filePath Path to check
707
+ * @param basePath Base path for relative pattern matching
708
+ * @returns True if path should be ignored
709
+ */
710
+ matchesIgnorePattern(filePath, basePath) {
711
+ if (this.ignorePatterns.length === 0) {
712
+ return false;
713
+ }
714
+ const relativePath = path.relative(basePath, filePath);
715
+ const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators
716
+ for (const pattern of this.ignorePatterns) {
717
+ if (this.isPatternMatch(normalizedPath, pattern)) {
718
+ return true;
719
+ }
720
+ }
721
+ return false;
722
+ }
723
+ /**
724
+ * Simple glob pattern matching
725
+ * @param filePath File path to test
726
+ * @param pattern Glob pattern
727
+ * @returns True if pattern matches
728
+ */
729
+ isPatternMatch(filePath, pattern) {
730
+ // Handle directory patterns (ending with /)
731
+ if (pattern.endsWith('/')) {
732
+ const dirPattern = pattern.slice(0, -1);
733
+ const pathParts = filePath.split('/');
734
+ return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
735
+ }
736
+ // Handle file patterns
737
+ if (pattern.includes('/')) {
738
+ // Pattern with path separator - match exact path
739
+ return this.simpleGlobMatch(filePath, pattern);
740
+ }
741
+ else {
742
+ // Pattern without path separator - match filename in any directory
743
+ const fileName = path.basename(filePath);
744
+ return this.simpleGlobMatch(fileName, pattern);
745
+ }
746
+ }
747
+ /**
748
+ * Simple glob matching supporting * wildcard
749
+ * @param text Text to test
750
+ * @param pattern Pattern with * wildcards
751
+ * @returns True if pattern matches
752
+ */
753
+ simpleGlobMatch(text, pattern) {
754
+ // Convert glob pattern to regex
755
+ const regexPattern = pattern
756
+ .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except *
757
+ .replace(/\*/g, '.*'); // Convert * to .*
758
+ const regex = new RegExp(`^${regexPattern}$`);
759
+ return regex.test(text);
760
+ }
761
+ /**
762
+ * Get custom extensions from environment variables
763
+ * Supports CUSTOM_EXTENSIONS as comma-separated list
764
+ * @returns Array of custom extensions
765
+ */
766
+ getCustomExtensionsFromEnv() {
767
+ const envExtensions = env_manager_1.envManager.get('CUSTOM_EXTENSIONS');
768
+ if (!envExtensions) {
769
+ return [];
770
+ }
771
+ try {
772
+ const extensions = envExtensions
773
+ .split(',')
774
+ .map(ext => ext.trim())
775
+ .filter(ext => ext.length > 0)
776
+ .map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
777
+ return extensions;
778
+ }
779
+ catch (error) {
780
+ console.warn(`⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
781
+ return [];
782
+ }
783
+ }
784
+ /**
785
+ * Get custom ignore patterns from environment variables
786
+ * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
787
+ * @returns Array of custom ignore patterns
788
+ */
789
+ getCustomIgnorePatternsFromEnv() {
790
+ const envIgnorePatterns = env_manager_1.envManager.get('CUSTOM_IGNORE_PATTERNS');
791
+ if (!envIgnorePatterns) {
792
+ return [];
793
+ }
794
+ try {
795
+ const patterns = envIgnorePatterns
796
+ .split(',')
797
+ .map(pattern => pattern.trim())
798
+ .filter(pattern => pattern.length > 0);
799
+ return patterns;
800
+ }
801
+ catch (error) {
802
+ console.warn(`⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
803
+ return [];
804
+ }
805
+ }
806
+ /**
807
+ * Add custom extensions (from MCP or other sources) without replacing existing ones
808
+ * @param customExtensions Array of custom extensions to add
809
+ */
810
+ addCustomExtensions(customExtensions) {
811
+ if (customExtensions.length === 0)
812
+ return;
813
+ // Ensure extensions start with dot
814
+ const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
815
+ // Merge current extensions with new custom extensions, avoiding duplicates
816
+ const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
817
+ const uniqueExtensions = [...new Set(mergedExtensions)];
818
+ this.supportedExtensions = uniqueExtensions;
819
+ console.log(`📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
820
+ }
821
+ /**
822
+ * Get current splitter information
823
+ */
824
+ getSplitterInfo() {
825
+ const splitterName = this.codeSplitter.constructor.name;
826
+ if (splitterName === 'AstCodeSplitter') {
827
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
828
+ return {
829
+ type: 'ast',
830
+ hasBuiltinFallback: true,
831
+ supportedLanguages: AstCodeSplitter.getSupportedLanguages()
832
+ };
833
+ }
834
+ else {
835
+ return {
836
+ type: 'langchain',
837
+ hasBuiltinFallback: false
838
+ };
839
+ }
840
+ }
841
+ /**
842
+ * Check if current splitter supports a specific language
843
+ * @param language Programming language
844
+ */
845
+ isLanguageSupported(language) {
846
+ const splitterName = this.codeSplitter.constructor.name;
847
+ if (splitterName === 'AstCodeSplitter') {
848
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
849
+ return AstCodeSplitter.isLanguageSupported(language);
850
+ }
851
+ // LangChain splitter supports most languages
852
+ return true;
853
+ }
854
+ /**
855
+ * Get which strategy would be used for a specific language
856
+ * @param language Programming language
857
+ */
858
+ getSplitterStrategyForLanguage(language) {
859
+ const splitterName = this.codeSplitter.constructor.name;
860
+ if (splitterName === 'AstCodeSplitter') {
861
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
862
+ const isSupported = AstCodeSplitter.isLanguageSupported(language);
863
+ return {
864
+ strategy: isSupported ? 'ast' : 'langchain',
865
+ reason: isSupported
866
+ ? 'Language supported by AST parser'
867
+ : 'Language not supported by AST, will fallback to LangChain'
868
+ };
869
+ }
870
+ else {
871
+ return {
872
+ strategy: 'langchain',
873
+ reason: 'Using LangChain splitter directly'
874
+ };
875
+ }
876
+ }
877
+ }
878
+ exports.CodeContext = CodeContext;
879
+ //# sourceMappingURL=context.js.map