@mcampa/claude-context-core 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +285 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/context.d.ts +276 -0
  5. package/dist/context.d.ts.map +1 -0
  6. package/dist/context.js +1080 -0
  7. package/dist/context.js.map +1 -0
  8. package/dist/embedding/base-embedding.d.ts +51 -0
  9. package/dist/embedding/base-embedding.d.ts.map +1 -0
  10. package/dist/embedding/base-embedding.js +36 -0
  11. package/dist/embedding/base-embedding.js.map +1 -0
  12. package/dist/embedding/gemini-embedding.d.ts +53 -0
  13. package/dist/embedding/gemini-embedding.d.ts.map +1 -0
  14. package/dist/embedding/gemini-embedding.js +152 -0
  15. package/dist/embedding/gemini-embedding.js.map +1 -0
  16. package/dist/embedding/index.d.ts +6 -0
  17. package/dist/embedding/index.d.ts.map +1 -0
  18. package/dist/embedding/index.js +24 -0
  19. package/dist/embedding/index.js.map +1 -0
  20. package/dist/embedding/ollama-embedding.d.ts +55 -0
  21. package/dist/embedding/ollama-embedding.d.ts.map +1 -0
  22. package/dist/embedding/ollama-embedding.js +192 -0
  23. package/dist/embedding/ollama-embedding.js.map +1 -0
  24. package/dist/embedding/openai-embedding.d.ts +36 -0
  25. package/dist/embedding/openai-embedding.d.ts.map +1 -0
  26. package/dist/embedding/openai-embedding.js +159 -0
  27. package/dist/embedding/openai-embedding.js.map +1 -0
  28. package/dist/embedding/voyageai-embedding.d.ts +44 -0
  29. package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
  30. package/dist/embedding/voyageai-embedding.js +227 -0
  31. package/dist/embedding/voyageai-embedding.js.map +1 -0
  32. package/dist/index.d.ts +8 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +24 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/splitter/ast-splitter.d.ts +22 -0
  37. package/dist/splitter/ast-splitter.d.ts.map +1 -0
  38. package/dist/splitter/ast-splitter.js +234 -0
  39. package/dist/splitter/ast-splitter.js.map +1 -0
  40. package/dist/splitter/index.d.ts +41 -0
  41. package/dist/splitter/index.d.ts.map +1 -0
  42. package/dist/splitter/index.js +27 -0
  43. package/dist/splitter/index.js.map +1 -0
  44. package/dist/splitter/langchain-splitter.d.ts +13 -0
  45. package/dist/splitter/langchain-splitter.d.ts.map +1 -0
  46. package/dist/splitter/langchain-splitter.js +118 -0
  47. package/dist/splitter/langchain-splitter.js.map +1 -0
  48. package/dist/sync/merkle.d.ts +26 -0
  49. package/dist/sync/merkle.d.ts.map +1 -0
  50. package/dist/sync/merkle.js +112 -0
  51. package/dist/sync/merkle.js.map +1 -0
  52. package/dist/sync/synchronizer.d.ts +30 -0
  53. package/dist/sync/synchronizer.d.ts.map +1 -0
  54. package/dist/sync/synchronizer.js +339 -0
  55. package/dist/sync/synchronizer.js.map +1 -0
  56. package/dist/types.d.ts +14 -0
  57. package/dist/types.d.ts.map +1 -0
  58. package/dist/types.js +3 -0
  59. package/dist/types.js.map +1 -0
  60. package/dist/utils/env-manager.d.ts +19 -0
  61. package/dist/utils/env-manager.d.ts.map +1 -0
  62. package/dist/utils/env-manager.js +125 -0
  63. package/dist/utils/env-manager.js.map +1 -0
  64. package/dist/utils/git.d.ts +11 -0
  65. package/dist/utils/git.d.ts.map +1 -0
  66. package/dist/utils/git.js +46 -0
  67. package/dist/utils/git.js.map +1 -0
  68. package/dist/utils/index.d.ts +2 -0
  69. package/dist/utils/index.d.ts.map +1 -0
  70. package/dist/utils/index.js +7 -0
  71. package/dist/utils/index.js.map +1 -0
  72. package/dist/vectordb/index.d.ts +5 -0
  73. package/dist/vectordb/index.d.ts.map +1 -0
  74. package/dist/vectordb/index.js +14 -0
  75. package/dist/vectordb/index.js.map +1 -0
  76. package/dist/vectordb/milvus-restful-vectordb.d.ts +75 -0
  77. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
  78. package/dist/vectordb/milvus-restful-vectordb.js +703 -0
  79. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
  80. package/dist/vectordb/milvus-vectordb.d.ts +60 -0
  81. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
  82. package/dist/vectordb/milvus-vectordb.js +638 -0
  83. package/dist/vectordb/milvus-vectordb.js.map +1 -0
  84. package/dist/vectordb/types.d.ts +120 -0
  85. package/dist/vectordb/types.d.ts.map +1 -0
  86. package/dist/vectordb/types.js +9 -0
  87. package/dist/vectordb/types.js.map +1 -0
  88. package/dist/vectordb/zilliz-utils.d.ts +135 -0
  89. package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
  90. package/dist/vectordb/zilliz-utils.js +192 -0
  91. package/dist/vectordb/zilliz-utils.js.map +1 -0
  92. package/package.json +58 -0
@@ -0,0 +1,1080 @@
1
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-compiler-generated CommonJS interop helpers (tslib-style).
// They re-export ES-module bindings onto CommonJS objects and emulate
// `import * as ns from '...'`. Generated code — do not edit by hand.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        // Re-read the source property lazily via a getter.
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        // Resolved once on first call, then reused.
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.Context = void 0;
// Project-local modules: code splitters, embedding providers, env access,
// file-change synchronization, and git helpers.
const splitter_1 = require("./splitter");
const embedding_1 = require("./embedding");
const env_manager_1 = require("./utils/env-manager");
// Node standard library.
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const crypto = __importStar(require("crypto"));
const synchronizer_1 = require("./sync/synchronizer");
const git_1 = require("./utils/git");
45
// File extensions indexed by default. Callers can extend this list via
// config (supportedExtensions / customExtensions) or environment variables;
// the constructor merges and deduplicates all sources.
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
    '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
    // Text and markup files
    '.md', '.markdown', '.ipynb',
    // Intentionally disabled (kept for reference):
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
54
// Glob patterns excluded from indexing by default. Merged with config- and
// environment-supplied patterns in the constructor.
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    'node_modules/**',
    'dist/**',
    'build/**',
    'out/**',
    'target/**',
    'coverage/**',
    '.nyc_output/**',
    // IDE and editor files
    '.vscode/**',
    '.idea/**',
    '*.swp',
    '*.swo',
    // Version control
    '.git/**',
    '.svn/**',
    '.hg/**',
    // Cache directories
    '.cache/**',
    '__pycache__/**',
    '.pytest_cache/**',
    // Logs and temporary files
    'logs/**',
    'tmp/**',
    'temp/**',
    '*.log',
    // Environment and config files
    '.env',
    '.env.*',
    '*.local',
    // Minified and bundled files
    '*.min.js',
    '*.min.css',
    '*.min.map',
    '*.bundle.js',
    '*.bundle.css',
    '*.chunk.js',
    '*.vendor.js',
    '*.polyfills.js',
    '*.runtime.js',
    '*.map', // source map files
    // Bare directory names repeating the '/**' entries above.
    // NOTE(review): presumably consumed by a segment-based matcher that does
    // not expand globs — confirm both forms are needed by
    // matchesIgnorePattern / FileSynchronizer before pruning.
    'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
    'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
    'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
];
100
+ class Context {
101
+ constructor(config = {}) {
102
+ this.synchronizers = new Map();
103
+ // Initialize services
104
+ this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
105
+ apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
106
+ model: 'text-embedding-3-small',
107
+ ...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
108
+ });
109
+ if (!config.vectorDatabase) {
110
+ throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
111
+ }
112
+ this.vectorDatabase = config.vectorDatabase;
113
+ this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
114
+ // Load custom extensions from environment variables
115
+ const envCustomExtensions = this.getCustomExtensionsFromEnv();
116
+ // Combine default extensions with config extensions and env extensions
117
+ const allSupportedExtensions = [
118
+ ...DEFAULT_SUPPORTED_EXTENSIONS,
119
+ ...(config.supportedExtensions || []),
120
+ ...(config.customExtensions || []),
121
+ ...envCustomExtensions
122
+ ];
123
+ // Remove duplicates
124
+ this.supportedExtensions = [...new Set(allSupportedExtensions)];
125
+ // Load custom ignore patterns from environment variables
126
+ const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
127
+ // Start with default ignore patterns
128
+ const allIgnorePatterns = [
129
+ ...DEFAULT_IGNORE_PATTERNS,
130
+ ...(config.ignorePatterns || []),
131
+ ...(config.customIgnorePatterns || []),
132
+ ...envCustomIgnorePatterns
133
+ ];
134
+ // Remove duplicates
135
+ this.ignorePatterns = [...new Set(allIgnorePatterns)];
136
+ console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
137
+ if (envCustomExtensions.length > 0) {
138
+ console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
139
+ }
140
+ if (envCustomIgnorePatterns.length > 0) {
141
+ console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
142
+ }
143
+ }
144
+ /**
145
+ * Get embedding instance
146
+ */
147
+ getEmbedding() {
148
+ return this.embedding;
149
+ }
150
+ /**
151
+ * Get vector database instance
152
+ */
153
+ getVectorDatabase() {
154
+ return this.vectorDatabase;
155
+ }
156
+ /**
157
+ * Get code splitter instance
158
+ */
159
+ getCodeSplitter() {
160
+ return this.codeSplitter;
161
+ }
162
+ /**
163
+ * Get supported extensions
164
+ */
165
+ getSupportedExtensions() {
166
+ return [...this.supportedExtensions];
167
+ }
168
+ /**
169
+ * Get ignore patterns
170
+ */
171
+ getIgnorePatterns() {
172
+ return [...this.ignorePatterns];
173
+ }
174
+ /**
175
+ * Get synchronizers map
176
+ */
177
+ getSynchronizers() {
178
+ return new Map(this.synchronizers);
179
+ }
180
    /**
     * Register (or replace) the FileSynchronizer for a collection.
     * @param collectionName Vector-collection name (see getCollectionName)
     * @param synchronizer FileSynchronizer tracking that codebase
     */
    setSynchronizer(collectionName, synchronizer) {
        this.synchronizers.set(collectionName, synchronizer);
    }
186
+ /**
187
+ * Public wrapper for loadIgnorePatterns private method
188
+ */
189
+ async getLoadedIgnorePatterns(codebasePath) {
190
+ return this.loadIgnorePatterns(codebasePath);
191
+ }
192
+ /**
193
+ * Public wrapper for prepareCollection private method
194
+ */
195
+ async getPreparedCollection(codebasePath) {
196
+ return this.prepareCollection(codebasePath);
197
+ }
198
+ /**
199
+ * Get isHybrid setting from environment variable with default true
200
+ */
201
+ getIsHybrid() {
202
+ const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
203
+ if (isHybridEnv === undefined || isHybridEnv === null) {
204
+ return true; // Default to true
205
+ }
206
+ return isHybridEnv.toLowerCase() === 'true';
207
+ }
208
    /**
     * Generate a deterministic collection name for a codebase.
     * Uses the git remote URL when available (so clones of the same repo
     * share a collection), otherwise the resolved filesystem path.
     * @param codebasePath Codebase root path
     * @returns {string} e.g. 'hybrid_code_chunks_ab12cd34'
     */
    getCollectionName(codebasePath) {
        const normalizedPath = path.resolve(codebasePath);
        const gitRemoteUrl = (0, git_1.extractGitRemoteUrl)(normalizedPath);
        let hash = '';
        if (gitRemoteUrl) {
            // NOTE(review): the '_git' suffix appended here is discarded by
            // substring(0, 8) below, so it never reaches the collection name —
            // confirm whether the suffix was meant to survive. Changing this
            // now would rename (and orphan) existing collections.
            hash = crypto.createHash('md5').update(gitRemoteUrl).digest('hex') + '_git';
        }
        else {
            hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
        }
        const isHybrid = this.getIsHybrid();
        const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
        // Only the first 8 hex chars of the hash are used.
        return `${prefix}_${hash.substring(0, 8)}`;
    }
225
    /**
     * Index a codebase for semantic search.
     * Pipeline: load ignore files → prepare the vector collection → scan for
     * supported files → split/embed/store them via processFileList.
     * @param codebasePath Codebase root path
     * @param progressCallback Optional progress callback function
     * @param forceReindex Whether to recreate the collection even if it exists
     * @returns Indexing statistics { indexedFiles, totalChunks, status }
     */
    async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
        const isHybrid = this.getIsHybrid();
        const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
        console.log(`[Context] 🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
        // 1. Load ignore patterns from various ignore files
        await this.loadIgnorePatterns(codebasePath);
        // 2. Check and prepare vector collection
        progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
        console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
        await this.prepareCollection(codebasePath, forceReindex);
        // 3. Recursively traverse codebase to get all supported files
        progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
        const codeFiles = await this.getCodeFiles(codebasePath);
        console.log(`[Context] 📁 Found ${codeFiles.length} code files`);
        if (codeFiles.length === 0) {
            // Nothing to do — report completion immediately.
            progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 });
            return { indexedFiles: 0, totalChunks: 0, status: 'completed' };
        }
        // 4. Process each file with streaming chunk processing.
        // Reserve the first 10% of the progress bar for preparation,
        // the remaining 90% for actual indexing.
        const indexingStartPercentage = 10;
        const indexingEndPercentage = 100;
        const indexingRange = indexingEndPercentage - indexingStartPercentage;
        const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
            // Map file progress onto the 10–100% window.
            const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
            console.log(`[Context] 📊 Processed ${fileIndex}/${totalFiles} files`);
            progressCallback?.({
                phase: `Processing files (${fileIndex}/${totalFiles})...`,
                current: fileIndex,
                total: totalFiles,
                percentage: Math.round(progressPercentage)
            });
        });
        console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
        progressCallback?.({
            phase: 'Indexing complete!',
            current: result.processedFiles,
            total: codeFiles.length,
            percentage: 100
        });
        // status may be 'limit_reached' when the chunk cap stopped indexing early.
        return {
            indexedFiles: result.processedFiles,
            totalChunks: result.totalChunks,
            status: result.status
        };
    }
279
+ async reindexByChange(codebasePath, progressCallback) {
280
+ const collectionName = this.getCollectionName(codebasePath);
281
+ const synchronizer = this.synchronizers.get(collectionName);
282
+ if (!synchronizer) {
283
+ // Load project-specific ignore patterns before creating FileSynchronizer
284
+ await this.loadIgnorePatterns(codebasePath);
285
+ // To be safe, let's initialize if it's not there.
286
+ const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
287
+ await newSynchronizer.initialize();
288
+ this.synchronizers.set(collectionName, newSynchronizer);
289
+ }
290
+ const currentSynchronizer = this.synchronizers.get(collectionName);
291
+ progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 });
292
+ const { added, removed, modified } = await currentSynchronizer.checkForChanges();
293
+ const totalChanges = added.length + removed.length + modified.length;
294
+ if (totalChanges === 0) {
295
+ progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
296
+ console.log('[Context] ✅ No file changes detected.');
297
+ return { added: 0, removed: 0, modified: 0 };
298
+ }
299
+ console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
300
+ let processedChanges = 0;
301
+ const updateProgress = (phase) => {
302
+ processedChanges++;
303
+ const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100);
304
+ progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage });
305
+ };
306
+ // Handle removed files
307
+ for (const file of removed) {
308
+ await this.deleteFileChunks(collectionName, file);
309
+ updateProgress(`Removed ${file}`);
310
+ }
311
+ // Handle modified files
312
+ for (const file of modified) {
313
+ await this.deleteFileChunks(collectionName, file);
314
+ updateProgress(`Deleted old chunks for ${file}`);
315
+ }
316
+ // Handle added and modified files
317
+ const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
318
+ if (filesToIndex.length > 0) {
319
+ await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
320
+ updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
321
+ });
322
+ }
323
+ console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
324
+ progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
325
+ return { added: added.length, removed: removed.length, modified: modified.length };
326
+ }
327
+ async deleteFileChunks(collectionName, relativePath) {
328
+ // Escape backslashes for Milvus query expression (Windows path compatibility)
329
+ const escapedPath = relativePath.replace(/\\/g, '\\\\');
330
+ const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ['id']);
331
+ if (results.length > 0) {
332
+ const ids = results.map(r => r.id).filter(id => id);
333
+ if (ids.length > 0) {
334
+ await this.vectorDatabase.delete(collectionName, ids);
335
+ console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`);
336
+ }
337
+ }
338
+ }
339
+ /**
340
+ * Semantic search with unified implementation
341
+ * @param codebasePath Codebase path to search in
342
+ * @param query Search query
343
+ * @param topK Number of results to return
344
+ * @param threshold Similarity threshold
345
+ */
346
+ async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
347
+ const isHybrid = this.getIsHybrid();
348
+ const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
349
+ console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
350
+ const collectionName = this.getCollectionName(codebasePath);
351
+ console.log(`[Context] 🔍 Using collection: ${collectionName}`);
352
+ // Check if collection exists and has data
353
+ const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
354
+ if (!hasCollection) {
355
+ console.log(`[Context] ⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
356
+ return [];
357
+ }
358
+ if (isHybrid === true) {
359
+ try {
360
+ // Check collection stats to see if it has data
361
+ const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
362
+ console.log(`[Context] 🔍 Collection '${collectionName}' exists and appears to have data`);
363
+ }
364
+ catch (error) {
365
+ console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
366
+ }
367
+ // 1. Generate query vector
368
+ console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
369
+ const queryEmbedding = await this.embedding.embed(query);
370
+ console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
371
+ console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
372
+ // 2. Prepare hybrid search requests
373
+ const searchRequests = [
374
+ {
375
+ data: queryEmbedding.vector,
376
+ anns_field: "vector",
377
+ param: { "nprobe": 10 },
378
+ limit: topK
379
+ },
380
+ {
381
+ data: query,
382
+ anns_field: "sparse_vector",
383
+ param: { "drop_ratio_search": 0.2 },
384
+ limit: topK
385
+ }
386
+ ];
387
+ console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
388
+ console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
389
+ // 3. Execute hybrid search
390
+ console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
391
+ const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
392
+ rerank: {
393
+ strategy: 'rrf',
394
+ params: { k: 100 }
395
+ },
396
+ limit: topK,
397
+ filterExpr
398
+ });
399
+ console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
400
+ // 4. Convert to semantic search result format
401
+ const results = searchResults.map(result => ({
402
+ content: result.document.content,
403
+ relativePath: result.document.relativePath,
404
+ startLine: result.document.startLine,
405
+ endLine: result.document.endLine,
406
+ language: result.document.metadata.language || 'unknown',
407
+ score: result.score
408
+ }));
409
+ console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
410
+ if (results.length > 0) {
411
+ console.log(`[Context] 🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
412
+ }
413
+ return results;
414
+ }
415
+ else {
416
+ // Regular semantic search
417
+ // 1. Generate query vector
418
+ const queryEmbedding = await this.embedding.embed(query);
419
+ // 2. Search in vector database
420
+ const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
421
+ // 3. Convert to semantic search result format
422
+ const results = searchResults.map(result => ({
423
+ content: result.document.content,
424
+ relativePath: result.document.relativePath,
425
+ startLine: result.document.startLine,
426
+ endLine: result.document.endLine,
427
+ language: result.document.metadata.language || 'unknown',
428
+ score: result.score
429
+ }));
430
+ console.log(`[Context] ✅ Found ${results.length} relevant results`);
431
+ return results;
432
+ }
433
+ }
434
+ /**
435
+ * Check if index exists for codebase
436
+ * @param codebasePath Codebase path to check
437
+ * @returns Whether index exists
438
+ */
439
+ async hasIndex(codebasePath) {
440
+ const collectionName = this.getCollectionName(codebasePath);
441
+ return await this.vectorDatabase.hasCollection(collectionName);
442
+ }
443
+ /**
444
+ * Clear index
445
+ * @param codebasePath Codebase path to clear index for
446
+ * @param progressCallback Optional progress callback function
447
+ */
448
+ async clearIndex(codebasePath, progressCallback) {
449
+ console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);
450
+ progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
451
+ const collectionName = this.getCollectionName(codebasePath);
452
+ const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
453
+ progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
454
+ if (collectionExists) {
455
+ await this.vectorDatabase.dropCollection(collectionName);
456
+ }
457
+ // Delete snapshot file
458
+ await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
459
+ progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
460
+ console.log('[Context] ✅ Index data cleaned');
461
+ }
462
+ /**
463
+ * Update ignore patterns (merges with default patterns and existing patterns)
464
+ * @param ignorePatterns Array of ignore patterns to add to defaults
465
+ */
466
+ updateIgnorePatterns(ignorePatterns) {
467
+ // Merge with default patterns and any existing custom patterns, avoiding duplicates
468
+ const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
469
+ const uniquePatterns = [];
470
+ const patternSet = new Set(mergedPatterns);
471
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
472
+ this.ignorePatterns = uniquePatterns;
473
+ console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
474
+ }
475
+ /**
476
+ * Add custom ignore patterns (from MCP or other sources) without replacing existing ones
477
+ * @param customPatterns Array of custom ignore patterns to add
478
+ */
479
+ addCustomIgnorePatterns(customPatterns) {
480
+ if (customPatterns.length === 0)
481
+ return;
482
+ // Merge current patterns with new custom patterns, avoiding duplicates
483
+ const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
484
+ const uniquePatterns = [];
485
+ const patternSet = new Set(mergedPatterns);
486
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
487
+ this.ignorePatterns = uniquePatterns;
488
+ console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
489
+ }
490
+ /**
491
+ * Reset ignore patterns to defaults only
492
+ */
493
+ resetIgnorePatternsToDefaults() {
494
+ this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
495
+ console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
496
+ }
497
    /**
     * Swap in a different embedding provider at runtime.
     * @param embedding New embedding instance
     */
    updateEmbedding(embedding) {
        this.embedding = embedding;
        console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`);
    }
505
    /**
     * Swap in a different vector database instance at runtime.
     * @param vectorDatabase New vector database instance
     */
    updateVectorDatabase(vectorDatabase) {
        this.vectorDatabase = vectorDatabase;
        console.log(`[Context] 🔄 Updated vector database`);
    }
513
    /**
     * Swap in a different code splitter instance at runtime.
     * @param splitter New splitter instance
     */
    updateSplitter(splitter) {
        this.codeSplitter = splitter;
        console.log(`[Context] 🔄 Updated splitter instance`);
    }
521
+ /**
522
+ * Prepare vector collection
523
+ */
524
+ async prepareCollection(codebasePath, forceReindex = false) {
525
+ const isHybrid = this.getIsHybrid();
526
+ const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
527
+ console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
528
+ const collectionName = this.getCollectionName(codebasePath);
529
+ // Check if collection already exists
530
+ const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
531
+ if (collectionExists && !forceReindex) {
532
+ console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
533
+ return;
534
+ }
535
+ if (collectionExists && forceReindex) {
536
+ console.log(`[Context] 🗑️ Dropping existing collection ${collectionName} for force reindex...`);
537
+ await this.vectorDatabase.dropCollection(collectionName);
538
+ console.log(`[Context] ✅ Collection ${collectionName} dropped successfully`);
539
+ }
540
+ console.log(`[Context] 🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
541
+ const dimension = await this.embedding.detectDimension();
542
+ console.log(`[Context] 📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
543
+ const dirName = path.basename(codebasePath);
544
+ if (isHybrid === true) {
545
+ await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
546
+ }
547
+ else {
548
+ await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
549
+ }
550
+ console.log(`[Context] ✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
551
+ }
552
+ /**
553
+ * Recursively get all code files in the codebase
554
+ */
555
+ async getCodeFiles(codebasePath) {
556
+ const files = [];
557
+ const traverseDirectory = async (currentPath) => {
558
+ const entries = await fs.promises.readdir(currentPath, { withFileTypes: true });
559
+ for (const entry of entries) {
560
+ const fullPath = path.join(currentPath, entry.name);
561
+ // Check if path matches ignore patterns
562
+ if (this.matchesIgnorePattern(fullPath, codebasePath)) {
563
+ continue;
564
+ }
565
+ if (entry.isDirectory()) {
566
+ await traverseDirectory(fullPath);
567
+ }
568
+ else if (entry.isFile()) {
569
+ const ext = path.extname(entry.name);
570
+ if (this.supportedExtensions.includes(ext)) {
571
+ files.push(fullPath);
572
+ }
573
+ }
574
+ }
575
+ };
576
+ await traverseDirectory(codebasePath);
577
+ return files;
578
+ }
579
    /**
     * Process a list of files with streaming chunk processing: each file is
     * read, split into chunks, and chunks are embedded/stored in batches of
     * EMBEDDING_BATCH_SIZE so memory use stays bounded. Indexing stops early
     * once CHUNK_LIMIT total chunks have been produced.
     * @param filePaths Array of file paths to process
     * @param codebasePath Base path for the codebase
     * @param onFileProcessed Callback called when each file is processed
     * @returns { processedFiles, totalChunks, status } where status is
     *   'limit_reached' when CHUNK_LIMIT stopped indexing, else 'completed'
     */
    async processFileList(filePaths, codebasePath, onFileProcessed) {
        const isHybrid = this.getIsHybrid();
        // Batch size is tunable via env; clamped to at least 1.
        const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
        const CHUNK_LIMIT = 450000;
        console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
        let chunkBuffer = [];
        let processedFiles = 0;
        let totalChunks = 0;
        let limitReached = false;
        for (let i = 0; i < filePaths.length; i++) {
            const filePath = filePaths[i];
            try {
                const content = await fs.promises.readFile(filePath, 'utf-8');
                const language = this.getLanguageFromExtension(path.extname(filePath));
                const chunks = await this.codeSplitter.split(content, language, filePath);
                // Log files with many chunks or large content
                if (chunks.length > 50) {
                    console.warn(`[Context] ⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
                }
                else if (content.length > 100000) {
                    console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
                }
                // Add chunks to buffer
                for (const chunk of chunks) {
                    chunkBuffer.push({ chunk, codebasePath });
                    totalChunks++;
                    // Process batch when buffer reaches EMBEDDING_BATCH_SIZE.
                    // A failed batch is logged and dropped, not retried.
                    if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
                        try {
                            await this.processChunkBuffer(chunkBuffer);
                        }
                        catch (error) {
                            const searchType = isHybrid === true ? 'hybrid' : 'regular';
                            console.error(`[Context] ❌ Failed to process chunk batch for ${searchType}:`, error);
                            if (error instanceof Error) {
                                console.error('[Context] Stack trace:', error.stack);
                            }
                        }
                        finally {
                            chunkBuffer = []; // Always clear buffer, even on failure
                        }
                    }
                    // Check if chunk limit is reached
                    if (totalChunks >= CHUNK_LIMIT) {
                        console.warn(`[Context] ⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
                        limitReached = true;
                        break; // Exit the inner loop (over chunks)
                    }
                }
                // The file is counted even when the limit interrupted its chunks.
                processedFiles++;
                onFileProcessed?.(filePath, i + 1, filePaths.length);
                if (limitReached) {
                    break; // Exit the outer loop (over files)
                }
            }
            catch (error) {
                // Unreadable/unsplittable files are skipped, not fatal.
                console.warn(`[Context] ⚠️ Skipping file ${filePath}: ${error}`);
            }
        }
        // Process any remaining chunks in the buffer
        if (chunkBuffer.length > 0) {
            const searchType = isHybrid === true ? 'hybrid' : 'regular';
            console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
            try {
                await this.processChunkBuffer(chunkBuffer);
            }
            catch (error) {
                console.error(`[Context] ❌ Failed to process final chunk batch for ${searchType}:`, error);
                if (error instanceof Error) {
                    console.error('[Context] Stack trace:', error.stack);
                }
            }
        }
        return {
            processedFiles,
            totalChunks,
            status: limitReached ? 'limit_reached' : 'completed'
        };
    }
665
+ /**
666
+ * Process accumulated chunk buffer
667
+ */
668
+ async processChunkBuffer(chunkBuffer) {
669
+ if (chunkBuffer.length === 0)
670
+ return;
671
+ // Extract chunks and ensure they all have the same codebasePath
672
+ const chunks = chunkBuffer.map(item => item.chunk);
673
+ const codebasePath = chunkBuffer[0].codebasePath;
674
+ // Estimate tokens (rough estimation: 1 token ≈ 4 characters)
675
+ const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
676
+ const isHybrid = this.getIsHybrid();
677
+ const searchType = isHybrid === true ? 'hybrid' : 'regular';
678
+ console.log(`[Context] 🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
679
+ await this.processChunkBatch(chunks, codebasePath);
680
+ }
681
+ /**
682
+ * Process a batch of chunks
683
+ */
684
+ async processChunkBatch(chunks, codebasePath) {
685
+ const isHybrid = this.getIsHybrid();
686
+ // Generate embedding vectors
687
+ const chunkContents = chunks.map(chunk => chunk.content);
688
+ const embeddings = await this.embedding.embedBatch(chunkContents);
689
+ if (isHybrid === true) {
690
+ // Create hybrid vector documents
691
+ const documents = chunks.map((chunk, index) => {
692
+ if (!chunk.metadata.filePath) {
693
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
694
+ }
695
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
696
+ const fileExtension = path.extname(chunk.metadata.filePath);
697
+ const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
698
+ return {
699
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
700
+ content: chunk.content, // Full text content for BM25 and storage
701
+ vector: embeddings[index].vector, // Dense vector
702
+ relativePath,
703
+ startLine: chunk.metadata.startLine || 0,
704
+ endLine: chunk.metadata.endLine || 0,
705
+ fileExtension,
706
+ metadata: {
707
+ ...restMetadata,
708
+ codebasePath,
709
+ language: chunk.metadata.language || 'unknown',
710
+ chunkIndex: index
711
+ }
712
+ };
713
+ });
714
+ // Store to vector database
715
+ await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
716
+ }
717
+ else {
718
+ // Create regular vector documents
719
+ const documents = chunks.map((chunk, index) => {
720
+ if (!chunk.metadata.filePath) {
721
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
722
+ }
723
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
724
+ const fileExtension = path.extname(chunk.metadata.filePath);
725
+ const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
726
+ return {
727
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
728
+ vector: embeddings[index].vector,
729
+ content: chunk.content,
730
+ relativePath,
731
+ startLine: chunk.metadata.startLine || 0,
732
+ endLine: chunk.metadata.endLine || 0,
733
+ fileExtension,
734
+ metadata: {
735
+ ...restMetadata,
736
+ codebasePath,
737
+ language: chunk.metadata.language || 'unknown',
738
+ chunkIndex: index
739
+ }
740
+ };
741
+ });
742
+ // Store to vector database
743
+ await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
744
+ }
745
+ }
746
+ /**
747
+ * Get programming language based on file extension
748
+ */
749
+ getLanguageFromExtension(ext) {
750
+ const languageMap = {
751
+ '.ts': 'typescript',
752
+ '.tsx': 'typescript',
753
+ '.js': 'javascript',
754
+ '.jsx': 'javascript',
755
+ '.py': 'python',
756
+ '.java': 'java',
757
+ '.cpp': 'cpp',
758
+ '.c': 'c',
759
+ '.h': 'c',
760
+ '.hpp': 'cpp',
761
+ '.cs': 'csharp',
762
+ '.go': 'go',
763
+ '.rs': 'rust',
764
+ '.php': 'php',
765
+ '.rb': 'ruby',
766
+ '.swift': 'swift',
767
+ '.kt': 'kotlin',
768
+ '.scala': 'scala',
769
+ '.m': 'objective-c',
770
+ '.mm': 'objective-c',
771
+ '.ipynb': 'jupyter'
772
+ };
773
+ return languageMap[ext] || 'text';
774
+ }
775
+ /**
776
+ * Generate unique ID based on chunk content and location
777
+ * @param relativePath Relative path to the file
778
+ * @param startLine Start line number
779
+ * @param endLine End line number
780
+ * @param content Chunk content
781
+ * @returns Hash-based unique ID
782
+ */
783
+ generateId(relativePath, startLine, endLine, content) {
784
+ const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
785
+ const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex');
786
+ return `chunk_${hash.substring(0, 16)}`;
787
+ }
788
+ /**
789
+ * Read ignore patterns from file (e.g., .gitignore)
790
+ * @param filePath Path to the ignore file
791
+ * @returns Array of ignore patterns
792
+ */
793
+ static async getIgnorePatternsFromFile(filePath) {
794
+ try {
795
+ const content = await fs.promises.readFile(filePath, 'utf-8');
796
+ return content
797
+ .split('\n')
798
+ .map(line => line.trim())
799
+ .filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
800
+ }
801
+ catch (error) {
802
+ console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`);
803
+ return [];
804
+ }
805
+ }
806
+ /**
807
+ * Load ignore patterns from various ignore files in the codebase
808
+ * This method preserves any existing custom patterns that were added before
809
+ * @param codebasePath Path to the codebase
810
+ */
811
+ async loadIgnorePatterns(codebasePath) {
812
+ try {
813
+ let fileBasedPatterns = [];
814
+ // Load all .xxxignore files in codebase directory
815
+ const ignoreFiles = await this.findIgnoreFiles(codebasePath);
816
+ for (const ignoreFile of ignoreFiles) {
817
+ const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
818
+ fileBasedPatterns.push(...patterns);
819
+ }
820
+ // Load global ~/.context/.contextignore
821
+ const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
822
+ fileBasedPatterns.push(...globalIgnorePatterns);
823
+ // Merge file-based patterns with existing patterns (which may include custom MCP patterns)
824
+ if (fileBasedPatterns.length > 0) {
825
+ this.addCustomIgnorePatterns(fileBasedPatterns);
826
+ console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
827
+ }
828
+ else {
829
+ console.log('📄 No ignore files found, keeping existing patterns');
830
+ }
831
+ }
832
+ catch (error) {
833
+ console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`);
834
+ // Continue with existing patterns on error - don't reset them
835
+ }
836
+ }
837
+ /**
838
+ * Find all .xxxignore files in the codebase directory
839
+ * @param codebasePath Path to the codebase
840
+ * @returns Array of ignore file paths
841
+ */
842
+ async findIgnoreFiles(codebasePath) {
843
+ try {
844
+ const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
845
+ const ignoreFiles = [];
846
+ for (const entry of entries) {
847
+ if (entry.isFile() &&
848
+ entry.name.startsWith('.') &&
849
+ entry.name.endsWith('ignore')) {
850
+ ignoreFiles.push(path.join(codebasePath, entry.name));
851
+ }
852
+ }
853
+ if (ignoreFiles.length > 0) {
854
+ console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
855
+ }
856
+ return ignoreFiles;
857
+ }
858
+ catch (error) {
859
+ console.warn(`[Context] ⚠️ Failed to scan for ignore files: ${error}`);
860
+ return [];
861
+ }
862
+ }
863
+ /**
864
+ * Load global ignore file from ~/.context/.contextignore
865
+ * @returns Array of ignore patterns
866
+ */
867
+ async loadGlobalIgnoreFile() {
868
+ try {
869
+ const homeDir = require('os').homedir();
870
+ const globalIgnorePath = path.join(homeDir, '.context', '.contextignore');
871
+ return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore');
872
+ }
873
+ catch (error) {
874
+ // Global ignore file is optional, don't log warnings
875
+ return [];
876
+ }
877
+ }
878
+ /**
879
+ * Load ignore patterns from a specific ignore file
880
+ * @param filePath Path to the ignore file
881
+ * @param fileName Display name for logging
882
+ * @returns Array of ignore patterns
883
+ */
884
+ async loadIgnoreFile(filePath, fileName) {
885
+ try {
886
+ await fs.promises.access(filePath);
887
+ console.log(`📄 Found ${fileName} file at: ${filePath}`);
888
+ const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath);
889
+ if (ignorePatterns.length > 0) {
890
+ console.log(`[Context] 🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
891
+ return ignorePatterns;
892
+ }
893
+ else {
894
+ console.log(`📄 ${fileName} file found but no valid patterns detected`);
895
+ return [];
896
+ }
897
+ }
898
+ catch (error) {
899
+ if (fileName.includes('global')) {
900
+ console.log(`📄 No ${fileName} file found`);
901
+ }
902
+ return [];
903
+ }
904
+ }
905
+ /**
906
+ * Check if a path matches any ignore pattern
907
+ * @param filePath Path to check
908
+ * @param basePath Base path for relative pattern matching
909
+ * @returns True if path should be ignored
910
+ */
911
+ matchesIgnorePattern(filePath, basePath) {
912
+ if (this.ignorePatterns.length === 0) {
913
+ return false;
914
+ }
915
+ const relativePath = path.relative(basePath, filePath);
916
+ const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators
917
+ for (const pattern of this.ignorePatterns) {
918
+ if (this.isPatternMatch(normalizedPath, pattern)) {
919
+ return true;
920
+ }
921
+ }
922
+ return false;
923
+ }
924
+ /**
925
+ * Simple glob pattern matching
926
+ * @param filePath File path to test
927
+ * @param pattern Glob pattern
928
+ * @returns True if pattern matches
929
+ */
930
+ isPatternMatch(filePath, pattern) {
931
+ // Handle directory patterns (ending with /)
932
+ if (pattern.endsWith('/')) {
933
+ const dirPattern = pattern.slice(0, -1);
934
+ const pathParts = filePath.split('/');
935
+ return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
936
+ }
937
+ // Handle file patterns
938
+ if (pattern.includes('/')) {
939
+ // Pattern with path separator - match exact path
940
+ return this.simpleGlobMatch(filePath, pattern);
941
+ }
942
+ else {
943
+ // Pattern without path separator - match filename in any directory
944
+ const fileName = path.basename(filePath);
945
+ return this.simpleGlobMatch(fileName, pattern);
946
+ }
947
+ }
948
+ /**
949
+ * Simple glob matching supporting * wildcard
950
+ * @param text Text to test
951
+ * @param pattern Pattern with * wildcards
952
+ * @returns True if pattern matches
953
+ */
954
+ simpleGlobMatch(text, pattern) {
955
+ // Convert glob pattern to regex
956
+ const regexPattern = pattern
957
+ .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except *
958
+ .replace(/\*/g, '.*'); // Convert * to .*
959
+ const regex = new RegExp(`^${regexPattern}$`);
960
+ return regex.test(text);
961
+ }
962
+ /**
963
+ * Get custom extensions from environment variables
964
+ * Supports CUSTOM_EXTENSIONS as comma-separated list
965
+ * @returns Array of custom extensions
966
+ */
967
+ getCustomExtensionsFromEnv() {
968
+ const envExtensions = env_manager_1.envManager.get('CUSTOM_EXTENSIONS');
969
+ if (!envExtensions) {
970
+ return [];
971
+ }
972
+ try {
973
+ const extensions = envExtensions
974
+ .split(',')
975
+ .map(ext => ext.trim())
976
+ .filter(ext => ext.length > 0)
977
+ .map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
978
+ return extensions;
979
+ }
980
+ catch (error) {
981
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
982
+ return [];
983
+ }
984
+ }
985
+ /**
986
+ * Get custom ignore patterns from environment variables
987
+ * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
988
+ * @returns Array of custom ignore patterns
989
+ */
990
+ getCustomIgnorePatternsFromEnv() {
991
+ const envIgnorePatterns = env_manager_1.envManager.get('CUSTOM_IGNORE_PATTERNS');
992
+ if (!envIgnorePatterns) {
993
+ return [];
994
+ }
995
+ try {
996
+ const patterns = envIgnorePatterns
997
+ .split(',')
998
+ .map(pattern => pattern.trim())
999
+ .filter(pattern => pattern.length > 0);
1000
+ return patterns;
1001
+ }
1002
+ catch (error) {
1003
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
1004
+ return [];
1005
+ }
1006
+ }
1007
+ /**
1008
+ * Add custom extensions (from MCP or other sources) without replacing existing ones
1009
+ * @param customExtensions Array of custom extensions to add
1010
+ */
1011
+ addCustomExtensions(customExtensions) {
1012
+ if (customExtensions.length === 0)
1013
+ return;
1014
+ // Ensure extensions start with dot
1015
+ const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
1016
+ // Merge current extensions with new custom extensions, avoiding duplicates
1017
+ const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
1018
+ const uniqueExtensions = [...new Set(mergedExtensions)];
1019
+ this.supportedExtensions = uniqueExtensions;
1020
+ console.log(`[Context] 📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
1021
+ }
1022
+ /**
1023
+ * Get current splitter information
1024
+ */
1025
+ getSplitterInfo() {
1026
+ const splitterName = this.codeSplitter.constructor.name;
1027
+ if (splitterName === 'AstCodeSplitter') {
1028
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1029
+ return {
1030
+ type: 'ast',
1031
+ hasBuiltinFallback: true,
1032
+ supportedLanguages: AstCodeSplitter.getSupportedLanguages()
1033
+ };
1034
+ }
1035
+ else {
1036
+ return {
1037
+ type: 'langchain',
1038
+ hasBuiltinFallback: false
1039
+ };
1040
+ }
1041
+ }
1042
+ /**
1043
+ * Check if current splitter supports a specific language
1044
+ * @param language Programming language
1045
+ */
1046
+ isLanguageSupported(language) {
1047
+ const splitterName = this.codeSplitter.constructor.name;
1048
+ if (splitterName === 'AstCodeSplitter') {
1049
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1050
+ return AstCodeSplitter.isLanguageSupported(language);
1051
+ }
1052
+ // LangChain splitter supports most languages
1053
+ return true;
1054
+ }
1055
+ /**
1056
+ * Get which strategy would be used for a specific language
1057
+ * @param language Programming language
1058
+ */
1059
+ getSplitterStrategyForLanguage(language) {
1060
+ const splitterName = this.codeSplitter.constructor.name;
1061
+ if (splitterName === 'AstCodeSplitter') {
1062
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1063
+ const isSupported = AstCodeSplitter.isLanguageSupported(language);
1064
+ return {
1065
+ strategy: isSupported ? 'ast' : 'langchain',
1066
+ reason: isSupported
1067
+ ? 'Language supported by AST parser'
1068
+ : 'Language not supported by AST, will fallback to LangChain'
1069
+ };
1070
+ }
1071
+ else {
1072
+ return {
1073
+ strategy: 'langchain',
1074
+ reason: 'Using LangChain splitter directly'
1075
+ };
1076
+ }
1077
+ }
1078
+ }
1079
// CommonJS export of the Context class (compiled TypeScript output).
exports.Context = Context;
//# sourceMappingURL=context.js.map