@pleaseai/context-please-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/LICENSE +24 -0
  2. package/README.md +287 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/context.d.ts +276 -0
  5. package/dist/context.d.ts.map +1 -0
  6. package/dist/context.js +1072 -0
  7. package/dist/context.js.map +1 -0
  8. package/dist/embedding/base-embedding.d.ts +51 -0
  9. package/dist/embedding/base-embedding.d.ts.map +1 -0
  10. package/dist/embedding/base-embedding.js +36 -0
  11. package/dist/embedding/base-embedding.js.map +1 -0
  12. package/dist/embedding/gemini-embedding.d.ts +53 -0
  13. package/dist/embedding/gemini-embedding.d.ts.map +1 -0
  14. package/dist/embedding/gemini-embedding.js +152 -0
  15. package/dist/embedding/gemini-embedding.js.map +1 -0
  16. package/dist/embedding/index.d.ts +6 -0
  17. package/dist/embedding/index.d.ts.map +1 -0
  18. package/dist/embedding/index.js +24 -0
  19. package/dist/embedding/index.js.map +1 -0
  20. package/dist/embedding/ollama-embedding.d.ts +55 -0
  21. package/dist/embedding/ollama-embedding.d.ts.map +1 -0
  22. package/dist/embedding/ollama-embedding.js +192 -0
  23. package/dist/embedding/ollama-embedding.js.map +1 -0
  24. package/dist/embedding/openai-embedding.d.ts +36 -0
  25. package/dist/embedding/openai-embedding.d.ts.map +1 -0
  26. package/dist/embedding/openai-embedding.js +159 -0
  27. package/dist/embedding/openai-embedding.js.map +1 -0
  28. package/dist/embedding/voyageai-embedding.d.ts +44 -0
  29. package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
  30. package/dist/embedding/voyageai-embedding.js +227 -0
  31. package/dist/embedding/voyageai-embedding.js.map +1 -0
  32. package/dist/index.d.ts +8 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +24 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/splitter/ast-splitter.d.ts +22 -0
  37. package/dist/splitter/ast-splitter.d.ts.map +1 -0
  38. package/dist/splitter/ast-splitter.js +234 -0
  39. package/dist/splitter/ast-splitter.js.map +1 -0
  40. package/dist/splitter/index.d.ts +41 -0
  41. package/dist/splitter/index.d.ts.map +1 -0
  42. package/dist/splitter/index.js +27 -0
  43. package/dist/splitter/index.js.map +1 -0
  44. package/dist/splitter/langchain-splitter.d.ts +13 -0
  45. package/dist/splitter/langchain-splitter.d.ts.map +1 -0
  46. package/dist/splitter/langchain-splitter.js +118 -0
  47. package/dist/splitter/langchain-splitter.js.map +1 -0
  48. package/dist/sync/merkle.d.ts +26 -0
  49. package/dist/sync/merkle.d.ts.map +1 -0
  50. package/dist/sync/merkle.js +112 -0
  51. package/dist/sync/merkle.js.map +1 -0
  52. package/dist/sync/synchronizer.d.ts +30 -0
  53. package/dist/sync/synchronizer.d.ts.map +1 -0
  54. package/dist/sync/synchronizer.js +339 -0
  55. package/dist/sync/synchronizer.js.map +1 -0
  56. package/dist/types.d.ts +14 -0
  57. package/dist/types.d.ts.map +1 -0
  58. package/dist/types.js +3 -0
  59. package/dist/types.js.map +1 -0
  60. package/dist/utils/env-manager.d.ts +19 -0
  61. package/dist/utils/env-manager.d.ts.map +1 -0
  62. package/dist/utils/env-manager.js +125 -0
  63. package/dist/utils/env-manager.js.map +1 -0
  64. package/dist/utils/index.d.ts +2 -0
  65. package/dist/utils/index.d.ts.map +1 -0
  66. package/dist/utils/index.js +7 -0
  67. package/dist/utils/index.js.map +1 -0
  68. package/dist/vectordb/base/base-vector-database.d.ts +58 -0
  69. package/dist/vectordb/base/base-vector-database.d.ts.map +1 -0
  70. package/dist/vectordb/base/base-vector-database.js +32 -0
  71. package/dist/vectordb/base/base-vector-database.js.map +1 -0
  72. package/dist/vectordb/factory.d.ts +80 -0
  73. package/dist/vectordb/factory.d.ts.map +1 -0
  74. package/dist/vectordb/factory.js +89 -0
  75. package/dist/vectordb/factory.js.map +1 -0
  76. package/dist/vectordb/index.d.ts +12 -0
  77. package/dist/vectordb/index.d.ts.map +1 -0
  78. package/dist/vectordb/index.js +27 -0
  79. package/dist/vectordb/index.js.map +1 -0
  80. package/dist/vectordb/milvus-restful-vectordb.d.ts +75 -0
  81. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
  82. package/dist/vectordb/milvus-restful-vectordb.js +707 -0
  83. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
  84. package/dist/vectordb/milvus-vectordb.d.ts +59 -0
  85. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
  86. package/dist/vectordb/milvus-vectordb.js +641 -0
  87. package/dist/vectordb/milvus-vectordb.js.map +1 -0
  88. package/dist/vectordb/qdrant-vectordb.d.ts +124 -0
  89. package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -0
  90. package/dist/vectordb/qdrant-vectordb.js +582 -0
  91. package/dist/vectordb/qdrant-vectordb.js.map +1 -0
  92. package/dist/vectordb/sparse/index.d.ts +4 -0
  93. package/dist/vectordb/sparse/index.d.ts.map +1 -0
  94. package/dist/vectordb/sparse/index.js +23 -0
  95. package/dist/vectordb/sparse/index.js.map +1 -0
  96. package/dist/vectordb/sparse/simple-bm25.d.ts +104 -0
  97. package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -0
  98. package/dist/vectordb/sparse/simple-bm25.js +189 -0
  99. package/dist/vectordb/sparse/simple-bm25.js.map +1 -0
  100. package/dist/vectordb/sparse/sparse-vector-generator.d.ts +54 -0
  101. package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -0
  102. package/dist/vectordb/sparse/sparse-vector-generator.js +3 -0
  103. package/dist/vectordb/sparse/sparse-vector-generator.js.map +1 -0
  104. package/dist/vectordb/sparse/types.d.ts +38 -0
  105. package/dist/vectordb/sparse/types.d.ts.map +1 -0
  106. package/dist/vectordb/sparse/types.js +3 -0
  107. package/dist/vectordb/sparse/types.js.map +1 -0
  108. package/dist/vectordb/types.d.ts +120 -0
  109. package/dist/vectordb/types.d.ts.map +1 -0
  110. package/dist/vectordb/types.js +9 -0
  111. package/dist/vectordb/types.js.map +1 -0
  112. package/dist/vectordb/zilliz-utils.d.ts +135 -0
  113. package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
  114. package/dist/vectordb/zilliz-utils.js +192 -0
  115. package/dist/vectordb/zilliz-utils.js.map +1 -0
  116. package/package.json +61 -0
@@ -0,0 +1,1072 @@
1
"use strict";
// TypeScript-emitted CommonJS interop helpers (standard tsc output).
// __createBinding re-exports property `k` of module `m` onto `o` under the
// name `k2`, using a getter where possible to preserve live-binding semantics.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// __setModuleDefault attaches the CommonJS module object itself as `default`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// __importStar emulates `import * as ns from "mod"` over a CommonJS module:
// genuine ES modules (`__esModule` flag) pass through unchanged; otherwise all
// own keys except "default" are re-bound onto a fresh namespace object and the
// original module object becomes its `default` export.
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        // Lazily pick the key-enumeration strategy on first call, then memoize.
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
35
// CommonJS module wiring: mark as ES-module-compatible and pre-declare the
// single public export before the class definition assigns it.
Object.defineProperty(exports, "__esModule", { value: true });
exports.Context = void 0;
const splitter_1 = require("./splitter");
const embedding_1 = require("./embedding");
const env_manager_1 = require("./utils/env-manager");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const crypto = __importStar(require("crypto"));
const synchronizer_1 = require("./sync/synchronizer");
44
// File extensions indexed by default. The commented-out groups (config,
// markup, styles, shell) are deliberately excluded but kept for reference.
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
    '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
    // Text and markup files
    '.md', '.markdown', '.ipynb',
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
53
// Glob patterns excluded from indexing by default. The trailing bare directory
// names (without `/**`) duplicate earlier entries on purpose so that matchers
// operating on path segments (rather than globs) also catch them.
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    'node_modules/**',
    'dist/**',
    'build/**',
    'out/**',
    'target/**',
    'coverage/**',
    '.nyc_output/**',
    // IDE and editor files
    '.vscode/**',
    '.idea/**',
    '*.swp',
    '*.swo',
    // Version control
    '.git/**',
    '.svn/**',
    '.hg/**',
    // Cache directories
    '.cache/**',
    '__pycache__/**',
    '.pytest_cache/**',
    // Logs and temporary files
    'logs/**',
    'tmp/**',
    'temp/**',
    '*.log',
    // Environment and config files
    '.env',
    '.env.*',
    '*.local',
    // Minified and bundled files
    '*.min.js',
    '*.min.css',
    '*.min.map',
    '*.bundle.js',
    '*.bundle.css',
    '*.chunk.js',
    '*.vendor.js',
    '*.polyfills.js',
    '*.runtime.js',
    '*.map', // source map files
    // Bare directory names (segment-match fallbacks for the globs above)
    'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
    'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
    'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
];
99
+ class Context {
100
+ constructor(config = {}) {
101
+ this.synchronizers = new Map();
102
+ // Initialize services
103
+ this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
104
+ apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
105
+ model: 'text-embedding-3-small',
106
+ ...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
107
+ });
108
+ if (!config.vectorDatabase) {
109
+ throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
110
+ }
111
+ this.vectorDatabase = config.vectorDatabase;
112
+ this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
113
+ // Load custom extensions from environment variables
114
+ const envCustomExtensions = this.getCustomExtensionsFromEnv();
115
+ // Combine default extensions with config extensions and env extensions
116
+ const allSupportedExtensions = [
117
+ ...DEFAULT_SUPPORTED_EXTENSIONS,
118
+ ...(config.supportedExtensions || []),
119
+ ...(config.customExtensions || []),
120
+ ...envCustomExtensions
121
+ ];
122
+ // Remove duplicates
123
+ this.supportedExtensions = [...new Set(allSupportedExtensions)];
124
+ // Load custom ignore patterns from environment variables
125
+ const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
126
+ // Start with default ignore patterns
127
+ const allIgnorePatterns = [
128
+ ...DEFAULT_IGNORE_PATTERNS,
129
+ ...(config.ignorePatterns || []),
130
+ ...(config.customIgnorePatterns || []),
131
+ ...envCustomIgnorePatterns
132
+ ];
133
+ // Remove duplicates
134
+ this.ignorePatterns = [...new Set(allIgnorePatterns)];
135
+ console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
136
+ if (envCustomExtensions.length > 0) {
137
+ console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
138
+ }
139
+ if (envCustomIgnorePatterns.length > 0) {
140
+ console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
141
+ }
142
+ }
143
+ /**
144
+ * Get embedding instance
145
+ */
146
+ getEmbedding() {
147
+ return this.embedding;
148
+ }
149
+ /**
150
+ * Get vector database instance
151
+ */
152
+ getVectorDatabase() {
153
+ return this.vectorDatabase;
154
+ }
155
+ /**
156
+ * Get code splitter instance
157
+ */
158
+ getCodeSplitter() {
159
+ return this.codeSplitter;
160
+ }
161
+ /**
162
+ * Get supported extensions
163
+ */
164
+ getSupportedExtensions() {
165
+ return [...this.supportedExtensions];
166
+ }
167
+ /**
168
+ * Get ignore patterns
169
+ */
170
+ getIgnorePatterns() {
171
+ return [...this.ignorePatterns];
172
+ }
173
+ /**
174
+ * Get synchronizers map
175
+ */
176
+ getSynchronizers() {
177
+ return new Map(this.synchronizers);
178
+ }
179
+ /**
180
+ * Set synchronizer for a collection
181
+ */
182
+ setSynchronizer(collectionName, synchronizer) {
183
+ this.synchronizers.set(collectionName, synchronizer);
184
+ }
185
+ /**
186
+ * Public wrapper for loadIgnorePatterns private method
187
+ */
188
+ async getLoadedIgnorePatterns(codebasePath) {
189
+ return this.loadIgnorePatterns(codebasePath);
190
+ }
191
+ /**
192
+ * Public wrapper for prepareCollection private method
193
+ */
194
+ async getPreparedCollection(codebasePath) {
195
+ return this.prepareCollection(codebasePath);
196
+ }
197
+ /**
198
+ * Get isHybrid setting from environment variable with default true
199
+ */
200
+ getIsHybrid() {
201
+ const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
202
+ if (isHybridEnv === undefined || isHybridEnv === null) {
203
+ return true; // Default to true
204
+ }
205
+ return isHybridEnv.toLowerCase() === 'true';
206
+ }
207
+ /**
208
+ * Generate collection name based on codebase path and hybrid mode
209
+ */
210
+ getCollectionName(codebasePath) {
211
+ const isHybrid = this.getIsHybrid();
212
+ const normalizedPath = path.resolve(codebasePath);
213
+ const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
214
+ const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
215
+ return `${prefix}_${hash.substring(0, 8)}`;
216
+ }
217
+ /**
218
+ * Index a codebase for semantic search
219
+ * @param codebasePath Codebase root path
220
+ * @param progressCallback Optional progress callback function
221
+ * @param forceReindex Whether to recreate the collection even if it exists
222
+ * @returns Indexing statistics
223
+ */
224
+ async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
225
+ const isHybrid = this.getIsHybrid();
226
+ const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
227
+ console.log(`[Context] 🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
228
+ // 1. Load ignore patterns from various ignore files
229
+ await this.loadIgnorePatterns(codebasePath);
230
+ // 2. Check and prepare vector collection
231
+ progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
232
+ console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
233
+ await this.prepareCollection(codebasePath, forceReindex);
234
+ // 3. Recursively traverse codebase to get all supported files
235
+ progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
236
+ const codeFiles = await this.getCodeFiles(codebasePath);
237
+ console.log(`[Context] 📁 Found ${codeFiles.length} code files`);
238
+ if (codeFiles.length === 0) {
239
+ progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 });
240
+ return { indexedFiles: 0, totalChunks: 0, status: 'completed' };
241
+ }
242
+ // 3. Process each file with streaming chunk processing
243
+ // Reserve 10% for preparation, 90% for actual indexing
244
+ const indexingStartPercentage = 10;
245
+ const indexingEndPercentage = 100;
246
+ const indexingRange = indexingEndPercentage - indexingStartPercentage;
247
+ const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
248
+ // Calculate progress percentage
249
+ const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
250
+ console.log(`[Context] 📊 Processed ${fileIndex}/${totalFiles} files`);
251
+ progressCallback?.({
252
+ phase: `Processing files (${fileIndex}/${totalFiles})...`,
253
+ current: fileIndex,
254
+ total: totalFiles,
255
+ percentage: Math.round(progressPercentage)
256
+ });
257
+ });
258
+ console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
259
+ progressCallback?.({
260
+ phase: 'Indexing complete!',
261
+ current: result.processedFiles,
262
+ total: codeFiles.length,
263
+ percentage: 100
264
+ });
265
+ return {
266
+ indexedFiles: result.processedFiles,
267
+ totalChunks: result.totalChunks,
268
+ status: result.status
269
+ };
270
+ }
271
+ async reindexByChange(codebasePath, progressCallback) {
272
+ const collectionName = this.getCollectionName(codebasePath);
273
+ const synchronizer = this.synchronizers.get(collectionName);
274
+ if (!synchronizer) {
275
+ // Load project-specific ignore patterns before creating FileSynchronizer
276
+ await this.loadIgnorePatterns(codebasePath);
277
+ // To be safe, let's initialize if it's not there.
278
+ const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
279
+ await newSynchronizer.initialize();
280
+ this.synchronizers.set(collectionName, newSynchronizer);
281
+ }
282
+ const currentSynchronizer = this.synchronizers.get(collectionName);
283
+ progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 });
284
+ const { added, removed, modified } = await currentSynchronizer.checkForChanges();
285
+ const totalChanges = added.length + removed.length + modified.length;
286
+ if (totalChanges === 0) {
287
+ progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
288
+ console.log('[Context] ✅ No file changes detected.');
289
+ return { added: 0, removed: 0, modified: 0 };
290
+ }
291
+ console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
292
+ let processedChanges = 0;
293
+ const updateProgress = (phase) => {
294
+ processedChanges++;
295
+ const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100);
296
+ progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage });
297
+ };
298
+ // Handle removed files
299
+ for (const file of removed) {
300
+ await this.deleteFileChunks(collectionName, file);
301
+ updateProgress(`Removed ${file}`);
302
+ }
303
+ // Handle modified files
304
+ for (const file of modified) {
305
+ await this.deleteFileChunks(collectionName, file);
306
+ updateProgress(`Deleted old chunks for ${file}`);
307
+ }
308
+ // Handle added and modified files
309
+ const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
310
+ if (filesToIndex.length > 0) {
311
+ await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
312
+ updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
313
+ });
314
+ }
315
+ console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
316
+ progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
317
+ return { added: added.length, removed: removed.length, modified: modified.length };
318
+ }
319
+ async deleteFileChunks(collectionName, relativePath) {
320
+ // Escape backslashes for Milvus query expression (Windows path compatibility)
321
+ const escapedPath = relativePath.replace(/\\/g, '\\\\');
322
+ const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ['id']);
323
+ if (results.length > 0) {
324
+ const ids = results.map(r => r.id).filter(id => id);
325
+ if (ids.length > 0) {
326
+ await this.vectorDatabase.delete(collectionName, ids);
327
+ console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`);
328
+ }
329
+ }
330
+ }
331
+ /**
332
+ * Semantic search with unified implementation
333
+ * @param codebasePath Codebase path to search in
334
+ * @param query Search query
335
+ * @param topK Number of results to return
336
+ * @param threshold Similarity threshold
337
+ */
338
+ async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
339
+ const isHybrid = this.getIsHybrid();
340
+ const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
341
+ console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
342
+ const collectionName = this.getCollectionName(codebasePath);
343
+ console.log(`[Context] 🔍 Using collection: ${collectionName}`);
344
+ // Check if collection exists and has data
345
+ const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
346
+ if (!hasCollection) {
347
+ console.log(`[Context] ⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
348
+ return [];
349
+ }
350
+ if (isHybrid === true) {
351
+ try {
352
+ // Check collection stats to see if it has data
353
+ const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
354
+ console.log(`[Context] 🔍 Collection '${collectionName}' exists and appears to have data`);
355
+ }
356
+ catch (error) {
357
+ console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
358
+ }
359
+ // 1. Generate query vector
360
+ console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
361
+ const queryEmbedding = await this.embedding.embed(query);
362
+ console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
363
+ console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
364
+ // 2. Prepare hybrid search requests
365
+ const searchRequests = [
366
+ {
367
+ data: queryEmbedding.vector,
368
+ anns_field: "vector",
369
+ param: { "nprobe": 10 },
370
+ limit: topK
371
+ },
372
+ {
373
+ data: query,
374
+ anns_field: "sparse_vector",
375
+ param: { "drop_ratio_search": 0.2 },
376
+ limit: topK
377
+ }
378
+ ];
379
+ console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
380
+ console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
381
+ // 3. Execute hybrid search
382
+ console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
383
+ const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
384
+ rerank: {
385
+ strategy: 'rrf',
386
+ params: { k: 100 }
387
+ },
388
+ limit: topK,
389
+ filterExpr
390
+ });
391
+ console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
392
+ // 4. Convert to semantic search result format
393
+ const results = searchResults.map(result => ({
394
+ content: result.document.content,
395
+ relativePath: result.document.relativePath,
396
+ startLine: result.document.startLine,
397
+ endLine: result.document.endLine,
398
+ language: result.document.metadata.language || 'unknown',
399
+ score: result.score
400
+ }));
401
+ console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
402
+ if (results.length > 0) {
403
+ console.log(`[Context] 🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
404
+ }
405
+ return results;
406
+ }
407
+ else {
408
+ // Regular semantic search
409
+ // 1. Generate query vector
410
+ const queryEmbedding = await this.embedding.embed(query);
411
+ // 2. Search in vector database
412
+ const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
413
+ // 3. Convert to semantic search result format
414
+ const results = searchResults.map(result => ({
415
+ content: result.document.content,
416
+ relativePath: result.document.relativePath,
417
+ startLine: result.document.startLine,
418
+ endLine: result.document.endLine,
419
+ language: result.document.metadata.language || 'unknown',
420
+ score: result.score
421
+ }));
422
+ console.log(`[Context] ✅ Found ${results.length} relevant results`);
423
+ return results;
424
+ }
425
+ }
426
+ /**
427
+ * Check if index exists for codebase
428
+ * @param codebasePath Codebase path to check
429
+ * @returns Whether index exists
430
+ */
431
+ async hasIndex(codebasePath) {
432
+ const collectionName = this.getCollectionName(codebasePath);
433
+ return await this.vectorDatabase.hasCollection(collectionName);
434
+ }
435
+ /**
436
+ * Clear index
437
+ * @param codebasePath Codebase path to clear index for
438
+ * @param progressCallback Optional progress callback function
439
+ */
440
+ async clearIndex(codebasePath, progressCallback) {
441
+ console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);
442
+ progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
443
+ const collectionName = this.getCollectionName(codebasePath);
444
+ const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
445
+ progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
446
+ if (collectionExists) {
447
+ await this.vectorDatabase.dropCollection(collectionName);
448
+ }
449
+ // Delete snapshot file
450
+ await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
451
+ progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
452
+ console.log('[Context] ✅ Index data cleaned');
453
+ }
454
+ /**
455
+ * Update ignore patterns (merges with default patterns and existing patterns)
456
+ * @param ignorePatterns Array of ignore patterns to add to defaults
457
+ */
458
+ updateIgnorePatterns(ignorePatterns) {
459
+ // Merge with default patterns and any existing custom patterns, avoiding duplicates
460
+ const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
461
+ const uniquePatterns = [];
462
+ const patternSet = new Set(mergedPatterns);
463
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
464
+ this.ignorePatterns = uniquePatterns;
465
+ console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
466
+ }
467
+ /**
468
+ * Add custom ignore patterns (from MCP or other sources) without replacing existing ones
469
+ * @param customPatterns Array of custom ignore patterns to add
470
+ */
471
+ addCustomIgnorePatterns(customPatterns) {
472
+ if (customPatterns.length === 0)
473
+ return;
474
+ // Merge current patterns with new custom patterns, avoiding duplicates
475
+ const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
476
+ const uniquePatterns = [];
477
+ const patternSet = new Set(mergedPatterns);
478
+ patternSet.forEach(pattern => uniquePatterns.push(pattern));
479
+ this.ignorePatterns = uniquePatterns;
480
+ console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
481
+ }
482
+ /**
483
+ * Reset ignore patterns to defaults only
484
+ */
485
+ resetIgnorePatternsToDefaults() {
486
+ this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
487
+ console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
488
+ }
489
+ /**
490
+ * Update embedding instance
491
+ * @param embedding New embedding instance
492
+ */
493
+ updateEmbedding(embedding) {
494
+ this.embedding = embedding;
495
+ console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`);
496
+ }
497
+ /**
498
+ * Update vector database instance
499
+ * @param vectorDatabase New vector database instance
500
+ */
501
+ updateVectorDatabase(vectorDatabase) {
502
+ this.vectorDatabase = vectorDatabase;
503
+ console.log(`[Context] 🔄 Updated vector database`);
504
+ }
505
+ /**
506
+ * Update splitter instance
507
+ * @param splitter New splitter instance
508
+ */
509
+ updateSplitter(splitter) {
510
+ this.codeSplitter = splitter;
511
+ console.log(`[Context] 🔄 Updated splitter instance`);
512
+ }
513
+ /**
514
+ * Prepare vector collection
515
+ */
516
+ async prepareCollection(codebasePath, forceReindex = false) {
517
+ const isHybrid = this.getIsHybrid();
518
+ const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
519
+ console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
520
+ const collectionName = this.getCollectionName(codebasePath);
521
+ // Check if collection already exists
522
+ const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
523
+ if (collectionExists && !forceReindex) {
524
+ console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
525
+ return;
526
+ }
527
+ if (collectionExists && forceReindex) {
528
+ console.log(`[Context] 🗑️ Dropping existing collection ${collectionName} for force reindex...`);
529
+ await this.vectorDatabase.dropCollection(collectionName);
530
+ console.log(`[Context] ✅ Collection ${collectionName} dropped successfully`);
531
+ }
532
+ console.log(`[Context] 🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
533
+ const dimension = await this.embedding.detectDimension();
534
+ console.log(`[Context] 📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
535
+ const dirName = path.basename(codebasePath);
536
+ if (isHybrid === true) {
537
+ await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
538
+ }
539
+ else {
540
+ await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
541
+ }
542
+ console.log(`[Context] ✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
543
+ }
544
+ /**
545
+ * Recursively get all code files in the codebase
546
+ */
547
+ async getCodeFiles(codebasePath) {
548
+ const files = [];
549
+ const traverseDirectory = async (currentPath) => {
550
+ const entries = await fs.promises.readdir(currentPath, { withFileTypes: true });
551
+ for (const entry of entries) {
552
+ const fullPath = path.join(currentPath, entry.name);
553
+ // Check if path matches ignore patterns
554
+ if (this.matchesIgnorePattern(fullPath, codebasePath)) {
555
+ continue;
556
+ }
557
+ if (entry.isDirectory()) {
558
+ await traverseDirectory(fullPath);
559
+ }
560
+ else if (entry.isFile()) {
561
+ const ext = path.extname(entry.name);
562
+ if (this.supportedExtensions.includes(ext)) {
563
+ files.push(fullPath);
564
+ }
565
+ }
566
+ }
567
+ };
568
+ await traverseDirectory(codebasePath);
569
+ return files;
570
+ }
571
/**
 * Process a list of files with streaming chunk processing.
 * Each file is read, split into chunks, and the chunks are accumulated in a
 * buffer that is flushed to the vector database whenever it reaches
 * EMBEDDING_BATCH_SIZE. Indexing stops early once CHUNK_LIMIT total chunks
 * have been produced across all files.
 * @param filePaths Array of file paths to process
 * @param codebasePath Base path for the codebase
 * @param onFileProcessed Optional callback (filePath, current, total) called after each file
 * @returns Object with processed file count, total chunk count, and status
 *          ('completed' or 'limit_reached')
 */
async processFileList(filePaths, codebasePath, onFileProcessed) {
    const isHybrid = this.getIsHybrid();
    // Batch size is configurable via env; clamped to at least 1.
    const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
    // Hard cap on the number of chunks indexed in a single run.
    const CHUNK_LIMIT = 450000;
    console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
    let chunkBuffer = [];
    let processedFiles = 0;
    let totalChunks = 0;
    let limitReached = false;
    for (let i = 0; i < filePaths.length; i++) {
        const filePath = filePaths[i];
        try {
            const content = await fs.promises.readFile(filePath, 'utf-8');
            const language = this.getLanguageFromExtension(path.extname(filePath));
            const chunks = await this.codeSplitter.split(content, language, filePath);
            // Log files with many chunks or large content
            if (chunks.length > 50) {
                console.warn(`[Context] ⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
            }
            else if (content.length > 100000) {
                console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
            }
            // Add chunks to buffer
            for (const chunk of chunks) {
                chunkBuffer.push({ chunk, codebasePath });
                totalChunks++;
                // Process batch when buffer reaches EMBEDDING_BATCH_SIZE
                if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
                    try {
                        await this.processChunkBuffer(chunkBuffer);
                    }
                    catch (error) {
                        // A failed batch is logged but does not abort indexing;
                        // those chunks are dropped and processing continues.
                        const searchType = isHybrid === true ? 'hybrid' : 'regular';
                        console.error(`[Context] ❌ Failed to process chunk batch for ${searchType}:`, error);
                        if (error instanceof Error) {
                            console.error('[Context] Stack trace:', error.stack);
                        }
                    }
                    finally {
                        chunkBuffer = []; // Always clear buffer, even on failure
                    }
                }
                // Check if chunk limit is reached
                if (totalChunks >= CHUNK_LIMIT) {
                    console.warn(`[Context] ⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
                    limitReached = true;
                    break; // Exit the inner loop (over chunks)
                }
            }
            // NOTE: the file is counted (and the callback fired) even when the
            // limit interrupted it mid-file; its remaining chunks are dropped.
            processedFiles++;
            onFileProcessed?.(filePath, i + 1, filePaths.length);
            if (limitReached) {
                break; // Exit the outer loop (over files)
            }
        }
        catch (error) {
            // Unreadable/unsplittable files are skipped, not fatal.
            console.warn(`[Context] ⚠️ Skipping file ${filePath}: ${error}`);
        }
    }
    // Process any remaining chunks in the buffer
    if (chunkBuffer.length > 0) {
        const searchType = isHybrid === true ? 'hybrid' : 'regular';
        console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
        try {
            await this.processChunkBuffer(chunkBuffer);
        }
        catch (error) {
            console.error(`[Context] ❌ Failed to process final chunk batch for ${searchType}:`, error);
            if (error instanceof Error) {
                console.error('[Context] Stack trace:', error.stack);
            }
        }
    }
    return {
        processedFiles,
        totalChunks,
        status: limitReached ? 'limit_reached' : 'completed'
    };
}
657
+ /**
658
+ * Process accumulated chunk buffer
659
+ */
660
+ async processChunkBuffer(chunkBuffer) {
661
+ if (chunkBuffer.length === 0)
662
+ return;
663
+ // Extract chunks and ensure they all have the same codebasePath
664
+ const chunks = chunkBuffer.map(item => item.chunk);
665
+ const codebasePath = chunkBuffer[0].codebasePath;
666
+ // Estimate tokens (rough estimation: 1 token ≈ 4 characters)
667
+ const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
668
+ const isHybrid = this.getIsHybrid();
669
+ const searchType = isHybrid === true ? 'hybrid' : 'regular';
670
+ console.log(`[Context] 🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
671
+ await this.processChunkBatch(chunks, codebasePath);
672
+ }
673
+ /**
674
+ * Process a batch of chunks
675
+ */
676
+ async processChunkBatch(chunks, codebasePath) {
677
+ const isHybrid = this.getIsHybrid();
678
+ // Generate embedding vectors
679
+ const chunkContents = chunks.map(chunk => chunk.content);
680
+ const embeddings = await this.embedding.embedBatch(chunkContents);
681
+ if (isHybrid === true) {
682
+ // Create hybrid vector documents
683
+ const documents = chunks.map((chunk, index) => {
684
+ if (!chunk.metadata.filePath) {
685
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
686
+ }
687
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
688
+ const fileExtension = path.extname(chunk.metadata.filePath);
689
+ const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
690
+ return {
691
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
692
+ content: chunk.content, // Full text content for BM25 and storage
693
+ vector: embeddings[index].vector, // Dense vector
694
+ relativePath,
695
+ startLine: chunk.metadata.startLine || 0,
696
+ endLine: chunk.metadata.endLine || 0,
697
+ fileExtension,
698
+ metadata: {
699
+ ...restMetadata,
700
+ codebasePath,
701
+ language: chunk.metadata.language || 'unknown',
702
+ chunkIndex: index
703
+ }
704
+ };
705
+ });
706
+ // Store to vector database
707
+ await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
708
+ }
709
+ else {
710
+ // Create regular vector documents
711
+ const documents = chunks.map((chunk, index) => {
712
+ if (!chunk.metadata.filePath) {
713
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
714
+ }
715
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
716
+ const fileExtension = path.extname(chunk.metadata.filePath);
717
+ const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
718
+ return {
719
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
720
+ vector: embeddings[index].vector,
721
+ content: chunk.content,
722
+ relativePath,
723
+ startLine: chunk.metadata.startLine || 0,
724
+ endLine: chunk.metadata.endLine || 0,
725
+ fileExtension,
726
+ metadata: {
727
+ ...restMetadata,
728
+ codebasePath,
729
+ language: chunk.metadata.language || 'unknown',
730
+ chunkIndex: index
731
+ }
732
+ };
733
+ });
734
+ // Store to vector database
735
+ await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
736
+ }
737
+ }
738
+ /**
739
+ * Get programming language based on file extension
740
+ */
741
+ getLanguageFromExtension(ext) {
742
+ const languageMap = {
743
+ '.ts': 'typescript',
744
+ '.tsx': 'typescript',
745
+ '.js': 'javascript',
746
+ '.jsx': 'javascript',
747
+ '.py': 'python',
748
+ '.java': 'java',
749
+ '.cpp': 'cpp',
750
+ '.c': 'c',
751
+ '.h': 'c',
752
+ '.hpp': 'cpp',
753
+ '.cs': 'csharp',
754
+ '.go': 'go',
755
+ '.rs': 'rust',
756
+ '.php': 'php',
757
+ '.rb': 'ruby',
758
+ '.swift': 'swift',
759
+ '.kt': 'kotlin',
760
+ '.scala': 'scala',
761
+ '.m': 'objective-c',
762
+ '.mm': 'objective-c',
763
+ '.ipynb': 'jupyter'
764
+ };
765
+ return languageMap[ext] || 'text';
766
+ }
767
+ /**
768
+ * Generate unique ID based on chunk content and location
769
+ * @param relativePath Relative path to the file
770
+ * @param startLine Start line number
771
+ * @param endLine End line number
772
+ * @param content Chunk content
773
+ * @returns Hash-based unique ID
774
+ */
775
+ generateId(relativePath, startLine, endLine, content) {
776
+ const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
777
+ const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex');
778
+ return `chunk_${hash.substring(0, 16)}`;
779
+ }
780
+ /**
781
+ * Read ignore patterns from file (e.g., .gitignore)
782
+ * @param filePath Path to the ignore file
783
+ * @returns Array of ignore patterns
784
+ */
785
+ static async getIgnorePatternsFromFile(filePath) {
786
+ try {
787
+ const content = await fs.promises.readFile(filePath, 'utf-8');
788
+ return content
789
+ .split('\n')
790
+ .map(line => line.trim())
791
+ .filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
792
+ }
793
+ catch (error) {
794
+ console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`);
795
+ return [];
796
+ }
797
+ }
798
+ /**
799
+ * Load ignore patterns from various ignore files in the codebase
800
+ * This method preserves any existing custom patterns that were added before
801
+ * @param codebasePath Path to the codebase
802
+ */
803
+ async loadIgnorePatterns(codebasePath) {
804
+ try {
805
+ let fileBasedPatterns = [];
806
+ // Load all .xxxignore files in codebase directory
807
+ const ignoreFiles = await this.findIgnoreFiles(codebasePath);
808
+ for (const ignoreFile of ignoreFiles) {
809
+ const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
810
+ fileBasedPatterns.push(...patterns);
811
+ }
812
+ // Load global ~/.context/.contextignore
813
+ const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
814
+ fileBasedPatterns.push(...globalIgnorePatterns);
815
+ // Merge file-based patterns with existing patterns (which may include custom MCP patterns)
816
+ if (fileBasedPatterns.length > 0) {
817
+ this.addCustomIgnorePatterns(fileBasedPatterns);
818
+ console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
819
+ }
820
+ else {
821
+ console.log('📄 No ignore files found, keeping existing patterns');
822
+ }
823
+ }
824
+ catch (error) {
825
+ console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`);
826
+ // Continue with existing patterns on error - don't reset them
827
+ }
828
+ }
829
+ /**
830
+ * Find all .xxxignore files in the codebase directory
831
+ * @param codebasePath Path to the codebase
832
+ * @returns Array of ignore file paths
833
+ */
834
+ async findIgnoreFiles(codebasePath) {
835
+ try {
836
+ const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
837
+ const ignoreFiles = [];
838
+ for (const entry of entries) {
839
+ if (entry.isFile() &&
840
+ entry.name.startsWith('.') &&
841
+ entry.name.endsWith('ignore')) {
842
+ ignoreFiles.push(path.join(codebasePath, entry.name));
843
+ }
844
+ }
845
+ if (ignoreFiles.length > 0) {
846
+ console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
847
+ }
848
+ return ignoreFiles;
849
+ }
850
+ catch (error) {
851
+ console.warn(`[Context] ⚠️ Failed to scan for ignore files: ${error}`);
852
+ return [];
853
+ }
854
+ }
855
+ /**
856
+ * Load global ignore file from ~/.context/.contextignore
857
+ * @returns Array of ignore patterns
858
+ */
859
+ async loadGlobalIgnoreFile() {
860
+ try {
861
+ const homeDir = require('os').homedir();
862
+ const globalIgnorePath = path.join(homeDir, '.context', '.contextignore');
863
+ return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore');
864
+ }
865
+ catch (error) {
866
+ // Global ignore file is optional, don't log warnings
867
+ return [];
868
+ }
869
+ }
870
+ /**
871
+ * Load ignore patterns from a specific ignore file
872
+ * @param filePath Path to the ignore file
873
+ * @param fileName Display name for logging
874
+ * @returns Array of ignore patterns
875
+ */
876
+ async loadIgnoreFile(filePath, fileName) {
877
+ try {
878
+ await fs.promises.access(filePath);
879
+ console.log(`📄 Found ${fileName} file at: ${filePath}`);
880
+ const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath);
881
+ if (ignorePatterns.length > 0) {
882
+ console.log(`[Context] 🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
883
+ return ignorePatterns;
884
+ }
885
+ else {
886
+ console.log(`📄 ${fileName} file found but no valid patterns detected`);
887
+ return [];
888
+ }
889
+ }
890
+ catch (error) {
891
+ if (fileName.includes('global')) {
892
+ console.log(`📄 No ${fileName} file found`);
893
+ }
894
+ return [];
895
+ }
896
+ }
897
+ /**
898
+ * Check if a path matches any ignore pattern
899
+ * @param filePath Path to check
900
+ * @param basePath Base path for relative pattern matching
901
+ * @returns True if path should be ignored
902
+ */
903
+ matchesIgnorePattern(filePath, basePath) {
904
+ if (this.ignorePatterns.length === 0) {
905
+ return false;
906
+ }
907
+ const relativePath = path.relative(basePath, filePath);
908
+ const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators
909
+ for (const pattern of this.ignorePatterns) {
910
+ if (this.isPatternMatch(normalizedPath, pattern)) {
911
+ return true;
912
+ }
913
+ }
914
+ return false;
915
+ }
916
+ /**
917
+ * Simple glob pattern matching
918
+ * @param filePath File path to test
919
+ * @param pattern Glob pattern
920
+ * @returns True if pattern matches
921
+ */
922
+ isPatternMatch(filePath, pattern) {
923
+ // Handle directory patterns (ending with /)
924
+ if (pattern.endsWith('/')) {
925
+ const dirPattern = pattern.slice(0, -1);
926
+ const pathParts = filePath.split('/');
927
+ return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
928
+ }
929
+ // Handle file patterns
930
+ if (pattern.includes('/')) {
931
+ // Pattern with path separator - match exact path
932
+ return this.simpleGlobMatch(filePath, pattern);
933
+ }
934
+ else {
935
+ // Pattern without path separator - match filename in any directory
936
+ const fileName = path.basename(filePath);
937
+ return this.simpleGlobMatch(fileName, pattern);
938
+ }
939
+ }
940
+ /**
941
+ * Simple glob matching supporting * wildcard
942
+ * @param text Text to test
943
+ * @param pattern Pattern with * wildcards
944
+ * @returns True if pattern matches
945
+ */
946
+ simpleGlobMatch(text, pattern) {
947
+ // Convert glob pattern to regex
948
+ const regexPattern = pattern
949
+ .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except *
950
+ .replace(/\*/g, '.*'); // Convert * to .*
951
+ const regex = new RegExp(`^${regexPattern}$`);
952
+ return regex.test(text);
953
+ }
954
+ /**
955
+ * Get custom extensions from environment variables
956
+ * Supports CUSTOM_EXTENSIONS as comma-separated list
957
+ * @returns Array of custom extensions
958
+ */
959
+ getCustomExtensionsFromEnv() {
960
+ const envExtensions = env_manager_1.envManager.get('CUSTOM_EXTENSIONS');
961
+ if (!envExtensions) {
962
+ return [];
963
+ }
964
+ try {
965
+ const extensions = envExtensions
966
+ .split(',')
967
+ .map(ext => ext.trim())
968
+ .filter(ext => ext.length > 0)
969
+ .map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
970
+ return extensions;
971
+ }
972
+ catch (error) {
973
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
974
+ return [];
975
+ }
976
+ }
977
+ /**
978
+ * Get custom ignore patterns from environment variables
979
+ * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
980
+ * @returns Array of custom ignore patterns
981
+ */
982
+ getCustomIgnorePatternsFromEnv() {
983
+ const envIgnorePatterns = env_manager_1.envManager.get('CUSTOM_IGNORE_PATTERNS');
984
+ if (!envIgnorePatterns) {
985
+ return [];
986
+ }
987
+ try {
988
+ const patterns = envIgnorePatterns
989
+ .split(',')
990
+ .map(pattern => pattern.trim())
991
+ .filter(pattern => pattern.length > 0);
992
+ return patterns;
993
+ }
994
+ catch (error) {
995
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
996
+ return [];
997
+ }
998
+ }
999
+ /**
1000
+ * Add custom extensions (from MCP or other sources) without replacing existing ones
1001
+ * @param customExtensions Array of custom extensions to add
1002
+ */
1003
+ addCustomExtensions(customExtensions) {
1004
+ if (customExtensions.length === 0)
1005
+ return;
1006
+ // Ensure extensions start with dot
1007
+ const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
1008
+ // Merge current extensions with new custom extensions, avoiding duplicates
1009
+ const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
1010
+ const uniqueExtensions = [...new Set(mergedExtensions)];
1011
+ this.supportedExtensions = uniqueExtensions;
1012
+ console.log(`[Context] 📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
1013
+ }
1014
+ /**
1015
+ * Get current splitter information
1016
+ */
1017
+ getSplitterInfo() {
1018
+ const splitterName = this.codeSplitter.constructor.name;
1019
+ if (splitterName === 'AstCodeSplitter') {
1020
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1021
+ return {
1022
+ type: 'ast',
1023
+ hasBuiltinFallback: true,
1024
+ supportedLanguages: AstCodeSplitter.getSupportedLanguages()
1025
+ };
1026
+ }
1027
+ else {
1028
+ return {
1029
+ type: 'langchain',
1030
+ hasBuiltinFallback: false
1031
+ };
1032
+ }
1033
+ }
1034
+ /**
1035
+ * Check if current splitter supports a specific language
1036
+ * @param language Programming language
1037
+ */
1038
+ isLanguageSupported(language) {
1039
+ const splitterName = this.codeSplitter.constructor.name;
1040
+ if (splitterName === 'AstCodeSplitter') {
1041
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1042
+ return AstCodeSplitter.isLanguageSupported(language);
1043
+ }
1044
+ // LangChain splitter supports most languages
1045
+ return true;
1046
+ }
1047
+ /**
1048
+ * Get which strategy would be used for a specific language
1049
+ * @param language Programming language
1050
+ */
1051
+ getSplitterStrategyForLanguage(language) {
1052
+ const splitterName = this.codeSplitter.constructor.name;
1053
+ if (splitterName === 'AstCodeSplitter') {
1054
+ const { AstCodeSplitter } = require('./splitter/ast-splitter');
1055
+ const isSupported = AstCodeSplitter.isLanguageSupported(language);
1056
+ return {
1057
+ strategy: isSupported ? 'ast' : 'langchain',
1058
+ reason: isSupported
1059
+ ? 'Language supported by AST parser'
1060
+ : 'Language not supported by AST, will fallback to LangChain'
1061
+ };
1062
+ }
1063
+ else {
1064
+ return {
1065
+ strategy: 'langchain',
1066
+ reason: 'Using LangChain splitter directly'
1067
+ };
1068
+ }
1069
+ }
1070
+ }
1071
+ exports.Context = Context;
1072
+ //# sourceMappingURL=context.js.map