@mcampa/ai-context-core 0.0.1-beta.05e8984

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +354 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/context.d.ts +276 -0
  5. package/dist/context.d.ts.map +1 -0
  6. package/dist/context.js +1177 -0
  7. package/dist/context.js.map +1 -0
  8. package/dist/embedding/base-embedding.d.ts +51 -0
  9. package/dist/embedding/base-embedding.d.ts.map +1 -0
  10. package/dist/embedding/base-embedding.js +36 -0
  11. package/dist/embedding/base-embedding.js.map +1 -0
  12. package/dist/embedding/gemini-embedding.d.ts +53 -0
  13. package/dist/embedding/gemini-embedding.d.ts.map +1 -0
  14. package/dist/embedding/gemini-embedding.js +154 -0
  15. package/dist/embedding/gemini-embedding.js.map +1 -0
  16. package/dist/embedding/index.d.ts +6 -0
  17. package/dist/embedding/index.d.ts.map +1 -0
  18. package/dist/embedding/index.js +24 -0
  19. package/dist/embedding/index.js.map +1 -0
  20. package/dist/embedding/ollama-embedding.d.ts +55 -0
  21. package/dist/embedding/ollama-embedding.d.ts.map +1 -0
  22. package/dist/embedding/ollama-embedding.js +193 -0
  23. package/dist/embedding/ollama-embedding.js.map +1 -0
  24. package/dist/embedding/openai-embedding.d.ts +36 -0
  25. package/dist/embedding/openai-embedding.d.ts.map +1 -0
  26. package/dist/embedding/openai-embedding.js +161 -0
  27. package/dist/embedding/openai-embedding.js.map +1 -0
  28. package/dist/embedding/voyageai-embedding.d.ts +44 -0
  29. package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
  30. package/dist/embedding/voyageai-embedding.js +227 -0
  31. package/dist/embedding/voyageai-embedding.js.map +1 -0
  32. package/dist/index.d.ts +8 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +24 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/splitter/ast-splitter.d.ts +22 -0
  37. package/dist/splitter/ast-splitter.d.ts.map +1 -0
  38. package/dist/splitter/ast-splitter.js +308 -0
  39. package/dist/splitter/ast-splitter.js.map +1 -0
  40. package/dist/splitter/index.d.ts +41 -0
  41. package/dist/splitter/index.d.ts.map +1 -0
  42. package/dist/splitter/index.js +27 -0
  43. package/dist/splitter/index.js.map +1 -0
  44. package/dist/splitter/langchain-splitter.d.ts +13 -0
  45. package/dist/splitter/langchain-splitter.d.ts.map +1 -0
  46. package/dist/splitter/langchain-splitter.js +118 -0
  47. package/dist/splitter/langchain-splitter.js.map +1 -0
  48. package/dist/sync/merkle.d.ts +30 -0
  49. package/dist/sync/merkle.d.ts.map +1 -0
  50. package/dist/sync/merkle.js +112 -0
  51. package/dist/sync/merkle.js.map +1 -0
  52. package/dist/sync/synchronizer.d.ts +30 -0
  53. package/dist/sync/synchronizer.d.ts.map +1 -0
  54. package/dist/sync/synchronizer.js +347 -0
  55. package/dist/sync/synchronizer.js.map +1 -0
  56. package/dist/types.d.ts +14 -0
  57. package/dist/types.d.ts.map +1 -0
  58. package/dist/types.js +3 -0
  59. package/dist/types.js.map +1 -0
  60. package/dist/utils/env-manager.d.ts +19 -0
  61. package/dist/utils/env-manager.d.ts.map +1 -0
  62. package/dist/utils/env-manager.js +125 -0
  63. package/dist/utils/env-manager.js.map +1 -0
  64. package/dist/utils/index.d.ts +2 -0
  65. package/dist/utils/index.d.ts.map +1 -0
  66. package/dist/utils/index.js +7 -0
  67. package/dist/utils/index.js.map +1 -0
  68. package/dist/vectordb/index.d.ts +5 -0
  69. package/dist/vectordb/index.d.ts.map +1 -0
  70. package/dist/vectordb/index.js +14 -0
  71. package/dist/vectordb/index.js.map +1 -0
  72. package/dist/vectordb/milvus-restful-vectordb.d.ts +75 -0
  73. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
  74. package/dist/vectordb/milvus-restful-vectordb.js +728 -0
  75. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
  76. package/dist/vectordb/milvus-vectordb.d.ts +60 -0
  77. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
  78. package/dist/vectordb/milvus-vectordb.js +662 -0
  79. package/dist/vectordb/milvus-vectordb.js.map +1 -0
  80. package/dist/vectordb/types.d.ts +120 -0
  81. package/dist/vectordb/types.d.ts.map +1 -0
  82. package/dist/vectordb/types.js +9 -0
  83. package/dist/vectordb/types.js.map +1 -0
  84. package/dist/vectordb/zilliz-utils.d.ts +135 -0
  85. package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
  86. package/dist/vectordb/zilliz-utils.js +197 -0
  87. package/dist/vectordb/zilliz-utils.js.map +1 -0
  88. package/package.json +58 -0
@@ -0,0 +1,1177 @@
1
"use strict";
// --- TypeScript compiler CommonJS interop helpers (auto-generated emit). ---
// These reproduce `import * as X from "..."` semantics on top of require();
// do not hand-edit — they must match the TS compiler's expected behavior.
// Re-exports property `k` of module `m` on object `o` (optionally renamed `k2`),
// preferring a live getter so late-bound module properties stay in sync.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Attaches `v` as the `default` export of namespace object `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Wraps a CommonJS module as an ES-module-style namespace object.
// Modules already marked __esModule pass through unchanged; otherwise every
// own enumerable key except "default" is re-bound and the module itself
// becomes the `default` export.
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.Context = void 0;
37
+ const splitter_1 = require("./splitter");
38
+ const embedding_1 = require("./embedding");
39
+ const env_manager_1 = require("./utils/env-manager");
40
+ const fs = __importStar(require("fs"));
41
+ const path = __importStar(require("path"));
42
+ const crypto = __importStar(require("crypto"));
43
+ const synchronizer_1 = require("./sync/synchronizer");
44
// File extensions indexed by default. The Context constructor merges this
// list with config-supplied and environment-supplied custom extensions
// (duplicates removed via Set). Matching uses path.extname(), so every
// entry must include the leading dot.
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".py",
    ".java",
    ".cpp",
    ".c",
    ".h",
    ".hpp",
    ".cs",
    ".go",
    ".rs",
    ".php",
    ".rb",
    ".swift",
    ".kt",
    ".scala",
    ".m",
    ".mm",
    // Text and markup files
    ".md",
    ".markdown",
    ".ipynb",
    // Deliberately excluded (data/config formats, not code):
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
73
// Glob-style patterns excluded from indexing by default. Patterns appear in
// two forms on purpose: "dir/**" (contents) and the bare "dir" form near the
// bottom (the directory entry itself) — matchesIgnorePattern (defined later
// in this file) consumes both. Merged with config/env patterns in the
// constructor; duplicates removed via Set.
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    "node_modules/**",
    "dist/**",
    "build/**",
    "out/**",
    "target/**",
    "coverage/**",
    ".nyc_output/**",
    // IDE and editor files
    ".vscode/**",
    ".idea/**",
    "*.swp",
    "*.swo",
    // Version control
    ".git/**",
    ".svn/**",
    ".hg/**",
    // Cache directories
    ".cache/**",
    "__pycache__/**",
    ".pytest_cache/**",
    // Logs and temporary files
    "logs/**",
    "tmp/**",
    "temp/**",
    "*.log",
    // Environment and config files
    ".env",
    ".env.*",
    "*.local",
    // Minified and bundled files
    "*.min.js",
    "*.min.css",
    "*.min.map",
    "*.bundle.js",
    "*.bundle.css",
    "*.chunk.js",
    "*.vendor.js",
    "*.polyfills.js",
    "*.runtime.js",
    "*.map", // source map files
    // Bare directory names (match the directory entry itself, not contents)
    "node_modules",
    ".git",
    ".svn",
    ".hg",
    "build",
    "dist",
    "out",
    "target",
    ".vscode",
    ".idea",
    "__pycache__",
    ".pytest_cache",
    "coverage",
    ".nyc_output",
    "logs",
    "tmp",
    "temp",
];
133
+ class Context {
134
/**
 * Build a Context from a partial config.
 * @param config.name              Logical context name (default "my-context"); part of the collection name.
 * @param config.embedding         Optional embedding provider; defaults to OpenAI text-embedding-3-small
 *                                 configured from OPENAI_API_KEY / OPENAI_BASE_URL env vars.
 * @param config.vectorDatabase    REQUIRED vector database instance; throws if missing.
 * @param config.codeSplitter      Optional splitter; defaults to AstCodeSplitter(2500, 300).
 * @param config.supportedExtensions  If provided, REPLACES the default extension list as the base.
 * @param config.customExtensions     Appended on top of the base extensions.
 * @param config.ignorePatterns       If provided, REPLACES the default ignore list as the base.
 * @param config.customIgnorePatterns Appended on top of the base ignore patterns.
 * @throws {Error} when config.vectorDatabase is absent.
 */
constructor(config) {
    this.synchronizers = new Map();
    this.name = config.name || "my-context";
    // Initialize services. NOTE: the "your-openai-api-key" fallback is a
    // placeholder so construction never fails; embedding calls will fail
    // later if OPENAI_API_KEY is genuinely unset.
    this.embedding =
        config.embedding ||
            new embedding_1.OpenAIEmbedding({
                apiKey: env_manager_1.envManager.get("OPENAI_API_KEY") || "your-openai-api-key",
                model: "text-embedding-3-small",
                // Only pass baseURL when the env var is set (spread of false adds nothing).
                ...(env_manager_1.envManager.get("OPENAI_BASE_URL") && {
                    baseURL: env_manager_1.envManager.get("OPENAI_BASE_URL"),
                }),
            });
    if (!config.vectorDatabase) {
        throw new Error("VectorDatabase is required. Please provide a vectorDatabase instance in the config.");
    }
    this.vectorDatabase = config.vectorDatabase;
    this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
    // Load custom extensions from environment variables
    const envCustomExtensions = this.getCustomExtensionsFromEnv();
    // If supportedExtensions is provided, it is the base (defaults are NOT merged in);
    // otherwise the defaults are the base. Custom/env extensions are always appended.
    const baseExtensions = config.supportedExtensions || DEFAULT_SUPPORTED_EXTENSIONS;
    const allSupportedExtensions = [
        ...baseExtensions,
        ...(config.customExtensions || []),
        ...envCustomExtensions,
    ];
    // Remove duplicates (Set keeps first-seen order)
    this.supportedExtensions = [...new Set(allSupportedExtensions)];
    // Load custom ignore patterns from environment variables
    const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
    // Same base-vs-default rule as extensions above.
    const baseIgnorePatterns = config.ignorePatterns || DEFAULT_IGNORE_PATTERNS;
    const allIgnorePatterns = [
        ...baseIgnorePatterns,
        ...(config.customIgnorePatterns || []),
        ...envCustomIgnorePatterns,
    ];
    // Remove duplicates
    this.ignorePatterns = [...new Set(allIgnorePatterns)];
    console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
    if (envCustomExtensions.length > 0) {
        console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(", ")}`);
    }
    if (envCustomIgnorePatterns.length > 0) {
        console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(", ")}`);
    }
}
184
/**
 * @returns the current embedding provider instance (live reference, not a copy).
 */
getEmbedding() {
    return this.embedding;
}
190
/**
 * @returns the current vector database instance (live reference, not a copy).
 */
getVectorDatabase() {
    return this.vectorDatabase;
}
196
/**
 * @returns the current code splitter instance (live reference, not a copy).
 */
getCodeSplitter() {
    return this.codeSplitter;
}
202
+ /**
203
+ * Get supported extensions
204
+ */
205
+ getSupportedExtensions() {
206
+ return [...this.supportedExtensions];
207
+ }
208
+ /**
209
+ * Get ignore patterns
210
+ */
211
+ getIgnorePatterns() {
212
+ return [...this.ignorePatterns];
213
+ }
214
/**
 * @returns a shallow copy of the collection-name → FileSynchronizer map.
 * The Map container is fresh, but the synchronizer values are shared.
 */
getSynchronizers() {
    return new Map(this.synchronizers);
}
220
/**
 * Register (or replace) the FileSynchronizer used for a collection.
 * @param collectionName target collection name (see getCollectionName()).
 * @param synchronizer   FileSynchronizer instance to associate with it.
 */
setSynchronizer(collectionName, synchronizer) {
    this.synchronizers.set(collectionName, synchronizer);
}
226
/**
 * Public wrapper for the private loadIgnorePatterns method (defined later in
 * this file); loads ignore files from the codebase and updates this.ignorePatterns.
 * @param codebasePath codebase root to scan for ignore files.
 */
async getLoadedIgnorePatterns(codebasePath) {
    return this.loadIgnorePatterns(codebasePath);
}
232
/**
 * Public wrapper for the private prepareCollection method; ensures the
 * vector collection for this codebase exists (never force-reindexes).
 * @param codebasePath codebase root the collection belongs to.
 */
async getPreparedCollection(codebasePath) {
    return this.prepareCollection(codebasePath);
}
238
+ /**
239
+ * Get isHybrid setting from environment variable with default true
240
+ */
241
+ getIsHybrid() {
242
+ const isHybridEnv = env_manager_1.envManager.get("HYBRID_MODE");
243
+ if (isHybridEnv === undefined || isHybridEnv === null) {
244
+ return true; // Default to true
245
+ }
246
+ return isHybridEnv.toLowerCase() === "true";
247
+ }
248
+ /**
249
+ * Generate collection name based on codebase path and hybrid mode
250
+ */
251
+ getCollectionName() {
252
+ const isHybrid = this.getIsHybrid();
253
+ const prefix = isHybrid === true ? "hybrid_code_chunks" : "code_chunks";
254
+ return `${prefix}_${this.name}`;
255
+ }
256
/**
 * Index a codebase for semantic search.
 * Progress is reported in two phases: 0–10% for preparation/scan, 10–100%
 * for per-file processing.
 * @param codebasePath Codebase root path
 * @param progressCallback Optional progress callback function
 * @param forceReindex Whether to recreate the collection even if it exists
 * @returns Indexing statistics: { indexedFiles, totalChunks, status }
 *          where status is "completed" or "limit_reached" (see processFileList).
 */
async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
    const isHybrid = this.getIsHybrid();
    const searchType = isHybrid === true ? "hybrid search" : "semantic search";
    console.log(`[Context] 🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
    // 1. Load ignore patterns from various ignore files
    await this.loadIgnorePatterns(codebasePath);
    // 2. Check and prepare vector collection
    progressCallback?.({
        phase: "Preparing collection...",
        current: 0,
        total: 100,
        percentage: 0,
    });
    console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? " (FORCE REINDEX)" : ""}`);
    await this.prepareCollection(codebasePath, forceReindex);
    // 3. Recursively traverse codebase to get all supported files
    progressCallback?.({
        phase: "Scanning files...",
        current: 5,
        total: 100,
        percentage: 5,
    });
    const codeFiles = await this.getCodeFiles(codebasePath);
    console.log(`[Context] 📁 Found ${codeFiles.length} code files`);
    if (codeFiles.length === 0) {
        progressCallback?.({
            phase: "No files to index",
            current: 100,
            total: 100,
            percentage: 100,
        });
        return { indexedFiles: 0, totalChunks: 0, status: "completed" };
    }
    // 4. Process each file with streaming chunk processing.
    // Reserve 10% for preparation, 90% for actual indexing.
    const indexingStartPercentage = 10;
    const indexingEndPercentage = 100;
    const indexingRange = indexingEndPercentage - indexingStartPercentage;
    const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
        // Map file progress linearly into the 10–100% band.
        const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
        console.log(`[Context] 📊 Processed ${fileIndex}/${totalFiles} files`);
        progressCallback?.({
            phase: `Processing files (${fileIndex}/${totalFiles})...`,
            current: fileIndex,
            total: totalFiles,
            percentage: Math.round(progressPercentage),
        });
    });
    console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
    progressCallback?.({
        phase: "Indexing complete!",
        current: result.processedFiles,
        total: codeFiles.length,
        percentage: 100,
    });
    return {
        indexedFiles: result.processedFiles,
        totalChunks: result.totalChunks,
        status: result.status,
    };
}
325
+ async reindexByChange(codebasePath, progressCallback) {
326
+ const collectionName = this.getCollectionName();
327
+ const synchronizer = this.synchronizers.get(collectionName);
328
+ if (!synchronizer) {
329
+ // Load project-specific ignore patterns before creating FileSynchronizer
330
+ await this.loadIgnorePatterns(codebasePath);
331
+ // To be safe, let's initialize if it's not there.
332
+ const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
333
+ await newSynchronizer.initialize();
334
+ this.synchronizers.set(collectionName, newSynchronizer);
335
+ }
336
+ const currentSynchronizer = this.synchronizers.get(collectionName);
337
+ progressCallback?.({
338
+ phase: "Checking for file changes...",
339
+ current: 0,
340
+ total: 100,
341
+ percentage: 0,
342
+ });
343
+ const { added, removed, modified } = await currentSynchronizer.checkForChanges();
344
+ const totalChanges = added.length + removed.length + modified.length;
345
+ if (totalChanges === 0) {
346
+ progressCallback?.({
347
+ phase: "No changes detected",
348
+ current: 100,
349
+ total: 100,
350
+ percentage: 100,
351
+ });
352
+ console.log("[Context] ✅ No file changes detected.");
353
+ return { added: 0, removed: 0, modified: 0 };
354
+ }
355
+ console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
356
+ let processedChanges = 0;
357
+ const updateProgress = (phase) => {
358
+ processedChanges++;
359
+ const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) *
360
+ 100);
361
+ progressCallback?.({
362
+ phase,
363
+ current: processedChanges,
364
+ total: totalChanges,
365
+ percentage,
366
+ });
367
+ };
368
+ // Handle removed files
369
+ for (const file of removed) {
370
+ await this.deleteFileChunks(collectionName, file);
371
+ updateProgress(`Removed ${file}`);
372
+ }
373
+ // Handle modified files
374
+ for (const file of modified) {
375
+ await this.deleteFileChunks(collectionName, file);
376
+ updateProgress(`Deleted old chunks for ${file}`);
377
+ }
378
+ // Handle added and modified files
379
+ const filesToIndex = [...added, ...modified].map((f) => path.join(codebasePath, f));
380
+ if (filesToIndex.length > 0) {
381
+ await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
382
+ updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
383
+ });
384
+ }
385
+ console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
386
+ progressCallback?.({
387
+ phase: "Re-indexing complete!",
388
+ current: totalChanges,
389
+ total: totalChanges,
390
+ percentage: 100,
391
+ });
392
+ return {
393
+ added: added.length,
394
+ removed: removed.length,
395
+ modified: modified.length,
396
+ };
397
+ }
398
+ async deleteFileChunks(collectionName, relativePath) {
399
+ // Escape backslashes for Milvus query expression (Windows path compatibility)
400
+ const escapedPath = relativePath.replace(/\\/g, "\\\\");
401
+ const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ["id"]);
402
+ if (results.length > 0) {
403
+ const ids = results.map((r) => r.id).filter((id) => id);
404
+ if (ids.length > 0) {
405
+ await this.vectorDatabase.delete(collectionName, ids);
406
+ console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`);
407
+ }
408
+ }
409
+ }
410
/**
 * Semantic search with unified implementation: hybrid (dense + sparse with
 * RRF reranking) when HYBRID_MODE is on, plain dense vector search otherwise.
 * @param query Search query text
 * @param topK Number of results to return (default 5)
 * @param threshold Similarity threshold — NOTE: only applied in the
 *        non-hybrid branch; the hybrid branch ignores it.
 * @param filterExpr Optional boolean filter expression passed to the vector DB
 * @returns array of { content, relativePath, startLine, endLine, language, score };
 *          empty array when the collection does not exist yet.
 */
async semanticSearch(query, topK = 5, threshold = 0.5, filterExpr) {
    const isHybrid = this.getIsHybrid();
    const searchType = isHybrid === true ? "hybrid search" : "semantic search";
    console.log(`[Context] 🔍 Executing ${searchType}: "${query}"`);
    const collectionName = this.getCollectionName();
    console.log(`[Context] 🔍 Using collection: ${collectionName}`);
    // Check if collection exists and has data
    const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
    if (!hasCollection) {
        console.log(`[Context] ⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
        return [];
    }
    if (isHybrid === true) {
        try {
            // Probe query (limit 1) purely for diagnostics; failure is non-fatal.
            const _stats = await this.vectorDatabase.query(collectionName, "", ["id"], 1);
            console.log(`[Context] 🔍 Collection '${collectionName}' exists and appears to have data`);
        }
        catch (error) {
            console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
        }
        // 1. Generate query vector
        console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
        const queryEmbedding = await this.embedding.embed(query);
        console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
        console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(", ")}]`);
        // 2. Prepare hybrid search requests: dense vector against "vector",
        // raw query text against "sparse_vector" (full-text style match).
        const searchRequests = [
            {
                data: queryEmbedding.vector,
                anns_field: "vector",
                param: { nprobe: 10 },
                limit: topK,
            },
            {
                data: query,
                anns_field: "sparse_vector",
                param: { drop_ratio_search: 0.2 },
                limit: topK,
            },
        ];
        console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
        console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
        // 3. Execute hybrid search with reciprocal-rank-fusion reranking.
        console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
        const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
            rerank: {
                strategy: "rrf",
                params: { k: 100 },
            },
            limit: topK,
            filterExpr,
        });
        console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
        // 4. Convert to semantic search result format
        const results = searchResults.map((result) => ({
            content: result.document.content,
            relativePath: result.document.relativePath,
            startLine: result.document.startLine,
            endLine: result.document.endLine,
            language: String(result.document.metadata.language || "unknown"),
            score: result.score,
        }));
        console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
        if (results.length > 0) {
            console.log(`[Context] 🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
        }
        return results;
    }
    else {
        // Regular semantic search
        // 1. Generate query vector
        const queryEmbedding = await this.embedding.embed(query);
        // 2. Search in vector database (threshold applies here only)
        const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
        // 3. Convert to semantic search result format
        const results = searchResults.map((result) => ({
            content: result.document.content,
            relativePath: result.document.relativePath,
            startLine: result.document.startLine,
            endLine: result.document.endLine,
            language: String(result.document.metadata.language || "unknown"),
            score: result.score,
        }));
        console.log(`[Context] ✅ Found ${results.length} relevant results`);
        return results;
    }
}
504
+ /**
505
+ * Check if index exists
506
+ * @returns Whether index exists
507
+ */
508
+ async hasIndex() {
509
+ const collectionName = this.getCollectionName();
510
+ return await this.vectorDatabase.hasCollection(collectionName);
511
+ }
512
/**
 * Clear index: drop the vector collection (if present) and delete the
 * FileSynchronizer snapshot for the codebase.
 * @param codebasePath Codebase path to clear index for
 * @param progressCallback Optional progress callback function
 */
async clearIndex(codebasePath, progressCallback) {
    console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);
    progressCallback?.({
        phase: "Checking existing index...",
        current: 0,
        total: 100,
        percentage: 0,
    });
    const collectionName = this.getCollectionName();
    const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
    progressCallback?.({
        phase: "Removing index data...",
        current: 50,
        total: 100,
        percentage: 50,
    });
    if (collectionExists) {
        await this.vectorDatabase.dropCollection(collectionName);
    }
    // Delete snapshot file regardless of whether the collection existed,
    // so change tracking restarts cleanly on the next index.
    await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
    progressCallback?.({
        phase: "Index cleared",
        current: 100,
        total: 100,
        percentage: 100,
    });
    console.log("[Context] ✅ Index data cleaned");
}
546
+ /**
547
+ * Update ignore patterns (merges with default patterns and existing patterns)
548
+ * @param ignorePatterns Array of ignore patterns to add to defaults
549
+ */
550
+ updateIgnorePatterns(ignorePatterns) {
551
+ // Merge with default patterns and any existing custom patterns, avoiding duplicates
552
+ const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
553
+ const uniquePatterns = [];
554
+ const patternSet = new Set(mergedPatterns);
555
+ patternSet.forEach((pattern) => uniquePatterns.push(pattern));
556
+ this.ignorePatterns = uniquePatterns;
557
+ console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
558
+ }
559
+ /**
560
+ * Add custom ignore patterns (from MCP or other sources) without replacing existing ones
561
+ * @param customPatterns Array of custom ignore patterns to add
562
+ */
563
+ addCustomIgnorePatterns(customPatterns) {
564
+ if (customPatterns.length === 0)
565
+ return;
566
+ // Merge current patterns with new custom patterns, avoiding duplicates
567
+ const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
568
+ const uniquePatterns = [];
569
+ const patternSet = new Set(mergedPatterns);
570
+ patternSet.forEach((pattern) => uniquePatterns.push(pattern));
571
+ this.ignorePatterns = uniquePatterns;
572
+ console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
573
+ }
574
+ /**
575
+ * Reset ignore patterns to defaults only
576
+ */
577
+ resetIgnorePatternsToDefaults() {
578
+ this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
579
+ console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
580
+ }
581
/**
 * Swap the embedding provider. Does NOT re-index existing data; previously
 * stored vectors keep their old dimensions/provider.
 * @param embedding New embedding instance
 */
updateEmbedding(embedding) {
    this.embedding = embedding;
    console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`);
}
589
/**
 * Swap the vector database backend. Existing synchronizers and collection
 * names are unaffected.
 * @param vectorDatabase New vector database instance
 */
updateVectorDatabase(vectorDatabase) {
    this.vectorDatabase = vectorDatabase;
    console.log(`[Context] 🔄 Updated vector database`);
}
597
/**
 * Swap the code splitter used for future indexing runs.
 * @param splitter New splitter instance
 */
updateSplitter(splitter) {
    this.codeSplitter = splitter;
    console.log(`[Context] 🔄 Updated splitter instance`);
}
605
/**
 * Ensure the vector collection for this context exists. Skips creation when
 * it already exists (unless forceReindex, which drops and recreates it).
 * The collection dimension is detected from the embedding provider.
 * @param codebasePath codebase root (its basename becomes the collection description).
 * @param forceReindex  drop an existing collection and recreate it.
 */
async prepareCollection(codebasePath, forceReindex = false) {
    const isHybrid = this.getIsHybrid();
    const collectionType = isHybrid === true ? "hybrid vector" : "vector";
    console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? " (FORCE REINDEX)" : ""}`);
    const collectionName = this.getCollectionName();
    // Check if collection already exists
    const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
    if (collectionExists && !forceReindex) {
        console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
        return;
    }
    if (collectionExists && forceReindex) {
        console.log(`[Context] 🗑️ Dropping existing collection ${collectionName} for force reindex...`);
        await this.vectorDatabase.dropCollection(collectionName);
        console.log(`[Context] ✅ Collection ${collectionName} dropped successfully`);
    }
    console.log(`[Context] 🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
    const dimension = await this.embedding.detectDimension();
    console.log(`[Context] 📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
    const dirName = path.basename(codebasePath);
    if (isHybrid === true) {
        await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
    }
    else {
        await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
    }
    console.log(`[Context] ✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
}
636
+ /**
637
+ * Recursively get all code files in the codebase
638
+ */
639
+ async getCodeFiles(codebasePath) {
640
+ const files = [];
641
+ const traverseDirectory = async (currentPath) => {
642
+ const entries = await fs.promises.readdir(currentPath, {
643
+ withFileTypes: true,
644
+ });
645
+ for (const entry of entries) {
646
+ const fullPath = path.join(currentPath, entry.name);
647
+ // Check if path matches ignore patterns
648
+ if (this.matchesIgnorePattern(fullPath, codebasePath)) {
649
+ continue;
650
+ }
651
+ if (entry.isDirectory()) {
652
+ await traverseDirectory(fullPath);
653
+ }
654
+ else if (entry.isFile()) {
655
+ const ext = path.extname(entry.name);
656
+ if (this.supportedExtensions.includes(ext)) {
657
+ files.push(fullPath);
658
+ }
659
+ }
660
+ }
661
+ };
662
+ await traverseDirectory(codebasePath);
663
+ return files;
664
+ }
665
/**
 * Process a list of files with streaming chunk processing: split each file
 * into chunks, buffer them, and flush to processChunkBuffer in batches of
 * EMBEDDING_BATCH_SIZE. Stops early once CHUNK_LIMIT total chunks are seen.
 * Per-file read/split errors are logged and the file is skipped; batch
 * embedding errors are logged and the batch is discarded (buffer cleared).
 * @param filePaths Array of file paths to process
 * @param codebasePath Base path for the codebase
 * @param onFileProcessed Callback called when each file is processed
 * @returns { processedFiles, totalChunks, status } with status
 *          "limit_reached" when CHUNK_LIMIT stopped the run, else "completed".
 */
async processFileList(filePaths, codebasePath, onFileProcessed) {
    const isHybrid = this.getIsHybrid();
    // Batch size from env, floored at 1 (default 100).
    const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get("EMBEDDING_BATCH_SIZE") || "100", 10));
    const CHUNK_LIMIT = 450000;
    console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
    let chunkBuffer = [];
    let processedFiles = 0;
    let totalChunks = 0;
    let limitReached = false;
    for (let i = 0; i < filePaths.length; i++) {
        const filePath = filePaths[i];
        const relativeFilePath = path.relative(codebasePath, filePath);
        try {
            const content = await fs.promises.readFile(filePath, "utf-8");
            const language = this.getLanguageFromExtension(path.extname(filePath));
            const chunks = await this.codeSplitter.split(content, language, filePath);
            // Log files with many chunks or large content
            if (chunks.length > 50) {
                console.warn(`[Context] ⚠️ File ${relativeFilePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
            }
            else if (content.length > 100000) {
                console.log(`📄 Large file ${relativeFilePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
            }
            // Add chunks to buffer
            for (const chunk of chunks) {
                chunkBuffer.push({ chunk, codebasePath });
                totalChunks++;
                // Process batch when buffer reaches EMBEDDING_BATCH_SIZE
                if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
                    try {
                        await this.processChunkBuffer(chunkBuffer);
                    }
                    catch (error) {
                        const searchType = isHybrid === true ? "hybrid" : "regular";
                        console.error(`[Context] ❌ Failed to process chunk batch for ${searchType}:`, error);
                        if (error instanceof Error) {
                            console.error("[Context] Stack trace:", error.stack);
                        }
                    }
                    finally {
                        chunkBuffer = []; // Always clear buffer, even on failure
                    }
                }
                // Check if chunk limit is reached
                if (totalChunks >= CHUNK_LIMIT) {
                    console.warn(`[Context] ⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
                    limitReached = true;
                    break; // Exit the inner loop (over chunks)
                }
            }
            // The file counts as processed (and the callback fires) even when
            // the limit cut its chunk loop short.
            processedFiles++;
            onFileProcessed?.(relativeFilePath, i + 1, filePaths.length);
            if (limitReached) {
                break; // Exit the outer loop (over files)
            }
        }
        catch (error) {
            console.warn(`[Context] ⚠️ Skipping file ${relativeFilePath}: ${error}`);
        }
    }
    // Process any remaining chunks in the buffer
    if (chunkBuffer.length > 0) {
        const searchType = isHybrid === true ? "hybrid" : "regular";
        console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
        try {
            await this.processChunkBuffer(chunkBuffer);
        }
        catch (error) {
            console.error(`[Context] ❌ Failed to process final chunk batch for ${searchType}:`, error);
            if (error instanceof Error) {
                console.error("[Context] Stack trace:", error.stack);
            }
        }
    }
    return {
        processedFiles,
        totalChunks,
        status: limitReached ? "limit_reached" : "completed",
    };
}
752
+ /**
753
+ * Process accumulated chunk buffer
754
+ */
755
+ async processChunkBuffer(chunkBuffer) {
756
+ if (chunkBuffer.length === 0)
757
+ return;
758
+ // Extract chunks and ensure they all have the same codebasePath
759
+ const chunks = chunkBuffer.map((item) => item.chunk);
760
+ const codebasePath = chunkBuffer[0].codebasePath;
761
+ // Estimate tokens (rough estimation: 1 token ≈ 4 characters)
762
+ const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
763
+ const isHybrid = this.getIsHybrid();
764
+ const searchType = isHybrid === true ? "hybrid" : "regular";
765
+ console.log(`[Context] 🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
766
+ await this.processChunkBatch(chunks, codebasePath);
767
+ }
768
+ /**
769
+ * Process a batch of chunks
770
+ */
771
+ async processChunkBatch(chunks, codebasePath) {
772
+ const isHybrid = this.getIsHybrid();
773
+ // Generate embedding vectors
774
+ const chunkContents = chunks.map((chunk) => chunk.content);
775
+ const embeddings = await this.embedding.embedBatch(chunkContents);
776
+ if (isHybrid === true) {
777
+ // Create hybrid vector documents
778
+ const documents = chunks.map((chunk, index) => {
779
+ if (!chunk.metadata.filePath) {
780
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
781
+ }
782
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
783
+ const fileExtension = path.extname(chunk.metadata.filePath);
784
+ const { filePath: _filePath, startLine: _startLine, endLine: _endLine, ...restMetadata } = chunk.metadata;
785
+ return {
786
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
787
+ content: chunk.content, // Full text content for BM25 and storage
788
+ vector: embeddings[index].vector, // Dense vector
789
+ relativePath,
790
+ startLine: chunk.metadata.startLine || 0,
791
+ endLine: chunk.metadata.endLine || 0,
792
+ fileExtension,
793
+ metadata: {
794
+ ...restMetadata,
795
+ contextName: this.name,
796
+ codebasePath,
797
+ language: chunk.metadata.language || "unknown",
798
+ chunkIndex: index,
799
+ },
800
+ };
801
+ });
802
+ // Store to vector database
803
+ await this.vectorDatabase.insertHybrid(this.getCollectionName(), documents);
804
+ }
805
+ else {
806
+ // Create regular vector documents
807
+ const documents = chunks.map((chunk, index) => {
808
+ if (!chunk.metadata.filePath) {
809
+ throw new Error(`Missing filePath in chunk metadata at index ${index}`);
810
+ }
811
+ const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
812
+ const fileExtension = path.extname(chunk.metadata.filePath);
813
+ const { filePath: _filePath, startLine: _startLine, endLine: _endLine, ...restMetadata } = chunk.metadata;
814
+ return {
815
+ id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
816
+ vector: embeddings[index].vector,
817
+ content: chunk.content,
818
+ relativePath,
819
+ startLine: chunk.metadata.startLine || 0,
820
+ endLine: chunk.metadata.endLine || 0,
821
+ fileExtension,
822
+ metadata: {
823
+ ...restMetadata,
824
+ contextName: this.name,
825
+ codebasePath,
826
+ language: chunk.metadata.language || "unknown",
827
+ chunkIndex: index,
828
+ },
829
+ };
830
+ });
831
+ // Store to vector database
832
+ await this.vectorDatabase.insert(this.getCollectionName(), documents);
833
+ }
834
+ }
835
+ /**
836
+ * Get programming language based on file extension
837
+ */
838
+ getLanguageFromExtension(ext) {
839
+ const languageMap = {
840
+ ".ts": "typescript",
841
+ ".tsx": "typescript",
842
+ ".js": "javascript",
843
+ ".jsx": "javascript",
844
+ ".py": "python",
845
+ ".java": "java",
846
+ ".cpp": "cpp",
847
+ ".c": "c",
848
+ ".h": "c",
849
+ ".hpp": "cpp",
850
+ ".cs": "csharp",
851
+ ".go": "go",
852
+ ".rs": "rust",
853
+ ".php": "php",
854
+ ".rb": "ruby",
855
+ ".swift": "swift",
856
+ ".kt": "kotlin",
857
+ ".scala": "scala",
858
+ ".m": "objective-c",
859
+ ".mm": "objective-c",
860
+ ".ipynb": "jupyter",
861
+ };
862
+ return languageMap[ext] || "text";
863
+ }
864
+ /**
865
+ * Generate unique ID based on chunk content and location
866
+ * @param relativePath Relative path to the file
867
+ * @param startLine Start line number
868
+ * @param endLine End line number
869
+ * @param content Chunk content
870
+ * @returns Hash-based unique ID
871
+ */
872
+ generateId(relativePath, startLine, endLine, content) {
873
+ const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
874
+ const hash = crypto
875
+ .createHash("sha256")
876
+ .update(combinedString, "utf-8")
877
+ .digest("hex");
878
+ return `chunk_${hash.substring(0, 16)}`;
879
+ }
880
+ /**
881
+ * Read ignore patterns from file (e.g., .gitignore)
882
+ * @param filePath Path to the ignore file
883
+ * @returns Array of ignore patterns
884
+ */
885
+ static async getIgnorePatternsFromFile(filePath) {
886
+ try {
887
+ const content = await fs.promises.readFile(filePath, "utf-8");
888
+ return content
889
+ .split("\n")
890
+ .map((line) => line.trim())
891
+ .filter((line) => line && !line.startsWith("#")); // Filter out empty lines and comments
892
+ }
893
+ catch (error) {
894
+ console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`);
895
+ return [];
896
+ }
897
+ }
898
+ /**
899
+ * Load ignore patterns from various ignore files in the codebase
900
+ * This method preserves any existing custom patterns that were added before
901
+ * @param codebasePath Path to the codebase
902
+ */
903
+ async loadIgnorePatterns(codebasePath) {
904
+ try {
905
+ const fileBasedPatterns = [];
906
+ // Load all .xxxignore files in codebase directory
907
+ const ignoreFiles = await this.findIgnoreFiles(codebasePath);
908
+ for (const ignoreFile of ignoreFiles) {
909
+ const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
910
+ fileBasedPatterns.push(...patterns);
911
+ }
912
+ // Load global ~/.context/.contextignore
913
+ const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
914
+ fileBasedPatterns.push(...globalIgnorePatterns);
915
+ // Merge file-based patterns with existing patterns (which may include custom MCP patterns)
916
+ if (fileBasedPatterns.length > 0) {
917
+ this.addCustomIgnorePatterns(fileBasedPatterns);
918
+ console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
919
+ }
920
+ else {
921
+ console.log("📄 No ignore files found, keeping existing patterns");
922
+ }
923
+ }
924
+ catch (error) {
925
+ console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`);
926
+ // Continue with existing patterns on error - don't reset them
927
+ }
928
+ }
929
+ /**
930
+ * Find all .xxxignore files in the codebase directory
931
+ * @param codebasePath Path to the codebase
932
+ * @returns Array of ignore file paths
933
+ */
934
+ async findIgnoreFiles(codebasePath) {
935
+ try {
936
+ const entries = await fs.promises.readdir(codebasePath, {
937
+ withFileTypes: true,
938
+ });
939
+ const ignoreFiles = [];
940
+ for (const entry of entries) {
941
+ if (entry.isFile() &&
942
+ entry.name.startsWith(".") &&
943
+ entry.name.endsWith("ignore")) {
944
+ ignoreFiles.push(path.join(codebasePath, entry.name));
945
+ }
946
+ }
947
+ if (ignoreFiles.length > 0) {
948
+ console.log(`📄 Found ignore files: ${ignoreFiles.map((f) => path.basename(f)).join(", ")}`);
949
+ }
950
+ return ignoreFiles;
951
+ }
952
+ catch (error) {
953
+ console.warn(`[Context] ⚠️ Failed to scan for ignore files: ${error}`);
954
+ return [];
955
+ }
956
+ }
957
+ /**
958
+ * Load global ignore file from ~/.context/.contextignore
959
+ * @returns Array of ignore patterns
960
+ */
961
+ async loadGlobalIgnoreFile() {
962
+ try {
963
+ const homeDir = require("os").homedir();
964
+ const globalIgnorePath = path.join(homeDir, ".context", ".contextignore");
965
+ return await this.loadIgnoreFile(globalIgnorePath, "global .contextignore");
966
+ }
967
+ catch (_error) {
968
+ // Global ignore file is optional, don't log warnings
969
+ return [];
970
+ }
971
+ }
972
+ /**
973
+ * Load ignore patterns from a specific ignore file
974
+ * @param filePath Path to the ignore file
975
+ * @param fileName Display name for logging
976
+ * @returns Array of ignore patterns
977
+ */
978
+ async loadIgnoreFile(filePath, fileName) {
979
+ try {
980
+ await fs.promises.access(filePath);
981
+ console.log(`📄 Found ${fileName} file at: ${filePath}`);
982
+ const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath);
983
+ if (ignorePatterns.length > 0) {
984
+ console.log(`[Context] 🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
985
+ return ignorePatterns;
986
+ }
987
+ else {
988
+ console.log(`📄 ${fileName} file found but no valid patterns detected`);
989
+ return [];
990
+ }
991
+ }
992
+ catch (_error) {
993
+ if (fileName.includes("global")) {
994
+ console.log(`📄 No ${fileName} file found`);
995
+ }
996
+ return [];
997
+ }
998
+ }
999
+ /**
1000
+ * Check if a path matches any ignore pattern
1001
+ * @param filePath Path to check
1002
+ * @param basePath Base path for relative pattern matching
1003
+ * @returns True if path should be ignored
1004
+ */
1005
+ matchesIgnorePattern(filePath, basePath) {
1006
+ if (this.ignorePatterns.length === 0) {
1007
+ return false;
1008
+ }
1009
+ const relativePath = path.relative(basePath, filePath);
1010
+ const normalizedPath = relativePath.replace(/\\/g, "/"); // Normalize path separators
1011
+ for (const pattern of this.ignorePatterns) {
1012
+ if (this.isPatternMatch(normalizedPath, pattern)) {
1013
+ return true;
1014
+ }
1015
+ }
1016
+ return false;
1017
+ }
1018
+ /**
1019
+ * Simple glob pattern matching
1020
+ * @param filePath File path to test
1021
+ * @param pattern Glob pattern
1022
+ * @returns True if pattern matches
1023
+ */
1024
+ isPatternMatch(filePath, pattern) {
1025
+ // Handle directory patterns (ending with /)
1026
+ if (pattern.endsWith("/")) {
1027
+ const dirPattern = pattern.slice(0, -1);
1028
+ const pathParts = filePath.split("/");
1029
+ return pathParts.some((part) => this.simpleGlobMatch(part, dirPattern));
1030
+ }
1031
+ // Handle file patterns
1032
+ if (pattern.includes("/")) {
1033
+ // Pattern with path separator - match exact path
1034
+ return this.simpleGlobMatch(filePath, pattern);
1035
+ }
1036
+ else {
1037
+ // Pattern without path separator - match filename in any directory
1038
+ const fileName = path.basename(filePath);
1039
+ return this.simpleGlobMatch(fileName, pattern);
1040
+ }
1041
+ }
1042
+ /**
1043
+ * Simple glob matching supporting * wildcard
1044
+ * @param text Text to test
1045
+ * @param pattern Pattern with * wildcards
1046
+ * @returns True if pattern matches
1047
+ */
1048
+ simpleGlobMatch(text, pattern) {
1049
+ // Convert glob pattern to regex
1050
+ const regexPattern = pattern
1051
+ .replace(/[.+^${}()|[\]\\]/g, "\\$&") // Escape regex special chars except *
1052
+ .replace(/\*/g, ".*"); // Convert * to .*
1053
+ const regex = new RegExp(`^${regexPattern}$`);
1054
+ return regex.test(text);
1055
+ }
1056
+ /**
1057
+ * Get custom extensions from environment variables
1058
+ * Supports CUSTOM_EXTENSIONS as comma-separated list
1059
+ * @returns Array of custom extensions
1060
+ */
1061
+ getCustomExtensionsFromEnv() {
1062
+ const envExtensions = env_manager_1.envManager.get("CUSTOM_EXTENSIONS");
1063
+ if (!envExtensions) {
1064
+ return [];
1065
+ }
1066
+ try {
1067
+ const extensions = envExtensions
1068
+ .split(",")
1069
+ .map((ext) => ext.trim())
1070
+ .filter((ext) => ext.length > 0)
1071
+ .map((ext) => (ext.startsWith(".") ? ext : `.${ext}`)); // Ensure extensions start with dot
1072
+ return extensions;
1073
+ }
1074
+ catch (error) {
1075
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
1076
+ return [];
1077
+ }
1078
+ }
1079
+ /**
1080
+ * Get custom ignore patterns from environment variables
1081
+ * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
1082
+ * @returns Array of custom ignore patterns
1083
+ */
1084
+ getCustomIgnorePatternsFromEnv() {
1085
+ const envIgnorePatterns = env_manager_1.envManager.get("CUSTOM_IGNORE_PATTERNS");
1086
+ if (!envIgnorePatterns) {
1087
+ return [];
1088
+ }
1089
+ try {
1090
+ const patterns = envIgnorePatterns
1091
+ .split(",")
1092
+ .map((pattern) => pattern.trim())
1093
+ .filter((pattern) => pattern.length > 0);
1094
+ return patterns;
1095
+ }
1096
+ catch (error) {
1097
+ console.warn(`[Context] ⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
1098
+ return [];
1099
+ }
1100
+ }
1101
+ /**
1102
+ * Add custom extensions (from MCP or other sources) without replacing existing ones
1103
+ * @param customExtensions Array of custom extensions to add
1104
+ */
1105
+ addCustomExtensions(customExtensions) {
1106
+ if (customExtensions.length === 0)
1107
+ return;
1108
+ // Ensure extensions start with dot
1109
+ const normalizedExtensions = customExtensions.map((ext) => ext.startsWith(".") ? ext : `.${ext}`);
1110
+ // Merge current extensions with new custom extensions, avoiding duplicates
1111
+ const mergedExtensions = [
1112
+ ...this.supportedExtensions,
1113
+ ...normalizedExtensions,
1114
+ ];
1115
+ const uniqueExtensions = [...new Set(mergedExtensions)];
1116
+ this.supportedExtensions = uniqueExtensions;
1117
+ console.log(`[Context] 📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
1118
+ }
1119
+ /**
1120
+ * Get current splitter information
1121
+ */
1122
+ getSplitterInfo() {
1123
+ const splitterName = this.codeSplitter.constructor.name;
1124
+ if (splitterName === "AstCodeSplitter") {
1125
+ const { AstCodeSplitter } = require("./splitter/ast-splitter");
1126
+ return {
1127
+ type: "ast",
1128
+ hasBuiltinFallback: true,
1129
+ supportedLanguages: AstCodeSplitter.getSupportedLanguages(),
1130
+ };
1131
+ }
1132
+ else {
1133
+ return {
1134
+ type: "langchain",
1135
+ hasBuiltinFallback: false,
1136
+ };
1137
+ }
1138
+ }
1139
+ /**
1140
+ * Check if current splitter supports a specific language
1141
+ * @param language Programming language
1142
+ */
1143
+ isLanguageSupported(language) {
1144
+ const splitterName = this.codeSplitter.constructor.name;
1145
+ if (splitterName === "AstCodeSplitter") {
1146
+ const { AstCodeSplitter } = require("./splitter/ast-splitter");
1147
+ return AstCodeSplitter.isLanguageSupported(language);
1148
+ }
1149
+ // LangChain splitter supports most languages
1150
+ return true;
1151
+ }
1152
+ /**
1153
+ * Get which strategy would be used for a specific language
1154
+ * @param language Programming language
1155
+ */
1156
+ getSplitterStrategyForLanguage(language) {
1157
+ const splitterName = this.codeSplitter.constructor.name;
1158
+ if (splitterName === "AstCodeSplitter") {
1159
+ const { AstCodeSplitter } = require("./splitter/ast-splitter");
1160
+ const isSupported = AstCodeSplitter.isLanguageSupported(language);
1161
+ return {
1162
+ strategy: isSupported ? "ast" : "langchain",
1163
+ reason: isSupported
1164
+ ? "Language supported by AST parser"
1165
+ : "Language not supported by AST, will fallback to LangChain",
1166
+ };
1167
+ }
1168
+ else {
1169
+ return {
1170
+ strategy: "langchain",
1171
+ reason: "Using LangChain splitter directly",
1172
+ };
1173
+ }
1174
+ }
1175
+ }
1176
+ exports.Context = Context;
1177
+ //# sourceMappingURL=context.js.map