@zilliz/claude-context-core 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +275 -0
- package/dist/context.d.ts +234 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +879 -0
- package/dist/context.js.map +1 -0
- package/dist/embedding/base-embedding.d.ts +45 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -0
- package/dist/embedding/base-embedding.js +36 -0
- package/dist/embedding/base-embedding.js.map +1 -0
- package/dist/embedding/gemini-embedding.d.ts +51 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -0
- package/dist/embedding/gemini-embedding.js +143 -0
- package/dist/embedding/gemini-embedding.js.map +1 -0
- package/dist/embedding/index.d.ts +6 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +24 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/embedding/ollama-embedding.d.ts +64 -0
- package/dist/embedding/ollama-embedding.d.ts.map +1 -0
- package/dist/embedding/ollama-embedding.js +205 -0
- package/dist/embedding/ollama-embedding.js.map +1 -0
- package/dist/embedding/openai-embedding.d.ts +36 -0
- package/dist/embedding/openai-embedding.d.ts.map +1 -0
- package/dist/embedding/openai-embedding.js +103 -0
- package/dist/embedding/openai-embedding.js.map +1 -0
- package/dist/embedding/voyageai-embedding.d.ts +43 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
- package/dist/embedding/voyageai-embedding.js +223 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/splitter/ast-splitter.d.ts +22 -0
- package/dist/splitter/ast-splitter.d.ts.map +1 -0
- package/dist/splitter/ast-splitter.js +227 -0
- package/dist/splitter/ast-splitter.js.map +1 -0
- package/dist/splitter/index.d.ts +41 -0
- package/dist/splitter/index.d.ts.map +1 -0
- package/dist/splitter/index.js +27 -0
- package/dist/splitter/index.js.map +1 -0
- package/dist/splitter/langchain-splitter.d.ts +13 -0
- package/dist/splitter/langchain-splitter.d.ts.map +1 -0
- package/dist/splitter/langchain-splitter.js +118 -0
- package/dist/splitter/langchain-splitter.js.map +1 -0
- package/dist/sync/merkle.d.ts +26 -0
- package/dist/sync/merkle.d.ts.map +1 -0
- package/dist/sync/merkle.js +112 -0
- package/dist/sync/merkle.js.map +1 -0
- package/dist/sync/synchronizer.d.ts +30 -0
- package/dist/sync/synchronizer.d.ts.map +1 -0
- package/dist/sync/synchronizer.js +339 -0
- package/dist/sync/synchronizer.js.map +1 -0
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/env-manager.d.ts +19 -0
- package/dist/utils/env-manager.d.ts.map +1 -0
- package/dist/utils/env-manager.js +125 -0
- package/dist/utils/env-manager.js.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +7 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/vectordb/index.d.ts +5 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +14 -0
- package/dist/vectordb/index.js.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts +51 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.js +406 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
- package/dist/vectordb/milvus-vectordb.d.ts +34 -0
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-vectordb.js +248 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -0
- package/dist/vectordb/types.d.ts +75 -0
- package/dist/vectordb/types.d.ts.map +1 -0
- package/dist/vectordb/types.js +9 -0
- package/dist/vectordb/types.js.map +1 -0
- package/dist/vectordb/zilliz-utils.d.ts +135 -0
- package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
- package/dist/vectordb/zilliz-utils.js +192 -0
- package/dist/vectordb/zilliz-utils.js.map +1 -0
- package/package.json +56 -0
package/dist/context.js
ADDED
|
@@ -0,0 +1,879 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.CodeContext = void 0;
|
|
37
|
+
const splitter_1 = require("./splitter");
|
|
38
|
+
const embedding_1 = require("./embedding");
|
|
39
|
+
const env_manager_1 = require("./utils/env-manager");
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const crypto = __importStar(require("crypto"));
|
|
43
|
+
const synchronizer_1 = require("./sync/synchronizer");
|
|
44
|
+
// File extensions indexed by default. More can be added via the CodeContext
// config (`supportedExtensions` / `customExtensions`) or environment variables.
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
    '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
    // Text and markup files
    '.md', '.markdown', '.ipynb',
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
// Ignore patterns applied by default. Glob forms (`x/**`) match nested paths;
// the bare directory names at the end match the directories themselves.
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    'node_modules/**',
    'dist/**',
    'build/**',
    'out/**',
    'target/**',
    'coverage/**',
    '.nyc_output/**',
    // IDE and editor files
    '.vscode/**',
    '.idea/**',
    '*.swp',
    '*.swo',
    // Version control
    '.git/**',
    '.svn/**',
    '.hg/**',
    // Cache directories
    '.cache/**',
    '__pycache__/**',
    '.pytest_cache/**',
    // Logs and temporary files
    'logs/**',
    'tmp/**',
    'temp/**',
    '*.log',
    // Environment and config files
    '.env',
    '.env.*',
    '*.local',
    // Minified and bundled files
    '*.min.js',
    '*.min.css',
    '*.min.map',
    '*.bundle.js',
    '*.bundle.css',
    '*.chunk.js',
    '*.vendor.js',
    '*.polyfills.js',
    '*.runtime.js',
    '*.map', // source map files
    // Bare directory names (match the directory entries themselves)
    'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
    'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
    'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
];
|
|
99
|
+
class CodeContext {
|
|
100
|
+
constructor(config = {}) {
|
|
101
|
+
this.synchronizers = new Map();
|
|
102
|
+
// Initialize services
|
|
103
|
+
this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
|
|
104
|
+
apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
|
|
105
|
+
model: 'text-embedding-3-small',
|
|
106
|
+
...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
|
|
107
|
+
});
|
|
108
|
+
if (!config.vectorDatabase) {
|
|
109
|
+
throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
|
|
110
|
+
}
|
|
111
|
+
this.vectorDatabase = config.vectorDatabase;
|
|
112
|
+
this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
|
|
113
|
+
// Load custom extensions from environment variables
|
|
114
|
+
const envCustomExtensions = this.getCustomExtensionsFromEnv();
|
|
115
|
+
// Combine default extensions with config extensions and env extensions
|
|
116
|
+
const allSupportedExtensions = [
|
|
117
|
+
...DEFAULT_SUPPORTED_EXTENSIONS,
|
|
118
|
+
...(config.supportedExtensions || []),
|
|
119
|
+
...(config.customExtensions || []),
|
|
120
|
+
...envCustomExtensions
|
|
121
|
+
];
|
|
122
|
+
// Remove duplicates
|
|
123
|
+
this.supportedExtensions = [...new Set(allSupportedExtensions)];
|
|
124
|
+
// Load custom ignore patterns from environment variables
|
|
125
|
+
const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
|
|
126
|
+
// Start with default ignore patterns
|
|
127
|
+
const allIgnorePatterns = [
|
|
128
|
+
...DEFAULT_IGNORE_PATTERNS,
|
|
129
|
+
...(config.ignorePatterns || []),
|
|
130
|
+
...(config.customIgnorePatterns || []),
|
|
131
|
+
...envCustomIgnorePatterns
|
|
132
|
+
];
|
|
133
|
+
// Remove duplicates
|
|
134
|
+
this.ignorePatterns = [...new Set(allIgnorePatterns)];
|
|
135
|
+
console.log(`🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
|
|
136
|
+
if (envCustomExtensions.length > 0) {
|
|
137
|
+
console.log(`📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
|
|
138
|
+
}
|
|
139
|
+
if (envCustomIgnorePatterns.length > 0) {
|
|
140
|
+
console.log(`🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Generate collection name based on codebase path
|
|
145
|
+
*/
|
|
146
|
+
getCollectionName(codebasePath) {
|
|
147
|
+
const normalizedPath = path.resolve(codebasePath);
|
|
148
|
+
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
149
|
+
return `code_chunks_${hash.substring(0, 8)}`;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Index entire codebase
|
|
153
|
+
* @param codebasePath Codebase path
|
|
154
|
+
* @param progressCallback Optional progress callback function
|
|
155
|
+
* @returns Indexing statistics
|
|
156
|
+
*/
|
|
157
|
+
async indexCodebase(codebasePath, progressCallback) {
|
|
158
|
+
console.log(`🚀 Starting to index codebase: ${codebasePath}`);
|
|
159
|
+
// 1. Load ignore patterns from various ignore files
|
|
160
|
+
await this.loadGitignorePatterns(codebasePath);
|
|
161
|
+
// 2. Check and prepare vector collection
|
|
162
|
+
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
|
|
163
|
+
console.log(`Debug2: Preparing vector collection for codebase`);
|
|
164
|
+
await this.prepareCollection(codebasePath);
|
|
165
|
+
// 3. Recursively traverse codebase to get all supported files
|
|
166
|
+
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
|
|
167
|
+
const codeFiles = await this.getCodeFiles(codebasePath);
|
|
168
|
+
console.log(`📁 Found ${codeFiles.length} code files`);
|
|
169
|
+
if (codeFiles.length === 0) {
|
|
170
|
+
progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 });
|
|
171
|
+
return { indexedFiles: 0, totalChunks: 0, status: 'completed' };
|
|
172
|
+
}
|
|
173
|
+
// 3. Process each file with streaming chunk processing
|
|
174
|
+
// Reserve 10% for preparation, 90% for actual indexing
|
|
175
|
+
const indexingStartPercentage = 10;
|
|
176
|
+
const indexingEndPercentage = 100;
|
|
177
|
+
const indexingRange = indexingEndPercentage - indexingStartPercentage;
|
|
178
|
+
const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
|
|
179
|
+
// Calculate progress percentage
|
|
180
|
+
const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
|
|
181
|
+
console.log(`📊 Processed ${fileIndex}/${totalFiles} files`);
|
|
182
|
+
progressCallback?.({
|
|
183
|
+
phase: `Processing files (${fileIndex}/${totalFiles})...`,
|
|
184
|
+
current: fileIndex,
|
|
185
|
+
total: totalFiles,
|
|
186
|
+
percentage: Math.round(progressPercentage)
|
|
187
|
+
});
|
|
188
|
+
});
|
|
189
|
+
console.log(`✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
|
|
190
|
+
progressCallback?.({
|
|
191
|
+
phase: 'Indexing complete!',
|
|
192
|
+
current: result.processedFiles,
|
|
193
|
+
total: codeFiles.length,
|
|
194
|
+
percentage: 100
|
|
195
|
+
});
|
|
196
|
+
return {
|
|
197
|
+
indexedFiles: result.processedFiles,
|
|
198
|
+
totalChunks: result.totalChunks,
|
|
199
|
+
status: result.status
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
async reindexByChange(codebasePath, progressCallback) {
|
|
203
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
204
|
+
const synchronizer = this.synchronizers.get(collectionName);
|
|
205
|
+
if (!synchronizer) {
|
|
206
|
+
// To be safe, let's initialize if it's not there.
|
|
207
|
+
const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
|
|
208
|
+
await newSynchronizer.initialize();
|
|
209
|
+
this.synchronizers.set(collectionName, newSynchronizer);
|
|
210
|
+
}
|
|
211
|
+
const currentSynchronizer = this.synchronizers.get(collectionName);
|
|
212
|
+
progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 });
|
|
213
|
+
const { added, removed, modified } = await currentSynchronizer.checkForChanges();
|
|
214
|
+
const totalChanges = added.length + removed.length + modified.length;
|
|
215
|
+
if (totalChanges === 0) {
|
|
216
|
+
progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
|
|
217
|
+
console.log('✅ No file changes detected.');
|
|
218
|
+
return { added: 0, removed: 0, modified: 0 };
|
|
219
|
+
}
|
|
220
|
+
console.log(`🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
|
|
221
|
+
let processedChanges = 0;
|
|
222
|
+
const updateProgress = (phase) => {
|
|
223
|
+
processedChanges++;
|
|
224
|
+
const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100);
|
|
225
|
+
progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage });
|
|
226
|
+
};
|
|
227
|
+
// Handle removed files
|
|
228
|
+
for (const file of removed) {
|
|
229
|
+
await this.deleteFileChunks(collectionName, file);
|
|
230
|
+
updateProgress(`Removed ${file}`);
|
|
231
|
+
}
|
|
232
|
+
// Handle modified files
|
|
233
|
+
for (const file of modified) {
|
|
234
|
+
await this.deleteFileChunks(collectionName, file);
|
|
235
|
+
updateProgress(`Deleted old chunks for ${file}`);
|
|
236
|
+
}
|
|
237
|
+
// Handle added and modified files
|
|
238
|
+
const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
|
|
239
|
+
if (filesToIndex.length > 0) {
|
|
240
|
+
await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
|
|
241
|
+
updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
console.log(`✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
|
|
245
|
+
progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
|
|
246
|
+
return { added: added.length, removed: removed.length, modified: modified.length };
|
|
247
|
+
}
|
|
248
|
+
async deleteFileChunks(collectionName, relativePath) {
|
|
249
|
+
const results = await this.vectorDatabase.query(collectionName, `relativePath == "${relativePath}"`, ['id']);
|
|
250
|
+
if (results.length > 0) {
|
|
251
|
+
const ids = results.map(r => r.id).filter(id => id);
|
|
252
|
+
if (ids.length > 0) {
|
|
253
|
+
await this.vectorDatabase.delete(collectionName, ids);
|
|
254
|
+
console.log(`Deleted ${ids.length} chunks for file ${relativePath}`);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Semantic search
|
|
260
|
+
* @param codebasePath Codebase path to search in
|
|
261
|
+
* @param query Search query
|
|
262
|
+
* @param topK Number of results to return
|
|
263
|
+
* @param threshold Similarity threshold
|
|
264
|
+
*/
|
|
265
|
+
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5) {
|
|
266
|
+
console.log(`🔍 Executing semantic search: "${query}" in ${codebasePath}`);
|
|
267
|
+
// 1. Generate query vector
|
|
268
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
269
|
+
// 2. Search in vector database
|
|
270
|
+
const searchResults = await this.vectorDatabase.search(this.getCollectionName(codebasePath), queryEmbedding.vector, { topK, threshold });
|
|
271
|
+
// 3. Convert to semantic search result format
|
|
272
|
+
const results = searchResults.map(result => ({
|
|
273
|
+
content: result.document.content,
|
|
274
|
+
relativePath: result.document.relativePath,
|
|
275
|
+
startLine: result.document.startLine,
|
|
276
|
+
endLine: result.document.endLine,
|
|
277
|
+
language: result.document.metadata.language || 'unknown',
|
|
278
|
+
score: result.score
|
|
279
|
+
}));
|
|
280
|
+
console.log(`✅ Found ${results.length} relevant results`);
|
|
281
|
+
return results;
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Check if index exists for codebase
|
|
285
|
+
* @param codebasePath Codebase path to check
|
|
286
|
+
* @returns Whether index exists
|
|
287
|
+
*/
|
|
288
|
+
async hasIndex(codebasePath) {
|
|
289
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
290
|
+
return await this.vectorDatabase.hasCollection(collectionName);
|
|
291
|
+
}
|
|
292
|
+
/**
|
|
293
|
+
* Clear index
|
|
294
|
+
* @param codebasePath Codebase path to clear index for
|
|
295
|
+
* @param progressCallback Optional progress callback function
|
|
296
|
+
*/
|
|
297
|
+
async clearIndex(codebasePath, progressCallback) {
|
|
298
|
+
console.log(`🧹 Cleaning index data for ${codebasePath}...`);
|
|
299
|
+
progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
|
|
300
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
301
|
+
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
|
|
302
|
+
progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
|
|
303
|
+
if (collectionExists) {
|
|
304
|
+
await this.vectorDatabase.dropCollection(collectionName);
|
|
305
|
+
}
|
|
306
|
+
// Delete snapshot file
|
|
307
|
+
await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
|
|
308
|
+
progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
|
|
309
|
+
console.log('✅ Index data cleaned');
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Update ignore patterns (merges with default patterns and existing patterns)
|
|
313
|
+
* @param ignorePatterns Array of ignore patterns to add to defaults
|
|
314
|
+
*/
|
|
315
|
+
updateIgnorePatterns(ignorePatterns) {
|
|
316
|
+
// Merge with default patterns and any existing custom patterns, avoiding duplicates
|
|
317
|
+
const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
|
|
318
|
+
const uniquePatterns = [];
|
|
319
|
+
const patternSet = new Set(mergedPatterns);
|
|
320
|
+
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
|
321
|
+
this.ignorePatterns = uniquePatterns;
|
|
322
|
+
console.log(`🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
|
|
323
|
+
}
|
|
324
|
+
/**
|
|
325
|
+
* Add custom ignore patterns (from MCP or other sources) without replacing existing ones
|
|
326
|
+
* @param customPatterns Array of custom ignore patterns to add
|
|
327
|
+
*/
|
|
328
|
+
addCustomIgnorePatterns(customPatterns) {
|
|
329
|
+
if (customPatterns.length === 0)
|
|
330
|
+
return;
|
|
331
|
+
// Merge current patterns with new custom patterns, avoiding duplicates
|
|
332
|
+
const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
|
|
333
|
+
const uniquePatterns = [];
|
|
334
|
+
const patternSet = new Set(mergedPatterns);
|
|
335
|
+
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
|
336
|
+
this.ignorePatterns = uniquePatterns;
|
|
337
|
+
console.log(`🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
|
|
338
|
+
}
|
|
339
|
+
/**
|
|
340
|
+
* Reset ignore patterns to defaults only
|
|
341
|
+
*/
|
|
342
|
+
resetIgnorePatternsToDefaults() {
|
|
343
|
+
this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
|
|
344
|
+
console.log(`🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Update embedding instance
|
|
348
|
+
* @param embedding New embedding instance
|
|
349
|
+
*/
|
|
350
|
+
updateEmbedding(embedding) {
|
|
351
|
+
this.embedding = embedding;
|
|
352
|
+
console.log(`🔄 Updated embedding provider: ${embedding.getProvider()}`);
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Update vector database instance
|
|
356
|
+
* @param vectorDatabase New vector database instance
|
|
357
|
+
*/
|
|
358
|
+
updateVectorDatabase(vectorDatabase) {
|
|
359
|
+
this.vectorDatabase = vectorDatabase;
|
|
360
|
+
console.log(`🔄 Updated vector database`);
|
|
361
|
+
}
|
|
362
|
+
/**
|
|
363
|
+
* Update splitter instance
|
|
364
|
+
* @param splitter New splitter instance
|
|
365
|
+
*/
|
|
366
|
+
updateSplitter(splitter) {
|
|
367
|
+
this.codeSplitter = splitter;
|
|
368
|
+
console.log(`🔄 Updated splitter instance`);
|
|
369
|
+
}
|
|
370
|
+
/**
|
|
371
|
+
* Prepare vector collection
|
|
372
|
+
*/
|
|
373
|
+
async prepareCollection(codebasePath) {
|
|
374
|
+
// Create new collection
|
|
375
|
+
console.log(`🔧 Preparing vector collection for codebase: ${codebasePath}`);
|
|
376
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
377
|
+
// For Ollama embeddings, ensure dimension is detected before creating collection
|
|
378
|
+
if (this.embedding.getProvider() === 'Ollama' && typeof this.embedding.initializeDimension === 'function') {
|
|
379
|
+
await this.embedding.initializeDimension();
|
|
380
|
+
}
|
|
381
|
+
const dimension = this.embedding.getDimension();
|
|
382
|
+
const dirName = path.basename(codebasePath);
|
|
383
|
+
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
|
384
|
+
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
|
385
|
+
}
|
|
386
|
+
/**
|
|
387
|
+
* Recursively get all code files in the codebase
|
|
388
|
+
*/
|
|
389
|
+
async getCodeFiles(codebasePath) {
|
|
390
|
+
const files = [];
|
|
391
|
+
const traverseDirectory = async (currentPath) => {
|
|
392
|
+
const entries = await fs.promises.readdir(currentPath, { withFileTypes: true });
|
|
393
|
+
for (const entry of entries) {
|
|
394
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
395
|
+
// Check if path matches ignore patterns
|
|
396
|
+
if (this.matchesIgnorePattern(fullPath, codebasePath)) {
|
|
397
|
+
continue;
|
|
398
|
+
}
|
|
399
|
+
if (entry.isDirectory()) {
|
|
400
|
+
await traverseDirectory(fullPath);
|
|
401
|
+
}
|
|
402
|
+
else if (entry.isFile()) {
|
|
403
|
+
const ext = path.extname(entry.name);
|
|
404
|
+
if (this.supportedExtensions.includes(ext)) {
|
|
405
|
+
files.push(fullPath);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
};
|
|
410
|
+
await traverseDirectory(codebasePath);
|
|
411
|
+
return files;
|
|
412
|
+
}
|
|
413
|
+
/**
|
|
414
|
+
* Process a list of files with streaming chunk processing
|
|
415
|
+
* @param filePaths Array of file paths to process
|
|
416
|
+
* @param codebasePath Base path for the codebase
|
|
417
|
+
* @param onFileProcessed Callback called when each file is processed
|
|
418
|
+
* @returns Object with processed file count and total chunk count
|
|
419
|
+
*/
|
|
420
|
+
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
|
421
|
+
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
422
|
+
const CHUNK_LIMIT = 450000;
|
|
423
|
+
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
|
424
|
+
let chunkBuffer = [];
|
|
425
|
+
let processedFiles = 0;
|
|
426
|
+
let totalChunks = 0;
|
|
427
|
+
let limitReached = false;
|
|
428
|
+
for (let i = 0; i < filePaths.length; i++) {
|
|
429
|
+
const filePath = filePaths[i];
|
|
430
|
+
try {
|
|
431
|
+
const content = await fs.promises.readFile(filePath, 'utf-8');
|
|
432
|
+
const language = this.getLanguageFromExtension(path.extname(filePath));
|
|
433
|
+
const chunks = await this.codeSplitter.split(content, language, filePath);
|
|
434
|
+
// Log files with many chunks or large content
|
|
435
|
+
if (chunks.length > 50) {
|
|
436
|
+
console.warn(`⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
|
|
437
|
+
}
|
|
438
|
+
else if (content.length > 100000) {
|
|
439
|
+
console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
|
|
440
|
+
}
|
|
441
|
+
// Add chunks to buffer
|
|
442
|
+
for (const chunk of chunks) {
|
|
443
|
+
chunkBuffer.push({ chunk, codebasePath });
|
|
444
|
+
totalChunks++;
|
|
445
|
+
// Process batch when buffer reaches EMBEDDING_BATCH_SIZE
|
|
446
|
+
if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
|
|
447
|
+
try {
|
|
448
|
+
await this.processChunkBuffer(chunkBuffer);
|
|
449
|
+
}
|
|
450
|
+
catch (error) {
|
|
451
|
+
// TODO:
|
|
452
|
+
console.error(`❌ Failed to process chunk batch: ${error}`);
|
|
453
|
+
}
|
|
454
|
+
finally {
|
|
455
|
+
chunkBuffer = []; // Always clear buffer, even on failure
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
// Check if chunk limit is reached
|
|
459
|
+
if (totalChunks >= CHUNK_LIMIT) {
|
|
460
|
+
console.warn(`⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
|
|
461
|
+
limitReached = true;
|
|
462
|
+
break; // Exit the inner loop (over chunks)
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
processedFiles++;
|
|
466
|
+
onFileProcessed?.(filePath, i + 1, filePaths.length);
|
|
467
|
+
if (limitReached) {
|
|
468
|
+
break; // Exit the outer loop (over files)
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
catch (error) {
|
|
472
|
+
console.warn(`⚠️ Skipping file ${filePath}: ${error}`);
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
// Process any remaining chunks in the buffer
|
|
476
|
+
if (chunkBuffer.length > 0) {
|
|
477
|
+
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks`);
|
|
478
|
+
try {
|
|
479
|
+
await this.processChunkBuffer(chunkBuffer);
|
|
480
|
+
}
|
|
481
|
+
catch (error) {
|
|
482
|
+
console.error(`❌ Failed to process final chunk batch: ${error}`);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
return {
|
|
486
|
+
processedFiles,
|
|
487
|
+
totalChunks,
|
|
488
|
+
status: limitReached ? 'limit_reached' : 'completed'
|
|
489
|
+
};
|
|
490
|
+
}
|
|
491
|
+
/**
|
|
492
|
+
* Process accumulated chunk buffer
|
|
493
|
+
*/
|
|
494
|
+
async processChunkBuffer(chunkBuffer) {
|
|
495
|
+
if (chunkBuffer.length === 0)
|
|
496
|
+
return;
|
|
497
|
+
// Extract chunks and ensure they all have the same codebasePath
|
|
498
|
+
const chunks = chunkBuffer.map(item => item.chunk);
|
|
499
|
+
const codebasePath = chunkBuffer[0].codebasePath;
|
|
500
|
+
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
|
501
|
+
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
|
502
|
+
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens)`);
|
|
503
|
+
await this.processChunkBatch(chunks, codebasePath);
|
|
504
|
+
}
|
|
505
|
+
/**
|
|
506
|
+
* Process a batch of chunks
|
|
507
|
+
*/
|
|
508
|
+
async processChunkBatch(chunks, codebasePath) {
|
|
509
|
+
// Generate embedding vectors
|
|
510
|
+
const chunkContents = chunks.map(chunk => chunk.content);
|
|
511
|
+
const embeddings = await this.embedding.embedBatch(chunkContents);
|
|
512
|
+
// Prepare vector documents
|
|
513
|
+
const documents = chunks.map((chunk, index) => {
|
|
514
|
+
if (!chunk.metadata.filePath) {
|
|
515
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
516
|
+
}
|
|
517
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
518
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
519
|
+
// Extract metadata that should be stored separately
|
|
520
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
521
|
+
return {
|
|
522
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
523
|
+
vector: embeddings[index].vector,
|
|
524
|
+
content: chunk.content,
|
|
525
|
+
relativePath,
|
|
526
|
+
startLine: chunk.metadata.startLine || 0,
|
|
527
|
+
endLine: chunk.metadata.endLine || 0,
|
|
528
|
+
fileExtension,
|
|
529
|
+
metadata: {
|
|
530
|
+
...restMetadata,
|
|
531
|
+
codebasePath,
|
|
532
|
+
language: chunk.metadata.language || 'unknown',
|
|
533
|
+
chunkIndex: index
|
|
534
|
+
}
|
|
535
|
+
};
|
|
536
|
+
});
|
|
537
|
+
// Store to vector database
|
|
538
|
+
await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Get programming language based on file extension
|
|
542
|
+
*/
|
|
543
|
+
getLanguageFromExtension(ext) {
|
|
544
|
+
const languageMap = {
|
|
545
|
+
'.ts': 'typescript',
|
|
546
|
+
'.tsx': 'typescript',
|
|
547
|
+
'.js': 'javascript',
|
|
548
|
+
'.jsx': 'javascript',
|
|
549
|
+
'.py': 'python',
|
|
550
|
+
'.java': 'java',
|
|
551
|
+
'.cpp': 'cpp',
|
|
552
|
+
'.c': 'c',
|
|
553
|
+
'.h': 'c',
|
|
554
|
+
'.hpp': 'cpp',
|
|
555
|
+
'.cs': 'csharp',
|
|
556
|
+
'.go': 'go',
|
|
557
|
+
'.rs': 'rust',
|
|
558
|
+
'.php': 'php',
|
|
559
|
+
'.rb': 'ruby',
|
|
560
|
+
'.swift': 'swift',
|
|
561
|
+
'.kt': 'kotlin',
|
|
562
|
+
'.scala': 'scala',
|
|
563
|
+
'.m': 'objective-c',
|
|
564
|
+
'.mm': 'objective-c',
|
|
565
|
+
'.ipynb': 'jupyter'
|
|
566
|
+
};
|
|
567
|
+
return languageMap[ext] || 'text';
|
|
568
|
+
}
|
|
569
|
+
/**
|
|
570
|
+
* Generate unique ID based on chunk content and location
|
|
571
|
+
* @param relativePath Relative path to the file
|
|
572
|
+
* @param startLine Start line number
|
|
573
|
+
* @param endLine End line number
|
|
574
|
+
* @param content Chunk content
|
|
575
|
+
* @returns Hash-based unique ID
|
|
576
|
+
*/
|
|
577
|
+
generateId(relativePath, startLine, endLine, content) {
|
|
578
|
+
const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
|
|
579
|
+
const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex');
|
|
580
|
+
return `chunk_${hash.substring(0, 16)}`;
|
|
581
|
+
}
|
|
582
|
+
/**
|
|
583
|
+
* Read ignore patterns from file (e.g., .gitignore)
|
|
584
|
+
* @param filePath Path to the ignore file
|
|
585
|
+
* @returns Array of ignore patterns
|
|
586
|
+
*/
|
|
587
|
+
static async getIgnorePatternsFromFile(filePath) {
|
|
588
|
+
try {
|
|
589
|
+
const content = await fs.promises.readFile(filePath, 'utf-8');
|
|
590
|
+
return content
|
|
591
|
+
.split('\n')
|
|
592
|
+
.map(line => line.trim())
|
|
593
|
+
.filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
|
|
594
|
+
}
|
|
595
|
+
catch (error) {
|
|
596
|
+
console.warn(`⚠️ Could not read ignore file ${filePath}: ${error}`);
|
|
597
|
+
return [];
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
/**
|
|
601
|
+
* Load ignore patterns from various ignore files in the codebase
|
|
602
|
+
* This method preserves any existing custom patterns that were added before
|
|
603
|
+
* @param codebasePath Path to the codebase
|
|
604
|
+
*/
|
|
605
|
+
async loadGitignorePatterns(codebasePath) {
|
|
606
|
+
try {
|
|
607
|
+
let fileBasedPatterns = [];
|
|
608
|
+
// 1. Load .gitignore
|
|
609
|
+
const gitignorePath = path.join(codebasePath, '.gitignore');
|
|
610
|
+
const gitignorePatterns = await this.loadIgnoreFile(gitignorePath, '.gitignore');
|
|
611
|
+
fileBasedPatterns.push(...gitignorePatterns);
|
|
612
|
+
// 2. Load all .xxxignore files in codebase directory
|
|
613
|
+
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
|
614
|
+
for (const ignoreFile of ignoreFiles) {
|
|
615
|
+
const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
|
|
616
|
+
fileBasedPatterns.push(...patterns);
|
|
617
|
+
}
|
|
618
|
+
// 3. Load global ~/.codecontext/.codecontextignore
|
|
619
|
+
const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
|
|
620
|
+
fileBasedPatterns.push(...globalIgnorePatterns);
|
|
621
|
+
// 4. Merge file-based patterns with existing patterns (which may include custom MCP patterns)
|
|
622
|
+
if (fileBasedPatterns.length > 0) {
|
|
623
|
+
this.addCustomIgnorePatterns(fileBasedPatterns);
|
|
624
|
+
console.log(`🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
|
|
625
|
+
}
|
|
626
|
+
else {
|
|
627
|
+
console.log('📄 No ignore files found, keeping existing patterns');
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
catch (error) {
|
|
631
|
+
console.warn(`⚠️ Failed to load ignore patterns: ${error}`);
|
|
632
|
+
// Continue with existing patterns on error - don't reset them
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
/**
|
|
636
|
+
* Find all .xxxignore files in the codebase directory (excluding .gitignore as it's handled separately)
|
|
637
|
+
* @param codebasePath Path to the codebase
|
|
638
|
+
* @returns Array of ignore file paths
|
|
639
|
+
*/
|
|
640
|
+
async findIgnoreFiles(codebasePath) {
|
|
641
|
+
try {
|
|
642
|
+
const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
|
|
643
|
+
const ignoreFiles = [];
|
|
644
|
+
for (const entry of entries) {
|
|
645
|
+
if (entry.isFile() &&
|
|
646
|
+
entry.name.startsWith('.') &&
|
|
647
|
+
entry.name.endsWith('ignore') &&
|
|
648
|
+
entry.name !== '.gitignore') { // Exclude .gitignore as it's handled separately
|
|
649
|
+
ignoreFiles.push(path.join(codebasePath, entry.name));
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
if (ignoreFiles.length > 0) {
|
|
653
|
+
console.log(`📄 Found additional ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
|
654
|
+
}
|
|
655
|
+
return ignoreFiles;
|
|
656
|
+
}
|
|
657
|
+
catch (error) {
|
|
658
|
+
console.warn(`⚠️ Failed to scan for ignore files: ${error}`);
|
|
659
|
+
return [];
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
/**
|
|
663
|
+
* Load global ignore file from ~/.codecontext/.codecontextignore
|
|
664
|
+
* @returns Array of ignore patterns
|
|
665
|
+
*/
|
|
666
|
+
async loadGlobalIgnoreFile() {
|
|
667
|
+
try {
|
|
668
|
+
const homeDir = require('os').homedir();
|
|
669
|
+
const globalIgnorePath = path.join(homeDir, '.codecontext', '.codecontextignore');
|
|
670
|
+
return await this.loadIgnoreFile(globalIgnorePath, 'global .codecontextignore');
|
|
671
|
+
}
|
|
672
|
+
catch (error) {
|
|
673
|
+
// Global ignore file is optional, don't log warnings
|
|
674
|
+
return [];
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
/**
|
|
678
|
+
* Load ignore patterns from a specific ignore file
|
|
679
|
+
* @param filePath Path to the ignore file
|
|
680
|
+
* @param fileName Display name for logging
|
|
681
|
+
* @returns Array of ignore patterns
|
|
682
|
+
*/
|
|
683
|
+
async loadIgnoreFile(filePath, fileName) {
|
|
684
|
+
try {
|
|
685
|
+
await fs.promises.access(filePath);
|
|
686
|
+
console.log(`📄 Found ${fileName} file at: ${filePath}`);
|
|
687
|
+
const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(filePath);
|
|
688
|
+
if (ignorePatterns.length > 0) {
|
|
689
|
+
console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
|
|
690
|
+
return ignorePatterns;
|
|
691
|
+
}
|
|
692
|
+
else {
|
|
693
|
+
console.log(`📄 ${fileName} file found but no valid patterns detected`);
|
|
694
|
+
return [];
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
catch (error) {
|
|
698
|
+
if (fileName === '.gitignore' || fileName.includes('global')) {
|
|
699
|
+
console.log(`📄 No ${fileName} file found`);
|
|
700
|
+
}
|
|
701
|
+
return [];
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
/**
|
|
705
|
+
* Check if a path matches any ignore pattern
|
|
706
|
+
* @param filePath Path to check
|
|
707
|
+
* @param basePath Base path for relative pattern matching
|
|
708
|
+
* @returns True if path should be ignored
|
|
709
|
+
*/
|
|
710
|
+
matchesIgnorePattern(filePath, basePath) {
|
|
711
|
+
if (this.ignorePatterns.length === 0) {
|
|
712
|
+
return false;
|
|
713
|
+
}
|
|
714
|
+
const relativePath = path.relative(basePath, filePath);
|
|
715
|
+
const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators
|
|
716
|
+
for (const pattern of this.ignorePatterns) {
|
|
717
|
+
if (this.isPatternMatch(normalizedPath, pattern)) {
|
|
718
|
+
return true;
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
return false;
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Simple glob pattern matching
|
|
725
|
+
* @param filePath File path to test
|
|
726
|
+
* @param pattern Glob pattern
|
|
727
|
+
* @returns True if pattern matches
|
|
728
|
+
*/
|
|
729
|
+
isPatternMatch(filePath, pattern) {
|
|
730
|
+
// Handle directory patterns (ending with /)
|
|
731
|
+
if (pattern.endsWith('/')) {
|
|
732
|
+
const dirPattern = pattern.slice(0, -1);
|
|
733
|
+
const pathParts = filePath.split('/');
|
|
734
|
+
return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
|
|
735
|
+
}
|
|
736
|
+
// Handle file patterns
|
|
737
|
+
if (pattern.includes('/')) {
|
|
738
|
+
// Pattern with path separator - match exact path
|
|
739
|
+
return this.simpleGlobMatch(filePath, pattern);
|
|
740
|
+
}
|
|
741
|
+
else {
|
|
742
|
+
// Pattern without path separator - match filename in any directory
|
|
743
|
+
const fileName = path.basename(filePath);
|
|
744
|
+
return this.simpleGlobMatch(fileName, pattern);
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
/**
|
|
748
|
+
* Simple glob matching supporting * wildcard
|
|
749
|
+
* @param text Text to test
|
|
750
|
+
* @param pattern Pattern with * wildcards
|
|
751
|
+
* @returns True if pattern matches
|
|
752
|
+
*/
|
|
753
|
+
simpleGlobMatch(text, pattern) {
|
|
754
|
+
// Convert glob pattern to regex
|
|
755
|
+
const regexPattern = pattern
|
|
756
|
+
.replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except *
|
|
757
|
+
.replace(/\*/g, '.*'); // Convert * to .*
|
|
758
|
+
const regex = new RegExp(`^${regexPattern}$`);
|
|
759
|
+
return regex.test(text);
|
|
760
|
+
}
|
|
761
|
+
/**
|
|
762
|
+
* Get custom extensions from environment variables
|
|
763
|
+
* Supports CUSTOM_EXTENSIONS as comma-separated list
|
|
764
|
+
* @returns Array of custom extensions
|
|
765
|
+
*/
|
|
766
|
+
getCustomExtensionsFromEnv() {
|
|
767
|
+
const envExtensions = env_manager_1.envManager.get('CUSTOM_EXTENSIONS');
|
|
768
|
+
if (!envExtensions) {
|
|
769
|
+
return [];
|
|
770
|
+
}
|
|
771
|
+
try {
|
|
772
|
+
const extensions = envExtensions
|
|
773
|
+
.split(',')
|
|
774
|
+
.map(ext => ext.trim())
|
|
775
|
+
.filter(ext => ext.length > 0)
|
|
776
|
+
.map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
|
|
777
|
+
return extensions;
|
|
778
|
+
}
|
|
779
|
+
catch (error) {
|
|
780
|
+
console.warn(`⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
|
|
781
|
+
return [];
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
/**
|
|
785
|
+
* Get custom ignore patterns from environment variables
|
|
786
|
+
* Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
|
|
787
|
+
* @returns Array of custom ignore patterns
|
|
788
|
+
*/
|
|
789
|
+
getCustomIgnorePatternsFromEnv() {
|
|
790
|
+
const envIgnorePatterns = env_manager_1.envManager.get('CUSTOM_IGNORE_PATTERNS');
|
|
791
|
+
if (!envIgnorePatterns) {
|
|
792
|
+
return [];
|
|
793
|
+
}
|
|
794
|
+
try {
|
|
795
|
+
const patterns = envIgnorePatterns
|
|
796
|
+
.split(',')
|
|
797
|
+
.map(pattern => pattern.trim())
|
|
798
|
+
.filter(pattern => pattern.length > 0);
|
|
799
|
+
return patterns;
|
|
800
|
+
}
|
|
801
|
+
catch (error) {
|
|
802
|
+
console.warn(`⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
|
|
803
|
+
return [];
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
/**
|
|
807
|
+
* Add custom extensions (from MCP or other sources) without replacing existing ones
|
|
808
|
+
* @param customExtensions Array of custom extensions to add
|
|
809
|
+
*/
|
|
810
|
+
addCustomExtensions(customExtensions) {
|
|
811
|
+
if (customExtensions.length === 0)
|
|
812
|
+
return;
|
|
813
|
+
// Ensure extensions start with dot
|
|
814
|
+
const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
|
|
815
|
+
// Merge current extensions with new custom extensions, avoiding duplicates
|
|
816
|
+
const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
|
|
817
|
+
const uniqueExtensions = [...new Set(mergedExtensions)];
|
|
818
|
+
this.supportedExtensions = uniqueExtensions;
|
|
819
|
+
console.log(`📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
|
|
820
|
+
}
|
|
821
|
+
/**
|
|
822
|
+
* Get current splitter information
|
|
823
|
+
*/
|
|
824
|
+
getSplitterInfo() {
|
|
825
|
+
const splitterName = this.codeSplitter.constructor.name;
|
|
826
|
+
if (splitterName === 'AstCodeSplitter') {
|
|
827
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
|
828
|
+
return {
|
|
829
|
+
type: 'ast',
|
|
830
|
+
hasBuiltinFallback: true,
|
|
831
|
+
supportedLanguages: AstCodeSplitter.getSupportedLanguages()
|
|
832
|
+
};
|
|
833
|
+
}
|
|
834
|
+
else {
|
|
835
|
+
return {
|
|
836
|
+
type: 'langchain',
|
|
837
|
+
hasBuiltinFallback: false
|
|
838
|
+
};
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
/**
|
|
842
|
+
* Check if current splitter supports a specific language
|
|
843
|
+
* @param language Programming language
|
|
844
|
+
*/
|
|
845
|
+
isLanguageSupported(language) {
|
|
846
|
+
const splitterName = this.codeSplitter.constructor.name;
|
|
847
|
+
if (splitterName === 'AstCodeSplitter') {
|
|
848
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
|
849
|
+
return AstCodeSplitter.isLanguageSupported(language);
|
|
850
|
+
}
|
|
851
|
+
// LangChain splitter supports most languages
|
|
852
|
+
return true;
|
|
853
|
+
}
|
|
854
|
+
/**
|
|
855
|
+
* Get which strategy would be used for a specific language
|
|
856
|
+
* @param language Programming language
|
|
857
|
+
*/
|
|
858
|
+
getSplitterStrategyForLanguage(language) {
|
|
859
|
+
const splitterName = this.codeSplitter.constructor.name;
|
|
860
|
+
if (splitterName === 'AstCodeSplitter') {
|
|
861
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
|
862
|
+
const isSupported = AstCodeSplitter.isLanguageSupported(language);
|
|
863
|
+
return {
|
|
864
|
+
strategy: isSupported ? 'ast' : 'langchain',
|
|
865
|
+
reason: isSupported
|
|
866
|
+
? 'Language supported by AST parser'
|
|
867
|
+
: 'Language not supported by AST, will fallback to LangChain'
|
|
868
|
+
};
|
|
869
|
+
}
|
|
870
|
+
else {
|
|
871
|
+
return {
|
|
872
|
+
strategy: 'langchain',
|
|
873
|
+
reason: 'Using LangChain splitter directly'
|
|
874
|
+
};
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
exports.CodeContext = CodeContext;
|
|
879
|
+
//# sourceMappingURL=context.js.map
|