@pleaseai/context-please-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/README.md +287 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/context.d.ts +276 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +1072 -0
- package/dist/context.js.map +1 -0
- package/dist/embedding/base-embedding.d.ts +51 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -0
- package/dist/embedding/base-embedding.js +36 -0
- package/dist/embedding/base-embedding.js.map +1 -0
- package/dist/embedding/gemini-embedding.d.ts +53 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -0
- package/dist/embedding/gemini-embedding.js +152 -0
- package/dist/embedding/gemini-embedding.js.map +1 -0
- package/dist/embedding/index.d.ts +6 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +24 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/embedding/ollama-embedding.d.ts +55 -0
- package/dist/embedding/ollama-embedding.d.ts.map +1 -0
- package/dist/embedding/ollama-embedding.js +192 -0
- package/dist/embedding/ollama-embedding.js.map +1 -0
- package/dist/embedding/openai-embedding.d.ts +36 -0
- package/dist/embedding/openai-embedding.d.ts.map +1 -0
- package/dist/embedding/openai-embedding.js +159 -0
- package/dist/embedding/openai-embedding.js.map +1 -0
- package/dist/embedding/voyageai-embedding.d.ts +44 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
- package/dist/embedding/voyageai-embedding.js +227 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/splitter/ast-splitter.d.ts +22 -0
- package/dist/splitter/ast-splitter.d.ts.map +1 -0
- package/dist/splitter/ast-splitter.js +234 -0
- package/dist/splitter/ast-splitter.js.map +1 -0
- package/dist/splitter/index.d.ts +41 -0
- package/dist/splitter/index.d.ts.map +1 -0
- package/dist/splitter/index.js +27 -0
- package/dist/splitter/index.js.map +1 -0
- package/dist/splitter/langchain-splitter.d.ts +13 -0
- package/dist/splitter/langchain-splitter.d.ts.map +1 -0
- package/dist/splitter/langchain-splitter.js +118 -0
- package/dist/splitter/langchain-splitter.js.map +1 -0
- package/dist/sync/merkle.d.ts +26 -0
- package/dist/sync/merkle.d.ts.map +1 -0
- package/dist/sync/merkle.js +112 -0
- package/dist/sync/merkle.js.map +1 -0
- package/dist/sync/synchronizer.d.ts +30 -0
- package/dist/sync/synchronizer.d.ts.map +1 -0
- package/dist/sync/synchronizer.js +339 -0
- package/dist/sync/synchronizer.js.map +1 -0
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/env-manager.d.ts +19 -0
- package/dist/utils/env-manager.d.ts.map +1 -0
- package/dist/utils/env-manager.js +125 -0
- package/dist/utils/env-manager.js.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +7 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/vectordb/base/base-vector-database.d.ts +58 -0
- package/dist/vectordb/base/base-vector-database.d.ts.map +1 -0
- package/dist/vectordb/base/base-vector-database.js +32 -0
- package/dist/vectordb/base/base-vector-database.js.map +1 -0
- package/dist/vectordb/factory.d.ts +80 -0
- package/dist/vectordb/factory.d.ts.map +1 -0
- package/dist/vectordb/factory.js +89 -0
- package/dist/vectordb/factory.js.map +1 -0
- package/dist/vectordb/index.d.ts +12 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +27 -0
- package/dist/vectordb/index.js.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts +75 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.js +707 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
- package/dist/vectordb/milvus-vectordb.d.ts +59 -0
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-vectordb.js +641 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -0
- package/dist/vectordb/qdrant-vectordb.d.ts +124 -0
- package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -0
- package/dist/vectordb/qdrant-vectordb.js +582 -0
- package/dist/vectordb/qdrant-vectordb.js.map +1 -0
- package/dist/vectordb/sparse/index.d.ts +4 -0
- package/dist/vectordb/sparse/index.d.ts.map +1 -0
- package/dist/vectordb/sparse/index.js +23 -0
- package/dist/vectordb/sparse/index.js.map +1 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts +104 -0
- package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -0
- package/dist/vectordb/sparse/simple-bm25.js +189 -0
- package/dist/vectordb/sparse/simple-bm25.js.map +1 -0
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts +54 -0
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -0
- package/dist/vectordb/sparse/sparse-vector-generator.js +3 -0
- package/dist/vectordb/sparse/sparse-vector-generator.js.map +1 -0
- package/dist/vectordb/sparse/types.d.ts +38 -0
- package/dist/vectordb/sparse/types.d.ts.map +1 -0
- package/dist/vectordb/sparse/types.js +3 -0
- package/dist/vectordb/sparse/types.js.map +1 -0
- package/dist/vectordb/types.d.ts +120 -0
- package/dist/vectordb/types.d.ts.map +1 -0
- package/dist/vectordb/types.js +9 -0
- package/dist/vectordb/types.js.map +1 -0
- package/dist/vectordb/zilliz-utils.d.ts +135 -0
- package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
- package/dist/vectordb/zilliz-utils.js +192 -0
- package/dist/vectordb/zilliz-utils.js.map +1 -0
- package/package.json +61 -0
package/dist/context.js
ADDED
@@ -0,0 +1,1072 @@
|
|
1
|
+
"use strict";
// --- TypeScript-generated CommonJS interop helpers (emitted by tsc) ---
// __createBinding: re-exports property k of module m onto object o (as k2),
// preserving live-binding semantics via a getter where possible.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// __setModuleDefault: attaches the original module as the namespace object's
// "default" property (used for `import * as ns` interop).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// __importStar: builds an ES-module-style namespace object from a CommonJS
// module; returns the module unchanged when it is already an ES module.
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        // Lazily pick the key-enumeration strategy on first call.
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
36
|
+
exports.Context = void 0;
|
37
|
+
const splitter_1 = require("./splitter");
|
38
|
+
const embedding_1 = require("./embedding");
|
39
|
+
const env_manager_1 = require("./utils/env-manager");
|
40
|
+
const fs = __importStar(require("fs"));
|
41
|
+
const path = __importStar(require("path"));
|
42
|
+
const crypto = __importStar(require("crypto"));
|
43
|
+
const synchronizer_1 = require("./sync/synchronizer");
|
44
|
+
// File extensions indexed by default. Extended at construction time with
// config-supplied and environment-supplied extensions (deduplicated via Set).
const DEFAULT_SUPPORTED_EXTENSIONS = [
    // Programming languages
    '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp',
    '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm',
    // Text and markup files
    '.md', '.markdown', '.ipynb',
    // Intentionally disabled defaults — kept for reference:
    // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
    // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
];
|
53
|
+
// Paths excluded from indexing by default. Extended at construction time with
// config-supplied and environment-supplied patterns (deduplicated via Set).
const DEFAULT_IGNORE_PATTERNS = [
    // Common build output and dependency directories
    'node_modules/**',
    'dist/**',
    'build/**',
    'out/**',
    'target/**',
    'coverage/**',
    '.nyc_output/**',
    // IDE and editor files
    '.vscode/**',
    '.idea/**',
    '*.swp',
    '*.swo',
    // Version control
    '.git/**',
    '.svn/**',
    '.hg/**',
    // Cache directories
    '.cache/**',
    '__pycache__/**',
    '.pytest_cache/**',
    // Logs and temporary files
    'logs/**',
    'tmp/**',
    'temp/**',
    '*.log',
    // Environment and config files
    '.env',
    '.env.*',
    '*.local',
    // Minified and bundled files
    '*.min.js',
    '*.min.css',
    '*.min.map',
    '*.bundle.js',
    '*.bundle.css',
    '*.chunk.js',
    '*.vendor.js',
    '*.polyfills.js',
    '*.runtime.js',
    '*.map', // source map files
    // NOTE(review): bare names below duplicate the glob forms above —
    // presumably for matchers that compare single path segments directly;
    // confirm against matchesIgnorePattern before pruning.
    'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
    'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
    'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
];
|
99
|
+
class Context {
|
100
|
+
    /**
     * Build a Context instance.
     *
     * @param config Optional settings: `embedding`, `vectorDatabase` (required),
     *               `codeSplitter`, `supportedExtensions`, `customExtensions`,
     *               `ignorePatterns`, `customIgnorePatterns`.
     * @throws Error when `config.vectorDatabase` is missing.
     */
    constructor(config = {}) {
        // Per-collection FileSynchronizer registry, keyed by collection name.
        this.synchronizers = new Map();
        // Initialize services
        // Default embedding is OpenAI; the placeholder key keeps construction
        // from failing when OPENAI_API_KEY is unset (calls will fail later).
        this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
            apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
            model: 'text-embedding-3-small',
            ...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
        });
        if (!config.vectorDatabase) {
            throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
        }
        this.vectorDatabase = config.vectorDatabase;
        // AST-based splitter: 2500-char chunks with 300-char overlap by default.
        this.codeSplitter = config.codeSplitter || new splitter_1.AstCodeSplitter(2500, 300);
        // Load custom extensions from environment variables
        // (getCustomExtensionsFromEnv is defined elsewhere in this file)
        const envCustomExtensions = this.getCustomExtensionsFromEnv();
        // Combine default extensions with config extensions and env extensions
        const allSupportedExtensions = [
            ...DEFAULT_SUPPORTED_EXTENSIONS,
            ...(config.supportedExtensions || []),
            ...(config.customExtensions || []),
            ...envCustomExtensions
        ];
        // Remove duplicates
        this.supportedExtensions = [...new Set(allSupportedExtensions)];
        // Load custom ignore patterns from environment variables
        const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
        // Start with default ignore patterns
        const allIgnorePatterns = [
            ...DEFAULT_IGNORE_PATTERNS,
            ...(config.ignorePatterns || []),
            ...(config.customIgnorePatterns || []),
            ...envCustomIgnorePatterns
        ];
        // Remove duplicates
        this.ignorePatterns = [...new Set(allIgnorePatterns)];
        console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
        if (envCustomExtensions.length > 0) {
            console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
        }
        if (envCustomIgnorePatterns.length > 0) {
            console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`);
        }
    }
|
143
|
+
/**
|
144
|
+
* Get embedding instance
|
145
|
+
*/
|
146
|
+
getEmbedding() {
|
147
|
+
return this.embedding;
|
148
|
+
}
|
149
|
+
/**
|
150
|
+
* Get vector database instance
|
151
|
+
*/
|
152
|
+
getVectorDatabase() {
|
153
|
+
return this.vectorDatabase;
|
154
|
+
}
|
155
|
+
/**
|
156
|
+
* Get code splitter instance
|
157
|
+
*/
|
158
|
+
getCodeSplitter() {
|
159
|
+
return this.codeSplitter;
|
160
|
+
}
|
161
|
+
/**
|
162
|
+
* Get supported extensions
|
163
|
+
*/
|
164
|
+
getSupportedExtensions() {
|
165
|
+
return [...this.supportedExtensions];
|
166
|
+
}
|
167
|
+
/**
|
168
|
+
* Get ignore patterns
|
169
|
+
*/
|
170
|
+
getIgnorePatterns() {
|
171
|
+
return [...this.ignorePatterns];
|
172
|
+
}
|
173
|
+
/**
|
174
|
+
* Get synchronizers map
|
175
|
+
*/
|
176
|
+
getSynchronizers() {
|
177
|
+
return new Map(this.synchronizers);
|
178
|
+
}
|
179
|
+
/**
|
180
|
+
* Set synchronizer for a collection
|
181
|
+
*/
|
182
|
+
setSynchronizer(collectionName, synchronizer) {
|
183
|
+
this.synchronizers.set(collectionName, synchronizer);
|
184
|
+
}
|
185
|
+
/**
|
186
|
+
* Public wrapper for loadIgnorePatterns private method
|
187
|
+
*/
|
188
|
+
async getLoadedIgnorePatterns(codebasePath) {
|
189
|
+
return this.loadIgnorePatterns(codebasePath);
|
190
|
+
}
|
191
|
+
/**
|
192
|
+
* Public wrapper for prepareCollection private method
|
193
|
+
*/
|
194
|
+
async getPreparedCollection(codebasePath) {
|
195
|
+
return this.prepareCollection(codebasePath);
|
196
|
+
}
|
197
|
+
/**
|
198
|
+
* Get isHybrid setting from environment variable with default true
|
199
|
+
*/
|
200
|
+
getIsHybrid() {
|
201
|
+
const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
|
202
|
+
if (isHybridEnv === undefined || isHybridEnv === null) {
|
203
|
+
return true; // Default to true
|
204
|
+
}
|
205
|
+
return isHybridEnv.toLowerCase() === 'true';
|
206
|
+
}
|
207
|
+
/**
|
208
|
+
* Generate collection name based on codebase path and hybrid mode
|
209
|
+
*/
|
210
|
+
getCollectionName(codebasePath) {
|
211
|
+
const isHybrid = this.getIsHybrid();
|
212
|
+
const normalizedPath = path.resolve(codebasePath);
|
213
|
+
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
214
|
+
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
|
215
|
+
return `${prefix}_${hash.substring(0, 8)}`;
|
216
|
+
}
|
217
|
+
    /**
     * Index a codebase for semantic search
     * @param codebasePath Codebase root path
     * @param progressCallback Optional progress callback function
     * @param forceReindex Whether to recreate the collection even if it exists
     * @returns Indexing statistics
     */
    async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
        const isHybrid = this.getIsHybrid();
        const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
        console.log(`[Context] 🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
        // 1. Load ignore patterns from various ignore files
        await this.loadIgnorePatterns(codebasePath);
        // 2. Check and prepare vector collection
        progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
        console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
        await this.prepareCollection(codebasePath, forceReindex);
        // 3. Recursively traverse codebase to get all supported files
        progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
        const codeFiles = await this.getCodeFiles(codebasePath);
        console.log(`[Context] 📁 Found ${codeFiles.length} code files`);
        if (codeFiles.length === 0) {
            // Nothing to do: report completion immediately.
            progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 });
            return { indexedFiles: 0, totalChunks: 0, status: 'completed' };
        }
        // 4. Process each file with streaming chunk processing
        // Reserve 10% for preparation, 90% for actual indexing
        const indexingStartPercentage = 10;
        const indexingEndPercentage = 100;
        const indexingRange = indexingEndPercentage - indexingStartPercentage;
        const result = await this.processFileList(codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => {
            // Calculate progress percentage within the 10..100 indexing band
            const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange;
            console.log(`[Context] 📊 Processed ${fileIndex}/${totalFiles} files`);
            progressCallback?.({
                phase: `Processing files (${fileIndex}/${totalFiles})...`,
                current: fileIndex,
                total: totalFiles,
                percentage: Math.round(progressPercentage)
            });
        });
        console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
        progressCallback?.({
            phase: 'Indexing complete!',
            current: result.processedFiles,
            total: codeFiles.length,
            percentage: 100
        });
        return {
            indexedFiles: result.processedFiles,
            totalChunks: result.totalChunks,
            status: result.status
        };
    }
|
271
|
+
    /**
     * Incrementally re-index only the files that changed since the last
     * snapshot, using (and lazily creating) the collection's FileSynchronizer.
     * @param codebasePath Codebase root path
     * @param progressCallback Optional progress callback function
     * @returns Counts of added, removed and modified files handled.
     */
    async reindexByChange(codebasePath, progressCallback) {
        const collectionName = this.getCollectionName(codebasePath);
        const synchronizer = this.synchronizers.get(collectionName);
        if (!synchronizer) {
            // Load project-specific ignore patterns before creating FileSynchronizer
            await this.loadIgnorePatterns(codebasePath);
            // To be safe, let's initialize if it's not there.
            const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
            await newSynchronizer.initialize();
            this.synchronizers.set(collectionName, newSynchronizer);
        }
        const currentSynchronizer = this.synchronizers.get(collectionName);
        progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 });
        const { added, removed, modified } = await currentSynchronizer.checkForChanges();
        const totalChanges = added.length + removed.length + modified.length;
        if (totalChanges === 0) {
            progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
            console.log('[Context] ✅ No file changes detected.');
            return { added: 0, removed: 0, modified: 0 };
        }
        console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
        let processedChanges = 0;
        // NOTE(review): modified files tick progress twice (chunk delete + reindex),
        // so `current` can exceed `total` and percentage can pass 100 — confirm intended.
        const updateProgress = (phase) => {
            processedChanges++;
            const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100);
            progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage });
        };
        // Handle removed files
        for (const file of removed) {
            await this.deleteFileChunks(collectionName, file);
            updateProgress(`Removed ${file}`);
        }
        // Handle modified files: drop stale chunks first, re-index below
        for (const file of modified) {
            await this.deleteFileChunks(collectionName, file);
            updateProgress(`Deleted old chunks for ${file}`);
        }
        // Handle added and modified files
        const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
        if (filesToIndex.length > 0) {
            await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
                updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
            });
        }
        console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
        progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
        return { added: added.length, removed: removed.length, modified: modified.length };
    }
|
319
|
+
async deleteFileChunks(collectionName, relativePath) {
|
320
|
+
// Escape backslashes for Milvus query expression (Windows path compatibility)
|
321
|
+
const escapedPath = relativePath.replace(/\\/g, '\\\\');
|
322
|
+
const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ['id']);
|
323
|
+
if (results.length > 0) {
|
324
|
+
const ids = results.map(r => r.id).filter(id => id);
|
325
|
+
if (ids.length > 0) {
|
326
|
+
await this.vectorDatabase.delete(collectionName, ids);
|
327
|
+
console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`);
|
328
|
+
}
|
329
|
+
}
|
330
|
+
}
|
331
|
+
    /**
     * Semantic search with unified implementation
     * Runs hybrid search (dense vector + sparse text with RRF rerank) when
     * hybrid mode is enabled, otherwise a plain dense-vector search.
     * @param codebasePath Codebase path to search in
     * @param query Search query
     * @param topK Number of results to return
     * @param threshold Similarity threshold
     *        NOTE(review): only applied in the non-hybrid branch; the hybrid
     *        path never uses it — confirm intended.
     * @param filterExpr Optional filter expression forwarded to the vector DB.
     * @returns Array of { content, relativePath, startLine, endLine, language, score }.
     */
    async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
        const isHybrid = this.getIsHybrid();
        const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
        console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
        const collectionName = this.getCollectionName(codebasePath);
        console.log(`[Context] 🔍 Using collection: ${collectionName}`);
        // Check if collection exists and has data
        const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
        if (!hasCollection) {
            console.log(`[Context] ⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
            return [];
        }
        if (isHybrid === true) {
            try {
                // Check collection stats to see if it has data
                // (best-effort probe: a failure is logged, never fatal)
                const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
                console.log(`[Context] 🔍 Collection '${collectionName}' exists and appears to have data`);
            }
            catch (error) {
                console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
            }
            // 1. Generate query vector
            console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
            const queryEmbedding = await this.embedding.embed(query);
            console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
            console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
            // 2. Prepare hybrid search requests
            // Dense request targets "vector"; sparse request sends the raw query
            // text against "sparse_vector".
            const searchRequests = [
                {
                    data: queryEmbedding.vector,
                    anns_field: "vector",
                    param: { "nprobe": 10 },
                    limit: topK
                },
                {
                    data: query,
                    anns_field: "sparse_vector",
                    param: { "drop_ratio_search": 0.2 },
                    limit: topK
                }
            ];
            console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
            console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
            // 3. Execute hybrid search
            console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`);
            const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
                rerank: {
                    strategy: 'rrf',
                    params: { k: 100 }
                },
                limit: topK,
                filterExpr
            });
            console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
            // 4. Convert to semantic search result format
            const results = searchResults.map(result => ({
                content: result.document.content,
                relativePath: result.document.relativePath,
                startLine: result.document.startLine,
                endLine: result.document.endLine,
                language: result.document.metadata.language || 'unknown',
                score: result.score
            }));
            console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
            if (results.length > 0) {
                console.log(`[Context] 🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
            }
            return results;
        }
        else {
            // Regular semantic search
            // 1. Generate query vector
            const queryEmbedding = await this.embedding.embed(query);
            // 2. Search in vector database
            const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
            // 3. Convert to semantic search result format
            const results = searchResults.map(result => ({
                content: result.document.content,
                relativePath: result.document.relativePath,
                startLine: result.document.startLine,
                endLine: result.document.endLine,
                language: result.document.metadata.language || 'unknown',
                score: result.score
            }));
            console.log(`[Context] ✅ Found ${results.length} relevant results`);
            return results;
        }
    }
|
426
|
+
/**
|
427
|
+
* Check if index exists for codebase
|
428
|
+
* @param codebasePath Codebase path to check
|
429
|
+
* @returns Whether index exists
|
430
|
+
*/
|
431
|
+
async hasIndex(codebasePath) {
|
432
|
+
const collectionName = this.getCollectionName(codebasePath);
|
433
|
+
return await this.vectorDatabase.hasCollection(collectionName);
|
434
|
+
}
|
435
|
+
    /**
     * Clear index
     * Drops the codebase's collection (if present) and deletes the sync
     * snapshot so a later re-index starts from scratch.
     * @param codebasePath Codebase path to clear index for
     * @param progressCallback Optional progress callback function
     */
    async clearIndex(codebasePath, progressCallback) {
        console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);
        progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
        const collectionName = this.getCollectionName(codebasePath);
        const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
        progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
        if (collectionExists) {
            await this.vectorDatabase.dropCollection(collectionName);
        }
        // Delete snapshot file
        // (done even when no collection existed, so stale sync state cannot survive)
        await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
        progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
        console.log('[Context] ✅ Index data cleaned');
    }
|
454
|
+
/**
|
455
|
+
* Update ignore patterns (merges with default patterns and existing patterns)
|
456
|
+
* @param ignorePatterns Array of ignore patterns to add to defaults
|
457
|
+
*/
|
458
|
+
updateIgnorePatterns(ignorePatterns) {
|
459
|
+
// Merge with default patterns and any existing custom patterns, avoiding duplicates
|
460
|
+
const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
|
461
|
+
const uniquePatterns = [];
|
462
|
+
const patternSet = new Set(mergedPatterns);
|
463
|
+
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
464
|
+
this.ignorePatterns = uniquePatterns;
|
465
|
+
console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
|
466
|
+
}
|
467
|
+
/**
|
468
|
+
* Add custom ignore patterns (from MCP or other sources) without replacing existing ones
|
469
|
+
* @param customPatterns Array of custom ignore patterns to add
|
470
|
+
*/
|
471
|
+
addCustomIgnorePatterns(customPatterns) {
|
472
|
+
if (customPatterns.length === 0)
|
473
|
+
return;
|
474
|
+
// Merge current patterns with new custom patterns, avoiding duplicates
|
475
|
+
const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
|
476
|
+
const uniquePatterns = [];
|
477
|
+
const patternSet = new Set(mergedPatterns);
|
478
|
+
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
479
|
+
this.ignorePatterns = uniquePatterns;
|
480
|
+
console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
|
481
|
+
}
|
482
|
+
/**
|
483
|
+
* Reset ignore patterns to defaults only
|
484
|
+
*/
|
485
|
+
resetIgnorePatternsToDefaults() {
|
486
|
+
this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS];
|
487
|
+
console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
|
488
|
+
}
|
489
|
+
    /**
     * Update embedding instance
     * @param embedding New embedding instance (must implement getProvider()).
     */
    updateEmbedding(embedding) {
        this.embedding = embedding;
        console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`);
    }
|
497
|
+
    /**
     * Update vector database instance
     * Existing collections/synchronizers are not re-validated here.
     * @param vectorDatabase New vector database instance
     */
    updateVectorDatabase(vectorDatabase) {
        this.vectorDatabase = vectorDatabase;
        console.log(`[Context] 🔄 Updated vector database`);
    }
|
505
|
+
    /**
     * Update splitter instance
     * Affects only files processed after this call.
     * @param splitter New splitter instance
     */
    updateSplitter(splitter) {
        this.codeSplitter = splitter;
        console.log(`[Context] 🔄 Updated splitter instance`);
    }
|
513
|
+
    /**
     * Prepare vector collection
     * Ensures the codebase's collection exists, sized to the embedding
     * provider's detected dimension; drops and recreates it when forceReindex
     * is set.
     * @param codebasePath Codebase root (used to derive the collection name).
     * @param forceReindex Recreate the collection even if it already exists.
     */
    async prepareCollection(codebasePath, forceReindex = false) {
        const isHybrid = this.getIsHybrid();
        const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
        console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
        const collectionName = this.getCollectionName(codebasePath);
        // Check if collection already exists
        const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
        if (collectionExists && !forceReindex) {
            console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
            return;
        }
        if (collectionExists && forceReindex) {
            console.log(`[Context] 🗑️ Dropping existing collection ${collectionName} for force reindex...`);
            await this.vectorDatabase.dropCollection(collectionName);
            console.log(`[Context] ✅ Collection ${collectionName} dropped successfully`);
        }
        console.log(`[Context] 🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
        const dimension = await this.embedding.detectDimension();
        console.log(`[Context] 📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
        const dirName = path.basename(codebasePath);
        if (isHybrid === true) {
            await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
        }
        else {
            await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
        }
        console.log(`[Context] ✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
    }
|
544
|
+
/**
|
545
|
+
* Recursively get all code files in the codebase
|
546
|
+
*/
|
547
|
+
async getCodeFiles(codebasePath) {
|
548
|
+
const files = [];
|
549
|
+
const traverseDirectory = async (currentPath) => {
|
550
|
+
const entries = await fs.promises.readdir(currentPath, { withFileTypes: true });
|
551
|
+
for (const entry of entries) {
|
552
|
+
const fullPath = path.join(currentPath, entry.name);
|
553
|
+
// Check if path matches ignore patterns
|
554
|
+
if (this.matchesIgnorePattern(fullPath, codebasePath)) {
|
555
|
+
continue;
|
556
|
+
}
|
557
|
+
if (entry.isDirectory()) {
|
558
|
+
await traverseDirectory(fullPath);
|
559
|
+
}
|
560
|
+
else if (entry.isFile()) {
|
561
|
+
const ext = path.extname(entry.name);
|
562
|
+
if (this.supportedExtensions.includes(ext)) {
|
563
|
+
files.push(fullPath);
|
564
|
+
}
|
565
|
+
}
|
566
|
+
}
|
567
|
+
};
|
568
|
+
await traverseDirectory(codebasePath);
|
569
|
+
return files;
|
570
|
+
}
|
571
|
+
/**
|
572
|
+
* Process a list of files with streaming chunk processing
|
573
|
+
* @param filePaths Array of file paths to process
|
574
|
+
* @param codebasePath Base path for the codebase
|
575
|
+
* @param onFileProcessed Callback called when each file is processed
|
576
|
+
* @returns Object with processed file count and total chunk count
|
577
|
+
*/
|
578
|
+
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
579
|
+
const isHybrid = this.getIsHybrid();
|
580
|
+
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
581
|
+
const CHUNK_LIMIT = 450000;
|
582
|
+
console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
583
|
+
let chunkBuffer = [];
|
584
|
+
let processedFiles = 0;
|
585
|
+
let totalChunks = 0;
|
586
|
+
let limitReached = false;
|
587
|
+
for (let i = 0; i < filePaths.length; i++) {
|
588
|
+
const filePath = filePaths[i];
|
589
|
+
try {
|
590
|
+
const content = await fs.promises.readFile(filePath, 'utf-8');
|
591
|
+
const language = this.getLanguageFromExtension(path.extname(filePath));
|
592
|
+
const chunks = await this.codeSplitter.split(content, language, filePath);
|
593
|
+
// Log files with many chunks or large content
|
594
|
+
if (chunks.length > 50) {
|
595
|
+
console.warn(`[Context] ⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`);
|
596
|
+
}
|
597
|
+
else if (content.length > 100000) {
|
598
|
+
console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`);
|
599
|
+
}
|
600
|
+
// Add chunks to buffer
|
601
|
+
for (const chunk of chunks) {
|
602
|
+
chunkBuffer.push({ chunk, codebasePath });
|
603
|
+
totalChunks++;
|
604
|
+
// Process batch when buffer reaches EMBEDDING_BATCH_SIZE
|
605
|
+
if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) {
|
606
|
+
try {
|
607
|
+
await this.processChunkBuffer(chunkBuffer);
|
608
|
+
}
|
609
|
+
catch (error) {
|
610
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
611
|
+
console.error(`[Context] ❌ Failed to process chunk batch for ${searchType}:`, error);
|
612
|
+
if (error instanceof Error) {
|
613
|
+
console.error('[Context] Stack trace:', error.stack);
|
614
|
+
}
|
615
|
+
}
|
616
|
+
finally {
|
617
|
+
chunkBuffer = []; // Always clear buffer, even on failure
|
618
|
+
}
|
619
|
+
}
|
620
|
+
// Check if chunk limit is reached
|
621
|
+
if (totalChunks >= CHUNK_LIMIT) {
|
622
|
+
console.warn(`[Context] ⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`);
|
623
|
+
limitReached = true;
|
624
|
+
break; // Exit the inner loop (over chunks)
|
625
|
+
}
|
626
|
+
}
|
627
|
+
processedFiles++;
|
628
|
+
onFileProcessed?.(filePath, i + 1, filePaths.length);
|
629
|
+
if (limitReached) {
|
630
|
+
break; // Exit the outer loop (over files)
|
631
|
+
}
|
632
|
+
}
|
633
|
+
catch (error) {
|
634
|
+
console.warn(`[Context] ⚠️ Skipping file ${filePath}: ${error}`);
|
635
|
+
}
|
636
|
+
}
|
637
|
+
// Process any remaining chunks in the buffer
|
638
|
+
if (chunkBuffer.length > 0) {
|
639
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
640
|
+
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
|
641
|
+
try {
|
642
|
+
await this.processChunkBuffer(chunkBuffer);
|
643
|
+
}
|
644
|
+
catch (error) {
|
645
|
+
console.error(`[Context] ❌ Failed to process final chunk batch for ${searchType}:`, error);
|
646
|
+
if (error instanceof Error) {
|
647
|
+
console.error('[Context] Stack trace:', error.stack);
|
648
|
+
}
|
649
|
+
}
|
650
|
+
}
|
651
|
+
return {
|
652
|
+
processedFiles,
|
653
|
+
totalChunks,
|
654
|
+
status: limitReached ? 'limit_reached' : 'completed'
|
655
|
+
};
|
656
|
+
}
|
657
|
+
/**
|
658
|
+
* Process accumulated chunk buffer
|
659
|
+
*/
|
660
|
+
async processChunkBuffer(chunkBuffer) {
|
661
|
+
if (chunkBuffer.length === 0)
|
662
|
+
return;
|
663
|
+
// Extract chunks and ensure they all have the same codebasePath
|
664
|
+
const chunks = chunkBuffer.map(item => item.chunk);
|
665
|
+
const codebasePath = chunkBuffer[0].codebasePath;
|
666
|
+
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
667
|
+
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
668
|
+
const isHybrid = this.getIsHybrid();
|
669
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
670
|
+
console.log(`[Context] 🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
|
671
|
+
await this.processChunkBatch(chunks, codebasePath);
|
672
|
+
}
|
673
|
+
/**
|
674
|
+
* Process a batch of chunks
|
675
|
+
*/
|
676
|
+
async processChunkBatch(chunks, codebasePath) {
|
677
|
+
const isHybrid = this.getIsHybrid();
|
678
|
+
// Generate embedding vectors
|
679
|
+
const chunkContents = chunks.map(chunk => chunk.content);
|
680
|
+
const embeddings = await this.embedding.embedBatch(chunkContents);
|
681
|
+
if (isHybrid === true) {
|
682
|
+
// Create hybrid vector documents
|
683
|
+
const documents = chunks.map((chunk, index) => {
|
684
|
+
if (!chunk.metadata.filePath) {
|
685
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
686
|
+
}
|
687
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
688
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
689
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
690
|
+
return {
|
691
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
692
|
+
content: chunk.content, // Full text content for BM25 and storage
|
693
|
+
vector: embeddings[index].vector, // Dense vector
|
694
|
+
relativePath,
|
695
|
+
startLine: chunk.metadata.startLine || 0,
|
696
|
+
endLine: chunk.metadata.endLine || 0,
|
697
|
+
fileExtension,
|
698
|
+
metadata: {
|
699
|
+
...restMetadata,
|
700
|
+
codebasePath,
|
701
|
+
language: chunk.metadata.language || 'unknown',
|
702
|
+
chunkIndex: index
|
703
|
+
}
|
704
|
+
};
|
705
|
+
});
|
706
|
+
// Store to vector database
|
707
|
+
await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
|
708
|
+
}
|
709
|
+
else {
|
710
|
+
// Create regular vector documents
|
711
|
+
const documents = chunks.map((chunk, index) => {
|
712
|
+
if (!chunk.metadata.filePath) {
|
713
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
714
|
+
}
|
715
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
716
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
717
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
718
|
+
return {
|
719
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
720
|
+
vector: embeddings[index].vector,
|
721
|
+
content: chunk.content,
|
722
|
+
relativePath,
|
723
|
+
startLine: chunk.metadata.startLine || 0,
|
724
|
+
endLine: chunk.metadata.endLine || 0,
|
725
|
+
fileExtension,
|
726
|
+
metadata: {
|
727
|
+
...restMetadata,
|
728
|
+
codebasePath,
|
729
|
+
language: chunk.metadata.language || 'unknown',
|
730
|
+
chunkIndex: index
|
731
|
+
}
|
732
|
+
};
|
733
|
+
});
|
734
|
+
// Store to vector database
|
735
|
+
await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
|
736
|
+
}
|
737
|
+
}
|
738
|
+
/**
|
739
|
+
* Get programming language based on file extension
|
740
|
+
*/
|
741
|
+
getLanguageFromExtension(ext) {
|
742
|
+
const languageMap = {
|
743
|
+
'.ts': 'typescript',
|
744
|
+
'.tsx': 'typescript',
|
745
|
+
'.js': 'javascript',
|
746
|
+
'.jsx': 'javascript',
|
747
|
+
'.py': 'python',
|
748
|
+
'.java': 'java',
|
749
|
+
'.cpp': 'cpp',
|
750
|
+
'.c': 'c',
|
751
|
+
'.h': 'c',
|
752
|
+
'.hpp': 'cpp',
|
753
|
+
'.cs': 'csharp',
|
754
|
+
'.go': 'go',
|
755
|
+
'.rs': 'rust',
|
756
|
+
'.php': 'php',
|
757
|
+
'.rb': 'ruby',
|
758
|
+
'.swift': 'swift',
|
759
|
+
'.kt': 'kotlin',
|
760
|
+
'.scala': 'scala',
|
761
|
+
'.m': 'objective-c',
|
762
|
+
'.mm': 'objective-c',
|
763
|
+
'.ipynb': 'jupyter'
|
764
|
+
};
|
765
|
+
return languageMap[ext] || 'text';
|
766
|
+
}
|
767
|
+
/**
|
768
|
+
* Generate unique ID based on chunk content and location
|
769
|
+
* @param relativePath Relative path to the file
|
770
|
+
* @param startLine Start line number
|
771
|
+
* @param endLine End line number
|
772
|
+
* @param content Chunk content
|
773
|
+
* @returns Hash-based unique ID
|
774
|
+
*/
|
775
|
+
generateId(relativePath, startLine, endLine, content) {
|
776
|
+
const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`;
|
777
|
+
const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex');
|
778
|
+
return `chunk_${hash.substring(0, 16)}`;
|
779
|
+
}
|
780
|
+
/**
|
781
|
+
* Read ignore patterns from file (e.g., .gitignore)
|
782
|
+
* @param filePath Path to the ignore file
|
783
|
+
* @returns Array of ignore patterns
|
784
|
+
*/
|
785
|
+
static async getIgnorePatternsFromFile(filePath) {
|
786
|
+
try {
|
787
|
+
const content = await fs.promises.readFile(filePath, 'utf-8');
|
788
|
+
return content
|
789
|
+
.split('\n')
|
790
|
+
.map(line => line.trim())
|
791
|
+
.filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
|
792
|
+
}
|
793
|
+
catch (error) {
|
794
|
+
console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`);
|
795
|
+
return [];
|
796
|
+
}
|
797
|
+
}
|
798
|
+
/**
|
799
|
+
* Load ignore patterns from various ignore files in the codebase
|
800
|
+
* This method preserves any existing custom patterns that were added before
|
801
|
+
* @param codebasePath Path to the codebase
|
802
|
+
*/
|
803
|
+
async loadIgnorePatterns(codebasePath) {
|
804
|
+
try {
|
805
|
+
let fileBasedPatterns = [];
|
806
|
+
// Load all .xxxignore files in codebase directory
|
807
|
+
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
808
|
+
for (const ignoreFile of ignoreFiles) {
|
809
|
+
const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
|
810
|
+
fileBasedPatterns.push(...patterns);
|
811
|
+
}
|
812
|
+
// Load global ~/.context/.contextignore
|
813
|
+
const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
|
814
|
+
fileBasedPatterns.push(...globalIgnorePatterns);
|
815
|
+
// Merge file-based patterns with existing patterns (which may include custom MCP patterns)
|
816
|
+
if (fileBasedPatterns.length > 0) {
|
817
|
+
this.addCustomIgnorePatterns(fileBasedPatterns);
|
818
|
+
console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
|
819
|
+
}
|
820
|
+
else {
|
821
|
+
console.log('📄 No ignore files found, keeping existing patterns');
|
822
|
+
}
|
823
|
+
}
|
824
|
+
catch (error) {
|
825
|
+
console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`);
|
826
|
+
// Continue with existing patterns on error - don't reset them
|
827
|
+
}
|
828
|
+
}
|
829
|
+
/**
|
830
|
+
* Find all .xxxignore files in the codebase directory
|
831
|
+
* @param codebasePath Path to the codebase
|
832
|
+
* @returns Array of ignore file paths
|
833
|
+
*/
|
834
|
+
async findIgnoreFiles(codebasePath) {
|
835
|
+
try {
|
836
|
+
const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
|
837
|
+
const ignoreFiles = [];
|
838
|
+
for (const entry of entries) {
|
839
|
+
if (entry.isFile() &&
|
840
|
+
entry.name.startsWith('.') &&
|
841
|
+
entry.name.endsWith('ignore')) {
|
842
|
+
ignoreFiles.push(path.join(codebasePath, entry.name));
|
843
|
+
}
|
844
|
+
}
|
845
|
+
if (ignoreFiles.length > 0) {
|
846
|
+
console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
847
|
+
}
|
848
|
+
return ignoreFiles;
|
849
|
+
}
|
850
|
+
catch (error) {
|
851
|
+
console.warn(`[Context] ⚠️ Failed to scan for ignore files: ${error}`);
|
852
|
+
return [];
|
853
|
+
}
|
854
|
+
}
|
855
|
+
/**
|
856
|
+
* Load global ignore file from ~/.context/.contextignore
|
857
|
+
* @returns Array of ignore patterns
|
858
|
+
*/
|
859
|
+
async loadGlobalIgnoreFile() {
|
860
|
+
try {
|
861
|
+
const homeDir = require('os').homedir();
|
862
|
+
const globalIgnorePath = path.join(homeDir, '.context', '.contextignore');
|
863
|
+
return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore');
|
864
|
+
}
|
865
|
+
catch (error) {
|
866
|
+
// Global ignore file is optional, don't log warnings
|
867
|
+
return [];
|
868
|
+
}
|
869
|
+
}
|
870
|
+
/**
|
871
|
+
* Load ignore patterns from a specific ignore file
|
872
|
+
* @param filePath Path to the ignore file
|
873
|
+
* @param fileName Display name for logging
|
874
|
+
* @returns Array of ignore patterns
|
875
|
+
*/
|
876
|
+
async loadIgnoreFile(filePath, fileName) {
|
877
|
+
try {
|
878
|
+
await fs.promises.access(filePath);
|
879
|
+
console.log(`📄 Found ${fileName} file at: ${filePath}`);
|
880
|
+
const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath);
|
881
|
+
if (ignorePatterns.length > 0) {
|
882
|
+
console.log(`[Context] 🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`);
|
883
|
+
return ignorePatterns;
|
884
|
+
}
|
885
|
+
else {
|
886
|
+
console.log(`📄 ${fileName} file found but no valid patterns detected`);
|
887
|
+
return [];
|
888
|
+
}
|
889
|
+
}
|
890
|
+
catch (error) {
|
891
|
+
if (fileName.includes('global')) {
|
892
|
+
console.log(`📄 No ${fileName} file found`);
|
893
|
+
}
|
894
|
+
return [];
|
895
|
+
}
|
896
|
+
}
|
897
|
+
/**
|
898
|
+
* Check if a path matches any ignore pattern
|
899
|
+
* @param filePath Path to check
|
900
|
+
* @param basePath Base path for relative pattern matching
|
901
|
+
* @returns True if path should be ignored
|
902
|
+
*/
|
903
|
+
matchesIgnorePattern(filePath, basePath) {
|
904
|
+
if (this.ignorePatterns.length === 0) {
|
905
|
+
return false;
|
906
|
+
}
|
907
|
+
const relativePath = path.relative(basePath, filePath);
|
908
|
+
const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators
|
909
|
+
for (const pattern of this.ignorePatterns) {
|
910
|
+
if (this.isPatternMatch(normalizedPath, pattern)) {
|
911
|
+
return true;
|
912
|
+
}
|
913
|
+
}
|
914
|
+
return false;
|
915
|
+
}
|
916
|
+
/**
|
917
|
+
* Simple glob pattern matching
|
918
|
+
* @param filePath File path to test
|
919
|
+
* @param pattern Glob pattern
|
920
|
+
* @returns True if pattern matches
|
921
|
+
*/
|
922
|
+
isPatternMatch(filePath, pattern) {
|
923
|
+
// Handle directory patterns (ending with /)
|
924
|
+
if (pattern.endsWith('/')) {
|
925
|
+
const dirPattern = pattern.slice(0, -1);
|
926
|
+
const pathParts = filePath.split('/');
|
927
|
+
return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
|
928
|
+
}
|
929
|
+
// Handle file patterns
|
930
|
+
if (pattern.includes('/')) {
|
931
|
+
// Pattern with path separator - match exact path
|
932
|
+
return this.simpleGlobMatch(filePath, pattern);
|
933
|
+
}
|
934
|
+
else {
|
935
|
+
// Pattern without path separator - match filename in any directory
|
936
|
+
const fileName = path.basename(filePath);
|
937
|
+
return this.simpleGlobMatch(fileName, pattern);
|
938
|
+
}
|
939
|
+
}
|
940
|
+
/**
|
941
|
+
* Simple glob matching supporting * wildcard
|
942
|
+
* @param text Text to test
|
943
|
+
* @param pattern Pattern with * wildcards
|
944
|
+
* @returns True if pattern matches
|
945
|
+
*/
|
946
|
+
simpleGlobMatch(text, pattern) {
|
947
|
+
// Convert glob pattern to regex
|
948
|
+
const regexPattern = pattern
|
949
|
+
.replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except *
|
950
|
+
.replace(/\*/g, '.*'); // Convert * to .*
|
951
|
+
const regex = new RegExp(`^${regexPattern}$`);
|
952
|
+
return regex.test(text);
|
953
|
+
}
|
954
|
+
/**
|
955
|
+
* Get custom extensions from environment variables
|
956
|
+
* Supports CUSTOM_EXTENSIONS as comma-separated list
|
957
|
+
* @returns Array of custom extensions
|
958
|
+
*/
|
959
|
+
getCustomExtensionsFromEnv() {
|
960
|
+
const envExtensions = env_manager_1.envManager.get('CUSTOM_EXTENSIONS');
|
961
|
+
if (!envExtensions) {
|
962
|
+
return [];
|
963
|
+
}
|
964
|
+
try {
|
965
|
+
const extensions = envExtensions
|
966
|
+
.split(',')
|
967
|
+
.map(ext => ext.trim())
|
968
|
+
.filter(ext => ext.length > 0)
|
969
|
+
.map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
|
970
|
+
return extensions;
|
971
|
+
}
|
972
|
+
catch (error) {
|
973
|
+
console.warn(`[Context] ⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`);
|
974
|
+
return [];
|
975
|
+
}
|
976
|
+
}
|
977
|
+
/**
|
978
|
+
* Get custom ignore patterns from environment variables
|
979
|
+
* Supports CUSTOM_IGNORE_PATTERNS as comma-separated list
|
980
|
+
* @returns Array of custom ignore patterns
|
981
|
+
*/
|
982
|
+
getCustomIgnorePatternsFromEnv() {
|
983
|
+
const envIgnorePatterns = env_manager_1.envManager.get('CUSTOM_IGNORE_PATTERNS');
|
984
|
+
if (!envIgnorePatterns) {
|
985
|
+
return [];
|
986
|
+
}
|
987
|
+
try {
|
988
|
+
const patterns = envIgnorePatterns
|
989
|
+
.split(',')
|
990
|
+
.map(pattern => pattern.trim())
|
991
|
+
.filter(pattern => pattern.length > 0);
|
992
|
+
return patterns;
|
993
|
+
}
|
994
|
+
catch (error) {
|
995
|
+
console.warn(`[Context] ⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`);
|
996
|
+
return [];
|
997
|
+
}
|
998
|
+
}
|
999
|
+
/**
|
1000
|
+
* Add custom extensions (from MCP or other sources) without replacing existing ones
|
1001
|
+
* @param customExtensions Array of custom extensions to add
|
1002
|
+
*/
|
1003
|
+
addCustomExtensions(customExtensions) {
|
1004
|
+
if (customExtensions.length === 0)
|
1005
|
+
return;
|
1006
|
+
// Ensure extensions start with dot
|
1007
|
+
const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
|
1008
|
+
// Merge current extensions with new custom extensions, avoiding duplicates
|
1009
|
+
const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
|
1010
|
+
const uniqueExtensions = [...new Set(mergedExtensions)];
|
1011
|
+
this.supportedExtensions = uniqueExtensions;
|
1012
|
+
console.log(`[Context] 📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`);
|
1013
|
+
}
|
1014
|
+
/**
|
1015
|
+
* Get current splitter information
|
1016
|
+
*/
|
1017
|
+
getSplitterInfo() {
|
1018
|
+
const splitterName = this.codeSplitter.constructor.name;
|
1019
|
+
if (splitterName === 'AstCodeSplitter') {
|
1020
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
1021
|
+
return {
|
1022
|
+
type: 'ast',
|
1023
|
+
hasBuiltinFallback: true,
|
1024
|
+
supportedLanguages: AstCodeSplitter.getSupportedLanguages()
|
1025
|
+
};
|
1026
|
+
}
|
1027
|
+
else {
|
1028
|
+
return {
|
1029
|
+
type: 'langchain',
|
1030
|
+
hasBuiltinFallback: false
|
1031
|
+
};
|
1032
|
+
}
|
1033
|
+
}
|
1034
|
+
/**
|
1035
|
+
* Check if current splitter supports a specific language
|
1036
|
+
* @param language Programming language
|
1037
|
+
*/
|
1038
|
+
isLanguageSupported(language) {
|
1039
|
+
const splitterName = this.codeSplitter.constructor.name;
|
1040
|
+
if (splitterName === 'AstCodeSplitter') {
|
1041
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
1042
|
+
return AstCodeSplitter.isLanguageSupported(language);
|
1043
|
+
}
|
1044
|
+
// LangChain splitter supports most languages
|
1045
|
+
return true;
|
1046
|
+
}
|
1047
|
+
/**
|
1048
|
+
* Get which strategy would be used for a specific language
|
1049
|
+
* @param language Programming language
|
1050
|
+
*/
|
1051
|
+
getSplitterStrategyForLanguage(language) {
|
1052
|
+
const splitterName = this.codeSplitter.constructor.name;
|
1053
|
+
if (splitterName === 'AstCodeSplitter') {
|
1054
|
+
const { AstCodeSplitter } = require('./splitter/ast-splitter');
|
1055
|
+
const isSupported = AstCodeSplitter.isLanguageSupported(language);
|
1056
|
+
return {
|
1057
|
+
strategy: isSupported ? 'ast' : 'langchain',
|
1058
|
+
reason: isSupported
|
1059
|
+
? 'Language supported by AST parser'
|
1060
|
+
: 'Language not supported by AST, will fallback to LangChain'
|
1061
|
+
};
|
1062
|
+
}
|
1063
|
+
else {
|
1064
|
+
return {
|
1065
|
+
strategy: 'langchain',
|
1066
|
+
reason: 'Using LangChain splitter directly'
|
1067
|
+
};
|
1068
|
+
}
|
1069
|
+
}
|
1070
|
+
}
|
1071
|
+
exports.Context = Context;
|
1072
|
+
//# sourceMappingURL=context.js.map
|