@zilliz/claude-context-core 0.0.13 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.d.ts +50 -8
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +267 -79
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts +6 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +1 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +4 -0
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/ollama-embedding.d.ts +1 -10
- package/dist/embedding/ollama-embedding.d.ts.map +1 -1
- package/dist/embedding/ollama-embedding.js +42 -52
- package/dist/embedding/ollama-embedding.js.map +1 -1
- package/dist/embedding/openai-embedding.d.ts +2 -2
- package/dist/embedding/openai-embedding.d.ts.map +1 -1
- package/dist/embedding/openai-embedding.js +78 -32
- package/dist/embedding/openai-embedding.js.map +1 -1
- package/dist/embedding/voyageai-embedding.d.ts +1 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
- package/dist/embedding/voyageai-embedding.js +4 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -1
- package/dist/vectordb/index.d.ts +1 -1
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts +5 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.js +246 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
- package/dist/vectordb/milvus-vectordb.d.ts +4 -1
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-vectordb.js +201 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -1
- package/dist/vectordb/types.d.ts +41 -3
- package/dist/vectordb/types.d.ts.map +1 -1
- package/dist/vectordb/types.js.map +1 -1
- package/package.json +1 -1
package/dist/context.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Splitter } from './splitter';
|
|
|
2
2
|
import { Embedding } from './embedding';
|
|
3
3
|
import { VectorDatabase } from './vectordb';
|
|
4
4
|
import { SemanticSearchResult } from './types';
|
|
5
|
+
import { FileSynchronizer } from './sync/synchronizer';
|
|
5
6
|
export interface ContextConfig {
|
|
6
7
|
embedding?: Embedding;
|
|
7
8
|
vectorDatabase?: VectorDatabase;
|
|
@@ -20,13 +21,54 @@ export declare class Context {
|
|
|
20
21
|
private synchronizers;
|
|
21
22
|
constructor(config?: ContextConfig);
|
|
22
23
|
/**
|
|
23
|
-
*
|
|
24
|
+
* Get embedding instance
|
|
24
25
|
*/
|
|
25
|
-
|
|
26
|
+
getEmbedding(): Embedding;
|
|
26
27
|
/**
|
|
27
|
-
*
|
|
28
|
-
|
|
28
|
+
* Get vector database instance
|
|
29
|
+
*/
|
|
30
|
+
getVectorDatabase(): VectorDatabase;
|
|
31
|
+
/**
|
|
32
|
+
* Get code splitter instance
|
|
33
|
+
*/
|
|
34
|
+
getCodeSplitter(): Splitter;
|
|
35
|
+
/**
|
|
36
|
+
* Get supported extensions
|
|
37
|
+
*/
|
|
38
|
+
getSupportedExtensions(): string[];
|
|
39
|
+
/**
|
|
40
|
+
* Get ignore patterns
|
|
41
|
+
*/
|
|
42
|
+
getIgnorePatterns(): string[];
|
|
43
|
+
/**
|
|
44
|
+
* Get synchronizers map
|
|
45
|
+
*/
|
|
46
|
+
getSynchronizers(): Map<string, FileSynchronizer>;
|
|
47
|
+
/**
|
|
48
|
+
* Set synchronizer for a collection
|
|
49
|
+
*/
|
|
50
|
+
setSynchronizer(collectionName: string, synchronizer: FileSynchronizer): void;
|
|
51
|
+
/**
|
|
52
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
53
|
+
*/
|
|
54
|
+
getLoadedIgnorePatterns(codebasePath: string): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Public wrapper for prepareCollection private method
|
|
57
|
+
*/
|
|
58
|
+
getPreparedCollection(codebasePath: string): Promise<void>;
|
|
59
|
+
/**
|
|
60
|
+
* Get isHybrid setting from environment variable with default true
|
|
61
|
+
*/
|
|
62
|
+
private getIsHybrid;
|
|
63
|
+
/**
|
|
64
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
65
|
+
*/
|
|
66
|
+
getCollectionName(codebasePath: string): string;
|
|
67
|
+
/**
|
|
68
|
+
* Index a codebase for semantic search
|
|
69
|
+
* @param codebasePath Codebase root path
|
|
29
70
|
* @param progressCallback Optional progress callback function
|
|
71
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
30
72
|
* @returns Indexing statistics
|
|
31
73
|
*/
|
|
32
74
|
indexCodebase(codebasePath: string, progressCallback?: (progress: {
|
|
@@ -34,7 +76,7 @@ export declare class Context {
|
|
|
34
76
|
current: number;
|
|
35
77
|
total: number;
|
|
36
78
|
percentage: number;
|
|
37
|
-
}) => void): Promise<{
|
|
79
|
+
}) => void, forceReindex?: boolean): Promise<{
|
|
38
80
|
indexedFiles: number;
|
|
39
81
|
totalChunks: number;
|
|
40
82
|
status: 'completed' | 'limit_reached';
|
|
@@ -51,7 +93,7 @@ export declare class Context {
|
|
|
51
93
|
}>;
|
|
52
94
|
private deleteFileChunks;
|
|
53
95
|
/**
|
|
54
|
-
* Semantic search
|
|
96
|
+
* Semantic search with unified implementation
|
|
55
97
|
* @param codebasePath Codebase path to search in
|
|
56
98
|
* @param query Search query
|
|
57
99
|
* @param topK Number of results to return
|
|
@@ -152,9 +194,9 @@ export declare class Context {
|
|
|
152
194
|
* This method preserves any existing custom patterns that were added before
|
|
153
195
|
* @param codebasePath Path to the codebase
|
|
154
196
|
*/
|
|
155
|
-
private
|
|
197
|
+
private loadIgnorePatterns;
|
|
156
198
|
/**
|
|
157
|
-
* Find all .xxxignore files in the codebase directory
|
|
199
|
+
* Find all .xxxignore files in the codebase directory
|
|
158
200
|
* @param codebasePath Path to the codebase
|
|
159
201
|
* @returns Array of ignore file paths
|
|
160
202
|
*/
|
package/dist/context.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,
|
|
1
|
+
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,EAMjB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAK/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAiEvD,MAAM,WAAW,aAAa;IAC1B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,YAAY,CAAC,EAAE,QAAQ,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC;CACnC;AAED,qBAAa,OAAO;IAChB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,YAAY,CAAW;IAC/B,OAAO,CAAC,mBAAmB,CAAW;IACtC,OAAO,CAAC,cAAc,CAAW;IACjC,OAAO,CAAC,aAAa,CAAuC;gBAEhD,MAAM,GAAE,aAAkB;IAkDtC;;OAEG;IACH,YAAY,IAAI,SAAS;IAIzB;;OAEG;IACH,iBAAiB,IAAI,cAAc;IAInC;;OAEG;IACH,eAAe,IAAI,QAAQ;IAI3B;;OAEG;IACH,sBAAsB,IAAI,MAAM,EAAE;IAIlC;;OAEG;IACH,iBAAiB,IAAI,MAAM,EAAE;IAI7B;;OAEG;IACH,gBAAgB,IAAI,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAIjD;;OAEG;IACH,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,YAAY,EAAE,gBAAgB,GAAG,IAAI;IAI7E;;OAEG;IACG,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIlE;;OAEG;IACG,qBAAqB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIhE;;OAEG;IACH,OAAO,CAAC,WAAW;IAQnB;;OAEG;IACI,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM;IAQtD;;;;;;OAMG;IACG,aAAa,CACf,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,EAC5G,YAAY,GAAE,OAAe,GAC9B,OAAO,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,WAAW,GAAG,eAAe,CAAA;KAAE,CAAC;IA8D1F,eAAe,CACjB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;YAkElD,gBAAgB;IAkB9B;;;;;;OAMG;IACG,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC;IA4GrI;;;;OAIG;IACG,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;OAIG;IACG,UAAU,CACZ,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC,IAAI,CAAC;IAqBhB;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAUpD;;;OAGG;IACH,uBAAuB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAYvD;;OAEG;IACH,6BAA6B,IAAI,IAAI;IAKrC;;;OAGG;IACH,eAAe,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI;IAK3C;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,cAAc,GAAG,IAAI;IAK1D;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKxC;;OAEG;YACW,iBAAiB;IAkC/B;;OAEG;YACW,YAAY;IA6B1B;;;;;;GAMD;YACe,eAAe;IA2F7B;;GAED;YACe,kBAAkB;IAgBhC;;OAEG;YACW,iBAAiB;IAsE/B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA2BhC;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAMlB;;;;OAIG;WACU,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa3E;;;;OAIG;YACW,kBAAkB;IA4BhC;;;;OAIG;YACW,eAAe;IAwB7B;;;OAGG;YACW,oBAAoB;IAWlC;;;;;OAKG;YACW,cAAc;IAsB5B;;;;;OAKG;IACH,OAAO,CAAC,oBAAoB;IAiB5B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAmBtB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAUvB;;;;OAIG;IACH,OAAO,CAAC,0BAA0B;IAoBlC;;;;OAIG;IACH,OAAO,CAAC,8BAA8B;IAmBtC;;;OAGG;IACH,mBAAmB,CAAC,gBAAgB,EAAE,MAAM,EAAE,GAAG,IAAI;IAerD;;OAEG;IACH,eAAe,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,kBAAkB,EAAE,OAAO,CAAC;QAAC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE;IAkB/F;;;OAGG;IACH,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAY9C;;;OAGG;IACH,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,KAAK,GAAG,WAAW,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAoBtG"}
|
package/dist/context.js
CHANGED
|
@@ -141,27 +141,96 @@ class Context {
|
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
143
|
/**
|
|
144
|
-
*
|
|
144
|
+
* Get embedding instance
|
|
145
|
+
*/
|
|
146
|
+
getEmbedding() {
|
|
147
|
+
return this.embedding;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Get vector database instance
|
|
151
|
+
*/
|
|
152
|
+
getVectorDatabase() {
|
|
153
|
+
return this.vectorDatabase;
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Get code splitter instance
|
|
157
|
+
*/
|
|
158
|
+
getCodeSplitter() {
|
|
159
|
+
return this.codeSplitter;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Get supported extensions
|
|
163
|
+
*/
|
|
164
|
+
getSupportedExtensions() {
|
|
165
|
+
return [...this.supportedExtensions];
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Get ignore patterns
|
|
169
|
+
*/
|
|
170
|
+
getIgnorePatterns() {
|
|
171
|
+
return [...this.ignorePatterns];
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Get synchronizers map
|
|
175
|
+
*/
|
|
176
|
+
getSynchronizers() {
|
|
177
|
+
return new Map(this.synchronizers);
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Set synchronizer for a collection
|
|
181
|
+
*/
|
|
182
|
+
setSynchronizer(collectionName, synchronizer) {
|
|
183
|
+
this.synchronizers.set(collectionName, synchronizer);
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
187
|
+
*/
|
|
188
|
+
async getLoadedIgnorePatterns(codebasePath) {
|
|
189
|
+
return this.loadIgnorePatterns(codebasePath);
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Public wrapper for prepareCollection private method
|
|
193
|
+
*/
|
|
194
|
+
async getPreparedCollection(codebasePath) {
|
|
195
|
+
return this.prepareCollection(codebasePath);
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Get isHybrid setting from environment variable with default true
|
|
199
|
+
*/
|
|
200
|
+
getIsHybrid() {
|
|
201
|
+
const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
|
|
202
|
+
if (isHybridEnv === undefined || isHybridEnv === null) {
|
|
203
|
+
return true; // Default to true
|
|
204
|
+
}
|
|
205
|
+
return isHybridEnv.toLowerCase() === 'true';
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
145
209
|
*/
|
|
146
210
|
getCollectionName(codebasePath) {
|
|
211
|
+
const isHybrid = this.getIsHybrid();
|
|
147
212
|
const normalizedPath = path.resolve(codebasePath);
|
|
148
213
|
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
149
|
-
|
|
214
|
+
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
|
|
215
|
+
return `${prefix}_${hash.substring(0, 8)}`;
|
|
150
216
|
}
|
|
151
217
|
/**
|
|
152
|
-
* Index
|
|
153
|
-
* @param codebasePath Codebase path
|
|
218
|
+
* Index a codebase for semantic search
|
|
219
|
+
* @param codebasePath Codebase root path
|
|
154
220
|
* @param progressCallback Optional progress callback function
|
|
221
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
155
222
|
* @returns Indexing statistics
|
|
156
223
|
*/
|
|
157
|
-
async indexCodebase(codebasePath, progressCallback) {
|
|
158
|
-
|
|
224
|
+
async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
|
|
225
|
+
const isHybrid = this.getIsHybrid();
|
|
226
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
227
|
+
console.log(`🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
|
|
159
228
|
// 1. Load ignore patterns from various ignore files
|
|
160
|
-
await this.
|
|
229
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
161
230
|
// 2. Check and prepare vector collection
|
|
162
231
|
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
|
|
163
|
-
console.log(`Debug2: Preparing vector collection for codebase`);
|
|
164
|
-
await this.prepareCollection(codebasePath);
|
|
232
|
+
console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
233
|
+
await this.prepareCollection(codebasePath, forceReindex);
|
|
165
234
|
// 3. Recursively traverse codebase to get all supported files
|
|
166
235
|
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
|
|
167
236
|
const codeFiles = await this.getCodeFiles(codebasePath);
|
|
@@ -204,7 +273,7 @@ class Context {
|
|
|
204
273
|
const synchronizer = this.synchronizers.get(collectionName);
|
|
205
274
|
if (!synchronizer) {
|
|
206
275
|
// Load project-specific ignore patterns before creating FileSynchronizer
|
|
207
|
-
await this.
|
|
276
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
208
277
|
// To be safe, let's initialize if it's not there.
|
|
209
278
|
const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
|
|
210
279
|
await newSynchronizer.initialize();
|
|
@@ -260,29 +329,98 @@ class Context {
|
|
|
260
329
|
}
|
|
261
330
|
}
|
|
262
331
|
/**
|
|
263
|
-
* Semantic search
|
|
332
|
+
* Semantic search with unified implementation
|
|
264
333
|
* @param codebasePath Codebase path to search in
|
|
265
334
|
* @param query Search query
|
|
266
335
|
* @param topK Number of results to return
|
|
267
336
|
* @param threshold Similarity threshold
|
|
268
337
|
*/
|
|
269
338
|
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
//
|
|
276
|
-
const
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
339
|
+
const isHybrid = this.getIsHybrid();
|
|
340
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
341
|
+
console.log(`🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
|
|
342
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
343
|
+
console.log(`🔍 Using collection: ${collectionName}`);
|
|
344
|
+
// Check if collection exists and has data
|
|
345
|
+
const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
|
|
346
|
+
if (!hasCollection) {
|
|
347
|
+
console.log(`⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
|
|
348
|
+
return [];
|
|
349
|
+
}
|
|
350
|
+
if (isHybrid === true) {
|
|
351
|
+
try {
|
|
352
|
+
// Check collection stats to see if it has data
|
|
353
|
+
const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
|
|
354
|
+
console.log(`🔍 Collection '${collectionName}' exists and appears to have data`);
|
|
355
|
+
}
|
|
356
|
+
catch (error) {
|
|
357
|
+
console.log(`⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
358
|
+
}
|
|
359
|
+
// 1. Generate query vector
|
|
360
|
+
console.log(`🔍 Generating embeddings for query: "${query}"`);
|
|
361
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
362
|
+
console.log(`✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
|
|
363
|
+
console.log(`🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
|
|
364
|
+
// 2. Prepare hybrid search requests
|
|
365
|
+
const searchRequests = [
|
|
366
|
+
{
|
|
367
|
+
data: queryEmbedding.vector,
|
|
368
|
+
anns_field: "vector",
|
|
369
|
+
param: { "nprobe": 10 },
|
|
370
|
+
limit: topK
|
|
371
|
+
},
|
|
372
|
+
{
|
|
373
|
+
data: query,
|
|
374
|
+
anns_field: "sparse_vector",
|
|
375
|
+
param: { "drop_ratio_search": 0.2 },
|
|
376
|
+
limit: topK
|
|
377
|
+
}
|
|
378
|
+
];
|
|
379
|
+
console.log(`🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
380
|
+
console.log(`🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
381
|
+
// 3. Execute hybrid search
|
|
382
|
+
console.log(`🔍 Executing hybrid search with RRF reranking...`);
|
|
383
|
+
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
384
|
+
rerank: {
|
|
385
|
+
strategy: 'rrf',
|
|
386
|
+
params: { k: 100 }
|
|
387
|
+
},
|
|
388
|
+
limit: topK
|
|
389
|
+
});
|
|
390
|
+
console.log(`🔍 Raw search results count: ${searchResults.length}`);
|
|
391
|
+
// 4. Convert to semantic search result format
|
|
392
|
+
const results = searchResults.map(result => ({
|
|
393
|
+
content: result.document.content,
|
|
394
|
+
relativePath: result.document.relativePath,
|
|
395
|
+
startLine: result.document.startLine,
|
|
396
|
+
endLine: result.document.endLine,
|
|
397
|
+
language: result.document.metadata.language || 'unknown',
|
|
398
|
+
score: result.score
|
|
399
|
+
}));
|
|
400
|
+
console.log(`✅ Found ${results.length} relevant hybrid results`);
|
|
401
|
+
if (results.length > 0) {
|
|
402
|
+
console.log(`🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
|
|
403
|
+
}
|
|
404
|
+
return results;
|
|
405
|
+
}
|
|
406
|
+
else {
|
|
407
|
+
// Regular semantic search
|
|
408
|
+
// 1. Generate query vector
|
|
409
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
410
|
+
// 2. Search in vector database
|
|
411
|
+
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold });
|
|
412
|
+
// 3. Convert to semantic search result format
|
|
413
|
+
const results = searchResults.map(result => ({
|
|
414
|
+
content: result.document.content,
|
|
415
|
+
relativePath: result.document.relativePath,
|
|
416
|
+
startLine: result.document.startLine,
|
|
417
|
+
endLine: result.document.endLine,
|
|
418
|
+
language: result.document.metadata.language || 'unknown',
|
|
419
|
+
score: result.score
|
|
420
|
+
}));
|
|
421
|
+
console.log(`✅ Found ${results.length} relevant results`);
|
|
422
|
+
return results;
|
|
423
|
+
}
|
|
286
424
|
}
|
|
287
425
|
/**
|
|
288
426
|
* Check if index exists for codebase
|
|
@@ -374,17 +512,32 @@ class Context {
|
|
|
374
512
|
/**
|
|
375
513
|
* Prepare vector collection
|
|
376
514
|
*/
|
|
377
|
-
async prepareCollection(codebasePath) {
|
|
378
|
-
|
|
379
|
-
|
|
515
|
+
async prepareCollection(codebasePath, forceReindex = false) {
|
|
516
|
+
const isHybrid = this.getIsHybrid();
|
|
517
|
+
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
|
|
518
|
+
console.log(`🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
380
519
|
const collectionName = this.getCollectionName(codebasePath);
|
|
381
|
-
//
|
|
382
|
-
|
|
383
|
-
|
|
520
|
+
// Check if collection already exists
|
|
521
|
+
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
|
|
522
|
+
if (collectionExists && !forceReindex) {
|
|
523
|
+
console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
|
|
524
|
+
return;
|
|
525
|
+
}
|
|
526
|
+
if (collectionExists && forceReindex) {
|
|
527
|
+
console.log(`🗑️ Dropping existing collection ${collectionName} for force reindex...`);
|
|
528
|
+
await this.vectorDatabase.dropCollection(collectionName);
|
|
529
|
+
console.log(`✅ Collection ${collectionName} dropped successfully`);
|
|
384
530
|
}
|
|
385
|
-
|
|
531
|
+
console.log(`🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
|
|
532
|
+
const dimension = await this.embedding.detectDimension();
|
|
533
|
+
console.log(`📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
|
|
386
534
|
const dirName = path.basename(codebasePath);
|
|
387
|
-
|
|
535
|
+
if (isHybrid === true) {
|
|
536
|
+
await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
|
540
|
+
}
|
|
388
541
|
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
|
389
542
|
}
|
|
390
543
|
/**
|
|
@@ -422,6 +575,7 @@ class Context {
|
|
|
422
575
|
* @returns Object with processed file count and total chunk count
|
|
423
576
|
*/
|
|
424
577
|
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
|
578
|
+
const isHybrid = this.getIsHybrid();
|
|
425
579
|
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
426
580
|
const CHUNK_LIMIT = 450000;
|
|
427
581
|
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
|
@@ -452,8 +606,11 @@ class Context {
|
|
|
452
606
|
await this.processChunkBuffer(chunkBuffer);
|
|
453
607
|
}
|
|
454
608
|
catch (error) {
|
|
455
|
-
|
|
456
|
-
console.error(`❌ Failed to process chunk batch
|
|
609
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
610
|
+
console.error(`❌ Failed to process chunk batch for ${searchType}:`, error);
|
|
611
|
+
if (error instanceof Error) {
|
|
612
|
+
console.error('Stack trace:', error.stack);
|
|
613
|
+
}
|
|
457
614
|
}
|
|
458
615
|
finally {
|
|
459
616
|
chunkBuffer = []; // Always clear buffer, even on failure
|
|
@@ -478,12 +635,16 @@ class Context {
|
|
|
478
635
|
}
|
|
479
636
|
// Process any remaining chunks in the buffer
|
|
480
637
|
if (chunkBuffer.length > 0) {
|
|
481
|
-
|
|
638
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
639
|
+
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
|
|
482
640
|
try {
|
|
483
641
|
await this.processChunkBuffer(chunkBuffer);
|
|
484
642
|
}
|
|
485
643
|
catch (error) {
|
|
486
|
-
console.error(`❌ Failed to process final chunk batch
|
|
644
|
+
console.error(`❌ Failed to process final chunk batch for ${searchType}:`, error);
|
|
645
|
+
if (error instanceof Error) {
|
|
646
|
+
console.error('Stack trace:', error.stack);
|
|
647
|
+
}
|
|
487
648
|
}
|
|
488
649
|
}
|
|
489
650
|
return {
|
|
@@ -503,43 +664,75 @@ class Context {
|
|
|
503
664
|
const codebasePath = chunkBuffer[0].codebasePath;
|
|
504
665
|
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
|
505
666
|
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
|
506
|
-
|
|
667
|
+
const isHybrid = this.getIsHybrid();
|
|
668
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
669
|
+
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
|
|
507
670
|
await this.processChunkBatch(chunks, codebasePath);
|
|
508
671
|
}
|
|
509
672
|
/**
|
|
510
673
|
* Process a batch of chunks
|
|
511
674
|
*/
|
|
512
675
|
async processChunkBatch(chunks, codebasePath) {
|
|
676
|
+
const isHybrid = this.getIsHybrid();
|
|
513
677
|
// Generate embedding vectors
|
|
514
678
|
const chunkContents = chunks.map(chunk => chunk.content);
|
|
515
679
|
const embeddings = await this.embedding.embedBatch(chunkContents);
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
522
|
-
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
523
|
-
// Extract metadata that should be stored separately
|
|
524
|
-
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
525
|
-
return {
|
|
526
|
-
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
527
|
-
vector: embeddings[index].vector,
|
|
528
|
-
content: chunk.content,
|
|
529
|
-
relativePath,
|
|
530
|
-
startLine: chunk.metadata.startLine || 0,
|
|
531
|
-
endLine: chunk.metadata.endLine || 0,
|
|
532
|
-
fileExtension,
|
|
533
|
-
metadata: {
|
|
534
|
-
...restMetadata,
|
|
535
|
-
codebasePath,
|
|
536
|
-
language: chunk.metadata.language || 'unknown',
|
|
537
|
-
chunkIndex: index
|
|
680
|
+
if (isHybrid === true) {
|
|
681
|
+
// Create hybrid vector documents
|
|
682
|
+
const documents = chunks.map((chunk, index) => {
|
|
683
|
+
if (!chunk.metadata.filePath) {
|
|
684
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
538
685
|
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
686
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
687
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
688
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
689
|
+
return {
|
|
690
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
691
|
+
content: chunk.content, // Full text content for BM25 and storage
|
|
692
|
+
vector: embeddings[index].vector, // Dense vector
|
|
693
|
+
relativePath,
|
|
694
|
+
startLine: chunk.metadata.startLine || 0,
|
|
695
|
+
endLine: chunk.metadata.endLine || 0,
|
|
696
|
+
fileExtension,
|
|
697
|
+
metadata: {
|
|
698
|
+
...restMetadata,
|
|
699
|
+
codebasePath,
|
|
700
|
+
language: chunk.metadata.language || 'unknown',
|
|
701
|
+
chunkIndex: index
|
|
702
|
+
}
|
|
703
|
+
};
|
|
704
|
+
});
|
|
705
|
+
// Store to vector database
|
|
706
|
+
await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
|
|
707
|
+
}
|
|
708
|
+
else {
|
|
709
|
+
// Create regular vector documents
|
|
710
|
+
const documents = chunks.map((chunk, index) => {
|
|
711
|
+
if (!chunk.metadata.filePath) {
|
|
712
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
713
|
+
}
|
|
714
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
715
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
716
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
717
|
+
return {
|
|
718
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
719
|
+
vector: embeddings[index].vector,
|
|
720
|
+
content: chunk.content,
|
|
721
|
+
relativePath,
|
|
722
|
+
startLine: chunk.metadata.startLine || 0,
|
|
723
|
+
endLine: chunk.metadata.endLine || 0,
|
|
724
|
+
fileExtension,
|
|
725
|
+
metadata: {
|
|
726
|
+
...restMetadata,
|
|
727
|
+
codebasePath,
|
|
728
|
+
language: chunk.metadata.language || 'unknown',
|
|
729
|
+
chunkIndex: index
|
|
730
|
+
}
|
|
731
|
+
};
|
|
732
|
+
});
|
|
733
|
+
// Store to vector database
|
|
734
|
+
await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
|
|
735
|
+
}
|
|
543
736
|
}
|
|
544
737
|
/**
|
|
545
738
|
* Get programming language based on file extension
|
|
@@ -606,23 +799,19 @@ class Context {
|
|
|
606
799
|
* This method preserves any existing custom patterns that were added before
|
|
607
800
|
* @param codebasePath Path to the codebase
|
|
608
801
|
*/
|
|
609
|
-
async
|
|
802
|
+
async loadIgnorePatterns(codebasePath) {
|
|
610
803
|
try {
|
|
611
804
|
let fileBasedPatterns = [];
|
|
612
|
-
//
|
|
613
|
-
const gitignorePath = path.join(codebasePath, '.gitignore');
|
|
614
|
-
const gitignorePatterns = await this.loadIgnoreFile(gitignorePath, '.gitignore');
|
|
615
|
-
fileBasedPatterns.push(...gitignorePatterns);
|
|
616
|
-
// 2. Load all .xxxignore files in codebase directory
|
|
805
|
+
// Load all .xxxignore files in codebase directory
|
|
617
806
|
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
|
618
807
|
for (const ignoreFile of ignoreFiles) {
|
|
619
808
|
const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
|
|
620
809
|
fileBasedPatterns.push(...patterns);
|
|
621
810
|
}
|
|
622
|
-
//
|
|
811
|
+
// Load global ~/.context/.contextignore
|
|
623
812
|
const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
|
|
624
813
|
fileBasedPatterns.push(...globalIgnorePatterns);
|
|
625
|
-
//
|
|
814
|
+
// Merge file-based patterns with existing patterns (which may include custom MCP patterns)
|
|
626
815
|
if (fileBasedPatterns.length > 0) {
|
|
627
816
|
this.addCustomIgnorePatterns(fileBasedPatterns);
|
|
628
817
|
console.log(`🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
|
|
@@ -637,7 +826,7 @@ class Context {
|
|
|
637
826
|
}
|
|
638
827
|
}
|
|
639
828
|
/**
|
|
640
|
-
* Find all .xxxignore files in the codebase directory
|
|
829
|
+
* Find all .xxxignore files in the codebase directory
|
|
641
830
|
* @param codebasePath Path to the codebase
|
|
642
831
|
* @returns Array of ignore file paths
|
|
643
832
|
*/
|
|
@@ -648,13 +837,12 @@ class Context {
|
|
|
648
837
|
for (const entry of entries) {
|
|
649
838
|
if (entry.isFile() &&
|
|
650
839
|
entry.name.startsWith('.') &&
|
|
651
|
-
entry.name.endsWith('ignore')
|
|
652
|
-
entry.name !== '.gitignore') { // Exclude .gitignore as it's handled separately
|
|
840
|
+
entry.name.endsWith('ignore')) {
|
|
653
841
|
ignoreFiles.push(path.join(codebasePath, entry.name));
|
|
654
842
|
}
|
|
655
843
|
}
|
|
656
844
|
if (ignoreFiles.length > 0) {
|
|
657
|
-
console.log(`📄 Found
|
|
845
|
+
console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
|
658
846
|
}
|
|
659
847
|
return ignoreFiles;
|
|
660
848
|
}
|
|
@@ -699,7 +887,7 @@ class Context {
|
|
|
699
887
|
}
|
|
700
888
|
}
|
|
701
889
|
catch (error) {
|
|
702
|
-
if (fileName
|
|
890
|
+
if (fileName.includes('global')) {
|
|
703
891
|
console.log(`📄 No ${fileName} file found`);
|
|
704
892
|
}
|
|
705
893
|
return [];
|