@zilliz/claude-context-core 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.d.ts +50 -8
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +271 -79
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts +6 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +1 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +4 -0
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/ollama-embedding.d.ts +1 -10
- package/dist/embedding/ollama-embedding.d.ts.map +1 -1
- package/dist/embedding/ollama-embedding.js +42 -52
- package/dist/embedding/ollama-embedding.js.map +1 -1
- package/dist/embedding/openai-embedding.d.ts +2 -2
- package/dist/embedding/openai-embedding.d.ts.map +1 -1
- package/dist/embedding/openai-embedding.js +78 -32
- package/dist/embedding/openai-embedding.js.map +1 -1
- package/dist/embedding/voyageai-embedding.d.ts +1 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
- package/dist/embedding/voyageai-embedding.js +4 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -1
- package/dist/vectordb/index.d.ts +1 -1
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts +5 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.js +246 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
- package/dist/vectordb/milvus-vectordb.d.ts +4 -1
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-vectordb.js +201 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -1
- package/dist/vectordb/types.d.ts +41 -3
- package/dist/vectordb/types.d.ts.map +1 -1
- package/dist/vectordb/types.js.map +1 -1
- package/package.json +1 -1
package/dist/context.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Splitter } from './splitter';
|
|
|
2
2
|
import { Embedding } from './embedding';
|
|
3
3
|
import { VectorDatabase } from './vectordb';
|
|
4
4
|
import { SemanticSearchResult } from './types';
|
|
5
|
+
import { FileSynchronizer } from './sync/synchronizer';
|
|
5
6
|
export interface ContextConfig {
|
|
6
7
|
embedding?: Embedding;
|
|
7
8
|
vectorDatabase?: VectorDatabase;
|
|
@@ -20,13 +21,54 @@ export declare class Context {
|
|
|
20
21
|
private synchronizers;
|
|
21
22
|
constructor(config?: ContextConfig);
|
|
22
23
|
/**
|
|
23
|
-
*
|
|
24
|
+
* Get embedding instance
|
|
24
25
|
*/
|
|
25
|
-
|
|
26
|
+
getEmbedding(): Embedding;
|
|
26
27
|
/**
|
|
27
|
-
*
|
|
28
|
-
|
|
28
|
+
* Get vector database instance
|
|
29
|
+
*/
|
|
30
|
+
getVectorDatabase(): VectorDatabase;
|
|
31
|
+
/**
|
|
32
|
+
* Get code splitter instance
|
|
33
|
+
*/
|
|
34
|
+
getCodeSplitter(): Splitter;
|
|
35
|
+
/**
|
|
36
|
+
* Get supported extensions
|
|
37
|
+
*/
|
|
38
|
+
getSupportedExtensions(): string[];
|
|
39
|
+
/**
|
|
40
|
+
* Get ignore patterns
|
|
41
|
+
*/
|
|
42
|
+
getIgnorePatterns(): string[];
|
|
43
|
+
/**
|
|
44
|
+
* Get synchronizers map
|
|
45
|
+
*/
|
|
46
|
+
getSynchronizers(): Map<string, FileSynchronizer>;
|
|
47
|
+
/**
|
|
48
|
+
* Set synchronizer for a collection
|
|
49
|
+
*/
|
|
50
|
+
setSynchronizer(collectionName: string, synchronizer: FileSynchronizer): void;
|
|
51
|
+
/**
|
|
52
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
53
|
+
*/
|
|
54
|
+
getLoadedIgnorePatterns(codebasePath: string): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Public wrapper for prepareCollection private method
|
|
57
|
+
*/
|
|
58
|
+
getPreparedCollection(codebasePath: string): Promise<void>;
|
|
59
|
+
/**
|
|
60
|
+
* Get isHybrid setting from environment variable with default true
|
|
61
|
+
*/
|
|
62
|
+
private getIsHybrid;
|
|
63
|
+
/**
|
|
64
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
65
|
+
*/
|
|
66
|
+
getCollectionName(codebasePath: string): string;
|
|
67
|
+
/**
|
|
68
|
+
* Index a codebase for semantic search
|
|
69
|
+
* @param codebasePath Codebase root path
|
|
29
70
|
* @param progressCallback Optional progress callback function
|
|
71
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
30
72
|
* @returns Indexing statistics
|
|
31
73
|
*/
|
|
32
74
|
indexCodebase(codebasePath: string, progressCallback?: (progress: {
|
|
@@ -34,7 +76,7 @@ export declare class Context {
|
|
|
34
76
|
current: number;
|
|
35
77
|
total: number;
|
|
36
78
|
percentage: number;
|
|
37
|
-
}) => void): Promise<{
|
|
79
|
+
}) => void, forceReindex?: boolean): Promise<{
|
|
38
80
|
indexedFiles: number;
|
|
39
81
|
totalChunks: number;
|
|
40
82
|
status: 'completed' | 'limit_reached';
|
|
@@ -51,7 +93,7 @@ export declare class Context {
|
|
|
51
93
|
}>;
|
|
52
94
|
private deleteFileChunks;
|
|
53
95
|
/**
|
|
54
|
-
* Semantic search
|
|
96
|
+
* Semantic search with unified implementation
|
|
55
97
|
* @param codebasePath Codebase path to search in
|
|
56
98
|
* @param query Search query
|
|
57
99
|
* @param topK Number of results to return
|
|
@@ -152,9 +194,9 @@ export declare class Context {
|
|
|
152
194
|
* This method preserves any existing custom patterns that were added before
|
|
153
195
|
* @param codebasePath Path to the codebase
|
|
154
196
|
*/
|
|
155
|
-
private
|
|
197
|
+
private loadIgnorePatterns;
|
|
156
198
|
/**
|
|
157
|
-
* Find all .xxxignore files in the codebase directory
|
|
199
|
+
* Find all .xxxignore files in the codebase directory
|
|
158
200
|
* @param codebasePath Path to the codebase
|
|
159
201
|
* @returns Array of ignore file paths
|
|
160
202
|
*/
|
package/dist/context.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,
|
|
1
|
+
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,EAMjB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAK/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAiEvD,MAAM,WAAW,aAAa;IAC1B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,YAAY,CAAC,EAAE,QAAQ,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC;CACnC;AAED,qBAAa,OAAO;IAChB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,YAAY,CAAW;IAC/B,OAAO,CAAC,mBAAmB,CAAW;IACtC,OAAO,CAAC,cAAc,CAAW;IACjC,OAAO,CAAC,aAAa,CAAuC;gBAEhD,MAAM,GAAE,aAAkB;IAkDtC;;OAEG;IACH,YAAY,IAAI,SAAS;IAIzB;;OAEG;IACH,iBAAiB,IAAI,cAAc;IAInC;;OAEG;IACH,eAAe,IAAI,QAAQ;IAI3B;;OAEG;IACH,sBAAsB,IAAI,MAAM,EAAE;IAIlC;;OAEG;IACH,iBAAiB,IAAI,MAAM,EAAE;IAI7B;;OAEG;IACH,gBAAgB,IAAI,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAIjD;;OAEG;IACH,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,YAAY,EAAE,gBAAgB,GAAG,IAAI;IAI7E;;OAEG;IACG,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIlE;;OAEG;IACG,qBAAqB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIhE;;OAEG;IACH,OAAO,CAAC,WAAW;IAQnB;;OAEG;IACI,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM;IAQtD;;;;;;OAMG;IACG,aAAa,CACf,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,EAC5G,YAAY,GAAE,OAAe,GAC9B,OAAO,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,WAAW,GAAG,eAAe,CAAA;KAAE,CAAC;IA8D1F,eAAe,CACjB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;YAkElD,gBAAgB;IAkB9B;;;;;;OAMG;IACG,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC;IA4GrI;;;;OAIG;IACG,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;OAIG;IACG,UAAU,CACZ,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC,IAAI,CAAC;IAqBhB;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAUpD;;;OAGG;IACH,uBAAuB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAYvD;;OAEG;IACH,6BAA6B,IAAI,IAAI;IAKrC;;;OAGG;IACH,eAAe,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI;IAK3C;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,cAAc,GAAG,IAAI;IAK1D;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKxC;;OAEG;YACW,iBAAiB;IAkC/B;;OAEG;YACW,YAAY;IA6B1B;;;;;;GAMD;YACe,eAAe;IA2F7B;;GAED;YACe,kBAAkB;IAgBhC;;OAEG;YACW,iBAAiB;IAsE/B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA2BhC;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAMlB;;;;OAIG;WACU,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa3E;;;;OAIG;YACW,kBAAkB;IA4BhC;;;;OAIG;YACW,eAAe;IAwB7B;;;OAGG;YACW,oBAAoB;IAWlC;;;;;OAKG;YACW,cAAc;IAsB5B;;;;;OAKG;IACH,OAAO,CAAC,oBAAoB;IAiB5B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAmBtB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAUvB;;;;OAIG;IACH,OAAO,CAAC,0BAA0B;IAoBlC;;;;OAIG;IACH,OAAO,CAAC,8BAA8B;IAmBtC;;;OAGG;IACH,mBAAmB,CAAC,gBAAgB,EAAE,MAAM,EAAE,GAAG,IAAI;IAerD;;OAEG;IACH,eAAe,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,kBAAkB,EAAE,OAAO,CAAC;QAAC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE;IAkB/F;;;OAGG;IACH,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAY9C;;;OAGG;IACH,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,KAAK,GAAG,WAAW,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAoBtG"}
|
package/dist/context.js
CHANGED
|
@@ -141,27 +141,96 @@ class Context {
|
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
143
|
/**
|
|
144
|
-
*
|
|
144
|
+
* Get embedding instance
|
|
145
|
+
*/
|
|
146
|
+
getEmbedding() {
|
|
147
|
+
return this.embedding;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Get vector database instance
|
|
151
|
+
*/
|
|
152
|
+
getVectorDatabase() {
|
|
153
|
+
return this.vectorDatabase;
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Get code splitter instance
|
|
157
|
+
*/
|
|
158
|
+
getCodeSplitter() {
|
|
159
|
+
return this.codeSplitter;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Get supported extensions
|
|
163
|
+
*/
|
|
164
|
+
getSupportedExtensions() {
|
|
165
|
+
return [...this.supportedExtensions];
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Get ignore patterns
|
|
169
|
+
*/
|
|
170
|
+
getIgnorePatterns() {
|
|
171
|
+
return [...this.ignorePatterns];
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Get synchronizers map
|
|
175
|
+
*/
|
|
176
|
+
getSynchronizers() {
|
|
177
|
+
return new Map(this.synchronizers);
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Set synchronizer for a collection
|
|
181
|
+
*/
|
|
182
|
+
setSynchronizer(collectionName, synchronizer) {
|
|
183
|
+
this.synchronizers.set(collectionName, synchronizer);
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
187
|
+
*/
|
|
188
|
+
async getLoadedIgnorePatterns(codebasePath) {
|
|
189
|
+
return this.loadIgnorePatterns(codebasePath);
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Public wrapper for prepareCollection private method
|
|
193
|
+
*/
|
|
194
|
+
async getPreparedCollection(codebasePath) {
|
|
195
|
+
return this.prepareCollection(codebasePath);
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Get isHybrid setting from environment variable with default true
|
|
199
|
+
*/
|
|
200
|
+
getIsHybrid() {
|
|
201
|
+
const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
|
|
202
|
+
if (isHybridEnv === undefined || isHybridEnv === null) {
|
|
203
|
+
return true; // Default to true
|
|
204
|
+
}
|
|
205
|
+
return isHybridEnv.toLowerCase() === 'true';
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
145
209
|
*/
|
|
146
210
|
getCollectionName(codebasePath) {
|
|
211
|
+
const isHybrid = this.getIsHybrid();
|
|
147
212
|
const normalizedPath = path.resolve(codebasePath);
|
|
148
213
|
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
149
|
-
|
|
214
|
+
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
|
|
215
|
+
return `${prefix}_${hash.substring(0, 8)}`;
|
|
150
216
|
}
|
|
151
217
|
/**
|
|
152
|
-
* Index
|
|
153
|
-
* @param codebasePath Codebase path
|
|
218
|
+
* Index a codebase for semantic search
|
|
219
|
+
* @param codebasePath Codebase root path
|
|
154
220
|
* @param progressCallback Optional progress callback function
|
|
221
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
155
222
|
* @returns Indexing statistics
|
|
156
223
|
*/
|
|
157
|
-
async indexCodebase(codebasePath, progressCallback) {
|
|
158
|
-
|
|
224
|
+
async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
|
|
225
|
+
const isHybrid = this.getIsHybrid();
|
|
226
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
227
|
+
console.log(`🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
|
|
159
228
|
// 1. Load ignore patterns from various ignore files
|
|
160
|
-
await this.
|
|
229
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
161
230
|
// 2. Check and prepare vector collection
|
|
162
231
|
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
|
|
163
|
-
console.log(`Debug2: Preparing vector collection for codebase`);
|
|
164
|
-
await this.prepareCollection(codebasePath);
|
|
232
|
+
console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
233
|
+
await this.prepareCollection(codebasePath, forceReindex);
|
|
165
234
|
// 3. Recursively traverse codebase to get all supported files
|
|
166
235
|
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
|
|
167
236
|
const codeFiles = await this.getCodeFiles(codebasePath);
|
|
@@ -203,6 +272,8 @@ class Context {
|
|
|
203
272
|
const collectionName = this.getCollectionName(codebasePath);
|
|
204
273
|
const synchronizer = this.synchronizers.get(collectionName);
|
|
205
274
|
if (!synchronizer) {
|
|
275
|
+
// Load project-specific ignore patterns before creating FileSynchronizer
|
|
276
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
206
277
|
// To be safe, let's initialize if it's not there.
|
|
207
278
|
const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
|
|
208
279
|
await newSynchronizer.initialize();
|
|
@@ -246,7 +317,9 @@ class Context {
|
|
|
246
317
|
return { added: added.length, removed: removed.length, modified: modified.length };
|
|
247
318
|
}
|
|
248
319
|
async deleteFileChunks(collectionName, relativePath) {
|
|
249
|
-
|
|
320
|
+
// Escape backslashes for Milvus query expression (Windows path compatibility)
|
|
321
|
+
const escapedPath = relativePath.replace(/\\/g, '\\\\');
|
|
322
|
+
const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ['id']);
|
|
250
323
|
if (results.length > 0) {
|
|
251
324
|
const ids = results.map(r => r.id).filter(id => id);
|
|
252
325
|
if (ids.length > 0) {
|
|
@@ -256,29 +329,98 @@ class Context {
|
|
|
256
329
|
}
|
|
257
330
|
}
|
|
258
331
|
/**
|
|
259
|
-
* Semantic search
|
|
332
|
+
* Semantic search with unified implementation
|
|
260
333
|
* @param codebasePath Codebase path to search in
|
|
261
334
|
* @param query Search query
|
|
262
335
|
* @param topK Number of results to return
|
|
263
336
|
* @param threshold Similarity threshold
|
|
264
337
|
*/
|
|
265
338
|
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5) {
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
//
|
|
272
|
-
const
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
339
|
+
const isHybrid = this.getIsHybrid();
|
|
340
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
341
|
+
console.log(`🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
|
|
342
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
343
|
+
console.log(`🔍 Using collection: ${collectionName}`);
|
|
344
|
+
// Check if collection exists and has data
|
|
345
|
+
const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
|
|
346
|
+
if (!hasCollection) {
|
|
347
|
+
console.log(`⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
|
|
348
|
+
return [];
|
|
349
|
+
}
|
|
350
|
+
if (isHybrid === true) {
|
|
351
|
+
try {
|
|
352
|
+
// Check collection stats to see if it has data
|
|
353
|
+
const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
|
|
354
|
+
console.log(`🔍 Collection '${collectionName}' exists and appears to have data`);
|
|
355
|
+
}
|
|
356
|
+
catch (error) {
|
|
357
|
+
console.log(`⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
358
|
+
}
|
|
359
|
+
// 1. Generate query vector
|
|
360
|
+
console.log(`🔍 Generating embeddings for query: "${query}"`);
|
|
361
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
362
|
+
console.log(`✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
|
|
363
|
+
console.log(`🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
|
|
364
|
+
// 2. Prepare hybrid search requests
|
|
365
|
+
const searchRequests = [
|
|
366
|
+
{
|
|
367
|
+
data: queryEmbedding.vector,
|
|
368
|
+
anns_field: "vector",
|
|
369
|
+
param: { "nprobe": 10 },
|
|
370
|
+
limit: topK
|
|
371
|
+
},
|
|
372
|
+
{
|
|
373
|
+
data: query,
|
|
374
|
+
anns_field: "sparse_vector",
|
|
375
|
+
param: { "drop_ratio_search": 0.2 },
|
|
376
|
+
limit: topK
|
|
377
|
+
}
|
|
378
|
+
];
|
|
379
|
+
console.log(`🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
380
|
+
console.log(`🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
381
|
+
// 3. Execute hybrid search
|
|
382
|
+
console.log(`🔍 Executing hybrid search with RRF reranking...`);
|
|
383
|
+
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
384
|
+
rerank: {
|
|
385
|
+
strategy: 'rrf',
|
|
386
|
+
params: { k: 100 }
|
|
387
|
+
},
|
|
388
|
+
limit: topK
|
|
389
|
+
});
|
|
390
|
+
console.log(`🔍 Raw search results count: ${searchResults.length}`);
|
|
391
|
+
// 4. Convert to semantic search result format
|
|
392
|
+
const results = searchResults.map(result => ({
|
|
393
|
+
content: result.document.content,
|
|
394
|
+
relativePath: result.document.relativePath,
|
|
395
|
+
startLine: result.document.startLine,
|
|
396
|
+
endLine: result.document.endLine,
|
|
397
|
+
language: result.document.metadata.language || 'unknown',
|
|
398
|
+
score: result.score
|
|
399
|
+
}));
|
|
400
|
+
console.log(`✅ Found ${results.length} relevant hybrid results`);
|
|
401
|
+
if (results.length > 0) {
|
|
402
|
+
console.log(`🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
|
|
403
|
+
}
|
|
404
|
+
return results;
|
|
405
|
+
}
|
|
406
|
+
else {
|
|
407
|
+
// Regular semantic search
|
|
408
|
+
// 1. Generate query vector
|
|
409
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
410
|
+
// 2. Search in vector database
|
|
411
|
+
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold });
|
|
412
|
+
// 3. Convert to semantic search result format
|
|
413
|
+
const results = searchResults.map(result => ({
|
|
414
|
+
content: result.document.content,
|
|
415
|
+
relativePath: result.document.relativePath,
|
|
416
|
+
startLine: result.document.startLine,
|
|
417
|
+
endLine: result.document.endLine,
|
|
418
|
+
language: result.document.metadata.language || 'unknown',
|
|
419
|
+
score: result.score
|
|
420
|
+
}));
|
|
421
|
+
console.log(`✅ Found ${results.length} relevant results`);
|
|
422
|
+
return results;
|
|
423
|
+
}
|
|
282
424
|
}
|
|
283
425
|
/**
|
|
284
426
|
* Check if index exists for codebase
|
|
@@ -370,17 +512,32 @@ class Context {
|
|
|
370
512
|
/**
|
|
371
513
|
* Prepare vector collection
|
|
372
514
|
*/
|
|
373
|
-
async prepareCollection(codebasePath) {
|
|
374
|
-
|
|
375
|
-
|
|
515
|
+
async prepareCollection(codebasePath, forceReindex = false) {
|
|
516
|
+
const isHybrid = this.getIsHybrid();
|
|
517
|
+
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
|
|
518
|
+
console.log(`🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
376
519
|
const collectionName = this.getCollectionName(codebasePath);
|
|
377
|
-
//
|
|
378
|
-
|
|
379
|
-
|
|
520
|
+
// Check if collection already exists
|
|
521
|
+
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
|
|
522
|
+
if (collectionExists && !forceReindex) {
|
|
523
|
+
console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
|
|
524
|
+
return;
|
|
525
|
+
}
|
|
526
|
+
if (collectionExists && forceReindex) {
|
|
527
|
+
console.log(`🗑️ Dropping existing collection ${collectionName} for force reindex...`);
|
|
528
|
+
await this.vectorDatabase.dropCollection(collectionName);
|
|
529
|
+
console.log(`✅ Collection ${collectionName} dropped successfully`);
|
|
380
530
|
}
|
|
381
|
-
|
|
531
|
+
console.log(`🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
|
|
532
|
+
const dimension = await this.embedding.detectDimension();
|
|
533
|
+
console.log(`📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
|
|
382
534
|
const dirName = path.basename(codebasePath);
|
|
383
|
-
|
|
535
|
+
if (isHybrid === true) {
|
|
536
|
+
await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
|
540
|
+
}
|
|
384
541
|
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
|
385
542
|
}
|
|
386
543
|
/**
|
|
@@ -418,6 +575,7 @@ class Context {
|
|
|
418
575
|
* @returns Object with processed file count and total chunk count
|
|
419
576
|
*/
|
|
420
577
|
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
|
578
|
+
const isHybrid = this.getIsHybrid();
|
|
421
579
|
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
422
580
|
const CHUNK_LIMIT = 450000;
|
|
423
581
|
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
|
@@ -448,8 +606,11 @@ class Context {
|
|
|
448
606
|
await this.processChunkBuffer(chunkBuffer);
|
|
449
607
|
}
|
|
450
608
|
catch (error) {
|
|
451
|
-
|
|
452
|
-
console.error(`❌ Failed to process chunk batch
|
|
609
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
610
|
+
console.error(`❌ Failed to process chunk batch for ${searchType}:`, error);
|
|
611
|
+
if (error instanceof Error) {
|
|
612
|
+
console.error('Stack trace:', error.stack);
|
|
613
|
+
}
|
|
453
614
|
}
|
|
454
615
|
finally {
|
|
455
616
|
chunkBuffer = []; // Always clear buffer, even on failure
|
|
@@ -474,12 +635,16 @@ class Context {
|
|
|
474
635
|
}
|
|
475
636
|
// Process any remaining chunks in the buffer
|
|
476
637
|
if (chunkBuffer.length > 0) {
|
|
477
|
-
|
|
638
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
639
|
+
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
|
|
478
640
|
try {
|
|
479
641
|
await this.processChunkBuffer(chunkBuffer);
|
|
480
642
|
}
|
|
481
643
|
catch (error) {
|
|
482
|
-
console.error(`❌ Failed to process final chunk batch
|
|
644
|
+
console.error(`❌ Failed to process final chunk batch for ${searchType}:`, error);
|
|
645
|
+
if (error instanceof Error) {
|
|
646
|
+
console.error('Stack trace:', error.stack);
|
|
647
|
+
}
|
|
483
648
|
}
|
|
484
649
|
}
|
|
485
650
|
return {
|
|
@@ -499,43 +664,75 @@ class Context {
|
|
|
499
664
|
const codebasePath = chunkBuffer[0].codebasePath;
|
|
500
665
|
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
|
501
666
|
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
|
502
|
-
|
|
667
|
+
const isHybrid = this.getIsHybrid();
|
|
668
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
669
|
+
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
|
|
503
670
|
await this.processChunkBatch(chunks, codebasePath);
|
|
504
671
|
}
|
|
505
672
|
/**
|
|
506
673
|
* Process a batch of chunks
|
|
507
674
|
*/
|
|
508
675
|
async processChunkBatch(chunks, codebasePath) {
|
|
676
|
+
const isHybrid = this.getIsHybrid();
|
|
509
677
|
// Generate embedding vectors
|
|
510
678
|
const chunkContents = chunks.map(chunk => chunk.content);
|
|
511
679
|
const embeddings = await this.embedding.embedBatch(chunkContents);
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
518
|
-
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
519
|
-
// Extract metadata that should be stored separately
|
|
520
|
-
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
521
|
-
return {
|
|
522
|
-
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
523
|
-
vector: embeddings[index].vector,
|
|
524
|
-
content: chunk.content,
|
|
525
|
-
relativePath,
|
|
526
|
-
startLine: chunk.metadata.startLine || 0,
|
|
527
|
-
endLine: chunk.metadata.endLine || 0,
|
|
528
|
-
fileExtension,
|
|
529
|
-
metadata: {
|
|
530
|
-
...restMetadata,
|
|
531
|
-
codebasePath,
|
|
532
|
-
language: chunk.metadata.language || 'unknown',
|
|
533
|
-
chunkIndex: index
|
|
680
|
+
if (isHybrid === true) {
|
|
681
|
+
// Create hybrid vector documents
|
|
682
|
+
const documents = chunks.map((chunk, index) => {
|
|
683
|
+
if (!chunk.metadata.filePath) {
|
|
684
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
534
685
|
}
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
686
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
687
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
688
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
689
|
+
return {
|
|
690
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
691
|
+
content: chunk.content, // Full text content for BM25 and storage
|
|
692
|
+
vector: embeddings[index].vector, // Dense vector
|
|
693
|
+
relativePath,
|
|
694
|
+
startLine: chunk.metadata.startLine || 0,
|
|
695
|
+
endLine: chunk.metadata.endLine || 0,
|
|
696
|
+
fileExtension,
|
|
697
|
+
metadata: {
|
|
698
|
+
...restMetadata,
|
|
699
|
+
codebasePath,
|
|
700
|
+
language: chunk.metadata.language || 'unknown',
|
|
701
|
+
chunkIndex: index
|
|
702
|
+
}
|
|
703
|
+
};
|
|
704
|
+
});
|
|
705
|
+
// Store to vector database
|
|
706
|
+
await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
|
|
707
|
+
}
|
|
708
|
+
else {
|
|
709
|
+
// Create regular vector documents
|
|
710
|
+
const documents = chunks.map((chunk, index) => {
|
|
711
|
+
if (!chunk.metadata.filePath) {
|
|
712
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
713
|
+
}
|
|
714
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
715
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
716
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
717
|
+
return {
|
|
718
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
719
|
+
vector: embeddings[index].vector,
|
|
720
|
+
content: chunk.content,
|
|
721
|
+
relativePath,
|
|
722
|
+
startLine: chunk.metadata.startLine || 0,
|
|
723
|
+
endLine: chunk.metadata.endLine || 0,
|
|
724
|
+
fileExtension,
|
|
725
|
+
metadata: {
|
|
726
|
+
...restMetadata,
|
|
727
|
+
codebasePath,
|
|
728
|
+
language: chunk.metadata.language || 'unknown',
|
|
729
|
+
chunkIndex: index
|
|
730
|
+
}
|
|
731
|
+
};
|
|
732
|
+
});
|
|
733
|
+
// Store to vector database
|
|
734
|
+
await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
|
|
735
|
+
}
|
|
539
736
|
}
|
|
540
737
|
/**
|
|
541
738
|
* Get programming language based on file extension
|
|
@@ -602,23 +799,19 @@ class Context {
|
|
|
602
799
|
* This method preserves any existing custom patterns that were added before
|
|
603
800
|
* @param codebasePath Path to the codebase
|
|
604
801
|
*/
|
|
605
|
-
async
|
|
802
|
+
async loadIgnorePatterns(codebasePath) {
|
|
606
803
|
try {
|
|
607
804
|
let fileBasedPatterns = [];
|
|
608
|
-
//
|
|
609
|
-
const gitignorePath = path.join(codebasePath, '.gitignore');
|
|
610
|
-
const gitignorePatterns = await this.loadIgnoreFile(gitignorePath, '.gitignore');
|
|
611
|
-
fileBasedPatterns.push(...gitignorePatterns);
|
|
612
|
-
// 2. Load all .xxxignore files in codebase directory
|
|
805
|
+
// Load all .xxxignore files in codebase directory
|
|
613
806
|
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
|
614
807
|
for (const ignoreFile of ignoreFiles) {
|
|
615
808
|
const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
|
|
616
809
|
fileBasedPatterns.push(...patterns);
|
|
617
810
|
}
|
|
618
|
-
//
|
|
811
|
+
// Load global ~/.context/.contextignore
|
|
619
812
|
const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
|
|
620
813
|
fileBasedPatterns.push(...globalIgnorePatterns);
|
|
621
|
-
//
|
|
814
|
+
// Merge file-based patterns with existing patterns (which may include custom MCP patterns)
|
|
622
815
|
if (fileBasedPatterns.length > 0) {
|
|
623
816
|
this.addCustomIgnorePatterns(fileBasedPatterns);
|
|
624
817
|
console.log(`🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
|
|
@@ -633,7 +826,7 @@ class Context {
|
|
|
633
826
|
}
|
|
634
827
|
}
|
|
635
828
|
/**
|
|
636
|
-
* Find all .xxxignore files in the codebase directory
|
|
829
|
+
* Find all .xxxignore files in the codebase directory
|
|
637
830
|
* @param codebasePath Path to the codebase
|
|
638
831
|
* @returns Array of ignore file paths
|
|
639
832
|
*/
|
|
@@ -644,13 +837,12 @@ class Context {
|
|
|
644
837
|
for (const entry of entries) {
|
|
645
838
|
if (entry.isFile() &&
|
|
646
839
|
entry.name.startsWith('.') &&
|
|
647
|
-
entry.name.endsWith('ignore')
|
|
648
|
-
entry.name !== '.gitignore') { // Exclude .gitignore as it's handled separately
|
|
840
|
+
entry.name.endsWith('ignore')) {
|
|
649
841
|
ignoreFiles.push(path.join(codebasePath, entry.name));
|
|
650
842
|
}
|
|
651
843
|
}
|
|
652
844
|
if (ignoreFiles.length > 0) {
|
|
653
|
-
console.log(`📄 Found
|
|
845
|
+
console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
|
654
846
|
}
|
|
655
847
|
return ignoreFiles;
|
|
656
848
|
}
|
|
@@ -695,7 +887,7 @@ class Context {
|
|
|
695
887
|
}
|
|
696
888
|
}
|
|
697
889
|
catch (error) {
|
|
698
|
-
if (fileName
|
|
890
|
+
if (fileName.includes('global')) {
|
|
699
891
|
console.log(`📄 No ${fileName} file found`);
|
|
700
892
|
}
|
|
701
893
|
return [];
|