@zilliz/claude-context-core 0.0.13 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.d.ts +51 -9
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +269 -80
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts +6 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +1 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +4 -0
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/ollama-embedding.d.ts +1 -10
- package/dist/embedding/ollama-embedding.d.ts.map +1 -1
- package/dist/embedding/ollama-embedding.js +42 -52
- package/dist/embedding/ollama-embedding.js.map +1 -1
- package/dist/embedding/openai-embedding.d.ts +2 -2
- package/dist/embedding/openai-embedding.d.ts.map +1 -1
- package/dist/embedding/openai-embedding.js +78 -32
- package/dist/embedding/openai-embedding.js.map +1 -1
- package/dist/embedding/voyageai-embedding.d.ts +1 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
- package/dist/embedding/voyageai-embedding.js +4 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -1
- package/dist/splitter/ast-splitter.d.ts.map +1 -1
- package/dist/splitter/ast-splitter.js +6 -3
- package/dist/splitter/ast-splitter.js.map +1 -1
- package/dist/vectordb/index.d.ts +1 -1
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts +9 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.js +275 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
- package/dist/vectordb/milvus-vectordb.d.ts +8 -1
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-vectordb.js +236 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -1
- package/dist/vectordb/types.d.ts +43 -3
- package/dist/vectordb/types.d.ts.map +1 -1
- package/dist/vectordb/types.js.map +1 -1
- package/package.json +2 -1
package/dist/context.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Splitter } from './splitter';
|
|
|
2
2
|
import { Embedding } from './embedding';
|
|
3
3
|
import { VectorDatabase } from './vectordb';
|
|
4
4
|
import { SemanticSearchResult } from './types';
|
|
5
|
+
import { FileSynchronizer } from './sync/synchronizer';
|
|
5
6
|
export interface ContextConfig {
|
|
6
7
|
embedding?: Embedding;
|
|
7
8
|
vectorDatabase?: VectorDatabase;
|
|
@@ -20,13 +21,54 @@ export declare class Context {
|
|
|
20
21
|
private synchronizers;
|
|
21
22
|
constructor(config?: ContextConfig);
|
|
22
23
|
/**
|
|
23
|
-
*
|
|
24
|
+
* Get embedding instance
|
|
24
25
|
*/
|
|
25
|
-
|
|
26
|
+
getEmbedding(): Embedding;
|
|
26
27
|
/**
|
|
27
|
-
*
|
|
28
|
-
|
|
28
|
+
* Get vector database instance
|
|
29
|
+
*/
|
|
30
|
+
getVectorDatabase(): VectorDatabase;
|
|
31
|
+
/**
|
|
32
|
+
* Get code splitter instance
|
|
33
|
+
*/
|
|
34
|
+
getCodeSplitter(): Splitter;
|
|
35
|
+
/**
|
|
36
|
+
* Get supported extensions
|
|
37
|
+
*/
|
|
38
|
+
getSupportedExtensions(): string[];
|
|
39
|
+
/**
|
|
40
|
+
* Get ignore patterns
|
|
41
|
+
*/
|
|
42
|
+
getIgnorePatterns(): string[];
|
|
43
|
+
/**
|
|
44
|
+
* Get synchronizers map
|
|
45
|
+
*/
|
|
46
|
+
getSynchronizers(): Map<string, FileSynchronizer>;
|
|
47
|
+
/**
|
|
48
|
+
* Set synchronizer for a collection
|
|
49
|
+
*/
|
|
50
|
+
setSynchronizer(collectionName: string, synchronizer: FileSynchronizer): void;
|
|
51
|
+
/**
|
|
52
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
53
|
+
*/
|
|
54
|
+
getLoadedIgnorePatterns(codebasePath: string): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Public wrapper for prepareCollection private method
|
|
57
|
+
*/
|
|
58
|
+
getPreparedCollection(codebasePath: string): Promise<void>;
|
|
59
|
+
/**
|
|
60
|
+
* Get isHybrid setting from environment variable with default true
|
|
61
|
+
*/
|
|
62
|
+
private getIsHybrid;
|
|
63
|
+
/**
|
|
64
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
65
|
+
*/
|
|
66
|
+
getCollectionName(codebasePath: string): string;
|
|
67
|
+
/**
|
|
68
|
+
* Index a codebase for semantic search
|
|
69
|
+
* @param codebasePath Codebase root path
|
|
29
70
|
* @param progressCallback Optional progress callback function
|
|
71
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
30
72
|
* @returns Indexing statistics
|
|
31
73
|
*/
|
|
32
74
|
indexCodebase(codebasePath: string, progressCallback?: (progress: {
|
|
@@ -34,7 +76,7 @@ export declare class Context {
|
|
|
34
76
|
current: number;
|
|
35
77
|
total: number;
|
|
36
78
|
percentage: number;
|
|
37
|
-
}) => void): Promise<{
|
|
79
|
+
}) => void, forceReindex?: boolean): Promise<{
|
|
38
80
|
indexedFiles: number;
|
|
39
81
|
totalChunks: number;
|
|
40
82
|
status: 'completed' | 'limit_reached';
|
|
@@ -51,13 +93,13 @@ export declare class Context {
|
|
|
51
93
|
}>;
|
|
52
94
|
private deleteFileChunks;
|
|
53
95
|
/**
|
|
54
|
-
* Semantic search
|
|
96
|
+
* Semantic search with unified implementation
|
|
55
97
|
* @param codebasePath Codebase path to search in
|
|
56
98
|
* @param query Search query
|
|
57
99
|
* @param topK Number of results to return
|
|
58
100
|
* @param threshold Similarity threshold
|
|
59
101
|
*/
|
|
60
|
-
semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number): Promise<SemanticSearchResult[]>;
|
|
102
|
+
semanticSearch(codebasePath: string, query: string, topK?: number, threshold?: number, filterExpr?: string): Promise<SemanticSearchResult[]>;
|
|
61
103
|
/**
|
|
62
104
|
* Check if index exists for codebase
|
|
63
105
|
* @param codebasePath Codebase path to check
|
|
@@ -152,9 +194,9 @@ export declare class Context {
|
|
|
152
194
|
* This method preserves any existing custom patterns that were added before
|
|
153
195
|
* @param codebasePath Path to the codebase
|
|
154
196
|
*/
|
|
155
|
-
private
|
|
197
|
+
private loadIgnorePatterns;
|
|
156
198
|
/**
|
|
157
|
-
* Find all .xxxignore files in the codebase directory
|
|
199
|
+
* Find all .xxxignore files in the codebase directory
|
|
158
200
|
* @param codebasePath Path to the codebase
|
|
159
201
|
* @returns Array of ignore file paths
|
|
160
202
|
*/
|
package/dist/context.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,
|
|
1
|
+
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,QAAQ,EAGX,MAAM,YAAY,CAAC;AACpB,OAAO,EACH,SAAS,EAGZ,MAAM,aAAa,CAAC;AACrB,OAAO,EACH,cAAc,EAMjB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAK/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAiEvD,MAAM,WAAW,aAAa;IAC1B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,YAAY,CAAC,EAAE,QAAQ,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAC;CACnC;AAED,qBAAa,OAAO;IAChB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,YAAY,CAAW;IAC/B,OAAO,CAAC,mBAAmB,CAAW;IACtC,OAAO,CAAC,cAAc,CAAW;IACjC,OAAO,CAAC,aAAa,CAAuC;gBAEhD,MAAM,GAAE,aAAkB;IAkDtC;;OAEG;IACH,YAAY,IAAI,SAAS;IAIzB;;OAEG;IACH,iBAAiB,IAAI,cAAc;IAInC;;OAEG;IACH,eAAe,IAAI,QAAQ;IAI3B;;OAEG;IACH,sBAAsB,IAAI,MAAM,EAAE;IAIlC;;OAEG;IACH,iBAAiB,IAAI,MAAM,EAAE;IAI7B;;OAEG;IACH,gBAAgB,IAAI,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAIjD;;OAEG;IACH,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,YAAY,EAAE,gBAAgB,GAAG,IAAI;IAI7E;;OAEG;IACG,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIlE;;OAEG;IACG,qBAAqB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIhE;;OAEG;IACH,OAAO,CAAC,WAAW;IAQnB;;OAEG;IACI,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM;IAQtD;;;;;;OAMG;IACG,aAAa,CACf,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,EAC5G,YAAY,GAAE,OAAe,GAC9B,OAAO,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,WAAW,GAAG,eAAe,CAAA;KAAE,CAAC;IA8D1F,eAAe,CACjB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;YAkElD,gBAAgB;IAkB9B;;;;;;OAMG;IACG,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,EAAE,SAAS,GAAE,MAAY,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC;IA6G1J;;;;OAIG;IACG,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;OAIG;IACG,UAAU,CACZ,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC7G,OAAO,CAAC,IAAI,CAAC;IAqBhB;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAUpD;;;OAGG;IACH,uBAAuB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAYvD;;OAEG;IACH,6BAA6B,IAAI,IAAI;IAKrC;;;OAGG;IACH,eAAe,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI;IAK3C;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,cAAc,GAAG,IAAI;IAK1D;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKxC;;OAEG;YACW,iBAAiB;IAkC/B;;OAEG;YACW,YAAY;IA6B1B;;;;;;GAMD;YACe,eAAe;IA2F7B;;GAED;YACe,kBAAkB;IAgBhC;;OAEG;YACW,iBAAiB;IAsE/B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA2BhC;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAMlB;;;;OAIG;WACU,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAa3E;;;;OAIG;YACW,kBAAkB;IA4BhC;;;;OAIG;YACW,eAAe;IAwB7B;;;OAGG;YACW,oBAAoB;IAWlC;;;;;OAKG;YACW,cAAc;IAsB5B;;;;;OAKG;IACH,OAAO,CAAC,oBAAoB;IAiB5B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAmBtB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAUvB;;;;OAIG;IACH,OAAO,CAAC,0BAA0B;IAoBlC;;;;OAIG;IACH,OAAO,CAAC,8BAA8B;IAmBtC;;;OAGG;IACH,mBAAmB,CAAC,gBAAgB,EAAE,MAAM,EAAE,GAAG,IAAI;IAerD;;OAEG;IACH,eAAe,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,kBAAkB,EAAE,OAAO,CAAC;QAAC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE;IAkB/F;;;OAGG;IACH,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAY9C;;;OAGG;IACH,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,KAAK,GAAG,WAAW,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAoBtG"}
|
package/dist/context.js
CHANGED
|
@@ -141,27 +141,96 @@ class Context {
|
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
143
|
/**
|
|
144
|
-
*
|
|
144
|
+
* Get embedding instance
|
|
145
|
+
*/
|
|
146
|
+
getEmbedding() {
|
|
147
|
+
return this.embedding;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Get vector database instance
|
|
151
|
+
*/
|
|
152
|
+
getVectorDatabase() {
|
|
153
|
+
return this.vectorDatabase;
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Get code splitter instance
|
|
157
|
+
*/
|
|
158
|
+
getCodeSplitter() {
|
|
159
|
+
return this.codeSplitter;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Get supported extensions
|
|
163
|
+
*/
|
|
164
|
+
getSupportedExtensions() {
|
|
165
|
+
return [...this.supportedExtensions];
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Get ignore patterns
|
|
169
|
+
*/
|
|
170
|
+
getIgnorePatterns() {
|
|
171
|
+
return [...this.ignorePatterns];
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Get synchronizers map
|
|
175
|
+
*/
|
|
176
|
+
getSynchronizers() {
|
|
177
|
+
return new Map(this.synchronizers);
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Set synchronizer for a collection
|
|
181
|
+
*/
|
|
182
|
+
setSynchronizer(collectionName, synchronizer) {
|
|
183
|
+
this.synchronizers.set(collectionName, synchronizer);
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Public wrapper for loadIgnorePatterns private method
|
|
187
|
+
*/
|
|
188
|
+
async getLoadedIgnorePatterns(codebasePath) {
|
|
189
|
+
return this.loadIgnorePatterns(codebasePath);
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Public wrapper for prepareCollection private method
|
|
193
|
+
*/
|
|
194
|
+
async getPreparedCollection(codebasePath) {
|
|
195
|
+
return this.prepareCollection(codebasePath);
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Get isHybrid setting from environment variable with default true
|
|
199
|
+
*/
|
|
200
|
+
getIsHybrid() {
|
|
201
|
+
const isHybridEnv = env_manager_1.envManager.get('HYBRID_MODE');
|
|
202
|
+
if (isHybridEnv === undefined || isHybridEnv === null) {
|
|
203
|
+
return true; // Default to true
|
|
204
|
+
}
|
|
205
|
+
return isHybridEnv.toLowerCase() === 'true';
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Generate collection name based on codebase path and hybrid mode
|
|
145
209
|
*/
|
|
146
210
|
getCollectionName(codebasePath) {
|
|
211
|
+
const isHybrid = this.getIsHybrid();
|
|
147
212
|
const normalizedPath = path.resolve(codebasePath);
|
|
148
213
|
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
|
|
149
|
-
|
|
214
|
+
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
|
|
215
|
+
return `${prefix}_${hash.substring(0, 8)}`;
|
|
150
216
|
}
|
|
151
217
|
/**
|
|
152
|
-
* Index
|
|
153
|
-
* @param codebasePath Codebase path
|
|
218
|
+
* Index a codebase for semantic search
|
|
219
|
+
* @param codebasePath Codebase root path
|
|
154
220
|
* @param progressCallback Optional progress callback function
|
|
221
|
+
* @param forceReindex Whether to recreate the collection even if it exists
|
|
155
222
|
* @returns Indexing statistics
|
|
156
223
|
*/
|
|
157
|
-
async indexCodebase(codebasePath, progressCallback) {
|
|
158
|
-
|
|
224
|
+
async indexCodebase(codebasePath, progressCallback, forceReindex = false) {
|
|
225
|
+
const isHybrid = this.getIsHybrid();
|
|
226
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
227
|
+
console.log(`🚀 Starting to index codebase with ${searchType}: ${codebasePath}`);
|
|
159
228
|
// 1. Load ignore patterns from various ignore files
|
|
160
|
-
await this.
|
|
229
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
161
230
|
// 2. Check and prepare vector collection
|
|
162
231
|
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
|
|
163
|
-
console.log(`Debug2: Preparing vector collection for codebase`);
|
|
164
|
-
await this.prepareCollection(codebasePath);
|
|
232
|
+
console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
233
|
+
await this.prepareCollection(codebasePath, forceReindex);
|
|
165
234
|
// 3. Recursively traverse codebase to get all supported files
|
|
166
235
|
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
|
|
167
236
|
const codeFiles = await this.getCodeFiles(codebasePath);
|
|
@@ -204,7 +273,7 @@ class Context {
|
|
|
204
273
|
const synchronizer = this.synchronizers.get(collectionName);
|
|
205
274
|
if (!synchronizer) {
|
|
206
275
|
// Load project-specific ignore patterns before creating FileSynchronizer
|
|
207
|
-
await this.
|
|
276
|
+
await this.loadIgnorePatterns(codebasePath);
|
|
208
277
|
// To be safe, let's initialize if it's not there.
|
|
209
278
|
const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
|
|
210
279
|
await newSynchronizer.initialize();
|
|
@@ -260,29 +329,99 @@ class Context {
|
|
|
260
329
|
}
|
|
261
330
|
}
|
|
262
331
|
/**
|
|
263
|
-
* Semantic search
|
|
332
|
+
* Semantic search with unified implementation
|
|
264
333
|
* @param codebasePath Codebase path to search in
|
|
265
334
|
* @param query Search query
|
|
266
335
|
* @param topK Number of results to return
|
|
267
336
|
* @param threshold Similarity threshold
|
|
268
337
|
*/
|
|
269
|
-
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
//
|
|
276
|
-
const
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
338
|
+
async semanticSearch(codebasePath, query, topK = 5, threshold = 0.5, filterExpr) {
|
|
339
|
+
const isHybrid = this.getIsHybrid();
|
|
340
|
+
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
|
|
341
|
+
console.log(`🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
|
|
342
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
343
|
+
console.log(`🔍 Using collection: ${collectionName}`);
|
|
344
|
+
// Check if collection exists and has data
|
|
345
|
+
const hasCollection = await this.vectorDatabase.hasCollection(collectionName);
|
|
346
|
+
if (!hasCollection) {
|
|
347
|
+
console.log(`⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`);
|
|
348
|
+
return [];
|
|
349
|
+
}
|
|
350
|
+
if (isHybrid === true) {
|
|
351
|
+
try {
|
|
352
|
+
// Check collection stats to see if it has data
|
|
353
|
+
const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1);
|
|
354
|
+
console.log(`🔍 Collection '${collectionName}' exists and appears to have data`);
|
|
355
|
+
}
|
|
356
|
+
catch (error) {
|
|
357
|
+
console.log(`⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
358
|
+
}
|
|
359
|
+
// 1. Generate query vector
|
|
360
|
+
console.log(`🔍 Generating embeddings for query: "${query}"`);
|
|
361
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
362
|
+
console.log(`✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`);
|
|
363
|
+
console.log(`🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`);
|
|
364
|
+
// 2. Prepare hybrid search requests
|
|
365
|
+
const searchRequests = [
|
|
366
|
+
{
|
|
367
|
+
data: queryEmbedding.vector,
|
|
368
|
+
anns_field: "vector",
|
|
369
|
+
param: { "nprobe": 10 },
|
|
370
|
+
limit: topK
|
|
371
|
+
},
|
|
372
|
+
{
|
|
373
|
+
data: query,
|
|
374
|
+
anns_field: "sparse_vector",
|
|
375
|
+
param: { "drop_ratio_search": 0.2 },
|
|
376
|
+
limit: topK
|
|
377
|
+
}
|
|
378
|
+
];
|
|
379
|
+
console.log(`🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
380
|
+
console.log(`🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
381
|
+
// 3. Execute hybrid search
|
|
382
|
+
console.log(`🔍 Executing hybrid search with RRF reranking...`);
|
|
383
|
+
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
384
|
+
rerank: {
|
|
385
|
+
strategy: 'rrf',
|
|
386
|
+
params: { k: 100 }
|
|
387
|
+
},
|
|
388
|
+
limit: topK,
|
|
389
|
+
filterExpr
|
|
390
|
+
});
|
|
391
|
+
console.log(`🔍 Raw search results count: ${searchResults.length}`);
|
|
392
|
+
// 4. Convert to semantic search result format
|
|
393
|
+
const results = searchResults.map(result => ({
|
|
394
|
+
content: result.document.content,
|
|
395
|
+
relativePath: result.document.relativePath,
|
|
396
|
+
startLine: result.document.startLine,
|
|
397
|
+
endLine: result.document.endLine,
|
|
398
|
+
language: result.document.metadata.language || 'unknown',
|
|
399
|
+
score: result.score
|
|
400
|
+
}));
|
|
401
|
+
console.log(`✅ Found ${results.length} relevant hybrid results`);
|
|
402
|
+
if (results.length > 0) {
|
|
403
|
+
console.log(`🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`);
|
|
404
|
+
}
|
|
405
|
+
return results;
|
|
406
|
+
}
|
|
407
|
+
else {
|
|
408
|
+
// Regular semantic search
|
|
409
|
+
// 1. Generate query vector
|
|
410
|
+
const queryEmbedding = await this.embedding.embed(query);
|
|
411
|
+
// 2. Search in vector database
|
|
412
|
+
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
|
|
413
|
+
// 3. Convert to semantic search result format
|
|
414
|
+
const results = searchResults.map(result => ({
|
|
415
|
+
content: result.document.content,
|
|
416
|
+
relativePath: result.document.relativePath,
|
|
417
|
+
startLine: result.document.startLine,
|
|
418
|
+
endLine: result.document.endLine,
|
|
419
|
+
language: result.document.metadata.language || 'unknown',
|
|
420
|
+
score: result.score
|
|
421
|
+
}));
|
|
422
|
+
console.log(`✅ Found ${results.length} relevant results`);
|
|
423
|
+
return results;
|
|
424
|
+
}
|
|
286
425
|
}
|
|
287
426
|
/**
|
|
288
427
|
* Check if index exists for codebase
|
|
@@ -374,17 +513,32 @@ class Context {
|
|
|
374
513
|
/**
|
|
375
514
|
* Prepare vector collection
|
|
376
515
|
*/
|
|
377
|
-
async prepareCollection(codebasePath) {
|
|
378
|
-
|
|
379
|
-
|
|
516
|
+
async prepareCollection(codebasePath, forceReindex = false) {
|
|
517
|
+
const isHybrid = this.getIsHybrid();
|
|
518
|
+
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
|
|
519
|
+
console.log(`🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
|
|
380
520
|
const collectionName = this.getCollectionName(codebasePath);
|
|
381
|
-
//
|
|
382
|
-
|
|
383
|
-
|
|
521
|
+
// Check if collection already exists
|
|
522
|
+
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
|
|
523
|
+
if (collectionExists && !forceReindex) {
|
|
524
|
+
console.log(`📋 Collection ${collectionName} already exists, skipping creation`);
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
if (collectionExists && forceReindex) {
|
|
528
|
+
console.log(`🗑️ Dropping existing collection ${collectionName} for force reindex...`);
|
|
529
|
+
await this.vectorDatabase.dropCollection(collectionName);
|
|
530
|
+
console.log(`✅ Collection ${collectionName} dropped successfully`);
|
|
384
531
|
}
|
|
385
|
-
|
|
532
|
+
console.log(`🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`);
|
|
533
|
+
const dimension = await this.embedding.detectDimension();
|
|
534
|
+
console.log(`📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`);
|
|
386
535
|
const dirName = path.basename(codebasePath);
|
|
387
|
-
|
|
536
|
+
if (isHybrid === true) {
|
|
537
|
+
await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`);
|
|
538
|
+
}
|
|
539
|
+
else {
|
|
540
|
+
await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`);
|
|
541
|
+
}
|
|
388
542
|
console.log(`✅ Collection ${collectionName} created successfully (dimension: ${dimension})`);
|
|
389
543
|
}
|
|
390
544
|
/**
|
|
@@ -422,6 +576,7 @@ class Context {
|
|
|
422
576
|
* @returns Object with processed file count and total chunk count
|
|
423
577
|
*/
|
|
424
578
|
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
|
579
|
+
const isHybrid = this.getIsHybrid();
|
|
425
580
|
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
426
581
|
const CHUNK_LIMIT = 450000;
|
|
427
582
|
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
|
@@ -452,8 +607,11 @@ class Context {
|
|
|
452
607
|
await this.processChunkBuffer(chunkBuffer);
|
|
453
608
|
}
|
|
454
609
|
catch (error) {
|
|
455
|
-
|
|
456
|
-
console.error(`❌ Failed to process chunk batch
|
|
610
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
611
|
+
console.error(`❌ Failed to process chunk batch for ${searchType}:`, error);
|
|
612
|
+
if (error instanceof Error) {
|
|
613
|
+
console.error('Stack trace:', error.stack);
|
|
614
|
+
}
|
|
457
615
|
}
|
|
458
616
|
finally {
|
|
459
617
|
chunkBuffer = []; // Always clear buffer, even on failure
|
|
@@ -478,12 +636,16 @@ class Context {
|
|
|
478
636
|
}
|
|
479
637
|
// Process any remaining chunks in the buffer
|
|
480
638
|
if (chunkBuffer.length > 0) {
|
|
481
|
-
|
|
639
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
640
|
+
console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`);
|
|
482
641
|
try {
|
|
483
642
|
await this.processChunkBuffer(chunkBuffer);
|
|
484
643
|
}
|
|
485
644
|
catch (error) {
|
|
486
|
-
console.error(`❌ Failed to process final chunk batch
|
|
645
|
+
console.error(`❌ Failed to process final chunk batch for ${searchType}:`, error);
|
|
646
|
+
if (error instanceof Error) {
|
|
647
|
+
console.error('Stack trace:', error.stack);
|
|
648
|
+
}
|
|
487
649
|
}
|
|
488
650
|
}
|
|
489
651
|
return {
|
|
@@ -503,43 +665,75 @@ class Context {
|
|
|
503
665
|
const codebasePath = chunkBuffer[0].codebasePath;
|
|
504
666
|
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
|
505
667
|
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
|
506
|
-
|
|
668
|
+
const isHybrid = this.getIsHybrid();
|
|
669
|
+
const searchType = isHybrid === true ? 'hybrid' : 'regular';
|
|
670
|
+
console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`);
|
|
507
671
|
await this.processChunkBatch(chunks, codebasePath);
|
|
508
672
|
}
|
|
509
673
|
/**
|
|
510
674
|
* Process a batch of chunks
|
|
511
675
|
*/
|
|
512
676
|
async processChunkBatch(chunks, codebasePath) {
|
|
677
|
+
const isHybrid = this.getIsHybrid();
|
|
513
678
|
// Generate embedding vectors
|
|
514
679
|
const chunkContents = chunks.map(chunk => chunk.content);
|
|
515
680
|
const embeddings = await this.embedding.embedBatch(chunkContents);
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
522
|
-
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
523
|
-
// Extract metadata that should be stored separately
|
|
524
|
-
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
525
|
-
return {
|
|
526
|
-
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
527
|
-
vector: embeddings[index].vector,
|
|
528
|
-
content: chunk.content,
|
|
529
|
-
relativePath,
|
|
530
|
-
startLine: chunk.metadata.startLine || 0,
|
|
531
|
-
endLine: chunk.metadata.endLine || 0,
|
|
532
|
-
fileExtension,
|
|
533
|
-
metadata: {
|
|
534
|
-
...restMetadata,
|
|
535
|
-
codebasePath,
|
|
536
|
-
language: chunk.metadata.language || 'unknown',
|
|
537
|
-
chunkIndex: index
|
|
681
|
+
if (isHybrid === true) {
|
|
682
|
+
// Create hybrid vector documents
|
|
683
|
+
const documents = chunks.map((chunk, index) => {
|
|
684
|
+
if (!chunk.metadata.filePath) {
|
|
685
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
538
686
|
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
687
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
688
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
689
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
690
|
+
return {
|
|
691
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
692
|
+
content: chunk.content, // Full text content for BM25 and storage
|
|
693
|
+
vector: embeddings[index].vector, // Dense vector
|
|
694
|
+
relativePath,
|
|
695
|
+
startLine: chunk.metadata.startLine || 0,
|
|
696
|
+
endLine: chunk.metadata.endLine || 0,
|
|
697
|
+
fileExtension,
|
|
698
|
+
metadata: {
|
|
699
|
+
...restMetadata,
|
|
700
|
+
codebasePath,
|
|
701
|
+
language: chunk.metadata.language || 'unknown',
|
|
702
|
+
chunkIndex: index
|
|
703
|
+
}
|
|
704
|
+
};
|
|
705
|
+
});
|
|
706
|
+
// Store to vector database
|
|
707
|
+
await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents);
|
|
708
|
+
}
|
|
709
|
+
else {
|
|
710
|
+
// Create regular vector documents
|
|
711
|
+
const documents = chunks.map((chunk, index) => {
|
|
712
|
+
if (!chunk.metadata.filePath) {
|
|
713
|
+
throw new Error(`Missing filePath in chunk metadata at index ${index}`);
|
|
714
|
+
}
|
|
715
|
+
const relativePath = path.relative(codebasePath, chunk.metadata.filePath);
|
|
716
|
+
const fileExtension = path.extname(chunk.metadata.filePath);
|
|
717
|
+
const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata;
|
|
718
|
+
return {
|
|
719
|
+
id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content),
|
|
720
|
+
vector: embeddings[index].vector,
|
|
721
|
+
content: chunk.content,
|
|
722
|
+
relativePath,
|
|
723
|
+
startLine: chunk.metadata.startLine || 0,
|
|
724
|
+
endLine: chunk.metadata.endLine || 0,
|
|
725
|
+
fileExtension,
|
|
726
|
+
metadata: {
|
|
727
|
+
...restMetadata,
|
|
728
|
+
codebasePath,
|
|
729
|
+
language: chunk.metadata.language || 'unknown',
|
|
730
|
+
chunkIndex: index
|
|
731
|
+
}
|
|
732
|
+
};
|
|
733
|
+
});
|
|
734
|
+
// Store to vector database
|
|
735
|
+
await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents);
|
|
736
|
+
}
|
|
543
737
|
}
|
|
544
738
|
/**
|
|
545
739
|
* Get programming language based on file extension
|
|
@@ -606,23 +800,19 @@ class Context {
|
|
|
606
800
|
* This method preserves any existing custom patterns that were added before
|
|
607
801
|
* @param codebasePath Path to the codebase
|
|
608
802
|
*/
|
|
609
|
-
async
|
|
803
|
+
async loadIgnorePatterns(codebasePath) {
|
|
610
804
|
try {
|
|
611
805
|
let fileBasedPatterns = [];
|
|
612
|
-
//
|
|
613
|
-
const gitignorePath = path.join(codebasePath, '.gitignore');
|
|
614
|
-
const gitignorePatterns = await this.loadIgnoreFile(gitignorePath, '.gitignore');
|
|
615
|
-
fileBasedPatterns.push(...gitignorePatterns);
|
|
616
|
-
// 2. Load all .xxxignore files in codebase directory
|
|
806
|
+
// Load all .xxxignore files in codebase directory
|
|
617
807
|
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
|
618
808
|
for (const ignoreFile of ignoreFiles) {
|
|
619
809
|
const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile));
|
|
620
810
|
fileBasedPatterns.push(...patterns);
|
|
621
811
|
}
|
|
622
|
-
//
|
|
812
|
+
// Load global ~/.context/.contextignore
|
|
623
813
|
const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
|
|
624
814
|
fileBasedPatterns.push(...globalIgnorePatterns);
|
|
625
|
-
//
|
|
815
|
+
// Merge file-based patterns with existing patterns (which may include custom MCP patterns)
|
|
626
816
|
if (fileBasedPatterns.length > 0) {
|
|
627
817
|
this.addCustomIgnorePatterns(fileBasedPatterns);
|
|
628
818
|
console.log(`🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
|
|
@@ -637,7 +827,7 @@ class Context {
|
|
|
637
827
|
}
|
|
638
828
|
}
|
|
639
829
|
/**
|
|
640
|
-
* Find all .xxxignore files in the codebase directory
|
|
830
|
+
* Find all .xxxignore files in the codebase directory
|
|
641
831
|
* @param codebasePath Path to the codebase
|
|
642
832
|
* @returns Array of ignore file paths
|
|
643
833
|
*/
|
|
@@ -648,13 +838,12 @@ class Context {
|
|
|
648
838
|
for (const entry of entries) {
|
|
649
839
|
if (entry.isFile() &&
|
|
650
840
|
entry.name.startsWith('.') &&
|
|
651
|
-
entry.name.endsWith('ignore')
|
|
652
|
-
entry.name !== '.gitignore') { // Exclude .gitignore as it's handled separately
|
|
841
|
+
entry.name.endsWith('ignore')) {
|
|
653
842
|
ignoreFiles.push(path.join(codebasePath, entry.name));
|
|
654
843
|
}
|
|
655
844
|
}
|
|
656
845
|
if (ignoreFiles.length > 0) {
|
|
657
|
-
console.log(`📄 Found
|
|
846
|
+
console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
|
658
847
|
}
|
|
659
848
|
return ignoreFiles;
|
|
660
849
|
}
|
|
@@ -699,7 +888,7 @@ class Context {
|
|
|
699
888
|
}
|
|
700
889
|
}
|
|
701
890
|
catch (error) {
|
|
702
|
-
if (fileName
|
|
891
|
+
if (fileName.includes('global')) {
|
|
703
892
|
console.log(`📄 No ${fileName} file found`);
|
|
704
893
|
}
|
|
705
894
|
return [];
|