@pleaseai/context-please-core 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -46
- package/dist/.tsbuildinfo +1 -1
- package/dist/context.d.ts +12 -12
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +125 -73
- package/dist/context.js.map +1 -1
- package/dist/embedding/base-embedding.d.ts.map +1 -1
- package/dist/embedding/base-embedding.js +1 -1
- package/dist/embedding/base-embedding.js.map +1 -1
- package/dist/embedding/gemini-embedding.d.ts +2 -1
- package/dist/embedding/gemini-embedding.d.ts.map +1 -1
- package/dist/embedding/gemini-embedding.js +8 -8
- package/dist/embedding/gemini-embedding.js.map +1 -1
- package/dist/embedding/index.d.ts +2 -2
- package/dist/embedding/index.d.ts.map +1 -1
- package/dist/embedding/index.js +2 -2
- package/dist/embedding/index.js.map +1 -1
- package/dist/embedding/ollama-embedding.d.ts +2 -1
- package/dist/embedding/ollama-embedding.d.ts.map +1 -1
- package/dist/embedding/ollama-embedding.js +5 -5
- package/dist/embedding/ollama-embedding.js.map +1 -1
- package/dist/embedding/openai-embedding.d.ts +2 -1
- package/dist/embedding/openai-embedding.d.ts.map +1 -1
- package/dist/embedding/openai-embedding.js +10 -10
- package/dist/embedding/openai-embedding.js.map +1 -1
- package/dist/embedding/voyageai-embedding.d.ts +2 -1
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -1
- package/dist/embedding/voyageai-embedding.js +23 -23
- package/dist/embedding/voyageai-embedding.js.map +1 -1
- package/dist/index.d.ts +4 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/splitter/ast-splitter.d.ts +1 -1
- package/dist/splitter/ast-splitter.d.ts.map +1 -1
- package/dist/splitter/ast-splitter.js +29 -15
- package/dist/splitter/ast-splitter.js.map +1 -1
- package/dist/splitter/index.d.ts +4 -4
- package/dist/splitter/index.d.ts.map +1 -1
- package/dist/splitter/index.js +1 -1
- package/dist/splitter/index.js.map +1 -1
- package/dist/splitter/langchain-splitter.d.ts +1 -1
- package/dist/splitter/langchain-splitter.d.ts.map +1 -1
- package/dist/splitter/langchain-splitter.js.map +1 -1
- package/dist/sync/merkle.d.ts.map +1 -1
- package/dist/sync/merkle.js +9 -9
- package/dist/sync/merkle.js.map +1 -1
- package/dist/sync/synchronizer.d.ts.map +1 -1
- package/dist/sync/synchronizer.js +15 -15
- package/dist/sync/synchronizer.js.map +1 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/env-manager.d.ts.map +1 -1
- package/dist/utils/env-manager.js +3 -3
- package/dist/utils/env-manager.js.map +1 -1
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js.map +1 -1
- package/dist/vectordb/base/base-vector-database.d.ts +1 -1
- package/dist/vectordb/base/base-vector-database.d.ts.map +1 -1
- package/dist/vectordb/base/base-vector-database.js.map +1 -1
- package/dist/vectordb/factory.d.ts +6 -6
- package/dist/vectordb/factory.d.ts.map +1 -1
- package/dist/vectordb/factory.js +1 -1
- package/dist/vectordb/factory.js.map +1 -1
- package/dist/vectordb/index.d.ts +9 -9
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js +8 -8
- package/dist/vectordb/index.js.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.d.ts +6 -5
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-restful-vectordb.js +136 -136
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -1
- package/dist/vectordb/milvus-vectordb.d.ts +5 -4
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -1
- package/dist/vectordb/milvus-vectordb.js +31 -31
- package/dist/vectordb/milvus-vectordb.js.map +1 -1
- package/dist/vectordb/qdrant-vectordb.d.ts +28 -3
- package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -1
- package/dist/vectordb/qdrant-vectordb.js +298 -73
- package/dist/vectordb/qdrant-vectordb.js.map +1 -1
- package/dist/vectordb/sparse/index.d.ts +2 -2
- package/dist/vectordb/sparse/index.d.ts.map +1 -1
- package/dist/vectordb/sparse/index.js +4 -4
- package/dist/vectordb/sparse/index.js.map +1 -1
- package/dist/vectordb/sparse/simple-bm25.d.ts +12 -2
- package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -1
- package/dist/vectordb/sparse/simple-bm25.js +82 -9
- package/dist/vectordb/sparse/simple-bm25.js.map +1 -1
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts +7 -7
- package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -1
- package/dist/vectordb/sparse/types.d.ts.map +1 -1
- package/dist/vectordb/types.d.ts +12 -12
- package/dist/vectordb/types.d.ts.map +1 -1
- package/dist/vectordb/types.js +1 -1
- package/dist/vectordb/types.js.map +1 -1
- package/dist/vectordb/zilliz-utils.d.ts +10 -10
- package/dist/vectordb/zilliz-utils.d.ts.map +1 -1
- package/dist/vectordb/zilliz-utils.js +16 -17
- package/dist/vectordb/zilliz-utils.js.map +1 -1
- package/package.json +14 -13
package/dist/context.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
1
|
+
import type { Embedding } from './embedding';
|
|
2
|
+
import type { Splitter } from './splitter';
|
|
3
|
+
import type { SemanticSearchResult } from './types';
|
|
4
|
+
import type { VectorDatabase } from './vectordb';
|
|
5
5
|
import { FileSynchronizer } from './sync/synchronizer';
|
|
6
6
|
export interface ContextConfig {
|
|
7
7
|
embedding?: Embedding;
|
|
@@ -155,16 +155,16 @@ export declare class Context {
|
|
|
155
155
|
*/
|
|
156
156
|
private getCodeFiles;
|
|
157
157
|
/**
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
158
|
+
* Process a list of files with streaming chunk processing
|
|
159
|
+
* @param filePaths Array of file paths to process
|
|
160
|
+
* @param codebasePath Base path for the codebase
|
|
161
|
+
* @param onFileProcessed Callback called when each file is processed
|
|
162
|
+
* @returns Object with processed file count and total chunk count
|
|
163
|
+
*/
|
|
164
164
|
private processFileList;
|
|
165
165
|
/**
|
|
166
|
-
|
|
167
|
-
|
|
166
|
+
* Process accumulated chunk buffer
|
|
167
|
+
*/
|
|
168
168
|
private processChunkBuffer;
|
|
169
169
|
/**
|
|
170
170
|
* Process a batch of chunks
|
package/dist/context.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"context.d.ts","sourceRoot":"","sources":["../src/context.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EAEV,MAAM,aAAa,CAAA;AACpB,OAAO,KAAK,EAEV,QAAQ,EACT,MAAM,YAAY,CAAA;AACnB,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAA;AACnD,OAAO,KAAK,EAGV,cAAc,EAGf,MAAM,YAAY,CAAA;AAUnB,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AAqGtD,MAAM,WAAW,aAAa;IAC5B,SAAS,CAAC,EAAE,SAAS,CAAA;IACrB,cAAc,CAAC,EAAE,cAAc,CAAA;IAC/B,YAAY,CAAC,EAAE,QAAQ,CAAA;IACvB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC9B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oBAAoB,CAAC,EAAE,MAAM,EAAE,CAAA;CAChC;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,cAAc,CAAgB;IACtC,OAAO,CAAC,YAAY,CAAU;IAC9B,OAAO,CAAC,mBAAmB,CAAU;IACrC,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,aAAa,CAAsC;gBAE/C,MAAM,GAAE,aAAkB;IAkDtC;;OAEG;IACH,YAAY,IAAI,SAAS;IAIzB;;OAEG;IACH,iBAAiB,IAAI,cAAc;IAInC;;OAEG;IACH,eAAe,IAAI,QAAQ;IAI3B;;OAEG;IACH,sBAAsB,IAAI,MAAM,EAAE;IAIlC;;OAEG;IACH,iBAAiB,IAAI,MAAM,EAAE;IAI7B;;OAEG;IACH,gBAAgB,IAAI,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAIjD;;OAEG;IACH,eAAe,CAAC,cAAc,EAAE,MAAM,EAAE,YAAY,EAAE,gBAAgB,GAAG,IAAI;IAI7E;;OAEG;IACG,uBAAuB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIlE;;OAEG;IACG,qBAAqB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIhE;;OAEG;IACH,OAAO,CAAC,WAAW;IAQnB;;OAEG;IACI,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM;IAQtD;;;;;;OAMG;IACG,aAAa,CACjB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,EAC5G,YAAY,GAAE,OAAe,GAC5B,OAAO,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,WAAW,GAAG,eAAe,CAAA;KAAE,CAAC;IA8D1F,eAAe,CACnB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC3G,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;YAiFlD,gBAAgB;IAkB9B;;;;;;OAMG;IACG,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,EAAE,SAAS,GAAE,MAAY,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,EAAE,CAAC;IA2H1J;;;;OAIG;IACG,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;OAIG;IACG,UAAU,CACd,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAC3G,OAAO,CAAC,IAAI,CAAC;IA0BhB;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAUpD;;;OAGG;IACH,uBAAuB,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,IAAI;IAavD;;OAEG;IACH,6BAA6B,IAAI,IAAI;IAKrC;;;OAGG;IACH,eAAe,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI;IAK3C;;;OAGG;IACH,oBAAoB,CAAC,cAAc,EAAE,cAAc,GAAG,IAAI;IAK1D;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKxC;;OAEG;YACW,iBAAiB;IAmC/B;;OAEG;YACW,YAAY;IA8B1B;;;;;;OAMG;YACW,eAAe;IA4I7B;;OAEG;YACW,kBAAkB;IAiBhC;;OAEG;YACW,iBAAiB;IAuE/B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA2BhC;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAMlB;;;;OAIG;WACU,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAc3E;;;;OAIG;YACW,kBAAkB;IA8BhC;;;;OAIG;YACW,eAAe;IAyB7B;;;OAGG;YACW,oBAAoB;IAYlC;;;;;OAKG;YACW,cAAc;IAwB5B;;;;;OAKG;IACH,OAAO,CAAC,oBAAoB;IAiB5B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAoBtB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAUvB;;;;OAIG;IACH,OAAO,CAAC,0BAA0B;IAqBlC;;;;OAIG;IACH,OAAO,CAAC,8BAA8B;IAoBtC;;;OAGG;IACH,mBAAmB,CAAC,gBAAgB,EAAE,MAAM,EAAE,GAAG,IAAI;IAgBrD;;OAEG;IACH,eAAe,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,kBAAkB,EAAE,OAAO,CAAC;QAAC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE;IAmB/F;;;OAGG;IACH,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAY9C;;;OAGG;IACH,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,KAAK,GAAG,WAAW,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAqBpG"}
|
package/dist/context.js
CHANGED
|
@@ -34,20 +34,40 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.Context = void 0;
|
|
37
|
-
const
|
|
37
|
+
const crypto = __importStar(require("node:crypto"));
|
|
38
|
+
const fs = __importStar(require("node:fs"));
|
|
39
|
+
const path = __importStar(require("node:path"));
|
|
38
40
|
const embedding_1 = require("./embedding");
|
|
39
|
-
const
|
|
40
|
-
const env_manager_1 = require("./utils/env-manager");
|
|
41
|
-
const fs = __importStar(require("fs"));
|
|
42
|
-
const path = __importStar(require("path"));
|
|
43
|
-
const crypto = __importStar(require("crypto"));
|
|
41
|
+
const splitter_1 = require("./splitter");
|
|
44
42
|
const synchronizer_1 = require("./sync/synchronizer");
|
|
43
|
+
const env_manager_1 = require("./utils/env-manager");
|
|
44
|
+
const qdrant_vectordb_1 = require("./vectordb/qdrant-vectordb");
|
|
45
45
|
const DEFAULT_SUPPORTED_EXTENSIONS = [
|
|
46
46
|
// Programming languages
|
|
47
|
-
'.ts',
|
|
48
|
-
'.
|
|
47
|
+
'.ts',
|
|
48
|
+
'.tsx',
|
|
49
|
+
'.js',
|
|
50
|
+
'.jsx',
|
|
51
|
+
'.py',
|
|
52
|
+
'.java',
|
|
53
|
+
'.cpp',
|
|
54
|
+
'.c',
|
|
55
|
+
'.h',
|
|
56
|
+
'.hpp',
|
|
57
|
+
'.cs',
|
|
58
|
+
'.go',
|
|
59
|
+
'.rs',
|
|
60
|
+
'.php',
|
|
61
|
+
'.rb',
|
|
62
|
+
'.swift',
|
|
63
|
+
'.kt',
|
|
64
|
+
'.scala',
|
|
65
|
+
'.m',
|
|
66
|
+
'.mm',
|
|
49
67
|
// Text and markup files
|
|
50
|
-
'.md',
|
|
68
|
+
'.md',
|
|
69
|
+
'.markdown',
|
|
70
|
+
'.ipynb',
|
|
51
71
|
// '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm',
|
|
52
72
|
// '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env'
|
|
53
73
|
];
|
|
@@ -93,9 +113,23 @@ const DEFAULT_IGNORE_PATTERNS = [
|
|
|
93
113
|
'*.polyfills.js',
|
|
94
114
|
'*.runtime.js',
|
|
95
115
|
'*.map', // source map files
|
|
96
|
-
'node_modules',
|
|
97
|
-
'
|
|
98
|
-
'
|
|
116
|
+
'node_modules',
|
|
117
|
+
'.git',
|
|
118
|
+
'.svn',
|
|
119
|
+
'.hg',
|
|
120
|
+
'build',
|
|
121
|
+
'dist',
|
|
122
|
+
'out',
|
|
123
|
+
'target',
|
|
124
|
+
'.vscode',
|
|
125
|
+
'.idea',
|
|
126
|
+
'__pycache__',
|
|
127
|
+
'.pytest_cache',
|
|
128
|
+
'coverage',
|
|
129
|
+
'.nyc_output',
|
|
130
|
+
'logs',
|
|
131
|
+
'tmp',
|
|
132
|
+
'temp',
|
|
99
133
|
];
|
|
100
134
|
class Context {
|
|
101
135
|
constructor(config = {}) {
|
|
@@ -104,7 +138,7 @@ class Context {
|
|
|
104
138
|
this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
|
|
105
139
|
apiKey: env_manager_1.envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
|
|
106
140
|
model: 'text-embedding-3-small',
|
|
107
|
-
...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') })
|
|
141
|
+
...(env_manager_1.envManager.get('OPENAI_BASE_URL') && { baseURL: env_manager_1.envManager.get('OPENAI_BASE_URL') }),
|
|
108
142
|
});
|
|
109
143
|
if (!config.vectorDatabase) {
|
|
110
144
|
throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.');
|
|
@@ -118,18 +152,18 @@ class Context {
|
|
|
118
152
|
...DEFAULT_SUPPORTED_EXTENSIONS,
|
|
119
153
|
...(config.supportedExtensions || []),
|
|
120
154
|
...(config.customExtensions || []),
|
|
121
|
-
...envCustomExtensions
|
|
155
|
+
...envCustomExtensions,
|
|
122
156
|
];
|
|
123
157
|
// Remove duplicates
|
|
124
158
|
this.supportedExtensions = [...new Set(allSupportedExtensions)];
|
|
125
|
-
// Load custom ignore patterns from environment variables
|
|
159
|
+
// Load custom ignore patterns from environment variables
|
|
126
160
|
const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
|
|
127
161
|
// Start with default ignore patterns
|
|
128
162
|
const allIgnorePatterns = [
|
|
129
163
|
...DEFAULT_IGNORE_PATTERNS,
|
|
130
164
|
...(config.ignorePatterns || []),
|
|
131
165
|
...(config.customIgnorePatterns || []),
|
|
132
|
-
...envCustomIgnorePatterns
|
|
166
|
+
...envCustomIgnorePatterns,
|
|
133
167
|
];
|
|
134
168
|
// Remove duplicates
|
|
135
169
|
this.ignorePatterns = [...new Set(allIgnorePatterns)];
|
|
@@ -253,7 +287,7 @@ class Context {
|
|
|
253
287
|
phase: `Processing files (${fileIndex}/${totalFiles})...`,
|
|
254
288
|
current: fileIndex,
|
|
255
289
|
total: totalFiles,
|
|
256
|
-
percentage: Math.round(progressPercentage)
|
|
290
|
+
percentage: Math.round(progressPercentage),
|
|
257
291
|
});
|
|
258
292
|
});
|
|
259
293
|
console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`);
|
|
@@ -261,12 +295,12 @@ class Context {
|
|
|
261
295
|
phase: 'Indexing complete!',
|
|
262
296
|
current: result.processedFiles,
|
|
263
297
|
total: codeFiles.length,
|
|
264
|
-
percentage: 100
|
|
298
|
+
percentage: 100,
|
|
265
299
|
});
|
|
266
300
|
return {
|
|
267
301
|
indexedFiles: result.processedFiles,
|
|
268
302
|
totalChunks: result.totalChunks,
|
|
269
|
-
status: result.status
|
|
303
|
+
status: result.status,
|
|
270
304
|
};
|
|
271
305
|
}
|
|
272
306
|
async reindexByChange(codebasePath, progressCallback) {
|
|
@@ -321,7 +355,7 @@ class Context {
|
|
|
321
355
|
updateProgress(`Deleted old chunks for ${file}`);
|
|
322
356
|
}
|
|
323
357
|
// Handle added and modified files
|
|
324
|
-
const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f));
|
|
358
|
+
const filesToIndex = [...added, ...modified].map((f) => path.join(codebasePath, f));
|
|
325
359
|
if (filesToIndex.length > 0) {
|
|
326
360
|
await this.processFileList(filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => {
|
|
327
361
|
updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`);
|
|
@@ -336,7 +370,7 @@ class Context {
|
|
|
336
370
|
const escapedPath = relativePath.replace(/\\/g, '\\\\');
|
|
337
371
|
const results = await this.vectorDatabase.query(collectionName, `relativePath == "${escapedPath}"`, ['id']);
|
|
338
372
|
if (results.length > 0) {
|
|
339
|
-
const ids = results.map(r => r.id).filter(id => id);
|
|
373
|
+
const ids = results.map((r) => r.id).filter((id) => id);
|
|
340
374
|
if (ids.length > 0) {
|
|
341
375
|
await this.vectorDatabase.delete(collectionName, ids);
|
|
342
376
|
console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`);
|
|
@@ -371,6 +405,17 @@ class Context {
|
|
|
371
405
|
catch (error) {
|
|
372
406
|
console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error);
|
|
373
407
|
}
|
|
408
|
+
// Load BM25 model if using Qdrant and model is not yet trained
|
|
409
|
+
if (this.vectorDatabase instanceof qdrant_vectordb_1.QdrantVectorDatabase) {
|
|
410
|
+
const bm25Generator = this.vectorDatabase.getBM25Generator();
|
|
411
|
+
if (!bm25Generator.isTrained()) {
|
|
412
|
+
console.log('[Context] 📂 BM25 model not loaded, attempting to load from disk...');
|
|
413
|
+
const loaded = await this.vectorDatabase.loadBM25Model(collectionName);
|
|
414
|
+
if (!loaded) {
|
|
415
|
+
console.warn('[Context] ⚠️ Failed to load BM25 model. Hybrid search quality may be degraded.');
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
}
|
|
374
419
|
// 1. Generate query vector
|
|
375
420
|
console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`);
|
|
376
421
|
const queryEmbedding = await this.embedding.embed(query);
|
|
@@ -380,16 +425,16 @@ class Context {
|
|
|
380
425
|
const searchRequests = [
|
|
381
426
|
{
|
|
382
427
|
data: queryEmbedding.vector,
|
|
383
|
-
anns_field:
|
|
384
|
-
param: {
|
|
385
|
-
limit: topK
|
|
428
|
+
anns_field: 'vector',
|
|
429
|
+
param: { nprobe: 10 },
|
|
430
|
+
limit: topK,
|
|
386
431
|
},
|
|
387
432
|
{
|
|
388
433
|
data: query,
|
|
389
|
-
anns_field:
|
|
390
|
-
param: {
|
|
391
|
-
limit: topK
|
|
392
|
-
}
|
|
434
|
+
anns_field: 'sparse_vector',
|
|
435
|
+
param: { drop_ratio_search: 0.2 },
|
|
436
|
+
limit: topK,
|
|
437
|
+
},
|
|
393
438
|
];
|
|
394
439
|
console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`);
|
|
395
440
|
console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`);
|
|
@@ -398,20 +443,20 @@ class Context {
|
|
|
398
443
|
const searchResults = await this.vectorDatabase.hybridSearch(collectionName, searchRequests, {
|
|
399
444
|
rerank: {
|
|
400
445
|
strategy: 'rrf',
|
|
401
|
-
params: { k: 100 }
|
|
446
|
+
params: { k: 100 },
|
|
402
447
|
},
|
|
403
448
|
limit: topK,
|
|
404
|
-
filterExpr
|
|
449
|
+
filterExpr,
|
|
405
450
|
});
|
|
406
451
|
console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`);
|
|
407
452
|
// 4. Convert to semantic search result format
|
|
408
|
-
const results = searchResults.map(result => ({
|
|
453
|
+
const results = searchResults.map((result) => ({
|
|
409
454
|
content: result.document.content,
|
|
410
455
|
relativePath: result.document.relativePath,
|
|
411
456
|
startLine: result.document.startLine,
|
|
412
457
|
endLine: result.document.endLine,
|
|
413
458
|
language: result.document.metadata.language || 'unknown',
|
|
414
|
-
score: result.score
|
|
459
|
+
score: result.score,
|
|
415
460
|
}));
|
|
416
461
|
console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
|
|
417
462
|
if (results.length > 0) {
|
|
@@ -426,13 +471,13 @@ class Context {
|
|
|
426
471
|
// 2. Search in vector database
|
|
427
472
|
const searchResults = await this.vectorDatabase.search(collectionName, queryEmbedding.vector, { topK, threshold, filterExpr });
|
|
428
473
|
// 3. Convert to semantic search result format
|
|
429
|
-
const results = searchResults.map(result => ({
|
|
474
|
+
const results = searchResults.map((result) => ({
|
|
430
475
|
content: result.document.content,
|
|
431
476
|
relativePath: result.document.relativePath,
|
|
432
477
|
startLine: result.document.startLine,
|
|
433
478
|
endLine: result.document.endLine,
|
|
434
479
|
language: result.document.metadata.language || 'unknown',
|
|
435
|
-
score: result.score
|
|
480
|
+
score: result.score,
|
|
436
481
|
}));
|
|
437
482
|
console.log(`[Context] ✅ Found ${results.length} relevant results`);
|
|
438
483
|
return results;
|
|
@@ -461,6 +506,10 @@ class Context {
|
|
|
461
506
|
if (collectionExists) {
|
|
462
507
|
await this.vectorDatabase.dropCollection(collectionName);
|
|
463
508
|
}
|
|
509
|
+
// Delete BM25 model if using Qdrant
|
|
510
|
+
if (this.vectorDatabase instanceof qdrant_vectordb_1.QdrantVectorDatabase) {
|
|
511
|
+
await this.vectorDatabase.deleteBM25Model(collectionName);
|
|
512
|
+
}
|
|
464
513
|
// Delete snapshot file
|
|
465
514
|
await synchronizer_1.FileSynchronizer.deleteSnapshot(codebasePath);
|
|
466
515
|
progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 });
|
|
@@ -475,7 +524,7 @@ class Context {
|
|
|
475
524
|
const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
|
|
476
525
|
const uniquePatterns = [];
|
|
477
526
|
const patternSet = new Set(mergedPatterns);
|
|
478
|
-
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
|
527
|
+
patternSet.forEach((pattern) => uniquePatterns.push(pattern));
|
|
479
528
|
this.ignorePatterns = uniquePatterns;
|
|
480
529
|
console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
|
|
481
530
|
}
|
|
@@ -490,7 +539,7 @@ class Context {
|
|
|
490
539
|
const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
|
|
491
540
|
const uniquePatterns = [];
|
|
492
541
|
const patternSet = new Set(mergedPatterns);
|
|
493
|
-
patternSet.forEach(pattern => uniquePatterns.push(pattern));
|
|
542
|
+
patternSet.forEach((pattern) => uniquePatterns.push(pattern));
|
|
494
543
|
this.ignorePatterns = uniquePatterns;
|
|
495
544
|
console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
|
|
496
545
|
}
|
|
@@ -584,15 +633,15 @@ class Context {
|
|
|
584
633
|
return files;
|
|
585
634
|
}
|
|
586
635
|
/**
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
636
|
+
* Process a list of files with streaming chunk processing
|
|
637
|
+
* @param filePaths Array of file paths to process
|
|
638
|
+
* @param codebasePath Base path for the codebase
|
|
639
|
+
* @param onFileProcessed Callback called when each file is processed
|
|
640
|
+
* @returns Object with processed file count and total chunk count
|
|
641
|
+
*/
|
|
593
642
|
async processFileList(filePaths, codebasePath, onFileProcessed) {
|
|
594
643
|
const isHybrid = this.getIsHybrid();
|
|
595
|
-
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
644
|
+
const EMBEDDING_BATCH_SIZE = Math.max(1, Number.parseInt(env_manager_1.envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
|
|
596
645
|
const CHUNK_LIMIT = 450000;
|
|
597
646
|
console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
|
|
598
647
|
// For Qdrant hybrid search, we need to train BM25 on the full corpus first
|
|
@@ -662,12 +711,15 @@ class Context {
|
|
|
662
711
|
if (needsBM25Training && allChunks.length > 0) {
|
|
663
712
|
console.log(`[Context] 🎓 Training BM25 on ${allChunks.length} chunks for Qdrant hybrid search...`);
|
|
664
713
|
// Extract corpus texts for BM25 training
|
|
665
|
-
const corpus = allChunks.map(item => item.chunk.content);
|
|
714
|
+
const corpus = allChunks.map((item) => item.chunk.content);
|
|
666
715
|
// Get BM25 generator and train it
|
|
667
716
|
if (this.vectorDatabase instanceof qdrant_vectordb_1.QdrantVectorDatabase) {
|
|
717
|
+
const collectionName = this.getCollectionName(codebasePath);
|
|
668
718
|
const bm25Generator = this.vectorDatabase.getBM25Generator();
|
|
669
719
|
bm25Generator.learn(corpus);
|
|
670
720
|
console.log(`[Context] ✅ BM25 training completed on ${corpus.length} documents`);
|
|
721
|
+
// Save BM25 model to disk for future use
|
|
722
|
+
await this.vectorDatabase.saveBM25Model(collectionName);
|
|
671
723
|
}
|
|
672
724
|
// Now process all chunks in batches
|
|
673
725
|
console.log(`[Context] 📝 Processing ${allChunks.length} chunks in batches of ${EMBEDDING_BATCH_SIZE}...`);
|
|
@@ -702,17 +754,17 @@ class Context {
|
|
|
702
754
|
return {
|
|
703
755
|
processedFiles,
|
|
704
756
|
totalChunks,
|
|
705
|
-
status: limitReached ? 'limit_reached' : 'completed'
|
|
757
|
+
status: limitReached ? 'limit_reached' : 'completed',
|
|
706
758
|
};
|
|
707
759
|
}
|
|
708
760
|
/**
|
|
709
|
-
|
|
710
|
-
|
|
761
|
+
* Process accumulated chunk buffer
|
|
762
|
+
*/
|
|
711
763
|
async processChunkBuffer(chunkBuffer) {
|
|
712
764
|
if (chunkBuffer.length === 0)
|
|
713
765
|
return;
|
|
714
766
|
// Extract chunks and ensure they all have the same codebasePath
|
|
715
|
-
const chunks = chunkBuffer.map(item => item.chunk);
|
|
767
|
+
const chunks = chunkBuffer.map((item) => item.chunk);
|
|
716
768
|
const codebasePath = chunkBuffer[0].codebasePath;
|
|
717
769
|
// Estimate tokens (rough estimation: 1 token ≈ 4 characters)
|
|
718
770
|
const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0);
|
|
@@ -727,7 +779,7 @@ class Context {
|
|
|
727
779
|
async processChunkBatch(chunks, codebasePath) {
|
|
728
780
|
const isHybrid = this.getIsHybrid();
|
|
729
781
|
// Generate embedding vectors
|
|
730
|
-
const chunkContents = chunks.map(chunk => chunk.content);
|
|
782
|
+
const chunkContents = chunks.map((chunk) => chunk.content);
|
|
731
783
|
const embeddings = await this.embedding.embedBatch(chunkContents);
|
|
732
784
|
if (isHybrid === true) {
|
|
733
785
|
// Create hybrid vector documents
|
|
@@ -750,8 +802,8 @@ class Context {
|
|
|
750
802
|
...restMetadata,
|
|
751
803
|
codebasePath,
|
|
752
804
|
language: chunk.metadata.language || 'unknown',
|
|
753
|
-
chunkIndex: index
|
|
754
|
-
}
|
|
805
|
+
chunkIndex: index,
|
|
806
|
+
},
|
|
755
807
|
};
|
|
756
808
|
});
|
|
757
809
|
// Store to vector database
|
|
@@ -778,8 +830,8 @@ class Context {
|
|
|
778
830
|
...restMetadata,
|
|
779
831
|
codebasePath,
|
|
780
832
|
language: chunk.metadata.language || 'unknown',
|
|
781
|
-
chunkIndex: index
|
|
782
|
-
}
|
|
833
|
+
chunkIndex: index,
|
|
834
|
+
},
|
|
783
835
|
};
|
|
784
836
|
});
|
|
785
837
|
// Store to vector database
|
|
@@ -811,7 +863,7 @@ class Context {
|
|
|
811
863
|
'.scala': 'scala',
|
|
812
864
|
'.m': 'objective-c',
|
|
813
865
|
'.mm': 'objective-c',
|
|
814
|
-
'.ipynb': 'jupyter'
|
|
866
|
+
'.ipynb': 'jupyter',
|
|
815
867
|
};
|
|
816
868
|
return languageMap[ext] || 'text';
|
|
817
869
|
}
|
|
@@ -838,8 +890,8 @@ class Context {
|
|
|
838
890
|
const content = await fs.promises.readFile(filePath, 'utf-8');
|
|
839
891
|
return content
|
|
840
892
|
.split('\n')
|
|
841
|
-
.map(line => line.trim())
|
|
842
|
-
.filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments
|
|
893
|
+
.map((line) => line.trim())
|
|
894
|
+
.filter((line) => line && !line.startsWith('#')); // Filter out empty lines and comments
|
|
843
895
|
}
|
|
844
896
|
catch (error) {
|
|
845
897
|
console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`);
|
|
@@ -853,7 +905,7 @@ class Context {
|
|
|
853
905
|
*/
|
|
854
906
|
async loadIgnorePatterns(codebasePath) {
|
|
855
907
|
try {
|
|
856
|
-
|
|
908
|
+
const fileBasedPatterns = [];
|
|
857
909
|
// Load all .xxxignore files in codebase directory
|
|
858
910
|
const ignoreFiles = await this.findIgnoreFiles(codebasePath);
|
|
859
911
|
for (const ignoreFile of ignoreFiles) {
|
|
@@ -887,14 +939,14 @@ class Context {
|
|
|
887
939
|
const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true });
|
|
888
940
|
const ignoreFiles = [];
|
|
889
941
|
for (const entry of entries) {
|
|
890
|
-
if (entry.isFile()
|
|
891
|
-
entry.name.startsWith('.')
|
|
892
|
-
entry.name.endsWith('ignore')) {
|
|
942
|
+
if (entry.isFile()
|
|
943
|
+
&& entry.name.startsWith('.')
|
|
944
|
+
&& entry.name.endsWith('ignore')) {
|
|
893
945
|
ignoreFiles.push(path.join(codebasePath, entry.name));
|
|
894
946
|
}
|
|
895
947
|
}
|
|
896
948
|
if (ignoreFiles.length > 0) {
|
|
897
|
-
console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`);
|
|
949
|
+
console.log(`📄 Found ignore files: ${ignoreFiles.map((f) => path.basename(f)).join(', ')}`);
|
|
898
950
|
}
|
|
899
951
|
return ignoreFiles;
|
|
900
952
|
}
|
|
@@ -909,7 +961,7 @@ class Context {
|
|
|
909
961
|
*/
|
|
910
962
|
async loadGlobalIgnoreFile() {
|
|
911
963
|
try {
|
|
912
|
-
const homeDir = require('os').homedir();
|
|
964
|
+
const homeDir = require('node:os').homedir();
|
|
913
965
|
const globalIgnorePath = path.join(homeDir, '.context', '.contextignore');
|
|
914
966
|
return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore');
|
|
915
967
|
}
|
|
@@ -975,7 +1027,7 @@ class Context {
|
|
|
975
1027
|
if (pattern.endsWith('/')) {
|
|
976
1028
|
const dirPattern = pattern.slice(0, -1);
|
|
977
1029
|
const pathParts = filePath.split('/');
|
|
978
|
-
return pathParts.some(part => this.simpleGlobMatch(part, dirPattern));
|
|
1030
|
+
return pathParts.some((part) => this.simpleGlobMatch(part, dirPattern));
|
|
979
1031
|
}
|
|
980
1032
|
// Handle file patterns
|
|
981
1033
|
if (pattern.includes('/')) {
|
|
@@ -1015,9 +1067,9 @@ class Context {
|
|
|
1015
1067
|
try {
|
|
1016
1068
|
const extensions = envExtensions
|
|
1017
1069
|
.split(',')
|
|
1018
|
-
.map(ext => ext.trim())
|
|
1019
|
-
.filter(ext => ext.length > 0)
|
|
1020
|
-
.map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
|
|
1070
|
+
.map((ext) => ext.trim())
|
|
1071
|
+
.filter((ext) => ext.length > 0)
|
|
1072
|
+
.map((ext) => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot
|
|
1021
1073
|
return extensions;
|
|
1022
1074
|
}
|
|
1023
1075
|
catch (error) {
|
|
@@ -1038,8 +1090,8 @@ class Context {
|
|
|
1038
1090
|
try {
|
|
1039
1091
|
const patterns = envIgnorePatterns
|
|
1040
1092
|
.split(',')
|
|
1041
|
-
.map(pattern => pattern.trim())
|
|
1042
|
-
.filter(pattern => pattern.length > 0);
|
|
1093
|
+
.map((pattern) => pattern.trim())
|
|
1094
|
+
.filter((pattern) => pattern.length > 0);
|
|
1043
1095
|
return patterns;
|
|
1044
1096
|
}
|
|
1045
1097
|
catch (error) {
|
|
@@ -1055,7 +1107,7 @@ class Context {
|
|
|
1055
1107
|
if (customExtensions.length === 0)
|
|
1056
1108
|
return;
|
|
1057
1109
|
// Ensure extensions start with dot
|
|
1058
|
-
const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}`);
|
|
1110
|
+
const normalizedExtensions = customExtensions.map((ext) => ext.startsWith('.') ? ext : `.${ext}`);
|
|
1059
1111
|
// Merge current extensions with new custom extensions, avoiding duplicates
|
|
1060
1112
|
const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions];
|
|
1061
1113
|
const uniqueExtensions = [...new Set(mergedExtensions)];
|
|
@@ -1072,13 +1124,13 @@ class Context {
|
|
|
1072
1124
|
return {
|
|
1073
1125
|
type: 'ast',
|
|
1074
1126
|
hasBuiltinFallback: true,
|
|
1075
|
-
supportedLanguages: AstCodeSplitter.getSupportedLanguages()
|
|
1127
|
+
supportedLanguages: AstCodeSplitter.getSupportedLanguages(),
|
|
1076
1128
|
};
|
|
1077
1129
|
}
|
|
1078
1130
|
else {
|
|
1079
1131
|
return {
|
|
1080
1132
|
type: 'langchain',
|
|
1081
|
-
hasBuiltinFallback: false
|
|
1133
|
+
hasBuiltinFallback: false,
|
|
1082
1134
|
};
|
|
1083
1135
|
}
|
|
1084
1136
|
}
|
|
@@ -1108,13 +1160,13 @@ class Context {
|
|
|
1108
1160
|
strategy: isSupported ? 'ast' : 'langchain',
|
|
1109
1161
|
reason: isSupported
|
|
1110
1162
|
? 'Language supported by AST parser'
|
|
1111
|
-
: 'Language not supported by AST, will fallback to LangChain'
|
|
1163
|
+
: 'Language not supported by AST, will fallback to LangChain',
|
|
1112
1164
|
};
|
|
1113
1165
|
}
|
|
1114
1166
|
else {
|
|
1115
1167
|
return {
|
|
1116
1168
|
strategy: 'langchain',
|
|
1117
|
-
reason: 'Using LangChain splitter directly'
|
|
1169
|
+
reason: 'Using LangChain splitter directly',
|
|
1118
1170
|
};
|
|
1119
1171
|
}
|
|
1120
1172
|
}
|