@softerist/heuristic-mcp 2.1.47 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -75
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import fsSync from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
|
|
6
|
+
// 4-character ASCII tag written at byte 0 of the vectors file header and
// verified by readHeader() when the store is loaded.
const MAGIC_VECTORS = 'HMCV';
|
|
7
|
+
// 4-character ASCII tag for the records file header.
const MAGIC_RECORDS = 'HMCR';
|
|
8
|
+
// 4-character ASCII tag for the content file header.
const MAGIC_CONTENT = 'HMCC';
|
|
9
|
+
// Format version stored as a little-endian uint32 at header offset 4;
// readHeader() rejects any other value.
const STORE_VERSION = 1;
|
|
10
|
+
|
|
11
|
+
// Vectors header: magic (4 bytes), then uint32 version, dim, count, and a
// zeroed uint32 at offset 16.
const VECTOR_HEADER_SIZE = 20;
|
|
12
|
+
// Records header: magic (4 bytes), then uint32 version, record count,
// file count, and a zeroed uint32 at offset 16.
const RECORD_HEADER_SIZE = 20;
|
|
13
|
+
// Content header: magic (4 bytes), uint32 version, uint64 total content
// bytes at offset 8, and a zeroed uint32 at offset 16.
const CONTENT_HEADER_SIZE = 20;
|
|
14
|
+
// Fixed record size: uint32 fileId, startLine, endLine (offsets 0/4/8),
// uint64 contentOffset (offset 12), uint32 contentLength (offset 20), and
// two zeroed uint32 slots (offsets 24/28).
const RECORD_SIZE = 32;
|
|
15
|
+
|
|
16
|
+
// File names joined onto the cache directory by BinaryVectorStore.getPaths().
const VECTORS_FILE = 'vectors.bin';
|
|
17
|
+
const RECORDS_FILE = 'records.bin';
|
|
18
|
+
const CONTENT_FILE = 'content.bin';
|
|
19
|
+
// JSON array of file paths; records refer to entries by index (fileId).
const FILES_FILE = 'files.json';
|
|
20
|
+
|
|
21
|
+
/**
 * Stamps a magic tag at the very start of a header buffer.
 *
 * @param {Buffer} buffer - Destination buffer; bytes are written from offset 0.
 * @param {string} magic - Tag to write, encoded as ASCII.
 */
function writeMagic(buffer, magic) {
  const magicOffset = 0;
  buffer.write(magic, magicOffset, 'ascii');
}
|
|
24
|
+
|
|
25
|
+
/**
 * Reads the magic tag stored in the first four bytes of a buffer.
 *
 * @param {Buffer} buffer - Buffer whose leading bytes hold the tag.
 * @returns {string} The first (up to) four bytes decoded as ASCII.
 */
function readMagic(buffer) {
  const start = 0;
  const end = 4;
  return buffer.toString('ascii', start, end);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Guards entry points of the binary store against big-endian hosts.
 *
 * @throws {Error} When os.endianness() reports anything other than 'LE'.
 */
function ensureLittleEndian() {
  const hostIsLittleEndian = os.endianness() === 'LE';
  if (hostIsLittleEndian) return;
  throw new Error('Binary vector store requires little-endian architecture');
}
|
|
34
|
+
|
|
35
|
+
/**
 * Wraps exactly the bytes covered by a Buffer in a DataView, honouring the
 * Buffer's own offset into its backing ArrayBuffer.
 *
 * @param {Buffer} buffer - Buffer (or other typed-array view) to wrap.
 * @returns {DataView} View over the same memory; no bytes are copied.
 */
function getDataView(buffer) {
  const { buffer: backingStore, byteOffset, byteLength } = buffer;
  return new DataView(backingStore, byteOffset, byteLength);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Validates a store file header and hands back a DataView for reading the
 * remaining header fields.
 *
 * Checks run in this order: minimum length, magic tag, format version.
 *
 * @param {Buffer} buffer - Buffer starting at the beginning of the file.
 * @param {string} magic - Expected magic tag for this file type.
 * @param {number} headerSize - Minimum number of bytes the header occupies.
 * @returns {DataView} View over `buffer`.
 * @throws {Error} If the buffer is too short, the magic differs, or the
 *   stored version is not STORE_VERSION.
 */
function readHeader(buffer, magic, headerSize) {
  const isTruncated = buffer.length < headerSize;
  if (isTruncated) throw new Error('Binary store header is truncated');

  const foundMagic = readMagic(buffer);
  if (foundMagic !== magic) throw new Error(`Invalid binary store magic (${foundMagic})`);

  const headerView = getDataView(buffer);
  // The version is a little-endian uint32 right after the 4-byte magic.
  const storedVersion = headerView.getUint32(4, true);
  if (storedVersion !== STORE_VERSION) {
    throw new Error(`Unsupported binary store version (${storedVersion})`);
  }

  return headerView;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Fills in the vectors-file header: magic, version, dimension, count and a
 * zeroed trailing uint32.
 *
 * @param {Buffer} buffer - Header buffer to fill.
 * @param {number} dim - Number of float32 components per vector.
 * @param {number} count - Number of vectors in the file.
 */
function writeVectorsHeader(buffer, dim, count) {
  writeMagic(buffer, MAGIC_VECTORS);
  const header = getDataView(buffer);
  // [byte offset, value] pairs, all written as little-endian uint32.
  const fields = [
    [4, STORE_VERSION],
    [8, dim],
    [12, count],
    [16, 0],
  ];
  for (const [fieldOffset, fieldValue] of fields) {
    header.setUint32(fieldOffset, fieldValue, true);
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Fills in the records-file header: magic, version, record count, file
 * count and a zeroed trailing uint32.
 *
 * @param {Buffer} buffer - Header buffer to fill.
 * @param {number} count - Number of records in the file.
 * @param {number} fileCount - Number of entries in the file table.
 */
function writeRecordsHeader(buffer, count, fileCount) {
  writeMagic(buffer, MAGIC_RECORDS);
  const header = getDataView(buffer);
  // [byte offset, value] pairs, all written as little-endian uint32.
  const fields = [
    [4, STORE_VERSION],
    [8, count],
    [12, fileCount],
    [16, 0],
  ];
  for (const [fieldOffset, fieldValue] of fields) {
    header.setUint32(fieldOffset, fieldValue, true);
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Fills in the content-file header: magic, version, total payload size as a
 * uint64, and a zeroed trailing uint32.
 *
 * @param {Buffer} buffer - Header buffer to fill.
 * @param {number} totalBytes - Total content bytes stored after the header;
 *   converted with BigInt(), which throws for non-integer numbers.
 */
function writeContentHeader(buffer, totalBytes) {
  writeMagic(buffer, MAGIC_CONTENT);
  const header = getDataView(buffer);
  header.setUint32(4, STORE_VERSION, true);
  // Stored as 64 bits so the payload size is not capped at 4 GiB.
  header.setBigUint64(8, BigInt(totalBytes), true);
  header.setUint32(16, 0, true);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Reads a little-endian uint64 and converts it to a plain number.
 *
 * @param {DataView} view - View to read from.
 * @param {number} offset - Byte offset of the 8-byte value.
 * @returns {number} The value as a safe integer.
 * @throws {Error} If the stored value is above Number.MAX_SAFE_INTEGER.
 */
function readBigUint(view, offset) {
  const raw = view.getBigUint64(offset, true);
  const largestSafe = BigInt(Number.MAX_SAFE_INTEGER);
  if (raw <= largestSafe) return Number(raw);
  throw new Error('Binary store content offset exceeds safe integer range');
}
|
|
89
|
+
|
|
90
|
+
/**
 * Coerces a chunk's content into a string.
 *
 * @param {*} value - Raw content value.
 * @returns {string} `value` itself when already a string, '' for
 *   null/undefined, otherwise String(value).
 */
function normalizeContent(value) {
  if (typeof value === 'string') return value;
  // `== null` deliberately matches both null and undefined.
  return value == null ? '' : String(value);
}
|
|
95
|
+
|
|
96
|
+
export class BinaryVectorStore {
|
|
97
|
+
constructor({
|
|
98
|
+
vectorsBuffer,
|
|
99
|
+
recordsBuffer,
|
|
100
|
+
vectorsHandle,
|
|
101
|
+
vectorsFd,
|
|
102
|
+
contentHandle,
|
|
103
|
+
contentBuffer,
|
|
104
|
+
contentSize,
|
|
105
|
+
files,
|
|
106
|
+
dim,
|
|
107
|
+
count,
|
|
108
|
+
contentCacheEntries,
|
|
109
|
+
vectorCacheEntries,
|
|
110
|
+
}) {
|
|
111
|
+
this.vectorsBuffer = vectorsBuffer;
|
|
112
|
+
this.recordsBuffer = recordsBuffer;
|
|
113
|
+
this.vectorsHandle = vectorsHandle ?? null;
|
|
114
|
+
this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
|
|
115
|
+
this.contentHandle = contentHandle ?? null;
|
|
116
|
+
this.contentBuffer = contentBuffer ?? null;
|
|
117
|
+
this.contentSize = Number.isFinite(contentSize)
|
|
118
|
+
? contentSize
|
|
119
|
+
: contentBuffer
|
|
120
|
+
? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
|
|
121
|
+
: 0;
|
|
122
|
+
this.files = files;
|
|
123
|
+
this.dim = dim;
|
|
124
|
+
this.count = count;
|
|
125
|
+
this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
|
|
126
|
+
this.contentCache = new Map();
|
|
127
|
+
this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
|
|
128
|
+
this.vectorCache = new Map();
|
|
129
|
+
|
|
130
|
+
this.vectorDataOffset = VECTOR_HEADER_SIZE;
|
|
131
|
+
this.recordDataOffset = RECORD_HEADER_SIZE;
|
|
132
|
+
this.contentDataOffset = CONTENT_HEADER_SIZE;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
async close() {
|
|
136
|
+
this.contentCache.clear();
|
|
137
|
+
this.vectorCache.clear();
|
|
138
|
+
this.vectorsBuffer = null;
|
|
139
|
+
this.recordsBuffer = null;
|
|
140
|
+
this.contentBuffer = null;
|
|
141
|
+
this.files = null;
|
|
142
|
+
if (this.vectorsHandle) {
|
|
143
|
+
try {
|
|
144
|
+
await this.vectorsHandle.close();
|
|
145
|
+
} catch {
|
|
146
|
+
// ignore close errors
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
this.vectorsHandle = null;
|
|
150
|
+
if (Number.isInteger(this.vectorsFd)) {
|
|
151
|
+
try {
|
|
152
|
+
fsSync.closeSync(this.vectorsFd);
|
|
153
|
+
} catch {
|
|
154
|
+
// ignore close errors
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
this.vectorsFd = null;
|
|
158
|
+
if (this.contentHandle) {
|
|
159
|
+
try {
|
|
160
|
+
await this.contentHandle.close();
|
|
161
|
+
} catch {
|
|
162
|
+
// ignore close errors
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
this.contentHandle = null;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
static getPaths(cacheDir) {
|
|
169
|
+
return {
|
|
170
|
+
vectorsPath: path.join(cacheDir, VECTORS_FILE),
|
|
171
|
+
recordsPath: path.join(cacheDir, RECORDS_FILE),
|
|
172
|
+
contentPath: path.join(cacheDir, CONTENT_FILE),
|
|
173
|
+
filesPath: path.join(cacheDir, FILES_FILE),
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
|
|
178
|
+
ensureLittleEndian();
|
|
179
|
+
const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(cacheDir);
|
|
180
|
+
|
|
181
|
+
let contentReadHandle = null;
|
|
182
|
+
let vectorsFd = null;
|
|
183
|
+
|
|
184
|
+
try {
|
|
185
|
+
const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
|
|
186
|
+
let vectorsBuffer = null;
|
|
187
|
+
|
|
188
|
+
const [recordsBuffer, filesRaw] = await Promise.all([
|
|
189
|
+
fs.readFile(recordsPath),
|
|
190
|
+
fs.readFile(filesPath, 'utf-8'),
|
|
191
|
+
]);
|
|
192
|
+
|
|
193
|
+
if (loadVectorsFromDisk) {
|
|
194
|
+
vectorsFd = fsSync.openSync(vectorsPath, 'r');
|
|
195
|
+
const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
|
|
196
|
+
const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
|
|
197
|
+
if (bytesRead < VECTOR_HEADER_SIZE) {
|
|
198
|
+
throw new Error('Binary store vectors header is truncated');
|
|
199
|
+
}
|
|
200
|
+
vectorsBuffer = headerBuffer;
|
|
201
|
+
} else {
|
|
202
|
+
vectorsBuffer = await fs.readFile(vectorsPath);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
|
|
206
|
+
const dim = vectorsView.getUint32(8, true);
|
|
207
|
+
const count = vectorsView.getUint32(12, true);
|
|
208
|
+
|
|
209
|
+
const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
|
|
210
|
+
const recordCount = recordsView.getUint32(8, true);
|
|
211
|
+
const fileCount = recordsView.getUint32(12, true);
|
|
212
|
+
|
|
213
|
+
if (recordCount !== count) {
|
|
214
|
+
throw new Error(`Binary store count mismatch (${recordCount} != ${count})`);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
contentReadHandle = await fs.open(contentPath, 'r');
|
|
218
|
+
let totalContentBytes = 0;
|
|
219
|
+
|
|
220
|
+
const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
|
|
221
|
+
const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
|
|
222
|
+
if (bytesRead < CONTENT_HEADER_SIZE) {
|
|
223
|
+
throw new Error('Binary store content header is truncated');
|
|
224
|
+
}
|
|
225
|
+
const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
|
|
226
|
+
totalContentBytes = readBigUint(contentView, 8);
|
|
227
|
+
const stats = await contentReadHandle.stat();
|
|
228
|
+
const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
|
|
229
|
+
if (stats.size < expectedContentSize) {
|
|
230
|
+
throw new Error('Binary store content file truncated');
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
const files = JSON.parse(filesRaw);
|
|
234
|
+
if (!Array.isArray(files) || files.length !== fileCount) {
|
|
235
|
+
throw new Error('Binary store file table is invalid');
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
return new BinaryVectorStore({
|
|
239
|
+
vectorsBuffer,
|
|
240
|
+
recordsBuffer,
|
|
241
|
+
vectorsHandle: null,
|
|
242
|
+
vectorsFd,
|
|
243
|
+
contentHandle: contentReadHandle,
|
|
244
|
+
contentSize: totalContentBytes,
|
|
245
|
+
files,
|
|
246
|
+
dim,
|
|
247
|
+
count,
|
|
248
|
+
contentCacheEntries,
|
|
249
|
+
vectorCacheEntries,
|
|
250
|
+
});
|
|
251
|
+
} catch (err) {
|
|
252
|
+
if (contentReadHandle) await contentReadHandle.close().catch(() => {});
|
|
253
|
+
if (Number.isInteger(vectorsFd)) {
|
|
254
|
+
try {
|
|
255
|
+
fsSync.closeSync(vectorsFd);
|
|
256
|
+
} catch {
|
|
257
|
+
// ignore close errors
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
throw err;
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
get length() {
|
|
265
|
+
return this.count;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
getRecord(index) {
|
|
269
|
+
if (index < 0 || index >= this.count) return null;
|
|
270
|
+
const offset = this.recordDataOffset + index * RECORD_SIZE;
|
|
271
|
+
const view = getDataView(this.recordsBuffer);
|
|
272
|
+
|
|
273
|
+
const fileId = view.getUint32(offset, true);
|
|
274
|
+
const startLine = view.getUint32(offset + 4, true);
|
|
275
|
+
const endLine = view.getUint32(offset + 8, true);
|
|
276
|
+
const contentOffset = readBigUint(view, offset + 12);
|
|
277
|
+
const contentLength = view.getUint32(offset + 20, true);
|
|
278
|
+
|
|
279
|
+
return {
|
|
280
|
+
fileId,
|
|
281
|
+
file: this.files[fileId],
|
|
282
|
+
startLine,
|
|
283
|
+
endLine,
|
|
284
|
+
contentOffset,
|
|
285
|
+
contentLength,
|
|
286
|
+
};
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
getVector(index) {
|
|
290
|
+
if (index < 0 || index >= this.count) return null;
|
|
291
|
+
if (this.vectorCacheEntries > 0) {
|
|
292
|
+
const cached = this.vectorCache.get(index);
|
|
293
|
+
if (cached) {
|
|
294
|
+
this.vectorCache.delete(index);
|
|
295
|
+
this.vectorCache.set(index, cached);
|
|
296
|
+
return cached;
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
const offset = this.vectorDataOffset + index * this.dim * 4;
|
|
301
|
+
const byteLength = this.dim * 4;
|
|
302
|
+
let vector = null;
|
|
303
|
+
|
|
304
|
+
if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
|
|
305
|
+
vector = new Float32Array(
|
|
306
|
+
this.vectorsBuffer.buffer,
|
|
307
|
+
this.vectorsBuffer.byteOffset + offset,
|
|
308
|
+
this.dim,
|
|
309
|
+
);
|
|
310
|
+
} else if (Number.isInteger(this.vectorsFd)) {
|
|
311
|
+
const buffer = Buffer.allocUnsafe(byteLength);
|
|
312
|
+
const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
|
|
313
|
+
if (bytesRead === byteLength) {
|
|
314
|
+
vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
if (vector && this.vectorCacheEntries > 0) {
|
|
319
|
+
this.vectorCache.set(index, vector);
|
|
320
|
+
if (this.vectorCache.size > this.vectorCacheEntries) {
|
|
321
|
+
const firstKey = this.vectorCache.keys().next().value;
|
|
322
|
+
this.vectorCache.delete(firstKey);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
return vector;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
async getContent(index) {
|
|
330
|
+
if (index < 0 || index >= this.count) return null;
|
|
331
|
+
if (this.contentCacheEntries > 0) {
|
|
332
|
+
const cached = this.contentCache.get(index);
|
|
333
|
+
if (cached !== undefined) {
|
|
334
|
+
this.contentCache.delete(index);
|
|
335
|
+
this.contentCache.set(index, cached);
|
|
336
|
+
return cached;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
const record = this.getRecord(index);
|
|
341
|
+
if (!record || record.contentLength === 0) return '';
|
|
342
|
+
const contentLimit = record.contentOffset + record.contentLength;
|
|
343
|
+
if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
|
|
344
|
+
return '';
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
let content = '';
|
|
348
|
+
if (this.contentBuffer) {
|
|
349
|
+
const start = this.contentDataOffset + record.contentOffset;
|
|
350
|
+
const end = start + record.contentLength;
|
|
351
|
+
content = this.contentBuffer.slice(start, end).toString('utf-8');
|
|
352
|
+
} else if (this.contentHandle) {
|
|
353
|
+
const start = this.contentDataOffset + record.contentOffset;
|
|
354
|
+
const length = record.contentLength;
|
|
355
|
+
const buffer = Buffer.alloc(length);
|
|
356
|
+
const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
|
|
357
|
+
content = buffer.slice(0, bytesRead).toString('utf-8');
|
|
358
|
+
} else {
|
|
359
|
+
return '';
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
if (this.contentCacheEntries > 0) {
|
|
363
|
+
this.contentCache.set(index, content);
|
|
364
|
+
if (this.contentCache.size > this.contentCacheEntries) {
|
|
365
|
+
const firstKey = this.contentCache.keys().next().value;
|
|
366
|
+
this.contentCache.delete(firstKey);
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
return content;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
async toChunkViews({ includeContent = false, includeVector = true } = {}) {
|
|
374
|
+
const chunks = new Array(this.count);
|
|
375
|
+
for (let i = 0; i < this.count; i += 1) {
|
|
376
|
+
const record = this.getRecord(i);
|
|
377
|
+
if (!record) continue;
|
|
378
|
+
const chunk = {
|
|
379
|
+
file: record.file,
|
|
380
|
+
startLine: record.startLine,
|
|
381
|
+
endLine: record.endLine,
|
|
382
|
+
_index: i,
|
|
383
|
+
_binaryIndex: i,
|
|
384
|
+
};
|
|
385
|
+
if (includeVector) {
|
|
386
|
+
chunk.vector = this.getVector(i);
|
|
387
|
+
}
|
|
388
|
+
if (includeContent) {
|
|
389
|
+
chunk.content = await this.getContent(i);
|
|
390
|
+
}
|
|
391
|
+
chunks[i] = chunk;
|
|
392
|
+
}
|
|
393
|
+
return chunks;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
getAllFileIndices() {
|
|
397
|
+
const map = new Map();
|
|
398
|
+
for (let i = 0; i < this.count; i++) {
|
|
399
|
+
const record = this.getRecord(i);
|
|
400
|
+
if (record) {
|
|
401
|
+
let list = map.get(record.file);
|
|
402
|
+
if (!list) {
|
|
403
|
+
list = [];
|
|
404
|
+
map.set(record.file, list);
|
|
405
|
+
}
|
|
406
|
+
list.push(i);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
return map;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
static async write(cacheDir, chunks, {
|
|
413
|
+
contentCacheEntries,
|
|
414
|
+
getContent,
|
|
415
|
+
preRename,
|
|
416
|
+
} = {}) {
|
|
417
|
+
ensureLittleEndian();
|
|
418
|
+
const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(cacheDir);
|
|
419
|
+
|
|
420
|
+
const tmpSuffix = `.tmp-${process.pid}`;
|
|
421
|
+
const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
|
|
422
|
+
const recordsTmp = `${recordsPath}${tmpSuffix}`;
|
|
423
|
+
const contentTmp = `${contentPath}${tmpSuffix}`;
|
|
424
|
+
const filesTmp = `${filesPath}${tmpSuffix}`;
|
|
425
|
+
|
|
426
|
+
const fileIds = new Map();
|
|
427
|
+
const files = [];
|
|
428
|
+
let dim = null;
|
|
429
|
+
|
|
430
|
+
const denseChunks = [];
|
|
431
|
+
const denseSourceIndices = [];
|
|
432
|
+
for (let i = 0; i < chunks.length; i += 1) {
|
|
433
|
+
const chunk = chunks[i];
|
|
434
|
+
if (!chunk) continue;
|
|
435
|
+
denseChunks.push(chunk);
|
|
436
|
+
denseSourceIndices.push(i);
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
const recordEntries = new Array(denseChunks.length);
|
|
440
|
+
let contentOffset = 0;
|
|
441
|
+
|
|
442
|
+
for (let i = 0; i < denseChunks.length; i += 1) {
|
|
443
|
+
const chunk = denseChunks[i];
|
|
444
|
+
const sourceIndex = denseSourceIndices[i];
|
|
445
|
+
|
|
446
|
+
const file = chunk.file;
|
|
447
|
+
if (!fileIds.has(file)) {
|
|
448
|
+
fileIds.set(file, files.length);
|
|
449
|
+
files.push(file);
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
if (chunk.vector === undefined || chunk.vector === null) {
|
|
453
|
+
throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
|
|
454
|
+
}
|
|
455
|
+
const vector = chunk.vector instanceof Float32Array ? chunk.vector : new Float32Array(chunk.vector);
|
|
456
|
+
if (!vector) {
|
|
457
|
+
throw new Error('Missing vector data for binary cache write');
|
|
458
|
+
}
|
|
459
|
+
if (vector.length === 0) {
|
|
460
|
+
throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
|
|
461
|
+
}
|
|
462
|
+
if (dim === null) {
|
|
463
|
+
dim = vector.length;
|
|
464
|
+
} else if (vector.length !== dim) {
|
|
465
|
+
throw new Error('Vector dimension mismatch in binary cache write');
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
const contentSource =
|
|
469
|
+
chunk.content !== undefined && chunk.content !== null
|
|
470
|
+
? chunk.content
|
|
471
|
+
: getContent
|
|
472
|
+
? await getContent(chunk, sourceIndex)
|
|
473
|
+
: '';
|
|
474
|
+
const contentValue = normalizeContent(contentSource);
|
|
475
|
+
const contentLength = Buffer.byteLength(contentValue, 'utf-8');
|
|
476
|
+
|
|
477
|
+
recordEntries[i] = {
|
|
478
|
+
fileId: fileIds.get(file),
|
|
479
|
+
startLine: chunk.startLine ?? 0,
|
|
480
|
+
endLine: chunk.endLine ?? 0,
|
|
481
|
+
contentOffset,
|
|
482
|
+
contentLength,
|
|
483
|
+
vector,
|
|
484
|
+
};
|
|
485
|
+
|
|
486
|
+
contentOffset += contentLength;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
if (!dim) dim = 0;
|
|
490
|
+
const count = denseChunks.length;
|
|
491
|
+
|
|
492
|
+
await fs.writeFile(filesTmp, JSON.stringify(files, null, 2));
|
|
493
|
+
|
|
494
|
+
let vectorsHandle = null;
|
|
495
|
+
let recordsHandle = null;
|
|
496
|
+
let contentHandle = null;
|
|
497
|
+
|
|
498
|
+
try {
|
|
499
|
+
vectorsHandle = await fs.open(vectorsTmp, 'w');
|
|
500
|
+
recordsHandle = await fs.open(recordsTmp, 'w');
|
|
501
|
+
contentHandle = await fs.open(contentTmp, 'w');
|
|
502
|
+
|
|
503
|
+
const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
|
|
504
|
+
writeVectorsHeader(vectorsHeader, dim, count);
|
|
505
|
+
await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
|
|
506
|
+
|
|
507
|
+
const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
|
|
508
|
+
writeRecordsHeader(recordsHeader, count, files.length);
|
|
509
|
+
await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
|
|
510
|
+
|
|
511
|
+
const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
|
|
512
|
+
writeContentHeader(contentHeader, contentOffset);
|
|
513
|
+
await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
|
|
514
|
+
|
|
515
|
+
let vectorPos = VECTOR_HEADER_SIZE;
|
|
516
|
+
let recordPos = RECORD_HEADER_SIZE;
|
|
517
|
+
let contentPos = CONTENT_HEADER_SIZE;
|
|
518
|
+
|
|
519
|
+
for (let i = 0; i < count; i += 1) {
|
|
520
|
+
const entry = recordEntries[i];
|
|
521
|
+
if (!entry) continue;
|
|
522
|
+
|
|
523
|
+
const recordBuffer = Buffer.alloc(RECORD_SIZE);
|
|
524
|
+
const view = getDataView(recordBuffer);
|
|
525
|
+
view.setUint32(0, entry.fileId, true);
|
|
526
|
+
view.setUint32(4, entry.startLine, true);
|
|
527
|
+
view.setUint32(8, entry.endLine, true);
|
|
528
|
+
view.setBigUint64(12, BigInt(entry.contentOffset), true);
|
|
529
|
+
view.setUint32(20, entry.contentLength, true);
|
|
530
|
+
view.setUint32(24, 0, true);
|
|
531
|
+
view.setUint32(28, 0, true);
|
|
532
|
+
|
|
533
|
+
await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
|
|
534
|
+
recordPos += recordBuffer.length;
|
|
535
|
+
|
|
536
|
+
const vectorBuffer = Buffer.from(entry.vector.buffer, entry.vector.byteOffset, entry.vector.byteLength);
|
|
537
|
+
await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
|
|
538
|
+
vectorPos += vectorBuffer.length;
|
|
539
|
+
|
|
540
|
+
if (entry.contentLength > 0) {
|
|
541
|
+
// Re-fetch content to avoid holding all strings in memory
|
|
542
|
+
const chunk = denseChunks[i];
|
|
543
|
+
const sourceIndex = denseSourceIndices[i];
|
|
544
|
+
const contentSource =
|
|
545
|
+
chunk.content !== undefined && chunk.content !== null
|
|
546
|
+
? chunk.content
|
|
547
|
+
: getContent
|
|
548
|
+
? await getContent(chunk, sourceIndex)
|
|
549
|
+
: '';
|
|
550
|
+
const val = normalizeContent(contentSource);
|
|
551
|
+
const contentBuffer = Buffer.from(val, 'utf-8');
|
|
552
|
+
await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
|
|
553
|
+
contentPos += contentBuffer.length;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
} finally {
|
|
557
|
+
const closes = [];
|
|
558
|
+
if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
|
|
559
|
+
if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
|
|
560
|
+
if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
|
|
561
|
+
await Promise.all(closes);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
if (preRename) {
|
|
565
|
+
await preRename();
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
await Promise.all([
|
|
569
|
+
fs.rename(vectorsTmp, vectorsPath),
|
|
570
|
+
fs.rename(recordsTmp, recordsPath),
|
|
571
|
+
fs.rename(contentTmp, contentPath),
|
|
572
|
+
fs.rename(filesTmp, filesPath),
|
|
573
|
+
]);
|
|
574
|
+
|
|
575
|
+
const [vectorsBuffer, recordsBuffer] = await Promise.all([
|
|
576
|
+
fs.readFile(vectorsPath),
|
|
577
|
+
fs.readFile(recordsPath),
|
|
578
|
+
]);
|
|
579
|
+
const contentReadHandle = await fs.open(contentPath, 'r');
|
|
580
|
+
|
|
581
|
+
return new BinaryVectorStore({
|
|
582
|
+
vectorsBuffer,
|
|
583
|
+
recordsBuffer,
|
|
584
|
+
contentHandle: contentReadHandle,
|
|
585
|
+
contentSize: contentOffset,
|
|
586
|
+
files,
|
|
587
|
+
dim,
|
|
588
|
+
count,
|
|
589
|
+
contentCacheEntries,
|
|
590
|
+
});
|
|
591
|
+
}
|
|
592
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@softerist/heuristic-mcp",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
"test": "vitest run",
|
|
14
14
|
"test:watch": "vitest",
|
|
15
15
|
"clean": "node scripts/clear-cache.js",
|
|
16
|
+
"lint": "eslint .",
|
|
17
|
+
"format": "prettier --write .",
|
|
16
18
|
"postinstall": "node scripts/postinstall.js && node scripts/download-model.js"
|
|
17
19
|
},
|
|
18
20
|
"keywords": [
|
|
@@ -49,7 +51,8 @@
|
|
|
49
51
|
"@xenova/transformers": "^2.17.2",
|
|
50
52
|
"chokidar": "^3.5.3",
|
|
51
53
|
"fdir": "^6.5.0",
|
|
52
|
-
"
|
|
54
|
+
"ignore": "^7.0.5",
|
|
55
|
+
"punycode": "^2.3.1"
|
|
53
56
|
},
|
|
54
57
|
"optionalDependencies": {
|
|
55
58
|
"hnswlib-node": "^3.0.0"
|
|
@@ -57,7 +60,15 @@
|
|
|
57
60
|
"engines": {
|
|
58
61
|
"node": ">=18.0.0"
|
|
59
62
|
},
|
|
63
|
+
"overrides": {
|
|
64
|
+
"punycode": "^2.3.1"
|
|
65
|
+
},
|
|
60
66
|
"devDependencies": {
|
|
67
|
+
"@eslint/js": "^9.39.2",
|
|
68
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
69
|
+
"eslint": "^9.39.2",
|
|
70
|
+
"globals": "^17.1.0",
|
|
71
|
+
"prettier": "^3.8.1",
|
|
61
72
|
"vitest": "^4.0.16"
|
|
62
73
|
}
|
|
63
74
|
}
|
package/scripts/clear-cache.js
CHANGED
|
@@ -1,27 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import fs from
|
|
3
|
-
import
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import { loadConfig } from '../lib/config.js';
|
|
4
4
|
|
|
5
5
|
async function clearCache() {
|
|
6
6
|
try {
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
// Try to load cache directory from config
|
|
11
|
-
try {
|
|
12
|
-
const configData = await fs.readFile(configPath, "utf-8");
|
|
13
|
-
const config = JSON.parse(configData);
|
|
14
|
-
if (config.cacheDirectory) {
|
|
15
|
-
cacheDir = path.resolve(config.cacheDirectory);
|
|
16
|
-
}
|
|
17
|
-
} catch {
|
|
18
|
-
console.log("Using default cache directory");
|
|
19
|
-
}
|
|
7
|
+
const config = await loadConfig(process.cwd());
|
|
8
|
+
const cacheDir = config.cacheDirectory;
|
|
20
9
|
|
|
21
10
|
// Remove cache directory
|
|
22
11
|
await fs.rm(cacheDir, { recursive: true, force: true });
|
|
23
|
-
console.
|
|
24
|
-
console.
|
|
12
|
+
console.info(`Cache cleared successfully: ${cacheDir}`);
|
|
13
|
+
console.info('Next startup will perform a full reindex.');
|
|
25
14
|
} catch (error) {
|
|
26
15
|
console.error(`Error clearing cache: ${error.message}`);
|
|
27
16
|
process.exit(1);
|