@softerist/heuristic-mcp 3.0.17 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.jsonc +23 -6
- package/features/ann-config.js +7 -14
- package/features/clear-cache.js +3 -3
- package/features/find-similar-code.js +17 -22
- package/features/hybrid-search.js +59 -67
- package/features/index-codebase.js +305 -268
- package/features/lifecycle.js +370 -176
- package/features/package-version.js +15 -26
- package/features/register.js +75 -57
- package/features/resources.js +21 -47
- package/features/set-workspace.js +31 -43
- package/index.js +818 -172
- package/lib/cache-utils.js +95 -99
- package/lib/cache.js +121 -166
- package/lib/cli.js +246 -238
- package/lib/config.js +232 -62
- package/lib/constants.js +22 -2
- package/lib/embed-query-process.js +13 -29
- package/lib/embedding-process.js +29 -19
- package/lib/embedding-worker.js +166 -149
- package/lib/ignore-patterns.js +39 -39
- package/lib/json-writer.js +7 -34
- package/lib/logging.js +11 -42
- package/lib/onnx-backend.js +4 -4
- package/lib/path-utils.js +4 -21
- package/lib/project-detector.js +3 -3
- package/lib/server-lifecycle.js +109 -15
- package/lib/settings-editor.js +25 -18
- package/lib/slice-normalize.js +6 -16
- package/lib/tokenizer.js +56 -109
- package/lib/utils.js +62 -81
- package/lib/vector-store-binary.js +7 -7
- package/lib/vector-store-sqlite.js +35 -67
- package/lib/workspace-cache-key.js +36 -0
- package/lib/workspace-env.js +55 -14
- package/package.json +86 -86
package/lib/embedding-process.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { pipeline } from '@huggingface/transformers';
|
|
1
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
2
2
|
import { configureNativeOnnxBackend } from './onnx-backend.js';
|
|
3
3
|
import {
|
|
4
4
|
EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
|
|
@@ -7,9 +7,11 @@ import {
|
|
|
7
7
|
EMBEDDING_PROCESS_GC_STATE_INITIAL,
|
|
8
8
|
} from './constants.js';
|
|
9
9
|
import readline from 'readline';
|
|
10
|
+
import path from 'path';
|
|
11
|
+
import os from 'os';
|
|
10
12
|
import { pathToFileURL } from 'url';
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
|
|
13
15
|
let currentRequestId = -1;
|
|
14
16
|
const log = (...args) => {
|
|
15
17
|
if (currentRequestId > 0 && !process.env.EMBEDDING_PROCESS_VERBOSE) {
|
|
@@ -44,6 +46,16 @@ let gcSupported = typeof global.gc === 'function';
|
|
|
44
46
|
let nativeBackendConfigured = false;
|
|
45
47
|
const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
|
|
46
48
|
|
|
49
|
+
function getGlobalCacheDir() {
|
|
50
|
+
if (process.platform === 'win32') {
|
|
51
|
+
return process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
|
|
52
|
+
}
|
|
53
|
+
if (process.platform === 'darwin') {
|
|
54
|
+
return path.join(os.homedir(), 'Library', 'Caches');
|
|
55
|
+
}
|
|
56
|
+
return process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
|
|
57
|
+
}
|
|
58
|
+
|
|
47
59
|
function toPositiveNumber(value, fallback) {
|
|
48
60
|
const parsed = Number(value);
|
|
49
61
|
return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
|
|
@@ -138,6 +150,7 @@ async function getEmbedder(embeddingModel, numThreads) {
|
|
|
138
150
|
if (!embedderPromise) {
|
|
139
151
|
configuredModel = embeddingModel;
|
|
140
152
|
setThreads(numThreads);
|
|
153
|
+
env.cacheDir = path.join(getGlobalCacheDir(), 'xenova');
|
|
141
154
|
log(`Loading model ${embeddingModel}...`);
|
|
142
155
|
const loadStart = Date.now();
|
|
143
156
|
embedderPromise = pipeline('feature-extraction', embeddingModel, {
|
|
@@ -176,10 +189,7 @@ function resetEmbeddingProcessState() {
|
|
|
176
189
|
gcState.requestsSinceLastRun = 0;
|
|
177
190
|
}
|
|
178
191
|
|
|
179
|
-
|
|
180
|
-
* Unload the embedding model to free memory.
|
|
181
|
-
* This nulls out the cached pipeline and triggers GC if available.
|
|
182
|
-
*/
|
|
192
|
+
|
|
183
193
|
async function unloadModel() {
|
|
184
194
|
if (!embedderPromise) {
|
|
185
195
|
log('[Child] No model loaded, nothing to unload');
|
|
@@ -189,7 +199,7 @@ async function unloadModel() {
|
|
|
189
199
|
try {
|
|
190
200
|
const embedder = await embedderPromise;
|
|
191
201
|
|
|
192
|
-
|
|
202
|
+
|
|
193
203
|
if (embedder && typeof embedder.dispose === 'function') {
|
|
194
204
|
try {
|
|
195
205
|
await embedder.dispose();
|
|
@@ -202,12 +212,12 @@ async function unloadModel() {
|
|
|
202
212
|
log(`[Child] Error during model unload: ${err.message}`);
|
|
203
213
|
}
|
|
204
214
|
|
|
205
|
-
|
|
215
|
+
|
|
206
216
|
embedderPromise = null;
|
|
207
217
|
configuredModel = null;
|
|
208
218
|
configuredThreads = null;
|
|
209
219
|
|
|
210
|
-
|
|
220
|
+
|
|
211
221
|
if (gcSupported) {
|
|
212
222
|
maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
|
|
213
223
|
}
|
|
@@ -251,8 +261,8 @@ async function runEmbedding(payload) {
|
|
|
251
261
|
gcState.requestsSinceLastRun += 1;
|
|
252
262
|
}
|
|
253
263
|
|
|
254
|
-
|
|
255
|
-
|
|
264
|
+
|
|
265
|
+
|
|
256
266
|
const BATCH_SIZE =
|
|
257
267
|
Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
|
|
258
268
|
|
|
@@ -262,17 +272,17 @@ async function runEmbedding(payload) {
|
|
|
262
272
|
const batchTexts = batchChunks.map((c) => c.text);
|
|
263
273
|
|
|
264
274
|
try {
|
|
265
|
-
|
|
275
|
+
|
|
266
276
|
const output = await embedder(batchTexts, { pooling: 'mean', normalize: true });
|
|
267
277
|
|
|
268
|
-
|
|
278
|
+
|
|
269
279
|
const hiddenSize = output.dims[output.dims.length - 1];
|
|
270
280
|
|
|
271
281
|
for (let j = 0; j < batchChunks.length; j++) {
|
|
272
282
|
const chunk = batchChunks[j];
|
|
273
283
|
const vecStart = j * hiddenSize;
|
|
274
284
|
const vecEnd = vecStart + hiddenSize;
|
|
275
|
-
|
|
285
|
+
|
|
276
286
|
const vector = new Float32Array(output.data.subarray(vecStart, vecEnd));
|
|
277
287
|
|
|
278
288
|
results.push({
|
|
@@ -285,17 +295,17 @@ async function runEmbedding(payload) {
|
|
|
285
295
|
});
|
|
286
296
|
}
|
|
287
297
|
|
|
288
|
-
|
|
298
|
+
|
|
289
299
|
if (typeof output.dispose === 'function') {
|
|
290
300
|
try {
|
|
291
301
|
output.dispose();
|
|
292
302
|
} catch {
|
|
293
|
-
|
|
303
|
+
|
|
294
304
|
}
|
|
295
305
|
}
|
|
296
306
|
disposeCount++;
|
|
297
307
|
} catch (error) {
|
|
298
|
-
|
|
308
|
+
|
|
299
309
|
log(`Batch failed, falling back to single: ${error.message}`);
|
|
300
310
|
for (const chunk of batchChunks) {
|
|
301
311
|
try {
|
|
@@ -305,7 +315,7 @@ async function runEmbedding(payload) {
|
|
|
305
315
|
try {
|
|
306
316
|
output.dispose();
|
|
307
317
|
} catch {
|
|
308
|
-
|
|
318
|
+
|
|
309
319
|
}
|
|
310
320
|
}
|
|
311
321
|
disposeCount++;
|
|
@@ -329,7 +339,7 @@ async function runEmbedding(payload) {
|
|
|
329
339
|
}
|
|
330
340
|
}
|
|
331
341
|
|
|
332
|
-
|
|
342
|
+
|
|
333
343
|
if (batchEnd % 20 === 0 || batchEnd === chunks.length) {
|
|
334
344
|
const elapsed = ((Date.now() - start) / 1000).toFixed(1);
|
|
335
345
|
log(
|