@softerist/heuristic-mcp 3.0.16 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.jsonc +23 -6
- package/features/ann-config.js +7 -14
- package/features/clear-cache.js +3 -3
- package/features/find-similar-code.js +17 -22
- package/features/hybrid-search.js +59 -67
- package/features/index-codebase.js +305 -268
- package/features/lifecycle.js +370 -176
- package/features/package-version.js +15 -26
- package/features/register.js +75 -57
- package/features/resources.js +21 -47
- package/features/set-workspace.js +31 -43
- package/index.js +819 -173
- package/lib/cache-utils.js +95 -99
- package/lib/cache.js +121 -166
- package/lib/cli.js +114 -21
- package/lib/config.js +232 -62
- package/lib/constants.js +22 -2
- package/lib/embed-query-process.js +13 -29
- package/lib/embedding-process.js +29 -19
- package/lib/embedding-worker.js +166 -149
- package/lib/ignore-patterns.js +39 -39
- package/lib/json-writer.js +7 -34
- package/lib/logging.js +11 -42
- package/lib/onnx-backend.js +4 -4
- package/lib/path-utils.js +4 -21
- package/lib/project-detector.js +3 -3
- package/lib/server-lifecycle.js +109 -15
- package/lib/settings-editor.js +25 -18
- package/lib/slice-normalize.js +6 -16
- package/lib/tokenizer.js +56 -109
- package/lib/utils.js +62 -81
- package/lib/vector-store-binary.js +7 -7
- package/lib/vector-store-sqlite.js +35 -67
- package/lib/workspace-cache-key.js +36 -0
- package/lib/workspace-env.js +55 -14
- package/package.json +86 -86
|
@@ -6,7 +6,7 @@ import readline from 'readline';
|
|
|
6
6
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
const EMBEDDING_PROCESS_PATH = path.join(__dirname, 'embedding-process.js');
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
let persistentChild = null;
|
|
11
11
|
let childReadline = null;
|
|
12
12
|
let idleTimer = null;
|
|
@@ -14,16 +14,13 @@ let currentConfig = null;
|
|
|
14
14
|
let pendingRequests = [];
|
|
15
15
|
let isProcessingRequest = false;
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
const DEFAULT_IDLE_TIMEOUT_MS = 30000;
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
* Get or create the persistent embedding child process.
|
|
22
|
-
* The child stays alive for consecutive queries, then exits after idle timeout.
|
|
23
|
-
*/
|
|
20
|
+
|
|
24
21
|
function getOrCreateChild(config) {
|
|
25
22
|
if (persistentChild && !persistentChild.killed) {
|
|
26
|
-
|
|
23
|
+
|
|
27
24
|
resetIdleTimer(config);
|
|
28
25
|
return persistentChild;
|
|
29
26
|
}
|
|
@@ -41,7 +38,7 @@ function getOrCreateChild(config) {
|
|
|
41
38
|
|
|
42
39
|
currentConfig = config;
|
|
43
40
|
|
|
44
|
-
|
|
41
|
+
|
|
45
42
|
childReadline = readline.createInterface({
|
|
46
43
|
input: persistentChild.stdout,
|
|
47
44
|
crlfDelay: Infinity,
|
|
@@ -50,7 +47,7 @@ function getOrCreateChild(config) {
|
|
|
50
47
|
childReadline.on('line', (line) => {
|
|
51
48
|
if (!line.trim()) return;
|
|
52
49
|
|
|
53
|
-
|
|
50
|
+
|
|
54
51
|
if (pendingRequests.length > 0) {
|
|
55
52
|
const { resolve, reject, startTime } = pendingRequests.shift();
|
|
56
53
|
try {
|
|
@@ -93,7 +90,7 @@ function getOrCreateChild(config) {
|
|
|
93
90
|
persistentChild.on('error', (err) => {
|
|
94
91
|
console.error(`[EmbedPool] Child process error: ${err.message}`);
|
|
95
92
|
cleanupChild();
|
|
96
|
-
|
|
93
|
+
|
|
97
94
|
for (const { reject } of pendingRequests) {
|
|
98
95
|
reject(new Error(`Child process error: ${err.message}`));
|
|
99
96
|
}
|
|
@@ -105,7 +102,7 @@ function getOrCreateChild(config) {
|
|
|
105
102
|
console.info(`[EmbedPool] Child process exited with code ${code}`);
|
|
106
103
|
}
|
|
107
104
|
cleanupChild();
|
|
108
|
-
|
|
105
|
+
|
|
109
106
|
for (const { reject } of pendingRequests) {
|
|
110
107
|
reject(new Error(`Child process exited unexpectedly with code ${code}`));
|
|
111
108
|
}
|
|
@@ -154,10 +151,10 @@ function cleanupChild() {
|
|
|
154
151
|
function shutdownChild() {
|
|
155
152
|
if (persistentChild && !persistentChild.killed) {
|
|
156
153
|
try {
|
|
157
|
-
|
|
154
|
+
|
|
158
155
|
persistentChild.stdin.write(JSON.stringify({ type: 'shutdown' }) + '\n');
|
|
159
156
|
} catch {
|
|
160
|
-
|
|
157
|
+
|
|
161
158
|
persistentChild.kill();
|
|
162
159
|
}
|
|
163
160
|
}
|
|
@@ -183,15 +180,7 @@ function processNextRequest() {
|
|
|
183
180
|
}
|
|
184
181
|
}
|
|
185
182
|
|
|
186
|
-
|
|
187
|
-
* Embed a single query string using a persistent child process.
|
|
188
|
-
* The child process stays alive for consecutive queries, then exits after idle timeout.
|
|
189
|
-
* This gives fast consecutive searches + memory cleanup after idle period.
|
|
190
|
-
*
|
|
191
|
-
* @param {string} query - The query text to embed
|
|
192
|
-
* @param {object} config - Configuration object with embeddingModel and embeddingProcessNumThreads
|
|
193
|
-
* @returns {Promise<Float32Array>} - The embedding vector
|
|
194
|
-
*/
|
|
183
|
+
|
|
195
184
|
export async function embedQueryInChildProcess(query, config) {
|
|
196
185
|
return new Promise((resolve, reject) => {
|
|
197
186
|
const payload = {
|
|
@@ -212,17 +201,12 @@ export async function embedQueryInChildProcess(query, config) {
|
|
|
212
201
|
});
|
|
213
202
|
}
|
|
214
203
|
|
|
215
|
-
|
|
216
|
-
* Force shutdown the persistent child process to immediately free memory.
|
|
217
|
-
* Called when user explicitly wants to free memory.
|
|
218
|
-
*/
|
|
204
|
+
|
|
219
205
|
/**
 * Public hook for tearing down the embedding pool on demand.
 *
 * Forwards directly to the module-private `shutdownChild()`; takes no
 * arguments and returns nothing.
 */
export function forceShutdownEmbeddingPool() {
  shutdownChild();
}
|
|
222
208
|
|
|
223
|
-
|
|
224
|
-
* Check if the persistent child process is currently running.
|
|
225
|
-
*/
|
|
209
|
+
|
|
226
210
|
/**
 * Report whether a persistent child process is currently held by this module.
 *
 * @returns {boolean} `true` when `persistentChild` is set and its `killed`
 *   flag is not raised; `false` otherwise.
 */
export function isEmbeddingPoolActive() {
  // No child handle recorded at all.
  if (persistentChild === null) {
    return false;
  }
  return !persistentChild.killed;
}
|
package/lib/embedding-process.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { pipeline } from '@huggingface/transformers';
|
|
1
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
2
2
|
import { configureNativeOnnxBackend } from './onnx-backend.js';
|
|
3
3
|
import {
|
|
4
4
|
EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
|
|
@@ -7,9 +7,11 @@ import {
|
|
|
7
7
|
EMBEDDING_PROCESS_GC_STATE_INITIAL,
|
|
8
8
|
} from './constants.js';
|
|
9
9
|
import readline from 'readline';
|
|
10
|
+
import path from 'path';
|
|
11
|
+
import os from 'os';
|
|
10
12
|
import { pathToFileURL } from 'url';
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
|
|
13
15
|
let currentRequestId = -1;
|
|
14
16
|
const log = (...args) => {
|
|
15
17
|
if (currentRequestId > 0 && !process.env.EMBEDDING_PROCESS_VERBOSE) {
|
|
@@ -44,6 +46,16 @@ let gcSupported = typeof global.gc === 'function';
|
|
|
44
46
|
let nativeBackendConfigured = false;
|
|
45
47
|
const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
|
|
46
48
|
|
|
49
|
+
/**
 * Resolve the per-user base directory for cached data on this platform.
 *
 * - Windows (`win32`): `%LOCALAPPDATA%`, else `<home>/AppData/Local`.
 * - macOS (`darwin`): `<home>/Library/Caches`.
 * - Any other platform: `$XDG_CACHE_HOME`, else `<home>/.cache`.
 *
 * An environment override is honoured only when it is an absolute path.
 * A relative (or empty) value would make the cache location depend on the
 * process's current working directory, so it is ignored in favour of the
 * home-based default.
 *
 * @returns {string} Path of the cache base directory.
 */
function getGlobalCacheDir() {
  // Accept an env override only if it is a non-empty absolute path.
  const fromEnv = (name) => {
    const value = process.env[name];
    return typeof value === 'string' && path.isAbsolute(value) ? value : null;
  };

  if (process.platform === 'win32') {
    return fromEnv('LOCALAPPDATA') ?? path.join(os.homedir(), 'AppData', 'Local');
  }
  if (process.platform === 'darwin') {
    return path.join(os.homedir(), 'Library', 'Caches');
  }
  return fromEnv('XDG_CACHE_HOME') ?? path.join(os.homedir(), '.cache');
}
|
|
58
|
+
|
|
47
59
|
function toPositiveNumber(value, fallback) {
|
|
48
60
|
const parsed = Number(value);
|
|
49
61
|
return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
|
|
@@ -138,6 +150,7 @@ async function getEmbedder(embeddingModel, numThreads) {
|
|
|
138
150
|
if (!embedderPromise) {
|
|
139
151
|
configuredModel = embeddingModel;
|
|
140
152
|
setThreads(numThreads);
|
|
153
|
+
env.cacheDir = path.join(getGlobalCacheDir(), 'xenova');
|
|
141
154
|
log(`Loading model ${embeddingModel}...`);
|
|
142
155
|
const loadStart = Date.now();
|
|
143
156
|
embedderPromise = pipeline('feature-extraction', embeddingModel, {
|
|
@@ -176,10 +189,7 @@ function resetEmbeddingProcessState() {
|
|
|
176
189
|
gcState.requestsSinceLastRun = 0;
|
|
177
190
|
}
|
|
178
191
|
|
|
179
|
-
|
|
180
|
-
* Unload the embedding model to free memory.
|
|
181
|
-
* This nulls out the cached pipeline and triggers GC if available.
|
|
182
|
-
*/
|
|
192
|
+
|
|
183
193
|
async function unloadModel() {
|
|
184
194
|
if (!embedderPromise) {
|
|
185
195
|
log('[Child] No model loaded, nothing to unload');
|
|
@@ -189,7 +199,7 @@ async function unloadModel() {
|
|
|
189
199
|
try {
|
|
190
200
|
const embedder = await embedderPromise;
|
|
191
201
|
|
|
192
|
-
|
|
202
|
+
|
|
193
203
|
if (embedder && typeof embedder.dispose === 'function') {
|
|
194
204
|
try {
|
|
195
205
|
await embedder.dispose();
|
|
@@ -202,12 +212,12 @@ async function unloadModel() {
|
|
|
202
212
|
log(`[Child] Error during model unload: ${err.message}`);
|
|
203
213
|
}
|
|
204
214
|
|
|
205
|
-
|
|
215
|
+
|
|
206
216
|
embedderPromise = null;
|
|
207
217
|
configuredModel = null;
|
|
208
218
|
configuredThreads = null;
|
|
209
219
|
|
|
210
|
-
|
|
220
|
+
|
|
211
221
|
if (gcSupported) {
|
|
212
222
|
maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
|
|
213
223
|
}
|
|
@@ -251,8 +261,8 @@ async function runEmbedding(payload) {
|
|
|
251
261
|
gcState.requestsSinceLastRun += 1;
|
|
252
262
|
}
|
|
253
263
|
|
|
254
|
-
|
|
255
|
-
|
|
264
|
+
|
|
265
|
+
|
|
256
266
|
const BATCH_SIZE =
|
|
257
267
|
Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
|
|
258
268
|
|
|
@@ -262,17 +272,17 @@ async function runEmbedding(payload) {
|
|
|
262
272
|
const batchTexts = batchChunks.map((c) => c.text);
|
|
263
273
|
|
|
264
274
|
try {
|
|
265
|
-
|
|
275
|
+
|
|
266
276
|
const output = await embedder(batchTexts, { pooling: 'mean', normalize: true });
|
|
267
277
|
|
|
268
|
-
|
|
278
|
+
|
|
269
279
|
const hiddenSize = output.dims[output.dims.length - 1];
|
|
270
280
|
|
|
271
281
|
for (let j = 0; j < batchChunks.length; j++) {
|
|
272
282
|
const chunk = batchChunks[j];
|
|
273
283
|
const vecStart = j * hiddenSize;
|
|
274
284
|
const vecEnd = vecStart + hiddenSize;
|
|
275
|
-
|
|
285
|
+
|
|
276
286
|
const vector = new Float32Array(output.data.subarray(vecStart, vecEnd));
|
|
277
287
|
|
|
278
288
|
results.push({
|
|
@@ -285,17 +295,17 @@ async function runEmbedding(payload) {
|
|
|
285
295
|
});
|
|
286
296
|
}
|
|
287
297
|
|
|
288
|
-
|
|
298
|
+
|
|
289
299
|
if (typeof output.dispose === 'function') {
|
|
290
300
|
try {
|
|
291
301
|
output.dispose();
|
|
292
302
|
} catch {
|
|
293
|
-
|
|
303
|
+
|
|
294
304
|
}
|
|
295
305
|
}
|
|
296
306
|
disposeCount++;
|
|
297
307
|
} catch (error) {
|
|
298
|
-
|
|
308
|
+
|
|
299
309
|
log(`Batch failed, falling back to single: ${error.message}`);
|
|
300
310
|
for (const chunk of batchChunks) {
|
|
301
311
|
try {
|
|
@@ -305,7 +315,7 @@ async function runEmbedding(payload) {
|
|
|
305
315
|
try {
|
|
306
316
|
output.dispose();
|
|
307
317
|
} catch {
|
|
308
|
-
|
|
318
|
+
|
|
309
319
|
}
|
|
310
320
|
}
|
|
311
321
|
disposeCount++;
|
|
@@ -329,7 +339,7 @@ async function runEmbedding(payload) {
|
|
|
329
339
|
}
|
|
330
340
|
}
|
|
331
341
|
|
|
332
|
-
|
|
342
|
+
|
|
333
343
|
if (batchEnd % 20 === 0 || batchEnd === chunks.length) {
|
|
334
344
|
const elapsed = ((Date.now() - start) / 1000).toFixed(1);
|
|
335
345
|
log(
|