@softerist/heuristic-mcp 3.0.12 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -56
- package/config.jsonc +173 -102
- package/index.js +69 -57
- package/lib/cache.js +55 -26
- package/lib/config.js +528 -79
- package/lib/constants.js +27 -0
- package/lib/embed-query-process.js +7 -6
- package/lib/embedding-process.js +113 -27
- package/lib/embedding-worker.js +299 -180
- package/lib/project-detector.js +1 -1
- package/lib/vector-store-binary.js +64 -55
- package/lib/vector-store-sqlite.js +83 -73
- package/package.json +1 -1
package/lib/constants.js
CHANGED
|
@@ -163,6 +163,33 @@ export const HNSWLIB_ERROR_RESET_MS = 5 * 60 * 1000; // 5 minutes
|
|
|
163
163
|
*/
|
|
164
164
|
export const DEFAULT_READER_WAIT_TIMEOUT_MS = 5000;
|
|
165
165
|
|
|
166
|
+
// ================================
// Embedding Process GC Constants
// ================================

/**
 * RSS level, in megabytes, used as the default threshold for adaptive GC
 * in the embedding child process.
 */
export const EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB = 2 * 1024;

/**
 * Default shortest gap, in milliseconds, allowed between two adaptive GC
 * runs in the embedding child process.
 */
export const EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS = 15 * 1000;

/**
 * Backstop request count: a GC run is due once this many requests have
 * gone by without a collection.
 */
export const EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION = 8;

/**
 * Starting values for the GC bookkeeping kept by the embedding child
 * process. The template itself is frozen; copy it (for example with object
 * spread) to obtain a mutable tracker.
 */
export const EMBEDDING_PROCESS_GC_STATE_INITIAL = Object.freeze({
  lastRunAtMs: 0,
  requestsSinceLastRun: 0,
});
|
|
192
|
+
|
|
166
193
|
// ================================
|
|
167
194
|
// Vector Store Format Constants
|
|
168
195
|
// ================================
|
|
@@ -31,12 +31,13 @@ function getOrCreateChild(config) {
|
|
|
31
31
|
const args = ['--expose-gc', EMBEDDING_PROCESS_PATH];
|
|
32
32
|
persistentChild = spawn(process.execPath, args, {
|
|
33
33
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
34
|
-
env: {
|
|
35
|
-
...process.env,
|
|
36
|
-
EMBEDDING_PROCESS_PERSISTENT: 'true',
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
34
|
+
env: {
|
|
35
|
+
...process.env,
|
|
36
|
+
EMBEDDING_PROCESS_PERSISTENT: 'true',
|
|
37
|
+
EMBEDDING_PROCESS_RUN_MAIN: 'true',
|
|
38
|
+
EMBEDDING_PROCESS_VERBOSE: config.verbose ? 'true' : '',
|
|
39
|
+
},
|
|
40
|
+
});
|
|
40
41
|
|
|
41
42
|
currentConfig = config;
|
|
42
43
|
|
package/lib/embedding-process.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import { pipeline } from '@huggingface/transformers';
|
|
2
2
|
import { configureNativeOnnxBackend } from './onnx-backend.js';
|
|
3
|
+
import {
|
|
4
|
+
EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
|
|
5
|
+
EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS,
|
|
6
|
+
EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB,
|
|
7
|
+
EMBEDDING_PROCESS_GC_STATE_INITIAL,
|
|
8
|
+
} from './constants.js';
|
|
3
9
|
import readline from 'readline';
|
|
4
10
|
import { pathToFileURL } from 'url';
|
|
5
11
|
|
|
@@ -36,6 +42,79 @@ let configuredModel = null;
|
|
|
36
42
|
let requestCounter = 0;
|
|
37
43
|
let gcSupported = typeof global.gc === 'function';
|
|
38
44
|
let nativeBackendConfigured = false;
|
|
45
|
+
const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
|
|
46
|
+
|
|
47
|
+
/**
 * Coerce a loosely-typed setting into a finite number greater than zero.
 *
 * Only numbers and strings are considered. Any other type (booleans,
 * arrays, objects, null, undefined) yields the fallback, as does a value
 * that is not finite or not strictly positive.
 *
 * @param {unknown} value - Candidate value, typically from a request payload.
 * @param {number} fallback - Returned when `value` is not usable.
 * @returns {number} The parsed positive number, or `fallback`.
 */
function toPositiveNumber(value, fallback) {
  // Number(true) === 1 and Number([5]) === 5, so an unguarded conversion
  // would turn a mistyped setting into a tiny but "valid" number.
  if (typeof value !== 'number' && typeof value !== 'string') {
    return fallback;
  }

  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Coerce a loosely-typed setting into an integer that is zero or greater.
 *
 * Number inputs are truncated toward zero. Other inputs are parsed as
 * base-10 text with `Number.parseInt`, which keeps the leading integer
 * portion. A result that is not an integer or is negative yields the
 * fallback.
 *
 * @param {unknown} value - Candidate value, typically from a request payload.
 * @param {number} fallback - Returned when `value` is not usable.
 * @returns {number} The parsed non-negative integer, or `fallback`.
 */
function toNonNegativeInteger(value, fallback) {
  // parseInt stringifies its argument first, so numbers that print in
  // exponent notation are misread (5e-7 -> "5e-7" -> 5, 1e21 -> "1e+21" -> 1).
  // Truncate real numbers arithmetically instead.
  const parsed = typeof value === 'number' ? Math.trunc(value) : Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Coerce a loosely-typed setting into an integer greater than zero.
 *
 * Number inputs are truncated toward zero. Other inputs are parsed as
 * base-10 text with `Number.parseInt`, which keeps the leading integer
 * portion. A result that is not an integer or is not strictly positive
 * yields the fallback.
 *
 * @param {unknown} value - Candidate value, typically from a request payload.
 * @param {number} fallback - Returned when `value` is not usable.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function toPositiveInteger(value, fallback) {
  // parseInt stringifies its argument first, so numbers that print in
  // exponent notation are misread (5e-7 -> "5e-7" -> 5, 1e21 -> "1e+21" -> 1).
  // Truncate real numbers arithmetically instead.
  const parsed = typeof value === 'number' ? Math.trunc(value) : Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Build the effective GC policy from an optional request payload.
 *
 * Each setting is read from the payload and passed through its validating
 * helper, which substitutes the module default when the value is missing
 * or unusable. A null or undefined payload therefore produces the defaults.
 *
 * @param {object} [payload] - May carry `gcRssThresholdMb`,
 *   `gcMinIntervalMs` and `gcMaxRequestsWithoutCollection`.
 * @returns {{rssThresholdMb: number, minIntervalMs: number, maxRequestsWithoutCollection: number}}
 */
function resolveGcPolicy(payload) {
  const rssThresholdMb = toPositiveNumber(
    payload?.gcRssThresholdMb,
    EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB
  );

  const minIntervalMs = toNonNegativeInteger(
    payload?.gcMinIntervalMs,
    EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS
  );

  const maxRequestsWithoutCollection = toPositiveInteger(
    payload?.gcMaxRequestsWithoutCollection,
    EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION
  );

  return { rssThresholdMb, minIntervalMs, maxRequestsWithoutCollection };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Run an explicit garbage collection when the policy calls for one.
 *
 * A non-forced run needs at least one trigger: current RSS at or above
 * `policy.rssThresholdMb`, or `gcState.requestsSinceLastRun` at or above
 * `policy.maxRequestsWithoutCollection`. It is additionally suppressed
 * while less than `policy.minIntervalMs` has elapsed since the previous
 * run. A forced run bypasses both checks. After collecting, the shared
 * `gcState` is reset and the RSS before/after is logged.
 *
 * @param {{rssThresholdMb: number, minIntervalMs: number, maxRequestsWithoutCollection: number}} policy
 * @param {{reason?: string, force?: boolean}} [options] - `reason` labels
 *   the log line; `force` skips the trigger and interval checks.
 * @returns {boolean} True when a collection ran, false otherwise.
 */
function maybeRunGc(policy, { reason = 'unknown', force = false } = {}) {
  if (!gcSupported) {
    return false;
  }

  const toMb = (bytes) => bytes / 1024 / 1024;

  const memBefore = process.memoryUsage();
  const rssBeforeMb = toMb(memBefore.rss);
  const overRss = rssBeforeMb >= policy.rssThresholdMb;
  const overRequests = gcState.requestsSinceLastRun >= policy.maxRequestsWithoutCollection;

  // Nothing asked for a collection.
  if (!(force || overRss || overRequests)) {
    return false;
  }

  const startedAt = Date.now();
  // A lastRunAtMs of 0 means no run has been recorded yet, so the
  // interval limit cannot apply.
  const throttled =
    !force &&
    policy.minIntervalMs > 0 &&
    gcState.lastRunAtMs > 0 &&
    startedAt - gcState.lastRunAtMs < policy.minIntervalMs;
  if (throttled) {
    return false;
  }

  global.gc();
  const memAfter = process.memoryUsage();
  gcState.lastRunAtMs = startedAt;
  gcState.requestsSinceLastRun = 0;

  let trigger;
  if (force) {
    trigger = 'forced';
  } else if (overRss) {
    trigger = overRequests ? 'rss+requests' : 'rss';
  } else {
    trigger = 'requests';
  }

  log(
    `[Child:${process.pid}] GC ${reason}: trigger=${trigger} rss ${rssBeforeMb.toFixed(1)}MB -> ${toMb(memAfter.rss).toFixed(1)}MB`
  );
  return true;
}
|
|
39
118
|
|
|
40
119
|
function ensureNativeBackend(threads) {
|
|
41
120
|
if (nativeBackendConfigured && !threads) return;
|
|
@@ -64,6 +143,11 @@ async function getEmbedder(embeddingModel, numThreads) {
|
|
|
64
143
|
embedderPromise = pipeline('feature-extraction', embeddingModel, {
|
|
65
144
|
quantized: true,
|
|
66
145
|
dtype: 'fp32',
|
|
146
|
+
session_options: {
|
|
147
|
+
numThreads,
|
|
148
|
+
intraOpNumThreads: numThreads,
|
|
149
|
+
interOpNumThreads: 1,
|
|
150
|
+
},
|
|
67
151
|
}).then((model) => {
|
|
68
152
|
const loadSec = ((Date.now() - loadStart) / 1000).toFixed(1);
|
|
69
153
|
log(`Model ready in ${loadSec}s, ${formatMemory()}`);
|
|
@@ -88,6 +172,8 @@ function resetEmbeddingProcessState() {
|
|
|
88
172
|
requestCounter = 0;
|
|
89
173
|
currentRequestId = -1;
|
|
90
174
|
nativeBackendConfigured = false;
|
|
175
|
+
gcState.lastRunAtMs = 0;
|
|
176
|
+
gcState.requestsSinceLastRun = 0;
|
|
91
177
|
}
|
|
92
178
|
|
|
93
179
|
/**
|
|
@@ -123,13 +209,7 @@ async function unloadModel() {
|
|
|
123
209
|
|
|
124
210
|
// Trigger garbage collection if available
|
|
125
211
|
if (gcSupported) {
|
|
126
|
-
|
|
127
|
-
global.gc();
|
|
128
|
-
const after = process.memoryUsage();
|
|
129
|
-
log(
|
|
130
|
-
`[Child] Post-unload GC: rss ${(before.rss / 1024 / 1024).toFixed(1)}MB -> ${(after.rss / 1024 / 1024).toFixed(1)}MB, ` +
|
|
131
|
-
`heap ${(before.heapUsed / 1024 / 1024).toFixed(1)}MB -> ${(after.heapUsed / 1024 / 1024).toFixed(1)}MB`
|
|
132
|
-
);
|
|
212
|
+
maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
|
|
133
213
|
}
|
|
134
214
|
|
|
135
215
|
log(`[Child] Model unloaded, ${formatMemory()}`);
|
|
@@ -142,8 +222,18 @@ async function runEmbedding(payload) {
|
|
|
142
222
|
chunks = [],
|
|
143
223
|
numThreads = 1,
|
|
144
224
|
batchSize = null,
|
|
225
|
+
enableExplicitGc = true,
|
|
226
|
+
gcRssThresholdMb = EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB,
|
|
227
|
+
gcMinIntervalMs = EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS,
|
|
228
|
+
gcMaxRequestsWithoutCollection = EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
|
|
145
229
|
requestId = null,
|
|
146
230
|
} = payload || {};
|
|
231
|
+
const shouldRunGc = enableExplicitGc !== false && gcSupported;
|
|
232
|
+
const gcPolicy = resolveGcPolicy({
|
|
233
|
+
gcRssThresholdMb,
|
|
234
|
+
gcMinIntervalMs,
|
|
235
|
+
gcMaxRequestsWithoutCollection,
|
|
236
|
+
});
|
|
147
237
|
|
|
148
238
|
if (!embeddingModel) {
|
|
149
239
|
throw new Error('Missing embeddingModel');
|
|
@@ -157,6 +247,9 @@ async function runEmbedding(payload) {
|
|
|
157
247
|
const results = [];
|
|
158
248
|
let disposeCount = 0;
|
|
159
249
|
const start = Date.now();
|
|
250
|
+
if (shouldRunGc) {
|
|
251
|
+
gcState.requestsSinceLastRun += 1;
|
|
252
|
+
}
|
|
160
253
|
|
|
161
254
|
// Batch embedding - tunable for throughput vs memory tradeoffs
|
|
162
255
|
// FORCE BATCH_SIZE = 1 to restore 1.0 files/s speed (batching adds overhead on CPU)
|
|
@@ -244,13 +337,10 @@ async function runEmbedding(payload) {
|
|
|
244
337
|
);
|
|
245
338
|
}
|
|
246
339
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
log(
|
|
252
|
-
`[Child:${process.pid}] Request ${reqId}: GC triggered after ${batchEnd}/${chunks.length} chunks in ${elapsed}s, ${formatMemory()}`
|
|
253
|
-
);
|
|
340
|
+
if (shouldRunGc && (batchEnd % 20 === 0 || batchEnd === chunks.length)) {
|
|
341
|
+
maybeRunGc(gcPolicy, {
|
|
342
|
+
reason: `request ${reqId} progress ${batchEnd}/${chunks.length}`,
|
|
343
|
+
});
|
|
254
344
|
}
|
|
255
345
|
}
|
|
256
346
|
|
|
@@ -258,13 +348,8 @@ async function runEmbedding(payload) {
|
|
|
258
348
|
log(
|
|
259
349
|
`[Child:${process.pid}] Request ${reqId}: done ${results.length} chunks in ${totalSec}s, ${disposeCount} tensors disposed, ${formatMemory()}`
|
|
260
350
|
);
|
|
261
|
-
if (
|
|
262
|
-
|
|
263
|
-
global.gc();
|
|
264
|
-
const after = process.memoryUsage();
|
|
265
|
-
log(
|
|
266
|
-
`[Child:${process.pid}] Request ${reqId}: GC rss ${(before.rss / 1024 / 1024).toFixed(1)}MB -> ${(after.rss / 1024 / 1024).toFixed(1)}MB`
|
|
267
|
-
);
|
|
351
|
+
if (shouldRunGc) {
|
|
352
|
+
maybeRunGc(gcPolicy, { reason: `request ${reqId} end` });
|
|
268
353
|
}
|
|
269
354
|
const usage = process.memoryUsage();
|
|
270
355
|
return {
|
|
@@ -337,12 +422,13 @@ async function main() {
|
|
|
337
422
|
process.stdout.write(JSON.stringify(output));
|
|
338
423
|
}
|
|
339
424
|
|
|
340
|
-
function shouldRunMain() {
|
|
341
|
-
if (process.env.
|
|
342
|
-
if (
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
425
|
+
/**
 * Decide whether this module should invoke `main()` on load.
 *
 * Precedence:
 *   1. `EMBEDDING_PROCESS_RUN_MAIN === 'true'` forces a run.
 *   2. A truthy `VITEST` environment variable, or a missing entry script
 *      path in `process.argv[1]`, prevents it.
 *   3. Otherwise run only when the entry script's file URL equals this
 *      module's own URL.
 *
 * @returns {boolean} True when `main()` should be started.
 */
function shouldRunMain() {
  const { env, argv } = process;

  if (env.EMBEDDING_PROCESS_RUN_MAIN === 'true') {
    return true;
  }

  const entryPath = argv[1];
  if (env.VITEST || !entryPath) {
    return false;
  }

  return pathToFileURL(entryPath).href === import.meta.url;
}
|
|
346
432
|
|
|
347
433
|
if (shouldRunMain()) {
|
|
348
434
|
main().catch((err) => {
|