@softerist/heuristic-mcp 3.0.12 → 3.0.14

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/lib/constants.js CHANGED
@@ -163,6 +163,33 @@ export const HNSWLIB_ERROR_RESET_MS = 5 * 60 * 1000; // 5 minutes
163
163
  */
164
164
  export const DEFAULT_READER_WAIT_TIMEOUT_MS = 5000;
165
165
 
166
+ // ================================
167
+ // Embedding Process GC Constants
168
+ // ================================
169
+
170
+ /**
171
+ * Default RSS threshold (MB) for adaptive GC in embedding child process.
172
+ */
173
+ export const EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB = 2048;
174
+
175
+ /**
176
+ * Minimum interval (ms) between adaptive GC runs in embedding child process.
177
+ */
178
+ export const EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS = 15_000;
179
+
180
+ /**
181
+ * Backstop: run GC after this many requests without collection.
182
+ */
183
+ export const EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION = 8;
184
+
185
+ /**
186
+ * Initial mutable state shape for embedding child process GC tracking.
187
+ */
188
+ export const EMBEDDING_PROCESS_GC_STATE_INITIAL = Object.freeze({
189
+ lastRunAtMs: 0,
190
+ requestsSinceLastRun: 0,
191
+ });
192
+
166
193
  // ================================
167
194
  // Vector Store Format Constants
168
195
  // ================================
@@ -31,12 +31,13 @@ function getOrCreateChild(config) {
31
31
  const args = ['--expose-gc', EMBEDDING_PROCESS_PATH];
32
32
  persistentChild = spawn(process.execPath, args, {
33
33
  stdio: ['pipe', 'pipe', 'pipe'],
34
- env: {
35
- ...process.env,
36
- EMBEDDING_PROCESS_PERSISTENT: 'true',
37
- EMBEDDING_PROCESS_VERBOSE: config.verbose ? 'true' : '',
38
- },
39
- });
34
+ env: {
35
+ ...process.env,
36
+ EMBEDDING_PROCESS_PERSISTENT: 'true',
37
+ EMBEDDING_PROCESS_RUN_MAIN: 'true',
38
+ EMBEDDING_PROCESS_VERBOSE: config.verbose ? 'true' : '',
39
+ },
40
+ });
40
41
 
41
42
  currentConfig = config;
42
43
 
@@ -1,5 +1,11 @@
1
1
  import { pipeline } from '@huggingface/transformers';
2
2
  import { configureNativeOnnxBackend } from './onnx-backend.js';
3
+ import {
4
+ EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
5
+ EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS,
6
+ EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB,
7
+ EMBEDDING_PROCESS_GC_STATE_INITIAL,
8
+ } from './constants.js';
3
9
  import readline from 'readline';
4
10
  import { pathToFileURL } from 'url';
5
11
 
@@ -36,6 +42,79 @@ let configuredModel = null;
36
42
  let requestCounter = 0;
37
43
  let gcSupported = typeof global.gc === 'function';
38
44
  let nativeBackendConfigured = false;
45
+ const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
46
+
47
+ function toPositiveNumber(value, fallback) {
48
+ const parsed = Number(value);
49
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
50
+ }
51
+
52
+ function toNonNegativeInteger(value, fallback) {
53
+ const parsed = Number.parseInt(value, 10);
54
+ return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
55
+ }
56
+
57
+ function toPositiveInteger(value, fallback) {
58
+ const parsed = Number.parseInt(value, 10);
59
+ return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
60
+ }
61
+
62
+ function resolveGcPolicy(payload) {
63
+ return {
64
+ rssThresholdMb: toPositiveNumber(
65
+ payload?.gcRssThresholdMb,
66
+ EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB
67
+ ),
68
+ minIntervalMs: toNonNegativeInteger(
69
+ payload?.gcMinIntervalMs,
70
+ EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS
71
+ ),
72
+ maxRequestsWithoutCollection: toPositiveInteger(
73
+ payload?.gcMaxRequestsWithoutCollection,
74
+ EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION
75
+ ),
76
+ };
77
+ }
78
+
79
+ function maybeRunGc(policy, { reason = 'unknown', force = false } = {}) {
80
+ if (!gcSupported) return false;
81
+
82
+ const before = process.memoryUsage();
83
+ const rssBeforeMb = before.rss / 1024 / 1024;
84
+ const rssTrigger = rssBeforeMb >= policy.rssThresholdMb;
85
+ const requestTrigger = gcState.requestsSinceLastRun >= policy.maxRequestsWithoutCollection;
86
+
87
+ if (!force && !rssTrigger && !requestTrigger) {
88
+ return false;
89
+ }
90
+
91
+ const now = Date.now();
92
+ if (
93
+ !force &&
94
+ policy.minIntervalMs > 0 &&
95
+ gcState.lastRunAtMs > 0 &&
96
+ now - gcState.lastRunAtMs < policy.minIntervalMs
97
+ ) {
98
+ return false;
99
+ }
100
+
101
+ global.gc();
102
+ const after = process.memoryUsage();
103
+ gcState.lastRunAtMs = now;
104
+ gcState.requestsSinceLastRun = 0;
105
+
106
+ let trigger = 'forced';
107
+ if (!force) {
108
+ if (rssTrigger && requestTrigger) trigger = 'rss+requests';
109
+ else if (rssTrigger) trigger = 'rss';
110
+ else trigger = 'requests';
111
+ }
112
+
113
+ log(
114
+ `[Child:${process.pid}] GC ${reason}: trigger=${trigger} rss ${(before.rss / 1024 / 1024).toFixed(1)}MB -> ${(after.rss / 1024 / 1024).toFixed(1)}MB`
115
+ );
116
+ return true;
117
+ }
39
118
 
40
119
  function ensureNativeBackend(threads) {
41
120
  if (nativeBackendConfigured && !threads) return;
@@ -64,6 +143,11 @@ async function getEmbedder(embeddingModel, numThreads) {
64
143
  embedderPromise = pipeline('feature-extraction', embeddingModel, {
65
144
  quantized: true,
66
145
  dtype: 'fp32',
146
+ session_options: {
147
+ numThreads,
148
+ intraOpNumThreads: numThreads,
149
+ interOpNumThreads: 1,
150
+ },
67
151
  }).then((model) => {
68
152
  const loadSec = ((Date.now() - loadStart) / 1000).toFixed(1);
69
153
  log(`Model ready in ${loadSec}s, ${formatMemory()}`);
@@ -88,6 +172,8 @@ function resetEmbeddingProcessState() {
88
172
  requestCounter = 0;
89
173
  currentRequestId = -1;
90
174
  nativeBackendConfigured = false;
175
+ gcState.lastRunAtMs = 0;
176
+ gcState.requestsSinceLastRun = 0;
91
177
  }
92
178
 
93
179
  /**
@@ -123,13 +209,7 @@ async function unloadModel() {
123
209
 
124
210
  // Trigger garbage collection if available
125
211
  if (gcSupported) {
126
- const before = process.memoryUsage();
127
- global.gc();
128
- const after = process.memoryUsage();
129
- log(
130
- `[Child] Post-unload GC: rss ${(before.rss / 1024 / 1024).toFixed(1)}MB -> ${(after.rss / 1024 / 1024).toFixed(1)}MB, ` +
131
- `heap ${(before.heapUsed / 1024 / 1024).toFixed(1)}MB -> ${(after.heapUsed / 1024 / 1024).toFixed(1)}MB`
132
- );
212
+ maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
133
213
  }
134
214
 
135
215
  log(`[Child] Model unloaded, ${formatMemory()}`);
@@ -142,8 +222,18 @@ async function runEmbedding(payload) {
142
222
  chunks = [],
143
223
  numThreads = 1,
144
224
  batchSize = null,
225
+ enableExplicitGc = true,
226
+ gcRssThresholdMb = EMBEDDING_PROCESS_DEFAULT_GC_RSS_THRESHOLD_MB,
227
+ gcMinIntervalMs = EMBEDDING_PROCESS_DEFAULT_GC_MIN_INTERVAL_MS,
228
+ gcMaxRequestsWithoutCollection = EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
145
229
  requestId = null,
146
230
  } = payload || {};
231
+ const shouldRunGc = enableExplicitGc !== false && gcSupported;
232
+ const gcPolicy = resolveGcPolicy({
233
+ gcRssThresholdMb,
234
+ gcMinIntervalMs,
235
+ gcMaxRequestsWithoutCollection,
236
+ });
147
237
 
148
238
  if (!embeddingModel) {
149
239
  throw new Error('Missing embeddingModel');
@@ -157,6 +247,9 @@ async function runEmbedding(payload) {
157
247
  const results = [];
158
248
  let disposeCount = 0;
159
249
  const start = Date.now();
250
+ if (shouldRunGc) {
251
+ gcState.requestsSinceLastRun += 1;
252
+ }
160
253
 
161
254
  // Batch embedding - tunable for throughput vs memory tradeoffs
162
255
  // FORCE BATCH_SIZE = 1 to restore 1.0 files/s speed (batching adds overhead on CPU)
@@ -244,13 +337,10 @@ async function runEmbedding(payload) {
244
337
  );
245
338
  }
246
339
 
247
- // Trigger GC every 100 chunks to reduce overhead (was 40)
248
- if (batchEnd % 100 === 0 && typeof global.gc === 'function') {
249
- global.gc();
250
- const elapsed = ((Date.now() - start) / 1000).toFixed(1);
251
- log(
252
- `[Child:${process.pid}] Request ${reqId}: GC triggered after ${batchEnd}/${chunks.length} chunks in ${elapsed}s, ${formatMemory()}`
253
- );
340
+ if (shouldRunGc && (batchEnd % 20 === 0 || batchEnd === chunks.length)) {
341
+ maybeRunGc(gcPolicy, {
342
+ reason: `request ${reqId} progress ${batchEnd}/${chunks.length}`,
343
+ });
254
344
  }
255
345
  }
256
346
 
@@ -258,13 +348,8 @@ async function runEmbedding(payload) {
258
348
  log(
259
349
  `[Child:${process.pid}] Request ${reqId}: done ${results.length} chunks in ${totalSec}s, ${disposeCount} tensors disposed, ${formatMemory()}`
260
350
  );
261
- if (gcSupported) {
262
- const before = process.memoryUsage();
263
- global.gc();
264
- const after = process.memoryUsage();
265
- log(
266
- `[Child:${process.pid}] Request ${reqId}: GC rss ${(before.rss / 1024 / 1024).toFixed(1)}MB -> ${(after.rss / 1024 / 1024).toFixed(1)}MB`
267
- );
351
+ if (shouldRunGc) {
352
+ maybeRunGc(gcPolicy, { reason: `request ${reqId} end` });
268
353
  }
269
354
  const usage = process.memoryUsage();
270
355
  return {
@@ -337,12 +422,13 @@ async function main() {
337
422
  process.stdout.write(JSON.stringify(output));
338
423
  }
339
424
 
340
- function shouldRunMain() {
341
- if (process.env.VITEST) return false;
342
- if (!process.argv[1]) return false;
343
- const entryUrl = pathToFileURL(process.argv[1]).href;
344
- return import.meta.url === entryUrl;
345
- }
425
+ function shouldRunMain() {
426
+ if (process.env.EMBEDDING_PROCESS_RUN_MAIN === 'true') return true;
427
+ if (process.env.VITEST) return false;
428
+ if (!process.argv[1]) return false;
429
+ const entryUrl = pathToFileURL(process.argv[1]).href;
430
+ return import.meta.url === entryUrl;
431
+ }
346
432
 
347
433
  if (shouldRunMain()) {
348
434
  main().catch((err) => {