@softerist/heuristic-mcp 3.0.17 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
-import { pipeline } from '@huggingface/transformers';
+import { pipeline, env } from '@huggingface/transformers';
 import { configureNativeOnnxBackend } from './onnx-backend.js';
 import {
   EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
@@ -7,9 +7,11 @@ import {
   EMBEDDING_PROCESS_GC_STATE_INITIAL,
 } from './constants.js';
 import readline from 'readline';
+import path from 'path';
+import os from 'os';
 import { pathToFileURL } from 'url';
 
-// Always log to stderr for debugging (goes to parent's stderr)
+
 let currentRequestId = -1;
 const log = (...args) => {
   if (currentRequestId > 0 && !process.env.EMBEDDING_PROCESS_VERBOSE) {
@@ -44,6 +46,16 @@ let gcSupported = typeof global.gc === 'function';
 let nativeBackendConfigured = false;
 const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
 
+function getGlobalCacheDir() {
+  if (process.platform === 'win32') {
+    return process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
+  }
+  if (process.platform === 'darwin') {
+    return path.join(os.homedir(), 'Library', 'Caches');
+  }
+  return process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
+}
+
 function toPositiveNumber(value, fallback) {
   const parsed = Number(value);
   return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
@@ -138,6 +150,7 @@ async function getEmbedder(embeddingModel, numThreads) {
   if (!embedderPromise) {
     configuredModel = embeddingModel;
     setThreads(numThreads);
+    env.cacheDir = path.join(getGlobalCacheDir(), 'xenova');
     log(`Loading model ${embeddingModel}...`);
     const loadStart = Date.now();
     embedderPromise = pipeline('feature-extraction', embeddingModel, {
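The practical effect of the two additions above (getGlobalCacheDir plus env.cacheDir) is that transformers.js now downloads and caches model files under a per-user cache root shared across projects, rather than a package-local folder. A minimal sketch of the same pattern, assuming the Xenova/all-MiniLM-L6-v2 model id purely as an example and a hard-coded Linux-style cache path in place of the platform helper:

import { pipeline, env } from '@huggingface/transformers';
import path from 'path';
import os from 'os';

// Redirect the transformers.js model cache to a shared per-user directory.
// (Example path only; the package computes it per platform via getGlobalCacheDir().)
env.cacheDir = path.join(os.homedir(), '.cache', 'xenova');

// The first call downloads model files into env.cacheDir; later runs reuse them.
const embed = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
const output = await embed('hello world', { pooling: 'mean', normalize: true });
console.log(output.dims); // e.g. [1, 384] for this model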
@@ -176,10 +189,7 @@ function resetEmbeddingProcessState() {
   gcState.requestsSinceLastRun = 0;
 }
 
-/**
- * Unload the embedding model to free memory.
- * This nulls out the cached pipeline and triggers GC if available.
- */
+
 async function unloadModel() {
   if (!embedderPromise) {
     log('[Child] No model loaded, nothing to unload');
@@ -189,7 +199,7 @@ async function unloadModel() {
   try {
     const embedder = await embedderPromise;
 
-    // Try to dispose the pipeline if it has a dispose method
+
     if (embedder && typeof embedder.dispose === 'function') {
       try {
         await embedder.dispose();
@@ -202,12 +212,12 @@ async function unloadModel() {
     log(`[Child] Error during model unload: ${err.message}`);
   }
 
-  // Clear references
+
   embedderPromise = null;
   configuredModel = null;
   configuredThreads = null;
 
-  // Trigger garbage collection if available
+
   if (gcSupported) {
     maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
   }
@@ -251,8 +261,8 @@ async function runEmbedding(payload) {
     gcState.requestsSinceLastRun += 1;
   }
 
-  // Batch embedding - tunable for throughput vs memory tradeoffs
-  // FORCE BATCH_SIZE = 1 to restore 1.0 files/s speed (batching adds overhead on CPU)
+
+
   const BATCH_SIZE =
     Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
 
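The comments removed above documented the intent of the clamp that follows: a caller-supplied batchSize is used only when it is a positive integer, it is capped at 256, and anything else falls back to a batch size of 1. A standalone sketch of that resolution logic (the helper name is mine, for illustration only):

// Mirrors the BATCH_SIZE expression in the hunk above (hypothetical helper name).
function resolveBatchSize(batchSize) {
  return Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
}

resolveBatchSize(32);   // 32  -> positive integer under the cap is used as-is
resolveBatchSize(1024); // 256 -> capped
resolveBatchSize(0);    // 1   -> non-positive falls back to single-chunk batches
resolveBatchSize('8');  // 1   -> non-integer (a string) falls back as well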
@@ -262,17 +272,17 @@ async function runEmbedding(payload) {
     const batchTexts = batchChunks.map((c) => c.text);
 
     try {
-      // Process batch of texts in single inference call
+
       const output = await embedder(batchTexts, { pooling: 'mean', normalize: true });
 
-      // Output shape: [batch_size, hidden_size]
+
       const hiddenSize = output.dims[output.dims.length - 1];
 
       for (let j = 0; j < batchChunks.length; j++) {
         const chunk = batchChunks[j];
         const vecStart = j * hiddenSize;
         const vecEnd = vecStart + hiddenSize;
-        // Deep copy the slice before disposing
+
         const vector = new Float32Array(output.data.subarray(vecStart, vecEnd));
 
         results.push({
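The unchanged line that copies each vector with new Float32Array(output.data.subarray(vecStart, vecEnd)) is load-bearing: subarray returns a view over the output tensor's backing buffer rather than a copy, so the extra Float32Array keeps the embedding valid after output.dispose() releases the tensor. A plain typed-array sketch of that view-versus-copy distinction:

const data = new Float32Array([1, 2, 3, 4, 5, 6]);

const view = data.subarray(2, 4);    // shares data's underlying buffer
const copy = new Float32Array(view); // allocates and owns its own buffer

data[2] = 99;
console.log(view[0]); // 99 - the view reflects changes to the original buffer
console.log(copy[0]); // 3  - the copy is unaffected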
@@ -285,17 +295,17 @@ async function runEmbedding(payload) {
         });
       }
 
-      // Dispose tensor after extracting all vectors
+
       if (typeof output.dispose === 'function') {
         try {
           output.dispose();
         } catch {
-          /* ignore */
+
         }
       }
       disposeCount++;
     } catch (error) {
-      // Fallback: if batch fails, try one at a time
+
       log(`Batch failed, falling back to single: ${error.message}`);
       for (const chunk of batchChunks) {
         try {
@@ -305,7 +315,7 @@ async function runEmbedding(payload) {
           try {
             output.dispose();
           } catch {
-            /* ignore */
+
           }
         }
         disposeCount++;
@@ -329,7 +339,7 @@ async function runEmbedding(payload) {
       }
     }
 
-    // Progress logging every 20 chunks
+
     if (batchEnd % 20 === 0 || batchEnd === chunks.length) {
       const elapsed = ((Date.now() - start) / 1000).toFixed(1);
       log(