@softerist/heuristic-mcp 3.0.17 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
-import { pipeline } from '@huggingface/transformers';
+import { pipeline, env } from '@huggingface/transformers';
 import { configureNativeOnnxBackend } from './onnx-backend.js';
 import {
   EMBEDDING_PROCESS_DEFAULT_GC_MAX_REQUESTS_WITHOUT_COLLECTION,
@@ -7,9 +7,11 @@ import {
   EMBEDDING_PROCESS_GC_STATE_INITIAL,
 } from './constants.js';
 import readline from 'readline';
+import path from 'path';
+import os from 'os';
 import { pathToFileURL } from 'url';
 
-// Always log to stderr for debugging (goes to parent's stderr)
+
 let currentRequestId = -1;
 const log = (...args) => {
   if (currentRequestId > 0 && !process.env.EMBEDDING_PROCESS_VERBOSE) {
@@ -44,6 +46,16 @@ let gcSupported = typeof global.gc === 'function';
 let nativeBackendConfigured = false;
 const gcState = { ...EMBEDDING_PROCESS_GC_STATE_INITIAL };
 
+function getGlobalCacheDir() {
+  if (process.platform === 'win32') {
+    return process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
+  }
+  if (process.platform === 'darwin') {
+    return path.join(os.homedir(), 'Library', 'Caches');
+  }
+  return process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
+}
+
 function toPositiveNumber(value, fallback) {
   const parsed = Number(value);
   return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
@@ -138,6 +150,7 @@ async function getEmbedder(embeddingModel, numThreads) {
   if (!embedderPromise) {
     configuredModel = embeddingModel;
     setThreads(numThreads);
+    env.cacheDir = path.join(getGlobalCacheDir(), 'xenova');
     log(`Loading model ${embeddingModel}...`);
     const loadStart = Date.now();
     embedderPromise = pipeline('feature-extraction', embeddingModel, {
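The practical effect of the two additions above (getGlobalCacheDir plus env.cacheDir) is that transformers.js now downloads and caches model files under a per-user cache root shared across projects, rather than a package-local folder. A minimal sketch of the same pattern, assuming the Xenova/all-MiniLM-L6-v2 model id purely as an example and a hard-coded Linux-style cache path in place of the platform helper:

import { pipeline, env } from '@huggingface/transformers';
import path from 'path';
import os from 'os';

// Redirect the transformers.js model cache to a shared per-user directory.
// (Example path only; the package computes it per platform via getGlobalCacheDir().)
env.cacheDir = path.join(os.homedir(), '.cache', 'xenova');

// The first call downloads model files into env.cacheDir; later runs reuse them.
const embed = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
const output = await embed('hello world', { pooling: 'mean', normalize: true });
console.log(output.dims); // e.g. [1, 384] for this model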
@@ -176,10 +189,7 @@ function resetEmbeddingProcessState() {
   gcState.requestsSinceLastRun = 0;
 }
 
-/**
- * Unload the embedding model to free memory.
- * This nulls out the cached pipeline and triggers GC if available.
- */
+
 async function unloadModel() {
   if (!embedderPromise) {
     log('[Child] No model loaded, nothing to unload');
@@ -189,7 +199,7 @@ async function unloadModel() {
   try {
     const embedder = await embedderPromise;
 
-    // Try to dispose the pipeline if it has a dispose method
+
     if (embedder && typeof embedder.dispose === 'function') {
       try {
         await embedder.dispose();
@@ -202,12 +212,12 @@ async function unloadModel() {
     log(`[Child] Error during model unload: ${err.message}`);
   }
 
-  // Clear references
+
   embedderPromise = null;
   configuredModel = null;
   configuredThreads = null;
 
-  // Trigger garbage collection if available
+
   if (gcSupported) {
     maybeRunGc(resolveGcPolicy(), { reason: 'post-unload', force: true });
   }
@@ -251,8 +261,8 @@ async function runEmbedding(payload) {
     gcState.requestsSinceLastRun += 1;
   }
 
-  // Batch embedding - tunable for throughput vs memory tradeoffs
-  // FORCE BATCH_SIZE = 1 to restore 1.0 files/s speed (batching adds overhead on CPU)
+
+
   const BATCH_SIZE =
     Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
 
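The comments removed above documented the intent of the clamp that follows: a caller-supplied batchSize is used only when it is a positive integer, it is capped at 256, and anything else falls back to a batch size of 1. A standalone sketch of that resolution logic (the helper name is mine, for illustration only):

// Mirrors the BATCH_SIZE expression in the hunk above (hypothetical helper name).
function resolveBatchSize(batchSize) {
  return Number.isInteger(batchSize) && batchSize > 0 ? Math.min(batchSize, 256) : 1;
}

resolveBatchSize(32);   // 32  -> positive integer under the cap is used as-is
resolveBatchSize(1024); // 256 -> capped
resolveBatchSize(0);    // 1   -> non-positive falls back to single-chunk batches
resolveBatchSize('8');  // 1   -> non-integer (a string) falls back as well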
@@ -262,17 +272,17 @@ async function runEmbedding(payload) {
     const batchTexts = batchChunks.map((c) => c.text);
 
     try {
-      // Process batch of texts in single inference call
+
       const output = await embedder(batchTexts, { pooling: 'mean', normalize: true });
 
-      // Output shape: [batch_size, hidden_size]
+
       const hiddenSize = output.dims[output.dims.length - 1];
 
       for (let j = 0; j < batchChunks.length; j++) {
         const chunk = batchChunks[j];
         const vecStart = j * hiddenSize;
         const vecEnd = vecStart + hiddenSize;
-        // Deep copy the slice before disposing
+
         const vector = new Float32Array(output.data.subarray(vecStart, vecEnd));
 
         results.push({
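The unchanged line that copies each vector with new Float32Array(output.data.subarray(vecStart, vecEnd)) is load-bearing: subarray returns a view over the output tensor's backing buffer rather than a copy, so the extra Float32Array keeps the embedding valid after output.dispose() releases the tensor. A plain typed-array sketch of that view-versus-copy distinction:

const data = new Float32Array([1, 2, 3, 4, 5, 6]);

const view = data.subarray(2, 4);    // shares data's underlying buffer
const copy = new Float32Array(view); // allocates and owns its own buffer

data[2] = 99;
console.log(view[0]); // 99 - the view reflects changes to the original buffer
console.log(copy[0]); // 3  - the copy is unaffected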
@@ -285,17 +295,17 @@ async function runEmbedding(payload) {
         });
       }
 
-      // Dispose tensor after extracting all vectors
+
       if (typeof output.dispose === 'function') {
         try {
           output.dispose();
         } catch {
-          /* ignore */
+
         }
       }
       disposeCount++;
     } catch (error) {
-      // Fallback: if batch fails, try one at a time
+
       log(`Batch failed, falling back to single: ${error.message}`);
       for (const chunk of batchChunks) {
         try {
@@ -305,7 +315,7 @@ async function runEmbedding(payload) {
           try {
             output.dispose();
           } catch {
-            /* ignore */
+
           }
         }
         disposeCount++;
@@ -329,7 +339,7 @@ async function runEmbedding(payload) {
       }
     }
 
-    // Progress logging every 20 chunks
+
     if (batchEnd % 20 === 0 || batchEnd === chunks.length) {
       const elapsed = ((Date.now() - start) / 1000).toFixed(1);
       log(