smart-coding-mcp 1.4.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.js CHANGED
@@ -62,10 +62,25 @@ const DEFAULT_CONFIG = {
   watchFiles: false,
   verbose: false,
   workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
-  embeddingModel: "Xenova/all-MiniLM-L6-v2",
+  embeddingModel: "nomic-ai/nomic-embed-text-v1.5",
+  embeddingDimension: 128, // MRL dimension: 64, 128, 256, 512, 768 (changed from 256 to 128 for better performance)
+  device: "auto", // "cpu", "webgpu", or "auto"
+  chunkingMode: "smart", // "smart", "ast", or "line"
   semanticWeight: 0.7,
   exactMatchBoost: 1.5,
-  smartIndexing: true
+  smartIndexing: true,
+
+  // Resource throttling (prevents CPU exhaustion)
+  maxCpuPercent: 25, // Max CPU usage during indexing (default: 25%)
+  batchDelay: 250, // Delay between batches in ms (default: 250)
+  maxWorkers: 'auto', // Max worker threads ('auto' = 25% of cores, or specific number)
+
+  // Startup behavior
+  autoIndexDelay: null, // Delay before auto-indexing (ms), null = disabled (lazy index on first search)
+
+  // Progressive indexing
+  incrementalSaveInterval: 5, // Save to cache every N batches
+  allowPartialSearch: true // Allow searches while indexing is in progress
 };
 
 let config = { ...DEFAULT_CONFIG };
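A quick back-of-envelope on the embeddingDimension change (my arithmetic, not from the package): vectors are stored as float32, 4 bytes per dimension, so each indexed chunk costs:

// Illustrative only: per-vector storage at float32 (4 bytes per dimension).
const bytesPerVector = (dim) => dim * 4; // Float32Array element size
console.log([768, 256, 128].map(bytesPerVector)); // [3072, 1024, 512]
// Halving the MRL dimension halves index size and dot-product cost,
// trading some retrieval quality for speed, the tradeoff MRL is designed for.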
@@ -237,6 +252,80 @@ export async function loadConfig(workspaceDir = null) {
     }
   }
 
+  // MRL embedding dimension
+  if (process.env.SMART_CODING_EMBEDDING_DIMENSION !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_EMBEDDING_DIMENSION, 10);
+    const validDims = [64, 128, 256, 512, 768];
+    if (validDims.includes(value)) {
+      config.embeddingDimension = value;
+      console.error(`[Config] Using embedding dimension: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_EMBEDDING_DIMENSION: ${value}, using default (must be 64, 128, 256, 512, or 768)`);
+    }
+  }
+
+  // Device selection
+  if (process.env.SMART_CODING_DEVICE !== undefined) {
+    const value = process.env.SMART_CODING_DEVICE.trim().toLowerCase();
+    const validDevices = ['cpu', 'webgpu', 'auto'];
+    if (validDevices.includes(value)) {
+      config.device = value;
+      console.error(`[Config] Using device: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_DEVICE: ${value}, using default (must be 'cpu', 'webgpu', or 'auto')`);
+    }
+  }
+
+  // Chunking mode
+  if (process.env.SMART_CODING_CHUNKING_MODE !== undefined) {
+    const value = process.env.SMART_CODING_CHUNKING_MODE.trim().toLowerCase();
+    const validModes = ['smart', 'ast', 'line'];
+    if (validModes.includes(value)) {
+      config.chunkingMode = value;
+      console.error(`[Config] Using chunking mode: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_CHUNKING_MODE: ${value}, using default (must be 'smart', 'ast', or 'line')`);
+    }
+  }
+
+  // Resource throttling - Max CPU percent
+  if (process.env.SMART_CODING_MAX_CPU_PERCENT !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_MAX_CPU_PERCENT, 10);
+    if (!isNaN(value) && value >= 10 && value <= 100) {
+      config.maxCpuPercent = value;
+      console.error(`[Config] Max CPU usage: ${value}%`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_MAX_CPU_PERCENT: ${value}, using default (must be 10-100)`);
+    }
+  }
+
+  // Resource throttling - Batch delay
+  if (process.env.SMART_CODING_BATCH_DELAY !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_BATCH_DELAY, 10);
+    if (!isNaN(value) && value >= 0 && value <= 5000) {
+      config.batchDelay = value;
+      console.error(`[Config] Batch delay: ${value}ms`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_BATCH_DELAY: ${value}, using default (must be 0-5000)`);
+    }
+  }
+
+  // Resource throttling - Max workers
+  if (process.env.SMART_CODING_MAX_WORKERS !== undefined) {
+    const value = process.env.SMART_CODING_MAX_WORKERS.trim().toLowerCase();
+    if (value === 'auto') {
+      config.maxWorkers = 'auto';
+    } else {
+      const numValue = parseInt(value, 10);
+      if (!isNaN(numValue) && numValue >= 1 && numValue <= 32) {
+        config.maxWorkers = numValue;
+        console.error(`[Config] Max workers: ${numValue}`);
+      } else {
+        console.error(`[Config] Invalid SMART_CODING_MAX_WORKERS: ${value}, using default (must be 'auto' or 1-32)`);
+      }
+    }
+  }
+
   return config;
 }
 
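A minimal sketch of how these overrides combine, assuming lib/config.js resolves as imported below (the path comes from the file header above; everything else is shown in the hunk):

// Sketch only: environment overrides applied by loadConfig.
import { loadConfig } from './lib/config.js';

process.env.SMART_CODING_EMBEDDING_DIMENSION = '256'; // must be 64/128/256/512/768
process.env.SMART_CODING_MAX_CPU_PERCENT = '50';      // accepted range: 10-100
process.env.SMART_CODING_MAX_WORKERS = '4';           // 'auto' or 1-32

const config = await loadConfig();
// config.embeddingDimension === 256, config.maxCpuPercent === 50, config.maxWorkers === 4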
@@ -1,12 +1,38 @@
 import { parentPort, workerData } from "worker_threads";
-import { pipeline } from "@xenova/transformers";
+import { pipeline, layer_norm } from "@huggingface/transformers";
 
 let embedder = null;
+const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
 
 // Initialize the embedding model once when worker starts
 async function initializeEmbedder() {
   if (!embedder) {
-    embedder = await pipeline("feature-extraction", workerData.embeddingModel);
+    const modelName = workerData.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
+    const dimension = workerData.embeddingDimension || 256;
+    const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
+    const isNomic = modelName.includes('nomic');
+
+    const extractor = await pipeline("feature-extraction", modelName);
+
+    if (isNomic) {
+      // MRL embedder with dimension slicing
+      embedder = async function(text, options = {}) {
+        let embeddings = await extractor(text, { pooling: 'mean' });
+        embeddings = layer_norm(embeddings, [embeddings.dims[1]])
+          .slice(null, [0, targetDim])
+          .normalize(2, -1);
+        return { data: embeddings.data };
+      };
+      embedder.dimension = targetDim;
+    } else {
+      // Legacy embedder (MiniLM etc.)
+      embedder = async function(text, options = {}) {
+        return await extractor(text, { pooling: 'mean', normalize: true });
+      };
+      embedder.dimension = 384;
+    }
+
+    embedder.modelName = modelName;
   }
   return embedder;
 }
@@ -65,3 +91,4 @@ initializeEmbedder().then(() => {
 }).catch((error) => {
   parentPort.postMessage({ type: "error", error: error.message });
 });
+
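The worker's Matryoshka (MRL) path normalizes the full 768-dimension embedding, truncates it, and re-normalizes; this is the usage the nomic-embed-text-v1.5 model card recommends for transformers.js. A standalone sketch of that recipe, mirroring the code in the hunk:

// Standalone sketch of the MRL slicing above (runs outside the worker).
import { pipeline, layer_norm } from '@huggingface/transformers';

const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5');

let emb = await extractor('function add(a, b) { return a + b; }', { pooling: 'mean' });
emb = layer_norm(emb, [emb.dims[1]]) // normalize across the full 768 dims first
  .slice(null, [0, 128])             // keep the leading 128 Matryoshka dims
  .normalize(2, -1);                 // L2-normalize so dot product equals cosine

console.log(emb.dims); // [1, 128]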
@@ -0,0 +1,133 @@
+/**
+ * MRL (Matryoshka Representation Learning) Embedder
+ *
+ * Provides flexible embedding dimensions (64, 128, 256, 512, 768) using
+ * nomic-embed-text-v1.5 with layer normalization and dimension slicing.
+ */
+
+import { pipeline, layer_norm } from '@huggingface/transformers';
+
+// Valid MRL dimensions for nomic-embed-text-v1.5
+const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
+
+/**
+ * Create an MRL-enabled embedder with configurable output dimensions
+ *
+ * @param {string} modelName - Model identifier (e.g., 'nomic-ai/nomic-embed-text-v1.5')
+ * @param {object} options - Configuration options
+ * @param {number} options.dimension - Target embedding dimension (64, 128, 256, 512, 768)
+ * @param {string} options.device - Device to use ('cpu', 'webgpu', 'auto')
+ * @returns {Function} Embedder function compatible with existing codebase
+ */
+export async function createMRLEmbedder(modelName, options = {}) {
+  const dimension = options.dimension || 256;
+  const device = options.device || 'cpu';
+
+  // Validate dimension
+  if (!VALID_DIMENSIONS.includes(dimension)) {
+    console.error(`[MRL] Invalid dimension ${dimension}, using 256. Valid: ${VALID_DIMENSIONS.join(', ')}`);
+  }
+
+  const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
+
+  console.error(`[MRL] Loading ${modelName} (output: ${targetDim}d, device: ${device})`);
+
+  // Detect best device if auto
+  const finalDevice = device === 'auto' ? detectBestDevice() : device;
+
+  // Create the feature extraction pipeline
+  const pipelineOptions = {};
+  if (finalDevice === 'webgpu') {
+    pipelineOptions.device = 'webgpu';
+  }
+
+  const extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
+
+  console.error(`[MRL] Model loaded on ${finalDevice}`);
+
+  /**
+   * Embed text with MRL dimension slicing
+   * Compatible with existing embedder(text, options) signature
+   */
+  async function embed(text, embedOptions = {}) {
+    // Generate full 768d embedding
+    let embeddings = await extractor(text, { pooling: 'mean' });
+
+    // Apply MRL: layer_norm -> slice -> normalize
+    embeddings = layer_norm(embeddings, [embeddings.dims[1]])
+      .slice(null, [0, targetDim])
+      .normalize(2, -1);
+
+    // Return in format compatible with existing code (has .data property)
+    return {
+      data: embeddings.data,
+      dims: [embeddings.dims[0], targetDim]
+    };
+  }
+
+  // Attach metadata
+  embed.modelName = modelName;
+  embed.dimension = targetDim;
+  embed.device = finalDevice;
+
+  return embed;
+}
+
+/**
+ * Detect best available device for inference
+ */
+function detectBestDevice() {
+  // WebGPU check (browser environment)
+  if (typeof navigator !== 'undefined' && navigator.gpu) {
+    return 'webgpu';
+  }
+
+  // Node.js with experimental WebGPU (Node 20+)
+  // This would require --experimental-webgpu flag
+  // For now, default to CPU in Node.js
+  return 'cpu';
+}
+
+/**
+ * Create a legacy-compatible embedder (384d, MiniLM)
+ * Used as fallback if MRL model fails to load
+ */
+export async function createLegacyEmbedder(modelName = 'Xenova/all-MiniLM-L6-v2') {
+  console.error(`[Embedder] Loading legacy model: ${modelName}`);
+  const extractor = await pipeline('feature-extraction', modelName);
+
+  async function embed(text, options = {}) {
+    const output = await extractor(text, { pooling: 'mean', normalize: true });
+    return output;
+  }
+
+  embed.modelName = modelName;
+  embed.dimension = 384;
+  embed.device = 'cpu';
+
+  return embed;
+}
+
+/**
+ * Smart embedder factory - picks MRL or legacy based on config
+ */
+export async function createEmbedder(config) {
+  const model = config.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
+  const dimension = config.embeddingDimension || 256;
+  const device = config.device || 'cpu';
+
+  // Use MRL for nomic models
+  if (model.includes('nomic')) {
+    try {
+      return await createMRLEmbedder(model, { dimension, device });
+    } catch (err) {
+      console.error(`[Embedder] MRL model failed: ${err.message}, falling back to legacy`);
+      return await createLegacyEmbedder();
+    }
+  }
+
+  // Use legacy for MiniLM and other models
+  return await createLegacyEmbedder(model);
+}
+
+export { VALID_DIMENSIONS };
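Hypothetical usage of this factory; the diff does not name the new file, so the import path below is an assumption:

// Hypothetical usage; './lib/mrl-embedder.js' is an assumed path for the new module above.
import { createEmbedder } from './lib/mrl-embedder.js';

const embedder = await createEmbedder({
  embeddingModel: 'nomic-ai/nomic-embed-text-v1.5',
  embeddingDimension: 128,
  device: 'auto'
});

const { data, dims } = await embedder('const x = 42;');
console.log(embedder.dimension, dims); // 128 [1, 128]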
@@ -0,0 +1,85 @@
+import os from 'os';
+
+/**
+ * Resource throttling utility to prevent CPU/memory exhaustion during indexing
+ * Ensures the MCP server doesn't freeze the user's laptop
+ */
+export class ResourceThrottle {
+  constructor(config) {
+    // Max CPU usage as percentage (default 50%)
+    this.maxCpuPercent = config.maxCpuPercent || 50;
+
+    // Delay between batches in milliseconds
+    this.batchDelay = config.batchDelay || 100;
+
+    // Max worker threads (override auto-detection)
+    const cpuCount = os.cpus().length;
+    if (config.maxWorkers === 'auto' || config.maxWorkers === undefined) {
+      // Use 25% of cores by default for throttling (more conservative)
+      this.maxWorkers = Math.max(1, Math.floor(cpuCount * 0.25));
+    } else {
+      // Validate and parse the value
+      const parsed = typeof config.maxWorkers === 'number'
+        ? config.maxWorkers
+        : parseInt(config.maxWorkers, 10);
+
+      if (isNaN(parsed) || parsed < 1) {
+        console.error(`[Throttle] Invalid maxWorkers: ${config.maxWorkers}, using auto`);
+        this.maxWorkers = Math.max(1, Math.floor(cpuCount * 0.5));
+      } else {
+        this.maxWorkers = Math.max(1, Math.min(parsed, cpuCount));
+      }
+    }
+
+    console.error(`[Throttle] CPU limit: ${this.maxCpuPercent}%, Batch delay: ${this.batchDelay}ms, Max workers: ${this.maxWorkers}`);
+  }
+
+  /**
+   * Execute work with delay to throttle CPU usage
+   */
+  async throttledBatch(work, signal = null) {
+    // Execute the work
+    if (work) {
+      await work();
+    }
+
+    // Apply delay if not aborted
+    if (!signal?.aborted && this.batchDelay > 0) {
+      await this.sleep(this.batchDelay);
+    }
+  }
+
+  /**
+   * Sleep utility
+   */
+  sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Calculate optimal worker count based on CPU limit
+   */
+  getWorkerCount(requestedWorkers) {
+    if (requestedWorkers === 'auto') {
+      return this.maxWorkers;
+    }
+    return Math.min(requestedWorkers, this.maxWorkers);
+  }
+
+  /**
+   * Check if we should pause due to high CPU usage
+   * This is a simple implementation - could be enhanced with actual CPU monitoring
+   */
+  async checkCpuUsage() {
+    // Future enhancement: monitor actual CPU usage and pause if needed
+    // For now, we rely on worker limits and batch delays
+    return true;
+  }
+}
+
+/**
+ * Sleep utility function
+ */
+export function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
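A sketch of how an indexing loop might drive ResourceThrottle; 'batches', embedBatch(), and the import path are all hypothetical, only the throttle API comes from the diff:

// Hypothetical driver loop for the class above.
import { ResourceThrottle } from './lib/resource-throttle.js';

const throttle = new ResourceThrottle({ maxCpuPercent: 25, batchDelay: 250, maxWorkers: 'auto' });
const controller = new AbortController();

for (const batch of batches) {
  if (controller.signal.aborted) break;
  await throttle.throttledBatch(
    () => embedBatch(batch, throttle.getWorkerCount('auto')), // embed one batch of chunks
    controller.signal                                         // skips the inter-batch delay once aborted
  );
}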