smart-coding-mcp 1.4.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.js CHANGED
@@ -62,10 +62,25 @@ const DEFAULT_CONFIG = {
   watchFiles: false,
   verbose: false,
   workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
-  embeddingModel: "Xenova/all-MiniLM-L6-v2",
+  embeddingModel: "nomic-ai/nomic-embed-text-v1.5",
+  embeddingDimension: 128, // MRL dimension: 64, 128, 256, 512, 768 (changed from 256 to 128 for better performance)
+  device: "auto", // "cpu", "webgpu", or "auto"
+  chunkingMode: "smart", // "smart", "ast", or "line"
   semanticWeight: 0.7,
   exactMatchBoost: 1.5,
-  smartIndexing: true
+  smartIndexing: true,
+
+  // Resource throttling (prevents CPU exhaustion)
+  maxCpuPercent: 25, // Max CPU usage during indexing (default: 25%)
+  batchDelay: 250, // Delay between batches in ms (default: 250)
+  maxWorkers: 'auto', // Max worker threads ('auto' = 25% of cores, or specific number)
+
+  // Startup behavior
+  autoIndexDelay: null, // Delay before auto-indexing (ms), null = disabled (lazy index on first search)
+
+  // Progressive indexing
+  incrementalSaveInterval: 5, // Save to cache every N batches
+  allowPartialSearch: true // Allow searches while indexing is in progress
 };
 
 let config = { ...DEFAULT_CONFIG };
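A quick back-of-envelope on the embeddingDimension change (my arithmetic, not from the package): vectors are stored as float32, 4 bytes per dimension, so each indexed chunk costs:

// Illustrative only: per-vector storage at float32 (4 bytes per dimension).
const bytesPerVector = (dim) => dim * 4; // Float32Array element size
console.log([768, 256, 128].map(bytesPerVector)); // [3072, 1024, 512]
// Halving the MRL dimension halves index size and dot-product cost,
// trading some retrieval quality for speed, the tradeoff MRL is designed for.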
@@ -237,6 +252,80 @@ export async function loadConfig(workspaceDir = null) {
     }
   }
 
+  // MRL embedding dimension
+  if (process.env.SMART_CODING_EMBEDDING_DIMENSION !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_EMBEDDING_DIMENSION, 10);
+    const validDims = [64, 128, 256, 512, 768];
+    if (validDims.includes(value)) {
+      config.embeddingDimension = value;
+      console.error(`[Config] Using embedding dimension: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_EMBEDDING_DIMENSION: ${value}, using default (must be 64, 128, 256, 512, or 768)`);
+    }
+  }
+
+  // Device selection
+  if (process.env.SMART_CODING_DEVICE !== undefined) {
+    const value = process.env.SMART_CODING_DEVICE.trim().toLowerCase();
+    const validDevices = ['cpu', 'webgpu', 'auto'];
+    if (validDevices.includes(value)) {
+      config.device = value;
+      console.error(`[Config] Using device: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_DEVICE: ${value}, using default (must be 'cpu', 'webgpu', or 'auto')`);
+    }
+  }
+
+  // Chunking mode
+  if (process.env.SMART_CODING_CHUNKING_MODE !== undefined) {
+    const value = process.env.SMART_CODING_CHUNKING_MODE.trim().toLowerCase();
+    const validModes = ['smart', 'ast', 'line'];
+    if (validModes.includes(value)) {
+      config.chunkingMode = value;
+      console.error(`[Config] Using chunking mode: ${value}`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_CHUNKING_MODE: ${value}, using default (must be 'smart', 'ast', or 'line')`);
+    }
+  }
+
+  // Resource throttling - Max CPU percent
+  if (process.env.SMART_CODING_MAX_CPU_PERCENT !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_MAX_CPU_PERCENT, 10);
+    if (!isNaN(value) && value >= 10 && value <= 100) {
+      config.maxCpuPercent = value;
+      console.error(`[Config] Max CPU usage: ${value}%`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_MAX_CPU_PERCENT: ${value}, using default (must be 10-100)`);
+    }
+  }
+
+  // Resource throttling - Batch delay
+  if (process.env.SMART_CODING_BATCH_DELAY !== undefined) {
+    const value = parseInt(process.env.SMART_CODING_BATCH_DELAY, 10);
+    if (!isNaN(value) && value >= 0 && value <= 5000) {
+      config.batchDelay = value;
+      console.error(`[Config] Batch delay: ${value}ms`);
+    } else {
+      console.error(`[Config] Invalid SMART_CODING_BATCH_DELAY: ${value}, using default (must be 0-5000)`);
+    }
+  }
+
+  // Resource throttling - Max workers
+  if (process.env.SMART_CODING_MAX_WORKERS !== undefined) {
+    const value = process.env.SMART_CODING_MAX_WORKERS.trim().toLowerCase();
+    if (value === 'auto') {
+      config.maxWorkers = 'auto';
+    } else {
+      const numValue = parseInt(value, 10);
+      if (!isNaN(numValue) && numValue >= 1 && numValue <= 32) {
+        config.maxWorkers = numValue;
+        console.error(`[Config] Max workers: ${numValue}`);
+      } else {
+        console.error(`[Config] Invalid SMART_CODING_MAX_WORKERS: ${value}, using default (must be 'auto' or 1-32)`);
+      }
+    }
+  }
+
   return config;
 }
 
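A minimal sketch of how these overrides combine, assuming lib/config.js resolves as imported below (the path comes from the file header above; everything else is shown in the hunk):

// Sketch only: environment overrides applied by loadConfig.
import { loadConfig } from './lib/config.js';

process.env.SMART_CODING_EMBEDDING_DIMENSION = '256'; // must be 64/128/256/512/768
process.env.SMART_CODING_MAX_CPU_PERCENT = '50';      // accepted range: 10-100
process.env.SMART_CODING_MAX_WORKERS = '4';           // 'auto' or 1-32

const config = await loadConfig();
// config.embeddingDimension === 256, config.maxCpuPercent === 50, config.maxWorkers === 4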
@@ -1,12 +1,38 @@
 import { parentPort, workerData } from "worker_threads";
-import { pipeline } from "@xenova/transformers";
+import { pipeline, layer_norm } from "@huggingface/transformers";
 
 let embedder = null;
+const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
 
 // Initialize the embedding model once when worker starts
 async function initializeEmbedder() {
   if (!embedder) {
-    embedder = await pipeline("feature-extraction", workerData.embeddingModel);
+    const modelName = workerData.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
+    const dimension = workerData.embeddingDimension || 256;
+    const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
+    const isNomic = modelName.includes('nomic');
+
+    const extractor = await pipeline("feature-extraction", modelName);
+
+    if (isNomic) {
+      // MRL embedder with dimension slicing
+      embedder = async function(text, options = {}) {
+        let embeddings = await extractor(text, { pooling: 'mean' });
+        embeddings = layer_norm(embeddings, [embeddings.dims[1]])
+          .slice(null, [0, targetDim])
+          .normalize(2, -1);
+        return { data: embeddings.data };
+      };
+      embedder.dimension = targetDim;
+    } else {
+      // Legacy embedder (MiniLM etc.)
+      embedder = async function(text, options = {}) {
+        return await extractor(text, { pooling: 'mean', normalize: true });
+      };
+      embedder.dimension = 384;
+    }
+
+    embedder.modelName = modelName;
   }
   return embedder;
 }
@@ -65,3 +91,4 @@ initializeEmbedder().then(() => {
 }).catch((error) => {
   parentPort.postMessage({ type: "error", error: error.message });
 });
+
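The worker's Matryoshka (MRL) path normalizes the full 768-dimension embedding, truncates it, and re-normalizes; this is the usage the nomic-embed-text-v1.5 model card recommends for transformers.js. A standalone sketch of that recipe, mirroring the code in the hunk:

// Standalone sketch of the MRL slicing above (runs outside the worker).
import { pipeline, layer_norm } from '@huggingface/transformers';

const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5');

let emb = await extractor('function add(a, b) { return a + b; }', { pooling: 'mean' });
emb = layer_norm(emb, [emb.dims[1]]) // normalize across the full 768 dims first
  .slice(null, [0, 128])             // keep the leading 128 Matryoshka dims
  .normalize(2, -1);                 // L2-normalize so dot product equals cosine

console.log(emb.dims); // [1, 128]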
@@ -0,0 +1,133 @@
+/**
+ * MRL (Matryoshka Representation Learning) Embedder
+ *
+ * Provides flexible embedding dimensions (64, 128, 256, 512, 768) using
+ * nomic-embed-text-v1.5 with layer normalization and dimension slicing.
+ */
+
+import { pipeline, layer_norm } from '@huggingface/transformers';
+
+// Valid MRL dimensions for nomic-embed-text-v1.5
+const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
+
+/**
+ * Create an MRL-enabled embedder with configurable output dimensions
+ *
+ * @param {string} modelName - Model identifier (e.g., 'nomic-ai/nomic-embed-text-v1.5')
+ * @param {object} options - Configuration options
+ * @param {number} options.dimension - Target embedding dimension (64, 128, 256, 512, 768)
+ * @param {string} options.device - Device to use ('cpu', 'webgpu', 'auto')
+ * @returns {Function} Embedder function compatible with existing codebase
+ */
+export async function createMRLEmbedder(modelName, options = {}) {
+  const dimension = options.dimension || 256;
+  const device = options.device || 'cpu';
+
+  // Validate dimension
+  if (!VALID_DIMENSIONS.includes(dimension)) {
+    console.error(`[MRL] Invalid dimension ${dimension}, using 256. Valid: ${VALID_DIMENSIONS.join(', ')}`);
+  }
+
+  const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
+
+  console.error(`[MRL] Loading ${modelName} (output: ${targetDim}d, device: ${device})`);
+
+  // Detect best device if auto
+  const finalDevice = device === 'auto' ? detectBestDevice() : device;
+
+  // Create the feature extraction pipeline
+  const pipelineOptions = {};
+  if (finalDevice === 'webgpu') {
+    pipelineOptions.device = 'webgpu';
+  }
+
+  const extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
+
+  console.error(`[MRL] Model loaded on ${finalDevice}`);
+
+  /**
+   * Embed text with MRL dimension slicing
+   * Compatible with existing embedder(text, options) signature
+   */
+  async function embed(text, embedOptions = {}) {
+    // Generate full 768d embedding
+    let embeddings = await extractor(text, { pooling: 'mean' });
+
+    // Apply MRL: layer_norm -> slice -> normalize
+    embeddings = layer_norm(embeddings, [embeddings.dims[1]])
+      .slice(null, [0, targetDim])
+      .normalize(2, -1);
+
+    // Return in format compatible with existing code (has .data property)
+    return {
+      data: embeddings.data,
+      dims: [embeddings.dims[0], targetDim]
+    };
+  }
+
+  // Attach metadata
+  embed.modelName = modelName;
+  embed.dimension = targetDim;
+  embed.device = finalDevice;
+
+  return embed;
+}
+
+/**
+ * Detect best available device for inference
+ */
+function detectBestDevice() {
+  // WebGPU check (browser environment)
+  if (typeof navigator !== 'undefined' && navigator.gpu) {
+    return 'webgpu';
+  }
+
+  // Node.js with experimental WebGPU (Node 20+)
+  // This would require --experimental-webgpu flag
+  // For now, default to CPU in Node.js
+  return 'cpu';
+}
+
+/**
+ * Create a legacy-compatible embedder (384d, MiniLM)
+ * Used as fallback if MRL model fails to load
+ */
+export async function createLegacyEmbedder(modelName = 'Xenova/all-MiniLM-L6-v2') {
+  console.error(`[Embedder] Loading legacy model: ${modelName}`);
+  const extractor = await pipeline('feature-extraction', modelName);
+
+  async function embed(text, options = {}) {
+    const output = await extractor(text, { pooling: 'mean', normalize: true });
+    return output;
+  }
+
+  embed.modelName = modelName;
+  embed.dimension = 384;
+  embed.device = 'cpu';
+
+  return embed;
+}
+
+/**
+ * Smart embedder factory - picks MRL or legacy based on config
+ */
+export async function createEmbedder(config) {
+  const model = config.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
+  const dimension = config.embeddingDimension || 256;
+  const device = config.device || 'cpu';
+
+  // Use MRL for nomic models
+  if (model.includes('nomic')) {
+    try {
+      return await createMRLEmbedder(model, { dimension, device });
+    } catch (err) {
+      console.error(`[Embedder] MRL model failed: ${err.message}, falling back to legacy`);
+      return await createLegacyEmbedder();
+    }
+  }
+
+  // Use legacy for MiniLM and other models
+  return await createLegacyEmbedder(model);
+}
+
+export { VALID_DIMENSIONS };
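Hypothetical usage of this factory; the diff does not name the new file, so the import path below is an assumption:

// Hypothetical usage; './lib/mrl-embedder.js' is an assumed path for the new module above.
import { createEmbedder } from './lib/mrl-embedder.js';

const embedder = await createEmbedder({
  embeddingModel: 'nomic-ai/nomic-embed-text-v1.5',
  embeddingDimension: 128,
  device: 'auto'
});

const { data, dims } = await embedder('const x = 42;');
console.log(embedder.dimension, dims); // 128 [1, 128]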
@@ -0,0 +1,85 @@
+import os from 'os';
+
+/**
+ * Resource throttling utility to prevent CPU/memory exhaustion during indexing
+ * Ensures the MCP server doesn't freeze the user's laptop
+ */
+export class ResourceThrottle {
+  constructor(config) {
+    // Max CPU usage as percentage (default 50%)
+    this.maxCpuPercent = config.maxCpuPercent || 50;
+
+    // Delay between batches in milliseconds
+    this.batchDelay = config.batchDelay || 100;
+
+    // Max worker threads (override auto-detection)
+    const cpuCount = os.cpus().length;
+    if (config.maxWorkers === 'auto' || config.maxWorkers === undefined) {
+      // Use 25% of cores by default for throttling (more conservative)
+      this.maxWorkers = Math.max(1, Math.floor(cpuCount * 0.25));
+    } else {
+      // Validate and parse the value
+      const parsed = typeof config.maxWorkers === 'number'
+        ? config.maxWorkers
+        : parseInt(config.maxWorkers, 10);
+
+      if (isNaN(parsed) || parsed < 1) {
+        console.error(`[Throttle] Invalid maxWorkers: ${config.maxWorkers}, using auto`);
+        this.maxWorkers = Math.max(1, Math.floor(cpuCount * 0.5));
+      } else {
+        this.maxWorkers = Math.max(1, Math.min(parsed, cpuCount));
+      }
+    }
+
+    console.error(`[Throttle] CPU limit: ${this.maxCpuPercent}%, Batch delay: ${this.batchDelay}ms, Max workers: ${this.maxWorkers}`);
+  }
+
+  /**
+   * Execute work with delay to throttle CPU usage
+   */
+  async throttledBatch(work, signal = null) {
+    // Execute the work
+    if (work) {
+      await work();
+    }
+
+    // Apply delay if not aborted
+    if (!signal?.aborted && this.batchDelay > 0) {
+      await this.sleep(this.batchDelay);
+    }
+  }
+
+  /**
+   * Sleep utility
+   */
+  sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Calculate optimal worker count based on CPU limit
+   */
+  getWorkerCount(requestedWorkers) {
+    if (requestedWorkers === 'auto') {
+      return this.maxWorkers;
+    }
+    return Math.min(requestedWorkers, this.maxWorkers);
+  }
+
+  /**
+   * Check if we should pause due to high CPU usage
+   * This is a simple implementation - could be enhanced with actual CPU monitoring
+   */
+  async checkCpuUsage() {
+    // Future enhancement: monitor actual CPU usage and pause if needed
+    // For now, we rely on worker limits and batch delays
+    return true;
+  }
+}
+
+/**
+ * Sleep utility function
+ */
+export function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
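A sketch of how an indexing loop might drive ResourceThrottle; 'batches', embedBatch(), and the import path are all hypothetical, only the throttle API comes from the diff:

// Hypothetical driver loop for the class above.
import { ResourceThrottle } from './lib/resource-throttle.js';

const throttle = new ResourceThrottle({ maxCpuPercent: 25, batchDelay: 250, maxWorkers: 'auto' });
const controller = new AbortController();

for (const batch of batches) {
  if (controller.signal.aborted) break;
  await throttle.throttledBatch(
    () => embedBatch(batch, throttle.getWorkerCount('auto')), // embed one batch of chunks
    controller.signal                                         // skips the inter-batch delay once aborted
  );
}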