smart-coding-mcp 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,31 @@
1
1
  import { parentPort, workerData } from "worker_threads";
2
2
  import { pipeline, layer_norm } from "@huggingface/transformers";
3
+ import { existsSync, rmSync } from 'fs';
4
+ import { join, dirname } from 'path';
5
+ import { fileURLToPath } from 'url';
3
6
 
4
7
  let embedder = null;
5
8
  const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
6
9
 
10
/**
 * Clear the HuggingFace transformers cache for a specific model.
 * Used for auto-recovery from corrupted model files (e.g. a partial
 * download that later fails with "Protobuf parsing failed").
 *
 * @param {string} modelName - Model id, e.g. "nomic-ai/nomic-embed-text-v1.5".
 * @returns {boolean} true if a cache directory was found and removed.
 */
function clearModelCache(modelName) {
  try {
    // Downloaded models live in the installed package's own .cache directory.
    const transformersPath = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));

    // Check both cache layouts (consistent with the MRL embedder module):
    // nested "org/model" directories and the flattened "org-model" form.
    const candidates = [
      join(transformersPath, '.cache', ...modelName.split('/')),
      join(transformersPath, '.cache', modelName.replace('/', '-')),
    ];

    for (const cacheDir of candidates) {
      if (existsSync(cacheDir)) {
        console.error(`[Worker] Clearing corrupted cache: ${cacheDir}`);
        rmSync(cacheDir, { recursive: true, force: true });
        return true;
      }
    }
  } catch (e) {
    // Never crash the worker over a failed cleanup; caller re-throws the
    // original load error when this returns false.
    console.error(`[Worker] Failed to clear cache: ${e.message}`);
  }
  return false;
}
28
+
7
29
  // Initialize the embedding model once when worker starts
8
30
  async function initializeEmbedder() {
9
31
  if (!embedder) {
@@ -11,12 +33,27 @@ async function initializeEmbedder() {
11
33
  const dimension = workerData.embeddingDimension || 256;
12
34
  const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
13
35
  const isNomic = modelName.includes('nomic');
14
-
15
- const extractor = await pipeline("feature-extraction", modelName);
36
+
37
+ // Load model with auto-recovery for corrupted files
38
+ let extractor;
39
+ try {
40
+ extractor = await pipeline("feature-extraction", modelName);
41
+ } catch (err) {
42
+ if (err.message && err.message.includes('Protobuf parsing failed')) {
43
+ console.error(`[Worker] Corrupted model detected, attempting auto-recovery...`);
44
+ if (clearModelCache(modelName)) {
45
+ extractor = await pipeline("feature-extraction", modelName);
46
+ } else {
47
+ throw err;
48
+ }
49
+ } else {
50
+ throw err;
51
+ }
52
+ }
16
53
 
17
54
  if (isNomic) {
18
55
  // MRL embedder with dimension slicing
19
- embedder = async function(text, options = {}) {
56
+ embedder = async function(text) {
20
57
  let embeddings = await extractor(text, { pooling: 'mean' });
21
58
  embeddings = layer_norm(embeddings, [embeddings.dims[1]])
22
59
  .slice(null, [0, targetDim])
@@ -26,7 +63,7 @@ async function initializeEmbedder() {
26
63
  embedder.dimension = targetDim;
27
64
  } else {
28
65
  // Legacy embedder (MiniLM etc.)
29
- embedder = async function(text, options = {}) {
66
+ embedder = async function(text) {
30
67
  return await extractor(text, { pooling: 'mean', normalize: true });
31
68
  };
32
69
  embedder.dimension = 384;
@@ -1,15 +1,47 @@
1
1
  /**
2
2
  * MRL (Matryoshka Representation Learning) Embedder
3
- *
3
+ *
4
4
  * Provides flexible embedding dimensions (64, 128, 256, 512, 768) using
5
5
  * nomic-embed-text-v1.5 with layer normalization and dimension slicing.
6
6
  */
7
7
 
8
8
  import { pipeline, layer_norm } from '@huggingface/transformers';
9
+ import { existsSync, rmSync } from 'fs';
10
+ import { join, dirname } from 'path';
11
+ import { fileURLToPath } from 'url';
9
12
 
10
13
  // Valid MRL dimensions for nomic-embed-text-v1.5
11
14
  const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
12
15
 
16
/**
 * Clear the HuggingFace transformers cache for a specific model.
 * Used for auto-recovery from corrupted model files.
 *
 * @param {string} modelName - Model id, e.g. "nomic-ai/nomic-embed-text-v1.5".
 * @returns {boolean} true if a cache directory was found and removed.
 */
function clearModelCache(modelName) {
  try {
    // The cache lives inside the installed @huggingface/transformers package.
    const packageDir = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));

    // Probe the flattened layout ("org-model") first, then the nested
    // layout ("org/model") used for ids such as nomic-ai/nomic-embed-text-v1.5.
    const candidates = [
      join(packageDir, '.cache', modelName.replace('/', '-')),
      join(packageDir, '.cache', ...modelName.split('/')),
    ];

    for (const dir of candidates) {
      if (!existsSync(dir)) continue;
      console.error(`[MRL] Clearing corrupted cache: ${dir}`);
      rmSync(dir, { recursive: true, force: true });
      return true;
    }
  } catch (e) {
    console.error(`[MRL] Failed to clear cache: ${e.message}`);
  }
  return false;
}
44
+
13
45
  /**
14
46
  * Create an MRL-enabled embedder with configurable output dimensions
15
47
  *
@@ -35,13 +67,29 @@ export async function createMRLEmbedder(modelName, options = {}) {
35
67
  // Detect best device if auto
36
68
  const finalDevice = device === 'auto' ? detectBestDevice() : device;
37
69
 
38
- // Create the feature extraction pipeline
70
+ // Create the feature extraction pipeline with auto-recovery for corrupted models
39
71
  const pipelineOptions = {};
40
72
  if (finalDevice === 'webgpu') {
41
73
  pipelineOptions.device = 'webgpu';
42
74
  }
43
-
44
- const extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
75
+
76
+ let extractor;
77
+ try {
78
+ extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
79
+ } catch (err) {
80
+ // Detect corrupted ONNX model (Protobuf parsing failed)
81
+ if (err.message && err.message.includes('Protobuf parsing failed')) {
82
+ console.error(`[MRL] Corrupted model detected, attempting auto-recovery...`);
83
+ if (clearModelCache(modelName)) {
84
+ // Retry after clearing cache
85
+ extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
86
+ } else {
87
+ throw err;
88
+ }
89
+ } else {
90
+ throw err;
91
+ }
92
+ }
45
93
 
46
94
  console.error(`[MRL] Model loaded on ${finalDevice}`);
47
95
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-coding-mcp",
3
- "version": "2.3.0",
3
+ "version": "2.3.1",
4
4
  "description": "An extensible MCP server that enhances coding productivity with AI-powered features including semantic code search, intelligent indexing, and more, using local LLMs",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Tests for Model Cache Auto-Recovery
3
+ *
4
+ * Tests the auto-recovery mechanism for corrupted ONNX model files:
5
+ * - Cache directory detection and clearing
6
+ * - Protobuf parsing error detection
7
+ * - Retry after cache clear
8
+ */
9
+
10
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
11
+ import { existsSync, mkdirSync, writeFileSync, rmSync } from 'fs';
12
+ import { join, dirname } from 'path';
13
+ import { fileURLToPath } from 'url';
14
+ import { tmpdir } from 'os';
15
+
16
// Scratch directory (under the OS temp dir) where tests build mock model caches
const testCacheBase = join(tmpdir(), 'smart-coding-mcp-test-cache');
18
+
19
+ describe('Model Cache Recovery', () => {
20
+ beforeEach(() => {
21
+ // Clean up before each test
22
+ if (existsSync(testCacheBase)) {
23
+ rmSync(testCacheBase, { recursive: true, force: true });
24
+ }
25
+ });
26
+
27
+ afterEach(() => {
28
+ // Clean up after each test
29
+ if (existsSync(testCacheBase)) {
30
+ rmSync(testCacheBase, { recursive: true, force: true });
31
+ }
32
+ });
33
+
34
+ describe('Cache Directory Detection', () => {
35
+ it('should detect cache directory with forward slash model names', () => {
36
+ // Create a mock cache structure
37
+ const cacheDir = join(testCacheBase, 'nomic-ai', 'nomic-embed-text-v1.5', 'onnx');
38
+ mkdirSync(cacheDir, { recursive: true });
39
+ writeFileSync(join(cacheDir, 'model.onnx'), 'corrupted data');
40
+
41
+ expect(existsSync(cacheDir)).toBe(true);
42
+
43
+ // Simulate cache clear logic
44
+ const modelName = 'nomic-ai/nomic-embed-text-v1.5';
45
+ const cachePath = join(testCacheBase, ...modelName.split('/'));
46
+
47
+ if (existsSync(cachePath)) {
48
+ rmSync(cachePath, { recursive: true, force: true });
49
+ }
50
+
51
+ expect(existsSync(cachePath)).toBe(false);
52
+ });
53
+
54
+ it('should handle non-existent cache gracefully', () => {
55
+ const nonExistentPath = join(testCacheBase, 'does-not-exist');
56
+
57
+ // Should not throw
58
+ let cleared = false;
59
+ if (existsSync(nonExistentPath)) {
60
+ rmSync(nonExistentPath, { recursive: true, force: true });
61
+ cleared = true;
62
+ }
63
+
64
+ expect(cleared).toBe(false);
65
+ });
66
+
67
+ it('should clear nested model cache directories', () => {
68
+ // Create nested structure mimicking real cache
69
+ const modelDir = join(testCacheBase, 'nomic-ai', 'nomic-embed-text-v1.5');
70
+ const onnxDir = join(modelDir, 'onnx');
71
+ const tokenizerDir = join(modelDir, 'tokenizer');
72
+
73
+ mkdirSync(onnxDir, { recursive: true });
74
+ mkdirSync(tokenizerDir, { recursive: true });
75
+
76
+ writeFileSync(join(onnxDir, 'model.onnx'), 'corrupted');
77
+ writeFileSync(join(tokenizerDir, 'tokenizer.json'), '{}');
78
+
79
+ expect(existsSync(onnxDir)).toBe(true);
80
+ expect(existsSync(tokenizerDir)).toBe(true);
81
+
82
+ // Clear the model directory (not just onnx)
83
+ rmSync(modelDir, { recursive: true, force: true });
84
+
85
+ expect(existsSync(modelDir)).toBe(false);
86
+ expect(existsSync(onnxDir)).toBe(false);
87
+ expect(existsSync(tokenizerDir)).toBe(false);
88
+ });
89
+ });
90
+
91
+ describe('Error Detection', () => {
92
+ it('should identify Protobuf parsing errors as recoverable', () => {
93
+ const recoverableErrors = [
94
+ 'Protobuf parsing failed',
95
+ 'Load model from /path/to/model.onnx failed:Protobuf parsing failed.',
96
+ 'Error: Protobuf parsing failed'
97
+ ];
98
+
99
+ for (const errorMsg of recoverableErrors) {
100
+ const isRecoverable = errorMsg.includes('Protobuf parsing failed');
101
+ expect(isRecoverable).toBe(true);
102
+ }
103
+ });
104
+
105
+ it('should not identify other errors as recoverable', () => {
106
+ const nonRecoverableErrors = [
107
+ 'Network error',
108
+ 'File not found',
109
+ 'Out of memory',
110
+ 'Invalid model format',
111
+ 'ONNX runtime error'
112
+ ];
113
+
114
+ for (const errorMsg of nonRecoverableErrors) {
115
+ const isRecoverable = errorMsg.includes('Protobuf parsing failed');
116
+ expect(isRecoverable).toBe(false);
117
+ }
118
+ });
119
+ });
120
+
121
  // End-to-end simulations of the load → detect → clear-cache → retry sequence
  // implemented in the worker and MRL embedder modules.
  describe('Recovery Flow', () => {
    it('should simulate recovery sequence', async () => {
      // Create corrupted cache
      const cacheDir = join(testCacheBase, 'nomic-ai', 'nomic-embed-text-v1.5');
      mkdirSync(join(cacheDir, 'onnx'), { recursive: true });
      writeFileSync(join(cacheDir, 'onnx', 'model.onnx'), 'corrupted protobuf data');

      let loadAttempts = 0;
      let cacheCleared = false;

      // Simulate the recovery flow: the first load fails with the recoverable
      // signature; any load after the cache is cleared succeeds.
      const mockLoadModel = async () => {
        loadAttempts++;
        if (loadAttempts === 1 && !cacheCleared) {
          throw new Error('Load model from /path/model.onnx failed:Protobuf parsing failed.');
        }
        return { success: true };
      };

      const mockClearCache = () => {
        if (existsSync(cacheDir)) {
          rmSync(cacheDir, { recursive: true, force: true });
          cacheCleared = true;
          return true;
        }
        return false;
      };

      // First attempt should fail
      let result;
      try {
        result = await mockLoadModel();
      } catch (err) {
        if (err.message.includes('Protobuf parsing failed')) {
          // Clear cache and retry
          mockClearCache();
          result = await mockLoadModel();
        }
      }

      // Exactly one failed attempt plus one successful retry
      expect(loadAttempts).toBe(2);
      expect(cacheCleared).toBe(true);
      expect(result.success).toBe(true);
      expect(existsSync(cacheDir)).toBe(false);
    });

    it('should propagate non-recoverable errors', async () => {
      const mockLoadModel = async () => {
        throw new Error('Network connection failed');
      };

      await expect(async () => {
        try {
          await mockLoadModel();
        } catch (err) {
          if (err.message.includes('Protobuf parsing failed')) {
            // Would clear cache and retry, but this error is different
          }
          throw err;
        }
      }).rejects.toThrow('Network connection failed');
    });

    it('should handle cache clear failure gracefully', async () => {
      let loadAttempts = 0;

      const mockLoadModel = async () => {
        loadAttempts++;
        throw new Error('Protobuf parsing failed');
      };

      const mockClearCache = () => {
        // Simulate cache clear failure (e.g., permission denied)
        return false;
      };

      await expect(async () => {
        try {
          await mockLoadModel();
        } catch (err) {
          if (err.message.includes('Protobuf parsing failed')) {
            if (!mockClearCache()) {
              // Cache clear failed, re-throw original error
              throw err;
            }
          } else {
            throw err;
          }
        }
      }).rejects.toThrow('Protobuf parsing failed');

      // No retry happens when the cache could not be cleared
      expect(loadAttempts).toBe(1);
    });
  });
215
+
216
+ describe('Path Resolution', () => {
217
+ it('should handle model names with organization prefix', () => {
218
+ const modelName = 'nomic-ai/nomic-embed-text-v1.5';
219
+ const parts = modelName.split('/');
220
+
221
+ expect(parts).toEqual(['nomic-ai', 'nomic-embed-text-v1.5']);
222
+ expect(parts.length).toBe(2);
223
+ });
224
+
225
+ it('should handle model names without organization prefix', () => {
226
+ const modelName = 'all-MiniLM-L6-v2';
227
+ const parts = modelName.split('/');
228
+
229
+ expect(parts).toEqual(['all-MiniLM-L6-v2']);
230
+ expect(parts.length).toBe(1);
231
+ });
232
+
233
+ it('should build correct cache paths', () => {
234
+ const baseDir = '/some/cache/path';
235
+ const modelName = 'nomic-ai/nomic-embed-text-v1.5';
236
+
237
+ const cachePath = join(baseDir, ...modelName.split('/'));
238
+
239
+ expect(cachePath).toBe('/some/cache/path/nomic-ai/nomic-embed-text-v1.5');
240
+ });
241
+ });
242
+ });