smart-coding-mcp 2.3.1 → 2.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,21 +20,24 @@ const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
 function clearModelCache(modelName) {
   try {
     // Find the transformers package location
-    const transformersPath = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));
-    const cacheDir = join(transformersPath, '.cache', modelName.replace('/', '-'));
+    // import.meta.resolve may return .../dist/index.js, so check parent too
+    const resolvedPath = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));
+    const possibleRoots = [resolvedPath, dirname(resolvedPath)];
 
-    if (existsSync(cacheDir)) {
-      console.error(`[MRL] Clearing corrupted cache: ${cacheDir}`);
-      rmSync(cacheDir, { recursive: true, force: true });
-      return true;
-    }
+    for (const root of possibleRoots) {
+      // Try different cache path patterns
+      const cachePaths = [
+        join(root, '.cache', modelName.replace('/', '-')), // nomic-ai-nomic-embed-text-v1.5
+        join(root, '.cache', ...modelName.split('/')) // nomic-ai/nomic-embed-text-v1.5
+      ];
 
-    // Also try the model name with original slash (nomic-ai/nomic-embed-text-v1.5)
-    const cacheDir2 = join(transformersPath, '.cache', ...modelName.split('/'));
-    if (existsSync(cacheDir2)) {
-      console.error(`[MRL] Clearing corrupted cache: ${cacheDir2}`);
-      rmSync(cacheDir2, { recursive: true, force: true });
-      return true;
+      for (const cacheDir of cachePaths) {
+        if (existsSync(cacheDir)) {
+          console.error(`[MRL] Clearing corrupted cache: ${cacheDir}`);
+          rmSync(cacheDir, { recursive: true, force: true });
+          return true;
+        }
+      }
     }
   } catch (e) {
     console.error(`[MRL] Failed to clear cache: ${e.message}`);
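
For reference, the two cache layouts the loop now probes, as a minimal standalone sketch. The root path below is a hypothetical install location, not a value from the package; note that String.prototype.replace with a string pattern swaps only the first slash, which is all a scoped model name needs:

// Sketch: the two cache paths probed for each candidate root.
import { join } from 'path';

const root = '/app/node_modules/@huggingface/transformers'; // hypothetical
const modelName = 'nomic-ai/nomic-embed-text-v1.5';

// Flattened layout: slash replaced by a dash, a single directory
console.log(join(root, '.cache', modelName.replace('/', '-')));
// -> .../transformers/.cache/nomic-ai-nomic-embed-text-v1.5

// Nested layout: org and model name as separate directories
console.log(join(root, '.cache', ...modelName.split('/')));
// -> .../transformers/.cache/nomic-ai/nomic-embed-text-v1.5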
@@ -74,43 +77,87 @@ export async function createMRLEmbedder(modelName, options = {}) {
   }
 
   let extractor;
+
+  // Helper to detect corruption errors
+  function isCorruptionError(err) {
+    if (!err.message) return false;
+    return err.message.includes('Protobuf parsing failed') ||
+      err.message.includes('Invalid model') ||
+      err.message.includes('ONNX') && err.message.includes('corrupt');
+  }
+
+  // Helper to load/reload the extractor
+  async function loadExtractor(clearCache = false) {
+    if (clearCache) {
+      console.error(`[MRL] Corrupted model detected, attempting auto-recovery...`);
+      clearModelCache(modelName);
+    }
+    return await pipeline('feature-extraction', modelName, pipelineOptions);
+  }
+
   try {
-    extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
+    extractor = await loadExtractor();
   } catch (err) {
-    // Detect corrupted ONNX model (Protobuf parsing failed)
-    if (err.message && err.message.includes('Protobuf parsing failed')) {
-      console.error(`[MRL] Corrupted model detected, attempting auto-recovery...`);
-      if (clearModelCache(modelName)) {
-        // Retry after clearing cache
-        extractor = await pipeline('feature-extraction', modelName, pipelineOptions);
-      } else {
-        throw err;
-      }
+    if (isCorruptionError(err)) {
+      extractor = await loadExtractor(true);
     } else {
       throw err;
     }
   }
-
+
   console.error(`[MRL] Model loaded on ${finalDevice}`);
-
+
+  // Fallback embedder for when MRL model fails at runtime
+  let fallbackEmbedder = null;
+
   /**
    * Embed text with MRL dimension slicing
    * Compatible with existing embedder(text, options) signature
+   * Includes runtime auto-recovery for corrupted models with fallback
    */
   async function embed(text, embedOptions = {}) {
-    // Generate full 768d embedding
-    let embeddings = await extractor(text, { pooling: 'mean' });
-
-    // Apply MRL: layer_norm -> slice -> normalize
-    embeddings = layer_norm(embeddings, [embeddings.dims[1]])
-      .slice(null, [0, targetDim])
-      .normalize(2, -1);
-
-    // Return in format compatible with existing code (has .data property)
-    return {
-      data: embeddings.data,
-      dims: [embeddings.dims[0], targetDim]
-    };
+    // If we've fallen back to legacy, use it
+    if (fallbackEmbedder) {
+      return await fallbackEmbedder(text, embedOptions);
+    }
+
+    async function doEmbed() {
+      // Generate full 768d embedding
+      let embeddings = await extractor(text, { pooling: 'mean' });
+
+      // Apply MRL: layer_norm -> slice -> normalize
+      embeddings = layer_norm(embeddings, [embeddings.dims[1]])
+        .slice(null, [0, targetDim])
+        .normalize(2, -1);
+
+      // Return in format compatible with existing code (has .data property)
+      return {
+        data: embeddings.data,
+        dims: [embeddings.dims[0], targetDim]
+      };
+    }
+
+    try {
+      return await doEmbed();
+    } catch (err) {
+      // Runtime corruption detection - try reload first
+      if (isCorruptionError(err)) {
+        console.error(`[MRL] Runtime corruption detected, attempting reload...`);
+        try {
+          extractor = await loadExtractor(true);
+          return await doEmbed();
+        } catch (reloadErr) {
+          // Reload failed - fall back to legacy model
+          console.error(`[MRL] Reload failed, falling back to legacy model...`);
+          const { createLegacyEmbedder } = await import('./mrl-embedder.js');
+          fallbackEmbedder = await createLegacyEmbedder();
+          embed.dimension = fallbackEmbedder.dimension;
+          embed.modelName = fallbackEmbedder.modelName;
+          return await fallbackEmbedder(text, embedOptions);
+        }
+      }
+      throw err;
+    }
   }
 
   // Attach metadata
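
Taken together, this hunk gives createMRLEmbedder three layers of defense: clear-and-reload at startup, a reload on the first runtime corruption, and a permanent switch to the legacy embedder when the reload also fails. A hedged sketch of what that looks like from the caller's side; the import path is an assumption inferred from the test imports later in this diff:

// Sketch only: observing the recovery behavior as a caller.
import { createMRLEmbedder } from './lib/mrl-embedder.js'; // assumed path

const embed = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 256 });

const result = await embed('hello world');
console.log(result.dims); // [1, 256] while the MRL model is healthy

// After a failed reload the embedder switches models in place, so callers
// should re-read the metadata instead of caching it up front:
console.log(embed.dimension); // 256, or 384 once fallen back to legacy
console.log(embed.modelName); // updated to the legacy model after fallback

Note that a mid-session fallback changes the output dimension from 256 to 384, so vectors embedded before and after the switch are not comparable; the reassignment of embed.dimension and embed.modelName is what lets indexing code detect the change.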
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "smart-coding-mcp",
-  "version": "2.3.1",
+  "version": "2.3.3",
   "description": "An extensible MCP server that enhances coding productivity with AI-powered features including semantic code search, intelligent indexing, and more, using local LLMs",
   "type": "module",
   "main": "index.js",
@@ -7,7 +7,7 @@
  * - Semantic similarity at different dimensions
  */
 
-import { describe, it, expect, beforeAll } from 'vitest';
+import { describe, it, expect, beforeAll, vi } from 'vitest';
 import { createMRLEmbedder, createLegacyEmbedder, createEmbedder, VALID_DIMENSIONS } from '../lib/mrl-embedder.js';
 import { cosineSimilarity } from '../lib/utils.js';
 
@@ -101,8 +101,114 @@ describe('createEmbedder Factory', () => {
       embeddingModel: 'Xenova/all-MiniLM-L6-v2',
       device: 'cpu'
     };
-
+
     const embedder = await createEmbedder(config);
     expect(embedder.dimension).toBe(384);
   }, 120000);
 });
+
+describe('Auto-Recovery Logic', () => {
+  describe('Corruption Error Detection', () => {
+    it('should detect Protobuf parsing errors', async () => {
+      // We test this indirectly through the createEmbedder fallback behavior
+      // When MRL fails, it should fall back to legacy
+      const config = {
+        embeddingModel: 'nomic-ai/nomic-embed-text-v1.5',
+        embeddingDimension: 256,
+        device: 'cpu'
+      };
+
+      // This should succeed (model loads or recovers)
+      const embedder = await createEmbedder(config);
+      expect(embedder).toBeDefined();
+      expect(typeof embedder).toBe('function');
+    }, 120000);
+  });
+
+  describe('Runtime Recovery', () => {
+    let embedder;
+
+    beforeAll(async () => {
+      embedder = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 256 });
+    }, 120000);
+
+    it('should successfully embed after model is loaded', async () => {
+      const result = await embedder('test recovery');
+      expect(result.data).toBeDefined();
+      expect(result.dims[1]).toBe(256);
+    });
+
+    it('should have correct metadata after successful embedding', () => {
+      expect(embedder.dimension).toBe(256);
+      expect(embedder.modelName).toBe('nomic-ai/nomic-embed-text-v1.5');
+    });
+
+    it('should handle multiple sequential embeddings', async () => {
+      const texts = ['first text', 'second text', 'third text'];
+
+      for (const text of texts) {
+        const result = await embedder(text);
+        expect(result.data).toBeDefined();
+        expect(Array.from(result.data).length).toBe(256);
+      }
+    });
+  });
+
+  describe('Fallback Behavior', () => {
+    it('createEmbedder should fall back to legacy when MRL fails completely', async () => {
+      // Test that the factory handles failures gracefully
+      // Using a known-working legacy model
+      const config = {
+        embeddingModel: 'Xenova/all-MiniLM-L6-v2',
+        device: 'cpu'
+      };
+
+      const embedder = await createEmbedder(config);
+      expect(embedder.dimension).toBe(384);
+      expect(embedder.modelName).toBe('Xenova/all-MiniLM-L6-v2');
+
+      // Verify it actually works
+      const result = await embedder('fallback test');
+      expect(result.data).toBeDefined();
+    }, 120000);
+
+    it('legacy embedder should produce valid embeddings', async () => {
+      const embedder = await createLegacyEmbedder();
+
+      expect(embedder.dimension).toBe(384);
+      expect(embedder.modelName).toBe('Xenova/all-MiniLM-L6-v2');
+
+      const result = await embedder('legacy embedding test');
+      const vector = Array.from(result.data);
+
+      expect(vector.length).toBe(384);
+
+      // Check it's normalized
+      const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
+      expect(magnitude).toBeCloseTo(1, 2);
+    }, 120000);
+  });
+});
+
+describe('Auto-Recovery with Mocked Pipeline', () => {
+  it('should handle corruption and recovery flow', async () => {
+    // This test verifies the recovery logic exists and embedder is resilient
+    const embedder = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 128 });
+
+    // Verify embedder works
+    const result1 = await embedder('before corruption test');
+    expect(result1.dims[1]).toBe(128);
+
+    // Run multiple embeddings to ensure stability
+    const results = await Promise.all([
+      embedder('concurrent test 1'),
+      embedder('concurrent test 2'),
+      embedder('concurrent test 3')
+    ]);
+
+    results.forEach(result => {
+      expect(result.dims[1]).toBe(128);
+      expect(Array.from(result.data).length).toBe(128);
+    });
+  }, 120000);
+});
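
The magnitude assertion in 'legacy embedder should produce valid embeddings' leans on a standard identity: once vectors are unit length, cosine similarity (the cosineSimilarity import at the top of this file) reduces to a plain dot product. A self-contained sketch of the same check:

// Sketch: for unit vectors a and b, cos(a, b) = (a . b) / (|a||b|) = a . b,
// so asserting |v| close to 1 is what keeps similarity math valid downstream.
function magnitude(v) {
  return Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
}

const v = Array.from(new Float32Array(384).fill(1 / Math.sqrt(384))); // unit vector
console.log(magnitude(v).toFixed(3)); // ~1.000, mirroring toBeCloseTo(1, 2)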
@@ -0,0 +1,151 @@
+/**
+ * Real Integration Tests for MRL Auto-Recovery
+ *
+ * Tests embedder stability and error handling with real models.
+ * Destructive corruption tests are in mrl-recovery.test.js (mocked).
+ */
+
+import { describe, it, expect, beforeAll } from 'vitest';
+import { existsSync, readdirSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { createMRLEmbedder, createLegacyEmbedder, createEmbedder } from '../lib/mrl-embedder.js';
+
+// Find the transformers cache directory
+function getTransformersCacheDir() {
+  const transformersPath = dirname(fileURLToPath(import.meta.resolve('@huggingface/transformers')));
+  const cacheInParent = join(dirname(transformersPath), '.cache');
+  if (existsSync(cacheInParent)) return cacheInParent;
+  return join(transformersPath, '.cache');
+}
+
+describe('MRL Embedder Integration', () => {
+  const modelName = 'nomic-ai/nomic-embed-text-v1.5';
+  let embedder;
+
+  beforeAll(async () => {
+    console.log('[Test] Loading MRL embedder...');
+    embedder = await createMRLEmbedder(modelName, { dimension: 256 });
+    console.log('[Test] MRL embedder loaded');
+  }, 300000);
+
+  it('should create embedder with correct metadata', () => {
+    expect(embedder.dimension).toBe(256);
+    expect(embedder.modelName).toBe(modelName);
+    expect(embedder.device).toBe('cpu');
+  });
+
+  it('should produce correct dimension embeddings', async () => {
+    const result = await embedder('test embedding');
+    expect(result.data).toBeDefined();
+    expect(result.dims[1]).toBe(256);
+    expect(Array.from(result.data).length).toBe(256);
+  });
+
+  it('should produce normalized vectors', async () => {
+    const result = await embedder('normalized test');
+    const vector = Array.from(result.data);
+    const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
+    expect(magnitude).toBeCloseTo(1, 3);
+  });
+
+  it('should handle multiple sequential embeddings', async () => {
+    const texts = ['first', 'second', 'third', 'fourth', 'fifth'];
+
+    for (const text of texts) {
+      const result = await embedder(text);
+      expect(result.data).toBeDefined();
+      expect(Array.from(result.data).length).toBe(256);
+    }
+  });
+
+  it('should handle concurrent embeddings', async () => {
+    const results = await Promise.all([
+      embedder('concurrent 1'),
+      embedder('concurrent 2'),
+      embedder('concurrent 3')
+    ]);
+
+    results.forEach(result => {
+      expect(result.data).toBeDefined();
+      expect(result.dims[1]).toBe(256);
+    });
+  });
+
+  it('should handle various input types', async () => {
+    const inputs = [
+      'normal text',
+      'a'.repeat(500), // long text
+      'special: @#$%^&*()',
+      'unicode: 你好世界',
+      '   whitespace   '
+    ];
+
+    for (const input of inputs) {
+      const result = await embedder(input);
+      expect(result.data).toBeDefined();
+    }
+  });
+});
+
+describe('Legacy Embedder Integration', () => {
+  let embedder;
+
+  beforeAll(async () => {
+    embedder = await createLegacyEmbedder();
+  }, 120000);
+
+  it('should create legacy embedder with correct metadata', () => {
+    expect(embedder.dimension).toBe(384);
+    expect(embedder.modelName).toBe('Xenova/all-MiniLM-L6-v2');
+  });
+
+  it('should produce 384d embeddings', async () => {
+    const result = await embedder('legacy test');
+    expect(Array.from(result.data).length).toBe(384);
+  });
+});
+
+describe('Factory Function', () => {
+  it('should create MRL embedder for nomic model', async () => {
+    const embedder = await createEmbedder({
+      embeddingModel: 'nomic-ai/nomic-embed-text-v1.5',
+      embeddingDimension: 128,
+      device: 'cpu'
+    });
+
+    expect(embedder.dimension).toBe(128);
+    expect(embedder.modelName).toContain('nomic');
+  }, 300000);
+
+  it('should create legacy embedder for MiniLM', async () => {
+    const embedder = await createEmbedder({
+      embeddingModel: 'Xenova/all-MiniLM-L6-v2',
+      device: 'cpu'
+    });
+
+    expect(embedder.dimension).toBe(384);
+    expect(embedder.modelName).toBe('Xenova/all-MiniLM-L6-v2');
+  }, 120000);
+});
+
+describe('Cache Location Verification', () => {
+  it('should find transformers cache directory', () => {
+    const cacheDir = getTransformersCacheDir();
+    expect(existsSync(cacheDir)).toBe(true);
+    console.log(`[Test] Cache directory: ${cacheDir}`);
+  });
+
+  it('should have model files in cache', () => {
+    const cacheDir = getTransformersCacheDir();
+    const modelDir = join(cacheDir, 'nomic-ai', 'nomic-embed-text-v1.5', 'onnx');
+
+    if (existsSync(modelDir)) {
+      const files = readdirSync(modelDir);
+      expect(files.some(f => f.endsWith('.onnx'))).toBe(true);
+      console.log(`[Test] Model files: ${files.join(', ')}`);
+    } else {
+      console.log('[Test] Model directory not found (may need download)');
+    }
+  });
+});
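
These integration tests download real models on first run, which is what the generous 300000 ms / 120000 ms timeouts absorb. A hypothetical warm-up script (not part of the package, but using the same transformers.js pipeline call the library itself uses) can prime the cache so test time goes to embedding rather than downloading:

// warm-cache.mjs: hypothetical helper, run once before the test suite.
import { pipeline } from '@huggingface/transformers';

// Downloading both models here means the timeouts above only cover inference.
await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5');
await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
console.log('models cached');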
@@ -0,0 +1,248 @@
+/**
+ * Tests for MRL Embedder Auto-Recovery
+ *
+ * Uses mocked pipeline to test corruption detection and recovery:
+ * - Startup corruption → clear cache → reload
+ * - Runtime corruption → reload → retry
+ * - Runtime corruption → reload fails → fallback to legacy
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+
+// Mock the transformers pipeline
+const mockPipeline = vi.fn();
+const mockLayerNorm = vi.fn();
+
+vi.mock('@huggingface/transformers', () => ({
+  pipeline: (...args) => mockPipeline(...args),
+  layer_norm: (...args) => mockLayerNorm(...args)
+}));
+
+// Mock fs for cache clearing
+vi.mock('fs', () => ({
+  existsSync: vi.fn(() => true),
+  rmSync: vi.fn()
+}));
+
+describe('MRL Auto-Recovery (Mocked)', () => {
+  let callCount = 0;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    callCount = 0;
+
+    // Default mock implementation for layer_norm
+    mockLayerNorm.mockImplementation((tensor) => ({
+      slice: () => ({
+        normalize: () => ({
+          data: new Float32Array(256).fill(0.1),
+          dims: [1, 256]
+        })
+      }),
+      dims: tensor.dims
+    }));
+  });
+
+  afterEach(() => {
+    vi.resetModules();
+  });
+
+  describe('Startup Recovery', () => {
+    it('should recover from corruption on first load', async () => {
+      // First call throws corruption, second succeeds
+      mockPipeline
+        .mockRejectedValueOnce(new Error('Protobuf parsing failed'))
+        .mockResolvedValueOnce(async () => ({
+          data: new Float32Array(768).fill(0.1),
+          dims: [1, 768]
+        }));
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+      const embedder = await createMRLEmbedder('test-model', { dimension: 256 });
+
+      expect(mockPipeline).toHaveBeenCalledTimes(2);
+      expect(embedder).toBeDefined();
+    });
+
+    it('should throw if recovery also fails with non-corruption error', async () => {
+      mockPipeline.mockRejectedValue(new Error('Network error'));
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+
+      await expect(createMRLEmbedder('test-model', { dimension: 256 }))
+        .rejects.toThrow('Network error');
+    });
+  });
+
+  describe('Runtime Recovery', () => {
+    it('should reload model on runtime corruption and retry', async () => {
+      let embedCallCount = 0;
+      const mockExtractor = vi.fn().mockImplementation(async () => {
+        embedCallCount++;
+        if (embedCallCount === 1) {
+          throw new Error('Protobuf parsing failed');
+        }
+        return {
+          data: new Float32Array(768).fill(0.1),
+          dims: [1, 768]
+        };
+      });
+
+      mockPipeline.mockResolvedValue(mockExtractor);
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+      const embedder = await createMRLEmbedder('test-model', { dimension: 256 });
+
+      // First embed triggers corruption, then reload and retry
+      const result = await embedder('test text');
+
+      expect(result.data).toBeDefined();
+      // Pipeline called: 1 (init) + 1 (reload after corruption) = 2
+      expect(mockPipeline).toHaveBeenCalledTimes(2);
+    });
+
+    it('should detect various corruption error messages', async () => {
+      const corruptionErrors = [
+        'Protobuf parsing failed',
+        'Invalid model format',
+        'ONNX model is corrupt'
+      ];
+
+      for (const errorMsg of corruptionErrors) {
+        vi.resetModules();
+        vi.clearAllMocks();
+
+        let throwError = true;
+        const mockExtractor = vi.fn().mockImplementation(async () => {
+          if (throwError) {
+            throwError = false;
+            throw new Error(errorMsg);
+          }
+          return {
+            data: new Float32Array(768).fill(0.1),
+            dims: [1, 768]
+          };
+        });
+
+        mockPipeline.mockResolvedValue(mockExtractor);
+        mockLayerNorm.mockImplementation((tensor) => ({
+          slice: () => ({
+            normalize: () => ({
+              data: new Float32Array(256).fill(0.1),
+              dims: [1, 256]
+            })
+          }),
+          dims: tensor.dims
+        }));
+
+        const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+        const embedder = await createMRLEmbedder('test-model', { dimension: 256 });
+
+        const result = await embedder('test');
+        expect(result.data).toBeDefined();
+      }
+    });
+  });
+
+  describe('Fallback to Legacy', () => {
+    it('should fall back to legacy when reload also fails', async () => {
+      let pipelineCallCount = 0;
+      let embedCallCount = 0;
+
+      // Mock extractor that fails on first embed call
+      const corruptExtractor = vi.fn().mockImplementation(async () => {
+        embedCallCount++;
+        if (embedCallCount === 1) {
+          throw new Error('Protobuf parsing failed');
+        }
+        return {
+          data: new Float32Array(768).fill(0.1),
+          dims: [1, 768]
+        };
+      });
+
+      // Mock legacy extractor that works
+      const legacyExtractor = vi.fn().mockResolvedValue({
+        data: new Float32Array(384).fill(0.2),
+        dims: [1, 384]
+      });
+
+      mockPipeline.mockImplementation(async (task, model) => {
+        pipelineCallCount++;
+        if (pipelineCallCount <= 2) {
+          if (pipelineCallCount === 2) {
+            // Reload attempt fails
+            throw new Error('Network timeout');
+          }
+          return corruptExtractor;
+        }
+        // Third call is legacy model
+        return legacyExtractor;
+      });
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+      const embedder = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 256 });
+
+      // This should trigger: corruption → reload fail → fallback to legacy
+      const result = await embedder('test text');
+
+      expect(result.data).toBeDefined();
+      // Dimension should update to legacy (384)
+      expect(embedder.dimension).toBe(384);
+    });
+
+    it('should use fallback for subsequent calls after switching', async () => {
+      let pipelineCallCount = 0;
+      let embedCallCount = 0;
+
+      const corruptExtractor = vi.fn().mockImplementation(async () => {
+        embedCallCount++;
+        throw new Error('Protobuf parsing failed');
+      });
+
+      const legacyExtractor = vi.fn().mockResolvedValue({
+        data: new Float32Array(384).fill(0.2),
+        dims: [1, 384]
+      });
+
+      mockPipeline.mockImplementation(async (task, model) => {
+        pipelineCallCount++;
+        if (model.includes('nomic')) {
+          if (pipelineCallCount >= 2) {
+            throw new Error('Model unavailable');
+          }
+          return corruptExtractor;
+        }
+        return legacyExtractor;
+      });
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+      const embedder = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 256 });
+
+      // First call triggers fallback
+      await embedder('first');
+
+      // Subsequent calls should use legacy directly
+      const legacyCallsBefore = legacyExtractor.mock.calls.length;
+      await embedder('second');
+      await embedder('third');
+
+      expect(legacyExtractor.mock.calls.length).toBe(legacyCallsBefore + 2);
+    });
+  });
+
+  describe('Non-Corruption Errors', () => {
+    it('should throw non-corruption errors without recovery attempt', async () => {
+      const mockExtractor = vi.fn().mockRejectedValue(new Error('Out of memory'));
+      mockPipeline.mockResolvedValue(mockExtractor);
+
+      const { createMRLEmbedder } = await import('../lib/mrl-embedder.js');
+      const embedder = await createMRLEmbedder('test-model', { dimension: 256 });
+
+      await expect(embedder('test')).rejects.toThrow('Out of memory');
+
+      // Should not have attempted reload (only initial load)
+      expect(mockPipeline).toHaveBeenCalledTimes(1);
+    });
+  });
+});
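
One structural detail worth noting in this file: vi.mock factories are hoisted above the imports, and the vi.resetModules() in afterEach clears Vitest's module registry, which is why every test re-imports '../lib/mrl-embedder.js' with a dynamic await import(...). Each test therefore evaluates the module fresh against the current mock configuration instead of reusing state from an earlier test. The pattern reduced to its essentials, with hypothetical module names:

// Sketch of the mock-and-reimport pattern used above (hypothetical modules).
import { it, vi, afterEach } from 'vitest';

const mockDep = vi.fn();
vi.mock('some-dep', () => ({ dep: (...args) => mockDep(...args) })); // hoisted

afterEach(() => vi.resetModules()); // the next import() re-evaluates the module

it('sees fresh module state per test', async () => {
  mockDep.mockReturnValueOnce('stubbed');
  // './uses-dep.js' is hypothetical and would import { dep } from 'some-dep'
  const mod = await import('./uses-dep.js');
  // mod was evaluated just now, against the mocked dependency
});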