@ruvector/edge-net 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +281 -10
- package/core-invariants.js +942 -0
- package/models/adapter-hub.js +1008 -0
- package/models/adapter-security.js +792 -0
- package/models/benchmark.js +688 -0
- package/models/distribution.js +791 -0
- package/models/index.js +109 -0
- package/models/integrity.js +753 -0
- package/models/loader.js +725 -0
- package/models/microlora.js +1298 -0
- package/models/model-loader.js +922 -0
- package/models/model-optimizer.js +1245 -0
- package/models/model-registry.js +696 -0
- package/models/model-utils.js +548 -0
- package/models/models-cli.js +914 -0
- package/models/registry.json +214 -0
- package/models/training-utils.js +1418 -0
- package/models/wasm-core.js +1025 -0
- package/network-genesis.js +2847 -0
- package/onnx-worker.js +462 -8
- package/package.json +33 -3
- package/plugins/SECURITY-AUDIT.md +654 -0
- package/plugins/cli.js +43 -3
- package/plugins/implementations/e2e-encryption.js +57 -12
- package/plugins/plugin-loader.js +610 -21
- package/tests/model-optimizer.test.js +644 -0
- package/tests/network-genesis.test.js +562 -0
- package/tests/plugin-benchmark.js +1239 -0
- package/tests/plugin-system-test.js +163 -0
- package/tests/wasm-core.test.js +368 -0
|
@@ -0,0 +1,644 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @ruvector/edge-net Model Optimizer Tests
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive tests for model quantization and optimization
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { describe, it, before, after } from 'node:test';
|
|
8
|
+
import assert from 'node:assert';
|
|
9
|
+
import {
|
|
10
|
+
ModelOptimizer,
|
|
11
|
+
QuantizationEngine,
|
|
12
|
+
PruningEngine,
|
|
13
|
+
OnnxOptimizer,
|
|
14
|
+
DistillationEngine,
|
|
15
|
+
BenchmarkEngine,
|
|
16
|
+
TARGET_MODELS,
|
|
17
|
+
QUANTIZATION_CONFIGS,
|
|
18
|
+
PRUNING_STRATEGIES,
|
|
19
|
+
} from '../models/model-optimizer.js';
|
|
20
|
+
import {
|
|
21
|
+
ComprehensiveBenchmark,
|
|
22
|
+
AccuracyMeter,
|
|
23
|
+
LatencyProfiler,
|
|
24
|
+
MemoryProfiler,
|
|
25
|
+
BENCHMARK_PROFILES,
|
|
26
|
+
} from '../models/benchmark.js';
|
|
27
|
+
|
|
28
|
+
// ============================================
|
|
29
|
+
// MODEL OPTIMIZER TESTS
|
|
30
|
+
// ============================================
|
|
31
|
+
|
|
32
|
+
/**
 * Test suite for the high-level ModelOptimizer facade: configuration
 * lookup, quantization, pruning, distillation setup, ONNX passes,
 * export, the combined pipeline, and bookkeeping statistics.
 */
describe('ModelOptimizer', () => {
  let opt;

  before(() => {
    opt = new ModelOptimizer();
  });

  describe('Configuration', () => {
    it('should have all target models defined', () => {
      const catalog = opt.getTargetModels();
      assert.ok(catalog['phi-1.5'], 'phi-1.5 should be defined');
      assert.ok(catalog['qwen-0.5b'], 'qwen-0.5b should be defined');
      assert.ok(catalog['minilm-l6'], 'minilm-l6 should be defined');
      assert.ok(catalog['e5-small'], 'e5-small should be defined');
      assert.ok(catalog['bge-small'], 'bge-small should be defined');
    });

    it('should have correct target sizes', () => {
      const catalog = opt.getTargetModels();
      assert.strictEqual(catalog['phi-1.5'].originalSize, 280);
      assert.strictEqual(catalog['phi-1.5'].targetSize, 70);
      assert.strictEqual(catalog['qwen-0.5b'].originalSize, 430);
      assert.strictEqual(catalog['qwen-0.5b'].targetSize, 100);
      assert.strictEqual(catalog['minilm-l6'].originalSize, 22);
      assert.strictEqual(catalog['minilm-l6'].targetSize, 8);
    });

    it('should get individual model config', () => {
      const cfg = opt.getModelConfig('phi-1.5');
      assert.ok(cfg, 'Should return config');
      assert.strictEqual(cfg.type, 'generation');
      assert.ok(cfg.capabilities.includes('code'), 'Should include code capability');
    });

    it('should return null for unknown model', () => {
      const cfg = opt.getModelConfig('unknown-model');
      assert.strictEqual(cfg, null);
    });
  });

  describe('Quantization', () => {
    it('should quantize model to INT8', async () => {
      const res = await opt.quantize('minilm-l6', 'int8');

      assert.strictEqual(res.model, 'minilm-l6');
      assert.strictEqual(res.method, 'int8');
      assert.strictEqual(res.status, 'completed');
      assert.strictEqual(res.compressionRatio, 4);
      assert.ok(res.quantizedSizeMB < res.originalSizeMB);
    });

    it('should quantize model to INT4', async () => {
      const res = await opt.quantize('phi-1.5', 'int4');

      assert.strictEqual(res.method, 'int4');
      assert.strictEqual(res.compressionRatio, 8);
      assert.ok(res.quantizedSizeMB <= 35, 'INT4 should compress to ~35MB for phi-1.5');
    });

    it('should quantize model to FP16', async () => {
      const res = await opt.quantize('e5-small', 'fp16');

      assert.strictEqual(res.method, 'fp16');
      assert.strictEqual(res.compressionRatio, 2);
    });

    it('should throw for unknown model', async () => {
      await assert.rejects(
        () => opt.quantize('unknown-model', 'int8'),
        /Unknown model/
      );
    });

    it('should throw for unknown quantization method', async () => {
      await assert.rejects(
        () => opt.quantize('minilm-l6', 'int2'),
        /Unknown quantization method/
      );
    });

    it('should emit events during quantization', async () => {
      let sawStart = false;
      let sawComplete = false;

      opt.on('quantize:start', () => { sawStart = true; });
      opt.on('quantize:complete', () => { sawComplete = true; });

      await opt.quantize('bge-small', 'int8');

      assert.ok(sawStart, 'Should emit start event');
      assert.ok(sawComplete, 'Should emit complete event');

      opt.removeAllListeners();
    });
  });

  describe('Pruning', () => {
    it('should prune model with default settings', async () => {
      const res = await opt.prune('minilm-l6');

      assert.strictEqual(res.model, 'minilm-l6');
      assert.strictEqual(res.status, 'completed');
      assert.ok(res.achievedSparsity > 0, 'Should have achieved sparsity');
    });

    it('should prune model with custom sparsity', async () => {
      const res = await opt.prune('phi-1.5', { sparsity: 0.7 });

      assert.strictEqual(res.targetSparsity, 0.7);
      assert.ok(res.layerResults.length > 0, 'Should have layer results');
    });

    it('should prune attention heads when requested', async () => {
      const res = await opt.prune('minilm-l6', {
        sparsity: 0.5,
        pruneHeads: true,
        headPruneFraction: 0.25,
      });

      assert.ok(res.headPruning, 'Should have head pruning results');
      assert.ok(res.headPruning.prunedHeads > 0, 'Should prune some heads');
    });

    it('should support different sparsity schedules', async () => {
      const uniform = await opt.prune('e5-small', {
        sparsity: 0.5,
        sparsitySchedule: 'uniform',
      });

      const cubic = await opt.prune('e5-small', {
        sparsity: 0.5,
        sparsitySchedule: 'cubic',
      });

      // Per-layer sparsity values produced by each schedule.
      const uniformValues = uniform.layerResults.map((l) => l.sparsity);
      const cubicValues = cubic.layerResults.map((l) => l.sparsity);

      // Uniform: every layer must carry the same sparsity.
      const uniformDistinct = new Set(uniformValues);
      assert.strictEqual(uniformDistinct.size, 1, 'Uniform should have equal sparsity');

      // Cubic: sparsity must differ across layers.
      const cubicDistinct = new Set(cubicValues);
      assert.ok(cubicDistinct.size > 1, 'Cubic should have varying sparsity');
    });
  });

  describe('Knowledge Distillation', () => {
    it('should setup distillation configuration', () => {
      const cfg = opt.setupDistillation('phi-1.5', 'minilm-l6', {
        temperature: 6.0,
        alpha: 0.7,
      });

      assert.strictEqual(cfg.teacher, 'phi-1.5');
      assert.strictEqual(cfg.student, 'minilm-l6');
      assert.strictEqual(cfg.temperature, 6.0);
      assert.strictEqual(cfg.alpha, 0.7);
      assert.ok(cfg.trainingConfig, 'Should have training config');
    });

    it('should throw for invalid models', () => {
      assert.throws(
        () => opt.setupDistillation('unknown', 'minilm-l6'),
        /must be valid/
      );
    });
  });

  describe('ONNX Optimization', () => {
    it('should apply ONNX optimization passes', async () => {
      const res = await opt.optimizeOnnx('minilm-l6');

      assert.strictEqual(res.model, 'minilm-l6');
      assert.ok(res.passes.length > 0, 'Should apply passes');
      assert.ok(res.optimizedGraph, 'Should have optimized graph');
    });
  });

  describe('Export', () => {
    it('should export optimized model', async () => {
      // Quantize first so there is an optimization record to export.
      await opt.quantize('minilm-l6', 'int8');

      const res = await opt.export('minilm-l6', 'onnx');

      assert.ok(res.path, 'Should have export path');
      assert.ok(res.optimization, 'Should reference optimization');
      assert.strictEqual(res.format, 'onnx');
    });

    it('should indicate if target is met', async () => {
      await opt.quantize('minilm-l6', 'int8');
      const res = await opt.export('minilm-l6', 'onnx');

      // 22MB / 4 = 5.5MB, target is 8MB, should meet target
      assert.strictEqual(res.meetsTarget, true);
    });
  });

  describe('Full Pipeline', () => {
    it('should run full optimization pipeline', async () => {
      const res = await opt.optimizePipeline('minilm-l6', {
        quantizeMethod: 'int8',
        prune: true,
        sparsity: 0.3,
      });

      assert.strictEqual(res.model, 'minilm-l6');
      assert.ok(res.steps.length >= 3, 'Should have multiple steps');
      assert.ok(res.meetsTarget, 'MiniLM-L6 should meet target with INT8');
    });

    it('should run pipeline without pruning', async () => {
      const res = await opt.optimizePipeline('e5-small', {
        quantizeMethod: 'int4',
        prune: false,
      });

      const pruned = res.steps.some((s) => s.step === 'prune');
      assert.strictEqual(pruned, false, 'Should not have pruning step');
    });
  });

  describe('Statistics', () => {
    it('should track statistics', async () => {
      // Dedicated instance so every counter starts at zero.
      const fresh = new ModelOptimizer();

      await fresh.quantize('minilm-l6', 'int8');
      await fresh.prune('minilm-l6');
      await fresh.export('minilm-l6', 'onnx');

      const stats = fresh.getStats();

      assert.strictEqual(stats.quantizations, 1);
      assert.strictEqual(stats.prunings, 1);
      assert.strictEqual(stats.exports, 1);
    });

    it('should list models with optimization status', async () => {
      const fresh = new ModelOptimizer();
      await fresh.quantize('phi-1.5', 'int4');

      const listing = fresh.listModels();

      const phi = listing.find((m) => m.key === 'phi-1.5');
      assert.ok(phi.optimized, 'phi-1.5 should be marked as optimized');

      const qwen = listing.find((m) => m.key === 'qwen-0.5b');
      assert.strictEqual(qwen.optimized, false, 'qwen-0.5b should not be optimized');
    });
  });
});
|
|
287
|
+
|
|
288
|
+
// ============================================
|
|
289
|
+
// QUANTIZATION ENGINE TESTS
|
|
290
|
+
// ============================================
|
|
291
|
+
|
|
292
|
+
/**
 * Test suite for QuantizationEngine: per-tensor INT8 quantization,
 * parameter computation, dequantization round-trips, and block-wise
 * INT4 quantization.
 */
describe('QuantizationEngine', () => {
  let eng;

  before(() => {
    eng = new QuantizationEngine();
  });

  describe('Tensor Quantization', () => {
    it('should quantize tensor to INT8', () => {
      const values = [0.5, -0.3, 0.8, -0.9, 0.1];
      const q = eng.quantizeTensor(values, { bits: 8, symmetric: false });

      // Non-symmetric uses Uint8Array (0-255 range), symmetric uses Int8Array
      assert.ok(q.data instanceof Uint8Array, 'Non-symmetric should return Uint8Array');
      assert.strictEqual(q.originalLength, values.length);
      assert.ok(q.params.scale > 0, 'Should have positive scale');
    });

    it('should compute correct quantization parameters', () => {
      const values = [-1, 0, 1];
      const p = eng.computeQuantParams(values, { bits: 8, symmetric: true });

      assert.strictEqual(p.min, -1);
      assert.strictEqual(p.max, 1);
      assert.strictEqual(p.zeroPoint, 0, 'Symmetric should have zero point = 0');
    });

    it('should dequantize tensor correctly', () => {
      const source = [0.5, -0.3, 0.8];
      const q = eng.quantizeTensor(source, { bits: 8, symmetric: false });
      const restored = eng.dequantizeTensor(q, q.params);

      // Quantize -> dequantize round-trip error must stay within tolerance.
      source.forEach((v, i) => {
        const error = Math.abs(v - restored[i]);
        assert.ok(error < 0.1, `Reconstruction error should be small: ${error}`);
      });
    });
  });

  describe('INT4 Block Quantization', () => {
    it('should quantize to INT4 blocks', () => {
      const values = new Float32Array(64).map(() => Math.random() - 0.5);
      const q = eng.quantizeInt4Block(values, 32);

      assert.ok(q.data instanceof Uint8Array, 'Should return Uint8Array');
      assert.ok(q.scales instanceof Float32Array, 'Should have scales');
      assert.strictEqual(q.scales.length, 2, 'Should have 2 blocks for 64 elements');
      assert.ok(q.compressionRatio > 1, 'Should have compression');
    });
  });
});
|
|
344
|
+
|
|
345
|
+
// ============================================
|
|
346
|
+
// PRUNING ENGINE TESTS
|
|
347
|
+
// ============================================
|
|
348
|
+
|
|
349
|
+
/**
 * Test suite for PruningEngine: unstructured magnitude pruning,
 * structured attention-head pruning, and the layer-wise sparsity
 * schedules (uniform, cubic, first-last-preserved).
 */
describe('PruningEngine', () => {
  let eng;

  before(() => {
    eng = new PruningEngine();
  });

  describe('Magnitude Pruning', () => {
    it('should prune smallest magnitude weights', () => {
      const weights = [0.9, 0.1, -0.8, -0.05, 0.7];
      // Pruning 40% of 5 weights removes exactly 2 entries.
      const res = eng.magnitudePrune(weights, 0.4);

      assert.strictEqual(res.prunedCount, 2);
      assert.ok(res.mask[1] === 0 || res.mask[3] === 0, 'Small values should be pruned');
    });

    it('should preserve specified sparsity', () => {
      const weights = new Float32Array(1000).map(() => Math.random());
      const target = 0.5;
      const res = eng.magnitudePrune(weights, target);

      const achieved = res.prunedCount / weights.length;
      assert.ok(Math.abs(achieved - target) < 0.01, 'Should achieve target sparsity');
    });
  });

  describe('Structured Pruning', () => {
    it('should prune attention heads', () => {
      const numHeads = 12;
      const headDim = 64;
      const weights = new Float32Array(numHeads * headDim).map(() => Math.random());

      const res = eng.structuredPruneHeads(weights, numHeads, 0.25);

      assert.strictEqual(res.prunedHeads, 3, 'Should prune 25% of heads (3)');
      assert.strictEqual(res.remainingHeads.length, 9);
      assert.strictEqual(res.data.length, 9 * headDim);
    });
  });

  describe('Layer-wise Sparsity', () => {
    it('should compute uniform sparsity', () => {
      const s = eng.computeLayerSparsity(5, 12, 0.5, 'uniform');
      assert.strictEqual(s, 0.5);
    });

    it('should compute cubic sparsity', () => {
      const shallow = eng.computeLayerSparsity(2, 12, 0.5, 'cubic');
      const deep = eng.computeLayerSparsity(10, 12, 0.5, 'cubic');

      assert.ok(deep > shallow, 'Later layers should have higher sparsity in cubic');
    });

    it('should preserve first and last layers', () => {
      const first = eng.computeLayerSparsity(0, 12, 0.5, 'first-last-preserved');
      const last = eng.computeLayerSparsity(11, 12, 0.5, 'first-last-preserved');
      const middle = eng.computeLayerSparsity(6, 12, 0.5, 'first-last-preserved');

      assert.ok(first < middle, 'First layer should have lower sparsity');
      assert.ok(last < middle, 'Last layer should have lower sparsity');
    });
  });
});
|
|
412
|
+
|
|
413
|
+
// ============================================
|
|
414
|
+
// ONNX OPTIMIZER TESTS
|
|
415
|
+
// ============================================
|
|
416
|
+
|
|
417
|
+
/**
 * Test suite for OnnxOptimizer: pass discovery and applying the whole
 * pass pipeline to a synthetic graph.
 */
describe('OnnxOptimizer', () => {
  let onnx;

  before(() => {
    onnx = new OnnxOptimizer();
  });

  it('should list available passes', () => {
    const available = onnx.getAvailablePasses();

    // These core passes must always be advertised.
    for (const name of ['constant-folding', 'fuse-attention', 'memory-optimization']) {
      assert.ok(available.includes(name));
    }
  });

  it('should apply all optimization passes', () => {
    // Minimal synthetic graph: node count and head count are all the
    // passes need to run.
    const graph = {
      nodes: new Array(50).fill(null),
      attentionHeads: 12,
    };

    const res = onnx.applyAllPasses(graph);

    assert.ok(res.passes.length > 0, 'Should apply passes');
    assert.ok(res.graph.constantsFolded, 'Should fold constants');
    assert.ok(res.graph.attentionFused, 'Should fuse attention');
  });
});
|
|
445
|
+
|
|
446
|
+
// ============================================
|
|
447
|
+
// DISTILLATION ENGINE TESTS
|
|
448
|
+
// ============================================
|
|
449
|
+
|
|
450
|
+
/**
 * Test suite for DistillationEngine: configuration, loss computation
 * from teacher/student logits, and training hyperparameter defaults.
 */
describe('DistillationEngine', () => {
  let distiller;

  before(() => {
    distiller = new DistillationEngine();
  });

  it('should configure distillation', () => {
    const cfg = distiller.configure({
      teacher: 'phi-1.5',
      student: 'minilm-l6',
      temperature: 4.0,
      alpha: 0.5,
    });

    assert.strictEqual(cfg.temperature, 4.0);
    assert.strictEqual(cfg.alpha, 0.5);
  });

  it('should compute distillation loss', () => {
    distiller.configure({ temperature: 4.0, alpha: 0.5 });

    // Student logits deliberately close to the teacher's so the loss is
    // well-defined but small.
    const teacherLogits = [2.0, 1.0, 0.5, 0.1];
    const studentLogits = [1.8, 1.1, 0.4, 0.2];
    const labels = [1, 0, 0, 0];

    const loss = distiller.computeLoss(teacherLogits, studentLogits, labels);

    assert.ok(typeof loss.total === 'number', 'Should compute total loss');
    assert.ok(loss.total >= 0, 'Loss should be non-negative');
    assert.ok(typeof loss.distillation === 'number', 'Should have distillation loss');
  });

  it('should get training configuration', () => {
    const cfg = distiller.getTrainingConfig();

    assert.ok(cfg.epochs > 0, 'Should have epochs');
    assert.ok(cfg.learningRate > 0, 'Should have learning rate');
    assert.ok(cfg.batchSize > 0, 'Should have batch size');
  });
});
|
|
491
|
+
|
|
492
|
+
// ============================================
|
|
493
|
+
// BENCHMARK TESTS
|
|
494
|
+
// ============================================
|
|
495
|
+
|
|
496
|
+
/**
 * Test suite for the benchmarking helpers: AccuracyMeter (MSE / cosine
 * similarity), LatencyProfiler, MemoryProfiler, and the combined
 * ComprehensiveBenchmark suite runner.
 */
describe('ComprehensiveBenchmark', () => {
  let benchmark;

  before(() => {
    benchmark = new ComprehensiveBenchmark();
  });

  after(() => {
    // Clear accumulated benchmark state so later suites start clean.
    benchmark.reset();
  });

  describe('Accuracy Meter', () => {
    it('should compute MSE', () => {
      const meter = new AccuracyMeter();
      meter.addPrediction([1, 2, 3], [1.1, 2.1, 3.1]);
      meter.addPrediction([4, 5, 6], [4.1, 5.1, 6.1]);

      const metrics = meter.getMetrics();

      // Every element is off by 0.1, so the MSE is ~0.01: positive but small.
      assert.ok(metrics.mse > 0, 'MSE should be positive');
      assert.ok(metrics.mse < 0.1, 'MSE should be small for close values');
    });

    it('should compute cosine similarity', () => {
      const meter = new AccuracyMeter();
      meter.addPrediction([1, 0, 0], [1, 0, 0]); // Identical
      meter.addPrediction([0, 1, 0], [0, 1, 0]); // Identical

      const metrics = meter.getMetrics();

      assert.ok(metrics.cosineSimilarity > 0.99, 'Should have high similarity');
    });
  });

  describe('Latency Profiler', () => {
    it('should measure latency', async () => {
      const profiler = new LatencyProfiler();

      profiler.start('test-section');
      await new Promise(resolve => setTimeout(resolve, 10));
      profiler.end('test-section');

      const stats = profiler.getStats('test-section');

      // FIX: a 10ms setTimeout can resolve fractionally early (e.g. 9.99ms)
      // due to timer/clock granularity, so the previous strict `mean >= 10`
      // assertion was flaky. Allow a 1ms tolerance.
      assert.ok(stats.mean >= 9, 'Should measure roughly 10ms');
      assert.strictEqual(stats.count, 1);
    });
  });

  describe('Memory Profiler', () => {
    it('should take snapshots', () => {
      const profiler = new MemoryProfiler();

      const snapshot = profiler.snapshot('test');

      assert.ok(snapshot.label === 'test');
      assert.ok(typeof snapshot.heapUsed === 'number');
    });

    it('should track peak memory', () => {
      const profiler = new MemoryProfiler();

      profiler.snapshot('before');
      // Allocate ~8MB between snapshots so the profiler sees a heap delta.
      const ballast = new Array(1000000).fill(0);
      profiler.snapshot('after');

      const summary = profiler.getSummary();
      assert.ok(summary.peakMemoryMB > 0);
      // FIX: reference the allocation after the second snapshot; previously
      // the array was unused, so the engine was free to reclaim it before
      // 'after' was captured.
      assert.ok(ballast.length > 0);
    });
  });

  describe('Benchmark Suite', () => {
    it('should run quick benchmark', async () => {
      const result = await benchmark.runSuite('minilm-l6', 'quick');

      assert.strictEqual(result.model, 'minilm-l6');
      assert.ok(result.benchmarks.length > 0);
      assert.ok(result.summary, 'Should have summary');
    });

    it('should generate recommendations', async () => {
      const result = await benchmark.runSuite('e5-small', 'quick');

      const firstBenchmark = result.benchmarks[0];
      assert.ok(firstBenchmark.recommendation, 'Should have recommendation');
      assert.ok(typeof firstBenchmark.recommendation.score === 'number');
    });
  });
});
|
|
586
|
+
|
|
587
|
+
// ============================================
|
|
588
|
+
// INTEGRATION TESTS
|
|
589
|
+
// ============================================
|
|
590
|
+
|
|
591
|
+
/**
 * End-to-end checks: each target model reaches its size budget after
 * quantization, and the full pipeline records every stage.
 */
describe('Integration', () => {
  it('should optimize phi-1.5 to meet target size', async () => {
    const opt = new ModelOptimizer();

    // INT4 gives 8x compression: 280MB / 8 = 35MB, under the 70MB target.
    const quantized = await opt.quantize('phi-1.5', 'int4');

    assert.ok(quantized.quantizedSizeMB <= 70, 'Should meet target size');
  });

  it('should optimize qwen-0.5b to meet target size', async () => {
    const opt = new ModelOptimizer();

    // 430MB / 8 = ~54MB against a 100MB target.
    const quantized = await opt.quantize('qwen-0.5b', 'int4');

    assert.ok(quantized.quantizedSizeMB <= 100, 'Should meet target size');
  });

  it('should optimize minilm-l6 to meet target size', async () => {
    const opt = new ModelOptimizer();

    // 22MB / 4 = 5.5MB against an 8MB target.
    const quantized = await opt.quantize('minilm-l6', 'int8');

    assert.ok(quantized.quantizedSizeMB <= 8, 'Should meet target size');
  });

  it('should run complete optimization workflow', async () => {
    const opt = new ModelOptimizer();

    // Full pipeline with every stage enabled.
    const res = await opt.optimizePipeline('bge-small', {
      quantizeMethod: 'int8',
      prune: true,
      sparsity: 0.3,
      benchmark: true,
    });

    assert.ok(res.meetsTarget, 'Should meet target');
    assert.ok(res.steps.length >= 4, 'Should complete all steps');

    // Every stage must be represented in the recorded step log.
    const stages = res.steps.map((s) => s.step);
    assert.ok(stages.includes('quantize'));
    assert.ok(stages.includes('prune'));
    assert.ok(stages.includes('onnx-optimize'));
    assert.ok(stages.includes('export'));
    assert.ok(stages.includes('benchmark'));
  });
});
|
|
643
|
+
|
|
644
|
+
// Module-load marker so this suite's file is identifiable in runner output.
console.log('Model Optimizer Tests');
|