@ruvector/edge-net 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,688 @@
1
+ /**
2
+ * @ruvector/edge-net Benchmark Utilities
3
+ *
4
+ * Comprehensive benchmarking for model optimization
5
+ *
6
+ * @module @ruvector/edge-net/models/benchmark
7
+ */
8
+
9
+ import { EventEmitter } from 'events';
10
+ import { ModelOptimizer, TARGET_MODELS, QUANTIZATION_CONFIGS } from './model-optimizer.js';
11
+
12
+ // ============================================
13
+ // BENCHMARK CONFIGURATION
14
+ // ============================================
15
+
16
/**
 * Benchmark profiles for different scenarios.
 *
 * Each profile controls: the number of measured iterations, the number of
 * unmeasured warmup iterations, the input shapes to test (each entry is a
 * [batchSize, sequenceLength] pair), and which quantization methods to
 * evaluate. Optional per-profile flags: `memoryLimit` (MB budget hint) and
 * `measureAccuracy` (enables accuracy-delta measurement).
 */
export const BENCHMARK_PROFILES = {
  // Fast smoke test: one small shape, int8 only.
  'quick': {
    iterations: 50,
    warmupIterations: 5,
    inputSizes: [[1, 128]],
    quantMethods: ['int8'],
  },
  // Default balanced run across common shapes and methods.
  'standard': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 512], [4, 256]],
    quantMethods: ['int8', 'int4', 'fp16'],
  },
  // Exhaustive sweep: many shapes (including batched) and all methods.
  'comprehensive': {
    iterations: 500,
    warmupIterations: 50,
    inputSizes: [[1, 64], [1, 128], [1, 256], [1, 512], [1, 1024], [4, 256], [8, 128]],
    quantMethods: ['int8', 'int4', 'fp16', 'int8-fp16-mixed'],
  },
  // Constrained-device profile: smallest quantization, small shapes.
  'edge-device': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 256]],
    quantMethods: ['int4'],
    memoryLimit: 512, // MB
  },
  // Prioritizes output fidelity: high-precision methods + accuracy metrics.
  'accuracy-focus': {
    iterations: 200,
    warmupIterations: 20,
    inputSizes: [[1, 512]],
    quantMethods: ['fp16', 'int8'],
    measureAccuracy: true,
  },
};
53
+
54
+ // ============================================
55
+ // ACCURACY MEASUREMENT
56
+ // ============================================
57
+
58
/**
 * AccuracyMeter - accuracy metrics for quantized models.
 *
 * Collects paired output vectors (original vs. quantized) and computes
 * MSE, RMSE, cosine similarity, and maximum absolute error over all
 * collected pairs. Pairs are compared element-wise up to the shorter
 * of the two vectors' lengths.
 */
export class AccuracyMeter {
  constructor() {
    this.predictions = [];       // reserved for labelled predictions (currently unused)
    this.groundTruth = [];       // optional ground-truth labels
    this.originalOutputs = [];   // outputs from the unquantized model
    this.quantizedOutputs = [];  // outputs from the quantized model
  }

  /**
   * Add a prediction pair for accuracy measurement.
   *
   * @param {number[]|Float32Array} original - output of the original model
   * @param {number[]|Float32Array} quantized - output of the quantized model
   * @param {*} [groundTruth=null] - optional ground-truth label; stored only when provided
   */
  addPrediction(original, quantized, groundTruth = null) {
    this.originalOutputs.push(original);
    this.quantizedOutputs.push(quantized);
    if (groundTruth !== null) {
      this.groundTruth.push(groundTruth);
    }
  }

  /**
   * Compute Mean Squared Error averaged over all collected pairs.
   *
   * @returns {number} mean of the per-pair MSEs; 0 when there are no
   *   samples or every pair is empty
   */
  computeMSE() {
    if (this.originalOutputs.length === 0) return 0;

    let totalMSE = 0;
    let count = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);
      // Skip empty pairs: previously 0/0 produced NaN and poisoned the total.
      if (len === 0) continue;

      let mse = 0;
      for (let j = 0; j < len; j++) {
        const diff = orig[j] - quant[j];
        mse += diff * diff;
      }
      totalMSE += mse / len;
      count++;
    }

    return count === 0 ? 0 : totalMSE / count;
  }

  /**
   * Compute the mean cosine similarity between original and quantized outputs.
   *
   * @returns {number} average cosine similarity in [-1, 1]; 1.0 when no samples
   */
  computeCosineSimilarity() {
    if (this.originalOutputs.length === 0) return 1.0;

    let totalSim = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      let dot = 0, normA = 0, normB = 0;
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        dot += orig[j] * quant[j];
        normA += orig[j] * orig[j];
        normB += quant[j] * quant[j];
      }

      // 1e-8 guards against division by zero for all-zero vectors.
      totalSim += dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }

    return totalSim / this.originalOutputs.length;
  }

  /**
   * Compute the maximum absolute element-wise error across all pairs.
   *
   * @returns {number} max |original - quantized|; 0 when no samples
   */
  computeMaxError() {
    let maxError = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        maxError = Math.max(maxError, Math.abs(orig[j] - quant[j]));
      }
    }

    return maxError;
  }

  /**
   * Get comprehensive accuracy metrics for all collected pairs.
   *
   * @returns {{mse: number, rmse: number, cosineSimilarity: number,
   *   maxError: number, samples: number, accuracyRetained: number}}
   */
  getMetrics() {
    const mse = this.computeMSE();
    // Compute once and reuse (previously recomputed for accuracyRetained).
    const cosineSimilarity = this.computeCosineSimilarity();

    return {
      mse,
      rmse: Math.sqrt(mse),
      cosineSimilarity,
      maxError: this.computeMaxError(),
      samples: this.originalOutputs.length,
      accuracyRetained: cosineSimilarity * 100,
    };
  }

  /**
   * Reset the meter, discarding all collected pairs and labels.
   */
  reset() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }
}
178
+
179
+ // ============================================
180
+ // LATENCY PROFILER
181
+ // ============================================
182
+
183
/**
 * LatencyProfiler - detailed latency profiling via labelled start/end sections.
 *
 * Each label accumulates duration samples (milliseconds via performance.now())
 * and can report mean/median/min/max/p95/p99/std over those samples.
 */
export class LatencyProfiler {
  constructor() {
    // label -> { samples: number[], running: number|null (start timestamp) }
    this.measurements = new Map();
  }

  /**
   * Start timing a section.
   * @param {string} label - section identifier; created on first use
   */
  start(label) {
    if (!this.measurements.has(label)) {
      this.measurements.set(label, {
        samples: [],
        running: null,
      });
    }
    this.measurements.get(label).running = performance.now();
  }

  /**
   * End timing a section and record the sample.
   * @param {string} label - section identifier previously passed to start()
   * @returns {number} elapsed milliseconds, or 0 if the section was not running
   */
  end(label) {
    const entry = this.measurements.get(label);
    if (entry && entry.running !== null) {
      const duration = performance.now() - entry.running;
      entry.samples.push(duration);
      entry.running = null;
      return duration;
    }
    return 0;
  }

  /**
   * Get statistics for a label.
   * @param {string} label
   * @returns {?{label: string, count: number, mean: number, median: number,
   *   min: number, max: number, p95: number, p99: number, std: number}}
   *   null when the label has no samples
   */
  getStats(label) {
    const entry = this.measurements.get(label);
    if (!entry || entry.samples.length === 0) {
      return null;
    }

    const samples = [...entry.samples].sort((a, b) => a - b);
    const sum = samples.reduce((a, b) => a + b, 0);
    // Hoisted: the mean was previously recomputed for every sample
    // inside the std reduce callback.
    const mean = sum / samples.length;

    return {
      label,
      count: samples.length,
      mean,
      median: samples[Math.floor(samples.length / 2)],
      min: samples[0],
      max: samples[samples.length - 1],
      p95: samples[Math.floor(samples.length * 0.95)],
      p99: samples[Math.floor(samples.length * 0.99)],
      std: Math.sqrt(samples.reduce((acc, v) => acc + (v - mean) ** 2, 0) / samples.length),
    };
  }

  /**
   * Get statistics for all labels.
   * @returns {Object<string, ?object>} label -> stats (see getStats)
   */
  getAllStats() {
    const stats = {};
    for (const label of this.measurements.keys()) {
      stats[label] = this.getStats(label);
    }
    return stats;
  }

  /**
   * Reset the profiler, discarding all measurements.
   */
  reset() {
    this.measurements.clear();
  }
}
261
+
262
+ // ============================================
263
+ // MEMORY PROFILER
264
+ // ============================================
265
+
266
/**
 * MemoryProfiler - tracks heap usage over labelled snapshots.
 *
 * Works in Node (process.memoryUsage) with a browser fallback
 * (performance.memory); reports all figures in megabytes.
 */
export class MemoryProfiler {
  constructor() {
    this.snapshots = [];
    this.peakMemory = 0;
  }

  /**
   * Record a labelled memory snapshot and update the running peak.
   * @param {string} [label='snapshot'] - name for this snapshot
   * @returns {object} the recorded snapshot (label, timestamp, usage fields)
   */
  snapshot(label = 'snapshot') {
    const usage = this.getMemoryUsage();
    const record = { label, timestamp: Date.now(), ...usage };

    this.snapshots.push(record);
    if (usage.heapUsed > this.peakMemory) {
      this.peakMemory = usage.heapUsed;
    }

    return record;
  }

  /**
   * Read current memory usage in MB from whichever API is available.
   * @returns {{heapUsed: number, heapTotal: number, external: number, rss: number}}
   *   all zeros when neither Node nor browser memory APIs exist
   */
  getMemoryUsage() {
    const MB = 1024 * 1024;

    // Node.js path
    if (typeof process !== 'undefined' && process.memoryUsage) {
      const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
      return {
        heapUsed: heapUsed / MB,
        heapTotal: heapTotal / MB,
        external: external / MB,
        rss: rss / MB,
      };
    }

    // Browser fallback (non-standard performance.memory)
    if (typeof performance !== 'undefined' && performance.memory) {
      const mem = performance.memory;
      return {
        heapUsed: mem.usedJSHeapSize / MB,
        heapTotal: mem.totalJSHeapSize / MB,
        external: 0,
        rss: 0,
      };
    }

    return { heapUsed: 0, heapTotal: 0, external: 0, rss: 0 };
  }

  /**
   * Heap/time delta between the first snapshots matching two labels.
   * @param {string} startLabel
   * @param {string} endLabel
   * @returns {?{heapDelta: number, timeDelta: number}} null if either label is absent
   */
  getDelta(startLabel, endLabel) {
    const first = this.snapshots.find((snap) => snap.label === startLabel);
    const last = this.snapshots.find((snap) => snap.label === endLabel);

    if (!first || !last) return null;

    return {
      heapDelta: last.heapUsed - first.heapUsed,
      timeDelta: last.timestamp - first.timestamp,
    };
  }

  /**
   * Summarize profiler state: counts, peak, current usage, full history.
   * @returns {{snapshots: number, peakMemoryMB: number, currentMemoryMB: number, history: object[]}}
   */
  getSummary() {
    const current = this.getMemoryUsage();
    return {
      snapshots: this.snapshots.length,
      peakMemoryMB: this.peakMemory,
      currentMemoryMB: current.heapUsed,
      history: this.snapshots,
    };
  }

  /**
   * Reset the profiler, discarding snapshots and the peak.
   */
  reset() {
    this.snapshots = [];
    this.peakMemory = 0;
  }
}
354
+
355
+ // ============================================
356
+ // COMPREHENSIVE BENCHMARK RUNNER
357
+ // ============================================
358
+
359
/**
 * ComprehensiveBenchmark - full benchmark suite for model optimization.
 *
 * Orchestrates quantization benchmarks over the models in TARGET_MODELS,
 * combining latency, memory, and (optionally) accuracy measurement, and
 * emits progress events: 'suite:start', 'suite:complete',
 * 'benchmark:start', 'benchmark:complete'.
 */
export class ComprehensiveBenchmark extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {ModelOptimizer} [options.optimizer] - optimizer to use; a fresh one is created by default
   */
  constructor(options = {}) {
    super();
    this.optimizer = options.optimizer || new ModelOptimizer();
    this.latencyProfiler = new LatencyProfiler();
    this.memoryProfiler = new MemoryProfiler();
    this.accuracyMeter = new AccuracyMeter();
    this.results = [];
  }

  /**
   * Run the benchmark suite on a model.
   *
   * @param {string} model - key into TARGET_MODELS
   * @param {string} [profile='standard'] - key into BENCHMARK_PROFILES
   *   (unknown profiles fall back to 'standard')
   * @returns {Promise<object>} suite results (also appended to this.results)
   * @throws {Error} when the model key is unknown
   */
  async runSuite(model, profile = 'standard') {
    const profileConfig = BENCHMARK_PROFILES[profile] || BENCHMARK_PROFILES.standard;
    const modelConfig = TARGET_MODELS[model];

    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }

    this.emit('suite:start', { model, profile });

    const suiteResults = {
      model,
      profile,
      modelConfig,
      timestamp: new Date().toISOString(),
      benchmarks: [],
    };

    // Memory baseline before any benchmark work
    this.memoryProfiler.snapshot('baseline');

    // Benchmark each quantization method sequentially (they share profilers)
    for (const method of profileConfig.quantMethods) {
      const methodResult = await this.benchmarkQuantization(
        model,
        method,
        profileConfig
      );
      suiteResults.benchmarks.push(methodResult);
    }

    // Memory after benchmarks
    this.memoryProfiler.snapshot('after-benchmarks');

    suiteResults.memory = this.memoryProfiler.getSummary();
    suiteResults.summary = this.generateSummary(suiteResults);

    this.results.push(suiteResults);
    this.emit('suite:complete', suiteResults);

    return suiteResults;
  }

  /**
   * Benchmark a specific quantization method for a model.
   *
   * @param {string} model - key into TARGET_MODELS
   * @param {string} method - key into QUANTIZATION_CONFIGS
   * @param {object} config - profile configuration (iterations, inputSizes, ...)
   * @returns {Promise<object>} per-method benchmark result
   * @throws {Error} when the quantization method is unknown
   */
  async benchmarkQuantization(model, method, config) {
    this.emit('benchmark:start', { model, method });

    const quantConfig = QUANTIZATION_CONFIGS[method];
    const modelConfig = TARGET_MODELS[model];

    // Fail fast with a clear message; previously an unknown method caused
    // an opaque TypeError at quantConfig.compression further below.
    if (!quantConfig) {
      throw new Error(`Unknown quantization method: ${method}`);
    }

    // Quantize the model (timed)
    this.latencyProfiler.start('quantization');
    const quantResult = await this.optimizer.quantize(model, method);
    this.latencyProfiler.end('quantization');

    // Simulated inference benchmarks for each input size
    const inferenceBenchmarks = [];

    for (const inputSize of config.inputSizes) {
      const [batchSize, seqLen] = inputSize;

      // NOTE: the profiler section spans warmup + measured iterations.
      this.latencyProfiler.start(`inference-${batchSize}x${seqLen}`);

      // Warmup (not recorded in `times`)
      for (let i = 0; i < config.warmupIterations; i++) {
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
      }

      // Measured iterations
      const times = [];
      for (let i = 0; i < config.iterations; i++) {
        const start = performance.now();
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
        times.push(performance.now() - start);
      }

      this.latencyProfiler.end(`inference-${batchSize}x${seqLen}`);

      times.sort((a, b) => a - b);
      // Computed once; previously the reduce ran twice (and without an
      // initial value) for meanMs and tokensPerSecond.
      const meanMs = times.reduce((a, b) => a + b, 0) / times.length;

      inferenceBenchmarks.push({
        inputSize: `${batchSize}x${seqLen}`,
        iterations: config.iterations,
        meanMs,
        medianMs: times[Math.floor(times.length / 2)],
        p95Ms: times[Math.floor(times.length * 0.95)],
        minMs: times[0],
        maxMs: times[times.length - 1],
        tokensPerSecond: (seqLen * batchSize * 1000) / meanMs,
      });
    }

    // Accuracy measurement (only when the profile requests it)
    let accuracyMetrics = null;
    if (config.measureAccuracy) {
      // Generate synthetic output pairs with method-dependent noise
      for (let i = 0; i < 100; i++) {
        const original = new Float32Array(modelConfig.hiddenSize).map(() => Math.random());
        const quantized = this.simulateQuantizedOutput(original, method);
        this.accuracyMeter.addPrediction(Array.from(original), Array.from(quantized));
      }
      accuracyMetrics = this.accuracyMeter.getMetrics();
      this.accuracyMeter.reset();
    }

    const result = {
      method,
      quantization: quantResult,
      inference: inferenceBenchmarks,
      accuracy: accuracyMetrics,
      latencyProfile: this.latencyProfiler.getAllStats(),
      compression: {
        original: modelConfig.originalSize,
        quantized: modelConfig.originalSize / quantConfig.compression,
        ratio: quantConfig.compression,
      },
      recommendation: this.getRecommendation(model, method, inferenceBenchmarks),
    };

    this.emit('benchmark:complete', result);

    return result;
  }

  /**
   * Simulate model inference by sleeping proportionally to model size,
   * batch, sequence length, and quantization speedup.
   *
   * @returns {Promise<Float32Array>} random vector of length config.hiddenSize
   */
  async simulateInference(config, batchSize, seqLen, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    // Base latency scales with model size and total tokens per call
    const baseLatency = (config.originalSize / 100) * (batchSize * seqLen / 512);
    const speedup = quantConfig?.speedup || 1;

    const latency = baseLatency / speedup;
    await new Promise(resolve => setTimeout(resolve, latency));

    return new Float32Array(config.hiddenSize).map(() => Math.random());
  }

  /**
   * Simulate a quantized output by adding uniform noise scaled by the
   * method's configured accuracy loss (defaults to 0.01 when unknown).
   *
   * @param {Float32Array|number[]} original - reference output
   * @param {string} method - key into QUANTIZATION_CONFIGS
   * @returns {Float32Array} noisy copy of `original`
   */
  simulateQuantizedOutput(original, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const noise = quantConfig?.accuracyLoss || 0.01;

    return new Float32Array(original.length).map((_, i) => {
      return original[i] + (Math.random() - 0.5) * 2 * noise;
    });
  }

  /**
   * Score a method for a model (0-100) from size target, latency,
   * accuracy loss, and compression ratio.
   *
   * @returns {{score: number, rating: string, reasons: string[], recommended: boolean}}
   */
  getRecommendation(model, method, inferenceBenchmarks) {
    const modelConfig = TARGET_MODELS[model];
    const quantConfig = QUANTIZATION_CONFIGS[method];

    const avgLatency = inferenceBenchmarks.reduce((a, b) => a + b.meanMs, 0) / inferenceBenchmarks.length;
    const targetMet = (modelConfig.originalSize / quantConfig.compression) <= modelConfig.targetSize;

    let score = 0;
    const reasons = [];

    // Size target met: +30
    if (targetMet) {
      score += 30;
      reasons.push('Meets size target');
    }

    // Latency tiers: +30 under 10ms, +20 under 50ms
    if (avgLatency < 10) {
      score += 30;
      reasons.push('Excellent latency (<10ms)');
    } else if (avgLatency < 50) {
      score += 20;
      reasons.push('Good latency (<50ms)');
    }

    // Accuracy tiers: +25 under 2% loss, +15 under 5%
    if (quantConfig.accuracyLoss < 0.02) {
      score += 25;
      reasons.push('Minimal accuracy loss (<2%)');
    } else if (quantConfig.accuracyLoss < 0.05) {
      score += 15;
      reasons.push('Acceptable accuracy loss (<5%)');
    }

    // Compression ratio: +15 for 4x or better
    if (quantConfig.compression >= 4) {
      score += 15;
      reasons.push('High compression (4x+)');
    }

    return {
      score,
      rating: score >= 80 ? 'Excellent' : score >= 60 ? 'Good' : score >= 40 ? 'Acceptable' : 'Poor',
      reasons,
      recommended: score >= 60,
    };
  }

  /**
   * Generate a suite summary: best method by recommendation score plus
   * average latency across all methods and input sizes.
   */
  generateSummary(suiteResults) {
    const benchmarks = suiteResults.benchmarks;

    // Find best-scoring method
    let bestMethod = null;
    let bestScore = 0;

    for (const b of benchmarks) {
      if (b.recommendation.score > bestScore) {
        bestScore = b.recommendation.score;
        bestMethod = b.method;
      }
    }

    // Average of per-method mean latencies
    const avgLatency = benchmarks.reduce((sum, b) => {
      return sum + b.inference.reduce((s, i) => s + i.meanMs, 0) / b.inference.length;
    }, 0) / benchmarks.length;

    return {
      modelKey: suiteResults.model,
      modelType: suiteResults.modelConfig.type,
      originalSizeMB: suiteResults.modelConfig.originalSize,
      targetSizeMB: suiteResults.modelConfig.targetSize,
      bestMethod,
      bestScore,
      avgLatencyMs: avgLatency,
      methodsEvaluated: benchmarks.length,
      recommendation: bestMethod ? `Use ${bestMethod} quantization for optimal edge deployment` : 'No suitable method found',
    };
  }

  /**
   * Run benchmarks on every model in TARGET_MODELS.
   *
   * Per-model failures are captured as { model, error } entries instead of
   * aborting the whole run.
   *
   * @param {string} [profile='standard']
   * @returns {Promise<object>} aggregate results with an overall summary
   */
  async runAllModels(profile = 'standard') {
    const allResults = [];

    for (const modelKey of Object.keys(TARGET_MODELS)) {
      try {
        const result = await this.runSuite(modelKey, profile);
        allResults.push(result);
      } catch (error) {
        allResults.push({
          model: modelKey,
          error: error.message,
        });
      }
    }

    return {
      timestamp: new Date().toISOString(),
      profile,
      results: allResults,
      summary: this.generateOverallSummary(allResults),
    };
  }

  /**
   * Generate an overall summary (success/failure counts and best-method
   * recommendations) for a runAllModels result list.
   */
  generateOverallSummary(allResults) {
    const successful = allResults.filter(r => !r.error);

    return {
      totalModels: allResults.length,
      successfulBenchmarks: successful.length,
      failedBenchmarks: allResults.length - successful.length,
      recommendations: successful.map(r => ({
        model: r.model,
        bestMethod: r.summary?.bestMethod,
        score: r.summary?.bestScore,
      })),
    };
  }

  /**
   * Export all accumulated suite results with an export timestamp.
   * @returns {{exported: string, results: object[]}}
   */
  exportResults() {
    return {
      exported: new Date().toISOString(),
      results: this.results,
    };
  }

  /**
   * Reset all profilers, meters, and accumulated results.
   */
  reset() {
    this.latencyProfiler.reset();
    this.memoryProfiler.reset();
    this.accuracyMeter.reset();
    this.results = [];
  }
}
682
+
683
+ // ============================================
684
+ // EXPORTS
685
+ // ============================================
686
+
687
// BENCHMARK_PROFILES, AccuracyMeter, LatencyProfiler, and MemoryProfiler are
// named exports declared above; the benchmark runner is the default export.
export default ComprehensiveBenchmark;