@ruvector/edge-net 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +281 -10
- package/core-invariants.js +942 -0
- package/models/adapter-hub.js +1008 -0
- package/models/adapter-security.js +792 -0
- package/models/benchmark.js +688 -0
- package/models/distribution.js +791 -0
- package/models/index.js +109 -0
- package/models/integrity.js +753 -0
- package/models/loader.js +725 -0
- package/models/microlora.js +1298 -0
- package/models/model-loader.js +922 -0
- package/models/model-optimizer.js +1245 -0
- package/models/model-registry.js +696 -0
- package/models/model-utils.js +548 -0
- package/models/models-cli.js +914 -0
- package/models/registry.json +214 -0
- package/models/training-utils.js +1418 -0
- package/models/wasm-core.js +1025 -0
- package/network-genesis.js +2847 -0
- package/onnx-worker.js +462 -8
- package/package.json +33 -3
- package/plugins/SECURITY-AUDIT.md +654 -0
- package/plugins/cli.js +43 -3
- package/plugins/implementations/e2e-encryption.js +57 -12
- package/plugins/plugin-loader.js +610 -21
- package/tests/model-optimizer.test.js +644 -0
- package/tests/network-genesis.test.js +562 -0
- package/tests/plugin-benchmark.js +1239 -0
- package/tests/plugin-system-test.js +163 -0
- package/tests/wasm-core.test.js +368 -0
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @ruvector/edge-net Benchmark Utilities
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive benchmarking for model optimization
|
|
5
|
+
*
|
|
6
|
+
* @module @ruvector/edge-net/models/benchmark
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { EventEmitter } from 'events';
|
|
10
|
+
import { ModelOptimizer, TARGET_MODELS, QUANTIZATION_CONFIGS } from './model-optimizer.js';
|
|
11
|
+
|
|
12
|
+
// ============================================
|
|
13
|
+
// BENCHMARK CONFIGURATION
|
|
14
|
+
// ============================================
|
|
15
|
+
|
|
16
|
+
/**
 * Benchmark profiles for different scenarios.
 *
 * Each profile controls how many timed iterations run, how many warmup
 * iterations precede them, which input shapes ([batchSize, seqLen]) are
 * exercised, and which quantization methods are compared. Optional keys:
 * `memoryLimit` (MB budget hint) and `measureAccuracy` (enables the
 * AccuracyMeter pass in the benchmark runner).
 */
export const BENCHMARK_PROFILES = {
  // Fast smoke test: one small input, int8 only.
  'quick': {
    iterations: 50,
    warmupIterations: 5,
    inputSizes: [[1, 128]],
    quantMethods: ['int8'],
  },
  // Default balance of coverage vs. runtime.
  'standard': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 512], [4, 256]],
    quantMethods: ['int8', 'int4', 'fp16'],
  },
  // Exhaustive sweep over sizes and methods; slowest profile.
  'comprehensive': {
    iterations: 500,
    warmupIterations: 50,
    inputSizes: [[1, 64], [1, 128], [1, 256], [1, 512], [1, 1024], [4, 256], [8, 128]],
    quantMethods: ['int8', 'int4', 'fp16', 'int8-fp16-mixed'],
  },
  // Constrained-device scenario: aggressive int4 only, small inputs.
  'edge-device': {
    iterations: 100,
    warmupIterations: 10,
    inputSizes: [[1, 128], [1, 256]],
    quantMethods: ['int4'],
    memoryLimit: 512, // MB
  },
  // Prioritizes output fidelity: conservative methods + accuracy pass.
  'accuracy-focus': {
    iterations: 200,
    warmupIterations: 20,
    inputSizes: [[1, 512]],
    quantMethods: ['fp16', 'int8'],
    measureAccuracy: true,
  },
};
|
|
53
|
+
|
|
54
|
+
// ============================================
|
|
55
|
+
// ACCURACY MEASUREMENT
|
|
56
|
+
// ============================================
|
|
57
|
+
|
|
58
|
+
/**
 * Accuracy metrics for quantized models.
 *
 * Collects paired outputs from a full-precision ("original") model and its
 * quantized counterpart, then computes error/similarity statistics over all
 * collected pairs. Outputs may be plain arrays or typed arrays; pairs are
 * compared element-wise up to the shorter of the two lengths.
 */
export class AccuracyMeter {
  constructor() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }

  /**
   * Add a prediction pair for accuracy measurement.
   * @param {number[]|Float32Array} original - Full-precision model output.
   * @param {number[]|Float32Array} quantized - Quantized model output.
   * @param {*} [groundTruth=null] - Optional ground-truth label; stored only
   *   when provided (non-null).
   */
  addPrediction(original, quantized, groundTruth = null) {
    this.originalOutputs.push(original);
    this.quantizedOutputs.push(quantized);
    if (groundTruth !== null) {
      this.groundTruth.push(groundTruth);
    }
  }

  /**
   * Compute the Mean Squared Error, averaged across all pairs.
   * An empty pair (zero overlapping elements) contributes 0 instead of
   * producing 0/0 = NaN, which would otherwise poison the average.
   * @returns {number} Mean of per-pair MSEs; 0 when no pairs are collected.
   */
  computeMSE() {
    if (this.originalOutputs.length === 0) return 0;

    let totalMSE = 0;
    let count = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      // Guard degenerate empty outputs: previously this divided by len === 0,
      // yielding NaN that propagated into the final average.
      if (len === 0) {
        count++;
        continue;
      }

      let mse = 0;
      for (let j = 0; j < len; j++) {
        const diff = orig[j] - quant[j];
        mse += diff * diff;
      }
      totalMSE += mse / len;
      count++;
    }

    return totalMSE / count;
  }

  /**
   * Compute the mean cosine similarity between original and quantized
   * outputs across all pairs.
   * @returns {number} Average cosine similarity; 1.0 when no pairs collected.
   */
  computeCosineSimilarity() {
    if (this.originalOutputs.length === 0) return 1.0;

    let totalSim = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];

      let dot = 0, normA = 0, normB = 0;
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        dot += orig[j] * quant[j];
        normA += orig[j] * orig[j];
        normB += quant[j] * quant[j];
      }

      // Epsilon keeps the division finite for all-zero vectors.
      totalSim += dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }

    return totalSim / this.originalOutputs.length;
  }

  /**
   * Compute the maximum absolute element-wise error over all pairs.
   * @returns {number} Max |orig - quant|; 0 when no pairs collected.
   */
  computeMaxError() {
    let maxError = 0;

    for (let i = 0; i < this.originalOutputs.length; i++) {
      const orig = this.originalOutputs[i];
      const quant = this.quantizedOutputs[i];
      const len = Math.min(orig.length, quant.length);

      for (let j = 0; j < len; j++) {
        maxError = Math.max(maxError, Math.abs(orig[j] - quant[j]));
      }
    }

    return maxError;
  }

  /**
   * Get comprehensive accuracy metrics.
   * @returns {{mse: number, rmse: number, cosineSimilarity: number,
   *   maxError: number, samples: number, accuracyRetained: number}}
   */
  getMetrics() {
    const mse = this.computeMSE();
    // Compute once and reuse: the original implementation recomputed the
    // full cosine-similarity pass a second time for accuracyRetained.
    const cosineSimilarity = this.computeCosineSimilarity();

    return {
      mse,
      rmse: Math.sqrt(mse),
      cosineSimilarity,
      maxError: this.computeMaxError(),
      samples: this.originalOutputs.length,
      accuracyRetained: cosineSimilarity * 100,
    };
  }

  /**
   * Reset the meter, discarding all collected pairs.
   */
  reset() {
    this.predictions = [];
    this.groundTruth = [];
    this.originalOutputs = [];
    this.quantizedOutputs = [];
  }
}
|
|
178
|
+
|
|
179
|
+
// ============================================
|
|
180
|
+
// LATENCY PROFILER
|
|
181
|
+
// ============================================
|
|
182
|
+
|
|
183
|
+
/**
 * Detailed latency profiling.
 *
 * Accumulates named timing sections (via start()/end() pairs using the
 * high-resolution `performance.now()` clock) and reports summary statistics
 * per label. All durations are in milliseconds.
 */
export class LatencyProfiler {
  constructor() {
    // label -> { samples: number[], running: number|null (start timestamp) }
    this.measurements = new Map();
  }

  /**
   * Start timing a section. Calling start() again for the same label before
   * end() overwrites (discards) the earlier start timestamp.
   * @param {string} label - Section identifier.
   */
  start(label) {
    if (!this.measurements.has(label)) {
      this.measurements.set(label, {
        samples: [],
        running: null,
      });
    }
    this.measurements.get(label).running = performance.now();
  }

  /**
   * End timing a section and record the elapsed duration.
   * @param {string} label - Section identifier.
   * @returns {number} Elapsed ms, or 0 when no matching start() is pending.
   */
  end(label) {
    const entry = this.measurements.get(label);
    if (entry && entry.running !== null) {
      const duration = performance.now() - entry.running;
      entry.samples.push(duration);
      entry.running = null;
      return duration;
    }
    return 0;
  }

  /**
   * Get summary statistics for a label.
   * @param {string} label - Section identifier.
   * @returns {object|null} Stats (count/mean/median/min/max/p95/p99/std in ms),
   *   or null when the label has no completed samples.
   */
  getStats(label) {
    const entry = this.measurements.get(label);
    if (!entry || entry.samples.length === 0) {
      return null;
    }

    // Sort a copy so recorded sample order is preserved.
    const samples = [...entry.samples].sort((a, b) => a - b);
    const sum = samples.reduce((a, b) => a + b, 0);
    // Hoisted: the original recomputed sum/length inside the std reduce
    // for every element.
    const mean = sum / samples.length;

    return {
      label,
      count: samples.length,
      mean,
      median: samples[Math.floor(samples.length / 2)],
      min: samples[0],
      max: samples[samples.length - 1],
      p95: samples[Math.floor(samples.length * 0.95)],
      p99: samples[Math.floor(samples.length * 0.99)],
      std: Math.sqrt(samples.reduce((acc, v) => acc + Math.pow(v - mean, 2), 0) / samples.length),
    };
  }

  /**
   * Get statistics for every recorded label.
   * @returns {Object<string, object|null>} Map of label -> stats.
   */
  getAllStats() {
    const stats = {};
    for (const label of this.measurements.keys()) {
      stats[label] = this.getStats(label);
    }
    return stats;
  }

  /**
   * Reset the profiler, discarding all measurements.
   */
  reset() {
    this.measurements.clear();
  }
}
|
|
261
|
+
|
|
262
|
+
// ============================================
|
|
263
|
+
// MEMORY PROFILER
|
|
264
|
+
// ============================================
|
|
265
|
+
|
|
266
|
+
/**
 * Memory usage profiler.
 *
 * Records labelled heap snapshots over time and tracks the peak heap usage
 * observed across snapshots. All figures are reported in megabytes.
 */
export class MemoryProfiler {
  constructor() {
    this.snapshots = [];
    this.peakMemory = 0;
  }

  /**
   * Record a labelled snapshot of current memory usage.
   * @param {string} [label='snapshot'] - Identifier for later lookup.
   * @returns {object} The recorded snapshot ({ label, timestamp, heapUsed, ... }).
   */
  snapshot(label = 'snapshot') {
    const usage = this.getMemoryUsage();
    const record = {
      label,
      timestamp: Date.now(),
      ...usage,
    };

    this.snapshots.push(record);
    if (usage.heapUsed > this.peakMemory) {
      this.peakMemory = usage.heapUsed;
    }

    return record;
  }

  /**
   * Read current memory usage in MB. Prefers the Node.js process API,
   * falls back to the non-standard browser `performance.memory` API, and
   * reports zeros when neither is available.
   * @returns {{heapUsed: number, heapTotal: number, external: number, rss: number}}
   */
  getMemoryUsage() {
    const MB = 1024 * 1024;

    if (typeof process !== 'undefined' && process.memoryUsage) {
      const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
      return {
        heapUsed: heapUsed / MB,
        heapTotal: heapTotal / MB,
        external: external / MB,
        rss: rss / MB,
      };
    }

    // Browser fallback (Chrome-only performance.memory).
    if (typeof performance !== 'undefined' && performance.memory) {
      const { usedJSHeapSize, totalJSHeapSize } = performance.memory;
      return {
        heapUsed: usedJSHeapSize / MB,
        heapTotal: totalJSHeapSize / MB,
        external: 0,
        rss: 0,
      };
    }

    return { heapUsed: 0, heapTotal: 0, external: 0, rss: 0 };
  }

  /**
   * Compute heap/time deltas between the first snapshots carrying each of
   * the given labels.
   * @param {string} startLabel - Label of the earlier snapshot.
   * @param {string} endLabel - Label of the later snapshot.
   * @returns {{heapDelta: number, timeDelta: number}|null} null when either
   *   label was never recorded.
   */
  getDelta(startLabel, endLabel) {
    const first = this.snapshots.find((s) => s.label === startLabel);
    const last = this.snapshots.find((s) => s.label === endLabel);
    if (!first || !last) return null;

    return {
      heapDelta: last.heapUsed - first.heapUsed,
      timeDelta: last.timestamp - first.timestamp,
    };
  }

  /**
   * Summarize the profiler state: snapshot count, peak and current heap
   * usage (MB), and the full snapshot history.
   */
  getSummary() {
    return {
      snapshots: this.snapshots.length,
      peakMemoryMB: this.peakMemory,
      currentMemoryMB: this.getMemoryUsage().heapUsed,
      history: this.snapshots,
    };
  }

  /**
   * Reset the profiler, discarding all snapshots and the recorded peak.
   */
  reset() {
    this.snapshots = [];
    this.peakMemory = 0;
  }
}
|
|
354
|
+
|
|
355
|
+
// ============================================
|
|
356
|
+
// COMPREHENSIVE BENCHMARK RUNNER
|
|
357
|
+
// ============================================
|
|
358
|
+
|
|
359
|
+
/**
 * ComprehensiveBenchmark - Full benchmark suite for model optimization.
 *
 * Orchestrates quantization (via ModelOptimizer), simulated inference timing,
 * optional accuracy measurement, and memory profiling across the models in
 * TARGET_MODELS. Emits progress events: 'suite:start', 'suite:complete',
 * 'benchmark:start', 'benchmark:complete'.
 *
 * NOTE: inference here is *simulated* (see simulateInference) — latency is
 * derived from model size and the method's configured speedup, not from
 * running a real model.
 */
export class ComprehensiveBenchmark extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {ModelOptimizer} [options.optimizer] - Optimizer to use; a fresh
   *   ModelOptimizer is created when omitted.
   */
  constructor(options = {}) {
    super();
    this.optimizer = options.optimizer || new ModelOptimizer();
    this.latencyProfiler = new LatencyProfiler();
    this.memoryProfiler = new MemoryProfiler();
    this.accuracyMeter = new AccuracyMeter();
    this.results = [];
  }

  /**
   * Run the benchmark suite on a model.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES;
   *   unknown profiles silently fall back to 'standard'.
   * @returns {Promise<object>} Suite results (also appended to this.results).
   * @throws {Error} When `model` is not a TARGET_MODELS key.
   */
  async runSuite(model, profile = 'standard') {
    const profileConfig = BENCHMARK_PROFILES[profile] || BENCHMARK_PROFILES.standard;
    const modelConfig = TARGET_MODELS[model];

    if (!modelConfig) {
      throw new Error(`Unknown model: ${model}`);
    }

    this.emit('suite:start', { model, profile });

    const suiteResults = {
      model,
      profile,
      modelConfig,
      timestamp: new Date().toISOString(),
      benchmarks: [],
    };

    // Memory baseline before any quantization work.
    this.memoryProfiler.snapshot('baseline');

    // Benchmark each quantization method sequentially (order from profile).
    for (const method of profileConfig.quantMethods) {
      const methodResult = await this.benchmarkQuantization(
        model,
        method,
        profileConfig
      );
      suiteResults.benchmarks.push(methodResult);
    }

    // Memory after benchmarks.
    this.memoryProfiler.snapshot('after-benchmarks');

    // Add memory profile.
    suiteResults.memory = this.memoryProfiler.getSummary();

    // Add summary (best method, averages).
    suiteResults.summary = this.generateSummary(suiteResults);

    this.results.push(suiteResults);
    this.emit('suite:complete', suiteResults);

    return suiteResults;
  }

  /**
   * Benchmark a specific quantization method for a model.
   * @param {string} model - Key into TARGET_MODELS.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @param {object} config - A BENCHMARK_PROFILES entry.
   * @returns {Promise<object>} Per-method result with quantization, inference,
   *   accuracy, latency-profile, compression, and recommendation sections.
   */
  async benchmarkQuantization(model, method, config) {
    this.emit('benchmark:start', { model, method });

    const quantConfig = QUANTIZATION_CONFIGS[method];
    const modelConfig = TARGET_MODELS[model];

    // Quantize model (timed under the 'quantization' label).
    this.latencyProfiler.start('quantization');
    const quantResult = await this.optimizer.quantize(model, method);
    this.latencyProfiler.end('quantization');

    // Simulate inference benchmarks for each input size.
    const inferenceBenchmarks = [];

    for (const inputSize of config.inputSizes) {
      const batchSize = inputSize[0];
      const seqLen = inputSize[1];

      // This label times the whole warmup+measure loop as ONE sample.
      this.latencyProfiler.start(`inference-${batchSize}x${seqLen}`);

      // Warmup iterations are not recorded.
      for (let i = 0; i < config.warmupIterations; i++) {
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
      }

      // Measure each iteration individually.
      const times = [];
      for (let i = 0; i < config.iterations; i++) {
        const start = performance.now();
        await this.simulateInference(modelConfig, batchSize, seqLen, method);
        times.push(performance.now() - start);
      }

      this.latencyProfiler.end(`inference-${batchSize}x${seqLen}`);

      // Sort ascending for median/p95 index lookups below.
      times.sort((a, b) => a - b);

      // NOTE(review): reduce() without an initial value assumes
      // config.iterations >= 1 — every shipped profile satisfies this.
      inferenceBenchmarks.push({
        inputSize: `${batchSize}x${seqLen}`,
        iterations: config.iterations,
        meanMs: times.reduce((a, b) => a + b) / times.length,
        medianMs: times[Math.floor(times.length / 2)],
        p95Ms: times[Math.floor(times.length * 0.95)],
        minMs: times[0],
        maxMs: times[times.length - 1],
        tokensPerSecond: (seqLen * batchSize * 1000) / (times.reduce((a, b) => a + b) / times.length),
      });
    }

    // Measure accuracy if requested by the profile.
    let accuracyMetrics = null;
    if (config.measureAccuracy) {
      // Generate synthetic test outputs; quantization error is simulated as
      // uniform noise scaled by the method's configured accuracyLoss.
      for (let i = 0; i < 100; i++) {
        const original = new Float32Array(modelConfig.hiddenSize).map(() => Math.random());
        const quantized = this.simulateQuantizedOutput(original, method);
        this.accuracyMeter.addPrediction(Array.from(original), Array.from(quantized));
      }
      accuracyMetrics = this.accuracyMeter.getMetrics();
      this.accuracyMeter.reset();
    }

    const result = {
      method,
      quantization: quantResult,
      inference: inferenceBenchmarks,
      accuracy: accuracyMetrics,
      // NOTE(review): the latency profiler is never reset between methods, so
      // this snapshot includes samples accumulated from earlier methods in the
      // same suite — per-method isolation would require a reset here; confirm
      // whether the cumulative view is intentional.
      latencyProfile: this.latencyProfiler.getAllStats(),
      compression: {
        original: modelConfig.originalSize,
        quantized: modelConfig.originalSize / quantConfig.compression,
        ratio: quantConfig.compression,
      },
      recommendation: this.getRecommendation(model, method, inferenceBenchmarks),
    };

    this.emit('benchmark:complete', result);

    return result;
  }

  /**
   * Simulate model inference with a latency model.
   * Latency scales with model size and input volume, divided by the
   * quantization method's configured speedup; the wait is realized via
   * setTimeout, so sub-millisecond latencies round up to timer resolution.
   * @returns {Promise<Float32Array>} Random vector of length config.hiddenSize.
   */
  async simulateInference(config, batchSize, seqLen, method) {
    // Base latency depends on model size and batch.
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const baseLatency = (config.originalSize / 100) * (batchSize * seqLen / 512);
    const speedup = quantConfig?.speedup || 1;

    const latency = baseLatency / speedup;
    await new Promise(resolve => setTimeout(resolve, latency));

    return new Float32Array(config.hiddenSize).map(() => Math.random());
  }

  /**
   * Simulate a quantized output by adding uniform noise in
   * [-accuracyLoss, +accuracyLoss] to each element of `original`.
   * @param {Float32Array} original - Reference output vector.
   * @param {string} method - Key into QUANTIZATION_CONFIGS.
   * @returns {Float32Array} Noised copy of `original`.
   */
  simulateQuantizedOutput(original, method) {
    const quantConfig = QUANTIZATION_CONFIGS[method];
    const noise = quantConfig?.accuracyLoss || 0.01;

    return new Float32Array(original.length).map((_, i) => {
      return original[i] + (Math.random() - 0.5) * 2 * noise;
    });
  }

  /**
   * Generate a 0-100 scored recommendation for a (model, method) pair based
   * on size target (30 pts), latency (<10ms: 30, <50ms: 20), configured
   * accuracy loss (<2%: 25, <5%: 15), and compression ratio (4x+: 15).
   * @returns {{score: number, rating: string, reasons: string[], recommended: boolean}}
   */
  getRecommendation(model, method, inferenceBenchmarks) {
    const modelConfig = TARGET_MODELS[model];
    const quantConfig = QUANTIZATION_CONFIGS[method];

    const avgLatency = inferenceBenchmarks.reduce((a, b) => a + b.meanMs, 0) / inferenceBenchmarks.length;
    const targetMet = (modelConfig.originalSize / quantConfig.compression) <= modelConfig.targetSize;

    let score = 0;
    let reasons = [];

    // Size target met.
    if (targetMet) {
      score += 30;
      reasons.push('Meets size target');
    }

    // Good latency.
    if (avgLatency < 10) {
      score += 30;
      reasons.push('Excellent latency (<10ms)');
    } else if (avgLatency < 50) {
      score += 20;
      reasons.push('Good latency (<50ms)');
    }

    // Low accuracy loss (from static config, not measured accuracy).
    if (quantConfig.accuracyLoss < 0.02) {
      score += 25;
      reasons.push('Minimal accuracy loss (<2%)');
    } else if (quantConfig.accuracyLoss < 0.05) {
      score += 15;
      reasons.push('Acceptable accuracy loss (<5%)');
    }

    // Compression ratio.
    if (quantConfig.compression >= 4) {
      score += 15;
      reasons.push('High compression (4x+)');
    }

    return {
      score,
      rating: score >= 80 ? 'Excellent' : score >= 60 ? 'Good' : score >= 40 ? 'Acceptable' : 'Poor',
      reasons,
      recommended: score >= 60,
    };
  }

  /**
   * Generate a suite summary: best-scoring method and average latency
   * across all benchmarked methods.
   * @param {object} suiteResults - Result object produced by runSuite().
   */
  generateSummary(suiteResults) {
    const benchmarks = suiteResults.benchmarks;

    // Find best method by recommendation score.
    let bestMethod = null;
    let bestScore = 0;

    for (const b of benchmarks) {
      if (b.recommendation.score > bestScore) {
        bestScore = b.recommendation.score;
        bestMethod = b.method;
      }
    }

    // Calculate averages: mean over methods of each method's mean latency.
    const avgLatency = benchmarks.reduce((sum, b) => {
      return sum + b.inference.reduce((s, i) => s + i.meanMs, 0) / b.inference.length;
    }, 0) / benchmarks.length;

    return {
      modelKey: suiteResults.model,
      modelType: suiteResults.modelConfig.type,
      originalSizeMB: suiteResults.modelConfig.originalSize,
      targetSizeMB: suiteResults.modelConfig.targetSize,
      bestMethod,
      bestScore,
      avgLatencyMs: avgLatency,
      methodsEvaluated: benchmarks.length,
      recommendation: bestMethod ? `Use ${bestMethod} quantization for optimal edge deployment` : 'No suitable method found',
    };
  }

  /**
   * Run benchmarks on all target models with the given profile.
   * Per-model failures are captured as { model, error } entries rather than
   * aborting the run.
   * @param {string} [profile='standard'] - Key into BENCHMARK_PROFILES.
   * @returns {Promise<object>} Aggregate results with an overall summary.
   */
  async runAllModels(profile = 'standard') {
    const allResults = [];

    for (const modelKey of Object.keys(TARGET_MODELS)) {
      try {
        const result = await this.runSuite(modelKey, profile);
        allResults.push(result);
      } catch (error) {
        allResults.push({
          model: modelKey,
          error: error.message,
        });
      }
    }

    return {
      timestamp: new Date().toISOString(),
      profile,
      results: allResults,
      summary: this.generateOverallSummary(allResults),
    };
  }

  /**
   * Generate an overall summary across all models: success/failure counts
   * plus the best method per successful model.
   */
  generateOverallSummary(allResults) {
    const successful = allResults.filter(r => !r.error);

    return {
      totalModels: allResults.length,
      successfulBenchmarks: successful.length,
      failedBenchmarks: allResults.length - successful.length,
      recommendations: successful.map(r => ({
        model: r.model,
        bestMethod: r.summary?.bestMethod,
        score: r.summary?.bestScore,
      })),
    };
  }

  /**
   * Export accumulated results to a JSON-serializable object.
   */
  exportResults() {
    return {
      exported: new Date().toISOString(),
      results: this.results,
    };
  }

  /**
   * Reset all profilers, the accuracy meter, and accumulated results.
   */
  reset() {
    this.latencyProfiler.reset();
    this.memoryProfiler.reset();
    this.accuracyMeter.reset();
    this.results = [];
  }
}
|
|
682
|
+
|
|
683
|
+
// ============================================
|
|
684
|
+
// EXPORTS
|
|
685
|
+
// ============================================
|
|
686
|
+
|
|
687
|
+
// BENCHMARK_PROFILES already exported at declaration (line 19)
|
|
688
|
+
export default ComprehensiveBenchmark;
|