wasm-bindgen-lite 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,887 @@
1
+ /**
2
+ * SIMD Benchmark and Analysis Module
3
+ *
4
+ * Builds variant matrix (scalar/autovec/explicit-*), runs SIMD detection,
5
+ * and executes benchmarks with comprehensive reporting.
6
+ */
7
+
8
+ import { execSync, spawn } from 'node:child_process'
9
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, copyFileSync, rmSync } from 'node:fs'
10
+ import { join, dirname } from 'node:path'
11
+ import { createHash } from 'node:crypto'
12
+ import { performance } from 'node:perf_hooks'
13
+ import { fileURLToPath } from 'node:url'
14
+
15
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url))
// Location of the simd-detect binary, resolved relative to this module's directory.
const SIMD_DETECT_PATH = join(
  __dirname,
  '..',
  '..',
  'bench',
  'simd-detect',
  'target',
  'release',
  'simd-detect'
)
17
+
18
/**
 * Baseline benchmark settings.
 *
 * runBench spreads these first and then spreads `cfg.bench` on top, so any
 * field supplied there replaces the value listed here.
 */
const DEFAULT_BENCH_CONFIG = {
  warmupRuns: 10,
  samples: 60,
  outputDir: 'bench_out',
  // Input sizes in bytes: 1KB, 16KB, 64KB, 256KB, 1MB
  dataSizes: [1, 16, 64, 256, 1024].map((kib) => kib * 1024),
}
27
+
28
/**
 * Build a single variant by running `cargo build` for wasm32-unknown-unknown.
 *
 * @param {object} opts
 * @param {string} opts.crateDir - Directory cargo is executed in.
 * @param {string} [opts.targetDir] - Cargo target directory. When omitted, the
 *   artifact path is derived from CARGO_TARGET_DIR (if set in the environment)
 *   or `<crateDir>/target`.
 * @param {string} opts.wasmFileStem - Artifact file name without the `.wasm` extension.
 * @param {{name: string, rustflags: string, features?: string[]}} opts.variant
 *   Variant settings; `rustflags` is exported as RUSTFLAGS for the build.
 * @param {boolean} [opts.release] - Pass `--release` and look in the release profile directory.
 * @returns {string} Path where the built `.wasm` file is expected.
 * @throws Re-throws the execSync error after logging it when the build fails.
 */
function buildVariant({ crateDir, targetDir, wasmFileStem, variant, release }) {
  const { name, rustflags, features } = variant

  console.log(` Building ${name}...`)

  const args = ['build', '--target', 'wasm32-unknown-unknown']
  if (release) args.push('--release')
  if (features && features.length > 0) {
    args.push('--features', features.join(','))
  }

  const env = { ...process.env, RUSTFLAGS: rustflags }
  if (targetDir) env.CARGO_TARGET_DIR = targetDir

  // targetDir is optional for the build itself, so it must also be optional
  // when locating the artifact: join(undefined, ...) would throw a TypeError.
  const artifactRoot = targetDir || env.CARGO_TARGET_DIR || join(crateDir, 'target')

  try {
    // stdio 'pipe' keeps cargo's output off the console; on failure it is
    // available on the thrown error.
    execSync(`cargo ${args.join(' ')}`, {
      cwd: crateDir,
      env,
      stdio: 'pipe',
    })
  } catch (err) {
    console.error(`Failed to build ${name}:`, err.message)
    throw err
  }

  const profile = release ? 'release' : 'debug'
  return join(artifactRoot, 'wasm32-unknown-unknown', profile, `${wasmFileStem}.wasm`)
}
59
+
60
/**
 * Run the simd-detect binary on a WASM file and parse its JSON output.
 *
 * This is best-effort: a missing binary or a failing run produces a warning
 * and a `null` result instead of an exception.
 *
 * @param {string} wasmPath - Path of the .wasm file to analyze.
 * @param {string} variantName - Value passed to simd-detect's `--variant` option.
 * @returns {object|null} Parsed JSON printed by simd-detect, or null when the
 *   binary is missing, exits with an error, or prints invalid JSON.
 */
function runSimdDetect(wasmPath, variantName) {
  if (!existsSync(SIMD_DETECT_PATH)) {
    console.warn('simd-detect not found, skipping SIMD analysis')
    return null
  }

  try {
    // The variant name is quoted like the two paths so that a name containing
    // whitespace stays a single argument.
    // NOTE(review): this still goes through a shell; names containing quotes or
    // shell metacharacters are not escaped.
    const output = execSync(
      `"${SIMD_DETECT_PATH}" "${wasmPath}" --variant "${variantName}"`,
      {
        encoding: 'utf-8',
        maxBuffer: 10 * 1024 * 1024,
      }
    )
    return JSON.parse(output)
  } catch (err) {
    console.warn(`simd-detect failed: ${err.message}`)
    return null
  }
}
80
+
81
/**
 * Generate the variant build matrix from the `simd` configuration.
 *
 * The matrix always starts with `scalar` and `autovec`. Each key of
 * `simdConfig.features` adds one `explicit-*` variant that enables just that
 * cargo feature, and when at least one feature is configured a final
 * `explicit-all` variant enables `simdConfig.allFeature` (default
 * `'explicit-simd'`).
 *
 * @param {{features?: Object<string, ({name?: string}|null)>, allFeature?: string}} [simdConfig]
 * @returns {Array<{name: string, description: string, rustflags: string, features: string[], simd: boolean}>}
 */
function generateVariants(simdConfig) {
  const SCALAR_RUSTFLAGS = '-C opt-level=3'
  const SIMD_RUSTFLAGS = '-C opt-level=3 -C target-feature=+simd128'

  const featureEntries = Object.entries(simdConfig?.features || {})

  const variants = [
    // Scalar: no SIMD
    {
      name: 'scalar',
      description: 'Scalar baseline (no SIMD)',
      rustflags: SCALAR_RUSTFLAGS,
      features: [],
      simd: false,
    },
    // Autovec: +simd128 but no explicit features
    {
      name: 'autovec',
      description: 'LLVM autovectorization (+simd128)',
      rustflags: SIMD_RUSTFLAGS,
      features: [],
      simd: true,
    },
  ]

  // Individual explicit features
  for (const [featureName, featureConfig] of featureEntries) {
    // A feature may be declared with a null/undefined config; fall back to the
    // cargo feature name instead of crashing on the property access.
    const displayName = featureConfig?.name || featureName
    // Avoid producing "explicit-explicit-..." when the name already has the prefix.
    const variantName = displayName.startsWith('explicit-')
      ? displayName
      : `explicit-${displayName}`
    variants.push({
      name: variantName,
      description: `Explicit SIMD: ${displayName}`,
      rustflags: SIMD_RUSTFLAGS,
      features: [featureName],
      simd: true,
    })
  }

  // All explicit features combined
  if (featureEntries.length > 0) {
    variants.push({
      name: 'explicit-all',
      description: 'All explicit SIMD features',
      rustflags: SIMD_RUSTFLAGS,
      features: [simdConfig.allFeature || 'explicit-simd'],
      simd: true,
    })
  }

  return variants
}
137
+
138
/**
 * Attribute each explicit variant's SIMD op count to the compiler or to
 * hand-written SIMD.
 *
 * For every report other than `scalar` and `autovec`, `compiler_added` is the
 * autovec count minus the scalar count and `explicit_added` is the variant's
 * count minus the autovec count. When either the scalar or the autovec report
 * is missing, the summary is left empty.
 *
 * @param {Object<string, {total_simd_ops: number}>} simdReports - Reports keyed by variant name.
 * @returns {{summary: Object<string, object>}}
 */
function computeProvenance(simdReports) {
  const { scalar: scalarReport, autovec: autovecReport } = simdReports
  const summary = {}

  if (scalarReport && autovecReport) {
    const scalarOps = scalarReport.total_simd_ops
    const autovecOps = autovecReport.total_simd_ops

    for (const variantName of Object.keys(simdReports)) {
      const isBaseline = variantName === 'scalar' || variantName === 'autovec'
      if (isBaseline) continue

      const explicitOps = simdReports[variantName].total_simd_ops
      summary[variantName] = {
        total_scalar: scalarOps,
        total_autovec: autovecOps,
        total_explicit: explicitOps,
        compiler_added: autovecOps - scalarOps,
        explicit_added: explicitOps - autovecOps,
      }
    }
  }

  return { summary }
}
163
+
164
/**
 * Read a .wasm file from disk, instantiate it with an empty import object,
 * and hand back the instance's exports.
 *
 * @param {string} wasmPath - Path of the WebAssembly binary.
 * @returns {Promise<WebAssembly.Exports>} The instantiated module's exports.
 */
async function loadWasmModule(wasmPath) {
  const binary = readFileSync(wasmPath)
  // Given raw bytes, instantiate() compiles and instantiates in one step and
  // resolves to { module, instance }.
  const { instance } = await WebAssembly.instantiate(binary, {})
  return instance.exports
}
173
+
174
/**
 * Summarize a list of timing samples.
 *
 * The input is copied before sorting, so the caller's array is left untouched.
 * `p5` and `p95` are the elements at index floor(len * 0.05) and
 * floor(len * 0.95) of the sorted samples, and `stddev` is the population
 * standard deviation (the squared deviations are divided by len).
 *
 * @param {number[]} times - Sample values.
 * @returns {{mean: number, median: number, min: number, max: number, p5: number, p95: number, stddev: number}}
 */
function computeStats(times) {
  const ordered = Array.from(times)
  ordered.sort((x, y) => x - y)
  const count = ordered.length

  let total = 0
  for (const value of ordered) total = total + value
  const mean = total / count

  const half = count / 2
  let median
  if (count % 2 === 0) {
    median = (ordered[half - 1] + ordered[half]) / 2
  } else {
    median = ordered[Math.floor(half)]
  }

  let squaredDeviation = 0
  for (const value of ordered) {
    squaredDeviation = squaredDeviation + (value - mean) ** 2
  }
  const stddev = Math.sqrt(squaredDeviation / count)

  return {
    mean,
    median,
    min: ordered[0],
    max: ordered[count - 1],
    p5: ordered[Math.floor(count * 0.05)],
    p95: ordered[Math.floor(count * 0.95)],
    stddev,
  }
}
192
+
193
/**
 * Produce random benchmark input.
 *
 * Encode input (`forDecode` false) is `size` random bytes. Decode input is a
 * run of random characters from the base64 alphabet, with no padding, whose
 * length is `size` rounded down to a multiple of 4.
 *
 * @param {number} size - Requested length in bytes.
 * @param {boolean} [forDecode=false] - Produce base64 text instead of raw bytes.
 * @param {boolean} [urlSafe=false] - Use the `-_` alphabet instead of `+/` (decode input only).
 * @returns {Uint8Array}
 */
function generateTestData(size, forDecode = false, urlSafe = false) {
  const randomBelow = (limit) => Math.floor(Math.random() * limit)

  if (forDecode) {
    const alphabet = urlSafe
      ? 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
      : 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
    // Round down to whole 4-character groups.
    const textLength = Math.floor(size / 4) * 4
    const encoded = new Uint8Array(textLength)
    for (let pos = 0; pos < textLength; pos += 1) {
      encoded[pos] = alphabet.charCodeAt(randomBelow(64))
    }
    return encoded
  }

  const raw = new Uint8Array(size)
  for (let pos = 0; pos < size; pos += 1) {
    raw[pos] = randomBelow(256)
  }
  return raw
}
219
+
220
/**
 * Current high-resolution timer reading in milliseconds, as a float.
 *
 * Combines the [seconds, nanoseconds] pair returned by process.hrtime(), so
 * the value is only meaningful as a difference between two readings.
 *
 * @returns {number}
 */
function hrtimeMs() {
  const reading = process.hrtime()
  const wholeSecondsMs = reading[0] * 1000
  const nanosMs = reading[1] / 1e6
  return wholeSecondsMs + nanosMs
}
227
+
228
/**
 * Run throughput benchmarks on all built variants.
 *
 * For every variant in the manifest the WASM module is loaded and, for every
 * configured export it provides, the function is called with generated input
 * at each configured data size. Timings are taken over batches of calls and
 * converted to MB/s; speedups are then computed relative to the `scalar`
 * variant. A variant that fails to load or throws while running is logged and
 * skipped.
 *
 * @param {object} opts
 * @param {string} opts.distDir - Directory containing the variant .wasm files.
 * @param {{variants: Array<{name: string, path: string}>}} opts.manifest
 * @param {Array<{name: string, abi: string, outSize?: string}>} opts.exports -
 *   Functions to benchmark. `abi` is the WASM export name; `outSize` is an
 *   optional expression in terms of `len` giving the output buffer size.
 * @param {{warmupRuns: number, samples: number, dataSizes: number[]}} opts.benchConfig
 * @returns {Promise<{results: object, sizeSummary: object}|null>} null when no
 *   exports are configured.
 */
async function runBenchmarks({ distDir, manifest, exports, benchConfig }) {
  if (!exports || exports.length === 0) {
    console.log('\n No exports defined, skipping performance benchmarks')
    return null
  }

  const { warmupRuns, samples, dataSizes } = benchConfig
  // The warmup loop doubles as the "does this function work?" check. With
  // warmupRuns set to 0 the result used to stay at its -1 default, which
  // silently skipped every benchmark, so always make at least one call.
  const verifyRuns = Math.max(1, warmupRuns || 0)
  const results = {}

  console.log('\nRunning performance benchmarks...')

  for (const variant of manifest.variants) {
    const wasmPath = join(distDir, variant.path)

    try {
      const wasm = await loadWasmModule(wasmPath)

      // Check for alloc_bytes/free_bytes
      if (!wasm.alloc_bytes || !wasm.free_bytes) {
        console.log(` ${variant.name}: missing alloc/free, skipping`)
        continue
      }

      const variantResult = { functions: {} }
      results[variant.name] = variantResult

      for (const exp of exports) {
        const fn = wasm[exp.abi]
        if (!fn) continue

        const fnResult = { sizes: {} }
        variantResult.functions[exp.name] = fnResult

        // Test all configured sizes to show scaling
        for (const size of dataSizes) {
          // The kind of input is inferred from the export's name.
          const lowerName = exp.name.toLowerCase()
          const testData = generateTestData(
            size,
            lowerName.includes('decode'),
            lowerName.includes('url')
          )
          const actualInputSize = testData.length
          const outSize = resolveOutSize(exp.outSize, actualInputSize)

          // Allocate buffers
          const inPtr = wasm.alloc_bytes(actualInputSize)
          const outPtr = wasm.alloc_bytes(outSize)

          // Copy input data. The view is created after both allocations because
          // growing WebAssembly memory detaches previously obtained buffers.
          new Uint8Array(wasm.memory.buffer).set(testData, inPtr)

          // Warmup and verify function works
          let workingResult = -1
          for (let i = 0; i < verifyRuns; i++) {
            workingResult = fn(inPtr, actualInputSize, outPtr, outSize)
          }

          // Skip if function returns error
          if (workingResult < 0) {
            wasm.free_bytes(inPtr, actualInputSize)
            wasm.free_bytes(outPtr, outSize)
            continue
          }

          // Benchmark - batched iterations for timing accuracy
          const batchSize = Math.max(10, Math.floor(1000000 / actualInputSize))
          const times = []

          for (let i = 0; i < samples; i++) {
            const start = hrtimeMs()
            for (let j = 0; j < batchSize; j++) {
              fn(inPtr, actualInputSize, outPtr, outSize)
            }
            const end = hrtimeMs()
            times.push((end - start) / batchSize)
          }

          // Free buffers
          wasm.free_bytes(inPtr, actualInputSize)
          wasm.free_bytes(outPtr, outSize)

          const stats = computeStats(times)
          // Convert ms to seconds, size to MB. Medians at or below 0.0001 ms are
          // reported as 0 throughput rather than an inflated number.
          const throughputMBps = stats.median > 0.0001
            ? (actualInputSize / (1024 * 1024)) / (stats.median / 1000)
            : 0

          fnResult.sizes[size] = {
            ...stats,
            throughputMBps,
            samples,
            batchSize,
            actualInputSize,
          }
        }
      }

      // Compute overall average throughput for the variant (largest size only for accuracy)
      const funcResults = Object.values(variantResult.functions)
      if (funcResults.length > 0) {
        let totalThroughput = 0
        let count = 0
        for (const f of funcResults) {
          const measuredSizes = Object.keys(f.sizes).map(Number).sort((a, b) => b - a)
          if (measuredSizes.length > 0) {
            totalThroughput += f.sizes[measuredSizes[0]].throughputMBps
            count++
          }
        }
        variantResult.avgThroughputMBps = count > 0 ? totalThroughput / count : 0
      }

      console.log(` ✓ ${variant.name}: ${variantResult.avgThroughputMBps?.toFixed(1) || '?'} MB/s avg`)
    } catch (err) {
      console.log(` ✗ ${variant.name}: ${err.message}`)
    }
  }

  attachSpeedupsVsScalar(results)

  return { results, sizeSummary: summarizeSpeedupsBySize(results) }
}

/**
 * Work out the output buffer size for one input length.
 *
 * Without an expression the output buffer is twice the input length.
 *
 * NOTE(security): the expression is evaluated with direct `eval`, with the
 * input length available to it as `len`. It comes from the export
 * configuration and runs with full access to this process, so it must only
 * ever be sourced from trusted project configuration.
 */
function resolveOutSize(outSizeExpr, len) {
  if (!outSizeExpr) return len * 2
  return eval(outSizeExpr)
}

/**
 * Add `speedups` (per function, per size) and `speedupVsScalar` to every
 * non-scalar variant result, relative to the `scalar` entry. Does nothing when
 * there is no scalar result.
 */
function attachSpeedupsVsScalar(results) {
  const scalarResult = results['scalar']
  if (!scalarResult || !scalarResult.functions) return

  for (const [variantName, variantResult] of Object.entries(results)) {
    if (variantName === 'scalar' || !variantResult.functions) continue

    variantResult.speedups = {}

    for (const [fnName, fnResult] of Object.entries(variantResult.functions)) {
      const scalarFn = scalarResult.functions[fnName]
      if (!scalarFn) continue

      variantResult.speedups[fnName] = {}

      for (const [size, stats] of Object.entries(fnResult.sizes)) {
        const scalarStats = scalarFn.sizes[size]
        if (scalarStats && scalarStats.throughputMBps > 0) {
          variantResult.speedups[fnName][size] = stats.throughputMBps / scalarStats.throughputMBps
        }
      }
    }

    // Overall speedup (avg of largest size across all functions)
    if (scalarResult.avgThroughputMBps > 0) {
      variantResult.speedupVsScalar = variantResult.avgThroughputMBps / scalarResult.avgThroughputMBps
    }
  }
}

/**
 * Build `{ [size]: { [variantName]: averageSpeedup } }`, averaging each
 * variant's speedup across all functions measured at that size.
 */
function summarizeSpeedupsBySize(results) {
  const allSizes = new Set()
  for (const result of Object.values(results)) {
    if (!result.functions) continue
    for (const fnResult of Object.values(result.functions)) {
      for (const size of Object.keys(fnResult.sizes)) {
        allSizes.add(Number(size))
      }
    }
  }

  const sizeSummary = {}
  for (const size of [...allSizes].sort((a, b) => a - b)) {
    sizeSummary[size] = {}

    for (const [variantName, variantResult] of Object.entries(results)) {
      if (variantName === 'scalar' || !variantResult.speedups) continue

      let totalSpeedup = 0
      let count = 0
      for (const fnSpeedups of Object.values(variantResult.speedups)) {
        if (fnSpeedups[size]) {
          totalSpeedup += fnSpeedups[size]
          count++
        }
      }

      if (count > 0) {
        sizeSummary[size][variantName] = totalSpeedup / count
      }
    }
  }
  return sizeSummary
}
425
+
426
/**
 * Build every variant in the matrix, copy each artifact into
 * `<outputDir>/dist`, run SIMD detection on it, and write
 * `dist/manifest.json`.
 *
 * A variant whose build, copy, or analysis throws is logged and left out of
 * the manifest; the remaining variants are still processed.
 *
 * @param {object} opts
 * @param {object} opts.cfg - Reads `simd`, `crateDir`, `crateName`,
 *   `wasmFileStem` and `release`.
 * @param {string} opts.outputDir - Directory that receives `dist/` and `simd_out/`.
 * @returns {{manifest: object, simdReports: object, provenance: object, distDir: string, simdOutDir: string}}
 */
export function buildVariantsAndAnalyze({ cfg, outputDir }) {
  const variants = generateVariants(cfg.simd || {})

  console.log(`\nBuilding ${variants.length} WASM variants...`)

  // Ask cargo where it puts artifacts; fall back to <crateDir>/target when the
  // metadata call fails or does not report a target directory.
  const fallbackTargetDir = () => join(cfg.crateDir, 'target')
  let targetDir
  try {
    const metadataJson = execSync('cargo metadata --format-version 1 --no-deps', {
      cwd: cfg.crateDir,
      stdio: ['ignore', 'pipe', 'inherit'],
    }).toString()
    targetDir = JSON.parse(metadataJson)?.target_directory || fallbackTargetDir()
  } catch {
    targetDir = fallbackTargetDir()
  }

  // Create output directories
  const distDir = join(outputDir, 'dist')
  const simdOutDir = join(outputDir, 'simd_out')
  for (const dir of [distDir, simdOutDir]) {
    mkdirSync(dir, { recursive: true })
  }

  const manifest = {
    generated: new Date().toISOString(),
    crate: cfg.crateName,
    variants: [],
  }
  const simdReports = {}

  for (const variant of variants) {
    const { name } = variant
    try {
      // Build, then copy right away: buildVariant derives the artifact path
      // from the file stem and profile only, so it is the same for every variant.
      const builtWasm = buildVariant({
        crateDir: cfg.crateDir,
        targetDir,
        wasmFileStem: cfg.wasmFileStem,
        variant,
        release: cfg.release,
      })
      const fileName = `${name}.wasm`
      const distWasm = join(distDir, fileName)
      copyFileSync(builtWasm, distWasm)

      // Compute hash and size (first 16 hex characters of the SHA-256 digest)
      const bytes = readFileSync(distWasm)
      const digest = createHash('sha256').update(bytes).digest('hex')

      manifest.variants.push({
        name,
        description: variant.description,
        path: fileName,
        hash: digest.slice(0, 16),
        size: bytes.length,
        features: variant.features,
        simd: variant.simd,
      })

      console.log(` ✓ ${name} (${bytes.length} bytes)`)

      // Run SIMD analysis
      const report = runSimdDetect(distWasm, name)
      if (report) {
        simdReports[name] = report
        const reportPath = join(simdOutDir, `${name}.json`)
        writeFileSync(reportPath, JSON.stringify(report, null, 2))
        const densityPct = (report.overall_simd_density * 100).toFixed(1)
        console.log(` SIMD: ${report.total_simd_ops} ops (${densityPct}%)`)
      }
    } catch (err) {
      console.error(` ✗ ${name}: ${err.message}`)
    }
  }

  // Write manifest
  writeFileSync(join(distDir, 'manifest.json'), JSON.stringify(manifest, null, 2))

  // Compute provenance
  const provenance = computeProvenance(simdReports)

  return { manifest, simdReports, provenance, distDir, simdOutDir }
}
516
+
517
/**
 * Render a byte count as a whole number of MB (1048576 bytes and up) or KB.
 *
 * @param {number} bytes
 * @returns {string} e.g. "64 KB" or "1 MB"
 */
function formatSize(bytes) {
  const KIB = 1024
  const MIB = 1048576
  const [divisor, unit] = bytes >= MIB ? [MIB, 'MB'] : [KIB, 'KB']
  return `${(bytes / divisor).toFixed(0)} ${unit}`
}
524
+
525
/**
 * Generate the Markdown report.
 *
 * Always emits the build-variant, SIMD-instruction and provenance tables; the
 * performance summary and the speedup-by-size table are appended only when the
 * corresponding data is present.
 *
 * @param {object} results - Report data: `generated`, `manifest`,
 *   `simdReports`, `provenance`, `benchResults` and `sizeSummary`.
 * @returns {string} Markdown document.
 */
function generateMarkdownReport(results) {
  const { manifest, simdReports, provenance, benchResults, sizeSummary } = results
  const variants = manifest.variants

  // Deltas can be negative (a variant may contain fewer SIMD ops than the one
  // it is compared against); a hard-coded "+" prefix would render "+-3".
  const signed = (delta) => (delta < 0 ? `${delta}` : `+${delta}`)

  // Only the scalar variant is the baseline. Any other variant without a
  // computed speedup (for example when the scalar run failed) has no ratio.
  const speedupLabel = (name, speedup) => {
    if (speedup) return `${speedup.toFixed(2)}x`
    return name === 'scalar' ? '1.00x (baseline)' : 'n/a'
  }

  let md = `# SIMD Analysis Report

**Crate:** ${manifest.crate}
**Generated:** ${results.generated}

## Build Variants

| Variant | Description | Features | Size |
|---------|-------------|----------|------|
${variants.map(v => `| ${v.name} | ${v.description} | ${v.features.length ? v.features.join(', ') : 'none'} | ${formatSize(v.size)} |`).join('\n')}

## SIMD Instruction Analysis

| Variant | Total Ops | SIMD Ops | Density | Size |
|---------|-----------|----------|---------|------|
${Object.entries(simdReports).map(([name, s]) =>
  `| ${name} | ${s.total_ops} | ${s.total_simd_ops} | ${(s.overall_simd_density * 100).toFixed(1)}% | ${formatSize(s.wasm_size)} |`
).join('\n')}

## SIMD Provenance

| Variant | Scalar | Autovec | Explicit | Compiler Added | Explicit Added |
|---------|--------|---------|----------|----------------|----------------|
${Object.entries(provenance.summary).map(([name, p]) =>
  `| ${name} | ${p.total_scalar} | ${p.total_autovec} | ${p.total_explicit} | ${signed(p.compiler_added)} | ${signed(p.explicit_added)} |`
).join('\n')}
`

  if (benchResults && Object.keys(benchResults).length > 0) {
    md += `
## Performance Summary

| Variant | Throughput | Speedup vs Scalar |
|---------|------------|-------------------|
${Object.entries(benchResults).map(([name, r]) =>
  `| ${name} | ${r.avgThroughputMBps?.toFixed(1) || '?'} MB/s | ${speedupLabel(name, r.speedupVsScalar)} |`
).join('\n')}
`

    if (sizeSummary && Object.keys(sizeSummary).length > 0) {
      const sizes = Object.keys(sizeSummary).map(Number).sort((a, b) => a - b)
      // Row labels are taken from the smallest size's entry.
      const variantNames = Object.keys(sizeSummary[sizes[0]] || {})

      md += `
## Speedup by Data Size

Shows how SIMD benefits scale with input size.

| Variant | ${sizes.map(s => formatSize(s)).join(' | ')} |
|---------|${sizes.map(() => '------').join('|')}|
| scalar | ${sizes.map(() => '1.00x').join(' | ')} |
${variantNames.map(name => {
  const speedups = sizes.map(size => {
    const s = sizeSummary[size]?.[name]
    return s ? `${s.toFixed(1)}x` : '-'
  }).join(' | ')
  return `| ${name} | ${speedups} |`
}).join('\n')}
`
    }
  }

  return md
}
597
+
598
/**
 * Generate the standalone HTML report.
 *
 * Crate, variant, feature and function names and descriptions originate from
 * configuration, so they are HTML-escaped before being placed in the markup.
 *
 * @param {object} results - Report data: `generated`, `manifest`,
 *   `simdReports`, `provenance`, `benchResults` and `sizeSummary`.
 * @returns {string} Complete HTML document.
 */
function generateHtmlReport(results) {
  const { manifest, simdReports, provenance, benchResults, sizeSummary } = results
  const variants = manifest.variants

  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch])

  // Only the scalar variant is the baseline. Any other variant without a
  // computed speedup (for example when the scalar run failed) has no ratio.
  const overallSpeedupLabel = (name, speedup) => {
    if (speedup) return `${speedup.toFixed(2)}x`
    return name === 'scalar' ? '1.00x (baseline)' : 'n/a'
  }

  const simdTable = Object.keys(simdReports).length > 0 ? `
    <h3>SIMD Instruction Analysis</h3>
    <table>
      <thead>
        <tr><th>Variant</th><th>Total Ops</th><th>SIMD Ops</th><th>Density</th><th>Size</th></tr>
      </thead>
      <tbody>
        ${variants.map(v => {
          const s = simdReports[v.name]
          if (!s) return ''
          return `<tr>
          <td class="variant">${esc(v.name)}</td>
          <td>${s.total_ops}</td>
          <td>${s.total_simd_ops}</td>
          <td>${(s.overall_simd_density * 100).toFixed(1)}%</td>
          <td>${(s.wasm_size / 1024).toFixed(1)} KB</td>
        </tr>`
        }).join('')}
      </tbody>
    </table>` : ''

  const provenanceTable = Object.keys(provenance.summary).length > 0 ? `
    <h3>SIMD Provenance</h3>
    <table>
      <thead>
        <tr><th>Variant</th><th>Scalar</th><th>Autovec</th><th>Explicit</th><th>Compiler +</th><th>Explicit +</th></tr>
      </thead>
      <tbody>
        ${Object.entries(provenance.summary).map(([name, p]) => `
        <tr>
          <td class="variant">${esc(name)}</td>
          <td>${p.total_scalar}</td>
          <td>${p.total_autovec}</td>
          <td>${p.total_explicit}</td>
          <td class="compiler">${p.compiler_added}</td>
          <td class="explicit">${p.explicit_added}</td>
        </tr>
        `).join('')}
      </tbody>
    </table>` : ''

  // Speedup by size table; row labels are taken from the smallest size's entry.
  const sizes = sizeSummary ? Object.keys(sizeSummary).map(Number).sort((a, b) => a - b) : []
  const variantNames = sizes.length > 0 ? Object.keys(sizeSummary[sizes[0]] || {}) : []

  const speedupBySize = sizeSummary && sizes.length > 0 ? `
    <h3>Speedup by Data Size</h3>
    <p class="note">Shows how SIMD benefits scale with input size</p>
    <table>
      <thead>
        <tr>
          <th>Variant</th>
          ${sizes.map(s => `<th>${formatSize(s)}</th>`).join('')}
        </tr>
      </thead>
      <tbody>
        <tr>
          <td class="variant">scalar</td>
          ${sizes.map(() => `<td class="baseline">1.00x</td>`).join('')}
        </tr>
        ${variantNames.map(name => `
        <tr>
          <td class="variant">${esc(name)}</td>
          ${sizes.map(size => {
            const speedup = sizeSummary[size]?.[name]
            const cls = speedup > 5 ? 'speedup-high' : speedup > 1.5 ? 'speedup' : ''
            return `<td class="${cls}">${speedup ? speedup.toFixed(1) + 'x' : '-'}</td>`
          }).join('')}
        </tr>
        `).join('')}
      </tbody>
    </table>` : ''

  const benchTable = benchResults && Object.keys(benchResults).length > 0 ? `
    <h3>Performance Summary (Largest Size)</h3>
    <table>
      <thead>
        <tr><th>Variant</th><th>Avg Throughput</th><th>Speedup vs Scalar</th></tr>
      </thead>
      <tbody>
        ${Object.entries(benchResults).map(([name, r]) => `
        <tr>
          <td class="variant">${esc(name)}</td>
          <td>${r.avgThroughputMBps?.toFixed(1) || '?'} MB/s</td>
          <td class="${r.speedupVsScalar > 1.1 ? 'speedup' : ''}">${overallSpeedupLabel(name, r.speedupVsScalar)}</td>
        </tr>
        `).join('')}
      </tbody>
    </table>

    ${speedupBySize}

    <details>
      <summary>Detailed Results by Function &amp; Size</summary>
      ${Object.entries(benchResults).map(([variantName, variantResult]) => {
        if (!variantResult.functions) return ''
        return Object.entries(variantResult.functions).map(([fnName, fnResult]) => `
      <h4>${esc(variantName)} → ${esc(fnName)}</h4>
      <table>
        <thead>
          <tr><th>Size</th><th>Median</th><th>Throughput</th><th>Speedup</th></tr>
        </thead>
        <tbody>
          ${Object.entries(fnResult.sizes).map(([size, stats]) => {
            const speedup = variantResult.speedups?.[fnName]?.[size]
            return `
          <tr>
            <td>${formatSize(Number(size))}</td>
            <td>${stats.median.toFixed(3)} ms</td>
            <td>${stats.throughputMBps.toFixed(1)} MB/s</td>
            <td class="${speedup > 1.5 ? 'speedup' : ''}">${speedup ? speedup.toFixed(2) + 'x' : '1.00x'}</td>
          </tr>
          `
          }).join('')}
        </tbody>
      </table>
      `).join('')
      }).join('')}
    </details>` : ''

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>SIMD Analysis Report</title>
<style>
:root { --bg: #0a0a0a; --surface: #141414; --border: #2a2a2a; --text: #e8e8e8; --dim: #777; --accent: #ff6b35; --green: #4ade80; --purple: #c084fc; --orange: #fb923c; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: 'SF Mono', monospace; background: var(--bg); color: var(--text); padding: 2rem; line-height: 1.6; }
h1 { color: var(--accent); margin-bottom: 0.5rem; }
h2 { margin: 2rem 0 1rem; border-bottom: 1px solid var(--border); padding-bottom: 0.5rem; }
h3 { color: var(--accent); margin: 1.5rem 0 0.75rem; font-size: 1rem; }
table { width: 100%; border-collapse: collapse; background: var(--surface); border-radius: 8px; overflow: hidden; font-size: 0.85rem; margin-bottom: 1rem; }
th, td { padding: 0.6rem 0.75rem; text-align: left; border-bottom: 1px solid var(--border); }
th { background: rgba(255,107,53,0.1); color: var(--accent); font-size: 0.75rem; text-transform: uppercase; }
.variant { font-weight: 500; }
.compiler { color: var(--orange); }
.explicit { color: var(--purple); }
.speedup { color: var(--green); }
.speedup-high { color: var(--green); font-weight: bold; background: rgba(74,222,128,0.1); }
.baseline { color: var(--dim); }
.note { color: var(--dim); font-size: 0.75rem; margin-bottom: 0.5rem; }
h4 { color: var(--dim); margin: 1rem 0 0.5rem; font-size: 0.85rem; }
pre { background: var(--surface); padding: 1rem; border-radius: 8px; overflow-x: auto; font-size: 0.75rem; color: var(--dim); }
.subtitle { color: var(--dim); font-size: 0.85rem; margin-bottom: 2rem; }
details { margin-top: 1.5rem; }
summary { cursor: pointer; color: var(--accent); font-weight: 500; margin-bottom: 1rem; }
summary:hover { text-decoration: underline; }
</style>
</head>
<body>
<h1>SIMD Analysis Report</h1>
<p class="subtitle">Crate: ${esc(manifest.crate)} • Generated: ${esc(results.generated)}</p>

<h2>Variants</h2>
<table>
<thead><tr><th>Name</th><th>Description</th><th>Features</th><th>Size</th></tr></thead>
<tbody>
${variants.map(v => `<tr>
<td class="variant">${esc(v.name)}</td>
<td>${esc(v.description)}</td>
<td>${v.features.length ? esc(v.features.join(', ')) : 'none'}</td>
<td>${(v.size / 1024).toFixed(1)} KB</td>
</tr>`).join('')}
</tbody>
</table>

<h2>SIMD Analysis</h2>
${simdTable}
${provenanceTable}

${benchTable}
</body>
</html>`
}
780
+
781
/**
 * Main bench command entry point.
 *
 * Builds all variants, runs SIMD analysis and performance benchmarks, writes
 * `report.json`, `report.html` and `report.md` into the output directory, and
 * prints a summary to the console.
 *
 * @param {object} cfg - Project configuration. Reads `crateDir`, `bench`
 *   (overrides for the default benchmark configuration) and `exports`; the
 *   whole object is also passed on to buildVariantsAndAnalyze.
 * @param {{clean?: boolean}} [cliOpts] - `clean` removes an existing output
 *   directory before building.
 * @returns {Promise<void>}
 */
export async function runBench(cfg, cliOpts = {}) {
  const benchConfig = {
    ...DEFAULT_BENCH_CONFIG,
    ...(cfg.bench || {}),
  }

  const outputDir = join(cfg.crateDir, benchConfig.outputDir)

  // Clean if requested
  if (cliOpts.clean && existsSync(outputDir)) {
    rmSync(outputDir, { recursive: true })
  }
  mkdirSync(outputDir, { recursive: true })

  console.log('SIMD Variant Build & Analysis')
  console.log('═'.repeat(50))

  // Build variants and analyze
  const { manifest, simdReports, provenance, distDir } =
    buildVariantsAndAnalyze({ cfg, outputDir })

  // Run performance benchmarks
  const benchData = await runBenchmarks({
    distDir,
    manifest,
    exports: cfg.exports,
    benchConfig,
  })

  const benchResults = benchData?.results || null
  const sizeSummary = benchData?.sizeSummary || null

  // Generate report
  const results = {
    generated: new Date().toISOString(),
    manifest,
    simdReports,
    provenance,
    benchResults,
    sizeSummary,
  }

  // Write JSON report
  const jsonPath = join(outputDir, 'report.json')
  writeFileSync(jsonPath, JSON.stringify(results, null, 2))

  // Write HTML report
  const htmlPath = join(outputDir, 'report.html')
  writeFileSync(htmlPath, generateHtmlReport(results))

  // Write Markdown report
  const mdPath = join(outputDir, 'report.md')
  writeFileSync(mdPath, generateMarkdownReport(results))

  console.log('\n' + '═'.repeat(50))
  console.log('Summary')
  console.log('═'.repeat(50))

  console.log('\nSIMD Analysis:')
  for (const [name, report] of Object.entries(simdReports)) {
    console.log(` ${name}: ${report.total_simd_ops} SIMD ops`)
  }

  if (Object.keys(provenance.summary).length > 0) {
    // Deltas can be negative; a hard-coded "+" prefix would print "+-3".
    const signed = (delta) => (delta < 0 ? `${delta}` : `+${delta}`)

    console.log('\nProvenance:')
    for (const [name, p] of Object.entries(provenance.summary)) {
      console.log(` ${name}: ${signed(p.compiler_added)} compiler, ${signed(p.explicit_added)} explicit`)
    }
  }

  if (benchResults) {
    console.log('\nPerformance (largest size):')
    for (const [name, result] of Object.entries(benchResults)) {
      // Only scalar is the baseline; other variants without a computed speedup
      // (for example when the scalar run failed) get no suffix.
      let speedup = ''
      if (result.speedupVsScalar) {
        speedup = ` (${result.speedupVsScalar.toFixed(2)}x vs scalar)`
      } else if (name === 'scalar') {
        speedup = ' (baseline)'
      }
      console.log(` ${name}: ${result.avgThroughputMBps?.toFixed(1) || '?'} MB/s${speedup}`)
    }

    if (sizeSummary && Object.keys(sizeSummary).length > 0) {
      console.log('\nSpeedup by data size:')
      const sizes = Object.keys(sizeSummary).map(Number).sort((a, b) => a - b)
      // Row labels are taken from the smallest size's entry.
      const variants = Object.keys(sizeSummary[sizes[0]] || {})
      const COLUMN_WIDTH = 8

      // Header
      const sizeLabels = sizes.map(s => s >= 1048576 ? `${(s/1048576).toFixed(0)}MB` : `${(s/1024).toFixed(0)}KB`)
      console.log(` ${'Variant'.padEnd(20)} ${sizeLabels.map(l => l.padStart(COLUMN_WIDTH)).join(' ')}`)

      // Rows; the placeholder is padded like every other cell so columns stay aligned
      for (const variant of variants) {
        const speedups = sizes.map(size => {
          const s = sizeSummary[size]?.[variant]
          return (s ? `${s.toFixed(1)}x` : '-').padStart(COLUMN_WIDTH)
        }).join(' ')
        console.log(` ${variant.padEnd(20)} ${speedups}`)
      }
    }
  }

  console.log(`\nReports written to: ${outputDir}`)
  console.log(` - ${jsonPath}`)
  console.log(` - ${htmlPath}`)
  console.log(` - ${mdPath}`)
}