@sparkleideas/performance 3.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,459 @@
1
+ /**
2
+ * @sparkleideas/performance - Flash Attention Benchmarks
3
+ *
4
+ * Comprehensive benchmark suite for Flash Attention performance validation.
5
+ * Validates 2.49x-7.47x speedup targets and memory efficiency improvements.
6
+ */
7
+
8
+ import {
9
+ FlashAttentionOptimizer,
10
+ createFlashAttentionOptimizer,
11
+ type BenchmarkResult,
12
+ type AttentionInput,
13
+ } from './attention-integration.js';
14
+ import {
15
+ FlashAttention,
16
+ DotProductAttention,
17
+ MultiHeadAttention,
18
+ LinearAttention,
19
+ } from '@ruvector/attention';
20
+
21
+ // ============================================================================
22
+ // Types
23
+ // ============================================================================
24
+
/**
 * Outcome of timing Flash Attention against a baseline attention
 * implementation for a single configuration.
 */
export interface ComparisonBenchmark {
  /** Human-readable label for this configuration. */
  name: string;
  /** Length of the query, key and value vectors. */
  dimension: number;
  /** Number of key/value vectors attended over. */
  numKeys: number;
  /** Number of timed calls made per implementation. */
  iterations: number;
  /** Measurements for both implementations plus the derived ratios. */
  results: {
    /** Flash Attention measurements. */
    flash: {
      /** Mean wall-clock time of one call, in milliseconds. */
      averageTimeMs: number;
      /** Calls per second derived from the mean time. */
      opsPerSecond: number;
      /** Memory attributed to the run, in bytes, when it was measured. */
      memoryUsageBytes?: number;
    };
    /** Baseline attention measurements. */
    baseline: {
      /** Mean wall-clock time of one call, in milliseconds. */
      averageTimeMs: number;
      /** Calls per second derived from the mean time. */
      opsPerSecond: number;
      /** Memory attributed to the run, in bytes, when it was measured. */
      memoryUsageBytes?: number;
    };
    /** Baseline mean time divided by Flash mean time. */
    speedup: number;
    /** Memory saved by Flash relative to baseline, as a percentage. */
    memoryReduction?: number;
  };
  /** Whether the measured speedup reached the minimum target. */
  meetsTarget: boolean;
  /** When this result was produced. */
  timestamp: Date;
}
47
+
/**
 * A named collection of comparison benchmarks together with aggregate
 * statistics computed over them.
 */
export interface SuiteResult {
  /** Display name of the suite. */
  suiteName: string;
  /** Individual benchmark results, in execution order. */
  benchmarks: ComparisonBenchmark[];
  /** Aggregates derived from `benchmarks`. */
  summary: {
    /** Arithmetic mean of the per-benchmark speedups. */
    averageSpeedup: number;
    /** Smallest per-benchmark speedup. */
    minSpeedup: number;
    /** Largest per-benchmark speedup. */
    maxSpeedup: number;
    /** Count of benchmarks whose `meetsTarget` flag is set. */
    targetsMet: number;
    /** Total number of benchmarks in the suite. */
    totalBenchmarks: number;
    /** `targetsMet / totalBenchmarks`, expressed as a percentage. */
    successRate: number;
  };
  /** When the suite result was assembled. */
  timestamp: Date;
}
61
+
/**
 * Memory comparison between Flash Attention and the baseline for one
 * dimension / key-count configuration.
 */
export interface MemoryProfile {
  /** Length of the query, key and value vectors. */
  dimension: number;
  /** Number of key/value vectors used for the measurement. */
  numKeys: number;
  /** Memory attributed to the Flash Attention run, in bytes. */
  flashMemoryBytes: number;
  /** Memory attributed to the baseline run, in bytes. */
  baselineMemoryBytes: number;
  /** Saving of Flash relative to baseline, as a percentage. */
  reduction: number;
  /** Saving of Flash relative to baseline, in bytes. */
  reductionBytes: number;
}
70
+
71
+ // ============================================================================
72
+ // Benchmark Runner
73
+ // ============================================================================
74
+
/**
 * Runs timing and memory comparisons between `FlashAttention` and
 * `DotProductAttention` and aggregates the results.
 *
 * All derived ratios are guarded so that degenerate measurements (zero
 * elapsed time, zero heap growth, an empty suite) produce `0` instead of
 * `NaN` or `Infinity`.
 */
export class AttentionBenchmarkRunner {
  /** Lower bound of the V3 speedup target range. */
  private static readonly MIN_TARGET_SPEEDUP = 2.49;

  /** Upper bound of the V3 speedup target range. */
  private static readonly MAX_TARGET_SPEEDUP = 7.47;

  /** Second constructor argument (block size) used for every FlashAttention instance. */
  private static readonly FLASH_BLOCK_SIZE = 64;

  /**
   * Run the fixed set of comparison benchmarks, from small to large
   * dimensions, and summarise them.
   *
   * @returns The per-configuration results plus summary statistics.
   */
  runComprehensiveSuite(): SuiteResult {
    // Each entry is [dimension, numKeys, iterations].
    const configs: ReadonlyArray<readonly [number, number, number]> = [
      [128, 50, 1000], // Small: Mobile/edge devices
      [256, 100, 1000], // Medium: Standard use cases
      [512, 100, 1000], // Large: High-performance scenarios
      [768, 150, 500], // XL: Transformer models
      [1024, 200, 500], // XXL: Large language models
    ];

    const benchmarks = configs.map(([dim, numKeys, iterations]) =>
      this.runComparison(dim, numKeys, iterations)
    );

    return this.createSuiteResult('Comprehensive Flash Attention Suite', benchmarks);
  }

  /**
   * Time `FlashAttention.computeRaw` against `DotProductAttention.computeRaw`
   * on the same randomly filled query, keys and values.
   *
   * @param dimension - Length of every query/key/value vector.
   * @param numKeys - Number of key/value vectors.
   * @param iterations - Number of timed calls per implementation; must be a
   *   positive integer.
   * @returns Mean times, throughput and speedup for this configuration.
   * @throws RangeError If `iterations` is not a positive integer, because an
   *   average over zero calls is undefined.
   */
  runComparison(
    dimension: number,
    numKeys: number = 100,
    iterations: number = 1000
  ): ComparisonBenchmark {
    if (!Number.isInteger(iterations) || iterations < 1) {
      throw new RangeError(
        `iterations must be a positive integer, received ${iterations}`
      );
    }

    const flash = new FlashAttention(dimension, AttentionBenchmarkRunner.FLASH_BLOCK_SIZE);
    const baseline = new DotProductAttention(dimension);

    // Both implementations are timed on the same random inputs.
    const randomVector = (): Float32Array => {
      const vector = new Float32Array(dimension);
      for (let i = 0; i < vector.length; i++) {
        vector[i] = Math.random();
      }
      return vector;
    };
    const query = randomVector();
    const keys = Array.from({ length: numKeys }, randomVector);
    const values = Array.from({ length: numKeys }, randomVector);

    // NOTE(review): Flash is timed first, so any one-off warm-up cost of the
    // shared inputs would land on the Flash measurement; confirm whether a
    // warm-up pass is wanted.
    const flashAvgMs = this.measureAverageMs(() => {
      flash.computeRaw(query, keys, values);
    }, iterations);
    const baselineAvgMs = this.measureAverageMs(() => {
      baseline.computeRaw(query, keys, values);
    }, iterations);

    // A zero Flash time means the timer could not resolve the run; report a
    // speedup of 0 (target not met) rather than Infinity or NaN.
    const speedup = flashAvgMs > 0 ? baselineAvgMs / flashAvgMs : 0;
    const meetsTarget = speedup >= AttentionBenchmarkRunner.MIN_TARGET_SPEEDUP;

    return {
      name: `Flash Attention ${dimension}D x ${numKeys} keys`,
      dimension,
      numKeys,
      iterations,
      results: {
        flash: {
          averageTimeMs: flashAvgMs,
          opsPerSecond: this.toOpsPerSecond(flashAvgMs),
          memoryUsageBytes: undefined,
        },
        baseline: {
          averageTimeMs: baselineAvgMs,
          opsPerSecond: this.toOpsPerSecond(baselineAvgMs),
          memoryUsageBytes: undefined,
        },
        speedup,
        memoryReduction: undefined,
      },
      meetsTarget,
      timestamp: new Date(),
    };
  }

  /**
   * Profile memory for each dimension, using `min(200, dim / 2)` keys
   * rounded down to a whole number (and never fewer than one).
   *
   * @param dimensions - Vector lengths to profile.
   * @returns One profile per requested dimension, in the same order.
   */
  runMemoryProfile(
    dimensions: number[] = [128, 256, 512, 768, 1024]
  ): MemoryProfile[] {
    return dimensions.map((dim) => {
      // dim / 2 is fractional for odd dimensions; a key count must be whole.
      const numKeys = Math.max(1, Math.floor(Math.min(200, dim / 2)));
      return this.profileMemory(dim, numKeys);
    });
  }

  /**
   * Run comparisons at a fixed dimension with a growing number of keys,
   * stopping at the first configuration that throws.
   *
   * @returns Results for every configuration that completed.
   */
  runStressTest(): ComparisonBenchmark[] {
    const results: ComparisonBenchmark[] = [];

    // Each entry is [dimension, numKeys, iterations]; load grows per step.
    const stressConfigs: ReadonlyArray<readonly [number, number, number]> = [
      [512, 100, 100],
      [512, 500, 100],
      [512, 1000, 100],
      [512, 2000, 50],
      [512, 5000, 50],
    ];

    for (const [dim, numKeys, iterations] of stressConfigs) {
      try {
        results.push(this.runComparison(dim, numKeys, iterations));
      } catch (error: unknown) {
        // Larger configurations are not attempted once one has failed.
        console.error(`Stress test failed at ${numKeys} keys:`, error);
        break;
      }
    }

    return results;
  }

  /**
   * Validate V3 performance targets (2.49x-7.47x speedup) using the
   * benchmark of an optimizer created for dimension 512.
   *
   * @returns Whether the speedup is at least the minimum, whether it is at
   *   most the maximum, the measured speedup, and the target range.
   */
  validateV3Targets(): {
    meetsMinimum: boolean;
    meetsMaximum: boolean;
    actualSpeedup: number;
    target: { min: number; max: number };
  } {
    const min = AttentionBenchmarkRunner.MIN_TARGET_SPEEDUP;
    const max = AttentionBenchmarkRunner.MAX_TARGET_SPEEDUP;
    const optimizer = createFlashAttentionOptimizer(512);
    const result = optimizer.benchmark();

    return {
      meetsMinimum: result.speedup >= min,
      meetsMaximum: result.speedup <= max,
      actualSpeedup: result.speedup,
      target: { min, max },
    };
  }

  /**
   * Measure heap growth around one `compute` call of each implementation.
   *
   * Heap deltas can be zero or negative (for example when no heap reading
   * is available), so both are clamped to zero before any ratio is derived.
   * This keeps the four reported figures consistent with one another.
   *
   * @param dimension - Length of every query/key/value vector.
   * @param numKeys - Number of key/value vectors.
   * @returns Clamped byte counts and the derived reduction.
   */
  private profileMemory(
    dimension: number,
    numKeys: number
  ): MemoryProfile {
    const onesVector = (): Float32Array => new Float32Array(dimension).fill(1);
    const query = onesVector();
    const keys = Array.from({ length: numKeys }, onesVector);
    const values = Array.from({ length: numKeys }, onesVector);

    // Measure Flash Attention memory.
    const flashBefore = this.getMemoryUsage();
    const flashAttention = new FlashAttention(
      dimension,
      AttentionBenchmarkRunner.FLASH_BLOCK_SIZE
    );
    flashAttention.compute(query, keys, values);
    const flashMemoryBytes = Math.max(0, this.getMemoryUsage() - flashBefore);

    // Measure baseline memory.
    const baselineBefore = this.getMemoryUsage();
    const baselineAttention = new DotProductAttention(dimension);
    baselineAttention.compute(query, keys, values);
    const baselineMemoryBytes = Math.max(0, this.getMemoryUsage() - baselineBefore);

    return {
      dimension,
      numKeys,
      flashMemoryBytes,
      baselineMemoryBytes,
      // No percentage can be derived without a positive baseline; report 0.
      reduction: this.calculateMemoryReduction(baselineMemoryBytes, flashMemoryBytes) ?? 0,
      reductionBytes: Math.max(0, baselineMemoryBytes - flashMemoryBytes),
    };
  }

  /**
   * Calculate the memory reduction percentage of Flash relative to baseline.
   *
   * @param baselineBytes - Baseline memory in bytes.
   * @param flashBytes - Flash memory in bytes; `0` is a valid measurement.
   * @returns The reduction clamped to be non-negative, or `undefined` when
   *   either value is missing or non-finite, or the baseline is not positive.
   */
  private calculateMemoryReduction(
    baselineBytes?: number,
    flashBytes?: number
  ): number | undefined {
    if (baselineBytes == null || flashBytes == null) {
      return undefined;
    }
    if (!Number.isFinite(baselineBytes) || !Number.isFinite(flashBytes) || baselineBytes <= 0) {
      return undefined;
    }

    const reduction = ((baselineBytes - flashBytes) / baselineBytes) * 100;
    return Math.max(0, reduction);
  }

  /**
   * Get current heap usage in bytes.
   *
   * @returns `process.memoryUsage().heapUsed`, or `0` when `process` or its
   *   `memoryUsage` function is unavailable.
   */
  private getMemoryUsage(): number {
    if (typeof process !== 'undefined' && typeof process.memoryUsage === 'function') {
      return process.memoryUsage().heapUsed;
    }
    return 0;
  }

  /**
   * Time `iterations` consecutive calls of `run`.
   *
   * @returns The mean duration of one call, in milliseconds.
   */
  private measureAverageMs(run: () => void, iterations: number): number {
    const start = performance.now();
    for (let i = 0; i < iterations; i++) {
      run();
    }
    return (performance.now() - start) / iterations;
  }

  /** Convert a mean call time in milliseconds to calls per second (0 when the time is not positive). */
  private toOpsPerSecond(averageMs: number): number {
    return averageMs > 0 ? 1000 / averageMs : 0;
  }

  /**
   * Create a suite result with summary statistics.
   *
   * @param suiteName - Display name of the suite.
   * @param benchmarks - Results to summarise; may be empty.
   * @returns The suite, with an all-zero summary when `benchmarks` is empty.
   */
  private createSuiteResult(
    suiteName: string,
    benchmarks: ComparisonBenchmark[]
  ): SuiteResult {
    const totalBenchmarks = benchmarks.length;
    const speedups = benchmarks.map((b) => b.results.speedup);
    const targetsMet = benchmarks.filter((b) => b.meetsTarget).length;

    // Averages and extrema of an empty list are undefined; report zeros
    // instead of NaN / Infinity / -Infinity.
    const hasResults = totalBenchmarks > 0;
    const summary = {
      averageSpeedup: hasResults
        ? speedups.reduce((sum, value) => sum + value, 0) / totalBenchmarks
        : 0,
      minSpeedup: hasResults ? Math.min(...speedups) : 0,
      maxSpeedup: hasResults ? Math.max(...speedups) : 0,
      targetsMet,
      totalBenchmarks,
      successRate: hasResults ? (targetsMet / totalBenchmarks) * 100 : 0,
    };

    return {
      suiteName,
      benchmarks,
      summary,
      timestamp: new Date(),
    };
  }
}
337
+
338
+ // ============================================================================
339
+ // Utility Functions
340
+ // ============================================================================
341
+
/**
 * Format a single benchmark result as a human-readable block of text.
 *
 * The memory reduction is printed as a percentage when it is a finite
 * number and as a bare `N/A` otherwise (no trailing percent sign).
 *
 * @param benchmark - The comparison result to render.
 * @returns A multi-line string with no leading or trailing whitespace.
 */
export function formatBenchmarkTable(benchmark: ComparisonBenchmark): string {
  const { name, results, meetsTarget } = benchmark;
  const status = meetsTarget ? '✓' : '✗';

  // Attach the unit only when there is a value; "N/A%" is meaningless.
  const memoryReduction =
    results.memoryReduction != null && Number.isFinite(results.memoryReduction)
      ? `${results.memoryReduction.toFixed(1)}%`
      : 'N/A';

  return [
    `${status} ${name}`,
    `Flash Attention: ${results.flash.averageTimeMs.toFixed(3)}ms`,
    `Baseline: ${results.baseline.averageTimeMs.toFixed(3)}ms`,
    `Speedup: ${results.speedup.toFixed(2)}x`,
    `Memory Reduction: ${memoryReduction}`,
    `Target Met: ${meetsTarget ? 'YES' : 'NO'} (target: ≥2.49x)`,
  ].join('\n');
}
358
+
/**
 * Render a suite as a summary report: a ruled title, the summary
 * statistics, and then every benchmark formatted by
 * `formatBenchmarkTable`, separated by blank lines.
 *
 * @param suite - The suite result to render.
 * @returns The report text, terminated by a newline.
 */
export function formatSuiteReport(suite: SuiteResult): string {
  const { suiteName, summary, benchmarks } = suite;
  const rule = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';

  const targetsLine =
    `Targets Met: ${summary.targetsMet}/${summary.totalBenchmarks} ` +
    `(${summary.successRate.toFixed(1)}%)`;

  const headerLines = [
    rule,
    `${suiteName}`,
    rule,
    '',
    'Summary:',
    `Average Speedup: ${summary.averageSpeedup.toFixed(2)}x`,
    `Min Speedup: ${summary.minSpeedup.toFixed(2)}x`,
    `Max Speedup: ${summary.maxSpeedup.toFixed(2)}x`,
    targetsLine,
    'Target Range: 2.49x - 7.47x',
    '',
    'Benchmarks:',
  ];
  const header = headerLines.join('\n').trim();

  const tables: string[] = [];
  for (const entry of benchmarks) {
    tables.push(formatBenchmarkTable(entry));
  }

  return header + '\n\n' + tables.join('\n\n') + '\n';
}
386
+
/**
 * Format memory profiles as a fixed-width text table.
 *
 * The header and every data row are laid out with the same column widths,
 * so the column titles always sit above their values. A reduction that is
 * not a finite number is shown as `N/A`.
 *
 * @param profiles - Profiles to render, one row each.
 * @returns The title, a rule, the column header and the rows, joined by
 *   newlines.
 */
export function formatMemoryProfile(profiles: MemoryProfile[]): string {
  // Widths of the first four columns; the last column is left unpadded.
  const widths = [6, 7, 11, 14] as const;

  const layoutRow = (
    dim: string,
    keys: string,
    flash: string,
    baseline: string,
    reduction: string
  ): string =>
    `${dim.padEnd(widths[0])} ${keys.padEnd(widths[1])} ${flash.padEnd(widths[2])} ${baseline.padEnd(widths[3])} ${reduction}`;

  const header = [
    'Memory Profile Results',
    '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━',
    layoutRow('Dim', 'Keys', 'Flash (KB)', 'Baseline (KB)', 'Reduction'),
  ].join('\n');

  const rows = profiles.map((p) => {
    const flashKB = (p.flashMemoryBytes / 1024).toFixed(1);
    const baselineKB = (p.baselineMemoryBytes / 1024).toFixed(1);
    const reduction = Number.isFinite(p.reduction) ? `${p.reduction.toFixed(1)}%` : 'N/A';

    return layoutRow(p.dimension.toString(), p.numKeys.toString(), flashKB, baselineKB, reduction);
  });

  return `${header}\n${rows.join('\n')}`;
}
407
+
408
+ // ============================================================================
409
+ // Convenience Functions
410
+ // ============================================================================
411
+
/**
 * Run the V3 target validation once, print a short report to the console,
 * and report the verdict.
 *
 * @returns `true` only when the validation reports both the minimum and
 *   the maximum bound as satisfied.
 */
export function quickValidation(): boolean {
  const { actualSpeedup, meetsMinimum, meetsMaximum } =
    new AttentionBenchmarkRunner().validateV3Targets();

  const yesNo = (flag: boolean): string => (flag ? 'YES ✓' : 'NO ✗');

  const reportLines = [
    'V3 Performance Target Validation',
    '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━',
    'Target Range: 2.49x - 7.47x',
    `Actual Speedup: ${actualSpeedup.toFixed(2)}x`,
    `Meets Minimum: ${yesNo(meetsMinimum)}`,
    `Within Range: ${yesNo(meetsMaximum)}`,
  ];
  console.log(reportLines.join('\n').trim());

  if (!meetsMinimum) {
    return false;
  }
  return meetsMaximum;
}
430
+
/**
 * Execute the comprehensive benchmark suite on a fresh runner, print its
 * formatted report to the console, and hand the raw result back.
 *
 * @returns The suite result that was printed.
 */
export function runAndDisplaySuite(): SuiteResult {
  const suite = new AttentionBenchmarkRunner().runComprehensiveSuite();
  const report = formatSuiteReport(suite);

  console.log(report);

  return suite;
}
442
+
/**
 * Collect memory profiles with a fresh runner using its default
 * dimensions, print them as a table to the console, and hand them back.
 *
 * @returns The profiles that were printed.
 */
export function runAndDisplayMemoryProfile(): MemoryProfile[] {
  const profiles = new AttentionBenchmarkRunner().runMemoryProfile();
  const table = formatMemoryProfile(profiles);

  console.log(table);

  return profiles;
}
454
+
455
+ // ============================================================================
456
+ // Exports
457
+ // ============================================================================
458
+
459
+ export { FlashAttentionOptimizer, createFlashAttentionOptimizer };