@sparkleideas/performance 3.0.0-alpha.20 → 3.0.0-alpha.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/attention-benchmarks.d.ts +122 -0
- package/dist/attention-benchmarks.d.ts.map +1 -0
- package/dist/attention-benchmarks.js +322 -0
- package/dist/attention-benchmarks.js.map +1 -0
- package/dist/attention-integration.d.ts +133 -0
- package/dist/attention-integration.d.ts.map +1 -0
- package/dist/attention-integration.js +366 -0
- package/dist/attention-integration.js.map +1 -0
- package/dist/examples/flash-attention-demo.d.ts +13 -0
- package/dist/examples/flash-attention-demo.d.ts.map +1 -0
- package/dist/examples/flash-attention-demo.js +113 -0
- package/dist/examples/flash-attention-demo.js.map +1 -0
- package/dist/examples/quick-test.d.ts +9 -0
- package/dist/examples/quick-test.d.ts.map +1 -0
- package/dist/examples/quick-test.js +53 -0
- package/dist/examples/quick-test.js.map +1 -0
- package/dist/framework/benchmark.d.ts +192 -0
- package/dist/framework/benchmark.d.ts.map +1 -0
- package/dist/framework/benchmark.js +408 -0
- package/dist/framework/benchmark.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/package.json +1 -1
- package/tsconfig.build.tsbuildinfo +1 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sparkleideas/performance - Flash Attention Benchmarks
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive benchmark suite for Flash Attention performance validation.
|
|
5
|
+
* Validates 2.49x-7.47x speedup targets and memory efficiency improvements.
|
|
6
|
+
*/
|
|
7
|
+
import { FlashAttentionOptimizer, createFlashAttentionOptimizer } from './attention-integration.js';
|
|
8
|
+
/**
 * Outcome of a single Flash Attention vs. baseline timing comparison.
 */
export interface ComparisonBenchmark {
    /** Display label for the benchmark. */
    name: string;
    /** Vector dimension used for the query, keys and values. */
    dimension: number;
    /** Number of key/value vectors attended over. */
    numKeys: number;
    /** Number of timed calls made to each implementation. */
    iterations: number;
    /** Measurements for both implementations and their ratio. */
    results: {
        /** Timing of the Flash Attention implementation. */
        flash: {
            /** Mean wall-clock time per call, in milliseconds. */
            averageTimeMs: number;
            /** Calls per second derived from the mean time. */
            opsPerSecond: number;
            /** Memory attributed to the run, in bytes, when measured. */
            memoryUsageBytes?: number;
        };
        /** Timing of the baseline (dot-product) implementation. */
        baseline: {
            /** Mean wall-clock time per call, in milliseconds. */
            averageTimeMs: number;
            /** Calls per second derived from the mean time. */
            opsPerSecond: number;
            /** Memory attributed to the run, in bytes, when measured. */
            memoryUsageBytes?: number;
        };
        /** Baseline mean time divided by Flash mean time. */
        speedup: number;
        /** Memory reduction as a percentage, when measured. */
        memoryReduction?: number;
    };
    /** Whether the speedup reached the minimum target. */
    meetsTarget: boolean;
    /** When the result object was created. */
    timestamp: Date;
}
|
|
30
|
+
/**
 * Aggregated outcome of a group of comparison benchmarks.
 */
export interface SuiteResult {
    /** Display name of the suite. */
    suiteName: string;
    /** Individual comparison results, in the order they were run. */
    benchmarks: Array<ComparisonBenchmark>;
    /** Statistics computed over `benchmarks`. */
    summary: {
        /** Arithmetic mean of the per-benchmark speedups. */
        averageSpeedup: number;
        /** Smallest per-benchmark speedup. */
        minSpeedup: number;
        /** Largest per-benchmark speedup. */
        maxSpeedup: number;
        /** Count of benchmarks whose `meetsTarget` flag is set. */
        targetsMet: number;
        /** Number of benchmarks in the suite. */
        totalBenchmarks: number;
        /** `targetsMet / totalBenchmarks`, expressed as a percentage. */
        successRate: number;
    };
    /** When the suite result was created. */
    timestamp: Date;
}
|
|
43
|
+
/**
 * Memory comparison between Flash Attention and the baseline for one configuration.
 */
export interface MemoryProfile {
    /** Vector dimension that was profiled. */
    dimension: number;
    /** Number of key/value vectors that were profiled. */
    numKeys: number;
    /** Memory attributed to the Flash Attention run, in bytes. */
    flashMemoryBytes: number;
    /** Memory attributed to the baseline run, in bytes. */
    baselineMemoryBytes: number;
    /** Reduction relative to the baseline, as a percentage. */
    reduction: number;
    /** Baseline bytes minus Flash bytes. */
    reductionBytes: number;
}
|
|
51
|
+
/**
 * Runs timing and memory benchmarks that compare Flash Attention with a
 * baseline attention implementation.
 */
export declare class AttentionBenchmarkRunner {
    /**
     * Run the built-in set of comparison configurations and summarise them.
     *
     * @returns The individual comparisons together with summary statistics.
     */
    runComprehensiveSuite(): SuiteResult;
    /**
     * Time Flash Attention against the baseline for one configuration.
     *
     * @param dimension - Vector dimension of the query, keys and values.
     * @param numKeys - Number of key/value vectors (optional).
     * @param iterations - Number of timed calls per implementation (optional).
     * @returns The timings, their ratio, and whether the target was met.
     */
    runComparison(dimension: number, numKeys?: number, iterations?: number): ComparisonBenchmark;
    /**
     * Profile memory usage for each of the given dimensions.
     *
     * @param dimensions - Dimensions to profile (optional).
     * @returns One profile per dimension, in input order.
     */
    runMemoryProfile(dimensions?: number[]): Array<MemoryProfile>;
    /**
     * Run comparisons under progressively increasing load.
     *
     * @returns The comparisons that were completed.
     */
    runStressTest(): Array<ComparisonBenchmark>;
    /**
     * Check a measured speedup against the V3 target range (2.49x-7.47x).
     *
     * @returns Flags for the lower and upper bound, the measured speedup, and the target range.
     */
    validateV3Targets(): {
        meetsMinimum: boolean;
        meetsMaximum: boolean;
        actualSpeedup: number;
        target: {
            min: number;
            max: number;
        };
    };
    /** Profiles memory usage for one specific configuration. */
    private profileMemory;
    /** Computes a memory reduction percentage. */
    private calculateMemoryReduction;
    /** Reads the current memory usage. */
    private getMemoryUsage;
    /** Builds a suite result with summary statistics. */
    private createSuiteResult;
}
|
|
97
|
+
/**
 * Render one comparison benchmark as human-readable text.
 *
 * @param benchmark - The comparison result to render.
 * @returns The formatted text.
 */
export declare function formatBenchmarkTable(benchmark: ComparisonBenchmark): string;
|
|
101
|
+
/**
 * Render a suite result as a summary report.
 *
 * @param suite - The suite result to render.
 * @returns The formatted report text.
 */
export declare function formatSuiteReport(suite: SuiteResult): string;
|
|
105
|
+
/**
 * Render memory profiles as a text table.
 *
 * @param profiles - The profiles to render, one table row each.
 * @returns The formatted table text.
 */
export declare function formatMemoryProfile(profiles: Array<MemoryProfile>): string;
|
|
109
|
+
/**
 * Run a quick performance validation.
 *
 * @returns A boolean verdict for the validation.
 */
export declare function quickValidation(): boolean;
|
|
113
|
+
/**
 * Run the comprehensive benchmark suite and display its report.
 *
 * @returns The suite result that was displayed.
 */
export declare function runAndDisplaySuite(): SuiteResult;
|
|
117
|
+
/**
 * Run the memory profile and display it.
 *
 * @returns The memory profiles that were displayed.
 */
export declare function runAndDisplayMemoryProfile(): Array<MemoryProfile>;
|
|
121
|
+
export { FlashAttentionOptimizer, createFlashAttentionOptimizer };
|
|
122
|
+
//# sourceMappingURL=attention-benchmarks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attention-benchmarks.d.ts","sourceRoot":"","sources":["../src/attention-benchmarks.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,uBAAuB,EACvB,6BAA6B,EAG9B,MAAM,4BAA4B,CAAC;AAYpC,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE;QACP,KAAK,EAAE;YACL,aAAa,EAAE,MAAM,CAAC;YACtB,YAAY,EAAE,MAAM,CAAC;YACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC3B,CAAC;QACF,QAAQ,EAAE;YACR,aAAa,EAAE,MAAM,CAAC;YACtB,YAAY,EAAE,MAAM,CAAC;YACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC3B,CAAC;QACF,OAAO,EAAE,MAAM,CAAC;QAChB,eAAe,CAAC,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,mBAAmB,EAAE,CAAC;IAClC,OAAO,EAAE;QACP,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;CACxB;AAMD,qBAAa,wBAAwB;IACnC;;OAEG;IACH,qBAAqB,IAAI,WAAW;IAoBpC;;OAEG;IACH,aAAa,CACX,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,MAAY,EACrB,UAAU,GAAE,MAAa,GACxB,mBAAmB;IAoEtB;;OAEG;IACH,gBAAgB,CACd,UAAU,GAAE,MAAM,EAA+B,GAChD,aAAa,EAAE;IAYlB;;OAEG;IACH,aAAa,IAAI,mBAAmB,EAAE;IAyBtC;;OAEG;IACH,iBAAiB,IAAI;QACnB,YAAY,EAAE,OAAO,CAAC;QACtB,YAAY,EAAE,OAAO,CAAC;QACtB,aAAa,EAAE,MAAM,CAAC;QACtB,MAAM,EAAE;YAAE,GAAG,EAAE,MAAM,CAAC;YAAC,GAAG,EAAE,MAAM,CAAA;SAAE,CAAC;KACtC;IAYD;;OAEG;IACH,OAAO,CAAC,aAAa;IAwCrB;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAYhC;;OAEG;IACH,OAAO,CAAC,cAAc;IAOtB;;OAEG;IACH,OAAO,CAAC,iBAAiB;CA0B1B;AAMD;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,SAAS,EAAE,mBAAmB,GAAG,MAAM,CAY3E;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAuB5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,GAAG,MAAM,CAgBrE;AAMD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CA
czC;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,WAAW,CAOhD;AAED;;GAEG;AACH,wBAAgB,0BAA0B,IAAI,aAAa,EAAE,CAO5D;AAMD,OAAO,EAAE,uBAAuB,EAAE,6BAA6B,EAAE,CAAC"}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sparkleideas/performance - Flash Attention Benchmarks
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive benchmark suite for Flash Attention performance validation.
|
|
5
|
+
* Validates 2.49x-7.47x speedup targets and memory efficiency improvements.
|
|
6
|
+
*/
|
|
7
|
+
import { FlashAttentionOptimizer, createFlashAttentionOptimizer, } from './attention-integration.js';
|
|
8
|
+
import { FlashAttention, DotProductAttention, } from '@ruvector/attention';
|
|
9
|
+
// ============================================================================
|
|
10
|
+
// Benchmark Runner
|
|
11
|
+
// ============================================================================
|
|
12
|
+
export class AttentionBenchmarkRunner {
|
|
13
|
+
/**
|
|
14
|
+
* Run comprehensive benchmark suite across multiple dimensions
|
|
15
|
+
*/
|
|
16
|
+
runComprehensiveSuite() {
|
|
17
|
+
const benchmarks = [];
|
|
18
|
+
// Test configurations: [dimension, numKeys, iterations]
|
|
19
|
+
const configs = [
|
|
20
|
+
[128, 50, 1000], // Small: Mobile/edge devices
|
|
21
|
+
[256, 100, 1000], // Medium: Standard use cases
|
|
22
|
+
[512, 100, 1000], // Large: High-performance scenarios
|
|
23
|
+
[768, 150, 500], // XL: Transformer models
|
|
24
|
+
[1024, 200, 500], // XXL: Large language models
|
|
25
|
+
];
|
|
26
|
+
for (const [dim, numKeys, iterations] of configs) {
|
|
27
|
+
const benchmark = this.runComparison(dim, numKeys, iterations);
|
|
28
|
+
benchmarks.push(benchmark);
|
|
29
|
+
}
|
|
30
|
+
return this.createSuiteResult('Comprehensive Flash Attention Suite', benchmarks);
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Run benchmark comparing Flash Attention vs baseline
|
|
34
|
+
*/
|
|
35
|
+
runComparison(dimension, numKeys = 100, iterations = 1000) {
|
|
36
|
+
// Create Flash and baseline attention mechanisms
|
|
37
|
+
const flash = new FlashAttention(dimension, 64);
|
|
38
|
+
const baseline = new DotProductAttention(dimension);
|
|
39
|
+
// Create test data
|
|
40
|
+
const query = new Float32Array(dimension);
|
|
41
|
+
const keys = Array.from({ length: numKeys }, () => new Float32Array(dimension));
|
|
42
|
+
const values = Array.from({ length: numKeys }, () => new Float32Array(dimension));
|
|
43
|
+
// Fill with random data
|
|
44
|
+
for (let i = 0; i < dimension; i++) {
|
|
45
|
+
query[i] = Math.random();
|
|
46
|
+
}
|
|
47
|
+
for (let i = 0; i < numKeys; i++) {
|
|
48
|
+
for (let j = 0; j < dimension; j++) {
|
|
49
|
+
keys[i][j] = Math.random();
|
|
50
|
+
values[i][j] = Math.random();
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
// Benchmark Flash Attention
|
|
54
|
+
const flashStart = performance.now();
|
|
55
|
+
for (let i = 0; i < iterations; i++) {
|
|
56
|
+
flash.computeRaw(query, keys, values);
|
|
57
|
+
}
|
|
58
|
+
const flashEnd = performance.now();
|
|
59
|
+
const flashTimeMs = flashEnd - flashStart;
|
|
60
|
+
const flashAvgMs = flashTimeMs / iterations;
|
|
61
|
+
const flashOps = 1000 / flashAvgMs;
|
|
62
|
+
// Benchmark baseline
|
|
63
|
+
const baselineStart = performance.now();
|
|
64
|
+
for (let i = 0; i < iterations; i++) {
|
|
65
|
+
baseline.computeRaw(query, keys, values);
|
|
66
|
+
}
|
|
67
|
+
const baselineEnd = performance.now();
|
|
68
|
+
const baselineTimeMs = baselineEnd - baselineStart;
|
|
69
|
+
const baselineAvgMs = baselineTimeMs / iterations;
|
|
70
|
+
const baselineOps = 1000 / baselineAvgMs;
|
|
71
|
+
const speedup = baselineAvgMs / flashAvgMs;
|
|
72
|
+
const meetsTarget = speedup >= 2.49; // Minimum V3 target
|
|
73
|
+
return {
|
|
74
|
+
name: `Flash Attention ${dimension}D x ${numKeys} keys`,
|
|
75
|
+
dimension,
|
|
76
|
+
numKeys,
|
|
77
|
+
iterations,
|
|
78
|
+
results: {
|
|
79
|
+
flash: {
|
|
80
|
+
averageTimeMs: flashAvgMs,
|
|
81
|
+
opsPerSecond: flashOps,
|
|
82
|
+
memoryUsageBytes: undefined,
|
|
83
|
+
},
|
|
84
|
+
baseline: {
|
|
85
|
+
averageTimeMs: baselineAvgMs,
|
|
86
|
+
opsPerSecond: baselineOps,
|
|
87
|
+
memoryUsageBytes: undefined,
|
|
88
|
+
},
|
|
89
|
+
speedup,
|
|
90
|
+
memoryReduction: undefined,
|
|
91
|
+
},
|
|
92
|
+
meetsTarget,
|
|
93
|
+
timestamp: new Date(),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Run memory profiling benchmark
|
|
98
|
+
*/
|
|
99
|
+
runMemoryProfile(dimensions = [128, 256, 512, 768, 1024]) {
|
|
100
|
+
const profiles = [];
|
|
101
|
+
for (const dim of dimensions) {
|
|
102
|
+
const numKeys = Math.min(200, dim / 2);
|
|
103
|
+
const profile = this.profileMemory(dim, numKeys);
|
|
104
|
+
profiles.push(profile);
|
|
105
|
+
}
|
|
106
|
+
return profiles;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Run stress test with increasing load
|
|
110
|
+
*/
|
|
111
|
+
runStressTest() {
|
|
112
|
+
const results = [];
|
|
113
|
+
// Progressively increase load
|
|
114
|
+
const stressConfigs = [
|
|
115
|
+
[512, 100, 100],
|
|
116
|
+
[512, 500, 100],
|
|
117
|
+
[512, 1000, 100],
|
|
118
|
+
[512, 2000, 50],
|
|
119
|
+
[512, 5000, 50],
|
|
120
|
+
];
|
|
121
|
+
for (const [dim, numKeys, iterations] of stressConfigs) {
|
|
122
|
+
try {
|
|
123
|
+
const benchmark = this.runComparison(dim, numKeys, iterations);
|
|
124
|
+
results.push(benchmark);
|
|
125
|
+
}
|
|
126
|
+
catch (error) {
|
|
127
|
+
console.error(`Stress test failed at ${numKeys} keys:`, error);
|
|
128
|
+
break;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
return results;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Validate V3 performance targets (2.49x-7.47x speedup)
|
|
135
|
+
*/
|
|
136
|
+
validateV3Targets() {
|
|
137
|
+
const optimizer = createFlashAttentionOptimizer(512);
|
|
138
|
+
const result = optimizer.benchmark();
|
|
139
|
+
return {
|
|
140
|
+
meetsMinimum: result.speedup >= 2.49,
|
|
141
|
+
meetsMaximum: result.speedup <= 7.47,
|
|
142
|
+
actualSpeedup: result.speedup,
|
|
143
|
+
target: { min: 2.49, max: 7.47 },
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Profile memory usage for a specific configuration
|
|
148
|
+
*/
|
|
149
|
+
profileMemory(dimension, numKeys) {
|
|
150
|
+
// Create test data
|
|
151
|
+
const query = new Float32Array(dimension).fill(1);
|
|
152
|
+
const keys = Array.from({ length: numKeys }, () => new Float32Array(dimension).fill(1));
|
|
153
|
+
const values = Array.from({ length: numKeys }, () => new Float32Array(dimension).fill(1));
|
|
154
|
+
// Measure Flash Attention memory
|
|
155
|
+
const flashMemoryBefore = this.getMemoryUsage();
|
|
156
|
+
const flashAttention = new FlashAttention(dimension, 64); // Add blockSize
|
|
157
|
+
flashAttention.compute(query, keys, values);
|
|
158
|
+
const flashMemoryAfter = this.getMemoryUsage();
|
|
159
|
+
const flashMemoryBytes = flashMemoryAfter - flashMemoryBefore;
|
|
160
|
+
// Measure baseline memory
|
|
161
|
+
const baselineMemoryBefore = this.getMemoryUsage();
|
|
162
|
+
const baselineAttention = new DotProductAttention(dimension);
|
|
163
|
+
baselineAttention.compute(query, keys, values);
|
|
164
|
+
const baselineMemoryAfter = this.getMemoryUsage();
|
|
165
|
+
const baselineMemoryBytes = baselineMemoryAfter - baselineMemoryBefore;
|
|
166
|
+
const reductionBytes = baselineMemoryBytes - flashMemoryBytes;
|
|
167
|
+
const reduction = (reductionBytes / baselineMemoryBytes) * 100;
|
|
168
|
+
return {
|
|
169
|
+
dimension,
|
|
170
|
+
numKeys,
|
|
171
|
+
flashMemoryBytes: Math.max(0, flashMemoryBytes),
|
|
172
|
+
baselineMemoryBytes: Math.max(0, baselineMemoryBytes),
|
|
173
|
+
reduction: Math.max(0, reduction),
|
|
174
|
+
reductionBytes: Math.max(0, reductionBytes),
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Calculate memory reduction percentage
|
|
179
|
+
*/
|
|
180
|
+
calculateMemoryReduction(baselineBytes, flashBytes) {
|
|
181
|
+
if (!baselineBytes || !flashBytes) {
|
|
182
|
+
return undefined;
|
|
183
|
+
}
|
|
184
|
+
const reduction = ((baselineBytes - flashBytes) / baselineBytes) * 100;
|
|
185
|
+
return Math.max(0, reduction);
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Get current memory usage
|
|
189
|
+
*/
|
|
190
|
+
getMemoryUsage() {
|
|
191
|
+
if (typeof process !== 'undefined' && process.memoryUsage) {
|
|
192
|
+
return process.memoryUsage().heapUsed;
|
|
193
|
+
}
|
|
194
|
+
return 0;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Create suite result with summary statistics
|
|
198
|
+
*/
|
|
199
|
+
createSuiteResult(suiteName, benchmarks) {
|
|
200
|
+
const speedups = benchmarks.map(b => b.results.speedup);
|
|
201
|
+
const averageSpeedup = speedups.reduce((a, b) => a + b, 0) / speedups.length;
|
|
202
|
+
const minSpeedup = Math.min(...speedups);
|
|
203
|
+
const maxSpeedup = Math.max(...speedups);
|
|
204
|
+
const targetsMet = benchmarks.filter(b => b.meetsTarget).length;
|
|
205
|
+
const totalBenchmarks = benchmarks.length;
|
|
206
|
+
const successRate = (targetsMet / totalBenchmarks) * 100;
|
|
207
|
+
return {
|
|
208
|
+
suiteName,
|
|
209
|
+
benchmarks,
|
|
210
|
+
summary: {
|
|
211
|
+
averageSpeedup,
|
|
212
|
+
minSpeedup,
|
|
213
|
+
maxSpeedup,
|
|
214
|
+
targetsMet,
|
|
215
|
+
totalBenchmarks,
|
|
216
|
+
successRate,
|
|
217
|
+
},
|
|
218
|
+
timestamp: new Date(),
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
// ============================================================================
|
|
223
|
+
// Utility Functions
|
|
224
|
+
// ============================================================================
|
|
225
|
+
/**
|
|
226
|
+
* Format benchmark results as human-readable table
|
|
227
|
+
*/
|
|
228
|
+
export function formatBenchmarkTable(benchmark) {
|
|
229
|
+
const { name, results, meetsTarget } = benchmark;
|
|
230
|
+
const status = meetsTarget ? '✓' : '✗';
|
|
231
|
+
return `
|
|
232
|
+
${status} ${name}
|
|
233
|
+
Flash Attention: ${results.flash.averageTimeMs.toFixed(3)}ms
|
|
234
|
+
Baseline: ${results.baseline.averageTimeMs.toFixed(3)}ms
|
|
235
|
+
Speedup: ${results.speedup.toFixed(2)}x
|
|
236
|
+
Memory Reduction: ${results.memoryReduction?.toFixed(1) ?? 'N/A'}%
|
|
237
|
+
Target Met: ${meetsTarget ? 'YES' : 'NO'} (target: ≥2.49x)
|
|
238
|
+
`.trim();
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* Format suite results as summary report
|
|
242
|
+
*/
|
|
243
|
+
export function formatSuiteReport(suite) {
|
|
244
|
+
const { suiteName, summary, benchmarks } = suite;
|
|
245
|
+
const header = `
|
|
246
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
247
|
+
${suiteName}
|
|
248
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
249
|
+
|
|
250
|
+
Summary:
|
|
251
|
+
Average Speedup: ${summary.averageSpeedup.toFixed(2)}x
|
|
252
|
+
Min Speedup: ${summary.minSpeedup.toFixed(2)}x
|
|
253
|
+
Max Speedup: ${summary.maxSpeedup.toFixed(2)}x
|
|
254
|
+
Targets Met: ${summary.targetsMet}/${summary.totalBenchmarks} (${summary.successRate.toFixed(1)}%)
|
|
255
|
+
Target Range: 2.49x - 7.47x
|
|
256
|
+
|
|
257
|
+
Benchmarks:
|
|
258
|
+
`.trim();
|
|
259
|
+
const benchmarkTables = benchmarks
|
|
260
|
+
.map(b => formatBenchmarkTable(b))
|
|
261
|
+
.join('\n\n');
|
|
262
|
+
return `${header}\n\n${benchmarkTables}\n`;
|
|
263
|
+
}
|
|
264
|
+
/**
 * Render memory profiles as a text table.
 *
 * Byte counts are shown in KB (bytes / 1024) with one decimal place, and
 * every column except the last is padded to a fixed width.
 *
 * @param profiles - Profiles to render, one row each.
 * @returns The header followed by one line per profile.
 */
export function formatMemoryProfile(profiles) {
    const header = [
        'Memory Profile Results',
        '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━',
        'Dim Keys Flash (KB) Baseline (KB) Reduction',
    ].join('\n');
    const toKB = (bytes) => (bytes / 1024).toFixed(1);
    const rows = [];
    for (const profile of profiles) {
        const flashKB = toKB(profile.flashMemoryBytes);
        const baselineKB = toKB(profile.baselineMemoryBytes);
        const reduction = profile.reduction.toFixed(1);
        const cells = [
            profile.dimension.toString().padEnd(6),
            profile.numKeys.toString().padEnd(7),
            flashKB.padEnd(11),
            baselineKB.padEnd(14),
            `${reduction}%`,
        ];
        rows.push(cells.join(' '));
    }
    return header + '\n' + rows.join('\n');
}
|
|
281
|
+
// ============================================================================
|
|
282
|
+
// Convenience Functions
|
|
283
|
+
// ============================================================================
|
|
284
|
+
/**
 * Quick performance validation.
 *
 * Runs `validateV3Targets()` on a fresh runner, logs the measured speedup
 * against the 2.49x-7.47x target range, and reports whether it falls inside.
 *
 * @returns `true` only when the speedup is at least the minimum and at most
 *   the maximum of the target range.
 */
export function quickValidation() {
    const runner = new AttentionBenchmarkRunner();
    const validation = runner.validateV3Targets();
    // "Within range" needs both bounds; checking only the upper bound would
    // report a speedup below the minimum as being within range.
    const withinRange = validation.meetsMinimum && validation.meetsMaximum;
    console.log(`
V3 Performance Target Validation
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Target Range: 2.49x - 7.47x
Actual Speedup: ${validation.actualSpeedup.toFixed(2)}x
Meets Minimum: ${validation.meetsMinimum ? 'YES ✓' : 'NO ✗'}
Within Range: ${withinRange ? 'YES ✓' : 'NO ✗'}
`.trim());
    return withinRange;
}
|
|
300
|
+
/**
 * Execute the comprehensive benchmark suite and print its report.
 *
 * @returns The suite result whose report was logged to the console.
 */
export function runAndDisplaySuite() {
    const suite = new AttentionBenchmarkRunner().runComprehensiveSuite();
    const report = formatSuiteReport(suite);
    console.log(report);
    return suite;
}
|
|
309
|
+
/**
 * Execute the memory profile with its default dimensions and print the table.
 *
 * @returns The memory profiles whose table was logged to the console.
 */
export function runAndDisplayMemoryProfile() {
    const profiles = new AttentionBenchmarkRunner().runMemoryProfile();
    const table = formatMemoryProfile(profiles);
    console.log(table);
    return profiles;
}
|
|
318
|
+
// ============================================================================
|
|
319
|
+
// Exports
|
|
320
|
+
// ============================================================================
|
|
321
|
+
export { FlashAttentionOptimizer, createFlashAttentionOptimizer };
|
|
322
|
+
//# sourceMappingURL=attention-benchmarks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attention-benchmarks.js","sourceRoot":"","sources":["../src/attention-benchmarks.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,uBAAuB,EACvB,6BAA6B,GAG9B,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,cAAc,EACd,mBAAmB,GAGpB,MAAM,qBAAqB,CAAC;AAoD7B,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,MAAM,OAAO,wBAAwB;IACnC;;OAEG;IACH,qBAAqB;QACnB,MAAM,UAAU,GAA0B,EAAE,CAAC;QAE7C,wDAAwD;QACxD,MAAM,OAAO,GAA+B;YAC1C,CAAC,GAAG,EAAE,EAAE,EAAE,IAAI,CAAC,EAAK,6BAA6B;YACjD,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,EAAI,6BAA6B;YACjD,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,EAAI,oCAAoC;YACxD,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAK,yBAAyB;YAC7C,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,EAAI,6BAA6B;SAClD,CAAC;QAEF,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,IAAI,OAAO,EAAE,CAAC;YACjD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;YAC/D,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO,IAAI,CAAC,iBAAiB,CAAC,qCAAqC,EAAE,UAAU,CAAC,CAAC;IACnF,CAAC;IAED;;OAEG;IACH,aAAa,CACX,SAAiB,EACjB,UAAkB,GAAG,EACrB,aAAqB,IAAI;QAEzB,iDAAiD;QACjD,MAAM,KAAK,GAAG,IAAI,cAAc,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAChD,MAAM,QAAQ,GAAG,IAAI,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAEpD,mBAAmB;QACnB,MAAM,KAAK,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;QAChF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;QAElF,wBAAwB;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3B,CAAC;QACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC3B,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACrC,KAAK,IAAI
,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QACxC,CAAC;QACD,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,QAAQ,GAAG,UAAU,CAAC;QAC1C,MAAM,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;QAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,UAAU,CAAC;QAEnC,qBAAqB;QACrB,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC3C,CAAC;QACD,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACtC,MAAM,cAAc,GAAG,WAAW,GAAG,aAAa,CAAC;QACnD,MAAM,aAAa,GAAG,cAAc,GAAG,UAAU,CAAC;QAClD,MAAM,WAAW,GAAG,IAAI,GAAG,aAAa,CAAC;QAEzC,MAAM,OAAO,GAAG,aAAa,GAAG,UAAU,CAAC;QAC3C,MAAM,WAAW,GAAG,OAAO,IAAI,IAAI,CAAC,CAAC,oBAAoB;QAEzD,OAAO;YACL,IAAI,EAAE,mBAAmB,SAAS,OAAO,OAAO,OAAO;YACvD,SAAS;YACT,OAAO;YACP,UAAU;YACV,OAAO,EAAE;gBACP,KAAK,EAAE;oBACL,aAAa,EAAE,UAAU;oBACzB,YAAY,EAAE,QAAQ;oBACtB,gBAAgB,EAAE,SAAS;iBAC5B;gBACD,QAAQ,EAAE;oBACR,aAAa,EAAE,aAAa;oBAC5B,YAAY,EAAE,WAAW;oBACzB,gBAAgB,EAAE,SAAS;iBAC5B;gBACD,OAAO;gBACP,eAAe,EAAE,SAAS;aAC3B;YACD,WAAW;YACX,SAAS,EAAE,IAAI,IAAI,EAAE;SACtB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,gBAAgB,CACd,aAAuB,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC;QAEjD,MAAM,QAAQ,GAAoB,EAAE,CAAC;QAErC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;YACvC,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACjD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,aAAa;QACX,MAAM,OAAO,GAA0B,EAAE,CAAC;QAE1C,8BAA8B;QAC9B,MAAM,aAAa,GAA+B;YAChD,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;YACf,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;YACf,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC;YAChB,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC;YACf,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC;SAChB,CAAC;QAEF,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,IAAI,aAAa,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;gBA
C/D,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CAAC,yBAAyB,OAAO,QAAQ,EAAE,KAAK,CAAC,CAAC;gBAC/D,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,iBAAiB;QAMf,MAAM,SAAS,GAAG,6BAA6B,CAAC,GAAG,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC;QAErC,OAAO;YACL,YAAY,EAAE,MAAM,CAAC,OAAO,IAAI,IAAI;YACpC,YAAY,EAAE,MAAM,CAAC,OAAO,IAAI,IAAI;YACpC,aAAa,EAAE,MAAM,CAAC,OAAO;YAC7B,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,SAAiB,EACjB,OAAe;QAEf,mBAAmB;QACnB,MAAM,KAAK,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,CAChD,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CACpC,CAAC;QACF,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,GAAG,EAAE,CAClD,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CACpC,CAAC;QAEF,iCAAiC;QACjC,MAAM,iBAAiB,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAChD,MAAM,cAAc,GAAG,IAAI,cAAc,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB;QAC1E,cAAc,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC5C,MAAM,gBAAgB,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC/C,MAAM,gBAAgB,GAAG,gBAAgB,GAAG,iBAAiB,CAAC;QAE9D,0BAA0B;QAC1B,MAAM,oBAAoB,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACnD,MAAM,iBAAiB,GAAG,IAAI,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAC7D,iBAAiB,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC/C,MAAM,mBAAmB,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAClD,MAAM,mBAAmB,GAAG,mBAAmB,GAAG,oBAAoB,CAAC;QAEvE,MAAM,cAAc,GAAG,mBAAmB,GAAG,gBAAgB,CAAC;QAC9D,MAAM,SAAS,GAAG,CAAC,cAAc,GAAG,mBAAmB,CAAC,GAAG,GAAG,CAAC;QAE/D,OAAO;YACL,SAAS;YACT,OAAO;YACP,gBAAgB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,gBAAgB,CAAC;YAC/C,mBAAmB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,CAAC;YACrD,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC;YACjC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,cAAc,CAAC;SAC5C,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,wBAAwB,CAC9B,aAAsB,EACtB,UAAmB;QAEnB,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,EAAE,CAAC;YACl
C,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,MAAM,SAAS,GAAG,CAAC,CAAC,aAAa,GAAG,UAAU,CAAC,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC;QACvE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACK,cAAc;QACpB,IAAI,OAAO,OAAO,KAAK,WAAW,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YAC1D,OAAO,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC;QACxC,CAAC;QACD,OAAO,CAAC,CAAC;IACX,CAAC;IAED;;OAEG;IACK,iBAAiB,CACvB,SAAiB,EACjB,UAAiC;QAEjC,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACxD,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC7E,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC;QAChE,MAAM,eAAe,GAAG,UAAU,CAAC,MAAM,CAAC;QAC1C,MAAM,WAAW,GAAG,CAAC,UAAU,GAAG,eAAe,CAAC,GAAG,GAAG,CAAC;QAEzD,OAAO;YACL,SAAS;YACT,UAAU;YACV,OAAO,EAAE;gBACP,cAAc;gBACd,UAAU;gBACV,UAAU;gBACV,UAAU;gBACV,eAAe;gBACf,WAAW;aACZ;YACD,SAAS,EAAE,IAAI,IAAI,EAAE;SACtB,CAAC;IACJ,CAAC;CACF;AAED,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,SAA8B;IACjE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,SAAS,CAAC;IACjD,MAAM,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEvC,OAAO;EACP,MAAM,IAAI,IAAI;sBACM,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;sBACtC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;sBACzC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;sBAC1B,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK;sBAC5C,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;CAC/C,CAAC,IAAI,EAAE,CAAC;AACT,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,KAAkB;IAClD,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;IAEjD,MAAM,MAAM,GAAG;;EAEf,SAAS;;;;sBAIW,OAAO,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC;sBACjC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;sBAC7B,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;sBAC7B,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,eAAe,KAAK,OAAO,CAAC,W
AAW,CAAC,OAAO,CAAC,CAAC,CAAC;;;;CAIrG,CAAC,IAAI,EAAE,CAAC;IAEP,MAAM,eAAe,GAAG,UAAU;SAC/B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO,GAAG,MAAM,OAAO,eAAe,IAAI,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAyB;IAC3D,MAAM,MAAM,GAAG;;;;CAIhB,CAAC,IAAI,EAAE,CAAC;IAEP,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;QAC5B,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACvD,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7D,MAAM,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEzC,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,SAAS,GAAG,CAAC;IAC9I,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,MAAM,KAAK,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;AACzC,CAAC;AAED,+EAA+E;AAC/E,wBAAwB;AACxB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,MAAM,MAAM,GAAG,IAAI,wBAAwB,EAAE,CAAC;IAC9C,MAAM,UAAU,GAAG,MAAM,CAAC,iBAAiB,EAAE,CAAC;IAE9C,OAAO,CAAC,GAAG,CAAC;;;;mBAIK,UAAU,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;mBACnC,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;mBAC1C,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;GAC1D,CAAC,IAAI,EAAE,CAAC,CAAC;IAEV,OAAO,UAAU,CAAC,YAAY,IAAI,UAAU,CAAC,YAAY,CAAC;AAC5D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,MAAM,GAAG,IAAI,wBAAwB,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,EAAE,CAAC;IAE7C,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,CAAC;IAEtC,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B;IACxC,MAAM,MAAM,GAAG,IAAI,wBAAwB,EAAE,CAAC;IAC9C,MAAM,QAAQ,GAAG,MAAM,CAAC,gBAAgB,EAAE,CAAC;IAE3C,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC,CAAC;IAE3C,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,OAAO,EAAE,uBAAuB,EAAE,6BAA6B,EAAE,CAAC"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sparkleideas/performance - Flash Attention Integration
|
|
3
|
+
*
|
|
4
|
+
* Integrates @ruvector/attention Flash Attention capabilities into V3 performance module.
|
|
5
|
+
* Provides optimized attention mechanisms with 2.49x-7.47x speedup targets.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Flash Attention for memory-efficient processing
|
|
9
|
+
* - Automatic runtime selection (NAPI/WASM/JS)
|
|
10
|
+
* - Performance benchmarking and metrics
|
|
11
|
+
* - Speedup tracking and validation
|
|
12
|
+
*/
|
|
13
|
+
import { FlashAttention, DotProductAttention, type BenchmarkResult as AttentionBenchmarkResult } from '@ruvector/attention';
|
|
14
|
+
export interface AttentionInput {
|
|
15
|
+
query: Float32Array | number[];
|
|
16
|
+
keys: Float32Array[] | number[][];
|
|
17
|
+
values: Float32Array[] | number[][];
|
|
18
|
+
dim?: number;
|
|
19
|
+
blockSize?: number;
|
|
20
|
+
}
|
|
21
|
+
export interface AttentionOutput {
|
|
22
|
+
result: Float32Array;
|
|
23
|
+
runtime: 'napi' | 'wasm' | 'js';
|
|
24
|
+
executionTimeMs: number;
|
|
25
|
+
memoryUsageBytes?: number;
|
|
26
|
+
}
|
|
27
|
+
export interface BenchmarkResult {
|
|
28
|
+
flashAttention: {
|
|
29
|
+
averageTimeMs: number;
|
|
30
|
+
opsPerSecond: number;
|
|
31
|
+
memoryUsageBytes?: number;
|
|
32
|
+
};
|
|
33
|
+
baseline: {
|
|
34
|
+
averageTimeMs: number;
|
|
35
|
+
opsPerSecond: number;
|
|
36
|
+
memoryUsageBytes?: number;
|
|
37
|
+
};
|
|
38
|
+
speedup: number;
|
|
39
|
+
meetsTarget: boolean;
|
|
40
|
+
timestamp: Date;
|
|
41
|
+
}
|
|
42
|
+
export interface PerformanceMetrics {
|
|
43
|
+
totalOperations: number;
|
|
44
|
+
averageSpeedup: number;
|
|
45
|
+
peakSpeedup: number;
|
|
46
|
+
averageExecutionTimeMs: number;
|
|
47
|
+
totalMemorySavedBytes: number;
|
|
48
|
+
successRate: number;
|
|
49
|
+
baselineMemoryBytes: number;
|
|
50
|
+
optimizedMemoryBytes: number;
|
|
51
|
+
memorySavedBytes: number;
|
|
52
|
+
memorySavedPercent: number;
|
|
53
|
+
peakMemoryBytes: number;
|
|
54
|
+
}
|
|
55
|
+
export declare class FlashAttentionOptimizer {
|
|
56
|
+
private readonly dim;
|
|
57
|
+
private readonly blockSize;
|
|
58
|
+
private flashAttention;
|
|
59
|
+
private baselineAttention;
|
|
60
|
+
private metrics;
|
|
61
|
+
constructor(dim?: number, blockSize?: number);
|
|
62
|
+
/**
|
|
63
|
+
* Optimize attention computation using Flash Attention
|
|
64
|
+
* @param input - Query, keys, and values for attention computation
|
|
65
|
+
* @returns Optimized attention output with performance metrics
|
|
66
|
+
*/
|
|
67
|
+
optimize(input: AttentionInput): AttentionOutput;
|
|
68
|
+
/**
|
|
69
|
+
* Benchmark Flash Attention vs baseline attention
|
|
70
|
+
* @returns Comprehensive benchmark results with speedup metrics
|
|
71
|
+
*/
|
|
72
|
+
benchmark(): BenchmarkResult;
|
|
73
|
+
/**
|
|
74
|
+
* Get current speedup factor from accumulated metrics
|
|
75
|
+
* @returns Average speedup factor across all operations
|
|
76
|
+
*/
|
|
77
|
+
getSpeedup(): number;
|
|
78
|
+
/**
|
|
79
|
+
* Get comprehensive performance metrics
|
|
80
|
+
* @returns Detailed performance statistics
|
|
81
|
+
*/
|
|
82
|
+
getMetrics(): PerformanceMetrics;
|
|
83
|
+
/**
|
|
84
|
+
* Reset all metrics
|
|
85
|
+
*/
|
|
86
|
+
resetMetrics(): void;
|
|
87
|
+
/**
|
|
88
|
+
* Ensure input is Float32Array for optimal performance
|
|
89
|
+
*/
|
|
90
|
+
private ensureFloat32Array;
|
|
91
|
+
/**
|
|
92
|
+
* Detect which runtime is being used
|
|
93
|
+
*/
|
|
94
|
+
private detectRuntime;
|
|
95
|
+
/**
|
|
96
|
+
* Get current memory usage in bytes
|
|
97
|
+
*/
|
|
98
|
+
private getMemoryUsage;
|
|
99
|
+
/**
|
|
100
|
+
* Force garbage collection if available (requires --expose-gc flag)
|
|
101
|
+
* This helps get more accurate memory measurements
|
|
102
|
+
*/
|
|
103
|
+
private forceGC;
|
|
104
|
+
/**
|
|
105
|
+
* Benchmark memory usage across multiple dimensions
|
|
106
|
+
* Validates the 50-75% memory reduction target
|
|
107
|
+
* @param dimensions - Array of dimensions to test (default: [128, 256, 512, 1024])
|
|
108
|
+
* @returns Memory profiling results for each dimension
|
|
109
|
+
*/
|
|
110
|
+
benchmarkMemory(dimensions?: number[]): {
|
|
111
|
+
dimension: number;
|
|
112
|
+
baselineMemoryBytes: number;
|
|
113
|
+
optimizedMemoryBytes: number;
|
|
114
|
+
memorySavedBytes: number;
|
|
115
|
+
memorySavedPercent: number;
|
|
116
|
+
meetsTarget: boolean;
|
|
117
|
+
}[];
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Create a Flash Attention optimizer with default settings
|
|
121
|
+
* @param dim - Dimension of attention vectors (default: 512)
|
|
122
|
+
* @param blockSize - Block size for Flash Attention (default: 64)
|
|
123
|
+
* @returns Configured FlashAttentionOptimizer instance
|
|
124
|
+
*/
|
|
125
|
+
export declare function createFlashAttentionOptimizer(dim?: number, blockSize?: number): FlashAttentionOptimizer;
|
|
126
|
+
/**
|
|
127
|
+
* Quick benchmark of Flash Attention performance
|
|
128
|
+
* @param dim - Dimension to test (default: 512)
|
|
129
|
+
* @returns Benchmark results with speedup metrics
|
|
130
|
+
*/
|
|
131
|
+
export declare function quickBenchmark(dim?: number): BenchmarkResult;
|
|
132
|
+
export { FlashAttention, DotProductAttention, type AttentionBenchmarkResult, };
|
|
133
|
+
//# sourceMappingURL=attention-integration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attention-integration.d.ts","sourceRoot":"","sources":["../src/attention-integration.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,KAAK,eAAe,IAAI,wBAAwB,EAEjD,MAAM,qBAAqB,CAAC;AAM7B,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,YAAY,GAAG,MAAM,EAAE,CAAC;IAC/B,IAAI,EAAE,YAAY,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;IAClC,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;IACpC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;IAChC,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE;QACd,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC;IAEpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,EAAE,MAAM,CAAC;CACzB;AAMD,qBAAa,uBAAuB;IAgBhC,OAAO,CAAC,QAAQ,CAAC,GAAG;IACpB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAhB5B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,iBAAiB,CAAsB;IAC/C,OAAO,CAAC,OAAO,CAUb;gBAGiB,GAAG,GAAE,MAAY,EACjB,SAAS,GAAE,MAAW;IAgBzC;;;;OAIG;IACH,QAAQ,CAAC,KAAK,EAAE,cAAc,GAAG,eAAe;IA4BhD;;;OAGG;IACH,SAAS,IAAI,eAAe;IA2G5B;;;OAGG;IACH,UAAU,IAAI,MAAM;IAOpB;;;OAGG;IACH,UAAU,IAAI,kBAAkB;IAgChC;;OAEG;IACH,YAAY,IAAI,IAAI;IAapB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAO1B;;OAEG;IACH,OAAO,CAAC,aAAa;IAmBrB;;OAEG;IACH,OAAO,CAAC,cAAc;IAOtB;;;OAGG;IACH,OAAO,CAAC,OAAO;IAMf;;;;;OAKG;IACH,eAAe,CACb,UAAU,GAAE,MAAM,EAA0B,GAC3C;QACD,SAAS,EAAE,MAAM,CAAC;QAClB,mBAAmB,EAAE,MAAM,CAAC;QAC5B,oBAAoB,EAAE,MAAM,CAAC;QAC7B,gBAAgB,EAAE,MAAM,CAAC;QACzB,kBAAkB,EAAE,MAAM,CAAC;QA
C3B,WAAW,EAAE,OAAO,CAAC;KACtB,EAAE;CAwFJ;AAMD;;;;;GAKG;AACH,wBAAgB,6BAA6B,CAC3C,GAAG,GAAE,MAAY,EACjB,SAAS,GAAE,MAAW,GACrB,uBAAuB,CAEzB;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,GAAG,GAAE,MAAY,GAAG,eAAe,CAGjE;AAMD,OAAO,EACL,cAAc,EACd,mBAAmB,EACnB,KAAK,wBAAwB,GAC9B,CAAC"}
|