@digitaldefiance/node-accelerate 1.0.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # node-accelerate
2
2
 
3
- High-performance Apple Accelerate framework bindings for Node.js. Get **283x faster** matrix operations and **5-8x faster** vector operations on Apple Silicon (M1/M2/M3/M4).
3
+ High-performance Apple Accelerate framework bindings for Node.js. Get **up to 296x faster** matrix operations and **5-10x faster** vector operations on Apple Silicon (M1/M2/M3/M4).
4
+
5
+ **80+ hardware-accelerated functions** including BLAS, vDSP, and vForce operations.
4
6
 
5
7
  [![npm version](https://badge.fury.io/js/node-accelerate.svg)](https://www.npmjs.com/package/@digitaldefiance/node-accelerate)
6
8
  [![GitHub](https://img.shields.io/github/license/Digital-Defiance/node-accelerate)](https://github.com/Digital-Defiance/node-accelerate/blob/main/LICENSE)
@@ -27,10 +29,69 @@ Real benchmarks on Apple M4 Max:
27
29
 
28
30
  | Operation | Pure JavaScript | node-accelerate | Speedup |
29
31
  |-----------|----------------|-----------------|---------|
30
- | Matrix Multiply (500×500) | 93 ms | 0.33 ms | **283x** |
31
- | Vector Dot Product (1M) | 0.66 ms | 0.13 ms | **5x** |
32
- | Vector Sum (1M) | 0.59 ms | 0.08 ms | **7.6x** |
33
- | Vector Add (1M) | 0.74 ms | 0.20 ms | **3.7x** |
32
+ | Matrix Multiply (500×500) | 86 ms | 0.30 ms | **290x** |
33
+ | Vector Dot Product (1M) | 0.63 ms | 0.13 ms | **5x** |
34
+ | Vector Sum (1M) | 0.59 ms | 0.07 ms | **8x** |
35
+ | Vector Add (1M) | 0.58 ms | 0.19 ms | **3x** |
36
+ | Trigonometric (10k) | 0.07 ms | 0.008 ms | **8x** |
37
+
38
+ ## Features
39
+
40
+ ### Matrix Operations (BLAS)
41
+ - **Matrix multiplication** (double & single precision)
42
+ - **Matrix-vector multiplication**
43
+ - **Matrix transpose**
44
+ - **AXPY, copy, swap** operations
45
+ - **Vector norms** and rotations
46
+
47
+ ### Vector Arithmetic (vDSP)
48
+ - **Basic operations**: add, subtract, multiply, divide, scale, negate
49
+ - **Dot product, norm, absolute sum**
50
+ - **Element-wise operations**: abs, square, sqrt, power, reciprocal
51
+ - **Multiply-add operations**: vma, vmsa
52
+ - **Normalization** to unit length
53
+ - **Vector utilities**: fill, ramp, clear, reverse, copy, swap
54
+
55
+ ### Trigonometric Functions (vForce)
56
+ - **Standard trig**: sin, cos, tan (5-10x faster than Math functions)
57
+ - **Inverse trig**: asin, acos, atan, atan2
58
+ - **Hyperbolic**: sinh, cosh, tanh
59
+ - Process thousands of values in microseconds
60
+
61
+ ### Exponential & Logarithmic Functions (vForce)
62
+ - **exp** - Natural exponential
63
+ - **log, log10** - Natural and base-10 logarithms
64
+ - **pow** - Element-wise power
65
+ - **Reciprocal and inverse square root**
66
+
67
+ ### Statistical Functions (vDSP)
68
+ - **Basic stats**: sum, mean, min, max, minmax
69
+ - **Variance & standard deviation**
70
+ - **RMS** (Root Mean Square)
71
+ - **Sum of squares, mean magnitude, mean square**
72
+ - **Max/min magnitude**
73
+
74
+ ### Signal Processing (vDSP)
75
+ - **FFT/IFFT** - Fast Fourier Transform (forward & inverse)
76
+ - **Convolution** - 1D convolution for filtering
77
+ - **Cross-correlation** - Signal similarity analysis
78
+ - **Window functions**: Hamming, Hanning, Blackman
79
+ - 10-50x faster than pure JavaScript FFT
80
+
81
+ ### Data Processing (vDSP & vForce)
82
+ - **Clipping & limiting** - Constrain values to range
83
+ - **Thresholding** - Apply threshold to data
84
+ - **Interpolation** - Linear interpolation and lerp
85
+ - **Rounding**: ceil, floor, trunc
86
+ - **Sign manipulation**: copysign
87
+
88
+ ### All Operations Support
89
+ - ✅ Hardware acceleration via AMX & NEON
90
+ - ✅ Double precision (Float64Array)
91
+ - ✅ Single precision (Float32Array) for matrix ops
92
+ - ✅ Zero-copy operations where possible
93
+ - ✅ Optimized for Apple Silicon
94
+ - ✅ **80+ functions** total
34
95
 
35
96
  ## Installation
36
97
 
@@ -108,13 +169,79 @@ const dotProduct = accelerate.dot(vec1, vec2);
108
169
  const sum = accelerate.sum(vec1);
109
170
  const mean = accelerate.mean(vec1);
110
171
 
111
- // FFT
172
+ // Statistical operations
173
+ const { min, max } = accelerate.minmax(vec1);
174
+ const variance = accelerate.variance(vec1);
175
+ const stddev = accelerate.stddev(vec1);
176
+
177
+ // Trigonometric functions (vectorized)
178
+ const angles = new Float64Array(1000);
179
+ const sines = new Float64Array(1000);
180
+ const cosines = new Float64Array(1000);
181
+
182
+ for (let i = 0; i < 1000; i++) {
183
+ angles[i] = (i / 1000) * 2 * Math.PI;
184
+ }
185
+
186
+ accelerate.vsin(angles, sines);
187
+ accelerate.vcos(angles, cosines);
188
+
189
+ // Signal processing
112
190
  const signal = new Float64Array(65536);
113
191
  for (let i = 0; i < signal.length; i++) {
114
192
  signal[i] = Math.sin(2 * Math.PI * i / signal.length);
115
193
  }
116
- const spectrum = accelerate.fft(signal);
194
+
195
+ // Apply window and compute FFT
196
+ const window = accelerate.hanning(signal.length);
197
+ const windowed = new Float64Array(signal.length);
198
+ accelerate.vmul(signal, window, windowed);
199
+
200
+ const spectrum = accelerate.fft(windowed);
117
201
  console.log(spectrum.real, spectrum.imag);
202
+
203
+ // Inverse FFT
204
+ const reconstructed = accelerate.ifft(spectrum.real, spectrum.imag);
205
+
206
+ // Convolution for filtering
207
+ const kernel = new Float64Array([0.25, 0.5, 0.25]); // Moving average
208
+ const filtered = new Float64Array(signal.length - kernel.length + 1);
209
+ accelerate.conv(signal, kernel, filtered);
210
+
211
+ // Data processing
212
+ const data = new Float64Array(1000);
213
+ for (let i = 0; i < data.length; i++) {
214
+ data[i] = Math.random() * 200 - 100;
215
+ }
216
+
217
+ // Clip outliers
218
+ const clipped = new Float64Array(1000);
219
+ accelerate.vclip(data, clipped, -50, 50);
220
+
221
+ // Matrix transpose
222
+ const matrix = new Float64Array([1, 2, 3, 4, 5, 6]); // 2×3
223
+ const transposed = new Float64Array(6); // 3×2
224
+ accelerate.transpose(matrix, transposed, 2, 3);
225
+ ```
226
+
227
+ ## More Examples
228
+
229
+ Check out the `examples/` directory for complete working examples:
230
+
231
+ - **`machine-learning.js`** - Neural network operations, softmax, ReLU
232
+ - **`signal-processing.js`** - FFT, filtering, spectral analysis
233
+ - **`statistical-operations.js`** - Mean, variance, std dev, z-scores
234
+ - **`trigonometric-functions.js`** - Vectorized trig operations
235
+ - **`signal-processing-advanced.js`** - Convolution, correlation, windowing
236
+ - **`mathematical-functions.js`** - Exp, log, power functions
237
+ - **`data-processing.js`** - Clipping, thresholding, interpolation
238
+ - **`matrix-multiply.js`** - Matrix operations and benchmarks
239
+ - **`vector-operations.js`** - Vector arithmetic examples
240
+
241
+ Run any example:
242
+ ```bash
243
+ node examples/statistical-operations.js
244
+ node examples/signal-processing-advanced.js
118
245
  ```
119
246
 
120
247
  ## API Reference
@@ -144,6 +271,12 @@ const C = new Float64Array(M * N);
144
271
  accelerate.matmul(A, B, C, M, K, N);
145
272
  ```
146
273
 
274
+ #### `matmulFloat(A, B, C, M, K, N)`
275
+
276
+ Single-precision matrix multiplication (uses Float32Array)
277
+
278
+ Takes the same parameters as `matmul`, but `A`, `B`, and `C` are Float32Array instead of Float64Array.
279
+
147
280
  #### `matvec(A, x, y, M, N)`
148
281
 
149
282
  Matrix-vector multiplication: y = A × x
@@ -155,14 +288,21 @@ Matrix-vector multiplication: y = A × x
155
288
  - `N`: number - Columns in A
156
289
  - Returns: Float64Array (y)
157
290
 
291
+ #### `transpose(A, B, rows, cols)`
292
+
293
+ Matrix transpose: B = A^T
294
+
295
+ - `A`: Float64Array - Input matrix (rows × cols) in row-major order
296
+ - `B`: Float64Array - Output matrix (cols × rows) in row-major order
297
+ - `rows`: number - Number of rows in A
298
+ - `cols`: number - Number of columns in A
299
+ - Returns: Float64Array (B)
300
+
158
301
  **Example:**
159
302
  ```javascript
160
- const M = 100, N = 50;
161
- const A = new Float64Array(M * N);
162
- const x = new Float64Array(N);
163
- const y = new Float64Array(M);
164
-
165
- accelerate.matvec(A, x, y, M, N);
303
+ const A = new Float64Array([1, 2, 3, 4, 5, 6]); // 2×3 matrix
304
+ const B = new Float64Array(6); // 3×2 matrix
305
+ accelerate.transpose(A, B, 2, 3);
166
306
  ```
167
307
 
168
308
  #### `axpy(alpha, x, y)`
@@ -174,56 +314,75 @@ AXPY operation: y = alpha*x + y
174
314
  - `y`: Float64Array - Input/output vector
175
315
  - Returns: Float64Array (y)
176
316
 
177
- **Example:**
178
- ```javascript
179
- const x = new Float64Array([1, 2, 3]);
180
- const y = new Float64Array([4, 5, 6]);
181
- accelerate.axpy(2.0, x, y); // y = [6, 9, 12]
182
- ```
317
+ #### `copy(x, y)`
183
318
 
184
- ### Vector Operations (vDSP)
319
+ Copy vector: y = x
185
320
 
186
- #### `dot(a, b)`
321
+ - `x`: Float64Array - Input vector
322
+ - `y`: Float64Array - Output vector
323
+ - Returns: Float64Array (y)
187
324
 
188
- Dot product: sum(a[i] * b[i])
325
+ #### `swap(x, y)`
189
326
 
190
- - `a`: Float64Array - First vector
191
- - `b`: Float64Array - Second vector (same length as a)
192
- - Returns: number
327
+ Swap two vectors: x <-> y
193
328
 
194
- **Example:**
195
- ```javascript
196
- const a = new Float64Array([1, 2, 3, 4]);
197
- const b = new Float64Array([5, 6, 7, 8]);
198
- const result = accelerate.dot(a, b); // 70
199
- ```
329
+ - `x`: Float64Array - First vector
330
+ - `y`: Float64Array - Second vector
331
+ - Returns: Float64Array (x)
200
332
 
201
- #### `sum(vec)`
333
+ #### `norm(x)`
202
334
 
203
- Sum of all elements
335
+ L2 norm (Euclidean length): ||x||
204
336
 
205
- - `vec`: Float64Array - Input vector
337
+ - `x`: Float64Array - Input vector
206
338
  - Returns: number
207
339
 
208
340
  **Example:**
209
341
  ```javascript
210
- const vec = new Float64Array([1, 2, 3, 4, 5]);
211
- const result = accelerate.sum(vec); // 15
342
+ const vec = new Float64Array([3, 4]);
343
+ const length = accelerate.norm(vec); // 5
212
344
  ```
213
345
 
214
- #### `mean(vec)`
346
+ #### `abssum(x)`
215
347
 
216
- Mean (average) of all elements
348
+ Sum of absolute values: sum(|x[i]|)
217
349
 
218
- - `vec`: Float64Array - Input vector
350
+ - `x`: Float64Array - Input vector
219
351
  - Returns: number
220
352
 
353
+ #### `maxAbsIndex(x)`
354
+
355
+ Index of maximum absolute value
356
+
357
+ - `x`: Float64Array - Input vector
358
+ - Returns: number (index)
359
+
221
360
  **Example:**
222
361
  ```javascript
223
- const vec = new Float64Array([1, 2, 3, 4, 5]);
224
- const result = accelerate.mean(vec); // 3
362
+ const vec = new Float64Array([1, -5, 3, -2]);
363
+ const idx = accelerate.maxAbsIndex(vec); // 1 (value is -5)
225
364
  ```
226
365
 
366
+ #### `rot(x, y, c, s)`
367
+
368
+ Givens rotation: apply rotation to vectors x and y
369
+
370
+ - `x`: Float64Array - First vector
371
+ - `y`: Float64Array - Second vector
372
+ - `c`: number - Cosine of rotation angle
373
+ - `s`: number - Sine of rotation angle
374
+ - Returns: Float64Array (x)
375
+
376
+ ### Vector Arithmetic
377
+
378
+ #### `dot(a, b)`
379
+
380
+ Dot product: sum(a[i] * b[i])
381
+
382
+ - `a`: Float64Array - First vector
383
+ - `b`: Float64Array - Second vector (same length as a)
384
+ - Returns: number
385
+
227
386
  #### `vadd(a, b, out)`
228
387
 
229
388
  Element-wise addition: out[i] = a[i] + b[i]
@@ -233,14 +392,6 @@ Element-wise addition: out[i] = a[i] + b[i]
233
392
  - `out`: Float64Array - Output vector
234
393
  - Returns: Float64Array (out)
235
394
 
236
- **Example:**
237
- ```javascript
238
- const a = new Float64Array([1, 2, 3]);
239
- const b = new Float64Array([4, 5, 6]);
240
- const out = new Float64Array(3);
241
- accelerate.vadd(a, b, out); // out = [5, 7, 9]
242
- ```
243
-
244
395
  #### `vsub(a, b, out)`
245
396
 
246
397
  Element-wise subtraction: out[i] = a[i] - b[i]
@@ -268,21 +419,71 @@ Element-wise division: out[i] = a[i] / b[i]
268
419
  - `out`: Float64Array - Output vector
269
420
  - Returns: Float64Array (out)
270
421
 
271
- #### `vabs(a, b)`
422
+ #### `vscale(a, scalar, b)`
272
423
 
273
- Element-wise absolute value: b[i] = |a[i]|
424
+ Vector scaling: b = a * scalar
425
+
426
+ - `a`: Float64Array - Input vector
427
+ - `scalar`: number - Scalar multiplier
428
+ - `b`: Float64Array - Output vector
429
+ - Returns: Float64Array (b)
430
+
431
+ #### `vneg(a, b)`
432
+
433
+ Vector negation: b = -a
274
434
 
275
435
  - `a`: Float64Array - Input vector
276
436
  - `b`: Float64Array - Output vector
277
437
  - Returns: Float64Array (b)
278
438
 
439
+ #### `vaddScalar(a, scalar, c)`
440
+
441
+ Add scalar to vector: c[i] = a[i] + scalar
442
+
443
+ - `a`: Float64Array - Input vector
444
+ - `scalar`: number - Scalar value to add
445
+ - `c`: Float64Array - Output vector
446
+ - Returns: Float64Array (c)
447
+
448
+ #### `vma(a, b, c, d)`
449
+
450
+ Multiply-add: d[i] = (a[i] * b[i]) + c[i]
451
+
452
+ - `a`: Float64Array - First vector
453
+ - `b`: Float64Array - Second vector
454
+ - `c`: Float64Array - Third vector
455
+ - `d`: Float64Array - Output vector
456
+ - Returns: Float64Array (d)
457
+
279
458
  **Example:**
280
459
  ```javascript
281
- const a = new Float64Array([-1, -2, 3, -4]);
282
- const b = new Float64Array(4);
283
- accelerate.vabs(a, b); // b = [1, 2, 3, 4]
460
+ const a = new Float64Array([2, 3, 4]);
461
+ const b = new Float64Array([5, 6, 7]);
462
+ const c = new Float64Array([1, 1, 1]);
463
+ const d = new Float64Array(3);
464
+ accelerate.vma(a, b, c, d); // d = [11, 19, 29]
284
465
  ```
285
466
 
467
+ #### `vmsa(a, b, c, d)`
468
+
469
+ Multiply-scalar-add: d[i] = (a[i] * b) + c[i]
470
+
471
+ - `a`: Float64Array - Input vector
472
+ - `b`: number - Scalar multiplier
473
+ - `c`: Float64Array - Vector to add
474
+ - `d`: Float64Array - Output vector
475
+ - Returns: Float64Array (d)
476
+
477
+ ### Vector Functions
478
+
479
+ #### `vabs(a, b)`
480
+
481
+ Element-wise absolute value: b[i] = |a[i]|
482
+
483
+ - `a`: Float64Array - Input vector
484
+ - `b`: Float64Array - Output vector
485
+ - Returns: Float64Array (b)
486
+
286
487
  #### `vsquare(a, b)`
287
488
 
288
489
  Element-wise square: b[i] = a[i]^2
@@ -291,13 +492,6 @@ Element-wise square: b[i] = a[i]^2
291
492
  - `b`: Float64Array - Output vector
292
493
  - Returns: Float64Array (b)
293
494
 
294
- **Example:**
295
- ```javascript
296
- const a = new Float64Array([2, 3, 4]);
297
- const b = new Float64Array(3);
298
- accelerate.vsquare(a, b); // b = [4, 9, 16]
299
- ```
300
-
301
495
  #### `vsqrt(a, b)`
302
496
 
303
497
  Element-wise square root: b[i] = sqrt(a[i])
@@ -306,13 +500,6 @@ Element-wise square root: b[i] = sqrt(a[i])
306
500
  - `b`: Float64Array - Output vector
307
501
  - Returns: Float64Array (b)
308
502
 
309
- **Example:**
310
- ```javascript
311
- const a = new Float64Array([4, 9, 16]);
312
- const b = new Float64Array(3);
313
- accelerate.vsqrt(a, b); // b = [2, 3, 4]
314
- ```
315
-
316
503
  #### `normalize(a, b)`
317
504
 
318
505
  Normalize vector to unit length: b = a / ||a||
@@ -321,152 +508,1071 @@ Normalize vector to unit length: b = a / ||a||
321
508
  - `b`: Float64Array - Output vector (unit vector)
322
509
  - Returns: Float64Array (b)
323
510
 
511
+ #### `vreverse(a, b)`
512
+
513
+ Reverse vector order: b = reverse(a)
514
+
515
+ - `a`: Float64Array - Input vector
516
+ - `b`: Float64Array - Output vector
517
+ - Returns: Float64Array (b)
518
+
519
+ #### `vfill(scalar, vec)`
520
+
521
+ Fill vector with scalar value
522
+
523
+ - `scalar`: number - Value to fill with
524
+ - `vec`: Float64Array - Output vector
525
+ - Returns: Float64Array (vec)
526
+
324
527
  **Example:**
325
528
  ```javascript
326
- const a = new Float64Array([3, 4, 0]);
327
- const b = new Float64Array(3);
328
- accelerate.normalize(a, b); // b = [0.6, 0.8, 0]
529
+ const vec = new Float64Array(100);
530
+ accelerate.vfill(3.14, vec); // All elements = 3.14
329
531
  ```
330
532
 
331
- ### Reductions
332
-
333
- #### `rms(vec)`
533
+ #### `vramp(start, step, vec)`
334
534
 
335
- Root Mean Square: sqrt(sum(vec[i]^2) / n)
535
+ Generate linear ramp: vec[i] = start + i * step
336
536
 
337
- - `vec`: Float64Array - Input vector
338
- - Returns: number
537
+ - `start`: number - Starting value
538
+ - `step`: number - Step size
539
+ - `vec`: Float64Array - Output vector
540
+ - Returns: Float64Array (vec)
339
541
 
340
542
  **Example:**
341
543
  ```javascript
342
- const vec = new Float64Array([1, 2, 3, 4, 5]);
343
- const result = accelerate.rms(vec); // 3.317
544
+ const vec = new Float64Array(5);
545
+ accelerate.vramp(0, 2, vec); // vec = [0, 2, 4, 6, 8]
344
546
  ```
345
547
 
346
- ### Distance Metrics
347
-
348
- #### `euclidean(a, b)`
548
+ #### `vlerp(a, b, t, c)`
349
549
 
350
- Euclidean distance: sqrt(sum((a[i] - b[i])^2))
550
+ Linear interpolation: c[i] = a[i] + t * (b[i] - a[i])
351
551
 
352
- - `a`: Float64Array - First vector
353
- - `b`: Float64Array - Second vector
354
- - Returns: number
552
+ - `a`: Float64Array - Start vector
553
+ - `b`: Float64Array - End vector
554
+ - `t`: number - Interpolation parameter (0 to 1)
555
+ - `c`: Float64Array - Output vector
556
+ - Returns: Float64Array (c)
355
557
 
356
558
  **Example:**
357
559
  ```javascript
358
- const a = new Float64Array([0, 0, 0]);
359
- const b = new Float64Array([3, 4, 0]);
360
- const distance = accelerate.euclidean(a, b); // 5
560
+ const start = new Float64Array([0, 0, 0]);
561
+ const end = new Float64Array([10, 20, 30]);
562
+ const result = new Float64Array(3);
563
+ accelerate.vlerp(start, end, 0.5, result); // result = [5, 10, 15]
361
564
  ```
362
565
 
363
- ### Signal Processing
566
+ #### `vclear(vec)`
364
567
 
365
- #### `fft(signal)`
568
+ Clear vector (set all elements to zero)
366
569
 
367
- Fast Fourier Transform
570
+ - `vec`: Float64Array - Vector to clear
571
+ - Returns: Float64Array (vec)
368
572
 
369
- - `signal`: Float64Array - Input signal (length must be power of 2)
370
- - Returns: Object with `real` and `imag` Float64Arrays
573
+ #### `vlimit(a, low, high, c)`
574
+
575
+ Limit/saturate values to range [low, high]
576
+
577
+ - `a`: Float64Array - Input vector
578
+ - `low`: number - Lower bound
579
+ - `high`: number - Upper bound
580
+ - `c`: Float64Array - Output vector
581
+ - Returns: Float64Array (c)
582
+
583
+ ### Trigonometric Functions
584
+
585
+ #### `vsin(a, b)`
586
+
587
+ Element-wise sine: b[i] = sin(a[i])
588
+
589
+ - `a`: Float64Array - Input vector (radians)
590
+ - `b`: Float64Array - Output vector
591
+ - Returns: Float64Array (b)
592
+
593
+ #### `vcos(a, b)`
594
+
595
+ Element-wise cosine: b[i] = cos(a[i])
596
+
597
+ - `a`: Float64Array - Input vector (radians)
598
+ - `b`: Float64Array - Output vector
599
+ - Returns: Float64Array (b)
600
+
601
+ #### `vtan(a, b)`
602
+
603
+ Element-wise tangent: b[i] = tan(a[i])
604
+
605
+ - `a`: Float64Array - Input vector (radians)
606
+ - `b`: Float64Array - Output vector
607
+ - Returns: Float64Array (b)
371
608
 
372
609
  **Example:**
373
610
  ```javascript
374
- const signal = new Float64Array(1024);
375
- for (let i = 0; i < signal.length; i++) {
376
- signal[i] = Math.sin(2 * Math.PI * i / signal.length);
611
+ const angles = new Float64Array(1000);
612
+ const sines = new Float64Array(1000);
613
+ for (let i = 0; i < 1000; i++) {
614
+ angles[i] = (i / 1000) * 2 * Math.PI;
377
615
  }
378
- const spectrum = accelerate.fft(signal);
379
- console.log(spectrum.real.length); // 512
380
- console.log(spectrum.imag.length); // 512
616
+ accelerate.vsin(angles, sines);
381
617
  ```
382
618
 
383
- ## TypeScript Support
619
+ #### `vasin(a, b)`
384
620
 
385
- Full TypeScript definitions included:
621
+ Element-wise inverse sine: b[i] = asin(a[i])
386
622
 
387
- ```typescript
388
- import * as accelerate from 'node-accelerate';
623
+ - `a`: Float64Array - Input vector (values in [-1, 1])
624
+ - `b`: Float64Array - Output vector (radians)
625
+ - Returns: Float64Array (b)
389
626
 
390
- const A = new Float64Array(100 * 100);
391
- const B = new Float64Array(100 * 100);
392
- const C = new Float64Array(100 * 100);
627
+ #### `vacos(a, b)`
393
628
 
394
- accelerate.matmul(A, B, C, 100, 100, 100);
395
- ```
629
+ Element-wise inverse cosine: b[i] = acos(a[i])
396
630
 
397
- ## Use Cases
631
+ - `a`: Float64Array - Input vector (values in [-1, 1])
632
+ - `b`: Float64Array - Output vector (radians)
633
+ - Returns: Float64Array (b)
398
634
 
399
- ### Machine Learning Inference
635
+ #### `vatan(a, b)`
400
636
 
401
- ```javascript
402
- // Matrix multiplication for neural network layers
403
- function denseLayer(input, weights, bias) {
404
- const output = new Float64Array(weights.length / input.length);
405
- accelerate.matmul(
406
- input, weights, output,
407
- 1, input.length, output.length
408
- );
409
- // Add bias...
410
- return output;
411
- }
412
- ```
637
+ Element-wise inverse tangent: b[i] = atan(a[i])
413
638
 
414
- ### Signal Processing
639
+ - `a`: Float64Array - Input vector
640
+ - `b`: Float64Array - Output vector (radians)
641
+ - Returns: Float64Array (b)
642
+
643
+ #### `vatan2(y, x, out)`
644
+
645
+ Two-argument arctangent: out[i] = atan2(y[i], x[i])
646
+
647
+ - `y`: Float64Array - Y coordinates
648
+ - `x`: Float64Array - X coordinates
649
+ - `out`: Float64Array - Output vector (radians)
650
+ - Returns: Float64Array (out)
415
651
 
652
+ **Example:**
416
653
  ```javascript
417
- // Analyze audio spectrum
418
- function analyzeAudio(audioBuffer) {
419
- const spectrum = accelerate.fft(audioBuffer);
420
- const magnitudes = new Float64Array(spectrum.real.length);
421
-
422
- for (let i = 0; i < magnitudes.length; i++) {
423
- magnitudes[i] = Math.sqrt(
424
- spectrum.real[i] ** 2 + spectrum.imag[i] ** 2
425
- );
426
- }
427
-
428
- return magnitudes;
429
- }
654
+ const y = new Float64Array([1, 1, -1, -1]);
655
+ const x = new Float64Array([1, -1, -1, 1]);
656
+ const angles = new Float64Array(4);
657
+ accelerate.vatan2(y, x, angles); // Angles in all four quadrants
430
658
  ```
431
659
 
432
- ### Scientific Computing
660
+ ### Hyperbolic Functions
661
+
662
+ #### `vsinh(a, b)`
663
+
664
+ Element-wise hyperbolic sine: b[i] = sinh(a[i])
665
+
666
+ - `a`: Float64Array - Input vector
667
+ - `b`: Float64Array - Output vector
668
+ - Returns: Float64Array (b)
669
+
670
+ #### `vcosh(a, b)`
671
+
672
+ Element-wise hyperbolic cosine: b[i] = cosh(a[i])
673
+
674
+ - `a`: Float64Array - Input vector
675
+ - `b`: Float64Array - Output vector
676
+ - Returns: Float64Array (b)
433
677
 
678
+ #### `vtanh(a, b)`
679
+
680
+ Element-wise hyperbolic tangent: b[i] = tanh(a[i])
681
+
682
+ - `a`: Float64Array - Input vector
683
+ - `b`: Float64Array - Output vector
684
+ - Returns: Float64Array (b)
685
+
686
+ **Example:**
434
687
  ```javascript
435
- // Numerical integration using vector operations
436
- function integrate(f, a, b, n) {
437
- const h = (b - a) / n;
438
- const x = new Float64Array(n);
439
- const y = new Float64Array(n);
440
-
441
- for (let i = 0; i < n; i++) {
442
- x[i] = a + i * h;
443
- y[i] = f(x[i]);
444
- }
445
-
446
- return h * accelerate.sum(y);
447
- }
688
+ // tanh is commonly used as an activation function in neural networks
689
+ const logits = new Float64Array(1000);
690
+ const activations = new Float64Array(1000);
691
+ // ... fill logits ...
692
+ accelerate.vtanh(logits, activations);
448
693
  ```
449
694
 
450
- ## Benchmarking
695
+ ### Exponential and Logarithmic Functions
451
696
 
452
- Run the included benchmarks:
697
+ #### `vexp(a, b)`
453
698
 
454
- ```bash
455
- npm run benchmark
456
- ```
699
+ Element-wise exponential: b[i] = exp(a[i])
457
700
 
458
- Run tests:
701
+ - `a`: Float64Array - Input vector
702
+ - `b`: Float64Array - Output vector
703
+ - Returns: Float64Array (b)
459
704
 
460
- ```bash
461
- npm test
462
- ```
705
+ #### `vlog(a, b)`
463
706
 
464
- ## Performance Tips
707
+ Element-wise natural logarithm: b[i] = log(a[i])
465
708
 
466
- 1. **Reuse buffers** - Allocate Float64Arrays once and reuse them
467
- 2. **Batch operations** - Process large arrays instead of many small ones
468
- 3. **Use appropriate sizes** - Accelerate shines with larger data (1000+ elements)
469
- 4. **Profile your code** - Not all operations benefit equally
709
+ - `a`: Float64Array - Input vector
710
+ - `b`: Float64Array - Output vector
711
+ - Returns: Float64Array (b)
712
+
713
+ #### `vlog10(a, b)`
714
+
715
+ Element-wise base-10 logarithm: b[i] = log10(a[i])
716
+
717
+ - `a`: Float64Array - Input vector
718
+ - `b`: Float64Array - Output vector
719
+ - Returns: Float64Array (b)
720
+
721
+ #### `vpow(a, b, c)`
722
+
723
+ Element-wise power: c[i] = a[i]^b[i]
724
+
725
+ - `a`: Float64Array - Base vector
726
+ - `b`: Float64Array - Exponent vector
727
+ - `c`: Float64Array - Output vector
728
+ - Returns: Float64Array (c)
729
+
730
+ #### `vreciprocal(a, b)`
731
+
732
+ Element-wise reciprocal: b[i] = 1 / a[i]
733
+
734
+ - `a`: Float64Array - Input vector
735
+ - `b`: Float64Array - Output vector
736
+ - Returns: Float64Array (b)
737
+
738
+ **Example:**
739
+ ```javascript
740
+ const values = new Float64Array([2, 4, 5, 10]);
741
+ const reciprocals = new Float64Array(4);
742
+ accelerate.vreciprocal(values, reciprocals); // [0.5, 0.25, 0.2, 0.1]
743
+ ```
744
+
745
+ #### `vrsqrt(a, b)`
746
+
747
+ Element-wise inverse square root: b[i] = 1 / sqrt(a[i])
748
+
749
+ - `a`: Float64Array - Input vector
750
+ - `b`: Float64Array - Output vector
751
+ - Returns: Float64Array (b)
752
+
753
+ **Example:**
754
+ ```javascript
755
+ // Fast normalization using inverse square root
756
+ const vec = new Float64Array([3, 4]);
757
+ const sumSq = accelerate.sumOfSquares(vec); // 25
758
+ const invLen = new Float64Array([sumSq]);
759
+ const invLenResult = new Float64Array(1);
760
+ accelerate.vrsqrt(invLen, invLenResult); // invLenResult[0] = 0.2, i.e. 1 / sqrt(25) = 1 / ||vec||
761
+ ```
762
+
763
+ ### Rounding Functions
764
+
765
+ #### `vceil(a, b)`
766
+
767
+ Element-wise ceiling: b[i] = ceil(a[i])
768
+
769
+ - `a`: Float64Array - Input vector
770
+ - `b`: Float64Array - Output vector
771
+ - Returns: Float64Array (b)
772
+
773
+ #### `vfloor(a, b)`
774
+
775
+ Element-wise floor: b[i] = floor(a[i])
776
+
777
+ - `a`: Float64Array - Input vector
778
+ - `b`: Float64Array - Output vector
779
+ - Returns: Float64Array (b)
780
+
781
+ #### `vtrunc(a, b)`
782
+
783
+ Element-wise truncate (round toward zero): b[i] = trunc(a[i])
784
+
785
+ - `a`: Float64Array - Input vector
786
+ - `b`: Float64Array - Output vector
787
+ - Returns: Float64Array (b)
788
+
789
+ **Example:**
790
+ ```javascript
791
+ const values = new Float64Array([1.7, -1.7, 2.3, -2.3]);
792
+ const ceiled = new Float64Array(4);
793
+ const floored = new Float64Array(4);
794
+ const truncated = new Float64Array(4);
795
+
796
+ accelerate.vceil(values, ceiled); // [2, -1, 3, -2]
797
+ accelerate.vfloor(values, floored); // [1, -2, 2, -3]
798
+ accelerate.vtrunc(values, truncated); // [1, -1, 2, -2]
799
+ ```
800
+
801
+ #### `vcopysign(a, b, c)`
802
+
803
+ Copy sign: c[i] = |a[i]| * sign(b[i])
804
+
805
+ - `a`: Float64Array - Magnitude vector
806
+ - `b`: Float64Array - Sign vector
807
+ - `c`: Float64Array - Output vector
808
+ - Returns: Float64Array (c)
809
+
810
+ **Example:**
811
+ ```javascript
812
+ const magnitudes = new Float64Array([1, 2, 3, 4]);
813
+ const signs = new Float64Array([-1, 1, -1, 1]);
814
+ const result = new Float64Array(4);
815
+ accelerate.vcopysign(magnitudes, signs, result); // [-1, 2, -3, 4]
816
+ ```
817
+
818
+ ### Clipping and Thresholding
819
+
820
+ #### `vclip(a, b, min, max)`
821
+
822
+ Clip values to range [min, max]
823
+
824
+ - `a`: Float64Array - Input vector
825
+ - `b`: Float64Array - Output vector
826
+ - `min`: number - Minimum value
827
+ - `max`: number - Maximum value
828
+ - Returns: Float64Array (b)
829
+
830
+ **Example:**
831
+ ```javascript
832
+ const data = new Float64Array([-10, -5, 0, 5, 10]);
833
+ const clipped = new Float64Array(5);
834
+ accelerate.vclip(data, clipped, -3, 3);
835
+ // clipped = [-3, -3, 0, 3, 3]
836
+ ```
837
+
838
+ #### `vthreshold(a, b, threshold)`
839
+
840
+ Threshold values: b[i] = a[i] if a[i] > threshold, else threshold
841
+
842
+ - `a`: Float64Array - Input vector
843
+ - `b`: Float64Array - Output vector
844
+ - `threshold`: number - Threshold value
845
+ - Returns: Float64Array (b)
846
+
847
+ ### Statistical Functions
848
+
849
+ #### `sum(vec)`
850
+
851
+ Sum of all elements
852
+
853
+ - `vec`: Float64Array - Input vector
854
+ - Returns: number
855
+
856
+ #### `mean(vec)`
857
+
858
+ Mean (average) of all elements
859
+
860
+ - `vec`: Float64Array - Input vector
861
+ - Returns: number
862
+
863
+ #### `variance(vec)`
864
+
865
+ Variance of all elements
866
+
867
+ - `vec`: Float64Array - Input vector
868
+ - Returns: number
869
+
870
+ #### `stddev(vec)`
871
+
872
+ Standard deviation of all elements
873
+
874
+ - `vec`: Float64Array - Input vector
875
+ - Returns: number
876
+
877
+ #### `max(vec)`
878
+
879
+ Maximum element
880
+
881
+ - `vec`: Float64Array - Input vector
882
+ - Returns: number
883
+
884
+ #### `min(vec)`
885
+
886
+ Minimum element
887
+
888
+ - `vec`: Float64Array - Input vector
889
+ - Returns: number
890
+
891
+ #### `minmax(vec)`
892
+
893
+ Both minimum and maximum elements
894
+
895
+ - `vec`: Float64Array - Input vector
896
+ - Returns: {min: number, max: number}
897
+
898
+ **Example:**
899
+ ```javascript
900
+ const data = new Float64Array([1, 5, 3, 9, 2]);
901
+ const stats = accelerate.minmax(data);
902
+ console.log(stats.min, stats.max); // 1, 9
903
+ ```
904
+
905
+ #### `rms(vec)`
906
+
907
+ Root Mean Square: sqrt(sum(vec[i]^2) / n)
908
+
909
+ - `vec`: Float64Array - Input vector
910
+ - Returns: number
911
+
912
+ #### `sumOfSquares(vec)`
913
+
914
+ Sum of squares: sum(vec[i]^2)
915
+
916
+ - `vec`: Float64Array - Input vector
917
+ - Returns: number
918
+
919
+ #### `meanMagnitude(vec)`
920
+
921
+ Mean magnitude: mean(|vec[i]|)
922
+
923
+ - `vec`: Float64Array - Input vector
924
+ - Returns: number
925
+
926
+ #### `meanSquare(vec)`
927
+
928
+ Mean square: mean(vec[i]^2)
929
+
930
+ - `vec`: Float64Array - Input vector
931
+ - Returns: number
932
+
933
+ #### `maxMagnitude(vec)`
934
+
935
+ Maximum magnitude (absolute value)
936
+
937
+ - `vec`: Float64Array - Input vector
938
+ - Returns: number
939
+
940
+ **Example:**
941
+ ```javascript
942
+ const vec = new Float64Array([1, -5, 3, -2]);
943
+ const maxMag = accelerate.maxMagnitude(vec); // 5
944
+ ```
945
+
946
+ #### `minMagnitude(vec)`
947
+
948
+ Minimum magnitude (absolute value)
949
+
950
+ - `vec`: Float64Array - Input vector
951
+ - Returns: number
952
+
953
+ **Example:**
954
+ ```javascript
955
+ const vec = new Float64Array([1, -5, 3, -2]);
956
+ const minMag = accelerate.minMagnitude(vec); // 1
957
+ ```
958
+
959
+ ### Distance Metrics
960
+
961
+ #### `euclidean(a, b)`
962
+
963
+ Euclidean distance: sqrt(sum((a[i] - b[i])^2))
964
+
965
+ - `a`: Float64Array - First vector
966
+ - `b`: Float64Array - Second vector
967
+ - Returns: number
968
+
969
+ **Example:**
970
+ ```javascript
971
+ const point1 = new Float64Array([0, 0, 0]);
972
+ const point2 = new Float64Array([3, 4, 0]);
973
+ const distance = accelerate.euclidean(point1, point2); // 5
974
+ ```
975
+
976
+ ### Signal Processing
977
+
978
+ #### `fft(signal)`
979
+
980
+ Fast Fourier Transform (real to complex)
981
+
982
+ - `signal`: Float64Array - Input signal (length must be power of 2)
983
+ - Returns: {real: Float64Array, imag: Float64Array} - Frequency-domain components (each array has length signal.length / 2)
984
+
985
+ **Example:**
986
+ ```javascript
987
+ const signal = new Float64Array(1024);
988
+ for (let i = 0; i < signal.length; i++) {
989
+ signal[i] = Math.sin(2 * Math.PI * i / signal.length);
990
+ }
991
+ const spectrum = accelerate.fft(signal);
992
+ console.log(spectrum.real.length); // 512
993
+ console.log(spectrum.imag.length); // 512
994
+ ```
995
+
996
+ #### `ifft(real, imag)`
997
+
998
+ Inverse Fast Fourier Transform (complex to real)
999
+
1000
+ - `real`: Float64Array - Real part of frequency domain
1001
+ - `imag`: Float64Array - Imaginary part of frequency domain
1002
+ - Returns: Float64Array - Time domain signal
1003
+
1004
+ **Example:**
1005
+ ```javascript
1006
+ const signal = new Float64Array(256);
1007
+ // ... fill signal ...
1008
+ const spectrum = accelerate.fft(signal);
1009
+ const reconstructed = accelerate.ifft(spectrum.real, spectrum.imag);
1010
+ // reconstructed ≈ signal
1011
+ ```
1012
+
1013
+ #### `conv(signal, kernel, result)`
1014
+
1015
+ 1D Convolution
1016
+
1017
+ - `signal`: Float64Array - Input signal
1018
+ - `kernel`: Float64Array - Convolution kernel
1019
+ - `result`: Float64Array - Output (length = signal.length - kernel.length + 1)
1020
+ - Returns: Float64Array (result)
1021
+
1022
+ **Example:**
1023
+ ```javascript
1024
+ const signal = new Float64Array([1, 2, 3, 4, 5]);
1025
+ const kernel = new Float64Array([0.25, 0.5, 0.25]); // Moving average
1026
+ const result = new Float64Array(3);
1027
+ accelerate.conv(signal, kernel, result);
1028
+ ```
1029
+
1030
+ #### `xcorr(a, b, result)`
1031
+
1032
+ Cross-correlation
1033
+
1034
+ - `a`: Float64Array - First signal
1035
+ - `b`: Float64Array - Second signal
1036
+ - `result`: Float64Array - Output (length = a.length + b.length - 1)
1037
+ - Returns: Float64Array (result)
1038
+
1039
+ ### Window Functions
1040
+
1041
+ #### `hamming(length)`
1042
+
1043
+ Generate Hamming window
1044
+
1045
+ - `length`: number - Window length
1046
+ - Returns: Float64Array - Window coefficients
1047
+
1048
+ #### `hanning(length)`
1049
+
1050
+ Generate Hanning window
1051
+
1052
+ - `length`: number - Window length
1053
+ - Returns: Float64Array - Window coefficients
1054
+
1055
+ #### `blackman(length)`
1056
+
1057
+ Generate Blackman window
1058
+
1059
+ - `length`: number - Window length
1060
+ - Returns: Float64Array - Window coefficients
1061
+
1062
+ **Example:**
1063
+ ```javascript
1064
+ const window = accelerate.hanning(256);
1065
+ const signal = new Float64Array(256);
1066
+ const windowed = new Float64Array(256);
1067
+
1068
+ // Apply window to signal
1069
+ accelerate.vmul(signal, window, windowed);
1070
+ const spectrum = accelerate.fft(windowed);
1071
+ ```
1072
+
1073
+ ### Interpolation
1074
+
1075
+ #### `interp1d(x, y, xi, yi)`
1076
+
1077
+ Linear interpolation
1078
+
1079
+ - `x`: Float64Array - X coordinates of data points
1080
+ - `y`: Float64Array - Y coordinates of data points
1081
+ - `xi`: Float64Array - X coordinates to interpolate at
1082
+ - `yi`: Float64Array - Output interpolated Y values
1083
+ - Returns: Float64Array (yi)
1084
+
1085
+ **Example:**
1086
+ ```javascript
1087
+ const x = new Float64Array([0, 1, 2, 3]);
1088
+ const y = new Float64Array([0, 1, 4, 9]);
1089
+ const xi = new Float64Array([0.5, 1.5, 2.5]);
1090
+ const yi = new Float64Array(3);
1091
+ accelerate.interp1d(x, y, xi, yi);
1092
+ ```
1093
+
1094
+ ## TypeScript Support
1095
+
1096
+ Full TypeScript definitions included:
1097
+
1098
+ ```typescript
1099
+ import * as accelerate from '@digitaldefiance/node-accelerate';
1100
+
1101
+ const A = new Float64Array(100 * 100);
1102
+ const B = new Float64Array(100 * 100);
1103
+ const C = new Float64Array(100 * 100);
1104
+
1105
+ accelerate.matmul(A, B, C, 100, 100, 100);
1106
+ ```
1107
+
1108
+ ## Use Cases
1109
+
1110
+ ### Machine Learning Inference
1111
+
1112
+ ```javascript
1113
+ // Neural network dense layer with activation
1114
+ function denseLayerWithReLU(input, weights, bias, output) {
1115
+ const M = 1, K = input.length, N = output.length;
1116
+
1117
+ // Matrix multiplication: output = input × weights
1118
+ accelerate.matmul(input, weights, output, M, K, N);
1119
+
1120
+ // Add bias
1121
+ accelerate.vadd(output, bias, output);
1122
+
1123
+ // ReLU activation: max(0, x)
1124
+ // Clipping to [0, Infinity] zeroes negatives and leaves positives unchanged
1125
+ accelerate.vclip(output, output, 0, Infinity);
1126
+
1127
+ return output;
1128
+ }
1129
+
1130
+ // Softmax activation
1131
+ function softmax(logits, output) {
1132
+ // Subtract max for numerical stability
1133
+ const maxVal = accelerate.max(logits);
1134
+ const shifted = new Float64Array(logits.length);
1135
+ const negMax = -maxVal;
1136
+
1137
+ for (let i = 0; i < logits.length; i++) {
1138
+ shifted[i] = logits[i] + negMax;
1139
+ }
1140
+
1141
+ // Compute exp
1142
+ accelerate.vexp(shifted, output);
1143
+
1144
+ // Normalize
1145
+ const sum = accelerate.sum(output);
1146
+ accelerate.vscale(output, 1.0 / sum, output);
1147
+
1148
+ return output;
1149
+ }
1150
+ ```
1151
+
1152
+ ### Signal Processing & Audio
1153
+
1154
+ ```javascript
1155
+ // Apply windowed FFT for spectral analysis
1156
+ function spectralAnalysis(audioBuffer, windowSize = 2048) {
1157
+ const window = accelerate.hanning(windowSize);
1158
+ const windowed = new Float64Array(windowSize);
1159
+
1160
+ // Apply window
1161
+ accelerate.vmul(audioBuffer, window, windowed);
1162
+
1163
+ // Compute FFT
1164
+ const spectrum = accelerate.fft(windowed);
1165
+
1166
+ // Compute magnitude spectrum
1167
+ const magnitudes = new Float64Array(spectrum.real.length);
1168
+ for (let i = 0; i < magnitudes.length; i++) {
1169
+ magnitudes[i] = Math.sqrt(
1170
+ spectrum.real[i] ** 2 + spectrum.imag[i] ** 2
1171
+ );
1172
+ }
1173
+
1174
+ // Convert to dB
1175
+ const dB = new Float64Array(magnitudes.length);
1176
+ accelerate.vlog10(magnitudes, dB);
1177
+ accelerate.vscale(dB, 20, dB);
1178
+
1179
+ return dB;
1180
+ }
1181
+
1182
+ // Low-pass filter using convolution
1183
+ function lowPassFilter(signal, cutoffFreq, sampleRate) {
1184
+ // Design simple FIR filter kernel
1185
+ const kernelSize = 51;
1186
+ const kernel = new Float64Array(kernelSize);
1187
+
1188
+ // Sinc function kernel
1189
+ const fc = cutoffFreq / sampleRate;
1190
+ for (let i = 0; i < kernelSize; i++) {
1191
+ const x = i - (kernelSize - 1) / 2; // Integer offset from the center tap (kernelSize is odd)
1192
+ if (x === 0) {
1193
+ kernel[i] = 2 * fc;
1194
+ } else {
1195
+ kernel[i] = Math.sin(2 * Math.PI * fc * x) / (Math.PI * x);
1196
+ }
1197
+ }
1198
+
1199
+ // Apply Hamming window to kernel
1200
+ const window = accelerate.hamming(kernelSize);
1201
+ accelerate.vmul(kernel, window, kernel);
1202
+
1203
+ // Normalize
1204
+ const sum = accelerate.sum(kernel);
1205
+ accelerate.vscale(kernel, 1.0 / sum, kernel);
1206
+
1207
+ // Convolve
1208
+ const filtered = new Float64Array(signal.length - kernelSize + 1);
1209
+ accelerate.conv(signal, kernel, filtered);
1210
+
1211
+ return filtered;
1212
+ }
1213
+ ```
1214
+
1215
+ ### Scientific Computing
1216
+
1217
+ ```javascript
1218
+ // Numerical integration using trapezoidal rule
1219
+ function trapezoidalIntegration(f, a, b, n) {
1220
+ const h = (b - a) / n;
1221
+ const x = new Float64Array(n + 1);
1222
+ const y = new Float64Array(n + 1);
1223
+
1224
+ // Generate points
1225
+ for (let i = 0; i <= n; i++) {
1226
+ x[i] = a + i * h;
1227
+ y[i] = f(x[i]);
1228
+ }
1229
+
1230
+ // Trapezoidal rule: h * (y[0]/2 + y[1] + ... + y[n-1] + y[n]/2)
1231
+ const sum = accelerate.sum(y);
1232
+ return h * (sum - (y[0] + y[n]) / 2);
1233
+ }
1234
+
1235
+ // Compute correlation coefficient
1236
+ function correlationCoefficient(x, y) {
1237
+ const n = x.length;
1238
+
1239
+ // Compute means
1240
+ const meanX = accelerate.mean(x);
1241
+ const meanY = accelerate.mean(y);
1242
+
1243
+ // Center the data
1244
+ const xCentered = new Float64Array(n);
1245
+ const yCentered = new Float64Array(n);
1246
+
1247
+ for (let i = 0; i < n; i++) {
1248
+ xCentered[i] = x[i] - meanX;
1249
+ yCentered[i] = y[i] - meanY;
1250
+ }
1251
+
1252
+ // Compute correlation
1253
+ const numerator = accelerate.dot(xCentered, yCentered);
1254
+ const denomX = Math.sqrt(accelerate.sumOfSquares(xCentered));
1255
+ const denomY = Math.sqrt(accelerate.sumOfSquares(yCentered));
1256
+
1257
+ return numerator / (denomX * denomY);
1258
+ }
1259
+
1260
+ // Polynomial evaluation using Horner's method (vectorized); coefficients[i] is the coefficient of x^i
1261
+ function polyval(coefficients, x, result) {
1262
+ const n = x.length;
1263
+ const degree = coefficients.length - 1;
1264
+
1265
+ // Initialize with highest degree coefficient
1266
+ for (let i = 0; i < n; i++) {
1267
+ result[i] = coefficients[degree];
1268
+ }
1269
+
1270
+ // Horner's method: result = result * x + coeff
1271
+ for (let i = degree - 1; i >= 0; i--) {
1272
+ accelerate.vmul(result, x, result);
1273
+
1274
+ const coeff = new Float64Array(n);
1275
+ coeff.fill(coefficients[i]);
1276
+ accelerate.vadd(result, coeff, result);
1277
+ }
1278
+
1279
+ return result;
1280
+ }
1281
+ ```
1282
+
1283
+ ### Data Analysis & Statistics
1284
+
1285
+ ```javascript
1286
+ // Compute z-scores (standardization)
1287
+ function zScore(data, output) {
1288
+ const mean = accelerate.mean(data);
1289
+ const std = accelerate.stddev(data);
1290
+
1291
+ // z = (x - mean) / std
1292
+ for (let i = 0; i < data.length; i++) {
1293
+ output[i] = data[i] - mean;
1294
+ }
1295
+ accelerate.vscale(output, 1.0 / std, output);
1296
+
1297
+ return output;
1298
+ }
1299
+
1300
+ // Moving average filter
1301
+ function movingAverage(data, windowSize) {
1302
+ const kernel = new Float64Array(windowSize);
1303
+ kernel.fill(1.0 / windowSize);
1304
+
1305
+ const result = new Float64Array(data.length - windowSize + 1);
1306
+ accelerate.conv(data, kernel, result);
1307
+
1308
+ return result;
1309
+ }
1310
+
1311
+ // Outlier detection using IQR method
1312
+ function detectOutliers(data) {
1313
+ const sorted = new Float64Array(data);
1314
+ sorted.sort(); // Typed arrays sort numerically by default; quartiles below require sorted data
1315
+
1316
+ const q1Index = Math.floor(data.length * 0.25);
1317
+ const q3Index = Math.floor(data.length * 0.75);
1318
+
1319
+ const q1 = sorted[q1Index];
1320
+ const q3 = sorted[q3Index];
1321
+ const iqr = q3 - q1;
1322
+
1323
+ const lowerBound = q1 - 1.5 * iqr;
1324
+ const upperBound = q3 + 1.5 * iqr;
1325
+
1326
+ const outliers = [];
1327
+ for (let i = 0; i < data.length; i++) {
1328
+ if (data[i] < lowerBound || data[i] > upperBound) {
1329
+ outliers.push({ index: i, value: data[i] });
1330
+ }
1331
+ }
1332
+
1333
+ return outliers;
1334
+ }
1335
+ ```
1336
+
1337
+ ### Image Processing
1338
+
1339
+ ```javascript
1340
+ // Gaussian blur (separable convolution)
1341
+ function gaussianBlur(image, width, height, sigma) {
1342
+ // Generate 1D Gaussian kernel
1343
+ const kernelSize = Math.ceil(sigma * 6) | 1; // Ensure odd
1344
+ const kernel = new Float64Array(kernelSize);
1345
+ const center = Math.floor(kernelSize / 2);
1346
+
1347
+ for (let i = 0; i < kernelSize; i++) {
1348
+ const x = i - center;
1349
+ kernel[i] = Math.exp(-(x * x) / (2 * sigma * sigma));
1350
+ }
1351
+
1352
+ // Normalize
1353
+ const sum = accelerate.sum(kernel);
1354
+ accelerate.vscale(kernel, 1.0 / sum, kernel);
1355
+
1356
+ // Horizontal pass
1357
+ const temp = new Float64Array(width * height);
1358
+ for (let y = 0; y < height; y++) {
1359
+ const row = image.subarray(y * width, (y + 1) * width);
1360
+ const outRow = temp.subarray(y * width, (y + 1) * width);
1361
+ // Convolve row (simplified - needs padding)
1362
+ accelerate.conv(row, kernel, outRow);
1363
+ }
1364
+
1365
+ // Vertical pass (similar logic)
1366
+ // ...
1367
+
1368
+ return temp;
1369
+ }
1370
+
1371
+ // Edge detection using Sobel operator
1372
+ function sobelEdgeDetection(image, width, height) {
1373
+ const sobelX = new Float64Array([-1, 0, 1, -2, 0, 2, -1, 0, 1]);
1374
+ const sobelY = new Float64Array([-1, -2, -1, 0, 0, 0, 1, 2, 1]);
1375
+
1376
+ const gradX = new Float64Array(width * height);
1377
+ const gradY = new Float64Array(width * height);
1378
+ const magnitude = new Float64Array(width * height);
1379
+
1380
+ // Apply Sobel kernels (simplified)
1381
+ // ... convolution logic ...
1382
+
1383
+ // Compute gradient magnitude
1384
+ const gradXSq = new Float64Array(width * height);
1385
+ const gradYSq = new Float64Array(width * height);
1386
+
1387
+ accelerate.vsquare(gradX, gradXSq);
1388
+ accelerate.vsquare(gradY, gradYSq);
1389
+ accelerate.vadd(gradXSq, gradYSq, magnitude);
1390
+ accelerate.vsqrt(magnitude, magnitude);
1391
+
1392
+ return magnitude;
1393
+ }
1394
+ ```
1395
+
1396
+ ### Financial Analysis
1397
+
1398
+ ```javascript
1399
+ // Calculate returns and volatility
1400
+ function calculateReturns(prices) {
1401
+ const returns = new Float64Array(prices.length - 1);
1402
+
1403
+ for (let i = 1; i < prices.length; i++) {
1404
+ returns[i - 1] = (prices[i] - prices[i - 1]) / prices[i - 1];
1405
+ }
1406
+
1407
+ const meanReturn = accelerate.mean(returns);
1408
+ const volatility = accelerate.stddev(returns);
1409
+
1410
+ return { returns, meanReturn, volatility };
1411
+ }
1412
+
1413
+ // Exponential moving average
1414
+ function exponentialMovingAverage(data, alpha) {
1415
+ const ema = new Float64Array(data.length);
1416
+ ema[0] = data[0];
1417
+
1418
+ for (let i = 1; i < data.length; i++) {
1419
+ ema[i] = alpha * data[i] + (1 - alpha) * ema[i - 1];
1420
+ }
1421
+
1422
+ return ema;
1423
+ }
1424
+
1425
+ // Bollinger Bands
1426
+ function bollingerBands(prices, period, numStdDev) {
1427
+ const ma = movingAverage(prices, period);
1428
+ const upper = new Float64Array(ma.length);
1429
+ const lower = new Float64Array(ma.length);
1430
+
1431
+ for (let i = 0; i < ma.length; i++) {
1432
+ const window = prices.subarray(i, i + period);
1433
+ const std = accelerate.stddev(window);
1434
+ upper[i] = ma[i] + numStdDev * std;
1435
+ lower[i] = ma[i] - numStdDev * std;
1436
+ }
1437
+
1438
+ return { middle: ma, upper, lower };
1439
+ }
1440
+ ```
1441
+
1442
+ ## Benchmarking
1443
+
1444
+ Run the included benchmarks:
1445
+
1446
+ ```bash
1447
+ npm run benchmark
1448
+ ```
1449
+
1450
+ Run tests:
1451
+
1452
+ ```bash
1453
+ npm test
1454
+ ```
1455
+
1456
+ Compare with pure JavaScript:
1457
+
1458
+ ```bash
1459
+ npm run compare
1460
+ ```
1461
+
1462
+ ## Performance Tips
1463
+
1464
+ 1. **Reuse buffers** - Allocate Float64Arrays once and reuse them
1465
+ 2. **Batch operations** - Process large arrays instead of many small ones
1466
+ 3. **Use appropriate sizes** - Accelerate shines with larger data (1000+ elements)
1467
+ 4. **Profile your code** - Not all operations benefit equally
1468
+ 5. **Use windows for FFT** - Apply Hanning/Hamming windows before FFT for better spectral analysis
1469
+ 6. **Leverage vectorized trig** - Use vsin/vcos/vtan instead of loops with Math.sin/cos/tan
1470
+ 7. **Chain operations** - Minimize intermediate allocations
1471
+
1472
+ ## Complete Function Reference
1473
+
1474
+ ### Matrix Operations (BLAS)
1475
+ - `matmul(A, B, C, M, K, N)` - Matrix multiplication (double precision)
1476
+ - `matmulFloat(A, B, C, M, K, N)` - Matrix multiplication (single precision)
1477
+ - `matvec(A, x, y, M, N)` - Matrix-vector multiplication
1478
+ - `transpose(A, B, rows, cols)` - Matrix transpose
1479
+ - `axpy(alpha, x, y)` - AXPY operation (y = alpha*x + y)
1480
+ - `copy(x, y)` - Vector copy
1481
+ - `swap(x, y)` - Vector swap
1482
+ - `norm(x)` - L2 norm (Euclidean length)
1483
+ - `abssum(x)` - Sum of absolute values
1484
+ - `maxAbsIndex(x)` - Index of maximum absolute value
1485
+ - `rot(x, y, c, s)` - Givens rotation
1486
+
1487
+ ### Vector Arithmetic
1488
+ - `dot(a, b)` - Dot product
1489
+ - `vadd(a, b, out)` - Element-wise addition
1490
+ - `vsub(a, b, out)` - Element-wise subtraction
1491
+ - `vmul(a, b, out)` - Element-wise multiplication
1492
+ - `vdiv(a, b, out)` - Element-wise division
1493
+ - `vscale(a, scalar, b)` - Scalar multiplication
1494
+ - `vneg(a, b)` - Negation
1495
+ - `vaddScalar(a, scalar, c)` - Add scalar to vector
1496
+ - `vma(a, b, c, d)` - Multiply-add: d = (a*b) + c
1497
+ - `vmsa(a, b, c, d)` - Multiply-scalar-add: d = (a*b) + c
1498
+
1499
+ ### Vector Functions
1500
+ - `vabs(a, b)` - Absolute value
1501
+ - `vsquare(a, b)` - Square
1502
+ - `vsqrt(a, b)` - Square root
1503
+ - `normalize(a, b)` - Normalize to unit length
1504
+ - `vreverse(a, b)` - Reverse order
1505
+ - `vfill(scalar, vec)` - Fill with scalar
1506
+ - `vramp(start, step, vec)` - Generate linear ramp
1507
+ - `vlerp(a, b, t, c)` - Linear interpolation
1508
+ - `vclear(vec)` - Clear (set to zero)
1509
+ - `vlimit(a, low, high, c)` - Limit/saturate values
1510
+
1511
+ ### Trigonometric (Vectorized)
1512
+ - `vsin(a, b)` - Sine
1513
+ - `vcos(a, b)` - Cosine
1514
+ - `vtan(a, b)` - Tangent
1515
+ - `vasin(a, b)` - Inverse sine
1516
+ - `vacos(a, b)` - Inverse cosine
1517
+ - `vatan(a, b)` - Inverse tangent
1518
+ - `vatan2(y, x, out)` - Two-argument arctangent
1519
+
1520
+ ### Hyperbolic Functions
1521
+ - `vsinh(a, b)` - Hyperbolic sine
1522
+ - `vcosh(a, b)` - Hyperbolic cosine
1523
+ - `vtanh(a, b)` - Hyperbolic tangent
1524
+
1525
+ ### Exponential & Logarithmic
1526
+ - `vexp(a, b)` - Natural exponential
1527
+ - `vlog(a, b)` - Natural logarithm
1528
+ - `vlog10(a, b)` - Base-10 logarithm
1529
+ - `vpow(a, b, c)` - Power (c = a^b)
1530
+ - `vreciprocal(a, b)` - Reciprocal (1/x)
1531
+ - `vrsqrt(a, b)` - Inverse square root (1/sqrt(x))
1532
+
1533
+ ### Rounding Functions
1534
+ - `vceil(a, b)` - Ceiling
1535
+ - `vfloor(a, b)` - Floor
1536
+ - `vtrunc(a, b)` - Truncate (round toward zero)
1537
+ - `vcopysign(a, b, c)` - Copy sign
1538
+
1539
+ ### Data Processing
1540
+ - `vclip(a, b, min, max)` - Clip to range
1541
+ - `vthreshold(a, b, threshold)` - Apply threshold
1542
+
1543
+ ### Statistical Functions
1544
+ - `sum(vec)` - Sum of elements
1545
+ - `mean(vec)` - Mean (average)
1546
+ - `variance(vec)` - Variance
1547
+ - `stddev(vec)` - Standard deviation
1548
+ - `max(vec)` - Maximum value
1549
+ - `min(vec)` - Minimum value
1550
+ - `minmax(vec)` - Both min and max
1551
+ - `rms(vec)` - Root mean square
1552
+ - `sumOfSquares(vec)` - Sum of squares
1553
+ - `meanMagnitude(vec)` - Mean of absolute values
1554
+ - `meanSquare(vec)` - Mean of squares
1555
+ - `maxMagnitude(vec)` - Maximum magnitude
1556
+ - `minMagnitude(vec)` - Minimum magnitude
1557
+
1558
+ ### Distance Metrics
1559
+ - `euclidean(a, b)` - Euclidean distance
1560
+
1561
+ ### Signal Processing
1562
+ - `fft(signal)` - Fast Fourier Transform
1563
+ - `ifft(real, imag)` - Inverse FFT
1564
+ - `conv(signal, kernel, result)` - Convolution
1565
+ - `xcorr(a, b, result)` - Cross-correlation
1566
+
1567
+ ### Window Functions
1568
+ - `hamming(length)` - Hamming window
1569
+ - `hanning(length)` - Hanning window
1570
+ - `blackman(length)` - Blackman window
1571
+
1572
+ ### Interpolation
1573
+ - `interp1d(x, y, xi, yi)` - Linear interpolation
1574
+
1575
+ **Total: 80+ functions** - All hardware-accelerated via Apple's Accelerate framework
470
1576
 
471
1577
  ## Limitations
472
1578