dspx 1.4.10 → 1.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dspx",
|
|
3
|
-
"version": "1.4.10",
|
|
3
|
+
"version": "1.4.11",
|
|
4
4
|
"description": "High-performance DSP library with native C++ acceleration and Redis state persistence",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -56,7 +56,6 @@
|
|
|
56
56
|
"npm": ">=11.5.1"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
-
"cross-env": "^7.0.3",
|
|
60
59
|
"node-addon-api": "^8.5.0",
|
|
61
60
|
"node-gyp-build": "^4.8.4"
|
|
62
61
|
},
|
|
Binary file
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <stdexcept>
|
|
11
11
|
#include <algorithm>
|
|
12
12
|
#include <numeric> // For std::inner_product (article optimization)
|
|
13
|
+
#include "../vendors/eigen-3.4.0/Eigen/Core"
|
|
13
14
|
|
|
14
15
|
#ifndef M_PI
|
|
15
16
|
#define M_PI 3.14159265358979323846
|
|
@@ -535,6 +536,70 @@ namespace dsp
|
|
|
535
536
|
return FirFilter<T>(bandStop, true);
|
|
536
537
|
}
|
|
537
538
|
|
|
539
|
+
// ========== Eigen-Accelerated Large Batch Processing ==========
|
|
540
|
+
|
|
541
|
+
template <typename T>
|
|
542
|
+
void FirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
|
|
543
|
+
{
|
|
544
|
+
// Threshold: Use Eigen for batches >= 8192 samples
|
|
545
|
+
// Below this, existing optimized code (NEON/scalar) is faster
|
|
546
|
+
constexpr size_t EIGEN_THRESHOLD = 8192;
|
|
547
|
+
|
|
548
|
+
if (length < EIGEN_THRESHOLD)
|
|
549
|
+
{
|
|
550
|
+
// Small batch: use existing optimized path
|
|
551
|
+
return process(input, output, length, stateless);
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
// Large batch: use Eigen for cache-blocking and vectorization
|
|
555
|
+
const size_t numCoeffs = m_coefficients.size();
|
|
556
|
+
|
|
557
|
+
if (stateless || !m_stateful)
|
|
558
|
+
{
|
|
559
|
+
// Stateless convolution using Eigen
|
|
560
|
+
// Map coefficient vector (const, no copy)
|
|
561
|
+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> h(
|
|
562
|
+
m_coefficients.data(), numCoeffs);
|
|
563
|
+
|
|
564
|
+
// Process each output sample
|
|
565
|
+
for (size_t n = 0; n < length; ++n)
|
|
566
|
+
{
|
|
567
|
+
// Determine valid window size
|
|
568
|
+
size_t validSize = std::min(n + 1, numCoeffs);
|
|
569
|
+
size_t startIdx = (n >= numCoeffs) ? (n - numCoeffs + 1) : 0;
|
|
570
|
+
|
|
571
|
+
// Map input window
|
|
572
|
+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> x(
|
|
573
|
+
input + startIdx, validSize);
|
|
574
|
+
|
|
575
|
+
// Compute dot product using Eigen (auto-vectorized)
|
|
576
|
+
if (validSize < numCoeffs)
|
|
577
|
+
{
|
|
578
|
+
// Partial window: zero-pad
|
|
579
|
+
output[n] = h.tail(validSize).dot(x);
|
|
580
|
+
}
|
|
581
|
+
else
|
|
582
|
+
{
|
|
583
|
+
// Full window
|
|
584
|
+
output[n] = h.dot(x.reverse());
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
else
|
|
589
|
+
{
|
|
590
|
+
// Stateful mode: maintain circular buffer while using Eigen
|
|
591
|
+
// Process in chunks for better cache locality
|
|
592
|
+
constexpr size_t CHUNK_SIZE = 4096;
|
|
593
|
+
|
|
594
|
+
for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
|
|
595
|
+
{
|
|
596
|
+
size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
|
|
597
|
+
// Circular buffer management is already optimal
|
|
598
|
+
process(input + offset, output + offset, chunkLen, false);
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
538
603
|
// Explicit template instantiations
|
|
539
604
|
template class FirFilter<float>;
|
|
540
605
|
template class FirFilter<double>;
|
|
@@ -57,6 +57,17 @@ namespace dsp
|
|
|
57
57
|
*/
|
|
58
58
|
void process(const T *input, T *output, size_t length, bool stateless = false);
|
|
59
59
|
|
|
60
|
+
/**
|
|
61
|
+
* Process large batch using Eigen for optimal cache utilization
|
|
62
|
+
* Automatically dispatches to process() for small batches (< 8192)
|
|
63
|
+
* For large batches (>= 8192), stateless calls use per-sample Eigen dot products; stateful calls are chunked through process()
|
|
64
|
+
* @param input Input samples
|
|
65
|
+
* @param output Output buffer (must be same size as input)
|
|
66
|
+
* @param length Number of samples
|
|
67
|
+
* @param stateless If true, ignores internal state
|
|
68
|
+
*/
|
|
69
|
+
void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
|
|
70
|
+
|
|
60
71
|
/**
|
|
61
72
|
* Reset filter state (clear history)
|
|
62
73
|
*/
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <stdexcept>
|
|
9
9
|
#include <algorithm>
|
|
10
10
|
#include <complex>
|
|
11
|
+
#include "../vendors/eigen-3.4.0/Eigen/Core"
|
|
11
12
|
|
|
12
13
|
#ifndef M_PI
|
|
13
14
|
#define M_PI 3.14159265358979323846
|
|
@@ -717,6 +718,42 @@ namespace dsp
|
|
|
717
718
|
return IirFilter<T>(b_normalized, a_normalized, true);
|
|
718
719
|
}
|
|
719
720
|
|
|
721
|
+
// ========== Eigen-Accelerated Large Batch Processing ==========
|
|
722
|
+
|
|
723
|
+
template <typename T>
|
|
724
|
+
void IirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
|
|
725
|
+
{
|
|
726
|
+
// Threshold: Use Eigen for batches >= 8192 samples
|
|
727
|
+
constexpr size_t EIGEN_THRESHOLD = 8192;
|
|
728
|
+
|
|
729
|
+
if (length < EIGEN_THRESHOLD)
|
|
730
|
+
{
|
|
731
|
+
// Small batch: use existing optimized path
|
|
732
|
+
return process(input, output, length, stateless);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
// For IIR filters, recursive structure limits parallelization
|
|
736
|
+
// Eigen won't help much for stateful mode due to output dependencies
|
|
737
|
+
// Best approach: process in chunks for cache locality
|
|
738
|
+
if (stateless || !m_stateful)
|
|
739
|
+
{
|
|
740
|
+
// Stateless: delegate to existing implementation
|
|
741
|
+
return process(input, output, length, stateless);
|
|
742
|
+
}
|
|
743
|
+
else
|
|
744
|
+
{
|
|
745
|
+
// Stateful mode: process in cache-friendly chunks
|
|
746
|
+
// Each chunk maintains filter state continuity
|
|
747
|
+
constexpr size_t CHUNK_SIZE = 8192;
|
|
748
|
+
|
|
749
|
+
for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
|
|
750
|
+
{
|
|
751
|
+
size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
|
|
752
|
+
process(input + offset, output + offset, chunkLen, false);
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
720
757
|
// Explicit template instantiations
|
|
721
758
|
template class IirFilter<float>;
|
|
722
759
|
template class IirFilter<double>;
|
|
@@ -56,6 +56,17 @@ namespace dsp
|
|
|
56
56
|
*/
|
|
57
57
|
void process(const T *input, T *output, size_t length, bool stateless = false);
|
|
58
58
|
|
|
59
|
+
/**
|
|
60
|
+
* Process a large batch in cache-friendly chunks (the recursive IIR structure limits vectorization)
|
|
61
|
+
* Automatically dispatches to process() for small batches (< 8192)
|
|
62
|
+
* Large batches (>= 8192) are not Eigen-accelerated: stateless calls go to process(), stateful calls are chunked through process()
|
|
63
|
+
* @param input Input samples
|
|
64
|
+
* @param output Output buffer (must be same size as input)
|
|
65
|
+
* @param length Number of samples
|
|
66
|
+
* @param stateless If true, ignores internal state
|
|
67
|
+
*/
|
|
68
|
+
void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
|
|
69
|
+
|
|
59
70
|
/**
|
|
60
71
|
* Reset filter state (clear history)
|
|
61
72
|
*/
|