dspx 1.4.9 → 1.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dspx",
|
|
3
|
-
"version": "1.4.9",
|
|
3
|
+
"version": "1.4.11",
|
|
4
4
|
"description": "High-performance DSP library with native C++ acceleration and Redis state persistence",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -56,7 +56,6 @@
|
|
|
56
56
|
"npm": ">=11.5.1"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
-
"cross-env": "^7.0.3",
|
|
60
59
|
"node-addon-api": "^8.5.0",
|
|
61
60
|
"node-gyp-build": "^4.8.4"
|
|
62
61
|
},
|
|
Binary file
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <stdexcept>
|
|
11
11
|
#include <algorithm>
|
|
12
12
|
#include <numeric> // For std::inner_product (article optimization)
|
|
13
|
+
#include "../vendors/eigen-3.4.0/Eigen/Core"
|
|
13
14
|
|
|
14
15
|
#ifndef M_PI
|
|
15
16
|
#define M_PI 3.14159265358979323846
|
|
@@ -535,6 +536,70 @@ namespace dsp
|
|
|
535
536
|
return FirFilter<T>(bandStop, true);
|
|
536
537
|
}
|
|
537
538
|
|
|
539
|
+
// ========== Eigen-Accelerated Large Batch Processing ==========
|
|
540
|
+
|
|
541
|
+
template <typename T>
|
|
542
|
+
void FirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
|
|
543
|
+
{
|
|
544
|
+
// Threshold: Use Eigen for batches >= 8192 samples
|
|
545
|
+
// Below this, existing optimized code (NEON/scalar) is faster
|
|
546
|
+
constexpr size_t EIGEN_THRESHOLD = 8192;
|
|
547
|
+
|
|
548
|
+
if (length < EIGEN_THRESHOLD)
|
|
549
|
+
{
|
|
550
|
+
// Small batch: use existing optimized path
|
|
551
|
+
return process(input, output, length, stateless);
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
// Large batch: use Eigen for cache-blocking and vectorization
|
|
555
|
+
const size_t numCoeffs = m_coefficients.size();
|
|
556
|
+
|
|
557
|
+
if (stateless || !m_stateful)
|
|
558
|
+
{
|
|
559
|
+
// Stateless convolution using Eigen
|
|
560
|
+
// Map coefficient vector (const, no copy)
|
|
561
|
+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> h(
|
|
562
|
+
m_coefficients.data(), numCoeffs);
|
|
563
|
+
|
|
564
|
+
// Process each output sample
|
|
565
|
+
for (size_t n = 0; n < length; ++n)
|
|
566
|
+
{
|
|
567
|
+
// Determine valid window size
|
|
568
|
+
size_t validSize = std::min(n + 1, numCoeffs);
|
|
569
|
+
size_t startIdx = (n >= numCoeffs) ? (n - numCoeffs + 1) : 0;
|
|
570
|
+
|
|
571
|
+
// Map input window
|
|
572
|
+
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> x(
|
|
573
|
+
input + startIdx, validSize);
|
|
574
|
+
|
|
575
|
+
// Compute dot product using Eigen (auto-vectorized)
|
|
576
|
+
if (validSize < numCoeffs)
|
|
577
|
+
{
|
|
578
|
+
// Partial window: zero-pad
|
|
579
|
+
output[n] = h.tail(validSize).dot(x);
|
|
580
|
+
}
|
|
581
|
+
else
|
|
582
|
+
{
|
|
583
|
+
// Full window
|
|
584
|
+
output[n] = h.dot(x.reverse());
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
else
|
|
589
|
+
{
|
|
590
|
+
// Stateful mode: maintain circular buffer while using Eigen
|
|
591
|
+
// Process in chunks for better cache locality
|
|
592
|
+
constexpr size_t CHUNK_SIZE = 4096;
|
|
593
|
+
|
|
594
|
+
for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
|
|
595
|
+
{
|
|
596
|
+
size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
|
|
597
|
+
// Circular buffer management is already optimal
|
|
598
|
+
process(input + offset, output + offset, chunkLen, false);
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
538
603
|
// Explicit template instantiations
|
|
539
604
|
template class FirFilter<float>;
|
|
540
605
|
template class FirFilter<double>;
|
|
@@ -57,6 +57,17 @@ namespace dsp
|
|
|
57
57
|
*/
|
|
58
58
|
void process(const T *input, T *output, size_t length, bool stateless = false);
|
|
59
59
|
|
|
60
|
+
/**
|
|
61
|
+
* Process large batch using Eigen for optimal cache utilization
|
|
62
|
+
* Automatically dispatches to process() for small batches (< 8192)
|
|
63
|
+
* Uses Eigen dot products for large stateless batches (>= 8192); large stateful batches are passed to process() in chunks
|
|
64
|
+
* @param input Input samples
|
|
65
|
+
* @param output Output buffer (must be same size as input)
|
|
66
|
+
* @param length Number of samples
|
|
67
|
+
* @param stateless If true, ignores internal state
|
|
68
|
+
*/
|
|
69
|
+
void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
|
|
70
|
+
|
|
60
71
|
/**
|
|
61
72
|
* Reset filter state (clear history)
|
|
62
73
|
*/
|
|
@@ -233,58 +233,52 @@ namespace dsp::core
|
|
|
233
233
|
*/
|
|
234
234
|
std::pair<std::vector<float>, size_t> exportLinearState() const
|
|
235
235
|
{
|
|
236
|
-
#if defined(__ARM_NEON) || defined(__aarch64__)
|
|
237
236
|
const float *state = getState();
|
|
238
237
|
std::vector<float> linearState(m_bufferSize, 0.0f);
|
|
239
238
|
|
|
240
|
-
//
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
239
|
+
// Extract the circular buffer into linear format (oldest->newest)
|
|
240
|
+
// m_head points to the position of the most recent sample
|
|
241
|
+
// The convolution reads from (m_head - numTaps + 1), which is the oldest sample position
|
|
242
|
+
|
|
243
|
+
// Calculate the read start position (oldest valid sample)
|
|
244
|
+
size_t readStart = (m_head + m_bufferSize - m_numTaps + 1) & m_headMask;
|
|
244
245
|
|
|
245
|
-
//
|
|
246
|
+
// Copy samples in order: oldest->newest into linear positions [0..bufferSize-1]
|
|
246
247
|
for (size_t i = 0; i < m_bufferSize; ++i)
|
|
247
248
|
{
|
|
248
|
-
linearState[i] = state[(
|
|
249
|
+
linearState[i] = state[(readStart + i) & m_headMask];
|
|
249
250
|
}
|
|
250
251
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
const float *state = getState();
|
|
255
|
-
std::vector<float> linearState(state, state + m_bufferSize);
|
|
256
|
-
return {linearState, m_head};
|
|
257
|
-
#endif
|
|
252
|
+
// Return 0 as stateIndex to indicate the next write should go to position 0
|
|
253
|
+
// (overwriting the oldest sample in the linear layout)
|
|
254
|
+
return {linearState, 0};
|
|
258
255
|
}
|
|
259
256
|
|
|
260
257
|
/**
|
|
261
258
|
* @brief Import state from linear format (oldest->newest) after deserialization
|
|
262
259
|
* @param linearState Linear state vector (oldest->newest)
|
|
263
|
-
* @param stateIndex State index (
|
|
260
|
+
* @param stateIndex State index (must be 0 for linear layout)
|
|
264
261
|
*/
|
|
265
262
|
void importLinearState(const std::vector<float> &linearState, size_t stateIndex)
|
|
266
263
|
{
|
|
267
|
-
#if defined(__ARM_NEON) || defined(__aarch64__)
|
|
268
264
|
float *state = getState();
|
|
269
265
|
|
|
270
|
-
// Copy linear state into circular buffer
|
|
266
|
+
// Copy the linear state directly into the circular buffer starting at position 0
|
|
267
|
+
// This creates a "linearized" layout where oldest is at 0, newest at numTaps-1
|
|
271
268
|
for (size_t i = 0; i < m_bufferSize && i < linearState.size(); ++i)
|
|
272
269
|
{
|
|
273
270
|
state[i] = linearState[i];
|
|
274
271
|
state[i + m_bufferSize] = linearState[i]; // Guard zone
|
|
275
272
|
}
|
|
276
273
|
|
|
277
|
-
// Set
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
//
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
}
|
|
286
|
-
m_head = stateIndex;
|
|
287
|
-
#endif
|
|
274
|
+
// Set m_head to point to the newest sample (last valid position)
|
|
275
|
+
// After import: oldest=0, newest=(numTaps-1)
|
|
276
|
+
// m_head should point to position (numTaps - 1)
|
|
277
|
+
// Next write will go to position numTaps, which is correct
|
|
278
|
+
m_head = (m_numTaps > 0) ? (m_numTaps - 1) : 0;
|
|
279
|
+
|
|
280
|
+
// Mark buffer as filled so we don't return zeros during transient phase
|
|
281
|
+
m_samplesProcessed = m_numTaps;
|
|
288
282
|
}
|
|
289
283
|
|
|
290
284
|
size_t getNumTaps() const { return m_numTaps; }
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <stdexcept>
|
|
9
9
|
#include <algorithm>
|
|
10
10
|
#include <complex>
|
|
11
|
+
#include "../vendors/eigen-3.4.0/Eigen/Core"
|
|
11
12
|
|
|
12
13
|
#ifndef M_PI
|
|
13
14
|
#define M_PI 3.14159265358979323846
|
|
@@ -717,6 +718,42 @@ namespace dsp
|
|
|
717
718
|
return IirFilter<T>(b_normalized, a_normalized, true);
|
|
718
719
|
}
|
|
719
720
|
|
|
721
|
+
// ========== Eigen-Accelerated Large Batch Processing ==========
|
|
722
|
+
|
|
723
|
+
template <typename T>
|
|
724
|
+
void IirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
|
|
725
|
+
{
|
|
726
|
+
// Threshold: Use Eigen for batches >= 8192 samples
|
|
727
|
+
constexpr size_t EIGEN_THRESHOLD = 8192;
|
|
728
|
+
|
|
729
|
+
if (length < EIGEN_THRESHOLD)
|
|
730
|
+
{
|
|
731
|
+
// Small batch: use existing optimized path
|
|
732
|
+
return process(input, output, length, stateless);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
// For IIR filters, recursive structure limits parallelization
|
|
736
|
+
// Eigen won't help much for stateful mode due to output dependencies
|
|
737
|
+
// Best approach: process in chunks for cache locality
|
|
738
|
+
if (stateless || !m_stateful)
|
|
739
|
+
{
|
|
740
|
+
// Stateless: delegate to existing implementation
|
|
741
|
+
return process(input, output, length, stateless);
|
|
742
|
+
}
|
|
743
|
+
else
|
|
744
|
+
{
|
|
745
|
+
// Stateful mode: process in cache-friendly chunks
|
|
746
|
+
// Each chunk maintains filter state continuity
|
|
747
|
+
constexpr size_t CHUNK_SIZE = 8192;
|
|
748
|
+
|
|
749
|
+
for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
|
|
750
|
+
{
|
|
751
|
+
size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
|
|
752
|
+
process(input + offset, output + offset, chunkLen, false);
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
720
757
|
// Explicit template instantiations
|
|
721
758
|
template class IirFilter<float>;
|
|
722
759
|
template class IirFilter<double>;
|
|
@@ -56,6 +56,17 @@ namespace dsp
|
|
|
56
56
|
*/
|
|
57
57
|
void process(const T *input, T *output, size_t length, bool stateless = false);
|
|
58
58
|
|
|
59
|
+
/**
|
|
60
|
+
* Process a large batch in cache-friendly chunks
|
|
61
|
+
* Automatically dispatches to process() for small batches (< 8192)
|
|
62
|
+
* Large stateful batches (>= 8192) are passed to process() in 8192-sample chunks; stateless batches go to process() directly (no Eigen kernel is used)
|
|
63
|
+
* @param input Input samples
|
|
64
|
+
* @param output Output buffer (must be same size as input)
|
|
65
|
+
* @param length Number of samples
|
|
66
|
+
* @param stateless If true, ignores internal state
|
|
67
|
+
*/
|
|
68
|
+
void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
|
|
69
|
+
|
|
59
70
|
/**
|
|
60
71
|
* Reset filter state (clear history)
|
|
61
72
|
*/
|