dspx 1.4.9 → 1.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dspx",
3
- "version": "1.4.9",
3
+ "version": "1.4.11",
4
4
  "description": "High-performance DSP library with native C++ acceleration and Redis state persistence",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -56,7 +56,6 @@
56
56
  "npm": ">=11.5.1"
57
57
  },
58
58
  "dependencies": {
59
- "cross-env": "^7.0.3",
60
59
  "node-addon-api": "^8.5.0",
61
60
  "node-gyp-build": "^4.8.4"
62
61
  },
Binary file
@@ -10,6 +10,7 @@
10
10
  #include <stdexcept>
11
11
  #include <algorithm>
12
12
  #include <numeric> // For std::inner_product (article optimization)
13
+ #include "../vendors/eigen-3.4.0/Eigen/Core"
13
14
 
14
15
  #ifndef M_PI
15
16
  #define M_PI 3.14159265358979323846
@@ -535,6 +536,70 @@ namespace dsp
535
536
  return FirFilter<T>(bandStop, true);
536
537
  }
537
538
 
539
+ // ========== Eigen-Accelerated Large Batch Processing ==========
540
+
541
+ template <typename T>
542
+ void FirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
543
+ {
544
+ // Threshold: Use Eigen for batches >= 8192 samples
545
+ // Below this, existing optimized code (NEON/scalar) is faster
546
+ constexpr size_t EIGEN_THRESHOLD = 8192;
547
+
548
+ if (length < EIGEN_THRESHOLD)
549
+ {
550
+ // Small batch: use existing optimized path
551
+ return process(input, output, length, stateless);
552
+ }
553
+
554
+ // Large batch: use Eigen for cache-blocking and vectorization
555
+ const size_t numCoeffs = m_coefficients.size();
556
+
557
+ if (stateless || !m_stateful)
558
+ {
559
+ // Stateless convolution using Eigen
560
+ // Map coefficient vector (const, no copy)
561
+ Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> h(
562
+ m_coefficients.data(), numCoeffs);
563
+
564
+ // Process each output sample
565
+ for (size_t n = 0; n < length; ++n)
566
+ {
567
+ // Determine valid window size
568
+ size_t validSize = std::min(n + 1, numCoeffs);
569
+ size_t startIdx = (n >= numCoeffs) ? (n - numCoeffs + 1) : 0;
570
+
571
+ // Map input window
572
+ Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> x(
573
+ input + startIdx, validSize);
574
+
575
+ // Compute dot product using Eigen (auto-vectorized)
576
+ if (validSize < numCoeffs)
577
+ {
578
+ // Partial window: zero-pad
579
+ output[n] = h.tail(validSize).dot(x);
580
+ }
581
+ else
582
+ {
583
+ // Full window
584
+ output[n] = h.dot(x.reverse());
585
+ }
586
+ }
587
+ }
588
+ else
589
+ {
590
+ // Stateful mode: maintain circular buffer while using Eigen
591
+ // Process in chunks for better cache locality
592
+ constexpr size_t CHUNK_SIZE = 4096;
593
+
594
+ for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
595
+ {
596
+ size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
597
+ // Circular buffer management is already optimal
598
+ process(input + offset, output + offset, chunkLen, false);
599
+ }
600
+ }
601
+ }
602
+
538
603
  // Explicit template instantiations
539
604
  template class FirFilter<float>;
540
605
  template class FirFilter<double>;
@@ -57,6 +57,17 @@ namespace dsp
57
57
  */
58
58
  void process(const T *input, T *output, size_t length, bool stateless = false);
59
59
 
60
+ /**
61
+ * Process large batch using Eigen for optimal cache utilization
62
+ * Automatically dispatches to process() for small batches (< 8192)
63
+ * Large batches (>= 8192): stateless calls use per-sample Eigen dot products; stateful calls are fed to process() in 4096-sample chunks
64
+ * @param input Input samples
65
+ * @param output Output buffer (must be same size as input)
66
+ * @param length Number of samples
67
+ * @param stateless If true, ignores internal state
68
+ */
69
+ void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
70
+
60
71
  /**
61
72
  * Reset filter state (clear history)
62
73
  */
@@ -233,58 +233,52 @@ namespace dsp::core
233
233
  */
234
234
  std::pair<std::vector<float>, size_t> exportLinearState() const
235
235
  {
236
- #if defined(__ARM_NEON) || defined(__aarch64__)
237
236
  const float *state = getState();
238
237
  std::vector<float> linearState(m_bufferSize, 0.0f);
239
238
 
240
- // Calculate oldest sample position in circular buffer
241
- size_t oldestPos = (m_head >= m_numTaps - 1)
242
- ? m_head - (m_numTaps - 1)
243
- : m_head + m_bufferSize - (m_numTaps - 1);
239
+ // Extract the circular buffer into linear format (oldest->newest)
240
+ // m_head points to the position of the most recent sample
241
+ // The convolution reads from (m_head - numTaps + 1), which is the oldest sample position
242
+
243
+ // Calculate the read start position (oldest valid sample)
244
+ size_t readStart = (m_head + m_bufferSize - m_numTaps + 1) & m_headMask;
244
245
 
245
- // Un-rotate: copy from oldest->newest into linear array
246
+ // Copy samples in order: oldest->newest into linear positions [0..bufferSize-1]
246
247
  for (size_t i = 0; i < m_bufferSize; ++i)
247
248
  {
248
- linearState[i] = state[(oldestPos + i) & m_headMask];
249
+ linearState[i] = state[(readStart + i) & m_headMask];
249
250
  }
250
251
 
251
- return {linearState, m_numTaps - 1};
252
- #else
253
- // Scalar path: state is already linear
254
- const float *state = getState();
255
- std::vector<float> linearState(state, state + m_bufferSize);
256
- return {linearState, m_head};
257
- #endif
252
+ // Return 0 as stateIndex to indicate the next write should go to position 0
253
+ // (overwriting the oldest sample in the linear layout)
254
+ return {linearState, 0};
258
255
  }
259
256
 
260
257
  /**
261
258
  * @brief Import state from linear format (oldest->newest) after deserialization
262
259
  * @param linearState Linear state vector (oldest->newest)
263
- * @param stateIndex State index (number of valid samples - 1)
260
+ * @param stateIndex State index (must be 0 for linear layout)
264
261
  */
265
262
  void importLinearState(const std::vector<float> &linearState, size_t stateIndex)
266
263
  {
267
- #if defined(__ARM_NEON) || defined(__aarch64__)
268
264
  float *state = getState();
269
265
 
270
- // Copy linear state into circular buffer
266
+ // Copy the linear state directly into the circular buffer starting at position 0
267
+ // This creates a "linearized" layout where oldest is at 0, newest at numTaps-1
271
268
  for (size_t i = 0; i < m_bufferSize && i < linearState.size(); ++i)
272
269
  {
273
270
  state[i] = linearState[i];
274
271
  state[i + m_bufferSize] = linearState[i]; // Guard zone
275
272
  }
276
273
 
277
- // Set head to point where newest sample will be written
278
- m_head = stateIndex;
279
- #else
280
- // Scalar path: direct copy
281
- float *state = getState();
282
- for (size_t i = 0; i < m_bufferSize && i < linearState.size(); ++i)
283
- {
284
- state[i] = linearState[i];
285
- }
286
- m_head = stateIndex;
287
- #endif
274
+ // Set m_head to point to the newest sample (last valid position)
275
+ // After import: oldest=0, newest=(numTaps-1)
276
+ // m_head should point to position (numTaps - 1)
277
+ // Next write will go to position numTaps, which is correct
278
+ m_head = (m_numTaps > 0) ? (m_numTaps - 1) : 0;
279
+
280
+ // Mark buffer as filled so we don't return zeros during transient phase
281
+ m_samplesProcessed = m_numTaps;
288
282
  }
289
283
 
290
284
  size_t getNumTaps() const { return m_numTaps; }
@@ -8,6 +8,7 @@
8
8
  #include <stdexcept>
9
9
  #include <algorithm>
10
10
  #include <complex>
11
+ #include "../vendors/eigen-3.4.0/Eigen/Core"
11
12
 
12
13
  #ifndef M_PI
13
14
  #define M_PI 3.14159265358979323846
@@ -717,6 +718,42 @@ namespace dsp
717
718
  return IirFilter<T>(b_normalized, a_normalized, true);
718
719
  }
719
720
 
721
+ // ========== Eigen-Accelerated Large Batch Processing ==========
722
+
723
+ template <typename T>
724
+ void IirFilter<T>::processLargeBatch(const T *input, T *output, size_t length, bool stateless)
725
+ {
726
+ // Threshold: Use Eigen for batches >= 8192 samples
727
+ constexpr size_t EIGEN_THRESHOLD = 8192;
728
+
729
+ if (length < EIGEN_THRESHOLD)
730
+ {
731
+ // Small batch: use existing optimized path
732
+ return process(input, output, length, stateless);
733
+ }
734
+
735
+ // For IIR filters, recursive structure limits parallelization
736
+ // Eigen won't help much for stateful mode due to output dependencies
737
+ // Best approach: process in chunks for cache locality
738
+ if (stateless || !m_stateful)
739
+ {
740
+ // Stateless: delegate to existing implementation
741
+ return process(input, output, length, stateless);
742
+ }
743
+ else
744
+ {
745
+ // Stateful mode: process in cache-friendly chunks
746
+ // Each chunk maintains filter state continuity
747
+ constexpr size_t CHUNK_SIZE = 8192;
748
+
749
+ for (size_t offset = 0; offset < length; offset += CHUNK_SIZE)
750
+ {
751
+ size_t chunkLen = std::min(CHUNK_SIZE, length - offset);
752
+ process(input + offset, output + offset, chunkLen, false);
753
+ }
754
+ }
755
+ }
756
+
720
757
  // Explicit template instantiations
721
758
  template class IirFilter<float>;
722
759
  template class IirFilter<double>;
@@ -56,6 +56,17 @@ namespace dsp
56
56
  */
57
57
  void process(const T *input, T *output, size_t length, bool stateless = false);
58
58
 
59
+ /**
60
+ * Process a large batch in fixed-size chunks (the implementation does not call Eigen for the IIR recursion)
61
+ * Automatically dispatches to process() for small batches (< 8192)
62
+ * Large batches (>= 8192): stateful calls are fed to process() in 8192-sample chunks; stateless calls go straight to process()
63
+ * @param input Input samples
64
+ * @param output Output buffer (must be same size as input)
65
+ * @param length Number of samples
66
+ * @param stateless If true, ignores internal state
67
+ */
68
+ void processLargeBatch(const T *input, T *output, size_t length, bool stateless = false);
69
+
59
70
  /**
60
71
  * Reset filter state (clear history)
61
72
  */