dspx 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ #pragma once
2
+
3
+ #include "../IDspStage.h"
4
+ #include "../utils/Toon.h"
5
+ #include "../utils/SimdOps.h"
6
+ #include <stdexcept>
7
+ #include <string>
8
+ #include <cmath>
9
+
10
+ namespace dsp::adapters
11
+ {
12
+ /**
13
+ * @brief Square Stage - Computes element-wise squaring of a signal.
14
+ *
15
+ * Implements: y[n] = x[n]^2
16
+ *
17
+ * **Use Cases:**
18
+ * - Energy calculation (signal power)
19
+ * - Non-linear signal transformation
20
+ * - Envelope detection
21
+ * - Part of Pan-Tompkins QRS detection algorithm
22
+ *
23
+ * **Note:** Squaring amplifies large values and suppresses small ones.
24
+ * This stage is stateless - no mode selection needed.
25
+ */
26
+ class SquareStage : public IDspStage
27
+ {
28
+ public:
29
+ SquareStage()
30
+ {
31
+ // No parameters needed - stateless operation
32
+ }
33
+
34
+ const char *getType() const override
35
+ {
36
+ return "square";
37
+ }
38
+
39
+ void process(float *buffer, size_t numSamples, int numChannels, const float *timestamps = nullptr) override
40
+ {
41
+ // Stateless squaring operation
42
+ dsp::simd::square_inplace(buffer, numSamples);
43
+ }
44
+
45
+ Napi::Object serializeState(Napi::Env env) const override
46
+ {
47
+ Napi::Object state = Napi::Object::New(env);
48
+ // No state to serialize - squaring is stateless
49
+ return state;
50
+ }
51
+
52
+ void deserializeState(const Napi::Object &state) override
53
+ {
54
+ // No state to deserialize
55
+ }
56
+
57
+ void reset() override
58
+ {
59
+ // No state to reset
60
+ }
61
+
62
+ void serializeToon(dsp::toon::Serializer &s) const override
63
+ {
64
+ // No state to serialize
65
+ }
66
+
67
+ void deserializeToon(dsp::toon::Deserializer &d) override
68
+ {
69
+ // No state to deserialize
70
+ }
71
+
72
+ bool isResizing() const override { return false; }
73
+
74
+ private:
75
+ // No member variables - stateless operation
76
+ };
77
+
78
+ } // namespace dsp::adapters
@@ -71,8 +71,9 @@ namespace dsp
71
71
  for (size_t i = 1; i < m_b_coeffs.size(); ++i)
72
72
  {
73
73
  // x_state stores x[n-1], x[n-2], ..., x[n-M]
74
- // Read backwards: x[n-i] is at position (m_x_index - (i-1)) & m_x_mask
75
- size_t idx = (m_x_index - (i - 1)) & m_x_mask;
74
+ // Read backwards: x[n-i] is at position (m_x_index + m_x_mask + 1 - (i-1)) & m_x_mask
75
+ // Adding buffer size before subtraction prevents underflow
76
+ size_t idx = (m_x_index + m_x_mask + 1 - (i - 1)) & m_x_mask;
76
77
  output += m_b_coeffs[i] * m_x_state[idx];
77
78
  }
78
79
 
@@ -80,8 +81,9 @@ namespace dsp
80
81
  for (size_t i = 0; i < m_a_coeffs.size(); ++i)
81
82
  {
82
83
  // y_state stores y[n-1], y[n-2], ..., y[n-N]
83
- // Read backwards: y[n-(i+1)] is at position (m_y_index - i) & m_y_mask
84
- size_t idx = (m_y_index - i) & m_y_mask;
84
+ // Read backwards: y[n-(i+1)] is at position (m_y_index + m_y_mask + 1 - i) & m_y_mask
85
+ // Adding buffer size before subtraction prevents underflow
86
+ size_t idx = (m_y_index + m_y_mask + 1 - i) & m_y_mask;
85
87
  output -= m_a_coeffs[i] * m_y_state[idx];
86
88
  }
87
89
 
@@ -131,14 +133,14 @@ namespace dsp
131
133
  T y = m_b_coeffs[0] * input[n];
132
134
  for (size_t i = 1; i < m_b_coeffs.size(); ++i)
133
135
  {
134
- size_t idx = (x_idx - (i - 1)) & x_mask;
136
+ size_t idx = (x_idx + x_mask + 1 - (i - 1)) & x_mask;
135
137
  y += m_b_coeffs[i] * x_temp[idx];
136
138
  }
137
139
 
138
140
  // Feedback
139
141
  for (size_t i = 0; i < m_a_coeffs.size(); ++i)
140
142
  {
141
- size_t idx = (y_idx - i) & y_mask;
143
+ size_t idx = (y_idx + y_mask + 1 - i) & y_mask;
142
144
  y -= m_a_coeffs[i] * y_temp[idx];
143
145
  }
144
146
 
@@ -162,14 +164,14 @@ namespace dsp
162
164
  T y = m_b_coeffs[0] * input[n];
163
165
  for (size_t i = 1; i < m_b_coeffs.size(); ++i)
164
166
  {
165
- size_t idx = (m_x_index - (i - 1)) & m_x_mask;
167
+ size_t idx = (m_x_index + m_x_mask + 1 - (i - 1)) & m_x_mask;
166
168
  y += m_b_coeffs[i] * m_x_state[idx];
167
169
  }
168
170
 
169
171
  // Feedback
170
172
  for (size_t i = 0; i < m_a_coeffs.size(); ++i)
171
173
  {
172
- size_t idx = (m_y_index - i) & m_y_mask;
174
+ size_t idx = (m_y_index + m_y_mask + 1 - i) & m_y_mask;
173
175
  y -= m_a_coeffs[i] * m_y_state[idx];
174
176
  }
175
177
 
@@ -1362,4 +1362,71 @@ namespace dsp::simd
1362
1362
  }
1363
1363
 
1364
1364
  // SIMD_X86 removed since SIMD_SSE3 covers it on most modern devices
1365
/**
 * @brief Replace each element of a float buffer with its square (x * x).
 *
 * When one of SIMD_AVX2, SIMD_SSE2 or SIMD_NEON is defined, whole vectors
 * (8, 4 and 4 floats respectively) are processed with unaligned loads and
 * stores; any elements left over - or all of them when no SIMD macro is
 * defined - are handled by a plain scalar loop.
 *
 * Used, among other things, for the squaring step of Pan-Tompkins QRS
 * detection.
 *
 * @param buffer Input/output buffer; overwritten with the squared values.
 * @param size   Number of floats in @p buffer to process.
 */
inline void square_inplace(float *buffer, size_t size)
{
    // Index of the next element that still has to be squared.
    size_t pos = 0;

#if defined(SIMD_AVX2)
    constexpr size_t kLanes = 8;
    const size_t vector_end = size - (size % kLanes);
    while (pos < vector_end)
    {
        const __m256 lanes = _mm256_loadu_ps(buffer + pos);
        _mm256_storeu_ps(buffer + pos, _mm256_mul_ps(lanes, lanes));
        pos += kLanes;
    }
#elif defined(SIMD_SSE2)
    constexpr size_t kLanes = 4;
    const size_t vector_end = size - (size % kLanes);
    while (pos < vector_end)
    {
        const __m128 lanes = _mm_loadu_ps(buffer + pos);
        _mm_storeu_ps(buffer + pos, _mm_mul_ps(lanes, lanes));
        pos += kLanes;
    }
#elif defined(SIMD_NEON)
    constexpr size_t kLanes = 4;
    const size_t vector_end = size - (size % kLanes);
    while (pos < vector_end)
    {
        const float32x4_t lanes = vld1q_f32(buffer + pos);
        vst1q_f32(buffer + pos, vmulq_f32(lanes, lanes));
        pos += kLanes;
    }
#endif

    // Scalar path: the remainder after the vector loop, or the entire
    // buffer when no SIMD backend was selected at compile time.
    while (pos < size)
    {
        const float sample = buffer[pos];
        buffer[pos] = sample * sample;
        ++pos;
    }
}
1365
1432
  } // namespace dsp::simd