dspx 1.0.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,466 @@
1
+ /**
2
+ * Short-Time Fourier Transform (STFT) Pipeline Stage
3
+ *
4
+ * Computes time-frequency representation using sliding window FFT/DFT.
5
+ * This is a STATEFUL operation that maintains circular buffers per channel.
6
+ *
7
+ * Features:
8
+ * - Leverages existing FftEngine for all FFT/DFT operations
9
+ * - Configurable window functions (Hann, Hamming, Blackman, Bartlett, None)
10
+ * - Multiple output formats (complex, magnitude, power, phase)
11
+ * - Automatic FFT vs DFT selection based on window size
12
+ * - Real and complex input support
13
+ * - Per-channel state management
14
+ * - SIMD-optimized magnitude/power/phase calculations
15
+ *
16
+ * Algorithm:
17
+ * 1. Maintain circular buffer per channel
18
+ * 2. When hop_size samples accumulated:
19
+ * a. Extract window_size samples
20
+ * b. Apply window function
21
+ * c. FFT/DFT → frequency domain
22
+ * d. Convert to requested output format
23
+ * 3. Output frequency bins
24
+ *
25
+ * Parameters:
26
+ * - windowSize: FFT window size (power of 2 recommended for FFT)
27
+ * - hopSize: Stride between windows (default: windowSize/2)
28
+ * - method: "fft" or "dft" (auto-selected if not specified)
29
+ * - type: "real" or "complex" input signal type
30
+ * - forward: true for forward transform, false for inverse
31
+ * - output: "complex", "magnitude", "power", or "phase"
32
+ * - window: "hann", "hamming", "blackman", "bartlett", or "none"
33
+ */
34
+
35
+ #pragma once
36
+
37
+ #include "../IDspStage.h"
38
+ #include "../core/FftEngine.h"
39
+ #include "../utils/CircularBufferArray.h"
40
+ #include "../utils/SimdOps.h"
41
+ #include <vector>
42
+ #include <complex>
43
+ #include <memory>
44
+ #include <stdexcept>
45
+ #include <cmath>
46
+ #include <algorithm>
47
+ #include <string>
48
+
49
+ #ifndef M_PI
50
+ #define M_PI 3.14159265358979323846
51
+ #endif
52
+
53
+ namespace dsp::adapters
54
+ {
55
+ class StftStage : public IDspStage
56
+ {
57
+ public:
58
+ /**
59
+ * @brief Constructs an STFT stage
60
+ * @param window_size FFT window size
61
+ * @param hop_size Number of samples to advance between windows
62
+ * @param method "fft" or "dft"
63
+ * @param type "real" or "complex"
64
+ * @param forward true for forward transform, false for inverse
65
+ * @param output "complex", "magnitude", "power", or "phase"
66
+ * @param window "hann", "hamming", "blackman", "bartlett", or "none"
67
+ */
68
+ explicit StftStage(
69
+ size_t window_size,
70
+ size_t hop_size,
71
+ const std::string &method,
72
+ const std::string &type,
73
+ bool forward,
74
+ const std::string &output,
75
+ const std::string &window)
76
+ : m_window_size(window_size),
77
+ m_hop_size(hop_size > 0 ? hop_size : window_size / 2),
78
+ m_method(method),
79
+ m_type(type),
80
+ m_forward(forward),
81
+ m_output(output),
82
+ m_window_type(window)
83
+ {
84
+ // Validate parameters
85
+ if (m_window_size == 0)
86
+ {
87
+ throw std::invalid_argument("STFT: window size must be greater than 0");
88
+ }
89
+
90
+ if (m_hop_size == 0 || m_hop_size > m_window_size)
91
+ {
92
+ throw std::invalid_argument("STFT: hop size must be between 1 and window_size");
93
+ }
94
+
95
+ if (m_method != "fft" && m_method != "dft")
96
+ {
97
+ throw std::invalid_argument("STFT: method must be 'fft' or 'dft'");
98
+ }
99
+
100
+ if (m_type != "real" && m_type != "complex")
101
+ {
102
+ throw std::invalid_argument("STFT: type must be 'real' or 'complex'");
103
+ }
104
+
105
+ if (m_output != "complex" && m_output != "magnitude" && m_output != "power" && m_output != "phase")
106
+ {
107
+ throw std::invalid_argument("STFT: output must be 'complex', 'magnitude', 'power', or 'phase'");
108
+ }
109
+
110
+ // Create FFT engine
111
+ m_fft_engine = std::make_unique<dsp::core::FftEngine<float>>(m_window_size);
112
+
113
+ // Check if FFT is valid for this size
114
+ if (m_method == "fft" && !m_fft_engine->isPowerOfTwo())
115
+ {
116
+ throw std::invalid_argument("STFT: FFT requires power-of-2 window size. Use DFT or adjust window size.");
117
+ }
118
+
119
+ // Pre-compute window function
120
+ generateWindowFunction();
121
+
122
+ // Allocate working buffers
123
+ m_window_data.resize(m_window_size);
124
+ m_fft_input.resize(m_window_size);
125
+ m_fft_output.resize(m_window_size);
126
+
127
+ // Calculate output size based on type
128
+ if (m_type == "real")
129
+ {
130
+ m_output_size = m_fft_engine->getHalfSize(); // N/2+1 for real inputs
131
+ }
132
+ else
133
+ {
134
+ m_output_size = m_window_size; // N for complex inputs
135
+ }
136
+ }
137
+
138
+ const char *getType() const override
139
+ {
140
+ return "stft";
141
+ }
142
+
143
+ void process(float *buffer, size_t numSamples, int numChannels, const float *timestamps = nullptr) override
144
+ {
145
+ // Lazy initialization of channel buffers
146
+ if (m_channel_buffers.size() != static_cast<size_t>(numChannels))
147
+ {
148
+ m_channel_buffers.clear();
149
+ m_samples_since_output.clear();
150
+
151
+ for (int i = 0; i < numChannels; ++i)
152
+ {
153
+ m_channel_buffers.emplace_back(m_window_size);
154
+ m_samples_since_output.push_back(0);
155
+ }
156
+ }
157
+
158
+ // Temporary output buffer
159
+ std::vector<float> output_buffer;
160
+ output_buffer.reserve(numSamples * m_output_size); // May expand due to frequency bins
161
+
162
+ // Process sample by sample (interleaved processing)
163
+ for (size_t i = 0; i < numSamples; ++i)
164
+ {
165
+ int channel = i % numChannels;
166
+ float sample = buffer[i];
167
+
168
+ // Add sample to channel's circular buffer
169
+ m_channel_buffers[channel].push(sample);
170
+ m_samples_since_output[channel]++;
171
+
172
+ // Check if it's time to compute STFT for this channel
173
+ if (m_channel_buffers[channel].getCount() >= m_window_size &&
174
+ m_samples_since_output[channel] >= m_hop_size)
175
+ {
176
+ // Compute STFT for this window
177
+ computeStft(channel, output_buffer);
178
+
179
+ // Reset hop counter
180
+ m_samples_since_output[channel] = 0;
181
+ }
182
+ }
183
+
184
+ // Copy output back to buffer (truncate or pad as needed)
185
+ size_t output_size = std::min(output_buffer.size(), numSamples);
186
+ std::copy(output_buffer.begin(), output_buffer.begin() + output_size, buffer);
187
+
188
+ // Pad with zeros if output is smaller
189
+ if (output_size < numSamples)
190
+ {
191
+ std::fill(buffer + output_size, buffer + numSamples, 0.0f);
192
+ }
193
+ }
194
+
195
+ Napi::Object serializeState(Napi::Env env) const override
196
+ {
197
+ Napi::Object state = Napi::Object::New(env);
198
+ state.Set("windowSize", Napi::Number::New(env, m_window_size));
199
+ state.Set("hopSize", Napi::Number::New(env, m_hop_size));
200
+ state.Set("method", Napi::String::New(env, m_method));
201
+ state.Set("type", Napi::String::New(env, m_type));
202
+ state.Set("forward", Napi::Boolean::New(env, m_forward));
203
+ state.Set("output", Napi::String::New(env, m_output));
204
+ state.Set("window", Napi::String::New(env, m_window_type));
205
+ state.Set("numChannels", Napi::Number::New(env, m_channel_buffers.size()));
206
+
207
+ // Serialize each channel's buffer
208
+ Napi::Array channelsArray = Napi::Array::New(env, m_channel_buffers.size());
209
+ for (size_t i = 0; i < m_channel_buffers.size(); ++i)
210
+ {
211
+ Napi::Object channelState = Napi::Object::New(env);
212
+
213
+ // Get buffer data
214
+ std::vector<float> buffer_data = m_channel_buffers[i].toVector();
215
+ Napi::Array bufferArray = Napi::Array::New(env, buffer_data.size());
216
+ for (size_t j = 0; j < buffer_data.size(); ++j)
217
+ {
218
+ bufferArray.Set(j, Napi::Number::New(env, buffer_data[j]));
219
+ }
220
+
221
+ channelState.Set("buffer", bufferArray);
222
+ channelState.Set("samplesSinceOutput", Napi::Number::New(env, m_samples_since_output[i]));
223
+ channelsArray.Set(static_cast<uint32_t>(i), channelState);
224
+ }
225
+ state.Set("channels", channelsArray);
226
+ return state;
227
+ }
228
+
229
+ void deserializeState(const Napi::Object &state) override
230
+ {
231
+ size_t windowSize = state.Get("windowSize").As<Napi::Number>().Uint32Value();
232
+ size_t hopSize = state.Get("hopSize").As<Napi::Number>().Uint32Value();
233
+
234
+ if (windowSize != m_window_size || hopSize != m_hop_size)
235
+ {
236
+ throw std::runtime_error("Window/hop size mismatch during deserialization");
237
+ }
238
+
239
+ uint32_t numChannels = state.Get("channels").As<Napi::Array>().Length();
240
+
241
+ // Recreate channel buffers
242
+ m_channel_buffers.clear();
243
+ m_samples_since_output.clear();
244
+
245
+ for (uint32_t i = 0; i < numChannels; ++i)
246
+ {
247
+ m_channel_buffers.emplace_back(m_window_size);
248
+ m_samples_since_output.push_back(0);
249
+ }
250
+
251
+ // Restore each channel's state
252
+ Napi::Array channelsArray = state.Get("channels").As<Napi::Array>();
253
+ for (uint32_t i = 0; i < numChannels; ++i)
254
+ {
255
+ Napi::Object channelState = channelsArray.Get(i).As<Napi::Object>();
256
+
257
+ // Restore buffer data
258
+ Napi::Array bufferArray = channelState.Get("buffer").As<Napi::Array>();
259
+ for (uint32_t j = 0; j < bufferArray.Length(); ++j)
260
+ {
261
+ float value = bufferArray.Get(j).As<Napi::Number>().FloatValue();
262
+ m_channel_buffers[i].push(value);
263
+ }
264
+
265
+ // Restore counter
266
+ m_samples_since_output[i] = channelState.Get("samplesSinceOutput").As<Napi::Number>().Uint32Value();
267
+ }
268
+ }
269
+
270
+ void reset() override
271
+ {
272
+ for (auto &buffer : m_channel_buffers)
273
+ {
274
+ buffer.clear();
275
+ }
276
+ std::fill(m_samples_since_output.begin(), m_samples_since_output.end(), 0);
277
+ }
278
+
279
+ private:
280
+ /**
281
+ * Generate window function coefficients
282
+ */
283
+ void generateWindowFunction()
284
+ {
285
+ m_window_function.resize(m_window_size);
286
+
287
+ if (m_window_type == "none")
288
+ {
289
+ // Rectangular window
290
+ std::fill(m_window_function.begin(), m_window_function.end(), 1.0f);
291
+ }
292
+ else if (m_window_type == "hann")
293
+ {
294
+ // Hann window: 0.5 * (1 - cos(2π*n/(N-1)))
295
+ for (size_t n = 0; n < m_window_size; ++n)
296
+ {
297
+ m_window_function[n] = 0.5f * (1.0f - std::cos(2.0f * M_PI * n / (m_window_size - 1)));
298
+ }
299
+ }
300
+ else if (m_window_type == "hamming")
301
+ {
302
+ // Hamming window: 0.54 - 0.46 * cos(2π*n/(N-1))
303
+ for (size_t n = 0; n < m_window_size; ++n)
304
+ {
305
+ m_window_function[n] = 0.54f - 0.46f * std::cos(2.0f * M_PI * n / (m_window_size - 1));
306
+ }
307
+ }
308
+ else if (m_window_type == "blackman")
309
+ {
310
+ // Blackman window: 0.42 - 0.5*cos(2π*n/(N-1)) + 0.08*cos(4π*n/(N-1))
311
+ for (size_t n = 0; n < m_window_size; ++n)
312
+ {
313
+ float cos1 = std::cos(2.0f * M_PI * n / (m_window_size - 1));
314
+ float cos2 = std::cos(4.0f * M_PI * n / (m_window_size - 1));
315
+ m_window_function[n] = 0.42f - 0.5f * cos1 + 0.08f * cos2;
316
+ }
317
+ }
318
+ else if (m_window_type == "bartlett")
319
+ {
320
+ // Bartlett (triangular) window: 1 - |2n/(N-1) - 1|
321
+ for (size_t n = 0; n < m_window_size; ++n)
322
+ {
323
+ m_window_function[n] = 1.0f - std::abs(2.0f * n / (m_window_size - 1) - 1.0f);
324
+ }
325
+ }
326
+ else
327
+ {
328
+ throw std::invalid_argument("STFT: Unknown window type '" + m_window_type + "'");
329
+ }
330
+ }
331
+
332
+ /**
333
+ * Compute STFT for current window in specified channel
334
+ */
335
+ void computeStft(size_t channel, std::vector<float> &output)
336
+ {
337
+ // Get current window data
338
+ m_window_data = m_channel_buffers[channel].toVector();
339
+
340
+ // Ensure we have enough data
341
+ if (m_window_data.size() < m_window_size)
342
+ {
343
+ // Pad with zeros if needed
344
+ m_window_data.resize(m_window_size, 0.0f);
345
+ }
346
+
347
+ // Apply window function
348
+ for (size_t i = 0; i < m_window_size; ++i)
349
+ {
350
+ m_window_data[i] *= m_window_function[i];
351
+ }
352
+
353
+ // Perform FFT/DFT using FftEngine
354
+ if (m_type == "real")
355
+ {
356
+ // Real input → use RFFT/RDFT
357
+ if (m_method == "fft")
358
+ {
359
+ m_fft_engine->rfft(m_window_data.data(), m_fft_output.data());
360
+ }
361
+ else
362
+ {
363
+ m_fft_engine->rdft(m_window_data.data(), m_fft_output.data());
364
+ }
365
+ }
366
+ else
367
+ {
368
+ // Complex input → use FFT/DFT
369
+ // Convert real to complex
370
+ for (size_t i = 0; i < m_window_size; ++i)
371
+ {
372
+ m_fft_input[i] = std::complex<float>(m_window_data[i], 0.0f);
373
+ }
374
+
375
+ if (m_method == "fft")
376
+ {
377
+ if (m_forward)
378
+ {
379
+ m_fft_engine->fft(m_fft_input.data(), m_fft_output.data());
380
+ }
381
+ else
382
+ {
383
+ m_fft_engine->ifft(m_fft_input.data(), m_fft_output.data());
384
+ }
385
+ }
386
+ else
387
+ {
388
+ if (m_forward)
389
+ {
390
+ m_fft_engine->dft(m_fft_input.data(), m_fft_output.data());
391
+ }
392
+ else
393
+ {
394
+ m_fft_engine->idft(m_fft_input.data(), m_fft_output.data());
395
+ }
396
+ }
397
+ }
398
+
399
+ // Convert to requested output format and append to output buffer
400
+ convertOutput(output);
401
+ }
402
+
403
+ /**
404
+ * Convert FFT output to requested format
405
+ */
406
+ void convertOutput(std::vector<float> &output)
407
+ {
408
+ if (m_output == "complex")
409
+ {
410
+ // Output real and imaginary parts interleaved
411
+ for (size_t i = 0; i < m_output_size; ++i)
412
+ {
413
+ output.push_back(m_fft_output[i].real());
414
+ output.push_back(m_fft_output[i].imag());
415
+ }
416
+ }
417
+ else if (m_output == "magnitude")
418
+ {
419
+ // Compute magnitude: |X[k]| = sqrt(Re² + Im²)
420
+ std::vector<float> magnitudes(m_output_size);
421
+ m_fft_engine->getMagnitude(m_fft_output.data(), magnitudes.data(), m_output_size);
422
+ output.insert(output.end(), magnitudes.begin(), magnitudes.end());
423
+ }
424
+ else if (m_output == "power")
425
+ {
426
+ // Compute power: |X[k]|²
427
+ std::vector<float> power(m_output_size);
428
+ m_fft_engine->getPower(m_fft_output.data(), power.data(), m_output_size);
429
+ output.insert(output.end(), power.begin(), power.end());
430
+ }
431
+ else if (m_output == "phase")
432
+ {
433
+ // Compute phase: atan2(Im, Re)
434
+ std::vector<float> phases(m_output_size);
435
+ m_fft_engine->getPhase(m_fft_output.data(), phases.data(), m_output_size);
436
+ output.insert(output.end(), phases.begin(), phases.end());
437
+ }
438
+ }
439
+
440
+ // Configuration
441
+ size_t m_window_size;
442
+ size_t m_hop_size;
443
+ std::string m_method; // "fft" or "dft"
444
+ std::string m_type; // "real" or "complex"
445
+ bool m_forward; // true for forward, false for inverse
446
+ std::string m_output; // "complex", "magnitude", "power", "phase"
447
+ std::string m_window_type; // "hann", "hamming", "blackman", "bartlett", "none"
448
+ size_t m_output_size; // Number of frequency bins per frame
449
+
450
+ // FFT engine
451
+ std::unique_ptr<dsp::core::FftEngine<float>> m_fft_engine;
452
+
453
+ // Window function coefficients
454
+ std::vector<float> m_window_function;
455
+
456
+ // Working buffers
457
+ std::vector<float> m_window_data;
458
+ std::vector<std::complex<float>> m_fft_input;
459
+ std::vector<std::complex<float>> m_fft_output;
460
+
461
+ // Per-channel state
462
+ std::vector<dsp::utils::CircularBufferArray<float>> m_channel_buffers;
463
+ std::vector<size_t> m_samples_since_output;
464
+ };
465
+
466
+ } // namespace dsp::adapters
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Discrete Cosine Transform (DCT) Engine
3
+ *
4
+ * Implements Type-II DCT (most common in audio processing):
5
+ * X[k] = s(k) * sum_{n=0}^{N-1} x[n] * cos(π * k * (n + 0.5) / N), with orthonormal scaling s(0) = 1/sqrt(N) and s(k) = sqrt(2/N) for k > 0
6
+ *
7
+ * Features:
8
+ * - Pre-computed cosine table for performance
9
+ * - Forward DCT (time → frequency)
10
+ * - Inverse DCT (frequency → time)
11
+ * - Optimized for MFCC coefficient extraction
12
+ *
13
+ * The DCT is used in MFCC computation as the final step to:
14
+ * 1. Decorrelate Mel energies
15
+ * 2. Compress information into lower-order coefficients
16
+ * 3. Provide compact representation suitable for ML models
17
+ */
18
+
19
+ #ifndef DSP_CORE_DCT_ENGINE_H
20
+ #define DSP_CORE_DCT_ENGINE_H
21
+
22
+ #include <vector>
23
+ #include <cmath>
24
+ #include <stdexcept>
25
+
26
+ #ifndef M_PI
27
+ #define M_PI 3.14159265358979323846
28
+ #endif
29
+
30
+ namespace dsp
31
+ {
32
+ namespace core
33
+ {
34
+
35
/**
 * Orthonormal DCT-II / DCT-III engine backed by a pre-computed cosine table.
 *
 * The table holds cos(π * k * (n + 0.5) / N) for every (k, n) pair, so it
 * occupies N*N values and both transforms run in O(N^2).
 *
 * dct() and idct() each read N values from `input` and write N values to
 * `output`, where N is the size passed to the constructor.
 */
template <typename T = float>
class DctEngine
{
public:
    /**
     * Constructor
     * @param size DCT size (number of input/output coefficients)
     * @throws std::invalid_argument if size is 0
     */
    explicit DctEngine(size_t size);

    ~DctEngine() = default;

    /**
     * Forward DCT Type-II with orthonormal scaling
     * (1/sqrt(N) for k = 0, sqrt(2/N) for k > 0).
     *
     * @param input Input signal (size N)
     * @param output DCT coefficients (size N)
     */
    void dct(const T *input, T *output);

    /**
     * Inverse DCT Type-III (inverse of the orthonormal Type-II above).
     *
     * @param input DCT coefficients (size N)
     * @param output Time-domain signal (size N)
     */
    void idct(const T *input, T *output);

    /**
     * Get DCT size
     */
    size_t getSize() const { return m_size; }

private:
    size_t m_size; // DCT size

    // Pre-computed cosine table for DCT
    // cosTable[k][n] = cos(π * k * (n + 0.5) / N)
    std::vector<std::vector<T>> m_cosineTable;

    /**
     * Initialize cosine lookup table
     */
    void initCosineTable();
};

// ========== Implementation ==========

template <typename T>
DctEngine<T>::DctEngine(size_t size)
    : m_size(size)
{
    if (size == 0)
    {
        throw std::invalid_argument("DCT size must be > 0");
    }

    // Pre-compute cosine table
    initCosineTable();
}

template <typename T>
void DctEngine<T>::initCosineTable()
{
    // Build the angle in at least double precision. With T = float, rounding
    // π (and π * k) to float before forming an angle as large as ~π * N makes
    // the absolute error grow with the table size; only the final cosine is
    // narrowed to T. For T = double this is the same arithmetic as before.
    using Angle = decltype(T{} + 0.0);

    const Angle pi = static_cast<Angle>(M_PI);
    const Angle N = static_cast<Angle>(m_size);

    m_cosineTable.resize(m_size);
    for (size_t k = 0; k < m_size; ++k)
    {
        std::vector<T> &row = m_cosineTable[k];
        row.resize(m_size);
        for (size_t n = 0; n < m_size; ++n)
        {
            // DCT-II formula: cos(π * k * (n + 0.5) / N)
            const Angle angle = pi * static_cast<Angle>(k) * (static_cast<Angle>(n) + 0.5) / N;
            row[n] = static_cast<T>(std::cos(angle));
        }
    }
}

template <typename T>
void DctEngine<T>::dct(const T *input, T *output)
{
    // DCT-II: X[k] = s(k) * sum_{n=0}^{N-1} x[n] * cos(π * k * (n + 0.5) / N)
    const T sqrt2 = std::sqrt(static_cast<T>(2.0));
    const T sqrtN = std::sqrt(static_cast<T>(m_size));

    for (size_t k = 0; k < m_size; ++k)
    {
        const std::vector<T> &row = m_cosineTable[k];

        T sum = 0;
        for (size_t n = 0; n < m_size; ++n)
        {
            sum += input[n] * row[n];
        }

        // Orthonormal scaling: 1/sqrt(N) for DC, sqrt(2/N) otherwise
        if (k == 0)
        {
            output[k] = sum / sqrtN;
        }
        else
        {
            output[k] = sum * sqrt2 / sqrtN;
        }
    }
}

template <typename T>
void DctEngine<T>::idct(const T *input, T *output)
{
    // DCT-III (inverse of DCT-II):
    // x[n] = X[0]/sqrt(N) + sum_{k=1}^{N-1} sqrt(2/N) * X[k] * cos(π * k * (n + 0.5) / N)
    const T sqrt2 = std::sqrt(static_cast<T>(2.0));
    const T sqrtN = std::sqrt(static_cast<T>(m_size));

    for (size_t n = 0; n < m_size; ++n)
    {
        // DC component: the k = 0 row of the table is all ones, so it is
        // applied without a table lookup.
        T sum = input[0] / sqrtN;

        for (size_t k = 1; k < m_size; ++k)
        {
            sum += input[k] * sqrt2 / sqrtN * m_cosineTable[k][n];
        }

        output[n] = sum;
    }
}
164
+
165
// No explicit instantiation definitions here. Every member of DctEngine is
// defined in this header, so DctEngine<float> and DctEngine<double> are
// instantiated implicitly wherever they are used. An explicit instantiation
// definition in a header would be repeated in every translation unit that
// includes it, but the language allows it at most once per program
// (no diagnostic required).
168
+
169
+ } // namespace core
170
+ } // namespace dsp
171
+
172
+ #endif // DSP_CORE_DCT_ENGINE_H