dspx 1.2.4 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -78
- package/binding.gyp +10 -0
- package/dist/FilterBankDesign.d.ts +233 -0
- package/dist/FilterBankDesign.d.ts.map +1 -0
- package/dist/FilterBankDesign.js +247 -0
- package/dist/FilterBankDesign.js.map +1 -0
- package/dist/advanced-dsp.d.ts +6 -6
- package/dist/advanced-dsp.d.ts.map +1 -1
- package/dist/advanced-dsp.js +35 -12
- package/dist/advanced-dsp.js.map +1 -1
- package/dist/backends.d.ts +0 -103
- package/dist/backends.d.ts.map +1 -1
- package/dist/backends.js +0 -217
- package/dist/backends.js.map +1 -1
- package/dist/bindings.d.ts +270 -17
- package/dist/bindings.d.ts.map +1 -1
- package/dist/bindings.js +566 -43
- package/dist/bindings.js.map +1 -1
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +67 -8
- package/dist/types.d.ts.map +1 -1
- package/dist/utils.d.ts +38 -8
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +84 -26
- package/dist/utils.js.map +1 -1
- package/package.json +1 -2
- package/prebuilds/win32-x64/dspx.node +0 -0
- package/scripts/add-dispose-to-tests.js +145 -0
- package/src/native/DspPipeline.cc +699 -126
- package/src/native/DspPipeline.h +13 -0
- package/src/native/FilterBankDesignBindings.cc +241 -0
- package/src/native/IDspStage.h +24 -0
- package/src/native/UtilityBindings.cc +130 -0
- package/src/native/adapters/AmplifyStage.h +148 -0
- package/src/native/adapters/ClipDetectionStage.h +15 -4
- package/src/native/adapters/ConvolutionStage.h +101 -0
- package/src/native/adapters/CumulativeMovingAverageStage.h +264 -0
- package/src/native/adapters/DecimatorStage.h +80 -0
- package/src/native/adapters/DifferentiatorStage.h +13 -0
- package/src/native/adapters/ExponentialMovingAverageStage.h +290 -0
- package/src/native/adapters/FilterBankStage.cc +336 -0
- package/src/native/adapters/FilterBankStage.h +170 -0
- package/src/native/adapters/FilterStage.cc +122 -0
- package/src/native/adapters/FilterStage.h +4 -0
- package/src/native/adapters/HilbertEnvelopeStage.h +55 -0
- package/src/native/adapters/IntegratorStage.h +15 -0
- package/src/native/adapters/InterpolatorStage.h +51 -0
- package/src/native/adapters/LinearRegressionStage.h +40 -0
- package/src/native/adapters/LmsStage.h +63 -0
- package/src/native/adapters/MeanAbsoluteValueStage.h +76 -0
- package/src/native/adapters/MovingAverageStage.h +119 -0
- package/src/native/adapters/PeakDetectionStage.h +53 -0
- package/src/native/adapters/RectifyStage.h +14 -0
- package/src/native/adapters/ResamplerStage.h +67 -0
- package/src/native/adapters/RlsStage.h +76 -0
- package/src/native/adapters/RmsStage.h +72 -0
- package/src/native/adapters/SnrStage.h +45 -0
- package/src/native/adapters/SquareStage.h +78 -0
- package/src/native/adapters/SscStage.h +65 -0
- package/src/native/adapters/StftStage.h +62 -0
- package/src/native/adapters/VarianceStage.h +59 -0
- package/src/native/adapters/WampStage.h +59 -0
- package/src/native/adapters/WaveformLengthStage.h +51 -0
- package/src/native/adapters/ZScoreNormalizeStage.h +64 -0
- package/src/native/core/CumulativeMovingAverageFilter.h +123 -0
- package/src/native/core/ExponentialMovingAverageFilter.h +129 -0
- package/src/native/core/FilterBankDesign.h +266 -0
- package/src/native/core/Policies.h +124 -0
- package/src/native/utils/CircularBufferArray.cc +2 -1
- package/src/native/utils/SimdOps.h +67 -0
- package/src/native/utils/Toon.h +195 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "IirFilter.h"
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <cmath>
|
|
7
|
+
#include <stdexcept>
|
|
8
|
+
#include <algorithm>
|
|
9
|
+
|
|
10
|
+
namespace dsp
|
|
11
|
+
{
|
|
12
|
+
namespace core
|
|
13
|
+
{
|
|
14
|
+
/**
|
|
15
|
+
* Transfer-function coefficients of one band filter, as filled in by FilterBankDesign::design()
|
|
16
|
+
*/
|
|
17
|
+
struct FilterCoefficients
|
|
18
|
+
{
|
|
19
|
+
std::vector<float> b; // Numerator coefficients (copied from IirFilter::getBCoefficients())
|
|
20
|
+
std::vector<float> a; // Denominator coefficients (copied from IirFilter::getACoefficients())
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Filter Bank Design Engine
|
|
25
|
+
*
|
|
26
|
+
* Generates sets of bandpass filters covering a frequency range according to
|
|
27
|
+
* psychoacoustic (Mel, Bark) or mathematical (Linear, Log) scales.
|
|
28
|
+
*
|
|
29
|
+
* This is a stateless utility that performs frequency warping and filter design
|
|
30
|
+
* without maintaining any processing state.
|
|
31
|
+
*/
|
|
32
|
+
class FilterBankDesign
|
|
33
|
+
{
|
|
34
|
+
public:
|
|
35
|
+
enum class Scale
|
|
36
|
+
{
|
|
37
|
+
Linear, // Linear spacing in Hz
|
|
38
|
+
Log, // Logarithmic spacing
|
|
39
|
+
Mel, // Mel scale (mimics human hearing)
|
|
40
|
+
Bark // Bark scale (critical band rate)
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
enum class Type
|
|
44
|
+
{
|
|
45
|
+
Butterworth, // Maximally flat passband
|
|
46
|
+
Chebyshev1 // Equiripple passband
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Filter bank design options
|
|
51
|
+
*/
|
|
52
|
+
struct DesignOptions
|
|
53
|
+
{
|
|
54
|
+
Scale scale; // Frequency spacing scale
|
|
55
|
+
Type type; // Filter topology
|
|
56
|
+
int count; // Number of bands
|
|
57
|
+
double sampleRate; // Sample rate in Hz
|
|
58
|
+
double minFreq; // Minimum frequency in Hz
|
|
59
|
+
double maxFreq; // Maximum frequency in Hz
|
|
60
|
+
int order; // Filter order per band (steepness)
|
|
61
|
+
double rippleDb = 0.5; // Passband ripple for Chebyshev (dB)
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Design a filter bank with specified options
|
|
66
|
+
*
|
|
67
|
+
* @param opts Design options including scale, count, frequency range
|
|
68
|
+
* @return Vector of filter coefficients (one per band)
|
|
69
|
+
*
|
|
70
|
+
* @throws std::invalid_argument if options are invalid
|
|
71
|
+
*
|
|
72
|
+
* @example
|
|
73
|
+
* // Create 24-band Mel-spaced filter bank for speech analysis
|
|
74
|
+
* DesignOptions opts;
|
|
75
|
+
* opts.scale = Scale::Mel;
|
|
76
|
+
* opts.type = Type::Butterworth;
|
|
77
|
+
* opts.count = 24;
|
|
78
|
+
* opts.sampleRate = 44100;
|
|
79
|
+
* opts.minFreq = 20;
|
|
80
|
+
* opts.maxFreq = 8000;
|
|
81
|
+
* opts.order = 2;
|
|
82
|
+
* auto bank = FilterBankDesign::design(opts);
|
|
83
|
+
*/
|
|
84
|
+
static std::vector<FilterCoefficients> design(const DesignOptions &opts)
|
|
85
|
+
{
|
|
86
|
+
// Validate inputs
|
|
87
|
+
if (opts.count <= 0)
|
|
88
|
+
{
|
|
89
|
+
throw std::invalid_argument("Band count must be positive");
|
|
90
|
+
}
|
|
91
|
+
if (opts.minFreq < 0)
|
|
92
|
+
{
|
|
93
|
+
throw std::invalid_argument("Minimum frequency cannot be negative");
|
|
94
|
+
}
|
|
95
|
+
if (opts.minFreq >= opts.maxFreq)
|
|
96
|
+
{
|
|
97
|
+
throw std::invalid_argument("Invalid frequency range: minFreq must be < maxFreq");
|
|
98
|
+
}
|
|
99
|
+
if (opts.maxFreq > opts.sampleRate / 2.0)
|
|
100
|
+
{
|
|
101
|
+
throw std::invalid_argument("Maximum frequency must be <= Nyquist frequency");
|
|
102
|
+
}
|
|
103
|
+
if (opts.order <= 0)
|
|
104
|
+
{
|
|
105
|
+
throw std::invalid_argument("Filter order must be positive");
|
|
106
|
+
}
|
|
107
|
+
if (opts.sampleRate <= 0)
|
|
108
|
+
{
|
|
109
|
+
throw std::invalid_argument("Sample rate must be positive");
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Step 1: Convert frequency boundaries to target scale
|
|
113
|
+
double minVal = toScale(opts.minFreq, opts.scale);
|
|
114
|
+
double maxVal = toScale(opts.maxFreq, opts.scale);
|
|
115
|
+
double step = (maxVal - minVal) / opts.count;
|
|
116
|
+
|
|
117
|
+
// Step 2: Generate band edges in target scale, then convert back to Hz
|
|
118
|
+
std::vector<double> boundaries;
|
|
119
|
+
boundaries.reserve(opts.count + 1);
|
|
120
|
+
|
|
121
|
+
for (int i = 0; i <= opts.count; ++i)
|
|
122
|
+
{
|
|
123
|
+
double val = minVal + (i * step);
|
|
124
|
+
double hz = fromScale(val, opts.scale);
|
|
125
|
+
boundaries.push_back(hz);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Step 3: Create filters for each band
|
|
129
|
+
std::vector<FilterCoefficients> bank;
|
|
130
|
+
bank.reserve(opts.count);
|
|
131
|
+
|
|
132
|
+
for (int i = 0; i < opts.count; ++i)
|
|
133
|
+
{
|
|
134
|
+
double fLow = boundaries[i];
|
|
135
|
+
double fHigh = boundaries[i + 1];
|
|
136
|
+
|
|
137
|
+
// For the first band starting at 0 Hz, use a small positive value
|
|
138
|
+
// to avoid DC (bandpass filters can't have 0 Hz as lower bound)
|
|
139
|
+
if (fLow == 0.0)
|
|
140
|
+
{
|
|
141
|
+
fLow = 1.0; // 1 Hz minimum
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Normalize frequencies to [0, 0.5] range (0.5 = Nyquist)
|
|
145
|
+
double nLow = fLow / opts.sampleRate;
|
|
146
|
+
double nHigh = fHigh / opts.sampleRate;
|
|
147
|
+
|
|
148
|
+
// Safety clamping to avoid numerical issues
|
|
149
|
+
nLow = std::max(0.0001, std::min(nLow, 0.4999));
|
|
150
|
+
nHigh = std::max(0.0001, std::min(nHigh, 0.4999));
|
|
151
|
+
|
|
152
|
+
// Ensure proper ordering after clamping
|
|
153
|
+
if (nLow >= nHigh)
|
|
154
|
+
{
|
|
155
|
+
nHigh = nLow + 0.0001;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Design bandpass filter using existing IirFilter factory
|
|
159
|
+
IirFilter<float> filter = (opts.type == Type::Chebyshev1)
|
|
160
|
+
? IirFilter<float>::createChebyshevBandPass(
|
|
161
|
+
nLow, nHigh, opts.order, opts.rippleDb)
|
|
162
|
+
: IirFilter<float>::createButterworthBandPass(
|
|
163
|
+
nLow, nHigh, opts.order);
|
|
164
|
+
|
|
165
|
+
// Extract coefficients
|
|
166
|
+
FilterCoefficients coeffs;
|
|
167
|
+
coeffs.b = filter.getBCoefficients();
|
|
168
|
+
coeffs.a = filter.getACoefficients();
|
|
169
|
+
|
|
170
|
+
bank.push_back(coeffs);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
return bank;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Get frequency boundaries for a filter bank design
|
|
178
|
+
* Useful for visualization and debugging
|
|
179
|
+
*
|
|
180
|
+
* @param opts Design options
|
|
181
|
+
* @return Vector of boundary frequencies in Hz
|
|
182
|
+
*/
|
|
183
|
+
static std::vector<double> getBoundaries(const DesignOptions &opts)
|
|
184
|
+
{
|
|
185
|
+
if (opts.count <= 0)
|
|
186
|
+
{
|
|
187
|
+
throw std::invalid_argument("Band count must be positive");
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
double minVal = toScale(opts.minFreq, opts.scale);
|
|
191
|
+
double maxVal = toScale(opts.maxFreq, opts.scale);
|
|
192
|
+
double step = (maxVal - minVal) / opts.count;
|
|
193
|
+
|
|
194
|
+
std::vector<double> boundaries;
|
|
195
|
+
boundaries.reserve(opts.count + 1);
|
|
196
|
+
|
|
197
|
+
for (int i = 0; i <= opts.count; ++i)
|
|
198
|
+
{
|
|
199
|
+
double val = minVal + (i * step);
|
|
200
|
+
boundaries.push_back(fromScale(val, opts.scale));
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
return boundaries;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
private:
|
|
207
|
+
/**
|
|
208
|
+
* Convert frequency from Hz to target scale
|
|
209
|
+
*/
|
|
210
|
+
static double toScale(double hz, Scale scale)
|
|
211
|
+
{
|
|
212
|
+
switch (scale)
|
|
213
|
+
{
|
|
214
|
+
case Scale::Linear:
|
|
215
|
+
return hz;
|
|
216
|
+
|
|
217
|
+
case Scale::Log:
|
|
218
|
+
return std::log10(hz);
|
|
219
|
+
|
|
220
|
+
case Scale::Mel:
|
|
221
|
+
// Mel scale: f_mel = 2595 * log10(1 + f_hz / 700)
|
|
222
|
+
return 2595.0 * std::log10(1.0 + hz / 700.0);
|
|
223
|
+
|
|
224
|
+
case Scale::Bark:
|
|
225
|
+
// Bark scale (Traunmüller 1990)
|
|
226
|
+
// z = 26.81 * f / (1960 + f) - 0.53
|
|
227
|
+
return 26.81 * hz / (1960.0 + hz) - 0.53;
|
|
228
|
+
|
|
229
|
+
default:
|
|
230
|
+
return hz;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Convert from scale back to Hz
|
|
236
|
+
*/
|
|
237
|
+
static double fromScale(double val, Scale scale)
|
|
238
|
+
{
|
|
239
|
+
switch (scale)
|
|
240
|
+
{
|
|
241
|
+
case Scale::Linear:
|
|
242
|
+
return val;
|
|
243
|
+
|
|
244
|
+
case Scale::Log:
|
|
245
|
+
return std::pow(10.0, val);
|
|
246
|
+
|
|
247
|
+
case Scale::Mel:
|
|
248
|
+
// Inverse Mel: f_hz = 700 * (10^(f_mel / 2595) - 1)
|
|
249
|
+
return 700.0 * (std::pow(10.0, val / 2595.0) - 1.0);
|
|
250
|
+
|
|
251
|
+
case Scale::Bark:
|
|
252
|
+
// Inverse Bark (Traunmüller 1990)
|
|
253
|
+
// f = 1960 * (z + 0.53) / (26.81 - (z + 0.53))
|
|
254
|
+
{
|
|
255
|
+
double adjusted = val + 0.53;
|
|
256
|
+
return 1960.0 * adjusted / (26.81 - adjusted);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
default:
|
|
260
|
+
return val;
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
} // namespace core
|
|
266
|
+
} // namespace dsp
|
|
@@ -349,4 +349,128 @@ namespace dsp::core
|
|
|
349
349
|
void setCoefficients(const std::vector<T> &coeffs) { m_coefficients = coeffs; }
|
|
350
350
|
};
|
|
351
351
|
|
|
352
|
+
/**
 * @brief Policy for Exponential Moving Average (EMA).
 *
 * Implements EMA: EMA(t) = α * value(t) + (1 - α) * EMA(t-1)
 * where α (alpha) is the smoothing factor (0 < α ≤ 1).
 *
 * The first sample added after construction or clear() seeds the average
 * directly instead of being blended with the initial zero.
 */
template <typename T>
struct EmaPolicy
{
    T m_ema = 0;   // Current EMA value
    T m_alpha;     // Smoothing factor
    bool m_initialized = false; // false until the first sample has seeded m_ema

    /**
     * @param alpha Smoothing factor, must satisfy 0 < alpha <= 1.
     * @throws std::invalid_argument if alpha is outside (0, 1] or is NaN.
     */
    explicit EmaPolicy(T alpha)
        : m_alpha(alpha)
    {
        // Written as a negated in-range test so that NaN (for which every
        // comparison is false) is rejected instead of silently accepted.
        if (!(alpha > 0 && alpha <= 1))
        {
            throw std::invalid_argument("EMA alpha must be in range (0, 1]");
        }
    }

    /** Fold one sample into the average. */
    void onAdd(T val)
    {
        if (!m_initialized)
        {
            // Initialize with first value
            m_ema = val;
            m_initialized = true;
        }
        else
        {
            // EMA formula: EMA(t) = α * value(t) + (1 - α) * EMA(t-1)
            m_ema = m_alpha * val + (static_cast<T>(1) - m_alpha) * m_ema;
        }
    }

    /** No-op: an EMA has no window, so there is nothing to take back out. */
    void onRemove(T /*val*/)
    {
        // EMA doesn't support removal in sliding window context
        // This should not be called in typical EMA usage
    }

    /** Forget all samples; the next onAdd() seeds the average again. */
    void clear()
    {
        m_ema = 0;
        m_initialized = false;
    }

    /**
     * @return Current EMA value (0 before any sample was added).
     *         The count argument is ignored.
     */
    T getResult(size_t /*count*/) const
    {
        return m_ema;
    }

    // For state serialization
    std::pair<T, bool> getState() const { return {m_ema, m_initialized}; }
    void setState(T ema, bool initialized)
    {
        m_ema = ema;
        m_initialized = initialized;
    }

    T getAlpha() const { return m_alpha; }
};
|
|
419
|
+
|
|
420
|
+
/**
 * @brief Policy for Cumulative Moving Average (CMA).
 *
 * Implements CMA: CMA(n) = (CMA(n-1) * (n-1) + value(n)) / n
 *
 * Keeps a running sum and a sample count, so the average over everything
 * seen since construction or clear() is available without re-reading samples.
 */
template <typename T>
struct CmaPolicy
{
    T m_sum = 0;        // Running sum of all values
    size_t m_count = 0; // Total number of samples seen

    /** Add one sample to the running sum. */
    void onAdd(T val)
    {
        m_sum += val;
        m_count++;
    }

    /**
     * Take one sample back out of the running sum.
     *
     * CMA doesn't use removal in typical usage; a call on an empty policy is
     * ignored. When the last sample is removed the sum is reset to exactly
     * zero, so floating-point rounding residue cannot bias later averages.
     */
    void onRemove(T val)
    {
        if (m_count == 0)
        {
            return;
        }

        m_count--;
        if (m_count == 0)
        {
            m_sum = 0; // drop any rounding residue left by add/remove pairs
        }
        else
        {
            m_sum -= val;
        }
    }

    /** Forget all samples. */
    void clear()
    {
        m_sum = 0;
        m_count = 0;
    }

    /**
     * @return Average of all samples currently accounted for, or 0 if none.
     *         The windowCount argument is ignored.
     */
    T getResult(size_t /*windowCount*/) const
    {
        // Use the policy's internal count, not the window count
        if (m_count == 0)
            return 0;
        return m_sum / static_cast<T>(m_count);
    }

    // For state serialization
    std::pair<T, size_t> getState() const { return {m_sum, m_count}; }
    void setState(T sum, size_t count)
    {
        m_sum = sum;
        m_count = count;
    }

    size_t getCount() const { return m_count; }
};
|
|
475
|
+
|
|
352
476
|
} // namespace dsp::core
|
|
@@ -240,7 +240,8 @@ size_t CircularBufferArray<T>::expireOld(double currentTimestamp)
|
|
|
240
240
|
double cutoff_time = currentTimestamp - windowDuration_ms;
|
|
241
241
|
|
|
242
242
|
// Remove samples from tail while they're older than cutoff
|
|
243
|
-
|
|
243
|
+
// FIX: use <= (not <) so samples exactly at the cutoff are expired too, instead of lingering for an additional windowDuration_ms
|
|
244
|
+
while (count > 0 && timestamps[tail] <= cutoff_time)
|
|
244
245
|
{
|
|
245
246
|
tail = (tail + 1) % capacity;
|
|
246
247
|
--count;
|
|
@@ -1362,4 +1362,71 @@ namespace dsp::simd
|
|
|
1362
1362
|
}
|
|
1363
1363
|
|
|
1364
1364
|
// SIMD_X86 removed since SIMD_SSE3 covers it for most of the modern devices
|
|
1365
|
+
/**
 * @brief Replace every element of a float buffer with its square (x * x).
 *
 * Used, for example, as the squaring step of Pan-Tompkins QRS detection.
 * Whole SIMD-width groups are processed with AVX2, SSE2 or NEON when the
 * matching SIMD_* macro is defined; whatever is left over (or the whole
 * buffer when no macro is defined) goes through the scalar loop at the end.
 *
 * @param buffer Input/output buffer (modified in-place)
 * @param size Number of elements
 */
inline void square_inplace(float *buffer, size_t size)
{
    // Index of the first element not yet squared; the vector paths advance it.
    size_t idx = 0;

#if defined(SIMD_AVX2)
    const size_t lanes = 8;
    const size_t vectorEnd = size - (size % lanes);
    while (idx < vectorEnd)
    {
        const __m256 x = _mm256_loadu_ps(buffer + idx);
        _mm256_storeu_ps(buffer + idx, _mm256_mul_ps(x, x));
        idx += lanes;
    }
#elif defined(SIMD_SSE2)
    const size_t lanes = 4;
    const size_t vectorEnd = size - (size % lanes);
    while (idx < vectorEnd)
    {
        const __m128 x = _mm_loadu_ps(buffer + idx);
        _mm_storeu_ps(buffer + idx, _mm_mul_ps(x, x));
        idx += lanes;
    }
#elif defined(SIMD_NEON)
    const size_t lanes = 4;
    const size_t vectorEnd = size - (size % lanes);
    while (idx < vectorEnd)
    {
        const float32x4_t x = vld1q_f32(buffer + idx);
        vst1q_f32(buffer + idx, vmulq_f32(x, x));
        idx += lanes;
    }
#endif

    // Scalar tail (and scalar fallback when no SIMD path is compiled in)
    while (idx < size)
    {
        const float x = buffer[idx];
        buffer[idx] = x * x;
        ++idx;
    }
}
|
|
1365
1432
|
} // namespace dsp::simd
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
|
|
6
|
+
|
|
7
|
+
namespace dsp
{
    namespace toon
    {

        /**
         * TOON Protocol Tokens
         *
         * One-byte type tags. Serializer writes a tag in front of every value
         * and Deserializer checks it before decoding the payload.
         */
        enum Token : uint8_t
        {
            T_NULL = 0x00, // also what Deserializer::peekToken() reports at end of input
            T_INT32 = 0x01,
            T_FLOAT = 0x02,
            T_STRING = 0x03,      // int32 byte length, then the raw bytes
            T_FLOAT_ARRAY = 0x04, // int32 element count, then the raw floats
            T_OBJECT_START = 0x10,
            T_OBJECT_END = 0x11,
            T_ARRAY_START = 0x12,
            T_ARRAY_END = 0x13,
            T_BOOL = 0x14, // one payload byte, 0 or 1
            T_DOUBLE = 0x15
        };

        /**
         * Appends tagged values to a growing byte buffer.
         *
         * Numeric payloads are copied byte-for-byte from their in-memory
         * representation, so the encoding follows the byte order and type
         * sizes of the machine that wrote it.
         */
        class Serializer
        {
        public:
            std::vector<uint8_t> buffer; // Encoded output, appended to by every write*

            /** Append a bare type tag. */
            void writeTag(Token tag)
            {
                buffer.push_back(static_cast<uint8_t>(tag));
            }

            void writeInt32(int32_t val)
            {
                writeTag(T_INT32);
                appendRaw(&val, sizeof(val));
            }

            void writeFloat(float val)
            {
                writeTag(T_FLOAT);
                appendRaw(&val, sizeof(val));
            }

            void writeDouble(double val)
            {
                writeTag(T_DOUBLE);
                appendRaw(&val, sizeof(val));
            }

            void writeBool(bool val)
            {
                writeTag(T_BOOL);
                buffer.push_back(static_cast<uint8_t>(val ? 1 : 0));
            }

            /**
             * Write a length-prefixed string.
             *
             * @throws std::length_error if the string is longer than the
             *         int32 length prefix can express (nothing is written).
             */
            void writeString(const std::string &val)
            {
                // Validate first so a failure does not leave a dangling tag behind.
                const int32_t len = checkedLength(val.size(), "TOON: String too long to serialize");
                writeTag(T_STRING);
                appendRaw(&len, sizeof(len)); // Length prefix
                appendRaw(val.data(), val.size());
            }

            /**
             * Write a float buffer as a count followed by one bulk copy of
             * the elements (no per-element tags).
             *
             * @throws std::length_error if the element count does not fit
             *         the int32 count prefix (nothing is written).
             */
            void writeFloatArray(const std::vector<float> &data)
            {
                const int32_t count = checkedLength(data.size(), "TOON: Float array too long to serialize");
                writeTag(T_FLOAT_ARRAY);
                appendRaw(&count, sizeof(count));
                if (!data.empty())
                {
                    appendRaw(data.data(), data.size() * sizeof(float));
                }
            }

            void startObject() { writeTag(T_OBJECT_START); }
            void endObject() { writeTag(T_OBJECT_END); }
            void startArray() { writeTag(T_ARRAY_START); }
            void endArray() { writeTag(T_ARRAY_END); }

        private:
            /** Convert a container size to the int32 prefix type, refusing to truncate. */
            static int32_t checkedLength(size_t n, const char *message)
            {
                if (n > static_cast<size_t>(std::numeric_limits<int32_t>::max()))
                {
                    throw std::length_error(message);
                }
                return static_cast<int32_t>(n);
            }

            /** Append size raw bytes starting at ptr. */
            void appendRaw(const void *ptr, size_t size)
            {
                const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
                buffer.insert(buffer.end(), bytes, bytes + size);
            }
        };

        /**
         * Reads tagged values back from a byte range written by Serializer.
         *
         * The range is borrowed, not copied: it must stay alive while the
         * Deserializer is in use. Every read checks the tag and the remaining
         * length and throws std::runtime_error on mismatch or truncation.
         * A read that throws may already have consumed the tag/length prefix.
         */
        class Deserializer
        {
        public:
            const uint8_t *data; // Start of the encoded input (not owned)
            size_t size;         // Total number of bytes at data
            size_t pos;          // Offset of the next unread byte

            Deserializer(const uint8_t *d, size_t s) : data(d), size(s), pos(0) {}

            /** Look at the next tag without consuming it; T_NULL at end of input. */
            Token peekToken()
            {
                if (pos >= size)
                    return T_NULL;
                return static_cast<Token>(data[pos]);
            }

            /** Consume and return the next tag. @throws std::runtime_error at end of input. */
            Token readToken()
            {
                if (pos >= size)
                    throw std::runtime_error("TOON: Unexpected EOF");
                return static_cast<Token>(data[pos++]);
            }

            int32_t readInt32()
            {
                consumeToken(T_INT32);
                return readRaw<int32_t>();
            }

            float readFloat()
            {
                consumeToken(T_FLOAT);
                return readRaw<float>();
            }

            double readDouble()
            {
                consumeToken(T_DOUBLE);
                return readRaw<double>();
            }

            bool readBool()
            {
                consumeToken(T_BOOL);
                if (pos >= size)
                    throw std::runtime_error("TOON: EOF reading bool");
                return data[pos++] != 0;
            }

            /**
             * Read a length-prefixed string.
             *
             * @throws std::runtime_error on tag mismatch, a negative length,
             *         or a length that runs past the end of the input.
             */
            std::string readString()
            {
                consumeToken(T_STRING);
                const int32_t len = readRaw<int32_t>();
                // A negative length would convert to a huge size_t and defeat
                // the bounds check below by wrapping around.
                if (len < 0)
                    throw std::runtime_error("TOON: Invalid string length");
                const size_t byteCount = static_cast<size_t>(len);
                if (byteCount > remaining())
                    throw std::runtime_error("TOON: String out of bounds");
                std::string s(reinterpret_cast<const char *>(data + pos), byteCount);
                pos += byteCount;
                return s;
            }

            /**
             * Read a count-prefixed float array.
             *
             * @throws std::runtime_error on tag mismatch, a negative count,
             *         or a count that runs past the end of the input.
             */
            std::vector<float> readFloatArray()
            {
                consumeToken(T_FLOAT_ARRAY);
                const int32_t count = readRaw<int32_t>();
                if (count < 0)
                    throw std::runtime_error("TOON: Invalid array count");
                const size_t elements = static_cast<size_t>(count);
                // Compare in elements rather than bytes so the product cannot overflow.
                if (elements > remaining() / sizeof(float))
                    throw std::runtime_error("TOON: Array out of bounds");

                std::vector<float> vec(elements);
                if (elements > 0)
                {
                    const size_t byteCount = elements * sizeof(float);
                    std::memcpy(vec.data(), data + pos, byteCount);
                    pos += byteCount;
                }
                return vec;
            }

            /** Consume the next tag and require it to be expected. */
            void consumeToken(Token expected)
            {
                const Token actual = readToken();
                if (actual != expected)
                    throw std::runtime_error("TOON: Type mismatch");
            }

        private:
            /** Bytes left between pos and the end of the input (0 if pos is past the end). */
            size_t remaining() const
            {
                return pos < size ? size - pos : 0;
            }

            /** Copy one fixed-size value out of the input and advance past it. */
            template <typename T>
            T readRaw()
            {
                if (remaining() < sizeof(T))
                    throw std::runtime_error("TOON: EOF reading value");
                T val;
                std::memcpy(&val, data + pos, sizeof(T));
                pos += sizeof(T);
                return val;
            }
        };

    } // namespace toon
} // namespace dsp
|