dspx 1.1.3 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bindings.d.ts +36 -21
- package/dist/bindings.d.ts.map +1 -1
- package/dist/bindings.js +70 -22
- package/dist/bindings.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -6
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/prebuilds/win32-x64/dspx.node +0 -0
- package/src/native/adapters/FftStage.cc +247 -120
- package/src/native/core/Fftpack.cc +59 -17
- package/src/native/core/Fftpack.h +3 -2
|
@@ -51,7 +51,7 @@ namespace dsp
|
|
|
51
51
|
// ========== Public Transform Methods ==========
|
|
52
52
|
|
|
53
53
|
template <typename T>
|
|
54
|
-
void FftpackContext<T>::rfft(const T *input, std::complex<T> *output)
|
|
54
|
+
void FftpackContext<T>::rfft(const T *__restrict input, std::complex<T> *__restrict output)
|
|
55
55
|
{
|
|
56
56
|
if (m_n == 1)
|
|
57
57
|
{
|
|
@@ -59,15 +59,15 @@ namespace dsp
|
|
|
59
59
|
return;
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
-
//
|
|
63
|
-
std::
|
|
62
|
+
// OPTIMIZATION: Use memcpy for bulk copy (requires trivially copyable T and non-overlapping buffers; typically no slower than std::copy)
|
|
63
|
+
std::memcpy(m_workBuffer.data(), input, m_n * sizeof(T));
|
|
64
64
|
|
|
65
65
|
// Perform forward real FFT
|
|
66
66
|
drftf1(m_n, m_workBuffer.data(), m_wsave.data(), m_wsave.data() + m_n, m_ifac.data());
|
|
67
67
|
|
|
68
|
+
// OPTIMIZATION: Improved format conversion with better cache locality
|
|
68
69
|
// Convert FFTPACK halfcomplex format to standard complex format
|
|
69
70
|
// FFTPACK stores: [DC, re1, im1, re2, im2, ..., re(K), im(K), Nyquist] with K = (N-1)/2; the trailing Nyquist entry exists only for even N
|
|
70
|
-
// (for even N)
|
|
71
71
|
|
|
72
72
|
size_t halfSize = (m_n / 2) + 1;
|
|
73
73
|
|
|
@@ -76,17 +76,37 @@ namespace dsp
|
|
|
76
76
|
|
|
77
77
|
if (m_n % 2 == 0)
|
|
78
78
|
{
|
|
79
|
-
// Even N
|
|
80
|
-
|
|
79
|
+
// OPTIMIZATION: Even N - process in order for better cache locality
|
|
80
|
+
size_t half = m_n / 2;
|
|
81
|
+
|
|
82
|
+
// Process middle frequencies (unrolled by 2 for better ILP)
|
|
83
|
+
size_t i = 1;
|
|
84
|
+
for (; i + 1 < half; i += 2)
|
|
85
|
+
{
|
|
86
|
+
// First pair
|
|
87
|
+
output[i] = std::complex<T>(m_workBuffer[2 * i - 1], m_workBuffer[2 * i]);
|
|
88
|
+
// Second pair
|
|
89
|
+
output[i + 1] = std::complex<T>(m_workBuffer[2 * (i + 1) - 1], m_workBuffer[2 * (i + 1)]);
|
|
90
|
+
}
|
|
91
|
+
// Handle remainder
|
|
92
|
+
for (; i < half; ++i)
|
|
81
93
|
{
|
|
82
94
|
output[i] = std::complex<T>(m_workBuffer[2 * i - 1], m_workBuffer[2 * i]);
|
|
83
95
|
}
|
|
84
|
-
|
|
96
|
+
|
|
97
|
+
// Nyquist component (real)
|
|
98
|
+
output[half] = std::complex<T>(m_workBuffer[m_n - 1], 0);
|
|
85
99
|
}
|
|
86
100
|
else
|
|
87
101
|
{
|
|
88
|
-
// Odd N
|
|
89
|
-
|
|
102
|
+
// OPTIMIZATION: Odd N - unrolled loop
|
|
103
|
+
size_t i = 1;
|
|
104
|
+
for (; i + 1 < halfSize; i += 2)
|
|
105
|
+
{
|
|
106
|
+
output[i] = std::complex<T>(m_workBuffer[2 * i - 1], m_workBuffer[2 * i]);
|
|
107
|
+
output[i + 1] = std::complex<T>(m_workBuffer[2 * (i + 1) - 1], m_workBuffer[2 * (i + 1)]);
|
|
108
|
+
}
|
|
109
|
+
for (; i < halfSize; ++i)
|
|
90
110
|
{
|
|
91
111
|
output[i] = std::complex<T>(m_workBuffer[2 * i - 1], m_workBuffer[2 * i]);
|
|
92
112
|
}
|
|
@@ -94,7 +114,7 @@ namespace dsp
|
|
|
94
114
|
}
|
|
95
115
|
|
|
96
116
|
template <typename T>
|
|
97
|
-
void FftpackContext<T>::irfft(const std::complex<T> *input, T *output)
|
|
117
|
+
void FftpackContext<T>::irfft(const std::complex<T> *__restrict input, T *__restrict output)
|
|
98
118
|
{
|
|
99
119
|
if (m_n == 1)
|
|
100
120
|
{
|
|
@@ -102,24 +122,46 @@ namespace dsp
|
|
|
102
122
|
return;
|
|
103
123
|
}
|
|
104
124
|
|
|
125
|
+
// OPTIMIZATION: Improved format conversion with loop unrolling
|
|
105
126
|
// Convert standard complex format to FFTPACK halfcomplex format
|
|
106
127
|
m_workBuffer[0] = input[0].real(); // DC
|
|
107
128
|
|
|
108
129
|
if (m_n % 2 == 0)
|
|
109
130
|
{
|
|
110
|
-
// Even N
|
|
111
|
-
|
|
131
|
+
// OPTIMIZATION: Even N - unrolled conversion
|
|
132
|
+
size_t half = m_n / 2;
|
|
133
|
+
size_t i = 1;
|
|
134
|
+
|
|
135
|
+
// Unroll by 2
|
|
136
|
+
for (; i + 1 < half; i += 2)
|
|
112
137
|
{
|
|
113
138
|
m_workBuffer[2 * i - 1] = input[i].real();
|
|
114
139
|
m_workBuffer[2 * i] = input[i].imag();
|
|
140
|
+
m_workBuffer[2 * (i + 1) - 1] = input[i + 1].real();
|
|
141
|
+
m_workBuffer[2 * (i + 1)] = input[i + 1].imag();
|
|
115
142
|
}
|
|
116
|
-
|
|
143
|
+
for (; i < half; ++i)
|
|
144
|
+
{
|
|
145
|
+
m_workBuffer[2 * i - 1] = input[i].real();
|
|
146
|
+
m_workBuffer[2 * i] = input[i].imag();
|
|
147
|
+
}
|
|
148
|
+
m_workBuffer[m_n - 1] = input[half].real(); // Nyquist
|
|
117
149
|
}
|
|
118
150
|
else
|
|
119
151
|
{
|
|
120
|
-
// Odd N
|
|
152
|
+
// OPTIMIZATION: Odd N - unrolled conversion
|
|
121
153
|
size_t halfSize = (m_n / 2) + 1;
|
|
122
|
-
|
|
154
|
+
size_t i = 1;
|
|
155
|
+
|
|
156
|
+
// Unroll by 2
|
|
157
|
+
for (; i + 1 < halfSize; i += 2)
|
|
158
|
+
{
|
|
159
|
+
m_workBuffer[2 * i - 1] = input[i].real();
|
|
160
|
+
m_workBuffer[2 * i] = input[i].imag();
|
|
161
|
+
m_workBuffer[2 * (i + 1) - 1] = input[i + 1].real();
|
|
162
|
+
m_workBuffer[2 * (i + 1)] = input[i + 1].imag();
|
|
163
|
+
}
|
|
164
|
+
for (; i < halfSize; ++i)
|
|
123
165
|
{
|
|
124
166
|
m_workBuffer[2 * i - 1] = input[i].real();
|
|
125
167
|
m_workBuffer[2 * i] = input[i].imag();
|
|
@@ -129,8 +171,8 @@ namespace dsp
|
|
|
129
171
|
// Perform inverse real FFT
|
|
130
172
|
drftb1(m_n, m_workBuffer.data(), m_wsave.data(), m_wsave.data() + m_n, m_ifac.data());
|
|
131
173
|
|
|
132
|
-
//
|
|
133
|
-
std::
|
|
174
|
+
// OPTIMIZATION: Use memcpy for bulk copy
|
|
175
|
+
std::memcpy(output, m_workBuffer.data(), m_n * sizeof(T));
|
|
134
176
|
}
|
|
135
177
|
|
|
136
178
|
// ========== FFTPACK Initialization ==========
|
|
@@ -40,8 +40,9 @@ namespace dsp
|
|
|
40
40
|
explicit FftpackContext(size_t n);
|
|
41
41
|
|
|
42
42
|
// Forward/inverse real FFT
|
|
43
|
-
|
|
44
|
-
void
|
|
43
|
+
// Note: input/output buffers must not overlap (restrict semantics)
|
|
44
|
+
void rfft(const T *__restrict input, std::complex<T> *__restrict output);
|
|
45
|
+
void irfft(const std::complex<T> *__restrict input, T *__restrict output);
|
|
45
46
|
|
|
46
47
|
size_t size() const { return m_n; }
|
|
47
48
|
size_t halfSize() const { return (m_n / 2) + 1; }
|