react-native-audio-api 0.5.5 → 0.6.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/RNAudioAPI.podspec +1 -1
  2. package/android/src/main/cpp/audioapi/android/core/AudioPlayer.cpp +0 -20
  3. package/android/src/main/cpp/audioapi/android/core/AudioPlayer.h +0 -2
  4. package/android/src/main/java/com/swmansion/audioapi/AudioAPIPackage.kt +13 -0
  5. package/android/src/main/java/com/swmansion/audioapi/AudioManagerModule.kt +59 -0
  6. package/android/src/oldarch/NativeAudioManagerModuleSpec.java +99 -0
  7. package/common/cpp/audioapi/AudioAPIModuleInstaller.h +30 -6
  8. package/common/cpp/audioapi/HostObjects/OfflineAudioContextHostObject.h +70 -0
  9. package/common/cpp/audioapi/core/AudioContext.cpp +1 -12
  10. package/common/cpp/audioapi/core/AudioContext.h +0 -1
  11. package/common/cpp/audioapi/core/OfflineAudioContext.cpp +117 -0
  12. package/common/cpp/audioapi/core/OfflineAudioContext.h +40 -0
  13. package/common/cpp/audioapi/core/sources/AudioBufferSourceNode.cpp +3 -3
  14. package/common/cpp/audioapi/core/sources/AudioScheduledSourceNode.cpp +28 -2
  15. package/common/cpp/audioapi/core/utils/AudioNodeDestructor.cpp +53 -0
  16. package/common/cpp/audioapi/core/utils/AudioNodeDestructor.h +33 -0
  17. package/common/cpp/audioapi/core/utils/AudioNodeManager.cpp +13 -10
  18. package/common/cpp/audioapi/core/utils/AudioNodeManager.h +3 -0
  19. package/common/cpp/audioapi/libs/signalsmith-stretch/fft-accelerate.h +326 -0
  20. package/common/cpp/audioapi/libs/signalsmith-stretch/fft.h +1257 -413
  21. package/common/cpp/audioapi/libs/signalsmith-stretch/signalsmith-stretch.h +398 -232
  22. package/common/cpp/audioapi/libs/signalsmith-stretch/stft.h +625 -0
  23. package/ios/audioapi/ios/AudioAPIModule.mm +2 -3
  24. package/ios/audioapi/ios/AudioManagerModule.h +18 -0
  25. package/ios/audioapi/ios/AudioManagerModule.mm +92 -0
  26. package/ios/audioapi/ios/core/AudioPlayer.h +4 -12
  27. package/ios/audioapi/ios/core/AudioPlayer.m +26 -108
  28. package/ios/audioapi/ios/core/IOSAudioPlayer.h +1 -3
  29. package/ios/audioapi/ios/core/IOSAudioPlayer.mm +4 -28
  30. package/ios/audioapi/ios/system/AudioEngine.h +23 -0
  31. package/ios/audioapi/ios/system/AudioEngine.mm +137 -0
  32. package/ios/audioapi/ios/system/AudioSessionManager.h +22 -0
  33. package/ios/audioapi/ios/system/AudioSessionManager.mm +183 -0
  34. package/ios/audioapi/ios/system/LockScreenManager.h +23 -0
  35. package/ios/audioapi/ios/system/LockScreenManager.mm +299 -0
  36. package/ios/audioapi/ios/system/NotificationManager.h +16 -0
  37. package/ios/audioapi/ios/system/NotificationManager.mm +151 -0
  38. package/lib/module/api.js +3 -1
  39. package/lib/module/api.js.map +1 -1
  40. package/lib/module/api.web.js +1 -0
  41. package/lib/module/api.web.js.map +1 -1
  42. package/lib/module/core/AudioContext.js +2 -1
  43. package/lib/module/core/AudioContext.js.map +1 -1
  44. package/lib/module/core/OfflineAudioContext.js +57 -0
  45. package/lib/module/core/OfflineAudioContext.js.map +1 -0
  46. package/lib/module/specs/NativeAudioManagerModule.js +31 -0
  47. package/lib/module/specs/NativeAudioManagerModule.js.map +1 -0
  48. package/lib/module/specs/index.js +6 -0
  49. package/lib/module/specs/index.js.map +1 -0
  50. package/lib/module/system/AudioManager.js +66 -0
  51. package/lib/module/system/AudioManager.js.map +1 -0
  52. package/lib/module/system/index.js +4 -0
  53. package/lib/module/system/index.js.map +1 -0
  54. package/lib/module/system/types.js +2 -0
  55. package/lib/module/system/types.js.map +1 -0
  56. package/lib/module/web-core/OfflineAudioContext.js +90 -0
  57. package/lib/module/web-core/OfflineAudioContext.js.map +1 -0
  58. package/lib/typescript/api.d.ts +4 -1
  59. package/lib/typescript/api.d.ts.map +1 -1
  60. package/lib/typescript/api.web.d.ts +1 -0
  61. package/lib/typescript/api.web.d.ts.map +1 -1
  62. package/lib/typescript/core/AudioContext.d.ts.map +1 -1
  63. package/lib/typescript/core/OfflineAudioContext.d.ts +14 -0
  64. package/lib/typescript/core/OfflineAudioContext.d.ts.map +1 -0
  65. package/lib/typescript/interfaces.d.ts +6 -0
  66. package/lib/typescript/interfaces.d.ts.map +1 -1
  67. package/lib/typescript/specs/NativeAudioManagerModule.d.ts +13 -0
  68. package/lib/typescript/specs/NativeAudioManagerModule.d.ts.map +1 -0
  69. package/lib/typescript/specs/index.d.ts +4 -0
  70. package/lib/typescript/specs/index.d.ts.map +1 -0
  71. package/lib/typescript/system/AudioManager.d.ts +12 -0
  72. package/lib/typescript/system/AudioManager.d.ts.map +1 -0
  73. package/lib/typescript/system/index.d.ts +2 -0
  74. package/lib/typescript/system/index.d.ts.map +1 -0
  75. package/lib/typescript/system/types.d.ts +28 -0
  76. package/lib/typescript/system/types.d.ts.map +1 -0
  77. package/lib/typescript/types.d.ts +5 -0
  78. package/lib/typescript/types.d.ts.map +1 -1
  79. package/lib/typescript/web-core/OfflineAudioContext.d.ts +34 -0
  80. package/lib/typescript/web-core/OfflineAudioContext.d.ts.map +1 -0
  81. package/package.json +2 -2
  82. package/src/api.ts +12 -2
  83. package/src/api.web.ts +1 -0
  84. package/src/core/AudioContext.ts +6 -1
  85. package/src/core/OfflineAudioContext.ts +94 -0
  86. package/src/interfaces.ts +11 -0
  87. package/src/specs/NativeAudioManagerModule.ts +51 -0
  88. package/src/specs/index.ts +6 -0
  89. package/src/system/AudioManager.ts +122 -0
  90. package/src/system/index.ts +1 -0
  91. package/src/system/types.ts +68 -0
  92. package/src/types.ts +6 -0
  93. package/src/web-core/OfflineAudioContext.tsx +163 -0
  94. package/common/cpp/audioapi/libs/signalsmith-stretch/delay.h +0 -715
  95. package/common/cpp/audioapi/libs/signalsmith-stretch/perf.h +0 -82
  96. package/common/cpp/audioapi/libs/signalsmith-stretch/spectral.h +0 -493
@@ -1,522 +1,1366 @@
1
- #ifndef SIGNALSMITH_FFT_V5
2
- #define SIGNALSMITH_FFT_V5
1
+ #ifndef SIGNALSMITH_AUDIO_LINEAR_FFT_H
2
+ #define SIGNALSMITH_AUDIO_LINEAR_FFT_H
3
3
 
4
- #include <audioapi/libs/signalsmith-stretch/perf.h>
5
- #include <audioapi/core/Constants.h>
6
-
7
- #include <vector>
8
4
  #include <complex>
5
+ #include <vector>
9
6
  #include <cmath>
10
7
 
11
- namespace signalsmith { namespace fft {
12
- /** @defgroup FFT FFT (complex and real)
13
- @brief Fourier transforms (complex and real)
8
+ #if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099) && !defined(SIGNALSMITH_IGNORE_BROKEN_APPLECLANG)
9
+ # error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math.
10
+ #endif
14
11
 
15
- @{
16
- @file
17
- */
12
+ #ifndef M_PI
13
+ # define M_PI 3.14159265358979323846
14
+ #endif
18
15
 
19
- namespace _fft_impl {
16
+ namespace signalsmith { namespace linear {
20
17
 
21
- template <typename V>
22
- SIGNALSMITH_INLINE V complexReal(const std::complex<V> &c) {
23
- return ((V*)(&c))[0];
18
+ namespace _impl {
19
+ template<class V>
20
+ void complexMul(std::complex<V> *a, const std::complex<V> *b, const std::complex<V> *c, size_t size) {
21
+ for (size_t i = 0; i < size; ++i) {
22
+ auto bi = b[i], ci = c[i];
23
+ a[i] = {bi.real()*ci.real() - bi.imag()*ci.imag(), bi.imag()*ci.real() + bi.real()*ci.imag()};
24
24
  }
25
- template <typename V>
26
- SIGNALSMITH_INLINE V complexImag(const std::complex<V> &c) {
27
- return ((V*)(&c))[1];
25
+ }
26
+ template<class V>
27
+ void complexMulConj(std::complex<V> *a, const std::complex<V> *b, const std::complex<V> *c, size_t size) {
28
+ for (size_t i = 0; i < size; ++i) {
29
+ auto bi = b[i], ci = c[i];
30
+ a[i] = {bi.real()*ci.real() + bi.imag()*ci.imag(), bi.imag()*ci.real() - bi.real()*ci.imag()};
28
31
  }
29
-
30
- // Complex multiplication has edge-cases around Inf/NaN - handling those properly makes std::complex non-inlineable, so we use our own
31
- template <bool conjugateSecond, typename V>
32
- SIGNALSMITH_INLINE std::complex<V> complexMul(const std::complex<V> &a, const std::complex<V> &b) {
33
- V aReal = complexReal(a), aImag = complexImag(a);
34
- V bReal = complexReal(b), bImag = complexImag(b);
35
- return conjugateSecond ? std::complex<V>{
36
- bReal*aReal + bImag*aImag,
37
- bReal*aImag - bImag*aReal
38
- } : std::complex<V>{
39
- aReal*bReal - aImag*bImag,
40
- aReal*bImag + aImag*bReal
41
- };
32
+ }
33
+ template<class V>
34
+ void complexMul(V *ar, V *ai, const V *br, const V *bi, const V *cr, const V *ci, size_t size) {
35
+ for (size_t i = 0; i < size; ++i) {
36
+ V rr = br[i]*cr[i] - bi[i]*ci[i];
37
+ V ri = br[i]*ci[i] + bi[i]*cr[i];
38
+ ar[i] = rr;
39
+ ai[i] = ri;
42
40
  }
43
-
44
- template<bool flipped, typename V>
45
- SIGNALSMITH_INLINE std::complex<V> complexAddI(const std::complex<V> &a, const std::complex<V> &b) {
46
- V aReal = complexReal(a), aImag = complexImag(a);
47
- V bReal = complexReal(b), bImag = complexImag(b);
48
- return flipped ? std::complex<V>{
49
- aReal + bImag,
50
- aImag - bReal
51
- } : std::complex<V>{
52
- aReal - bImag,
53
- aImag + bReal
54
- };
41
+ }
42
+ template<class V>
43
+ void complexMulConj(V *ar, V *ai, const V *br, const V *bi, const V *cr, const V *ci, size_t size) {
44
+ for (size_t i = 0; i < size; ++i) {
45
+ V rr = cr[i]*br[i] + ci[i]*bi[i];
46
+ V ri = cr[i]*bi[i] - ci[i]*br[i];
47
+ ar[i] = rr;
48
+ ai[i] = ri;
55
49
  }
50
+ }
56
51
 
57
- // Use SFINAE to get an iterator from std::begin(), if supported - otherwise assume the value itself is an iterator
58
- template<typename T, typename=void>
59
- struct GetIterator {
60
- static T get(const T &t) {
61
- return t;
52
+ // Input: aStride elements next to each other -> output with bStride
53
+ template<size_t aStride, class V>
54
+ void interleaveCopy(const V *a, V *b, size_t bStride) {
55
+ for (size_t bi = 0; bi < bStride; ++bi) {
56
+ const V *offsetA = a + bi*aStride;
57
+ V *offsetB = b + bi;
58
+ for (size_t ai = 0; ai < aStride; ++ai) {
59
+ offsetB[ai*bStride] = offsetA[ai];
62
60
  }
63
- };
64
- template<typename T>
65
- struct GetIterator<T, decltype((void)std::begin(std::declval<T>()))> {
66
- static auto get(const T &t) -> decltype(std::begin(t)) {
67
- return std::begin(t);
61
+ }
62
+ }
63
+ template<class V>
64
+ void interleaveCopy(const V *a, V *b, size_t aStride, size_t bStride) {
65
+ for (size_t bi = 0; bi < bStride; ++bi) {
66
+ const V *offsetA = a + bi*aStride;
67
+ V *offsetB = b + bi;
68
+ for (size_t ai = 0; ai < aStride; ++ai) {
69
+ offsetB[ai*bStride] = offsetA[ai];
68
70
  }
69
- };
71
+ }
70
72
  }
73
+ template<size_t aStride, class V>
74
+ void interleaveCopy(const V *aReal, const V *aImag, V *bReal, V *bImag, size_t bStride) {
75
+ for (size_t bi = 0; bi < bStride; ++bi) {
76
+ const V *offsetAr = aReal + bi*aStride;
77
+ const V *offsetAi = aImag + bi*aStride;
78
+ V *offsetBr = bReal + bi;
79
+ V *offsetBi = bImag + bi;
80
+ for (size_t ai = 0; ai < aStride; ++ai) {
81
+ offsetBr[ai*bStride] = offsetAr[ai];
82
+ offsetBi[ai*bStride] = offsetAi[ai];
83
+ }
84
+ }
85
+ }
86
+ template<class V>
87
+ void interleaveCopy(const V *aReal, const V *aImag, V *bReal, V *bImag, size_t aStride, size_t bStride) {
88
+ for (size_t bi = 0; bi < bStride; ++bi) {
89
+ const V *offsetAr = aReal + bi*aStride;
90
+ const V *offsetAi = aImag + bi*aStride;
91
+ V *offsetBr = bReal + bi;
92
+ V *offsetBi = bImag + bi;
93
+ for (size_t ai = 0; ai < aStride; ++ai) {
94
+ offsetBr[ai*bStride] = offsetAr[ai];
95
+ offsetBi[ai*bStride] = offsetAi[ai];
96
+ }
97
+ }
98
+ }
99
+ }
71
100
 
72
- /** Floating-point FFT implementation.
73
- It is fast for 2^a * 3^b.
74
- Here are the peak and RMS errors for `float`/`double` computation:
75
- \diagram{fft-errors.svg Simulated errors for pure-tone harmonic inputs\, compared to a theoretical upper bound from "Roundoff error analysis of the fast Fourier transform" (G. Ramos, 1971)}
76
- */
77
- template<typename V=double>
78
- class FFT {
79
- using complex = std::complex<V>;
80
- size_t _size;
81
- std::vector<complex> workingVector;
82
-
83
- enum class StepType {
84
- generic, step2, step3, step4
85
- };
86
- struct Step {
87
- StepType type;
88
- size_t factor;
89
- size_t startIndex;
90
- size_t innerRepeats;
91
- size_t outerRepeats;
92
- size_t twiddleIndex;
93
- };
94
- std::vector<size_t> factors;
95
- std::vector<Step> plan;
96
- std::vector<complex> twiddleVector;
101
+ /// Fairly simple and very portable power-of-2 FFT
102
+ template<typename Sample>
103
+ struct SimpleFFT {
104
+ using Complex = std::complex<Sample>;
97
105
 
98
- struct PermutationPair {size_t from, to;};
99
- std::vector<PermutationPair> permutation;
106
+ SimpleFFT(size_t size=0) {
107
+ resize(size);
108
+ }
100
109
 
101
- void addPlanSteps(size_t factorIndex, size_t start, size_t length, size_t repeats) {
102
- if (factorIndex >= factors.size()) return;
110
+ void resize(size_t size) {
111
+ twiddles.resize(size*3/4);
112
+ for (size_t i = 0; i < size*3/4; ++i) {
113
+ Sample twiddlePhase = -2*M_PI*i/size;
114
+ twiddles[i] = std::polar(Sample(1), twiddlePhase);
115
+ }
116
+ working.resize(size);
117
+ }
103
118
 
104
- size_t factor = factors[factorIndex];
105
- if (factorIndex + 1 < factors.size()) {
106
- if (factors[factorIndex] == 2 && factors[factorIndex + 1] == 2) {
107
- ++factorIndex;
108
- factor = 4;
109
- }
110
- }
119
+ void fft(const Complex *time, Complex *freq) {
120
+ size_t size = working.size();
121
+ if (size <= 1) {
122
+ *freq = *time;
123
+ return;
124
+ }
125
+ fftPass<false>(size, 1, time, freq, working.data());
126
+ }
111
127
 
112
- size_t subLength = length/factor;
113
- Step mainStep{StepType::generic, factor, start, subLength, repeats, twiddleVector.size()};
128
+ void ifft(const Complex *freq, Complex *time) {
129
+ size_t size = working.size();
130
+ if (size <= 1) {
131
+ *time = *freq;
132
+ return;
133
+ }
134
+ fftPass<true>(size, 1, freq, time, working.data());
135
+ }
114
136
 
115
- if (factor == 2) mainStep.type = StepType::step2;
116
- if (factor == 3) mainStep.type = StepType::step3;
117
- if (factor == 4) mainStep.type = StepType::step4;
137
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
138
+ size_t size = working.size();
139
+ if (size <= 1) {
140
+ *outR = *inR;
141
+ *outI = *inI;
142
+ return;
143
+ }
144
+ Sample *workingR = (Sample *)working.data(), *workingI = workingR + size;
145
+ fftPass<false>(size, 1, inR, inI, outR, outI, workingR, workingI);
146
+ }
147
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
148
+ size_t size = working.size();
149
+ if (size <= 1) {
150
+ *outR = *inR;
151
+ *outI = *inI;
152
+ return;
153
+ }
154
+ Sample *workingR = (Sample *)working.data(), *workingI = workingR + size;
155
+ fftPass<true>(size, 1, inR, inI, outR, outI, workingR, workingI);
156
+ }
157
+ private:
158
+ std::vector<Complex> twiddles;
159
+ std::vector<Complex> working;
118
160
 
119
- // Twiddles
120
- bool foundStep = false;
121
- for (const Step &existingStep : plan) {
122
- if (existingStep.factor == mainStep.factor && existingStep.innerRepeats == mainStep.innerRepeats) {
123
- foundStep = true;
124
- mainStep.twiddleIndex = existingStep.twiddleIndex;
125
- break;
126
- }
127
- }
128
- if (!foundStep) {
129
- for (size_t i = 0; i < subLength; ++i) {
130
- for (size_t f = 0; f < factor; ++f) {
131
- double phase = 2*audioapi::PI*i*f/length;
132
- complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
133
- twiddleVector.push_back(twiddle);
134
- }
135
- }
136
- }
161
+ template<bool conjB>
162
+ static Complex mul(const Complex &a, const Complex &b) {
163
+ return conjB ? Complex{
164
+ a.real()*b.real() + a.imag()*b.imag(),
165
+ a.imag()*b.real() - a.real()*b.imag()
166
+ } : Complex{
167
+ a.real()*b.real() - a.imag()*b.imag(),
168
+ a.imag()*b.real() + a.real()*b.imag()
169
+ };
170
+ }
137
171
 
138
- if (repeats == 1 && sizeof(complex)*subLength > 65536) {
139
- for (size_t i = 0; i < factor; ++i) {
140
- addPlanSteps(factorIndex + 1, start + i*subLength, subLength, 1);
141
- }
142
- } else {
143
- addPlanSteps(factorIndex + 1, start, subLength, repeats*factor);
144
- }
145
- plan.push_back(mainStep);
146
- }
147
- void setPlan() {
148
- factors.resize(0);
149
- size_t size = _size, factor = 2;
150
- while (size > 1) {
151
- if (size%factor == 0) {
152
- factors.push_back(factor);
153
- size /= factor;
154
- } else if (factor > sqrt(size)) {
155
- factor = size;
156
- } else {
157
- ++factor;
158
- }
159
- }
160
-
161
- plan.resize(0);
162
- twiddleVector.resize(0);
163
- addPlanSteps(0, 0, _size, 1);
164
- twiddleVector.shrink_to_fit();
165
-
166
- permutation.resize(0);
167
- permutation.reserve(_size);
168
- permutation.push_back(PermutationPair{0, 0});
169
- size_t indexLow = 0, indexHigh = factors.size();
170
- size_t inputStepLow = _size, outputStepLow = 1;
171
- size_t inputStepHigh = 1, outputStepHigh = _size;
172
- while (outputStepLow*inputStepHigh < _size) {
173
- size_t f, inputStep, outputStep;
174
- if (outputStepLow <= inputStepHigh) {
175
- f = factors[indexLow++];
176
- inputStep = (inputStepLow /= f);
177
- outputStep = outputStepLow;
178
- outputStepLow *= f;
179
- } else {
180
- f = factors[--indexHigh];
181
- inputStep = inputStepHigh;
182
- inputStepHigh *= f;
183
- outputStep = (outputStepHigh /= f);
184
- }
185
- size_t oldSize = permutation.size();
186
- for (size_t i = 1; i < f; ++i) {
187
- for (size_t j = 0; j < oldSize; ++j) {
188
- PermutationPair pair = permutation[j];
189
- pair.from += i*inputStep;
190
- pair.to += i*outputStep;
191
- permutation.push_back(pair);
192
- }
193
- }
172
+ // Calculate a [size]-point FFT, where each element is a block of [stride] values
173
+ template<bool inverse>
174
+ void fftPass(size_t size, size_t stride, const Complex *input, Complex *output, Complex *working) {
175
+ if (size/4 > 1) {
176
+ // Calculate four quarter-size FFTs
177
+ fftPass<inverse>(size/4, stride*4, input, working, output);
178
+ combine4<inverse>(size, stride, working, output);
179
+ } else if (size == 4) {
180
+ combine4<inverse>(4, stride, input, output);
181
+ } else {
182
+ // 2-point FFT
183
+ for (size_t s = 0; s < stride; ++s) {
184
+ Complex a = input[s];
185
+ Complex b = input[s + stride];
186
+ output[s] = a + b;
187
+ output[s + stride] = a - b;
194
188
  }
195
189
  }
190
+ }
196
191
 
197
- template<bool inverse, typename RandomAccessIterator>
198
- void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) {
199
- complex *working = workingVector.data();
200
- const size_t stride = step.innerRepeats;
192
+ // Combine interleaved results into a single spectrum
193
+ template<bool inverse>
194
+ void combine4(size_t size, size_t stride, const Complex *input, Complex *output) const {
195
+ auto twiddleStep = working.size()/size;
196
+ for (size_t i = 0; i < size/4; ++i) {
197
+ Complex twiddleB = twiddles[i*twiddleStep];
198
+ Complex twiddleC = twiddles[i*2*twiddleStep];
199
+ Complex twiddleD = twiddles[i*3*twiddleStep];
201
200
 
202
- for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
203
- RandomAccessIterator data = origData;
201
+ const Complex *inputA = input + 4*i*stride;
202
+ const Complex *inputB = input + (4*i + 1)*stride;
203
+ const Complex *inputC = input + (4*i + 2)*stride;
204
+ const Complex *inputD = input + (4*i + 3)*stride;
205
+ Complex *outputA = output + i*stride;
206
+ Complex *outputB = output + (i + size/4)*stride;
207
+ Complex *outputC = output + (i + size/4*2)*stride;
208
+ Complex *outputD = output + (i + size/4*3)*stride;
209
+ for (size_t s = 0; s < stride; ++s) {
210
+ Complex a = inputA[s];
211
+ Complex b = mul<inverse>(inputB[s], twiddleB);
212
+ Complex c = mul<inverse>(inputC[s], twiddleC);
213
+ Complex d = mul<inverse>(inputD[s], twiddleD);
214
+ Complex ac0 = a + c, ac1 = a - c;
215
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b);
216
+ Complex bd1i = {-bd1.imag(), bd1.real()};
217
+ outputA[s] = ac0 + bd0;
218
+ outputB[s] = ac1 + bd1i;
219
+ outputC[s] = ac0 - bd0;
220
+ outputD[s] = ac1 - bd1i;
221
+ }
222
+ }
223
+ }
204
224
 
205
- const complex *twiddles = twiddleVector.data() + step.twiddleIndex;
206
- const size_t factor = step.factor;
207
- for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) {
208
- for (size_t i = 0; i < step.factor; ++i) {
209
- working[i] = _fft_impl::complexMul<inverse>(data[i*stride], twiddles[i]);
210
- }
211
- for (size_t f = 0; f < factor; ++f) {
212
- complex sum = working[0];
213
- for (size_t i = 1; i < factor; ++i) {
214
- double phase = 2*audioapi::PI*f*i/factor;
215
- complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
216
- sum += _fft_impl::complexMul<inverse>(working[i], twiddle);
217
- }
218
- data[f*stride] = sum;
219
- }
220
- ++data;
221
- twiddles += factor;
222
- }
223
- origData += step.factor*step.innerRepeats;
225
+ // The same thing, but translated for split-complex input/output
226
+ template<bool inverse>
227
+ void fftPass(size_t size, size_t stride, const Sample *inputR, const Sample *inputI, Sample *outputR, Sample *outputI, Sample *workingR, Sample *workingI) const {
228
+ if (size/4 > 1) {
229
+ // Calculate four quarter-size FFTs
230
+ fftPass<inverse>(size/4, stride*4, inputR, inputI, workingR, workingI, outputR, outputI);
231
+ combine4<inverse>(size, stride, workingR, workingI, outputR, outputI);
232
+ } else if (size == 4) {
233
+ combine4<inverse>(4, stride, inputR, inputI, outputR, outputI);
234
+ } else {
235
+ // 2-point FFT
236
+ for (size_t s = 0; s < stride; ++s) {
237
+ Sample ar = inputR[s], ai = inputI[s];
238
+ Sample br = inputR[s + stride], bi = inputI[s + stride];
239
+ outputR[s] = ar + br;
240
+ outputI[s] = ai + bi;
241
+ outputR[s + stride] = ar - br;
242
+ outputI[s + stride] = ai - bi;
224
243
  }
225
244
  }
245
+ }
226
246
 
227
- template<bool inverse, typename RandomAccessIterator>
228
- SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) {
229
- const size_t stride = step.innerRepeats;
230
- const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
231
- for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
232
- const complex* twiddles = origTwiddles;
233
- for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
234
- complex A = data[0];
235
- complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
247
+ // Combine interleaved results into a single spectrum
248
+ template<bool inverse>
249
+ void combine4(size_t size, size_t stride, const Sample *inputR, const Sample *inputI, Sample *outputR, Sample *outputI) const {
250
+ auto twiddleStep = working.size()/size;
251
+ for (size_t i = 0; i < size/4; ++i) {
252
+ Complex twiddleB = twiddles[i*twiddleStep];
253
+ Complex twiddleC = twiddles[i*2*twiddleStep];
254
+ Complex twiddleD = twiddles[i*3*twiddleStep];
236
255
 
237
- data[0] = A + B;
238
- data[stride] = A - B;
239
- twiddles += 2;
240
- }
241
- origData += 2*stride;
256
+ const Sample *inputAr = inputR + 4*i*stride, *inputAi = inputI + 4*i*stride;
257
+ const Sample *inputBr = inputR + (4*i + 1)*stride, *inputBi = inputI + (4*i + 1)*stride;
258
+ const Sample *inputCr = inputR + (4*i + 2)*stride, *inputCi = inputI + (4*i + 2)*stride;
259
+ const Sample *inputDr = inputR + (4*i + 3)*stride, *inputDi = inputI + (4*i + 3)*stride;
260
+ Sample *outputAr = outputR + i*stride, *outputAi = outputI + i*stride;
261
+ Sample *outputBr = outputR + (i + size/4)*stride, *outputBi = outputI + (i + size/4)*stride;
262
+ Sample *outputCr = outputR + (i + size/4*2)*stride, *outputCi = outputI + (i + size/4*2)*stride;
263
+ Sample *outputDr = outputR + (i + size/4*3)*stride, *outputDi = outputI + (i + size/4*3)*stride;
264
+ for (size_t s = 0; s < stride; ++s) {
265
+ Complex a = {inputAr[s], inputAi[s]};
266
+ Complex b = mul<inverse>({inputBr[s], inputBi[s]}, twiddleB);
267
+ Complex c = mul<inverse>({inputCr[s], inputCi[s]}, twiddleC);
268
+ Complex d = mul<inverse>({inputDr[s], inputDi[s]}, twiddleD);
269
+ Complex ac0 = a + c, ac1 = a - c;
270
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b);
271
+ Complex bd1i = {-bd1.imag(), bd1.real()};
272
+ outputAr[s] = ac0.real() + bd0.real();
273
+ outputAi[s] = ac0.imag() + bd0.imag();
274
+ outputBr[s] = ac1.real() + bd1i.real();
275
+ outputBi[s] = ac1.imag() + bd1i.imag();
276
+ outputCr[s] = ac0.real() - bd0.real();
277
+ outputCi[s] = ac0.imag() - bd0.imag();
278
+ outputDr[s] = ac1.real() - bd1i.real();
279
+ outputDi[s] = ac1.imag() - bd1i.imag();
242
280
  }
243
281
  }
282
+ }
283
+ };
244
284
 
245
- template<bool inverse, typename RandomAccessIterator>
246
- SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) {
247
- constexpr complex factor3 = {-0.5, inverse ? 0.8660254037844386 : -0.8660254037844386};
248
- const size_t stride = step.innerRepeats;
249
- const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
285
+ // Wraps a complex FFT into a real one
286
+ template<typename Sample, class ComplexFFT=SimpleFFT<Sample>>
287
+ struct SimpleRealFFT {
288
+ using Complex = std::complex<Sample>;
250
289
 
251
- for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
252
- const complex* twiddles = origTwiddles;
253
- for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
254
- complex A = data[0];
255
- complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
256
- complex C = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[2]);
290
+ SimpleRealFFT(size_t size=0) {
291
+ resize(size);
292
+ }
257
293
 
258
- complex realSum = A + (B + C)*factor3.real();
259
- complex imagSum = (B - C)*factor3.imag();
294
+ void resize(size_t size) {
295
+ complexFft.resize(size);
296
+ tmpTime.resize(size);
297
+ tmpFreq.resize(size);
298
+ }
260
299
 
261
- data[0] = A + B + C;
262
- data[stride] = _fft_impl::complexAddI<false>(realSum, imagSum);
263
- data[stride*2] = _fft_impl::complexAddI<true>(realSum, imagSum);
300
+ void fft(const Sample *time, Complex *freq) {
301
+ for (size_t i = 0; i < tmpTime.size(); ++i) {
302
+ tmpTime[i] = time[i];
303
+ }
304
+ complexFft.fft(tmpTime.data(), tmpFreq.data());
305
+ for (size_t i = 0; i < tmpFreq.size()/2; ++i) {
306
+ freq[i] = tmpFreq[i];
307
+ }
308
+ freq[0] = {
309
+ tmpFreq[0].real(),
310
+ tmpFreq[tmpFreq.size()/2].real()
311
+ };
312
+ }
313
+ void fft(const Sample *inR, Sample *outR, Sample *outI) {
314
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + tmpFreq.size();
315
+ for (size_t i = 0; i < tmpTime.size()/2; ++i) {
316
+ tmpTime[i] = 0;
317
+ }
318
+ complexFft.fft(inR, (const Sample *)tmpTime.data(), tmpFreqR, tmpFreqI);
319
+ for (size_t i = 0; i < tmpTime.size()/2; ++i) {
320
+ outR[i] = tmpFreqR[i];
321
+ outI[i] = tmpFreqI[i];
322
+ }
323
+ outI[0] = tmpFreqR[tmpFreq.size()/2];
324
+ }
264
325
 
265
- twiddles += 3;
266
- }
267
- origData += 3*stride;
268
- }
326
+ void ifft(const Complex *freq, Sample *time) {
327
+ tmpFreq[0] = freq[0].real();
328
+ tmpFreq[tmpFreq.size()/2] = freq[0].imag();
329
+ for (size_t i = 1; i < tmpFreq.size()/2; ++i) {
330
+ tmpFreq[i] = freq[i];
331
+ tmpFreq[tmpFreq.size() - i] = std::conj(freq[i]);
332
+ }
333
+ complexFft.ifft(tmpFreq.data(), tmpTime.data());
334
+ for (size_t i = 0; i < tmpTime.size(); ++i) {
335
+ time[i] = tmpTime[i].real();
336
+ }
337
+ }
338
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR) {
339
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + tmpFreq.size();
340
+ tmpFreqR[0] = inR[0];
341
+ tmpFreqR[tmpFreq.size()/2] = inI[0];
342
+ tmpFreqI[0] = 0;
343
+ tmpFreqI[tmpFreq.size()/2] = 0;
344
+ for (size_t i = 1; i < tmpFreq.size()/2; ++i) {
345
+ tmpFreqR[i] = inR[i];
346
+ tmpFreqI[i] = inI[i];
347
+ tmpFreqR[tmpFreq.size() - i] = inR[i];
348
+ tmpFreqI[tmpFreq.size() - i] = -inI[i];
269
349
  }
350
+ complexFft.ifft(tmpFreqR, tmpFreqI, outR, (Sample *)tmpTime.data());
351
+ }
270
352
 
271
- template<bool inverse, typename RandomAccessIterator>
272
- SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) {
273
- const size_t stride = step.innerRepeats;
274
- const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
353
+ private:
354
+ ComplexFFT complexFft;
355
+ std::vector<Complex> tmpTime, tmpFreq;
356
+ };
275
357
 
276
- for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
277
- const complex* twiddles = origTwiddles;
278
- for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
279
- complex A = data[0];
280
- complex C = _fft_impl::complexMul<inverse>(data[stride], twiddles[2]);
281
- complex B = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[1]);
282
- complex D = _fft_impl::complexMul<inverse>(data[stride*3], twiddles[3]);
358
+ /// A power-of-2 only FFT, specialised with platform-specific fast implementations where available
359
+ template<typename Sample>
360
+ struct Pow2FFT {
361
+ static constexpr bool prefersSplit = true; // whether this FFT implementation is faster when given split-complex inputs
362
+ using Complex = std::complex<Sample>;
283
363
 
284
- complex sumAC = A + C, sumBD = B + D;
285
- complex diffAC = A - C, diffBD = B - D;
364
+ Pow2FFT(size_t size=0) {
365
+ resize(size);
366
+ }
286
367
 
287
- data[0] = sumAC + sumBD;
288
- data[stride] = _fft_impl::complexAddI<!inverse>(diffAC, diffBD);
289
- data[stride*2] = sumAC - sumBD;
290
- data[stride*3] = _fft_impl::complexAddI<inverse>(diffAC, diffBD);
368
+ void resize(size_t size) {
369
+ simpleFFT.resize(size);
370
+ tmp.resize(size);
371
+ }
291
372
 
292
- twiddles += 4;
293
- }
294
- origData += 4*stride;
295
- }
373
+ void fft(const Complex *time, Complex *freq) {
374
+ simpleFFT.fft(time, freq);
375
+ }
376
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
377
+ simpleFFT.fft(inR, inI, outR, outI);
378
+ }
379
+
380
+ void ifft(const Complex *freq, Complex *time) {
381
+ simpleFFT.ifft(freq, time);
382
+ }
383
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
384
+ simpleFFT.ifft(inR, inI, outR, outI);
385
+ }
386
+
387
+ private:
388
+ std::vector<Complex> tmp;
389
+ SimpleFFT<Sample> simpleFFT;
390
+ };
391
+
392
+ /// A power-of-2 only Real FFT, specialised with platform-specific fast implementations where available
393
+ template<typename Sample>
394
+ struct Pow2RealFFT : public SimpleRealFFT<Sample, Pow2FFT<Sample>> {
395
+ static constexpr bool prefersSplit = Pow2FFT<Sample>::prefersSplit;
396
+
397
+ using SimpleRealFFT<Sample, Pow2FFT<Sample>>::SimpleRealFFT;
398
+ };
399
+
400
+ /// An FFT which can handle multiples of 3 and 5, and can be computed in chunks
401
+ template<typename Sample, bool splitComputation=false>
402
+ struct SplitFFT {
403
+ using Complex = std::complex<Sample>;
404
+ static constexpr bool prefersSplit = Pow2FFT<Sample>::prefersSplit;
405
+
406
+ static constexpr size_t maxSplit = splitComputation ? 4 : 1;
407
+ static constexpr size_t minInnerSize = 32;
408
+
409
+ static size_t fastSizeAbove(size_t size) {
410
+ size_t pow2 = 1;
411
+ while (pow2 < 16 && pow2 < size) pow2 *= 2;
412
+ while (pow2*8 < size) pow2 *= 2;
413
+ size_t multiple = (size + pow2 - 1)/pow2; // will be 1-8
414
+ if (multiple == 7) ++multiple;
415
+ return multiple*pow2;
416
+ }
417
+
418
+ SplitFFT(size_t size=0) {
419
+ resize(size);
420
+ }
421
+
422
+ void resize(size_t size) {
423
+ innerSize = 1;
424
+ outerSize = size;
425
+
426
+ dftTmp.resize(0);
427
+ dftTwists.resize(0);
428
+ plan.resize(0);
429
+ if (!size) return;
430
+
431
+ // Inner size = largest power of 2 such that either the inner size >= minInnerSize, or we have the target number of splits
432
+ while (!(outerSize&1) && (outerSize > maxSplit || innerSize < minInnerSize)) {
433
+ innerSize *= 2;
434
+ outerSize /= 2;
296
435
  }
436
+ tmpFreq.resize(size);
437
+ innerFFT.resize(innerSize);
297
438
 
298
- template<typename InputIterator, typename OutputIterator>
299
- void permute(InputIterator input, OutputIterator data) {
300
- for (auto pair : permutation) {
301
- data[pair.from] = input[pair.to];
439
+ outerTwiddles.resize(innerSize*(outerSize - 1));
440
+ outerTwiddlesR.resize(innerSize*(outerSize - 1));
441
+ outerTwiddlesI.resize(innerSize*(outerSize - 1));
442
+ for (size_t i = 0; i < innerSize; ++i) {
443
+ for (size_t s = 1; s < outerSize; ++s) {
444
+ Sample twiddlePhase = Sample(-2*M_PI*i/innerSize*s/outerSize);
445
+ outerTwiddles[i + (s - 1)*innerSize] = std::polar(Sample(1), twiddlePhase);
302
446
  }
303
447
  }
448
+ for (size_t i = 0; i < outerTwiddles.size(); ++i) {
449
+ outerTwiddlesR[i] = outerTwiddles[i].real();
450
+ outerTwiddlesI[i] = outerTwiddles[i].imag();
451
+ }
452
+
453
+
454
+ StepType interleaveStep = StepType::interleaveOrderN;
455
+ StepType finalStep = StepType::finalOrderN;
456
+ if (outerSize == 2) {
457
+ interleaveStep = StepType::interleaveOrder2;
458
+ finalStep = StepType::finalOrder2;
459
+ }
460
+ if (outerSize == 3) {
461
+ interleaveStep = StepType::interleaveOrder3;
462
+ finalStep = StepType::finalOrder3;
463
+ }
464
+ if (outerSize == 4) {
465
+ interleaveStep = StepType::interleaveOrder4;
466
+ finalStep = StepType::finalOrder4;
467
+ }
468
+ if (outerSize == 5) {
469
+ interleaveStep = StepType::interleaveOrder5;
470
+ finalStep = StepType::finalOrder5;
471
+ }
304
472
 
305
- template<bool inverse, typename InputIterator, typename OutputIterator>
306
- void run(InputIterator &&input, OutputIterator &&data) {
307
- permute(input, data);
473
+ if (outerSize <= 1) {
474
+ if (size > 0) plan.push_back(Step{StepType::passthrough, 0});
475
+ } else {
476
+ plan.push_back({interleaveStep, 0});
477
+ plan.push_back({StepType::firstFFT, 0});
478
+ for (size_t s = 1; s < outerSize; ++s) {
479
+ plan.push_back({StepType::middleFFT, s*innerSize});
480
+ }
481
+ plan.push_back({StepType::twiddles, 0});
482
+ plan.push_back({finalStep, 0});
308
483
 
309
- for (const Step &step : plan) {
310
- switch (step.type) {
311
- case StepType::generic:
312
- fftStepGeneric<inverse>(data + step.startIndex, step);
313
- break;
314
- case StepType::step2:
315
- fftStep2<inverse>(data + step.startIndex, step);
316
- break;
317
- case StepType::step3:
318
- fftStep3<inverse>(data + step.startIndex, step);
319
- break;
320
- case StepType::step4:
321
- fftStep4<inverse>(data + step.startIndex, step);
322
- break;
484
+ if (finalStep == StepType::finalOrderN) {
485
+ dftTmp.resize(outerSize);
486
+ dftTwists.resize(outerSize);
487
+ for (size_t s = 0; s < outerSize; ++s) {
488
+ Sample dftPhase = Sample(-2*M_PI*s/outerSize);
489
+ dftTwists[s] = std::polar(Sample(1), dftPhase);
323
490
  }
324
491
  }
325
492
  }
493
+ }
326
494
 
327
- static bool validSize(size_t size) {
328
- constexpr static bool filter[32] = {
329
- 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, // 0-9
330
- 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, // 10-19
331
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, // 20-29
332
- 0, 0
333
- };
334
- return filter[size];
495
+ size_t size() const {
496
+ return innerSize*outerSize;
497
+ }
498
+ size_t steps() const {
499
+ return plan.size();
500
+ }
501
+
502
+ void fft(const Complex *time, Complex *freq) {
503
+ for (auto &step : plan) {
504
+ fftStep<false>(step, time, freq);
505
+ }
506
+ }
507
+ void fft(size_t step, const Complex *time, Complex *freq) {
508
+ fftStep<false>(plan[step], time, freq);
509
+ }
510
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
511
+ for (auto &step : plan) {
512
+ fftStep<false>(step, inR, inI, outR, outI);
513
+ }
514
+ }
515
+ void fft(size_t step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
516
+ fftStep<false>(plan[step], inR, inI, outR, outI);
517
+ }
518
+
519
+ void ifft(const Complex *freq, Complex *time) {
520
+ for (auto &step : plan) {
521
+ fftStep<true>(step, freq, time);
335
522
  }
336
- public:
337
- static size_t fastSizeAbove(size_t size) {
338
- size_t power2 = 1;
339
- while (size >= 32) {
340
- size = (size - 1)/2 + 1;
341
- power2 *= 2;
523
+ }
524
+ void ifft(size_t step, const Complex *freq, Complex *time) {
525
+ fftStep<true>(plan[step], freq, time);
526
+ }
527
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
528
+ for (auto &step : plan) {
529
+ fftStep<true>(step, inR, inI, outR, outI);
530
+ }
531
+ }
532
+ void ifft(size_t step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
533
+ fftStep<true>(plan[step], inR, inI, outR, outI);
534
+ }
535
+ private:
536
+ using InnerFFT = Pow2FFT<Sample>;
537
+ InnerFFT innerFFT;
538
+
539
+ size_t innerSize, outerSize;
540
+ std::vector<Complex> tmpFreq;
541
+ std::vector<Complex> outerTwiddles;
542
+ std::vector<Sample> outerTwiddlesR, outerTwiddlesI;
543
+ std::vector<Complex> dftTwists, dftTmp;
544
+
545
// The kinds of work a single plan entry can perform (enumerator order is unchanged)
enum class StepType {
    // The inner FFT handles the whole transform
    passthrough,
    // Copy the input into the scratch buffer via an interleaving copy (fixed orders 2-5, or generic N)
    interleaveOrder2,
    interleaveOrder3,
    interleaveOrder4,
    interleaveOrder5,
    interleaveOrderN,
    // Inner FFT on the first block, then on each later block
    firstFFT,
    middleFFT,
    // Multiply everything after the first block by the precomputed twiddles
    twiddles,
    // Combine the blocks (fixed orders 2-5, or generic N)
    finalOrder2,
    finalOrder3,
    finalOrder4,
    finalOrder5,
    finalOrderN
};
552
+ struct Step {
553
+ StepType type;
554
+ size_t offset;
555
+ };
556
+ std::vector<Step> plan;
557
+
558
+ template<bool inverse>
559
+ void fftStep(Step step, const Complex *time, Complex *freq) {
560
+ switch (step.type) {
561
+ case (StepType::passthrough): {
562
+ if (inverse) {
563
+ innerFFT.ifft(time, freq);
564
+ } else {
565
+ innerFFT.fft(time, freq);
566
+ }
567
+ break;
568
+ }
569
+ case (StepType::interleaveOrder2): {
570
+ _impl::interleaveCopy<2>(time, tmpFreq.data(), innerSize);
571
+ break;
572
+ }
573
+ case (StepType::interleaveOrder3): {
574
+ _impl::interleaveCopy<3>(time, tmpFreq.data(), innerSize);
575
+ break;
342
576
  }
343
- while (size < 32 && !validSize(size)) {
344
- ++size;
577
+ case (StepType::interleaveOrder4): {
578
+ _impl::interleaveCopy<4>(time, tmpFreq.data(), innerSize);
579
+ break;
345
580
  }
346
- return power2*size;
581
+ case (StepType::interleaveOrder5): {
582
+ _impl::interleaveCopy<5>(time, tmpFreq.data(), innerSize);
583
+ break;
584
+ }
585
+ case (StepType::interleaveOrderN): {
586
+ _impl::interleaveCopy(time, tmpFreq.data(), outerSize, innerSize);
587
+ break;
588
+ }
589
+ case (StepType::firstFFT): {
590
+ if (inverse) {
591
+ innerFFT.ifft(tmpFreq.data(), freq);
592
+ } else {
593
+ innerFFT.fft(tmpFreq.data(), freq);
594
+ }
595
+ break;
596
+ }
597
+ case (StepType::middleFFT): {
598
+ Complex *offsetOut = freq + step.offset;
599
+ if (inverse) {
600
+ innerFFT.ifft(tmpFreq.data() + step.offset, offsetOut);
601
+ } else {
602
+ innerFFT.fft(tmpFreq.data() + step.offset, offsetOut);
603
+ }
604
+ break;
605
+ }
606
+ case (StepType::twiddles): {
607
+ if (inverse) {
608
+ _impl::complexMulConj(freq + innerSize, freq + innerSize, outerTwiddles.data(), innerSize*(outerSize - 1));
609
+ } else {
610
+ _impl::complexMul(freq + innerSize, freq + innerSize, outerTwiddles.data(), innerSize*(outerSize - 1));
611
+ }
612
+ break;
613
+ }
614
+ case StepType::finalOrder2:
615
+ finalPass2(freq);
616
+ break;
617
+ case StepType::finalOrder3:
618
+ finalPass3<inverse>(freq);
619
+ break;
620
+ case StepType::finalOrder4:
621
+ finalPass4<inverse>(freq);
622
+ break;
623
+ case StepType::finalOrder5:
624
+ finalPass5<inverse>(freq);
625
+ break;
626
+ case StepType::finalOrderN:
627
+ finalPassN<inverse>(freq);
628
+ break;
347
629
  }
348
- static size_t fastSizeBelow(size_t size) {
349
- size_t power2 = 1;
350
- while (size >= 32) {
351
- size /= 2;
352
- power2 *= 2;
630
+ }
631
+ template<bool inverse>
632
+ void fftStep(Step step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) {
633
+ Sample *tmpR = (Sample *)tmpFreq.data(), *tmpI = tmpR + tmpFreq.size();
634
+ switch (step.type) {
635
+ case (StepType::passthrough): {
636
+ if (inverse) {
637
+ innerFFT.ifft(inR, inI, outR, outI);
638
+ } else {
639
+ innerFFT.fft(inR, inI, outR, outI);
640
+ }
641
+ break;
642
+ }
643
+ case (StepType::interleaveOrder2): {
644
+ _impl::interleaveCopy<2>(inR, tmpR, innerSize);
645
+ _impl::interleaveCopy<2>(inI, tmpI, innerSize);
646
+ break;
647
+ }
648
+ case (StepType::interleaveOrder3): {
649
+ _impl::interleaveCopy<3>(inR, tmpR, innerSize);
650
+ _impl::interleaveCopy<3>(inI, tmpI, innerSize);
651
+ break;
652
+ }
653
+ case (StepType::interleaveOrder4): {
654
+ _impl::interleaveCopy<4>(inR, tmpR, innerSize);
655
+ _impl::interleaveCopy<4>(inI, tmpI, innerSize);
656
+ break;
657
+ }
658
+ case (StepType::interleaveOrder5): {
659
+ _impl::interleaveCopy<5>(inR, tmpR, innerSize);
660
+ _impl::interleaveCopy<5>(inI, tmpI, innerSize);
661
+ break;
353
662
  }
354
- while (size > 1 && !validSize(size)) {
355
- --size;
663
+ case (StepType::interleaveOrderN): {
664
+ _impl::interleaveCopy(inR, inI, tmpR, tmpI, outerSize, innerSize);
665
+ break;
356
666
  }
357
- return power2*size;
667
+ case (StepType::firstFFT): {
668
+ if (inverse) {
669
+ innerFFT.ifft(tmpR, tmpI, outR, outI);
670
+ } else {
671
+ innerFFT.fft(tmpR, tmpI, outR, outI);
672
+ }
673
+ break;
674
+ }
675
+ case (StepType::middleFFT): {
676
+ size_t offset = step.offset;
677
+ Sample *offsetOutR = outR + offset;
678
+ Sample *offsetOutI = outI + offset;
679
+ if (inverse) {
680
+ innerFFT.ifft(tmpR + offset, tmpI + offset, offsetOutR, offsetOutI);
681
+ } else {
682
+ innerFFT.fft(tmpR + offset, tmpI + offset, offsetOutR, offsetOutI);
683
+ }
684
+ break;
685
+ }
686
+ case(StepType::twiddles): {
687
+ auto *twiddlesR = outerTwiddlesR.data();
688
+ auto *twiddlesI = outerTwiddlesI.data();
689
+ if (inverse) {
690
+ _impl::complexMulConj(outR + innerSize, outI + innerSize, outR + innerSize, outI + innerSize, twiddlesR, twiddlesI, innerSize*(outerSize - 1));
691
+ } else {
692
+ _impl::complexMul(outR + innerSize, outI + innerSize, outR + innerSize, outI + innerSize, twiddlesR, twiddlesI, innerSize*(outerSize - 1));
693
+ }
694
+ break;
695
+ }
696
+ case StepType::finalOrder2:
697
+ finalPass2(outR, outI);
698
+ break;
699
+ case StepType::finalOrder3:
700
+ finalPass3<inverse>(outR, outI);
701
+ break;
702
+ case StepType::finalOrder4:
703
+ finalPass4<inverse>(outR, outI);
704
+ break;
705
+ case StepType::finalOrder5:
706
+ finalPass5<inverse>(outR, outI);
707
+ break;
708
+ case StepType::finalOrderN:
709
+ finalPassN<inverse>(outR, outI);
710
+ break;
358
711
  }
712
+ }
359
713
 
360
- FFT(size_t size, int fastDirection=0) : _size(0) {
361
- if (fastDirection > 0) size = fastSizeAbove(size);
362
- if (fastDirection < 0) size = fastSizeBelow(size);
363
- this->setSize(size);
714
+ void finalPass2(Complex *f0) {
715
+ auto *f1 = f0 + innerSize;
716
+ for (size_t i = 0; i < innerSize; ++i) {
717
+ Complex a = f0[i], b = f1[i];
718
+ f0[i] = a + b;
719
+ f1[i] = a - b;
364
720
  }
365
-
366
- size_t setSize(size_t size) {
367
- if (size != _size) {
368
- _size = size;
369
- workingVector.resize(size);
370
- setPlan();
371
- }
372
- return _size;
721
+ }
722
+ void finalPass2(Sample *f0r, Sample *f0i) {
723
+ auto *f1r = f0r + innerSize;
724
+ auto *f1i = f0i + innerSize;
725
+ for (size_t i = 0; i < innerSize; ++i) {
726
+ Sample ar = f0r[i], ai = f0i[i];
727
+ Sample br = f1r[i], bi = f1i[i];
728
+ f0r[i] = ar + br;
729
+ f0i[i] = ai + bi;
730
+ f1r[i] = ar - br;
731
+ f1i[i] = ai - bi;
373
732
  }
374
- size_t setFastSizeAbove(size_t size) {
375
- return setSize(fastSizeAbove(size));
733
+ }
734
+ template<bool inverse>
735
+ void finalPass3(Complex *f0) {
736
+ auto *f1 = f0 + innerSize;
737
+ auto *f2 = f0 + innerSize*2;
738
+ const Complex tw1{Sample(-0.5), Sample(-std::sqrt(0.75)*(inverse ? -1 : 1))};
739
+ for (size_t i = 0; i < innerSize; ++i) {
740
+ Complex a = f0[i], b = f1[i], c = f2[i];
741
+ Complex bc0 = b + c, bc1 = b - c;
742
+ f0[i] = a + bc0;
743
+ f1[i] = {
744
+ a.real() + bc0.real()*tw1.real() - bc1.imag()*tw1.imag(),
745
+ a.imag() + bc0.imag()*tw1.real() + bc1.real()*tw1.imag()
746
+ };
747
+ f2[i] = {
748
+ a.real() + bc0.real()*tw1.real() + bc1.imag()*tw1.imag(),
749
+ a.imag() + bc0.imag()*tw1.real() - bc1.real()*tw1.imag()
750
+ };
376
751
  }
377
- size_t setFastSizeBelow(size_t size) {
378
- return setSize(fastSizeBelow(size));
752
+ }
753
+ template<bool inverse>
754
+ void finalPass3(Sample *f0r, Sample *f0i) {
755
+ auto *f1r = f0r + innerSize;
756
+ auto *f1i = f0i + innerSize;
757
+ auto *f2r = f0r + innerSize*2;
758
+ auto *f2i = f0i + innerSize*2;
759
+ const Sample tw1r = -0.5, tw1i = -std::sqrt(0.75)*(inverse ? -1 : 1);
760
+
761
+ for (size_t i = 0; i < innerSize; ++i) {
762
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i];
763
+
764
+ f0r[i] = ar + br + cr;
765
+ f0i[i] = ai + bi + ci;
766
+ f1r[i] = ar + br*tw1r - bi*tw1i + cr*tw1r + ci*tw1i;
767
+ f1i[i] = ai + bi*tw1r + br*tw1i - cr*tw1i + ci*tw1r;
768
+ f2r[i] = ar + br*tw1r + bi*tw1i + cr*tw1r - ci*tw1i;
769
+ f2i[i] = ai + bi*tw1r - br*tw1i + cr*tw1i + ci*tw1r;
379
770
  }
380
- const size_t & size() const {
381
- return _size;
771
+ }
772
+ template<bool inverse>
773
+ void finalPass4(Complex *f0) {
774
+ auto *f1 = f0 + innerSize;
775
+ auto *f2 = f0 + innerSize*2;
776
+ auto *f3 = f0 + innerSize*3;
777
+ for (size_t i = 0; i < innerSize; ++i) {
778
+ Complex a = f0[i], b = f1[i], c = f2[i], d = f3[i];
779
+
780
+ Complex ac0 = a + c, ac1 = a - c;
781
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b);
782
+ Complex bd1i = {-bd1.imag(), bd1.real()};
783
+ f0[i] = ac0 + bd0;
784
+ f1[i] = ac1 + bd1i;
785
+ f2[i] = ac0 - bd0;
786
+ f3[i] = ac1 - bd1i;
382
787
  }
788
+ }
789
+ template<bool inverse>
790
+ void finalPass4(Sample *f0r, Sample *f0i) {
791
+ auto *f1r = f0r + innerSize;
792
+ auto *f1i = f0i + innerSize;
793
+ auto *f2r = f0r + innerSize*2;
794
+ auto *f2i = f0i + innerSize*2;
795
+ auto *f3r = f0r + innerSize*3;
796
+ auto *f3i = f0i + innerSize*3;
797
+ for (size_t i = 0; i < innerSize; ++i) {
798
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i], dr = f3r[i], di = f3i[i];
383
799
 
384
- template<typename InputIterator, typename OutputIterator>
385
- void fft(InputIterator &&input, OutputIterator &&output) {
386
- auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
387
- auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
388
- return run<false>(inputIter, outputIter);
800
+ Sample ac0r = ar + cr, ac0i = ai + ci;
801
+ Sample ac1r = ar - cr, ac1i = ai - ci;
802
+ Sample bd0r = br + dr, bd0i = bi + di;
803
+ Sample bd1r = br - dr, bd1i = bi - di;
804
+
805
+ f0r[i] = ac0r + bd0r;
806
+ f0i[i] = ac0i + bd0i;
807
+ f1r[i] = inverse ? (ac1r - bd1i) : (ac1r + bd1i);
808
+ f1i[i] = inverse ? (ac1i + bd1r) : (ac1i - bd1r);
809
+ f2r[i] = ac0r - bd0r;
810
+ f2i[i] = ac0i - bd0i;
811
+ f3r[i] = inverse ? (ac1r + bd1i) : (ac1r - bd1i);
812
+ f3i[i] = inverse ? (ac1i - bd1r) : (ac1i + bd1r);
389
813
  }
814
+ }
815
+ template<bool inverse>
816
+ void finalPass5(Complex *f0) {
817
+ auto *f1 = f0 + innerSize;
818
+ auto *f2 = f0 + innerSize*2;
819
+ auto *f3 = f0 + innerSize*3;
820
+ auto *f4 = f0 + innerSize*4;
821
+ const Sample tw1r = 0.30901699437494745;
822
+ const Sample tw1i = -0.9510565162951535*(inverse ? -1 : 1);
823
+ const Sample tw2r = -0.8090169943749473;
824
+ const Sample tw2i = -0.5877852522924732*(inverse ? -1 : 1);
825
+ for (size_t i = 0; i < innerSize; ++i) {
826
+ Complex a = f0[i], b = f1[i], c = f2[i], d = f3[i], e = f4[i];
390
827
 
391
- template<typename InputIterator, typename OutputIterator>
392
- void ifft(InputIterator &&input, OutputIterator &&output) {
393
- auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
394
- auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
395
- return run<true>(inputIter, outputIter);
828
+ Complex be0 = b + e, be1 = {e.imag() - b.imag(), b.real() - e.real()}; // (b - e)*i
829
+ Complex cd0 = c + d, cd1 = {d.imag() - c.imag(), c.real() - d.real()};
830
+
831
+ Complex bcde01 = be0*tw1r + cd0*tw2r;
832
+ Complex bcde02 = be0*tw2r + cd0*tw1r;
833
+ Complex bcde11 = be1*tw1i + cd1*tw2i;
834
+ Complex bcde12 = be1*tw2i - cd1*tw1i;
835
+
836
+ f0[i] = a + be0 + cd0;
837
+ f1[i] = a + bcde01 + bcde11;
838
+ f2[i] = a + bcde02 + bcde12;
839
+ f3[i] = a + bcde02 - bcde12;
840
+ f4[i] = a + bcde01 - bcde11;
396
841
  }
397
- };
842
+ }
843
+ template<bool inverse>
844
+ void finalPass5(Sample *f0r, Sample *f0i) {
845
+ auto *f1r = f0r + innerSize;
846
+ auto *f1i = f0i + innerSize;
847
+ auto *f2r = f0r + innerSize*2;
848
+ auto *f2i = f0i + innerSize*2;
849
+ auto *f3r = f0r + innerSize*3;
850
+ auto *f3i = f0i + innerSize*3;
851
+ auto *f4r = f0r + innerSize*4;
852
+ auto *f4i = f0i + innerSize*4;
398
853
 
399
- struct FFTOptions {
400
- static constexpr int halfFreqShift = 1;
401
- };
854
+ const Sample tw1r = 0.30901699437494745;
855
+ const Sample tw1i = -0.9510565162951535*(inverse ? -1 : 1);
856
+ const Sample tw2r = -0.8090169943749473;
857
+ const Sample tw2i = -0.5877852522924732*(inverse ? -1 : 1);
858
+ for (size_t i = 0; i < innerSize; ++i) {
859
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i], dr = f3r[i], di = f3i[i], er = f4r[i], ei = f4i[i];
860
+
861
+ Sample be0r = br + er, be0i = bi + ei;
862
+ Sample be1r = ei - bi, be1i = br - er;
863
+ Sample cd0r = cr + dr, cd0i = ci + di;
864
+ Sample cd1r = di - ci, cd1i = cr - dr;
402
865
 
403
- template<typename V, int optionFlags=0>
404
- class RealFFT {
405
- static constexpr bool modified = (optionFlags&FFTOptions::halfFreqShift);
866
+ Sample bcde01r = be0r*tw1r + cd0r*tw2r, bcde01i = be0i*tw1r + cd0i*tw2r;
867
+ Sample bcde02r = be0r*tw2r + cd0r*tw1r, bcde02i = be0i*tw2r + cd0i*tw1r;
868
+ Sample bcde11r = be1r*tw1i + cd1r*tw2i, bcde11i = be1i*tw1i + cd1i*tw2i;
869
+ Sample bcde12r = be1r*tw2i - cd1r*tw1i, bcde12i = be1i*tw2i - cd1i*tw1i;
406
870
 
407
- using complex = std::complex<V>;
408
- std::vector<complex> complexBuffer1, complexBuffer2;
409
- std::vector<complex> twiddlesMinusI;
410
- std::vector<complex> modifiedRotations;
411
- FFT<V> complexFft;
412
- public:
413
- static size_t fastSizeAbove(size_t size) {
414
- return FFT<V>::fastSizeAbove((size + 1)/2)*2;
871
+ f0r[i] = ar + be0r + cd0r;
872
+ f0i[i] = ai + be0i + cd0i;
873
+ f1r[i] = ar + bcde01r + bcde11r;
874
+ f1i[i] = ai + bcde01i + bcde11i;
875
+ f2r[i] = ar + bcde02r + bcde12r;
876
+ f2i[i] = ai + bcde02i + bcde12i;
877
+ f3r[i] = ar + bcde02r - bcde12r;
878
+ f3i[i] = ai + bcde02i - bcde12i;
879
+ f4r[i] = ar + bcde01r - bcde11r;
880
+ f4i[i] = ai + bcde01i - bcde11i;
415
881
  }
416
- static size_t fastSizeBelow(size_t size) {
417
- return FFT<V>::fastSizeBelow(size/2)*2;
882
+ }
883
+
884
+ template<bool inverse>
885
+ void finalPassN(Complex *f0) {
886
+ for (size_t i = 0; i < innerSize; ++i) {
887
+ Complex *offsetFreq = f0 + i;
888
+ Complex sum = 0;
889
+ for (size_t i2 = 0; i2 < outerSize; ++i2) {
890
+ sum += (dftTmp[i2] = offsetFreq[i2*innerSize]);
891
+ }
892
+ offsetFreq[0] = sum;
893
+
894
+ for (size_t f = 1; f < outerSize; ++f) {
895
+ Complex sum = dftTmp[0];
896
+
897
+ for (size_t i2 = 1; i2 < outerSize; ++i2) {
898
+ size_t twistIndex = (i2*f)%outerSize;
899
+ Complex twist = inverse ? std::conj(dftTwists[twistIndex]) : dftTwists[twistIndex];
900
+ sum += Complex{
901
+ dftTmp[i2].real()*twist.real() - dftTmp[i2].imag()*twist.imag(),
902
+ dftTmp[i2].imag()*twist.real() + dftTmp[i2].real()*twist.imag()
903
+ };
904
+ }
905
+
906
+ offsetFreq[f*innerSize] = sum;
907
+ }
418
908
  }
909
+ }
910
+ template<bool inverse>
911
+ void finalPassN(Sample *f0r, Sample *f0i) {
912
+ Sample *tmpR = (Sample *)dftTmp.data(), *tmpI = tmpR + outerSize;
913
+
914
+ for (size_t i = 0; i < innerSize; ++i) {
915
+ Sample *offsetR = f0r + i;
916
+ Sample *offsetI = f0i + i;
917
+ Sample sumR = 0, sumI = 0;
918
+ for (size_t i2 = 0; i2 < outerSize; ++i2) {
919
+ sumR += (tmpR[i2] = offsetR[i2*innerSize]);
920
+ sumI += (tmpI[i2] = offsetI[i2*innerSize]);
921
+ }
922
+ offsetR[0] = sumR;
923
+ offsetI[0] = sumI;
924
+
925
+ for (size_t f = 1; f < outerSize; ++f) {
926
+ Sample sumR = *tmpR, sumI = *tmpI;
419
927
 
420
- RealFFT(size_t size=0, int fastDirection=0) : complexFft(0) {
421
- if (fastDirection > 0) size = fastSizeAbove(size);
422
- if (fastDirection < 0) size = fastSizeBelow(size);
423
- this->setSize(std::max<size_t>(size, 2));
928
+ for (size_t i2 = 1; i2 < outerSize; ++i2) {
929
+ size_t twistIndex = (i2*f)%outerSize;
930
+ Complex twist = inverse ? std::conj(dftTwists[twistIndex]) : dftTwists[twistIndex];
931
+ sumR += tmpR[i2]*twist.real() - tmpI[i2]*twist.imag();
932
+ sumI += tmpI[i2]*twist.real() + tmpR[i2]*twist.imag();
933
+ }
934
+
935
+ offsetR[f*innerSize] = sumR;
936
+ offsetI[f*innerSize] = sumI;
937
+ }
424
938
  }
939
+ }
940
+ };
941
+
942
+ template<typename Sample, bool splitComputation=false>
943
+ using FFT = SplitFFT<Sample, splitComputation>;
425
944
 
426
- size_t setSize(size_t size) {
427
- complexBuffer1.resize(size/2);
428
- complexBuffer2.resize(size/2);
945
+ /// A Real FFT which can handle multiples of 3 and 5, and can be computed in chunks
946
+ template<typename Sample, bool splitComputation=false, bool halfBinShift=false>
947
+ struct RealFFT {
948
+ using Complex = std::complex<Sample>;
949
+ static constexpr bool prefersSplit = SplitFFT<Sample, splitComputation>::prefersSplit;
429
950
 
430
- size_t hhSize = size/4 + 1;
431
- twiddlesMinusI.resize(hhSize);
432
- for (size_t i = 0; i < hhSize; ++i) {
433
- V rotPhase = -2*audioapi::PI*(modified ? i + 0.5 : i)/size;
434
- twiddlesMinusI[i] = {std::sin(rotPhase), -std::cos(rotPhase)};
951
+ static size_t fastSizeAbove(size_t size) {
952
+ return ComplexFFT::fastSizeAbove((size + 1)/2)*2;
953
+ }
954
+
955
+ RealFFT(size_t size=0) {
956
+ resize(size);
957
+ }
958
+
959
+ void resize(size_t size) {
960
+ size_t hSize = size/2;
961
+ complexFft.resize(hSize);
962
+ tmpFreq.resize(hSize);
963
+ tmpTime.resize(hSize);
964
+
965
+ twiddles.resize(hSize/2 + 1);
966
+
967
+ if (!halfBinShift) {
968
+ for (size_t i = 0; i < twiddles.size(); ++i) {
969
+ Sample rotPhase = i*(-2*M_PI/size) - M_PI/2; // bake rotation by (-i) into twiddles
970
+ twiddles[i] = std::polar(Sample(1), rotPhase);
435
971
  }
436
- if (modified) {
437
- modifiedRotations.resize(size/2);
438
- for (size_t i = 0; i < size/2; ++i) {
439
- V rotPhase = -2*audioapi::PI*i/size;
440
- modifiedRotations[i] = {std::cos(rotPhase), std::sin(rotPhase)};
441
- }
972
+ } else {
973
+ for (size_t i = 0; i < twiddles.size(); ++i) {
974
+ Sample rotPhase = (i + 0.5)*(-2*M_PI/size) - M_PI/2;
975
+ twiddles[i] = std::polar(Sample(1), rotPhase);
442
976
  }
443
977
 
444
- return complexFft.setSize(size/2);
445
- }
446
- size_t setFastSizeAbove(size_t size) {
447
- return setSize(fastSizeAbove(size));
448
- }
449
- size_t setFastSizeBelow(size_t size) {
450
- return setSize(fastSizeBelow(size));
451
- }
452
- size_t size() const {
453
- return complexFft.size()*2;
978
+ halfBinTwists.resize(hSize);
979
+ for (size_t i = 0; i < hSize; ++i) {
980
+ Sample twistPhase = -2*M_PI*i/size;
981
+ halfBinTwists[i] = std::polar(Sample(1), twistPhase);
982
+ }
454
983
  }
984
+ }
455
985
 
456
- template<typename InputIterator, typename OutputIterator>
457
- void fft(InputIterator &&input, OutputIterator &&output) {
986
+ size_t size() const {
987
+ return complexFft.size()*2;
988
+ }
989
+ size_t steps() const {
990
+ return complexFft.steps() + (splitComputation ? 3 : 2);
991
+ }
992
+
993
+ void fft(const Sample *time, Complex *freq) {
994
+ for (size_t s = 0; s < steps(); ++s) {
995
+ fft(s, time, freq);
996
+ }
997
+ }
998
+ void fft(size_t step, const Sample *time, Complex *freq) {
999
+ if (complexPrefersSplit) {
458
1000
  size_t hSize = complexFft.size();
459
- for (size_t i = 0; i < hSize; ++i) {
460
- if (modified) {
461
- complexBuffer1[i] = _fft_impl::complexMul<false>({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
1001
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize;
1002
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
1003
+ if (step-- == 0) {
1004
+ size_t hSize = complexFft.size();
1005
+ if (halfBinShift) {
1006
+ for (size_t i = 0; i < hSize; ++i) {
1007
+ Sample tr = time[2*i], ti = time[2*i + 1];
1008
+ Complex twist = halfBinTwists[i];
1009
+ tmpTimeR[i] = tr*twist.real() - ti*twist.imag();
1010
+ tmpTimeI[i] = ti*twist.real() + tr*twist.imag();
1011
+ }
462
1012
  } else {
463
- complexBuffer1[i] = {input[2*i], input[2*i + 1]};
1013
+ for (size_t i = 0; i < hSize; ++i) {
1014
+ tmpTimeR[i] = time[2*i];
1015
+ tmpTimeI[i] = time[2*i + 1];
1016
+ }
1017
+ }
1018
+ } else if (step < complexFft.steps()) {
1019
+ complexFft.fft(step, tmpTimeR, tmpTimeI, tmpFreqR, tmpFreqI);
1020
+ } else {
1021
+ if (!halfBinShift) {
1022
+ Sample bin0r = tmpFreqR[0], bin0i = tmpFreqI[0];
1023
+ freq[0] = {bin0r + bin0i, bin0r - bin0i};
1024
+ }
1025
+
1026
+ size_t startI = halfBinShift ? 0 : 1;
1027
+ size_t endI = hSize/2 + 1;
1028
+ if (splitComputation) { // Do this last twiddle in two halves
1029
+ if (step == complexFft.steps()) {
1030
+ endI = (startI + endI)/2;
1031
+ } else {
1032
+ startI = (startI + endI)/2;
1033
+ }
1034
+ }
1035
+ for (size_t i = startI; i < endI; ++i) {
1036
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1037
+ Complex twiddle = twiddles[i];
1038
+
1039
+ Sample oddR = (tmpFreqR[i] + tmpFreqR[conjI])*Sample(0.5);
1040
+ Sample oddI = (tmpFreqI[i] - tmpFreqI[conjI])*Sample(0.5);
1041
+ Sample evenIR = (tmpFreqR[i] - tmpFreqR[conjI])*Sample(0.5);
1042
+ Sample evenII = (tmpFreqI[i] + tmpFreqI[conjI])*Sample(0.5);
1043
+ Sample evenRotMinusIR = evenIR*twiddle.real() - evenII*twiddle.imag();
1044
+ Sample evenRotMinusII = evenII*twiddle.real() + evenIR*twiddle.imag();
1045
+
1046
+ freq[i] = {oddR + evenRotMinusIR, oddI + evenRotMinusII};
1047
+ freq[conjI] = {oddR - evenRotMinusIR, evenRotMinusII - oddI};
464
1048
  }
465
1049
  }
1050
+ } else {
1051
+ if (step-- == 0) {
1052
+ size_t hSize = complexFft.size();
1053
+ if (halfBinShift) {
1054
+ for (size_t i = 0; i < hSize; ++i) {
1055
+ Sample tr = time[2*i], ti = time[2*i + 1];
1056
+ Complex twist = halfBinTwists[i];
1057
+ tmpTime[i] = {
1058
+ tr*twist.real() - ti*twist.imag(),
1059
+ ti*twist.real() + tr*twist.imag()
1060
+ };
1061
+ }
1062
+ } else {
1063
+ for (size_t i = 0; i < hSize; ++i) {
1064
+ tmpTime[i] = {time[2*i], time[2*i + 1]};
1065
+ }
1066
+ }
1067
+ } else if (step < complexFft.steps()) {
1068
+ complexFft.fft(step, tmpTime.data(), tmpFreq.data());
1069
+ } else {
1070
+ if (!halfBinShift) {
1071
+ Complex bin0 = tmpFreq[0];
1072
+ freq[0] = { // pack DC & Nyquist together
1073
+ bin0.real() + bin0.imag(),
1074
+ bin0.real() - bin0.imag()
1075
+ };
1076
+ }
466
1077
 
467
- complexFft.fft(complexBuffer1.data(), complexBuffer2.data());
1078
+ size_t hSize = complexFft.size();
1079
+ size_t startI = halfBinShift ? 0 : 1;
1080
+ size_t endI = hSize/2 + 1;
1081
+ if (splitComputation) { // Do this last twiddle in two halves
1082
+ if (step == complexFft.steps()) {
1083
+ endI = (startI + endI)/2;
1084
+ } else {
1085
+ startI = (startI + endI)/2;
1086
+ }
1087
+ }
1088
+ for (size_t i = startI; i < endI; ++i) {
1089
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1090
+ Complex twiddle = twiddles[i];
468
1091
 
469
- if (!modified) output[0] = {
470
- complexBuffer2[0].real() + complexBuffer2[0].imag(),
471
- complexBuffer2[0].real() - complexBuffer2[0].imag()
472
- };
473
- for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
474
- size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
1092
+ Complex odd = (tmpFreq[i] + std::conj(tmpFreq[conjI]))*Sample(0.5);
1093
+ Complex evenI = (tmpFreq[i] - std::conj(tmpFreq[conjI]))*Sample(0.5);
1094
+ Complex evenRotMinusI = { // twiddle includes a factor of -i
1095
+ evenI.real()*twiddle.real() - evenI.imag()*twiddle.imag(),
1096
+ evenI.imag()*twiddle.real() + evenI.real()*twiddle.imag()
1097
+ };
475
1098
 
476
- complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
477
- complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
478
- complex evenRotMinusI = _fft_impl::complexMul<false>(evenI, twiddlesMinusI[i]);
1099
+ freq[i] = odd + evenRotMinusI;
1100
+ freq[conjI] = {odd.real() - evenRotMinusI.real(), evenRotMinusI.imag() - odd.imag()};
1101
+ }
1102
+ }
1103
+ }
1104
+ }
1105
+ void fft(const Sample *inR, Sample *outR, Sample *outI) {
1106
+ for (size_t s = 0; s < steps(); ++s) {
1107
+ fft(s, inR, outR, outI);
1108
+ }
1109
+ }
1110
+ void fft(size_t step, const Sample *inR, Sample *outR, Sample *outI) {
1111
+ size_t hSize = complexFft.size();
1112
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize;
1113
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
1114
+ if (step-- == 0) {
1115
+ size_t hSize = complexFft.size();
1116
+ if (halfBinShift) {
1117
+ for (size_t i = 0; i < hSize; ++i) {
1118
+ Sample tr = inR[2*i], ti = inR[2*i + 1];
1119
+ Complex twist = halfBinTwists[i];
1120
+ tmpTimeR[i] = tr*twist.real() - ti*twist.imag();
1121
+ tmpTimeI[i] = ti*twist.real() + tr*twist.imag();
1122
+ }
1123
+ } else {
1124
+ for (size_t i = 0; i < hSize; ++i) {
1125
+ tmpTimeR[i] = inR[2*i];
1126
+ tmpTimeI[i] = inR[2*i + 1];
1127
+ }
1128
+ }
1129
+ } else if (step < complexFft.steps()) {
1130
+ complexFft.fft(step, tmpTimeR, tmpTimeI, tmpFreqR, tmpFreqI);
1131
+ } else {
1132
+ if (!halfBinShift) {
1133
+ Sample bin0r = tmpFreqR[0], bin0i = tmpFreqI[0];
1134
+ outR[0] = bin0r + bin0i;
1135
+ outI[0] = bin0r - bin0i;
1136
+ }
1137
+
1138
+ size_t startI = halfBinShift ? 0 : 1;
1139
+ size_t endI = hSize/2 + 1;
1140
+ if (splitComputation) { // Do this last twiddle in two halves
1141
+ if (step == complexFft.steps()) {
1142
+ endI = (startI + endI)/2;
1143
+ } else {
1144
+ startI = (startI + endI)/2;
1145
+ }
1146
+ }
1147
+ for (size_t i = startI; i < endI; ++i) {
1148
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1149
+ Complex twiddle = twiddles[i];
1150
+
1151
+ Sample oddR = (tmpFreqR[i] + tmpFreqR[conjI])*Sample(0.5);
1152
+ Sample oddI = (tmpFreqI[i] - tmpFreqI[conjI])*Sample(0.5);
1153
+ Sample evenIR = (tmpFreqR[i] - tmpFreqR[conjI])*Sample(0.5);
1154
+ Sample evenII = (tmpFreqI[i] + tmpFreqI[conjI])*Sample(0.5);
1155
+ Sample evenRotMinusIR = evenIR*twiddle.real() - evenII*twiddle.imag();
1156
+ Sample evenRotMinusII = evenII*twiddle.real() + evenIR*twiddle.imag();
479
1157
 
480
- output[i] = odd + evenRotMinusI;
481
- output[conjI] = conj(odd - evenRotMinusI);
1158
+ outR[i] = oddR + evenRotMinusIR;
1159
+ outI[i] = oddI + evenRotMinusII;
1160
+ outR[conjI] = oddR - evenRotMinusIR;
1161
+ outI[conjI] = evenRotMinusII - oddI;
482
1162
  }
483
1163
  }
1164
+ }
484
1165
 
485
- template<typename InputIterator, typename OutputIterator>
486
- void ifft(InputIterator &&input, OutputIterator &&output) {
1166
+ void ifft(const Complex *freq, Sample *time) {
1167
+ for (size_t s = 0; s < steps(); ++s) {
1168
+ ifft(s, freq, time);
1169
+ }
1170
+ }
1171
+ void ifft(size_t step, const Complex *freq, Sample *time) {
1172
+ if (complexPrefersSplit) {
487
1173
  size_t hSize = complexFft.size();
488
- if (!modified) complexBuffer1[0] = {
489
- input[0].real() + input[0].imag(),
490
- input[0].real() - input[0].imag()
491
- };
492
- for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
493
- size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
494
- complex v = input[i], v2 = input[conjI];
1174
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize;
1175
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
1176
+
1177
+ bool splitFirst = splitComputation && (step-- == 0);
1178
+ if (splitFirst || step-- == 0) {
1179
+ Complex bin0 = freq[0];
1180
+ if (!halfBinShift) {
1181
+ tmpFreqR[0] = bin0.real() + bin0.imag();
1182
+ tmpFreqI[0] = bin0.real() - bin0.imag();
1183
+ }
1184
+ size_t startI = halfBinShift ? 0 : 1;
1185
+ size_t endI = hSize/2 + 1;
1186
+ if (splitComputation) { // Do this first twiddle in two halves
1187
+ if (splitFirst) {
1188
+ endI = (startI + endI)/2;
1189
+ } else {
1190
+ startI = (startI + endI)/2;
1191
+ }
1192
+ }
1193
+ for (size_t i = startI; i < endI; ++i) {
1194
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1195
+ Complex twiddle = twiddles[i];
1196
+
1197
+ Complex odd = freq[i] + std::conj(freq[conjI]);
1198
+ Complex evenRotMinusI = freq[i] - std::conj(freq[conjI]);
1199
+ Complex evenI = { // Conjugate twiddle
1200
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
1201
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
1202
+ };
495
1203
 
496
- complex odd = v + conj(v2);
497
- complex evenRotMinusI = v - conj(v2);
498
- complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
1204
+ tmpFreqR[i] = odd.real() + evenI.real();
1205
+ tmpFreqI[i] = odd.imag() + evenI.imag();
1206
+ tmpFreqR[conjI] = odd.real() - evenI.real();
1207
+ tmpFreqI[conjI] = evenI.imag() - odd.imag();
1208
+ }
1209
+ } else if (step < complexFft.steps()) {
1210
+ complexFft.ifft(step, tmpFreqR, tmpFreqI, tmpTimeR, tmpTimeI);
1211
+ } else {
1212
+ size_t hSize = complexFft.size();
1213
+ if (halfBinShift) {
1214
+ for (size_t i = 0; i < hSize; ++i) {
1215
+ Sample tr = tmpTimeR[i], ti = tmpTimeI[i];
1216
+ Complex twist = halfBinTwists[i];
1217
+ time[2*i] = tr*twist.real() + ti*twist.imag();
1218
+ time[2*i + 1] = ti*twist.real() - tr*twist.imag();
1219
+ }
1220
+ } else {
1221
+ for (size_t i = 0; i < hSize; ++i) {
1222
+ time[2*i] = tmpTimeR[i];
1223
+ time[2*i + 1] = tmpTimeI[i];
1224
+ }
1225
+ }
1226
+ }
1227
+ } else {
1228
+ bool splitFirst = splitComputation && (step-- == 0);
1229
+ if (splitFirst || step-- == 0) {
1230
+ Complex bin0 = freq[0];
1231
+ if (!halfBinShift) {
1232
+ tmpFreq[0] = {
1233
+ bin0.real() + bin0.imag(),
1234
+ bin0.real() - bin0.imag()
1235
+ };
1236
+ }
1237
+ size_t hSize = complexFft.size();
1238
+ size_t startI = halfBinShift ? 0 : 1;
1239
+ size_t endI = hSize/2 + 1;
1240
+ if (splitComputation) { // Do this first twiddle in two halves
1241
+ if (splitFirst) {
1242
+ endI = (startI + endI)/2;
1243
+ } else {
1244
+ startI = (startI + endI)/2;
1245
+ }
1246
+ }
1247
+ for (size_t i = startI; i < endI; ++i) {
1248
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1249
+ Complex twiddle = twiddles[i];
499
1250
 
500
- complexBuffer1[i] = odd + evenI;
501
- complexBuffer1[conjI] = conj(odd - evenI);
1251
+ Complex odd = freq[i] + std::conj(freq[conjI]);
1252
+ Complex evenRotMinusI = freq[i] - std::conj(freq[conjI]);
1253
+ Complex evenI = { // Conjugate twiddle
1254
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
1255
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
1256
+ };
1257
+
1258
+ tmpFreq[i] = odd + evenI;
1259
+ tmpFreq[conjI] = {odd.real() - evenI.real(), evenI.imag() - odd.imag()};
1260
+ }
1261
+ } else if (step < complexFft.steps()) {
1262
+ // Can't just use time as (Complex *), since it might not be aligned properly
1263
+ complexFft.ifft(step, tmpFreq.data(), tmpTime.data());
1264
+ } else {
1265
+ size_t hSize = complexFft.size();
1266
+ if (halfBinShift) {
1267
+ for (size_t i = 0; i < hSize; ++i) {
1268
+ Complex t = tmpTime[i];
1269
+ Complex twist = halfBinTwists[i];
1270
+ time[2*i] = t.real()*twist.real() + t.imag()*twist.imag();
1271
+ time[2*i + 1] = t.imag()*twist.real() - t.real()*twist.imag();
1272
+ }
1273
+ } else {
1274
+ for (size_t i = 0; i < hSize; ++i) {
1275
+ time[2*i] = tmpTime[i].real();
1276
+ time[2*i + 1] = tmpTime[i].imag();
1277
+ }
1278
+ }
502
1279
  }
1280
+ }
1281
+ }
1282
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR) {
1283
+ for (size_t s = 0; s < steps(); ++s) {
1284
+ ifft(s, inR, inI, outR);
1285
+ }
1286
+ }
1287
+ void ifft(size_t step, const Sample *inR, const Sample *inI, Sample *outR) {
1288
+ size_t hSize = complexFft.size();
1289
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize;
1290
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
503
1291
 
504
- complexFft.ifft(complexBuffer1.data(), complexBuffer2.data());
1292
+ bool splitFirst = splitComputation && (step-- == 0);
1293
+ if (splitFirst || step-- == 0) {
1294
+ Sample bin0r = inR[0], bin0i = inI[0];
1295
+ if (!halfBinShift) {
1296
+ tmpFreqR[0] = bin0r + bin0i;
1297
+ tmpFreqI[0] = bin0r - bin0i;
1298
+ }
1299
+ size_t startI = halfBinShift ? 0 : 1;
1300
+ size_t endI = hSize/2 + 1;
1301
+ if (splitComputation) { // Do this first twiddle in two halves
1302
+ if (splitFirst) {
1303
+ endI = (startI + endI)/2;
1304
+ } else {
1305
+ startI = (startI + endI)/2;
1306
+ }
1307
+ }
1308
+ for (size_t i = startI; i < endI; ++i) {
1309
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i);
1310
+ Complex twiddle = twiddles[i];
1311
+ Sample fir = inR[i], fii = inI[i];
1312
+ Sample fcir = inR[conjI], fcii = inI[conjI];
505
1313
 
506
- for (size_t i = 0; i < hSize; ++i) {
507
- complex v = complexBuffer2[i];
508
- if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
509
- output[2*i] = v.real();
510
- output[2*i + 1] = v.imag();
1314
+ Complex odd = {fir + fcir, fii - fcii};
1315
+ Complex evenRotMinusI = {fir - fcir, fii + fcii};
1316
+ Complex evenI = { // Conjugate twiddle
1317
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
1318
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
1319
+ };
1320
+
1321
+ tmpFreqR[i] = odd.real() + evenI.real();
1322
+ tmpFreqI[i] = odd.imag() + evenI.imag();
1323
+ tmpFreqR[conjI] = odd.real() - evenI.real();
1324
+ tmpFreqI[conjI] = evenI.imag() - odd.imag();
1325
+ }
1326
+ } else if (step < complexFft.steps()) {
1327
+ // Can't just use time as (Complex *), since it might not be aligned properly
1328
+ complexFft.ifft(step, tmpFreqR, tmpFreqI, tmpTimeR, tmpTimeI);
1329
+ } else {
1330
+ if (halfBinShift) {
1331
+ for (size_t i = 0; i < hSize; ++i) {
1332
+ Sample tr = tmpTimeR[i], ti = tmpTimeI[i];
1333
+ Complex twist = halfBinTwists[i];
1334
+ outR[2*i] = tr*twist.real() + ti*twist.imag();
1335
+ outR[2*i + 1] = ti*twist.real() - tr*twist.imag();
1336
+ }
1337
+ } else {
1338
+ for (size_t i = 0; i < hSize; ++i) {
1339
+ outR[2*i] = tmpTimeR[i];
1340
+ outR[2*i + 1] = tmpTimeI[i];
1341
+ }
511
1342
  }
512
1343
  }
513
- };
1344
+ }
1345
+ private:
1346
+ static constexpr bool complexPrefersSplit = SplitFFT<Sample, splitComputation>::prefersSplit;
1347
+ std::vector<Complex> tmpFreq, tmpTime;
1348
+ std::vector<Complex> twiddles, halfBinTwists;
514
1349
 
515
- template<typename V>
516
- struct ModifiedRealFFT : public RealFFT<V, FFTOptions::halfFreqShift> {
517
- using RealFFT<V, FFTOptions::halfFreqShift>::RealFFT;
518
- };
1350
+ using ComplexFFT = SplitFFT<Sample, splitComputation>;
1351
+ ComplexFFT complexFft;
1352
+ };
1353
+
1354
+ template<typename Sample, bool splitComputation=false>
1355
+ using ModifiedRealFFT = RealFFT<Sample, splitComputation, true>;
519
1356
 
520
- /// @}
521
1357
  }} // namespace
1358
+
1359
+ // Platform-specific
1360
+ #if defined(HAVE_ACCELERATE)
1361
+ # include <audioapi/libs/signalsmith-stretch/fft-accelerate.h>
1362
+ #elif defined(SIGNALSMITH_USE_IPP)
1363
+ # include "./platform/fft-ipp.h"
1364
+ #endif
1365
+
522
1366
  #endif // include guard