react-native-audio-api 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,34 @@
1
1
  #ifndef SIGNALSMITH_STRETCH_H
2
2
  #define SIGNALSMITH_STRETCH_H
3
3
 
4
- #include <audioapi/libs/signalsmith-stretch/spectral.h>
5
- #include <audioapi/libs/signalsmith-stretch/delay.h>
6
- #include <audioapi/libs/signalsmith-stretch/perf.h>
4
+ #include <audioapi/libs/signalsmith-stretch/stft.h>
7
5
  #include <vector>
8
6
  #include <algorithm>
9
7
  #include <functional>
10
8
  #include <random>
9
+ #include <type_traits>
11
10
 
12
11
  namespace signalsmith { namespace stretch {
13
12
 
14
- template<typename Sample=float, class RandomEngine=std::default_random_engine>
13
+ namespace _impl {
14
+ template<bool conjugateSecond=false, typename V>
15
+ static std::complex<V> mul(const std::complex<V> &a, const std::complex<V> &b) {
16
+ return conjugateSecond ? std::complex<V>{
17
+ b.real()*a.real() + b.imag()*a.imag(),
18
+ b.real()*a.imag() - b.imag()*a.real()
19
+ } : std::complex<V>{
20
+ a.real()*b.real() - a.imag()*b.imag(),
21
+ a.real()*b.imag() + a.imag()*b.real()
22
+ };
23
+ }
24
+ template<typename V>
25
+ static V norm(const std::complex<V> &a) {
26
+ V r = a.real(), i = a.imag();
27
+ return r*r + i*i;
28
+ }
29
+ }
30
+
31
+ template<typename Sample=float, class RandomEngine=void>
15
32
  struct SignalsmithStretch {
16
33
  static constexpr size_t version[3] = {1, 1, 1};
17
34
 
@@ -19,44 +36,51 @@ struct SignalsmithStretch {
19
36
  SignalsmithStretch(long seed) : randomEngine(seed) {}
20
37
 
21
38
  int blockSamples() const {
22
- return stft.windowSize();
39
+ return int(stft.blockSamples());
23
40
  }
24
41
  int intervalSamples() const {
25
- return stft.interval();
42
+ return int(stft.defaultInterval());
26
43
  }
27
44
  int inputLatency() const {
28
- return stft.windowSize()/2;
45
+ return int(stft.analysisLatency());
29
46
  }
30
47
  int outputLatency() const {
31
- return stft.windowSize() - inputLatency();
48
+ return int(stft.synthesisLatency() + _splitComputation*stft.defaultInterval());
32
49
  }
33
50
 
34
51
  void reset() {
35
- stft.reset();
36
- inputBuffer.reset();
52
+ stft.reset(0.1);
53
+ stashedInput = stft.input;
54
+ stashedOutput = stft.output;
55
+
37
56
  prevInputOffset = -1;
38
57
  channelBands.assign(channelBands.size(), Band());
39
58
  silenceCounter = 0;
40
59
  didSeek = false;
41
- flushed = true;
60
+
61
+ blockProcess = {};
42
62
  }
43
63
 
44
64
  // Configures using a default preset
45
- void presetDefault(int nChannels, Sample sampleRate) {
46
- configure(nChannels, sampleRate*0.12, sampleRate*0.03);
65
+ void presetDefault(int nChannels, Sample sampleRate, bool splitComputation=false) {
66
+ configure(nChannels, sampleRate*0.12, sampleRate*0.03, splitComputation);
47
67
  }
48
- void presetCheaper(int nChannels, Sample sampleRate) {
49
- configure(nChannels, sampleRate*0.1, sampleRate*0.04);
68
+ void presetCheaper(int nChannels, Sample sampleRate, bool splitComputation=true) {
69
+ configure(nChannels, sampleRate*0.1, sampleRate*0.04, splitComputation);
50
70
  }
51
71
 
52
72
  // Manual setup
53
- void configure(int nChannels, int blockSamples, int intervalSamples) {
73
+ void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false) {
74
+ _splitComputation = splitComputation;
54
75
  channels = nChannels;
55
- stft.setWindow(stft.kaiser, true);
56
- stft.resize(channels, blockSamples, intervalSamples);
57
- bands = stft.bands();
58
- inputBuffer.resize(channels, blockSamples + intervalSamples + 1);
59
- timeBuffer.assign(stft.fftSize(), 0);
76
+ stft.configure(channels, channels, blockSamples, intervalSamples + 1);
77
+ stft.setInterval(intervalSamples, stft.kaiser);
78
+ stft.reset(0.1);
79
+ stashedInput = stft.input;
80
+ stashedOutput = stft.output;
81
+ tmpBuffer.resize(blockSamples + intervalSamples);
82
+
83
+ bands = int(stft.bands());
60
84
  channelBands.assign(bands*channels, Band());
61
85
 
62
86
  peaks.reserve(bands/2);
@@ -64,6 +88,8 @@ struct SignalsmithStretch {
64
88
  smoothedEnergy.resize(bands);
65
89
  outputMap.resize(bands);
66
90
  channelPredictions.resize(channels*bands);
91
+
92
+ blockProcess = {};
67
93
  }
68
94
 
69
95
  /// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
@@ -88,29 +114,54 @@ struct SignalsmithStretch {
88
114
  // Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
89
115
  template<class Inputs>
90
116
  void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
91
- inputBuffer.reset();
117
+ tmpBuffer.resize(0);
118
+ tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
119
+
120
+ int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
121
+ int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
122
+
92
123
  Sample totalEnergy = 0;
93
124
  for (int c = 0; c < channels; ++c) {
94
125
  auto &&inputChannel = inputs[c];
95
- auto &&bufferChannel = inputBuffer[c];
96
- int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
97
126
  for (int i = startIndex; i < inputSamples; ++i) {
98
127
  Sample s = inputChannel[i];
99
128
  totalEnergy += s*s;
100
- bufferChannel[i] = s;
129
+ tmpBuffer[i - startIndex + padStart] = s;
101
130
  }
131
+
132
+ stft.writeInput(c, tmpBuffer.size(), tmpBuffer.data());
102
133
  }
134
+ stft.moveInput(tmpBuffer.size());
103
135
  if (totalEnergy >= noiseFloor) {
104
136
  silenceCounter = 0;
105
137
  silenceFirst = true;
106
138
  }
107
- inputBuffer += inputSamples;
108
139
  didSeek = true;
109
- seekTimeFactor = (playbackRate*stft.interval() > 1) ? 1/playbackRate : stft.interval();
140
+ seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
110
141
  }
111
142
 
112
143
  template<class Inputs, class Outputs>
113
144
  void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
145
+ #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_START
146
+ SIGNALSMITH_STRETCH_PROFILE_PROCESS_START(inputSamples, outputSamples);
147
+ #endif
148
+ int prevCopiedInput = 0;
149
+ auto copyInput = [&](int toIndex){
150
+
151
+ int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
152
+ tmpBuffer.resize(length);
153
+ int offset = toIndex - length;
154
+ for (int c = 0; c < channels; ++c) {
155
+ auto &&inputBuffer = inputs[c];
156
+ for (int i = 0; i < length; ++i) {
157
+ tmpBuffer[i] = inputBuffer[i + offset];
158
+ }
159
+ stft.writeInput(c, length, tmpBuffer.data());
160
+ }
161
+ stft.moveInput(length);
162
+ prevCopiedInput = toIndex;
163
+ };
164
+
114
165
  Sample totalEnergy = 0;
115
166
  for (int c = 0; c < channels; ++c) {
116
167
  auto &&inputChannel = inputs[c];
@@ -119,10 +170,13 @@ struct SignalsmithStretch {
119
170
  totalEnergy += s*s;
120
171
  }
121
172
  }
173
+
122
174
  if (totalEnergy < noiseFloor) {
123
- if (silenceCounter >= 2*stft.windowSize()) {
124
- if (silenceFirst) {
175
+ if (silenceCounter >= 2*stft.blockSamples()) {
176
+ if (silenceFirst) { // first block of silence processing
125
177
  silenceFirst = false;
178
+ //stft.reset();
179
+ blockProcess = {};
126
180
  for (auto &b : channelBands) {
127
181
  b.input = b.prevInput = b.output = 0;
128
182
  b.inputEnergy = 0;
@@ -147,15 +201,7 @@ struct SignalsmithStretch {
147
201
  }
148
202
 
149
203
  // Store input in history buffer
150
- for (int c = 0; c < channels; ++c) {
151
- auto &&inputChannel = inputs[c];
152
- auto &&bufferChannel = inputBuffer[c];
153
- int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
154
- for (int i = startIndex; i < inputSamples; ++i) {
155
- bufferChannel[i] = inputChannel[i];
156
- }
157
- }
158
- inputBuffer += inputSamples;
204
+ copyInput(inputSamples);
159
205
  return;
160
206
  } else {
161
207
  silenceCounter += inputSamples;
@@ -166,149 +212,217 @@ struct SignalsmithStretch {
166
212
  }
167
213
 
168
214
  for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
169
- stft.ensureValid(outputIndex, [&](int outputOffset) {
215
+ bool newBlock = blockProcess.samplesSinceLast >= stft.defaultInterval();
216
+ if (newBlock) {
217
+ blockProcess.step = 0;
218
+ blockProcess.steps = 0; // how many processing steps this block will have
219
+ blockProcess.samplesSinceLast = 0;
220
+
170
221
  // Time to process a spectrum! Where should it come from in the input?
171
- int inputOffset = std::round(outputOffset*Sample(inputSamples)/outputSamples) - stft.windowSize();
222
+ int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
172
223
  int inputInterval = inputOffset - prevInputOffset;
173
224
  prevInputOffset = inputOffset;
174
225
 
175
- bool newSpectrum = didSeek || (inputInterval > 0);
176
- if (newSpectrum) {
177
- for (int c = 0; c < channels; ++c) {
178
- // Copy from the history buffer, if needed
179
- auto &&bufferChannel = inputBuffer[c];
180
- for (int i = 0; i < -inputOffset; ++i) {
181
- timeBuffer[i] = bufferChannel[i + inputOffset];
226
+ copyInput(inputOffset);
227
+ stashedInput = stft.input; // save the input state, since that's what we'll analyse later
228
+ if (_splitComputation) {
229
+ stashedOutput = stft.output; // save the current output, and read from it
230
+ stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
231
+ }
232
+
233
+ blockProcess.newSpectrum = didSeek || (inputInterval > 0);
234
+ blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
235
+ if (blockProcess.newSpectrum) {
236
+ // make sure the previous input is the correct distance in the past (give or take 1 sample)
237
+ blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
238
+ if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
239
+
240
+ // analyse a new input
241
+ blockProcess.steps += stft.analyseSteps() + 1;
242
+ }
243
+
244
+ blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
245
+ didSeek = false;
246
+
247
+ updateProcessSpectrumSteps();
248
+ blockProcess.steps += processSpectrumSteps;
249
+
250
+ blockProcess.steps += stft.synthesiseSteps() + 1;
251
+ }
252
+
253
+ size_t processToStep = newBlock ? blockProcess.steps : 0;
254
+ if (_splitComputation) {
255
+ Sample processRatio = Sample(blockProcess.samplesSinceLast + 1)/stft.defaultInterval();
256
+ processToStep = std::min<size_t>(blockProcess.steps, (blockProcess.steps + 0.999f)*processRatio);
257
+ }
258
+
259
+ while (blockProcess.step < processToStep) {
260
+ size_t step = blockProcess.step++;
261
+ #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP
262
+ SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP(step, blockProcess.steps);
263
+ #endif
264
+ if (blockProcess.newSpectrum) {
265
+ if (blockProcess.reanalysePrev) {
266
+ // analyse past input
267
+ if (step < stft.analyseSteps()) {
268
+ stashedInput.swap(stft.input);
269
+ stft.analyseStep(step, stft.defaultInterval());
270
+ stashedInput.swap(stft.input);
271
+ continue;
182
272
  }
183
- // Copy the rest from the input
184
- auto &&inputChannel = inputs[c];
185
- for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
186
- timeBuffer[i] = inputChannel[i + inputOffset];
273
+ step -= stft.analyseSteps();
274
+ if (step < 1) {
275
+ // Copy previous analysis to our band objects
276
+ for (int c = 0; c < channels; ++c) {
277
+ auto channelBands = bandsForChannel(c);
278
+ auto *spectrumBands = stft.spectrum(c);
279
+ for (int b = 0; b < bands; ++b) {
280
+ channelBands[b].prevInput = spectrumBands[b];
281
+ }
282
+ }
283
+ continue;
187
284
  }
188
- stft.analyse(c, timeBuffer);
285
+ step -= 1;
189
286
  }
190
- flushed = false; // TODO: first block after a flush should be gain-compensated
191
287
 
192
- for (int c = 0; c < channels; ++c) {
193
- auto channelBands_ = bandsForChannel(c);
194
- auto &&spectrumBands = stft.spectrum[c];
195
- for (int b = 0; b < bands; ++b) {
196
- channelBands_[b].input = spectrumBands[b];
197
- }
288
+ // Analyse latest (stashed) input
289
+ if (step < stft.analyseSteps()) {
290
+ stashedInput.swap(stft.input);
291
+ stft.analyseStep(step);
292
+ stashedInput.swap(stft.input);
293
+ continue;
198
294
  }
199
-
200
- if (didSeek || inputInterval != stft.interval()) { // make sure the previous input is the correct distance in the past
201
- int prevIntervalOffset = inputOffset - stft.interval();
295
+ step -= stft.analyseSteps();
296
+ if (step < 1) {
297
+ // Copy analysed spectrum into our band objects
202
298
  for (int c = 0; c < channels; ++c) {
203
- // Copy from the history buffer, if needed
204
- auto &&bufferChannel = inputBuffer[c];
205
- for (int i = 0; i < std::min(-prevIntervalOffset, stft.windowSize()); ++i) {
206
- timeBuffer[i] = bufferChannel[i + prevIntervalOffset];
207
- }
208
- // Copy the rest from the input
209
- auto &&inputChannel = inputs[c];
210
- for (int i = std::max<int>(0, -prevIntervalOffset); i < stft.windowSize(); ++i) {
211
- timeBuffer[i] = inputChannel[i + prevIntervalOffset];
212
- }
213
- stft.analyse(c, timeBuffer);
214
- }
215
- for (int c = 0; c < channels; ++c) {
216
- auto channelBands_ = bandsForChannel(c);
217
- auto &&spectrumBands = stft.spectrum[c];
299
+ auto channelBands = bandsForChannel(c);
300
+ auto *spectrumBands = stft.spectrum(c);
218
301
  for (int b = 0; b < bands; ++b) {
219
- channelBands_[b].prevInput = spectrumBands[b];
302
+ channelBands[b].input = spectrumBands[b];
220
303
  }
221
304
  }
305
+ continue;
222
306
  }
307
+ step -= 1;
223
308
  }
224
309
 
225
- Sample timeFactor = didSeek ? seekTimeFactor : stft.interval()/std::max<Sample>(1, inputInterval);
226
- processSpectrum(newSpectrum, timeFactor);
227
- didSeek = false;
310
+ if (step < processSpectrumSteps) {
311
+ processSpectrum(step);
312
+ continue;
313
+ }
314
+ step -= processSpectrumSteps;
228
315
 
229
- for (int c = 0; c < channels; ++c) {
230
- auto channelBands_ = bandsForChannel(c);
231
- auto &&spectrumBands = stft.spectrum[c];
232
- for (int b = 0; b < bands; ++b) {
233
- spectrumBands[b] = channelBands_[b].output;
316
+ if (step < 1) {
317
+ // Copy band objects into spectrum
318
+ for (int c = 0; c < channels; ++c) {
319
+ auto channelBands = bandsForChannel(c);
320
+ auto *spectrumBands = stft.spectrum(c);
321
+ for (int b = 0; b < bands; ++b) {
322
+ spectrumBands[b] = channelBands[b].output;
323
+ }
234
324
  }
325
+ continue;
235
326
  }
236
- });
327
+ step -= 1;
237
328
 
329
+ if (step < stft.synthesiseSteps()) {
330
+ stft.synthesiseStep(step);
331
+ continue;
332
+ }
333
+ }
334
+ #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP
335
+ SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP();
336
+ #endif
337
+
338
+ ++blockProcess.samplesSinceLast;
339
+ if (_splitComputation) stashedOutput.swap(stft.output);
238
340
  for (int c = 0; c < channels; ++c) {
239
341
  auto &&outputChannel = outputs[c];
240
- auto &&stftChannel = stft[c];
241
- outputChannel[outputIndex] = stftChannel[outputIndex];
342
+ Sample v = 0;
343
+ stft.readOutput(c, 1, &v);
344
+ outputChannel[outputIndex] = v;
242
345
  }
346
+ stft.moveOutput(1);
347
+ if (_splitComputation) stashedOutput.swap(stft.output);
243
348
  }
244
349
 
245
- // Store input in history buffer
246
- for (int c = 0; c < channels; ++c) {
247
- auto &&inputChannel = inputs[c];
248
- auto &&bufferChannel = inputBuffer[c];
249
- int startIndex = std::max<int>(0, inputSamples - stft.windowSize());
250
- for (int i = startIndex; i < inputSamples; ++i) {
251
- bufferChannel[i] = inputChannel[i];
252
- }
253
- }
254
- inputBuffer += inputSamples;
255
- stft += outputSamples;
350
+ copyInput(inputSamples);
256
351
  prevInputOffset -= inputSamples;
352
+ #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_END
353
+ SIGNALSMITH_STRETCH_PROFILE_PROCESS_END();
354
+ #endif
257
355
  }
258
356
 
259
357
  // Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()`
260
358
  template<class Outputs>
261
359
  void flush(Outputs &&outputs, int outputSamples) {
262
- int plainOutput = std::min<int>(outputSamples, stft.windowSize());
263
- int foldedBackOutput = std::min<int>(outputSamples, stft.windowSize() - plainOutput);
360
+ int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
361
+ int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
362
+ stft.finishOutput(1);
264
363
  for (int c = 0; c < channels; ++c) {
364
+ tmpBuffer.resize(plainOutput);
365
+ stft.readOutput(c, plainOutput, tmpBuffer.data());
265
366
  auto &&outputChannel = outputs[c];
266
- auto &&stftChannel = stft[c];
267
367
  for (int i = 0; i < plainOutput; ++i) {
268
368
  // TODO: plain output should be gain-
269
- outputChannel[i] = stftChannel[i];
369
+ outputChannel[i] = tmpBuffer[i];
270
370
  }
371
+ tmpBuffer.resize(foldedBackOutput);
372
+ stft.readOutput(c, plainOutput, foldedBackOutput, tmpBuffer.data());
271
373
  for (int i = 0; i < foldedBackOutput; ++i) {
272
- outputChannel[outputSamples - 1 - i] -= stftChannel[plainOutput + i];
273
- }
274
- for (int i = 0; i < plainOutput + foldedBackOutput; ++i) {
275
- stftChannel[i] = 0;
374
+ outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
276
375
  }
277
376
  }
278
- // Skip the output we just used/cleared
279
- stft += plainOutput + foldedBackOutput;
377
+ stft.reset(0.1);
378
+
280
379
  // Reset the phase-vocoder stuff, so the next block gets a fresh start
281
380
  for (int c = 0; c < channels; ++c) {
282
- auto channelBands_ = bandsForChannel(c);
381
+ auto channelBands = bandsForChannel(c);
283
382
  for (int b = 0; b < bands; ++b) {
284
- channelBands_[b].prevInput = channelBands_[b].output = 0;
383
+ channelBands[b].prevInput = channelBands[b].output = 0;
285
384
  }
286
385
  }
287
- flushed = true;
288
386
  }
289
387
  private:
388
+ bool _splitComputation = false;
389
+ struct {
390
+ size_t samplesSinceLast = -1;
391
+ size_t steps = 0;
392
+ size_t step = 0;
393
+
394
+ bool newSpectrum = false;
395
+ bool reanalysePrev = false;
396
+ bool mappedFrequencies = false;
397
+ Sample timeFactor;
398
+ } blockProcess;
399
+
290
400
  using Complex = std::complex<Sample>;
291
401
  static constexpr Sample noiseFloor{1e-15};
292
402
  static constexpr Sample maxCleanStretch{2}; // time-stretch ratio before we start randomising phases
293
- int silenceCounter = 0;
403
+ size_t silenceCounter = 0;
294
404
  bool silenceFirst = true;
295
405
 
296
406
  Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
297
407
  std::function<Sample(Sample)> customFreqMap = nullptr;
298
408
 
299
- signalsmith::spectral::STFT<Sample> stft{0, 1, 1};
300
- signalsmith::delay::MultiBuffer<Sample> inputBuffer;
409
+ using STFT = signalsmith::linear::DynamicSTFT<Sample, false, true>;
410
+ STFT stft;
411
+ typename STFT::Input stashedInput;
412
+ typename STFT::Output stashedOutput;
413
+
414
+ std::vector<Sample> tmpBuffer;
415
+
301
416
  int channels = 0, bands = 0;
302
417
  int prevInputOffset = -1;
303
- std::vector<Sample> timeBuffer;
304
- bool didSeek = false, flushed = true;
418
+ bool didSeek = false;
305
419
  Sample seekTimeFactor = 1;
306
420
 
307
421
  Sample bandToFreq(Sample b) const {
308
- return (b + Sample(0.5))/stft.fftSize();
422
+ return stft.binToFreq(b);
309
423
  }
310
424
  Sample freqToBand(Sample f) const {
311
- return f*stft.fftSize() - Sample(0.5);
425
+ return stft.freqToBin(f);
312
426
  }
313
427
 
314
428
  struct Band {
@@ -370,10 +484,10 @@ private:
370
484
  Complex input;
371
485
 
372
486
  Complex makeOutput(Complex phase) {
373
- Sample phaseNorm = std::norm(phase);
487
+ Sample phaseNorm = _impl::norm(phase);
374
488
  if (phaseNorm <= noiseFloor) {
375
489
  phase = input; // prediction is too weak, fall back to the input
376
- phaseNorm = std::norm(input) + noiseFloor;
490
+ phaseNorm = _impl::norm(input) + noiseFloor;
377
491
  }
378
492
  return phase*std::sqrt(energy/phaseNorm);
379
493
  }
@@ -383,49 +497,85 @@ private:
383
497
  return channelPredictions.data() + c*bands;
384
498
  }
385
499
 
386
- RandomEngine randomEngine;
500
+ // If RandomEngine=void, use std::default_random_engine;
501
+ using RandomEngineImpl = typename std::conditional<
502
+ std::is_void<RandomEngine>::value,
503
+ std::default_random_engine,
504
+ RandomEngine
505
+ >::type;
506
+ RandomEngineImpl randomEngine;
507
+
508
+ size_t processSpectrumSteps = 0;
509
+ static constexpr size_t splitMainPrediction = 8; // it's just heavy, since we're blending up to 4 different phase predictions
510
+ void updateProcessSpectrumSteps() {
511
+ processSpectrumSteps = 0;
512
+ if (blockProcess.newSpectrum) processSpectrumSteps += channels;
513
+ if (blockProcess.mappedFrequencies) {
514
+ processSpectrumSteps += smoothEnergySteps;
515
+ processSpectrumSteps += 1; // findPeaks
516
+ }
517
+ processSpectrumSteps += 1; // updating the output map
518
+ processSpectrumSteps += channels; // preliminary phase-vocoder prediction
519
+ processSpectrumSteps += splitMainPrediction;
520
+ if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput
521
+ }
522
+ void processSpectrum(size_t step) {
523
+ Sample timeFactor = blockProcess.timeFactor;
387
524
 
388
- void processSpectrum(bool newSpectrum, Sample timeFactor) {
525
+ Sample smoothingBins = Sample(stft.fftSamples())/stft.defaultInterval();
526
+ int longVerticalStep = std::round(smoothingBins);
389
527
  timeFactor = std::max<Sample>(timeFactor, 1/maxCleanStretch);
390
528
  bool randomTimeFactor = (timeFactor > maxCleanStretch);
391
529
  std::uniform_real_distribution<Sample> timeFactorDist(maxCleanStretch*2*randomTimeFactor - timeFactor, timeFactor);
392
530
 
393
- if (newSpectrum) {
394
- for (int c = 0; c < channels; ++c) {
395
- auto bins = bandsForChannel(c);
531
+ if (blockProcess.newSpectrum) {
532
+ if (step < size_t(channels)) {
533
+ int channel = int(step);
534
+ auto bins = bandsForChannel(channel);
396
535
 
397
- Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.interval()*Sample(2*M_PI));
536
+ Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI));
398
537
  Sample freqStep = bandToFreq(1) - bandToFreq(0);
399
- Complex rotStep = std::polar(Sample(1), freqStep*stft.interval()*Sample(2*M_PI));
538
+ Complex rotStep = std::polar(Sample(1), freqStep*stft.defaultInterval()*Sample(2*M_PI));
400
539
 
401
540
  for (int b = 0; b < bands; ++b) {
402
541
  auto &bin = bins[b];
403
- bin.output = signalsmith::perf::mul(bin.output, rot);
404
- bin.prevInput = signalsmith::perf::mul(bin.prevInput, rot);
405
- rot = signalsmith::perf::mul(rot, rotStep);
542
+ bin.output = _impl::mul(bin.output, rot);
543
+ bin.prevInput = _impl::mul(bin.prevInput, rot);
544
+ rot = _impl::mul(rot, rotStep);
406
545
  }
546
+ return;
407
547
  }
548
+ step -= channels;
408
549
  }
409
-
410
- Sample smoothingBins = Sample(stft.fftSize())/stft.interval();
411
- int longVerticalStep = std::round(smoothingBins);
412
- if (customFreqMap || freqMultiplier != 1) {
413
- findPeaks(smoothingBins);
414
- updateOutputMap();
415
- } else { // we're not pitch-shifting, so no need to find peaks etc.
416
- for (int c = 0; c < channels; ++c) {
417
- Band *bins = bandsForChannel(c);
550
+ if (blockProcess.mappedFrequencies) {
551
+ if (step < smoothEnergySteps) {
552
+ smoothEnergy(step, smoothingBins);
553
+ return;
554
+ }
555
+ step -= smoothEnergySteps;
556
+ if (step-- == 0) {
557
+ findPeaks();
558
+ return;
559
+ }
560
+ }
561
+ if (step-- == 0) {
562
+ if (blockProcess.mappedFrequencies) {
563
+ updateOutputMap();
564
+ } else { // we're not pitch-shifting, so no need to find peaks etc.
565
+ for (int c = 0; c < channels; ++c) {
566
+ Band *bins = bandsForChannel(c);
567
+ for (int b = 0; b < bands; ++b) {
568
+ bins[b].inputEnergy = _impl::norm(bins[b].input);
569
+ }
570
+ }
418
571
  for (int b = 0; b < bands; ++b) {
419
- bins[b].inputEnergy = std::norm(bins[b].input);
572
+ outputMap[b] = {Sample(b), 1};
420
573
  }
421
574
  }
422
- for (int b = 0; b < bands; ++b) {
423
- outputMap[b] = {Sample(b), 1};
424
- }
575
+ return;
425
576
  }
426
-
427
- // Preliminary output prediction from phase-vocoder
428
- for (int c = 0; c < channels; ++c) {
577
+ if (step < size_t(channels)) {
578
+ int c = int(step);
429
579
  Band *bins = bandsForChannel(c);
430
580
  auto *predictions = predictionsForChannel(c);
431
581
  for (int b = 0; b < bands; ++b) {
@@ -441,122 +591,140 @@ private:
441
591
 
442
592
  auto &outputBin = bins[b];
443
593
  Complex prevInput = getFractional<&Band::prevInput>(c, lowIndex, fracIndex);
444
- Complex freqTwist = signalsmith::perf::mul<true>(prediction.input, prevInput);
445
- Complex phase = signalsmith::perf::mul(outputBin.output, freqTwist);
594
+ Complex freqTwist = _impl::mul<true>(prediction.input, prevInput);
595
+ Complex phase = _impl::mul(outputBin.output, freqTwist);
446
596
  outputBin.output = phase/(std::max(prevEnergy, prediction.energy) + noiseFloor);
447
597
  }
598
+ return;
448
599
  }
449
-
450
- // Re-predict using phase differences between frequencies
451
- for (int b = 0; b < bands; ++b) {
452
- // Find maximum-energy channel and calculate that
453
- int maxChannel = 0;
454
- Sample maxEnergy = predictionsForChannel(0)[b].energy;
455
- for (int c = 1; c < channels; ++c) {
456
- Sample e = predictionsForChannel(c)[b].energy;
457
- if (e > maxEnergy) {
458
- maxChannel = c;
459
- maxEnergy = e;
600
+ step -= channels;
601
+
602
+ if (step < splitMainPrediction) {
603
+ // Re-predict using phase differences between frequencies
604
+ size_t chunk = step;
605
+ int startB = int(bands*chunk/splitMainPrediction);
606
+ int endB = int(bands*(chunk + 1)/splitMainPrediction);
607
+ for (int b = startB; b < endB; ++b) {
608
+ // Find maximum-energy channel and calculate that
609
+ int maxChannel = 0;
610
+ Sample maxEnergy = predictionsForChannel(0)[b].energy;
611
+ for (int c = 1; c < channels; ++c) {
612
+ Sample e = predictionsForChannel(c)[b].energy;
613
+ if (e > maxEnergy) {
614
+ maxChannel = c;
615
+ maxEnergy = e;
616
+ }
460
617
  }
461
- }
462
618
 
463
- auto *predictions = predictionsForChannel(maxChannel);
464
- auto &prediction = predictions[b];
465
- auto *bins = bandsForChannel(maxChannel);
466
- auto &outputBin = bins[b];
619
+ auto *predictions = predictionsForChannel(maxChannel);
620
+ auto &prediction = predictions[b];
621
+ auto *bins = bandsForChannel(maxChannel);
622
+ auto &outputBin = bins[b];
467
623
 
468
- Complex phase = 0;
469
- auto mapPoint = outputMap[b];
624
+ Complex phase = 0;
625
+ auto mapPoint = outputMap[b];
470
626
 
471
- // Upwards vertical steps
472
- if (b > 0) {
473
- Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
474
- Complex downInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - binTimeFactor);
475
- Complex shortVerticalTwist = signalsmith::perf::mul<true>(prediction.input, downInput);
627
+ // Upwards vertical steps
628
+ if (b > 0) {
629
+ Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
630
+ Complex downInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - binTimeFactor);
631
+ Complex shortVerticalTwist = _impl::mul<true>(prediction.input, downInput);
476
632
 
477
- auto &downBin = bins[b - 1];
478
- phase += signalsmith::perf::mul(downBin.output, shortVerticalTwist);
633
+ auto &downBin = bins[b - 1];
634
+ phase += _impl::mul(downBin.output, shortVerticalTwist);
479
635
 
480
- if (b >= longVerticalStep) {
481
- Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor);
482
- Complex longVerticalTwist = signalsmith::perf::mul<true>(prediction.input, longDownInput);
636
+ if (b >= longVerticalStep) {
637
+ Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor);
638
+ Complex longVerticalTwist = _impl::mul<true>(prediction.input, longDownInput);
483
639
 
484
- auto &longDownBin = bins[b - longVerticalStep];
485
- phase += signalsmith::perf::mul(longDownBin.output, longVerticalTwist);
640
+ auto &longDownBin = bins[b - longVerticalStep];
641
+ phase += _impl::mul(longDownBin.output, longVerticalTwist);
642
+ }
486
643
  }
487
- }
488
- // Downwards vertical steps
489
- if (b < bands - 1) {
490
- auto &upPrediction = predictions[b + 1];
491
- auto &upMapPoint = outputMap[b + 1];
644
+ // Downwards vertical steps
645
+ if (b < bands - 1) {
646
+ auto &upPrediction = predictions[b + 1];
647
+ auto &upMapPoint = outputMap[b + 1];
492
648
 
493
- Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
494
- Complex downInput = getFractional<&Band::input>(maxChannel, upMapPoint.inputBin - binTimeFactor);
495
- Complex shortVerticalTwist = signalsmith::perf::mul<true>(upPrediction.input, downInput);
649
+ Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
650
+ Complex downInput = getFractional<&Band::input>(maxChannel, upMapPoint.inputBin - binTimeFactor);
651
+ Complex shortVerticalTwist = _impl::mul<true>(upPrediction.input, downInput);
496
652
 
497
- auto &upBin = bins[b + 1];
498
- phase += signalsmith::perf::mul<true>(upBin.output, shortVerticalTwist);
653
+ auto &upBin = bins[b + 1];
654
+ phase += _impl::mul<true>(upBin.output, shortVerticalTwist);
499
655
 
500
- if (b < bands - longVerticalStep) {
501
- auto &longUpPrediction = predictions[b + longVerticalStep];
502
- auto &longUpMapPoint = outputMap[b + longVerticalStep];
656
+ if (b < bands - longVerticalStep) {
657
+ auto &longUpPrediction = predictions[b + longVerticalStep];
658
+ auto &longUpMapPoint = outputMap[b + longVerticalStep];
503
659
 
504
- Complex longDownInput = getFractional<&Band::input>(maxChannel, longUpMapPoint.inputBin - longVerticalStep*binTimeFactor);
505
- Complex longVerticalTwist = signalsmith::perf::mul<true>(longUpPrediction.input, longDownInput);
660
+ Complex longDownInput = getFractional<&Band::input>(maxChannel, longUpMapPoint.inputBin - longVerticalStep*binTimeFactor);
661
+ Complex longVerticalTwist = _impl::mul<true>(longUpPrediction.input, longDownInput);
506
662
 
507
- auto &longUpBin = bins[b + longVerticalStep];
508
- phase += signalsmith::perf::mul<true>(longUpBin.output, longVerticalTwist);
663
+ auto &longUpBin = bins[b + longVerticalStep];
664
+ phase += _impl::mul<true>(longUpBin.output, longVerticalTwist);
665
+ }
509
666
  }
510
- }
511
667
 
512
- outputBin.output = prediction.makeOutput(phase);
668
+ outputBin.output = prediction.makeOutput(phase);
513
669
 
514
- // All other bins are locked in phase
515
- for (int c = 0; c < channels; ++c) {
516
- if (c != maxChannel) {
517
- auto &channelBin = bandsForChannel(c)[b];
518
- auto &channelPrediction = predictionsForChannel(c)[b];
670
+ // All other bins are locked in phase
671
+ for (int c = 0; c < channels; ++c) {
672
+ if (c != maxChannel) {
673
+ auto &channelBin = bandsForChannel(c)[b];
674
+ auto &channelPrediction = predictionsForChannel(c)[b];
519
675
 
520
- Complex channelTwist = signalsmith::perf::mul<true>(channelPrediction.input, prediction.input);
521
- Complex channelPhase = signalsmith::perf::mul(outputBin.output, channelTwist);
522
- channelBin.output = channelPrediction.makeOutput(channelPhase);
676
+ Complex channelTwist = _impl::mul<true>(channelPrediction.input, prediction.input);
677
+ Complex channelPhase = _impl::mul(outputBin.output, channelTwist);
678
+ channelBin.output = channelPrediction.makeOutput(channelPhase);
679
+ }
523
680
  }
524
681
  }
682
+ return;
525
683
  }
684
+ step -= splitMainPrediction;
526
685
 
527
- if (newSpectrum) {
528
- for (auto &bin : channelBands) {
529
- bin.prevInput = bin.input;
686
+ if (blockProcess.newSpectrum) {
687
+ if (step-- == 0) {
688
+ for (auto &bin : channelBands) {
689
+ bin.prevInput = bin.input;
690
+ }
530
691
  }
531
692
  }
532
693
  }
533
694
 
534
695
  // Produces smoothed energy across all channels
535
- void smoothEnergy(Sample smoothingBins) {
696
+ static constexpr size_t smoothEnergySteps = 3;
697
+ Sample smoothEnergyState = 0;
698
+ void smoothEnergy(size_t step, Sample smoothingBins) {
536
699
  Sample smoothingSlew = 1/(1 + smoothingBins*Sample(0.5));
537
- for (auto &e : energy) e = 0;
538
- for (int c = 0; c < channels; ++c) {
539
- Band *bins = bandsForChannel(c);
700
+ if (step-- == 0) {
701
+ for (auto &e : energy) e = 0;
702
+ for (int c = 0; c < channels; ++c) {
703
+ Band *bins = bandsForChannel(c);
704
+ for (int b = 0; b < bands; ++b) {
705
+ Sample e = _impl::norm(bins[b].input);
706
+ bins[b].inputEnergy = e; // Used for interpolating prediction energy
707
+ energy[b] += e;
708
+ }
709
+ }
540
710
  for (int b = 0; b < bands; ++b) {
541
- Sample e = std::norm(bins[b].input);
542
- bins[b].inputEnergy = e; // Used for interpolating prediction energy
543
- energy[b] += e;
711
+ smoothedEnergy[b] = energy[b];
544
712
  }
713
+ smoothEnergyState = 0;
714
+ return;
545
715
  }
546
- for (int b = 0; b < bands; ++b) {
547
- smoothedEnergy[b] = energy[b];
716
+
717
+ // The two other steps are repeated smoothing passes, down and up
718
+ Sample e = smoothEnergyState;
719
+ for (int b = bands - 1; b >= 0; --b) {
720
+ e += (smoothedEnergy[b] - e)*smoothingSlew;
721
+ smoothedEnergy[b] = e;
548
722
  }
549
- Sample e = 0;
550
- for (int repeat = 0; repeat < 2; ++repeat) {
551
- for (int b = bands - 1; b >= 0; --b) {
552
- e += (smoothedEnergy[b] - e)*smoothingSlew;
553
- smoothedEnergy[b] = e;
554
- }
555
- for (int b = 0; b < bands; ++b) {
556
- e += (smoothedEnergy[b] - e)*smoothingSlew;
557
- smoothedEnergy[b] = e;
558
- }
723
+ for (int b = 0; b < bands; ++b) {
724
+ e += (smoothedEnergy[b] - e)*smoothingSlew;
725
+ smoothedEnergy[b] = e;
559
726
  }
727
+ smoothEnergyState = e;
560
728
  }
561
729
 
562
730
  Sample mapFreq(Sample freq) const {
@@ -569,9 +737,7 @@ private:
569
737
  }
570
738
 
571
739
  // Identifies spectral peaks using energy across all channels
572
- void findPeaks(Sample smoothingBins) {
573
- smoothEnergy(smoothingBins);
574
-
740
+ void findPeaks() {
575
741
  peaks.resize(0);
576
742
 
577
743
  int start = 0;