react-native-audio-api 0.7.2-nightly-c06331b-20250823 → 0.7.2-nightly-799cc6b-20250825

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,14 @@
1
1
  #ifndef SIGNALSMITH_STRETCH_H
2
2
  #define SIGNALSMITH_STRETCH_H
3
3
 
4
- #include <audioapi/libs/signalsmith-stretch/stft.h>
4
+ #include <audioapi/libs/signalsmith-stretch/stft.h> // https://github.com/Signalsmith-Audio/linear
5
+
5
6
  #include <vector>
7
+ #include <array>
6
8
  #include <algorithm>
7
9
  #include <functional>
8
10
  #include <random>
11
+ #include <limits>
9
12
  #include <type_traits>
10
13
 
11
14
  namespace signalsmith { namespace stretch {
@@ -30,17 +33,12 @@ namespace _impl {
30
33
 
31
34
  template<typename Sample=float, class RandomEngine=void>
32
35
  struct SignalsmithStretch {
33
- static constexpr size_t version[3] = {1, 1, 1};
36
+ static constexpr size_t version[3] = {1, 3, 2};
34
37
 
35
38
  SignalsmithStretch() : randomEngine(std::random_device{}()) {}
36
39
  SignalsmithStretch(long seed) : randomEngine(seed) {}
37
40
 
38
- int blockSamples() const {
39
- return int(stft.blockSamples());
40
- }
41
- int intervalSamples() const {
42
- return int(stft.defaultInterval());
43
- }
41
+ // The difference between the internal position (centre of a block) and the input samples you're supplying
44
42
  int inputLatency() const {
45
43
  return int(stft.analysisLatency());
46
44
  }
@@ -57,8 +55,8 @@ struct SignalsmithStretch {
57
55
  channelBands.assign(channelBands.size(), Band());
58
56
  silenceCounter = 0;
59
57
  didSeek = false;
60
-
61
58
  blockProcess = {};
59
+ freqEstimateWeighted = freqEstimateWeight = 0;
62
60
  }
63
61
 
64
62
  // Configures using a default preset
@@ -78,7 +76,6 @@ struct SignalsmithStretch {
78
76
  stft.reset(0.1);
79
77
  stashedInput = stft.input;
80
78
  stashedOutput = stft.output;
81
- tmpBuffer.resize(blockSamples + intervalSamples);
82
79
 
83
80
  bands = int(stft.bands());
84
81
  channelBands.assign(bands*channels, Band());
@@ -90,6 +87,20 @@ struct SignalsmithStretch {
90
87
  channelPredictions.resize(channels*bands);
91
88
 
92
89
  blockProcess = {};
90
+ formantMetric.resize(bands + 2);
91
+
92
+ tmpProcessBuffer.resize(blockSamples + intervalSamples);
93
+ tmpPreRollBuffer.resize(outputLatency()*channels);
94
+ }
95
+ // For querying the existing config
96
+ int blockSamples() const {
97
+ return int(stft.blockSamples());
98
+ }
99
+ int intervalSamples() const {
100
+ return int(stft.defaultInterval());
101
+ }
102
+ bool splitComputation() const {
103
+ return _splitComputation;
93
104
  }
94
105
 
95
106
  /// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
@@ -104,21 +115,34 @@ struct SignalsmithStretch {
104
115
  }
105
116
  void setTransposeSemitones(Sample semitones, Sample tonalityLimit=0) {
106
117
  setTransposeFactor(std::pow(2, semitones/12), tonalityLimit);
107
- customFreqMap = nullptr;
108
118
  }
109
119
  // Sets a custom frequency map - should be monotonically increasing
110
120
  void setFreqMap(std::function<Sample(Sample)> inputToOutput) {
111
121
  customFreqMap = inputToOutput;
112
122
  }
113
123
 
114
- // Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
124
+ void setFormantFactor(Sample multiplier, bool compensatePitch=false) {
125
+ formantMultiplier = multiplier;
126
+ invFormantMultiplier = 1/multiplier;
127
+ formantCompensation = compensatePitch;
128
+ }
129
+ void setFormantSemitones(Sample semitones, bool compensatePitch=false) {
130
+ setFormantFactor(std::pow(2, semitones/12), compensatePitch);
131
+ }
132
+ // Rough guesstimate of the fundamental frequency, used for formant analysis. 0 means attempting to detect the pitch
133
+ void setFormantBase(Sample baseFreq=0) {
134
+ formantBaseFreq = baseFreq;
135
+ }
136
+
137
+ // Provide previous input ("pre-roll") to smoothly change the input location without interrupting the output. This doesn't do any calculation, just copies input to a buffer.
138
+ // You should ideally feed it `seekLength()` frames of input, unless it's directly after a `.reset()` (in which case `.outputSeek()` might be a better choice)
115
139
  template<class Inputs>
116
140
  void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
117
- tmpBuffer.resize(0);
118
- tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
141
+ tmpProcessBuffer.resize(0);
142
+ tmpProcessBuffer.resize(stft.blockSamples() + stft.defaultInterval());
119
143
 
120
- int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
121
- int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
144
+ int startIndex = std::max<int>(0, inputSamples - int(tmpProcessBuffer.size())); // start position in input
145
+ int padStart = int(tmpProcessBuffer.size() + startIndex) - inputSamples; // start position in tmpProcessBuffer
122
146
 
123
147
  Sample totalEnergy = 0;
124
148
  for (int c = 0; c < channels; ++c) {
@@ -126,12 +150,12 @@ struct SignalsmithStretch {
126
150
  for (int i = startIndex; i < inputSamples; ++i) {
127
151
  Sample s = inputChannel[i];
128
152
  totalEnergy += s*s;
129
- tmpBuffer[i - startIndex + padStart] = s;
153
+ tmpProcessBuffer[i - startIndex + padStart] = s;
130
154
  }
131
155
 
132
- stft.writeInput(c, tmpBuffer.size(), tmpBuffer.data());
156
+ stft.writeInput(c, tmpProcessBuffer.size(), tmpProcessBuffer.data());
133
157
  }
134
- stft.moveInput(tmpBuffer.size());
158
+ stft.moveInput(tmpProcessBuffer.size());
135
159
  if (totalEnergy >= noiseFloor) {
136
160
  silenceCounter = 0;
137
161
  silenceFirst = true;
@@ -139,6 +163,48 @@ struct SignalsmithStretch {
139
163
  didSeek = true;
140
164
  seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
141
165
  }
166
+ int seekLength() const {
167
+ return int(stft.blockSamples() + stft.defaultInterval());
168
+ }
169
+
170
+ // Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample.
171
+ // The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that.
172
+ template<class Inputs>
173
+ void outputSeek(Inputs &&inputs, int inputLength) {
174
+ // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset
175
+ reset();
176
+ // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll
177
+ int surplusInput = std::max<int>(inputLength - inputLatency(), 0);
178
+ Sample playbackRate = surplusInput/Sample(outputLatency());
179
+
180
+ // Move the input position to the start of the sound
181
+ int seekSamples = inputLength - surplusInput;
182
+ seek(inputs, seekSamples, playbackRate);
183
+
184
+ tmpPreRollBuffer.resize(outputLatency()*channels);
185
+ struct BufferOutput {
186
+ Sample *samples;
187
+ int length;
188
+
189
+ Sample * operator[](int c) {
190
+ return samples + c*length;
191
+ }
192
+ } preRollOutput{tmpPreRollBuffer.data(), outputLatency()};
193
+
194
+ // Use the surplus input to produce pre-roll output
195
+ OffsetIO<Inputs> offsetInput{inputs, seekSamples};
196
+ process(offsetInput, surplusInput, preRollOutput, preRollOutput.length);
197
+
198
+ // put the thing down, flip it and reverse it
199
+ for (auto &v : tmpPreRollBuffer) v = -v;
200
+ for (int c = 0; c < channels; ++c) {
201
+ std::reverse(preRollOutput[c], preRollOutput[c] + preRollOutput.length);
202
+ stft.addOutput(c, preRollOutput.length, preRollOutput[c]);
203
+ }
204
+ }
205
+ int outputSeekLength(Sample playbackRate) const {
206
+ return inputLatency() + playbackRate*outputLatency();
207
+ }
142
208
 
143
209
  template<class Inputs, class Outputs>
144
210
  void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
@@ -149,14 +215,14 @@ struct SignalsmithStretch {
149
215
  auto copyInput = [&](int toIndex){
150
216
 
151
217
  int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
152
- tmpBuffer.resize(length);
218
+ tmpProcessBuffer.resize(length);
153
219
  int offset = toIndex - length;
154
220
  for (int c = 0; c < channels; ++c) {
155
221
  auto &&inputBuffer = inputs[c];
156
222
  for (int i = 0; i < length; ++i) {
157
- tmpBuffer[i] = inputBuffer[i + offset];
223
+ tmpProcessBuffer[i] = inputBuffer[i + offset];
158
224
  }
159
- stft.writeInput(c, length, tmpBuffer.data());
225
+ stft.writeInput(c, length, tmpProcessBuffer.data());
160
226
  }
161
227
  stft.moveInput(length);
162
228
  prevCopiedInput = toIndex;
@@ -241,6 +307,8 @@ struct SignalsmithStretch {
241
307
  blockProcess.steps += stft.analyseSteps() + 1;
242
308
  }
243
309
 
310
+ blockProcess.processFormants = formantMultiplier != 1 || (formantCompensation && blockProcess.mappedFrequencies);
311
+
244
312
  blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
245
313
  didSeek = false;
246
314
 
@@ -354,28 +422,38 @@ struct SignalsmithStretch {
354
422
  #endif
355
423
  }
356
424
 
357
- // Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()`
425
+ // Read the remaining output, providing no further input. If `outputSamples` is more than one interval, it will compute additional blocks assuming a zero-valued input
358
426
  template<class Outputs>
359
- void flush(Outputs &&outputs, int outputSamples) {
360
- int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
361
- int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
427
+ void flush(Outputs &&outputs, int outputSamples, Sample playbackRate=0) {
428
+ struct Zeros {
429
+ struct Channel {
430
+ Sample operator[](int) {
431
+ return 0;
432
+ }
433
+ };
434
+ Channel operator[](int) {
435
+ return {};
436
+ }
437
+ } zeros;
438
+ // If we're asked for more than an interval of extra output, then zero-pad the input
439
+ int outputBlock = std::max<int>(0, outputSamples - stft.defaultInterval());
440
+ if (outputBlock > 0) process(zeros, outputBlock*playbackRate, outputs, outputBlock);
441
+
442
+ int tailSamples = outputSamples - outputBlock; // at most one interval
443
+ tmpProcessBuffer.resize(tailSamples);
362
444
  stft.finishOutput(1);
363
445
  for (int c = 0; c < channels; ++c) {
364
- tmpBuffer.resize(plainOutput);
365
- stft.readOutput(c, plainOutput, tmpBuffer.data());
446
+ stft.readOutput(c, tailSamples, tmpProcessBuffer.data());
366
447
  auto &&outputChannel = outputs[c];
367
- for (int i = 0; i < plainOutput; ++i) {
368
- // TODO: plain output should be gain-
369
- outputChannel[i] = tmpBuffer[i];
448
+ for (int i = 0; i < tailSamples; ++i) {
449
+ outputChannel[outputBlock + i] = tmpProcessBuffer[i];
370
450
  }
371
- tmpBuffer.resize(foldedBackOutput);
372
- stft.readOutput(c, plainOutput, foldedBackOutput, tmpBuffer.data());
373
- for (int i = 0; i < foldedBackOutput; ++i) {
374
- outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
451
+ stft.readOutput(c, tailSamples, tailSamples, tmpProcessBuffer.data());
452
+ for (int i = 0; i < tailSamples; ++i) {
453
+ outputChannel[outputBlock + tailSamples - 1 - i] -= tmpProcessBuffer[i];
375
454
  }
376
455
  }
377
- stft.reset(0.1);
378
-
456
+ stft.reset(0.1f);
379
457
  // Reset the phase-vocoder stuff, so the next block gets a fresh start
380
458
  for (int c = 0; c < channels; ++c) {
381
459
  auto channelBands = bandsForChannel(c);
@@ -384,16 +462,45 @@ struct SignalsmithStretch {
384
462
  }
385
463
  }
386
464
  }
465
+
466
+ // Process a complete audio buffer all in one go
467
+ template<class Inputs, class Outputs>
468
+ bool exact(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
469
+ Sample playbackRate = inputSamples/Sample(outputSamples);
470
+ auto seekLength = outputSeekLength(playbackRate);
471
+ if (inputSamples < seekLength) {
472
+ // too short for this - zero the output just to be polite
473
+ for (int c = 0; c < channels; ++c) {
474
+ auto &&channel = outputs[c];
475
+ for (int i = 0; i < outputSamples; ++i) {
476
+ channel[i] = 0;
477
+ }
478
+ }
479
+ return false;
480
+ }
481
+
482
+ outputSeek(inputs, seekLength);
483
+
484
+ int outputIndex = outputSamples - seekLength/playbackRate;
485
+ OffsetIO<Inputs> offsetInput{inputs, seekLength};
486
+ process(offsetInput, inputSamples - seekLength, outputs, outputIndex);
487
+
488
+ OffsetIO<Outputs> offsetOutput{outputs, outputIndex};
489
+ flush(offsetOutput, outputSamples - outputIndex, playbackRate);
490
+ return true;
491
+ }
492
+
387
493
  private:
388
494
  bool _splitComputation = false;
389
495
  struct {
390
- size_t samplesSinceLast = -1;
496
+ size_t samplesSinceLast = std::numeric_limits<size_t>::max();
391
497
  size_t steps = 0;
392
498
  size_t step = 0;
393
499
 
394
500
  bool newSpectrum = false;
395
501
  bool reanalysePrev = false;
396
502
  bool mappedFrequencies = false;
503
+ bool processFormants = false;
397
504
  Sample timeFactor;
398
505
  } blockProcess;
399
506
 
@@ -406,12 +513,15 @@ private:
406
513
  Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
407
514
  std::function<Sample(Sample)> customFreqMap = nullptr;
408
515
 
516
+ bool formantCompensation = false; // compensate for pitch/freq change
517
+ Sample formantMultiplier = 1, invFormantMultiplier = 1;
518
+
409
519
  using STFT = signalsmith::linear::DynamicSTFT<Sample, false, true>;
410
520
  STFT stft;
411
521
  typename STFT::Input stashedInput;
412
522
  typename STFT::Output stashedOutput;
413
523
 
414
- std::vector<Sample> tmpBuffer;
524
+ std::vector<Sample> tmpProcessBuffer, tmpPreRollBuffer;
415
525
 
416
526
  int channels = 0, bands = 0;
417
527
  int prevInputOffset = -1;
@@ -518,6 +628,7 @@ private:
518
628
  processSpectrumSteps += channels; // preliminary phase-vocoder prediction
519
629
  processSpectrumSteps += splitMainPrediction;
520
630
  if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput
631
+ if (blockProcess.processFormants) processSpectrumSteps += 3;
521
632
  }
522
633
  void processSpectrum(size_t step) {
523
634
  Sample timeFactor = blockProcess.timeFactor;
@@ -568,12 +679,21 @@ private:
568
679
  bins[b].inputEnergy = _impl::norm(bins[b].input);
569
680
  }
570
681
  }
682
+
571
683
  for (int b = 0; b < bands; ++b) {
572
684
  outputMap[b] = {Sample(b), 1};
573
685
  }
574
686
  }
575
687
  return;
576
688
  }
689
+ if (blockProcess.processFormants) {
690
+ if (step < 3) {
691
+ updateFormants(step);
692
+ return;
693
+ }
694
+ step -= 3;
695
+ }
696
+ // Preliminary output prediction from phase-vocoder
577
697
  if (step < size_t(channels)) {
578
698
  int c = int(step);
579
699
  Band *bins = bandsForChannel(c);
@@ -730,8 +850,7 @@ private:
730
850
  Sample mapFreq(Sample freq) const {
731
851
  if (customFreqMap) return customFreqMap(freq);
732
852
  if (freq > freqTonalityLimit) {
733
- Sample diff = freq - freqTonalityLimit;
734
- return freqTonalityLimit*freqMultiplier + diff;
853
+ return freq + (freqMultiplier - 1)*freqTonalityLimit;
735
854
  }
736
855
  return freq*freqMultiplier;
737
856
  }
@@ -796,6 +915,145 @@ private:
796
915
  outputMap[b] = {b + topOffset, 1};
797
916
  }
798
917
  }
918
+
919
+ // If we mapped formants the same way as mapFreq(), this would be the inverse
920
+ Sample invMapFormant(Sample freq) const {
921
+ if (freq*invFormantMultiplier > freqTonalityLimit) {
922
+ return freq + (1 - formantMultiplier)*freqTonalityLimit;
923
+ }
924
+ return freq*invFormantMultiplier;
925
+ }
926
+
927
+ Sample freqEstimateWeighted = 0;
928
+ Sample freqEstimateWeight = 0;
929
+ Sample estimateFrequency() {
930
+ // 3 highest peaks in the input
931
+ std::array<int, 3> peakIndices{0, 0, 0};
932
+ for (int b = 1; b < bands - 1; ++b) {
933
+ Sample e = formantMetric[b];
934
+ // local maxima only
935
+ if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
936
+
937
+ if (e > formantMetric[peakIndices[0]]) {
938
+ if (e > formantMetric[peakIndices[1]]) {
939
+ if (e > formantMetric[peakIndices[2]]) {
940
+ peakIndices = {peakIndices[1], peakIndices[2], b};
941
+ } else {
942
+ peakIndices = {peakIndices[1], b, peakIndices[2]};
943
+ }
944
+ } else {
945
+ peakIndices[0] = b;
946
+ }
947
+ }
948
+ }
949
+
950
+ // VERY rough pitch estimation
951
+ int peakEstimate = peakIndices[2];
952
+ if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
953
+ int diff = std::abs(peakEstimate - peakIndices[1]);
954
+ if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
955
+ if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
956
+ int diff = std::abs(peakEstimate - peakIndices[0]);
957
+ if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
958
+ }
959
+ }
960
+ Sample weight = formantMetric[peakIndices[2]];
961
+ // Smooth it out a bit
962
+ freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
963
+ freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
964
+
965
+ return freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
966
+ }
967
+
968
+ Sample freqEstimate;
969
+
970
+ std::vector<Sample> formantMetric;
971
+ Sample formantBaseFreq = 0;
972
+ void updateFormants(size_t step) {
973
+ if (step-- == 0) {
974
+ for (auto &e : formantMetric) e = 0;
975
+ for (int c = 0; c < channels; ++c) {
976
+ Band *bins = bandsForChannel(c);
977
+ for (int b = 0; b < bands; ++b) {
978
+ formantMetric[b] += bins[b].inputEnergy;
979
+ }
980
+ }
981
+
982
+ freqEstimate = freqToBand(formantBaseFreq);
983
+ if (formantBaseFreq <= 0) freqEstimate = estimateFrequency();
984
+ } else if (step-- == 0) {
985
+ Sample decay = 1 - 1/(freqEstimate*0.5 + 1);
986
+ Sample e = 0;
987
+ for (size_t repeat = 0; repeat < 2; ++repeat) {
988
+ for (int b = bands - 1; b >= 0; --b) {
989
+ e = std::max(formantMetric[b], e*decay);
990
+ formantMetric[b] = e;
991
+ }
992
+ for (int b = 0; b < bands; ++b) {
993
+ e = std::max(formantMetric[b], e*decay);
994
+ formantMetric[b] = e;
995
+ }
996
+ }
997
+ decay = 1/decay;
998
+ for (size_t repeat = 0; repeat < 2; ++repeat) {
999
+ for (int b = bands - 1; b >= 0; --b) {
1000
+ e = std::min(formantMetric[b], e*decay);
1001
+ formantMetric[b] = e;
1002
+ }
1003
+ for (int b = 0; b < bands; ++b) {
1004
+ e = std::min(formantMetric[b], e*decay);
1005
+ formantMetric[b] = e;
1006
+ }
1007
+ }
1008
+ } else {
1009
+ auto getFormant = [&](Sample band) -> Sample {
1010
+ if (band < 0) return 0;
1011
+ band = std::min<Sample>(band, bands);
1012
+ int floorBand = std::floor(band);
1013
+ Sample fracBand = band - floorBand;
1014
+ Sample low = formantMetric[floorBand], high = formantMetric[floorBand + 1];
1015
+ return low + (high - low)*fracBand;
1016
+ };
1017
+
1018
+ for (int b = 0; b < bands; ++b) {
1019
+ Sample inputF = bandToFreq(b);
1020
+ Sample outputF = formantCompensation ? mapFreq(inputF) : inputF;
1021
+ outputF = invMapFormant(outputF);
1022
+
1023
+ Sample inputE = formantMetric[b];
1024
+ Sample targetE = getFormant(freqToBand(outputF));
1025
+
1026
+ Sample formantRatio = targetE/(inputE + Sample(1e-30));
1027
+ Sample energyRatio = formantRatio;
1028
+
1029
+ for (int c = 0; c < channels; ++c) {
1030
+ Band *bins = bandsForChannel(c);
1031
+ // This is what's used to decide the output energy, so this affects the output
1032
+ bins[b].inputEnergy *= energyRatio;
1033
+ }
1034
+ }
1035
+ }
1036
+ }
1037
+
1038
+ // Proxy class to avoid copying/allocating anything
1039
+ template<class Io>
1040
+ struct OffsetIO {
1041
+ Io &io;
1042
+ int offset;
1043
+
1044
+ struct Channel {
1045
+ Io &io;
1046
+ int channel;
1047
+ int offset;
1048
+
1049
+ auto operator[](int i) -> decltype(io[0][0]) {
1050
+ return io[channel][i + offset];
1051
+ }
1052
+ };
1053
+ Channel operator[](int c) {
1054
+ return {io, c, offset};
1055
+ }
1056
+ };
799
1057
  };
800
1058
 
801
1059
  }} // namespace