npm - react-native-audio-api - Versions diffs - 0.5.6 → 0.5.7 - Mend

react-native-audio-api 0.5.6 → 0.5.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (9) hide show

package/common/cpp/audioapi/libs/signalsmith-stretch/signalsmith-stretch.h CHANGED Viewed

@@ -1,17 +1,34 @@
 #ifndef SIGNALSMITH_STRETCH_H
 #define SIGNALSMITH_STRETCH_H
-#include <audioapi/libs/signalsmith-stretch/spectral.h>
-#include <audioapi/libs/signalsmith-stretch/delay.h>
-#include <audioapi/libs/signalsmith-stretch/perf.h>
+#include <audioapi/libs/signalsmith-stretch/stft.h>
 #include <vector>
 #include <algorithm>
 #include <functional>
 #include <random>
+#include <type_traits>
 namespace signalsmith { namespace stretch {
-template<typename Sample=float, class RandomEngine=std::default_random_engine>
+namespace _impl {
+	template<bool conjugateSecond=false, typename V>
+	static std::complex<V> mul(const std::complex<V> &a, const std::complex<V> &b) {
+		return conjugateSecond ? std::complex<V>{
+			b.real()*a.real() + b.imag()*a.imag(),
+				b.real()*a.imag() - b.imag()*a.real()
+		} : std::complex<V>{
+			a.real()*b.real() - a.imag()*b.imag(),
+			a.real()*b.imag() + a.imag()*b.real()
+		};
+	}
+	template<typename V>
+	static V norm(const std::complex<V> &a) {
+		V r = a.real(), i = a.imag();
+		return r*r + i*i;
+	}
+}
+template<typename Sample=float, class RandomEngine=void>
 struct SignalsmithStretch {
 	static constexpr size_t version[3] = {1, 1, 1};
@@ -19,44 +36,51 @@ struct SignalsmithStretch {
 	SignalsmithStretch(long seed) : randomEngine(seed) {}
 	int blockSamples() const {
-		return stft.windowSize();
+		return int(stft.blockSamples());
 	}
 	int intervalSamples() const {
-		return stft.interval();
+		return int(stft.defaultInterval());
 	}
 	int inputLatency() const {
-		return stft.windowSize()/2;
+		return int(stft.analysisLatency());
 	}
 	int outputLatency() const {
-		return stft.windowSize() - inputLatency();
+		return int(stft.synthesisLatency() + _splitComputation*stft.defaultInterval());
 	}
 	void reset() {
-		stft.reset();
-		inputBuffer.reset();
+		stft.reset(0.1);
+		stashedInput = stft.input;
+		stashedOutput = stft.output;
 		prevInputOffset = -1;
 		channelBands.assign(channelBands.size(), Band());
 		silenceCounter = 0;
 		didSeek = false;
-		flushed = true;
+		blockProcess = {};
 	}
 	// Configures using a default preset
-	void presetDefault(int nChannels, Sample sampleRate) {
-		configure(nChannels, sampleRate*0.12, sampleRate*0.03);
+	void presetDefault(int nChannels, Sample sampleRate, bool splitComputation=false) {
+		configure(nChannels, sampleRate*0.12, sampleRate*0.03, splitComputation);
 	}
-	void presetCheaper(int nChannels, Sample sampleRate) {
-		configure(nChannels, sampleRate*0.1, sampleRate*0.04);
+	void presetCheaper(int nChannels, Sample sampleRate, bool splitComputation=true) {
+		configure(nChannels, sampleRate*0.1, sampleRate*0.04, splitComputation);
 	}
 	// Manual setup
-	void configure(int nChannels, int blockSamples, int intervalSamples) {
+	void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false) {
+		_splitComputation = splitComputation;
 		channels = nChannels;
-		stft.setWindow(stft.kaiser, true);
-		stft.resize(channels, blockSamples, intervalSamples);
-		bands = stft.bands();
-		inputBuffer.resize(channels, blockSamples + intervalSamples + 1);
-		timeBuffer.assign(stft.fftSize(), 0);
+		stft.configure(channels, channels, blockSamples, intervalSamples + 1);
+		stft.setInterval(intervalSamples, stft.kaiser);
+		stft.reset(0.1);
+		stashedInput = stft.input;
+		stashedOutput = stft.output;
+		tmpBuffer.resize(blockSamples + intervalSamples);
+		bands = int(stft.bands());
 		channelBands.assign(bands*channels, Band());
 		peaks.reserve(bands/2);
@@ -64,6 +88,8 @@ struct SignalsmithStretch {
 		smoothedEnergy.resize(bands);
 		outputMap.resize(bands);
 		channelPredictions.resize(channels*bands);
+		blockProcess = {};
 	}
 	/// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
@@ -88,29 +114,54 @@ struct SignalsmithStretch {
 	// Provide previous input ("pre-roll"), without affecting the speed calculation.  You should ideally feed it one block-length + one interval
 	template<class Inputs>
 	void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
-		inputBuffer.reset();
+		tmpBuffer.resize(0);
+		tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
+		int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
+		int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
 		Sample totalEnergy = 0;
 		for (int c = 0; c < channels; ++c) {
 			auto &&inputChannel = inputs[c];
-			auto &&bufferChannel = inputBuffer[c];
-			int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
 			for (int i = startIndex; i < inputSamples; ++i) {
 				Sample s = inputChannel[i];
 				totalEnergy += s*s;
-				bufferChannel[i] = s;
+				tmpBuffer[i - startIndex + padStart] = s;
 			}
+			stft.writeInput(c, tmpBuffer.size(), tmpBuffer.data());
 		}
+		stft.moveInput(tmpBuffer.size());
 		if (totalEnergy >= noiseFloor) {
 			silenceCounter = 0;
 			silenceFirst = true;
 		}
-		inputBuffer += inputSamples;
 		didSeek = true;
-		seekTimeFactor = (playbackRate*stft.interval() > 1) ? 1/playbackRate : stft.interval();
+		seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
 	}
 	template<class Inputs, class Outputs>
 	void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
+#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_START
+		SIGNALSMITH_STRETCH_PROFILE_PROCESS_START(inputSamples, outputSamples);
+#endif
+		int prevCopiedInput = 0;
+		auto copyInput = [&](int toIndex){
+			int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
+			tmpBuffer.resize(length);
+			int offset = toIndex - length;
+			for (int c = 0; c < channels; ++c) {
+				auto &&inputBuffer = inputs[c];
+				for (int i = 0; i < length; ++i) {
+					tmpBuffer[i] = inputBuffer[i + offset];
+				}
+				stft.writeInput(c, length, tmpBuffer.data());
+			}
+			stft.moveInput(length);
+			prevCopiedInput = toIndex;
+		};
 		Sample totalEnergy = 0;
 		for (int c = 0; c < channels; ++c) {
 			auto &&inputChannel = inputs[c];
@@ -119,10 +170,13 @@ struct SignalsmithStretch {
 				totalEnergy += s*s;
 			}
 		}
 		if (totalEnergy < noiseFloor) {
-			if (silenceCounter >= 2*stft.windowSize()) {
-				if (silenceFirst) {
+			if (silenceCounter >= 2*stft.blockSamples()) {
+				if (silenceFirst) { // first block of silence processing
 					silenceFirst = false;
+					//stft.reset();
+					blockProcess = {};
 					for (auto &b : channelBands) {
 						b.input = b.prevInput = b.output = 0;
 						b.inputEnergy = 0;
@@ -147,15 +201,7 @@ struct SignalsmithStretch {
 				}
 				// Store input in history buffer
-				for (int c = 0; c < channels; ++c) {
-					auto &&inputChannel = inputs[c];
-					auto &&bufferChannel = inputBuffer[c];
-					int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
-					for (int i = startIndex; i < inputSamples; ++i) {
-						bufferChannel[i] = inputChannel[i];
-					}
-				}
-				inputBuffer += inputSamples;
+				copyInput(inputSamples);
 				return;
 			} else {
 				silenceCounter += inputSamples;
@@ -166,149 +212,217 @@ struct SignalsmithStretch {
 		}
 		for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
-			stft.ensureValid(outputIndex, [&](int outputOffset) {
+			bool newBlock = blockProcess.samplesSinceLast >= stft.defaultInterval();
+			if (newBlock) {
+				blockProcess.step = 0;
+				blockProcess.steps = 0; // how many processing steps this block will have
+				blockProcess.samplesSinceLast = 0;
 				// Time to process a spectrum!  Where should it come from in the input?
-				int inputOffset = std::round(outputOffset*Sample(inputSamples)/outputSamples) - stft.windowSize();
+				int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
 				int inputInterval = inputOffset - prevInputOffset;
 				prevInputOffset = inputOffset;
-				bool newSpectrum = didSeek || (inputInterval > 0);
-				if (newSpectrum) {
-					for (int c = 0; c < channels; ++c) {
-						// Copy from the history buffer, if needed
-						auto &&bufferChannel = inputBuffer[c];
-						for (int i = 0; i < -inputOffset; ++i) {
-							timeBuffer[i] = bufferChannel[i + inputOffset];
+				copyInput(inputOffset);
+				stashedInput = stft.input; // save the input state, since that's what we'll analyse later
+				if (_splitComputation) {
+					stashedOutput = stft.output; // save the current output, and read from it
+					stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
+				}
+				blockProcess.newSpectrum = didSeek || (inputInterval > 0);
+				blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
+				if (blockProcess.newSpectrum) {
+					// make sure the previous input is the correct distance in the past (give or take 1 sample)
+					blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
+					if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
+					// analyse a new input
+					blockProcess.steps += stft.analyseSteps() + 1;
+				}
+				blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
+				didSeek = false;
+				updateProcessSpectrumSteps();
+				blockProcess.steps += processSpectrumSteps;
+				blockProcess.steps += stft.synthesiseSteps() + 1;
+			}
+			size_t processToStep = newBlock ? blockProcess.steps : 0;
+			if (_splitComputation) {
+				Sample processRatio = Sample(blockProcess.samplesSinceLast + 1)/stft.defaultInterval();
+				processToStep = std::min<size_t>(blockProcess.steps, (blockProcess.steps + 0.999f)*processRatio);
+			}
+			while (blockProcess.step < processToStep) {
+				size_t step = blockProcess.step++;
+#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP
+				SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP(step, blockProcess.steps);
+#endif
+				if (blockProcess.newSpectrum) {
+					if (blockProcess.reanalysePrev) {
+						// analyse past input
+						if (step < stft.analyseSteps()) {
+							stashedInput.swap(stft.input);
+							stft.analyseStep(step, stft.defaultInterval());
+							stashedInput.swap(stft.input);
+							continue;
 						}
-						// Copy the rest from the input
-						auto &&inputChannel = inputs[c];
-						for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
-							timeBuffer[i] = inputChannel[i + inputOffset];
+						step -= stft.analyseSteps();
+						if (step < 1) {
+							// Copy previous analysis to our band objects
+							for (int c = 0; c < channels; ++c) {
+								auto channelBands = bandsForChannel(c);
+								auto *spectrumBands = stft.spectrum(c);
+								for (int b = 0; b < bands; ++b) {
+									channelBands[b].prevInput = spectrumBands[b];
+								}
+							}
+							continue;
 						}
-						stft.analyse(c, timeBuffer);
+						step -= 1;
 					}
-					flushed = false; // TODO: first block after a flush should be gain-compensated
-					for (int c = 0; c < channels; ++c) {
-						auto channelBands_ = bandsForChannel(c);
-						auto &&spectrumBands = stft.spectrum[c];
-						for (int b = 0; b < bands; ++b) {
-							channelBands_[b].input = spectrumBands[b];
-						}
+					// Analyse latest (stashed) input
+					if (step < stft.analyseSteps()) {
+						stashedInput.swap(stft.input);
+						stft.analyseStep(step);
+						stashedInput.swap(stft.input);
+						continue;
 					}
-					if (didSeek || inputInterval != stft.interval()) { // make sure the previous input is the correct distance in the past
-						int prevIntervalOffset = inputOffset - stft.interval();
+					step -= stft.analyseSteps();
+					if (step < 1) {
+						// Copy analysed spectrum into our band objects
 						for (int c = 0; c < channels; ++c) {
-							// Copy from the history buffer, if needed
-							auto &&bufferChannel = inputBuffer[c];
-							for (int i = 0; i < std::min(-prevIntervalOffset, stft.windowSize()); ++i) {
-								timeBuffer[i] = bufferChannel[i + prevIntervalOffset];
-							}
-							// Copy the rest from the input
-							auto &&inputChannel = inputs[c];
-							for (int i = std::max<int>(0, -prevIntervalOffset); i < stft.windowSize(); ++i) {
-								timeBuffer[i] = inputChannel[i + prevIntervalOffset];
-							}
-							stft.analyse(c, timeBuffer);
-						}
-						for (int c = 0; c < channels; ++c) {
-							auto channelBands_ = bandsForChannel(c);
-							auto &&spectrumBands = stft.spectrum[c];
+							auto channelBands = bandsForChannel(c);
+							auto *spectrumBands = stft.spectrum(c);
 							for (int b = 0; b < bands; ++b) {
-								channelBands_[b].prevInput = spectrumBands[b];
+								channelBands[b].input = spectrumBands[b];
 							}
 						}
+						continue;
 					}
+					step -= 1;
 				}
-				Sample timeFactor = didSeek ? seekTimeFactor : stft.interval()/std::max<Sample>(1, inputInterval);
-				processSpectrum(newSpectrum, timeFactor);
-				didSeek = false;
+				if (step < processSpectrumSteps) {
+					processSpectrum(step);
+					continue;
+				}
+				step -= processSpectrumSteps;
-				for (int c = 0; c < channels; ++c) {
-					auto channelBands_ = bandsForChannel(c);
-					auto &&spectrumBands = stft.spectrum[c];
-					for (int b = 0; b < bands; ++b) {
-						spectrumBands[b] = channelBands_[b].output;
+				if (step < 1) {
+					// Copy band objects into spectrum
+					for (int c = 0; c < channels; ++c) {
+						auto channelBands = bandsForChannel(c);
+						auto *spectrumBands = stft.spectrum(c);
+						for (int b = 0; b < bands; ++b) {
+							spectrumBands[b] = channelBands[b].output;
+						}
 					}
+					continue;
 				}
-			});
+				step -= 1;
+				if (step < stft.synthesiseSteps()) {
+					stft.synthesiseStep(step);
+					continue;
+				}
+			}
+#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP
+			SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP();
+#endif
+			++blockProcess.samplesSinceLast;
+			if (_splitComputation) stashedOutput.swap(stft.output);
 			for (int c = 0; c < channels; ++c) {
 				auto &&outputChannel = outputs[c];
-				auto &&stftChannel = stft[c];
-				outputChannel[outputIndex] = stftChannel[outputIndex];
+				Sample v = 0;
+				stft.readOutput(c, 1, &v);
+				outputChannel[outputIndex] = v;
 			}
+			stft.moveOutput(1);
+			if (_splitComputation) stashedOutput.swap(stft.output);
 		}
-		// Store input in history buffer
-		for (int c = 0; c < channels; ++c) {
-			auto &&inputChannel = inputs[c];
-			auto &&bufferChannel = inputBuffer[c];
-			int startIndex = std::max<int>(0, inputSamples - stft.windowSize());
-			for (int i = startIndex; i < inputSamples; ++i) {
-				bufferChannel[i] = inputChannel[i];
-			}
-		}
-		inputBuffer += inputSamples;
-		stft += outputSamples;
+		copyInput(inputSamples);
 		prevInputOffset -= inputSamples;
+#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_END
+		SIGNALSMITH_STRETCH_PROFILE_PROCESS_END();
+#endif
 	}
 	// Read the remaining output, providing no further input.  `outputSamples` should ideally be at least `.outputLatency()`
 	template<class Outputs>
 	void flush(Outputs &&outputs, int outputSamples) {
-		int plainOutput = std::min<int>(outputSamples, stft.windowSize());
-		int foldedBackOutput = std::min<int>(outputSamples, stft.windowSize() - plainOutput);
+		int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
+		int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
+		stft.finishOutput(1);
 		for (int c = 0; c < channels; ++c) {
+			tmpBuffer.resize(plainOutput);
+			stft.readOutput(c, plainOutput, tmpBuffer.data());
 			auto &&outputChannel = outputs[c];
-			auto &&stftChannel = stft[c];
 			for (int i = 0; i < plainOutput; ++i) {
 				// TODO: plain output should be gain-
-				outputChannel[i] = stftChannel[i];
+				outputChannel[i] = tmpBuffer[i];
 			}
+			tmpBuffer.resize(foldedBackOutput);
+			stft.readOutput(c, plainOutput, foldedBackOutput, tmpBuffer.data());
 			for (int i = 0; i < foldedBackOutput; ++i) {
-				outputChannel[outputSamples - 1 - i] -= stftChannel[plainOutput + i];
-			}
-			for (int i = 0; i < plainOutput + foldedBackOutput; ++i) {
-				stftChannel[i] = 0;
+				outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
 			}
 		}
-		// Skip the output we just used/cleared
-		stft += plainOutput + foldedBackOutput;
+		stft.reset(0.1);
 		// Reset the phase-vocoder stuff, so the next block gets a fresh start
 		for (int c = 0; c < channels; ++c) {
-			auto channelBands_ = bandsForChannel(c);
+			auto channelBands = bandsForChannel(c);
 			for (int b = 0; b < bands; ++b) {
-				channelBands_[b].prevInput = channelBands_[b].output = 0;
+				channelBands[b].prevInput = channelBands[b].output = 0;
 			}
 		}
-		flushed = true;
 	}
 private:
+	bool _splitComputation = false;
+	struct {
+		size_t samplesSinceLast = -1;
+		size_t steps = 0;
+		size_t step = 0;
+		bool newSpectrum = false;
+		bool reanalysePrev = false;
+		bool mappedFrequencies = false;
+		Sample timeFactor;
+	} blockProcess;
 	using Complex = std::complex<Sample>;
 	static constexpr Sample noiseFloor{1e-15};
 	static constexpr Sample maxCleanStretch{2}; // time-stretch ratio before we start randomising phases
-	int silenceCounter = 0;
+	size_t silenceCounter = 0;
 	bool silenceFirst = true;
 	Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
 	std::function<Sample(Sample)> customFreqMap = nullptr;
-	signalsmith::spectral::STFT<Sample> stft{0, 1, 1};
-	signalsmith::delay::MultiBuffer<Sample> inputBuffer;
+	using STFT = signalsmith::linear::DynamicSTFT<Sample, false, true>;
+	STFT stft;
+	typename STFT::Input stashedInput;
+	typename STFT::Output stashedOutput;
+	std::vector<Sample> tmpBuffer;
 	int channels = 0, bands = 0;
 	int prevInputOffset = -1;
-	std::vector<Sample> timeBuffer;
-	bool didSeek = false, flushed = true;
+	bool didSeek = false;
 	Sample seekTimeFactor = 1;
 	Sample bandToFreq(Sample b) const {
-		return (b + Sample(0.5))/stft.fftSize();
+		return stft.binToFreq(b);
 	}
 	Sample freqToBand(Sample f) const {
-		return f*stft.fftSize() - Sample(0.5);
+		return stft.freqToBin(f);
 	}
 	struct Band {
@@ -370,10 +484,10 @@ private:
 		Complex input;
 		Complex makeOutput(Complex phase) {
-			Sample phaseNorm = std::norm(phase);
+			Sample phaseNorm = _impl::norm(phase);
 			if (phaseNorm <= noiseFloor) {
 				phase = input; // prediction is too weak, fall back to the input
-				phaseNorm = std::norm(input) + noiseFloor;
+				phaseNorm = _impl::norm(input) + noiseFloor;
 			}
 			return phase*std::sqrt(energy/phaseNorm);
 		}
@@ -383,49 +497,85 @@ private:
 		return channelPredictions.data() + c*bands;
 	}
-	RandomEngine randomEngine;
+	// If RandomEngine=void, use std::default_random_engine;
+	using RandomEngineImpl = typename std::conditional<
+		std::is_void<RandomEngine>::value,
+		std::default_random_engine,
+		RandomEngine
+	>::type;
+	RandomEngineImpl randomEngine;
+	size_t processSpectrumSteps = 0;
+	static constexpr size_t splitMainPrediction = 8; // it's just heavy, since we're blending up to 4 different phase predictions
+	void updateProcessSpectrumSteps() {
+		processSpectrumSteps = 0;
+		if (blockProcess.newSpectrum) processSpectrumSteps += channels;
+		if (blockProcess.mappedFrequencies) {
+			processSpectrumSteps += smoothEnergySteps;
+			processSpectrumSteps += 1; // findPeaks
+		}
+		processSpectrumSteps += 1; // updating the output map
+		processSpectrumSteps += channels; // preliminary phase-vocoder prediction
+		processSpectrumSteps += splitMainPrediction;
+		if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput
+	}
+	void processSpectrum(size_t step) {
+		Sample timeFactor = blockProcess.timeFactor;
-	void processSpectrum(bool newSpectrum, Sample timeFactor) {
+		Sample smoothingBins = Sample(stft.fftSamples())/stft.defaultInterval();
+		int longVerticalStep = std::round(smoothingBins);
 		timeFactor = std::max<Sample>(timeFactor, 1/maxCleanStretch);
 		bool randomTimeFactor = (timeFactor > maxCleanStretch);
 		std::uniform_real_distribution<Sample> timeFactorDist(maxCleanStretch*2*randomTimeFactor - timeFactor, timeFactor);
-		if (newSpectrum) {
-			for (int c = 0; c < channels; ++c) {
-				auto bins = bandsForChannel(c);
+		if (blockProcess.newSpectrum) {
+			if (step < size_t(channels)) {
+				int channel = int(step);
+				auto bins = bandsForChannel(channel);
-				Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.interval()*Sample(2*M_PI));
+				Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI));
 				Sample freqStep = bandToFreq(1) - bandToFreq(0);
-				Complex rotStep = std::polar(Sample(1), freqStep*stft.interval()*Sample(2*M_PI));
+				Complex rotStep = std::polar(Sample(1), freqStep*stft.defaultInterval()*Sample(2*M_PI));
 				for (int b = 0; b < bands; ++b) {
 					auto &bin = bins[b];
-					bin.output = signalsmith::perf::mul(bin.output, rot);
-					bin.prevInput = signalsmith::perf::mul(bin.prevInput, rot);
-					rot = signalsmith::perf::mul(rot, rotStep);
+					bin.output = _impl::mul(bin.output, rot);
+					bin.prevInput = _impl::mul(bin.prevInput, rot);
+					rot = _impl::mul(rot, rotStep);
 				}
+				return;
 			}
+			step -= channels;
 		}
-		Sample smoothingBins = Sample(stft.fftSize())/stft.interval();
-		int longVerticalStep = std::round(smoothingBins);
-		if (customFreqMap || freqMultiplier != 1) {
-			findPeaks(smoothingBins);
-			updateOutputMap();
-		} else { // we're not pitch-shifting, so no need to find peaks etc.
-			for (int c = 0; c < channels; ++c) {
-				Band *bins = bandsForChannel(c);
+		if (blockProcess.mappedFrequencies) {
+			if (step < smoothEnergySteps) {
+				smoothEnergy(step, smoothingBins);
+				return;
+			}
+			step -= smoothEnergySteps;
+			if (step-- == 0) {
+				findPeaks();
+				return;
+			}
+		}
+		if (step-- == 0) {
+			if (blockProcess.mappedFrequencies) {
+				updateOutputMap();
+			} else { // we're not pitch-shifting, so no need to find peaks etc.
+				for (int c = 0; c < channels; ++c) {
+					Band *bins = bandsForChannel(c);
+					for (int b = 0; b < bands; ++b) {
+						bins[b].inputEnergy = _impl::norm(bins[b].input);
+					}
+				}
 				for (int b = 0; b < bands; ++b) {
-					bins[b].inputEnergy = std::norm(bins[b].input);
+					outputMap[b] = {Sample(b), 1};
 				}
 			}
-			for (int b = 0; b < bands; ++b) {
-				outputMap[b] = {Sample(b), 1};
-			}
+			return;
 		}
-		// Preliminary output prediction from phase-vocoder
-		for (int c = 0; c < channels; ++c) {
+		if (step < size_t(channels)) {
+			int c = int(step);
 			Band *bins = bandsForChannel(c);
 			auto *predictions = predictionsForChannel(c);
 			for (int b = 0; b < bands; ++b) {
@@ -441,122 +591,140 @@ private:
 				auto &outputBin = bins[b];
 				Complex prevInput = getFractional<&Band::prevInput>(c, lowIndex, fracIndex);
-				Complex freqTwist = signalsmith::perf::mul<true>(prediction.input, prevInput);
-				Complex phase = signalsmith::perf::mul(outputBin.output, freqTwist);
+				Complex freqTwist = _impl::mul<true>(prediction.input, prevInput);
+				Complex phase = _impl::mul(outputBin.output, freqTwist);
 				outputBin.output = phase/(std::max(prevEnergy, prediction.energy) + noiseFloor);
 			}
+			return;
 		}
-		// Re-predict using phase differences between frequencies
-		for (int b = 0; b < bands; ++b) {
-			// Find maximum-energy channel and calculate that
-			int maxChannel = 0;
-			Sample maxEnergy = predictionsForChannel(0)[b].energy;
-			for (int c = 1; c < channels; ++c) {
-				Sample e = predictionsForChannel(c)[b].energy;
-				if (e > maxEnergy) {
-					maxChannel = c;
-					maxEnergy = e;
+		step -= channels;
+		if (step < splitMainPrediction) {
+			// Re-predict using phase differences between frequencies
+			size_t chunk = step;
+			int startB = int(bands*chunk/splitMainPrediction);
+			int endB = int(bands*(chunk + 1)/splitMainPrediction);
+			for (int b = startB; b < endB; ++b) {
+				// Find maximum-energy channel and calculate that
+				int maxChannel = 0;
+				Sample maxEnergy = predictionsForChannel(0)[b].energy;
+				for (int c = 1; c < channels; ++c) {
+					Sample e = predictionsForChannel(c)[b].energy;
+					if (e > maxEnergy) {
+						maxChannel = c;
+						maxEnergy = e;
+					}
 				}
-			}
-			auto *predictions = predictionsForChannel(maxChannel);
-			auto &prediction = predictions[b];
-			auto *bins = bandsForChannel(maxChannel);
-			auto &outputBin = bins[b];
+				auto *predictions = predictionsForChannel(maxChannel);
+				auto &prediction = predictions[b];
+				auto *bins = bandsForChannel(maxChannel);
+				auto &outputBin = bins[b];
-			Complex phase = 0;
-			auto mapPoint = outputMap[b];
+				Complex phase = 0;
+				auto mapPoint = outputMap[b];
-			// Upwards vertical steps
-			if (b > 0) {
-				Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
-				Complex downInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - binTimeFactor);
-				Complex shortVerticalTwist = signalsmith::perf::mul<true>(prediction.input, downInput);
+				// Upwards vertical steps
+				if (b > 0) {
+					Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
+					Complex downInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - binTimeFactor);
+					Complex shortVerticalTwist = _impl::mul<true>(prediction.input, downInput);
-				auto &downBin = bins[b - 1];
-				phase += signalsmith::perf::mul(downBin.output, shortVerticalTwist);
+					auto &downBin = bins[b - 1];
+					phase += _impl::mul(downBin.output, shortVerticalTwist);
-				if (b >= longVerticalStep) {
-					Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor);
-					Complex longVerticalTwist = signalsmith::perf::mul<true>(prediction.input, longDownInput);
+					if (b >= longVerticalStep) {
+						Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor);
+						Complex longVerticalTwist = _impl::mul<true>(prediction.input, longDownInput);
-					auto &longDownBin = bins[b - longVerticalStep];
-					phase += signalsmith::perf::mul(longDownBin.output, longVerticalTwist);
+						auto &longDownBin = bins[b - longVerticalStep];
+						phase += _impl::mul(longDownBin.output, longVerticalTwist);
+					}
 				}
-			}
-			// Downwards vertical steps
-			if (b < bands - 1) {
-				auto &upPrediction = predictions[b + 1];
-				auto &upMapPoint = outputMap[b + 1];
+				// Downwards vertical steps
+				if (b < bands - 1) {
+					auto &upPrediction = predictions[b + 1];
+					auto &upMapPoint = outputMap[b + 1];
-				Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
-				Complex downInput = getFractional<&Band::input>(maxChannel, upMapPoint.inputBin - binTimeFactor);
-				Complex shortVerticalTwist = signalsmith::perf::mul<true>(upPrediction.input, downInput);
+					Sample binTimeFactor = randomTimeFactor ? timeFactorDist(randomEngine) : timeFactor;
+					Complex downInput = getFractional<&Band::input>(maxChannel, upMapPoint.inputBin - binTimeFactor);
+					Complex shortVerticalTwist = _impl::mul<true>(upPrediction.input, downInput);
-				auto &upBin = bins[b + 1];
-				phase += signalsmith::perf::mul<true>(upBin.output, shortVerticalTwist);
+					auto &upBin = bins[b + 1];
+					phase += _impl::mul<true>(upBin.output, shortVerticalTwist);
-				if (b < bands - longVerticalStep) {
-					auto &longUpPrediction = predictions[b + longVerticalStep];
-					auto &longUpMapPoint = outputMap[b + longVerticalStep];
+					if (b < bands - longVerticalStep) {
+						auto &longUpPrediction = predictions[b + longVerticalStep];
+						auto &longUpMapPoint = outputMap[b + longVerticalStep];
-					Complex longDownInput = getFractional<&Band::input>(maxChannel, longUpMapPoint.inputBin - longVerticalStep*binTimeFactor);
-					Complex longVerticalTwist = signalsmith::perf::mul<true>(longUpPrediction.input, longDownInput);
+						Complex longDownInput = getFractional<&Band::input>(maxChannel, longUpMapPoint.inputBin - longVerticalStep*binTimeFactor);
+						Complex longVerticalTwist = _impl::mul<true>(longUpPrediction.input, longDownInput);
-					auto &longUpBin = bins[b + longVerticalStep];
-					phase += signalsmith::perf::mul<true>(longUpBin.output, longVerticalTwist);
+						auto &longUpBin = bins[b + longVerticalStep];
+						phase += _impl::mul<true>(longUpBin.output, longVerticalTwist);
+					}
 				}
-			}
-			outputBin.output = prediction.makeOutput(phase);
+				outputBin.output = prediction.makeOutput(phase);
-			// All other bins are locked in phase
-			for (int c = 0; c < channels; ++c) {
-				if (c != maxChannel) {
-					auto &channelBin = bandsForChannel(c)[b];
-					auto &channelPrediction = predictionsForChannel(c)[b];
+				// All other bins are locked in phase
+				for (int c = 0; c < channels; ++c) {
+					if (c != maxChannel) {
+						auto &channelBin = bandsForChannel(c)[b];
+						auto &channelPrediction = predictionsForChannel(c)[b];
-					Complex channelTwist = signalsmith::perf::mul<true>(channelPrediction.input, prediction.input);
-					Complex channelPhase = signalsmith::perf::mul(outputBin.output, channelTwist);
-					channelBin.output = channelPrediction.makeOutput(channelPhase);
+						Complex channelTwist = _impl::mul<true>(channelPrediction.input, prediction.input);
+						Complex channelPhase = _impl::mul(outputBin.output, channelTwist);
+						channelBin.output = channelPrediction.makeOutput(channelPhase);
+					}
 				}
 			}
+			return;
 		}
+		step -= splitMainPrediction;
-		if (newSpectrum) {
-			for (auto &bin : channelBands) {
-				bin.prevInput = bin.input;
+		if (blockProcess.newSpectrum) {
+			if (step-- == 0) {
+				for (auto &bin : channelBands) {
+					bin.prevInput = bin.input;
+				}
 			}
 		}
 	}
 	// Produces smoothed energy across all channels
-	void smoothEnergy(Sample smoothingBins) {
+	static constexpr size_t smoothEnergySteps = 3;
+	Sample smoothEnergyState = 0;
+	void smoothEnergy(size_t step, Sample smoothingBins) {
 		Sample smoothingSlew = 1/(1 + smoothingBins*Sample(0.5));
-		for (auto &e : energy) e = 0;
-		for (int c = 0; c < channels; ++c) {
-			Band *bins = bandsForChannel(c);
+		if (step-- == 0) {
+			for (auto &e : energy) e = 0;
+			for (int c = 0; c < channels; ++c) {
+				Band *bins = bandsForChannel(c);
+				for (int b = 0; b < bands; ++b) {
+					Sample e = _impl::norm(bins[b].input);
+					bins[b].inputEnergy = e; // Used for interpolating prediction energy
+					energy[b] += e;
+				}
+			}
 			for (int b = 0; b < bands; ++b) {
-				Sample e = std::norm(bins[b].input);
-				bins[b].inputEnergy = e; // Used for interpolating prediction energy
-				energy[b] += e;
+				smoothedEnergy[b] = energy[b];
 			}
+			smoothEnergyState = 0;
+			return;
 		}
-		for (int b = 0; b < bands; ++b) {
-			smoothedEnergy[b] = energy[b];
+		// The two other steps are repeated smoothing passes, down and up
+		Sample e = smoothEnergyState;
+		for (int b = bands - 1; b >= 0; --b) {
+			e += (smoothedEnergy[b] - e)*smoothingSlew;
+			smoothedEnergy[b] = e;
 		}
-		Sample e = 0;
-		for (int repeat = 0; repeat < 2; ++repeat) {
-			for (int b = bands - 1; b >= 0; --b) {
-				e += (smoothedEnergy[b] - e)*smoothingSlew;
-				smoothedEnergy[b] = e;
-			}
-			for (int b = 0; b < bands; ++b) {
-				e += (smoothedEnergy[b] - e)*smoothingSlew;
-				smoothedEnergy[b] = e;
-			}
+		for (int b = 0; b < bands; ++b) {
+			e += (smoothedEnergy[b] - e)*smoothingSlew;
+			smoothedEnergy[b] = e;
 		}
+		smoothEnergyState = e;
 	}
 	Sample mapFreq(Sample freq) const {
@@ -569,9 +737,7 @@ private:
 	}
 	// Identifies spectral peaks using energy across all channels
-	void findPeaks(Sample smoothingBins) {
-		smoothEnergy(smoothingBins);
+	void findPeaks() {
 		peaks.resize(0);
 		int start = 0;