npm - react-native-audio-api - Versions diffs - 0.10.0-nightly-d3a7f65-20251028 → 0.10.0-nightly-e16d7ff-20251030 - Mend

react-native-audio-api 0.10.0-nightly-d3a7f65-20251028 → 0.10.0-nightly-e16d7ff-20251030

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

package/RNAudioAPI.podspec +7 -6
package/android/src/main/cpp/audioapi/CMakeLists.txt +6 -0
package/android/src/main/java/com/swmansion/audioapi/system/MediaNotificationManager.kt +11 -0
package/common/cpp/audioapi/HostObjects/BaseAudioContextHostObject.cpp +18 -0
package/common/cpp/audioapi/HostObjects/BaseAudioContextHostObject.h +1 -0
package/common/cpp/audioapi/HostObjects/effects/ConvolverNodeHostObject.cpp +47 -0
package/common/cpp/audioapi/HostObjects/effects/ConvolverNodeHostObject.h +20 -0
package/common/cpp/audioapi/core/AudioNode.h +3 -2
package/common/cpp/audioapi/core/BaseAudioContext.cpp +10 -0
package/common/cpp/audioapi/core/BaseAudioContext.h +2 -0
package/common/cpp/audioapi/core/effects/ConvolverNode.cpp +210 -0
package/common/cpp/audioapi/core/effects/ConvolverNode.h +55 -0
package/common/cpp/audioapi/core/sources/StreamerNode.cpp +59 -58
package/common/cpp/audioapi/core/sources/StreamerNode.h +37 -8
package/common/cpp/audioapi/core/utils/AudioNodeManager.cpp +5 -0
package/common/cpp/audioapi/dsp/AudioUtils.cpp +1 -1
package/common/cpp/audioapi/dsp/Convolver.cpp +213 -0
package/common/cpp/audioapi/dsp/Convolver.h +45 -0
package/common/cpp/audioapi/dsp/FFT.cpp +0 -26
package/common/cpp/audioapi/dsp/FFT.h +26 -2
package/common/cpp/audioapi/utils/AlignedAllocator.hpp +50 -0
package/common/cpp/audioapi/utils/AudioBus.cpp +28 -0
package/common/cpp/audioapi/utils/AudioBus.h +3 -0
package/common/cpp/test/CMakeLists.txt +16 -14
package/lib/commonjs/api.js +7 -0
package/lib/commonjs/api.js.map +1 -1
package/lib/commonjs/api.web.js +8 -0
package/lib/commonjs/api.web.js.map +1 -1
package/lib/commonjs/core/BaseAudioContext.js +12 -0
package/lib/commonjs/core/BaseAudioContext.js.map +1 -1
package/lib/commonjs/core/ConvolverNode.js +37 -0
package/lib/commonjs/core/ConvolverNode.js.map +1 -0
package/lib/commonjs/types.js +4 -0
package/lib/commonjs/web-core/AudioContext.js +12 -0
package/lib/commonjs/web-core/AudioContext.js.map +1 -1
package/lib/commonjs/web-core/ConvolverNode.js +40 -0
package/lib/commonjs/web-core/ConvolverNode.js.map +1 -0
package/lib/commonjs/web-core/ConvolverNodeOptions.js +6 -0
package/lib/commonjs/web-core/ConvolverNodeOptions.js.map +1 -0
package/lib/commonjs/web-core/OfflineAudioContext.js +12 -0
package/lib/commonjs/web-core/OfflineAudioContext.js.map +1 -1
package/lib/module/api.js +1 -0
package/lib/module/api.js.map +1 -1
package/lib/module/api.web.js +1 -0
package/lib/module/api.web.js.map +1 -1
package/lib/module/core/BaseAudioContext.js +12 -0
package/lib/module/core/BaseAudioContext.js.map +1 -1
package/lib/module/core/ConvolverNode.js +31 -0
package/lib/module/core/ConvolverNode.js.map +1 -0
package/lib/module/types.js +2 -0
package/lib/module/web-core/AudioContext.js +12 -0
package/lib/module/web-core/AudioContext.js.map +1 -1
package/lib/module/web-core/ConvolverNode.js +34 -0
package/lib/module/web-core/ConvolverNode.js.map +1 -0
package/lib/module/web-core/ConvolverNodeOptions.js +4 -0
package/lib/module/web-core/ConvolverNodeOptions.js.map +1 -0
package/lib/module/web-core/OfflineAudioContext.js +12 -0
package/lib/module/web-core/OfflineAudioContext.js.map +1 -1
package/lib/typescript/api.d.ts +1 -0
package/lib/typescript/api.d.ts.map +1 -1
package/lib/typescript/api.web.d.ts +1 -0
package/lib/typescript/api.web.d.ts.map +1 -1
package/lib/typescript/core/BaseAudioContext.d.ts +3 -1
package/lib/typescript/core/BaseAudioContext.d.ts.map +1 -1
package/lib/typescript/core/ConvolverNode.d.ts +12 -0
package/lib/typescript/core/ConvolverNode.d.ts.map +1 -0
package/lib/typescript/interfaces.d.ts +5 -0
package/lib/typescript/interfaces.d.ts.map +1 -1
package/lib/typescript/types.d.ts +5 -0
package/lib/typescript/types.d.ts.map +1 -1
package/lib/typescript/web-core/AudioContext.d.ts +3 -0
package/lib/typescript/web-core/AudioContext.d.ts.map +1 -1
package/lib/typescript/web-core/BaseAudioContext.d.ts +2 -0
package/lib/typescript/web-core/BaseAudioContext.d.ts.map +1 -1
package/lib/typescript/web-core/ConvolverNode.d.ts +11 -0
package/lib/typescript/web-core/ConvolverNode.d.ts.map +1 -0
package/lib/typescript/web-core/ConvolverNodeOptions.d.ts +6 -0
package/lib/typescript/web-core/ConvolverNodeOptions.d.ts.map +1 -0
package/lib/typescript/web-core/OfflineAudioContext.d.ts +3 -0
package/lib/typescript/web-core/OfflineAudioContext.d.ts.map +1 -1
package/package.json +1 -1
package/src/api.ts +1 -0
package/src/api.web.ts +1 -0
package/src/core/BaseAudioContext.ts +23 -0
package/src/core/ConvolverNode.ts +35 -0
package/src/interfaces.ts +11 -0
package/src/types.ts +7 -0
package/src/web-core/AudioContext.tsx +25 -0
package/src/web-core/BaseAudioContext.tsx +2 -0
package/src/web-core/ConvolverNode.tsx +43 -0
package/src/web-core/ConvolverNodeOptions.tsx +6 -0
package/src/web-core/OfflineAudioContext.tsx +25 -0

package/common/cpp/audioapi/core/sources/StreamerNode.cpp CHANGED Viewed

@@ -25,14 +25,12 @@ StreamerNode::StreamerNode(BaseAudioContext *context)
       codecpar_(nullptr),
       pkt_(nullptr),
       frame_(nullptr),
-      pendingFrame_(nullptr),
       bufferedBus_(nullptr),
-      bufferedBusIndex_(0),
-      maxBufferSize_(0),
       audio_stream_index_(-1),
       swrCtx_(nullptr),
       resampledData_(nullptr),
-      maxResampledSamples_(0) {}
+      maxResampledSamples_(0),
+      processedSamples_(0) {}
 StreamerNode::~StreamerNode() {
   cleanup();
@@ -66,24 +64,30 @@ bool StreamerNode::initialize(const std::string &input_url) {
     return false;
   }
-  maxBufferSize_ = BUFFER_LENGTH_SECONDS * codecCtx_->sample_rate;
-  // If decoding is faster than playing, we buffer few seconds of audio
-  bufferedBus_ = std::make_shared<AudioBus>(
-      maxBufferSize_, codecpar_->ch_layout.nb_channels, codecCtx_->sample_rate);
   channelCount_ = codecpar_->ch_layout.nb_channels;
   audioBus_ = std::make_shared<AudioBus>(
       RENDER_QUANTUM_SIZE, channelCount_, context_->getSampleRate());
+  auto [sender, receiver] = channels::spsc::channel<
+      StreamingData,
+      channels::spsc::OverflowStrategy::WAIT_ON_FULL,
+      channels::spsc::WaitStrategy::ATOMIC_WAIT>(CHANNEL_CAPACITY);
+  sender_ = std::move(sender);
+  receiver_ = std::move(receiver);
   streamingThread_ = std::thread(&StreamerNode::streamAudio, this);
-  streamFlag.store(true);
+  streamFlag.store(true, std::memory_order_release);
   isInitialized_ = true;
   return true;
 }
 void StreamerNode::stop(double when) {
   AudioScheduledSourceNode::stop(when);
-  streamFlag.store(false);
+  streamFlag.store(false, std::memory_order_release);
+  StreamingData dummy;
+  while (receiver_.try_receive(dummy) ==
+         channels::spsc::ResponseStatus::SUCCESS)
+    ; // clear the receiver
 }
 bool StreamerNode::setupResampler() {
@@ -122,29 +126,22 @@ bool StreamerNode::setupResampler() {
 }
 void StreamerNode::streamAudio() {
-  while (streamFlag.load()) {
-    if (pendingFrame_ != nullptr) {
-      if (!processFrameWithResampler(pendingFrame_)) {
+  while (streamFlag.load(std::memory_order_acquire)) {
+    if (av_read_frame(fmtCtx_, pkt_) < 0) {
+      return;
+    }
+    if (pkt_->stream_index == audio_stream_index_) {
+      if (avcodec_send_packet(codecCtx_, pkt_) != 0) {
         return;
       }
-    } else {
-      if (av_read_frame(fmtCtx_, pkt_) < 0) {
+      if (avcodec_receive_frame(codecCtx_, frame_) != 0) {
         return;
       }
-      if (pkt_->stream_index == audio_stream_index_) {
-        if (avcodec_send_packet(codecCtx_, pkt_) != 0) {
-          return;
-        }
-        if (avcodec_receive_frame(codecCtx_, frame_) != 0) {
-          return;
-        }
-        if (!processFrameWithResampler(frame_)) {
-          return;
-        }
+      if (!processFrameWithResampler(frame_)) {
+        return;
       }
-      av_packet_unref(pkt_);
     }
-    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    av_packet_unref(pkt_);
   }
 }
@@ -160,28 +157,33 @@ std::shared_ptr<AudioBus> StreamerNode::processNode(
     return processingBus;
   }
-  // If we have enough buffered data, copy to output bus
-  if (bufferedBusIndex_ >= framesToProcess) {
-    Locker locker(mutex_);
+  int bufferRemaining = bufferedBusSize_ - processedSamples_;
+  int alreadyProcessed = 0;
+  if (bufferRemaining < framesToProcess) {
+    if (bufferedBus_ != nullptr) {
+      for (int ch = 0; ch < processingBus->getNumberOfChannels(); ch++) {
+        memcpy(
+            processingBus->getChannel(ch)->getData(),
+            bufferedBus_->getChannel(ch)->getData() + processedSamples_,
+            bufferRemaining * sizeof(float));
+      }
+      framesToProcess -= bufferRemaining;
+      alreadyProcessed += bufferRemaining;
+    }
+    StreamingData data;
+    receiver_.try_receive(data);
+    bufferedBus_ = std::make_shared<AudioBus>(std::move(data.bus));
+    bufferedBusSize_ = data.size;
+    processedSamples_ = 0;
+  }
+  if (bufferedBus_ != nullptr) {
     for (int ch = 0; ch < processingBus->getNumberOfChannels(); ch++) {
       memcpy(
-          processingBus->getChannel(ch)->getData(),
-          bufferedBus_->getChannel(ch)->getData(),
-          offsetLength * sizeof(float));
-      memmove(
-          bufferedBus_->getChannel(ch)->getData(),
-          bufferedBus_->getChannel(ch)->getData() + offsetLength,
-          (maxBufferSize_ - offsetLength) * sizeof(float));
+          processingBus->getChannel(ch)->getData() + alreadyProcessed,
+          bufferedBus_->getChannel(ch)->getData() + processedSamples_,
+          framesToProcess * sizeof(float));
     }
-    bufferedBusIndex_ -= offsetLength;
-  } else {
-    if (VERBOSE)
-      printf(
-          "Buffer underrun: have %zu, need %zu\n",
-          bufferedBusIndex_,
-          (size_t)framesToProcess);
-    processingBus->zero();
+    processedSamples_ += framesToProcess;
   }
   return processingBus;
@@ -220,22 +222,21 @@ bool StreamerNode::processFrameWithResampler(AVFrame *frame) {
     return false;
   }
-  // Check if converted data fits in buffer
-  if (bufferedBusIndex_ + converted_samples > maxBufferSize_) {
-    pendingFrame_ = frame;
+  // if we would like to finish dont copy anything
+  if (!streamFlag.load(std::memory_order_acquire)) {
     return true;
-  } else {
-    pendingFrame_ = nullptr;
   }
-  // Copy converted data to our buffer
-  Locker locker(mutex_);
+  auto bus = AudioBus(
+      static_cast<size_t>(converted_samples),
+      codecCtx_->ch_layout.nb_channels,
+      context_->getSampleRate());
   for (int ch = 0; ch < codecCtx_->ch_layout.nb_channels; ch++) {
     auto *src = reinterpret_cast<float *>(resampledData_[ch]);
-    float *dst = bufferedBus_->getChannel(ch)->getData() + bufferedBusIndex_;
+    float *dst = bus.getChannel(ch)->getData();
     memcpy(dst, src, converted_samples * sizeof(float));
   }
-  bufferedBusIndex_ += converted_samples;
+  StreamingData data{std::move(bus), static_cast<size_t>(converted_samples)};
+  sender_.send(std::move(data));
   return true;
 }
@@ -280,7 +281,7 @@ bool StreamerNode::setupDecoder() {
 }
 void StreamerNode::cleanup() {
-  streamFlag.store(false);
+  streamFlag.store(false, std::memory_order_release);
   // cleanup cannot be called from the streaming thread so there is no need to
   // check if we are in the same thread
   streamingThread_.join();

package/common/cpp/audioapi/core/sources/StreamerNode.h CHANGED Viewed

@@ -11,6 +11,7 @@
 #pragma once
 #include <audioapi/core/sources/AudioScheduledSourceNode.h>
+#include <audioapi/utils/AudioBus.h>
 #ifndef AUDIO_API_TEST_SUITE
 extern "C" {
@@ -27,8 +28,35 @@ extern "C" {
 #include <memory>
 #include <string>
 #include <atomic>
+#include <utility>
+#ifndef AUDIO_API_TEST_SUITE
+#include <audioapi/utils/SpscChannel.hpp>
-static bool constexpr VERBOSE = false;
+static constexpr audioapi::channels::spsc::OverflowStrategy STREAMER_NODE_SPSC_OVERFLOW_STRATEGY =
+    audioapi::channels::spsc::OverflowStrategy::WAIT_ON_FULL;
+static constexpr audioapi::channels::spsc::WaitStrategy STREAMER_NODE_SPSC_WAIT_STRATEGY =
+    audioapi::channels::spsc::WaitStrategy::ATOMIC_WAIT;
+#endif
+static constexpr bool VERBOSE = false;
+static constexpr int CHANNEL_CAPACITY = 32;
+/// One chunk of decoded, resampled audio handed from the streaming thread
+/// (sender_) to processNode() (receiver_) through the SPSC channel.
+struct StreamingData{
+  audioapi::AudioBus bus; // samples of this chunk
+  size_t size = 0; // number of valid frames in `bus`; zero until filled in
+  StreamingData() = default;
+  // `b` is already a by-value copy, so move it into place instead of copying
+  // the sample data a second time.
+  StreamingData(audioapi::AudioBus b, size_t s) : bus(std::move(b)), size(s) {}
+  StreamingData(const StreamingData& data) : bus(data.bus), size(data.size) {}
+  StreamingData(StreamingData&& data) noexcept : bus(std::move(data.bus)), size(data.size) {}
+  StreamingData& operator=(const StreamingData& data) {
+    if (this == &data) {
+      return *this;
+    }
+    bus = data.bus;
+    size = data.size;
+    return *this;
+  }
+  // The user-declared copy assignment above suppresses the implicit move
+  // assignment, so without this overload every `x = std::move(y)` would
+  // silently fall back to a deep copy.
+  // NOTE(review): add noexcept once AudioBus's move assignment is confirmed
+  // to be non-throwing.
+  StreamingData& operator=(StreamingData&& data) {
+    if (this == &data) {
+      return *this;
+    }
+    bus = std::move(data.bus);
+    size = data.size;
+    return *this;
+  }
+};
 namespace audioapi {
@@ -56,19 +84,20 @@ class StreamerNode : public AudioScheduledSourceNode {
   AVCodecParameters* codecpar_;
   AVPacket* pkt_;
   AVFrame* frame_; // Frame that is currently being processed
-  AVFrame* pendingFrame_; // Frame that is saved if bufferedBus is full
-  std::shared_ptr<AudioBus> bufferedBus_; // audio bus for buffering hls frames
-  size_t bufferedBusIndex_; // index in the buffered bus where we write the next frame
-  size_t maxBufferSize_; // maximum size of the buffered bus
-  int audio_stream_index_; // index of the audio stream channel in the input
   SwrContext* swrCtx_;
   uint8_t** resampledData_; // weird ffmpeg way of using raw byte pointers for resampled data
+  std::shared_ptr<AudioBus> bufferedBus_; // audio bus for buffering hls frames
+  size_t bufferedBusSize_; // size of currently buffered bus
+  int audio_stream_index_; // index of the audio stream channel in the input
   int maxResampledSamples_;
-  std::mutex mutex_;
+  size_t processedSamples_;
   std::thread streamingThread_;
   std::atomic<bool> streamFlag; // Flag to control the streaming thread
-  static constexpr float BUFFER_LENGTH_SECONDS = 5.0f; // Length of the buffer in seconds
   static constexpr int INITIAL_MAX_RESAMPLED_SAMPLES = 8192; // Initial size for resampled data
+  channels::spsc::Sender<StreamingData, STREAMER_NODE_SPSC_OVERFLOW_STRATEGY, STREAMER_NODE_SPSC_WAIT_STRATEGY> sender_;
+  channels::spsc::Receiver<StreamingData, STREAMER_NODE_SPSC_OVERFLOW_STRATEGY, STREAMER_NODE_SPSC_WAIT_STRATEGY> receiver_;
   /**
    * @brief Setting up the resampler

package/common/cpp/audioapi/core/utils/AudioNodeManager.cpp CHANGED Viewed

@@ -1,5 +1,6 @@
 #include <audioapi/core/AudioNode.h>
 #include <audioapi/core/AudioParam.h>
+#include <audioapi/core/effects/ConvolverNode.h>
 #include <audioapi/core/sources/AudioScheduledSourceNode.h>
 #include <audioapi/core/utils/AudioNodeManager.h>
 #include <audioapi/core/utils/Locker.h>
@@ -221,6 +222,10 @@ inline bool AudioNodeManager::nodeCanBeDestructed(
   if constexpr (std::is_base_of_v<AudioScheduledSourceNode, U>) {
     return node.use_count() == 1 &&
         (node->isUnscheduled() || node->isFinished());
+  } else if constexpr (std::is_base_of_v<
+                           ConvolverNode,
+                           U>) { // convolver overrides disabling behavior
+    return node.use_count() == 1 && !node->isEnabled();
   }
   return node.use_count() == 1;
 }

package/common/cpp/audioapi/dsp/AudioUtils.cpp CHANGED Viewed

@@ -30,4 +30,4 @@ float linearToDecibels(float value) {
 float decibelsToLinear(float value) {
   return pow(10, value / 20);
 }
-} // namespace audioapi::dsp
+} // namespace audioapi::dsp

package/common/cpp/audioapi/dsp/Convolver.cpp ADDED Viewed

@@ -0,0 +1,213 @@
+// implementation of linear convolution algorithm described in this paper:
+// https://publications.rwth-aachen.de/record/466561/files/466561.pdf page 110
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
+#include <audioapi/core/sources/AudioBuffer.h>
+#include <audioapi/dsp/Convolver.h>
+#include <audioapi/dsp/VectorMath.h>
+#include <audioapi/utils/AudioArray.h>
+#include <chrono>
+#include <iostream>
+namespace audioapi {
+/// Creates an empty convolver. init() has to succeed before process() can
+/// produce a convolved signal.
+Convolver::Convolver()
+    : _trueSegmentCount(0), // read by getSegCount(); must not stay indeterminate
+      _blockSize(0),
+      _segSize(0),
+      _segCount(0),
+      _fftComplexSize(0),
+      _segments(),
+      _segmentsIR(),
+      _fftBuffer(0),
+      _fft(nullptr),
+      _preMultiplied(),
+      _current(0),
+      _inputBuffer(0) {}
+/// Drops the prepared impulse response and returns the convolver to its
+/// freshly constructed state.
+/// NOTE(review): _trueSegmentCount is not touched here; init() reassigns it
+/// once the block size has been validated.
+void Convolver::reset() {
+  // release the spectra first ...
+  _preMultiplied.clear();
+  _segmentsIR.clear();
+  _segments.clear();
+  _fft.reset();
+  // ... then wipe the time-domain scratch buffers ...
+  _inputBuffer.zero();
+  _fftBuffer.zero();
+  // ... and finally the bookkeeping
+  _current = _fftComplexSize = _segCount = _segSize = _blockSize = 0;
+}
+/// @brief Prepares the convolver for `blockSize`-sample blocks and the given
+/// impulse response. Any previously prepared state is discarded first.
+/// @param blockSize Number of samples passed to process() per call; must be
+/// a non-zero power of two
+/// @param ir Impulse response samples
+/// @param irLen Number of samples of `ir` to use
+/// @return false when blockSize is rejected; true otherwise, including the
+/// case where the whole impulse response falls below the trim threshold and
+/// no segments are prepared
+bool Convolver::init(
+    size_t blockSize,
+    const audioapi::AudioArray &ir,
+    size_t irLen) {
+  reset();
+  // blockSize must be a power of two. Zero is rejected explicitly: it passes
+  // the bit test but would be used as a divisor below.
+  if (blockSize == 0 || (blockSize & (blockSize - 1))) {
+    return false;
+  }
+  _blockSize = blockSize;
+  // Segment count of the untrimmed impulse response, reported by
+  // getSegCount(). Integer ceil-division keeps the result exact.
+  _trueSegmentCount = (irLen + _blockSize - 1) / _blockSize;
+  // Ignore (near-)zeros at the end of the impulse response because they only
+  // waste computation time. Note that the threshold 10e-3 is 0.01.
+  while (irLen > 0 && ::fabs(ir[irLen - 1]) < 10e-3) {
+    --irLen;
+  }
+  if (irLen == 0) {
+    return true;
+  }
+  // The length-N filter is split into P = ceil(N/B) length-B sub filters
+  _segCount = (irLen + _blockSize - 1) / _blockSize;
+  _segSize = 2 * _blockSize;
+  // size of the FFT is 2B, so the complex size is B+1, due to the
+  // complex-conjugate symmetry
+  _fftComplexSize = _segSize / 2 + 1;
+  _fft = std::make_shared<dsp::FFT>(static_cast<int>(_segSize));
+  _fftBuffer.resize(_segSize);
+  // segments preparation: one zeroed spectrum slot per sub filter
+  _segments.reserve(_segCount);
+  for (size_t i = 0; i < _segCount; ++i) {
+    _segments.emplace_back(_fftComplexSize, std::complex<float>(0.0f, 0.0f));
+  }
+  // ir preparation
+  _segmentsIR.reserve(_segCount);
+  for (size_t i = 0; i < _segCount; ++i) {
+    aligned_vec_complex segment(_fftComplexSize);
+    const size_t offset = i * _blockSize;
+    // offset < irLen holds for every i < _segCount, so this is at least 1
+    const size_t samplesToCopy = std::min(_blockSize, irLen - offset);
+    memcpy(
+        _fftBuffer.getData(),
+        ir.getData() + offset,
+        samplesToCopy * sizeof(float));
+    // Each sub filter is zero-padded to length 2B and transformed using a
+    // 2B-point real-to-complex FFT. Everything after the copied samples is
+    // cleared, not just the upper half: a short last segment would otherwise
+    // still contain samples of the previous segment.
+    memset(
+        _fftBuffer.getData() + samplesToCopy,
+        0,
+        (_segSize - samplesToCopy) * sizeof(float));
+    _fft->doFFT(_fftBuffer.getData(), segment);
+    segment.at(0).imag(0.0f); // ensure DC component is real
+    _segmentsIR.push_back(segment);
+  }
+  _preMultiplied = aligned_vec_complex(_fftComplexSize);
+  _inputBuffer.resize(_segSize);
+  _current = 0;
+  return true;
+}
+/// @brief Pairwise complex multiply-accumulate: pre[k] += ir[k] * audio[k].
+/// Uses ARM NEON intrinsics when available, a scalar loop otherwise.
+/// @param ir Spectrum of one impulse response segment
+/// @param audio Spectrum of one input audio segment
+/// @param pre Accumulator; the products are added to its current contents
+/// @note IMPORTANT: ir, audio, and pre must be the same size and should be
+/// aligned to 16 bytes for optimal performance
+void pairwise_complex_multiply_fast(
+    const Convolver::aligned_vec_complex &ir,
+    const Convolver::aligned_vec_complex &audio,
+    Convolver::aligned_vec_complex &pre) {
+  size_t n = ir.size();
+/// @note Using ARM NEON intrinsics for SIMD optimization
+/// This implementation is on average 2x faster than the scalar version on ARM
+/// architectures. With 16-byte alignment it can be even faster, up to 2.5x.
+#ifdef __ARM_NEON
+  size_t j = 0;
+  // Main vector loop: process 4 complex samples (8 floats) per iteration using
+  // vld2q/vst2q deinterleave. The bound is written as `j + 4 <= n` because n
+  // is unsigned: `n - 4` would wrap around for n < 4 and the loop would run
+  // far past the end of the buffers.
+  for (; j + 4 <= n; j += 4) {
+    // load de-interleaved real/imag for 4 complex values
+    float32x4x2_t ir_de = vld2q_f32(reinterpret_cast<const float *>(&ir[j]));
+    float32x4x2_t a_de = vld2q_f32(reinterpret_cast<const float *>(&audio[j]));
+    float32x4x2_t pre_de = vld2q_f32(reinterpret_cast<float *>(&pre[j]));
+    float32x4_t ir_re = ir_de.val[0];
+    float32x4_t ir_im = ir_de.val[1];
+    float32x4_t a_re = a_de.val[0];
+    float32x4_t a_im = a_de.val[1];
+    // real = ir_re * a_re - ir_im * a_im
+    float32x4_t real = vmulq_f32(ir_re, a_re);
+    real = vmlsq_f32(real, ir_im, a_im);
+    // imag = ir_re * a_im + ir_im * a_re
+    float32x4_t imag = vmulq_f32(ir_re, a_im);
+    imag = vmlaq_f32(imag, ir_im, a_re);
+    // accumulate into pre
+    float32x4_t new_re = vaddq_f32(pre_de.val[0], real);
+    float32x4_t new_im = vaddq_f32(pre_de.val[1], imag);
+    float32x4x2_t out_de;
+    out_de.val[0] = new_re;
+    out_de.val[1] = new_im;
+    vst2q_f32(reinterpret_cast<float *>(&pre[j]), out_de);
+  }
+  // Tail: the remaining (fewer than 4) complex values
+  for (; j < n; ++j) {
+    pre[j] += ir[j] * audio[j];
+  }
+#else
+  // Fallback scalar implementation
+  for (size_t i = 0; i < n; ++i) {
+    pre[i] += ir[i] * audio[i];
+  }
+#endif
+}
+/// @brief Convolves one block of input with the impulse response prepared by
+/// init().
+/// @param data Input block; `_blockSize` samples are read
+/// @param outputData Output block; `_blockSize` samples are written
+void Convolver::process(float *data, float *outputData) {
+  // Nothing to convolve with: init() has not run, was rejected, or trimmed
+  // the impulse response down to nothing (it returns true with no segments
+  // and no FFT in that case). Emit silence instead of wrapping the segment
+  // index and dereferencing the null _fft.
+  if (_segCount == 0 || _fft == nullptr) {
+    memset(outputData, 0, _blockSize * sizeof(float));
+    return;
+  }
+  // The input buffer acts as a 2B-point sliding window of the input signal.
+  // With each new input block, the right half of the input buffer is shifted
+  // to the left and the new block is stored in the right half.
+  memmove(
+      _inputBuffer.getData(),
+      _inputBuffer.getData() + _blockSize,
+      _blockSize * sizeof(float));
+  memcpy(_inputBuffer.getData() + _blockSize, data, _blockSize * sizeof(float));
+  // All contents (DFT spectra) in the FDL are shifted up by one slot. The
+  // slots are not moved; the start index steps backwards through the ring.
+  _current = (_current > 0) ? _current - 1 : _segCount - 1;
+  // A 2B-point real-to-complex FFT is computed from the input buffer,
+  // resulting in B+1 complex-conjugate symmetric DFT coefficients. The
+  // result is stored in the first FDL slot.
+  // _current marks first FDL slot, which is the current input block.
+  _fft->doFFT(_inputBuffer.getData(), _segments[_current]);
+  _segments[_current][0].imag(0.0f); // ensure DC component is real
+  // The P sub filter spectra are pairwise multiplied with the input spectra
+  // in the FDL. The results are accumulated in the frequency domain.
+  memset(
+      _preMultiplied.data(),
+      0,
+      _preMultiplied.size() * sizeof(std::complex<float>));
+  // this is a bottleneck of the algorithm
+  for (size_t i = 0; i < _segCount; ++i) {
+    const size_t indexAudio = (_current + i) % _segCount;
+    pairwise_complex_multiply_fast(
+        _segmentsIR[i], _segments[indexAudio], _preMultiplied);
+  }
+  // Of the accumulated spectral convolutions, a 2B-point complex-to-real
+  // IFFT is computed. From the resulting 2B samples, the left half is
+  // discarded and the right half is returned as the next output block.
+  _fft->doInverseFFT(_preMultiplied, _fftBuffer.getData());
+  memcpy(
+      outputData,
+      _fftBuffer.getData() + _blockSize,
+      _blockSize * sizeof(float));
+}
+} // namespace audioapi

package/common/cpp/audioapi/dsp/Convolver.h ADDED Viewed

@@ -0,0 +1,45 @@
+#pragma once
+#include <audioapi/utils/AudioArray.h>
+#include <audioapi/dsp/FFT.h>
+#include <vector>
+#include <cstring>
+#include <complex>
+#include <memory>
+#include <audioapi/utils/AlignedAllocator.hpp>
+namespace audioapi {
+class AudioBuffer;
+class Convolver { // block-based FFT convolver; see Convolver.cpp for the algorithm reference
+  using aligned_vec_complex =
+      std::vector<std::complex<float>, AlignedAllocator<std::complex<float>, 16>>; // complex spectrum stored with 16-byte aligned allocation
+ public:
+    Convolver(); // creates an empty convolver; init() must run before process()
+    bool init(size_t blockSize, const AudioArray &ir, size_t irLen); // resets, then prepares the impulse response spectra; false if blockSize is not a power of two
+    void process(float* inputData, float* outputData); // consumes _blockSize input samples and writes _blockSize output samples
+    void reset(); // clears the prepared spectra and zeroes the working buffers
+    inline size_t getSegCount() const { return _trueSegmentCount; } // segment count of the impulse response before trailing near-zero samples are trimmed
+ private:
+    size_t _trueSegmentCount; // ceil(irLen / blockSize) as passed to init(), i.e. before trimming
+    size_t _blockSize; // B: samples per process() call
+    size_t _segSize; // 2 * B: FFT length
+    size_t _segCount; // number of impulse response segments after trimming
+    size_t _fftComplexSize; // _segSize / 2 + 1 complex values per spectrum
+    std::vector<aligned_vec_complex> _segments; // spectra of the most recent input blocks, used as a ring indexed from _current
+    std::vector<aligned_vec_complex> _segmentsIR; // spectra of the zero-padded impulse response segments
+    AudioArray _fftBuffer; // time-domain scratch buffer of _segSize samples
+    std::shared_ptr<dsp::FFT> _fft; // FFT of length _segSize; null until init() prepares segments
+    aligned_vec_complex _preMultiplied; // accumulator for the summed spectral products
+    size_t _current; // index into _segments of the slot holding the newest input block
+    AudioArray _inputBuffer; // sliding window over the last 2 * B input samples
+  friend void pairwise_complex_multiply_fast(
+      const aligned_vec_complex& ir,
+      const aligned_vec_complex& audio,
+      aligned_vec_complex& pre); // friend so the free function can name the private aligned_vec_complex alias
+};
+} // namespace audioapi

package/common/cpp/audioapi/dsp/FFT.cpp CHANGED Viewed

@@ -12,30 +12,4 @@ FFT::~FFT() {
   pffft_aligned_free(work_);
 }
-void FFT::doFFT(float *in, std::vector<std::complex<float>> &out) {
-  pffft_transform_ordered(
-      pffftSetup_,
-      in,
-      reinterpret_cast<float *>(&out[0]),
-      work_,
-      PFFFT_FORWARD);
-  dsp::multiplyByScalar(
-      reinterpret_cast<float *>(&out[0]),
-      0.5f,
-      reinterpret_cast<float *>(&out[0]),
-      size_ * 2);
-}
-void FFT::doInverseFFT(std::vector<std::complex<float>> &in, float *out) {
-  pffft_transform_ordered(
-      pffftSetup_,
-      reinterpret_cast<float *>(&in[0]),
-      out,
-      work_,
-      PFFFT_BACKWARD);
-  dsp::multiplyByScalar(out, 1.0f / static_cast<float>(size_), out, size_);
-}
 } // namespace audioapi::dsp

package/common/cpp/audioapi/dsp/FFT.h CHANGED Viewed

@@ -16,8 +16,32 @@ class FFT {
   explicit FFT(int size);
   ~FFT();
-  void doFFT(float *in, std::vector<std::complex<float>> &out);
-  void doInverseFFT(std::vector<std::complex<float>> &in, float *out);
+  template<typename Allocator>
+  void doFFT(float *in, std::vector<std::complex<float>, Allocator> &out) { // forward transform of `in` into `out`; accepts a vector with any allocator
+    pffft_transform_ordered(
+        pffftSetup_,
+        in,
+        reinterpret_cast<float *>(&out[0]), // the complex vector is handed to pffft as a flat float buffer
+        work_,
+        PFFFT_FORWARD);
+    // Caution for callers: because of how pffft stores its results, element 0
+    // of `out` is not an ordinary complex bin but holds two separate values.
+    // No scaling is applied here. Keep this layout in mind when using `out`:
+    // out[0].real = DC component - should be pure real
+    // out[0].imag = Nyquist component - should be pure real
+  }
+  template<typename Allocator>
+  void doInverseFFT(std::vector<std::complex<float>, Allocator> &in, float *out) { // backward transform of the spectrum `in` into the float buffer `out`
+    pffft_transform_ordered(
+        pffftSetup_,
+        reinterpret_cast<float *>(&in[0]), // the spectrum is handed to pffft as a flat float buffer
+        out,
+        work_,
+        PFFFT_BACKWARD);
+    dsp::multiplyByScalar(out, 1.0f / static_cast<float>(size_), out, size_); // multiply the size_ output samples by 1/size_; `out` is both source and destination
+  }
  private:
   int size_;

package/common/cpp/audioapi/utils/AlignedAllocator.hpp ADDED Viewed

@@ -0,0 +1,50 @@
+#pragma once
+#include <cstddef>
+#include <new>
+/// Minimal stateless allocator that obtains storage through the aligned
+/// forms of operator new / operator delete.
+/// @tparam T Element type
+/// @tparam Align Alignment of every allocation in bytes; must be a power of two
+template<typename T, std::size_t Align = 16>
+class AlignedAllocator {
+  static_assert(
+      Align != 0 && (Align & (Align - 1)) == 0,
+      "Align must be a non-zero power of two");
+public:
+  using value_type = T;
+  using size_type = std::size_t;
+  using difference_type = std::ptrdiff_t;
+  AlignedAllocator() noexcept = default;
+  // Converting constructor: the allocator holds no state, so nothing is copied
+  template<class U> AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept {}
+  /// Allocates uninitialised storage for n objects of type T.
+  /// @return nullptr when n is zero, otherwise a pointer aligned to Align
+  /// @throws std::bad_array_new_length if n * sizeof(T) overflows
+  /// @throws std::bad_alloc if the allocation itself fails
+  T* allocate(std::size_t n) {
+    // Checked here rather than at class scope so that merely naming the
+    // allocator for an incomplete T stays valid.
+    static_assert(
+        Align >= alignof(T),
+        "Align must not be weaker than the natural alignment of T");
+    // We want to maximize performance on hot paths, so we hint unlikely branches
+    if (n == 0) [[ unlikely ]] {
+      return nullptr;
+    }
+    // n * sizeof(T) must not wrap around, otherwise a too-small buffer would
+    // be returned for a huge request
+    if (n > static_cast<std::size_t>(-1) / sizeof(T)) [[ unlikely ]] {
+      throw std::bad_array_new_length();
+    }
+    // C++17 aligned new. This form reports failure by throwing
+    // std::bad_alloc and never returns null, so no null check is needed.
+    return static_cast<T*>(
+        ::operator new(n * sizeof(T), std::align_val_t(Align)));
+  }
+  /// Releases storage obtained from allocate(); passing nullptr is harmless.
+  void deallocate(T* p, std::size_t) noexcept {
+      ::operator delete(p, std::align_val_t(Align));
+  }
+  // Rebind allocator to type U (required by std::vector)
+  template<class U>
+  struct rebind { using other = AlignedAllocator<U, Align>; };
+  // Comparison operators (required by std::vector): allocators are
+  // interchangeable exactly when they use the same alignment
+  template<typename U, std::size_t UAlign>
+  bool operator==(const AlignedAllocator<U, UAlign>&) const noexcept {
+    return Align == UAlign;
+  }
+  template<typename U, std::size_t UAlign>
+  bool operator!=(const AlignedAllocator<U, UAlign>&) const noexcept {
+    return Align != UAlign;
+  }
+};