react-native-audio-api 0.10.0-nightly-d3a7f65-20251028 → 0.10.0-nightly-e16d7ff-20251030

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/RNAudioAPI.podspec +7 -6
  2. package/android/src/main/cpp/audioapi/CMakeLists.txt +6 -0
  3. package/android/src/main/java/com/swmansion/audioapi/system/MediaNotificationManager.kt +11 -0
  4. package/common/cpp/audioapi/HostObjects/BaseAudioContextHostObject.cpp +18 -0
  5. package/common/cpp/audioapi/HostObjects/BaseAudioContextHostObject.h +1 -0
  6. package/common/cpp/audioapi/HostObjects/effects/ConvolverNodeHostObject.cpp +47 -0
  7. package/common/cpp/audioapi/HostObjects/effects/ConvolverNodeHostObject.h +20 -0
  8. package/common/cpp/audioapi/core/AudioNode.h +3 -2
  9. package/common/cpp/audioapi/core/BaseAudioContext.cpp +10 -0
  10. package/common/cpp/audioapi/core/BaseAudioContext.h +2 -0
  11. package/common/cpp/audioapi/core/effects/ConvolverNode.cpp +210 -0
  12. package/common/cpp/audioapi/core/effects/ConvolverNode.h +55 -0
  13. package/common/cpp/audioapi/core/sources/StreamerNode.cpp +59 -58
  14. package/common/cpp/audioapi/core/sources/StreamerNode.h +37 -8
  15. package/common/cpp/audioapi/core/utils/AudioNodeManager.cpp +5 -0
  16. package/common/cpp/audioapi/dsp/AudioUtils.cpp +1 -1
  17. package/common/cpp/audioapi/dsp/Convolver.cpp +213 -0
  18. package/common/cpp/audioapi/dsp/Convolver.h +45 -0
  19. package/common/cpp/audioapi/dsp/FFT.cpp +0 -26
  20. package/common/cpp/audioapi/dsp/FFT.h +26 -2
  21. package/common/cpp/audioapi/utils/AlignedAllocator.hpp +50 -0
  22. package/common/cpp/audioapi/utils/AudioBus.cpp +28 -0
  23. package/common/cpp/audioapi/utils/AudioBus.h +3 -0
  24. package/common/cpp/test/CMakeLists.txt +16 -14
  25. package/lib/commonjs/api.js +7 -0
  26. package/lib/commonjs/api.js.map +1 -1
  27. package/lib/commonjs/api.web.js +8 -0
  28. package/lib/commonjs/api.web.js.map +1 -1
  29. package/lib/commonjs/core/BaseAudioContext.js +12 -0
  30. package/lib/commonjs/core/BaseAudioContext.js.map +1 -1
  31. package/lib/commonjs/core/ConvolverNode.js +37 -0
  32. package/lib/commonjs/core/ConvolverNode.js.map +1 -0
  33. package/lib/commonjs/types.js +4 -0
  34. package/lib/commonjs/web-core/AudioContext.js +12 -0
  35. package/lib/commonjs/web-core/AudioContext.js.map +1 -1
  36. package/lib/commonjs/web-core/ConvolverNode.js +40 -0
  37. package/lib/commonjs/web-core/ConvolverNode.js.map +1 -0
  38. package/lib/commonjs/web-core/ConvolverNodeOptions.js +6 -0
  39. package/lib/commonjs/web-core/ConvolverNodeOptions.js.map +1 -0
  40. package/lib/commonjs/web-core/OfflineAudioContext.js +12 -0
  41. package/lib/commonjs/web-core/OfflineAudioContext.js.map +1 -1
  42. package/lib/module/api.js +1 -0
  43. package/lib/module/api.js.map +1 -1
  44. package/lib/module/api.web.js +1 -0
  45. package/lib/module/api.web.js.map +1 -1
  46. package/lib/module/core/BaseAudioContext.js +12 -0
  47. package/lib/module/core/BaseAudioContext.js.map +1 -1
  48. package/lib/module/core/ConvolverNode.js +31 -0
  49. package/lib/module/core/ConvolverNode.js.map +1 -0
  50. package/lib/module/types.js +2 -0
  51. package/lib/module/web-core/AudioContext.js +12 -0
  52. package/lib/module/web-core/AudioContext.js.map +1 -1
  53. package/lib/module/web-core/ConvolverNode.js +34 -0
  54. package/lib/module/web-core/ConvolverNode.js.map +1 -0
  55. package/lib/module/web-core/ConvolverNodeOptions.js +4 -0
  56. package/lib/module/web-core/ConvolverNodeOptions.js.map +1 -0
  57. package/lib/module/web-core/OfflineAudioContext.js +12 -0
  58. package/lib/module/web-core/OfflineAudioContext.js.map +1 -1
  59. package/lib/typescript/api.d.ts +1 -0
  60. package/lib/typescript/api.d.ts.map +1 -1
  61. package/lib/typescript/api.web.d.ts +1 -0
  62. package/lib/typescript/api.web.d.ts.map +1 -1
  63. package/lib/typescript/core/BaseAudioContext.d.ts +3 -1
  64. package/lib/typescript/core/BaseAudioContext.d.ts.map +1 -1
  65. package/lib/typescript/core/ConvolverNode.d.ts +12 -0
  66. package/lib/typescript/core/ConvolverNode.d.ts.map +1 -0
  67. package/lib/typescript/interfaces.d.ts +5 -0
  68. package/lib/typescript/interfaces.d.ts.map +1 -1
  69. package/lib/typescript/types.d.ts +5 -0
  70. package/lib/typescript/types.d.ts.map +1 -1
  71. package/lib/typescript/web-core/AudioContext.d.ts +3 -0
  72. package/lib/typescript/web-core/AudioContext.d.ts.map +1 -1
  73. package/lib/typescript/web-core/BaseAudioContext.d.ts +2 -0
  74. package/lib/typescript/web-core/BaseAudioContext.d.ts.map +1 -1
  75. package/lib/typescript/web-core/ConvolverNode.d.ts +11 -0
  76. package/lib/typescript/web-core/ConvolverNode.d.ts.map +1 -0
  77. package/lib/typescript/web-core/ConvolverNodeOptions.d.ts +6 -0
  78. package/lib/typescript/web-core/ConvolverNodeOptions.d.ts.map +1 -0
  79. package/lib/typescript/web-core/OfflineAudioContext.d.ts +3 -0
  80. package/lib/typescript/web-core/OfflineAudioContext.d.ts.map +1 -1
  81. package/package.json +1 -1
  82. package/src/api.ts +1 -0
  83. package/src/api.web.ts +1 -0
  84. package/src/core/BaseAudioContext.ts +23 -0
  85. package/src/core/ConvolverNode.ts +35 -0
  86. package/src/interfaces.ts +11 -0
  87. package/src/types.ts +7 -0
  88. package/src/web-core/AudioContext.tsx +25 -0
  89. package/src/web-core/BaseAudioContext.tsx +2 -0
  90. package/src/web-core/ConvolverNode.tsx +43 -0
  91. package/src/web-core/ConvolverNodeOptions.tsx +6 -0
  92. package/src/web-core/OfflineAudioContext.tsx +25 -0
@@ -25,14 +25,12 @@ StreamerNode::StreamerNode(BaseAudioContext *context)
25
25
  codecpar_(nullptr),
26
26
  pkt_(nullptr),
27
27
  frame_(nullptr),
28
- pendingFrame_(nullptr),
29
28
  bufferedBus_(nullptr),
30
- bufferedBusIndex_(0),
31
- maxBufferSize_(0),
32
29
  audio_stream_index_(-1),
33
30
  swrCtx_(nullptr),
34
31
  resampledData_(nullptr),
35
- maxResampledSamples_(0) {}
32
+ maxResampledSamples_(0),
33
+ processedSamples_(0) {}
36
34
 
37
35
  StreamerNode::~StreamerNode() {
38
36
  cleanup();
@@ -66,24 +64,30 @@ bool StreamerNode::initialize(const std::string &input_url) {
66
64
  return false;
67
65
  }
68
66
 
69
- maxBufferSize_ = BUFFER_LENGTH_SECONDS * codecCtx_->sample_rate;
70
- // If decoding is faster than playing, we buffer few seconds of audio
71
- bufferedBus_ = std::make_shared<AudioBus>(
72
- maxBufferSize_, codecpar_->ch_layout.nb_channels, codecCtx_->sample_rate);
73
-
74
67
  channelCount_ = codecpar_->ch_layout.nb_channels;
75
68
  audioBus_ = std::make_shared<AudioBus>(
76
69
  RENDER_QUANTUM_SIZE, channelCount_, context_->getSampleRate());
77
70
 
71
+ auto [sender, receiver] = channels::spsc::channel<
72
+ StreamingData,
73
+ channels::spsc::OverflowStrategy::WAIT_ON_FULL,
74
+ channels::spsc::WaitStrategy::ATOMIC_WAIT>(CHANNEL_CAPACITY);
75
+ sender_ = std::move(sender);
76
+ receiver_ = std::move(receiver);
77
+
78
78
  streamingThread_ = std::thread(&StreamerNode::streamAudio, this);
79
- streamFlag.store(true);
79
+ streamFlag.store(true, std::memory_order_release);
80
80
  isInitialized_ = true;
81
81
  return true;
82
82
  }
83
83
 
84
84
  void StreamerNode::stop(double when) {
85
85
  AudioScheduledSourceNode::stop(when);
86
- streamFlag.store(false);
86
+ streamFlag.store(false, std::memory_order_release);
87
+ StreamingData dummy;
88
+ while (receiver_.try_receive(dummy) ==
89
+ channels::spsc::ResponseStatus::SUCCESS)
90
+ ; // clear the receiver
87
91
  }
88
92
 
89
93
  bool StreamerNode::setupResampler() {
@@ -122,29 +126,22 @@ bool StreamerNode::setupResampler() {
122
126
  }
123
127
 
124
128
  void StreamerNode::streamAudio() {
125
- while (streamFlag.load()) {
126
- if (pendingFrame_ != nullptr) {
127
- if (!processFrameWithResampler(pendingFrame_)) {
129
+ while (streamFlag.load(std::memory_order_acquire)) {
130
+ if (av_read_frame(fmtCtx_, pkt_) < 0) {
131
+ return;
132
+ }
133
+ if (pkt_->stream_index == audio_stream_index_) {
134
+ if (avcodec_send_packet(codecCtx_, pkt_) != 0) {
128
135
  return;
129
136
  }
130
- } else {
131
- if (av_read_frame(fmtCtx_, pkt_) < 0) {
137
+ if (avcodec_receive_frame(codecCtx_, frame_) != 0) {
132
138
  return;
133
139
  }
134
- if (pkt_->stream_index == audio_stream_index_) {
135
- if (avcodec_send_packet(codecCtx_, pkt_) != 0) {
136
- return;
137
- }
138
- if (avcodec_receive_frame(codecCtx_, frame_) != 0) {
139
- return;
140
- }
141
- if (!processFrameWithResampler(frame_)) {
142
- return;
143
- }
140
+ if (!processFrameWithResampler(frame_)) {
141
+ return;
144
142
  }
145
- av_packet_unref(pkt_);
146
143
  }
147
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
144
+ av_packet_unref(pkt_);
148
145
  }
149
146
  }
150
147
 
@@ -160,28 +157,33 @@ std::shared_ptr<AudioBus> StreamerNode::processNode(
160
157
  return processingBus;
161
158
  }
162
159
 
163
- // If we have enough buffered data, copy to output bus
164
- if (bufferedBusIndex_ >= framesToProcess) {
165
- Locker locker(mutex_);
160
+ int bufferRemaining = bufferedBusSize_ - processedSamples_;
161
+ int alreadyProcessed = 0;
162
+ if (bufferRemaining < framesToProcess) {
163
+ if (bufferedBus_ != nullptr) {
164
+ for (int ch = 0; ch < processingBus->getNumberOfChannels(); ch++) {
165
+ memcpy(
166
+ processingBus->getChannel(ch)->getData(),
167
+ bufferedBus_->getChannel(ch)->getData() + processedSamples_,
168
+ bufferRemaining * sizeof(float));
169
+ }
170
+ framesToProcess -= bufferRemaining;
171
+ alreadyProcessed += bufferRemaining;
172
+ }
173
+ StreamingData data;
174
+ receiver_.try_receive(data);
175
+ bufferedBus_ = std::make_shared<AudioBus>(std::move(data.bus));
176
+ bufferedBusSize_ = data.size;
177
+ processedSamples_ = 0;
178
+ }
179
+ if (bufferedBus_ != nullptr) {
166
180
  for (int ch = 0; ch < processingBus->getNumberOfChannels(); ch++) {
167
181
  memcpy(
168
- processingBus->getChannel(ch)->getData(),
169
- bufferedBus_->getChannel(ch)->getData(),
170
- offsetLength * sizeof(float));
171
-
172
- memmove(
173
- bufferedBus_->getChannel(ch)->getData(),
174
- bufferedBus_->getChannel(ch)->getData() + offsetLength,
175
- (maxBufferSize_ - offsetLength) * sizeof(float));
182
+ processingBus->getChannel(ch)->getData() + alreadyProcessed,
183
+ bufferedBus_->getChannel(ch)->getData() + processedSamples_,
184
+ framesToProcess * sizeof(float));
176
185
  }
177
- bufferedBusIndex_ -= offsetLength;
178
- } else {
179
- if (VERBOSE)
180
- printf(
181
- "Buffer underrun: have %zu, need %zu\n",
182
- bufferedBusIndex_,
183
- (size_t)framesToProcess);
184
- processingBus->zero();
186
+ processedSamples_ += framesToProcess;
185
187
  }
186
188
 
187
189
  return processingBus;
@@ -220,22 +222,21 @@ bool StreamerNode::processFrameWithResampler(AVFrame *frame) {
220
222
  return false;
221
223
  }
222
224
 
223
- // Check if converted data fits in buffer
224
- if (bufferedBusIndex_ + converted_samples > maxBufferSize_) {
225
- pendingFrame_ = frame;
225
+ // if we would like to finish dont copy anything
226
+ if (!streamFlag.load(std::memory_order_acquire)) {
226
227
  return true;
227
- } else {
228
- pendingFrame_ = nullptr;
229
228
  }
230
-
231
- // Copy converted data to our buffer
232
- Locker locker(mutex_);
229
+ auto bus = AudioBus(
230
+ static_cast<size_t>(converted_samples),
231
+ codecCtx_->ch_layout.nb_channels,
232
+ context_->getSampleRate());
233
233
  for (int ch = 0; ch < codecCtx_->ch_layout.nb_channels; ch++) {
234
234
  auto *src = reinterpret_cast<float *>(resampledData_[ch]);
235
- float *dst = bufferedBus_->getChannel(ch)->getData() + bufferedBusIndex_;
235
+ float *dst = bus.getChannel(ch)->getData();
236
236
  memcpy(dst, src, converted_samples * sizeof(float));
237
237
  }
238
- bufferedBusIndex_ += converted_samples;
238
+ StreamingData data{std::move(bus), static_cast<size_t>(converted_samples)};
239
+ sender_.send(std::move(data));
239
240
  return true;
240
241
  }
241
242
 
@@ -280,7 +281,7 @@ bool StreamerNode::setupDecoder() {
280
281
  }
281
282
 
282
283
  void StreamerNode::cleanup() {
283
- streamFlag.store(false);
284
+ streamFlag.store(false, std::memory_order_release);
284
285
  // cleanup cannot be called from the streaming thread so there is no need to
285
286
  // check if we are in the same thread
286
287
  streamingThread_.join();
@@ -11,6 +11,7 @@
11
11
  #pragma once
12
12
 
13
13
  #include <audioapi/core/sources/AudioScheduledSourceNode.h>
14
+ #include <audioapi/utils/AudioBus.h>
14
15
 
15
16
  #ifndef AUDIO_API_TEST_SUITE
16
17
  extern "C" {
@@ -27,8 +28,35 @@ extern "C" {
27
28
  #include <memory>
28
29
  #include <string>
29
30
  #include <atomic>
31
+ #include <utility>
32
+ #ifndef AUDIO_API_TEST_SUITE
33
+ #include <audioapi/utils/SpscChannel.hpp>
30
34
 
31
- static bool constexpr VERBOSE = false;
35
+ static constexpr audioapi::channels::spsc::OverflowStrategy STREAMER_NODE_SPSC_OVERFLOW_STRATEGY =
36
+ audioapi::channels::spsc::OverflowStrategy::WAIT_ON_FULL;
37
+ static constexpr audioapi::channels::spsc::WaitStrategy STREAMER_NODE_SPSC_WAIT_STRATEGY =
38
+ audioapi::channels::spsc::WaitStrategy::ATOMIC_WAIT;
39
+ #endif
40
+
41
+ static constexpr bool VERBOSE = false;
42
+ static constexpr int CHANNEL_CAPACITY = 32;
43
+
44
+ struct StreamingData{
45
+ audioapi::AudioBus bus;
46
+ size_t size;
47
+ StreamingData() = default;
48
+ StreamingData(audioapi::AudioBus b, size_t s) : bus(b), size(s) {}
49
+ StreamingData(const StreamingData& data) : bus(data.bus), size(data.size) {}
50
+ StreamingData(StreamingData&& data) noexcept : bus(std::move(data.bus)), size(data.size) {}
51
+ StreamingData& operator=(const StreamingData& data) {
52
+ if (this == &data) {
53
+ return *this;
54
+ }
55
+ bus = data.bus;
56
+ size = data.size;
57
+ return *this;
58
+ }
59
+ };
32
60
 
33
61
  namespace audioapi {
34
62
 
@@ -56,19 +84,20 @@ class StreamerNode : public AudioScheduledSourceNode {
56
84
  AVCodecParameters* codecpar_;
57
85
  AVPacket* pkt_;
58
86
  AVFrame* frame_; // Frame that is currently being processed
59
- AVFrame* pendingFrame_; // Frame that is saved if bufferedBus is full
60
- std::shared_ptr<AudioBus> bufferedBus_; // audio bus for buffering hls frames
61
- size_t bufferedBusIndex_; // index in the buffered bus where we write the next frame
62
- size_t maxBufferSize_; // maximum size of the buffered bus
63
- int audio_stream_index_; // index of the audio stream channel in the input
64
87
  SwrContext* swrCtx_;
65
88
  uint8_t** resampledData_; // weird ffmpeg way of using raw byte pointers for resampled data
89
+
90
+ std::shared_ptr<AudioBus> bufferedBus_; // audio bus for buffering hls frames
91
+ size_t bufferedBusSize_; // size of currently buffered bus
92
+ int audio_stream_index_; // index of the audio stream channel in the input
66
93
  int maxResampledSamples_;
67
- std::mutex mutex_;
94
+ size_t processedSamples_;
95
+
68
96
  std::thread streamingThread_;
69
97
  std::atomic<bool> streamFlag; // Flag to control the streaming thread
70
- static constexpr float BUFFER_LENGTH_SECONDS = 5.0f; // Length of the buffer in seconds
71
98
  static constexpr int INITIAL_MAX_RESAMPLED_SAMPLES = 8192; // Initial size for resampled data
99
+ channels::spsc::Sender<StreamingData, STREAMER_NODE_SPSC_OVERFLOW_STRATEGY, STREAMER_NODE_SPSC_WAIT_STRATEGY> sender_;
100
+ channels::spsc::Receiver<StreamingData, STREAMER_NODE_SPSC_OVERFLOW_STRATEGY, STREAMER_NODE_SPSC_WAIT_STRATEGY> receiver_;
72
101
 
73
102
  /**
74
103
  * @brief Setting up the resampler
@@ -1,5 +1,6 @@
1
1
  #include <audioapi/core/AudioNode.h>
2
2
  #include <audioapi/core/AudioParam.h>
3
+ #include <audioapi/core/effects/ConvolverNode.h>
3
4
  #include <audioapi/core/sources/AudioScheduledSourceNode.h>
4
5
  #include <audioapi/core/utils/AudioNodeManager.h>
5
6
  #include <audioapi/core/utils/Locker.h>
@@ -221,6 +222,10 @@ inline bool AudioNodeManager::nodeCanBeDestructed(
221
222
  if constexpr (std::is_base_of_v<AudioScheduledSourceNode, U>) {
222
223
  return node.use_count() == 1 &&
223
224
  (node->isUnscheduled() || node->isFinished());
225
+ } else if constexpr (std::is_base_of_v<
226
+ ConvolverNode,
227
+ U>) { // convolver overrides disabling behavior
228
+ return node.use_count() == 1 && !node->isEnabled();
224
229
  }
225
230
  return node.use_count() == 1;
226
231
  }
@@ -30,4 +30,4 @@ float linearToDecibels(float value) {
30
30
  float decibelsToLinear(float value) {
31
31
  return pow(10, value / 20);
32
32
  }
33
- } // namespace audioapi::dsp
33
+ } // namespace audioapi::dsp
@@ -0,0 +1,213 @@
1
+ // implementation of linear convolution algorithm described in this paper:
2
+ // https://publications.rwth-aachen.de/record/466561/files/466561.pdf page 110
3
+
4
+ #if defined(__ARM_NEON)
5
+ #include <arm_neon.h>
6
+ #endif
7
+
8
+ #include <audioapi/core/sources/AudioBuffer.h>
9
+ #include <audioapi/dsp/Convolver.h>
10
+ #include <audioapi/dsp/VectorMath.h>
11
+ #include <audioapi/utils/AudioArray.h>
12
+ #include <chrono>
13
+ #include <iostream>
14
+
15
+ namespace audioapi {
16
+
17
+ Convolver::Convolver()
18
+ : _blockSize(0),
19
+ _segSize(0),
20
+ _segCount(0),
21
+ _fftComplexSize(0),
22
+ _segments(),
23
+ _segmentsIR(),
24
+ _fftBuffer(0),
25
+ _fft(nullptr),
26
+ _preMultiplied(),
27
+ _current(0),
28
+ _inputBuffer(0) {}
29
+
30
+ void Convolver::reset() {
31
+ _blockSize = 0;
32
+ _segSize = 0;
33
+ _segCount = 0;
34
+ _fftComplexSize = 0;
35
+ _current = 0;
36
+ _fft = nullptr;
37
+ _segments.clear();
38
+ _segmentsIR.clear();
39
+ _preMultiplied.clear();
40
+ _fftBuffer.zero();
41
+ _inputBuffer.zero();
42
+ }
43
+
44
+ bool Convolver::init(
45
+ size_t blockSize,
46
+ const audioapi::AudioArray &ir,
47
+ size_t irLen) {
48
+ reset();
49
+ // blockSize must be a power of two
50
+ if ((blockSize & (blockSize - 1))) {
51
+ return false;
52
+ }
53
+
54
+ // Ignore zeros at the end of the impulse response because they only waste
55
+ // computation time
56
+ _blockSize = blockSize;
57
+ _trueSegmentCount = (size_t)(std::ceil((float)irLen / (float)_blockSize));
58
+ while (irLen > 0 && ::fabs(ir[irLen - 1]) < 10e-3) {
59
+ --irLen;
60
+ }
61
+
62
+ if (irLen == 0) {
63
+ return true;
64
+ }
65
+
66
+ // The length-N is split into P = N/B length-B sub filters
67
+ _segCount = (size_t)(std::ceil((float)irLen / (float)_blockSize));
68
+ _segSize = 2 * _blockSize;
69
+ // size of the FFT is 2B, so the complex size is B+1, due to the
70
+ // complex-conjugate symmetricity
71
+ _fftComplexSize = _segSize / 2 + 1;
72
+ _fft = std::make_shared<dsp::FFT>((int)_segSize);
73
+ _fftBuffer.resize(_segSize);
74
+
75
+ // segments preparation
76
+ for (int i = 0; i < _segCount; ++i) {
77
+ aligned_vec_complex vec(_fftComplexSize, std::complex<float>(0.0f, 0.0f));
78
+ _segments.push_back(vec);
79
+ }
80
+
81
+ // ir preparation
82
+ for (int i = 0; i < _segCount; ++i) {
83
+ aligned_vec_complex segment(_fftComplexSize);
84
+ const size_t remainingSamples = irLen - (i * _blockSize);
85
+ const size_t samplesToCopy = std::min(_blockSize, remainingSamples);
86
+
87
+ if (samplesToCopy > 0) {
88
+ memcpy(
89
+ _fftBuffer.getData(),
90
+ ir.getData() + i * _blockSize,
91
+ samplesToCopy * sizeof(float));
92
+ }
93
+ // Each sub filter is zero-padded to length 2B and transformed using a
94
+ // 2B-point real-to-complex FFT.
95
+ memset(_fftBuffer.getData() + _blockSize, 0, _blockSize * sizeof(float));
96
+ _fft->doFFT(_fftBuffer.getData(), segment);
97
+ segment.at(0).imag(0.0f); // ensure DC component is real
98
+ _segmentsIR.push_back(segment);
99
+ }
100
+
101
+ _preMultiplied = aligned_vec_complex(_fftComplexSize);
102
+ _inputBuffer.resize(_segSize);
103
+ _current = 0;
104
+
105
+ return true;
106
+ }
107
+
108
+ /// @brief Fast pairwise complex multiplication using ARM NEON intrinsics
109
+ /// @param ir Impulse response
110
+ /// @param audio Input audio signal
111
+ /// @param pre Output buffer for pre-multiplied results
112
+ /// @note IMPORTANT: ir, audio, and pre must be the same size and should be
113
+ /// aligned to 16 bytes for optimal performance
114
+ void pairwise_complex_multiply_fast(
115
+ const Convolver::aligned_vec_complex &ir,
116
+ const Convolver::aligned_vec_complex &audio,
117
+ Convolver::aligned_vec_complex &pre) {
118
+ size_t n = ir.size();
119
+
120
+ /// @note Using ARM NEON intrinsics for SIMD optimization
121
+ /// This implementation is on average 2x faster than the scalar version on ARM
122
+ /// architectures With 16-byte alignment it can be even faster up to 2.5x
123
+ #ifdef __ARM_NEON
124
+ size_t j = 0;
125
+
126
+ // Main vector loop: process 4 complex samples (8 floats) per iteration using
127
+ // vld2q/vst2q deinterleave
128
+ for (; j <= n - 4; j += 4) {
129
+ // load de-interleaved real/imag for 4 complex values
130
+ float32x4x2_t ir_de = vld2q_f32(reinterpret_cast<const float *>(&ir[j]));
131
+ float32x4x2_t a_de = vld2q_f32(reinterpret_cast<const float *>(&audio[j]));
132
+ float32x4x2_t pre_de = vld2q_f32(reinterpret_cast<float *>(&pre[j]));
133
+
134
+ float32x4_t ir_re = ir_de.val[0];
135
+ float32x4_t ir_im = ir_de.val[1];
136
+ float32x4_t a_re = a_de.val[0];
137
+ float32x4_t a_im = a_de.val[1];
138
+
139
+ // real = ir_re * a_re - ir_im * a_im
140
+ float32x4_t real = vmulq_f32(ir_re, a_re);
141
+ real = vmlsq_f32(real, ir_im, a_im);
142
+ // imag = ir_re * a_im + ir_im * a_re
143
+ float32x4_t imag = vmulq_f32(ir_re, a_im);
144
+ imag = vmlaq_f32(imag, ir_im, a_re);
145
+
146
+ // accumulate into pre
147
+ float32x4_t new_re = vaddq_f32(pre_de.val[0], real);
148
+ float32x4_t new_im = vaddq_f32(pre_de.val[1], imag);
149
+
150
+ float32x4x2_t out_de;
151
+ out_de.val[0] = new_re;
152
+ out_de.val[1] = new_im;
153
+
154
+ vst2q_f32(reinterpret_cast<float *>(&pre[j]), out_de);
155
+ }
156
+
157
+ // Tail
158
+ for (; j < n; ++j) {
159
+ pre[j] += ir[j] * audio[j];
160
+ }
161
+
162
+ #else
163
+ // Fallback scalar implementation
164
+ for (size_t i = 0; i < n; ++i) {
165
+ pre[i] += ir[i] * audio[i];
166
+ }
167
+ #endif
168
+ }
169
+
170
+ void Convolver::process(float *data, float *outputData) {
171
+ // The input buffer acts as a 2B-point sliding window of the input signal.
172
+ // With each new input block, the right half of the input buffer is shifted
173
+ // to the left and the new block is stored in the right half.
174
+ memmove(
175
+ _inputBuffer.getData(),
176
+ _inputBuffer.getData() + _blockSize,
177
+ _blockSize * sizeof(float));
178
+ memcpy(_inputBuffer.getData() + _blockSize, data, _blockSize * sizeof(float));
179
+
180
+ // All contents (DFT spectra) in the FDL are shifted up by one slot.
181
+ _current = (_current > 0) ? _current - 1 : _segCount - 1;
182
+ // A 2B-point real-to-complex FFT is computed from the input buffer,
183
+ // resulting in B+1 complex-conjugate symmetric DFT coefficients. The
184
+ // result is stored in the first FDL slot.
185
+ // _current marks first FDL slot, which is the current input block.
186
+ _fft->doFFT(_inputBuffer.getData(), _segments[_current]);
187
+ _segments[_current][0].imag(0.0f); // ensure DC component is real
188
+
189
+ // The P sub filter spectra are pairwisely multiplied with the input spectra
190
+ // in the FDL. The results are accumulated in the frequency-domain.
191
+ memset(
192
+ _preMultiplied.data(),
193
+ 0,
194
+ _preMultiplied.size() * sizeof(std::complex<float>));
195
+ // this is a bottleneck of the algorithm
196
+ for (int i = 0; i < _segCount; ++i) {
197
+ const int indexAudio = (_current + i) % _segCount;
198
+ const auto &impulseResponseSegment = _segmentsIR[i];
199
+ const auto &audioSegment = _segments[indexAudio];
200
+ pairwise_complex_multiply_fast(
201
+ impulseResponseSegment, audioSegment, _preMultiplied);
202
+ }
203
+ // Of the accumulated spectral convolutions, an 2B-point complex-to-real
204
+ // IFFT is computed. From the resulting 2B samples, the left half is
205
+ // discarded and the right half is returned as the next output block.
206
+ _fft->doInverseFFT(_preMultiplied, _fftBuffer.getData());
207
+
208
+ memcpy(
209
+ outputData,
210
+ _fftBuffer.getData() + _blockSize,
211
+ _blockSize * sizeof(float));
212
+ }
213
+ } // namespace audioapi
@@ -0,0 +1,45 @@
1
+ #pragma once
2
+
3
+ #include <audioapi/utils/AudioArray.h>
4
+ #include <audioapi/dsp/FFT.h>
5
+ #include <vector>
6
+ #include <cstring>
7
+ #include <complex>
8
+ #include <memory>
9
+ #include <audioapi/utils/AlignedAllocator.hpp>
10
+
11
+ namespace audioapi {
12
+
13
+ class AudioBuffer;
14
+
15
+ class Convolver {
16
+ using aligned_vec_complex =
17
+ std::vector<std::complex<float>, AlignedAllocator<std::complex<float>, 16>>;
18
+
19
+ public:
20
+ Convolver();
21
+ bool init(size_t blockSize, const AudioArray &ir, size_t irLen);
22
+ void process(float* inputData, float* outputData);
23
+ void reset();
24
+ inline size_t getSegCount() const { return _trueSegmentCount; }
25
+
26
+ private:
27
+ size_t _trueSegmentCount;
28
+ size_t _blockSize;
29
+ size_t _segSize;
30
+ size_t _segCount;
31
+ size_t _fftComplexSize;
32
+ std::vector<aligned_vec_complex> _segments;
33
+ std::vector<aligned_vec_complex> _segmentsIR;
34
+ AudioArray _fftBuffer;
35
+ std::shared_ptr<dsp::FFT> _fft;
36
+ aligned_vec_complex _preMultiplied;
37
+ size_t _current;
38
+ AudioArray _inputBuffer;
39
+
40
+ friend void pairwise_complex_multiply_fast(
41
+ const aligned_vec_complex& ir,
42
+ const aligned_vec_complex& audio,
43
+ aligned_vec_complex& pre);
44
+ };
45
+ } // namespace audioapi
@@ -12,30 +12,4 @@ FFT::~FFT() {
12
12
  pffft_aligned_free(work_);
13
13
  }
14
14
 
15
- void FFT::doFFT(float *in, std::vector<std::complex<float>> &out) {
16
- pffft_transform_ordered(
17
- pffftSetup_,
18
- in,
19
- reinterpret_cast<float *>(&out[0]),
20
- work_,
21
- PFFFT_FORWARD);
22
-
23
- dsp::multiplyByScalar(
24
- reinterpret_cast<float *>(&out[0]),
25
- 0.5f,
26
- reinterpret_cast<float *>(&out[0]),
27
- size_ * 2);
28
- }
29
-
30
- void FFT::doInverseFFT(std::vector<std::complex<float>> &in, float *out) {
31
- pffft_transform_ordered(
32
- pffftSetup_,
33
- reinterpret_cast<float *>(&in[0]),
34
- out,
35
- work_,
36
- PFFFT_BACKWARD);
37
-
38
- dsp::multiplyByScalar(out, 1.0f / static_cast<float>(size_), out, size_);
39
- }
40
-
41
15
  } // namespace audioapi::dsp
@@ -16,8 +16,32 @@ class FFT {
16
16
  explicit FFT(int size);
17
17
  ~FFT();
18
18
 
19
- void doFFT(float *in, std::vector<std::complex<float>> &out);
20
- void doInverseFFT(std::vector<std::complex<float>> &in, float *out);
19
+ template<typename Allocator>
20
+ void doFFT(float *in, std::vector<std::complex<float>, Allocator> &out) {
21
+ pffft_transform_ordered(
22
+ pffftSetup_,
23
+ in,
24
+ reinterpret_cast<float *>(&out[0]),
25
+ work_,
26
+ PFFFT_FORWARD);
27
+ // this is a possible place for bugs and mistakes
28
+ // due to pffft implementation and how it stores results
29
+ // keep this information in mind
30
+ // out[0].real = DC component - should be pure real
31
+ // out[0].imag = Nyquist component - should be pure real
32
+ }
33
+
34
+ template<typename Allocator>
35
+ void doInverseFFT(std::vector<std::complex<float>, Allocator> &in, float *out) {
36
+ pffft_transform_ordered(
37
+ pffftSetup_,
38
+ reinterpret_cast<float *>(&in[0]),
39
+ out,
40
+ work_,
41
+ PFFFT_BACKWARD);
42
+
43
+ dsp::multiplyByScalar(out, 1.0f / static_cast<float>(size_), out, size_);
44
+ }
21
45
 
22
46
  private:
23
47
  int size_;
@@ -0,0 +1,50 @@
1
+ #pragma once
2
+ #include <cstddef>
3
+ #include <new>
4
+
5
/// Minimal standard-library compatible allocator that returns storage aligned
/// to at least `Align` bytes (C++17 aligned operator new/delete).
///
/// @tparam T     element type
/// @tparam Align requested alignment in bytes; must be a power of two
template <typename T, std::size_t Align = 16>
class AlignedAllocator {
  static_assert(
      Align != 0 && (Align & (Align - 1)) == 0,
      "Align must be a non-zero power of two");

  // Never hand out storage that is less aligned than T itself requires.
  static constexpr std::size_t kAlignment =
      Align > alignof(T) ? Align : alignof(T);

 public:
  using value_type = T;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  AlignedAllocator() noexcept = default;
  template <class U>
  AlignedAllocator(const AlignedAllocator<U, Align> &) noexcept {}

  /// Allocates uninitialised storage for `n` objects of type T.
  /// @return nullptr when `n` is zero, aligned storage otherwise
  /// @throws std::bad_array_new_length when `n * sizeof(T)` would overflow
  /// @throws std::bad_alloc when the allocation itself fails
  T *allocate(std::size_t n) {
    // Zero-sized requests are rare on hot paths, hence the hint.
    if (n == 0) [[unlikely]] {
      return nullptr;
    }
    // Guard the multiplication: a wrapped byte count would silently return
    // a buffer that is far too small.
    if (n > static_cast<std::size_t>(-1) / sizeof(T)) {
      throw std::bad_array_new_length();
    }
    // Aligned operator new reports failure by throwing, so no null check.
    return static_cast<T *>(
        ::operator new(n * sizeof(T), std::align_val_t(kAlignment)));
  }

  /// Releases storage obtained from allocate(); a null pointer is accepted.
  void deallocate(T *p, std::size_t) noexcept {
    ::operator delete(p, std::align_val_t(kAlignment));
  }

  // Rebind allocator to type U. It has to be spelled out because of the
  // non-type template parameter `Align`.
  template <class U>
  struct rebind {
    using other = AlignedAllocator<U, Align>;
  };

  // Comparison operators: allocators are interchangeable exactly when they
  // request the same alignment.
  template <typename U, std::size_t UAlign>
  bool operator==(const AlignedAllocator<U, UAlign> &) const noexcept {
    return Align == UAlign;
  }

  template <typename U, std::size_t UAlign>
  bool operator!=(const AlignedAllocator<U, UAlign> &) const noexcept {
    return Align != UAlign;
  }
};