@stream-io/video-react-native-sdk 1.38.2 → 1.39.1-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1629 -0
- package/android/src/main/java/com/streamvideo/reactnative/StreamVideoReactNativeModule.kt +81 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/AudioPipeline.kt +436 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/EncoderConstants.kt +17 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/PipelineHost.kt +36 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/RecorderPlaybackSamplesSink.kt +60 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/RecorderVideoSink.kt +31 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/TracksRecorderManager.kt +329 -0
- package/android/src/main/java/com/streamvideo/reactnative/recorder/VideoPipeline.kt +472 -0
- package/dist/commonjs/components/Participant/ParticipantView/ParticipantLabel.js +4 -3
- package/dist/commonjs/components/Participant/ParticipantView/ParticipantLabel.js.map +1 -1
- package/dist/commonjs/hooks/index.js +11 -0
- package/dist/commonjs/hooks/index.js.map +1 -1
- package/dist/commonjs/hooks/useLoopbackRecording.js +243 -0
- package/dist/commonjs/hooks/useLoopbackRecording.js.map +1 -0
- package/dist/commonjs/utils/internal/callingx/callingx.js +2 -2
- package/dist/commonjs/utils/internal/callingx/callingx.js.map +1 -1
- package/dist/commonjs/version.js +1 -1
- package/dist/commonjs/version.js.map +1 -1
- package/dist/module/components/Participant/ParticipantView/ParticipantLabel.js +5 -4
- package/dist/module/components/Participant/ParticipantView/ParticipantLabel.js.map +1 -1
- package/dist/module/hooks/index.js +1 -0
- package/dist/module/hooks/index.js.map +1 -1
- package/dist/module/hooks/useLoopbackRecording.js +238 -0
- package/dist/module/hooks/useLoopbackRecording.js.map +1 -0
- package/dist/module/utils/internal/callingx/callingx.js +2 -2
- package/dist/module/utils/internal/callingx/callingx.js.map +1 -1
- package/dist/module/version.js +1 -1
- package/dist/module/version.js.map +1 -1
- package/dist/typescript/components/Participant/ParticipantView/ParticipantLabel.d.ts.map +1 -1
- package/dist/typescript/hooks/index.d.ts +1 -0
- package/dist/typescript/hooks/index.d.ts.map +1 -1
- package/dist/typescript/hooks/useLoopbackRecording.d.ts +85 -0
- package/dist/typescript/hooks/useLoopbackRecording.d.ts.map +1 -0
- package/dist/typescript/version.d.ts +1 -1
- package/dist/typescript/version.d.ts.map +1 -1
- package/ios/StreamVideoReactNative-Bridging-Header.h +2 -0
- package/ios/StreamVideoReactNative.m +81 -0
- package/ios/TracksRecorder/AudioPipeline.swift +270 -0
- package/ios/TracksRecorder/PipelineHost.swift +56 -0
- package/ios/TracksRecorder/RecorderAudioRenderTap.swift +154 -0
- package/ios/TracksRecorder/RecorderVideoSink.swift +137 -0
- package/ios/TracksRecorder/TracksRecorderManager.swift +327 -0
- package/ios/TracksRecorder/VideoPipeline.swift +297 -0
- package/package.json +7 -6
- package/src/components/Participant/ParticipantView/ParticipantLabel.tsx +5 -3
- package/src/hooks/index.ts +1 -0
- package/src/hooks/useLoopbackRecording.ts +438 -0
- package/src/utils/internal/callingx/callingx.ts +2 -2
- package/src/version.ts +1 -1
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright © 2026 Stream.io Inc. All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import AVFoundation
|
|
6
|
+
import CoreMedia
|
|
7
|
+
import Foundation
|
|
8
|
+
import WebRTC
|
|
9
|
+
|
|
10
|
+
/// Audio pipeline owned by `TracksRecorderManager`. Encapsulates the AAC audio path:
///  - the `RecorderAudioRenderTap` installed on
///    `RTCDefaultAudioProcessingModule.renderPreProcessingDelegate`
///    (post-mix decoded audio, no per-track lookup required),
///  - the in-place speaker mute (`muteOriginal: true` on the tap; the tap
///    zero-fills the buffer after copying for recording),
///  - the AAC `AVAssetWriterInput` (writer-driven encode via
///    `outputSettings`),
///  - per-recording counters / PTS range surfaced via `logSummary` at stop.
///
/// All state mutation runs on the host's serial queue. The tap's
/// callback runs on a WebRTC audio thread and re-dispatches onto
/// `host.queue` after copying the PCM buffer.
internal final class AudioPipeline {

    /// Target AAC bit rate handed to the writer input (`AVEncoderBitRateKey`).
    private static let aacBitRate: NSNumber = NSNumber(value: 64_000)

    /// Weak to avoid a retain cycle with the host that owns this pipeline.
    private weak var host: PipelineHost?

    private let apm: RTCDefaultAudioProcessingModule

    private var renderTap: RecorderAudioRenderTap?
    private var audioInput: AVAssetWriterInput?
    private var inputAdded = false

    /// Latched once the writer refuses the audio input, so the failure is
    /// reported to the host once instead of once per incoming buffer.
    private var inputSetupFailed = false

    // Diagnostic counters + PTS range, surfaced via `logSummary` at stop.
    // Invariant: buffersReceived == samplesAppended + buffersDropped.
    private var buffersReceived = 0
    private var samplesAppended = 0
    private var buffersDropped = 0
    private var firstSamplePtsUs: Int64 = -1
    private var lastSamplePtsUs: Int64 = -1

    // MARK: - Init

    /// - Parameters:
    ///   - host: Coordinator providing the serial queue, the asset writer and
    ///     the shared time origin. Held weakly.
    ///   - apm: Audio processing module whose `renderPreProcessingDelegate`
    ///     slot the render tap is installed on.
    init(host: PipelineHost, apm: RTCDefaultAudioProcessingModule) {
        self.host = host
        self.apm = apm
    }

    // MARK: - Public API

    /// Install the render-tap as the APM's `renderPreProcessingDelegate`.
    /// The tap copies PCM into a new buffer for recording AND zero-fills the
    /// original (post-mix decoded audio) so the speaker plays silence —
    /// this gives "audio in the file, silence at the speaker" without
    /// disrupting the recording. The standard `track.setVolume(0)` /
    /// `track.isEnabled = false` mutes apply *before* this tap and would
    /// silence the recording too.
    func start() {
        let tap = RecorderAudioRenderTap(muteOriginal: true) { [weak self] pcmBuffer in
            self?.handleAudioBuffer(pcmBuffer: pcmBuffer)
        }
        // Keep a strong reference for as long as the tap is installed.
        renderTap = tap
        apm.renderPreProcessingDelegate = tap
    }

    /// On-queue. Clear the render-tap delegate slot — only if it still
    /// points to this pipeline's tap. If another consumer has rotated in,
    /// leave theirs alone.
    func detachSink() {
        if let tap = renderTap, apm.renderPreProcessingDelegate === tap {
            apm.renderPreProcessingDelegate = nil
        }
        renderTap = nil
    }

    /// On-queue. Marks the asset-writer input as finished so the writer can
    /// finalise. No-op when no audio input was ever created.
    func markInputAsFinished() {
        audioInput?.markAsFinished()
    }

    /// Logs the per-recording counters and the PTS range of appended buffers.
    /// `tapCalls` is `-1` when the tap has already been released
    /// (i.e. `detachSink()` ran first); `durationMs` is `-1` when nothing
    /// was appended.
    func logSummary() {
        let tapCalls = renderTap?.callCount ?? -1
        let durationMs: Int64
        if firstSamplePtsUs >= 0 && lastSamplePtsUs >= firstSamplePtsUs {
            durationMs = (lastSamplePtsUs - firstSamplePtsUs) / 1000
        } else {
            durationMs = -1
        }
        // `%ld` matches Swift `Int` (NSInteger-sized); `%lld` matches `Int64`.
        NSLog(
            "[TracksRecorder.Audio] summary received=%ld appended=%ld dropped=%ld tapCalls=%ld firstPtsUs=%lld lastPtsUs=%lld durationMs=%lld",
            buffersReceived,
            samplesAppended,
            buffersDropped,
            tapCalls,
            firstSamplePtsUs,
            lastSamplePtsUs,
            durationMs
        )
    }

    // MARK: - Tap → queue bridge

    /// Invoked by the render tap's handler closure. Copies the buffer,
    /// stamps the capture time and hops onto the host queue.
    private func handleAudioBuffer(pcmBuffer: AVAudioPCMBuffer) {
        // The buffer is copied before the queue hop so the recorder queue
        // never reads memory the producer may still touch.
        // NOTE(review): `RecorderAudioRenderTap` already hands over a freshly
        // allocated `AVAudioPCMBuffer` per call, so this copy looks
        // redundant for that producer — kept as a defensive measure; confirm
        // before removing.
        guard let copy = AudioPipeline.deepCopyPCMBuffer(pcmBuffer) else { return }
        guard let host = host else { return }

        // `DispatchTime.now().uptimeNanoseconds` is the monotonic clock
        // that matches `RTCVideoFrame.timeStampNs` on iOS — both reduce
        // to `mach_absolute_time()` converted to nanoseconds, so the
        // shared time origin works coherently across both pipelines.
        let captureTimeNs = DispatchTime.now().uptimeNanoseconds
        host.queue.async { [weak self] in
            self?.handleAudioBufferOnQueue(pcmBuffer: copy, captureTimeNs: captureTimeNs)
        }
    }

    /// On-queue. Lazily creates the writer input, maps the capture time to a
    /// presentation timestamp and appends the buffer, updating the counters.
    private func handleAudioBufferOnQueue(pcmBuffer: AVAudioPCMBuffer, captureTimeNs: UInt64) {
        guard let host = host, host.isRecording, let writer = host.assetWriter else { return }

        // Count every buffer handled while recording, whether or not it ends
        // up in the file, so `received` is distinguishable from `appended`.
        buffersReceived += 1

        // Lazy-create the writer's audio input on the first buffer. The
        // input's settings depend on the runtime PCM format reported by
        // WebRTC. Do not retry after the writer has rejected the input.
        if audioInput == nil, !inputSetupFailed {
            configureAudioInput(format: pcmBuffer.format, writer: writer)
        }

        // Computed before the readiness checks on purpose: the first call
        // seeds the shared time origin even if this buffer is then dropped.
        let pts = presentationTime(host: host, timestampNs: captureTimeNs)

        guard writer.status == .writing,
              let audioInput = audioInput,
              audioInput.isReadyForMoreMediaData,
              let sampleBuffer = AudioPipeline.makeSampleBuffer(from: pcmBuffer, pts: pts),
              audioInput.append(sampleBuffer) else {
            buffersDropped += 1
            return
        }

        samplesAppended += 1
        let ptsUs = Int64(CMTimeGetSeconds(pts) * 1_000_000)
        if firstSamplePtsUs < 0 || ptsUs < firstSamplePtsUs {
            firstSamplePtsUs = ptsUs
        }
        if ptsUs > lastSamplePtsUs {
            lastSamplePtsUs = ptsUs
        }
    }

    // MARK: - Asset writer input setup

    /// Creates the AAC writer input from the runtime PCM format and adds it
    /// to `writer`. On success notifies the host via `onTrackAdded()`; when
    /// the writer refuses the input, reports `audio_input_add_failed` once
    /// via `onFatalError` and latches the failure.
    private func configureAudioInput(format: AVAudioFormat, writer: AVAssetWriter) {
        let settings: [String: Any] = [
            AVFormatIDKey: NSNumber(value: kAudioFormatMPEG4AAC),
            AVSampleRateKey: NSNumber(value: format.sampleRate),
            AVNumberOfChannelsKey: NSNumber(value: format.channelCount),
            AVEncoderBitRateKey: AudioPipeline.aacBitRate,
        ]
        let input = AVAssetWriterInput(mediaType: .audio, outputSettings: settings)
        input.expectsMediaDataInRealTime = true

        guard writer.canAdd(input) else {
            inputSetupFailed = true
            NSLog("[TracksRecorder.Audio] writer cannot add audio input")
            host?.onFatalError(makeRecorderError("audio_input_add_failed", code: 4))
            return
        }

        writer.add(input)
        audioInput = input
        inputAdded = true
        host?.onTrackAdded()
    }

    // MARK: - PCM → CMSampleBuffer helper

    /// Converts an `AVAudioPCMBuffer` into a `CMSampleBuffer` suitable for
    /// `AVAssetWriterInput.append`. Returns `nil` if the buffer is empty, its
    /// sample rate cannot be used as a `CMTime` timescale, or any Core Media
    /// call fails; the caller treats that as a dropped buffer.
    private static func makeSampleBuffer(
        from pcmBuffer: AVAudioPCMBuffer,
        pts: CMTime
    ) -> CMSampleBuffer? {
        // `Int32(_:)` traps on NaN/infinite/out-of-range doubles, and a zero
        // timescale yields an invalid per-sample duration.
        let sampleRate = pcmBuffer.format.sampleRate
        guard sampleRate.isFinite,
              sampleRate >= 1,
              sampleRate <= Double(Int32.max),
              pcmBuffer.frameLength > 0 else { return nil }

        var formatDescription: CMAudioFormatDescription?
        let createDescStatus = CMAudioFormatDescriptionCreate(
            allocator: kCFAllocatorDefault,
            asbd: pcmBuffer.format.streamDescription,
            layoutSize: 0,
            layout: nil,
            magicCookieSize: 0,
            magicCookie: nil,
            extensions: nil,
            formatDescriptionOut: &formatDescription
        )
        guard createDescStatus == noErr, let formatDesc = formatDescription else { return nil }

        var sampleBuffer: CMSampleBuffer?
        // One timing entry shared by all frames: each frame lasts 1/sampleRate.
        var timing = CMSampleTimingInfo(
            duration: CMTime(value: 1, timescale: Int32(sampleRate)),
            presentationTimeStamp: pts,
            decodeTimeStamp: .invalid
        )
        let createStatus = CMSampleBufferCreate(
            allocator: kCFAllocatorDefault,
            dataBuffer: nil,
            dataReady: false,
            makeDataReadyCallback: nil,
            refcon: nil,
            formatDescription: formatDesc,
            sampleCount: CMItemCount(pcmBuffer.frameLength),
            sampleTimingEntryCount: 1,
            sampleTimingArray: &timing,
            sampleSizeEntryCount: 0,
            sampleSizeArray: nil,
            sampleBufferOut: &sampleBuffer
        )
        guard createStatus == noErr, let sb = sampleBuffer else { return nil }

        // Attach the PCM payload to the (so far data-less) sample buffer.
        let setStatus = CMSampleBufferSetDataBufferFromAudioBufferList(
            sb,
            blockBufferAllocator: kCFAllocatorDefault,
            blockBufferMemoryAllocator: kCFAllocatorDefault,
            flags: 0,
            bufferList: pcmBuffer.audioBufferList
        )
        guard setStatus == noErr else { return nil }
        return sb
    }

    /// Returns a deep copy of the supplied `AVAudioPCMBuffer`, or `nil` if the
    /// destination buffer cannot be allocated.
    ///
    /// The copy walks the underlying `AudioBufferList` instead of the typed
    /// per-channel accessors, so it is correct for interleaved as well as
    /// non-interleaved layouts and for every sample type the format can carry.
    private static func deepCopyPCMBuffer(_ source: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        guard let copy = AVAudioPCMBuffer(
            pcmFormat: source.format,
            frameCapacity: source.frameCapacity
        ) else { return nil }

        // Setting `frameLength` also updates `mDataByteSize` of every
        // `AudioBuffer` in the copy's buffer list.
        copy.frameLength = source.frameLength

        let sourceBuffers = UnsafeMutableAudioBufferListPointer(
            UnsafeMutablePointer(mutating: source.audioBufferList)
        )
        let copyBuffers = UnsafeMutableAudioBufferListPointer(copy.mutableAudioBufferList)
        for (src, dst) in zip(sourceBuffers, copyBuffers) {
            guard let srcData = src.mData, let dstData = dst.mData else { continue }
            // Never copy more than either side declares as valid.
            memcpy(dstData, srcData, Int(min(src.mDataByteSize, dst.mDataByteSize)))
        }
        return copy
    }
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright © 2026 Stream.io Inc. All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import AVFoundation
|
|
6
|
+
import CoreMedia
|
|
7
|
+
import Foundation
|
|
8
|
+
|
|
9
|
+
/// Coordination contract between `TracksRecorderManager` and the per-kind
/// pipelines it drives (`VideoPipeline`, `AudioPipeline`).
///
/// Responsibilities are split as follows: a pipeline owns its encoder, sink
/// and drain logic; the host owns the lifecycle, the asset writer, the
/// writer-start gate, the shared time origin and the terminal-completion
/// barrier.
///
/// Threading: every requirement below is expected to be used from the host's
/// serial queue, so a pipeline must `host.queue.async { ... }` before calling
/// back into the host. The protocol is class-bound so that pipelines can keep
/// a `weak` reference to the host and avoid retain cycles.
internal protocol PipelineHost: AnyObject {
    /// The recorder's serial dispatch queue.
    var queue: DispatchQueue { get }

    /// The asset writer pipelines add their inputs to. Pipelines treat `nil`
    /// as "nothing to write to" and ignore incoming samples.
    var assetWriter: AVAssetWriter? { get }

    /// Whether a recording is currently active. Pipelines ignore incoming
    /// samples while this is `false`.
    var isRecording: Bool { get }

    /// Returns the recording's shared time origin in nanoseconds. The first
    /// pipeline to deliver a sample seeds the origin with its timestamp;
    /// every later call returns the value established by that first call.
    func seedOriginNs(_ timestampNs: UInt64) -> UInt64

    /// Signals that a pipeline has added its input to the writer. The host
    /// decrements its pending-pipeline counter and starts the writer once
    /// all expected pipelines have reported their input.
    func onTrackAdded()

    /// Reports an unrecoverable pipeline failure to the host.
    /// NOTE(review): how the host reacts (e.g. aborting the recording) is
    /// not visible from this protocol — confirm against the implementation.
    func onFatalError(_ error: NSError)
}
|
|
39
|
+
|
|
40
|
+
/// Converts an absolute monotonic timestamp (nanoseconds) into a presentation
/// time measured from the recording's shared origin.
///
/// The origin is obtained from `host.seedOriginNs`, which is seeded by the
/// first sample of either pipeline. Timestamps at or before the origin map to
/// zero rather than to a negative time.
///
/// - Parameters:
///   - host: Provider of the shared time origin.
///   - timestampNs: Absolute capture timestamp in nanoseconds.
/// - Returns: Elapsed time since the origin, expressed on a nanosecond timescale.
internal func presentationTime(host: PipelineHost, timestampNs: UInt64) -> CMTime {
    let nanosecondTimescale: CMTimeScale = 1_000_000_000
    let originNs = host.seedOriginNs(timestampNs)

    // Clamp: a sample stamped at or before the origin is presented at t = 0.
    guard timestampNs > originNs else {
        return CMTime(value: 0, timescale: nanosecondTimescale)
    }
    return CMTime(value: Int64(timestampNs - originNs), timescale: nanosecondTimescale)
}
|
|
49
|
+
|
|
50
|
+
/// Builds an `NSError` in the tracks-recorder error domain.
///
/// - Parameters:
///   - message: Stored under `NSLocalizedDescriptionKey`.
///   - code: Numeric error code within the recorder domain.
/// - Returns: The assembled error.
internal func makeRecorderError(_ message: String, code: Int) -> NSError {
    let details: [String: Any] = [NSLocalizedDescriptionKey: message]
    return NSError(domain: "io.stream.video.tracks-recorder", code: code, userInfo: details)
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright © 2026 Stream.io Inc. All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import AVFoundation
|
|
6
|
+
import CoreMedia
|
|
7
|
+
import Foundation
|
|
8
|
+
import WebRTC
|
|
9
|
+
|
|
10
|
+
/// Render-side audio tap used by `TracksRecorderManager`. Implements
/// `RTCAudioCustomProcessingDelegate` and is installed on
/// `RTCDefaultAudioProcessingModule.renderPreProcessingDelegate` for the
/// duration of a recording.
///
/// The render path WebRTC is using:
/// ```
///   SFU → decoder → audio mixer → renderPreProcessingDelegate →
///   render-side processing → speaker
/// ```
///
/// What the delegate sees per call: an `RTCAudioBuffer` that holds the
/// **post-mix** decoded audio about to be played to the speaker. In a
/// self-sub-only call, that's exactly the SFU echo of the local mic. In a
/// call with multiple remote participants the buffer contains the
/// post-mix output (everyone blended together).
///
/// **Important:** `RTCAudioBuffer` exposes `rawBuffer(forChannel:)` as
/// `UnsafeMutablePointer<Float>` in **FloatS16** format — i.e. Float32 values
/// in the Int16 range -32768…32767. Cast/clamp to `Int16` for the PCM
/// destination buffer (no normalisation needed).
///
/// **Threading:** all three protocol methods run on a WebRTC audio
/// processing thread. The buffer handler closure is invoked from there; the
/// caller is responsible for hopping queues if needed.
///
/// **Lifetime:** `RTCDefaultAudioProcessingModule.renderPreProcessingDelegate`
/// is `weak`, so the manager must keep this instance alive for the duration
/// of recording.
@objc public final class RecorderAudioRenderTap: NSObject, RTCAudioCustomProcessingDelegate {

    /// Receives one freshly allocated Int16 PCM buffer per processed chunk.
    typealias BufferHandler = (AVAudioPCMBuffer) -> Void

    private let bufferHandler: BufferHandler

    /// When `true`, the WebRTC `RTCAudioBuffer` is zero-filled in place
    /// *after* the samples have been copied into the recording PCM buffer.
    /// The recording keeps the original audio; everything downstream of
    /// this delegate (render-side APM → audio device module → speaker)
    /// sees silence. This yields "audio in the file, silence at the
    /// speaker" without disrupting the recording — `track.setVolume(0)`
    /// / `track.isEnabled = false` mutes apply *before* this tap and
    /// would silence the recording too.
    ///
    /// Side effect to be aware of: this mutes the entire post-mix
    /// playback, not just one track. In a self-sub-only call post-mix ==
    /// loopback, so it's effectively a per-track mute. With other remote
    /// participants in the call they would be muted at the speaker too
    /// while recording is active.
    private let muteOriginal: Bool

    private var processingSampleRate: Double = 0
    private var processingChannels: Int = 0

    /// Non-interleaved Int16 format built in `audioProcessingInitialize`;
    /// `nil` before initialisation and after release.
    private var avFormat: AVAudioFormat?

    /// Lock-protected call counter — exposes whether the APM is invoking
    /// `audioProcessingProcess(audioBuffer:)` at all.
    private let counterLock = NSLock()
    private var _callCount: Int = 0
    @objc public var callCount: Int {
        counterLock.lock()
        defer { counterLock.unlock() }
        return _callCount
    }

    /// - Parameters:
    ///   - muteOriginal: Zero the WebRTC buffer after copying it (see the
    ///     property of the same name).
    ///   - bufferHandler: Called on the WebRTC audio thread with each
    ///     converted buffer.
    init(muteOriginal: Bool, bufferHandler: @escaping BufferHandler) {
        self.muteOriginal = muteOriginal
        self.bufferHandler = bufferHandler
        super.init()
    }

    // MARK: - RTCAudioCustomProcessingDelegate

    /// Records the processing configuration and prepares the Int16,
    /// non-interleaved format used for every buffer handed to the handler.
    public func audioProcessingInitialize(sampleRate: Int, channels: Int) {
        processingSampleRate = Double(sampleRate)
        processingChannels = channels
        avFormat = AVAudioFormat(
            commonFormat: .pcmFormatInt16,
            sampleRate: processingSampleRate,
            channels: AVAudioChannelCount(channels),
            interleaved: false
        )
        // `%ld` matches Swift `Int` (NSInteger-sized).
        NSLog("[TracksRecorder] RenderTap initialize sampleRate=%ld channels=%ld",
              sampleRate, channels)
    }

    /// Converts the FloatS16 contents of `audioBuffer` into a new Int16
    /// `AVAudioPCMBuffer`, optionally silences the source in place, and
    /// passes the copy to the buffer handler.
    ///
    /// Returns without invoking the handler (and without muting) when no
    /// format is available, the buffer is empty, or allocation fails.
    public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) {
        counterLock.lock()
        _callCount += 1
        let count = _callCount
        counterLock.unlock()

        let frames = Int(audioBuffer.frames)
        let channels = Int(audioBuffer.channels)

        if count == 1 {
            NSLog("[TracksRecorder] RenderTap FIRST call frames=%ld channels=%ld",
                  frames, channels)
        } else if count % 100 == 0 {
            NSLog("[TracksRecorder] RenderTap call #%ld", count)
        }

        guard let format = avFormat else { return }
        guard frames > 0, channels > 0 else { return }

        guard let pcm = AVAudioPCMBuffer(
            pcmFormat: format,
            frameCapacity: AVAudioFrameCount(frames)
        ) else {
            return
        }
        pcm.frameLength = AVAudioFrameCount(frames)
        guard let dst = pcm.int16ChannelData else { return }

        // `dst` holds exactly `format.channelCount` channel pointers. The
        // buffer's channel count is expected to match the one announced in
        // `audioProcessingInitialize`, but indexing past it would be an
        // out-of-bounds access, so never record more channels than `dst` has.
        let formatChannels = Int(format.channelCount)

        // Copy each channel: FloatS16 (Float32 in Int16 range) → Int16.
        // No normalisation needed — values already span the Int16 range.
        for ch in 0..<channels {
            let src = audioBuffer.rawBuffer(forChannel: ch)
            let dstChannel: UnsafeMutablePointer<Int16>? = ch < formatChannels ? dst[ch] : nil
            for i in 0..<frames {
                if let dstChannel = dstChannel {
                    dstChannel[i] = RecorderAudioRenderTap.int16Sample(fromFloatS16: src[i])
                }

                // If `muteOriginal` is on, zero the source buffer in the same pass
                // so the data continuing downstream to the speaker is silence.
                if muteOriginal {
                    src[i] = 0
                }
            }
        }

        // If the buffer carried fewer channels than the format declares,
        // make the untouched destination channels explicit silence.
        for ch in stride(from: channels, to: formatChannels, by: 1) {
            memset(dst[ch], 0, frames * MemoryLayout<Int16>.size)
        }

        bufferHandler(pcm)
    }

    /// Drops the cached format; subsequent process calls are ignored until
    /// the delegate is initialised again.
    public func audioProcessingRelease() {
        avFormat = nil
        // Deliberately preserve `_callCount` — useful in end-of-recording
        // diagnostics even after release.
    }

    // MARK: - Sample conversion

    /// Converts one FloatS16 sample to `Int16`, saturating at the type's
    /// bounds. NaN maps to silence: `Int16(_:)` traps on NaN, which would
    /// crash the audio thread.
    private static func int16Sample(fromFloatS16 value: Float) -> Int16 {
        if value.isNaN { return 0 }
        if value >= 32767 { return Int16.max }
        if value <= -32768 { return Int16.min }
        return Int16(value)
    }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright © 2026 Stream.io Inc. All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import CoreMedia
|
|
6
|
+
import CoreVideo
|
|
7
|
+
import Foundation
|
|
8
|
+
import WebRTC
|
|
9
|
+
|
|
10
|
+
/// Per-track video sink used by `TracksRecorderManager`. Implements
/// `RTCVideoRenderer` so it can be attached directly to an `RTCVideoTrack`.
///
/// Each delivered `RTCVideoFrame` is normalised to a CVPixelBuffer in the
/// hardware H.264 encoder's native format — **NV12**
/// (`kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange`).
/// `RTCCVPixelBuffer` sources (camera passthrough) are forwarded with
/// the underlying pixel buffer unchanged. `RTCI420Buffer` and other YUV
/// sources are converted into a fresh IOSurface-backed NV12 buffer via
/// a plane reorder (no color-space conversion required).
///
/// **Why NV12 and not BGRA?** AVAssetWriter's hardware encoder accepts
/// both, but BGRA requires an internal colour-space conversion that
/// fails with VideoToolbox `-16364` on certain stride/alignment
/// combinations a few frames in. NV12 is the encoder's native input;
/// passing it directly bypasses the failure entirely.
///
/// Threading: `renderFrame` runs on a WebRTC frame-delivery thread; the
/// callback must be safe to invoke from there. The manager serialises
/// further access on its own queue.
@objc final class RecorderVideoSink: NSObject, RTCVideoRenderer {

    /// Receives the normalised pixel buffer together with the frame's
    /// width, height and `timeStampNs`.
    typealias FrameHandler = (_ pixelBuffer: CVPixelBuffer, _ width: Int32, _ height: Int32, _ timestampNs: Int64) -> Void

    private let frameHandler: FrameHandler

    /// - Parameter frameHandler: Invoked from `renderFrame` for every frame
    ///   that could be turned into a pixel buffer.
    init(frameHandler: @escaping FrameHandler) {
        self.frameHandler = frameHandler
        super.init()
    }

    // MARK: - RTCVideoRenderer

    func setSize(_ size: CGSize) {
        // No-op: pixel buffer dimensions are derived from each incoming frame.
    }

    /// Normalises `frame` to a `CVPixelBuffer` and forwards it to the frame
    /// handler. Frames that are `nil`, have a non-positive size, or cannot
    /// be converted are silently skipped.
    func renderFrame(_ frame: RTCVideoFrame?) {
        guard let frame = frame, frame.width > 0, frame.height > 0 else { return }

        let pixelBuffer: CVPixelBuffer?
        if let cvBuffer = frame.buffer as? RTCCVPixelBuffer {
            // Camera passthrough — already a CVPixelBuffer (typically NV12 on iOS).
            pixelBuffer = cvBuffer.pixelBuffer
        } else if let i420 = frame.buffer as? RTCI420Buffer {
            pixelBuffer = Self.makeNV12PixelBuffer(fromI420: i420)
        } else if let converted = frame.buffer.toI420() as? RTCI420Buffer {
            // Other YUV variants — normalise via toI420() first.
            pixelBuffer = Self.makeNV12PixelBuffer(fromI420: converted)
        } else {
            pixelBuffer = nil
        }

        guard let outputBuffer = pixelBuffer else { return }
        frameHandler(outputBuffer, frame.width, frame.height, frame.timeStampNs)
    }

    // MARK: - I420 → NV12

    /// Allocates a fresh IOSurface-backed NV12 `CVPixelBuffer` and copies
    /// the I420 source's planes into it (Y as-is, U+V interleaved into the
    /// UV plane). No colour-space conversion — this is purely a plane
    /// reorder, so the operation is both fast and bit-exact.
    ///
    /// Every copy is bounded by the destination plane's own geometry, so
    /// odd source dimensions (where the rounded-up chroma size may exceed
    /// what the destination plane holds) cannot write past the buffer.
    ///
    /// - Returns: The filled pixel buffer, or `nil` when the source geometry
    ///   is unusable or Core Video cannot allocate / expose the buffer.
    private static func makeNV12PixelBuffer(fromI420 i420: RTCI420Buffer) -> CVPixelBuffer? {
        let width = Int(i420.width)
        let height = Int(i420.height)
        let chromaWidth = Int(i420.chromaWidth)
        let chromaHeight = Int(i420.chromaHeight)
        let ySrcStride = Int(i420.strideY)
        let uSrcStride = Int(i420.strideU)
        let vSrcStride = Int(i420.strideV)

        // Non-positive sizes or strides would turn into negative byte counts
        // or invalid ranges below.
        guard width > 0, height > 0,
              ySrcStride > 0, uSrcStride > 0, vSrcStride > 0 else { return nil }

        var pixelBuffer: CVPixelBuffer?
        let attrs: [String: Any] = [
            // An empty dictionary requests IOSurface backing with defaults.
            kCVPixelBufferIOSurfacePropertiesKey as String: [:] as [String: Any],
        ]
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange,
            attrs as CFDictionary,
            &pixelBuffer
        )
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else { return nil }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        // Plane 0: Y — direct copy.
        guard let yDest = CVPixelBufferGetBaseAddressOfPlane(buffer, 0) else { return nil }
        let yDestStride = CVPixelBufferGetBytesPerRowOfPlane(buffer, 0)
        let yRows = max(0, min(height, CVPixelBufferGetHeightOfPlane(buffer, 0)))
        let ySrc = UnsafeRawPointer(i420.dataY)
        if ySrcStride == yDestStride {
            // Identical layout: copy the whole plane in one go.
            memcpy(yDest, ySrc, ySrcStride * yRows)
        } else {
            // Different padding: copy only the visible pixels of each row.
            let rowBytes = min(width, ySrcStride, yDestStride)
            for row in 0..<yRows {
                memcpy(
                    yDest.advanced(by: row * yDestStride),
                    ySrc.advanced(by: row * ySrcStride),
                    rowBytes
                )
            }
        }

        // Plane 1: UV — interleave I420's U and V planes.
        guard let uvDestRaw = CVPixelBufferGetBaseAddressOfPlane(buffer, 1) else { return nil }
        let uvDestStride = CVPixelBufferGetBytesPerRowOfPlane(buffer, 1)
        let uvDest = uvDestRaw.assumingMemoryBound(to: UInt8.self)
        let uSrc = i420.dataU
        let vSrc = i420.dataV

        // Each chroma sample occupies two destination bytes (U, V); clamp
        // rows and columns to what the destination plane can actually hold.
        let uvRows = max(0, min(chromaHeight, CVPixelBufferGetHeightOfPlane(buffer, 1)))
        let uvCols = max(0, min(chromaWidth, uvDestStride / 2, uSrcStride, vSrcStride))
        for row in 0..<uvRows {
            let uRow = uSrc.advanced(by: row * uSrcStride)
            let vRow = vSrc.advanced(by: row * vSrcStride)
            let uvRow = uvDest.advanced(by: row * uvDestStride)
            for col in 0..<uvCols {
                uvRow[col * 2] = uRow[col]
                uvRow[col * 2 + 1] = vRow[col]
            }
        }

        return buffer
    }
}
|