@siteed/audio-studio 3.2.0-beta.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +356 -5
- package/android/src/main/java/net/siteed/audiostudio/AudioStreamDecoder.kt +306 -94
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +39 -6
- package/build/cjs/errors/AudioStreamError.js +9 -0
- package/build/cjs/errors/AudioStreamError.js.map +1 -1
- package/build/cjs/errors/AudioStreamError.test.js +22 -1
- package/build/cjs/errors/AudioStreamError.test.js.map +1 -1
- package/build/cjs/streamAudioData.js +99 -32
- package/build/cjs/streamAudioData.js.map +1 -1
- package/build/cjs/utils/audioProcessing.js +14 -10
- package/build/cjs/utils/audioProcessing.js.map +1 -1
- package/build/esm/errors/AudioStreamError.js +9 -0
- package/build/esm/errors/AudioStreamError.js.map +1 -1
- package/build/esm/errors/AudioStreamError.test.js +22 -1
- package/build/esm/errors/AudioStreamError.test.js.map +1 -1
- package/build/esm/streamAudioData.js +99 -32
- package/build/esm/streamAudioData.js.map +1 -1
- package/build/esm/utils/audioProcessing.js +14 -10
- package/build/esm/utils/audioProcessing.js.map +1 -1
- package/build/types/errors/AudioStreamError.d.ts.map +1 -1
- package/build/types/streamAudioData.d.ts +5 -0
- package/build/types/streamAudioData.d.ts.map +1 -1
- package/build/types/utils/audioProcessing.d.ts +2 -2
- package/build/types/utils/audioProcessing.d.ts.map +1 -1
- package/ios/AudioStreamDecoder.swift +191 -100
- package/ios/AudioStudioModule.swift +48 -9
- package/package.json +163 -146
- package/scripts/README.md +58 -0
- package/src/errors/AudioStreamError.test.ts +29 -2
- package/src/errors/AudioStreamError.ts +14 -0
- package/src/streamAudioData.ts +146 -42
- package/src/utils/audioProcessing.ts +25 -14
- package/android/src/androidTest/assets/chorus.wav +0 -0
- package/android/src/androidTest/assets/jfk.wav +0 -0
- package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
- package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +0 -190
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +0 -197
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +0 -487
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +0 -250
- package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +0 -186
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +0 -332
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +0 -324
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +0 -253
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +0 -218
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +0 -120
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +0 -345
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +0 -340
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +0 -252
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +0 -95
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +0 -43
- package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +0 -37
- package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +0 -28
- package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +0 -279
- package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +0 -249
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +0 -151
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +0 -273
- package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +0 -140
- package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +0 -49
- package/android/src/test/resources/chorus.wav +0 -0
- package/android/src/test/resources/generate_test_audio.py +0 -94
- package/android/src/test/resources/jfk.wav +0 -0
- package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
- package/android/src/test/resources/recorder_hello_world.wav +0 -0
- package/ios/AudioStudioTests/AudioFileHandlerTests.swift +0 -338
- package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +0 -331
- package/ios/AudioStudioTests/AudioStreamDecoderTests.swift +0 -128
- package/ios/AudioStudioTests/AudioTestHelpers.swift +0 -130
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +0 -334
- package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +0 -105
- package/ios/AudioStudioTests/Info.plist +0 -22
- package/ios/AudioStudioTests/README.md +0 -39
- package/ios/AudioStudioTests/SimpleAudioTest.swift +0 -98
- package/ios/AudioStudioTests/TestAudioGenerator.swift +0 -75
- package/ios/tests/README.md +0 -41
- package/ios/tests/integration/buffer_and_fallback_test.swift +0 -178
- package/ios/tests/integration/buffer_duration_test.swift +0 -185
- package/ios/tests/integration/compressed_only_output_test.swift +0 -271
- package/ios/tests/integration/output_control_test.swift +0 -322
- package/ios/tests/integration/run_integration_tests.sh +0 -37
- package/ios/tests/opus_support_test_macos.swift +0 -154
- package/ios/tests/standalone/audio_processing_test.swift +0 -144
- package/ios/tests/standalone/audio_recording_test.swift +0 -277
- package/ios/tests/standalone/audio_streaming_test.swift +0 -249
- package/ios/tests/standalone/standalone_test.swift +0 -144
|
@@ -11,6 +11,7 @@ import android.content.Context
|
|
|
11
11
|
import android.media.MediaCodec
|
|
12
12
|
import android.media.MediaExtractor
|
|
13
13
|
import android.media.MediaFormat
|
|
14
|
+
import android.media.MediaMetadataRetriever
|
|
14
15
|
import android.net.Uri
|
|
15
16
|
import android.os.Bundle
|
|
16
17
|
import androidx.core.os.bundleOf
|
|
@@ -20,6 +21,7 @@ import java.nio.ByteOrder
|
|
|
20
21
|
import java.util.concurrent.atomic.AtomicBoolean
|
|
21
22
|
import java.util.concurrent.atomic.AtomicInteger
|
|
22
23
|
import kotlin.concurrent.thread
|
|
24
|
+
import kotlin.math.ceil
|
|
23
25
|
|
|
24
26
|
interface AudioStreamDecoderDelegate {
|
|
25
27
|
fun streamDecoderEmit(eventName: String, payload: Bundle)
|
|
@@ -41,6 +43,7 @@ class AudioStreamDecoder(
|
|
|
41
43
|
val chunkDurationMs: Int,
|
|
42
44
|
val maxChunkBytes: Int?,
|
|
43
45
|
val maxBufferedChunks: Int,
|
|
46
|
+
val backpressureTimeoutMs: Long?,
|
|
44
47
|
)
|
|
45
48
|
|
|
46
49
|
companion object {
|
|
@@ -77,12 +80,15 @@ class AudioStreamDecoder(
|
|
|
77
80
|
}
|
|
78
81
|
|
|
79
82
|
private fun run() {
|
|
80
|
-
val
|
|
81
|
-
if (
|
|
83
|
+
val resolved = resolveFilePath(options.fileUri)
|
|
84
|
+
if (resolved == null) {
|
|
82
85
|
emitError("ERR_AUDIO_STREAM_FILE_NOT_FOUND", "Cannot resolve file: ${options.fileUri}")
|
|
83
86
|
return
|
|
84
87
|
}
|
|
88
|
+
val path = resolved.path
|
|
89
|
+
val tempFile = resolved.tempFile
|
|
85
90
|
if (!File(path).exists()) {
|
|
91
|
+
tempFile?.let { runCatching { it.delete() } }
|
|
86
92
|
emitError("ERR_AUDIO_STREAM_FILE_NOT_FOUND", "File not found: $path")
|
|
87
93
|
return
|
|
88
94
|
}
|
|
@@ -91,7 +97,7 @@ class AudioStreamDecoder(
|
|
|
91
97
|
var codec: MediaCodec? = null
|
|
92
98
|
var emittedChunks = 0
|
|
93
99
|
var emittedSamples = 0L
|
|
94
|
-
var
|
|
100
|
+
var backpressureTimedOut = false
|
|
95
101
|
var outputSampleRate = options.targetSampleRate ?: 0
|
|
96
102
|
var outputChannels = options.channels ?: 0
|
|
97
103
|
|
|
@@ -119,16 +125,48 @@ class AudioStreamDecoder(
|
|
|
119
125
|
}
|
|
120
126
|
extractor.selectTrack(trackIndex)
|
|
121
127
|
val format = extractor.getTrackFormat(trackIndex)
|
|
122
|
-
|
|
128
|
+
val extractorDurationUs = if (format.containsKey(MediaFormat.KEY_DURATION)) {
|
|
123
129
|
format.getLong(MediaFormat.KEY_DURATION)
|
|
124
130
|
} else {
|
|
125
|
-
-
|
|
131
|
+
-1L
|
|
132
|
+
}
|
|
133
|
+
val metadataDurationMs = if (extractorDurationUs <= 0) {
|
|
134
|
+
readMetadataDurationMs(path)
|
|
135
|
+
} else {
|
|
136
|
+
-1L
|
|
137
|
+
}
|
|
138
|
+
val assetDurationUs = if (extractorDurationUs > 0) {
|
|
139
|
+
extractorDurationUs
|
|
140
|
+
} else if (metadataDurationMs > 0) {
|
|
141
|
+
metadataDurationMs * 1000L
|
|
142
|
+
} else {
|
|
143
|
+
-1L
|
|
144
|
+
}
|
|
145
|
+
val assetDurationMs = if (assetDurationUs > 0) {
|
|
146
|
+
assetDurationUs / 1000.0
|
|
147
|
+
} else {
|
|
148
|
+
0.0
|
|
149
|
+
}
|
|
150
|
+
// Duration of the *decoded range*, not the whole file, so progress
|
|
151
|
+
// and completion payloads match what the caller actually receives.
|
|
152
|
+
val rangeDurationMs = when {
|
|
153
|
+
options.startTimeMs != null && options.endTimeMs != null ->
|
|
154
|
+
(options.endTimeMs - options.startTimeMs).toDouble().coerceAtLeast(0.0)
|
|
155
|
+
options.endTimeMs != null -> options.endTimeMs.toDouble().coerceAtLeast(0.0)
|
|
156
|
+
options.startTimeMs != null ->
|
|
157
|
+
(assetDurationMs - options.startTimeMs).coerceAtLeast(0.0)
|
|
158
|
+
else -> assetDurationMs
|
|
126
159
|
}
|
|
127
160
|
|
|
128
|
-
|
|
129
|
-
|
|
161
|
+
var sourceSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
162
|
+
var sourceChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
|
|
130
163
|
if (outputSampleRate <= 0) outputSampleRate = sourceSampleRate
|
|
131
164
|
if (outputChannels <= 0) outputChannels = minOf(2, maxOf(1, sourceChannels))
|
|
165
|
+
val maxOutputSamples = options.endTimeMs?.let {
|
|
166
|
+
((rangeDurationMs / 1000.0) * outputSampleRate.toDouble() * outputChannels.toDouble())
|
|
167
|
+
.toLong()
|
|
168
|
+
.coerceAtLeast(0L)
|
|
169
|
+
} ?: Long.MAX_VALUE
|
|
132
170
|
|
|
133
171
|
options.startTimeMs?.let {
|
|
134
172
|
extractor.seekTo(it * 1000L, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
|
|
@@ -156,14 +194,49 @@ class AudioStreamDecoder(
|
|
|
156
194
|
|
|
157
195
|
val endTimeUs = options.endTimeMs?.let { it * 1000L } ?: Long.MAX_VALUE
|
|
158
196
|
val rangeStartMs = options.startTimeMs ?: 0L
|
|
197
|
+
val targetStartUs = rangeStartMs * 1000L
|
|
159
198
|
val samplesPerChunk = run {
|
|
160
199
|
val byTime = (options.chunkDurationMs.toLong() *
|
|
161
200
|
outputSampleRate.toLong() / 1000L).toInt() * outputChannels
|
|
162
|
-
|
|
163
|
-
|
|
201
|
+
// Round byte-cap down to a multiple of `outputChannels` so a
|
|
202
|
+
// single interleaved frame is never split across chunks.
|
|
203
|
+
val byBytes = options.maxChunkBytes?.let {
|
|
204
|
+
(it / 4 / outputChannels) * outputChannels
|
|
205
|
+
} ?: Int.MAX_VALUE
|
|
206
|
+
val raw = minOf(byTime, byBytes)
|
|
207
|
+
maxOf(outputChannels, (raw / outputChannels) * outputChannels)
|
|
164
208
|
}
|
|
165
|
-
|
|
209
|
+
var pending = FloatArray(samplesPerChunk * 2)
|
|
166
210
|
var pendingLen = 0
|
|
211
|
+
var pendingHead = 0
|
|
212
|
+
fun pendingAvailable(): Int = pendingLen - pendingHead
|
|
213
|
+
fun compactPending() {
|
|
214
|
+
val available = pendingAvailable()
|
|
215
|
+
if (pendingHead == 0) return
|
|
216
|
+
if (available > 0) {
|
|
217
|
+
System.arraycopy(pending, pendingHead, pending, 0, available)
|
|
218
|
+
}
|
|
219
|
+
pendingHead = 0
|
|
220
|
+
pendingLen = available
|
|
221
|
+
}
|
|
222
|
+
fun appendPending(samples: FloatArray) {
|
|
223
|
+
if (samples.isEmpty()) return
|
|
224
|
+
val available = pendingAvailable()
|
|
225
|
+
val requiredLen = available + samples.size
|
|
226
|
+
if (requiredLen > pending.size) {
|
|
227
|
+
val grown = FloatArray(requiredLen.coerceAtLeast(pending.size * 2))
|
|
228
|
+
if (available > 0) {
|
|
229
|
+
System.arraycopy(pending, pendingHead, grown, 0, available)
|
|
230
|
+
}
|
|
231
|
+
pending = grown
|
|
232
|
+
pendingHead = 0
|
|
233
|
+
pendingLen = available
|
|
234
|
+
} else if (pending.size - pendingLen < samples.size && pendingHead > 0) {
|
|
235
|
+
compactPending()
|
|
236
|
+
}
|
|
237
|
+
System.arraycopy(samples, 0, pending, pendingLen, samples.size)
|
|
238
|
+
pendingLen += samples.size
|
|
239
|
+
}
|
|
167
240
|
val info = MediaCodec.BufferInfo()
|
|
168
241
|
var sawInputEOS = false
|
|
169
242
|
var sawOutputEOS = false
|
|
@@ -220,52 +293,72 @@ class AudioStreamDecoder(
|
|
|
220
293
|
outputChannels,
|
|
221
294
|
options.normalizeAudio
|
|
222
295
|
)
|
|
296
|
+
// SEEK_TO_CLOSEST_SYNC can land before the requested
|
|
297
|
+
// start (encoder priming on AAC/MP3, sync-frame
|
|
298
|
+
// granularity on lossy containers). Trim source-rate
|
|
299
|
+
// frames whose presentation time is before
|
|
300
|
+
// `targetStartUs` so the first emitted sample lines
|
|
301
|
+
// up with `startTimeMs`.
|
|
302
|
+
val bufferStartUs = info.presentationTimeUs
|
|
303
|
+
val trimmed: FloatArray = if (
|
|
304
|
+
targetStartUs > 0 &&
|
|
305
|
+
bufferStartUs in 0L until targetStartUs &&
|
|
306
|
+
converted.isNotEmpty()
|
|
307
|
+
) {
|
|
308
|
+
val deltaUs = targetStartUs - bufferStartUs
|
|
309
|
+
val skipFrames = (
|
|
310
|
+
deltaUs.toDouble() * sourceSampleRate.toDouble() /
|
|
311
|
+
1_000_000.0
|
|
312
|
+
).toInt()
|
|
313
|
+
val totalFrames = converted.size / outputChannels
|
|
314
|
+
val actualSkip = minOf(skipFrames, totalFrames)
|
|
315
|
+
if (actualSkip >= totalFrames) {
|
|
316
|
+
FloatArray(0)
|
|
317
|
+
} else if (actualSkip > 0) {
|
|
318
|
+
val skipFloats = actualSkip * outputChannels
|
|
319
|
+
val out = FloatArray(converted.size - skipFloats)
|
|
320
|
+
System.arraycopy(
|
|
321
|
+
converted,
|
|
322
|
+
skipFloats,
|
|
323
|
+
out,
|
|
324
|
+
0,
|
|
325
|
+
out.size
|
|
326
|
+
)
|
|
327
|
+
out
|
|
328
|
+
} else {
|
|
329
|
+
converted
|
|
330
|
+
}
|
|
331
|
+
} else {
|
|
332
|
+
converted
|
|
333
|
+
}
|
|
223
334
|
val resampled = resampler.process(
|
|
224
|
-
|
|
335
|
+
trimmed,
|
|
225
336
|
sourceSampleRate,
|
|
226
337
|
outputSampleRate
|
|
227
338
|
)
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
val
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
339
|
+
val remainingOutputSamples =
|
|
340
|
+
maxOutputSamples - emittedSamples - pendingAvailable().toLong()
|
|
341
|
+
val boundedResampled = when {
|
|
342
|
+
remainingOutputSamples <= 0L -> FloatArray(0)
|
|
343
|
+
resampled.size.toLong() > remainingOutputSamples -> {
|
|
344
|
+
val boundedSize = (
|
|
345
|
+
remainingOutputSamples.toInt() / outputChannels
|
|
346
|
+
) * outputChannels
|
|
347
|
+
resampled.copyOf(boundedSize)
|
|
236
348
|
}
|
|
237
|
-
|
|
238
|
-
pending
|
|
239
|
-
}
|
|
240
|
-
// Note: when grown, we use a transient buffer that we
|
|
241
|
-
// process and never write back into the original.
|
|
242
|
-
val workBuffer: FloatArray
|
|
243
|
-
if (target !== pending) {
|
|
244
|
-
System.arraycopy(resampled, 0, target, pendingLen, resampled.size)
|
|
245
|
-
pendingLen += resampled.size
|
|
246
|
-
workBuffer = target
|
|
247
|
-
} else {
|
|
248
|
-
System.arraycopy(resampled, 0, pending, pendingLen, resampled.size)
|
|
249
|
-
pendingLen += resampled.size
|
|
250
|
-
workBuffer = pending
|
|
349
|
+
else -> resampled
|
|
251
350
|
}
|
|
252
351
|
|
|
253
|
-
|
|
352
|
+
appendPending(boundedResampled)
|
|
353
|
+
|
|
354
|
+
chunkLoop@ while (pendingAvailable() >= samplesPerChunk) {
|
|
254
355
|
if (cancelled.get()) break
|
|
255
356
|
val chunk = FloatArray(samplesPerChunk)
|
|
256
|
-
System.arraycopy(
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
System.arraycopy(
|
|
261
|
-
workBuffer,
|
|
262
|
-
samplesPerChunk,
|
|
263
|
-
workBuffer,
|
|
264
|
-
0,
|
|
265
|
-
remainder
|
|
266
|
-
)
|
|
357
|
+
System.arraycopy(pending, pendingHead, chunk, 0, samplesPerChunk)
|
|
358
|
+
pendingHead += samplesPerChunk
|
|
359
|
+
if (pendingHead > pending.size / 2) {
|
|
360
|
+
compactPending()
|
|
267
361
|
}
|
|
268
|
-
pendingLen = remainder
|
|
269
362
|
|
|
270
363
|
val chunkDurationMs =
|
|
271
364
|
(chunk.size.toDouble() /
|
|
@@ -279,7 +372,7 @@ class AudioStreamDecoder(
|
|
|
279
372
|
index = emittedChunks,
|
|
280
373
|
startTimeMs = startMs,
|
|
281
374
|
endTimeMs = startMs + chunkDurationMs,
|
|
282
|
-
startSample =
|
|
375
|
+
startSample = emittedSamples / outputChannels,
|
|
283
376
|
sampleRate = outputSampleRate,
|
|
284
377
|
channels = outputChannels,
|
|
285
378
|
samples = chunk,
|
|
@@ -287,14 +380,29 @@ class AudioStreamDecoder(
|
|
|
287
380
|
)
|
|
288
381
|
emittedChunks += 1
|
|
289
382
|
emittedSamples += chunk.size
|
|
383
|
+
// Progress is elapsed decoded time within the
|
|
384
|
+
// requested range so `processedMs / durationMs`
|
|
385
|
+
// stays in [0, 1] regardless of `startTimeMs`.
|
|
386
|
+
// Chunk timestamps stay absolute (rangeStart +
|
|
387
|
+
// offset).
|
|
388
|
+
val elapsedMs = (emittedSamples.toDouble() /
|
|
389
|
+
(outputSampleRate.toDouble() *
|
|
390
|
+
outputChannels.toDouble())) * 1000.0
|
|
290
391
|
emitProgress(
|
|
291
|
-
processedMs =
|
|
292
|
-
durationMs =
|
|
392
|
+
processedMs = elapsedMs,
|
|
393
|
+
durationMs = rangeDurationMs,
|
|
293
394
|
emittedChunks = emittedChunks
|
|
294
395
|
)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
396
|
+
when (waitForAckOrCancel(emittedChunks - 1)) {
|
|
397
|
+
AckWaitResult.OK -> Unit
|
|
398
|
+
AckWaitResult.CANCELLED -> {
|
|
399
|
+
cancelled.set(true)
|
|
400
|
+
break@chunkLoop
|
|
401
|
+
}
|
|
402
|
+
AckWaitResult.TIMED_OUT -> {
|
|
403
|
+
backpressureTimedOut = true
|
|
404
|
+
break@chunkLoop
|
|
405
|
+
}
|
|
298
406
|
}
|
|
299
407
|
}
|
|
300
408
|
}
|
|
@@ -302,34 +410,55 @@ class AudioStreamDecoder(
|
|
|
302
410
|
if (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) {
|
|
303
411
|
sawOutputEOS = true
|
|
304
412
|
}
|
|
413
|
+
if (backpressureTimedOut) {
|
|
414
|
+
break
|
|
415
|
+
}
|
|
305
416
|
} else if (outputIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
|
|
306
417
|
val newFormat = codec.outputFormat
|
|
418
|
+
// Codec output format is authoritative once decoding starts;
|
|
419
|
+
// the resampler is told the new source rate on its next call
|
|
420
|
+
// so the output rate (and chunk timestamps) stay correct.
|
|
307
421
|
if (newFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) {
|
|
308
|
-
|
|
309
|
-
// format when the codec exposes a different value.
|
|
310
|
-
@Suppress("UNUSED_VARIABLE")
|
|
311
|
-
val updatedRate = newFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
422
|
+
sourceSampleRate = newFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
312
423
|
}
|
|
424
|
+
if (newFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) {
|
|
425
|
+
sourceChannels = newFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
if (!cancelled.get() && !backpressureTimedOut) {
|
|
431
|
+
val resamplerTail = resampler.flush()
|
|
432
|
+
if (resamplerTail.isNotEmpty()) {
|
|
433
|
+
appendPending(resamplerTail)
|
|
313
434
|
}
|
|
314
435
|
}
|
|
315
436
|
|
|
437
|
+
if (backpressureTimedOut) {
|
|
438
|
+
emitError(
|
|
439
|
+
"ERR_AUDIO_STREAM_BACKPRESSURE_TIMEOUT",
|
|
440
|
+
"Timed out waiting for JS acknowledgement after ${options.backpressureTimeoutMs}ms"
|
|
441
|
+
)
|
|
442
|
+
return
|
|
443
|
+
}
|
|
444
|
+
|
|
316
445
|
if (cancelled.get()) {
|
|
317
446
|
emitError("ERR_AUDIO_STREAM_CANCELLED", "Stream cancelled")
|
|
318
447
|
emitComplete(
|
|
319
|
-
durationMs =
|
|
448
|
+
durationMs = rangeDurationMs,
|
|
320
449
|
sampleRate = outputSampleRate,
|
|
321
450
|
channels = outputChannels,
|
|
322
451
|
chunks = emittedChunks,
|
|
323
|
-
samples = emittedSamples
|
|
452
|
+
samples = emittedSamples,
|
|
324
453
|
cancelled = true
|
|
325
454
|
)
|
|
326
455
|
return
|
|
327
456
|
}
|
|
328
457
|
|
|
329
458
|
// Flush remainder as final chunk
|
|
330
|
-
if (
|
|
331
|
-
val tail = FloatArray(
|
|
332
|
-
System.arraycopy(pending,
|
|
459
|
+
if (pendingAvailable() > 0) {
|
|
460
|
+
val tail = FloatArray(pendingAvailable())
|
|
461
|
+
System.arraycopy(pending, pendingHead, tail, 0, tail.size)
|
|
333
462
|
val tailDurationMs =
|
|
334
463
|
(tail.size.toDouble() /
|
|
335
464
|
(outputSampleRate.toDouble() * outputChannels.toDouble())) *
|
|
@@ -342,7 +471,7 @@ class AudioStreamDecoder(
|
|
|
342
471
|
index = emittedChunks,
|
|
343
472
|
startTimeMs = startMs,
|
|
344
473
|
endTimeMs = startMs + tailDurationMs,
|
|
345
|
-
startSample =
|
|
474
|
+
startSample = emittedSamples / outputChannels,
|
|
346
475
|
sampleRate = outputSampleRate,
|
|
347
476
|
channels = outputChannels,
|
|
348
477
|
samples = tail,
|
|
@@ -350,7 +479,18 @@ class AudioStreamDecoder(
|
|
|
350
479
|
)
|
|
351
480
|
emittedChunks += 1
|
|
352
481
|
emittedSamples += tail.size
|
|
353
|
-
|
|
482
|
+
// Mirror the per-chunk progress emission so consumers always
|
|
483
|
+
// see a final `processedMs / durationMs ≈ 1.0` from
|
|
484
|
+
// `onProgress`.
|
|
485
|
+
val elapsedMs = (emittedSamples.toDouble() /
|
|
486
|
+
(outputSampleRate.toDouble() *
|
|
487
|
+
outputChannels.toDouble())) * 1000.0
|
|
488
|
+
emitProgress(
|
|
489
|
+
processedMs = elapsedMs,
|
|
490
|
+
durationMs = rangeDurationMs,
|
|
491
|
+
emittedChunks = emittedChunks
|
|
492
|
+
)
|
|
493
|
+
} else {
|
|
354
494
|
emitChunk(
|
|
355
495
|
index = emittedChunks,
|
|
356
496
|
startTimeMs = rangeStartMs +
|
|
@@ -361,7 +501,7 @@ class AudioStreamDecoder(
|
|
|
361
501
|
(emittedSamples.toDouble() /
|
|
362
502
|
(outputSampleRate.toDouble() * outputChannels.toDouble())) *
|
|
363
503
|
1000.0,
|
|
364
|
-
startSample =
|
|
504
|
+
startSample = emittedSamples / outputChannels,
|
|
365
505
|
sampleRate = outputSampleRate,
|
|
366
506
|
channels = outputChannels,
|
|
367
507
|
samples = FloatArray(0),
|
|
@@ -371,11 +511,11 @@ class AudioStreamDecoder(
|
|
|
371
511
|
}
|
|
372
512
|
|
|
373
513
|
emitComplete(
|
|
374
|
-
durationMs =
|
|
514
|
+
durationMs = rangeDurationMs,
|
|
375
515
|
sampleRate = outputSampleRate,
|
|
376
516
|
channels = outputChannels,
|
|
377
517
|
chunks = emittedChunks,
|
|
378
|
-
samples = emittedSamples
|
|
518
|
+
samples = emittedSamples,
|
|
379
519
|
cancelled = false
|
|
380
520
|
)
|
|
381
521
|
} catch (e: Exception) {
|
|
@@ -394,17 +534,40 @@ class AudioStreamDecoder(
|
|
|
394
534
|
try {
|
|
395
535
|
extractor.release()
|
|
396
536
|
} catch (_: Exception) { /* noop */ }
|
|
537
|
+
tempFile?.let { runCatching { it.delete() } }
|
|
397
538
|
}
|
|
398
539
|
}
|
|
399
540
|
|
|
400
|
-
private
|
|
401
|
-
|
|
541
|
+
private data class ResolvedFile(val path: String, val tempFile: File?)
|
|
542
|
+
|
|
543
|
+
private fun readMetadataDurationMs(path: String): Long {
|
|
544
|
+
return try {
|
|
545
|
+
val retriever = MediaMetadataRetriever()
|
|
546
|
+
try {
|
|
547
|
+
retriever.setDataSource(path)
|
|
548
|
+
retriever
|
|
549
|
+
.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION)
|
|
550
|
+
?.toLongOrNull()
|
|
551
|
+
?: -1L
|
|
552
|
+
} finally {
|
|
553
|
+
retriever.release()
|
|
554
|
+
}
|
|
555
|
+
} catch (_: Exception) {
|
|
556
|
+
-1L
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
private fun resolveFilePath(uri: String): ResolvedFile? {
|
|
561
|
+
if (uri.startsWith("/")) return ResolvedFile(uri, null)
|
|
402
562
|
return try {
|
|
403
563
|
val parsed = Uri.parse(uri)
|
|
404
564
|
when (parsed.scheme) {
|
|
405
|
-
"file" -> parsed.path
|
|
565
|
+
"file" -> parsed.path?.let { ResolvedFile(it, null) }
|
|
406
566
|
"content" -> {
|
|
407
|
-
//
|
|
567
|
+
// MediaExtractor needs a real file path; copy the content
|
|
568
|
+
// URI into the cache dir and remember the temp file so we
|
|
569
|
+
// can delete it in `finally` (otherwise every call leaks a
|
|
570
|
+
// copy of the source audio onto disk).
|
|
408
571
|
val temp = File.createTempFile(
|
|
409
572
|
"audiostream_${options.requestId}_",
|
|
410
573
|
null,
|
|
@@ -413,9 +576,9 @@ class AudioStreamDecoder(
|
|
|
413
576
|
context.contentResolver.openInputStream(parsed)?.use { input ->
|
|
414
577
|
temp.outputStream().use { out -> input.copyTo(out) }
|
|
415
578
|
}
|
|
416
|
-
temp.absolutePath
|
|
579
|
+
ResolvedFile(temp.absolutePath, temp)
|
|
417
580
|
}
|
|
418
|
-
null -> uri
|
|
581
|
+
null -> ResolvedFile(uri, null)
|
|
419
582
|
else -> null
|
|
420
583
|
}
|
|
421
584
|
} catch (e: Exception) {
|
|
@@ -480,16 +643,23 @@ class AudioStreamDecoder(
|
|
|
480
643
|
return out
|
|
481
644
|
}
|
|
482
645
|
|
|
483
|
-
private
|
|
646
|
+
private enum class AckWaitResult { OK, CANCELLED, TIMED_OUT }
|
|
647
|
+
|
|
648
|
+
private fun waitForAckOrCancel(emittedIndex: Int): AckWaitResult {
|
|
649
|
+
val deadlineMs = options.backpressureTimeoutMs
|
|
650
|
+
?.takeIf { it > 0 }
|
|
651
|
+
?.let { System.currentTimeMillis() + it }
|
|
484
652
|
synchronized(ackLock) {
|
|
485
653
|
while (true) {
|
|
486
|
-
if (cancelled.get()) return
|
|
654
|
+
if (cancelled.get()) return AckWaitResult.CANCELLED
|
|
487
655
|
val inFlight = emittedIndex - lastAckedIndex.get()
|
|
488
|
-
if (inFlight < options.maxBufferedChunks) return
|
|
489
|
-
|
|
656
|
+
if (inFlight < options.maxBufferedChunks) return AckWaitResult.OK
|
|
657
|
+
val remainingMs = deadlineMs?.let { it - System.currentTimeMillis() }
|
|
658
|
+
if (remainingMs != null && remainingMs <= 0L) {
|
|
659
|
+
return AckWaitResult.TIMED_OUT
|
|
660
|
+
}
|
|
661
|
+
ackLock.wait(minOf(50L, remainingMs ?: 50L))
|
|
490
662
|
}
|
|
491
|
-
@Suppress("UNREACHABLE_CODE")
|
|
492
|
-
return false
|
|
493
663
|
}
|
|
494
664
|
}
|
|
495
665
|
|
|
@@ -497,7 +667,7 @@ class AudioStreamDecoder(
|
|
|
497
667
|
index: Int,
|
|
498
668
|
startTimeMs: Double,
|
|
499
669
|
endTimeMs: Double,
|
|
500
|
-
startSample:
|
|
670
|
+
startSample: Long,
|
|
501
671
|
sampleRate: Int,
|
|
502
672
|
channels: Int,
|
|
503
673
|
samples: FloatArray,
|
|
@@ -544,7 +714,7 @@ class AudioStreamDecoder(
|
|
|
544
714
|
sampleRate: Int,
|
|
545
715
|
channels: Int,
|
|
546
716
|
chunks: Int,
|
|
547
|
-
samples:
|
|
717
|
+
samples: Long,
|
|
548
718
|
cancelled: Boolean,
|
|
549
719
|
) {
|
|
550
720
|
val payload = bundleOf(
|
|
@@ -576,28 +746,49 @@ class AudioStreamDecoder(
|
|
|
576
746
|
private class LinearResampler(private val channels: Int) {
|
|
577
747
|
private var lastSrcFrame: FloatArray? = null
|
|
578
748
|
private var fractional: Double = 0.0
|
|
749
|
+
private var lastSourceRate: Int? = null
|
|
750
|
+
private var lastTargetRate: Int? = null
|
|
751
|
+
private var canFlushLastFrame = false
|
|
579
752
|
|
|
580
753
|
fun process(input: FloatArray, sourceRate: Int, targetRate: Int): FloatArray {
|
|
581
754
|
if (input.isEmpty() || channels <= 0) return FloatArray(0)
|
|
582
|
-
|
|
755
|
+
val srcFrames = input.size / channels
|
|
756
|
+
if (srcFrames <= 0) return FloatArray(0)
|
|
757
|
+
|
|
758
|
+
if (lastSourceRate != null &&
|
|
759
|
+
(lastSourceRate != sourceRate || lastTargetRate != targetRate)
|
|
760
|
+
) {
|
|
761
|
+
reset()
|
|
762
|
+
}
|
|
763
|
+
lastSourceRate = sourceRate
|
|
764
|
+
lastTargetRate = targetRate
|
|
765
|
+
|
|
766
|
+
if (sourceRate == targetRate) {
|
|
767
|
+
stashLastFrame(input, srcFrames)
|
|
768
|
+
fractional = 0.0
|
|
769
|
+
canFlushLastFrame = false
|
|
770
|
+
return input
|
|
771
|
+
}
|
|
583
772
|
|
|
584
773
|
val ratio = sourceRate.toDouble() / targetRate.toDouble()
|
|
585
|
-
val srcFrames = input.size / channels
|
|
586
774
|
val previousFrame = lastSrcFrame
|
|
587
775
|
val totalSrcFrames = srcFrames + if (previousFrame != null) 1 else 0
|
|
588
776
|
if (totalSrcFrames < 2) {
|
|
589
777
|
// Not enough to interpolate yet; stash and return empty.
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
System.arraycopy(input, (srcFrames - 1) * channels, dst, 0, channels)
|
|
593
|
-
}
|
|
594
|
-
}
|
|
778
|
+
stashLastFrame(input, srcFrames)
|
|
779
|
+
canFlushLastFrame = true
|
|
595
780
|
return FloatArray(0)
|
|
596
781
|
}
|
|
597
782
|
|
|
598
783
|
// Build virtual stream: [previousFrame?, input...].
|
|
599
784
|
val virtualLen = totalSrcFrames
|
|
600
|
-
val
|
|
785
|
+
val outFrames = ceil(((virtualLen - 1) - fractional) / ratio)
|
|
786
|
+
.toInt()
|
|
787
|
+
.coerceAtLeast(0)
|
|
788
|
+
// Add one frame of slack for floating-point boundary rounding; trim
|
|
789
|
+
// below if the conservative capacity was not used.
|
|
790
|
+
val out = FloatArray((outFrames + 1) * channels)
|
|
791
|
+
var outIdx = 0
|
|
601
792
|
var srcPos = fractional
|
|
602
793
|
while (srcPos < virtualLen - 1) {
|
|
603
794
|
val i = srcPos.toInt()
|
|
@@ -605,20 +796,41 @@ private class LinearResampler(private val channels: Int) {
|
|
|
605
796
|
for (c in 0 until channels) {
|
|
606
797
|
val a = sampleAt(i, c, previousFrame, input)
|
|
607
798
|
val b = sampleAt(i + 1, c, previousFrame, input)
|
|
608
|
-
out
|
|
799
|
+
out[outIdx++] = a + (b - a) * frac
|
|
609
800
|
}
|
|
610
801
|
srcPos += ratio
|
|
611
802
|
}
|
|
612
803
|
fractional = srcPos - (virtualLen - 1)
|
|
613
804
|
|
|
614
|
-
// Stash last source frame for the next call.
|
|
805
|
+
// Stash last source frame for the next call. The interpolation loop
|
|
806
|
+
// intentionally stops before the final frame so that it can interpolate
|
|
807
|
+
// across the next decoder buffer; flush() emits this tail at EOS.
|
|
808
|
+
stashLastFrame(input, srcFrames)
|
|
809
|
+
canFlushLastFrame = true
|
|
810
|
+
|
|
811
|
+
return if (outIdx == out.size) out else out.copyOf(outIdx)
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
fun flush(): FloatArray {
|
|
815
|
+
val tail = if (canFlushLastFrame) {
|
|
816
|
+
lastSrcFrame?.copyOf() ?: FloatArray(0)
|
|
817
|
+
} else {
|
|
818
|
+
FloatArray(0)
|
|
819
|
+
}
|
|
820
|
+
reset()
|
|
821
|
+
return tail
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
private fun reset() {
|
|
825
|
+
lastSrcFrame = null
|
|
826
|
+
fractional = 0.0
|
|
827
|
+
canFlushLastFrame = false
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
private fun stashLastFrame(input: FloatArray, srcFrames: Int) {
|
|
615
831
|
lastSrcFrame = FloatArray(channels).also { dst ->
|
|
616
832
|
System.arraycopy(input, (srcFrames - 1) * channels, dst, 0, channels)
|
|
617
833
|
}
|
|
618
|
-
|
|
619
|
-
val arr = FloatArray(out.size)
|
|
620
|
-
for (i in out.indices) arr[i] = out[i]
|
|
621
|
-
return arr
|
|
622
834
|
}
|
|
623
835
|
|
|
624
836
|
private fun sampleAt(
|