@siteed/audio-studio 3.2.0-beta.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/CHANGELOG.md +356 -5
  2. package/android/src/main/java/net/siteed/audiostudio/AudioStreamDecoder.kt +306 -94
  3. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +39 -6
  4. package/build/cjs/errors/AudioStreamError.js +9 -0
  5. package/build/cjs/errors/AudioStreamError.js.map +1 -1
  6. package/build/cjs/errors/AudioStreamError.test.js +22 -1
  7. package/build/cjs/errors/AudioStreamError.test.js.map +1 -1
  8. package/build/cjs/streamAudioData.js +99 -32
  9. package/build/cjs/streamAudioData.js.map +1 -1
  10. package/build/cjs/utils/audioProcessing.js +14 -10
  11. package/build/cjs/utils/audioProcessing.js.map +1 -1
  12. package/build/esm/errors/AudioStreamError.js +9 -0
  13. package/build/esm/errors/AudioStreamError.js.map +1 -1
  14. package/build/esm/errors/AudioStreamError.test.js +22 -1
  15. package/build/esm/errors/AudioStreamError.test.js.map +1 -1
  16. package/build/esm/streamAudioData.js +99 -32
  17. package/build/esm/streamAudioData.js.map +1 -1
  18. package/build/esm/utils/audioProcessing.js +14 -10
  19. package/build/esm/utils/audioProcessing.js.map +1 -1
  20. package/build/types/errors/AudioStreamError.d.ts.map +1 -1
  21. package/build/types/streamAudioData.d.ts +5 -0
  22. package/build/types/streamAudioData.d.ts.map +1 -1
  23. package/build/types/utils/audioProcessing.d.ts +2 -2
  24. package/build/types/utils/audioProcessing.d.ts.map +1 -1
  25. package/ios/AudioStreamDecoder.swift +191 -100
  26. package/ios/AudioStudioModule.swift +48 -9
  27. package/package.json +163 -146
  28. package/scripts/README.md +58 -0
  29. package/src/errors/AudioStreamError.test.ts +29 -2
  30. package/src/errors/AudioStreamError.ts +14 -0
  31. package/src/streamAudioData.ts +146 -42
  32. package/src/utils/audioProcessing.ts +25 -14
  33. package/android/src/androidTest/assets/chorus.wav +0 -0
  34. package/android/src/androidTest/assets/jfk.wav +0 -0
  35. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  36. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  37. package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +0 -190
  38. package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +0 -197
  39. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +0 -487
  40. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +0 -250
  41. package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +0 -186
  42. package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +0 -332
  43. package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +0 -324
  44. package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +0 -253
  45. package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +0 -218
  46. package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +0 -120
  47. package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +0 -345
  48. package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +0 -340
  49. package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +0 -252
  50. package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +0 -95
  51. package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +0 -43
  52. package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +0 -37
  53. package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +0 -28
  54. package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +0 -279
  55. package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +0 -249
  56. package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +0 -151
  57. package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +0 -273
  58. package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +0 -140
  59. package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +0 -49
  60. package/android/src/test/resources/chorus.wav +0 -0
  61. package/android/src/test/resources/generate_test_audio.py +0 -94
  62. package/android/src/test/resources/jfk.wav +0 -0
  63. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  64. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  65. package/ios/AudioStudioTests/AudioFileHandlerTests.swift +0 -338
  66. package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +0 -331
  67. package/ios/AudioStudioTests/AudioStreamDecoderTests.swift +0 -128
  68. package/ios/AudioStudioTests/AudioTestHelpers.swift +0 -130
  69. package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +0 -334
  70. package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +0 -105
  71. package/ios/AudioStudioTests/Info.plist +0 -22
  72. package/ios/AudioStudioTests/README.md +0 -39
  73. package/ios/AudioStudioTests/SimpleAudioTest.swift +0 -98
  74. package/ios/AudioStudioTests/TestAudioGenerator.swift +0 -75
  75. package/ios/tests/README.md +0 -41
  76. package/ios/tests/integration/buffer_and_fallback_test.swift +0 -178
  77. package/ios/tests/integration/buffer_duration_test.swift +0 -185
  78. package/ios/tests/integration/compressed_only_output_test.swift +0 -271
  79. package/ios/tests/integration/output_control_test.swift +0 -322
  80. package/ios/tests/integration/run_integration_tests.sh +0 -37
  81. package/ios/tests/opus_support_test_macos.swift +0 -154
  82. package/ios/tests/standalone/audio_processing_test.swift +0 -144
  83. package/ios/tests/standalone/audio_recording_test.swift +0 -277
  84. package/ios/tests/standalone/audio_streaming_test.swift +0 -249
  85. package/ios/tests/standalone/standalone_test.swift +0 -144
@@ -11,6 +11,7 @@ import android.content.Context
11
11
  import android.media.MediaCodec
12
12
  import android.media.MediaExtractor
13
13
  import android.media.MediaFormat
14
+ import android.media.MediaMetadataRetriever
14
15
  import android.net.Uri
15
16
  import android.os.Bundle
16
17
  import androidx.core.os.bundleOf
@@ -20,6 +21,7 @@ import java.nio.ByteOrder
20
21
  import java.util.concurrent.atomic.AtomicBoolean
21
22
  import java.util.concurrent.atomic.AtomicInteger
22
23
  import kotlin.concurrent.thread
24
+ import kotlin.math.ceil
23
25
 
24
26
  interface AudioStreamDecoderDelegate {
25
27
  fun streamDecoderEmit(eventName: String, payload: Bundle)
@@ -41,6 +43,7 @@ class AudioStreamDecoder(
41
43
  val chunkDurationMs: Int,
42
44
  val maxChunkBytes: Int?,
43
45
  val maxBufferedChunks: Int,
46
+ val backpressureTimeoutMs: Long?,
44
47
  )
45
48
 
46
49
  companion object {
@@ -77,12 +80,15 @@ class AudioStreamDecoder(
77
80
  }
78
81
 
79
82
  private fun run() {
80
- val path = resolveFilePath(options.fileUri)
81
- if (path == null) {
83
+ val resolved = resolveFilePath(options.fileUri)
84
+ if (resolved == null) {
82
85
  emitError("ERR_AUDIO_STREAM_FILE_NOT_FOUND", "Cannot resolve file: ${options.fileUri}")
83
86
  return
84
87
  }
88
+ val path = resolved.path
89
+ val tempFile = resolved.tempFile
85
90
  if (!File(path).exists()) {
91
+ tempFile?.let { runCatching { it.delete() } }
86
92
  emitError("ERR_AUDIO_STREAM_FILE_NOT_FOUND", "File not found: $path")
87
93
  return
88
94
  }
@@ -91,7 +97,7 @@ class AudioStreamDecoder(
91
97
  var codec: MediaCodec? = null
92
98
  var emittedChunks = 0
93
99
  var emittedSamples = 0L
94
- var totalDurationUs: Long = -1
100
+ var backpressureTimedOut = false
95
101
  var outputSampleRate = options.targetSampleRate ?: 0
96
102
  var outputChannels = options.channels ?: 0
97
103
 
@@ -119,16 +125,48 @@ class AudioStreamDecoder(
119
125
  }
120
126
  extractor.selectTrack(trackIndex)
121
127
  val format = extractor.getTrackFormat(trackIndex)
122
- totalDurationUs = if (format.containsKey(MediaFormat.KEY_DURATION)) {
128
+ val extractorDurationUs = if (format.containsKey(MediaFormat.KEY_DURATION)) {
123
129
  format.getLong(MediaFormat.KEY_DURATION)
124
130
  } else {
125
- -1
131
+ -1L
132
+ }
133
+ val metadataDurationMs = if (extractorDurationUs <= 0) {
134
+ readMetadataDurationMs(path)
135
+ } else {
136
+ -1L
137
+ }
138
+ val assetDurationUs = if (extractorDurationUs > 0) {
139
+ extractorDurationUs
140
+ } else if (metadataDurationMs > 0) {
141
+ metadataDurationMs * 1000L
142
+ } else {
143
+ -1L
144
+ }
145
+ val assetDurationMs = if (assetDurationUs > 0) {
146
+ assetDurationUs / 1000.0
147
+ } else {
148
+ 0.0
149
+ }
150
+ // Duration of the *decoded range*, not the whole file, so progress
151
+ // and completion payloads match what the caller actually receives.
152
+ val rangeDurationMs = when {
153
+ options.startTimeMs != null && options.endTimeMs != null ->
154
+ (options.endTimeMs - options.startTimeMs).toDouble().coerceAtLeast(0.0)
155
+ options.endTimeMs != null -> options.endTimeMs.toDouble().coerceAtLeast(0.0)
156
+ options.startTimeMs != null ->
157
+ (assetDurationMs - options.startTimeMs).coerceAtLeast(0.0)
158
+ else -> assetDurationMs
126
159
  }
127
160
 
128
- val sourceSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
129
- val sourceChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
161
+ var sourceSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
162
+ var sourceChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
130
163
  if (outputSampleRate <= 0) outputSampleRate = sourceSampleRate
131
164
  if (outputChannels <= 0) outputChannels = minOf(2, maxOf(1, sourceChannels))
165
+ val maxOutputSamples = options.endTimeMs?.let {
166
+ ((rangeDurationMs / 1000.0) * outputSampleRate.toDouble() * outputChannels.toDouble())
167
+ .toLong()
168
+ .coerceAtLeast(0L)
169
+ } ?: Long.MAX_VALUE
132
170
 
133
171
  options.startTimeMs?.let {
134
172
  extractor.seekTo(it * 1000L, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
@@ -156,14 +194,49 @@ class AudioStreamDecoder(
156
194
 
157
195
  val endTimeUs = options.endTimeMs?.let { it * 1000L } ?: Long.MAX_VALUE
158
196
  val rangeStartMs = options.startTimeMs ?: 0L
197
+ val targetStartUs = rangeStartMs * 1000L
159
198
  val samplesPerChunk = run {
160
199
  val byTime = (options.chunkDurationMs.toLong() *
161
200
  outputSampleRate.toLong() / 1000L).toInt() * outputChannels
162
- val byBytes = options.maxChunkBytes?.let { it / 4 } ?: Int.MAX_VALUE
163
- maxOf(1, minOf(byTime, byBytes))
201
+ // Round byte-cap down to a multiple of `outputChannels` so a
202
+ // single interleaved frame is never split across chunks.
203
+ val byBytes = options.maxChunkBytes?.let {
204
+ (it / 4 / outputChannels) * outputChannels
205
+ } ?: Int.MAX_VALUE
206
+ val raw = minOf(byTime, byBytes)
207
+ maxOf(outputChannels, (raw / outputChannels) * outputChannels)
164
208
  }
165
- val pending = FloatArray(samplesPerChunk * 2)
209
+ var pending = FloatArray(samplesPerChunk * 2)
166
210
  var pendingLen = 0
211
+ var pendingHead = 0
212
+ fun pendingAvailable(): Int = pendingLen - pendingHead
213
+ fun compactPending() {
214
+ val available = pendingAvailable()
215
+ if (pendingHead == 0) return
216
+ if (available > 0) {
217
+ System.arraycopy(pending, pendingHead, pending, 0, available)
218
+ }
219
+ pendingHead = 0
220
+ pendingLen = available
221
+ }
222
+ fun appendPending(samples: FloatArray) {
223
+ if (samples.isEmpty()) return
224
+ val available = pendingAvailable()
225
+ val requiredLen = available + samples.size
226
+ if (requiredLen > pending.size) {
227
+ val grown = FloatArray(requiredLen.coerceAtLeast(pending.size * 2))
228
+ if (available > 0) {
229
+ System.arraycopy(pending, pendingHead, grown, 0, available)
230
+ }
231
+ pending = grown
232
+ pendingHead = 0
233
+ pendingLen = available
234
+ } else if (pending.size - pendingLen < samples.size && pendingHead > 0) {
235
+ compactPending()
236
+ }
237
+ System.arraycopy(samples, 0, pending, pendingLen, samples.size)
238
+ pendingLen += samples.size
239
+ }
167
240
  val info = MediaCodec.BufferInfo()
168
241
  var sawInputEOS = false
169
242
  var sawOutputEOS = false
@@ -220,52 +293,72 @@ class AudioStreamDecoder(
220
293
  outputChannels,
221
294
  options.normalizeAudio
222
295
  )
296
+ // SEEK_TO_CLOSEST_SYNC can land before the requested
297
+ // start (encoder priming on AAC/MP3, sync-frame
298
+ // granularity on lossy containers). Trim source-rate
299
+ // frames whose presentation time is before
300
+ // `targetStartUs` so the first emitted sample lines
301
+ // up with `startTimeMs`.
302
+ val bufferStartUs = info.presentationTimeUs
303
+ val trimmed: FloatArray = if (
304
+ targetStartUs > 0 &&
305
+ bufferStartUs in 0L until targetStartUs &&
306
+ converted.isNotEmpty()
307
+ ) {
308
+ val deltaUs = targetStartUs - bufferStartUs
309
+ val skipFrames = (
310
+ deltaUs.toDouble() * sourceSampleRate.toDouble() /
311
+ 1_000_000.0
312
+ ).toInt()
313
+ val totalFrames = converted.size / outputChannels
314
+ val actualSkip = minOf(skipFrames, totalFrames)
315
+ if (actualSkip >= totalFrames) {
316
+ FloatArray(0)
317
+ } else if (actualSkip > 0) {
318
+ val skipFloats = actualSkip * outputChannels
319
+ val out = FloatArray(converted.size - skipFloats)
320
+ System.arraycopy(
321
+ converted,
322
+ skipFloats,
323
+ out,
324
+ 0,
325
+ out.size
326
+ )
327
+ out
328
+ } else {
329
+ converted
330
+ }
331
+ } else {
332
+ converted
333
+ }
223
334
  val resampled = resampler.process(
224
- converted,
335
+ trimmed,
225
336
  sourceSampleRate,
226
337
  outputSampleRate
227
338
  )
228
-
229
- // Append into pending, grow if needed
230
- val newLen = pendingLen + resampled.size
231
- val target = if (newLen > pending.size) {
232
- val grown = FloatArray(newLen.coerceAtLeast(pending.size * 2))
233
- System.arraycopy(pending, 0, grown, 0, pendingLen)
234
- grown.also {
235
- System.arraycopy(it, 0, pending, 0, pendingLen)
339
+ val remainingOutputSamples =
340
+ maxOutputSamples - emittedSamples - pendingAvailable().toLong()
341
+ val boundedResampled = when {
342
+ remainingOutputSamples <= 0L -> FloatArray(0)
343
+ resampled.size.toLong() > remainingOutputSamples -> {
344
+ val boundedSize = (
345
+ remainingOutputSamples.toInt() / outputChannels
346
+ ) * outputChannels
347
+ resampled.copyOf(boundedSize)
236
348
  }
237
- } else {
238
- pending
239
- }
240
- // Note: when grown, we use a transient buffer that we
241
- // process and never write back into the original.
242
- val workBuffer: FloatArray
243
- if (target !== pending) {
244
- System.arraycopy(resampled, 0, target, pendingLen, resampled.size)
245
- pendingLen += resampled.size
246
- workBuffer = target
247
- } else {
248
- System.arraycopy(resampled, 0, pending, pendingLen, resampled.size)
249
- pendingLen += resampled.size
250
- workBuffer = pending
349
+ else -> resampled
251
350
  }
252
351
 
253
- while (pendingLen >= samplesPerChunk) {
352
+ appendPending(boundedResampled)
353
+
354
+ chunkLoop@ while (pendingAvailable() >= samplesPerChunk) {
254
355
  if (cancelled.get()) break
255
356
  val chunk = FloatArray(samplesPerChunk)
256
- System.arraycopy(workBuffer, 0, chunk, 0, samplesPerChunk)
257
- // Shift remainder forward.
258
- val remainder = pendingLen - samplesPerChunk
259
- if (remainder > 0) {
260
- System.arraycopy(
261
- workBuffer,
262
- samplesPerChunk,
263
- workBuffer,
264
- 0,
265
- remainder
266
- )
357
+ System.arraycopy(pending, pendingHead, chunk, 0, samplesPerChunk)
358
+ pendingHead += samplesPerChunk
359
+ if (pendingHead > pending.size / 2) {
360
+ compactPending()
267
361
  }
268
- pendingLen = remainder
269
362
 
270
363
  val chunkDurationMs =
271
364
  (chunk.size.toDouble() /
@@ -279,7 +372,7 @@ class AudioStreamDecoder(
279
372
  index = emittedChunks,
280
373
  startTimeMs = startMs,
281
374
  endTimeMs = startMs + chunkDurationMs,
282
- startSample = (emittedSamples / outputChannels).toInt(),
375
+ startSample = emittedSamples / outputChannels,
283
376
  sampleRate = outputSampleRate,
284
377
  channels = outputChannels,
285
378
  samples = chunk,
@@ -287,14 +380,29 @@ class AudioStreamDecoder(
287
380
  )
288
381
  emittedChunks += 1
289
382
  emittedSamples += chunk.size
383
+ // Progress is elapsed decoded time within the
384
+ // requested range so `processedMs / durationMs`
385
+ // stays in [0, 1] regardless of `startTimeMs`.
386
+ // Chunk timestamps stay absolute (rangeStart +
387
+ // offset).
388
+ val elapsedMs = (emittedSamples.toDouble() /
389
+ (outputSampleRate.toDouble() *
390
+ outputChannels.toDouble())) * 1000.0
290
391
  emitProgress(
291
- processedMs = startMs + chunkDurationMs,
292
- durationMs = if (totalDurationUs > 0) totalDurationUs / 1000.0 else 0.0,
392
+ processedMs = elapsedMs,
393
+ durationMs = rangeDurationMs,
293
394
  emittedChunks = emittedChunks
294
395
  )
295
- if (waitForAckOrCancel(emittedChunks - 1)) {
296
- cancelled.set(true)
297
- break
396
+ when (waitForAckOrCancel(emittedChunks - 1)) {
397
+ AckWaitResult.OK -> Unit
398
+ AckWaitResult.CANCELLED -> {
399
+ cancelled.set(true)
400
+ break@chunkLoop
401
+ }
402
+ AckWaitResult.TIMED_OUT -> {
403
+ backpressureTimedOut = true
404
+ break@chunkLoop
405
+ }
298
406
  }
299
407
  }
300
408
  }
@@ -302,34 +410,55 @@ class AudioStreamDecoder(
302
410
  if (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) {
303
411
  sawOutputEOS = true
304
412
  }
413
+ if (backpressureTimedOut) {
414
+ break
415
+ }
305
416
  } else if (outputIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
306
417
  val newFormat = codec.outputFormat
418
+ // Codec output format is authoritative once decoding starts;
419
+ // the resampler is told the new source rate on its next call
420
+ // so the output rate (and chunk timestamps) stay correct.
307
421
  if (newFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) {
308
- // Source rate from codec output overrides extractor's
309
- // format when the codec exposes a different value.
310
- @Suppress("UNUSED_VARIABLE")
311
- val updatedRate = newFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
422
+ sourceSampleRate = newFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
312
423
  }
424
+ if (newFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) {
425
+ sourceChannels = newFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
426
+ }
427
+ }
428
+ }
429
+
430
+ if (!cancelled.get() && !backpressureTimedOut) {
431
+ val resamplerTail = resampler.flush()
432
+ if (resamplerTail.isNotEmpty()) {
433
+ appendPending(resamplerTail)
313
434
  }
314
435
  }
315
436
 
437
+ if (backpressureTimedOut) {
438
+ emitError(
439
+ "ERR_AUDIO_STREAM_BACKPRESSURE_TIMEOUT",
440
+ "Timed out waiting for JS acknowledgement after ${options.backpressureTimeoutMs}ms"
441
+ )
442
+ return
443
+ }
444
+
316
445
  if (cancelled.get()) {
317
446
  emitError("ERR_AUDIO_STREAM_CANCELLED", "Stream cancelled")
318
447
  emitComplete(
319
- durationMs = if (totalDurationUs > 0) totalDurationUs / 1000.0 else 0.0,
448
+ durationMs = rangeDurationMs,
320
449
  sampleRate = outputSampleRate,
321
450
  channels = outputChannels,
322
451
  chunks = emittedChunks,
323
- samples = emittedSamples.toInt(),
452
+ samples = emittedSamples,
324
453
  cancelled = true
325
454
  )
326
455
  return
327
456
  }
328
457
 
329
458
  // Flush remainder as final chunk
330
- if (pendingLen > 0) {
331
- val tail = FloatArray(pendingLen)
332
- System.arraycopy(pending, 0, tail, 0, pendingLen)
459
+ if (pendingAvailable() > 0) {
460
+ val tail = FloatArray(pendingAvailable())
461
+ System.arraycopy(pending, pendingHead, tail, 0, tail.size)
333
462
  val tailDurationMs =
334
463
  (tail.size.toDouble() /
335
464
  (outputSampleRate.toDouble() * outputChannels.toDouble())) *
@@ -342,7 +471,7 @@ class AudioStreamDecoder(
342
471
  index = emittedChunks,
343
472
  startTimeMs = startMs,
344
473
  endTimeMs = startMs + tailDurationMs,
345
- startSample = (emittedSamples / outputChannels).toInt(),
474
+ startSample = emittedSamples / outputChannels,
346
475
  sampleRate = outputSampleRate,
347
476
  channels = outputChannels,
348
477
  samples = tail,
@@ -350,7 +479,18 @@ class AudioStreamDecoder(
350
479
  )
351
480
  emittedChunks += 1
352
481
  emittedSamples += tail.size
353
- } else if (emittedChunks > 0) {
482
+ // Mirror the per-chunk progress emission so consumers always
483
+ // see a final `processedMs / durationMs ≈ 1.0` from
484
+ // `onProgress`.
485
+ val elapsedMs = (emittedSamples.toDouble() /
486
+ (outputSampleRate.toDouble() *
487
+ outputChannels.toDouble())) * 1000.0
488
+ emitProgress(
489
+ processedMs = elapsedMs,
490
+ durationMs = rangeDurationMs,
491
+ emittedChunks = emittedChunks
492
+ )
493
+ } else {
354
494
  emitChunk(
355
495
  index = emittedChunks,
356
496
  startTimeMs = rangeStartMs +
@@ -361,7 +501,7 @@ class AudioStreamDecoder(
361
501
  (emittedSamples.toDouble() /
362
502
  (outputSampleRate.toDouble() * outputChannels.toDouble())) *
363
503
  1000.0,
364
- startSample = (emittedSamples / outputChannels).toInt(),
504
+ startSample = emittedSamples / outputChannels,
365
505
  sampleRate = outputSampleRate,
366
506
  channels = outputChannels,
367
507
  samples = FloatArray(0),
@@ -371,11 +511,11 @@ class AudioStreamDecoder(
371
511
  }
372
512
 
373
513
  emitComplete(
374
- durationMs = if (totalDurationUs > 0) totalDurationUs / 1000.0 else 0.0,
514
+ durationMs = rangeDurationMs,
375
515
  sampleRate = outputSampleRate,
376
516
  channels = outputChannels,
377
517
  chunks = emittedChunks,
378
- samples = emittedSamples.toInt(),
518
+ samples = emittedSamples,
379
519
  cancelled = false
380
520
  )
381
521
  } catch (e: Exception) {
@@ -394,17 +534,40 @@ class AudioStreamDecoder(
394
534
  try {
395
535
  extractor.release()
396
536
  } catch (_: Exception) { /* noop */ }
537
+ tempFile?.let { runCatching { it.delete() } }
397
538
  }
398
539
  }
399
540
 
400
- private fun resolveFilePath(uri: String): String? {
401
- if (uri.startsWith("/")) return uri
541
+ private data class ResolvedFile(val path: String, val tempFile: File?)
542
+
543
+ private fun readMetadataDurationMs(path: String): Long {
544
+ return try {
545
+ val retriever = MediaMetadataRetriever()
546
+ try {
547
+ retriever.setDataSource(path)
548
+ retriever
549
+ .extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION)
550
+ ?.toLongOrNull()
551
+ ?: -1L
552
+ } finally {
553
+ retriever.release()
554
+ }
555
+ } catch (_: Exception) {
556
+ -1L
557
+ }
558
+ }
559
+
560
+ private fun resolveFilePath(uri: String): ResolvedFile? {
561
+ if (uri.startsWith("/")) return ResolvedFile(uri, null)
402
562
  return try {
403
563
  val parsed = Uri.parse(uri)
404
564
  when (parsed.scheme) {
405
- "file" -> parsed.path
565
+ "file" -> parsed.path?.let { ResolvedFile(it, null) }
406
566
  "content" -> {
407
- // Copy to cache so MediaExtractor can read by path.
567
+ // MediaExtractor needs a real file path; copy the content
568
+ // URI into the cache dir and remember the temp file so we
569
+ // can delete it in `finally` (otherwise every call leaks a
570
+ // copy of the source audio onto disk).
408
571
  val temp = File.createTempFile(
409
572
  "audiostream_${options.requestId}_",
410
573
  null,
@@ -413,9 +576,9 @@ class AudioStreamDecoder(
413
576
  context.contentResolver.openInputStream(parsed)?.use { input ->
414
577
  temp.outputStream().use { out -> input.copyTo(out) }
415
578
  }
416
- temp.absolutePath
579
+ ResolvedFile(temp.absolutePath, temp)
417
580
  }
418
- null -> uri
581
+ null -> ResolvedFile(uri, null)
419
582
  else -> null
420
583
  }
421
584
  } catch (e: Exception) {
@@ -480,16 +643,23 @@ class AudioStreamDecoder(
480
643
  return out
481
644
  }
482
645
 
483
- private fun waitForAckOrCancel(emittedIndex: Int): Boolean {
646
+ private enum class AckWaitResult { OK, CANCELLED, TIMED_OUT }
647
+
648
+ private fun waitForAckOrCancel(emittedIndex: Int): AckWaitResult {
649
+ val deadlineMs = options.backpressureTimeoutMs
650
+ ?.takeIf { it > 0 }
651
+ ?.let { System.currentTimeMillis() + it }
484
652
  synchronized(ackLock) {
485
653
  while (true) {
486
- if (cancelled.get()) return true
654
+ if (cancelled.get()) return AckWaitResult.CANCELLED
487
655
  val inFlight = emittedIndex - lastAckedIndex.get()
488
- if (inFlight < options.maxBufferedChunks) return false
489
- ackLock.wait(50L)
656
+ if (inFlight < options.maxBufferedChunks) return AckWaitResult.OK
657
+ val remainingMs = deadlineMs?.let { it - System.currentTimeMillis() }
658
+ if (remainingMs != null && remainingMs <= 0L) {
659
+ return AckWaitResult.TIMED_OUT
660
+ }
661
+ ackLock.wait(minOf(50L, remainingMs ?: 50L))
490
662
  }
491
- @Suppress("UNREACHABLE_CODE")
492
- return false
493
663
  }
494
664
  }
495
665
 
@@ -497,7 +667,7 @@ class AudioStreamDecoder(
497
667
  index: Int,
498
668
  startTimeMs: Double,
499
669
  endTimeMs: Double,
500
- startSample: Int,
670
+ startSample: Long,
501
671
  sampleRate: Int,
502
672
  channels: Int,
503
673
  samples: FloatArray,
@@ -544,7 +714,7 @@ class AudioStreamDecoder(
544
714
  sampleRate: Int,
545
715
  channels: Int,
546
716
  chunks: Int,
547
- samples: Int,
717
+ samples: Long,
548
718
  cancelled: Boolean,
549
719
  ) {
550
720
  val payload = bundleOf(
@@ -576,28 +746,49 @@ class AudioStreamDecoder(
576
746
  private class LinearResampler(private val channels: Int) {
577
747
  private var lastSrcFrame: FloatArray? = null
578
748
  private var fractional: Double = 0.0
749
+ private var lastSourceRate: Int? = null
750
+ private var lastTargetRate: Int? = null
751
+ private var canFlushLastFrame = false
579
752
 
580
753
  fun process(input: FloatArray, sourceRate: Int, targetRate: Int): FloatArray {
581
754
  if (input.isEmpty() || channels <= 0) return FloatArray(0)
582
- if (sourceRate == targetRate) return input
755
+ val srcFrames = input.size / channels
756
+ if (srcFrames <= 0) return FloatArray(0)
757
+
758
+ if (lastSourceRate != null &&
759
+ (lastSourceRate != sourceRate || lastTargetRate != targetRate)
760
+ ) {
761
+ reset()
762
+ }
763
+ lastSourceRate = sourceRate
764
+ lastTargetRate = targetRate
765
+
766
+ if (sourceRate == targetRate) {
767
+ stashLastFrame(input, srcFrames)
768
+ fractional = 0.0
769
+ canFlushLastFrame = false
770
+ return input
771
+ }
583
772
 
584
773
  val ratio = sourceRate.toDouble() / targetRate.toDouble()
585
- val srcFrames = input.size / channels
586
774
  val previousFrame = lastSrcFrame
587
775
  val totalSrcFrames = srcFrames + if (previousFrame != null) 1 else 0
588
776
  if (totalSrcFrames < 2) {
589
777
  // Not enough to interpolate yet; stash and return empty.
590
- lastSrcFrame = FloatArray(channels).also { dst ->
591
- if (srcFrames >= 1) {
592
- System.arraycopy(input, (srcFrames - 1) * channels, dst, 0, channels)
593
- }
594
- }
778
+ stashLastFrame(input, srcFrames)
779
+ canFlushLastFrame = true
595
780
  return FloatArray(0)
596
781
  }
597
782
 
598
783
  // Build virtual stream: [previousFrame?, input...].
599
784
  val virtualLen = totalSrcFrames
600
- val out = ArrayList<Float>(((virtualLen * targetRate) / sourceRate + 1) * channels)
785
+ val outFrames = ceil(((virtualLen - 1) - fractional) / ratio)
786
+ .toInt()
787
+ .coerceAtLeast(0)
788
+ // Add one frame of slack for floating-point boundary rounding; trim
789
+ // below if the conservative capacity was not used.
790
+ val out = FloatArray((outFrames + 1) * channels)
791
+ var outIdx = 0
601
792
  var srcPos = fractional
602
793
  while (srcPos < virtualLen - 1) {
603
794
  val i = srcPos.toInt()
@@ -605,20 +796,41 @@ private class LinearResampler(private val channels: Int) {
605
796
  for (c in 0 until channels) {
606
797
  val a = sampleAt(i, c, previousFrame, input)
607
798
  val b = sampleAt(i + 1, c, previousFrame, input)
608
- out.add(a + (b - a) * frac)
799
+ out[outIdx++] = a + (b - a) * frac
609
800
  }
610
801
  srcPos += ratio
611
802
  }
612
803
  fractional = srcPos - (virtualLen - 1)
613
804
 
614
- // Stash last source frame for the next call.
805
+ // Stash last source frame for the next call. The interpolation loop
806
+ // intentionally stops before the final frame so that it can interpolate
807
+ // across the next decoder buffer; flush() emits this tail at EOS.
808
+ stashLastFrame(input, srcFrames)
809
+ canFlushLastFrame = true
810
+
811
+ return if (outIdx == out.size) out else out.copyOf(outIdx)
812
+ }
813
+
814
+ fun flush(): FloatArray {
815
+ val tail = if (canFlushLastFrame) {
816
+ lastSrcFrame?.copyOf() ?: FloatArray(0)
817
+ } else {
818
+ FloatArray(0)
819
+ }
820
+ reset()
821
+ return tail
822
+ }
823
+
824
+ private fun reset() {
825
+ lastSrcFrame = null
826
+ fractional = 0.0
827
+ canFlushLastFrame = false
828
+ }
829
+
830
+ private fun stashLastFrame(input: FloatArray, srcFrames: Int) {
615
831
  lastSrcFrame = FloatArray(channels).also { dst ->
616
832
  System.arraycopy(input, (srcFrames - 1) * channels, dst, 0, channels)
617
833
  }
618
-
619
- val arr = FloatArray(out.size)
620
- for (i in out.indices) arr[i] = out[i]
621
- return arr
622
834
  }
623
835
 
624
836
  private fun sampleAt(