@siteed/audio-studio 3.0.5 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. package/CHANGELOG.md +19 -1
  2. package/README.md +108 -41
  3. package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +190 -0
  4. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +29 -83
  5. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +17 -1
  6. package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +186 -0
  7. package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +473 -380
  8. package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +74 -22
  9. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +86 -19
  10. package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +174 -212
  11. package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +6 -0
  12. package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +37 -0
  13. package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +28 -0
  14. package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +49 -0
  15. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  16. package/build/cjs/AudioAnalysis/extractPreview.js +92 -15
  17. package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -1
  18. package/build/cjs/AudioAnalysis/extractPreviewBars.js +134 -0
  19. package/build/cjs/AudioAnalysis/extractPreviewBars.js.map +1 -0
  20. package/build/cjs/AudioStudio.types.js.map +1 -1
  21. package/build/cjs/errors/AudioExtractionError.js +127 -0
  22. package/build/cjs/errors/AudioExtractionError.js.map +1 -0
  23. package/build/cjs/index.js +6 -1
  24. package/build/cjs/index.js.map +1 -1
  25. package/build/cjs/useAudioRecorder.js +36 -18
  26. package/build/cjs/useAudioRecorder.js.map +1 -1
  27. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  28. package/build/esm/AudioAnalysis/extractPreview.js +92 -15
  29. package/build/esm/AudioAnalysis/extractPreview.js.map +1 -1
  30. package/build/esm/AudioAnalysis/extractPreviewBars.js +128 -0
  31. package/build/esm/AudioAnalysis/extractPreviewBars.js.map +1 -0
  32. package/build/esm/AudioStudio.types.js.map +1 -1
  33. package/build/esm/errors/AudioExtractionError.js +122 -0
  34. package/build/esm/errors/AudioExtractionError.js.map +1 -0
  35. package/build/esm/index.js +2 -0
  36. package/build/esm/index.js.map +1 -1
  37. package/build/esm/useAudioRecorder.js +36 -18
  38. package/build/esm/useAudioRecorder.js.map +1 -1
  39. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +79 -0
  40. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  41. package/build/types/AudioAnalysis/extractPreview.d.ts +2 -2
  42. package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -1
  43. package/build/types/AudioAnalysis/extractPreviewBars.d.ts +12 -0
  44. package/build/types/AudioAnalysis/extractPreviewBars.d.ts.map +1 -0
  45. package/build/types/AudioStudio.types.d.ts +14 -1
  46. package/build/types/AudioStudio.types.d.ts.map +1 -1
  47. package/build/types/errors/AudioExtractionError.d.ts +24 -0
  48. package/build/types/errors/AudioExtractionError.d.ts.map +1 -0
  49. package/build/types/index.d.ts +3 -0
  50. package/build/types/index.d.ts.map +1 -1
  51. package/build/types/useAudioRecorder.d.ts.map +1 -1
  52. package/ios/AudioProcessor.swift +99 -0
  53. package/ios/AudioStreamManager.swift +79 -15
  54. package/ios/AudioStudioModule.swift +63 -0
  55. package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +41 -1
  56. package/package.json +7 -7
  57. package/src/AudioAnalysis/AudioAnalysis.types.ts +82 -0
  58. package/src/AudioAnalysis/extractPreview.ts +118 -17
  59. package/src/AudioAnalysis/extractPreviewBars.ts +193 -0
  60. package/src/AudioStudio.types.ts +15 -1
  61. package/src/errors/AudioExtractionError.ts +167 -0
  62. package/src/index.ts +10 -0
  63. package/src/useAudioRecorder.tsx +36 -14
@@ -10,6 +10,7 @@ import kotlin.system.measureTimeMillis
10
10
  import android.media.MediaExtractor
11
11
  import android.media.MediaFormat
12
12
  import android.media.MediaCodec
13
+ import java.io.ByteArrayOutputStream
13
14
  import java.io.FileInputStream
14
15
  import java.io.RandomAccessFile
15
16
  import java.util.zip.CRC32
@@ -865,27 +866,15 @@ class AudioProcessor(private val filesDir: File) {
865
866
  LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")
866
867
 
867
868
  // Process using MediaExtractor
868
- val pcmData = decodeAudioToPCM(extractor, format)
869
- val processedData = if (decodingConfig != null) {
870
- processAudio(
871
- pcmData,
872
- originalSampleRate,
873
- decodingConfig.targetSampleRate,
874
- originalChannels,
875
- decodingConfig.targetChannels,
876
- decodingConfig.normalizeAudio
877
- )
878
- } else {
879
- pcmData
880
- }
881
-
882
- return AudioData(
883
- data = processedData,
884
- sampleRate = decodingConfig?.targetSampleRate ?: originalSampleRate,
885
- bitDepth = decodingConfig?.targetBitDepth ?: 16,
886
- channels = decodingConfig?.targetChannels ?: originalChannels,
887
- durationMs = totalDurationMs // Pass through the duration
869
+ val sourceData = AudioData(
870
+ data = decodeAudioToPCM(extractor, format),
871
+ sampleRate = originalSampleRate,
872
+ bitDepth = 16,
873
+ channels = originalChannels,
874
+ durationMs = totalDurationMs
888
875
  )
876
+
877
+ return decodingConfig?.let { processAudioData(sourceData, it) } ?: sourceData
889
878
  }
890
879
  } catch (e: Exception) {
891
880
  LogUtils.d(CLASS_NAME, "MediaExtractor failed, attempting WAV parser: ${e.message}")
@@ -898,21 +887,7 @@ class AudioProcessor(private val filesDir: File) {
898
887
  LogUtils.d(CLASS_NAME, "Falling back to WAV parser")
899
888
  return loadAudioFile(file.absolutePath)?.let { wavData ->
900
889
  if (decodingConfig != null) {
901
- val processedData = processAudio(
902
- wavData.data,
903
- wavData.sampleRate,
904
- decodingConfig.targetSampleRate,
905
- wavData.channels,
906
- decodingConfig.targetChannels,
907
- decodingConfig.normalizeAudio
908
- )
909
- AudioData(
910
- data = processedData,
911
- sampleRate = decodingConfig.targetSampleRate ?: wavData.sampleRate,
912
- bitDepth = decodingConfig.targetBitDepth,
913
- channels = decodingConfig.targetChannels ?: wavData.channels,
914
- durationMs = wavData.durationMs // Pass through the duration
915
- )
890
+ processAudioData(wavData, decodingConfig)
916
891
  } else {
917
892
  wavData
918
893
  }
@@ -975,126 +950,145 @@ class AudioProcessor(private val filesDir: File) {
975
950
  }
976
951
  }
977
952
 
978
- private fun resampleAudio(
953
+ fun processAudio(
979
954
  pcmData: ByteArray,
980
955
  originalSampleRate: Int,
981
- targetSampleRate: Int,
982
- originalChannels: Int
956
+ targetSampleRate: Int?,
957
+ originalChannels: Int,
958
+ targetChannels: Int?,
959
+ normalize: Boolean
983
960
  ): ByteArray {
984
- // Convert byte array to short array (16-bit samples)
985
- val shortArray = ShortArray(pcmData.size / 2)
986
- ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shortArray)
987
-
988
- // Convert to mono if needed
989
- val monoShortArray = if (originalChannels > 1) {
990
- convertToMono(shortArray, originalChannels)
991
- } else {
992
- shortArray
993
- }
961
+ return processAudioData(
962
+ AudioData(
963
+ data = pcmData,
964
+ sampleRate = originalSampleRate,
965
+ bitDepth = 16,
966
+ channels = originalChannels
967
+ ),
968
+ DecodingConfig(
969
+ targetSampleRate = targetSampleRate,
970
+ targetChannels = targetChannels,
971
+ targetBitDepth = 16,
972
+ normalizeAudio = normalize
973
+ )
974
+ ).data
975
+ }
994
976
 
995
- // Resample
996
- val resampleRatio = targetSampleRate.toDouble() / originalSampleRate
997
- val newLength = (monoShortArray.size * resampleRatio).toInt()
998
- val resampledArray = ShortArray(newLength)
977
+ fun processAudioData(audioData: AudioData, config: DecodingConfig): AudioData {
978
+ val targetSampleRate = config.targetSampleRate ?: audioData.sampleRate
979
+ val targetChannels = config.targetChannels ?: audioData.channels
980
+ val targetBitDepth = config.targetBitDepth
981
+
982
+ require(targetSampleRate > 0) { "targetSampleRate must be positive" }
983
+ require(targetChannels in 1..2) { "targetChannels must be 1 or 2, got: $targetChannels" }
984
+ require(targetBitDepth in listOf(8, 16, 32)) { "Unsupported target bit depth: $targetBitDepth" }
985
+ require(audioData.channels > 0) { "source channels must be positive" }
986
+ require(audioData.sampleRate > 0) { "source sampleRate must be positive" }
987
+ require(audioData.bitDepth in listOf(8, 16, 32)) { "Unsupported source bit depth: ${audioData.bitDepth}" }
988
+
989
+ val sourceSamples = decodePcmToFloatSamples(audioData.data, audioData.channels, audioData.bitDepth)
990
+ val channelConverted = convertFloatChannels(sourceSamples, audioData.channels, targetChannels)
991
+ val resampled = resampleFloatFrames(channelConverted, targetChannels, audioData.sampleRate, targetSampleRate)
992
+ val normalized = if (config.normalizeAudio) normalizeFloatSamples(resampled) else resampled
993
+ val processedData = encodeFloatSamplesToPcm(normalized, targetBitDepth)
994
+
995
+ return AudioData(
996
+ data = processedData,
997
+ sampleRate = targetSampleRate,
998
+ bitDepth = targetBitDepth,
999
+ channels = targetChannels,
1000
+ durationMs = computePcmDurationMs(processedData, targetSampleRate, targetChannels, targetBitDepth)
1001
+ )
1002
+ }
999
1003
 
1000
- for (i in resampledArray.indices) {
1001
- val originalIndex = (i / resampleRatio).toInt()
1002
- val nextIndex = minOf(originalIndex + 1, monoShortArray.size - 1)
1003
- val fraction = (i / resampleRatio) - originalIndex
1004
+ private fun computePcmDurationMs(data: ByteArray, sampleRate: Int, channels: Int, bitDepth: Int): Long {
1005
+ val bytesPerFrame = (channels * (bitDepth / 8)).coerceAtLeast(1)
1006
+ val frames = data.size / bytesPerFrame
1007
+ return (frames * 1000L) / sampleRate.coerceAtLeast(1)
1008
+ }
1004
1009
 
1005
- // Linear interpolation
1006
- val sample = linearInterpolate(
1007
- monoShortArray[originalIndex].toDouble(),
1008
- monoShortArray[nextIndex].toDouble(),
1009
- fraction
1010
- ).toInt().toShort()
1010
+ private fun decodePcmToFloatSamples(data: ByteArray, channels: Int, bitDepth: Int): FloatArray {
1011
+ if (data.isEmpty()) return FloatArray(0)
1012
+ val bytesPerSample = bitDepth / 8
1013
+ val totalSamples = data.size / bytesPerSample
1014
+ val alignedSamples = totalSamples - (totalSamples % channels)
1015
+ val buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN)
1016
+ val samples = FloatArray(alignedSamples)
1011
1017
 
1012
- resampledArray[i] = sample
1018
+ for (i in 0 until alignedSamples) {
1019
+ samples[i] = when (bitDepth) {
1020
+ 8 -> ((buffer.get().toInt() and 0xFF) - 128) / 128.0f
1021
+ 16 -> buffer.short / 32768.0f
1022
+ 32 -> buffer.int / 2147483648.0f
1023
+ else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
1024
+ }.coerceIn(-1.0f, 1.0f)
1013
1025
  }
1014
1026
 
1015
- // Convert back to byte array
1016
- val resultBuffer = ByteBuffer.allocate(resampledArray.size * 2)
1017
- resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
1018
- resultBuffer.asShortBuffer().put(resampledArray)
1019
- return resultBuffer.array()
1027
+ return samples
1020
1028
  }
1021
1029
 
1022
- private fun convertToMono(stereoData: ShortArray, channels: Int): ShortArray {
1023
- val monoLength = stereoData.size / channels
1024
- val monoData = ShortArray(monoLength)
1030
+ private fun convertFloatChannels(samples: FloatArray, fromChannels: Int, toChannels: Int): FloatArray {
1031
+ if (fromChannels == toChannels || samples.isEmpty()) return samples
1032
+ val frames = samples.size / fromChannels
1025
1033
 
1026
- for (i in 0 until monoLength) {
1027
- var sum = 0
1028
- for (ch in 0 until channels) {
1029
- sum += stereoData[i * channels + ch]
1034
+ return when {
1035
+ fromChannels == 1 && toChannels == 2 -> FloatArray(frames * 2) { index -> samples[index / 2] }
1036
+ fromChannels == 2 && toChannels == 1 -> FloatArray(frames) { frame ->
1037
+ ((samples[frame * 2] + samples[frame * 2 + 1]) / 2.0f).coerceIn(-1.0f, 1.0f)
1030
1038
  }
1031
- monoData[i] = (sum / channels).toShort()
1039
+ else -> throw IllegalArgumentException("Unsupported channel conversion: $fromChannels to $toChannels")
1032
1040
  }
1033
-
1034
- return monoData
1035
1041
  }
1036
1042
 
1037
- private fun linearInterpolate(a: Double, b: Double, fraction: Double): Double {
1038
- return a + fraction * (b - a)
1039
- }
1043
+ private fun resampleFloatFrames(samples: FloatArray, channels: Int, fromSampleRate: Int, toSampleRate: Int): FloatArray {
1044
+ if (fromSampleRate == toSampleRate || samples.isEmpty()) return samples
1040
1045
 
1041
- fun processAudio(
1042
- pcmData: ByteArray,
1043
- originalSampleRate: Int,
1044
- targetSampleRate: Int?,
1045
- originalChannels: Int,
1046
- targetChannels: Int?,
1047
- normalize: Boolean
1048
- ): ByteArray {
1049
- var processedData = pcmData
1046
+ val sourceFrames = samples.size / channels
1047
+ if (sourceFrames == 0) return FloatArray(0)
1050
1048
 
1051
- // Only resample if target sample rate is explicitly specified and different
1052
- if (targetSampleRate != null && originalSampleRate != targetSampleRate) {
1053
- processedData = resampleAudio(processedData, originalSampleRate, targetSampleRate, originalChannels)
1054
- }
1049
+ val ratio = toSampleRate.toDouble() / fromSampleRate
1050
+ // roundToInt() preserves duration best for fractional sample-rate ratios;
1051
+ // callers derive metadata from the actual output frame count below.
1052
+ val targetFrames = maxOf(1, (sourceFrames * ratio).roundToInt())
1053
+ val output = FloatArray(targetFrames * channels)
1055
1054
 
1056
- // Only convert channels if target channels is explicitly specified and different
1057
- if (targetChannels != null && originalChannels != targetChannels) {
1058
- processedData = convertChannels(processedData, originalChannels, targetChannels)
1059
- }
1055
+ for (frame in 0 until targetFrames) {
1056
+ val sourcePosition = frame / ratio
1057
+ val sourceFrame = floor(sourcePosition).toInt().coerceIn(0, sourceFrames - 1)
1058
+ val nextFrame = minOf(sourceFrame + 1, sourceFrames - 1)
1059
+ val fraction = (sourcePosition - sourceFrame).toFloat()
1060
1060
 
1061
- // Only normalize if explicitly requested
1062
- if (normalize) {
1063
- processedData = normalizeAudio(processedData)
1061
+ for (channel in 0 until channels) {
1062
+ val a = samples[sourceFrame * channels + channel]
1063
+ val b = samples[nextFrame * channels + channel]
1064
+ output[frame * channels + channel] = (a + ((b - a) * fraction)).coerceIn(-1.0f, 1.0f)
1065
+ }
1064
1066
  }
1065
1067
 
1066
- return processedData
1068
+ return output
1067
1069
  }
1068
1070
 
1069
- private fun normalizeAudio(pcmData: ByteArray): ByteArray {
1070
- val shorts = ShortArray(pcmData.size / 2)
1071
- ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts)
1072
-
1073
- // Find maximum amplitude
1074
- var maxAmplitude = 0
1075
- for (sample in shorts) {
1076
- maxAmplitude = maxOf(maxAmplitude, abs(sample.toInt()))
1077
- }
1071
+ private fun normalizeFloatSamples(samples: FloatArray): FloatArray {
1072
+ val maxAmplitude = samples.maxOfOrNull { abs(it) } ?: 0.0f
1073
+ if (maxAmplitude <= 0.0f) return samples
1074
+ return FloatArray(samples.size) { index -> (samples[index] / maxAmplitude).coerceIn(-1.0f, 1.0f) }
1075
+ }
1078
1076
 
1079
- // Normalize if we found a non-zero maximum
1080
- if (maxAmplitude > 0) {
1081
- val normalizationFactor = Short.MAX_VALUE.toFloat() / maxAmplitude
1082
- for (i in shorts.indices) {
1083
- shorts[i] = (shorts[i] * normalizationFactor).toInt().toShort()
1077
+ private fun encodeFloatSamplesToPcm(samples: FloatArray, bitDepth: Int): ByteArray {
1078
+ val bytesPerSample = bitDepth / 8
1079
+ val output = ByteBuffer.allocate(samples.size * bytesPerSample).order(ByteOrder.LITTLE_ENDIAN)
1080
+
1081
+ for (sample in samples) {
1082
+ val clamped = sample.coerceIn(-1.0f, 1.0f)
1083
+ when (bitDepth) {
1084
+ 8 -> output.put(((clamped * 127.0f) + 128.0f).roundToInt().coerceIn(0, 255).toByte())
1085
+ 16 -> output.putShort((clamped * 32767.0f).roundToInt().coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort())
1086
+ 32 -> output.putInt((clamped * Int.MAX_VALUE.toFloat()).roundToInt())
1087
+ else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
1084
1088
  }
1085
1089
  }
1086
1090
 
1087
- // Convert back to bytes
1088
- val resultBuffer = ByteBuffer.allocate(shorts.size * 2)
1089
- resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
1090
- resultBuffer.asShortBuffer().put(shorts)
1091
- return resultBuffer.array()
1092
- }
1093
-
1094
- private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
1095
- // Use the correct implementation from AudioFormatUtils
1096
- // Assuming 16-bit audio (which is the default for most audio processing)
1097
- return AudioFormatUtils.convertChannels(pcmData, originalChannels, targetChannels, 16)
1091
+ return output.array()
1098
1092
  }
1099
1093
 
1100
1094
  private fun debugWavHeader(file: File) {
@@ -1126,6 +1120,126 @@ class AudioProcessor(private val filesDir: File) {
1126
1120
  }
1127
1121
  }
1128
1122
 
1123
+ private data class PreviewBarFrameRange(
1124
+ val durationMs: Long,
1125
+ val startFrameIndex: Int,
1126
+ val endFrameIndex: Int,
1127
+ val framesInRange: Int,
1128
+ val bytesPerFrame: Int
1129
+ )
1130
+
1131
+ private fun validatePreviewBarRange(startTimeMs: Long?, endTimeMs: Long?, totalDurationMs: Long) {
1132
+ startTimeMs?.let { start ->
1133
+ require(start >= 0) { "startTimeMs must be non-negative, got: $start" }
1134
+ require(start <= totalDurationMs) { "startTimeMs ($start) is beyond audio duration ($totalDurationMs)" }
1135
+ }
1136
+ endTimeMs?.let { end ->
1137
+ require(end >= 0) { "endTimeMs must be non-negative, got: $end" }
1138
+ startTimeMs?.let { start ->
1139
+ require(start < end) { "startTimeMs ($start) must be less than endTimeMs ($end)" }
1140
+ }
1141
+ }
1142
+ }
1143
+
1144
+ private fun computePreviewBarFrameRange(
1145
+ audioData: AudioData,
1146
+ startTimeMs: Long?,
1147
+ endTimeMs: Long?
1148
+ ): PreviewBarFrameRange {
1149
+ val effectiveStartMs = startTimeMs ?: 0L
1150
+ val effectiveEndMs = (endTimeMs ?: audioData.durationMs).coerceAtMost(audioData.durationMs)
1151
+ val durationMs = (effectiveEndMs - effectiveStartMs).coerceAtLeast(1L)
1152
+ val bytesPerSample = (audioData.bitDepth / 8).coerceAtLeast(1)
1153
+ val bytesPerFrame = (bytesPerSample * audioData.channels).coerceAtLeast(1)
1154
+ val totalFrames = audioData.data.size / bytesPerFrame
1155
+ val startFrameIndex = ((effectiveStartMs * audioData.sampleRate) / 1000)
1156
+ .toInt()
1157
+ .coerceIn(0, totalFrames)
1158
+ val endFrameIndex = ((effectiveEndMs * audioData.sampleRate) / 1000)
1159
+ .toInt()
1160
+ .coerceIn(startFrameIndex, totalFrames)
1161
+ val framesInRange = endFrameIndex - startFrameIndex
1162
+ require(framesInRange > 0) { "Invalid sample range: contains no samples" }
1163
+
1164
+ return PreviewBarFrameRange(
1165
+ durationMs = durationMs,
1166
+ startFrameIndex = startFrameIndex,
1167
+ endFrameIndex = endFrameIndex,
1168
+ framesInRange = framesInRange,
1169
+ bytesPerFrame = bytesPerFrame
1170
+ )
1171
+ }
1172
+
1173
+ fun generatePreviewBars(
1174
+ audioData: AudioData,
1175
+ numberOfBars: Int,
1176
+ startTimeMs: Long? = null,
1177
+ endTimeMs: Long? = null,
1178
+ silenceRmsThreshold: Float = 0.01f
1179
+ ): Map<String, Any> {
1180
+ validatePreviewBarRange(startTimeMs, endTimeMs, audioData.durationMs)
1181
+ val requestedBars = numberOfBars.coerceAtLeast(1)
1182
+ val frameRange = computePreviewBarFrameRange(audioData, startTimeMs, endTimeMs)
1183
+ val framesPerBar = (frameRange.framesInRange / requestedBars).coerceAtLeast(1)
1184
+ val bars = mutableListOf<Map<String, Any>>()
1185
+ var minAmplitude = Float.MAX_VALUE
1186
+ var maxAmplitude = Float.NEGATIVE_INFINITY
1187
+ var minRms = Float.MAX_VALUE
1188
+ var maxRms = Float.NEGATIVE_INFINITY
1189
+
1190
+ val extractionTimeMs = measureTimeMillis {
1191
+ for (i in 0 until requestedBars) {
1192
+ val barStartFrame = frameRange.startFrameIndex + (i * framesPerBar)
1193
+ val barEndFrame = minOf(frameRange.startFrameIndex + ((i + 1) * framesPerBar), frameRange.endFrameIndex)
1194
+ if (barStartFrame >= barEndFrame) break
1195
+
1196
+ val barStartByte = barStartFrame * frameRange.bytesPerFrame
1197
+ val barEndByte = minOf(barEndFrame * frameRange.bytesPerFrame, audioData.data.size)
1198
+ val segmentBytes = audioData.data.sliceArray(barStartByte until barEndByte)
1199
+ val segmentData = when (audioData.bitDepth) {
1200
+ 16 -> convert16BitPcmToFloat(segmentBytes)
1201
+ 32 -> convert32BitPcmToFloat(segmentBytes)
1202
+ else -> convert8BitPcmToFloat(segmentBytes)
1203
+ }
1204
+ if (segmentData.isEmpty()) continue
1205
+
1206
+ val rms = sqrt(segmentData.map { it * it }.average().toFloat())
1207
+ val amplitude = segmentData.maxOf { abs(it) }
1208
+ minAmplitude = minOf(minAmplitude, amplitude)
1209
+ maxAmplitude = maxOf(maxAmplitude, amplitude)
1210
+ minRms = minOf(minRms, rms)
1211
+ maxRms = maxOf(maxRms, rms)
1212
+
1213
+ val startBarTimeMs = ((barStartFrame - frameRange.startFrameIndex).toDouble() / frameRange.framesInRange.toDouble() * frameRange.durationMs).toLong()
1214
+ val endBarTimeMs = ((barEndFrame - frameRange.startFrameIndex).toDouble() / frameRange.framesInRange.toDouble() * frameRange.durationMs).toLong()
1215
+ bars.add(mapOf(
1216
+ "id" to i,
1217
+ "amplitude" to amplitude.coerceIn(0f, 1f),
1218
+ "rms" to rms.coerceIn(0f, 1f),
1219
+ "silent" to (rms < silenceRmsThreshold),
1220
+ "startTimeMs" to startBarTimeMs,
1221
+ "endTimeMs" to endBarTimeMs.coerceAtLeast(startBarTimeMs)
1222
+ ))
1223
+ }
1224
+ }
1225
+
1226
+ check(bars.isNotEmpty()) { "No preview bars were generated" }
1227
+
1228
+ return mapOf(
1229
+ "bars" to bars,
1230
+ "durationMs" to frameRange.durationMs.toInt(),
1231
+ "sampleRate" to audioData.sampleRate,
1232
+ "numberOfChannels" to audioData.channels,
1233
+ "bitDepth" to audioData.bitDepth,
1234
+ "samples" to frameRange.framesInRange,
1235
+ "requestedNumberOfBars" to requestedBars,
1236
+ "barDurationMs" to (frameRange.durationMs.toDouble() / bars.size.toDouble()),
1237
+ "amplitudeRange" to mapOf("min" to minAmplitude, "max" to maxAmplitude),
1238
+ "rmsRange" to mapOf("min" to minRms, "max" to maxRms),
1239
+ "extractionTimeMs" to extractionTimeMs.toFloat()
1240
+ )
1241
+ }
1242
+
1129
1243
  fun generatePreview(
1130
1244
  audioData: AudioData,
1131
1245
  numberOfPoints: Int,
@@ -1310,49 +1424,48 @@ class AudioProcessor(private val filesDir: File) {
1310
1424
  ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
1311
1425
  ?: throw IllegalArgumentException("File not found: $fileUri")
1312
1426
 
1313
- // Use existing method to get audio format
1314
1427
  val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")
1315
-
1316
- val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
1317
- val startByteOffset = ((startTimeMs * bytesPerSecond) / 1000).toInt()
1318
- val endByteOffset = ((endTimeMs * bytesPerSecond) / 1000).toInt()
1319
-
1320
- val startByte = headerSize + startByteOffset
1321
- val endByte = headerSize + endByteOffset
1428
+ val bytesPerFrame = format.channels * (format.bitDepth / 8)
1429
+ val totalFrames = ((file.length() - headerSize).coerceAtLeast(0L) / bytesPerFrame).toInt()
1430
+ val startFrame = ((startTimeMs * format.sampleRate) / 1000).toInt().coerceIn(0, totalFrames)
1431
+ val endFrame = ((endTimeMs * format.sampleRate) / 1000).toInt().coerceIn(startFrame, totalFrames)
1432
+ val startByte = headerSize + (startFrame * bytesPerFrame)
1433
+ val bytesToRead = (endFrame - startFrame) * bytesPerFrame
1322
1434
 
1323
1435
  LogUtils.d(CLASS_NAME, """
1324
1436
  Loading WAV range:
1325
1437
  - headerSize: $headerSize
1438
+ - startFrame: $startFrame
1439
+ - endFrame: $endFrame
1326
1440
  - startByte: $startByte
1327
- - endByte: $endByte
1328
- - bytesPerSecond: $bytesPerSecond
1441
+ - bytesToRead: $bytesToRead
1329
1442
  """.trimIndent())
1330
1443
 
1331
- var audioDataBytes = ByteArray((endByte - startByte).coerceAtLeast(0))
1444
+ val audioDataBytes = ByteArray(bytesToRead.coerceAtLeast(0))
1332
1445
  FileInputStream(file).use { fis ->
1333
- fis.skip(startByte.toLong())
1334
- fis.read(audioDataBytes)
1335
- }
1336
-
1337
- // Apply bit depth conversion if needed
1338
- var effectiveBitDepth = format.bitDepth
1339
- if (config.targetBitDepth != format.bitDepth) {
1340
- audioDataBytes = AudioFormatUtils.convertBitDepth(
1341
- audioDataBytes,
1342
- format.bitDepth,
1343
- config.targetBitDepth
1344
- )
1345
- effectiveBitDepth = config.targetBitDepth
1346
- LogUtils.d(CLASS_NAME, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
1446
+ var skipped = 0L
1447
+ while (skipped < startByte) {
1448
+ val delta = fis.skip(startByte - skipped)
1449
+ if (delta <= 0) break
1450
+ skipped += delta
1451
+ }
1452
+ var offset = 0
1453
+ while (offset < audioDataBytes.size) {
1454
+ val read = fis.read(audioDataBytes, offset, audioDataBytes.size - offset)
1455
+ if (read <= 0) break
1456
+ offset += read
1457
+ }
1347
1458
  }
1348
1459
 
1349
- return AudioData(
1460
+ val sourceData = AudioData(
1350
1461
  data = audioDataBytes,
1351
1462
  sampleRate = format.sampleRate,
1352
1463
  channels = format.channels,
1353
- bitDepth = effectiveBitDepth,
1354
- durationMs = endTimeMs - startTimeMs
1464
+ bitDepth = format.bitDepth,
1465
+ durationMs = computePcmDurationMs(audioDataBytes, format.sampleRate, format.channels, format.bitDepth)
1355
1466
  )
1467
+
1468
+ return processAudioData(sourceData, config)
1356
1469
  } catch (e: Exception) {
1357
1470
  LogUtils.e(CLASS_NAME, "Failed to load WAV range: ${e.message}", e)
1358
1471
  return null
@@ -1365,127 +1478,13 @@ class AudioProcessor(private val filesDir: File) {
1365
1478
  endTimeMs: Long,
1366
1479
  config: DecodingConfig
1367
1480
  ): AudioData? {
1368
- val extractor = MediaExtractor()
1369
- var decoder: MediaCodec? = null
1370
-
1371
- try {
1372
- extractor.setDataSource(fileUri.removePrefix("file://"))
1373
- val format = extractor.getTrackFormat(0)
1374
- extractor.selectTrack(0)
1375
-
1376
- val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
1377
- val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
1378
- val totalDurationUs = try {
1379
- format.getLong(MediaFormat.KEY_DURATION)
1380
- } catch (e: Exception) {
1381
- (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
1382
- }
1383
- LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")
1384
-
1385
- val totalDurationMs = totalDurationUs / 1000
1386
- LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")
1387
-
1388
- // Calculate valid time range
1389
- val validStartMs = startTimeMs.coerceIn(0, totalDurationMs) ?: 0
1390
- val validEndMs = endTimeMs.coerceIn(validStartMs, totalDurationMs) ?: totalDurationMs
1391
- val effectiveDurationMs = validEndMs - validStartMs
1392
-
1393
- // Initialize decoder
1394
- decoder = MediaCodec.createDecoderByType(format.getString(MediaFormat.KEY_MIME)!!)
1395
- decoder.configure(format, null, null, 0)
1396
- decoder.start()
1397
-
1398
- // Seek to start position if needed
1399
- if (validStartMs > 0) {
1400
- extractor.seekTo(validStartMs * 1000, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
1401
- }
1402
-
1403
- // Calculate buffer sizes
1404
- val targetSampleRate = config.targetSampleRate ?: originalSampleRate
1405
- val targetChannels = config.targetChannels ?: originalChannels
1406
- val targetBitDepth = config.targetBitDepth ?: 16
1407
- val bytesPerSample = targetBitDepth / 8
1408
- val samplesPerSecond = targetSampleRate * targetChannels
1409
- val totalBytes = (effectiveDurationMs * samplesPerSecond * bytesPerSample) / 1000
1410
-
1411
- LogUtils.d(CLASS_NAME, """
1412
- Loading audio range:
1413
- - start: ${validStartMs}ms
1414
- - end: ${validEndMs}ms
1415
- - duration: ${effectiveDurationMs}ms
1416
- - bytes: $totalBytes
1417
- - format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
1418
- """.trimIndent())
1419
-
1420
- val outputBuffer = ByteBuffer.allocate(totalBytes.toInt())
1421
- val bufferInfo = MediaCodec.BufferInfo()
1422
- var isEOS = false
1423
-
1424
- while (!isEOS) {
1425
- // Handle input
1426
- val inputBufferId = decoder.dequeueInputBuffer(10000)
1427
- if (inputBufferId >= 0) {
1428
- val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
1429
- val sampleSize = extractor.readSampleData(inputBuffer, 0)
1430
-
1431
- when {
1432
- sampleSize < 0 -> {
1433
- decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
1434
- isEOS = true
1435
- }
1436
- extractor.sampleTime > validEndMs * 1000 -> {
1437
- decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
1438
- isEOS = true
1439
- }
1440
- else -> {
1441
- decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
1442
- extractor.advance()
1443
- }
1444
- }
1445
- }
1446
-
1447
- // Handle output
1448
- val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000)
1449
- if (outputBufferId >= 0) {
1450
- val decodedBuffer = decoder.getOutputBuffer(outputBufferId)!!
1451
- if (bufferInfo.size > 0) {
1452
- // Set buffer position and limit based on the decoded data
1453
- decodedBuffer.position(bufferInfo.offset)
1454
- decodedBuffer.limit(bufferInfo.offset + bufferInfo.size)
1455
-
1456
- // Copy decoded data to our output buffer
1457
- outputBuffer.put(decodedBuffer)
1458
- }
1459
- decoder.releaseOutputBuffer(outputBufferId, false)
1460
-
1461
- // Check if we've reached the end
1462
- if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
1463
- isEOS = true
1464
- }
1465
- }
1466
- }
1467
-
1468
- // Prepare the final byte array
1469
- outputBuffer.flip()
1470
- val audioData = ByteArray(outputBuffer.remaining())
1471
- outputBuffer.get(audioData)
1472
-
1473
- return AudioData(
1474
- data = audioData,
1475
- sampleRate = targetSampleRate,
1476
- channels = targetChannels,
1477
- bitDepth = targetBitDepth,
1478
- durationMs = endTimeMs - startTimeMs // Use the actual time range
1479
- ).also {
1480
- LogUtils.d(CLASS_NAME, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
1481
- }
1481
+ return try {
1482
+ val sourceData = decodeAudioRangeToPCM(fileUri, startTimeMs, endTimeMs)
1483
+ ?: throw IllegalStateException("Failed to decode compressed audio range")
1484
+ processAudioData(sourceData, config)
1482
1485
  } catch (e: Exception) {
1483
1486
  LogUtils.e(CLASS_NAME, "Failed to load compressed audio range: ${e.message}", e)
1484
- return null
1485
- } finally {
1486
- decoder?.stop()
1487
- decoder?.release()
1488
- extractor.release()
1487
+ null
1489
1488
  }
1490
1489
  }
1491
1490
 
@@ -1520,42 +1519,28 @@ class AudioProcessor(private val filesDir: File) {
1520
1519
  - output: ${outputFile.name}
1521
1520
  """.trimIndent())
1522
1521
 
1523
- // Write WAV header
1524
- RandomAccessFile(outputFile, "rw").use { raf ->
1525
- // RIFF header
1526
- raf.write("RIFF".toByteArray())
1527
- val fileSize = audioData.data.size + 36 // File size minus RIFF header
1528
- raf.writeInt(fileSize)
1529
- raf.write("WAVE".toByteArray())
1530
-
1531
- // fmt chunk
1532
- raf.write("fmt ".toByteArray())
1533
- raf.writeInt(16) // Subchunk1Size (16 for PCM)
1534
- val formatBytes = ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN)
1535
- formatBytes.putShort(1) // AudioFormat (1 for PCM)
1536
- raf.write(formatBytes.array())
1537
-
1538
- val channelsBytes = ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN)
1539
- channelsBytes.putShort(audioData.channels.toShort())
1540
- raf.write(channelsBytes.array())
1541
-
1542
- val sampleRateBytes = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN)
1543
- sampleRateBytes.putInt(audioData.sampleRate)
1544
- raf.write(sampleRateBytes.array())
1545
-
1546
- val byteRate = audioData.sampleRate * audioData.channels * (audioData.bitDepth / 8)
1547
- raf.writeInt(byteRate) // ByteRate
1548
-
1549
- val blockAlign = audioData.channels * (audioData.bitDepth / 8)
1550
- raf.writeShort(blockAlign) // BlockAlign
1551
- raf.writeShort(audioData.bitDepth) // BitsPerSample
1522
+ val bytesPerSample = audioData.bitDepth / 8
1523
+ val byteRate = audioData.sampleRate * audioData.channels * bytesPerSample
1524
+ val blockAlign = audioData.channels * bytesPerSample
1525
+ val wavHeader = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
1526
+ put("RIFF".toByteArray())
1527
+ putInt(audioData.data.size + 36) // File size minus RIFF header
1528
+ put("WAVE".toByteArray())
1529
+ put("fmt ".toByteArray())
1530
+ putInt(16) // PCM fmt chunk size
1531
+ putShort(1) // PCM format
1532
+ putShort(audioData.channels.toShort())
1533
+ putInt(audioData.sampleRate)
1534
+ putInt(byteRate)
1535
+ putShort(blockAlign.toShort())
1536
+ putShort(audioData.bitDepth.toShort())
1537
+ put("data".toByteArray())
1538
+ putInt(audioData.data.size)
1539
+ }
1552
1540
 
1553
- // data chunk
1554
- raf.write("data".toByteArray())
1555
- raf.writeInt(audioData.data.size) // Subchunk2Size
1556
-
1557
- // Write audio data
1558
- raf.write(audioData.data)
1541
+ outputFile.outputStream().use { output ->
1542
+ output.write(wavHeader.array())
1543
+ output.write(audioData.data)
1559
1544
  }
1560
1545
 
1561
1546
  // Debug WAV header to verify
@@ -2020,7 +2005,10 @@ class AudioProcessor(private val filesDir: File) {
2020
2005
  val extractor = MediaExtractor()
2021
2006
  try {
2022
2007
  extractor.setDataSource(file.absolutePath)
2023
- val format = extractor.getTrackFormat(0)
2008
+ val trackIndex = (0 until extractor.trackCount).find {
2009
+ extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
2010
+ } ?: return null
2011
+ val format = extractor.getTrackFormat(trackIndex)
2024
2012
  return AudioFormat(
2025
2013
  sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE),
2026
2014
  channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
@@ -2125,96 +2113,136 @@ class AudioProcessor(private val filesDir: File) {
2125
2113
  }
2126
2114
 
2127
2115
  /**
2128
- * Decodes a specific time range of an audio file directly to PCM data
2129
- * This is more efficient than decoding the entire file when only a portion is needed
2116
+ * Decodes a specific time range of a compressed audio file directly to PCM data.
2117
+ * The returned data is source-rate/channel PCM clipped on frame boundaries; callers
2118
+ * must pass it through processAudioData(...) to apply target format options.
2130
2119
  */
2131
2120
  fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
2121
+ require(endTimeMs >= startTimeMs) { "endTimeMs must be >= startTimeMs" }
2122
+
2123
+ val cleanUri = fileUri.removePrefix("file://")
2124
+ val file = File(cleanUri).takeIf { it.exists() }
2125
+ ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
2126
+ ?: run {
2127
+ LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
2128
+ return null
2129
+ }
2130
+
2132
2131
  val extractor = MediaExtractor()
2133
- var decoder: android.media.MediaCodec? = null
2134
-
2132
+ var decoder: MediaCodec? = null
2133
+
2135
2134
  try {
2136
- extractor.setDataSource(fileUri)
2137
- val trackIndex = (0 until extractor.trackCount).find {
2138
- extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
2135
+ extractor.setDataSource(file.absolutePath)
2136
+ val trackIndex = (0 until extractor.trackCount).find {
2137
+ extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
2139
2138
  } ?: return null
2140
-
2139
+
2141
2140
  extractor.selectTrack(trackIndex)
2142
- val format = extractor.getTrackFormat(trackIndex)
2143
-
2144
- val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
2145
- val channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
2146
- decoder = android.media.MediaCodec.createDecoderByType(format.getString(MediaFormat.KEY_MIME)!!)
2147
- decoder.configure(format, null, null, 0)
2141
+ val inputFormat = extractor.getTrackFormat(trackIndex)
2142
+ var sampleRate = inputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
2143
+ var channels = inputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
2144
+ var outputEncoding = getPcmEncoding(inputFormat)
2145
+ var bitDepth = getPcmStorageBitDepth(outputEncoding)
2146
+ val mimeType = inputFormat.getString(MediaFormat.KEY_MIME)
2147
+ ?: throw IllegalArgumentException("Audio track is missing MIME type")
2148
+
2149
+ decoder = MediaCodec.createDecoderByType(mimeType)
2150
+ decoder.configure(inputFormat, null, null, 0)
2148
2151
  decoder.start()
2149
2152
 
2150
2153
  extractor.seekTo(startTimeMs * 1000, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)
2151
- val pcmData = mutableListOf<Byte>()
2152
- val bufferInfo = android.media.MediaCodec.BufferInfo()
2153
- var isEOS = false
2154
- var firstBufferTimeUs: Long? = null
2155
2154
 
2156
- while (!isEOS) {
2157
- val inputBufferId = decoder.dequeueInputBuffer(10000)
2158
- if (inputBufferId >= 0) {
2159
- val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
2160
- val sampleSize = extractor.readSampleData(inputBuffer, 0)
2161
- if (sampleSize < 0 || extractor.sampleTime > endTimeMs * 1000) {
2162
- decoder.queueInputBuffer(inputBufferId, 0, 0, 0, android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM)
2163
- isEOS = true
2164
- } else {
2165
- decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
2166
- extractor.advance()
2155
+ val pcmOutput = ByteArrayOutputStream()
2156
+ val bufferInfo = MediaCodec.BufferInfo()
2157
+ var inputDone = false
2158
+ var outputDone = false
2159
+ var firstBufferTimeUs: Long? = null
2160
+ val requestedDurationMs = (endTimeMs - startTimeMs).coerceAtLeast(1L)
2161
+
2162
+ while (!outputDone) {
2163
+ if (!inputDone) {
2164
+ val inputBufferId = decoder.dequeueInputBuffer(10_000)
2165
+ if (inputBufferId >= 0) {
2166
+ val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
2167
+ inputBuffer.clear()
2168
+ val sampleSize = extractor.readSampleData(inputBuffer, 0)
2169
+ val sampleTime = extractor.sampleTime
2170
+
2171
+ if (sampleSize < 0 || sampleTime < 0 || sampleTime > endTimeMs * 1000) {
2172
+ decoder.queueInputBuffer(
2173
+ inputBufferId,
2174
+ 0,
2175
+ 0,
2176
+ 0,
2177
+ MediaCodec.BUFFER_FLAG_END_OF_STREAM
2178
+ )
2179
+ inputDone = true
2180
+ } else {
2181
+ decoder.queueInputBuffer(inputBufferId, 0, sampleSize, sampleTime, 0)
2182
+ extractor.advance()
2183
+ }
2167
2184
  }
2168
2185
  }
2169
2186
 
2170
- val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000)
2171
- if (outputBufferId >= 0) {
2172
- val outputBuffer = decoder.getOutputBuffer(outputBufferId)!!
2173
- if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
2174
- val chunk = ByteArray(bufferInfo.size)
2175
- outputBuffer.get(chunk)
2176
- pcmData.addAll(chunk.toList())
2177
- decoder.releaseOutputBuffer(outputBufferId, false)
2178
- }
2179
- }
2187
+ when (val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10_000)) {
2188
+ MediaCodec.INFO_TRY_AGAIN_LATER -> Unit
2189
+ MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
2190
+ // Decoder format changes are expected before the first decoded output.
2191
+ // If a decoder ever changes format mid-stream, metadata follows the
2192
+ // latest format while the byte guard still prevents runaway output.
2193
+ decoder.outputFormat?.let { outputFormat ->
2194
+ sampleRate = outputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
2195
+ channels = outputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
2196
+ outputEncoding = getPcmEncoding(outputFormat)
2197
+ bitDepth = getPcmStorageBitDepth(outputEncoding)
2198
+ LogUtils.d(CLASS_NAME, "Decoder output format changed: ${sampleRate}Hz, $channels channels, $bitDepth-bit")
2199
+ }
2200
+ }
2201
+ else -> if (outputBufferId >= 0) {
2202
+ val outputBuffer = decoder.getOutputBuffer(outputBufferId)
2203
+ if (outputBuffer != null && bufferInfo.size > 0) {
2204
+ if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
2205
+ outputBuffer.position(bufferInfo.offset)
2206
+ outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
2207
+ writeDecodedPcmChunk(outputBuffer, bufferInfo.size, outputEncoding, pcmOutput)
2208
+
2209
+ enforceDecodedRangeGuard(
2210
+ decodedBytes = pcmOutput.size(),
2211
+ requestedDurationMs = requestedDurationMs,
2212
+ sampleRate = sampleRate,
2213
+ channels = channels,
2214
+ bitDepth = bitDepth
2215
+ )
2216
+ }
2180
2217
 
2181
- // If we didn't get any data or first buffer time, return null
2182
- if (pcmData.isEmpty() || firstBufferTimeUs == null) {
2183
- return null
2218
+ outputDone = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
2219
+ decoder.releaseOutputBuffer(outputBufferId, false)
2220
+ }
2221
+ }
2184
2222
  }
2185
2223
 
2186
- // Trim PCM data to exact time range
2187
- val bytesPerSample = 2 // 16-bit PCM
2188
- val bytesPerFrame = bytesPerSample * channels
2189
- val samplesPerSecond = sampleRate * channels
2190
- val dt = 1_000_000.0 / sampleRate // Time per sample in microseconds
2191
-
2192
- val allSamples = java.nio.ByteBuffer.wrap(pcmData.toByteArray()).order(java.nio.ByteOrder.LITTLE_ENDIAN).asShortBuffer()
2193
- val totalSamples = allSamples.capacity()
2194
-
2195
- // Calculate sample indices for the exact time range
2196
- val startSample = ((startTimeMs * 1000 - firstBufferTimeUs) / dt).toInt().coerceIn(0, totalSamples)
2197
- val endSample = ((endTimeMs * 1000 - firstBufferTimeUs) / dt).toInt().coerceIn(startSample, totalSamples)
2198
-
2199
- // Create a new ShortBuffer view starting at the correct position
2200
- allSamples.position(startSample)
2201
- val trimmedSamples = ShortArray(endSample - startSample)
2202
- for (i in trimmedSamples.indices) {
2203
- trimmedSamples[i] = allSamples.get()
2204
- }
2205
-
2206
- // Convert ShortArray to ByteArray
2207
- val trimmedBytes = ByteArray(trimmedSamples.size * 2)
2208
- val byteBuffer = java.nio.ByteBuffer.wrap(trimmedBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN)
2209
- val shortBuffer = byteBuffer.asShortBuffer()
2210
- shortBuffer.put(trimmedSamples)
2224
+ val decodedBytes = pcmOutput.toByteArray()
2225
+ val firstTimeUs = firstBufferTimeUs ?: return null
2226
+ val bytesPerFrame = channels * (bitDepth / 8)
2227
+ val totalFrames = decodedBytes.size / bytesPerFrame
2228
+ if (totalFrames <= 0) return null
2229
+
2230
+ val startFrame = (((startTimeMs * 1000L) - firstTimeUs).toDouble() * sampleRate / 1_000_000.0)
2231
+ .floorToInt()
2232
+ .coerceIn(0, totalFrames)
2233
+ val endFrame = (((endTimeMs * 1000L) - firstTimeUs).toDouble() * sampleRate / 1_000_000.0)
2234
+ .ceilToInt()
2235
+ .coerceIn(startFrame, totalFrames)
2236
+ val startByte = startFrame * bytesPerFrame
2237
+ val endByte = endFrame * bytesPerFrame
2238
+ val trimmedBytes = decodedBytes.copyOfRange(startByte, endByte)
2211
2239
 
2212
2240
  return AudioData(
2213
2241
  data = trimmedBytes,
2214
2242
  sampleRate = sampleRate,
2215
2243
  channels = channels,
2216
- bitDepth = 16, // MediaCodec typically decodes to 16-bit PCM
2217
- durationMs = endTimeMs - startTimeMs
2244
+ bitDepth = bitDepth,
2245
+ durationMs = computePcmDurationMs(trimmedBytes, sampleRate, channels, bitDepth)
2218
2246
  )
2219
2247
  } catch (e: Exception) {
2220
2248
  LogUtils.e(CLASS_NAME, "Failed to decode audio range: ${e.message}", e)
@@ -2222,11 +2250,14 @@ class AudioProcessor(private val filesDir: File) {
2222
2250
  } finally {
2223
2251
  try {
2224
2252
  decoder?.stop()
2253
+ } catch (e: Exception) {
2254
+ LogUtils.w(CLASS_NAME, "Error stopping decoder: ${e.message}")
2255
+ }
2256
+ try {
2225
2257
  decoder?.release()
2226
2258
  } catch (e: Exception) {
2227
2259
  LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
2228
2260
  }
2229
-
2230
2261
  try {
2231
2262
  extractor.release()
2232
2263
  } catch (e: Exception) {
@@ -2234,4 +2265,66 @@ class AudioProcessor(private val filesDir: File) {
2234
2265
  }
2235
2266
  }
2236
2267
  }
2268
+
2269
+ private fun getPcmEncoding(format: MediaFormat): Int {
2270
+ return if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.N &&
2271
+ format.containsKey(MediaFormat.KEY_PCM_ENCODING)
2272
+ ) {
2273
+ format.getInteger(MediaFormat.KEY_PCM_ENCODING)
2274
+ } else {
2275
+ android.media.AudioFormat.ENCODING_PCM_16BIT
2276
+ }
2277
+ }
2278
+
2279
+ private fun getPcmStorageBitDepth(encoding: Int): Int {
2280
+ return when (encoding) {
2281
+ android.media.AudioFormat.ENCODING_PCM_8BIT -> 8
2282
+ android.media.AudioFormat.ENCODING_PCM_16BIT -> 16
2283
+ // PCM_FLOAT decoder buffers are downconverted to signed 16-bit PCM
2284
+ // in writeDecodedPcmChunk, so persisted bytes and metadata are 16-bit.
2285
+ android.media.AudioFormat.ENCODING_PCM_FLOAT -> 16
2286
+ else -> 16
2287
+ }
2288
+ }
2289
+
2290
+ private fun writeDecodedPcmChunk(
2291
+ outputBuffer: ByteBuffer,
2292
+ size: Int,
2293
+ encoding: Int,
2294
+ output: ByteArrayOutputStream
2295
+ ) {
2296
+ if (encoding == android.media.AudioFormat.ENCODING_PCM_FLOAT) {
2297
+ val floatBuffer = outputBuffer.order(ByteOrder.nativeOrder()).asFloatBuffer()
2298
+ val pcm16 = ByteBuffer.allocate(floatBuffer.remaining() * 2).order(ByteOrder.LITTLE_ENDIAN)
2299
+ while (floatBuffer.hasRemaining()) {
2300
+ val sample = floatBuffer.get().coerceIn(-1.0f, 1.0f)
2301
+ pcm16.putShort((sample * 32767.0f).roundToInt().toShort())
2302
+ }
2303
+ output.write(pcm16.array())
2304
+ return
2305
+ }
2306
+
2307
+ val chunk = ByteArray(size)
2308
+ outputBuffer.get(chunk)
2309
+ output.write(chunk)
2310
+ }
2311
+
2312
+ private fun enforceDecodedRangeGuard(
2313
+ decodedBytes: Int,
2314
+ requestedDurationMs: Long,
2315
+ sampleRate: Int,
2316
+ channels: Int,
2317
+ bitDepth: Int
2318
+ ) {
2319
+ val bytesPerFrame = channels * (bitDepth / 8)
2320
+ val expectedBytes = ((requestedDurationMs + 5_000L) * sampleRate * bytesPerFrame) / 1000L
2321
+ val maxBytes = maxOf(1_048_576L, expectedBytes * 4L)
2322
+ check(decodedBytes <= maxBytes) {
2323
+ "Decoded audio range exceeded safety guard: $decodedBytes bytes > $maxBytes bytes"
2324
+ }
2325
+ }
2326
+
2327
+ private fun Double.floorToInt(): Int = floor(this).toInt()
2328
+ private fun Double.ceilToInt(): Int = ceil(this).toInt()
2329
+
2237
2330
  }