@siteed/audio-studio 3.0.5 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/README.md +108 -41
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +190 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +29 -83
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +17 -1
- package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +186 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +473 -380
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +74 -22
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +86 -19
- package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +174 -212
- package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +6 -0
- package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +37 -0
- package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +28 -0
- package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +49 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractPreview.js +92 -15
- package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractPreviewBars.js +134 -0
- package/build/cjs/AudioAnalysis/extractPreviewBars.js.map +1 -0
- package/build/cjs/AudioStudio.types.js.map +1 -1
- package/build/cjs/errors/AudioExtractionError.js +127 -0
- package/build/cjs/errors/AudioExtractionError.js.map +1 -0
- package/build/cjs/index.js +6 -1
- package/build/cjs/index.js.map +1 -1
- package/build/cjs/useAudioRecorder.js +36 -18
- package/build/cjs/useAudioRecorder.js.map +1 -1
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/esm/AudioAnalysis/extractPreview.js +92 -15
- package/build/esm/AudioAnalysis/extractPreview.js.map +1 -1
- package/build/esm/AudioAnalysis/extractPreviewBars.js +128 -0
- package/build/esm/AudioAnalysis/extractPreviewBars.js.map +1 -0
- package/build/esm/AudioStudio.types.js.map +1 -1
- package/build/esm/errors/AudioExtractionError.js +122 -0
- package/build/esm/errors/AudioExtractionError.js.map +1 -0
- package/build/esm/index.js +2 -0
- package/build/esm/index.js.map +1 -1
- package/build/esm/useAudioRecorder.js +36 -18
- package/build/esm/useAudioRecorder.js.map +1 -1
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +79 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractPreview.d.ts +2 -2
- package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractPreviewBars.d.ts +12 -0
- package/build/types/AudioAnalysis/extractPreviewBars.d.ts.map +1 -0
- package/build/types/AudioStudio.types.d.ts +14 -1
- package/build/types/AudioStudio.types.d.ts.map +1 -1
- package/build/types/errors/AudioExtractionError.d.ts +24 -0
- package/build/types/errors/AudioExtractionError.d.ts.map +1 -0
- package/build/types/index.d.ts +3 -0
- package/build/types/index.d.ts.map +1 -1
- package/build/types/useAudioRecorder.d.ts.map +1 -1
- package/ios/AudioProcessor.swift +99 -0
- package/ios/AudioStreamManager.swift +79 -15
- package/ios/AudioStudioModule.swift +63 -0
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +41 -1
- package/package.json +7 -7
- package/src/AudioAnalysis/AudioAnalysis.types.ts +82 -0
- package/src/AudioAnalysis/extractPreview.ts +118 -17
- package/src/AudioAnalysis/extractPreviewBars.ts +193 -0
- package/src/AudioStudio.types.ts +15 -1
- package/src/errors/AudioExtractionError.ts +167 -0
- package/src/index.ts +10 -0
- package/src/useAudioRecorder.tsx +36 -14
|
@@ -10,6 +10,7 @@ import kotlin.system.measureTimeMillis
|
|
|
10
10
|
import android.media.MediaExtractor
|
|
11
11
|
import android.media.MediaFormat
|
|
12
12
|
import android.media.MediaCodec
|
|
13
|
+
import java.io.ByteArrayOutputStream
|
|
13
14
|
import java.io.FileInputStream
|
|
14
15
|
import java.io.RandomAccessFile
|
|
15
16
|
import java.util.zip.CRC32
|
|
@@ -865,27 +866,15 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
865
866
|
LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")
|
|
866
867
|
|
|
867
868
|
// Process using MediaExtractor
|
|
868
|
-
val
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
originalChannels,
|
|
875
|
-
decodingConfig.targetChannels,
|
|
876
|
-
decodingConfig.normalizeAudio
|
|
877
|
-
)
|
|
878
|
-
} else {
|
|
879
|
-
pcmData
|
|
880
|
-
}
|
|
881
|
-
|
|
882
|
-
return AudioData(
|
|
883
|
-
data = processedData,
|
|
884
|
-
sampleRate = decodingConfig?.targetSampleRate ?: originalSampleRate,
|
|
885
|
-
bitDepth = decodingConfig?.targetBitDepth ?: 16,
|
|
886
|
-
channels = decodingConfig?.targetChannels ?: originalChannels,
|
|
887
|
-
durationMs = totalDurationMs // Pass through the duration
|
|
869
|
+
val sourceData = AudioData(
|
|
870
|
+
data = decodeAudioToPCM(extractor, format),
|
|
871
|
+
sampleRate = originalSampleRate,
|
|
872
|
+
bitDepth = 16,
|
|
873
|
+
channels = originalChannels,
|
|
874
|
+
durationMs = totalDurationMs
|
|
888
875
|
)
|
|
876
|
+
|
|
877
|
+
return decodingConfig?.let { processAudioData(sourceData, it) } ?: sourceData
|
|
889
878
|
}
|
|
890
879
|
} catch (e: Exception) {
|
|
891
880
|
LogUtils.d(CLASS_NAME, "MediaExtractor failed, attempting WAV parser: ${e.message}")
|
|
@@ -898,21 +887,7 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
898
887
|
LogUtils.d(CLASS_NAME, "Falling back to WAV parser")
|
|
899
888
|
return loadAudioFile(file.absolutePath)?.let { wavData ->
|
|
900
889
|
if (decodingConfig != null) {
|
|
901
|
-
|
|
902
|
-
wavData.data,
|
|
903
|
-
wavData.sampleRate,
|
|
904
|
-
decodingConfig.targetSampleRate,
|
|
905
|
-
wavData.channels,
|
|
906
|
-
decodingConfig.targetChannels,
|
|
907
|
-
decodingConfig.normalizeAudio
|
|
908
|
-
)
|
|
909
|
-
AudioData(
|
|
910
|
-
data = processedData,
|
|
911
|
-
sampleRate = decodingConfig.targetSampleRate ?: wavData.sampleRate,
|
|
912
|
-
bitDepth = decodingConfig.targetBitDepth,
|
|
913
|
-
channels = decodingConfig.targetChannels ?: wavData.channels,
|
|
914
|
-
durationMs = wavData.durationMs // Pass through the duration
|
|
915
|
-
)
|
|
890
|
+
processAudioData(wavData, decodingConfig)
|
|
916
891
|
} else {
|
|
917
892
|
wavData
|
|
918
893
|
}
|
|
@@ -975,126 +950,145 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
975
950
|
}
|
|
976
951
|
}
|
|
977
952
|
|
|
978
|
-
|
|
953
|
+
fun processAudio(
|
|
979
954
|
pcmData: ByteArray,
|
|
980
955
|
originalSampleRate: Int,
|
|
981
|
-
targetSampleRate: Int
|
|
982
|
-
originalChannels: Int
|
|
956
|
+
targetSampleRate: Int?,
|
|
957
|
+
originalChannels: Int,
|
|
958
|
+
targetChannels: Int?,
|
|
959
|
+
normalize: Boolean
|
|
983
960
|
): ByteArray {
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
961
|
+
return processAudioData(
|
|
962
|
+
AudioData(
|
|
963
|
+
data = pcmData,
|
|
964
|
+
sampleRate = originalSampleRate,
|
|
965
|
+
bitDepth = 16,
|
|
966
|
+
channels = originalChannels
|
|
967
|
+
),
|
|
968
|
+
DecodingConfig(
|
|
969
|
+
targetSampleRate = targetSampleRate,
|
|
970
|
+
targetChannels = targetChannels,
|
|
971
|
+
targetBitDepth = 16,
|
|
972
|
+
normalizeAudio = normalize
|
|
973
|
+
)
|
|
974
|
+
).data
|
|
975
|
+
}
|
|
994
976
|
|
|
995
|
-
|
|
996
|
-
val
|
|
997
|
-
val
|
|
998
|
-
val
|
|
977
|
+
fun processAudioData(audioData: AudioData, config: DecodingConfig): AudioData {
|
|
978
|
+
val targetSampleRate = config.targetSampleRate ?: audioData.sampleRate
|
|
979
|
+
val targetChannels = config.targetChannels ?: audioData.channels
|
|
980
|
+
val targetBitDepth = config.targetBitDepth
|
|
981
|
+
|
|
982
|
+
require(targetSampleRate > 0) { "targetSampleRate must be positive" }
|
|
983
|
+
require(targetChannels in 1..2) { "targetChannels must be 1 or 2, got: $targetChannels" }
|
|
984
|
+
require(targetBitDepth in listOf(8, 16, 32)) { "Unsupported target bit depth: $targetBitDepth" }
|
|
985
|
+
require(audioData.channels > 0) { "source channels must be positive" }
|
|
986
|
+
require(audioData.sampleRate > 0) { "source sampleRate must be positive" }
|
|
987
|
+
require(audioData.bitDepth in listOf(8, 16, 32)) { "Unsupported source bit depth: ${audioData.bitDepth}" }
|
|
988
|
+
|
|
989
|
+
val sourceSamples = decodePcmToFloatSamples(audioData.data, audioData.channels, audioData.bitDepth)
|
|
990
|
+
val channelConverted = convertFloatChannels(sourceSamples, audioData.channels, targetChannels)
|
|
991
|
+
val resampled = resampleFloatFrames(channelConverted, targetChannels, audioData.sampleRate, targetSampleRate)
|
|
992
|
+
val normalized = if (config.normalizeAudio) normalizeFloatSamples(resampled) else resampled
|
|
993
|
+
val processedData = encodeFloatSamplesToPcm(normalized, targetBitDepth)
|
|
994
|
+
|
|
995
|
+
return AudioData(
|
|
996
|
+
data = processedData,
|
|
997
|
+
sampleRate = targetSampleRate,
|
|
998
|
+
bitDepth = targetBitDepth,
|
|
999
|
+
channels = targetChannels,
|
|
1000
|
+
durationMs = computePcmDurationMs(processedData, targetSampleRate, targetChannels, targetBitDepth)
|
|
1001
|
+
)
|
|
1002
|
+
}
|
|
999
1003
|
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
+
private fun computePcmDurationMs(data: ByteArray, sampleRate: Int, channels: Int, bitDepth: Int): Long {
|
|
1005
|
+
val bytesPerFrame = (channels * (bitDepth / 8)).coerceAtLeast(1)
|
|
1006
|
+
val frames = data.size / bytesPerFrame
|
|
1007
|
+
return (frames * 1000L) / sampleRate.coerceAtLeast(1)
|
|
1008
|
+
}
|
|
1004
1009
|
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1010
|
+
private fun decodePcmToFloatSamples(data: ByteArray, channels: Int, bitDepth: Int): FloatArray {
|
|
1011
|
+
if (data.isEmpty()) return FloatArray(0)
|
|
1012
|
+
val bytesPerSample = bitDepth / 8
|
|
1013
|
+
val totalSamples = data.size / bytesPerSample
|
|
1014
|
+
val alignedSamples = totalSamples - (totalSamples % channels)
|
|
1015
|
+
val buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN)
|
|
1016
|
+
val samples = FloatArray(alignedSamples)
|
|
1011
1017
|
|
|
1012
|
-
|
|
1018
|
+
for (i in 0 until alignedSamples) {
|
|
1019
|
+
samples[i] = when (bitDepth) {
|
|
1020
|
+
8 -> ((buffer.get().toInt() and 0xFF) - 128) / 128.0f
|
|
1021
|
+
16 -> buffer.short / 32768.0f
|
|
1022
|
+
32 -> buffer.int / 2147483648.0f
|
|
1023
|
+
else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
|
|
1024
|
+
}.coerceIn(-1.0f, 1.0f)
|
|
1013
1025
|
}
|
|
1014
1026
|
|
|
1015
|
-
|
|
1016
|
-
val resultBuffer = ByteBuffer.allocate(resampledArray.size * 2)
|
|
1017
|
-
resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
|
|
1018
|
-
resultBuffer.asShortBuffer().put(resampledArray)
|
|
1019
|
-
return resultBuffer.array()
|
|
1027
|
+
return samples
|
|
1020
1028
|
}
|
|
1021
1029
|
|
|
1022
|
-
private fun
|
|
1023
|
-
|
|
1024
|
-
val
|
|
1030
|
+
private fun convertFloatChannels(samples: FloatArray, fromChannels: Int, toChannels: Int): FloatArray {
|
|
1031
|
+
if (fromChannels == toChannels || samples.isEmpty()) return samples
|
|
1032
|
+
val frames = samples.size / fromChannels
|
|
1025
1033
|
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1034
|
+
return when {
|
|
1035
|
+
fromChannels == 1 && toChannels == 2 -> FloatArray(frames * 2) { index -> samples[index / 2] }
|
|
1036
|
+
fromChannels == 2 && toChannels == 1 -> FloatArray(frames) { frame ->
|
|
1037
|
+
((samples[frame * 2] + samples[frame * 2 + 1]) / 2.0f).coerceIn(-1.0f, 1.0f)
|
|
1030
1038
|
}
|
|
1031
|
-
|
|
1039
|
+
else -> throw IllegalArgumentException("Unsupported channel conversion: $fromChannels to $toChannels")
|
|
1032
1040
|
}
|
|
1033
|
-
|
|
1034
|
-
return monoData
|
|
1035
1041
|
}
|
|
1036
1042
|
|
|
1037
|
-
private fun
|
|
1038
|
-
|
|
1039
|
-
}
|
|
1043
|
+
private fun resampleFloatFrames(samples: FloatArray, channels: Int, fromSampleRate: Int, toSampleRate: Int): FloatArray {
|
|
1044
|
+
if (fromSampleRate == toSampleRate || samples.isEmpty()) return samples
|
|
1040
1045
|
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
originalSampleRate: Int,
|
|
1044
|
-
targetSampleRate: Int?,
|
|
1045
|
-
originalChannels: Int,
|
|
1046
|
-
targetChannels: Int?,
|
|
1047
|
-
normalize: Boolean
|
|
1048
|
-
): ByteArray {
|
|
1049
|
-
var processedData = pcmData
|
|
1046
|
+
val sourceFrames = samples.size / channels
|
|
1047
|
+
if (sourceFrames == 0) return FloatArray(0)
|
|
1050
1048
|
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1049
|
+
val ratio = toSampleRate.toDouble() / fromSampleRate
|
|
1050
|
+
// roundToInt() preserves duration best for fractional sample-rate ratios;
|
|
1051
|
+
// callers derive metadata from the actual output frame count below.
|
|
1052
|
+
val targetFrames = maxOf(1, (sourceFrames * ratio).roundToInt())
|
|
1053
|
+
val output = FloatArray(targetFrames * channels)
|
|
1055
1054
|
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1055
|
+
for (frame in 0 until targetFrames) {
|
|
1056
|
+
val sourcePosition = frame / ratio
|
|
1057
|
+
val sourceFrame = floor(sourcePosition).toInt().coerceIn(0, sourceFrames - 1)
|
|
1058
|
+
val nextFrame = minOf(sourceFrame + 1, sourceFrames - 1)
|
|
1059
|
+
val fraction = (sourcePosition - sourceFrame).toFloat()
|
|
1060
1060
|
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1061
|
+
for (channel in 0 until channels) {
|
|
1062
|
+
val a = samples[sourceFrame * channels + channel]
|
|
1063
|
+
val b = samples[nextFrame * channels + channel]
|
|
1064
|
+
output[frame * channels + channel] = (a + ((b - a) * fraction)).coerceIn(-1.0f, 1.0f)
|
|
1065
|
+
}
|
|
1064
1066
|
}
|
|
1065
1067
|
|
|
1066
|
-
return
|
|
1068
|
+
return output
|
|
1067
1069
|
}
|
|
1068
1070
|
|
|
1069
|
-
private fun
|
|
1070
|
-
val
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
var maxAmplitude = 0
|
|
1075
|
-
for (sample in shorts) {
|
|
1076
|
-
maxAmplitude = maxOf(maxAmplitude, abs(sample.toInt()))
|
|
1077
|
-
}
|
|
1071
|
+
private fun normalizeFloatSamples(samples: FloatArray): FloatArray {
|
|
1072
|
+
val maxAmplitude = samples.maxOfOrNull { abs(it) } ?: 0.0f
|
|
1073
|
+
if (maxAmplitude <= 0.0f) return samples
|
|
1074
|
+
return FloatArray(samples.size) { index -> (samples[index] / maxAmplitude).coerceIn(-1.0f, 1.0f) }
|
|
1075
|
+
}
|
|
1078
1076
|
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1077
|
+
private fun encodeFloatSamplesToPcm(samples: FloatArray, bitDepth: Int): ByteArray {
|
|
1078
|
+
val bytesPerSample = bitDepth / 8
|
|
1079
|
+
val output = ByteBuffer.allocate(samples.size * bytesPerSample).order(ByteOrder.LITTLE_ENDIAN)
|
|
1080
|
+
|
|
1081
|
+
for (sample in samples) {
|
|
1082
|
+
val clamped = sample.coerceIn(-1.0f, 1.0f)
|
|
1083
|
+
when (bitDepth) {
|
|
1084
|
+
8 -> output.put(((clamped * 127.0f) + 128.0f).roundToInt().coerceIn(0, 255).toByte())
|
|
1085
|
+
16 -> output.putShort((clamped * 32767.0f).roundToInt().coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort())
|
|
1086
|
+
32 -> output.putInt((clamped * Int.MAX_VALUE.toFloat()).roundToInt())
|
|
1087
|
+
else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
|
|
1084
1088
|
}
|
|
1085
1089
|
}
|
|
1086
1090
|
|
|
1087
|
-
|
|
1088
|
-
val resultBuffer = ByteBuffer.allocate(shorts.size * 2)
|
|
1089
|
-
resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
|
|
1090
|
-
resultBuffer.asShortBuffer().put(shorts)
|
|
1091
|
-
return resultBuffer.array()
|
|
1092
|
-
}
|
|
1093
|
-
|
|
1094
|
-
private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
|
|
1095
|
-
// Use the correct implementation from AudioFormatUtils
|
|
1096
|
-
// Assuming 16-bit audio (which is the default for most audio processing)
|
|
1097
|
-
return AudioFormatUtils.convertChannels(pcmData, originalChannels, targetChannels, 16)
|
|
1091
|
+
return output.array()
|
|
1098
1092
|
}
|
|
1099
1093
|
|
|
1100
1094
|
private fun debugWavHeader(file: File) {
|
|
@@ -1126,6 +1120,126 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1126
1120
|
}
|
|
1127
1121
|
}
|
|
1128
1122
|
|
|
1123
|
+
private data class PreviewBarFrameRange(
|
|
1124
|
+
val durationMs: Long,
|
|
1125
|
+
val startFrameIndex: Int,
|
|
1126
|
+
val endFrameIndex: Int,
|
|
1127
|
+
val framesInRange: Int,
|
|
1128
|
+
val bytesPerFrame: Int
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
private fun validatePreviewBarRange(startTimeMs: Long?, endTimeMs: Long?, totalDurationMs: Long) {
|
|
1132
|
+
startTimeMs?.let { start ->
|
|
1133
|
+
require(start >= 0) { "startTimeMs must be non-negative, got: $start" }
|
|
1134
|
+
require(start <= totalDurationMs) { "startTimeMs ($start) is beyond audio duration ($totalDurationMs)" }
|
|
1135
|
+
}
|
|
1136
|
+
endTimeMs?.let { end ->
|
|
1137
|
+
require(end >= 0) { "endTimeMs must be non-negative, got: $end" }
|
|
1138
|
+
startTimeMs?.let { start ->
|
|
1139
|
+
require(start < end) { "startTimeMs ($start) must be less than endTimeMs ($end)" }
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
private fun computePreviewBarFrameRange(
|
|
1145
|
+
audioData: AudioData,
|
|
1146
|
+
startTimeMs: Long?,
|
|
1147
|
+
endTimeMs: Long?
|
|
1148
|
+
): PreviewBarFrameRange {
|
|
1149
|
+
val effectiveStartMs = startTimeMs ?: 0L
|
|
1150
|
+
val effectiveEndMs = (endTimeMs ?: audioData.durationMs).coerceAtMost(audioData.durationMs)
|
|
1151
|
+
val durationMs = (effectiveEndMs - effectiveStartMs).coerceAtLeast(1L)
|
|
1152
|
+
val bytesPerSample = (audioData.bitDepth / 8).coerceAtLeast(1)
|
|
1153
|
+
val bytesPerFrame = (bytesPerSample * audioData.channels).coerceAtLeast(1)
|
|
1154
|
+
val totalFrames = audioData.data.size / bytesPerFrame
|
|
1155
|
+
val startFrameIndex = ((effectiveStartMs * audioData.sampleRate) / 1000)
|
|
1156
|
+
.toInt()
|
|
1157
|
+
.coerceIn(0, totalFrames)
|
|
1158
|
+
val endFrameIndex = ((effectiveEndMs * audioData.sampleRate) / 1000)
|
|
1159
|
+
.toInt()
|
|
1160
|
+
.coerceIn(startFrameIndex, totalFrames)
|
|
1161
|
+
val framesInRange = endFrameIndex - startFrameIndex
|
|
1162
|
+
require(framesInRange > 0) { "Invalid sample range: contains no samples" }
|
|
1163
|
+
|
|
1164
|
+
return PreviewBarFrameRange(
|
|
1165
|
+
durationMs = durationMs,
|
|
1166
|
+
startFrameIndex = startFrameIndex,
|
|
1167
|
+
endFrameIndex = endFrameIndex,
|
|
1168
|
+
framesInRange = framesInRange,
|
|
1169
|
+
bytesPerFrame = bytesPerFrame
|
|
1170
|
+
)
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1173
|
+
fun generatePreviewBars(
|
|
1174
|
+
audioData: AudioData,
|
|
1175
|
+
numberOfBars: Int,
|
|
1176
|
+
startTimeMs: Long? = null,
|
|
1177
|
+
endTimeMs: Long? = null,
|
|
1178
|
+
silenceRmsThreshold: Float = 0.01f
|
|
1179
|
+
): Map<String, Any> {
|
|
1180
|
+
validatePreviewBarRange(startTimeMs, endTimeMs, audioData.durationMs)
|
|
1181
|
+
val requestedBars = numberOfBars.coerceAtLeast(1)
|
|
1182
|
+
val frameRange = computePreviewBarFrameRange(audioData, startTimeMs, endTimeMs)
|
|
1183
|
+
val framesPerBar = (frameRange.framesInRange / requestedBars).coerceAtLeast(1)
|
|
1184
|
+
val bars = mutableListOf<Map<String, Any>>()
|
|
1185
|
+
var minAmplitude = Float.MAX_VALUE
|
|
1186
|
+
var maxAmplitude = Float.NEGATIVE_INFINITY
|
|
1187
|
+
var minRms = Float.MAX_VALUE
|
|
1188
|
+
var maxRms = Float.NEGATIVE_INFINITY
|
|
1189
|
+
|
|
1190
|
+
val extractionTimeMs = measureTimeMillis {
|
|
1191
|
+
for (i in 0 until requestedBars) {
|
|
1192
|
+
val barStartFrame = frameRange.startFrameIndex + (i * framesPerBar)
|
|
1193
|
+
val barEndFrame = minOf(frameRange.startFrameIndex + ((i + 1) * framesPerBar), frameRange.endFrameIndex)
|
|
1194
|
+
if (barStartFrame >= barEndFrame) break
|
|
1195
|
+
|
|
1196
|
+
val barStartByte = barStartFrame * frameRange.bytesPerFrame
|
|
1197
|
+
val barEndByte = minOf(barEndFrame * frameRange.bytesPerFrame, audioData.data.size)
|
|
1198
|
+
val segmentBytes = audioData.data.sliceArray(barStartByte until barEndByte)
|
|
1199
|
+
val segmentData = when (audioData.bitDepth) {
|
|
1200
|
+
16 -> convert16BitPcmToFloat(segmentBytes)
|
|
1201
|
+
32 -> convert32BitPcmToFloat(segmentBytes)
|
|
1202
|
+
else -> convert8BitPcmToFloat(segmentBytes)
|
|
1203
|
+
}
|
|
1204
|
+
if (segmentData.isEmpty()) continue
|
|
1205
|
+
|
|
1206
|
+
val rms = sqrt(segmentData.map { it * it }.average().toFloat())
|
|
1207
|
+
val amplitude = segmentData.maxOf { abs(it) }
|
|
1208
|
+
minAmplitude = minOf(minAmplitude, amplitude)
|
|
1209
|
+
maxAmplitude = maxOf(maxAmplitude, amplitude)
|
|
1210
|
+
minRms = minOf(minRms, rms)
|
|
1211
|
+
maxRms = maxOf(maxRms, rms)
|
|
1212
|
+
|
|
1213
|
+
val startBarTimeMs = ((barStartFrame - frameRange.startFrameIndex).toDouble() / frameRange.framesInRange.toDouble() * frameRange.durationMs).toLong()
|
|
1214
|
+
val endBarTimeMs = ((barEndFrame - frameRange.startFrameIndex).toDouble() / frameRange.framesInRange.toDouble() * frameRange.durationMs).toLong()
|
|
1215
|
+
bars.add(mapOf(
|
|
1216
|
+
"id" to i,
|
|
1217
|
+
"amplitude" to amplitude.coerceIn(0f, 1f),
|
|
1218
|
+
"rms" to rms.coerceIn(0f, 1f),
|
|
1219
|
+
"silent" to (rms < silenceRmsThreshold),
|
|
1220
|
+
"startTimeMs" to startBarTimeMs,
|
|
1221
|
+
"endTimeMs" to endBarTimeMs.coerceAtLeast(startBarTimeMs)
|
|
1222
|
+
))
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1226
|
+
check(bars.isNotEmpty()) { "No preview bars were generated" }
|
|
1227
|
+
|
|
1228
|
+
return mapOf(
|
|
1229
|
+
"bars" to bars,
|
|
1230
|
+
"durationMs" to frameRange.durationMs.toInt(),
|
|
1231
|
+
"sampleRate" to audioData.sampleRate,
|
|
1232
|
+
"numberOfChannels" to audioData.channels,
|
|
1233
|
+
"bitDepth" to audioData.bitDepth,
|
|
1234
|
+
"samples" to frameRange.framesInRange,
|
|
1235
|
+
"requestedNumberOfBars" to requestedBars,
|
|
1236
|
+
"barDurationMs" to (frameRange.durationMs.toDouble() / bars.size.toDouble()),
|
|
1237
|
+
"amplitudeRange" to mapOf("min" to minAmplitude, "max" to maxAmplitude),
|
|
1238
|
+
"rmsRange" to mapOf("min" to minRms, "max" to maxRms),
|
|
1239
|
+
"extractionTimeMs" to extractionTimeMs.toFloat()
|
|
1240
|
+
)
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1129
1243
|
fun generatePreview(
|
|
1130
1244
|
audioData: AudioData,
|
|
1131
1245
|
numberOfPoints: Int,
|
|
@@ -1310,49 +1424,48 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1310
1424
|
?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
|
|
1311
1425
|
?: throw IllegalArgumentException("File not found: $fileUri")
|
|
1312
1426
|
|
|
1313
|
-
// Use existing method to get audio format
|
|
1314
1427
|
val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")
|
|
1315
|
-
|
|
1316
|
-
val
|
|
1317
|
-
val
|
|
1318
|
-
val
|
|
1319
|
-
|
|
1320
|
-
val
|
|
1321
|
-
val endByte = headerSize + endByteOffset
|
|
1428
|
+
val bytesPerFrame = format.channels * (format.bitDepth / 8)
|
|
1429
|
+
val totalFrames = ((file.length() - headerSize).coerceAtLeast(0L) / bytesPerFrame).toInt()
|
|
1430
|
+
val startFrame = ((startTimeMs * format.sampleRate) / 1000).toInt().coerceIn(0, totalFrames)
|
|
1431
|
+
val endFrame = ((endTimeMs * format.sampleRate) / 1000).toInt().coerceIn(startFrame, totalFrames)
|
|
1432
|
+
val startByte = headerSize + (startFrame * bytesPerFrame)
|
|
1433
|
+
val bytesToRead = (endFrame - startFrame) * bytesPerFrame
|
|
1322
1434
|
|
|
1323
1435
|
LogUtils.d(CLASS_NAME, """
|
|
1324
1436
|
Loading WAV range:
|
|
1325
1437
|
- headerSize: $headerSize
|
|
1438
|
+
- startFrame: $startFrame
|
|
1439
|
+
- endFrame: $endFrame
|
|
1326
1440
|
- startByte: $startByte
|
|
1327
|
-
-
|
|
1328
|
-
- bytesPerSecond: $bytesPerSecond
|
|
1441
|
+
- bytesToRead: $bytesToRead
|
|
1329
1442
|
""".trimIndent())
|
|
1330
1443
|
|
|
1331
|
-
|
|
1444
|
+
val audioDataBytes = ByteArray(bytesToRead.coerceAtLeast(0))
|
|
1332
1445
|
FileInputStream(file).use { fis ->
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
audioDataBytes,
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
effectiveBitDepth = config.targetBitDepth
|
|
1346
|
-
LogUtils.d(CLASS_NAME, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
|
|
1446
|
+
var skipped = 0L
|
|
1447
|
+
while (skipped < startByte) {
|
|
1448
|
+
val delta = fis.skip(startByte - skipped)
|
|
1449
|
+
if (delta <= 0) break
|
|
1450
|
+
skipped += delta
|
|
1451
|
+
}
|
|
1452
|
+
var offset = 0
|
|
1453
|
+
while (offset < audioDataBytes.size) {
|
|
1454
|
+
val read = fis.read(audioDataBytes, offset, audioDataBytes.size - offset)
|
|
1455
|
+
if (read <= 0) break
|
|
1456
|
+
offset += read
|
|
1457
|
+
}
|
|
1347
1458
|
}
|
|
1348
1459
|
|
|
1349
|
-
|
|
1460
|
+
val sourceData = AudioData(
|
|
1350
1461
|
data = audioDataBytes,
|
|
1351
1462
|
sampleRate = format.sampleRate,
|
|
1352
1463
|
channels = format.channels,
|
|
1353
|
-
bitDepth =
|
|
1354
|
-
durationMs =
|
|
1464
|
+
bitDepth = format.bitDepth,
|
|
1465
|
+
durationMs = computePcmDurationMs(audioDataBytes, format.sampleRate, format.channels, format.bitDepth)
|
|
1355
1466
|
)
|
|
1467
|
+
|
|
1468
|
+
return processAudioData(sourceData, config)
|
|
1356
1469
|
} catch (e: Exception) {
|
|
1357
1470
|
LogUtils.e(CLASS_NAME, "Failed to load WAV range: ${e.message}", e)
|
|
1358
1471
|
return null
|
|
@@ -1365,127 +1478,13 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1365
1478
|
endTimeMs: Long,
|
|
1366
1479
|
config: DecodingConfig
|
|
1367
1480
|
): AudioData? {
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
extractor.setDataSource(fileUri.removePrefix("file://"))
|
|
1373
|
-
val format = extractor.getTrackFormat(0)
|
|
1374
|
-
extractor.selectTrack(0)
|
|
1375
|
-
|
|
1376
|
-
val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
1377
|
-
val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
|
|
1378
|
-
val totalDurationUs = try {
|
|
1379
|
-
format.getLong(MediaFormat.KEY_DURATION)
|
|
1380
|
-
} catch (e: Exception) {
|
|
1381
|
-
(format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
|
|
1382
|
-
}
|
|
1383
|
-
LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")
|
|
1384
|
-
|
|
1385
|
-
val totalDurationMs = totalDurationUs / 1000
|
|
1386
|
-
LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")
|
|
1387
|
-
|
|
1388
|
-
// Calculate valid time range
|
|
1389
|
-
val validStartMs = startTimeMs.coerceIn(0, totalDurationMs) ?: 0
|
|
1390
|
-
val validEndMs = endTimeMs.coerceIn(validStartMs, totalDurationMs) ?: totalDurationMs
|
|
1391
|
-
val effectiveDurationMs = validEndMs - validStartMs
|
|
1392
|
-
|
|
1393
|
-
// Initialize decoder
|
|
1394
|
-
decoder = MediaCodec.createDecoderByType(format.getString(MediaFormat.KEY_MIME)!!)
|
|
1395
|
-
decoder.configure(format, null, null, 0)
|
|
1396
|
-
decoder.start()
|
|
1397
|
-
|
|
1398
|
-
// Seek to start position if needed
|
|
1399
|
-
if (validStartMs > 0) {
|
|
1400
|
-
extractor.seekTo(validStartMs * 1000, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
|
|
1401
|
-
}
|
|
1402
|
-
|
|
1403
|
-
// Calculate buffer sizes
|
|
1404
|
-
val targetSampleRate = config.targetSampleRate ?: originalSampleRate
|
|
1405
|
-
val targetChannels = config.targetChannels ?: originalChannels
|
|
1406
|
-
val targetBitDepth = config.targetBitDepth ?: 16
|
|
1407
|
-
val bytesPerSample = targetBitDepth / 8
|
|
1408
|
-
val samplesPerSecond = targetSampleRate * targetChannels
|
|
1409
|
-
val totalBytes = (effectiveDurationMs * samplesPerSecond * bytesPerSample) / 1000
|
|
1410
|
-
|
|
1411
|
-
LogUtils.d(CLASS_NAME, """
|
|
1412
|
-
Loading audio range:
|
|
1413
|
-
- start: ${validStartMs}ms
|
|
1414
|
-
- end: ${validEndMs}ms
|
|
1415
|
-
- duration: ${effectiveDurationMs}ms
|
|
1416
|
-
- bytes: $totalBytes
|
|
1417
|
-
- format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
|
|
1418
|
-
""".trimIndent())
|
|
1419
|
-
|
|
1420
|
-
val outputBuffer = ByteBuffer.allocate(totalBytes.toInt())
|
|
1421
|
-
val bufferInfo = MediaCodec.BufferInfo()
|
|
1422
|
-
var isEOS = false
|
|
1423
|
-
|
|
1424
|
-
while (!isEOS) {
|
|
1425
|
-
// Handle input
|
|
1426
|
-
val inputBufferId = decoder.dequeueInputBuffer(10000)
|
|
1427
|
-
if (inputBufferId >= 0) {
|
|
1428
|
-
val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
|
|
1429
|
-
val sampleSize = extractor.readSampleData(inputBuffer, 0)
|
|
1430
|
-
|
|
1431
|
-
when {
|
|
1432
|
-
sampleSize < 0 -> {
|
|
1433
|
-
decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
|
|
1434
|
-
isEOS = true
|
|
1435
|
-
}
|
|
1436
|
-
extractor.sampleTime > validEndMs * 1000 -> {
|
|
1437
|
-
decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
|
|
1438
|
-
isEOS = true
|
|
1439
|
-
}
|
|
1440
|
-
else -> {
|
|
1441
|
-
decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
|
|
1442
|
-
extractor.advance()
|
|
1443
|
-
}
|
|
1444
|
-
}
|
|
1445
|
-
}
|
|
1446
|
-
|
|
1447
|
-
// Handle output
|
|
1448
|
-
val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000)
|
|
1449
|
-
if (outputBufferId >= 0) {
|
|
1450
|
-
val decodedBuffer = decoder.getOutputBuffer(outputBufferId)!!
|
|
1451
|
-
if (bufferInfo.size > 0) {
|
|
1452
|
-
// Set buffer position and limit based on the decoded data
|
|
1453
|
-
decodedBuffer.position(bufferInfo.offset)
|
|
1454
|
-
decodedBuffer.limit(bufferInfo.offset + bufferInfo.size)
|
|
1455
|
-
|
|
1456
|
-
// Copy decoded data to our output buffer
|
|
1457
|
-
outputBuffer.put(decodedBuffer)
|
|
1458
|
-
}
|
|
1459
|
-
decoder.releaseOutputBuffer(outputBufferId, false)
|
|
1460
|
-
|
|
1461
|
-
// Check if we've reached the end
|
|
1462
|
-
if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
|
|
1463
|
-
isEOS = true
|
|
1464
|
-
}
|
|
1465
|
-
}
|
|
1466
|
-
}
|
|
1467
|
-
|
|
1468
|
-
// Prepare the final byte array
|
|
1469
|
-
outputBuffer.flip()
|
|
1470
|
-
val audioData = ByteArray(outputBuffer.remaining())
|
|
1471
|
-
outputBuffer.get(audioData)
|
|
1472
|
-
|
|
1473
|
-
return AudioData(
|
|
1474
|
-
data = audioData,
|
|
1475
|
-
sampleRate = targetSampleRate,
|
|
1476
|
-
channels = targetChannels,
|
|
1477
|
-
bitDepth = targetBitDepth,
|
|
1478
|
-
durationMs = endTimeMs - startTimeMs // Use the actual time range
|
|
1479
|
-
).also {
|
|
1480
|
-
LogUtils.d(CLASS_NAME, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
|
|
1481
|
-
}
|
|
1481
|
+
return try {
|
|
1482
|
+
val sourceData = decodeAudioRangeToPCM(fileUri, startTimeMs, endTimeMs)
|
|
1483
|
+
?: throw IllegalStateException("Failed to decode compressed audio range")
|
|
1484
|
+
processAudioData(sourceData, config)
|
|
1482
1485
|
} catch (e: Exception) {
|
|
1483
1486
|
LogUtils.e(CLASS_NAME, "Failed to load compressed audio range: ${e.message}", e)
|
|
1484
|
-
|
|
1485
|
-
} finally {
|
|
1486
|
-
decoder?.stop()
|
|
1487
|
-
decoder?.release()
|
|
1488
|
-
extractor.release()
|
|
1487
|
+
null
|
|
1489
1488
|
}
|
|
1490
1489
|
}
|
|
1491
1490
|
|
|
@@ -1520,42 +1519,28 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1520
1519
|
- output: ${outputFile.name}
|
|
1521
1520
|
""".trimIndent())
|
|
1522
1521
|
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
// fmt chunk
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
val sampleRateBytes = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN)
|
|
1543
|
-
sampleRateBytes.putInt(audioData.sampleRate)
|
|
1544
|
-
raf.write(sampleRateBytes.array())
|
|
1545
|
-
|
|
1546
|
-
val byteRate = audioData.sampleRate * audioData.channels * (audioData.bitDepth / 8)
|
|
1547
|
-
raf.writeInt(byteRate) // ByteRate
|
|
1548
|
-
|
|
1549
|
-
val blockAlign = audioData.channels * (audioData.bitDepth / 8)
|
|
1550
|
-
raf.writeShort(blockAlign) // BlockAlign
|
|
1551
|
-
raf.writeShort(audioData.bitDepth) // BitsPerSample
|
|
1522
|
+
val bytesPerSample = audioData.bitDepth / 8
|
|
1523
|
+
val byteRate = audioData.sampleRate * audioData.channels * bytesPerSample
|
|
1524
|
+
val blockAlign = audioData.channels * bytesPerSample
|
|
1525
|
+
val wavHeader = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
|
|
1526
|
+
put("RIFF".toByteArray())
|
|
1527
|
+
putInt(audioData.data.size + 36) // File size minus RIFF header
|
|
1528
|
+
put("WAVE".toByteArray())
|
|
1529
|
+
put("fmt ".toByteArray())
|
|
1530
|
+
putInt(16) // PCM fmt chunk size
|
|
1531
|
+
putShort(1) // PCM format
|
|
1532
|
+
putShort(audioData.channels.toShort())
|
|
1533
|
+
putInt(audioData.sampleRate)
|
|
1534
|
+
putInt(byteRate)
|
|
1535
|
+
putShort(blockAlign.toShort())
|
|
1536
|
+
putShort(audioData.bitDepth.toShort())
|
|
1537
|
+
put("data".toByteArray())
|
|
1538
|
+
putInt(audioData.data.size)
|
|
1539
|
+
}
|
|
1552
1540
|
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
// Write audio data
|
|
1558
|
-
raf.write(audioData.data)
|
|
1541
|
+
outputFile.outputStream().use { output ->
|
|
1542
|
+
output.write(wavHeader.array())
|
|
1543
|
+
output.write(audioData.data)
|
|
1559
1544
|
}
|
|
1560
1545
|
|
|
1561
1546
|
// Debug WAV header to verify
|
|
@@ -2020,7 +2005,10 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
2020
2005
|
val extractor = MediaExtractor()
|
|
2021
2006
|
try {
|
|
2022
2007
|
extractor.setDataSource(file.absolutePath)
|
|
2023
|
-
val
|
|
2008
|
+
val trackIndex = (0 until extractor.trackCount).find {
|
|
2009
|
+
extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
|
|
2010
|
+
} ?: return null
|
|
2011
|
+
val format = extractor.getTrackFormat(trackIndex)
|
|
2024
2012
|
return AudioFormat(
|
|
2025
2013
|
sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE),
|
|
2026
2014
|
channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
|
|
@@ -2125,96 +2113,136 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
2125
2113
|
}
|
|
2126
2114
|
|
|
2127
2115
|
/**
|
|
2128
|
-
* Decodes a specific time range of
|
|
2129
|
-
*
|
|
2116
|
+
* Decodes a specific time range of a compressed audio file directly to PCM data.
|
|
2117
|
+
* The returned data is source-rate/channel PCM clipped on frame boundaries; callers
|
|
2118
|
+
* must pass it through processAudioData(...) to apply target format options.
|
|
2130
2119
|
*/
|
|
2131
2120
|
fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
|
|
2121
|
+
require(endTimeMs >= startTimeMs) { "endTimeMs must be >= startTimeMs" }
|
|
2122
|
+
|
|
2123
|
+
val cleanUri = fileUri.removePrefix("file://")
|
|
2124
|
+
val file = File(cleanUri).takeIf { it.exists() }
|
|
2125
|
+
?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
|
|
2126
|
+
?: run {
|
|
2127
|
+
LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
|
|
2128
|
+
return null
|
|
2129
|
+
}
|
|
2130
|
+
|
|
2132
2131
|
val extractor = MediaExtractor()
|
|
2133
|
-
var decoder:
|
|
2134
|
-
|
|
2132
|
+
var decoder: MediaCodec? = null
|
|
2133
|
+
|
|
2135
2134
|
try {
|
|
2136
|
-
extractor.setDataSource(
|
|
2137
|
-
val trackIndex = (0 until extractor.trackCount).find {
|
|
2138
|
-
extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
|
|
2135
|
+
extractor.setDataSource(file.absolutePath)
|
|
2136
|
+
val trackIndex = (0 until extractor.trackCount).find {
|
|
2137
|
+
extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
|
|
2139
2138
|
} ?: return null
|
|
2140
|
-
|
|
2139
|
+
|
|
2141
2140
|
extractor.selectTrack(trackIndex)
|
|
2142
|
-
val
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2141
|
+
val inputFormat = extractor.getTrackFormat(trackIndex)
|
|
2142
|
+
var sampleRate = inputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
2143
|
+
var channels = inputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
|
|
2144
|
+
var outputEncoding = getPcmEncoding(inputFormat)
|
|
2145
|
+
var bitDepth = getPcmStorageBitDepth(outputEncoding)
|
|
2146
|
+
val mimeType = inputFormat.getString(MediaFormat.KEY_MIME)
|
|
2147
|
+
?: throw IllegalArgumentException("Audio track is missing MIME type")
|
|
2148
|
+
|
|
2149
|
+
decoder = MediaCodec.createDecoderByType(mimeType)
|
|
2150
|
+
decoder.configure(inputFormat, null, null, 0)
|
|
2148
2151
|
decoder.start()
|
|
2149
2152
|
|
|
2150
2153
|
extractor.seekTo(startTimeMs * 1000, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)
|
|
2151
|
-
val pcmData = mutableListOf<Byte>()
|
|
2152
|
-
val bufferInfo = android.media.MediaCodec.BufferInfo()
|
|
2153
|
-
var isEOS = false
|
|
2154
|
-
var firstBufferTimeUs: Long? = null
|
|
2155
2154
|
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2155
|
+
val pcmOutput = ByteArrayOutputStream()
|
|
2156
|
+
val bufferInfo = MediaCodec.BufferInfo()
|
|
2157
|
+
var inputDone = false
|
|
2158
|
+
var outputDone = false
|
|
2159
|
+
var firstBufferTimeUs: Long? = null
|
|
2160
|
+
val requestedDurationMs = (endTimeMs - startTimeMs).coerceAtLeast(1L)
|
|
2161
|
+
|
|
2162
|
+
while (!outputDone) {
|
|
2163
|
+
if (!inputDone) {
|
|
2164
|
+
val inputBufferId = decoder.dequeueInputBuffer(10_000)
|
|
2165
|
+
if (inputBufferId >= 0) {
|
|
2166
|
+
val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
|
|
2167
|
+
inputBuffer.clear()
|
|
2168
|
+
val sampleSize = extractor.readSampleData(inputBuffer, 0)
|
|
2169
|
+
val sampleTime = extractor.sampleTime
|
|
2170
|
+
|
|
2171
|
+
if (sampleSize < 0 || sampleTime < 0 || sampleTime > endTimeMs * 1000) {
|
|
2172
|
+
decoder.queueInputBuffer(
|
|
2173
|
+
inputBufferId,
|
|
2174
|
+
0,
|
|
2175
|
+
0,
|
|
2176
|
+
0,
|
|
2177
|
+
MediaCodec.BUFFER_FLAG_END_OF_STREAM
|
|
2178
|
+
)
|
|
2179
|
+
inputDone = true
|
|
2180
|
+
} else {
|
|
2181
|
+
decoder.queueInputBuffer(inputBufferId, 0, sampleSize, sampleTime, 0)
|
|
2182
|
+
extractor.advance()
|
|
2183
|
+
}
|
|
2167
2184
|
}
|
|
2168
2185
|
}
|
|
2169
2186
|
|
|
2170
|
-
val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo,
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2187
|
+
when (val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10_000)) {
|
|
2188
|
+
MediaCodec.INFO_TRY_AGAIN_LATER -> Unit
|
|
2189
|
+
MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
|
|
2190
|
+
// Decoder format changes are expected before the first decoded output.
|
|
2191
|
+
// If a decoder ever changes format mid-stream, metadata follows the
|
|
2192
|
+
// latest format while the byte guard still prevents runaway output.
|
|
2193
|
+
decoder.outputFormat?.let { outputFormat ->
|
|
2194
|
+
sampleRate = outputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
|
|
2195
|
+
channels = outputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
|
|
2196
|
+
outputEncoding = getPcmEncoding(outputFormat)
|
|
2197
|
+
bitDepth = getPcmStorageBitDepth(outputEncoding)
|
|
2198
|
+
LogUtils.d(CLASS_NAME, "Decoder output format changed: ${sampleRate}Hz, $channels channels, $bitDepth-bit")
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
else -> if (outputBufferId >= 0) {
|
|
2202
|
+
val outputBuffer = decoder.getOutputBuffer(outputBufferId)
|
|
2203
|
+
if (outputBuffer != null && bufferInfo.size > 0) {
|
|
2204
|
+
if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
|
|
2205
|
+
outputBuffer.position(bufferInfo.offset)
|
|
2206
|
+
outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
|
|
2207
|
+
writeDecodedPcmChunk(outputBuffer, bufferInfo.size, outputEncoding, pcmOutput)
|
|
2208
|
+
|
|
2209
|
+
enforceDecodedRangeGuard(
|
|
2210
|
+
decodedBytes = pcmOutput.size(),
|
|
2211
|
+
requestedDurationMs = requestedDurationMs,
|
|
2212
|
+
sampleRate = sampleRate,
|
|
2213
|
+
channels = channels,
|
|
2214
|
+
bitDepth = bitDepth
|
|
2215
|
+
)
|
|
2216
|
+
}
|
|
2180
2217
|
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2218
|
+
outputDone = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
|
|
2219
|
+
decoder.releaseOutputBuffer(outputBufferId, false)
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2184
2222
|
}
|
|
2185
2223
|
|
|
2186
|
-
|
|
2187
|
-
val
|
|
2188
|
-
val bytesPerFrame =
|
|
2189
|
-
val
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
val
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
val trimmedSamples = ShortArray(endSample - startSample)
|
|
2202
|
-
for (i in trimmedSamples.indices) {
|
|
2203
|
-
trimmedSamples[i] = allSamples.get()
|
|
2204
|
-
}
|
|
2205
|
-
|
|
2206
|
-
// Convert ShortArray to ByteArray
|
|
2207
|
-
val trimmedBytes = ByteArray(trimmedSamples.size * 2)
|
|
2208
|
-
val byteBuffer = java.nio.ByteBuffer.wrap(trimmedBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN)
|
|
2209
|
-
val shortBuffer = byteBuffer.asShortBuffer()
|
|
2210
|
-
shortBuffer.put(trimmedSamples)
|
|
2224
|
+
val decodedBytes = pcmOutput.toByteArray()
|
|
2225
|
+
val firstTimeUs = firstBufferTimeUs ?: return null
|
|
2226
|
+
val bytesPerFrame = channels * (bitDepth / 8)
|
|
2227
|
+
val totalFrames = decodedBytes.size / bytesPerFrame
|
|
2228
|
+
if (totalFrames <= 0) return null
|
|
2229
|
+
|
|
2230
|
+
val startFrame = (((startTimeMs * 1000L) - firstTimeUs).toDouble() * sampleRate / 1_000_000.0)
|
|
2231
|
+
.floorToInt()
|
|
2232
|
+
.coerceIn(0, totalFrames)
|
|
2233
|
+
val endFrame = (((endTimeMs * 1000L) - firstTimeUs).toDouble() * sampleRate / 1_000_000.0)
|
|
2234
|
+
.ceilToInt()
|
|
2235
|
+
.coerceIn(startFrame, totalFrames)
|
|
2236
|
+
val startByte = startFrame * bytesPerFrame
|
|
2237
|
+
val endByte = endFrame * bytesPerFrame
|
|
2238
|
+
val trimmedBytes = decodedBytes.copyOfRange(startByte, endByte)
|
|
2211
2239
|
|
|
2212
2240
|
return AudioData(
|
|
2213
2241
|
data = trimmedBytes,
|
|
2214
2242
|
sampleRate = sampleRate,
|
|
2215
2243
|
channels = channels,
|
|
2216
|
-
bitDepth =
|
|
2217
|
-
durationMs =
|
|
2244
|
+
bitDepth = bitDepth,
|
|
2245
|
+
durationMs = computePcmDurationMs(trimmedBytes, sampleRate, channels, bitDepth)
|
|
2218
2246
|
)
|
|
2219
2247
|
} catch (e: Exception) {
|
|
2220
2248
|
LogUtils.e(CLASS_NAME, "Failed to decode audio range: ${e.message}", e)
|
|
@@ -2222,11 +2250,14 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
2222
2250
|
} finally {
|
|
2223
2251
|
try {
|
|
2224
2252
|
decoder?.stop()
|
|
2253
|
+
} catch (e: Exception) {
|
|
2254
|
+
LogUtils.w(CLASS_NAME, "Error stopping decoder: ${e.message}")
|
|
2255
|
+
}
|
|
2256
|
+
try {
|
|
2225
2257
|
decoder?.release()
|
|
2226
2258
|
} catch (e: Exception) {
|
|
2227
2259
|
LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
|
|
2228
2260
|
}
|
|
2229
|
-
|
|
2230
2261
|
try {
|
|
2231
2262
|
extractor.release()
|
|
2232
2263
|
} catch (e: Exception) {
|
|
@@ -2234,4 +2265,66 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
2234
2265
|
}
|
|
2235
2266
|
}
|
|
2236
2267
|
}
|
|
2268
|
+
|
|
2269
|
+
/**
 * Resolves the PCM sample encoding advertised by [format].
 *
 * The encoding key is only consulted on API level N and above, and only when the
 * format actually carries it; in every other case 16-bit PCM is assumed.
 *
 * @param format media format to inspect (track format or decoder output format).
 * @return an `android.media.AudioFormat.ENCODING_*` constant.
 */
private fun getPcmEncoding(format: MediaFormat): Int {
    val canQueryEncoding = android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.N
    // Guard first: fall back to the 16-bit default when the key cannot be read or is absent.
    if (!canQueryEncoding || !format.containsKey(MediaFormat.KEY_PCM_ENCODING)) {
        return android.media.AudioFormat.ENCODING_PCM_16BIT
    }
    return format.getInteger(MediaFormat.KEY_PCM_ENCODING)
}
|
|
2278
|
+
|
|
2279
|
+
/**
 * Maps a PCM encoding constant to the bit depth of the bytes that end up being stored.
 *
 * Only 8-bit PCM is reported as 8; every other encoding is reported as 16.
 * PCM_FLOAT decoder buffers are downconverted to signed 16-bit PCM in
 * writeDecodedPcmChunk, so persisted bytes and metadata are 16-bit.
 *
 * NOTE(review): any other encoding value is also reported as 16-bit here —
 * confirm that no decoder path stores wider integer PCM bytes unconverted.
 *
 * @param encoding an `android.media.AudioFormat.ENCODING_*` constant.
 * @return 8 or 16.
 */
private fun getPcmStorageBitDepth(encoding: Int): Int =
    if (encoding == android.media.AudioFormat.ENCODING_PCM_8BIT) 8 else 16
|
|
2289
|
+
|
|
2290
|
+
/**
 * Appends one decoded chunk to [output] as integer PCM bytes.
 *
 * For `ENCODING_PCM_FLOAT` the remaining bytes of [outputBuffer] are read as
 * native-order floats, clamped to [-1, 1] and written as little-endian signed
 * 16-bit samples ([size] is not used on this path). For every other encoding,
 * exactly [size] bytes are copied verbatim from the buffer's current position.
 *
 * @param outputBuffer buffer holding the decoded chunk; it is read from its current position.
 * @param size number of bytes to copy on the non-float path.
 * @param encoding an `android.media.AudioFormat.ENCODING_*` constant describing the buffer contents.
 * @param output sink that accumulates the PCM bytes.
 */
private fun writeDecodedPcmChunk(
    outputBuffer: ByteBuffer,
    size: Int,
    encoding: Int,
    output: ByteArrayOutputStream
) {
    if (encoding == android.media.AudioFormat.ENCODING_PCM_FLOAT) {
        val floatBuffer = outputBuffer.order(ByteOrder.nativeOrder()).asFloatBuffer()
        // Two output bytes per float sample.
        val pcm16 = ByteBuffer.allocate(floatBuffer.remaining() * 2).order(ByteOrder.LITTLE_ENDIAN)
        while (floatBuffer.hasRemaining()) {
            val raw = floatBuffer.get()
            // coerceIn() lets NaN through and roundToInt() throws IllegalArgumentException
            // on NaN, which would abort the whole decode; emit silence for such samples.
            val sample = if (raw.isNaN()) 0.0f else raw.coerceIn(-1.0f, 1.0f)
            pcm16.putShort((sample * 32767.0f).roundToInt().toShort())
        }
        output.write(pcm16.array())
        return
    }

    // Integer PCM: copy the bytes unchanged.
    val chunk = ByteArray(size)
    outputBuffer.get(chunk)
    output.write(chunk)
}
|
|
2311
|
+
|
|
2312
|
+
/**
 * Fails fast when the amount of decoded PCM grows far beyond what the requested range implies.
 *
 * The ceiling is four times the byte size of the requested duration plus five
 * seconds of slack at the given format, and never less than 1 MiB.
 *
 * @param decodedBytes number of PCM bytes accumulated so far.
 * @param requestedDurationMs length of the requested range in milliseconds.
 * @param sampleRate sample rate in Hz.
 * @param channels channel count.
 * @param bitDepth stored bits per sample.
 * @throws IllegalStateException when [decodedBytes] exceeds the ceiling.
 */
private fun enforceDecodedRangeGuard(
    decodedBytes: Int,
    requestedDurationMs: Long,
    sampleRate: Int,
    channels: Int,
    bitDepth: Int
) {
    val frameSize = channels * (bitDepth / 8)
    val paddedDurationMs = requestedDurationMs + 5_000L
    // Long arithmetic, evaluated left to right.
    val expectedBytes = paddedDurationMs * sampleRate * frameSize / 1000L
    val maxBytes = (expectedBytes * 4L).coerceAtLeast(1_048_576L)
    if (decodedBytes > maxBytes) {
        error("Decoded audio range exceeded safety guard: $decodedBytes bytes > $maxBytes bytes")
    }
}
|
|
2326
|
+
|
|
2327
|
+
/** Rounds this value down to the nearest whole number and converts it to [Int]. */
private fun Double.floorToInt(): Int {
    val roundedDown = floor(this)
    return roundedDown.toInt()
}
|
|
2328
|
+
/** Rounds this value up to the nearest whole number and converts it to [Int]. */
private fun Double.ceilToInt(): Int {
    val roundedUp = ceil(this)
    return roundedUp.toInt()
}
|
|
2329
|
+
|
|
2237
2330
|
}
|