@siteed/expo-audio-stream 1.12.2 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -2
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +866 -70
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +4 -0
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +30 -9
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +163 -24
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +62 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +10 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +158 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/index.d.ts +3 -2
- package/build/index.d.ts.map +1 -1
- package/build/index.js +2 -2
- package/build/index.js.map +1 -1
- package/build/useAudioRecorder.d.ts.map +1 -1
- package/build/useAudioRecorder.js +35 -16
- package/build/useAudioRecorder.js.map +1 -1
- package/ios/AudioProcessor.swift +391 -1
- package/ios/ExpoAudioStreamModule.swift +100 -0
- package/ios/Features.swift +30 -0
- package/package.json +1 -1
- package/plugin/build/index.d.ts +0 -1
- package/plugin/build/index.js +0 -5
- package/plugin/src/index.ts +0 -6
- package/src/AudioAnalysis/AudioAnalysis.types.ts +66 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +219 -0
- package/src/index.ts +12 -1
- package/src/useAudioRecorder.tsx +37 -16
|
@@ -9,6 +9,19 @@ import java.io.File
|
|
|
9
9
|
import java.io.IOException
|
|
10
10
|
import java.util.concurrent.atomic.AtomicLong
|
|
11
11
|
import kotlin.system.measureTimeMillis
|
|
12
|
+
import android.media.MediaExtractor
|
|
13
|
+
import android.media.MediaFormat
|
|
14
|
+
import android.media.MediaCodec
|
|
15
|
+
import java.io.FileInputStream
|
|
16
|
+
import java.nio.channels.FileChannel
|
|
17
|
+
import java.io.RandomAccessFile
|
|
18
|
+
|
|
19
|
+
/**
 * Options applied to decoded PCM audio before it is handed back to the caller.
 *
 * @property targetSampleRate Optional target sample rate; `null` leaves the source rate untouched.
 * @property targetChannels Optional target number of channels; `null` leaves the source layout untouched.
 * @property targetBitDepth Bit depth reported for the resulting PCM data; defaults to 16-bit.
 * @property normalizeAudio Whether audio levels should be normalized.
 */
data class DecodingConfig(
    val targetSampleRate: Int? = null,
    val targetChannels: Int? = null,
    val targetBitDepth: Int = 16,
    val normalizeAudio: Boolean = false
)
|
|
12
25
|
|
|
13
26
|
class AudioProcessor(private val filesDir: File) {
|
|
14
27
|
companion object {
|
|
@@ -27,103 +40,142 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
27
40
|
}
|
|
28
41
|
}
|
|
29
42
|
|
|
30
|
-
data class AudioData(val data: ByteArray, val sampleRate: Int, val bitDepth: Int, val channels: Int)
|
|
43
|
+
/**
 * Raw PCM payload plus the format fields needed to interpret it.
 *
 * @property data PCM bytes.
 * @property sampleRate Sample rate in Hz.
 * @property bitDepth Bits per sample.
 * @property channels Number of channels.
 * @property durationMs Duration in milliseconds; 0 when the producer did not supply one.
 */
data class AudioData(
    val data: ByteArray,
    val sampleRate: Int,
    val bitDepth: Int,
    val channels: Int,
    val durationMs: Long = 0
)
|
|
31
44
|
|
|
32
45
|
private var cumulativeMinAmplitude = Float.MAX_VALUE
|
|
33
46
|
private var cumulativeMaxAmplitude = Float.NEGATIVE_INFINITY
|
|
34
47
|
|
|
35
|
-
|
|
36
|
-
fun loadAudioFile(originalFileUri: String, skipWavHeader: Boolean = false): AudioData? {
|
|
37
|
-
// Remove the file:// prefix if present
|
|
38
|
-
val fileUri = originalFileUri.removePrefix("file://")
|
|
39
|
-
var file = File(fileUri)
|
|
40
|
-
|
|
41
|
-
// Check if the file exists at the provided fileUri
|
|
42
|
-
if (!file.exists()) {
|
|
43
|
-
// Fallback to filesDir if the file does not exist at fileUri
|
|
44
|
-
file = File(filesDir, file.name)
|
|
45
|
-
if (!file.exists()) {
|
|
46
|
-
Log.e("AudioProcessor", "File does not exist at provided path or in filesDir: $fileUri")
|
|
47
|
-
return null
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// Check if the file has a valid extension
|
|
52
|
-
val validExtensions = listOf("wav", "pcm")
|
|
53
|
-
val fileExtension = file.extension.lowercase()
|
|
54
|
-
if (fileExtension !in validExtensions) {
|
|
55
|
-
Log.e("AudioProcessor", "Invalid file extension: $fileExtension. Supported extensions are: $validExtensions")
|
|
56
|
-
return null
|
|
57
|
-
}
|
|
58
|
-
|
|
48
|
+
/**
 * Parses a RIFF/WAVE file and returns its PCM payload together with the format
 * fields read from the `fmt ` chunk.
 *
 * The file is looked up at [filePath] first (with any `file://` prefix removed) and,
 * if missing, by file name inside `filesDir`. Chunks are walked in order until the
 * `data` chunk is found; chunks other than `fmt ` and `data` are skipped.
 *
 * @param filePath Path or `file://` URI of the WAV file.
 * @param debug Accepted for interface compatibility; not read by this function.
 * @return The decoded [AudioData] (including a duration derived from the payload size),
 *   or `null` when the file is missing, malformed, or cannot be read.
 */
fun loadAudioFile(filePath: String, debug: Boolean = false): AudioData? {
    try {
        val fileUri = filePath.removePrefix("file://")
        Log.d("AudioProcessor", "Processing WAV file: $fileUri")

        val file = File(fileUri).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: run {
                Log.e("AudioProcessor", "File not found: $fileUri")
                return null
            }

        // `use` closes the file handle on every exit path, including the early `return null`s below.
        return RandomAccessFile(file, "r").use { raf ->
            val fileSize = raf.length()
            val word = ByteArray(4)

            // Read RIFF header
            raf.readFully(word)
            if (String(word, Charsets.US_ASCII) != "RIFF") {
                Log.e("AudioProcessor", "Invalid RIFF header")
                return null
            }

            // The 4-byte RIFF size field is consumed but not used; the real file length is used instead.
            raf.readFully(word)

            // Read WAVE header
            raf.readFully(word)
            if (String(word, Charsets.US_ASCII) != "WAVE") {
                Log.e("AudioProcessor", "Invalid WAVE header")
                return null
            }

            var fmtChunkFound = false
            var dataChunkFound = false
            var sampleRate = 0
            var channels = 0
            var bitDepth = 0
            var dataOffset = 0L
            var dataSize = 0L

            // Parse chunks until the data chunk is located.
            while (!dataChunkFound && raf.filePointer < fileSize - 8) {
                raf.readFully(word)
                val chunkId = String(word, Charsets.US_ASCII)
                raf.readFully(word)
                // Chunk sizes are unsigned 32-bit little-endian values.
                val chunkSize = ByteBuffer.wrap(word).order(ByteOrder.LITTLE_ENDIAN).int.toLong() and 0xFFFFFFFFL
                // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
                val padBytes = chunkSize and 1L

                Log.d("AudioProcessor", "Found chunk: $chunkId ($chunkSize bytes)")

                when (chunkId) {
                    "fmt " -> {
                        if (chunkSize < 16) {
                            Log.e("AudioProcessor", "Invalid fmt chunk size")
                            return null
                        }

                        val formatData = ByteArray(16)
                        raf.readFully(formatData)
                        val formatBuffer = ByteBuffer.wrap(formatData).order(ByteOrder.LITTLE_ENDIAN)

                        val audioFormat = formatBuffer.short
                        channels = formatBuffer.short.toInt() and 0xFFFF
                        sampleRate = formatBuffer.int
                        val byteRate = formatBuffer.int
                        val blockAlign = formatBuffer.short
                        bitDepth = formatBuffer.short.toInt() and 0xFFFF

                        Log.d("AudioProcessor", "Raw format data: ${formatData.joinToString(", ")}")
                        Log.d("AudioProcessor", "Format chunk: audioFormat=$audioFormat, channels=$channels, sampleRate=$sampleRate, bitDepth=$bitDepth, byteRate=$byteRate, blockAlign=$blockAlign")

                        if (bitDepth !in listOf(8, 16, 32)) {
                            Log.e("AudioProcessor", "Invalid bit depth: $bitDepth")
                            return null
                        }
                        // Both values are divisors in the duration calculation below.
                        if (channels <= 0 || sampleRate <= 0) {
                            Log.e("AudioProcessor", "Invalid format: channels=$channels, sampleRate=$sampleRate")
                            return null
                        }

                        // Skip any format extension bytes (plus padding) without running past the end of the file.
                        val remainingFmtBytes = (chunkSize - 16) + padBytes
                        raf.seek(minOf(raf.filePointer + remainingFmtBytes, fileSize))
                        fmtChunkFound = true
                    }
                    "data" -> {
                        dataOffset = raf.filePointer
                        dataSize = chunkSize
                        dataChunkFound = true
                    }
                    else -> {
                        // Skip unknown chunks (plus padding), clamped to the end of the file.
                        raf.seek(minOf(raf.filePointer + chunkSize + padBytes, fileSize))
                    }
                }
            }

            if (!fmtChunkFound || !dataChunkFound) {
                Log.e("AudioProcessor", "Missing essential chunks (fmt=$fmtChunkFound, data=$dataChunkFound)")
                return null
            }

            // Calculate actual data size if it seems wrong
            if (dataSize <= 0 || dataSize > fileSize - dataOffset) {
                dataSize = fileSize - dataOffset
                Log.d("AudioProcessor", "Adjusted data size to: $dataSize")
            }
            if (dataSize > Int.MAX_VALUE) {
                Log.e("AudioProcessor", "Data chunk too large to load in memory: $dataSize bytes")
                return null
            }

            Log.d("AudioProcessor", "Reading PCM data: offset=$dataOffset, size=$dataSize")

            val wavData = ByteArray(dataSize.toInt())
            raf.seek(dataOffset)
            raf.readFully(wavData)

            // Each sample is bitDepth/8 bytes, and a frame holds one sample per channel.
            val bytesPerFrame = channels * (bitDepth / 8)
            val numFrames = wavData.size / bytesPerFrame
            val durationMs = (numFrames * 1000L) / sampleRate

            Log.d(Constants.TAG, "WAV duration calculation: size=${wavData.size}, bytesPerFrame=$bytesPerFrame, numFrames=$numFrames, sampleRate=$sampleRate, duration=${durationMs}ms")

            AudioData(
                data = wavData,
                sampleRate = sampleRate,
                channels = channels,
                bitDepth = bitDepth,
                durationMs = durationMs
            )
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load WAV file: ${e.message}")
        return null
    }
}
|
|
@@ -249,7 +301,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
249
301
|
)
|
|
250
302
|
}
|
|
251
303
|
|
|
252
|
-
|
|
253
304
|
fun resetCumulativeAmplitudeRange() {
|
|
254
305
|
cumulativeMinAmplitude = Float.MAX_VALUE
|
|
255
306
|
cumulativeMaxAmplitude = Float.MIN_VALUE
|
|
@@ -280,8 +331,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
280
331
|
}
|
|
281
332
|
}
|
|
282
333
|
|
|
283
|
-
|
|
284
|
-
|
|
285
334
|
/**
|
|
286
335
|
* Computes the features of the audio data.
|
|
287
336
|
* @param segmentData The segment data.
|
|
@@ -445,8 +494,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
445
494
|
}
|
|
446
495
|
}
|
|
447
496
|
|
|
448
|
-
|
|
449
|
-
|
|
450
497
|
/**
|
|
451
498
|
* Computes the Mel filter bank.
|
|
452
499
|
* @param numFilters The number of Mel filters.
|
|
@@ -482,7 +529,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
482
529
|
return melFilters
|
|
483
530
|
}
|
|
484
531
|
|
|
485
|
-
|
|
486
532
|
/**
|
|
487
533
|
* Computes the Discrete Cosine Transform (DCT) of the log energies.
|
|
488
534
|
* @param logEnergies The log energies.
|
|
@@ -504,7 +550,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
504
550
|
return dct.toList()
|
|
505
551
|
}
|
|
506
552
|
|
|
507
|
-
|
|
508
553
|
/**
|
|
509
554
|
* Extracts the spectral centroid from the audio data.
|
|
510
555
|
* @param segmentData The segment data.
|
|
@@ -520,7 +565,6 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
520
565
|
return (weightedSum / sum) * (sampleRate / 2) / magnitudeSpectrum.size
|
|
521
566
|
}
|
|
522
567
|
|
|
523
|
-
|
|
524
568
|
/**
|
|
525
569
|
* Extracts the spectral flatness from the audio data.
|
|
526
570
|
* @param segmentData The segment data.
|
|
@@ -687,4 +731,756 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
687
731
|
// Compute the HNR
|
|
688
732
|
return if (autocorrelation[0] != 0f) 10 * log10(maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation)) else 0f
|
|
689
733
|
}
|
|
734
|
+
|
|
735
|
+
/**
 * Loads an audio file as PCM, trying the platform `MediaExtractor` first and the
 * built-in WAV parser second (only for files whose name ends in `.wav`).
 *
 * @param fileUri Path or `file://` URI; if nothing exists at that path the file name is
 *   looked up inside `filesDir`.
 * @param decodingConfig Optional post-processing settings; when `null` the decoded data is
 *   returned as-is.
 * @return The loaded [AudioData], or `null` when the file is missing or neither path
 *   could produce audio.
 */
fun loadAudioFromAnyFormat(fileUri: String, decodingConfig: DecodingConfig? = null): AudioData? {
    val strippedPath = fileUri.removePrefix("file://")
    val primary = File(strippedPath)
    val file = if (primary.exists()) {
        primary
    } else {
        val fallback = File(filesDir, primary.name)
        if (!fallback.exists()) {
            Log.e("AudioProcessor", "File not found in any location: $strippedPath")
            return null
        }
        fallback
    }

    // First try MediaExtractor
    val extractor = MediaExtractor()
    try {
        Log.d("AudioProcessor", "Attempting MediaExtractor with path: ${file.absolutePath}")
        extractor.setDataSource(file.absolutePath)

        // Locate the first track whose MIME type marks it as audio.
        val trackIndex = (0 until extractor.trackCount).firstOrNull { index ->
            val mime = extractor.getTrackFormat(index).getString(MediaFormat.KEY_MIME)
            mime != null && mime.startsWith("audio/")
        }

        if (trackIndex != null) {
            val trackFormat = extractor.getTrackFormat(trackIndex)
            extractor.selectTrack(trackIndex)

            // Source properties as reported by the container.
            val sourceSampleRate = trackFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
            val sourceChannels = trackFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
            val durationUs = try {
                trackFormat.getLong(MediaFormat.KEY_DURATION)
            } catch (e: Exception) {
                (trackFormat.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
            }
            Log.d("AudioProcessor", "Raw duration from format: ${durationUs}us")

            val durationMs = durationUs / 1000
            Log.d("AudioProcessor", "Final duration: ${durationMs}ms")

            val decoded = decodeAudioToPCM(extractor, trackFormat)
            val output = if (decodingConfig == null) {
                decoded
            } else {
                processAudio(
                    decoded,
                    sourceSampleRate,
                    decodingConfig.targetSampleRate,
                    sourceChannels,
                    decodingConfig.targetChannels,
                    decodingConfig.normalizeAudio
                )
            }

            return AudioData(
                data = output,
                sampleRate = decodingConfig?.targetSampleRate ?: sourceSampleRate,
                bitDepth = decodingConfig?.targetBitDepth ?: 16,
                channels = decodingConfig?.targetChannels ?: sourceChannels,
                durationMs = durationMs
            )
        }
    } catch (e: Exception) {
        Log.d("AudioProcessor", "MediaExtractor failed, attempting WAV parser: ${e.message}")
    } finally {
        extractor.release()
    }

    // MediaExtractor produced nothing; a .wav file still gets a chance with the WAV parser.
    if (file.name.lowercase().endsWith(".wav")) {
        Log.d("AudioProcessor", "Falling back to WAV parser")
        val wav = loadAudioFile(file.absolutePath, false) ?: return null
        if (decodingConfig == null) {
            return wav
        }
        val converted = processAudio(
            wav.data,
            wav.sampleRate,
            decodingConfig.targetSampleRate,
            wav.channels,
            decodingConfig.targetChannels,
            decodingConfig.normalizeAudio
        )
        return AudioData(
            data = converted,
            sampleRate = decodingConfig.targetSampleRate ?: wav.sampleRate,
            bitDepth = decodingConfig.targetBitDepth,
            channels = decodingConfig.targetChannels ?: wav.channels,
            durationMs = wav.durationMs
        )
    }

    Log.e("AudioProcessor", "Failed to process audio file with both MediaExtractor and WAV parser")
    return null
}
|
|
828
|
+
|
|
829
|
+
/**
 * Decodes the track currently selected on [extractor] and returns everything the
 * decoder emits as one byte array.
 *
 * Input and output completion are tracked separately: after the last sample has been
 * queued the loop keeps draining until the decoder reports end-of-stream on its output,
 * so data still buffered inside the codec is not lost.
 *
 * @param extractor Extractor positioned on the track to decode; it is advanced as samples are read.
 * @param format Track format used to create and configure the decoder.
 * @return The concatenated decoder output.
 * @throws IllegalArgumentException if [format] carries no MIME type.
 */
private fun decodeAudioToPCM(extractor: MediaExtractor, format: MediaFormat): ByteArray {
    val mime = requireNotNull(format.getString(MediaFormat.KEY_MIME)) { "Track format has no MIME type" }
    val decoder = MediaCodec.createDecoderByType(mime)
    // A byte stream avoids boxing every decoded byte, which a MutableList<Byte> would do.
    val pcmOut = java.io.ByteArrayOutputStream()

    try {
        decoder.configure(format, null, null, 0)
        decoder.start()

        val info = MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false

        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = decoder.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(decoder.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for index $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0) {
                        decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = decoder.dequeueOutputBuffer(info, 10000)
            if (outputBufferId >= 0) {
                if (info.size > 0) {
                    val outputBuffer = checkNotNull(decoder.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for index $outputBufferId"
                    }
                    // Valid data occupies [offset, offset + size) of the buffer.
                    outputBuffer.position(info.offset)
                    outputBuffer.limit(info.offset + info.size)
                    val chunk = ByteArray(info.size)
                    outputBuffer.get(chunk)
                    pcmOut.write(chunk, 0, chunk.size)
                }
                decoder.releaseOutputBuffer(outputBufferId, false)
                if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        decoder.stop()
    } finally {
        // Release the codec even when decoding fails part-way through.
        decoder.release()
    }

    return pcmOut.toByteArray()
}
|
|
868
|
+
|
|
869
|
+
/**
 * Resamples 16-bit little-endian interleaved PCM with linear interpolation.
 *
 * The channel layout is preserved: every channel is interpolated independently and the
 * result stays interleaved with [originalChannels] channels, so callers can keep using
 * the original channel count for the returned data.
 *
 * @param pcmData 16-bit little-endian interleaved PCM bytes.
 * @param originalSampleRate Sample rate of [pcmData] in Hz; must be positive.
 * @param targetSampleRate Desired sample rate in Hz; must be positive.
 * @param originalChannels Number of interleaved channels in [pcmData]; values below 1 are treated as 1.
 * @return Resampled PCM in the same format and channel layout; empty when the input holds no complete frame.
 * @throws IllegalArgumentException if either sample rate is not positive.
 */
private fun resampleAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int,
    originalChannels: Int
): ByteArray {
    require(originalSampleRate > 0 && targetSampleRate > 0) {
        "Sample rates must be positive, got: $originalSampleRate -> $targetSampleRate"
    }
    val channels = originalChannels.coerceAtLeast(1)

    // Convert byte array to short array (16-bit samples)
    val samples = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples)

    val frameCount = samples.size / channels
    if (frameCount == 0) {
        return ByteArray(0)
    }

    val resampleRatio = targetSampleRate.toDouble() / originalSampleRate
    val newFrameCount = (frameCount * resampleRatio).toInt()
    val resampled = ShortArray(newFrameCount * channels)

    for (frame in 0 until newFrameCount) {
        // Position of this output frame expressed in source frames.
        val sourcePosition = frame / resampleRatio
        val lowerFrame = sourcePosition.toInt().coerceAtMost(frameCount - 1)
        val upperFrame = minOf(lowerFrame + 1, frameCount - 1)
        val fraction = sourcePosition - lowerFrame

        for (ch in 0 until channels) {
            val a = samples[lowerFrame * channels + ch].toDouble()
            val b = samples[upperFrame * channels + ch].toDouble()
            // Linear interpolation between the two neighbouring source frames.
            resampled[frame * channels + ch] = (a + fraction * (b - a)).toInt().toShort()
        }
    }

    // Convert back to byte array
    val resultBuffer = ByteBuffer.allocate(resampled.size * 2)
    resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
    resultBuffer.asShortBuffer().put(resampled)
    return resultBuffer.array()
}
|
|
912
|
+
|
|
913
|
+
/**
 * Downmixes interleaved samples to a single channel by averaging each frame.
 *
 * @param stereoData Interleaved samples, [channels] values per frame.
 * @param channels Number of interleaved channels.
 * @return One averaged sample per complete frame (integer division, truncating toward zero).
 */
private fun convertToMono(stereoData: ShortArray, channels: Int): ShortArray {
    val frameCount = stereoData.size / channels
    return ShortArray(frameCount) { frame ->
        val base = frame * channels
        var total = 0
        repeat(channels) { ch -> total += stereoData[base + ch] }
        (total / channels).toShort()
    }
}
|
|
927
|
+
|
|
928
|
+
/**
 * Returns the value located [fraction] of the way from [a] to [b]
 * (`fraction = 0.0` yields [a], `fraction = 1.0` yields [b]).
 */
private fun linearInterpolate(a: Double, b: Double, fraction: Double): Double =
    a + fraction * (b - a)
|
|
931
|
+
|
|
932
|
+
/**
 * Applies the optional post-processing steps to decoded PCM, in this order:
 * resampling, channel conversion, normalization.
 *
 * @param pcmData Decoded PCM bytes.
 * @param originalSampleRate Sample rate of [pcmData].
 * @param targetSampleRate Desired sample rate; resampling runs only when non-null and different.
 * @param originalChannels Channel count of [pcmData].
 * @param targetChannels Desired channel count; conversion runs only when non-null and different.
 * @param normalize Whether to normalize the result.
 * @return The processed bytes, or [pcmData] itself when no step applies.
 */
private fun processAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int?,
    originalChannels: Int,
    targetChannels: Int?,
    normalize: Boolean
): ByteArray {
    // Step 1: sample rate, only when a different rate was explicitly requested.
    val rateAdjusted = if (targetSampleRate != null && targetSampleRate != originalSampleRate) {
        resampleAudio(pcmData, originalSampleRate, targetSampleRate, originalChannels)
    } else {
        pcmData
    }

    // Step 2: channel layout, only when a different layout was explicitly requested.
    val channelAdjusted = if (targetChannels != null && targetChannels != originalChannels) {
        convertChannels(rateAdjusted, originalChannels, targetChannels)
    } else {
        rateAdjusted
    }

    // Step 3: normalization, only on request.
    return if (normalize) normalizeAudio(channelAdjusted) else channelAdjusted
}
|
|
959
|
+
|
|
960
|
+
/**
 * Peak-normalizes 16-bit little-endian PCM: every sample is scaled by
 * `Short.MAX_VALUE / peak`, where `peak` is the largest absolute sample value.
 * Input that is entirely silent is returned with its samples unchanged.
 *
 * @param pcmData 16-bit little-endian PCM bytes.
 * @return A new byte array holding the scaled samples.
 */
private fun normalizeAudio(pcmData: ByteArray): ByteArray {
    val sampleCount = pcmData.size / 2
    val samples = ShortArray(sampleCount)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples)

    // Largest absolute sample value in the signal.
    val peak = samples.fold(0) { currentPeak, sample -> maxOf(currentPeak, abs(sample.toInt())) }

    // Scaling only makes sense when there is a non-zero peak.
    if (peak > 0) {
        val gain = Short.MAX_VALUE.toFloat() / peak
        for (index in samples.indices) {
            samples[index] = (samples[index] * gain).toInt().toShort()
        }
    }

    // Serialize the samples back to little-endian bytes.
    return ByteBuffer.allocate(sampleCount * 2)
        .order(ByteOrder.LITTLE_ENDIAN)
        .apply { asShortBuffer().put(samples) }
        .array()
}
|
|
984
|
+
|
|
985
|
+
/**
 * Converts 16-bit little-endian interleaved PCM from [originalChannels] to [targetChannels],
 * one frame at a time.
 *
 * - Converting to mono averages all source channels of the frame.
 * - Otherwise target channel `j` receives source channel `j % originalChannels`, so mono is
 *   duplicated into every target channel and surplus source channels are dropped.
 *
 * @param pcmData 16-bit little-endian interleaved PCM bytes.
 * @param originalChannels Channel count of [pcmData]; must be positive.
 * @param targetChannels Channel count of the result; must be positive.
 * @return Converted PCM containing one output frame per complete input frame.
 * @throws IllegalArgumentException if either channel count is not positive.
 */
private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
    require(originalChannels > 0 && targetChannels > 0) {
        "Channel counts must be positive, got: $originalChannels -> $targetChannels"
    }

    val inputBuffer = ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    val frameCount = inputBuffer.remaining() / originalChannels
    val result = ByteArray(frameCount * targetChannels * 2)
    // The short view writes straight into `result`.
    val outputBuffer = ByteBuffer.wrap(result).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()

    val frame = ShortArray(originalChannels)
    for (frameIndex in 0 until frameCount) {
        inputBuffer.get(frame)
        if (targetChannels == 1) {
            var sum = 0
            for (sample in frame) {
                sum += sample
            }
            outputBuffer.put((sum / originalChannels).toShort())
        } else {
            for (ch in 0 until targetChannels) {
                outputBuffer.put(frame[ch % originalChannels])
            }
        }
    }

    return result
}
|
|
1000
|
+
|
|
1001
|
+
/**
 * Logs the first 44 bytes of [file] as hex, as ASCII, and decoded at the field offsets
 * of a canonical 44-byte WAV header. Failures are logged and swallowed.
 *
 * @param file File whose header should be dumped.
 */
private fun debugWavHeader(file: File) {
    try {
        // Standard WAV header size
        val header = ByteArray(44)
        RandomAccessFile(file, "r").use { reader -> reader.readFully(header) }

        val hexDump = header.joinToString(", ") { "%02X".format(it) }
        Log.d("AudioProcessor", "WAV Header Bytes: $hexDump")

        val asciiDump = header.joinToString("") { it.toInt().toChar().toString() }
        Log.d("AudioProcessor", "ASCII: $asciiDump")

        val fields = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
        Log.d("AudioProcessor", """
            RIFF header: ${String(header, 0, 4)}
            File size: ${fields.getInt(4)}
            WAVE header: ${String(header, 8, 4)}
            fmt header: ${String(header, 12, 4)}
            Chunk size: ${fields.getInt(16)}
            Audio format: ${fields.getShort(20)}
            Channels: ${fields.getShort(22)}
            Sample rate: ${fields.getInt(24)}
            Byte rate: ${fields.getInt(28)}
            Block align: ${fields.getShort(32)}
            Bits per sample: ${fields.getShort(34)}
        """.trimIndent())
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to debug WAV header: ${e.message}")
    }
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Builds a reduced amplitude preview of [audioData] with up to [numberOfPoints] points
 * over the requested time range.
 *
 * The time range is converted to frame indices (one frame = one sample per channel) and
 * each point's frames are mapped to byte offsets through the frame size before slicing
 * the PCM payload, so slices always start on a frame boundary.
 *
 * @param audioData PCM audio; its `durationMs` is used as the total duration.
 * @param numberOfPoints Maximum number of preview points; must be positive.
 * @param startTimeMs Optional range start in milliseconds (defaults to 0).
 * @param endTimeMs Optional range end in milliseconds (defaults to, and is clamped to, the total duration).
 * @param config Recording configuration; its `algorithm` selects `"peak"` or, for any other value, RMS.
 * @return The preview, with point positions and sample counts expressed in frames.
 * @throws IllegalArgumentException if the arguments or the resulting range are invalid.
 * @throws IllegalStateException if a segment cannot be processed or no point was produced.
 */
fun generatePreview(
    audioData: AudioData,
    numberOfPoints: Int,
    startTimeMs: Long? = null,
    endTimeMs: Long? = null,
    config: RecordingConfig
): AudioAnalysisData {
    require(numberOfPoints > 0) { "numberOfPoints must be positive, got: $numberOfPoints" }
    require(audioData.sampleRate > 0 && audioData.channels > 0) {
        "Invalid audio format: sampleRate=${audioData.sampleRate}, channels=${audioData.channels}"
    }

    val totalDurationMs = audioData.durationMs

    Log.d(Constants.TAG, "Total audio duration: ${totalDurationMs}ms")

    // Validate time range
    if (startTimeMs != null) {
        require(startTimeMs >= 0) { "startTime must be non-negative, got: $startTimeMs" }
        require(startTimeMs <= totalDurationMs) { "startTime ($startTimeMs) is beyond audio duration ($totalDurationMs)" }
    }

    if (endTimeMs != null) {
        require(endTimeMs >= 0) { "endTime must be non-negative, got: $endTimeMs" }
        if (endTimeMs > totalDurationMs) {
            Log.w(Constants.TAG, "endTime ($endTimeMs) is beyond audio duration ($totalDurationMs), clamping to duration")
        }
        if (startTimeMs != null) {
            require(startTimeMs < endTimeMs) { "startTime ($startTimeMs) must be less than endTime ($endTimeMs)" }
        }
    }

    // Calculate effective range
    val effectiveStartMs = startTimeMs ?: 0L
    val effectiveEndMs = (endTimeMs ?: totalDurationMs).coerceAtMost(totalDurationMs)
    val durationMs = effectiveEndMs - effectiveStartMs

    Log.d(Constants.TAG, "Preview range: ${effectiveStartMs}ms to ${effectiveEndMs}ms (${durationMs}ms)")

    // Frame geometry; the sample width mirrors the converter chosen per segment below.
    val bytesPerSample = when (audioData.bitDepth) {
        16 -> 2
        32 -> 4
        else -> 1
    }
    val bytesPerFrame = bytesPerSample * audioData.channels
    val totalFrames = audioData.data.size / bytesPerFrame

    // Calculate sample (frame) range, clamped to the frames actually present in the payload.
    val startSampleIndex = ((effectiveStartMs * audioData.sampleRate) / 1000)
        .coerceAtMost(totalFrames.toLong()).toInt()
    val endSampleIndex = ((effectiveEndMs * audioData.sampleRate) / 1000)
        .coerceAtMost(totalFrames.toLong()).toInt()
    val samplesInRange = endSampleIndex - startSampleIndex

    if (samplesInRange <= 0) {
        throw IllegalArgumentException("Invalid sample range: contains no samples")
    }

    val samplesPerPoint = (samplesInRange / numberOfPoints).coerceAtLeast(1)
    val pointsPerSecond = numberOfPoints.toDouble() / (durationMs.toDouble() / 1000.0)

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    // Amplitudes are non-negative (peak of |x| or RMS), so 0 is the neutral starting maximum.
    var maxAmplitude = 0f

    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until numberOfPoints) {
            val pointStartSample = startSampleIndex + (i * samplesPerPoint)
            val pointEndSample = minOf(pointStartSample + samplesPerPoint, endSampleIndex)

            if (pointStartSample >= pointEndSample) break

            try {
                // Frame indices are scaled to byte offsets so the slice is frame-aligned.
                val segmentBytes = audioData.data.copyOfRange(
                    pointStartSample * bytesPerFrame,
                    pointEndSample * bytesPerFrame
                )

                // Convert PCM bytes to float samples with proper bit depth handling
                val segmentData = when (audioData.bitDepth) {
                    16 -> convert16BitPcmToFloat(segmentBytes)
                    32 -> convert32BitPcmToFloat(segmentBytes)
                    else -> convert8BitPcmToFloat(segmentBytes)
                }

                // One frame lasts 1 / sampleRate seconds regardless of the channel count.
                val startTimePoint = (pointStartSample * 1000.0 / audioData.sampleRate).toFloat()
                val endTimePoint = (pointEndSample * 1000.0 / audioData.sampleRate).toFloat()

                val amplitude = when (config.algorithm.lowercase()) {
                    "peak" -> segmentData.maxOf { abs(it) }
                    else -> sqrt(segmentData.map { it * it }.average().toFloat())
                }

                minAmplitude = minOf(minAmplitude, amplitude)
                maxAmplitude = maxOf(maxAmplitude, amplitude)

                // NOTE(review): positions and samples are reported in frames, as the original
                // index arithmetic produced them — confirm consumers do not expect byte offsets.
                dataPoints.add(DataPoint(
                    id = i.toLong(),
                    amplitude = amplitude,
                    startTime = startTimePoint,
                    endTime = endTimePoint,
                    startPosition = pointStartSample,
                    endPosition = pointEndSample,
                    samples = pointEndSample - pointStartSample
                ))
            } catch (e: Exception) {
                Log.e(Constants.TAG, "Error processing segment $i: ${e.message}")
                throw IllegalStateException("Failed to process audio segment: ${e.message}", e)
            }
        }
    }

    if (dataPoints.isEmpty()) {
        throw IllegalStateException("No data points were generated")
    }

    return AudioAnalysisData(
        pointsPerSecond = pointsPerSecond,
        durationMs = durationMs.toInt(),
        bitDepth = audioData.bitDepth,
        numberOfChannels = audioData.channels,
        sampleRate = audioData.sampleRate,
        samples = samplesInRange,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        speakerChanges = emptyList(),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
|
|
1142
|
+
|
|
1143
|
+
// Add these conversion helpers
|
|
1144
|
+
/**
 * Decodes 16-bit little-endian PCM into floats by dividing each sample by `Short.MAX_VALUE`.
 * A trailing odd byte, if any, is ignored.
 *
 * @param bytes 16-bit little-endian PCM bytes.
 * @return One float per 16-bit sample.
 */
private fun convert16BitPcmToFloat(bytes: ByteArray): FloatArray {
    val sampleView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    return FloatArray(bytes.size / 2) { index ->
        sampleView.get(index).toFloat() / Short.MAX_VALUE
    }
}
|
|
1149
|
+
|
|
1150
|
+
/**
 * Decodes 32-bit little-endian integer PCM into floats by dividing each sample by `Int.MAX_VALUE`.
 * Trailing bytes that do not form a full 4-byte sample are ignored.
 *
 * @param bytes 32-bit little-endian integer PCM bytes.
 * @return One float per 32-bit sample.
 */
private fun convert32BitPcmToFloat(bytes: ByteArray): FloatArray {
    val sampleView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
    return FloatArray(bytes.size / 4) { index ->
        sampleView.get(index).toFloat() / Int.MAX_VALUE
    }
}
|
|
1155
|
+
|
|
1156
|
+
/**
 * Decodes 8-bit PCM into floats.
 *
 * 8-bit WAV samples are unsigned (0..255, with 128 as the zero level), whereas a Kotlin
 * `Byte` is signed; each byte is therefore masked to its unsigned value before being
 * re-centred around zero and scaled by 127.
 *
 * @param bytes Unsigned 8-bit PCM samples.
 * @return One float per byte, ranging from `-128/127` to `1.0`.
 */
private fun convert8BitPcmToFloat(bytes: ByteArray): FloatArray =
    FloatArray(bytes.size) { index ->
        ((bytes[index].toInt() and 0xFF) - 128).toFloat() / 127f
    }
|
|
1159
|
+
|
|
1160
|
+
/**
 * Loads a time range of an audio file, dispatching on the file's first four bytes:
 * files starting with `RIFF` go to the WAV range loader, everything else to the
 * compressed-audio range loader.
 *
 * @param fileUri Path or `file://` URI; if nothing exists at that path the file name is
 *   looked up inside `filesDir`.
 * @param startTimeMs Optional range start in milliseconds, passed through to the loader.
 * @param endTimeMs Optional range end in milliseconds, passed through to the loader.
 * @param config Decoding options passed through to the loader.
 * @return The loaded range, or `null` when the file is missing or loading throws.
 */
fun loadAudioRange(
    fileUri: String,
    startTimeMs: Long? = null,
    endTimeMs: Long? = null,
    config: DecodingConfig
): AudioData? {
    return try {
        // Resolve the URI to an existing file, falling back to filesDir by name.
        val strippedPath = fileUri.removePrefix("file://")
        val direct = File(strippedPath)
        val source = if (direct.exists()) {
            direct
        } else {
            File(filesDir, direct.name).takeIf { it.exists() }
        }
        if (source == null) {
            Log.e(Constants.TAG, "File not found in any location: $strippedPath")
            return null
        }

        // Sniff the container type from the first 4 bytes.
        val magic = ByteArray(4)
        FileInputStream(source).use { stream -> stream.read(magic) }

        when (String(magic)) {
            "RIFF" -> loadWavRange(source, startTimeMs, endTimeMs, config)
            else -> loadCompressedAudioRange(source, startTimeMs, endTimeMs, config)
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load audio range: ${e.message}", e)
        null
    }
}
|
|
1192
|
+
|
|
1193
|
+
/**
 * Reads a time range of PCM data from a WAV file.
 *
 * The format is taken from fixed offsets of a 44-byte header (channels at 22,
 * sample rate at 24, bits per sample at 34) and the audio data is assumed to
 * start right after it.
 * NOTE(review): files with extra chunks before `data` do not match this layout — confirm
 * whether such files can reach this method.
 *
 * The requested range is converted to whole frames and clamped to the frames
 * available in the file, so the returned bytes always start on a frame boundary
 * and never extend past the end of the file. An end time before the start time
 * yields an empty range.
 *
 * @param file WAV file to read.
 * @param startTimeMs start of the range in milliseconds, or `null` for the beginning.
 * @param endTimeMs end of the range in milliseconds, or `null` for the end of the data.
 * @param config target format overrides; they only relabel the returned data, no
 *   conversion is performed here.
 * @return the raw bytes of the range with `durationMs` set to the duration of the whole
 *   file, or `null` when the file cannot be read or its header is not usable.
 */
private fun loadWavRange(
    file: File,
    startTimeMs: Long?,
    endTimeMs: Long?,
    config: DecodingConfig
): AudioData? {
    try {
        val headerSize = 44 // WAV header is 44 bytes
        val fileLength = file.length()
        if (fileLength < headerSize) {
            Log.e(Constants.TAG, "Failed to load WAV range: file too short for a WAV header ($fileLength bytes)")
            return null
        }

        // `use` closes the file on every exit path, including exceptions.
        return RandomAccessFile(file, "r").use { raf ->
            // Read WAV header to get format info
            val headerBuffer = ByteArray(headerSize)
            raf.readFully(headerBuffer)
            val header = ByteBuffer.wrap(headerBuffer).order(ByteOrder.LITTLE_ENDIAN)
            val channels = header.getShort(22).toInt()
            val sampleRate = header.getInt(24)
            val bitDepth = header.getShort(34).toInt()

            val bytesPerFrame = channels * (bitDepth / 8)
            if (sampleRate <= 0 || bytesPerFrame <= 0) {
                Log.e(
                    Constants.TAG,
                    "Failed to load WAV range: invalid format ${sampleRate}Hz, $channels channels, $bitDepth-bit"
                )
                return null
            }

            // Calculate duration
            val numFrames = (fileLength - headerSize) / bytesPerFrame // Subtract header size
            val durationMs = (numFrames * 1000L) / sampleRate

            // Work in frames so byte offsets are always multiples of bytesPerFrame,
            // and clamp to the frames that actually exist in the file.
            val startFrame = ((startTimeMs ?: 0L).coerceAtLeast(0L) * sampleRate / 1000L)
                .coerceAtMost(numFrames)
            val endFrame = (endTimeMs?.let { it.coerceAtLeast(0L) * sampleRate / 1000L } ?: numFrames)
                .coerceIn(startFrame, numFrames)
            val startByte = headerSize + startFrame * bytesPerFrame
            val endByte = headerSize + endFrame * bytesPerFrame
            val byteCount = endByte - startByte
            if (byteCount > Int.MAX_VALUE) {
                Log.e(Constants.TAG, "Failed to load WAV range: range of $byteCount bytes is too large")
                return null
            }
            val length = byteCount.toInt()

            Log.d(Constants.TAG, """
                Loading WAV section:
                - start: ${startTimeMs}ms (pos: $startByte)
                - end: ${endTimeMs}ms (pos: $endByte)
                - length: $length bytes
                - format: ${sampleRate}Hz, $channels channels, $bitDepth-bit
            """.trimIndent())

            // Read the requested section; seek + readFully cannot silently read short.
            val audioData = ByteArray(length)
            raf.seek(startByte)
            raf.readFully(audioData)

            // NOTE(review): the target* overrides relabel the data without converting it — confirm
            // callers convert afterwards.
            AudioData(
                data = audioData,
                sampleRate = config.targetSampleRate ?: sampleRate,
                channels = config.targetChannels ?: channels,
                bitDepth = config.targetBitDepth ?: bitDepth,
                durationMs = durationMs // Pass the duration
            )
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load WAV range: ${e.message}", e)
        return null
    }
}
|
|
1246
|
+
|
|
1247
|
+
/**
 * Decodes a time range of a compressed audio file to PCM bytes.
 *
 * The first track whose MIME type starts with `audio/` is decoded with
 * [MediaCodec]. Samples are fed from the seek position until the extractor runs
 * out or a sample time passes the end of the range; the decoder is then drained
 * until it reports end of stream, and every decoded buffer is appended to the result.
 *
 * @param file compressed audio file to decode.
 * @param startTimeMs start of the range in milliseconds, or `null` for the beginning.
 * @param endTimeMs end of the range in milliseconds, or `null` for the end of the track.
 * @param config target format overrides; they only relabel the returned data, no
 *   conversion is performed here.
 * @return the decoded bytes with `durationMs` set to the clamped range length, or `null`
 *   when there is no audio track or decoding fails.
 */
private fun loadCompressedAudioRange(
    file: File,
    startTimeMs: Long?,
    endTimeMs: Long?,
    config: DecodingConfig
): AudioData? {
    val extractor = MediaExtractor()
    var decoder: MediaCodec? = null
    var decoderStarted = false

    try {
        extractor.setDataSource(file.absolutePath)

        // Pick the first audio track instead of assuming it is track 0.
        var audioTrack = -1
        for (index in 0 until extractor.trackCount) {
            val trackMime = extractor.getTrackFormat(index).getString(MediaFormat.KEY_MIME)
            if (trackMime != null && trackMime.startsWith("audio/")) {
                audioTrack = index
                break
            }
        }
        if (audioTrack < 0) {
            Log.e(Constants.TAG, "Failed to load compressed audio range: no audio track in ${file.name}")
            return null
        }
        val format = extractor.getTrackFormat(audioTrack)
        extractor.selectTrack(audioTrack)
        val mimeType = checkNotNull(format.getString(MediaFormat.KEY_MIME)) { "Audio track has no MIME type" }

        val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        val totalDurationUs = try {
            format.getLong(MediaFormat.KEY_DURATION)
        } catch (e: Exception) {
            (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
        }
        Log.d("AudioProcessor", "Raw duration from format: ${totalDurationUs}us")

        val totalDurationMs = totalDurationUs / 1000
        Log.d("AudioProcessor", "Final duration: ${totalDurationMs}ms")

        // Calculate valid time range
        val validStartMs = startTimeMs?.coerceIn(0, totalDurationMs) ?: 0
        val validEndMs = endTimeMs?.coerceIn(validStartMs, totalDurationMs) ?: totalDurationMs
        val effectiveDurationMs = validEndMs - validStartMs

        // Initialize decoder
        val codec = MediaCodec.createDecoderByType(mimeType)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()
        decoderStarted = true

        // Seek to start position if needed
        if (validStartMs > 0) {
            extractor.seekTo(validStartMs * 1000, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
        }

        // Expected output size for the target format (used for logging only).
        val targetSampleRate = config.targetSampleRate ?: originalSampleRate
        val targetChannels = config.targetChannels ?: originalChannels
        val targetBitDepth = config.targetBitDepth ?: 16
        val bytesPerSample = targetBitDepth / 8
        val samplesPerSecond = targetSampleRate * targetChannels
        val totalBytes = (effectiveDurationMs * samplesPerSecond * bytesPerSample) / 1000

        Log.d(Constants.TAG, """
            Loading audio range:
            - start: ${validStartMs}ms
            - end: ${validEndMs}ms
            - duration: ${effectiveDurationMs}ms
            - bytes: $totalBytes
            - format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
        """.trimIndent())

        // Growable sink for decoded PCM; the decoder's real output size is not known up front.
        val pcm = java.io.ByteArrayOutputStream()
        val bufferInfo = MediaCodec.BufferInfo()
        val endUs = validEndMs * 1000
        var inputDone = false
        var outputDone = false

        // Keep looping after the last input is queued so the decoder can be drained.
        while (!outputDone) {
            // Handle input
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(codec.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0 || extractor.sampleTime > endUs) {
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            // Handle output
            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, 10000)
            if (outputBufferId >= 0) {
                if (bufferInfo.size > 0) {
                    val decoded = checkNotNull(codec.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    decoded.limit(bufferInfo.offset + bufferInfo.size)
                    decoded.position(bufferInfo.offset)
                    val chunk = ByteArray(bufferInfo.size)
                    decoded.get(chunk)
                    pcm.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)
                if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        val audioData = pcm.toByteArray()

        // NOTE(review): the data is whatever the decoder produced; the target* values below only
        // relabel it — confirm callers convert afterwards.
        return AudioData(
            data = audioData,
            sampleRate = targetSampleRate,
            channels = targetChannels,
            bitDepth = targetBitDepth,
            durationMs = effectiveDurationMs // Pass the duration
        ).also {
            Log.d(Constants.TAG, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load compressed audio range: ${e.message}", e)
        return null
    } finally {
        decoder?.let { codec ->
            // stop() is only valid on a started codec; guard it so cleanup cannot mask the real error.
            if (decoderStarted) {
                try {
                    codec.stop()
                } catch (e: IllegalStateException) {
                    Log.w(Constants.TAG, "Failed to stop decoder: ${e.message}")
                }
            }
            codec.release()
        }
        extractor.release()
    }
}
|
|
1369
|
+
|
|
1370
|
+
// Future audio editing methods
|
|
1371
|
+
/**
 * Extracts a time range from an audio file and writes it to a WAV file in `filesDir`.
 *
 * The range is loaded through [loadAudioRange] and written behind a 44-byte PCM
 * WAV header. All multi-byte header fields are written little-endian, as the
 * RIFF/WAVE format requires.
 *
 * @param fileUri path or `file://` URI of the source audio file.
 * @param startTimeMs start of the range to keep, in milliseconds.
 * @param endTimeMs end of the range to keep, in milliseconds.
 * @param config decoding options for loading the range; a default [DecodingConfig] is used when `null`.
 * @param outputFileName name of the output file inside `filesDir`; when `null`, a
 *   `trimmed_<timestamp>.wav` name is generated.
 * @return the trimmed audio data, or `null` when loading or writing fails.
 */
fun trimAudio(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null,
    outputFileName: String? = null
): AudioData? {
    try {
        // Load the specified range
        val audioData = loadAudioRange(fileUri, startTimeMs, endTimeMs, config ?: DecodingConfig())
            ?: return null

        // Generate output filename if not provided
        val outputFile = File(filesDir, outputFileName ?: "trimmed_${System.currentTimeMillis()}.wav")

        val durationMs = (endTimeMs - startTimeMs).toInt()

        Log.d(Constants.TAG, """
            Trimming audio:
            - start: ${startTimeMs}ms
            - end: ${endTimeMs}ms
            - duration: ${durationMs}ms
            - output: ${outputFile.name}
        """.trimIndent())

        val pcm = audioData.data
        val blockAlign = audioData.channels * (audioData.bitDepth / 8)
        val byteRate = audioData.sampleRate * audioData.channels * (audioData.bitDepth / 8)

        // Build the whole header in one little-endian buffer. RandomAccessFile.writeInt and
        // writeShort are big-endian and would produce an invalid WAV header.
        val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
            // RIFF header
            put("RIFF".toByteArray(Charsets.US_ASCII))
            putInt(pcm.size + 36) // File size minus the 8-byte RIFF tag and size field
            put("WAVE".toByteArray(Charsets.US_ASCII))

            // fmt chunk
            put("fmt ".toByteArray(Charsets.US_ASCII))
            putInt(16) // Subchunk1Size (16 for PCM)
            putShort(1) // AudioFormat (1 for PCM)
            putShort(audioData.channels.toShort())
            putInt(audioData.sampleRate)
            putInt(byteRate) // ByteRate
            putShort(blockAlign.toShort()) // BlockAlign
            putShort(audioData.bitDepth.toShort()) // BitsPerSample

            // data chunk
            put("data".toByteArray(Charsets.US_ASCII))
            putInt(pcm.size) // Subchunk2Size
        }

        RandomAccessFile(outputFile, "rw").use { raf ->
            // "rw" does not truncate: drop any previous content so no stale bytes trail the new data.
            raf.setLength(0)
            raf.write(header.array())
            // Write audio data
            raf.write(pcm)
        }

        // Debug WAV header to verify
        debugWavHeader(outputFile)

        // Return the trimmed audio data
        return AudioData(
            data = audioData.data,
            sampleRate = audioData.sampleRate,
            channels = audioData.channels,
            bitDepth = audioData.bitDepth
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to trim audio: ${e.message}", e)
        return null
    }
}
|
|
1454
|
+
|
|
1455
|
+
/**
 * Placeholder for cutting a time range out of an audio file.
 *
 * Not implemented yet: every call returns `null` and none of the arguments are used.
 *
 * TODO: Implement removing a section by concatenating before and after ranges.
 * This will use loadAudioRange to get two sections and join them.
 */
fun removeSection(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null
): AudioData? = null
|
|
1465
|
+
|
|
1466
|
+
/**
 * Placeholder for concatenating several audio sections into one.
 *
 * Not implemented yet: every call returns `null` and none of the arguments are used.
 *
 * TODO: Implement joining multiple audio sections.
 * This will be used by removeSection and other future editing features.
 */
fun joinAudioSections(
    sections: List<AudioData>,
    config: DecodingConfig? = null
): AudioData? = null
|
|
1474
|
+
|
|
1475
|
+
// Helper method for future editing features
|
|
1476
|
+
/**
 * Placeholder for converting audio to another sample rate, channel count or bit depth.
 *
 * Not implemented yet: the input is returned unchanged and the target values are ignored.
 *
 * TODO: Implement audio format conversion.
 * This will help ensure consistent format when joining sections.
 */
private fun convertAudioFormat(
    audioData: AudioData,
    targetSampleRate: Int? = null,
    targetChannels: Int? = null,
    targetBitDepth: Int? = null
): AudioData = audioData
|
|
690
1486
|
}
|