@siteed/expo-audio-stream 2.0.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. package/README.md +46 -27
  2. package/build/index.d.ts +11 -12
  3. package/build/index.js +44 -10
  4. package/package.json +49 -110
  5. package/src/index.ts +18 -33
  6. package/CHANGELOG.md +0 -195
  7. package/android/build.gradle +0 -105
  8. package/android/src/main/AndroidManifest.xml +0 -27
  9. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
  10. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  11. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
  12. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
  13. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
  14. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -1936
  15. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
  16. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -138
  17. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -20
  18. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  19. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -509
  20. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  21. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  22. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
  23. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
  24. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  25. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
  26. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  27. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  28. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  29. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  30. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  31. package/android/src/main/res/drawable/ic_play.xml +0 -10
  32. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  33. package/android/src/main/res/layout/notification_recording.xml +0 -37
  34. package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
  35. package/app.plugin.js +0 -1
  36. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -144
  37. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  38. package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
  39. package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  40. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -78
  41. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  42. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -229
  43. package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  44. package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
  45. package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  46. package/build/AudioAnalysis/extractWaveform.js +0 -11
  47. package/build/AudioAnalysis/extractWaveform.js.map +0 -1
  48. package/build/AudioRecorder.provider.d.ts +0 -11
  49. package/build/AudioRecorder.provider.d.ts.map +0 -1
  50. package/build/AudioRecorder.provider.js +0 -37
  51. package/build/AudioRecorder.provider.js.map +0 -1
  52. package/build/ExpoAudioStream.native.d.ts +0 -3
  53. package/build/ExpoAudioStream.native.d.ts.map +0 -1
  54. package/build/ExpoAudioStream.native.js +0 -6
  55. package/build/ExpoAudioStream.native.js.map +0 -1
  56. package/build/ExpoAudioStream.types.d.ts +0 -206
  57. package/build/ExpoAudioStream.types.d.ts.map +0 -1
  58. package/build/ExpoAudioStream.types.js +0 -2
  59. package/build/ExpoAudioStream.types.js.map +0 -1
  60. package/build/ExpoAudioStream.web.d.ts +0 -59
  61. package/build/ExpoAudioStream.web.d.ts.map +0 -1
  62. package/build/ExpoAudioStream.web.js +0 -285
  63. package/build/ExpoAudioStream.web.js.map +0 -1
  64. package/build/ExpoAudioStreamModule.d.ts +0 -3
  65. package/build/ExpoAudioStreamModule.d.ts.map +0 -1
  66. package/build/ExpoAudioStreamModule.js +0 -239
  67. package/build/ExpoAudioStreamModule.js.map +0 -1
  68. package/build/WebRecorder.web.d.ts +0 -119
  69. package/build/WebRecorder.web.d.ts.map +0 -1
  70. package/build/WebRecorder.web.js +0 -436
  71. package/build/WebRecorder.web.js.map +0 -1
  72. package/build/constants.d.ts +0 -11
  73. package/build/constants.d.ts.map +0 -1
  74. package/build/constants.js +0 -14
  75. package/build/constants.js.map +0 -1
  76. package/build/events.d.ts +0 -26
  77. package/build/events.d.ts.map +0 -1
  78. package/build/events.js +0 -21
  79. package/build/events.js.map +0 -1
  80. package/build/index.d.ts.map +0 -1
  81. package/build/index.js.map +0 -1
  82. package/build/useAudioRecorder.d.ts +0 -21
  83. package/build/useAudioRecorder.d.ts.map +0 -1
  84. package/build/useAudioRecorder.js +0 -427
  85. package/build/useAudioRecorder.js.map +0 -1
  86. package/build/utils/BlobFix.d.ts +0 -9
  87. package/build/utils/BlobFix.d.ts.map +0 -1
  88. package/build/utils/BlobFix.js +0 -498
  89. package/build/utils/BlobFix.js.map +0 -1
  90. package/build/utils/audioProcessing.d.ts +0 -24
  91. package/build/utils/audioProcessing.d.ts.map +0 -1
  92. package/build/utils/audioProcessing.js +0 -133
  93. package/build/utils/audioProcessing.js.map +0 -1
  94. package/build/utils/concatenateBuffers.d.ts +0 -8
  95. package/build/utils/concatenateBuffers.d.ts.map +0 -1
  96. package/build/utils/concatenateBuffers.js +0 -21
  97. package/build/utils/concatenateBuffers.js.map +0 -1
  98. package/build/utils/convertPCMToFloat32.d.ts +0 -13
  99. package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
  100. package/build/utils/convertPCMToFloat32.js +0 -120
  101. package/build/utils/convertPCMToFloat32.js.map +0 -1
  102. package/build/utils/encodingToBitDepth.d.ts +0 -5
  103. package/build/utils/encodingToBitDepth.d.ts.map +0 -1
  104. package/build/utils/encodingToBitDepth.js +0 -13
  105. package/build/utils/encodingToBitDepth.js.map +0 -1
  106. package/build/utils/getWavFileInfo.d.ts +0 -26
  107. package/build/utils/getWavFileInfo.d.ts.map +0 -1
  108. package/build/utils/getWavFileInfo.js +0 -92
  109. package/build/utils/getWavFileInfo.js.map +0 -1
  110. package/build/utils/writeWavHeader.d.ts +0 -49
  111. package/build/utils/writeWavHeader.d.ts.map +0 -1
  112. package/build/utils/writeWavHeader.js +0 -91
  113. package/build/utils/writeWavHeader.js.map +0 -1
  114. package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  115. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  116. package/build/workers/InlineFeaturesExtractor.web.js +0 -828
  117. package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
  118. package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
  119. package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  120. package/build/workers/inlineAudioWebWorker.web.js +0 -157
  121. package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
  122. package/expo-module.config.json +0 -9
  123. package/ios/AudioAnalysisData.swift +0 -74
  124. package/ios/AudioNotificationManager.swift +0 -135
  125. package/ios/AudioProcessingHelpers.swift +0 -743
  126. package/ios/AudioProcessor.swift +0 -858
  127. package/ios/AudioStreamError.swift +0 -7
  128. package/ios/AudioStreamManager.swift +0 -1708
  129. package/ios/AudioStreamManagerDelegate.swift +0 -16
  130. package/ios/DataPoint.swift +0 -54
  131. package/ios/DecodingConfig.swift +0 -47
  132. package/ios/ExpoAudioStream.podspec +0 -27
  133. package/ios/ExpoAudioStreamModule.swift +0 -698
  134. package/ios/FFT.swift +0 -62
  135. package/ios/Features.swift +0 -95
  136. package/ios/Logger.swift +0 -7
  137. package/ios/NotificationExtension.swift +0 -15
  138. package/ios/RecordingResult.swift +0 -22
  139. package/ios/RecordingSettings.swift +0 -265
  140. package/ios/WaveformExtractor.swift +0 -105
  141. package/plugin/build/index.d.ts +0 -21
  142. package/plugin/build/index.js +0 -191
  143. package/plugin/src/index.ts +0 -278
  144. package/plugin/tsconfig.json +0 -10
  145. package/plugin/tsconfig.tsbuildinfo +0 -1
  146. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -165
  147. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -370
  148. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  149. package/src/AudioRecorder.provider.tsx +0 -54
  150. package/src/ExpoAudioStream.native.ts +0 -6
  151. package/src/ExpoAudioStream.types.ts +0 -329
  152. package/src/ExpoAudioStream.web.ts +0 -359
  153. package/src/ExpoAudioStreamModule.ts +0 -286
  154. package/src/WebRecorder.web.ts +0 -580
  155. package/src/constants.ts +0 -18
  156. package/src/events.ts +0 -60
  157. package/src/useAudioRecorder.tsx +0 -620
  158. package/src/utils/BlobFix.ts +0 -559
  159. package/src/utils/audioProcessing.ts +0 -205
  160. package/src/utils/concatenateBuffers.ts +0 -24
  161. package/src/utils/convertPCMToFloat32.ts +0 -170
  162. package/src/utils/encodingToBitDepth.ts +0 -18
  163. package/src/utils/getWavFileInfo.ts +0 -132
  164. package/src/utils/writeWavHeader.ts +0 -114
  165. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
  166. package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
@@ -1,1936 +0,0 @@
1
- // packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
2
- package net.siteed.audiostream
3
-
4
- import java.nio.ByteBuffer
5
- import java.nio.ByteOrder
6
- import kotlin.math.*
7
- import android.util.Log
8
- import java.io.File
9
- import java.util.concurrent.atomic.AtomicLong
10
- import kotlin.system.measureTimeMillis
11
- import android.media.MediaExtractor
12
- import android.media.MediaFormat
13
- import android.media.MediaCodec
14
- import java.io.FileInputStream
15
- import java.io.RandomAccessFile
16
- import java.util.zip.CRC32
17
-
18
/**
 * Options describing the PCM format that decoded audio should be converted to.
 *
 * @property targetSampleRate Optional target sample rate; `null` by default.
 * @property targetChannels Optional target number of channels; `null` by default.
 * @property targetBitDepth Target PCM bit depth; defaults to 16-bit.
 * @property normalizeAudio Whether to normalize audio levels; defaults to `false`.
 */
data class DecodingConfig(
    val targetSampleRate: Int? = null,
    val targetChannels: Int? = null,
    val targetBitDepth: Int = 16,
    val normalizeAudio: Boolean = false
)
24
-
25
- class AudioProcessor(private val filesDir: File) {
26
companion object {
    const val DCT_SQRT_DIVISOR = 2.0
    private const val N_FFT = 1024
    private const val N_CHROMA = 12

    // Held at companion scope so the counter keeps its value during pause/resume cycles.
    private val uniqueIdCounter = AtomicLong(0L)

    /** Sets the shared id counter back to zero. */
    fun resetUniqueIdCounter() = uniqueIdCounter.set(0L)
}
37
-
38
/**
 * Raw audio bytes together with the format values needed to interpret them.
 *
 * The generated `equals`/`hashCode` of a data class compare array properties by reference,
 * so both are overridden here to compare [data] by content.
 *
 * @property data The audio payload.
 * @property sampleRate Sample rate of [data].
 * @property bitDepth Bits per sample of [data].
 * @property channels Number of channels in [data].
 * @property durationMs Duration in milliseconds; defaults to 0.
 */
data class AudioData(
    val data: ByteArray,
    val sampleRate: Int,
    val bitDepth: Int,
    val channels: Int,
    val durationMs: Long = 0
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is AudioData) return false
        return sampleRate == other.sampleRate &&
            bitDepth == other.bitDepth &&
            channels == other.channels &&
            durationMs == other.durationMs &&
            data.contentEquals(other.data)
    }

    override fun hashCode(): Int {
        var result = data.contentHashCode()
        result = 31 * result + sampleRate
        result = 31 * result + bitDepth
        result = 31 * result + channels
        result = 31 * result + durationMs.hashCode()
        return result
    }
}
39
-
40
- private var cumulativeMinAmplitude = Float.MAX_VALUE
41
- private var cumulativeMaxAmplitude = Float.NEGATIVE_INFINITY
42
-
43
/**
 * Reads a WAV file from disk and returns its sample bytes and format.
 *
 * A leading `file://` is stripped from [filePath]. If no file exists at that path, a file with
 * the same name inside [filesDir] is tried instead.
 *
 * @param filePath Path (optionally prefixed with `file://`) of the WAV file.
 * @return The loaded [AudioData], or `null` when the file is missing, is not a RIFF/WAVE
 * container, lacks a `fmt ` or `data` chunk, declares an unsupported format, or cannot be read.
 */
private fun loadAudioFile(filePath: String): AudioData? {
    try {
        val fileUri = filePath.removePrefix("file://")
        Log.d("AudioProcessor", "Processing WAV file: $fileUri")

        val file = File(fileUri).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: run {
                Log.e("AudioProcessor", "File not found: $fileUri")
                return null
            }

        // `use` closes the file handle on every exit path, including the early returns below.
        return RandomAccessFile(file, "r").use { raf ->
            val fileSize = raf.length()

            // Read RIFF header
            val riffHeader = ByteArray(4).apply { raf.readFully(this) }
            if (String(riffHeader) != "RIFF") {
                Log.e("AudioProcessor", "Invalid RIFF header")
                return null
            }

            // The RIFF id is followed by a 4-byte container size; skip it so that the next
            // four bytes read are the "WAVE" form type rather than the size field.
            raf.seek(raf.filePointer + 4)

            // Read WAVE header
            val waveHeader = ByteArray(4).apply { raf.readFully(this) }
            if (String(waveHeader) != "WAVE") {
                Log.e("AudioProcessor", "Invalid WAVE header")
                return null
            }

            var fmtChunkFound = false
            var dataChunkFound = false
            var sampleRate = 0
            var channels = 0
            var bitDepth = 0
            var dataOffset = 0L
            var dataSize = 0L

            // Parse chunks
            while (raf.filePointer < fileSize - 8) {
                val chunkId = ByteArray(4).apply { raf.readFully(this) }.toString(Charsets.UTF_8)
                val chunkSizeBytes = ByteArray(4).apply { raf.readFully(this) }
                // Chunk sizes are unsigned 32-bit little-endian values.
                val chunkSize = ByteBuffer.wrap(chunkSizeBytes).order(ByteOrder.LITTLE_ENDIAN).int.toLong() and 0xFFFFFFFFL
                val chunkStart = raf.filePointer
                // RIFF chunks are word-aligned: an odd-sized payload is followed by one pad byte.
                val nextChunkStart = minOf(chunkStart + chunkSize + (chunkSize and 1L), fileSize)

                Log.d("AudioProcessor", "Found chunk: $chunkId ($chunkSize bytes)")

                when (chunkId) {
                    "fmt " -> {
                        if (chunkSize < 16) {
                            Log.e("AudioProcessor", "Invalid fmt chunk size")
                            return null
                        }

                        val formatData = ByteArray(16)
                        raf.readFully(formatData)
                        val formatBuffer = ByteBuffer.wrap(formatData).order(ByteOrder.LITTLE_ENDIAN)

                        val audioFormat = formatBuffer.short // only logged, not validated
                        channels = formatBuffer.short.toInt() and 0xFFFF
                        sampleRate = formatBuffer.int
                        val byteRate = formatBuffer.int
                        val blockAlign = formatBuffer.short
                        bitDepth = formatBuffer.short.toInt() and 0xFFFF

                        Log.d("AudioProcessor", "Raw format data: ${formatData.joinToString(", ")}")
                        Log.d("AudioProcessor", "Format chunk: audioFormat=$audioFormat, channels=$channels, sampleRate=$sampleRate, bitDepth=$bitDepth, byteRate=$byteRate, blockAlign=$blockAlign")

                        if (bitDepth !in listOf(8, 16, 32)) {
                            Log.e("AudioProcessor", "Invalid bit depth: $bitDepth")
                            return null
                        }
                        // Both values are used as divisors in the duration calculation below.
                        if (channels <= 0 || sampleRate <= 0) {
                            Log.e("AudioProcessor", "Invalid format: channels=$channels, sampleRate=$sampleRate")
                            return null
                        }

                        // Skip any extension bytes beyond the 16 that were parsed.
                        raf.seek(nextChunkStart)
                        fmtChunkFound = true
                    }
                    "data" -> {
                        dataOffset = chunkStart
                        dataSize = chunkSize
                        dataChunkFound = true
                        break
                    }
                    else -> {
                        // Skip unknown chunks
                        raf.seek(nextChunkStart)
                    }
                }
            }

            if (!fmtChunkFound || !dataChunkFound) {
                Log.e("AudioProcessor", "Missing essential chunks (fmt=$fmtChunkFound, data=$dataChunkFound)")
                return null
            }

            // Calculate actual data size if it seems wrong
            if (dataSize <= 0 || dataSize > fileSize - dataOffset) {
                dataSize = fileSize - dataOffset
                Log.d("AudioProcessor", "Adjusted data size to: $dataSize")
            }

            // A ByteArray cannot hold more than Int.MAX_VALUE bytes.
            if (dataSize > Int.MAX_VALUE) {
                Log.e("AudioProcessor", "Data chunk too large to load: $dataSize bytes")
                return null
            }

            Log.d("AudioProcessor", "Reading PCM data: offset=$dataOffset, size=$dataSize")

            val wavData = ByteArray(dataSize.toInt())
            raf.seek(dataOffset)
            raf.readFully(wavData)

            // Calculate duration in ms
            // Each sample is bitsPerSample/8 bytes, and we have 'channels' samples per frame
            val bytesPerFrame = channels * (bitDepth / 8)
            val numFrames = wavData.size / bytesPerFrame
            val durationMs = (numFrames * 1000L) / sampleRate

            Log.d(Constants.TAG, "WAV duration calculation: size=${wavData.size}, bytesPerFrame=$bytesPerFrame, numFrames=$numFrames, sampleRate=$sampleRate, duration=${durationMs}ms")

            AudioData(
                data = wavData,
                sampleRate = sampleRate,
                channels = channels,
                bitDepth = bitDepth,
                durationMs = durationMs
            )
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load WAV file: ${e.message}", e)
        return null
    }
}
173
-
174
/**
 * Splits raw PCM audio into fixed-duration segments and extracts features for each segment.
 *
 * The bytes are converted to floats according to `config.encoding`, cut into segments of
 * `config.segmentDurationMs`, and one [DataPoint] is produced per segment. The cumulative
 * amplitude range of this processor is updated as a side effect.
 *
 * @param data The audio data in bytes.
 * @param config The recording configuration.
 * @return AudioAnalysisData containing the extracted features; an empty result when [data] is empty.
 * @throws IllegalArgumentException if `config.encoding` is not `pcm_8bit`, `pcm_16bit` or `pcm_32bit`.
 */
fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
    if (data.isEmpty()) {
        Log.e("AudioProcessor", "Received empty audio data")
        return AudioAnalysisData(
            segmentDurationMs = config.segmentDurationMs,
            durationMs = 0,
            bitDepth = 16,
            numberOfChannels = config.channels,
            sampleRate = config.sampleRate,
            samples = 0,
            dataPoints = emptyList(),
            amplitudeRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            rmsRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            extractionTimeMs = 0f,
        )
    }

    val sampleRate = config.sampleRate.toFloat()
    val bitDepth = when (config.encoding) {
        "pcm_8bit" -> 8
        "pcm_16bit" -> 16
        "pcm_32bit" -> 32
        else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
    }
    val channelData = convertToFloatArray(data, bitDepth)
    val featureOptions = config.features

    val totalSamples = channelData.size
    // Clamp to at least one sample per segment: a zero-length segment would turn the point
    // count below into Int.MAX_VALUE (division by zero) and the loop would never finish.
    val samplesPerSegment = ((config.segmentDurationMs / 1000.0) * sampleRate).toInt().coerceAtLeast(1)
    val totalPoints = ceil(totalSamples.toDouble() / samplesPerSegment).toInt()

    Log.d("AudioProcessor", "Extracting waveform totalSize=${data.size} with $totalSamples samples --> $totalPoints points")
    Log.d("AudioProcessor", "segmentDuration: ${config.segmentDurationMs}ms, samplesPerSegment: $samplesPerSegment")

    // Only used for the diagnostic log line below.
    val samplesPerPoint = ceil(channelData.size / totalPoints.toDouble()).toInt()
    Log.d("AudioProcessor", "Extracting waveform with samplesPerPoints=$samplesPerPoint")

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    var maxAmplitude = Float.NEGATIVE_INFINITY
    var minRms = Float.MAX_VALUE
    var maxRms = Float.NEGATIVE_INFINITY
    // Calculate total duration in milliseconds based on sample rate and total samples
    val durationMs = (totalSamples.toFloat() / sampleRate * 1000).toInt()

    // Measure the time taken for audio processing
    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until totalPoints) {
            val start = i * samplesPerSegment
            val end = min(start + samplesPerSegment, totalSamples)
            val segmentData = channelData.sliceArray(start until end)

            var sumSquares = 0f
            var zeroCrossings = 0
            var prevValue = 0f
            var localMinAmplitude = Float.MAX_VALUE
            // Float.MIN_VALUE is the smallest *positive* float, so seeding a maximum with it
            // reports 1.4e-45 instead of 0 for an all-zero segment.
            var localMaxAmplitude = Float.NEGATIVE_INFINITY

            for (value in segmentData) {
                sumSquares += value * value
                if (prevValue != 0f && value * prevValue < 0) zeroCrossings += 1
                prevValue = value

                val absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            val features = computeFeatures(
                segmentData = segmentData,
                sampleRate = sampleRate,
                sumSquares = sumSquares,
                zeroCrossings = zeroCrossings,
                segmentLength = segmentData.size,
                featureOptions = featureOptions,
                minAmplitude = localMinAmplitude,
                maxAmplitude = localMaxAmplitude
            )
            val rms = features.rms
            val silent = rms < 0.01
            val dB = 20 * log10(rms.toDouble()).toFloat()
            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
            minRms = min(minRms, rms)
            maxRms = max(maxRms, rms)

            val bytesPerSample = bitDepth / 8
            val startPosition = start * bytesPerSample * config.channels
            val endPosition = end * bytesPerSample * config.channels

            // Update cumulative amplitude range
            cumulativeMinAmplitude = min(cumulativeMinAmplitude, localMinAmplitude)
            cumulativeMaxAmplitude = max(cumulativeMaxAmplitude, localMaxAmplitude)

            val dataPoint = DataPoint(
                id = uniqueIdCounter.getAndIncrement(),
                amplitude = localMaxAmplitude, // Always use peak amplitude
                rms = rms, // Always include RMS
                dB = dB,
                silent = silent,
                features = features,
                speech = SpeechFeatures(isActive = !silent),
                startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                startPosition = startPosition,
                endPosition = endPosition,
                samples = segmentData.size
            )

            dataPoints.add(dataPoint)
        }
    }

    return AudioAnalysisData(
        segmentDurationMs = config.segmentDurationMs,
        durationMs = durationMs,
        bitDepth = bitDepth,
        numberOfChannels = config.channels,
        sampleRate = config.sampleRate,
        samples = totalSamples,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
308
-
309
/**
 * Restores the cumulative amplitude trackers to the same seed values they are declared with,
 * so the next processed segment establishes a fresh range.
 */
fun resetCumulativeAmplitudeRange() {
    cumulativeMinAmplitude = Float.MAX_VALUE
    // Float.MIN_VALUE is the smallest positive float, not the lowest value; use the same
    // seed as the property initializer.
    cumulativeMaxAmplitude = Float.NEGATIVE_INFINITY
}
313
-
314
/**
 * Converts little-endian PCM bytes to floats.
 *
 * - 8-bit samples are treated as unsigned (0..255, silence at 128) and mapped to [-1, 1).
 * - 16-bit samples are signed and divided by 32768.
 * - 32-bit samples are signed integers divided by `Int.MAX_VALUE.toFloat()`.
 *
 * Trailing bytes that do not form a whole sample are ignored.
 *
 * @param data The audio data in bytes.
 * @param bitDepth The bit depth of the audio data (8, 16 or 32).
 * @return The converted float array, one element per sample.
 * @throws IllegalArgumentException if [bitDepth] is not 8, 16 or 32.
 */
private fun convertToFloatArray(data: ByteArray, bitDepth: Int): FloatArray {
    return when (bitDepth) {
        16 -> {
            val shorts = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            FloatArray(shorts.remaining()) { index -> shorts.get(index) / 32768.0f }
        }
        // Kotlin's Byte is signed; mask to recover the unsigned 8-bit PCM value before
        // removing the 128 offset (otherwise silence, 0x80, would decode as -2.0).
        8 -> FloatArray(data.size) { index -> ((data[index].toInt() and 0xFF) - 128) / 128.0f }
        32 -> {
            val ints = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            val scale = Int.MAX_VALUE.toFloat()
            FloatArray(ints.remaining()) { index -> ints.get(index) / scale }
        }
        else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
    }
}
338
-
339
/**
 * Computes the features of one audio segment.
 *
 * RMS is always computed. Every other feature is computed only when its key in
 * [featureOptions] maps to `true`; otherwise it is left at its neutral value (0 or empty).
 * Each optional extractor is best-effort: an exception is logged and the neutral value is used.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @param minAmplitude The minimum amplitude, passed through to the result.
 * @param maxAmplitude The maximum amplitude, passed through to the result.
 * @param sumSquares The sum of squares of the segment samples.
 * @param zeroCrossings The number of zero crossings in the segment.
 * @param segmentLength The length of the segment.
 * @param featureOptions The feature options to compute.
 * @return The computed features.
 */
private fun computeFeatures(
    segmentData: FloatArray,
    sampleRate: Float,
    minAmplitude: Float,
    maxAmplitude: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: Map<String, Boolean>
): Features {
    fun enabled(feature: String): Boolean = featureOptions[feature] == true

    // Guard the divisions: an empty segment would otherwise yield NaN for rms and zcr.
    val hasSamples = segmentLength > 0
    val rms = if (hasSamples) sqrt(sumSquares / segmentLength) else 0f
    val energy = if (enabled("energy")) sumSquares else 0f
    val zcr = if (enabled("zcr") && hasSamples) zeroCrossings / segmentLength.toFloat() else 0f

    val mfcc = try {
        if (enabled("mfcc")) computeMFCC(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to extract MFCC: ${e.message}", e)
        emptyList()
    }

    val melSpectrogram = try {
        if (enabled("melSpectrogram")) computeMelSpectrogram(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to compute mel spectrogram: ${e.message}", e)
        emptyList()
    }

    val chroma = try {
        if (enabled("chromagram")) computeChroma(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to compute chroma: ${e.message}", e)
        emptyList()
    }

    // One FFT pass serves all four spectral descriptors, so it runs if any of them is requested.
    // Wrapped like the other optional features so a failure here cannot abort the whole analysis.
    val spectralFeatures = try {
        if (enabled("spectralCentroid") ||
            enabled("spectralFlatness") ||
            enabled("spectralRollOff") ||
            enabled("spectralBandwidth")
        ) {
            extractSpectralFeatures(segmentData, sampleRate)
        } else {
            SpectralFeatures()
        }
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to extract spectral features: ${e.message}", e)
        SpectralFeatures()
    }

    val tempo = try {
        if (enabled("tempo")) extractTempo(segmentData, sampleRate) else 0f
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to extract tempo: ${e.message}", e)
        0f
    }

    val hnr = try {
        if (enabled("hnr")) extractHNR(segmentData) else 0f
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to extract HNR: ${e.message}", e)
        0f
    }

    val spectralContrast = try {
        if (enabled("spectralContrast")) computeSpectralContrast(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to compute spectral contrast: ${e.message}", e)
        emptyList()
    }

    val tonnetz = try {
        if (enabled("tonnetz")) computeTonnetz(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to compute tonnetz: ${e.message}", e)
        emptyList()
    }

    val pitch = try {
        if (enabled("pitch")) estimatePitch(segmentData, sampleRate) else 0.0f
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to estimate pitch: ${e.message}", e)
        0.0f
    }

    // Checksum over the little-endian IEEE-754 bytes of the float samples.
    val crc32Value = if (enabled("crc32")) {
        val byteBuffer = ByteBuffer.allocate(segmentData.size * 4)
            .order(ByteOrder.LITTLE_ENDIAN)
        segmentData.forEach { value ->
            byteBuffer.putFloat(value)
        }

        val crc32 = CRC32()
        crc32.update(byteBuffer.array())
        crc32.value
    } else null

    return Features(
        energy = energy,
        mfcc = mfcc,
        rms = rms,
        minAmplitude = minAmplitude,
        maxAmplitude = maxAmplitude,
        zcr = zcr,
        spectralCentroid = spectralFeatures.centroid,
        spectralFlatness = spectralFeatures.flatness,
        spectralRollOff = spectralFeatures.rollOff,
        spectralBandwidth = spectralFeatures.bandwidth,
        tempo = tempo,
        hnr = hnr,
        melSpectrogram = melSpectrogram,
        chromagram = chroma,
        spectralContrast = spectralContrast,
        tonnetz = tonnetz,
        pitch = pitch,
        crc32 = crc32Value
    )
}
458
-
459
/**
 * Estimates a tempo value from the spacing of spectral-flux peaks.
 *
 * The segment is cut into frames of 2048 samples advanced by 512 samples. For each frame the
 * positive change in FFT magnitude relative to the previous frame is summed into an onset
 * value; local maxima of that sequence are treated as peaks, and the mean peak spacing is
 * converted to a per-minute rate.
 *
 * @param segmentData Samples to analyse.
 * @param sampleRate Sample rate of [segmentData].
 * @return The estimated tempo, or 120 when fewer than two peaks are found.
 */
private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
    val hopLength = 512
    val frameLength = 2048
    val halfFrame = frameLength / 2

    // Onset strength per frame, measured as spectral flux.
    val onsetEnvelope = mutableListOf<Float>()
    var previousSpectrum = FloatArray(halfFrame)

    var frameStart = 0
    while (frameStart < segmentData.size - frameLength) {
        // Every frame visited here lies fully inside the segment, so no padding is needed.
        val fftData = segmentData.copyOfRange(frameStart, frameStart + frameLength)
        FFT(frameLength).realForward(fftData)

        val magnitudes = FloatArray(halfFrame) { bin ->
            val re = fftData[2 * bin]
            val im = fftData[2 * bin + 1]
            sqrt(re * re + im * im)
        }

        // Spectral flux: only increases in magnitude contribute.
        var flux = 0f
        for (bin in magnitudes.indices) {
            flux += maxOf(magnitudes[bin] - previousSpectrum[bin], 0f)
        }
        onsetEnvelope.add(flux)
        previousSpectrum = magnitudes

        frameStart += hopLength
    }

    // A peak is a frame whose onset value exceeds both neighbours.
    val peaks = (1 until onsetEnvelope.size - 1).filter { frame ->
        onsetEnvelope[frame] > onsetEnvelope[frame - 1] && onsetEnvelope[frame] > onsetEnvelope[frame + 1]
    }

    if (peaks.size <= 1) {
        return 120f // Default tempo if no clear peaks found
    }

    val averageInterval = peaks.zipWithNext { earlier, later -> later - earlier }.average().toFloat()
    return 60f * sampleRate / (hopLength * averageInterval)
}
508
-
509
/**
 * Computes centroid, flatness, roll-off and bandwidth from one N_FFT-point spectrum.
 *
 * Only the first N_FFT samples are analysed; shorter input is zero-padded to N_FFT after the
 * Hann window has been applied.
 *
 * NOTE(review): the last bin is read from index 1 of the transformed buffer, which suggests
 * a packed real-FFT layout. If that is the layout, bin 0 below also folds that value in as
 * its imaginary part — confirm against FFT.realForward.
 *
 * @param samples Samples to analyse.
 * @param sampleRate Sample rate of [samples].
 * @return The four spectral descriptors.
 */
private fun extractSpectralFeatures(samples: FloatArray, sampleRate: Float): SpectralFeatures {
    // The FFT works on a fixed-size buffer, so longer input is truncated before windowing.
    val analysisInput = if (samples.size > N_FFT) samples.copyOf(N_FFT) else samples
    val windowed = applyHannWindow(analysisInput)

    // A fresh FloatArray is zero-filled, which provides the zero padding for short input.
    val fftBuffer = FloatArray(N_FFT)
    windowed.copyInto(fftBuffer, 0, 0, minOf(windowed.size, N_FFT))

    FFT(N_FFT).realForward(fftBuffer)

    // N_FFT / 2 + 1 bins: interleaved (re, im) pairs for the first N_FFT / 2, plus one final
    // bin taken from index 1 of the buffer.
    val halfSize = N_FFT / 2
    val magnitudeSpectrum = FloatArray(halfSize + 1) { bin ->
        if (bin < halfSize) {
            val re = fftBuffer[2 * bin]
            val im = fftBuffer[2 * bin + 1]
            sqrt(re * re + im * im)
        } else {
            abs(fftBuffer[1])
        }
    }

    // Flatness is defined on power, the other descriptors on magnitude.
    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { bin ->
        magnitudeSpectrum[bin] * magnitudeSpectrum[bin]
    }

    val centroid = computeSpectralCentroid(magnitudeSpectrum, sampleRate)
    return SpectralFeatures(
        centroid = centroid,
        flatness = computeSpectralFlatness(powerSpectrum),
        rollOff = computeSpectralRollOff(magnitudeSpectrum, sampleRate),
        bandwidth = computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid)
    )
}
558
-
559
/**
 * Magnitude-weighted mean frequency of the spectrum, with bin `i` placed at
 * `i * sampleRate / N_FFT`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate Sample rate used to convert bin indices to frequencies.
 * @return The centroid, or 0 when the magnitudes sum to zero.
 */
private fun computeSpectralCentroid(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) return 0f

    val binWidth = sampleRate / N_FFT
    var weightedTotal = 0f
    for (bin in magnitudeSpectrum.indices) {
        weightedTotal += bin * binWidth * magnitudeSpectrum[bin]
    }

    return weightedTotal / totalMagnitude
}
569
-
570
/**
 * Ratio of the geometric mean to the arithmetic mean of the power spectrum.
 *
 * The geometric mean is evaluated in log space, with a small epsilon added to every bin so
 * that zero-power bins do not produce `ln(0)`.
 *
 * @param powerSpectrum Power per frequency bin.
 * @return The flatness ratio, or 0 when the arithmetic mean is zero.
 */
private fun computeSpectralFlatness(powerSpectrum: FloatArray): Float {
    val binCount = powerSpectrum.size

    val logTotal = powerSpectrum.fold(0.0f) { acc, power -> acc + ln(power + 1e-10f) }
    val geometricMean = exp(logTotal / binCount)

    val arithmeticMean = powerSpectrum.sum() / binCount
    if (arithmeticMean == 0f) return 0f

    return geometricMean / arithmeticMean
}
583
-
584
/**
 * Frequency of the first bin at which the running sum of magnitudes reaches 85% of the
 * total, with bin `i` placed at `i * sampleRate / N_FFT`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate Sample rate used to convert bin indices to frequencies.
 * @return The roll-off frequency, or 0 if the threshold is never reached.
 */
private fun computeSpectralRollOff(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val threshold = magnitudeSpectrum.sum() * 0.85f

    var runningTotal = 0f
    for (bin in magnitudeSpectrum.indices) {
        runningTotal += magnitudeSpectrum[bin]
        if (runningTotal >= threshold) {
            return bin * (sampleRate / N_FFT)
        }
    }

    return 0f
}
598
-
599
/**
 * Computes the spectral bandwidth: the magnitude-weighted standard deviation of the bin
 * frequencies around [centroid].
 *
 * Bin `i` is mapped to `i * sampleRate / (2 * magnitudeSpectrum.size)`; the original code
 * notes that this mapping was chosen to match the iOS implementation.
 *
 * @param magnitudeSpectrum Magnitude per FFT bin.
 * @param sampleRate Sample rate used to convert bin indices to frequencies.
 * @param centroid Spectral centroid the spread is measured around.
 * @return The bandwidth, or `0f` when the magnitudes sum to exactly zero.
 */
private fun computeSpectralBandwidth(
    magnitudeSpectrum: FloatArray,
    sampleRate: Float,
    centroid: Float
): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) {
        return 0f
    }

    val frequencyDivisor = 2 * magnitudeSpectrum.size
    var weightedSpread = 0f
    for ((bin, magnitude) in magnitudeSpectrum.withIndex()) {
        val frequency = bin * sampleRate / frequencyDivisor
        weightedSpread += magnitude * (frequency - centroid).pow(2)
    }

    return sqrt(weightedSpread / totalMagnitude)
}
615
-
616
/**
 * Bundle of the four spectral descriptors computed from one magnitude/power spectrum.
 * Every field defaults to `0f`.
 *
 * @property centroid Magnitude-weighted mean frequency (presumably Hz — depends on the sample-rate unit).
 * @property flatness Ratio of geometric to arithmetic mean of the power spectrum.
 * @property rollOff Frequency at which the cumulative magnitude reaches the roll-off threshold.
 * @property bandwidth Magnitude-weighted spread of frequencies around the centroid.
 */
private data class SpectralFeatures(
    val centroid: Float = 0f,
    val flatness: Float = 0f,
    val rollOff: Float = 0f,
    val bandwidth: Float = 0f
)
622
-
623
/**
 * Resets the per-segment accumulators so a new segment can be measured.
 *
 * @param sumSquaresUpdater Receives `0f` to reset the running sum of squares.
 * @param zeroCrossingsUpdater Receives `0` to reset the zero-crossing counter.
 * @param localMinAmplitudeUpdater Receives `Float.MAX_VALUE`, the seed for a running minimum.
 * @param localMaxAmplitudeUpdater Receives `-Float.MAX_VALUE`, the seed for a running maximum.
 * @param segmentData The segment sample list; it is cleared in place.
 */
private fun resetSegmentData(
    sumSquaresUpdater: (Float) -> Unit,
    zeroCrossingsUpdater: (Int) -> Unit,
    localMinAmplitudeUpdater: (Float) -> Unit,
    localMaxAmplitudeUpdater: (Float) -> Unit,
    segmentData: MutableList<Float>
) {
    sumSquaresUpdater(0f)
    zeroCrossingsUpdater(0)
    localMinAmplitudeUpdater(Float.MAX_VALUE)
    // Float.MIN_VALUE is the smallest *positive* float, not the lowest value, so a running
    // maximum seeded with it can never report 0 or a negative peak. Use the true lower bound.
    localMaxAmplitudeUpdater(-Float.MAX_VALUE)
    segmentData.clear()
}
644
-
645
/**
 * Computes 13 Mel-frequency cepstral coefficients for [samples].
 *
 * Steps: take the power spectrum from `prepareFFT`, apply a 26-band Mel filterbank, take the
 * natural log of each band energy (floored at `1e-10`), then apply a cosine transform scaled
 * by `sqrt(2 / 26)`.
 *
 * @param samples Audio samples of the frame to analyse.
 * @param sampleRate Sample rate of [samples].
 * @return The 13 coefficients, or an empty list when the filterbank width does not match the
 * power spectrum size.
 */
private fun computeMFCC(samples: FloatArray, sampleRate: Float): List<Float> {
    val filterCount = 26
    val coefficientCount = 13

    val (powerSpectrum, _) = prepareFFT(samples, sampleRate)
    val melFilters = computeMelFilterbank(
        numFilters = filterCount,
        powerSpectrumSize = powerSpectrum.size,
        sampleRate = sampleRate
    )

    val widthMismatch = melFilters.any { it.size != powerSpectrum.size }
    if (widthMismatch) {
        Log.e("AudioProcessor", "Mel filter size (${melFilters[0].size}) does not match power spectrum size (${powerSpectrum.size})")
        return emptyList()
    }

    // Log energy per Mel band.
    val logMelEnergies = FloatArray(filterCount)
    for (band in 0 until filterCount) {
        var bandEnergy = 0f
        for (bin in powerSpectrum.indices) {
            bandEnergy += powerSpectrum[bin] * melFilters[band][bin]
        }
        logMelEnergies[band] = ln(maxOf(bandEnergy, 1e-10f))
    }

    // Cosine transform of the log energies.
    val scale = sqrt(2f / filterCount)
    return List(coefficientCount) { coefficient ->
        var accumulator = 0f
        for (band in logMelEnergies.indices) {
            accumulator += logMelEnergies[band] *
                cos(PI * coefficient * (2 * band + 1) / (2 * filterCount)).toFloat()
        }
        accumulator * scale
    }
}
679
-
680
/**
 * Builds a bank of triangular Mel filters.
 *
 * `numFilters + 2` edge points are spaced evenly on the Mel scale between 0 Hz and
 * `sampleRate / 2`, converted back to Hz, and mapped to bin indices (clamped to the last bin).
 * Filter `i` rises from edge `i` to edge `i + 1` and falls from edge `i + 1` to edge `i + 2`.
 *
 * NOTE(review): bins are computed as `hz * powerSpectrumSize / sampleRate`; whether that matches
 * the bin spacing of the spectrum produced by `prepareFFT` cannot be confirmed from this function.
 *
 * @param numFilters The number of Mel filters.
 * @param powerSpectrumSize The size of the power spectrum (width of every filter row).
 * @param sampleRate The sample rate of the audio data.
 * @return One weight row per filter, each of length [powerSpectrumSize].
 */
private fun computeMelFilterbank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): Array<FloatArray> {
    val lowestMel = hzToMel(0f)
    val highestMel = hzToMel(sampleRate / 2)
    val melSpacing = (highestMel - lowestMel) / (numFilters + 1)
    val lastBin = powerSpectrumSize - 1

    // Edge bin for every Mel point: Mel -> Hz -> bin index, clamped to the valid range.
    val edgeBins = IntArray(numFilters + 2) { point ->
        val hz = melToHz(lowestMel + point * melSpacing)
        (hz * powerSpectrumSize / sampleRate).roundToInt().coerceAtMost(lastBin)
    }

    val filterbank = Array(numFilters) { FloatArray(powerSpectrumSize) }
    for (filter in 0 until numFilters) {
        val leftBin = edgeBins[filter]
        val peakBin = edgeBins[filter + 1]
        val rightBin = edgeBins[filter + 2]
        val weights = filterbank[filter]

        // Rising slope; the range is empty when the two edges coincide.
        val riseWidth = (peakBin - leftBin).toFloat()
        for (bin in leftBin until peakBin) {
            weights[bin] = (bin - leftBin).toFloat() / riseWidth
        }

        // Falling slope, starting at full weight on the peak bin.
        val fallWidth = (rightBin - peakBin).toFloat()
        for (bin in peakBin until rightBin) {
            weights[bin] = (rightBin - bin).toFloat() / fallWidth
        }
    }

    return filterbank
}
735
-
736
/**
 * Applies a cosine transform to [logEnergies] and returns the first [numCoefficients] terms.
 *
 * Coefficient `i` is `sum_j logEnergies[j] * cos(PI * i * (j + 0.5) / n)`, divided by
 * `sqrt(DCT_SQRT_DIVISOR * n)` where `n` is the number of input energies.
 *
 * @param logEnergies The log energies.
 * @param numCoefficients The number of coefficients to compute.
 * @return The transformed coefficients.
 */
private fun computeDCT(logEnergies: List<Float>, numCoefficients: Int): List<Float> {
    val energyCount = logEnergies.size
    val normalizer = sqrt(DCT_SQRT_DIVISOR * energyCount)

    return FloatArray(numCoefficients) { coefficient ->
        var accumulator = 0.0
        logEnergies.forEachIndexed { position, energy ->
            accumulator += energy * cos(PI * coefficient * (position + 0.5) / energyCount)
        }
        (accumulator / normalizer).toFloat()
    }.toList()
}
756
-
757
/**
 * Estimates the harmonics-to-noise ratio (in dB) of [segmentData] from its autocorrelation.
 *
 * The autocorrelation is computed for every lag; the first peak whose prominence is at least
 * 10% of the autocorrelation maximum is taken as the harmonic energy, and the zero-lag value
 * minus that peak as the noise energy.
 *
 * @param segmentData The segment samples.
 * @return `10 * log10(harmonic / noise)`, or `0f` when no usable peak exists or either energy
 * is not strictly positive.
 */
private fun extractHNR(segmentData: FloatArray): Float {
    val frameSize = segmentData.size

    // Direct autocorrelation, one value per lag.
    val autocorrelation = FloatArray(frameSize) { lag ->
        var sum = 0f
        for (j in 0 until frameSize - lag) {
            sum += segmentData[j] * segmentData[j + lag]
        }
        sum
    }

    val maxAutocorrelation = autocorrelation.maxOrNull() ?: 0f
    val firstPeakIndex = findPeaks(autocorrelation, minProminence = 0.1f * maxAutocorrelation)
        .firstOrNull { it > 0 }
        ?: return 0f

    val harmonicEnergy = autocorrelation[firstPeakIndex]
    val noiseEnergy = autocorrelation[0] - harmonicEnergy

    // A local peak can still be negative; log10 of a non-positive ratio would return NaN/-Infinity.
    return if (harmonicEnergy > 0f && noiseEnergy > 0f) {
        10 * log10(harmonicEnergy / noiseEnergy)
    } else {
        0f
    }
}
790
-
791
/**
 * Returns the indices of interior local maxima of [data] whose prominence is at least
 * [minProminence].
 *
 * A point is a local maximum when it is strictly greater than both direct neighbours; its
 * prominence is its height above the higher of those two neighbours. The first and last
 * elements are never reported.
 */
private fun findPeaks(data: FloatArray, minProminence: Float): List<Int> =
    (1 until data.size - 1).filter { index ->
        val value = data[index]
        val left = data[index - 1]
        val right = data[index + 1]
        value > left && value > right && value - maxOf(left, right) >= minProminence
    }
803
-
804
/**
 * Loads an audio file as PCM, trying `MediaExtractor`/`MediaCodec` first and falling back to
 * the WAV parser for `.wav` files.
 *
 * The file is looked up at the given path (with any `file://` prefix removed) and, failing
 * that, by file name inside `filesDir`.
 *
 * @param fileUri Path or `file://` URI of the audio file.
 * @param decodingConfig Optional target sample rate / channels / bit depth / normalisation.
 * When `null` the decoded data is returned unchanged.
 * @return The decoded audio, or `null` when the file is missing or cannot be decoded.
 */
fun loadAudioFromAnyFormat(fileUri: String, decodingConfig: DecodingConfig? = null): AudioData? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            Log.e("AudioProcessor", "File not found in any location: $cleanUri")
            return null
        }

    // First try MediaExtractor
    val extractor = MediaExtractor()
    try {
        Log.d("AudioProcessor", "Attempting MediaExtractor with path: ${file.absolutePath}")
        extractor.setDataSource(file.absolutePath)

        // Find the first audio track
        val audioTrackIndex = (0 until extractor.trackCount)
            .find { extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true }

        if (audioTrackIndex != null) {
            val format = extractor.getTrackFormat(audioTrackIndex)
            extractor.selectTrack(audioTrackIndex)

            // Get original audio properties
            val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
            val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
            val totalDurationUs = try {
                format.getLong(MediaFormat.KEY_DURATION)
            } catch (e: Exception) {
                (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
            }
            Log.d("AudioProcessor", "Raw duration from format: ${totalDurationUs}us")

            val totalDurationMs = totalDurationUs / 1000
            Log.d("AudioProcessor", "Final duration: ${totalDurationMs}ms")

            // Process using MediaExtractor
            val pcmData = decodeAudioToPCM(extractor, format)
            val processedData = if (decodingConfig != null) {
                processAudio(
                    pcmData,
                    originalSampleRate,
                    decodingConfig.targetSampleRate,
                    originalChannels,
                    decodingConfig.targetChannels,
                    decodingConfig.normalizeAudio
                )
            } else {
                pcmData
            }

            // MediaCodec emits 16-bit PCM unless configured otherwise, so the reported bit depth
            // may only differ from 16 if the samples are actually converted.
            val decodedBitDepth = 16
            val outputBitDepth = decodingConfig?.targetBitDepth ?: decodedBitDepth
            val outputData = if (outputBitDepth != decodedBitDepth) {
                AudioFormatUtils.convertBitDepth(processedData, decodedBitDepth, outputBitDepth)
            } else {
                processedData
            }

            return AudioData(
                data = outputData,
                sampleRate = decodingConfig?.targetSampleRate ?: originalSampleRate,
                bitDepth = outputBitDepth,
                channels = decodingConfig?.targetChannels ?: originalChannels,
                durationMs = totalDurationMs // Pass through the duration
            )
        }
    } catch (e: Exception) {
        Log.d("AudioProcessor", "MediaExtractor failed, attempting WAV parser: ${e.message}")
    } finally {
        extractor.release()
    }

    // If MediaExtractor failed and file is WAV, try WAV parser
    if (file.name.lowercase().endsWith(".wav")) {
        Log.d("AudioProcessor", "Falling back to WAV parser")
        return loadAudioFile(file.absolutePath)?.let { wavData ->
            if (decodingConfig != null) {
                // NOTE(review): processAudio's helpers reinterpret the bytes as 16-bit samples;
                // confirm behaviour for WAV files with another bit depth.
                val processedData = processAudio(
                    wavData.data,
                    wavData.sampleRate,
                    decodingConfig.targetSampleRate,
                    wavData.channels,
                    decodingConfig.targetChannels,
                    decodingConfig.normalizeAudio
                )
                // Convert the samples whenever the reported bit depth differs from the parsed one.
                val outputData = if (decodingConfig.targetBitDepth != wavData.bitDepth) {
                    AudioFormatUtils.convertBitDepth(
                        processedData,
                        wavData.bitDepth,
                        decodingConfig.targetBitDepth
                    )
                } else {
                    processedData
                }
                AudioData(
                    data = outputData,
                    sampleRate = decodingConfig.targetSampleRate ?: wavData.sampleRate,
                    bitDepth = decodingConfig.targetBitDepth,
                    channels = decodingConfig.targetChannels ?: wavData.channels,
                    durationMs = wavData.durationMs // Pass through the duration
                )
            } else {
                wavData
            }
        }
    }

    Log.e("AudioProcessor", "Failed to process audio file with both MediaExtractor and WAV parser")
    return null
}
897
-
898
/**
 * Decodes the track currently selected on [extractor] to raw PCM bytes.
 *
 * Input samples are fed until the extractor is exhausted; output is drained until the decoder
 * itself signals end-of-stream, so frames still queued inside the codec when the input ends
 * are not lost.
 *
 * @param extractor Extractor positioned on the selected audio track.
 * @param format Format of that track; must carry a MIME type.
 * @return All decoded PCM bytes in decoder output order.
 * @throws IllegalArgumentException if [format] has no MIME type.
 */
private fun decodeAudioToPCM(extractor: MediaExtractor, format: MediaFormat): ByteArray {
    val mime = requireNotNull(format.getString(MediaFormat.KEY_MIME)) { "Track format has no MIME type" }
    val decoder = MediaCodec.createDecoderByType(mime)
    // Growable byte sink; a MutableList<Byte> would box every single byte.
    val pcmStream = java.io.ByteArrayOutputStream()

    try {
        decoder.configure(format, null, null, 0)
        decoder.start()

        val info = MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false

        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = decoder.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(decoder.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0) {
                        decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = decoder.dequeueOutputBuffer(info, 10000)
            if (outputBufferId >= 0) {
                if (info.size > 0) {
                    val outputBuffer = checkNotNull(decoder.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    // Valid data occupies [offset, offset + size) of the codec buffer.
                    outputBuffer.position(info.offset)
                    outputBuffer.limit(info.offset + info.size)
                    val chunk = ByteArray(info.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                decoder.releaseOutputBuffer(outputBufferId, false)

                if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        decoder.stop()
    } finally {
        // Always free the codec, including when configure/start/decoding throws.
        decoder.release()
    }

    return pcmStream.toByteArray()
}
937
-
938
/**
 * Resamples interleaved 16-bit little-endian PCM with linear interpolation.
 *
 * The channel layout is preserved: each channel of a frame is interpolated independently, so
 * the result still has [originalChannels] interleaved channels. (The previous version silently
 * down-mixed to mono while the visible caller kept treating the data as [originalChannels].)
 *
 * @param pcmData Interleaved 16-bit little-endian PCM; a trailing odd byte is ignored.
 * @param originalSampleRate Sample rate of [pcmData]; must be positive.
 * @param targetSampleRate Desired sample rate; must be positive.
 * @param originalChannels Number of interleaved channels in [pcmData]; values below 1 are treated as 1.
 * @return The resampled PCM, same channel layout and sample format as the input.
 * @throws IllegalArgumentException if either sample rate is not positive.
 */
private fun resampleAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int,
    originalChannels: Int
): ByteArray {
    require(originalSampleRate > 0 && targetSampleRate > 0) {
        "Sample rates must be positive, got $originalSampleRate -> $targetSampleRate"
    }
    val channelCount = originalChannels.coerceAtLeast(1)

    // Convert byte array to short array (16-bit samples)
    val source = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(source)

    val sourceFrames = source.size / channelCount
    val resampleRatio = targetSampleRate.toDouble() / originalSampleRate
    val targetFrames = (sourceFrames * resampleRatio).toInt()
    val resampled = ShortArray(targetFrames * channelCount)

    for (frame in 0 until targetFrames) {
        val sourcePosition = frame / resampleRatio
        // Clamp both neighbours so rounding at the tail can never index past the last frame.
        val lowerFrame = sourcePosition.toInt().coerceAtMost(sourceFrames - 1)
        val upperFrame = minOf(lowerFrame + 1, sourceFrames - 1)
        val fraction = sourcePosition - lowerFrame

        for (channel in 0 until channelCount) {
            val lower = source[lowerFrame * channelCount + channel].toDouble()
            val upper = source[upperFrame * channelCount + channel].toDouble()
            // Linear interpolation between the two neighbouring frames.
            resampled[frame * channelCount + channel] = (lower + fraction * (upper - lower)).toInt().toShort()
        }
    }

    // Convert back to byte array
    val resultBuffer = ByteBuffer.allocate(resampled.size * 2)
    resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
    resultBuffer.asShortBuffer().put(resampled)
    return resultBuffer.array()
}
981
-
982
/**
 * Down-mixes interleaved multi-channel samples to mono by averaging the channels of each frame.
 *
 * The average uses integer division (truncating toward zero); samples beyond the last complete
 * frame are ignored.
 *
 * @param stereoData Interleaved samples with [channels] channels per frame.
 * @param channels Number of interleaved channels.
 * @return One averaged sample per frame.
 */
private fun convertToMono(stereoData: ShortArray, channels: Int): ShortArray =
    ShortArray(stereoData.size / channels) { frame ->
        val frameStart = frame * channels
        var total = 0
        for (offset in 0 until channels) {
            total += stereoData[frameStart + offset]
        }
        (total / channels).toShort()
    }
996
-
997
/**
 * Linearly interpolates between [a] and [b]: returns [a] for `fraction == 0.0`, [b] for
 * `fraction == 1.0`, and the proportional point on the line through them otherwise.
 */
private fun linearInterpolate(a: Double, b: Double, fraction: Double): Double =
    a + fraction * (b - a)
1000
-
1001
/**
 * Applies the optional post-decoding steps to [pcmData], in this order: resampling, channel
 * conversion, normalisation.
 *
 * @param pcmData Decoded PCM bytes.
 * @param originalSampleRate Sample rate of [pcmData].
 * @param targetSampleRate Desired sample rate; resampling runs only when non-null and different.
 * @param originalChannels Channel count of [pcmData].
 * @param targetChannels Desired channel count; conversion runs only when non-null and different.
 * @param normalize Whether to normalise the result.
 * @return The processed PCM, or [pcmData] itself when no step applies.
 */
private fun processAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int?,
    originalChannels: Int,
    targetChannels: Int?,
    normalize: Boolean
): ByteArray {
    val resampled = if (targetSampleRate != null && targetSampleRate != originalSampleRate) {
        resampleAudio(pcmData, originalSampleRate, targetSampleRate, originalChannels)
    } else {
        pcmData
    }

    val rechanneled = if (targetChannels != null && targetChannels != originalChannels) {
        convertChannels(resampled, originalChannels, targetChannels)
    } else {
        resampled
    }

    return if (normalize) normalizeAudio(rechanneled) else rechanneled
}
1028
-
1029
/**
 * Peak-normalises 16-bit little-endian PCM so that the loudest sample reaches `Short.MAX_VALUE`.
 *
 * Every sample is multiplied by `Short.MAX_VALUE / peak` and truncated toward zero. All-zero
 * (or empty) input is returned with unchanged sample values.
 *
 * @param pcmData 16-bit little-endian PCM; a trailing odd byte is dropped.
 * @return A new byte array holding the scaled samples.
 */
private fun normalizeAudio(pcmData: ByteArray): ByteArray {
    val samples = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples)

    // Largest absolute sample value in the buffer.
    val peak = samples.fold(0) { highest, sample -> maxOf(highest, abs(sample.toInt())) }

    if (peak > 0) {
        val gain = Short.MAX_VALUE.toFloat() / peak
        samples.indices.forEach { index ->
            samples[index] = (samples[index] * gain).toInt().toShort()
        }
    }

    return ByteBuffer.allocate(samples.size * 2)
        .order(ByteOrder.LITTLE_ENDIAN)
        .apply { asShortBuffer().put(samples) }
        .array()
}
1053
-
1054
/**
 * Converts interleaved 16-bit little-endian PCM from [originalChannels] to [targetChannels].
 *
 * Per frame:
 * - a mono target receives the integer average of all source channels;
 * - otherwise target channel `c` copies source channel `c` when it exists, and receives the
 *   frame average when the source has fewer channels (so mono input is duplicated).
 *
 * The previous loop iterated once per output *byte* and copied raw shorts without mixing, which
 * overflowed or underflowed the buffers for any real conversion.
 *
 * @param pcmData Interleaved 16-bit little-endian PCM; an incomplete trailing frame is dropped.
 * @param originalChannels Channel count of [pcmData]; must be positive.
 * @param targetChannels Desired channel count; must be positive.
 * @return The converted PCM, or [pcmData] itself when the channel counts are equal.
 * @throws IllegalArgumentException if either channel count is not positive.
 */
private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
    require(originalChannels > 0 && targetChannels > 0) {
        "Channel counts must be positive, got $originalChannels -> $targetChannels"
    }
    if (originalChannels == targetChannels) return pcmData

    val inputBuffer = ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    val frameCount = inputBuffer.remaining() / originalChannels

    val result = ByteArray(frameCount * targetChannels * 2)
    val outputBuffer = ByteBuffer.wrap(result).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()

    val frame = ShortArray(originalChannels)
    repeat(frameCount) {
        inputBuffer.get(frame)

        var frameSum = 0
        for (sample in frame) {
            frameSum += sample
        }
        val frameAverage = (frameSum / originalChannels).toShort()

        for (channel in 0 until targetChannels) {
            val sample = when {
                targetChannels == 1 -> frameAverage
                channel < originalChannels -> frame[channel]
                else -> frameAverage
            }
            outputBuffer.put(sample)
        }
    }

    return result
}
1069
-
1070
/**
 * Logs the first 44 bytes of [file] (the standard WAV header size) as hex, as ASCII, and as
 * decoded little-endian header fields. Any failure — including a file shorter than 44 bytes —
 * is logged and swallowed; this is a debugging aid only.
 */
private fun debugWavHeader(file: File) {
    try {
        val header = ByteArray(44) // Standard WAV header size
        RandomAccessFile(file, "r").use { it.readFully(header) }

        val hexDump = header.joinToString(", ") { String.format("%02X", it) }
        Log.d("AudioProcessor", "WAV Header Bytes: $hexDump")

        val asciiDump = header.map { it.toInt().toChar() }.joinToString("")
        Log.d("AudioProcessor", "ASCII: $asciiDump")

        // Multi-byte header fields are little-endian.
        val fields = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
        Log.d("AudioProcessor", """
            RIFF header: ${String(header, 0, 4)}
            File size: ${fields.getInt(4)}
            WAVE header: ${String(header, 8, 4)}
            fmt header: ${String(header, 12, 4)}
            Chunk size: ${fields.getInt(16)}
            Audio format: ${fields.getShort(20)}
            Channels: ${fields.getShort(22)}
            Sample rate: ${fields.getInt(24)}
            Byte rate: ${fields.getInt(28)}
            Block align: ${fields.getShort(32)}
            Bits per sample: ${fields.getShort(34)}
        """.trimIndent())
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to debug WAV header: ${e.message}")
    }
}
1098
-
1099
/**
 * Builds a down-sampled preview (peak amplitude and RMS per point) of [audioData].
 *
 * @param audioData Decoded audio to summarise.
 * @param numberOfPoints Number of preview points requested; must be positive.
 * @param startTimeMs Optional start of the preview range in ms; must lie within the audio duration.
 * @param endTimeMs Optional end of the preview range in ms; clamped to the audio duration.
 * @param config Recording config; only `segmentDurationMs` is read here.
 * @return The preview data points together with their amplitude and RMS ranges.
 * @throws IllegalArgumentException if [numberOfPoints] is not positive, the time range is
 * invalid, or the range contains no samples.
 * @throws IllegalStateException if a segment cannot be processed or no data point is produced.
 */
fun generatePreview(
    audioData: AudioData,
    numberOfPoints: Int,
    startTimeMs: Long? = null,
    endTimeMs: Long? = null,
    config: RecordingConfig
): AudioAnalysisData {
    // Guards the integer division below (zero used to raise ArithmeticException).
    require(numberOfPoints > 0) { "numberOfPoints must be positive, got: $numberOfPoints" }

    val totalDurationMs = audioData.durationMs

    Log.d(Constants.TAG, "Total audio duration: ${totalDurationMs}ms")

    // Validate time range
    if (startTimeMs != null) {
        require(startTimeMs >= 0) { "startTime must be non-negative, got: $startTimeMs" }
        require(startTimeMs <= totalDurationMs) { "startTime ($startTimeMs) is beyond audio duration ($totalDurationMs)" }
    }

    if (endTimeMs != null) {
        require(endTimeMs >= 0) { "endTime must be non-negative, got: $endTimeMs" }
        if (endTimeMs > totalDurationMs) {
            Log.w(Constants.TAG, "endTime ($endTimeMs) is beyond audio duration ($totalDurationMs), clamping to duration")
        }
        if (startTimeMs != null) {
            require(startTimeMs < endTimeMs) { "startTime ($startTimeMs) must be less than endTime ($endTimeMs)" }
        }
    }

    // Calculate effective range
    val effectiveStartMs = startTimeMs ?: 0L
    val effectiveEndMs = (endTimeMs ?: totalDurationMs).coerceAtMost(totalDurationMs)
    val durationMs = effectiveEndMs - effectiveStartMs

    Log.d(Constants.TAG, "Preview range: ${effectiveStartMs}ms to ${effectiveEndMs}ms (${durationMs}ms)")

    // Calculate sample range.
    // NOTE(review): these indices are derived from the sample rate only, yet they are used
    // below to slice the raw byte array. For multi-byte or multi-channel PCM they therefore
    // cover only part of the requested range and may split a sample — confirm the intended unit.
    val startSampleIndex = ((effectiveStartMs * audioData.sampleRate) / 1000).toInt()
    val endSampleIndex = ((effectiveEndMs * audioData.sampleRate) / 1000).toInt().coerceAtMost(audioData.data.size)
    val samplesInRange = endSampleIndex - startSampleIndex

    if (samplesInRange <= 0) {
        throw IllegalArgumentException("Invalid sample range: contains no samples")
    }

    val samplesPerPoint = (samplesInRange / numberOfPoints).coerceAtLeast(1)

    val dataPoints = mutableListOf<DataPoint>()
    // Float.MIN_VALUE is the smallest positive float, so it is not a valid seed for a running
    // maximum; -Float.MAX_VALUE is the true lower bound.
    var minAmplitude = Float.MAX_VALUE
    var maxAmplitude = -Float.MAX_VALUE
    var minRms = Float.MAX_VALUE
    var maxRms = -Float.MAX_VALUE

    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until numberOfPoints) {
            val pointStartSample = startSampleIndex + (i * samplesPerPoint)
            val pointEndSample = minOf(startSampleIndex + ((i + 1) * samplesPerPoint), endSampleIndex)

            if (pointStartSample >= pointEndSample) break

            try {
                val segmentBytes = audioData.data.sliceArray(pointStartSample until pointEndSample)

                // Convert PCM bytes to float samples with proper bit depth handling
                val segmentData = when (audioData.bitDepth) {
                    16 -> convert16BitPcmToFloat(segmentBytes)
                    32 -> convert32BitPcmToFloat(segmentBytes)
                    else -> convert8BitPcmToFloat(segmentBytes)
                }

                // Calculate time points based on actual sample rate
                val startTimePoint = ((pointStartSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()
                val endTimePoint = ((pointEndSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()

                val rms = sqrt(segmentData.map { it * it }.average().toFloat())
                val amplitude = segmentData.maxOf { abs(it) } // Always use peak amplitude

                minAmplitude = minOf(minAmplitude, amplitude)
                maxAmplitude = maxOf(maxAmplitude, amplitude)
                minRms = minOf(minRms, rms)
                maxRms = maxOf(maxRms, rms)

                dataPoints.add(DataPoint(
                    id = i.toLong(),
                    amplitude = amplitude, // Peak amplitude
                    rms = rms, // RMS value
                    dB = 20 * log10(amplitude.toDouble()).toFloat(),
                    silent = amplitude < 0.01,
                    features = null,
                    speech = null,
                    startTime = startTimePoint,
                    endTime = endTimePoint,
                    startPosition = pointStartSample,
                    endPosition = pointEndSample,
                    samples = segmentData.size
                ))
            } catch (e: Exception) {
                Log.e(Constants.TAG, "Error processing segment $i: ${e.message}")
                throw IllegalStateException("Failed to process audio segment: ${e.message}", e)
            }
        }
    }

    if (dataPoints.isEmpty()) {
        throw IllegalStateException("No data points were generated")
    }

    return AudioAnalysisData(
        segmentDurationMs = config.segmentDurationMs,
        durationMs = durationMs.toInt(),
        bitDepth = audioData.bitDepth,
        numberOfChannels = audioData.channels,
        sampleRate = audioData.sampleRate,
        samples = samplesInRange,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
1218
-
1219
/**
 * Converts 16-bit little-endian PCM bytes to floats by dividing each sample by
 * `Short.MAX_VALUE`. A trailing odd byte is ignored.
 */
private fun convert16BitPcmToFloat(bytes: ByteArray): FloatArray {
    val sampleView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    return FloatArray(bytes.size / 2) { index ->
        sampleView.get(index).toFloat() / Short.MAX_VALUE
    }
}
1225
-
1226
/**
 * Converts 32-bit little-endian integer PCM bytes to floats by dividing each sample by
 * `Int.MAX_VALUE`. Trailing bytes that do not form a full 4-byte sample are ignored.
 */
private fun convert32BitPcmToFloat(bytes: ByteArray): FloatArray {
    val sampleView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
    return FloatArray(bytes.size / 4) { index ->
        sampleView.get(index).toFloat() / Int.MAX_VALUE
    }
}
1231
-
1232
/**
 * Converts unsigned 8-bit PCM (midpoint 128) to floats: `(unsignedValue - 128) / 127`.
 *
 * Kotlin's `Byte` is signed, so each byte is masked to its unsigned value first. Without the
 * mask, silence (0x80) was read as -128 and mapped to about -2.0 instead of 0.
 */
private fun convert8BitPcmToFloat(bytes: ByteArray): FloatArray =
    FloatArray(bytes.size) { index ->
        val unsignedSample = bytes[index].toInt() and 0xFF
        (unsignedSample - 128).toFloat() / 127f
    }
1235
-
1236
/**
 * Loads the `[startTimeMs, endTimeMs]` slice of an audio file.
 *
 * Files with a `.wav` extension for which `getWavHeaderSize` returns a size are read directly
 * through the WAV path; everything else (including `.wav` files without a usable header) goes
 * through the compressed-audio decoder.
 *
 * @param fileUri Path or URI of the audio file.
 * @param startTimeMs Start of the range in milliseconds.
 * @param endTimeMs End of the range in milliseconds.
 * @param config Optional decoding config; when `null`, a config with no target sample rate or
 * channel count, 16-bit depth and no normalisation is used.
 * @return The loaded slice, or `null` when loading fails for any reason (the error is logged).
 */
fun loadAudioRange(fileUri: String, startTimeMs: Long, endTimeMs: Long, config: DecodingConfig? = null): AudioData? {
    return try {
        val decodingConfig = config ?: DecodingConfig(
            targetSampleRate = null,
            targetChannels = null,
            targetBitDepth = 16,
            normalizeAudio = false
        )

        // The header is only inspected for files that claim to be WAV by extension.
        val hasWavExtension = fileUri.lowercase().endsWith(".wav")
        val wavHeaderSize = if (hasWavExtension) getWavHeaderSize(fileUri) else null

        when {
            wavHeaderSize != null -> {
                Log.d(Constants.TAG, "Loading WAV range with header size: $wavHeaderSize bytes")
                loadWavRange(fileUri, startTimeMs, endTimeMs, decodingConfig, wavHeaderSize)
            }
            else -> {
                if (hasWavExtension) {
                    Log.w(Constants.TAG, "File has .wav extension but invalid header, falling back to compressed loader")
                }
                Log.d(Constants.TAG, "Loading compressed audio range")
                loadCompressedAudioRange(fileUri, startTimeMs, endTimeMs, decodingConfig)
            }
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load audio range: ${e.message}", e)
        null
    }
}
1270
-
1271
/**
 * Reads the PCM bytes for `[startTimeMs, endTimeMs]` straight out of a WAV file.
 *
 * Byte offsets are aligned down to whole frames (`channels * bytesPerSample`) so a range can
 * never start or end in the middle of a sample, and the read is clamped to the bytes actually
 * present in the file instead of being padded with zeros.
 *
 * @param fileUri Path or `file://` URI of the WAV file (also looked up by name in `filesDir`).
 * @param startTimeMs Start of the range in milliseconds; negative values are treated as 0.
 * @param endTimeMs End of the range in milliseconds.
 * @param config Decoding config; only `targetBitDepth` is applied here.
 * @param headerSize Size in bytes of the WAV header preceding the sample data.
 * @return The requested slice, or `null` when the file or its format cannot be read (logged).
 */
private fun loadWavRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig,
    headerSize: Int
): AudioData? {
    try {
        val file = File(fileUri.removePrefix("file://")).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: throw IllegalArgumentException("File not found: $fileUri")

        // Use existing method to get audio format
        val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")

        val bytesPerFrame = format.channels * (format.bitDepth / 8)
        val bytesPerSecond = format.sampleRate * bytesPerFrame
        require(bytesPerFrame > 0 && bytesPerSecond > 0) {
            "Unsupported audio format: ${format.sampleRate}Hz, ${format.channels} channels, ${format.bitDepth}-bit"
        }

        // Time -> byte offset inside the data chunk, rounded down to a frame boundary.
        fun frameAlignedOffset(timeMs: Long): Long {
            val rawOffset = (timeMs.coerceAtLeast(0L) * bytesPerSecond) / 1000
            return rawOffset - rawOffset % bytesPerFrame
        }

        val startByte = headerSize + frameAlignedOffset(startTimeMs)
        val endByte = headerSize + frameAlignedOffset(endTimeMs)
        val requestedLength = (endByte - startByte).coerceAtLeast(0L)

        Log.d(Constants.TAG, """
            Loading WAV range:
            - headerSize: $headerSize
            - startByte: $startByte
            - endByte: $endByte
            - bytesPerSecond: $bytesPerSecond
        """.trimIndent())

        var audioDataBytes = RandomAccessFile(file, "r").use { raf ->
            val availableBytes = (raf.length() - startByte).coerceAtLeast(0L)
            val readableBytes = minOf(requestedLength, availableBytes, Int.MAX_VALUE.toLong())
            val length = (readableBytes - readableBytes % bytesPerFrame).toInt()
            ByteArray(length).also { buffer ->
                if (length > 0) {
                    raf.seek(startByte)
                    // readFully either fills the buffer or throws; a plain read() may stop short.
                    raf.readFully(buffer)
                }
            }
        }

        // Report the duration that was actually loaded when the file ends before endTimeMs.
        val loadedDurationMs = if (audioDataBytes.size.toLong() < requestedLength) {
            audioDataBytes.size * 1000L / bytesPerSecond
        } else {
            endTimeMs - startTimeMs
        }

        // Apply bit depth conversion if needed
        var effectiveBitDepth = format.bitDepth
        if (config.targetBitDepth != format.bitDepth) {
            audioDataBytes = AudioFormatUtils.convertBitDepth(
                audioDataBytes,
                format.bitDepth,
                config.targetBitDepth
            )
            effectiveBitDepth = config.targetBitDepth
            Log.d(Constants.TAG, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
        }

        return AudioData(
            data = audioDataBytes,
            sampleRate = format.sampleRate,
            channels = format.channels,
            bitDepth = effectiveBitDepth,
            durationMs = loadedDurationMs
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load WAV range: ${e.message}", e)
        return null
    }
}
1331
-
1332
/**
 * Decodes the `[startTimeMs, endTimeMs]` slice of a compressed audio file to PCM.
 *
 * Differences from the previous implementation:
 * - the first `audio/` track is selected instead of assuming track 0;
 * - decoded bytes go into a growable buffer instead of one pre-sized for the *target* format,
 *   which overflowed whenever the decoded format was larger;
 * - the decoder is drained until it reports end-of-stream, so trailing frames are kept;
 * - the requested target sample rate / channels / bit depth are actually applied before they
 *   are reported on the result (`config.normalizeAudio` is honoured as part of that step);
 * - an unknown duration no longer collapses the range to zero length;
 * - cleanup cannot throw out of `finally` and always releases the codec.
 *
 * NOTE(review): decoding starts at the sync sample closest to the requested start, so the
 * result can begin slightly before [startTimeMs] — same as before; confirm whether trimming is needed.
 *
 * @param fileUri Path or `file://` URI of the audio file.
 * @param startTimeMs Start of the range in milliseconds.
 * @param endTimeMs End of the range in milliseconds.
 * @param config Target format; `null` targets keep the decoded sample rate / channel count.
 * @return The decoded slice, or `null` on any failure (the error is logged).
 */
private fun loadCompressedAudioRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig
): AudioData? {
    val extractor = MediaExtractor()
    var decoder: MediaCodec? = null
    var decoderStarted = false

    try {
        extractor.setDataSource(fileUri.removePrefix("file://"))

        val audioTrackIndex = (0 until extractor.trackCount).firstOrNull { index ->
            extractor.getTrackFormat(index).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: throw IllegalArgumentException("No audio track found in $fileUri")
        val format = extractor.getTrackFormat(audioTrackIndex)
        extractor.selectTrack(audioTrackIndex)
        val mime = checkNotNull(format.getString(MediaFormat.KEY_MIME)) { "Audio track has no MIME type" }

        val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        val totalDurationUs = try {
            format.getLong(MediaFormat.KEY_DURATION)
        } catch (e: Exception) {
            (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
        }
        Log.d("AudioProcessor", "Raw duration from format: ${totalDurationUs}us")

        val totalDurationMs = totalDurationUs / 1000
        Log.d("AudioProcessor", "Final duration: ${totalDurationMs}ms")

        // Calculate valid time range; only clamp against the duration when it is known.
        val durationKnown = totalDurationMs > 0
        val validStartMs = if (durationKnown) {
            startTimeMs.coerceIn(0L, totalDurationMs)
        } else {
            startTimeMs.coerceAtLeast(0L)
        }
        val validEndMs = if (durationKnown) {
            endTimeMs.coerceIn(validStartMs, totalDurationMs)
        } else {
            endTimeMs.coerceAtLeast(validStartMs)
        }
        val effectiveDurationMs = validEndMs - validStartMs

        // Initialize decoder
        val codec = MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()
        decoderStarted = true

        // Seek to start position if needed
        if (validStartMs > 0) {
            extractor.seekTo(validStartMs * 1000, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
        }

        val targetSampleRate = config.targetSampleRate ?: originalSampleRate
        val targetChannels = config.targetChannels ?: originalChannels
        val targetBitDepth = config.targetBitDepth ?: 16

        Log.d(Constants.TAG, """
            Loading audio range:
            - start: ${validStartMs}ms
            - end: ${validEndMs}ms
            - duration: ${effectiveDurationMs}ms
            - format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
        """.trimIndent())

        val decodedStream = java.io.ByteArrayOutputStream()
        val bufferInfo = MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false

        while (!outputDone) {
            // Handle input
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(codec.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0 || extractor.sampleTime > validEndMs * 1000) {
                        // End of file or past the requested range: tell the codec to flush.
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            // Handle output; keep draining after input ends until the codec signals EOS.
            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, 10000)
            if (outputBufferId >= 0) {
                if (bufferInfo.size > 0) {
                    val decodedBuffer = checkNotNull(codec.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    // Valid data occupies [offset, offset + size) of the codec buffer.
                    decodedBuffer.position(bufferInfo.offset)
                    decodedBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val chunk = ByteArray(bufferInfo.size)
                    decodedBuffer.get(chunk)
                    decodedStream.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)

                if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        // The codec emits 16-bit PCM at the source rate/layout; convert to the requested format
        // so that the metadata reported below describes the bytes actually returned.
        val decodedBitDepth = 16
        val processedData = processAudio(
            decodedStream.toByteArray(),
            originalSampleRate,
            config.targetSampleRate,
            originalChannels,
            config.targetChannels,
            config.normalizeAudio
        )
        val audioData = if (targetBitDepth != decodedBitDepth) {
            AudioFormatUtils.convertBitDepth(processedData, decodedBitDepth, targetBitDepth)
        } else {
            processedData
        }

        return AudioData(
            data = audioData,
            sampleRate = targetSampleRate,
            channels = targetChannels,
            bitDepth = targetBitDepth,
            durationMs = endTimeMs - startTimeMs // Use the actual time range
        ).also {
            Log.d(Constants.TAG, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
        }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to load compressed audio range: ${e.message}", e)
        return null
    } finally {
        decoder?.let { codec ->
            try {
                // stop() is only legal on a started codec and may still throw after an error.
                if (decoderStarted) codec.stop()
            } catch (e: IllegalStateException) {
                Log.w(Constants.TAG, "Decoder stop failed during cleanup: ${e.message}")
            }
            codec.release()
        }
        extractor.release()
    }
}
1461
-
1462
- // Future audio editing methods
1463
/**
 * Loads the range between [startTimeMs] and [endTimeMs] of an audio file and saves it
 * as a PCM WAV file inside `filesDir`.
 *
 * Every numeric field of the 44-byte WAV header is written little-endian, as the RIFF
 * format requires. `RandomAccessFile.writeInt`/`writeShort` emit big-endian bytes, so the
 * whole header is assembled in a little-endian [ByteBuffer] instead.
 *
 * @param fileUri source audio file, forwarded to `loadAudioRange`
 * @param startTimeMs start of the range to keep, in milliseconds
 * @param endTimeMs end of the range to keep, in milliseconds
 * @param config decoding options; a default `DecodingConfig()` is used when null
 * @param outputFileName name of the file created in `filesDir`; when null a
 *   `trimmed_<timestamp>.wav` name is generated
 * @return the trimmed PCM data, or null when the range could not be loaded or writing failed
 */
fun trimAudio(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null,
    outputFileName: String? = null
): AudioData? {
    try {
        // Load the specified range
        val audioData = loadAudioRange(fileUri, startTimeMs, endTimeMs, config ?: DecodingConfig())
            ?: return null

        // Generate output filename if not provided
        val outputFile = File(filesDir, outputFileName ?: "trimmed_${System.currentTimeMillis()}.wav")

        val durationMs = (endTimeMs - startTimeMs).toInt()

        Log.d(Constants.TAG, """
            Trimming audio:
            - start: ${startTimeMs}ms
            - end: ${endTimeMs}ms
            - duration: ${durationMs}ms
            - output: ${outputFile.name}
        """.trimIndent())

        val dataSize = audioData.data.size
        val blockAlign = audioData.channels * (audioData.bitDepth / 8)
        val byteRate = audioData.sampleRate * blockAlign

        // Canonical 44-byte PCM header, entirely little-endian
        val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
            put("RIFF".toByteArray())
            putInt(dataSize + 36)                    // File size minus the 8-byte RIFF preamble
            put("WAVE".toByteArray())
            put("fmt ".toByteArray())
            putInt(16)                               // Subchunk1Size (16 for PCM)
            putShort(1)                              // AudioFormat (1 for PCM)
            putShort(audioData.channels.toShort())   // NumChannels
            putInt(audioData.sampleRate)             // SampleRate
            putInt(byteRate)                         // ByteRate
            putShort(blockAlign.toShort())           // BlockAlign
            putShort(audioData.bitDepth.toShort())   // BitsPerSample
            put("data".toByteArray())
            putInt(dataSize)                         // Subchunk2Size
        }.array()

        RandomAccessFile(outputFile, "rw").use { raf ->
            // "rw" does not truncate: drop stale bytes of a previous, longer file
            raf.setLength(0)
            raf.write(header)
            raf.write(audioData.data)
        }

        // Debug WAV header to verify
        debugWavHeader(outputFile)

        // Return the trimmed audio data
        return AudioData(
            data = audioData.data,
            sampleRate = audioData.sampleRate,
            channels = audioData.channels,
            bitDepth = audioData.bitDepth
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to trim audio: ${e.message}", e)
        return null
    }
}
1546
-
1547
/**
 * Placeholder for cutting the range between [startTimeMs] and [endTimeMs] out of a file.
 *
 * Not implemented yet: none of the arguments are read and the result is always null.
 * The planned approach is to load the audio before and after the range and join them.
 */
fun removeSection(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null
): AudioData? = null // TODO: load the two surrounding ranges with loadAudioRange and concatenate them
1557
-
1558
/**
 * Placeholder for concatenating several decoded sections into one.
 *
 * Not implemented yet: [sections] and [config] are ignored and the result is always null.
 * Intended as the building block for removeSection and later editing features.
 */
fun joinAudioSections(
    sections: List<AudioData>,
    config: DecodingConfig? = null
): AudioData? = null // TODO: implement joining multiple audio sections
1566
-
1567
- // Helper method for future editing features
1568
/**
 * Placeholder for converting [audioData] to another sample rate, channel count or bit depth.
 *
 * Not implemented yet: the target parameters are ignored and the input is handed back
 * unchanged (same instance).
 */
private fun convertAudioFormat(
    audioData: AudioData,
    targetSampleRate: Int? = null,
    targetChannels: Int? = null,
    targetBitDepth: Int? = null
): AudioData = audioData // TODO: implement conversion so joined sections share one format
1578
-
1579
- // Add new function to process entire file
1580
/**
 * Computes features over the whole of [audioData], treating it as a single segment.
 *
 * The PCM bytes are converted to floats, the summary statistics (sum of squares, zero
 * crossings, min/max amplitude) are derived from them, and everything is passed to
 * `computeFeatures` with an empty feature-option map.
 *
 * @param audioData decoded audio whose `data`, `bitDepth` and `sampleRate` are read
 * @return the features produced by `computeFeatures`
 */
fun processEntireFile(audioData: AudioData): Features {
    val floatSamples = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Accumulate in Double, then narrow once
    val energy = floatSamples.sumOf { it * it.toDouble() }.toFloat()

    return computeFeatures(
        segmentData = floatSamples,
        sampleRate = audioData.sampleRate.toFloat(),
        sumSquares = energy,
        zeroCrossings = countZeroCrossings(floatSamples),
        segmentLength = floatSamples.size,
        minAmplitude = floatSamples.minOrNull() ?: 0f,
        maxAmplitude = floatSamples.maxOrNull() ?: 0f,
        featureOptions = mapOf() // No optional features requested
    )
}
1602
-
1603
/**
 * Counts adjacent sample pairs whose product is strictly negative, i.e. whose signs differ.
 * Pairs that contain a zero are not counted.
 */
private fun countZeroCrossings(data: FloatArray): Int =
    (1 until data.size).count { index -> data[index - 1] * data[index] < 0 }
1610
-
1611
/** Converts a frequency in Hz to mels: `2595 * log10(1 + hz / 700)`. */
private fun hzToMel(hz: Float): Float = 2595f * log10(1f + hz / 700f)
1614
-
1615
/** Converts a mel value back to Hz: `700 * (10^(mel / 2595) - 1)`. */
private fun melToHz(mel: Float): Float = 700f * (10f.pow(mel / 2595f) - 1f)
1618
-
1619
/**
 * Returns a copy of [samples] multiplied by a symmetric Hann window,
 * `0.5 * (1 - cos(2*pi*i / (N - 1)))`.
 *
 * Inputs with fewer than two samples are returned as an unmodified copy: for N == 1 the
 * formula divides by zero, which previously turned the single sample into NaN.
 *
 * @param samples signal to window; not modified
 * @return a new array of the same length
 */
private fun applyHannWindow(samples: FloatArray): FloatArray {
    // A one-point window is 1.0 by convention, and an empty input has nothing to scale
    if (samples.size < 2) return samples.copyOf()

    return FloatArray(samples.size) { i ->
        val multiplier = 0.5f * (1f - cos(2f * PI.toFloat() * i / (samples.size - 1)))
        samples[i] * multiplier
    }
}
1627
-
1628
/**
 * Computes log mel-band energies for [samples].
 *
 * The power spectrum from `prepareFFT` is weighted by each of 128 filters returned by
 * `computeMelFilterbank`; each band's summed energy is floored at 1e-10 before the
 * natural logarithm is taken.
 *
 * @return one log-energy value per mel filter
 */
private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
    val powerSpectrum = prepareFFT(samples, sampleRate).first
    val filterbank = computeMelFilterbank(
        numFilters = 128,
        powerSpectrumSize = powerSpectrum.size,
        sampleRate = sampleRate
    )

    return filterbank.map { weights ->
        val bandEnergy = powerSpectrum.indices.fold(0f) { acc, bin ->
            acc + powerSpectrum[bin] * weights[bin]
        }
        // Floor avoids ln(0) for silent bands
        kotlin.math.ln(maxOf(bandEnergy, 1e-10f))
    }
}
1645
-
1646
/**
 * Folds the magnitude spectrum of [samples] into `N_CHROMA` pitch-class bins.
 *
 * `prepareFFT` returns one magnitude per frequency bin (`fftLength / 2 + 1` values), so
 * the spectrum is indexed directly rather than as interleaved re/im pairs, and the bin
 * frequency is derived from the FFT length actually used. Pitch class 0 corresponds to
 * the reference 440 Hz; the semitone offset is floored and wrapped into 0..11, so
 * frequencies below 440 Hz are counted instead of being discarded.
 *
 * @param samples audio samples of one segment
 * @param sampleRate sampling rate in Hz
 * @return accumulated magnitude per pitch class
 */
private fun computeChroma(samples: FloatArray, sampleRate: Float): List<Float> {
    val (_, magnitudeSpectrum) = prepareFFT(samples, sampleRate)
    val chroma = FloatArray(N_CHROMA) { 0f }

    // magnitudeSpectrum covers bins 0..fftLength/2 inclusive
    val nyquistBin = magnitudeSpectrum.size - 1
    val fftLength = nyquistBin * 2
    if (fftLength <= 0) return chroma.toList()
    val freqsPerBin = sampleRate / fftLength

    // Bin 0 (DC) has no pitch; the Nyquist bin is left out as before
    for (bin in 1 until nyquistBin) {
        val freq = bin * freqsPerBin
        if (freq <= 0) continue

        val semitonesFromReference = kotlin.math.floor(12 * log2(freq / 440.0)).toInt()
        // Kotlin's % keeps the sign of the dividend, so wrap negatives explicitly
        val pitchClass = ((semitonesFromReference % 12) + 12) % 12
        if (pitchClass in chroma.indices) {
            chroma[pitchClass] += magnitudeSpectrum[bin]
        }
    }

    return chroma.toList()
}
1666
-
1667
/**
 * Placeholder for spectral contrast.
 *
 * The FFT is still executed through `prepareFFT`, but its result is not used yet and an
 * empty list is always returned.
 */
private fun computeSpectralContrast(samples: FloatArray, sampleRate: Float): List<Float> {
    // Spectrum is computed but intentionally unused until the feature is implemented
    prepareFFT(samples, sampleRate)
    return emptyList() // Placeholder
}
1673
-
1674
/**
 * Projects the chroma vector of [samples] onto six rows of a 6x12 weight matrix.
 *
 * Row `r` (0..5) sums the chroma bins `r`, `r + 4` and `(r + 7) mod 12`. Every row is
 * exactly 12 entries wide: the earlier literal had rows of 13 and 16 entries, and the
 * third weight of row 4 sat at index 14, outside the chroma range, so it never
 * contributed. It is placed at index 11 here, following the pattern of the other rows.
 * NOTE(review): index 11 is inferred from the other rows; confirm it is the intended weight.
 *
 * @return six values, one per matrix row
 */
private fun computeTonnetz(samples: FloatArray, sampleRate: Float): List<Float> {
    // First compute chroma features
    val chroma = computeChroma(samples, sampleRate)

    // Projection matrix (6x12)
    val tonnetzMatrix = arrayOf(
        floatArrayOf(1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f, 0f), // bins 0, 4, 7
        floatArrayOf(0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f), // bins 1, 5, 8
        floatArrayOf(0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f), // bins 2, 6, 9
        floatArrayOf(0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f), // bins 3, 7, 10
        floatArrayOf(0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f), // bins 4, 8, 11
        floatArrayOf(1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f)  // bins 5, 9, 0
    )

    return tonnetzMatrix.map { row ->
        var sum = 0f
        for (i in row.indices) {
            // A chroma vector shorter than 12 contributes 0 for the missing bins
            sum += row[i] * (chroma.getOrNull(i) ?: 0f)
        }
        sum
    }
}
1700
-
1701
/**
 * Returns the smallest power of two that is greater than or equal to [n].
 *
 * Values of [n] at or below 1 yield 1. The largest power of two an Int can hold is 2^30;
 * for larger [n] the previous doubling loop overflowed to 0 and never terminated, so such
 * input is now rejected.
 *
 * @throws IllegalArgumentException when [n] is greater than 2^30
 */
private fun nextPowerOfTwo(n: Int): Int {
    if (n <= 1) return 1
    require(n <= (1 shl 30)) { "No power of two >= $n fits in an Int" }
    // Highest set bit of n - 1, doubled, rounds up without overshooting exact powers
    return Integer.highestOneBit(n - 1) shl 1
}
1708
-
1709
/**
 * Estimates the pitch of [segment] from its autocorrelation, computed through the FFT.
 *
 * Steps: Hann-window the segment, zero-pad it to a power of two at least twice its length,
 * forward FFT, build a mirrored power spectrum, inverse FFT to get the autocorrelation,
 * normalise by the zero-lag value, then pick the strongest local peak above 0.3 among the
 * lags that correspond to 50-500 Hz.
 *
 * @param segment audio samples; fewer than two samples yields 0
 * @param sampleRate sampling rate in Hz
 * @return `sampleRate / lag` for the selected peak, or 0 when no peak qualifies or an FFT
 *   step throws
 */
private fun estimatePitch(segment: FloatArray, sampleRate: Float): Float {
    if (segment.size < 2) return 0.0f

    val tapered = applyHannWindow(segment)

    // Twice the segment length leaves room for the zero padding
    val fftLength = nextPowerOfTwo(segment.size * 2)
    val spectrum = FloatArray(fftLength).also { tapered.copyInto(it) }

    val fft = FFT(fftLength)
    try {
        fft.realForward(spectrum)
    } catch (e: Exception) {
        Log.e("AudioProcessor", "FFT forward transform failed: ${e.message}")
        return 0.0f
    }

    val power = FloatArray(fftLength)
    try {
        // Slots 0 and 1 of the transform are read as the DC and Nyquist terms
        power[0] = spectrum[0] * spectrum[0]
        power[fftLength / 2] = spectrum[1] * spectrum[1]

        for (bin in 1 until fftLength / 2) {
            val real = spectrum[2 * bin]
            val imag = spectrum[2 * bin + 1]
            val energy = real * real + imag * imag
            power[bin] = energy
            power[fftLength - bin] = energy // Mirrored half for the inverse transform
        }
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Power spectrum computation failed: ${e.message}")
        return 0.0f
    }

    val correlation = FloatArray(fftLength)
    try {
        fft.realInverse(power, correlation)
    } catch (e: Exception) {
        Log.e("AudioProcessor", "FFT inverse transform failed: ${e.message}")
        return 0.0f
    }

    // Scale so that the zero-lag value becomes 1
    val scale = 1.0f / correlation[0]
    for (index in correlation.indices) {
        correlation[index] *= scale
    }

    // Lag bounds for the 50-500 Hz pitch range
    val shortestLag = (sampleRate / 500.0f).toInt().coerceAtLeast(1)
    val longestLag = (sampleRate / 50.0f).toInt().coerceAtMost(correlation.size - 1)

    val threshold = 0.3f
    var bestCorrelation = -1.0f
    var bestLag = 0

    for (lag in shortestLag..longestLag) {
        // Both neighbours must exist to test for a local maximum
        if (lag <= 0 || lag >= correlation.size - 1) continue

        val here = correlation[lag]
        val isLocalPeak = here > correlation[lag - 1] &&
            here > correlation[lag + 1] &&
            here > threshold

        if (isLocalPeak && here > bestCorrelation) {
            bestCorrelation = here
            bestLag = lag
        }
    }

    return if (bestLag > 0) sampleRate / bestLag else 0.0f
}
1790
-
1791
/**
 * Applies a Hann window, zero-pads (or truncates) to [fftLength], runs a real forward FFT
 * and derives the magnitude and power spectra.
 *
 * The transform output is read as packed: slot 0 holds the real DC term and slot 1 the
 * real Nyquist term, which is how this function's Nyquist bin and `estimatePitch` read it.
 * Bin 0 therefore uses slot 0 only; combining it with slot 1 as an imaginary part mixed
 * the Nyquist term into the DC magnitude.
 * NOTE(review): confirm this packing against the FFT class.
 *
 * @param samples Input audio samples
 * @param sampleRate Sampling rate in Hz (currently not used by the computation)
 * @param fftLength FFT size (must be power of 2); defaults to the next power of two of
 *   `max(samples.size, 2048)`
 * @return Pair of power spectrum and magnitude spectrum, each `fftLength / 2 + 1` long
 */
private fun prepareFFT(samples: FloatArray, sampleRate: Float, fftLength: Int = nextPowerOfTwo(samples.size.coerceAtLeast(2048))): Pair<FloatArray, FloatArray> {
    val windowed = applyHannWindow(samples)
    val padded = windowed.copyOf(fftLength)
    val fft = FFT(fftLength)
    fft.realForward(padded)

    val nyquistBin = fftLength / 2
    val magnitudeSpectrum = FloatArray(nyquistBin + 1)

    // DC and Nyquist are purely real and stored in the first two slots
    magnitudeSpectrum[0] = abs(padded[0])
    for (i in 1 until nyquistBin) {
        val re = padded[2 * i]
        val im = padded[2 * i + 1]
        magnitudeSpectrum[i] = sqrt(re * re + im * im)
    }
    magnitudeSpectrum[nyquistBin] = abs(padded[1])

    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { magnitudeSpectrum[it] * magnitudeSpectrum[it] }
    return Pair(powerSpectrum, magnitudeSpectrum)
}
1815
-
1816
/**
 * Basic PCM description of an audio file.
 *
 * @property sampleRate samples per second
 * @property channels number of audio channels
 * @property bitDepth bits per sample
 */
data class AudioFormat(val sampleRate: Int, val channels: Int, val bitDepth: Int)
1821
-
1822
/**
 * Reads the sample rate and channel count of an audio file through [MediaExtractor].
 *
 * The path is resolved as given (with any `file://` prefix removed) and, if that does not
 * exist, by file name inside `filesDir`. The first track whose MIME type starts with
 * `audio/` is used, so containers whose first track is not audio are handled too.
 *
 * @param fileUri path or `file://` URI of the audio file
 * @return the detected format with a fixed bit depth of 16, or null when the file is
 *   missing, has no audio track, or cannot be parsed
 */
fun getAudioFormat(fileUri: String): AudioFormat? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            Log.e(Constants.TAG, "File not found: $cleanUri")
            return null
        }

    val extractor = MediaExtractor()
    try {
        extractor.setDataSource(file.absolutePath)

        // Track 0 is not guaranteed to be audio; pick the first audio track instead
        val format = (0 until extractor.trackCount)
            .asSequence()
            .map { extractor.getTrackFormat(it) }
            .firstOrNull { it.getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true }
            ?: run {
                Log.e(Constants.TAG, "No audio track found in: ${file.absolutePath}")
                return null
            }

        return AudioFormat(
            sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE),
            channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
            bitDepth = 16 // Most compressed formats decode to 16-bit PCM
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to get audio format: ${e.message}", e)
        return null
    } finally {
        extractor.release()
    }
}
1846
-
1847
/**
 * Gets the size of the audio file header.
 * For WAV files, this includes the RIFF header and all metadata chunks before the data chunk.
 * For other formats, this will return null as header size handling is format-specific.
 *
 * @param fileUri The URI of the audio file to analyze
 * @return The size of the header in bytes (the offset of the first audio byte), or null if:
 * - The file is not a WAV file
 * - The file cannot be read
 * - The file format is invalid
 * - The data chunk cannot be found (including a file that ends before it)
 *
 * WAV File Structure:
 * - RIFF header (12 bytes)
 *   - "RIFF" identifier (4 bytes)
 *   - File size (4 bytes)
 *   - "WAVE" identifier (4 bytes)
 * - Format chunk ("fmt ") (24 bytes typically)
 * - Optional metadata chunks (variable size)
 *   - LIST (metadata like artist, title)
 *   - JUNK (padding)
 *   - fact (additional format info)
 *   - cue (cue points)
 * - Data chunk
 *   - "data" identifier (4 bytes)
 *   - Chunk size (4 bytes)
 *   - Actual audio data
 */
fun getWavHeaderSize(fileUri: String): Int? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            Log.e(Constants.TAG, "File not found: $cleanUri")
            return null
        }

    return try {
        // use {} closes the stream on every path, including early exits and exceptions
        FileInputStream(file).use { input -> locateWavDataOffset(input) }
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Error calculating WAV header size: ${e.message}")
        null
    }
}

/** Walks the RIFF chunks of [input] and returns the offset just past the "data" chunk header, or null. */
private fun locateWavDataOffset(input: FileInputStream): Int? {
    val buffer = ByteArray(12)

    // Fills the first `count` bytes of buffer; false when the stream ends early
    fun readExactly(count: Int): Boolean {
        var filled = 0
        while (filled < count) {
            val read = input.read(buffer, filled, count - filled)
            if (read < 0) return false
            filled += read
        }
        return true
    }

    // Read RIFF header
    if (!readExactly(12)) {
        Log.e(Constants.TAG, "Failed to read RIFF header")
        return null
    }

    // Verify RIFF header
    if (String(buffer, 0, 4) != "RIFF" || String(buffer, 8, 4) != "WAVE") {
        Log.e(Constants.TAG, "Invalid WAV file format")
        return null
    }

    var headerSize = 12L

    // Read chunks until we find the data chunk
    while (true) {
        if (!readExactly(8)) {
            Log.e(Constants.TAG, "Unexpected end of file while reading chunks")
            return null
        }

        // Chunk size is stored little-endian in bytes 4..7
        val chunkSize = ((buffer[7].toInt() and 0xFF) shl 24) or
            ((buffer[6].toInt() and 0xFF) shl 16) or
            ((buffer[5].toInt() and 0xFF) shl 8) or
            (buffer[4].toInt() and 0xFF)

        val chunkId = String(buffer, 0, 4)
        Log.d(Constants.TAG, "Found chunk: $chunkId, size: $chunkSize")

        if (chunkId == "data") {
            headerSize += 8 // Add chunk header size
            Log.d(Constants.TAG, "Found data chunk at offset: $headerSize")
            if (headerSize > Int.MAX_VALUE) {
                Log.e(Constants.TAG, "Invalid WAV file format")
                return null
            }
            Log.d(Constants.TAG, "Total WAV header size: $headerSize bytes")
            return headerSize.toInt()
        }

        // A size with the top bit set cannot be skipped as a signed Int
        if (chunkSize < 0) {
            Log.e(Constants.TAG, "Invalid WAV file format")
            return null
        }

        // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte
        val paddedSize = chunkSize.toLong() + (chunkSize and 1)
        headerSize += 8 + paddedSize // Add chunk header and data size

        // skip() may skip fewer bytes than requested, so loop until done
        var remaining = paddedSize
        while (remaining > 0) {
            val skipped = input.skip(remaining)
            if (skipped <= 0) {
                Log.e(Constants.TAG, "Unexpected end of file while reading chunks")
                return null
            }
            remaining -= skipped
        }
    }
}
1936
- }