@siteed/expo-audio-stream 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/.size-limit.json +6 -0
  2. package/README.md +6 -6
  3. package/android/build.gradle +5 -0
  4. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +120 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +34 -4
  6. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +635 -0
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +194 -79
  8. package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
  9. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +48 -2
  10. package/android/src/main/java/net/siteed/audiostream/FFT.kt +44 -0
  11. package/android/src/main/java/net/siteed/audiostream/Features.kt +56 -0
  12. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +12 -0
  13. package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +56 -0
  14. package/app.plugin.js +1 -1
  15. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +76 -0
  16. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  17. package/build/AudioAnalysis/AudioAnalysis.types.js +3 -0
  18. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  19. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +4 -0
  20. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  21. package/build/AudioAnalysis/extractAudioAnalysis.js +101 -0
  22. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  23. package/build/AudioAnalysis/extractWaveform.d.ts +8 -0
  24. package/build/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  25. package/build/AudioAnalysis/extractWaveform.js +14 -0
  26. package/build/AudioAnalysis/extractWaveform.js.map +1 -0
  27. package/build/AudioRecorder.provider.d.ts +14 -1
  28. package/build/AudioRecorder.provider.d.ts.map +1 -1
  29. package/build/AudioRecorder.provider.js +18 -5
  30. package/build/AudioRecorder.provider.js.map +1 -1
  31. package/build/ExpoAudioStream.native.d.ts +3 -0
  32. package/build/ExpoAudioStream.native.d.ts.map +1 -0
  33. package/build/ExpoAudioStream.native.js +6 -0
  34. package/build/ExpoAudioStream.native.js.map +1 -0
  35. package/build/ExpoAudioStream.types.d.ts +35 -20
  36. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  37. package/build/ExpoAudioStream.types.js.map +1 -1
  38. package/build/ExpoAudioStream.web.d.ts +42 -0
  39. package/build/ExpoAudioStream.web.d.ts.map +1 -0
  40. package/build/ExpoAudioStream.web.js +185 -0
  41. package/build/ExpoAudioStream.web.js.map +1 -0
  42. package/build/ExpoAudioStreamModule.d.ts +2 -2
  43. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  44. package/build/ExpoAudioStreamModule.js +16 -3
  45. package/build/ExpoAudioStreamModule.js.map +1 -1
  46. package/build/WebRecorder.web.d.ts +51 -0
  47. package/build/WebRecorder.web.d.ts.map +1 -0
  48. package/build/WebRecorder.web.js +288 -0
  49. package/build/WebRecorder.web.js.map +1 -0
  50. package/build/constants.d.ts +11 -0
  51. package/build/constants.d.ts.map +1 -0
  52. package/build/constants.js +14 -0
  53. package/build/constants.js.map +1 -0
  54. package/build/events.d.ts +6 -0
  55. package/build/events.d.ts.map +1 -0
  56. package/build/events.js +15 -0
  57. package/build/events.js.map +1 -0
  58. package/build/index.d.ts +8 -7
  59. package/build/index.d.ts.map +1 -1
  60. package/build/index.js +7 -14
  61. package/build/index.js.map +1 -1
  62. package/build/logger.d.ts +9 -0
  63. package/build/logger.d.ts.map +1 -0
  64. package/build/logger.js +17 -0
  65. package/build/logger.js.map +1 -0
  66. package/build/useAudioRecorder.d.ts +37 -0
  67. package/build/useAudioRecorder.d.ts.map +1 -0
  68. package/build/useAudioRecorder.js +271 -0
  69. package/build/useAudioRecorder.js.map +1 -0
  70. package/build/utils/convertPCMToFloat32.d.ts +11 -0
  71. package/build/utils/convertPCMToFloat32.d.ts.map +1 -0
  72. package/build/utils/convertPCMToFloat32.js +41 -0
  73. package/build/utils/convertPCMToFloat32.js.map +1 -0
  74. package/build/utils/encodingToBitDepth.d.ts +5 -0
  75. package/build/utils/encodingToBitDepth.d.ts.map +1 -0
  76. package/build/utils/encodingToBitDepth.js +13 -0
  77. package/build/utils/encodingToBitDepth.js.map +1 -0
  78. package/build/utils/getWavFileInfo.d.ts +25 -0
  79. package/build/utils/getWavFileInfo.d.ts.map +1 -0
  80. package/build/utils/getWavFileInfo.js +89 -0
  81. package/build/utils/getWavFileInfo.js.map +1 -0
  82. package/build/utils/writeWavHeader.d.ts +9 -0
  83. package/build/utils/writeWavHeader.d.ts.map +1 -0
  84. package/build/utils/writeWavHeader.js +41 -0
  85. package/build/utils/writeWavHeader.js.map +1 -0
  86. package/build/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  87. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  88. package/build/workers/InlineFeaturesExtractor.web.js +303 -0
  89. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -0
  90. package/build/workers/inlineAudioWebWorker.web.d.ts +2 -0
  91. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  92. package/build/workers/inlineAudioWebWorker.web.js +243 -0
  93. package/build/workers/inlineAudioWebWorker.web.js.map +1 -0
  94. package/expo-module.config.json +13 -4
  95. package/ios/AudioAnalysisData.swift +39 -0
  96. package/ios/AudioProcessingHelpers.swift +59 -0
  97. package/ios/AudioProcessor.swift +317 -0
  98. package/ios/AudioStreamError.swift +7 -0
  99. package/ios/AudioStreamManager.swift +243 -54
  100. package/ios/AudioStreamManagerDelegate.swift +4 -0
  101. package/ios/DataPoint.swift +41 -0
  102. package/ios/ExpoAudioStreamModule.swift +198 -6
  103. package/ios/Features.swift +44 -0
  104. package/ios/RecordingResult.swift +19 -0
  105. package/ios/RecordingSettings.swift +13 -0
  106. package/ios/WaveformExtractor.swift +105 -0
  107. package/package.json +13 -12
  108. package/plugin/tsconfig.json +13 -8
  109. package/publish.sh +8 -0
  110. package/src/AudioAnalysis/AudioAnalysis.types.ts +85 -0
  111. package/src/AudioAnalysis/extractAudioAnalysis.ts +136 -0
  112. package/src/AudioAnalysis/extractWaveform.ts +25 -0
  113. package/src/AudioRecorder.provider.tsx +36 -8
  114. package/src/ExpoAudioStream.native.ts +6 -0
  115. package/src/ExpoAudioStream.types.ts +50 -25
  116. package/src/ExpoAudioStream.web.ts +229 -0
  117. package/src/ExpoAudioStreamModule.ts +22 -3
  118. package/src/WebRecorder.web.ts +416 -0
  119. package/src/constants.ts +18 -0
  120. package/src/events.ts +25 -0
  121. package/src/index.ts +14 -29
  122. package/src/logger.ts +26 -0
  123. package/src/useAudioRecorder.tsx +415 -0
  124. package/src/utils/convertPCMToFloat32.ts +48 -0
  125. package/src/utils/encodingToBitDepth.ts +18 -0
  126. package/src/utils/getWavFileInfo.ts +125 -0
  127. package/src/utils/writeWavHeader.ts +56 -0
  128. package/src/workers/InlineFeaturesExtractor.web.tsx +302 -0
  129. package/src/workers/inlineAudioWebWorker.web.tsx +242 -0
  130. package/build/ExpoAudioStreamModule.web.d.ts +0 -37
  131. package/build/ExpoAudioStreamModule.web.d.ts.map +0 -1
  132. package/build/ExpoAudioStreamModule.web.js +0 -156
  133. package/build/ExpoAudioStreamModule.web.js.map +0 -1
  134. package/build/useAudioRecording.d.ts +0 -23
  135. package/build/useAudioRecording.d.ts.map +0 -1
  136. package/build/useAudioRecording.js +0 -189
  137. package/build/useAudioRecording.js.map +0 -1
  138. package/docs/demo.gif +0 -0
  139. package/release-it.js +0 -18
  140. package/src/ExpoAudioStreamModule.web.ts +0 -181
  141. package/src/useAudioRecording.ts +0 -268
  142. package/yarn-error.log +0 -7793
@@ -0,0 +1,635 @@
1
+ // net/siteed/audiostream/AudioProcessor.kt
2
+ package net.siteed.audiostream
3
+
4
+ import java.nio.ByteBuffer
5
+ import java.nio.ByteOrder
6
+ import kotlin.math.*
7
+ import android.util.Log
8
+ import java.io.File
9
+ import java.io.IOException
10
+ import kotlin.system.measureTimeMillis
11
+
12
+ class AudioProcessor(private val filesDir: File) {
13
companion object {
    // Number of coefficients requested from computeDCT when extracting MFCCs.
    const val NUM_MFCC_COEFFICIENTS = 13
    // Number of filters requested from computeMelFilterBank when extracting MFCCs.
    const val NUM_MEL_FILTERS = 26
    // Lower edge of the mel filter bank, expressed on the mel scale.
    const val MEL_MIN_FREQ = 0.0
    // Scale factor of the Hz <-> mel conversion used here: mel = 2595 * log10(1 + hz / 700).
    // Despite the name it is not a frequency; it is also the divisor when converting mel back to Hz.
    const val MEL_MAX_FREQ_DIVISOR = 2595.0
    // The 700 Hz term of the same Hz <-> mel conversion.
    const val MEL_MAX_FREQ_CONSTANT = 700.0
    // Multiplied by the number of log energies under the square root that scales each DCT coefficient.
    const val DCT_SQRT_DIVISOR = 2.0
    // Base of the power used when converting mel points back to Hz (inverse of log10).
    const val LOG_BASE = 10.0
}
22
+
23
/**
 * Audio bytes together with the format fields that [loadAudioFile] reads from the file header.
 *
 * `equals` and `hashCode` are overridden because the generated data-class versions compare a
 * [ByteArray] by reference, which would make two instances with identical content unequal.
 *
 * @property data The audio bytes.
 * @property sampleRate Sample rate read from the header.
 * @property bitDepth Bits per sample read from the header.
 * @property channels Channel count read from the header.
 */
data class AudioData(val data: ByteArray, val sampleRate: Int, val bitDepth: Int, val channels: Int) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is AudioData) return false
        return sampleRate == other.sampleRate &&
            bitDepth == other.bitDepth &&
            channels == other.channels &&
            data.contentEquals(other.data)
    }

    override fun hashCode(): Int {
        var result = data.contentHashCode()
        result = 31 * result + sampleRate
        result = 31 * result + bitDepth
        result = 31 * result + channels
        return result
    }
}
24
+
25
// Source of DataPoint ids: read and then incremented once for every data point that is built.
private var uniqueIdCounter: Long = 0
27
+
28
/**
 * Reads a `.wav` or `.pcm` file fully into memory and decodes sample rate, channel count and
 * bit depth from fixed offsets in its leading header bytes.
 *
 * A leading `file://` is stripped from [originalFileUri]. If nothing exists at that path, a file
 * with the same name inside `filesDir` is tried instead.
 *
 * NOTE(review): the header offsets are read the same way for `.pcm` files; confirm that such
 * files are expected to start with a WAV-style header.
 *
 * @param originalFileUri Path or `file://` URI of the audio file.
 * @param skipWavHeader When true, the first `Constants.WAV_HEADER_SIZE` bytes are left out of the
 * returned data; when false the complete file content is returned.
 * @return The loaded bytes and parsed format, or null when the file is missing, has an
 * unsupported extension, is shorter than the header, or reading fails.
 */
fun loadAudioFile(originalFileUri: String, skipWavHeader: Boolean = false): AudioData? {
    val fileUri = originalFileUri.removePrefix("file://")

    // Resolution order: the path as given, then the same file name under filesDir.
    val requested = File(fileUri)
    val file = requested.takeIf { it.exists() }
        ?: File(filesDir, requested.name).takeIf { it.exists() }
        ?: run {
            Log.e("AudioProcessor", "File does not exist at provided path or in filesDir: $fileUri")
            return null
        }

    val validExtensions = listOf("wav", "pcm")
    val fileExtension = file.extension.lowercase()
    if (!validExtensions.contains(fileExtension)) {
        Log.e("AudioProcessor", "Invalid file extension: $fileExtension. Supported extensions are: $validExtensions")
        return null
    }

    return try {
        val fileData = file.readBytes()
        val headerSize = Constants.WAV_HEADER_SIZE

        if (fileData.size < headerSize) {
            Log.e("AudioProcessor", "File is too small to be a valid WAV file")
            null
        } else {
            // Little-endian fields at fixed offsets: channels @22, sample rate @24, bit depth @34.
            val header = fileData.copyOfRange(0, headerSize)
            val sampleRate = byteArrayToInt(header.copyOfRange(24, 28))
            val channels = byteArrayToShort(header.copyOfRange(22, 24))
            val bitDepth = byteArrayToShort(header.copyOfRange(34, 36))

            val audioData = when {
                skipWavHeader -> fileData.copyOfRange(headerSize, fileData.size)
                else -> fileData
            }

            AudioData(audioData, sampleRate, bitDepth.toInt(), channels.toInt())
        }
    } catch (e: IOException) {
        Log.e("AudioProcessor", "Failed to load audio file: ${e.message}", e)
        null
    } catch (e: IllegalArgumentException) {
        Log.e("AudioProcessor", "Invalid audio file format: ${e.message}", e)
        null
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Unexpected error: ${e.message}", e)
        null
    }
}
82
+
83
/**
 * Decodes the first four bytes of [bytes] as a little-endian 32-bit integer.
 *
 * @param bytes At least four bytes; anything past index 3 is ignored.
 * @return The decoded value.
 */
private fun byteArrayToInt(bytes: ByteArray): Int {
    var value = 0
    for (position in 0..3) {
        // Mask to an unsigned byte, then move it to its place: byte 0 is least significant.
        val unsigned = bytes[position].toInt() and 0xFF
        value = value or (unsigned shl (8 * position))
    }
    return value
}
89
+
90
/**
 * Decodes the first two bytes of [bytes] as a little-endian 16-bit value.
 *
 * @param bytes At least two bytes; anything past index 1 is ignored.
 * @return The decoded value as a (signed) [Short].
 */
private fun byteArrayToShort(bytes: ByteArray): Short {
    val lowByte = bytes[0].toInt() and 0xFF
    val highByte = bytes[1].toInt() and 0xFF
    return ((highByte shl 8) or lowByte).toShort()
}
94
+
95
/**
 * Decodes PCM bytes, cuts the samples into consecutive segments and builds one [DataPoint] per
 * segment (RMS or peak amplitude, optional dB, silence flag and the requested features).
 *
 * Segment length is `ceil(totalSamples / (durationSeconds * pointsPerSecond))` samples. At most
 * `max(floor(durationSeconds * pointsPerSecond), 1)` segments are produced; samples beyond the
 * last segment are not analysed, and a segment that would contain no samples is skipped instead
 * of yielding a NaN data point.
 *
 * NOTE(review): samples are not de-interleaved per channel, and byte positions multiply by
 * `config.channels` although `start`/`end` already index the decoded sample array — confirm the
 * intended behaviour for multi-channel input.
 *
 * @param data The audio bytes; every byte is decoded as sample data using `config.encoding`.
 * @param config The recording configuration (`encoding`, `sampleRate`, `channels`,
 * `pointsPerSecond`, `algorithm` and `features` are read).
 * @return AudioAnalysisData containing the data points, the min/max RMS over all points
 * (0..0 when there are no points) and the measured extraction time.
 * @throws IllegalArgumentException if `config.encoding` is not `pcm_8bit`, `pcm_16bit` or `pcm_32bit`.
 */
fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
    val sampleRate = config.sampleRate.toFloat()
    val bitDepth = when (config.encoding) {
        "pcm_8bit" -> 8
        "pcm_16bit" -> 16
        "pcm_32bit" -> 32
        else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
    }
    val channelData = convertToFloatArray(data, bitDepth)
    val pointsPerSecond = config.pointsPerSecond
    val algorithm = config.algorithm
    val featureOptions = config.features

    val totalSamples = channelData.size
    val segmentDurationSeconds = totalSamples.toDouble() / sampleRate
    val totalPoints = max((segmentDurationSeconds * pointsPerSecond).toInt(), 1)
    // Only reported in the log line below; segmentation uses samplesPerPoint.
    val pointInterval = ceil(totalSamples / totalPoints.toDouble()).toInt()

    Log.d("AudioProcessor", "Extracting waveform totalSize=${data.size} with $totalSamples samples and $pointsPerSecond points per second --> $pointInterval samples per point")
    Log.d("AudioProcessor", "segmentDuration: $segmentDurationSeconds seconds")

    val expectedPoints = segmentDurationSeconds * pointsPerSecond
    val samplesPerPoint = ceil(channelData.size / expectedPoints).toInt()
    Log.d("AudioProcessor", "Extracting waveform with expectedPoints=$expectedPoints , samplesPerPoints=$samplesPerPoint")

    val bytesPerSample = bitDepth / 8
    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    // RMS is never negative, so 0 is the correct identity for a running maximum
    // (Float.MIN_VALUE is the smallest positive float, not the most negative one).
    var maxAmplitude = 0f
    val durationMs = (segmentDurationSeconds * 1000).toInt()

    // Measure the time taken for audio processing
    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until totalPoints) {
            val start = i * samplesPerPoint
            val end = min(start + samplesPerPoint, totalSamples)
            // Empty input, or rounding of samplesPerPoint, can leave a point without samples;
            // analysing it would divide by zero and produce NaN values.
            if (start >= end) continue
            val segmentData = channelData.sliceArray(start until end)

            var sumSquares = 0f
            var zeroCrossings = 0
            var prevValue = 0f
            var localMinAmplitude = Float.MAX_VALUE
            var localMaxAmplitude = 0f

            for (value in segmentData) {
                sumSquares += value * value
                // A sign change between two consecutive non-zero samples counts as a crossing.
                if (prevValue != 0f && value * prevValue < 0) zeroCrossings += 1
                prevValue = value

                val absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            // The features of a segment carry that segment's own amplitude bounds.
            val features = computeFeatures(
                segmentData,
                sampleRate,
                localMinAmplitude,
                localMaxAmplitude,
                sumSquares,
                zeroCrossings,
                segmentData.size,
                featureOptions
            )
            val rms = features.rms
            val silent = rms < 0.01
            val dB = if (featureOptions["dB"] == true) 20 * log10(rms.toDouble()).toFloat() else 0f
            minAmplitude = min(minAmplitude, rms)
            maxAmplitude = max(maxAmplitude, rms)

            val startPosition = start * bytesPerSample * config.channels
            val endPosition = end * bytesPerSample * config.channels

            dataPoints.add(
                DataPoint(
                    id = uniqueIdCounter++, // Assign unique ID and increment the counter
                    amplitude = if (algorithm == "peak") localMaxAmplitude else rms,
                    activeSpeech = null,
                    dB = dB,
                    silent = silent,
                    features = features,
                    samples = segmentData.size,
                    startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                    endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                    startPosition = startPosition,
                    endPosition = endPosition,
                    speaker = 0
                )
            )
        }
    }

    // Without any data point the running minimum is still the sentinel; report an empty range.
    if (dataPoints.isEmpty()) {
        minAmplitude = 0f
    }

    return AudioAnalysisData(
        pointsPerSecond = pointsPerSecond,
        durationMs = durationMs,
        bitDepth = bitDepth,
        numberOfChannels = config.channels,
        sampleRate = config.sampleRate,
        samples = totalSamples,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        speakerChanges = emptyList(),
        extractionTimeMs = extractionTimeMs.toFloat() // Return the measured extraction time
    )
}
200
+
201
+
202
+
203
/**
 * Converts little-endian PCM bytes to floats.
 *
 * - 16 bit: signed samples divided by 32768.
 * - 8 bit: unsigned samples (0..255, 128 = silence) mapped to [-1, 1). The byte is masked to its
 *   unsigned value first, because a Kotlin [Byte] is signed.
 * - 32 bit: signed integer samples divided by `Int.MAX_VALUE`.
 *
 * Trailing bytes that do not form a complete sample are ignored.
 *
 * @param data The audio data in bytes.
 * @param bitDepth The bit depth of the audio data (8, 16 or 32).
 * @return One float per decoded sample.
 * @throws IllegalArgumentException if [bitDepth] is not 8, 16 or 32.
 */
private fun convertToFloatArray(data: ByteArray, bitDepth: Int): FloatArray {
    return when (bitDepth) {
        16 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            FloatArray(samples.remaining()) { index -> samples.get(index) / 32768.0f }
        }
        8 -> FloatArray(data.size) { index -> ((data[index].toInt() and 0xFF) - 128) / 128.0f }
        32 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            val fullScale = Int.MAX_VALUE.toFloat()
            FloatArray(samples.remaining()) { index -> samples.get(index) / fullScale }
        }
        else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
    }
}
227
+
228
+
229
+
230
/**
 * Assembles the [Features] of one segment.
 *
 * RMS is always computed. Every other feature is computed only when its key in [featureOptions]
 * is `true`; otherwise (or when its extractor throws, which is logged) it is `0f` / an empty list.
 * A segment of length zero yields `rms = 0` and `zcr = 0` instead of NaN.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @param minAmplitude The minimum amplitude, stored unchanged in the result.
 * @param maxAmplitude The maximum amplitude, stored unchanged in the result.
 * @param sumSquares The sum of the squared samples of the segment.
 * @param zeroCrossings The number of zero crossings counted in the segment.
 * @param segmentLength The number of samples in the segment.
 * @param featureOptions The feature options to compute, keyed by feature name.
 * @return The computed features.
 */
private fun computeFeatures(
    segmentData: FloatArray,
    sampleRate: Float,
    minAmplitude: Float,
    maxAmplitude: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: Map<String, Boolean>
): Features {
    // Runs one optional extractor: disabled -> fallback; failure -> log and fallback.
    fun <T> optionalFeature(key: String, label: String, fallback: T, extract: () -> T): T {
        if (featureOptions[key] != true) return fallback
        return try {
            extract()
        } catch (e: Exception) {
            Log.e("AudioProcessor", "Failed to extract $label: ${e.message}", e)
            fallback
        }
    }

    val hasSamples = segmentLength > 0
    val rms = if (hasSamples) sqrt(sumSquares / segmentLength) else 0f
    val energy = if (featureOptions["energy"] == true) sumSquares else 0f
    val zcr = if (hasSamples && featureOptions["zcr"] == true) zeroCrossings / segmentLength.toFloat() else 0f

    return Features(
        energy = energy,
        mfcc = optionalFeature("mfcc", "MFCC", emptyList<Float>()) {
            extractMFCC(segmentData, sampleRate)
        },
        rms = rms,
        zcr = zcr,
        minAmplitude = minAmplitude,
        maxAmplitude = maxAmplitude,
        spectralCentroid = optionalFeature("spectralCentroid", "spectral centroid", 0f) {
            extractSpectralCentroid(segmentData, sampleRate)
        },
        spectralFlatness = optionalFeature("spectralFlatness", "spectral flatness", 0f) {
            extractSpectralFlatness(segmentData)
        },
        spectralRollOff = optionalFeature("spectralRollOff", "spectral roll-off", 0f) {
            extractSpectralRollOff(segmentData, sampleRate)
        },
        spectralBandwidth = optionalFeature("spectralBandwidth", "spectral bandwidth", 0f) {
            extractSpectralBandwidth(segmentData, sampleRate)
        },
        chromagram = optionalFeature("chromagram", "chromagram", emptyList<Float>()) {
            extractChromagram(segmentData, sampleRate)
        },
        tempo = optionalFeature("tempo", "tempo", 0f) {
            extractTempo(segmentData, sampleRate)
        },
        hnr = optionalFeature("hnr", "HNR", 0f) {
            extractHNR(segmentData)
        }
    )
}
328
+
329
/**
 * Puts per-segment accumulators back to their start values through the supplied setters and
 * empties the sample list.
 *
 * The setters are invoked in parameter order with `0f`, `0`, `Float.MAX_VALUE` and
 * `Float.MIN_VALUE` respectively.
 *
 * @param sumSquaresUpdater Receives the reset sum of squares.
 * @param zeroCrossingsUpdater Receives the reset zero-crossing count.
 * @param localMinAmplitudeUpdater Receives the reset local minimum amplitude.
 * @param localMaxAmplitudeUpdater Receives the reset local maximum amplitude.
 * @param segmentData The segment sample list to clear.
 */
private fun resetSegmentData(
    sumSquaresUpdater: (Float) -> Unit,
    zeroCrossingsUpdater: (Int) -> Unit,
    localMinAmplitudeUpdater: (Float) -> Unit,
    localMaxAmplitudeUpdater: (Float) -> Unit,
    segmentData: MutableList<Float>
) {
    val initialSumSquares = 0f
    val initialZeroCrossings = 0
    val initialMin = Float.MAX_VALUE
    val initialMax = Float.MIN_VALUE

    sumSquaresUpdater.invoke(initialSumSquares)
    zeroCrossingsUpdater.invoke(initialZeroCrossings)
    localMinAmplitudeUpdater.invoke(initialMin)
    localMaxAmplitudeUpdater.invoke(initialMax)
    segmentData.clear()
}
350
+
351
/**
 * Extracts the MFCC (Mel-Frequency Cepstral Coefficients) from the audio data.
 *
 * Steps: in-place real FFT of a copy of the segment, magnitude per consecutive (re, im) pair of
 * the FFT output, mel filter bank energies, natural log, DCT.
 *
 * Segments with an odd number of samples leave one unpaired value at the end of the FFT output;
 * it is treated as a bin with zero imaginary part. Previously the pairing threw for such
 * segments and no coefficients were returned.
 *
 * NOTE(review): assumes `FFT.realForward` leaves interleaved (re, im) pairs in the array and
 * accepts arbitrary sizes — confirm against FFT.kt.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The MFCC coefficients, or an empty list when the segment has fewer than two samples
 * or the DCT fails.
 */
private fun extractMFCC(segmentData: FloatArray, sampleRate: Float): List<Float> {
    if (segmentData.size < 2) {
        Log.e("AudioProcessor", "Segment data is too small for MFCC extraction: size=${segmentData.size}")
        return emptyList()
    }

    val fftData = segmentData.copyOf()
    val fft = FFT(fftData.size)
    fft.realForward(fftData)

    // One spectrum value per (re, im) pair; the value is sqrt(re^2 + im^2).
    val binCount = (fftData.size + 1) / 2
    val powerSpectrum = List(binCount) { bin ->
        val re = fftData[2 * bin]
        val im = if (2 * bin + 1 < fftData.size) fftData[2 * bin + 1] else 0f
        sqrt(re * re + im * im)
    }

    // Compute Mel filter bank
    val melFilterBank = computeMelFilterBank(NUM_MEL_FILTERS, powerSpectrum.size, sampleRate)
    val filterEnergies = melFilterBank.map { filter ->
        filter.zip(powerSpectrum).sumOf { (f, p) -> (f * p).toDouble() }.toFloat()
    }

    // Apply log to filter energies; the tiny offset keeps ln() finite for a zero energy.
    val logEnergies = filterEnergies.map { ln(it + Float.MIN_VALUE) }

    // Compute Discrete Cosine Transform (DCT) of log energies to get MFCCs
    return try {
        computeDCT(logEnergies, NUM_MFCC_COEFFICIENTS)
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Error computing DCT: ${e.message}", e)
        emptyList()
    }
}
392
+
393
+
394
+
395
/**
 * Computes a bank of triangular Mel filters.
 *
 * `numFilters + 2` points are spaced evenly on the mel scale between `MEL_MIN_FREQ` and the mel
 * value of the Nyquist frequency (`sampleRate / 2`), converted back to Hz and then to integer
 * spectrum bin indices. Filter `i` rises from bin point `i` to point `i + 1` and falls to point
 * `i + 2`.
 *
 * Bin points are floored to integers before the slopes are computed, so weights stay within
 * [0, 1]; fractional bin edges previously produced negative weights on the rising slope.
 * The Nyquist frequency maps to the last bin (`powerSpectrumSize - 1`), so the filters span the
 * whole spectrum rather than its lower half.
 *
 * NOTE(review): assumes the spectrum passed by the caller covers 0 Hz .. Nyquist — confirm
 * against the FFT output layout.
 *
 * @param numFilters The number of Mel filters.
 * @param powerSpectrumSize The size of the power spectrum (number of bins).
 * @param sampleRate The sample rate of the audio data.
 * @return A list of Mel filters, each with `powerSpectrumSize` weights.
 */
private fun computeMelFilterBank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): List<List<Float>> {
    val nyquist = sampleRate / 2.0
    val melMaxFreq = MEL_MAX_FREQ_DIVISOR * log10(1.0 + nyquist / MEL_MAX_FREQ_CONSTANT)

    val binPoints = IntArray(numFilters + 2) { i ->
        val mel = MEL_MIN_FREQ + i * (melMaxFreq - MEL_MIN_FREQ) / (numFilters + 1)
        val hz = MEL_MAX_FREQ_CONSTANT * (LOG_BASE.pow(mel / MEL_MAX_FREQ_DIVISOR) - 1.0)
        floor(hz / nyquist * (powerSpectrumSize - 1)).toInt()
    }

    return List(numFilters.coerceAtLeast(0)) { index ->
        val left = binPoints[index]
        val center = binPoints[index + 1]
        val right = binPoints[index + 2]
        val filter = FloatArray(powerSpectrumSize)

        // Rising slope: 0 at `left`, approaching 1 just below `center`.
        for (j in left until center) {
            if (j in filter.indices) {
                filter[j] = (j - left).toFloat() / (center - left)
            }
        }
        // Falling slope: 1 at `center`, approaching 0 just below `right`.
        for (j in center until right) {
            if (j in filter.indices) {
                filter[j] = (right - j).toFloat() / (right - center)
            }
        }
        filter.toList()
    }
}
429
+
430
+
431
/**
 * Computes the first [numCoefficients] terms of a cosine transform of [logEnergies].
 *
 * Coefficient `i` is `sum_j logEnergies[j] * cos(PI * i * (j + 0.5) / n)` divided by
 * `sqrt(DCT_SQRT_DIVISOR * n)`, where `n` is the number of log energies.
 *
 * @param logEnergies The log energies.
 * @param numCoefficients The number of coefficients to compute.
 * @return A list of [numCoefficients] coefficients.
 */
private fun computeDCT(logEnergies: List<Float>, numCoefficients: Int): List<Float> {
    val n = logEnergies.size
    // The normalisation does not depend on the coefficient index.
    val normalisation = sqrt(DCT_SQRT_DIVISOR * n)

    val coefficients = FloatArray(numCoefficients) { i ->
        var accumulator = 0.0
        logEnergies.forEachIndexed { j, energy ->
            accumulator += energy * cos(PI * i * (j + 0.5) / n)
        }
        (accumulator / normalisation).toFloat()
    }

    return coefficients.toList()
}
451
+
452
+
453
/**
 * Computes an index-weighted centroid of the squared values of [segmentData], scaled by
 * `(sampleRate / 2) / segmentData.size`.
 *
 * NOTE(review): the values of [segmentData] are squared and used directly as the "spectrum";
 * no transform is applied in this function — confirm the caller is expected to pass
 * frequency-domain data.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The centroid, or `0f` when the squared values sum to zero.
 */
private fun extractSpectralCentroid(segmentData: FloatArray, sampleRate: Float): Float {
    var total = 0f
    var indexWeighted = 0f
    for (position in segmentData.indices) {
        val squared = segmentData[position] * segmentData[position]
        total += squared
        indexWeighted += position * squared
    }

    if (total == 0f) {
        return 0f
    }
    return (indexWeighted / total) * (sampleRate / 2) / segmentData.size
}
467
+
468
+
469
/**
 * Computes the ratio of the geometric mean to the arithmetic mean of the absolute values of
 * [segmentData].
 *
 * `Float.MIN_VALUE` is added to each absolute value before the logarithm so a zero sample does
 * not produce `ln(0)`.
 *
 * NOTE(review): the absolute sample values are used directly as the "spectrum"; no transform is
 * applied in this function.
 *
 * @param segmentData The segment data.
 * @return The flatness ratio, `0f` when the arithmetic mean is zero, NaN for an empty segment.
 */
private fun extractSpectralFlatness(segmentData: FloatArray): Float {
    var logTotal = 0.0
    var magnitudeTotal = 0.0
    for (sample in segmentData) {
        val magnitude = abs(sample)
        logTotal += ln(magnitude + Float.MIN_VALUE)
        magnitudeTotal += magnitude
    }

    // Averages of an empty segment are NaN, which then propagates to the result.
    val count = segmentData.size
    val meanLog = if (count == 0) Double.NaN else logTotal / count
    val meanMagnitude = if (count == 0) Double.NaN else magnitudeTotal / count

    val geometricMean = exp(meanLog).toFloat()
    val arithmeticMean = meanMagnitude.toFloat()
    return when {
        arithmeticMean != 0f -> geometricMean / arithmeticMean
        else -> 0f
    }
}
480
+
481
/**
 * Finds the first position at which the running sum of absolute values reaches 85% of the total
 * and scales that position by `(sampleRate / 2) / segmentData.size`.
 *
 * NOTE(review): the absolute sample values are used directly as the "spectrum"; no transform is
 * applied in this function.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The scaled roll-off position, or `0f` when the threshold is never reached
 * (for example for an empty segment).
 */
private fun extractSpectralRollOff(segmentData: FloatArray, sampleRate: Float): Float {
    var totalEnergy = 0f
    for (sample in segmentData) {
        totalEnergy += abs(sample)
    }
    val rollOffThreshold = totalEnergy * 0.85f

    var runningEnergy = 0f
    val rollOffIndex = segmentData.indexOfFirst { sample ->
        runningEnergy += abs(sample)
        runningEnergy >= rollOffThreshold
    }

    return if (rollOffIndex < 0) {
        0f
    } else {
        rollOffIndex.toFloat() / segmentData.size * (sampleRate / 2)
    }
}
502
+
503
/**
 * Computes the magnitude-weighted standard deviation of position around the centroid returned
 * by [extractSpectralCentroid].
 *
 * Each position is converted with the same `(sampleRate / 2) / segmentData.size` scaling that the
 * centroid uses before the difference is taken, so both terms are in the same unit. Previously
 * the raw index was compared with the already-scaled centroid.
 *
 * NOTE(review): the centroid weights by squared values while this function weights by absolute
 * values; that is kept as is.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The bandwidth, or `0f` when the segment is empty or all values are zero.
 */
private fun extractSpectralBandwidth(segmentData: FloatArray, sampleRate: Float): Float {
    if (segmentData.isEmpty()) return 0f

    val centroid = extractSpectralCentroid(segmentData, sampleRate)
    val scalePerIndex = (sampleRate / 2) / segmentData.size

    var weightTotal = 0f
    var weightedSpread = 0f
    for (index in segmentData.indices) {
        val weight = abs(segmentData[index])
        val deviation = index * scalePerIndex - centroid
        weightTotal += weight
        weightedSpread += weight * deviation * deviation
    }

    if (weightTotal == 0f) return 0f
    return sqrt(weightedSpread / weightTotal)
}
518
+
519
/**
 * Extracts a 12-bin chromagram from the audio data.
 *
 * A copy of the segment is transformed in place with `FFT.realForward`; the absolute value of
 * each output element is added to the pitch class of the frequency assigned to its index
 * (`index * sampleRate / size`). Pitch class 0 is A (440 Hz and its octaves); classes advance
 * one semitone at a time.
 *
 * The pitch class is computed with a floor and a non-negative modulo, so frequencies below
 * 440 Hz are folded into 0..11 instead of being discarded, and the 0 Hz element (whose
 * logarithm is undefined) is skipped instead of being counted as class 0.
 *
 * NOTE(review): every element of the FFT output is treated as one frequency bin — confirm
 * against the output layout of FFT.kt.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The chromagram (12 values).
 */
private fun extractChromagram(segmentData: FloatArray, sampleRate: Float): List<Float> {
    val fftData = segmentData.copyOf()
    val fft = FFT(fftData.size)
    fft.realForward(fftData)

    // Initialize the chromagram with 12 bins (one for each pitch class)
    val chromagram = FloatArray(12)

    // Map frequencies to pitch classes
    for (i in fftData.indices) {
        val freq = i * sampleRate / fftData.size
        // log2 is only defined for positive frequencies.
        if (!(freq > 0f)) continue

        val semitonesFromA4 = floor(12 * log2(freq / 440.0)).toInt()
        // `%` keeps the sign of the dividend; shift into 0..11.
        val pitchClass = ((semitonesFromA4 % 12) + 12) % 12
        chromagram[pitchClass] += abs(fftData[i])
    }

    return chromagram.toList()
}
547
+
548
/**
 * Estimates the tempo from the spacing of onset-envelope peaks.
 *
 * Peak positions returned by [findPeaks] are indices into the envelope produced by
 * [calculateOnsetEnvelope], i.e. frame numbers. They are converted to seconds with that
 * envelope's frame length (`sampleRate.toInt() / 100` samples) before the intervals are
 * averaged; dividing frame numbers by the sample rate directly understated the intervals.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The tempo in beats per minute, or `0f` when fewer than two peaks are found.
 */
private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
    // Calculate the onset strength envelope
    val onsetEnv = calculateOnsetEnvelope(segmentData, sampleRate)

    // Find peaks in the onset envelope
    val peaks = findPeaks(onsetEnv)
    // An interval needs two peaks; averaging an empty list would give NaN.
    if (peaks.size < 2) return 0f

    // Must match the frame length used by calculateOnsetEnvelope.
    val frameSize = sampleRate.toInt() / 100
    val secondsPerFrame = frameSize / sampleRate

    // Calculate the inter-onset intervals (IOIs) in seconds
    val iois = peaks.zipWithNext { a, b -> (b - a) * secondsPerFrame }

    // Calculate the tempo in beats per minute (BPM)
    val avgIoi = iois.average().toFloat()
    return if (avgIoi > 0f) 60f / avgIoi else 0f
}
568
+
569
/**
 * Builds an onset envelope with one value per complete frame of `sampleRate.toInt() / 100`
 * samples.
 *
 * Each value is the sum, over the positions of a frame, of the positive part of the difference
 * between the absolute sample value and the absolute value at the same position in the previous
 * frame. The first frame is compared with zeros. Samples after the last complete frame are
 * ignored.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The onset envelope.
 */
private fun calculateOnsetEnvelope(segmentData: FloatArray, sampleRate: Float): FloatArray {
    val frameSize = sampleRate.toInt() / 100 // Assume 10ms frames
    val frameCount = segmentData.size / frameSize
    var previousFrame = FloatArray(frameSize)

    return FloatArray(frameCount) { frameIndex ->
        val offset = frameIndex * frameSize
        val currentFrame = FloatArray(frameSize) { position -> abs(segmentData[offset + position]) }

        var increase = 0f
        for (position in 0 until frameSize) {
            increase += max(0f, currentFrame[position] - previousFrame[position])
        }

        previousFrame = currentFrame
        increase
    }
}
590
+
591
/**
 * Returns the indices of all strict local maxima of [onsetEnv]: positions whose value is greater
 * than both direct neighbours. The first and last position are never reported.
 *
 * @param onsetEnv The onset envelope.
 * @return A list of peak indices in ascending order.
 */
private fun findPeaks(onsetEnv: FloatArray): List<Int> =
    (1 until onsetEnv.size - 1).filter { position ->
        val candidate = onsetEnv[position]
        candidate > onsetEnv[position - 1] && candidate > onsetEnv[position + 1]
    }
605
+
606
/**
 * Extracts the HNR (Harmonics-to-Noise Ratio) from the audio data.
 *
 * The autocorrelation of the segment is computed for every lag. With `r0` the value at lag 0 and
 * `rMax` the largest value at any other lag, the result is `10 * log10(rMax / (r0 - rMax))`.
 *
 * `0f` is returned whenever that expression is undefined or not finite: fewer than two samples,
 * `r0 == 0`, `rMax <= 0`, or `r0 - rMax <= 0`.
 *
 * @param segmentData The segment data as FloatArray.
 * @return The HNR in dB, or `0f` when it cannot be computed.
 */
private fun extractHNR(segmentData: FloatArray): Float {
    val frameSize = segmentData.size
    // At least one non-zero lag is needed.
    if (frameSize < 2) return 0f

    // Compute the autocorrelation of the segment data
    val autocorrelation = FloatArray(frameSize) { lag ->
        var sum = 0f
        for (j in 0 until frameSize - lag) {
            sum += segmentData[j] * segmentData[j + lag]
        }
        sum
    }

    // Find the maximum autocorrelation value (excluding the zero lag)
    val zeroLag = autocorrelation[0]
    var maxAutocorrelation = autocorrelation[1]
    for (lag in 2 until frameSize) {
        maxAutocorrelation = max(maxAutocorrelation, autocorrelation[lag])
    }

    // Compute the HNR; guard the cases where the logarithm would be NaN or infinite.
    val noise = zeroLag - maxAutocorrelation
    if (zeroLag == 0f || maxAutocorrelation <= 0f || noise <= 0f) return 0f
    return 10 * log10(maxAutocorrelation / noise)
}
635
+ }