@siteed/expo-audio-stream 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.size-limit.json +6 -0
- package/README.md +6 -6
- package/android/build.gradle +5 -0
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +120 -0
- package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +34 -4
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +635 -0
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +194 -79
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +48 -2
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +44 -0
- package/android/src/main/java/net/siteed/audiostream/Features.kt +56 -0
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +12 -0
- package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +56 -0
- package/app.plugin.js +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +76 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
- package/build/AudioAnalysis/AudioAnalysis.types.js +3 -0
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +4 -0
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js +101 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/AudioAnalysis/extractWaveform.d.ts +8 -0
- package/build/AudioAnalysis/extractWaveform.d.ts.map +1 -0
- package/build/AudioAnalysis/extractWaveform.js +14 -0
- package/build/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/AudioRecorder.provider.d.ts +14 -1
- package/build/AudioRecorder.provider.d.ts.map +1 -1
- package/build/AudioRecorder.provider.js +18 -5
- package/build/AudioRecorder.provider.js.map +1 -1
- package/build/ExpoAudioStream.native.d.ts +3 -0
- package/build/ExpoAudioStream.native.d.ts.map +1 -0
- package/build/ExpoAudioStream.native.js +6 -0
- package/build/ExpoAudioStream.native.js.map +1 -0
- package/build/ExpoAudioStream.types.d.ts +35 -20
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStream.web.d.ts +42 -0
- package/build/ExpoAudioStream.web.d.ts.map +1 -0
- package/build/ExpoAudioStream.web.js +185 -0
- package/build/ExpoAudioStream.web.js.map +1 -0
- package/build/ExpoAudioStreamModule.d.ts +2 -2
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +16 -3
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.d.ts +51 -0
- package/build/WebRecorder.web.d.ts.map +1 -0
- package/build/WebRecorder.web.js +288 -0
- package/build/WebRecorder.web.js.map +1 -0
- package/build/constants.d.ts +11 -0
- package/build/constants.d.ts.map +1 -0
- package/build/constants.js +14 -0
- package/build/constants.js.map +1 -0
- package/build/events.d.ts +6 -0
- package/build/events.d.ts.map +1 -0
- package/build/events.js +15 -0
- package/build/events.js.map +1 -0
- package/build/index.d.ts +8 -7
- package/build/index.d.ts.map +1 -1
- package/build/index.js +7 -14
- package/build/index.js.map +1 -1
- package/build/logger.d.ts +9 -0
- package/build/logger.d.ts.map +1 -0
- package/build/logger.js +17 -0
- package/build/logger.js.map +1 -0
- package/build/useAudioRecorder.d.ts +37 -0
- package/build/useAudioRecorder.d.ts.map +1 -0
- package/build/useAudioRecorder.js +271 -0
- package/build/useAudioRecorder.js.map +1 -0
- package/build/utils/convertPCMToFloat32.d.ts +11 -0
- package/build/utils/convertPCMToFloat32.d.ts.map +1 -0
- package/build/utils/convertPCMToFloat32.js +41 -0
- package/build/utils/convertPCMToFloat32.js.map +1 -0
- package/build/utils/encodingToBitDepth.d.ts +5 -0
- package/build/utils/encodingToBitDepth.d.ts.map +1 -0
- package/build/utils/encodingToBitDepth.js +13 -0
- package/build/utils/encodingToBitDepth.js.map +1 -0
- package/build/utils/getWavFileInfo.d.ts +25 -0
- package/build/utils/getWavFileInfo.d.ts.map +1 -0
- package/build/utils/getWavFileInfo.js +89 -0
- package/build/utils/getWavFileInfo.js.map +1 -0
- package/build/utils/writeWavHeader.d.ts +9 -0
- package/build/utils/writeWavHeader.d.ts.map +1 -0
- package/build/utils/writeWavHeader.js +41 -0
- package/build/utils/writeWavHeader.js.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts +2 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.js +303 -0
- package/build/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/workers/inlineAudioWebWorker.web.d.ts +2 -0
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
- package/build/workers/inlineAudioWebWorker.web.js +243 -0
- package/build/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/expo-module.config.json +13 -4
- package/ios/AudioAnalysisData.swift +39 -0
- package/ios/AudioProcessingHelpers.swift +59 -0
- package/ios/AudioProcessor.swift +317 -0
- package/ios/AudioStreamError.swift +7 -0
- package/ios/AudioStreamManager.swift +243 -54
- package/ios/AudioStreamManagerDelegate.swift +4 -0
- package/ios/DataPoint.swift +41 -0
- package/ios/ExpoAudioStreamModule.swift +198 -6
- package/ios/Features.swift +44 -0
- package/ios/RecordingResult.swift +19 -0
- package/ios/RecordingSettings.swift +13 -0
- package/ios/WaveformExtractor.swift +105 -0
- package/package.json +13 -12
- package/plugin/tsconfig.json +13 -8
- package/publish.sh +8 -0
- package/src/AudioAnalysis/AudioAnalysis.types.ts +85 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +136 -0
- package/src/AudioAnalysis/extractWaveform.ts +25 -0
- package/src/AudioRecorder.provider.tsx +36 -8
- package/src/ExpoAudioStream.native.ts +6 -0
- package/src/ExpoAudioStream.types.ts +50 -25
- package/src/ExpoAudioStream.web.ts +229 -0
- package/src/ExpoAudioStreamModule.ts +22 -3
- package/src/WebRecorder.web.ts +416 -0
- package/src/constants.ts +18 -0
- package/src/events.ts +25 -0
- package/src/index.ts +14 -29
- package/src/logger.ts +26 -0
- package/src/useAudioRecorder.tsx +415 -0
- package/src/utils/convertPCMToFloat32.ts +48 -0
- package/src/utils/encodingToBitDepth.ts +18 -0
- package/src/utils/getWavFileInfo.ts +125 -0
- package/src/utils/writeWavHeader.ts +56 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +302 -0
- package/src/workers/inlineAudioWebWorker.web.tsx +242 -0
- package/build/ExpoAudioStreamModule.web.d.ts +0 -37
- package/build/ExpoAudioStreamModule.web.d.ts.map +0 -1
- package/build/ExpoAudioStreamModule.web.js +0 -156
- package/build/ExpoAudioStreamModule.web.js.map +0 -1
- package/build/useAudioRecording.d.ts +0 -23
- package/build/useAudioRecording.d.ts.map +0 -1
- package/build/useAudioRecording.js +0 -189
- package/build/useAudioRecording.js.map +0 -1
- package/docs/demo.gif +0 -0
- package/release-it.js +0 -18
- package/src/ExpoAudioStreamModule.web.ts +0 -181
- package/src/useAudioRecording.ts +0 -268
- package/yarn-error.log +0 -7793
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
// net/siteed/audiostream/AudioProcessor.kt
|
|
2
|
+
package net.siteed.audiostream
|
|
3
|
+
|
|
4
|
+
import java.nio.ByteBuffer
|
|
5
|
+
import java.nio.ByteOrder
|
|
6
|
+
import kotlin.math.*
|
|
7
|
+
import android.util.Log
|
|
8
|
+
import java.io.File
|
|
9
|
+
import java.io.IOException
|
|
10
|
+
import kotlin.system.measureTimeMillis
|
|
11
|
+
|
|
12
|
+
class AudioProcessor(private val filesDir: File) {
|
|
13
|
+
/** Tuning constants for the MFCC pipeline (mel filter bank construction and DCT). */
companion object {
    /** Number of cepstral coefficients requested from the DCT stage. */
    const val NUM_MFCC_COEFFICIENTS: Int = 13

    /** Number of triangular filters built for the mel filter bank. */
    const val NUM_MEL_FILTERS: Int = 26

    /** Lower edge of the filter bank, expressed on the mel scale. */
    const val MEL_MIN_FREQ: Double = 0.0

    /** Multiplier in the Hz -> mel conversion `2595 * log10(1 + f / 700)`. */
    const val MEL_MAX_FREQ_DIVISOR: Double = 2595.0

    /** Frequency offset (700) used by the same Hz <-> mel conversion. */
    const val MEL_MAX_FREQ_CONSTANT: Double = 700.0

    /** Factor multiplied by the input length under the square root that scales each DCT output. */
    const val DCT_SQRT_DIVISOR: Double = 2.0

    /** Base of the power used when converting mel values back to Hz. */
    const val LOG_BASE: Double = 10.0
}
|
|
22
|
+
|
|
23
|
+
/**
 * Audio bytes together with the format fields decoded from a WAV header.
 *
 * [equals] and [hashCode] are overridden because the generated data-class
 * implementations compare array properties by reference, so two instances
 * holding identical bytes would otherwise never be equal.
 *
 * @property data The audio bytes (with or without the header, depending on the loader).
 * @property sampleRate Sample rate read from the header.
 * @property bitDepth Bits per sample read from the header.
 * @property channels Channel count read from the header.
 */
data class AudioData(val data: ByteArray, val sampleRate: Int, val bitDepth: Int, val channels: Int) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is AudioData) return false
        return sampleRate == other.sampleRate &&
            bitDepth == other.bitDepth &&
            channels == other.channels &&
            data.contentEquals(other.data)
    }

    override fun hashCode(): Int {
        var result = data.contentHashCode()
        result = 31 * result + sampleRate
        result = 31 * result + bitDepth
        result = 31 * result + channels
        return result
    }
}
|
|
24
|
+
|
|
25
|
+
// Source of DataPoint ids: read and post-incremented once per generated point,
// so ids keep growing across successive calls on the same processor instance.
private var uniqueIdCounter: Long = 0L
|
|
27
|
+
|
|
28
|
+
/**
 * Reads an audio file fully into memory and decodes the format fields of its header.
 *
 * The path is tried as given (after stripping a leading `file://`); if nothing exists
 * there, a file with the same name inside [filesDir] is tried instead. Only `.wav` and
 * `.pcm` extensions are accepted. Channel count, sample rate and bit depth are read
 * little-endian from header offsets 22, 24 and 34.
 *
 * NOTE(review): `.pcm` files go through the same header parsing as `.wav` files —
 * confirm that such files are expected to carry a WAV header.
 *
 * @param originalFileUri Path or `file://` URI of the file to load.
 * @param skipWavHeader When true, the returned bytes exclude the first
 *   `Constants.WAV_HEADER_SIZE` bytes; otherwise the whole file is returned.
 * @return The decoded [AudioData], or null if the file is missing, has an unsupported
 *   extension, is shorter than the header, or any exception occurs while reading it.
 */
fun loadAudioFile(originalFileUri: String, skipWavHeader: Boolean = false): AudioData? {
    val fileUri = originalFileUri.removePrefix("file://")

    // Resolve the file: the given location first, then the same file name under filesDir.
    val requested = File(fileUri)
    val file = requested.takeIf { it.exists() }
        ?: File(filesDir, requested.name).takeIf { it.exists() }
        ?: run {
            Log.e("AudioProcessor", "File does not exist at provided path or in filesDir: $fileUri")
            return null
        }

    val validExtensions = listOf("wav", "pcm")
    val fileExtension = file.extension.lowercase()
    if (validExtensions.none { it == fileExtension }) {
        Log.e("AudioProcessor", "Invalid file extension: $fileExtension. Supported extensions are: $validExtensions")
        return null
    }

    return try {
        val fileData = file.readBytes()
        val headerSize = Constants.WAV_HEADER_SIZE
        if (fileData.size < headerSize) {
            Log.e("AudioProcessor", "File is too small to be a valid WAV file")
            return null
        }

        // Fixed-offset, little-endian header fields.
        val header = fileData.copyOfRange(0, headerSize)
        val sampleRate = byteArrayToInt(header.copyOfRange(24, 28))
        val channels = byteArrayToShort(header.copyOfRange(22, 24))
        val bitDepth = byteArrayToShort(header.copyOfRange(34, 36))

        val payload = when {
            skipWavHeader -> fileData.copyOfRange(headerSize, fileData.size)
            else -> fileData
        }
        AudioData(payload, sampleRate, bitDepth.toInt(), channels.toInt())
    } catch (e: Exception) {
        val description = when (e) {
            is IOException -> "Failed to load audio file: ${e.message}"
            is IllegalArgumentException -> "Invalid audio file format: ${e.message}"
            else -> "Unexpected error: ${e.message}"
        }
        Log.e("AudioProcessor", description, e)
        null
    }
}
|
|
82
|
+
|
|
83
|
+
/**
 * Decodes the first four bytes of [bytes] as a little-endian 32-bit integer.
 * Byte 0 is the least significant; each byte is masked to its unsigned value first.
 */
private fun byteArrayToInt(bytes: ByteArray): Int =
    (0..3).fold(0) { acc, index ->
        acc or ((bytes[index].toInt() and 0xFF) shl (8 * index))
    }
|
|
89
|
+
|
|
90
|
+
/**
 * Decodes the first two bytes of [bytes] as a little-endian 16-bit value.
 * Byte 0 is the low byte, byte 1 the high byte.
 */
private fun byteArrayToShort(bytes: ByteArray): Short {
    val lowByte = bytes[0].toInt() and 0xFF
    val highByte = (bytes[1].toInt() and 0xFF) shl 8
    return (lowByte or highByte).toShort()
}
|
|
94
|
+
|
|
95
|
+
/**
 * Splits raw PCM bytes into consecutive segments and produces one [DataPoint] per segment.
 *
 * The number of points is `duration * config.pointsPerSecond` (at least one). For every
 * segment the sum of squares, zero crossings and absolute-amplitude extremes are gathered
 * and handed to [computeFeatures]. The reported amplitude is the segment peak when
 * `config.algorithm == "peak"`, otherwise the RMS.
 *
 * Each segment's own minimum and maximum absolute amplitude are passed to
 * [computeFeatures]. Maxima are seeded with `0f` because `Float.MIN_VALUE` is the
 * smallest positive float, not a lower bound. A NaN RMS (possible for an empty trailing
 * segment) is kept out of the overall amplitude range.
 *
 * NOTE(review): the overall amplitude range follows the RMS values even when the "peak"
 * algorithm is selected — confirm that consumers expect this.
 *
 * @param data The audio data in bytes.
 * @param config The recording configuration (encoding, sample rate, channels,
 *   points per second, algorithm and feature switches are read from it).
 * @return AudioAnalysisData containing the extracted data points and summary fields.
 * @throws IllegalArgumentException if `config.encoding` is not one of
 *   `pcm_8bit`, `pcm_16bit`, `pcm_32bit`.
 */
fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
    val sampleRate = config.sampleRate.toFloat()
    val bitDepth = when (config.encoding) {
        "pcm_8bit" -> 8
        "pcm_16bit" -> 16
        "pcm_32bit" -> 32
        else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
    }
    val channelData = convertToFloatArray(data, bitDepth)
    val pointsPerSecond = config.pointsPerSecond
    val algorithm = config.algorithm
    val featureOptions = config.features

    val totalSamples = channelData.size
    val segmentDurationSeconds = totalSamples.toDouble() / sampleRate
    val totalPoints = max((segmentDurationSeconds * pointsPerSecond).toInt(), 1)
    val pointInterval = ceil(totalSamples / totalPoints.toDouble()).toInt()

    Log.d("AudioProcessor", "Extracting waveform totalSize=${data.size} with $totalSamples samples and $pointsPerSecond points per second --> $pointInterval samples per point")
    Log.d("AudioProcessor", "segmentDuration: $segmentDurationSeconds seconds")

    val expectedPoints = segmentDurationSeconds * pointsPerSecond
    val samplesPerPoint = ceil(channelData.size / expectedPoints).toInt()
    Log.d("AudioProcessor", "Extracting waveform with expectedPoints=$expectedPoints , samplesPerPoints=$samplesPerPoint")

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    var maxAmplitude = 0f // RMS is never negative, so 0 is a correct lower seed
    val durationMs = (segmentDurationSeconds * 1000).toInt()
    val bytesPerSample = bitDepth / 8

    // Measure the time taken for audio processing
    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until totalPoints) {
            val start = i * samplesPerPoint
            val end = min(start + samplesPerPoint, totalSamples)
            // An empty range (start >= end) yields an empty segment rather than throwing.
            val segmentData = channelData.sliceArray(start until end)

            var sumSquares = 0f
            var zeroCrossings = 0
            var prevValue = 0f
            var localMinAmplitude = Float.MAX_VALUE
            var localMaxAmplitude = 0f

            for (value in segmentData) {
                sumSquares += value * value
                if (prevValue != 0f && value * prevValue < 0) zeroCrossings += 1
                prevValue = value

                val absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }
            // Do not leak the MAX_VALUE sentinel when the segment had no samples.
            if (segmentData.isEmpty()) localMinAmplitude = 0f

            val features = computeFeatures(
                segmentData,
                sampleRate,
                localMinAmplitude,
                localMaxAmplitude,
                sumSquares,
                zeroCrossings,
                segmentData.size,
                featureOptions
            )
            val rms = features.rms
            val silent = rms < 0.01
            val dB = if (featureOptions["dB"] == true) 20 * log10(rms.toDouble()).toFloat() else 0f
            if (!rms.isNaN()) {
                minAmplitude = min(minAmplitude, rms)
                maxAmplitude = max(maxAmplitude, rms)
            }

            val startPosition = start * bytesPerSample * config.channels
            val endPosition = end * bytesPerSample * config.channels

            val dataPoint = DataPoint(
                id = uniqueIdCounter++, // Assign unique ID and increment the counter
                amplitude = if (algorithm == "peak") localMaxAmplitude else rms,
                activeSpeech = null,
                dB = dB,
                silent = silent,
                features = features,
                samples = segmentData.size,
                startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                startPosition = startPosition,
                endPosition = endPosition,
                speaker = 0
            )

            dataPoints.add(dataPoint)
        }
    }

    // If no valid RMS was ever recorded, collapse the range instead of exposing the sentinel.
    if (minAmplitude > maxAmplitude) minAmplitude = maxAmplitude

    return AudioAnalysisData(
        pointsPerSecond = pointsPerSecond,
        durationMs = durationMs,
        bitDepth = bitDepth,
        numberOfChannels = config.channels,
        sampleRate = config.sampleRate,
        samples = totalSamples,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        speakerChanges = emptyList(),
        extractionTimeMs = extractionTimeMs.toFloat() // Return the measured extraction time
    )
}
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
/**
 * Converts little-endian PCM bytes to floats.
 *
 * - 16-bit: signed samples divided by 32768.
 * - 8-bit: bytes are treated as unsigned (0..255) with a midpoint of 128, so the
 *   byte is masked with `0xFF` before the offset is removed. Without the mask the
 *   signed `Byte` value would map everything into [-2, 0).
 * - 32-bit: signed integer samples divided by `Int.MAX_VALUE`.
 *
 * Trailing bytes that do not form a whole sample are ignored for 16- and 32-bit data.
 *
 * @param data The audio data in bytes.
 * @param bitDepth The bit depth of the audio data (8, 16 or 32).
 * @return One float per sample.
 * @throws IllegalArgumentException for any other bit depth.
 */
private fun convertToFloatArray(data: ByteArray, bitDepth: Int): FloatArray {
    return when (bitDepth) {
        16 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            // Absolute reads fill the primitive array directly, avoiding a boxed intermediate list.
            FloatArray(samples.remaining()) { index -> samples.get(index) / 32768.0f }
        }
        8 -> FloatArray(data.size) { index -> ((data[index].toInt() and 0xFF) - 128) / 128.0f }
        32 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            FloatArray(samples.remaining()) { index -> samples.get(index) / Int.MAX_VALUE.toFloat() }
        }
        else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
    }
}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
/**
 * Builds the [Features] record for one segment.
 *
 * RMS is always computed; every other feature is computed only when its key in
 * [featureOptions] is `true` and otherwise takes its neutral value (`0f` or an empty
 * list). A failing extractor is logged and replaced by the same neutral value, so one
 * broken feature never aborts the whole segment.
 *
 * An empty segment (`segmentLength == 0`) yields `rms = 0` and `zcr = 0` instead of
 * the NaN produced by dividing by zero.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @param minAmplitude The minimum amplitude, copied into the result.
 * @param maxAmplitude The maximum amplitude, copied into the result.
 * @param sumSquares The sum of squared samples of the segment.
 * @param zeroCrossings The number of zero crossings counted in the segment.
 * @param segmentLength The number of samples in the segment.
 * @param featureOptions The feature options to compute.
 * @return The computed features.
 */
private fun computeFeatures(
    segmentData: FloatArray,
    sampleRate: Float,
    minAmplitude: Float,
    maxAmplitude: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: Map<String, Boolean>
): Features {
    val hasSamples = segmentLength > 0
    val rms = if (hasSamples) sqrt(sumSquares / segmentLength) else 0f
    val energy = if (featureOptions["energy"] == true) sumSquares else 0f
    val zcr = if (featureOptions["zcr"] == true && hasSamples) zeroCrossings / segmentLength.toFloat() else 0f

    val mfcc = featureOrDefault(featureOptions["mfcc"] == true, "MFCC", emptyList<Float>()) {
        extractMFCC(segmentData, sampleRate)
    }
    val spectralCentroid = featureOrDefault(featureOptions["spectralCentroid"] == true, "spectral centroid", 0f) {
        extractSpectralCentroid(segmentData, sampleRate)
    }
    val spectralFlatness = featureOrDefault(featureOptions["spectralFlatness"] == true, "spectral flatness", 0f) {
        extractSpectralFlatness(segmentData)
    }
    val spectralRollOff = featureOrDefault(featureOptions["spectralRollOff"] == true, "spectral roll-off", 0f) {
        extractSpectralRollOff(segmentData, sampleRate)
    }
    val spectralBandwidth = featureOrDefault(featureOptions["spectralBandwidth"] == true, "spectral bandwidth", 0f) {
        extractSpectralBandwidth(segmentData, sampleRate)
    }
    val chromagram = featureOrDefault(featureOptions["chromagram"] == true, "chromagram", emptyList<Float>()) {
        extractChromagram(segmentData, sampleRate)
    }
    val tempo = featureOrDefault(featureOptions["tempo"] == true, "tempo", 0f) {
        extractTempo(segmentData, sampleRate)
    }
    val hnr = featureOrDefault(featureOptions["hnr"] == true, "HNR", 0f) {
        extractHNR(segmentData)
    }

    return Features(
        energy = energy,
        mfcc = mfcc,
        rms = rms,
        zcr = zcr,
        minAmplitude = minAmplitude,
        maxAmplitude = maxAmplitude,
        spectralCentroid = spectralCentroid,
        spectralFlatness = spectralFlatness,
        spectralRollOff = spectralRollOff,
        spectralBandwidth = spectralBandwidth,
        chromagram = chromagram,
        tempo = tempo,
        hnr = hnr
    )
}

/**
 * Runs [extract] when [enabled]; returns [default] when disabled or when the extractor
 * throws (the failure is logged under [label]).
 */
private inline fun <T> featureOrDefault(enabled: Boolean, label: String, default: T, extract: () -> T): T {
    if (!enabled) return default
    return try {
        extract()
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Failed to extract $label: ${e.message}", e)
        default
    }
}
|
|
328
|
+
|
|
329
|
+
/**
 * Puts a set of per-segment accumulators back into their starting state.
 *
 * Each accumulator is reached through a setter callback: the sum of squares and the
 * zero-crossing count are set to zero, the local minimum to `Float.MAX_VALUE`, the
 * local maximum to `Float.MIN_VALUE`, and finally the sample buffer is emptied.
 *
 * @param sumSquaresUpdater Setter invoked with the reset sum of squares.
 * @param zeroCrossingsUpdater Setter invoked with the reset zero-crossing count.
 * @param localMinAmplitudeUpdater Setter invoked with the reset local minimum.
 * @param localMaxAmplitudeUpdater Setter invoked with the reset local maximum.
 * @param segmentData The segment buffer to clear in place.
 */
private fun resetSegmentData(
    sumSquaresUpdater: (Float) -> Unit,
    zeroCrossingsUpdater: (Int) -> Unit,
    localMinAmplitudeUpdater: (Float) -> Unit,
    localMaxAmplitudeUpdater: (Float) -> Unit,
    segmentData: MutableList<Float>
) {
    val initialSum = 0f
    val initialCrossings = 0

    sumSquaresUpdater.invoke(initialSum)
    zeroCrossingsUpdater.invoke(initialCrossings)
    localMinAmplitudeUpdater.invoke(Float.MAX_VALUE)
    localMaxAmplitudeUpdater.invoke(Float.MIN_VALUE)
    segmentData.clear()
}
|
|
350
|
+
|
|
351
|
+
/**
 * Extracts the MFCC (Mel-Frequency Cepstral Coefficients) of one segment.
 *
 * Steps: forward FFT on a copy of the segment, squared output values combined two at a
 * time into `sqrt(a² + b²)`, projection onto the mel filter bank, natural log of the
 * filter energies, then a DCT keeping [NUM_MFCC_COEFFICIENTS] values.
 *
 * Presumably each pair of FFT output values is one (real, imaginary) bin — confirm
 * against `FFT.realForward`. When the buffer length is odd the last chunk has a single
 * element, destructuring it throws, and the function logs and returns an empty list.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The MFCC coefficients, or an empty list when the segment has fewer than two
 *   samples or a processing stage fails.
 */
private fun extractMFCC(segmentData: FloatArray, sampleRate: Float): List<Float> {
    if (segmentData.size < 2) {
        Log.e("AudioProcessor", "Segment data is too small for MFCC extraction: size=${segmentData.size}")
        return emptyList()
    }

    // The transform works in place, so operate on a copy of the caller's samples.
    val spectrumBuffer = segmentData.copyOf()
    val transform = FFT(spectrumBuffer.size)
    transform.realForward(spectrumBuffer)

    val powerSpectrum = try {
        spectrumBuffer.map { it * it }.chunked(2) { (re, im) -> sqrt(re + im) }
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Error computing power spectrum: ${e.message}", e)
        return emptyList()
    }

    // Weighted sum of the spectrum under each mel filter, accumulated in double precision.
    val filterBank = computeMelFilterBank(NUM_MEL_FILTERS, powerSpectrum.size, sampleRate)
    val filterEnergies = filterBank.map { weights ->
        var energy = 0.0
        for (k in 0 until min(weights.size, powerSpectrum.size)) {
            energy += (weights[k] * powerSpectrum[k]).toDouble()
        }
        energy.toFloat()
    }

    // The tiny offset keeps the logarithm finite for an all-zero filter response.
    val logEnergies = filterEnergies.map { ln(it + Float.MIN_VALUE) }

    return try {
        computeDCT(logEnergies, NUM_MFCC_COEFFICIENTS)
    } catch (e: Exception) {
        Log.e("AudioProcessor", "Error computing DCT: ${e.message}", e)
        emptyList()
    }
}
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
/**
 * Builds a bank of triangular filters whose edges are evenly spaced on the mel scale.
 *
 * `numFilters + 2` edge points are laid out between [MEL_MIN_FREQ] and the mel value
 * of `sampleRate / 2`, converted back to Hz and then to fractional spectrum positions.
 * Filter `i` rises linearly from edge `i - 1` to edge `i` and falls from edge `i` to
 * edge `i + 1`; positions outside the spectrum are skipped.
 *
 * NOTE(review): positions are computed as `hz * (powerSpectrumSize - 1) / sampleRate`,
 * so `sampleRate / 2` lands on `(powerSpectrumSize - 1) / 2`. If the spectrum spans
 * 0..Nyquist the filters only cover its lower half — confirm this is intended.
 *
 * @param numFilters The number of Mel filters.
 * @param powerSpectrumSize The size of the power spectrum.
 * @param sampleRate The sample rate of the audio data.
 * @return A list of Mel filters, each with `powerSpectrumSize` weights.
 */
private fun computeMelFilterBank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): List<List<Float>> {
    val melMaxFreq = MEL_MAX_FREQ_DIVISOR * log10(1.0 + sampleRate / 2.0 / MEL_MAX_FREQ_CONSTANT)
    val melEdges = DoubleArray(numFilters + 2) { i ->
        MEL_MIN_FREQ + i * (melMaxFreq - MEL_MIN_FREQ) / (numFilters + 1)
    }

    // mel -> Hz -> fractional index into the spectrum
    val hzEdges = melEdges.map { MEL_MAX_FREQ_CONSTANT * (LOG_BASE.pow(it / MEL_MAX_FREQ_DIVISOR) - 1.0) }
    val edgePositions = hzEdges.map { it * (powerSpectrumSize - 1) / sampleRate }

    return (1..numFilters).map { i ->
        val left = edgePositions[i - 1]
        val center = edgePositions[i]
        val right = edgePositions[i + 1]
        val weights = FloatArray(powerSpectrumSize)

        // Rising slope of the triangle.
        for (j in left.toInt() until center.toInt()) {
            if (j in weights.indices) {
                weights[j] = ((j - left) / (center - left)).toFloat()
            }
        }
        // Falling slope of the triangle.
        for (j in center.toInt() until right.toInt()) {
            if (j in weights.indices) {
                weights[j] = ((right - j) / (right - center)).toFloat()
            }
        }
        weights.toList()
    }
}
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
/**
 * Computes the leading coefficients of a cosine transform of the log energies.
 *
 * Coefficient `i` is `sum_j logEnergies[j] * cos(PI * i * (j + 0.5) / n)` divided by
 * `sqrt(DCT_SQRT_DIVISOR * n)`, where `n` is the number of log energies.
 *
 * @param logEnergies The log energies.
 * @param numCoefficients The number of coefficients to compute.
 * @return A list of `numCoefficients` MFCC coefficients.
 */
private fun computeDCT(logEnergies: List<Float>, numCoefficients: Int): List<Float> {
    val n = logEnergies.size
    val coefficients = FloatArray(numCoefficients) { i ->
        var accumulated = 0.0
        logEnergies.forEachIndexed { j, energy ->
            accumulated += energy * cos(PI * i * (j + 0.5) / n)
        }
        (accumulated / sqrt(DCT_SQRT_DIVISOR * n)).toFloat()
    }
    return coefficients.toList()
}
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
/**
 * Computes an energy-weighted mean position of the segment, scaled to
 * `sampleRate / 2` over the segment length.
 *
 * The weights are the squared values of [segmentData] exactly as passed in; this
 * function performs no transform of its own.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The spectral centroid, or 0 when the squared values sum to zero.
 */
private fun extractSpectralCentroid(segmentData: FloatArray, sampleRate: Float): Float {
    var totalWeight = 0f
    var indexWeightedSum = 0f
    for ((index, sample) in segmentData.withIndex()) {
        val weight = sample * sample
        totalWeight += weight
        indexWeightedSum += index * weight
    }
    if (totalWeight == 0f) return 0f

    val meanIndex = indexWeightedSum / totalWeight
    return meanIndex * (sampleRate / 2) / segmentData.size
}
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
/**
 * Computes the ratio of the geometric mean to the arithmetic mean of the absolute
 * values in [segmentData].
 *
 * The geometric mean is evaluated as `exp(mean(ln(|x| + Float.MIN_VALUE)))` so that
 * zero values do not produce an infinite logarithm. Both means are accumulated in
 * double precision.
 *
 * @param segmentData The segment data.
 * @return The spectral flatness, or 0 when the arithmetic mean is zero.
 */
private fun extractSpectralFlatness(segmentData: FloatArray): Float {
    var logSum = 0.0
    var magnitudeSum = 0.0
    for (sample in segmentData) {
        val magnitude = abs(sample)
        logSum += ln(magnitude + Float.MIN_VALUE)
        magnitudeSum += magnitude
    }

    // An empty segment has undefined means; keep them as NaN.
    val count = segmentData.size
    val meanLog = if (count == 0) Double.NaN else logSum / count
    val meanMagnitude = if (count == 0) Double.NaN else magnitudeSum / count

    val geometricMean = exp(meanLog).toFloat()
    val arithmeticMean = meanMagnitude.toFloat()
    if (arithmeticMean == 0f) return 0f
    return geometricMean / arithmeticMean
}
|
|
480
|
+
|
|
481
|
+
/**
 * Finds the first position at which the running sum of absolute values reaches 85%
 * of their total, and scales that position to `sampleRate / 2` over the segment length.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The spectral roll-off, or 0 when the threshold is never reached
 *   (for example with an empty segment).
 */
private fun extractSpectralRollOff(segmentData: FloatArray, sampleRate: Float): Float {
    var totalEnergy = 0f
    for (sample in segmentData) {
        totalEnergy += abs(sample)
    }
    val rollOffThreshold = totalEnergy * 0.85f

    var runningEnergy = 0f
    for (index in segmentData.indices) {
        runningEnergy += abs(segmentData[index])
        if (runningEnergy >= rollOffThreshold) {
            return index.toFloat() / segmentData.size * (sampleRate / 2)
        }
    }
    return 0f
}
|
|
502
|
+
|
|
503
|
+
/**
 * Computes the magnitude-weighted spread of the segment around its centroid.
 *
 * [extractSpectralCentroid] returns a position scaled by `(sampleRate / 2) / size`.
 * Each index is converted with the same factor before the centroid is subtracted, so
 * the deviation is measured in one consistent unit.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The spectral bandwidth, or 0 when the absolute values sum to zero.
 */
private fun extractSpectralBandwidth(segmentData: FloatArray, sampleRate: Float): Float {
    val centroid = extractSpectralCentroid(segmentData, sampleRate)

    var totalMagnitude = 0f
    for (sample in segmentData) {
        totalMagnitude += abs(sample)
    }
    if (totalMagnitude == 0f) return 0f

    // Same index -> frequency scaling that the centroid uses.
    val unitsPerIndex = (sampleRate / 2) / segmentData.size
    var weightedSquaredDeviation = 0f
    for ((index, sample) in segmentData.withIndex()) {
        val deviation = index * unitsPerIndex - centroid
        weightedSquaredDeviation += abs(sample) * deviation * deviation
    }
    return sqrt(weightedSquaredDeviation / totalMagnitude)
}
|
|
518
|
+
|
|
519
|
+
/**
 * Folds the magnitudes of the FFT output into 12 pitch-class bins.
 *
 * Every output value at index `i` is assigned the frequency `i * sampleRate / size`
 * and the pitch class `floor(12 * log2(freq / 440))` modulo 12.
 *
 * A floor-modulo is used so that frequencies below 440 Hz wrap into 0..11 rather than
 * producing a negative class that is discarded. For frequencies above 440 Hz the
 * resulting class is the same as with plain truncation. Index 0 (frequency 0, whose
 * logarithm is not finite) is skipped rather than being added to pitch class 0.
 *
 * NOTE(review): every element of the FFT output is treated as its own frequency bin —
 * confirm against the layout produced by `FFT.realForward`.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The chromagram as 12 accumulated magnitudes.
 */
private fun extractChromagram(segmentData: FloatArray, sampleRate: Float): List<Float> {
    val fftData = segmentData.copyOf()
    val fft = FFT(fftData.size)
    fft.realForward(fftData)

    // 12 bins, one for each pitch class
    val chromagram = FloatArray(12)

    for (i in 1 until fftData.size) {
        val freq = i * sampleRate / fftData.size
        val semitonesFromReference = 12 * log2(freq / 440.0)
        if (!semitonesFromReference.isFinite()) continue

        val pitchClass = floor(semitonesFromReference).toInt().mod(12)
        chromagram[pitchClass] += abs(fftData[i])
    }

    return chromagram.toList()
}
|
|
547
|
+
|
|
548
|
+
/**
 * Estimates the tempo from the spacing of peaks in the onset envelope.
 *
 * Peak positions are indices into the onset envelope, i.e. frame numbers. One frame
 * is `sampleRate.toInt() / 100` samples, the frame length used by
 * [calculateOnsetEnvelope]. Frame distances are converted to seconds through that
 * frame length rather than being divided by the sample rate directly.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The tempo in beats per minute, or 0 when fewer than two peaks are found or
 *   the average interval is not positive.
 */
private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
    // Onset strength per frame, then the frames where it peaks
    val onsetEnv = calculateOnsetEnvelope(segmentData, sampleRate)
    val peaks = findPeaks(onsetEnv)

    // No interval can be measured without at least two peaks.
    if (peaks.size < 2) return 0f

    val frameSize = sampleRate.toInt() / 100
    val secondsPerFrame = frameSize / sampleRate

    // Inter-onset intervals (IOIs) in seconds
    val iois = peaks.zipWithNext { earlier, later -> (later - earlier) * secondsPerFrame }
    val avgIoi = iois.average().toFloat()

    return if (avgIoi > 0f) 60f / avgIoi else 0f
}
|
|
568
|
+
|
|
569
|
+
/**
 * Calculates an onset envelope: one value per frame of `sampleRate.toInt() / 100`
 * samples.
 *
 * For every whole frame, the absolute sample values are compared position by position
 * with those of the previous frame (all zeros before the first frame); the positive
 * increases are summed to give the frame's onset value. Samples after the last whole
 * frame are ignored.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @return The onset envelope, with `segmentData.size / frameSize` entries.
 */
private fun calculateOnsetEnvelope(segmentData: FloatArray, sampleRate: Float): FloatArray {
    val frameSize = sampleRate.toInt() / 100 // Assume 10ms frames
    val envelope = FloatArray(segmentData.size / frameSize)
    var previousMagnitudes = FloatArray(frameSize)

    for (frameIndex in envelope.indices) {
        val offset = frameIndex * frameSize
        val magnitudes = FloatArray(frameSize) { position -> abs(segmentData[offset + position]) }

        // Sum only the increases relative to the previous frame.
        var rise = 0f
        for (position in magnitudes.indices) {
            rise += max(0f, magnitudes[position] - previousMagnitudes[position])
        }

        envelope[frameIndex] = rise
        previousMagnitudes = magnitudes
    }

    return envelope
}
|
|
590
|
+
|
|
591
|
+
/**
 * Finds the strict local maxima of the onset envelope.
 *
 * An index counts as a peak when its value is greater than both neighbours; the first
 * and last positions are never reported.
 *
 * @param onsetEnv The onset envelope.
 * @return A list of peak indices in ascending order.
 */
private fun findPeaks(onsetEnv: FloatArray): List<Int> =
    (1 until onsetEnv.size - 1).filter { index ->
        val current = onsetEnv[index]
        current > onsetEnv[index - 1] && current > onsetEnv[index + 1]
    }
|
|
605
|
+
|
|
606
|
+
/**
 * Extracts the HNR (Harmonics-to-Noise Ratio) from the audio data.
 *
 * The autocorrelation is computed for every lag; the largest value at a non-zero lag
 * is taken as the harmonic part and the remainder of the zero-lag value as the noise
 * part. The result is `10 * log10(harmonic / noise)`.
 *
 * Degenerate inputs return 0 instead of throwing or yielding an infinite or NaN
 * value: an empty segment, a single sample, an all-zero segment, or any case where
 * the harmonic or noise part is not strictly positive.
 *
 * The autocorrelation is quadratic in the segment length.
 *
 * @param segmentData The segment data as FloatArray.
 * @return The HNR in decibels, or 0 when it cannot be determined.
 */
private fun extractHNR(segmentData: FloatArray): Float {
    val frameSize = segmentData.size
    if (frameSize == 0) return 0f

    // Autocorrelation of the segment for lags 0 until frameSize
    val autocorrelation = FloatArray(frameSize) { lag ->
        var sum = 0f
        for (j in 0 until frameSize - lag) {
            sum += segmentData[j] * segmentData[j + lag]
        }
        sum
    }

    // Strongest correlation at a non-zero lag (none exists for a single sample)
    val harmonic = (1 until frameSize).maxOfOrNull { autocorrelation[it] } ?: 0f
    val noise = autocorrelation[0] - harmonic

    // Written as negated comparisons so that NaN inputs are rejected as well.
    if (!(harmonic > 0f) || !(noise > 0f)) return 0f

    return 10 * log10(harmonic / noise)
}
|
|
635
|
+
}
|