@siteed/expo-audio-stream 1.16.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +28 -1
  2. package/README.md +1 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
  4. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
  6. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +134 -23
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
  8. package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
  9. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
  10. package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
  11. package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
  12. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +4 -4
  13. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
  14. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  15. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  16. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
  17. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  18. package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
  19. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  20. package/build/ExpoAudioStream.types.d.ts +49 -3
  21. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  22. package/build/ExpoAudioStream.types.js.map +1 -1
  23. package/build/ExpoAudioStream.web.d.ts +2 -0
  24. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  25. package/build/ExpoAudioStream.web.js +8 -1
  26. package/build/ExpoAudioStream.web.js.map +1 -1
  27. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  28. package/build/ExpoAudioStreamModule.js +216 -12
  29. package/build/ExpoAudioStreamModule.js.map +1 -1
  30. package/build/WebRecorder.web.d.ts +67 -13
  31. package/build/WebRecorder.web.d.ts.map +1 -1
  32. package/build/WebRecorder.web.js +178 -173
  33. package/build/WebRecorder.web.js.map +1 -1
  34. package/build/index.d.ts +3 -3
  35. package/build/index.d.ts.map +1 -1
  36. package/build/index.js +2 -2
  37. package/build/index.js.map +1 -1
  38. package/build/useAudioRecorder.d.ts.map +1 -1
  39. package/build/useAudioRecorder.js +12 -8
  40. package/build/useAudioRecorder.js.map +1 -1
  41. package/build/utils/audioProcessing.d.ts +24 -0
  42. package/build/utils/audioProcessing.d.ts.map +1 -0
  43. package/build/utils/audioProcessing.js +133 -0
  44. package/build/utils/audioProcessing.js.map +1 -0
  45. package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
  46. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
  47. package/build/workers/InlineFeaturesExtractor.web.js +692 -175
  48. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
  49. package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
  50. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
  51. package/build/workers/inlineAudioWebWorker.web.js +3 -2
  52. package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
  53. package/ios/AudioAnalysisData.swift +51 -16
  54. package/ios/AudioProcessingHelpers.swift +710 -26
  55. package/ios/AudioProcessor.swift +334 -185
  56. package/ios/AudioStreamManager.swift +66 -22
  57. package/ios/DataPoint.swift +25 -12
  58. package/ios/DecodingConfig.swift +47 -0
  59. package/ios/ExpoAudioStreamModule.swift +189 -104
  60. package/ios/FFT.swift +62 -0
  61. package/ios/Features.swift +24 -3
  62. package/ios/RecordingSettings.swift +9 -7
  63. package/package.json +2 -1
  64. package/plugin/build/index.d.ts +2 -0
  65. package/plugin/build/index.js +10 -3
  66. package/plugin/src/index.ts +10 -1
  67. package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
  68. package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
  69. package/src/ExpoAudioStream.types.ts +57 -7
  70. package/src/ExpoAudioStream.web.ts +8 -1
  71. package/src/ExpoAudioStreamModule.ts +255 -10
  72. package/src/WebRecorder.web.ts +231 -243
  73. package/src/index.ts +5 -3
  74. package/src/useAudioRecorder.tsx +14 -10
  75. package/src/utils/audioProcessing.ts +205 -0
  76. package/src/workers/InlineFeaturesExtractor.web.tsx +692 -175
  77. package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
@@ -1,3 +1,4 @@
1
+ // packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt
1
2
  package net.siteed.audiostream
2
3
 
3
4
  import android.Manifest
@@ -12,6 +13,7 @@ import expo.modules.kotlin.Promise
12
13
  import expo.modules.kotlin.modules.Module
13
14
  import expo.modules.kotlin.modules.ModuleDefinition
14
15
  import expo.modules.interfaces.permissions.Permissions
16
+ import java.util.zip.CRC32
15
17
 
16
18
  class ExpoAudioStreamModule : Module(), EventSender {
17
19
  private lateinit var audioRecorderManager: AudioRecorderManager
@@ -55,28 +57,77 @@ class ExpoAudioStreamModule : Module(), EventSender {
55
57
  try {
56
58
  val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }
57
59
 
58
- // Get decoding options
59
- val decodingOptionsMap = options["decodingOptions"] as? Map<String, Any>
60
- val decodingConfig = if (decodingOptionsMap != null) {
60
+ // Get time or byte range options
61
+ val startTimeMs = options["startTimeMs"] as? Number
62
+ val endTimeMs = options["endTimeMs"] as? Number
63
+ val position = options["position"] as? Number
64
+ val length = options["length"] as? Number
65
+ val segmentDurationMs = (options["segmentDurationMs"] as? Number)?.toInt() ?: 100
66
+
67
+ // Validate ranges - can have time range OR byte range OR no range
68
+ val hasTimeRange = startTimeMs != null && endTimeMs != null
69
+ val hasByteRange = position != null && length != null
70
+
71
+ // Only throw if both ranges are provided
72
+ if (hasTimeRange && hasByteRange) {
73
+ throw IllegalArgumentException("Cannot specify both time range and byte range")
74
+ }
75
+
76
+ // Get decoding options with default configuration
77
+ val defaultConfig = DecodingConfig(
78
+ targetSampleRate = null,
79
+ targetChannels = 1, // Default to mono
80
+ targetBitDepth = 16,
81
+ normalizeAudio = false
82
+ )
83
+
84
+ val config = (options["decodingOptions"] as? Map<String, Any>)?.let { decodingOptionsMap ->
61
85
  DecodingConfig(
62
86
  targetSampleRate = decodingOptionsMap["targetSampleRate"] as? Int,
63
87
  targetChannels = decodingOptionsMap["targetChannels"] as? Int,
64
88
  targetBitDepth = (decodingOptionsMap["targetBitDepth"] as? Int) ?: 16,
65
89
  normalizeAudio = (decodingOptionsMap["normalizeAudio"] as? Boolean) ?: false
66
90
  )
67
- } else null
68
-
69
- val audioData = audioProcessor.loadAudioFromAnyFormat(fileUri, decodingConfig)
70
- ?: throw IllegalStateException("Failed to load audio file")
91
+ } ?: defaultConfig
92
+
93
+ // Load audio data based on range type (or full file if no range specified)
94
+ val audioData = when {
95
+ hasByteRange -> {
96
+ val format = audioProcessor.getAudioFormat(fileUri)
97
+ ?: throw IllegalArgumentException("Could not determine audio format")
98
+
99
+ // Calculate time range from byte position
100
+ val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
101
+ val effectiveStartTimeMs = (position!!.toLong() * 1000) / bytesPerSecond
102
+ val effectiveEndTimeMs = effectiveStartTimeMs + (length!!.toLong() * 1000) / bytesPerSecond
103
+
104
+ Log.d(Constants.TAG, "Loading audio with byte range: position=$position, length=$length")
105
+
106
+ audioProcessor.loadAudioRange(
107
+ fileUri = fileUri,
108
+ startTimeMs = effectiveStartTimeMs,
109
+ endTimeMs = effectiveEndTimeMs,
110
+ config = config
111
+ )
112
+ }
113
+ hasTimeRange -> {
114
+ Log.d(Constants.TAG, "Loading audio with time range: startTimeMs=$startTimeMs, endTimeMs=$endTimeMs")
115
+
116
+ audioProcessor.loadAudioRange(
117
+ fileUri = fileUri,
118
+ startTimeMs = startTimeMs!!.toLong(),
119
+ endTimeMs = endTimeMs!!.toLong(),
120
+ config = config
121
+ )
122
+ }
123
+ else -> {
124
+ Log.d(Constants.TAG, "Loading entire audio file")
125
+ audioProcessor.loadAudioFromAnyFormat(fileUri, config)
126
+ }
127
+ } ?: throw IllegalStateException("Failed to load audio data")
71
128
 
72
- val pointsPerSecond = (options["pointsPerSecond"] as? Double) ?: 20.0
73
- val algorithm = options["algorithm"] as? String ?: "peak"
74
129
  val featuresMap = options["features"] as? Map<*, *>
75
- val features = featuresMap?.filterKeys { it is String }
76
- ?.filterValues { it is Boolean }
77
- ?.mapKeys { it.key as String }
78
- ?.mapValues { it.value as Boolean }
79
- ?: emptyMap()
130
+ val features = Features.parseFeatureOptions(featuresMap)
80
131
 
81
132
  val recordingConfig = RecordingConfig(
82
133
  sampleRate = audioData.sampleRate,
@@ -87,8 +138,7 @@ class ExpoAudioStreamModule : Module(), EventSender {
87
138
  32 -> "pcm_32bit"
88
139
  else -> throw IllegalArgumentException("Unsupported bit depth: ${audioData.bitDepth}")
89
140
  },
90
- pointsPerSecond = pointsPerSecond,
91
- algorithm = algorithm,
141
+ segmentDurationMs = segmentDurationMs,
92
142
  features = features
93
143
  )
94
144
 
@@ -98,7 +148,7 @@ class ExpoAudioStreamModule : Module(), EventSender {
98
148
  val analysisData = audioProcessor.processAudioData(audioData.data, recordingConfig)
99
149
  promise.resolve(analysisData.toDictionary())
100
150
  } catch (e: Exception) {
101
- Log.e(Constants.TAG, "Audio processing failed: ${e.message}", e)
151
+ Log.e(Constants.TAG, "Failed to extract audio analysis: ${e.message}", e)
102
152
  promise.reject("PROCESSING_ERROR", e.message ?: "Unknown error", e)
103
153
  }
104
154
  }
@@ -189,85 +239,6 @@ class ExpoAudioStreamModule : Module(), EventSender {
189
239
  }
190
240
  }
191
241
 
192
- AsyncFunction("extractPreview") { options: Map<String, Any>, promise: Promise ->
193
- try {
194
- val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }
195
- val numberOfPoints = (options["numberOfPoints"] as? Int) ?: 100
196
- val algorithm = (options["algorithm"] as? String)?.lowercase() ?: "rms"
197
- val startTime = (options["startTime"] as? Number)?.toLong()
198
- val endTime = (options["endTime"] as? Number)?.toLong()
199
-
200
- Log.d(Constants.TAG, """
201
- Extracting preview with params:
202
- - fileUri: $fileUri
203
- - numberOfPoints: $numberOfPoints
204
- - algorithm: $algorithm
205
- - startTime: ${startTime ?: "none"}
206
- - endTime: ${endTime ?: "none"}
207
- """.trimIndent())
208
-
209
- // Get decoding options with defaults
210
- val decodingOptionsMap = options["decodingOptions"] as? Map<String, Any>
211
- val decodingConfig = if (decodingOptionsMap != null) {
212
- DecodingConfig(
213
- targetSampleRate = decodingOptionsMap["targetSampleRate"] as? Int ?: 22050,
214
- targetChannels = decodingOptionsMap["targetChannels"] as? Int ?: 1,
215
- targetBitDepth = (decodingOptionsMap["targetBitDepth"] as? Int) ?: 16,
216
- normalizeAudio = (decodingOptionsMap["normalizeAudio"] as? Boolean) ?: false
217
- )
218
- } else DecodingConfig(
219
- targetSampleRate = 16000,
220
- targetChannels = 1,
221
- targetBitDepth = 16,
222
- normalizeAudio = false
223
- )
224
-
225
- Log.d(Constants.TAG, """
226
- Using decoding config:
227
- - targetSampleRate: ${decodingConfig.targetSampleRate}
228
- - targetChannels: ${decodingConfig.targetChannels}
229
- - targetBitDepth: ${decodingConfig.targetBitDepth}
230
- - normalizeAudio: ${decodingConfig.normalizeAudio}
231
- """.trimIndent())
232
-
233
- // Use loadAudioRange when time range is specified, otherwise fall back to loadAudioFromAnyFormat
234
- val audioData = if (startTime != null && endTime != null) {
235
- audioProcessor.loadAudioRange(fileUri, startTime, endTime, decodingConfig)
236
- } else {
237
- audioProcessor.loadAudioFromAnyFormat(fileUri, decodingConfig)
238
- } ?: throw IllegalStateException("Failed to load audio file")
239
-
240
- val previewConfig = RecordingConfig(
241
- sampleRate = audioData.sampleRate,
242
- channels = audioData.channels,
243
- encoding = when (audioData.bitDepth) {
244
- 8 -> "pcm_8bit"
245
- 16 -> "pcm_16bit"
246
- 32 -> "pcm_32bit"
247
- else -> throw IllegalArgumentException("Unsupported bit depth: ${audioData.bitDepth}")
248
- },
249
- pointsPerSecond = 0.0, // Will be overridden by numberOfPoints
250
- algorithm = algorithm,
251
- features = emptyMap() // No features needed for preview
252
- )
253
-
254
- val preview = audioProcessor.generatePreview(
255
- audioData = audioData,
256
- numberOfPoints = numberOfPoints,
257
- startTimeMs = startTime,
258
- endTimeMs = endTime,
259
- config = previewConfig
260
- )
261
-
262
- Log.d(Constants.TAG, "Preview generated successfully with ${preview.dataPoints.size} points")
263
- promise.resolve(preview.toDictionary())
264
- } catch (e: Exception) {
265
- Log.e(Constants.TAG, "Preview generation failed: ${e.message}", e)
266
- Log.e(Constants.TAG, "Stack trace: ${e.stackTraceToString()}")
267
- promise.reject("PROCESSING_ERROR", e.message ?: "Unknown error", e)
268
- }
269
- }
270
-
271
242
  AsyncFunction("trimAudio") { options: Map<String, Any>, promise: Promise ->
272
243
  try {
273
244
  val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }
@@ -341,6 +312,175 @@ class ExpoAudioStreamModule : Module(), EventSender {
341
312
 
342
313
  promise.resolve(status)
343
314
  }
315
+
316
+ AsyncFunction("extractAudioData") { options: Map<String, Any>, promise: Promise ->
317
+ try {
318
+ val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }
319
+ val startTimeMs = options["startTimeMs"] as? Number
320
+ val endTimeMs = options["endTimeMs"] as? Number
321
+ val position = options["position"] as? Number
322
+ val length = options["length"] as? Number
323
+
324
+ // Validate that we have either time range or byte range, but not both and not neither
325
+ val hasTimeRange = startTimeMs != null && endTimeMs != null
326
+ val hasByteRange = position != null && length != null
327
+
328
+ if (!hasTimeRange && !hasByteRange) {
329
+ throw IllegalArgumentException("Must specify either time range (startTimeMs, endTimeMs) or byte range (position, length)")
330
+ }
331
+ if (hasTimeRange && hasByteRange) {
332
+ throw IllegalArgumentException("Cannot specify both time range and byte range")
333
+ }
334
+
335
+ // Get decoding options
336
+ val decodingOptionsMap = options["decodingOptions"] as? Map<String, Any>
337
+ val decodingConfig = if (decodingOptionsMap != null) {
338
+ DecodingConfig(
339
+ targetSampleRate = decodingOptionsMap["targetSampleRate"] as? Int,
340
+ targetChannels = decodingOptionsMap["targetChannels"] as? Int,
341
+ targetBitDepth = (decodingOptionsMap["targetBitDepth"] as? Int) ?: 16,
342
+ normalizeAudio = (decodingOptionsMap["normalizeAudio"] as? Boolean) ?: false
343
+ ).also {
344
+ Log.d(Constants.TAG, """
345
+ Using decoding config:
346
+ - targetSampleRate: ${it.targetSampleRate ?: "original"}
347
+ - targetChannels: ${it.targetChannels ?: "original"}
348
+ - targetBitDepth: ${it.targetBitDepth}
349
+ - normalizeAudio: ${it.normalizeAudio}
350
+ """.trimIndent())
351
+ }
352
+ } else null
353
+
354
+ val audioData = if (hasByteRange) {
355
+ val format = audioProcessor.getAudioFormat(fileUri)
356
+ ?: throw IllegalArgumentException("Could not determine audio format")
357
+
358
+ // Calculate time range from byte position
359
+ val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
360
+ val effectiveStartTimeMs = (position!!.toLong() * 1000) / bytesPerSecond
361
+ val effectiveEndTimeMs = effectiveStartTimeMs + (length!!.toLong() * 1000) / bytesPerSecond
362
+
363
+ Log.d(Constants.TAG, """
364
+ Converting byte range to time range:
365
+ - position: $position bytes
366
+ - length: $length bytes
367
+ - bytesPerSecond: $bytesPerSecond
368
+ - effectiveStartTimeMs: $effectiveStartTimeMs
369
+ - effectiveEndTimeMs: $effectiveEndTimeMs
370
+ """.trimIndent())
371
+
372
+ audioProcessor.loadAudioRange(
373
+ fileUri = fileUri,
374
+ startTimeMs = effectiveStartTimeMs,
375
+ endTimeMs = effectiveEndTimeMs,
376
+ config = decodingConfig
377
+ )
378
+ } else {
379
+ // Must be time range due to earlier validation
380
+ Log.d(Constants.TAG, """
381
+ Using time range:
382
+ - startTimeMs: $startTimeMs
383
+ - endTimeMs: $endTimeMs
384
+ """.trimIndent())
385
+
386
+ audioProcessor.loadAudioRange(
387
+ fileUri = fileUri,
388
+ startTimeMs = startTimeMs!!.toLong(),
389
+ endTimeMs = endTimeMs!!.toLong(),
390
+ config = decodingConfig
391
+ )
392
+ } ?: throw IllegalStateException("Failed to load audio data")
393
+
394
+ Log.d(Constants.TAG, """
395
+ Audio data loaded successfully:
396
+ - data size: ${audioData.data.size} bytes
397
+ - sampleRate: ${audioData.sampleRate}
398
+ - channels: ${audioData.channels}
399
+ - bitDepth: ${audioData.bitDepth}
400
+ - durationMs: ${audioData.durationMs}
401
+ """.trimIndent())
402
+
403
+ val includeNormalizedData = options["includeNormalizedData"] as? Boolean ?: false
404
+ val includeBase64Data = options["includeBase64Data"] as? Boolean ?: false
405
+ val includeWavHeader = options["includeWavHeader"] as? Boolean ?: false
406
+ val bytesPerSample = audioData.bitDepth / 8
407
+ val samples = audioData.data.size / (bytesPerSample * audioData.channels)
408
+
409
+ // Create the result map
410
+ val resultMap = mutableMapOf<String, Any>()
411
+
412
+ // Add WAV header if requested
413
+ if (includeWavHeader) {
414
+ // Use ByteArrayOutputStream to write the WAV header and data
415
+ val outputStream = java.io.ByteArrayOutputStream()
416
+ val audioFileHandler = AudioFileHandler(appContext.reactContext!!.filesDir)
417
+
418
+ // Write the WAV header
419
+ audioFileHandler.writeWavHeader(
420
+ outputStream,
421
+ audioData.sampleRate,
422
+ audioData.channels,
423
+ audioData.bitDepth
424
+ )
425
+
426
+ // Write the PCM data
427
+ outputStream.write(audioData.data)
428
+
429
+ // Get the complete WAV data
430
+ val wavData = outputStream.toByteArray()
431
+
432
+ resultMap["pcmData"] = wavData
433
+ resultMap["hasWavHeader"] = true
434
+
435
+ Log.d(Constants.TAG, "Added WAV header to PCM data, total size: ${wavData.size} bytes")
436
+ } else {
437
+ resultMap["pcmData"] = audioData.data
438
+ resultMap["hasWavHeader"] = false
439
+ }
440
+
441
+ // Add the rest of the data
442
+ resultMap.putAll(mapOf(
443
+ "sampleRate" to audioData.sampleRate,
444
+ "channels" to audioData.channels,
445
+ "bitDepth" to audioData.bitDepth,
446
+ "durationMs" to audioData.durationMs,
447
+ "format" to "pcm_${audioData.bitDepth}bit",
448
+ "samples" to samples
449
+ ))
450
+
451
+ // Add checksum if requested
452
+ if (options["computeChecksum"] == true) {
453
+ val crc32 = CRC32()
454
+ crc32.update(audioData.data)
455
+ resultMap["checksum"] = crc32.value.toInt()
456
+
457
+ Log.d(Constants.TAG, "Computed CRC32 checksum: ${crc32.value}")
458
+ }
459
+
460
+ if (includeNormalizedData) {
461
+ val float32Data = AudioFormatUtils.convertByteArrayToFloatArray(
462
+ audioData.data,
463
+ "pcm_${audioData.bitDepth}bit"
464
+ )
465
+ resultMap["normalizedData"] = float32Data
466
+ }
467
+
468
+ if (includeBase64Data) {
469
+ // Convert the PCM data to a base64 string
470
+ val base64Data = android.util.Base64.encodeToString(
471
+ audioData.data,
472
+ android.util.Base64.NO_WRAP
473
+ )
474
+ resultMap["base64Data"] = base64Data
475
+ }
476
+
477
+ promise.resolve(resultMap)
478
+ } catch (e: Exception) {
479
+ Log.e(Constants.TAG, "Failed to extract audio data: ${e.message}")
480
+ Log.e(Constants.TAG, "Stack trace: ${e.stackTraceToString()}")
481
+ promise.reject("PROCESSING_ERROR", e.message ?: "Unknown error", e)
482
+ }
483
+ }
344
484
  }
345
485
 
346
486
  private fun initializeManager() {
@@ -1,18 +1,47 @@
1
+ // packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/FFT.kt
1
2
  package net.siteed.audiostream
2
3
 
3
4
  import kotlin.math.PI
4
5
  import kotlin.math.cos
5
6
  import kotlin.math.sin
7
+ import kotlin.math.sqrt
6
8
 
7
9
  class FFT(private val n: Int) {
8
10
  private val cosTable = FloatArray(n / 2)
9
11
  private val sinTable = FloatArray(n / 2)
12
+ private val hannWindow = FloatArray(n)
10
13
 
11
14
  init {
15
+ // Precompute trig tables
12
16
  for (i in 0 until n / 2) {
13
17
  cosTable[i] = cos(2.0 * PI * i / n).toFloat()
14
18
  sinTable[i] = sin(2.0 * PI * i / n).toFloat()
15
19
  }
20
+
21
+ // Precompute normalized Hann window to match vDSP
22
+ val normalizationFactor = sqrt(2.0f / n) // Match vDSP normalization
23
+ for (i in hannWindow.indices) {
24
+ hannWindow[i] = normalizationFactor * 0.5f * (1 - cos(2.0 * PI * i / (n - 1))).toFloat()
25
+ }
26
+ }
27
+
28
+ fun processSegment(segment: FloatArray): FloatArray {
29
+ // Pad or truncate input to match FFT length
30
+ val paddedSegment = if (segment.size < n) {
31
+ segment + FloatArray(n - segment.size)
32
+ } else {
33
+ segment.copyOf(n)
34
+ }
35
+
36
+ // Apply normalized Hann window
37
+ for (i in paddedSegment.indices) {
38
+ paddedSegment[i] *= hannWindow[i]
39
+ }
40
+
41
+ // Perform FFT
42
+ realForward(paddedSegment)
43
+
44
+ return paddedSegment
16
45
  }
17
46
 
18
47
  fun realForward(data: FloatArray) {
@@ -41,4 +70,30 @@ class FFT(private val n: Int) {
41
70
  data[i + n / 2] = even[i] - t
42
71
  }
43
72
  }
73
+
74
+ fun realInverse(powerSpectrum: FloatArray, output: FloatArray) {
75
+ // Copy power spectrum to complex format for inverse FFT
76
+ val complexData = FloatArray(n * 2)
77
+ for (i in 0 until n/2 + 1) {
78
+ complexData[2 * i] = powerSpectrum[i]
79
+ if (2 * i + 1 < complexData.size) {
80
+ complexData[2 * i + 1] = 0f
81
+ }
82
+ }
83
+
84
+ // Conjugate for inverse FFT
85
+ for (i in 0 until n) {
86
+ if (2 * i + 1 < complexData.size) {
87
+ complexData[2 * i + 1] = -complexData[2 * i + 1]
88
+ }
89
+ }
90
+
91
+ // Perform forward FFT (which is inverse when input is conjugated)
92
+ realForward(complexData)
93
+
94
+ // Copy real part to output and conjugate again
95
+ for (i in 0 until n) {
96
+ output[i] = complexData[2 * i] / n
97
+ }
98
+ }
44
99
  }
@@ -14,12 +14,17 @@ data class Features(
14
14
  val spectralFlatness: Float = 0f,
15
15
  val spectralRollOff: Float = 0f,
16
16
  val spectralBandwidth: Float = 0f,
17
- val chromagram: List<Float> = emptyList(),
18
17
  val tempo: Float = 0f,
19
- val hnr: Float = 0f
18
+ val hnr: Float = 0f,
19
+ val melSpectrogram: List<Float> = emptyList(),
20
+ val chromagram: List<Float> = emptyList(),
21
+ val spectralContrast: List<Float> = emptyList(),
22
+ val tonnetz: List<Float> = emptyList(),
23
+ val pitch: Float = 0f,
24
+ val crc32: Long? = null
20
25
  ) {
21
26
  fun toDictionary(): Map<String, Any> {
22
- return mapOf(
27
+ val baseMap = mapOf(
23
28
  "energy" to energy,
24
29
  "mfcc" to mfcc,
25
30
  "rms" to rms,
@@ -30,10 +35,16 @@ data class Features(
30
35
  "spectralFlatness" to spectralFlatness,
31
36
  "spectralRollOff" to spectralRollOff,
32
37
  "spectralBandwidth" to spectralBandwidth,
33
- "chromagram" to chromagram,
34
38
  "tempo" to tempo,
35
- "hnr" to hnr
39
+ "hnr" to hnr,
40
+ "melSpectrogram" to melSpectrogram,
41
+ "chromagram" to chromagram,
42
+ "spectralContrast" to spectralContrast,
43
+ "tonnetz" to tonnetz,
44
+ "pitch" to pitch,
45
+ "crc32" to (crc32 ?: 0)
36
46
  )
47
+ return baseMap.filterValues { it != null }
37
48
  }
38
49
 
39
50
  fun toBundle(): Bundle {
@@ -48,9 +59,40 @@ data class Features(
48
59
  "spectralFlatness" to spectralFlatness,
49
60
  "spectralRollOff" to spectralRollOff,
50
61
  "spectralBandwidth" to spectralBandwidth,
51
- "chromagram" to chromagram,
52
62
  "tempo" to tempo,
53
- "hnr" to hnr
63
+ "hnr" to hnr,
64
+ "melSpectrogram" to melSpectrogram,
65
+ "chromagram" to chromagram,
66
+ "spectralContrast" to spectralContrast,
67
+ "tonnetz" to tonnetz,
68
+ "pitch" to pitch,
69
+ "crc32" to (crc32 ?: 0)
54
70
  )
55
71
  }
72
+
73
+ companion object {
74
+ fun parseFeatureOptions(options: Map<*, *>?): Map<String, Boolean> {
75
+ return options?.let { map ->
76
+ mapOf(
77
+ "energy" to (map["energy"] as? Boolean ?: false),
78
+ "mfcc" to (map["mfcc"] as? Boolean ?: false),
79
+ "rms" to (map["rms"] as? Boolean ?: false),
80
+ "zcr" to (map["zcr"] as? Boolean ?: false),
81
+ "dB" to (map["dB"] as? Boolean ?: false),
82
+ "spectralCentroid" to (map["spectralCentroid"] as? Boolean ?: false),
83
+ "spectralFlatness" to (map["spectralFlatness"] as? Boolean ?: false),
84
+ "spectralRollOff" to (map["spectralRollOff"] as? Boolean ?: false),
85
+ "spectralBandwidth" to (map["spectralBandwidth"] as? Boolean ?: false),
86
+ "chromagram" to (map["chromagram"] as? Boolean ?: false),
87
+ "tempo" to (map["tempo"] as? Boolean ?: false),
88
+ "hnr" to (map["hnr"] as? Boolean ?: false),
89
+ "melSpectrogram" to (map["melSpectrogram"] as? Boolean ?: false),
90
+ "spectralContrast" to (map["spectralContrast"] as? Boolean ?: false),
91
+ "tonnetz" to (map["tonnetz"] as? Boolean ?: false),
92
+ "pitch" to (map["pitch"] as? Boolean ?: false),
93
+ "crc32" to (map["crc32"] as? Boolean ?: false)
94
+ )
95
+ } ?: emptyMap()
96
+ }
97
+ }
56
98
  }
@@ -10,9 +10,9 @@ data class RecordingConfig(
10
10
  val encoding: String = "pcm_16bit",
11
11
  val keepAwake: Boolean = true,
12
12
  val interval: Long = Constants.DEFAULT_INTERVAL,
13
+ val intervalAnalysis: Long = Constants.DEFAULT_INTERVAL_ANALYSIS,
13
14
  val enableProcessing: Boolean = false,
14
- val pointsPerSecond: Double = 20.0,
15
- val algorithm: String = "rms",
15
+ val segmentDurationMs: Int = 100,
16
16
  val showNotification: Boolean = false,
17
17
  val showWaveformInNotification: Boolean = false,
18
18
  val notification: NotificationConfig = NotificationConfig(),
@@ -89,9 +89,9 @@ data class RecordingConfig(
89
89
  encoding = options.getStringOrDefault("encoding", "pcm_16bit"),
90
90
  keepAwake = options.getBooleanOrDefault("keepAwake", true),
91
91
  interval = options.getNumberOrDefault("interval", Constants.DEFAULT_INTERVAL),
92
+ intervalAnalysis = options.getNumberOrDefault("intervalAnalysis", Constants.DEFAULT_INTERVAL_ANALYSIS),
92
93
  enableProcessing = options.getBooleanOrDefault("enableProcessing", false),
93
- pointsPerSecond = options.getNumberOrDefault("pointsPerSecond", 20.0),
94
- algorithm = options.getStringOrDefault("algorithm", "rms"),
94
+ segmentDurationMs = options.getNumberOrDefault("segmentDurationMs", 100),
95
95
  showNotification = options.getBooleanOrDefault("showNotification", false),
96
96
  showWaveformInNotification = options.getBooleanOrDefault("showWaveformInNotification", false),
97
97
  notification = notificationConfig,