@siteed/audio-studio 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +41 -35
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +500 -479
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +10 -7
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js +78 -97
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +15 -12
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractAudioData.js +144 -2
- package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -1
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js +9 -56
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
- package/build/cjs/AudioAnalysis/wasmConfig.js +4 -4
- package/build/cjs/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/cjs/AudioAnalysis/wasmLoader.web.js +79 -0
- package/build/cjs/AudioAnalysis/wasmLoader.web.js.map +1 -0
- package/build/cjs/AudioStudioModule.js +4 -599
- package/build/cjs/AudioStudioModule.js.map +1 -1
- package/build/cjs/trimAudio.js +227 -0
- package/build/cjs/trimAudio.js.map +1 -1
- package/build/cjs/utils/encodeCompressedAudio.web.js +65 -0
- package/build/cjs/utils/encodeCompressedAudio.web.js.map +1 -0
- package/build/cjs/utils/resampleAudioBuffer.web.js +25 -0
- package/build/cjs/utils/resampleAudioBuffer.web.js.map +1 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js +8 -5
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
- package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js +76 -62
- package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js +15 -12
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/esm/AudioAnalysis/extractAudioData.js +144 -2
- package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -1
- package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js +9 -23
- package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmConfig.js +4 -4
- package/build/esm/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmLoader.web.js +43 -0
- package/build/esm/AudioAnalysis/wasmLoader.web.js.map +1 -0
- package/build/esm/AudioStudioModule.js +4 -596
- package/build/esm/AudioStudioModule.js.map +1 -1
- package/build/esm/trimAudio.js +227 -0
- package/build/esm/trimAudio.js.map +1 -1
- package/build/esm/utils/encodeCompressedAudio.web.js +62 -0
- package/build/esm/utils/encodeCompressedAudio.web.js.map +1 -0
- package/build/esm/utils/resampleAudioBuffer.web.js +22 -0
- package/build/esm/utils/resampleAudioBuffer.web.js.map +1 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +11 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +5 -9
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -1
- package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts +35 -16
- package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractAudioData.d.ts +2 -2
- package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -1
- package/build/types/AudioAnalysis/melSpectrogramWasm.web.d.ts.map +1 -1
- package/build/types/AudioAnalysis/wasmLoader.web.d.ts +3 -0
- package/build/types/AudioAnalysis/wasmLoader.web.d.ts.map +1 -0
- package/build/types/AudioStudioModule.d.ts.map +1 -1
- package/build/types/trimAudio.d.ts.map +1 -1
- package/build/types/utils/encodeCompressedAudio.web.d.ts +10 -0
- package/build/types/utils/encodeCompressedAudio.web.d.ts.map +1 -0
- package/build/types/utils/resampleAudioBuffer.web.d.ts +2 -0
- package/build/types/utils/resampleAudioBuffer.web.d.ts.map +1 -0
- package/ios/AudioStreamManager.swift +135 -89
- package/ios/AudioStudioModule.swift +239 -216
- package/package.json +1 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +12 -0
- package/src/AudioAnalysis/audioFeaturesWasm.ts +17 -22
- package/src/AudioAnalysis/audioFeaturesWasm.web.ts +102 -94
- package/src/AudioAnalysis/extractAudioAnalysis.ts +23 -20
- package/src/AudioAnalysis/extractAudioData.ts +186 -4
- package/src/AudioAnalysis/melSpectrogramWasm.web.ts +10 -27
- package/src/AudioAnalysis/wasmConfig.ts +4 -4
- package/src/AudioAnalysis/wasmLoader.web.ts +53 -0
- package/src/AudioStudioModule.ts +6 -854
- package/src/trimAudio.ts +351 -0
- package/src/utils/encodeCompressedAudio.web.ts +78 -0
- package/src/utils/resampleAudioBuffer.web.ts +39 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js +0 -18
- package/build/cjs/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/esm/AudioAnalysis/extractWaveform.js +0 -11
- package/build/esm/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/types/AudioAnalysis/extractWaveform.d.ts +0 -8
- package/build/types/AudioAnalysis/extractWaveform.d.ts.map +0 -1
- package/src/AudioAnalysis/extractWaveform.ts +0 -22
|
@@ -16,6 +16,8 @@ import expo.modules.interfaces.permissions.Permissions
|
|
|
16
16
|
import java.util.zip.CRC32
|
|
17
17
|
import kotlinx.coroutines.CoroutineScope
|
|
18
18
|
import kotlinx.coroutines.Dispatchers
|
|
19
|
+
import kotlinx.coroutines.SupervisorJob
|
|
20
|
+
import kotlinx.coroutines.cancelChildren
|
|
19
21
|
import kotlinx.coroutines.launch
|
|
20
22
|
import kotlinx.coroutines.withContext
|
|
21
23
|
|
|
@@ -31,7 +33,7 @@ class AudioStudioModule : Module(), EventSender {
|
|
|
31
33
|
private var enableNotificationHandling: Boolean = false // Default to false until we check manifest
|
|
32
34
|
private var enableBackgroundAudio: Boolean = false // Default to false until we check manifest
|
|
33
35
|
private var enableDeviceDetection: Boolean = false // Default to false until we check manifest
|
|
34
|
-
private val coroutineScope = CoroutineScope(Dispatchers.Main)
|
|
36
|
+
private val coroutineScope = CoroutineScope(Dispatchers.Main + SupervisorJob())
|
|
35
37
|
|
|
36
38
|
private val audioFileHandler by lazy {
|
|
37
39
|
AudioFileHandler(appContext.reactContext?.filesDir ?: throw IllegalStateException("React context not available"))
|
|
@@ -183,28 +185,27 @@ class AudioStudioModule : Module(), EventSender {
|
|
|
183
185
|
|
|
184
186
|
|
|
185
187
|
AsyncFunction("prepareRecording") { options: Map<String, Any?>, promise: Promise ->
    // Preparing a recording involves heavy native initialisation (AudioRecord probing,
    // MediaRecorder.prepare, file I/O), so it is dispatched to the IO dispatcher on the
    // module-level scope rather than being executed inline.
    coroutineScope.launch(Dispatchers.IO) {
        try {
            val notificationRequested = options["showNotification"] as? Boolean == true

            // A notification can only be shown when the manifest-derived flag allows it;
            // otherwise the request is downgraded instead of failing the whole call.
            val effectiveOptions = when {
                notificationRequested && !enableNotificationHandling -> {
                    LogUtils.d(CLASS_NAME, "Notification permission not in manifest, disabling showNotification")
                    options.toMutableMap().apply { this["showNotification"] = false }
                }
                else -> options
            }

            val prepared = audioRecorderManager.prepareRecording(effectiveOptions)
            if (!prepared) {
                promise.reject("PREPARE_ERROR", "Failed to prepare recording", null)
                return@launch
            }
            promise.resolve(true)
        } catch (e: Exception) {
            LogUtils.e(CLASS_NAME, "Error preparing recording", e)
            promise.reject("PREPARE_ERROR", "Failed to prepare recording: ${e.message}", e)
        }
    }
}
|
|
210
211
|
|
|
@@ -368,281 +369,294 @@ class AudioStudioModule : Module(), EventSender {
|
|
|
368
369
|
}
|
|
369
370
|
|
|
370
371
|
AsyncFunction("trimAudio") { options: Map<String, Any>, promise: Promise ->
    // Trimming decodes, re-encodes and performs file I/O, so the whole operation is
    // launched on the IO dispatcher to keep the shared module executor free for other calls.
    coroutineScope.launch(Dispatchers.IO) {
        try {
            val fileUri = options["fileUri"] as? String
            if (fileUri == null) {
                promise.reject("INVALID_URI", "fileUri is required", null)
                return@launch
            }

            LogUtils.d(CLASS_NAME, "trimAudio called with fileUri: $fileUri")
            LogUtils.d(CLASS_NAME, "Full options: $options")

            val mode = (options["mode"] as? String) ?: "single"
            val startTimeMs = (options["startTimeMs"] as? Number)?.toLong()
            val endTimeMs = (options["endTimeMs"] as? Number)?.toLong()

            // Each range is normalised to a {startTimeMs, endTimeMs} pair of Longs;
            // a missing or non-numeric bound falls back to 0.
            @Suppress("UNCHECKED_CAST")
            val ranges = (options["ranges"] as? List<Map<String, Any>>)?.map { entry ->
                val rangeStart = (entry["startTimeMs"] as? Number)?.toLong() ?: 0L
                val rangeEnd = (entry["endTimeMs"] as? Number)?.toLong() ?: 0L
                mapOf(
                    "startTimeMs" to rangeStart,
                    "endTimeMs" to rangeEnd
                )
            }

            val outputFileName = options["outputFileName"] as? String

            @Suppress("UNCHECKED_CAST")
            val requestedFormat = options["outputFormat"] as? Map<String, Any>
            val requestedFormatName = requestedFormat?.get("format") as? String
            val formatIsUnsupported = requestedFormatName != null &&
                requestedFormatName != "wav" &&
                requestedFormatName != "aac" &&
                requestedFormatName != "opus"

            // Any format other than wav/aac/opus is replaced by aac in a copy of the
            // caller's map; the caller's map itself is never mutated.
            val outputFormatMap: Map<String, Any>? =
                if (requestedFormat != null && formatIsUnsupported) {
                    LogUtils.w(CLASS_NAME, "Requested format '$requestedFormatName' is not fully supported. Using 'aac' instead.")
                    HashMap<String, Any>(requestedFormat).apply { this["format"] = "aac" }
                } else {
                    requestedFormat
                }

            LogUtils.d(CLASS_NAME, "Output format options: $outputFormatMap")

            // Forwards trimmer progress to JS as TRIM_PROGRESS_EVENT events.
            val progressListener = object : AudioTrimmer.ProgressListener {
                override fun onProgress(progress: Float, bytesProcessed: Long, totalBytes: Long) {
                    val payload = mapOf(
                        "progress" to progress,
                        "bytesProcessed" to bytesProcessed,
                        "totalBytes" to totalBytes
                    )
                    sendEvent(Constants.TRIM_PROGRESS_EVENT, payload)
                }
            }

            val startedAt = System.currentTimeMillis()
            val result = audioTrimmer.trimAudio(
                fileUri = fileUri,
                mode = mode,
                startTimeMs = startTimeMs,
                endTimeMs = endTimeMs,
                ranges = ranges,
                outputFileName = outputFileName,
                outputFormat = outputFormatMap,
                progressListener = progressListener
            )
            val elapsedMs = System.currentTimeMillis() - startedAt

            // The trimmer result is returned with an extra processingInfo.durationMs entry.
            val response = result.toMutableMap()
            response["processingInfo"] = mapOf("durationMs" to elapsedMs)

            LogUtils.d(CLASS_NAME, "Trim operation completed successfully in ${elapsedMs}ms: $result")
            promise.resolve(response)
        } catch (e: Exception) {
            LogUtils.e(CLASS_NAME, "Error trimming audio: ${e.message}", e)
            promise.reject("TRIM_ERROR", "Error trimming audio: ${e.message}", e)
        }
    }
}
|
|
448
452
|
|
|
449
453
|
AsyncFunction("extractMelSpectrogram") { options: Map<String, Any>, promise: Promise ->
    // Computes a mel-spectrogram for a file (optionally restricted to a time range) and
    // resolves the promise with a map containing "spectrogram", "sampleRate", "nMels",
    // "timeSteps" and "durationMs". Every failure is rejected with code SPECTROGRAM_ERROR.
    //
    // The work (file decode + STFT + mel projection) is launched on the IO dispatcher so
    // that other JS calls are not queued behind it.
    coroutineScope.launch(Dispatchers.IO) {
        try {
            LogUtils.d(CLASS_NAME, "extractMelSpectrogram called with options: $options")

            val fileUri = options["fileUri"] as? String
            LogUtils.d(CLASS_NAME, "fileUri: $fileUri")
            if (fileUri == null) {
                LogUtils.e(CLASS_NAME, "Missing required parameter: fileUri")
                throw IllegalArgumentException("fileUri is required")
            }

            // Numeric options are read through Number rather than a concrete boxed type.
            // This block already had to accept nMels as either Int or Double; reading the
            // other numeric options with `as? Double` would reject (or silently default)
            // an integer-typed value, so they all go through the same Number conversion.
            val windowSizeMs = (options["windowSizeMs"] as? Number)?.toDouble()
            LogUtils.d(CLASS_NAME, "windowSizeMs: $windowSizeMs")
            if (windowSizeMs == null) {
                LogUtils.e(CLASS_NAME, "Missing required parameter: windowSizeMs")
                throw IllegalArgumentException("windowSizeMs is required")
            }

            val hopLengthMs = (options["hopLengthMs"] as? Number)?.toDouble()
            LogUtils.d(CLASS_NAME, "hopLengthMs: $hopLengthMs")
            if (hopLengthMs == null) {
                LogUtils.e(CLASS_NAME, "Missing required parameter: hopLengthMs")
                throw IllegalArgumentException("hopLengthMs is required")
            }

            val nMelsValue = options["nMels"]
            LogUtils.d(CLASS_NAME, "Raw nMels value: $nMelsValue (type: ${nMelsValue?.javaClass?.name})")
            val nMels = (nMelsValue as? Number)?.toInt() ?: run {
                LogUtils.e(CLASS_NAME, "Missing or invalid required parameter: nMels")
                throw IllegalArgumentException("nMels is required and must be a number")
            }
            LogUtils.d(CLASS_NAME, "Converted nMels: $nMels (from ${nMelsValue?.javaClass?.name})")

            // Optional parameters with defaults. fMax stays null when absent and is
            // replaced by half the sample rate once the audio has been loaded.
            val fMin = (options["fMin"] as? Number)?.toDouble() ?: 0.0
            val fMax = (options["fMax"] as? Number)?.toDouble()
            val windowType = options["windowType"] as? String ?: "hann"
            val normalize = options["normalize"] as? Boolean ?: false
            val logScale = options["logScale"] as? Boolean ?: true

            // The range bounds are truncated to whole milliseconds (toLong drops any
            // fractional part); the untruncated values are kept only for the log below.
            val startTimeMsNumber = options["startTimeMs"] as? Number
            val endTimeMsNumber = options["endTimeMs"] as? Number
            val startTimeMs = startTimeMsNumber?.toLong()
            val endTimeMs = endTimeMsNumber?.toLong()

            LogUtils.d(CLASS_NAME, """
                Optional parameters:
                - fMin: $fMin
                - fMax: $fMax
                - windowType: $windowType
                - normalize: $normalize
                - logScale: $logScale
                - startTimeMs: $startTimeMs (original: $startTimeMsNumber)
                - endTimeMs: $endTimeMs (original: $endTimeMsNumber)
            """.trimIndent())

            @Suppress("UNCHECKED_CAST")
            val decodingOptions = options["decodingOptions"] as? Map<String, Any>
            LogUtils.d(CLASS_NAME, "Decoding options: $decodingOptions")

            // With explicit decoding options: sample rate stays null when absent, while
            // channels and bit depth default to 1 and 16. Without options the same
            // mono / 16-bit defaults are used.
            val config = if (decodingOptions != null) {
                DecodingConfig(
                    targetSampleRate = (decodingOptions["targetSampleRate"] as? Number)?.toInt(),
                    targetChannels = (decodingOptions["targetChannels"] as? Number)?.toInt() ?: 1,
                    targetBitDepth = (decodingOptions["targetBitDepth"] as? Number)?.toInt() ?: 16,
                    normalizeAudio = decodingOptions["normalizeAudio"] as? Boolean ?: false
                ).also { applied ->
                    LogUtils.d(CLASS_NAME, """
                        Using decoding config:
                        - targetSampleRate: ${applied.targetSampleRate ?: "original"}
                        - targetChannels: ${applied.targetChannels ?: "original"}
                        - targetBitDepth: ${applied.targetBitDepth}
                        - normalizeAudio: ${applied.normalizeAudio}
                    """.trimIndent())
                }
            } else {
                DecodingConfig(targetSampleRate = null, targetChannels = 1, targetBitDepth = 16).also {
                    LogUtils.d(CLASS_NAME, "Using default decoding config")
                }
            }

            // Reject an explicitly requested range that is shorter than 25 ms before
            // doing any decoding work.
            if (startTimeMs != null && endTimeMs != null) {
                val durationMs = endTimeMs - startTimeMs
                LogUtils.d(CLASS_NAME, "Audio duration for spectrogram: $durationMs ms")
                if (durationMs < 25) { // 25ms is minimum for a single window
                    LogUtils.w(CLASS_NAME, "Audio duration is too short for spectrogram analysis: $durationMs ms")
                    throw IllegalArgumentException("Audio duration must be at least 25ms for spectrogram analysis")
                }
            }

            // A range is only applied when BOTH bounds are present; otherwise the whole
            // file is loaded.
            LogUtils.d(CLASS_NAME, "Loading audio data...")
            val audioData = if (startTimeMs != null && endTimeMs != null) {
                LogUtils.d(CLASS_NAME, "Loading audio range: $startTimeMs to $endTimeMs ms")
                audioProcessor.loadAudioRange(fileUri, startTimeMs, endTimeMs, config)
            } else {
                LogUtils.d(CLASS_NAME, "Loading entire audio file")
                audioProcessor.loadAudioFromAnyFormat(fileUri, config)
            }

            if (audioData == null) {
                LogUtils.e(CLASS_NAME, "Failed to load audio data")
                throw IllegalStateException("Failed to load audio data")
            }

            LogUtils.d(CLASS_NAME, """
                Audio data loaded successfully:
                - data size: ${audioData.data.size} bytes
                - sampleRate: ${audioData.sampleRate}
                - channels: ${audioData.channels}
                - bitDepth: ${audioData.bitDepth}
                - durationMs: ${audioData.durationMs}
            """.trimIndent())

            // The decoded audio must be non-empty and at least one analysis window long.
            if (audioData.data.size == 0 || audioData.durationMs < windowSizeMs) {
                LogUtils.e(CLASS_NAME, "Audio data is too short for spectrogram analysis: ${audioData.durationMs}ms, data size: ${audioData.data.size} bytes")
                throw IllegalArgumentException(
                    "Audio data is too short for spectrogram analysis. " +
                        "Duration: ${audioData.durationMs}ms, minimum required: ${windowSizeMs}ms"
                )
            }

            LogUtils.d(CLASS_NAME, "Computing mel-spectrogram...")
            val spectrogramData = audioProcessor.extractMelSpectrogram(
                audioData = audioData,
                windowSizeMs = windowSizeMs.toFloat(),
                hopLengthMs = hopLengthMs.toFloat(),
                nMels = nMels,
                fMin = fMin.toFloat(),
                fMax = fMax?.toFloat() ?: (audioData.sampleRate.toFloat() / 2),
                normalize = normalize,
                logScaling = logScale,
                windowType = windowType
            )

            LogUtils.d(CLASS_NAME, "Mel-spectrogram computed successfully with ${spectrogramData.spectrogram.size} time steps")

            // Each spectrogram row is converted with toList() before being placed in
            // the result map handed to the promise.
            val result = mapOf(
                "spectrogram" to spectrogramData.spectrogram.map { it.toList() },
                "sampleRate" to audioData.sampleRate,
                "nMels" to nMels,
                "timeSteps" to spectrogramData.spectrogram.size,
                "durationMs" to audioData.durationMs
            )

            LogUtils.d(CLASS_NAME, "Returning result with ${result["timeSteps"]} time steps and $nMels mel bands")
            promise.resolve(result)
        } catch (e: Exception) {
            // Pass the throwable to the logger, as the other async functions in this
            // module do, instead of logging the stack trace as a separate string.
            LogUtils.e(CLASS_NAME, "Failed to extract mel-spectrogram: ${e.message}", e)
            promise.reject("SPECTROGRAM_ERROR", e.message ?: "Unknown error", e)
        }
    }
}
|
|
644
651
|
|
|
645
652
|
OnDestroy {
    // Stop any prepare/trim/extract coroutine that is still running so that neither
    // promise settlement nor event sends outlive the React context.
    //
    // Only the children are cancelled, not the scope's own Job: per the original
    // author's note, Expo can re-invoke definition() on dev-client reloads while
    // reusing this module instance, and a fully cancelled scope would turn every
    // later launch into a silent no-op.
    coroutineScope.coroutineContext.cancelChildren()
    AudioRecorderManager.destroy()
}
|
|
648
662
|
|
|
@@ -663,271 +677,278 @@ class AudioStudioModule : Module(), EventSender {
|
|
|
663
677
|
|
|
664
678
|
|
|
665
679
|
AsyncFunction("extractAudioAnalysis") { options: Map<String, Any>, promise: Promise ->
    // Loads a file (whole, by time range, or by byte range), runs the audio processor
    // over it and resolves the promise with the analysis dictionary. Every failure is
    // rejected with code PROCESSING_ERROR.
    //
    // Launched on the IO dispatcher so that other JS calls are not blocked while a
    // large file is being analysed.
    coroutineScope.launch(Dispatchers.IO) {
        try {
            val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }

            // Time range and byte range are both optional but mutually exclusive.
            val startTimeMs = options["startTimeMs"] as? Number
            val endTimeMs = options["endTimeMs"] as? Number
            val position = options["position"] as? Number
            val length = options["length"] as? Number
            val segmentDurationMs = (options["segmentDurationMs"] as? Number)?.toInt() ?: 100

            val hasTimeRange = startTimeMs != null && endTimeMs != null
            val hasByteRange = position != null && length != null
            if (hasTimeRange && hasByteRange) {
                throw IllegalArgumentException("Cannot specify both time range and byte range")
            }

            // Decoding options are read through Number so that a value arriving as a
            // Double is not silently discarded by an `as? Int` cast (the mel-spectrogram
            // function in this module already accepts any Number for the same keys).
            // Without options the default is mono, 16-bit, original sample rate.
            @Suppress("UNCHECKED_CAST")
            val decodingOptionsMap = options["decodingOptions"] as? Map<String, Any>
            val config = if (decodingOptionsMap != null) {
                DecodingConfig(
                    targetSampleRate = (decodingOptionsMap["targetSampleRate"] as? Number)?.toInt(),
                    targetChannels = (decodingOptionsMap["targetChannels"] as? Number)?.toInt(),
                    targetBitDepth = (decodingOptionsMap["targetBitDepth"] as? Number)?.toInt() ?: 16,
                    normalizeAudio = (decodingOptionsMap["normalizeAudio"] as? Boolean) ?: false
                )
            } else {
                DecodingConfig(
                    targetSampleRate = null,
                    targetChannels = 1, // Default to mono
                    targetBitDepth = 16,
                    normalizeAudio = false
                )
            }

            // The null checks are repeated in the branch conditions so the compiler can
            // smart-cast the bounds, avoiding `!!`. The mutual-exclusion check above
            // guarantees that at most one of the first two branches can match.
            val audioData = when {
                position != null && length != null -> {
                    val format = audioProcessor.getAudioFormat(fileUri)
                        ?: throw IllegalArgumentException("Could not determine audio format")

                    // Byte offsets are translated to milliseconds using the source
                    // format's byte rate. A byte rate of zero (e.g. a bit depth below 8)
                    // would otherwise surface as a bare divide-by-zero ArithmeticException.
                    val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
                    require(bytesPerSecond > 0) {
                        "Cannot convert byte range to time range: invalid audio format " +
                            "(sampleRate=${format.sampleRate}, channels=${format.channels}, bitDepth=${format.bitDepth})"
                    }
                    val effectiveStartTimeMs = (position.toLong() * 1000) / bytesPerSecond
                    val effectiveEndTimeMs = effectiveStartTimeMs + (length.toLong() * 1000) / bytesPerSecond

                    LogUtils.d(CLASS_NAME, "Loading audio with byte range: position=$position, length=$length")

                    audioProcessor.loadAudioRange(
                        fileUri = fileUri,
                        startTimeMs = effectiveStartTimeMs,
                        endTimeMs = effectiveEndTimeMs,
                        config = config
                    )
                }
                startTimeMs != null && endTimeMs != null -> {
                    LogUtils.d(CLASS_NAME, "Loading audio with time range: startTimeMs=$startTimeMs, endTimeMs=$endTimeMs")

                    audioProcessor.loadAudioRange(
                        fileUri = fileUri,
                        startTimeMs = startTimeMs.toLong(),
                        endTimeMs = endTimeMs.toLong(),
                        config = config
                    )
                }
                else -> {
                    LogUtils.d(CLASS_NAME, "Loading entire audio file")
                    audioProcessor.loadAudioFromAnyFormat(fileUri, config)
                }
            } ?: throw IllegalStateException("Failed to load audio data")

            val featuresMap = options["features"] as? Map<*, *>
            val features = Features.parseFeatureOptions(featuresMap)

            // The processor is configured from the properties of the DECODED audio
            // (not the request), so the encoding label follows the decoded bit depth.
            val recordingConfig = RecordingConfig(
                sampleRate = audioData.sampleRate,
                channels = audioData.channels,
                encoding = when (audioData.bitDepth) {
                    8 -> "pcm_8bit"
                    16 -> "pcm_16bit"
                    32 -> "pcm_32bit"
                    else -> throw IllegalArgumentException("Unsupported bit depth: ${audioData.bitDepth}")
                },
                segmentDurationMs = segmentDurationMs,
                features = features
            )

            LogUtils.d(CLASS_NAME, "extractAudioAnalysis: $recordingConfig")
            // Reset the processor's cumulative amplitude range before processing this file.
            audioProcessor.resetCumulativeAmplitudeRange()

            val analysisData = audioProcessor.processAudioData(audioData.data, recordingConfig)
            promise.resolve(analysisData.toDictionary())
        } catch (e: Exception) {
            LogUtils.e(CLASS_NAME, "Failed to extract audio analysis: ${e.message}", e)
            promise.reject("PROCESSING_ERROR", e.message ?: "Unknown error", e)
        }
    }
}
|
|
684
782
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
783
|
+
// Decodes a slice of an audio file to PCM and settles `promise` with a result map.
//
// Options read from `options`:
//  - fileUri (String, required)
//  - exactly one of: startTimeMs + endTimeMs (numbers), or position + length (numbers, in bytes)
//  - decodingOptions (map, optional): targetSampleRate, targetChannels,
//    targetBitDepth (defaults to 16), normalizeAudio (defaults to false)
//  - includeWavHeader / includeNormalizedData / includeBase64Data / computeChecksum
//    (booleans, treated as false when absent)
//
// Resolves with: pcmData, hasWavHeader, sampleRate, channels, bitDepth, durationMs,
// format, samples, plus checksum / normalizedData / base64Data when requested.
// Any exception is logged and rejects the promise with code "PROCESSING_ERROR".
AsyncFunction("extractAudioData") { options: Map<String, Any>, promise: Promise ->
    // Off the shared executor so concurrent JS calls don't block.
    coroutineScope.launch(Dispatchers.IO) {
        try {
            val fileUri = requireNotNull(options["fileUri"] as? String) { "fileUri is required" }
            val startTimeMs = options["startTimeMs"] as? Number
            val endTimeMs = options["endTimeMs"] as? Number
            val position = options["position"] as? Number
            val length = options["length"] as? Number

            // Validate that we have either time range or byte range, but not both and not neither
            val hasTimeRange = startTimeMs != null && endTimeMs != null
            val hasByteRange = position != null && length != null

            if (!hasTimeRange && !hasByteRange) {
                throw IllegalArgumentException("Must specify either time range (startTimeMs, endTimeMs) or byte range (position, length)")
            }
            if (hasTimeRange && hasByteRange) {
                throw IllegalArgumentException("Cannot specify both time range and byte range")
            }

            // Get decoding options. Numeric entries are read through Number (like the
            // range options above) so a value delivered as Double is not silently dropped
            // by a failed `as? Int` cast.
            val decodingOptionsMap = options["decodingOptions"] as? Map<*, *>
            val decodingConfig = if (decodingOptionsMap != null) {
                DecodingConfig(
                    targetSampleRate = (decodingOptionsMap["targetSampleRate"] as? Number)?.toInt(),
                    targetChannels = (decodingOptionsMap["targetChannels"] as? Number)?.toInt(),
                    targetBitDepth = (decodingOptionsMap["targetBitDepth"] as? Number)?.toInt() ?: 16,
                    normalizeAudio = (decodingOptionsMap["normalizeAudio"] as? Boolean) ?: false
                ).also {
                    LogUtils.d(CLASS_NAME, """
                        Using decoding config:
                        - targetSampleRate: ${it.targetSampleRate ?: "original"}
                        - targetChannels: ${it.targetChannels ?: "original"}
                        - targetBitDepth: ${it.targetBitDepth}
                        - normalizeAudio: ${it.normalizeAudio}
                    """.trimIndent())
                }
            } else null

            // Testing the nullable locals directly (instead of `hasByteRange`) lets the
            // compiler smart-cast them, so no `!!` is needed inside the branch.
            val audioData = if (position != null && length != null) {
                val format = audioProcessor.getAudioFormat(fileUri)
                    ?: throw IllegalArgumentException("Could not determine audio format")

                // Calculate time range from byte position
                val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
                // Used as a divisor below; fail with a descriptive message rather than "/ by zero".
                require(bytesPerSecond > 0) {
                    "Invalid audio format: sampleRate=${format.sampleRate}, channels=${format.channels}, bitDepth=${format.bitDepth}"
                }
                val effectiveStartTimeMs = (position.toLong() * 1000) / bytesPerSecond
                val effectiveEndTimeMs = effectiveStartTimeMs + (length.toLong() * 1000) / bytesPerSecond

                LogUtils.d(CLASS_NAME, """
                    Converting byte range to time range:
                    - position: $position bytes
                    - length: $length bytes
                    - bytesPerSecond: $bytesPerSecond
                    - effectiveStartTimeMs: $effectiveStartTimeMs
                    - effectiveEndTimeMs: $effectiveEndTimeMs
                """.trimIndent())

                audioProcessor.loadAudioRange(
                    fileUri = fileUri,
                    startTimeMs = effectiveStartTimeMs,
                    endTimeMs = effectiveEndTimeMs,
                    config = decodingConfig
                )
            } else {
                // Must be time range due to earlier validation
                val rangeStartMs = requireNotNull(startTimeMs) { "startTimeMs is required" }.toLong()
                val rangeEndMs = requireNotNull(endTimeMs) { "endTimeMs is required" }.toLong()

                LogUtils.d(CLASS_NAME, """
                    Using time range:
                    - startTimeMs: $startTimeMs
                    - endTimeMs: $endTimeMs
                """.trimIndent())

                audioProcessor.loadAudioRange(
                    fileUri = fileUri,
                    startTimeMs = rangeStartMs,
                    endTimeMs = rangeEndMs,
                    config = decodingConfig
                )
            } ?: throw IllegalStateException("Failed to load audio data")

            LogUtils.d(CLASS_NAME, """
                Audio data loaded successfully:
                - data size: ${audioData.data.size} bytes
                - sampleRate: ${audioData.sampleRate}
                - channels: ${audioData.channels}
                - bitDepth: ${audioData.bitDepth}
                - durationMs: ${audioData.durationMs}
            """.trimIndent())

            val includeNormalizedData = options["includeNormalizedData"] as? Boolean ?: false
            val includeBase64Data = options["includeBase64Data"] as? Boolean ?: false
            val includeWavHeader = options["includeWavHeader"] as? Boolean ?: false
            val bytesPerSample = audioData.bitDepth / 8
            // Number of sample frames: total bytes divided by bytes per frame.
            val samples = audioData.data.size / (bytesPerSample * audioData.channels)

            // Create the result map
            val resultMap = mutableMapOf<String, Any>()

            // Add WAV header if requested
            if (includeWavHeader) {
                val reactContext = checkNotNull(appContext.reactContext) { "React context is not available" }
                // Use ByteArrayOutputStream to write the WAV header and data
                val outputStream = java.io.ByteArrayOutputStream()
                val audioFileHandler = AudioFileHandler(reactContext.filesDir)

                // Write the WAV header
                audioFileHandler.writeWavHeader(
                    outputStream,
                    audioData.sampleRate,
                    audioData.channels,
                    audioData.bitDepth
                )

                // Write the PCM data
                outputStream.write(audioData.data)

                // Get the complete WAV data
                val wavData = outputStream.toByteArray()

                resultMap["pcmData"] = wavData
                resultMap["hasWavHeader"] = true

                LogUtils.d(CLASS_NAME, "Added WAV header to PCM data, total size: ${wavData.size} bytes")
            } else {
                resultMap["pcmData"] = audioData.data
                resultMap["hasWavHeader"] = false
            }

            // Add the rest of the data
            resultMap.putAll(mapOf(
                "sampleRate" to audioData.sampleRate,
                "channels" to audioData.channels,
                "bitDepth" to audioData.bitDepth,
                "durationMs" to audioData.durationMs,
                "format" to "pcm_${audioData.bitDepth}bit",
                "samples" to samples
            ))

            // Add checksum if requested. The checksum always covers the raw PCM bytes,
            // never the WAV header.
            if (options["computeChecksum"] == true) {
                val crc32 = CRC32()
                crc32.update(audioData.data)
                // toInt() keeps the low 32 bits, so values above Int.MAX_VALUE are
                // reported as negative numbers.
                resultMap["checksum"] = crc32.value.toInt()

                LogUtils.d(CLASS_NAME, "Computed CRC32 checksum: ${crc32.value}")
            }

            if (includeNormalizedData) {
                val float32Data = AudioFormatUtils.convertByteArrayToFloatArray(
                    audioData.data,
                    "pcm_${audioData.bitDepth}bit"
                )
                resultMap["normalizedData"] = float32Data
            }

            if (includeBase64Data) {
                // Convert the PCM data to a base64 string
                val base64Data = android.util.Base64.encodeToString(
                    audioData.data,
                    android.util.Base64.NO_WRAP
                )
                resultMap["base64Data"] = base64Data
            }

            promise.resolve(resultMap)
        } catch (e: Exception) {
            // Same logging form as the extractAudioAnalysis handler: pass the throwable
            // to the logger instead of formatting the stack trace by hand.
            LogUtils.e(CLASS_NAME, "Failed to extract audio data: ${e.message}", e)
            promise.reject("PROCESSING_ERROR", e.message ?: "Unknown error", e)
        }
    }
}
|
|
933
954
|
}
|