@siteed/audio-studio 3.0.3 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -1
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +41 -35
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +500 -485
- package/build/cjs/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/cjs/AudioAnalysis/wasmLoader.web.js +2 -1
- package/build/cjs/AudioAnalysis/wasmLoader.web.js.map +1 -1
- package/build/cjs/trimAudio.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmLoader.web.js +2 -1
- package/build/esm/AudioAnalysis/wasmLoader.web.js.map +1 -1
- package/build/esm/trimAudio.js.map +1 -1
- package/build/types/AudioAnalysis/wasmLoader.web.d.ts.map +1 -1
- package/build/types/trimAudio.d.ts.map +1 -1
- package/ios/AudioStreamManager.swift +135 -89
- package/ios/AudioStudioModule.swift +239 -216
- package/package.json +1 -1
- package/src/AudioAnalysis/wasmConfig.ts +1 -1
- package/src/AudioAnalysis/wasmLoader.web.ts +8 -3
- package/src/trimAudio.ts +19 -5
|
@@ -24,6 +24,14 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
24
24
|
private let notificationIdentifier = "audio_recording_notification"
|
|
25
25
|
private var deviceManager = AudioDeviceManager()
|
|
26
26
|
private var deviceChangeObserver: Any?
|
|
27
|
+
|
|
28
|
+
// Serial queue for AVAudioEngine lifecycle ops (prepare/start/stop).
|
|
29
|
+
// Prevents concurrent mutation of shared engine state and keeps callers
|
|
30
|
+
// off the main thread to avoid UI freezes during heavy native init.
|
|
31
|
+
private let audioLifecycleQueue = DispatchQueue(
|
|
32
|
+
label: "net.siteed.audiostudio.lifecycle",
|
|
33
|
+
qos: .userInitiated
|
|
34
|
+
)
|
|
27
35
|
|
|
28
36
|
public func definition() -> ModuleDefinition {
|
|
29
37
|
Name("AudioStudio")
|
|
@@ -220,30 +228,37 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
220
228
|
}
|
|
221
229
|
}
|
|
222
230
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
+
// Serialize on lifecycle queue: avoids racing with prepare/stop
|
|
232
|
+
// and keeps the JS/UI thread responsive while audio session
|
|
233
|
+
// and AVAudioEngine come up.
|
|
234
|
+
self.audioLifecycleQueue.async {
|
|
235
|
+
let result = self.streamManager.startRecording(settings: settings)
|
|
236
|
+
DispatchQueue.main.async {
|
|
237
|
+
if let result = result {
|
|
238
|
+
var resultDict: [String: Any] = [
|
|
239
|
+
"fileUri": result.fileUri,
|
|
240
|
+
"channels": result.channels,
|
|
241
|
+
"bitDepth": result.bitDepth,
|
|
242
|
+
"sampleRate": result.sampleRate,
|
|
243
|
+
"mimeType": result.mimeType,
|
|
244
|
+
]
|
|
231
245
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
}
|
|
246
|
+
if let compression = result.compression {
|
|
247
|
+
resultDict["compression"] = [
|
|
248
|
+
"compressedFileUri": compression.compressedFileUri,
|
|
249
|
+
"mimeType": compression.mimeType,
|
|
250
|
+
"bitrate": compression.bitrate,
|
|
251
|
+
"format": compression.format
|
|
252
|
+
]
|
|
253
|
+
}
|
|
241
254
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
255
|
+
Logger.info("AudioStudioModule", "Recording started successfully")
|
|
256
|
+
promise.resolve(resultDict)
|
|
257
|
+
} else {
|
|
258
|
+
Logger.error("AudioStudioModule", "Failed to start recording")
|
|
259
|
+
promise.reject("ERROR", "Failed to start recording.")
|
|
260
|
+
}
|
|
261
|
+
}
|
|
247
262
|
}
|
|
248
263
|
|
|
249
264
|
case .failure(let error):
|
|
@@ -275,21 +290,28 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
275
290
|
promise.reject("PERMISSION_DENIED", "Recording permission has not been granted")
|
|
276
291
|
return
|
|
277
292
|
}
|
|
278
|
-
|
|
293
|
+
|
|
279
294
|
// Create settings with validation
|
|
280
295
|
let settingsResult = RecordingSettings.fromDictionary(options)
|
|
281
|
-
|
|
296
|
+
|
|
282
297
|
switch settingsResult {
|
|
283
298
|
case .success(let settings):
|
|
284
|
-
Logger.debug("AudioStudioModule", "prepareRecording: Settings parsed
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
299
|
+
Logger.debug("AudioStudioModule", "prepareRecording: Settings parsed. Dispatching to serial audio queue.")
|
|
300
|
+
// Serial queue prevents concurrent AVAudioEngine mutation if
|
|
301
|
+
// prepare/start/stop overlap. Off-main keeps UI responsive.
|
|
302
|
+
self.audioLifecycleQueue.async {
|
|
303
|
+
let ok = self.streamManager.prepareRecording(settings: settings)
|
|
304
|
+
DispatchQueue.main.async {
|
|
305
|
+
if ok {
|
|
306
|
+
Logger.info("AudioStudioModule", "prepareRecording: Preparation successful.")
|
|
307
|
+
promise.resolve(true)
|
|
308
|
+
} else {
|
|
309
|
+
Logger.error("AudioStudioModule", "prepareRecording: streamManager.prepareRecording returned false.")
|
|
310
|
+
promise.reject("ERROR", "Failed to prepare recording.")
|
|
311
|
+
}
|
|
312
|
+
}
|
|
291
313
|
}
|
|
292
|
-
|
|
314
|
+
|
|
293
315
|
case .failure(let error):
|
|
294
316
|
promise.reject("INVALID_SETTINGS", error.localizedDescription)
|
|
295
317
|
}
|
|
@@ -314,36 +336,43 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
314
336
|
/// - promise: A promise to resolve with the recording result or reject with an error.
|
|
315
337
|
AsyncFunction("stopRecording") { (promise: Promise) in
|
|
316
338
|
Logger.debug("AudioStudioModule", "stopRecording called.")
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
339
|
+
|
|
340
|
+
// Serialize on lifecycle queue: stop flushes file handles and
|
|
341
|
+
// tears down AVAudioEngine; must not race with start/prepare and
|
|
342
|
+
// must not block the JS/UI thread.
|
|
343
|
+
self.audioLifecycleQueue.async {
|
|
344
|
+
let recordingResult = self.streamManager.stopRecording()
|
|
345
|
+
DispatchQueue.main.async {
|
|
346
|
+
if let recordingResult = recordingResult {
|
|
347
|
+
var resultDict: [String: Any] = [
|
|
348
|
+
"fileUri": recordingResult.fileUri,
|
|
349
|
+
"filename": recordingResult.filename,
|
|
350
|
+
"durationMs": recordingResult.duration,
|
|
351
|
+
"size": recordingResult.size,
|
|
352
|
+
"channels": recordingResult.channels,
|
|
353
|
+
"bitDepth": recordingResult.bitDepth,
|
|
354
|
+
"sampleRate": recordingResult.sampleRate,
|
|
355
|
+
"mimeType": recordingResult.mimeType,
|
|
356
|
+
"createdAt": Date().timeIntervalSince1970 * 1000,
|
|
357
|
+
]
|
|
358
|
+
|
|
359
|
+
if let compression = recordingResult.compression {
|
|
360
|
+
resultDict["compression"] = [
|
|
361
|
+
"compressedFileUri": compression.compressedFileUri,
|
|
362
|
+
"mimeType": compression.mimeType,
|
|
363
|
+
"bitrate": compression.bitrate,
|
|
364
|
+
"format": compression.format,
|
|
365
|
+
"size": compression.size
|
|
366
|
+
]
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
Logger.info("AudioStudioModule", "stopRecording: Recording stopped successfully. fileUri: \(recordingResult.fileUri), size: \(recordingResult.size)")
|
|
370
|
+
promise.resolve(resultDict)
|
|
371
|
+
} else {
|
|
372
|
+
Logger.error("AudioStudioModule", "stopRecording: streamManager.stopRecording returned nil.")
|
|
373
|
+
promise.reject("ERROR", "Failed to stop recording or no recording in progress.")
|
|
374
|
+
}
|
|
340
375
|
}
|
|
341
|
-
|
|
342
|
-
Logger.info("AudioStudioModule", "stopRecording: Recording stopped successfully. fileUri: \(recordingResult.fileUri), size: \(recordingResult.size)")
|
|
343
|
-
promise.resolve(resultDict)
|
|
344
|
-
} else {
|
|
345
|
-
Logger.error("AudioStudioModule", "stopRecording: streamManager.stopRecording returned nil.")
|
|
346
|
-
promise.reject("ERROR", "Failed to stop recording or no recording in progress.")
|
|
347
376
|
}
|
|
348
377
|
}
|
|
349
378
|
|
|
@@ -609,95 +638,89 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
609
638
|
return
|
|
610
639
|
}
|
|
611
640
|
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
641
|
+
// File decode + frame read can take 100s of ms on large files.
|
|
642
|
+
// Move off main to keep JS/UI responsive.
|
|
643
|
+
DispatchQueue.global(qos: .userInitiated).async {
|
|
644
|
+
do {
|
|
645
|
+
let audioFile = try AVAudioFile(forReading: url)
|
|
646
|
+
let format = audioFile.processingFormat
|
|
647
|
+
let sampleRate = format.sampleRate
|
|
648
|
+
let channels = Int(format.channelCount)
|
|
649
|
+
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
618
650
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
let endFrame: AVAudioFramePosition
|
|
651
|
+
let startFrame: AVAudioFramePosition
|
|
652
|
+
let endFrame: AVAudioFramePosition
|
|
622
653
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
}
|
|
654
|
+
if hasTimeRange {
|
|
655
|
+
startFrame = AVAudioFramePosition(startTimeMs! * sampleRate / 1000.0)
|
|
656
|
+
endFrame = AVAudioFramePosition(endTimeMs! * sampleRate / 1000.0)
|
|
657
|
+
} else {
|
|
658
|
+
let bytesPerFrame = Int64(channels * (bitDepth / 8))
|
|
659
|
+
startFrame = AVAudioFramePosition(position!) / bytesPerFrame
|
|
660
|
+
endFrame = startFrame + (AVAudioFramePosition(length!) / bytesPerFrame)
|
|
661
|
+
}
|
|
632
662
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
663
|
+
guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
|
|
664
|
+
promise.reject("INVALID_RANGE", "Invalid range specified")
|
|
665
|
+
return
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
let frameCount = AVAudioFrameCount(endFrame - startFrame)
|
|
638
669
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
// Pass both options separately - normalizeAudio from decodingOptions, and includeNormalizedData as is
|
|
643
|
-
let decodingConfig = DecodingConfig.fromDictionary(decodingOptions)
|
|
644
|
-
|
|
645
|
-
let (pcmData, normalizedData, base64Data) = try extractRawAudioData(
|
|
646
|
-
from: url,
|
|
647
|
-
startFrame: startFrame,
|
|
648
|
-
frameCount: frameCount,
|
|
649
|
-
format: format,
|
|
650
|
-
decodingConfig: decodingConfig,
|
|
651
|
-
includeNormalizedData: includeNormalizedData,
|
|
652
|
-
includeBase64Data: includeBase64Data
|
|
653
|
-
)
|
|
670
|
+
let decodingConfig = DecodingConfig.fromDictionary(decodingOptions)
|
|
654
671
|
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
bitDepth: bitDepth
|
|
672
|
+
let (pcmData, normalizedData, base64Data) = try extractRawAudioData(
|
|
673
|
+
from: url,
|
|
674
|
+
startFrame: startFrame,
|
|
675
|
+
frameCount: frameCount,
|
|
676
|
+
format: format,
|
|
677
|
+
decodingConfig: decodingConfig,
|
|
678
|
+
includeNormalizedData: includeNormalizedData,
|
|
679
|
+
includeBase64Data: includeBase64Data
|
|
664
680
|
)
|
|
665
|
-
resultDict["pcmData"] = wavData
|
|
666
|
-
resultDict["hasWavHeader"] = true
|
|
667
|
-
} else {
|
|
668
|
-
resultDict["pcmData"] = pcmData
|
|
669
|
-
resultDict["hasWavHeader"] = false
|
|
670
|
-
}
|
|
671
|
-
|
|
672
|
-
// Add the rest of the data
|
|
673
|
-
resultDict["sampleRate"] = Int(sampleRate)
|
|
674
|
-
resultDict["channels"] = channels
|
|
675
|
-
resultDict["bitDepth"] = bitDepth
|
|
676
|
-
resultDict["durationMs"] = Int(Double(frameCount) * 1000.0 / sampleRate)
|
|
677
|
-
resultDict["format"] = "pcm_\(bitDepth)bit"
|
|
678
|
-
resultDict["samples"] = Int(frameCount) * channels
|
|
679
|
-
|
|
680
|
-
// Add normalized data if requested, regardless of normalization setting
|
|
681
|
-
if includeNormalizedData {
|
|
682
|
-
resultDict["normalizedData"] = normalizedData
|
|
683
|
-
}
|
|
684
|
-
|
|
685
|
-
// Add checksum if requested
|
|
686
|
-
if options["computeChecksum"] as? Bool == true {
|
|
687
|
-
let checksum = calculateCRC32(data: pcmData)
|
|
688
|
-
resultDict["checksum"] = Int(checksum)
|
|
689
|
-
|
|
690
|
-
Logger.debug("AudioStudioModule", "Computed CRC32 checksum: \(checksum)")
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
if let includeBase64Data = options["includeBase64Data"] as? Bool, includeBase64Data {
|
|
694
|
-
resultDict["base64Data"] = base64Data
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
promise.resolve(resultDict)
|
|
698
681
|
|
|
699
|
-
|
|
700
|
-
|
|
682
|
+
var resultDict: [String: Any] = [:]
|
|
683
|
+
|
|
684
|
+
if includeWavHeader {
|
|
685
|
+
let wavData = createWavHeader(
|
|
686
|
+
pcmData: pcmData,
|
|
687
|
+
sampleRate: Int(sampleRate),
|
|
688
|
+
channels: channels,
|
|
689
|
+
bitDepth: bitDepth
|
|
690
|
+
)
|
|
691
|
+
resultDict["pcmData"] = wavData
|
|
692
|
+
resultDict["hasWavHeader"] = true
|
|
693
|
+
} else {
|
|
694
|
+
resultDict["pcmData"] = pcmData
|
|
695
|
+
resultDict["hasWavHeader"] = false
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
resultDict["sampleRate"] = Int(sampleRate)
|
|
699
|
+
resultDict["channels"] = channels
|
|
700
|
+
resultDict["bitDepth"] = bitDepth
|
|
701
|
+
resultDict["durationMs"] = Int(Double(frameCount) * 1000.0 / sampleRate)
|
|
702
|
+
resultDict["format"] = "pcm_\(bitDepth)bit"
|
|
703
|
+
resultDict["samples"] = Int(frameCount) * channels
|
|
704
|
+
|
|
705
|
+
if includeNormalizedData {
|
|
706
|
+
resultDict["normalizedData"] = normalizedData
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
if options["computeChecksum"] as? Bool == true {
|
|
710
|
+
let checksum = calculateCRC32(data: pcmData)
|
|
711
|
+
resultDict["checksum"] = Int(checksum)
|
|
712
|
+
Logger.debug("AudioStudioModule", "Computed CRC32 checksum: \(checksum)")
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
if let includeBase64Data = options["includeBase64Data"] as? Bool, includeBase64Data {
|
|
716
|
+
resultDict["base64Data"] = base64Data
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
promise.resolve(resultDict)
|
|
720
|
+
|
|
721
|
+
} catch {
|
|
722
|
+
promise.reject("PROCESSING_ERROR", "Failed to process audio file: \(error.localizedDescription)")
|
|
723
|
+
}
|
|
701
724
|
}
|
|
702
725
|
}
|
|
703
726
|
|
|
@@ -710,90 +733,90 @@ public class AudioStudioModule: Module, AudioStreamManagerDelegate, AudioDeviceM
|
|
|
710
733
|
/// - promise: A promise to resolve with the extracted mel spectrogram data or reject with an error.
|
|
711
734
|
/// - Returns: Promise to be resolved with mel spectrogram data.
|
|
712
735
|
AsyncFunction("extractMelSpectrogram") { (options: [String: Any], promise: Promise) in
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
736
|
+
// Heavy DSP: file decode + STFT + mel projection. Multi-second on
|
|
737
|
+
// large files. Move off main to keep JS/UI responsive.
|
|
738
|
+
DispatchQueue.global(qos: .userInitiated).async {
|
|
739
|
+
do {
|
|
740
|
+
guard let fileUri = options["fileUri"] as? String else {
|
|
741
|
+
throw NSError(domain: "AudioStudio", code: -1, userInfo: [NSLocalizedDescriptionKey: "fileUri is required"])
|
|
742
|
+
}
|
|
743
|
+
guard let windowSizeMs = options["windowSizeMs"] as? Double else {
|
|
744
|
+
throw NSError(domain: "AudioStudio", code: -1, userInfo: [NSLocalizedDescriptionKey: "windowSizeMs is required"])
|
|
745
|
+
}
|
|
746
|
+
guard let hopLengthMs = options["hopLengthMs"] as? Double else {
|
|
747
|
+
throw NSError(domain: "AudioStudio", code: -1, userInfo: [NSLocalizedDescriptionKey: "hopLengthMs is required"])
|
|
748
|
+
}
|
|
749
|
+
guard let nMels = options["nMels"] as? Int ?? (options["nMels"] as? Double).map({ Int($0) }) else {
|
|
750
|
+
throw NSError(domain: "AudioStudio", code: -1, userInfo: [NSLocalizedDescriptionKey: "nMels is required"])
|
|
751
|
+
}
|
|
726
752
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
753
|
+
let fMin = Float(options["fMin"] as? Double ?? 0.0)
|
|
754
|
+
let fMaxParam = options["fMax"] as? Double
|
|
755
|
+
let windowType = options["windowType"] as? String ?? "hann"
|
|
756
|
+
let logScale = options["logScale"] as? Bool ?? true
|
|
757
|
+
let normalize = options["normalize"] as? Bool ?? false
|
|
758
|
+
let startTimeMs = options["startTimeMs"] as? Double
|
|
759
|
+
let endTimeMs = options["endTimeMs"] as? Double
|
|
734
760
|
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
var samples = audioData.samples
|
|
761
|
+
let audioData = try loadAudioFile(fileUri)
|
|
762
|
+
let sampleRate = audioData.sampleRate
|
|
763
|
+
var samples = audioData.samples
|
|
739
764
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
765
|
+
if let startMs = startTimeMs {
|
|
766
|
+
let startSample = Int(startMs * Double(sampleRate) / 1000.0)
|
|
767
|
+
let endSample: Int
|
|
768
|
+
if let endMs = endTimeMs {
|
|
769
|
+
endSample = min(Int(endMs * Double(sampleRate) / 1000.0), samples.count)
|
|
770
|
+
} else {
|
|
771
|
+
endSample = samples.count
|
|
772
|
+
}
|
|
773
|
+
if startSample < endSample && startSample < samples.count {
|
|
774
|
+
samples = Array(samples[startSample..<endSample])
|
|
775
|
+
}
|
|
751
776
|
}
|
|
752
|
-
}
|
|
753
777
|
|
|
754
|
-
|
|
778
|
+
let fMax = fMaxParam.map { Float($0) } ?? Float(sampleRate) / 2.0
|
|
755
779
|
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
let hopLengthSamples = Int(hopLengthMs * Double(sampleRate) / 1000.0)
|
|
780
|
+
let windowSizeSamples = Int(windowSizeMs * Double(sampleRate) / 1000.0)
|
|
781
|
+
let hopLengthSamples = Int(hopLengthMs * Double(sampleRate) / 1000.0)
|
|
759
782
|
|
|
760
|
-
|
|
783
|
+
let windowTypeInt: Int32 = windowType.lowercased() == "hamming" ? 1 : 0
|
|
761
784
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
)
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
}
|
|
785
|
+
guard let result = samples.withUnsafeBufferPointer({ bufferPtr -> [AnyHashable: Any]? in
|
|
786
|
+
guard let baseAddress = bufferPtr.baseAddress else { return nil }
|
|
787
|
+
return MelSpectrogramWrapper.compute(
|
|
788
|
+
withSamples: baseAddress,
|
|
789
|
+
numSamples: Int32(samples.count),
|
|
790
|
+
sampleRate: Int32(sampleRate),
|
|
791
|
+
fftLength: 2048,
|
|
792
|
+
windowSizeSamples: Int32(windowSizeSamples),
|
|
793
|
+
hopLengthSamples: Int32(hopLengthSamples),
|
|
794
|
+
nMels: Int32(nMels),
|
|
795
|
+
fMin: fMin,
|
|
796
|
+
fMax: fMax,
|
|
797
|
+
windowType: windowTypeInt,
|
|
798
|
+
logScale: logScale,
|
|
799
|
+
normalize: normalize
|
|
800
|
+
)
|
|
801
|
+
}) else {
|
|
802
|
+
throw NSError(domain: "AudioStudio", code: -1, userInfo: [NSLocalizedDescriptionKey: "Audio data is too short for spectrogram analysis"])
|
|
803
|
+
}
|
|
782
804
|
|
|
783
|
-
|
|
784
|
-
|
|
805
|
+
let timeSteps = result["timeSteps"] as! Int
|
|
806
|
+
let durationMs = Double(samples.count) / Double(sampleRate) * 1000.0
|
|
785
807
|
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
808
|
+
let output: [String: Any] = [
|
|
809
|
+
"spectrogram": result["spectrogram"]!,
|
|
810
|
+
"sampleRate": sampleRate,
|
|
811
|
+
"nMels": nMels,
|
|
812
|
+
"timeSteps": timeSteps,
|
|
813
|
+
"durationMs": durationMs
|
|
814
|
+
]
|
|
793
815
|
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
816
|
+
promise.resolve(output)
|
|
817
|
+
} catch {
|
|
818
|
+
promise.reject("SPECTROGRAM_ERROR", "Failed to extract mel spectrogram: \(error.localizedDescription)")
|
|
819
|
+
}
|
|
797
820
|
}
|
|
798
821
|
}
|
|
799
822
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@siteed/audio-studio",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.4",
|
|
4
4
|
"description": "Comprehensive audio processing library for React Native and Expo with recording, analysis, visualization, and streaming capabilities across iOS, Android, and web",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -8,7 +8,7 @@ const WASM_VERSION = '3.0.2'
|
|
|
8
8
|
const DEFAULT_WASM_CDN = `https://cdn.jsdelivr.net/npm/@siteed/audio-studio@${WASM_VERSION}/prebuilt/wasm/mel-spectrogram.js`
|
|
9
9
|
|
|
10
10
|
let _wasmUrl: string = DEFAULT_WASM_CDN
|
|
11
|
-
const _resetListeners:
|
|
11
|
+
const _resetListeners: (() => void)[] = []
|
|
12
12
|
|
|
13
13
|
export function _registerModuleReset(fn: () => void): void {
|
|
14
14
|
_resetListeners.push(fn)
|
|
@@ -15,7 +15,8 @@ function loadScriptTag(url: string): Promise<void> {
|
|
|
15
15
|
const script = document.createElement('script')
|
|
16
16
|
script.src = url
|
|
17
17
|
script.onload = () => resolve()
|
|
18
|
-
script.onerror = () => reject(new Error(`Failed to load script: ${url}`))
|
|
18
|
+
script.onerror = () =>
|
|
19
|
+
reject(new Error(`Failed to load script: ${url}`))
|
|
19
20
|
document.head.appendChild(script)
|
|
20
21
|
})
|
|
21
22
|
}
|
|
@@ -25,13 +26,17 @@ export function getWasmModule(): Promise<AudioFeaturesWasmModule> {
|
|
|
25
26
|
modulePromise = (async () => {
|
|
26
27
|
const url = getMelSpectrogramWasmUrl()
|
|
27
28
|
// Try ESM import first; fall back to <script> tag for UMD modules
|
|
28
|
-
const mod = await import(
|
|
29
|
+
const mod = await import(
|
|
30
|
+
/* webpackIgnore: true */ /* @vite-ignore */ url
|
|
31
|
+
)
|
|
29
32
|
let factory: unknown = mod.default ?? mod
|
|
30
33
|
if (typeof factory !== 'function') {
|
|
31
34
|
// UMD fallback: load via <script> tag so the top-level `var` becomes a global and
|
|
32
35
|
// document.currentScript.src is set (Emscripten uses it to locate the .wasm binary).
|
|
33
36
|
await loadScriptTag(url)
|
|
34
|
-
factory = (globalThis as Record<string, unknown>)[
|
|
37
|
+
factory = (globalThis as Record<string, unknown>)[
|
|
38
|
+
WASM_GLOBAL_NAME
|
|
39
|
+
]
|
|
35
40
|
}
|
|
36
41
|
if (typeof factory !== 'function') {
|
|
37
42
|
throw new TypeError(
|
package/src/trimAudio.ts
CHANGED
|
@@ -34,7 +34,10 @@ function sliceAudioBuffer(
|
|
|
34
34
|
return out
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
function encodeBufferToWav(buffer: AudioBuffer, bitDepth: BitDepth): ArrayBuffer {
|
|
37
|
+
function encodeBufferToWav(
|
|
38
|
+
buffer: AudioBuffer,
|
|
39
|
+
bitDepth: BitDepth
|
|
40
|
+
): ArrayBuffer {
|
|
38
41
|
const { length, numberOfChannels, sampleRate } = buffer
|
|
39
42
|
const channels: Float32Array[] = []
|
|
40
43
|
for (let c = 0; c < numberOfChannels; c++) {
|
|
@@ -44,7 +47,9 @@ function encodeBufferToWav(buffer: AudioBuffer, bitDepth: BitDepth): ArrayBuffer
|
|
|
44
47
|
for (let i = 0; i < length; i++) {
|
|
45
48
|
for (let c = 0; c < numberOfChannels; c++) {
|
|
46
49
|
const clamped = Math.max(-1, Math.min(1, channels[c][i]))
|
|
47
|
-
interleavedData[i * numberOfChannels + c] = Math.round(clamped * 32767)
|
|
50
|
+
interleavedData[i * numberOfChannels + c] = Math.round(
|
|
51
|
+
clamped * 32767
|
|
52
|
+
)
|
|
48
53
|
}
|
|
49
54
|
}
|
|
50
55
|
return writeWavHeader({
|
|
@@ -322,7 +327,10 @@ export async function trimAudio(
|
|
|
322
327
|
}
|
|
323
328
|
|
|
324
329
|
if (format === 'wav') {
|
|
325
|
-
outputData = encodeBufferToWav(
|
|
330
|
+
outputData = encodeBufferToWav(
|
|
331
|
+
resultBuffer,
|
|
332
|
+
targetBitDepth as BitDepth
|
|
333
|
+
)
|
|
326
334
|
outputMimeType = 'audio/wav'
|
|
327
335
|
} else if (format === 'opus') {
|
|
328
336
|
try {
|
|
@@ -338,7 +346,10 @@ export async function trimAudio(
|
|
|
338
346
|
console.warn(
|
|
339
347
|
`Failed to encode to ${format}, falling back to WAV: ${error}`
|
|
340
348
|
)
|
|
341
|
-
outputData = encodeBufferToWav(
|
|
349
|
+
outputData = encodeBufferToWav(
|
|
350
|
+
resultBuffer,
|
|
351
|
+
targetBitDepth as BitDepth
|
|
352
|
+
)
|
|
342
353
|
outputMimeType = 'audio/wav'
|
|
343
354
|
}
|
|
344
355
|
} else {
|
|
@@ -346,7 +357,10 @@ export async function trimAudio(
|
|
|
346
357
|
console.warn(
|
|
347
358
|
`Format ${format} not supported on web, using WAV instead`
|
|
348
359
|
)
|
|
349
|
-
outputData = encodeBufferToWav(
|
|
360
|
+
outputData = encodeBufferToWav(
|
|
361
|
+
resultBuffer,
|
|
362
|
+
targetBitDepth as BitDepth
|
|
363
|
+
)
|
|
350
364
|
outputMimeType = 'audio/wav'
|
|
351
365
|
}
|
|
352
366
|
|