@siteed/expo-audio-stream 1.17.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +26 -1
  2. package/README.md +1 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
  4. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
  6. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
  8. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
  9. package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
  10. package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
  11. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
  12. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
  13. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  14. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  15. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
  16. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  17. package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
  18. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  19. package/build/ExpoAudioStream.types.d.ts +47 -3
  20. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  21. package/build/ExpoAudioStream.types.js.map +1 -1
  22. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  23. package/build/ExpoAudioStream.web.js +0 -1
  24. package/build/ExpoAudioStream.web.js.map +1 -1
  25. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  26. package/build/ExpoAudioStreamModule.js +216 -12
  27. package/build/ExpoAudioStreamModule.js.map +1 -1
  28. package/build/WebRecorder.web.d.ts +67 -13
  29. package/build/WebRecorder.web.d.ts.map +1 -1
  30. package/build/WebRecorder.web.js +177 -173
  31. package/build/WebRecorder.web.js.map +1 -1
  32. package/build/index.d.ts +3 -3
  33. package/build/index.d.ts.map +1 -1
  34. package/build/index.js +2 -2
  35. package/build/index.js.map +1 -1
  36. package/build/useAudioRecorder.d.ts.map +1 -1
  37. package/build/useAudioRecorder.js +12 -8
  38. package/build/useAudioRecorder.js.map +1 -1
  39. package/build/utils/audioProcessing.d.ts +24 -0
  40. package/build/utils/audioProcessing.d.ts.map +1 -0
  41. package/build/utils/audioProcessing.js +133 -0
  42. package/build/utils/audioProcessing.js.map +1 -0
  43. package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
  44. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
  45. package/build/workers/InlineFeaturesExtractor.web.js +694 -194
  46. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
  47. package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
  48. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
  49. package/build/workers/inlineAudioWebWorker.web.js +3 -2
  50. package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
  51. package/ios/AudioAnalysisData.swift +51 -16
  52. package/ios/AudioProcessingHelpers.swift +710 -26
  53. package/ios/AudioProcessor.swift +334 -185
  54. package/ios/AudioStreamManager.swift +2 -3
  55. package/ios/DataPoint.swift +25 -12
  56. package/ios/DecodingConfig.swift +47 -0
  57. package/ios/ExpoAudioStreamModule.swift +187 -103
  58. package/ios/FFT.swift +62 -0
  59. package/ios/Features.swift +24 -3
  60. package/ios/RecordingSettings.swift +7 -7
  61. package/package.json +2 -1
  62. package/plugin/build/index.js +6 -1
  63. package/plugin/src/index.ts +9 -1
  64. package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
  65. package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
  66. package/src/ExpoAudioStream.types.ts +53 -7
  67. package/src/ExpoAudioStream.web.ts +0 -1
  68. package/src/ExpoAudioStreamModule.ts +255 -10
  69. package/src/WebRecorder.web.ts +231 -244
  70. package/src/index.ts +5 -3
  71. package/src/useAudioRecorder.tsx +14 -10
  72. package/src/utils/audioProcessing.ts +205 -0
  73. package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
  74. package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
@@ -1588,9 +1588,8 @@ class AudioStreamManager: NSObject {
1588
1588
  let processingResult = processor.processAudioBuffer(
1589
1589
  data: dataToProcess,
1590
1590
  sampleRate: Float(settings.sampleRate),
1591
- pointsPerSecond: settings.pointsPerSecond ?? 10,
1592
- algorithm: settings.algorithm ?? "rms",
1593
- featureOptions: settings.featureOptions ?? ["rms": true, "zcr": true],
1591
+ segmentDurationMs: settings.segmentDurationMs,
1592
+ featureOptions: settings.featureOptions ?? [:],
1594
1593
  bitDepth: settings.bitDepth,
1595
1594
  numberOfChannels: settings.numberOfChannels
1596
1595
  )
@@ -7,19 +7,31 @@
7
7
 
8
8
  import Foundation
9
9
 
10
+ public struct SpeechFeatures {
11
+ public var isActive: Bool
12
+ public var speakerId: Int?
13
+
14
+ func toDictionary() -> [String: Any] {
15
+ return [
16
+ "isActive": isActive,
17
+ "speakerId": speakerId as Any
18
+ ]
19
+ }
20
+ }
10
21
 
11
22
  public struct DataPoint {
12
23
  public var id: Int
13
24
  public var amplitude: Float
14
- public var activeSpeech: Bool?
15
- public var dB: Float?
16
- public var silent: Bool?
25
+ public var rms: Float
26
+ public var dB: Float
27
+ public var silent: Bool
17
28
  public var features: Features?
18
- public var startTime: Float?
19
- public var endTime: Float?
20
- public var startPosition: Int?
21
- public var endPosition: Int?
22
- public var speaker: Int?
29
+ public var speech: SpeechFeatures?
30
+ public let startTime: Float // in seconds
31
+ public let endTime: Float // in seconds
32
+ public let startPosition: Int // byte position in audio file
33
+ public let endPosition: Int // byte position in audio file
34
+ public let samples: Int // number of samples in segment
23
35
  }
24
36
 
25
37
  extension DataPoint {
@@ -27,15 +39,16 @@ extension DataPoint {
27
39
  return [
28
40
  "id": id,
29
41
  "amplitude": amplitude,
30
- "activeSpeech": activeSpeech ?? false,
31
- "dB": dB ?? 0,
32
- "silent": silent ?? false,
42
+ "rms": rms,
43
+ "dB": dB,
44
+ "silent": silent,
33
45
  "features": features?.toDictionary() ?? [:],
46
+ "speech": speech?.toDictionary() ?? [:],
34
47
  "startTime": startTime ?? 0,
35
48
  "endTime": endTime ?? 0,
36
49
  "startPosition": startPosition ?? 0,
37
50
  "endPosition": endPosition ?? 0,
38
- "speaker": speaker ?? 0
51
+ "samples": samples ?? 0
39
52
  ]
40
53
  }
41
54
  }
@@ -0,0 +1,47 @@
1
+ //
2
+ // DecodingConfig.swift
3
+ // Pods
4
+ //
5
+ // Created by Arthur Breton on 24/2/2025.
6
+ //
7
+
8
+ import AVFoundation
9
+
10
+ public struct DecodingConfig {
11
+ let targetSampleRate: Double?
12
+ let targetChannels: Int?
13
+ let targetBitDepth: Int?
14
+ let normalizeAudio: Bool
15
+
16
+ static func fromDictionary(_ dict: [String: Any]?) -> DecodingConfig {
17
+ guard let dict = dict else {
18
+ return DecodingConfig.default
19
+ }
20
+
21
+ return DecodingConfig(
22
+ targetSampleRate: dict["targetSampleRate"] as? Double,
23
+ targetChannels: dict["targetChannels"] as? Int,
24
+ targetBitDepth: dict["targetBitDepth"] as? Int,
25
+ normalizeAudio: dict["normalizeAudio"] as? Bool ?? false
26
+ )
27
+ }
28
+
29
+ static var `default`: DecodingConfig {
30
+ return DecodingConfig(
31
+ targetSampleRate: nil,
32
+ targetChannels: nil,
33
+ targetBitDepth: nil,
34
+ normalizeAudio: false
35
+ )
36
+ }
37
+
38
+ func toAudioFormat(baseFormat: AVAudioFormat) -> AVAudioFormat {
39
+ let sampleRate = targetSampleRate ?? baseFormat.sampleRate
40
+ let channels = targetChannels ?? Int(baseFormat.channelCount)
41
+
42
+ return AVAudioFormat(
43
+ standardFormatWithSampleRate: sampleRate,
44
+ channels: AVAudioChannelCount(channels)
45
+ )!
46
+ }
47
+ }
@@ -1,3 +1,4 @@
1
+ // packages/expo-audio-stream/ios/ExpoAudioStreamModule.swift
1
2
  import ExpoModulesCore
2
3
  import AVFoundation
3
4
 
@@ -31,72 +32,55 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
31
32
  /// - options: A dictionary containing:
32
33
  /// - `fileUri`: The URI of the audio file.
33
34
  /// - `pointsPerSecond`: The number of data points to extract per second of audio.
34
- /// - `algorithm`: The algorithm to use for extraction.
35
35
  /// - `features`: A dictionary specifying which features to extract (e.g., `energy`, `mfcc`, `rms`, etc.).
36
36
  /// - promise: A promise to resolve with the extracted audio analysis data or reject with an error.
37
37
  /// - Returns: Promise to be resolved with audio analysis data.
38
38
  AsyncFunction("extractAudioAnalysis") { (options: [String: Any], promise: Promise) in
39
39
  guard let fileUri = options["fileUri"] as? String,
40
- let url = URL(string: fileUri),
41
- let pointsPerSecond = options["pointsPerSecond"] as? Int,
42
- let algorithm = options["algorithm"] as? String else {
43
- promise.reject("INVALID_ARGUMENTS", "Invalid arguments provided")
40
+ let url = URL(string: fileUri) else {
41
+ promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
42
+ return
43
+ }
44
+
45
+ // Get time or byte range options
46
+ let startTimeMs = options["startTimeMs"] as? Double
47
+ let endTimeMs = options["endTimeMs"] as? Double
48
+ let position = options["position"] as? Int
49
+ let byteLength = options["length"] as? Int
50
+
51
+ // Validate ranges - can have time range OR byte range OR no range
52
+ let hasTimeRange = startTimeMs != nil && endTimeMs != nil
53
+ let hasByteRange = position != nil && byteLength != nil
54
+
55
+ // Only throw if both ranges are provided
56
+ guard !(hasTimeRange && hasByteRange) else {
57
+ promise.reject("INVALID_ARGUMENTS", "Cannot specify both time range and byte range")
44
58
  return
45
59
  }
46
60
 
47
61
  let features = options["features"] as? [String: Bool] ?? [:]
48
62
  let featureOptions = self.extractFeatureOptions(from: features)
63
+ let segmentDurationMs = options["segmentDurationMs"] as? Int ?? 100 // Default value of 100ms
49
64
 
50
- DispatchQueue.global().async {
65
+ DispatchQueue.global().async(execute: {
51
66
  do {
52
67
  let audioFile = try AVAudioFile(forReading: url)
53
68
  let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
54
69
  let numberOfChannels = Int(audioFile.fileFormat.channelCount)
70
+ let sampleRate = audioFile.fileFormat.sampleRate
55
71
 
56
- let audioProcessor = try AudioProcessor(url: url, resolve: { result in
57
- promise.resolve(result)
58
- }, reject: { code, message in
59
- promise.reject(code, message)
60
- })
72
+ // Convert time range to byte range if needed
73
+ let effectivePosition: Int?
74
+ let effectiveLength: Int?
61
75
 
62
- if let result = audioProcessor.processAudioData(numberOfSamples: nil, pointsPerSecond: pointsPerSecond, algorithm: algorithm, featureOptions: featureOptions, bitDepth: bitDepth, numberOfChannels: numberOfChannels) {
63
- promise.resolve(result.toDictionary())
76
+ if hasTimeRange {
77
+ let bytesPerSecond = Int(sampleRate) * numberOfChannels * (bitDepth / 8)
78
+ effectivePosition = Int(startTimeMs! * Double(bytesPerSecond) / 1000.0)
79
+ effectiveLength = Int((endTimeMs! - startTimeMs!) * Double(bytesPerSecond) / 1000.0)
64
80
  } else {
65
- promise.reject("PROCESSING_ERROR", "Failed to process audio data")
81
+ effectivePosition = position
82
+ effectiveLength = byteLength
66
83
  }
67
- } catch {
68
- promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
69
- }
70
- }
71
- }
72
-
73
- /// Extracts waveform data from an audio file.
74
- ///
75
- /// - Parameters:
76
- /// - options: A dictionary containing:
77
- /// - `fileUri`: The URI of the audio file.
78
- /// - `numberOfSamples`: The number of samples to extract for the waveform.
79
- /// - `offset`: The optional offset to start reading from. Defaults to 0 if not provided.
80
- /// - `length`: The optional length of the audio to read. Defaults to the entire file if not provided.
81
- /// - promise: A promise to resolve with the extracted waveform data or reject with an error.
82
- /// - Returns: Promise to be resolved with waveform data.
83
- AsyncFunction("extractWaveform") { (options: [String: Any], promise: Promise) in
84
- guard let fileUri = options["fileUri"] as? String,
85
- let url = URL(string: fileUri),
86
- let numberOfSamples = options["numberOfSamples"] as? Int else {
87
- promise.reject("INVALID_ARGUMENTS", "Invalid arguments provided")
88
- return
89
- }
90
-
91
- let offset = options["offset"] as? Int ?? 0
92
- DispatchQueue.global().async {
93
- do {
94
- let audioFile = try AVAudioFile(forReading: url)
95
- let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
96
- let numberOfChannels = Int(audioFile.fileFormat.channelCount)
97
-
98
- // If length is not provided, default to the entire file length
99
- let length = options["length"] as? UInt ?? UInt(audioFile.length - AVAudioFramePosition(offset))
100
84
 
101
85
  let audioProcessor = try AudioProcessor(url: url, resolve: { result in
102
86
  promise.resolve(result)
@@ -104,15 +88,25 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
104
88
  promise.reject(code, message)
105
89
  })
106
90
 
107
- if let result = audioProcessor.processAudioData(numberOfSamples: numberOfSamples, offset: offset, length: length, pointsPerSecond: nil, algorithm: "rms", featureOptions: [:], bitDepth: bitDepth, numberOfChannels: numberOfChannels) {
91
+ if let result = audioProcessor.processAudioData(
92
+ numberOfSamples: nil,
93
+ offset: 0,
94
+ length: nil,
95
+ segmentDurationMs: segmentDurationMs,
96
+ featureOptions: featureOptions,
97
+ bitDepth: bitDepth,
98
+ numberOfChannels: numberOfChannels,
99
+ position: effectivePosition,
100
+ byteLength: effectiveLength
101
+ ) {
108
102
  promise.resolve(result.toDictionary())
109
103
  } else {
110
- promise.reject("EXTRACTION_ERROR", "Failed to extract waveform")
104
+ promise.reject("PROCESSING_ERROR", "Failed to process audio data")
111
105
  }
112
106
  } catch {
113
- promise.reject("EXTRACTION_ERROR", "Failed to initialize waveform extractor: \(error.localizedDescription)")
107
+ promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
114
108
  }
115
- }
109
+ })
116
110
  }
117
111
 
118
112
 
@@ -315,57 +309,6 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
315
309
  }
316
310
  }
317
311
 
318
- /// Extracts audio features from an audio file.
319
- /// - Parameters:
320
- /// - options: A dictionary containing:
321
- /// - `fileUri`: The URI of the audio file.
322
- /// - `startTimeMs`: Optional start time in milliseconds.
323
- /// - `endTimeMs`: Optional end time in milliseconds.
324
- /// - `pointsPerSecond`: Number of points per second for analysis.
325
- /// - `algorithm`: The algorithm to use for extraction.
326
- /// - `featureOptions`: Features to extract.
327
- AsyncFunction("extractPreview") { (options: [String: Any], promise: Promise) in
328
- guard let fileUri = options["fileUri"] as? String,
329
- let url = URL(string: fileUri) else {
330
- promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
331
- return
332
- }
333
-
334
- let startTimeMs = options["startTimeMs"] as? Double
335
- let endTimeMs = options["endTimeMs"] as? Double
336
- let pointsPerSecond = options["pointsPerSecond"] as? Int ?? 20
337
- let algorithm = options["algorithm"] as? String ?? "rms"
338
- let featureOptions = options["featureOptions"] as? [String: Bool] ?? [:]
339
-
340
- DispatchQueue.global().async {
341
- do {
342
- let audioProcessor = try AudioProcessor(
343
- url: url,
344
- resolve: { result in
345
- promise.resolve(result)
346
- },
347
- reject: { code, message in
348
- promise.reject(code, message)
349
- }
350
- )
351
-
352
- if let result = audioProcessor.processAudioData(
353
- startTimeMs: startTimeMs,
354
- endTimeMs: endTimeMs,
355
- pointsPerSecond: pointsPerSecond,
356
- algorithm: algorithm,
357
- featureOptions: featureOptions
358
- ) {
359
- promise.resolve(result.toDictionary())
360
- } else {
361
- promise.reject("PROCESSING_ERROR", "Failed to process audio data")
362
- }
363
- } catch {
364
- promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
365
- }
366
- }
367
- }
368
-
369
312
  /// Trims an audio file to specified start and end times.
370
313
  /// - Parameters:
371
314
  /// - options: A dictionary containing:
@@ -414,6 +357,142 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
414
357
  }
415
358
  }
416
359
  }
360
+
361
+ /// Extracts raw PCM audio data from a file with time or byte range support
362
+ /// - Parameters:
363
+ /// - options: A dictionary containing:
364
+ /// - `fileUri`: The URI of the audio file
365
+ /// - `startTimeMs`: Optional start time in milliseconds
366
+ /// - `endTimeMs`: Optional end time in milliseconds
367
+ /// - `position`: Optional byte position
368
+ /// - `length`: Optional byte length
369
+ /// - `includeNormalizedData`: Boolean to include normalized audio data in [-1, 1] range
370
+ /// - `includeWavHeader`: Boolean to include WAV header in the PCM data
371
+ /// - `decodingOptions`: Decoding configuration
372
+ /// - `includeBase64Data`: Boolean to include base64 encoded string representation of the audio data
373
+ /// - `computeChecksum`: Boolean to compute and include CRC32 checksum of the PCM data
374
+ AsyncFunction("extractAudioData") { (options: [String: Any], promise: Promise) in
375
+ guard let fileUri = options["fileUri"] as? String,
376
+ let url = URL(string: fileUri) else {
377
+ promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
378
+ return
379
+ }
380
+
381
+ // Get time or byte range options
382
+ let startTimeMs = options["startTimeMs"] as? Double
383
+ let endTimeMs = options["endTimeMs"] as? Double
384
+ let position = options["position"] as? Int
385
+ let length = options["length"] as? Int
386
+ let includeWavHeader = options["includeWavHeader"] as? Bool ?? false
387
+
388
+ // Validate that we have either time range or byte range, but not both and not neither
389
+ let hasTimeRange = startTimeMs != nil && endTimeMs != nil
390
+ let hasByteRange = position != nil && length != nil
391
+
392
+ guard hasTimeRange || hasByteRange else {
393
+ promise.reject("INVALID_ARGUMENTS", "Must specify either time range (startTimeMs, endTimeMs) or byte range (position, length)")
394
+ return
395
+ }
396
+
397
+ guard !(hasTimeRange && hasByteRange) else {
398
+ promise.reject("INVALID_ARGUMENTS", "Cannot specify both time range and byte range")
399
+ return
400
+ }
401
+
402
+ do {
403
+ let audioFile = try AVAudioFile(forReading: url)
404
+ let format = audioFile.processingFormat
405
+ let sampleRate = format.sampleRate
406
+ let channels = Int(format.channelCount)
407
+ let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
408
+
409
+ // Calculate frame positions
410
+ let startFrame: AVAudioFramePosition
411
+ let endFrame: AVAudioFramePosition
412
+
413
+ if hasTimeRange {
414
+ startFrame = AVAudioFramePosition(startTimeMs! * sampleRate / 1000.0)
415
+ endFrame = AVAudioFramePosition(endTimeMs! * sampleRate / 1000.0)
416
+ } else {
417
+ // Convert byte position to frame position
418
+ let bytesPerFrame = Int64(channels * (bitDepth / 8))
419
+ startFrame = AVAudioFramePosition(position!) / bytesPerFrame
420
+ endFrame = startFrame + (AVAudioFramePosition(length!) / bytesPerFrame)
421
+ }
422
+
423
+ // Validate frame range
424
+ guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
425
+ promise.reject("INVALID_RANGE", "Invalid range specified")
426
+ return
427
+ }
428
+
429
+ let frameCount = AVAudioFrameCount(endFrame - startFrame)
430
+
431
+ // Create decoding config that includes normalization preference
432
+ var decodingOptions = options["decodingOptions"] as? [String: Any] ?? [:]
433
+ let includeNormalizedData = options["includeNormalizedData"] as? Bool ?? false
434
+
435
+ // Pass both options separately - normalizeAudio from decodingOptions, and includeNormalizedData as is
436
+ let decodingConfig = DecodingConfig.fromDictionary(decodingOptions)
437
+
438
+ let (pcmData, normalizedData, base64Data) = try extractRawAudioData(
439
+ from: url,
440
+ startFrame: startFrame,
441
+ frameCount: frameCount,
442
+ format: format,
443
+ decodingConfig: decodingConfig,
444
+ includeNormalizedData: includeNormalizedData,
445
+ includeBase64Data: options["includeBase64Data"] as? Bool ?? false
446
+ )
447
+
448
+ var resultDict: [String: Any] = [:]
449
+
450
+ if includeWavHeader {
451
+ // Create WAV header and prepend it to the PCM data
452
+ let wavData = createWavHeader(
453
+ pcmData: pcmData,
454
+ sampleRate: Int(sampleRate),
455
+ channels: channels,
456
+ bitDepth: bitDepth
457
+ )
458
+ resultDict["pcmData"] = wavData
459
+ resultDict["hasWavHeader"] = true
460
+ } else {
461
+ resultDict["pcmData"] = pcmData
462
+ resultDict["hasWavHeader"] = false
463
+ }
464
+
465
+ // Add the rest of the data
466
+ resultDict["sampleRate"] = Int(sampleRate)
467
+ resultDict["channels"] = channels
468
+ resultDict["bitDepth"] = bitDepth
469
+ resultDict["durationMs"] = Int(Double(frameCount) * 1000.0 / sampleRate)
470
+ resultDict["format"] = "pcm_\(bitDepth)bit"
471
+ resultDict["samples"] = Int(frameCount) * channels
472
+
473
+ // Add normalized data if requested, regardless of normalization setting
474
+ if includeNormalizedData {
475
+ resultDict["normalizedData"] = normalizedData
476
+ }
477
+
478
+ // Add checksum if requested
479
+ if options["computeChecksum"] as? Bool == true {
480
+ let checksum = calculateCRC32(data: pcmData)
481
+ resultDict["checksum"] = Int(checksum)
482
+
483
+ Logger.debug("Computed CRC32 checksum: \(checksum)")
484
+ }
485
+
486
+ if let includeBase64Data = options["includeBase64Data"] as? Bool, includeBase64Data {
487
+ resultDict["base64Data"] = base64Data
488
+ }
489
+
490
+ promise.resolve(resultDict)
491
+
492
+ } catch {
493
+ promise.reject("PROCESSING_ERROR", "Failed to process audio file: \(error.localizedDescription)")
494
+ }
495
+ }
417
496
  }
418
497
 
419
498
  func audioStreamManager(_ manager: AudioStreamManager, didReceiveInterruption info: [String: Any]) {
@@ -582,7 +661,12 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
582
661
  "spectralBandwidth": options["spectralBandwidth"] as? Bool ?? false,
583
662
  "chromagram": options["chromagram"] as? Bool ?? false,
584
663
  "tempo": options["tempo"] as? Bool ?? false,
585
- "hnr": options["hnr"] as? Bool ?? false
664
+ "hnr": options["hnr"] as? Bool ?? false,
665
+ "melSpectrogram": options["melSpectrogram"] as? Bool ?? false,
666
+ "spectralContrast": options["spectralContrast"] as? Bool ?? false,
667
+ "tonnetz": options["tonnetz"] as? Bool ?? false,
668
+ "pitch": options["pitch"] as? Bool ?? false,
669
+ "crc32": options["crc32"] as? Bool ?? false
586
670
  ]
587
671
  }
588
672
 
package/ios/FFT.swift ADDED
@@ -0,0 +1,62 @@
1
+ //
2
+ // FFT.swift
3
+ // Pods
4
+ //
5
+ // Created by Arthur Breton on 20/2/2025.
6
+ //
7
+
8
+ import Accelerate
9
+
10
+ class FFT {
11
+ private let length: Int
12
+ private var setup: vDSP_DFT_Setup?
13
+
14
+ init(_ length: Int) {
15
+ self.length = length
16
+ self.setup = vDSP_DFT_zop_CreateSetup(
17
+ nil,
18
+ vDSP_Length(length),
19
+ vDSP_DFT_Direction.FORWARD
20
+ )
21
+ }
22
+
23
+ deinit {
24
+ if let setup = setup {
25
+ vDSP_DFT_DestroySetup(setup)
26
+ }
27
+ }
28
+
29
+ func realForward(_ data: inout [Float]) {
30
+ var realIn = data
31
+ var imagIn = [Float](repeating: 0.0, count: length)
32
+ var realOut = [Float](repeating: 0.0, count: length)
33
+ var imagOut = [Float](repeating: 0.0, count: length)
34
+
35
+ // Perform FFT
36
+ vDSP_DFT_Execute(setup!,
37
+ &realIn,
38
+ &imagIn,
39
+ &realOut,
40
+ &imagOut)
41
+
42
+ // Ensure data array has enough space for both real and imaginary parts
43
+ if data.count < 2 * length {
44
+ data.append(contentsOf: [Float](repeating: 0.0, count: 2 * length - data.count))
45
+ }
46
+
47
+ // Combine real and imaginary parts
48
+ for i in 0..<length {
49
+ let j = i * 2
50
+ data[j] = realOut[i]
51
+ data[j + 1] = imagOut[i]
52
+ }
53
+ }
54
+
55
+ func processSegment(_ segment: [Float]) -> [Float] {
56
+ var fftData = segment.count < length ?
57
+ segment + [Float](repeating: 0, count: length - segment.count) :
58
+ Array(segment.prefix(length))
59
+ realForward(&fftData)
60
+ return fftData
61
+ }
62
+ }
@@ -21,6 +21,11 @@ public struct Features {
21
21
  var chromagram: [Float]?
22
22
  var tempo: Float?
23
23
  var hnr: Float?
24
+ var melSpectrogram: [Float]?
25
+ var spectralContrast: [Float]?
26
+ var tonnetz: [Float]?
27
+ var pitch: Float?
28
+ var crc32: UInt32?
24
29
 
25
30
  init(
26
31
  energy: Float = 0,
@@ -35,7 +40,12 @@ public struct Features {
35
40
  spectralBandwidth: Float? = nil,
36
41
  chromagram: [Float]? = nil,
37
42
  tempo: Float? = nil,
38
- hnr: Float? = nil
43
+ hnr: Float? = nil,
44
+ melSpectrogram: [Float]? = nil,
45
+ spectralContrast: [Float]? = nil,
46
+ tonnetz: [Float]? = nil,
47
+ pitch: Float? = nil,
48
+ crc32: UInt32? = nil
39
49
  ) {
40
50
  self.energy = energy
41
51
  self.mfcc = mfcc
@@ -50,12 +60,17 @@ public struct Features {
50
60
  self.chromagram = chromagram
51
61
  self.tempo = tempo
52
62
  self.hnr = hnr
63
+ self.melSpectrogram = melSpectrogram
64
+ self.spectralContrast = spectralContrast
65
+ self.tonnetz = tonnetz
66
+ self.pitch = pitch
67
+ self.crc32 = crc32
53
68
  }
54
69
  }
55
70
 
56
71
  extension Features {
57
72
  func toDictionary() -> [String: Any] {
58
- return [
73
+ var dict: [String: Any] = [
59
74
  "energy": energy,
60
75
  "mfcc": mfcc,
61
76
  "rms": rms,
@@ -68,7 +83,13 @@ extension Features {
68
83
  "spectralBandwidth": spectralBandwidth ?? 0,
69
84
  "chromagram": chromagram ?? [],
70
85
  "tempo": tempo ?? 0,
71
- "hnr": hnr ?? 0
86
+ "hnr": hnr ?? 0,
87
+ "melSpectrogram": melSpectrogram ?? [],
88
+ "spectralContrast": spectralContrast ?? [],
89
+ "tonnetz": tonnetz ?? [],
90
+ "pitch": pitch ?? 0,
91
+ "crc32": crc32 ?? 0
72
92
  ]
93
+ return dict
73
94
  }
74
95
  }
@@ -88,9 +88,7 @@ struct RecordingSettings {
88
88
  var showNotification: Bool = false
89
89
  var enableProcessing: Bool = false
90
90
 
91
- // Analysis settings
92
- var pointsPerSecond: Int? = 1000
93
- var algorithm: String? = "rms"
91
+ // Remove pointsPerSecond and algorithm
94
92
  var featureOptions: [String: Bool]? = ["rms": true, "zcr": true]
95
93
 
96
94
  // iOS-specific configuration
@@ -105,10 +103,12 @@ struct RecordingSettings {
105
103
 
106
104
  let autoResumeAfterInterruption: Bool
107
105
 
108
- // Make these optional with nil default values
109
106
  var outputDirectory: String? = nil
110
107
  var filename: String? = nil
111
108
 
109
+ // Update default to 100ms
110
+ var segmentDurationMs: Int = 100 // Default 100ms segments
111
+
112
112
  static func fromDictionary(_ dict: [String: Any]) -> Result<RecordingSettings, Error> {
113
113
  // Extract compression settings
114
114
  let compression = dict["compression"] as? [String: Any]
@@ -148,11 +148,11 @@ struct RecordingSettings {
148
148
  settings.showNotification = dict["showNotification"] as? Bool ?? false
149
149
  settings.enableProcessing = dict["enableProcessing"] as? Bool ?? false
150
150
 
151
- // Parse analysis settings
152
- settings.pointsPerSecond = dict["pointsPerSecond"] as? Int
153
- settings.algorithm = dict["algorithm"] as? String
154
151
  settings.featureOptions = dict["features"] as? [String: Bool]
155
152
 
153
+ // Update segmentDurationMs parsing
154
+ settings.segmentDurationMs = dict["segmentDurationMs"] as? Int ?? 100
155
+
156
156
  // Parse iOS-specific config
157
157
  if let iosDict = dict["ios"] as? [String: Any],
158
158
  let audioSessionDict = iosDict["audioSession"] as? [String: Any] {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@siteed/expo-audio-stream",
3
- "version": "1.17.0",
3
+ "version": "2.0.1",
4
4
  "description": "stream audio crossplatform",
5
5
  "license": "MIT",
6
6
  "main": "build/index.js",
@@ -105,6 +105,7 @@
105
105
  },
106
106
  "dependencies": {
107
107
  "@siteed/design-system": "^0.35.1",
108
+ "crc-32": "^1.2.2",
108
109
  "expo-modules-core": "~2.1.4"
109
110
  }
110
111
  }
@@ -48,7 +48,12 @@ const withRecordingPermission = (config, props) => {
48
48
  if (options.iosBackgroundModes?.useAudio === true &&
49
49
  enableBackgroundAudio === true &&
50
50
  !existingBackgroundModes.includes('audio')) {
51
- existingBackgroundModes.push('audio');
51
+ // Don't automatically add 'audio' background mode as it's only for playback
52
+ // existingBackgroundModes.push('audio')
53
+ // Instead, ensure processing mode is used for background recording
54
+ if (options.iosBackgroundModes?.useProcessing !== true) {
55
+ console.warn(`${LOG_PREFIX} Warning: Background audio recording requires 'processing' background mode. Please enable 'useProcessing' in iosBackgroundModes.`);
56
+ }
52
57
  }
53
58
  if (options.iosBackgroundModes?.useVoIP === true &&
54
59
  enablePhoneStateHandling === true) {