@siteed/expo-audio-stream 1.17.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -1
- package/README.md +1 -1
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
- package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/ExpoAudioStream.types.d.ts +47 -3
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStream.web.d.ts.map +1 -1
- package/build/ExpoAudioStream.web.js +0 -1
- package/build/ExpoAudioStream.web.js.map +1 -1
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +216 -12
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.d.ts +67 -13
- package/build/WebRecorder.web.d.ts.map +1 -1
- package/build/WebRecorder.web.js +177 -173
- package/build/WebRecorder.web.js.map +1 -1
- package/build/index.d.ts +3 -3
- package/build/index.d.ts.map +1 -1
- package/build/index.js +2 -2
- package/build/index.js.map +1 -1
- package/build/useAudioRecorder.d.ts.map +1 -1
- package/build/useAudioRecorder.js +12 -8
- package/build/useAudioRecorder.js.map +1 -1
- package/build/utils/audioProcessing.d.ts +24 -0
- package/build/utils/audioProcessing.d.ts.map +1 -0
- package/build/utils/audioProcessing.js +133 -0
- package/build/utils/audioProcessing.js.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
- package/build/workers/InlineFeaturesExtractor.web.js +694 -194
- package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.js +3 -2
- package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
- package/ios/AudioAnalysisData.swift +51 -16
- package/ios/AudioProcessingHelpers.swift +710 -26
- package/ios/AudioProcessor.swift +334 -185
- package/ios/AudioStreamManager.swift +2 -3
- package/ios/DataPoint.swift +25 -12
- package/ios/DecodingConfig.swift +47 -0
- package/ios/ExpoAudioStreamModule.swift +187 -103
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +24 -3
- package/ios/RecordingSettings.swift +7 -7
- package/package.json +2 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
- package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
- package/src/ExpoAudioStream.types.ts +53 -7
- package/src/ExpoAudioStream.web.ts +0 -1
- package/src/ExpoAudioStreamModule.ts +255 -10
- package/src/WebRecorder.web.ts +231 -244
- package/src/index.ts +5 -3
- package/src/useAudioRecorder.tsx +14 -10
- package/src/utils/audioProcessing.ts +205 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
- package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
|
@@ -1588,9 +1588,8 @@ class AudioStreamManager: NSObject {
|
|
|
1588
1588
|
let processingResult = processor.processAudioBuffer(
|
|
1589
1589
|
data: dataToProcess,
|
|
1590
1590
|
sampleRate: Float(settings.sampleRate),
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
featureOptions: settings.featureOptions ?? ["rms": true, "zcr": true],
|
|
1591
|
+
segmentDurationMs: settings.segmentDurationMs,
|
|
1592
|
+
featureOptions: settings.featureOptions ?? [:],
|
|
1594
1593
|
bitDepth: settings.bitDepth,
|
|
1595
1594
|
numberOfChannels: settings.numberOfChannels
|
|
1596
1595
|
)
|
package/ios/DataPoint.swift
CHANGED
|
@@ -7,19 +7,31 @@
|
|
|
7
7
|
|
|
8
8
|
import Foundation
|
|
9
9
|
|
|
10
|
+
public struct SpeechFeatures {
|
|
11
|
+
public var isActive: Bool
|
|
12
|
+
public var speakerId: Int?
|
|
13
|
+
|
|
14
|
+
func toDictionary() -> [String: Any] {
|
|
15
|
+
return [
|
|
16
|
+
"isActive": isActive,
|
|
17
|
+
"speakerId": speakerId as Any
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
}
|
|
10
21
|
|
|
11
22
|
public struct DataPoint {
|
|
12
23
|
public var id: Int
|
|
13
24
|
public var amplitude: Float
|
|
14
|
-
public var
|
|
15
|
-
public var dB: Float
|
|
16
|
-
public var silent: Bool
|
|
25
|
+
public var rms: Float
|
|
26
|
+
public var dB: Float
|
|
27
|
+
public var silent: Bool
|
|
17
28
|
public var features: Features?
|
|
18
|
-
public var
|
|
19
|
-
public
|
|
20
|
-
public
|
|
21
|
-
public
|
|
22
|
-
public
|
|
29
|
+
public var speech: SpeechFeatures?
|
|
30
|
+
public let startTime: Float // in seconds
|
|
31
|
+
public let endTime: Float // in seconds
|
|
32
|
+
public let startPosition: Int // byte position in audio file
|
|
33
|
+
public let endPosition: Int // byte position in audio file
|
|
34
|
+
public let samples: Int // number of samples in segment
|
|
23
35
|
}
|
|
24
36
|
|
|
25
37
|
extension DataPoint {
|
|
@@ -27,15 +39,16 @@ extension DataPoint {
|
|
|
27
39
|
return [
|
|
28
40
|
"id": id,
|
|
29
41
|
"amplitude": amplitude,
|
|
30
|
-
"
|
|
31
|
-
"dB": dB
|
|
32
|
-
"silent": silent
|
|
42
|
+
"rms": rms,
|
|
43
|
+
"dB": dB,
|
|
44
|
+
"silent": silent,
|
|
33
45
|
"features": features?.toDictionary() ?? [:],
|
|
46
|
+
"speech": speech?.toDictionary() ?? [:],
|
|
34
47
|
"startTime": startTime ?? 0,
|
|
35
48
|
"endTime": endTime ?? 0,
|
|
36
49
|
"startPosition": startPosition ?? 0,
|
|
37
50
|
"endPosition": endPosition ?? 0,
|
|
38
|
-
"
|
|
51
|
+
"samples": samples ?? 0
|
|
39
52
|
]
|
|
40
53
|
}
|
|
41
54
|
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
//
|
|
2
|
+
// DecodingConfig.swift
|
|
3
|
+
// Pods
|
|
4
|
+
//
|
|
5
|
+
// Created by Arthur Breton on 24/2/2025.
|
|
6
|
+
//
|
|
7
|
+
|
|
8
|
+
import AVFoundation
|
|
9
|
+
|
|
10
|
+
public struct DecodingConfig {
|
|
11
|
+
let targetSampleRate: Double?
|
|
12
|
+
let targetChannels: Int?
|
|
13
|
+
let targetBitDepth: Int?
|
|
14
|
+
let normalizeAudio: Bool
|
|
15
|
+
|
|
16
|
+
static func fromDictionary(_ dict: [String: Any]?) -> DecodingConfig {
|
|
17
|
+
guard let dict = dict else {
|
|
18
|
+
return DecodingConfig.default
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
return DecodingConfig(
|
|
22
|
+
targetSampleRate: dict["targetSampleRate"] as? Double,
|
|
23
|
+
targetChannels: dict["targetChannels"] as? Int,
|
|
24
|
+
targetBitDepth: dict["targetBitDepth"] as? Int,
|
|
25
|
+
normalizeAudio: dict["normalizeAudio"] as? Bool ?? false
|
|
26
|
+
)
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
static var `default`: DecodingConfig {
|
|
30
|
+
return DecodingConfig(
|
|
31
|
+
targetSampleRate: nil,
|
|
32
|
+
targetChannels: nil,
|
|
33
|
+
targetBitDepth: nil,
|
|
34
|
+
normalizeAudio: false
|
|
35
|
+
)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
func toAudioFormat(baseFormat: AVAudioFormat) -> AVAudioFormat {
|
|
39
|
+
let sampleRate = targetSampleRate ?? baseFormat.sampleRate
|
|
40
|
+
let channels = targetChannels ?? Int(baseFormat.channelCount)
|
|
41
|
+
|
|
42
|
+
return AVAudioFormat(
|
|
43
|
+
standardFormatWithSampleRate: sampleRate,
|
|
44
|
+
channels: AVAudioChannelCount(channels)
|
|
45
|
+
)!
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// packages/expo-audio-stream/ios/ExpoAudioStreamModule.swift
|
|
1
2
|
import ExpoModulesCore
|
|
2
3
|
import AVFoundation
|
|
3
4
|
|
|
@@ -31,72 +32,55 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
31
32
|
/// - options: A dictionary containing:
|
|
32
33
|
/// - `fileUri`: The URI of the audio file.
|
|
33
34
|
/// - `pointsPerSecond`: The number of data points to extract per second of audio.
|
|
34
|
-
/// - `algorithm`: The algorithm to use for extraction.
|
|
35
35
|
/// - `features`: A dictionary specifying which features to extract (e.g., `energy`, `mfcc`, `rms`, etc.).
|
|
36
36
|
/// - promise: A promise to resolve with the extracted audio analysis data or reject with an error.
|
|
37
37
|
/// - Returns: Promise to be resolved with audio analysis data.
|
|
38
38
|
AsyncFunction("extractAudioAnalysis") { (options: [String: Any], promise: Promise) in
|
|
39
39
|
guard let fileUri = options["fileUri"] as? String,
|
|
40
|
-
let url = URL(string: fileUri)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
let url = URL(string: fileUri) else {
|
|
41
|
+
promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
|
|
42
|
+
return
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Get time or byte range options
|
|
46
|
+
let startTimeMs = options["startTimeMs"] as? Double
|
|
47
|
+
let endTimeMs = options["endTimeMs"] as? Double
|
|
48
|
+
let position = options["position"] as? Int
|
|
49
|
+
let byteLength = options["length"] as? Int
|
|
50
|
+
|
|
51
|
+
// Validate ranges - can have time range OR byte range OR no range
|
|
52
|
+
let hasTimeRange = startTimeMs != nil && endTimeMs != nil
|
|
53
|
+
let hasByteRange = position != nil && byteLength != nil
|
|
54
|
+
|
|
55
|
+
// Only throw if both ranges are provided
|
|
56
|
+
guard !(hasTimeRange && hasByteRange) else {
|
|
57
|
+
promise.reject("INVALID_ARGUMENTS", "Cannot specify both time range and byte range")
|
|
44
58
|
return
|
|
45
59
|
}
|
|
46
60
|
|
|
47
61
|
let features = options["features"] as? [String: Bool] ?? [:]
|
|
48
62
|
let featureOptions = self.extractFeatureOptions(from: features)
|
|
63
|
+
let segmentDurationMs = options["segmentDurationMs"] as? Int ?? 100 // Default value of 100ms
|
|
49
64
|
|
|
50
|
-
DispatchQueue.global().async {
|
|
65
|
+
DispatchQueue.global().async(execute: {
|
|
51
66
|
do {
|
|
52
67
|
let audioFile = try AVAudioFile(forReading: url)
|
|
53
68
|
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
54
69
|
let numberOfChannels = Int(audioFile.fileFormat.channelCount)
|
|
70
|
+
let sampleRate = audioFile.fileFormat.sampleRate
|
|
55
71
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
promise.reject(code, message)
|
|
60
|
-
})
|
|
72
|
+
// Convert time range to byte range if needed
|
|
73
|
+
let effectivePosition: Int?
|
|
74
|
+
let effectiveLength: Int?
|
|
61
75
|
|
|
62
|
-
if
|
|
63
|
-
|
|
76
|
+
if hasTimeRange {
|
|
77
|
+
let bytesPerSecond = Int(sampleRate) * numberOfChannels * (bitDepth / 8)
|
|
78
|
+
effectivePosition = Int(startTimeMs! * Double(bytesPerSecond) / 1000.0)
|
|
79
|
+
effectiveLength = Int((endTimeMs! - startTimeMs!) * Double(bytesPerSecond) / 1000.0)
|
|
64
80
|
} else {
|
|
65
|
-
|
|
81
|
+
effectivePosition = position
|
|
82
|
+
effectiveLength = byteLength
|
|
66
83
|
}
|
|
67
|
-
} catch {
|
|
68
|
-
promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/// Extracts waveform data from an audio file.
|
|
74
|
-
///
|
|
75
|
-
/// - Parameters:
|
|
76
|
-
/// - options: A dictionary containing:
|
|
77
|
-
/// - `fileUri`: The URI of the audio file.
|
|
78
|
-
/// - `numberOfSamples`: The number of samples to extract for the waveform.
|
|
79
|
-
/// - `offset`: The optional offset to start reading from. Defaults to 0 if not provided.
|
|
80
|
-
/// - `length`: The optional length of the audio to read. Defaults to the entire file if not provided.
|
|
81
|
-
/// - promise: A promise to resolve with the extracted waveform data or reject with an error.
|
|
82
|
-
/// - Returns: Promise to be resolved with waveform data.
|
|
83
|
-
AsyncFunction("extractWaveform") { (options: [String: Any], promise: Promise) in
|
|
84
|
-
guard let fileUri = options["fileUri"] as? String,
|
|
85
|
-
let url = URL(string: fileUri),
|
|
86
|
-
let numberOfSamples = options["numberOfSamples"] as? Int else {
|
|
87
|
-
promise.reject("INVALID_ARGUMENTS", "Invalid arguments provided")
|
|
88
|
-
return
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
let offset = options["offset"] as? Int ?? 0
|
|
92
|
-
DispatchQueue.global().async {
|
|
93
|
-
do {
|
|
94
|
-
let audioFile = try AVAudioFile(forReading: url)
|
|
95
|
-
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
96
|
-
let numberOfChannels = Int(audioFile.fileFormat.channelCount)
|
|
97
|
-
|
|
98
|
-
// If length is not provided, default to the entire file length
|
|
99
|
-
let length = options["length"] as? UInt ?? UInt(audioFile.length - AVAudioFramePosition(offset))
|
|
100
84
|
|
|
101
85
|
let audioProcessor = try AudioProcessor(url: url, resolve: { result in
|
|
102
86
|
promise.resolve(result)
|
|
@@ -104,15 +88,25 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
104
88
|
promise.reject(code, message)
|
|
105
89
|
})
|
|
106
90
|
|
|
107
|
-
if let result = audioProcessor.processAudioData(
|
|
91
|
+
if let result = audioProcessor.processAudioData(
|
|
92
|
+
numberOfSamples: nil,
|
|
93
|
+
offset: 0,
|
|
94
|
+
length: nil,
|
|
95
|
+
segmentDurationMs: segmentDurationMs,
|
|
96
|
+
featureOptions: featureOptions,
|
|
97
|
+
bitDepth: bitDepth,
|
|
98
|
+
numberOfChannels: numberOfChannels,
|
|
99
|
+
position: effectivePosition,
|
|
100
|
+
byteLength: effectiveLength
|
|
101
|
+
) {
|
|
108
102
|
promise.resolve(result.toDictionary())
|
|
109
103
|
} else {
|
|
110
|
-
promise.reject("
|
|
104
|
+
promise.reject("PROCESSING_ERROR", "Failed to process audio data")
|
|
111
105
|
}
|
|
112
106
|
} catch {
|
|
113
|
-
promise.reject("
|
|
107
|
+
promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
|
|
114
108
|
}
|
|
115
|
-
}
|
|
109
|
+
})
|
|
116
110
|
}
|
|
117
111
|
|
|
118
112
|
|
|
@@ -315,57 +309,6 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
315
309
|
}
|
|
316
310
|
}
|
|
317
311
|
|
|
318
|
-
/// Extracts audio features from an audio file.
|
|
319
|
-
/// - Parameters:
|
|
320
|
-
/// - options: A dictionary containing:
|
|
321
|
-
/// - `fileUri`: The URI of the audio file.
|
|
322
|
-
/// - `startTimeMs`: Optional start time in milliseconds.
|
|
323
|
-
/// - `endTimeMs`: Optional end time in milliseconds.
|
|
324
|
-
/// - `pointsPerSecond`: Number of points per second for analysis.
|
|
325
|
-
/// - `algorithm`: The algorithm to use for extraction.
|
|
326
|
-
/// - `featureOptions`: Features to extract.
|
|
327
|
-
AsyncFunction("extractPreview") { (options: [String: Any], promise: Promise) in
|
|
328
|
-
guard let fileUri = options["fileUri"] as? String,
|
|
329
|
-
let url = URL(string: fileUri) else {
|
|
330
|
-
promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
|
|
331
|
-
return
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
let startTimeMs = options["startTimeMs"] as? Double
|
|
335
|
-
let endTimeMs = options["endTimeMs"] as? Double
|
|
336
|
-
let pointsPerSecond = options["pointsPerSecond"] as? Int ?? 20
|
|
337
|
-
let algorithm = options["algorithm"] as? String ?? "rms"
|
|
338
|
-
let featureOptions = options["featureOptions"] as? [String: Bool] ?? [:]
|
|
339
|
-
|
|
340
|
-
DispatchQueue.global().async {
|
|
341
|
-
do {
|
|
342
|
-
let audioProcessor = try AudioProcessor(
|
|
343
|
-
url: url,
|
|
344
|
-
resolve: { result in
|
|
345
|
-
promise.resolve(result)
|
|
346
|
-
},
|
|
347
|
-
reject: { code, message in
|
|
348
|
-
promise.reject(code, message)
|
|
349
|
-
}
|
|
350
|
-
)
|
|
351
|
-
|
|
352
|
-
if let result = audioProcessor.processAudioData(
|
|
353
|
-
startTimeMs: startTimeMs,
|
|
354
|
-
endTimeMs: endTimeMs,
|
|
355
|
-
pointsPerSecond: pointsPerSecond,
|
|
356
|
-
algorithm: algorithm,
|
|
357
|
-
featureOptions: featureOptions
|
|
358
|
-
) {
|
|
359
|
-
promise.resolve(result.toDictionary())
|
|
360
|
-
} else {
|
|
361
|
-
promise.reject("PROCESSING_ERROR", "Failed to process audio data")
|
|
362
|
-
}
|
|
363
|
-
} catch {
|
|
364
|
-
promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
|
|
369
312
|
/// Trims an audio file to specified start and end times.
|
|
370
313
|
/// - Parameters:
|
|
371
314
|
/// - options: A dictionary containing:
|
|
@@ -414,6 +357,142 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
414
357
|
}
|
|
415
358
|
}
|
|
416
359
|
}
|
|
360
|
+
|
|
361
|
+
/// Extracts raw PCM audio data from a file with time or byte range support
|
|
362
|
+
/// - Parameters:
|
|
363
|
+
/// - options: A dictionary containing:
|
|
364
|
+
/// - `fileUri`: The URI of the audio file
|
|
365
|
+
/// - `startTimeMs`: Optional start time in milliseconds
|
|
366
|
+
/// - `endTimeMs`: Optional end time in milliseconds
|
|
367
|
+
/// - `position`: Optional byte position
|
|
368
|
+
/// - `length`: Optional byte length
|
|
369
|
+
/// - `includeNormalizedData`: Boolean to include normalized audio data in [-1, 1] range
|
|
370
|
+
/// - `includeWavHeader`: Boolean to include WAV header in the PCM data
|
|
371
|
+
/// - `decodingOptions`: Decoding configuration
|
|
372
|
+
/// - `includeBase64Data`: Boolean to include base64 encoded string representation of the audio data
|
|
373
|
+
/// - `computeChecksum`: Boolean to compute and include CRC32 checksum of the PCM data
|
|
374
|
+
AsyncFunction("extractAudioData") { (options: [String: Any], promise: Promise) in
|
|
375
|
+
guard let fileUri = options["fileUri"] as? String,
|
|
376
|
+
let url = URL(string: fileUri) else {
|
|
377
|
+
promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
|
|
378
|
+
return
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// Get time or byte range options
|
|
382
|
+
let startTimeMs = options["startTimeMs"] as? Double
|
|
383
|
+
let endTimeMs = options["endTimeMs"] as? Double
|
|
384
|
+
let position = options["position"] as? Int
|
|
385
|
+
let length = options["length"] as? Int
|
|
386
|
+
let includeWavHeader = options["includeWavHeader"] as? Bool ?? false
|
|
387
|
+
|
|
388
|
+
// Validate that we have either time range or byte range, but not both and not neither
|
|
389
|
+
let hasTimeRange = startTimeMs != nil && endTimeMs != nil
|
|
390
|
+
let hasByteRange = position != nil && length != nil
|
|
391
|
+
|
|
392
|
+
guard hasTimeRange || hasByteRange else {
|
|
393
|
+
promise.reject("INVALID_ARGUMENTS", "Must specify either time range (startTimeMs, endTimeMs) or byte range (position, length)")
|
|
394
|
+
return
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
guard !(hasTimeRange && hasByteRange) else {
|
|
398
|
+
promise.reject("INVALID_ARGUMENTS", "Cannot specify both time range and byte range")
|
|
399
|
+
return
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
do {
|
|
403
|
+
let audioFile = try AVAudioFile(forReading: url)
|
|
404
|
+
let format = audioFile.processingFormat
|
|
405
|
+
let sampleRate = format.sampleRate
|
|
406
|
+
let channels = Int(format.channelCount)
|
|
407
|
+
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
408
|
+
|
|
409
|
+
// Calculate frame positions
|
|
410
|
+
let startFrame: AVAudioFramePosition
|
|
411
|
+
let endFrame: AVAudioFramePosition
|
|
412
|
+
|
|
413
|
+
if hasTimeRange {
|
|
414
|
+
startFrame = AVAudioFramePosition(startTimeMs! * sampleRate / 1000.0)
|
|
415
|
+
endFrame = AVAudioFramePosition(endTimeMs! * sampleRate / 1000.0)
|
|
416
|
+
} else {
|
|
417
|
+
// Convert byte position to frame position
|
|
418
|
+
let bytesPerFrame = Int64(channels * (bitDepth / 8))
|
|
419
|
+
startFrame = AVAudioFramePosition(position!) / bytesPerFrame
|
|
420
|
+
endFrame = startFrame + (AVAudioFramePosition(length!) / bytesPerFrame)
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// Validate frame range
|
|
424
|
+
guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
|
|
425
|
+
promise.reject("INVALID_RANGE", "Invalid range specified")
|
|
426
|
+
return
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
let frameCount = AVAudioFrameCount(endFrame - startFrame)
|
|
430
|
+
|
|
431
|
+
// Create decoding config that includes normalization preference
|
|
432
|
+
var decodingOptions = options["decodingOptions"] as? [String: Any] ?? [:]
|
|
433
|
+
let includeNormalizedData = options["includeNormalizedData"] as? Bool ?? false
|
|
434
|
+
|
|
435
|
+
// Pass both options separately - normalizeAudio from decodingOptions, and includeNormalizedData as is
|
|
436
|
+
let decodingConfig = DecodingConfig.fromDictionary(decodingOptions)
|
|
437
|
+
|
|
438
|
+
let (pcmData, normalizedData, base64Data) = try extractRawAudioData(
|
|
439
|
+
from: url,
|
|
440
|
+
startFrame: startFrame,
|
|
441
|
+
frameCount: frameCount,
|
|
442
|
+
format: format,
|
|
443
|
+
decodingConfig: decodingConfig,
|
|
444
|
+
includeNormalizedData: includeNormalizedData,
|
|
445
|
+
includeBase64Data: options["includeBase64Data"] as? Bool ?? false
|
|
446
|
+
)
|
|
447
|
+
|
|
448
|
+
var resultDict: [String: Any] = [:]
|
|
449
|
+
|
|
450
|
+
if includeWavHeader {
|
|
451
|
+
// Create WAV header and prepend it to the PCM data
|
|
452
|
+
let wavData = createWavHeader(
|
|
453
|
+
pcmData: pcmData,
|
|
454
|
+
sampleRate: Int(sampleRate),
|
|
455
|
+
channels: channels,
|
|
456
|
+
bitDepth: bitDepth
|
|
457
|
+
)
|
|
458
|
+
resultDict["pcmData"] = wavData
|
|
459
|
+
resultDict["hasWavHeader"] = true
|
|
460
|
+
} else {
|
|
461
|
+
resultDict["pcmData"] = pcmData
|
|
462
|
+
resultDict["hasWavHeader"] = false
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
// Add the rest of the data
|
|
466
|
+
resultDict["sampleRate"] = Int(sampleRate)
|
|
467
|
+
resultDict["channels"] = channels
|
|
468
|
+
resultDict["bitDepth"] = bitDepth
|
|
469
|
+
resultDict["durationMs"] = Int(Double(frameCount) * 1000.0 / sampleRate)
|
|
470
|
+
resultDict["format"] = "pcm_\(bitDepth)bit"
|
|
471
|
+
resultDict["samples"] = Int(frameCount) * channels
|
|
472
|
+
|
|
473
|
+
// Add normalized data if requested, regardless of normalization setting
|
|
474
|
+
if includeNormalizedData {
|
|
475
|
+
resultDict["normalizedData"] = normalizedData
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
// Add checksum if requested
|
|
479
|
+
if options["computeChecksum"] as? Bool == true {
|
|
480
|
+
let checksum = calculateCRC32(data: pcmData)
|
|
481
|
+
resultDict["checksum"] = Int(checksum)
|
|
482
|
+
|
|
483
|
+
Logger.debug("Computed CRC32 checksum: \(checksum)")
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
if let includeBase64Data = options["includeBase64Data"] as? Bool, includeBase64Data {
|
|
487
|
+
resultDict["base64Data"] = base64Data
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
promise.resolve(resultDict)
|
|
491
|
+
|
|
492
|
+
} catch {
|
|
493
|
+
promise.reject("PROCESSING_ERROR", "Failed to process audio file: \(error.localizedDescription)")
|
|
494
|
+
}
|
|
495
|
+
}
|
|
417
496
|
}
|
|
418
497
|
|
|
419
498
|
func audioStreamManager(_ manager: AudioStreamManager, didReceiveInterruption info: [String: Any]) {
|
|
@@ -582,7 +661,12 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
582
661
|
"spectralBandwidth": options["spectralBandwidth"] as? Bool ?? false,
|
|
583
662
|
"chromagram": options["chromagram"] as? Bool ?? false,
|
|
584
663
|
"tempo": options["tempo"] as? Bool ?? false,
|
|
585
|
-
"hnr": options["hnr"] as? Bool ?? false
|
|
664
|
+
"hnr": options["hnr"] as? Bool ?? false,
|
|
665
|
+
"melSpectrogram": options["melSpectrogram"] as? Bool ?? false,
|
|
666
|
+
"spectralContrast": options["spectralContrast"] as? Bool ?? false,
|
|
667
|
+
"tonnetz": options["tonnetz"] as? Bool ?? false,
|
|
668
|
+
"pitch": options["pitch"] as? Bool ?? false,
|
|
669
|
+
"crc32": options["crc32"] as? Bool ?? false
|
|
586
670
|
]
|
|
587
671
|
}
|
|
588
672
|
|
package/ios/FFT.swift
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
//
|
|
2
|
+
// FFT.swift
|
|
3
|
+
// Pods
|
|
4
|
+
//
|
|
5
|
+
// Created by Arthur Breton on 20/2/2025.
|
|
6
|
+
//
|
|
7
|
+
|
|
8
|
+
import Accelerate
|
|
9
|
+
|
|
10
|
+
class FFT {
|
|
11
|
+
private let length: Int
|
|
12
|
+
private var setup: vDSP_DFT_Setup?
|
|
13
|
+
|
|
14
|
+
init(_ length: Int) {
|
|
15
|
+
self.length = length
|
|
16
|
+
self.setup = vDSP_DFT_zop_CreateSetup(
|
|
17
|
+
nil,
|
|
18
|
+
vDSP_Length(length),
|
|
19
|
+
vDSP_DFT_Direction.FORWARD
|
|
20
|
+
)
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
deinit {
|
|
24
|
+
if let setup = setup {
|
|
25
|
+
vDSP_DFT_DestroySetup(setup)
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
func realForward(_ data: inout [Float]) {
|
|
30
|
+
var realIn = data
|
|
31
|
+
var imagIn = [Float](repeating: 0.0, count: length)
|
|
32
|
+
var realOut = [Float](repeating: 0.0, count: length)
|
|
33
|
+
var imagOut = [Float](repeating: 0.0, count: length)
|
|
34
|
+
|
|
35
|
+
// Perform FFT
|
|
36
|
+
vDSP_DFT_Execute(setup!,
|
|
37
|
+
&realIn,
|
|
38
|
+
&imagIn,
|
|
39
|
+
&realOut,
|
|
40
|
+
&imagOut)
|
|
41
|
+
|
|
42
|
+
// Ensure data array has enough space for both real and imaginary parts
|
|
43
|
+
if data.count < 2 * length {
|
|
44
|
+
data.append(contentsOf: [Float](repeating: 0.0, count: 2 * length - data.count))
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Combine real and imaginary parts
|
|
48
|
+
for i in 0..<length {
|
|
49
|
+
let j = i * 2
|
|
50
|
+
data[j] = realOut[i]
|
|
51
|
+
data[j + 1] = imagOut[i]
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
func processSegment(_ segment: [Float]) -> [Float] {
|
|
56
|
+
var fftData = segment.count < length ?
|
|
57
|
+
segment + [Float](repeating: 0, count: length - segment.count) :
|
|
58
|
+
Array(segment.prefix(length))
|
|
59
|
+
realForward(&fftData)
|
|
60
|
+
return fftData
|
|
61
|
+
}
|
|
62
|
+
}
|
package/ios/Features.swift
CHANGED
|
@@ -21,6 +21,11 @@ public struct Features {
|
|
|
21
21
|
var chromagram: [Float]?
|
|
22
22
|
var tempo: Float?
|
|
23
23
|
var hnr: Float?
|
|
24
|
+
var melSpectrogram: [Float]?
|
|
25
|
+
var spectralContrast: [Float]?
|
|
26
|
+
var tonnetz: [Float]?
|
|
27
|
+
var pitch: Float?
|
|
28
|
+
var crc32: UInt32?
|
|
24
29
|
|
|
25
30
|
init(
|
|
26
31
|
energy: Float = 0,
|
|
@@ -35,7 +40,12 @@ public struct Features {
|
|
|
35
40
|
spectralBandwidth: Float? = nil,
|
|
36
41
|
chromagram: [Float]? = nil,
|
|
37
42
|
tempo: Float? = nil,
|
|
38
|
-
hnr: Float? = nil
|
|
43
|
+
hnr: Float? = nil,
|
|
44
|
+
melSpectrogram: [Float]? = nil,
|
|
45
|
+
spectralContrast: [Float]? = nil,
|
|
46
|
+
tonnetz: [Float]? = nil,
|
|
47
|
+
pitch: Float? = nil,
|
|
48
|
+
crc32: UInt32? = nil
|
|
39
49
|
) {
|
|
40
50
|
self.energy = energy
|
|
41
51
|
self.mfcc = mfcc
|
|
@@ -50,12 +60,17 @@ public struct Features {
|
|
|
50
60
|
self.chromagram = chromagram
|
|
51
61
|
self.tempo = tempo
|
|
52
62
|
self.hnr = hnr
|
|
63
|
+
self.melSpectrogram = melSpectrogram
|
|
64
|
+
self.spectralContrast = spectralContrast
|
|
65
|
+
self.tonnetz = tonnetz
|
|
66
|
+
self.pitch = pitch
|
|
67
|
+
self.crc32 = crc32
|
|
53
68
|
}
|
|
54
69
|
}
|
|
55
70
|
|
|
56
71
|
extension Features {
|
|
57
72
|
func toDictionary() -> [String: Any] {
|
|
58
|
-
|
|
73
|
+
var dict: [String: Any] = [
|
|
59
74
|
"energy": energy,
|
|
60
75
|
"mfcc": mfcc,
|
|
61
76
|
"rms": rms,
|
|
@@ -68,7 +83,13 @@ extension Features {
|
|
|
68
83
|
"spectralBandwidth": spectralBandwidth ?? 0,
|
|
69
84
|
"chromagram": chromagram ?? [],
|
|
70
85
|
"tempo": tempo ?? 0,
|
|
71
|
-
"hnr": hnr ?? 0
|
|
86
|
+
"hnr": hnr ?? 0,
|
|
87
|
+
"melSpectrogram": melSpectrogram ?? [],
|
|
88
|
+
"spectralContrast": spectralContrast ?? [],
|
|
89
|
+
"tonnetz": tonnetz ?? [],
|
|
90
|
+
"pitch": pitch ?? 0,
|
|
91
|
+
"crc32": crc32 ?? 0
|
|
72
92
|
]
|
|
93
|
+
return dict
|
|
73
94
|
}
|
|
74
95
|
}
|
|
@@ -88,9 +88,7 @@ struct RecordingSettings {
|
|
|
88
88
|
var showNotification: Bool = false
|
|
89
89
|
var enableProcessing: Bool = false
|
|
90
90
|
|
|
91
|
-
//
|
|
92
|
-
var pointsPerSecond: Int? = 1000
|
|
93
|
-
var algorithm: String? = "rms"
|
|
91
|
+
// Remove pointsPerSecond and algorithm
|
|
94
92
|
var featureOptions: [String: Bool]? = ["rms": true, "zcr": true]
|
|
95
93
|
|
|
96
94
|
// iOS-specific configuration
|
|
@@ -105,10 +103,12 @@ struct RecordingSettings {
|
|
|
105
103
|
|
|
106
104
|
let autoResumeAfterInterruption: Bool
|
|
107
105
|
|
|
108
|
-
// Make these optional with nil default values
|
|
109
106
|
var outputDirectory: String? = nil
|
|
110
107
|
var filename: String? = nil
|
|
111
108
|
|
|
109
|
+
// Update default to 100ms
|
|
110
|
+
var segmentDurationMs: Int = 100 // Default 100ms segments
|
|
111
|
+
|
|
112
112
|
static func fromDictionary(_ dict: [String: Any]) -> Result<RecordingSettings, Error> {
|
|
113
113
|
// Extract compression settings
|
|
114
114
|
let compression = dict["compression"] as? [String: Any]
|
|
@@ -148,11 +148,11 @@ struct RecordingSettings {
|
|
|
148
148
|
settings.showNotification = dict["showNotification"] as? Bool ?? false
|
|
149
149
|
settings.enableProcessing = dict["enableProcessing"] as? Bool ?? false
|
|
150
150
|
|
|
151
|
-
// Parse analysis settings
|
|
152
|
-
settings.pointsPerSecond = dict["pointsPerSecond"] as? Int
|
|
153
|
-
settings.algorithm = dict["algorithm"] as? String
|
|
154
151
|
settings.featureOptions = dict["features"] as? [String: Bool]
|
|
155
152
|
|
|
153
|
+
// Update segmentDurationMs parsing
|
|
154
|
+
settings.segmentDurationMs = dict["segmentDurationMs"] as? Int ?? 100
|
|
155
|
+
|
|
156
156
|
// Parse iOS-specific config
|
|
157
157
|
if let iosDict = dict["ios"] as? [String: Any],
|
|
158
158
|
let audioSessionDict = iosDict["audioSession"] as? [String: Any] {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@siteed/expo-audio-stream",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "stream audio crossplatform",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"main": "build/index.js",
|
|
@@ -105,6 +105,7 @@
|
|
|
105
105
|
},
|
|
106
106
|
"dependencies": {
|
|
107
107
|
"@siteed/design-system": "^0.35.1",
|
|
108
|
+
"crc-32": "^1.2.2",
|
|
108
109
|
"expo-modules-core": "~2.1.4"
|
|
109
110
|
}
|
|
110
111
|
}
|