@siteed/expo-audio-stream 2.0.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -27
- package/build/index.d.ts +11 -12
- package/build/index.js +44 -10
- package/package.json +49 -110
- package/src/index.ts +18 -33
- package/CHANGELOG.md +0 -195
- package/android/build.gradle +0 -105
- package/android/src/main/AndroidManifest.xml +0 -27
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
- package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
- package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
- package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -1936
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -138
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -20
- package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -509
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
- package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
- package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
- package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
- package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
- package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
- package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
- package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
- package/android/src/main/res/drawable/ic_microphone.xml +0 -13
- package/android/src/main/res/drawable/ic_pause.xml +0 -10
- package/android/src/main/res/drawable/ic_play.xml +0 -10
- package/android/src/main/res/drawable/ic_stop.xml +0 -10
- package/android/src/main/res/layout/notification_recording.xml +0 -37
- package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
- package/app.plugin.js +0 -1
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -144
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -78
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +0 -229
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
- package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
- package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
- package/build/AudioAnalysis/extractWaveform.js +0 -11
- package/build/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/AudioRecorder.provider.d.ts +0 -11
- package/build/AudioRecorder.provider.d.ts.map +0 -1
- package/build/AudioRecorder.provider.js +0 -37
- package/build/AudioRecorder.provider.js.map +0 -1
- package/build/ExpoAudioStream.native.d.ts +0 -3
- package/build/ExpoAudioStream.native.d.ts.map +0 -1
- package/build/ExpoAudioStream.native.js +0 -6
- package/build/ExpoAudioStream.native.js.map +0 -1
- package/build/ExpoAudioStream.types.d.ts +0 -206
- package/build/ExpoAudioStream.types.d.ts.map +0 -1
- package/build/ExpoAudioStream.types.js +0 -2
- package/build/ExpoAudioStream.types.js.map +0 -1
- package/build/ExpoAudioStream.web.d.ts +0 -59
- package/build/ExpoAudioStream.web.d.ts.map +0 -1
- package/build/ExpoAudioStream.web.js +0 -285
- package/build/ExpoAudioStream.web.js.map +0 -1
- package/build/ExpoAudioStreamModule.d.ts +0 -3
- package/build/ExpoAudioStreamModule.d.ts.map +0 -1
- package/build/ExpoAudioStreamModule.js +0 -239
- package/build/ExpoAudioStreamModule.js.map +0 -1
- package/build/WebRecorder.web.d.ts +0 -119
- package/build/WebRecorder.web.d.ts.map +0 -1
- package/build/WebRecorder.web.js +0 -436
- package/build/WebRecorder.web.js.map +0 -1
- package/build/constants.d.ts +0 -11
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -14
- package/build/constants.js.map +0 -1
- package/build/events.d.ts +0 -26
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -21
- package/build/events.js.map +0 -1
- package/build/index.d.ts.map +0 -1
- package/build/index.js.map +0 -1
- package/build/useAudioRecorder.d.ts +0 -21
- package/build/useAudioRecorder.d.ts.map +0 -1
- package/build/useAudioRecorder.js +0 -427
- package/build/useAudioRecorder.js.map +0 -1
- package/build/utils/BlobFix.d.ts +0 -9
- package/build/utils/BlobFix.d.ts.map +0 -1
- package/build/utils/BlobFix.js +0 -498
- package/build/utils/BlobFix.js.map +0 -1
- package/build/utils/audioProcessing.d.ts +0 -24
- package/build/utils/audioProcessing.d.ts.map +0 -1
- package/build/utils/audioProcessing.js +0 -133
- package/build/utils/audioProcessing.js.map +0 -1
- package/build/utils/concatenateBuffers.d.ts +0 -8
- package/build/utils/concatenateBuffers.d.ts.map +0 -1
- package/build/utils/concatenateBuffers.js +0 -21
- package/build/utils/concatenateBuffers.js.map +0 -1
- package/build/utils/convertPCMToFloat32.d.ts +0 -13
- package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
- package/build/utils/convertPCMToFloat32.js +0 -120
- package/build/utils/convertPCMToFloat32.js.map +0 -1
- package/build/utils/encodingToBitDepth.d.ts +0 -5
- package/build/utils/encodingToBitDepth.d.ts.map +0 -1
- package/build/utils/encodingToBitDepth.js +0 -13
- package/build/utils/encodingToBitDepth.js.map +0 -1
- package/build/utils/getWavFileInfo.d.ts +0 -26
- package/build/utils/getWavFileInfo.d.ts.map +0 -1
- package/build/utils/getWavFileInfo.js +0 -92
- package/build/utils/getWavFileInfo.js.map +0 -1
- package/build/utils/writeWavHeader.d.ts +0 -49
- package/build/utils/writeWavHeader.d.ts.map +0 -1
- package/build/utils/writeWavHeader.js +0 -91
- package/build/utils/writeWavHeader.js.map +0 -1
- package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
- package/build/workers/InlineFeaturesExtractor.web.js +0 -828
- package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
- package/build/workers/inlineAudioWebWorker.web.js +0 -157
- package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
- package/expo-module.config.json +0 -9
- package/ios/AudioAnalysisData.swift +0 -74
- package/ios/AudioNotificationManager.swift +0 -135
- package/ios/AudioProcessingHelpers.swift +0 -743
- package/ios/AudioProcessor.swift +0 -858
- package/ios/AudioStreamError.swift +0 -7
- package/ios/AudioStreamManager.swift +0 -1708
- package/ios/AudioStreamManagerDelegate.swift +0 -16
- package/ios/DataPoint.swift +0 -54
- package/ios/DecodingConfig.swift +0 -47
- package/ios/ExpoAudioStream.podspec +0 -27
- package/ios/ExpoAudioStreamModule.swift +0 -698
- package/ios/FFT.swift +0 -62
- package/ios/Features.swift +0 -95
- package/ios/Logger.swift +0 -7
- package/ios/NotificationExtension.swift +0 -15
- package/ios/RecordingResult.swift +0 -22
- package/ios/RecordingSettings.swift +0 -265
- package/ios/WaveformExtractor.swift +0 -105
- package/plugin/build/index.d.ts +0 -21
- package/plugin/build/index.js +0 -191
- package/plugin/src/index.ts +0 -278
- package/plugin/tsconfig.json +0 -10
- package/plugin/tsconfig.tsbuildinfo +0 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -165
- package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -370
- package/src/AudioAnalysis/extractWaveform.ts +0 -22
- package/src/AudioRecorder.provider.tsx +0 -54
- package/src/ExpoAudioStream.native.ts +0 -6
- package/src/ExpoAudioStream.types.ts +0 -329
- package/src/ExpoAudioStream.web.ts +0 -359
- package/src/ExpoAudioStreamModule.ts +0 -286
- package/src/WebRecorder.web.ts +0 -580
- package/src/constants.ts +0 -18
- package/src/events.ts +0 -60
- package/src/useAudioRecorder.tsx +0 -620
- package/src/utils/BlobFix.ts +0 -559
- package/src/utils/audioProcessing.ts +0 -205
- package/src/utils/concatenateBuffers.ts +0 -24
- package/src/utils/convertPCMToFloat32.ts +0 -170
- package/src/utils/encodingToBitDepth.ts +0 -18
- package/src/utils/getWavFileInfo.ts +0 -132
- package/src/utils/writeWavHeader.ts +0 -114
- package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
- package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
package/ios/AudioProcessor.swift
DELETED
|
@@ -1,858 +0,0 @@
|
|
|
1
|
-
// packages/expo-audio-stream/ios/AudioProcessor.swift
|
|
2
|
-
|
|
3
|
-
import Foundation
|
|
4
|
-
import Accelerate
|
|
5
|
-
import AVFoundation
|
|
6
|
-
import QuartzCore
|
|
7
|
-
|
|
8
|
-
/// Immutable value bundling a URI, a duration and a size.
/// The name suggests it describes the output of a trim operation; the units of
/// `duration` and `size` are not established in this file.
public struct TrimResult {
    /// URI string stored verbatim from the initializer argument.
    public let uri: String
    /// Duration value stored verbatim from the initializer argument.
    public let duration: Double
    /// Size value stored verbatim from the initializer argument.
    public let size: Int64

    /// Explicit public memberwise initializer (the synthesized one would be internal).
    public init(
        uri: String,
        duration: Double,
        size: Int64
    ) {
        self.size = size
        self.duration = duration
        self.uri = uri
    }
}
|
|
19
|
-
|
|
20
|
-
public class AudioProcessor {
|
|
21
|
-
/// Audio file used by the file-based processing methods; readable from outside, writable only here.
public private(set) var audioFile: AVAudioFile?
/// Callback stored from the initializer's `resolve` argument.
private var result: (Any) -> Void
/// Callback stored from the initializer's `reject` argument; invoked as (code, message).
private var reject: (String, String) -> Void
private var waveformData: [Float] = []
private var progress: Float = 0
/// Overwritten with the processing format's channel count when a file is processed.
private var channelCount = 1
private var currentProgress: Float = 0
private let extractionQueue = DispatchQueue(label: "AudioProcessor", attributes: .concurrent)
/// Backing storage for the public `abortExtraction` property.
private var _abortExtraction = false

// Counter used to assign ids to generated data points
private var uniqueIdCounter: Int = 0
|
|
33
|
-
|
|
34
|
-
/// Abort flag for file-based extraction.
/// `processAudioData` checks it before each buffer read; when it is true the
/// method resets it to false and returns nil.
public var abortExtraction: Bool {
    get {
        return _abortExtraction
    }
    set(shouldAbort) {
        _abortExtraction = shouldAbort
    }
}
|
|
38
|
-
|
|
39
|
-
/// Creates a processor backed by an audio file opened for reading.
/// - Parameters:
///   - url: Location of the audio file to open.
///   - resolve: Callback stored for later use by the processor.
///   - reject: Callback stored for reporting failures as (code, message).
/// - Throws: Any error thrown by `AVAudioFile(forReading:)`.
public init(url: URL, resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) throws {
    // Open the file first so a failure throws before anything else happens.
    let openedFile = try AVAudioFile(forReading: url)
    self.result = resolve
    self.reject = reject
    self.audioFile = openedFile
}
|
|
45
|
-
|
|
46
|
-
/// Creates a processor without a backing file (`audioFile` stays nil),
/// for use with the buffer-based processing entry point.
/// - Parameters:
///   - resolve: Callback stored for later use by the processor.
///   - reject: Callback stored for reporting failures as (code, message).
public init(resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) {
    self.reject = reject
    self.result = resolve
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
/// Clears the audio file reference when the processor is deallocated.
deinit {
    self.audioFile = nil
}
|
|
56
|
-
|
|
57
|
-
/// Errors declared for `AudioProcessor`.
/// The associated `String` values are presumably human-readable details — confirm against callers.
public enum AudioProcessorError: Swift.Error {
    /// The audio file could not be initialized.
    case fileInitializationFailed(String)

    /// A PCM buffer could not be created.
    case bufferCreationFailed

    /// Reading audio data failed.
    case audioReadError(String)
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
/// Reads a range of frames from the loaded audio file and extracts features from it.
///
/// The range is given either in bytes (`position` / `byteLength`, converted to frames with
/// `bitDepth` and `numberOfChannels`) or in frames (`offset` / `length`); byte-based values
/// win when present. Only the first channel of the decoded audio is analysed.
///
/// Failures are reported through the stored `reject` callback and produce `nil`. When
/// `abortExtraction` is set during reading, the flag is cleared and `nil` is returned
/// without calling `reject`.
///
/// - Parameters:
///   - numberOfSamples: The number of samples to extract (for waveform). In this method it only
///     determines the read chunk size; values below 1 are treated as 1.
///   - offset: The offset to start reading from (in frames).
///   - length: The length of the audio to read (in frames).
///   - segmentDurationMs: The duration of each segment in milliseconds. Must be positive.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data.
///   - numberOfChannels: The number of channels in the audio data.
///   - position: The position to start reading from (in bytes).
///   - byteLength: The length of the audio to read (in bytes).
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` on
///   failure or abort.
public func processAudioData(
    numberOfSamples: Int?,
    offset: Int? = 0,
    length: UInt? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int,
    position: Int? = nil,
    byteLength: Int? = nil
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // Keep the frame count as Int64: converting to a 32-bit frame count would trap on very long files.
    let totalFrameCount: Int64 = audioFile.length
    var framesPerBuffer: AVAudioFrameCount

    NSLog("""
        [AudioProcessor] Starting audio processing:
        - totalFrameCount: \(totalFrameCount)
        - bitDepth: \(bitDepth)
        - numberOfChannels: \(numberOfChannels)
        - position: \(position ?? -1)
        - byteLength: \(byteLength ?? -1)
        - offset: \(offset ?? -1)
        - length: \(length ?? 0)
        """)

    // A non-positive duration would trap in the unsigned frame-count conversion below
    // (negative) or make the downstream segmentation loop never advance (zero).
    guard segmentDurationMs > 0 else {
        NSLog("[AudioProcessor] ERROR: Invalid segment duration")
        reject("INVALID_SEGMENT_DURATION", "Segment duration (\(segmentDurationMs) ms) must be positive")
        return nil
    }

    // Byte → frame conversion divides by these values; reject inputs that would divide by zero.
    let bytesPerSample = bitDepth / 8
    if position != nil || byteLength != nil {
        guard bytesPerSample > 0, numberOfChannels > 0 else {
            NSLog("[AudioProcessor] ERROR: Invalid audio format for byte-based range")
            reject("INVALID_FORMAT", "Cannot convert byte positions to frames with bitDepth \(bitDepth) and \(numberOfChannels) channel(s)")
            return nil
        }
    }

    // Use position/byteLength if provided, otherwise fall back to offset/length
    let effectiveOffset: Int64 = if let position = position {
        Int64(position / bytesPerSample / numberOfChannels)
    } else {
        Int64(offset ?? 0)
    }

    let effectiveLength: Int64 = if let byteLength = byteLength {
        Int64(byteLength / bytesPerSample / numberOfChannels)
    } else if let length = length {
        Int64(length)
    } else {
        totalFrameCount - effectiveOffset
    }

    NSLog("""
        [AudioProcessor] Calculated frame positions:
        - effectiveOffset: \(effectiveOffset)
        - effectiveLength: \(effectiveLength)
        - expectedEndFrame: \(effectiveOffset + effectiveLength)
        - totalFrameCount: \(totalFrameCount)
        """)

    // Validate frame boundaries
    if effectiveOffset < 0 || effectiveOffset >= totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Invalid offset value")
        reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
        return nil
    }

    if effectiveLength <= 0 {
        NSLog("[AudioProcessor] ERROR: Invalid length value")
        reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
        return nil
    }

    if effectiveOffset + effectiveLength > totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
        reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
        return nil
    }

    var startFrame: AVAudioFramePosition = effectiveOffset
    let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength

    // Calculate frames per segment based on segment duration
    let framesPerSegment = AVAudioFrameCount(Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0)

    if let numberOfSamples = numberOfSamples {
        // Clamp the divisor to ≥ 1 (0 used to trap) and the quotient to what a frame count can hold.
        let framesPerPoint = max(1, effectiveLength / Int64(max(1, numberOfSamples)))
        framesPerBuffer = AVAudioFrameCount(min(Int64(AVAudioFrameCount.max), framesPerPoint))
    } else {
        framesPerBuffer = framesPerSegment
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        reject("BUFFER_CREATION_FAILED", "Failed to create AVAudioPCMBuffer.")
        return nil
    }

    channelCount = Int(audioFile.processingFormat.channelCount)

    var channelData = [Float]()
    channelData.reserveCapacity(Int(effectiveLength))

    while startFrame < endFrame {
        let remainingFrames = endFrame - startFrame
        let currentFramesPerBuffer = AVAudioFrameCount(min(Int64(framesPerBuffer), remainingFrames))

        if currentFramesPerBuffer == 0 {
            break
        }

        if abortExtraction {
            audioFile.framePosition = startFrame
            abortExtraction = false
            return nil
        }

        do {
            audioFile.framePosition = startFrame
            try audioFile.read(into: buffer, frameCount: currentFramesPerBuffer)
        } catch {
            reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
            return nil
        }

        //TODO: check if we need conversion based on bitDepth here
        guard let floatData = buffer.floatChannelData else {
            reject("BUFFER_DATA_ERROR", "Failed to retrieve float data from buffer.")
            return nil
        }
        // Only the first channel is collected.
        channelData.append(contentsOf: UnsafeBufferPointer(start: floatData[0], count: Int(buffer.frameLength)))

        startFrame += AVAudioFramePosition(currentFramesPerBuffer)
    }

    // startFrame has advanced past everything that was read, so the amount processed is
    // measured from the initial offset (endFrame - startFrame would always print 0 here).
    NSLog("""
        [AudioProcessor] Audio processing completed:
        - processedFrames: \(startFrame - effectiveOffset)
        - framesPerBuffer: \(framesPerBuffer)
        """)

    return processChannelData(
        channelData: channelData,
        sampleRate: Float(audioFile.fileFormat.sampleRate),
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
|
|
220
|
-
|
|
221
|
-
/// Processes audio data from a buffer.
///
/// The bytes are decoded as tightly packed signed integers (16- or 32-bit, host byte order),
/// scaled by the integer type's maximum value, and handed to `processChannelData`.
/// Trailing bytes that do not form a whole sample are ignored. Failures are reported
/// through the stored `reject` callback and produce `nil`.
///
/// - Parameters:
///   - data: The audio data buffer.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data. Only 16 and 32 are supported.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil`
///   when the data is empty or the bit depth is unsupported.
public func processAudioBuffer(
    data: Data,
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    guard !data.isEmpty else {
        Logger.debug("Data is empty, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    // Convert Data to Float array based on bit depth.
    // Samples are read with loadUnaligned because the start of a Data value (for example a
    // slice of a larger buffer) is not guaranteed to be aligned for Int16/Int32, which
    // binding the memory to those types would require.
    let floatData: [Float]
    switch bitDepth {
    case 16:
        let sampleSize = MemoryLayout<Int16>.size
        let scale = Float(Int16.max)
        floatData = data.withUnsafeBytes { rawBytes in
            (0..<(rawBytes.count / sampleSize)).map { index in
                Float(rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int16.self)) / scale
            }
        }
    case 32:
        let sampleSize = MemoryLayout<Int32>.size
        let scale = Float(Int32.max)
        floatData = data.withUnsafeBytes { rawBytes in
            (0..<(rawBytes.count / sampleSize)).map { index in
                Float(rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int32.self)) / scale
            }
        }
    default:
        Logger.debug("Unsupported bit depth. Rejecting")
        reject("UNSUPPORTED_BIT_DEPTH", "Unsupported bit depth: \(bitDepth)")
        return nil
    }

    // NOTE(review): samples are passed on exactly as laid out in `data`; if the buffer is
    // interleaved multi-channel audio the channels are not separated here — confirm with callers.
    return processChannelData(
        channelData: floatData,
        sampleRate: sampleRate,
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
|
|
272
|
-
|
|
273
|
-
/// Processes the given audio channel data to extract features.
///
/// The samples are cut into consecutive segments of `segmentDurationMs` (the last one may be
/// shorter); each segment becomes one `DataPoint` via `processSegment`. Byte positions are
/// derived as `sampleIndex * (bitDepth / 8) * numberOfChannels`.
///
/// - Parameters:
///   - channelData: The audio channel data to process.
///   - sampleRate: The sample rate of the audio data. Must be finite and positive.
///   - segmentDurationMs: The duration of each segment in milliseconds. Must be positive.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil`
///   (after calling `reject`) when the sample rate or segment duration is invalid.
private func processChannelData(
    channelData: [Float],
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    Logger.debug("Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")

    // Without this check a zero segment size never advances the loop, a negative one forms
    // an invalid slice range, and a zero sample rate makes the duration conversion trap.
    guard sampleRate.isFinite, sampleRate > 0, segmentDurationMs > 0 else {
        Logger.debug("Invalid sample rate or segment duration. Rejecting")
        reject("INVALID_PARAMETERS", "Sample rate (\(sampleRate)) and segment duration (\(segmentDurationMs) ms) must be positive")
        return nil
    }

    let extractionStart = CACurrentMediaTime()

    let length = channelData.count

    // Calculate samples per segment based on segment duration. A segment is at least one
    // sample long, and anything at or beyond the input length is capped to the input length
    // (a single segment either way), which also keeps the Float → Int conversion in range.
    let idealSamplesPerSegment = Float(segmentDurationMs) * sampleRate / 1000.0
    let samplesPerSegment: Int = idealSamplesPerSegment >= Float(length)
        ? max(1, length)
        : max(1, Int(idealSamplesPerSegment))

    var dataPoints = [DataPoint]()
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    // Calculate bytes per sample
    let bytesPerSample = bitDepth / 8

    // Process data in segments
    for segmentStart in stride(from: 0, to: length, by: samplesPerSegment) {
        let segmentEnd = min(segmentStart + samplesPerSegment, length)
        let segment = Array(channelData[segmentStart..<segmentEnd])

        // Calculate byte positions and timing
        let startPosition = segmentStart * bytesPerSample * numberOfChannels
        let endPosition = segmentEnd * bytesPerSample * numberOfChannels
        let segmentStartTime = Float(segmentStart) / sampleRate
        let segmentEndTime = Float(segmentEnd) / sampleRate

        // Process segment and create data point
        let dataPoint = processSegment(
            segment,
            sampleRate: sampleRate,
            featureOptions: featureOptions,
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: startPosition,
            endPosition: endPosition
        )
        dataPoints.append(dataPoint)

        // Update min/max amplitudes
        minAmplitude = min(minAmplitude, segment.min() ?? minAmplitude)
        maxAmplitude = max(maxAmplitude, segment.max() ?? maxAmplitude)
    }

    // With no samples the sentinels above were never replaced; report an empty range instead.
    if dataPoints.isEmpty {
        minAmplitude = 0
        maxAmplitude = 0
    }

    let extractionEnd = CACurrentMediaTime()
    let processingTimeMs = Float((extractionEnd - extractionStart) * 1000)

    Logger.debug("Processed \(dataPoints.count) data points in \(processingTimeMs) ms")

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(length) / sampleRate * 1000),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: length,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: processingTimeMs
    )
}
|
|
360
|
-
|
|
361
|
-
/// Builds the `DataPoint` for one segment of samples.
///
/// Computes the segment's RMS, its level in dB (`20 * log10(rms)`), a silence flag
/// (`rms < 0.01`) and, via `computeFeatures`, the requested features. Each call consumes
/// the next value of `uniqueIdCounter` as the data point id.
///
/// - Parameters:
///   - segment: Samples of the segment.
///   - sampleRate: The sample rate of the audio data.
///   - featureOptions: The features to extract.
///   - startTime: Start time of the segment, stored unchanged in the data point.
///   - endTime: End time of the segment, stored unchanged in the data point.
///   - startPosition: Start byte position, stored unchanged in the data point.
///   - endPosition: End byte position, stored unchanged in the data point.
/// - Returns: The populated `DataPoint`.
private func processSegment(
    _ segment: [Float],
    sampleRate: Float,
    featureOptions: [String: Bool],
    startTime: Float,
    endTime: Float,
    startPosition: Int,
    endPosition: Int
) -> DataPoint {
    let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
    // An empty segment would evaluate 0 / 0 and yield NaN; treat it as silence instead.
    let rms: Float = segment.isEmpty ? 0 : sqrt(sumSquares / Float(segment.count))
    let silent = rms < 0.01
    // NOTE(review): this is -infinity for an all-zero segment; confirm consumers handle it.
    let dB = Float(20 * log10(Double(rms)))

    // Count sign changes between neighbouring samples so the "zcr" feature reflects the
    // signal (a constant 0 was passed before, so the feature was always 0).
    // Skipped when the feature is not requested.
    var zeroCrossings = 0
    if featureOptions["zcr"] == true {
        for (previous, current) in zip(segment, segment.dropFirst()) where (previous >= 0) != (current >= 0) {
            zeroCrossings += 1
        }
    }

    let features = computeFeatures(
        segmentData: segment,
        sampleRate: sampleRate,
        sumSquares: sumSquares,
        zeroCrossings: zeroCrossings,
        segmentLength: segment.count,
        featureOptions: featureOptions
    )

    // NOTE(review): amplitude is the largest signed sample, not the largest magnitude — confirm intent.
    let dataPoint = DataPoint(
        id: uniqueIdCounter,
        amplitude: segment.max() ?? 0,
        rms: rms,
        dB: dB,
        silent: silent,
        features: features,
        speech: SpeechFeatures(isActive: !silent),
        startTime: startTime,
        endTime: endTime,
        startPosition: startPosition,
        endPosition: endPosition,
        samples: segment.count
    )
    uniqueIdCounter += 1
    return dataPoint
}
|
|
402
|
-
|
|
403
|
-
/// Assembles the `Features` value for one segment.
///
/// RMS and the min/max absolute amplitude are always computed. Every other feature is
/// computed only when its key in `featureOptions` is `true`; otherwise a placeholder
/// (`0`, an empty array, or `nil` for `crc32`) is stored. The spectral, pitch, tempo and
/// CRC computations are delegated to helpers defined outside this block.
///
/// - Parameters:
///   - segmentData: Samples of the segment.
///   - sampleRate: The sample rate of the audio data.
///   - sumSquares: Sum of squared samples, used for energy and RMS.
///   - zeroCrossings: Number of zero crossings in the segment, used for the ZCR.
///   - segmentLength: Number of samples used as the divisor for RMS and ZCR.
///   - featureOptions: The features to extract.
/// - Returns: The populated `Features`.
private func computeFeatures(
    segmentData: [Float],
    sampleRate: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: [String: Bool]
) -> Features {
    // True only when the option is present and explicitly enabled.
    func isEnabled(_ key: String) -> Bool {
        return featureOptions[key] == true
    }

    // A zero-length segment would divide by zero and produce NaN; report 0 instead.
    let hasSamples = segmentLength > 0
    let rms: Float = hasSamples ? sqrt(sumSquares / Float(segmentLength)) : 0
    let energy = isEnabled("energy") ? sumSquares : 0
    let zcr: Float = (isEnabled("zcr") && hasSamples) ? Float(zeroCrossings) / Float(segmentLength) : 0
    let mfcc = isEnabled("mfcc") ? extractMFCC(from: segmentData, sampleRate: sampleRate) : []
    let spectralCentroid = isEnabled("spectralCentroid") ? extractSpectralCentroid(from: segmentData, sampleRate: sampleRate) : 0
    let spectralFlatness = isEnabled("spectralFlatness") ? extractSpectralFlatness(from: segmentData) : 0
    let spectralRollOff = isEnabled("spectralRollOff") ? extractSpectralRollOff(from: segmentData, sampleRate: sampleRate) : 0
    let spectralBandwidth = isEnabled("spectralBandwidth") ? extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate) : 0
    let chromagram = isEnabled("chromagram") ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
    let tempo = isEnabled("tempo") ? extractTempo(from: segmentData, sampleRate: sampleRate) : 0
    let hnr = isEnabled("hnr") ? extractHNR(from: segmentData) : 0
    let melSpectrogram = isEnabled("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast = isEnabled("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []
    let tonnetz = isEnabled("tonnetz") ? computeTonnetz(from: segmentData, sampleRate: sampleRate) : []
    let pitch = isEnabled("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : 0

    // Smallest and largest absolute sample value, found in a single pass without
    // allocating intermediate arrays. Both stay 0 for an empty segment.
    var minAmplitude: Float = 0
    var maxAmplitude: Float = 0
    if let firstSample = segmentData.first {
        minAmplitude = abs(firstSample)
        maxAmplitude = minAmplitude
        for sample in segmentData.dropFirst() {
            let magnitude = abs(sample)
            if magnitude < minAmplitude { minAmplitude = magnitude }
            if maxAmplitude < magnitude { maxAmplitude = magnitude }
        }
    }

    let crc32Value = isEnabled("crc32") ?
        calculateCRC32(from: segmentData, count: segmentData.count) : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        minAmplitude: minAmplitude,
        maxAmplitude: maxAmplitude,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRollOff: spectralRollOff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch,
        crc32: crc32Value
    )
}
|
|
455
|
-
|
|
456
|
-
/// Processes audio data with time range support
|
|
457
|
-
public func processAudioData(
|
|
458
|
-
startTimeMs: Double? = nil,
|
|
459
|
-
endTimeMs: Double? = nil,
|
|
460
|
-
segmentDurationMs: Int = 100, // Default 100ms
|
|
461
|
-
featureOptions: [String: Bool]
|
|
462
|
-
) -> AudioAnalysisData? {
|
|
463
|
-
guard let audioFile = audioFile else {
|
|
464
|
-
Logger.debug("No audio file loaded")
|
|
465
|
-
return nil
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
let startTime = CACurrentMediaTime()
|
|
469
|
-
let sampleRate = Float(audioFile.fileFormat.sampleRate)
|
|
470
|
-
let totalFrameCount = AVAudioFrameCount(audioFile.length)
|
|
471
|
-
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
472
|
-
let numberOfChannels = Int(audioFile.fileFormat.channelCount)
|
|
473
|
-
|
|
474
|
-
// Convert time to frames
|
|
475
|
-
let startFrame = startTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? 0
|
|
476
|
-
let endFrame = endTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? audioFile.length
|
|
477
|
-
|
|
478
|
-
// Validate frame range
|
|
479
|
-
guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
|
|
480
|
-
Logger.debug("Invalid time range")
|
|
481
|
-
return nil
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
// Calculate frames per buffer based on segment duration
|
|
485
|
-
let framesPerBuffer = AVAudioFrameCount(Float(sampleRate) * Float(segmentDurationMs) / 1000.0)
|
|
486
|
-
|
|
487
|
-
guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
|
|
488
|
-
Logger.debug("Failed to create buffer")
|
|
489
|
-
return nil
|
|
490
|
-
}
|
|
491
|
-
|
|
492
|
-
var dataPoints: [DataPoint] = []
|
|
493
|
-
var minAmplitude: Float = .greatestFiniteMagnitude
|
|
494
|
-
var maxAmplitude: Float = -.greatestFiniteMagnitude
|
|
495
|
-
var currentId = 0
|
|
496
|
-
|
|
497
|
-
audioFile.framePosition = startFrame
|
|
498
|
-
var currentFrame = startFrame
|
|
499
|
-
|
|
500
|
-
while currentFrame < endFrame {
|
|
501
|
-
let framesToRead = min(framesPerBuffer, AVAudioFrameCount(endFrame - currentFrame))
|
|
502
|
-
|
|
503
|
-
do {
|
|
504
|
-
try audioFile.read(into: buffer, frameCount: framesToRead)
|
|
505
|
-
|
|
506
|
-
guard let channelData = buffer.floatChannelData else {
|
|
507
|
-
continue
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
// Process each channel's data
|
|
511
|
-
var summedData = [Float](repeating: 0, count: Int(framesToRead))
|
|
512
|
-
for channel in 0..<numberOfChannels {
|
|
513
|
-
let channelBuffer = UnsafeBufferPointer(start: channelData[channel], count: Int(framesToRead))
|
|
514
|
-
for (index, sample) in channelBuffer.enumerated() {
|
|
515
|
-
summedData[index] += sample
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
// Average across channels
|
|
520
|
-
for i in 0..<summedData.count {
|
|
521
|
-
summedData[i] /= Float(numberOfChannels)
|
|
522
|
-
}
|
|
523
|
-
|
|
524
|
-
// Calculate both peak amplitude and RMS
|
|
525
|
-
var localMax: Float = 0
|
|
526
|
-
var rms: Float = 0
|
|
527
|
-
vDSP_maxmgv(summedData, 1, &localMax, vDSP_Length(framesToRead))
|
|
528
|
-
|
|
529
|
-
// Calculate RMS using vDSP
|
|
530
|
-
var meanSquare: Float = 0
|
|
531
|
-
vDSP_measqv(summedData, 1, &meanSquare, vDSP_Length(framesToRead))
|
|
532
|
-
rms = sqrt(meanSquare)
|
|
533
|
-
|
|
534
|
-
minAmplitude = min(minAmplitude, localMax)
|
|
535
|
-
maxAmplitude = max(maxAmplitude, localMax)
|
|
536
|
-
|
|
537
|
-
// Create data point
|
|
538
|
-
let startTime = Float(currentFrame) / Float(sampleRate)
|
|
539
|
-
let endTime = Float(currentFrame + Int64(framesToRead)) / Float(sampleRate)
|
|
540
|
-
|
|
541
|
-
let dataPoint = DataPoint(
|
|
542
|
-
id: currentId,
|
|
543
|
-
amplitude: localMax, // Always use peak amplitude
|
|
544
|
-
rms: rms, // Use calculated RMS value
|
|
545
|
-
dB: Float(20 * log10(Double(rms))), // Use RMS for dB calculation
|
|
546
|
-
silent: rms < 0.01, // Use RMS for silence detection
|
|
547
|
-
features: computeFeatures(
|
|
548
|
-
segmentData: Array(UnsafeBufferPointer(start: summedData, count: Int(framesToRead))),
|
|
549
|
-
sampleRate: sampleRate,
|
|
550
|
-
sumSquares: rms * rms,
|
|
551
|
-
zeroCrossings: 0,
|
|
552
|
-
segmentLength: Int(framesToRead),
|
|
553
|
-
featureOptions: featureOptions
|
|
554
|
-
),
|
|
555
|
-
speech: SpeechFeatures(isActive: rms >= 0.01),
|
|
556
|
-
startTime: startTime,
|
|
557
|
-
endTime: endTime,
|
|
558
|
-
startPosition: Int(currentFrame),
|
|
559
|
-
endPosition: Int(currentFrame + Int64(framesToRead)),
|
|
560
|
-
samples: Int(framesToRead)
|
|
561
|
-
)
|
|
562
|
-
|
|
563
|
-
dataPoints.append(dataPoint)
|
|
564
|
-
currentId += 1
|
|
565
|
-
} catch {
|
|
566
|
-
Logger.debug("Error reading audio data: \(error)")
|
|
567
|
-
return nil
|
|
568
|
-
}
|
|
569
|
-
|
|
570
|
-
currentFrame += Int64(framesToRead)
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
let endTime = CACurrentMediaTime()
|
|
574
|
-
let extractionTime = Float(endTime - startTime) * 1000 // Convert to milliseconds
|
|
575
|
-
|
|
576
|
-
return AudioAnalysisData(
|
|
577
|
-
segmentDurationMs: segmentDurationMs,
|
|
578
|
-
durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
|
|
579
|
-
bitDepth: bitDepth,
|
|
580
|
-
numberOfChannels: numberOfChannels,
|
|
581
|
-
sampleRate: Int(sampleRate),
|
|
582
|
-
samples: Int(endFrame - startFrame),
|
|
583
|
-
dataPoints: dataPoints,
|
|
584
|
-
amplitudeRange: AudioAnalysisData.AmplitudeRange(
|
|
585
|
-
min: minAmplitude,
|
|
586
|
-
max: maxAmplitude
|
|
587
|
-
),
|
|
588
|
-
rmsRange: AudioAnalysisData.AmplitudeRange(
|
|
589
|
-
min: 0,
|
|
590
|
-
max: 1
|
|
591
|
-
),
|
|
592
|
-
speechAnalysis: nil,
|
|
593
|
-
extractionTimeMs: extractionTime
|
|
594
|
-
)
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
/// Trims the currently loaded audio file to a time range and writes the result
/// to a new `.wav` file in the temporary directory.
///
/// On success the `audioFile` property is replaced by the trimmed file.
///
/// - Parameters:
///   - startTimeMs: Start of the range in milliseconds. Must be non-negative and
///     inside the file.
///   - endTimeMs: End of the range in milliseconds. Values past the end of the
///     file are clamped to the file length.
///   - outputFormat: Optional overrides (`sampleRate`, `channels`, `bitDepth`)
///     forwarded to `createOutputSettings`.
/// - Returns: A `TrimResult` with the output URI, the duration in seconds of the
///   audio actually written, and the output file size; `nil` when no file is
///   loaded, the range is empty or invalid, or reading/writing fails.
public func trimAudio(
    startTimeMs: Double,
    endTimeMs: Double,
    outputFormat: [String: Any]?
) -> TrimResult? {
    guard let currentAudioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }

    let sampleRate = currentAudioFile.fileFormat.sampleRate
    let fileLength = currentAudioFile.length

    // Validate in floating point first: converting NaN or an out-of-range Double
    // to an integer frame position traps. NaN fails both comparisons below.
    let startPosition = startTimeMs * sampleRate / 1000.0
    let endPosition = min(endTimeMs * sampleRate / 1000.0, Double(fileLength))
    guard startPosition >= 0, startPosition < endPosition else {
        Logger.debug("Invalid time range")
        return nil
    }

    let startFrame = AVAudioFramePosition(startPosition)
    let endFrame = AVAudioFramePosition(endPosition)
    guard startFrame < endFrame else {
        // Sub-frame ranges collapse to zero frames after truncation.
        Logger.debug("Invalid time range")
        return nil
    }

    // Create output format
    let outputSettings = createOutputSettings(from: outputFormat, originalFormat: currentAudioFile.fileFormat)

    // Create temporary output file
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(UUID().uuidString)
        .appendingPathExtension("wav")

    // Read and write in chunks
    let chunkCapacity: AVAudioFrameCount = 32768
    guard let buffer = AVAudioPCMBuffer(
        pcmFormat: currentAudioFile.processingFormat,
        frameCapacity: chunkCapacity
    ) else {
        Logger.debug("Failed to create buffer")
        return nil
    }

    do {
        var framesWritten: AVAudioFramePosition = 0

        // The writer lives in its own scope so it is released before the file
        // size is queried and the file is reopened for reading.
        // NOTE(review): presumably the file is finalized when the writer is
        // deallocated - confirm against the AVAudioFile documentation.
        do {
            let outputFile = try AVAudioFile(
                forWriting: outputURL,
                settings: outputSettings,
                commonFormat: .pcmFormatFloat32,
                interleaved: false
            )

            currentAudioFile.framePosition = startFrame
            var currentFrame = startFrame

            while currentFrame < endFrame {
                // Take the minimum in 64-bit space; narrowing the full remainder
                // to a 32-bit frame count would trap on very long ranges.
                let remaining = endFrame - currentFrame
                let framesToRead = AVAudioFrameCount(min(AVAudioFramePosition(chunkCapacity), remaining))

                try currentAudioFile.read(into: buffer, frameCount: framesToRead)

                // Advance by what was actually read; stop on an empty read so a
                // premature end of file cannot spin the loop forever.
                let framesRead = AVAudioFramePosition(buffer.frameLength)
                guard framesRead > 0 else { break }

                try outputFile.write(from: buffer)

                currentFrame += framesRead
                framesWritten += framesRead
            }
        }

        guard framesWritten > 0 else {
            Logger.debug("Error trimming audio: no frames were written")
            try? FileManager.default.removeItem(at: outputURL)
            return nil
        }

        // Get file size
        let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
        let fileSize = (attributes[.size] as? NSNumber)?.int64Value ?? 0

        // After successful trim, update the class property
        audioFile = try AVAudioFile(forReading: outputURL)

        // Report the duration of the audio that was really written, in seconds.
        let trimmedDuration = Double(framesWritten) / sampleRate
        return TrimResult(
            uri: outputURL.absoluteString,
            duration: trimmedDuration,
            size: fileSize
        )
    } catch {
        Logger.debug("Error trimming audio: \(error)")
        // Do not leave a partially written file behind in the temp directory.
        try? FileManager.default.removeItem(at: outputURL)
        return nil
    }
}
|
|
671
|
-
|
|
672
|
-
/// Builds the linear-PCM settings dictionary for the trimmed output file.
///
/// - Parameters:
///   - options: Optional overrides. Recognized keys are `sampleRate` (Double),
///     `channels` (Int) and `bitDepth` (Int); values of any other type are ignored.
///   - originalFormat: Source format supplying the default sample rate and
///     channel count.
/// - Returns: Settings for integer, little-endian, interleaved linear PCM.
///   Bit depth defaults to 16 when not overridden.
private func createOutputSettings(
    from options: [String: Any]?,
    originalFormat: AVAudioFormat
) -> [String: Any] {
    // Entries that never depend on the caller's options.
    var pcmSettings: [String: Any] = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    // Sample rate: caller override, otherwise the source file's rate.
    if let requestedRate = options?["sampleRate"] as? Double {
        pcmSettings[AVSampleRateKey] = requestedRate
    } else {
        pcmSettings[AVSampleRateKey] = originalFormat.sampleRate
    }

    // Channel count: caller override, otherwise the source file's count.
    if let requestedChannels = options?["channels"] as? Int {
        pcmSettings[AVNumberOfChannelsKey] = requestedChannels
    } else {
        pcmSettings[AVNumberOfChannelsKey] = originalFormat.channelCount
    }

    // Bit depth: caller override, otherwise 16.
    if let requestedDepth = options?["bitDepth"] as? Int {
        pcmSettings[AVLinearPCMBitDepthKey] = requestedDepth
    } else {
        pcmSettings[AVLinearPCMBitDepthKey] = 16
    }

    return pcmSettings
}
|
|
689
|
-
|
|
690
|
-
/// Extracts a preview of the audio data with consistent time range support.
///
/// The requested range is divided into equally sized segments and one
/// `DataPoint` is produced per segment from the first channel of the file.
///
/// - Parameters:
///   - numberOfPoints: The number of points to extract. Must be greater than
///     zero; it is reduced to the number of samples in the range when the range
///     holds fewer samples than requested points.
///   - startTimeMs: Optional start time in milliseconds (defaults to 0; negative
///     values are clamped to 0).
///   - endTimeMs: Optional end time in milliseconds (defaults to, and is clamped
///     to, the end of the file).
///   - featureOptions: The features to extract.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or
///   `nil` (after calling `reject`) when no file is loaded, the arguments are
///   invalid, or the audio cannot be read.
public func extractPreview(
    numberOfPoints: Int,
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // Zero would divide by zero below and a negative count would trap in the range.
    guard numberOfPoints > 0 else {
        reject("INVALID_ARGUMENTS", "numberOfPoints must be greater than zero")
        return nil
    }

    // Start timing before any work so extractionTimeMs covers the extraction itself.
    let extractionStart = CACurrentMediaTime()

    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000

    // Calculate effective time range, clamped to the bounds of the file.
    let effectiveStartMs = max(startTimeMs ?? 0.0, 0.0)
    let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)

    // Checked before the integer conversions: NaN or an empty range would
    // otherwise trap or yield a non-positive frame count.
    guard effectiveStartMs < effectiveEndMs else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }
    let durationMs = effectiveEndMs - effectiveStartMs // This is the actual duration we want to use

    // Convert time to frames with proper offset
    let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
    let endFrame = min(
        AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0),
        audioFile.length
    )
    let samplesInRange = Int(endFrame - startFrame)

    guard samplesInRange > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Never ask for more points than there are samples, so every point covers
    // at least one frame and the read buffer is never zero-sized.
    let pointCount = min(numberOfPoints, samplesInRange)
    let samplesPerPoint = samplesInRange / pointCount
    let framesToRead = AVAudioFrameCount(samplesPerPoint)

    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    // Divide after the default is applied: `x as? Int ?? 16 / 8` would parse as
    // `(x as? Int) ?? (16 / 8)` and yield bits instead of bytes.
    let bytesPerSample = bitDepth / 8
    let numberOfChannels = Int(audioFile.processingFormat.channelCount)
    let fileChannelCount = Int(audioFile.fileFormat.channelCount)

    NSLog("""
    [AudioProcessor] Starting preview extraction:
    - numberOfPoints: \(numberOfPoints)
    - startTimeMs: \(String(describing: startTimeMs))
    - endTimeMs: \(String(describing: endTimeMs))
    - durationMs: \(durationMs)
    - sampleRate: \(sampleRate)
    - bitDepth: \(bitDepth)
    - channels: \(numberOfChannels)
    - samplesInRange: \(samplesInRange)
    - samplesPerPoint: \(samplesPerPoint)
    """)

    // Every point reads the same number of frames, so one buffer is reused.
    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesToRead) else {
        reject("AUDIO_READ_ERROR", "Error reading audio data: failed to allocate buffer")
        return nil
    }

    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity(pointCount)

    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    for i in 0..<pointCount {
        let pointStartFrame = startFrame + Int64(i * samplesPerPoint)
        let pointEndFrame = startFrame + Int64((i + 1) * samplesPerPoint)

        // Calculate byte positions
        let startPosition = Int(pointStartFrame) * bytesPerSample * fileChannelCount
        let endPosition = Int(pointEndFrame) * bytesPerSample * fileChannelCount
        let segmentStartTime = Float(pointStartFrame) / sampleRate
        let segmentEndTime = Float(pointEndFrame) / sampleRate

        do {
            audioFile.framePosition = pointStartFrame
            try audioFile.read(into: buffer, frameCount: framesToRead)

            guard let floatData = buffer.floatChannelData else { continue }

            // Only the frames actually read are valid; anything beyond
            // frameLength is stale data from an earlier point.
            let frameCount = Int(buffer.frameLength)
            guard frameCount > 0 else { continue }

            var sumSquares: Float = 0
            var zeroCrossings = 0
            var prevValue: Float = 0
            var localMinAmplitude: Float = .greatestFiniteMagnitude
            var localMaxAmplitude: Float = -.greatestFiniteMagnitude

            // Process samples for this point (first channel only)
            for frame in 0..<frameCount {
                let value = floatData[0][frame]
                sumSquares += value * value
                if frame > 0 && value * prevValue < 0 {
                    zeroCrossings += 1
                }
                prevValue = value

                let absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            let features = computeFeatures(
                segmentData: Array(UnsafeBufferPointer(start: floatData[0], count: frameCount)),
                sampleRate: sampleRate,
                sumSquares: sumSquares,
                zeroCrossings: zeroCrossings,
                segmentLength: frameCount,
                featureOptions: featureOptions
            )

            let rms = features.rms
            let silent = rms < 0.01
            let dB = Float(20 * log10(Double(rms)))

            let dataPoint = DataPoint(
                id: Int(uniqueIdCounter),
                amplitude: localMaxAmplitude,
                rms: rms,
                dB: dB,
                silent: silent,
                features: features,
                speech: SpeechFeatures(isActive: !silent),
                startTime: segmentStartTime,
                endTime: segmentEndTime,
                startPosition: startPosition,
                endPosition: endPosition,
                samples: frameCount
            )
            dataPoints.append(dataPoint)
            uniqueIdCounter += 1

            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
        } catch {
            reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
            return nil
        }
    }

    let extractionTimeMs = Float((CACurrentMediaTime() - extractionStart) * 1000)

    NSLog("""
    [AudioProcessor] Preview extraction completed:
    - dataPoints generated: \(dataPoints.count)
    - extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
    - amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
    """)

    return AudioAnalysisData(
        segmentDurationMs: 100, // Default 100ms
        durationMs: Int(durationMs), // Use actual duration of trimmed section
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: samplesInRange,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTimeMs
    )
}
|
|
858
|
-
}
|