@siteed/expo-audio-stream 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -222
- package/build/index.d.ts +11 -15
- package/build/index.js +44 -14
- package/package.json +49 -110
- package/src/index.ts +18 -32
- package/CHANGELOG.md +0 -206
- package/android/build.gradle +0 -105
- package/android/src/main/AndroidManifest.xml +0 -27
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
- package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
- package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
- package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -2235
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -152
- package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +0 -1099
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -21
- package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -739
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
- package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
- package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
- package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
- package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
- package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
- package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
- package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
- package/android/src/main/res/drawable/ic_microphone.xml +0 -13
- package/android/src/main/res/drawable/ic_pause.xml +0 -10
- package/android/src/main/res/drawable/ic_play.xml +0 -10
- package/android/src/main/res/drawable/ic_stop.xml +0 -10
- package/android/src/main/res/layout/notification_recording.xml +0 -37
- package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
- package/app.plugin.js +0 -1
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -179
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -68
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +0 -203
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
- package/build/AudioAnalysis/extractAudioData.d.ts +0 -3
- package/build/AudioAnalysis/extractAudioData.d.ts.map +0 -1
- package/build/AudioAnalysis/extractAudioData.js +0 -5
- package/build/AudioAnalysis/extractAudioData.js.map +0 -1
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts +0 -14
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +0 -1
- package/build/AudioAnalysis/extractMelSpectrogram.js +0 -85
- package/build/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
- package/build/AudioAnalysis/extractPreview.d.ts +0 -11
- package/build/AudioAnalysis/extractPreview.d.ts.map +0 -1
- package/build/AudioAnalysis/extractPreview.js +0 -25
- package/build/AudioAnalysis/extractPreview.js.map +0 -1
- package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
- package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
- package/build/AudioAnalysis/extractWaveform.js +0 -11
- package/build/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/AudioRecorder.provider.d.ts +0 -11
- package/build/AudioRecorder.provider.d.ts.map +0 -1
- package/build/AudioRecorder.provider.js +0 -37
- package/build/AudioRecorder.provider.js.map +0 -1
- package/build/ExpoAudioStream.native.d.ts +0 -3
- package/build/ExpoAudioStream.native.d.ts.map +0 -1
- package/build/ExpoAudioStream.native.js +0 -6
- package/build/ExpoAudioStream.native.js.map +0 -1
- package/build/ExpoAudioStream.types.d.ts +0 -532
- package/build/ExpoAudioStream.types.d.ts.map +0 -1
- package/build/ExpoAudioStream.types.js +0 -2
- package/build/ExpoAudioStream.types.js.map +0 -1
- package/build/ExpoAudioStream.web.d.ts +0 -59
- package/build/ExpoAudioStream.web.d.ts.map +0 -1
- package/build/ExpoAudioStream.web.js +0 -285
- package/build/ExpoAudioStream.web.js.map +0 -1
- package/build/ExpoAudioStreamModule.d.ts +0 -3
- package/build/ExpoAudioStreamModule.d.ts.map +0 -1
- package/build/ExpoAudioStreamModule.js +0 -693
- package/build/ExpoAudioStreamModule.js.map +0 -1
- package/build/WebRecorder.web.d.ts +0 -119
- package/build/WebRecorder.web.d.ts.map +0 -1
- package/build/WebRecorder.web.js +0 -436
- package/build/WebRecorder.web.js.map +0 -1
- package/build/constants.d.ts +0 -11
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -14
- package/build/constants.js.map +0 -1
- package/build/events.d.ts +0 -26
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -21
- package/build/events.js.map +0 -1
- package/build/index.d.ts.map +0 -1
- package/build/index.js.map +0 -1
- package/build/trimAudio.d.ts +0 -25
- package/build/trimAudio.d.ts.map +0 -1
- package/build/trimAudio.js +0 -67
- package/build/trimAudio.js.map +0 -1
- package/build/useAudioRecorder.d.ts +0 -21
- package/build/useAudioRecorder.d.ts.map +0 -1
- package/build/useAudioRecorder.js +0 -427
- package/build/useAudioRecorder.js.map +0 -1
- package/build/utils/BlobFix.d.ts +0 -9
- package/build/utils/BlobFix.d.ts.map +0 -1
- package/build/utils/BlobFix.js +0 -498
- package/build/utils/BlobFix.js.map +0 -1
- package/build/utils/audioProcessing.d.ts +0 -24
- package/build/utils/audioProcessing.d.ts.map +0 -1
- package/build/utils/audioProcessing.js +0 -133
- package/build/utils/audioProcessing.js.map +0 -1
- package/build/utils/concatenateBuffers.d.ts +0 -8
- package/build/utils/concatenateBuffers.d.ts.map +0 -1
- package/build/utils/concatenateBuffers.js +0 -21
- package/build/utils/concatenateBuffers.js.map +0 -1
- package/build/utils/convertPCMToFloat32.d.ts +0 -13
- package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
- package/build/utils/convertPCMToFloat32.js +0 -120
- package/build/utils/convertPCMToFloat32.js.map +0 -1
- package/build/utils/encodingToBitDepth.d.ts +0 -5
- package/build/utils/encodingToBitDepth.d.ts.map +0 -1
- package/build/utils/encodingToBitDepth.js +0 -13
- package/build/utils/encodingToBitDepth.js.map +0 -1
- package/build/utils/getWavFileInfo.d.ts +0 -26
- package/build/utils/getWavFileInfo.d.ts.map +0 -1
- package/build/utils/getWavFileInfo.js +0 -92
- package/build/utils/getWavFileInfo.js.map +0 -1
- package/build/utils/writeWavHeader.d.ts +0 -49
- package/build/utils/writeWavHeader.d.ts.map +0 -1
- package/build/utils/writeWavHeader.js +0 -91
- package/build/utils/writeWavHeader.js.map +0 -1
- package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
- package/build/workers/InlineFeaturesExtractor.web.js +0 -828
- package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
- package/build/workers/inlineAudioWebWorker.web.js +0 -157
- package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
- package/expo-module.config.json +0 -9
- package/ios/AudioAnalysisData.swift +0 -74
- package/ios/AudioNotificationManager.swift +0 -135
- package/ios/AudioProcessingHelpers.swift +0 -743
- package/ios/AudioProcessor.swift +0 -1313
- package/ios/AudioStreamError.swift +0 -7
- package/ios/AudioStreamManager.swift +0 -1708
- package/ios/AudioStreamManagerDelegate.swift +0 -16
- package/ios/DataPoint.swift +0 -54
- package/ios/DecodingConfig.swift +0 -47
- package/ios/ExpoAudioStream.podspec +0 -27
- package/ios/ExpoAudioStreamModule.swift +0 -805
- package/ios/FFT.swift +0 -62
- package/ios/Features.swift +0 -95
- package/ios/Logger.swift +0 -7
- package/ios/NotificationExtension.swift +0 -15
- package/ios/RecordingResult.swift +0 -22
- package/ios/RecordingSettings.swift +0 -265
- package/ios/WaveformExtractor.swift +0 -105
- package/plugin/build/index.d.ts +0 -21
- package/plugin/build/index.js +0 -191
- package/plugin/src/index.ts +0 -278
- package/plugin/tsconfig.json +0 -10
- package/plugin/tsconfig.tsbuildinfo +0 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -202
- package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -333
- package/src/AudioAnalysis/extractAudioData.ts +0 -6
- package/src/AudioAnalysis/extractMelSpectrogram.ts +0 -144
- package/src/AudioAnalysis/extractPreview.ts +0 -34
- package/src/AudioAnalysis/extractWaveform.ts +0 -22
- package/src/AudioRecorder.provider.tsx +0 -54
- package/src/ExpoAudioStream.native.ts +0 -6
- package/src/ExpoAudioStream.types.ts +0 -641
- package/src/ExpoAudioStream.web.ts +0 -359
- package/src/ExpoAudioStreamModule.ts +0 -967
- package/src/WebRecorder.web.ts +0 -580
- package/src/constants.ts +0 -18
- package/src/events.ts +0 -60
- package/src/trimAudio.ts +0 -90
- package/src/useAudioRecorder.tsx +0 -620
- package/src/utils/BlobFix.ts +0 -559
- package/src/utils/audioProcessing.ts +0 -205
- package/src/utils/concatenateBuffers.ts +0 -24
- package/src/utils/convertPCMToFloat32.ts +0 -170
- package/src/utils/encodingToBitDepth.ts +0 -18
- package/src/utils/getWavFileInfo.ts +0 -132
- package/src/utils/writeWavHeader.ts +0 -114
- package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
- package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
package/ios/AudioProcessor.swift
DELETED
|
@@ -1,1313 +0,0 @@
|
|
|
1
|
-
// packages/expo-audio-stream/ios/AudioProcessor.swift
|
|
2
|
-
|
|
3
|
-
import Foundation
|
|
4
|
-
import Accelerate
|
|
5
|
-
import AVFoundation
|
|
6
|
-
import QuartzCore
|
|
7
|
-
|
|
8
|
-
/// Metadata describing an audio file and the format it was written in.
///
/// The type only stores values and converts them to a dictionary; it performs no
/// validation of its own.
public struct TrimResult {
    /// Location of the file, as a string.
    let uri: String
    /// Name of the file.
    let filename: String
    /// Duration in milliseconds.
    let durationMs: Double
    /// Size value reported for the file (presumably bytes — confirm against the producer).
    let size: Int64
    /// Sample rate of the audio.
    let sampleRate: Int
    /// Number of audio channels.
    let channels: Int
    /// Bit depth of the audio samples.
    let bitDepth: Int
    /// MIME type of the file.
    let mimeType: String
    /// Format that was asked for.
    let requestedFormat: String
    /// Format that was actually used.
    let actualFormat: String
    /// Optional compression details; left out of the dictionary form when `nil`.
    let compression: [String: Any]?

    /// Creates a result from its individual fields; every value is stored unchanged.
    init(
        uri: String,
        filename: String,
        durationMs: Double,
        size: Int64,
        sampleRate: Int,
        channels: Int,
        bitDepth: Int,
        mimeType: String,
        requestedFormat: String,
        actualFormat: String,
        compression: [String: Any]?
    ) {
        self.uri = uri
        self.filename = filename
        self.durationMs = durationMs
        self.size = size
        self.sampleRate = sampleRate
        self.channels = channels
        self.bitDepth = bitDepth
        self.mimeType = mimeType
        self.requestedFormat = requestedFormat
        self.actualFormat = actualFormat
        self.compression = compression
    }

    /// Returns the fields keyed by their property names.
    ///
    /// The `"compression"` key is present only when `compression` is non-nil.
    func toDictionary() -> [String: Any] {
        var payload: [String: Any] = [:]
        payload["uri"] = uri
        payload["filename"] = filename
        payload["durationMs"] = durationMs
        payload["size"] = size
        payload["sampleRate"] = sampleRate
        payload["channels"] = channels
        payload["bitDepth"] = bitDepth
        payload["mimeType"] = mimeType
        payload["requestedFormat"] = requestedFormat
        payload["actualFormat"] = actualFormat

        if let details = compression {
            payload["compression"] = details
        }
        return payload
    }
}
|
|
66
|
-
|
|
67
|
-
public class AudioProcessor {
|
|
68
|
-
/// File opened for reading by `init(url:resolve:reject:)`; stays `nil` for the buffer-based initializer.
public private(set) var audioFile: AVAudioFile?
/// Callback stored from the `resolve` initializer argument.
private var result: (Any) -> Void
/// Callback stored from the `reject` initializer argument; invoked with two strings
/// (the visible call sites pass an error code followed by a message).
private var reject: (String, String) -> Void
private var waveformData: [Float] = []
private var progress: Float = 0
private var channelCount = 1
private var currentProgress: Float = 0
private let extractionQueue = DispatchQueue(label: "AudioProcessor", attributes: .concurrent)
/// Backing storage for `abortExtraction`.
private var _abortExtraction = false

/// Running counter used to assign ids to generated data points.
private var uniqueIdCounter: Int = 0

/// Flag that asks an in-progress extraction to stop.
///
/// Reads and writes go straight to the backing variable without any synchronization.
public var abortExtraction: Bool {
    get {
        return _abortExtraction
    }
    set(shouldAbort) {
        _abortExtraction = shouldAbort
    }
}

/// Creates a processor for file-based processing.
/// - Parameters:
///   - url: Location of the audio file to open for reading.
///   - resolve: Callback stored as the result handler.
///   - reject: Callback stored as the failure handler.
/// - Throws: Whatever `AVAudioFile(forReading:)` throws when the file cannot be opened.
public init(url: URL, resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) throws {
    self.result = resolve
    self.reject = reject
    self.audioFile = try AVAudioFile(forReading: url)
}

/// Creates a processor for buffer-based processing; no audio file is opened.
/// - Parameters:
///   - resolve: Callback stored as the result handler.
///   - reject: Callback stored as the failure handler.
public init(resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) {
    self.reject = reject
    self.result = resolve
}


/// Drops the reference to the audio file when the processor is deallocated.
deinit {
    self.audioFile = nil
}
|
|
103
|
-
|
|
104
|
-
/// Failure cases declared for `AudioProcessor`.
public enum AudioProcessorError: Error {
    /// The audio file could not be set up; carries a `String` payload (presumably a description).
    case fileInitializationFailed(String)

    /// A buffer could not be created.
    case bufferCreationFailed

    /// Reading audio failed; carries a `String` payload (presumably a description).
    case audioReadError(String)
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
/// Reads a range of the loaded audio file and extracts analysis data from it.
///
/// The range can be given in bytes (`position` / `byteLength`) or in frames (`offset` / `length`).
/// Byte values take precedence and are converted to frames by dividing by `bitDepth / 8` and
/// `numberOfChannels`. Only the first channel of the decoded buffer is collected and analysed.
///
/// Every failure path calls `reject` with an error code and message before returning `nil`;
/// an aborted extraction returns `nil` without calling `reject`.
///
/// - Parameters:
///   - numberOfSamples: When non-nil and non-zero, sizes the read buffer as
///     `length / numberOfSamples` frames (at least 1). It does not change which frames are analysed.
///   - offset: First frame to read when `position` is nil. Defaults to 0.
///   - length: Number of frames to read when `byteLength` is nil. Defaults to the rest of the file.
///   - segmentDurationMs: Duration of each analysis segment in milliseconds. Must be positive.
///   - featureOptions: Feature names mapped to whether they should be extracted.
///   - bitDepth: Bit depth used for byte/frame conversion and reported in the result.
///   - numberOfChannels: Channel count used for byte/frame conversion and reported in the result.
///   - position: Start of the range in bytes; overrides `offset` when provided.
///   - byteLength: Length of the range in bytes; overrides `length` when provided.
/// - Returns: The analysis produced by `processChannelData`, or `nil` on failure or abort.
public func processAudioData(
    numberOfSamples: Int?,
    offset: Int? = 0,
    length: UInt? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int,
    position: Int? = nil,
    byteLength: Int? = nil
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    let totalFrameCount = AVAudioFrameCount(audioFile.length)

    NSLog("""
        [AudioProcessor] Starting audio processing:
        - totalFrameCount: \(totalFrameCount)
        - bitDepth: \(bitDepth)
        - numberOfChannels: \(numberOfChannels)
        - position: \(position ?? -1)
        - byteLength: \(byteLength ?? -1)
        - offset: \(offset ?? -1)
        - length: \(length ?? 0)
        """)

    // A non-positive segment duration would trap in the unsigned frame-count conversion below
    // or stall the downstream segmentation, so it is rejected up front.
    guard segmentDurationMs > 0 else {
        NSLog("[AudioProcessor] ERROR: Invalid segment duration")
        reject("INVALID_SEGMENT_DURATION", "Segment duration (\(segmentDurationMs)) must be positive")
        return nil
    }

    // Byte-based ranges are divided by bytes-per-sample and by the channel count; reject
    // parameters that would make either divisor zero instead of trapping on the division.
    if position != nil || byteLength != nil {
        guard bitDepth >= 8, numberOfChannels > 0 else {
            NSLog("[AudioProcessor] ERROR: Invalid bit depth or channel count")
            reject(
                "INVALID_FORMAT",
                "bitDepth (\(bitDepth)) must be at least 8 and numberOfChannels (\(numberOfChannels)) must be positive when position/byteLength is used"
            )
            return nil
        }
    }

    // Use position/byteLength if provided, otherwise fall back to offset/length
    let effectiveOffset: Int64 = if let position = position {
        Int64(position / (bitDepth / 8) / numberOfChannels)
    } else {
        Int64(offset ?? 0)
    }

    let effectiveLength: Int64 = if let byteLength = byteLength {
        Int64(byteLength / (bitDepth / 8) / numberOfChannels)
    } else if let length = length {
        Int64(length)
    } else {
        Int64(totalFrameCount) - effectiveOffset
    }

    NSLog("""
        [AudioProcessor] Calculated frame positions:
        - effectiveOffset: \(effectiveOffset)
        - effectiveLength: \(effectiveLength)
        - expectedEndFrame: \(effectiveOffset + effectiveLength)
        - totalFrameCount: \(totalFrameCount)
        """)

    // Validate frame boundaries
    if effectiveOffset < 0 || effectiveOffset >= Int64(totalFrameCount) {
        NSLog("[AudioProcessor] ERROR: Invalid offset value")
        reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
        return nil
    }

    if effectiveLength <= 0 {
        NSLog("[AudioProcessor] ERROR: Invalid length value")
        reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
        return nil
    }

    if effectiveOffset + effectiveLength > Int64(totalFrameCount) {
        NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
        reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
        return nil
    }

    var startFrame: AVAudioFramePosition = effectiveOffset
    let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength

    // Calculate frames per segment based on segment duration
    let framesPerSegment = AVAudioFrameCount(Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0)

    // The buffer size only controls how much is read per iteration. A zero sample count
    // falls back to the segment size because it cannot be used as a divisor.
    let framesPerBuffer: AVAudioFrameCount
    if let numberOfSamples = numberOfSamples, numberOfSamples != 0 {
        framesPerBuffer = AVAudioFrameCount(max(1, effectiveLength / Int64(numberOfSamples)))
    } else {
        framesPerBuffer = framesPerSegment
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        reject("BUFFER_CREATION_FAILED", "Failed to create AVAudioPCMBuffer.")
        return nil
    }

    channelCount = Int(audioFile.processingFormat.channelCount)

    var channelData = [Float]()
    while startFrame < endFrame {
        let remainingFrames = endFrame - startFrame
        let currentFramesPerBuffer = min(framesPerBuffer, AVAudioFrameCount(remainingFrames))

        if currentFramesPerBuffer == 0 {
            break
        }

        // Honour an abort request: leave the file positioned at the next unread frame,
        // clear the flag and stop without producing a result.
        if abortExtraction {
            audioFile.framePosition = startFrame
            abortExtraction = false
            return nil
        }

        do {
            audioFile.framePosition = startFrame
            try audioFile.read(into: buffer, frameCount: currentFramesPerBuffer)
        } catch {
            reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
            return nil
        }

        //TODO: check if we need conversion based on bitDepth here
        guard let floatData = buffer.floatChannelData else {
            reject("BUFFER_DATA_ERROR", "Failed to retrieve float data from buffer.")
            return nil
        }

        // Only the first channel is collected; copy the frames that were actually read in one go.
        channelData.append(
            contentsOf: UnsafeBufferPointer(start: floatData[0], count: Int(buffer.frameLength))
        )

        startFrame += AVAudioFramePosition(currentFramesPerBuffer)
    }

    // `startFrame` has advanced past every frame that was requested, so the distance from the
    // starting offset is the number of frames the loop covered.
    NSLog("""
        [AudioProcessor] Audio processing completed:
        - processedFrames: \(startFrame - effectiveOffset)
        - framesPerBuffer: \(framesPerBuffer)
        """)

    return processChannelData(
        channelData: channelData,
        sampleRate: Float(audioFile.fileFormat.sampleRate),
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
|
|
267
|
-
|
|
268
|
-
/// Converts raw integer PCM bytes to normalized floats and extracts analysis data from them.
///
/// Samples are read in the platform's native byte order. 16-bit samples are divided by
/// `Int16.max` and 32-bit samples by `Int32.max`. Trailing bytes that do not form a whole
/// sample are ignored. Failures call `reject` with an error code and message and return `nil`.
///
/// - Parameters:
///   - data: The raw audio bytes. Must not be empty.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each analysis segment in milliseconds.
///   - featureOptions: Feature names mapped to whether they should be extracted.
///   - bitDepth: The bit depth of the audio data; only 16 and 32 are supported.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: The analysis produced by `processChannelData`, or `nil` when the data is empty
///   or the bit depth is unsupported.
public func processAudioBuffer(
    data: Data,
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    guard !data.isEmpty else {
        Logger.debug("Data is empty, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    // Convert Data to Float array based on bit depth.
    // `Data` gives no alignment guarantee for its bytes (a slice can start at any offset),
    // so each sample is read with an unaligned load rather than rebinding the memory.
    let floatData: [Float]
    switch bitDepth {
    case 16:
        let sampleSize = MemoryLayout<Int16>.stride
        floatData = data.withUnsafeBytes { (rawBytes: UnsafeRawBufferPointer) -> [Float] in
            (0..<(rawBytes.count / sampleSize)).map { index in
                let sample = rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int16.self)
                return Float(sample) / Float(Int16.max)
            }
        }
    case 32:
        let sampleSize = MemoryLayout<Int32>.stride
        floatData = data.withUnsafeBytes { (rawBytes: UnsafeRawBufferPointer) -> [Float] in
            (0..<(rawBytes.count / sampleSize)).map { index in
                let sample = rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int32.self)
                return Float(sample) / Float(Int32.max)
            }
        }
    default:
        Logger.debug("Unsupported bit depth. Rejecting")
        reject("UNSUPPORTED_BIT_DEPTH", "Unsupported bit depth: \(bitDepth)")
        return nil
    }

    return processChannelData(
        channelData: floatData,
        sampleRate: sampleRate,
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
|
|
319
|
-
|
|
320
|
-
/// Splits channel samples into fixed-duration segments and builds the analysis result.
///
/// Each segment holds `segmentDurationMs * sampleRate / 1000` samples (the last one may be
/// shorter) and is turned into a `DataPoint` by `processSegment`. Byte positions reported for
/// a segment are derived from the sample index, `bitDepth / 8` and `numberOfChannels`.
///
/// - Parameters:
///   - channelData: The audio samples to process.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: Feature names mapped to whether they should be extracted.
///   - bitDepth: The bit depth of the audio data.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` describing all segments, or `nil` (after calling `reject`)
///   when the segment duration and sample rate yield a segment of less than one sample.
private func processChannelData(
    channelData: [Float],
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    Logger.debug("Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")

    let startTime = CACurrentMediaTime()

    let length = channelData.count
    // Calculate points per segment based on segment duration
    let samplesPerSegment = Int(Float(segmentDurationMs) * sampleRate / 1000.0)

    // A segment size of zero would never advance through the samples, and a negative one
    // would form an invalid range, so both are rejected before segmentation starts.
    guard samplesPerSegment > 0 else {
        Logger.debug("Invalid segment size. Rejecting")
        reject(
            "INVALID_SEGMENT_DURATION",
            "Segment duration (\(segmentDurationMs) ms) at sample rate \(sampleRate) yields no samples per segment"
        )
        return nil
    }

    var dataPoints = [DataPoint]()
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    // Calculate bytes per sample
    let bytesPerSample = bitDepth / 8

    // Process data in segments
    for segmentStart in stride(from: 0, to: length, by: samplesPerSegment) {
        let segmentEnd = min(segmentStart + samplesPerSegment, length)
        let segment = Array(channelData[segmentStart..<segmentEnd])

        // Process segment and create data point, with byte positions and timing
        // derived from the sample indices.
        let dataPoint = processSegment(
            segment,
            sampleRate: sampleRate,
            featureOptions: featureOptions,
            startTime: Float(segmentStart) / sampleRate,
            endTime: Float(segmentEnd) / sampleRate,
            startPosition: segmentStart * bytesPerSample * numberOfChannels,
            endPosition: segmentEnd * bytesPerSample * numberOfChannels
        )
        dataPoints.append(dataPoint)

        // Update min/max amplitudes
        minAmplitude = min(minAmplitude, segment.min() ?? minAmplitude)
        maxAmplitude = max(maxAmplitude, segment.max() ?? maxAmplitude)
    }

    let endTime = CACurrentMediaTime()
    let processingTimeMs = Float((endTime - startTime) * 1000)

    Logger.debug("Processed \(dataPoints.count) data points in \(processingTimeMs) ms")

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(length) / sampleRate * 1000),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: length,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: processingTimeMs
    )
}
|
|
407
|
-
|
|
408
|
-
/// Builds the `DataPoint` for one segment of samples.
///
/// Computes the segment's RMS, its level in dB (`20 * log10(rms)`, which is `-inf` for an
/// all-zero segment) and a silence flag (`rms < 0.01`), then delegates the optional features
/// to `computeFeatures`. Each call consumes one value of `uniqueIdCounter` as the point's id.
///
/// - Parameters:
///   - segment: The samples of this segment. The caller in this file never passes an empty array;
///     an empty one would make the RMS a division by zero.
///   - sampleRate: The sample rate of the audio data.
///   - featureOptions: Feature names mapped to whether they should be extracted.
///   - startTime: Start time stored on the data point.
///   - endTime: End time stored on the data point.
///   - startPosition: Start byte position stored on the data point.
///   - endPosition: End byte position stored on the data point.
/// - Returns: The data point describing the segment.
private func processSegment(
    _ segment: [Float],
    sampleRate: Float,
    featureOptions: [String: Bool],
    startTime: Float,
    endTime: Float,
    startPosition: Int,
    endPosition: Int
) -> DataPoint {
    let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
    let rms = sqrt(sumSquares / Float(segment.count))
    let silent = rms < 0.01
    let dB = Float(20 * log10(Double(rms)))

    // Count sign changes between consecutive samples so that the "zcr" feature reflects the
    // signal. The count is only needed when that feature is requested.
    let zeroCrossings: Int
    if featureOptions["zcr"] == true {
        zeroCrossings = zip(segment, segment.dropFirst()).reduce(0) { count, pair in
            (pair.0 >= 0) != (pair.1 >= 0) ? count + 1 : count
        }
    } else {
        zeroCrossings = 0
    }

    let features = computeFeatures(
        segmentData: segment,
        sampleRate: sampleRate,
        sumSquares: sumSquares,
        zeroCrossings: zeroCrossings,
        segmentLength: segment.count,
        featureOptions: featureOptions
    )

    let dataPoint = DataPoint(
        id: uniqueIdCounter,
        amplitude: segment.max() ?? 0,
        rms: rms,
        dB: dB,
        silent: silent,
        features: features,
        speech: SpeechFeatures(isActive: !silent),
        startTime: startTime,
        endTime: endTime,
        startPosition: startPosition,
        endPosition: endPosition,
        samples: segment.count
    )
    uniqueIdCounter += 1
    return dataPoint
}
|
|
449
|
-
|
|
450
|
-
/// Assembles the `Features` value for one segment.
///
/// RMS and the minimum/maximum absolute amplitude are always computed. Every other feature is
/// computed only when its key in `featureOptions` is `true`; otherwise it is `0`, an empty
/// collection, or `nil` for the CRC32.
///
/// - Parameters:
///   - segmentData: The samples of the segment.
///   - sampleRate: The sample rate passed through to the extractors that take one.
///   - sumSquares: Sum of squared samples, used for the RMS and reported as the energy.
///   - zeroCrossings: Zero-crossing count, divided by `segmentLength` for the ZCR.
///   - segmentLength: Number of samples used as the divisor for the RMS and ZCR.
///   - featureOptions: Feature names mapped to whether they should be extracted.
/// - Returns: The populated `Features` value.
private func computeFeatures(
    segmentData: [Float],
    sampleRate: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: [String: Bool]
) -> Features {
    /// A feature is on only when its key maps to `true`; a missing key counts as off.
    func isEnabled(_ feature: String) -> Bool {
        return featureOptions[feature] == true
    }

    let sampleCount = Float(segmentLength)
    let rms = sqrt(sumSquares / sampleCount)
    let energy = isEnabled("energy") ? sumSquares : 0
    let zcr = isEnabled("zcr") ? Float(zeroCrossings) / sampleCount : 0
    let mfcc = isEnabled("mfcc")
        ? extractMFCC(from: segmentData, sampleRate: sampleRate)
        : []
    let spectralCentroid = isEnabled("spectralCentroid")
        ? extractSpectralCentroid(from: segmentData, sampleRate: sampleRate)
        : 0
    let spectralFlatness = isEnabled("spectralFlatness")
        ? extractSpectralFlatness(from: segmentData)
        : 0
    let spectralRollOff = isEnabled("spectralRollOff")
        ? extractSpectralRollOff(from: segmentData, sampleRate: sampleRate)
        : 0
    let spectralBandwidth = isEnabled("spectralBandwidth")
        ? extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate)
        : 0
    let chromagram = isEnabled("chromagram")
        ? extractChromagram(from: segmentData, sampleRate: sampleRate)
        : []
    let tempo = isEnabled("tempo")
        ? extractTempo(from: segmentData, sampleRate: sampleRate)
        : 0
    let hnr = isEnabled("hnr")
        ? extractHNR(from: segmentData)
        : 0
    let melSpectrogram = isEnabled("melSpectrogram")
        ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate)
        : []
    let spectralContrast = isEnabled("spectralContrast")
        ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate)
        : []
    let tonnetz = isEnabled("tonnetz")
        ? computeTonnetz(from: segmentData, sampleRate: sampleRate)
        : []
    let pitch = isEnabled("pitch")
        ? estimatePitch(from: segmentData, sampleRate: sampleRate)
        : 0

    // Smallest and largest absolute sample value in the segment (0 when it is empty).
    let magnitudes = segmentData.map(abs)
    let minAmplitude = magnitudes.min() ?? 0
    let maxAmplitude = magnitudes.max() ?? 0

    let crc32Value = isEnabled("crc32")
        ? calculateCRC32(from: segmentData, count: segmentData.count)
        : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        minAmplitude: minAmplitude,
        maxAmplitude: maxAmplitude,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRollOff: spectralRollOff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch,
        crc32: crc32Value
    )
}
|
|
502
|
-
|
|
503
|
-
/// Analyses the loaded audio file segment by segment within an optional time range.
///
/// The file is read in chunks of `segmentDurationMs`; each chunk is down-mixed to mono by
/// averaging its channels and turned into one `DataPoint`.
///
/// - Parameters:
///   - startTimeMs: Start of the range in milliseconds. Defaults to the beginning of the file.
///   - endTimeMs: End of the range in milliseconds. Defaults to the end of the file; values past
///     the end of the file are clamped to it.
///   - segmentDurationMs: Length of one analysis segment in milliseconds. Must be positive.
///   - featureOptions: Feature flags forwarded unchanged to `computeFeatures`.
/// - Returns: The analysis result, or `nil` when no file is loaded, the range or segment duration
///   is invalid, a buffer cannot be allocated, or reading fails.
public func processAudioData(
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }
    guard segmentDurationMs > 0 else {
        Logger.debug("Invalid segment duration")
        return nil
    }

    let extractionStart = CACurrentMediaTime()
    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.fileFormat.channelCount)
    let fileLength = audioFile.length

    // Converts milliseconds to a frame index; nil when the value cannot be represented,
    // which would otherwise trap in the integer conversion.
    func framePosition(forMilliseconds ms: Double) -> AVAudioFramePosition? {
        let frames = ms * Double(sampleRate) / 1000.0
        guard frames.isFinite, abs(frames) < 9.0e18 else { return nil }
        return AVAudioFramePosition(frames)
    }

    var startFrame: AVAudioFramePosition = 0
    if let startTimeMs = startTimeMs {
        guard let position = framePosition(forMilliseconds: startTimeMs) else {
            Logger.debug("Invalid time range")
            return nil
        }
        startFrame = position
    }

    var endFrame = fileLength
    if let endTimeMs = endTimeMs {
        guard let position = framePosition(forMilliseconds: endTimeMs) else {
            Logger.debug("Invalid time range")
            return nil
        }
        // Clamp instead of rejecting: an end time derived from a rounded duration can land
        // a few frames past the end of the file.
        endFrame = min(position, fileLength)
    }

    guard startFrame >= 0 && startFrame < endFrame else {
        Logger.debug("Invalid time range")
        return nil
    }

    // Frames per segment, capped so the buffer capacity always fits AVAudioFrameCount.
    let requestedFrames = Double(sampleRate) * Double(segmentDurationMs) / 1000.0
    let cappedFrames = min(requestedFrames, Double(endFrame - startFrame), Double(UInt32.max))
    let framesPerBuffer = AVAudioFrameCount(cappedFrames)
    guard framesPerBuffer > 0 else {
        Logger.debug("Segment duration is shorter than one frame")
        return nil
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        Logger.debug("Failed to create buffer")
        return nil
    }
    // Iterate over the channels the buffer really has rather than the count reported by the file format.
    let channelCount = max(Int(buffer.format.channelCount), 1)

    var dataPoints: [DataPoint] = []
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude
    var currentId = 0

    audioFile.framePosition = startFrame
    var currentFrame = startFrame

    while currentFrame < endFrame {
        let framesToRead = AVAudioFrameCount(min(Int64(framesPerBuffer), endFrame - currentFrame))

        do {
            try audioFile.read(into: buffer, frameCount: framesToRead)
        } catch {
            Logger.debug("Error reading audio data: \(error)")
            return nil
        }

        // Trust the number of frames actually delivered, not the number requested.
        let framesRead = Int(buffer.frameLength)
        guard framesRead > 0 else { break }

        guard let channelData = buffer.floatChannelData else {
            // No float samples are available for any segment; skipping would never advance the loop.
            Logger.debug("Audio buffer has no float channel data")
            return nil
        }

        // Down-mix to mono by averaging all channels.
        var mixed = [Float](repeating: 0, count: framesRead)
        for channel in 0..<channelCount {
            let samples = channelData[channel]
            for index in 0..<framesRead {
                mixed[index] += samples[index]
            }
        }
        if channelCount > 1 {
            let scale = Float(channelCount)
            for index in mixed.indices {
                mixed[index] /= scale
            }
        }

        // Count sign changes the same way extractPreview does.
        var zeroCrossings = 0
        for index in 1..<max(framesRead, 1) where mixed[index] * mixed[index - 1] < 0 {
            zeroCrossings += 1
        }

        // Peak magnitude and mean square via vDSP.
        var peak: Float = 0
        var meanSquare: Float = 0
        vDSP_maxmgv(mixed, 1, &peak, vDSP_Length(framesRead))
        vDSP_measqv(mixed, 1, &meanSquare, vDSP_Length(framesRead))
        let rms = sqrt(meanSquare)

        minAmplitude = min(minAmplitude, peak)
        maxAmplitude = max(maxAmplitude, peak)

        let segmentEndFrame = currentFrame + Int64(framesRead)
        // Divide in Double so long files do not lose time precision before narrowing to Float.
        let segmentStartTime = Float(Double(currentFrame) / Double(sampleRate))
        let segmentEndTime = Float(Double(segmentEndFrame) / Double(sampleRate))

        let features = computeFeatures(
            segmentData: mixed,
            sampleRate: sampleRate,
            // computeFeatures takes a sum of squares (as extractPreview supplies), not the mean.
            sumSquares: meanSquare * Float(framesRead),
            zeroCrossings: zeroCrossings,
            segmentLength: framesRead,
            featureOptions: featureOptions
        )

        let dataPoint = DataPoint(
            id: currentId,
            amplitude: peak, // Always use peak amplitude
            rms: rms,
            dB: Float(20 * log10(Double(rms))), // Use RMS for dB calculation
            silent: rms < 0.01, // Use RMS for silence detection
            features: features,
            speech: SpeechFeatures(isActive: rms >= 0.01),
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: Int(currentFrame),
            endPosition: Int(segmentEndFrame),
            samples: framesRead
        )

        dataPoints.append(dataPoint)
        currentId += 1
        currentFrame = segmentEndFrame
    }

    if dataPoints.isEmpty {
        // Nothing was read: do not leak the sentinel initial values into the result.
        minAmplitude = 0
        maxAmplitude = 0
    }

    let extractionTime = Float(CACurrentMediaTime() - extractionStart) * 1000 // Convert to milliseconds

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: Int(endFrame - startFrame),
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTime
    )
}
|
|
643
|
-
|
|
644
|
-
/// Trims the loaded audio file to the requested time ranges and writes the result to a temporary file.
///
/// The ranges to keep come from `computeKeepRanges`. They are clamped to the file's duration here,
/// and empty or inverted ranges are dropped before any audio is read. Audio is processed in
/// fixed-size chunks so long ranges do not need one huge buffer. Three output paths exist:
/// - WAV input to WAV output with no format change: frames are copied as they are read.
/// - WAV output with a format change: frames are converted with `AVAudioConverter` and written as integer PCM.
/// - AAC (`.m4a`) or Opus (`.caf`) output: frames are converted to float PCM and encoded by `AVAssetWriter`.
///
/// NOTE(review): `createSampleBuffer` is called without a presentation time for every chunk;
/// confirm the asset writer lays consecutive chunks out sequentially.
///
/// - Parameters:
///   - mode: Trim mode understood by `computeKeepRanges` ("single", "keep" or "remove").
///   - startTimeMs: Start of the range for "single" mode, in milliseconds.
///   - endTimeMs: End of the range for "single" mode, in milliseconds.
///   - ranges: Range dictionaries (`startTimeMs` / `endTimeMs`) for "keep" and "remove" modes.
///   - outputFileName: Base name of the output file; a UUID is used when `nil`.
///   - outputFormat: Optional `format`, `sampleRate`, `channels`, `bitDepth` and `bitrate` values.
///     Unsupported formats fall back to "aac".
///   - decodingOptions: Passed to `DecodingConfig.fromDictionary`; any target value disables the fast path.
///   - progressCallback: Called after each chunk with the percentage done (0-100), the chunk's byte
///     count (only the fast path supplies it, other paths pass 0) and the total input byte count.
/// - Returns: A description of the written file, or `nil` when no file is loaded, no audio remains
///   after validation, or writing fails (failures are also reported through `reject`).
public func trimAudio(
    mode: String,
    startTimeMs: Double?,
    endTimeMs: Double?,
    ranges: [[String: Double]]?,
    outputFileName: String?,
    outputFormat: [String: Any]?,
    decodingOptions: [String: Any]?,
    progressCallback: ((Float, Int64, Int64) -> Void)? = nil
) -> TrimResult? {
    // Log the input parameters
    Logger.debug("Starting audio trim operation:")
    Logger.debug("- Mode: \(mode)")
    if let start = startTimeMs, let end = endTimeMs {
        Logger.debug("- Time range: \(start)ms to \(end)ms")
    }
    if let ranges = ranges {
        Logger.debug("- Ranges count: \(ranges.count)")
    }
    if let format = outputFormat {
        let formatType = format["format"] as? String ?? "unknown"
        let loggedBitrate = format["bitrate"] as? Int ?? 0
        Logger.debug("- Output format: \(formatType), bitrate: \(loggedBitrate)")
    }

    guard let audioFile = audioFile else { return nil }

    let inputFormat = audioFile.processingFormat
    let inputSampleRate = inputFormat.sampleRate
    let inputChannels = Int(inputFormat.channelCount)
    let fileLength = audioFile.length
    guard inputSampleRate > 0, fileLength > 0 else {
        Logger.debug("Audio file is empty or has an invalid sample rate")
        return nil
    }
    let totalDurationMs = Double(fileLength) / inputSampleRate * 1000

    // Compute ranges to keep
    let requestedRanges = computeKeepRanges(
        mode: mode,
        startTimeMs: startTimeMs,
        endTimeMs: endTimeMs,
        ranges: ranges,
        totalDurationMs: totalDurationMs
    )

    // Clamp every range to the file and convert it to a frame span. Unvalidated ranges would
    // otherwise trap when a negative frame count is converted to an unsigned integer.
    var keepRanges: [[Double]] = []
    var spans: [(start: AVAudioFramePosition, count: Int64)] = []
    for range in requestedRanges where range.count >= 2 && range[0].isFinite && range[1].isFinite {
        let startMs = min(max(range[0], 0), totalDurationMs)
        let endMs = min(max(range[1], 0), totalDurationMs)
        let firstFrame = min(AVAudioFramePosition(startMs / 1000 * inputSampleRate), fileLength)
        let lastFrame = min(AVAudioFramePosition(endMs / 1000 * inputSampleRate), fileLength)
        guard lastFrame > firstFrame else { continue }
        keepRanges.append([startMs, endMs])
        spans.append((start: firstFrame, count: lastFrame - firstFrame))
    }
    let totalFrames = spans.reduce(Int64(0)) { $0 + $1.count }
    guard totalFrames > 0 else { return nil }

    // Output format setup
    let requestedFormat = outputFormat?["format"] as? String ?? "wav"
    let validFormats = ["wav", "aac", "opus"]
    let formatStr = validFormats.contains(requestedFormat.lowercased()) ? requestedFormat.lowercased() : "aac"

    if formatStr != requestedFormat.lowercased() {
        Logger.debug("Unsupported format '\(requestedFormat)', falling back to 'aac'")
    }

    let targetSampleRate = outputFormat?["sampleRate"] as? Double ?? inputSampleRate
    let targetChannels = outputFormat?["channels"] as? Int ?? inputChannels
    let requestedBitDepth = outputFormat?["bitDepth"] as? Int
    let targetBitDepth = requestedBitDepth ?? 16
    let bitrate = outputFormat?["bitrate"] as? Int ?? 128000

    // The file extension names the container that is really written.
    let containerExtension: String
    switch formatStr {
    case "wav": containerExtension = "wav"
    case "aac": containerExtension = "m4a"
    default: containerExtension = "caf"
    }
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(outputFileName ?? UUID().uuidString)
        .appendingPathExtension(containerExtension)

    // The fast path writes with the processing format's settings; report that bit depth truthfully.
    let fastPathSettings = inputFormat.settings
    let fastPathBitDepth = fastPathSettings[AVLinearPCMBitDepthKey] as? Int ?? 32

    let decodingConfig = DecodingConfig.fromDictionary(decodingOptions ?? [:])
    // A different sample rate, channel count or bit depth requested through `outputFormat`
    // also rules out the copy-only fast path.
    let outputDiffersFromInput = targetSampleRate != inputSampleRate
        || targetChannels != inputChannels
        || (requestedBitDepth != nil && requestedBitDepth != fastPathBitDepth)
    let needFormatChange = decodingConfig.targetSampleRate != nil
        || decodingConfig.targetChannels != nil
        || decodingConfig.targetBitDepth != nil
        || outputDiffersFromInput
    let isWavInput = audioFile.fileFormat.settings[AVFormatIDKey] as? UInt32 == kAudioFormatLinearPCM

    let bytesPerFrame = Int64(inputFormat.streamDescription.pointee.mBytesPerFrame)
    let totalBytes = totalFrames * bytesPerFrame
    var cumulativeFrames: Int64 = 0

    // Advances the running frame count and forwards the progress to the caller.
    func reportProgress(frames: Int64, chunkBytes: Int64) {
        cumulativeFrames += frames
        let progress = Float(cumulativeFrames) / Float(totalFrames) * 100
        progressCallback?(progress, chunkBytes, totalBytes)
    }

    // Size of the file at `url`, or 0 when it cannot be read.
    func fileSize(at url: URL) -> Int64 {
        let attributes = try? FileManager.default.attributesOfItem(atPath: url.path)
        return (attributes?[.size] as? Int64) ?? 0
    }

    do {
        if isWavInput && formatStr == "wav" && !needFormatChange {
            // Fast path: WAV-to-WAV with no format changes
            Logger.debug("Creating output file at: \(outputURL.path)")
            do {
                // Scoped so the output file is released before its size is read.
                let outputFile = try AVAudioFile(forWriting: outputURL, settings: fastPathSettings)
                try readTrimChunks(from: audioFile, spans: spans) { chunk in
                    try outputFile.write(from: chunk)
                    let frames = Int64(chunk.frameLength)
                    reportProgress(frames: frames, chunkBytes: frames * bytesPerFrame)
                }
            }

            Logger.debug("Trim operation completed")
            Logger.debug("- Output file: \(outputURL.path)")
            Logger.debug("- File exists: \(FileManager.default.fileExists(atPath: outputURL.path))")
            Logger.debug("- File size: \(fileSize(at: outputURL)) bytes")
            Logger.debug("- File extension: \(outputURL.pathExtension)")

            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(inputSampleRate), channels: inputChannels, bitDepth: fastPathBitDepth, bitrate: bitrate)
        }

        // Non-fast path: decode and re-encode through an intermediate float PCM format.
        guard targetSampleRate > 0, targetChannels > 0,
              let targetFormat = AVAudioFormat(
                  commonFormat: .pcmFormatFloat32,
                  sampleRate: targetSampleRate,
                  channels: AVAudioChannelCount(targetChannels),
                  interleaved: false
              ),
              let converter = AVAudioConverter(from: inputFormat, to: targetFormat) else {
            throw makeTrimError("Unsupported output format: \(targetSampleRate)Hz, \(targetChannels) channel(s)")
        }

        if formatStr == "wav" {
            do {
                let outputFile = try AVAudioFile(forWriting: outputURL, settings: [
                    AVFormatIDKey: kAudioFormatLinearPCM,
                    AVSampleRateKey: targetSampleRate,
                    AVNumberOfChannelsKey: targetChannels,
                    AVLinearPCMBitDepthKey: targetBitDepth,
                    AVLinearPCMIsFloatKey: false,
                    AVLinearPCMIsBigEndianKey: false
                ])

                try readTrimChunks(from: audioFile, spans: spans) { chunk in
                    if let converted = try trimConvertChunk(chunk, using: converter, to: targetFormat) {
                        try outputFile.write(from: converted)
                    }
                    reportProgress(frames: Int64(chunk.frameLength), chunkBytes: 0)
                }
                // Drain whatever the converter is still holding (relevant when resampling).
                if let tail = try trimConvertChunk(nil, using: converter, to: targetFormat) {
                    try outputFile.write(from: tail)
                }
            }
            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(targetSampleRate), channels: targetChannels, bitDepth: targetBitDepth, bitrate: bitrate)
        }

        // AAC or Opus output
        let encoderSampleRate: Double
        let encoderChannels: Int
        let encoderBitrate: Int
        let outputSettings: [String: Any]
        let fileType: AVFileType

        if formatStr == "aac" {
            // AAC typically supports: 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 Hz
            let supportedSampleRates = [8000.0, 11025.0, 12000.0, 16000.0, 22050.0, 24000.0, 32000.0, 44100.0, 48000.0]

            // Default to 44100 if not specified, otherwise snap to the closest supported rate.
            var sampleRate = outputFormat?["sampleRate"] as? Double ?? 44100.0
            if !supportedSampleRates.contains(sampleRate) {
                let closestRate = supportedSampleRates.min(by: { abs($0 - sampleRate) < abs($1 - sampleRate) }) ?? 44100.0
                Logger.debug("Unsupported sample rate \(sampleRate)Hz for AAC, using closest supported rate: \(closestRate)Hz")
                sampleRate = closestRate
            }

            // Validate channels (AAC typically supports 1 or 2 channels)
            var channels = outputFormat?["channels"] as? Int ?? 2
            if channels > 2 {
                Logger.debug("AAC encoding doesn't support \(channels) channels, limiting to 2 channels")
                channels = 2
            } else if channels < 1 {
                channels = 1
            }

            // Validate bitrate (AAC typically supports 8000-320000 bps)
            var aacBitrate = outputFormat?["bitrate"] as? Int ?? 128000
            if aacBitrate < 8000 {
                Logger.debug("AAC bitrate too low, setting to minimum 8000 bps")
                aacBitrate = 8000
            } else if aacBitrate > 320000 {
                Logger.debug("AAC bitrate too high, setting to maximum 320000 bps")
                aacBitrate = 320000
            }

            encoderSampleRate = sampleRate
            encoderChannels = channels
            encoderBitrate = aacBitrate
            outputSettings = [
                AVFormatIDKey: kAudioFormatMPEG4AAC,
                AVSampleRateKey: sampleRate,
                AVNumberOfChannelsKey: channels,
                AVEncoderBitRateKey: aacBitrate,
                AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
            ]
            fileType = .m4a

            Logger.debug("""
            Configuring AAC output:
            - Container: m4a
            - Format: AAC
            - Sample rate: \(sampleRate)Hz
            - Channels: \(channels)
            - Bitrate: \(aacBitrate) bps
            - Output path: \(outputURL.path)
            - File type: \(fileType)
            """)
        } else {
            // Opus settings - use CAF container which can hold Opus
            encoderSampleRate = targetSampleRate
            encoderChannels = targetChannels
            encoderBitrate = bitrate
            outputSettings = [
                AVFormatIDKey: kAudioFormatOpus,
                AVSampleRateKey: targetSampleRate,
                AVNumberOfChannelsKey: targetChannels,
                AVEncoderBitRateKey: bitrate
            ]
            fileType = .caf // Core Audio Format can contain Opus
        }

        // The asset writer refuses to start when a file already exists at the destination.
        if FileManager.default.fileExists(atPath: outputURL.path) {
            try FileManager.default.removeItem(at: outputURL)
        }

        let assetWriter = try AVAssetWriter(outputURL: outputURL, fileType: fileType)
        // Check the settings first so unsupported ones surface as an error.
        guard assetWriter.canApply(outputSettings: outputSettings, forMediaType: .audio) else {
            throw makeTrimError("Output settings are not supported for '\(formatStr)'")
        }
        let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: outputSettings)
        writerInput.expectsMediaDataInRealTime = false
        guard assetWriter.canAdd(writerInput) else {
            throw makeTrimError("Cannot add audio input to the asset writer")
        }
        assetWriter.add(writerInput)

        guard assetWriter.startWriting() else {
            throw assetWriter.error ?? makeTrimError("Asset writer failed to start")
        }
        assetWriter.startSession(atSourceTime: CMTime.zero)

        // Wraps one converted PCM chunk in a sample buffer and hands it to the writer.
        func appendToWriter(_ pcm: AVAudioPCMBuffer) throws {
            guard let sampleBuffer = createSampleBuffer(from: pcm) else {
                throw makeTrimError("Failed to create sample buffer")
            }
            while !writerInput.isReadyForMoreMediaData {
                // A failed or cancelled writer never becomes ready again; bail out instead of spinning.
                guard assetWriter.status == .writing else {
                    throw assetWriter.error ?? makeTrimError("Asset writer stopped before all audio was written")
                }
                Thread.sleep(forTimeInterval: 0.01)
            }
            guard writerInput.append(sampleBuffer) else {
                throw assetWriter.error ?? makeTrimError("Failed to append sample buffer")
            }
        }

        do {
            try readTrimChunks(from: audioFile, spans: spans) { chunk in
                if let converted = try trimConvertChunk(chunk, using: converter, to: targetFormat) {
                    try appendToWriter(converted)
                }
                reportProgress(frames: Int64(chunk.frameLength), chunkBytes: 0)
            }
            if let tail = try trimConvertChunk(nil, using: converter, to: targetFormat) {
                try appendToWriter(tail)
            }
        } catch {
            // Do not leave a half-written file behind.
            assetWriter.cancelWriting()
            throw error
        }

        // Finish writing and wait for the writer to flush the file.
        writerInput.markAsFinished()
        let finishSemaphore = DispatchSemaphore(value: 0)
        assetWriter.finishWriting {
            finishSemaphore.signal()
        }
        finishSemaphore.wait()

        if let error = assetWriter.error {
            Logger.debug("Error finishing writing: \(error)")
        } else {
            Logger.debug("Writing finished successfully")
        }

        let outputSize = fileSize(at: outputURL)
        Logger.debug("""
        Output file verification:
        - Path: \(outputURL.path)
        - Exists: \(FileManager.default.fileExists(atPath: outputURL.path))
        - Size: \(outputSize) bytes
        - Extension: \(outputURL.pathExtension)
        """)

        // A file can exist even though the writer failed, so check both.
        guard assetWriter.status == .completed, FileManager.default.fileExists(atPath: outputURL.path) else {
            reject("FILE_CREATION_FAILED", "Failed to create output file")
            return nil
        }

        // Create compression info
        var compressionInfo: [String: Any] = [
            "format": formatStr,
            "bitrate": encoderBitrate,
            "size": outputSize
        ]

        // Add fallback information if applicable
        if formatStr != requestedFormat.lowercased() {
            compressionInfo["requestedFormat"] = requestedFormat
            compressionInfo["fallbackReason"] = "Unsupported format"
        }

        // Use the correct MIME type
        let mimeType = formatStr == "aac" ? "audio/mp4" : "audio/opus"

        return TrimResult(
            uri: outputURL.absoluteString,
            filename: outputURL.lastPathComponent,
            durationMs: keepRanges.map { $0[1] - $0[0] }.reduce(0, +),
            size: outputSize,
            sampleRate: Int(encoderSampleRate),
            channels: encoderChannels,
            bitDepth: 16,
            mimeType: mimeType,
            requestedFormat: formatStr,
            actualFormat: containerExtension,
            compression: compressionInfo
        )
    } catch {
        reject("TRIM_ERROR", "Failed to trim audio: \(error.localizedDescription)")
        return nil
    }
}

/// Reads each frame span from `file` in chunks of at most `chunkFrames` frames and passes every
/// non-empty chunk to `body`. A fresh buffer is allocated per chunk so `body` may hand it on.
private func readTrimChunks(
    from file: AVAudioFile,
    spans: [(start: AVAudioFramePosition, count: Int64)],
    chunkFrames: AVAudioFrameCount = 32768,
    body: (AVAudioPCMBuffer) throws -> Void
) throws {
    for span in spans {
        file.framePosition = span.start
        var remaining = span.count
        while remaining > 0 {
            let request = AVAudioFrameCount(min(Int64(chunkFrames), remaining))
            guard let chunk = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: request) else {
                throw makeTrimError("Failed to allocate read buffer")
            }
            try file.read(into: chunk, frameCount: request)
            // A zero-length read means the file ended early; stop this span.
            guard chunk.frameLength > 0 else { break }
            try body(chunk)
            remaining -= Int64(chunk.frameLength)
        }
    }
}

/// Converts one chunk to `outputFormat`, supplying the chunk to the converter exactly once.
/// Pass `nil` to signal end of stream and drain any frames the converter still holds.
/// Returns `nil` when the converter produced no frames.
private func trimConvertChunk(
    _ chunk: AVAudioPCMBuffer?,
    using converter: AVAudioConverter,
    to outputFormat: AVAudioFormat
) throws -> AVAudioPCMBuffer? {
    let ratio = outputFormat.sampleRate / converter.inputFormat.sampleRate
    let expectedFrames = (Double(chunk?.frameLength ?? 0) * ratio).rounded(.up)
    // Head-room for frames the resampler carried over from earlier chunks.
    let capacity = AVAudioFrameCount(expectedFrames) + 4096
    guard let output = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: capacity) else {
        throw makeTrimError("Failed to allocate conversion buffer")
    }

    var pending = chunk
    var conversionError: NSError?
    let status = converter.convert(to: output, error: &conversionError) { _, inputStatus in
        if let next = pending {
            pending = nil
            inputStatus.pointee = .haveData
            return next
        }
        // Answering .haveData again would feed the same audio to the converter twice.
        inputStatus.pointee = chunk == nil ? .endOfStream : .noDataNow
        return nil
    }

    if status == .error {
        throw conversionError ?? makeTrimError("Audio conversion failed")
    }
    return output.frameLength > 0 ? output : nil
}

/// Builds the error used to abort a trim operation with a readable message.
private func makeTrimError(_ message: String) -> NSError {
    return NSError(domain: "AudioTrim", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
}
|
|
1038
|
-
|
|
1039
|
-
/// Computes the time ranges, in milliseconds, that should be kept for a trim operation.
///
/// Every returned range is clamped to `0...totalDurationMs` and has `end > start`; inverted,
/// empty and non-finite ranges are dropped so callers can turn ranges into frame counts safely.
///
/// - Parameters:
///   - mode: `"single"` keeps `startTimeMs...endTimeMs`, `"keep"` keeps each entry of `ranges`,
///     `"remove"` keeps everything outside the entries of `ranges`. Any other value yields `[]`.
///   - startTimeMs: Start of the range for `"single"` mode.
///   - endTimeMs: End of the range for `"single"` mode.
///   - ranges: Dictionaries with optional `startTimeMs` (default 0) and `endTimeMs`
///     (default `totalDurationMs`) keys.
///   - totalDurationMs: Duration of the audio in milliseconds.
/// - Returns: `[start, end]` pairs. `"remove"` results are sorted and non-overlapping; `"keep"`
///   results preserve the order of `ranges`.
private func computeKeepRanges(mode: String, startTimeMs: Double?, endTimeMs: Double?, ranges: [[String: Double]]?, totalDurationMs: Double) -> [[Double]] {
    let upperBound = totalDurationMs.isFinite ? max(totalDurationMs, 0) : 0

    // Clamps one range to the audio; nil when nothing is left of it.
    func normalized(_ start: Double, _ end: Double) -> [Double]? {
        guard start.isFinite, end.isFinite else { return nil }
        let clampedStart = min(max(start, 0), upperBound)
        let clampedEnd = min(max(end, 0), upperBound)
        return clampedEnd > clampedStart ? [clampedStart, clampedEnd] : nil
    }

    // Applies the per-key defaults and normalizes every dictionary entry.
    func normalizedEntries() -> [[Double]] {
        return (ranges ?? []).compactMap { entry in
            normalized(entry["startTimeMs"] ?? 0, entry["endTimeMs"] ?? upperBound)
        }
    }

    switch mode {
    case "single":
        guard let start = startTimeMs, let end = endTimeMs else { return [] }
        return normalized(start, end).map { [$0] } ?? []
    case "keep":
        return normalizedEntries()
    case "remove":
        let removals = normalizedEntries().sorted { $0[0] < $1[0] }
        var kept: [[Double]] = []
        var cursor = 0.0
        for removal in removals {
            // Keep the gap between the previous removal and this one.
            if removal[0] > cursor {
                kept.append([cursor, removal[0]])
            }
            // max() merges overlapping removals.
            cursor = max(cursor, removal[1])
        }
        if cursor < upperBound {
            kept.append([cursor, upperBound])
        }
        return kept
    default:
        return []
    }
}
|
|
1064
|
-
|
|
1065
|
-
/// Builds the `TrimResult` describing a trimmed file that was written to `url`.
///
/// - Parameters:
///   - url: Location of the written file; its size is read from disk (0 when unavailable).
///   - keepRanges: `[start, end]` pairs in milliseconds; their lengths are summed into the duration.
///   - formatStr: Output format name; `"wav"` and `"aac"` map to themselves, anything else to `"opus"`.
///   - sampleRate: Sample rate to report.
///   - channels: Channel count to report.
///   - bitDepth: Bit depth to report.
///   - bitrate: Accepted for call-site symmetry; not used when building the result.
///   - compression: Optional compression details passed through unchanged.
/// - Returns: The assembled `TrimResult`.
private func createTrimResult(from url: URL, keepRanges: [[Double]], formatStr: String, sampleRate: Int, channels: Int, bitDepth: Int, bitrate: Int, compression: [String: Any]? = nil) -> TrimResult {
    // Total kept duration: sum of (end - start) over every range.
    let totalKeptMs = keepRanges.reduce(0.0) { runningTotal, range in
        runningTotal + (range[1] - range[0])
    }

    // File size on disk, falling back to 0 when the attributes cannot be read.
    let attributes = try? FileManager.default.attributesOfItem(atPath: url.path)
    let byteCount = (attributes?[.size] as? Int64) ?? 0

    let formatName: String
    switch formatStr {
    case "wav":
        formatName = "wav"
    case "aac":
        formatName = "aac"
    default:
        formatName = "opus"
    }

    return TrimResult(
        uri: url.absoluteString,
        filename: url.lastPathComponent,
        durationMs: totalKeptMs,
        size: byteCount,
        sampleRate: sampleRate,
        channels: channels,
        bitDepth: bitDepth,
        mimeType: "audio/\(formatName)",
        requestedFormat: formatStr,
        actualFormat: formatName,
        compression: compression
    )
}
|
|
1083
|
-
|
|
1084
|
-
/// Wraps the audio in a PCM buffer in a `CMSampleBuffer` suitable for an asset writer input.
///
/// The sample data is copied into memory owned by the sample buffer, so the result stays valid
/// after `buffer` is released or reused, and every channel in the buffer's list is included.
///
/// - Parameters:
///   - buffer: The PCM audio to wrap. Its `frameLength` must be greater than zero.
///   - presentationTimeStamp: Presentation time of the first frame. Defaults to `.zero`, which
///     matches the previous behaviour for existing callers.
/// - Returns: The sample buffer, or `nil` when the buffer is empty or any Core Media call fails.
private func createSampleBuffer(from buffer: AVAudioPCMBuffer, presentationTimeStamp: CMTime = .zero) -> CMSampleBuffer? {
    guard buffer.frameLength > 0 else { return nil }

    var formatDesc: CMAudioFormatDescription?
    let formatStatus = CMAudioFormatDescriptionCreate(
        allocator: kCFAllocatorDefault,
        asbd: buffer.format.streamDescription,
        layoutSize: 0,
        layout: nil,
        magicCookieSize: 0,
        magicCookie: nil,
        extensions: nil,
        formatDescriptionOut: &formatDesc
    )
    guard formatStatus == noErr, let format = formatDesc else { return nil }

    // One timing entry applies to every sample: each lasts 1 / sampleRate seconds.
    var timingInfo = CMSampleTimingInfo(
        duration: CMTime(value: 1, timescale: CMTimeScale(buffer.format.sampleRate)),
        presentationTimeStamp: presentationTimeStamp,
        decodeTimeStamp: .invalid
    )

    var sampleBuffer: CMSampleBuffer?
    let createStatus = CMSampleBufferCreate(
        allocator: kCFAllocatorDefault,
        dataBuffer: nil,
        dataReady: false,
        makeDataReadyCallback: nil,
        refcon: nil,
        formatDescription: format,
        sampleCount: CMItemCount(buffer.frameLength),
        sampleTimingEntryCount: 1,
        sampleTimingArray: &timingInfo,
        sampleSizeEntryCount: 0,
        sampleSizeArray: nil,
        sampleBufferOut: &sampleBuffer
    )
    guard createStatus == noErr, let sampleBuf = sampleBuffer else { return nil }

    // Copy the samples into a block buffer owned by the sample buffer. Pointing at the PCM
    // buffer's own memory would leave the sample buffer dangling once that buffer goes away.
    let dataStatus = CMSampleBufferSetDataBufferFromAudioBufferList(
        sampleBuf,
        blockBufferAllocator: kCFAllocatorDefault,
        blockBufferMemoryAllocator: kCFAllocatorDefault,
        flags: 0,
        bufferList: buffer.audioBufferList
    )
    guard dataStatus == noErr else { return nil }

    // The buffer was created with dataReady: false, so flag it ready now that the data is attached.
    CMSampleBufferSetDataReady(sampleBuf)

    return sampleBuf
}
|
|
1139
|
-
|
|
1140
|
-
/// Extracts a preview of the audio data with consistent time range support
|
|
1141
|
-
/// - Parameters:
|
|
1142
|
-
/// - numberOfPoints: The number of points to extract
|
|
1143
|
-
/// - startTimeMs: Optional start time in milliseconds
|
|
1144
|
-
/// - endTimeMs: Optional end time in milliseconds
|
|
1145
|
-
/// - featureOptions: The features to extract
|
|
1146
|
-
/// - Returns: An `AudioAnalysisData` object containing the extracted features
|
|
1147
|
-
public func extractPreview(
|
|
1148
|
-
numberOfPoints: Int,
|
|
1149
|
-
startTimeMs: Double? = nil,
|
|
1150
|
-
endTimeMs: Double? = nil,
|
|
1151
|
-
featureOptions: [String: Bool]
|
|
1152
|
-
) -> AudioAnalysisData? {
|
|
1153
|
-
guard let audioFile = audioFile else {
|
|
1154
|
-
reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
|
|
1155
|
-
return nil
|
|
1156
|
-
}
|
|
1157
|
-
|
|
1158
|
-
let sampleRate = Float(audioFile.fileFormat.sampleRate)
|
|
1159
|
-
let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000
|
|
1160
|
-
|
|
1161
|
-
// Calculate effective time range
|
|
1162
|
-
let effectiveStartMs = startTimeMs ?? 0.0
|
|
1163
|
-
let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
|
|
1164
|
-
let durationMs = effectiveEndMs - effectiveStartMs // This is the actual duration we want to use
|
|
1165
|
-
|
|
1166
|
-
// Convert time to frames with proper offset
|
|
1167
|
-
let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
|
|
1168
|
-
let endFrame = AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0)
|
|
1169
|
-
let samplesInRange = Int(endFrame - startFrame)
|
|
1170
|
-
|
|
1171
|
-
guard samplesInRange > 0 else {
|
|
1172
|
-
reject("INVALID_RANGE", "Invalid sample range: contains no samples")
|
|
1173
|
-
return nil
|
|
1174
|
-
}
|
|
1175
|
-
|
|
1176
|
-
// Calculate exact samples per point to get the requested number of points
|
|
1177
|
-
let samplesPerPoint = samplesInRange / numberOfPoints
|
|
1178
|
-
var dataPoints = [DataPoint]()
|
|
1179
|
-
dataPoints.reserveCapacity(numberOfPoints)
|
|
1180
|
-
|
|
1181
|
-
var minAmplitude: Float = .greatestFiniteMagnitude
|
|
1182
|
-
var maxAmplitude: Float = -.greatestFiniteMagnitude
|
|
1183
|
-
|
|
1184
|
-
let bytesPerSample = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16 / 8
|
|
1185
|
-
|
|
1186
|
-
for i in 0..<numberOfPoints {
|
|
1187
|
-
let pointStartFrame = startFrame + Int64(i * samplesPerPoint)
|
|
1188
|
-
let pointEndFrame = startFrame + Int64((i + 1) * samplesPerPoint)
|
|
1189
|
-
let framesToRead = AVAudioFrameCount(pointEndFrame - pointStartFrame)
|
|
1190
|
-
|
|
1191
|
-
// Calculate byte positions
|
|
1192
|
-
let startPosition = Int(pointStartFrame) * bytesPerSample * Int(audioFile.fileFormat.channelCount)
|
|
1193
|
-
let endPosition = Int(pointEndFrame) * bytesPerSample * Int(audioFile.fileFormat.channelCount)
|
|
1194
|
-
let segmentStartTime = Float(pointStartFrame) / sampleRate
|
|
1195
|
-
let segmentEndTime = Float(pointEndFrame) / sampleRate
|
|
1196
|
-
|
|
1197
|
-
do {
|
|
1198
|
-
audioFile.framePosition = pointStartFrame
|
|
1199
|
-
let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesToRead)!
|
|
1200
|
-
try audioFile.read(into: buffer, frameCount: framesToRead)
|
|
1201
|
-
|
|
1202
|
-
guard let floatData = buffer.floatChannelData else { continue }
|
|
1203
|
-
|
|
1204
|
-
var sumSquares: Float = 0
|
|
1205
|
-
var zeroCrossings = 0
|
|
1206
|
-
var prevValue: Float = 0
|
|
1207
|
-
var localMinAmplitude: Float = .greatestFiniteMagnitude
|
|
1208
|
-
var localMaxAmplitude: Float = -.greatestFiniteMagnitude
|
|
1209
|
-
|
|
1210
|
-
// Process samples for this point
|
|
1211
|
-
for frame in 0..<Int(framesToRead) {
|
|
1212
|
-
let value = floatData[0][frame]
|
|
1213
|
-
sumSquares += value * value
|
|
1214
|
-
if frame > 0 && value * prevValue < 0 {
|
|
1215
|
-
zeroCrossings += 1
|
|
1216
|
-
}
|
|
1217
|
-
prevValue = value
|
|
1218
|
-
|
|
1219
|
-
let absValue = abs(value)
|
|
1220
|
-
localMinAmplitude = min(localMinAmplitude, absValue)
|
|
1221
|
-
localMaxAmplitude = max(localMaxAmplitude, absValue)
|
|
1222
|
-
}
|
|
1223
|
-
|
|
1224
|
-
let features = computeFeatures(segmentData: Array(UnsafeBufferPointer(start: floatData[0], count: Int(framesToRead))),
|
|
1225
|
-
sampleRate: sampleRate,
|
|
1226
|
-
sumSquares: sumSquares,
|
|
1227
|
-
zeroCrossings: zeroCrossings,
|
|
1228
|
-
segmentLength: Int(framesToRead),
|
|
1229
|
-
featureOptions: featureOptions)
|
|
1230
|
-
|
|
1231
|
-
let rms = features.rms
|
|
1232
|
-
let silent = rms < 0.01
|
|
1233
|
-
let dB = Float(20 * log10(Double(rms)))
|
|
1234
|
-
|
|
1235
|
-
let dataPoint = DataPoint(
|
|
1236
|
-
id: Int(uniqueIdCounter),
|
|
1237
|
-
amplitude: localMaxAmplitude,
|
|
1238
|
-
rms: rms,
|
|
1239
|
-
dB: dB,
|
|
1240
|
-
silent: silent,
|
|
1241
|
-
features: features,
|
|
1242
|
-
speech: SpeechFeatures(isActive: !silent),
|
|
1243
|
-
startTime: segmentStartTime,
|
|
1244
|
-
endTime: segmentEndTime,
|
|
1245
|
-
startPosition: startPosition,
|
|
1246
|
-
endPosition: endPosition,
|
|
1247
|
-
samples: Int(framesToRead)
|
|
1248
|
-
)
|
|
1249
|
-
dataPoints.append(dataPoint)
|
|
1250
|
-
uniqueIdCounter += 1
|
|
1251
|
-
|
|
1252
|
-
minAmplitude = min(minAmplitude, localMinAmplitude)
|
|
1253
|
-
maxAmplitude = max(maxAmplitude, localMaxAmplitude)
|
|
1254
|
-
} catch {
|
|
1255
|
-
reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
|
|
1256
|
-
return nil
|
|
1257
|
-
}
|
|
1258
|
-
}
|
|
1259
|
-
|
|
1260
|
-
let startTime = CACurrentMediaTime() // Start timing
|
|
1261
|
-
|
|
1262
|
-
let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
|
|
1263
|
-
let numberOfChannels = Int(audioFile.processingFormat.channelCount)
|
|
1264
|
-
|
|
1265
|
-
NSLog("""
|
|
1266
|
-
[AudioProcessor] Starting preview extraction:
|
|
1267
|
-
- numberOfPoints: \(numberOfPoints)
|
|
1268
|
-
- startTimeMs: \(String(describing: startTimeMs))
|
|
1269
|
-
- endTimeMs: \(String(describing: endTimeMs))
|
|
1270
|
-
- durationMs: \(durationMs)
|
|
1271
|
-
- sampleRate: \(sampleRate)
|
|
1272
|
-
- bitDepth: \(bitDepth)
|
|
1273
|
-
- channels: \(numberOfChannels)
|
|
1274
|
-
- samplesInRange: \(samplesInRange)
|
|
1275
|
-
- samplesPerPoint: \(samplesPerPoint)
|
|
1276
|
-
""")
|
|
1277
|
-
|
|
1278
|
-
let endTime = CACurrentMediaTime()
|
|
1279
|
-
let extractionTimeMs = Float((endTime - startTime) * 1000)
|
|
1280
|
-
|
|
1281
|
-
NSLog("""
|
|
1282
|
-
[AudioProcessor] Preview extraction completed:
|
|
1283
|
-
- dataPoints generated: \(dataPoints.count)
|
|
1284
|
-
- extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
|
|
1285
|
-
- amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
|
|
1286
|
-
""")
|
|
1287
|
-
|
|
1288
|
-
return AudioAnalysisData(
|
|
1289
|
-
segmentDurationMs: 100, // Default 100ms
|
|
1290
|
-
durationMs: Int(durationMs), // Use actual duration of trimmed section
|
|
1291
|
-
bitDepth: bitDepth,
|
|
1292
|
-
numberOfChannels: numberOfChannels,
|
|
1293
|
-
sampleRate: Int(sampleRate),
|
|
1294
|
-
samples: samplesInRange,
|
|
1295
|
-
dataPoints: dataPoints,
|
|
1296
|
-
amplitudeRange: AudioAnalysisData.AmplitudeRange(
|
|
1297
|
-
min: minAmplitude,
|
|
1298
|
-
max: maxAmplitude
|
|
1299
|
-
),
|
|
1300
|
-
rmsRange: AudioAnalysisData.AmplitudeRange(
|
|
1301
|
-
min: 0,
|
|
1302
|
-
max: 1
|
|
1303
|
-
),
|
|
1304
|
-
speechAnalysis: nil,
|
|
1305
|
-
extractionTimeMs: extractionTimeMs
|
|
1306
|
-
)
|
|
1307
|
-
}
|
|
1308
|
-
|
|
1309
|
-
// MARK: - Private helpers (AudioProcessor)
|
|
1310
|
-
/// Looks up the Documents directory in the user domain via `FileManager.default`.
///
/// - Returns: The first entry of `urls(for: .documentDirectory, in: .userDomainMask)`.
/// - Note: The result is indexed with `[0]` without an emptiness check, so an empty
///   lookup result would trap at runtime rather than return a fallback.
private func getDocumentsDirectory() -> URL {
|
|
1311
|
-
return FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
|
|
1312
|
-
}
|
|
1313
|
-
}
|