@siteed/expo-audio-stream 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/README.md +40 -222
  2. package/build/index.d.ts +11 -15
  3. package/build/index.js +44 -14
  4. package/package.json +49 -110
  5. package/src/index.ts +18 -32
  6. package/CHANGELOG.md +0 -206
  7. package/android/build.gradle +0 -105
  8. package/android/src/main/AndroidManifest.xml +0 -27
  9. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
  10. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  11. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
  12. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
  13. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
  14. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -2235
  15. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
  16. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -152
  17. package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +0 -1099
  18. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -21
  19. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  20. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -739
  21. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  22. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  23. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
  24. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
  25. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  26. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
  27. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  28. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  29. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  30. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  31. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  32. package/android/src/main/res/drawable/ic_play.xml +0 -10
  33. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  34. package/android/src/main/res/layout/notification_recording.xml +0 -37
  35. package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
  36. package/app.plugin.js +0 -1
  37. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -179
  38. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  39. package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
  40. package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  41. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -68
  42. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  43. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -203
  44. package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  45. package/build/AudioAnalysis/extractAudioData.d.ts +0 -3
  46. package/build/AudioAnalysis/extractAudioData.d.ts.map +0 -1
  47. package/build/AudioAnalysis/extractAudioData.js +0 -5
  48. package/build/AudioAnalysis/extractAudioData.js.map +0 -1
  49. package/build/AudioAnalysis/extractMelSpectrogram.d.ts +0 -14
  50. package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +0 -1
  51. package/build/AudioAnalysis/extractMelSpectrogram.js +0 -85
  52. package/build/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  53. package/build/AudioAnalysis/extractPreview.d.ts +0 -11
  54. package/build/AudioAnalysis/extractPreview.d.ts.map +0 -1
  55. package/build/AudioAnalysis/extractPreview.js +0 -25
  56. package/build/AudioAnalysis/extractPreview.js.map +0 -1
  57. package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
  58. package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  59. package/build/AudioAnalysis/extractWaveform.js +0 -11
  60. package/build/AudioAnalysis/extractWaveform.js.map +0 -1
  61. package/build/AudioRecorder.provider.d.ts +0 -11
  62. package/build/AudioRecorder.provider.d.ts.map +0 -1
  63. package/build/AudioRecorder.provider.js +0 -37
  64. package/build/AudioRecorder.provider.js.map +0 -1
  65. package/build/ExpoAudioStream.native.d.ts +0 -3
  66. package/build/ExpoAudioStream.native.d.ts.map +0 -1
  67. package/build/ExpoAudioStream.native.js +0 -6
  68. package/build/ExpoAudioStream.native.js.map +0 -1
  69. package/build/ExpoAudioStream.types.d.ts +0 -532
  70. package/build/ExpoAudioStream.types.d.ts.map +0 -1
  71. package/build/ExpoAudioStream.types.js +0 -2
  72. package/build/ExpoAudioStream.types.js.map +0 -1
  73. package/build/ExpoAudioStream.web.d.ts +0 -59
  74. package/build/ExpoAudioStream.web.d.ts.map +0 -1
  75. package/build/ExpoAudioStream.web.js +0 -285
  76. package/build/ExpoAudioStream.web.js.map +0 -1
  77. package/build/ExpoAudioStreamModule.d.ts +0 -3
  78. package/build/ExpoAudioStreamModule.d.ts.map +0 -1
  79. package/build/ExpoAudioStreamModule.js +0 -693
  80. package/build/ExpoAudioStreamModule.js.map +0 -1
  81. package/build/WebRecorder.web.d.ts +0 -119
  82. package/build/WebRecorder.web.d.ts.map +0 -1
  83. package/build/WebRecorder.web.js +0 -436
  84. package/build/WebRecorder.web.js.map +0 -1
  85. package/build/constants.d.ts +0 -11
  86. package/build/constants.d.ts.map +0 -1
  87. package/build/constants.js +0 -14
  88. package/build/constants.js.map +0 -1
  89. package/build/events.d.ts +0 -26
  90. package/build/events.d.ts.map +0 -1
  91. package/build/events.js +0 -21
  92. package/build/events.js.map +0 -1
  93. package/build/index.d.ts.map +0 -1
  94. package/build/index.js.map +0 -1
  95. package/build/trimAudio.d.ts +0 -25
  96. package/build/trimAudio.d.ts.map +0 -1
  97. package/build/trimAudio.js +0 -67
  98. package/build/trimAudio.js.map +0 -1
  99. package/build/useAudioRecorder.d.ts +0 -21
  100. package/build/useAudioRecorder.d.ts.map +0 -1
  101. package/build/useAudioRecorder.js +0 -427
  102. package/build/useAudioRecorder.js.map +0 -1
  103. package/build/utils/BlobFix.d.ts +0 -9
  104. package/build/utils/BlobFix.d.ts.map +0 -1
  105. package/build/utils/BlobFix.js +0 -498
  106. package/build/utils/BlobFix.js.map +0 -1
  107. package/build/utils/audioProcessing.d.ts +0 -24
  108. package/build/utils/audioProcessing.d.ts.map +0 -1
  109. package/build/utils/audioProcessing.js +0 -133
  110. package/build/utils/audioProcessing.js.map +0 -1
  111. package/build/utils/concatenateBuffers.d.ts +0 -8
  112. package/build/utils/concatenateBuffers.d.ts.map +0 -1
  113. package/build/utils/concatenateBuffers.js +0 -21
  114. package/build/utils/concatenateBuffers.js.map +0 -1
  115. package/build/utils/convertPCMToFloat32.d.ts +0 -13
  116. package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
  117. package/build/utils/convertPCMToFloat32.js +0 -120
  118. package/build/utils/convertPCMToFloat32.js.map +0 -1
  119. package/build/utils/encodingToBitDepth.d.ts +0 -5
  120. package/build/utils/encodingToBitDepth.d.ts.map +0 -1
  121. package/build/utils/encodingToBitDepth.js +0 -13
  122. package/build/utils/encodingToBitDepth.js.map +0 -1
  123. package/build/utils/getWavFileInfo.d.ts +0 -26
  124. package/build/utils/getWavFileInfo.d.ts.map +0 -1
  125. package/build/utils/getWavFileInfo.js +0 -92
  126. package/build/utils/getWavFileInfo.js.map +0 -1
  127. package/build/utils/writeWavHeader.d.ts +0 -49
  128. package/build/utils/writeWavHeader.d.ts.map +0 -1
  129. package/build/utils/writeWavHeader.js +0 -91
  130. package/build/utils/writeWavHeader.js.map +0 -1
  131. package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  132. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  133. package/build/workers/InlineFeaturesExtractor.web.js +0 -828
  134. package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
  135. package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
  136. package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  137. package/build/workers/inlineAudioWebWorker.web.js +0 -157
  138. package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
  139. package/expo-module.config.json +0 -9
  140. package/ios/AudioAnalysisData.swift +0 -74
  141. package/ios/AudioNotificationManager.swift +0 -135
  142. package/ios/AudioProcessingHelpers.swift +0 -743
  143. package/ios/AudioProcessor.swift +0 -1313
  144. package/ios/AudioStreamError.swift +0 -7
  145. package/ios/AudioStreamManager.swift +0 -1708
  146. package/ios/AudioStreamManagerDelegate.swift +0 -16
  147. package/ios/DataPoint.swift +0 -54
  148. package/ios/DecodingConfig.swift +0 -47
  149. package/ios/ExpoAudioStream.podspec +0 -27
  150. package/ios/ExpoAudioStreamModule.swift +0 -805
  151. package/ios/FFT.swift +0 -62
  152. package/ios/Features.swift +0 -95
  153. package/ios/Logger.swift +0 -7
  154. package/ios/NotificationExtension.swift +0 -15
  155. package/ios/RecordingResult.swift +0 -22
  156. package/ios/RecordingSettings.swift +0 -265
  157. package/ios/WaveformExtractor.swift +0 -105
  158. package/plugin/build/index.d.ts +0 -21
  159. package/plugin/build/index.js +0 -191
  160. package/plugin/src/index.ts +0 -278
  161. package/plugin/tsconfig.json +0 -10
  162. package/plugin/tsconfig.tsbuildinfo +0 -1
  163. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -202
  164. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -333
  165. package/src/AudioAnalysis/extractAudioData.ts +0 -6
  166. package/src/AudioAnalysis/extractMelSpectrogram.ts +0 -144
  167. package/src/AudioAnalysis/extractPreview.ts +0 -34
  168. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  169. package/src/AudioRecorder.provider.tsx +0 -54
  170. package/src/ExpoAudioStream.native.ts +0 -6
  171. package/src/ExpoAudioStream.types.ts +0 -641
  172. package/src/ExpoAudioStream.web.ts +0 -359
  173. package/src/ExpoAudioStreamModule.ts +0 -967
  174. package/src/WebRecorder.web.ts +0 -580
  175. package/src/constants.ts +0 -18
  176. package/src/events.ts +0 -60
  177. package/src/trimAudio.ts +0 -90
  178. package/src/useAudioRecorder.tsx +0 -620
  179. package/src/utils/BlobFix.ts +0 -559
  180. package/src/utils/audioProcessing.ts +0 -205
  181. package/src/utils/concatenateBuffers.ts +0 -24
  182. package/src/utils/convertPCMToFloat32.ts +0 -170
  183. package/src/utils/encodingToBitDepth.ts +0 -18
  184. package/src/utils/getWavFileInfo.ts +0 -132
  185. package/src/utils/writeWavHeader.ts +0 -114
  186. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
  187. package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
@@ -1,1313 +0,0 @@
1
- // packages/expo-audio-stream/ios/AudioProcessor.swift
2
-
3
- import Foundation
4
- import Accelerate
5
- import AVFoundation
6
- import QuartzCore
7
-
8
/// Describes the file produced by a trim operation.
///
/// Every value is stored exactly as supplied at construction time. The type keeps the
/// implicit memberwise initializer, whose labels and order match the stored properties:
/// `uri`, `filename`, `durationMs`, `size`, `sampleRate`, `channels`, `bitDepth`,
/// `mimeType`, `requestedFormat`, `actualFormat`, `compression`.
public struct TrimResult {
    let uri: String
    let filename: String
    let durationMs: Double
    let size: Int64
    let sampleRate: Int
    let channels: Int
    let bitDepth: Int
    let mimeType: String
    let requestedFormat: String
    let actualFormat: String
    /// Optional compression details; omitted from `toDictionary()` when `nil`.
    let compression: [String: Any]?

    /// Builds a dictionary whose keys are the property names.
    /// - Returns: The ten mandatory entries, plus `"compression"` only when it is non-nil.
    func toDictionary() -> [String: Any] {
        let mandatoryEntries: [String: Any] = [
            "uri": uri,
            "filename": filename,
            "durationMs": durationMs,
            "size": size,
            "sampleRate": sampleRate,
            "channels": channels,
            "bitDepth": bitDepth,
            "mimeType": mimeType,
            "requestedFormat": requestedFormat,
            "actualFormat": actualFormat
        ]
        guard let compressionInfo = compression else {
            return mandatoryEntries
        }
        var entries = mandatoryEntries
        entries["compression"] = compressionInfo
        return entries
    }
}
66
-
67
- public class AudioProcessor {
68
/// Audio file opened by the file-based initializer; stays `nil` when the
/// buffer-based initializer is used.
public private(set) var audioFile: AVAudioFile?
/// Callback stored from the initializer's `resolve` argument.
private var result: (Any) -> Void
/// Callback stored from the initializer's `reject` argument; invoked with an
/// error code followed by a message.
private var reject: (String, String) -> Void
private var waveformData: [Float] = []
private var progress: Float = 0.0
/// Overwritten with the processing format's channel count during file processing.
private var channelCount: Int = 1
private var currentProgress: Float = 0.0
private let extractionQueue = DispatchQueue(label: "AudioProcessor", attributes: .concurrent)
/// Backing storage for `abortExtraction`.
private var _abortExtraction: Bool = false

// Source of the sequential ids handed to generated data points.
private var uniqueIdCounter = 0

/// Flag polled by the file read loop; setting it to `true` makes that loop stop early.
/// NOTE(review): reads and writes go straight to the backing variable with no
/// synchronization — confirm callers only touch it from one thread.
public var abortExtraction: Bool {
    get {
        return _abortExtraction
    }
    set(shouldAbort) {
        _abortExtraction = shouldAbort
    }
}
85
-
86
/// Creates a processor backed by the audio file at `url`.
/// - Parameters:
///   - url: Location of the audio file to open for reading.
///   - resolve: Callback stored for later use by the processor.
///   - reject: Callback invoked with an error code and a message when processing fails.
/// - Throws: Whatever `AVAudioFile(forReading:)` throws when the file cannot be opened.
public init(url: URL, resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) throws {
    // Open the file first so a failure throws before anything else is stored.
    let openedFile = try AVAudioFile(forReading: url)
    self.audioFile = openedFile
    self.reject = reject
    self.result = resolve
}
92
-
93
/// Creates a processor for buffer-based processing; no audio file is opened,
/// so `audioFile` remains `nil`.
/// - Parameters:
///   - resolve: Callback stored for later use by the processor.
///   - reject: Callback invoked with an error code and a message when processing fails.
public init(resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) {
    self.reject = reject
    self.result = resolve
}
98
-
99
-
100
deinit {
    // Drop the reference to the audio file explicitly when the processor goes away.
    self.audioFile = nil
}
103
-
104
/// Failures that `AudioProcessor` can express as a Swift `Error`.
public enum AudioProcessorError: Swift.Error {
    /// The audio file could not be set up; carries a `String` payload
    /// (presumably a description — confirm against the throwing site).
    case fileInitializationFailed(String)

    /// A buffer could not be created.
    case bufferCreationFailed

    /// Reading audio failed; carries a `String` payload
    /// (presumably a description — confirm against the throwing site).
    case audioReadError(String)
}
110
-
111
-
112
/// Reads a range of the loaded audio file and extracts analysis data from it.
///
/// The range can be expressed in bytes (`position` / `byteLength`, converted to frames using
/// `bitDepth` and `numberOfChannels`) or in frames (`offset` / `length`); byte values win when
/// both are supplied. Only the first channel of the file's processing format is collected.
/// Every validation or read failure invokes the stored `reject` callback and returns `nil`.
/// A pending `abortExtraction` request also returns `nil` (without calling `reject`) and
/// resets the flag.
///
/// - Parameters:
///   - numberOfSamples: When given, the read chunk size is `range length / numberOfSamples`
///     frames (at least 1). It only affects how the file is read, not the segmentation.
///   - offset: The frame to start reading from (used when `position` is `nil`).
///   - length: The number of frames to read (used when `byteLength` is `nil`).
///   - segmentDurationMs: The duration of each analysis segment in milliseconds. Must be positive.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data, used for byte/frame conversion.
///   - numberOfChannels: The number of channels, used for byte/frame conversion.
///   - position: The position to start reading from (in bytes).
///   - byteLength: The length of the audio to read (in bytes).
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` on
///   failure or abort.
public func processAudioData(
    numberOfSamples: Int?,
    offset: Int? = 0,
    length: UInt? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int,
    position: Int? = nil,
    byteLength: Int? = nil
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // A negative duration would trap in the unsigned frame-count conversion below, and a
    // zero duration produces empty segments, so refuse both up front.
    guard segmentDurationMs > 0 else {
        NSLog("[AudioProcessor] ERROR: Invalid segment duration")
        reject("INVALID_SEGMENT_DURATION", "Segment duration (\(segmentDurationMs)) must be positive")
        return nil
    }

    // Byte positions are divided by (bitDepth / 8) and numberOfChannels; either being zero
    // would crash with a division by zero.
    let usesByteRange = position != nil || byteLength != nil
    if usesByteRange && (bitDepth < 8 || numberOfChannels <= 0) {
        NSLog("[AudioProcessor] ERROR: Invalid format for byte range conversion")
        reject("INVALID_FORMAT", "Cannot convert byte positions to frames with bitDepth \(bitDepth) and \(numberOfChannels) channel(s)")
        return nil
    }

    let totalFrameCount = AVAudioFrameCount(audioFile.length)

    NSLog("""
    [AudioProcessor] Starting audio processing:
    - totalFrameCount: \(totalFrameCount)
    - bitDepth: \(bitDepth)
    - numberOfChannels: \(numberOfChannels)
    - position: \(position ?? -1)
    - byteLength: \(byteLength ?? -1)
    - offset: \(offset ?? -1)
    - length: \(length ?? 0)
    """)

    // Use position/byteLength if provided, otherwise fall back to offset/length
    let effectiveOffset: Int64 = if let position = position {
        Int64(position / (bitDepth / 8) / numberOfChannels)
    } else {
        Int64(offset ?? 0)
    }

    let effectiveLength: Int64 = if let byteLength = byteLength {
        Int64(byteLength / (bitDepth / 8) / numberOfChannels)
    } else if let length = length {
        Int64(length)
    } else {
        Int64(totalFrameCount) - effectiveOffset
    }

    NSLog("""
    [AudioProcessor] Calculated frame positions:
    - effectiveOffset: \(effectiveOffset)
    - effectiveLength: \(effectiveLength)
    - expectedEndFrame: \(effectiveOffset + effectiveLength)
    - totalFrameCount: \(totalFrameCount)
    """)

    // Validate frame boundaries
    if effectiveOffset < 0 || effectiveOffset >= Int64(totalFrameCount) {
        NSLog("[AudioProcessor] ERROR: Invalid offset value")
        reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
        return nil
    }

    if effectiveLength <= 0 {
        NSLog("[AudioProcessor] ERROR: Invalid length value")
        reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
        return nil
    }

    if effectiveOffset + effectiveLength > Int64(totalFrameCount) {
        NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
        reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
        return nil
    }

    var startFrame: AVAudioFramePosition = effectiveOffset
    let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength

    // Calculate frames per segment based on segment duration
    let framesPerSegment = AVAudioFrameCount(Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0)

    // Chunk size for reading. A non-positive numberOfSamples is clamped to 1 so the
    // division cannot trap; the chunk size does not change which frames are collected.
    let framesPerBuffer: AVAudioFrameCount
    if let numberOfSamples = numberOfSamples {
        framesPerBuffer = AVAudioFrameCount(max(1, effectiveLength / Int64(max(1, numberOfSamples))))
    } else {
        framesPerBuffer = framesPerSegment
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        reject("BUFFER_CREATION_FAILED", "Failed to create AVAudioPCMBuffer.")
        return nil
    }

    channelCount = Int(audioFile.processingFormat.channelCount)

    var channelData = [Float]()
    channelData.reserveCapacity(Int(effectiveLength))

    while startFrame < endFrame {
        let remainingFrames = endFrame - startFrame
        let currentFramesPerBuffer = min(framesPerBuffer, AVAudioFrameCount(remainingFrames))

        if currentFramesPerBuffer == 0 {
            break
        }

        if abortExtraction {
            audioFile.framePosition = startFrame
            abortExtraction = false
            return nil
        }

        do {
            audioFile.framePosition = startFrame
            try audioFile.read(into: buffer, frameCount: currentFramesPerBuffer)
        } catch {
            reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
            return nil
        }

        //TODO: check if we need conversion based on bitDepth here
        guard let floatData = buffer.floatChannelData else {
            reject("BUFFER_DATA_ERROR", "Failed to retrieve float data from buffer.")
            return nil
        }
        // Only the first channel is collected.
        channelData.append(contentsOf: UnsafeBufferPointer(start: floatData[0], count: Int(buffer.frameLength)))

        startFrame += AVAudioFramePosition(currentFramesPerBuffer)
    }

    // startFrame has advanced from effectiveOffset, so the difference is what was consumed.
    NSLog("""
    [AudioProcessor] Audio processing completed:
    - processedFrames: \(startFrame - effectiveOffset)
    - framesPerBuffer: \(framesPerBuffer)
    """)

    return processChannelData(
        channelData: channelData,
        sampleRate: Float(audioFile.fileFormat.sampleRate),
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
267
-
268
/// Processes audio data from a raw PCM buffer.
///
/// The bytes are interpreted as native-endian signed integers of `bitDepth` bits and scaled
/// by the integer type's maximum value. Trailing bytes that do not form a whole sample are
/// ignored. Empty input or an unsupported bit depth invokes the stored `reject` callback and
/// returns `nil`.
///
/// - Parameters:
///   - data: The audio data buffer.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data; 16 and 32 are supported.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` on failure.
public func processAudioBuffer(
    data: Data,
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    guard !data.isEmpty else {
        Logger.debug("Data is empty, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    // Convert Data to Float array based on bit depth.
    // Samples are read with loadUnaligned because the storage behind a Data value (for
    // example a slice) is not guaranteed to be aligned for Int16/Int32, which binding the
    // memory to those types would require.
    let floatData: [Float]
    switch bitDepth {
    case 16:
        let sampleSize = MemoryLayout<Int16>.size
        let sampleCount = data.count / sampleSize
        floatData = data.withUnsafeBytes { rawBuffer in
            (0..<sampleCount).map { index in
                Float(rawBuffer.loadUnaligned(fromByteOffset: index * sampleSize, as: Int16.self)) / Float(Int16.max)
            }
        }
    case 32:
        let sampleSize = MemoryLayout<Int32>.size
        let sampleCount = data.count / sampleSize
        floatData = data.withUnsafeBytes { rawBuffer in
            (0..<sampleCount).map { index in
                Float(rawBuffer.loadUnaligned(fromByteOffset: index * sampleSize, as: Int32.self)) / Float(Int32.max)
            }
        }
    default:
        Logger.debug("Unsupported bit depth. Rejecting")
        reject("UNSUPPORTED_BIT_DEPTH", "Unsupported bit depth: \(bitDepth)")
        return nil
    }

    return processChannelData(
        channelData: floatData,
        sampleRate: sampleRate,
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
319
-
320
/// Splits audio samples into fixed-duration segments and extracts features for each one.
///
/// The segment size is `segmentDurationMs * sampleRate / 1000` samples (truncated). If that
/// is not a finite value of at least one sample, the stored `reject` callback is invoked and
/// `nil` is returned, because a zero-sized segment could never advance through the data.
///
/// - Parameters:
///   - channelData: The audio channel data to process.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data, used to derive byte positions.
///   - numberOfChannels: The number of channels in the audio data, used to derive byte positions.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` when
///   the segment size is invalid.
private func processChannelData(
    channelData: [Float],
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    Logger.debug("Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")

    let extractionStart = CACurrentMediaTime()

    let length = channelData.count

    // Calculate points per segment based on segment duration
    let exactSamplesPerSegment = Float(segmentDurationMs) * sampleRate / 1000.0
    guard exactSamplesPerSegment.isFinite, exactSamplesPerSegment >= 1 else {
        Logger.debug("Invalid segment size. Rejecting")
        reject("INVALID_SEGMENT_DURATION", "Segment duration \(segmentDurationMs) ms at sample rate \(sampleRate) does not yield at least one sample per segment")
        return nil
    }
    let samplesPerSegment = Int(exactSamplesPerSegment)

    var dataPoints = [DataPoint]()
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    // Calculate bytes per sample
    let bytesPerSample = bitDepth / 8

    // Process data in segments; the last one may be shorter than samplesPerSegment.
    for segmentStart in stride(from: 0, to: length, by: samplesPerSegment) {
        let segmentEnd = min(segmentStart + samplesPerSegment, length)
        let segment = Array(channelData[segmentStart..<segmentEnd])

        // Calculate byte positions and timing
        let startPosition = segmentStart * bytesPerSample * numberOfChannels
        let endPosition = segmentEnd * bytesPerSample * numberOfChannels
        let segmentStartTime = Float(segmentStart) / sampleRate
        let segmentEndTime = Float(segmentEnd) / sampleRate

        // Process segment and create data point
        let dataPoint = processSegment(
            segment,
            sampleRate: sampleRate,
            featureOptions: featureOptions,
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: startPosition,
            endPosition: endPosition
        )
        dataPoints.append(dataPoint)

        // Update min/max amplitudes
        minAmplitude = min(minAmplitude, segment.min() ?? minAmplitude)
        maxAmplitude = max(maxAmplitude, segment.max() ?? maxAmplitude)
    }

    // With no samples the trackers still hold their sentinel start values; report an
    // empty 0...0 range instead of leaking them.
    if length == 0 {
        minAmplitude = 0
        maxAmplitude = 0
    }

    let extractionEnd = CACurrentMediaTime()
    let processingTimeMs = Float((extractionEnd - extractionStart) * 1000)

    Logger.debug("Processed \(dataPoints.count) data points in \(processingTimeMs) ms")

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(length) / sampleRate * 1000),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: length,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: processingTimeMs
    )
}
407
-
408
/// Builds the `DataPoint` for one segment of samples.
///
/// Computes the segment's RMS, its level in dB (`20 * log10(rms)`), a silence flag
/// (`rms < 0.01`) and the requested features, then assigns the next sequential id.
///
/// - Parameters:
///   - segment: The samples of this segment.
///   - sampleRate: The sample rate of the audio data.
///   - featureOptions: The features to extract.
///   - startTime: Value stored as the data point's `startTime`.
///   - endTime: Value stored as the data point's `endTime`.
///   - startPosition: Value stored as the data point's `startPosition`.
///   - endPosition: Value stored as the data point's `endPosition`.
/// - Returns: The data point describing the segment.
private func processSegment(
    _ segment: [Float],
    sampleRate: Float,
    featureOptions: [String: Bool],
    startTime: Float,
    endTime: Float,
    startPosition: Int,
    endPosition: Int
) -> DataPoint {
    let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
    // An empty segment would divide by zero and produce NaN; treat it as silence.
    let rms = segment.isEmpty ? 0 : sqrt(sumSquares / Float(segment.count))
    let silent = rms < 0.01
    // NOTE(review): rms == 0 gives -infinity here — confirm downstream consumers accept it.
    let dB = Float(20 * log10(Double(rms)))

    // Count sign changes between neighbouring samples so the "zcr" feature reflects the
    // signal instead of always being zero. Skipped unless the feature was requested.
    let zeroCrossings: Int
    if featureOptions["zcr"] == true {
        zeroCrossings = zip(segment, segment.dropFirst()).reduce(0) { crossings, pair in
            (pair.0 >= 0) != (pair.1 >= 0) ? crossings + 1 : crossings
        }
    } else {
        zeroCrossings = 0
    }

    let features = computeFeatures(
        segmentData: segment,
        sampleRate: sampleRate,
        sumSquares: sumSquares,
        zeroCrossings: zeroCrossings,
        segmentLength: segment.count,
        featureOptions: featureOptions
    )

    // NOTE(review): amplitude is the largest signed sample, not the largest magnitude —
    // confirm this is intended for segments that are mostly negative.
    let dataPoint = DataPoint(
        id: uniqueIdCounter,
        amplitude: segment.max() ?? 0,
        rms: rms,
        dB: dB,
        silent: silent,
        features: features,
        speech: SpeechFeatures(isActive: !silent),
        startTime: startTime,
        endTime: endTime,
        startPosition: startPosition,
        endPosition: endPosition,
        samples: segment.count
    )
    uniqueIdCounter += 1
    return dataPoint
}
449
-
450
/// Assembles the `Features` value for one segment.
///
/// A feature is computed only when its key in `featureOptions` is `true`; otherwise it is
/// reported as `0`, an empty array, or `nil` (for `crc32`). `rms` and the min/max absolute
/// amplitudes are always computed.
///
/// - Parameters:
///   - segmentData: The samples of the segment.
///   - sampleRate: The sample rate of the audio data.
///   - sumSquares: Caller-supplied value used for `energy` and, divided by `segmentLength`, for `rms`.
///   - zeroCrossings: Caller-supplied count, divided by `segmentLength` for `zcr`.
///   - segmentLength: The divisor used for `rms` and `zcr`.
///   - featureOptions: The features to extract.
/// - Returns: The populated `Features` value.
private func computeFeatures(
    segmentData: [Float],
    sampleRate: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: [String: Bool]
) -> Features {
    // True only when the option is present and explicitly enabled.
    let isRequested: (String) -> Bool = { featureName in
        featureOptions[featureName] == true
    }

    let rms = sqrt(sumSquares / Float(segmentLength))
    let energy = isRequested("energy") ? sumSquares : 0
    let zcr = isRequested("zcr") ? Float(zeroCrossings) / Float(segmentLength) : 0
    let mfcc = isRequested("mfcc") ? extractMFCC(from: segmentData, sampleRate: sampleRate) : []
    let spectralCentroid = isRequested("spectralCentroid") ? extractSpectralCentroid(from: segmentData, sampleRate: sampleRate) : 0
    let spectralFlatness = isRequested("spectralFlatness") ? extractSpectralFlatness(from: segmentData) : 0
    let spectralRollOff = isRequested("spectralRollOff") ? extractSpectralRollOff(from: segmentData, sampleRate: sampleRate) : 0
    let spectralBandwidth = isRequested("spectralBandwidth") ? extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate) : 0
    let chromagram = isRequested("chromagram") ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
    let tempo = isRequested("tempo") ? extractTempo(from: segmentData, sampleRate: sampleRate) : 0
    let hnr = isRequested("hnr") ? extractHNR(from: segmentData) : 0
    let melSpectrogram = isRequested("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast = isRequested("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []
    let tonnetz = isRequested("tonnetz") ? computeTonnetz(from: segmentData, sampleRate: sampleRate) : []
    let pitch = isRequested("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : 0

    // Smallest and largest absolute sample values of the segment (0 when it is empty).
    let magnitudes = segmentData.map(abs)
    let minAmplitude = magnitudes.min() ?? 0
    let maxAmplitude = magnitudes.max() ?? 0

    let crc32Value = isRequested("crc32")
        ? calculateCRC32(from: segmentData, count: segmentData.count)
        : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        minAmplitude: minAmplitude,
        maxAmplitude: maxAmplitude,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRollOff: spectralRollOff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch,
        crc32: crc32Value
    )
}
502
-
503
- /// Processes audio data with time range support
504
- public func processAudioData(
505
- startTimeMs: Double? = nil,
506
- endTimeMs: Double? = nil,
507
- segmentDurationMs: Int = 100, // Default 100ms
508
- featureOptions: [String: Bool]
509
- ) -> AudioAnalysisData? {
510
- guard let audioFile = audioFile else {
511
- Logger.debug("No audio file loaded")
512
- return nil
513
- }
514
-
515
- let startTime = CACurrentMediaTime()
516
- let sampleRate = Float(audioFile.fileFormat.sampleRate)
517
- let totalFrameCount = AVAudioFrameCount(audioFile.length)
518
- let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
519
- let numberOfChannels = Int(audioFile.fileFormat.channelCount)
520
-
521
- // Convert time to frames
522
- let startFrame = startTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? 0
523
- let endFrame = endTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? audioFile.length
524
-
525
- // Validate frame range
526
- guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
527
- Logger.debug("Invalid time range")
528
- return nil
529
- }
530
-
531
- // Calculate frames per buffer based on segment duration
532
- let framesPerBuffer = AVAudioFrameCount(Float(sampleRate) * Float(segmentDurationMs) / 1000.0)
533
-
534
- guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
535
- Logger.debug("Failed to create buffer")
536
- return nil
537
- }
538
-
539
- var dataPoints: [DataPoint] = []
540
- var minAmplitude: Float = .greatestFiniteMagnitude
541
- var maxAmplitude: Float = -.greatestFiniteMagnitude
542
- var currentId = 0
543
-
544
- audioFile.framePosition = startFrame
545
- var currentFrame = startFrame
546
-
547
- while currentFrame < endFrame {
548
- let framesToRead = min(framesPerBuffer, AVAudioFrameCount(endFrame - currentFrame))
549
-
550
- do {
551
- try audioFile.read(into: buffer, frameCount: framesToRead)
552
-
553
- guard let channelData = buffer.floatChannelData else {
554
- continue
555
- }
556
-
557
- // Process each channel's data
558
- var summedData = [Float](repeating: 0, count: Int(framesToRead))
559
- for channel in 0..<numberOfChannels {
560
- let channelBuffer = UnsafeBufferPointer(start: channelData[channel], count: Int(framesToRead))
561
- for (index, sample) in channelBuffer.enumerated() {
562
- summedData[index] += sample
563
- }
564
- }
565
-
566
- // Average across channels
567
- for i in 0..<summedData.count {
568
- summedData[i] /= Float(numberOfChannels)
569
- }
570
-
571
- // Calculate both peak amplitude and RMS
572
- var localMax: Float = 0
573
- var rms: Float = 0
574
- vDSP_maxmgv(summedData, 1, &localMax, vDSP_Length(framesToRead))
575
-
576
- // Calculate RMS using vDSP
577
- var meanSquare: Float = 0
578
- vDSP_measqv(summedData, 1, &meanSquare, vDSP_Length(framesToRead))
579
- rms = sqrt(meanSquare)
580
-
581
- minAmplitude = min(minAmplitude, localMax)
582
- maxAmplitude = max(maxAmplitude, localMax)
583
-
584
- // Create data point
585
- let startTime = Float(currentFrame) / Float(sampleRate)
586
- let endTime = Float(currentFrame + Int64(framesToRead)) / Float(sampleRate)
587
-
588
- let dataPoint = DataPoint(
589
- id: currentId,
590
- amplitude: localMax, // Always use peak amplitude
591
- rms: rms, // Use calculated RMS value
592
- dB: Float(20 * log10(Double(rms))), // Use RMS for dB calculation
593
- silent: rms < 0.01, // Use RMS for silence detection
594
- features: computeFeatures(
595
- segmentData: Array(UnsafeBufferPointer(start: summedData, count: Int(framesToRead))),
596
- sampleRate: sampleRate,
597
- sumSquares: rms * rms,
598
- zeroCrossings: 0,
599
- segmentLength: Int(framesToRead),
600
- featureOptions: featureOptions
601
- ),
602
- speech: SpeechFeatures(isActive: rms >= 0.01),
603
- startTime: startTime,
604
- endTime: endTime,
605
- startPosition: Int(currentFrame),
606
- endPosition: Int(currentFrame + Int64(framesToRead)),
607
- samples: Int(framesToRead)
608
- )
609
-
610
- dataPoints.append(dataPoint)
611
- currentId += 1
612
- } catch {
613
- Logger.debug("Error reading audio data: \(error)")
614
- return nil
615
- }
616
-
617
- currentFrame += Int64(framesToRead)
618
- }
619
-
620
- let endTime = CACurrentMediaTime()
621
- let extractionTime = Float(endTime - startTime) * 1000 // Convert to milliseconds
622
-
623
- return AudioAnalysisData(
624
- segmentDurationMs: segmentDurationMs,
625
- durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
626
- bitDepth: bitDepth,
627
- numberOfChannels: numberOfChannels,
628
- sampleRate: Int(sampleRate),
629
- samples: Int(endFrame - startFrame),
630
- dataPoints: dataPoints,
631
- amplitudeRange: AudioAnalysisData.AmplitudeRange(
632
- min: minAmplitude,
633
- max: maxAmplitude
634
- ),
635
- rmsRange: AudioAnalysisData.AmplitudeRange(
636
- min: 0,
637
- max: 1
638
- ),
639
- speechAnalysis: nil,
640
- extractionTimeMs: extractionTime
641
- )
642
- }
643
-
644
/// Trims the loaded audio file to the requested range(s) and writes the result to a
/// file in the temporary directory.
///
/// - Parameters:
///   - mode: `"single"`, `"keep"` or `"remove"`; forwarded to `computeKeepRanges`.
///   - startTimeMs: Start of the range for `"single"` mode, in milliseconds.
///   - endTimeMs: End of the range for `"single"` mode, in milliseconds.
///   - ranges: Dictionaries with `startTimeMs` / `endTimeMs` keys for `"keep"` / `"remove"`.
///   - outputFileName: Base name of the output file; a UUID is used when `nil`.
///   - outputFormat: Optional `format`, `sampleRate`, `channels`, `bitDepth`, `bitrate` keys.
///     Unknown formats fall back to `"aac"`.
///   - decodingOptions: Parsed with `DecodingConfig.fromDictionary`; any target value set
///     there disables the WAV fast path.
///   - progressCallback: Called after every chunk with the percentage done, a byte count
///     for the chunk (0 on the re-encoding paths) and the total byte count of the input frames.
/// - Returns: A `TrimResult` describing the written file, or `nil` when no file is loaded,
///   nothing is left to keep, or writing failed (failures are reported through `reject`).
public func trimAudio(
    mode: String,
    startTimeMs: Double?,
    endTimeMs: Double?,
    ranges: [[String: Double]]?,
    outputFileName: String?,
    outputFormat: [String: Any]?,
    decodingOptions: [String: Any]?,
    progressCallback: ((Float, Int64, Int64) -> Void)? = nil
) -> TrimResult? {
    Logger.debug("Starting audio trim operation:")
    Logger.debug("- Mode: \(mode)")
    if let start = startTimeMs, let end = endTimeMs {
        Logger.debug("- Time range: \(start)ms to \(end)ms")
    }
    if let ranges = ranges {
        Logger.debug("- Ranges count: \(ranges.count)")
    }
    if let format = outputFormat {
        let formatType = format["format"] as? String ?? "unknown"
        let bitrate = format["bitrate"] as? Int ?? 0
        Logger.debug("- Output format: \(formatType), bitrate: \(bitrate)")
    }

    guard let audioFile = audioFile else { return nil }

    let inputFormat = audioFile.processingFormat
    let inputSampleRate = inputFormat.sampleRate
    let inputChannels = Int(inputFormat.channelCount)
    let fileLength = audioFile.length
    let totalDurationMs = Double(fileLength) / inputSampleRate * 1000
    let bytesPerFrame = Int64(inputFormat.streamDescription.pointee.mBytesPerFrame)

    let requestedRanges = computeKeepRanges(
        mode: mode,
        startTimeMs: startTimeMs,
        endTimeMs: endTimeMs,
        ranges: ranges,
        totalDurationMs: totalDurationMs
    )

    // Clamp every range to the file and drop empty/inverted ones here, so the frame
    // arithmetic below can never produce a negative count (which would trap when
    // converted to an unsigned frame count) or seek outside the file.
    var keepRanges: [[Double]] = []
    var spans: [(start: AVAudioFramePosition, length: AVAudioFramePosition)] = []
    for range in requestedRanges where range.count == 2 {
        let startMs = min(max(range[0], 0), totalDurationMs)
        let endMs = min(max(range[1], 0), totalDurationMs)
        guard startMs < endMs else { continue }
        let startFrame = min(AVAudioFramePosition(startMs / 1000 * inputSampleRate), fileLength)
        let endFrame = min(AVAudioFramePosition(endMs / 1000 * inputSampleRate), fileLength)
        guard startFrame < endFrame else { continue }
        keepRanges.append([startMs, endMs])
        spans.append((start: startFrame, length: endFrame - startFrame))
    }
    guard !keepRanges.isEmpty else { return nil }
    let totalFrames = spans.reduce(Int64(0)) { $0 + $1.length }

    // Output format setup
    let requestedFormat = outputFormat?["format"] as? String ?? "wav"
    let validFormats = ["wav", "aac", "opus"]
    let formatStr = validFormats.contains(requestedFormat.lowercased()) ? requestedFormat.lowercased() : "aac"
    if formatStr != requestedFormat.lowercased() {
        Logger.debug("Unsupported format '\(requestedFormat)', falling back to 'aac'")
    }

    let targetSampleRate = outputFormat?["sampleRate"] as? Double ?? inputSampleRate
    let targetChannels = outputFormat?["channels"] as? Int ?? inputChannels
    let targetBitDepth = outputFormat?["bitDepth"] as? Int ?? 16
    let bitrate = outputFormat?["bitrate"] as? Int ?? 128000
    let baseName = outputFileName ?? UUID().uuidString

    let decodingConfig = DecodingConfig.fromDictionary(decodingOptions ?? [:])
    // The fast path copies frames untouched, so it is only valid when neither the
    // decoding options nor the requested output format ask for a different layout.
    let outputChangesFormat = targetSampleRate != inputSampleRate
        || targetChannels != inputChannels
        || outputFormat?["bitDepth"] != nil
    let needFormatChange = decodingConfig.targetSampleRate != nil
        || decodingConfig.targetChannels != nil
        || decodingConfig.targetBitDepth != nil
        || outputChangesFormat
    let isWavInput = audioFile.fileFormat.settings[AVFormatIDKey] as? UInt32 == kAudioFormatLinearPCM

    // Frames are streamed in fixed-size chunks so long ranges never need one huge buffer.
    let chunkCapacity: AVAudioFrameCount = 32768

    do {
        if isWavInput && formatStr == "wav" && !needFormatChange {
            // Fast path: WAV-to-WAV with no format changes
            let outputURL = FileManager.default.temporaryDirectory
                .appendingPathComponent(baseName)
                .appendingPathExtension("wav")
            Logger.debug("Creating output file at: \(outputURL.path)")

            do {
                // Scoped so the file object is released before its size is queried.
                let outputFile = try AVAudioFile(forWriting: outputURL, settings: inputFormat.settings)
                var cumulativeFrames: Int64 = 0
                try readTrimChunks(from: audioFile, spans: spans, chunkCapacity: chunkCapacity) { chunk in
                    try outputFile.write(from: chunk)
                    let frames = Int64(chunk.frameLength)
                    cumulativeFrames += frames
                    let progress = Float(cumulativeFrames) / Float(totalFrames) * 100
                    progressCallback?(progress, frames * bytesPerFrame, totalFrames * bytesPerFrame)
                }
            }

            let writtenSize = ((try? FileManager.default.attributesOfItem(atPath: outputURL.path))?[.size] as? Int64) ?? 0
            Logger.debug("Trim operation completed")
            Logger.debug("- Output file: \(outputURL.path)")
            Logger.debug("- File exists: \(FileManager.default.fileExists(atPath: outputURL.path))")
            Logger.debug("- File size: \(writtenSize) bytes")
            Logger.debug("- File extension: \(outputURL.pathExtension)")

            // The file is written with the processing format's settings, so report that
            // format's bit depth instead of a hard-coded 16.
            let writtenBitDepth = Int(inputFormat.streamDescription.pointee.mBitsPerChannel)
            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(inputSampleRate), channels: inputChannels, bitDepth: writtenBitDepth, bitrate: bitrate)
        }

        // Non-fast path: decode and re-encode through a Float32 intermediate format.
        guard targetChannels > 0, targetSampleRate > 0,
              let targetFormat = AVAudioFormat(
                  commonFormat: .pcmFormatFloat32,
                  sampleRate: targetSampleRate,
                  channels: AVAudioChannelCount(targetChannels),
                  interleaved: false
              ),
              let converter = AVAudioConverter(from: inputFormat, to: targetFormat) else {
            reject("TRIM_ERROR", "Failed to trim audio: unsupported output format (\(targetSampleRate)Hz, \(targetChannels) channels)")
            return nil
        }

        var cumulativeFrames: Int64 = 0

        if formatStr == "wav" {
            let outputURL = FileManager.default.temporaryDirectory
                .appendingPathComponent(baseName)
                .appendingPathExtension("wav")

            do {
                let outputFile = try AVAudioFile(forWriting: outputURL, settings: [
                    AVFormatIDKey: kAudioFormatLinearPCM,
                    AVSampleRateKey: targetSampleRate,
                    AVNumberOfChannelsKey: targetChannels,
                    AVLinearPCMBitDepthKey: targetBitDepth,
                    AVLinearPCMIsFloatKey: false,
                    AVLinearPCMIsBigEndianKey: false
                ])
                try readTrimChunks(from: audioFile, spans: spans, chunkCapacity: chunkCapacity) { chunk in
                    let converted = try convertTrimChunk(chunk, with: converter)
                    if converted.frameLength > 0 {
                        try outputFile.write(from: converted)
                    }
                    cumulativeFrames += Int64(chunk.frameLength)
                    let progress = Float(cumulativeFrames) / Float(totalFrames) * 100
                    progressCallback?(progress, 0, totalFrames * bytesPerFrame)
                }
            }
            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(targetSampleRate), channels: targetChannels, bitDepth: targetBitDepth, bitrate: bitrate)
        }

        // AAC or Opus output, written through AVAssetWriter into an m4a / caf container.
        let tempFileExtension = formatStr == "aac" ? "m4a" : "caf"
        let tempOutputURL = FileManager.default.temporaryDirectory
            .appendingPathComponent(baseName)
            .appendingPathExtension(tempFileExtension)

        let outputSettings: [String: Any]
        let fileType: AVFileType
        // Values actually handed to the encoder; reported back in the result.
        var encodedSampleRate = targetSampleRate
        var encodedChannels = targetChannels
        var encodedBitrate = bitrate

        if formatStr == "aac" {
            // AAC typically supports: 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 Hz
            let supportedSampleRates = [8000.0, 11025.0, 12000.0, 16000.0, 22050.0, 24000.0, 32000.0, 44100.0, 48000.0]

            // Default to 44100 if not specified
            var sampleRate = outputFormat?["sampleRate"] as? Double ?? 44100.0
            if !supportedSampleRates.contains(sampleRate) {
                let closestRate = supportedSampleRates.min(by: { abs($0 - sampleRate) < abs($1 - sampleRate) }) ?? 44100.0
                Logger.debug("Unsupported sample rate \(sampleRate)Hz for AAC, using closest supported rate: \(closestRate)Hz")
                sampleRate = closestRate
            }

            // Validate channels (AAC typically supports 1 or 2 channels)
            var channels = outputFormat?["channels"] as? Int ?? 2
            if channels > 2 {
                Logger.debug("AAC encoding doesn't support \(channels) channels, limiting to 2 channels")
                channels = 2
            } else if channels < 1 {
                channels = 1
            }

            // Validate bitrate (AAC typically supports 8000-320000 bps)
            var aacBitrate = outputFormat?["bitrate"] as? Int ?? 128000
            if aacBitrate < 8000 {
                Logger.debug("AAC bitrate too low, setting to minimum 8000 bps")
                aacBitrate = 8000
            } else if aacBitrate > 320000 {
                Logger.debug("AAC bitrate too high, setting to maximum 320000 bps")
                aacBitrate = 320000
            }

            outputSettings = [
                AVFormatIDKey: kAudioFormatMPEG4AAC,
                AVSampleRateKey: sampleRate,
                AVNumberOfChannelsKey: channels,
                AVEncoderBitRateKey: aacBitrate,
                AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
            ]
            fileType = .m4a
            encodedSampleRate = sampleRate
            encodedChannels = channels
            encodedBitrate = aacBitrate

            Logger.debug("""
            Configuring AAC output:
            - Container: m4a
            - Format: AAC
            - Sample rate: \(sampleRate)Hz
            - Channels: \(channels)
            - Bitrate: \(aacBitrate) bps
            - Output path: \(tempOutputURL.path)
            - File type: \(fileType)
            """)
        } else {
            // Opus settings - use CAF container which can hold Opus
            outputSettings = [
                AVFormatIDKey: kAudioFormatOpus,
                AVSampleRateKey: targetSampleRate,
                AVNumberOfChannelsKey: targetChannels,
                AVEncoderBitRateKey: bitrate
            ]
            fileType = .caf
        }

        // AVAssetWriter refuses to write over an existing file, which happens as soon as
        // the same outputFileName is used twice.
        try? FileManager.default.removeItem(at: tempOutputURL)

        let assetWriter = try AVAssetWriter(outputURL: tempOutputURL, fileType: fileType)
        let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: outputSettings)
        writerInput.expectsMediaDataInRealTime = false
        guard assetWriter.canAdd(writerInput) else {
            reject("TRIM_ERROR", "Failed to trim audio: writer cannot accept the requested output settings")
            return nil
        }
        assetWriter.add(writerInput)

        guard assetWriter.startWriting() else {
            reject("TRIM_ERROR", "Failed to trim audio: \(assetWriter.error?.localizedDescription ?? "could not start writing")")
            return nil
        }
        assetWriter.startSession(atSourceTime: CMTime.zero)

        let timescale = CMTimeScale(targetSampleRate)
        var writtenFrames: Int64 = 0

        do {
            try readTrimChunks(from: audioFile, spans: spans, chunkCapacity: chunkCapacity) { chunk in
                let converted = try convertTrimChunk(chunk, with: converter)
                if converted.frameLength > 0 {
                    guard let sampleBuffer = createSampleBuffer(from: converted) else {
                        throw makeTrimError("Failed to create sample buffer")
                    }

                    // Stamp each buffer with its position in the output timeline; leaving
                    // every buffer at time zero makes them all overlap.
                    var timing = CMSampleTimingInfo(
                        duration: CMTime(value: 1, timescale: timescale),
                        presentationTimeStamp: CMTime(value: writtenFrames, timescale: timescale),
                        decodeTimeStamp: .invalid
                    )
                    var retimed: CMSampleBuffer?
                    let timingStatus = CMSampleBufferCreateCopyWithNewTiming(
                        allocator: kCFAllocatorDefault,
                        sampleBuffer: sampleBuffer,
                        sampleTimingEntryCount: 1,
                        sampleTimingArray: &timing,
                        sampleBufferOut: &retimed
                    )
                    guard timingStatus == noErr, let timedBuffer = retimed else {
                        throw makeTrimError("Failed to timestamp sample buffer (status \(timingStatus))")
                    }

                    // Wait until the writer is ready, but stop waiting if it has failed:
                    // a failed writer never becomes ready again.
                    while !writerInput.isReadyForMoreMediaData {
                        guard assetWriter.status == .writing else {
                            throw assetWriter.error ?? makeTrimError("Asset writer stopped unexpectedly")
                        }
                        Thread.sleep(forTimeInterval: 0.01)
                    }

                    guard writerInput.append(timedBuffer) else {
                        Logger.debug("Failed to append sample buffer: \(assetWriter.error?.localizedDescription ?? "Unknown error")")
                        throw assetWriter.error ?? makeTrimError("Failed to append sample buffer")
                    }
                    writtenFrames += Int64(converted.frameLength)
                }

                cumulativeFrames += Int64(chunk.frameLength)
                let progress = Float(cumulativeFrames) / Float(totalFrames) * 100
                progressCallback?(progress, 0, totalFrames * bytesPerFrame)
            }
        } catch {
            // Abandon the partially written file before reporting the failure.
            assetWriter.cancelWriting()
            throw error
        }

        // Finish writing and block until the writer has flushed the file.
        writerInput.markAsFinished()
        let finishSemaphore = DispatchSemaphore(value: 0)
        assetWriter.finishWriting {
            finishSemaphore.signal()
        }
        finishSemaphore.wait()

        guard assetWriter.status == .completed,
              FileManager.default.fileExists(atPath: tempOutputURL.path) else {
            if let error = assetWriter.error {
                Logger.debug("Error finishing writing: \(error)")
            }
            reject("FILE_CREATION_FAILED", "Failed to create output file")
            return nil
        }

        let outputSize = ((try? FileManager.default.attributesOfItem(atPath: tempOutputURL.path))?[.size] as? Int64) ?? 0
        Logger.debug("""
        Output file verification:
        - Path: \(tempOutputURL.path)
        - Exists: true
        - Size: \(outputSize) bytes
        - Extension: \(tempOutputURL.pathExtension)
        """)

        var compressionInfo: [String: Any] = [
            "format": formatStr,
            "bitrate": encodedBitrate,
            "size": outputSize
        ]

        // Add fallback information if applicable
        if formatStr != requestedFormat.lowercased() {
            compressionInfo["requestedFormat"] = requestedFormat
            compressionInfo["fallbackReason"] = "Unsupported format"
        }

        let mimeType = formatStr == "aac" ? "audio/mp4" : "audio/opus"

        return TrimResult(
            uri: tempOutputURL.absoluteString,
            filename: tempOutputURL.lastPathComponent,
            durationMs: keepRanges.map { $0[1] - $0[0] }.reduce(0, +),
            size: outputSize,
            sampleRate: Int(encodedSampleRate),
            channels: encodedChannels,
            bitDepth: 16,
            mimeType: mimeType,
            requestedFormat: formatStr,
            actualFormat: tempFileExtension,
            compression: compressionInfo
        )
    } catch {
        reject("TRIM_ERROR", "Failed to trim audio: \(error.localizedDescription)")
        return nil
    }
}

/// Streams the given frame spans of `audioFile` to `handler` in chunks of at most
/// `chunkCapacity` frames. The same buffer is reused for every chunk, so the handler
/// must finish with it before returning.
private func readTrimChunks(
    from audioFile: AVAudioFile,
    spans: [(start: AVAudioFramePosition, length: AVAudioFramePosition)],
    chunkCapacity: AVAudioFrameCount,
    handler: (AVAudioPCMBuffer) throws -> Void
) throws {
    guard let chunk = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: chunkCapacity) else {
        throw makeTrimError("Failed to allocate read buffer")
    }
    for span in spans {
        audioFile.framePosition = span.start
        var remaining = span.length
        while remaining > 0 {
            let wanted = AVAudioFrameCount(min(Int64(chunkCapacity), remaining))
            try audioFile.read(into: chunk, frameCount: wanted)
            // A read that yields nothing means the file ended early; stop instead of spinning.
            guard chunk.frameLength > 0 else { break }
            try handler(chunk)
            remaining -= Int64(chunk.frameLength)
        }
    }
}

/// Converts one chunk with `converter`, handing the input over exactly once so that a
/// sample-rate conversion cannot pull the same frames twice.
private func convertTrimChunk(_ input: AVAudioPCMBuffer, with converter: AVAudioConverter) throws -> AVAudioPCMBuffer {
    let ratio = converter.outputFormat.sampleRate / converter.inputFormat.sampleRate
    let capacity = AVAudioFrameCount((Double(input.frameLength) * ratio).rounded(.up)) + 1
    guard let output = AVAudioPCMBuffer(pcmFormat: converter.outputFormat, frameCapacity: capacity) else {
        throw makeTrimError("Failed to allocate conversion buffer")
    }

    var conversionError: NSError?
    var inputConsumed = false
    let status = converter.convert(to: output, error: &conversionError) { _, inputStatus in
        if inputConsumed {
            inputStatus.pointee = .noDataNow
            return nil
        }
        inputConsumed = true
        inputStatus.pointee = .haveData
        return input
    }
    if status == .error {
        Logger.debug("Conversion error: \(String(describing: conversionError))")
        throw conversionError ?? makeTrimError("Audio conversion failed")
    }
    return output
}

/// Builds the error thrown by the trim helpers; its description ends up in the `reject` message.
private func makeTrimError(_ message: String) -> NSError {
    return NSError(domain: "AudioProcessor.trimAudio", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
}
1038
-
1039
/// Resolves a trim request into the list of `[startMs, endMs]` ranges to keep.
///
/// - Parameters:
///   - mode: `"single"` keeps `startTimeMs...endTimeMs`, `"keep"` keeps `ranges`,
///     `"remove"` keeps everything outside `ranges`. Any other value yields no ranges.
///   - startTimeMs: Start of the range for `"single"` mode.
///   - endTimeMs: End of the range for `"single"` mode.
///   - ranges: Dictionaries with optional `startTimeMs` / `endTimeMs` keys; a missing
///     start defaults to 0 and a missing end to `totalDurationMs`.
///   - totalDurationMs: Duration of the source audio in milliseconds.
/// - Returns: The ranges to keep, each clamped to `0...totalDurationMs`. Ranges that are
///   empty or inverted after clamping are dropped, so every returned range has
///   `start < end` and lies inside the file.
private func computeKeepRanges(mode: String, startTimeMs: Double?, endTimeMs: Double?, ranges: [[String: Double]]?, totalDurationMs: Double) -> [[Double]] {
    let requested: [[Double]] = (ranges ?? []).map { [$0["startTimeMs"] ?? 0, $0["endTimeMs"] ?? totalDurationMs] }

    let candidates: [[Double]]
    switch mode {
    case "single":
        guard let start = startTimeMs, let end = endTimeMs else { return [] }
        candidates = [[start, end]]
    case "keep":
        candidates = requested
    case "remove":
        // Walk the removal ranges in start order and keep the gaps between them.
        var gaps: [[Double]] = []
        var lastEnd = 0.0
        for range in requested.sorted(by: { $0[0] < $1[0] }) {
            if range[0] > lastEnd {
                gaps.append([lastEnd, range[0]])
            }
            lastEnd = max(lastEnd, range[1])
        }
        if lastEnd < totalDurationMs {
            gaps.append([lastEnd, totalDurationMs])
        }
        candidates = gaps
    default:
        return []
    }

    // Callers turn these values into frame positions and unsigned frame counts, so an
    // inverted or out-of-file range must never leave this function.
    return candidates.compactMap { range -> [Double]? in
        let start = min(max(range[0], 0), totalDurationMs)
        let end = min(max(range[1], 0), totalDurationMs)
        return start < end ? [start, end] : nil
    }
}
1064
-
1065
/// Assembles the `TrimResult` for a file that has already been written to `url`.
///
/// The duration is the sum of the lengths of `keepRanges`, the size is read from the
/// file system (0 when it cannot be determined), and both the MIME type and the
/// reported actual format are derived from `formatStr`.
private func createTrimResult(from url: URL, keepRanges: [[Double]], formatStr: String, sampleRate: Int, channels: Int, bitDepth: Int, bitrate: Int, compression: [String: Any]? = nil) -> TrimResult {
    // Total kept time in milliseconds.
    var keptMs = 0.0
    for span in keepRanges {
        keptMs += span[1] - span[0]
    }

    // Fall back to 0 when the attributes cannot be read or carry no usable size.
    let attributes = try? FileManager.default.attributesOfItem(atPath: url.path)
    let byteCount = (attributes?[.size] as? Int64) ?? 0

    let extensionName: String
    switch formatStr {
    case "wav":
        extensionName = "wav"
    case "aac":
        extensionName = "aac"
    default:
        extensionName = "opus"
    }

    return TrimResult(
        uri: url.absoluteString,
        filename: url.lastPathComponent,
        durationMs: keptMs,
        size: byteCount,
        sampleRate: sampleRate,
        channels: channels,
        bitDepth: bitDepth,
        mimeType: "audio/\(extensionName)",
        requestedFormat: formatStr,
        actualFormat: extensionName,
        compression: compression
    )
}
1083
-
1084
/// Wraps the PCM frames of `buffer` in a `CMSampleBuffer`.
///
/// The audio data is copied out of the buffer's `AudioBufferList`, so every channel of a
/// non-interleaved buffer is included and the returned sample buffer does not point into
/// memory owned by `buffer`.
///
/// - Parameter buffer: PCM buffer to wrap; its `format` describes the sample layout.
/// - Returns: A sample buffer with a presentation timestamp of zero and a per-sample
///   duration of `1 / sampleRate`, or `nil` when the buffer is empty or any Core Media
///   call fails.
private func createSampleBuffer(from buffer: AVAudioPCMBuffer) -> CMSampleBuffer? {
    guard buffer.frameLength > 0 else { return nil }

    var formatDesc: CMAudioFormatDescription?
    let formatStatus = CMAudioFormatDescriptionCreate(
        allocator: kCFAllocatorDefault,
        asbd: buffer.format.streamDescription,
        layoutSize: 0,
        layout: nil,
        magicCookieSize: 0,
        magicCookie: nil,
        extensions: nil,
        formatDescriptionOut: &formatDesc
    )
    guard formatStatus == noErr, let format = formatDesc else { return nil }

    var sampleBuffer: CMSampleBuffer?
    var timingInfo = CMSampleTimingInfo(
        duration: CMTime(value: 1, timescale: CMTimeScale(buffer.format.sampleRate)),
        presentationTimeStamp: .zero,
        decodeTimeStamp: .invalid
    )

    // Create the sample buffer without data first; the data is attached below.
    let createStatus = CMSampleBufferCreate(
        allocator: kCFAllocatorDefault,
        dataBuffer: nil,
        dataReady: false,
        makeDataReadyCallback: nil,
        refcon: nil,
        formatDescription: format,
        sampleCount: CMItemCount(buffer.frameLength),
        sampleTimingEntryCount: 1,
        sampleTimingArray: &timingInfo,
        sampleSizeEntryCount: 0,
        sampleSizeArray: nil,
        sampleBufferOut: &sampleBuffer
    )
    guard createStatus == noErr, let sampleBuf = sampleBuffer else { return nil }

    // Copy the samples from the buffer list. Wrapping only `floatChannelData[0]` would
    // drop every channel after the first for non-interleaved audio, crash on non-float
    // buffers, and leave the block buffer referencing memory it does not own.
    let dataStatus = CMSampleBufferSetDataBufferFromAudioBufferList(
        sampleBuf,
        blockBufferAllocator: kCFAllocatorDefault,
        blockBufferMemoryAllocator: kCFAllocatorDefault,
        flags: 0,
        bufferList: buffer.audioBufferList
    )
    guard dataStatus == noErr else { return nil }

    return sampleBuf
}
1139
-
1140
/// Extracts a preview of the audio data with consistent time range support.
///
/// The requested range is clamped to the file and split into equally sized segments;
/// one data point is computed per segment from the first channel of the decoded audio.
///
/// - Parameters:
///   - numberOfPoints: The number of points to extract; must be greater than zero. When
///     the range holds fewer samples than requested points, one point per sample is produced.
///   - startTimeMs: Optional start time in milliseconds (defaults to the start of the file).
///   - endTimeMs: Optional end time in milliseconds (defaults to the end of the file).
///   - featureOptions: The features to extract, forwarded to `computeFeatures`.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil`
///   after calling `reject` when no file is loaded, the arguments describe an empty
///   range, or reading fails.
public func extractPreview(
    numberOfPoints: Int,
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    // Start timing before any work so extractionTimeMs covers the whole extraction.
    let startTime = CACurrentMediaTime()

    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    guard numberOfPoints > 0 else {
        reject("INVALID_RANGE", "Invalid number of points: must be greater than zero")
        return nil
    }

    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000

    // Calculate effective time range, clamped to the file on both sides.
    let effectiveStartMs = max(startTimeMs ?? 0.0, 0.0)
    let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
    let durationMs = effectiveEndMs - effectiveStartMs // This is the actual duration we want to use

    // Checked before the frame conversion so out-of-range values never reach it.
    guard durationMs > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Convert time to frames with proper offset
    let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
    let endFrame = min(AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0), audioFile.length)
    let samplesInRange = Int(endFrame - startFrame)

    guard samplesInRange > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Never ask for more points than there are samples; that would make every segment empty.
    let pointCount = min(numberOfPoints, samplesInRange)
    let samplesPerPoint = samplesInRange / pointCount

    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.processingFormat.channelCount)
    let fileChannelCount = Int(audioFile.fileFormat.channelCount)
    // Parenthesised on purpose: `x as? Int ?? 16 / 8` parses as `x ?? (16 / 8)`.
    let bytesPerSample = bitDepth / 8

    let startMessage = """
    [AudioProcessor] Starting preview extraction:
    - numberOfPoints: \(numberOfPoints)
    - startTimeMs: \(String(describing: startTimeMs))
    - endTimeMs: \(String(describing: endTimeMs))
    - durationMs: \(durationMs)
    - sampleRate: \(sampleRate)
    - bitDepth: \(bitDepth)
    - channels: \(numberOfChannels)
    - samplesInRange: \(samplesInRange)
    - samplesPerPoint: \(samplesPerPoint)
    """
    // Passed as an argument, not as the format string, so a stray '%' cannot be misread.
    NSLog("%@", startMessage)

    // One buffer is reused for every segment; the samples are copied out before the next read.
    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: AVAudioFrameCount(samplesPerPoint)) else {
        reject("AUDIO_READ_ERROR", "Error reading audio data: failed to allocate buffer")
        return nil
    }

    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity(pointCount)

    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    for i in 0..<pointCount {
        let pointStartFrame = startFrame + Int64(i * samplesPerPoint)
        let pointEndFrame = pointStartFrame + Int64(samplesPerPoint)

        // Calculate byte positions
        let startPosition = Int(pointStartFrame) * bytesPerSample * fileChannelCount
        let endPosition = Int(pointEndFrame) * bytesPerSample * fileChannelCount
        let segmentStartTime = Float(pointStartFrame) / sampleRate
        let segmentEndTime = Float(pointEndFrame) / sampleRate

        do {
            audioFile.framePosition = pointStartFrame
            try audioFile.read(into: buffer, frameCount: AVAudioFrameCount(samplesPerPoint))

            guard let floatData = buffer.floatChannelData else { continue }
            // Only the frames that were actually read are valid.
            let frameCount = Int(buffer.frameLength)
            guard frameCount > 0 else { continue }
            let samples = UnsafeBufferPointer(start: floatData[0], count: frameCount)

            var sumSquares: Float = 0
            var zeroCrossings = 0
            var prevValue: Float = 0
            var localMinAmplitude: Float = .greatestFiniteMagnitude
            var localMaxAmplitude: Float = -.greatestFiniteMagnitude

            // Process samples for this point
            for (frame, value) in samples.enumerated() {
                sumSquares += value * value
                if frame > 0 && value * prevValue < 0 {
                    zeroCrossings += 1
                }
                prevValue = value

                let absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            let features = computeFeatures(segmentData: Array(samples),
                                           sampleRate: sampleRate,
                                           sumSquares: sumSquares,
                                           zeroCrossings: zeroCrossings,
                                           segmentLength: frameCount,
                                           featureOptions: featureOptions)

            let rms = features.rms
            let silent = rms < 0.01
            let dB = Float(20 * log10(Double(rms)))

            let dataPoint = DataPoint(
                id: Int(uniqueIdCounter),
                amplitude: localMaxAmplitude,
                rms: rms,
                dB: dB,
                silent: silent,
                features: features,
                speech: SpeechFeatures(isActive: !silent),
                startTime: segmentStartTime,
                endTime: segmentEndTime,
                startPosition: startPosition,
                endPosition: endPosition,
                samples: frameCount
            )
            dataPoints.append(dataPoint)
            uniqueIdCounter += 1

            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
        } catch {
            reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
            return nil
        }
    }

    let endTime = CACurrentMediaTime()
    let extractionTimeMs = Float((endTime - startTime) * 1000)

    let completionMessage = """
    [AudioProcessor] Preview extraction completed:
    - dataPoints generated: \(dataPoints.count)
    - extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
    - amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
    """
    NSLog("%@", completionMessage)

    return AudioAnalysisData(
        segmentDurationMs: 100, // Default 100ms
        durationMs: Int(durationMs), // Use actual duration of trimmed section
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: samplesInRange,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTimeMs
    )
}
1308
-
1309
/// Returns the first URL that `FileManager` reports for the documents directory in the
/// user domain.
private func getDocumentsDirectory() -> URL {
    let documentURLs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
    return documentURLs[0]
}
1313
- }