@siteed/expo-audio-stream 2.0.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/README.md +46 -27
  2. package/build/index.d.ts +11 -12
  3. package/build/index.js +44 -10
  4. package/package.json +49 -110
  5. package/src/index.ts +18 -33
  6. package/CHANGELOG.md +0 -195
  7. package/android/build.gradle +0 -105
  8. package/android/src/main/AndroidManifest.xml +0 -27
  9. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
  10. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  11. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
  12. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
  13. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
  14. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -1936
  15. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
  16. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -138
  17. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -20
  18. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  19. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -509
  20. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  21. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  22. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
  23. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
  24. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  25. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
  26. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  27. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  28. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  29. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  30. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  31. package/android/src/main/res/drawable/ic_play.xml +0 -10
  32. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  33. package/android/src/main/res/layout/notification_recording.xml +0 -37
  34. package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
  35. package/app.plugin.js +0 -1
  36. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -144
  37. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  38. package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
  39. package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  40. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -78
  41. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  42. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -229
  43. package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  44. package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
  45. package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  46. package/build/AudioAnalysis/extractWaveform.js +0 -11
  47. package/build/AudioAnalysis/extractWaveform.js.map +0 -1
  48. package/build/AudioRecorder.provider.d.ts +0 -11
  49. package/build/AudioRecorder.provider.d.ts.map +0 -1
  50. package/build/AudioRecorder.provider.js +0 -37
  51. package/build/AudioRecorder.provider.js.map +0 -1
  52. package/build/ExpoAudioStream.native.d.ts +0 -3
  53. package/build/ExpoAudioStream.native.d.ts.map +0 -1
  54. package/build/ExpoAudioStream.native.js +0 -6
  55. package/build/ExpoAudioStream.native.js.map +0 -1
  56. package/build/ExpoAudioStream.types.d.ts +0 -206
  57. package/build/ExpoAudioStream.types.d.ts.map +0 -1
  58. package/build/ExpoAudioStream.types.js +0 -2
  59. package/build/ExpoAudioStream.types.js.map +0 -1
  60. package/build/ExpoAudioStream.web.d.ts +0 -59
  61. package/build/ExpoAudioStream.web.d.ts.map +0 -1
  62. package/build/ExpoAudioStream.web.js +0 -285
  63. package/build/ExpoAudioStream.web.js.map +0 -1
  64. package/build/ExpoAudioStreamModule.d.ts +0 -3
  65. package/build/ExpoAudioStreamModule.d.ts.map +0 -1
  66. package/build/ExpoAudioStreamModule.js +0 -239
  67. package/build/ExpoAudioStreamModule.js.map +0 -1
  68. package/build/WebRecorder.web.d.ts +0 -119
  69. package/build/WebRecorder.web.d.ts.map +0 -1
  70. package/build/WebRecorder.web.js +0 -436
  71. package/build/WebRecorder.web.js.map +0 -1
  72. package/build/constants.d.ts +0 -11
  73. package/build/constants.d.ts.map +0 -1
  74. package/build/constants.js +0 -14
  75. package/build/constants.js.map +0 -1
  76. package/build/events.d.ts +0 -26
  77. package/build/events.d.ts.map +0 -1
  78. package/build/events.js +0 -21
  79. package/build/events.js.map +0 -1
  80. package/build/index.d.ts.map +0 -1
  81. package/build/index.js.map +0 -1
  82. package/build/useAudioRecorder.d.ts +0 -21
  83. package/build/useAudioRecorder.d.ts.map +0 -1
  84. package/build/useAudioRecorder.js +0 -427
  85. package/build/useAudioRecorder.js.map +0 -1
  86. package/build/utils/BlobFix.d.ts +0 -9
  87. package/build/utils/BlobFix.d.ts.map +0 -1
  88. package/build/utils/BlobFix.js +0 -498
  89. package/build/utils/BlobFix.js.map +0 -1
  90. package/build/utils/audioProcessing.d.ts +0 -24
  91. package/build/utils/audioProcessing.d.ts.map +0 -1
  92. package/build/utils/audioProcessing.js +0 -133
  93. package/build/utils/audioProcessing.js.map +0 -1
  94. package/build/utils/concatenateBuffers.d.ts +0 -8
  95. package/build/utils/concatenateBuffers.d.ts.map +0 -1
  96. package/build/utils/concatenateBuffers.js +0 -21
  97. package/build/utils/concatenateBuffers.js.map +0 -1
  98. package/build/utils/convertPCMToFloat32.d.ts +0 -13
  99. package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
  100. package/build/utils/convertPCMToFloat32.js +0 -120
  101. package/build/utils/convertPCMToFloat32.js.map +0 -1
  102. package/build/utils/encodingToBitDepth.d.ts +0 -5
  103. package/build/utils/encodingToBitDepth.d.ts.map +0 -1
  104. package/build/utils/encodingToBitDepth.js +0 -13
  105. package/build/utils/encodingToBitDepth.js.map +0 -1
  106. package/build/utils/getWavFileInfo.d.ts +0 -26
  107. package/build/utils/getWavFileInfo.d.ts.map +0 -1
  108. package/build/utils/getWavFileInfo.js +0 -92
  109. package/build/utils/getWavFileInfo.js.map +0 -1
  110. package/build/utils/writeWavHeader.d.ts +0 -49
  111. package/build/utils/writeWavHeader.d.ts.map +0 -1
  112. package/build/utils/writeWavHeader.js +0 -91
  113. package/build/utils/writeWavHeader.js.map +0 -1
  114. package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  115. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  116. package/build/workers/InlineFeaturesExtractor.web.js +0 -828
  117. package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
  118. package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
  119. package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  120. package/build/workers/inlineAudioWebWorker.web.js +0 -157
  121. package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
  122. package/expo-module.config.json +0 -9
  123. package/ios/AudioAnalysisData.swift +0 -74
  124. package/ios/AudioNotificationManager.swift +0 -135
  125. package/ios/AudioProcessingHelpers.swift +0 -743
  126. package/ios/AudioProcessor.swift +0 -858
  127. package/ios/AudioStreamError.swift +0 -7
  128. package/ios/AudioStreamManager.swift +0 -1708
  129. package/ios/AudioStreamManagerDelegate.swift +0 -16
  130. package/ios/DataPoint.swift +0 -54
  131. package/ios/DecodingConfig.swift +0 -47
  132. package/ios/ExpoAudioStream.podspec +0 -27
  133. package/ios/ExpoAudioStreamModule.swift +0 -698
  134. package/ios/FFT.swift +0 -62
  135. package/ios/Features.swift +0 -95
  136. package/ios/Logger.swift +0 -7
  137. package/ios/NotificationExtension.swift +0 -15
  138. package/ios/RecordingResult.swift +0 -22
  139. package/ios/RecordingSettings.swift +0 -265
  140. package/ios/WaveformExtractor.swift +0 -105
  141. package/plugin/build/index.d.ts +0 -21
  142. package/plugin/build/index.js +0 -191
  143. package/plugin/src/index.ts +0 -278
  144. package/plugin/tsconfig.json +0 -10
  145. package/plugin/tsconfig.tsbuildinfo +0 -1
  146. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -165
  147. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -370
  148. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  149. package/src/AudioRecorder.provider.tsx +0 -54
  150. package/src/ExpoAudioStream.native.ts +0 -6
  151. package/src/ExpoAudioStream.types.ts +0 -329
  152. package/src/ExpoAudioStream.web.ts +0 -359
  153. package/src/ExpoAudioStreamModule.ts +0 -286
  154. package/src/WebRecorder.web.ts +0 -580
  155. package/src/constants.ts +0 -18
  156. package/src/events.ts +0 -60
  157. package/src/useAudioRecorder.tsx +0 -620
  158. package/src/utils/BlobFix.ts +0 -559
  159. package/src/utils/audioProcessing.ts +0 -205
  160. package/src/utils/concatenateBuffers.ts +0 -24
  161. package/src/utils/convertPCMToFloat32.ts +0 -170
  162. package/src/utils/encodingToBitDepth.ts +0 -18
  163. package/src/utils/getWavFileInfo.ts +0 -132
  164. package/src/utils/writeWavHeader.ts +0 -114
  165. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
  166. package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
@@ -1,858 +0,0 @@
1
- // packages/expo-audio-stream/ios/AudioProcessor.swift
2
-
3
- import Foundation
4
- import Accelerate
5
- import AVFoundation
6
- import QuartzCore
7
-
8
/// Describes the file produced by a trim operation.
public struct TrimResult {
    /// Location of the trimmed audio file.
    public let uri: String
    /// Duration of the trimmed audio (presumably seconds — confirm against the caller that builds this value).
    public let duration: Double
    /// Size of the trimmed file (presumably bytes — confirm against the caller that builds this value).
    public let size: Int64

    /// Creates a result from its location, duration and size.
    public init(uri: String, duration: Double, size: Int64) {
        self.size = size
        self.duration = duration
        self.uri = uri
    }
}
19
-
20
- public class AudioProcessor {
21
- public private(set) var audioFile: AVAudioFile?
22
- private var result: (Any) -> Void
23
- private var reject: (String, String) -> Void
24
- private var waveformData = Array<Float>()
25
- private var progress: Float = 0.0
26
- private var channelCount: Int = 1
27
- private var currentProgress: Float = 0.0
28
- private let extractionQueue = DispatchQueue(label: "AudioProcessor", attributes: .concurrent)
29
- private var _abortExtraction: Bool = false
30
-
31
- // Add a counter for unique IDs
32
- private var uniqueIdCounter = 0
33
-
34
/// Cancellation flag checked by the sample-based extraction loop.
/// Setting it to `true` makes an in-flight extraction stop at its next chunk.
public var abortExtraction: Bool {
    get {
        return _abortExtraction
    }
    set(requested) {
        _abortExtraction = requested
    }
}
38
-
39
/// Creates a processor backed by an audio file on disk.
/// - Parameters:
///   - url: Location of the audio file to open for reading.
///   - resolve: Callback stored for delivering results.
///   - reject: Callback invoked with an error code and a message when an operation fails.
/// - Throws: Any error raised by `AVAudioFile(forReading:)` when the file cannot be opened.
public init(url: URL, resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) throws {
    result = resolve
    self.reject = reject
    audioFile = try AVAudioFile(forReading: url)
}
45
-
46
/// Creates a processor with no backing file, for analysing in-memory buffers.
/// - Parameters:
///   - resolve: Callback stored for delivering results.
///   - reject: Callback invoked with an error code and a message when an operation fails.
public init(resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) {
    self.reject = reject
    result = resolve
}
51
-
52
-
53
/// Drops the reference to the audio file when the processor is deallocated.
deinit {
    self.audioFile = nil
}
56
-
57
/// Failures that `AudioProcessor` can describe as Swift errors.
public enum AudioProcessorError: Error {
    /// The audio file could not be set up; the associated string presumably describes why.
    case fileInitializationFailed(String)

    /// A PCM buffer could not be allocated.
    case bufferCreationFailed

    /// Reading audio failed; the associated string presumably describes why.
    case audioReadError(String)
}
63
-
64
-
65
/// Extracts audio from the loaded file and analyses it segment by segment.
///
/// The range to read comes from `position`/`byteLength` (bytes) when they are provided, and from
/// `offset`/`length` (frames) otherwise. Only the first channel of the decoded audio is analysed.
/// Every failure is reported through the `reject` callback and makes the method return `nil`.
///
/// - Parameters:
///   - numberOfSamples: When non-nil and non-zero, the file is read in chunks of
///     `rangeLength / numberOfSamples` frames (at least one frame); otherwise one segment per chunk.
///   - offset: The offset to start reading from (in frames). Ignored when `position` is given.
///   - length: The length of the audio to read (in frames). Ignored when `byteLength` is given.
///   - segmentDurationMs: The duration of each analysis segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data; used to convert byte positions to frames.
///   - numberOfChannels: The number of channels in the audio data; used to convert byte positions to frames.
///   - position: The position to start reading from (in bytes).
///   - byteLength: The length of the audio to read (in bytes).
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` on failure
///   or when the extraction was aborted through `abortExtraction`.
public func processAudioData(
    numberOfSamples: Int?,
    offset: Int? = 0,
    length: UInt? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int,
    position: Int? = nil,
    byteLength: Int? = nil
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // Kept as Int64: converting a very long file's length to a 32-bit frame count would trap.
    let totalFrameCount: Int64 = audioFile.length

    NSLog("""
    [AudioProcessor] Starting audio processing:
    - totalFrameCount: \(totalFrameCount)
    - bitDepth: \(bitDepth)
    - numberOfChannels: \(numberOfChannels)
    - position: \(position ?? -1)
    - byteLength: \(byteLength ?? -1)
    - offset: \(offset ?? -1)
    - length: \(length ?? 0)
    """)

    // Byte-based ranges are divided by the frame size below; a zero divisor would crash.
    let bytesPerSample = bitDepth / 8
    if position != nil || byteLength != nil {
        guard bytesPerSample > 0, numberOfChannels > 0 else {
            NSLog("[AudioProcessor] ERROR: Invalid audio format for byte-based range")
            reject("INVALID_FORMAT", "Cannot convert byte positions to frames with bitDepth \(bitDepth) and \(numberOfChannels) channel(s)")
            return nil
        }
    }

    // Use position/byteLength if provided, otherwise fall back to offset/length
    let effectiveOffset: Int64 = if let position = position {
        Int64(position / bytesPerSample / numberOfChannels)
    } else {
        Int64(offset ?? 0)
    }

    let effectiveLength: Int64 = if let byteLength = byteLength {
        Int64(byteLength / bytesPerSample / numberOfChannels)
    } else if let length = length {
        Int64(clamping: length)
    } else {
        totalFrameCount - effectiveOffset
    }

    NSLog("""
    [AudioProcessor] Calculated frame positions:
    - effectiveOffset: \(effectiveOffset)
    - effectiveLength: \(effectiveLength)
    - expectedEndFrame: \(effectiveOffset + effectiveLength)
    - totalFrameCount: \(totalFrameCount)
    """)

    // Validate frame boundaries
    if effectiveOffset < 0 || effectiveOffset >= totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Invalid offset value")
        reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
        return nil
    }

    if effectiveLength <= 0 {
        NSLog("[AudioProcessor] ERROR: Invalid length value")
        reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
        return nil
    }

    if effectiveOffset + effectiveLength > totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
        reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
        return nil
    }

    var startFrame: AVAudioFramePosition = effectiveOffset
    let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength

    // Frames per segment derived from the segment duration; clamped at zero so a negative
    // duration cannot trap in the unsigned conversion.
    let framesPerSegment = AVAudioFrameCount(max(0, Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0))

    // A zero sample count is treated like "not provided" instead of dividing by zero.
    let framesPerBuffer: AVAudioFrameCount
    if let numberOfSamples = numberOfSamples, numberOfSamples != 0 {
        framesPerBuffer = AVAudioFrameCount(clamping: max(1, effectiveLength / Int64(numberOfSamples)))
    } else {
        framesPerBuffer = framesPerSegment
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        reject("BUFFER_CREATION_FAILED", "Failed to create AVAudioPCMBuffer.")
        return nil
    }

    channelCount = Int(audioFile.processingFormat.channelCount)

    var channelData = [Float]()
    channelData.reserveCapacity(Int(effectiveLength))

    while startFrame < endFrame {
        // Take the minimum in 64-bit space so a long remaining range cannot overflow UInt32.
        let framesToRead = AVAudioFrameCount(min(Int64(framesPerBuffer), endFrame - startFrame))
        if framesToRead == 0 {
            break
        }

        if abortExtraction {
            audioFile.framePosition = startFrame
            abortExtraction = false
            return nil
        }

        do {
            audioFile.framePosition = startFrame
            try audioFile.read(into: buffer, frameCount: framesToRead)
        } catch {
            reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
            return nil
        }

        //TODO: check if we need conversion based on bitDepth here
        guard let floatData = buffer.floatChannelData else {
            reject("BUFFER_DATA_ERROR", "Failed to retrieve float data from buffer.")
            return nil
        }
        // Only the first channel is collected for analysis.
        channelData.append(contentsOf: UnsafeBufferPointer(start: floatData[0], count: Int(buffer.frameLength)))

        startFrame += AVAudioFramePosition(framesToRead)
    }

    NSLog("""
    [AudioProcessor] Audio processing completed:
    - processedFrames: \(startFrame - effectiveOffset)
    - framesPerBuffer: \(framesPerBuffer)
    """)

    return processChannelData(
        channelData: channelData,
        sampleRate: Float(audioFile.fileFormat.sampleRate),
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
220
-
221
/// Processes raw PCM audio held in memory.
///
/// The bytes are decoded as consecutive 16-bit or 32-bit signed integers and scaled by the
/// integer type's maximum value. Samples are decoded in order with no de-interleaving, and any
/// trailing bytes that do not form a whole sample are ignored. Failures are reported through the
/// `reject` callback.
///
/// - Parameters:
///   - data: The audio data buffer.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data (16 or 32).
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` when the
///   data is empty or the bit depth is unsupported.
public func processAudioBuffer(
    data: Data,
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    guard !data.isEmpty else {
        Logger.debug("Data is empty, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    // Convert Data to Float array based on bit depth.
    // Unaligned loads are used because `Data` does not guarantee that its bytes start on an
    // Int16/Int32 boundary, which binding the memory to those types would require.
    let floatData: [Float]
    switch bitDepth {
    case 16:
        let sampleSize = MemoryLayout<Int16>.size
        floatData = data.withUnsafeBytes { rawBytes in
            (0..<rawBytes.count / sampleSize).map { index in
                Float(rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int16.self)) / Float(Int16.max)
            }
        }
    case 32:
        let sampleSize = MemoryLayout<Int32>.size
        floatData = data.withUnsafeBytes { rawBytes in
            (0..<rawBytes.count / sampleSize).map { index in
                Float(rawBytes.loadUnaligned(fromByteOffset: index * sampleSize, as: Int32.self)) / Float(Int32.max)
            }
        }
    default:
        Logger.debug("Unsupported bit depth. Rejecting")
        reject("UNSUPPORTED_BIT_DEPTH", "Unsupported bit depth: \(bitDepth)")
        return nil
    }

    return processChannelData(
        channelData: floatData,
        sampleRate: sampleRate,
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
272
-
273
/// Splits the given samples into fixed-duration segments and extracts features from each.
///
/// A non-positive or non-finite sample rate, or a non-positive segment duration, is reported
/// through `reject` because it would otherwise produce a zero-length step and never terminate.
///
/// - Parameters:
///   - channelData: The audio channel data to process.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data; used to compute byte positions.
///   - numberOfChannels: The number of channels in the audio data; used to compute byte positions.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` when the
///   sample rate or segment duration is invalid.
private func processChannelData(
    channelData: [Float],
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    Logger.debug("Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")

    guard sampleRate.isFinite, sampleRate > 0 else {
        reject("INVALID_SAMPLE_RATE", "Sample rate must be a positive number, got \(sampleRate)")
        return nil
    }
    guard segmentDurationMs > 0 else {
        reject("INVALID_SEGMENT_DURATION", "Segment duration must be positive, got \(segmentDurationMs) ms")
        return nil
    }

    let processingStart = CACurrentMediaTime()

    let length = channelData.count

    // Samples per segment from the segment duration. The value is clamped to [1, length]:
    // at least 1 so the loop always advances, at most `length` so an oversized duration cannot
    // overflow (one segment then covers everything, exactly as an unclamped value would).
    let exactSamplesPerSegment = (Float(segmentDurationMs) * sampleRate / 1000.0).rounded(.towardZero)
    let samplesPerSegment = max(1, min(length, Int(exactly: exactSamplesPerSegment) ?? length))

    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity((length + samplesPerSegment - 1) / samplesPerSegment)
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    // Calculate bytes per sample
    let bytesPerSample = bitDepth / 8

    // Process data in segments
    for segmentStart in stride(from: 0, to: length, by: samplesPerSegment) {
        let segmentEnd = min(segmentStart + samplesPerSegment, length)
        let segment = Array(channelData[segmentStart..<segmentEnd])

        // Process segment and create data point, with byte positions and timing in seconds
        let dataPoint = processSegment(
            segment,
            sampleRate: sampleRate,
            featureOptions: featureOptions,
            startTime: Float(segmentStart) / sampleRate,
            endTime: Float(segmentEnd) / sampleRate,
            startPosition: segmentStart * bytesPerSample * numberOfChannels,
            endPosition: segmentEnd * bytesPerSample * numberOfChannels
        )
        dataPoints.append(dataPoint)

        // Update min/max amplitudes
        if let lowest = segment.min() {
            minAmplitude = min(minAmplitude, lowest)
        }
        if let highest = segment.max() {
            maxAmplitude = max(maxAmplitude, highest)
        }
    }

    // With no samples at all the sentinels above were never replaced; report an empty range.
    if dataPoints.isEmpty {
        minAmplitude = 0
        maxAmplitude = 0
    }

    let processingTimeMs = Float((CACurrentMediaTime() - processingStart) * 1000)

    Logger.debug("Processed \(dataPoints.count) data points in \(processingTimeMs) ms")

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(length) / sampleRate * 1000),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: length,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: processingTimeMs
    )
}
360
-
361
/// Builds the `DataPoint` for one segment of samples.
///
/// Computes the segment's RMS level, its level in dB, a silence flag (RMS below 0.01) and the
/// requested features, and assigns the next value of the instance-wide id counter.
///
/// - Parameters:
///   - segment: The samples of this segment.
///   - sampleRate: The sample rate of the audio data.
///   - featureOptions: The features to extract.
///   - startTime: Start of the segment, stored on the data point as given.
///   - endTime: End of the segment, stored on the data point as given.
///   - startPosition: Start position of the segment, stored on the data point as given.
///   - endPosition: End position of the segment, stored on the data point as given.
/// - Returns: The data point describing the segment.
private func processSegment(
    _ segment: [Float],
    sampleRate: Float,
    featureOptions: [String: Bool],
    startTime: Float,
    endTime: Float,
    startPosition: Int,
    endPosition: Int
) -> DataPoint {
    let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
    // An empty segment would divide by zero and yield NaN, which also defeats the silence test.
    let rms: Float = segment.isEmpty ? 0 : sqrt(sumSquares / Float(segment.count))
    let silent = rms < 0.01
    let dB = Float(20 * log10(Double(rms)))

    // Count sign changes between neighbouring samples so the "zcr" feature is meaningful;
    // skipped when the feature is not requested.
    var zeroCrossings = 0
    if featureOptions["zcr"] == true {
        for index in segment.indices.dropFirst() where (segment[index] >= 0) != (segment[index - 1] >= 0) {
            zeroCrossings += 1
        }
    }

    let features = computeFeatures(
        segmentData: segment,
        sampleRate: sampleRate,
        sumSquares: sumSquares,
        zeroCrossings: zeroCrossings,
        segmentLength: segment.count,
        featureOptions: featureOptions
    )

    let dataPoint = DataPoint(
        id: uniqueIdCounter,
        amplitude: segment.max() ?? 0,
        rms: rms,
        dB: dB,
        silent: silent,
        features: features,
        speech: SpeechFeatures(isActive: !silent),
        startTime: startTime,
        endTime: endTime,
        startPosition: startPosition,
        endPosition: endPosition,
        samples: segment.count
    )
    uniqueIdCounter += 1
    return dataPoint
}
402
-
403
/// Assembles the `Features` value for one segment.
///
/// RMS and the absolute-amplitude range are always computed; every other feature is computed only
/// when its key in `featureOptions` is `true`, and is otherwise zero or empty (`nil` for the CRC).
///
/// - Parameters:
///   - segmentData: The samples of the segment.
///   - sampleRate: The sample rate of the audio data.
///   - sumSquares: Sum of the squared samples of the segment.
///   - zeroCrossings: Number of zero crossings counted in the segment.
///   - segmentLength: Number of samples in the segment.
///   - featureOptions: The features to extract.
/// - Returns: The extracted features.
private func computeFeatures(
    segmentData: [Float],
    sampleRate: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: [String: Bool]
) -> Features {
    func isEnabled(_ feature: String) -> Bool {
        featureOptions[feature] == true
    }

    // Guard the per-sample averages: a zero-length segment would otherwise produce NaN.
    let hasSamples = segmentLength > 0
    let rms: Float = hasSamples ? sqrt(sumSquares / Float(segmentLength)) : 0
    let energy = isEnabled("energy") ? sumSquares : 0
    let zcr: Float = isEnabled("zcr") && hasSamples ? Float(zeroCrossings) / Float(segmentLength) : 0
    let mfcc = isEnabled("mfcc") ? extractMFCC(from: segmentData, sampleRate: sampleRate) : []
    let spectralCentroid = isEnabled("spectralCentroid") ? extractSpectralCentroid(from: segmentData, sampleRate: sampleRate) : 0
    let spectralFlatness = isEnabled("spectralFlatness") ? extractSpectralFlatness(from: segmentData) : 0
    let spectralRollOff = isEnabled("spectralRollOff") ? extractSpectralRollOff(from: segmentData, sampleRate: sampleRate) : 0
    let spectralBandwidth = isEnabled("spectralBandwidth") ? extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate) : 0
    let chromagram = isEnabled("chromagram") ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
    let tempo = isEnabled("tempo") ? extractTempo(from: segmentData, sampleRate: sampleRate) : 0
    let hnr = isEnabled("hnr") ? extractHNR(from: segmentData) : 0
    let melSpectrogram = isEnabled("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast = isEnabled("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []
    let tonnetz = isEnabled("tonnetz") ? computeTonnetz(from: segmentData, sampleRate: sampleRate) : []
    let pitch = isEnabled("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : 0

    // Smallest and largest absolute sample value; the magnitudes are computed once and shared.
    let magnitudes = segmentData.map(abs)
    let minAmplitude = magnitudes.min() ?? 0
    let maxAmplitude = magnitudes.max() ?? 0

    let crc32Value = isEnabled("crc32") ?
        calculateCRC32(from: segmentData, count: segmentData.count) : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        minAmplitude: minAmplitude,
        maxAmplitude: maxAmplitude,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRollOff: spectralRollOff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch,
        crc32: crc32Value
    )
}
455
-
456
/// Analyses a time range of the loaded audio file, one segment per data point.
///
/// All channels of each segment are averaged into a single signal before the peak amplitude, RMS
/// and requested features are computed. Failures are logged through `Logger.debug` and make the
/// method return `nil`; the `reject` callback is not invoked here.
///
/// NOTE(review): `startPosition`/`endPosition` of the produced data points are frame indices
/// here, whereas the sample-based path reports byte positions — confirm which one callers expect.
///
/// - Parameters:
///   - startTimeMs: Start of the range in milliseconds; the beginning of the file when `nil`.
///   - endTimeMs: End of the range in milliseconds; the end of the file when `nil`.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
/// - Returns: An `AudioAnalysisData` object for the range, or `nil` when no file is loaded, the
///   range or segment duration is invalid, or reading fails.
public func processAudioData(
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }

    let extractionStart = CACurrentMediaTime()
    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.fileFormat.channelCount)

    // Convert time to frames
    let startFrame = startTimeMs.map { AVAudioFramePosition($0 * Double(sampleRate) / 1000.0) } ?? 0
    let endFrame = endTimeMs.map { AVAudioFramePosition($0 * Double(sampleRate) / 1000.0) } ?? audioFile.length

    // Validate frame range
    guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
        Logger.debug("Invalid time range")
        return nil
    }

    // A non-positive duration would trap in the unsigned conversion or yield an empty buffer.
    guard segmentDurationMs > 0 else {
        Logger.debug("Invalid segment duration")
        return nil
    }

    // Calculate frames per buffer based on segment duration
    let framesPerBuffer = AVAudioFrameCount(sampleRate * Float(segmentDurationMs) / 1000.0)

    guard framesPerBuffer > 0,
          let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        Logger.debug("Failed to create buffer")
        return nil
    }

    // Iterate over the channels the buffer actually holds so the channel pointers are never overrun.
    let bufferChannelCount = Int(buffer.format.channelCount)

    var dataPoints: [DataPoint] = []
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude
    var currentId = 0

    audioFile.framePosition = startFrame
    var currentFrame = startFrame

    while currentFrame < endFrame {
        // Take the minimum in 64-bit space so a long remaining range cannot overflow UInt32.
        let framesToRead = AVAudioFrameCount(min(Int64(framesPerBuffer), endFrame - currentFrame))

        do {
            try audioFile.read(into: buffer, frameCount: framesToRead)
        } catch {
            Logger.debug("Error reading audio data: \(error)")
            return nil
        }

        guard let channelData = buffer.floatChannelData else {
            // Skipping the chunk without advancing would loop forever; give up instead.
            Logger.debug("Failed to retrieve float data from buffer")
            return nil
        }

        // Use the number of frames actually delivered, which may be fewer than requested.
        let framesRead = Int(buffer.frameLength)
        if framesRead == 0 {
            break
        }

        // Sum each channel's data, then average across channels
        var mixedSamples = [Float](repeating: 0, count: framesRead)
        for channel in 0..<bufferChannelCount {
            let channelBuffer = UnsafeBufferPointer(start: channelData[channel], count: framesRead)
            for (index, sample) in channelBuffer.enumerated() {
                mixedSamples[index] += sample
            }
        }
        let channelDivisor = Float(bufferChannelCount)
        for index in mixedSamples.indices {
            mixedSamples[index] /= channelDivisor
        }

        // Calculate both peak amplitude and RMS using vDSP
        var localMax: Float = 0
        vDSP_maxmgv(mixedSamples, 1, &localMax, vDSP_Length(framesRead))

        var meanSquare: Float = 0
        vDSP_measqv(mixedSamples, 1, &meanSquare, vDSP_Length(framesRead))
        let rms = sqrt(meanSquare)

        minAmplitude = min(minAmplitude, localMax)
        maxAmplitude = max(maxAmplitude, localMax)

        // Count sign changes between neighbouring samples, only when "zcr" is requested.
        var zeroCrossings = 0
        if featureOptions["zcr"] == true {
            for index in mixedSamples.indices.dropFirst()
            where (mixedSamples[index] >= 0) != (mixedSamples[index - 1] >= 0) {
                zeroCrossings += 1
            }
        }

        let segmentEndFrame = currentFrame + Int64(framesRead)

        // Create data point
        let dataPoint = DataPoint(
            id: currentId,
            amplitude: localMax, // Always use peak amplitude
            rms: rms, // Use calculated RMS value
            dB: Float(20 * log10(Double(rms))), // Use RMS for dB calculation
            silent: rms < 0.01, // Use RMS for silence detection
            features: computeFeatures(
                segmentData: mixedSamples,
                sampleRate: sampleRate,
                // computeFeatures expects the SUM of squares, so undo the averaging.
                sumSquares: meanSquare * Float(framesRead),
                zeroCrossings: zeroCrossings,
                segmentLength: framesRead,
                featureOptions: featureOptions
            ),
            speech: SpeechFeatures(isActive: rms >= 0.01),
            startTime: Float(currentFrame) / sampleRate,
            endTime: Float(segmentEndFrame) / sampleRate,
            startPosition: Int(currentFrame),
            endPosition: Int(segmentEndFrame),
            samples: framesRead
        )

        dataPoints.append(dataPoint)
        currentId += 1
        currentFrame = segmentEndFrame
    }

    let extractionTime = Float(CACurrentMediaTime() - extractionStart) * 1000 // Convert to milliseconds

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: Int(endFrame - startFrame),
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTime
    )
}
596
-
597
/// Trims the currently loaded audio file to a time range and writes the result
/// to a new temporary `.wav` file.
///
/// The requested range is clamped to the bounds of the loaded file. On success
/// the `audioFile` property is replaced by the trimmed file.
///
/// - Parameters:
///   - startTimeMs: Start of the range to keep, in milliseconds.
///   - endTimeMs: End of the range to keep, in milliseconds.
///   - outputFormat: Optional overrides forwarded to `createOutputSettings`
///     (`sampleRate`, `channels`, `bitDepth`).
/// - Returns: A `TrimResult` with the output URI, the duration in seconds of the
///   audio actually written, and the file size in bytes; `nil` when no file is
///   loaded, the range is empty or invalid, or reading/writing fails.
public func trimAudio(
    startTimeMs: Double,
    endTimeMs: Double,
    outputFormat: [String: Any]?
) -> TrimResult? {
    guard let currentAudioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }

    let sampleRate = currentAudioFile.fileFormat.sampleRate
    guard sampleRate > 0, startTimeMs.isFinite, endTimeMs.isFinite else {
        Logger.debug("Invalid trim range: \(startTimeMs)ms - \(endTimeMs)ms")
        return nil
    }

    // Clamp in floating point first so the integer conversions below cannot trap
    // on negative or out-of-file positions.
    let fileLength = Double(currentAudioFile.length)
    let startFrame = AVAudioFramePosition(min(max(startTimeMs * sampleRate / 1000.0, 0), fileLength))
    let endFrame = AVAudioFramePosition(min(max(endTimeMs * sampleRate / 1000.0, 0), fileLength))

    guard startFrame < endFrame else {
        Logger.debug("Invalid trim range: \(startTimeMs)ms - \(endTimeMs)ms")
        return nil
    }

    // Create output format
    // NOTE(review): no sample-rate or channel conversion is performed below; if
    // `outputFormat` requests values different from the source file, the write is
    // expected to fail because the buffer format will not match — confirm.
    let outputSettings = createOutputSettings(from: outputFormat, originalFormat: currentAudioFile.fileFormat)

    // Allocate the transfer buffer before creating any file on disk.
    let bufferSize: AVAudioFramePosition = 32768
    guard let buffer = AVAudioPCMBuffer(
        pcmFormat: currentAudioFile.processingFormat,
        frameCapacity: AVAudioFrameCount(bufferSize)
    ) else {
        Logger.debug("Error trimming audio: unable to allocate buffer")
        return nil
    }

    // Create temporary output file
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(UUID().uuidString)
        .appendingPathExtension("wav")

    do {
        var framesWritten: AVAudioFramePosition = 0

        // The nested scope releases the writing AVAudioFile before the size is
        // measured and the file is reopened for reading.
        do {
            let outputFile = try AVAudioFile(
                forWriting: outputURL,
                settings: outputSettings,
                commonFormat: .pcmFormatFloat32,
                interleaved: false
            )

            // Read and write in chunks
            currentAudioFile.framePosition = startFrame
            var currentFrame = startFrame

            while currentFrame < endFrame {
                // Take the minimum in 64-bit space; converting the remaining
                // distance to a 32-bit frame count first could trap.
                let framesToRead = AVAudioFrameCount(min(bufferSize, endFrame - currentFrame))

                try currentAudioFile.read(into: buffer, frameCount: framesToRead)

                // Stop if nothing was delivered instead of writing empty buffers.
                guard buffer.frameLength > 0 else { break }

                try outputFile.write(from: buffer)

                // Advance by what was actually read, not by what was requested.
                currentFrame += AVAudioFramePosition(buffer.frameLength)
            }

            framesWritten = currentFrame - startFrame
        }

        guard framesWritten > 0 else {
            Logger.debug("Error trimming audio: no frames were read")
            try? FileManager.default.removeItem(at: outputURL)
            return nil
        }

        // Get file size
        let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
        let fileSize = (attributes[.size] as? NSNumber)?.int64Value ?? 0

        // After successful trim, update the class property
        audioFile = try AVAudioFile(forReading: outputURL)

        // Report the duration of the audio that was actually written (seconds).
        let trimmedDuration = Double(framesWritten) / sampleRate

        return TrimResult(
            uri: outputURL.absoluteString,
            duration: trimmedDuration,
            size: fileSize
        )
    } catch {
        Logger.debug("Error trimming audio: \(error)")
        // Do not leave a partially written temporary file behind.
        try? FileManager.default.removeItem(at: outputURL)
        return nil
    }
}
671
-
672
/// Builds the linear-PCM settings dictionary for an output audio file.
///
/// - Parameters:
///   - options: Optional overrides read from the keys `sampleRate` (Double),
///     `channels` (Int) and `bitDepth` (Int).
///   - originalFormat: Format supplying the sample rate and channel count when
///     the corresponding override is absent or has a different type.
/// - Returns: Settings describing integer, little-endian, interleaved linear PCM;
///   the bit depth defaults to 16 when not overridden.
private func createOutputSettings(
    from options: [String: Any]?,
    originalFormat: AVAudioFormat
) -> [String: Any] {
    let requestedSampleRate = options?["sampleRate"] as? Double
    let requestedChannels = options?["channels"] as? Int
    let requestedBitDepth = options?["bitDepth"] as? Int

    // The fallback keeps the format's own `channelCount` value unconverted.
    let channels: Any
    if let requestedChannels = requestedChannels {
        channels = requestedChannels
    } else {
        channels = originalFormat.channelCount
    }

    return [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: requestedSampleRate ?? originalFormat.sampleRate,
        AVNumberOfChannelsKey: channels,
        AVLinearPCMBitDepthKey: requestedBitDepth ?? 16,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]
}
689
-
690
/// Extracts a fixed-size preview of the loaded audio file over an optional time range.
///
/// The range is split into equally sized segments, one per requested point, and
/// each segment is summarized from the first channel of the decoded audio.
///
/// - Parameters:
///   - numberOfPoints: The number of points to extract; must be positive. When the
///     range holds fewer samples than this, one point per sample is produced.
///   - startTimeMs: Optional start time in milliseconds (defaults to 0; negative
///     values are clamped to 0).
///   - endTimeMs: Optional end time in milliseconds (defaults to, and is clamped
///     to, the file duration).
///   - featureOptions: The features to extract, forwarded to `computeFeatures`.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or
///   `nil` after calling `reject` when no file is loaded, the arguments are
///   invalid, or the audio cannot be read.
public func extractPreview(
    numberOfPoints: Int,
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    guard numberOfPoints > 0 else {
        reject("INVALID_ARGUMENT", "numberOfPoints must be greater than zero")
        return nil
    }

    // Time the whole extraction, including the per-point processing loop.
    let startTime = CACurrentMediaTime()

    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000

    // Calculate effective time range
    let effectiveStartMs = max(startTimeMs ?? 0.0, 0.0)
    let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
    let durationMs = effectiveEndMs - effectiveStartMs // This is the actual duration we want to use

    // An inverted range (or NaN input) must be rejected before the integer
    // conversions below, which would otherwise trap.
    guard durationMs > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Convert time to frames with proper offset
    let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
    let endFrame = AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0)
    let samplesInRange = Int(endFrame - startFrame)

    guard samplesInRange > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Never ask for more points than there are samples, so that every point
    // covers at least one frame.
    let pointCount = min(numberOfPoints, samplesInRange)
    let samplesPerPoint = samplesInRange / pointCount

    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.processingFormat.channelCount)
    // Parenthesization matters: `x as? Int ?? 16 / 8` evaluates to the bit depth
    // itself, not to a byte count.
    let bytesPerSample = bitDepth / 8
    let bytesPerFrame = bytesPerSample * Int(audioFile.fileFormat.channelCount)

    let startMessage = """
    [AudioProcessor] Starting preview extraction:
    - numberOfPoints: \(numberOfPoints)
    - startTimeMs: \(String(describing: startTimeMs))
    - endTimeMs: \(String(describing: endTimeMs))
    - durationMs: \(durationMs)
    - sampleRate: \(sampleRate)
    - bitDepth: \(bitDepth)
    - channels: \(numberOfChannels)
    - samplesInRange: \(samplesInRange)
    - samplesPerPoint: \(samplesPerPoint)
    """
    // Pass the text as an argument so it is never interpreted as a format string.
    NSLog("%@", startMessage)

    // One buffer is reused for every point; each read overwrites it from the start.
    guard let framesPerPoint = AVAudioFrameCount(exactly: samplesPerPoint),
          let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerPoint) else {
        reject("AUDIO_READ_ERROR", "Error reading audio data: unable to allocate buffer")
        return nil
    }

    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity(pointCount)

    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    for i in 0..<pointCount {
        let pointStartFrame = startFrame + Int64(i * samplesPerPoint)

        do {
            audioFile.framePosition = pointStartFrame
            try audioFile.read(into: buffer, frameCount: framesPerPoint)

            // Only the frames actually delivered by the read are valid.
            let frameCount = Int(buffer.frameLength)
            guard frameCount > 0, let floatData = buffer.floatChannelData else { continue }

            let pointEndFrame = pointStartFrame + Int64(frameCount)
            let segment = Array(UnsafeBufferPointer(start: floatData[0], count: frameCount))

            var sumSquares: Float = 0
            var zeroCrossings = 0
            var prevValue: Float = 0
            var localMinAmplitude: Float = .greatestFiniteMagnitude
            var localMaxAmplitude: Float = -.greatestFiniteMagnitude

            // Process samples for this point
            for (frame, value) in segment.enumerated() {
                sumSquares += value * value
                if frame > 0 && value * prevValue < 0 {
                    zeroCrossings += 1
                }
                prevValue = value

                let absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            let features = computeFeatures(
                segmentData: segment,
                sampleRate: sampleRate,
                sumSquares: sumSquares,
                zeroCrossings: zeroCrossings,
                segmentLength: frameCount,
                featureOptions: featureOptions
            )

            let rms = features.rms
            let silent = rms < 0.01
            let dB = Float(20 * log10(Double(rms)))

            let dataPoint = DataPoint(
                id: Int(uniqueIdCounter),
                amplitude: localMaxAmplitude,
                rms: rms,
                dB: dB,
                silent: silent,
                features: features,
                speech: SpeechFeatures(isActive: !silent),
                startTime: Float(pointStartFrame) / sampleRate,
                endTime: Float(pointEndFrame) / sampleRate,
                // Byte offsets of the segment within the PCM data.
                startPosition: Int(pointStartFrame) * bytesPerFrame,
                endPosition: Int(pointEndFrame) * bytesPerFrame,
                samples: frameCount
            )
            dataPoints.append(dataPoint)
            uniqueIdCounter += 1

            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
        } catch {
            reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
            return nil
        }
    }

    // Do not leak the sentinel start values when no point could be produced.
    if dataPoints.isEmpty {
        minAmplitude = 0
        maxAmplitude = 0
    }

    let endTime = CACurrentMediaTime()
    let extractionTimeMs = Float((endTime - startTime) * 1000)

    let completionMessage = """
    [AudioProcessor] Preview extraction completed:
    - dataPoints generated: \(dataPoints.count)
    - extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
    - amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
    """
    NSLog("%@", completionMessage)

    return AudioAnalysisData(
        segmentDurationMs: 100, // Default 100ms
        durationMs: Int(durationMs), // Use actual duration of trimmed section
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: samplesInRange,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTimeMs
    )
}
858
- }