@siteed/expo-audio-stream 2.1.0 → 2.2.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/README.md +23 -260
  2. package/build/index.d.ts +11 -15
  3. package/build/index.js +54 -14
  4. package/build/src/index.d.ts +11 -0
  5. package/build/src/index.js +54 -0
  6. package/package.json +49 -110
  7. package/src/index.ts +18 -32
  8. package/CHANGELOG.md +0 -206
  9. package/android/build.gradle +0 -105
  10. package/android/src/main/AndroidManifest.xml +0 -27
  11. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -166
  12. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  13. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -131
  14. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -103
  15. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -435
  16. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -2235
  17. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -1437
  18. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -152
  19. package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +0 -1099
  20. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -21
  21. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  22. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -739
  23. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  24. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  25. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -70
  26. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -59
  27. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  28. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -205
  29. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  30. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  31. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  32. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  33. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  34. package/android/src/main/res/drawable/ic_play.xml +0 -10
  35. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  36. package/android/src/main/res/layout/notification_recording.xml +0 -37
  37. package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +0 -56
  38. package/app.plugin.js +0 -1
  39. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -179
  40. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  41. package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
  42. package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  43. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -68
  44. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  45. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -203
  46. package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  47. package/build/AudioAnalysis/extractAudioData.d.ts +0 -3
  48. package/build/AudioAnalysis/extractAudioData.d.ts.map +0 -1
  49. package/build/AudioAnalysis/extractAudioData.js +0 -5
  50. package/build/AudioAnalysis/extractAudioData.js.map +0 -1
  51. package/build/AudioAnalysis/extractMelSpectrogram.d.ts +0 -14
  52. package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +0 -1
  53. package/build/AudioAnalysis/extractMelSpectrogram.js +0 -85
  54. package/build/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  55. package/build/AudioAnalysis/extractPreview.d.ts +0 -11
  56. package/build/AudioAnalysis/extractPreview.d.ts.map +0 -1
  57. package/build/AudioAnalysis/extractPreview.js +0 -25
  58. package/build/AudioAnalysis/extractPreview.js.map +0 -1
  59. package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
  60. package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  61. package/build/AudioAnalysis/extractWaveform.js +0 -11
  62. package/build/AudioAnalysis/extractWaveform.js.map +0 -1
  63. package/build/AudioRecorder.provider.d.ts +0 -11
  64. package/build/AudioRecorder.provider.d.ts.map +0 -1
  65. package/build/AudioRecorder.provider.js +0 -37
  66. package/build/AudioRecorder.provider.js.map +0 -1
  67. package/build/ExpoAudioStream.native.d.ts +0 -3
  68. package/build/ExpoAudioStream.native.d.ts.map +0 -1
  69. package/build/ExpoAudioStream.native.js +0 -6
  70. package/build/ExpoAudioStream.native.js.map +0 -1
  71. package/build/ExpoAudioStream.types.d.ts +0 -532
  72. package/build/ExpoAudioStream.types.d.ts.map +0 -1
  73. package/build/ExpoAudioStream.types.js +0 -2
  74. package/build/ExpoAudioStream.types.js.map +0 -1
  75. package/build/ExpoAudioStream.web.d.ts +0 -59
  76. package/build/ExpoAudioStream.web.d.ts.map +0 -1
  77. package/build/ExpoAudioStream.web.js +0 -285
  78. package/build/ExpoAudioStream.web.js.map +0 -1
  79. package/build/ExpoAudioStreamModule.d.ts +0 -3
  80. package/build/ExpoAudioStreamModule.d.ts.map +0 -1
  81. package/build/ExpoAudioStreamModule.js +0 -693
  82. package/build/ExpoAudioStreamModule.js.map +0 -1
  83. package/build/WebRecorder.web.d.ts +0 -119
  84. package/build/WebRecorder.web.d.ts.map +0 -1
  85. package/build/WebRecorder.web.js +0 -436
  86. package/build/WebRecorder.web.js.map +0 -1
  87. package/build/constants.d.ts +0 -11
  88. package/build/constants.d.ts.map +0 -1
  89. package/build/constants.js +0 -14
  90. package/build/constants.js.map +0 -1
  91. package/build/events.d.ts +0 -26
  92. package/build/events.d.ts.map +0 -1
  93. package/build/events.js +0 -21
  94. package/build/events.js.map +0 -1
  95. package/build/index.d.ts.map +0 -1
  96. package/build/index.js.map +0 -1
  97. package/build/trimAudio.d.ts +0 -25
  98. package/build/trimAudio.d.ts.map +0 -1
  99. package/build/trimAudio.js +0 -67
  100. package/build/trimAudio.js.map +0 -1
  101. package/build/useAudioRecorder.d.ts +0 -21
  102. package/build/useAudioRecorder.d.ts.map +0 -1
  103. package/build/useAudioRecorder.js +0 -427
  104. package/build/useAudioRecorder.js.map +0 -1
  105. package/build/utils/BlobFix.d.ts +0 -9
  106. package/build/utils/BlobFix.d.ts.map +0 -1
  107. package/build/utils/BlobFix.js +0 -498
  108. package/build/utils/BlobFix.js.map +0 -1
  109. package/build/utils/audioProcessing.d.ts +0 -24
  110. package/build/utils/audioProcessing.d.ts.map +0 -1
  111. package/build/utils/audioProcessing.js +0 -133
  112. package/build/utils/audioProcessing.js.map +0 -1
  113. package/build/utils/concatenateBuffers.d.ts +0 -8
  114. package/build/utils/concatenateBuffers.d.ts.map +0 -1
  115. package/build/utils/concatenateBuffers.js +0 -21
  116. package/build/utils/concatenateBuffers.js.map +0 -1
  117. package/build/utils/convertPCMToFloat32.d.ts +0 -13
  118. package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
  119. package/build/utils/convertPCMToFloat32.js +0 -120
  120. package/build/utils/convertPCMToFloat32.js.map +0 -1
  121. package/build/utils/encodingToBitDepth.d.ts +0 -5
  122. package/build/utils/encodingToBitDepth.d.ts.map +0 -1
  123. package/build/utils/encodingToBitDepth.js +0 -13
  124. package/build/utils/encodingToBitDepth.js.map +0 -1
  125. package/build/utils/getWavFileInfo.d.ts +0 -26
  126. package/build/utils/getWavFileInfo.d.ts.map +0 -1
  127. package/build/utils/getWavFileInfo.js +0 -92
  128. package/build/utils/getWavFileInfo.js.map +0 -1
  129. package/build/utils/writeWavHeader.d.ts +0 -49
  130. package/build/utils/writeWavHeader.d.ts.map +0 -1
  131. package/build/utils/writeWavHeader.js +0 -91
  132. package/build/utils/writeWavHeader.js.map +0 -1
  133. package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  134. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  135. package/build/workers/InlineFeaturesExtractor.web.js +0 -828
  136. package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
  137. package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
  138. package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  139. package/build/workers/inlineAudioWebWorker.web.js +0 -157
  140. package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
  141. package/expo-module.config.json +0 -9
  142. package/ios/AudioAnalysisData.swift +0 -74
  143. package/ios/AudioNotificationManager.swift +0 -135
  144. package/ios/AudioProcessingHelpers.swift +0 -743
  145. package/ios/AudioProcessor.swift +0 -1313
  146. package/ios/AudioStreamError.swift +0 -7
  147. package/ios/AudioStreamManager.swift +0 -1708
  148. package/ios/AudioStreamManagerDelegate.swift +0 -16
  149. package/ios/DataPoint.swift +0 -54
  150. package/ios/DecodingConfig.swift +0 -47
  151. package/ios/ExpoAudioStream.podspec +0 -27
  152. package/ios/ExpoAudioStreamModule.swift +0 -805
  153. package/ios/FFT.swift +0 -62
  154. package/ios/Features.swift +0 -95
  155. package/ios/Logger.swift +0 -7
  156. package/ios/NotificationExtension.swift +0 -15
  157. package/ios/RecordingResult.swift +0 -22
  158. package/ios/RecordingSettings.swift +0 -265
  159. package/ios/WaveformExtractor.swift +0 -105
  160. package/plugin/build/index.d.ts +0 -21
  161. package/plugin/build/index.js +0 -191
  162. package/plugin/src/index.ts +0 -278
  163. package/plugin/tsconfig.json +0 -10
  164. package/plugin/tsconfig.tsbuildinfo +0 -1
  165. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -202
  166. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -333
  167. package/src/AudioAnalysis/extractAudioData.ts +0 -6
  168. package/src/AudioAnalysis/extractMelSpectrogram.ts +0 -144
  169. package/src/AudioAnalysis/extractPreview.ts +0 -34
  170. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  171. package/src/AudioRecorder.provider.tsx +0 -54
  172. package/src/ExpoAudioStream.native.ts +0 -6
  173. package/src/ExpoAudioStream.types.ts +0 -641
  174. package/src/ExpoAudioStream.web.ts +0 -359
  175. package/src/ExpoAudioStreamModule.ts +0 -967
  176. package/src/WebRecorder.web.ts +0 -580
  177. package/src/constants.ts +0 -18
  178. package/src/events.ts +0 -60
  179. package/src/trimAudio.ts +0 -90
  180. package/src/useAudioRecorder.tsx +0 -620
  181. package/src/utils/BlobFix.ts +0 -559
  182. package/src/utils/audioProcessing.ts +0 -205
  183. package/src/utils/concatenateBuffers.ts +0 -24
  184. package/src/utils/convertPCMToFloat32.ts +0 -170
  185. package/src/utils/encodingToBitDepth.ts +0 -18
  186. package/src/utils/getWavFileInfo.ts +0 -132
  187. package/src/utils/writeWavHeader.ts +0 -114
  188. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -827
  189. package/src/workers/inlineAudioWebWorker.web.tsx +0 -156
@@ -1,743 +0,0 @@
1
- // packages/expo-audio-stream/ios/AudioProcessingHelpers.swift
2
-
3
- import Accelerate
4
- import AVFoundation
5
- import QuartzCore
6
- import zlib
7
-
8
- // Constants
9
/// FFT size handed to the shared FFT instance and used by the helpers in this
/// file when converting between FFT bins and frequencies.
private let FFT_LENGTH: Int = 1024

/// Single FFT instance, constructed once with `FFT_LENGTH`, reused by every
/// spectral helper in this file.
private let sharedFFT: FFT = FFT(FFT_LENGTH)
11
-
12
- // Main feature extraction functions
13
/// Computes Mel-frequency cepstral coefficients for one audio segment.
///
/// Pipeline: Hann window -> shared FFT -> power spectrum -> 40-band Mel
/// filterbank -> natural log -> DCT.
///
/// - Parameters:
///   - segment: Samples of the segment to analyse.
///   - sampleRate: Sample rate in Hz, used to place the Mel filters.
/// - Returns: The 40 values produced by `computeDCT` from the log band energies.
func extractMFCC(from segment: [Float], sampleRate: Float) -> [Float] {
    let coefficientCount = 40

    let spectrum = computePowerSpectrum(
        from: sharedFFT.processSegment(applyHannWindow(to: segment))
    )
    let filterbank = computeMelFilterbank(
        numFilters: coefficientCount,
        fftSize: FFT_LENGTH,
        sampleRate: sampleRate
    )

    let logEnergies: [Float] = (0..<coefficientCount).map { (band: Int) -> Float in
        let weights = filterbank[band]
        // The filter row and the spectrum may differ in length; only the
        // overlapping prefix contributes to the band energy.
        let overlap = min(spectrum.count, weights.count)
        var bandEnergy: Float = 0
        for bin in 0..<overlap {
            bandEnergy += spectrum[bin] * weights[bin]
        }
        // Clamp before taking the log so an empty band cannot yield -infinity.
        return log(max(bandEnergy, .leastNormalMagnitude))
    }

    return computeDCT(from: logEnergies)
}
42
-
43
/// Returns the magnitude-weighted mean frequency of the segment's spectrum.
///
/// Bin `k` is mapped to `k * sampleRate / (2 * binCount)` Hz.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz.
/// - Returns: The centroid in Hz, or 0 when the magnitudes sum to zero or less.
func extractSpectralCentroid(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    let totalMagnitude = magnitudes.reduce(0, +)
    guard totalMagnitude > 0 else { return 0 }

    let divisor = Float(2 * magnitudes.count)
    var weightedSum: Float = 0
    for (bin, magnitude) in magnitudes.enumerated() {
        let frequency = Float(bin) * sampleRate / divisor
        weightedSum += frequency * magnitude
    }

    return weightedSum / totalMagnitude
}
58
-
59
/// Returns the spectral flatness of the segment: the geometric mean of the
/// power spectrum divided by its arithmetic mean.
///
/// - Parameter segment: Samples to analyse (passed to the shared FFT as-is).
/// - Returns: The flatness ratio, or 0 when the arithmetic mean is not positive.
func extractSpectralFlatness(from segment: [Float]) -> Float {
    let power = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let binCount = Float(power.count)

    // Geometric mean via the mean of logs; the epsilon keeps log(0) out.
    let logSum = power.reduce(Float(0)) { partial, value in
        partial + log(value + 1e-10)
    }
    let geometricMean = exp(logSum / binCount)

    let arithmeticMean = power.reduce(0, +) / binCount

    guard arithmeticMean > 0 else { return 0.0 }
    return geometricMean / arithmeticMean
}
77
-
78
/// Returns the frequency below which 85% of the summed spectral magnitude lies.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz.
/// - Returns: The frequency of the first bin at which the running magnitude
///   sum reaches the threshold, or 0 if no bin reaches it.
func extractSpectralRollOff(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))
    let cutoff = 0.85 * magnitudes.reduce(0, +)

    var runningSum: Float = 0
    let rollOffBin = magnitudes.firstIndex { (magnitude: Float) -> Bool in
        runningSum += magnitude
        return runningSum >= cutoff
    }

    guard let bin = rollOffBin else { return 0.0 }
    return Float(bin) * sampleRate / Float(2 * magnitudes.count)
}
95
-
96
/// Returns the magnitude-weighted standard deviation of frequency around the
/// spectral centroid.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz.
/// - Returns: The bandwidth in Hz, or 0 when the magnitudes sum to zero or less.
func extractSpectralBandwidth(from segment: [Float], sampleRate: Float) -> Float {
    let spectrum = sharedFFT.processSegment(segment)
    let center = extractSpectralCentroid(from: segment, sampleRate: sampleRate)
    let magnitudes = computeMagnitudeSpectrum(from: spectrum)

    let totalMagnitude = magnitudes.reduce(0, +)
    guard totalMagnitude > 0 else { return 0 }

    let divisor = Float(2 * magnitudes.count)
    var weightedSpread: Float = 0
    for (bin, magnitude) in magnitudes.enumerated() {
        let frequency = Float(bin) * sampleRate / divisor
        weightedSpread += pow(frequency - center, 2) * magnitude
    }

    return sqrt(weightedSpread / totalMagnitude)
}
113
-
114
/// Folds the segment's FFT magnitudes into 12 pitch classes.
///
/// Each bin's centre frequency is converted to a semitone offset from 440 Hz,
/// rounded to the nearest semitone, and wrapped into `0..<12` (index 0 is the
/// pitch class of 440 Hz). The bin's magnitude is added to that class.
///
/// Fixes relative to the previous version: offsets below 440 Hz produce a
/// negative remainder and were silently discarded; they are now wrapped into
/// range. Offsets are rounded instead of truncated, so a bin just below a
/// semitone is no longer assigned to the semitone beneath it.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz.
/// - Returns: 12 accumulated magnitudes, one per pitch class.
func extractChromagram(from segment: [Float], sampleRate: Float) -> [Float] {
    let fftData = sharedFFT.processSegment(segment)
    let binCount = fftData.count / 2
    let pitchClassCount = 12
    var chroma = [Float](repeating: 0, count: pitchClassCount)
    let hzPerBin = sampleRate / Float(FFT_LENGTH)

    for bin in 0..<binCount {
        let frequency = Float(bin) * hzPerBin
        // Bin 0 (DC) has no pitch; non-finite values cannot be converted to Int.
        guard frequency > 0, frequency.isFinite else { continue }

        let semitonesFrom440 = Int((12 * log2(frequency / 440.0)).rounded())
        // Swift's % keeps the sign of the dividend, so normalise into 0..<12.
        let pitchClass = ((semitonesFrom440 % pitchClassCount) + pitchClassCount) % pitchClassCount

        // bin < fftData.count / 2 guarantees both indices are in range.
        let re = fftData[2 * bin]
        let im = fftData[2 * bin + 1]
        chroma[pitchClass] += sqrt(re * re + im * im)
    }

    return chroma
}
140
-
141
/// Estimates tempo in BPM from the spacing of spectral-flux onset peaks.
///
/// The segment is cut into 2048-sample frames every 512 samples. Each frame is
/// transformed in place by `sharedFFT.realForward`, and the positive change in
/// magnitude relative to the previous frame is summed into an onset envelope.
/// Local maxima of that envelope are treated as onsets; the mean distance
/// between consecutive onsets is converted to BPM and clamped to 20...300.
///
/// Fix relative to the previous version: frames are enumerated with
/// `stride(through:)`, so the last full frame is included. Previously a
/// segment of exactly one frame produced no frames at all.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate in Hz.
/// - Returns: The estimated tempo, or 120 when the segment is shorter than one
///   frame or fewer than two onset peaks are found.
func extractTempo(from segment: [Float], sampleRate: Float) -> Float {
    let hopLength = 512
    let frameLength = 2048
    let defaultTempo: Float = 120.0

    // Need at least one complete frame.
    guard segment.count >= frameLength else { return defaultTempo }

    var onsetEnvelope = [Float]()
    var previousSpectrum = [Float](repeating: 0, count: frameLength / 2)

    for start in stride(from: 0, through: segment.count - frameLength, by: hopLength) {
        var fftData = Array(segment[start..<(start + frameLength)])
        sharedFFT.realForward(&fftData)

        let magnitudes = computeMagnitudeSpectrum(from: fftData)
        var flux: Float = 0
        for bin in 0..<min(magnitudes.count, previousSpectrum.count) {
            // Only increases in magnitude count towards an onset.
            flux += max(magnitudes[bin] - previousSpectrum[bin], 0)
        }
        onsetEnvelope.append(flux)
        previousSpectrum = magnitudes
    }

    // A peak needs a neighbour on each side.
    var peaks = [Int]()
    if onsetEnvelope.count >= 3 {
        for i in 1..<(onsetEnvelope.count - 1)
        where onsetEnvelope[i] > onsetEnvelope[i - 1] && onsetEnvelope[i] > onsetEnvelope[i + 1] {
            peaks.append(i)
        }
    }

    guard peaks.count > 1 else { return defaultTempo }

    let intervals = zip(peaks, peaks.dropFirst()).map { $1 - $0 }
    let averageInterval = Float(intervals.reduce(0, +)) / Float(intervals.count)
    guard averageInterval > 0 else { return defaultTempo }

    // Intervals are measured in hops; convert hops per beat to beats per minute.
    let tempo = 60.0 * sampleRate / Float(hopLength) / averageInterval
    return min(300.0, max(20.0, tempo))
}
195
-
196
/// Returns the indices of strict local maxima whose height above the higher
/// of the two neighbours is at least `minProminence`.
///
/// Fix relative to the previous version: inputs with fewer than three
/// elements now return an empty array. Previously `1..<data.count - 1` formed
/// an invalid range and trapped for empty or single-element input.
///
/// - Parameters:
///   - data: Values to scan; the first and last elements are never peaks.
///   - minProminence: Minimum difference between a peak and its higher neighbour.
/// - Returns: Peak indices in ascending order.
private func findPeaks(in data: [Float], minProminence: Float) -> [Int] {
    guard data.count >= 3 else { return [] }

    return (1..<(data.count - 1)).filter { (index: Int) -> Bool in
        let left = data[index - 1]
        let center = data[index]
        let right = data[index + 1]
        guard center > left && center > right else { return false }
        return center - max(left, right) >= minProminence
    }
}
208
-
209
/// Estimates the harmonics-to-noise ratio (dB) from the autocorrelation.
///
/// The value at the first prominent autocorrelation peak after lag 0 is taken
/// as harmonic energy, and the lag-0 value minus that as noise energy.
///
/// Fixes relative to the previous version:
/// - `vDSP_conv` reads `N + P - 1` input samples; the signal is now
///   zero-padded to that length instead of reading past the array.
/// - With a positive filter stride `vDSP_conv` performs correlation, so the
///   unreversed segment is used as the filter to get a true autocorrelation
///   (`result[lag] = sum of segment[p] * segment[p + lag]`).
/// - Segments shorter than three samples, and a non-positive harmonic energy
///   (which made `log10` return NaN), now yield 0.
///
/// - Parameter segment: Samples to analyse.
/// - Returns: HNR in dB, or 0 when no usable peak is found.
func extractHNR(from segment: [Float]) -> Float {
    let frameSize = segment.count
    guard frameSize >= 3 else { return 0.0 }

    // Zero-pad so every lag 0..<frameSize has frameSize readable samples.
    let padded = segment + [Float](repeating: 0, count: frameSize - 1)
    var autocorrelation = [Float](repeating: 0, count: frameSize)
    vDSP_conv(
        padded, 1,
        segment, 1,
        &autocorrelation, 1,
        vDSP_Length(frameSize),
        vDSP_Length(frameSize)
    )

    guard let maxValue = autocorrelation.max() else { return 0.0 }

    let peaks = findPeaks(in: autocorrelation, minProminence: 0.1 * maxValue)
    guard let firstPeakIndex = peaks.first(where: { $0 > 0 }) else { return 0.0 }

    let harmonicEnergy = autocorrelation[firstPeakIndex]
    let noiseEnergy = autocorrelation[0] - harmonicEnergy
    guard harmonicEnergy > 0, noiseEnergy > 0 else { return 0.0 }

    return 10 * log10(harmonicEnergy / noiseEnergy)
}
232
-
233
- // Helper functions
234
/// Converts interleaved (real, imaginary) FFT output into per-bin magnitudes.
///
/// - Parameter fftData: Values read as `[re0, im0, re1, im1, ...]`; a trailing
///   unpaired element is ignored.
/// - Returns: `fftData.count / 2` magnitudes, `sqrt(re^2 + im^2)` per pair.
private func computeMagnitudeSpectrum(from fftData: [Float]) -> [Float] {
    return (0..<(fftData.count / 2)).map { (bin: Int) -> Float in
        // bin < count / 2 guarantees both indices are in range.
        let re = fftData[2 * bin]
        let im = fftData[2 * bin + 1]
        return sqrt(re * re + im * im)
    }
}
248
-
249
/// Multiplies the segment element-wise by a Hann window of the same length.
///
/// The window is generated by `vDSP_hann_window` with the `vDSP_HANN_NORM` flag.
///
/// - Parameter segment: Samples to window.
/// - Returns: A new array of the same length holding the windowed samples.
private func applyHannWindow(to segment: [Float]) -> [Float] {
    let sampleCount = segment.count
    let length = vDSP_Length(sampleCount)

    var hann = [Float](repeating: 0, count: sampleCount)
    vDSP_hann_window(&hann, length, Int32(vDSP_HANN_NORM))

    var windowed = [Float](repeating: 0, count: sampleCount)
    vDSP_vmul(segment, 1, hann, 1, &windowed, 1, length)
    return windowed
}
258
-
259
/// Converts interleaved (real, imaginary) FFT output into per-bin power.
///
/// - Parameter fftData: Values read as `[re0, im0, re1, im1, ...]`; a trailing
///   unpaired element is ignored.
/// - Returns: `fftData.count / 2` values, `re^2 + im^2` per pair.
private func computePowerSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    var power = [Float]()
    power.reserveCapacity(pairCount)

    var offset = 0
    while power.count < pairCount {
        let re = fftData[offset]
        let im = fftData[offset + 1]
        power.append(re * re + im * im)
        offset += 2
    }
    return power
}
273
-
274
/// Builds a bank of triangular filters spaced evenly on the Mel scale between
/// 0 Hz and `sampleRate / 2`.
///
/// - Parameters:
///   - numFilters: Number of triangular filters (rows) to create.
///   - fftSize: FFT length used to convert frequencies to bin indices.
///   - sampleRate: Sample rate in Hz.
/// - Returns: `numFilters` rows of `1 + fftSize / 2` weights. Row `i` rises
///   from bin edge `i` to edge `i + 1` and falls to zero at edge `i + 2`.
private func computeMelFilterbank(numFilters: Int, fftSize: Int, sampleRate: Float) -> [[Float]] {
    let lowMel = hzToMel(0)
    let highMel = hzToMel(sampleRate / 2)
    let melSpacing = (highMel - lowMel) / Float(numFilters + 1)

    // numFilters + 2 edges: each filter spans three consecutive edges.
    let edges: [Int] = (0...numFilters + 1).map { (index: Int) -> Int in
        let hz = melToHz(lowMel + Float(index) * melSpacing)
        return Int((hz * Float(fftSize) / sampleRate).rounded())
    }

    let emptyRow = [Float](repeating: 0, count: 1 + fftSize / 2)
    var filterbank = [[Float]](repeating: emptyRow, count: numFilters)

    for filter in 0..<numFilters {
        let left = edges[filter]
        let center = edges[filter + 1]
        let right = edges[filter + 2]

        for bin in left..<right {
            filterbank[filter][bin] = bin < center
                ? Float(bin - left) / Float(center - left)      // rising slope
                : Float(right - bin) / Float(right - center)    // falling slope
        }
    }

    return filterbank
}
300
-
301
/// Converts a frequency in Hz to Mel using `2595 * log10(1 + hz / 700)`.
private func hzToMel(_ hz: Float) -> Float {
    let ratio = 1 + hz / 700
    return 2595 * log10(ratio)
}
304
-
305
/// Converts a Mel value back to Hz using `700 * (10^(mel / 2595) - 1)`.
private func melToHz(_ mel: Float) -> Float {
    let exponent = mel / 2595
    return 700 * (pow(10, exponent) - 1)
}
308
-
309
/// Computes a DCT-II of the input, with every output scaled by `sqrt(2 / N)`.
///
/// `output[k] = sqrt(2 / N) * sum over n of input[n] * cos(pi * k * (2n + 1) / (2N))`
///
/// - Parameter input: Values to transform.
/// - Returns: An array of the same length as `input` (empty for empty input).
private func computeDCT(from input: [Float]) -> [Float] {
    let count = input.count
    let scale = sqrt(2.0 / Float(count))

    return (0..<count).map { (k: Int) -> Float in
        var accumulator: Float = 0
        for (n, sample) in input.enumerated() {
            accumulator += sample * cos(.pi * Float(k) * (2 * Float(n) + 1) / (2 * Float(count)))
        }
        return scale * accumulator
    }
}
324
-
325
/// Projects the segment's power spectrum onto a 128-band Mel filterbank.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz, used to place the Mel filters.
/// - Returns: One energy per Mel band: the dot product of the filter weights
///   and the power spectrum over their common length.
func computeMelSpectrogram(from segment: [Float], sampleRate: Float) -> [Float] {
    let bandCount = 128
    let power = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let filterbank = computeMelFilterbank(
        numFilters: bandCount,
        fftSize: FFT_LENGTH,
        sampleRate: sampleRate
    )

    var bandEnergies = [Float]()
    bandEnergies.reserveCapacity(filterbank.count)
    for weights in filterbank {
        var energy: Float = 0
        // zip stops at the shorter of the two sequences.
        for (weight, value) in zip(weights, power) {
            energy += weight * value
        }
        bandEnergies.append(energy)
    }
    return bandEnergies
}
338
-
339
/// Computes peak-to-valley contrast (dB) in seven fixed frequency bands.
///
/// For each band the magnitudes are sorted, and the values at the 95% and 5%
/// positions are taken as peak and valley. The contrast is
/// `20 * log10(peak / valley)`.
///
/// Fixes relative to the previous version:
/// - Peak and valley are both floored at `Float.leastNormalMagnitude`, so a
///   silent band yields 0 dB instead of `-infinity`.
/// - A non-positive or non-finite sample rate returns all zeros instead of
///   trapping when an infinite or NaN bin index is converted to `Int`.
/// - The unused `nBands` local is removed.
///
/// - Parameters:
///   - segment: Samples to analyse (passed to the shared FFT as-is).
///   - sampleRate: Sample rate in Hz.
/// - Returns: Seven contrast values; 0 for a band that maps to fewer than two bins.
func computeSpectralContrast(from segment: [Float], sampleRate: Float) -> [Float] {
    let magnitudeSpectrum = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    // Fixed band edges in Hz; the last band is capped at sampleRate / 2.
    let bandFrequencies: [(low: Float, high: Float)] = [
        (20, 125),      // Sub-bass
        (125, 250),     // Bass
        (250, 500),     // Low-mids
        (500, 1000),    // Mids
        (1000, 2000),   // High-mids
        (2000, 4000),   // Presence
        (4000, min(8000, sampleRate / 2)) // Brilliance
    ]

    let freqResolution = sampleRate / Float(FFT_LENGTH)
    guard freqResolution > 0, freqResolution.isFinite else {
        return [Float](repeating: 0, count: bandFrequencies.count)
    }

    let binLimit = Float(magnitudeSpectrum.count)
    let floorValue = Float.leastNormalMagnitude

    return bandFrequencies.map { (band: (low: Float, high: Float)) -> Float in
        // Clamp before converting so very large ratios cannot overflow Int.
        let startBin = Int(min(band.low / freqResolution, binLimit))
        let endBin = min(Int(min(band.high / freqResolution, binLimit)), magnitudeSpectrum.count - 1)
        guard startBin < endBin else { return 0 }

        let sortedMagnitudes = magnitudeSpectrum[startBin...endBin].sorted()
        let length = sortedMagnitudes.count

        let peak = max(sortedMagnitudes[Int(Float(length) * 0.95)], floorValue)
        let valley = max(sortedMagnitudes[Int(Float(length) * 0.05)], floorValue)

        return 20 * log10(peak / valley)
    }
}
388
-
389
- // Original function for backward compatibility
390
/// Convenience overload: computes the chromagram for the segment and forwards
/// it to `computeTonnetz(fromChroma:)`.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate in Hz.
/// - Returns: The six tonnetz features.
func computeTonnetz(from segment: [Float], sampleRate: Float) -> [Float] {
    return computeTonnetz(
        fromChroma: extractChromagram(from: segment, sampleRate: sampleRate)
    )
}
394
-
395
- // New optimized function that accepts pre-computed chromagram
396
/// Projects a chromagram onto six fixed 0/1 weight rows.
///
/// Each output value is the sum of the chroma bins selected by one row of the
/// matrix below.
///
/// - Parameter chroma: Pitch-class energies; if it has fewer than 12 entries
///   only the available prefix contributes.
/// - Returns: Six values, one per matrix row.
func computeTonnetz(fromChroma chroma: [Float]) -> [Float] {
    // 6x12 selection matrix.
    // NOTE(review): row labels are carried over from the previous comments and
    // have not been checked against the interval structure of each row.
    let tonnetzMatrix: [[Float]] = [
        [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], // Perfect fifth
        [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], // Minor third
        [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], // Major third
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], // Perfect fifth
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], // Minor third
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]  // Major third
    ]

    var features = [Float]()
    features.reserveCapacity(tonnetzMatrix.count)
    for row in tonnetzMatrix {
        var projection: Float = 0
        for (weight, value) in zip(row, chroma) {
            projection += weight * value
        }
        features.append(projection)
    }
    return features
}
412
-
413
/// Decoded audio returned by `loadAudioFile`.
struct AudioData {
    /// Samples as floats (`loadAudioFile` fills this from channel 0 only).
    let samples: Array<Float>
    /// Sample rate in Hz, truncated to an integer.
    let sampleRate: Int
}
417
-
418
/// Reads an entire audio file into memory as floats from channel 0.
///
/// Fixes relative to the previous version:
/// - The buffer allocation is no longer force-unwrapped; failure throws.
/// - A file length that does not fit `AVAudioFrameCount` throws instead of
///   trapping in the integer conversion.
/// - Only the frames actually read (`buffer.frameLength`) are copied, rather
///   than the requested capacity.
///
/// - Parameter fileUri: String parsed with `URL(string:)`.
/// - Returns: The samples and the processing format's sample rate.
/// - Throws: An `NSError` in the `"AudioProcessing"` domain for an invalid URL,
///   an oversized file, or a failed allocation or float conversion; also
///   rethrows errors from `AVAudioFile`.
func loadAudioFile(_ fileUri: String) throws -> AudioData {
    func failure(_ message: String) -> NSError {
        return NSError(
            domain: "AudioProcessing",
            code: -1,
            userInfo: [NSLocalizedDescriptionKey: message]
        )
    }

    guard let url = URL(string: fileUri) else {
        throw failure("Invalid file URL")
    }

    let file = try AVAudioFile(forReading: url)
    let format = file.processingFormat

    guard let frameCount = AVAudioFrameCount(exactly: file.length) else {
        throw failure("Audio file is too large to load into memory")
    }
    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw failure("Failed to allocate audio buffer")
    }

    try file.read(into: buffer)

    guard let floatData = buffer.floatChannelData?[0] else {
        throw failure("Failed to read audio data")
    }

    // frameLength is the number of valid frames after the read.
    let samples = Array(UnsafeBufferPointer(start: floatData, count: Int(buffer.frameLength)))
    return AudioData(samples: samples, sampleRate: Int(format.sampleRate))
}
440
-
441
/// Returns the mean of the squared samples.
///
/// Fix relative to the previous version: `vDSP_measqv` already returns the
/// mean of squares, so the extra division by the sample count was removed.
/// That division made the result, and `computeRMS` which takes its square
/// root, too small. Empty input now returns 0 instead of NaN.
///
/// - Parameter samples: Samples to measure.
/// - Returns: The mean of squares, or 0 for empty input.
func computeEnergy(from samples: [Float]) -> Float {
    guard !samples.isEmpty else { return 0 }

    var meanSquare: Float = 0
    vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(samples.count))
    return meanSquare
}
446
-
447
/// Returns the square root of `computeEnergy(from:)` for the given samples.
func computeRMS(from samples: [Float]) -> Float {
    return sqrt(computeEnergy(from: samples))
}
451
-
452
/// Returns the fraction of samples at which the signal changes sign.
///
/// A crossing is counted when the product of two adjacent samples is strictly
/// negative, so a transition through an exact zero is not counted.
///
/// Fix relative to the previous version: empty input returns 0. Previously
/// `1..<samples.count` formed an invalid range and trapped.
///
/// - Parameter samples: Samples to scan.
/// - Returns: Crossings divided by `samples.count`, or 0 for fewer than two samples.
func computeZCR(from samples: [Float]) -> Float {
    guard samples.count > 1 else { return 0 }

    var zeroCrossings = 0
    for (previous, current) in zip(samples, samples.dropFirst()) where previous * current < 0 {
        zeroCrossings += 1
    }
    return Float(zeroCrossings) / Float(samples.count)
}
461
-
462
- // Keep in AudioProcessingHelpers.swift
463
/// Same value as the local `nMFCC` in `extractMFCC`.
private let N_MFCC: Int = 40
/// Same value as `FFT_LENGTH`.
private let N_FFT: Int = 1024
/// Same value as the local `nMels` in `computeMelSpectrogram`.
private let N_MELS: Int = 128
/// Same value as the local `nChroma` in `extractChromagram`.
private let N_CHROMA: Int = 12
/// Same value as the local `nBands` in `computeSpectralContrast`.
private let N_BANDS: Int = 7
468
-
469
- // Core audio processing functions
470
/// Returns the fraction of samples at which the signal moves between the
/// negative side (`< 0`) and the non-negative side (`>= 0`).
///
/// Unlike `computeZCR`, a transition to or from an exact zero is counted here.
///
/// Fix relative to the previous version: empty input returns 0. Previously
/// `1..<data.count` formed an invalid range and trapped.
///
/// - Parameter data: Samples to scan.
/// - Returns: Crossings divided by `data.count`, or 0 for fewer than two samples.
func calculateZeroCrossingRate(_ data: [Float]) -> Float {
    guard data.count > 1 else { return 0 }

    var crossings = 0
    for (previous, current) in zip(data, data.dropFirst()) {
        let risesThroughZero = current >= 0 && previous < 0
        let fallsThroughZero = current < 0 && previous >= 0
        if risesThroughZero || fallsThroughZero {
            crossings += 1
        }
    }
    return Float(crossings) / Float(data.count)
}
479
-
480
/// Returns the sum of squared samples divided by the sample count.
///
/// Fix relative to the previous version: empty input returns 0 instead of the
/// NaN produced by dividing by a zero count.
///
/// - Parameter data: Samples to measure.
/// - Returns: The mean of squares, or 0 for empty input.
func calculateEnergy(_ data: [Float]) -> Float {
    guard !data.isEmpty else { return 0 }

    var sumOfSquares: Float = 0
    vDSP_svesq(data, 1, &sumOfSquares, vDSP_Length(data.count))
    return sumOfSquares / Float(data.count)
}
485
-
486
- // Feature extraction functions
487
/// Assembles a `Features` value for one segment.
///
/// `energy`, `zcr`, `mfcc`, `melSpectrogram`, `chromagram`, `spectralContrast`,
/// `tonnetz` and `pitch` are computed only when their key in `featureOptions`
/// is `true`; otherwise they are 0, empty, or `nil`. RMS, the four spectral
/// scalars, tempo and HNR are always computed.
///
/// Changes relative to the previous version:
/// - When both `chromagram` and `tonnetz` are requested, the chromagram is
///   computed once and reused via `computeTonnetz(fromChroma:)`.
/// - The unused `minAmplitude` and `maxAmplitude` locals are removed.
/// - RMS and ZCR are 0 instead of NaN when `segmentLength` is 0.
///
/// - Parameters:
///   - segmentData: Samples of the segment.
///   - sampleRate: Sample rate in Hz.
///   - sumSquares: Precomputed sum of squared samples; returned as `energy`.
///   - zeroCrossings: Precomputed zero-crossing count.
///   - segmentLength: Number of samples used to normalise RMS and ZCR.
///   - featureOptions: Feature name -> enabled flag.
/// - Returns: The populated `Features` value.
func computeFeatures(segmentData: [Float], sampleRate: Float, sumSquares: Float, zeroCrossings: Int, segmentLength: Int, featureOptions: [String: Bool]) -> Features {
    func isEnabled(_ key: String) -> Bool {
        return featureOptions[key] == true
    }

    let hasSamples = segmentLength > 0
    let rms: Float = hasSamples ? sqrt(sumSquares / Float(segmentLength)) : 0
    let energy: Float = isEnabled("energy") ? sumSquares : 0
    let zcr: Float = (isEnabled("zcr") && hasSamples) ? Float(zeroCrossings) / Float(segmentLength) : 0

    let mfcc: [Float] = isEnabled("mfcc") ? extractMFCC(from: segmentData, sampleRate: sampleRate) : []
    let melSpectrogram: [Float] = isEnabled("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let chromagram: [Float] = isEnabled("chromagram") ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast: [Float] = isEnabled("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []

    let tonnetz: [Float]
    if isEnabled("tonnetz") {
        // Reuse the chromagram when it was already computed above.
        let chroma = isEnabled("chromagram")
            ? chromagram
            : extractChromagram(from: segmentData, sampleRate: sampleRate)
        tonnetz = computeTonnetz(fromChroma: chroma)
    } else {
        tonnetz = []
    }

    let pitch: Float? = isEnabled("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        zcr: zcr,
        spectralCentroid: extractSpectralCentroid(from: segmentData, sampleRate: sampleRate),
        spectralFlatness: extractSpectralFlatness(from: segmentData),
        spectralRollOff: extractSpectralRollOff(from: segmentData, sampleRate: sampleRate),
        spectralBandwidth: extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate),
        chromagram: chromagram,
        tempo: extractTempo(from: segmentData, sampleRate: sampleRate),
        hnr: extractHNR(from: segmentData),
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch
    )
}
524
-
525
/// Returns the smallest power of two that is greater than or equal to `n`
/// (1 for any `n` less than or equal to 1).
private func nextPowerOfTwo(_ n: Int) -> Int {
    var candidate = 1
    while candidate < n {
        candidate *= 2
    }
    return candidate
}
532
-
533
/// Estimates the fundamental frequency from the autocorrelation peak in the
/// 50-500 Hz lag range.
///
/// Fixes relative to the previous version:
/// - The lag range is clamped to the segment length. Previously
///   `autocorrelation[lag]` could index past the buffer for short segments
///   (e.g. 100 samples at 44.1 kHz gives a maximum lag of 882).
/// - `vDSP_conv` reads `N + P - 1` input samples; the signal is now
///   zero-padded to that length, and the unreversed segment is used as the
///   filter so the result is a true autocorrelation.
/// - The Hann-windowed, zero-padded FFT was computed and then never read; it
///   has been removed. As before, the autocorrelation uses the raw segment.
/// - A non-positive or non-finite sample rate returns 0.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate in Hz.
/// - Returns: `sampleRate / lag` for the lag with the largest autocorrelation,
///   or 0 when no lag in range is usable.
func estimatePitch(from segment: [Float], sampleRate: Float) -> Float {
    let sampleCount = segment.count
    guard sampleCount >= 2, sampleRate > 0, sampleRate.isFinite else { return 0.0 }

    // autocorrelation[lag] = sum over p of segment[p] * segment[p + lag]
    let padded = segment + [Float](repeating: 0, count: sampleCount - 1)
    var autocorrelation = [Float](repeating: 0, count: sampleCount)
    vDSP_conv(
        padded, 1,
        segment, 1,
        &autocorrelation, 1,
        vDSP_Length(sampleCount),
        vDSP_Length(sampleCount)
    )

    // Lags for 500 Hz (shortest period) and 50 Hz (longest period), clamped to
    // the available lags; lag 0 is excluded because it is the zero-lag peak.
    let lagLimit = Float(sampleCount - 1)
    let minLag = max(1, Int(min(sampleRate / 500.0, Float(sampleCount))))
    let maxLag = Int(min(sampleRate / 50.0, lagLimit))
    guard minLag <= maxLag else { return 0.0 }

    var maxCorr: Float = -1.0
    var pitchLag = 0
    for lag in minLag...maxLag where autocorrelation[lag] > maxCorr {
        maxCorr = autocorrelation[lag]
        pitchLag = lag
    }

    return pitchLag > 0 ? sampleRate / Float(pitchLag) : 0.0
}
565
-
566
- // Add speech detection helper function
567
/// Heuristic speech detector based on loudness and zero-crossing rate.
///
/// - Parameters:
///   - segment: Samples used to compute the zero-crossing rate.
///   - rms: Precomputed RMS of the segment.
/// - Returns: `isActive` is true when `rms > 0.01` and the zero-crossing rate
///   lies strictly between 0.1 and 0.5; `probability` is `rms * 10` clamped to 0...1.
func detectSpeech(from segment: [Float], rms: Float) -> (isActive: Bool, probability: Float) {
    let crossingRate = calculateZeroCrossingRate(segment)

    let loudEnough = rms > 0.01
    let speechLikeRate = crossingRate > 0.1 && crossingRate < 0.5
    let likelihood = min(1.0, max(0.0, rms * 10))

    return (isActive: loudEnough && speechLikeRate, probability: likelihood)
}
575
-
576
/// Reads a range of frames from an audio file and returns it as interleaved integer PCM,
/// optionally with the float samples of channel 0 and a base64 copy of the PCM bytes.
///
/// - Parameters:
///   - url: Audio file to read.
///   - startFrame: Frame position at which reading starts.
///   - frameCount: Number of frames to read.
///   - format: Format of the buffer the file is read into.
///     NOTE(review): presumably this must match the file's processing format — confirm against callers.
///   - decodingConfig: Supplies the target format, the target bit depth (16 when unset) and
///     whether samples are limited to -1...1 before quantisation.
///   - includeNormalizedData: When true, `floatData` holds the float samples of channel 0.
///   - includeBase64Data: When true, `base64Data` holds the base64 encoding of `pcmData`.
/// - Returns: The PCM bytes plus the optional float and base64 representations.
/// - Throws: Errors from opening or reading the file and from the converter; an `NSError` in
///   the "AudioProcessing" domain when a buffer or converter cannot be created, the conversion
///   reports an error status, float channel data is unavailable, or the bit depth is not 16 or 32.
func extractRawAudioData(
    from url: URL,
    startFrame: AVAudioFramePosition,
    frameCount: AVAudioFrameCount,
    format: AVAudioFormat,
    decodingConfig: DecodingConfig,
    includeNormalizedData: Bool,
    includeBase64Data: Bool
) throws -> (pcmData: Data, floatData: [Float]?, base64Data: String?) {
    // Builds the error type this function uses for its own failures.
    func processingError(_ message: String) -> NSError {
        return NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
    }

    // Limits `value` to `lower...upper` so the integer conversion cannot trap; NaN maps to 0.
    func saturate(_ value: Float, lower: Float, upper: Float) -> Float {
        if value.isNaN { return 0 }
        return min(upper, max(lower, value))
    }

    // Apply decoding configuration
    let targetFormat = decodingConfig.toAudioFormat(baseFormat: format)

    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw processingError("Failed to allocate read buffer")
    }
    let audioFile = try AVAudioFile(forReading: url)

    audioFile.framePosition = startFrame
    try audioFile.read(into: buffer, frameCount: frameCount)

    // Convert to target format if different from source
    let finalBuffer: AVAudioPCMBuffer
    if targetFormat != format {
        guard let converter = AVAudioConverter(from: format, to: targetFormat) else {
            throw processingError("Failed to create audio converter")
        }

        // A higher target sample rate produces more frames than were read, so size the output
        // by the rate ratio (never smaller than the requested frame count).
        var outputCapacity = frameCount
        if format.sampleRate > 0 {
            let scaledFrames = (Double(buffer.frameLength) * targetFormat.sampleRate / format.sampleRate).rounded(.up)
            if scaledFrames.isFinite, scaledFrames >= 0, scaledFrames < Double(AVAudioFrameCount.max) {
                outputCapacity = max(frameCount, AVAudioFrameCount(scaledFrames))
            }
        }

        guard let converted = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: outputCapacity) else {
            throw processingError("Failed to allocate conversion buffer")
        }

        var conversionError: NSError?
        // Hand the source buffer to the converter exactly once; later requests signal the end
        // of the stream so the same samples are never converted twice.
        var inputConsumed = false
        let status = converter.convert(to: converted, error: &conversionError) { _, outStatus in
            if inputConsumed {
                outStatus.pointee = .endOfStream
                return nil
            }
            inputConsumed = true
            outStatus.pointee = .haveData
            return buffer
        }

        if let conversionError = conversionError {
            throw conversionError
        }
        if status == .error {
            throw processingError("Audio conversion failed")
        }
        finalBuffer = converted
    } else {
        finalBuffer = buffer
    }

    guard let floatData = finalBuffer.floatChannelData else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to get float channel data"])
    }

    let channels = Int(targetFormat.channelCount)
    let frameLength = Int(finalBuffer.frameLength)
    let totalSamples = frameLength * channels

    // Use targetBitDepth from decodingConfig instead of format's bit depth
    let targetBitDepth = decodingConfig.targetBitDepth ?? 16
    let bytesPerSample = targetBitDepth / 8
    var pcmData = Data(capacity: totalSamples * bytesPerSample)

    // Float(Int32.max) rounds up to 2^31, which does not fit in Int32; use the largest Float below it.
    let int32Upper = Float(Int32.max).nextDown

    // Convert float samples to interleaved PCM with the specified bit depth
    for frame in 0..<frameLength {
        for channel in 0..<channels {
            let sample = floatData[channel][frame]

            let normalizedSample = decodingConfig.normalizeAudio ?
                max(-1.0, min(1.0, sample)) : sample

            switch targetBitDepth {
            case 16:
                let scaled = saturate(normalizedSample * Float(Int16.max), lower: Float(Int16.min), upper: Float(Int16.max))
                let intValue = Int16(scaled)
                pcmData.append(contentsOf: withUnsafeBytes(of: intValue) { Array($0) })
            case 32:
                let scaled = saturate(normalizedSample * Float(Int32.max), lower: Float(Int32.min), upper: int32Upper)
                let intValue = Int32(scaled)
                pcmData.append(contentsOf: withUnsafeBytes(of: intValue) { Array($0) })
            default:
                throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unsupported bit depth \(targetBitDepth)"])
            }
        }
    }

    // Only copy the float samples (channel 0) if requested
    let normalizedData: [Float]? = includeNormalizedData ?
        Array(UnsafeBufferPointer(start: floatData[0], count: frameLength)) :
        nil

    // Convert to base64 if requested
    let base64Data: String? = includeBase64Data ?
        pcmData.base64EncodedString() :
        nil

    return (pcmData: pcmData, floatData: normalizedData, base64Data: base64Data)
}
658
-
659
- // Update the CRC32 function to use zlib's implementation
660
/// Computes the CRC-32 checksum of `data` using zlib's `crc32`, starting from a CRC of 0.
func calculateCRC32(data: Data) -> UInt32 {
    return data.withUnsafeBytes { (rawBytes: UnsafeRawBufferPointer) -> UInt32 in
        let byteLength = UInt32(rawBytes.count)
        let firstByte = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        let checksum = crc32(0, firstByte, byteLength)
        return UInt32(checksum)
    }
}
666
-
667
/// Computes the CRC-32 checksum (zlib `crc32`) over the raw bytes of the first `count`
/// elements of `floatArray`.
///
/// - Parameters:
///   - floatArray: Samples whose in-memory bytes are checksummed.
///   - count: Number of leading elements to include. Values outside `0...floatArray.count`
///     are limited to that range so the checksum never reads past the array.
/// - Returns: The CRC-32 of the selected bytes; 0 when no elements are selected.
func calculateCRC32(from floatArray: [Float], count: Int) -> UInt32 {
    // Never read beyond the array and never hand a negative length to zlib.
    let elementCount = min(max(count, 0), floatArray.count)

    return floatArray.withUnsafeBytes { floatBytes -> UInt32 in
        let byteCount = elementCount * MemoryLayout<Float>.size
        let bytes = floatBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        return UInt32(crc32(0, bytes, UInt32(byteCount)))
    }
}
674
-
675
/// Builds a 44-byte RIFF/WAVE PCM header for `pcmData` and returns it followed by the PCM
/// bytes themselves (header and audio data in one `Data` value).
///
/// - Parameters:
///   - pcmData: Raw PCM audio bytes to wrap.
///   - sampleRate: Sample rate written to the header.
///   - channels: Channel count written to the header.
///   - bitDepth: Bits per sample written to the header.
/// - Returns: The header with `pcmData` appended.
func createWavHeader(pcmData: Data, sampleRate: Int, channels: Int, bitDepth: Int) -> Data {
    let headerLength = 44
    let riffChunkSize = pcmData.count + headerLength - 8
    let sampleWidth = bitDepth / 8
    let bytesPerSecond = sampleRate * channels * sampleWidth
    let frameWidth = channels * sampleWidth

    // Header fields in file order; all integers are stored little-endian.
    let fields: [Data] = [
        "RIFF".data(using: .ascii)!,                // RIFF chunk id
        UInt32(riffChunkSize).littleEndian.data,    // file size minus the first 8 bytes
        "WAVE".data(using: .ascii)!,                // format id
        "fmt ".data(using: .ascii)!,                // 'fmt ' sub-chunk id
        UInt32(16).littleEndian.data,               // 'fmt ' sub-chunk size for PCM
        UInt16(1).littleEndian.data,                // audio format 1 = PCM
        UInt16(channels).littleEndian.data,         // number of channels
        UInt32(sampleRate).littleEndian.data,       // sample rate
        UInt32(bytesPerSecond).littleEndian.data,   // byte rate
        UInt16(frameWidth).littleEndian.data,       // block align
        UInt16(bitDepth).littleEndian.data,         // bits per sample
        "data".data(using: .ascii)!,                // 'data' sub-chunk id
        UInt32(pcmData.count).littleEndian.data     // 'data' sub-chunk size
    ]

    var wav = Data(capacity: headerLength)
    for field in fields {
        wav.append(field)
    }

    // The audio payload follows the header directly.
    wav.append(pcmData)
    return wav
}
729
-
730
- // Extension to help with binary data conversion
731
extension UInt16 {
    /// The two bytes of this value, in the order they are stored in memory, as `Data`.
    var data: Data {
        return withUnsafeBytes(of: self) { rawBytes in
            Data(rawBytes)
        }
    }
}
737
-
738
extension UInt32 {
    /// The four bytes of this value, in the order they are stored in memory, as `Data`.
    var data: Data {
        return withUnsafeBytes(of: self) { rawBytes in
            Data(rawBytes)
        }
    }
}