@siteed/audio-studio 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375):
  1. package/CHANGELOG.md +535 -0
  2. package/LICENSE +21 -0
  3. package/README.md +167 -0
  4. package/android/build.gradle +143 -0
  5. package/android/src/androidTest/assets/chorus.wav +0 -0
  6. package/android/src/androidTest/assets/jfk.wav +0 -0
  7. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  8. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  9. package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
  10. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
  11. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
  12. package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
  13. package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
  14. package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
  15. package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
  16. package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
  17. package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
  18. package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
  19. package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
  20. package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
  21. package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
  22. package/android/src/main/AndroidManifest.xml +30 -0
  23. package/android/src/main/CMakeLists.txt +29 -0
  24. package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
  25. package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
  26. package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
  27. package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
  28. package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
  29. package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
  30. package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
  31. package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
  32. package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
  33. package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
  34. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
  35. package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
  36. package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
  37. package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
  38. package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
  39. package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
  40. package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
  41. package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
  42. package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
  43. package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
  44. package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
  45. package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
  46. package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
  47. package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
  48. package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
  49. package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
  50. package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
  51. package/android/src/main/res/drawable/ic_microphone.xml +13 -0
  52. package/android/src/main/res/drawable/ic_pause.xml +10 -0
  53. package/android/src/main/res/drawable/ic_play.xml +10 -0
  54. package/android/src/main/res/drawable/ic_stop.xml +10 -0
  55. package/android/src/main/res/layout/notification_recording.xml +37 -0
  56. package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
  57. package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
  58. package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
  59. package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
  60. package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
  61. package/android/src/test/resources/chorus.wav +0 -0
  62. package/android/src/test/resources/generate_test_audio.py +94 -0
  63. package/android/src/test/resources/jfk.wav +0 -0
  64. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  65. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  66. package/app.plugin.js +3 -0
  67. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
  68. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  69. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
  70. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  71. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
  72. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  73. package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
  74. package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
  75. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
  76. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  77. package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
  78. package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
  79. package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
  80. package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
  81. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
  82. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  83. package/build/cjs/AudioDeviceManager.js +688 -0
  84. package/build/cjs/AudioDeviceManager.js.map +1 -0
  85. package/build/cjs/AudioRecorder.provider.js +78 -0
  86. package/build/cjs/AudioRecorder.provider.js.map +1 -0
  87. package/build/cjs/AudioStudio.native.js +8 -0
  88. package/build/cjs/AudioStudio.native.js.map +1 -0
  89. package/build/cjs/AudioStudio.types.js +11 -0
  90. package/build/cjs/AudioStudio.types.js.map +1 -0
  91. package/build/cjs/AudioStudio.web.js +708 -0
  92. package/build/cjs/AudioStudio.web.js.map +1 -0
  93. package/build/cjs/AudioStudioModule.js +718 -0
  94. package/build/cjs/AudioStudioModule.js.map +1 -0
  95. package/build/cjs/WebRecorder.web.js +865 -0
  96. package/build/cjs/WebRecorder.web.js.map +1 -0
  97. package/build/cjs/constants/platformLimitations.js +99 -0
  98. package/build/cjs/constants/platformLimitations.js.map +1 -0
  99. package/build/cjs/constants.js +20 -0
  100. package/build/cjs/constants.js.map +1 -0
  101. package/build/cjs/events.js +29 -0
  102. package/build/cjs/events.js.map +1 -0
  103. package/build/cjs/hooks/useAudioDevices.js +179 -0
  104. package/build/cjs/hooks/useAudioDevices.js.map +1 -0
  105. package/build/cjs/index.js +64 -0
  106. package/build/cjs/index.js.map +1 -0
  107. package/build/cjs/trimAudio.js +76 -0
  108. package/build/cjs/trimAudio.js.map +1 -0
  109. package/build/cjs/useAudioRecorder.js +535 -0
  110. package/build/cjs/useAudioRecorder.js.map +1 -0
  111. package/build/cjs/utils/BlobFix.js +502 -0
  112. package/build/cjs/utils/BlobFix.js.map +1 -0
  113. package/build/cjs/utils/audioProcessing.js +136 -0
  114. package/build/cjs/utils/audioProcessing.js.map +1 -0
  115. package/build/cjs/utils/cleanNativeOptions.js +22 -0
  116. package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
  117. package/build/cjs/utils/concatenateBuffers.js +25 -0
  118. package/build/cjs/utils/concatenateBuffers.js.map +1 -0
  119. package/build/cjs/utils/convertPCMToFloat32.js +124 -0
  120. package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
  121. package/build/cjs/utils/crc32.js +52 -0
  122. package/build/cjs/utils/crc32.js.map +1 -0
  123. package/build/cjs/utils/encodingToBitDepth.js +17 -0
  124. package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
  125. package/build/cjs/utils/getWavFileInfo.js +96 -0
  126. package/build/cjs/utils/getWavFileInfo.js.map +1 -0
  127. package/build/cjs/utils/writeWavHeader.js +88 -0
  128. package/build/cjs/utils/writeWavHeader.js.map +1 -0
  129. package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
  130. package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
  131. package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
  132. package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
  133. package/build/cjs/workers/wasmGlueString.web.js +27 -0
  134. package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
  135. package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
  136. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  137. package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
  138. package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  139. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
  140. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  141. package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
  142. package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
  143. package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
  144. package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  145. package/build/esm/AudioAnalysis/extractPreview.js +25 -0
  146. package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
  147. package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
  148. package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
  149. package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
  150. package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  151. package/build/esm/AudioDeviceManager.js +681 -0
  152. package/build/esm/AudioDeviceManager.js.map +1 -0
  153. package/build/esm/AudioRecorder.provider.js +40 -0
  154. package/build/esm/AudioRecorder.provider.js.map +1 -0
  155. package/build/esm/AudioStudio.native.js +6 -0
  156. package/build/esm/AudioStudio.native.js.map +1 -0
  157. package/build/esm/AudioStudio.types.js +8 -0
  158. package/build/esm/AudioStudio.types.js.map +1 -0
  159. package/build/esm/AudioStudio.web.js +704 -0
  160. package/build/esm/AudioStudio.web.js.map +1 -0
  161. package/build/esm/AudioStudioModule.js +713 -0
  162. package/build/esm/AudioStudioModule.js.map +1 -0
  163. package/build/esm/WebRecorder.web.js +861 -0
  164. package/build/esm/WebRecorder.web.js.map +1 -0
  165. package/build/esm/constants/platformLimitations.js +90 -0
  166. package/build/esm/constants/platformLimitations.js.map +1 -0
  167. package/build/esm/constants.js +17 -0
  168. package/build/esm/constants.js.map +1 -0
  169. package/build/esm/events.js +21 -0
  170. package/build/esm/events.js.map +1 -0
  171. package/build/esm/hooks/useAudioDevices.js +176 -0
  172. package/build/esm/hooks/useAudioDevices.js.map +1 -0
  173. package/build/esm/index.js +23 -0
  174. package/build/esm/index.js.map +1 -0
  175. package/build/esm/trimAudio.js +69 -0
  176. package/build/esm/trimAudio.js.map +1 -0
  177. package/build/esm/useAudioRecorder.js +529 -0
  178. package/build/esm/useAudioRecorder.js.map +1 -0
  179. package/build/esm/utils/BlobFix.js +498 -0
  180. package/build/esm/utils/BlobFix.js.map +1 -0
  181. package/build/esm/utils/audioProcessing.js +133 -0
  182. package/build/esm/utils/audioProcessing.js.map +1 -0
  183. package/build/esm/utils/cleanNativeOptions.js +19 -0
  184. package/build/esm/utils/cleanNativeOptions.js.map +1 -0
  185. package/build/esm/utils/concatenateBuffers.js +21 -0
  186. package/build/esm/utils/concatenateBuffers.js.map +1 -0
  187. package/build/esm/utils/convertPCMToFloat32.js +120 -0
  188. package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
  189. package/build/esm/utils/crc32.js +50 -0
  190. package/build/esm/utils/crc32.js.map +1 -0
  191. package/build/esm/utils/encodingToBitDepth.js +13 -0
  192. package/build/esm/utils/encodingToBitDepth.js.map +1 -0
  193. package/build/esm/utils/getWavFileInfo.js +92 -0
  194. package/build/esm/utils/getWavFileInfo.js.map +1 -0
  195. package/build/esm/utils/writeWavHeader.js +84 -0
  196. package/build/esm/utils/writeWavHeader.js.map +1 -0
  197. package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
  198. package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
  199. package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
  200. package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
  201. package/build/esm/workers/wasmGlueString.web.js +24 -0
  202. package/build/esm/workers/wasmGlueString.web.js.map +1 -0
  203. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
  204. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  205. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
  206. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
  207. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
  208. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  209. package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
  210. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
  211. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
  212. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
  213. package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
  214. package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
  215. package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
  216. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  217. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
  218. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
  219. package/build/types/AudioDeviceManager.d.ts +187 -0
  220. package/build/types/AudioDeviceManager.d.ts.map +1 -0
  221. package/build/types/AudioRecorder.provider.d.ts +11 -0
  222. package/build/types/AudioRecorder.provider.d.ts.map +1 -0
  223. package/build/types/AudioStudio.native.d.ts +3 -0
  224. package/build/types/AudioStudio.native.d.ts.map +1 -0
  225. package/build/types/AudioStudio.types.d.ts +760 -0
  226. package/build/types/AudioStudio.types.d.ts.map +1 -0
  227. package/build/types/AudioStudio.web.d.ts +96 -0
  228. package/build/types/AudioStudio.web.d.ts.map +1 -0
  229. package/build/types/AudioStudioModule.d.ts +3 -0
  230. package/build/types/AudioStudioModule.d.ts.map +1 -0
  231. package/build/types/WebRecorder.web.d.ts +208 -0
  232. package/build/types/WebRecorder.web.d.ts.map +1 -0
  233. package/build/types/constants/platformLimitations.d.ts +40 -0
  234. package/build/types/constants/platformLimitations.d.ts.map +1 -0
  235. package/build/types/constants.d.ts +14 -0
  236. package/build/types/constants.d.ts.map +1 -0
  237. package/build/types/events.d.ts +29 -0
  238. package/build/types/events.d.ts.map +1 -0
  239. package/build/types/hooks/useAudioDevices.d.ts +15 -0
  240. package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
  241. package/build/types/index.d.ts +21 -0
  242. package/build/types/index.d.ts.map +1 -0
  243. package/build/types/trimAudio.d.ts +25 -0
  244. package/build/types/trimAudio.d.ts.map +1 -0
  245. package/build/types/useAudioRecorder.d.ts +22 -0
  246. package/build/types/useAudioRecorder.d.ts.map +1 -0
  247. package/build/types/utils/BlobFix.d.ts +9 -0
  248. package/build/types/utils/BlobFix.d.ts.map +1 -0
  249. package/build/types/utils/audioProcessing.d.ts +24 -0
  250. package/build/types/utils/audioProcessing.d.ts.map +1 -0
  251. package/build/types/utils/cleanNativeOptions.d.ts +15 -0
  252. package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
  253. package/build/types/utils/concatenateBuffers.d.ts +8 -0
  254. package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
  255. package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
  256. package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
  257. package/build/types/utils/crc32.d.ts +7 -0
  258. package/build/types/utils/crc32.d.ts.map +1 -0
  259. package/build/types/utils/encodingToBitDepth.d.ts +5 -0
  260. package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
  261. package/build/types/utils/getWavFileInfo.d.ts +26 -0
  262. package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
  263. package/build/types/utils/writeWavHeader.d.ts +34 -0
  264. package/build/types/utils/writeWavHeader.d.ts.map +1 -0
  265. package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  266. package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  267. package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
  268. package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  269. package/build/types/workers/wasmGlueString.web.d.ts +2 -0
  270. package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
  271. package/cpp/AudioFeatures.cpp +274 -0
  272. package/cpp/AudioFeatures.h +85 -0
  273. package/cpp/AudioFeaturesBridge.cpp +146 -0
  274. package/cpp/AudioFeaturesBridge.h +47 -0
  275. package/cpp/MelSpectrogram.cpp +227 -0
  276. package/cpp/MelSpectrogram.h +82 -0
  277. package/cpp/MelSpectrogramBridge.cpp +112 -0
  278. package/cpp/MelSpectrogramBridge.h +33 -0
  279. package/cpp/kiss_fft/COPYING +11 -0
  280. package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
  281. package/cpp/kiss_fft/kiss_fft.c +424 -0
  282. package/cpp/kiss_fft/kiss_fft.h +160 -0
  283. package/cpp/kiss_fft/kiss_fft_log.h +36 -0
  284. package/cpp/kiss_fft/kiss_fftr.c +155 -0
  285. package/cpp/kiss_fft/kiss_fftr.h +54 -0
  286. package/expo-module.config.json +10 -0
  287. package/ios/AudioAnalysisData.swift +74 -0
  288. package/ios/AudioDeviceManager.swift +670 -0
  289. package/ios/AudioFeaturesWrapper.h +21 -0
  290. package/ios/AudioFeaturesWrapper.mm +63 -0
  291. package/ios/AudioNotificationManager.swift +154 -0
  292. package/ios/AudioProcessingHelpers.swift +797 -0
  293. package/ios/AudioProcessor.swift +1191 -0
  294. package/ios/AudioStreamError.swift +7 -0
  295. package/ios/AudioStreamManager.swift +2369 -0
  296. package/ios/AudioStreamManagerDelegate.swift +16 -0
  297. package/ios/AudioStudio.podspec +39 -0
  298. package/ios/AudioStudioModule.swift +1111 -0
  299. package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
  300. package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
  301. package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
  302. package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
  303. package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
  304. package/ios/AudioStudioTests/Info.plist +22 -0
  305. package/ios/AudioStudioTests/README.md +39 -0
  306. package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
  307. package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
  308. package/ios/DataPoint.swift +54 -0
  309. package/ios/DecodingConfig.swift +59 -0
  310. package/ios/FFT.swift +62 -0
  311. package/ios/Features.swift +95 -0
  312. package/ios/ISSUE_IOS.md +68 -0
  313. package/ios/Logger.swift +39 -0
  314. package/ios/MelSpectrogramWrapper.h +30 -0
  315. package/ios/MelSpectrogramWrapper.mm +97 -0
  316. package/ios/NotificationExtension.swift +15 -0
  317. package/ios/RecordingResult.swift +22 -0
  318. package/ios/RecordingSettings.swift +311 -0
  319. package/ios/WaveformExtractor.swift +105 -0
  320. package/ios/tests/README.md +41 -0
  321. package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
  322. package/ios/tests/integration/buffer_duration_test.swift +185 -0
  323. package/ios/tests/integration/compressed_only_output_test.swift +271 -0
  324. package/ios/tests/integration/output_control_test.swift +322 -0
  325. package/ios/tests/integration/run_integration_tests.sh +37 -0
  326. package/ios/tests/opus_support_test_macos.swift +154 -0
  327. package/ios/tests/standalone/audio_processing_test.swift +144 -0
  328. package/ios/tests/standalone/audio_recording_test.swift +277 -0
  329. package/ios/tests/standalone/audio_streaming_test.swift +249 -0
  330. package/ios/tests/standalone/standalone_test.swift +144 -0
  331. package/package.json +146 -0
  332. package/plugin/build/index.cjs +194 -0
  333. package/plugin/build/index.d.cts +22 -0
  334. package/plugin/build/index.js +194 -0
  335. package/plugin/src/index.ts +285 -0
  336. package/plugin/tsconfig.json +10 -0
  337. package/plugin/tsconfig.tsbuildinfo +1 -0
  338. package/prebuilt/wasm/mel-spectrogram.js +18 -0
  339. package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
  340. package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
  341. package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
  342. package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
  343. package/src/AudioAnalysis/extractAudioData.ts +17 -0
  344. package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
  345. package/src/AudioAnalysis/extractPreview.ts +34 -0
  346. package/src/AudioAnalysis/extractWaveform.ts +22 -0
  347. package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
  348. package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
  349. package/src/AudioDeviceManager.ts +800 -0
  350. package/src/AudioRecorder.provider.tsx +57 -0
  351. package/src/AudioStudio.native.ts +6 -0
  352. package/src/AudioStudio.types.ts +899 -0
  353. package/src/AudioStudio.web.ts +911 -0
  354. package/src/AudioStudioModule.ts +984 -0
  355. package/src/WebRecorder.web.ts +1114 -0
  356. package/src/constants/platformLimitations.ts +118 -0
  357. package/src/constants.ts +21 -0
  358. package/src/events.ts +63 -0
  359. package/src/hooks/useAudioDevices.ts +213 -0
  360. package/src/index.ts +67 -0
  361. package/src/trimAudio.ts +94 -0
  362. package/src/types/crc-32.d.ts +9 -0
  363. package/src/useAudioRecorder.tsx +784 -0
  364. package/src/utils/BlobFix.ts +561 -0
  365. package/src/utils/audioProcessing.ts +205 -0
  366. package/src/utils/cleanNativeOptions.ts +18 -0
  367. package/src/utils/concatenateBuffers.ts +24 -0
  368. package/src/utils/convertPCMToFloat32.ts +170 -0
  369. package/src/utils/crc32.ts +59 -0
  370. package/src/utils/encodingToBitDepth.ts +18 -0
  371. package/src/utils/getWavFileInfo.ts +132 -0
  372. package/src/utils/writeWavHeader.ts +115 -0
  373. package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
  374. package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
  375. package/src/workers/wasmGlueString.web.ts +23 -0
@@ -0,0 +1,797 @@
1
+ // packages/audio-studio/ios/AudioProcessingHelpers.swift
2
+
3
+ import Accelerate
4
+ import AVFoundation
5
+ import QuartzCore
6
+ import zlib
7
+
8
+ // Constants
9
+ private let FFT_LENGTH = 1024
10
+ private let sharedFFT = FFT(FFT_LENGTH)
11
+
12
+ // Main feature extraction functions
13
/// Computes Mel-frequency cepstral coefficients for one audio segment.
///
/// Pipeline: Hann window -> FFT -> power spectrum -> 40-band Mel filterbank
/// -> natural log of each band energy -> DCT.
///
/// - Parameters:
///   - segment: Time-domain samples of the frame to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: Whatever `computeDCT` produces for the 40 log Mel energies.
func extractMFCC(from segment: [Float], sampleRate: Float) -> [Float] {
    let bandCount = 40

    // Window first, then transform and square into a power spectrum.
    let spectrum = sharedFFT.processSegment(applyHannWindow(to: segment))
    let power = computePowerSpectrum(from: spectrum)

    let filters = computeMelFilterbank(numFilters: bandCount, fftSize: FFT_LENGTH, sampleRate: sampleRate)

    // `zip` stops at the shorter of the two sequences, so a filter that is longer
    // than the power spectrum (or vice versa) can never index out of range.
    let logEnergies: [Float] = (0..<bandCount).map { band -> Float in
        let bandEnergy = zip(power, filters[band]).reduce(Float(0)) { partial, pair in
            partial + pair.0 * pair.1
        }
        // Clamp to the smallest normal Float so log() never sees zero.
        return log(max(bandEnergy, .leastNormalMagnitude))
    }

    return computeDCT(from: logEnergies)
}
42
+
43
/// Returns the magnitude-weighted mean frequency (spectral centroid) of a segment.
///
/// - Parameters:
///   - segment: Time-domain samples of the frame to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: The centroid in Hz, or 0 when the magnitude spectrum sums to zero.
func extractSpectralCentroid(from segment: [Float], sampleRate: Float) -> Float {
    let spectrum = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))
    let binCount = spectrum.count

    let totalMagnitude = spectrum.reduce(0, +)
    guard totalMagnitude > 0 else { return 0 }

    // Bin k is mapped to k * sampleRate / (2 * binCount).
    var weightedFrequencySum: Float = 0
    for (bin, magnitude) in spectrum.enumerated() {
        let frequency = Float(bin) * sampleRate / Float(2 * binCount)
        weightedFrequencySum += frequency * magnitude
    }

    return weightedFrequencySum / totalMagnitude
}
58
+
59
/// Returns the spectral flatness of a segment: the geometric mean of the power
/// spectrum divided by its arithmetic mean.
///
/// - Parameter segment: Time-domain samples of the frame to analyse.
/// - Returns: The flatness ratio, or 0.0 when the arithmetic mean is not positive.
func extractSpectralFlatness(from segment: [Float]) -> Float {
    let power = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let binCount = Float(power.count)

    // Geometric mean is evaluated in log space; the epsilon keeps log() away from zero.
    let logSum = power.reduce(Float(0)) { partial, value in
        partial + log(value + 1e-10)
    }
    let geometricMean = exp(logSum / binCount)

    let arithmeticMean = power.reduce(0, +) / binCount

    guard arithmeticMean > 0 else { return 0.0 }
    return geometricMean / arithmeticMean
}
77
+
78
/// Returns the frequency at which the cumulative magnitude spectrum first reaches
/// 85% of its total.
///
/// - Parameters:
///   - segment: Time-domain samples of the frame to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: The roll-off frequency in Hz, or 0.0 when no bin reaches the threshold
///   (for example when the spectrum is empty).
func extractSpectralRollOff(from segment: [Float], sampleRate: Float) -> Float {
    let spectrum = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))
    let cutoff: Float = 0.85 * spectrum.reduce(0, +) // 85% roll-off point

    // Walk the spectrum, accumulating magnitude until the cutoff is crossed.
    var runningTotal: Float = 0
    let rollOffBin = spectrum.firstIndex { magnitude -> Bool in
        runningTotal += magnitude
        return runningTotal >= cutoff
    }

    guard let bin = rollOffBin else { return 0.0 }
    return Float(bin) * sampleRate / Float(2 * spectrum.count)
}
95
+
96
/// Returns the spectral bandwidth of a segment: the magnitude-weighted standard
/// deviation of frequency around the spectral centroid.
///
/// The centroid is derived from the same magnitude spectrum used for the variance,
/// so the segment is transformed only once per call. The centroid formula matches
/// `extractSpectralCentroid` (weighted frequency sum divided by total magnitude).
///
/// - Parameters:
///   - segment: Time-domain samples of the frame to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: The bandwidth in Hz, or 0 when the magnitude spectrum sums to zero.
func extractSpectralBandwidth(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))
    let frequencies = (0..<magnitudes.count).map { Float($0) * sampleRate / Float(2 * magnitudes.count) }

    let sumMagnitudes = magnitudes.reduce(0, +)
    guard sumMagnitudes > 0 else { return 0 }

    // Centroid from the spectrum already in hand (no second FFT pass).
    let centroid = zip(frequencies, magnitudes)
        .map { $0.0 * $0.1 }
        .reduce(0, +) / sumMagnitudes

    // Magnitude-weighted squared deviation from the centroid.
    let variance = zip(frequencies, magnitudes)
        .map { pow($0.0 - centroid, 2) * $0.1 }
        .reduce(0, +)

    return sqrt(variance / sumMagnitudes)
}
113
+
114
/// Folds the magnitude spectrum of a segment into 12 pitch-class bins.
///
/// Each FFT bin's frequency is converted to a semitone offset from 440 Hz and
/// wrapped into `0..<12`, so index 0 corresponds to the pitch class of 440 Hz.
/// The wrap uses a floor-based, always non-negative modulo so that bins below
/// 440 Hz (negative semitone offsets) are counted instead of being discarded.
/// Bins at or above 440 Hz map to the same pitch class as before.
///
/// - Parameters:
///   - segment: Time-domain samples of the frame to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: 12 accumulated magnitudes, one per pitch class.
func extractChromagram(from segment: [Float], sampleRate: Float) -> [Float] {
    let fftData = sharedFFT.processSegment(segment)
    let numBins = fftData.count / 2 // interleaved (re, im) pairs
    let nChroma = 12
    var chroma = [Float](repeating: 0, count: nChroma)
    let freqsPerBin = sampleRate / Float(FFT_LENGTH)

    for i in 0..<numBins {
        let freq = Float(i) * freqsPerBin
        // Skip DC / non-positive frequencies (log2 undefined) and non-finite values
        // (which would trap when converted to Int).
        guard freq > 0, freq.isFinite else { continue }

        let semitonesFromReference = Int((12 * log2(freq / 440.0)).rounded(.down))
        // Swift's % keeps the sign of the dividend; shift into 0..<nChroma.
        let pitchClass = ((semitonesFromReference % nChroma) + nChroma) % nChroma

        // 2*i + 1 < 2*numBins <= fftData.count, so both reads are in range.
        let re = fftData[2 * i]
        let im = fftData[2 * i + 1]
        chroma[pitchClass] += sqrt(re * re + im * im)
    }

    return chroma
}
140
+
141
/// Estimates tempo in beats per minute from the spacing of onset peaks.
///
/// The segment is cut into 2048-sample frames with a 512-sample hop. For each
/// frame the positive spectral flux against the previous frame is recorded, local
/// maxima of that envelope are taken as onsets, and the mean onset spacing is
/// converted to BPM.
///
/// - Parameters:
///   - segment: Time-domain samples to analyse.
///   - sampleRate: Sampling rate of `segment`, in Hz.
/// - Returns: The estimated tempo clamped to 20...300 BPM, or 120.0 when the
///   segment is shorter than one frame or fewer than two onset peaks are found.
func extractTempo(from segment: [Float], sampleRate: Float) -> Float {
    let hopLength = 512
    let frameLength = 2048
    let defaultTempo: Float = 120.0

    // Need at least one full frame.
    guard segment.count >= frameLength else {
        return defaultTempo
    }

    // Onset strength signal via spectral flux.
    var onsetEnvelope = [Float]()
    var previousSpectrum = [Float](repeating: 0, count: frameLength / 2)

    // `through:` so the final full frame (starting exactly at count - frameLength)
    // is analysed too; with `to:` a segment of exactly one frame produced no frames.
    let lastFrameStart = segment.count - frameLength
    for frameStart in stride(from: 0, through: lastFrameStart, by: hopLength) {
        // Every frame is complete by construction, so no zero-padding is needed.
        var fftData = Array(segment[frameStart..<(frameStart + frameLength)])
        // NOTE(review): sharedFFT is constructed with FFT_LENGTH (1024) but is handed
        // a 2048-sample buffer here - confirm FFT.realForward supports this length.
        sharedFFT.realForward(&fftData)

        let magnitudes = computeMagnitudeSpectrum(from: fftData)
        var flux: Float = 0
        for j in 0..<min(magnitudes.count, previousSpectrum.count) {
            // Only increases in magnitude count towards an onset.
            flux += max(magnitudes[j] - previousSpectrum[j], 0)
        }
        onsetEnvelope.append(flux)
        previousSpectrum = magnitudes
    }

    // Strict local maxima of the onset envelope.
    var peaks = [Int]()
    if onsetEnvelope.count >= 3 {
        for i in 1..<(onsetEnvelope.count - 1) {
            if onsetEnvelope[i] > onsetEnvelope[i - 1] && onsetEnvelope[i] > onsetEnvelope[i + 1] {
                peaks.append(i)
            }
        }
    }

    // Mean peak spacing (in frames) -> BPM.
    if peaks.count > 1 {
        let intervals = zip(peaks, peaks.dropFirst()).map { $1 - $0 }
        let averageInterval = Float(intervals.reduce(0, +)) / Float(intervals.count)
        if averageInterval > 0 {
            let tempo = 60.0 * sampleRate / Float(hopLength) / averageInterval
            // Constrain tempo to a reasonable range (20-300 BPM).
            return min(300.0, max(20.0, tempo))
        }
    }

    return defaultTempo // No clear peaks found
}
195
+
196
/// Finds strict local maxima whose height above the taller neighbour is at least
/// `minProminence`.
///
/// - Parameters:
///   - data: Samples to scan. The first and last elements are never reported
///     because they have only one neighbour.
///   - minProminence: Minimum difference between a peak and its larger neighbour.
/// - Returns: Indices of qualifying peaks in ascending order; empty when `data`
///   has fewer than three elements.
private func findPeaks(in data: [Float], minProminence: Float) -> [Int] {
    // An interior sample needs two neighbours. Without this guard,
    // `1..<data.count - 1` is an invalid range (and traps) for 0 or 1 elements.
    guard data.count >= 3 else { return [] }

    var peaks = [Int]()
    for i in 1..<(data.count - 1) {
        if data[i] > data[i - 1] && data[i] > data[i + 1] {
            let prominence = data[i] - max(data[i - 1], data[i + 1])
            if prominence >= minProminence {
                peaks.append(i)
            }
        }
    }
    return peaks
}
208
+
209
/// Estimates the harmonics-to-noise ratio (in dB) of a segment from its
/// autocorrelation.
///
/// The autocorrelation at lag 0 is taken as total energy and the first prominent
/// peak at a non-zero lag as harmonic energy; the remainder is treated as noise.
///
/// - Parameter segment: Time-domain samples of the frame to analyse.
/// - Returns: `10 * log10(harmonic / noise)`, or 0.0 when the segment has fewer
///   than three samples, no suitable peak exists, or either energy term is not
///   positive.
func extractHNR(from segment: [Float]) -> Float {
    let frameSize = segment.count
    // Peak picking needs at least one interior sample.
    guard frameSize >= 3 else { return 0.0 }

    // vDSP_conv reads N + P - 1 input samples (N outputs, P filter taps), so the
    // signal is zero-padded to 2 * frameSize - 1 to keep every read in bounds.
    let paddedSignal = segment + [Float](repeating: 0, count: frameSize - 1)
    var autocorrelation = [Float](repeating: 0, count: frameSize)

    // A positive filter stride makes vDSP_conv compute correlation:
    // out[k] = sum_p padded[k + p] * segment[p], i.e. the autocorrelation at lag k.
    // The filter must therefore be the segment itself, not a reversed copy.
    vDSP_conv(paddedSignal, 1, segment, 1, &autocorrelation, 1, vDSP_Length(frameSize), vDSP_Length(frameSize))

    guard let maxValue = autocorrelation.max() else { return 0.0 }

    // Inline peak search: strict local maxima with prominence >= 10% of the maximum.
    // Interior indices start at 1, so lag 0 is never selected.
    let minProminence = 0.1 * maxValue
    var firstPeakIndex: Int?
    for lag in 1..<(frameSize - 1) {
        let value = autocorrelation[lag]
        let left = autocorrelation[lag - 1]
        let right = autocorrelation[lag + 1]
        if value > left && value > right && value - max(left, right) >= minProminence {
            firstPeakIndex = lag
            break
        }
    }

    guard let peakLag = firstPeakIndex else { return 0.0 }

    let harmonicEnergy = autocorrelation[peakLag]
    let noiseEnergy = autocorrelation[0] - harmonicEnergy
    // Both terms must be positive for the log ratio to be defined.
    guard harmonicEnergy > 0, noiseEnergy > 0 else { return 0.0 }

    return 10 * log10(harmonicEnergy / noiseEnergy)
}
232
+
233
+ // Helper functions
234
/// Converts interleaved (real, imaginary) FFT output into per-bin magnitudes.
///
/// - Parameter fftData: Values laid out as `[re0, im0, re1, im1, ...]`; a trailing
///   unpaired element is ignored.
/// - Returns: `fftData.count / 2` magnitudes, each `sqrt(re^2 + im^2)`.
private func computeMagnitudeSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2

    // For bin < pairCount, index 2 * bin + 1 is always inside fftData.
    return (0..<pairCount).map { bin -> Float in
        let re = fftData[2 * bin]
        let im = fftData[2 * bin + 1]
        return sqrt(re * re + im * im)
    }
}
248
+
249
/// Multiplies `segment` element-wise by a Hann window of the same length.
///
/// The window is generated by `vDSP_hann_window` with the `vDSP_HANN_NORM` flag.
///
/// - Parameter segment: Samples to window.
/// - Returns: A new array of the same length holding the windowed samples.
private func applyHannWindow(to segment: [Float]) -> [Float] {
    let sampleCount = segment.count
    let length = vDSP_Length(sampleCount)

    var hann = [Float](repeating: 0, count: sampleCount)
    vDSP_hann_window(&hann, length, Int32(vDSP_HANN_NORM))

    var windowed = [Float](repeating: 0, count: sampleCount)
    vDSP_vmul(segment, 1, hann, 1, &windowed, 1, length)
    return windowed
}
258
+
259
/// Converts FFT output, read as consecutive (real, imaginary) pairs, into per-bin power.
///
/// - Parameter fftData: Values read pairwise; a trailing unpaired value is ignored.
/// - Returns: `fftData.count / 2` values, each `re² + im²`.
private func computePowerSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    return stride(from: 0, to: 2 * pairCount, by: 2).map { offset -> Float in
        let re = fftData[offset]
        let im = fftData[offset + 1]
        return re*re + im*im
    }
}
273
+
274
/// Builds a bank of triangular filters whose edges are evenly spaced on the mel scale
/// between 0 Hz and `sampleRate / 2`.
///
/// - Parameters:
///   - numFilters: Number of triangular filters (rows) to produce.
///   - fftSize: FFT length used to map frequencies to bin indices.
///   - sampleRate: Sample rate in Hz.
/// - Returns: `numFilters` rows of `1 + fftSize / 2` weights. Each row rises from 0 at
///   its left edge bin to 1 at its centre bin and falls back towards 0 at its right edge bin.
private func computeMelFilterbank(numFilters: Int, fftSize: Int, sampleRate: Float) -> [[Float]] {
    let lowestMel = hzToMel(0)
    let highestMel = hzToMel(sampleRate / 2)
    let melSpacing = (highestMel - lowestMel) / Float(numFilters + 1)

    // numFilters + 2 edge bins: filter i spans edgeBins[i] ... edgeBins[i + 2].
    var edgeBins = [Int]()
    for k in 0...(numFilters + 1) {
        let hz = melToHz(lowestMel + Float(k) * melSpacing)
        edgeBins.append(Int((hz * Float(fftSize) / sampleRate).rounded()))
    }

    let emptyRow = [Float](repeating: 0, count: 1 + fftSize/2)
    var filterbank = [[Float]](repeating: emptyRow, count: numFilters)

    for i in 0..<numFilters {
        let left = edgeBins[i]
        let centre = edgeBins[i + 1]
        let right = edgeBins[i + 2]

        for j in left..<right {
            if j < centre {
                // Rising slope.
                filterbank[i][j] = Float(j - left) / Float(centre - left)
            } else {
                // Falling slope (equals 1 at the centre bin).
                filterbank[i][j] = Float(right - j) / Float(right - centre)
            }
        }
    }

    return filterbank
}
300
+
301
/// Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`.
private func hzToMel(_ hz: Float) -> Float {
    let scaled = 1 + hz/700
    return 2595 * log10(scaled)
}
304
+
305
/// Converts a mel value back to Hz using `700 * (10^(mel / 2595) - 1)`.
private func melToHz(_ mel: Float) -> Float {
    let exponent = mel/2595
    return 700 * (pow(10, exponent) - 1)
}
308
+
309
/// Computes a type-II discrete cosine transform of `input` by direct summation.
///
/// Every output coefficient is scaled by `sqrt(2 / N)`, where `N` is the input length.
///
/// - Parameter input: Values to transform.
/// - Returns: `N` coefficients; empty for empty input.
private func computeDCT(from input: [Float]) -> [Float] {
    let length = input.count
    let scale = sqrt(2.0 / Float(length))

    return (0..<length).map { k -> Float in
        var accumulator: Float = 0
        for (n, sample) in input.enumerated() {
            accumulator += sample * cos(.pi * Float(k) * (2 * Float(n) + 1) / (2 * Float(length)))
        }
        return scale * accumulator
    }
}
324
+
325
/// Computes a single mel-spectrogram frame for `segment` via `MelSpectrogramWrapper`.
///
/// The wrapper is (re)initialised on every call with a 2048-point FFT, 128 mel bands,
/// a frequency range of 0 to `sampleRate / 2`, and window type 0. The window size and
/// hop length are both `min(segment.count, 2048)`, so one frame is requested.
///
/// - Parameters:
///   - segment: Audio samples for the frame.
///   - sampleRate: Sample rate in Hz (truncated to an integer for the wrapper).
/// - Returns: The wrapper's values converted to `Float`, or an empty array when it returns `nil`.
func computeMelSpectrogram(from segment: [Float], sampleRate: Float) -> [Float] {
    let melBandCount: Int32 = 128
    let fftLength: Int32 = 2048
    // Window and hop share one size, which yields a single frame.
    let frameSpan = Int32(min(segment.count, Int(fftLength)))

    MelSpectrogramWrapper.initWithSampleRate(
        Int32(sampleRate),
        fftLength: fftLength,
        windowSizeSamples: frameSpan,
        hopLengthSamples: frameSpan,
        nMels: melBandCount,
        fMin: 0.0,
        fMax: sampleRate / 2.0,
        windowType: 0 // Hann
    )

    let frame: [NSNumber]? = segment.withUnsafeBufferPointer { samples in
        MelSpectrogramWrapper.computeFrame(
            withSamples: samples.baseAddress,
            frameSize: Int32(segment.count)
        )
    }

    if let frame = frame {
        return frame.map { $0.floatValue }
    }
    return []
}
355
+
356
/// Computes a per-band spectral contrast (in dB) for `segment`.
///
/// The magnitude spectrum is split into seven fixed frequency bands. For each band the
/// contrast is the ratio between its 95th-percentile and 5th-percentile magnitude,
/// expressed as `20 * log10(peak / valley)`.
///
/// - Parameters:
///   - segment: Audio samples for one analysis frame.
///   - sampleRate: Sample rate in Hz.
/// - Returns: Seven contrast values, one per band. A band yields `0` when it maps to
///   fewer than two FFT bins or when the sample rate is not a positive finite number.
func computeSpectralContrast(from segment: [Float], sampleRate: Float) -> [Float] {
    let fftData = sharedFFT.processSegment(segment)
    let magnitudeSpectrum = computeMagnitudeSpectrum(from: fftData)

    // Octave-style frequency bands (Hz).
    let bandFrequencies: [(low: Double, high: Double)] = [
        (20.0, 125.0), // Sub-bass
        (125.0, 250.0), // Bass
        (250.0, 500.0), // Low-mids
        (500.0, 1000.0), // Mids
        (1000.0, 2000.0), // High-mids
        (2000.0, 4000.0), // Presence
        (4000.0, min(8000.0, Double(sampleRate) / 2.0)) // Brilliance
    ]

    // Width of one FFT bin in Hz.
    let freqResolution = sampleRate / Float(FFT_LENGTH)
    // A zero, negative, or non-finite resolution would make the Int(...) bin
    // conversions below trap, so report "no contrast" for every band instead.
    guard freqResolution.isFinite, freqResolution > 0 else {
        return [Float](repeating: 0, count: bandFrequencies.count)
    }

    let magnitudeFloor = Float.leastNormalMagnitude

    return bandFrequencies.map { band -> Float in
        let startBin = Int(Float(band.low) / freqResolution)
        let endBin = min(Int(Float(band.high) / freqResolution), magnitudeSpectrum.count - 1)
        guard startBin < endBin else { return 0 }

        // Sort the band so percentiles can be read by index.
        let sortedMagnitudes = magnitudeSpectrum[startBin...endBin].sorted()
        let length = sortedMagnitudes.count
        let peak = sortedMagnitudes[Int(Float(length) * 0.95)]
        let valley = sortedMagnitudes[Int(Float(length) * 0.05)]

        // Take the difference of logarithms with both terms floored. Dividing by a
        // floored valley can overflow to +inf, and a silent band (peak == 0) would
        // otherwise produce -inf.
        return 20 * (log10(max(peak, magnitudeFloor)) - log10(max(valley, magnitudeFloor)))
    }
}
404
+
405
+ // Original function for backward compatibility
406
/// Convenience overload: extracts a chromagram from `segment` and projects it
/// with `computeTonnetz(fromChroma:)`.
///
/// - Parameters:
///   - segment: Audio samples for one analysis frame.
///   - sampleRate: Sample rate in Hz, forwarded to `extractChromagram`.
/// - Returns: The values produced by `computeTonnetz(fromChroma:)`.
func computeTonnetz(from segment: [Float], sampleRate: Float) -> [Float] {
    return computeTonnetz(fromChroma: extractChromagram(from: segment, sampleRate: sampleRate))
}
410
+
411
+ // New optimized function that accepts pre-computed chromagram
412
/// Projects a chroma vector onto six features using a fixed 6x12 weight matrix.
///
/// Each matrix row has weight 1 on three chroma bins and 0 elsewhere, so each output
/// is the sum of three chroma values. If `chroma` has fewer than 12 entries, only the
/// leading entries contribute.
///
/// - Parameter chroma: Chroma values, normally 12 entries.
/// - Returns: Six feature values, one per matrix row.
func computeTonnetz(fromChroma chroma: [Float]) -> [Float] {
    // Projection weights (6 rows x 12 chroma bins).
    let projection: [[Float]] = [
        [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], // bins 0, 4, 7
        [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], // bins 1, 5, 8
        [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], // bins 2, 6, 9
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], // bins 3, 7, 10
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], // bins 4, 8, 11
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]  // bins 0, 5, 9
    ]

    var features = [Float]()
    features.reserveCapacity(projection.count)

    for weights in projection {
        var dot: Float = 0
        for (weight, value) in zip(weights, chroma) {
            dot += weight * value
        }
        features.append(dot)
    }
    return features
}
428
+
429
/// Decoded audio returned by `loadAudioFile(_:)`.
struct AudioData {
    /// Sample values as floats.
    let samples: [Float]

    /// Sample rate of `samples`.
    let sampleRate: Int
}
433
+
434
/// Loads an audio file and returns the samples of its first channel as floats.
///
/// - Parameter fileUri: String form of the file's URL.
/// - Returns: The first channel's samples and the sample rate of the file's processing format.
/// - Throws: An `NSError` in the `AudioProcessing` domain when the URL is invalid, the
///   buffer cannot be allocated, or no float channel data is available. Errors thrown by
///   `AVAudioFile` are passed through.
func loadAudioFile(_ fileUri: String) throws -> AudioData {
    guard let url = URL(string: fileUri) else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid file URL"])
    }

    let file = try AVAudioFile(forReading: url)
    let format = file.processingFormat
    let sampleRate = Int(format.sampleRate)

    // Nothing to decode; avoid requesting a zero-capacity buffer.
    guard file.length > 0 else {
        return AudioData(samples: [], sampleRate: sampleRate)
    }

    // `file.length` is a 64-bit frame count; convert without trapping on overflow,
    // and surface allocation failure as an error instead of a crash.
    guard let frameCount = AVAudioFrameCount(exactly: file.length),
          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to allocate audio buffer"])
    }

    try file.read(into: buffer, frameCount: frameCount)

    guard let floatData = buffer.floatChannelData?[0] else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to read audio data"])
    }

    // Copy only the frames actually read, which may be fewer than requested.
    let samples = Array(UnsafeBufferPointer(start: floatData, count: Int(buffer.frameLength)))

    return AudioData(samples: samples, sampleRate: sampleRate)
}
456
+
457
/// Returns the mean of the squared samples (average power) of `samples`.
///
/// - Parameter samples: Audio samples.
/// - Returns: `sum(x²) / count`, or `0` for empty input.
func computeEnergy(from samples: [Float]) -> Float {
    // Avoid a 0 / 0 result for empty input.
    guard !samples.isEmpty else { return 0 }

    // vDSP_measqv already divides the sum of squares by the element count,
    // so the result must not be divided by the count a second time.
    var meanSquare: Float = 0
    vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(samples.count))
    return meanSquare
}
462
+
463
/// Returns the square root of `computeEnergy(from:)` for `samples`.
func computeRMS(from samples: [Float]) -> Float {
    return sqrt(computeEnergy(from: samples))
}
467
+
468
/// Returns the zero-crossing rate of `samples`.
///
/// A crossing is counted when the product of two consecutive samples is negative,
/// so a pair that includes an exact zero is not counted.
///
/// - Parameter samples: Audio samples.
/// - Returns: Number of crossings divided by `samples.count`; `0` when there are fewer
///   than two samples.
func computeZCR(from samples: [Float]) -> Float {
    // `1..<samples.count` traps for an empty array; a single sample has no pairs.
    guard samples.count > 1 else { return 0 }

    var zeroCrossings = 0
    for (previous, current) in zip(samples, samples.dropFirst()) where previous * current < 0 {
        zeroCrossings += 1
    }
    return Float(zeroCrossings) / Float(samples.count)
}
477
+
478
// Keep in AudioProcessingHelpers.swift

/// Number of MFCC coefficients.
private let N_MFCC: Int = 40

/// FFT length in samples.
private let N_FFT: Int = 1024

/// Number of mel bands.
private let N_MELS: Int = 128

/// Number of chroma bins.
private let N_CHROMA: Int = 12

/// Number of frequency bands.
private let N_BANDS: Int = 7
484
+
485
+ // Core audio processing functions
486
/// Returns the zero-crossing rate of `data`.
///
/// A crossing is counted whenever consecutive samples fall on opposite sides of the
/// "non-negative" boundary: one is `>= 0` and the other is `< 0`.
///
/// - Parameter data: Audio samples.
/// - Returns: Number of crossings divided by `data.count`; `0` when there are fewer
///   than two samples.
func calculateZeroCrossingRate(_ data: [Float]) -> Float {
    // `1..<data.count` traps for an empty array; a single sample has no pairs.
    guard data.count > 1 else { return 0 }

    var count: Float = 0
    for i in 1..<data.count {
        let risesThroughZero = data[i] >= 0 && data[i - 1] < 0
        let fallsThroughZero = data[i] < 0 && data[i - 1] >= 0
        if risesThroughZero || fallsThroughZero {
            count += 1
        }
    }
    return count / Float(data.count)
}
495
+
496
/// Returns the mean of the squared samples of `data`.
///
/// - Parameter data: Audio samples.
/// - Returns: `sum(x²) / count`, or `0` for empty input.
func calculateEnergy(_ data: [Float]) -> Float {
    // Avoid a 0 / 0 (NaN) result for empty input.
    guard !data.isEmpty else { return 0 }

    var sumOfSquares: Float = 0
    vDSP_svesq(data, 1, &sumOfSquares, vDSP_Length(data.count))
    return sumOfSquares / Float(data.count)
}
501
+
502
+ // Feature extraction functions
503
/// Assembles a `Features` value for one audio segment.
///
/// Features are computed only when their key in `featureOptions` is `true`; a missing
/// key is treated as `false`. The exceptions are `rms`, `tempo` and `hnr`, which are
/// always computed. Spectral features, MFCC and chromagram share a single call into
/// `AudioFeaturesWrapper`.
///
/// - Parameters:
///   - segmentData: Samples of the segment.
///   - sampleRate: Sample rate in Hz.
///   - sumSquares: Precomputed sum of squared samples for the segment.
///   - zeroCrossings: Precomputed number of zero crossings for the segment.
///   - segmentLength: Number of samples used to normalise `rms` and `zcr`.
///   - featureOptions: Flags selecting which optional features to compute.
/// - Returns: The populated `Features`. Features that were not requested are `0`, empty,
///   or `nil` (pitch).
func computeFeatures(segmentData: [Float], sampleRate: Float, sumSquares: Float, zeroCrossings: Int, segmentLength: Int, featureOptions: [String: Bool]) -> Features {
    // A missing key and an explicit `false` are equivalent.
    func isRequested(_ feature: String) -> Bool {
        return featureOptions[feature] == true
    }

    let rms = sqrt(sumSquares / Float(segmentLength))

    let energy: Float
    if isRequested("energy") {
        energy = sumSquares
    } else {
        energy = 0
    }

    let zcr: Float
    if isRequested("zcr") {
        zcr = Float(zeroCrossings) / Float(segmentLength)
    } else {
        zcr = 0
    }

    // Work out which FFT-based feature groups the native call has to produce.
    let needSpectral = isRequested("spectralCentroid")
        || isRequested("spectralFlatness")
        || isRequested("spectralRolloff")
        || isRequested("spectralBandwidth")
    let needMfcc = isRequested("mfcc")
    let needChroma = isRequested("chromagram")

    // Defaults used when a feature is not requested or the native call returns nil.
    var spectralCentroid: Float = 0
    var spectralFlatness: Float = 0
    var spectralRolloff: Float = 0
    var spectralBandwidth: Float = 0
    var mfcc: [Float] = []
    var chromagram: [Float] = []

    // One native call covers spectral features, MFCC and chroma.
    if needSpectral || needMfcc || needChroma {
        let nativeResult = segmentData.withUnsafeBufferPointer { samples in
            AudioFeaturesWrapper.computeFrame(
                withSamples: samples.baseAddress,
                numSamples: Int32(segmentData.count),
                sampleRate: Int32(sampleRate),
                fftLength: Int32(N_FFT),
                nMfcc: 13,
                nMelFilters: 26,
                computeMfcc: needMfcc,
                computeChroma: needChroma
            )
        }

        if let values = nativeResult {
            if needSpectral {
                spectralCentroid = (values["spectralCentroid"] as? NSNumber)?.floatValue ?? 0
                spectralFlatness = (values["spectralFlatness"] as? NSNumber)?.floatValue ?? 0
                spectralRolloff = (values["spectralRolloff"] as? NSNumber)?.floatValue ?? 0
                spectralBandwidth = (values["spectralBandwidth"] as? NSNumber)?.floatValue ?? 0
            }
            if needMfcc {
                mfcc = (values["mfcc"] as? [NSNumber])?.map { $0.floatValue } ?? []
            }
            if needChroma {
                chromagram = (values["chromagram"] as? [NSNumber])?.map { $0.floatValue } ?? []
            }
        }
    }

    let melSpectrogram: [Float] = isRequested("melSpectrogram")
        ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate)
        : []
    let spectralContrast: [Float] = isRequested("spectralContrast")
        ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate)
        : []
    let tonnetz: [Float] = isRequested("tonnetz")
        ? computeTonnetz(from: segmentData, sampleRate: sampleRate)
        : []

    let pitch: Float? = isRequested("pitch")
        ? estimatePitch(from: segmentData, sampleRate: sampleRate)
        : nil

    // Tempo and HNR are not gated by featureOptions.
    let tempo = extractTempo(from: segmentData, sampleRate: sampleRate)
    let hnr = extractHNR(from: segmentData)

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRolloff: spectralRolloff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch
    )
}
577
+
578
/// Returns the smallest power of two that is greater than or equal to `n`
/// (and `1` for any `n` of 1 or less).
private func nextPowerOfTwo(_ n: Int) -> Int {
    var candidate = 1
    while candidate < n {
        candidate = candidate * 2
    }
    return candidate
}
585
+
586
/// Estimates the fundamental frequency of `segment` from its autocorrelation.
///
/// The lag with the largest autocorrelation value is searched within the range that
/// corresponds to 50-500 Hz, limited to lags the segment is long enough to measure.
///
/// - Parameters:
///   - segment: Audio samples for one analysis frame.
///   - sampleRate: Sample rate in Hz.
/// - Returns: `sampleRate / lag` for the best lag, or `0.0` when the segment has fewer
///   than two samples, the sample rate is not a positive finite number, the segment is
///   too short to cover the minimum lag, or no lag exceeds the initial threshold of -1.
func estimatePitch(from segment: [Float], sampleRate: Float) -> Float {
    let sampleCount = segment.count
    guard sampleCount >= 2, sampleRate.isFinite, sampleRate > 0 else { return 0.0 }

    // Lag bounds for the 50-500 Hz search range. Lag 0 is the signal energy, not a
    // pitch candidate, and lags at or beyond the segment length have no data.
    let minLag = max(1, Int(sampleRate / 500.0)) // Max frequency
    let maxLag = min(Int(sampleRate / 50.0), sampleCount - 1) // Min frequency
    guard minLag <= maxLag else { return 0.0 }

    // vDSP_conv reads N + P - 1 input elements, so the signal is zero-padded. With a
    // positive filter stride it computes a correlation, so the un-reversed segment is
    // the filter: autocorrelation[lag] = sum over p of segment[lag + p] * segment[p].
    let padded = segment + [Float](repeating: 0, count: sampleCount - 1)
    var autocorrelation = [Float](repeating: 0, count: sampleCount)
    vDSP_conv(padded, 1, segment, 1, &autocorrelation, 1, vDSP_Length(sampleCount), vDSP_Length(sampleCount))

    var maxCorr: Float = -1.0
    var pitchLag = 0
    for lag in minLag...maxLag where autocorrelation[lag] > maxCorr {
        maxCorr = autocorrelation[lag]
        pitchLag = lag
    }

    // Convert lag to frequency (sampleRate / lag).
    return pitchLag > 0 ? sampleRate / Float(pitchLag) : 0.0
}
618
+
619
+ // Add speech detection helper function
620
/// Heuristic speech detector based on loudness and zero-crossing rate.
///
/// - Parameters:
///   - segment: Audio samples, used to compute the zero-crossing rate.
///   - rms: Precomputed RMS level of the segment.
/// - Returns: `isActive` is `true` when `rms > 0.01` and the zero-crossing rate lies
///   strictly between 0.1 and 0.5. `probability` is `rms * 10` limited to `0...1`.
func detectSpeech(from segment: [Float], rms: Float) -> (isActive: Bool, probability: Float) {
    let crossingRate = calculateZeroCrossingRate(segment)

    let loudEnough = rms > 0.01
    let crossingRateInRange = crossingRate > 0.1 && crossingRate < 0.5

    // Scale the RMS level into the unit interval as a rough confidence value.
    let probability = min(1.0, max(0.0, rms * 10))

    return (isActive: loudEnough && crossingRateInRange, probability: probability)
}
628
+
629
/// Reads a range of frames from an audio file and returns it as integer PCM bytes.
///
/// The frames are read in `format`, converted to the format derived from
/// `decodingConfig` when that differs, and then quantised to 16- or 32-bit signed
/// integers with channels interleaved per frame.
///
/// - Parameters:
///   - url: Location of the audio file.
///   - startFrame: Frame position at which reading starts.
///   - frameCount: Number of frames to read.
///   - format: Format of the read buffer; expected to match the file's processing format.
///   - decodingConfig: Supplies the target format and target bit depth (default 16).
///   - includeNormalizedData: When `true`, the first channel's float samples are returned.
///   - includeBase64Data: When `true`, the PCM bytes are also returned Base64-encoded.
/// - Returns: The PCM bytes, plus the optional float samples and Base64 string.
/// - Throws: An `NSError` in the `AudioProcessing` domain when a buffer or converter
///   cannot be created, conversion fails, float channel data is unavailable, or the bit
///   depth is unsupported. Errors from `AVAudioFile` and `AVAudioConverter` are passed through.
func extractRawAudioData(
    from url: URL,
    startFrame: AVAudioFramePosition,
    frameCount: AVAudioFrameCount,
    format: AVAudioFormat,
    decodingConfig: DecodingConfig,
    includeNormalizedData: Bool,
    includeBase64Data: Bool
) throws -> (pcmData: Data, floatData: [Float]?, base64Data: String?) {
    // Apply decoding configuration
    let targetFormat = decodingConfig.toAudioFormat(baseFormat: format)

    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to allocate source audio buffer"])
    }
    let audioFile = try AVAudioFile(forReading: url)

    audioFile.framePosition = startFrame
    try audioFile.read(into: buffer, frameCount: frameCount)

    // Convert to target format if different from source
    let finalBuffer: AVAudioPCMBuffer
    if targetFormat != format {
        guard let converter = AVAudioConverter(from: format, to: targetFormat) else {
            throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to create audio converter"])
        }

        // Upsampling produces more frames than were read, so grow the output capacity.
        var outputCapacity = frameCount
        if format.sampleRate > 0, targetFormat.sampleRate > format.sampleRate {
            let scaledFrames = (Double(frameCount) * targetFormat.sampleRate / format.sampleRate).rounded(.up)
            if scaledFrames < Double(AVAudioFrameCount.max) {
                outputCapacity = AVAudioFrameCount(scaledFrames)
            }
        }

        guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: outputCapacity) else {
            throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to allocate converted audio buffer"])
        }

        // Hand the source buffer to the converter exactly once, then signal end of
        // stream. Returning the same buffer with `.haveData` on every request makes
        // the converter consume the same audio repeatedly.
        var didSupplyInput = false
        var error: NSError?
        let status = converter.convert(to: convertedBuffer, error: &error) { _, outStatus in
            if didSupplyInput {
                outStatus.pointee = .endOfStream
                return nil
            }
            didSupplyInput = true
            outStatus.pointee = .haveData
            return buffer
        }

        if let error = error {
            Logger.debug("AudioProcessingHelpers", "Format conversion failed: \(error.localizedDescription)")
            throw error
        }
        if status == .error {
            throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Format conversion failed"])
        }
        finalBuffer = convertedBuffer
    } else {
        finalBuffer = buffer
    }

    guard let floatData = finalBuffer.floatChannelData else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to get float channel data"])
    }

    let channels = Int(targetFormat.channelCount)
    let frameLength = Int(finalBuffer.frameLength)
    let totalSamples = frameLength * channels

    // Use targetBitDepth from decodingConfig instead of format's bit depth
    let targetBitDepth = decodingConfig.targetBitDepth ?? 16
    let bytesPerSample = targetBitDepth / 8
    var pcmData = Data(capacity: totalSamples * bytesPerSample)

    // Convert float samples to PCM format with specified bit depth
    for frame in 0..<frameLength {
        for channel in 0..<channels {
            let sample = floatData[channel][frame]

            // Float-to-integer conversion traps on NaN and out-of-range values, so
            // every sample is limited to [-1, 1] before scaling.
            let boundedSample: Float = sample.isNaN ? 0 : max(-1.0, min(1.0, sample))

            switch targetBitDepth {
            case 16:
                let intValue = Int16(boundedSample * Float(Int16.max))
                pcmData.append(contentsOf: withUnsafeBytes(of: intValue) { Array($0) })
            case 32:
                // Float(Int32.max) rounds up to 2^31, which does not fit in Int32;
                // scale in Double, where Int32.max is exactly representable.
                let intValue = Int32(Double(boundedSample) * Double(Int32.max))
                pcmData.append(contentsOf: withUnsafeBytes(of: intValue) { Array($0) })
            default:
                throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unsupported bit depth \(targetBitDepth)"])
            }
        }
    }

    // Only process normalized data if requested (first channel only)
    let normalizedData: [Float]? = includeNormalizedData ?
        Array(UnsafeBufferPointer(start: floatData[0], count: frameLength)) :
        nil

    // Convert to base64 if requested
    let base64Data: String? = includeBase64Data ?
        pcmData.base64EncodedString() :
        nil

    return (pcmData: pcmData, floatData: normalizedData, base64Data: base64Data)
}
712
+
713
+ // Update the CRC32 function to use zlib's implementation
714
/// Computes the CRC-32 checksum of `data` using zlib's `crc32`.
///
/// - Parameter data: Bytes to checksum.
/// - Returns: The checksum as a 32-bit value.
func calculateCRC32(data: Data) -> UInt32 {
    return data.withUnsafeBytes { rawBytes -> UInt32 in
        let firstByte = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        let byteCount = UInt32(rawBytes.count)
        let checksum = crc32(0, firstByte, byteCount)
        return UInt32(checksum)
    }
}
720
+
721
/// Computes the CRC-32 checksum of the raw bytes of the first `count` floats in `floatArray`.
///
/// - Parameters:
///   - floatArray: Source values; their in-memory byte representation is checksummed.
///   - count: Number of leading elements to include. Values outside `0...floatArray.count`
///     are limited to that range.
/// - Returns: The checksum as a 32-bit value.
func calculateCRC32(from floatArray: [Float], count: Int) -> UInt32 {
    // Never read past the end of the array, and never pass a negative length.
    let elementCount = min(max(count, 0), floatArray.count)

    return floatArray.withUnsafeBytes { floatBytes -> UInt32 in
        let firstByte = floatBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        let byteCount = elementCount * MemoryLayout<Float>.size
        return UInt32(crc32(0, firstByte, UInt32(byteCount)))
    }
}
728
+
729
/// Builds a complete WAV byte stream: a 44-byte PCM header followed by `pcmData`.
///
/// All multi-byte header fields are written little-endian.
///
/// - Parameters:
///   - pcmData: Raw PCM sample bytes to place after the header.
///   - sampleRate: Sample rate in Hz.
///   - channels: Number of channels.
///   - bitDepth: Bits per sample.
/// - Returns: The header immediately followed by `pcmData`.
func createWavHeader(pcmData: Data, sampleRate: Int, channels: Int, bitDepth: Int) -> Data {
    let headerLength = 44
    // RIFF chunk size excludes the 8 bytes of the "RIFF" tag and the size field itself.
    let riffChunkSize = pcmData.count + headerLength - 8
    let sampleWidth = bitDepth / 8
    let bytesPerSecond = sampleRate * channels * sampleWidth
    let frameWidth = channels * sampleWidth

    var wav = Data(capacity: headerLength)

    // RIFF container
    wav.append(contentsOf: "RIFF".data(using: .ascii)!)
    wav.append(UInt32(riffChunkSize).littleEndian.data)
    wav.append(contentsOf: "WAVE".data(using: .ascii)!)

    // 'fmt ' chunk: 16-byte body, format tag 1 (PCM)
    wav.append(contentsOf: "fmt ".data(using: .ascii)!)
    wav.append(UInt32(16).littleEndian.data)
    wav.append(UInt16(1).littleEndian.data)
    wav.append(UInt16(channels).littleEndian.data)
    wav.append(UInt32(sampleRate).littleEndian.data)
    wav.append(UInt32(bytesPerSecond).littleEndian.data)
    wav.append(UInt16(frameWidth).littleEndian.data)
    wav.append(UInt16(bitDepth).littleEndian.data)

    // 'data' chunk: payload length, then the payload itself
    wav.append(contentsOf: "data".data(using: .ascii)!)
    wav.append(UInt32(pcmData.count).littleEndian.data)
    wav.append(pcmData)

    return wav
}
783
+
784
+ // Extension to help with binary data conversion
785
extension UInt16 {
    /// The two bytes of this value in the order they are stored in memory.
    /// Call on `.littleEndian` or `.bigEndian` to fix the byte order.
    var data: Data {
        return Swift.withUnsafeBytes(of: self) { Data($0) }
    }
}
791
+
792
extension UInt32 {
    /// The four bytes of this value in the order they are stored in memory.
    /// Call on `.littleEndian` or `.bigEndian` to fix the byte order.
    var data: Data {
        return Swift.withUnsafeBytes(of: self) { Data($0) }
    }
}