@siteed/expo-audio-studio 2.18.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +13 -297
  2. package/index.d.ts +1 -0
  3. package/index.js +1 -0
  4. package/package.json +6 -135
  5. package/CHANGELOG.md +0 -501
  6. package/LICENSE +0 -21
  7. package/android/build.gradle +0 -129
  8. package/android/src/androidTest/assets/chorus.wav +0 -0
  9. package/android/src/androidTest/assets/jfk.wav +0 -0
  10. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  11. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  12. package/android/src/androidTest/java/net/siteed/audiostream/AudioProcessorInstrumentedTest.kt +0 -197
  13. package/android/src/androidTest/java/net/siteed/audiostream/AudioRecorderInstrumentedTest.kt +0 -541
  14. package/android/src/androidTest/java/net/siteed/audiostream/AudioRecorderPerformanceInstrumentedTest.kt +0 -234
  15. package/android/src/androidTest/java/net/siteed/audiostream/integration/AudioFocusStrategyIntegrationTest.kt +0 -332
  16. package/android/src/androidTest/java/net/siteed/audiostream/integration/BufferDurationIntegrationTest.kt +0 -324
  17. package/android/src/androidTest/java/net/siteed/audiostream/integration/CompressedOnlyOutputTest.kt +0 -253
  18. package/android/src/androidTest/java/net/siteed/audiostream/integration/DeviceDisconnectionFallbackTest.kt +0 -218
  19. package/android/src/androidTest/java/net/siteed/audiostream/integration/EventEmissionIntervalTest.kt +0 -120
  20. package/android/src/androidTest/java/net/siteed/audiostream/integration/M4aFormatTest.kt +0 -345
  21. package/android/src/androidTest/java/net/siteed/audiostream/integration/OutputControlIntegrationTest.kt +0 -340
  22. package/android/src/androidTest/java/net/siteed/audiostream/integration/PcmStreamingDurationTest.kt +0 -252
  23. package/android/src/androidTest/java/net/siteed/audiostream/integration/README.md +0 -95
  24. package/android/src/androidTest/java/net/siteed/audiostream/integration/run_integration_tests.sh +0 -43
  25. package/android/src/main/AndroidManifest.xml +0 -30
  26. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -188
  27. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  28. package/android/src/main/java/net/siteed/audiostream/AudioDeviceManager.kt +0 -1741
  29. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -136
  30. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -354
  31. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -439
  32. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -2237
  33. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2141
  34. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -167
  35. package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +0 -1099
  36. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -37
  37. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  38. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -1113
  39. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  40. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  41. package/android/src/main/java/net/siteed/audiostream/LogUtils.kt +0 -93
  42. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -72
  43. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -68
  44. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  45. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -257
  46. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  47. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  48. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  49. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  50. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  51. package/android/src/main/res/drawable/ic_play.xml +0 -10
  52. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  53. package/android/src/main/res/layout/notification_recording.xml +0 -37
  54. package/android/src/test/java/net/siteed/audiostream/AudioFileHandlerTest.kt +0 -279
  55. package/android/src/test/java/net/siteed/audiostream/AudioFocusStrategyTest.kt +0 -249
  56. package/android/src/test/java/net/siteed/audiostream/AudioFormatTest.kt +0 -151
  57. package/android/src/test/java/net/siteed/audiostream/AudioFormatUtilsTest.kt +0 -273
  58. package/android/src/test/java/net/siteed/audiostream/DeviceDisconnectionFallbackUnitTest.kt +0 -140
  59. package/android/src/test/resources/chorus.wav +0 -0
  60. package/android/src/test/resources/generate_test_audio.py +0 -94
  61. package/android/src/test/resources/jfk.wav +0 -0
  62. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  63. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  64. package/app.plugin.js +0 -3
  65. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +0 -4
  66. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  67. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +0 -210
  68. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  69. package/build/cjs/AudioAnalysis/extractAudioData.js +0 -21
  70. package/build/cjs/AudioAnalysis/extractAudioData.js.map +0 -1
  71. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +0 -92
  72. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  73. package/build/cjs/AudioAnalysis/extractPreview.js +0 -28
  74. package/build/cjs/AudioAnalysis/extractPreview.js.map +0 -1
  75. package/build/cjs/AudioAnalysis/extractWaveform.js +0 -18
  76. package/build/cjs/AudioAnalysis/extractWaveform.js.map +0 -1
  77. package/build/cjs/AudioDeviceManager.js +0 -689
  78. package/build/cjs/AudioDeviceManager.js.map +0 -1
  79. package/build/cjs/AudioRecorder.provider.js +0 -78
  80. package/build/cjs/AudioRecorder.provider.js.map +0 -1
  81. package/build/cjs/ExpoAudioStream.native.js +0 -8
  82. package/build/cjs/ExpoAudioStream.native.js.map +0 -1
  83. package/build/cjs/ExpoAudioStream.types.js +0 -11
  84. package/build/cjs/ExpoAudioStream.types.js.map +0 -1
  85. package/build/cjs/ExpoAudioStream.web.js +0 -708
  86. package/build/cjs/ExpoAudioStream.web.js.map +0 -1
  87. package/build/cjs/ExpoAudioStreamModule.js +0 -718
  88. package/build/cjs/ExpoAudioStreamModule.js.map +0 -1
  89. package/build/cjs/WebRecorder.web.js +0 -777
  90. package/build/cjs/WebRecorder.web.js.map +0 -1
  91. package/build/cjs/constants/platformLimitations.js +0 -99
  92. package/build/cjs/constants/platformLimitations.js.map +0 -1
  93. package/build/cjs/constants.js +0 -17
  94. package/build/cjs/constants.js.map +0 -1
  95. package/build/cjs/events.js +0 -29
  96. package/build/cjs/events.js.map +0 -1
  97. package/build/cjs/hooks/useAudioDevices.js +0 -179
  98. package/build/cjs/hooks/useAudioDevices.js.map +0 -1
  99. package/build/cjs/index.js +0 -58
  100. package/build/cjs/index.js.map +0 -1
  101. package/build/cjs/trimAudio.js +0 -76
  102. package/build/cjs/trimAudio.js.map +0 -1
  103. package/build/cjs/useAudioRecorder.js +0 -518
  104. package/build/cjs/useAudioRecorder.js.map +0 -1
  105. package/build/cjs/utils/BlobFix.js +0 -502
  106. package/build/cjs/utils/BlobFix.js.map +0 -1
  107. package/build/cjs/utils/audioProcessing.js +0 -136
  108. package/build/cjs/utils/audioProcessing.js.map +0 -1
  109. package/build/cjs/utils/cleanNativeOptions.js +0 -22
  110. package/build/cjs/utils/cleanNativeOptions.js.map +0 -1
  111. package/build/cjs/utils/concatenateBuffers.js +0 -25
  112. package/build/cjs/utils/concatenateBuffers.js.map +0 -1
  113. package/build/cjs/utils/convertPCMToFloat32.js +0 -124
  114. package/build/cjs/utils/convertPCMToFloat32.js.map +0 -1
  115. package/build/cjs/utils/crc32.js +0 -52
  116. package/build/cjs/utils/crc32.js.map +0 -1
  117. package/build/cjs/utils/encodingToBitDepth.js +0 -17
  118. package/build/cjs/utils/encodingToBitDepth.js.map +0 -1
  119. package/build/cjs/utils/getWavFileInfo.js +0 -96
  120. package/build/cjs/utils/getWavFileInfo.js.map +0 -1
  121. package/build/cjs/utils/writeWavHeader.js +0 -88
  122. package/build/cjs/utils/writeWavHeader.js.map +0 -1
  123. package/build/cjs/workers/InlineFeaturesExtractor.web.js +0 -859
  124. package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +0 -1
  125. package/build/cjs/workers/inlineAudioWebWorker.web.js +0 -184
  126. package/build/cjs/workers/inlineAudioWebWorker.web.js.map +0 -1
  127. package/build/esm/AudioAnalysis/AudioAnalysis.types.js +0 -3
  128. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  129. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +0 -202
  130. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  131. package/build/esm/AudioAnalysis/extractAudioData.js +0 -14
  132. package/build/esm/AudioAnalysis/extractAudioData.js.map +0 -1
  133. package/build/esm/AudioAnalysis/extractMelSpectrogram.js +0 -89
  134. package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  135. package/build/esm/AudioAnalysis/extractPreview.js +0 -25
  136. package/build/esm/AudioAnalysis/extractPreview.js.map +0 -1
  137. package/build/esm/AudioAnalysis/extractWaveform.js +0 -11
  138. package/build/esm/AudioAnalysis/extractWaveform.js.map +0 -1
  139. package/build/esm/AudioDeviceManager.js +0 -682
  140. package/build/esm/AudioDeviceManager.js.map +0 -1
  141. package/build/esm/AudioRecorder.provider.js +0 -40
  142. package/build/esm/AudioRecorder.provider.js.map +0 -1
  143. package/build/esm/ExpoAudioStream.native.js +0 -6
  144. package/build/esm/ExpoAudioStream.native.js.map +0 -1
  145. package/build/esm/ExpoAudioStream.types.js +0 -8
  146. package/build/esm/ExpoAudioStream.types.js.map +0 -1
  147. package/build/esm/ExpoAudioStream.web.js +0 -704
  148. package/build/esm/ExpoAudioStream.web.js.map +0 -1
  149. package/build/esm/ExpoAudioStreamModule.js +0 -713
  150. package/build/esm/ExpoAudioStreamModule.js.map +0 -1
  151. package/build/esm/WebRecorder.web.js +0 -773
  152. package/build/esm/WebRecorder.web.js.map +0 -1
  153. package/build/esm/constants/platformLimitations.js +0 -90
  154. package/build/esm/constants/platformLimitations.js.map +0 -1
  155. package/build/esm/constants.js +0 -14
  156. package/build/esm/constants.js.map +0 -1
  157. package/build/esm/events.js +0 -21
  158. package/build/esm/events.js.map +0 -1
  159. package/build/esm/hooks/useAudioDevices.js +0 -176
  160. package/build/esm/hooks/useAudioDevices.js.map +0 -1
  161. package/build/esm/index.js +0 -20
  162. package/build/esm/index.js.map +0 -1
  163. package/build/esm/trimAudio.js +0 -69
  164. package/build/esm/trimAudio.js.map +0 -1
  165. package/build/esm/useAudioRecorder.js +0 -512
  166. package/build/esm/useAudioRecorder.js.map +0 -1
  167. package/build/esm/utils/BlobFix.js +0 -498
  168. package/build/esm/utils/BlobFix.js.map +0 -1
  169. package/build/esm/utils/audioProcessing.js +0 -133
  170. package/build/esm/utils/audioProcessing.js.map +0 -1
  171. package/build/esm/utils/cleanNativeOptions.js +0 -19
  172. package/build/esm/utils/cleanNativeOptions.js.map +0 -1
  173. package/build/esm/utils/concatenateBuffers.js +0 -21
  174. package/build/esm/utils/concatenateBuffers.js.map +0 -1
  175. package/build/esm/utils/convertPCMToFloat32.js +0 -120
  176. package/build/esm/utils/convertPCMToFloat32.js.map +0 -1
  177. package/build/esm/utils/crc32.js +0 -50
  178. package/build/esm/utils/crc32.js.map +0 -1
  179. package/build/esm/utils/encodingToBitDepth.js +0 -13
  180. package/build/esm/utils/encodingToBitDepth.js.map +0 -1
  181. package/build/esm/utils/getWavFileInfo.js +0 -92
  182. package/build/esm/utils/getWavFileInfo.js.map +0 -1
  183. package/build/esm/utils/writeWavHeader.js +0 -84
  184. package/build/esm/utils/writeWavHeader.js.map +0 -1
  185. package/build/esm/workers/InlineFeaturesExtractor.web.js +0 -856
  186. package/build/esm/workers/InlineFeaturesExtractor.web.js.map +0 -1
  187. package/build/esm/workers/inlineAudioWebWorker.web.js +0 -181
  188. package/build/esm/workers/inlineAudioWebWorker.web.js.map +0 -1
  189. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +0 -196
  190. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  191. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +0 -74
  192. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  193. package/build/types/AudioAnalysis/extractAudioData.d.ts +0 -3
  194. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +0 -1
  195. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +0 -14
  196. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +0 -1
  197. package/build/types/AudioAnalysis/extractPreview.d.ts +0 -11
  198. package/build/types/AudioAnalysis/extractPreview.d.ts.map +0 -1
  199. package/build/types/AudioAnalysis/extractWaveform.d.ts +0 -8
  200. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  201. package/build/types/AudioDeviceManager.d.ts +0 -187
  202. package/build/types/AudioDeviceManager.d.ts.map +0 -1
  203. package/build/types/AudioRecorder.provider.d.ts +0 -11
  204. package/build/types/AudioRecorder.provider.d.ts.map +0 -1
  205. package/build/types/ExpoAudioStream.native.d.ts +0 -3
  206. package/build/types/ExpoAudioStream.native.d.ts.map +0 -1
  207. package/build/types/ExpoAudioStream.types.d.ts +0 -738
  208. package/build/types/ExpoAudioStream.types.d.ts.map +0 -1
  209. package/build/types/ExpoAudioStream.web.d.ts +0 -96
  210. package/build/types/ExpoAudioStream.web.d.ts.map +0 -1
  211. package/build/types/ExpoAudioStreamModule.d.ts +0 -3
  212. package/build/types/ExpoAudioStreamModule.d.ts.map +0 -1
  213. package/build/types/WebRecorder.web.d.ts +0 -198
  214. package/build/types/WebRecorder.web.d.ts.map +0 -1
  215. package/build/types/constants/platformLimitations.d.ts +0 -40
  216. package/build/types/constants/platformLimitations.d.ts.map +0 -1
  217. package/build/types/constants.d.ts +0 -11
  218. package/build/types/constants.d.ts.map +0 -1
  219. package/build/types/events.d.ts +0 -26
  220. package/build/types/events.d.ts.map +0 -1
  221. package/build/types/hooks/useAudioDevices.d.ts +0 -15
  222. package/build/types/hooks/useAudioDevices.d.ts.map +0 -1
  223. package/build/types/index.d.ts +0 -18
  224. package/build/types/index.d.ts.map +0 -1
  225. package/build/types/trimAudio.d.ts +0 -25
  226. package/build/types/trimAudio.d.ts.map +0 -1
  227. package/build/types/useAudioRecorder.d.ts +0 -22
  228. package/build/types/useAudioRecorder.d.ts.map +0 -1
  229. package/build/types/utils/BlobFix.d.ts +0 -9
  230. package/build/types/utils/BlobFix.d.ts.map +0 -1
  231. package/build/types/utils/audioProcessing.d.ts +0 -24
  232. package/build/types/utils/audioProcessing.d.ts.map +0 -1
  233. package/build/types/utils/cleanNativeOptions.d.ts +0 -15
  234. package/build/types/utils/cleanNativeOptions.d.ts.map +0 -1
  235. package/build/types/utils/concatenateBuffers.d.ts +0 -8
  236. package/build/types/utils/concatenateBuffers.d.ts.map +0 -1
  237. package/build/types/utils/convertPCMToFloat32.d.ts +0 -13
  238. package/build/types/utils/convertPCMToFloat32.d.ts.map +0 -1
  239. package/build/types/utils/crc32.d.ts +0 -7
  240. package/build/types/utils/crc32.d.ts.map +0 -1
  241. package/build/types/utils/encodingToBitDepth.d.ts +0 -5
  242. package/build/types/utils/encodingToBitDepth.d.ts.map +0 -1
  243. package/build/types/utils/getWavFileInfo.d.ts +0 -26
  244. package/build/types/utils/getWavFileInfo.d.ts.map +0 -1
  245. package/build/types/utils/writeWavHeader.d.ts +0 -34
  246. package/build/types/utils/writeWavHeader.d.ts.map +0 -1
  247. package/build/types/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  248. package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  249. package/build/types/workers/inlineAudioWebWorker.web.d.ts +0 -2
  250. package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  251. package/expo-module.config.json +0 -10
  252. package/ios/AudioAnalysisData.swift +0 -74
  253. package/ios/AudioDeviceManager.swift +0 -670
  254. package/ios/AudioNotificationManager.swift +0 -154
  255. package/ios/AudioProcessingHelpers.swift +0 -743
  256. package/ios/AudioProcessor.swift +0 -1151
  257. package/ios/AudioStreamError.swift +0 -7
  258. package/ios/AudioStreamManager.swift +0 -2369
  259. package/ios/AudioStreamManagerDelegate.swift +0 -16
  260. package/ios/DataPoint.swift +0 -54
  261. package/ios/DecodingConfig.swift +0 -59
  262. package/ios/ExpoAudioStream.podspec +0 -33
  263. package/ios/ExpoAudioStreamModule.swift +0 -1019
  264. package/ios/ExpoAudioStudioTests/AudioFileHandlerTests.swift +0 -338
  265. package/ios/ExpoAudioStudioTests/AudioFormatUtilsTests.swift +0 -331
  266. package/ios/ExpoAudioStudioTests/AudioTestHelpers.swift +0 -130
  267. package/ios/ExpoAudioStudioTests/CompressedOnlyOutputTests.swift +0 -294
  268. package/ios/ExpoAudioStudioTests/EventEmissionIntervalTests.swift +0 -105
  269. package/ios/ExpoAudioStudioTests/Info.plist +0 -22
  270. package/ios/ExpoAudioStudioTests/README.md +0 -39
  271. package/ios/ExpoAudioStudioTests/SimpleAudioTest.swift +0 -98
  272. package/ios/ExpoAudioStudioTests/TestAudioGenerator.swift +0 -75
  273. package/ios/FFT.swift +0 -62
  274. package/ios/Features.swift +0 -95
  275. package/ios/ISSUE_IOS.md +0 -68
  276. package/ios/Logger.swift +0 -39
  277. package/ios/NotificationExtension.swift +0 -15
  278. package/ios/RecordingResult.swift +0 -22
  279. package/ios/RecordingSettings.swift +0 -308
  280. package/ios/WaveformExtractor.swift +0 -105
  281. package/ios/tests/README.md +0 -41
  282. package/ios/tests/integration/buffer_and_fallback_test.swift +0 -178
  283. package/ios/tests/integration/buffer_duration_test.swift +0 -185
  284. package/ios/tests/integration/compressed_only_output_test.swift +0 -271
  285. package/ios/tests/integration/output_control_test.swift +0 -322
  286. package/ios/tests/integration/run_integration_tests.sh +0 -37
  287. package/ios/tests/opus_support_test_macos.swift +0 -154
  288. package/ios/tests/standalone/audio_processing_test.swift +0 -144
  289. package/ios/tests/standalone/audio_recording_test.swift +0 -277
  290. package/ios/tests/standalone/audio_streaming_test.swift +0 -249
  291. package/ios/tests/standalone/standalone_test.swift +0 -144
  292. package/plugin/build/index.cjs +0 -194
  293. package/plugin/build/index.d.cts +0 -22
  294. package/plugin/build/index.js +0 -194
  295. package/plugin/src/index.ts +0 -285
  296. package/plugin/tsconfig.json +0 -10
  297. package/plugin/tsconfig.tsbuildinfo +0 -1
  298. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -224
  299. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -344
  300. package/src/AudioAnalysis/extractAudioData.ts +0 -17
  301. package/src/AudioAnalysis/extractMelSpectrogram.ts +0 -154
  302. package/src/AudioAnalysis/extractPreview.ts +0 -34
  303. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  304. package/src/AudioDeviceManager.ts +0 -803
  305. package/src/AudioRecorder.provider.tsx +0 -57
  306. package/src/ExpoAudioStream.native.ts +0 -6
  307. package/src/ExpoAudioStream.types.ts +0 -874
  308. package/src/ExpoAudioStream.web.ts +0 -905
  309. package/src/ExpoAudioStreamModule.ts +0 -990
  310. package/src/WebRecorder.web.ts +0 -1005
  311. package/src/constants/platformLimitations.ts +0 -118
  312. package/src/constants.ts +0 -18
  313. package/src/events.ts +0 -60
  314. package/src/hooks/useAudioDevices.ts +0 -213
  315. package/src/index.ts +0 -54
  316. package/src/trimAudio.ts +0 -94
  317. package/src/types/crc-32.d.ts +0 -9
  318. package/src/useAudioRecorder.tsx +0 -766
  319. package/src/utils/BlobFix.ts +0 -561
  320. package/src/utils/audioProcessing.ts +0 -205
  321. package/src/utils/cleanNativeOptions.ts +0 -18
  322. package/src/utils/concatenateBuffers.ts +0 -24
  323. package/src/utils/convertPCMToFloat32.ts +0 -170
  324. package/src/utils/crc32.ts +0 -59
  325. package/src/utils/encodingToBitDepth.ts +0 -18
  326. package/src/utils/getWavFileInfo.ts +0 -132
  327. package/src/utils/writeWavHeader.ts +0 -115
  328. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -855
  329. package/src/workers/inlineAudioWebWorker.web.tsx +0 -180
@@ -1,743 +0,0 @@
1
- // packages/expo-audio-stream/ios/AudioProcessingHelpers.swift
2
-
3
- import Accelerate
4
- import AVFoundation
5
- import QuartzCore
6
- import zlib
7
-
8
// Shared FFT configuration

/// Size handed to the `FFT` initializer and to the mel filterbank builder.
private let FFT_LENGTH: Int = 1024

/// One `FFT` instance, built with `FFT_LENGTH`, used by the extractors in this file.
private let sharedFFT = FFT(FFT_LENGTH)
11
-
12
- // Main feature extraction functions
13
/// Computes a 40-element MFCC vector for one audio segment.
///
/// Pipeline: Hann window → shared FFT → power spectrum → 40-band mel
/// filterbank → natural log of each band energy → DCT.
///
/// - Parameters:
///   - segment: Samples of the frame to analyse.
///   - sampleRate: Sampling rate used to lay out the mel filterbank.
/// - Returns: The DCT of the 40 log mel-band energies.
func extractMFCC(from segment: [Float], sampleRate: Float) -> [Float] {
    let coefficientCount = 40

    // Window the frame, transform it, then collapse to per-bin power.
    let spectrum = computePowerSpectrum(
        from: sharedFFT.processSegment(applyHannWindow(to: segment))
    )

    let filters = computeMelFilterbank(
        numFilters: coefficientCount,
        fftSize: FFT_LENGTH,
        sampleRate: sampleRate
    )

    // zip stops at the shorter of the two arrays, so a filter row that is
    // longer than the spectrum (or the reverse) never indexes out of range.
    let logEnergies: [Float] = (0..<coefficientCount).map { (band: Int) -> Float in
        let bandEnergy = zip(spectrum, filters[band]).reduce(Float(0)) { partial, pair in
            partial + pair.0 * pair.1
        }
        // Floor the energy so log() never receives zero.
        return log(max(bandEnergy, .leastNormalMagnitude))
    }

    return computeDCT(from: logEnergies)
}
42
-
43
/// Returns the magnitude-weighted mean frequency of a segment.
///
/// Bin `k` is assigned the frequency `k * sampleRate / (2 * binCount)`, where
/// `binCount` is the length of the magnitude spectrum.
///
/// - Parameters:
///   - segment: Samples passed to the shared FFT.
///   - sampleRate: Sampling rate used to convert bin indices to frequencies.
/// - Returns: The weighted mean frequency, or 0 when the magnitudes do not sum
///   to a positive value.
func extractSpectralCentroid(from segment: [Float], sampleRate: Float) -> Float {
    let spectrum = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    let totalMagnitude: Float = spectrum.reduce(0, +)
    if !(totalMagnitude > 0) { return 0 }

    let binDivisor = Float(2 * spectrum.count)
    var weighted: Float = 0
    for (bin, magnitude) in spectrum.enumerated() {
        let frequency = Float(bin) * sampleRate / binDivisor
        weighted += frequency * magnitude
    }
    return weighted / totalMagnitude
}
58
-
59
/// Returns the ratio of the geometric mean to the arithmetic mean of the
/// segment's power spectrum.
///
/// The geometric mean is evaluated in log space, with 1e-10 added to every
/// bin so that a zero-power bin does not produce log(0).
///
/// - Parameter segment: Samples passed to the shared FFT.
/// - Returns: The flatness ratio, or 0.0 when the arithmetic mean is not positive.
func extractSpectralFlatness(from segment: [Float]) -> Float {
    let power = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let binCount = Float(power.count)

    let logSum = power.reduce(Float(0)) { running, value in
        running + log(value + 1e-10)
    }
    let geometric = exp(logSum / binCount)
    let arithmetic = power.reduce(0, +) / binCount

    guard arithmetic > 0 else { return 0.0 }
    return geometric / arithmetic
}
77
-
78
/// Returns the frequency of the first bin at which the running sum of
/// magnitudes reaches 85% of the total magnitude.
///
/// - Parameters:
///   - segment: Samples passed to the shared FFT.
///   - sampleRate: Sampling rate used to convert the bin index to a frequency.
/// - Returns: `bin * sampleRate / (2 * binCount)` for that bin, or 0.0 if no
///   bin reaches the threshold.
func extractSpectralRollOff(from segment: [Float], sampleRate: Float) -> Float {
    let spectrum = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))
    let total: Float = spectrum.reduce(0, +)
    let cutoff = 0.85 * total // 85% roll-off point

    var running: Float = 0
    var bin = 0
    while bin < spectrum.count {
        running += spectrum[bin]
        if running >= cutoff {
            return Float(bin) * sampleRate / Float(2 * spectrum.count)
        }
        bin += 1
    }
    return 0.0
}
95
-
96
/// Returns the magnitude-weighted standard deviation of frequency around the
/// spectral centroid.
///
/// - Parameters:
///   - segment: Samples passed to the shared FFT and to `extractSpectralCentroid`.
///   - sampleRate: Sampling rate used to convert bin indices to frequencies.
/// - Returns: `sqrt(sum((f - centroid)^2 * magnitude) / sum(magnitude))`, or 0
///   when the magnitudes do not sum to a positive value.
func extractSpectralBandwidth(from segment: [Float], sampleRate: Float) -> Float {
    // The transform is taken before the centroid call, as in the original ordering.
    let transformed = sharedFFT.processSegment(segment)
    let center = extractSpectralCentroid(from: segment, sampleRate: sampleRate)
    let spectrum = computeMagnitudeSpectrum(from: transformed)

    let totalMagnitude: Float = spectrum.reduce(0, +)
    if !(totalMagnitude > 0) { return 0 }

    let binDivisor = Float(2 * spectrum.count)
    var weightedSquares: Float = 0
    for (bin, magnitude) in spectrum.enumerated() {
        let frequency = Float(bin) * sampleRate / binDivisor
        weightedSquares += pow(frequency - center, 2) * magnitude
    }
    return sqrt(weightedSquares / totalMagnitude)
}
113
-
114
/// Accumulates FFT bin magnitudes into 12 pitch classes.
///
/// Each bin's frequency is converted to a semitone offset from 440 Hz
/// (`12 * log2(f / 440)`), floored, and wrapped into `0..<12`, so index 0 is
/// the pitch class of 440 Hz.
///
/// The previous implementation used `truncatingRemainder`, which keeps the
/// sign of the dividend: every bin below 440 Hz produced a negative pitch
/// class and was silently dropped. Flooring and wrapping keeps the result for
/// bins at or above 440 Hz unchanged while letting lower bins contribute.
///
/// - Parameters:
///   - segment: Samples passed to the shared FFT.
///   - sampleRate: Sampling rate; bin spacing is `sampleRate / FFT_LENGTH`.
/// - Returns: 12 summed magnitudes, one per pitch class.
func extractChromagram(from segment: [Float], sampleRate: Float) -> [Float] {
    let fftData = sharedFFT.processSegment(segment)
    let numBins = fftData.count / 2
    let nChroma = 12
    var chroma = [Float](repeating: 0, count: nChroma)
    let freqsPerBin = sampleRate / Float(FFT_LENGTH)

    for bin in 0..<numBins {
        let freq = Float(bin) * freqsPerBin
        // Skip DC and anything for which log2 would be undefined or non-finite.
        guard freq > 0, freq.isFinite else { continue }

        let semitonesFromA4 = 12 * log2(freq / 440.0)
        let semitoneIndex = Int(semitonesFromA4.rounded(.down))
        // Swift's % keeps the sign of the dividend; add nChroma and reduce
        // again so negative offsets land in 0..<nChroma.
        let pitchClass = ((semitoneIndex % nChroma) + nChroma) % nChroma

        // bin < fftData.count / 2 guarantees both indices are in range.
        let re = fftData[2 * bin]
        let im = fftData[2 * bin + 1]
        chroma[pitchClass] += sqrt(re * re + im * im)
    }

    return chroma
}
140
-
141
/// Estimates tempo in BPM from the spacing of spectral-flux peaks.
///
/// The segment is cut into 2048-sample frames every 512 samples. For each
/// frame the positive difference between its magnitude spectrum and the
/// previous frame's is summed into an onset envelope. Local maxima of that
/// envelope are treated as onsets, and the mean gap between them is
/// converted to beats per minute.
///
/// Fix: frames are now enumerated with `stride(from:through:by:)`. The
/// previous `stride(from:to:by:)` stopped before the last frame that fits
/// entirely inside the segment, so a segment of exactly 2048 samples yielded
/// no frames at all. It also made the zero-padding branch unreachable.
///
/// - Parameters:
///   - segment: Audio samples to analyse.
///   - sampleRate: Sampling rate, used to convert frame gaps to time.
/// - Returns: Tempo clamped to 20...300 BPM, or 120 when the segment is
///   shorter than one frame or fewer than two peaks are found.
func extractTempo(from segment: [Float], sampleRate: Float) -> Float {
    let hopLength = 512
    let frameLength = 2048
    let defaultTempo: Float = 120.0

    // Need at least one complete frame.
    guard segment.count >= frameLength else { return defaultTempo }

    // Onset strength via spectral flux.
    var onsetEnvelope = [Float]()
    var previousSpectrum = [Float](repeating: 0, count: frameLength / 2)

    let lastFrameStart = segment.count - frameLength
    for start in stride(from: 0, through: lastFrameStart, by: hopLength) {
        // start <= lastFrameStart, so the slice is always a full frame.
        var fftData = Array(segment[start..<(start + frameLength)])
        sharedFFT.realForward(&fftData)

        let magnitudes = computeMagnitudeSpectrum(from: fftData)
        var flux: Float = 0
        for bin in 0..<min(magnitudes.count, previousSpectrum.count) {
            // Only increases in magnitude count toward the onset strength.
            flux += max(magnitudes[bin] - previousSpectrum[bin], 0)
        }
        onsetEnvelope.append(flux)
        previousSpectrum = magnitudes
    }

    // Strict local maxima of the envelope; needs at least three points.
    var peaks = [Int]()
    if onsetEnvelope.count >= 3 {
        for i in 1..<(onsetEnvelope.count - 1)
        where onsetEnvelope[i] > onsetEnvelope[i - 1] && onsetEnvelope[i] > onsetEnvelope[i + 1] {
            peaks.append(i)
        }
    }

    guard peaks.count > 1 else { return defaultTempo }

    let intervals = zip(peaks, peaks.dropFirst()).map { $1 - $0 }
    let averageInterval = Float(intervals.reduce(0, +)) / Float(intervals.count)
    guard averageInterval > 0 else { return defaultTempo }

    // frames per second * 60 / frames per beat
    let tempo = 60.0 * sampleRate / Float(hopLength) / averageInterval
    // Constrain tempo to a reasonable range (20-300 BPM).
    return min(300.0, max(20.0, tempo))
}
195
-
196
/// Returns the indices of strict local maxima whose height above the taller
/// of their two neighbours is at least `minProminence`.
///
/// The first and last elements are never reported because they lack a
/// neighbour on one side.
///
/// Fix: inputs with fewer than three elements now return an empty array.
/// Previously `1..<data.count - 1` formed an invalid range for zero- or
/// one-element input and trapped at runtime.
///
/// - Parameters:
///   - data: Values to scan.
///   - minProminence: Minimum required `data[i] - max(data[i-1], data[i+1])`.
/// - Returns: Peak indices in ascending order.
private func findPeaks(in data: [Float], minProminence: Float) -> [Int] {
    // An interior sample needs a neighbour on each side.
    guard data.count >= 3 else { return [] }

    return (1..<(data.count - 1)).filter { (i: Int) -> Bool in
        guard data[i] > data[i - 1] && data[i] > data[i + 1] else { return false }
        let prominence = data[i] - max(data[i - 1], data[i + 1])
        return prominence >= minProminence
    }
}
208
-
209
/// Estimates a harmonics-to-noise ratio in dB from the segment's autocorrelation.
///
/// The autocorrelation at lag 0 is taken as total energy. The first
/// prominent peak at a non-zero lag is taken as harmonic energy, and the
/// difference between the two as noise energy.
///
/// Fixes over the previous version:
/// - `vDSP_conv` reads `N + P - 1` signal samples; the signal is now
///   zero-padded to that length instead of being read past its end.
/// - The filter is the segment itself. With a positive filter stride
///   `vDSP_conv` correlates, so passing the reversed segment computed a
///   convolution rather than an autocorrelation.
/// - Segments with fewer than three samples return 0.0 up front, since they
///   cannot contain an interior peak.
/// - A non-positive harmonic energy returns 0.0 instead of feeding a
///   non-positive ratio to `log10`.
///
/// - Parameter segment: Audio samples to analyse.
/// - Returns: `10 * log10(harmonic / noise)`, or 0.0 when no usable peak exists.
func extractHNR(from segment: [Float]) -> Float {
    let frameSize = segment.count
    guard frameSize >= 3 else { return 0.0 }

    // Zero-pad so that every lag 0..<frameSize can read frameSize samples.
    let paddedSignal = segment + [Float](repeating: 0, count: frameSize - 1)
    var autocorrelation = [Float](repeating: 0, count: frameSize)

    // autocorrelation[lag] = sum over p of paddedSignal[lag + p] * segment[p]
    vDSP_conv(paddedSignal, 1, segment, 1, &autocorrelation, 1, vDSP_Length(frameSize), vDSP_Length(frameSize))

    guard let maxValue = autocorrelation.max() else { return 0.0 }

    // Peaks must stand out by at least 10% of the largest autocorrelation value.
    let peaks = findPeaks(in: autocorrelation, minProminence: 0.1 * maxValue)

    // First peak after zero lag.
    guard let firstPeakIndex = peaks.first(where: { $0 > 0 }) else { return 0.0 }

    let harmonicEnergy = autocorrelation[firstPeakIndex]
    let noiseEnergy = autocorrelation[0] - harmonicEnergy
    guard harmonicEnergy > 0, noiseEnergy > 0 else { return 0.0 }

    return 10 * log10(harmonicEnergy / noiseEnergy)
}
232
-
233
- // Helper functions
234
/// Converts interleaved (real, imaginary) FFT output into per-bin magnitudes.
///
/// Elements `2k` and `2k + 1` are read as the real and imaginary parts of bin
/// `k`. A trailing unpaired element is ignored.
///
/// - Parameter fftData: Interleaved real/imaginary values.
/// - Returns: `fftData.count / 2` magnitudes, `sqrt(re² + im²)` per bin.
private func computeMagnitudeSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    return stride(from: 0, to: 2 * pairCount, by: 2).map { (offset: Int) -> Float in
        let re = fftData[offset]
        let im = fftData[offset + 1]
        return sqrt(re * re + im * im)
    }
}
248
-
249
/// Multiplies the segment element-wise by a Hann window of the same length.
///
/// The window is generated by `vDSP_hann_window` with the `vDSP_HANN_NORM`
/// flag and applied with `vDSP_vmul`.
///
/// - Parameter segment: Samples to window.
/// - Returns: A new array of the same length holding the windowed samples.
private func applyHannWindow(to segment: [Float]) -> [Float] {
    let sampleCount = segment.count
    let length = vDSP_Length(sampleCount)

    var hann = [Float](repeating: 0, count: sampleCount)
    vDSP_hann_window(&hann, length, Int32(vDSP_HANN_NORM))

    var windowed = [Float](repeating: 0, count: sampleCount)
    vDSP_vmul(segment, 1, hann, 1, &windowed, 1, length)
    return windowed
}
258
-
259
/// Converts interleaved (real, imaginary) FFT output into per-bin power.
///
/// Elements `2k` and `2k + 1` are read as the real and imaginary parts of bin
/// `k`. A trailing unpaired element is ignored.
///
/// - Parameter fftData: Interleaved real/imaginary values.
/// - Returns: `fftData.count / 2` values, `re² + im²` per bin.
private func computePowerSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    return stride(from: 0, to: 2 * pairCount, by: 2).map { (offset: Int) -> Float in
        let re = fftData[offset]
        let im = fftData[offset + 1]
        return re * re + im * im
    }
}
273
-
274
/// Builds a bank of triangular filters spaced evenly on the mel scale
/// between 0 Hz and `sampleRate / 2`.
///
/// `numFilters + 2` edge points are placed uniformly in mel, converted back
/// to Hz, and rounded to FFT bin indices. Filter `i` rises linearly from edge
/// `i` to edge `i + 1` and falls linearly from there to edge `i + 2`.
///
/// - Parameters:
///   - numFilters: Number of triangular filters (rows) to build.
///   - fftSize: FFT length; each row has `1 + fftSize / 2` entries.
///   - sampleRate: Sampling rate used to map Hz to bin indices.
/// - Returns: `numFilters` rows of filter weights.
private func computeMelFilterbank(numFilters: Int, fftSize: Int, sampleRate: Float) -> [[Float]] {
    let lowMel = hzToMel(0)
    let highMel = hzToMel(sampleRate / 2)
    let spacing = (highMel - lowMel) / Float(numFilters + 1)

    // Bin index of each of the numFilters + 2 triangle edges.
    let edges: [Int] = (0...numFilters + 1).map { (point: Int) -> Int in
        let hz = melToHz(lowMel + Float(point) * spacing)
        return Int((hz * Float(fftSize) / sampleRate).rounded())
    }

    let emptyRow = [Float](repeating: 0, count: 1 + fftSize / 2)
    var bank = [[Float]](repeating: emptyRow, count: numFilters)

    for filter in 0..<numFilters {
        let left = edges[filter]
        let peak = edges[filter + 1]
        let right = edges[filter + 2]

        for bin in left..<right {
            // Rising slope before the peak bin, falling slope from the peak onward.
            bank[filter][bin] = bin < peak
                ? Float(bin - left) / Float(peak - left)
                : Float(right - bin) / Float(right - peak)
        }
    }

    return bank
}
300
-
301
/// Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`.
///
/// - Parameter hz: Frequency in Hz.
/// - Returns: The corresponding mel value.
private func hzToMel(_ hz: Float) -> Float {
    let scaled = 1 + hz / 700
    return 2595 * log10(scaled)
}
304
-
305
/// Converts a mel value back to Hz using `700 * (10^(mel / 2595) - 1)`.
///
/// - Parameter mel: Value on the mel scale.
/// - Returns: The corresponding frequency in Hz.
private func melToHz(_ mel: Float) -> Float {
    let exponent = mel / 2595
    return 700 * (pow(10, exponent) - 1)
}
308
-
309
/// Computes a DCT-II of `input`, scaling every coefficient by `sqrt(2 / N)`.
///
/// `output[k] = sqrt(2/N) * sum over n of input[n] * cos(pi * k * (2n + 1) / (2N))`
///
/// - Parameter input: Values to transform; `N` is `input.count`.
/// - Returns: `N` coefficients (an empty array for empty input).
private func computeDCT(from input: [Float]) -> [Float] {
    let length = input.count
    let normalization = sqrt(2.0 / Float(length))

    return (0..<length).map { (k: Int) -> Float in
        var accumulator: Float = 0
        for (n, sample) in input.enumerated() {
            accumulator += sample * cos(.pi * Float(k) * (2 * Float(n) + 1) / (2 * Float(length)))
        }
        return normalization * accumulator
    }
}
324
-
325
/// Computes a 128-band mel spectrogram frame for one segment.
///
/// The segment is run through `sharedFFT`, turned into a power spectrum, and
/// each mel filter's weights are dotted with that spectrum. If the filter row
/// and the spectrum differ in length, the extra elements are ignored.
///
/// - Parameters:
///   - segment: Audio samples for the frame.
///   - sampleRate: Sample rate in Hz.
/// - Returns: One energy value per mel filter.
func computeMelSpectrogram(from segment: [Float], sampleRate: Float) -> [Float] {
    let melBandCount = 128
    let spectrum = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let filterbank = computeMelFilterbank(numFilters: melBandCount, fftSize: FFT_LENGTH, sampleRate: sampleRate)

    var melEnergies = [Float]()
    melEnergies.reserveCapacity(filterbank.count)
    for weights in filterbank {
        var bandEnergy: Float = 0
        for (weight, power) in zip(weights, spectrum) {
            bandEnergy += weight * power
        }
        melEnergies.append(bandEnergy)
    }
    return melEnergies
}
338
-
339
/// Computes a per-band spectral contrast (peak-to-valley ratio in dB) over
/// seven fixed frequency bands.
///
/// For each band the magnitude bins are sorted; the value at the 95 % position
/// is the peak and the value at the 5 % position is the valley. Bands that map
/// to fewer than two FFT bins contribute `0`.
///
/// Both peak and valley are floored at `Float.leastNormalMagnitude`, so a
/// completely silent band yields `0` dB instead of `-infinity`.
///
/// - Parameters:
///   - segment: Audio samples for the frame.
///   - sampleRate: Sample rate in Hz. A non-positive (or NaN) rate returns all
///     zeros instead of trapping while converting frequencies to bin indices.
/// - Returns: Seven contrast values, one per band.
func computeSpectralContrast(from segment: [Float], sampleRate: Float) -> [Float] {
    // Octave-style frequency bands in Hz.
    let bandFrequencies = [
        (20.0, 125.0),      // Sub-bass
        (125.0, 250.0),     // Bass
        (250.0, 500.0),     // Low-mids
        (500.0, 1000.0),    // Mids
        (1000.0, 2000.0),   // High-mids
        (2000.0, 4000.0),   // Presence
        (4000.0, min(8000.0, Double(sampleRate) / 2.0))  // Brilliance
    ]

    // Without a usable sample rate the bin width is zero or NaN and the
    // frequency-to-bin conversion below would trap.
    guard sampleRate > 0 else {
        return [Float](repeating: 0, count: bandFrequencies.count)
    }

    let fftData = sharedFFT.processSegment(segment)
    let magnitudeSpectrum = computeMagnitudeSpectrum(from: fftData)

    // Width of one FFT bin in Hz.
    let freqResolution = sampleRate / Float(FFT_LENGTH)
    let magnitudeFloor = Float.leastNormalMagnitude

    var contrast = [Float]()
    contrast.reserveCapacity(bandFrequencies.count)

    for (lowFreq, highFreq) in bandFrequencies {
        let startBin = Int(Float(lowFreq) / freqResolution)
        let endBin = min(Int(Float(highFreq) / freqResolution), magnitudeSpectrum.count - 1)

        guard startBin < endBin else {
            contrast.append(0)
            continue
        }

        let sortedMagnitudes = magnitudeSpectrum[startBin...endBin].sorted()
        let length = sortedMagnitudes.count

        // Peak = 95th percentile position, valley = 5th percentile position.
        let peak = sortedMagnitudes[Int(Float(length) * 0.95)]
        let valley = sortedMagnitudes[Int(Float(length) * 0.05)]

        // Floor both operands so a silent band produces 0 dB rather than -inf.
        let contrastValue = 20 * log10(max(peak, magnitudeFloor) / max(valley, magnitudeFloor))
        contrast.append(contrastValue)
    }

    return contrast
}
387
-
388
- // Original function for backward compatibility
389
/// Convenience wrapper: extracts a chromagram from `segment` and feeds it to
/// `computeTonnetz(fromChroma:)`.
func computeTonnetz(from segment: [Float], sampleRate: Float) -> [Float] {
    return computeTonnetz(fromChroma: extractChromagram(from: segment, sampleRate: sampleRate))
}
393
-
394
- // New optimized function that accepts pre-computed chromagram
395
/// Projects a chroma vector onto six fixed 12-element weight rows.
///
/// Each output is the dot product of one row with `chroma`. If `chroma` has
/// fewer than 12 values, only the available ones contribute.
///
/// - Parameter chroma: Pre-computed chroma values.
/// - Returns: Six projected values.
func computeTonnetz(fromChroma chroma: [Float]) -> [Float] {
    // 6x12 projection weights. Interval labels are carried over from the
    // original comments.
    let projectionRows: [[Float]] = [
        [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0],  // Perfect fifth
        [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],  // Minor third
        [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],  // Major third
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0],  // Perfect fifth
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1],  // Minor third
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]   // Major third
    ]

    var projected = [Float]()
    projected.reserveCapacity(projectionRows.count)
    for weights in projectionRows {
        var total: Float = 0
        for (weight, value) in zip(weights, chroma) {
            total += weight * value
        }
        projected.append(total)
    }
    return projected
}
411
-
412
/// Decoded audio held in memory: a flat array of samples plus its sample rate.
struct AudioData {
    /// Decoded sample values.
    let samples: [Float]
    /// Sample rate in Hz.
    let sampleRate: Int

    /// Same shape as the synthesized memberwise initializer.
    init(samples: [Float], sampleRate: Int) {
        self.samples = samples
        self.sampleRate = sampleRate
    }
}
416
-
417
/// Reads an entire audio file into memory and returns channel 0 as floats.
///
/// - Parameter fileUri: URL string of the file to read.
/// - Returns: The samples of the first channel and the file's processing
///   sample rate.
/// - Throws: An `NSError` in the `AudioProcessing` domain when the URL string
///   is invalid, when a buffer for the file cannot be created, or when the
///   buffer exposes no float channel data; rethrows any `AVAudioFile` error.
func loadAudioFile(_ fileUri: String) throws -> AudioData {
    guard let url = URL(string: fileUri) else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid file URL"])
    }

    let file = try AVAudioFile(forReading: url)
    let format = file.processingFormat

    // `exactly:` avoids a trap for lengths that do not fit in a frame count,
    // and the buffer initializer is failable, so neither is force-unwrapped.
    guard let frameCount = AVAudioFrameCount(exactly: file.length),
          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to read audio data"])
    }

    try file.read(into: buffer, frameCount: frameCount)

    guard let floatData = buffer.floatChannelData?[0] else {
        throw NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to read audio data"])
    }

    // Copy only the frames actually read: `frameLength` may be smaller than the
    // requested count, and the rest of the buffer is not valid sample data.
    let samples = Array(UnsafeBufferPointer(start: floatData, count: Int(buffer.frameLength)))

    return AudioData(samples: samples, sampleRate: Int(format.sampleRate))
}
439
-
440
/// Returns the mean of the squared samples (average power).
///
/// `vDSP_measqv` already divides the sum of squares by the element count, so
/// its result is returned as-is. Dividing by the count a second time (as the
/// previous version did) produced `sum / N²` and made `computeRMS` too small
/// by a factor of `sqrt(N)`.
///
/// - Parameter samples: Audio samples.
/// - Returns: Mean square of `samples`, or `0` for empty input.
func computeEnergy(from samples: [Float]) -> Float {
    // The mean of zero elements is undefined; report silence instead of NaN.
    guard !samples.isEmpty else { return 0 }

    var meanSquare: Float = 0
    vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(samples.count))
    return meanSquare
}
445
-
446
/// Returns the square root of `computeEnergy(from:)` for the given samples.
func computeRMS(from samples: [Float]) -> Float {
    return sqrt(computeEnergy(from: samples))
}
450
-
451
/// Returns the zero-crossing rate: the number of adjacent sample pairs with
/// strictly opposite signs (their product is negative), divided by the number
/// of samples.
///
/// - Parameter samples: Audio samples.
/// - Returns: The crossing rate, or `0` when there are fewer than two samples
///   (the previous `1..<samples.count` range trapped on empty input).
func computeZCR(from samples: [Float]) -> Float {
    guard samples.count > 1 else { return 0 }

    let zeroCrossings = zip(samples, samples.dropFirst())
        .lazy
        .filter { pair in pair.0 * pair.1 < 0 }
        .count

    return Float(zeroCrossings) / Float(samples.count)
}
460
-
461
- // Keep in AudioProcessingHelpers.swift
462
// File-private feature-size constants. Meanings are taken from the names; none
// of them is referenced in the visible part of this file — TODO confirm usage.
private let N_MFCC: Int = 40      // presumably the MFCC coefficient count
private let N_FFT: Int = 1024     // presumably the FFT length
private let N_MELS: Int = 128     // presumably the mel band count
private let N_CHROMA: Int = 12    // presumably the chroma bin count
private let N_BANDS: Int = 7      // presumably the spectral contrast band count
467
-
468
- // Core audio processing functions
469
/// Returns the zero-crossing rate of `data`, treating `0` as non-negative: a
/// crossing is counted whenever one sample is `>= 0` and its neighbour is
/// `< 0`. The count is divided by the number of samples.
///
/// - Parameter data: Audio samples.
/// - Returns: The crossing rate, or `0` when there are fewer than two samples
///   (the previous `1..<data.count` range trapped on empty input).
func calculateZeroCrossingRate(_ data: [Float]) -> Float {
    guard data.count > 1 else { return 0 }

    var count: Float = 0
    for (previous, current) in zip(data, data.dropFirst()) {
        if (current >= 0 && previous < 0) || (current < 0 && previous >= 0) {
            count += 1
        }
    }
    return count / Float(data.count)
}
478
-
479
/// Returns the mean of the squared samples: `vDSP_svesq` produces the sum of
/// squares, which is then divided by the element count.
///
/// - Parameter data: Audio samples.
/// - Returns: Mean square of `data`, or `0` for empty input (previously `0 / 0`,
///   i.e. NaN).
func calculateEnergy(_ data: [Float]) -> Float {
    guard !data.isEmpty else { return 0 }

    var sumOfSquares: Float = 0
    vDSP_svesq(data, 1, &sumOfSquares, vDSP_Length(data.count))
    return sumOfSquares / Float(data.count)
}
484
-
485
- // Feature extraction functions
486
/// Assembles a `Features` value for one segment.
///
/// `energy`, `zcr`, `mfcc`, `melSpectrogram`, `chromagram`, `spectralContrast`,
/// `tonnetz` and `pitch` are only computed when their key in `featureOptions`
/// is `true`; otherwise they are `0`, empty, or `nil` respectively. RMS and the
/// remaining spectral/tempo/HNR features are always computed.
///
/// - Parameters:
///   - segmentData: Samples of the segment.
///   - sampleRate: Sample rate in Hz.
///   - sumSquares: Pre-computed sum of squared samples for the segment.
///   - zeroCrossings: Pre-computed zero-crossing count for the segment.
///   - segmentLength: Number of samples the pre-computed values cover. When it
///     is not positive, `rms` and `zcr` are reported as `0` rather than
///     dividing by zero.
///   - featureOptions: Feature name → enabled flag.
/// - Returns: The populated `Features` value.
func computeFeatures(segmentData: [Float], sampleRate: Float, sumSquares: Float, zeroCrossings: Int, segmentLength: Int, featureOptions: [String: Bool]) -> Features {
    func isEnabled(_ key: String) -> Bool {
        return featureOptions[key] == true
    }

    let hasSamples = segmentLength > 0
    let rms: Float = hasSamples ? sqrt(sumSquares / Float(segmentLength)) : 0
    let energy: Float = isEnabled("energy") ? sumSquares : 0
    let zcr: Float = (isEnabled("zcr") && hasSamples) ? Float(zeroCrossings) / Float(segmentLength) : 0

    // Optional features, computed only on request.
    let mfcc = isEnabled("mfcc") ? extractMFCC(from: segmentData, sampleRate: sampleRate) : []
    let melSpectrogram = isEnabled("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let chromagram = isEnabled("chromagram") ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast = isEnabled("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []

    // Reuse the chromagram when it was already extracted above instead of
    // extracting it a second time inside computeTonnetz(from:sampleRate:).
    // NOTE(review): assumes extractChromagram is deterministic for identical
    // input — confirm.
    let tonnetz: [Float]
    if !isEnabled("tonnetz") {
        tonnetz = []
    } else if isEnabled("chromagram") {
        tonnetz = computeTonnetz(fromChroma: chromagram)
    } else {
        tonnetz = computeTonnetz(from: segmentData, sampleRate: sampleRate)
    }

    let pitch = isEnabled("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        zcr: zcr,
        spectralCentroid: extractSpectralCentroid(from: segmentData, sampleRate: sampleRate),
        spectralFlatness: extractSpectralFlatness(from: segmentData),
        spectralRollOff: extractSpectralRollOff(from: segmentData, sampleRate: sampleRate),
        spectralBandwidth: extractSpectralBandwidth(from: segmentData, sampleRate: sampleRate),
        chromagram: chromagram,
        tempo: extractTempo(from: segmentData, sampleRate: sampleRate),
        hnr: extractHNR(from: segmentData),
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch
    )
}
523
-
524
/// Returns the smallest power of two that is greater than or equal to `n`
/// (`1` for any `n <= 1`).
private func nextPowerOfTwo(_ n: Int) -> Int {
    var candidate = 1
    while candidate < n {
        candidate += candidate
    }
    return candidate
}
531
-
532
/// Estimates the fundamental frequency of `segment` by picking the lag with the
/// largest autocorrelation inside the 50–500 Hz pitch range.
///
/// Changes from the previous implementation:
/// - The autocorrelation is computed directly as a dot product of the signal
///   with a lagged copy of itself. The former `vDSP_conv` call asked for
///   `count` outputs with a `count`-long kernel over a `count`-long signal,
///   which reads past the end of the signal, and its reversed kernel computed
///   a convolution rather than an autocorrelation.
/// - The windowed, zero-padded FFT whose result was never read is gone.
/// - Lags are clamped to the segment length, so short segments no longer index
///   out of range.
/// - Only a strictly positive correlation counts as a pitch, so silence
///   returns `0`.
///
/// - Parameters:
///   - segment: Audio samples.
///   - sampleRate: Sample rate in Hz.
/// - Returns: The estimated pitch in Hz, or `0` when no pitch is found, the
///   segment is too short for the lag range, or `sampleRate` is not a positive
///   finite number.
func estimatePitch(from segment: [Float], sampleRate: Float) -> Float {
    guard segment.count >= 2, sampleRate.isFinite, sampleRate > 0 else { return 0.0 }

    // Lag bounds for 500 Hz (shortest period) and 50 Hz (longest period).
    // Clamp in floating point first so huge sample rates cannot overflow Int.
    let lagLimit = Float(segment.count)
    let minLag = max(1, Int(min(sampleRate / 500.0, lagLimit)))
    let maxLag = min(Int(min(sampleRate / 50.0, lagLimit)), segment.count - 1)
    guard minLag <= maxLag else { return 0.0 }

    var bestCorrelation: Float = 0
    var pitchLag = 0

    segment.withUnsafeBufferPointer { samples in
        guard let base = samples.baseAddress else { return }
        for lag in minLag...maxLag {
            // r(lag) = sum over n of x[n] * x[n + lag], using the overlapping part only.
            var correlation: Float = 0
            vDSP_dotpr(base, 1, base + lag, 1, &correlation, vDSP_Length(samples.count - lag))
            if correlation > bestCorrelation {
                bestCorrelation = correlation
                pitchLag = lag
            }
        }
    }

    // Convert the winning lag (in samples) to a frequency.
    return pitchLag > 0 ? sampleRate / Float(pitchLag) : 0.0
}
564
-
565
- // Add speech detection helper function
566
/// Heuristic speech detector based on loudness and zero-crossing rate.
///
/// A segment is flagged active when `rms > 0.01` and its zero-crossing rate
/// lies strictly between `0.1` and `0.5`. The probability is `rms * 10`
/// clamped to `0...1`.
///
/// - Parameters:
///   - segment: Audio samples.
///   - rms: RMS level of the segment.
/// - Returns: The activity flag and the probability estimate.
func detectSpeech(from segment: [Float], rms: Float) -> (isActive: Bool, probability: Float) {
    let crossingRate = calculateZeroCrossingRate(segment)

    let loudEnough = rms > 0.01
    let crossingRateInRange = crossingRate > 0.1 && crossingRate < 0.5

    let scaledLevel = rms * 10
    let probability: Float = min(1.0, max(0.0, scaledLevel))

    return (isActive: loudEnough && crossingRateInRange, probability: probability)
}
574
-
575
/// Reads a range of frames from an audio file, optionally converts it to the
/// format described by `decodingConfig`, and returns it as integer PCM bytes.
///
/// Changes from the previous implementation:
/// - Buffer and converter creation no longer force-unwrap; failures throw.
/// - The converter's input block hands over the source buffer exactly once and
///   then reports end of stream, instead of offering the same buffer on every
///   call.
/// - The conversion buffer is sized by the sample-rate ratio, so upsampled
///   output is not cut off at `frameCount`.
/// - Samples are always clamped to `-1...1` before integer conversion. An
///   out-of-range sample (and, for 32-bit, a sample of exactly `1.0`, because
///   `Float(Int32.max)` rounds up to 2³¹) previously trapped. Clamping used to
///   depend on `decodingConfig.normalizeAudio`; it is now unconditional.
/// - An unsupported bit depth is rejected before any file work.
///
/// - Parameters:
///   - url: Audio file to read.
///   - startFrame: First frame to read.
///   - frameCount: Maximum number of frames to read.
///   - format: Format of the read buffer. NOTE(review): presumably the file's
///     processing format — confirm against callers.
///   - decodingConfig: Supplies the target format and `targetBitDepth`
///     (16 when `nil`).
///   - includeNormalizedData: When `true`, also return channel 0 as floats.
///   - includeBase64Data: When `true`, also return the PCM bytes as base64.
/// - Returns: Little-endian PCM bytes (frame-major, channels interleaved) plus
///   the optional float and base64 representations.
/// - Throws: An `NSError` in the `AudioProcessing` domain for an unsupported
///   bit depth, allocation/converter failures, or missing float channel data;
///   rethrows file-reading and conversion errors.
func extractRawAudioData(
    from url: URL,
    startFrame: AVAudioFramePosition,
    frameCount: AVAudioFrameCount,
    format: AVAudioFormat,
    decodingConfig: DecodingConfig,
    includeNormalizedData: Bool,
    includeBase64Data: Bool
) throws -> (pcmData: Data, floatData: [Float]?, base64Data: String?) {
    func failure(_ message: String) -> NSError {
        return NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
    }

    // Use targetBitDepth from decodingConfig instead of the format's bit depth.
    let targetBitDepth = decodingConfig.targetBitDepth ?? 16
    guard targetBitDepth == 16 || targetBitDepth == 32 else {
        throw failure("Unsupported bit depth \(targetBitDepth)")
    }

    let targetFormat = decodingConfig.toAudioFormat(baseFormat: format)

    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw failure("Failed to allocate audio buffer")
    }
    let audioFile = try AVAudioFile(forReading: url)

    audioFile.framePosition = startFrame
    try audioFile.read(into: buffer, frameCount: frameCount)

    // Convert to the target format if it differs from the source.
    let finalBuffer: AVAudioPCMBuffer
    if targetFormat != format {
        // Size the output for the resampled length (plus one frame of slack).
        let rateRatio = format.sampleRate > 0 ? targetFormat.sampleRate / format.sampleRate : 1
        let scaledLength = (Double(buffer.frameLength) * rateRatio).rounded(.up)
        let outputCapacity = AVAudioFrameCount(max(1, min(scaledLength + 1, Double(UInt32.max))))

        guard let converter = AVAudioConverter(from: format, to: targetFormat),
              let convertedBuffer = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: outputCapacity) else {
            throw failure("Failed to create audio converter")
        }

        var conversionError: NSError?
        var inputConsumed = false
        let status = converter.convert(to: convertedBuffer, error: &conversionError) { _, outStatus in
            // Supply the source buffer once; afterwards signal end of stream so
            // the same audio is never fed to the converter twice.
            if inputConsumed {
                outStatus.pointee = .endOfStream
                return nil
            }
            inputConsumed = true
            outStatus.pointee = .haveData
            return buffer
        }

        if let conversionError = conversionError {
            Logger.debug("AudioProcessingHelpers", "Format conversion failed: \(conversionError.localizedDescription)")
            throw conversionError
        }
        if status == .error {
            throw failure("Format conversion failed")
        }
        finalBuffer = convertedBuffer
    } else {
        finalBuffer = buffer
    }

    guard let floatData = finalBuffer.floatChannelData else {
        throw failure("Failed to get float channel data")
    }

    let channels = Int(targetFormat.channelCount)
    let frames = Int(finalBuffer.frameLength)
    let bytesPerSample = targetBitDepth / 8
    var pcmData = Data(capacity: frames * channels * bytesPerSample)

    // Convert float samples to integer PCM, frame by frame, channel by channel.
    for frame in 0..<frames {
        for channel in 0..<channels {
            // Clamp unconditionally: integer initializers trap on values
            // outside their representable range.
            let clamped = max(-1.0, min(1.0, floatData[channel][frame]))

            if targetBitDepth == 16 {
                let intValue = Int16(clamped * Float(Int16.max))
                withUnsafeBytes(of: intValue.littleEndian) { pcmData.append(contentsOf: $0) }
            } else {
                // Scale in Double: Int32.max is exactly representable there, so
                // full-scale input maps to Int32.max instead of overflowing.
                let intValue = Int32(Double(clamped) * Double(Int32.max))
                withUnsafeBytes(of: intValue.littleEndian) { pcmData.append(contentsOf: $0) }
            }
        }
    }

    // Channel 0 as floats, only if requested.
    let normalizedData: [Float]? = includeNormalizedData ?
        Array(UnsafeBufferPointer(start: floatData[0], count: frames)) :
        nil

    // Base64 of the PCM bytes, only if requested.
    let base64Data: String? = includeBase64Data ?
        pcmData.base64EncodedString() :
        nil

    return (pcmData: pcmData, floatData: normalizedData, base64Data: base64Data)
}
658
-
659
- // Update the CRC32 function to use zlib's implementation
660
/// Returns the zlib CRC-32 checksum of `data`.
func calculateCRC32(data: Data) -> UInt32 {
    return data.withUnsafeBytes { rawBytes -> UInt32 in
        let bytes = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        let length = UInt32(rawBytes.count)
        let checksum = crc32(0, bytes, length)
        return UInt32(checksum)
    }
}
666
-
667
/// Returns the zlib CRC-32 checksum of the raw bytes of the first `count`
/// floats in `floatArray`.
///
/// - Parameters:
///   - floatArray: Source values.
///   - count: Number of leading floats to include. It is clamped to
///     `0...floatArray.count`; previously a larger value read past the end of
///     the array and a negative value trapped.
/// - Returns: The checksum (`0` when no floats are included).
func calculateCRC32(from floatArray: [Float], count: Int) -> UInt32 {
    let usableCount = min(max(count, 0), floatArray.count)

    return floatArray.withUnsafeBytes { floatBytes -> UInt32 in
        let byteCount = usableCount * MemoryLayout<Float>.size
        let bytes = floatBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        return UInt32(crc32(0, bytes, UInt32(byteCount)))
    }
}
674
-
675
/// Builds a complete WAV file: a 44-byte PCM RIFF header followed by `pcmData`.
///
/// - Parameters:
///   - pcmData: Raw PCM sample bytes to wrap.
///   - sampleRate: Sample rate in Hz.
///   - channels: Number of channels.
///   - bitDepth: Bits per sample.
/// - Returns: The header immediately followed by `pcmData`.
func createWavHeader(pcmData: Data, sampleRate: Int, channels: Int, bitDepth: Int) -> Data {
    let headerLength = 44
    let riffChunkSize = pcmData.count + headerLength - 8
    let sampleWidth = bitDepth / 8
    let bytesPerSecond = sampleRate * channels * sampleWidth
    let frameWidth = channels * sampleWidth

    var wav = Data(capacity: headerLength)

    // Small writers for the three field kinds found in the header.
    func appendTag(_ tag: String) {
        wav.append(contentsOf: tag.data(using: .ascii)!)
    }
    func appendUInt32(_ value: Int) {
        withUnsafeBytes(of: UInt32(value).littleEndian) { wav.append(contentsOf: $0) }
    }
    func appendUInt16(_ value: Int) {
        withUnsafeBytes(of: UInt16(value).littleEndian) { wav.append(contentsOf: $0) }
    }

    appendTag("RIFF")
    appendUInt32(riffChunkSize)     // file size minus the 8-byte RIFF preamble
    appendTag("WAVE")

    appendTag("fmt ")
    appendUInt32(16)                // fmt chunk size for PCM
    appendUInt16(1)                 // format tag 1 = PCM
    appendUInt16(channels)
    appendUInt32(sampleRate)
    appendUInt32(bytesPerSecond)    // byte rate
    appendUInt16(frameWidth)        // block align
    appendUInt16(bitDepth)

    appendTag("data")
    appendUInt32(pcmData.count)

    // Payload follows the header directly.
    wav.append(pcmData)
    return wav
}
729
-
730
- // Extension to help with binary data conversion
731
extension UInt16 {
    /// The two bytes of this value in its in-memory order.
    var data: Data {
        return withUnsafeBytes(of: self) { Data($0) }
    }
}
737
-
738
extension UInt32 {
    /// The four bytes of this value in its in-memory order.
    var data: Data {
        return withUnsafeBytes(of: self) { Data($0) }
    }
}