@siteed/audio-studio 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375) hide show
  1. package/CHANGELOG.md +535 -0
  2. package/LICENSE +21 -0
  3. package/README.md +167 -0
  4. package/android/build.gradle +143 -0
  5. package/android/src/androidTest/assets/chorus.wav +0 -0
  6. package/android/src/androidTest/assets/jfk.wav +0 -0
  7. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  8. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  9. package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
  10. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
  11. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
  12. package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
  13. package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
  14. package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
  15. package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
  16. package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
  17. package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
  18. package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
  19. package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
  20. package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
  21. package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
  22. package/android/src/main/AndroidManifest.xml +30 -0
  23. package/android/src/main/CMakeLists.txt +29 -0
  24. package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
  25. package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
  26. package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
  27. package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
  28. package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
  29. package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
  30. package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
  31. package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
  32. package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
  33. package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
  34. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
  35. package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
  36. package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
  37. package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
  38. package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
  39. package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
  40. package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
  41. package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
  42. package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
  43. package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
  44. package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
  45. package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
  46. package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
  47. package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
  48. package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
  49. package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
  50. package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
  51. package/android/src/main/res/drawable/ic_microphone.xml +13 -0
  52. package/android/src/main/res/drawable/ic_pause.xml +10 -0
  53. package/android/src/main/res/drawable/ic_play.xml +10 -0
  54. package/android/src/main/res/drawable/ic_stop.xml +10 -0
  55. package/android/src/main/res/layout/notification_recording.xml +37 -0
  56. package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
  57. package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
  58. package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
  59. package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
  60. package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
  61. package/android/src/test/resources/chorus.wav +0 -0
  62. package/android/src/test/resources/generate_test_audio.py +94 -0
  63. package/android/src/test/resources/jfk.wav +0 -0
  64. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  65. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  66. package/app.plugin.js +3 -0
  67. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
  68. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  69. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
  70. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  71. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
  72. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  73. package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
  74. package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
  75. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
  76. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  77. package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
  78. package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
  79. package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
  80. package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
  81. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
  82. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  83. package/build/cjs/AudioDeviceManager.js +688 -0
  84. package/build/cjs/AudioDeviceManager.js.map +1 -0
  85. package/build/cjs/AudioRecorder.provider.js +78 -0
  86. package/build/cjs/AudioRecorder.provider.js.map +1 -0
  87. package/build/cjs/AudioStudio.native.js +8 -0
  88. package/build/cjs/AudioStudio.native.js.map +1 -0
  89. package/build/cjs/AudioStudio.types.js +11 -0
  90. package/build/cjs/AudioStudio.types.js.map +1 -0
  91. package/build/cjs/AudioStudio.web.js +708 -0
  92. package/build/cjs/AudioStudio.web.js.map +1 -0
  93. package/build/cjs/AudioStudioModule.js +718 -0
  94. package/build/cjs/AudioStudioModule.js.map +1 -0
  95. package/build/cjs/WebRecorder.web.js +865 -0
  96. package/build/cjs/WebRecorder.web.js.map +1 -0
  97. package/build/cjs/constants/platformLimitations.js +99 -0
  98. package/build/cjs/constants/platformLimitations.js.map +1 -0
  99. package/build/cjs/constants.js +20 -0
  100. package/build/cjs/constants.js.map +1 -0
  101. package/build/cjs/events.js +29 -0
  102. package/build/cjs/events.js.map +1 -0
  103. package/build/cjs/hooks/useAudioDevices.js +179 -0
  104. package/build/cjs/hooks/useAudioDevices.js.map +1 -0
  105. package/build/cjs/index.js +64 -0
  106. package/build/cjs/index.js.map +1 -0
  107. package/build/cjs/trimAudio.js +76 -0
  108. package/build/cjs/trimAudio.js.map +1 -0
  109. package/build/cjs/useAudioRecorder.js +535 -0
  110. package/build/cjs/useAudioRecorder.js.map +1 -0
  111. package/build/cjs/utils/BlobFix.js +502 -0
  112. package/build/cjs/utils/BlobFix.js.map +1 -0
  113. package/build/cjs/utils/audioProcessing.js +136 -0
  114. package/build/cjs/utils/audioProcessing.js.map +1 -0
  115. package/build/cjs/utils/cleanNativeOptions.js +22 -0
  116. package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
  117. package/build/cjs/utils/concatenateBuffers.js +25 -0
  118. package/build/cjs/utils/concatenateBuffers.js.map +1 -0
  119. package/build/cjs/utils/convertPCMToFloat32.js +124 -0
  120. package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
  121. package/build/cjs/utils/crc32.js +52 -0
  122. package/build/cjs/utils/crc32.js.map +1 -0
  123. package/build/cjs/utils/encodingToBitDepth.js +17 -0
  124. package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
  125. package/build/cjs/utils/getWavFileInfo.js +96 -0
  126. package/build/cjs/utils/getWavFileInfo.js.map +1 -0
  127. package/build/cjs/utils/writeWavHeader.js +88 -0
  128. package/build/cjs/utils/writeWavHeader.js.map +1 -0
  129. package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
  130. package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
  131. package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
  132. package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
  133. package/build/cjs/workers/wasmGlueString.web.js +27 -0
  134. package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
  135. package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
  136. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  137. package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
  138. package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  139. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
  140. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  141. package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
  142. package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
  143. package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
  144. package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  145. package/build/esm/AudioAnalysis/extractPreview.js +25 -0
  146. package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
  147. package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
  148. package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
  149. package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
  150. package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  151. package/build/esm/AudioDeviceManager.js +681 -0
  152. package/build/esm/AudioDeviceManager.js.map +1 -0
  153. package/build/esm/AudioRecorder.provider.js +40 -0
  154. package/build/esm/AudioRecorder.provider.js.map +1 -0
  155. package/build/esm/AudioStudio.native.js +6 -0
  156. package/build/esm/AudioStudio.native.js.map +1 -0
  157. package/build/esm/AudioStudio.types.js +8 -0
  158. package/build/esm/AudioStudio.types.js.map +1 -0
  159. package/build/esm/AudioStudio.web.js +704 -0
  160. package/build/esm/AudioStudio.web.js.map +1 -0
  161. package/build/esm/AudioStudioModule.js +713 -0
  162. package/build/esm/AudioStudioModule.js.map +1 -0
  163. package/build/esm/WebRecorder.web.js +861 -0
  164. package/build/esm/WebRecorder.web.js.map +1 -0
  165. package/build/esm/constants/platformLimitations.js +90 -0
  166. package/build/esm/constants/platformLimitations.js.map +1 -0
  167. package/build/esm/constants.js +17 -0
  168. package/build/esm/constants.js.map +1 -0
  169. package/build/esm/events.js +21 -0
  170. package/build/esm/events.js.map +1 -0
  171. package/build/esm/hooks/useAudioDevices.js +176 -0
  172. package/build/esm/hooks/useAudioDevices.js.map +1 -0
  173. package/build/esm/index.js +23 -0
  174. package/build/esm/index.js.map +1 -0
  175. package/build/esm/trimAudio.js +69 -0
  176. package/build/esm/trimAudio.js.map +1 -0
  177. package/build/esm/useAudioRecorder.js +529 -0
  178. package/build/esm/useAudioRecorder.js.map +1 -0
  179. package/build/esm/utils/BlobFix.js +498 -0
  180. package/build/esm/utils/BlobFix.js.map +1 -0
  181. package/build/esm/utils/audioProcessing.js +133 -0
  182. package/build/esm/utils/audioProcessing.js.map +1 -0
  183. package/build/esm/utils/cleanNativeOptions.js +19 -0
  184. package/build/esm/utils/cleanNativeOptions.js.map +1 -0
  185. package/build/esm/utils/concatenateBuffers.js +21 -0
  186. package/build/esm/utils/concatenateBuffers.js.map +1 -0
  187. package/build/esm/utils/convertPCMToFloat32.js +120 -0
  188. package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
  189. package/build/esm/utils/crc32.js +50 -0
  190. package/build/esm/utils/crc32.js.map +1 -0
  191. package/build/esm/utils/encodingToBitDepth.js +13 -0
  192. package/build/esm/utils/encodingToBitDepth.js.map +1 -0
  193. package/build/esm/utils/getWavFileInfo.js +92 -0
  194. package/build/esm/utils/getWavFileInfo.js.map +1 -0
  195. package/build/esm/utils/writeWavHeader.js +84 -0
  196. package/build/esm/utils/writeWavHeader.js.map +1 -0
  197. package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
  198. package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
  199. package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
  200. package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
  201. package/build/esm/workers/wasmGlueString.web.js +24 -0
  202. package/build/esm/workers/wasmGlueString.web.js.map +1 -0
  203. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
  204. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  205. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
  206. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
  207. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
  208. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  209. package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
  210. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
  211. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
  212. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
  213. package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
  214. package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
  215. package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
  216. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  217. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
  218. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
  219. package/build/types/AudioDeviceManager.d.ts +187 -0
  220. package/build/types/AudioDeviceManager.d.ts.map +1 -0
  221. package/build/types/AudioRecorder.provider.d.ts +11 -0
  222. package/build/types/AudioRecorder.provider.d.ts.map +1 -0
  223. package/build/types/AudioStudio.native.d.ts +3 -0
  224. package/build/types/AudioStudio.native.d.ts.map +1 -0
  225. package/build/types/AudioStudio.types.d.ts +760 -0
  226. package/build/types/AudioStudio.types.d.ts.map +1 -0
  227. package/build/types/AudioStudio.web.d.ts +96 -0
  228. package/build/types/AudioStudio.web.d.ts.map +1 -0
  229. package/build/types/AudioStudioModule.d.ts +3 -0
  230. package/build/types/AudioStudioModule.d.ts.map +1 -0
  231. package/build/types/WebRecorder.web.d.ts +208 -0
  232. package/build/types/WebRecorder.web.d.ts.map +1 -0
  233. package/build/types/constants/platformLimitations.d.ts +40 -0
  234. package/build/types/constants/platformLimitations.d.ts.map +1 -0
  235. package/build/types/constants.d.ts +14 -0
  236. package/build/types/constants.d.ts.map +1 -0
  237. package/build/types/events.d.ts +29 -0
  238. package/build/types/events.d.ts.map +1 -0
  239. package/build/types/hooks/useAudioDevices.d.ts +15 -0
  240. package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
  241. package/build/types/index.d.ts +21 -0
  242. package/build/types/index.d.ts.map +1 -0
  243. package/build/types/trimAudio.d.ts +25 -0
  244. package/build/types/trimAudio.d.ts.map +1 -0
  245. package/build/types/useAudioRecorder.d.ts +22 -0
  246. package/build/types/useAudioRecorder.d.ts.map +1 -0
  247. package/build/types/utils/BlobFix.d.ts +9 -0
  248. package/build/types/utils/BlobFix.d.ts.map +1 -0
  249. package/build/types/utils/audioProcessing.d.ts +24 -0
  250. package/build/types/utils/audioProcessing.d.ts.map +1 -0
  251. package/build/types/utils/cleanNativeOptions.d.ts +15 -0
  252. package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
  253. package/build/types/utils/concatenateBuffers.d.ts +8 -0
  254. package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
  255. package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
  256. package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
  257. package/build/types/utils/crc32.d.ts +7 -0
  258. package/build/types/utils/crc32.d.ts.map +1 -0
  259. package/build/types/utils/encodingToBitDepth.d.ts +5 -0
  260. package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
  261. package/build/types/utils/getWavFileInfo.d.ts +26 -0
  262. package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
  263. package/build/types/utils/writeWavHeader.d.ts +34 -0
  264. package/build/types/utils/writeWavHeader.d.ts.map +1 -0
  265. package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  266. package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  267. package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
  268. package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  269. package/build/types/workers/wasmGlueString.web.d.ts +2 -0
  270. package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
  271. package/cpp/AudioFeatures.cpp +274 -0
  272. package/cpp/AudioFeatures.h +85 -0
  273. package/cpp/AudioFeaturesBridge.cpp +146 -0
  274. package/cpp/AudioFeaturesBridge.h +47 -0
  275. package/cpp/MelSpectrogram.cpp +227 -0
  276. package/cpp/MelSpectrogram.h +82 -0
  277. package/cpp/MelSpectrogramBridge.cpp +112 -0
  278. package/cpp/MelSpectrogramBridge.h +33 -0
  279. package/cpp/kiss_fft/COPYING +11 -0
  280. package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
  281. package/cpp/kiss_fft/kiss_fft.c +424 -0
  282. package/cpp/kiss_fft/kiss_fft.h +160 -0
  283. package/cpp/kiss_fft/kiss_fft_log.h +36 -0
  284. package/cpp/kiss_fft/kiss_fftr.c +155 -0
  285. package/cpp/kiss_fft/kiss_fftr.h +54 -0
  286. package/expo-module.config.json +10 -0
  287. package/ios/AudioAnalysisData.swift +74 -0
  288. package/ios/AudioDeviceManager.swift +670 -0
  289. package/ios/AudioFeaturesWrapper.h +21 -0
  290. package/ios/AudioFeaturesWrapper.mm +63 -0
  291. package/ios/AudioNotificationManager.swift +154 -0
  292. package/ios/AudioProcessingHelpers.swift +797 -0
  293. package/ios/AudioProcessor.swift +1191 -0
  294. package/ios/AudioStreamError.swift +7 -0
  295. package/ios/AudioStreamManager.swift +2369 -0
  296. package/ios/AudioStreamManagerDelegate.swift +16 -0
  297. package/ios/AudioStudio.podspec +39 -0
  298. package/ios/AudioStudioModule.swift +1111 -0
  299. package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
  300. package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
  301. package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
  302. package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
  303. package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
  304. package/ios/AudioStudioTests/Info.plist +22 -0
  305. package/ios/AudioStudioTests/README.md +39 -0
  306. package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
  307. package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
  308. package/ios/DataPoint.swift +54 -0
  309. package/ios/DecodingConfig.swift +59 -0
  310. package/ios/FFT.swift +62 -0
  311. package/ios/Features.swift +95 -0
  312. package/ios/ISSUE_IOS.md +68 -0
  313. package/ios/Logger.swift +39 -0
  314. package/ios/MelSpectrogramWrapper.h +30 -0
  315. package/ios/MelSpectrogramWrapper.mm +97 -0
  316. package/ios/NotificationExtension.swift +15 -0
  317. package/ios/RecordingResult.swift +22 -0
  318. package/ios/RecordingSettings.swift +311 -0
  319. package/ios/WaveformExtractor.swift +105 -0
  320. package/ios/tests/README.md +41 -0
  321. package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
  322. package/ios/tests/integration/buffer_duration_test.swift +185 -0
  323. package/ios/tests/integration/compressed_only_output_test.swift +271 -0
  324. package/ios/tests/integration/output_control_test.swift +322 -0
  325. package/ios/tests/integration/run_integration_tests.sh +37 -0
  326. package/ios/tests/opus_support_test_macos.swift +154 -0
  327. package/ios/tests/standalone/audio_processing_test.swift +144 -0
  328. package/ios/tests/standalone/audio_recording_test.swift +277 -0
  329. package/ios/tests/standalone/audio_streaming_test.swift +249 -0
  330. package/ios/tests/standalone/standalone_test.swift +144 -0
  331. package/package.json +146 -0
  332. package/plugin/build/index.cjs +194 -0
  333. package/plugin/build/index.d.cts +22 -0
  334. package/plugin/build/index.js +194 -0
  335. package/plugin/src/index.ts +285 -0
  336. package/plugin/tsconfig.json +10 -0
  337. package/plugin/tsconfig.tsbuildinfo +1 -0
  338. package/prebuilt/wasm/mel-spectrogram.js +18 -0
  339. package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
  340. package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
  341. package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
  342. package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
  343. package/src/AudioAnalysis/extractAudioData.ts +17 -0
  344. package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
  345. package/src/AudioAnalysis/extractPreview.ts +34 -0
  346. package/src/AudioAnalysis/extractWaveform.ts +22 -0
  347. package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
  348. package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
  349. package/src/AudioDeviceManager.ts +800 -0
  350. package/src/AudioRecorder.provider.tsx +57 -0
  351. package/src/AudioStudio.native.ts +6 -0
  352. package/src/AudioStudio.types.ts +899 -0
  353. package/src/AudioStudio.web.ts +911 -0
  354. package/src/AudioStudioModule.ts +984 -0
  355. package/src/WebRecorder.web.ts +1114 -0
  356. package/src/constants/platformLimitations.ts +118 -0
  357. package/src/constants.ts +21 -0
  358. package/src/events.ts +63 -0
  359. package/src/hooks/useAudioDevices.ts +213 -0
  360. package/src/index.ts +67 -0
  361. package/src/trimAudio.ts +94 -0
  362. package/src/types/crc-32.d.ts +9 -0
  363. package/src/useAudioRecorder.tsx +784 -0
  364. package/src/utils/BlobFix.ts +561 -0
  365. package/src/utils/audioProcessing.ts +205 -0
  366. package/src/utils/cleanNativeOptions.ts +18 -0
  367. package/src/utils/concatenateBuffers.ts +24 -0
  368. package/src/utils/convertPCMToFloat32.ts +170 -0
  369. package/src/utils/crc32.ts +59 -0
  370. package/src/utils/encodingToBitDepth.ts +18 -0
  371. package/src/utils/getWavFileInfo.ts +132 -0
  372. package/src/utils/writeWavHeader.ts +115 -0
  373. package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
  374. package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
  375. package/src/workers/wasmGlueString.web.ts +23 -0
@@ -0,0 +1,1191 @@
1
+ // packages/audio-studio/ios/AudioProcessor.swift
2
+
3
+ import Foundation
4
+ import Accelerate
5
+ import AVFoundation
6
+ import QuartzCore
7
+
8
+ // Constants
9
+ private let SILENCE_THRESHOLD_RMS: Float = 0.01 // NOTE(review): presumably the RMS level below which audio is treated as silence; no use site is visible in this excerpt — confirm
10
+
11
/// Plain value container for the outcome of a trim operation (per its name).
///
/// Instances are created through the synthesized memberwise initializer, which takes every
/// field in declaration order: `uri`, `filename`, `durationMs`, `size`, `sampleRate`,
/// `channels`, `bitDepth`, `mimeType`, `requestedFormat`, `actualFormat`, `compression`.
public struct TrimResult {
    let uri: String
    let filename: String
    let durationMs: Double
    let size: Int64
    let sampleRate: Int
    let channels: Int
    let bitDepth: Int
    let mimeType: String
    let requestedFormat: String
    let actualFormat: String
    /// Optional extra payload; only exported by `toDictionary()` when it is non-nil.
    let compression: [String: Any]?

    /// Flattens the result into a `[String: Any]` dictionary keyed by the field names.
    ///
    /// - Returns: A dictionary with the ten mandatory fields, plus a `"compression"` entry
    ///   when `compression` is set.
    func toDictionary() -> [String: Any] {
        let mandatoryFields: [(String, Any)] = [
            ("uri", uri),
            ("filename", filename),
            ("durationMs", durationMs),
            ("size", size),
            ("sampleRate", sampleRate),
            ("channels", channels),
            ("bitDepth", bitDepth),
            ("mimeType", mimeType),
            ("requestedFormat", requestedFormat),
            ("actualFormat", actualFormat)
        ]
        var payload = Dictionary(uniqueKeysWithValues: mandatoryFields)
        if let compression {
            payload["compression"] = compression
        }
        return payload
    }
}
69
+
70
+ public class AudioProcessor {
71
/// File opened by the URL-based initializer; stays `nil` when the processor is created without a URL.
public private(set) var audioFile: AVAudioFile?
/// Success callback handed to the initializer.
private var result: (Any) -> Void
/// Failure callback handed to the initializer; visible call sites pass an error code followed by a message.
private var reject: (String, String) -> Void
// NOTE(review): no use of `waveformData`, `progress`, `currentProgress`, `extractionQueue`
// or `uniqueIdCounter` is visible in this part of the file — confirm they are still needed.
private var waveformData: [Float] = []
private var progress: Float = 0.0
/// Overwritten by `processAudioData` with the channel count of the file's processing format.
private var channelCount: Int = 1
private var currentProgress: Float = 0.0
private let extractionQueue = DispatchQueue(label: "AudioProcessor", attributes: .concurrent)
/// Backing storage for `abortExtraction`.
private var _abortExtraction: Bool = false

// Add a counter for unique IDs
private var uniqueIdCounter = 0

/// Cancellation flag polled by `processAudioData` between buffer reads.
///
/// NOTE(review): the accessors touch the backing field directly and do not go through
/// `extractionQueue` — confirm whether cross-thread access needs synchronization.
public var abortExtraction: Bool {
    get {
        return _abortExtraction
    }
    set(shouldAbort) {
        _abortExtraction = shouldAbort
    }
}
88
+
89
/// Creates a processor for file-based processing.
///
/// - Parameters:
///   - url: Location of the audio file, opened with `AVAudioFile(forReading:)`.
///   - resolve: Callback stored as the success handler.
///   - reject: Callback stored as the failure handler.
/// - Throws: Any error raised by `AVAudioFile(forReading:)`.
public init(url: URL, resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) throws {
    // Open the file first so a failure aborts construction before anything else is stored.
    let openedFile = try AVAudioFile(forReading: url)
    result = resolve
    self.reject = reject
    audioFile = openedFile
}
95
+
96
/// Creates a processor for buffer-based processing; `audioFile` is left `nil`.
///
/// - Parameters:
///   - resolve: Callback stored as the success handler.
///   - reject: Callback stored as the failure handler.
public init(resolve: @escaping (Any) -> Void, reject: @escaping (String, String) -> Void) {
    self.reject = reject
    result = resolve
}
101
+
102
+
103
+ deinit { // Runs when the processor is deallocated.
104
+ audioFile = nil // Drop this instance's reference to the opened AVAudioFile.
105
+ }
106
+
107
/// Errors declared for `AudioProcessor`.
///
/// NOTE(review): no throw site for these cases is visible in this part of the file; the
/// per-case descriptions below are read from the case names — confirm against the callers.
public enum AudioProcessorError: Error {
    /// Presumably raised when the audio file cannot be set up; carries a `String` payload.
    case fileInitializationFailed(String)

    /// Presumably raised when a processing buffer cannot be allocated.
    case bufferCreationFailed

    /// Presumably raised when reading audio fails; carries a `String` payload.
    case audioReadError(String)
}
113
+
114
+
115
/// Reads a range of frames from the loaded audio file and runs feature extraction on it.
///
/// The range can be given in bytes (`position` / `byteLength`) or in frames (`offset` / `length`).
/// Byte values take precedence when present and are converted to frames using `bitDepth` and
/// `numberOfChannels`. Only the first channel of the decoded audio is collected and analysed.
///
/// Failures are reported through the `reject` callback passed to the initializer, after which
/// the method returns `nil`. If `abortExtraction` is `true` when checked between reads, the flag
/// is reset and `nil` is returned without calling `reject`.
///
/// - Parameters:
///   - numberOfSamples: When positive, the read chunk size is the range length divided by this
///     value (at least one frame). It only affects how the file is read, not how many frames are
///     collected. `nil` or a non-positive value falls back to one segment per read.
///   - offset: First frame to read; used when `position` is `nil`.
///   - length: Number of frames to read; used when `byteLength` is `nil`. Defaults to the rest
///     of the file after the offset.
///   - segmentDurationMs: Segment duration in milliseconds; also forwarded to `processChannelData`.
///   - featureOptions: Feature flags forwarded to `processChannelData`.
///   - bitDepth: Bits per sample; used for byte-to-frame conversion and forwarded.
///   - numberOfChannels: Channel count; used for byte-to-frame conversion and forwarded.
///   - position: Start of the range in bytes.
///   - byteLength: Length of the range in bytes.
/// - Returns: The value produced by `processChannelData`, or `nil` on error or abort.
public func processAudioData(
    numberOfSamples: Int?,
    offset: Int? = 0,
    length: UInt? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int,
    position: Int? = nil,
    byteLength: Int? = nil
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // Keep the frame count as Int64: narrowing it to AVAudioFrameCount (UInt32) would trap on
    // very long files.
    let totalFrameCount: Int64 = audioFile.length

    NSLog("""
    [AudioProcessor] Starting audio processing:
    - totalFrameCount: \(totalFrameCount)
    - bitDepth: \(bitDepth)
    - numberOfChannels: \(numberOfChannels)
    - position: \(position ?? -1)
    - byteLength: \(byteLength ?? -1)
    - offset: \(offset ?? -1)
    - length: \(length ?? 0)
    """)

    // Byte-based ranges divide by the sample size and channel count, so reject values that
    // would make either divisor zero (or negative) instead of crashing.
    let bytesPerSample = bitDepth / 8
    if position != nil || byteLength != nil {
        guard bytesPerSample > 0, numberOfChannels > 0 else {
            NSLog("[AudioProcessor] ERROR: Invalid bitDepth or numberOfChannels for byte-based range")
            reject("INVALID_FORMAT", "bitDepth (\(bitDepth)) must be at least 8 and numberOfChannels (\(numberOfChannels)) must be positive when position or byteLength is used")
            return nil
        }
    }

    // Use position/byteLength if provided, otherwise fall back to offset/length
    let effectiveOffset: Int64 = if let position = position {
        Int64(position / bytesPerSample / numberOfChannels)
    } else {
        Int64(offset ?? 0)
    }

    let effectiveLength: Int64 = if let byteLength = byteLength {
        Int64(byteLength / bytesPerSample / numberOfChannels)
    } else if let length = length {
        Int64(length)
    } else {
        totalFrameCount - effectiveOffset
    }

    NSLog("""
    [AudioProcessor] Calculated frame positions:
    - effectiveOffset: \(effectiveOffset)
    - effectiveLength: \(effectiveLength)
    - expectedEndFrame: \(effectiveOffset + effectiveLength)
    - totalFrameCount: \(totalFrameCount)
    """)

    // Validate frame boundaries
    if effectiveOffset < 0 || effectiveOffset >= totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Invalid offset value")
        reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
        return nil
    }

    if effectiveLength <= 0 {
        NSLog("[AudioProcessor] ERROR: Invalid length value")
        reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
        return nil
    }

    if effectiveOffset + effectiveLength > totalFrameCount {
        NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
        reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
        return nil
    }

    var startFrame: AVAudioFramePosition = effectiveOffset
    let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength

    // Calculate frames per segment based on segment duration
    let framesPerSegment = AVAudioFrameCount(Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0)

    // Chunk size for each read. A zero or negative sample count would divide by zero or yield
    // a meaningless size, so it falls back to the segment size. Clamping keeps the UInt32
    // conversion from trapping; the loop still walks the whole range.
    let framesPerBuffer: AVAudioFrameCount
    if let numberOfSamples = numberOfSamples, numberOfSamples > 0 {
        framesPerBuffer = AVAudioFrameCount(clamping: max(1, effectiveLength / Int64(numberOfSamples)))
    } else {
        framesPerBuffer = framesPerSegment
    }

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        reject("BUFFER_CREATION_FAILED", "Failed to create AVAudioPCMBuffer.")
        return nil
    }

    channelCount = Int(audioFile.processingFormat.channelCount)

    var channelData = [Float]()
    channelData.reserveCapacity(Int(effectiveLength))

    while startFrame < endFrame {
        let remainingFrames = endFrame - startFrame
        // Compare in Int64 so a large remainder cannot overflow the UInt32 frame count.
        let currentFramesPerBuffer = AVAudioFrameCount(min(Int64(framesPerBuffer), remainingFrames))

        if currentFramesPerBuffer == 0 {
            break
        }

        if abortExtraction {
            audioFile.framePosition = startFrame
            abortExtraction = false
            return nil
        }

        do {
            audioFile.framePosition = startFrame
            try audioFile.read(into: buffer, frameCount: currentFramesPerBuffer)
        } catch {
            reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
            return nil
        }

        //TODO: check if we need conversion based on bitDepth here
        guard let floatData = buffer.floatChannelData else {
            reject("BUFFER_DATA_ERROR", "Failed to retrieve float data from buffer.")
            return nil
        }

        // Only channel 0 is collected; copy the frames that were read in one bulk append.
        channelData.append(contentsOf: UnsafeBufferPointer(start: floatData[0], count: Int(buffer.frameLength)))

        startFrame += AVAudioFramePosition(currentFramesPerBuffer)
    }

    // Report the distance actually walked; `endFrame - startFrame` is zero once the loop finishes.
    NSLog("""
    [AudioProcessor] Audio processing completed:
    - processedFrames: \(startFrame - effectiveOffset)
    - framesPerBuffer: \(framesPerBuffer)
    """)

    return processChannelData(
        channelData: channelData,
        sampleRate: Float(audioFile.fileFormat.sampleRate),
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
270
+
271
/// Processes raw PCM audio held in memory.
///
/// The bytes are read as native-endian signed integers of `bitDepth` bits, scaled by the
/// integer type's maximum value, and forwarded to `processChannelData`. Trailing bytes that
/// do not form a complete sample are ignored.
/// - Parameters:
///   - data: The audio data buffer.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data (16 and 32 are supported).
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil`
///   (after calling `reject`) when the data is empty or the bit depth is unsupported.
public func processAudioBuffer(
    data: Data,
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    guard !data.isEmpty else {
        Logger.debug("AudioProcessor", "Data is empty, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    // Copy the bytes into a correctly aligned, typed array instead of rebinding the Data
    // storage in place: `bindMemory` requires aligned memory and a byte count that is a
    // whole number of samples, neither of which is guaranteed for arbitrary Data.
    func decodeSamples<Sample: FixedWidthInteger>(_ type: Sample.Type) -> [Float] {
        var samples = [Sample](repeating: 0, count: data.count / MemoryLayout<Sample>.size)
        samples.withUnsafeMutableBufferPointer { destination in
            _ = data.copyBytes(to: destination)
        }
        return samples.map { Float($0) / Float(Sample.max) }
    }

    // Convert Data to Float array based on bit depth
    let floatData: [Float]
    switch bitDepth {
    case 16:
        floatData = decodeSamples(Int16.self)
    case 32:
        floatData = decodeSamples(Int32.self)
    default:
        Logger.debug("AudioProcessor", "Unsupported bit depth. Rejecting")
        reject("UNSUPPORTED_BIT_DEPTH", "Unsupported bit depth: \(bitDepth)")
        return nil
    }

    // Fewer bytes than one full sample leaves nothing to analyse.
    guard !floatData.isEmpty else {
        Logger.debug("AudioProcessor", "Data holds no complete sample, rejecting")
        reject("DATA_EMPTY", "The audio data is empty.")
        return nil
    }

    return processChannelData(
        channelData: floatData,
        sampleRate: sampleRate,
        segmentDurationMs: segmentDurationMs,
        featureOptions: featureOptions,
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels
    )
}
322
+
323
/// Processes the given audio channel data to extract features.
///
/// The samples are cut into consecutive segments of `segmentDurationMs` (the last one may be
/// shorter) and each segment is turned into a `DataPoint` by `processSegment`.
/// - Parameters:
///   - channelData: The audio channel data to process.
///   - sampleRate: The sample rate of the audio data.
///   - segmentDurationMs: The duration of each segment in milliseconds.
///   - featureOptions: The features to extract.
///   - bitDepth: The bit depth of the audio data.
///   - numberOfChannels: The number of channels in the audio data.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or `nil` when
///   the segment duration and sample rate do not yield at least one sample per segment.
private func processChannelData(
    channelData: [Float],
    sampleRate: Float,
    segmentDurationMs: Int,
    featureOptions: [String: Bool],
    bitDepth: Int,
    numberOfChannels: Int
) -> AudioAnalysisData? {
    Logger.debug("AudioProcessor", "Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")

    let processingStart = CACurrentMediaTime()

    let length = channelData.count

    // Calculate points per segment based on segment duration. A size below one sample would
    // never advance the loop below, and a non-finite or oversized value would trap in Int(_:).
    let exactSamplesPerSegment = Float(segmentDurationMs) * sampleRate / 1000.0
    guard exactSamplesPerSegment.isFinite,
          exactSamplesPerSegment >= 1,
          exactSamplesPerSegment < Float(Int.max) else {
        Logger.debug("AudioProcessor", "Invalid segment size for segmentDurationMs: \(segmentDurationMs), sampleRate: \(sampleRate)")
        return nil
    }
    let samplesPerSegment = Int(exactSamplesPerSegment)

    var dataPoints = [DataPoint]()
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    // Calculate bytes per sample
    let bytesPerSample = bitDepth / 8

    // Process data in segments
    var segmentStart = 0
    while segmentStart < length {
        // Written as start + min(...) so a very large segment size cannot overflow the index.
        let segmentEnd = segmentStart + min(samplesPerSegment, length - segmentStart)
        let segment = Array(channelData[segmentStart..<segmentEnd])

        // Calculate byte positions and timing
        let startPosition = segmentStart * bytesPerSample * numberOfChannels
        let endPosition = segmentEnd * bytesPerSample * numberOfChannels
        let segmentStartTime = Float(segmentStart) / sampleRate
        let segmentEndTime = Float(segmentEnd) / sampleRate

        // Process segment and create data point
        let dataPoint = processSegment(
            segment,
            sampleRate: sampleRate,
            featureOptions: featureOptions,
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: startPosition,
            endPosition: endPosition
        )
        dataPoints.append(dataPoint)

        // Update min/max amplitudes
        minAmplitude = min(minAmplitude, segment.min() ?? minAmplitude)
        maxAmplitude = max(maxAmplitude, segment.max() ?? maxAmplitude)

        segmentStart = segmentEnd
    }

    // With no samples the sentinels above were never replaced; report a neutral range instead.
    if dataPoints.isEmpty {
        minAmplitude = 0
        maxAmplitude = 0
    }

    let processingEnd = CACurrentMediaTime()
    let processingTimeMs = Float((processingEnd - processingStart) * 1000)

    Logger.debug("AudioProcessor", "Processed \(dataPoints.count) data points in \(processingTimeMs) ms")

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(length) / sampleRate * 1000),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: length,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: processingTimeMs
    )
}
410
+
411
/// Builds the `DataPoint` for one segment of samples.
///
/// Computes the segment's RMS, dB level and silence flag, delegates the optional features to
/// `computeFeatures`, and assigns the next value of `uniqueIdCounter` as the point id.
/// - Parameters:
///   - segment: The samples of this segment.
///   - sampleRate: The sample rate of the audio data.
///   - featureOptions: The features to extract.
///   - startTime: Segment start time, passed through to the data point.
///   - endTime: Segment end time, passed through to the data point.
///   - startPosition: Segment start position, passed through to the data point.
///   - endPosition: Segment end position, passed through to the data point.
/// - Returns: The populated `DataPoint`.
private func processSegment(
    _ segment: [Float],
    sampleRate: Float,
    featureOptions: [String: Bool],
    startTime: Float,
    endTime: Float,
    startPosition: Int,
    endPosition: Int
) -> DataPoint {
    let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
    // An empty segment would otherwise divide 0 by 0 and yield NaN.
    let rms: Float = segment.isEmpty ? 0 : sqrt(sumSquares / Float(segment.count))
    let silent = rms < SILENCE_THRESHOLD_RMS
    let dB = Float(20 * log10(Double(rms)))

    // Count sign changes between neighbouring samples so the "zcr" feature reflects the
    // signal instead of always being zero. Skipped when the feature is not requested.
    var zeroCrossings = 0
    if featureOptions["zcr"] == true {
        for (previous, current) in zip(segment, segment.dropFirst()) where previous * current < 0 {
            zeroCrossings += 1
        }
    }

    let features = computeFeatures(
        segmentData: segment,
        sampleRate: sampleRate,
        sumSquares: sumSquares,
        zeroCrossings: zeroCrossings,
        segmentLength: segment.count,
        featureOptions: featureOptions
    )

    let dataPoint = DataPoint(
        id: Int(uniqueIdCounter),
        amplitude: segment.max() ?? 0,
        rms: rms,
        dB: dB,
        silent: silent,
        features: features,
        speech: SpeechFeatures(isActive: !silent),
        startTime: startTime,
        endTime: endTime,
        startPosition: startPosition,
        endPosition: endPosition,
        samples: segment.count
    )
    uniqueIdCounter += 1
    return dataPoint
}
452
+
453
/// Assembles the `Features` value for one segment.
///
/// A feature is computed only when its key in `featureOptions` is explicitly `true`;
/// otherwise it is reported as zero, an empty array, or `nil` (CRC32).
/// - Parameters:
///   - segmentData: The samples of the segment.
///   - sampleRate: The sample rate of the audio data.
///   - sumSquares: Sum of squared samples supplied by the caller.
///   - zeroCrossings: Zero-crossing count supplied by the caller.
///   - segmentLength: Number of samples used as the divisor for RMS and ZCR.
///   - featureOptions: The features to extract.
/// - Returns: The populated `Features`.
private func computeFeatures(
    segmentData: [Float],
    sampleRate: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: [String: Bool]
) -> Features {
    // A feature counts as requested only when its flag is present and true.
    func isRequested(_ key: String) -> Bool {
        return featureOptions[key] == true
    }

    let sampleCount = Float(segmentLength)
    let rms = sqrt(sumSquares / sampleCount)
    let energy = isRequested("energy") ? sumSquares : 0
    let zcr = isRequested("zcr") ? Float(zeroCrossings) / sampleCount : 0

    // Work out which of the FFT-based native features are wanted
    let wantsSpectral = isRequested("spectralCentroid")
        || isRequested("spectralFlatness")
        || isRequested("spectralRolloff")
        || isRequested("spectralBandwidth")
    let wantsMfcc = isRequested("mfcc")
    let wantsChroma = isRequested("chromagram")

    var spectralCentroid: Float = 0
    var spectralFlatness: Float = 0
    var spectralRolloff: Float = 0
    var spectralBandwidth: Float = 0
    var mfcc: [Float] = []
    var chromagram: [Float] = []

    // One native call covers every FFT-based feature; skipped entirely when none is wanted.
    if wantsSpectral || wantsMfcc || wantsChroma {
        let nativeFeatures = segmentData.withUnsafeBufferPointer { samples in
            AudioFeaturesWrapper.computeFrame(
                withSamples: samples.baseAddress,
                numSamples: Int32(segmentData.count),
                sampleRate: Int32(sampleRate),
                fftLength: 1024,
                nMfcc: 13,
                nMelFilters: 26,
                computeMfcc: wantsMfcc,
                computeChroma: wantsChroma
            )
        }
        if let nativeFeatures = nativeFeatures {
            if wantsSpectral {
                spectralCentroid = (nativeFeatures["spectralCentroid"] as? NSNumber)?.floatValue ?? 0
                spectralFlatness = (nativeFeatures["spectralFlatness"] as? NSNumber)?.floatValue ?? 0
                spectralRolloff = (nativeFeatures["spectralRolloff"] as? NSNumber)?.floatValue ?? 0
                spectralBandwidth = (nativeFeatures["spectralBandwidth"] as? NSNumber)?.floatValue ?? 0
            }
            if wantsMfcc {
                mfcc = (nativeFeatures["mfcc"] as? [NSNumber])?.map { $0.floatValue } ?? []
            }
            if wantsChroma {
                chromagram = (nativeFeatures["chromagram"] as? [NSNumber])?.map { $0.floatValue } ?? []
            }
        }
    }

    let tempo = isRequested("tempo") ? extractTempo(from: segmentData, sampleRate: sampleRate) : 0
    let hnr = isRequested("hnr") ? extractHNR(from: segmentData) : 0
    let melSpectrogram = isRequested("melSpectrogram") ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
    let spectralContrast = isRequested("spectralContrast") ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []
    let tonnetz = isRequested("tonnetz") ? computeTonnetz(from: segmentData, sampleRate: sampleRate) : []
    let pitch = isRequested("pitch") ? estimatePitch(from: segmentData, sampleRate: sampleRate) : 0

    // Smallest and largest absolute sample values of the segment
    let magnitudes = segmentData.map(abs)
    let minAmplitude = magnitudes.min() ?? 0
    let maxAmplitude = magnitudes.max() ?? 0

    let crc32Value = isRequested("crc32")
        ? calculateCRC32(from: segmentData, count: segmentData.count)
        : nil

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        minAmplitude: minAmplitude,
        maxAmplitude: maxAmplitude,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRolloff: spectralRolloff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch,
        crc32: crc32Value
    )
}
545
+
546
/// Processes the loaded audio file with time range support.
///
/// The selected frame range is read in buffers of `segmentDurationMs`, the channels of each
/// buffer are averaged into one signal, and one `DataPoint` is produced per buffer.
/// - Parameters:
///   - startTimeMs: Optional start of the range in milliseconds (defaults to the file start).
///   - endTimeMs: Optional end of the range in milliseconds (defaults to the file end).
///   - segmentDurationMs: The duration of each segment in milliseconds (default 100).
///   - featureOptions: The features to extract.
/// - Returns: An `AudioAnalysisData` object, or `nil` when no file is loaded, the range or
///   segment size is invalid, a buffer cannot be created, or reading fails.
public func processAudioData(
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    segmentDurationMs: Int = 100, // Default 100ms
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        Logger.debug("AudioProcessor", "No audio file loaded")
        return nil
    }

    let extractionStart = CACurrentMediaTime()
    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.fileFormat.channelCount)

    // Non-finite times would trap in the integer conversions below.
    guard (startTimeMs ?? 0).isFinite, (endTimeMs ?? 0).isFinite else {
        Logger.debug("AudioProcessor", "Invalid time range")
        return nil
    }

    // Convert time to frames
    let startFrame = startTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? 0
    let endFrame = endTimeMs.map { AVAudioFramePosition(Double($0) * Double(sampleRate) / 1000.0) } ?? audioFile.length

    // Validate frame range
    guard startFrame >= 0 && endFrame <= audioFile.length && startFrame < endFrame else {
        Logger.debug("AudioProcessor", "Invalid time range")
        return nil
    }

    // Calculate frames per buffer based on segment duration. A size below one frame would
    // never advance the read loop; a negative or oversized value would trap in the conversion.
    let exactFramesPerBuffer = sampleRate * Float(segmentDurationMs) / 1000.0
    guard exactFramesPerBuffer.isFinite,
          exactFramesPerBuffer >= 1,
          exactFramesPerBuffer < Float(UInt32.max) else {
        Logger.debug("AudioProcessor", "Invalid segment duration: \(segmentDurationMs)")
        return nil
    }
    let framesPerBuffer = AVAudioFrameCount(exactFramesPerBuffer)

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        Logger.debug("AudioProcessor", "Failed to create buffer")
        return nil
    }

    // Channel pointers belong to the buffer, so index them by the buffer's own channel count.
    let bufferChannels = Int(buffer.format.channelCount)
    guard bufferChannels > 0 else {
        Logger.debug("AudioProcessor", "Failed to create buffer")
        return nil
    }

    var dataPoints: [DataPoint] = []
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude
    var currentId = 0

    audioFile.framePosition = startFrame
    var currentFrame = startFrame

    while currentFrame < endFrame {
        // Clamp in Int64 first so a very long remaining range cannot overflow UInt32.
        let framesToRead = AVAudioFrameCount(min(Int64(framesPerBuffer), endFrame - currentFrame))

        do {
            try audioFile.read(into: buffer, frameCount: framesToRead)
        } catch {
            Logger.debug("AudioProcessor", "Error reading audio data: \(error)")
            return nil
        }

        // Use the number of frames actually delivered; stop when the file yields nothing more.
        let framesRead = Int(buffer.frameLength)
        guard framesRead > 0 else { break }

        // Skipping the iteration here would leave `currentFrame` unchanged and retry forever.
        guard let channelData = buffer.floatChannelData else {
            Logger.debug("AudioProcessor", "Failed to retrieve float data from buffer")
            return nil
        }

        // Average all channels into a single signal
        var mixedData = [Float](repeating: 0, count: framesRead)
        for channel in 0..<bufferChannels {
            let channelBuffer = UnsafeBufferPointer(start: channelData[channel], count: framesRead)
            for (index, sample) in channelBuffer.enumerated() {
                mixedData[index] += sample
            }
        }
        let channelDivisor = Float(bufferChannels)
        for index in mixedData.indices {
            mixedData[index] /= channelDivisor
        }

        // Peak magnitude and mean square via vDSP
        var localMax: Float = 0
        vDSP_maxmgv(mixedData, 1, &localMax, vDSP_Length(framesRead))
        var meanSquare: Float = 0
        vDSP_measqv(mixedData, 1, &meanSquare, vDSP_Length(framesRead))
        let rms = sqrt(meanSquare)

        minAmplitude = min(minAmplitude, localMax)
        maxAmplitude = max(maxAmplitude, localMax)

        let segmentEndFrame = currentFrame + Int64(framesRead)
        let segmentStartTime = Float(currentFrame) / Float(sampleRate)
        let segmentEndTime = Float(segmentEndFrame) / Float(sampleRate)

        let dataPoint = DataPoint(
            id: currentId,
            amplitude: localMax, // Always use peak amplitude
            rms: rms, // Use calculated RMS value
            dB: Float(20 * log10(Double(rms))), // Use RMS for dB calculation
            silent: rms < SILENCE_THRESHOLD_RMS, // Use RMS for silence detection
            features: computeFeatures(
                segmentData: mixedData,
                sampleRate: sampleRate,
                // computeFeatures divides by segmentLength itself, so pass the sum of
                // squares (mean square x count), not the mean square.
                sumSquares: meanSquare * Float(framesRead),
                zeroCrossings: 0,
                segmentLength: framesRead,
                featureOptions: featureOptions
            ),
            speech: SpeechFeatures(isActive: rms >= SILENCE_THRESHOLD_RMS),
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: Int(currentFrame),
            endPosition: Int(segmentEndFrame),
            samples: framesRead
        )

        dataPoints.append(dataPoint)
        currentId += 1
        currentFrame = segmentEndFrame
    }

    let extractionEnd = CACurrentMediaTime()
    let extractionTime = Float(extractionEnd - extractionStart) * 1000 // Convert to milliseconds

    return AudioAnalysisData(
        segmentDurationMs: segmentDurationMs,
        durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: Int(endFrame - startFrame),
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTime
    )
}
686
+
687
/// Trims the loaded audio file to the specified range(s) and writes the result to a new
/// file in the temporary directory.
///
/// Three write paths exist: a direct copy when the input is linear PCM, the output is WAV and
/// no decoding options request a format change; a WAV path that converts each range through
/// `AVAudioConverter`; and an AAC path. Ranges are clamped to the file bounds before reading,
/// and ranges that contain no frames are skipped.
/// - Parameters:
///   - mode: Range mode understood by `computeKeepRanges` ("single", "keep" or "remove").
///   - startTimeMs: Start of the range for "single" mode, in milliseconds.
///   - endTimeMs: End of the range for "single" mode, in milliseconds.
///   - ranges: Range dictionaries (`startTimeMs` / `endTimeMs`) for "keep" and "remove" modes.
///   - outputFileName: Optional output file name without extension (a UUID is used otherwise).
///   - outputFormat: Optional dictionary with `format`, `sampleRate`, `channels`, `bitDepth`, `bitrate`.
///   - decodingOptions: Optional dictionary parsed by `DecodingConfig.fromDictionary`.
///   - progressCallback: Called after each written range with (percent, bytes, total bytes).
/// - Returns: A `TrimResult`, or `nil` when no file is loaded, no frames are selected, or
///   writing fails (in which case `reject` is called with "TRIM_ERROR").
public func trimAudio(
    mode: String,
    startTimeMs: Double?,
    endTimeMs: Double?,
    ranges: [[String: Double]]?,
    outputFileName: String?,
    outputFormat: [String: Any]?,
    decodingOptions: [String: Any]?,
    progressCallback: ((Float, Int64, Int64) -> Void)? = nil
) -> TrimResult? {
    // Log the input parameters
    Logger.debug("AudioProcessor", "Starting audio trim operation:")
    Logger.debug("AudioProcessor", "- Mode: \(mode)")
    if let start = startTimeMs, let end = endTimeMs {
        Logger.debug("AudioProcessor", "- Time range: \(start)ms to \(end)ms")
    }
    if let ranges = ranges {
        Logger.debug("AudioProcessor", "- Ranges count: \(ranges.count)")
    }

    // Log output format details
    if let format = outputFormat {
        let formatType = format["format"] as? String ?? "unknown"
        let bitrate = format["bitrate"] as? Int ?? 0
        Logger.debug("AudioProcessor", "- Output format: \(formatType), bitrate: \(bitrate)")
    }

    guard let audioFile = audioFile else { return nil }

    let inputFormat = audioFile.processingFormat
    let inputSampleRate = inputFormat.sampleRate
    let inputChannels = Int(inputFormat.channelCount)
    let fileLength = audioFile.length
    let totalDurationMs = Double(fileLength) / inputSampleRate * 1000

    // Compute ranges to keep
    let keepRanges = computeKeepRanges(
        mode: mode,
        startTimeMs: startTimeMs,
        endTimeMs: endTimeMs,
        ranges: ranges,
        totalDurationMs: totalDurationMs
    )

    guard !keepRanges.isEmpty else { return nil }

    // Resolve each keep-range to a frame span clamped to the file, so inverted, negative,
    // non-finite or out-of-file ranges cannot trap in the integer conversions or over-read.
    typealias FrameSpan = (start: AVAudioFramePosition, count: AVAudioFrameCount)
    var spans: [FrameSpan] = []
    for range in keepRanges {
        guard range.count >= 2, range[0].isFinite, range[1].isFinite else { continue }
        let startPosition = max(0, min(range[0] / 1000 * inputSampleRate, Double(fileLength)))
        let startFrame = AVAudioFramePosition(startPosition)
        let endPosition = min(range[1] / 1000 * inputSampleRate, Double(fileLength))
        let availableFrames = endPosition - Double(startFrame)
        guard availableFrames >= 1 else { continue }
        spans.append((start: startFrame, count: AVAudioFrameCount(min(availableFrames, Double(UInt32.max)))))
    }

    guard !spans.isEmpty else {
        Logger.debug("AudioProcessor", "No audio frames fall inside the requested trim ranges")
        return nil
    }

    let totalFrames = spans.reduce(Int64(0)) { $0 + Int64($1.count) }
    let bytesPerFrame = Int64(inputFormat.streamDescription.pointee.mBytesPerFrame)
    var cumulativeFrames: Int64 = 0

    // Allocates a PCM buffer, throwing instead of crashing when allocation fails.
    func makeBuffer(format: AVAudioFormat, capacity: AVAudioFrameCount) throws -> AVAudioPCMBuffer {
        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: capacity) else {
            throw NSError(
                domain: "AudioProcessor",
                code: -1,
                userInfo: [NSLocalizedDescriptionKey: "Failed to allocate an audio buffer of \(capacity) frames"]
            )
        }
        return buffer
    }

    // Reads one frame span of the source file into a fresh buffer.
    func readSpan(_ span: FrameSpan) throws -> AVAudioPCMBuffer {
        let buffer = try makeBuffer(format: inputFormat, capacity: span.count)
        audioFile.framePosition = span.start
        try audioFile.read(into: buffer, frameCount: span.count)
        return buffer
    }

    // Percentage of frames written so far.
    func currentProgress() -> Float {
        return Float(cumulativeFrames) / Float(totalFrames) * 100
    }

    // Output format setup
    let requestedFormat = outputFormat?["format"] as? String ?? "wav"
    let validFormats = ["wav", "aac"]
    let formatStr = validFormats.contains(requestedFormat.lowercased()) ? requestedFormat.lowercased() : "aac"

    if formatStr != requestedFormat.lowercased() {
        Logger.debug("AudioProcessor", "Unsupported format '\(requestedFormat)', falling back to 'aac'")
    }

    let targetSampleRate = outputFormat?["sampleRate"] as? Double ?? inputSampleRate
    let targetChannels = outputFormat?["channels"] as? Int ?? inputChannels
    let targetBitDepth = outputFormat?["bitDepth"] as? Int ?? 16
    let bitrate = outputFormat?["bitrate"] as? Int ?? 128000

    let fileExtension = formatStr == "wav" ? "wav" : "aac"
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(outputFileName ?? UUID().uuidString)
        .appendingPathExtension(fileExtension)

    let decodingConfig = DecodingConfig.fromDictionary(decodingOptions ?? [:])
    let needFormatChange = decodingConfig.targetSampleRate != nil || decodingConfig.targetChannels != nil || decodingConfig.targetBitDepth != nil
    let isWavInput = audioFile.fileFormat.settings[AVFormatIDKey] as? UInt32 == kAudioFormatLinearPCM

    do {
        if isWavInput && formatStr == "wav" && !needFormatChange {
            // Fast path: WAV-to-WAV with no format changes
            let outputFile = try AVAudioFile(forWriting: outputURL, settings: inputFormat.settings)

            for span in spans {
                let buffer = try readSpan(span)
                try outputFile.write(from: buffer)
                cumulativeFrames += Int64(span.count)
                progressCallback?(currentProgress(), Int64(span.count) * bytesPerFrame, totalFrames * bytesPerFrame)
            }

            Logger.debug("AudioProcessor", "Creating output file at: \(outputURL.path)")
            Logger.debug("AudioProcessor", "Trim operation completed")
            Logger.debug("AudioProcessor", "- Output file: \(outputURL.path)")
            Logger.debug("AudioProcessor", "- File exists: \(FileManager.default.fileExists(atPath: outputURL.path))")
            Logger.debug("AudioProcessor", "- File size: \((try? FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0) bytes")
            Logger.debug("AudioProcessor", "- File extension: \(outputURL.pathExtension)")

            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(inputSampleRate), channels: inputChannels, bitDepth: 16, bitrate: bitrate)
        } else if formatStr == "wav" {
            // Non-fast path: decode and re-encode as WAV
            guard let targetFormat = AVAudioFormat(
                commonFormat: .pcmFormatFloat32,
                sampleRate: targetSampleRate,
                channels: AVAudioChannelCount(max(0, targetChannels)),
                interleaved: false
            ) else {
                throw NSError(
                    domain: "AudioProcessor",
                    code: -2,
                    userInfo: [NSLocalizedDescriptionKey: "Unsupported output format: \(targetSampleRate) Hz, \(targetChannels) channel(s)"]
                )
            }

            let outputFile = try AVAudioFile(forWriting: outputURL, settings: [
                AVFormatIDKey: kAudioFormatLinearPCM,
                AVSampleRateKey: targetSampleRate,
                AVNumberOfChannelsKey: targetChannels,
                AVLinearPCMBitDepthKey: targetBitDepth,
                AVLinearPCMIsFloatKey: false,
                AVLinearPCMIsBigEndianKey: false
            ])

            // Resampling changes the frame count, so size the output by the rate ratio.
            let rateRatio = targetSampleRate / inputSampleRate

            for span in spans {
                let buffer = try readSpan(span)

                guard let converter = AVAudioConverter(from: inputFormat, to: targetFormat) else {
                    throw NSError(
                        domain: "AudioProcessor",
                        code: -3,
                        userInfo: [NSLocalizedDescriptionKey: "Failed to create an audio converter"]
                    )
                }
                let scaledFrames = min((Double(span.count) * rateRatio).rounded(.up) + 1, Double(UInt32.max))
                let convertedBuffer = try makeBuffer(format: targetFormat, capacity: AVAudioFrameCount(scaledFrames))

                // Hand the source buffer to the converter exactly once, then signal end of
                // stream; returning it on every callback would feed the same audio repeatedly.
                var inputConsumed = false
                var conversionError: NSError?
                let status = converter.convert(to: convertedBuffer, error: &conversionError) { _, outStatus in
                    if inputConsumed {
                        outStatus.pointee = .endOfStream
                        return nil
                    }
                    inputConsumed = true
                    outStatus.pointee = .haveData
                    return buffer
                }
                if status == .error || conversionError != nil {
                    Logger.debug("AudioProcessor", "Format conversion failed: \(conversionError?.localizedDescription ?? "unknown error")")
                    Logger.debug("AudioProcessor", "Skipping this buffer")
                    continue
                }

                try outputFile.write(from: convertedBuffer)
                cumulativeFrames += Int64(span.count)
                progressCallback?(currentProgress(), 0, totalFrames * bytesPerFrame)
            }
            return createTrimResult(from: outputURL, keepRanges: keepRanges, formatStr: formatStr, sampleRate: Int(targetSampleRate), channels: targetChannels, bitDepth: targetBitDepth, bitrate: bitrate)
        } else {
            // Use AAC instead of Opus (Opus support removed)
            Logger.debug("AudioProcessor", "Using AAC format instead of requested \(formatStr)")

            let outputSettings: [String: Any] = [
                AVFormatIDKey: kAudioFormatMPEG4AAC,
                AVSampleRateKey: targetSampleRate,
                AVNumberOfChannelsKey: targetChannels,
                AVEncoderBitRateKey: bitrate,
                AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
            ]

            let outputFile = try AVAudioFile(forWriting: outputURL, settings: outputSettings)

            for span in spans {
                let buffer = try readSpan(span)
                try outputFile.write(from: buffer)
                // Advance the written-frame counter so progress actually moves towards 100.
                cumulativeFrames += Int64(span.count)
                progressCallback?(currentProgress(), 0, totalFrames * bytesPerFrame)
            }
            return createTrimResult(
                from: outputURL,
                keepRanges: keepRanges,
                formatStr: formatStr,
                sampleRate: Int(targetSampleRate),
                channels: targetChannels,
                bitDepth: 16,
                bitrate: bitrate,
                compression: nil
            )
        }
    } catch {
        reject("TRIM_ERROR", "Failed to trim audio: \(error.localizedDescription)")
        return nil
    }
}
916
+
917
/// Computes the time ranges (in milliseconds) that should be kept when trimming.
///
/// Every returned range is clamped to `[0, totalDurationMs]` and satisfies `start < end`;
/// ranges that are inverted, empty after clamping, or non-finite are dropped.
/// - Parameters:
///   - mode: "single" keeps `startTimeMs...endTimeMs`, "keep" keeps the given `ranges`,
///     "remove" keeps everything outside the given `ranges`. Any other value yields `[]`.
///   - startTimeMs: Start of the range for "single" mode.
///   - endTimeMs: End of the range for "single" mode.
///   - ranges: Dictionaries with optional `startTimeMs` (default 0) and `endTimeMs`
///     (default `totalDurationMs`) keys.
///   - totalDurationMs: Total duration of the audio in milliseconds.
/// - Returns: The ranges to keep as `[start, end]` pairs.
private func computeKeepRanges(mode: String, startTimeMs: Double?, endTimeMs: Double?, ranges: [[String: Double]]?, totalDurationMs: Double) -> [[Double]] {
    // Clamps one range to the audio bounds; nil when nothing valid remains.
    func clamped(_ start: Double, _ end: Double) -> [Double]? {
        guard start.isFinite, end.isFinite else { return nil }
        let lower = max(0, start)
        let upper = min(end, totalDurationMs)
        return lower < upper ? [lower, upper] : nil
    }

    // The caller-supplied ranges with the documented defaults applied and invalid ones removed.
    let validRanges = (ranges ?? []).compactMap {
        clamped($0["startTimeMs"] ?? 0, $0["endTimeMs"] ?? totalDurationMs)
    }

    switch mode {
    case "single":
        guard let start = startTimeMs, let end = endTimeMs else { return [] }
        return clamped(start, end).map { [$0] } ?? []
    case "keep":
        return validRanges
    case "remove":
        let removeRanges = validRanges.sorted { $0[0] < $1[0] }
        var keepRanges: [[Double]] = []
        var lastEnd = 0.0
        for range in removeRanges {
            // Keep the gap between the previous removed range and this one.
            if range[0] > lastEnd {
                keepRanges.append([lastEnd, range[0]])
            }
            lastEnd = max(lastEnd, range[1])
        }
        if lastEnd < totalDurationMs {
            keepRanges.append([lastEnd, totalDurationMs])
        }
        return keepRanges
    default:
        return []
    }
}
942
+
943
/// Builds the `TrimResult` describing a trimmed file that has been written to `url`.
/// - Parameters:
///   - url: Location of the written file.
///   - keepRanges: The kept `[start, end]` ranges in milliseconds; their lengths are summed
///     to give the reported duration.
///   - formatStr: The output format string ("wav" maps to the "wav" extension, anything else to "aac").
///   - sampleRate: Sample rate to report.
///   - channels: Channel count to report.
///   - bitDepth: Bit depth to report.
///   - bitrate: Bitrate supplied by the caller.
///   - compression: Optional compression details to attach to the result.
/// - Returns: The populated `TrimResult`; the size is 0 when the file attributes cannot be read.
private func createTrimResult(from url: URL, keepRanges: [[Double]], formatStr: String, sampleRate: Int, channels: Int, bitDepth: Int, bitrate: Int, compression: [String: Any]? = nil) -> TrimResult {
    // Total kept duration is the sum of the individual range lengths.
    let keptDurationMs = keepRanges.reduce(0.0) { total, range in
        total + (range[1] - range[0])
    }

    // File size on disk, falling back to 0 when the attributes are unavailable.
    let attributes = try? FileManager.default.attributesOfItem(atPath: url.path)
    let byteSize = (attributes?[.size] as? Int64) ?? 0

    let resolvedExtension: String
    if formatStr == "wav" {
        resolvedExtension = "wav"
    } else {
        resolvedExtension = "aac"
    }

    return TrimResult(
        uri: url.absoluteString,
        filename: url.lastPathComponent,
        durationMs: keptDurationMs,
        size: byteSize,
        sampleRate: sampleRate,
        channels: channels,
        bitDepth: bitDepth,
        mimeType: "audio/\(resolvedExtension)",
        requestedFormat: formatStr,
        actualFormat: resolvedExtension,
        compression: compression
    )
}
961
+
962
/// Wraps the first channel of a float PCM buffer in a `CMSampleBuffer`.
///
/// The block buffer points at the PCM buffer's own memory (nothing is copied, and
/// `kCFAllocatorNull` is passed as the block allocator).
/// NOTE(review): this presumably requires `buffer` to outlive the returned sample buffer — confirm at call sites.
/// - Parameter buffer: The source PCM buffer; must expose float channel data.
/// - Returns: The sample buffer, or `nil` when the buffer has no float data or any
///   CoreMedia call fails.
private func createSampleBuffer(from buffer: AVAudioPCMBuffer) -> CMSampleBuffer? {
    // Non-float buffers have no `floatChannelData`; bail out instead of force-unwrapping.
    guard let channelData = buffer.floatChannelData else { return nil }

    var formatDesc: CMAudioFormatDescription?
    let formatStatus = CMAudioFormatDescriptionCreate(
        allocator: kCFAllocatorDefault,
        asbd: buffer.format.streamDescription,
        layoutSize: 0,
        layout: nil,
        magicCookieSize: 0,
        magicCookie: nil,
        extensions: nil,
        formatDescriptionOut: &formatDesc
    )
    guard formatStatus == noErr, let format = formatDesc else { return nil }

    var sampleBuffer: CMSampleBuffer?
    var timingInfo = CMSampleTimingInfo(
        duration: CMTime(value: 1, timescale: CMTimeScale(buffer.format.sampleRate)),
        presentationTimeStamp: .zero,
        decodeTimeStamp: .invalid
    )

    let sampleStatus = CMSampleBufferCreate(
        allocator: kCFAllocatorDefault,
        dataBuffer: nil,
        dataReady: false,
        makeDataReadyCallback: nil,
        refcon: nil,
        formatDescription: format,
        sampleCount: CMItemCount(buffer.frameLength),
        sampleTimingEntryCount: 1,
        sampleTimingArray: &timingInfo,
        sampleSizeEntryCount: 0,
        sampleSizeArray: nil,
        sampleBufferOut: &sampleBuffer
    )
    guard sampleStatus == noErr, let sampleBuf = sampleBuffer else { return nil }

    // Multiply in Int: the UInt32 product of frame length and bytes per frame can overflow.
    let byteCount = Int(buffer.frameLength) * Int(buffer.format.streamDescription.pointee.mBytesPerFrame)

    var dataBuffer: CMBlockBuffer?
    let blockStatus = CMBlockBufferCreateWithMemoryBlock(
        allocator: kCFAllocatorDefault,
        memoryBlock: UnsafeMutableRawPointer(channelData[0]),
        blockLength: byteCount,
        blockAllocator: kCFAllocatorNull,
        customBlockSource: nil,
        offsetToData: 0,
        dataLength: byteCount,
        flags: 0,
        blockBufferOut: &dataBuffer
    )
    guard blockStatus == noErr, let blockBuf = dataBuffer else { return nil }

    guard CMSampleBufferSetDataBuffer(sampleBuf, newValue: blockBuf) == noErr else { return nil }

    return sampleBuf
}
1017
+
1018
/// Extracts a fixed-size preview of the audio file, optionally limited to a time range.
///
/// The selected range is split into equally sized, consecutive segments (one per
/// returned point). Each segment is read from `audioFile`, the first channel is
/// scanned for sum of squares, zero crossings and absolute amplitude extremes, and
/// the result is passed to `computeFeatures` to build a `DataPoint`.
///
/// - Parameters:
///   - numberOfPoints: The number of points to extract. Must be greater than zero.
///     When the range holds fewer samples than requested points, one point per
///     sample is produced instead.
///   - startTimeMs: Optional start time in milliseconds. Defaults to the beginning
///     of the file; negative values are clamped to zero.
///   - endTimeMs: Optional end time in milliseconds. Defaults to (and is clamped to)
///     the total duration of the file.
///   - featureOptions: The features to extract, forwarded to `computeFeatures`.
/// - Returns: An `AudioAnalysisData` object containing the extracted features, or
///   `nil` (after calling `reject`) when the file is not initialized, the arguments
///   or range are invalid, or reading the audio fails.
public func extractPreview(
    numberOfPoints: Int,
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // A zero count would trap on the division below; a negative one on the range.
    guard numberOfPoints > 0 else {
        reject("INVALID_ARGUMENTS", "numberOfPoints must be greater than zero")
        return nil
    }

    // Start timing before any work so extractionTimeMs covers the extraction itself.
    let startTime = CACurrentMediaTime()

    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000

    // Calculate effective time range, clamped to the bounds of the file.
    let effectiveStartMs = max(startTimeMs ?? 0.0, 0.0)
    let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
    let durationMs = effectiveEndMs - effectiveStartMs // Actual duration of the selected range

    // Convert time to frames with proper offset
    let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
    let endFrame = AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0)
    let samplesInRange = Int(endFrame - startFrame)

    guard samplesInRange > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Never ask for more points than there are samples; otherwise every segment
    // would be empty (zero-capacity buffer, zero-length feature window).
    let pointCount = min(numberOfPoints, samplesInRange)
    let samplesPerPoint = samplesInRange / pointCount
    let framesPerPoint = AVAudioFrameCount(samplesPerPoint)

    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    // `/` binds tighter than `??`, so the division must be applied to the resolved
    // bit depth rather than written inline after the default value.
    let bytesPerSample = bitDepth / 8
    let bytesPerFrame = bytesPerSample * Int(audioFile.fileFormat.channelCount)
    let numberOfChannels = Int(audioFile.processingFormat.channelCount)

    NSLog("""
    [AudioProcessor] Starting preview extraction:
    - numberOfPoints: \(numberOfPoints)
    - startTimeMs: \(String(describing: startTimeMs))
    - endTimeMs: \(String(describing: endTimeMs))
    - durationMs: \(durationMs)
    - sampleRate: \(sampleRate)
    - bitDepth: \(bitDepth)
    - channels: \(numberOfChannels)
    - samplesInRange: \(samplesInRange)
    - samplesPerPoint: \(samplesPerPoint)
    """)

    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity(pointCount)

    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    for i in 0..<pointCount {
        let pointStartFrame = startFrame + Int64(i * samplesPerPoint)
        let pointEndFrame = pointStartFrame + Int64(samplesPerPoint)

        // Calculate byte positions
        let startPosition = Int(pointStartFrame) * bytesPerFrame
        let endPosition = Int(pointEndFrame) * bytesPerFrame
        let segmentStartTime = Float(pointStartFrame) / sampleRate
        let segmentEndTime = Float(pointEndFrame) / sampleRate

        do {
            audioFile.framePosition = pointStartFrame
            guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat,
                                                frameCapacity: framesPerPoint) else {
                reject("AUDIO_READ_ERROR", "Error reading audio data: unable to allocate PCM buffer")
                return nil
            }
            try audioFile.read(into: buffer, frameCount: framesPerPoint)

            // Only the frames actually delivered by the read are valid; a short
            // read must not be padded with whatever happens to be in the buffer.
            let framesRead = Int(buffer.frameLength)
            guard framesRead > 0, let floatData = buffer.floatChannelData else { continue }

            // Only the first channel is analysed.
            let channel = floatData[0]

            var sumSquares: Float = 0
            var zeroCrossings = 0
            var prevValue: Float = 0
            var localMinAmplitude: Float = .greatestFiniteMagnitude
            var localMaxAmplitude: Float = -.greatestFiniteMagnitude

            // Process samples for this point
            for frame in 0..<framesRead {
                let value = channel[frame]
                sumSquares += value * value
                // A sign change between consecutive samples counts as a zero crossing.
                if frame > 0 && value * prevValue < 0 {
                    zeroCrossings += 1
                }
                prevValue = value

                let absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            let features = computeFeatures(segmentData: Array(UnsafeBufferPointer(start: channel, count: framesRead)),
                                           sampleRate: sampleRate,
                                           sumSquares: sumSquares,
                                           zeroCrossings: zeroCrossings,
                                           segmentLength: framesRead,
                                           featureOptions: featureOptions)

            let rms = features.rms
            let silent = rms < SILENCE_THRESHOLD_RMS
            let dB = Float(20 * log10(Double(rms)))

            let dataPoint = DataPoint(
                id: Int(uniqueIdCounter),
                amplitude: localMaxAmplitude,
                rms: rms,
                dB: dB,
                silent: silent,
                features: features,
                speech: SpeechFeatures(isActive: !silent),
                startTime: segmentStartTime,
                endTime: segmentEndTime,
                startPosition: startPosition,
                endPosition: endPosition,
                samples: framesRead
            )
            dataPoints.append(dataPoint)
            uniqueIdCounter += 1

            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
        } catch {
            reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
            return nil
        }
    }

    let endTime = CACurrentMediaTime()
    let extractionTimeMs = Float((endTime - startTime) * 1000)

    NSLog("""
    [AudioProcessor] Preview extraction completed:
    - dataPoints generated: \(dataPoints.count)
    - extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
    - amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
    """)

    return AudioAnalysisData(
        segmentDurationMs: 100, // Default 100ms
        durationMs: Int(durationMs), // Use actual duration of trimmed section
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: Int(sampleRate),
        samples: samplesInRange,
        dataPoints: dataPoints,
        amplitudeRange: AudioAnalysisData.AmplitudeRange(
            min: minAmplitude,
            max: maxAmplitude
        ),
        rmsRange: AudioAnalysisData.AmplitudeRange(
            min: 0,
            max: 1
        ),
        speechAnalysis: nil,
        extractionTimeMs: extractionTimeMs
    )
}
1186
+
1187
/// Returns the first Documents directory URL that the default file manager
/// reports for the user domain.
private func getDocumentsDirectory() -> URL {
    let documentDirectories = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
    return documentDirectories[0]
}
1191
+ }