@siteed/audio-studio 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375)
  1. package/CHANGELOG.md +535 -0
  2. package/LICENSE +21 -0
  3. package/README.md +167 -0
  4. package/android/build.gradle +143 -0
  5. package/android/src/androidTest/assets/chorus.wav +0 -0
  6. package/android/src/androidTest/assets/jfk.wav +0 -0
  7. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  8. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  9. package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
  10. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
  11. package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
  12. package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
  13. package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
  14. package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
  15. package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
  16. package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
  17. package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
  18. package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
  19. package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
  20. package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
  21. package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
  22. package/android/src/main/AndroidManifest.xml +30 -0
  23. package/android/src/main/CMakeLists.txt +29 -0
  24. package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
  25. package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
  26. package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
  27. package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
  28. package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
  29. package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
  30. package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
  31. package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
  32. package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
  33. package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
  34. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
  35. package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
  36. package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
  37. package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
  38. package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
  39. package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
  40. package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
  41. package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
  42. package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
  43. package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
  44. package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
  45. package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
  46. package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
  47. package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
  48. package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
  49. package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
  50. package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
  51. package/android/src/main/res/drawable/ic_microphone.xml +13 -0
  52. package/android/src/main/res/drawable/ic_pause.xml +10 -0
  53. package/android/src/main/res/drawable/ic_play.xml +10 -0
  54. package/android/src/main/res/drawable/ic_stop.xml +10 -0
  55. package/android/src/main/res/layout/notification_recording.xml +37 -0
  56. package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
  57. package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
  58. package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
  59. package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
  60. package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
  61. package/android/src/test/resources/chorus.wav +0 -0
  62. package/android/src/test/resources/generate_test_audio.py +94 -0
  63. package/android/src/test/resources/jfk.wav +0 -0
  64. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  65. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  66. package/app.plugin.js +3 -0
  67. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
  68. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  69. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
  70. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  71. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
  72. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  73. package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
  74. package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
  75. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
  76. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  77. package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
  78. package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
  79. package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
  80. package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
  81. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
  82. package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  83. package/build/cjs/AudioDeviceManager.js +688 -0
  84. package/build/cjs/AudioDeviceManager.js.map +1 -0
  85. package/build/cjs/AudioRecorder.provider.js +78 -0
  86. package/build/cjs/AudioRecorder.provider.js.map +1 -0
  87. package/build/cjs/AudioStudio.native.js +8 -0
  88. package/build/cjs/AudioStudio.native.js.map +1 -0
  89. package/build/cjs/AudioStudio.types.js +11 -0
  90. package/build/cjs/AudioStudio.types.js.map +1 -0
  91. package/build/cjs/AudioStudio.web.js +708 -0
  92. package/build/cjs/AudioStudio.web.js.map +1 -0
  93. package/build/cjs/AudioStudioModule.js +718 -0
  94. package/build/cjs/AudioStudioModule.js.map +1 -0
  95. package/build/cjs/WebRecorder.web.js +865 -0
  96. package/build/cjs/WebRecorder.web.js.map +1 -0
  97. package/build/cjs/constants/platformLimitations.js +99 -0
  98. package/build/cjs/constants/platformLimitations.js.map +1 -0
  99. package/build/cjs/constants.js +20 -0
  100. package/build/cjs/constants.js.map +1 -0
  101. package/build/cjs/events.js +29 -0
  102. package/build/cjs/events.js.map +1 -0
  103. package/build/cjs/hooks/useAudioDevices.js +179 -0
  104. package/build/cjs/hooks/useAudioDevices.js.map +1 -0
  105. package/build/cjs/index.js +64 -0
  106. package/build/cjs/index.js.map +1 -0
  107. package/build/cjs/trimAudio.js +76 -0
  108. package/build/cjs/trimAudio.js.map +1 -0
  109. package/build/cjs/useAudioRecorder.js +535 -0
  110. package/build/cjs/useAudioRecorder.js.map +1 -0
  111. package/build/cjs/utils/BlobFix.js +502 -0
  112. package/build/cjs/utils/BlobFix.js.map +1 -0
  113. package/build/cjs/utils/audioProcessing.js +136 -0
  114. package/build/cjs/utils/audioProcessing.js.map +1 -0
  115. package/build/cjs/utils/cleanNativeOptions.js +22 -0
  116. package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
  117. package/build/cjs/utils/concatenateBuffers.js +25 -0
  118. package/build/cjs/utils/concatenateBuffers.js.map +1 -0
  119. package/build/cjs/utils/convertPCMToFloat32.js +124 -0
  120. package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
  121. package/build/cjs/utils/crc32.js +52 -0
  122. package/build/cjs/utils/crc32.js.map +1 -0
  123. package/build/cjs/utils/encodingToBitDepth.js +17 -0
  124. package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
  125. package/build/cjs/utils/getWavFileInfo.js +96 -0
  126. package/build/cjs/utils/getWavFileInfo.js.map +1 -0
  127. package/build/cjs/utils/writeWavHeader.js +88 -0
  128. package/build/cjs/utils/writeWavHeader.js.map +1 -0
  129. package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
  130. package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
  131. package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
  132. package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
  133. package/build/cjs/workers/wasmGlueString.web.js +27 -0
  134. package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
  135. package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
  136. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  137. package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
  138. package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
  139. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
  140. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  141. package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
  142. package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
  143. package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
  144. package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  145. package/build/esm/AudioAnalysis/extractPreview.js +25 -0
  146. package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
  147. package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
  148. package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
  149. package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
  150. package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
  151. package/build/esm/AudioDeviceManager.js +681 -0
  152. package/build/esm/AudioDeviceManager.js.map +1 -0
  153. package/build/esm/AudioRecorder.provider.js +40 -0
  154. package/build/esm/AudioRecorder.provider.js.map +1 -0
  155. package/build/esm/AudioStudio.native.js +6 -0
  156. package/build/esm/AudioStudio.native.js.map +1 -0
  157. package/build/esm/AudioStudio.types.js +8 -0
  158. package/build/esm/AudioStudio.types.js.map +1 -0
  159. package/build/esm/AudioStudio.web.js +704 -0
  160. package/build/esm/AudioStudio.web.js.map +1 -0
  161. package/build/esm/AudioStudioModule.js +713 -0
  162. package/build/esm/AudioStudioModule.js.map +1 -0
  163. package/build/esm/WebRecorder.web.js +861 -0
  164. package/build/esm/WebRecorder.web.js.map +1 -0
  165. package/build/esm/constants/platformLimitations.js +90 -0
  166. package/build/esm/constants/platformLimitations.js.map +1 -0
  167. package/build/esm/constants.js +17 -0
  168. package/build/esm/constants.js.map +1 -0
  169. package/build/esm/events.js +21 -0
  170. package/build/esm/events.js.map +1 -0
  171. package/build/esm/hooks/useAudioDevices.js +176 -0
  172. package/build/esm/hooks/useAudioDevices.js.map +1 -0
  173. package/build/esm/index.js +23 -0
  174. package/build/esm/index.js.map +1 -0
  175. package/build/esm/trimAudio.js +69 -0
  176. package/build/esm/trimAudio.js.map +1 -0
  177. package/build/esm/useAudioRecorder.js +529 -0
  178. package/build/esm/useAudioRecorder.js.map +1 -0
  179. package/build/esm/utils/BlobFix.js +498 -0
  180. package/build/esm/utils/BlobFix.js.map +1 -0
  181. package/build/esm/utils/audioProcessing.js +133 -0
  182. package/build/esm/utils/audioProcessing.js.map +1 -0
  183. package/build/esm/utils/cleanNativeOptions.js +19 -0
  184. package/build/esm/utils/cleanNativeOptions.js.map +1 -0
  185. package/build/esm/utils/concatenateBuffers.js +21 -0
  186. package/build/esm/utils/concatenateBuffers.js.map +1 -0
  187. package/build/esm/utils/convertPCMToFloat32.js +120 -0
  188. package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
  189. package/build/esm/utils/crc32.js +50 -0
  190. package/build/esm/utils/crc32.js.map +1 -0
  191. package/build/esm/utils/encodingToBitDepth.js +13 -0
  192. package/build/esm/utils/encodingToBitDepth.js.map +1 -0
  193. package/build/esm/utils/getWavFileInfo.js +92 -0
  194. package/build/esm/utils/getWavFileInfo.js.map +1 -0
  195. package/build/esm/utils/writeWavHeader.js +84 -0
  196. package/build/esm/utils/writeWavHeader.js.map +1 -0
  197. package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
  198. package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
  199. package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
  200. package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
  201. package/build/esm/workers/wasmGlueString.web.js +24 -0
  202. package/build/esm/workers/wasmGlueString.web.js.map +1 -0
  203. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
  204. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  205. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
  206. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
  207. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
  208. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  209. package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
  210. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
  211. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
  212. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
  213. package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
  214. package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
  215. package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
  216. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  217. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
  218. package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
  219. package/build/types/AudioDeviceManager.d.ts +187 -0
  220. package/build/types/AudioDeviceManager.d.ts.map +1 -0
  221. package/build/types/AudioRecorder.provider.d.ts +11 -0
  222. package/build/types/AudioRecorder.provider.d.ts.map +1 -0
  223. package/build/types/AudioStudio.native.d.ts +3 -0
  224. package/build/types/AudioStudio.native.d.ts.map +1 -0
  225. package/build/types/AudioStudio.types.d.ts +760 -0
  226. package/build/types/AudioStudio.types.d.ts.map +1 -0
  227. package/build/types/AudioStudio.web.d.ts +96 -0
  228. package/build/types/AudioStudio.web.d.ts.map +1 -0
  229. package/build/types/AudioStudioModule.d.ts +3 -0
  230. package/build/types/AudioStudioModule.d.ts.map +1 -0
  231. package/build/types/WebRecorder.web.d.ts +208 -0
  232. package/build/types/WebRecorder.web.d.ts.map +1 -0
  233. package/build/types/constants/platformLimitations.d.ts +40 -0
  234. package/build/types/constants/platformLimitations.d.ts.map +1 -0
  235. package/build/types/constants.d.ts +14 -0
  236. package/build/types/constants.d.ts.map +1 -0
  237. package/build/types/events.d.ts +29 -0
  238. package/build/types/events.d.ts.map +1 -0
  239. package/build/types/hooks/useAudioDevices.d.ts +15 -0
  240. package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
  241. package/build/types/index.d.ts +21 -0
  242. package/build/types/index.d.ts.map +1 -0
  243. package/build/types/trimAudio.d.ts +25 -0
  244. package/build/types/trimAudio.d.ts.map +1 -0
  245. package/build/types/useAudioRecorder.d.ts +22 -0
  246. package/build/types/useAudioRecorder.d.ts.map +1 -0
  247. package/build/types/utils/BlobFix.d.ts +9 -0
  248. package/build/types/utils/BlobFix.d.ts.map +1 -0
  249. package/build/types/utils/audioProcessing.d.ts +24 -0
  250. package/build/types/utils/audioProcessing.d.ts.map +1 -0
  251. package/build/types/utils/cleanNativeOptions.d.ts +15 -0
  252. package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
  253. package/build/types/utils/concatenateBuffers.d.ts +8 -0
  254. package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
  255. package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
  256. package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
  257. package/build/types/utils/crc32.d.ts +7 -0
  258. package/build/types/utils/crc32.d.ts.map +1 -0
  259. package/build/types/utils/encodingToBitDepth.d.ts +5 -0
  260. package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
  261. package/build/types/utils/getWavFileInfo.d.ts +26 -0
  262. package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
  263. package/build/types/utils/writeWavHeader.d.ts +34 -0
  264. package/build/types/utils/writeWavHeader.d.ts.map +1 -0
  265. package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  266. package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  267. package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
  268. package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  269. package/build/types/workers/wasmGlueString.web.d.ts +2 -0
  270. package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
  271. package/cpp/AudioFeatures.cpp +274 -0
  272. package/cpp/AudioFeatures.h +85 -0
  273. package/cpp/AudioFeaturesBridge.cpp +146 -0
  274. package/cpp/AudioFeaturesBridge.h +47 -0
  275. package/cpp/MelSpectrogram.cpp +227 -0
  276. package/cpp/MelSpectrogram.h +82 -0
  277. package/cpp/MelSpectrogramBridge.cpp +112 -0
  278. package/cpp/MelSpectrogramBridge.h +33 -0
  279. package/cpp/kiss_fft/COPYING +11 -0
  280. package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
  281. package/cpp/kiss_fft/kiss_fft.c +424 -0
  282. package/cpp/kiss_fft/kiss_fft.h +160 -0
  283. package/cpp/kiss_fft/kiss_fft_log.h +36 -0
  284. package/cpp/kiss_fft/kiss_fftr.c +155 -0
  285. package/cpp/kiss_fft/kiss_fftr.h +54 -0
  286. package/expo-module.config.json +10 -0
  287. package/ios/AudioAnalysisData.swift +74 -0
  288. package/ios/AudioDeviceManager.swift +670 -0
  289. package/ios/AudioFeaturesWrapper.h +21 -0
  290. package/ios/AudioFeaturesWrapper.mm +63 -0
  291. package/ios/AudioNotificationManager.swift +154 -0
  292. package/ios/AudioProcessingHelpers.swift +797 -0
  293. package/ios/AudioProcessor.swift +1191 -0
  294. package/ios/AudioStreamError.swift +7 -0
  295. package/ios/AudioStreamManager.swift +2369 -0
  296. package/ios/AudioStreamManagerDelegate.swift +16 -0
  297. package/ios/AudioStudio.podspec +39 -0
  298. package/ios/AudioStudioModule.swift +1111 -0
  299. package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
  300. package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
  301. package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
  302. package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
  303. package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
  304. package/ios/AudioStudioTests/Info.plist +22 -0
  305. package/ios/AudioStudioTests/README.md +39 -0
  306. package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
  307. package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
  308. package/ios/DataPoint.swift +54 -0
  309. package/ios/DecodingConfig.swift +59 -0
  310. package/ios/FFT.swift +62 -0
  311. package/ios/Features.swift +95 -0
  312. package/ios/ISSUE_IOS.md +68 -0
  313. package/ios/Logger.swift +39 -0
  314. package/ios/MelSpectrogramWrapper.h +30 -0
  315. package/ios/MelSpectrogramWrapper.mm +97 -0
  316. package/ios/NotificationExtension.swift +15 -0
  317. package/ios/RecordingResult.swift +22 -0
  318. package/ios/RecordingSettings.swift +311 -0
  319. package/ios/WaveformExtractor.swift +105 -0
  320. package/ios/tests/README.md +41 -0
  321. package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
  322. package/ios/tests/integration/buffer_duration_test.swift +185 -0
  323. package/ios/tests/integration/compressed_only_output_test.swift +271 -0
  324. package/ios/tests/integration/output_control_test.swift +322 -0
  325. package/ios/tests/integration/run_integration_tests.sh +37 -0
  326. package/ios/tests/opus_support_test_macos.swift +154 -0
  327. package/ios/tests/standalone/audio_processing_test.swift +144 -0
  328. package/ios/tests/standalone/audio_recording_test.swift +277 -0
  329. package/ios/tests/standalone/audio_streaming_test.swift +249 -0
  330. package/ios/tests/standalone/standalone_test.swift +144 -0
  331. package/package.json +146 -0
  332. package/plugin/build/index.cjs +194 -0
  333. package/plugin/build/index.d.cts +22 -0
  334. package/plugin/build/index.js +194 -0
  335. package/plugin/src/index.ts +285 -0
  336. package/plugin/tsconfig.json +10 -0
  337. package/plugin/tsconfig.tsbuildinfo +1 -0
  338. package/prebuilt/wasm/mel-spectrogram.js +18 -0
  339. package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
  340. package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
  341. package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
  342. package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
  343. package/src/AudioAnalysis/extractAudioData.ts +17 -0
  344. package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
  345. package/src/AudioAnalysis/extractPreview.ts +34 -0
  346. package/src/AudioAnalysis/extractWaveform.ts +22 -0
  347. package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
  348. package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
  349. package/src/AudioDeviceManager.ts +800 -0
  350. package/src/AudioRecorder.provider.tsx +57 -0
  351. package/src/AudioStudio.native.ts +6 -0
  352. package/src/AudioStudio.types.ts +899 -0
  353. package/src/AudioStudio.web.ts +911 -0
  354. package/src/AudioStudioModule.ts +984 -0
  355. package/src/WebRecorder.web.ts +1114 -0
  356. package/src/constants/platformLimitations.ts +118 -0
  357. package/src/constants.ts +21 -0
  358. package/src/events.ts +63 -0
  359. package/src/hooks/useAudioDevices.ts +213 -0
  360. package/src/index.ts +67 -0
  361. package/src/trimAudio.ts +94 -0
  362. package/src/types/crc-32.d.ts +9 -0
  363. package/src/useAudioRecorder.tsx +784 -0
  364. package/src/utils/BlobFix.ts +561 -0
  365. package/src/utils/audioProcessing.ts +205 -0
  366. package/src/utils/cleanNativeOptions.ts +18 -0
  367. package/src/utils/concatenateBuffers.ts +24 -0
  368. package/src/utils/convertPCMToFloat32.ts +170 -0
  369. package/src/utils/crc32.ts +59 -0
  370. package/src/utils/encodingToBitDepth.ts +18 -0
  371. package/src/utils/getWavFileInfo.ts +132 -0
  372. package/src/utils/writeWavHeader.ts +115 -0
  373. package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
  374. package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
  375. package/src/workers/wasmGlueString.web.ts +23 -0
@@ -0,0 +1,2237 @@
1
+ package net.siteed.audiostudio
2
+
3
+ import java.nio.ByteBuffer
4
+ import java.nio.ByteOrder
5
+ import kotlin.math.*
6
+ import android.util.Log
7
+ import java.io.File
8
+ import java.util.concurrent.atomic.AtomicLong
9
+ import kotlin.system.measureTimeMillis
10
+ import android.media.MediaExtractor
11
+ import android.media.MediaFormat
12
+ import android.media.MediaCodec
13
+ import java.io.FileInputStream
14
+ import java.io.RandomAccessFile
15
+ import java.util.zip.CRC32
16
+ import net.siteed.audiostudio.LogUtils
17
+
18
/**
 * Options controlling how audio is decoded.
 *
 * @property targetSampleRate optional target sample rate; `null` leaves it unspecified.
 * @property targetChannels optional target number of channels; `null` leaves it unspecified.
 * @property targetBitDepth PCM bit depth to produce; defaults to 16-bit.
 * @property normalizeAudio whether audio levels should be normalized.
 */
data class DecodingConfig(
    val targetSampleRate: Int? = null,
    val targetChannels: Int? = null,
    val targetBitDepth: Int = 16,
    val normalizeAudio: Boolean = false,
)
24
+
25
/**
 * Result of a spectrogram computation.
 *
 * Equality and hashing are based on array *contents*. The `equals`/`hashCode` that a
 * data class generates would compare the array properties by reference, making two
 * instances with identical values unequal.
 *
 * @property spectrogram 2D array indexed as `[time, frequency]`.
 * @property timeStamps time (in seconds) for each frame.
 * @property frequencies frequency (in Hz) for each mel bin.
 */
data class SpectrogramData(
    val spectrogram: Array<FloatArray>,
    val timeStamps: FloatArray,
    val frequencies: FloatArray,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is SpectrogramData) return false
        return spectrogram.contentDeepEquals(other.spectrogram) &&
            timeStamps.contentEquals(other.timeStamps) &&
            frequencies.contentEquals(other.frequencies)
    }

    override fun hashCode(): Int {
        var result = spectrogram.contentDeepHashCode()
        result = 31 * result + timeStamps.contentHashCode()
        result = 31 * result + frequencies.contentHashCode()
        return result
    }
}
30
+
31
+ class AudioProcessor(private val filesDir: File) {
32
companion object {
    // NOTE(review): the use of this divisor is not visible in this part of the file; confirm before changing.
    const val DCT_SQRT_DIVISOR: Double = 2.0

    private const val N_FFT: Int = 1024
    private const val N_CHROMA: Int = 12

    // Tag handed to LogUtils for every message emitted by this class.
    private const val CLASS_NAME: String = "AudioProcessor"

    // Lives in the companion object so the value is maintained during pause/resume cycles.
    private val uniqueIdCounter: AtomicLong = AtomicLong(0L)

    /** Sets the shared unique-id counter back to zero. */
    fun resetUniqueIdCounter() = uniqueIdCounter.set(0L)
}
44
+
45
/**
 * Raw audio bytes together with the format needed to interpret them.
 *
 * Equality and hashing use the *contents* of [data]. The `equals`/`hashCode` that a
 * data class generates would compare the byte array by reference, making two
 * instances holding the same bytes unequal.
 *
 * @property data raw audio bytes.
 * @property sampleRate sample rate.
 * @property bitDepth bits per sample.
 * @property channels number of channels.
 * @property durationMs duration in milliseconds; defaults to 0.
 */
data class AudioData(
    val data: ByteArray,
    val sampleRate: Int,
    val bitDepth: Int,
    val channels: Int,
    val durationMs: Long = 0,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is AudioData) return false
        return sampleRate == other.sampleRate &&
            bitDepth == other.bitDepth &&
            channels == other.channels &&
            durationMs == other.durationMs &&
            data.contentEquals(other.data)
    }

    override fun hashCode(): Int {
        var result = data.contentHashCode()
        result = 31 * result + sampleRate
        result = 31 * result + bitDepth
        result = 31 * result + channels
        result = 31 * result + durationMs.hashCode()
        return result
    }
}
46
+
47
// Amplitude extremes held on the instance, seeded so the first min()/max() comparison replaces them.
// NOTE(review): no read or write of these fields is visible in this part of the file; confirm usage.
private var cumulativeMinAmplitude: Float = Float.MAX_VALUE
private var cumulativeMaxAmplitude: Float = Float.NEGATIVE_INFINITY
49
+
50
/**
 * Loads an uncompressed WAV file fully into memory.
 *
 * The path may carry a `file://` prefix. If no file exists at the given path, a file
 * with the same name is looked up inside [filesDir].
 *
 * The RIFF container is walked chunk by chunk: the `fmt ` chunk supplies the channel
 * count, sample rate and bit depth (only 8, 16 and 32 are accepted), and parsing stops
 * at the first `data` chunk, whose payload is returned unmodified.
 *
 * @param filePath path or `file://` URI of the WAV file.
 * @return the sample bytes plus format details, or `null` when the file is missing,
 *   malformed, uses an unsupported format, or cannot be read (the cause is logged).
 */
private fun loadAudioFile(filePath: String): AudioData? {
    try {
        val fileUri = filePath.removePrefix("file://")
        LogUtils.d(CLASS_NAME, "Processing WAV file: $fileUri")

        val file = File(fileUri).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: run {
                LogUtils.e(CLASS_NAME, "File not found: $fileUri")
                return null
            }

        // `use` closes the file handle on every exit path, including early returns and exceptions.
        return RandomAccessFile(file, "r").use { raf ->
            val fileSize = raf.length()

            // RIFF container layout: "RIFF" <uint32 size> "WAVE" <chunks...>
            val riffHeader = ByteArray(4).apply { raf.readFully(this) }
            if (String(riffHeader) != "RIFF") {
                LogUtils.e(CLASS_NAME, "Invalid RIFF header")
                return null
            }

            // Skip the 4-byte RIFF size field that sits between "RIFF" and "WAVE".
            raf.skipBytes(4)

            val waveHeader = ByteArray(4).apply { raf.readFully(this) }
            if (String(waveHeader) != "WAVE") {
                LogUtils.e(CLASS_NAME, "Invalid WAVE header")
                return null
            }

            var fmtChunkFound = false
            var dataChunkFound = false
            var sampleRate = 0
            var channels = 0
            var bitDepth = 0
            var dataOffset = 0L
            var dataSize = 0L

            // Parse chunks until the data chunk is found or the file runs out.
            while (raf.filePointer < fileSize - 8) {
                val chunkId = ByteArray(4).apply { raf.readFully(this) }.toString(Charsets.UTF_8)
                val chunkSizeBytes = ByteArray(4).apply { raf.readFully(this) }
                // Chunk sizes are unsigned 32-bit little-endian values.
                val chunkSize = ByteBuffer.wrap(chunkSizeBytes).order(ByteOrder.LITTLE_ENDIAN).int.toLong() and 0xFFFFFFFFL
                // RIFF chunks are word-aligned: an odd-sized payload is followed by one pad byte.
                val paddedChunkSize = chunkSize + (chunkSize and 1L)
                val chunkStart = raf.filePointer

                LogUtils.d(CLASS_NAME, "Found chunk: $chunkId ($chunkSize bytes)")

                when (chunkId) {
                    "fmt " -> {
                        if (chunkSize < 16) {
                            LogUtils.e(CLASS_NAME, "Invalid fmt chunk size")
                            return null
                        }

                        val formatData = ByteArray(16)
                        raf.readFully(formatData)
                        val formatBuffer = ByteBuffer.wrap(formatData).order(ByteOrder.LITTLE_ENDIAN)

                        val audioFormat = formatBuffer.short
                        channels = formatBuffer.short.toInt() and 0xFFFF
                        sampleRate = formatBuffer.int
                        val byteRate = formatBuffer.int
                        val blockAlign = formatBuffer.short
                        bitDepth = formatBuffer.short.toInt() and 0xFFFF

                        LogUtils.d(CLASS_NAME, "Raw format data: ${formatData.joinToString(", ")}")
                        LogUtils.d(CLASS_NAME, "Format chunk: audioFormat=$audioFormat, channels=$channels, sampleRate=$sampleRate, bitDepth=$bitDepth, byteRate=$byteRate, blockAlign=$blockAlign")

                        if (bitDepth !in listOf(8, 16, 32)) {
                            LogUtils.e(CLASS_NAME, "Invalid bit depth: $bitDepth")
                            return null
                        }
                        // Reject values that would make the frame/duration maths divide by zero.
                        if (channels <= 0 || sampleRate <= 0) {
                            LogUtils.e(CLASS_NAME, "Invalid format: channels=$channels, sampleRate=$sampleRate")
                            return null
                        }

                        // Jump over any fmt extension bytes (and the pad byte), never past the end of the file.
                        raf.seek(minOf(chunkStart + paddedChunkSize, fileSize))
                        fmtChunkFound = true
                    }
                    "data" -> {
                        dataOffset = chunkStart
                        dataSize = chunkSize
                        dataChunkFound = true
                        break
                    }
                    else -> {
                        // Skip unknown chunks, clamped so a bogus size cannot move past the end of the file.
                        val actualSkip = minOf(paddedChunkSize, fileSize - chunkStart)
                        raf.seek(chunkStart + actualSkip)
                    }
                }
            }

            if (!fmtChunkFound || !dataChunkFound) {
                LogUtils.e(CLASS_NAME, "Missing essential chunks (fmt=$fmtChunkFound, data=$dataChunkFound)")
                return null
            }

            // Fall back to "everything up to end of file" when the declared size is missing or too large.
            if (dataSize <= 0 || dataSize > fileSize - dataOffset) {
                dataSize = fileSize - dataOffset
                LogUtils.d(CLASS_NAME, "Adjusted data size to: $dataSize")
            }

            // A ByteArray cannot hold more than Int.MAX_VALUE bytes; fail explicitly rather than truncate.
            if (dataSize > Int.MAX_VALUE) {
                LogUtils.e(CLASS_NAME, "WAV data too large to load into memory: $dataSize bytes")
                return null
            }

            LogUtils.d(CLASS_NAME, "Reading PCM data: offset=$dataOffset, size=$dataSize")

            val wavData = ByteArray(dataSize.toInt())
            raf.seek(dataOffset)
            raf.readFully(wavData)

            // Each sample is bitDepth/8 bytes, and a frame holds one sample per channel.
            val bytesPerFrame = channels * (bitDepth / 8)
            val numFrames = wavData.size / bytesPerFrame
            val durationMs = (numFrames * 1000L) / sampleRate

            LogUtils.d(CLASS_NAME, "WAV duration calculation: size=${wavData.size}, bytesPerFrame=$bytesPerFrame, numFrames=$numFrames, sampleRate=$sampleRate, duration=${durationMs}ms")

            AudioData(
                data = wavData,
                sampleRate = sampleRate,
                channels = channels,
                bitDepth = bitDepth,
                durationMs = durationMs,
            )
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load WAV file: ${e.message}", e)
        return null
    }
}
180
+
181
/**
 * Processes the audio data and extracts features.
 *
 * The bytes are decoded to floats with [convertToFloatArray] and cut into consecutive
 * segments of `config.segmentDurationMs`; every segment produces one [DataPoint].
 * As a side effect the call widens `cumulativeMinAmplitude` / `cumulativeMaxAmplitude`
 * and takes one id per data point from `uniqueIdCounter`.
 *
 * @param data The audio data in bytes.
 * @param config The recording configuration.
 * @return AudioAnalysisData containing the extracted features.
 * @throws IllegalArgumentException if `config.encoding` is not `pcm_8bit`, `pcm_16bit` or `pcm_32bit`.
 */
fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
    if (data.isEmpty()) {
        LogUtils.e(CLASS_NAME, "Received empty audio data")
        return AudioAnalysisData(
            segmentDurationMs = config.segmentDurationMs,
            durationMs = 0,
            bitDepth = 16,
            numberOfChannels = config.channels,
            sampleRate = config.sampleRate,
            samples = 0,
            dataPoints = emptyList(),
            amplitudeRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            rmsRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            extractionTimeMs = 0f,
        )
    }

    val sampleRate = config.sampleRate.toFloat()
    val bitDepth = when (config.encoding) {
        "pcm_8bit" -> 8
        "pcm_16bit" -> 16
        "pcm_32bit" -> 32
        else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
    }
    val channelData = convertToFloatArray(data, bitDepth)
    val featureOptions = config.features

    val totalSamples = channelData.size
    // A very short segment duration truncates to 0 samples. Dividing by that yields
    // infinity, which converts to Int.MAX_VALUE points, so keep at least one sample.
    val samplesPerSegment = ((config.segmentDurationMs / 1000.0) * sampleRate).toInt().coerceAtLeast(1)
    val totalPoints = ceil(totalSamples.toDouble() / samplesPerSegment).toInt()

    LogUtils.d(CLASS_NAME, "Extracting waveform totalSize=${data.size} with $totalSamples samples --> $totalPoints points")
    LogUtils.d(CLASS_NAME, "segmentDuration: ${config.segmentDurationMs}ms, samplesPerSegment: $samplesPerSegment")

    // Only used for the log line below.
    val samplesPerPoint = ceil(channelData.size / totalPoints.toDouble()).toInt()
    LogUtils.d(CLASS_NAME, "Extracting waveform with samplesPerPoints=$samplesPerPoint")

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    var maxAmplitude = Float.NEGATIVE_INFINITY
    var minRms = Float.MAX_VALUE
    var maxRms = Float.NEGATIVE_INFINITY
    // Total duration in milliseconds derived from the decoded sample count and sample rate.
    val durationMs = (totalSamples.toFloat() / sampleRate * 1000).toInt()

    // Measure the time taken for audio processing
    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until totalPoints) {
            val start = i * samplesPerSegment
            val end = min(start + samplesPerSegment, totalSamples)
            val segmentData = channelData.copyOfRange(start, end)

            var sumSquares = 0f
            var zeroCrossings = 0
            var prevValue = 0f
            var localMinAmplitude = Float.MAX_VALUE
            // Absolute amplitudes are never negative, so 0 is the neutral seed for the
            // peak. Float.MIN_VALUE is the smallest positive float, not the most
            // negative one, and would leak into the result for silent segments.
            var localMaxAmplitude = 0f

            for (value in segmentData) {
                sumSquares += value * value
                // A sign change against the previous non-zero sample counts as a crossing.
                if (prevValue != 0f && value * prevValue < 0) zeroCrossings += 1
                prevValue = value

                val absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            val features = computeFeatures(
                segmentData = segmentData,
                sampleRate = sampleRate,
                sumSquares = sumSquares,
                zeroCrossings = zeroCrossings,
                segmentLength = segmentData.size,
                featureOptions = featureOptions,
                minAmplitude = localMinAmplitude,
                maxAmplitude = localMaxAmplitude
            )
            val rms = features.rms
            val silent = rms < 0.01
            val dB = 20 * log10(rms.toDouble()).toFloat()
            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
            minRms = min(minRms, rms)
            maxRms = max(maxRms, rms)

            val bytesPerSample = bitDepth / 8
            val startPosition = start * bytesPerSample * config.channels
            val endPosition = end * bytesPerSample * config.channels

            // Update cumulative amplitude range
            cumulativeMinAmplitude = min(cumulativeMinAmplitude, localMinAmplitude)
            cumulativeMaxAmplitude = max(cumulativeMaxAmplitude, localMaxAmplitude)

            val dataPoint = DataPoint(
                id = uniqueIdCounter.getAndIncrement(),
                amplitude = localMaxAmplitude, // Always use peak amplitude
                rms = rms, // Always include RMS
                dB = dB,
                silent = silent,
                features = features,
                speech = SpeechFeatures(isActive = !silent),
                startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                startPosition = startPosition,
                endPosition = endPosition,
                samples = segmentData.size
            )

            dataPoints.add(dataPoint)
        }
    }

    return AudioAnalysisData(
        segmentDurationMs = config.segmentDurationMs,
        durationMs = durationMs,
        bitDepth = bitDepth,
        numberOfChannels = config.channels,
        sampleRate = config.sampleRate,
        samples = totalSamples,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
315
+
316
/**
 * Puts the cumulative amplitude trackers back to their sentinel values so the next
 * min/max update starts a fresh range.
 */
fun resetCumulativeAmplitudeRange() {
    cumulativeMaxAmplitude = Float.MIN_VALUE
    cumulativeMinAmplitude = Float.MAX_VALUE
}
320
+
321
/**
 * Converts little-endian PCM bytes to floats.
 *
 * 16-bit samples are divided by 32768, 32-bit samples by `Int.MAX_VALUE`, and 8-bit
 * samples are treated as unsigned bytes centred on 128. Trailing bytes that do not
 * form a whole 16/32-bit sample are ignored.
 *
 * @param data The audio data in bytes.
 * @param bitDepth The bit depth of the audio data (8, 16 or 32).
 * @return The converted float array, one entry per sample.
 * @throws IllegalArgumentException if [bitDepth] is not 8, 16 or 32.
 */
private fun convertToFloatArray(data: ByteArray, bitDepth: Int): FloatArray {
    return when (bitDepth) {
        16 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            // Absolute reads fill the primitive array directly, with no boxed intermediate list.
            FloatArray(samples.remaining()) { index -> samples.get(index) / 32768.0f }
        }
        // Kotlin's Byte is signed, so mask to 0..255 before removing the 128 offset;
        // otherwise every byte at or above 0x80 lands far outside the expected range.
        8 -> FloatArray(data.size) { index -> ((data[index].toInt() and 0xFF) - 128) / 128.0f }
        32 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            FloatArray(samples.remaining()) { index -> samples.get(index) / Int.MAX_VALUE.toFloat() }
        }
        else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
    }
}
345
+
346
/**
 * Computes the features of one audio segment.
 *
 * RMS is always computed; every other feature is computed only when its key in
 * [featureOptions] is `true` and otherwise keeps a neutral value (0 or an empty list).
 * A failure while computing an optional feature is logged and leaves that feature at
 * its neutral value instead of aborting the whole segment.
 *
 * @param segmentData The segment data.
 * @param sampleRate The sample rate of the audio data.
 * @param minAmplitude The minimum amplitude, passed through to the result.
 * @param maxAmplitude The maximum amplitude, passed through to the result.
 * @param sumSquares The sum of squares of the segment samples.
 * @param zeroCrossings The zero crossings counted in the segment.
 * @param segmentLength The length of the segment.
 * @param featureOptions The feature options to compute.
 * @return The computed features.
 */
private fun computeFeatures(
    segmentData: FloatArray,
    sampleRate: Float,
    minAmplitude: Float,
    maxAmplitude: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: Map<String, Boolean>
): Features {
    // An empty segment would otherwise divide 0 by 0 and put NaN into rms and zcr.
    val hasSamples = segmentLength > 0
    val rms = if (hasSamples) sqrt(sumSquares / segmentLength) else 0f
    val energy = if (featureOptions["energy"] == true) sumSquares else 0f
    val zcr = if (featureOptions["zcr"] == true && hasSamples) zeroCrossings / segmentLength.toFloat() else 0f

    // Determine if we need the C++ audio features (single JNI call for spectral + MFCC + chroma)
    val needSpectral = featureOptions["spectralCentroid"] == true ||
        featureOptions["spectralFlatness"] == true ||
        featureOptions["spectralRolloff"] == true ||
        featureOptions["spectralBandwidth"] == true
    val needMfcc = featureOptions["mfcc"] == true
    val needChroma = featureOptions["chromagram"] == true

    // Single C++ call for all FFT-based features
    var spectralCentroid = 0f
    var spectralFlatness = 0f
    var spectralRolloff = 0f
    var spectralBandwidth = 0f
    var mfcc: List<Float> = emptyList()
    var chroma: List<Float> = emptyList()

    if (needSpectral || needMfcc || needChroma) {
        try {
            val cppResult = AudioFeaturesNative.computeFrame(
                segmentData,
                sampleRate.toInt(),
                N_FFT,
                13, // nMfcc
                26, // nMelFilters
                needMfcc,
                needChroma
            )
            if (needSpectral) {
                spectralCentroid = (cppResult["spectralCentroid"] as? Float) ?: 0f
                spectralFlatness = (cppResult["spectralFlatness"] as? Float) ?: 0f
                spectralRolloff = (cppResult["spectralRolloff"] as? Float) ?: 0f
                spectralBandwidth = (cppResult["spectralBandwidth"] as? Float) ?: 0f
            }
            if (needMfcc) {
                mfcc = (cppResult["mfcc"] as? FloatArray)?.toList() ?: emptyList()
            }
            if (needChroma) {
                chroma = (cppResult["chromagram"] as? FloatArray)?.toList() ?: emptyList()
            }
        } catch (e: Exception) {
            LogUtils.e(CLASS_NAME, "Failed to compute C++ audio features: ${e.message}", e)
        }
    }

    val melSpectrogram = try {
        if (featureOptions["melSpectrogram"] == true) computeMelSpectrogram(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to compute mel spectrogram: ${e.message}", e)
        emptyList()
    }

    val tempo = try {
        if (featureOptions["tempo"] == true) extractTempo(segmentData, sampleRate) else 0f
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to extract tempo: ${e.message}", e)
        0f
    }

    val hnr = try {
        if (featureOptions["hnr"] == true) extractHNR(segmentData) else 0f
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to extract HNR: ${e.message}", e)
        0f
    }

    val spectralContrast = try {
        if (featureOptions["spectralContrast"] == true) computeSpectralContrast(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to compute spectral contrast: ${e.message}", e)
        emptyList()
    }

    val tonnetz = try {
        if (featureOptions["tonnetz"] == true) computeTonnetz(segmentData, sampleRate) else emptyList()
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to compute tonnetz: ${e.message}", e)
        emptyList()
    }

    // Guarded like the other optional features so a pitch failure cannot discard the segment.
    val pitch = try {
        if (featureOptions["pitch"] == true) estimatePitch(segmentData, sampleRate) else 0.0f
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to estimate pitch: ${e.message}", e)
        0.0f
    }

    // Checksum over the little-endian float representation of the segment.
    val crc32Value = if (featureOptions["crc32"] == true) {
        val byteBuffer = ByteBuffer.allocate(segmentData.size * 4)
            .order(ByteOrder.LITTLE_ENDIAN)
        segmentData.forEach { value ->
            byteBuffer.putFloat(value)
        }

        val crc32 = CRC32()
        crc32.update(byteBuffer.array())
        crc32.value
    } else null

    return Features(
        energy = energy,
        mfcc = mfcc,
        rms = rms,
        minAmplitude = minAmplitude,
        maxAmplitude = maxAmplitude,
        zcr = zcr,
        spectralCentroid = spectralCentroid,
        spectralFlatness = spectralFlatness,
        spectralRolloff = spectralRolloff,
        spectralBandwidth = spectralBandwidth,
        tempo = tempo,
        hnr = hnr,
        melSpectrogram = melSpectrogram,
        chromagram = chroma,
        spectralContrast = spectralContrast,
        tonnetz = tonnetz,
        pitch = pitch,
        crc32 = crc32Value
    )
}
486
+
487
/**
 * Estimates a tempo from the spacing of spectral-flux onset peaks.
 *
 * The segment is scanned in frames of 2048 samples with a hop of 512. For each frame
 * the positive magnitude increase against the previous frame is summed into an onset
 * envelope; local maxima of that envelope are treated as onsets and their mean spacing
 * is converted to beats per minute.
 *
 * @param segmentData The samples to analyse.
 * @param sampleRate The sample rate of [segmentData].
 * @return The estimated tempo, or 120 when fewer than two onset peaks are found.
 */
private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
    val hopLength = 512
    val frameLength = 2048
    val binCount = frameLength / 2

    // One transform instance serves every frame; building it per frame was loop-invariant work.
    val fft = FFT(frameLength)

    // Onset strength signal (spectral flux), one value per frame.
    val onsetEnvelope = mutableListOf<Float>()
    var previousSpectrum = FloatArray(binCount)

    for (frameStart in 0 until segmentData.size - frameLength step hopLength) {
        // The loop bound guarantees a full frame, so a primitive range copy is enough.
        val fftData = segmentData.copyOfRange(frameStart, frameStart + frameLength)
        fft.realForward(fftData)

        // Magnitude of each (re, im) pair in the transformed buffer.
        val magnitudes = FloatArray(binCount) { bin ->
            val re = fftData[2 * bin]
            val im = fftData[2 * bin + 1]
            sqrt(re * re + im * im)
        }

        // Spectral flux: sum of the positive differences against the previous frame.
        var flux = 0f
        for (bin in magnitudes.indices) {
            flux += maxOf(magnitudes[bin] - previousSpectrum[bin], 0f)
        }
        onsetEnvelope.add(flux)
        previousSpectrum = magnitudes
    }

    // Strict local maxima of the onset envelope.
    val peaks = (1 until onsetEnvelope.size - 1).filter { frame ->
        onsetEnvelope[frame] > onsetEnvelope[frame - 1] && onsetEnvelope[frame] > onsetEnvelope[frame + 1]
    }

    // Convert the mean peak spacing (in hops) to beats per minute.
    return if (peaks.size > 1) {
        val averageInterval = peaks.zipWithNext { a, b -> b - a }.average().toFloat()
        60f * sampleRate / (hopLength * averageInterval)
    } else {
        120f // Default tempo if no clear peaks found
    }
}
536
+
537
/**
 * Computes centroid, flatness, roll-off and bandwidth from one FFT frame.
 *
 * Only the first `N_FFT` samples are analysed; shorter inputs are zero padded to
 * `N_FFT` after the Hann window has been applied.
 *
 * @param samples The samples to analyse.
 * @param sampleRate The sample rate of [samples].
 * @return The spectral features of the analysed frame.
 */
private fun extractSpectralFeatures(samples: FloatArray, sampleRate: Float): SpectralFeatures {
    // The FFT works on a fixed-size buffer, so longer inputs are cut to the first N_FFT samples.
    val windowed = applyHannWindow(if (samples.size > N_FFT) samples.copyOf(N_FFT) else samples)

    // A fresh FloatArray is zero filled, which gives the zero padding for short inputs.
    val fftBuffer = FloatArray(N_FFT)
    windowed.copyInto(fftBuffer, 0, 0, minOf(windowed.size, N_FFT))

    FFT(N_FFT).realForward(fftBuffer)

    // Magnitude spectrum for bins 0..N_FFT/2 (DC through Nyquist).
    val nyquistBin = N_FFT / 2
    val magnitudeSpectrum = FloatArray(nyquistBin + 1)
    // The Nyquist line below reads index 1 as the real Nyquist term, i.e. it assumes the
    // packed real-FFT layout where index 0 is the DC term and index 1 is not an imaginary
    // part. Under that layout DC must not fold index 1 in as its imaginary component.
    // NOTE(review): confirm against the project's FFT.realForward output layout.
    magnitudeSpectrum[0] = abs(fftBuffer[0])
    for (bin in 1 until nyquistBin) {
        val re = fftBuffer[2 * bin]
        val im = fftBuffer[2 * bin + 1]
        magnitudeSpectrum[bin] = sqrt(re * re + im * im)
    }
    magnitudeSpectrum[nyquistBin] = abs(fftBuffer[1])

    // Power spectrum, used for spectral flatness.
    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { bin -> magnitudeSpectrum[bin] * magnitudeSpectrum[bin] }

    val centroid = computeSpectralCentroid(magnitudeSpectrum, sampleRate)
    val flatness = computeSpectralFlatness(powerSpectrum)
    val rollOff = computeSpectralRollOff(magnitudeSpectrum, sampleRate)
    val bandwidth = computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid)

    return SpectralFeatures(
        centroid = centroid,
        flatness = flatness,
        rollOff = rollOff,
        bandwidth = bandwidth
    )
}
586
+
587
/**
 * Magnitude-weighted mean frequency of the spectrum.
 *
 * Bin `k` is mapped to the frequency `k * (sampleRate / N_FFT)`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @return The centroid, or 0 when the magnitudes sum to zero.
 */
private fun computeSpectralCentroid(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) return 0f

    var weightedTotal = 0f
    for (bin in magnitudeSpectrum.indices) {
        weightedTotal += bin * (sampleRate / N_FFT) * magnitudeSpectrum[bin]
    }

    return weightedTotal / totalMagnitude
}
597
+
598
/**
 * Ratio of the geometric mean to the arithmetic mean of the power spectrum.
 *
 * The geometric mean is evaluated in log space, with a small epsilon added to every
 * value so that a zero bin does not produce `ln(0)`.
 *
 * @param powerSpectrum Power per frequency bin.
 * @return The flatness, or 0 when the arithmetic mean is zero.
 */
private fun computeSpectralFlatness(powerSpectrum: FloatArray): Float {
    val binCount = powerSpectrum.size

    // Mean of the logs, exponentiated, is the geometric mean.
    val logTotal = powerSpectrum.fold(0.0f) { running, power -> running + ln(power + 1e-10f) }
    val geometricMean = exp(logTotal / binCount)

    val arithmeticMean = powerSpectrum.sum() / binCount

    if (arithmeticMean != 0f) {
        return geometricMean / arithmeticMean
    }
    return 0f
}
611
+
612
/**
 * Frequency below which 85% of the summed spectrum magnitude lies.
 *
 * Bin `k` is mapped to the frequency `k * (sampleRate / N_FFT)`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @return The roll-off frequency, or 0 when the threshold is never reached.
 */
private fun computeSpectralRollOff(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val threshold = magnitudeSpectrum.sum() * 0.85f
    var accumulated = 0f

    magnitudeSpectrum.forEachIndexed { bin, magnitude ->
        accumulated += magnitude
        // First bin at which the running total reaches the threshold.
        if (accumulated >= threshold) {
            return bin * (sampleRate / N_FFT)
        }
    }

    return 0f
}
626
+
627
/**
 * Magnitude-weighted standard deviation of frequency around [centroid].
 *
 * Bin `k` is mapped to `k * sampleRate / (2 * magnitudeSpectrum.size)`; the original
 * comment states this matches the iOS frequency calculation.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @param centroid The spectral centroid to measure the spread around.
 * @return The bandwidth, or 0 when the magnitudes sum to zero.
 */
private fun computeSpectralBandwidth(
    magnitudeSpectrum: FloatArray,
    sampleRate: Float,
    centroid: Float
): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) return 0f

    // Match iOS frequency calculation
    var weightedSpread = 0f
    for (bin in magnitudeSpectrum.indices) {
        val frequency = bin * sampleRate / (2 * magnitudeSpectrum.size)
        weightedSpread += magnitudeSpectrum[bin] * (frequency - centroid).pow(2)
    }

    return sqrt(weightedSpread / totalMagnitude)
}
643
+
644
/**
 * Result holder for the spectral descriptors of one FFT frame.
 *
 * Every field defaults to 0. Centroid, roll-off and bandwidth are derived from the
 * sample rate passed to the analysis, so they are in that unit (presumably Hz).
 */
private data class SpectralFeatures(
    val centroid: Float = 0f, // magnitude-weighted mean frequency
    val flatness: Float = 0f, // geometric mean / arithmetic mean of the power spectrum
    val rollOff: Float = 0f, // frequency below which 85% of the magnitude lies
    val bandwidth: Float = 0f, // weighted spread around the centroid
)
650
+
651
/**
 * Resets per-segment accumulators through caller-supplied setters.
 *
 * The setters are invoked in parameter order, then the sample list is emptied.
 *
 * @param sumSquaresUpdater Receives 0 to reset the sum of squares.
 * @param zeroCrossingsUpdater Receives 0 to reset the zero-crossing count.
 * @param localMinAmplitudeUpdater Receives `Float.MAX_VALUE` to reset the local minimum.
 * @param localMaxAmplitudeUpdater Receives `Float.MIN_VALUE` to reset the local maximum.
 * @param segmentData The segment data list to clear.
 */
private fun resetSegmentData(
    sumSquaresUpdater: (Float) -> Unit,
    zeroCrossingsUpdater: (Int) -> Unit,
    localMinAmplitudeUpdater: (Float) -> Unit,
    localMaxAmplitudeUpdater: (Float) -> Unit,
    segmentData: MutableList<Float>
) {
    val clearedSum = 0f
    val clearedCrossings = 0
    sumSquaresUpdater.invoke(clearedSum)
    zeroCrossingsUpdater.invoke(clearedCrossings)
    localMinAmplitudeUpdater.invoke(Float.MAX_VALUE)
    localMaxAmplitudeUpdater.invoke(Float.MIN_VALUE)
    segmentData.clear()
}
672
+
673
/**
 * Computes 13 MFCCs (Mel-Frequency Cepstral Coefficients) from the audio data.
 *
 * The power spectrum from `prepareFFT` is projected onto 26 mel filters, the filter
 * energies are log-compressed (floored at 1e-10), and a cosine transform of those log
 * energies yields the coefficients.
 *
 * @param samples The samples to analyse.
 * @param sampleRate The sample rate of [samples].
 * @return The 13 coefficients, or an empty list when a filter's width does not match the power spectrum.
 */
private fun computeMFCC(samples: FloatArray, sampleRate: Float): List<Float> {
    val filterCount = 26
    val coefficientCount = 13

    val (powerSpectrum, _) = prepareFFT(samples, sampleRate)
    val melFilters = computeMelFilterbank(
        numFilters = filterCount,
        powerSpectrumSize = powerSpectrum.size,
        sampleRate = sampleRate
    )

    val widthMismatch = melFilters.any { filter -> filter.size != powerSpectrum.size }
    if (widthMismatch) {
        LogUtils.e(CLASS_NAME, "Mel filter size (${melFilters[0].size}) does not match power spectrum size (${powerSpectrum.size})")
        return emptyList()
    }

    // Log energy collected by each mel filter.
    val logMelEnergies = FloatArray(filterCount)
    for (filter in 0 until filterCount) {
        var collected = 0f
        for (bin in powerSpectrum.indices) {
            collected += powerSpectrum[bin] * melFilters[filter][bin]
        }
        logMelEnergies[filter] = ln(maxOf(collected, 1e-10f))
    }

    // Cosine transform of the log energies, scaled by sqrt(2 / filterCount).
    val coefficients = FloatArray(coefficientCount)
    for (k in 0 until coefficientCount) {
        var projection = 0f
        for (filter in logMelEnergies.indices) {
            projection += logMelEnergies[filter] * cos(PI * k * (2 * filter + 1) / (2 * filterCount)).toFloat()
        }
        coefficients[k] = projection * sqrt(2f / filterCount)
    }

    return coefficients.toList()
}
707
+
708
/**
 * Builds a bank of triangular filters whose edges are evenly spaced on the mel scale
 * between 0 Hz and half the sample rate.
 *
 * NOTE(review): edge frequencies are mapped to bins with
 * `hz * powerSpectrumSize / sampleRate`, so the upper edge (`sampleRate / 2`) lands
 * near `powerSpectrumSize / 2` — confirm this matches the spectrum layout of the caller.
 *
 * @param numFilters The number of Mel filters.
 * @param powerSpectrumSize The size of the power spectrum; each filter has this many weights.
 * @param sampleRate The sample rate of the audio data.
 * @return One weight array per filter.
 */
private fun computeMelFilterbank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): Array<FloatArray> {
    // Band limits converted to the mel scale.
    val melLow = hzToMel(0f)
    val melHigh = hzToMel(sampleRate / 2)

    // numFilters + 2 equally spaced mel points: each filter uses three consecutive ones.
    val melSpacing = (melHigh - melLow) / (numFilters + 1)
    val edgeBins = List(numFilters + 2) { point ->
        val hz = melToHz(melLow + point * melSpacing)
        // Clamp so that no edge lies beyond the last bin.
        minOf((hz * powerSpectrumSize / sampleRate).roundToInt(), powerSpectrumSize - 1)
    }

    val filterbank = Array(numFilters) { FloatArray(powerSpectrumSize) }

    for (filter in 0 until numFilters) {
        if (filter + 2 >= edgeBins.size) continue // defensive bound on the edge lookup

        val left = edgeBins[filter]
        val peak = edgeBins[filter + 1]
        val right = edgeBins[filter + 2]
        val weights = filterbank[filter]

        // Rising edge; the range is empty when peak <= left, so no division by zero.
        for (bin in left until peak) {
            weights[bin] = (bin - left).toFloat() / (peak - left).toFloat()
        }
        // Falling edge; the range is empty when right <= peak.
        for (bin in peak until right) {
            weights[bin] = (right - bin).toFloat() / (right - peak).toFloat()
        }
    }

    return filterbank
}
763
+
764
/**
 * Computes the first [numCoefficients] terms of a cosine transform of the log energies.
 *
 * Each term is the sum of `logEnergies[j] * cos(PI * i * (j + 0.5) / n)` divided by
 * `sqrt(DCT_SQRT_DIVISOR * n)`, where `n` is the number of log energies.
 *
 * @param logEnergies The log energies.
 * @param numCoefficients The number of coefficients to compute.
 * @return A list of [numCoefficients] coefficients.
 */
private fun computeDCT(logEnergies: List<Float>, numCoefficients: Int): List<Float> {
    val n = logEnergies.size

    val coefficients = FloatArray(numCoefficients) { k ->
        // Accumulate in Double; only the final coefficient is narrowed to Float.
        var projection = 0.0
        logEnergies.forEachIndexed { j, energy ->
            projection += energy * cos(PI * k * (j + 0.5) / n)
        }
        (projection / sqrt(DCT_SQRT_DIVISOR * n)).toFloat()
    }

    return coefficients.toList()
}
784
+
785
/**
 * Extracts the HNR (Harmonics-to-Noise Ratio) from the audio data.
 *
 * The autocorrelation of the segment is computed for every lag. The first peak whose
 * prominence is at least 10% of the autocorrelation maximum is taken as the harmonic
 * energy, and the remainder of the zero-lag value as noise energy.
 *
 * @param segmentData The segment data.
 * @return `10 * log10(harmonic / noise)`, or 0 when no usable peak exists or either energy is not positive.
 */
private fun extractHNR(segmentData: FloatArray): Float {
    val frameSize = segmentData.size

    // Autocorrelation for every lag of the segment.
    val autocorrelation = FloatArray(frameSize) { lag ->
        var sum = 0f
        for (j in 0 until frameSize - lag) {
            sum += segmentData[j] * segmentData[j + lag]
        }
        sum
    }

    // Find peaks with minimum prominence
    val maxAutocorrelation = autocorrelation.maxOrNull() ?: 0f
    val peaks = findPeaks(autocorrelation, minProminence = 0.1f * maxAutocorrelation)

    val firstPeakIndex = peaks.firstOrNull { it > 0 } ?: return 0f
    val harmonicEnergy = autocorrelation[firstPeakIndex]
    val noiseEnergy = autocorrelation[0] - harmonicEnergy

    // A peak can sit below zero when its neighbours are even lower. The ratio would then
    // be non-positive and log10 would return NaN or -Infinity, so fall back to 0.
    if (harmonicEnergy <= 0f || noiseEnergy <= 0f) return 0f

    return 10 * log10(harmonicEnergy / noiseEnergy)
}
818
+
819
/**
 * Returns the indices of strict local maxima whose height above the higher of the two
 * neighbours is at least [minProminence]. The first and last elements are never reported.
 *
 * @param data The values to scan.
 * @param minProminence Minimum height above the higher neighbour.
 * @return Peak indices in ascending order.
 */
private fun findPeaks(data: FloatArray, minProminence: Float): List<Int> =
    (1 until data.size - 1).filter { index ->
        val current = data[index]
        val before = data[index - 1]
        val after = data[index + 1]
        val isLocalMaximum = current > before && current > after
        isLocalMaximum && current - maxOf(before, after) >= minProminence
    }
831
+
832
/**
 * Loads an audio file as PCM, trying `MediaExtractor` first and the WAV parser second.
 *
 * The path is used as given (minus a `file://` prefix); if no file exists there, a file
 * of the same name inside `filesDir` is tried. When [decodingConfig] is supplied the
 * decoded PCM is passed through [processAudio] and the returned metadata reports the
 * configured targets, falling back to the source values where a target is null.
 *
 * @param fileUri Path or `file://` URI of the audio file.
 * @param decodingConfig Optional resampling, channel and normalisation settings.
 * @return The decoded audio, or null when the file is missing or cannot be decoded.
 */
fun loadAudioFromAnyFormat(fileUri: String, decodingConfig: DecodingConfig? = null): AudioData? {
    val cleanUri = fileUri.removePrefix("file://")
    val requested = File(cleanUri)
    val file = if (requested.exists()) {
        requested
    } else {
        // Second chance: same file name inside the app files directory.
        File(filesDir, requested.name).takeIf { it.exists() } ?: run {
            LogUtils.e(CLASS_NAME, "File not found in any location: $cleanUri")
            return null
        }
    }

    // First try MediaExtractor
    val extractor = MediaExtractor()
    try {
        LogUtils.d(CLASS_NAME, "Attempting MediaExtractor with path: ${file.absolutePath}")
        extractor.setDataSource(file.absolutePath)

        // Find the first audio track
        val audioTrackIndex = (0 until extractor.trackCount).firstOrNull { track ->
            extractor.getTrackFormat(track).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        }

        if (audioTrackIndex != null) {
            val format = extractor.getTrackFormat(audioTrackIndex)
            extractor.selectTrack(audioTrackIndex)

            // Get original audio properties
            val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
            val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
            // If reading the duration as a long fails, try its string form.
            val totalDurationUs = try {
                format.getLong(MediaFormat.KEY_DURATION)
            } catch (e: Exception) {
                (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
            }
            LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")

            val totalDurationMs = totalDurationUs / 1000
            LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")

            // Process using MediaExtractor
            val pcmData = decodeAudioToPCM(extractor, format)
            val processedData = decodingConfig?.let { targets ->
                processAudio(
                    pcmData,
                    originalSampleRate,
                    targets.targetSampleRate,
                    originalChannels,
                    targets.targetChannels,
                    targets.normalizeAudio
                )
            } ?: pcmData

            return AudioData(
                data = processedData,
                sampleRate = decodingConfig?.targetSampleRate ?: originalSampleRate,
                bitDepth = decodingConfig?.targetBitDepth ?: 16,
                channels = decodingConfig?.targetChannels ?: originalChannels,
                durationMs = totalDurationMs // Pass through the duration
            )
        }
    } catch (e: Exception) {
        LogUtils.d(CLASS_NAME, "MediaExtractor failed, attempting WAV parser: ${e.message}")
    } finally {
        extractor.release()
    }

    // If MediaExtractor failed and file is WAV, try WAV parser
    if (!file.name.lowercase().endsWith(".wav")) {
        LogUtils.e(CLASS_NAME, "Failed to process audio file with both MediaExtractor and WAV parser")
        return null
    }

    LogUtils.d(CLASS_NAME, "Falling back to WAV parser")
    val wavData = loadAudioFile(file.absolutePath) ?: return null
    if (decodingConfig == null) return wavData

    val processedData = processAudio(
        wavData.data,
        wavData.sampleRate,
        decodingConfig.targetSampleRate,
        wavData.channels,
        decodingConfig.targetChannels,
        decodingConfig.normalizeAudio
    )
    return AudioData(
        data = processedData,
        sampleRate = decodingConfig.targetSampleRate ?: wavData.sampleRate,
        bitDepth = decodingConfig.targetBitDepth,
        channels = decodingConfig.targetChannels ?: wavData.channels,
        durationMs = wavData.durationMs // Pass through the duration
    )
}
925
+
926
/**
 * Decodes the selected track of [extractor] to raw PCM bytes with a synchronous `MediaCodec` loop.
 *
 * Input samples are fed until the extractor is exhausted; output is then drained until
 * the codec signals end of stream, so frames still buffered inside the decoder when the
 * last input is queued are not lost. The decoder is always stopped and released.
 *
 * @param extractor Extractor with the audio track already selected.
 * @param format Format of the selected track; must carry a MIME type.
 * @return The decoded PCM bytes in the decoder's output format.
 * @throws IllegalArgumentException if [format] has no MIME type.
 */
private fun decodeAudioToPCM(extractor: MediaExtractor, format: MediaFormat): ByteArray {
    val timeoutUs = 10000L
    // Upper bound on consecutive empty polls once all input is queued, so a codec that
    // never reports end of stream cannot hang the caller.
    val maxIdlePollsAfterInputEnd = 100

    var decoder: MediaCodec? = null

    try {
        val mime = requireNotNull(format.getString(MediaFormat.KEY_MIME)) { "Audio track format has no MIME type" }
        val codec = MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()

        val info = MediaCodec.BufferInfo()
        // Primitive byte sink; a MutableList<Byte> would box every decoded byte.
        val pcmStream = java.io.ByteArrayOutputStream()

        var inputDone = false
        var outputDone = false
        var idlePolls = 0

        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(codec.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0) {
                        // No more samples: tell the codec, then keep draining its output.
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = codec.dequeueOutputBuffer(info, timeoutUs)
            if (outputBufferId >= 0) {
                idlePolls = 0
                if (info.size > 0) {
                    val outputBuffer = checkNotNull(codec.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    // Read exactly the valid region described by BufferInfo.
                    outputBuffer.position(info.offset)
                    outputBuffer.limit(info.offset + info.size)
                    val chunk = ByteArray(info.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)
                if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            } else if (inputDone && outputBufferId == MediaCodec.INFO_TRY_AGAIN_LATER) {
                idlePolls += 1
                if (idlePolls >= maxIdlePollsAfterInputEnd) {
                    LogUtils.w(CLASS_NAME, "Decoder produced no end-of-stream flag, returning the data decoded so far")
                    outputDone = true
                }
            }
        }

        return pcmStream.toByteArray()
    } finally {
        // stop() throws if the codec never reached the started state; that must not mask
        // the original failure or skip release().
        try {
            decoder?.stop()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error stopping decoder: ${e.message}")
        }
        try {
            decoder?.release()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
        }
    }
}
977
+
978
/**
 * Resamples 16-bit little-endian interleaved PCM by linear interpolation.
 *
 * The channel layout is preserved: every channel is interpolated separately and the
 * output has [originalChannels] interleaved channels, so callers that go on treating
 * the data as [originalChannels]-channel audio stay correct. For mono input the result
 * is the same as before this change.
 *
 * @param pcmData 16-bit little-endian PCM, interleaved by channel.
 * @param originalSampleRate Sample rate of [pcmData]; must be positive.
 * @param targetSampleRate Sample rate of the result; must be positive.
 * @param originalChannels Number of interleaved channels; values below 1 are treated as mono.
 * @return The resampled PCM in the same sample format and channel layout.
 * @throws IllegalArgumentException if either sample rate is not positive.
 */
private fun resampleAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int,
    originalChannels: Int
): ByteArray {
    require(originalSampleRate > 0) { "originalSampleRate must be positive, got: $originalSampleRate" }
    require(targetSampleRate > 0) { "targetSampleRate must be positive, got: $targetSampleRate" }
    val channels = maxOf(originalChannels, 1)

    // Convert byte array to short array (16-bit samples)
    val source = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(source)

    // Work in frames (one sample per channel) so channels never bleed into each other.
    val sourceFrames = source.size / channels
    val resampleRatio = targetSampleRate.toDouble() / originalSampleRate
    val targetFrames = (sourceFrames * resampleRatio).toInt()
    val resampled = ShortArray(targetFrames * channels)

    for (frame in 0 until targetFrames) {
        val sourcePosition = frame / resampleRatio
        val lowerFrame = minOf(sourcePosition.toInt(), sourceFrames - 1)
        // The last frame has no successor; it is interpolated against itself.
        val upperFrame = minOf(lowerFrame + 1, sourceFrames - 1)
        val fraction = sourcePosition - lowerFrame

        for (channel in 0 until channels) {
            val lower = source[lowerFrame * channels + channel].toDouble()
            val upper = source[upperFrame * channels + channel].toDouble()
            // Linear interpolation
            resampled[frame * channels + channel] = (lower + fraction * (upper - lower)).toInt().toShort()
        }
    }

    // Convert back to byte array
    val resultBuffer = ByteBuffer.allocate(resampled.size * 2)
    resultBuffer.order(ByteOrder.LITTLE_ENDIAN)
    resultBuffer.asShortBuffer().put(resampled)
    return resultBuffer.array()
}
1021
+
1022
/**
 * Averages the channels of every interleaved frame into a single sample.
 *
 * The average uses integer division. Samples after the last complete frame are ignored.
 *
 * @param stereoData Interleaved samples with [channels] values per frame.
 * @param channels Number of interleaved channels.
 * @return One averaged sample per frame.
 */
private fun convertToMono(stereoData: ShortArray, channels: Int): ShortArray =
    ShortArray(stereoData.size / channels) { frame ->
        val frameStart = frame * channels
        var total = 0
        for (channel in 0 until channels) {
            total += stereoData[frameStart + channel]
        }
        (total / channels).toShort()
    }
1036
+
1037
/**
 * Returns the point at [fraction] of the way from [a] to [b]
 * (`a` for a fraction of 0, `b` for a fraction of 1).
 */
private fun linearInterpolate(a: Double, b: Double, fraction: Double): Double {
    val span = b - a
    return a + fraction * span
}
1040
+
1041
/**
 * Applies the optional post-decoding steps in a fixed order: resampling, channel
 * conversion, then normalisation.
 *
 * @param pcmData The decoded PCM bytes.
 * @param originalSampleRate Sample rate of [pcmData].
 * @param targetSampleRate Desired sample rate; resampling is skipped when null or equal to the original.
 * @param originalChannels Channel count of [pcmData].
 * @param targetChannels Desired channel count; conversion is skipped when null or equal to the original.
 * @param normalize Whether to normalise the result.
 * @return The processed PCM bytes; [pcmData] itself when no step applies.
 */
fun processAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int?,
    originalChannels: Int,
    targetChannels: Int?,
    normalize: Boolean
): ByteArray {
    // Step 1: resample only for an explicit, different target rate.
    val resampled = targetSampleRate
        ?.takeIf { rate -> rate != originalSampleRate }
        ?.let { rate -> resampleAudio(pcmData, originalSampleRate, rate, originalChannels) }
        ?: pcmData

    // Step 2: convert channels only for an explicit, different target count.
    val rechannelled = targetChannels
        ?.takeIf { count -> count != originalChannels }
        ?.let { count -> convertChannels(resampled, originalChannels, count) }
        ?: resampled

    // Step 3: normalise only on request.
    return if (normalize) normalizeAudio(rechannelled) else rechannelled
}
1068
+
1069
/**
 * Scales 16-bit little-endian PCM so that its largest absolute sample reaches `Short.MAX_VALUE`.
 *
 * All-zero input is returned unscaled. A trailing odd byte is dropped, because the data
 * is handled as whole 16-bit samples.
 *
 * @param pcmData 16-bit little-endian PCM bytes.
 * @return A new byte array holding the scaled samples.
 */
private fun normalizeAudio(pcmData: ByteArray): ByteArray {
    val samples = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples)

    // Largest absolute sample value; stays 0 for silence or empty input.
    val peak = samples.fold(0) { highest, sample -> maxOf(highest, abs(sample.toInt())) }

    if (peak > 0) {
        val gain = Short.MAX_VALUE.toFloat() / peak
        samples.indices.forEach { index ->
            samples[index] = (samples[index] * gain).toInt().toShort()
        }
    }

    // Back to little-endian bytes.
    val packed = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
    packed.asShortBuffer().put(samples)
    return packed.array()
}
1093
+
1094
/**
 * Delegates channel conversion to `AudioFormatUtils.convertChannels` with a fixed bit depth of 16.
 *
 * @param pcmData PCM bytes to convert (handled as 16-bit audio).
 * @param originalChannels Channel count of [pcmData].
 * @param targetChannels Channel count of the result.
 * @return The converted PCM bytes.
 */
private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
    // 16-bit is assumed here, the default for most audio processing in this class.
    val assumedBitDepth = 16
    return AudioFormatUtils.convertChannels(pcmData, originalChannels, targetChannels, assumedBitDepth)
}
1099
+
1100
/**
 * Logs the first 44 bytes of [file] as hex, as ASCII, and decoded as WAV header fields
 * at their standard offsets.
 *
 * Purely diagnostic: any failure, such as a file shorter than 44 bytes, is logged and swallowed.
 *
 * @param file The file whose header should be dumped.
 */
private fun debugWavHeader(file: File) {
    try {
        val header = ByteArray(44) // Standard WAV header size
        RandomAccessFile(file, "r").use { input -> input.readFully(header) }

        val hexDump = header.joinToString(", ") { String.format("%02X", it) }
        LogUtils.d(CLASS_NAME, "WAV Header Bytes: $hexDump")
        val asciiDump = header.map { it.toInt().toChar() }.joinToString("")
        LogUtils.d(CLASS_NAME, "ASCII: $asciiDump")

        // Numeric header fields are little-endian.
        val fields = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
        LogUtils.d(CLASS_NAME, """
            RIFF header: ${String(header, 0, 4)}
            File size: ${fields.getInt(4)}
            WAVE header: ${String(header, 8, 4)}
            fmt header: ${String(header, 12, 4)}
            Chunk size: ${fields.getInt(16)}
            Audio format: ${fields.getShort(20)}
            Channels: ${fields.getShort(22)}
            Sample rate: ${fields.getInt(24)}
            Byte rate: ${fields.getInt(28)}
            Block align: ${fields.getShort(32)}
            Bits per sample: ${fields.getShort(34)}
        """.trimIndent())
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to debug WAV header: ${e.message}", e)
    }
}
1128
+
1129
/**
 * Builds a fixed-size amplitude/RMS preview of a PCM buffer.
 *
 * The requested time range is split into at most [numberOfPoints] contiguous segments.
 * All offsets are computed in whole frames (one sample per channel) and converted to
 * byte offsets with the frame size, so slices of `audioData.data` always start and end
 * on sample boundaries and the whole range is covered.
 *
 * @param audioData PCM bytes plus format; 16- and 32-bit data are decoded as such and
 *   every other bit depth goes through the 8-bit converter
 * @param numberOfPoints maximum number of data points to produce; must be positive
 * @param startTimeMs optional range start in ms relative to the start of [audioData]
 * @param endTimeMs optional range end in ms; values past the duration are clamped
 * @param config only `segmentDurationMs` is read and echoed into the result
 * @return analysis data whose data points carry byte offsets in `startPosition` /
 *   `endPosition` and times in ms relative to the start of [audioData]
 * @throws IllegalArgumentException for an invalid point count or time range, or when
 *   the range contains no samples
 * @throws IllegalStateException if a segment fails to process or no point is produced
 */
fun generatePreview(
    audioData: AudioData,
    numberOfPoints: Int,
    startTimeMs: Long? = null,
    endTimeMs: Long? = null,
    config: RecordingConfig
): AudioAnalysisData {
    require(numberOfPoints > 0) { "numberOfPoints must be positive, got: $numberOfPoints" }

    val totalDurationMs = audioData.durationMs

    LogUtils.d(CLASS_NAME, "Total audio duration: ${totalDurationMs}ms")

    // Validate time range
    if (startTimeMs != null) {
        require(startTimeMs >= 0) { "startTime must be non-negative, got: $startTimeMs" }
        require(startTimeMs <= totalDurationMs) { "startTime ($startTimeMs) is beyond audio duration ($totalDurationMs)" }
    }

    if (endTimeMs != null) {
        require(endTimeMs >= 0) { "endTime must be non-negative, got: $endTimeMs" }
        if (endTimeMs > totalDurationMs) {
            LogUtils.w(CLASS_NAME, "endTime ($endTimeMs) is beyond audio duration ($totalDurationMs), clamping to duration")
        }
        if (startTimeMs != null) {
            require(startTimeMs < endTimeMs) { "startTime ($startTimeMs) must be less than endTime ($endTimeMs)" }
        }
    }

    // Calculate effective range
    val effectiveStartMs = startTimeMs ?: 0L
    val effectiveEndMs = (endTimeMs ?: totalDurationMs).coerceAtMost(totalDurationMs)
    val durationMs = effectiveEndMs - effectiveStartMs

    LogUtils.d(CLASS_NAME, "Preview range: ${effectiveStartMs}ms to ${effectiveEndMs}ms (${durationMs}ms)")

    // PCM layout: the byte width mirrors the converter chosen per segment below.
    val bytesPerSample = when (audioData.bitDepth) {
        16 -> 2
        32 -> 4
        else -> 1
    }
    val bytesPerFrame = bytesPerSample * audioData.channels.coerceAtLeast(1)
    val totalFrames = (audioData.data.size / bytesPerFrame).toLong()

    // Frame range, clamped to the frames actually present in the buffer.
    val startFrame = ((effectiveStartMs * audioData.sampleRate) / 1000).coerceIn(0L, totalFrames).toInt()
    val endFrame = ((effectiveEndMs * audioData.sampleRate) / 1000).coerceIn(0L, totalFrames).toInt()
    val framesInRange = endFrame - startFrame

    if (framesInRange <= 0) {
        throw IllegalArgumentException("Invalid sample range: contains no samples")
    }

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    // Amplitude and RMS are non-negative, so 0 is the correct seed for a running maximum
    // (Float.MIN_VALUE is the smallest positive float, not the lowest value).
    var maxAmplitude = 0f
    var minRms = Float.MAX_VALUE
    var maxRms = 0f

    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until numberOfPoints) {
            // Proportional boundaries cover the whole range even when it does not divide evenly.
            val pointStartFrame = startFrame + (i.toLong() * framesInRange / numberOfPoints).toInt()
            val pointEndFrame = startFrame + ((i + 1).toLong() * framesInRange / numberOfPoints).toInt()

            // More points than frames: some slots are empty, skip them.
            if (pointEndFrame <= pointStartFrame) continue

            try {
                val pointStartByte = pointStartFrame * bytesPerFrame
                val pointEndByte = pointEndFrame * bytesPerFrame
                val segmentBytes = audioData.data.sliceArray(pointStartByte until pointEndByte)

                // Convert PCM bytes to float samples with proper bit depth handling
                val segmentData = when (audioData.bitDepth) {
                    16 -> convert16BitPcmToFloat(segmentBytes)
                    32 -> convert32BitPcmToFloat(segmentBytes)
                    else -> convert8BitPcmToFloat(segmentBytes)
                }

                // One frame spans 1 / sampleRate seconds regardless of channel count.
                val startTimePoint = ((pointStartFrame * 1000L) / audioData.sampleRate).toFloat()
                val endTimePoint = ((pointEndFrame * 1000L) / audioData.sampleRate).toFloat()

                val meanSquare = segmentData.sumOf { (it * it).toDouble() } / segmentData.size
                val rms = sqrt(meanSquare.toFloat())
                val amplitude = segmentData.maxOf { abs(it) } // Always use peak amplitude

                minAmplitude = minOf(minAmplitude, amplitude)
                maxAmplitude = maxOf(maxAmplitude, amplitude)
                minRms = minOf(minRms, rms)
                maxRms = maxOf(maxRms, rms)

                dataPoints.add(DataPoint(
                    id = dataPoints.size.toLong(),
                    amplitude = amplitude, // Peak amplitude
                    rms = rms, // RMS value
                    dB = 20 * log10(amplitude.toDouble()).toFloat(),
                    silent = amplitude < 0.01,
                    features = null,
                    speech = null,
                    startTime = startTimePoint,
                    endTime = endTimePoint,
                    startPosition = pointStartByte,
                    endPosition = pointEndByte,
                    samples = segmentData.size
                ))
            } catch (e: Exception) {
                LogUtils.e(CLASS_NAME, "Error processing segment $i: ${e.message}")
                throw IllegalStateException("Failed to process audio segment: ${e.message}", e)
            }
        }
    }

    if (dataPoints.isEmpty()) {
        throw IllegalStateException("No data points were generated")
    }

    return AudioAnalysisData(
        segmentDurationMs = config.segmentDurationMs,
        durationMs = durationMs.toInt(),
        bitDepth = audioData.bitDepth,
        numberOfChannels = audioData.channels,
        sampleRate = audioData.sampleRate,
        samples = framesInRange,
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
1248
+
1249
+ // Add these conversion helpers
1250
/**
 * Decodes 16-bit little-endian PCM into floats by dividing each sample by
 * `Short.MAX_VALUE`. A trailing odd byte is ignored.
 */
private fun convert16BitPcmToFloat(bytes: ByteArray): FloatArray {
    val pcmView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    val fullScale = Short.MAX_VALUE.toFloat()
    return FloatArray(bytes.size / 2) { index -> pcmView.get(index).toFloat() / fullScale }
}
1255
+
1256
/**
 * Decodes 32-bit little-endian integer PCM into floats by dividing each sample by
 * `Int.MAX_VALUE`. Trailing bytes that do not form a full sample are ignored.
 */
private fun convert32BitPcmToFloat(bytes: ByteArray): FloatArray {
    val pcmView = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
    val fullScale = Int.MAX_VALUE.toFloat()
    return FloatArray(bytes.size / 4) { index -> pcmView.get(index).toFloat() / fullScale }
}
1261
+
1262
/**
 * Decodes unsigned 8-bit PCM (midpoint 128) into floats.
 *
 * Kotlin's `Byte` is signed, so each byte is masked with `0xFF` to recover the
 * unsigned 0..255 value before the 128 offset is removed. Without the mask every
 * sample would land in roughly [-2, 0).
 *
 * @return one float per input byte: 128 maps to 0, 255 to 1, 1 to -1
 */
private fun convert8BitPcmToFloat(bytes: ByteArray): FloatArray =
    FloatArray(bytes.size) { index -> ((bytes[index].toInt() and 0xFF) - 128) / 127f }
1265
+
1266
/**
 * Loads the PCM for a time range of an audio file.
 *
 * Files whose name ends in `.wav` and for which `getWavHeaderSize` returns a size are
 * read directly as WAV; everything else (including `.wav` files without a usable
 * header) goes through the compressed-audio loader.
 *
 * @param config decoding options; when null a default of 16-bit, no resampling,
 *   no channel conversion and no normalization is used
 * @return the loaded audio, or null if loading failed (the error is logged)
 */
fun loadAudioRange(fileUri: String, startTimeMs: Long, endTimeMs: Long, config: DecodingConfig? = null): AudioData? =
    try {
        val decodingConfig = config ?: DecodingConfig(
            targetSampleRate = null,
            targetChannels = null,
            targetBitDepth = 16,
            normalizeAudio = false
        )

        val hasWavExtension = fileUri.lowercase().endsWith(".wav")
        // The header is only probed for files that claim to be WAV.
        val wavHeaderSize = if (hasWavExtension) getWavHeaderSize(fileUri) else null

        when {
            wavHeaderSize != null -> {
                LogUtils.d(CLASS_NAME, "Loading WAV range with header size: $wavHeaderSize bytes")
                loadWavRange(fileUri, startTimeMs, endTimeMs, decodingConfig, wavHeaderSize)
            }
            else -> {
                if (hasWavExtension) {
                    LogUtils.w(CLASS_NAME, "File has .wav extension but invalid header, falling back to compressed loader")
                }
                LogUtils.d(CLASS_NAME, "Loading compressed audio range")
                loadCompressedAudioRange(fileUri, startTimeMs, endTimeMs, decodingConfig)
            }
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load audio range: ${e.message}", e)
        null
    }
1300
+
1301
/**
 * Reads the PCM bytes for a time range straight out of a WAV file.
 *
 * Offsets are computed in whole frames so the slice never starts or ends in the middle
 * of a sample, and are clamped to the bytes that exist after the header. The data is
 * read with `readFully`, so a short read cannot silently leave zero padding behind.
 * When the requested range runs past the end of the file, the returned duration
 * reflects the audio that was actually read.
 *
 * @param headerSize number of bytes preceding the PCM data
 * @param config only `targetBitDepth` is applied; sample rate and channel count are
 *   returned as stored in the file
 * @return the loaded audio, or null on any failure (the error is logged)
 */
private fun loadWavRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig,
    headerSize: Int
): AudioData? {
    try {
        val file = File(fileUri.removePrefix("file://")).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: throw IllegalArgumentException("File not found: $fileUri")

        // Use existing method to get audio format
        val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")

        val bytesPerFrame = format.channels * (format.bitDepth / 8)
        require(bytesPerFrame > 0 && format.sampleRate > 0) { "Unsupported WAV format: $format" }
        val bytesPerSecond = format.sampleRate * bytesPerFrame

        // Bytes available after the header, rounded down to a whole number of frames.
        val availableBytes = (file.length() - headerSize).coerceAtLeast(0L)
        val usableBytes = availableBytes - availableBytes % bytesPerFrame

        // Time -> frame -> byte, so offsets are always frame-aligned.
        val requestedStartOffset = startTimeMs.coerceAtLeast(0L) * format.sampleRate / 1000 * bytesPerFrame
        val requestedEndOffset = endTimeMs.coerceAtLeast(0L) * format.sampleRate / 1000 * bytesPerFrame
        val startByteOffset = requestedStartOffset.coerceAtMost(usableBytes)
        val endByteOffset = requestedEndOffset.coerceIn(startByteOffset, usableBytes)

        val startByte = headerSize + startByteOffset
        val endByte = headerSize + endByteOffset

        LogUtils.d(CLASS_NAME, """
            Loading WAV range:
            - headerSize: $headerSize
            - startByte: $startByte
            - endByte: $endByte
            - bytesPerSecond: $bytesPerSecond
        """.trimIndent())

        val byteCount = endByte - startByte
        require(byteCount <= Int.MAX_VALUE) { "Requested range is too large to load: $byteCount bytes" }

        val rawBytes = ByteArray(byteCount.toInt())
        RandomAccessFile(file, "r").use { raf ->
            raf.seek(startByte)
            raf.readFully(rawBytes)
        }

        // Apply bit depth conversion if needed
        val needsBitDepthConversion = config.targetBitDepth != format.bitDepth
        val audioDataBytes = if (needsBitDepthConversion) {
            AudioFormatUtils.convertBitDepth(rawBytes, format.bitDepth, config.targetBitDepth).also {
                LogUtils.d(CLASS_NAME, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
            }
        } else {
            rawBytes
        }
        val effectiveBitDepth = if (needsBitDepthConversion) config.targetBitDepth else format.bitDepth

        // Keep the requested duration unless the range had to be clamped to the file.
        val wasClamped = startTimeMs < 0 ||
            requestedStartOffset > usableBytes ||
            requestedEndOffset > usableBytes
        val durationMs = if (wasClamped) {
            (byteCount / bytesPerFrame) * 1000L / format.sampleRate
        } else {
            endTimeMs - startTimeMs
        }

        return AudioData(
            data = audioDataBytes,
            sampleRate = format.sampleRate,
            channels = format.channels,
            bitDepth = effectiveBitDepth,
            durationMs = durationMs
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load WAV range: ${e.message}", e)
        return null
    }
}
1361
+
1362
/**
 * Decodes a time range of a compressed audio file to PCM with MediaExtractor/MediaCodec.
 *
 * Differences from a naive decode loop, each addressing a concrete failure:
 * - the first `audio/` track is selected instead of assuming track 0;
 * - input and output end-of-stream are tracked separately, so frames still inside the
 *   codec when the last input is queued are drained instead of dropped;
 * - decoded bytes go into a growable stream and are trimmed to the requested range
 *   afterwards, so seek granularity and codec frame sizes cannot overflow a buffer;
 * - the returned sample rate and channel count describe the decoded PCM (no resampling
 *   or channel conversion happens here); only `config.targetBitDepth` is applied;
 * - an unknown track duration no longer collapses the range to zero;
 * - cleanup cannot throw out of `finally` or skip releasing the extractor.
 *
 * NOTE(review): decoder output is assumed to be 16-bit PCM, MediaCodec's default
 * encoding. Confirm if float or 8-bit output must be supported.
 *
 * @return the decoded audio, or null on any failure (the error is logged)
 */
private fun loadCompressedAudioRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig
): AudioData? {
    val extractor = MediaExtractor()
    var decoder: MediaCodec? = null
    var decoderStarted = false

    try {
        extractor.setDataSource(fileUri.removePrefix("file://"))

        // Containers may list video or metadata tracks first; pick the first audio track.
        val trackIndex = (0 until extractor.trackCount).firstOrNull { index ->
            extractor.getTrackFormat(index).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: throw IllegalArgumentException("No audio track found in $fileUri")
        val format = extractor.getTrackFormat(trackIndex)
        extractor.selectTrack(trackIndex)
        val mime = requireNotNull(format.getString(MediaFormat.KEY_MIME)) { "Audio track has no MIME type" }

        val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        val totalDurationUs = try {
            format.getLong(MediaFormat.KEY_DURATION)
        } catch (e: Exception) {
            (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLongOrNull() ?: -1L
        }
        LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")

        val totalDurationMs = totalDurationUs / 1000
        LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")

        // Clamp to the track duration only when the container actually reports one.
        val durationKnown = totalDurationUs > 0
        val validStartMs = if (durationKnown) startTimeMs.coerceIn(0L, totalDurationMs) else startTimeMs.coerceAtLeast(0L)
        val validEndMs = if (durationKnown) endTimeMs.coerceIn(validStartMs, totalDurationMs) else endTimeMs.coerceAtLeast(validStartMs)
        val effectiveDurationMs = validEndMs - validStartMs
        val startUs = validStartMs * 1000
        val endUs = validEndMs * 1000

        // Initialize decoder
        val codec = MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()
        decoderStarted = true

        // Seek to the sync point at or before the start; the surplus is trimmed after decoding.
        if (validStartMs > 0) {
            extractor.seekTo(startUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)
        }

        val targetBitDepth = config.targetBitDepth ?: 16

        LogUtils.d(CLASS_NAME, """
            Loading audio range:
            - start: ${validStartMs}ms
            - end: ${validEndMs}ms
            - duration: ${effectiveDurationMs}ms
            - source format: ${originalSampleRate}Hz, $originalChannels channels
            - target bit depth: $targetBitDepth
        """.trimIndent())

        val timeoutUs = 10000L
        val maxIdlePolls = 200 // ~2 s of consecutive empty polls after input EOS
        val pcmStream = java.io.ByteArrayOutputStream()
        val bufferInfo = MediaCodec.BufferInfo()
        var outSampleRate = originalSampleRate
        var outChannels = originalChannels
        var firstPtsUs = -1L
        var inputDone = false
        var outputDone = false
        var idlePolls = 0

        while (!outputDone) {
            // Handle input
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = codec.getInputBuffer(inputBufferId)
                    val sampleSize = if (inputBuffer != null) extractor.readSampleData(inputBuffer, 0) else -1
                    val sampleTimeUs = extractor.sampleTime

                    if (sampleSize < 0 || sampleTimeUs > endUs) {
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, sampleTimeUs, 0)
                        extractor.advance()
                    }
                }
            }

            // Handle output; keep draining until the codec itself reports end of stream.
            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, timeoutUs)
            when {
                outputBufferId >= 0 -> {
                    idlePolls = 0
                    if (bufferInfo.size > 0) {
                        codec.getOutputBuffer(outputBufferId)?.let { decoded ->
                            decoded.position(bufferInfo.offset)
                            decoded.limit(bufferInfo.offset + bufferInfo.size)
                            val chunk = ByteArray(bufferInfo.size)
                            decoded.get(chunk)
                            pcmStream.write(chunk, 0, chunk.size)
                            if (firstPtsUs < 0) firstPtsUs = bufferInfo.presentationTimeUs
                        }
                    }
                    codec.releaseOutputBuffer(outputBufferId, false)

                    if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                        outputDone = true
                    }
                }
                outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
                    // The decoder's real output layout can differ from the container metadata.
                    val outputFormat = codec.outputFormat
                    if (outputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) {
                        outSampleRate = outputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
                    }
                    if (outputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) {
                        outChannels = outputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
                    }
                }
                inputDone -> {
                    // Safety valve: do not spin forever if the codec never signals EOS.
                    idlePolls++
                    if (idlePolls >= maxIdlePolls) {
                        LogUtils.w(CLASS_NAME, "Decoder did not signal end of stream, stopping drain")
                        outputDone = true
                    }
                }
            }
        }

        // Trim the decoded PCM to the requested window, on frame boundaries.
        val decodedBytes = pcmStream.toByteArray()
        val decodedBitDepth = 16
        val frameSize = outChannels * (decodedBitDepth / 8)
        val leadFrames = if (firstPtsUs in 0 until startUs) {
            (startUs - firstPtsUs) * outSampleRate / 1_000_000
        } else {
            0L
        }
        val wantedFrames = effectiveDurationMs * outSampleRate / 1000
        val fromByte = (leadFrames * frameSize).coerceIn(0L, decodedBytes.size.toLong()).toInt()
        val toByte = (fromByte + wantedFrames * frameSize).coerceIn(fromByte.toLong(), decodedBytes.size.toLong()).toInt()
        var audioData = decodedBytes.copyOfRange(fromByte, toByte)

        // Apply bit depth conversion if needed, as the WAV loader does.
        var effectiveBitDepth = decodedBitDepth
        if (targetBitDepth != decodedBitDepth) {
            audioData = AudioFormatUtils.convertBitDepth(audioData, decodedBitDepth, targetBitDepth)
            effectiveBitDepth = targetBitDepth
            LogUtils.d(CLASS_NAME, "Converted bit depth from $decodedBitDepth to $targetBitDepth")
        }

        val requestedSampleRate = config.targetSampleRate
        val requestedChannels = config.targetChannels
        if ((requestedSampleRate != null && requestedSampleRate != outSampleRate) ||
            (requestedChannels != null && requestedChannels != outChannels)
        ) {
            LogUtils.w(
                CLASS_NAME,
                "Range loader does not resample or remix: returning ${outSampleRate}Hz, $outChannels channels"
            )
        }

        return AudioData(
            data = audioData,
            sampleRate = outSampleRate,
            channels = outChannels,
            bitDepth = effectiveBitDepth,
            durationMs = effectiveDurationMs
        ).also {
            LogUtils.d(CLASS_NAME, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load compressed audio range: ${e.message}", e)
        return null
    } finally {
        // Each step is isolated so one failure cannot skip the remaining releases.
        decoder?.let { codec ->
            if (decoderStarted) {
                try {
                    codec.stop()
                } catch (e: Exception) {
                    LogUtils.w(CLASS_NAME, "Failed to stop decoder: ${e.message}")
                }
            }
            try {
                codec.release()
            } catch (e: Exception) {
                LogUtils.w(CLASS_NAME, "Failed to release decoder: ${e.message}")
            }
        }
        try {
            extractor.release()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Failed to release extractor: ${e.message}")
        }
    }
}
1491
+
1492
+ // Future audio editing methods
1493
/**
 * Extracts a time range from an audio file and writes it as a PCM WAV file in `filesDir`.
 *
 * The 44-byte header is assembled in a single little-endian buffer.
 * `RandomAccessFile.writeInt` / `writeShort` emit big-endian values, which would corrupt
 * every multi-byte header field of a RIFF file. The output file is truncated first, so
 * overwriting a longer existing file leaves no stale bytes after the new data.
 *
 * @param config decoding options; `DecodingConfig()` defaults are used when null
 * @param outputFileName file name inside `filesDir`; defaults to `trimmed_<timestamp>.wav`
 * @return the trimmed PCM and its format, or null on any failure (the error is logged)
 */
fun trimAudio(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null,
    outputFileName: String? = null
): AudioData? {
    try {
        // Load the specified range
        val audioData = loadAudioRange(fileUri, startTimeMs, endTimeMs, config ?: DecodingConfig())
            ?: return null

        // Generate output filename if not provided
        val outputFile = if (outputFileName != null) {
            File(filesDir, outputFileName)
        } else {
            val timestamp = System.currentTimeMillis()
            File(filesDir, "trimmed_${timestamp}.wav")
        }

        val durationMs = (endTimeMs - startTimeMs).toInt()

        LogUtils.d(CLASS_NAME, """
            Trimming audio:
            - start: ${startTimeMs}ms
            - end: ${endTimeMs}ms
            - duration: ${durationMs}ms
            - output: ${outputFile.name}
        """.trimIndent())

        val dataSize = audioData.data.size
        val bytesPerSample = audioData.bitDepth / 8
        val blockAlign = audioData.channels * bytesPerSample
        val byteRate = audioData.sampleRate * blockAlign

        // Canonical 44-byte PCM WAV header; every numeric field is little-endian.
        val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
            put("RIFF".toByteArray(Charsets.US_ASCII))
            putInt(dataSize + 36)                    // RIFF chunk size: file size minus 8
            put("WAVE".toByteArray(Charsets.US_ASCII))
            put("fmt ".toByteArray(Charsets.US_ASCII))
            putInt(16)                               // Subchunk1Size (16 for PCM)
            putShort(1)                              // AudioFormat (1 for PCM)
            putShort(audioData.channels.toShort())
            putInt(audioData.sampleRate)
            putInt(byteRate)
            putShort(blockAlign.toShort())
            putShort(audioData.bitDepth.toShort())   // BitsPerSample
            put("data".toByteArray(Charsets.US_ASCII))
            putInt(dataSize)                         // Subchunk2Size
        }.array()

        RandomAccessFile(outputFile, "rw").use { raf ->
            raf.setLength(0) // drop any previous content of an existing file
            raf.write(header)
            raf.write(audioData.data)
        }

        // Debug WAV header to verify
        debugWavHeader(outputFile)

        // Return the trimmed audio data
        return AudioData(
            data = audioData.data,
            sampleRate = audioData.sampleRate,
            channels = audioData.channels,
            bitDepth = audioData.bitDepth
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to trim audio: ${e.message}", e)
        return null
    }
}
1576
+
1577
/**
 * Placeholder for removing a time range from an audio file. Not implemented:
 * always returns null and ignores every argument.
 */
fun removeSection(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null
): AudioData? =
    // TODO: Implement by loading the ranges before and after the section with
    // loadAudioRange and concatenating them.
    null
1587
+
1588
/**
 * Placeholder for concatenating several audio sections. Not implemented:
 * always returns null and ignores every argument.
 */
fun joinAudioSections(
    sections: List<AudioData>,
    config: DecodingConfig? = null
): AudioData? =
    // TODO: Implement joining multiple audio sections; intended for removeSection
    // and other future editing features.
    null
1596
+
1597
+ // Helper method for future editing features
1598
/**
 * Placeholder for converting audio to another sample rate, channel count or bit depth.
 * Not implemented: returns [audioData] unchanged and ignores the target arguments.
 */
private fun convertAudioFormat(
    audioData: AudioData,
    targetSampleRate: Int? = null,
    targetChannels: Int? = null,
    targetBitDepth: Int? = null
): AudioData =
    // TODO: Implement audio format conversion so joined sections share one format.
    audioData
1608
+
1609
+ // Add new function to process entire file
1610
/**
 * Computes features over a whole buffer by treating it as a single segment.
 *
 * The PCM is converted to floats, the sum of squares, zero-crossing count and
 * min/max sample are gathered, and everything is passed to `computeFeatures`
 * with an empty feature-option map.
 */
fun processEntireFile(audioData: AudioData): Features {
    val floatSamples = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Accumulate in Double, then narrow to the Float that computeFeatures receives.
    val energy = floatSamples.sumOf { it * it.toDouble() }.toFloat()
    val crossings = countZeroCrossings(floatSamples)
    val lowest = floatSamples.minOrNull() ?: 0f
    val highest = floatSamples.maxOrNull() ?: 0f

    return computeFeatures(
        segmentData = floatSamples,
        sampleRate = audioData.sampleRate.toFloat(),
        sumSquares = energy,
        zeroCrossings = crossings,
        segmentLength = floatSamples.size,
        minAmplitude = lowest,
        maxAmplitude = highest,
        featureOptions = mapOf() // Dont compute complex features
    )
}
1632
+
1633
/**
 * Counts adjacent sample pairs with strictly opposite signs. A pair containing an
 * exact zero does not count, because its product is not negative.
 */
private fun countZeroCrossings(data: FloatArray): Int =
    (1 until data.size).count { index -> data[index - 1] * data[index] < 0 }
1640
+
1641
/** Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`. */
private fun hzToMel(hz: Float): Float = 2595f * log10(1f + hz / 700f)
1644
+
1645
/** Converts mels to a frequency in Hz using `700 * (10^(mel / 2595) - 1)`. */
private fun melToHz(mel: Float): Float = 700f * (10f.pow(mel / 2595f) - 1f)
1648
+
1649
/**
 * Returns a copy of [samples] multiplied by a symmetric Hann window,
 * `w[i] = 0.5 * (1 - cos(2*pi*i / (N - 1)))`.
 *
 * For N <= 1 the denominator would be zero and the result NaN, so a window of
 * length one is treated as 1.0 and the input is returned as an unmodified copy.
 */
private fun applyHannWindow(samples: FloatArray): FloatArray {
    val lastIndex = samples.size - 1
    if (lastIndex <= 0) return samples.copyOf()

    return FloatArray(samples.size) { i ->
        val multiplier = 0.5f * (1f - cos(2f * PI.toFloat() * i / lastIndex))
        samples[i] * multiplier
    }
}
1657
+
1658
+ // Generate a Hann window of a specific size (new, avoids modifying applyHannWindow)
1659
/**
 * Generates a symmetric Hann window of [size] coefficients,
 * `w[i] = 0.5 * (1 - cos(2*pi*i / (size - 1)))`.
 *
 * A window of size one is defined as `[1.0]`; the formula would divide by zero
 * and yield NaN.
 */
private fun generateHannWindow(size: Int): FloatArray {
    if (size == 1) return floatArrayOf(1f)
    return FloatArray(size) { i ->
        0.5f * (1f - cos(2f * PI.toFloat() * i / (size - 1)))
    }
}
1664
+
1665
+ // Main function to extract mel spectrogram (uses shared C++ implementation via JNI)
1666
/**
 * Computes a mel spectrogram for [audioData] through `MelSpectrogramNative.compute`.
 *
 * Window and hop sizes are given in milliseconds and converted to sample counts with
 * the audio's sample rate. The result also carries one timestamp per frame
 * (`frame * hopLengthMs / 1000`, in seconds) and the mel-spaced frequency of each bin.
 *
 * @param windowType `"hann"` or `"hamming"`, case-insensitive
 * @throws IllegalArgumentException for any other [windowType]
 */
fun extractMelSpectrogram(
    audioData: AudioData,
    windowSizeMs: Float = 25f, // Default 25ms window
    hopLengthMs: Float = 10f, // Default 10ms hop
    nMels: Int = 128, // Number of mel bins
    fftLength: Int = 2048, // FFT size
    fMin: Float = 0f, // Minimum frequency
    fMax: Float = audioData.sampleRate.toFloat() / 2, // Nyquist frequency
    windowType: String = "hann",
    logScaling: Boolean = true, // Apply log scaling
    normalize: Boolean = false // Normalize output
): SpectrogramData {
    val rateHz = audioData.sampleRate.toFloat()
    val pcmFloats = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Milliseconds -> sample counts
    val windowSamples = (windowSizeMs * rateHz / 1000).toInt()
    val hopSamples = (hopLengthMs * rateHz / 1000).toInt()

    // Integer code passed to the native call for the window function.
    val windowCode = when (windowType.lowercase()) {
        "hann" -> 0
        "hamming" -> 1
        else -> throw IllegalArgumentException("Unsupported windowType: $windowType")
    }

    val melFrames = MelSpectrogramNative.compute(
        samples = pcmFloats,
        sampleRate = rateHz.toInt(),
        fftLength = fftLength,
        windowSizeSamples = windowSamples,
        hopLengthSamples = hopSamples,
        nMels = nMels,
        fMin = fMin,
        fMax = fMax,
        windowType = windowCode,
        logScale = logScaling,
        normalize = normalize
    )

    // Metadata: frame start times and the frequency of each mel bin.
    val frameTimes = FloatArray(melFrames.size) { frame -> frame * hopLengthMs / 1000f }
    val binFrequencies = melFrequencies(nMels, fMin, fMax)

    return SpectrogramData(melFrames, frameTimes, binFrequencies)
}
1713
+
1714
+ // Compute Short-Time Fourier Transform
1715
/**
 * Computes a power-spectrum STFT: one row per frame, `fftLength / 2 + 1` bins per row.
 *
 * Each frame takes up to [windowSize] samples starting at `frame * hopLength`, multiplies
 * them by [window], zero-pads to [fftLength] and runs the FFT. The FFT result is read as
 * interleaved (re, im) pairs; bins missing from the result are treated as zero.
 *
 * Guards: a non-positive hop or FFT length is rejected, a negative frame count (input
 * much shorter than the window) yields an empty result, and the copy never reads past
 * [window] or writes past the FFT frame.
 *
 * @throws IllegalArgumentException if [hopLength] or [fftLength] is not positive
 */
private fun computeSTFT(
    samples: FloatArray,
    fftLength: Int,
    windowSize: Int,
    hopLength: Int,
    window: FloatArray
): Array<FloatArray> {
    require(hopLength > 0) { "hopLength must be positive, got: $hopLength" }
    require(fftLength > 0) { "fftLength must be positive, got: $fftLength" }

    val binCount = fftLength / 2 + 1
    val numFrames = ((samples.size - windowSize) / hopLength + 1).coerceAtLeast(0)
    if (numFrames == 0) return emptyArray()

    val fft = FFT(fftLength)
    // Never copy more than the frame, the window table, or the requested window allow.
    val maxCopy = minOf(windowSize, fftLength, window.size)

    return Array(numFrames) { frameIdx ->
        val start = frameIdx * hopLength
        val count = minOf(maxCopy, samples.size - start).coerceAtLeast(0)
        val frame = FloatArray(fftLength)

        // Extract and window the frame
        for (offset in 0 until count) {
            frame[offset] = samples[start + offset] * window[offset]
        }

        // Compute FFT and power spectrum
        val fftResult = fft.processSegment(frame)
        FloatArray(binCount) { bin ->
            val real = if (2 * bin < fftResult.size) fftResult[2 * bin] else 0f
            val imag = if (2 * bin + 1 < fftResult.size) fftResult[2 * bin + 1] else 0f
            real * real + imag * imag
        }
    }
}
1747
+
1748
+ // Apply mel filterbank to STFT
1749
/**
 * Projects each STFT frame onto a mel filterbank.
 *
 * @param stft one row of spectral bins per frame
 * @return one row of [nMels] filter outputs per frame; empty when [stft] is empty
 *   (indexing `stft[0]` would otherwise throw)
 */
private fun applyMelFilterbank(
    stft: Array<FloatArray>,
    sampleRate: Float,
    nMels: Int,
    fftLength: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    if (stft.isEmpty()) return emptyArray()

    val numBins = stft[0].size
    val melFilters = createMelFilterbank(sampleRate, fftLength, nMels, fMin, fMax)

    return Array(stft.size) { frame ->
        val spectrum = stft[frame]
        FloatArray(nMels) { melBin ->
            val filter = melFilters[melBin]
            var sum = 0f
            for (bin in 0 until numBins) {
                sum += spectrum[bin] * filter[bin]
            }
            sum
        }
    }
}
1773
+
1774
+ // Create mel filterbank matrix
1775
/**
 * Builds a bank of [nMels] triangular filters over the `fftLength / 2 + 1` FFT bins.
 *
 * Filter `m` rises linearly from edge `m` to edge `m + 1` and falls to edge `m + 2`,
 * where the `nMels + 2` edges come from [melFrequencies]. Bins outside a filter's
 * edges get weight 0.
 */
private fun createMelFilterbank(
    sampleRate: Float,
    fftLength: Int,
    nMels: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    val binCount = fftLength / 2 + 1
    // Centre frequency of every FFT bin.
    val binFrequencies = FloatArray(binCount) { it * sampleRate / fftLength }
    val edges = melFrequencies(nMels + 2, fMin, fMax)

    return Array(nMels) { melIdx ->
        val lower = edges[melIdx]
        val center = edges[melIdx + 1]
        val upper = edges[melIdx + 2]

        FloatArray(binCount) { bin ->
            val freq = binFrequencies[bin]
            when {
                freq < lower || freq > upper -> 0f
                freq <= center -> (freq - lower) / (center - lower)
                else -> (upper - freq) / (upper - center)
            }
        }
    }
}
1802
+
1803
+ // Generate mel-spaced frequencies
1804
/**
 * Returns [nMels] frequencies in Hz, evenly spaced on the mel scale from [fMin] to
 * [fMax] inclusive.
 *
 * A single point cannot be spaced (the step would be 0/0 = NaN), so `nMels == 1`
 * yields just the lower bound.
 */
private fun melFrequencies(nMels: Int, fMin: Float, fMax: Float): FloatArray {
    val melMin = hzToMel(fMin)
    val melMax = hzToMel(fMax)
    if (nMels == 1) return floatArrayOf(melToHz(melMin))

    return FloatArray(nMels) { i ->
        val mel = melMin + i * (melMax - melMin) / (nMels - 1)
        melToHz(mel)
    }
}
1813
+
1814
/**
 * Computes a single 128-bin mel frame for [samples] through `MelSpectrogramNative`.
 *
 * The native side is initialised with a 2048-point FFT, a window of
 * `min(samples.size, 2048)` samples, a hop equal to the window, a 0..sampleRate/2
 * frequency range and window type 0.
 *
 * @return the 128 mel values, or an empty list when the native call reports failure
 */
private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
    val melBins = 128
    val fftSize = 2048
    val frameSize = minOf(samples.size, fftSize)

    MelSpectrogramNative.init(
        sampleRate = sampleRate.toInt(),
        fftLength = fftSize,
        windowSizeSamples = frameSize,
        hopLengthSamples = frameSize, // hop == window: single frame
        nMels = melBins,
        fMin = 0f,
        fMax = sampleRate / 2f,
        windowType = 0 // Hann
    )

    val melFrame = FloatArray(melBins)
    if (!MelSpectrogramNative.computeFrame(samples, melFrame)) {
        return emptyList()
    }
    return melFrame.toList()
}
1835
+
1836
/**
 * Folds the magnitude spectrum of [samples] into `N_CHROMA` pitch-class bins.
 *
 * `prepareFFT` returns one magnitude per FFT bin (`fftLength / 2 + 1` values), so bins
 * are read directly rather than as interleaved (re, im) pairs, and the bin width is
 * derived from the spectrum's own length. Each bin is assigned to the nearest semitone
 * relative to 440 Hz and reduced modulo 12 with a non-negative result, so frequencies
 * below 440 Hz contribute as well. Pitch class 0 therefore corresponds to 440 Hz.
 * The DC and Nyquist bins are skipped.
 */
private fun computeChroma(samples: FloatArray, sampleRate: Float): List<Float> {
    val (_, magnitudeSpectrum) = prepareFFT(samples, sampleRate)
    val chroma = FloatArray(N_CHROMA) { 0f }

    val binCount = magnitudeSpectrum.size
    if (binCount < 2) return chroma.toList()

    // The spectrum holds fftLength / 2 + 1 bins.
    val fftLength = (binCount - 1) * 2
    val freqsPerBin = sampleRate / fftLength

    for (bin in 1 until binCount - 1) {
        val freq = bin * freqsPerBin
        if (freq <= 0f) continue

        val semitone = Math.round(12.0 * log2(freq / 440.0)).toInt()
        // Kotlin's % keeps the sign of the dividend; shift into 0..11.
        val pitchClass = ((semitone % 12) + 12) % 12
        if (pitchClass < chroma.size) {
            chroma[pitchClass] += magnitudeSpectrum[bin]
        }
    }

    return chroma.toList()
}
1856
+
1857
/**
 * Placeholder for spectral contrast. The FFT is still computed (and may throw), but
 * its result is unused and an empty list is always returned.
 */
private fun computeSpectralContrast(samples: FloatArray, sampleRate: Float): List<Float> {
    // Result intentionally discarded until the contrast computation is implemented.
    prepareFFT(samples, sampleRate)
    return emptyList() // Placeholder
}
1863
+
1864
/**
 * Projects the chroma vector of [samples] onto six fixed 12-element weight rows.
 *
 * Every row has exactly 12 entries, one per pitch class. Row `r` (0..4) weights pitch
 * classes `r`, `r + 4` and `r + 7`. Previously two rows carried stray extra zeros, which
 * pushed the last weight of the fifth row to index 14, where it was never applied.
 * The sixth row weights classes 0, 5 and 9.
 *
 * NOTE(review): this is a 0/1 template projection, not the sine/cosine tonal-centroid
 * transform usually called "tonnetz". Confirm which one consumers expect.
 *
 * @return six values, one per row
 */
private fun computeTonnetz(samples: FloatArray, sampleRate: Float): List<Float> {
    // First compute chroma features
    val chroma = computeChroma(samples, sampleRate)

    // Transformation matrix (6x12)
    val tonnetzMatrix = arrayOf(
        floatArrayOf(1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f, 0f),
        floatArrayOf(0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f),
        floatArrayOf(0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f),
        floatArrayOf(0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f),
        floatArrayOf(0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f),
        floatArrayOf(1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f)
    )

    // Dot product of each row with the chroma vector; missing chroma bins count as 0.
    return tonnetzMatrix.map { row ->
        var sum = 0f
        for (i in row.indices) {
            sum += row[i] * (chroma.getOrNull(i) ?: 0f)
        }
        sum
    }
}
1890
+
1891
/**
 * Returns the smallest power of two that is greater than or equal to [n];
 * values of 1 or less yield 1.
 *
 * The largest power of two an `Int` can hold is 2^30. Doubling past it overflows to a
 * negative number and then to 0, which would loop forever, so larger inputs are rejected.
 *
 * @throws IllegalArgumentException if [n] exceeds 2^30
 */
private fun nextPowerOfTwo(n: Int): Int {
    if (n <= 1) return 1

    val largestPowerOfTwo = 1 shl 30
    require(n <= largestPowerOfTwo) { "No Int power of two is >= $n" }

    val highestBit = Integer.highestOneBit(n)
    return if (highestBit == n) n else highestBit shl 1
}
1898
+
1899
/**
 * Estimates the fundamental frequency of [segment] from its autocorrelation, computed
 * via FFT: Hann window, zero-pad to a power of two of at least twice the segment length,
 * forward FFT, power spectrum, inverse FFT.
 *
 * The autocorrelation is scaled by its zero-lag value, and the strongest local maximum
 * above 0.3 within the lags for 50-500 Hz is taken as the pitch period.
 *
 * @return the estimated pitch in Hz, or 0 when the segment has fewer than two samples,
 *   an FFT step fails, or no qualifying peak exists
 */
private fun estimatePitch(segment: FloatArray, sampleRate: Float): Float {
    if (segment.size < 2) return 0.0f

    // Window, then zero-pad; copyOf fills the extra slots with zeros.
    val windowed = applyHannWindow(segment)
    val fftLength = nextPowerOfTwo(segment.size * 2)
    val spectrum = windowed.copyOf(fftLength)

    val fft = FFT(fftLength)
    try {
        fft.realForward(spectrum)
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "FFT forward transform failed: ${e.message}")
        return 0.0f
    }

    // Power spectrum, mirrored so it can be fed to the inverse transform.
    val power = FloatArray(fftLength)
    try {
        val half = fftLength / 2
        // Slots 0 and 1 of the transformed buffer are read as the DC and Nyquist terms.
        power[0] = spectrum[0] * spectrum[0]
        power[half] = spectrum[1] * spectrum[1]

        for (bin in 1 until half) {
            val re = spectrum[2 * bin]
            val im = spectrum[2 * bin + 1]
            val energy = re * re + im * im
            power[bin] = energy
            power[fftLength - bin] = energy
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Power spectrum computation failed: ${e.message}")
        return 0.0f
    }

    val acf = FloatArray(fftLength)
    try {
        fft.realInverse(power, acf)
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "FFT inverse transform failed: ${e.message}")
        return 0.0f
    }

    // Scale so that the zero-lag value becomes 1.
    val scale = 1.0f / acf[0]
    for (i in acf.indices) {
        acf[i] *= scale
    }

    // Lag bounds for 500 Hz (shortest period) and 50 Hz (longest period).
    val minLag = (sampleRate / 500.0f).toInt().coerceAtLeast(1)
    val maxLag = (sampleRate / 50.0f).toInt().coerceAtMost(acf.size - 1)
    val threshold = 0.3f // Correlation threshold

    var bestCorrelation = -1.0f
    var bestLag = 0
    val lastInteriorLag = acf.size - 2

    for (lag in minLag..maxLag) {
        // Both neighbours are needed to test for a local maximum.
        if (lag < 1 || lag > lastInteriorLag) continue

        val value = acf[lag]
        val isLocalPeak = value > acf[lag - 1] &&
            value > acf[lag + 1] &&
            value > threshold

        if (isLocalPeak && value > bestCorrelation) {
            bestCorrelation = value
            bestLag = lag
        }
    }

    return if (bestLag > 0) sampleRate / bestLag else 0.0f
}
1980
+
1981
/**
 * Applies a Hann window, zero-pads (or truncates) the result to [fftLength], runs a forward
 * real FFT and derives the one-sided magnitude and power spectra.
 *
 * The transform output is read in a packed layout: index 0 holds the DC term, index 1 holds the
 * Nyquist term, and indices `2k` / `2k + 1` hold the real / imaginary parts of bin `k` for
 * `0 < k < fftLength / 2`. DC and Nyquist are purely real, so they are handled separately
 * instead of being combined as if they were a (re, im) pair.
 *
 * @param samples Input audio samples
 * @param sampleRate Sampling rate in Hz (not used by the computation; kept for call-site compatibility)
 * @param fftLength FFT size (must be power of 2)
 * @return Pair of power spectrum and magnitude spectrum, each with `fftLength / 2 + 1` bins
 */
private fun prepareFFT(
    samples: FloatArray,
    sampleRate: Float,
    fftLength: Int = nextPowerOfTwo(samples.size.coerceAtLeast(2048))
): Pair<FloatArray, FloatArray> {
    val padded = applyHannWindow(samples).copyOf(fftLength)
    FFT(fftLength).realForward(padded)

    val nyquistBin = fftLength / 2
    val magnitudeSpectrum = FloatArray(nyquistBin + 1)

    // DC bin is real-only; padded[1] is the Nyquist term, not the imaginary part of DC.
    magnitudeSpectrum[0] = abs(padded[0])
    for (bin in 1 until nyquistBin) {
        val re = padded[2 * bin]
        val im = padded[2 * bin + 1]
        magnitudeSpectrum[bin] = sqrt(re * re + im * im)
    }
    magnitudeSpectrum[nyquistBin] = abs(padded[1])

    // Primitive array initializer avoids boxing every value through List<Float>.
    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { bin ->
        magnitudeSpectrum[bin] * magnitudeSpectrum[bin]
    }
    return Pair(powerSpectrum, magnitudeSpectrum)
}
2005
+
2006
/**
 * Basic PCM description of an audio file.
 *
 * @property sampleRate Sampling rate in Hz
 * @property channels Number of audio channels
 * @property bitDepth Bits per sample
 */
data class AudioFormat(
    val sampleRate: Int,
    val channels: Int,
    val bitDepth: Int,
)
2011
+
2012
/**
 * Reads the sample rate and channel count of the first audio track in a file.
 *
 * The path is resolved as given (after stripping a leading `file://`); if that does not exist,
 * a file with the same name inside `filesDir` is tried.
 *
 * @param fileUri Path or `file://` URI of the audio file
 * @return The detected [AudioFormat], or null if the file is missing, contains no audio track,
 *         or its format cannot be read. `bitDepth` is always reported as 16.
 */
fun getAudioFormat(fileUri: String): AudioFormat? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
            return null
        }

    val extractor = MediaExtractor()
    try {
        extractor.setDataSource(file.absolutePath)

        // Pick the first audio track instead of assuming it is track 0: containers may carry
        // video or metadata tracks first, and those lack sample-rate / channel-count keys.
        val format = (0 until extractor.trackCount)
            .asSequence()
            .map { extractor.getTrackFormat(it) }
            .firstOrNull { it.getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true }
            ?: run {
                LogUtils.e(CLASS_NAME, "No audio track found in: ${file.absolutePath}")
                return null
            }

        return AudioFormat(
            sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE),
            channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
            bitDepth = 16 // Most compressed formats decode to 16-bit PCM
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to get audio format: ${e.message}", e)
        return null
    } finally {
        extractor.release()
    }
}
2036
+
2037
/**
 * Gets the size of the audio file header.
 * For WAV files, this includes the RIFF header and all metadata chunks before the data chunk,
 * plus the 8-byte header of the data chunk itself (i.e. the offset of the first audio byte).
 * For other formats, this will return null as header size handling is format-specific.
 *
 * @param fileUri The URI of the audio file to analyze
 * @return The size of the header in bytes, or null if:
 *         - The file is not a WAV file
 *         - The file cannot be read
 *         - The file format is invalid
 *         - The data chunk cannot be found
 *
 * WAV File Structure:
 * - RIFF header (12 bytes)
 *   - "RIFF" identifier (4 bytes)
 *   - File size (4 bytes)
 *   - "WAVE" identifier (4 bytes)
 * - Format chunk ("fmt ") (24 bytes typically)
 * - Optional metadata chunks (variable size)
 *   - LIST (metadata like artist, title)
 *   - JUNK (padding)
 *   - fact (additional format info)
 *   - cue  (cue points)
 * - Data chunk
 *   - "data" identifier (4 bytes)
 *   - Chunk size (4 bytes)
 *   - Actual audio data
 */
fun getWavHeaderSize(fileUri: String): Int? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
            return null
        }

    return try {
        // `use` closes the stream on every exit path, including early returns and exceptions.
        FileInputStream(file).use { input -> findWavDataOffset(input) }
            ?.also { LogUtils.d(CLASS_NAME, "Total WAV header size: $it bytes") }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Error calculating WAV header size: ${e.message}", e)
        null
    }
}

/**
 * Walks the RIFF chunk list of an open WAV stream and returns the byte offset at which the
 * payload of the "data" chunk starts, or null if the stream is not a valid WAV file or ends
 * before a data chunk is found.
 */
private fun findWavDataOffset(input: FileInputStream): Int? {
    val buffer = ByteArray(12)

    if (!readExactly(input, buffer, 12)) {
        LogUtils.e(CLASS_NAME, "Failed to read RIFF header")
        return null
    }
    if (String(buffer, 0, 4, Charsets.US_ASCII) != "RIFF" ||
        String(buffer, 8, 4, Charsets.US_ASCII) != "WAVE"
    ) {
        LogUtils.e(CLASS_NAME, "Invalid WAV file format")
        return null
    }

    // Tracked as Long so large chunk sizes cannot silently overflow the running offset.
    var offset = 12L

    while (readExactly(input, buffer, 8)) {
        val chunkId = String(buffer, 0, 4, Charsets.US_ASCII)
        // Chunk size is a 32-bit little-endian value.
        val chunkSize = ((buffer[7].toInt() and 0xFF) shl 24) or
            ((buffer[6].toInt() and 0xFF) shl 16) or
            ((buffer[5].toInt() and 0xFF) shl 8) or
            (buffer[4].toInt() and 0xFF)
        LogUtils.d(CLASS_NAME, "Found chunk: $chunkId, size: $chunkSize")

        offset += 8 // Chunk header (id + size)

        if (chunkId == "data") {
            if (offset > Int.MAX_VALUE) {
                LogUtils.e(CLASS_NAME, "WAV header size exceeds supported range: $offset")
                return null
            }
            LogUtils.d(CLASS_NAME, "Found data chunk at offset: $offset")
            return offset.toInt()
        }

        // Sizes with the top bit set read as negative; skipping a negative amount would move
        // the stream backwards and could loop forever, so treat them as a malformed file.
        if (chunkSize < 0) {
            LogUtils.e(CLASS_NAME, "Invalid chunk size $chunkSize for chunk: $chunkId")
            return null
        }

        // RIFF chunks are word-aligned: an odd-sized payload is followed by one pad byte.
        var remaining = chunkSize.toLong() + (chunkSize and 1)
        offset += remaining
        while (remaining > 0) {
            val skipped = input.skip(remaining)
            if (skipped <= 0) {
                LogUtils.e(CLASS_NAME, "Unexpected end of file while reading chunks")
                return null
            }
            remaining -= skipped
        }
    }

    // Ran out of bytes before any "data" chunk header could be read.
    LogUtils.e(CLASS_NAME, "Unexpected end of file while reading chunks")
    return null
}

/**
 * Fills the first [length] bytes of [buffer] from [input], looping over short reads.
 * Returns false if the stream ends before [length] bytes are available.
 */
private fun readExactly(input: FileInputStream, buffer: ByteArray, length: Int): Boolean {
    var filled = 0
    while (filled < length) {
        val read = input.read(buffer, filled, length - filled)
        if (read < 0) return false
        filled += read
    }
    return true
}
2126
+
2127
/**
 * Decodes a specific time range of an audio file directly to PCM data.
 * This is more efficient than decoding the entire file when only a portion is needed.
 *
 * The extractor seeks to the sync point at or before [startTimeMs], feeds samples to the
 * decoder until one starts after [endTimeMs], drains all decoder output, and then trims the
 * decoded PCM to the requested range on whole-frame boundaries.
 *
 * @param fileUri Location of the audio file, passed as-is to `MediaExtractor.setDataSource`
 * @param startTimeMs Start of the range in milliseconds
 * @param endTimeMs End of the range in milliseconds
 * @return The decoded range as 16-bit little-endian interleaved PCM, or null if the file has
 *         no audio track, nothing could be decoded, or an error occurred
 */
fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
    val extractor = MediaExtractor()
    var decoder: android.media.MediaCodec? = null
    var decoderStarted = false

    try {
        extractor.setDataSource(fileUri)
        val trackIndex = (0 until extractor.trackCount).find {
            extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: return null

        extractor.selectTrack(trackIndex)
        val format = extractor.getTrackFormat(trackIndex)
        val mime = format.getString(MediaFormat.KEY_MIME) ?: return null

        val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        if (sampleRate <= 0 || channels <= 0) {
            LogUtils.e(CLASS_NAME, "Failed to decode audio range: invalid format (sampleRate=$sampleRate, channels=$channels)")
            return null
        }

        val codec = android.media.MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()
        decoderStarted = true

        val startTimeUs = startTimeMs * 1000
        val endTimeUs = endTimeMs * 1000
        val timeoutUs = 10000L

        extractor.seekTo(startTimeUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)

        // Primitive byte sink; a MutableList<Byte> would box every decoded byte.
        val pcmStream = java.io.ByteArrayOutputStream()
        val bufferInfo = android.media.MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false
        var firstBufferTimeUs: Long? = null

        // Keep draining after the last input is queued: the codec still holds decoded frames
        // until it emits an output buffer flagged END_OF_STREAM.
        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = codec.getInputBuffer(inputBufferId)
                    val sampleSize = if (inputBuffer != null) extractor.readSampleData(inputBuffer, 0) else -1
                    val sampleTimeUs = extractor.sampleTime
                    if (sampleSize < 0 || sampleTimeUs > endTimeUs) {
                        codec.queueInputBuffer(
                            inputBufferId, 0, 0, 0,
                            android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM
                        )
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, sampleTimeUs, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, timeoutUs)
            if (outputBufferId >= 0) {
                if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
                val outputBuffer = codec.getOutputBuffer(outputBufferId)
                if (outputBuffer != null && bufferInfo.size > 0) {
                    // Valid data lives in [offset, offset + size) of the output buffer.
                    outputBuffer.position(bufferInfo.offset)
                    outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val chunk = ByteArray(bufferInfo.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)
                if (bufferInfo.flags and android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) {
                    outputDone = true
                }
            }
        }

        val pcmBytes = pcmStream.toByteArray()
        val firstTimeUs = firstBufferTimeUs

        // If we didn't get any data or first buffer time, return null
        if (pcmBytes.isEmpty() || firstTimeUs == null) {
            return null
        }

        // Trim PCM data to exact time range.
        // NOTE(review): assumes the decoder outputs 16-bit PCM at the track's sample rate and
        // channel count; the decoder's output format is not inspected here — confirm.
        val bytesPerSample = 2 // 16-bit PCM
        val bytesPerFrame = bytesPerSample * channels
        val totalFrames = pcmBytes.size / bytesPerFrame
        val frameDurationUs = 1_000_000.0 / sampleRate // Time per frame in microseconds

        // Indices are computed in frames (one sample per channel) and converted to byte
        // offsets via bytesPerFrame, so interleaved multi-channel audio is cut at the right
        // time and never in the middle of a frame.
        val startFrame = ((startTimeUs - firstTimeUs) / frameDurationUs).toInt().coerceIn(0, totalFrames)
        val endFrame = ((endTimeUs - firstTimeUs) / frameDurationUs).toInt().coerceIn(startFrame, totalFrames)
        val trimmedBytes = pcmBytes.copyOfRange(startFrame * bytesPerFrame, endFrame * bytesPerFrame)

        return AudioData(
            data = trimmedBytes,
            sampleRate = sampleRate,
            channels = channels,
            bitDepth = 16, // MediaCodec typically decodes to 16-bit PCM
            durationMs = endTimeMs - startTimeMs
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to decode audio range: ${e.message}", e)
        return null
    } finally {
        decoder?.let { codec ->
            // stop() and release() are guarded separately so a failing stop() cannot
            // prevent the codec from being released.
            if (decoderStarted) {
                try {
                    codec.stop()
                } catch (e: Exception) {
                    LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
                }
            }
            try {
                codec.release()
            } catch (e: Exception) {
                LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
            }
        }

        try {
            extractor.release()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error releasing extractor: ${e.message}")
        }
    }
}
2237
+ }