@siteed/expo-audio-studio 2.18.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +13 -297
  2. package/index.d.ts +1 -0
  3. package/index.js +1 -0
  4. package/package.json +6 -135
  5. package/CHANGELOG.md +0 -501
  6. package/LICENSE +0 -21
  7. package/android/build.gradle +0 -129
  8. package/android/src/androidTest/assets/chorus.wav +0 -0
  9. package/android/src/androidTest/assets/jfk.wav +0 -0
  10. package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
  11. package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
  12. package/android/src/androidTest/java/net/siteed/audiostream/AudioProcessorInstrumentedTest.kt +0 -197
  13. package/android/src/androidTest/java/net/siteed/audiostream/AudioRecorderInstrumentedTest.kt +0 -541
  14. package/android/src/androidTest/java/net/siteed/audiostream/AudioRecorderPerformanceInstrumentedTest.kt +0 -234
  15. package/android/src/androidTest/java/net/siteed/audiostream/integration/AudioFocusStrategyIntegrationTest.kt +0 -332
  16. package/android/src/androidTest/java/net/siteed/audiostream/integration/BufferDurationIntegrationTest.kt +0 -324
  17. package/android/src/androidTest/java/net/siteed/audiostream/integration/CompressedOnlyOutputTest.kt +0 -253
  18. package/android/src/androidTest/java/net/siteed/audiostream/integration/DeviceDisconnectionFallbackTest.kt +0 -218
  19. package/android/src/androidTest/java/net/siteed/audiostream/integration/EventEmissionIntervalTest.kt +0 -120
  20. package/android/src/androidTest/java/net/siteed/audiostream/integration/M4aFormatTest.kt +0 -345
  21. package/android/src/androidTest/java/net/siteed/audiostream/integration/OutputControlIntegrationTest.kt +0 -340
  22. package/android/src/androidTest/java/net/siteed/audiostream/integration/PcmStreamingDurationTest.kt +0 -252
  23. package/android/src/androidTest/java/net/siteed/audiostream/integration/README.md +0 -95
  24. package/android/src/androidTest/java/net/siteed/audiostream/integration/run_integration_tests.sh +0 -43
  25. package/android/src/main/AndroidManifest.xml +0 -30
  26. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +0 -188
  27. package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +0 -9
  28. package/android/src/main/java/net/siteed/audiostream/AudioDeviceManager.kt +0 -1741
  29. package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +0 -136
  30. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +0 -354
  31. package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +0 -439
  32. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +0 -2237
  33. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2141
  34. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +0 -167
  35. package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +0 -1099
  36. package/android/src/main/java/net/siteed/audiostream/Constants.kt +0 -37
  37. package/android/src/main/java/net/siteed/audiostream/EventSender.kt +0 -7
  38. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +0 -1113
  39. package/android/src/main/java/net/siteed/audiostream/FFT.kt +0 -99
  40. package/android/src/main/java/net/siteed/audiostream/Features.kt +0 -98
  41. package/android/src/main/java/net/siteed/audiostream/LogUtils.kt +0 -93
  42. package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +0 -72
  43. package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +0 -68
  44. package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +0 -59
  45. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +0 -257
  46. package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +0 -19
  47. package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +0 -159
  48. package/android/src/main/res/drawable/ic_default_action_icon.xml +0 -16
  49. package/android/src/main/res/drawable/ic_microphone.xml +0 -13
  50. package/android/src/main/res/drawable/ic_pause.xml +0 -10
  51. package/android/src/main/res/drawable/ic_play.xml +0 -10
  52. package/android/src/main/res/drawable/ic_stop.xml +0 -10
  53. package/android/src/main/res/layout/notification_recording.xml +0 -37
  54. package/android/src/test/java/net/siteed/audiostream/AudioFileHandlerTest.kt +0 -279
  55. package/android/src/test/java/net/siteed/audiostream/AudioFocusStrategyTest.kt +0 -249
  56. package/android/src/test/java/net/siteed/audiostream/AudioFormatTest.kt +0 -151
  57. package/android/src/test/java/net/siteed/audiostream/AudioFormatUtilsTest.kt +0 -273
  58. package/android/src/test/java/net/siteed/audiostream/DeviceDisconnectionFallbackUnitTest.kt +0 -140
  59. package/android/src/test/resources/chorus.wav +0 -0
  60. package/android/src/test/resources/generate_test_audio.py +0 -94
  61. package/android/src/test/resources/jfk.wav +0 -0
  62. package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
  63. package/android/src/test/resources/recorder_hello_world.wav +0 -0
  64. package/app.plugin.js +0 -3
  65. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +0 -4
  66. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  67. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +0 -210
  68. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  69. package/build/cjs/AudioAnalysis/extractAudioData.js +0 -21
  70. package/build/cjs/AudioAnalysis/extractAudioData.js.map +0 -1
  71. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +0 -92
  72. package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  73. package/build/cjs/AudioAnalysis/extractPreview.js +0 -28
  74. package/build/cjs/AudioAnalysis/extractPreview.js.map +0 -1
  75. package/build/cjs/AudioAnalysis/extractWaveform.js +0 -18
  76. package/build/cjs/AudioAnalysis/extractWaveform.js.map +0 -1
  77. package/build/cjs/AudioDeviceManager.js +0 -689
  78. package/build/cjs/AudioDeviceManager.js.map +0 -1
  79. package/build/cjs/AudioRecorder.provider.js +0 -78
  80. package/build/cjs/AudioRecorder.provider.js.map +0 -1
  81. package/build/cjs/ExpoAudioStream.native.js +0 -8
  82. package/build/cjs/ExpoAudioStream.native.js.map +0 -1
  83. package/build/cjs/ExpoAudioStream.types.js +0 -11
  84. package/build/cjs/ExpoAudioStream.types.js.map +0 -1
  85. package/build/cjs/ExpoAudioStream.web.js +0 -708
  86. package/build/cjs/ExpoAudioStream.web.js.map +0 -1
  87. package/build/cjs/ExpoAudioStreamModule.js +0 -718
  88. package/build/cjs/ExpoAudioStreamModule.js.map +0 -1
  89. package/build/cjs/WebRecorder.web.js +0 -777
  90. package/build/cjs/WebRecorder.web.js.map +0 -1
  91. package/build/cjs/constants/platformLimitations.js +0 -99
  92. package/build/cjs/constants/platformLimitations.js.map +0 -1
  93. package/build/cjs/constants.js +0 -17
  94. package/build/cjs/constants.js.map +0 -1
  95. package/build/cjs/events.js +0 -29
  96. package/build/cjs/events.js.map +0 -1
  97. package/build/cjs/hooks/useAudioDevices.js +0 -179
  98. package/build/cjs/hooks/useAudioDevices.js.map +0 -1
  99. package/build/cjs/index.js +0 -58
  100. package/build/cjs/index.js.map +0 -1
  101. package/build/cjs/trimAudio.js +0 -76
  102. package/build/cjs/trimAudio.js.map +0 -1
  103. package/build/cjs/useAudioRecorder.js +0 -518
  104. package/build/cjs/useAudioRecorder.js.map +0 -1
  105. package/build/cjs/utils/BlobFix.js +0 -502
  106. package/build/cjs/utils/BlobFix.js.map +0 -1
  107. package/build/cjs/utils/audioProcessing.js +0 -136
  108. package/build/cjs/utils/audioProcessing.js.map +0 -1
  109. package/build/cjs/utils/cleanNativeOptions.js +0 -22
  110. package/build/cjs/utils/cleanNativeOptions.js.map +0 -1
  111. package/build/cjs/utils/concatenateBuffers.js +0 -25
  112. package/build/cjs/utils/concatenateBuffers.js.map +0 -1
  113. package/build/cjs/utils/convertPCMToFloat32.js +0 -124
  114. package/build/cjs/utils/convertPCMToFloat32.js.map +0 -1
  115. package/build/cjs/utils/crc32.js +0 -52
  116. package/build/cjs/utils/crc32.js.map +0 -1
  117. package/build/cjs/utils/encodingToBitDepth.js +0 -17
  118. package/build/cjs/utils/encodingToBitDepth.js.map +0 -1
  119. package/build/cjs/utils/getWavFileInfo.js +0 -96
  120. package/build/cjs/utils/getWavFileInfo.js.map +0 -1
  121. package/build/cjs/utils/writeWavHeader.js +0 -88
  122. package/build/cjs/utils/writeWavHeader.js.map +0 -1
  123. package/build/cjs/workers/InlineFeaturesExtractor.web.js +0 -859
  124. package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +0 -1
  125. package/build/cjs/workers/inlineAudioWebWorker.web.js +0 -184
  126. package/build/cjs/workers/inlineAudioWebWorker.web.js.map +0 -1
  127. package/build/esm/AudioAnalysis/AudioAnalysis.types.js +0 -3
  128. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  129. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +0 -202
  130. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  131. package/build/esm/AudioAnalysis/extractAudioData.js +0 -14
  132. package/build/esm/AudioAnalysis/extractAudioData.js.map +0 -1
  133. package/build/esm/AudioAnalysis/extractMelSpectrogram.js +0 -89
  134. package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +0 -1
  135. package/build/esm/AudioAnalysis/extractPreview.js +0 -25
  136. package/build/esm/AudioAnalysis/extractPreview.js.map +0 -1
  137. package/build/esm/AudioAnalysis/extractWaveform.js +0 -11
  138. package/build/esm/AudioAnalysis/extractWaveform.js.map +0 -1
  139. package/build/esm/AudioDeviceManager.js +0 -682
  140. package/build/esm/AudioDeviceManager.js.map +0 -1
  141. package/build/esm/AudioRecorder.provider.js +0 -40
  142. package/build/esm/AudioRecorder.provider.js.map +0 -1
  143. package/build/esm/ExpoAudioStream.native.js +0 -6
  144. package/build/esm/ExpoAudioStream.native.js.map +0 -1
  145. package/build/esm/ExpoAudioStream.types.js +0 -8
  146. package/build/esm/ExpoAudioStream.types.js.map +0 -1
  147. package/build/esm/ExpoAudioStream.web.js +0 -704
  148. package/build/esm/ExpoAudioStream.web.js.map +0 -1
  149. package/build/esm/ExpoAudioStreamModule.js +0 -713
  150. package/build/esm/ExpoAudioStreamModule.js.map +0 -1
  151. package/build/esm/WebRecorder.web.js +0 -773
  152. package/build/esm/WebRecorder.web.js.map +0 -1
  153. package/build/esm/constants/platformLimitations.js +0 -90
  154. package/build/esm/constants/platformLimitations.js.map +0 -1
  155. package/build/esm/constants.js +0 -14
  156. package/build/esm/constants.js.map +0 -1
  157. package/build/esm/events.js +0 -21
  158. package/build/esm/events.js.map +0 -1
  159. package/build/esm/hooks/useAudioDevices.js +0 -176
  160. package/build/esm/hooks/useAudioDevices.js.map +0 -1
  161. package/build/esm/index.js +0 -20
  162. package/build/esm/index.js.map +0 -1
  163. package/build/esm/trimAudio.js +0 -69
  164. package/build/esm/trimAudio.js.map +0 -1
  165. package/build/esm/useAudioRecorder.js +0 -512
  166. package/build/esm/useAudioRecorder.js.map +0 -1
  167. package/build/esm/utils/BlobFix.js +0 -498
  168. package/build/esm/utils/BlobFix.js.map +0 -1
  169. package/build/esm/utils/audioProcessing.js +0 -133
  170. package/build/esm/utils/audioProcessing.js.map +0 -1
  171. package/build/esm/utils/cleanNativeOptions.js +0 -19
  172. package/build/esm/utils/cleanNativeOptions.js.map +0 -1
  173. package/build/esm/utils/concatenateBuffers.js +0 -21
  174. package/build/esm/utils/concatenateBuffers.js.map +0 -1
  175. package/build/esm/utils/convertPCMToFloat32.js +0 -120
  176. package/build/esm/utils/convertPCMToFloat32.js.map +0 -1
  177. package/build/esm/utils/crc32.js +0 -50
  178. package/build/esm/utils/crc32.js.map +0 -1
  179. package/build/esm/utils/encodingToBitDepth.js +0 -13
  180. package/build/esm/utils/encodingToBitDepth.js.map +0 -1
  181. package/build/esm/utils/getWavFileInfo.js +0 -92
  182. package/build/esm/utils/getWavFileInfo.js.map +0 -1
  183. package/build/esm/utils/writeWavHeader.js +0 -84
  184. package/build/esm/utils/writeWavHeader.js.map +0 -1
  185. package/build/esm/workers/InlineFeaturesExtractor.web.js +0 -856
  186. package/build/esm/workers/InlineFeaturesExtractor.web.js.map +0 -1
  187. package/build/esm/workers/inlineAudioWebWorker.web.js +0 -181
  188. package/build/esm/workers/inlineAudioWebWorker.web.js.map +0 -1
  189. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +0 -196
  190. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  191. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +0 -74
  192. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  193. package/build/types/AudioAnalysis/extractAudioData.d.ts +0 -3
  194. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +0 -1
  195. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +0 -14
  196. package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +0 -1
  197. package/build/types/AudioAnalysis/extractPreview.d.ts +0 -11
  198. package/build/types/AudioAnalysis/extractPreview.d.ts.map +0 -1
  199. package/build/types/AudioAnalysis/extractWaveform.d.ts +0 -8
  200. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  201. package/build/types/AudioDeviceManager.d.ts +0 -187
  202. package/build/types/AudioDeviceManager.d.ts.map +0 -1
  203. package/build/types/AudioRecorder.provider.d.ts +0 -11
  204. package/build/types/AudioRecorder.provider.d.ts.map +0 -1
  205. package/build/types/ExpoAudioStream.native.d.ts +0 -3
  206. package/build/types/ExpoAudioStream.native.d.ts.map +0 -1
  207. package/build/types/ExpoAudioStream.types.d.ts +0 -738
  208. package/build/types/ExpoAudioStream.types.d.ts.map +0 -1
  209. package/build/types/ExpoAudioStream.web.d.ts +0 -96
  210. package/build/types/ExpoAudioStream.web.d.ts.map +0 -1
  211. package/build/types/ExpoAudioStreamModule.d.ts +0 -3
  212. package/build/types/ExpoAudioStreamModule.d.ts.map +0 -1
  213. package/build/types/WebRecorder.web.d.ts +0 -198
  214. package/build/types/WebRecorder.web.d.ts.map +0 -1
  215. package/build/types/constants/platformLimitations.d.ts +0 -40
  216. package/build/types/constants/platformLimitations.d.ts.map +0 -1
  217. package/build/types/constants.d.ts +0 -11
  218. package/build/types/constants.d.ts.map +0 -1
  219. package/build/types/events.d.ts +0 -26
  220. package/build/types/events.d.ts.map +0 -1
  221. package/build/types/hooks/useAudioDevices.d.ts +0 -15
  222. package/build/types/hooks/useAudioDevices.d.ts.map +0 -1
  223. package/build/types/index.d.ts +0 -18
  224. package/build/types/index.d.ts.map +0 -1
  225. package/build/types/trimAudio.d.ts +0 -25
  226. package/build/types/trimAudio.d.ts.map +0 -1
  227. package/build/types/useAudioRecorder.d.ts +0 -22
  228. package/build/types/useAudioRecorder.d.ts.map +0 -1
  229. package/build/types/utils/BlobFix.d.ts +0 -9
  230. package/build/types/utils/BlobFix.d.ts.map +0 -1
  231. package/build/types/utils/audioProcessing.d.ts +0 -24
  232. package/build/types/utils/audioProcessing.d.ts.map +0 -1
  233. package/build/types/utils/cleanNativeOptions.d.ts +0 -15
  234. package/build/types/utils/cleanNativeOptions.d.ts.map +0 -1
  235. package/build/types/utils/concatenateBuffers.d.ts +0 -8
  236. package/build/types/utils/concatenateBuffers.d.ts.map +0 -1
  237. package/build/types/utils/convertPCMToFloat32.d.ts +0 -13
  238. package/build/types/utils/convertPCMToFloat32.d.ts.map +0 -1
  239. package/build/types/utils/crc32.d.ts +0 -7
  240. package/build/types/utils/crc32.d.ts.map +0 -1
  241. package/build/types/utils/encodingToBitDepth.d.ts +0 -5
  242. package/build/types/utils/encodingToBitDepth.d.ts.map +0 -1
  243. package/build/types/utils/getWavFileInfo.d.ts +0 -26
  244. package/build/types/utils/getWavFileInfo.d.ts.map +0 -1
  245. package/build/types/utils/writeWavHeader.d.ts +0 -34
  246. package/build/types/utils/writeWavHeader.d.ts.map +0 -1
  247. package/build/types/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  248. package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  249. package/build/types/workers/inlineAudioWebWorker.web.d.ts +0 -2
  250. package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  251. package/expo-module.config.json +0 -10
  252. package/ios/AudioAnalysisData.swift +0 -74
  253. package/ios/AudioDeviceManager.swift +0 -670
  254. package/ios/AudioNotificationManager.swift +0 -154
  255. package/ios/AudioProcessingHelpers.swift +0 -743
  256. package/ios/AudioProcessor.swift +0 -1151
  257. package/ios/AudioStreamError.swift +0 -7
  258. package/ios/AudioStreamManager.swift +0 -2369
  259. package/ios/AudioStreamManagerDelegate.swift +0 -16
  260. package/ios/DataPoint.swift +0 -54
  261. package/ios/DecodingConfig.swift +0 -59
  262. package/ios/ExpoAudioStream.podspec +0 -33
  263. package/ios/ExpoAudioStreamModule.swift +0 -1019
  264. package/ios/ExpoAudioStudioTests/AudioFileHandlerTests.swift +0 -338
  265. package/ios/ExpoAudioStudioTests/AudioFormatUtilsTests.swift +0 -331
  266. package/ios/ExpoAudioStudioTests/AudioTestHelpers.swift +0 -130
  267. package/ios/ExpoAudioStudioTests/CompressedOnlyOutputTests.swift +0 -294
  268. package/ios/ExpoAudioStudioTests/EventEmissionIntervalTests.swift +0 -105
  269. package/ios/ExpoAudioStudioTests/Info.plist +0 -22
  270. package/ios/ExpoAudioStudioTests/README.md +0 -39
  271. package/ios/ExpoAudioStudioTests/SimpleAudioTest.swift +0 -98
  272. package/ios/ExpoAudioStudioTests/TestAudioGenerator.swift +0 -75
  273. package/ios/FFT.swift +0 -62
  274. package/ios/Features.swift +0 -95
  275. package/ios/ISSUE_IOS.md +0 -68
  276. package/ios/Logger.swift +0 -39
  277. package/ios/NotificationExtension.swift +0 -15
  278. package/ios/RecordingResult.swift +0 -22
  279. package/ios/RecordingSettings.swift +0 -308
  280. package/ios/WaveformExtractor.swift +0 -105
  281. package/ios/tests/README.md +0 -41
  282. package/ios/tests/integration/buffer_and_fallback_test.swift +0 -178
  283. package/ios/tests/integration/buffer_duration_test.swift +0 -185
  284. package/ios/tests/integration/compressed_only_output_test.swift +0 -271
  285. package/ios/tests/integration/output_control_test.swift +0 -322
  286. package/ios/tests/integration/run_integration_tests.sh +0 -37
  287. package/ios/tests/opus_support_test_macos.swift +0 -154
  288. package/ios/tests/standalone/audio_processing_test.swift +0 -144
  289. package/ios/tests/standalone/audio_recording_test.swift +0 -277
  290. package/ios/tests/standalone/audio_streaming_test.swift +0 -249
  291. package/ios/tests/standalone/standalone_test.swift +0 -144
  292. package/plugin/build/index.cjs +0 -194
  293. package/plugin/build/index.d.cts +0 -22
  294. package/plugin/build/index.js +0 -194
  295. package/plugin/src/index.ts +0 -285
  296. package/plugin/tsconfig.json +0 -10
  297. package/plugin/tsconfig.tsbuildinfo +0 -1
  298. package/src/AudioAnalysis/AudioAnalysis.types.ts +0 -224
  299. package/src/AudioAnalysis/extractAudioAnalysis.ts +0 -344
  300. package/src/AudioAnalysis/extractAudioData.ts +0 -17
  301. package/src/AudioAnalysis/extractMelSpectrogram.ts +0 -154
  302. package/src/AudioAnalysis/extractPreview.ts +0 -34
  303. package/src/AudioAnalysis/extractWaveform.ts +0 -22
  304. package/src/AudioDeviceManager.ts +0 -803
  305. package/src/AudioRecorder.provider.tsx +0 -57
  306. package/src/ExpoAudioStream.native.ts +0 -6
  307. package/src/ExpoAudioStream.types.ts +0 -874
  308. package/src/ExpoAudioStream.web.ts +0 -905
  309. package/src/ExpoAudioStreamModule.ts +0 -990
  310. package/src/WebRecorder.web.ts +0 -1005
  311. package/src/constants/platformLimitations.ts +0 -118
  312. package/src/constants.ts +0 -18
  313. package/src/events.ts +0 -60
  314. package/src/hooks/useAudioDevices.ts +0 -213
  315. package/src/index.ts +0 -54
  316. package/src/trimAudio.ts +0 -94
  317. package/src/types/crc-32.d.ts +0 -9
  318. package/src/useAudioRecorder.tsx +0 -766
  319. package/src/utils/BlobFix.ts +0 -561
  320. package/src/utils/audioProcessing.ts +0 -205
  321. package/src/utils/cleanNativeOptions.ts +0 -18
  322. package/src/utils/concatenateBuffers.ts +0 -24
  323. package/src/utils/convertPCMToFloat32.ts +0 -170
  324. package/src/utils/crc32.ts +0 -59
  325. package/src/utils/encodingToBitDepth.ts +0 -18
  326. package/src/utils/getWavFileInfo.ts +0 -132
  327. package/src/utils/writeWavHeader.ts +0 -115
  328. package/src/workers/InlineFeaturesExtractor.web.tsx +0 -855
  329. package/src/workers/inlineAudioWebWorker.web.tsx +0 -180
@@ -1,2237 +0,0 @@
1
- package net.siteed.audiostream
2
-
3
- import java.nio.ByteBuffer
4
- import java.nio.ByteOrder
5
- import kotlin.math.*
6
- import android.util.Log
7
- import java.io.File
8
- import java.util.concurrent.atomic.AtomicLong
9
- import kotlin.system.measureTimeMillis
10
- import android.media.MediaExtractor
11
- import android.media.MediaFormat
12
- import android.media.MediaCodec
13
- import java.io.FileInputStream
14
- import java.io.RandomAccessFile
15
- import java.util.zip.CRC32
16
- import net.siteed.audiostream.LogUtils
17
-
18
/**
 * Options that describe the PCM format audio should be decoded to.
 *
 * @property targetSampleRate Optional target sample rate; `null` when not specified.
 * @property targetChannels Optional target number of channels; `null` when not specified.
 * @property targetBitDepth Target PCM bit depth; defaults to 16-bit.
 * @property normalizeAudio Whether to normalize audio levels; disabled by default.
 */
data class DecodingConfig(
    val targetSampleRate: Int? = null,
    val targetChannels: Int? = null,
    val targetBitDepth: Int = 16,
    val normalizeAudio: Boolean = false,
)
24
-
25
/**
 * A spectrogram together with its time and frequency axes.
 *
 * Because every property is an array, the compiler-generated `equals`/`hashCode` of a data
 * class would compare them by reference. They are overridden here so that two instances
 * holding the same values are equal and hash identically.
 *
 * @property spectrogram 2D array indexed as `[time, frequency]`.
 * @property timeStamps Time (in seconds) for each frame.
 * @property frequencies Frequencies (in Hz) for each mel bin.
 */
data class SpectrogramData(
    val spectrogram: Array<FloatArray>,
    val timeStamps: FloatArray,
    val frequencies: FloatArray,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is SpectrogramData) return false
        return spectrogram.contentDeepEquals(other.spectrogram) &&
            timeStamps.contentEquals(other.timeStamps) &&
            frequencies.contentEquals(other.frequencies)
    }

    override fun hashCode(): Int {
        var result = spectrogram.contentDeepHashCode()
        result = 31 * result + timeStamps.contentHashCode()
        result = 31 * result + frequencies.contentHashCode()
        return result
    }
}
30
-
31
- class AudioProcessor(private val filesDir: File) {
32
companion object {
    const val DCT_SQRT_DIVISOR = 2.0
    private const val N_FFT = 1024
    private const val N_CHROMA = 12

    // Tag passed to LogUtils calls made from this class.
    private const val CLASS_NAME = "AudioProcessor"

    // Held in the companion object so the sequence is maintained during pause/resume cycles.
    private val uniqueIdCounter = AtomicLong()

    /** Restarts the data-point id sequence at zero. */
    fun resetUniqueIdCounter() = uniqueIdCounter.set(0L)
}
44
-
45
/**
 * Raw audio bytes plus the format values needed to interpret them.
 *
 * [data] is a [ByteArray], which the compiler-generated `equals`/`hashCode` would compare by
 * reference; both are overridden so instances with identical bytes and format are equal.
 *
 * @property data The audio payload bytes.
 * @property sampleRate Sample rate of [data].
 * @property bitDepth Bits per sample.
 * @property channels Number of channels.
 * @property durationMs Duration in milliseconds; defaults to 0 when not supplied.
 */
data class AudioData(
    val data: ByteArray,
    val sampleRate: Int,
    val bitDepth: Int,
    val channels: Int,
    val durationMs: Long = 0,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is AudioData) return false
        return sampleRate == other.sampleRate &&
            bitDepth == other.bitDepth &&
            channels == other.channels &&
            durationMs == other.durationMs &&
            data.contentEquals(other.data)
    }

    override fun hashCode(): Int {
        var result = data.contentHashCode()
        result = 31 * result + sampleRate
        result = 31 * result + bitDepth
        result = 31 * result + channels
        result = 31 * result + durationMs.hashCode()
        return result
    }
}
46
-
47
- private var cumulativeMinAmplitude = Float.MAX_VALUE
48
- private var cumulativeMaxAmplitude = Float.NEGATIVE_INFINITY
49
-
50
/**
 * Loads a WAV file fully into memory and returns its sample data with format metadata.
 *
 * [filePath] may carry a `file://` prefix. The path is tried as given first; if nothing
 * exists there, the bare file name is looked up inside [filesDir].
 *
 * @param filePath Path or `file://` URI of the WAV file.
 * @return The loaded [AudioData], or `null` when the file is missing, malformed, too large
 *   to buffer, or any exception is raised while reading (the failure is logged).
 */
private fun loadAudioFile(filePath: String): AudioData? {
    try {
        val fileUri = filePath.removePrefix("file://")
        LogUtils.d(CLASS_NAME, "Processing WAV file: $fileUri")

        val file = File(fileUri).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: run {
                LogUtils.e(CLASS_NAME, "File not found: $fileUri")
                return null
            }

        // `use` closes the file handle on every exit path, including early returns and exceptions.
        return RandomAccessFile(file, "r").use { raf -> parseWavFile(raf) }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load WAV file: ${e.message}", e)
        return null
    }
}

/**
 * Parses the RIFF/WAVE structure of [raf] (positioned at offset 0) and reads the `data` chunk.
 *
 * Only 8-, 16- and 32-bit sample sizes are accepted. The `audioFormat` tag is logged but not
 * validated. I/O errors (including a truncated header) propagate to the caller.
 *
 * @return The loaded audio, or `null` when the file is not a usable WAV file.
 */
private fun parseWavFile(raf: RandomAccessFile): AudioProcessor.AudioData? {
    val fileSize = raf.length()

    // Read RIFF header
    val riffHeader = ByteArray(4).apply { raf.readFully(this) }
    if (String(riffHeader) != "RIFF") {
        LogUtils.e(CLASS_NAME, "Invalid RIFF header")
        return null
    }

    // Bytes 4..7 hold the RIFF chunk size; step over them so the next read lands on the form type.
    raf.seek(raf.filePointer + 4)

    // Read WAVE header
    val waveHeader = ByteArray(4).apply { raf.readFully(this) }
    if (String(waveHeader) != "WAVE") {
        LogUtils.e(CLASS_NAME, "Invalid WAVE header")
        return null
    }

    var fmtChunkFound = false
    var dataChunkFound = false
    var sampleRate = 0
    var channels = 0
    var bitDepth = 0
    var dataOffset = 0L
    var dataSize = 0L

    // Parse chunks until the data chunk is located or no full chunk header remains.
    while (!dataChunkFound && raf.filePointer < fileSize - 8) {
        val chunkId = ByteArray(4).apply { raf.readFully(this) }.toString(Charsets.UTF_8)
        val chunkSizeBytes = ByteArray(4).apply { raf.readFully(this) }
        // Chunk sizes are unsigned 32-bit little-endian values.
        val chunkSize = ByteBuffer.wrap(chunkSizeBytes).order(ByteOrder.LITTLE_ENDIAN).int.toLong() and 0xFFFFFFFFL
        // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
        val padding = chunkSize and 1L

        LogUtils.d(CLASS_NAME, "Found chunk: $chunkId ($chunkSize bytes)")

        when (chunkId) {
            "fmt " -> {
                if (chunkSize < 16) {
                    LogUtils.e(CLASS_NAME, "Invalid fmt chunk size")
                    return null
                }

                val formatData = ByteArray(16)
                raf.readFully(formatData)
                val formatBuffer = ByteBuffer.wrap(formatData).order(ByteOrder.LITTLE_ENDIAN)

                val audioFormat = formatBuffer.short
                channels = formatBuffer.short.toInt() and 0xFFFF
                sampleRate = formatBuffer.int
                val byteRate = formatBuffer.int
                val blockAlign = formatBuffer.short
                bitDepth = formatBuffer.short.toInt() and 0xFFFF

                LogUtils.d(CLASS_NAME, "Raw format data: ${formatData.joinToString(", ")}")
                LogUtils.d(CLASS_NAME, "Format chunk: audioFormat=$audioFormat, channels=$channels, sampleRate=$sampleRate, bitDepth=$bitDepth, byteRate=$byteRate, blockAlign=$blockAlign")

                if (bitDepth !in listOf(8, 16, 32)) {
                    LogUtils.e(CLASS_NAME, "Invalid bit depth: $bitDepth")
                    return null
                }
                // Both values are divisors in the duration calculation below.
                if (channels <= 0 || sampleRate <= 0) {
                    LogUtils.e(CLASS_NAME, "Invalid format: channels=$channels, sampleRate=$sampleRate")
                    return null
                }

                // Skip any format extension bytes (plus the pad byte) without running past EOF.
                val remainingFmtBytes = chunkSize - 16 + padding
                if (remainingFmtBytes > 0) {
                    raf.seek(minOf(raf.filePointer + remainingFmtBytes, fileSize))
                }
                fmtChunkFound = true
            }
            "data" -> {
                dataOffset = raf.filePointer
                dataSize = chunkSize
                dataChunkFound = true
            }
            else -> {
                // Skip unknown chunks, clamped to the end of the file.
                val skipBytes = chunkSize + padding
                if (skipBytes > 0) {
                    raf.seek(minOf(raf.filePointer + skipBytes, fileSize))
                }
            }
        }
    }

    if (!fmtChunkFound || !dataChunkFound) {
        LogUtils.e(CLASS_NAME, "Missing essential chunks (fmt=$fmtChunkFound, data=$dataChunkFound)")
        return null
    }

    // Calculate actual data size if it seems wrong
    if (dataSize <= 0 || dataSize > fileSize - dataOffset) {
        dataSize = fileSize - dataOffset
        LogUtils.d(CLASS_NAME, "Adjusted data size to: $dataSize")
    }

    // A ByteArray cannot hold more than Int.MAX_VALUE bytes; toInt() would silently wrap.
    if (dataSize > Int.MAX_VALUE) {
        LogUtils.e(CLASS_NAME, "WAV data too large to load into memory: $dataSize bytes")
        return null
    }

    LogUtils.d(CLASS_NAME, "Reading PCM data: offset=$dataOffset, size=$dataSize")

    val wavData = ByteArray(dataSize.toInt())
    raf.seek(dataOffset)
    raf.readFully(wavData)

    // Calculate duration in ms
    // Each sample is bitsPerSample/8 bytes, and we have 'channels' samples per frame
    val bytesPerFrame = channels * (bitDepth / 8)
    val numFrames = wavData.size / bytesPerFrame
    val durationMs = (numFrames * 1000L) / sampleRate

    LogUtils.d(CLASS_NAME, "WAV duration calculation: size=${wavData.size}, bytesPerFrame=$bytesPerFrame, numFrames=$numFrames, sampleRate=$sampleRate, duration=${durationMs}ms")

    return AudioData(
        data = wavData,
        sampleRate = sampleRate,
        channels = channels,
        bitDepth = bitDepth,
        durationMs = durationMs
    )
}
180
-
181
/**
 * Processes the audio data and extracts features.
 *
 * The bytes are converted to floats via `convertToFloatArray`, cut into segments of
 * `config.segmentDurationMs`, and one [DataPoint] is produced per segment (peak amplitude,
 * RMS, dB, silence flag and the features returned by `computeFeatures`). The instance-wide
 * cumulative amplitude range is updated as a side effect.
 *
 * NOTE(review): the converted samples are treated as one flat stream, so for multi-channel
 * input `durationMs`, `samples` and the byte positions look like they count every channel's
 * samples — confirm against callers.
 *
 * @param data The audio data in bytes.
 * @param config The recording configuration.
 * @return AudioAnalysisData containing the extracted features; an empty result (no data
 *   points, zero duration) when [data] is empty.
 * @throws IllegalArgumentException if `config.encoding` is not `pcm_8bit`, `pcm_16bit` or `pcm_32bit`.
 */
fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
    if (data.isEmpty()) {
        LogUtils.e(CLASS_NAME, "Received empty audio data")
        return AudioAnalysisData(
            segmentDurationMs = config.segmentDurationMs,
            durationMs = 0,
            bitDepth = 16,
            numberOfChannels = config.channels,
            sampleRate = config.sampleRate,
            samples = 0,
            dataPoints = emptyList(),
            amplitudeRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            rmsRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
            extractionTimeMs = 0f,
        )
    }

    val sampleRate = config.sampleRate.toFloat()
    val bitDepth = when (config.encoding) {
        "pcm_8bit" -> 8
        "pcm_16bit" -> 16
        "pcm_32bit" -> 32
        else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
    }
    val channelData = convertToFloatArray(data, bitDepth)
    val featureOptions = config.features

    val totalSamples = channelData.size
    // Clamp to at least one sample per segment: a zero here would make totalPoints
    // Int.MAX_VALUE (division by zero yields Infinity) and the loop would emit empty segments.
    val samplesPerSegment = ((config.segmentDurationMs / 1000.0) * sampleRate).toInt().coerceAtLeast(1)
    val totalPoints = ceil(totalSamples.toDouble() / samplesPerSegment).toInt()

    LogUtils.d(CLASS_NAME, "Extracting waveform totalSize=${data.size} with $totalSamples samples --> $totalPoints points")
    LogUtils.d(CLASS_NAME, "segmentDuration: ${config.segmentDurationMs}ms, samplesPerSegment: $samplesPerSegment")

    // Only used for the diagnostic log line below.
    val samplesPerPoint = ceil(channelData.size / totalPoints.toDouble()).toInt()
    LogUtils.d(CLASS_NAME, "Extracting waveform with samplesPerPoints=$samplesPerPoint")

    val dataPoints = mutableListOf<DataPoint>()
    var minAmplitude = Float.MAX_VALUE
    var maxAmplitude = Float.NEGATIVE_INFINITY
    var minRms = Float.MAX_VALUE
    var maxRms = Float.NEGATIVE_INFINITY
    // Calculate total duration in milliseconds based on sample rate and total samples
    val durationMs = (totalSamples.toFloat() / sampleRate * 1000).toInt()

    // Measure the time taken for audio processing
    val extractionTimeMs = measureTimeMillis {
        for (i in 0 until totalPoints) {
            val start = i * samplesPerSegment
            val end = min(start + samplesPerSegment, totalSamples)
            val segmentData = channelData.sliceArray(start until end)

            var sumSquares = 0f
            var zeroCrossings = 0
            var prevValue = 0f
            var localMinAmplitude = Float.MAX_VALUE
            // Amplitudes are absolute values, so 0 is the correct lower seed for the peak.
            // (Float.MIN_VALUE is the smallest *positive* float and would leak into silent segments.)
            var localMaxAmplitude = 0f

            for (value in segmentData) {
                sumSquares += value * value
                // A sign change between consecutive non-zero samples counts as a zero crossing.
                if (prevValue != 0f && value * prevValue < 0) zeroCrossings += 1
                prevValue = value

                val absValue = abs(value)
                localMinAmplitude = min(localMinAmplitude, absValue)
                localMaxAmplitude = max(localMaxAmplitude, absValue)
            }

            val features = computeFeatures(
                segmentData = segmentData,
                sampleRate = sampleRate,
                sumSquares = sumSquares,
                zeroCrossings = zeroCrossings,
                segmentLength = segmentData.size,
                featureOptions = featureOptions,
                minAmplitude = localMinAmplitude,
                maxAmplitude = localMaxAmplitude
            )
            val rms = features.rms
            val silent = rms < 0.01
            // log10(0) is -Infinity, so a segment whose rms is 0 reports dB = -Infinity.
            val dB = 20 * log10(rms.toDouble()).toFloat()
            minAmplitude = min(minAmplitude, localMinAmplitude)
            maxAmplitude = max(maxAmplitude, localMaxAmplitude)
            minRms = min(minRms, rms)
            maxRms = max(maxRms, rms)

            val bytesPerSample = bitDepth / 8
            val startPosition = start * bytesPerSample * config.channels
            val endPosition = end * bytesPerSample * config.channels

            // Update cumulative amplitude range
            cumulativeMinAmplitude = min(cumulativeMinAmplitude, localMinAmplitude)
            cumulativeMaxAmplitude = max(cumulativeMaxAmplitude, localMaxAmplitude)

            val dataPoint = DataPoint(
                id = uniqueIdCounter.getAndIncrement(),
                amplitude = localMaxAmplitude, // Always use peak amplitude
                rms = rms, // Always include RMS
                dB = dB,
                silent = silent,
                features = features,
                speech = SpeechFeatures(isActive = !silent),
                startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                startPosition = startPosition,
                endPosition = endPosition,
                samples = segmentData.size
            )

            dataPoints.add(dataPoint)
        }
    }

    return AudioAnalysisData(
        segmentDurationMs = config.segmentDurationMs,
        durationMs = durationMs,
        bitDepth = bitDepth,
        numberOfChannels = config.channels,
        sampleRate = config.sampleRate, // Use config.sampleRate instead of sampleRate
        samples = totalSamples, // Use totalSamples instead of samplesInRange
        dataPoints = dataPoints,
        amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
        rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
        extractionTimeMs = extractionTimeMs.toFloat()
    )
}
315
-
316
/**
 * Restores the running (cumulative) amplitude bounds to their sentinel values so that the
 * next analysed segment starts a fresh range.
 *
 * The minimum is reset to [Float.MAX_VALUE] and the maximum to [Float.MIN_VALUE]. Note that
 * [Float.MIN_VALUE] is the smallest *positive* float, not the most negative one.
 */
fun resetCumulativeAmplitudeRange() {
    cumulativeMaxAmplitude = Float.MIN_VALUE
    cumulativeMinAmplitude = Float.MAX_VALUE
}
320
-
321
/**
 * Converts little-endian PCM bytes into normalised float samples.
 *
 * - 8-bit input is read as unsigned PCM: each byte is widened to 0..255 and 128 is the zero
 *   level, so the result lies in [-1, 1).
 * - 16-bit input is read as signed shorts and divided by 32768.
 * - 32-bit input is read as signed ints and divided by [Int.MAX_VALUE].
 *
 * Trailing bytes that do not make up a whole sample are ignored.
 *
 * @param data The audio data in bytes.
 * @param bitDepth The bit depth of the audio data (8, 16 or 32).
 * @return One float per decoded sample.
 * @throws IllegalArgumentException if [bitDepth] is not 8, 16 or 32.
 */
private fun convertToFloatArray(data: ByteArray, bitDepth: Int): FloatArray {
    return when (bitDepth) {
        16 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            FloatArray(samples.remaining()) { index -> samples.get(index) / 32768.0f }
        }
        // Kotlin's Byte is signed; mask to 0..255 before removing the 128 offset, otherwise
        // every sample at or above the midpoint decodes to a value below -1.
        8 -> FloatArray(data.size) { index -> ((data[index].toInt() and 0xFF) - 128) / 128.0f }
        32 -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            FloatArray(samples.remaining()) { index -> samples.get(index) / Int.MAX_VALUE.toFloat() }
        }
        else -> throw IllegalArgumentException("Unsupported bit depth: $bitDepth")
    }
}
345
-
346
/**
 * Builds the [Features] record for one segment.
 *
 * RMS is always computed. Every other feature is computed only when its key in
 * [featureOptions] maps to `true`; otherwise it is reported as `0f`, an empty list, or `null`
 * (CRC32). MFCC, mel spectrogram, chroma, tempo, HNR, spectral contrast and tonnetz failures
 * are logged and replaced by the same neutral value. Spectral features and pitch are not
 * wrapped, so exceptions from those propagate to the caller.
 *
 * @param segmentData The samples of the segment.
 * @param sampleRate The sample rate of the audio data.
 * @param minAmplitude The minimum amplitude, passed through unchanged.
 * @param maxAmplitude The maximum amplitude, passed through unchanged.
 * @param sumSquares The precomputed sum of squared samples.
 * @param zeroCrossings The precomputed zero-crossing count.
 * @param segmentLength The number of samples the sums were taken over.
 * @param featureOptions Feature name to enabled flag.
 * @return The computed features.
 */
private fun computeFeatures(
    segmentData: FloatArray,
    sampleRate: Float,
    minAmplitude: Float,
    maxAmplitude: Float,
    sumSquares: Float,
    zeroCrossings: Int,
    segmentLength: Int,
    featureOptions: Map<String, Boolean>
): Features {
    fun wants(feature: String): Boolean = featureOptions[feature] == true

    fun logFailure(message: String, cause: Exception) {
        LogUtils.e(CLASS_NAME, message, cause)
    }

    val rms = sqrt(sumSquares / segmentLength)
    val energy = if (wants("energy")) sumSquares else 0f
    val zcr = if (wants("zcr")) zeroCrossings / segmentLength.toFloat() else 0f

    val mfcc = if (wants("mfcc")) {
        try {
            computeMFCC(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to extract MFCC: ${e.message}", e)
            emptyList()
        }
    } else {
        emptyList()
    }

    val melSpectrogram = if (wants("melSpectrogram")) {
        try {
            computeMelSpectrogram(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to compute mel spectrogram: ${e.message}", e)
            emptyList()
        }
    } else {
        emptyList()
    }

    val chroma = if (wants("chromagram")) {
        try {
            computeChroma(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to compute chroma: ${e.message}", e)
            emptyList()
        }
    } else {
        emptyList()
    }

    // A single FFT pass yields all four spectral descriptors, so any one of them triggers it.
    val needsSpectrum = wants("spectralCentroid") ||
        wants("spectralFlatness") ||
        wants("spectralRollOff") ||
        wants("spectralBandwidth")
    val spectralFeatures = if (needsSpectrum) {
        extractSpectralFeatures(segmentData, sampleRate)
    } else {
        SpectralFeatures()
    }

    val tempo = if (wants("tempo")) {
        try {
            extractTempo(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to extract tempo: ${e.message}", e)
            0f
        }
    } else {
        0f
    }

    val hnr = if (wants("hnr")) {
        try {
            extractHNR(segmentData)
        } catch (e: Exception) {
            logFailure("Failed to extract HNR: ${e.message}", e)
            0f
        }
    } else {
        0f
    }

    val spectralContrast = if (wants("spectralContrast")) {
        try {
            computeSpectralContrast(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to compute spectral contrast: ${e.message}", e)
            emptyList()
        }
    } else {
        emptyList()
    }

    val tonnetz = if (wants("tonnetz")) {
        try {
            computeTonnetz(segmentData, sampleRate)
        } catch (e: Exception) {
            logFailure("Failed to compute tonnetz: ${e.message}", e)
            emptyList()
        }
    } else {
        emptyList()
    }

    val pitch = if (wants("pitch")) estimatePitch(segmentData, sampleRate) else 0.0f

    // The checksum covers the float samples serialised as little-endian IEEE-754 words.
    val crc32Value = if (wants("crc32")) {
        val packed = ByteBuffer.allocate(segmentData.size * 4).order(ByteOrder.LITTLE_ENDIAN)
        for (sample in segmentData) {
            packed.putFloat(sample)
        }
        CRC32().apply { update(packed.array()) }.value
    } else {
        null
    }

    return Features(
        energy = energy,
        mfcc = mfcc,
        rms = rms,
        minAmplitude = minAmplitude,
        maxAmplitude = maxAmplitude,
        zcr = zcr,
        spectralCentroid = spectralFeatures.centroid,
        spectralFlatness = spectralFeatures.flatness,
        spectralRollOff = spectralFeatures.rollOff,
        spectralBandwidth = spectralFeatures.bandwidth,
        tempo = tempo,
        hnr = hnr,
        melSpectrogram = melSpectrogram,
        chromagram = chroma,
        spectralContrast = spectralContrast,
        tonnetz = tonnetz,
        pitch = pitch,
        crc32 = crc32Value
    )
}
465
-
466
/**
 * Estimates the tempo of a segment in beats per minute.
 *
 * The segment is cut into 2048-sample frames advanced by 512 samples. For each frame the
 * spectral flux (sum of positive magnitude increases against the previous frame) is taken as
 * onset strength. Strict local maxima of that envelope are treated as beats, and the tempo is
 * derived from the mean distance between consecutive maxima.
 *
 * @param segmentData The samples to analyse.
 * @param sampleRate The sample rate of [segmentData].
 * @return The estimated tempo, or 120 when fewer than two onset peaks are found (which
 *         includes every segment too short to yield three frames).
 */
private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
    val hopLength = 512
    val frameLength = 2048
    val binCount = frameLength / 2

    // The transform only depends on the frame length, so one instance serves every frame.
    val fft = FFT(frameLength)

    val onsetEnvelope = ArrayList<Float>()
    var previousSpectrum = FloatArray(binCount)

    // The loop bound guarantees frameStart + frameLength stays inside the segment, so each
    // frame can be copied directly without clamping or zero padding.
    for (frameStart in 0 until segmentData.size - frameLength step hopLength) {
        val fftData = segmentData.copyOfRange(frameStart, frameStart + frameLength)
        fft.realForward(fftData)

        val magnitudes = FloatArray(binCount) { bin ->
            val re = fftData[2 * bin]
            val im = fftData[2 * bin + 1]
            sqrt(re * re + im * im)
        }

        // Spectral flux: only increases in magnitude count towards an onset.
        var flux = 0f
        for (bin in 0 until binCount) {
            flux += maxOf(magnitudes[bin] - previousSpectrum[bin], 0f)
        }
        onsetEnvelope.add(flux)
        previousSpectrum = magnitudes
    }

    val peaks = ArrayList<Int>()
    for (i in 1 until onsetEnvelope.size - 1) {
        if (onsetEnvelope[i] > onsetEnvelope[i - 1] && onsetEnvelope[i] > onsetEnvelope[i + 1]) {
            peaks.add(i)
        }
    }

    if (peaks.size <= 1) {
        return 120f // Default tempo if no clear peaks found
    }

    // Peak indices are in frames; one frame step corresponds to hopLength samples.
    val averageInterval = peaks.zipWithNext { a, b -> b - a }.average().toFloat()
    return 60f * sampleRate / (hopLength * averageInterval)
}
515
-
516
/**
 * Computes centroid, flatness, roll-off and bandwidth from a single N_FFT-point transform.
 *
 * Only the first N_FFT samples are analysed; shorter input is zero padded after windowing.
 *
 * @param samples The segment samples.
 * @param sampleRate The sample rate of [samples].
 * @return The four spectral descriptors.
 */
private fun extractSpectralFeatures(samples: FloatArray, sampleRate: Float): SpectralFeatures {
    // Truncate before windowing so the Hann window spans exactly the analysed samples.
    val frame = if (samples.size > N_FFT) samples.copyOf(N_FFT) else samples
    val windowed = applyHannWindow(frame)

    // A fresh FloatArray is zero filled, which provides the padding for short input.
    val fftBuffer = FloatArray(N_FFT)
    windowed.copyInto(fftBuffer, 0, 0, minOf(windowed.size, N_FFT))

    FFT(N_FFT).realForward(fftBuffer)

    // Magnitudes for bins 0..N_FFT/2 inclusive (DC up to Nyquist).
    val nyquistBin = N_FFT / 2
    val magnitudeSpectrum = FloatArray(nyquistBin + 1)

    // Slot 1 of the transformed buffer is read below as the Nyquist component, so it must
    // not also be folded into the DC bin as an imaginary part. DC is purely real.
    magnitudeSpectrum[0] = abs(fftBuffer[0])
    for (bin in 1 until nyquistBin) {
        val re = fftBuffer[2 * bin]
        val im = fftBuffer[2 * bin + 1]
        magnitudeSpectrum[bin] = sqrt(re * re + im * im)
    }
    magnitudeSpectrum[nyquistBin] = abs(fftBuffer[1])

    // Flatness is defined on power, the other descriptors on magnitude.
    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { bin ->
        magnitudeSpectrum[bin] * magnitudeSpectrum[bin]
    }

    val centroid = computeSpectralCentroid(magnitudeSpectrum, sampleRate)
    val flatness = computeSpectralFlatness(powerSpectrum)
    val rollOff = computeSpectralRollOff(magnitudeSpectrum, sampleRate)
    val bandwidth = computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid)

    return SpectralFeatures(
        centroid = centroid,
        flatness = flatness,
        rollOff = rollOff,
        bandwidth = bandwidth
    )
}
565
-
566
/**
 * Returns the magnitude-weighted mean frequency of a spectrum.
 *
 * Bin `k` is assigned the frequency `k * sampleRate / N_FFT`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @return The centroid, or 0 when the magnitudes sum to exactly zero.
 */
private fun computeSpectralCentroid(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) return 0f

    var weightedFrequencies = 0f
    for (bin in magnitudeSpectrum.indices) {
        weightedFrequencies += bin * (sampleRate / N_FFT) * magnitudeSpectrum[bin]
    }
    return weightedFrequencies / totalMagnitude
}
576
-
577
/**
 * Returns the spectral flatness: the geometric mean of the power spectrum divided by its
 * arithmetic mean.
 *
 * The geometric mean is evaluated in log space, with 1e-10 added to every bin so that empty
 * bins do not produce `ln(0)`.
 *
 * @param powerSpectrum Power per frequency bin.
 * @return The flatness ratio, or 0 when the arithmetic mean is exactly zero.
 */
private fun computeSpectralFlatness(powerSpectrum: FloatArray): Float {
    val binCount = powerSpectrum.size

    val logSum = powerSpectrum.fold(0.0f) { acc, power -> acc + ln(power + 1e-10f) }
    val geometricMean = exp(logSum / binCount)
    val arithmeticMean = powerSpectrum.sum() / binCount

    return if (arithmeticMean == 0f) 0f else geometricMean / arithmeticMean
}
590
-
591
/**
 * Returns the roll-off frequency: the frequency of the first bin at which the running sum of
 * magnitudes reaches 85% of the total.
 *
 * The sums are taken over the magnitudes as given (they are not squared here). Bin `k` is
 * assigned the frequency `k * sampleRate / N_FFT`.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @return The roll-off frequency, or 0 if the threshold is never reached.
 */
private fun computeSpectralRollOff(magnitudeSpectrum: FloatArray, sampleRate: Float): Float {
    val threshold = magnitudeSpectrum.sum() * 0.85f

    var runningTotal = 0f
    for (bin in magnitudeSpectrum.indices) {
        runningTotal += magnitudeSpectrum[bin]
        if (runningTotal >= threshold) {
            return bin * (sampleRate / N_FFT)
        }
    }
    return 0f
}
605
-
606
/**
 * Returns the spectral bandwidth: the magnitude-weighted standard deviation of bin frequency
 * around [centroid].
 *
 * Bin `k` is assigned the frequency `k * sampleRate / (2 * magnitudeSpectrum.size)`, which is
 * kept as is to match the iOS calculation.
 *
 * NOTE(review): this bin spacing differs from the `sampleRate / N_FFT` spacing used by the
 * centroid and roll-off helpers in this file, so [centroid] may be on a slightly different
 * frequency scale. Confirm this is intended.
 *
 * @param magnitudeSpectrum Magnitude per frequency bin.
 * @param sampleRate The sample rate the spectrum was computed at.
 * @param centroid The spectral centroid to measure spread around.
 * @return The bandwidth, or 0 when the magnitudes sum to exactly zero.
 */
private fun computeSpectralBandwidth(
    magnitudeSpectrum: FloatArray,
    sampleRate: Float,
    centroid: Float
): Float {
    val totalMagnitude = magnitudeSpectrum.sum()
    if (totalMagnitude == 0f) return 0f

    val binCount = magnitudeSpectrum.size
    var weightedSpread = 0f
    for (bin in 0 until binCount) {
        val freq = bin * sampleRate / (2 * binCount)
        weightedSpread += magnitudeSpectrum[bin] * (freq - centroid).pow(2)
    }
    return sqrt(weightedSpread / totalMagnitude)
}
622
-
623
/**
 * Bundle of the four spectral descriptors produced by one spectrum analysis.
 *
 * Every field defaults to 0, so `SpectralFeatures()` represents "nothing computed".
 *
 * @property centroid Magnitude-weighted mean frequency.
 * @property flatness Geometric-to-arithmetic mean ratio of the power spectrum.
 * @property rollOff Frequency at which the cumulative magnitude reaches the roll-off threshold.
 * @property bandwidth Magnitude-weighted spread around the centroid.
 */
private data class SpectralFeatures(
    val centroid: Float = 0f,
    val flatness: Float = 0f,
    val rollOff: Float = 0f,
    val bandwidth: Float = 0f
)
629
-
630
/**
 * Puts all per-segment accumulators back to their starting state.
 *
 * The accumulators live in the caller, so each one is reset through a setter callback:
 * sums and counts go to zero, the local minimum goes to [Float.MAX_VALUE] and the local
 * maximum to [Float.MIN_VALUE]. Finally the sample buffer is emptied in place.
 *
 * @param sumSquaresUpdater Receives the reset value for the sum of squares.
 * @param zeroCrossingsUpdater Receives the reset value for the zero-crossing count.
 * @param localMinAmplitudeUpdater Receives the reset value for the local minimum amplitude.
 * @param localMaxAmplitudeUpdater Receives the reset value for the local maximum amplitude.
 * @param segmentData The segment sample buffer to clear.
 */
private fun resetSegmentData(
    sumSquaresUpdater: (Float) -> Unit,
    zeroCrossingsUpdater: (Int) -> Unit,
    localMinAmplitudeUpdater: (Float) -> Unit,
    localMaxAmplitudeUpdater: (Float) -> Unit,
    segmentData: MutableList<Float>
) {
    val zeroSum = 0f
    val zeroCount = 0

    sumSquaresUpdater.invoke(zeroSum)
    zeroCrossingsUpdater.invoke(zeroCount)
    localMinAmplitudeUpdater.invoke(Float.MAX_VALUE)
    localMaxAmplitudeUpdater.invoke(Float.MIN_VALUE)
    segmentData.clear()
}
651
-
652
/**
 * Computes 13 Mel-frequency cepstral coefficients for a block of samples.
 *
 * Steps: power spectrum, then 26 triangular mel filters, then the natural log of each filter
 * energy (floored at 1e-10), then a DCT-II scaled by `sqrt(2 / 26)`.
 *
 * @param samples The samples to analyse.
 * @param sampleRate The sample rate of [samples].
 * @return The 13 coefficients, or an empty list when the filterbank width does not match the
 *         power spectrum (the mismatch is logged).
 */
private fun computeMFCC(samples: FloatArray, sampleRate: Float): List<Float> {
    val filterCount = 26
    val coefficientCount = 13

    val powerSpectrum = prepareFFT(samples, sampleRate).component1()
    val melFilters = computeMelFilterbank(
        numFilters = filterCount,
        powerSpectrumSize = powerSpectrum.size,
        sampleRate = sampleRate
    )

    val widthsAgree = melFilters.all { it.size == powerSpectrum.size }
    if (!widthsAgree) {
        LogUtils.e(CLASS_NAME, "Mel filter size (${melFilters[0].size}) does not match power spectrum size (${powerSpectrum.size})")
        return emptyList()
    }

    // Log energy captured by each mel band.
    val logMelEnergies = FloatArray(filterCount)
    for (band in 0 until filterCount) {
        val weights = melFilters[band]
        var energy = 0f
        for (bin in powerSpectrum.indices) {
            energy += powerSpectrum[bin] * weights[bin]
        }
        logMelEnergies[band] = ln(maxOf(energy, 1e-10f))
    }

    // DCT-II across the bands decorrelates the log energies into cepstral coefficients.
    val coefficients = ArrayList<Float>(coefficientCount)
    for (k in 0 until coefficientCount) {
        var acc = 0f
        for (band in logMelEnergies.indices) {
            acc += logMelEnergies[band] * cos(PI * k * (2 * band + 1) / (2 * filterCount)).toFloat()
        }
        coefficients.add(acc * sqrt(2f / filterCount))
    }
    return coefficients
}
686
-
687
/**
 * Builds a bank of triangular filters whose edges are evenly spaced on the mel scale between
 * 0 Hz and half the sample rate.
 *
 * `numFilters + 2` edge frequencies are converted to bin indices with
 * `round(hz * powerSpectrumSize / sampleRate)`, clamped to the last bin. Filter `i` rises
 * from edge `i` to edge `i + 1` and falls from edge `i + 1` to edge `i + 2`.
 *
 * @param numFilters The number of Mel filters.
 * @param powerSpectrumSize The size of the power spectrum (the width of every filter row).
 * @param sampleRate The sample rate of the audio data.
 * @return One weight row per filter, each of length [powerSpectrumSize].
 */
private fun computeMelFilterbank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): Array<FloatArray> {
    val melLow = hzToMel(0f)
    val melHigh = hzToMel(sampleRate / 2)
    val melSpacing = (melHigh - melLow) / (numFilters + 1)

    // Edge bins: equally spaced mel points mapped back to Hz, then to clamped bin indices.
    val lastBin = powerSpectrumSize - 1
    val edgeBins = IntArray(numFilters + 2) { point ->
        val hz = melToHz(melLow + point * melSpacing)
        minOf((hz * powerSpectrumSize / sampleRate).roundToInt(), lastBin)
    }

    val filterbank = Array(numFilters) { FloatArray(powerSpectrumSize) }
    for (filter in 0 until numFilters) {
        val weights = filterbank[filter]
        val startBin = edgeBins[filter]
        val centerBin = edgeBins[filter + 1]
        val endBin = edgeBins[filter + 2]

        // Rising edge; the range is empty when the two edges collapse onto one bin.
        for (bin in startBin until centerBin) {
            weights[bin] = (bin - startBin).toFloat() / (centerBin - startBin).toFloat()
        }
        // Falling edge, starting at the peak (weight 1) on the centre bin.
        for (bin in centerBin until endBin) {
            weights[bin] = (endBin - bin).toFloat() / (endBin - centerBin).toFloat()
        }
    }
    return filterbank
}
742
-
743
/**
 * Applies a DCT-II to a list of log energies.
 *
 * Coefficient `k` is `sum_j logEnergies[j] * cos(PI * k * (j + 0.5) / n)` divided by
 * `sqrt(DCT_SQRT_DIVISOR * n)`, where `n` is the number of log energies.
 *
 * @param logEnergies The log energies.
 * @param numCoefficients The number of coefficients to compute.
 * @return The first [numCoefficients] DCT coefficients.
 */
private fun computeDCT(logEnergies: List<Float>, numCoefficients: Int): List<Float> {
    val n = logEnergies.size
    val coefficients = FloatArray(numCoefficients)

    for (k in coefficients.indices) {
        var acc = 0.0
        logEnergies.forEachIndexed { j, logEnergy ->
            acc += logEnergy * cos(PI * k * (j + 0.5) / n)
        }
        coefficients[k] = (acc / sqrt(DCT_SQRT_DIVISOR * n)).toFloat()
    }

    return coefficients.toList()
}
763
-
764
/**
 * Estimates the harmonics-to-noise ratio (in dB) of a segment from its autocorrelation.
 *
 * The autocorrelation value at the first sufficiently prominent peak (prominence of at least
 * 10% of the autocorrelation maximum) is taken as harmonic energy; the remainder of the
 * zero-lag value is taken as noise energy.
 *
 * The autocorrelation is evaluated directly and is quadratic in the segment length.
 *
 * @param segmentData The segment data.
 * @return `10 * log10(harmonic / noise)`, or 0 when no peak is found or when either energy
 *         is not strictly positive (the ratio would otherwise be NaN or infinite).
 */
private fun extractHNR(segmentData: FloatArray): Float {
    val frameSize = segmentData.size

    // autocorrelation[lag] = sum over j of x[j] * x[j + lag]
    val autocorrelation = FloatArray(frameSize) { lag ->
        var sum = 0f
        for (j in 0 until frameSize - lag) {
            sum += segmentData[j] * segmentData[j + lag]
        }
        sum
    }

    val maxAutocorrelation = autocorrelation.maxOrNull() ?: 0f
    val peaks = findPeaks(autocorrelation, minProminence = 0.1f * maxAutocorrelation)

    // Lag 0 is the signal energy itself and never counts as a harmonic peak.
    val firstPeakIndex = peaks.firstOrNull { it > 0 } ?: return 0f

    val harmonicEnergy = autocorrelation[firstPeakIndex]
    val noiseEnergy = autocorrelation[0] - harmonicEnergy

    // A local maximum of the autocorrelation can still be zero or negative; log10 of such a
    // ratio is undefined, so report "no measurable harmonicity" instead.
    return if (harmonicEnergy > 0f && noiseEnergy > 0f) {
        10 * log10(harmonicEnergy / noiseEnergy)
    } else {
        0f
    }
}
797
-
798
/**
 * Finds the interior indices that are strict local maxima with enough prominence.
 *
 * An index qualifies when its value is strictly greater than both neighbours and exceeds the
 * larger neighbour by at least [minProminence]. The first and last index are never reported.
 *
 * @param data The values to scan.
 * @param minProminence The minimum height above the larger neighbour.
 * @return The qualifying indices in ascending order.
 */
private fun findPeaks(data: FloatArray, minProminence: Float): List<Int> =
    (1 until data.size - 1).filter { i ->
        val left = data[i - 1]
        val right = data[i + 1]
        val isLocalMax = data[i] > left && data[i] > right
        isLocalMax && data[i] - maxOf(left, right) >= minProminence
    }
810
-
811
/**
 * Loads an audio file as PCM, trying the platform decoder first and the WAV parser second.
 *
 * The path is resolved by stripping a leading `file://`, then checking that path directly and,
 * failing that, a file of the same name inside `filesDir`. When [decodingConfig] is given, the
 * decoded PCM is passed through [processAudio] with its target sample rate, channel count and
 * normalisation flag.
 *
 * NOTE(review): the returned bit depth is taken from [decodingConfig] (or 16), but no bit-depth
 * conversion is visible in this function. Confirm the data really has that depth.
 *
 * @param fileUri A file path, optionally prefixed with `file://`.
 * @param decodingConfig Optional output format settings.
 * @return The loaded audio, or `null` when the file cannot be found or neither loader succeeds.
 */
fun loadAudioFromAnyFormat(fileUri: String, decodingConfig: DecodingConfig? = null): AudioData? {
    val cleanUri = fileUri.removePrefix("file://")

    val directFile = File(cleanUri)
    val file = if (directFile.exists()) {
        directFile
    } else {
        val inFilesDir = File(filesDir, directFile.name)
        if (!inFilesDir.exists()) {
            LogUtils.e(CLASS_NAME, "File not found in any location: $cleanUri")
            return null
        }
        inFilesDir
    }

    // Attempt 1: MediaExtractor. Any failure here is logged and falls through to the WAV path.
    val extractor = MediaExtractor()
    try {
        LogUtils.d(CLASS_NAME, "Attempting MediaExtractor with path: ${file.absolutePath}")
        extractor.setDataSource(file.absolutePath)

        val audioTrackIndex = (0 until extractor.trackCount).firstOrNull { track ->
            extractor.getTrackFormat(track).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        }

        if (audioTrackIndex != null) {
            val format = extractor.getTrackFormat(audioTrackIndex)
            extractor.selectTrack(audioTrackIndex)

            val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
            val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)

            // The duration is read as a long first; if that throws, it is parsed from a string.
            val totalDurationUs = try {
                format.getLong(MediaFormat.KEY_DURATION)
            } catch (e: Exception) {
                (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
            }
            LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")

            val totalDurationMs = totalDurationUs / 1000
            LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")

            val pcmData = decodeAudioToPCM(extractor, format)
            val processedData = decodingConfig?.let { cfg ->
                processAudio(
                    pcmData,
                    originalSampleRate,
                    cfg.targetSampleRate,
                    originalChannels,
                    cfg.targetChannels,
                    cfg.normalizeAudio
                )
            } ?: pcmData

            return AudioData(
                data = processedData,
                sampleRate = decodingConfig?.targetSampleRate ?: originalSampleRate,
                bitDepth = decodingConfig?.targetBitDepth ?: 16,
                channels = decodingConfig?.targetChannels ?: originalChannels,
                durationMs = totalDurationMs
            )
        }
    } catch (e: Exception) {
        LogUtils.d(CLASS_NAME, "MediaExtractor failed, attempting WAV parser: ${e.message}")
    } finally {
        extractor.release()
    }

    // Attempt 2: the WAV parser, only for files whose name ends in ".wav".
    if (!file.name.lowercase().endsWith(".wav")) {
        LogUtils.e(CLASS_NAME, "Failed to process audio file with both MediaExtractor and WAV parser")
        return null
    }

    LogUtils.d(CLASS_NAME, "Falling back to WAV parser")
    val wavData = loadAudioFile(file.absolutePath) ?: return null
    if (decodingConfig == null) {
        return wavData
    }

    val convertedData = processAudio(
        wavData.data,
        wavData.sampleRate,
        decodingConfig.targetSampleRate,
        wavData.channels,
        decodingConfig.targetChannels,
        decodingConfig.normalizeAudio
    )
    return AudioData(
        data = convertedData,
        sampleRate = decodingConfig.targetSampleRate ?: wavData.sampleRate,
        bitDepth = decodingConfig.targetBitDepth,
        channels = decodingConfig.targetChannels ?: wavData.channels,
        durationMs = wavData.durationMs
    )
}
904
-
905
/**
 * Decodes the track currently selected on [extractor] into raw PCM bytes.
 *
 * Input samples are fed to the codec until the extractor is exhausted. Output is then drained
 * until the codec itself reports end of stream, so frames still queued inside the decoder when
 * the last input is submitted are not lost.
 *
 * @param extractor An extractor with the audio track already selected.
 * @param format The format of that track; must carry a MIME type.
 * @return All decoded bytes in output order.
 * @throws IllegalArgumentException if [format] has no MIME type.
 */
private fun decodeAudioToPCM(extractor: MediaExtractor, format: MediaFormat): ByteArray {
    val mime = requireNotNull(format.getString(MediaFormat.KEY_MIME)) {
        "Audio track format has no MIME type"
    }
    val timeoutUs = 10000L
    var decoder: MediaCodec? = null

    try {
        decoder = MediaCodec.createDecoderByType(mime)
        decoder.configure(format, null, null, 0)
        decoder.start()

        val info = MediaCodec.BufferInfo()
        // A byte stream grows as a primitive array; a List<Byte> would box every byte.
        val pcm = java.io.ByteArrayOutputStream()

        var inputDone = false
        var outputDone = false
        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = decoder.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(decoder.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0) {
                        // No more input: tell the codec, then keep draining output below.
                        decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        decoder.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = decoder.dequeueOutputBuffer(info, timeoutUs)
            if (outputBufferId >= 0) {
                if (info.size > 0) {
                    val outputBuffer = checkNotNull(decoder.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    val chunk = ByteArray(info.size)
                    outputBuffer.get(chunk)
                    pcm.write(chunk, 0, chunk.size)
                }
                decoder.releaseOutputBuffer(outputBufferId, false)

                // Only the codec's own end-of-stream marker means every frame has been emitted.
                if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        return pcm.toByteArray()
    } finally {
        // stop() and release() are attempted independently so a failure in one does not
        // prevent the other.
        try {
            decoder?.stop()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error stopping decoder: ${e.message}")
        }
        try {
            decoder?.release()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
        }
    }
}
956
-
957
/**
 * Resamples 16-bit little-endian PCM to [targetSampleRate] using linear interpolation.
 *
 * Input with more than one channel is first averaged down to a single channel, so the
 * returned data is mono whenever [originalChannels] is greater than 1.
 *
 * @param pcmData 16-bit little-endian PCM bytes (interleaved when multi-channel).
 * @param originalSampleRate The sample rate of [pcmData].
 * @param targetSampleRate The sample rate to produce.
 * @param originalChannels The number of interleaved channels in [pcmData].
 * @return The resampled 16-bit little-endian PCM bytes.
 */
private fun resampleAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int,
    originalChannels: Int
): ByteArray {
    val decoded = ShortArray(pcmData.size / 2)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(decoded)

    val source = if (originalChannels > 1) convertToMono(decoded, originalChannels) else decoded

    val ratio = targetSampleRate.toDouble() / originalSampleRate
    val resampled = ShortArray((source.size * ratio).toInt()) { outIndex ->
        // Position of this output sample on the input time axis.
        val position = outIndex / ratio
        val left = position.toInt()
        // The last input sample has no right neighbour; interpolate against itself.
        val right = minOf(left + 1, source.size - 1)

        linearInterpolate(
            source[left].toDouble(),
            source[right].toDouble(),
            position - left
        ).toInt().toShort()
    }

    val encoded = ByteBuffer.allocate(resampled.size * 2).order(ByteOrder.LITTLE_ENDIAN)
    encoded.asShortBuffer().put(resampled)
    return encoded.array()
}
1000
-
1001
/**
 * Averages interleaved multi-channel samples into a single channel.
 *
 * Each output sample is the integer mean (truncated toward zero) of one frame of [channels]
 * consecutive input samples. Samples left over after the last complete frame are dropped.
 *
 * @param stereoData Interleaved samples; despite the name, any channel count is accepted.
 * @param channels The number of interleaved channels.
 * @return One sample per complete frame.
 */
private fun convertToMono(stereoData: ShortArray, channels: Int): ShortArray =
    ShortArray(stereoData.size / channels) { frame ->
        val frameStart = frame * channels
        var total = 0
        for (channel in 0 until channels) {
            total += stereoData[frameStart + channel]
        }
        (total / channels).toShort()
    }
1015
-
1016
/**
 * Returns the point that lies [fraction] of the way from [a] to [b]
 * (`fraction = 0` gives [a], `fraction = 1` gives [b]).
 */
private fun linearInterpolate(a: Double, b: Double, fraction: Double): Double =
    a + fraction * (b - a)
1019
-
1020
/**
 * Applies the optional resample, channel conversion and normalisation steps to 16-bit PCM.
 *
 * Each step is skipped unless it was requested: resampling needs a non-null
 * [targetSampleRate] that differs from [originalSampleRate], and normalisation needs
 * [normalize]. The returned data always has `targetChannels ?: originalChannels` channels.
 *
 * @param pcmData 16-bit little-endian PCM bytes.
 * @param originalSampleRate The sample rate of [pcmData].
 * @param targetSampleRate The desired sample rate, or `null` to keep the original.
 * @param originalChannels The channel count of [pcmData].
 * @param targetChannels The desired channel count, or `null` to keep the original.
 * @param normalize Whether to scale the result to full 16-bit range.
 * @return The processed PCM bytes.
 */
fun processAudio(
    pcmData: ByteArray,
    originalSampleRate: Int,
    targetSampleRate: Int?,
    originalChannels: Int,
    targetChannels: Int?,
    normalize: Boolean
): ByteArray {
    var processedData = pcmData
    // Channel layout of processedData at this point in the pipeline.
    var currentChannels = originalChannels

    // Only resample if target sample rate is explicitly specified and different
    if (targetSampleRate != null && originalSampleRate != targetSampleRate) {
        processedData = resampleAudio(processedData, originalSampleRate, targetSampleRate, originalChannels)
        // resampleAudio averages multi-channel input down to one channel, so from here on
        // the data must be treated as mono rather than as originalChannels.
        if (originalChannels > 1) {
            currentChannels = 1
        }
    }

    // Convert whenever the current layout differs from the layout the caller will be told
    // about. This also restores the original channel count after a resample-induced downmix
    // when no explicit target was given.
    val outputChannels = targetChannels ?: originalChannels
    if (currentChannels != outputChannels) {
        processedData = convertChannels(processedData, currentChannels, outputChannels)
    }

    // Only normalize if explicitly requested
    if (normalize) {
        processedData = normalizeAudio(processedData)
    }

    return processedData
}
1047
-
1048
/**
 * Peak-normalises 16-bit little-endian PCM so that its loudest sample reaches
 * [Short.MAX_VALUE].
 *
 * Every sample is multiplied by `Short.MAX_VALUE / peak` and truncated toward zero. Silent
 * input (peak of 0) is returned with its sample values unchanged. A trailing odd byte is
 * dropped.
 *
 * @param pcmData 16-bit little-endian PCM bytes.
 * @return The normalised 16-bit little-endian PCM bytes.
 */
private fun normalizeAudio(pcmData: ByteArray): ByteArray {
    val sampleCount = pcmData.size / 2
    val samples = ShortArray(sampleCount)
    ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples)

    // Widen to Int before abs so that Short.MIN_VALUE yields 32768 instead of overflowing.
    val peak = samples.fold(0) { loudest, sample -> maxOf(loudest, abs(sample.toInt())) }

    if (peak > 0) {
        val gain = Short.MAX_VALUE.toFloat() / peak
        for (index in samples.indices) {
            samples[index] = (samples[index] * gain).toInt().toShort()
        }
    }

    val encoded = ByteBuffer.allocate(sampleCount * 2).order(ByteOrder.LITTLE_ENDIAN)
    encoded.asShortBuffer().put(samples)
    return encoded.array()
}
1072
-
1073
/**
 * Converts PCM data between channel counts by delegating to
 * `AudioFormatUtils.convertChannels` with a fixed bit depth of 16.
 *
 * @param pcmData The PCM bytes to convert; treated as 16-bit samples.
 * @param originalChannels The channel count of [pcmData].
 * @param targetChannels The channel count to produce.
 * @return The converted PCM bytes.
 */
private fun convertChannels(pcmData: ByteArray, originalChannels: Int, targetChannels: Int): ByteArray {
    val bitDepth = 16
    return AudioFormatUtils.convertChannels(pcmData, originalChannels, targetChannels, bitDepth)
}
1078
-
1079
/**
 * Logs the first 44 bytes of a file as a hex dump, as raw characters, and decoded as the
 * fields of a canonical WAV header. Intended for debugging only.
 *
 * Multi-byte fields are read as little-endian at fixed offsets. Any failure (including a file
 * shorter than 44 bytes) is caught and logged rather than thrown.
 *
 * @param file The file whose header should be dumped.
 */
private fun debugWavHeader(file: File) {
    try {
        val header = ByteArray(44) // Standard WAV header size
        RandomAccessFile(file, "r").use { it.readFully(header) }

        val hexDump = header.joinToString(", ") { String.format("%02X", it) }
        LogUtils.d(CLASS_NAME, "WAV Header Bytes: $hexDump")

        val asciiDump = header.joinToString("") { it.toInt().toChar().toString() }
        LogUtils.d(CLASS_NAME, "ASCII: $asciiDump")

        val fields = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
        LogUtils.d(CLASS_NAME, """
            RIFF header: ${String(header, 0, 4)}
            File size: ${fields.getInt(4)}
            WAVE header: ${String(header, 8, 4)}
            fmt header: ${String(header, 12, 4)}
            Chunk size: ${fields.getInt(16)}
            Audio format: ${fields.getShort(20)}
            Channels: ${fields.getShort(22)}
            Sample rate: ${fields.getInt(24)}
            Byte rate: ${fields.getInt(28)}
            Block align: ${fields.getShort(32)}
            Bits per sample: ${fields.getShort(34)}
        """.trimIndent())
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to debug WAV header: ${e.message}", e)
    }
}
1107
-
1108
- fun generatePreview(
1109
- audioData: AudioData,
1110
- numberOfPoints: Int,
1111
- startTimeMs: Long? = null,
1112
- endTimeMs: Long? = null,
1113
- config: RecordingConfig
1114
- ): AudioAnalysisData {
1115
- val totalDurationMs = audioData.durationMs
1116
-
1117
- LogUtils.d(CLASS_NAME, "Total audio duration: ${totalDurationMs}ms")
1118
-
1119
- // Validate time range
1120
- if (startTimeMs != null) {
1121
- require(startTimeMs >= 0) { "startTime must be non-negative, got: $startTimeMs" }
1122
- require(startTimeMs <= totalDurationMs) { "startTime ($startTimeMs) is beyond audio duration ($totalDurationMs)" }
1123
- }
1124
-
1125
- if (endTimeMs != null) {
1126
- require(endTimeMs >= 0) { "endTime must be non-negative, got: $endTimeMs" }
1127
- if (endTimeMs > totalDurationMs) {
1128
- LogUtils.w(CLASS_NAME, "endTime ($endTimeMs) is beyond audio duration ($totalDurationMs), clamping to duration")
1129
- }
1130
- if (startTimeMs != null) {
1131
- require(startTimeMs < endTimeMs) { "startTime ($startTimeMs) must be less than endTime ($endTimeMs)" }
1132
- }
1133
- }
1134
-
1135
- // Calculate effective range
1136
- val effectiveStartMs = startTimeMs ?: 0L
1137
- val effectiveEndMs = (endTimeMs ?: totalDurationMs).coerceAtMost(totalDurationMs)
1138
- val durationMs = effectiveEndMs - effectiveStartMs
1139
-
1140
- LogUtils.d(CLASS_NAME, "Preview range: ${effectiveStartMs}ms to ${effectiveEndMs}ms (${durationMs}ms)")
1141
-
1142
- // Calculate sample range
1143
- val startSampleIndex = ((effectiveStartMs * audioData.sampleRate) / 1000).toInt()
1144
- val endSampleIndex = ((effectiveEndMs * audioData.sampleRate) / 1000).toInt().coerceAtMost(audioData.data.size)
1145
- val samplesInRange = endSampleIndex - startSampleIndex
1146
-
1147
- if (samplesInRange <= 0) {
1148
- throw IllegalArgumentException("Invalid sample range: contains no samples")
1149
- }
1150
-
1151
- val samplesPerPoint = (samplesInRange / numberOfPoints).coerceAtLeast(1)
1152
- val pointsPerSecond = numberOfPoints.toDouble() / (durationMs.toDouble() / 1000.0)
1153
-
1154
- val dataPoints = mutableListOf<DataPoint>()
1155
- var minAmplitude = Float.MAX_VALUE
1156
- var maxAmplitude = Float.MIN_VALUE
1157
- var minRms = Float.MAX_VALUE // Add minRms
1158
- var maxRms = Float.MIN_VALUE // Add maxRms
1159
-
1160
- val extractionTimeMs = measureTimeMillis {
1161
- for (i in 0 until numberOfPoints) {
1162
- val pointStartSample = startSampleIndex + (i * samplesPerPoint)
1163
- val pointEndSample = minOf(startSampleIndex + ((i + 1) * samplesPerPoint), endSampleIndex)
1164
-
1165
- if (pointStartSample >= pointEndSample) break
1166
-
1167
- try {
1168
- val segmentBytes = audioData.data.sliceArray(pointStartSample until pointEndSample)
1169
-
1170
- // Convert PCM bytes to float samples with proper bit depth handling
1171
- val segmentData = when (audioData.bitDepth) {
1172
- 16 -> convert16BitPcmToFloat(segmentBytes)
1173
- 32 -> convert32BitPcmToFloat(segmentBytes)
1174
- else -> convert8BitPcmToFloat(segmentBytes)
1175
- }
1176
-
1177
- // Calculate time points based on actual sample rate
1178
- val startTimePoint = ((pointStartSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()
1179
- val endTimePoint = ((pointEndSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()
1180
-
1181
- val rms = sqrt(segmentData.map { it * it }.average().toFloat())
1182
- val amplitude = segmentData.maxOf { abs(it) } // Always use peak amplitude
1183
-
1184
- minAmplitude = minOf(minAmplitude, amplitude)
1185
- maxAmplitude = maxOf(maxAmplitude, amplitude)
1186
- minRms = minOf(minRms, rms)
1187
- maxRms = maxOf(maxRms, rms)
1188
-
1189
- dataPoints.add(DataPoint(
1190
- id = i.toLong(),
1191
- amplitude = amplitude, // Peak amplitude
1192
- rms = rms, // RMS value
1193
- dB = 20 * log10(amplitude.toDouble()).toFloat(),
1194
- silent = amplitude < 0.01,
1195
- features = null,
1196
- speech = null,
1197
- startTime = startTimePoint,
1198
- endTime = endTimePoint,
1199
- startPosition = pointStartSample,
1200
- endPosition = pointEndSample,
1201
- samples = segmentData.size
1202
- ))
1203
- } catch (e: Exception) {
1204
- LogUtils.e(CLASS_NAME, "Error processing segment $i: ${e.message}")
1205
- throw IllegalStateException("Failed to process audio segment: ${e.message}", e)
1206
- }
1207
- }
1208
- }
1209
-
1210
- if (dataPoints.isEmpty()) {
1211
- throw IllegalStateException("No data points were generated")
1212
- }
1213
-
1214
- return AudioAnalysisData(
1215
- segmentDurationMs = config.segmentDurationMs,
1216
- durationMs = durationMs.toInt(),
1217
- bitDepth = audioData.bitDepth,
1218
- numberOfChannels = audioData.channels,
1219
- sampleRate = audioData.sampleRate,
1220
- samples = samplesInRange,
1221
- dataPoints = dataPoints,
1222
- amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
1223
- rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
1224
- extractionTimeMs = extractionTimeMs.toFloat()
1225
- )
1226
- }
1227
-
1228
- // Add these conversion helpers
1229
/**
 * Decodes little-endian signed 16-bit PCM into floats.
 *
 * Each sample is divided by [Short.MAX_VALUE]; a trailing odd byte is ignored.
 *
 * @param bytes raw PCM bytes
 * @return one float per complete 16-bit sample
 */
private fun convert16BitPcmToFloat(bytes: ByteArray): FloatArray {
    val pcm = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)
    return FloatArray(bytes.size / 2) { index ->
        pcm.getShort(index * 2).toFloat() / Short.MAX_VALUE
    }
}
1234
-
1235
/**
 * Decodes little-endian signed 32-bit integer PCM into floats.
 *
 * Each sample is divided by [Int.MAX_VALUE]; trailing bytes that do not form a full
 * 4-byte sample are ignored.
 *
 * @param bytes raw PCM bytes
 * @return one float per complete 32-bit sample
 */
private fun convert32BitPcmToFloat(bytes: ByteArray): FloatArray {
    val pcm = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)
    return FloatArray(bytes.size / 4) { index ->
        pcm.getInt(index * 4).toFloat() / Int.MAX_VALUE
    }
}
1240
-
1241
/**
 * Decodes unsigned 8-bit PCM (midpoint 128) into floats.
 *
 * Kotlin's [Byte] is signed, so each byte is masked with `0xFF` before the 128 offset is
 * removed. Without the mask, silence (0x80) sign-extends to -128 and decodes to about -2.0
 * instead of 0.0.
 *
 * @param bytes raw PCM bytes, one sample per byte
 * @return one float per byte: 0x80 maps to 0.0, 0xFF to 1.0 and 0x00 to -128/127
 */
private fun convert8BitPcmToFloat(bytes: ByteArray): FloatArray {
    return FloatArray(bytes.size) { index ->
        ((bytes[index].toInt() and 0xFF) - 128).toFloat() / 127f
    }
}
1244
-
1245
/**
 * Loads the audio between [startTimeMs] and [endTimeMs] from [fileUri].
 *
 * Files ending in `.wav` whose header size can be determined by [getWavHeaderSize] go
 * through the WAV range loader; everything else (including `.wav` files with an unreadable
 * header) goes through the compressed-audio loader.
 *
 * @param config decoding options; when null a 16-bit, non-normalising default is used
 * @return the loaded range, or null when loading fails (the failure is logged)
 */
fun loadAudioRange(fileUri: String, startTimeMs: Long, endTimeMs: Long, config: DecodingConfig? = null): AudioData? {
    return try {
        val decoding = config ?: DecodingConfig(
            targetSampleRate = null,
            targetChannels = null,
            targetBitDepth = 16,
            normalizeAudio = false
        )

        val hasWavExtension = fileUri.lowercase().endsWith(".wav")
        // The header is only inspected for files that claim to be WAV.
        val wavHeaderSize = if (hasWavExtension) getWavHeaderSize(fileUri) else null

        if (wavHeaderSize != null) {
            LogUtils.d(CLASS_NAME, "Loading WAV range with header size: $wavHeaderSize bytes")
            loadWavRange(fileUri, startTimeMs, endTimeMs, decoding, wavHeaderSize)
        } else {
            if (hasWavExtension) {
                LogUtils.w(CLASS_NAME, "File has .wav extension but invalid header, falling back to compressed loader")
            }
            LogUtils.d(CLASS_NAME, "Loading compressed audio range")
            loadCompressedAudioRange(fileUri, startTimeMs, endTimeMs, decoding)
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load audio range: ${e.message}", e)
        null
    }
}
1279
-
1280
/**
 * Reads the PCM bytes for a time range directly from a WAV file.
 *
 * Byte offsets are snapped down to whole frames (channels x bytes-per-sample) so a range
 * never starts in the middle of a sample. The requested range is clamped to the file
 * length, and the stream is skipped and read in loops because a single `skip`/`read` call
 * may process fewer bytes than requested. The returned data is therefore never padded with
 * zeros that were not in the file.
 *
 * @param fileUri file path or `file://` URI; also looked up by name inside `filesDir`
 * @param startTimeMs range start in milliseconds
 * @param endTimeMs range end in milliseconds
 * @param config decoding options; only `targetBitDepth` is used here
 * @param headerSize number of bytes before the audio data
 * @return the loaded range, or null on any failure (the failure is logged)
 */
private fun loadWavRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig,
    headerSize: Int
): AudioData? {
    try {
        val file = File(fileUri.removePrefix("file://")).takeIf { it.exists() }
            ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
            ?: throw IllegalArgumentException("File not found: $fileUri")

        // Use existing method to get audio format
        val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")

        val frameSize = (format.channels * (format.bitDepth / 8)).coerceAtLeast(1)
        val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)

        // Round each offset down to a frame boundary; negative times are treated as 0.
        val startByteOffset = ((startTimeMs * bytesPerSecond) / 1000).coerceAtLeast(0L)
            .let { it - it % frameSize }
        val endByteOffset = ((endTimeMs * bytesPerSecond) / 1000).coerceAtLeast(0L)
            .let { it - it % frameSize }

        val startByte = headerSize + startByteOffset
        val endByte = headerSize + endByteOffset

        LogUtils.d(CLASS_NAME, """
            Loading WAV range:
            - headerSize: $headerSize
            - startByte: $startByte
            - endByte: $endByte
            - bytesPerSecond: $bytesPerSecond
        """.trimIndent())

        // Never ask for more than the file holds, and keep the length a whole number of frames.
        val availableEnd = minOf(endByte, file.length())
        val rangeLength = (availableEnd - startByte).coerceAtLeast(0L).let { it - it % frameSize }
        require(rangeLength <= Int.MAX_VALUE) { "Requested range is too large: $rangeLength bytes" }

        var audioDataBytes = ByteArray(rangeLength.toInt())
        var filled = 0
        FileInputStream(file).use { fis ->
            var toSkip = startByte
            while (toSkip > 0) {
                val skipped = fis.skip(toSkip)
                if (skipped <= 0) error("Could not seek to byte $startByte")
                toSkip -= skipped
            }
            while (filled < audioDataBytes.size) {
                val read = fis.read(audioDataBytes, filled, audioDataBytes.size - filled)
                if (read < 0) break
                filled += read
            }
        }
        if (filled < audioDataBytes.size) {
            // Hit end of file early: drop the part of the buffer that was never filled.
            audioDataBytes = audioDataBytes.copyOf(filled)
        }

        // Apply bit depth conversion if needed
        var effectiveBitDepth = format.bitDepth
        if (config.targetBitDepth != format.bitDepth) {
            audioDataBytes = AudioFormatUtils.convertBitDepth(
                audioDataBytes,
                format.bitDepth,
                config.targetBitDepth
            )
            effectiveBitDepth = config.targetBitDepth
            LogUtils.d(CLASS_NAME, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
        }

        return AudioData(
            data = audioDataBytes,
            sampleRate = format.sampleRate,
            channels = format.channels,
            bitDepth = effectiveBitDepth,
            durationMs = endTimeMs - startTimeMs
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load WAV range: ${e.message}", e)
        return null
    }
}
1340
-
1341
/**
 * Decodes a time range of a compressed audio file to PCM with [MediaExtractor] and
 * [MediaCodec].
 *
 * Input and output end-of-stream are tracked separately: after the last input buffer is
 * queued the loop keeps draining until the decoder reports end-of-stream or the output
 * buffer is full. Copies into the output buffer are bounded by its remaining capacity, and
 * cleanup in `finally` cannot prevent the extractor from being released.
 *
 * NOTE(review): the decoder output is copied as-is. Nothing in this function resamples or
 * remixes to `config.targetSampleRate` / `config.targetChannels`, although the result is
 * labelled with those values. Confirm callers only pass targets matching the source.
 *
 * @param fileUri file path or `file://` URI; track 0 is decoded
 * @param startTimeMs requested start, clamped to the track duration
 * @param endTimeMs requested end, clamped to `[start, duration]`
 * @param config decoding options used to size the output buffer and label the result
 * @return the decoded range, or null on any failure (the failure is logged)
 */
private fun loadCompressedAudioRange(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig
): AudioData? {
    val extractor = MediaExtractor()
    var decoder: MediaCodec? = null
    var decoderStarted = false

    try {
        extractor.setDataSource(fileUri.removePrefix("file://"))
        val format = extractor.getTrackFormat(0)
        extractor.selectTrack(0)

        val originalSampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val originalChannels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        val totalDurationUs = try {
            format.getLong(MediaFormat.KEY_DURATION)
        } catch (e: Exception) {
            (format.getString(MediaFormat.KEY_DURATION) ?: "-1").toLong()
        }
        LogUtils.d(CLASS_NAME, "Raw duration from format: ${totalDurationUs}us")

        val totalDurationMs = totalDurationUs / 1000
        LogUtils.d(CLASS_NAME, "Final duration: ${totalDurationMs}ms")

        // Calculate valid time range
        val validStartMs = startTimeMs.coerceIn(0L, totalDurationMs)
        val validEndMs = endTimeMs.coerceIn(validStartMs, totalDurationMs)
        val effectiveDurationMs = validEndMs - validStartMs

        // Initialize decoder
        val mime = format.getString(MediaFormat.KEY_MIME)
            ?: throw IllegalArgumentException("Track 0 has no MIME type")
        val codec = MediaCodec.createDecoderByType(mime).also { decoder = it }
        codec.configure(format, null, null, 0)
        codec.start()
        decoderStarted = true

        // Seek to start position if needed
        if (validStartMs > 0) {
            extractor.seekTo(validStartMs * 1000, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
        }

        // Calculate buffer sizes
        val targetSampleRate = config.targetSampleRate ?: originalSampleRate
        val targetChannels = config.targetChannels ?: originalChannels
        val targetBitDepth = config.targetBitDepth ?: 16
        val bytesPerSample = targetBitDepth / 8
        val samplesPerSecond = targetSampleRate * targetChannels
        val totalBytes = (effectiveDurationMs * samplesPerSecond * bytesPerSample) / 1000
        require(totalBytes in 0..Int.MAX_VALUE.toLong()) { "Requested range is too large: $totalBytes bytes" }

        LogUtils.d(CLASS_NAME, """
            Loading audio range:
            - start: ${validStartMs}ms
            - end: ${validEndMs}ms
            - duration: ${effectiveDurationMs}ms
            - bytes: $totalBytes
            - format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
        """.trimIndent())

        val outputBuffer = ByteBuffer.allocate(totalBytes.toInt())
        val bufferInfo = MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false

        while (!outputDone) {
            // Feed input until the extractor runs dry or passes the end of the range.
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = codec.getInputBuffer(inputBufferId)
                        ?: error("Decoder returned no input buffer for id $inputBufferId")
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)

                    if (sampleSize < 0 || extractor.sampleTime > validEndMs * 1000) {
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            // Drain output; this continues after input EOS so queued frames are not lost.
            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, 10000)
            if (outputBufferId >= 0) {
                val decodedBuffer = codec.getOutputBuffer(outputBufferId)
                if (decodedBuffer != null && bufferInfo.size > 0) {
                    // Copy at most what still fits, so put() cannot overflow.
                    val copyable = minOf(bufferInfo.size, outputBuffer.remaining())
                    decodedBuffer.position(bufferInfo.offset)
                    decodedBuffer.limit(bufferInfo.offset + copyable)
                    outputBuffer.put(decodedBuffer)
                }
                codec.releaseOutputBuffer(outputBufferId, false)

                val reachedEos = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
                if (reachedEos || !outputBuffer.hasRemaining()) {
                    outputDone = true
                }
            }
        }

        // Prepare the final byte array
        outputBuffer.flip()
        val audioData = ByteArray(outputBuffer.remaining())
        outputBuffer.get(audioData)

        return AudioData(
            data = audioData,
            sampleRate = targetSampleRate,
            channels = targetChannels,
            bitDepth = targetBitDepth,
            durationMs = endTimeMs - startTimeMs // Requested (unclamped) range, as before
        ).also {
            LogUtils.d(CLASS_NAME, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to load compressed audio range: ${e.message}", e)
        return null
    } finally {
        decoder?.let { codec ->
            // stop() is only valid on a started codec; neither call may block the releases below.
            if (decoderStarted) {
                try {
                    codec.stop()
                } catch (e: Exception) {
                    LogUtils.e(CLASS_NAME, "Failed to stop decoder: ${e.message}", e)
                }
            }
            try {
                codec.release()
            } catch (e: Exception) {
                LogUtils.e(CLASS_NAME, "Failed to release decoder: ${e.message}", e)
            }
        }
        extractor.release()
    }
}
1470
-
1471
- // Future audio editing methods
1472
/**
 * Extracts a time range from an audio file and writes it to a PCM WAV file in `filesDir`.
 *
 * The 44-byte header is assembled in one little-endian buffer. `RandomAccessFile.writeInt`
 * and `writeShort` emit big-endian values, which would corrupt the size, byte-rate,
 * block-align and bits-per-sample fields. The output file is truncated first so a longer
 * pre-existing file cannot leave stale bytes after the new data.
 *
 * @param fileUri source file path or `file://` URI
 * @param startTimeMs range start in milliseconds
 * @param endTimeMs range end in milliseconds
 * @param config decoding options; a default [DecodingConfig] is used when null
 * @param outputFileName output name inside `filesDir`; defaults to `trimmed_<timestamp>.wav`
 * @return the trimmed audio, or null when loading or writing fails (the failure is logged)
 */
fun trimAudio(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null,
    outputFileName: String? = null
): AudioData? {
    try {
        // Load the specified range
        val audioData = loadAudioRange(fileUri, startTimeMs, endTimeMs, config ?: DecodingConfig())
            ?: return null

        // Generate output filename if not provided
        val outputFile = File(filesDir, outputFileName ?: "trimmed_${System.currentTimeMillis()}.wav")

        val durationMs = (endTimeMs - startTimeMs).toInt()

        LogUtils.d(CLASS_NAME, """
            Trimming audio:
            - start: ${startTimeMs}ms
            - end: ${endTimeMs}ms
            - duration: ${durationMs}ms
            - output: ${outputFile.name}
        """.trimIndent())

        val bytesPerSample = audioData.bitDepth / 8
        val dataSize = audioData.data.size
        val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN).apply {
            // RIFF header
            put("RIFF".toByteArray())
            putInt(dataSize + 36) // File size minus the 8-byte RIFF preamble
            put("WAVE".toByteArray())

            // fmt chunk
            put("fmt ".toByteArray())
            putInt(16) // Subchunk1Size (16 for PCM)
            putShort(1) // AudioFormat (1 for PCM)
            putShort(audioData.channels.toShort())
            putInt(audioData.sampleRate)
            putInt(audioData.sampleRate * audioData.channels * bytesPerSample) // ByteRate
            putShort((audioData.channels * bytesPerSample).toShort()) // BlockAlign
            putShort(audioData.bitDepth.toShort()) // BitsPerSample

            // data chunk
            put("data".toByteArray())
            putInt(dataSize) // Subchunk2Size
        }.array()

        RandomAccessFile(outputFile, "rw").use { raf ->
            raf.setLength(0)
            raf.write(header)
            raf.write(audioData.data)
        }

        // Debug WAV header to verify
        debugWavHeader(outputFile)

        // Return the trimmed audio data
        return AudioData(
            data = audioData.data,
            sampleRate = audioData.sampleRate,
            channels = audioData.channels,
            bitDepth = audioData.bitDepth
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to trim audio: ${e.message}", e)
        return null
    }
}
1555
-
1556
/**
 * Placeholder for removing the section between [startTimeMs] and [endTimeMs].
 *
 * Not implemented: always returns null.
 *
 * TODO: Implement removing a section by concatenating before and after ranges.
 * This will use loadAudioRange to get two sections and join them.
 */
fun removeSection(
    fileUri: String,
    startTimeMs: Long,
    endTimeMs: Long,
    config: DecodingConfig? = null
): AudioData? = null
1566
-
1567
/**
 * Placeholder for concatenating several audio sections into one.
 *
 * Not implemented: always returns null.
 *
 * TODO: Implement joining multiple audio sections.
 * This will be used by removeSection and other future editing features.
 */
fun joinAudioSections(
    sections: List<AudioData>,
    config: DecodingConfig? = null
): AudioData? = null
1575
-
1576
- // Helper method for future editing features
1577
/**
 * Placeholder for converting audio to another sample rate, channel count or bit depth.
 *
 * Not implemented: the input is returned unchanged and the target arguments are ignored.
 *
 * TODO: Implement audio format conversion.
 * This will help ensure consistent format when joining sections.
 */
private fun convertAudioFormat(
    audioData: AudioData,
    targetSampleRate: Int? = null,
    targetChannels: Int? = null,
    targetBitDepth: Int? = null
): AudioData = audioData
1587
-
1588
- // Add new function to process entire file
1589
/**
 * Computes basic features for a whole file by treating it as a single segment.
 *
 * The PCM bytes are converted to floats, then the sum of squares, zero-crossing count and
 * amplitude extremes are handed to [computeFeatures] with an empty feature-option map.
 *
 * @param audioData decoded audio to analyse
 * @return the features computed over all samples
 */
fun processEntireFile(audioData: AudioData): Features {
    val floatSamples = convertToFloatArray(audioData.data, audioData.bitDepth)

    return computeFeatures(
        segmentData = floatSamples,
        sampleRate = audioData.sampleRate.toFloat(),
        sumSquares = floatSamples.sumOf { it * it.toDouble() }.toFloat(),
        zeroCrossings = countZeroCrossings(floatSamples),
        segmentLength = floatSamples.size,
        // Extremes fall back to 0 when there are no samples.
        minAmplitude = floatSamples.minOrNull() ?: 0f,
        maxAmplitude = floatSamples.maxOrNull() ?: 0f,
        featureOptions = mapOf() // Dont compute complex features
    )
}
1611
-
1612
/**
 * Counts adjacent sample pairs whose product is strictly negative, i.e. sign changes.
 * Pairs that include a zero sample are not counted.
 */
private fun countZeroCrossings(data: FloatArray): Int =
    (1 until data.size).count { index -> data[index - 1] * data[index] < 0 }
1619
-
1620
/** Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`. */
private fun hzToMel(hz: Float): Float {
    val ratio = 1f + hz / 700f
    return 2595f * log10(ratio)
}
1623
-
1624
/** Converts a mel value back to Hz using `700 * (10^(mel / 2595) - 1)`. */
private fun melToHz(mel: Float): Float {
    val exponent = mel / 2595f
    return 700f * (10f.pow(exponent) - 1f)
}
1627
-
1628
/**
 * Returns a copy of [samples] multiplied by a symmetric Hann window.
 *
 * A single-sample input is returned unchanged: the window formula divides by `size - 1`,
 * which is 0/0 (NaN) for one sample, so the window is taken to be 1 in that case.
 *
 * @param samples input samples; not modified
 * @return a new array of the same length
 */
private fun applyHannWindow(samples: FloatArray): FloatArray {
    val size = samples.size
    if (size == 1) return samples.copyOf()

    return FloatArray(size) { i ->
        val multiplier = 0.5f * (1f - cos(2f * PI.toFloat() * i / (size - 1)))
        samples[i] * multiplier
    }
}
1636
-
1637
- // Generate a Hann window of a specific size (new, avoids modifying applyHannWindow)
1638
/**
 * Builds a symmetric Hann window of [size] coefficients.
 *
 * A window of size 1 is `[1.0]`: the general formula divides by `size - 1` and would give
 * NaN there.
 *
 * @param size number of coefficients
 * @return the window coefficients
 */
private fun generateHannWindow(size: Int): FloatArray {
    if (size == 1) return floatArrayOf(1f)

    return FloatArray(size) { i ->
        0.5f * (1f - cos(2f * PI.toFloat() * i / (size - 1)))
    }
}
1643
-
1644
- // Main function to extract mel spectrogram
1645
/**
 * Computes a mel spectrogram: windowed STFT power, mel filterbank, then optional natural-log
 * scaling and min-max normalisation.
 *
 * Normalisation tracks the extremes starting from +/- infinity. `Float.MIN_VALUE` is the
 * smallest positive float, not the most negative one, so using it as the initial maximum
 * gives a wrong range whenever every value is negative (typical after log scaling).
 *
 * @param audioData decoded audio to analyse
 * @param windowSizeMs analysis window length in milliseconds
 * @param hopLengthMs distance between frame starts in milliseconds
 * @param nMels number of mel bands
 * @param fftLength FFT size; must be at least the window length in samples
 * @param fMin lowest filterbank frequency in Hz
 * @param fMax highest filterbank frequency in Hz (defaults to half the sample rate)
 * @param windowType "hann" or "hamming" (case-insensitive)
 * @param logScaling replace each value with `ln(max(1e-10, value))`
 * @param normalize rescale all values to [0, 1] when they are not all equal
 * @return the spectrogram with per-frame timestamps (seconds) and mel band frequencies
 * @throws IllegalArgumentException for an unsupported [windowType], or when the window or
 *   hop converts to fewer than one sample, or the window is longer than [fftLength]
 */
fun extractMelSpectrogram(
    audioData: AudioData,
    windowSizeMs: Float = 25f, // Default 25ms window
    hopLengthMs: Float = 10f, // Default 10ms hop
    nMels: Int = 128, // Number of mel bins
    fftLength: Int = 2048, // FFT size
    fMin: Float = 0f, // Minimum frequency
    fMax: Float = audioData.sampleRate.toFloat() / 2, // Nyquist frequency
    windowType: String = "hann", // Add parameter
    logScaling: Boolean = true, // Apply log scaling
    normalize: Boolean = false // Normalize output
): SpectrogramData {
    val sampleRate = audioData.sampleRate.toFloat()
    val samples = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Convert ms to samples
    val windowSizeSamples = (windowSizeMs * sampleRate / 1000).toInt()
    val hopLengthSamples = (hopLengthMs * sampleRate / 1000).toInt()

    require(windowSizeSamples in 1..fftLength) {
        "windowSizeMs ($windowSizeMs) gives $windowSizeSamples samples; expected 1..$fftLength (fftLength)"
    }
    require(hopLengthSamples > 0) {
        "hopLengthMs ($hopLengthMs) gives $hopLengthSamples samples; expected at least 1"
    }

    val window = when (windowType.lowercase()) {
        "hann" -> generateHannWindow(windowSizeSamples)
        "hamming" -> FloatArray(windowSizeSamples) { i ->
            0.54f - 0.46f * cos(2f * PI.toFloat() * i / (windowSizeSamples - 1))
        }
        else -> throw IllegalArgumentException("Unsupported windowType: $windowType")
    }

    // Compute STFT
    val stft = computeSTFT(samples, fftLength, windowSizeSamples, hopLengthSamples, window)

    // Apply mel filterbank
    val melSpectrogram = applyMelFilterbank(stft, sampleRate, nMels, fftLength, fMin, fMax)

    // Post-processing: log scaling and normalization
    if (logScaling) {
        for (row in melSpectrogram) {
            for (j in row.indices) {
                // Floor at 1e-10 so silent bands do not become -infinity.
                row[j] = ln(max(1e-10f, row[j]))
            }
        }
    }
    if (normalize) {
        // Find min and max values across the entire spectrogram
        var minVal = Float.POSITIVE_INFINITY
        var maxVal = Float.NEGATIVE_INFINITY
        for (row in melSpectrogram) {
            for (value in row) {
                if (value < minVal) minVal = value
                if (value > maxVal) maxVal = value
            }
        }

        // An empty or constant spectrogram has no positive range and is left untouched.
        val range = maxVal - minVal
        if (range > 0) {
            for (row in melSpectrogram) {
                for (j in row.indices) {
                    row[j] = (row[j] - minVal) / range
                }
            }
        }
    }

    // Compute timestamps and frequencies for metadata
    val numFrames = melSpectrogram.size
    val timeStamps = FloatArray(numFrames) { it * hopLengthMs / 1000f }
    val frequencies = melFrequencies(nMels, fMin, fMax)

    return SpectrogramData(melSpectrogram, timeStamps, frequencies)
}
1717
-
1718
- // Compute Short-Time Fourier Transform
1719
/**
 * Computes the power spectrum of each windowed frame of [samples].
 *
 * Each frame is multiplied by [window], zero-padded to [fftLength] and transformed; the
 * result holds `re^2 + im^2` for bins `0..fftLength / 2`.
 *
 * The frame count is clamped to zero. For inputs much shorter than the window the integer
 * formula goes negative, which would make the array allocation throw.
 *
 * @param samples input signal
 * @param fftLength FFT size; frames are zero-padded to this length
 * @param windowSize number of samples taken per frame
 * @param hopLength distance in samples between frame starts
 * @param window window coefficients, at least [windowSize] long
 * @return one row of `fftLength / 2 + 1` power values per frame (possibly zero rows)
 * @throws IllegalArgumentException if [hopLength] is not positive, or [windowSize] exceeds
 *   [fftLength] or the length of [window]
 */
private fun computeSTFT(
    samples: FloatArray,
    fftLength: Int,
    windowSize: Int,
    hopLength: Int,
    window: FloatArray
): Array<FloatArray> {
    require(hopLength > 0) { "hopLength must be positive, got: $hopLength" }
    require(windowSize <= fftLength) { "windowSize ($windowSize) must not exceed fftLength ($fftLength)" }
    require(windowSize <= window.size) { "window has ${window.size} coefficients, need $windowSize" }

    val fft = FFT(fftLength)
    val numBins = fftLength / 2 + 1
    val numFrames = (((samples.size - windowSize) / hopLength) + 1).coerceAtLeast(0)
    val stft = Array(numFrames) { FloatArray(numBins) }

    for (frameIdx in 0 until numFrames) {
        val start = frameIdx * hopLength
        val end = minOf(start + windowSize, samples.size)
        val frame = FloatArray(fftLength) { 0f }

        // Extract and window the frame
        for (i in start until end) {
            frame[i - start] = samples[i] * window[i - start]
        }

        // Compute FFT and power spectrum
        val fftResult = fft.processSegment(frame)
        for (i in 0 until numBins) {
            // Bins that fall outside the FFT result are treated as zero.
            val real = if (2 * i < fftResult.size) fftResult[2 * i] else 0f
            val imag = if (2 * i + 1 < fftResult.size) fftResult[2 * i + 1] else 0f
            stft[frameIdx][i] = real * real + imag * imag
        }
    }
    return stft
}
1751
-
1752
- // Apply mel filterbank to STFT
1753
/**
 * Projects each STFT power frame onto [nMels] triangular mel filters.
 *
 * An empty [stft] yields an empty result instead of failing on `stft[0]`.
 *
 * @param stft power spectrum frames; the bin count is taken from the first frame
 * @param sampleRate sample rate in Hz
 * @param nMels number of mel bands
 * @param fftLength FFT size used to build the filterbank
 * @param fMin lowest filter frequency in Hz
 * @param fMax highest filter frequency in Hz
 * @return one row of [nMels] band energies per input frame
 */
private fun applyMelFilterbank(
    stft: Array<FloatArray>,
    sampleRate: Float,
    nMels: Int,
    fftLength: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    if (stft.isEmpty()) return emptyArray()

    val numBins = stft[0].size
    val melFilters = createMelFilterbank(sampleRate, fftLength, nMels, fMin, fMax)

    return Array(stft.size) { frame ->
        FloatArray(nMels) { melBin ->
            var sum = 0f
            for (bin in 0 until numBins) {
                sum += stft[frame][bin] * melFilters[melBin][bin]
            }
            sum
        }
    }
}
1777
-
1778
- // Create mel filterbank matrix
1779
/**
 * Builds [nMels] triangular filters over the `fftLength / 2 + 1` FFT bin frequencies.
 *
 * Filter edges come from `melFrequencies(nMels + 2, fMin, fMax)`: filter `k` rises linearly
 * from edge `k` to edge `k + 1` and falls linearly to edge `k + 2`; it is zero elsewhere.
 *
 * @return one weight array of length `fftLength / 2 + 1` per mel band
 */
private fun createMelFilterbank(
    sampleRate: Float,
    fftLength: Int,
    nMels: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    val binCount = fftLength / 2 + 1
    val binFrequencies = FloatArray(binCount) { it * sampleRate / fftLength }
    val edges = melFrequencies(nMels + 2, fMin, fMax)

    return Array(nMels) { band ->
        val lower = edges[band]
        val center = edges[band + 1]
        val upper = edges[band + 2]

        FloatArray(binCount) { bin ->
            val freq = binFrequencies[bin]
            if (freq < lower || freq > upper) {
                0f
            } else if (freq <= center) {
                (freq - lower) / (center - lower)
            } else {
                (upper - freq) / (upper - center)
            }
        }
    }
}
1806
-
1807
- // Generate mel-spaced frequencies
1808
/**
 * Returns [nMels] frequencies in Hz, evenly spaced on the mel scale between [fMin] and
 * [fMax] inclusive.
 *
 * A single point is placed at [fMin]: the spacing formula divides by `nMels - 1` and would
 * give NaN there.
 *
 * @param nMels number of points
 * @param fMin lowest frequency in Hz
 * @param fMax highest frequency in Hz
 * @return the frequencies in ascending mel order
 */
private fun melFrequencies(nMels: Int, fMin: Float, fMax: Float): FloatArray {
    val melMin = hzToMel(fMin)
    val melMax = hzToMel(fMax)
    if (nMels == 1) return floatArrayOf(melToHz(melMin))

    return FloatArray(nMels) { i ->
        val mel = melMin + i * (melMax - melMin) / (nMels - 1)
        melToHz(mel)
    }
}
1817
-
1818
/**
 * Computes 128 log mel-band energies for one block of samples.
 *
 * The power spectrum from [prepareFFT] is weighted by each filter from
 * `computeMelFilterbank`, and the natural log of each band energy (floored at 1e-10) is
 * returned.
 */
private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
    val powerSpectrum = prepareFFT(samples, sampleRate).first
    val filterbank = computeMelFilterbank(
        numFilters = 128,
        powerSpectrumSize = powerSpectrum.size,
        sampleRate = sampleRate
    )

    return filterbank.map { filter ->
        val bandEnergy = powerSpectrum.indices.fold(0f) { acc, bin ->
            acc + powerSpectrum[bin] * filter[bin]
        }
        kotlin.math.ln(maxOf(bandEnergy, 1e-10f))
    }
}
1835
-
1836
/**
 * Accumulates spectral magnitude into pitch-class bins.
 *
 * The magnitude spectrum from [prepareFFT] holds one value per frequency bin, so it is
 * indexed directly rather than as interleaved real/imaginary pairs. Bin frequencies use
 * the FFT length implied by the spectrum size (`(size - 1) * 2`). The pitch class is the
 * floor of the semitone distance from 440 Hz, wrapped into 0..11 with a non-negative
 * modulo so frequencies below 440 Hz are counted too.
 *
 * The DC and Nyquist bins are skipped.
 *
 * @param samples input samples
 * @param sampleRate sample rate in Hz
 * @return `N_CHROMA` accumulated magnitudes; index 0 is the pitch class of 440 Hz
 */
private fun computeChroma(samples: FloatArray, sampleRate: Float): List<Float> {
    val (_, magnitudeSpectrum) = prepareFFT(samples, sampleRate)
    val chroma = FloatArray(N_CHROMA) { 0f }

    val fftLength = (magnitudeSpectrum.size - 1) * 2
    if (fftLength <= 0) return chroma.toList()
    val freqsPerBin = sampleRate / fftLength

    for (bin in 1 until magnitudeSpectrum.size - 1) {
        val freq = bin * freqsPerBin
        if (freq > 0) {
            val semitonesFromA4 = Math.floor(12 * log2(freq / 440.0)).toInt()
            // Kotlin's % keeps the sign of the dividend; shift into 0..11.
            val pitchClass = ((semitonesFromA4 % 12) + 12) % 12
            if (pitchClass < chroma.size) {
                chroma[pitchClass] += magnitudeSpectrum[bin]
            }
        }
    }

    return chroma.toList()
}
1856
-
1857
/**
 * Placeholder for spectral contrast.
 *
 * The spectrum is computed via [prepareFFT] but not used yet; an empty list is always
 * returned.
 */
private fun computeSpectralContrast(samples: FloatArray, sampleRate: Float): List<Float> {
    // Result intentionally discarded until the contrast computation is implemented.
    prepareFFT(samples, sampleRate)
    return emptyList() // Placeholder
}
1863
-
1864
/**
 * Derives six values from the chroma vector; each is the sum of three chroma bins.
 *
 * Row `k` (0..5) sums bins `k`, `k + 4` and `(k + 7) mod 12`. This replaces a weight
 * matrix whose rows were meant to be 12 wide but had 13 and 16 entries; in the 16-entry
 * row the third weight sat at index 14, outside the chroma vector, so it never
 * contributed. It is placed at bin 11 here, matching the pattern of the other rows.
 *
 * NOTE(review): this is a plain sum of chroma bins, not a sine/cosine projection. Confirm
 * that is the intended feature definition.
 *
 * @param samples input samples
 * @param sampleRate sample rate in Hz
 * @return six summed chroma values
 */
private fun computeTonnetz(samples: FloatArray, sampleRate: Float): List<Float> {
    // First compute chroma features
    val chroma = computeChroma(samples, sampleRate)

    // Chroma bins contributing to each output value, listed in ascending order.
    val binsPerRow = arrayOf(
        intArrayOf(0, 4, 7),
        intArrayOf(1, 5, 8),
        intArrayOf(2, 6, 9),
        intArrayOf(3, 7, 10),
        intArrayOf(4, 8, 11),
        intArrayOf(0, 5, 9)
    )

    return binsPerRow.map { bins ->
        // Bins missing from a shorter chroma vector count as zero.
        bins.fold(0f) { sum, bin -> sum + (chroma.getOrNull(bin) ?: 0f) }
    }
}
1890
-
1891
/**
 * Returns the smallest power of two that is greater than or equal to [n].
 *
 * Values of 1 or less yield 1. Doubling an `Int` in a loop wraps to a negative value and
 * then to zero for inputs above 2^30, which never terminates; such inputs are rejected.
 *
 * @param n requested minimum size
 * @return a power of two `>= n`
 * @throws IllegalArgumentException if [n] is greater than 2^30
 */
private fun nextPowerOfTwo(n: Int): Int {
    if (n <= 1) return 1
    require(n <= (1 shl 30)) { "No power of two >= $n fits in an Int" }

    // highestOneBit(n - 1) is the largest power of two strictly below n (or equal to n - 1).
    return Integer.highestOneBit(n - 1) shl 1
}
1898
-
1899
/**
 * Estimates the pitch of [segment] in Hz from its FFT-based autocorrelation.
 *
 * Steps: Hann-window the segment, zero-pad to a power of two at least twice its length,
 * forward FFT, build a mirrored power spectrum, inverse FFT to get the autocorrelation,
 * normalise by lag 0, then take the strongest local peak above 0.3 among lags that
 * correspond to 50-500 Hz.
 *
 * @param segment audio samples
 * @param sampleRate sample rate in Hz
 * @return `sampleRate / lag` for the chosen lag, or 0 when the segment has fewer than two
 *   samples, an FFT step fails, or no peak qualifies
 */
private fun estimatePitch(segment: FloatArray, sampleRate: Float): Float {
    if (segment.size < 2) return 0.0f

    // Window, then zero-pad into a power-of-two buffer large enough for autocorrelation.
    val fftLength = nextPowerOfTwo(segment.size * 2)
    val spectrum = FloatArray(fftLength)
    applyHannWindow(segment).copyInto(spectrum)

    val fft = FFT(fftLength)
    try {
        fft.realForward(spectrum)
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "FFT forward transform failed: ${e.message}")
        return 0.0f
    }

    val half = fftLength / 2
    val power = FloatArray(fftLength)
    try {
        // Slots 0 and 1 of the transformed buffer are handled as DC and Nyquist.
        power[0] = spectrum[0] * spectrum[0]
        power[half] = spectrum[1] * spectrum[1]

        for (bin in 1 until half) {
            val real = spectrum[2 * bin]
            val imaginary = spectrum[2 * bin + 1]
            val energy = real * real + imaginary * imaginary
            power[bin] = energy
            power[fftLength - bin] = energy // Mirror for inverse FFT
        }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Power spectrum computation failed: ${e.message}")
        return 0.0f
    }

    val correlation = FloatArray(fftLength)
    try {
        fft.realInverse(power, correlation)
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "FFT inverse transform failed: ${e.message}")
        return 0.0f
    }

    // Scale so that the zero-lag value becomes 1.
    val scale = 1.0f / correlation[0]
    for (index in correlation.indices) {
        correlation[index] *= scale
    }

    // Lag bounds for 500 Hz (shortest period) and 50 Hz (longest period).
    val minLag = (sampleRate / 500.0f).toInt().coerceAtLeast(1)
    val maxLag = (sampleRate / 50.0f).toInt().coerceAtMost(correlation.size - 1)
    val threshold = 0.3f // Correlation threshold

    var bestCorrelation = -1.0f
    var bestLag = 0
    for (lag in minLag..maxLag) {
        // The last index has no right-hand neighbour to compare against.
        if (lag >= correlation.size - 1) continue

        val value = correlation[lag]
        val isLocalPeak = value > correlation[lag - 1] &&
            value > correlation[lag + 1] &&
            value > threshold

        if (isLocalPeak && value > bestCorrelation) {
            bestCorrelation = value
            bestLag = lag
        }
    }

    return if (bestLag > 0) sampleRate / bestLag else 0.0f
}
1980
-
1981
/**
 * Applies a Hann window, pads or truncates to [fftLength], runs a forward real FFT and
 * returns the power and magnitude spectra.
 *
 * The transformed buffer is read as: slot 0 = DC, slot 1 = Nyquist, then interleaved
 * real/imaginary pairs for bins `1 until fftLength / 2`. Bin 0 therefore uses
 * `abs(padded[0])` only; combining it with `padded[1]` would mix the Nyquist value into DC.
 * NOTE(review): this layout is what the Nyquist line below already relies on. Confirm it
 * against the FFT class.
 *
 * @param samples Input audio samples
 * @param sampleRate Sampling rate in Hz (not used by this function)
 * @param fftLength FFT size; defaults to the next power of two at or above
 *   `max(samples.size, 2048)`
 * @return Pair of power spectrum and magnitude spectrum, each `fftLength / 2 + 1` long
 */
private fun prepareFFT(samples: FloatArray, sampleRate: Float, fftLength: Int = nextPowerOfTwo(samples.size.coerceAtLeast(2048))): Pair<FloatArray, FloatArray> {
    val padded = applyHannWindow(samples).copyOf(fftLength)
    FFT(fftLength).realForward(padded)

    val half = fftLength / 2
    val magnitudeSpectrum = FloatArray(half + 1)
    magnitudeSpectrum[0] = abs(padded[0])
    for (i in 1 until half) {
        val re = padded[2 * i]
        val im = padded[2 * i + 1]
        magnitudeSpectrum[i] = sqrt(re * re + im * im)
    }
    magnitudeSpectrum[half] = abs(padded[1])

    val powerSpectrum = FloatArray(magnitudeSpectrum.size) { magnitudeSpectrum[it] * magnitudeSpectrum[it] }
    return Pair(powerSpectrum, magnitudeSpectrum)
}
2005
-
2006
/**
 * Basic PCM format description returned by [getAudioFormat].
 *
 * @property sampleRate samples per second per channel
 * @property channels number of channels
 * @property bitDepth bits per sample
 */
data class AudioFormat(
    val sampleRate: Int,
    val channels: Int,
    val bitDepth: Int,
)
2011
-
2012
/**
 * Reads the sample rate and channel count of track 0 of an audio file.
 *
 * The file is looked up at the given path (with any `file://` prefix removed) and, failing
 * that, by name inside `filesDir`. The bit depth is always reported as 16.
 * NOTE(review): a hard-coded 16 may not match WAV files stored at other bit depths.
 * Verify against callers that compute byte offsets from it.
 *
 * @param fileUri file path or `file://` URI
 * @return the format, or null when the file is missing or cannot be read (both are logged)
 */
fun getAudioFormat(fileUri: String): AudioFormat? {
    val cleanUri = fileUri.removePrefix("file://")
    val direct = File(cleanUri)
    val file = when {
        direct.exists() -> direct
        else -> File(filesDir, direct.name).takeIf { it.exists() }
    }
    if (file == null) {
        LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
        return null
    }

    val extractor = MediaExtractor()
    return try {
        extractor.setDataSource(file.absolutePath)
        val trackFormat = extractor.getTrackFormat(0)
        AudioFormat(
            sampleRate = trackFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE),
            channels = trackFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
            bitDepth = 16 // Most compressed formats decode to 16-bit PCM
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to get audio format: ${e.message}", e)
        null
    } finally {
        extractor.release()
    }
}
2036
-
2037
/**
 * Gets the size of the audio file header.
 * For WAV files, this includes the RIFF header and all metadata chunks before the data chunk.
 * For other formats, this will return null as header size handling is format-specific.
 *
 * @param fileUri The URI of the audio file to analyze
 * @return The size of the header in bytes (i.e. the offset of the first audio byte), or null if:
 *         - The file is not a WAV file
 *         - The file cannot be read
 *         - The file format is invalid
 *         - The data chunk cannot be found
 *
 * WAV File Structure:
 * - RIFF header (12 bytes)
 *   - "RIFF" identifier (4 bytes)
 *   - File size (4 bytes)
 *   - "WAVE" identifier (4 bytes)
 * - Format chunk ("fmt ") (24 bytes typically)
 * - Optional metadata chunks (variable size)
 *   - LIST (metadata like artist, title)
 *   - JUNK (padding)
 *   - fact (additional format info)
 *   - cue  (cue points)
 * - Data chunk
 *   - "data" identifier (4 bytes)
 *   - Chunk size (4 bytes)
 *   - Actual audio data
 *
 * Chunks with an odd size are followed by one pad byte that is not counted in the chunk size.
 */
fun getWavHeaderSize(fileUri: String): Int? {
    val cleanUri = fileUri.removePrefix("file://")
    val file = File(cleanUri).takeIf { it.exists() }
        ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
        ?: run {
            LogUtils.e(CLASS_NAME, "File not found: $cleanUri")
            return null
        }

    return try {
        // `use` closes the stream on every exit path, including early returns and exceptions.
        FileInputStream(file).use { inputStream -> parseWavHeaderSize(inputStream) }
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Error calculating WAV header size: ${e.message}", e)
        null
    }
}

/** Walks the RIFF chunk list of [inputStream] and returns the offset of the audio data, or null if it cannot be determined. */
private fun parseWavHeaderSize(inputStream: FileInputStream): Int? {
    val buffer = ByteArray(12) // Large enough for the RIFF header and for each 8-byte chunk header

    // Read RIFF header
    if (inputStream.read(buffer) != 12) {
        LogUtils.e(CLASS_NAME, "Failed to read RIFF header")
        return null
    }

    // Verify RIFF header
    if (String(buffer, 0, 4) != "RIFF" || String(buffer, 8, 4) != "WAVE") {
        LogUtils.e(CLASS_NAME, "Invalid WAV file format")
        return null
    }

    // Tracked as Long so a huge chunk size cannot silently overflow the running offset.
    var headerSize = 12L

    // Read chunk headers until we find the data chunk
    while (inputStream.read(buffer, 0, 8) == 8) {
        // Chunk size is a 32-bit little-endian integer at bytes 4..7 of the chunk header.
        val chunkSize = ((buffer[7].toInt() and 0xFF) shl 24) or
            ((buffer[6].toInt() and 0xFF) shl 16) or
            ((buffer[5].toInt() and 0xFF) shl 8) or
            (buffer[4].toInt() and 0xFF)

        val chunkId = String(buffer, 0, 4)
        LogUtils.d(CLASS_NAME, "Found chunk: $chunkId, size: $chunkSize")

        if (chunkId == "data") {
            headerSize += 8 // Add chunk header size
            if (headerSize > Int.MAX_VALUE) {
                LogUtils.e(CLASS_NAME, "Invalid WAV file format")
                return null
            }
            LogUtils.d(CLASS_NAME, "Found data chunk at offset: $headerSize")
            LogUtils.d(CLASS_NAME, "Total WAV header size: $headerSize bytes")
            return headerSize.toInt()
        }

        // A size with the top bit set would be >= 2 GiB; treat it as corrupt rather than skipping backwards.
        if (chunkSize < 0) {
            LogUtils.e(CLASS_NAME, "Invalid WAV file format")
            return null
        }

        // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
        val paddedSize = chunkSize.toLong() + (chunkSize and 1)

        // skip() may skip fewer bytes than requested, so loop until the whole chunk is consumed.
        var remaining = paddedSize
        while (remaining > 0) {
            val skipped = inputStream.skip(remaining)
            if (skipped <= 0) {
                LogUtils.e(CLASS_NAME, "Unexpected end of file while reading chunks")
                return null
            }
            remaining -= skipped
        }

        headerSize += 8 + paddedSize // Add chunk header and (padded) data size
    }

    // Ran out of file without ever seeing a data chunk.
    LogUtils.e(CLASS_NAME, "Unexpected end of file while reading chunks")
    return null
}
2126
-
2127
/**
 * Decodes a specific time range of an audio file directly to PCM data.
 * This is more efficient than decoding the entire file when only a portion is needed.
 *
 * The extractor seeks to the sync sample at or before [startTimeMs], decodes until the first sample
 * past [endTimeMs] (or end of file), drains the decoder, and then trims the decoded PCM to the
 * requested range on whole-frame boundaries. The whole decoded range is buffered in memory.
 *
 * NOTE(review): the trimming assumes the decoder emits interleaved 16-bit PCM with the sample rate
 * and channel count declared by the track format — confirm for codecs that may change output format.
 *
 * @param fileUri Data source passed to MediaExtractor.setDataSource
 * @param startTimeMs Start of the range, in milliseconds
 * @param endTimeMs End of the range, in milliseconds
 * @return The decoded PCM wrapped in [AudioData], or null if there is no audio track, nothing was
 *         decoded, or decoding failed. `durationMs` is the requested range length.
 */
fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
    val extractor = MediaExtractor()
    var decoder: android.media.MediaCodec? = null

    try {
        extractor.setDataSource(fileUri)
        val trackIndex = (0 until extractor.trackCount).find {
            extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: return null

        extractor.selectTrack(trackIndex)
        val format = extractor.getTrackFormat(trackIndex)
        val mime = format.getString(MediaFormat.KEY_MIME) ?: return null

        val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        if (sampleRate <= 0 || channels <= 0) {
            return null
        }

        val codec = android.media.MediaCodec.createDecoderByType(mime)
        decoder = codec // Published immediately so the finally block releases it even if configure/start throws
        codec.configure(format, null, null, 0)
        codec.start()

        val startTimeUs = startTimeMs * 1000
        val endTimeUs = endTimeMs * 1000
        val timeoutUs = 10000L

        extractor.seekTo(startTimeUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)
        // Primitive byte sink; avoids boxing every decoded byte into a List<Byte>.
        val pcmStream = java.io.ByteArrayOutputStream()
        val bufferInfo = android.media.MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false
        var firstBufferTimeUs: Long? = null

        // Keep pulling output after input EOS has been queued: the decoder still holds buffered
        // frames, and only the END_OF_STREAM flag on an output buffer marks the real end.
        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(codec.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)
                    if (sampleSize < 0 || extractor.sampleTime > endTimeUs) {
                        codec.queueInputBuffer(inputBufferId, 0, 0, 0, android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM)
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, timeoutUs)
            if (outputBufferId >= 0) {
                if (bufferInfo.size > 0) {
                    val outputBuffer = checkNotNull(codec.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
                    // Valid data lives in [offset, offset + size) of the output buffer.
                    outputBuffer.position(bufferInfo.offset)
                    outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val chunk = ByteArray(bufferInfo.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)
                if ((bufferInfo.flags and android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
            }
        }

        // If we didn't get any data or first buffer time, return null
        val pcm = pcmStream.toByteArray()
        if (pcm.isEmpty()) {
            return null
        }
        val decodedStartUs = firstBufferTimeUs ?: return null

        // Trim PCM data to exact time range, working in whole frames so channels stay aligned
        val bytesPerSample = 2 // 16-bit PCM
        val bytesPerFrame = bytesPerSample * channels
        val usPerFrame = 1_000_000.0 / sampleRate // Time per frame in microseconds
        val totalFrames = pcm.size / bytesPerFrame

        val startFrame = ((startTimeUs - decodedStartUs) / usPerFrame).toInt().coerceIn(0, totalFrames)
        val endFrame = ((endTimeUs - decodedStartUs) / usPerFrame).toInt().coerceIn(startFrame, totalFrames)

        // Frame indices are converted to byte offsets; the bytes themselves are copied unchanged.
        val trimmedBytes = pcm.copyOfRange(startFrame * bytesPerFrame, endFrame * bytesPerFrame)

        return AudioData(
            data = trimmedBytes,
            sampleRate = sampleRate,
            channels = channels,
            bitDepth = 16, // MediaCodec typically decodes to 16-bit PCM
            durationMs = endTimeMs - startTimeMs
        )
    } catch (e: Exception) {
        LogUtils.e(CLASS_NAME, "Failed to decode audio range: ${e.message}", e)
        return null
    } finally {
        decoder?.let { codec ->
            // stop() throws if the codec never reached the started state; release() must still run.
            try {
                codec.stop()
            } catch (e: Exception) {
                LogUtils.w(CLASS_NAME, "Error stopping decoder: ${e.message}")
            }
            try {
                codec.release()
            } catch (e: Exception) {
                LogUtils.w(CLASS_NAME, "Error releasing decoder: ${e.message}")
            }
        }

        try {
            extractor.release()
        } catch (e: Exception) {
            LogUtils.w(CLASS_NAME, "Error releasing extractor: ${e.message}")
        }
    }
}
2237
- }