@siteed/audio-studio 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +535 -0
- package/LICENSE +21 -0
- package/README.md +167 -0
- package/android/build.gradle +143 -0
- package/android/src/androidTest/assets/chorus.wav +0 -0
- package/android/src/androidTest/assets/jfk.wav +0 -0
- package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
- package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
- package/android/src/main/AndroidManifest.xml +30 -0
- package/android/src/main/CMakeLists.txt +29 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
- package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
- package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
- package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
- package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
- package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
- package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
- package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
- package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
- package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
- package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
- package/android/src/main/res/drawable/ic_microphone.xml +13 -0
- package/android/src/main/res/drawable/ic_pause.xml +10 -0
- package/android/src/main/res/drawable/ic_play.xml +10 -0
- package/android/src/main/res/drawable/ic_stop.xml +10 -0
- package/android/src/main/res/layout/notification_recording.xml +37 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
- package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
- package/android/src/test/resources/chorus.wav +0 -0
- package/android/src/test/resources/generate_test_audio.py +94 -0
- package/android/src/test/resources/jfk.wav +0 -0
- package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
- package/android/src/test/resources/recorder_hello_world.wav +0 -0
- package/app.plugin.js +3 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
- package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/cjs/AudioDeviceManager.js +688 -0
- package/build/cjs/AudioDeviceManager.js.map +1 -0
- package/build/cjs/AudioRecorder.provider.js +78 -0
- package/build/cjs/AudioRecorder.provider.js.map +1 -0
- package/build/cjs/AudioStudio.native.js +8 -0
- package/build/cjs/AudioStudio.native.js.map +1 -0
- package/build/cjs/AudioStudio.types.js +11 -0
- package/build/cjs/AudioStudio.types.js.map +1 -0
- package/build/cjs/AudioStudio.web.js +708 -0
- package/build/cjs/AudioStudio.web.js.map +1 -0
- package/build/cjs/AudioStudioModule.js +718 -0
- package/build/cjs/AudioStudioModule.js.map +1 -0
- package/build/cjs/WebRecorder.web.js +865 -0
- package/build/cjs/WebRecorder.web.js.map +1 -0
- package/build/cjs/constants/platformLimitations.js +99 -0
- package/build/cjs/constants/platformLimitations.js.map +1 -0
- package/build/cjs/constants.js +20 -0
- package/build/cjs/constants.js.map +1 -0
- package/build/cjs/events.js +29 -0
- package/build/cjs/events.js.map +1 -0
- package/build/cjs/hooks/useAudioDevices.js +179 -0
- package/build/cjs/hooks/useAudioDevices.js.map +1 -0
- package/build/cjs/index.js +64 -0
- package/build/cjs/index.js.map +1 -0
- package/build/cjs/trimAudio.js +76 -0
- package/build/cjs/trimAudio.js.map +1 -0
- package/build/cjs/useAudioRecorder.js +535 -0
- package/build/cjs/useAudioRecorder.js.map +1 -0
- package/build/cjs/utils/BlobFix.js +502 -0
- package/build/cjs/utils/BlobFix.js.map +1 -0
- package/build/cjs/utils/audioProcessing.js +136 -0
- package/build/cjs/utils/audioProcessing.js.map +1 -0
- package/build/cjs/utils/cleanNativeOptions.js +22 -0
- package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
- package/build/cjs/utils/concatenateBuffers.js +25 -0
- package/build/cjs/utils/concatenateBuffers.js.map +1 -0
- package/build/cjs/utils/convertPCMToFloat32.js +124 -0
- package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
- package/build/cjs/utils/crc32.js +52 -0
- package/build/cjs/utils/crc32.js.map +1 -0
- package/build/cjs/utils/encodingToBitDepth.js +17 -0
- package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
- package/build/cjs/utils/getWavFileInfo.js +96 -0
- package/build/cjs/utils/getWavFileInfo.js.map +1 -0
- package/build/cjs/utils/writeWavHeader.js +88 -0
- package/build/cjs/utils/writeWavHeader.js.map +1 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/cjs/workers/wasmGlueString.web.js +27 -0
- package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
- package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/esm/AudioAnalysis/extractPreview.js +25 -0
- package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
- package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/esm/AudioDeviceManager.js +681 -0
- package/build/esm/AudioDeviceManager.js.map +1 -0
- package/build/esm/AudioRecorder.provider.js +40 -0
- package/build/esm/AudioRecorder.provider.js.map +1 -0
- package/build/esm/AudioStudio.native.js +6 -0
- package/build/esm/AudioStudio.native.js.map +1 -0
- package/build/esm/AudioStudio.types.js +8 -0
- package/build/esm/AudioStudio.types.js.map +1 -0
- package/build/esm/AudioStudio.web.js +704 -0
- package/build/esm/AudioStudio.web.js.map +1 -0
- package/build/esm/AudioStudioModule.js +713 -0
- package/build/esm/AudioStudioModule.js.map +1 -0
- package/build/esm/WebRecorder.web.js +861 -0
- package/build/esm/WebRecorder.web.js.map +1 -0
- package/build/esm/constants/platformLimitations.js +90 -0
- package/build/esm/constants/platformLimitations.js.map +1 -0
- package/build/esm/constants.js +17 -0
- package/build/esm/constants.js.map +1 -0
- package/build/esm/events.js +21 -0
- package/build/esm/events.js.map +1 -0
- package/build/esm/hooks/useAudioDevices.js +176 -0
- package/build/esm/hooks/useAudioDevices.js.map +1 -0
- package/build/esm/index.js +23 -0
- package/build/esm/index.js.map +1 -0
- package/build/esm/trimAudio.js +69 -0
- package/build/esm/trimAudio.js.map +1 -0
- package/build/esm/useAudioRecorder.js +529 -0
- package/build/esm/useAudioRecorder.js.map +1 -0
- package/build/esm/utils/BlobFix.js +498 -0
- package/build/esm/utils/BlobFix.js.map +1 -0
- package/build/esm/utils/audioProcessing.js +133 -0
- package/build/esm/utils/audioProcessing.js.map +1 -0
- package/build/esm/utils/cleanNativeOptions.js +19 -0
- package/build/esm/utils/cleanNativeOptions.js.map +1 -0
- package/build/esm/utils/concatenateBuffers.js +21 -0
- package/build/esm/utils/concatenateBuffers.js.map +1 -0
- package/build/esm/utils/convertPCMToFloat32.js +120 -0
- package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
- package/build/esm/utils/crc32.js +50 -0
- package/build/esm/utils/crc32.js.map +1 -0
- package/build/esm/utils/encodingToBitDepth.js +13 -0
- package/build/esm/utils/encodingToBitDepth.js.map +1 -0
- package/build/esm/utils/getWavFileInfo.js +92 -0
- package/build/esm/utils/getWavFileInfo.js.map +1 -0
- package/build/esm/utils/writeWavHeader.js +84 -0
- package/build/esm/utils/writeWavHeader.js.map +1 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/esm/workers/wasmGlueString.web.js +24 -0
- package/build/esm/workers/wasmGlueString.web.js.map +1 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
- package/build/types/AudioDeviceManager.d.ts +187 -0
- package/build/types/AudioDeviceManager.d.ts.map +1 -0
- package/build/types/AudioRecorder.provider.d.ts +11 -0
- package/build/types/AudioRecorder.provider.d.ts.map +1 -0
- package/build/types/AudioStudio.native.d.ts +3 -0
- package/build/types/AudioStudio.native.d.ts.map +1 -0
- package/build/types/AudioStudio.types.d.ts +760 -0
- package/build/types/AudioStudio.types.d.ts.map +1 -0
- package/build/types/AudioStudio.web.d.ts +96 -0
- package/build/types/AudioStudio.web.d.ts.map +1 -0
- package/build/types/AudioStudioModule.d.ts +3 -0
- package/build/types/AudioStudioModule.d.ts.map +1 -0
- package/build/types/WebRecorder.web.d.ts +208 -0
- package/build/types/WebRecorder.web.d.ts.map +1 -0
- package/build/types/constants/platformLimitations.d.ts +40 -0
- package/build/types/constants/platformLimitations.d.ts.map +1 -0
- package/build/types/constants.d.ts +14 -0
- package/build/types/constants.d.ts.map +1 -0
- package/build/types/events.d.ts +29 -0
- package/build/types/events.d.ts.map +1 -0
- package/build/types/hooks/useAudioDevices.d.ts +15 -0
- package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
- package/build/types/index.d.ts +21 -0
- package/build/types/index.d.ts.map +1 -0
- package/build/types/trimAudio.d.ts +25 -0
- package/build/types/trimAudio.d.ts.map +1 -0
- package/build/types/useAudioRecorder.d.ts +22 -0
- package/build/types/useAudioRecorder.d.ts.map +1 -0
- package/build/types/utils/BlobFix.d.ts +9 -0
- package/build/types/utils/BlobFix.d.ts.map +1 -0
- package/build/types/utils/audioProcessing.d.ts +24 -0
- package/build/types/utils/audioProcessing.d.ts.map +1 -0
- package/build/types/utils/cleanNativeOptions.d.ts +15 -0
- package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
- package/build/types/utils/concatenateBuffers.d.ts +8 -0
- package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
- package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
- package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
- package/build/types/utils/crc32.d.ts +7 -0
- package/build/types/utils/crc32.d.ts.map +1 -0
- package/build/types/utils/encodingToBitDepth.d.ts +5 -0
- package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
- package/build/types/utils/getWavFileInfo.d.ts +26 -0
- package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
- package/build/types/utils/writeWavHeader.d.ts +34 -0
- package/build/types/utils/writeWavHeader.d.ts.map +1 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
- package/build/types/workers/wasmGlueString.web.d.ts +2 -0
- package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
- package/cpp/AudioFeatures.cpp +274 -0
- package/cpp/AudioFeatures.h +85 -0
- package/cpp/AudioFeaturesBridge.cpp +146 -0
- package/cpp/AudioFeaturesBridge.h +47 -0
- package/cpp/MelSpectrogram.cpp +227 -0
- package/cpp/MelSpectrogram.h +82 -0
- package/cpp/MelSpectrogramBridge.cpp +112 -0
- package/cpp/MelSpectrogramBridge.h +33 -0
- package/cpp/kiss_fft/COPYING +11 -0
- package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
- package/cpp/kiss_fft/kiss_fft.c +424 -0
- package/cpp/kiss_fft/kiss_fft.h +160 -0
- package/cpp/kiss_fft/kiss_fft_log.h +36 -0
- package/cpp/kiss_fft/kiss_fftr.c +155 -0
- package/cpp/kiss_fft/kiss_fftr.h +54 -0
- package/expo-module.config.json +10 -0
- package/ios/AudioAnalysisData.swift +74 -0
- package/ios/AudioDeviceManager.swift +670 -0
- package/ios/AudioFeaturesWrapper.h +21 -0
- package/ios/AudioFeaturesWrapper.mm +63 -0
- package/ios/AudioNotificationManager.swift +154 -0
- package/ios/AudioProcessingHelpers.swift +797 -0
- package/ios/AudioProcessor.swift +1191 -0
- package/ios/AudioStreamError.swift +7 -0
- package/ios/AudioStreamManager.swift +2369 -0
- package/ios/AudioStreamManagerDelegate.swift +16 -0
- package/ios/AudioStudio.podspec +39 -0
- package/ios/AudioStudioModule.swift +1111 -0
- package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
- package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
- package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
- package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
- package/ios/AudioStudioTests/Info.plist +22 -0
- package/ios/AudioStudioTests/README.md +39 -0
- package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
- package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
- package/ios/DataPoint.swift +54 -0
- package/ios/DecodingConfig.swift +59 -0
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +95 -0
- package/ios/ISSUE_IOS.md +68 -0
- package/ios/Logger.swift +39 -0
- package/ios/MelSpectrogramWrapper.h +30 -0
- package/ios/MelSpectrogramWrapper.mm +97 -0
- package/ios/NotificationExtension.swift +15 -0
- package/ios/RecordingResult.swift +22 -0
- package/ios/RecordingSettings.swift +311 -0
- package/ios/WaveformExtractor.swift +105 -0
- package/ios/tests/README.md +41 -0
- package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
- package/ios/tests/integration/buffer_duration_test.swift +185 -0
- package/ios/tests/integration/compressed_only_output_test.swift +271 -0
- package/ios/tests/integration/output_control_test.swift +322 -0
- package/ios/tests/integration/run_integration_tests.sh +37 -0
- package/ios/tests/opus_support_test_macos.swift +154 -0
- package/ios/tests/standalone/audio_processing_test.swift +144 -0
- package/ios/tests/standalone/audio_recording_test.swift +277 -0
- package/ios/tests/standalone/audio_streaming_test.swift +249 -0
- package/ios/tests/standalone/standalone_test.swift +144 -0
- package/package.json +146 -0
- package/plugin/build/index.cjs +194 -0
- package/plugin/build/index.d.cts +22 -0
- package/plugin/build/index.js +194 -0
- package/plugin/src/index.ts +285 -0
- package/plugin/tsconfig.json +10 -0
- package/plugin/tsconfig.tsbuildinfo +1 -0
- package/prebuilt/wasm/mel-spectrogram.js +18 -0
- package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
- package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
- package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
- package/src/AudioAnalysis/extractAudioData.ts +17 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/AudioAnalysis/extractWaveform.ts +22 -0
- package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
- package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
- package/src/AudioDeviceManager.ts +800 -0
- package/src/AudioRecorder.provider.tsx +57 -0
- package/src/AudioStudio.native.ts +6 -0
- package/src/AudioStudio.types.ts +899 -0
- package/src/AudioStudio.web.ts +911 -0
- package/src/AudioStudioModule.ts +984 -0
- package/src/WebRecorder.web.ts +1114 -0
- package/src/constants/platformLimitations.ts +118 -0
- package/src/constants.ts +21 -0
- package/src/events.ts +63 -0
- package/src/hooks/useAudioDevices.ts +213 -0
- package/src/index.ts +67 -0
- package/src/trimAudio.ts +94 -0
- package/src/types/crc-32.d.ts +9 -0
- package/src/useAudioRecorder.tsx +784 -0
- package/src/utils/BlobFix.ts +561 -0
- package/src/utils/audioProcessing.ts +205 -0
- package/src/utils/cleanNativeOptions.ts +18 -0
- package/src/utils/concatenateBuffers.ts +24 -0
- package/src/utils/convertPCMToFloat32.ts +170 -0
- package/src/utils/crc32.ts +59 -0
- package/src/utils/encodingToBitDepth.ts +18 -0
- package/src/utils/getWavFileInfo.ts +132 -0
- package/src/utils/writeWavHeader.ts +115 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
- package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
- package/src/workers/wasmGlueString.web.ts +23 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
// packages/audio-studio/ios/AudioProcessingHelpers.swift
|
|
2
|
+
|
|
3
|
+
import Accelerate
|
|
4
|
+
import AVFoundation
|
|
5
|
+
import QuartzCore
|
|
6
|
+
import zlib
|
|
7
|
+
|
|
8
|
+
// Constants
|
|
9
|
+
// FFT size used to construct `sharedFFT`; also passed as `fftSize` to the Mel filterbank and used to derive the per-bin frequency step in `extractChromagram`.
private let FFT_LENGTH = 1024
|
|
10
|
+
// Single FFT instance, built with FFT_LENGTH, that every extractor in this file calls.
// NOTE(review): the FFT type is declared outside this file; whether it is safe to call from several threads at once is not visible here — confirm.
private let sharedFFT = FFT(FFT_LENGTH)
|
|
11
|
+
|
|
12
|
+
// Main feature extraction functions
|
|
13
|
+
/// Computes Mel-frequency cepstral coefficients for one audio segment.
///
/// Pipeline: Hann window -> shared FFT -> power spectrum -> 40-band Mel
/// filterbank -> natural log of each band energy -> DCT.
///
/// - Parameters:
///   - segment: Samples of the frame to analyse.
///   - sampleRate: Sample rate forwarded to the Mel filterbank builder.
/// - Returns: Whatever `computeDCT` produces for the 40 log Mel energies.
func extractMFCC(from segment: [Float], sampleRate: Float) -> [Float] {
    let bandCount = 40

    // Window first, then transform and square the complex bins.
    let spectrum = sharedFFT.processSegment(applyHannWindow(to: segment))
    let power = computePowerSpectrum(from: spectrum)

    let filters = computeMelFilterbank(numFilters: bandCount, fftSize: FFT_LENGTH, sampleRate: sampleRate)

    let logEnergies: [Float] = (0..<bandCount).map { band -> Float in
        // zip stops at the shorter of the two arrays, so a length mismatch
        // between spectrum and filter can never index out of bounds.
        let bandEnergy = zip(power, filters[band]).reduce(Float(0)) { partial, pair in
            partial + pair.0 * pair.1
        }
        // Clamp to the smallest normal Float so log never sees zero.
        return log(max(bandEnergy, .leastNormalMagnitude))
    }

    return computeDCT(from: logEnergies)
}
|
|
42
|
+
|
|
43
|
+
/// Returns the magnitude-weighted mean frequency of the segment's spectrum.
///
/// Bin `k` is assigned the frequency `k * sampleRate / (2 * binCount)`.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate used to convert bin indices to frequencies.
/// - Returns: The spectral centroid, or 0 when the summed magnitude is not positive.
func extractSpectralCentroid(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    let totalMagnitude = magnitudes.reduce(0, +)
    guard totalMagnitude > 0 else { return 0 }

    let binCount = magnitudes.count
    var weightedTotal: Float = 0
    for (bin, magnitude) in magnitudes.enumerated() {
        let frequency = Float(bin) * sampleRate / Float(2 * binCount)
        weightedTotal += frequency * magnitude
    }

    return weightedTotal / totalMagnitude
}
|
|
58
|
+
|
|
59
|
+
/// Returns the spectral flatness of the segment: the geometric mean of the
/// power spectrum divided by its arithmetic mean.
///
/// - Parameter segment: Samples to analyse.
/// - Returns: The flatness ratio, or 0.0 when the arithmetic mean is not positive.
func extractSpectralFlatness(from segment: [Float]) -> Float {
    let power = computePowerSpectrum(from: sharedFFT.processSegment(segment))
    let binCount = Float(power.count)

    // Geometric mean is evaluated in log space; the small epsilon keeps log(0) out.
    let logTotal = power.reduce(Float(0)) { $0 + log($1 + 1e-10) }
    let geometric = exp(logTotal / binCount)

    let arithmetic = power.reduce(0, +) / binCount

    guard arithmetic > 0 else { return 0.0 }
    return geometric / arithmetic
}
|
|
77
|
+
|
|
78
|
+
/// Returns the frequency of the first bin at which the running sum of
/// magnitudes reaches 85% of the total.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate used to convert the bin index to a frequency.
/// - Returns: The roll-off frequency, or 0.0 if no bin reaches the threshold.
func extractSpectralRollOff(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    let total = magnitudes.reduce(0, +)
    let cutoff = 0.85 * total // 85% roll-off point

    var running: Float = 0
    var bin = 0
    while bin < magnitudes.count {
        running += magnitudes[bin]
        if running >= cutoff {
            return Float(bin) * sampleRate / Float(2 * magnitudes.count)
        }
        bin += 1
    }

    return 0.0
}
|
|
95
|
+
|
|
96
|
+
/// Returns the spectral bandwidth: the magnitude-weighted standard deviation
/// of bin frequencies around the spectral centroid.
///
/// The centroid is derived from the same magnitude spectrum used for the
/// variance, so the segment is transformed only once. The formula is the one
/// `extractSpectralCentroid` uses (sum of frequency * magnitude, divided by
/// the sum of magnitudes).
/// NOTE(review): this assumes `sharedFFT.processSegment` returns the same
/// output for the same input on repeated calls — confirm against the FFT type.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate used to convert bin indices to frequencies.
/// - Returns: The bandwidth, or 0 when the summed magnitude is not positive.
func extractSpectralBandwidth(from segment: [Float], sampleRate: Float) -> Float {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    let sumMagnitudes = magnitudes.reduce(0, +)
    guard sumMagnitudes > 0 else { return 0 }

    let binCount = magnitudes.count
    let frequencies: [Float] = magnitudes.indices.map { Float($0) * sampleRate / Float(2 * binCount) }

    // Centroid: magnitude-weighted mean frequency.
    let weightedSum: Float = zip(frequencies, magnitudes)
        .map { $0.0 * $0.1 }
        .reduce(0, +)
    let centroid = weightedSum / sumMagnitudes

    // Variance: magnitude-weighted squared distance from the centroid.
    let variance: Float = zip(frequencies, magnitudes)
        .map { pow($0.0 - centroid, 2) * $0.1 }
        .reduce(0, +)

    return sqrt(variance / sumMagnitudes)
}
|
|
113
|
+
|
|
114
|
+
/// Accumulates FFT bin magnitudes into 12 pitch classes.
///
/// Each bin's centre frequency is converted to a semitone offset from 440 Hz
/// (`12 * log2(freq / 440)`), floored, and wrapped into `0..<12`. Index 0 is
/// therefore the pitch class containing 440 Hz.
///
/// The wrap uses a positive modulo. The earlier `truncatingRemainder` form
/// produced a negative class for every bin below 440 Hz, so those bins were
/// discarded. Bins at or above 440 Hz map to the same class as before.
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate used to derive each bin's frequency.
/// - Returns: 12 accumulated magnitudes, one per pitch class.
func extractChromagram(from segment: [Float], sampleRate: Float) -> [Float] {
    let fftData = sharedFFT.processSegment(segment)
    let numBins = fftData.count / 2
    let nChroma = 12
    var chroma = [Float](repeating: 0, count: nChroma)
    let freqsPerBin = sampleRate / Float(FFT_LENGTH)

    for i in 0..<numBins {
        let freq = Float(i) * freqsPerBin
        // Bin 0 (0 Hz) and any non-positive frequency have no defined pitch.
        guard freq > 0 else { continue }

        let semitonesFrom440 = 12 * log2(freq / 440.0)
        // Int(...) traps on infinity/NaN (e.g. a non-finite sample rate).
        guard semitonesFrom440.isFinite else { continue }

        let semitone = Int(semitonesFrom440.rounded(.down))
        // Positive modulo keeps negative semitone offsets inside 0..<nChroma.
        let pitchClass = ((semitone % nChroma) + nChroma) % nChroma

        // i < fftData.count / 2 guarantees both indices are in bounds.
        let re = fftData[2 * i]
        let im = fftData[2 * i + 1]
        chroma[pitchClass] += sqrt(re * re + im * im)
    }

    return chroma
}
|
|
140
|
+
|
|
141
|
+
/// Estimates tempo in beats per minute from the spacing of spectral-flux peaks.
///
/// The segment is cut into 2048-sample frames every 512 samples. The positive
/// spectral difference between consecutive frames forms an onset envelope, and
/// strict local maxima of that envelope are treated as onsets. The mean
/// spacing between onsets is converted to BPM and clamped to 20...300.
///
/// Framing uses an inclusive upper bound so the final full frame is analysed
/// (previously it was skipped whenever `segment.count - 2048` was a multiple
/// of 512, and no frame at all was analysed for exactly 2048 samples).
///
/// - Parameters:
///   - segment: Samples to analyse.
///   - sampleRate: Sample rate used to convert frame spacing to time.
/// - Returns: Estimated tempo, or 120.0 when the segment is shorter than one
///   frame or fewer than two peaks are found.
func extractTempo(from segment: [Float], sampleRate: Float) -> Float {
    let hopLength = 512
    let frameLength = 2048
    let defaultTempo: Float = 120.0

    // Need at least one full frame.
    guard segment.count >= frameLength else {
        return defaultTempo
    }

    // Onset strength signal via spectral flux.
    var onsetEnvelope = [Float]()
    var previousSpectrum = [Float](repeating: 0, count: frameLength / 2)

    for start in stride(from: 0, through: segment.count - frameLength, by: hopLength) {
        // Every frame is full length here, so no zero padding is needed.
        var fftData = Array(segment[start..<(start + frameLength)])
        // NOTE(review): sharedFFT is constructed with FFT_LENGTH (1024) while
        // frames are 2048 samples — confirm realForward handles this size.
        sharedFFT.realForward(&fftData)

        let magnitudes = computeMagnitudeSpectrum(from: fftData)
        var flux: Float = 0
        for j in 0..<min(magnitudes.count, previousSpectrum.count) {
            // Only increases in magnitude count towards an onset.
            flux += max(magnitudes[j] - previousSpectrum[j], 0)
        }
        onsetEnvelope.append(flux)
        previousSpectrum = magnitudes
    }

    // Strict local maxima of the onset envelope.
    var peaks = [Int]()
    if onsetEnvelope.count >= 3 {
        for i in 1..<(onsetEnvelope.count - 1) {
            if onsetEnvelope[i] > onsetEnvelope[i - 1] && onsetEnvelope[i] > onsetEnvelope[i + 1] {
                peaks.append(i)
            }
        }
    }

    // Two or more peaks guarantee at least one interval.
    guard peaks.count > 1 else { return defaultTempo }

    let intervals = zip(peaks, peaks.dropFirst()).map { $1 - $0 }
    let averageInterval = Float(intervals.reduce(0, +)) / Float(intervals.count)
    guard averageInterval > 0 else { return defaultTempo }

    // averageInterval is in frames; sampleRate / hopLength is frames per second.
    let tempo = 60.0 * sampleRate / Float(hopLength) / averageInterval
    // Constrain tempo to a reasonable range (20-300 BPM).
    return min(300.0, max(20.0, tempo))
}
|
|
195
|
+
|
|
196
|
+
/// Returns the indices of strict local maxima whose height above the taller
/// of the two immediate neighbours is at least `minProminence`.
///
/// The first and last elements are never reported because they lack a
/// neighbour on one side. Inputs with fewer than three elements return an
/// empty array; previously inputs with fewer than two elements formed an
/// invalid range (`1..<0` or `1..<-1`) and trapped at runtime.
///
/// - Parameters:
///   - data: Values to scan.
///   - minProminence: Minimum required difference between a peak and its
///     taller immediate neighbour.
/// - Returns: Peak indices in ascending order.
private func findPeaks(in data: [Float], minProminence: Float) -> [Int] {
    guard data.count >= 3 else { return [] }

    var peaks = [Int]()
    for i in 1..<(data.count - 1) where data[i] > data[i - 1] && data[i] > data[i + 1] {
        let prominence = data[i] - max(data[i - 1], data[i + 1])
        if prominence >= minProminence {
            peaks.append(i)
        }
    }
    return peaks
}
|
|
208
|
+
|
|
209
|
+
/// Estimates the harmonics-to-noise ratio (in dB) of a segment from its
/// autocorrelation.
///
/// The autocorrelation at lag 0 is taken as total energy and the first
/// sufficiently prominent peak at a positive lag as harmonic energy; the
/// result is `10 * log10(harmonic / (total - harmonic))`.
///
/// Fixes relative to the previous implementation:
/// - `vDSP_conv` reads `N + P - 1` input samples, but was handed only `N`.
///   The signal is now zero-padded to `2N - 1`, removing the out-of-bounds read.
/// - With a positive filter stride `vDSP_conv` performs correlation, so the
///   filter is the segment itself (not its reverse). That makes output index
///   `n` the autocorrelation at lag `n`, which the lag-0 lookup relies on.
/// - Segments shorter than three samples return 0.0 instead of reaching the
///   peak search with too few points.
/// - A non-positive harmonic energy returns 0.0 instead of NaN or -infinity.
///
/// - Parameter segment: Samples to analyse.
/// - Returns: HNR in dB, or 0.0 when it cannot be estimated.
func extractHNR(from segment: [Float]) -> Float {
    let frameSize = segment.count
    // A peak needs a neighbour on each side, so at least three lags are required.
    guard frameSize >= 3 else { return 0.0 }

    // Zero-pad so every lag can read frameSize samples without leaving the buffer.
    let paddedSignal = segment + [Float](repeating: 0, count: frameSize - 1)
    var autocorrelation = [Float](repeating: 0, count: frameSize)

    // autocorrelation[n] = sum over p of paddedSignal[n + p] * segment[p]
    vDSP_conv(paddedSignal, 1, segment, 1, &autocorrelation, 1, vDSP_Length(frameSize), vDSP_Length(frameSize))

    guard let maxValue = autocorrelation.max() else { return 0.0 }

    // Peaks must stand out by at least 10% of the largest autocorrelation value.
    let peaks = findPeaks(in: autocorrelation, minProminence: 0.1 * maxValue)

    // First peak after zero lag.
    guard let firstPeakIndex = peaks.first(where: { $0 > 0 }) else { return 0.0 }

    let harmonicEnergy = autocorrelation[firstPeakIndex]
    let noiseEnergy = autocorrelation[0] - harmonicEnergy
    // Both terms must be positive for the log ratio to be finite.
    guard harmonicEnergy > 0, noiseEnergy > 0 else { return 0.0 }

    return 10 * log10(harmonicEnergy / noiseEnergy)
}
|
|
232
|
+
|
|
233
|
+
// Helper functions
|
|
234
|
+
/// Converts interleaved FFT output into per-bin magnitudes.
///
/// The input is read as consecutive (real, imaginary) pairs; a trailing
/// unpaired element is ignored.
///
/// - Parameter fftData: Interleaved real/imaginary values.
/// - Returns: `sqrt(re² + im²)` for each complete pair, in order.
private func computeMagnitudeSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    return (0..<pairCount).map { bin -> Float in
        // bin < count / 2, so both indices are always inside the array.
        let re = fftData[2 * bin]
        let im = fftData[2 * bin + 1]
        return (re * re + im * im).squareRoot()
    }
}
|
|
248
|
+
|
|
249
|
+
/// Multiplies `segment` element-wise by a Hann window of the same length.
///
/// The window is generated by `vDSP_hann_window` with the `vDSP_HANN_NORM` flag.
///
/// - Parameter segment: Samples to window.
/// - Returns: The windowed samples; same length as `segment`.
private func applyHannWindow(to segment: [Float]) -> [Float] {
    let sampleCount = segment.count
    let length = vDSP_Length(sampleCount)

    var hann = [Float](repeating: 0, count: sampleCount)
    vDSP_hann_window(&hann, length, Int32(vDSP_HANN_NORM))

    var windowed = [Float](repeating: 0, count: sampleCount)
    vDSP_vmul(segment, 1, hann, 1, &windowed, 1, length)
    return windowed
}
|
|
258
|
+
|
|
259
|
+
/// Converts interleaved FFT output into per-bin power values.
///
/// `fftData` is read as consecutive (real, imaginary) pairs; a trailing unpaired
/// element is ignored.
///
/// - Parameter fftData: Interleaved real/imaginary values.
/// - Returns: One `re² + im²` value per pair, `fftData.count / 2` in total.
private func computePowerSpectrum(from fftData: [Float]) -> [Float] {
    let pairCount = fftData.count / 2
    var power = [Float]()

    // Both indices of every pair lie below `2 * pairCount <= fftData.count`,
    // so no per-element bounds fallback is needed.
    for realAt in stride(from: 0, to: pairCount * 2, by: 2) {
        let real = fftData[realAt]
        let imaginary = fftData[realAt + 1]
        power.append(real * real + imaginary * imaginary)
    }
    return power
}
|
|
273
|
+
|
|
274
|
+
/// Builds a bank of triangular filters spaced evenly on the mel scale between 0 Hz and `sampleRate / 2`.
///
/// - Parameters:
///   - numFilters: Number of triangular filters (rows) to produce.
///   - fftSize: FFT length used to map frequencies to bin indices.
///   - sampleRate: Sample rate of the analysed signal.
/// - Returns: `numFilters` rows of `1 + fftSize / 2` weights. Each row rises linearly from its
///   left edge bin to its centre bin and falls linearly to its right edge bin.
private func computeMelFilterbank(numFilters: Int, fftSize: Int, sampleRate: Float) -> [[Float]] {
    let lowMel = hzToMel(0)
    let highMel = hzToMel(sampleRate / 2)
    let spacing = (highMel - lowMel) / Float(numFilters + 1)

    // numFilters + 2 edge points: each filter uses three consecutive edges.
    let edgeBins: [Int] = (0...numFilters + 1).map { point -> Int in
        let hz = melToHz(lowMel + Float(point) * spacing)
        return Int((hz * Float(fftSize) / sampleRate).rounded())
    }

    let rowWidth = 1 + fftSize / 2
    return (0..<numFilters).map { filter -> [Float] in
        let left = edgeBins[filter]
        let centre = edgeBins[filter + 1]
        let right = edgeBins[filter + 2]

        var row = [Float](repeating: 0, count: rowWidth)
        for bin in left..<right {
            if bin < centre {
                // Rising slope.
                row[bin] = Float(bin - left) / Float(centre - left)
            } else {
                // Falling slope (includes the centre bin itself).
                row[bin] = Float(right - bin) / Float(right - centre)
            }
        }
        return row
    }
}
|
|
300
|
+
|
|
301
|
+
/// Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`.
private func hzToMel(_ hz: Float) -> Float {
    let ratio = 1 + hz / 700
    return 2595 * log10(ratio)
}
|
|
304
|
+
|
|
305
|
+
/// Converts a mel value back to Hz using `700 * (10^(mel / 2595) - 1)`.
private func melToHz(_ mel: Float) -> Float {
    let exponent = mel / 2595
    return 700 * (pow(10, exponent) - 1)
}
|
|
308
|
+
|
|
309
|
+
/// Computes a DCT-II of `input`, with every coefficient scaled by `sqrt(2 / N)`.
///
/// output[k] = sqrt(2 / N) * Σ_n input[n] * cos(π * k * (2n + 1) / (2N))
///
/// - Parameter input: Values to transform.
/// - Returns: `input.count` coefficients; empty for empty input.
private func computeDCT(from input: [Float]) -> [Float] {
    let length = input.count
    let gain = sqrt(2.0 / Float(length))
    let twoN = 2 * Float(length)

    return (0..<length).map { k -> Float in
        var accumulator: Float = 0
        for (n, sample) in input.enumerated() {
            accumulator += sample * cos(.pi * Float(k) * (2 * Float(n) + 1) / twoN)
        }
        return gain * accumulator
    }
}
|
|
324
|
+
|
|
325
|
+
/// Computes a single mel-spectrogram frame for `segment` via `MelSpectrogramWrapper`.
///
/// The wrapper is (re)initialised on every call with 128 mel bands, an FFT length of 2048,
/// a frequency range of 0 to `sampleRate / 2`, and a window/hop size equal to
/// `min(segment.count, 2048)` so that one frame is produced.
///
/// - Parameters:
///   - segment: Audio samples for the frame.
///   - sampleRate: Sample rate of `segment`; truncated to `Int32` for the wrapper.
/// - Returns: The wrapper's values converted to `Float`, or an empty array when it returns nil.
func computeMelSpectrogram(from segment: [Float], sampleRate: Float) -> [Float] {
    let melBandCount: Int32 = 128
    let fftSize: Int32 = 2048
    // Window and hop share this value: single frame.
    let frameSpan = Int32(min(segment.count, Int(fftSize)))

    MelSpectrogramWrapper.initWithSampleRate(
        Int32(sampleRate),
        fftLength: fftSize,
        windowSizeSamples: frameSpan,
        hopLengthSamples: frameSpan,
        nMels: melBandCount,
        fMin: 0.0,
        fMax: sampleRate / 2.0,
        windowType: 0 // Hann
    )

    let frame: [NSNumber]? = segment.withUnsafeBufferPointer { samples in
        MelSpectrogramWrapper.computeFrame(
            withSamples: samples.baseAddress,
            frameSize: Int32(segment.count)
        )
    }

    return frame?.map { $0.floatValue } ?? []
}
|
|
355
|
+
|
|
356
|
+
/// Computes a per-band spectral contrast (in dB) over seven fixed frequency bands.
///
/// For each band the magnitude bins are sorted; the value at the 95% position is the peak
/// and the value at the 5% position is the valley. The contrast is `20 * log10(peak / valley)`,
/// with the valley floored at `Float.leastNormalMagnitude`.
///
/// - Parameters:
///   - segment: Audio samples, transformed through `sharedFFT.processSegment`.
///   - sampleRate: Sample rate of `segment`; caps the top band at `sampleRate / 2`.
/// - Returns: Seven values, one per band. A band whose start bin is not below its end bin yields `0`.
func computeSpectralContrast(from segment: [Float], sampleRate: Float) -> [Float] {
    let magnitudes = computeMagnitudeSpectrum(from: sharedFFT.processSegment(segment))

    // Octave-style band edges in Hz.
    let bands = [
        (20.0, 125.0),      // Sub-bass
        (125.0, 250.0),     // Bass
        (250.0, 500.0),     // Low-mids
        (500.0, 1000.0),    // Mids
        (1000.0, 2000.0),   // High-mids
        (2000.0, 4000.0),   // Presence
        (4000.0, min(8000.0, Double(sampleRate) / 2.0)) // Brilliance
    ]

    // Width of one FFT bin in Hz.
    let hzPerBin = Float(sampleRate) / Float(FFT_LENGTH)
    let lastBin = magnitudes.count - 1

    return bands.map { band -> Float in
        let firstBin = Int(Float(band.0) / hzPerBin)
        let finalBin = min(Int(Float(band.1) / hzPerBin), lastBin)
        guard firstBin < finalBin else { return 0 }

        let ordered = magnitudes[firstBin...finalBin].sorted()
        let binCount = ordered.count
        let peak = ordered[Int(Float(binCount) * 0.95)]
        let valley = ordered[Int(Float(binCount) * 0.05)]

        return 20 * log10(peak / max(valley, .leastNormalMagnitude))
    }
}
|
|
404
|
+
|
|
405
|
+
// Original function for backward compatibility
|
|
406
|
+
/// Computes tonnetz features directly from audio samples.
///
/// Convenience entry point: extracts a chromagram with `extractChromagram` and forwards it
/// to `computeTonnetz(fromChroma:)`.
func computeTonnetz(from segment: [Float], sampleRate: Float) -> [Float] {
    return computeTonnetz(fromChroma: extractChromagram(from: segment, sampleRate: sampleRate))
}
|
|
410
|
+
|
|
411
|
+
// New optimized function that accepts pre-computed chromagram
|
|
412
|
+
/// Projects a chroma vector onto six fixed 0/1 weight rows.
///
/// Each row selects three chroma bins; the feature is the sum of the selected bins.
/// Rows are paired with `chroma` via `zip`, so bins beyond the shorter of the two are ignored.
///
/// - Parameter chroma: Chroma energies, expected to hold 12 values.
/// - Returns: Six features, one per row.
func computeTonnetz(fromChroma chroma: [Float]) -> [Float] {
    // 6x12 selection matrix; the selected bin indices are listed per row.
    let weights: [[Float]] = [
        [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], // bins 0, 4, 7
        [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], // bins 1, 5, 8
        [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], // bins 2, 6, 9
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], // bins 3, 7, 10
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], // bins 4, 8, 11
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]  // bins 0, 5, 9
    ]

    var features = [Float]()
    for row in weights {
        var projection: Float = 0
        for (weight, energy) in zip(row, chroma) {
            projection += weight * energy
        }
        features.append(projection)
    }
    return features
}
|
|
428
|
+
|
|
429
|
+
/// Decoded audio held in memory: a sample array plus the rate it was sampled at.
struct AudioData {
    /// Sample values as 32-bit floats.
    let samples: [Float]

    /// Sample rate in Hz.
    let sampleRate: Int
}
|
|
433
|
+
|
|
434
|
+
/// Loads an entire audio file into memory as float samples.
///
/// Only the first channel of the file's processing format is returned.
///
/// - Parameter fileUri: URL string of the file to read.
/// - Returns: The first channel's samples and the processing format's sample rate.
/// - Throws: An `NSError` in the "AudioProcessing" domain when the URL is invalid, the file is
///   empty or too long for a single buffer, or no float data is available; any error thrown by
///   `AVAudioFile` while opening or reading.
func loadAudioFile(_ fileUri: String) throws -> AudioData {
    func failure(_ message: String) -> NSError {
        return NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
    }

    guard let url = URL(string: fileUri) else {
        throw failure("Invalid file URL")
    }

    let file = try AVAudioFile(forReading: url)
    let format = file.processingFormat

    // `file.length` is 64-bit; convert without trapping and fail cleanly instead of
    // force-unwrapping a buffer that could not be allocated.
    guard let frameCount = AVAudioFrameCount(exactly: file.length), frameCount > 0,
          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw failure("Failed to read audio data")
    }

    try file.read(into: buffer, frameCount: frameCount)

    guard let channels = buffer.floatChannelData else {
        throw failure("Failed to read audio data")
    }

    // Use the number of frames actually read, which may be lower than requested.
    let samples = Array(UnsafeBufferPointer(start: channels[0], count: Int(buffer.frameLength)))

    return AudioData(samples: samples, sampleRate: Int(format.sampleRate))
}
|
|
456
|
+
|
|
457
|
+
/// Returns the mean of the squared samples (average power) of `samples`.
///
/// - Parameter samples: Audio samples.
/// - Returns: `Σ x² / N`, or `0` for empty input.
func computeEnergy(from samples: [Float]) -> Float {
    guard !samples.isEmpty else { return 0 }

    // vDSP_measqv already divides the sum of squares by the element count,
    // so no further normalisation is applied.
    var meanSquare: Float = 0
    vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(samples.count))
    return meanSquare
}
|
|
462
|
+
|
|
463
|
+
/// Returns the square root of `computeEnergy(from:)` for `samples`.
func computeRMS(from samples: [Float]) -> Float {
    return computeEnergy(from: samples).squareRoot()
}
|
|
467
|
+
|
|
468
|
+
/// Computes the zero-crossing rate of `samples`.
///
/// A crossing is counted when the product of two adjacent samples is strictly negative,
/// so a pair containing an exact zero does not count.
///
/// - Parameter samples: Audio samples.
/// - Returns: Crossings divided by `samples.count`; `0` when there are fewer than two samples.
func computeZCR(from samples: [Float]) -> Float {
    // Fewer than two samples means no adjacent pair. This also avoids the invalid
    // range `1..<0`, which traps at runtime for empty input.
    guard samples.count > 1 else { return 0 }

    var zeroCrossings = 0
    for (previous, current) in zip(samples, samples.dropFirst()) where previous * current < 0 {
        zeroCrossings += 1
    }
    return Float(zeroCrossings) / Float(samples.count)
}
|
|
477
|
+
|
|
478
|
+
// Keep in AudioProcessingHelpers.swift
|
|
479
|
+
// File-private analysis sizes. NOTE(review): the names suggest MFCC count, FFT length,
// mel-band count, chroma-bin count and contrast-band count — confirm against their users.
private let N_MFCC: Int = 40
private let N_FFT: Int = 1024
private let N_MELS: Int = 128
private let N_CHROMA: Int = 12
private let N_BANDS: Int = 7
|
|
484
|
+
|
|
485
|
+
// Core audio processing functions
|
|
486
|
+
/// Computes the zero-crossing rate of `data`, treating zero as non-negative.
///
/// A crossing is counted whenever two adjacent samples fall on different sides of the
/// "negative / non-negative" boundary, so a step from a negative value to exactly `0` counts.
///
/// - Parameter data: Audio samples.
/// - Returns: Crossings divided by `data.count`; `0` when there are fewer than two samples.
func calculateZeroCrossingRate(_ data: [Float]) -> Float {
    // Fewer than two samples means no adjacent pair. This also avoids the invalid
    // range `1..<0`, which traps at runtime for empty input.
    guard data.count > 1 else { return 0 }

    var count: Float = 0
    for i in 1..<data.count {
        let currentIsNegative = data[i] < 0
        let previousIsNegative = data[i - 1] < 0
        if currentIsNegative != previousIsNegative {
            count += 1
        }
    }
    return count / Float(data.count)
}
|
|
495
|
+
|
|
496
|
+
/// Returns the mean of the squared values of `data`.
///
/// - Parameter data: Audio samples.
/// - Returns: `Σ x² / N`, or `0` for empty input (instead of the NaN that `0 / 0` would give).
func calculateEnergy(_ data: [Float]) -> Float {
    guard !data.isEmpty else { return 0 }

    // vDSP_svesq yields the plain sum of squares; normalise by the element count.
    var sumOfSquares: Float = 0
    vDSP_svesq(data, 1, &sumOfSquares, vDSP_Length(data.count))
    return sumOfSquares / Float(data.count)
}
|
|
501
|
+
|
|
502
|
+
// Feature extraction functions
|
|
503
|
+
/// Assembles a `Features` value for one audio segment.
///
/// A feature is treated as requested only when its key in `featureOptions` maps to `true`.
/// RMS, tempo and HNR are always computed. Spectral centroid/flatness/rolloff/bandwidth,
/// MFCC and chromagram come from one `AudioFeaturesWrapper.computeFrame` call, made only
/// when at least one of them is requested.
///
/// - Parameters:
///   - segmentData: Samples of the segment.
///   - sampleRate: Sample rate of the segment.
///   - sumSquares: Precomputed sum of squared samples; reported as-is for "energy".
///   - zeroCrossings: Precomputed zero-crossing count.
///   - segmentLength: Number of samples used to normalise RMS and ZCR.
///   - featureOptions: Feature-name flags.
/// - Returns: The populated `Features`; features that were not requested are `0`, empty or `nil`.
func computeFeatures(segmentData: [Float], sampleRate: Float, sumSquares: Float, zeroCrossings: Int, segmentLength: Int, featureOptions: [String: Bool]) -> Features {
    // Shorthand: absent keys and `false` both mean "not requested".
    func wants(_ key: String) -> Bool {
        return featureOptions[key] == true
    }

    let sampleCount = Float(segmentLength)
    let rms = sqrt(sumSquares / sampleCount)
    let energy: Float = wants("energy") ? sumSquares : 0
    let zcr: Float = wants("zcr") ? Float(zeroCrossings) / sampleCount : 0

    // Which wrapper-backed features are needed.
    let needSpectral = wants("spectralCentroid")
        || wants("spectralFlatness")
        || wants("spectralRolloff")
        || wants("spectralBandwidth")
    let needMfcc = wants("mfcc")
    let needChroma = wants("chromagram")

    var spectralCentroid: Float = 0
    var spectralFlatness: Float = 0
    var spectralRolloff: Float = 0
    var spectralBandwidth: Float = 0
    var mfcc: [Float] = []
    var chromagram: [Float] = []

    // One wrapper call covers all FFT-based features (spectral + MFCC + chroma).
    if needSpectral || needMfcc || needChroma {
        let cppResult = segmentData.withUnsafeBufferPointer { samples in
            AudioFeaturesWrapper.computeFrame(
                withSamples: samples.baseAddress,
                numSamples: Int32(segmentData.count),
                sampleRate: Int32(sampleRate),
                fftLength: Int32(N_FFT),
                nMfcc: 13,
                nMelFilters: 26,
                computeMfcc: needMfcc,
                computeChroma: needChroma
            )
        }

        if let result = cppResult {
            if needSpectral {
                spectralCentroid = (result["spectralCentroid"] as? NSNumber)?.floatValue ?? 0
                spectralFlatness = (result["spectralFlatness"] as? NSNumber)?.floatValue ?? 0
                spectralRolloff = (result["spectralRolloff"] as? NSNumber)?.floatValue ?? 0
                spectralBandwidth = (result["spectralBandwidth"] as? NSNumber)?.floatValue ?? 0
            }
            if needMfcc, let values = result["mfcc"] as? [NSNumber] {
                mfcc = values.map { $0.floatValue }
            }
            if needChroma, let values = result["chromagram"] as? [NSNumber] {
                chromagram = values.map { $0.floatValue }
            }
        }
    }

    let melSpectrogram: [Float] = wants("melSpectrogram")
        ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate)
        : []
    let spectralContrast: [Float] = wants("spectralContrast")
        ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate)
        : []
    let tonnetz: [Float] = wants("tonnetz")
        ? computeTonnetz(from: segmentData, sampleRate: sampleRate)
        : []
    let pitch: Float? = wants("pitch")
        ? estimatePitch(from: segmentData, sampleRate: sampleRate)
        : nil

    // Tempo and HNR are computed regardless of `featureOptions`.
    let tempo = extractTempo(from: segmentData, sampleRate: sampleRate)
    let hnr = extractHNR(from: segmentData)

    return Features(
        energy: energy,
        mfcc: mfcc,
        rms: rms,
        zcr: zcr,
        spectralCentroid: spectralCentroid,
        spectralFlatness: spectralFlatness,
        spectralRolloff: spectralRolloff,
        spectralBandwidth: spectralBandwidth,
        chromagram: chromagram,
        tempo: tempo,
        hnr: hnr,
        melSpectrogram: melSpectrogram,
        spectralContrast: spectralContrast,
        tonnetz: tonnetz,
        pitch: pitch
    )
}
|
|
577
|
+
|
|
578
|
+
/// Returns the smallest power of two that is greater than or equal to `n` (at least 1).
private func nextPowerOfTwo(_ n: Int) -> Int {
    // Anything up to 1 is already covered by 2^0.
    guard n > 1 else { return 1 }

    var candidate = 1
    repeat {
        candidate *= 2
    } while candidate < n
    return candidate
}
|
|
585
|
+
|
|
586
|
+
/// Estimates the fundamental frequency of `segment` from its autocorrelation.
///
/// The lag with the highest autocorrelation inside the 50–500 Hz search range is taken
/// as the pitch period. The range is clamped to lags the segment can actually support.
///
/// - Parameters:
///   - segment: Audio samples.
///   - sampleRate: Sample rate of `segment`.
/// - Returns: `sampleRate / lag` for the selected lag, or `0.0` when the segment is shorter
///   than two samples, the sample rate is not a positive finite number, or the search range is empty.
func estimatePitch(from segment: [Float], sampleRate: Float) -> Float {
    let count = segment.count
    guard count >= 2, sampleRate.isFinite, sampleRate > 0 else { return 0.0 }

    // Search range expressed as lags: 500 Hz is the shortest period, 50 Hz the longest.
    // Lag 0 is excluded (it is always the global maximum) and lags are capped at
    // count - 1 so indexing can never run past the autocorrelation array.
    let minLag = max(1, Int(min(sampleRate / 500.0, Float(count))))
    let maxLag = min(Int(min(sampleRate / 50.0, Float(count))), count - 1)
    guard minLag <= maxLag else { return 0.0 }

    // vDSP_conv reads N + P - 1 input samples, so the signal is zero-padded to that length.
    // With a positive filter stride it computes a correlation:
    // autocorrelation[lag] = Σ segment[lag + p] * segment[p].
    let padded = segment + [Float](repeating: 0, count: count - 1)
    var autocorrelation = [Float](repeating: 0, count: count)
    vDSP_conv(padded, 1, segment, 1, &autocorrelation, 1, vDSP_Length(count), vDSP_Length(count))

    var maxCorr: Float = -1.0
    var pitchLag = 0
    for lag in minLag...maxLag where autocorrelation[lag] > maxCorr {
        maxCorr = autocorrelation[lag]
        pitchLag = lag
    }

    // Convert lag to frequency.
    return pitchLag > 0 ? sampleRate / Float(pitchLag) : 0.0
}
|
|
618
|
+
|
|
619
|
+
// Speech detection helper
|
|
620
|
+
/// Heuristic speech detector based on loudness and zero-crossing rate.
///
/// - Parameters:
///   - segment: Audio samples, used for the zero-crossing rate.
///   - rms: Precomputed RMS level of the segment.
/// - Returns: `isActive` is true when `rms > 0.01` and the zero-crossing rate lies strictly
///   between 0.1 and 0.5; `probability` is `rms * 10` clamped to `0...1`.
func detectSpeech(from segment: [Float], rms: Float) -> (isActive: Bool, probability: Float) {
    let crossingRate = calculateZeroCrossingRate(segment)

    let loudEnough = rms > 0.01
    let plausibleRate = crossingRate > 0.1 && crossingRate < 0.5

    // Simple probability estimate derived from loudness only.
    let probability = min(1.0, max(0.0, rms * 10))

    return (isActive: loudEnough && plausibleRate, probability: probability)
}
|
|
628
|
+
|
|
629
|
+
/// Reads a range of frames from an audio file and returns it as integer PCM bytes.
///
/// The frames are read in `format`, converted to the format produced by
/// `decodingConfig.toAudioFormat(baseFormat:)` when that differs, and then quantised to
/// `decodingConfig.targetBitDepth` (16 when unset) in the host's native byte order,
/// interleaving channels frame by frame.
///
/// - Parameters:
///   - url: File to read.
///   - startFrame: Frame position at which reading starts.
///   - frameCount: Number of frames to read.
///   - format: Format used for the read buffer.
///   - decodingConfig: Target format, bit depth, and whether samples are clamped to `-1...1`.
///   - includeNormalizedData: When true, the first channel's float samples are returned too.
///   - includeBase64Data: When true, the PCM bytes are also returned base64-encoded.
/// - Returns: PCM bytes, optional first-channel float samples, optional base64 string.
/// - Throws: An `NSError` in the "AudioProcessing" domain for an unsupported bit depth, a buffer
///   or converter that cannot be created, a failed conversion, or missing float data; any error
///   thrown by `AVAudioFile` or reported by `AVAudioConverter`.
func extractRawAudioData(
    from url: URL,
    startFrame: AVAudioFramePosition,
    frameCount: AVAudioFrameCount,
    format: AVAudioFormat,
    decodingConfig: DecodingConfig,
    includeNormalizedData: Bool,
    includeBase64Data: Bool
) throws -> (pcmData: Data, floatData: [Float]?, base64Data: String?) {
    func failure(_ message: String) -> NSError {
        return NSError(domain: "AudioProcessing", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
    }

    // Use targetBitDepth from decodingConfig instead of the format's bit depth.
    // Reject unsupported depths before doing any I/O.
    let targetBitDepth = decodingConfig.targetBitDepth ?? 16
    guard targetBitDepth == 16 || targetBitDepth == 32 else {
        throw failure("Unsupported bit depth \(targetBitDepth)")
    }

    // Apply decoding configuration.
    let targetFormat = decodingConfig.toAudioFormat(baseFormat: format)

    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        throw failure("Failed to allocate audio buffer")
    }
    let audioFile = try AVAudioFile(forReading: url)

    audioFile.framePosition = startFrame
    try audioFile.read(into: buffer, frameCount: frameCount)

    // Convert to the target format if it differs from the source.
    let finalBuffer: AVAudioPCMBuffer
    if targetFormat != format {
        guard let converter = AVAudioConverter(from: format, to: targetFormat) else {
            throw failure("Failed to create audio converter")
        }

        // Size the output for the sample-rate ratio so upsampled audio is not truncated;
        // never go below the original capacity.
        let rawRatio = targetFormat.sampleRate / format.sampleRate
        let ratio = (rawRatio.isFinite && rawRatio > 0) ? rawRatio : 1
        let scaledFrames = (Double(buffer.frameLength) * ratio).rounded(.up)
        let scaledCapacity = AVAudioFrameCount(min(scaledFrames, Double(AVAudioFrameCount.max)))
        let outputCapacity = max(frameCount, scaledCapacity, 1)

        guard let converted = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: outputCapacity) else {
            throw failure("Failed to allocate conversion buffer")
        }

        var conversionError: NSError?
        // The converter may call the input block more than once; hand the source buffer
        // over exactly once and then signal end of stream so audio is not duplicated.
        var inputProvided = false
        let status = converter.convert(to: converted, error: &conversionError) { _, outStatus in
            if inputProvided {
                outStatus.pointee = .endOfStream
                return nil
            }
            inputProvided = true
            outStatus.pointee = .haveData
            return buffer
        }

        if let conversionError = conversionError {
            Logger.debug("AudioProcessingHelpers", "Format conversion failed: \(conversionError.localizedDescription)")
            throw conversionError
        }
        if status == .error {
            throw failure("Format conversion failed")
        }
        finalBuffer = converted
    } else {
        finalBuffer = buffer
    }

    guard let floatData = finalBuffer.floatChannelData else {
        throw failure("Failed to get float channel data")
    }

    let channels = Int(targetFormat.channelCount)
    let frameLength = Int(finalBuffer.frameLength)
    let totalSamples = frameLength * channels
    let bytesPerSample = targetBitDepth / 8
    var pcmData = Data(capacity: totalSamples * bytesPerSample)

    // Convert float samples to integer PCM at the requested bit depth.
    for frame in 0..<frameLength {
        for channel in 0..<channels {
            let sample = floatData[channel][frame]

            let normalizedSample = decodingConfig.normalizeAudio ?
                max(-1.0, min(1.0, sample)) : sample

            if targetBitDepth == 16 {
                // Clamp to the representable range so out-of-range or non-finite
                // samples saturate instead of trapping in the integer conversion.
                let scaled = normalizedSample * Float(Int16.max)
                let intValue: Int16 = scaled.isNaN
                    ? 0
                    : Int16(max(Float(Int16.min), min(Float(Int16.max), scaled)))
                withUnsafeBytes(of: intValue) { pcmData.append(contentsOf: $0) }
            } else {
                // Float(Int32.max) rounds up to 2^31, so even a sample of exactly 1.0
                // would overflow Int32; clamp in Double, where the bounds are exact.
                let scaled = Double(normalizedSample * Float(Int32.max))
                let intValue: Int32 = scaled.isNaN
                    ? 0
                    : Int32(max(Double(Int32.min), min(Double(Int32.max), scaled)))
                withUnsafeBytes(of: intValue) { pcmData.append(contentsOf: $0) }
            }
        }
    }

    // Only copy the float samples (first channel) if requested.
    let normalizedData: [Float]? = includeNormalizedData ?
        Array(UnsafeBufferPointer(start: floatData[0], count: frameLength)) :
        nil

    // Convert to base64 if requested.
    let base64Data: String? = includeBase64Data ?
        pcmData.base64EncodedString() :
        nil

    return (pcmData: pcmData, floatData: normalizedData, base64Data: base64Data)
}
|
|
712
|
+
|
|
713
|
+
// CRC32 helpers backed by zlib's crc32 implementation
|
|
714
|
+
/// Computes the CRC-32 checksum of `data` using zlib's `crc32`.
///
/// - Parameter data: Bytes to checksum.
/// - Returns: The checksum, starting from an initial value of 0.
func calculateCRC32(data: Data) -> UInt32 {
    return data.withUnsafeBytes { (rawBytes: UnsafeRawBufferPointer) -> UInt32 in
        let bytes = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        let length = UInt32(rawBytes.count)
        let checksum = crc32(0, bytes, length)
        return UInt32(checksum)
    }
}
|
|
720
|
+
|
|
721
|
+
/// Computes the CRC-32 checksum over the raw bytes of the first `count` floats in `floatArray`.
///
/// - Parameters:
///   - floatArray: Values whose in-memory bytes are checksummed.
///   - count: Number of leading elements to include. Values outside `0...floatArray.count`
///     are clamped into that range.
/// - Returns: The checksum from zlib's `crc32`, starting from an initial value of 0.
func calculateCRC32(from floatArray: [Float], count: Int) -> UInt32 {
    // Never read past the end of the array and never form a negative byte count.
    let elementCount = min(max(count, 0), floatArray.count)

    return floatArray.withUnsafeBytes { floatBytes -> UInt32 in
        let byteCount = elementCount * MemoryLayout<Float>.size
        let bytes = floatBytes.baseAddress?.assumingMemoryBound(to: UInt8.self)
        return UInt32(crc32(0, bytes, UInt32(byteCount)))
    }
}
|
|
728
|
+
|
|
729
|
+
/// Builds a complete WAV byte stream: a 44-byte RIFF/WAVE header followed by `pcmData`.
///
/// Despite the name, the returned data contains the header AND the PCM payload.
///
/// - Parameters:
///   - pcmData: Raw PCM payload.
///   - sampleRate: Sample rate written to the header.
///   - channels: Channel count written to the header.
///   - bitDepth: Bits per sample written to the header.
/// - Returns: Header bytes with `pcmData` appended.
func createWavHeader(pcmData: Data, sampleRate: Int, channels: Int, bitDepth: Int) -> Data {
    let headerLength = 44
    let riffChunkSize = pcmData.count + headerLength - 8
    let sampleBytes = bitDepth / 8
    let bytesPerSecond = sampleRate * channels * sampleBytes
    let frameBytes = channels * sampleBytes

    var wav = Data(capacity: headerLength)

    // Four-character ASCII chunk identifier.
    func appendTag(_ tag: String) {
        wav.append(contentsOf: tag.data(using: .ascii)!)
    }
    // Little-endian 32-bit field.
    func appendUInt32(_ value: Int) {
        withUnsafeBytes(of: UInt32(value).littleEndian) { wav.append(contentsOf: $0) }
    }
    // Little-endian 16-bit field.
    func appendUInt16(_ value: Int) {
        withUnsafeBytes(of: UInt16(value).littleEndian) { wav.append(contentsOf: $0) }
    }

    appendTag("RIFF")
    appendUInt32(riffChunkSize)     // file size minus the 8-byte RIFF preamble
    appendTag("WAVE")

    appendTag("fmt ")
    appendUInt32(16)                // fmt chunk size for PCM
    appendUInt16(1)                 // format 1 = PCM
    appendUInt16(channels)
    appendUInt32(sampleRate)
    appendUInt32(bytesPerSecond)    // byte rate
    appendUInt16(frameBytes)        // block align
    appendUInt16(bitDepth)          // bits per sample

    appendTag("data")
    appendUInt32(pcmData.count)

    // Payload follows the header directly.
    wav.append(pcmData)
    return wav
}
|
|
783
|
+
|
|
784
|
+
// Extension to help with binary data conversion
|
|
785
|
+
extension UInt16 {
    /// The value's two bytes, in the order they are stored in memory.
    ///
    /// Apply `.littleEndian` / `.bigEndian` first to obtain a specific byte order.
    var data: Data {
        return Swift.withUnsafeBytes(of: self) { Data($0) }
    }
}
|
|
791
|
+
|
|
792
|
+
extension UInt32 {
    /// The value's four bytes, in the order they are stored in memory.
    ///
    /// Apply `.littleEndian` / `.bigEndian` first to obtain a specific byte order.
    var data: Data {
        return Swift.withUnsafeBytes(of: self) { Data($0) }
    }
}
|