@siteed/audio-studio 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +535 -0
- package/LICENSE +21 -0
- package/README.md +167 -0
- package/android/build.gradle +143 -0
- package/android/src/androidTest/assets/chorus.wav +0 -0
- package/android/src/androidTest/assets/jfk.wav +0 -0
- package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
- package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
- package/android/src/main/AndroidManifest.xml +30 -0
- package/android/src/main/CMakeLists.txt +29 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
- package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
- package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
- package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
- package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
- package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
- package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
- package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
- package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
- package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
- package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
- package/android/src/main/res/drawable/ic_microphone.xml +13 -0
- package/android/src/main/res/drawable/ic_pause.xml +10 -0
- package/android/src/main/res/drawable/ic_play.xml +10 -0
- package/android/src/main/res/drawable/ic_stop.xml +10 -0
- package/android/src/main/res/layout/notification_recording.xml +37 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
- package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
- package/android/src/test/resources/chorus.wav +0 -0
- package/android/src/test/resources/generate_test_audio.py +94 -0
- package/android/src/test/resources/jfk.wav +0 -0
- package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
- package/android/src/test/resources/recorder_hello_world.wav +0 -0
- package/app.plugin.js +3 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
- package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/cjs/AudioDeviceManager.js +688 -0
- package/build/cjs/AudioDeviceManager.js.map +1 -0
- package/build/cjs/AudioRecorder.provider.js +78 -0
- package/build/cjs/AudioRecorder.provider.js.map +1 -0
- package/build/cjs/AudioStudio.native.js +8 -0
- package/build/cjs/AudioStudio.native.js.map +1 -0
- package/build/cjs/AudioStudio.types.js +11 -0
- package/build/cjs/AudioStudio.types.js.map +1 -0
- package/build/cjs/AudioStudio.web.js +708 -0
- package/build/cjs/AudioStudio.web.js.map +1 -0
- package/build/cjs/AudioStudioModule.js +718 -0
- package/build/cjs/AudioStudioModule.js.map +1 -0
- package/build/cjs/WebRecorder.web.js +865 -0
- package/build/cjs/WebRecorder.web.js.map +1 -0
- package/build/cjs/constants/platformLimitations.js +99 -0
- package/build/cjs/constants/platformLimitations.js.map +1 -0
- package/build/cjs/constants.js +20 -0
- package/build/cjs/constants.js.map +1 -0
- package/build/cjs/events.js +29 -0
- package/build/cjs/events.js.map +1 -0
- package/build/cjs/hooks/useAudioDevices.js +179 -0
- package/build/cjs/hooks/useAudioDevices.js.map +1 -0
- package/build/cjs/index.js +64 -0
- package/build/cjs/index.js.map +1 -0
- package/build/cjs/trimAudio.js +76 -0
- package/build/cjs/trimAudio.js.map +1 -0
- package/build/cjs/useAudioRecorder.js +535 -0
- package/build/cjs/useAudioRecorder.js.map +1 -0
- package/build/cjs/utils/BlobFix.js +502 -0
- package/build/cjs/utils/BlobFix.js.map +1 -0
- package/build/cjs/utils/audioProcessing.js +136 -0
- package/build/cjs/utils/audioProcessing.js.map +1 -0
- package/build/cjs/utils/cleanNativeOptions.js +22 -0
- package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
- package/build/cjs/utils/concatenateBuffers.js +25 -0
- package/build/cjs/utils/concatenateBuffers.js.map +1 -0
- package/build/cjs/utils/convertPCMToFloat32.js +124 -0
- package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
- package/build/cjs/utils/crc32.js +52 -0
- package/build/cjs/utils/crc32.js.map +1 -0
- package/build/cjs/utils/encodingToBitDepth.js +17 -0
- package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
- package/build/cjs/utils/getWavFileInfo.js +96 -0
- package/build/cjs/utils/getWavFileInfo.js.map +1 -0
- package/build/cjs/utils/writeWavHeader.js +88 -0
- package/build/cjs/utils/writeWavHeader.js.map +1 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/cjs/workers/wasmGlueString.web.js +27 -0
- package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
- package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/esm/AudioAnalysis/extractPreview.js +25 -0
- package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
- package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/esm/AudioDeviceManager.js +681 -0
- package/build/esm/AudioDeviceManager.js.map +1 -0
- package/build/esm/AudioRecorder.provider.js +40 -0
- package/build/esm/AudioRecorder.provider.js.map +1 -0
- package/build/esm/AudioStudio.native.js +6 -0
- package/build/esm/AudioStudio.native.js.map +1 -0
- package/build/esm/AudioStudio.types.js +8 -0
- package/build/esm/AudioStudio.types.js.map +1 -0
- package/build/esm/AudioStudio.web.js +704 -0
- package/build/esm/AudioStudio.web.js.map +1 -0
- package/build/esm/AudioStudioModule.js +713 -0
- package/build/esm/AudioStudioModule.js.map +1 -0
- package/build/esm/WebRecorder.web.js +861 -0
- package/build/esm/WebRecorder.web.js.map +1 -0
- package/build/esm/constants/platformLimitations.js +90 -0
- package/build/esm/constants/platformLimitations.js.map +1 -0
- package/build/esm/constants.js +17 -0
- package/build/esm/constants.js.map +1 -0
- package/build/esm/events.js +21 -0
- package/build/esm/events.js.map +1 -0
- package/build/esm/hooks/useAudioDevices.js +176 -0
- package/build/esm/hooks/useAudioDevices.js.map +1 -0
- package/build/esm/index.js +23 -0
- package/build/esm/index.js.map +1 -0
- package/build/esm/trimAudio.js +69 -0
- package/build/esm/trimAudio.js.map +1 -0
- package/build/esm/useAudioRecorder.js +529 -0
- package/build/esm/useAudioRecorder.js.map +1 -0
- package/build/esm/utils/BlobFix.js +498 -0
- package/build/esm/utils/BlobFix.js.map +1 -0
- package/build/esm/utils/audioProcessing.js +133 -0
- package/build/esm/utils/audioProcessing.js.map +1 -0
- package/build/esm/utils/cleanNativeOptions.js +19 -0
- package/build/esm/utils/cleanNativeOptions.js.map +1 -0
- package/build/esm/utils/concatenateBuffers.js +21 -0
- package/build/esm/utils/concatenateBuffers.js.map +1 -0
- package/build/esm/utils/convertPCMToFloat32.js +120 -0
- package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
- package/build/esm/utils/crc32.js +50 -0
- package/build/esm/utils/crc32.js.map +1 -0
- package/build/esm/utils/encodingToBitDepth.js +13 -0
- package/build/esm/utils/encodingToBitDepth.js.map +1 -0
- package/build/esm/utils/getWavFileInfo.js +92 -0
- package/build/esm/utils/getWavFileInfo.js.map +1 -0
- package/build/esm/utils/writeWavHeader.js +84 -0
- package/build/esm/utils/writeWavHeader.js.map +1 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/esm/workers/wasmGlueString.web.js +24 -0
- package/build/esm/workers/wasmGlueString.web.js.map +1 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
- package/build/types/AudioDeviceManager.d.ts +187 -0
- package/build/types/AudioDeviceManager.d.ts.map +1 -0
- package/build/types/AudioRecorder.provider.d.ts +11 -0
- package/build/types/AudioRecorder.provider.d.ts.map +1 -0
- package/build/types/AudioStudio.native.d.ts +3 -0
- package/build/types/AudioStudio.native.d.ts.map +1 -0
- package/build/types/AudioStudio.types.d.ts +760 -0
- package/build/types/AudioStudio.types.d.ts.map +1 -0
- package/build/types/AudioStudio.web.d.ts +96 -0
- package/build/types/AudioStudio.web.d.ts.map +1 -0
- package/build/types/AudioStudioModule.d.ts +3 -0
- package/build/types/AudioStudioModule.d.ts.map +1 -0
- package/build/types/WebRecorder.web.d.ts +208 -0
- package/build/types/WebRecorder.web.d.ts.map +1 -0
- package/build/types/constants/platformLimitations.d.ts +40 -0
- package/build/types/constants/platformLimitations.d.ts.map +1 -0
- package/build/types/constants.d.ts +14 -0
- package/build/types/constants.d.ts.map +1 -0
- package/build/types/events.d.ts +29 -0
- package/build/types/events.d.ts.map +1 -0
- package/build/types/hooks/useAudioDevices.d.ts +15 -0
- package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
- package/build/types/index.d.ts +21 -0
- package/build/types/index.d.ts.map +1 -0
- package/build/types/trimAudio.d.ts +25 -0
- package/build/types/trimAudio.d.ts.map +1 -0
- package/build/types/useAudioRecorder.d.ts +22 -0
- package/build/types/useAudioRecorder.d.ts.map +1 -0
- package/build/types/utils/BlobFix.d.ts +9 -0
- package/build/types/utils/BlobFix.d.ts.map +1 -0
- package/build/types/utils/audioProcessing.d.ts +24 -0
- package/build/types/utils/audioProcessing.d.ts.map +1 -0
- package/build/types/utils/cleanNativeOptions.d.ts +15 -0
- package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
- package/build/types/utils/concatenateBuffers.d.ts +8 -0
- package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
- package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
- package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
- package/build/types/utils/crc32.d.ts +7 -0
- package/build/types/utils/crc32.d.ts.map +1 -0
- package/build/types/utils/encodingToBitDepth.d.ts +5 -0
- package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
- package/build/types/utils/getWavFileInfo.d.ts +26 -0
- package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
- package/build/types/utils/writeWavHeader.d.ts +34 -0
- package/build/types/utils/writeWavHeader.d.ts.map +1 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
- package/build/types/workers/wasmGlueString.web.d.ts +2 -0
- package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
- package/cpp/AudioFeatures.cpp +274 -0
- package/cpp/AudioFeatures.h +85 -0
- package/cpp/AudioFeaturesBridge.cpp +146 -0
- package/cpp/AudioFeaturesBridge.h +47 -0
- package/cpp/MelSpectrogram.cpp +227 -0
- package/cpp/MelSpectrogram.h +82 -0
- package/cpp/MelSpectrogramBridge.cpp +112 -0
- package/cpp/MelSpectrogramBridge.h +33 -0
- package/cpp/kiss_fft/COPYING +11 -0
- package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
- package/cpp/kiss_fft/kiss_fft.c +424 -0
- package/cpp/kiss_fft/kiss_fft.h +160 -0
- package/cpp/kiss_fft/kiss_fft_log.h +36 -0
- package/cpp/kiss_fft/kiss_fftr.c +155 -0
- package/cpp/kiss_fft/kiss_fftr.h +54 -0
- package/expo-module.config.json +10 -0
- package/ios/AudioAnalysisData.swift +74 -0
- package/ios/AudioDeviceManager.swift +670 -0
- package/ios/AudioFeaturesWrapper.h +21 -0
- package/ios/AudioFeaturesWrapper.mm +63 -0
- package/ios/AudioNotificationManager.swift +154 -0
- package/ios/AudioProcessingHelpers.swift +797 -0
- package/ios/AudioProcessor.swift +1191 -0
- package/ios/AudioStreamError.swift +7 -0
- package/ios/AudioStreamManager.swift +2369 -0
- package/ios/AudioStreamManagerDelegate.swift +16 -0
- package/ios/AudioStudio.podspec +39 -0
- package/ios/AudioStudioModule.swift +1111 -0
- package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
- package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
- package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
- package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
- package/ios/AudioStudioTests/Info.plist +22 -0
- package/ios/AudioStudioTests/README.md +39 -0
- package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
- package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
- package/ios/DataPoint.swift +54 -0
- package/ios/DecodingConfig.swift +59 -0
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +95 -0
- package/ios/ISSUE_IOS.md +68 -0
- package/ios/Logger.swift +39 -0
- package/ios/MelSpectrogramWrapper.h +30 -0
- package/ios/MelSpectrogramWrapper.mm +97 -0
- package/ios/NotificationExtension.swift +15 -0
- package/ios/RecordingResult.swift +22 -0
- package/ios/RecordingSettings.swift +311 -0
- package/ios/WaveformExtractor.swift +105 -0
- package/ios/tests/README.md +41 -0
- package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
- package/ios/tests/integration/buffer_duration_test.swift +185 -0
- package/ios/tests/integration/compressed_only_output_test.swift +271 -0
- package/ios/tests/integration/output_control_test.swift +322 -0
- package/ios/tests/integration/run_integration_tests.sh +37 -0
- package/ios/tests/opus_support_test_macos.swift +154 -0
- package/ios/tests/standalone/audio_processing_test.swift +144 -0
- package/ios/tests/standalone/audio_recording_test.swift +277 -0
- package/ios/tests/standalone/audio_streaming_test.swift +249 -0
- package/ios/tests/standalone/standalone_test.swift +144 -0
- package/package.json +146 -0
- package/plugin/build/index.cjs +194 -0
- package/plugin/build/index.d.cts +22 -0
- package/plugin/build/index.js +194 -0
- package/plugin/src/index.ts +285 -0
- package/plugin/tsconfig.json +10 -0
- package/plugin/tsconfig.tsbuildinfo +1 -0
- package/prebuilt/wasm/mel-spectrogram.js +18 -0
- package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
- package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
- package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
- package/src/AudioAnalysis/extractAudioData.ts +17 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/AudioAnalysis/extractWaveform.ts +22 -0
- package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
- package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
- package/src/AudioDeviceManager.ts +800 -0
- package/src/AudioRecorder.provider.tsx +57 -0
- package/src/AudioStudio.native.ts +6 -0
- package/src/AudioStudio.types.ts +899 -0
- package/src/AudioStudio.web.ts +911 -0
- package/src/AudioStudioModule.ts +984 -0
- package/src/WebRecorder.web.ts +1114 -0
- package/src/constants/platformLimitations.ts +118 -0
- package/src/constants.ts +21 -0
- package/src/events.ts +63 -0
- package/src/hooks/useAudioDevices.ts +213 -0
- package/src/index.ts +67 -0
- package/src/trimAudio.ts +94 -0
- package/src/types/crc-32.d.ts +9 -0
- package/src/useAudioRecorder.tsx +784 -0
- package/src/utils/BlobFix.ts +561 -0
- package/src/utils/audioProcessing.ts +205 -0
- package/src/utils/cleanNativeOptions.ts +18 -0
- package/src/utils/concatenateBuffers.ts +24 -0
- package/src/utils/convertPCMToFloat32.ts +170 -0
- package/src/utils/crc32.ts +59 -0
- package/src/utils/encodingToBitDepth.ts +18 -0
- package/src/utils/getWavFileInfo.ts +132 -0
- package/src/utils/writeWavHeader.ts +115 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
- package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
- package/src/workers/wasmGlueString.web.ts +23 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#include "AudioFeaturesBridge.h"
|
|
2
|
+
#include "AudioFeatures.h"
|
|
3
|
+
#include <cstdlib>
|
|
4
|
+
#include <cstring>
|
|
5
|
+
#include <memory>
|
|
6
|
+
#include <mutex>
|
|
7
|
+
|
|
8
|
+
// Processor instance shared by the batch and streaming entry points below.
// It is (re)created by audio_features_compute() and audio_features_init()
// whenever it is missing or its config differs from the requested one.
static std::unique_ptr<AudioFeaturesProcessor> cachedProcessor;
|
|
9
|
+
// Guards cachedProcessor; each extern "C" function that touches the cached
// processor takes this lock first.
static std::mutex cachedMutex;
|
|
10
|
+
|
|
11
|
+
// Builds a heap-allocated C result from the C++ feature result.
//
// Returns nullptr when the result struct itself cannot be allocated. When an
// individual array cannot be allocated, that array is reported as empty
// (pointer nullptr, count 0) and the rest of the result is still returned.
// The result and its arrays come from malloc(); audio_features_free()
// releases them.
static CAudioFeaturesResult* resultFromCpp(const AudioFeaturesResult& src) {
    auto* out = static_cast<CAudioFeaturesResult*>(malloc(sizeof(CAudioFeaturesResult)));
    if (out == nullptr) {
        return nullptr;
    }

    // Copies one float sequence into a fresh malloc() buffer and records its length.
    const auto duplicate = [](const auto& values, float*& outData, int& outCount) {
        outData = nullptr;
        outCount = static_cast<int>(values.size());
        if (outCount <= 0) {
            return;
        }
        const std::size_t byteCount = outCount * sizeof(float);
        outData = static_cast<float*>(malloc(byteCount));
        if (outData == nullptr) {
            // Allocation failed: expose the array as empty instead of failing the call.
            outCount = 0;
            return;
        }
        std::memcpy(outData, values.data(), byteCount);
    };

    out->spectralCentroid = src.spectralCentroid;
    out->spectralFlatness = src.spectralFlatness;
    out->spectralRolloff = src.spectralRolloff;
    out->spectralBandwidth = src.spectralBandwidth;

    duplicate(src.mfcc, out->mfcc, out->mfccCount);
    duplicate(src.chromagram, out->chromagram, out->chromagramCount);

    return out;
}
|
|
39
|
+
|
|
40
|
+
// Writes the C++ feature result into a caller-provided C result.
//
// Any arrays already attached to `r` are freed first, so `r->mfcc` and
// `r->chromagram` must be nullptr or pointers previously obtained from
// malloc(). If a new array cannot be allocated it is reported as empty
// (pointer nullptr, count 0).
static void fillResultFromCpp(const AudioFeaturesResult& src, CAudioFeaturesResult* r) {
    // Copies one float sequence into a fresh malloc() buffer and records its length.
    const auto duplicate = [](const auto& values, float*& outData, int& outCount) {
        outData = nullptr;
        outCount = static_cast<int>(values.size());
        if (outCount <= 0) {
            return;
        }
        const std::size_t byteCount = outCount * sizeof(float);
        outData = static_cast<float*>(malloc(byteCount));
        if (outData == nullptr) {
            outCount = 0;
            return;
        }
        std::memcpy(outData, values.data(), byteCount);
    };

    r->spectralCentroid = src.spectralCentroid;
    r->spectralFlatness = src.spectralFlatness;
    r->spectralRolloff = src.spectralRolloff;
    r->spectralBandwidth = src.spectralBandwidth;

    // Release buffers left over from a previous call so repeated use of the
    // same result struct does not leak (free(nullptr) is a no-op).
    free(r->mfcc);
    r->mfcc = nullptr;
    free(r->chromagram);
    r->chromagram = nullptr;

    duplicate(src.mfcc, r->mfcc, r->mfccCount);
    duplicate(src.chromagram, r->chromagram, r->chromagramCount);
}
|
|
68
|
+
|
|
69
|
+
extern "C" {
|
|
70
|
+
|
|
71
|
+
// Batch API: computes audio features for one buffer of samples.
//
// Parameters:
//   samples, numSamples  input buffer and its length in samples
//   sampleRate, fftLength, nMfcc, nMelFilters  copied into the processor config
//   computeMfcc, computeChroma  non-zero enables the corresponding feature
//
// The cached processor is reused when its config equals the requested one and
// rebuilt otherwise; the whole call runs under cachedMutex.
//
// Returns a malloc()-allocated result that must be released with
// audio_features_free(), or NULL when `samples` is NULL while `numSamples` is
// positive, or when the result struct cannot be allocated.
CAudioFeaturesResult* audio_features_compute(
    const float* samples, int numSamples, int sampleRate,
    int fftLength, int nMfcc, int nMelFilters,
    int computeMfcc, int computeChroma)
{
    AudioFeaturesConfig config;
    config.sampleRate = sampleRate;
    config.fftLength = fftLength;
    config.nMfcc = nMfcc;
    config.nMelFilters = nMelFilters;
    config.computeMfcc = (computeMfcc != 0);
    config.computeChroma = (computeChroma != 0);

    std::lock_guard<std::mutex> lock(cachedMutex);
    if (!cachedProcessor || !(cachedProcessor->config() == config)) {
        cachedProcessor = std::make_unique<AudioFeaturesProcessor>(config);
    }

    // Mirror the NULL check done by audio_features_compute_frame(): a NULL
    // buffer with a positive length must not be handed to the processor.
    // Checked after the cache update so the cached config still reflects
    // this call, as it did before the check existed.
    if (samples == nullptr && numSamples > 0) {
        return nullptr;
    }

    AudioFeaturesResult result = cachedProcessor->compute(samples, numSamples);
    return resultFromCpp(result);
}
|
|
92
|
+
|
|
93
|
+
// Releases a result returned by audio_features_compute(): both feature arrays
// and then the struct itself. Passing NULL is allowed and does nothing.
void audio_features_free(CAudioFeaturesResult* result) {
    if (result == nullptr) {
        return;
    }
    // free(nullptr) is a no-op, so absent arrays need no special casing.
    free(result->mfcc);
    free(result->chromagram);
    free(result);
}
|
|
100
|
+
|
|
101
|
+
// Releases only the arrays owned by a caller-allocated result (the struct
// itself is not freed) and leaves it in an empty state: both pointers NULL and
// both counts 0. Passing NULL is allowed and does nothing.
void audio_features_free_arrays(CAudioFeaturesResult* result) {
    if (result == nullptr) {
        return;
    }

    // free(nullptr) is a no-op, so absent arrays need no special casing.
    free(result->mfcc);
    result->mfcc = nullptr;
    // Keep the "count is 0 whenever the pointer is NULL" invariant that the
    // fill functions maintain, so a stale count can never be paired with a
    // NULL array.
    result->mfccCount = 0;

    free(result->chromagram);
    result->chromagram = nullptr;
    result->chromagramCount = 0;
}
|
|
107
|
+
|
|
108
|
+
void audio_features_init(int sampleRate, int fftLength,
|
|
109
|
+
int nMfcc, int nMelFilters, int computeMfcc, int computeChroma)
|
|
110
|
+
{
|
|
111
|
+
AudioFeaturesConfig config;
|
|
112
|
+
config.sampleRate = sampleRate;
|
|
113
|
+
config.fftLength = fftLength;
|
|
114
|
+
config.nMfcc = nMfcc;
|
|
115
|
+
config.nMelFilters = nMelFilters;
|
|
116
|
+
config.computeMfcc = (computeMfcc != 0);
|
|
117
|
+
config.computeChroma = (computeChroma != 0);
|
|
118
|
+
|
|
119
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
120
|
+
if (!cachedProcessor || !(cachedProcessor->config() == config)) {
|
|
121
|
+
cachedProcessor = std::make_unique<AudioFeaturesProcessor>(config);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
int audio_features_compute_frame(const float* samples, int numSamples,
|
|
126
|
+
CAudioFeaturesResult* result)
|
|
127
|
+
{
|
|
128
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
129
|
+
if (!cachedProcessor || !samples || !result) {
|
|
130
|
+
return 0;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
AudioFeaturesResult cppResult = cachedProcessor->compute(samples, numSamples);
|
|
134
|
+
fillResultFromCpp(cppResult, result);
|
|
135
|
+
return 1;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
int audio_features_get_n_mfcc(void) {
|
|
139
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
140
|
+
if (!cachedProcessor) {
|
|
141
|
+
return 0;
|
|
142
|
+
}
|
|
143
|
+
return cachedProcessor->config().nMfcc;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
} // extern "C"
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#ifndef AUDIO_FEATURES_BRIDGE_H
|
|
2
|
+
#define AUDIO_FEATURES_BRIDGE_H
|
|
3
|
+
|
|
4
|
+
#ifdef __cplusplus
|
|
5
|
+
extern "C" {
|
|
6
|
+
#endif
|
|
7
|
+
|
|
8
|
+
// Plain-C view of one feature computation result.
// The two arrays are heap buffers owned by the result: release them with
// audio_features_free() (batch API, also frees the struct) or
// audio_features_free_arrays() (streaming API, caller-allocated struct).
// An array that is absent or could not be allocated has pointer NULL and count 0.
typedef struct {
|
|
9
|
+
float spectralCentroid;
|
|
10
|
+
float spectralFlatness;
|
|
11
|
+
float spectralRolloff;
|
|
12
|
+
float spectralBandwidth;
|
|
13
|
+
float* mfcc; // nMfcc coefficients; freed by audio_features_free or audio_features_free_arrays
|
|
14
|
+
int mfccCount; // number of floats in mfcc
|
|
15
|
+
float* chromagram; // 12 bins; freed by audio_features_free or audio_features_free_arrays
|
|
16
|
+
int chromagramCount; // number of floats in chromagram
|
|
17
|
+
} CAudioFeaturesResult;
|
|
18
|
+
|
|
19
|
+
// Batch API: compute features for a buffer of samples
|
|
20
|
+
CAudioFeaturesResult* audio_features_compute(
|
|
21
|
+
const float* samples, int numSamples, int sampleRate,
|
|
22
|
+
int fftLength, int nMfcc, int nMelFilters,
|
|
23
|
+
int computeMfcc, int computeChroma);
|
|
24
|
+
|
|
25
|
+
void audio_features_free(CAudioFeaturesResult* result);
|
|
26
|
+
|
|
27
|
+
// Streaming API: init processor, then compute per-frame
|
|
28
|
+
void audio_features_init(int sampleRate, int fftLength,
|
|
29
|
+
int nMfcc, int nMelFilters, int computeMfcc, int computeChroma);
|
|
30
|
+
|
|
31
|
+
// Returns 1 on success, 0 on failure.
|
|
32
|
+
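// Output written to the provided CAudioFeaturesResult (caller-allocated).
// Before the first call its mfcc and chromagram pointers must be NULL (e.g. zero-initialize
// the struct): any non-NULL pointer found there is freed before the new arrays are stored.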
|
|
33
|
+
// mfcc and chromagram pointers inside result are allocated by this function
|
|
34
|
+
// and must be freed by the caller via audio_features_free_arrays().
|
|
35
|
+
int audio_features_compute_frame(const float* samples, int numSamples,
|
|
36
|
+
CAudioFeaturesResult* result);
|
|
37
|
+
|
|
38
|
+
// Free only the internal arrays (mfcc, chromagram) of a stack-allocated result
|
|
39
|
+
void audio_features_free_arrays(CAudioFeaturesResult* result);
|
|
40
|
+
|
|
41
|
+
int audio_features_get_n_mfcc(void);
|
|
42
|
+
|
|
43
|
+
#ifdef __cplusplus
|
|
44
|
+
}
|
|
45
|
+
#endif
|
|
46
|
+
|
|
47
|
+
#endif // AUDIO_FEATURES_BRIDGE_H
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
#include "MelSpectrogram.h"
|
|
2
|
+
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <cstring>
|
|
5
|
+
#include <limits>
|
|
6
|
+
|
|
7
|
+
#ifndef M_PI
|
|
8
|
+
#define M_PI 3.14159265358979323846
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
// Constructs a processor from `config`, replacing unusable values with safe
// defaults, then allocates the real-FFT plan and precomputes the analysis
// window, the mel filterbank and the work buffers.
//
// Sanitizing rules applied to the stored copy of the config:
//   fftLength <= 0          -> 2048
//   odd fftLength           -> rounded up to the next even value
//   hopLengthSamples <= 0   -> 160
//   windowSizeSamples <= 1  -> 400
//   nMels <= 0              -> 128
//   fMax <= 0               -> sampleRate / 2
//
// NOTE(review): sampleRate itself is not validated here; a non-positive value
// gives a non-positive fMax fallback and a zero bin width in
// buildMelFilterbank() - confirm callers always pass a positive rate.
MelSpectrogramProcessor::MelSpectrogramProcessor(const MelSpectrogramConfig& config)
    : config_(config), fftCfg_(nullptr) {
    // Clamp invalid values to safe defaults to prevent division by zero
    if (config_.fftLength <= 0) config_.fftLength = 2048;
    // kiss_fftr_alloc() only supports even transform sizes and returns NULL
    // for odd ones, which would leave the processor without an FFT plan.
    if (config_.fftLength % 2 != 0) config_.fftLength += 1;
    if (config_.hopLengthSamples <= 0) config_.hopLengthSamples = 160;
    // A window of length 1 would make the (N - 1) divisor in buildWindow() zero.
    if (config_.windowSizeSamples <= 1) config_.windowSizeSamples = 400;
    if (config_.nMels <= 0) config_.nMels = 128;
    if (config_.fMax <= 0.0f) {
        config_.fMax = static_cast<float>(config_.sampleRate) / 2.0f;
    }

    fftCfg_ = kiss_fftr_alloc(config_.fftLength, 0, nullptr, nullptr);
    buildWindow();
    buildMelFilterbank();
    allocateBuffers();
}
|
|
26
|
+
|
|
27
|
+
// Releases the FFT plan obtained from kiss_fftr_alloc() in the constructor.
MelSpectrogramProcessor::~MelSpectrogramProcessor() {
    // Nothing to release when the plan was never allocated.
    if (fftCfg_ == nullptr) {
        return;
    }
    free(fftCfg_);
}
|
|
32
|
+
|
|
33
|
+
// Sizes the FFT work buffers: fftLength input samples (new slots zero-filled)
// and fftLength / 2 + 1 bins for both the FFT output and the power spectrum.
void MelSpectrogramProcessor::allocateBuffers() {
    const int fftSize = config_.fftLength;
    const int spectrumBins = fftSize / 2 + 1;

    fftInput_.resize(fftSize, 0.0f);
    fftOutput_.resize(spectrumBins);
    powerSpectrum_.resize(spectrumBins);
}
|
|
39
|
+
|
|
40
|
+
// Converts a frequency in Hz to the mel scale: mel = 2595 * log10(1 + hz / 700).
float MelSpectrogramProcessor::hzToMel(float hz) {
    const float scaled = 1.0f + hz / 700.0f;
    return 2595.0f * std::log10(scaled);
}
|
|
43
|
+
|
|
44
|
+
// Converts a mel-scale value back to Hz: hz = 700 * (10^(mel / 2595) - 1).
// Inverse of hzToMel().
float MelSpectrogramProcessor::melToHz(float mel) {
    const float exponent = mel / 2595.0f;
    return 700.0f * (std::pow(10.0f, exponent) - 1.0f);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Precomputes the analysis window of windowSizeSamples coefficients.
 *
 * windowType == 1 selects Hamming; every other value selects Hann. Both use
 * windowSizeSamples - 1 as the cosine denominator, so the first and last
 * coefficients are equal.
 */
void MelSpectrogramProcessor::buildWindow() {
    const int length = config_.windowSizeSamples;
    const bool useHamming = (config_.windowType == 1);
    const float N = static_cast<float>(length - 1);

    window_.resize(length);
    for (int n = 0; n < length; ++n) {
        const float c = std::cos(2.0f * static_cast<float>(M_PI) * n / N);
        window_[n] = useHamming ? 0.54f - 0.46f * c
                                : 0.5f * (1.0f - c);
    }
}
|
|
61
|
+
|
|
62
|
+
void MelSpectrogramProcessor::buildMelFilterbank() {
|
|
63
|
+
const int numBins = config_.fftLength / 2 + 1;
|
|
64
|
+
const float melMin = hzToMel(config_.fMin);
|
|
65
|
+
const float melMax = hzToMel(config_.fMax);
|
|
66
|
+
|
|
67
|
+
// nMels + 2 points for triangular filters
|
|
68
|
+
std::vector<float> melPoints(config_.nMels + 2);
|
|
69
|
+
for (int i = 0; i < config_.nMels + 2; ++i) {
|
|
70
|
+
float mel = melMin + i * (melMax - melMin) / (config_.nMels + 1);
|
|
71
|
+
melPoints[i] = melToHz(mel);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const float binWidth = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
75
|
+
|
|
76
|
+
// Build sparse filterbank — only store non-zero weights per mel band
|
|
77
|
+
melFilters_.resize(config_.nMels);
|
|
78
|
+
for (int melIdx = 0; melIdx < config_.nMels; ++melIdx) {
|
|
79
|
+
const float fLow = melPoints[melIdx];
|
|
80
|
+
const float fCenter = melPoints[melIdx + 1];
|
|
81
|
+
const float fHigh = melPoints[melIdx + 2];
|
|
82
|
+
|
|
83
|
+
// Find bin range that overlaps this filter
|
|
84
|
+
int binStart = std::max(0, static_cast<int>(std::ceil(fLow / binWidth)));
|
|
85
|
+
int binEnd = std::min(numBins - 1, static_cast<int>(std::floor(fHigh / binWidth)));
|
|
86
|
+
|
|
87
|
+
melFilters_[melIdx].startBin = binStart;
|
|
88
|
+
const int count = binEnd - binStart + 1;
|
|
89
|
+
melFilters_[melIdx].weights.resize(count > 0 ? count : 0);
|
|
90
|
+
|
|
91
|
+
for (int bin = binStart; bin <= binEnd; ++bin) {
|
|
92
|
+
float freq = static_cast<float>(bin) * binWidth;
|
|
93
|
+
float weight;
|
|
94
|
+
if (freq <= fCenter) {
|
|
95
|
+
float denom = fCenter - fLow;
|
|
96
|
+
weight = (denom > 0.0f) ? (freq - fLow) / denom : 0.0f;
|
|
97
|
+
} else {
|
|
98
|
+
float denom = fHigh - fCenter;
|
|
99
|
+
weight = (denom > 0.0f) ? (fHigh - freq) / denom : 0.0f;
|
|
100
|
+
}
|
|
101
|
+
melFilters_[melIdx].weights[bin - binStart] = std::max(0.0f, weight);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
 * Computes the mel spectrogram of a whole buffer.
 *
 * The signal is cut into frames of windowSizeSamples taken every
 * hopLengthSamples, with no padding. Each frame is windowed, zero-extended to
 * fftLength, transformed, converted to a power spectrum and projected onto the
 * mel filterbank. Optional post-processing applies a natural log
 * (floored at 1e-10) and then a min-max rescale over the whole result.
 *
 * Uses the processor's shared work buffers, so concurrent calls on the same
 * instance are not safe.
 *
 * @param samples    Input samples; may be null only if the result is to be empty.
 * @param numSamples Number of valid entries in samples.
 * @return Row-major [timeSteps x nMels] data. timeSteps is 0 and data is empty
 *         when samples is null, no FFT plan exists, or the input is shorter
 *         than one window.
 */
MelSpectrogramResult MelSpectrogramProcessor::compute(const float* samples, int numSamples) {
    const int windowSize = config_.windowSizeSamples;
    const int hop = config_.hopLengthSamples;
    const int fftLength = config_.fftLength;
    const int nMels = config_.nMels;
    const int numBins = fftLength / 2 + 1;

    MelSpectrogramResult result;
    result.timeSteps = 0;
    result.nMels = nMels;

    // An input shorter than one window yields no frame. Testing this up front
    // matters: (numSamples - windowSize) / hop truncates toward zero, so a
    // deficit smaller than one hop would otherwise count as one frame built
    // partly from whatever the work buffer held from a previous call.
    if (samples == nullptr || fftCfg_ == nullptr || numSamples < windowSize) {
        return result;
    }

    const int numFrames = (numSamples - windowSize) / hop + 1;

    // Use pre-allocated work buffers
    float* fftIn = fftInput_.data();
    kiss_fft_cpx* fftOut = fftOutput_.data();
    float* power = powerSpectrum_.data();

    // Never write more samples than the FFT input buffer holds.
    const int frameLen = std::min(windowSize, fftLength);

    // Zero the tail of the FFT input once; the per-frame copy below only ever
    // overwrites the first frameLen entries.
    if (frameLen < fftLength) {
        std::memset(fftIn + frameLen, 0,
                    static_cast<std::size_t>(fftLength - frameLen) * sizeof(float));
    }

    // Flat contiguous result buffer; size computed in size_t to avoid int overflow.
    result.timeSteps = numFrames;
    result.data.resize(static_cast<std::size_t>(numFrames) * static_cast<std::size_t>(nMels));

    for (int frameIdx = 0; frameIdx < numFrames; ++frameIdx) {
        // frameIdx * hop + windowSize <= numSamples by construction of numFrames.
        const float* frame = samples + frameIdx * hop;

        // Apply window to frame
        for (int i = 0; i < frameLen; ++i) {
            fftIn[i] = frame[i] * window_[i];
        }

        // Compute real FFT
        kiss_fftr(fftCfg_, fftIn, fftOut);

        // Compute power spectrum (real^2 + imag^2)
        for (int i = 0; i < numBins; ++i) {
            power[i] = fftOut[i].r * fftOut[i].r + fftOut[i].i * fftOut[i].i;
        }

        // Apply sparse mel filterbank
        float* melRow = result.data.data() + static_cast<std::size_t>(frameIdx) * nMels;
        for (int melIdx = 0; melIdx < nMels; ++melIdx) {
            const MelFilter& filter = melFilters_[melIdx];
            const int count = static_cast<int>(filter.weights.size());
            const float* w = filter.weights.data();
            const float* p = power + filter.startBin;
            float sum = 0.0f;
            for (int k = 0; k < count; ++k) {
                sum += p[k] * w[k];
            }
            melRow[melIdx] = sum;
        }
    }

    // Post-processing: log scaling (floor keeps log() finite for silent bands)
    if (config_.logScale) {
        for (float& v : result.data) {
            v = std::log(std::max(1e-10f, v));
        }
    }

    // Post-processing: min-max normalise over the whole spectrogram
    if (config_.normalize) {
        float minVal = std::numeric_limits<float>::max();
        float maxVal = std::numeric_limits<float>::lowest();
        for (const float v : result.data) {
            minVal = std::min(minVal, v);
            maxVal = std::max(maxVal, v);
        }
        const float range = maxVal - minVal;
        if (range > 0.0f) {
            // A constant spectrogram (range == 0) is left untouched.
            const float invRange = 1.0f / range;
            for (float& v : result.data) {
                v = (v - minVal) * invRange;
            }
        }
    }

    return result;
}
|
|
193
|
+
|
|
194
|
+
void MelSpectrogramProcessor::computeFrame(const float* frame, int frameSize, float* melOutput) {
|
|
195
|
+
const int numBins = config_.fftLength / 2 + 1;
|
|
196
|
+
|
|
197
|
+
// Use pre-allocated buffers
|
|
198
|
+
float* fftIn = fftInput_.data();
|
|
199
|
+
kiss_fft_cpx* fftOut = fftOutput_.data();
|
|
200
|
+
float* power = powerSpectrum_.data();
|
|
201
|
+
|
|
202
|
+
// Zero and apply window
|
|
203
|
+
std::memset(fftIn, 0, config_.fftLength * sizeof(float));
|
|
204
|
+
int len = std::min(frameSize, config_.windowSizeSamples);
|
|
205
|
+
for (int i = 0; i < len; ++i) {
|
|
206
|
+
fftIn[i] = frame[i] * window_[i];
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
kiss_fftr(fftCfg_, fftIn, fftOut);
|
|
210
|
+
|
|
211
|
+
// Power spectrum -> sparse mel filterbank
|
|
212
|
+
for (int i = 0; i < numBins; ++i) {
|
|
213
|
+
power[i] = fftOut[i].r * fftOut[i].r + fftOut[i].i * fftOut[i].i;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
for (int melIdx = 0; melIdx < config_.nMels; ++melIdx) {
|
|
217
|
+
const MelFilter& filter = melFilters_[melIdx];
|
|
218
|
+
const int count = static_cast<int>(filter.weights.size());
|
|
219
|
+
float sum = 0.0f;
|
|
220
|
+
const float* w = filter.weights.data();
|
|
221
|
+
const float* p = power + filter.startBin;
|
|
222
|
+
for (int k = 0; k < count; ++k) {
|
|
223
|
+
sum += p[k] * w[k];
|
|
224
|
+
}
|
|
225
|
+
melOutput[melIdx] = sum;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "kiss_fft/kiss_fft.h"
|
|
6
|
+
#include "kiss_fft/kiss_fftr.h"
|
|
7
|
+
|
|
8
|
+
/**
 * Parameters of a mel spectrogram computation.
 *
 * Every field has an initializer, so a default-constructed config is fully
 * determined. Fields left at 0 are "unset".
 */
struct MelSpectrogramConfig {
    int sampleRate = 0;          // Hz; 0 = unset
    int fftLength = 2048;        // FFT size in samples
    int windowSizeSamples = 0;   // analysis window length; 0 = unset
    int hopLengthSamples = 0;    // step between frames; 0 = unset
    int nMels = 128;             // number of mel bands
    float fMin = 0.0f;           // lowest filterbank frequency, Hz
    float fMax = 0.0f;           // 0 = use sampleRate/2
    int windowType = 0;          // 0=hann, 1=hamming
    bool logScale = true;        // apply natural log to the mel energies
    bool normalize = false;      // min-max rescale the whole result

    // Field-by-field equality, including exact comparison of the float fields.
    bool operator==(const MelSpectrogramConfig& other) const {
        return sampleRate == other.sampleRate &&
               fftLength == other.fftLength &&
               windowSizeSamples == other.windowSizeSamples &&
               hopLengthSamples == other.hopLengthSamples &&
               nMels == other.nMels &&
               fMin == other.fMin &&
               fMax == other.fMax &&
               windowType == other.windowType &&
               logScale == other.logScale &&
               normalize == other.normalize;
    }
};
|
|
33
|
+
|
|
34
|
+
/**
 * Mel spectrogram stored as one contiguous row-major array.
 *
 * A default-constructed result is empty (no data, zero frames, zero bands).
 */
struct MelSpectrogramResult {
    std::vector<float> data;  // Flat array: [timeSteps * nMels], row-major
    int timeSteps = 0;        // number of frames (rows)
    int nMels = 0;            // number of mel bands per frame (columns)

    // Access element at [frame][mel]. No bounds checking is performed.
    // The offset is computed in size_t so large results cannot overflow int.
    float& at(int frame, int mel) {
        return data[static_cast<std::size_t>(frame) * static_cast<std::size_t>(nMels) +
                    static_cast<std::size_t>(mel)];
    }
    float at(int frame, int mel) const {
        return data[static_cast<std::size_t>(frame) * static_cast<std::size_t>(nMels) +
                    static_cast<std::size_t>(mel)];
    }
};
|
|
43
|
+
|
|
44
|
+
class MelSpectrogramProcessor {
|
|
45
|
+
public:
|
|
46
|
+
MelSpectrogramProcessor(const MelSpectrogramConfig& config);
|
|
47
|
+
~MelSpectrogramProcessor();
|
|
48
|
+
|
|
49
|
+
// Non-copyable (owns FFT plan)
|
|
50
|
+
MelSpectrogramProcessor(const MelSpectrogramProcessor&) = delete;
|
|
51
|
+
MelSpectrogramProcessor& operator=(const MelSpectrogramProcessor&) = delete;
|
|
52
|
+
|
|
53
|
+
MelSpectrogramResult compute(const float* samples, int numSamples);
|
|
54
|
+
void computeFrame(const float* frame, int frameSize, float* melOutput);
|
|
55
|
+
|
|
56
|
+
const MelSpectrogramConfig& config() const { return config_; }
|
|
57
|
+
|
|
58
|
+
private:
|
|
59
|
+
MelSpectrogramConfig config_;
|
|
60
|
+
|
|
61
|
+
// Sparse mel filterbank: per mel band, store [startBin, weights[]]
|
|
62
|
+
struct MelFilter {
|
|
63
|
+
int startBin;
|
|
64
|
+
std::vector<float> weights; // only non-zero weights
|
|
65
|
+
};
|
|
66
|
+
std::vector<MelFilter> melFilters_;
|
|
67
|
+
|
|
68
|
+
std::vector<float> window_;
|
|
69
|
+
kiss_fftr_cfg fftCfg_;
|
|
70
|
+
|
|
71
|
+
// Pre-allocated work buffers (avoid per-frame allocation)
|
|
72
|
+
std::vector<float> fftInput_;
|
|
73
|
+
std::vector<kiss_fft_cpx> fftOutput_;
|
|
74
|
+
std::vector<float> powerSpectrum_;
|
|
75
|
+
|
|
76
|
+
void buildMelFilterbank();
|
|
77
|
+
void buildWindow();
|
|
78
|
+
void allocateBuffers();
|
|
79
|
+
|
|
80
|
+
static float hzToMel(float hz);
|
|
81
|
+
static float melToHz(float mel);
|
|
82
|
+
};
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
#include "MelSpectrogramBridge.h"
|
|
2
|
+
#include "MelSpectrogram.h"
|
|
3
|
+
#include <cstdlib>
|
|
4
|
+
#include <cstring>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <algorithm>
|
|
7
|
+
#include <memory>
|
|
8
|
+
#include <mutex>
|
|
9
|
+
|
|
10
|
+
// Cache the processor so repeated calls with the same config skip
|
|
11
|
+
// FFT plan creation, window computation, and filterbank generation.
|
|
12
|
+
static std::unique_ptr<MelSpectrogramProcessor> cachedProcessor;
|
|
13
|
+
static std::mutex cachedMutex;
|
|
14
|
+
|
|
15
|
+
extern "C" {
|
|
16
|
+
|
|
17
|
+
/**
 * Computes a mel spectrogram for a whole buffer and returns it as a
 * heap-allocated C structure.
 *
 * The processor is cached: a call whose effective configuration equals that of
 * the cached processor reuses it, any other configuration replaces it.
 *
 * @return A result that must be released with mel_spectrogram_free(), or
 *         nullptr when samples is null, numSamples is not positive, no frame
 *         could be produced, or an allocation failed.
 */
CMelSpectrogramResult* mel_spectrogram_compute(
    const float* samples, int numSamples, int sampleRate,
    int fftLength, int windowSizeSamples, int hopLengthSamples,
    int nMels, float fMin, float fMax,
    int windowType, int logScale, int normalize)
{
    MelSpectrogramConfig config;
    config.sampleRate = sampleRate;
    config.fftLength = fftLength;
    config.windowSizeSamples = windowSizeSamples;
    config.hopLengthSamples = hopLengthSamples;
    config.nMels = nMels;
    config.fMin = fMin;
    config.fMax = fMax;
    config.windowType = windowType;
    config.logScale = (logScale != 0);
    config.normalize = (normalize != 0);

    // The processor replaces non-positive settings with defaults and exposes
    // the adjusted values through config(). Apply the same substitutions to
    // the lookup key; otherwise a request that relies on a default (e.g.
    // fMax == 0 meaning Nyquist) never compares equal to the cached processor
    // and the FFT plan and filterbank are rebuilt on every call.
    // Keep in sync with the MelSpectrogramProcessor constructor; a mismatch
    // only costs a rebuild, never a wrong result.
    if (config.fftLength <= 0) config.fftLength = 2048;
    if (config.hopLengthSamples <= 0) config.hopLengthSamples = 160;
    if (config.windowSizeSamples <= 1) config.windowSizeSamples = 400;
    if (config.nMels <= 0) config.nMels = 128;
    if (config.fMax <= 0.0f) {
        config.fMax = static_cast<float>(config.sampleRate) / 2.0f;
    }

    MelSpectrogramResult result{{}, 0, 0};
    {
        // Hold the lock only while the shared processor is in use; copying the
        // result out below works on private data.
        std::lock_guard<std::mutex> lock(cachedMutex);
        if (!cachedProcessor || !(cachedProcessor->config() == config)) {
            cachedProcessor = std::make_unique<MelSpectrogramProcessor>(config);
        }
        if (samples != nullptr && numSamples > 0) {
            result = cachedProcessor->compute(samples, numSamples);
        }
    }

    if (result.timeSteps <= 0 || result.data.empty()) {
        return nullptr;
    }

    // Allocate the C result. The data is already flat, so it is copied as one
    // block into malloc'd memory that mel_spectrogram_free() can release.
    CMelSpectrogramResult* cResult =
        static_cast<CMelSpectrogramResult*>(malloc(sizeof(CMelSpectrogramResult)));
    if (!cResult) return nullptr;
    cResult->timeSteps = result.timeSteps;
    cResult->nMels = result.nMels;

    // Byte count taken from the vector itself: computed in size_t, so it
    // cannot overflow the way an int product of the dimensions could.
    const size_t dataSize = result.data.size() * sizeof(float);
    cResult->data = static_cast<float*>(malloc(dataSize));
    if (!cResult->data) {
        free(cResult);
        return nullptr;
    }
    std::memcpy(cResult->data, result.data.data(), dataSize);

    return cResult;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Releases a result returned by mel_spectrogram_compute(): first its data
 * array, then the structure itself. Passing nullptr is allowed and does nothing.
 */
void mel_spectrogram_free(CMelSpectrogramResult* result) {
    if (result == nullptr) {
        return;
    }
    // free() accepts a null pointer, so a result without data needs no special case.
    free(result->data);
    free(result);
}
|
|
68
|
+
|
|
69
|
+
void mel_spectrogram_init(int sampleRate, int fftLength, int windowSizeSamples,
|
|
70
|
+
int hopLengthSamples, int nMels, float fMin, float fMax, int windowType)
|
|
71
|
+
{
|
|
72
|
+
MelSpectrogramConfig config;
|
|
73
|
+
config.sampleRate = sampleRate;
|
|
74
|
+
config.fftLength = fftLength;
|
|
75
|
+
config.windowSizeSamples = windowSizeSamples;
|
|
76
|
+
config.hopLengthSamples = hopLengthSamples;
|
|
77
|
+
config.nMels = nMels;
|
|
78
|
+
config.fMin = fMin;
|
|
79
|
+
config.fMax = fMax;
|
|
80
|
+
config.windowType = windowType;
|
|
81
|
+
config.logScale = false; // log applied manually per-frame below
|
|
82
|
+
config.normalize = false;
|
|
83
|
+
|
|
84
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
85
|
+
if (!cachedProcessor || !(cachedProcessor->config() == config)) {
|
|
86
|
+
cachedProcessor = std::make_unique<MelSpectrogramProcessor>(config);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
int mel_spectrogram_compute_frame(const float* frame, int frameSize, float* melOutput) {
|
|
91
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
92
|
+
if (!cachedProcessor || !frame || !melOutput) {
|
|
93
|
+
return 0;
|
|
94
|
+
}
|
|
95
|
+
cachedProcessor->computeFrame(frame, frameSize, melOutput);
|
|
96
|
+
// Apply log scaling (matches compute() logScale behavior)
|
|
97
|
+
const int nMels = cachedProcessor->config().nMels;
|
|
98
|
+
for (int i = 0; i < nMels; ++i) {
|
|
99
|
+
melOutput[i] = std::log(std::max(1e-10f, melOutput[i]));
|
|
100
|
+
}
|
|
101
|
+
return 1;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
int mel_spectrogram_get_n_mels(void) {
|
|
105
|
+
std::lock_guard<std::mutex> lock(cachedMutex);
|
|
106
|
+
if (!cachedProcessor) {
|
|
107
|
+
return 0;
|
|
108
|
+
}
|
|
109
|
+
return cachedProcessor->config().nMels;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
} // extern "C"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#ifndef MEL_SPECTROGRAM_BRIDGE_H
|
|
2
|
+
#define MEL_SPECTROGRAM_BRIDGE_H
|
|
3
|
+
|
|
4
|
+
#ifdef __cplusplus
|
|
5
|
+
extern "C" {
|
|
6
|
+
#endif
|
|
7
|
+
|
|
8
|
+
/* Mel spectrogram handed across the C boundary. */
typedef struct CMelSpectrogramResult {
    float* data;    /* Flat array of timeSteps * nMels values */
    int timeSteps;  /* number of frames */
    int nMels;      /* number of mel bands per frame */
} CMelSpectrogramResult;
|
|
13
|
+
|
|
14
|
+
CMelSpectrogramResult* mel_spectrogram_compute(
|
|
15
|
+
const float* samples, int numSamples, int sampleRate,
|
|
16
|
+
int fftLength, int windowSizeSamples, int hopLengthSamples,
|
|
17
|
+
int nMels, float fMin, float fMax,
|
|
18
|
+
int windowType, int logScale, int normalize);
|
|
19
|
+
|
|
20
|
+
void mel_spectrogram_free(CMelSpectrogramResult* result);
|
|
21
|
+
|
|
22
|
+
// Single-frame API for live/per-segment mel computation
|
|
23
|
+
void mel_spectrogram_init(int sampleRate, int fftLength, int windowSizeSamples,
|
|
24
|
+
int hopLengthSamples, int nMels, float fMin, float fMax, int windowType);
|
|
25
|
+
// Note: always applies log scaling (log(max(1e-10, val))) regardless of config.logScale
|
|
26
|
+
int mel_spectrogram_compute_frame(const float* frame, int frameSize, float* melOutput);
|
|
27
|
+
int mel_spectrogram_get_n_mels(void);
|
|
28
|
+
|
|
29
|
+
#ifdef __cplusplus
|
|
30
|
+
}
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
#endif // MEL_SPECTROGRAM_BRIDGE_H
|