@siteed/audio-studio 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +535 -0
- package/LICENSE +21 -0
- package/README.md +167 -0
- package/android/build.gradle +143 -0
- package/android/src/androidTest/assets/chorus.wav +0 -0
- package/android/src/androidTest/assets/jfk.wav +0 -0
- package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
- package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +197 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +541 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +234 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +332 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +324 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +253 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +218 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +120 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +345 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +340 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +252 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +95 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +43 -0
- package/android/src/main/AndroidManifest.xml +30 -0
- package/android/src/main/CMakeLists.txt +29 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioAnalysisData.kt +188 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDataEncoder.kt +9 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioDeviceManager.kt +1741 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFeaturesNative.kt +26 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFileHandler.kt +136 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioFormatUtils.kt +354 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioNotificationsManager.kt +439 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +2237 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +2163 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioRecordingService.kt +167 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +1112 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostudio/Constants.kt +37 -0
- package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +7 -0
- package/android/src/main/java/net/siteed/audiostudio/FFT.kt +100 -0
- package/android/src/main/java/net/siteed/audiostudio/Features.kt +98 -0
- package/android/src/main/java/net/siteed/audiostudio/LogUtils.kt +93 -0
- package/android/src/main/java/net/siteed/audiostudio/MelSpectrogramNative.kt +36 -0
- package/android/src/main/java/net/siteed/audiostudio/NotificationConfig.kt +72 -0
- package/android/src/main/java/net/siteed/audiostudio/PermissionUtils.kt +68 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingActionReceiver.kt +59 -0
- package/android/src/main/java/net/siteed/audiostudio/RecordingConfig.kt +259 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformConfig.kt +19 -0
- package/android/src/main/java/net/siteed/audiostudio/WaveformRenderer.kt +159 -0
- package/android/src/main/jni/AudioFeaturesJNI.cpp +152 -0
- package/android/src/main/jni/MelSpectrogramJNI.cpp +165 -0
- package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
- package/android/src/main/res/drawable/ic_microphone.xml +13 -0
- package/android/src/main/res/drawable/ic_pause.xml +10 -0
- package/android/src/main/res/drawable/ic_play.xml +10 -0
- package/android/src/main/res/drawable/ic_stop.xml +10 -0
- package/android/src/main/res/layout/notification_recording.xml +37 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +279 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +249 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +151 -0
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +273 -0
- package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +140 -0
- package/android/src/test/resources/chorus.wav +0 -0
- package/android/src/test/resources/generate_test_audio.py +94 -0
- package/android/src/test/resources/jfk.wav +0 -0
- package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
- package/android/src/test/resources/recorder_hello_world.wav +0 -0
- package/app.plugin.js +3 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js +4 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +164 -0
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +213 -0
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js +21 -0
- package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js +90 -0
- package/build/cjs/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractPreview.js +28 -0
- package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js +18 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js +149 -0
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/cjs/AudioDeviceManager.js +688 -0
- package/build/cjs/AudioDeviceManager.js.map +1 -0
- package/build/cjs/AudioRecorder.provider.js +78 -0
- package/build/cjs/AudioRecorder.provider.js.map +1 -0
- package/build/cjs/AudioStudio.native.js +8 -0
- package/build/cjs/AudioStudio.native.js.map +1 -0
- package/build/cjs/AudioStudio.types.js +11 -0
- package/build/cjs/AudioStudio.types.js.map +1 -0
- package/build/cjs/AudioStudio.web.js +708 -0
- package/build/cjs/AudioStudio.web.js.map +1 -0
- package/build/cjs/AudioStudioModule.js +718 -0
- package/build/cjs/AudioStudioModule.js.map +1 -0
- package/build/cjs/WebRecorder.web.js +865 -0
- package/build/cjs/WebRecorder.web.js.map +1 -0
- package/build/cjs/constants/platformLimitations.js +99 -0
- package/build/cjs/constants/platformLimitations.js.map +1 -0
- package/build/cjs/constants.js +20 -0
- package/build/cjs/constants.js.map +1 -0
- package/build/cjs/events.js +29 -0
- package/build/cjs/events.js.map +1 -0
- package/build/cjs/hooks/useAudioDevices.js +179 -0
- package/build/cjs/hooks/useAudioDevices.js.map +1 -0
- package/build/cjs/index.js +64 -0
- package/build/cjs/index.js.map +1 -0
- package/build/cjs/trimAudio.js +76 -0
- package/build/cjs/trimAudio.js.map +1 -0
- package/build/cjs/useAudioRecorder.js +535 -0
- package/build/cjs/useAudioRecorder.js.map +1 -0
- package/build/cjs/utils/BlobFix.js +502 -0
- package/build/cjs/utils/BlobFix.js.map +1 -0
- package/build/cjs/utils/audioProcessing.js +136 -0
- package/build/cjs/utils/audioProcessing.js.map +1 -0
- package/build/cjs/utils/cleanNativeOptions.js +22 -0
- package/build/cjs/utils/cleanNativeOptions.js.map +1 -0
- package/build/cjs/utils/concatenateBuffers.js +25 -0
- package/build/cjs/utils/concatenateBuffers.js.map +1 -0
- package/build/cjs/utils/convertPCMToFloat32.js +124 -0
- package/build/cjs/utils/convertPCMToFloat32.js.map +1 -0
- package/build/cjs/utils/crc32.js +52 -0
- package/build/cjs/utils/crc32.js.map +1 -0
- package/build/cjs/utils/encodingToBitDepth.js +17 -0
- package/build/cjs/utils/encodingToBitDepth.js.map +1 -0
- package/build/cjs/utils/getWavFileInfo.js +96 -0
- package/build/cjs/utils/getWavFileInfo.js.map +1 -0
- package/build/cjs/utils/writeWavHeader.js +88 -0
- package/build/cjs/utils/writeWavHeader.js.map +1 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js +294 -0
- package/build/cjs/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js +190 -0
- package/build/cjs/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/cjs/workers/wasmGlueString.web.js +27 -0
- package/build/cjs/workers/wasmGlueString.web.js.map +1 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js +3 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js +126 -0
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js +205 -0
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/esm/AudioAnalysis/extractAudioData.js +14 -0
- package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js +86 -0
- package/build/esm/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/esm/AudioAnalysis/extractPreview.js +25 -0
- package/build/esm/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/esm/AudioAnalysis/extractWaveform.js +11 -0
- package/build/esm/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js +111 -0
- package/build/esm/AudioAnalysis/melSpectrogramWasm.js.map +1 -0
- package/build/esm/AudioDeviceManager.js +681 -0
- package/build/esm/AudioDeviceManager.js.map +1 -0
- package/build/esm/AudioRecorder.provider.js +40 -0
- package/build/esm/AudioRecorder.provider.js.map +1 -0
- package/build/esm/AudioStudio.native.js +6 -0
- package/build/esm/AudioStudio.native.js.map +1 -0
- package/build/esm/AudioStudio.types.js +8 -0
- package/build/esm/AudioStudio.types.js.map +1 -0
- package/build/esm/AudioStudio.web.js +704 -0
- package/build/esm/AudioStudio.web.js.map +1 -0
- package/build/esm/AudioStudioModule.js +713 -0
- package/build/esm/AudioStudioModule.js.map +1 -0
- package/build/esm/WebRecorder.web.js +861 -0
- package/build/esm/WebRecorder.web.js.map +1 -0
- package/build/esm/constants/platformLimitations.js +90 -0
- package/build/esm/constants/platformLimitations.js.map +1 -0
- package/build/esm/constants.js +17 -0
- package/build/esm/constants.js.map +1 -0
- package/build/esm/events.js +21 -0
- package/build/esm/events.js.map +1 -0
- package/build/esm/hooks/useAudioDevices.js +176 -0
- package/build/esm/hooks/useAudioDevices.js.map +1 -0
- package/build/esm/index.js +23 -0
- package/build/esm/index.js.map +1 -0
- package/build/esm/trimAudio.js +69 -0
- package/build/esm/trimAudio.js.map +1 -0
- package/build/esm/useAudioRecorder.js +529 -0
- package/build/esm/useAudioRecorder.js.map +1 -0
- package/build/esm/utils/BlobFix.js +498 -0
- package/build/esm/utils/BlobFix.js.map +1 -0
- package/build/esm/utils/audioProcessing.js +133 -0
- package/build/esm/utils/audioProcessing.js.map +1 -0
- package/build/esm/utils/cleanNativeOptions.js +19 -0
- package/build/esm/utils/cleanNativeOptions.js.map +1 -0
- package/build/esm/utils/concatenateBuffers.js +21 -0
- package/build/esm/utils/concatenateBuffers.js.map +1 -0
- package/build/esm/utils/convertPCMToFloat32.js +120 -0
- package/build/esm/utils/convertPCMToFloat32.js.map +1 -0
- package/build/esm/utils/crc32.js +50 -0
- package/build/esm/utils/crc32.js.map +1 -0
- package/build/esm/utils/encodingToBitDepth.js +13 -0
- package/build/esm/utils/encodingToBitDepth.js.map +1 -0
- package/build/esm/utils/getWavFileInfo.js +92 -0
- package/build/esm/utils/getWavFileInfo.js.map +1 -0
- package/build/esm/utils/writeWavHeader.js +84 -0
- package/build/esm/utils/writeWavHeader.js.map +1 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js +291 -0
- package/build/esm/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js +187 -0
- package/build/esm/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/build/esm/workers/wasmGlueString.web.js +24 -0
- package/build/esm/workers/wasmGlueString.web.js.map +1 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +198 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +24 -0
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts +74 -0
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts +20 -0
- package/build/types/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts +8 -0
- package/build/types/AudioAnalysis/extractWaveform.d.ts.map +1 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts +16 -0
- package/build/types/AudioAnalysis/melSpectrogramWasm.d.ts.map +1 -0
- package/build/types/AudioDeviceManager.d.ts +187 -0
- package/build/types/AudioDeviceManager.d.ts.map +1 -0
- package/build/types/AudioRecorder.provider.d.ts +11 -0
- package/build/types/AudioRecorder.provider.d.ts.map +1 -0
- package/build/types/AudioStudio.native.d.ts +3 -0
- package/build/types/AudioStudio.native.d.ts.map +1 -0
- package/build/types/AudioStudio.types.d.ts +760 -0
- package/build/types/AudioStudio.types.d.ts.map +1 -0
- package/build/types/AudioStudio.web.d.ts +96 -0
- package/build/types/AudioStudio.web.d.ts.map +1 -0
- package/build/types/AudioStudioModule.d.ts +3 -0
- package/build/types/AudioStudioModule.d.ts.map +1 -0
- package/build/types/WebRecorder.web.d.ts +208 -0
- package/build/types/WebRecorder.web.d.ts.map +1 -0
- package/build/types/constants/platformLimitations.d.ts +40 -0
- package/build/types/constants/platformLimitations.d.ts.map +1 -0
- package/build/types/constants.d.ts +14 -0
- package/build/types/constants.d.ts.map +1 -0
- package/build/types/events.d.ts +29 -0
- package/build/types/events.d.ts.map +1 -0
- package/build/types/hooks/useAudioDevices.d.ts +15 -0
- package/build/types/hooks/useAudioDevices.d.ts.map +1 -0
- package/build/types/index.d.ts +21 -0
- package/build/types/index.d.ts.map +1 -0
- package/build/types/trimAudio.d.ts +25 -0
- package/build/types/trimAudio.d.ts.map +1 -0
- package/build/types/useAudioRecorder.d.ts +22 -0
- package/build/types/useAudioRecorder.d.ts.map +1 -0
- package/build/types/utils/BlobFix.d.ts +9 -0
- package/build/types/utils/BlobFix.d.ts.map +1 -0
- package/build/types/utils/audioProcessing.d.ts +24 -0
- package/build/types/utils/audioProcessing.d.ts.map +1 -0
- package/build/types/utils/cleanNativeOptions.d.ts +15 -0
- package/build/types/utils/cleanNativeOptions.d.ts.map +1 -0
- package/build/types/utils/concatenateBuffers.d.ts +8 -0
- package/build/types/utils/concatenateBuffers.d.ts.map +1 -0
- package/build/types/utils/convertPCMToFloat32.d.ts +13 -0
- package/build/types/utils/convertPCMToFloat32.d.ts.map +1 -0
- package/build/types/utils/crc32.d.ts +7 -0
- package/build/types/utils/crc32.d.ts.map +1 -0
- package/build/types/utils/encodingToBitDepth.d.ts +5 -0
- package/build/types/utils/encodingToBitDepth.d.ts.map +1 -0
- package/build/types/utils/getWavFileInfo.d.ts +26 -0
- package/build/types/utils/getWavFileInfo.d.ts.map +1 -0
- package/build/types/utils/writeWavHeader.d.ts +34 -0
- package/build/types/utils/writeWavHeader.d.ts.map +1 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts +2 -0
- package/build/types/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts +2 -0
- package/build/types/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
- package/build/types/workers/wasmGlueString.web.d.ts +2 -0
- package/build/types/workers/wasmGlueString.web.d.ts.map +1 -0
- package/cpp/AudioFeatures.cpp +274 -0
- package/cpp/AudioFeatures.h +85 -0
- package/cpp/AudioFeaturesBridge.cpp +146 -0
- package/cpp/AudioFeaturesBridge.h +47 -0
- package/cpp/MelSpectrogram.cpp +227 -0
- package/cpp/MelSpectrogram.h +82 -0
- package/cpp/MelSpectrogramBridge.cpp +112 -0
- package/cpp/MelSpectrogramBridge.h +33 -0
- package/cpp/kiss_fft/COPYING +11 -0
- package/cpp/kiss_fft/_kiss_fft_guts.h +167 -0
- package/cpp/kiss_fft/kiss_fft.c +424 -0
- package/cpp/kiss_fft/kiss_fft.h +160 -0
- package/cpp/kiss_fft/kiss_fft_log.h +36 -0
- package/cpp/kiss_fft/kiss_fftr.c +155 -0
- package/cpp/kiss_fft/kiss_fftr.h +54 -0
- package/expo-module.config.json +10 -0
- package/ios/AudioAnalysisData.swift +74 -0
- package/ios/AudioDeviceManager.swift +670 -0
- package/ios/AudioFeaturesWrapper.h +21 -0
- package/ios/AudioFeaturesWrapper.mm +63 -0
- package/ios/AudioNotificationManager.swift +154 -0
- package/ios/AudioProcessingHelpers.swift +797 -0
- package/ios/AudioProcessor.swift +1191 -0
- package/ios/AudioStreamError.swift +7 -0
- package/ios/AudioStreamManager.swift +2369 -0
- package/ios/AudioStreamManagerDelegate.swift +16 -0
- package/ios/AudioStudio.podspec +39 -0
- package/ios/AudioStudioModule.swift +1111 -0
- package/ios/AudioStudioTests/AudioFileHandlerTests.swift +338 -0
- package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +331 -0
- package/ios/AudioStudioTests/AudioTestHelpers.swift +130 -0
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +294 -0
- package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +105 -0
- package/ios/AudioStudioTests/Info.plist +22 -0
- package/ios/AudioStudioTests/README.md +39 -0
- package/ios/AudioStudioTests/SimpleAudioTest.swift +98 -0
- package/ios/AudioStudioTests/TestAudioGenerator.swift +75 -0
- package/ios/DataPoint.swift +54 -0
- package/ios/DecodingConfig.swift +59 -0
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +95 -0
- package/ios/ISSUE_IOS.md +68 -0
- package/ios/Logger.swift +39 -0
- package/ios/MelSpectrogramWrapper.h +30 -0
- package/ios/MelSpectrogramWrapper.mm +97 -0
- package/ios/NotificationExtension.swift +15 -0
- package/ios/RecordingResult.swift +22 -0
- package/ios/RecordingSettings.swift +311 -0
- package/ios/WaveformExtractor.swift +105 -0
- package/ios/tests/README.md +41 -0
- package/ios/tests/integration/buffer_and_fallback_test.swift +178 -0
- package/ios/tests/integration/buffer_duration_test.swift +185 -0
- package/ios/tests/integration/compressed_only_output_test.swift +271 -0
- package/ios/tests/integration/output_control_test.swift +322 -0
- package/ios/tests/integration/run_integration_tests.sh +37 -0
- package/ios/tests/opus_support_test_macos.swift +154 -0
- package/ios/tests/standalone/audio_processing_test.swift +144 -0
- package/ios/tests/standalone/audio_recording_test.swift +277 -0
- package/ios/tests/standalone/audio_streaming_test.swift +249 -0
- package/ios/tests/standalone/standalone_test.swift +144 -0
- package/package.json +146 -0
- package/plugin/build/index.cjs +194 -0
- package/plugin/build/index.d.cts +22 -0
- package/plugin/build/index.js +194 -0
- package/plugin/src/index.ts +285 -0
- package/plugin/tsconfig.json +10 -0
- package/plugin/tsconfig.tsbuildinfo +1 -0
- package/prebuilt/wasm/mel-spectrogram.js +18 -0
- package/src/AudioAnalysis/AudioAnalysis.types.ts +226 -0
- package/src/AudioAnalysis/audio-features-wasm.d.ts +37 -0
- package/src/AudioAnalysis/audioFeaturesWasm.ts +200 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +350 -0
- package/src/AudioAnalysis/extractAudioData.ts +17 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +140 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/AudioAnalysis/extractWaveform.ts +22 -0
- package/src/AudioAnalysis/mel-spectrogram-wasm.d.ts +48 -0
- package/src/AudioAnalysis/melSpectrogramWasm.ts +179 -0
- package/src/AudioDeviceManager.ts +800 -0
- package/src/AudioRecorder.provider.tsx +57 -0
- package/src/AudioStudio.native.ts +6 -0
- package/src/AudioStudio.types.ts +899 -0
- package/src/AudioStudio.web.ts +911 -0
- package/src/AudioStudioModule.ts +984 -0
- package/src/WebRecorder.web.ts +1114 -0
- package/src/constants/platformLimitations.ts +118 -0
- package/src/constants.ts +21 -0
- package/src/events.ts +63 -0
- package/src/hooks/useAudioDevices.ts +213 -0
- package/src/index.ts +67 -0
- package/src/trimAudio.ts +94 -0
- package/src/types/crc-32.d.ts +9 -0
- package/src/useAudioRecorder.tsx +784 -0
- package/src/utils/BlobFix.ts +561 -0
- package/src/utils/audioProcessing.ts +205 -0
- package/src/utils/cleanNativeOptions.ts +18 -0
- package/src/utils/concatenateBuffers.ts +24 -0
- package/src/utils/convertPCMToFloat32.ts +170 -0
- package/src/utils/crc32.ts +59 -0
- package/src/utils/encodingToBitDepth.ts +18 -0
- package/src/utils/getWavFileInfo.ts +132 -0
- package/src/utils/writeWavHeader.ts +115 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +291 -0
- package/src/workers/inlineAudioWebWorker.web.tsx +186 -0
- package/src/workers/wasmGlueString.web.ts +23 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { ConsoleLike } from '../AudioStudio.types';
|
|
2
|
+
export declare const WAV_HEADER_SIZE = 44;
|
|
3
|
+
export declare const convertPCMToFloat32: ({ bitDepth, buffer, skipWavHeader, logger, }: {
|
|
4
|
+
buffer: ArrayBuffer;
|
|
5
|
+
bitDepth: number;
|
|
6
|
+
skipWavHeader?: boolean;
|
|
7
|
+
logger?: ConsoleLike;
|
|
8
|
+
}) => Promise<{
|
|
9
|
+
pcmValues: Float32Array;
|
|
10
|
+
min: number;
|
|
11
|
+
max: number;
|
|
12
|
+
}>;
|
|
13
|
+
//# sourceMappingURL=convertPCMToFloat32.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convertPCMToFloat32.d.ts","sourceRoot":"","sources":["../../../src/utils/convertPCMToFloat32.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD,eAAO,MAAM,eAAe,KAAK,CAAA;AA+DjC,eAAO,MAAM,mBAAmB,GAAU,8CAKvC;IACC,MAAM,EAAE,WAAW,CAAA;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,MAAM,CAAC,EAAE,WAAW,CAAA;CACvB,KAAG,OAAO,CAAC;IAAE,SAAS,EAAE,YAAY,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CA2FhE,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crc32.d.ts","sourceRoot":"","sources":["../../../src/utils/crc32.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,KAAK;IAClB,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,MAAM,CAAA;IACnC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAAA;CAChC;AAsCD,QAAA,IAAI,mBAAmB,EAAE,KAAK,CAAA;AAY9B,eAAe,mBAAmB,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encodingToBitDepth.d.ts","sourceRoot":"","sources":["../../../src/utils/encodingToBitDepth.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAA;AAE7D,eAAO,MAAM,kBAAkB,GAAI,eAEhC;IACC,QAAQ,EAAE,YAAY,CAAA;CACzB,KAAG,QAWH,CAAA"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { BitDepth, SampleRate } from '../AudioStudio.types';
|
|
2
|
+
/**
|
|
3
|
+
* Interface representing the metadata of a WAV file.
|
|
4
|
+
*/
|
|
5
|
+
export interface WavFileInfo {
|
|
6
|
+
sampleRate: SampleRate;
|
|
7
|
+
numChannels: number;
|
|
8
|
+
bitDepth: BitDepth;
|
|
9
|
+
size: number;
|
|
10
|
+
durationMs: number;
|
|
11
|
+
audioFormatDescription: string;
|
|
12
|
+
byteRate: number;
|
|
13
|
+
blockAlign: number;
|
|
14
|
+
creationDateTime?: string;
|
|
15
|
+
comments?: string;
|
|
16
|
+
compressionType?: string;
|
|
17
|
+
dataChunkOffset: number;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Extracts metadata from a WAV buffer.
|
|
21
|
+
*
|
|
22
|
+
* @param arrayBuffer - The array buffer containing the WAV data.
|
|
23
|
+
* @returns A promise that resolves to the extracted metadata.
|
|
24
|
+
*/
|
|
25
|
+
export declare const getWavFileInfo: (arrayBuffer: ArrayBuffer) => Promise<WavFileInfo>;
|
|
26
|
+
//# sourceMappingURL=getWavFileInfo.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getWavFileInfo.d.ts","sourceRoot":"","sources":["../../../src/utils/getWavFileInfo.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAA;AAoB3D;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,UAAU,EAAE,UAAU,CAAA;IACtB,WAAW,EAAE,MAAM,CAAA;IACnB,QAAQ,EAAE,QAAQ,CAAA;IAClB,IAAI,EAAE,MAAM,CAAA;IACZ,UAAU,EAAE,MAAM,CAAA;IAClB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,eAAe,EAAE,MAAM,CAAA;CAC1B;AAED;;;;;GAKG;AACH,eAAO,MAAM,cAAc,GACvB,aAAa,WAAW,KACzB,OAAO,CAAC,WAAW,CAmFrB,CAAA"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Options for creating a WAV header.
|
|
3
|
+
*/
|
|
4
|
+
export interface WavHeaderOptions {
|
|
5
|
+
/** Optional buffer containing audio data. If provided, it will be combined with the header. */
|
|
6
|
+
buffer?: ArrayBuffer;
|
|
7
|
+
/** The sample rate of the audio in Hz (e.g., 44100). */
|
|
8
|
+
sampleRate: number;
|
|
9
|
+
/** The number of audio channels (e.g., 1 for mono, 2 for stereo). */
|
|
10
|
+
numChannels: number;
|
|
11
|
+
/** The bit depth of the audio (e.g., 16, 24, or 32). */
|
|
12
|
+
bitDepth: number;
|
|
13
|
+
/** Whether the audio data is in float format (only applies to 32-bit) */
|
|
14
|
+
isFloat?: boolean;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Writes or updates a WAV (RIFF) header based on the provided options.
|
|
18
|
+
*
|
|
19
|
+
* This function can be used in three ways:
|
|
20
|
+
* 1. To create a standalone WAV header (when no buffer is provided).
|
|
21
|
+
* 2. To create a WAV header and combine it with existing audio data (when a buffer without a header is provided).
|
|
22
|
+
* 3. To update an existing WAV header in the provided buffer.
|
|
23
|
+
*
|
|
24
|
+
* For streaming audio where the final size is unknown, this function sets the size fields
|
|
25
|
+
* to the maximum 32-bit value (0xFFFFFFFF). These can be updated later using the
|
|
26
|
+
* `updateWavHeaderSize` function once the final size is known.
|
|
27
|
+
*
|
|
28
|
+
* @param options - The options for creating or updating the WAV header.
|
|
29
|
+
* @returns An ArrayBuffer containing the WAV header, or the header combined with the provided audio data.
|
|
30
|
+
*
|
|
31
|
+
* @throws {Error} Throws an error if the provided options are invalid or if the buffer is too small.
|
|
32
|
+
*/
|
|
33
|
+
export declare const writeWavHeader: ({ buffer, sampleRate, numChannels, bitDepth, isFloat, }: WavHeaderOptions) => ArrayBuffer;
|
|
34
|
+
//# sourceMappingURL=writeWavHeader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"writeWavHeader.d.ts","sourceRoot":"","sources":["../../../src/utils/writeWavHeader.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,+FAA+F;IAC/F,MAAM,CAAC,EAAE,WAAW,CAAA;IACpB,wDAAwD;IACxD,UAAU,EAAE,MAAM,CAAA;IAClB,qEAAqE;IACrE,WAAW,EAAE,MAAM,CAAA;IACnB,wDAAwD;IACxD,QAAQ,EAAE,MAAM,CAAA;IAChB,yEAAyE;IACzE,OAAO,CAAC,EAAE,OAAO,CAAA;CACpB;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,cAAc,GAAI,yDAM5B,gBAAgB,KAAG,WAyErB,CAAA"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export declare const InlineFeaturesExtractor = "\n// --- Constants ---\nconst N_FFT = 1024;\nconst N_CHROMA = 12;\nconst STRUCT_SIZE = 32; // CAudioFeaturesResult\n\n// --- WASM module state ---\nlet wasmModule = null;\nlet wasmInitPromise = null;\nlet wasmFramePtr = 0;\nlet wasmFrameCapacity = 0;\nlet wasmResultPtr = 0;\n\nfunction initWasm(sampleRate) {\n if (wasmInitPromise) return wasmInitPromise;\n wasmInitPromise = (typeof createMelSpectrogramModule === 'function'\n ? createMelSpectrogramModule()\n : Promise.reject(new Error('WASM glue not loaded'))\n ).then(function(Module) {\n wasmModule = Module;\n Module._audio_features_init(sampleRate, N_FFT, 13, 26, 1, 1);\n wasmResultPtr = Module._malloc(STRUCT_SIZE);\n Module.HEAPU8.fill(0, wasmResultPtr, wasmResultPtr + STRUCT_SIZE);\n return Module;\n }).catch(function(err) {\n wasmInitPromise = null;\n throw err;\n });\n return wasmInitPromise;\n}\n\nfunction readWasmResult(Module, ptr) {\n var getValue = Module.getValue;\n var centroid = getValue(ptr, 'float');\n var flatness = getValue(ptr + 4, 'float');\n var rolloff = getValue(ptr + 8, 'float');\n var bandwidth = getValue(ptr + 12, 'float');\n var mfccPtr = getValue(ptr + 16, 'i32');\n var mfccCount = getValue(ptr + 20, 'i32');\n var chromaPtr = getValue(ptr + 24, 'i32');\n var chromaCount = getValue(ptr + 28, 'i32');\n\n var mfcc = [];\n if (mfccPtr && mfccCount > 0) {\n var off = mfccPtr >> 2;\n for (var i = 0; i < mfccCount; i++) mfcc.push(Module.HEAPF32[off + i]);\n }\n var chromagram = [];\n if (chromaPtr && chromaCount > 0) {\n var off2 = chromaPtr >> 2;\n for (var i = 0; i < chromaCount; i++) chromagram.push(Module.HEAPF32[off2 + i]);\n }\n return { centroid: centroid, flatness: flatness, rolloff: rolloff, bandwidth: bandwidth, mfcc: mfcc, chromagram: chromagram };\n}\n\n// Compute spectral/MFCC/chroma features for a segment via WASM C++\nfunction computeFeaturesWasm(segment, sampleRate, featureOptions) {\n if (!wasmModule || !wasmResultPtr) {\n return { 
centroid: 0, flatness: 0, rolloff: 0, bandwidth: 0, mfcc: [], chromagram: [] };\n }\n var Module = wasmModule;\n var needSpectral = featureOptions.spectralCentroid || featureOptions.spectralFlatness ||\n featureOptions.spectralRolloff || featureOptions.spectralBandwidth;\n var needMfcc = !!featureOptions.mfcc;\n var needChroma = !!featureOptions.chromagram;\n\n if (!needSpectral && !needMfcc && !needChroma) {\n return { centroid: 0, flatness: 0, rolloff: 0, bandwidth: 0, mfcc: [], chromagram: [] };\n }\n\n // Re-init if needed (different feature flags)\n Module._audio_features_init(sampleRate, N_FFT, 13, 26, needMfcc ? 1 : 0, needChroma ? 1 : 0);\n\n // Allocate/grow input buffer on WASM heap\n if (segment.length > wasmFrameCapacity) {\n if (wasmFramePtr) Module._free(wasmFramePtr);\n wasmFramePtr = Module._malloc(segment.length * 4);\n wasmFrameCapacity = segment.length;\n }\n Module.HEAPF32.set(segment, wasmFramePtr >> 2);\n\n var ok = Module._audio_features_compute_frame(wasmFramePtr, segment.length, wasmResultPtr);\n if (!ok) {\n return { centroid: 0, flatness: 0, rolloff: 0, bandwidth: 0, mfcc: [], chromagram: [] };\n }\n var result = readWasmResult(Module, wasmResultPtr);\n Module._audio_features_free_arrays(wasmResultPtr);\n return result;\n}\n\n// --- JS fallback for HNR (autocorrelation, no FFT needed) ---\nfunction extractHNR(segmentData) {\n var frameSize = segmentData.length;\n var autocorrelation = new Float32Array(frameSize);\n for (var i = 0; i < frameSize; i++) {\n var sum = 0;\n for (var j = 0; j < frameSize - i; j++) {\n sum += segmentData[j] * segmentData[j + i];\n }\n autocorrelation[i] = sum;\n }\n var maxAutocorrelation = -Infinity;\n for (var i = 1; i < autocorrelation.length; i++) {\n if (autocorrelation[i] > maxAutocorrelation) {\n maxAutocorrelation = autocorrelation[i];\n }\n }\n return autocorrelation[0] !== 0\n ? 
10 * Math.log10(maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation))\n : 0;\n}\n\n// --- JS fallback for pitch estimation (simple peak-picking) ---\nfunction estimatePitch(segment, sampleRate) {\n if (!segment || segment.length < 2 || !sampleRate) return 0;\n // Simple autocorrelation-based pitch\n var minLag = Math.floor(sampleRate / 1000); // 1000 Hz max\n var maxLag = Math.floor(sampleRate / 50); // 50 Hz min\n if (maxLag >= segment.length) maxLag = segment.length - 1;\n var bestCorr = -Infinity;\n var bestLag = 0;\n for (var lag = minLag; lag <= maxLag; lag++) {\n var corr = 0;\n for (var i = 0; i < segment.length - lag; i++) {\n corr += segment[i] * segment[i + lag];\n }\n if (corr > bestCorr) {\n bestCorr = corr;\n bestLag = lag;\n }\n }\n return bestLag > 0 ? sampleRate / bestLag : 0;\n}\n\n// --- Unique ID counter ---\nlet uniqueIdCounter = 0;\n\n// --- Message handler ---\nself.onmessage = function (event) {\n var enableLogging = event.data.enableLogging || false;\n\n // Reset command\n if (event.data.command === 'resetCounter') {\n var newValue = event.data.value;\n uniqueIdCounter = typeof newValue === 'number' ? newValue : 0;\n return;\n }\n\n var channelData = event.data.channelData;\n var sampleRate = event.data.sampleRate;\n var segmentDurationMs = event.data.segmentDurationMs;\n var bitDepth = event.data.bitDepth;\n var fullAudioDurationMs = event.data.fullAudioDurationMs;\n var numberOfChannels = event.data.numberOfChannels;\n var _features = event.data.features;\n var features = _features || {};\n var bytesPerSample = bitDepth / 8;\n\n var subChunkStartTime = (typeof fullAudioDurationMs === 'number' && !isNaN(fullAudioDurationMs) && fullAudioDurationMs >= 0)\n ? 
fullAudioDurationMs / 1000 : 0;\n\n // Check if any C++-backed features are requested\n var needWasm = features.spectralCentroid || features.spectralFlatness ||\n features.spectralRolloff || features.spectralBandwidth ||\n features.mfcc || features.chromagram;\n\n function createFeaturesObject(maxAmp, rms, sumSquares, zeroCrossings, segLen, wasmResult, startIdx, endIdx) {\n if (!Object.values(features).some(function(v) { return v; })) return undefined;\n var result = {};\n if (features.energy) result.energy = sumSquares;\n if (features.rms) result.rms = rms;\n result.minAmplitude = -maxAmp;\n result.maxAmplitude = maxAmp;\n if (features.zcr) result.zcr = zeroCrossings / segLen;\n if (features.spectralCentroid) result.spectralCentroid = wasmResult.centroid;\n if (features.spectralFlatness) result.spectralFlatness = wasmResult.flatness;\n if (features.spectralRolloff) result.spectralRolloff = wasmResult.rolloff;\n if (features.spectralBandwidth) result.spectralBandwidth = wasmResult.bandwidth;\n if (features.mfcc) result.mfcc = wasmResult.mfcc;\n if (features.chromagram) result.chromagram = wasmResult.chromagram;\n if (features.hnr) result.hnr = extractHNR(channelData.slice(startIdx, endIdx));\n if (features.pitch) result.pitch = estimatePitch(channelData.slice(startIdx, endIdx), sampleRate);\n return result;\n }\n\n function processSegment(startIdx, endIdx, segLen) {\n var sumSquares = 0, maxAmp = 0, zeroCrossings = 0;\n for (var j = startIdx; j < endIdx; j++) {\n var value = channelData[j];\n sumSquares += value * value;\n if (Math.abs(value) > maxAmp) maxAmp = Math.abs(value);\n if (j > 0 && value * channelData[j - 1] < 0) zeroCrossings++;\n }\n var rms = Math.sqrt(sumSquares / segLen);\n var wasmResult = needWasm\n ? 
computeFeaturesWasm(channelData.slice(startIdx, endIdx), sampleRate, features)\n : { centroid: 0, flatness: 0, rolloff: 0, bandwidth: 0, mfcc: [], chromagram: [] };\n\n var dataPoint = {\n id: uniqueIdCounter++,\n amplitude: maxAmp,\n rms: rms,\n startTime: subChunkStartTime + (startIdx / sampleRate),\n endTime: subChunkStartTime + (endIdx / sampleRate),\n dB: 20 * Math.log10(rms + 1e-6),\n silent: rms < 0.01,\n startPosition: startIdx * (numberOfChannels || 1) * bytesPerSample,\n endPosition: endIdx * (numberOfChannels || 1) * bytesPerSample,\n samples: segLen,\n };\n var ef = createFeaturesObject(maxAmp, rms, sumSquares, zeroCrossings, segLen, wasmResult, startIdx, endIdx);\n if (ef) dataPoint.features = ef;\n return dataPoint;\n }\n\n function extractWaveform() {\n var totalSamples = channelData.length;\n var durationMs = (totalSamples / sampleRate) * 1000;\n var samplesPerSegment = Math.floor(sampleRate * (segmentDurationMs / 1000));\n var numPoints = Math.floor(totalSamples / samplesPerSegment);\n var remainingSamples = totalSamples % samplesPerSegment;\n\n var min = Infinity, max = -Infinity;\n for (var i = 0; i < totalSamples; i++) {\n if (channelData[i] < min) min = channelData[i];\n if (channelData[i] > max) max = channelData[i];\n }\n\n var dataPoints = [];\n for (var i = 0; i < numPoints; i++) {\n var startIdx = i * samplesPerSegment;\n dataPoints.push(processSegment(startIdx, startIdx + samplesPerSegment, samplesPerSegment));\n }\n if (remainingSamples > samplesPerSegment / 4) {\n var startIdx = numPoints * samplesPerSegment;\n dataPoints.push(processSegment(startIdx, totalSamples, totalSamples - startIdx));\n }\n return {\n durationMs: durationMs,\n dataPoints: dataPoints,\n amplitudeRange: { min: min, max: max },\n rmsRange: { min: 0, max: Math.max(Math.abs(min), Math.abs(max)) }\n };\n }\n\n // Main: init WASM if needed, then process\n var doProcess = function() {\n try {\n var t0 = performance.now();\n var result = extractWaveform();\n var t1 = 
performance.now();\n self.postMessage({\n command: 'features',\n result: {\n bitDepth: bitDepth,\n samples: channelData.length,\n numberOfChannels: numberOfChannels,\n sampleRate: sampleRate,\n segmentDurationMs: segmentDurationMs,\n durationMs: result.durationMs,\n dataPoints: result.dataPoints,\n amplitudeRange: result.amplitudeRange,\n rmsRange: result.rmsRange,\n extractionTimeMs: t1 - t0,\n }\n });\n } catch (error) {\n console.error('[Worker] Error', { message: error.message, stack: error.stack });\n self.postMessage({ error: { message: error.message, stack: error.stack, name: error.name } });\n }\n };\n\n if (needWasm && !wasmModule) {\n initWasm(sampleRate).then(doProcess).catch(function(e) {\n console.error('[Worker] WASM init failed, processing without WASM:', e);\n needWasm = false;\n doProcess();\n });\n } else {\n doProcess();\n }\n};\n";
|
|
2
|
+
//# sourceMappingURL=InlineFeaturesExtractor.web.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"InlineFeaturesExtractor.web.d.ts","sourceRoot":"","sources":["../../../src/workers/InlineFeaturesExtractor.web.tsx"],"names":[],"mappings":"AAQA,eAAO,MAAM,uBAAuB,w/WA0RnC,CAAA"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export declare const InlineAudioWebWorker = "\nconst DEFAULT_BIT_DEPTH = 32\nconst DEFAULT_SAMPLE_RATE = 44100\n\nclass RecorderProcessor extends AudioWorkletProcessor {\n constructor() {\n super()\n this.currentChunk = [] // Float32Array\n this.samplesSinceLastExport = 0\n this.recordSampleRate = DEFAULT_SAMPLE_RATE\n this.exportSampleRate = DEFAULT_SAMPLE_RATE\n this.recordBitDepth = DEFAULT_BIT_DEPTH\n this.exportBitDepth = DEFAULT_BIT_DEPTH\n this.numberOfChannels = 1\n this.isRecording = true\n this.port.onmessage = this.handleMessage.bind(this)\n this.enableLogging = false\n this.exportIntervalSamples = 0\n this.currentPosition = 0 // Track current position in seconds\n this.streamFormat = 'raw'\n }\n\n handleMessage(event) {\n switch (event.data.command) {\n case 'init':\n this.enableLogging = event.data.enableLogging || false\n this.recordSampleRate = event.data.recordSampleRate\n this.exportSampleRate =\n event.data.exportSampleRate || event.data.recordSampleRate\n this.exportIntervalSamples =\n this.recordSampleRate * (event.data.interval / 1000)\n if (event.data.numberOfChannels) {\n this.numberOfChannels = event.data.numberOfChannels\n }\n if (event.data.recordBitDepth) {\n this.recordBitDepth = event.data.recordBitDepth\n }\n this.exportBitDepth =\n event.data.exportBitDepth || this.recordBitDepth\n this.streamFormat = event.data.streamFormat || 'raw'\n \n // Handle position parameter for device switching\n if (typeof event.data.position === 'number' && event.data.position > 0) {\n this.currentPosition = event.data.position\n if (this.enableLogging) {\n console.log('AudioWorklet initialized with position:', this.currentPosition)\n }\n }\n break\n\n case 'stop':\n this.isRecording = false\n if (this.currentChunk.length > 0) {\n this.processChunk()\n }\n break\n \n case 'pause':\n // Just a placeholder for pause handling\n break\n \n case 'resume':\n // Just a placeholder for resume handling\n break\n }\n }\n\n process(inputs, _outputs, _parameters) {\n 
if (!this.isRecording) return true\n const input = inputs[0]\n if (input.length > 0) {\n const newBuffer = new Float32Array(input[0])\n this.currentChunk.push(newBuffer)\n this.samplesSinceLastExport += newBuffer.length\n\n if (this.samplesSinceLastExport >= this.exportIntervalSamples) {\n this.processChunk()\n this.samplesSinceLastExport = 0\n }\n }\n return true\n }\n\n mergeBuffers(bufferArray, recLength) {\n const result = new Float32Array(recLength)\n let offset = 0\n for (let i = 0; i < bufferArray.length; i++) {\n result.set(bufferArray[i], offset)\n offset += bufferArray[i].length\n }\n return result\n }\n\n // Keep basic resampling for sample rate conversion\n resample(samples, targetSampleRate) {\n if (this.recordSampleRate === targetSampleRate) {\n return samples\n }\n const resampledBuffer = new Float32Array(\n Math.ceil(\n (samples.length * targetSampleRate) / this.recordSampleRate\n )\n )\n const ratio = this.recordSampleRate / targetSampleRate\n let offset = 0\n for (let i = 0; i < resampledBuffer.length; i++) {\n const nextOffset = Math.floor((i + 1) * ratio)\n let accum = 0\n let count = 0\n for (let j = offset; j < nextOffset && j < samples.length; j++) {\n accum += samples[j]\n count++\n }\n resampledBuffer[i] = count > 0 ? accum / count : 0\n offset = nextOffset\n }\n return resampledBuffer\n }\n\n // Keep bit depth conversion if needed\n convertBitDepth(input, targetBitDepth) {\n if (targetBitDepth === 32) {\n const output = new Int32Array(input.length)\n for (let i = 0; i < input.length; i++) {\n const s = Math.max(-1, Math.min(1, input[i]))\n output[i] = s < 0 ? s * 0x80000000 : s * 0x7fffffff\n }\n return output\n } else if (targetBitDepth === 16) {\n const output = new Int16Array(input.length)\n for (let i = 0; i < input.length; i++) {\n const s = Math.max(-1, Math.min(1, input[i]))\n output[i] = s < 0 ? 
s * 0x8000 : s * 0x7fff\n }\n return output\n }\n return input\n }\n\n processChunk() {\n if (this.currentChunk.length === 0) return\n\n // Merge buffers\n const chunkLength = this.currentChunk.reduce(\n (acc, buf) => acc + buf.length,\n 0\n )\n const mergedChunk = this.mergeBuffers(this.currentChunk, chunkLength)\n\n // Resample if needed\n const resampledChunk = this.resample(mergedChunk, this.exportSampleRate)\n\n // Convert bit depth if needed (used for file storage format)\n const finalBuffer =\n this.recordBitDepth !== this.exportBitDepth\n ? this.convertBitDepth(resampledChunk, this.exportBitDepth)\n : resampledChunk\n\n // For float32 stream format, send the resampled Float32 data directly\n // (skipping the bit-depth conversion roundtrip)\n const streamData = this.streamFormat === 'float32' ? resampledChunk : finalBuffer\n\n // Calculate the duration in seconds\n const chunkDuration = resampledChunk.length / this.exportSampleRate\n\n // Send processed chunk with the current position\n this.port.postMessage({\n command: 'newData',\n recordedData: streamData,\n sampleRate: this.exportSampleRate,\n bitDepth: this.exportBitDepth,\n numberOfChannels: this.numberOfChannels,\n position: this.currentPosition,\n })\n \n // Update the position\n this.currentPosition += chunkDuration\n\n // Clear the current chunk\n this.currentChunk = []\n }\n}\n\nregisterProcessor('recorder-processor', RecorderProcessor)\n";
|
|
2
|
+
//# sourceMappingURL=inlineAudioWebWorker.web.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"inlineAudioWebWorker.web.d.ts","sourceRoot":"","sources":["../../../src/workers/inlineAudioWebWorker.web.tsx"],"names":[],"mappings":"AACA,eAAO,MAAM,oBAAoB,4tNAwLhC,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wasmGlueString.web.d.ts","sourceRoot":"","sources":["../../../src/workers/wasmGlueString.web.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,UAAU,EAAE,MAkBxB,CAAC"}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
#include "AudioFeatures.h"
|
|
2
|
+
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <cstring>
|
|
5
|
+
|
|
6
|
+
#ifndef M_PI
|
|
7
|
+
#define M_PI 3.14159265358979323846
|
|
8
|
+
#endif
|
|
9
|
+
|
|
10
|
+
/**
 * Creates a processor for the given configuration and precomputes everything
 * that depends only on it: the FFT plan, the analysis window, the working
 * buffers and, when MFCCs are enabled, the mel filterbank and DCT matrix.
 *
 * Non-positive sampleRate / fftLength / nMfcc / nMelFilters are replaced by
 * 16000 / 1024 / 13 / 26. An odd fftLength is rounded up to the next even
 * value: kiss_fftr_alloc() rejects odd sizes and returns NULL, which would
 * leave the processor without a usable FFT plan.
 *
 * @param config Requested configuration; the sanitized copy is what config() returns.
 */
AudioFeaturesProcessor::AudioFeaturesProcessor(const AudioFeaturesConfig& config)
    : config_(config), fftCfg_(nullptr) {
    if (config_.sampleRate <= 0) config_.sampleRate = 16000;
    if (config_.fftLength <= 0) config_.fftLength = 1024;
    if (config_.nMfcc <= 0) config_.nMfcc = 13;
    if (config_.nMelFilters <= 0) config_.nMelFilters = 26;

    // The real-input FFT only supports even transform lengths.
    if (config_.fftLength % 2 != 0) ++config_.fftLength;

    numBins_ = config_.fftLength / 2 + 1;
    fftCfg_ = kiss_fftr_alloc(config_.fftLength, 0, nullptr, nullptr);

    buildWindow();
    if (config_.computeMfcc) {
        // Both tables are only consumed by computeMFCC().
        buildMelFilterbank();
        buildDCTMatrix();
    }
    allocateBuffers();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Releases the FFT plan obtained from kiss_fftr_alloc() in the constructor.
 */
AudioFeaturesProcessor::~AudioFeaturesProcessor() {
    if (fftCfg_) {
        // Release through kiss_fft's own macro so the deallocator always
        // matches the allocator the library was built with.
        kiss_fftr_free(fftCfg_);
        fftCfg_ = nullptr;
    }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Sizes the per-frame working buffers: one time-domain frame of fftLength
 * samples and three frequency-domain arrays of numBins_ entries.
 */
void AudioFeaturesProcessor::allocateBuffers() {
    const int frameLength = config_.fftLength;
    const int binCount = numBins_;

    // Frequency-domain storage (raw FFT output plus derived spectra).
    powerSpectrum_.resize(binCount, 0.0f);
    magnitudeSpectrum_.resize(binCount, 0.0f);
    fftOutput_.resize(binCount);

    // Time-domain frame handed to the FFT.
    fftInput_.resize(frameLength, 0.0f);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Converts a frequency in Hz to mels using mel = 2595 * log10(1 + hz / 700).
 */
float AudioFeaturesProcessor::hzToMel(float hz) {
    const float scaled = hz / 700.0f;
    const float logTerm = std::log10(1.0f + scaled);
    return 2595.0f * logTerm;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Converts a mel value back to Hz using hz = 700 * (10^(mel / 2595) - 1).
 */
float AudioFeaturesProcessor::melToHz(float mel) {
    const float exponent = mel / 2595.0f;
    const float powerOfTen = std::pow(10.0f, exponent);
    return 700.0f * (powerOfTen - 1.0f);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Precomputes the Hann window applied to each frame before the FFT:
 * w[n] = 0.5 * (1 - cos(2*pi*n / (fftLength - 1))).
 */
void AudioFeaturesProcessor::buildWindow() {
    const int length = config_.fftLength;
    window_.resize(length);

    const float denominator = static_cast<float>(length - 1);
    const float twoPi = 2.0f * static_cast<float>(M_PI);

    int n = 0;
    for (float& coefficient : window_) {
        coefficient = 0.5f * (1.0f - std::cos(twoPi * n / denominator));
        ++n;
    }
}
|
|
55
|
+
|
|
56
|
+
void AudioFeaturesProcessor::buildMelFilterbank() {
|
|
57
|
+
const float fMin = 0.0f;
|
|
58
|
+
const float fMax = static_cast<float>(config_.sampleRate) / 2.0f;
|
|
59
|
+
const float melMin = hzToMel(fMin);
|
|
60
|
+
const float melMax = hzToMel(fMax);
|
|
61
|
+
|
|
62
|
+
// nMelFilters + 2 points for triangular filters
|
|
63
|
+
const int nPoints = config_.nMelFilters + 2;
|
|
64
|
+
std::vector<float> melPoints(nPoints);
|
|
65
|
+
for (int i = 0; i < nPoints; ++i) {
|
|
66
|
+
float mel = melMin + i * (melMax - melMin) / (config_.nMelFilters + 1);
|
|
67
|
+
melPoints[i] = melToHz(mel);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const float binWidth = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
71
|
+
|
|
72
|
+
melFilters_.resize(config_.nMelFilters);
|
|
73
|
+
for (int m = 0; m < config_.nMelFilters; ++m) {
|
|
74
|
+
const float fLow = melPoints[m];
|
|
75
|
+
const float fCenter = melPoints[m + 1];
|
|
76
|
+
const float fHigh = melPoints[m + 2];
|
|
77
|
+
|
|
78
|
+
int binStart = std::max(0, static_cast<int>(std::ceil(fLow / binWidth)));
|
|
79
|
+
int binEnd = std::min(numBins_ - 1, static_cast<int>(std::floor(fHigh / binWidth)));
|
|
80
|
+
|
|
81
|
+
melFilters_[m].startBin = binStart;
|
|
82
|
+
const int count = binEnd - binStart + 1;
|
|
83
|
+
melFilters_[m].weights.resize(count > 0 ? count : 0);
|
|
84
|
+
|
|
85
|
+
for (int bin = binStart; bin <= binEnd; ++bin) {
|
|
86
|
+
float freq = static_cast<float>(bin) * binWidth;
|
|
87
|
+
float weight;
|
|
88
|
+
if (freq <= fCenter) {
|
|
89
|
+
weight = (fCenter - fLow) > 0.0f ? (freq - fLow) / (fCenter - fLow) : 0.0f;
|
|
90
|
+
} else {
|
|
91
|
+
weight = (fHigh - fCenter) > 0.0f ? (fHigh - freq) / (fHigh - fCenter) : 0.0f;
|
|
92
|
+
}
|
|
93
|
+
melFilters_[m].weights[bin - binStart] = std::max(0.0f, weight);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
void AudioFeaturesProcessor::buildDCTMatrix() {
|
|
99
|
+
// Precompute DCT-II matrix: dct[i][j] = scale * cos(pi * i * (2*j + 1) / (2*N))
|
|
100
|
+
const int N = config_.nMelFilters;
|
|
101
|
+
const int K = config_.nMfcc;
|
|
102
|
+
const float scale = std::sqrt(2.0f / N);
|
|
103
|
+
|
|
104
|
+
dctMatrix_.resize(K * N);
|
|
105
|
+
for (int i = 0; i < K; ++i) {
|
|
106
|
+
for (int j = 0; j < N; ++j) {
|
|
107
|
+
dctMatrix_[i * N + j] = scale * std::cos(
|
|
108
|
+
static_cast<float>(M_PI) * i * (2 * j + 1) / (2.0f * N)
|
|
109
|
+
);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
void AudioFeaturesProcessor::computeFFT(const float* samples, int numSamples) {
|
|
115
|
+
float* fftIn = fftInput_.data();
|
|
116
|
+
|
|
117
|
+
// Zero the buffer
|
|
118
|
+
std::memset(fftIn, 0, config_.fftLength * sizeof(float));
|
|
119
|
+
|
|
120
|
+
// Apply window to input (truncate or zero-pad as needed)
|
|
121
|
+
const int len = std::min(numSamples, config_.fftLength);
|
|
122
|
+
for (int i = 0; i < len; ++i) {
|
|
123
|
+
fftIn[i] = samples[i] * window_[i];
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Compute real FFT
|
|
127
|
+
kiss_fftr(fftCfg_, fftIn, fftOutput_.data());
|
|
128
|
+
|
|
129
|
+
// Compute magnitude and power spectra
|
|
130
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
131
|
+
float re = fftOutput_[i].r;
|
|
132
|
+
float im = fftOutput_[i].i;
|
|
133
|
+
float power = re * re + im * im;
|
|
134
|
+
powerSpectrum_[i] = power;
|
|
135
|
+
magnitudeSpectrum_[i] = std::sqrt(power);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
float AudioFeaturesProcessor::computeSpectralCentroid() const {
|
|
140
|
+
float sum = 0.0f;
|
|
141
|
+
float weightedSum = 0.0f;
|
|
142
|
+
const float binToFreq = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
143
|
+
|
|
144
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
145
|
+
float freq = i * binToFreq;
|
|
146
|
+
weightedSum += freq * magnitudeSpectrum_[i];
|
|
147
|
+
sum += magnitudeSpectrum_[i];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return sum > 0.0f ? weightedSum / sum : 0.0f;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
float AudioFeaturesProcessor::computeSpectralFlatness() const {
|
|
154
|
+
const float eps = 1e-10f;
|
|
155
|
+
float sumLog = 0.0f;
|
|
156
|
+
float sum = 0.0f;
|
|
157
|
+
|
|
158
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
159
|
+
sumLog += std::log(powerSpectrum_[i] + eps);
|
|
160
|
+
sum += powerSpectrum_[i];
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
float geometricMean = std::exp(sumLog / numBins_);
|
|
164
|
+
float arithmeticMean = sum / numBins_;
|
|
165
|
+
|
|
166
|
+
return arithmeticMean > 0.0f ? geometricMean / arithmeticMean : 0.0f;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
float AudioFeaturesProcessor::computeSpectralRolloff() const {
|
|
170
|
+
float totalEnergy = 0.0f;
|
|
171
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
172
|
+
totalEnergy += magnitudeSpectrum_[i];
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
const float threshold = totalEnergy * 0.85f;
|
|
176
|
+
float cumulativeEnergy = 0.0f;
|
|
177
|
+
const float binToFreq = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
178
|
+
|
|
179
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
180
|
+
cumulativeEnergy += magnitudeSpectrum_[i];
|
|
181
|
+
if (cumulativeEnergy >= threshold) {
|
|
182
|
+
return i * binToFreq;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return 0.0f;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
float AudioFeaturesProcessor::computeSpectralBandwidth(float centroid) const {
|
|
190
|
+
float sum = 0.0f;
|
|
191
|
+
float weightedSum = 0.0f;
|
|
192
|
+
const float binToFreq = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
193
|
+
|
|
194
|
+
for (int i = 0; i < numBins_; ++i) {
|
|
195
|
+
float freq = i * binToFreq;
|
|
196
|
+
float diff = freq - centroid;
|
|
197
|
+
weightedSum += magnitudeSpectrum_[i] * diff * diff;
|
|
198
|
+
sum += magnitudeSpectrum_[i];
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
return sum > 0.0f ? std::sqrt(weightedSum / sum) : 0.0f;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
void AudioFeaturesProcessor::computeMFCC(std::vector<float>& mfcc) const {
|
|
205
|
+
const int N = config_.nMelFilters;
|
|
206
|
+
const int K = config_.nMfcc;
|
|
207
|
+
mfcc.resize(K);
|
|
208
|
+
|
|
209
|
+
// Apply mel filterbank to power spectrum -> log mel energies
|
|
210
|
+
std::vector<float> logMelEnergies(N);
|
|
211
|
+
for (int m = 0; m < N; ++m) {
|
|
212
|
+
const MelFilter& filter = melFilters_[m];
|
|
213
|
+
const int count = static_cast<int>(filter.weights.size());
|
|
214
|
+
float energy = 0.0f;
|
|
215
|
+
const float* w = filter.weights.data();
|
|
216
|
+
const float* p = powerSpectrum_.data() + filter.startBin;
|
|
217
|
+
for (int k = 0; k < count; ++k) {
|
|
218
|
+
energy += p[k] * w[k];
|
|
219
|
+
}
|
|
220
|
+
logMelEnergies[m] = std::log(std::max(energy, 1e-10f));
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Apply precomputed DCT matrix
|
|
224
|
+
for (int i = 0; i < K; ++i) {
|
|
225
|
+
float sum = 0.0f;
|
|
226
|
+
const float* dctRow = dctMatrix_.data() + i * N;
|
|
227
|
+
for (int j = 0; j < N; ++j) {
|
|
228
|
+
sum += logMelEnergies[j] * dctRow[j];
|
|
229
|
+
}
|
|
230
|
+
mfcc[i] = sum;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
void AudioFeaturesProcessor::computeChromagram(std::vector<float>& chroma) const {
|
|
235
|
+
chroma.assign(12, 0.0f);
|
|
236
|
+
const float binToFreq = static_cast<float>(config_.sampleRate) / config_.fftLength;
|
|
237
|
+
|
|
238
|
+
for (int i = 1; i < numBins_; ++i) { // skip DC bin
|
|
239
|
+
float freq = i * binToFreq;
|
|
240
|
+
if (freq < 20.0f) continue; // skip sub-audible frequencies
|
|
241
|
+
|
|
242
|
+
// Map frequency to pitch class: MIDI note = 69 + 12*log2(freq/440)
|
|
243
|
+
float midiNote = 69.0f + 12.0f * std::log2(freq / 440.0f);
|
|
244
|
+
int pitchClass = static_cast<int>(std::round(midiNote)) % 12;
|
|
245
|
+
if (pitchClass < 0) pitchClass += 12;
|
|
246
|
+
|
|
247
|
+
chroma[pitchClass] += magnitudeSpectrum_[i];
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
 * Computes all enabled features for one frame.
 *
 * A single FFT pass feeds every feature. The four spectral scalars are always
 * filled; MFCCs and the chromagram are filled only when enabled in the
 * configuration and are otherwise left empty.
 *
 * @param samples    Input samples for the frame.
 * @param numSamples Number of samples in `samples`.
 * @return The features for this frame.
 */
AudioFeaturesResult AudioFeaturesProcessor::compute(const float* samples, int numSamples) {
    computeFFT(samples, numSamples);

    AudioFeaturesResult features;

    // The centroid is computed first because the bandwidth is measured around it.
    const float centroid = computeSpectralCentroid();
    features.spectralCentroid = centroid;
    features.spectralFlatness = computeSpectralFlatness();
    features.spectralRolloff = computeSpectralRolloff();
    features.spectralBandwidth = computeSpectralBandwidth(centroid);

    if (config_.computeMfcc) computeMFCC(features.mfcc);
    if (config_.computeChroma) computeChromagram(features.chromagram);

    return features;
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include <cmath>
|
|
5
|
+
#include "kiss_fft/kiss_fft.h"
|
|
6
|
+
#include "kiss_fft/kiss_fftr.h"
|
|
7
|
+
|
|
8
|
+
/**
 * Settings for AudioFeaturesProcessor.
 *
 * Every field has a default, so a default-constructed config is fully
 * initialized and safe to compare.
 */
struct AudioFeaturesConfig {
    int sampleRate = 16000;     // Sample rate in Hz
    int fftLength = 1024;       // FFT size in samples
    int nMfcc = 13;             // Number of MFCC coefficients to output
    int nMelFilters = 26;       // Number of mel filters for MFCC
    bool computeMfcc = true;    // Whether MFCCs are computed
    bool computeChroma = true;  // Whether the chromagram is computed

    /** Field-by-field equality. */
    bool operator==(const AudioFeaturesConfig& other) const {
        return sampleRate == other.sampleRate &&
               fftLength == other.fftLength &&
               nMfcc == other.nMfcc &&
               nMelFilters == other.nMelFilters &&
               computeMfcc == other.computeMfcc &&
               computeChroma == other.computeChroma;
    }

    /** Negation of operator==. */
    bool operator!=(const AudioFeaturesConfig& other) const {
        return !(*this == other);
    }
};
|
|
25
|
+
|
|
26
|
+
/**
 * Features computed for one frame.
 *
 * The scalar fields default to 0 and the vectors to empty, so a
 * default-constructed result never exposes indeterminate values.
 */
struct AudioFeaturesResult {
    float spectralCentroid = 0.0f;
    float spectralFlatness = 0.0f;
    float spectralRolloff = 0.0f;
    float spectralBandwidth = 0.0f;
    std::vector<float> mfcc;        // nMfcc coefficients
    std::vector<float> chromagram;  // 12 bins
};
|
|
34
|
+
|
|
35
|
+
class AudioFeaturesProcessor {
|
|
36
|
+
public:
|
|
37
|
+
AudioFeaturesProcessor(const AudioFeaturesConfig& config);
|
|
38
|
+
~AudioFeaturesProcessor();
|
|
39
|
+
|
|
40
|
+
// Non-copyable (owns FFT plan)
|
|
41
|
+
AudioFeaturesProcessor(const AudioFeaturesProcessor&) = delete;
|
|
42
|
+
AudioFeaturesProcessor& operator=(const AudioFeaturesProcessor&) = delete;
|
|
43
|
+
|
|
44
|
+
AudioFeaturesResult compute(const float* samples, int numSamples);
|
|
45
|
+
|
|
46
|
+
const AudioFeaturesConfig& config() const { return config_; }
|
|
47
|
+
|
|
48
|
+
private:
|
|
49
|
+
AudioFeaturesConfig config_;
|
|
50
|
+
int numBins_; // fftLength / 2 + 1
|
|
51
|
+
|
|
52
|
+
// FFT resources
|
|
53
|
+
kiss_fftr_cfg fftCfg_;
|
|
54
|
+
std::vector<float> window_;
|
|
55
|
+
std::vector<float> fftInput_;
|
|
56
|
+
std::vector<kiss_fft_cpx> fftOutput_;
|
|
57
|
+
std::vector<float> magnitudeSpectrum_;
|
|
58
|
+
std::vector<float> powerSpectrum_;
|
|
59
|
+
|
|
60
|
+
// Mel filterbank for MFCC (sparse, same pattern as MelSpectrogram)
|
|
61
|
+
struct MelFilter {
|
|
62
|
+
int startBin;
|
|
63
|
+
std::vector<float> weights;
|
|
64
|
+
};
|
|
65
|
+
std::vector<MelFilter> melFilters_;
|
|
66
|
+
|
|
67
|
+
// DCT matrix for MFCC (precomputed)
|
|
68
|
+
std::vector<float> dctMatrix_; // [nMfcc * nMelFilters]
|
|
69
|
+
|
|
70
|
+
void buildWindow();
|
|
71
|
+
void buildMelFilterbank();
|
|
72
|
+
void buildDCTMatrix();
|
|
73
|
+
void allocateBuffers();
|
|
74
|
+
|
|
75
|
+
void computeFFT(const float* samples, int numSamples);
|
|
76
|
+
float computeSpectralCentroid() const;
|
|
77
|
+
float computeSpectralFlatness() const;
|
|
78
|
+
float computeSpectralRolloff() const;
|
|
79
|
+
float computeSpectralBandwidth(float centroid) const;
|
|
80
|
+
void computeMFCC(std::vector<float>& mfcc) const;
|
|
81
|
+
void computeChromagram(std::vector<float>& chroma) const;
|
|
82
|
+
|
|
83
|
+
static float hzToMel(float hz);
|
|
84
|
+
static float melToHz(float mel);
|
|
85
|
+
};
|