@siteed/expo-audio-studio 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +210 -0
- package/LICENSE +21 -0
- package/README.md +269 -0
- package/android/build.gradle +105 -0
- package/android/src/main/AndroidManifest.xml +27 -0
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +166 -0
- package/android/src/main/java/net/siteed/audiostream/AudioDataEncoder.kt +9 -0
- package/android/src/main/java/net/siteed/audiostream/AudioFileHandler.kt +131 -0
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +103 -0
- package/android/src/main/java/net/siteed/audiostream/AudioNotificationsManager.kt +435 -0
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +2235 -0
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +1437 -0
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +166 -0
- package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +21 -0
- package/android/src/main/java/net/siteed/audiostream/EventSender.kt +7 -0
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +739 -0
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +99 -0
- package/android/src/main/java/net/siteed/audiostream/Features.kt +98 -0
- package/android/src/main/java/net/siteed/audiostream/NotificationConfig.kt +70 -0
- package/android/src/main/java/net/siteed/audiostream/PermissionUtils.kt +59 -0
- package/android/src/main/java/net/siteed/audiostream/RecordingActionReceiver.kt +59 -0
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +205 -0
- package/android/src/main/java/net/siteed/audiostream/WaveformConfig.kt +19 -0
- package/android/src/main/java/net/siteed/audiostream/WaveformRenderer.kt +159 -0
- package/android/src/main/res/drawable/ic_default_action_icon.xml +16 -0
- package/android/src/main/res/drawable/ic_microphone.xml +13 -0
- package/android/src/main/res/drawable/ic_pause.xml +10 -0
- package/android/src/main/res/drawable/ic_play.xml +10 -0
- package/android/src/main/res/drawable/ic_stop.xml +10 -0
- package/android/src/main/res/layout/notification_recording.xml +37 -0
- package/android/src/main/test/java/net/siteed/audiostream/AudioProcessorTest.kt +56 -0
- package/app.plugin.js +1 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +179 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
- package/build/AudioAnalysis/AudioAnalysis.types.js +3 -0
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +68 -0
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js +203 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
- package/build/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/AudioAnalysis/extractAudioData.js +5 -0
- package/build/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts +14 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js +85 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/AudioAnalysis/extractPreview.js +25 -0
- package/build/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/AudioAnalysis/extractWaveform.d.ts +8 -0
- package/build/AudioAnalysis/extractWaveform.d.ts.map +1 -0
- package/build/AudioAnalysis/extractWaveform.js +11 -0
- package/build/AudioAnalysis/extractWaveform.js.map +1 -0
- package/build/AudioRecorder.provider.d.ts +11 -0
- package/build/AudioRecorder.provider.d.ts.map +1 -0
- package/build/AudioRecorder.provider.js +37 -0
- package/build/AudioRecorder.provider.js.map +1 -0
- package/build/ExpoAudioStream.native.d.ts +3 -0
- package/build/ExpoAudioStream.native.d.ts.map +1 -0
- package/build/ExpoAudioStream.native.js +6 -0
- package/build/ExpoAudioStream.native.js.map +1 -0
- package/build/ExpoAudioStream.types.d.ts +532 -0
- package/build/ExpoAudioStream.types.d.ts.map +1 -0
- package/build/ExpoAudioStream.types.js +2 -0
- package/build/ExpoAudioStream.types.js.map +1 -0
- package/build/ExpoAudioStream.web.d.ts +59 -0
- package/build/ExpoAudioStream.web.d.ts.map +1 -0
- package/build/ExpoAudioStream.web.js +285 -0
- package/build/ExpoAudioStream.web.js.map +1 -0
- package/build/ExpoAudioStreamModule.d.ts +3 -0
- package/build/ExpoAudioStreamModule.d.ts.map +1 -0
- package/build/ExpoAudioStreamModule.js +693 -0
- package/build/ExpoAudioStreamModule.js.map +1 -0
- package/build/WebRecorder.web.d.ts +119 -0
- package/build/WebRecorder.web.d.ts.map +1 -0
- package/build/WebRecorder.web.js +436 -0
- package/build/WebRecorder.web.js.map +1 -0
- package/build/constants.d.ts +11 -0
- package/build/constants.d.ts.map +1 -0
- package/build/constants.js +14 -0
- package/build/constants.js.map +1 -0
- package/build/events.d.ts +26 -0
- package/build/events.d.ts.map +1 -0
- package/build/events.js +21 -0
- package/build/events.js.map +1 -0
- package/build/index.d.ts +15 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +14 -0
- package/build/index.js.map +1 -0
- package/build/trimAudio.d.ts +25 -0
- package/build/trimAudio.d.ts.map +1 -0
- package/build/trimAudio.js +67 -0
- package/build/trimAudio.js.map +1 -0
- package/build/useAudioRecorder.d.ts +21 -0
- package/build/useAudioRecorder.d.ts.map +1 -0
- package/build/useAudioRecorder.js +427 -0
- package/build/useAudioRecorder.js.map +1 -0
- package/build/utils/BlobFix.d.ts +9 -0
- package/build/utils/BlobFix.d.ts.map +1 -0
- package/build/utils/BlobFix.js +498 -0
- package/build/utils/BlobFix.js.map +1 -0
- package/build/utils/audioProcessing.d.ts +24 -0
- package/build/utils/audioProcessing.d.ts.map +1 -0
- package/build/utils/audioProcessing.js +133 -0
- package/build/utils/audioProcessing.js.map +1 -0
- package/build/utils/concatenateBuffers.d.ts +8 -0
- package/build/utils/concatenateBuffers.d.ts.map +1 -0
- package/build/utils/concatenateBuffers.js +21 -0
- package/build/utils/concatenateBuffers.js.map +1 -0
- package/build/utils/convertPCMToFloat32.d.ts +13 -0
- package/build/utils/convertPCMToFloat32.d.ts.map +1 -0
- package/build/utils/convertPCMToFloat32.js +120 -0
- package/build/utils/convertPCMToFloat32.js.map +1 -0
- package/build/utils/encodingToBitDepth.d.ts +5 -0
- package/build/utils/encodingToBitDepth.d.ts.map +1 -0
- package/build/utils/encodingToBitDepth.js +13 -0
- package/build/utils/encodingToBitDepth.js.map +1 -0
- package/build/utils/getWavFileInfo.d.ts +26 -0
- package/build/utils/getWavFileInfo.d.ts.map +1 -0
- package/build/utils/getWavFileInfo.js +92 -0
- package/build/utils/getWavFileInfo.js.map +1 -0
- package/build/utils/writeWavHeader.d.ts +49 -0
- package/build/utils/writeWavHeader.d.ts.map +1 -0
- package/build/utils/writeWavHeader.js +91 -0
- package/build/utils/writeWavHeader.js.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts +2 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.js +828 -0
- package/build/workers/InlineFeaturesExtractor.web.js.map +1 -0
- package/build/workers/inlineAudioWebWorker.web.d.ts +2 -0
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
- package/build/workers/inlineAudioWebWorker.web.js +157 -0
- package/build/workers/inlineAudioWebWorker.web.js.map +1 -0
- package/expo-module.config.json +9 -0
- package/ios/AudioAnalysisData.swift +74 -0
- package/ios/AudioNotificationManager.swift +135 -0
- package/ios/AudioProcessingHelpers.swift +743 -0
- package/ios/AudioProcessor.swift +1313 -0
- package/ios/AudioStreamError.swift +7 -0
- package/ios/AudioStreamManager.swift +1708 -0
- package/ios/AudioStreamManagerDelegate.swift +16 -0
- package/ios/DataPoint.swift +54 -0
- package/ios/DecodingConfig.swift +47 -0
- package/ios/ExpoAudioStream.podspec +27 -0
- package/ios/ExpoAudioStreamModule.swift +805 -0
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +95 -0
- package/ios/Logger.swift +7 -0
- package/ios/NotificationExtension.swift +15 -0
- package/ios/RecordingResult.swift +22 -0
- package/ios/RecordingSettings.swift +265 -0
- package/ios/WaveformExtractor.swift +105 -0
- package/package.json +128 -0
- package/plugin/build/index.d.ts +21 -0
- package/plugin/build/index.js +192 -0
- package/plugin/src/index.ts +279 -0
- package/plugin/tsconfig.json +10 -0
- package/plugin/tsconfig.tsbuildinfo +1 -0
- package/src/AudioAnalysis/AudioAnalysis.types.ts +202 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +333 -0
- package/src/AudioAnalysis/extractAudioData.ts +6 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +144 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/AudioAnalysis/extractWaveform.ts +22 -0
- package/src/AudioRecorder.provider.tsx +54 -0
- package/src/ExpoAudioStream.native.ts +6 -0
- package/src/ExpoAudioStream.types.ts +641 -0
- package/src/ExpoAudioStream.web.ts +359 -0
- package/src/ExpoAudioStreamModule.ts +967 -0
- package/src/WebRecorder.web.ts +580 -0
- package/src/constants.ts +18 -0
- package/src/events.ts +60 -0
- package/src/index.ts +36 -0
- package/src/trimAudio.ts +90 -0
- package/src/useAudioRecorder.tsx +620 -0
- package/src/utils/BlobFix.ts +559 -0
- package/src/utils/audioProcessing.ts +205 -0
- package/src/utils/concatenateBuffers.ts +24 -0
- package/src/utils/convertPCMToFloat32.ts +170 -0
- package/src/utils/encodingToBitDepth.ts +18 -0
- package/src/utils/getWavFileInfo.ts +132 -0
- package/src/utils/writeWavHeader.ts +114 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +827 -0
- package/src/workers/inlineAudioWebWorker.web.tsx +156 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// packages/expo-audio-stream/src/AudioAnalysis/AudioAnalysis.types.ts
|
|
2
|
+
|
|
3
|
+
import { BitDepth, ConsoleLike } from '../ExpoAudioStream.types'
|
|
4
|
+
|
|
5
|
+
/**
 * Settings that control how audio data is decoded before analysis.
 * Every field is optional.
 */
export interface DecodingConfig {
    /** Sample rate the decoded audio should target (Android and Web). */
    targetSampleRate?: number

    /** Number of channels the decoded audio should target (Android and Web). */
    targetChannels?: number

    /** Bit depth the decoded audio should target (Android and Web). */
    targetBitDepth?: BitDepth

    /** Set to true to normalize audio levels (Android and Web). */
    normalizeAudio?: boolean
}
|
|
18
|
+
|
|
19
|
+
/**
 * Speech-related information attached to a segment of analysed audio.
 *
 * Possible future additions (not implemented): confidence, language,
 * sentiment, etc.
 */
export interface SpeechFeatures {
    /** True when speech is detected in this segment. */
    isActive: boolean

    /** Identifier of the detected speaker, when available. */
    speakerId?: number
}
|
|
31
|
+
|
|
32
|
+
/**
 * Audio features that can be extracted from an audio signal.
 * Every feature is optional; only the requested ones are expected to be set.
 */
export interface AudioFeatures {
    /** Overall energy of the audio (infinite integral of the squared signal). */
    energy?: number

    /** Mel-frequency cepstral coefficients describing the short-term power spectrum. */
    mfcc?: number[]

    /** Root mean square value, indicating the amplitude of the signal. */
    rms?: number

    /** Minimum amplitude value in the signal. */
    minAmplitude?: number

    /** Maximum amplitude value in the signal. */
    maxAmplitude?: number

    /** Zero-crossing rate: how often the signal changes sign. */
    zcr?: number

    /** Center of mass of the spectrum, indicating the brightness of the sound. */
    spectralCentroid?: number

    /** Flatness of the spectrum, indicating how noise-like the signal is. */
    spectralFlatness?: number

    /** Frequency below which a specified percentage (usually 85%) of the total spectral energy lies. */
    spectralRolloff?: number

    /** Width of the spectrum, indicating the range of frequencies present. */
    spectralBandwidth?: number

    /** Chromagram representing the 12 different pitch classes. */
    chromagram?: number[]

    /** Estimated tempo in beats per minute (BPM). */
    tempo?: number

    /** Harmonics-to-noise ratio: proportion of harmonics to noise in the signal. */
    hnr?: number

    /** Mel-scaled spectrogram representation of the audio. */
    melSpectrogram?: number[]

    /** Spectral contrast: difference between spectral peaks and valleys. */
    spectralContrast?: number[]

    /** Tonal network features representing harmonic relationships. */
    tonnetz?: number[]

    /** Pitch of the signal in Hertz (Hz). */
    pitch?: number

    /** CRC32 checksum of the signal, used to verify the integrity of the audio. */
    crc32?: number
}
|
|
55
|
+
|
|
56
|
+
/**
 * Flags selecting which audio features to extract.
 * Each flag corresponds to the `AudioFeatures` field of the same name.
 */
export interface AudioFeaturesOptions {
    // Time-domain features
    energy?: boolean
    mfcc?: boolean
    rms?: boolean
    zcr?: boolean

    // Spectral shape features
    spectralCentroid?: boolean
    spectralFlatness?: boolean
    spectralRolloff?: boolean
    spectralBandwidth?: boolean

    // Musical / harmonic features
    chromagram?: boolean
    tempo?: boolean
    hnr?: boolean
    melSpectrogram?: boolean
    spectralContrast?: boolean
    tonnetz?: boolean
    pitch?: boolean

    // Integrity checksum
    crc32?: boolean
}
|
|
77
|
+
|
|
78
|
+
/**
 * One data point (one analysed segment) of an audio analysis.
 */
export interface DataPoint {
    id: number

    /** Peak amplitude for the segment. */
    amplitude: number

    /** Root mean square value. */
    rms: number

    /** dBFS (decibels relative to full scale), computed from the RMS value. */
    dB: number

    /** Silence flag; always computed. */
    silent: boolean

    features?: AudioFeatures
    speech?: SpeechFeatures

    startTime?: number
    endTime?: number

    /** Start position in bytes. */
    startPosition?: number

    /** End position in bytes. */
    endPosition?: number

    /** Number of audio samples for this point (sample size depends on bit depth). */
    samples?: number
}
|
|
97
|
+
|
|
98
|
+
/**
 * Complete result of an audio analysis.
 */
export interface AudioAnalysis {
    /** Duration of each segment in milliseconds. */
    segmentDurationMs: number

    /** Duration of the audio in milliseconds. */
    durationMs: number

    /** Bit depth of the audio. */
    bitDepth: number

    /**
     * Size of the audio in bytes.
     * NOTE(review): the field name suggests a sample count rather than a
     * byte size — confirm against the producers of this value.
     */
    samples: number

    /** Number of audio channels. */
    numberOfChannels: number

    /** Sample rate of the audio. */
    sampleRate: number

    /** Data points produced by the analysis. */
    dataPoints: DataPoint[]

    amplitudeRange: { min: number; max: number }
    rmsRange: { min: number; max: number }

    // TODO: speaker changes into a broader speech analysis section
    speechAnalysis?: {
        speakerChanges: Array<{ timestamp: number; speakerId: number }>
        // Possible future additions:
        //   dominantSpeaker?: number
        //   totalSpeechDuration?: number
        //   speakerStats?: { [speakerId: number]: { duration: number, segments: number } }
    }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Selects a time range inside an audio file. Both bounds are optional.
 */
export interface AudioRangeOptions {
    /** Beginning of the range, in milliseconds. */
    startTimeMs?: number

    /** End of the range, in milliseconds. */
    endTimeMs?: number
}
|
|
139
|
+
|
|
140
|
+
/**
 * Options for producing a quick waveform preview of an audio file,
 * optimized for UI rendering with a fixed number of points.
 * Inherits the optional time range from `AudioRangeOptions`.
 */
export interface PreviewOptions extends AudioRangeOptions {
    /** URI of the audio file to analyze. */
    fileUri: string

    /**
     * Total number of points to generate for the preview.
     * @default 100
     */
    numberOfPoints?: number

    /** Optional logger for debugging. */
    logger?: ConsoleLike

    /**
     * Optional configuration for decoding the audio file.
     *
     * Documented defaults:
     * - targetSampleRate: undefined (keep original)
     * - targetChannels: undefined (keep original)
     * - targetBitDepth: 16
     * - normalizeAudio: false
     */
    decodingOptions?: DecodingConfig
}
|
|
166
|
+
|
|
167
|
+
/**
 * Options for mel-spectrogram extraction.
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The API may change in future versions.
 */
export interface ExtractMelSpectrogramOptions {
    /** Path to the audio file. */
    fileUri?: string

    /** Raw audio buffer. */
    arrayBuffer?: ArrayBuffer

    /** Window size in ms (e.g., 25). */
    windowSizeMs: number

    /** Hop length in ms (e.g., 10). */
    hopLengthMs: number

    /** Number of mel filters (e.g., 60). */
    nMels: number

    /** Minimum frequency (default: 0). */
    fMin?: number

    /** Maximum frequency (default: sampleRate / 2). */
    fMax?: number

    /** Window function (default: 'hann'). */
    windowType?: 'hann' | 'hamming'

    /** Mean normalization (default: false). */
    normalize?: boolean

    /** Log scaling of mel energies (default: true). */
    logScale?: boolean

    /** Audio decoding settings. */
    decodingOptions?: DecodingConfig

    /** Optional start time. */
    startTimeMs?: number

    /** Optional end time. */
    endTimeMs?: number

    logger?: ConsoleLike
}
|
|
189
|
+
|
|
190
|
+
/**
 * Result of a mel-spectrogram extraction.
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The API may change in future versions.
 */
export interface MelSpectrogram {
    /** 2D array indexed as [time][mel]. */
    spectrogram: number[][]

    /** Audio sample rate. */
    sampleRate: number

    /** Number of mel filters. */
    nMels: number

    /** Number of time frames. */
    timeSteps: number

    /** Audio duration in ms. */
    durationMs: number
}
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
// packages/expo-audio-stream/src/AudioAnalysis/extractAudioAnalysis.ts
|
|
2
|
+
/**
|
|
3
|
+
* This module provides functions for extracting and analyzing audio data.
|
|
4
|
+
* - `extractAudioAnalysis`: For detailed analysis with customizable ranges and decoding options.
|
|
5
|
+
* - `extractRawWavAnalysis`: For analyzing WAV files without decoding, preserving original PCM values.
|
|
6
|
+
* - `extractPreview` (implemented in `./extractPreview`, not in this module): For generating quick previews of audio waveforms, optimized for UI rendering.
|
|
7
|
+
*/
|
|
8
|
+
import crc32 from 'crc-32'
|
|
9
|
+
|
|
10
|
+
import { ConsoleLike } from '../ExpoAudioStream.types'
|
|
11
|
+
import ExpoAudioStreamModule from '../ExpoAudioStreamModule'
|
|
12
|
+
import { isWeb } from '../constants'
|
|
13
|
+
import {
|
|
14
|
+
AudioAnalysis,
|
|
15
|
+
AudioFeaturesOptions,
|
|
16
|
+
DataPoint,
|
|
17
|
+
DecodingConfig,
|
|
18
|
+
} from './AudioAnalysis.types'
|
|
19
|
+
import { processAudioBuffer } from '../utils/audioProcessing'
|
|
20
|
+
import { convertPCMToFloat32 } from '../utils/convertPCMToFloat32'
|
|
21
|
+
import { getWavFileInfo, WavFileInfo } from '../utils/getWavFileInfo'
|
|
22
|
+
import { InlineFeaturesExtractor } from '../workers/InlineFeaturesExtractor.web'
|
|
23
|
+
|
|
24
|
+
/**
 * Computes the CRC32 checksum of a block of float samples.
 *
 * Each sample is serialized as a 4-byte little-endian float32 and the
 * resulting byte sequence is handed to `crc32.buf`.
 *
 * @param data - Samples to checksum.
 * @returns The value returned by `crc32.buf` for the serialized bytes.
 */
function calculateCRC32ForDataPoint(data: Float32Array): number {
    const BYTES_PER_SAMPLE = 4
    const serialized = new Uint8Array(data.length * BYTES_PER_SAMPLE)
    const writer = new DataView(serialized.buffer)

    data.forEach((sample, sampleIndex) => {
        // `true` forces little-endian so the checksum is platform-independent.
        writer.setFloat32(sampleIndex * BYTES_PER_SAMPLE, sample, true)
    })

    return crc32.buf(serialized)
}
|
|
35
|
+
|
|
36
|
+
/**
 * Options accepted by `extractRawWavAnalysis`.
 * Provide either `fileUri` or `arrayBuffer`.
 */
export interface ExtractWavAudioAnalysisProps {
    /** URI of the WAV file. Either this or `arrayBuffer` should be provided. */
    fileUri?: string

    /** NOTE(review): not read by `extractRawWavAnalysis` in this module. */
    wavMetadata?: WavFileInfo

    /** Raw WAV data. Either this or `fileUri` should be provided. */
    arrayBuffer?: ArrayBuffer

    /** Bit depth of the audio. On web it is read from the WAV data when omitted. */
    bitDepth?: number

    durationMs?: number
    sampleRate?: number
    numberOfChannels?: number

    /**
     * Optional number of bytes to skip. Default is 0.
     * NOTE(review): the web path of `extractRawWavAnalysis` slices the
     * converted sample array with this value, i.e. treats it as a sample
     * index rather than a byte offset — confirm the intended unit.
     */
    position?: number

    /**
     * Optional number of bytes to read.
     * NOTE(review): same unit caveat as `position` on the web path.
     */
    length?: number

    /**
     * Duration of each analysis segment in milliseconds
     * (`extractRawWavAnalysis` defaults it to 100). Larger values yield
     * fewer data points.
     */
    segmentDurationMs?: number

    /** Which features to extract. */
    features?: AudioFeaturesOptions

    /**
     * NOTE(review): not read by `extractRawWavAnalysis` in this module.
     * The spelling ("Extrator") is part of the public API and is kept as is.
     */
    featuresExtratorUrl?: string

    /** Optional logger for debugging. */
    logger?: ConsoleLike

    /** NOTE(review): not read by `extractRawWavAnalysis` in this module. */
    decodingOptions?: DecodingConfig
}
|
|
52
|
+
|
|
53
|
+
/**
 * Properties shared by the time-based and byte-based extraction options.
 */
interface BaseExtractOptions {
    /** URI of the audio file to analyse. */
    fileUri?: string

    /** Audio data to analyse, as an alternative to `fileUri`. */
    arrayBuffer?: ArrayBuffer

    /**
     * Duration of each analysis segment in milliseconds. Defaults to 100ms if not specified.
     */
    segmentDurationMs?: number

    /** Which features to extract. */
    features?: AudioFeaturesOptions

    /** Audio decoding settings. */
    decodingOptions?: DecodingConfig

    /** Optional logger for debugging. */
    logger?: ConsoleLike
}
|
|
65
|
+
|
|
66
|
+
/**
 * Extraction options that select the range by time.
 * The byte-range fields are typed `never` so both styles cannot be mixed.
 */
interface TimeRangeOptions extends BaseExtractOptions {
    startTimeMs?: number
    endTimeMs?: number

    // Byte-based fields are forbidden in this variant.
    position?: never
    length?: never
}
|
|
73
|
+
|
|
74
|
+
/**
 * Extraction options that select the range by byte position.
 * The time-range fields are typed `never` so both styles cannot be mixed.
 */
interface ByteRangeOptions extends BaseExtractOptions {
    position?: number
    length?: number

    // Time-based fields are forbidden in this variant.
    startTimeMs?: never
    endTimeMs?: never
}
|
|
81
|
+
|
|
82
|
+
/**
 * Options accepted by `extractAudioAnalysis`.
 *
 * Choose exactly one way of selecting the range:
 * - time-based: `startTimeMs` and `endTimeMs`;
 * - byte-based: `position` and `length`.
 *
 * Time and byte ranges must not be mixed.
 */
export type ExtractAudioAnalysisProps =
    | TimeRangeOptions
    | ByteRangeOptions
|
|
89
|
+
|
|
90
|
+
/**
 * Extracts detailed audio analysis from the specified audio file or buffer.
 * Supports either time-based or byte-based ranges for flexibility in analysis.
 *
 * On web the audio is decoded through `processAudioBuffer` (defaulting to
 * 16000 Hz, mono, no normalization) and the first channel is analysed in an
 * inline worker. On other platforms the call is forwarded unchanged to
 * `ExpoAudioStreamModule.extractAudioAnalysis`.
 *
 * @param props - The options for extraction, including file URI, ranges, and decoding settings.
 * @returns A promise that resolves to the audio analysis data.
 * @throws Rethrows any failure from decoding or from the analysis worker; on
 * web the failure is first reported through `logger.error`. Mixing time and
 * byte ranges is rejected by the `ExtractAudioAnalysisProps` type; no runtime
 * check for it is performed in this function.
 */
export async function extractAudioAnalysis(
    props: ExtractAudioAnalysisProps
): Promise<AudioAnalysis> {
    const {
        fileUri,
        arrayBuffer,
        decodingOptions,
        logger,
        segmentDurationMs = 100,
        features,
    } = props

    if (!isWeb) {
        return await ExpoAudioStreamModule.extractAudioAnalysis(props)
    }

    try {
        const targetSampleRate = decodingOptions?.targetSampleRate ?? 16000
        const audioContext = new (window.AudioContext ||
            (window as any).webkitAudioContext)({
            sampleRate: targetSampleRate,
        })

        try {
            const processedBuffer = await processAudioBuffer({
                arrayBuffer,
                fileUri,
                targetSampleRate,
                targetChannels: decodingOptions?.targetChannels ?? 1,
                normalizeAudio: decodingOptions?.normalizeAudio ?? false,
                startTimeMs:
                    'startTimeMs' in props ? props.startTimeMs : undefined,
                endTimeMs: 'endTimeMs' in props ? props.endTimeMs : undefined,
                position: 'position' in props ? props.position : undefined,
                length: 'length' in props ? props.length : undefined,
                audioContext, // Pass the context we created
                logger,
            })

            // Only the first channel is analysed.
            const channelData = processedBuffer.buffer.getChannelData(0)

            // Create and initialize the worker from the inlined script.
            const blob = new Blob([InlineFeaturesExtractor], {
                type: 'application/javascript',
            })
            const workerUrl = URL.createObjectURL(blob)
            const worker = new Worker(workerUrl)

            // `await` keeps worker failures inside the surrounding try/catch so
            // they are logged like decoding failures.
            return await new Promise<AudioAnalysis>((resolve, reject) => {
                // Releases the worker and its blob URL on every exit path.
                const releaseWorker = () => {
                    URL.revokeObjectURL(workerUrl)
                    worker.terminate()
                }

                worker.onmessage = (event) => {
                    try {
                        if (event.data.error) {
                            throw new Error(event.data.error)
                        }

                        const result: AudioAnalysis = event.data.result

                        // Calculate CRC32 after worker completes if requested
                        if (features?.crc32) {
                            const samplesPerSegment = Math.floor(
                                (processedBuffer.sampleRate *
                                    segmentDurationMs) /
                                    1000
                            )

                            result.dataPoints = result.dataPoints.map(
                                (point: DataPoint, index: number) => {
                                    const startSample =
                                        index * samplesPerSegment
                                    const segmentData = channelData.slice(
                                        startSample,
                                        startSample + samplesPerSegment
                                    )

                                    return {
                                        ...point,
                                        features: {
                                            ...point.features,
                                            crc32: calculateCRC32ForDataPoint(
                                                segmentData
                                            ),
                                        },
                                    }
                                }
                            )
                        }

                        resolve(result)
                    } catch (messageError) {
                        // Covers both a worker-reported error and a failure in
                        // the CRC post-processing, so the promise always settles.
                        reject(messageError)
                    } finally {
                        releaseWorker()
                    }
                }

                worker.onerror = (error) => {
                    releaseWorker()
                    reject(error)
                }

                try {
                    worker.postMessage({
                        channelData,
                        sampleRate: processedBuffer.sampleRate,
                        segmentDurationMs,
                        bitDepth: decodingOptions?.targetBitDepth ?? 32,
                        numberOfChannels: processedBuffer.channels,
                        // enableLogging: !!logger,
                        features,
                    })
                } catch (postError) {
                    // postMessage can throw synchronously (e.g. uncloneable data).
                    releaseWorker()
                    reject(postError)
                }
            })
        } finally {
            await audioContext.close()
        }
    } catch (error) {
        logger?.error('Failed to process audio:', error)
        throw error
    }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Analyzes WAV files without decoding, preserving original PCM values.
 * Use this function when you need to ensure the analysis matches other software by avoiding any transformations.
 *
 * On web the WAV data (given directly or fetched from `fileUri`) is converted
 * with `convertPCMToFloat32` and analysed in an inline worker. On other
 * platforms the call is forwarded to `ExpoAudioStreamModule.extractAudioAnalysis`.
 *
 * @param props - The options for WAV analysis, including file URI and range.
 * @returns A promise that resolves to the audio analysis data.
 * @throws {Error} On web when neither `arrayBuffer` nor `fileUri` is given,
 * when fetching `fileUri` fails, or when the worker reports an error; on
 * other platforms when `fileUri` is missing.
 */
export const extractRawWavAnalysis = async ({
    fileUri,
    segmentDurationMs = 100, // Default to 100ms
    arrayBuffer,
    bitDepth,
    durationMs,
    sampleRate,
    numberOfChannels,
    features,
    logger,
    position = 0,
    length,
}: ExtractWavAudioAnalysisProps): Promise<AudioAnalysis> => {
    if (!isWeb) {
        if (!fileUri) {
            throw new Error('fileUri is required')
        }
        logger?.log(`extractAudioAnalysis`, {
            fileUri,
            segmentDurationMs,
        })
        const res = await ExpoAudioStreamModule.extractAudioAnalysis({
            fileUri,
            segmentDurationMs,
            features,
            position,
            length,
        })
        logger?.log(`extractAudioAnalysis`, res)
        return res
    }

    if (!arrayBuffer) {
        if (!fileUri) {
            throw new Error('Either arrayBuffer or fileUri must be provided')
        }

        logger?.log(`fetching fileUri`, fileUri)
        const response = await fetch(fileUri)

        if (!response.ok) {
            throw new Error(`Failed to fetch fileUri: ${response.statusText}`)
        }

        arrayBuffer = await response.arrayBuffer()
        logger?.log(`fetched fileUri`, arrayBuffer.byteLength, arrayBuffer)
    }

    // Create a new copy of the ArrayBuffer to avoid detachment issues
    const bufferCopy = arrayBuffer.slice(0)
    logger?.log(
        `extractAudioAnalysis bitDepth=${bitDepth} len=${bufferCopy.byteLength}`,
        bufferCopy.slice(0, 100)
    )

    let actualBitDepth = bitDepth
    if (!actualBitDepth) {
        logger?.log(
            `extractAudioAnalysis bitDepth not provided -- getting wav file info`
        )
        const fileInfo = await getWavFileInfo(bufferCopy)
        actualBitDepth = fileInfo.bitDepth
    }
    logger?.log(`extractAudioAnalysis actualBitDepth=${actualBitDepth}`)

    const {
        pcmValues: channelData,
        min,
        max,
    } = await convertPCMToFloat32({
        buffer: arrayBuffer,
        bitDepth: actualBitDepth,
    })
    logger?.log(
        `extractAudioAnalysis convertPCMToFloat32 length=${channelData.length} range: [ ${min} :: ${max} ]`
    )

    // Apply position and length constraints to channelData if specified.
    // NOTE(review): `position`/`length` are documented as byte counts but are
    // used here as indices into the converted sample array — confirm the unit.
    const startIndex = position
    const endIndex = length ? startIndex + length : channelData.length
    const constrainedChannelData = channelData.slice(startIndex, endIndex)

    return await new Promise<AudioAnalysis>((resolve, reject) => {
        const blob = new Blob([InlineFeaturesExtractor], {
            type: 'application/javascript',
        })
        const url = URL.createObjectURL(blob)
        const worker = new Worker(url)

        // Releases the worker and its blob URL on every exit path.
        const releaseWorker = () => {
            URL.revokeObjectURL(url)
            worker.terminate()
        }

        worker.onmessage = (event) => {
            releaseWorker()
            // Same error protocol as extractAudioAnalysis uses for this worker.
            if (event.data.error) {
                reject(new Error(event.data.error))
                return
            }
            resolve(event.data.result)
        }

        worker.onerror = (error) => {
            releaseWorker()
            reject(error)
        }

        try {
            // `logger` is deliberately NOT sent: it carries functions, which the
            // structured clone used by postMessage cannot copy (DataCloneError).
            worker.postMessage({
                command: 'process',
                channelData: constrainedChannelData,
                sampleRate,
                segmentDurationMs,
                // Send the resolved depth, not the possibly-undefined input.
                bitDepth: actualBitDepth,
                fullAudioDurationMs: durationMs,
                numberOfChannels,
                // Forwarded for parity with the native path and extractAudioAnalysis.
                features,
            })
        } catch (postError) {
            releaseWorker()
            reject(postError)
        }
    })
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { ExtractAudioDataOptions } from '../ExpoAudioStream.types'
|
|
2
|
+
import ExpoAudioStreamModule from '../ExpoAudioStreamModule'
|
|
3
|
+
|
|
4
|
+
/**
 * Forwards an audio-data extraction request to `ExpoAudioStreamModule`.
 *
 * @param props - Extraction options, handed to the module unchanged.
 * @returns The value `ExpoAudioStreamModule.extractAudioData` resolves with.
 */
export const extractAudioData = async (props: ExtractAudioDataOptions) => {
    const extracted = await ExpoAudioStreamModule.extractAudioData(props)
    return extracted
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @experimental This feature is experimental and currently only available on Android.
|
|
3
|
+
* The API may change in future versions. The web implementation is a placeholder.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { ExpoAudioStreamModule } from '..'
|
|
7
|
+
import { isWeb } from '../constants'
|
|
8
|
+
import {
|
|
9
|
+
ExtractMelSpectrogramOptions,
|
|
10
|
+
MelSpectrogram,
|
|
11
|
+
} from './AudioAnalysis.types'
|
|
12
|
+
import {
|
|
13
|
+
processAudioBuffer,
|
|
14
|
+
ProcessedAudioData,
|
|
15
|
+
} from '../utils/audioProcessing'
|
|
16
|
+
|
|
17
|
+
/**
 * Extracts a mel spectrogram from audio data.
 *
 * When `isWeb` is true the audio is decoded through `processAudioBuffer`
 * using a temporary `AudioContext` (closed again before returning), the
 * window and hop durations are converted from milliseconds to samples, and
 * the result of `computeMelSpectrogram` is returned. Otherwise the options
 * are forwarded untouched to `ExpoAudioStreamModule.extractMelSpectrogram`.
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The iOS implementation will throw an "UNSUPPORTED_PLATFORM" error.
 * The web implementation is a placeholder that returns dummy data.
 *
 * @param options - Source audio (`fileUri` or `arrayBuffer`), analysis
 * parameters and optional decoding settings / logger.
 * @returns The spectrogram together with its sample rate, mel-band count,
 * number of time steps and the duration of the processed audio.
 */
export async function extractMelSpectrogram(
    options: ExtractMelSpectrogramOptions
): Promise<MelSpectrogram> {
    const {
        fileUri,
        arrayBuffer,
        windowSizeMs,
        hopLengthMs,
        nMels,
        fMin = 0,
        fMax,
        windowType = 'hann',
        normalize = false,
        logScale = true,
        decodingOptions,
        startTimeMs,
        endTimeMs,
        logger,
    } = options

    // Non-web platforms: the native module does all the work.
    if (!isWeb) {
        return ExpoAudioStreamModule.extractMelSpectrogram(options)
    }

    // Fall back to the prefixed constructor on browsers that lack the standard one.
    const AudioContextCtor =
        window.AudioContext || (window as any).webkitAudioContext
    const audioContext = new AudioContextCtor()

    try {
        // Decode (and optionally resample / trim) the input with the shared utility.
        const decoded: ProcessedAudioData = await processAudioBuffer({
            arrayBuffer,
            fileUri,
            targetSampleRate: decodingOptions?.targetSampleRate || 16000,
            targetChannels: decodingOptions?.targetChannels || 1,
            normalizeAudio: decodingOptions?.normalizeAudio ?? false,
            startTimeMs,
            endTimeMs,
            audioContext,
            logger,
        })

        const { sampleRate } = decoded

        // Millisecond durations become whole sample counts at the decoded rate.
        const toSamples = (ms: number): number =>
            Math.floor((ms * sampleRate) / 1000)
        const windowSize = toSamples(windowSizeMs)
        const hopLength = toSamples(hopLengthMs)

        // Default the upper frequency bound to half the sample rate.
        const upperFrequency = fMax || sampleRate / 2

        const spectrogram = computeMelSpectrogram(
            decoded.channelData,
            sampleRate,
            nMels,
            windowSize,
            hopLength,
            fMin,
            upperFrequency,
            windowType,
            normalize,
            logScale
        )

        return {
            spectrogram,
            sampleRate,
            nMels,
            timeSteps: spectrogram.length,
            durationMs: decoded.durationMs,
        }
    } catch (error) {
        logger?.error('Error extracting mel spectrogram:', error)
        throw error
    } finally {
        // Always release the temporary audio context, on success or failure.
        await audioContext.close()
    }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Computes a mel spectrogram from audio data.
 *
 * @experimental This is a placeholder implementation that returns dummy data.
 * It only derives the number of frames from the input length and emits
 * zero-filled rows; `sampleRate`, `fMin`, `fMax`, `windowType`, `normalize`
 * and `logScale` are accepted so the signature stays stable but are not used
 * yet. A real implementation would window the signal, compute the STFT,
 * convert to a power spectrogram, apply mel filterbanks, then optionally
 * take the logarithm and normalize.
 *
 * @param audioData - Samples to analyse.
 * @param sampleRate - Sample rate of `audioData` (currently unused).
 * @param nMels - Number of mel bands, i.e. the length of every returned row.
 * @param windowSize - Analysis window length in samples.
 * @param hopLength - Step between consecutive frames in samples; must be a
 * finite number greater than zero.
 * @param fMin - Lower frequency bound (currently unused).
 * @param fMax - Upper frequency bound (currently unused).
 * @param windowType - Window function name (currently unused).
 * @param normalize - Whether to normalize the output (currently unused).
 * @param logScale - Whether to apply log scaling (currently unused).
 * @returns One row of `nMels` zeros per frame; an empty array when the audio
 * is shorter than a single window.
 * @throws RangeError when `hopLength` is not a finite positive number.
 */
function computeMelSpectrogram(
    audioData: Float32Array,
    sampleRate: number,
    nMels: number,
    windowSize: number,
    hopLength: number,
    fMin: number,
    fMax: number,
    windowType: 'hann' | 'hamming',
    normalize: boolean,
    logScale: boolean
): number[][] {
    // A zero hop would make the frame count Infinity and the fill loop would
    // never terminate; reject it (and other nonsensical hops) up front.
    if (!Number.isFinite(hopLength) || hopLength <= 0) {
        throw new RangeError(
            `hopLength must be a positive number of samples, received ${hopLength}`
        )
    }

    // Number of complete windows that fit; never negative.
    const frameCount = Math.max(
        0,
        Math.floor((audioData.length - windowSize) / hopLength) + 1
    )

    // Dummy data: every frame gets its own zero-filled row.
    return Array.from({ length: frameCount }, () =>
        new Array<number>(nMels).fill(0)
    )
}
|