@siteed/expo-audio-stream 1.16.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -1
- package/README.md +1 -1
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +134 -23
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
- package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +4 -4
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/ExpoAudioStream.types.d.ts +49 -3
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStream.web.d.ts +2 -0
- package/build/ExpoAudioStream.web.d.ts.map +1 -1
- package/build/ExpoAudioStream.web.js +8 -1
- package/build/ExpoAudioStream.web.js.map +1 -1
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +216 -12
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.d.ts +67 -13
- package/build/WebRecorder.web.d.ts.map +1 -1
- package/build/WebRecorder.web.js +178 -173
- package/build/WebRecorder.web.js.map +1 -1
- package/build/index.d.ts +3 -3
- package/build/index.d.ts.map +1 -1
- package/build/index.js +2 -2
- package/build/index.js.map +1 -1
- package/build/useAudioRecorder.d.ts.map +1 -1
- package/build/useAudioRecorder.js +12 -8
- package/build/useAudioRecorder.js.map +1 -1
- package/build/utils/audioProcessing.d.ts +24 -0
- package/build/utils/audioProcessing.d.ts.map +1 -0
- package/build/utils/audioProcessing.js +133 -0
- package/build/utils/audioProcessing.js.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
- package/build/workers/InlineFeaturesExtractor.web.js +692 -175
- package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.js +3 -2
- package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
- package/ios/AudioAnalysisData.swift +51 -16
- package/ios/AudioProcessingHelpers.swift +710 -26
- package/ios/AudioProcessor.swift +334 -185
- package/ios/AudioStreamManager.swift +66 -22
- package/ios/DataPoint.swift +25 -12
- package/ios/DecodingConfig.swift +47 -0
- package/ios/ExpoAudioStreamModule.swift +189 -104
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +24 -3
- package/ios/RecordingSettings.swift +9 -7
- package/package.json +2 -1
- package/plugin/build/index.d.ts +2 -0
- package/plugin/build/index.js +10 -3
- package/plugin/src/index.ts +10 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
- package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
- package/src/ExpoAudioStream.types.ts +57 -7
- package/src/ExpoAudioStream.web.ts +8 -1
- package/src/ExpoAudioStreamModule.ts +255 -10
- package/src/WebRecorder.web.ts +231 -243
- package/src/index.ts +5 -3
- package/src/useAudioRecorder.tsx +14 -10
- package/src/utils/audioProcessing.ts +205 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +692 -175
- package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
|
@@ -1,32 +1,56 @@
|
|
|
1
1
|
// packages/expo-audio-stream/src/AudioAnalysis/AudioAnalysis.types.ts
|
|
2
2
|
|
|
3
|
+
import { BitDepth, ConsoleLike } from '../ExpoAudioStream.types'
|
|
4
|
+
|
|
3
5
|
/**
|
|
4
6
|
* Represents the configuration for decoding audio data.
|
|
5
7
|
*/
|
|
6
8
|
export interface DecodingConfig {
|
|
9
|
+
/** Target sample rate for decoded audio (Android and Web) */
|
|
7
10
|
targetSampleRate?: number
|
|
11
|
+
/** Target number of channels (Android and Web) */
|
|
8
12
|
targetChannels?: number
|
|
9
|
-
|
|
13
|
+
/** Target bit depth (Android and Web) */
|
|
14
|
+
targetBitDepth?: BitDepth
|
|
15
|
+
/** Whether to normalize audio levels (Android and Web) */
|
|
10
16
|
normalizeAudio?: boolean
|
|
11
17
|
}
|
|
12
18
|
|
|
19
|
+
/**
|
|
20
|
+
* Represents speech-related features extracted from audio.
|
|
21
|
+
*/
|
|
22
|
+
export interface SpeechFeatures {
|
|
23
|
+
isActive: boolean // Whether speech is detected in this segment
|
|
24
|
+
speakerId?: number // Optional speaker identification
|
|
25
|
+
// Could add more speech-related features here like:
|
|
26
|
+
// confidence: number
|
|
27
|
+
// language?: string
|
|
28
|
+
// sentiment?: number
|
|
29
|
+
// etc.
|
|
30
|
+
}
|
|
31
|
+
|
|
13
32
|
/**
|
|
14
33
|
* Represents various audio features extracted from an audio signal.
|
|
15
34
|
*/
|
|
16
35
|
export interface AudioFeatures {
|
|
17
|
-
energy
|
|
18
|
-
mfcc
|
|
19
|
-
rms
|
|
20
|
-
minAmplitude
|
|
21
|
-
maxAmplitude
|
|
22
|
-
zcr
|
|
23
|
-
spectralCentroid
|
|
24
|
-
spectralFlatness
|
|
25
|
-
spectralRolloff
|
|
26
|
-
spectralBandwidth
|
|
27
|
-
chromagram
|
|
28
|
-
tempo
|
|
29
|
-
hnr
|
|
36
|
+
energy?: number // The integral of the squared signal over its duration, representing the overall energy of the audio.
|
|
37
|
+
mfcc?: number[] // Mel-frequency cepstral coefficients, describing the short-term power spectrum of a sound.
|
|
38
|
+
rms?: number // Root mean square value, indicating the amplitude of the audio signal.
|
|
39
|
+
minAmplitude?: number // Minimum amplitude value in the audio signal.
|
|
40
|
+
maxAmplitude?: number // Maximum amplitude value in the audio signal.
|
|
41
|
+
zcr?: number // Zero-crossing rate, indicating the rate at which the signal changes sign.
|
|
42
|
+
spectralCentroid?: number // The center of mass of the spectrum, indicating the brightness of the sound.
|
|
43
|
+
spectralFlatness?: number // Measure of the flatness of the spectrum, indicating how noise-like the signal is.
|
|
44
|
+
spectralRolloff?: number // The frequency below which a specified percentage (usually 85%) of the total spectral energy lies.
|
|
45
|
+
spectralBandwidth?: number // The width of the spectrum, indicating the range of frequencies present.
|
|
46
|
+
chromagram?: number[] // Chromagram, representing the 12 different pitch classes of the audio.
|
|
47
|
+
tempo?: number // Estimated tempo of the audio signal, measured in beats per minute (BPM).
|
|
48
|
+
hnr?: number // Harmonics-to-noise ratio, indicating the proportion of harmonics to noise in the audio signal.
|
|
49
|
+
melSpectrogram?: number[] // Mel-scaled spectrogram representation of the audio.
|
|
50
|
+
spectralContrast?: number[] // Spectral contrast features representing the difference between peaks and valleys.
|
|
51
|
+
tonnetz?: number[] // Tonal network features representing harmonic relationships.
|
|
52
|
+
pitch?: number // Pitch of the audio signal, measured in Hertz (Hz).
|
|
53
|
+
crc32?: number // CRC32 checksum of the audio signal, used to verify the integrity of the audio.
|
|
30
54
|
}
|
|
31
55
|
|
|
32
56
|
/**
|
|
@@ -44,6 +68,11 @@ export interface AudioFeaturesOptions {
|
|
|
44
68
|
chromagram?: boolean
|
|
45
69
|
tempo?: boolean
|
|
46
70
|
hnr?: boolean
|
|
71
|
+
melSpectrogram?: boolean
|
|
72
|
+
spectralContrast?: boolean
|
|
73
|
+
tonnetz?: boolean
|
|
74
|
+
pitch?: boolean
|
|
75
|
+
crc32?: boolean
|
|
47
76
|
}
|
|
48
77
|
|
|
49
78
|
/**
|
|
@@ -51,11 +80,12 @@ export interface AudioFeaturesOptions {
|
|
|
51
80
|
*/
|
|
52
81
|
export interface DataPoint {
|
|
53
82
|
id: number
|
|
54
|
-
amplitude: number
|
|
55
|
-
|
|
56
|
-
dB
|
|
57
|
-
silent
|
|
83
|
+
amplitude: number // Peak amplitude for the segment
|
|
84
|
+
rms: number // Root mean square value
|
|
85
|
+
dB: number // Always computed
|
|
86
|
+
silent: boolean // Always computed
|
|
58
87
|
features?: AudioFeatures
|
|
88
|
+
speech?: SpeechFeatures
|
|
59
89
|
startTime?: number
|
|
60
90
|
endTime?: number
|
|
61
91
|
// start / end position in bytes
|
|
@@ -63,33 +93,38 @@ export interface DataPoint {
|
|
|
63
93
|
endPosition?: number
|
|
64
94
|
// number of audio samples for this point (samples size depends on bit depth)
|
|
65
95
|
samples?: number
|
|
66
|
-
// TODO: speaker detection
|
|
67
|
-
speaker?: number
|
|
68
96
|
}
|
|
69
97
|
|
|
70
|
-
export type AmplitudeAlgorithm = 'peak' | 'rms'
|
|
71
|
-
|
|
72
98
|
/**
|
|
73
99
|
* Represents the complete data from the audio analysis.
|
|
74
100
|
*/
|
|
75
101
|
export interface AudioAnalysis {
|
|
76
|
-
|
|
102
|
+
segmentDurationMs: number // Duration of each segment in milliseconds
|
|
77
103
|
durationMs: number // Duration of the audio in milliseconds
|
|
78
104
|
bitDepth: number // Bit depth of the audio
|
|
79
105
|
samples: number // Size of the audio in bytes
|
|
80
106
|
numberOfChannels: number // Number of audio channels
|
|
81
107
|
sampleRate: number // Sample rate of the audio
|
|
82
108
|
dataPoints: DataPoint[] // Array of data points from the analysis.
|
|
83
|
-
amplitudeAlgorithm: AmplitudeAlgorithm // Algorithm used to calculate amplitude values.
|
|
84
109
|
amplitudeRange: {
|
|
85
110
|
min: number
|
|
86
111
|
max: number
|
|
87
112
|
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
113
|
+
rmsRange: {
|
|
114
|
+
min: number
|
|
115
|
+
max: number
|
|
116
|
+
}
|
|
117
|
+
// TODO: move speaker changes into a broader speech analysis section
|
|
118
|
+
speechAnalysis?: {
|
|
119
|
+
speakerChanges: {
|
|
120
|
+
timestamp: number
|
|
121
|
+
speakerId: number
|
|
122
|
+
}[]
|
|
123
|
+
// Could add more speech analysis data here like:
|
|
124
|
+
// dominantSpeaker?: number
|
|
125
|
+
// totalSpeechDuration?: number
|
|
126
|
+
// speakerStats?: { [speakerId: number]: { duration: number, segments: number } }
|
|
127
|
+
}
|
|
93
128
|
}
|
|
94
129
|
|
|
95
130
|
/**
|
|
@@ -97,9 +132,9 @@ export interface AudioAnalysis {
|
|
|
97
132
|
*/
|
|
98
133
|
export interface AudioRangeOptions {
|
|
99
134
|
/** Start time in milliseconds */
|
|
100
|
-
|
|
135
|
+
startTimeMs?: number
|
|
101
136
|
/** End time in milliseconds */
|
|
102
|
-
|
|
137
|
+
endTimeMs?: number
|
|
103
138
|
}
|
|
104
139
|
|
|
105
140
|
/**
|
|
@@ -115,10 +150,9 @@ export interface PreviewOptions extends AudioRangeOptions {
|
|
|
115
150
|
*/
|
|
116
151
|
numberOfPoints?: number
|
|
117
152
|
/**
|
|
118
|
-
*
|
|
119
|
-
* @default "rms"
|
|
153
|
+
* Optional logger for debugging.
|
|
120
154
|
*/
|
|
121
|
-
|
|
155
|
+
logger?: ConsoleLike
|
|
122
156
|
/**
|
|
123
157
|
* Optional configuration for decoding the audio file.
|
|
124
158
|
* Defaults to:
|
|
@@ -129,21 +163,3 @@ export interface PreviewOptions extends AudioRangeOptions {
|
|
|
129
163
|
*/
|
|
130
164
|
decodingOptions?: DecodingConfig
|
|
131
165
|
}
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* Represents a simplified preview of audio waveform,
|
|
135
|
-
* optimized for quick visualization.
|
|
136
|
-
*/
|
|
137
|
-
export interface AudioPreview {
|
|
138
|
-
/** Number of data points per second */
|
|
139
|
-
pointsPerSecond: number
|
|
140
|
-
/** Duration of the audio in milliseconds */
|
|
141
|
-
durationMs: number
|
|
142
|
-
/** Range of amplitude values in the preview */
|
|
143
|
-
amplitudeRange: {
|
|
144
|
-
min: number
|
|
145
|
-
max: number
|
|
146
|
-
}
|
|
147
|
-
/** Array of data points representing the waveform */
|
|
148
|
-
dataPoints: DataPoint[]
|
|
149
|
-
}
|