@siteed/expo-audio-stream 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +202 -1
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +300 -1
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +16 -2
- package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +274 -44
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +35 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +2 -12
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +0 -26
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/AudioAnalysis/extractAudioData.js +5 -0
- package/build/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts +14 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js +85 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/AudioAnalysis/extractPreview.js +25 -0
- package/build/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/ExpoAudioStream.types.d.ts +329 -3
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +455 -1
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.js +2 -2
- package/build/WebRecorder.web.js.map +1 -1
- package/build/index.d.ts +6 -3
- package/build/index.d.ts.map +1 -1
- package/build/index.js +6 -2
- package/build/index.js.map +1 -1
- package/build/trimAudio.d.ts +25 -0
- package/build/trimAudio.d.ts.map +1 -0
- package/build/trimAudio.js +67 -0
- package/build/trimAudio.js.map +1 -0
- package/ios/AudioProcessor.swift +536 -81
- package/ios/ExpoAudioStreamModule.swift +125 -18
- package/package.json +1 -1
- package/plugin/build/index.js +6 -1
- package/plugin/src/index.ts +9 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +38 -1
- package/src/AudioAnalysis/extractAudioAnalysis.ts +1 -38
- package/src/AudioAnalysis/extractAudioData.ts +6 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +144 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/ExpoAudioStream.types.ts +354 -42
- package/src/ExpoAudioStreamModule.ts +682 -1
- package/src/WebRecorder.web.ts +2 -2
- package/src/index.ts +7 -8
- package/src/trimAudio.ts +90 -0
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
## [2.1.0] - 2025-03-04
|
|
12
|
+
### Changed
|
|
13
|
+
- feat(docs): enhance audio processing documentation and examples (#158) ([26afd49](https://github.com/deeeed/expo-audio-stream/commit/26afd4938e1c626294f40b50a84fe15f5c2bb6a1))
|
|
14
|
+
- feat: Add Mel Spectrogram Extraction and Language Detection to Audio Processing (#157) ([4129dee](https://github.com/deeeed/expo-audio-stream/commit/4129dee87c27dd5a9911c85e3dbf045507876cc1))
|
|
15
|
+
- feat: enhance audio import functionality and decibel visualization (#156) ([2dbecc7](https://github.com/deeeed/expo-audio-stream/commit/2dbecc7bd0ea46edd80c2b0e28dd2a0525953362))
|
|
16
|
+
- feat(trim): Implement iOS trim support with custom filename and format improvements (#152) ([dd49be4](https://github.com/deeeed/expo-audio-stream/commit/dd49be42bccbf3ae6cced8c3662237e1668ec2de))
|
|
17
|
+
- feat: Add Sample Rate Control and Web Trimming Support to Expo Audio Stream (#151) ([9158eec](https://github.com/deeeed/expo-audio-stream/commit/9158eeccc10e25ac77ba3a99185b4dbc5abfb353))
|
|
18
|
+
- feat: Enhance audio trimming with optimized processing and detailed feedback (#150) ([41a6945](https://github.com/deeeed/expo-audio-stream/commit/41a694528d1e803dc0012948eec4edfdc336b4fc))
|
|
19
|
+
- feat(trim): add audio trimming functionality with visualization and preview (Android only) (#149) ([cba03dc](https://github.com/deeeed/expo-audio-stream/commit/cba03dc920eb8a1f111b45e8404a42e48076b7cd))
|
|
20
|
+
- chore(expo-audio-stream): release @siteed/expo-audio-stream@2.0.1 ([c77cfc8](https://github.com/deeeed/expo-audio-stream/commit/c77cfc8b70f87a12bb19fa03b245cda7ed2496e1))
|
|
21
|
+
## [2.0.1] - 2025-02-27
|
|
22
|
+
### Changed
|
|
23
|
+
- refactor: update background mode handling for audio stream plugin ([e7e98cc](https://github.com/deeeed/expo-audio-stream/commit/e7e98cc60b7965770dcf25e9ae74cb356e1e7097))
|
|
24
|
+
- chore(expo-audio-stream): release @siteed/expo-audio-stream@2.0.0 ([356d3f4](https://github.com/deeeed/expo-audio-stream/commit/356d3f40ffb66806eeecb86d12bcbe5d60b7eea6))
|
|
11
25
|
## [2.0.0] - 2025-02-27
|
|
12
26
|
### Changed
|
|
13
27
|
- feat(playground): Enhance Audio Playground with Improved UX and Sample Audio Loading (#148) ([09d2794](https://github.com/deeeed/expo-audio-stream/commit/09d27940dcffa60e662c828742f4577bca5327f9))
|
|
@@ -154,7 +168,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
154
168
|
- Feature: Audio features extraction during recording.
|
|
155
169
|
- Feature: Consistent WAV PCM recording format across all platforms.
|
|
156
170
|
|
|
157
|
-
[unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.0...HEAD
|
|
171
|
+
[unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.1.0...HEAD
|
|
172
|
+
[2.1.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.1...@siteed/expo-audio-stream@2.1.0
|
|
173
|
+
[2.0.1]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.0...@siteed/expo-audio-stream@2.0.1
|
|
158
174
|
[2.0.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.17.0...@siteed/expo-audio-stream@2.0.0
|
|
159
175
|
[1.17.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.16.0...@siteed/expo-audio-stream@1.17.0
|
|
160
176
|
[1.16.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.15.1...@siteed/expo-audio-stream@1.16.0
|
package/README.md
CHANGED
|
@@ -48,16 +48,217 @@
|
|
|
48
48
|
- Background audio recording on iOS.
|
|
49
49
|
- Audio features extraction during recording.
|
|
50
50
|
- Consistent WAV PCM recording format across all platforms.
|
|
51
|
-
- Keep recording
|
|
51
|
+
- Keep recording active while app is in background
|
|
52
52
|
- Rich notification system for recording status:
|
|
53
53
|
- Android: Live waveform visualization in notifications
|
|
54
54
|
- Android: Fully customizable notification appearance and actions
|
|
55
55
|
- iOS: Media player integration
|
|
56
|
+
- Advanced audio analysis capabilities:
|
|
57
|
+
- Mel spectrogram generation for machine learning and visualization
|
|
58
|
+
- Comprehensive audio feature extraction (MFCC, spectral features, etc.)
|
|
59
|
+
- Lightweight waveform preview generation
|
|
60
|
+
- Precision audio manipulation:
|
|
61
|
+
- Advanced audio splitting and trimming API
|
|
62
|
+
- Support for trimming multiple segments in a single operation
|
|
63
|
+
- Ability to keep or remove specific time ranges
|
|
64
|
+
- Complete ecosystem:
|
|
65
|
+
- Full-featured AudioPlayground application showcasing advanced API usage
|
|
66
|
+
- Ready-to-use UI components via [@siteed/expo-audio-ui](https://github.com/deeeed/expo-audio-stream/tree/main/packages/expo-audio-ui) package
|
|
67
|
+
- Visualizations, waveforms, and audio controls that can be directly incorporated into your app
|
|
68
|
+
|
|
69
|
+
## Audio Analysis Features
|
|
70
|
+
|
|
71
|
+
Extract powerful audio features for advanced audio processing and visualization:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
// Extract audio analysis with specific features enabled
|
|
75
|
+
const analysis = await extractAudioAnalysis({
|
|
76
|
+
fileUri: 'path/to/recording.wav',
|
|
77
|
+
features: {
|
|
78
|
+
energy: true, // Overall energy of the audio
|
|
79
|
+
rms: true, // Root mean square (amplitude)
|
|
80
|
+
zcr: true, // Zero-crossing rate
|
|
81
|
+
mfcc: true, // Mel-frequency cepstral coefficients
|
|
82
|
+
spectralCentroid: true, // Brightness of sound
|
|
83
|
+
tempo: true, // Estimated BPM
|
|
84
|
+
}
|
|
85
|
+
});
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Available Audio Features
|
|
89
|
+
|
|
90
|
+
- **Basic Analysis**: RMS, energy, amplitude range, zero-crossing rate
|
|
91
|
+
- **Spectral Features**: Spectral centroid, flatness, rolloff, bandwidth
|
|
92
|
+
- **Advanced Analysis**:
|
|
93
|
+
- MFCC (Mel-frequency cepstral coefficients)
|
|
94
|
+
- Chromagram (pitch class representation)
|
|
95
|
+
- Mel Spectrogram
|
|
96
|
+
- Harmonics-to-noise ratio
|
|
97
|
+
- Tempo estimation
|
|
98
|
+
- Pitch detection
|
|
99
|
+
|
|
100
|
+
### Use Cases
|
|
101
|
+
|
|
102
|
+
- Visualize audio waveforms with detailed metrics
|
|
103
|
+
- Implement speech recognition preprocessing
|
|
104
|
+
- Create music analysis applications
|
|
105
|
+
- Build audio fingerprinting systems
|
|
106
|
+
- Develop voice activity detection
|
|
107
|
+
|
|
108
|
+
## API Overview
|
|
109
|
+
|
|
110
|
+
The library provides several specialized APIs for different audio processing needs:
|
|
111
|
+
|
|
112
|
+
### Recording and Playback
|
|
113
|
+
|
|
114
|
+
- **useAudioRecorder**: Hook for recording audio with configurable quality settings
|
|
115
|
+
- **AudioRecorderProvider**: Context provider for sharing recording state across components
|
|
116
|
+
- **useSharedAudioRecorder**: Hook to access shared recording state from any component
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
// Start a new recording with configuration
|
|
120
|
+
const { startRecording, stopRecording, isRecording, recordingUri } = useAudioRecorder({
|
|
121
|
+
audioQuality: 'high',
|
|
122
|
+
sampleRate: 44100,
|
|
123
|
+
numberOfChannels: 2,
|
|
124
|
+
bitDepth: 16,
|
|
125
|
+
outputFormat: 'wav',
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
// Share recording state across components
|
|
129
|
+
const AudioApp = () => (
|
|
130
|
+
<AudioRecorderProvider>
|
|
131
|
+
<RecordButton />
|
|
132
|
+
<AudioVisualizer />
|
|
133
|
+
</AudioRecorderProvider>
|
|
134
|
+
);
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Audio Analysis
|
|
138
|
+
|
|
139
|
+
- **extractAudioAnalysis**: Extract comprehensive audio features for detailed analysis
|
|
140
|
+
- **extractPreview**: Generate lightweight waveform data for visualization
|
|
141
|
+
- **extractAudioData**: Extract raw PCM data for custom processing
|
|
142
|
+
- **extractRawWavAnalysis**: Analyze WAV files without decoding, preserving original PCM values
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
// Extract detailed audio analysis with feature extraction
|
|
146
|
+
const analysis = await extractAudioAnalysis({
|
|
147
|
+
fileUri: 'path/to/recording.wav',
|
|
148
|
+
features: { rms: true, zcr: true, mfcc: true }
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
// Generate a lightweight waveform preview
|
|
152
|
+
const preview = await extractPreview({
|
|
153
|
+
fileUri: 'path/to/recording.wav',
|
|
154
|
+
pointsPerSecond: 50
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
// Extract raw PCM data for custom processing
|
|
158
|
+
const audioData = await extractAudioData({
|
|
159
|
+
fileUri: 'path/to/recording.wav',
|
|
160
|
+
includeWavHeader: true
|
|
161
|
+
});
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
#### Choosing the Right Audio Analysis Method
|
|
165
|
+
|
|
166
|
+
| Method | Purpose | Performance | Use When |
|
|
167
|
+
|--------|---------|-------------|----------|
|
|
168
|
+
| `extractAudioAnalysis` | Comprehensive audio feature extraction | Medium-Heavy | You need detailed audio features like MFCC, spectral features |
|
|
169
|
+
| `extractPreview` | Lightweight waveform visualization | Very Light | You only need amplitude data for visualization |
|
|
170
|
+
| `extractAudioData` | Raw PCM data extraction | Medium | You need the raw audio data for custom processing |
|
|
171
|
+
| `extractRawWavAnalysis` | WAV analysis without decoding | Light | You want to analyze WAV files while preserving original values |
|
|
172
|
+
| `extractMelSpectrogram` | Mel spectrogram generation | Heavy | You need frequency-domain representation for ML or visualization |
|
|
173
|
+
|
|
174
|
+
### Specialized Audio Processing
|
|
175
|
+
|
|
176
|
+
- **extractMelSpectrogram**: Generate mel spectrogram for audio visualization or ML models
|
|
177
|
+
- **trimAudio**: Trim audio files with precision, supporting multiple segments and formats
|
|
178
|
+
|
|
179
|
+
```typescript
|
|
180
|
+
// Generate mel spectrogram for audio visualization or ML models
|
|
181
|
+
const melSpectrogram = await extractMelSpectrogram({
|
|
182
|
+
fileUri: 'path/to/recording.wav',
|
|
183
|
+
windowSizeMs: 25,
|
|
184
|
+
hopLengthMs: 10,
|
|
185
|
+
nMels: 40
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
// Trim audio files with precision
|
|
189
|
+
const trimmedAudio = await trimAudio({
|
|
190
|
+
fileUri: 'path/to/recording.wav',
|
|
191
|
+
startTimeMs: 1000,
|
|
192
|
+
endTimeMs: 5000,
|
|
193
|
+
outputFormat: { format: 'wav' }
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
// Trim multiple segments from an audio file
|
|
197
|
+
const compiledAudio = await trimAudio({
|
|
198
|
+
fileUri: 'path/to/recording.wav',
|
|
199
|
+
mode: 'keep',
|
|
200
|
+
ranges: [
|
|
201
|
+
{ startTimeMs: 1000, endTimeMs: 5000 },
|
|
202
|
+
{ startTimeMs: 10000, endTimeMs: 15000 }
|
|
203
|
+
]
|
|
204
|
+
});
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Utility Functions
|
|
208
|
+
|
|
209
|
+
- **convertPCMToFloat32**: Convert PCM data to Float32Array for processing
|
|
210
|
+
- **getWavFileInfo**: Extract metadata from WAV files
|
|
211
|
+
- **writeWavHeader**: Create WAV headers for raw PCM data
|
|
212
|
+
|
|
213
|
+
### Low-Level Access
|
|
214
|
+
|
|
215
|
+
For advanced use cases, the library provides direct access to the native module:
|
|
216
|
+
|
|
217
|
+
```typescript
|
|
218
|
+
import { ExpoAudioStreamModule } from '@siteed/expo-audio-stream';
|
|
219
|
+
|
|
220
|
+
// Access platform-specific functionality
|
|
221
|
+
const status = await ExpoAudioStreamModule.status();
|
|
222
|
+
const permissions = await ExpoAudioStreamModule.getPermissionsAsync();
|
|
223
|
+
```
|
|
56
224
|
|
|
57
225
|
## Documentation
|
|
58
226
|
|
|
59
227
|
For detailed documentation, please refer to the [Getting Started Guide](https://deeeed.github.io/expo-audio-stream/docs/).
|
|
60
228
|
|
|
229
|
+
## Companion Resources
|
|
230
|
+
|
|
231
|
+
### AudioPlayground Application
|
|
232
|
+
|
|
233
|
+
The repository includes a complete AudioPlayground application that demonstrates advanced usage of the API. This playground serves as both a demonstration and a learning resource:
|
|
234
|
+
|
|
235
|
+
- Interactive examples of all major API features
|
|
236
|
+
- Real-time audio visualization and analysis
|
|
237
|
+
- Code samples you can directly reference for your own implementation
|
|
238
|
+
|
|
239
|
+
Try it online at [https://deeeed.github.io/expo-audio-stream/playground](https://deeeed.github.io/expo-audio-stream/playground) or run it locally from the repository.
|
|
240
|
+
|
|
241
|
+
### UI Components Package
|
|
242
|
+
|
|
243
|
+
The [@siteed/expo-audio-ui](https://github.com/deeeed/expo-audio-stream/tree/main/packages/expo-audio-ui) package provides ready-to-use UI components for audio applications:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Install the UI components package
|
|
247
|
+
npm install @siteed/expo-audio-ui
|
|
248
|
+
|
|
249
|
+
# or with yarn
|
|
250
|
+
yarn add @siteed/expo-audio-ui
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
This package includes:
|
|
254
|
+
- Waveform visualizers
|
|
255
|
+
- Audio recording controls
|
|
256
|
+
- Playback components
|
|
257
|
+
- Spectrogram displays
|
|
258
|
+
- And more!
|
|
259
|
+
|
|
260
|
+
All components are built with React Native, Reanimated, and Skia for optimal performance across platforms.
|
|
261
|
+
|
|
61
262
|
## License
|
|
62
263
|
|
|
63
264
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
// packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
|
|
2
|
+
// packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
|
|
2
3
|
package net.siteed.audiostream
|
|
3
4
|
|
|
4
5
|
import java.nio.ByteBuffer
|
|
@@ -22,6 +23,12 @@ data class DecodingConfig(
|
|
|
22
23
|
val normalizeAudio: Boolean = false // Whether to normalize audio levels
|
|
23
24
|
)
|
|
24
25
|
|
|
26
|
+
/**
 * Result of a mel spectrogram extraction.
 *
 * Because every property is an array, the compiler-generated `equals`/`hashCode` of a data
 * class would compare them by reference. They are overridden here so that two instances
 * holding the same numbers compare equal and hash identically.
 *
 * @property spectrogram 2D array indexed as `[time frame][mel bin]`.
 * @property timeStamps Time (in seconds) for each frame.
 * @property frequencies Frequencies (in Hz) for each mel bin.
 */
data class SpectrogramData(
    val spectrogram: Array<FloatArray>,
    val timeStamps: FloatArray,
    val frequencies: FloatArray
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is SpectrogramData) return false
        return spectrogram.contentDeepEquals(other.spectrogram) &&
            timeStamps.contentEquals(other.timeStamps) &&
            frequencies.contentEquals(other.frequencies)
    }

    override fun hashCode(): Int {
        var result = spectrogram.contentDeepHashCode()
        result = 31 * result + timeStamps.contentHashCode()
        result = 31 * result + frequencies.contentHashCode()
        return result
    }
}
|
|
31
|
+
|
|
25
32
|
class AudioProcessor(private val filesDir: File) {
|
|
26
33
|
companion object {
|
|
27
34
|
const val DCT_SQRT_DIVISOR = 2.0
|
|
@@ -998,7 +1005,7 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
998
1005
|
return a + fraction * (b - a)
|
|
999
1006
|
}
|
|
1000
1007
|
|
|
1001
|
-
|
|
1008
|
+
fun processAudio(
|
|
1002
1009
|
pcmData: ByteArray,
|
|
1003
1010
|
originalSampleRate: Int,
|
|
1004
1011
|
targetSampleRate: Int?,
|
|
@@ -1625,6 +1632,187 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1625
1632
|
return output
|
|
1626
1633
|
}
|
|
1627
1634
|
|
|
1635
|
+
/**
 * Generates a Hann window with [size] coefficients.
 *
 * Coefficient `i` is `0.5 * (1 - cos(2π * i / (size - 1)))`, so the window starts and ends
 * at (approximately) zero and peaks in the middle.
 *
 * @param size Number of coefficients to generate. `0` yields an empty array.
 * @return The window coefficients.
 */
private fun generateHannWindow(size: Int): FloatArray {
    // A one-point window would divide by (size - 1) == 0 below and produce NaN;
    // return the conventional single coefficient of 1 instead.
    if (size == 1) return floatArrayOf(1f)

    val lastIndex = (size - 1).toFloat()
    return FloatArray(size) { i ->
        0.5f * (1f - cos(2f * PI.toFloat() * i / lastIndex))
    }
}
|
|
1641
|
+
|
|
1642
|
+
/**
 * Extracts a mel spectrogram from decoded PCM audio.
 *
 * Steps: convert the PCM bytes to floats, build the analysis window, compute the STFT power
 * spectrum, project it onto a mel filterbank, then optionally apply natural-log scaling
 * and min-max normalisation to `[0, 1]`.
 *
 * @param audioData Decoded audio; its `data`, `bitDepth` and `sampleRate` are read.
 * @param windowSizeMs Analysis window length in milliseconds.
 * @param hopLengthMs Distance between consecutive frame starts in milliseconds.
 * @param nMels Number of mel bins per frame.
 * @param fftLength FFT size; the window (in samples) must not exceed it.
 * @param fMin Lowest frequency (Hz) covered by the filterbank.
 * @param fMax Highest frequency (Hz) covered by the filterbank; defaults to half the sample rate.
 * @param windowType `"hann"` or `"hamming"` (case-insensitive).
 * @param logScaling When true, each value is replaced by `ln(max(1e-10, value))`.
 * @param normalize When true, values are rescaled so the global min maps to 0 and max to 1.
 * @return The spectrogram with per-frame timestamps (seconds) and a frequency axis (Hz).
 *   An empty spectrogram is returned when the audio is too short to produce a frame.
 * @throws IllegalArgumentException if [windowType] is unsupported, the hop length rounds
 *   down to zero samples, or the window is longer than [fftLength].
 */
fun extractMelSpectrogram(
    audioData: AudioData,
    windowSizeMs: Float = 25f, // Default 25ms window
    hopLengthMs: Float = 10f, // Default 10ms hop
    nMels: Int = 128, // Number of mel bins
    fftLength: Int = 2048, // FFT size
    fMin: Float = 0f, // Minimum frequency
    fMax: Float = audioData.sampleRate.toFloat() / 2, // Nyquist frequency
    windowType: String = "hann",
    logScaling: Boolean = true, // Apply log scaling
    normalize: Boolean = false // Normalize output
): SpectrogramData {
    val sampleRate = audioData.sampleRate.toFloat()
    val samples = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Convert ms to samples (truncating towards zero).
    val windowSizeSamples = (windowSizeMs * sampleRate / 1000).toInt()
    val hopLengthSamples = (hopLengthMs * sampleRate / 1000).toInt()

    // The frame count below divides by the hop length, so a zero hop would crash with an
    // opaque ArithmeticException; fail early with an actionable message instead.
    require(hopLengthSamples > 0) {
        "hopLengthMs=$hopLengthMs is too small for sample rate ${audioData.sampleRate} (0 samples)"
    }
    // Each windowed frame is written into a buffer of fftLength samples.
    require(windowSizeSamples <= fftLength) {
        "Window of $windowSizeSamples samples exceeds fftLength=$fftLength"
    }

    val window = when (windowType.lowercase()) {
        "hann" -> generateHannWindow(windowSizeSamples)
        "hamming" -> FloatArray(windowSizeSamples) { i ->
            0.54f - 0.46f * cos(2f * PI.toFloat() * i / (windowSizeSamples - 1))
        }
        else -> throw IllegalArgumentException("Unsupported windowType: $windowType")
    }

    // Same frame-count formula as the STFT step. When the input is much shorter than one
    // window it is zero or negative, which would otherwise surface as an array-size or
    // index error further down; return an empty result instead.
    val expectedFrames = (samples.size - windowSizeSamples) / hopLengthSamples + 1
    if (expectedFrames <= 0) {
        return SpectrogramData(emptyArray<FloatArray>(), FloatArray(0), melFrequencies(nMels, fMin, fMax))
    }

    // Compute STFT
    val stft = computeSTFT(samples, fftLength, windowSizeSamples, hopLengthSamples, window)

    // Apply mel filterbank
    val melSpectrogram = applyMelFilterbank(stft, sampleRate, nMels, fftLength, fMin, fMax)

    // Post-processing: log scaling and normalization
    if (logScaling) {
        for (row in melSpectrogram) {
            for (j in row.indices) {
                // Floor at 1e-10 so silent bins do not become -infinity.
                row[j] = ln(max(1e-10f, row[j])).toFloat()
            }
        }
    }
    if (normalize) {
        // Float.MIN_VALUE is the smallest *positive* float, so it is not a valid seed for a
        // running maximum once values are negative (as they are after log scaling).
        var minVal = Float.POSITIVE_INFINITY
        var maxVal = Float.NEGATIVE_INFINITY

        for (row in melSpectrogram) {
            for (value in row) {
                if (value < minVal) minVal = value
                if (value > maxVal) maxVal = value
            }
        }

        val range = maxVal - minVal
        // A constant (or empty) spectrogram has no range to rescale; leave it untouched.
        if (range > 0) {
            for (row in melSpectrogram) {
                for (j in row.indices) {
                    row[j] = (row[j] - minVal) / range
                }
            }
        }
    }

    // Compute timestamps and frequencies for metadata
    val numFrames = melSpectrogram.size
    val timeStamps = FloatArray(numFrames) { it * hopLengthMs / 1000f }
    // NOTE(review): these are nMels points spaced evenly on the mel scale from fMin to fMax
    // inclusive, whereas the filterbank is built from nMels + 2 points; confirm whether the
    // filter centre frequencies were intended here.
    val frequencies = melFrequencies(nMels, fMin, fMax)

    return SpectrogramData(melSpectrogram, timeStamps, frequencies)
}
|
|
1715
|
+
|
|
1716
|
+
/**
 * Computes the short-time Fourier transform power spectrum of [samples].
 *
 * Frames start every [hopLength] samples. Each frame takes up to [windowSize] samples,
 * multiplies them by [window], zero-pads to [fftLength] and is transformed with `FFT`.
 *
 * @param samples Mono audio samples.
 * @param fftLength FFT size; each output row has `fftLength / 2 + 1` bins.
 * @param windowSize Number of samples taken per frame; must not exceed [fftLength].
 * @param hopLength Number of samples between frame starts; must be positive.
 * @param window Window coefficients, indexed by position within the frame.
 * @return Power spectrum indexed as `[frame][frequency bin]`; empty when the input is too
 *   short to yield a frame.
 * @throws IllegalArgumentException if [hopLength] is not positive or [windowSize] exceeds
 *   [fftLength].
 */
private fun computeSTFT(
    samples: FloatArray,
    fftLength: Int,
    windowSize: Int,
    hopLength: Int,
    window: FloatArray
): Array<FloatArray> {
    // hopLength is a divisor below, and the frame buffer only holds fftLength samples.
    require(hopLength > 0) { "hopLength must be positive, got $hopLength" }
    require(windowSize <= fftLength) {
        "windowSize ($windowSize) must not exceed fftLength ($fftLength)"
    }

    val fft = FFT(fftLength)
    val numBins = fftLength / 2 + 1
    // For inputs much shorter than the window the formula goes to zero or negative;
    // clamp so the result is an empty array rather than a NegativeArraySizeException.
    val numFrames = (((samples.size - windowSize) / hopLength) + 1).coerceAtLeast(0)
    val stft = Array(numFrames) { FloatArray(numBins) }

    for (frameIdx in 0 until numFrames) {
        val start = frameIdx * hopLength
        val end = minOf(start + windowSize, samples.size)
        // Fresh zero-filled buffer per frame: anything past the windowed samples stays 0.
        val frame = FloatArray(fftLength)

        // Extract and window the frame
        for (i in start until end) {
            frame[i - start] = samples[i] * window[i - start]
        }

        // Compute FFT and power spectrum.
        // NOTE(review): the result is read as interleaved (real, imaginary) pairs; confirm
        // against FFT.processSegment. Indices past the end of the result are treated as 0.
        val fftResult = fft.processSegment(frame)
        val row = stft[frameIdx]
        for (i in 0 until numBins) {
            val real = if (2 * i < fftResult.size) fftResult[2 * i] else 0f
            val imag = if (2 * i + 1 < fftResult.size) fftResult[2 * i + 1] else 0f
            row[i] = real * real + imag * imag
        }
    }
    return stft
}
|
|
1749
|
+
|
|
1750
|
+
/**
 * Projects an STFT power spectrum onto a mel filterbank.
 *
 * For every frame, each mel bin is the dot product of that frame's power spectrum with the
 * corresponding filter from [createMelFilterbank].
 *
 * @param stft Power spectrum indexed as `[frame][frequency bin]`.
 * @param sampleRate Sample rate of the analysed audio in Hz.
 * @param nMels Number of mel bins to produce per frame.
 * @param fftLength FFT size that was used to compute [stft].
 * @param fMin Lowest filterbank frequency in Hz.
 * @param fMax Highest filterbank frequency in Hz.
 * @return Mel spectrogram indexed as `[frame][mel bin]`; empty when [stft] is empty.
 */
private fun applyMelFilterbank(
    stft: Array<FloatArray>,
    sampleRate: Float,
    nMels: Int,
    fftLength: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    // Without frames there is no stft[0] to read the bin count from.
    if (stft.isEmpty()) return emptyArray<FloatArray>()

    val numBins = stft[0].size
    val melFilters = createMelFilterbank(sampleRate, fftLength, nMels, fMin, fMax)

    return Array(stft.size) { frame ->
        val spectrum = stft[frame]
        FloatArray(nMels) { melBin ->
            val filter = melFilters[melBin]
            var sum = 0f
            for (bin in 0 until numBins) {
                sum += spectrum[bin] * filter[bin]
            }
            sum
        }
    }
}
|
|
1775
|
+
|
|
1776
|
+
/**
 * Builds a bank of [nMels] triangular filters over the FFT bin frequencies.
 *
 * The filter edges are the `nMels + 2` frequencies returned by [melFrequencies]; filter `k`
 * rises linearly from edge `k` to edge `k + 1` and falls linearly to edge `k + 2`. Bins
 * outside that span get a weight of zero.
 *
 * @param sampleRate Sample rate in Hz, used to map FFT bins to frequencies.
 * @param fftLength FFT size; each filter has `fftLength / 2 + 1` weights.
 * @param nMels Number of filters.
 * @param fMin Frequency (Hz) of the first edge.
 * @param fMax Frequency (Hz) of the last edge.
 * @return Filter weights indexed as `[mel bin][frequency bin]`.
 */
private fun createMelFilterbank(
    sampleRate: Float,
    fftLength: Int,
    nMels: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    val binCount = fftLength / 2 + 1
    // Centre frequency (Hz) of every FFT bin.
    val binFrequencies = FloatArray(binCount) { bin -> bin * sampleRate / fftLength }
    val edges = melFrequencies(nMels + 2, fMin, fMax)

    return Array(nMels) { filterIdx ->
        val lower = edges[filterIdx]
        val peak = edges[filterIdx + 1]
        val upper = edges[filterIdx + 2]

        FloatArray(binCount) { bin ->
            val hz = binFrequencies[bin]
            if (hz < lower || hz > upper) {
                0f
            } else if (hz <= peak) {
                // Rising slope of the triangle.
                (hz - lower) / (peak - lower)
            } else {
                // Falling slope of the triangle.
                (upper - hz) / (upper - peak)
            }
        }
    }
}
|
|
1804
|
+
|
|
1805
|
+
/**
 * Returns [nMels] frequencies (Hz) spaced evenly on the mel scale between [fMin] and
 * [fMax], both endpoints included.
 *
 * @param nMels Number of points to generate. `0` yields an empty array and `1` yields the
 *   single point corresponding to [fMin].
 * @param fMin Lower bound in Hz.
 * @param fMax Upper bound in Hz.
 * @return The frequencies in Hz, in ascending mel order.
 */
private fun melFrequencies(nMels: Int, fMin: Float, fMax: Float): FloatArray {
    val melMin = hzToMel(fMin)
    val melMax = hzToMel(fMax)

    // A single point leaves no interval to subdivide; the general formula would divide by
    // (nMels - 1) == 0 and return NaN.
    if (nMels == 1) return floatArrayOf(melToHz(melMin))

    return FloatArray(nMels) { i ->
        melToHz(melMin + i * (melMax - melMin) / (nMels - 1))
    }
}
|
|
1815
|
+
|
|
1628
1816
|
private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
|
|
1629
1817
|
val (powerSpectrum, _) = prepareFFT(samples, sampleRate)
|
|
1630
1818
|
val melFilters = computeMelFilterbank(
|
|
@@ -1933,4 +2121,115 @@ class AudioProcessor(private val filesDir: File) {
|
|
|
1933
2121
|
return null
|
|
1934
2122
|
}
|
|
1935
2123
|
}
|
|
2124
|
+
|
|
2125
|
+
/**
 * Decodes a specific time range of an audio file directly to PCM data.
 *
 * This avoids decoding the entire file when only a portion is needed: the extractor seeks
 * to the sync point at or before [startTimeMs], compressed samples are fed to the decoder
 * until one starts after [endTimeMs], the decoder is drained, and the decoded PCM is then
 * cut to the requested range using the presentation time of the first decoded buffer.
 *
 * @param fileUri Location of the audio file, passed to `MediaExtractor.setDataSource`.
 * @param startTimeMs Start of the range in milliseconds.
 * @param endTimeMs End of the range in milliseconds.
 * @return The decoded range, or `null` if the file has no audio track, nothing could be
 *   decoded, or any error occurred (errors are logged).
 */
fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
    val extractor = MediaExtractor()
    var decoder: android.media.MediaCodec? = null

    try {
        extractor.setDataSource(fileUri)
        val trackIndex = (0 until extractor.trackCount).find {
            extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: return null

        extractor.selectTrack(trackIndex)
        val format = extractor.getTrackFormat(trackIndex)
        val mime = format.getString(MediaFormat.KEY_MIME) ?: return null

        // NOTE(review): rate and channel count come from the container's track format;
        // confirm the decoder's output format always matches for the supported codecs.
        val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)

        val codec = android.media.MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()

        val startTimeUs = startTimeMs * 1000
        val endTimeUs = endTimeMs * 1000
        extractor.seekTo(startTimeUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)

        // A byte stream avoids boxing every PCM byte the way a MutableList<Byte> would.
        val pcmStream = java.io.ByteArrayOutputStream()
        val bufferInfo = android.media.MediaCodec.BufferInfo()
        val timeoutUs = 10_000L
        val maxIdlePollsAfterInputEnd = 100
        var idlePolls = 0
        var inputDone = false
        var outputDone = false
        var firstBufferTimeUs: Long? = null

        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(timeoutUs)
                if (inputBufferId >= 0) {
                    val inputBuffer = codec.getInputBuffer(inputBufferId)
                    val sampleSize = if (inputBuffer != null) extractor.readSampleData(inputBuffer, 0) else -1
                    if (sampleSize < 0 || extractor.sampleTime > endTimeUs) {
                        codec.queueInputBuffer(
                            inputBufferId, 0, 0, 0, android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM
                        )
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            // Keep polling after input has ended: the decoder may still hold frames that
            // belong to the requested range, and they only arrive before its own EOS flag.
            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, timeoutUs)
            if (outputBufferId >= 0) {
                idlePolls = 0
                val outputBuffer = codec.getOutputBuffer(outputBufferId)
                if (outputBuffer != null && bufferInfo.size > 0) {
                    if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
                    // Valid data occupies [offset, offset + size) of the buffer.
                    outputBuffer.position(bufferInfo.offset)
                    outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val chunk = ByteArray(bufferInfo.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                codec.releaseOutputBuffer(outputBufferId, false)
                if (bufferInfo.flags and android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) {
                    outputDone = true
                }
            } else if (inputDone && outputBufferId == android.media.MediaCodec.INFO_TRY_AGAIN_LATER) {
                // Safety net so a decoder that never reports EOS cannot hang this call.
                idlePolls++
                if (idlePolls >= maxIdlePollsAfterInputEnd) outputDone = true
            }
        }

        // If we didn't get any data or first buffer time, return null
        val pcmBytes = pcmStream.toByteArray()
        val firstUs = firstBufferTimeUs
        if (pcmBytes.isEmpty() || firstUs == null) {
            return null
        }

        // Trim PCM data to the exact time range. Decoded data is interleaved 16-bit PCM,
        // so time maps to *frames* (one sample per channel); cutting on frame boundaries
        // keeps the channels aligned and the duration right for multi-channel audio.
        val bytesPerFrame = 2 * channels
        val totalFrames = (pcmBytes.size / bytesPerFrame).toLong()
        val startFrame = ((startTimeUs - firstUs) * sampleRate / 1_000_000L)
            .coerceIn(0L, totalFrames)
        val endFrame = ((endTimeUs - firstUs) * sampleRate / 1_000_000L)
            .coerceIn(startFrame, totalFrames)

        val trimmedBytes = pcmBytes.copyOfRange(
            (startFrame * bytesPerFrame).toInt(),
            (endFrame * bytesPerFrame).toInt()
        )

        return AudioData(
            data = trimmedBytes,
            sampleRate = sampleRate,
            channels = channels,
            bitDepth = 16, // MediaCodec typically decodes to 16-bit PCM
            durationMs = endTimeMs - startTimeMs
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to decode audio range: ${e.message}", e)
        return null
    } finally {
        try {
            decoder?.stop()
            decoder?.release()
        } catch (e: Exception) {
            Log.w(Constants.TAG, "Error releasing decoder: ${e.message}")
        }

        try {
            extractor.release()
        } catch (e: Exception) {
            Log.w(Constants.TAG, "Error releasing extractor: ${e.message}")
        }
    }
}
|
|
1936
2235
|
}
|
|
@@ -11,8 +11,6 @@ import android.os.Looper
|
|
|
11
11
|
import expo.modules.kotlin.Promise
|
|
12
12
|
import android.app.NotificationChannel
|
|
13
13
|
import android.app.NotificationManager
|
|
14
|
-
import android.os.Build.VERSION_CODES
|
|
15
|
-
import android.app.Notification
|
|
16
14
|
import androidx.core.app.NotificationCompat
|
|
17
15
|
|
|
18
16
|
class AudioRecordingService : Service() {
|
|
@@ -27,6 +25,8 @@ class AudioRecordingService : Service() {
|
|
|
27
25
|
/**
 * Called when the service instance is created. Logs the event and marks the service as
 * running through both `isRunning` and the companion-level flag.
 */
override fun onCreate() {
    super.onCreate()
    Log.d(Constants.TAG, "AudioRecordingService onCreate")
    // NOTE(review): isRunning is declared outside this view; it is reset to false in onDestroy.
    isRunning = true
    // Mirror the state into the companion flag read by isServiceRunning().
    setServiceRunning(true)
}
|
|
31
31
|
|
|
32
32
|
override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
|
|
@@ -84,6 +84,7 @@ class AudioRecordingService : Service() {
|
|
|
84
84
|
stopForeground(STOP_FOREGROUND_REMOVE)
|
|
85
85
|
|
|
86
86
|
isRunning = false
|
|
87
|
+
setServiceRunning(false)
|
|
87
88
|
super.onDestroy()
|
|
88
89
|
}
|
|
89
90
|
|
|
@@ -122,6 +123,17 @@ class AudioRecordingService : Service() {
|
|
|
122
123
|
}
|
|
123
124
|
|
|
124
125
|
companion object {
|
|
126
|
+
// Companion-level flag recording whether the service is currently considered running.
private var isServiceRunningStatic = false

/** Returns the value most recently stored through [setServiceRunning] (initially false). */
fun isServiceRunning(): Boolean = isServiceRunningStatic

/** Stores [running] as the current "service is running" state. */
fun setServiceRunning(running: Boolean) {
    isServiceRunningStatic = running
}
|
|
136
|
+
|
|
125
137
|
fun startService(context: Context) {
|
|
126
138
|
val serviceIntent = Intent(context, AudioRecordingService::class.java)
|
|
127
139
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
|
@@ -129,10 +141,12 @@ class AudioRecordingService : Service() {
|
|
|
129
141
|
} else {
|
|
130
142
|
context.startService(serviceIntent)
|
|
131
143
|
}
|
|
144
|
+
setServiceRunning(true)
|
|
132
145
|
}
|
|
133
146
|
|
|
134
147
|
/**
 * Asks Android to stop [AudioRecordingService] and clears the running flag.
 *
 * @param context Context used to issue the stop request.
 */
fun stopService(context: Context) {
    val stopIntent = Intent(context, AudioRecordingService::class.java)
    context.stopService(stopIntent)
    setServiceRunning(false)
}
|
|
137
151
|
}
|
|
138
152
|
}
|