@siteed/expo-audio-stream 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +17 -1
  2. package/README.md +202 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +300 -1
  4. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +16 -2
  5. package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +1099 -0
  6. package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
  7. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +274 -44
  8. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +35 -0
  9. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  10. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  11. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +2 -12
  12. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  13. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -26
  14. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  15. package/build/AudioAnalysis/extractAudioData.d.ts +3 -0
  16. package/build/AudioAnalysis/extractAudioData.d.ts.map +1 -0
  17. package/build/AudioAnalysis/extractAudioData.js +5 -0
  18. package/build/AudioAnalysis/extractAudioData.js.map +1 -0
  19. package/build/AudioAnalysis/extractMelSpectrogram.d.ts +14 -0
  20. package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
  21. package/build/AudioAnalysis/extractMelSpectrogram.js +85 -0
  22. package/build/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
  23. package/build/AudioAnalysis/extractPreview.d.ts +11 -0
  24. package/build/AudioAnalysis/extractPreview.d.ts.map +1 -0
  25. package/build/AudioAnalysis/extractPreview.js +25 -0
  26. package/build/AudioAnalysis/extractPreview.js.map +1 -0
  27. package/build/ExpoAudioStream.types.d.ts +329 -3
  28. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  29. package/build/ExpoAudioStream.types.js.map +1 -1
  30. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  31. package/build/ExpoAudioStreamModule.js +455 -1
  32. package/build/ExpoAudioStreamModule.js.map +1 -1
  33. package/build/WebRecorder.web.js +2 -2
  34. package/build/WebRecorder.web.js.map +1 -1
  35. package/build/index.d.ts +6 -3
  36. package/build/index.d.ts.map +1 -1
  37. package/build/index.js +6 -2
  38. package/build/index.js.map +1 -1
  39. package/build/trimAudio.d.ts +25 -0
  40. package/build/trimAudio.d.ts.map +1 -0
  41. package/build/trimAudio.js +67 -0
  42. package/build/trimAudio.js.map +1 -0
  43. package/ios/AudioProcessor.swift +536 -81
  44. package/ios/ExpoAudioStreamModule.swift +125 -18
  45. package/package.json +1 -1
  46. package/plugin/build/index.js +6 -1
  47. package/plugin/src/index.ts +9 -1
  48. package/src/AudioAnalysis/AudioAnalysis.types.ts +38 -1
  49. package/src/AudioAnalysis/extractAudioAnalysis.ts +1 -38
  50. package/src/AudioAnalysis/extractAudioData.ts +6 -0
  51. package/src/AudioAnalysis/extractMelSpectrogram.ts +144 -0
  52. package/src/AudioAnalysis/extractPreview.ts +34 -0
  53. package/src/ExpoAudioStream.types.ts +354 -42
  54. package/src/ExpoAudioStreamModule.ts +682 -1
  55. package/src/WebRecorder.web.ts +2 -2
  56. package/src/index.ts +7 -8
  57. package/src/trimAudio.ts +90 -0
package/CHANGELOG.md CHANGED
@@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8
8
  ## [Unreleased]
9
9
 
10
10
 
11
+ ## [2.1.0] - 2025-03-04
12
+ ### Changed
13
+ - feat(docs): enhance audio processing documentation and examples (#158) ([26afd49](https://github.com/deeeed/expo-audio-stream/commit/26afd4938e1c626294f40b50a84fe15f5c2bb6a1))
14
+ - feat: Add Mel Spectrogram Extraction and Language Detection to Audio Processing (#157) ([4129dee](https://github.com/deeeed/expo-audio-stream/commit/4129dee87c27dd5a9911c85e3dbf045507876cc1))
15
+ - feat: enhance audio import functionality and decibel visualization (#156) ([2dbecc7](https://github.com/deeeed/expo-audio-stream/commit/2dbecc7bd0ea46edd80c2b0e28dd2a0525953362))
16
+ - feat(trim): Implement iOS trim support with custom filename and format improvements (#152) ([dd49be4](https://github.com/deeeed/expo-audio-stream/commit/dd49be42bccbf3ae6cced8c3662237e1668ec2de))
17
+ - feat: Add Sample Rate Control and Web Trimming Support to Expo Audio Stream (#151) ([9158eec](https://github.com/deeeed/expo-audio-stream/commit/9158eeccc10e25ac77ba3a99185b4dbc5abfb353))
18
+ - feat: Enhance audio trimming with optimized processing and detailed feedback (#150) ([41a6945](https://github.com/deeeed/expo-audio-stream/commit/41a694528d1e803dc0012948eec4edfdc336b4fc))
19
+ - feat(trim): add audio trimming functionality with visualization and preview (Android only) (#149) ([cba03dc](https://github.com/deeeed/expo-audio-stream/commit/cba03dc920eb8a1f111b45e8404a42e48076b7cd))
20
+ - chore(expo-audio-stream): release @siteed/expo-audio-stream@2.0.1 ([c77cfc8](https://github.com/deeeed/expo-audio-stream/commit/c77cfc8b70f87a12bb19fa03b245cda7ed2496e1))
21
+ ## [2.0.1] - 2025-02-27
22
+ ### Changed
23
+ - refactor: update background mode handling for audio stream plugin ([e7e98cc](https://github.com/deeeed/expo-audio-stream/commit/e7e98cc60b7965770dcf25e9ae74cb356e1e7097))
24
+ - chore(expo-audio-stream): release @siteed/expo-audio-stream@2.0.0 ([356d3f4](https://github.com/deeeed/expo-audio-stream/commit/356d3f40ffb66806eeecb86d12bcbe5d60b7eea6))
11
25
  ## [2.0.0] - 2025-02-27
12
26
  ### Changed
13
27
  - feat(playground): Enhance Audio Playground with Improved UX and Sample Audio Loading (#148) ([09d2794](https://github.com/deeeed/expo-audio-stream/commit/09d27940dcffa60e662c828742f4577bca5327f9))
@@ -154,7 +168,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
154
168
  - Feature: Audio features extraction during recording.
155
169
  - Feature: Consistent WAV PCM recording format across all platforms.
156
170
 
157
- [unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.0...HEAD
171
+ [unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.1.0...HEAD
172
+ [2.1.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.1...@siteed/expo-audio-stream@2.1.0
173
+ [2.0.1]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.0...@siteed/expo-audio-stream@2.0.1
158
174
  [2.0.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.17.0...@siteed/expo-audio-stream@2.0.0
159
175
  [1.17.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.16.0...@siteed/expo-audio-stream@1.17.0
160
176
  [1.16.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.15.1...@siteed/expo-audio-stream@1.16.0
package/README.md CHANGED
@@ -48,16 +48,217 @@
48
48
  - Background audio recording on iOS.
49
49
  - Audio features extraction during recording.
50
50
  - Consistent WAV PCM recording format across all platforms.
51
- - Keep recording acrtive while app is in background
51
+ - Keep recording active while app is in background
52
52
  - Rich notification system for recording status:
53
53
  - Android: Live waveform visualization in notifications
54
54
  - Android: Fully customizable notification appearance and actions
55
55
  - iOS: Media player integration
56
+ - Advanced audio analysis capabilities:
57
+ - Mel spectrogram generation for machine learning and visualization
58
+ - Comprehensive audio feature extraction (MFCC, spectral features, etc.)
59
+ - Lightweight waveform preview generation
60
+ - Precision audio manipulation:
61
+ - Advanced audio splitting and trimming API
62
+ - Support for trimming multiple segments in a single operation
63
+ - Ability to keep or remove specific time ranges
64
+ - Complete ecosystem:
65
+ - Full-featured AudioPlayground application showcasing advanced API usage
66
+ - Ready-to-use UI components via [@siteed/expo-audio-ui](https://github.com/deeeed/expo-audio-stream/tree/main/packages/expo-audio-ui) package
67
+ - Visualizations, waveforms, and audio controls that can be directly incorporated into your app
68
+
69
+ ## Audio Analysis Features
70
+
71
+ Extract powerful audio features for advanced audio processing and visualization:
72
+
73
+ ```typescript
74
+ // Extract audio analysis with specific features enabled
75
+ const analysis = await extractAudioAnalysis({
76
+ fileUri: 'path/to/recording.wav',
77
+ features: {
78
+ energy: true, // Overall energy of the audio
79
+ rms: true, // Root mean square (amplitude)
80
+ zcr: true, // Zero-crossing rate
81
+ mfcc: true, // Mel-frequency cepstral coefficients
82
+ spectralCentroid: true, // Brightness of sound
83
+ tempo: true, // Estimated BPM
84
+ }
85
+ });
86
+ ```
87
+
88
+ ### Available Audio Features
89
+
90
+ - **Basic Analysis**: RMS, energy, amplitude range, zero-crossing rate
91
+ - **Spectral Features**: Spectral centroid, flatness, rolloff, bandwidth
92
+ - **Advanced Analysis**:
93
+ - MFCC (Mel-frequency cepstral coefficients)
94
+ - Chromagram (pitch class representation)
95
+ - Mel Spectrogram
96
+ - Harmonics-to-noise ratio
97
+ - Tempo estimation
98
+ - Pitch detection
99
+
100
+ ### Use Cases
101
+
102
+ - Visualize audio waveforms with detailed metrics
103
+ - Implement speech recognition preprocessing
104
+ - Create music analysis applications
105
+ - Build audio fingerprinting systems
106
+ - Develop voice activity detection
107
+
108
+ ## API Overview
109
+
110
+ The library provides several specialized APIs for different audio processing needs:
111
+
112
+ ### Recording and Playback
113
+
114
+ - **useAudioRecorder**: Hook for recording audio with configurable quality settings
115
+ - **AudioRecorderProvider**: Context provider for sharing recording state across components
116
+ - **useSharedAudioRecorder**: Hook to access shared recording state from any component
117
+
118
+ ```typescript
119
+ // Start a new recording with configuration
120
+ const { startRecording, stopRecording, isRecording, recordingUri } = useAudioRecorder({
121
+ audioQuality: 'high',
122
+ sampleRate: 44100,
123
+ numberOfChannels: 2,
124
+ bitDepth: 16,
125
+ outputFormat: 'wav',
126
+ });
127
+
128
+ // Share recording state across components
129
+ const AudioApp = () => (
130
+ <AudioRecorderProvider>
131
+ <RecordButton />
132
+ <AudioVisualizer />
133
+ </AudioRecorderProvider>
134
+ );
135
+ ```
136
+
137
+ ### Audio Analysis
138
+
139
+ - **extractAudioAnalysis**: Extract comprehensive audio features for detailed analysis
140
+ - **extractPreview**: Generate lightweight waveform data for visualization
141
+ - **extractAudioData**: Extract raw PCM data for custom processing
142
+ - **extractRawWavAnalysis**: Analyze WAV files without decoding, preserving original PCM values
143
+
144
+ ```typescript
145
+ // Extract detailed audio analysis with feature extraction
146
+ const analysis = await extractAudioAnalysis({
147
+ fileUri: 'path/to/recording.wav',
148
+ features: { rms: true, zcr: true, mfcc: true }
149
+ });
150
+
151
+ // Generate a lightweight waveform preview
152
+ const preview = await extractPreview({
153
+ fileUri: 'path/to/recording.wav',
154
+ pointsPerSecond: 50
155
+ });
156
+
157
+ // Extract raw PCM data for custom processing
158
+ const audioData = await extractAudioData({
159
+ fileUri: 'path/to/recording.wav',
160
+ includeWavHeader: true
161
+ });
162
+ ```
163
+
164
+ #### Choosing the Right Audio Analysis Method
165
+
166
+ | Method | Purpose | Performance | Use When |
167
+ |--------|---------|-------------|----------|
168
+ | `extractAudioAnalysis` | Comprehensive audio feature extraction | Medium-Heavy | You need detailed audio features like MFCC, spectral features |
169
+ | `extractPreview` | Lightweight waveform visualization | Very Light | You only need amplitude data for visualization |
170
+ | `extractAudioData` | Raw PCM data extraction | Medium | You need the raw audio data for custom processing |
171
+ | `extractRawWavAnalysis` | WAV analysis without decoding | Light | You want to analyze WAV files while preserving original values |
172
+ | `extractMelSpectrogram` | Mel spectrogram generation | Heavy | You need frequency-domain representation for ML or visualization |
173
+
174
+ ### Specialized Audio Processing
175
+
176
+ - **extractMelSpectrogram**: Generate mel spectrogram for audio visualization or ML models
177
+ - **trimAudio**: Trim audio files with precision, supporting multiple segments and formats
178
+
179
+ ```typescript
180
+ // Generate mel spectrogram for audio visualization or ML models
181
+ const melSpectrogram = await extractMelSpectrogram({
182
+ fileUri: 'path/to/recording.wav',
183
+ windowSizeMs: 25,
184
+ hopLengthMs: 10,
185
+ nMels: 40
186
+ });
187
+
188
+ // Trim audio files with precision
189
+ const trimmedAudio = await trimAudio({
190
+ fileUri: 'path/to/recording.wav',
191
+ startTimeMs: 1000,
192
+ endTimeMs: 5000,
193
+ outputFormat: { format: 'wav' }
194
+ });
195
+
196
+ // Trim multiple segments from an audio file
197
+ const compiledAudio = await trimAudio({
198
+ fileUri: 'path/to/recording.wav',
199
+ mode: 'keep',
200
+ ranges: [
201
+ { startTimeMs: 1000, endTimeMs: 5000 },
202
+ { startTimeMs: 10000, endTimeMs: 15000 }
203
+ ]
204
+ });
205
+ ```
206
+
207
+ ### Utility Functions
208
+
209
+ - **convertPCMToFloat32**: Convert PCM data to Float32Array for processing
210
+ - **getWavFileInfo**: Extract metadata from WAV files
211
+ - **writeWavHeader**: Create WAV headers for raw PCM data
212
+
213
+ ### Low-Level Access
214
+
215
+ For advanced use cases, the library provides direct access to the native module:
216
+
217
+ ```typescript
218
+ import { ExpoAudioStreamModule } from '@siteed/expo-audio-stream';
219
+
220
+ // Access platform-specific functionality
221
+ const status = await ExpoAudioStreamModule.status();
222
+ const permissions = await ExpoAudioStreamModule.getPermissionsAsync();
223
+ ```
56
224
 
57
225
  ## Documentation
58
226
 
59
227
  For detailed documentation, please refer to the [Getting Started Guide](https://deeeed.github.io/expo-audio-stream/docs/).
60
228
 
229
+ ## Companion Resources
230
+
231
+ ### AudioPlayground Application
232
+
233
+ The repository includes a complete AudioPlayground application that demonstrates advanced usage of the API. This playground serves as both a demonstration and a learning resource:
234
+
235
+ - Interactive examples of all major API features
236
+ - Real-time audio visualization and analysis
237
+ - Code samples you can directly reference for your own implementation
238
+
239
+ Try it online at [https://deeeed.github.io/expo-audio-stream/playground](https://deeeed.github.io/expo-audio-stream/playground) or run it locally from the repository.
240
+
241
+ ### UI Components Package
242
+
243
+ The [@siteed/expo-audio-ui](https://github.com/deeeed/expo-audio-stream/tree/main/packages/expo-audio-ui) package provides ready-to-use UI components for audio applications:
244
+
245
+ ```bash
246
+ # Install the UI components package
247
+ npm install @siteed/expo-audio-ui
248
+
249
+ # or with yarn
250
+ yarn add @siteed/expo-audio-ui
251
+ ```
252
+
253
+ This package includes:
254
+ - Waveform visualizers
255
+ - Audio recording controls
256
+ - Playback components
257
+ - Spectrogram displays
258
+ - And more!
259
+
260
+ All components are built with React Native, Reanimated, and Skia for optimal performance across platforms.
261
+
61
262
  ## License
62
263
 
63
264
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -1,4 +1,5 @@
1
1
  // packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
2
+ // packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
2
3
  package net.siteed.audiostream
3
4
 
4
5
  import java.nio.ByteBuffer
@@ -22,6 +23,12 @@ data class DecodingConfig(
22
23
  val normalizeAudio: Boolean = false // Whether to normalize audio levels
23
24
  )
24
25
 
26
/**
 * Result of a mel spectrogram extraction.
 *
 * Arrays do not have structural equality on the JVM, so the generated data-class
 * `equals`/`hashCode` would compare these properties by reference. They are
 * overridden here to compare by content instead.
 *
 * @property spectrogram 2D array indexed as `[time, frequency]`
 * @property timeStamps time (in seconds) for each frame
 * @property frequencies frequencies (in Hz) for each mel bin
 */
data class SpectrogramData(
    val spectrogram: Array<FloatArray>,
    val timeStamps: FloatArray,
    val frequencies: FloatArray
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is SpectrogramData) return false
        return spectrogram.contentDeepEquals(other.spectrogram) &&
            timeStamps.contentEquals(other.timeStamps) &&
            frequencies.contentEquals(other.frequencies)
    }

    override fun hashCode(): Int {
        var result = spectrogram.contentDeepHashCode()
        result = 31 * result + timeStamps.contentHashCode()
        result = 31 * result + frequencies.contentHashCode()
        return result
    }
}
31
+
25
32
  class AudioProcessor(private val filesDir: File) {
26
33
  companion object {
27
34
  const val DCT_SQRT_DIVISOR = 2.0
@@ -998,7 +1005,7 @@ class AudioProcessor(private val filesDir: File) {
998
1005
  return a + fraction * (b - a)
999
1006
  }
1000
1007
 
1001
- private fun processAudio(
1008
+ fun processAudio(
1002
1009
  pcmData: ByteArray,
1003
1010
  originalSampleRate: Int,
1004
1011
  targetSampleRate: Int?,
@@ -1625,6 +1632,187 @@ class AudioProcessor(private val filesDir: File) {
1625
1632
  return output
1626
1633
  }
1627
1634
 
1635
/**
 * Generates a Hann window with [size] coefficients:
 * `w[i] = 0.5 * (1 - cos(2π·i / (size - 1)))`.
 *
 * Kept separate from `applyHannWindow` so that function does not need to change.
 *
 * @param size number of coefficients; `0` yields an empty array
 * @return the window coefficients
 */
private fun generateHannWindow(size: Int): FloatArray {
    // With a single coefficient the (size - 1) denominator is zero and the formula
    // evaluates 0/0 = NaN; a one-point window is defined as [1].
    if (size == 1) return floatArrayOf(1f)

    val denominator = (size - 1).toFloat()
    return FloatArray(size) { i ->
        0.5f * (1f - cos(2f * PI.toFloat() * i / denominator))
    }
}
1641
+
1642
/**
 * Extracts a mel spectrogram from decoded audio.
 *
 * Steps: convert the PCM bytes to float samples, compute a windowed STFT power
 * spectrum, project it onto a mel filterbank, then optionally apply natural-log
 * scaling and min-max normalization.
 *
 * @param audioData decoded audio; its `data`, `bitDepth` and `sampleRate` are read
 * @param windowSizeMs analysis window length in milliseconds
 * @param hopLengthMs step between consecutive frames in milliseconds
 * @param nMels number of mel bins
 * @param fftLength FFT size; the window (in samples) must not exceed it
 * @param fMin lowest filterbank frequency in Hz
 * @param fMax highest filterbank frequency in Hz (defaults to the Nyquist frequency)
 * @param windowType `"hann"` or `"hamming"` (case-insensitive)
 * @param logScaling when true, each value becomes `ln(max(1e-10, value))`
 * @param normalize when true, values are rescaled to `[0, 1]` across the whole spectrogram
 * @return the spectrogram (`[frame][melBin]`), per-frame timestamps in seconds and
 *   per-bin frequencies in Hz; the spectrogram is empty when the audio is shorter
 *   than one window
 * @throws IllegalArgumentException if [windowType] is unsupported, or the window/hop
 *   sizes derived from the arguments are unusable
 */
fun extractMelSpectrogram(
    audioData: AudioData,
    windowSizeMs: Float = 25f,
    hopLengthMs: Float = 10f,
    nMels: Int = 128,
    fftLength: Int = 2048,
    fMin: Float = 0f,
    fMax: Float = audioData.sampleRate.toFloat() / 2,
    windowType: String = "hann",
    logScaling: Boolean = true,
    normalize: Boolean = false
): SpectrogramData {
    val sampleRate = audioData.sampleRate.toFloat()
    val samples = convertToFloatArray(audioData.data, audioData.bitDepth)

    // Convert ms to samples (truncating).
    val windowSizeSamples = (windowSizeMs * sampleRate / 1000).toInt()
    val hopLengthSamples = (hopLengthMs * sampleRate / 1000).toInt()

    // Fail early with a clear message instead of a NaN window, a division by zero
    // on the hop, or an out-of-bounds write into the FFT frame further down.
    require(windowSizeSamples >= 2) {
        "windowSizeMs=$windowSizeMs is too short at $sampleRate Hz ($windowSizeSamples samples)"
    }
    require(hopLengthSamples >= 1) {
        "hopLengthMs=$hopLengthMs is too short at $sampleRate Hz ($hopLengthSamples samples)"
    }
    require(windowSizeSamples <= fftLength) {
        "Window of $windowSizeSamples samples does not fit in fftLength=$fftLength"
    }

    val window = when (windowType.lowercase()) {
        "hann" -> generateHannWindow(windowSizeSamples)
        "hamming" -> FloatArray(windowSizeSamples) { i ->
            0.54f - 0.46f * cos(2f * PI.toFloat() * i / (windowSizeSamples - 1))
        }
        else -> throw IllegalArgumentException("Unsupported windowType: $windowType")
    }

    // Not even one full window of audio: return an empty spectrogram rather than
    // asking for a negative number of frames.
    if (samples.size < windowSizeSamples) {
        return SpectrogramData(
            emptyArray<FloatArray>(),
            FloatArray(0),
            melFrequencies(nMels, fMin, fMax)
        )
    }

    // Compute STFT
    val stft = computeSTFT(samples, fftLength, windowSizeSamples, hopLengthSamples, window)

    // Apply mel filterbank
    val melSpectrogram = applyMelFilterbank(stft, sampleRate, nMels, fftLength, fMin, fMax)

    // Post-processing: log scaling (floored at 1e-10 to avoid ln(0))
    if (logScaling) {
        for (row in melSpectrogram) {
            for (j in row.indices) {
                row[j] = ln(max(1e-10f, row[j])).toFloat()
            }
        }
    }

    if (normalize) {
        // Float.MIN_VALUE is the smallest POSITIVE float, so it cannot seed a maximum
        // search over log-scaled (usually negative) values; start from the infinities.
        var minVal = Float.POSITIVE_INFINITY
        var maxVal = Float.NEGATIVE_INFINITY
        for (row in melSpectrogram) {
            for (value in row) {
                if (value < minVal) minVal = value
                if (value > maxVal) maxVal = value
            }
        }

        val range = maxVal - minVal
        if (range > 0f) {
            for (row in melSpectrogram) {
                for (j in row.indices) {
                    row[j] = (row[j] - minVal) / range
                }
            }
        }
    }

    // Metadata. Frames start every hopLengthSamples samples, so timestamps are derived
    // from the truncated hop actually used, not from the requested hopLengthMs.
    val numFrames = melSpectrogram.size
    val timeStamps = FloatArray(numFrames) { it * hopLengthSamples / sampleRate }
    val frequencies = melFrequencies(nMels, fMin, fMax)

    return SpectrogramData(melSpectrogram, timeStamps, frequencies)
}
1715
+
1716
/**
 * Computes the power spectrum of a Short-Time Fourier Transform.
 *
 * Each frame takes [windowSize] samples starting every [hopLength] samples, multiplies
 * them by [window], zero-pads to [fftLength] and stores `re² + im²` for the first
 * `fftLength / 2 + 1` bins.
 *
 * @param samples input signal
 * @param fftLength FFT size (frame buffer length)
 * @param windowSize number of samples taken per frame; must not exceed [fftLength]
 * @param hopLength step between frame starts, in samples; must be positive
 * @param window window coefficients; must hold at least [windowSize] values
 * @return `[frame][bin]` power values; empty when [samples] is shorter than one window
 * @throws IllegalArgumentException if the size arguments are inconsistent
 */
private fun computeSTFT(
    samples: FloatArray,
    fftLength: Int,
    windowSize: Int,
    hopLength: Int,
    window: FloatArray
): Array<FloatArray> {
    require(hopLength > 0) { "hopLength must be positive, got $hopLength" }
    require(windowSize <= fftLength) {
        "windowSize ($windowSize) must not exceed fftLength ($fftLength)"
    }
    require(window.size >= windowSize) {
        "window has ${window.size} coefficients but windowSize is $windowSize"
    }

    // Fewer samples than one window would give a non-positive frame count.
    if (samples.size < windowSize) return emptyArray<FloatArray>()

    val fft = FFT(fftLength)
    val numBins = fftLength / 2 + 1
    val numFrames = ((samples.size - windowSize) / hopLength) + 1
    val stft = Array(numFrames) { FloatArray(numBins) }

    for (frameIdx in 0 until numFrames) {
        val start = frameIdx * hopLength
        val end = minOf(start + windowSize, samples.size)
        val frame = FloatArray(fftLength) { 0f } // zero-padded beyond the window

        // Extract and window the frame
        for (i in start until end) {
            frame[i - start] = samples[i] * window[i - start]
        }

        // Compute FFT and power spectrum.
        // NOTE(review): reads the result as interleaved [re, im] pairs — confirm
        // against FFT.processSegment; missing entries are treated as zero.
        val fftResult = fft.processSegment(frame)
        for (bin in 0 until numBins) {
            val real = if (2 * bin < fftResult.size) fftResult[2 * bin] else 0f
            val imag = if (2 * bin + 1 < fftResult.size) fftResult[2 * bin + 1] else 0f
            stft[frameIdx][bin] = real * real + imag * imag
        }
    }
    return stft
}
1749
+
1750
/**
 * Projects an STFT power spectrum onto a mel filterbank.
 *
 * For every frame, each mel bin is the dot product of the frame's spectrum with the
 * corresponding filter built by [createMelFilterbank].
 *
 * @param stft `[frame][bin]` power spectrum
 * @param sampleRate sample rate in Hz, forwarded to the filterbank
 * @param nMels number of mel bins to produce
 * @param fftLength FFT size used to produce [stft], forwarded to the filterbank
 * @param fMin lowest filterbank frequency in Hz
 * @param fMax highest filterbank frequency in Hz
 * @return `[frame][melBin]` values; empty when [stft] has no frames
 */
private fun applyMelFilterbank(
    stft: Array<FloatArray>,
    sampleRate: Float,
    nMels: Int,
    fftLength: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    // No frames: nothing to project, and stft[0] below would be out of bounds.
    if (stft.isEmpty()) return emptyArray<FloatArray>()

    val numFrames = stft.size
    val numBins = stft[0].size
    val melFilters = createMelFilterbank(sampleRate, fftLength, nMels, fMin, fMax)
    val melSpectrogram = Array(numFrames) { FloatArray(nMels) }

    for (frame in 0 until numFrames) {
        val spectrum = stft[frame]
        for (melBin in 0 until nMels) {
            val filter = melFilters[melBin]
            var sum = 0f
            for (bin in 0 until numBins) {
                sum += spectrum[bin] * filter[bin]
            }
            melSpectrogram[frame][melBin] = sum
        }
    }
    return melSpectrogram
}
1775
+
1776
/**
 * Builds a bank of triangular mel filters.
 *
 * `nMels + 2` mel-spaced frequencies are generated between [fMin] and [fMax]; filter
 * `k` rises linearly from point `k` to point `k + 1` and falls linearly to point
 * `k + 2`. Weights are zero outside that span.
 *
 * @param sampleRate sample rate in Hz
 * @param fftLength FFT size; each filter has `fftLength / 2 + 1` weights
 * @param nMels number of filters
 * @param fMin lowest frequency in Hz
 * @param fMax highest frequency in Hz
 * @return `[melBin][fftBin]` filter weights
 */
private fun createMelFilterbank(
    sampleRate: Float,
    fftLength: Int,
    nMels: Int,
    fMin: Float,
    fMax: Float
): Array<FloatArray> {
    val numBins = fftLength / 2 + 1
    // Centre frequency (Hz) of each FFT bin.
    val freqs = FloatArray(numBins) { it * sampleRate / fftLength }
    val melPoints = melFrequencies(nMels + 2, fMin, fMax)
    val melFilters = Array(nMels) { FloatArray(numBins) }

    for (melIdx in 0 until nMels) {
        val fLow = melPoints[melIdx]
        val fCenter = melPoints[melIdx + 1]
        val fHigh = melPoints[melIdx + 2]
        val risingWidth = fCenter - fLow
        val fallingWidth = fHigh - fCenter

        for (bin in freqs.indices) {
            val freq = freqs[bin]
            melFilters[melIdx][bin] = when {
                freq < fLow || freq > fHigh -> 0f
                // A zero-width rising edge (fLow == fCenter) would evaluate 0/0 = NaN;
                // the only frequency that can reach it is the peak itself.
                freq <= fCenter -> if (risingWidth > 0f) (freq - fLow) / risingWidth else 1f
                // Reached only when fCenter < freq <= fHigh, so fallingWidth is positive.
                else -> (fHigh - freq) / fallingWidth
            }
        }
    }
    return melFilters
}
1804
+
1805
/**
 * Generates [nMels] frequencies (in Hz) evenly spaced on the mel scale between
 * [fMin] and [fMax], both inclusive.
 *
 * @param nMels number of points to generate
 * @param fMin first frequency in Hz
 * @param fMax last frequency in Hz
 * @return the frequencies in Hz; a single point is placed at [fMin]
 */
private fun melFrequencies(nMels: Int, fMin: Float, fMax: Float): FloatArray {
    val melMin = hzToMel(fMin)
    val melMax = hzToMel(fMax)

    // One point has no spacing: dividing by (nMels - 1) == 0 would produce NaN.
    if (nMels == 1) return floatArrayOf(melToHz(melMin))

    return FloatArray(nMels) { i ->
        val mel = melMin + i * (melMax - melMin) / (nMels - 1)
        melToHz(mel)
    }
}
1815
+
1628
1816
  private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
1629
1817
  val (powerSpectrum, _) = prepareFFT(samples, sampleRate)
1630
1818
  val melFilters = computeMelFilterbank(
@@ -1933,4 +2121,115 @@ class AudioProcessor(private val filesDir: File) {
1933
2121
  return null
1934
2122
  }
1935
2123
  }
2124
+
2125
/**
 * Decodes a specific time range of an audio file directly to PCM data.
 * This is more efficient than decoding the entire file when only a portion is needed.
 *
 * The extractor seeks to the sync point at or before [startTimeMs], input is fed to the
 * decoder until a sample past [endTimeMs] (or the end of the file) is reached, the
 * decoder is drained until it reports end-of-stream, and the decoded PCM is then cut
 * to the requested range on whole-frame boundaries.
 *
 * @param fileUri location of the audio file, passed to `MediaExtractor.setDataSource`
 * @param startTimeMs start of the range in milliseconds
 * @param endTimeMs end of the range in milliseconds
 * @return the decoded range, or `null` when the file has no audio track, nothing was
 *   decoded, or any error occurred (errors are logged, not thrown)
 */
fun decodeAudioRangeToPCM(fileUri: String, startTimeMs: Long, endTimeMs: Long): AudioData? {
    val extractor = MediaExtractor()
    var decoder: android.media.MediaCodec? = null

    try {
        extractor.setDataSource(fileUri)
        val trackIndex = (0 until extractor.trackCount).find {
            extractor.getTrackFormat(it).getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true
        } ?: return null

        extractor.selectTrack(trackIndex)
        val format = extractor.getTrackFormat(trackIndex)
        val mime = format.getString(MediaFormat.KEY_MIME) ?: return null

        val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        if (sampleRate <= 0 || channels <= 0) return null

        val codec = android.media.MediaCodec.createDecoderByType(mime)
        decoder = codec
        codec.configure(format, null, null, 0)
        codec.start()

        val startUs = startTimeMs * 1000
        val endUs = endTimeMs * 1000
        extractor.seekTo(startUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)

        // Primitive byte sink: a MutableList<Byte> would box every PCM byte.
        val pcmStream = java.io.ByteArrayOutputStream()
        val bufferInfo = android.media.MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false
        var firstBufferTimeUs: Long? = null

        // Keep draining after the last input is queued: the codec still holds decoded
        // frames until it emits a buffer flagged END_OF_STREAM.
        while (!outputDone) {
            if (!inputDone) {
                val inputBufferId = codec.dequeueInputBuffer(10000)
                if (inputBufferId >= 0) {
                    val inputBuffer = checkNotNull(codec.getInputBuffer(inputBufferId)) {
                        "Decoder returned no input buffer for id $inputBufferId"
                    }
                    val sampleSize = extractor.readSampleData(inputBuffer, 0)
                    if (sampleSize < 0 || extractor.sampleTime > endUs) {
                        codec.queueInputBuffer(
                            inputBufferId, 0, 0, 0,
                            android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM
                        )
                        inputDone = true
                    } else {
                        codec.queueInputBuffer(inputBufferId, 0, sampleSize, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outputBufferId = codec.dequeueOutputBuffer(bufferInfo, 10000)
            if (outputBufferId >= 0) {
                if (bufferInfo.size > 0) {
                    val outputBuffer = checkNotNull(codec.getOutputBuffer(outputBufferId)) {
                        "Decoder returned no output buffer for id $outputBufferId"
                    }
                    if (firstBufferTimeUs == null) firstBufferTimeUs = bufferInfo.presentationTimeUs
                    // Valid data is the [offset, offset + size) slice of the buffer.
                    outputBuffer.position(bufferInfo.offset)
                    outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val chunk = ByteArray(bufferInfo.size)
                    outputBuffer.get(chunk)
                    pcmStream.write(chunk, 0, chunk.size)
                }
                if ((bufferInfo.flags and android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    outputDone = true
                }
                codec.releaseOutputBuffer(outputBufferId, false)
            }
        }

        // If we didn't get any data or first buffer time, return null
        val pcmBytes = pcmStream.toByteArray()
        val firstUs = firstBufferTimeUs
        if (pcmBytes.isEmpty() || firstUs == null) {
            return null
        }

        // Trim PCM data to the exact time range, in whole frames. A frame holds one
        // 16-bit sample per channel, so time maps to frames and frames to bytes via
        // bytesPerFrame; indexing interleaved samples by frame number would cut
        // multi-channel audio at the wrong position.
        val bytesPerSample = 2 // 16-bit PCM
        val bytesPerFrame = bytesPerSample * channels
        val totalFrames = (pcmBytes.size / bytesPerFrame).toLong()
        val usPerFrame = 1_000_000.0 / sampleRate

        val startFrame = ((startUs - firstUs) / usPerFrame).toLong().coerceIn(0L, totalFrames)
        val endFrame = ((endUs - firstUs) / usPerFrame).toLong().coerceIn(startFrame, totalFrames)

        val trimmedBytes = pcmBytes.copyOfRange(
            (startFrame * bytesPerFrame).toInt(),
            (endFrame * bytesPerFrame).toInt()
        )

        return AudioData(
            data = trimmedBytes,
            sampleRate = sampleRate,
            channels = channels,
            bitDepth = 16, // MediaCodec typically decodes to 16-bit PCM
            // NOTE(review): this is the requested length; the decoded data can be
            // shorter when the range extends past the end of the file.
            durationMs = endTimeMs - startTimeMs
        )
    } catch (e: Exception) {
        Log.e(Constants.TAG, "Failed to decode audio range: ${e.message}", e)
        return null
    } finally {
        try {
            decoder?.stop()
            decoder?.release()
        } catch (e: Exception) {
            Log.w(Constants.TAG, "Error releasing decoder: ${e.message}")
        }

        try {
            extractor.release()
        } catch (e: Exception) {
            Log.w(Constants.TAG, "Error releasing extractor: ${e.message}")
        }
    }
}
1936
2235
  }
@@ -11,8 +11,6 @@ import android.os.Looper
11
11
  import expo.modules.kotlin.Promise
12
12
  import android.app.NotificationChannel
13
13
  import android.app.NotificationManager
14
- import android.os.Build.VERSION_CODES
15
- import android.app.Notification
16
14
  import androidx.core.app.NotificationCompat
17
15
 
18
16
  class AudioRecordingService : Service() {
@@ -27,6 +25,8 @@ class AudioRecordingService : Service() {
27
25
  override fun onCreate() {
28
26
  super.onCreate()
29
27
  Log.d(Constants.TAG, "AudioRecordingService onCreate")
28
+ isRunning = true
29
+ setServiceRunning(true)
30
30
  }
31
31
 
32
32
  override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
@@ -84,6 +84,7 @@ class AudioRecordingService : Service() {
84
84
  stopForeground(STOP_FOREGROUND_REMOVE)
85
85
 
86
86
  isRunning = false
87
+ setServiceRunning(false)
87
88
  super.onDestroy()
88
89
  }
89
90
 
@@ -122,6 +123,17 @@ class AudioRecordingService : Service() {
122
123
  }
123
124
 
124
125
  companion object {
126
+ // Static flag to track if service is running
127
+ private var isServiceRunningStatic = false
128
+
129
+ fun isServiceRunning(): Boolean {
130
+ return isServiceRunningStatic
131
+ }
132
+
133
+ fun setServiceRunning(running: Boolean) {
134
+ isServiceRunningStatic = running
135
+ }
136
+
125
137
  fun startService(context: Context) {
126
138
  val serviceIntent = Intent(context, AudioRecordingService::class.java)
127
139
  if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
@@ -129,10 +141,12 @@ class AudioRecordingService : Service() {
129
141
  } else {
130
142
  context.startService(serviceIntent)
131
143
  }
144
+ setServiceRunning(true)
132
145
  }
133
146
 
134
147
  fun stopService(context: Context) {
135
148
  context.stopService(Intent(context, AudioRecordingService::class.java))
149
+ setServiceRunning(false)
136
150
  }
137
151
  }
138
152
  }