@siteed/audio-studio 3.0.5 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/README.md +108 -41
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +190 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +29 -83
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +17 -1
- package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +186 -0
- package/android/src/main/java/net/siteed/audiostudio/AudioProcessor.kt +473 -380
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +74 -22
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +86 -19
- package/android/src/main/java/net/siteed/audiostudio/AudioTrimmer.kt +174 -212
- package/android/src/main/java/net/siteed/audiostudio/EventSender.kt +6 -0
- package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +37 -0
- package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +28 -0
- package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +49 -0
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractPreview.js +92 -15
- package/build/cjs/AudioAnalysis/extractPreview.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractPreviewBars.js +134 -0
- package/build/cjs/AudioAnalysis/extractPreviewBars.js.map +1 -0
- package/build/cjs/AudioStudio.types.js.map +1 -1
- package/build/cjs/errors/AudioExtractionError.js +127 -0
- package/build/cjs/errors/AudioExtractionError.js.map +1 -0
- package/build/cjs/index.js +6 -1
- package/build/cjs/index.js.map +1 -1
- package/build/cjs/useAudioRecorder.js +36 -18
- package/build/cjs/useAudioRecorder.js.map +1 -1
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/esm/AudioAnalysis/extractPreview.js +92 -15
- package/build/esm/AudioAnalysis/extractPreview.js.map +1 -1
- package/build/esm/AudioAnalysis/extractPreviewBars.js +128 -0
- package/build/esm/AudioAnalysis/extractPreviewBars.js.map +1 -0
- package/build/esm/AudioStudio.types.js.map +1 -1
- package/build/esm/errors/AudioExtractionError.js +122 -0
- package/build/esm/errors/AudioExtractionError.js.map +1 -0
- package/build/esm/index.js +2 -0
- package/build/esm/index.js.map +1 -1
- package/build/esm/useAudioRecorder.js +36 -18
- package/build/esm/useAudioRecorder.js.map +1 -1
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +79 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractPreview.d.ts +2 -2
- package/build/types/AudioAnalysis/extractPreview.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractPreviewBars.d.ts +12 -0
- package/build/types/AudioAnalysis/extractPreviewBars.d.ts.map +1 -0
- package/build/types/AudioStudio.types.d.ts +14 -1
- package/build/types/AudioStudio.types.d.ts.map +1 -1
- package/build/types/errors/AudioExtractionError.d.ts +24 -0
- package/build/types/errors/AudioExtractionError.d.ts.map +1 -0
- package/build/types/index.d.ts +3 -0
- package/build/types/index.d.ts.map +1 -1
- package/build/types/useAudioRecorder.d.ts.map +1 -1
- package/ios/AudioProcessor.swift +99 -0
- package/ios/AudioStreamManager.swift +79 -15
- package/ios/AudioStudioModule.swift +63 -0
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +41 -1
- package/package.json +7 -7
- package/src/AudioAnalysis/AudioAnalysis.types.ts +82 -0
- package/src/AudioAnalysis/extractPreview.ts +118 -17
- package/src/AudioAnalysis/extractPreviewBars.ts +193 -0
- package/src/AudioStudio.types.ts +15 -1
- package/src/errors/AudioExtractionError.ts +167 -0
- package/src/index.ts +10 -0
- package/src/useAudioRecorder.tsx +36 -14
package/CHANGELOG.md
CHANGED
|
@@ -7,7 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [3.1.1] - 2026-05-08
|
|
11
|
+
### Fixed
|
|
12
|
+
- Trust Android final decoded PCM metadata for range extraction and trimming, including JS `Number` options bridged as Kotlin `Double`.
|
|
13
|
+
- Keep default Android validation deterministic by replacing physical loopback assertions and gating long recorder benchmarks behind an opt-in flag.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Document the playground CDP regression-contract workflow and make optional VAD model loading cache-first.
|
|
10
17
|
|
|
18
|
+
|
|
19
|
+
## [3.1.0] - 2026-05-01
|
|
20
|
+
### Changed
|
|
21
|
+
- fix(audio-studio): preserve pause intent across interruptions (#375) ([2f0f731](https://github.com/deeeed/audiolab/commit/2f0f731e412f45fc81c4fe46bec2abd3f15b4824))
|
|
22
|
+
- fix(android): avoid recorder crashes from stale system callbacks (#374) ([34a9bc0](https://github.com/deeeed/audiolab/commit/34a9bc0c2f7c4e3e569862e8db43710abbde9043))
|
|
23
|
+
- Document when to avoid retaining live analysis history (#373) ([aa617b0](https://github.com/deeeed/audiolab/commit/aa617b048dd790218dda43ec2ed21e0abaf38daf))
|
|
24
|
+
- Let long-running analysis skip full history retention (#372) ([13c230c](https://github.com/deeeed/audiolab/commit/13c230cde655131a3a7c9472d294b2d432f79d50))
|
|
25
|
+
- Keep low-rate iOS AAC recordings from losing compressed output (#371) ([a689eb0](https://github.com/deeeed/audiolab/commit/a689eb03c7436429bd3bf997430f7f7c842b2e57))
|
|
26
|
+
- chore(audio-studio): release @siteed/audio-studio@3.0.5 ([9dff021](https://github.com/deeeed/audiolab/commit/9dff0219993803d29a03946cf81fe2bedb541cab))
|
|
11
27
|
## [3.0.5] - 2026-04-25
|
|
12
28
|
### Changed
|
|
13
29
|
- fix(audio-studio): don't start notification on prepareRecording (Android) (#364) ([5d40d7e](https://github.com/deeeed/audiolab/commit/5d40d7e730f2b74459d319979fd9c112891b10f2))
|
|
@@ -349,7 +365,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
349
365
|
- Audio features extraction during recording
|
|
350
366
|
- Consistent WAV PCM recording format across all platforms
|
|
351
367
|
|
|
352
|
-
[unreleased]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.0.5...HEAD
|
|
368
|
+
[unreleased]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.1.1...HEAD
|
|
369
|
+
[3.1.1]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.1.0...@siteed/audio-studio@3.1.1
|
|
370
|
+
[3.1.0]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.0.5...@siteed/audio-studio@3.1.0
|
|
353
371
|
[3.0.5]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.0.4...@siteed/audio-studio@3.0.5
|
|
354
372
|
[3.0.4]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.0.3...@siteed/audio-studio@3.0.4
|
|
355
373
|
[3.0.3]: https://github.com/deeeed/audiolab/compare/@siteed/audio-studio@3.0.2...@siteed/audio-studio@3.0.3
|
package/README.md
CHANGED
|
@@ -54,20 +54,20 @@ yarn add @siteed/audio-studio
|
|
|
54
54
|
## Quick Start
|
|
55
55
|
|
|
56
56
|
```typescript
|
|
57
|
-
import { useAudioRecorder } from '@siteed/audio-studio'
|
|
57
|
+
import { useAudioRecorder } from '@siteed/audio-studio'
|
|
58
58
|
|
|
59
|
-
const { startRecording, stopRecording, isRecording } = useAudioRecorder()
|
|
59
|
+
const { startRecording, stopRecording, isRecording } = useAudioRecorder()
|
|
60
60
|
|
|
61
61
|
// Record
|
|
62
62
|
await startRecording({
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
})
|
|
63
|
+
sampleRate: 44100,
|
|
64
|
+
channels: 1,
|
|
65
|
+
encoding: 'pcm_16bit',
|
|
66
|
+
})
|
|
67
67
|
|
|
68
68
|
// ... later
|
|
69
|
-
const result = await stopRecording()
|
|
70
|
-
console.log('Saved to:', result.fileUri);
|
|
69
|
+
const result = await stopRecording()
|
|
70
|
+
console.log('Saved to:', result.fileUri)
|
|
71
71
|
```
|
|
72
72
|
|
|
73
73
|
### Zero-Latency Recording
|
|
@@ -75,12 +75,17 @@ console.log('Saved to:', result.fileUri);
|
|
|
75
75
|
Pre-initialize to eliminate startup delay:
|
|
76
76
|
|
|
77
77
|
```typescript
|
|
78
|
-
const { prepareRecording, startRecording, stopRecording } =
|
|
78
|
+
const { prepareRecording, startRecording, stopRecording } =
|
|
79
|
+
useSharedAudioRecorder()
|
|
79
80
|
|
|
80
|
-
await prepareRecording({
|
|
81
|
+
await prepareRecording({
|
|
82
|
+
sampleRate: 44100,
|
|
83
|
+
channels: 1,
|
|
84
|
+
encoding: 'pcm_16bit',
|
|
85
|
+
})
|
|
81
86
|
|
|
82
87
|
// Later — starts instantly
|
|
83
|
-
await startRecording()
|
|
88
|
+
await startRecording()
|
|
84
89
|
```
|
|
85
90
|
|
|
86
91
|
### Shared State Across Components
|
|
@@ -100,57 +105,83 @@ Set `streamFormat: 'float32'` to get `Float32Array` on all platforms instead of
|
|
|
100
105
|
|
|
101
106
|
```typescript
|
|
102
107
|
await startRecording({
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
})
|
|
108
|
+
sampleRate: 16000,
|
|
109
|
+
channels: 1,
|
|
110
|
+
encoding: 'pcm_32bit',
|
|
111
|
+
streamFormat: 'float32',
|
|
112
|
+
onAudioStream: async (event) => {
|
|
113
|
+
const samples = event.data as Float32Array
|
|
114
|
+
await myModel.feed(samples)
|
|
115
|
+
},
|
|
116
|
+
})
|
|
112
117
|
```
|
|
113
118
|
|
|
114
119
|
## Audio Analysis
|
|
115
120
|
|
|
121
|
+
For live analysis during recording, `useAudioRecorder` keeps a recent analysis
|
|
122
|
+
window in `analysisData` for visualization and, by default, also retains the
|
|
123
|
+
full analysis history so `stopRecording().analysisData` can describe the whole
|
|
124
|
+
recording. This option only matters when `enableProcessing: true`. For
|
|
125
|
+
long-running sessions that only need live callbacks, disable the full-history
|
|
126
|
+
retention to avoid unbounded JS memory growth:
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
await startRecording({
|
|
130
|
+
sampleRate: 16000,
|
|
131
|
+
channels: 1,
|
|
132
|
+
enableProcessing: true,
|
|
133
|
+
keepFullAnalysis: false,
|
|
134
|
+
onAudioAnalysis: async (analysis) => {
|
|
135
|
+
// Consume each analysis chunk without retaining the full recording history.
|
|
136
|
+
updateVoiceActivity(analysis.dataPoints)
|
|
137
|
+
},
|
|
138
|
+
})
|
|
139
|
+
```
|
|
140
|
+
|
|
116
141
|
```typescript
|
|
117
|
-
import {
|
|
142
|
+
import {
|
|
143
|
+
extractAudioAnalysis,
|
|
144
|
+
extractPreview,
|
|
145
|
+
extractMelSpectrogram,
|
|
146
|
+
trimAudio,
|
|
147
|
+
} from '@siteed/audio-studio'
|
|
118
148
|
|
|
119
149
|
// Feature extraction
|
|
120
150
|
const analysis = await extractAudioAnalysis({
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
})
|
|
151
|
+
fileUri: 'path/to/recording.wav',
|
|
152
|
+
features: { rms: true, zcr: true, mfcc: true, spectralCentroid: true },
|
|
153
|
+
})
|
|
124
154
|
|
|
125
155
|
// Lightweight waveform for visualization
|
|
126
156
|
const preview = await extractPreview({
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
})
|
|
157
|
+
fileUri: 'path/to/recording.wav',
|
|
158
|
+
pointsPerSecond: 50,
|
|
159
|
+
})
|
|
130
160
|
|
|
131
161
|
// Mel spectrogram for ML
|
|
132
162
|
const mel = await extractMelSpectrogram({
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
163
|
+
fileUri: 'path/to/recording.wav',
|
|
164
|
+
nMels: 40,
|
|
165
|
+
hopLengthMs: 10,
|
|
166
|
+
})
|
|
136
167
|
|
|
137
168
|
// Trim audio
|
|
138
169
|
const trimmed = await trimAudio({
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
})
|
|
170
|
+
fileUri: 'path/to/recording.wav',
|
|
171
|
+
ranges: [{ startTimeMs: 1000, endTimeMs: 5000 }],
|
|
172
|
+
mode: 'keep',
|
|
173
|
+
})
|
|
143
174
|
```
|
|
144
175
|
|
|
145
176
|
### Which Method to Use
|
|
146
177
|
|
|
147
|
-
| Method
|
|
148
|
-
|
|
149
|
-
| `extractPreview`
|
|
150
|
-
| `extractRawWavAnalysis` | Light
|
|
151
|
-
| `extractAudioData`
|
|
152
|
-
| `extractAudioAnalysis`
|
|
153
|
-
| `extractMelSpectrogram` | Heavy
|
|
178
|
+
| Method | Cost | Use case |
|
|
179
|
+
| ----------------------- | ------------ | ------------------------------------- |
|
|
180
|
+
| `extractPreview` | Light | Waveform visualization |
|
|
181
|
+
| `extractRawWavAnalysis` | Light | WAV metadata without decoding |
|
|
182
|
+
| `extractAudioData` | Medium | Raw PCM for custom processing |
|
|
183
|
+
| `extractAudioAnalysis` | Medium-Heavy | MFCC, spectral features, pitch, tempo |
|
|
184
|
+
| `extractMelSpectrogram` | Heavy | Frequency-domain for ML |
|
|
154
185
|
|
|
155
186
|
## Docs
|
|
156
187
|
|
|
@@ -164,4 +195,40 @@ const trimmed = await trimAudio({
|
|
|
164
195
|
MIT — see [LICENSE](LICENSE).
|
|
165
196
|
|
|
166
197
|
---
|
|
198
|
+
|
|
167
199
|
<sub>Created by [Arthur Breton](https://siteed.net)</sub>
|
|
200
|
+
|
|
201
|
+
### Compact waveform preview bars
|
|
202
|
+
|
|
203
|
+
For UI waveform previews, prefer `extractPreviewBars` over adapting a full
|
|
204
|
+
`AudioAnalysis` when detailed features are not needed:
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
import { extractPreviewBars } from '@siteed/audio-studio'
|
|
208
|
+
|
|
209
|
+
const preview = await extractPreviewBars({
|
|
210
|
+
fileUri,
|
|
211
|
+
numberOfBars: 120,
|
|
212
|
+
startTimeMs: 0,
|
|
213
|
+
endTimeMs: 30_000,
|
|
214
|
+
})
|
|
215
|
+
|
|
216
|
+
console.log(preview.bars, preview.durationMs, preview.amplitudeRange)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
`extractPreviewBars` returns compact `PreviewBar[]` data plus duration, sample
|
|
220
|
+
rate, channel count, bit depth, amplitude/RMS ranges, and extraction timing. The
|
|
221
|
+
existing `extractPreview` API remains available for compatibility with callers
|
|
222
|
+
that expect `AudioAnalysis` / `DataPoint[]`.
|
|
223
|
+
|
|
224
|
+
Native Android and iOS expose an `extractPreviewBars` bridge for compact
|
|
225
|
+
bars-out results. JS also keeps a compatibility fallback through `extractPreview`
|
|
226
|
+
for older native runtimes that do not yet provide the compact bridge.
|
|
227
|
+
|
|
228
|
+
#### C++ scope note
|
|
229
|
+
|
|
230
|
+
Waveform preview bar extraction intentionally keeps file decode in platform
|
|
231
|
+
code. A future C++ `WaveformBarsProcessor` should be considered only as a pure
|
|
232
|
+
PCM-in/bars-out processor if Kotlin/Swift/Web implementations become a real
|
|
233
|
+
maintenance problem or if waveform bars are bundled into a broader shared
|
|
234
|
+
processor/VAD effort. This is not a formal benchmark claim.
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
package net.siteed.audiostudio
|
|
2
|
+
|
|
3
|
+
import android.content.Context
|
|
4
|
+
import android.net.Uri
|
|
5
|
+
import androidx.test.ext.junit.runners.AndroidJUnit4
|
|
6
|
+
import androidx.test.platform.app.InstrumentationRegistry
|
|
7
|
+
import org.junit.After
|
|
8
|
+
import org.junit.Assert.assertEquals
|
|
9
|
+
import org.junit.Assert.assertTrue
|
|
10
|
+
import org.junit.Before
|
|
11
|
+
import org.junit.Test
|
|
12
|
+
import org.junit.runner.RunWith
|
|
13
|
+
import java.io.File
|
|
14
|
+
import java.nio.ByteBuffer
|
|
15
|
+
import java.nio.ByteOrder
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Regression coverage for Android range processing where the final PCM bytes,
|
|
19
|
+
* returned metadata, and WAV headers must all describe the post-conversion data.
|
|
20
|
+
*/
|
|
21
|
+
@RunWith(AndroidJUnit4::class)
|
|
22
|
+
class AudioFinalMetadataContractInstrumentedTest {
|
|
23
|
+
private lateinit var context: Context
|
|
24
|
+
private lateinit var filesDir: File
|
|
25
|
+
private lateinit var audioProcessor: AudioProcessor
|
|
26
|
+
|
|
27
|
+
@Before
|
|
28
|
+
fun setUp() {
|
|
29
|
+
context = InstrumentationRegistry.getInstrumentation().targetContext
|
|
30
|
+
filesDir = context.filesDir
|
|
31
|
+
audioProcessor = AudioProcessor(filesDir)
|
|
32
|
+
copyAssetToFilesDir("chorus.wav")
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
@After
|
|
36
|
+
fun tearDown() {
|
|
37
|
+
filesDir.listFiles()?.forEach { file ->
|
|
38
|
+
if (file.name.startsWith("final_metadata_contract_") || file.name == "chorus.wav") {
|
|
39
|
+
file.delete()
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@Test
|
|
45
|
+
fun loadAudioRange_returnsMetadataFromFinalConvertedWavBytes() {
|
|
46
|
+
val audioData = audioProcessor.loadAudioRange(
|
|
47
|
+
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
48
|
+
startTimeMs = 0,
|
|
49
|
+
endTimeMs = ONE_SECOND_MS,
|
|
50
|
+
config = DecodingConfig(
|
|
51
|
+
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
52
|
+
targetChannels = TARGET_CHANNELS,
|
|
53
|
+
targetBitDepth = TARGET_BIT_DEPTH,
|
|
54
|
+
normalizeAudio = false
|
|
55
|
+
)
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
val converted = requireNotNull(audioData) { "Audio range should load" }
|
|
59
|
+
val bytesPerTargetFrame = TARGET_CHANNELS * BYTES_PER_TARGET_SAMPLE
|
|
60
|
+
val finalFrameCount = converted.data.size / bytesPerTargetFrame
|
|
61
|
+
val durationFromFinalBytes = finalFrameCount * 1_000L / TARGET_SAMPLE_RATE
|
|
62
|
+
|
|
63
|
+
assertEquals("sampleRate should describe final converted bytes", TARGET_SAMPLE_RATE, converted.sampleRate)
|
|
64
|
+
assertEquals("channels should describe final converted bytes", TARGET_CHANNELS, converted.channels)
|
|
65
|
+
assertEquals("bitDepth should describe final converted bytes", TARGET_BIT_DEPTH, converted.bitDepth)
|
|
66
|
+
assertEquals("final PCM data must end on a target frame boundary", 0, converted.data.size % bytesPerTargetFrame)
|
|
67
|
+
assertEquals(
|
|
68
|
+
"duration should be derived from actual final PCM bytes",
|
|
69
|
+
durationFromFinalBytes,
|
|
70
|
+
converted.durationMs
|
|
71
|
+
)
|
|
72
|
+
assertTrue(
|
|
73
|
+
"duration should remain close to requested range: ${converted.durationMs}ms",
|
|
74
|
+
kotlin.math.abs(converted.durationMs - ONE_SECOND_MS) <= 25
|
|
75
|
+
)
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
@Test
|
|
79
|
+
fun loadAudioRange_alignsConvertedWavBytesToTargetFrameSize() {
|
|
80
|
+
val audioData = audioProcessor.loadAudioRange(
|
|
81
|
+
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
82
|
+
startTimeMs = 0,
|
|
83
|
+
endTimeMs = ONE_SECOND_MS,
|
|
84
|
+
config = DecodingConfig(
|
|
85
|
+
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
86
|
+
targetChannels = TARGET_CHANNELS,
|
|
87
|
+
targetBitDepth = TARGET_BIT_DEPTH,
|
|
88
|
+
normalizeAudio = false
|
|
89
|
+
)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
val converted = requireNotNull(audioData) { "Audio range should load" }
|
|
93
|
+
val bytesPerTargetFrame = TARGET_CHANNELS * BYTES_PER_TARGET_SAMPLE
|
|
94
|
+
|
|
95
|
+
assertEquals("final PCM data must end on a target frame boundary", 0, converted.data.size % bytesPerTargetFrame)
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
@Test
|
|
99
|
+
fun trimAudio_writesWavHeaderFromFinalConvertedBytes() {
|
|
100
|
+
val outputFileName = "final_metadata_contract_processor_trim.wav"
|
|
101
|
+
val trimmed = audioProcessor.trimAudio(
|
|
102
|
+
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
103
|
+
startTimeMs = 0,
|
|
104
|
+
endTimeMs = ONE_SECOND_MS,
|
|
105
|
+
config = DecodingConfig(
|
|
106
|
+
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
107
|
+
targetChannels = TARGET_CHANNELS,
|
|
108
|
+
targetBitDepth = TARGET_BIT_DEPTH,
|
|
109
|
+
normalizeAudio = false
|
|
110
|
+
),
|
|
111
|
+
outputFileName = outputFileName
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
requireNotNull(trimmed) { "Trimmed audio should be returned" }
|
|
115
|
+
val header = readWavHeader(File(filesDir, outputFileName))
|
|
116
|
+
|
|
117
|
+
assertEquals("WAV header sample rate should be target sample rate", TARGET_SAMPLE_RATE, header.sampleRate)
|
|
118
|
+
assertEquals("WAV header channels should be target channels", TARGET_CHANNELS, header.channels)
|
|
119
|
+
assertEquals("WAV header bit depth should be target bit depth", TARGET_BIT_DEPTH, header.bitDepth)
|
|
120
|
+
assertEquals("WAV data chunk should match returned final PCM bytes", trimmed.data.size, header.dataSize)
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
@Test
|
|
124
|
+
fun audioTrimmer_honorsJsNumberOutputFormatWhenWritingWavHeader() {
|
|
125
|
+
val trimmer = AudioTrimmer(context, AudioFileHandler(filesDir))
|
|
126
|
+
val result = trimmer.trimAudio(
|
|
127
|
+
fileUri = Uri.fromFile(File(filesDir, "chorus.wav")).toString(),
|
|
128
|
+
startTimeMs = 0,
|
|
129
|
+
endTimeMs = ONE_SECOND_MS,
|
|
130
|
+
outputFileName = "final_metadata_contract_audio_trimmer",
|
|
131
|
+
outputFormat = mapOf(
|
|
132
|
+
"format" to "wav",
|
|
133
|
+
"sampleRate" to TARGET_SAMPLE_RATE.toDouble(),
|
|
134
|
+
"channels" to TARGET_CHANNELS.toDouble(),
|
|
135
|
+
"bitDepth" to TARGET_BIT_DEPTH.toDouble()
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
val outputPath = result["uri"] as String
|
|
140
|
+
val header = readWavHeader(File(outputPath))
|
|
141
|
+
|
|
142
|
+
assertEquals("Double sampleRate option should drive WAV header", TARGET_SAMPLE_RATE, header.sampleRate)
|
|
143
|
+
assertEquals("Double channels option should drive WAV header", TARGET_CHANNELS, header.channels)
|
|
144
|
+
assertEquals("Double bitDepth option should drive WAV header", TARGET_BIT_DEPTH, header.bitDepth)
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
private fun copyAssetToFilesDir(fileName: String) {
|
|
148
|
+
context.assets.open(fileName).use { input ->
|
|
149
|
+
File(filesDir, fileName).outputStream().use { output ->
|
|
150
|
+
input.copyTo(output)
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
private fun readWavHeader(file: File): WavHeader {
|
|
156
|
+
assertTrue("WAV file should exist: ${file.absolutePath}", file.exists())
|
|
157
|
+
val bytes = file.inputStream().use { it.readNBytes(44) }
|
|
158
|
+
assertEquals("RIFF", String(bytes.sliceArray(0..3)))
|
|
159
|
+
assertEquals("WAVE", String(bytes.sliceArray(8..11)))
|
|
160
|
+
assertEquals("data", String(bytes.sliceArray(36..39)))
|
|
161
|
+
|
|
162
|
+
return WavHeader(
|
|
163
|
+
channels = bytes.shortAt(22),
|
|
164
|
+
sampleRate = bytes.intAt(24),
|
|
165
|
+
bitDepth = bytes.shortAt(34),
|
|
166
|
+
dataSize = bytes.intAt(40)
|
|
167
|
+
)
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
private fun ByteArray.shortAt(offset: Int): Int =
|
|
171
|
+
ByteBuffer.wrap(this, offset, 2).order(ByteOrder.LITTLE_ENDIAN).short.toInt()
|
|
172
|
+
|
|
173
|
+
private fun ByteArray.intAt(offset: Int): Int =
|
|
174
|
+
ByteBuffer.wrap(this, offset, 4).order(ByteOrder.LITTLE_ENDIAN).int
|
|
175
|
+
|
|
176
|
+
private data class WavHeader(
|
|
177
|
+
val channels: Int,
|
|
178
|
+
val sampleRate: Int,
|
|
179
|
+
val bitDepth: Int,
|
|
180
|
+
val dataSize: Int
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
companion object {
|
|
184
|
+
private const val ONE_SECOND_MS = 1_000L
|
|
185
|
+
private const val TARGET_SAMPLE_RATE = 16_000
|
|
186
|
+
private const val TARGET_CHANNELS = 2
|
|
187
|
+
private const val TARGET_BIT_DEPTH = 16
|
|
188
|
+
private const val BYTES_PER_TARGET_SAMPLE = TARGET_BIT_DEPTH / 8
|
|
189
|
+
}
|
|
190
|
+
}
|
package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt
CHANGED
|
@@ -2,9 +2,6 @@ package net.siteed.audiostudio
|
|
|
2
2
|
|
|
3
3
|
import android.Manifest
|
|
4
4
|
import android.content.Context
|
|
5
|
-
import android.media.AudioFormat
|
|
6
|
-
import android.media.AudioManager
|
|
7
|
-
import android.media.AudioTrack
|
|
8
5
|
import android.os.Bundle
|
|
9
6
|
import androidx.test.ext.junit.runners.AndroidJUnit4
|
|
10
7
|
import androidx.test.platform.app.InstrumentationRegistry
|
|
@@ -17,6 +14,8 @@ import org.junit.Rule
|
|
|
17
14
|
import org.junit.Test
|
|
18
15
|
import org.junit.runner.RunWith
|
|
19
16
|
import java.io.File
|
|
17
|
+
import java.nio.ByteBuffer
|
|
18
|
+
import java.nio.ByteOrder
|
|
20
19
|
import java.util.concurrent.CountDownLatch
|
|
21
20
|
import java.util.concurrent.TimeUnit
|
|
22
21
|
import kotlin.math.sin
|
|
@@ -363,44 +362,23 @@ class AudioRecorderInstrumentedTest {
|
|
|
363
362
|
}
|
|
364
363
|
|
|
365
364
|
@Test
|
|
366
|
-
fun
|
|
367
|
-
//
|
|
368
|
-
//
|
|
369
|
-
|
|
370
|
-
//
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
365
|
+
fun testGeneratedToneAnalysis_verifiesAudioContentFeatures() {
|
|
366
|
+
// Speaker-to-microphone loopback is device/environment dependent and flaky:
|
|
367
|
+
// volume, routing, echo cancellation, and physical placement can all turn a
|
|
368
|
+
// valid recorder run into near-silence. Keep recorder coverage in the
|
|
369
|
+
// lifecycle/file tests above, validate the physical mic path via the
|
|
370
|
+
// playground CDP/manual recorder flow, and verify tone analysis here
|
|
371
|
+
// with deterministic PCM.
|
|
372
|
+
val sampleRate = 44100
|
|
373
|
+
val tonePcm = generateTonePcm(
|
|
374
|
+
frequency = 1000.0,
|
|
375
|
+
durationMs = 1000,
|
|
376
|
+
sampleRate = sampleRate
|
|
377
377
|
)
|
|
378
|
-
|
|
379
|
-
// Start recording
|
|
380
|
-
startRecordingSync(recordingOptions)
|
|
381
|
-
|
|
382
|
-
// Play a 1kHz tone for 1 second
|
|
383
|
-
playTone(1000.0, 1000)
|
|
384
|
-
|
|
385
|
-
// Stop recording
|
|
386
|
-
val result = stopRecordingSync()
|
|
387
|
-
|
|
388
|
-
// Load and analyze the recorded file
|
|
389
|
-
val fileUri = result["fileUri"] as String
|
|
390
|
-
val audioFile = when {
|
|
391
|
-
fileUri.startsWith("file://") -> File(java.net.URI(fileUri))
|
|
392
|
-
fileUri.startsWith("file:") -> File(java.net.URI(fileUri))
|
|
393
|
-
else -> File(fileUri)
|
|
394
|
-
}
|
|
395
|
-
|
|
378
|
+
|
|
396
379
|
val audioProcessor = AudioProcessor(filesDir)
|
|
397
|
-
val audioData = audioProcessor.loadAudioFromAnyFormat(audioFile.absolutePath, null)
|
|
398
|
-
|
|
399
|
-
assertNotNull("Should load audio data", audioData)
|
|
400
|
-
|
|
401
|
-
// Analyze the audio to verify it contains the tone
|
|
402
380
|
val config = RecordingConfig(
|
|
403
|
-
sampleRate =
|
|
381
|
+
sampleRate = sampleRate,
|
|
404
382
|
channels = 1,
|
|
405
383
|
encoding = "pcm_16bit",
|
|
406
384
|
features = mapOf(
|
|
@@ -410,22 +388,18 @@ class AudioRecorderInstrumentedTest {
|
|
|
410
388
|
)
|
|
411
389
|
)
|
|
412
390
|
|
|
413
|
-
val analysis = audioProcessor.processAudioData(
|
|
391
|
+
val analysis = audioProcessor.processAudioData(tonePcm, config)
|
|
414
392
|
|
|
415
|
-
// Verify we captured audio with energy (not silence)
|
|
416
393
|
val dataPoints = analysis.dataPoints
|
|
417
394
|
assertTrue("Should have data points", dataPoints.isNotEmpty())
|
|
418
395
|
|
|
419
|
-
// Check that we have non-zero RMS values indicating captured audio
|
|
420
396
|
val avgRms = dataPoints.map { it.rms }.average()
|
|
421
|
-
assertTrue("Average RMS should indicate
|
|
397
|
+
assertTrue("Average RMS should indicate deterministic tone energy", avgRms > 0.01)
|
|
422
398
|
|
|
423
|
-
// Check that features were extracted
|
|
424
399
|
val firstPointWithFeatures = dataPoints.firstOrNull { it.features != null }
|
|
425
400
|
assertNotNull("Should have at least one data point with features", firstPointWithFeatures)
|
|
426
401
|
|
|
427
402
|
// The spectral centroid of a 1kHz tone should be around 1000Hz
|
|
428
|
-
// Note: spectral centroid can be affected by recording quality and background noise
|
|
429
403
|
val spectralCentroids = dataPoints.mapNotNull { it.features?.spectralCentroid }.filter { it > 0 }
|
|
430
404
|
assertTrue("Should have spectral centroid values", spectralCentroids.isNotEmpty())
|
|
431
405
|
val avgSpectralCentroid = spectralCentroids.average()
|
|
@@ -433,10 +407,10 @@ class AudioRecorderInstrumentedTest {
|
|
|
433
407
|
// Log the actual value for debugging
|
|
434
408
|
println("Average spectral centroid: $avgSpectralCentroid Hz")
|
|
435
409
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
410
|
+
assertTrue(
|
|
411
|
+
"Spectral centroid should indicate tonal content (was $avgSpectralCentroid Hz)",
|
|
412
|
+
avgSpectralCentroid > 700 && avgSpectralCentroid < 1300
|
|
413
|
+
)
|
|
440
414
|
}
|
|
441
415
|
|
|
442
416
|
// ========== Helper Methods ==========
|
|
@@ -499,43 +473,15 @@ class AudioRecorderInstrumentedTest {
|
|
|
499
473
|
return map
|
|
500
474
|
}
|
|
501
475
|
|
|
502
|
-
private fun
|
|
503
|
-
val sampleRate = 44100
|
|
476
|
+
private fun generateTonePcm(frequency: Double, durationMs: Int, sampleRate: Int): ByteArray {
|
|
504
477
|
val numSamples = (sampleRate * durationMs / 1000.0).toInt()
|
|
505
|
-
val
|
|
506
|
-
|
|
507
|
-
// Generate sine wave
|
|
478
|
+
val buffer = ByteBuffer.allocate(numSamples * 2).order(ByteOrder.LITTLE_ENDIAN)
|
|
479
|
+
|
|
508
480
|
for (i in 0 until numSamples) {
|
|
509
481
|
val angle = 2.0 * Math.PI * i * frequency / sampleRate
|
|
510
|
-
|
|
482
|
+
buffer.putShort((sin(angle) * Short.MAX_VALUE * 0.5).toInt().toShort())
|
|
511
483
|
}
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
val audioTrack = AudioTrack.Builder()
|
|
515
|
-
.setAudioAttributes(
|
|
516
|
-
android.media.AudioAttributes.Builder()
|
|
517
|
-
.setUsage(android.media.AudioAttributes.USAGE_MEDIA)
|
|
518
|
-
.setContentType(android.media.AudioAttributes.CONTENT_TYPE_MUSIC)
|
|
519
|
-
.build()
|
|
520
|
-
)
|
|
521
|
-
.setAudioFormat(
|
|
522
|
-
AudioFormat.Builder()
|
|
523
|
-
.setSampleRate(sampleRate)
|
|
524
|
-
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
|
|
525
|
-
.setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
|
|
526
|
-
.build()
|
|
527
|
-
)
|
|
528
|
-
.setBufferSizeInBytes(samples.size * 2)
|
|
529
|
-
.setTransferMode(AudioTrack.MODE_STATIC)
|
|
530
|
-
.build()
|
|
531
|
-
|
|
532
|
-
audioTrack.write(samples, 0, samples.size)
|
|
533
|
-
audioTrack.play()
|
|
534
|
-
|
|
535
|
-
// Wait for playback to complete
|
|
536
|
-
Thread.sleep(durationMs.toLong())
|
|
537
|
-
|
|
538
|
-
audioTrack.stop()
|
|
539
|
-
audioTrack.release()
|
|
484
|
+
|
|
485
|
+
return buffer.array()
|
|
540
486
|
}
|
|
541
|
-
}
|
|
487
|
+
}
|
|
@@ -9,6 +9,7 @@ import androidx.test.rule.GrantPermissionRule
|
|
|
9
9
|
import expo.modules.kotlin.Promise
|
|
10
10
|
import org.junit.After
|
|
11
11
|
import org.junit.Assert.*
|
|
12
|
+
import org.junit.Assume.assumeTrue
|
|
12
13
|
import org.junit.Before
|
|
13
14
|
import org.junit.Rule
|
|
14
15
|
import org.junit.Test
|
|
@@ -116,18 +117,33 @@ class AudioRecorderPerformanceInstrumentedTest {
|
|
|
116
117
|
|
|
117
118
|
@Test
|
|
118
119
|
fun measureStopTime_5minutes() {
|
|
120
|
+
assumeLongPerformanceTestsEnabled()
|
|
119
121
|
runPerformanceTest(300_000L, "5 minute recording")
|
|
120
122
|
}
|
|
121
123
|
|
|
122
124
|
@Test
|
|
123
125
|
fun measureStopTime_10minutes() {
|
|
126
|
+
assumeLongPerformanceTestsEnabled()
|
|
124
127
|
runPerformanceTest(600_000L, "10 minute recording")
|
|
125
128
|
}
|
|
126
129
|
|
|
127
130
|
@Test
|
|
128
131
|
fun measureStopTime_15minutes() {
|
|
132
|
+
assumeLongPerformanceTestsEnabled()
|
|
129
133
|
runPerformanceTest(900_000L, "15 minute recording")
|
|
130
134
|
}
|
|
135
|
+
|
|
136
|
+
private fun assumeLongPerformanceTestsEnabled() {
|
|
137
|
+
val enabled = InstrumentationRegistry.getArguments()
|
|
138
|
+
.getString("runLongPerformanceTests")
|
|
139
|
+
?.equals("true", ignoreCase = true) == true
|
|
140
|
+
|
|
141
|
+
assumeTrue(
|
|
142
|
+
"Long physical-device performance benchmarks are opt-in. " +
|
|
143
|
+
"Run with -Pandroid.testInstrumentationRunnerArguments.runLongPerformanceTests=true",
|
|
144
|
+
enabled
|
|
145
|
+
)
|
|
146
|
+
}
|
|
131
147
|
|
|
132
148
|
private fun runPerformanceTest(recordingDurationMs: Long, testName: String) {
|
|
133
149
|
val recordingOptions = mapOf(
|
|
@@ -231,4 +247,4 @@ class AudioRecorderPerformanceInstrumentedTest {
|
|
|
231
247
|
}
|
|
232
248
|
return map
|
|
233
249
|
}
|
|
234
|
-
}
|
|
250
|
+
}
|