@siteed/audio-studio 3.2.0-beta.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +356 -5
- package/android/src/main/java/net/siteed/audiostudio/AudioStreamDecoder.kt +306 -94
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +39 -6
- package/build/cjs/errors/AudioStreamError.js +9 -0
- package/build/cjs/errors/AudioStreamError.js.map +1 -1
- package/build/cjs/errors/AudioStreamError.test.js +22 -1
- package/build/cjs/errors/AudioStreamError.test.js.map +1 -1
- package/build/cjs/streamAudioData.js +99 -32
- package/build/cjs/streamAudioData.js.map +1 -1
- package/build/cjs/utils/audioProcessing.js +14 -10
- package/build/cjs/utils/audioProcessing.js.map +1 -1
- package/build/esm/errors/AudioStreamError.js +9 -0
- package/build/esm/errors/AudioStreamError.js.map +1 -1
- package/build/esm/errors/AudioStreamError.test.js +22 -1
- package/build/esm/errors/AudioStreamError.test.js.map +1 -1
- package/build/esm/streamAudioData.js +99 -32
- package/build/esm/streamAudioData.js.map +1 -1
- package/build/esm/utils/audioProcessing.js +14 -10
- package/build/esm/utils/audioProcessing.js.map +1 -1
- package/build/types/errors/AudioStreamError.d.ts.map +1 -1
- package/build/types/streamAudioData.d.ts +5 -0
- package/build/types/streamAudioData.d.ts.map +1 -1
- package/build/types/utils/audioProcessing.d.ts +2 -2
- package/build/types/utils/audioProcessing.d.ts.map +1 -1
- package/ios/AudioStreamDecoder.swift +191 -100
- package/ios/AudioStudioModule.swift +48 -9
- package/package.json +163 -146
- package/scripts/README.md +58 -0
- package/src/errors/AudioStreamError.test.ts +29 -2
- package/src/errors/AudioStreamError.ts +14 -0
- package/src/streamAudioData.ts +146 -42
- package/src/utils/audioProcessing.ts +25 -14
- package/android/src/androidTest/assets/chorus.wav +0 -0
- package/android/src/androidTest/assets/jfk.wav +0 -0
- package/android/src/androidTest/assets/osr_us_000_0010_8k.wav +0 -0
- package/android/src/androidTest/assets/recorder_hello_world.wav +0 -0
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioFinalMetadataContractInstrumentedTest.kt +0 -190
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioProcessorInstrumentedTest.kt +0 -197
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderInstrumentedTest.kt +0 -487
- package/android/src/androidTest/java/net/siteed/audiostudio/AudioRecorderPerformanceInstrumentedTest.kt +0 -250
- package/android/src/androidTest/java/net/siteed/audiostudio/OpusRangeDecodeRegressionInstrumentedTest.kt +0 -186
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/AudioFocusStrategyIntegrationTest.kt +0 -332
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/BufferDurationIntegrationTest.kt +0 -324
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/CompressedOnlyOutputTest.kt +0 -253
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/DeviceDisconnectionFallbackTest.kt +0 -218
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/EventEmissionIntervalTest.kt +0 -120
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/M4aFormatTest.kt +0 -345
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/OutputControlIntegrationTest.kt +0 -340
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/PcmStreamingDurationTest.kt +0 -252
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/README.md +0 -95
- package/android/src/androidTest/java/net/siteed/audiostudio/integration/run_integration_tests.sh +0 -43
- package/android/src/test/java/net/siteed/audiostudio/AndroidCallStateTest.kt +0 -37
- package/android/src/test/java/net/siteed/audiostudio/AndroidEventEmitterTest.kt +0 -28
- package/android/src/test/java/net/siteed/audiostudio/AudioFileHandlerTest.kt +0 -279
- package/android/src/test/java/net/siteed/audiostudio/AudioFocusStrategyTest.kt +0 -249
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatTest.kt +0 -151
- package/android/src/test/java/net/siteed/audiostudio/AudioFormatUtilsTest.kt +0 -273
- package/android/src/test/java/net/siteed/audiostudio/DeviceDisconnectionFallbackUnitTest.kt +0 -140
- package/android/src/test/java/net/siteed/audiostudio/InterruptionAutoResumePolicyTest.kt +0 -49
- package/android/src/test/resources/chorus.wav +0 -0
- package/android/src/test/resources/generate_test_audio.py +0 -94
- package/android/src/test/resources/jfk.wav +0 -0
- package/android/src/test/resources/osr_us_000_0010_8k.wav +0 -0
- package/android/src/test/resources/recorder_hello_world.wav +0 -0
- package/ios/AudioStudioTests/AudioFileHandlerTests.swift +0 -338
- package/ios/AudioStudioTests/AudioFormatUtilsTests.swift +0 -331
- package/ios/AudioStudioTests/AudioStreamDecoderTests.swift +0 -128
- package/ios/AudioStudioTests/AudioTestHelpers.swift +0 -130
- package/ios/AudioStudioTests/CompressedOnlyOutputTests.swift +0 -334
- package/ios/AudioStudioTests/EventEmissionIntervalTests.swift +0 -105
- package/ios/AudioStudioTests/Info.plist +0 -22
- package/ios/AudioStudioTests/README.md +0 -39
- package/ios/AudioStudioTests/SimpleAudioTest.swift +0 -98
- package/ios/AudioStudioTests/TestAudioGenerator.swift +0 -75
- package/ios/tests/README.md +0 -41
- package/ios/tests/integration/buffer_and_fallback_test.swift +0 -178
- package/ios/tests/integration/buffer_duration_test.swift +0 -185
- package/ios/tests/integration/compressed_only_output_test.swift +0 -271
- package/ios/tests/integration/output_control_test.swift +0 -322
- package/ios/tests/integration/run_integration_tests.sh +0 -37
- package/ios/tests/opus_support_test_macos.swift +0 -154
- package/ios/tests/standalone/audio_processing_test.swift +0 -144
- package/ios/tests/standalone/audio_recording_test.swift +0 -277
- package/ios/tests/standalone/audio_streaming_test.swift +0 -249
- package/ios/tests/standalone/standalone_test.swift +0 -144
package/src/streamAudioData.ts
CHANGED
|
@@ -39,6 +39,11 @@ export interface StreamAudioDataOptions {
|
|
|
39
39
|
maxChunkBytes?: number
|
|
40
40
|
/** Max chunks queued in native before JS ack pauses decode (default: 4). */
|
|
41
41
|
maxBufferedChunks?: number
|
|
42
|
+
/**
|
|
43
|
+
* Optional timeout for a chunk acknowledgement while backpressure is active.
|
|
44
|
+
* Undefined/0 disables timeout so long transcription callbacks can run.
|
|
45
|
+
*/
|
|
46
|
+
backpressureTimeoutMs?: number
|
|
42
47
|
/** Output PCM format; only `'float32'` supported today. */
|
|
43
48
|
streamFormat?: 'float32'
|
|
44
49
|
/** Abort the in-flight request. Resolves promise with `cancelled: true`. */
|
|
@@ -151,15 +156,15 @@ function toFloat32(samples: unknown): Float32Array {
|
|
|
151
156
|
}
|
|
152
157
|
if (typeof samples === 'string') {
|
|
153
158
|
const bytes = base64ToBytes(samples)
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
return new Float32Array(
|
|
159
|
+
const floatLength = Math.floor(bytes.byteLength / 4)
|
|
160
|
+
if (bytes.byteOffset % 4 === 0) {
|
|
161
|
+
return new Float32Array(bytes.buffer, bytes.byteOffset, floatLength)
|
|
162
|
+
}
|
|
163
|
+
const sliced = bytes.buffer.slice(
|
|
164
|
+
bytes.byteOffset,
|
|
165
|
+
bytes.byteOffset + bytes.byteLength
|
|
166
|
+
)
|
|
167
|
+
return new Float32Array(sliced, 0, Math.floor(sliced.byteLength / 4))
|
|
163
168
|
}
|
|
164
169
|
if (samples && typeof samples === 'object' && 'length' in samples) {
|
|
165
170
|
// ArrayLike fallback
|
|
@@ -178,7 +183,13 @@ function base64ToBytes(input: string): Uint8Array {
|
|
|
178
183
|
const g = globalThis as { atob?: (s: string) => string }
|
|
179
184
|
if (typeof g.atob !== 'function') {
|
|
180
185
|
// Buffer path for environments without atob; React Native has atob.
|
|
181
|
-
const Buf = (
|
|
186
|
+
const Buf = (
|
|
187
|
+
globalThis as {
|
|
188
|
+
Buffer?: {
|
|
189
|
+
from: (input: string, encoding: string) => Uint8Array
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
).Buffer
|
|
182
193
|
if (Buf) return new Uint8Array(Buf.from(input, 'base64'))
|
|
183
194
|
return new Uint8Array(0)
|
|
184
195
|
}
|
|
@@ -188,35 +199,70 @@ function base64ToBytes(input: string): Uint8Array {
|
|
|
188
199
|
return out
|
|
189
200
|
}
|
|
190
201
|
|
|
202
|
+
function rejectInvalidRange(message: string): never {
|
|
203
|
+
throw new AudioStreamError({
|
|
204
|
+
code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
|
|
205
|
+
message,
|
|
206
|
+
recoverable: false,
|
|
207
|
+
})
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
function assertPositiveFiniteOption(
|
|
211
|
+
value: number | undefined,
|
|
212
|
+
name: string,
|
|
213
|
+
integer = false
|
|
214
|
+
): void {
|
|
215
|
+
if (value === undefined) return
|
|
216
|
+
if (
|
|
217
|
+
!Number.isFinite(value) ||
|
|
218
|
+
value <= 0 ||
|
|
219
|
+
(integer && !Number.isInteger(value))
|
|
220
|
+
) {
|
|
221
|
+
rejectInvalidRange(
|
|
222
|
+
`${name} must be a positive${integer ? ' integer' : ''}`
|
|
223
|
+
)
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
191
227
|
function validateOptions(options: StreamAudioDataOptions): void {
|
|
192
228
|
if (!options.fileUri) {
|
|
193
|
-
|
|
194
|
-
code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
|
|
195
|
-
message: 'fileUri is required',
|
|
196
|
-
recoverable: false,
|
|
197
|
-
})
|
|
229
|
+
rejectInvalidRange('fileUri is required')
|
|
198
230
|
}
|
|
199
231
|
if (
|
|
200
232
|
options.startTimeMs !== undefined &&
|
|
201
233
|
options.endTimeMs !== undefined &&
|
|
202
234
|
options.startTimeMs >= options.endTimeMs
|
|
203
235
|
) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
236
|
+
rejectInvalidRange('startTimeMs must be < endTimeMs')
|
|
237
|
+
}
|
|
238
|
+
if (options.endTimeMs !== undefined && options.endTimeMs <= 0) {
|
|
239
|
+
rejectInvalidRange('endTimeMs must be > 0')
|
|
240
|
+
}
|
|
241
|
+
if (options.startTimeMs !== undefined && options.startTimeMs < 0) {
|
|
242
|
+
rejectInvalidRange('startTimeMs must be >= 0')
|
|
209
243
|
}
|
|
210
244
|
if (
|
|
211
245
|
options.chunkDurationMs !== undefined &&
|
|
212
246
|
(options.chunkDurationMs < 10 || options.chunkDurationMs > 60000)
|
|
213
247
|
) {
|
|
214
|
-
|
|
215
|
-
code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
|
|
216
|
-
message: 'chunkDurationMs must be in [10, 60000]',
|
|
217
|
-
recoverable: false,
|
|
218
|
-
})
|
|
248
|
+
rejectInvalidRange('chunkDurationMs must be in [10, 60000]')
|
|
219
249
|
}
|
|
250
|
+
if (
|
|
251
|
+
options.backpressureTimeoutMs !== undefined &&
|
|
252
|
+
options.backpressureTimeoutMs < 0
|
|
253
|
+
) {
|
|
254
|
+
rejectInvalidRange('backpressureTimeoutMs must be >= 0')
|
|
255
|
+
}
|
|
256
|
+
assertPositiveFiniteOption(options.targetSampleRate, 'targetSampleRate')
|
|
257
|
+
assertPositiveFiniteOption(options.sampleRate, 'sampleRate')
|
|
258
|
+
assertPositiveFiniteOption(options.channels, 'channels', true)
|
|
259
|
+
assertPositiveFiniteOption(
|
|
260
|
+
options.maxBufferedChunks,
|
|
261
|
+
'maxBufferedChunks',
|
|
262
|
+
true
|
|
263
|
+
)
|
|
264
|
+
assertPositiveFiniteOption(options.maxChunkBytes, 'maxChunkBytes', true)
|
|
265
|
+
|
|
220
266
|
if (
|
|
221
267
|
options.streamFormat !== undefined &&
|
|
222
268
|
options.streamFormat !== 'float32'
|
|
@@ -315,6 +361,7 @@ async function streamAudioDataNative(
|
|
|
315
361
|
let processingChain: Promise<void> = Promise.resolve()
|
|
316
362
|
let settled = false
|
|
317
363
|
let abortListener: (() => void) | null = null
|
|
364
|
+
let lastProgress: StreamAudioDataProgress | null = null
|
|
318
365
|
|
|
319
366
|
const finalize = () => {
|
|
320
367
|
for (const sub of subs) {
|
|
@@ -413,6 +460,7 @@ async function streamAudioDataNative(
|
|
|
413
460
|
emitter.addListener(PROGRESS_EVENT, (raw: unknown) => {
|
|
414
461
|
const evt = raw as StreamAudioDataProgress
|
|
415
462
|
if (evt.requestId !== requestId) return
|
|
463
|
+
lastProgress = evt
|
|
416
464
|
callbacks.onProgress!(evt)
|
|
417
465
|
})
|
|
418
466
|
)
|
|
@@ -435,7 +483,10 @@ async function streamAudioDataNative(
|
|
|
435
483
|
.then(() => {
|
|
436
484
|
settle(() => {}, 'resolve', {
|
|
437
485
|
requestId,
|
|
438
|
-
durationMs:
|
|
486
|
+
durationMs:
|
|
487
|
+
evt.durationMs > 0
|
|
488
|
+
? evt.durationMs
|
|
489
|
+
: (lastProgress?.durationMs ?? 0),
|
|
439
490
|
sampleRate: evt.sampleRate,
|
|
440
491
|
channels: evt.channels,
|
|
441
492
|
chunks: evt.chunks ?? chunkCount,
|
|
@@ -462,7 +513,7 @@ async function streamAudioDataNative(
|
|
|
462
513
|
.then(() => {
|
|
463
514
|
settle(() => {}, 'resolve', {
|
|
464
515
|
requestId,
|
|
465
|
-
durationMs: 0,
|
|
516
|
+
durationMs: lastProgress?.durationMs ?? 0,
|
|
466
517
|
sampleRate:
|
|
467
518
|
options.targetSampleRate ??
|
|
468
519
|
options.sampleRate ??
|
|
@@ -494,7 +545,7 @@ async function streamAudioDataNative(
|
|
|
494
545
|
if (options.signal.aborted) {
|
|
495
546
|
settle(() => {}, 'resolve', {
|
|
496
547
|
requestId,
|
|
497
|
-
durationMs: 0,
|
|
548
|
+
durationMs: lastProgress?.durationMs ?? 0,
|
|
498
549
|
sampleRate:
|
|
499
550
|
options.targetSampleRate ?? options.sampleRate ?? 0,
|
|
500
551
|
channels: options.channels ?? 1,
|
|
@@ -547,8 +598,8 @@ async function streamAudioDataWeb(
|
|
|
547
598
|
try {
|
|
548
599
|
const processed = await processAudioBuffer({
|
|
549
600
|
fileUri: options.fileUri,
|
|
550
|
-
targetSampleRate: options.targetSampleRate
|
|
551
|
-
targetChannels: options.channels
|
|
601
|
+
targetSampleRate: options.targetSampleRate,
|
|
602
|
+
targetChannels: options.channels,
|
|
552
603
|
normalizeAudio: options.normalizeAudio ?? true,
|
|
553
604
|
startTimeMs: options.startTimeMs,
|
|
554
605
|
endTimeMs: options.endTimeMs,
|
|
@@ -559,16 +610,34 @@ async function streamAudioDataWeb(
|
|
|
559
610
|
const durationMs = processed.durationMs
|
|
560
611
|
const chunkDurationMs = options.chunkDurationMs ?? 1000
|
|
561
612
|
let samplesPerChunk = Math.max(
|
|
562
|
-
|
|
613
|
+
channels,
|
|
563
614
|
Math.floor((chunkDurationMs / 1000) * sampleRate) * channels
|
|
564
615
|
)
|
|
565
616
|
if (options.maxChunkBytes) {
|
|
566
|
-
|
|
567
|
-
|
|
617
|
+
// Round down to a multiple of `channels` so we never split an
|
|
618
|
+
// interleaved frame across two chunks (that would produce a
|
|
619
|
+
// fractional `startSample` for the next chunk).
|
|
620
|
+
const rawMax = Math.floor(options.maxChunkBytes / 4)
|
|
621
|
+
const maxSamples = Math.max(
|
|
622
|
+
channels,
|
|
623
|
+
Math.floor(rawMax / channels) * channels
|
|
624
|
+
)
|
|
625
|
+
samplesPerChunk = Math.max(
|
|
626
|
+
channels,
|
|
627
|
+
Math.min(samplesPerChunk, maxSamples)
|
|
628
|
+
)
|
|
568
629
|
}
|
|
569
630
|
|
|
570
|
-
const all = sanitizeFloat32(
|
|
631
|
+
const all = sanitizeFloat32(
|
|
632
|
+
interleaveBuffer(processed.buffer, channels),
|
|
633
|
+
options.normalizeAudio ?? true
|
|
634
|
+
)
|
|
571
635
|
|
|
636
|
+
// Chunk timestamps are absolute (range start + offset) on every
|
|
637
|
+
// platform; progress is *elapsed within the range* so the
|
|
638
|
+
// `processedMs / durationMs` fraction stays in [0, 1] regardless of
|
|
639
|
+
// `startTimeMs`. The native decoders use the same split.
|
|
640
|
+
const rangeStartMs = options.startTimeMs ?? 0
|
|
572
641
|
let chunkIndex = 0
|
|
573
642
|
let emittedSamples = 0
|
|
574
643
|
for (let off = 0; off < all.length; off += samplesPerChunk) {
|
|
@@ -587,11 +656,15 @@ async function streamAudioDataWeb(
|
|
|
587
656
|
const slice = all.slice(off, end)
|
|
588
657
|
const startSample = off / channels
|
|
589
658
|
const endSample = end / channels
|
|
659
|
+
const startMs =
|
|
660
|
+
Math.round((startSample / sampleRate) * 1000) + rangeStartMs
|
|
661
|
+
const endMs =
|
|
662
|
+
Math.round((endSample / sampleRate) * 1000) + rangeStartMs
|
|
590
663
|
const chunk: StreamAudioDataChunk = {
|
|
591
664
|
requestId,
|
|
592
665
|
chunkIndex,
|
|
593
|
-
startTimeMs:
|
|
594
|
-
endTimeMs:
|
|
666
|
+
startTimeMs: startMs,
|
|
667
|
+
endTimeMs: endMs,
|
|
595
668
|
durationMs: Math.round(
|
|
596
669
|
((endSample - startSample) / sampleRate) * 1000
|
|
597
670
|
),
|
|
@@ -603,11 +676,24 @@ async function streamAudioDataWeb(
|
|
|
603
676
|
isFinal: end >= all.length,
|
|
604
677
|
}
|
|
605
678
|
await callbacks.onChunk(chunk)
|
|
679
|
+
// Resample rounding (Math.ceil in processAudioBuffer) can push
|
|
680
|
+
// elapsed past the source-rate-derived range duration on the tail
|
|
681
|
+
// chunk. Cap so onProgress consumers always see a [0, 1] ratio,
|
|
682
|
+
// matching the native `coerceIn(0, 1)` / `min(1, max(0, …))`
|
|
683
|
+
// clamp.
|
|
684
|
+
const rawElapsedMs = Math.round((endSample / sampleRate) * 1000)
|
|
685
|
+
const elapsedMs =
|
|
686
|
+
durationMs > 0
|
|
687
|
+
? Math.min(rawElapsedMs, durationMs)
|
|
688
|
+
: rawElapsedMs
|
|
606
689
|
callbacks.onProgress?.({
|
|
607
690
|
requestId,
|
|
608
|
-
processedMs:
|
|
691
|
+
processedMs: elapsedMs,
|
|
609
692
|
durationMs,
|
|
610
|
-
progress:
|
|
693
|
+
progress:
|
|
694
|
+
durationMs > 0
|
|
695
|
+
? Math.min(1, Math.max(0, elapsedMs / durationMs))
|
|
696
|
+
: 1,
|
|
611
697
|
emittedChunks: chunkIndex + 1,
|
|
612
698
|
})
|
|
613
699
|
chunkIndex += 1
|
|
@@ -628,10 +714,28 @@ async function streamAudioDataWeb(
|
|
|
628
714
|
}
|
|
629
715
|
}
|
|
630
716
|
|
|
631
|
-
function
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
)
|
|
717
|
+
function interleaveBuffer(buffer: AudioBuffer, channels: number): Float32Array {
|
|
718
|
+
const numCh = Math.max(1, Math.min(channels, buffer.numberOfChannels))
|
|
719
|
+
const framesPerCh = buffer.length
|
|
720
|
+
if (numCh === 1) {
|
|
721
|
+
// Cheap path: clone channel 0 so downstream mutation doesn't touch the
|
|
722
|
+
// underlying AudioBuffer storage.
|
|
723
|
+
return new Float32Array(buffer.getChannelData(0))
|
|
724
|
+
}
|
|
725
|
+
const out = new Float32Array(framesPerCh * numCh)
|
|
726
|
+
const channelData: Float32Array[] = []
|
|
727
|
+
for (let c = 0; c < numCh; c++) {
|
|
728
|
+
channelData.push(buffer.getChannelData(c))
|
|
729
|
+
}
|
|
730
|
+
for (let f = 0; f < framesPerCh; f++) {
|
|
731
|
+
for (let c = 0; c < numCh; c++) {
|
|
732
|
+
out[f * numCh + c] = channelData[c][f]
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
return out
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
function sanitizeFloat32(input: Float32Array, clamp: boolean): Float32Array {
|
|
635
739
|
if (!clamp) {
|
|
636
740
|
// still need NaN/Inf sanitation
|
|
637
741
|
for (let i = 0; i < input.length; i++) {
|
|
@@ -6,8 +6,8 @@ import { ConsoleLike } from '../AudioStudio.types'
|
|
|
6
6
|
export interface ProcessAudioBufferOptions {
|
|
7
7
|
arrayBuffer?: ArrayBuffer
|
|
8
8
|
fileUri?: string
|
|
9
|
-
targetSampleRate
|
|
10
|
-
targetChannels
|
|
9
|
+
targetSampleRate?: number
|
|
10
|
+
targetChannels?: number
|
|
11
11
|
normalizeAudio: boolean
|
|
12
12
|
startTimeMs?: number
|
|
13
13
|
endTimeMs?: number
|
|
@@ -84,9 +84,17 @@ export async function processAudioBuffer({
|
|
|
84
84
|
// Create context at original sample rate first
|
|
85
85
|
ctx =
|
|
86
86
|
audioContext ||
|
|
87
|
-
new (window.AudioContext ||
|
|
87
|
+
new (window.AudioContext ||
|
|
88
|
+
(
|
|
89
|
+
window as unknown as {
|
|
90
|
+
webkitAudioContext?: typeof AudioContext
|
|
91
|
+
}
|
|
92
|
+
).webkitAudioContext)()
|
|
88
93
|
buffer = await ctx.decodeAudioData(audioData)
|
|
89
94
|
|
|
95
|
+
const effectiveTargetSampleRate = targetSampleRate ?? buffer.sampleRate
|
|
96
|
+
const effectiveTargetChannels = targetChannels ?? buffer.numberOfChannels
|
|
97
|
+
|
|
90
98
|
logger?.debug('Decoded audio buffer:', {
|
|
91
99
|
originalChannels: buffer.numberOfChannels,
|
|
92
100
|
originalSampleRate: buffer.sampleRate,
|
|
@@ -109,7 +117,7 @@ export async function processAudioBuffer({
|
|
|
109
117
|
position !== undefined
|
|
110
118
|
? Math.floor(
|
|
111
119
|
(position / bytesPerSample) *
|
|
112
|
-
(buffer.sampleRate /
|
|
120
|
+
(buffer.sampleRate / effectiveTargetSampleRate)
|
|
113
121
|
)
|
|
114
122
|
: startSample
|
|
115
123
|
|
|
@@ -117,11 +125,12 @@ export async function processAudioBuffer({
|
|
|
117
125
|
length !== undefined
|
|
118
126
|
? Math.floor(
|
|
119
127
|
(length / bytesPerSample) *
|
|
120
|
-
(buffer.sampleRate /
|
|
128
|
+
(buffer.sampleRate / effectiveTargetSampleRate)
|
|
121
129
|
)
|
|
122
|
-
: endTimeMs !== undefined
|
|
130
|
+
: endTimeMs !== undefined
|
|
123
131
|
? Math.floor(
|
|
124
|
-
((endTimeMs - startTimeMs) / 1000) *
|
|
132
|
+
((endTimeMs - (startTimeMs ?? 0)) / 1000) *
|
|
133
|
+
buffer.sampleRate
|
|
125
134
|
)
|
|
126
135
|
: buffer.length - adjustedStartSample
|
|
127
136
|
|
|
@@ -130,8 +139,8 @@ export async function processAudioBuffer({
|
|
|
130
139
|
adjustedStartSample,
|
|
131
140
|
samplesNeeded,
|
|
132
141
|
originalSampleRate: buffer.sampleRate,
|
|
133
|
-
targetSampleRate,
|
|
134
|
-
conversionRatio: buffer.sampleRate /
|
|
142
|
+
targetSampleRate: effectiveTargetSampleRate,
|
|
143
|
+
conversionRatio: buffer.sampleRate / effectiveTargetSampleRate,
|
|
135
144
|
expectedDurationMs: (samplesNeeded / buffer.sampleRate) * 1000,
|
|
136
145
|
})
|
|
137
146
|
|
|
@@ -153,9 +162,11 @@ export async function processAudioBuffer({
|
|
|
153
162
|
|
|
154
163
|
// Create offline context for resampling
|
|
155
164
|
const offlineCtx = new OfflineAudioContext(
|
|
156
|
-
|
|
157
|
-
Math.ceil(
|
|
158
|
-
|
|
165
|
+
effectiveTargetChannels,
|
|
166
|
+
Math.ceil(
|
|
167
|
+
(samplesNeeded * effectiveTargetSampleRate) / buffer.sampleRate
|
|
168
|
+
),
|
|
169
|
+
effectiveTargetSampleRate
|
|
159
170
|
)
|
|
160
171
|
|
|
161
172
|
// Create source and connect
|
|
@@ -175,7 +186,7 @@ export async function processAudioBuffer({
|
|
|
175
186
|
|
|
176
187
|
logger?.debug('Final processed audio:', {
|
|
177
188
|
outputSamples: channelData.length,
|
|
178
|
-
outputSampleRate:
|
|
189
|
+
outputSampleRate: effectiveTargetSampleRate,
|
|
179
190
|
durationMs,
|
|
180
191
|
})
|
|
181
192
|
|
|
@@ -184,7 +195,7 @@ export async function processAudioBuffer({
|
|
|
184
195
|
channelData,
|
|
185
196
|
samples: channelData.length,
|
|
186
197
|
durationMs,
|
|
187
|
-
sampleRate:
|
|
198
|
+
sampleRate: effectiveTargetSampleRate,
|
|
188
199
|
channels: processedBuffer.numberOfChannels,
|
|
189
200
|
}
|
|
190
201
|
} catch (error) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
package net.siteed.audiostudio
|
|
2
|
-
|
|
3
|
-
import android.content.Context
|
|
4
|
-
import android.net.Uri
|
|
5
|
-
import androidx.test.ext.junit.runners.AndroidJUnit4
|
|
6
|
-
import androidx.test.platform.app.InstrumentationRegistry
|
|
7
|
-
import org.junit.After
|
|
8
|
-
import org.junit.Assert.assertEquals
|
|
9
|
-
import org.junit.Assert.assertTrue
|
|
10
|
-
import org.junit.Before
|
|
11
|
-
import org.junit.Test
|
|
12
|
-
import org.junit.runner.RunWith
|
|
13
|
-
import java.io.File
|
|
14
|
-
import java.nio.ByteBuffer
|
|
15
|
-
import java.nio.ByteOrder
|
|
16
|
-
|
|
17
|
-
/**
|
|
18
|
-
* Regression coverage for Android range processing where the final PCM bytes,
|
|
19
|
-
* returned metadata, and WAV headers must all describe the post-conversion data.
|
|
20
|
-
*/
|
|
21
|
-
@RunWith(AndroidJUnit4::class)
|
|
22
|
-
class AudioFinalMetadataContractInstrumentedTest {
|
|
23
|
-
private lateinit var context: Context
|
|
24
|
-
private lateinit var filesDir: File
|
|
25
|
-
private lateinit var audioProcessor: AudioProcessor
|
|
26
|
-
|
|
27
|
-
@Before
|
|
28
|
-
fun setUp() {
|
|
29
|
-
context = InstrumentationRegistry.getInstrumentation().targetContext
|
|
30
|
-
filesDir = context.filesDir
|
|
31
|
-
audioProcessor = AudioProcessor(filesDir)
|
|
32
|
-
copyAssetToFilesDir("chorus.wav")
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
@After
|
|
36
|
-
fun tearDown() {
|
|
37
|
-
filesDir.listFiles()?.forEach { file ->
|
|
38
|
-
if (file.name.startsWith("final_metadata_contract_") || file.name == "chorus.wav") {
|
|
39
|
-
file.delete()
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
@Test
|
|
45
|
-
fun loadAudioRange_returnsMetadataFromFinalConvertedWavBytes() {
|
|
46
|
-
val audioData = audioProcessor.loadAudioRange(
|
|
47
|
-
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
48
|
-
startTimeMs = 0,
|
|
49
|
-
endTimeMs = ONE_SECOND_MS,
|
|
50
|
-
config = DecodingConfig(
|
|
51
|
-
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
52
|
-
targetChannels = TARGET_CHANNELS,
|
|
53
|
-
targetBitDepth = TARGET_BIT_DEPTH,
|
|
54
|
-
normalizeAudio = false
|
|
55
|
-
)
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
val converted = requireNotNull(audioData) { "Audio range should load" }
|
|
59
|
-
val bytesPerTargetFrame = TARGET_CHANNELS * BYTES_PER_TARGET_SAMPLE
|
|
60
|
-
val finalFrameCount = converted.data.size / bytesPerTargetFrame
|
|
61
|
-
val durationFromFinalBytes = finalFrameCount * 1_000L / TARGET_SAMPLE_RATE
|
|
62
|
-
|
|
63
|
-
assertEquals("sampleRate should describe final converted bytes", TARGET_SAMPLE_RATE, converted.sampleRate)
|
|
64
|
-
assertEquals("channels should describe final converted bytes", TARGET_CHANNELS, converted.channels)
|
|
65
|
-
assertEquals("bitDepth should describe final converted bytes", TARGET_BIT_DEPTH, converted.bitDepth)
|
|
66
|
-
assertEquals("final PCM data must end on a target frame boundary", 0, converted.data.size % bytesPerTargetFrame)
|
|
67
|
-
assertEquals(
|
|
68
|
-
"duration should be derived from actual final PCM bytes",
|
|
69
|
-
durationFromFinalBytes,
|
|
70
|
-
converted.durationMs
|
|
71
|
-
)
|
|
72
|
-
assertTrue(
|
|
73
|
-
"duration should remain close to requested range: ${converted.durationMs}ms",
|
|
74
|
-
kotlin.math.abs(converted.durationMs - ONE_SECOND_MS) <= 25
|
|
75
|
-
)
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
@Test
|
|
79
|
-
fun loadAudioRange_alignsConvertedWavBytesToTargetFrameSize() {
|
|
80
|
-
val audioData = audioProcessor.loadAudioRange(
|
|
81
|
-
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
82
|
-
startTimeMs = 0,
|
|
83
|
-
endTimeMs = ONE_SECOND_MS,
|
|
84
|
-
config = DecodingConfig(
|
|
85
|
-
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
86
|
-
targetChannels = TARGET_CHANNELS,
|
|
87
|
-
targetBitDepth = TARGET_BIT_DEPTH,
|
|
88
|
-
normalizeAudio = false
|
|
89
|
-
)
|
|
90
|
-
)
|
|
91
|
-
|
|
92
|
-
val converted = requireNotNull(audioData) { "Audio range should load" }
|
|
93
|
-
val bytesPerTargetFrame = TARGET_CHANNELS * BYTES_PER_TARGET_SAMPLE
|
|
94
|
-
|
|
95
|
-
assertEquals("final PCM data must end on a target frame boundary", 0, converted.data.size % bytesPerTargetFrame)
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
@Test
|
|
99
|
-
fun trimAudio_writesWavHeaderFromFinalConvertedBytes() {
|
|
100
|
-
val outputFileName = "final_metadata_contract_processor_trim.wav"
|
|
101
|
-
val trimmed = audioProcessor.trimAudio(
|
|
102
|
-
fileUri = File(filesDir, "chorus.wav").absolutePath,
|
|
103
|
-
startTimeMs = 0,
|
|
104
|
-
endTimeMs = ONE_SECOND_MS,
|
|
105
|
-
config = DecodingConfig(
|
|
106
|
-
targetSampleRate = TARGET_SAMPLE_RATE,
|
|
107
|
-
targetChannels = TARGET_CHANNELS,
|
|
108
|
-
targetBitDepth = TARGET_BIT_DEPTH,
|
|
109
|
-
normalizeAudio = false
|
|
110
|
-
),
|
|
111
|
-
outputFileName = outputFileName
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
requireNotNull(trimmed) { "Trimmed audio should be returned" }
|
|
115
|
-
val header = readWavHeader(File(filesDir, outputFileName))
|
|
116
|
-
|
|
117
|
-
assertEquals("WAV header sample rate should be target sample rate", TARGET_SAMPLE_RATE, header.sampleRate)
|
|
118
|
-
assertEquals("WAV header channels should be target channels", TARGET_CHANNELS, header.channels)
|
|
119
|
-
assertEquals("WAV header bit depth should be target bit depth", TARGET_BIT_DEPTH, header.bitDepth)
|
|
120
|
-
assertEquals("WAV data chunk should match returned final PCM bytes", trimmed.data.size, header.dataSize)
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
@Test
|
|
124
|
-
fun audioTrimmer_honorsJsNumberOutputFormatWhenWritingWavHeader() {
|
|
125
|
-
val trimmer = AudioTrimmer(context, AudioFileHandler(filesDir))
|
|
126
|
-
val result = trimmer.trimAudio(
|
|
127
|
-
fileUri = Uri.fromFile(File(filesDir, "chorus.wav")).toString(),
|
|
128
|
-
startTimeMs = 0,
|
|
129
|
-
endTimeMs = ONE_SECOND_MS,
|
|
130
|
-
outputFileName = "final_metadata_contract_audio_trimmer",
|
|
131
|
-
outputFormat = mapOf(
|
|
132
|
-
"format" to "wav",
|
|
133
|
-
"sampleRate" to TARGET_SAMPLE_RATE.toDouble(),
|
|
134
|
-
"channels" to TARGET_CHANNELS.toDouble(),
|
|
135
|
-
"bitDepth" to TARGET_BIT_DEPTH.toDouble()
|
|
136
|
-
)
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
val outputPath = result["uri"] as String
|
|
140
|
-
val header = readWavHeader(File(outputPath))
|
|
141
|
-
|
|
142
|
-
assertEquals("Double sampleRate option should drive WAV header", TARGET_SAMPLE_RATE, header.sampleRate)
|
|
143
|
-
assertEquals("Double channels option should drive WAV header", TARGET_CHANNELS, header.channels)
|
|
144
|
-
assertEquals("Double bitDepth option should drive WAV header", TARGET_BIT_DEPTH, header.bitDepth)
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
private fun copyAssetToFilesDir(fileName: String) {
|
|
148
|
-
context.assets.open(fileName).use { input ->
|
|
149
|
-
File(filesDir, fileName).outputStream().use { output ->
|
|
150
|
-
input.copyTo(output)
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
private fun readWavHeader(file: File): WavHeader {
|
|
156
|
-
assertTrue("WAV file should exist: ${file.absolutePath}", file.exists())
|
|
157
|
-
val bytes = file.inputStream().use { it.readNBytes(44) }
|
|
158
|
-
assertEquals("RIFF", String(bytes.sliceArray(0..3)))
|
|
159
|
-
assertEquals("WAVE", String(bytes.sliceArray(8..11)))
|
|
160
|
-
assertEquals("data", String(bytes.sliceArray(36..39)))
|
|
161
|
-
|
|
162
|
-
return WavHeader(
|
|
163
|
-
channels = bytes.shortAt(22),
|
|
164
|
-
sampleRate = bytes.intAt(24),
|
|
165
|
-
bitDepth = bytes.shortAt(34),
|
|
166
|
-
dataSize = bytes.intAt(40)
|
|
167
|
-
)
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
private fun ByteArray.shortAt(offset: Int): Int =
|
|
171
|
-
ByteBuffer.wrap(this, offset, 2).order(ByteOrder.LITTLE_ENDIAN).short.toInt()
|
|
172
|
-
|
|
173
|
-
private fun ByteArray.intAt(offset: Int): Int =
|
|
174
|
-
ByteBuffer.wrap(this, offset, 4).order(ByteOrder.LITTLE_ENDIAN).int
|
|
175
|
-
|
|
176
|
-
private data class WavHeader(
|
|
177
|
-
val channels: Int,
|
|
178
|
-
val sampleRate: Int,
|
|
179
|
-
val bitDepth: Int,
|
|
180
|
-
val dataSize: Int
|
|
181
|
-
)
|
|
182
|
-
|
|
183
|
-
companion object {
|
|
184
|
-
private const val ONE_SECOND_MS = 1_000L
|
|
185
|
-
private const val TARGET_SAMPLE_RATE = 16_000
|
|
186
|
-
private const val TARGET_CHANNELS = 2
|
|
187
|
-
private const val TARGET_BIT_DEPTH = 16
|
|
188
|
-
private const val BYTES_PER_TARGET_SAMPLE = TARGET_BIT_DEPTH / 8
|
|
189
|
-
}
|
|
190
|
-
}
|