npm - @siteed/audio-studio - Versions diffs - 3.2.0-beta.1 → 3.2.1-beta.0 - Mend

@siteed/audio-studio 3.2.0-beta.1 → 3.2.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/src/errors/AudioStreamError.test.ts CHANGED Viewed

@@ -11,13 +11,18 @@ describe('AudioStreamError', () => {
     })
     it('maps native FILE_NOT_FOUND code', () => {
-        const mapped = mapStreamError({ code: 'FILE_NOT_FOUND', message: 'gone' })
+        const mapped = mapStreamError({
+            code: 'FILE_NOT_FOUND',
+            message: 'gone',
+        })
         expect(mapped.code).toBe('ERR_AUDIO_STREAM_FILE_NOT_FOUND')
         expect(mapped.recoverable).toBe(false)
     })
     it('maps unsupported codec text', () => {
-        const mapped = mapStreamError(new Error('No suitable codec for audio/opus'))
+        const mapped = mapStreamError(
+            new Error('No suitable codec for audio/opus')
+        )
         expect(mapped.code).toBe('ERR_AUDIO_STREAM_UNSUPPORTED_FORMAT')
     })
@@ -30,11 +35,33 @@ describe('AudioStreamError', () => {
         expect(mapped.recoverable).toBe(true)
     })
+    it('maps backpressure timeout as recoverable', () => {
+        const mapped = mapStreamError({
+            code: 'ERR_AUDIO_STREAM_BACKPRESSURE_TIMEOUT',
+            message: 'ack timed out',
+        })
+        expect(mapped.code).toBe('ERR_AUDIO_STREAM_BACKPRESSURE_TIMEOUT')
+        expect(mapped.recoverable).toBe(true)
+    })
     it('falls back to UNKNOWN', () => {
         const mapped = mapStreamError({})
         expect(mapped.code).toBe('ERR_AUDIO_STREAM_UNKNOWN')
     })
+    it('warns when native returns an unknown audio stream code', () => {
+        const warn = jest.spyOn(console, 'warn').mockImplementation(() => {})
+        const mapped = mapStreamError({
+            code: 'ERR_AUDIO_STREAM_FOOBAR',
+            message: 'new native code',
+        })
+        expect(mapped.code).toBe('ERR_AUDIO_STREAM_UNKNOWN')
+        expect(warn).toHaveBeenCalledWith(
+            '[AudioStreamError] Unknown native audio stream error code: ERR_AUDIO_STREAM_FOOBAR'
+        )
+        warn.mockRestore()
+    })
     it('preserves nativeCode and nativeMessage', () => {
         const mapped = mapStreamError({
             code: 'WEIRD_NATIVE_CODE',

package/src/errors/AudioStreamError.ts CHANGED Viewed

@@ -80,6 +80,14 @@ function getNativeCode(err: unknown): string | undefined {
     return undefined
 }
+function isUnknownAudioStreamCode(raw: string | undefined): boolean {
+    if (!raw) return false
+    return (
+        raw.toUpperCase().startsWith('ERR_AUDIO_STREAM_') &&
+        normalizeCode(raw) === null
+    )
+}
 function normalizeCode(raw: string | undefined): AudioStreamErrorCode | null {
     if (!raw) return null
     const upper = raw.toUpperCase()
@@ -151,6 +159,12 @@ export function mapStreamError(
     const nativeCode = getNativeCode(err)
     const lower = nativeMessage.toLowerCase()
+    if (isUnknownAudioStreamCode(nativeCode)) {
+        console.warn(
+            `[AudioStreamError] Unknown native audio stream error code: ${nativeCode}`
+        )
+    }
     let code =
         normalizeCode(nativeCode) ??
         normalizeCode(nativeMessage) ??

package/src/streamAudioData.ts CHANGED Viewed

@@ -39,6 +39,11 @@ export interface StreamAudioDataOptions {
     maxChunkBytes?: number
     /** Max chunks queued in native before JS ack pauses decode (default: 4). */
     maxBufferedChunks?: number
+    /**
+     * Optional timeout for a chunk acknowledgement while backpressure is active.
+     * Undefined/0 disables timeout so long transcription callbacks can run.
+     */
+    backpressureTimeoutMs?: number
     /** Output PCM format; only `'float32'` supported today. */
     streamFormat?: 'float32'
     /** Abort the in-flight request. Resolves promise with `cancelled: true`. */
@@ -151,15 +156,15 @@ function toFloat32(samples: unknown): Float32Array {
     }
     if (typeof samples === 'string') {
         const bytes = base64ToBytes(samples)
-        const aligned =
-            bytes.byteOffset % 4 === 0
-                ? new Float32Array(
-                      bytes.buffer,
-                      bytes.byteOffset,
-                      bytes.byteLength / 4
-                  )
-                : new Float32Array(bytes.buffer.slice(bytes.byteOffset))
-        return new Float32Array(aligned)
+        const floatLength = Math.floor(bytes.byteLength / 4)
+        if (bytes.byteOffset % 4 === 0) {
+            return new Float32Array(bytes.buffer, bytes.byteOffset, floatLength)
+        }
+        const sliced = bytes.buffer.slice(
+            bytes.byteOffset,
+            bytes.byteOffset + bytes.byteLength
+        )
+        return new Float32Array(sliced, 0, Math.floor(sliced.byteLength / 4))
     }
     if (samples && typeof samples === 'object' && 'length' in samples) {
         // ArrayLike fallback
@@ -178,7 +183,13 @@ function base64ToBytes(input: string): Uint8Array {
     const g = globalThis as { atob?: (s: string) => string }
     if (typeof g.atob !== 'function') {
         // Buffer path for environments without atob; React Native has atob.
-        const Buf = (globalThis as { Buffer?: { from: Function } }).Buffer
+        const Buf = (
+            globalThis as {
+                Buffer?: {
+                    from: (input: string, encoding: string) => Uint8Array
+                }
+            }
+        ).Buffer
         if (Buf) return new Uint8Array(Buf.from(input, 'base64'))
         return new Uint8Array(0)
     }
@@ -188,35 +199,70 @@ function base64ToBytes(input: string): Uint8Array {
     return out
 }
+function rejectInvalidRange(message: string): never {
+    throw new AudioStreamError({
+        code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
+        message,
+        recoverable: false,
+    })
+}
+function assertPositiveFiniteOption(
+    value: number | undefined,
+    name: string,
+    integer = false
+): void {
+    if (value === undefined) return
+    if (
+        !Number.isFinite(value) ||
+        value <= 0 ||
+        (integer && !Number.isInteger(value))
+    ) {
+        rejectInvalidRange(
+            `${name} must be a positive${integer ? ' integer' : ''}`
+        )
+    }
+}
 function validateOptions(options: StreamAudioDataOptions): void {
     if (!options.fileUri) {
-        throw new AudioStreamError({
-            code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
-            message: 'fileUri is required',
-            recoverable: false,
-        })
+        rejectInvalidRange('fileUri is required')
     }
     if (
         options.startTimeMs !== undefined &&
         options.endTimeMs !== undefined &&
         options.startTimeMs >= options.endTimeMs
     ) {
-        throw new AudioStreamError({
-            code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
-            message: 'startTimeMs must be < endTimeMs',
-            recoverable: false,
-        })
+        rejectInvalidRange('startTimeMs must be < endTimeMs')
+    }
+    if (options.endTimeMs !== undefined && options.endTimeMs <= 0) {
+        rejectInvalidRange('endTimeMs must be > 0')
+    }
+    if (options.startTimeMs !== undefined && options.startTimeMs < 0) {
+        rejectInvalidRange('startTimeMs must be >= 0')
     }
     if (
         options.chunkDurationMs !== undefined &&
         (options.chunkDurationMs < 10 || options.chunkDurationMs > 60000)
     ) {
-        throw new AudioStreamError({
-            code: 'ERR_AUDIO_STREAM_INVALID_RANGE',
-            message: 'chunkDurationMs must be in [10, 60000]',
-            recoverable: false,
-        })
+        rejectInvalidRange('chunkDurationMs must be in [10, 60000]')
     }
+    if (
+        options.backpressureTimeoutMs !== undefined &&
+        options.backpressureTimeoutMs < 0
+    ) {
+        rejectInvalidRange('backpressureTimeoutMs must be >= 0')
+    }
+    assertPositiveFiniteOption(options.targetSampleRate, 'targetSampleRate')
+    assertPositiveFiniteOption(options.sampleRate, 'sampleRate')
+    assertPositiveFiniteOption(options.channels, 'channels', true)
+    assertPositiveFiniteOption(
+        options.maxBufferedChunks,
+        'maxBufferedChunks',
+        true
+    )
+    assertPositiveFiniteOption(options.maxChunkBytes, 'maxChunkBytes', true)
     if (
         options.streamFormat !== undefined &&
         options.streamFormat !== 'float32'
@@ -315,6 +361,7 @@ async function streamAudioDataNative(
     let processingChain: Promise<void> = Promise.resolve()
     let settled = false
     let abortListener: (() => void) | null = null
+    let lastProgress: StreamAudioDataProgress | null = null
     const finalize = () => {
         for (const sub of subs) {
@@ -413,6 +460,7 @@ async function streamAudioDataNative(
                 emitter.addListener(PROGRESS_EVENT, (raw: unknown) => {
                     const evt = raw as StreamAudioDataProgress
                     if (evt.requestId !== requestId) return
+                    lastProgress = evt
                     callbacks.onProgress!(evt)
                 })
             )
@@ -435,7 +483,10 @@ async function streamAudioDataNative(
                     .then(() => {
                         settle(() => {}, 'resolve', {
                             requestId,
-                            durationMs: evt.durationMs,
+                            durationMs:
+                                evt.durationMs > 0
+                                    ? evt.durationMs
+                                    : (lastProgress?.durationMs ?? 0),
                             sampleRate: evt.sampleRate,
                             channels: evt.channels,
                             chunks: evt.chunks ?? chunkCount,
@@ -462,7 +513,7 @@ async function streamAudioDataNative(
                         .then(() => {
                             settle(() => {}, 'resolve', {
                                 requestId,
-                                durationMs: 0,
+                                durationMs: lastProgress?.durationMs ?? 0,
                                 sampleRate:
                                     options.targetSampleRate ??
                                     options.sampleRate ??
@@ -494,7 +545,7 @@ async function streamAudioDataNative(
             if (options.signal.aborted) {
                 settle(() => {}, 'resolve', {
                     requestId,
-                    durationMs: 0,
+                    durationMs: lastProgress?.durationMs ?? 0,
                     sampleRate:
                         options.targetSampleRate ?? options.sampleRate ?? 0,
                     channels: options.channels ?? 1,
@@ -547,8 +598,8 @@ async function streamAudioDataWeb(
     try {
         const processed = await processAudioBuffer({
             fileUri: options.fileUri,
-            targetSampleRate: options.targetSampleRate ?? 16000,
-            targetChannels: options.channels ?? 1,
+            targetSampleRate: options.targetSampleRate,
+            targetChannels: options.channels,
             normalizeAudio: options.normalizeAudio ?? true,
             startTimeMs: options.startTimeMs,
             endTimeMs: options.endTimeMs,
@@ -559,16 +610,34 @@ async function streamAudioDataWeb(
         const durationMs = processed.durationMs
         const chunkDurationMs = options.chunkDurationMs ?? 1000
         let samplesPerChunk = Math.max(
-            1,
+            channels,
             Math.floor((chunkDurationMs / 1000) * sampleRate) * channels
         )
         if (options.maxChunkBytes) {
-            const maxSamples = Math.floor(options.maxChunkBytes / 4)
-            samplesPerChunk = Math.min(samplesPerChunk, maxSamples)
+            // Round down to a multiple of `channels` so we never split an
+            // interleaved frame across two chunks (that would produce a
+            // fractional `startSample` for the next chunk).
+            const rawMax = Math.floor(options.maxChunkBytes / 4)
+            const maxSamples = Math.max(
+                channels,
+                Math.floor(rawMax / channels) * channels
+            )
+            samplesPerChunk = Math.max(
+                channels,
+                Math.min(samplesPerChunk, maxSamples)
+            )
         }
-        const all = sanitizeFloat32(processed.channelData, options.normalizeAudio ?? true)
+        const all = sanitizeFloat32(
+            interleaveBuffer(processed.buffer, channels),
+            options.normalizeAudio ?? true
+        )
+        // Chunk timestamps are absolute (range start + offset) on every
+        // platform; progress is *elapsed within the range* so the
+        // `processedMs / durationMs` fraction stays in [0, 1] regardless of
+        // `startTimeMs`. The native decoders use the same split.
+        const rangeStartMs = options.startTimeMs ?? 0
         let chunkIndex = 0
         let emittedSamples = 0
         for (let off = 0; off < all.length; off += samplesPerChunk) {
@@ -587,11 +656,15 @@ async function streamAudioDataWeb(
             const slice = all.slice(off, end)
             const startSample = off / channels
             const endSample = end / channels
+            const startMs =
+                Math.round((startSample / sampleRate) * 1000) + rangeStartMs
+            const endMs =
+                Math.round((endSample / sampleRate) * 1000) + rangeStartMs
             const chunk: StreamAudioDataChunk = {
                 requestId,
                 chunkIndex,
-                startTimeMs: Math.round((startSample / sampleRate) * 1000),
-                endTimeMs: Math.round((endSample / sampleRate) * 1000),
+                startTimeMs: startMs,
+                endTimeMs: endMs,
                 durationMs: Math.round(
                     ((endSample - startSample) / sampleRate) * 1000
                 ),
@@ -603,11 +676,24 @@ async function streamAudioDataWeb(
                 isFinal: end >= all.length,
             }
             await callbacks.onChunk(chunk)
+            // Resample rounding (Math.ceil in processAudioBuffer) can push
+            // elapsed past the source-rate-derived range duration on the tail
+            // chunk. Cap so onProgress consumers always see a [0, 1] ratio,
+            // matching the native `coerceIn(0, 1)` / `min(1, max(0, …))`
+            // clamp.
+            const rawElapsedMs = Math.round((endSample / sampleRate) * 1000)
+            const elapsedMs =
+                durationMs > 0
+                    ? Math.min(rawElapsedMs, durationMs)
+                    : rawElapsedMs
             callbacks.onProgress?.({
                 requestId,
-                processedMs: chunk.endTimeMs,
+                processedMs: elapsedMs,
                 durationMs,
-                progress: durationMs > 0 ? chunk.endTimeMs / durationMs : 1,
+                progress:
+                    durationMs > 0
+                        ? Math.min(1, Math.max(0, elapsedMs / durationMs))
+                        : 1,
                 emittedChunks: chunkIndex + 1,
             })
             chunkIndex += 1
@@ -628,10 +714,28 @@ async function streamAudioDataWeb(
     }
 }
-function sanitizeFloat32(
-    input: Float32Array,
-    clamp: boolean
-): Float32Array {
+function interleaveBuffer(buffer: AudioBuffer, channels: number): Float32Array {
+    const numCh = Math.max(1, Math.min(channels, buffer.numberOfChannels))
+    const framesPerCh = buffer.length
+    if (numCh === 1) {
+        // Cheap path: clone channel 0 so downstream mutation doesn't touch the
+        // underlying AudioBuffer storage.
+        return new Float32Array(buffer.getChannelData(0))
+    }
+    const out = new Float32Array(framesPerCh * numCh)
+    const channelData: Float32Array[] = []
+    for (let c = 0; c < numCh; c++) {
+        channelData.push(buffer.getChannelData(c))
+    }
+    for (let f = 0; f < framesPerCh; f++) {
+        for (let c = 0; c < numCh; c++) {
+            out[f * numCh + c] = channelData[c][f]
+        }
+    }
+    return out
+}
+function sanitizeFloat32(input: Float32Array, clamp: boolean): Float32Array {
     if (!clamp) {
         // still need NaN/Inf sanitation
         for (let i = 0; i < input.length; i++) {

package/src/utils/audioProcessing.ts CHANGED Viewed

@@ -6,8 +6,8 @@ import { ConsoleLike } from '../AudioStudio.types'
 export interface ProcessAudioBufferOptions {
     arrayBuffer?: ArrayBuffer
     fileUri?: string
-    targetSampleRate: number
-    targetChannels: number
+    targetSampleRate?: number
+    targetChannels?: number
     normalizeAudio: boolean
     startTimeMs?: number
     endTimeMs?: number
@@ -84,9 +84,17 @@ export async function processAudioBuffer({
         // Create context at original sample rate first
         ctx =
             audioContext ||
-            new (window.AudioContext || (window as any).webkitAudioContext)()
+            new (window.AudioContext ||
+                (
+                    window as unknown as {
+                        webkitAudioContext?: typeof AudioContext
+                    }
+                ).webkitAudioContext)()
         buffer = await ctx.decodeAudioData(audioData)
+        const effectiveTargetSampleRate = targetSampleRate ?? buffer.sampleRate
+        const effectiveTargetChannels = targetChannels ?? buffer.numberOfChannels
         logger?.debug('Decoded audio buffer:', {
             originalChannels: buffer.numberOfChannels,
             originalSampleRate: buffer.sampleRate,
@@ -109,7 +117,7 @@ export async function processAudioBuffer({
             position !== undefined
                 ? Math.floor(
                       (position / bytesPerSample) *
-                          (buffer.sampleRate / targetSampleRate)
+                          (buffer.sampleRate / effectiveTargetSampleRate)
                   )
                 : startSample
@@ -117,11 +125,12 @@ export async function processAudioBuffer({
             length !== undefined
                 ? Math.floor(
                       (length / bytesPerSample) *
-                          (buffer.sampleRate / targetSampleRate)
+                          (buffer.sampleRate / effectiveTargetSampleRate)
                   )
-                : endTimeMs !== undefined && startTimeMs !== undefined
+                : endTimeMs !== undefined
                   ? Math.floor(
-                        ((endTimeMs - startTimeMs) / 1000) * buffer.sampleRate
+                        ((endTimeMs - (startTimeMs ?? 0)) / 1000) *
+                            buffer.sampleRate
                     )
                   : buffer.length - adjustedStartSample
@@ -130,8 +139,8 @@ export async function processAudioBuffer({
             adjustedStartSample,
             samplesNeeded,
             originalSampleRate: buffer.sampleRate,
-            targetSampleRate,
-            conversionRatio: buffer.sampleRate / targetSampleRate,
+            targetSampleRate: effectiveTargetSampleRate,
+            conversionRatio: buffer.sampleRate / effectiveTargetSampleRate,
             expectedDurationMs: (samplesNeeded / buffer.sampleRate) * 1000,
         })
@@ -153,9 +162,11 @@ export async function processAudioBuffer({
         // Create offline context for resampling
         const offlineCtx = new OfflineAudioContext(
-            targetChannels,
-            Math.ceil((samplesNeeded * targetSampleRate) / buffer.sampleRate),
-            targetSampleRate
+            effectiveTargetChannels,
+            Math.ceil(
+                (samplesNeeded * effectiveTargetSampleRate) / buffer.sampleRate
+            ),
+            effectiveTargetSampleRate
         )
         // Create source and connect
@@ -175,7 +186,7 @@ export async function processAudioBuffer({
         logger?.debug('Final processed audio:', {
             outputSamples: channelData.length,
-            outputSampleRate: targetSampleRate,
+            outputSampleRate: effectiveTargetSampleRate,
             durationMs,
         })
@@ -184,7 +195,7 @@ export async function processAudioBuffer({
             channelData,
             samples: channelData.length,
             durationMs,
-            sampleRate: targetSampleRate,
+            sampleRate: effectiveTargetSampleRate,
             channels: processedBuffer.numberOfChannels,
         }
     } catch (error) {

package/android/src/androidTest/assets/chorus.wav DELETED Viewed

Binary file

package/android/src/androidTest/assets/jfk.wav DELETED Viewed

Binary file

package/android/src/androidTest/assets/osr_us_000_0010_8k.wav DELETED Viewed

Binary file

package/android/src/androidTest/assets/recorder_hello_world.wav DELETED Viewed

Binary file