npm - @siteed/audio-studio - Versions diffs - 3.0.2 → 3.0.4 - Mend

@siteed/audio-studio 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

package/src/AudioAnalysis/audioFeaturesWasm.web.ts CHANGED Viewed

@@ -1,34 +1,6 @@
+import type { AudioFeaturesWasmResult } from './AudioAnalysis.types'
 import type { AudioFeaturesWasmModule } from './audio-features-wasm'
-import { getMelSpectrogramWasmUrl } from './wasmConfig'
-export interface AudioFeaturesWasmResult {
-    spectralCentroid: number
-    spectralFlatness: number
-    spectralRolloff: number
-    spectralBandwidth: number
-    mfcc: number[]
-    chromagram: number[]
-}
-let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
-function getModule(): Promise<AudioFeaturesWasmModule> {
-    if (!modulePromise) {
-        modulePromise = (async () => {
-            const url = getMelSpectrogramWasmUrl()
-            // webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
-            const mod = await import(
-                /* webpackIgnore: true */ /* @vite-ignore */ url
-            )
-            const factory = mod.default ?? mod
-            return factory() as Promise<AudioFeaturesWasmModule>
-        })().catch((err) => {
-            modulePromise = null
-            throw err
-        })
-    }
-    return modulePromise
-}
+import { getWasmModule } from './wasmLoader.web'
 // --- Struct layout for CAudioFeaturesResult (wasm32) ---
 // Offset  0: float spectralCentroid  (4 bytes)
@@ -83,79 +55,115 @@ function readResult(
 // --- Streaming (per-frame) API ---
-let streamingModule: AudioFeaturesWasmModule | null = null
-let streamingFramePtr = 0
-let streamingFrameCapacity = 0
-let streamingResultPtr = 0
 /**
- * Initialise the WASM streaming audio features processor.
- * Call once before computeAudioFeaturesFrameWasm().
+ * Encapsulates a single WASM streaming audio features session.
+ * Each instance owns its own WASM heap allocations; multiple sessions
+ * can exist concurrently without interfering with each other.
+ *
+ * Usage:
+ *   const session = await AudioFeaturesStreamingSession.create(sampleRate)
+ *   try {
+ *     for (const frame of frames) {
+ *       const result = session.computeFrame(frame)
+ *     }
+ *   } finally {
+ *     session.dispose()
+ *   }
  */
-export async function initAudioFeaturesWasm(
-    sampleRate: number,
-    fftLength = 1024,
-    nMfcc = 13,
-    nMelFilters = 26,
-    computeMfcc = true,
-    computeChroma = true
-): Promise<void> {
-    const Module = await getModule()
-    streamingModule = Module
-    Module._audio_features_init(
-        sampleRate,
-        fftLength,
-        nMfcc,
-        nMelFilters,
-        computeMfcc ? 1 : 0,
-        computeChroma ? 1 : 0
-    )
+export class AudioFeaturesStreamingSession {
+    private module: AudioFeaturesWasmModule
+    private framePtr = 0
+    private frameCapacity = 0
+    private resultPtr = 0
+    private constructor(module: AudioFeaturesWasmModule) {
+        this.module = module
+    }
-    // Pre-allocate result struct on WASM heap
-    if (streamingResultPtr) Module._free(streamingResultPtr)
-    streamingResultPtr = Module._malloc(STRUCT_SIZE)
-    // Zero-initialize to prevent freeing garbage pointers on first use
-    Module.HEAPU8.fill(0, streamingResultPtr, streamingResultPtr + STRUCT_SIZE)
+    /**
+     * Initialise a new streaming session. Loads the WASM module if needed.
+     */
+    static async create(
+        sampleRate: number,
+        fftLength = 1024,
+        nMfcc = 13,
+        nMelFilters = 26,
+        computeMfcc = true,
+        computeChroma = true
+    ): Promise<AudioFeaturesStreamingSession> {
+        const Module = await getWasmModule()
+        const session = new AudioFeaturesStreamingSession(Module)
+        Module._audio_features_init(
+            sampleRate,
+            fftLength,
+            nMfcc,
+            nMelFilters,
+            computeMfcc ? 1 : 0,
+            computeChroma ? 1 : 0
+        )
+        // Pre-allocate result struct on WASM heap
+        session.resultPtr = Module._malloc(STRUCT_SIZE)
+        // Zero-initialize to prevent freeing garbage pointers on first use
+        Module.HEAPU8.fill(
+            0,
+            session.resultPtr,
+            session.resultPtr + STRUCT_SIZE
+        )
+        return session
+    }
-    // Frame input buffer allocated on demand
-    streamingFrameCapacity = 0
-    streamingFramePtr = 0
-}
+    /**
+     * Compute audio features for a single frame.
+     * Returns null on error or if the session has been disposed.
+     */
+    computeFrame(samples: Float32Array): AudioFeaturesWasmResult | null {
+        if (!this.resultPtr) return null
+        const Module = this.module
+        // (Re-)allocate frame input buffer if needed
+        if (samples.length > this.frameCapacity) {
+            if (this.framePtr) Module._free(this.framePtr)
+            this.framePtr = Module._malloc(samples.length * 4)
+            this.frameCapacity = samples.length
+        }
-/**
- * Compute audio features for a single frame via WASM C++.
- * Returns null if not initialised or on error.
- */
-export function computeAudioFeaturesFrameWasm(
-    samples: Float32Array
-): AudioFeaturesWasmResult | null {
-    if (!streamingModule || !streamingResultPtr) return null
-    const Module = streamingModule
-    // (Re-)allocate frame input buffer if needed
-    if (samples.length > streamingFrameCapacity) {
-        if (streamingFramePtr) Module._free(streamingFramePtr)
-        streamingFramePtr = Module._malloc(samples.length * 4)
-        streamingFrameCapacity = samples.length
-    }
+        // Copy samples to WASM heap
+        Module.HEAPF32.set(samples, this.framePtr >> 2)
-    // Copy samples to WASM heap
-    Module.HEAPF32.set(samples, streamingFramePtr >> 2)
+        const ok = Module._audio_features_compute_frame(
+            this.framePtr,
+            samples.length,
+            this.resultPtr
+        )
+        if (!ok) return null
-    const ok = Module._audio_features_compute_frame(
-        streamingFramePtr,
-        samples.length,
-        streamingResultPtr
-    )
-    if (!ok) return null
+        const result = readResult(Module, this.resultPtr)
-    const result = readResult(Module, streamingResultPtr)
+        // Free internal arrays (mfcc, chromagram) allocated by C
+        Module._audio_features_free_arrays(this.resultPtr)
-    // Free internal arrays (mfcc, chromagram) allocated by C
-    Module._audio_features_free_arrays(streamingResultPtr)
+        return result
+    }
-    return result
+    /**
+     * Free all WASM heap allocations owned by this session.
+     * The session must not be used after calling dispose().
+     */
+    dispose(): void {
+        const Module = this.module
+        if (this.framePtr) {
+            Module._free(this.framePtr)
+            this.framePtr = 0
+            this.frameCapacity = 0
+        }
+        if (this.resultPtr) {
+            Module._free(this.resultPtr)
+            this.resultPtr = 0
+        }
+    }
 }
 // --- Batch API ---
@@ -173,7 +181,7 @@ export async function computeAudioFeaturesWasm(
     computeMfcc = true,
     computeChroma = true
 ): Promise<AudioFeaturesWasmResult> {
-    const Module = await getModule()
+    const Module = await getWasmModule()
     const numSamples = audioData.length
     const inputPtr = Module._malloc(numSamples * 4)

package/src/AudioAnalysis/extractAudioAnalysis.ts CHANGED Viewed

@@ -22,6 +22,14 @@ import { getWavFileInfo, WavFileInfo } from '../utils/getWavFileInfo'
 import { InlineFeaturesExtractor } from '../workers/InlineFeaturesExtractor.web'
 import { wasmGlueJs } from '../workers/wasmGlueString.web'
+/**
+ * Build the analysis web worker from an in-memory script.
+ *
+ * The worker source is a Blob made of the WASM glue code, a newline and the
+ * inline features-extractor code, exposed through an object URL.
+ *
+ * @returns The started worker together with the object URL backing it. The
+ *          caller owns both and is expected to call
+ *          `URL.revokeObjectURL(workerUrl)` and `worker.terminate()` once done.
+ * @throws Re-throws whatever the `Worker` constructor throws; in that case the
+ *         object URL has already been revoked.
+ */
+function createAnalysisWorker(): { worker: Worker; workerUrl: string } {
+    const blob = new Blob([wasmGlueJs, '\n', InlineFeaturesExtractor], {
+        type: 'application/javascript',
+    })
+    const workerUrl = URL.createObjectURL(blob)
+    try {
+        return { worker: new Worker(workerUrl), workerUrl }
+    } catch (error) {
+        // The caller never receives workerUrl on failure, so nobody else could
+        // revoke it: release it here before propagating the error.
+        URL.revokeObjectURL(workerUrl)
+        throw error
+    }
+}
 function calculateCRC32ForDataPoint(data: Float32Array): number {
     // Convert float array to byte array for CRC32
     const byteArray = new Uint8Array(data.length * 4)
@@ -137,16 +145,13 @@ export async function extractAudioAnalysis(
                 const channelData = processedBuffer.buffer.getChannelData(0)
                 // Create worker blob: WASM glue (defines createMelSpectrogramModule) + worker code
-                const blob = new Blob(
-                    [wasmGlueJs, '\n', InlineFeaturesExtractor],
-                    { type: 'application/javascript' }
-                )
-                const workerUrl = URL.createObjectURL(blob)
-                const worker = new Worker(workerUrl)
+                const { worker, workerUrl } = createAnalysisWorker()
                 return new Promise((resolve, reject) => {
                     worker.onmessage = (event) => {
                         if (event.data.error) {
+                            URL.revokeObjectURL(workerUrl)
+                            worker.terminate()
                             reject(new Error(event.data.error))
                             return
                         }
@@ -300,20 +305,16 @@ export const extractRawWavAnalysis = async ({
         const constrainedChannelData = channelData.slice(startIndex, endIndex)
         return new Promise((resolve, reject) => {
-            const blob = new Blob([wasmGlueJs, '\n', InlineFeaturesExtractor], {
-                type: 'application/javascript',
-            })
-            const url = URL.createObjectURL(blob)
-            const worker = new Worker(url)
+            const { worker, workerUrl } = createAnalysisWorker()
             worker.onmessage = (event) => {
-                URL.revokeObjectURL(url)
+                URL.revokeObjectURL(workerUrl)
                 worker.terminate()
                 resolve(event.data.result)
             }
             worker.onerror = (error) => {
-                URL.revokeObjectURL(url)
+                URL.revokeObjectURL(workerUrl)
                 worker.terminate()
                 reject(error)
             }
@@ -337,13 +338,15 @@ export const extractRawWavAnalysis = async ({
             fileUri,
             segmentDurationMs,
         })
-        const res = await AudioStudioModule.extractAudioAnalysis({
-            fileUri,
-            segmentDurationMs,
-            features,
-            position,
-            length,
-        })
+        const res = await AudioStudioModule.extractAudioAnalysis(
+            cleanNativeOptions({
+                fileUri,
+                segmentDurationMs,
+                features,
+                position,
+                length,
+            })
+        )
         logger?.log(`extractAudioAnalysis`, res)
         return res
     }

package/src/AudioAnalysis/extractAudioData.ts CHANGED Viewed

@@ -1,13 +1,195 @@
-import { ExtractAudioDataOptions } from '../AudioStudio.types'
+import {
+    BitDepth,
+    ExtractAudioDataOptions,
+    ExtractedAudioData,
+} from '../AudioStudio.types'
 import AudioStudioModule from '../AudioStudioModule'
 import { isWeb } from '../constants'
+import { processAudioBuffer } from '../utils/audioProcessing'
 import { cleanNativeOptions } from '../utils/cleanNativeOptions'
+import crc32 from '../utils/crc32'
+import { writeWavHeader } from '../utils/writeWavHeader'
-export const extractAudioData = async (props: ExtractAudioDataOptions) => {
+export const extractAudioData = async (
+    props: ExtractAudioDataOptions
+): Promise<ExtractedAudioData> => {
     if (isWeb) {
-        // Web implementation handles logger natively in AudioStudioModule.ts
-        return await AudioStudioModule.extractAudioData(props)
+        try {
+            const {
+                fileUri,
+                position,
+                length,
+                startTimeMs,
+                endTimeMs,
+                decodingOptions,
+                includeNormalizedData,
+                includeBase64Data,
+                includeWavHeader = false,
+                logger,
+            } = props
+            logger?.debug('EXTRACT AUDIO - Step 1: Initial request', {
+                fileUri,
+                extractionParams: {
+                    position,
+                    length,
+                    startTimeMs,
+                    endTimeMs,
+                },
+                decodingOptions: {
+                    targetSampleRate:
+                        decodingOptions?.targetSampleRate ?? 16000,
+                    targetChannels: decodingOptions?.targetChannels ?? 1,
+                    targetBitDepth: decodingOptions?.targetBitDepth ?? 16,
+                    normalizeAudio: decodingOptions?.normalizeAudio ?? false,
+                },
+                outputOptions: {
+                    includeNormalizedData,
+                    includeBase64Data,
+                    includeWavHeader,
+                },
+            })
+            // Process the audio using shared helper function
+            const processedBuffer = await processAudioBuffer({
+                fileUri,
+                targetSampleRate: decodingOptions?.targetSampleRate ?? 16000,
+                targetChannels: decodingOptions?.targetChannels ?? 1,
+                normalizeAudio: decodingOptions?.normalizeAudio ?? false,
+                position,
+                length,
+                startTimeMs,
+                endTimeMs,
+                logger,
+            })
+            logger?.debug('EXTRACT AUDIO - Step 2: Audio processing complete', {
+                processedData: {
+                    samples: processedBuffer.samples,
+                    sampleRate: processedBuffer.sampleRate,
+                    channels: processedBuffer.channels,
+                    durationMs: processedBuffer.durationMs,
+                },
+            })
+            const channelData = processedBuffer.channelData
+            const bitDepth = (decodingOptions?.targetBitDepth ?? 16) as BitDepth
+            const bytesPerSample = bitDepth / 8
+            const numSamples = processedBuffer.samples
+            logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion setup', {
+                channelData: {
+                    length: channelData.length,
+                    first: channelData[0],
+                    last: channelData[channelData.length - 1],
+                },
+                calculation: {
+                    bitDepth,
+                    bytesPerSample,
+                    numSamples,
+                    expectedBytes: numSamples * bytesPerSample,
+                },
+            })
+            // Allocate the PCM output buffer: numSamples * bytesPerSample bytes
+            const pcmData = new Uint8Array(numSamples * bytesPerSample)
+            let offset = 0
+            // Convert Float32 samples to PCM format
+            for (let i = 0; i < numSamples; i++) {
+                const sample = channelData[i]
+                const value = Math.max(-1, Math.min(1, sample))
+                // Convert to 16-bit signed integer
+                let intValue = Math.round(value * 32767)
+                // Handle negative values correctly
+                if (intValue < 0) {
+                    intValue = 65536 + intValue
+                }
+                // Write as little-endian
+                pcmData[offset++] = intValue & 255 // Low byte
+                pcmData[offset++] = (intValue >> 8) & 255 // High byte
+            }
+            const durationMs = Math.round(
+                (numSamples / processedBuffer.sampleRate) * 1000
+            )
+            logger?.debug('EXTRACT AUDIO - Step 4: Final output', {
+                pcmData: {
+                    length: pcmData.length,
+                    first: pcmData[0],
+                    last: pcmData[pcmData.length - 1],
+                },
+                timing: {
+                    numSamples,
+                    sampleRate: processedBuffer.sampleRate,
+                    durationMs,
+                    shouldBe3000ms: endTimeMs
+                        ? endTimeMs - (startTimeMs ?? 0) === 3000
+                        : undefined,
+                },
+            })
+            const result: ExtractedAudioData = {
+                pcmData: new Uint8Array(pcmData.buffer),
+                sampleRate: processedBuffer.sampleRate,
+                channels: processedBuffer.channels,
+                bitDepth,
+                durationMs,
+                format: `pcm_${bitDepth}bit` as const,
+                samples: numSamples,
+            }
+            // Add WAV header if requested
+            if (includeWavHeader) {
+                logger?.debug('EXTRACT AUDIO - Step 5: Adding WAV header', {
+                    originalLength: pcmData.length,
+                    newLength: result.pcmData.length,
+                    firstBytes: Array.from(result.pcmData.slice(0, 44)), // WAV header is 44 bytes
+                })
+                const wavBuffer = writeWavHeader({
+                    buffer: pcmData.buffer.slice(0, pcmData.length),
+                    sampleRate: processedBuffer.sampleRate,
+                    numChannels: processedBuffer.channels,
+                    bitDepth,
+                })
+                result.pcmData = new Uint8Array(wavBuffer)
+                result.hasWavHeader = true
+            }
+            if (includeNormalizedData) {
+                result.normalizedData = channelData
+            }
+            if (includeBase64Data) {
+                result.base64Data = btoa(
+                    String.fromCharCode(...new Uint8Array(pcmData.buffer))
+                )
+            }
+            if (props.computeChecksum) {
+                result.checksum = crc32.buf(pcmData)
+            }
+            logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion complete', {
+                pcmStats: {
+                    length: pcmData.length,
+                    bytesPerSample,
+                    totalSamples: numSamples,
+                    firstBytes: Array.from(pcmData.slice(0, 16)),
+                    lastBytes: Array.from(pcmData.slice(-16)),
+                },
+            })
+            return result
+        } catch (error) {
+            props.logger?.error('EXTRACT AUDIO - Error:', error)
+            throw error
+        }
     }
     // Native: only pass serializable fields — logger causes crash on Android
     const { logger: _logger, ...nativeOptions } = props
     // Clean undefined values to avoid Android Kotlin bridge crash

package/src/AudioAnalysis/melSpectrogramWasm.web.ts CHANGED Viewed

@@ -1,29 +1,5 @@
 import type { MelSpectrogramWasmModule } from './mel-spectrogram-wasm'
-import { getMelSpectrogramWasmUrl, _registerModuleReset } from './wasmConfig'
-let modulePromise: Promise<MelSpectrogramWasmModule> | null = null
-_registerModuleReset(() => {
-    modulePromise = null
-})
-function getModule(): Promise<MelSpectrogramWasmModule> {
-    if (!modulePromise) {
-        modulePromise = (async () => {
-            const url = getMelSpectrogramWasmUrl()
-            // webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
-            const mod = await import(
-                /* webpackIgnore: true */ /* @vite-ignore */ url
-            )
-            const factory = mod.default ?? mod
-            return factory() as Promise<MelSpectrogramWasmModule>
-        })().catch((err) => {
-            modulePromise = null
-            throw err
-        })
-    }
-    return modulePromise
-}
+import { getWasmModule } from './wasmLoader.web'
 // --- Streaming (per-frame) API for live mel spectrogram ---
@@ -46,7 +22,7 @@ export async function initMelStreamingWasm(
     fMin = 0,
     fMax = 0
 ): Promise<void> {
-    const Module = await getModule()
+    const Module = (await getWasmModule()) as MelSpectrogramWasmModule
     streamingModule = Module
     const actualFMax = fMax > 0 ? fMax : sampleRate / 2
     Module._mel_spectrogram_init(
@@ -61,6 +37,13 @@ export async function initMelStreamingWasm(
     )
     streamingNMels = nMels
+    // Free frame buffer from previous session (if any) to avoid leak on re-init
+    if (streamingFramePtr) {
+        Module._free(streamingFramePtr)
+        streamingFramePtr = 0
+        streamingFrameCapacity = 0
+    }
     // Pre-allocate output buffer (fixed size)
     if (streamingMelPtr) Module._free(streamingMelPtr)
     streamingMelPtr = Module._malloc(nMels * 4)
@@ -120,7 +103,7 @@ export async function computeMelSpectrogramWasm(
     normalize: boolean,
     logScale: boolean
 ): Promise<number[][]> {
-    const Module = await getModule()
+    const Module = (await getWasmModule()) as MelSpectrogramWasmModule
     const fftLength = 2048
     const windowTypeInt = windowType === 'hamming' ? 1 : 0

package/src/AudioAnalysis/wasmConfig.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 // Version is inlined here — keep in sync with package.json when releasing.
 // The publish.sh script should bump this string alongside package.json.
-const WASM_VERSION = '3.0.2-beta.1'
+const WASM_VERSION = '3.0.2'
 // jsDelivr syncs from npm automatically within ~5 min of publish.
 // GitHub release fallback (attach mel-spectrogram.js as a release asset):
 //   https://github.com/deeeed/audiolab/releases/download/@siteed/audio-studio@VERSION/mel-spectrogram.js
@@ -8,15 +8,15 @@ const WASM_VERSION = '3.0.2-beta.1'
 const DEFAULT_WASM_CDN = `https://cdn.jsdelivr.net/npm/@siteed/audio-studio@${WASM_VERSION}/prebuilt/wasm/mel-spectrogram.js`
 let _wasmUrl: string = DEFAULT_WASM_CDN
-let _modulePromiseReset: (() => void) | null = null
+const _resetListeners: (() => void)[] = []
 export function _registerModuleReset(fn: () => void): void {
-    _modulePromiseReset = fn
+    _resetListeners.push(fn)
 }
 export function setMelSpectrogramWasmUrl(url: string): void {
     _wasmUrl = url
-    _modulePromiseReset?.() // invalidate cached module so next call re-fetches
+    _resetListeners.forEach((fn) => fn())
 }
 export function getMelSpectrogramWasmUrl(): string {

package/src/AudioAnalysis/wasmLoader.web.ts ADDED Viewed

@@ -0,0 +1,53 @@
+import type { AudioFeaturesWasmModule } from './audio-features-wasm'
+import { getMelSpectrogramWasmUrl, _registerModuleReset } from './wasmConfig'
+// Global factory name for the shared WASM binary. Despite the name referring to
+// mel spectrogram, this single binary also exports all audio-features functions.
+const WASM_GLOBAL_NAME = 'createMelSpectrogramModule'
+let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
+_registerModuleReset(() => {
+    modulePromise = null
+})
+/**
+ * Load a script by appending a `<script>` element to `document.head`.
+ *
+ * @param url Address assigned to the element's `src`.
+ * @returns A promise that resolves when the element fires `load` and rejects
+ *          with `Failed to load script: <url>` when it fires `error`.
+ */
+function loadScriptTag(url: string): Promise<void> {
+    return new Promise<void>((onLoaded, onFailed) => {
+        const tag = document.createElement('script')
+        // Handlers are attached before the element is inserted into the page.
+        tag.onerror = () => {
+            onFailed(new Error(`Failed to load script: ${url}`))
+        }
+        tag.onload = () => {
+            onLoaded()
+        }
+        tag.src = url
+        document.head.appendChild(tag)
+    })
+}
+/**
+ * Load (at most once) and return the shared WASM module.
+ *
+ * The pending or resolved promise is cached in `modulePromise`. The cache is
+ * cleared when loading fails, and by the reset callback registered with
+ * `_registerModuleReset`, so the next call starts a fresh load from the URL
+ * returned by `getMelSpectrogramWasmUrl()` at that time.
+ *
+ * Loading strategy: a dynamic `import()` of the URL is tried first. If it
+ * rejects, or resolves without a function export, the same URL is loaded via
+ * a `<script>` tag and the factory is read from the global named by
+ * `WASM_GLOBAL_NAME`.
+ *
+ * @returns The result of calling the module factory.
+ * @throws Error if the `<script>` fallback fails to load.
+ * @throws TypeError if no factory function is found after both attempts.
+ */
+export function getWasmModule(): Promise<AudioFeaturesWasmModule> {
+    if (!modulePromise) {
+        modulePromise = (async () => {
+            const url = getMelSpectrogramWasmUrl()
+            let factory: unknown
+            try {
+                // Try ESM import first. webpackIgnore + @vite-ignore prevent
+                // bundlers from trying to resolve the runtime URL.
+                const mod = await import(
+                    /* webpackIgnore: true */ /* @vite-ignore */ url
+                )
+                factory = mod.default ?? mod
+            } catch {
+                // import() may reject instead of resolving to a namespace
+                // without a usable export. Leave the factory unset so the
+                // <script> fallback below still gets its chance.
+                factory = undefined
+            }
+            if (typeof factory !== 'function') {
+                // UMD fallback: load via <script> tag so the top-level `var` becomes a global and
+                // document.currentScript.src is set (Emscripten uses it to locate the .wasm binary).
+                await loadScriptTag(url)
+                factory = (globalThis as Record<string, unknown>)[
+                    WASM_GLOBAL_NAME
+                ]
+            }
+            if (typeof factory !== 'function') {
+                throw new TypeError(
+                    `WASM factory '${WASM_GLOBAL_NAME}' not found after loading ${url}`
+                )
+            }
+            return (factory as () => Promise<AudioFeaturesWasmModule>)()
+        })().catch((err) => {
+            // Drop the failed promise so a later call can retry.
+            modulePromise = null
+            throw err
+        })
+    }
+    return modulePromise
+}