@siteed/audio-studio 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/CHANGELOG.md +19 -1
  2. package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +41 -35
  3. package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +500 -479
  4. package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  5. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +10 -7
  6. package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
  7. package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js +78 -97
  8. package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
  9. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +15 -12
  10. package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  11. package/build/cjs/AudioAnalysis/extractAudioData.js +144 -2
  12. package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -1
  13. package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js +9 -56
  14. package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
  15. package/build/cjs/AudioAnalysis/wasmConfig.js +4 -4
  16. package/build/cjs/AudioAnalysis/wasmConfig.js.map +1 -1
  17. package/build/cjs/AudioAnalysis/wasmLoader.web.js +79 -0
  18. package/build/cjs/AudioAnalysis/wasmLoader.web.js.map +1 -0
  19. package/build/cjs/AudioStudioModule.js +4 -599
  20. package/build/cjs/AudioStudioModule.js.map +1 -1
  21. package/build/cjs/trimAudio.js +227 -0
  22. package/build/cjs/trimAudio.js.map +1 -1
  23. package/build/cjs/utils/encodeCompressedAudio.web.js +65 -0
  24. package/build/cjs/utils/encodeCompressedAudio.web.js.map +1 -0
  25. package/build/cjs/utils/resampleAudioBuffer.web.js +25 -0
  26. package/build/cjs/utils/resampleAudioBuffer.web.js.map +1 -0
  27. package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  28. package/build/esm/AudioAnalysis/audioFeaturesWasm.js +8 -5
  29. package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
  30. package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js +76 -62
  31. package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
  32. package/build/esm/AudioAnalysis/extractAudioAnalysis.js +15 -12
  33. package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  34. package/build/esm/AudioAnalysis/extractAudioData.js +144 -2
  35. package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -1
  36. package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js +9 -23
  37. package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
  38. package/build/esm/AudioAnalysis/wasmConfig.js +4 -4
  39. package/build/esm/AudioAnalysis/wasmConfig.js.map +1 -1
  40. package/build/esm/AudioAnalysis/wasmLoader.web.js +43 -0
  41. package/build/esm/AudioAnalysis/wasmLoader.web.js.map +1 -0
  42. package/build/esm/AudioStudioModule.js +4 -596
  43. package/build/esm/AudioStudioModule.js.map +1 -1
  44. package/build/esm/trimAudio.js +227 -0
  45. package/build/esm/trimAudio.js.map +1 -1
  46. package/build/esm/utils/encodeCompressedAudio.web.js +62 -0
  47. package/build/esm/utils/encodeCompressedAudio.web.js.map +1 -0
  48. package/build/esm/utils/resampleAudioBuffer.web.js +22 -0
  49. package/build/esm/utils/resampleAudioBuffer.web.js.map +1 -0
  50. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +11 -0
  51. package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  52. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +5 -9
  53. package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -1
  54. package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts +35 -16
  55. package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts.map +1 -1
  56. package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  57. package/build/types/AudioAnalysis/extractAudioData.d.ts +2 -2
  58. package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -1
  59. package/build/types/AudioAnalysis/melSpectrogramWasm.web.d.ts.map +1 -1
  60. package/build/types/AudioAnalysis/wasmLoader.web.d.ts +3 -0
  61. package/build/types/AudioAnalysis/wasmLoader.web.d.ts.map +1 -0
  62. package/build/types/AudioStudioModule.d.ts.map +1 -1
  63. package/build/types/trimAudio.d.ts.map +1 -1
  64. package/build/types/utils/encodeCompressedAudio.web.d.ts +10 -0
  65. package/build/types/utils/encodeCompressedAudio.web.d.ts.map +1 -0
  66. package/build/types/utils/resampleAudioBuffer.web.d.ts +2 -0
  67. package/build/types/utils/resampleAudioBuffer.web.d.ts.map +1 -0
  68. package/ios/AudioStreamManager.swift +135 -89
  69. package/ios/AudioStudioModule.swift +239 -216
  70. package/package.json +1 -1
  71. package/src/AudioAnalysis/AudioAnalysis.types.ts +12 -0
  72. package/src/AudioAnalysis/audioFeaturesWasm.ts +17 -22
  73. package/src/AudioAnalysis/audioFeaturesWasm.web.ts +102 -94
  74. package/src/AudioAnalysis/extractAudioAnalysis.ts +23 -20
  75. package/src/AudioAnalysis/extractAudioData.ts +186 -4
  76. package/src/AudioAnalysis/melSpectrogramWasm.web.ts +10 -27
  77. package/src/AudioAnalysis/wasmConfig.ts +4 -4
  78. package/src/AudioAnalysis/wasmLoader.web.ts +53 -0
  79. package/src/AudioStudioModule.ts +6 -854
  80. package/src/trimAudio.ts +351 -0
  81. package/src/utils/encodeCompressedAudio.web.ts +78 -0
  82. package/src/utils/resampleAudioBuffer.web.ts +39 -0
  83. package/build/cjs/AudioAnalysis/extractWaveform.js +0 -18
  84. package/build/cjs/AudioAnalysis/extractWaveform.js.map +0 -1
  85. package/build/esm/AudioAnalysis/extractWaveform.js +0 -11
  86. package/build/esm/AudioAnalysis/extractWaveform.js.map +0 -1
  87. package/build/types/AudioAnalysis/extractWaveform.d.ts +0 -8
  88. package/build/types/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  89. package/src/AudioAnalysis/extractWaveform.ts +0 -22
@@ -1,34 +1,6 @@
1
+ import type { AudioFeaturesWasmResult } from './AudioAnalysis.types'
1
2
  import type { AudioFeaturesWasmModule } from './audio-features-wasm'
2
- import { getMelSpectrogramWasmUrl } from './wasmConfig'
3
-
4
- export interface AudioFeaturesWasmResult {
5
- spectralCentroid: number
6
- spectralFlatness: number
7
- spectralRolloff: number
8
- spectralBandwidth: number
9
- mfcc: number[]
10
- chromagram: number[]
11
- }
12
-
13
- let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
14
-
15
- function getModule(): Promise<AudioFeaturesWasmModule> {
16
- if (!modulePromise) {
17
- modulePromise = (async () => {
18
- const url = getMelSpectrogramWasmUrl()
19
- // webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
20
- const mod = await import(
21
- /* webpackIgnore: true */ /* @vite-ignore */ url
22
- )
23
- const factory = mod.default ?? mod
24
- return factory() as Promise<AudioFeaturesWasmModule>
25
- })().catch((err) => {
26
- modulePromise = null
27
- throw err
28
- })
29
- }
30
- return modulePromise
31
- }
3
+ import { getWasmModule } from './wasmLoader.web'
32
4
 
33
5
  // --- Struct layout for CAudioFeaturesResult (wasm32) ---
34
6
  // Offset 0: float spectralCentroid (4 bytes)
@@ -83,79 +55,115 @@ function readResult(
83
55
 
84
56
  // --- Streaming (per-frame) API ---
85
57
 
86
- let streamingModule: AudioFeaturesWasmModule | null = null
87
- let streamingFramePtr = 0
88
- let streamingFrameCapacity = 0
89
- let streamingResultPtr = 0
90
-
91
58
  /**
92
- * Initialise the WASM streaming audio features processor.
93
- * Call once before computeAudioFeaturesFrameWasm().
59
+ * Encapsulates a single WASM streaming audio features session.
60
+ * Each instance owns its own WASM heap allocations; multiple sessions
61
+ * can exist concurrently without interfering with each other.
62
+ *
63
+ * Usage:
64
+ * const session = await AudioFeaturesStreamingSession.create(sampleRate)
65
+ * try {
66
+ * for (const frame of frames) {
67
+ * const result = session.computeFrame(frame)
68
+ * }
69
+ * } finally {
70
+ * session.dispose()
71
+ * }
94
72
  */
95
- export async function initAudioFeaturesWasm(
96
- sampleRate: number,
97
- fftLength = 1024,
98
- nMfcc = 13,
99
- nMelFilters = 26,
100
- computeMfcc = true,
101
- computeChroma = true
102
- ): Promise<void> {
103
- const Module = await getModule()
104
- streamingModule = Module
105
-
106
- Module._audio_features_init(
107
- sampleRate,
108
- fftLength,
109
- nMfcc,
110
- nMelFilters,
111
- computeMfcc ? 1 : 0,
112
- computeChroma ? 1 : 0
113
- )
73
+ export class AudioFeaturesStreamingSession {
74
+ private module: AudioFeaturesWasmModule
75
+ private framePtr = 0
76
+ private frameCapacity = 0
77
+ private resultPtr = 0
78
+
79
+ private constructor(module: AudioFeaturesWasmModule) {
80
+ this.module = module
81
+ }
114
82
 
115
- // Pre-allocate result struct on WASM heap
116
- if (streamingResultPtr) Module._free(streamingResultPtr)
117
- streamingResultPtr = Module._malloc(STRUCT_SIZE)
118
- // Zero-initialize to prevent freeing garbage pointers on first use
119
- Module.HEAPU8.fill(0, streamingResultPtr, streamingResultPtr + STRUCT_SIZE)
83
+ /**
84
+ * Initialise a new streaming session. Loads the WASM module if needed.
85
+ */
86
+ static async create(
87
+ sampleRate: number,
88
+ fftLength = 1024,
89
+ nMfcc = 13,
90
+ nMelFilters = 26,
91
+ computeMfcc = true,
92
+ computeChroma = true
93
+ ): Promise<AudioFeaturesStreamingSession> {
94
+ const Module = await getWasmModule()
95
+ const session = new AudioFeaturesStreamingSession(Module)
96
+
97
+ Module._audio_features_init(
98
+ sampleRate,
99
+ fftLength,
100
+ nMfcc,
101
+ nMelFilters,
102
+ computeMfcc ? 1 : 0,
103
+ computeChroma ? 1 : 0
104
+ )
105
+
106
+ // Pre-allocate result struct on WASM heap
107
+ session.resultPtr = Module._malloc(STRUCT_SIZE)
108
+ // Zero-initialize to prevent freeing garbage pointers on first use
109
+ Module.HEAPU8.fill(
110
+ 0,
111
+ session.resultPtr,
112
+ session.resultPtr + STRUCT_SIZE
113
+ )
114
+
115
+ return session
116
+ }
120
117
 
121
- // Frame input buffer allocated on demand
122
- streamingFrameCapacity = 0
123
- streamingFramePtr = 0
124
- }
118
+ /**
119
+ * Compute audio features for a single frame.
120
+ * Returns null on error or if the session has been disposed.
121
+ */
122
+ computeFrame(samples: Float32Array): AudioFeaturesWasmResult | null {
123
+ if (!this.resultPtr) return null
124
+ const Module = this.module
125
+
126
+ // (Re-)allocate frame input buffer if needed
127
+ if (samples.length > this.frameCapacity) {
128
+ if (this.framePtr) Module._free(this.framePtr)
129
+ this.framePtr = Module._malloc(samples.length * 4)
130
+ this.frameCapacity = samples.length
131
+ }
125
132
 
126
- /**
127
- * Compute audio features for a single frame via WASM C++.
128
- * Returns null if not initialised or on error.
129
- */
130
- export function computeAudioFeaturesFrameWasm(
131
- samples: Float32Array
132
- ): AudioFeaturesWasmResult | null {
133
- if (!streamingModule || !streamingResultPtr) return null
134
- const Module = streamingModule
135
-
136
- // (Re-)allocate frame input buffer if needed
137
- if (samples.length > streamingFrameCapacity) {
138
- if (streamingFramePtr) Module._free(streamingFramePtr)
139
- streamingFramePtr = Module._malloc(samples.length * 4)
140
- streamingFrameCapacity = samples.length
141
- }
133
+ // Copy samples to WASM heap
134
+ Module.HEAPF32.set(samples, this.framePtr >> 2)
142
135
 
143
- // Copy samples to WASM heap
144
- Module.HEAPF32.set(samples, streamingFramePtr >> 2)
136
+ const ok = Module._audio_features_compute_frame(
137
+ this.framePtr,
138
+ samples.length,
139
+ this.resultPtr
140
+ )
141
+ if (!ok) return null
145
142
 
146
- const ok = Module._audio_features_compute_frame(
147
- streamingFramePtr,
148
- samples.length,
149
- streamingResultPtr
150
- )
151
- if (!ok) return null
143
+ const result = readResult(Module, this.resultPtr)
152
144
 
153
- const result = readResult(Module, streamingResultPtr)
145
+ // Free internal arrays (mfcc, chromagram) allocated by C
146
+ Module._audio_features_free_arrays(this.resultPtr)
154
147
 
155
- // Free internal arrays (mfcc, chromagram) allocated by C
156
- Module._audio_features_free_arrays(streamingResultPtr)
148
+ return result
149
+ }
157
150
 
158
- return result
151
+ /**
152
+ * Free all WASM heap allocations owned by this session.
153
+ * The session must not be used after calling dispose().
154
+ */
155
+ dispose(): void {
156
+ const Module = this.module
157
+ if (this.framePtr) {
158
+ Module._free(this.framePtr)
159
+ this.framePtr = 0
160
+ this.frameCapacity = 0
161
+ }
162
+ if (this.resultPtr) {
163
+ Module._free(this.resultPtr)
164
+ this.resultPtr = 0
165
+ }
166
+ }
159
167
  }
160
168
 
161
169
  // --- Batch API ---
@@ -173,7 +181,7 @@ export async function computeAudioFeaturesWasm(
173
181
  computeMfcc = true,
174
182
  computeChroma = true
175
183
  ): Promise<AudioFeaturesWasmResult> {
176
- const Module = await getModule()
184
+ const Module = await getWasmModule()
177
185
 
178
186
  const numSamples = audioData.length
179
187
  const inputPtr = Module._malloc(numSamples * 4)
@@ -22,6 +22,14 @@ import { getWavFileInfo, WavFileInfo } from '../utils/getWavFileInfo'
22
22
  import { InlineFeaturesExtractor } from '../workers/InlineFeaturesExtractor.web'
23
23
  import { wasmGlueJs } from '../workers/wasmGlueString.web'
24
24
 
25
+ function createAnalysisWorker(): { worker: Worker; workerUrl: string } {
26
+ const blob = new Blob([wasmGlueJs, '\n', InlineFeaturesExtractor], {
27
+ type: 'application/javascript',
28
+ })
29
+ const workerUrl = URL.createObjectURL(blob)
30
+ return { worker: new Worker(workerUrl), workerUrl }
31
+ }
32
+
25
33
  function calculateCRC32ForDataPoint(data: Float32Array): number {
26
34
  // Convert float array to byte array for CRC32
27
35
  const byteArray = new Uint8Array(data.length * 4)
@@ -137,16 +145,13 @@ export async function extractAudioAnalysis(
137
145
  const channelData = processedBuffer.buffer.getChannelData(0)
138
146
 
139
147
  // Create worker blob: WASM glue (defines createMelSpectrogramModule) + worker code
140
- const blob = new Blob(
141
- [wasmGlueJs, '\n', InlineFeaturesExtractor],
142
- { type: 'application/javascript' }
143
- )
144
- const workerUrl = URL.createObjectURL(blob)
145
- const worker = new Worker(workerUrl)
148
+ const { worker, workerUrl } = createAnalysisWorker()
146
149
 
147
150
  return new Promise((resolve, reject) => {
148
151
  worker.onmessage = (event) => {
149
152
  if (event.data.error) {
153
+ URL.revokeObjectURL(workerUrl)
154
+ worker.terminate()
150
155
  reject(new Error(event.data.error))
151
156
  return
152
157
  }
@@ -300,20 +305,16 @@ export const extractRawWavAnalysis = async ({
300
305
  const constrainedChannelData = channelData.slice(startIndex, endIndex)
301
306
 
302
307
  return new Promise((resolve, reject) => {
303
- const blob = new Blob([wasmGlueJs, '\n', InlineFeaturesExtractor], {
304
- type: 'application/javascript',
305
- })
306
- const url = URL.createObjectURL(blob)
307
- const worker = new Worker(url)
308
+ const { worker, workerUrl } = createAnalysisWorker()
308
309
 
309
310
  worker.onmessage = (event) => {
310
- URL.revokeObjectURL(url)
311
+ URL.revokeObjectURL(workerUrl)
311
312
  worker.terminate()
312
313
  resolve(event.data.result)
313
314
  }
314
315
 
315
316
  worker.onerror = (error) => {
316
- URL.revokeObjectURL(url)
317
+ URL.revokeObjectURL(workerUrl)
317
318
  worker.terminate()
318
319
  reject(error)
319
320
  }
@@ -337,13 +338,15 @@ export const extractRawWavAnalysis = async ({
337
338
  fileUri,
338
339
  segmentDurationMs,
339
340
  })
340
- const res = await AudioStudioModule.extractAudioAnalysis({
341
- fileUri,
342
- segmentDurationMs,
343
- features,
344
- position,
345
- length,
346
- })
341
+ const res = await AudioStudioModule.extractAudioAnalysis(
342
+ cleanNativeOptions({
343
+ fileUri,
344
+ segmentDurationMs,
345
+ features,
346
+ position,
347
+ length,
348
+ })
349
+ )
347
350
  logger?.log(`extractAudioAnalysis`, res)
348
351
  return res
349
352
  }
@@ -1,13 +1,195 @@
1
- import { ExtractAudioDataOptions } from '../AudioStudio.types'
1
+ import {
2
+ BitDepth,
3
+ ExtractAudioDataOptions,
4
+ ExtractedAudioData,
5
+ } from '../AudioStudio.types'
2
6
  import AudioStudioModule from '../AudioStudioModule'
3
7
  import { isWeb } from '../constants'
8
+ import { processAudioBuffer } from '../utils/audioProcessing'
4
9
  import { cleanNativeOptions } from '../utils/cleanNativeOptions'
10
+ import crc32 from '../utils/crc32'
11
+ import { writeWavHeader } from '../utils/writeWavHeader'
5
12
 
6
- export const extractAudioData = async (props: ExtractAudioDataOptions) => {
13
+ export const extractAudioData = async (
14
+ props: ExtractAudioDataOptions
15
+ ): Promise<ExtractedAudioData> => {
7
16
  if (isWeb) {
8
- // Web implementation handles logger natively in AudioStudioModule.ts
9
- return await AudioStudioModule.extractAudioData(props)
17
+ try {
18
+ const {
19
+ fileUri,
20
+ position,
21
+ length,
22
+ startTimeMs,
23
+ endTimeMs,
24
+ decodingOptions,
25
+ includeNormalizedData,
26
+ includeBase64Data,
27
+ includeWavHeader = false,
28
+ logger,
29
+ } = props
30
+
31
+ logger?.debug('EXTRACT AUDIO - Step 1: Initial request', {
32
+ fileUri,
33
+ extractionParams: {
34
+ position,
35
+ length,
36
+ startTimeMs,
37
+ endTimeMs,
38
+ },
39
+ decodingOptions: {
40
+ targetSampleRate:
41
+ decodingOptions?.targetSampleRate ?? 16000,
42
+ targetChannels: decodingOptions?.targetChannels ?? 1,
43
+ targetBitDepth: decodingOptions?.targetBitDepth ?? 16,
44
+ normalizeAudio: decodingOptions?.normalizeAudio ?? false,
45
+ },
46
+ outputOptions: {
47
+ includeNormalizedData,
48
+ includeBase64Data,
49
+ includeWavHeader,
50
+ },
51
+ })
52
+
53
+ // Process the audio using shared helper function
54
+ const processedBuffer = await processAudioBuffer({
55
+ fileUri,
56
+ targetSampleRate: decodingOptions?.targetSampleRate ?? 16000,
57
+ targetChannels: decodingOptions?.targetChannels ?? 1,
58
+ normalizeAudio: decodingOptions?.normalizeAudio ?? false,
59
+ position,
60
+ length,
61
+ startTimeMs,
62
+ endTimeMs,
63
+ logger,
64
+ })
65
+
66
+ logger?.debug('EXTRACT AUDIO - Step 2: Audio processing complete', {
67
+ processedData: {
68
+ samples: processedBuffer.samples,
69
+ sampleRate: processedBuffer.sampleRate,
70
+ channels: processedBuffer.channels,
71
+ durationMs: processedBuffer.durationMs,
72
+ },
73
+ })
74
+
75
+ const channelData = processedBuffer.channelData
76
+ const bitDepth = (decodingOptions?.targetBitDepth ?? 16) as BitDepth
77
+ const bytesPerSample = bitDepth / 8
78
+ const numSamples = processedBuffer.samples
79
+
80
+ logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion setup', {
81
+ channelData: {
82
+ length: channelData.length,
83
+ first: channelData[0],
84
+ last: channelData[channelData.length - 1],
85
+ },
86
+ calculation: {
87
+ bitDepth,
88
+ bytesPerSample,
89
+ numSamples,
90
+ expectedBytes: numSamples * bytesPerSample,
91
+ },
92
+ })
93
+
94
+ // Create PCM data with correct length based on original byte length
95
+ const pcmData = new Uint8Array(numSamples * bytesPerSample)
96
+ let offset = 0
97
+
98
+ // Convert Float32 samples to PCM format
99
+ for (let i = 0; i < numSamples; i++) {
100
+ const sample = channelData[i]
101
+ const value = Math.max(-1, Math.min(1, sample))
102
+ // Convert to 16-bit signed integer
103
+ let intValue = Math.round(value * 32767)
104
+
105
+ // Handle negative values correctly
106
+ if (intValue < 0) {
107
+ intValue = 65536 + intValue
108
+ }
109
+
110
+ // Write as little-endian
111
+ pcmData[offset++] = intValue & 255 // Low byte
112
+ pcmData[offset++] = (intValue >> 8) & 255 // High byte
113
+ }
114
+
115
+ const durationMs = Math.round(
116
+ (numSamples / processedBuffer.sampleRate) * 1000
117
+ )
118
+
119
+ logger?.debug('EXTRACT AUDIO - Step 4: Final output', {
120
+ pcmData: {
121
+ length: pcmData.length,
122
+ first: pcmData[0],
123
+ last: pcmData[pcmData.length - 1],
124
+ },
125
+ timing: {
126
+ numSamples,
127
+ sampleRate: processedBuffer.sampleRate,
128
+ durationMs,
129
+ shouldBe3000ms: endTimeMs
130
+ ? endTimeMs - (startTimeMs ?? 0) === 3000
131
+ : undefined,
132
+ },
133
+ })
134
+
135
+ const result: ExtractedAudioData = {
136
+ pcmData: new Uint8Array(pcmData.buffer),
137
+ sampleRate: processedBuffer.sampleRate,
138
+ channels: processedBuffer.channels,
139
+ bitDepth,
140
+ durationMs,
141
+ format: `pcm_${bitDepth}bit` as const,
142
+ samples: numSamples,
143
+ }
144
+
145
+ // Add WAV header if requested
146
+ if (includeWavHeader) {
147
+ logger?.debug('EXTRACT AUDIO - Step 5: Adding WAV header', {
148
+ originalLength: pcmData.length,
149
+ newLength: result.pcmData.length,
150
+ firstBytes: Array.from(result.pcmData.slice(0, 44)), // WAV header is 44 bytes
151
+ })
152
+ const wavBuffer = writeWavHeader({
153
+ buffer: pcmData.buffer.slice(0, pcmData.length),
154
+ sampleRate: processedBuffer.sampleRate,
155
+ numChannels: processedBuffer.channels,
156
+ bitDepth,
157
+ })
158
+ result.pcmData = new Uint8Array(wavBuffer)
159
+ result.hasWavHeader = true
160
+ }
161
+
162
+ if (includeNormalizedData) {
163
+ result.normalizedData = channelData
164
+ }
165
+
166
+ if (includeBase64Data) {
167
+ result.base64Data = btoa(
168
+ String.fromCharCode(...new Uint8Array(pcmData.buffer))
169
+ )
170
+ }
171
+
172
+ if (props.computeChecksum) {
173
+ result.checksum = crc32.buf(pcmData)
174
+ }
175
+
176
+ logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion complete', {
177
+ pcmStats: {
178
+ length: pcmData.length,
179
+ bytesPerSample,
180
+ totalSamples: numSamples,
181
+ firstBytes: Array.from(pcmData.slice(0, 16)),
182
+ lastBytes: Array.from(pcmData.slice(-16)),
183
+ },
184
+ })
185
+
186
+ return result
187
+ } catch (error) {
188
+ props.logger?.error('EXTRACT AUDIO - Error:', error)
189
+ throw error
190
+ }
10
191
  }
192
+
11
193
  // Native: only pass serializable fields — logger causes crash on Android
12
194
  const { logger: _logger, ...nativeOptions } = props
13
195
  // Clean undefined values to avoid Android Kotlin bridge crash
@@ -1,29 +1,5 @@
1
1
  import type { MelSpectrogramWasmModule } from './mel-spectrogram-wasm'
2
- import { getMelSpectrogramWasmUrl, _registerModuleReset } from './wasmConfig'
3
-
4
- let modulePromise: Promise<MelSpectrogramWasmModule> | null = null
5
-
6
- _registerModuleReset(() => {
7
- modulePromise = null
8
- })
9
-
10
- function getModule(): Promise<MelSpectrogramWasmModule> {
11
- if (!modulePromise) {
12
- modulePromise = (async () => {
13
- const url = getMelSpectrogramWasmUrl()
14
- // webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
15
- const mod = await import(
16
- /* webpackIgnore: true */ /* @vite-ignore */ url
17
- )
18
- const factory = mod.default ?? mod
19
- return factory() as Promise<MelSpectrogramWasmModule>
20
- })().catch((err) => {
21
- modulePromise = null
22
- throw err
23
- })
24
- }
25
- return modulePromise
26
- }
2
+ import { getWasmModule } from './wasmLoader.web'
27
3
 
28
4
  // --- Streaming (per-frame) API for live mel spectrogram ---
29
5
 
@@ -46,7 +22,7 @@ export async function initMelStreamingWasm(
46
22
  fMin = 0,
47
23
  fMax = 0
48
24
  ): Promise<void> {
49
- const Module = await getModule()
25
+ const Module = (await getWasmModule()) as MelSpectrogramWasmModule
50
26
  streamingModule = Module
51
27
  const actualFMax = fMax > 0 ? fMax : sampleRate / 2
52
28
  Module._mel_spectrogram_init(
@@ -61,6 +37,13 @@ export async function initMelStreamingWasm(
61
37
  )
62
38
  streamingNMels = nMels
63
39
 
40
+ // Free frame buffer from previous session (if any) to avoid leak on re-init
41
+ if (streamingFramePtr) {
42
+ Module._free(streamingFramePtr)
43
+ streamingFramePtr = 0
44
+ streamingFrameCapacity = 0
45
+ }
46
+
64
47
  // Pre-allocate output buffer (fixed size)
65
48
  if (streamingMelPtr) Module._free(streamingMelPtr)
66
49
  streamingMelPtr = Module._malloc(nMels * 4)
@@ -120,7 +103,7 @@ export async function computeMelSpectrogramWasm(
120
103
  normalize: boolean,
121
104
  logScale: boolean
122
105
  ): Promise<number[][]> {
123
- const Module = await getModule()
106
+ const Module = (await getWasmModule()) as MelSpectrogramWasmModule
124
107
 
125
108
  const fftLength = 2048
126
109
  const windowTypeInt = windowType === 'hamming' ? 1 : 0
@@ -1,6 +1,6 @@
1
1
  // Version is inlined here — keep in sync with package.json when releasing.
2
2
  // The publish.sh script should bump this string alongside package.json.
3
- const WASM_VERSION = '3.0.2-beta.1'
3
+ const WASM_VERSION = '3.0.2'
4
4
  // jsDelivr syncs from npm automatically within ~5 min of publish.
5
5
  // GitHub release fallback (attach mel-spectrogram.js as a release asset):
6
6
  // https://github.com/deeeed/audiolab/releases/download/@siteed/audio-studio@VERSION/mel-spectrogram.js
@@ -8,15 +8,15 @@ const WASM_VERSION = '3.0.2-beta.1'
8
8
  const DEFAULT_WASM_CDN = `https://cdn.jsdelivr.net/npm/@siteed/audio-studio@${WASM_VERSION}/prebuilt/wasm/mel-spectrogram.js`
9
9
 
10
10
  let _wasmUrl: string = DEFAULT_WASM_CDN
11
- let _modulePromiseReset: (() => void) | null = null
11
+ const _resetListeners: (() => void)[] = []
12
12
 
13
13
  export function _registerModuleReset(fn: () => void): void {
14
- _modulePromiseReset = fn
14
+ _resetListeners.push(fn)
15
15
  }
16
16
 
17
17
  export function setMelSpectrogramWasmUrl(url: string): void {
18
18
  _wasmUrl = url
19
- _modulePromiseReset?.() // invalidate cached module so next call re-fetches
19
+ _resetListeners.forEach((fn) => fn())
20
20
  }
21
21
 
22
22
  export function getMelSpectrogramWasmUrl(): string {
@@ -0,0 +1,53 @@
1
+ import type { AudioFeaturesWasmModule } from './audio-features-wasm'
2
+ import { getMelSpectrogramWasmUrl, _registerModuleReset } from './wasmConfig'
3
+
4
+ // Global factory name for the shared WASM binary. Despite the name referring to
5
+ // mel spectrogram, this single binary also exports all audio-features functions.
6
+ const WASM_GLOBAL_NAME = 'createMelSpectrogramModule'
7
+ let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
8
+
9
+ _registerModuleReset(() => {
10
+ modulePromise = null
11
+ })
12
+
13
+ function loadScriptTag(url: string): Promise<void> {
14
+ return new Promise((resolve, reject) => {
15
+ const script = document.createElement('script')
16
+ script.src = url
17
+ script.onload = () => resolve()
18
+ script.onerror = () =>
19
+ reject(new Error(`Failed to load script: ${url}`))
20
+ document.head.appendChild(script)
21
+ })
22
+ }
23
+
24
+ export function getWasmModule(): Promise<AudioFeaturesWasmModule> {
25
+ if (!modulePromise) {
26
+ modulePromise = (async () => {
27
+ const url = getMelSpectrogramWasmUrl()
28
+ // Try ESM import first; fall back to <script> tag for UMD modules
29
+ const mod = await import(
30
+ /* webpackIgnore: true */ /* @vite-ignore */ url
31
+ )
32
+ let factory: unknown = mod.default ?? mod
33
+ if (typeof factory !== 'function') {
34
+ // UMD fallback: load via <script> tag so the top-level `var` becomes a global and
35
+ // document.currentScript.src is set (Emscripten uses it to locate the .wasm binary).
36
+ await loadScriptTag(url)
37
+ factory = (globalThis as Record<string, unknown>)[
38
+ WASM_GLOBAL_NAME
39
+ ]
40
+ }
41
+ if (typeof factory !== 'function') {
42
+ throw new TypeError(
43
+ `WASM factory '${WASM_GLOBAL_NAME}' not found after loading ${url}`
44
+ )
45
+ }
46
+ return (factory as () => Promise<AudioFeaturesWasmModule>)()
47
+ })().catch((err) => {
48
+ modulePromise = null
49
+ throw err
50
+ })
51
+ }
52
+ return modulePromise
53
+ }