@siteed/audio-studio 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/android/src/main/java/net/siteed/audiostudio/AudioRecorderManager.kt +41 -35
- package/android/src/main/java/net/siteed/audiostudio/AudioStudioModule.kt +500 -479
- package/build/cjs/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js +10 -7
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js +78 -97
- package/build/cjs/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js +15 -12
- package/build/cjs/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/cjs/AudioAnalysis/extractAudioData.js +144 -2
- package/build/cjs/AudioAnalysis/extractAudioData.js.map +1 -1
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js +9 -56
- package/build/cjs/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
- package/build/cjs/AudioAnalysis/wasmConfig.js +4 -4
- package/build/cjs/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/cjs/AudioAnalysis/wasmLoader.web.js +79 -0
- package/build/cjs/AudioAnalysis/wasmLoader.web.js.map +1 -0
- package/build/cjs/AudioStudioModule.js +4 -599
- package/build/cjs/AudioStudioModule.js.map +1 -1
- package/build/cjs/trimAudio.js +227 -0
- package/build/cjs/trimAudio.js.map +1 -1
- package/build/cjs/utils/encodeCompressedAudio.web.js +65 -0
- package/build/cjs/utils/encodeCompressedAudio.web.js.map +1 -0
- package/build/cjs/utils/resampleAudioBuffer.web.js +25 -0
- package/build/cjs/utils/resampleAudioBuffer.web.js.map +1 -0
- package/build/esm/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js +8 -5
- package/build/esm/AudioAnalysis/audioFeaturesWasm.js.map +1 -1
- package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js +76 -62
- package/build/esm/AudioAnalysis/audioFeaturesWasm.web.js.map +1 -1
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js +15 -12
- package/build/esm/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/esm/AudioAnalysis/extractAudioData.js +144 -2
- package/build/esm/AudioAnalysis/extractAudioData.js.map +1 -1
- package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js +9 -23
- package/build/esm/AudioAnalysis/melSpectrogramWasm.web.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmConfig.js +4 -4
- package/build/esm/AudioAnalysis/wasmConfig.js.map +1 -1
- package/build/esm/AudioAnalysis/wasmLoader.web.js +43 -0
- package/build/esm/AudioAnalysis/wasmLoader.web.js.map +1 -0
- package/build/esm/AudioStudioModule.js +4 -596
- package/build/esm/AudioStudioModule.js.map +1 -1
- package/build/esm/trimAudio.js +227 -0
- package/build/esm/trimAudio.js.map +1 -1
- package/build/esm/utils/encodeCompressedAudio.web.js +62 -0
- package/build/esm/utils/encodeCompressedAudio.web.js.map +1 -0
- package/build/esm/utils/resampleAudioBuffer.web.js +22 -0
- package/build/esm/utils/resampleAudioBuffer.web.js.map +1 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts +11 -0
- package/build/types/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts +5 -9
- package/build/types/AudioAnalysis/audioFeaturesWasm.d.ts.map +1 -1
- package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts +35 -16
- package/build/types/AudioAnalysis/audioFeaturesWasm.web.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/types/AudioAnalysis/extractAudioData.d.ts +2 -2
- package/build/types/AudioAnalysis/extractAudioData.d.ts.map +1 -1
- package/build/types/AudioAnalysis/melSpectrogramWasm.web.d.ts.map +1 -1
- package/build/types/AudioAnalysis/wasmLoader.web.d.ts +3 -0
- package/build/types/AudioAnalysis/wasmLoader.web.d.ts.map +1 -0
- package/build/types/AudioStudioModule.d.ts.map +1 -1
- package/build/types/trimAudio.d.ts.map +1 -1
- package/build/types/utils/encodeCompressedAudio.web.d.ts +10 -0
- package/build/types/utils/encodeCompressedAudio.web.d.ts.map +1 -0
- package/build/types/utils/resampleAudioBuffer.web.d.ts +2 -0
- package/build/types/utils/resampleAudioBuffer.web.d.ts.map +1 -0
- package/ios/AudioStreamManager.swift +135 -89
- package/ios/AudioStudioModule.swift +239 -216
- package/package.json +1 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +12 -0
- package/src/AudioAnalysis/audioFeaturesWasm.ts +17 -22
- package/src/AudioAnalysis/audioFeaturesWasm.web.ts +102 -94
- package/src/AudioAnalysis/extractAudioAnalysis.ts +23 -20
- package/src/AudioAnalysis/extractAudioData.ts +186 -4
- package/src/AudioAnalysis/melSpectrogramWasm.web.ts +10 -27
- package/src/AudioAnalysis/wasmConfig.ts +4 -4
- package/src/AudioAnalysis/wasmLoader.web.ts +53 -0
- package/src/AudioStudioModule.ts +6 -854
- package/src/trimAudio.ts +351 -0
- package/src/utils/encodeCompressedAudio.web.ts +78 -0
- package/src/utils/resampleAudioBuffer.web.ts +39 -0
- package/build/cjs/AudioAnalysis/extractWaveform.js +0 -18
- package/build/cjs/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/esm/AudioAnalysis/extractWaveform.js +0 -11
- package/build/esm/AudioAnalysis/extractWaveform.js.map +0 -1
- package/build/types/AudioAnalysis/extractWaveform.d.ts +0 -8
- package/build/types/AudioAnalysis/extractWaveform.d.ts.map +0 -1
- package/src/AudioAnalysis/extractWaveform.ts +0 -22
|
@@ -1,34 +1,6 @@
|
|
|
1
|
+
import type { AudioFeaturesWasmResult } from './AudioAnalysis.types'
|
|
1
2
|
import type { AudioFeaturesWasmModule } from './audio-features-wasm'
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
export interface AudioFeaturesWasmResult {
|
|
5
|
-
spectralCentroid: number
|
|
6
|
-
spectralFlatness: number
|
|
7
|
-
spectralRolloff: number
|
|
8
|
-
spectralBandwidth: number
|
|
9
|
-
mfcc: number[]
|
|
10
|
-
chromagram: number[]
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
|
|
14
|
-
|
|
15
|
-
function getModule(): Promise<AudioFeaturesWasmModule> {
|
|
16
|
-
if (!modulePromise) {
|
|
17
|
-
modulePromise = (async () => {
|
|
18
|
-
const url = getMelSpectrogramWasmUrl()
|
|
19
|
-
// webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
|
|
20
|
-
const mod = await import(
|
|
21
|
-
/* webpackIgnore: true */ /* @vite-ignore */ url
|
|
22
|
-
)
|
|
23
|
-
const factory = mod.default ?? mod
|
|
24
|
-
return factory() as Promise<AudioFeaturesWasmModule>
|
|
25
|
-
})().catch((err) => {
|
|
26
|
-
modulePromise = null
|
|
27
|
-
throw err
|
|
28
|
-
})
|
|
29
|
-
}
|
|
30
|
-
return modulePromise
|
|
31
|
-
}
|
|
3
|
+
import { getWasmModule } from './wasmLoader.web'
|
|
32
4
|
|
|
33
5
|
// --- Struct layout for CAudioFeaturesResult (wasm32) ---
|
|
34
6
|
// Offset 0: float spectralCentroid (4 bytes)
|
|
@@ -83,79 +55,115 @@ function readResult(
|
|
|
83
55
|
|
|
84
56
|
// --- Streaming (per-frame) API ---
|
|
85
57
|
|
|
86
|
-
let streamingModule: AudioFeaturesWasmModule | null = null
|
|
87
|
-
let streamingFramePtr = 0
|
|
88
|
-
let streamingFrameCapacity = 0
|
|
89
|
-
let streamingResultPtr = 0
|
|
90
|
-
|
|
91
58
|
/**
|
|
92
|
-
*
|
|
93
|
-
*
|
|
59
|
+
* Encapsulates a single WASM streaming audio features session.
|
|
60
|
+
* Each instance owns its own WASM heap allocations; multiple sessions
|
|
61
|
+
* can exist concurrently without interfering with each other.
|
|
62
|
+
*
|
|
63
|
+
* Usage:
|
|
64
|
+
* const session = await AudioFeaturesStreamingSession.create(sampleRate)
|
|
65
|
+
* try {
|
|
66
|
+
* for (const frame of frames) {
|
|
67
|
+
* const result = session.computeFrame(frame)
|
|
68
|
+
* }
|
|
69
|
+
* } finally {
|
|
70
|
+
* session.dispose()
|
|
71
|
+
* }
|
|
94
72
|
*/
|
|
95
|
-
export
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
streamingModule = Module
|
|
105
|
-
|
|
106
|
-
Module._audio_features_init(
|
|
107
|
-
sampleRate,
|
|
108
|
-
fftLength,
|
|
109
|
-
nMfcc,
|
|
110
|
-
nMelFilters,
|
|
111
|
-
computeMfcc ? 1 : 0,
|
|
112
|
-
computeChroma ? 1 : 0
|
|
113
|
-
)
|
|
73
|
+
export class AudioFeaturesStreamingSession {
|
|
74
|
+
private module: AudioFeaturesWasmModule
|
|
75
|
+
private framePtr = 0
|
|
76
|
+
private frameCapacity = 0
|
|
77
|
+
private resultPtr = 0
|
|
78
|
+
|
|
79
|
+
private constructor(module: AudioFeaturesWasmModule) {
|
|
80
|
+
this.module = module
|
|
81
|
+
}
|
|
114
82
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
83
|
+
/**
|
|
84
|
+
* Initialise a new streaming session. Loads the WASM module if needed.
|
|
85
|
+
*/
|
|
86
|
+
static async create(
|
|
87
|
+
sampleRate: number,
|
|
88
|
+
fftLength = 1024,
|
|
89
|
+
nMfcc = 13,
|
|
90
|
+
nMelFilters = 26,
|
|
91
|
+
computeMfcc = true,
|
|
92
|
+
computeChroma = true
|
|
93
|
+
): Promise<AudioFeaturesStreamingSession> {
|
|
94
|
+
const Module = await getWasmModule()
|
|
95
|
+
const session = new AudioFeaturesStreamingSession(Module)
|
|
96
|
+
|
|
97
|
+
Module._audio_features_init(
|
|
98
|
+
sampleRate,
|
|
99
|
+
fftLength,
|
|
100
|
+
nMfcc,
|
|
101
|
+
nMelFilters,
|
|
102
|
+
computeMfcc ? 1 : 0,
|
|
103
|
+
computeChroma ? 1 : 0
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
// Pre-allocate result struct on WASM heap
|
|
107
|
+
session.resultPtr = Module._malloc(STRUCT_SIZE)
|
|
108
|
+
// Zero-initialize to prevent freeing garbage pointers on first use
|
|
109
|
+
Module.HEAPU8.fill(
|
|
110
|
+
0,
|
|
111
|
+
session.resultPtr,
|
|
112
|
+
session.resultPtr + STRUCT_SIZE
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return session
|
|
116
|
+
}
|
|
120
117
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
118
|
+
/**
|
|
119
|
+
* Compute audio features for a single frame.
|
|
120
|
+
* Returns null on error or if the session has been disposed.
|
|
121
|
+
*/
|
|
122
|
+
computeFrame(samples: Float32Array): AudioFeaturesWasmResult | null {
|
|
123
|
+
if (!this.resultPtr) return null
|
|
124
|
+
const Module = this.module
|
|
125
|
+
|
|
126
|
+
// (Re-)allocate frame input buffer if needed
|
|
127
|
+
if (samples.length > this.frameCapacity) {
|
|
128
|
+
if (this.framePtr) Module._free(this.framePtr)
|
|
129
|
+
this.framePtr = Module._malloc(samples.length * 4)
|
|
130
|
+
this.frameCapacity = samples.length
|
|
131
|
+
}
|
|
125
132
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
* Returns null if not initialised or on error.
|
|
129
|
-
*/
|
|
130
|
-
export function computeAudioFeaturesFrameWasm(
|
|
131
|
-
samples: Float32Array
|
|
132
|
-
): AudioFeaturesWasmResult | null {
|
|
133
|
-
if (!streamingModule || !streamingResultPtr) return null
|
|
134
|
-
const Module = streamingModule
|
|
135
|
-
|
|
136
|
-
// (Re-)allocate frame input buffer if needed
|
|
137
|
-
if (samples.length > streamingFrameCapacity) {
|
|
138
|
-
if (streamingFramePtr) Module._free(streamingFramePtr)
|
|
139
|
-
streamingFramePtr = Module._malloc(samples.length * 4)
|
|
140
|
-
streamingFrameCapacity = samples.length
|
|
141
|
-
}
|
|
133
|
+
// Copy samples to WASM heap
|
|
134
|
+
Module.HEAPF32.set(samples, this.framePtr >> 2)
|
|
142
135
|
|
|
143
|
-
|
|
144
|
-
|
|
136
|
+
const ok = Module._audio_features_compute_frame(
|
|
137
|
+
this.framePtr,
|
|
138
|
+
samples.length,
|
|
139
|
+
this.resultPtr
|
|
140
|
+
)
|
|
141
|
+
if (!ok) return null
|
|
145
142
|
|
|
146
|
-
|
|
147
|
-
streamingFramePtr,
|
|
148
|
-
samples.length,
|
|
149
|
-
streamingResultPtr
|
|
150
|
-
)
|
|
151
|
-
if (!ok) return null
|
|
143
|
+
const result = readResult(Module, this.resultPtr)
|
|
152
144
|
|
|
153
|
-
|
|
145
|
+
// Free internal arrays (mfcc, chromagram) allocated by C
|
|
146
|
+
Module._audio_features_free_arrays(this.resultPtr)
|
|
154
147
|
|
|
155
|
-
|
|
156
|
-
|
|
148
|
+
return result
|
|
149
|
+
}
|
|
157
150
|
|
|
158
|
-
|
|
151
|
+
/**
|
|
152
|
+
* Free all WASM heap allocations owned by this session.
|
|
153
|
+
* The session must not be used after calling dispose().
|
|
154
|
+
*/
|
|
155
|
+
dispose(): void {
|
|
156
|
+
const Module = this.module
|
|
157
|
+
if (this.framePtr) {
|
|
158
|
+
Module._free(this.framePtr)
|
|
159
|
+
this.framePtr = 0
|
|
160
|
+
this.frameCapacity = 0
|
|
161
|
+
}
|
|
162
|
+
if (this.resultPtr) {
|
|
163
|
+
Module._free(this.resultPtr)
|
|
164
|
+
this.resultPtr = 0
|
|
165
|
+
}
|
|
166
|
+
}
|
|
159
167
|
}
|
|
160
168
|
|
|
161
169
|
// --- Batch API ---
|
|
@@ -173,7 +181,7 @@ export async function computeAudioFeaturesWasm(
|
|
|
173
181
|
computeMfcc = true,
|
|
174
182
|
computeChroma = true
|
|
175
183
|
): Promise<AudioFeaturesWasmResult> {
|
|
176
|
-
const Module = await getModule()
|
|
184
|
+
const Module = await getWasmModule()
|
|
177
185
|
|
|
178
186
|
const numSamples = audioData.length
|
|
179
187
|
const inputPtr = Module._malloc(numSamples * 4)
|
|
@@ -22,6 +22,14 @@ import { getWavFileInfo, WavFileInfo } from '../utils/getWavFileInfo'
|
|
|
22
22
|
import { InlineFeaturesExtractor } from '../workers/InlineFeaturesExtractor.web'
|
|
23
23
|
import { wasmGlueJs } from '../workers/wasmGlueString.web'
|
|
24
24
|
|
|
25
|
+
function createAnalysisWorker(): { worker: Worker; workerUrl: string } {
|
|
26
|
+
const blob = new Blob([wasmGlueJs, '\n', InlineFeaturesExtractor], {
|
|
27
|
+
type: 'application/javascript',
|
|
28
|
+
})
|
|
29
|
+
const workerUrl = URL.createObjectURL(blob)
|
|
30
|
+
return { worker: new Worker(workerUrl), workerUrl }
|
|
31
|
+
}
|
|
32
|
+
|
|
25
33
|
function calculateCRC32ForDataPoint(data: Float32Array): number {
|
|
26
34
|
// Convert float array to byte array for CRC32
|
|
27
35
|
const byteArray = new Uint8Array(data.length * 4)
|
|
@@ -137,16 +145,13 @@ export async function extractAudioAnalysis(
|
|
|
137
145
|
const channelData = processedBuffer.buffer.getChannelData(0)
|
|
138
146
|
|
|
139
147
|
// Create worker blob: WASM glue (defines createMelSpectrogramModule) + worker code
|
|
140
|
-
const blob = new Blob(
|
|
141
|
-
[wasmGlueJs, '\n', InlineFeaturesExtractor],
|
|
142
|
-
{ type: 'application/javascript' }
|
|
143
|
-
)
|
|
144
|
-
const workerUrl = URL.createObjectURL(blob)
|
|
145
|
-
const worker = new Worker(workerUrl)
|
|
148
|
+
const { worker, workerUrl } = createAnalysisWorker()
|
|
146
149
|
|
|
147
150
|
return new Promise((resolve, reject) => {
|
|
148
151
|
worker.onmessage = (event) => {
|
|
149
152
|
if (event.data.error) {
|
|
153
|
+
URL.revokeObjectURL(workerUrl)
|
|
154
|
+
worker.terminate()
|
|
150
155
|
reject(new Error(event.data.error))
|
|
151
156
|
return
|
|
152
157
|
}
|
|
@@ -300,20 +305,16 @@ export const extractRawWavAnalysis = async ({
|
|
|
300
305
|
const constrainedChannelData = channelData.slice(startIndex, endIndex)
|
|
301
306
|
|
|
302
307
|
return new Promise((resolve, reject) => {
|
|
303
|
-
const
|
|
304
|
-
type: 'application/javascript',
|
|
305
|
-
})
|
|
306
|
-
const url = URL.createObjectURL(blob)
|
|
307
|
-
const worker = new Worker(url)
|
|
308
|
+
const { worker, workerUrl } = createAnalysisWorker()
|
|
308
309
|
|
|
309
310
|
worker.onmessage = (event) => {
|
|
310
|
-
URL.revokeObjectURL(url)
|
|
311
|
+
URL.revokeObjectURL(workerUrl)
|
|
311
312
|
worker.terminate()
|
|
312
313
|
resolve(event.data.result)
|
|
313
314
|
}
|
|
314
315
|
|
|
315
316
|
worker.onerror = (error) => {
|
|
316
|
-
URL.revokeObjectURL(url)
|
|
317
|
+
URL.revokeObjectURL(workerUrl)
|
|
317
318
|
worker.terminate()
|
|
318
319
|
reject(error)
|
|
319
320
|
}
|
|
@@ -337,13 +338,15 @@ export const extractRawWavAnalysis = async ({
|
|
|
337
338
|
fileUri,
|
|
338
339
|
segmentDurationMs,
|
|
339
340
|
})
|
|
340
|
-
const res = await AudioStudioModule.extractAudioAnalysis(
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
341
|
+
const res = await AudioStudioModule.extractAudioAnalysis(
|
|
342
|
+
cleanNativeOptions({
|
|
343
|
+
fileUri,
|
|
344
|
+
segmentDurationMs,
|
|
345
|
+
features,
|
|
346
|
+
position,
|
|
347
|
+
length,
|
|
348
|
+
})
|
|
349
|
+
)
|
|
347
350
|
logger?.log(`extractAudioAnalysis`, res)
|
|
348
351
|
return res
|
|
349
352
|
}
|
|
@@ -1,13 +1,195 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
BitDepth,
|
|
3
|
+
ExtractAudioDataOptions,
|
|
4
|
+
ExtractedAudioData,
|
|
5
|
+
} from '../AudioStudio.types'
|
|
2
6
|
import AudioStudioModule from '../AudioStudioModule'
|
|
3
7
|
import { isWeb } from '../constants'
|
|
8
|
+
import { processAudioBuffer } from '../utils/audioProcessing'
|
|
4
9
|
import { cleanNativeOptions } from '../utils/cleanNativeOptions'
|
|
10
|
+
import crc32 from '../utils/crc32'
|
|
11
|
+
import { writeWavHeader } from '../utils/writeWavHeader'
|
|
5
12
|
|
|
6
|
-
export const extractAudioData = async (
|
|
13
|
+
export const extractAudioData = async (
|
|
14
|
+
props: ExtractAudioDataOptions
|
|
15
|
+
): Promise<ExtractedAudioData> => {
|
|
7
16
|
if (isWeb) {
|
|
8
|
-
|
|
9
|
-
|
|
17
|
+
try {
|
|
18
|
+
const {
|
|
19
|
+
fileUri,
|
|
20
|
+
position,
|
|
21
|
+
length,
|
|
22
|
+
startTimeMs,
|
|
23
|
+
endTimeMs,
|
|
24
|
+
decodingOptions,
|
|
25
|
+
includeNormalizedData,
|
|
26
|
+
includeBase64Data,
|
|
27
|
+
includeWavHeader = false,
|
|
28
|
+
logger,
|
|
29
|
+
} = props
|
|
30
|
+
|
|
31
|
+
logger?.debug('EXTRACT AUDIO - Step 1: Initial request', {
|
|
32
|
+
fileUri,
|
|
33
|
+
extractionParams: {
|
|
34
|
+
position,
|
|
35
|
+
length,
|
|
36
|
+
startTimeMs,
|
|
37
|
+
endTimeMs,
|
|
38
|
+
},
|
|
39
|
+
decodingOptions: {
|
|
40
|
+
targetSampleRate:
|
|
41
|
+
decodingOptions?.targetSampleRate ?? 16000,
|
|
42
|
+
targetChannels: decodingOptions?.targetChannels ?? 1,
|
|
43
|
+
targetBitDepth: decodingOptions?.targetBitDepth ?? 16,
|
|
44
|
+
normalizeAudio: decodingOptions?.normalizeAudio ?? false,
|
|
45
|
+
},
|
|
46
|
+
outputOptions: {
|
|
47
|
+
includeNormalizedData,
|
|
48
|
+
includeBase64Data,
|
|
49
|
+
includeWavHeader,
|
|
50
|
+
},
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
// Process the audio using shared helper function
|
|
54
|
+
const processedBuffer = await processAudioBuffer({
|
|
55
|
+
fileUri,
|
|
56
|
+
targetSampleRate: decodingOptions?.targetSampleRate ?? 16000,
|
|
57
|
+
targetChannels: decodingOptions?.targetChannels ?? 1,
|
|
58
|
+
normalizeAudio: decodingOptions?.normalizeAudio ?? false,
|
|
59
|
+
position,
|
|
60
|
+
length,
|
|
61
|
+
startTimeMs,
|
|
62
|
+
endTimeMs,
|
|
63
|
+
logger,
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
logger?.debug('EXTRACT AUDIO - Step 2: Audio processing complete', {
|
|
67
|
+
processedData: {
|
|
68
|
+
samples: processedBuffer.samples,
|
|
69
|
+
sampleRate: processedBuffer.sampleRate,
|
|
70
|
+
channels: processedBuffer.channels,
|
|
71
|
+
durationMs: processedBuffer.durationMs,
|
|
72
|
+
},
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
const channelData = processedBuffer.channelData
|
|
76
|
+
const bitDepth = (decodingOptions?.targetBitDepth ?? 16) as BitDepth
|
|
77
|
+
const bytesPerSample = bitDepth / 8
|
|
78
|
+
const numSamples = processedBuffer.samples
|
|
79
|
+
|
|
80
|
+
logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion setup', {
|
|
81
|
+
channelData: {
|
|
82
|
+
length: channelData.length,
|
|
83
|
+
first: channelData[0],
|
|
84
|
+
last: channelData[channelData.length - 1],
|
|
85
|
+
},
|
|
86
|
+
calculation: {
|
|
87
|
+
bitDepth,
|
|
88
|
+
bytesPerSample,
|
|
89
|
+
numSamples,
|
|
90
|
+
expectedBytes: numSamples * bytesPerSample,
|
|
91
|
+
},
|
|
92
|
+
})
|
|
93
|
+
|
|
94
|
+
// Create PCM data with correct length based on original byte length
|
|
95
|
+
const pcmData = new Uint8Array(numSamples * bytesPerSample)
|
|
96
|
+
let offset = 0
|
|
97
|
+
|
|
98
|
+
// Convert Float32 samples to PCM format
|
|
99
|
+
for (let i = 0; i < numSamples; i++) {
|
|
100
|
+
const sample = channelData[i]
|
|
101
|
+
const value = Math.max(-1, Math.min(1, sample))
|
|
102
|
+
// Convert to 16-bit signed integer
|
|
103
|
+
let intValue = Math.round(value * 32767)
|
|
104
|
+
|
|
105
|
+
// Handle negative values correctly
|
|
106
|
+
if (intValue < 0) {
|
|
107
|
+
intValue = 65536 + intValue
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Write as little-endian
|
|
111
|
+
pcmData[offset++] = intValue & 255 // Low byte
|
|
112
|
+
pcmData[offset++] = (intValue >> 8) & 255 // High byte
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const durationMs = Math.round(
|
|
116
|
+
(numSamples / processedBuffer.sampleRate) * 1000
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
logger?.debug('EXTRACT AUDIO - Step 4: Final output', {
|
|
120
|
+
pcmData: {
|
|
121
|
+
length: pcmData.length,
|
|
122
|
+
first: pcmData[0],
|
|
123
|
+
last: pcmData[pcmData.length - 1],
|
|
124
|
+
},
|
|
125
|
+
timing: {
|
|
126
|
+
numSamples,
|
|
127
|
+
sampleRate: processedBuffer.sampleRate,
|
|
128
|
+
durationMs,
|
|
129
|
+
shouldBe3000ms: endTimeMs
|
|
130
|
+
? endTimeMs - (startTimeMs ?? 0) === 3000
|
|
131
|
+
: undefined,
|
|
132
|
+
},
|
|
133
|
+
})
|
|
134
|
+
|
|
135
|
+
const result: ExtractedAudioData = {
|
|
136
|
+
pcmData: new Uint8Array(pcmData.buffer),
|
|
137
|
+
sampleRate: processedBuffer.sampleRate,
|
|
138
|
+
channels: processedBuffer.channels,
|
|
139
|
+
bitDepth,
|
|
140
|
+
durationMs,
|
|
141
|
+
format: `pcm_${bitDepth}bit` as const,
|
|
142
|
+
samples: numSamples,
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Add WAV header if requested
|
|
146
|
+
if (includeWavHeader) {
|
|
147
|
+
logger?.debug('EXTRACT AUDIO - Step 5: Adding WAV header', {
|
|
148
|
+
originalLength: pcmData.length,
|
|
149
|
+
newLength: result.pcmData.length,
|
|
150
|
+
firstBytes: Array.from(result.pcmData.slice(0, 44)), // WAV header is 44 bytes
|
|
151
|
+
})
|
|
152
|
+
const wavBuffer = writeWavHeader({
|
|
153
|
+
buffer: pcmData.buffer.slice(0, pcmData.length),
|
|
154
|
+
sampleRate: processedBuffer.sampleRate,
|
|
155
|
+
numChannels: processedBuffer.channels,
|
|
156
|
+
bitDepth,
|
|
157
|
+
})
|
|
158
|
+
result.pcmData = new Uint8Array(wavBuffer)
|
|
159
|
+
result.hasWavHeader = true
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (includeNormalizedData) {
|
|
163
|
+
result.normalizedData = channelData
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
if (includeBase64Data) {
|
|
167
|
+
result.base64Data = btoa(
|
|
168
|
+
String.fromCharCode(...new Uint8Array(pcmData.buffer))
|
|
169
|
+
)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
if (props.computeChecksum) {
|
|
173
|
+
result.checksum = crc32.buf(pcmData)
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
logger?.debug('EXTRACT AUDIO - Step 3: PCM conversion complete', {
|
|
177
|
+
pcmStats: {
|
|
178
|
+
length: pcmData.length,
|
|
179
|
+
bytesPerSample,
|
|
180
|
+
totalSamples: numSamples,
|
|
181
|
+
firstBytes: Array.from(pcmData.slice(0, 16)),
|
|
182
|
+
lastBytes: Array.from(pcmData.slice(-16)),
|
|
183
|
+
},
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
return result
|
|
187
|
+
} catch (error) {
|
|
188
|
+
props.logger?.error('EXTRACT AUDIO - Error:', error)
|
|
189
|
+
throw error
|
|
190
|
+
}
|
|
10
191
|
}
|
|
192
|
+
|
|
11
193
|
// Native: only pass serializable fields — logger causes crash on Android
|
|
12
194
|
const { logger: _logger, ...nativeOptions } = props
|
|
13
195
|
// Clean undefined values to avoid Android Kotlin bridge crash
|
|
@@ -1,29 +1,5 @@
|
|
|
1
1
|
import type { MelSpectrogramWasmModule } from './mel-spectrogram-wasm'
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
let modulePromise: Promise<MelSpectrogramWasmModule> | null = null
|
|
5
|
-
|
|
6
|
-
_registerModuleReset(() => {
|
|
7
|
-
modulePromise = null
|
|
8
|
-
})
|
|
9
|
-
|
|
10
|
-
function getModule(): Promise<MelSpectrogramWasmModule> {
|
|
11
|
-
if (!modulePromise) {
|
|
12
|
-
modulePromise = (async () => {
|
|
13
|
-
const url = getMelSpectrogramWasmUrl()
|
|
14
|
-
// webpackIgnore + @vite-ignore prevent bundlers from trying to resolve the URL
|
|
15
|
-
const mod = await import(
|
|
16
|
-
/* webpackIgnore: true */ /* @vite-ignore */ url
|
|
17
|
-
)
|
|
18
|
-
const factory = mod.default ?? mod
|
|
19
|
-
return factory() as Promise<MelSpectrogramWasmModule>
|
|
20
|
-
})().catch((err) => {
|
|
21
|
-
modulePromise = null
|
|
22
|
-
throw err
|
|
23
|
-
})
|
|
24
|
-
}
|
|
25
|
-
return modulePromise
|
|
26
|
-
}
|
|
2
|
+
import { getWasmModule } from './wasmLoader.web'
|
|
27
3
|
|
|
28
4
|
// --- Streaming (per-frame) API for live mel spectrogram ---
|
|
29
5
|
|
|
@@ -46,7 +22,7 @@ export async function initMelStreamingWasm(
|
|
|
46
22
|
fMin = 0,
|
|
47
23
|
fMax = 0
|
|
48
24
|
): Promise<void> {
|
|
49
|
-
const Module = await getModule()
|
|
25
|
+
const Module = (await getWasmModule()) as MelSpectrogramWasmModule
|
|
50
26
|
streamingModule = Module
|
|
51
27
|
const actualFMax = fMax > 0 ? fMax : sampleRate / 2
|
|
52
28
|
Module._mel_spectrogram_init(
|
|
@@ -61,6 +37,13 @@ export async function initMelStreamingWasm(
|
|
|
61
37
|
)
|
|
62
38
|
streamingNMels = nMels
|
|
63
39
|
|
|
40
|
+
// Free frame buffer from previous session (if any) to avoid leak on re-init
|
|
41
|
+
if (streamingFramePtr) {
|
|
42
|
+
Module._free(streamingFramePtr)
|
|
43
|
+
streamingFramePtr = 0
|
|
44
|
+
streamingFrameCapacity = 0
|
|
45
|
+
}
|
|
46
|
+
|
|
64
47
|
// Pre-allocate output buffer (fixed size)
|
|
65
48
|
if (streamingMelPtr) Module._free(streamingMelPtr)
|
|
66
49
|
streamingMelPtr = Module._malloc(nMels * 4)
|
|
@@ -120,7 +103,7 @@ export async function computeMelSpectrogramWasm(
|
|
|
120
103
|
normalize: boolean,
|
|
121
104
|
logScale: boolean
|
|
122
105
|
): Promise<number[][]> {
|
|
123
|
-
const Module = await getModule()
|
|
106
|
+
const Module = (await getWasmModule()) as MelSpectrogramWasmModule
|
|
124
107
|
|
|
125
108
|
const fftLength = 2048
|
|
126
109
|
const windowTypeInt = windowType === 'hamming' ? 1 : 0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Version is inlined here — keep in sync with package.json when releasing.
|
|
2
2
|
// The publish.sh script should bump this string alongside package.json.
|
|
3
|
-
const WASM_VERSION = '3.0.2-beta.1'
|
|
3
|
+
const WASM_VERSION = '3.0.2'
|
|
4
4
|
// jsDelivr syncs from npm automatically within ~5 min of publish.
|
|
5
5
|
// GitHub release fallback (attach mel-spectrogram.js as a release asset):
|
|
6
6
|
// https://github.com/deeeed/audiolab/releases/download/@siteed/audio-studio@VERSION/mel-spectrogram.js
|
|
@@ -8,15 +8,15 @@ const WASM_VERSION = '3.0.2-beta.1'
|
|
|
8
8
|
const DEFAULT_WASM_CDN = `https://cdn.jsdelivr.net/npm/@siteed/audio-studio@${WASM_VERSION}/prebuilt/wasm/mel-spectrogram.js`
|
|
9
9
|
|
|
10
10
|
let _wasmUrl: string = DEFAULT_WASM_CDN
|
|
11
|
-
|
|
11
|
+
const _resetListeners: (() => void)[] = []
|
|
12
12
|
|
|
13
13
|
export function _registerModuleReset(fn: () => void): void {
|
|
14
|
-
|
|
14
|
+
_resetListeners.push(fn)
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
export function setMelSpectrogramWasmUrl(url: string): void {
|
|
18
18
|
_wasmUrl = url
|
|
19
|
-
|
|
19
|
+
_resetListeners.forEach((fn) => fn())
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
export function getMelSpectrogramWasmUrl(): string {
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { AudioFeaturesWasmModule } from './audio-features-wasm'
|
|
2
|
+
import { getMelSpectrogramWasmUrl, _registerModuleReset } from './wasmConfig'
|
|
3
|
+
|
|
4
|
+
// Global factory name for the shared WASM binary. Despite the name referring to
|
|
5
|
+
// mel spectrogram, this single binary also exports all audio-features functions.
|
|
6
|
+
const WASM_GLOBAL_NAME = 'createMelSpectrogramModule'
|
|
7
|
+
let modulePromise: Promise<AudioFeaturesWasmModule> | null = null
|
|
8
|
+
|
|
9
|
+
_registerModuleReset(() => {
|
|
10
|
+
modulePromise = null
|
|
11
|
+
})
|
|
12
|
+
|
|
13
|
+
function loadScriptTag(url: string): Promise<void> {
|
|
14
|
+
return new Promise((resolve, reject) => {
|
|
15
|
+
const script = document.createElement('script')
|
|
16
|
+
script.src = url
|
|
17
|
+
script.onload = () => resolve()
|
|
18
|
+
script.onerror = () =>
|
|
19
|
+
reject(new Error(`Failed to load script: ${url}`))
|
|
20
|
+
document.head.appendChild(script)
|
|
21
|
+
})
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export function getWasmModule(): Promise<AudioFeaturesWasmModule> {
|
|
25
|
+
if (!modulePromise) {
|
|
26
|
+
modulePromise = (async () => {
|
|
27
|
+
const url = getMelSpectrogramWasmUrl()
|
|
28
|
+
// Try ESM import first; fall back to <script> tag for UMD modules
|
|
29
|
+
const mod = await import(
|
|
30
|
+
/* webpackIgnore: true */ /* @vite-ignore */ url
|
|
31
|
+
)
|
|
32
|
+
let factory: unknown = mod.default ?? mod
|
|
33
|
+
if (typeof factory !== 'function') {
|
|
34
|
+
// UMD fallback: load via <script> tag so the top-level `var` becomes a global and
|
|
35
|
+
// document.currentScript.src is set (Emscripten uses it to locate the .wasm binary).
|
|
36
|
+
await loadScriptTag(url)
|
|
37
|
+
factory = (globalThis as Record<string, unknown>)[
|
|
38
|
+
WASM_GLOBAL_NAME
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
if (typeof factory !== 'function') {
|
|
42
|
+
throw new TypeError(
|
|
43
|
+
`WASM factory '${WASM_GLOBAL_NAME}' not found after loading ${url}`
|
|
44
|
+
)
|
|
45
|
+
}
|
|
46
|
+
return (factory as () => Promise<AudioFeaturesWasmModule>)()
|
|
47
|
+
})().catch((err) => {
|
|
48
|
+
modulePromise = null
|
|
49
|
+
throw err
|
|
50
|
+
})
|
|
51
|
+
}
|
|
52
|
+
return modulePromise
|
|
53
|
+
}
|