@octoseq/mir 0.1.0-main.0d2814e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DUWYCAVG.js +1525 -0
- package/dist/chunk-DUWYCAVG.js.map +1 -0
- package/dist/index.d.ts +450 -0
- package/dist/index.js +1234 -0
- package/dist/index.js.map +1 -0
- package/dist/runMir-CSIBwNZ3.d.ts +84 -0
- package/dist/runner/runMir.d.ts +2 -0
- package/dist/runner/runMir.js +3 -0
- package/dist/runner/runMir.js.map +1 -0
- package/dist/runner/workerProtocol.d.ts +169 -0
- package/dist/runner/workerProtocol.js +11 -0
- package/dist/runner/workerProtocol.js.map +1 -0
- package/dist/types-BE3py4fZ.d.ts +83 -0
- package/package.json +55 -0
- package/src/dsp/fft.ts +22 -0
- package/src/dsp/fftBackend.ts +53 -0
- package/src/dsp/fftBackendFftjs.ts +60 -0
- package/src/dsp/hpss.ts +152 -0
- package/src/dsp/hpssGpu.ts +101 -0
- package/src/dsp/mel.ts +219 -0
- package/src/dsp/mfcc.ts +119 -0
- package/src/dsp/onset.ts +205 -0
- package/src/dsp/peakPick.ts +112 -0
- package/src/dsp/spectral.ts +95 -0
- package/src/dsp/spectrogram.ts +176 -0
- package/src/gpu/README.md +34 -0
- package/src/gpu/context.ts +44 -0
- package/src/gpu/helpers.ts +87 -0
- package/src/gpu/hpssMasks.ts +116 -0
- package/src/gpu/kernels/hpssMasks.wgsl.ts +137 -0
- package/src/gpu/kernels/melProject.wgsl.ts +48 -0
- package/src/gpu/kernels/onsetEnvelope.wgsl.ts +56 -0
- package/src/gpu/melProject.ts +98 -0
- package/src/gpu/onsetEnvelope.ts +81 -0
- package/src/gpu/webgpu.d.ts +176 -0
- package/src/index.ts +121 -0
- package/src/runner/runMir.ts +431 -0
- package/src/runner/workerProtocol.ts +189 -0
- package/src/search/featureVectorV1.ts +123 -0
- package/src/search/fingerprintV1.ts +230 -0
- package/src/search/refinedModelV1.ts +321 -0
- package/src/search/searchTrackV1.ts +206 -0
- package/src/search/searchTrackV1Guided.ts +863 -0
- package/src/search/similarity.ts +98 -0
- package/src/types.ts +105 -0
- package/src/util/display.ts +80 -0
- package/src/util/normalise.ts +58 -0
- package/src/util/stats.ts +25 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
import { melSpectrogram, type MelConfig, type MelSpectrogram } from "../dsp/mel";
|
|
2
|
+
import { mfcc, delta, deltaDelta } from "../dsp/mfcc";
|
|
3
|
+
import { onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu } from "../dsp/onset";
|
|
4
|
+
import { peakPick } from "../dsp/peakPick";
|
|
5
|
+
import { hpss } from "../dsp/hpss";
|
|
6
|
+
import { hpssGpu } from "../dsp/hpssGpu";
|
|
7
|
+
import { spectralCentroid, spectralFlux } from "../dsp/spectral";
|
|
8
|
+
import { spectrogram, type AudioBufferLike, type Spectrogram, type SpectrogramConfig } from "../dsp/spectrogram";
|
|
9
|
+
import type { MirGPU } from "../gpu/context";
|
|
10
|
+
import type { MirAudioPayload, MirBackend, MirResult, MirRunRequest } from "../types";
|
|
11
|
+
|
|
12
|
+
/**
 * Returns a millisecond timestamp used for measuring elapsed time.
 *
 * Prefers `performance.now()` when a global `performance` object exists and
 * falls back to `Date.now()` otherwise.
 */
function nowMs(): number {
  return typeof performance !== "undefined" ? performance.now() : Date.now();
}
|
|
15
|
+
|
|
16
|
+
/**
 * Wraps a mono audio payload in the `AudioBufferLike` shape expected by `spectrogram()`.
 *
 * The wrapper always reports a single channel, and `getChannelData` returns the
 * payload's `mono` samples (the same array instance, not a copy) regardless of any
 * channel index passed by the caller.
 *
 * @param audio - Payload providing `sampleRate` and `mono` samples.
 * @returns A single-channel `AudioBufferLike` view over `audio`.
 */
function asAudioBufferLike(audio: MirAudioPayload): AudioBufferLike {
  return {
    sampleRate: audio.sampleRate,
    numberOfChannels: 1,
    getChannelData: () => audio.mono,
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Per-run options accepted by `runMir`.
 *
 * The `onset`, `peakPick`, `hpss` and `mfcc` groups may also be supplied on the
 * request; when both are present, `runMir` lets the values given here win.
 */
export type RunMirOptions = {
  /** GPU handle. `runMir` throws if the GPU backend is requested on a GPU-capable path without it. */
  gpu?: MirGPU;
  /** If provided, long loops should periodically call this and abort if true. */
  isCancelled?: () => boolean;
  /** If true and backend==='gpu', do not silently fall back to CPU on GPU errors. */
  strictGpu?: boolean;

  // v0.1 feature-specific options (kept minimal; UI provides basic controls)

  /** Onset-envelope settings, forwarded to the onset envelope computation. */
  onset?: {
    /** Smoothing length; presumably milliseconds — confirm against `onsetEnvelopeFromMel`. */
    smoothMs?: number;
    diffMethod?: "rectified" | "abs";
    useLog?: boolean;
  };
  /** Peak-picking settings used for `onsetPeaks`. */
  peakPick?: {
    minIntervalSec?: number;
    threshold?: number;
    /** When truthy, `runMir` enables "meanStd" adaptive thresholding with this factor. */
    adaptiveFactor?: number;
  };
  /** HPSS settings. */
  hpss?: {
    timeMedian?: number;
    freqMedian?: number;
    /** Optional spectrogram config used for HPSS instead of the request-level one. */
    spectrogram?: SpectrogramConfig;
  };
  /** MFCC settings. */
  mfcc?: {
    nCoeffs?: number;
    /** Optional spectrogram config used for MFCC instead of the request-level one. */
    spectrogram?: SpectrogramConfig;
  };
};

// Backwards-compat export alias (some earlier tasks referenced this name).
export type RunMirBackendOptions = RunMirOptions;
|
|
55
|
+
|
|
56
|
+
/**
 * Shared MIR execution entrypoint used by the main thread and by the worker.
 *
 * A base spectrogram is always computed on the CPU first; `request.fn` then selects
 * which feature is derived from it and returned.
 *
 * Notes:
 * - We keep FFT/STFT on CPU for now (spectrogram()), but allow downstream stages
 *   (mel projection, onset envelope, HPSS) to run on WebGPU when
 *   `request.backend === "gpu"`.
 * - `spectralCentroid`, `spectralFlux`, `onsetPeaks` and the MFCC family always run on CPU.
 * - HPSS and MFCC may supply their own spectrogram config; a second spectrogram is
 *   computed when it differs from the base one in `fftSize`, `hopSize` or `window`.
 * - An unrecognised `request.fn` falls back to the `melSpectrogram` behaviour.
 *
 * @param audio - Mono samples plus sample rate.
 * @param request - Selects the feature (`fn`), backend and per-run configuration.
 * @param options - Runtime options; feature groups given here override the ones on `request`.
 * @returns The computed feature together with backend and timing metadata.
 * @throws Error `"@octoseq/mir: cancelled"` when `options.isCancelled` reports true after a
 *   spectrogram pass.
 * @throws Error when the GPU backend is requested for a GPU-capable feature but
 *   `options.gpu` is missing.
 * @throws Whatever the GPU stage threw, when `options.strictGpu` is set; otherwise GPU
 *   failures fall back to the CPU implementation.
 */
export async function runMir(
  audio: MirAudioPayload,
  request: MirRunRequest,
  options: RunMirOptions = {}
): Promise<MirResult> {
  // Allow callers to pass per-run config via the request (needed for worker runs).
  // If both are provided, `options.*` wins.
  const opts: RunMirOptions = {
    ...options,
    onset: { ...request.onset, ...options.onset },
    peakPick: { ...request.peakPick, ...options.peakPick },
    hpss: { ...request.hpss, ...options.hpss },
    mfcc: { ...request.mfcc, ...options.mfcc },
  };
  const t0 = nowMs();

  const gpuMissingMessage = "@octoseq/mir: backend='gpu' requested but no MirGPU provided";

  // Aborts the run if the caller has asked for cancellation.
  const assertNotCancelled = (): void => {
    if (opts.isCancelled?.()) {
      throw new Error("@octoseq/mir: cancelled");
    }
  };

  const backend: MirBackend = request.backend ?? "cpu";
  const gpuRequested = backend === "gpu";

  const specConfig: SpectrogramConfig = request.spectrogram ?? {
    fftSize: 2048,
    hopSize: 512,
    window: "hann",
  };

  // CPU: spectrogram + centroid/flux are CPU-only today.
  const cpuStart = nowMs();
  const spec: Spectrogram = await spectrogram(asAudioBufferLike(audio), specConfig, undefined, {
    isCancelled: opts.isCancelled,
  });
  const cpuAfterSpec = nowMs();
  const baseSpecMs = cpuAfterSpec - cpuStart;

  assertNotCancelled();

  if (request.fn === "spectralCentroid" || request.fn === "spectralFlux") {
    const values = request.fn === "spectralCentroid" ? spectralCentroid(spec) : spectralFlux(spec);
    const cpuEnd = nowMs();
    return {
      kind: "1d",
      times: spec.times,
      values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: cpuEnd - t0,
          cpuMs: cpuEnd - cpuStart,
        },
      },
    };
  }

  const melConfig: MelConfig = request.mel ?? { nMels: 64 };

  // Helper: compute mel (possibly GPU-accelerated projection) from the base spectrogram.
  const computeMel = async (
    useGpu: boolean
  ): Promise<{ mel: MelSpectrogram; usedGpu: boolean; gpuMs?: number; cpuExtraMs: number }> => {
    const melCpuStart = nowMs();

    if (useGpu) {
      const gpu = opts.gpu;
      if (!gpu) {
        throw new Error(gpuMissingMessage);
      }

      const gpuStart = nowMs();
      try {
        const mel = await melSpectrogram(spec, melConfig, gpu);
        const gpuWallMs = nowMs() - gpuStart;

        return {
          mel,
          usedGpu: true,
          // Prefer the kernel-reported timing; otherwise use the wall time of the call.
          gpuMs: mel.gpuTimings?.gpuSubmitToReadbackMs ?? gpuWallMs,
          cpuExtraMs: nowMs() - melCpuStart - gpuWallMs,
        };
      } catch (e) {
        if (opts.strictGpu) throw e;
        // Deliberate best-effort: fall back to CPU below.
      }
    }

    const mel = await melSpectrogram(spec, melConfig, undefined);
    return {
      mel,
      usedGpu: false,
      cpuExtraMs: nowMs() - melCpuStart,
    };
  };

  // Helper: the melSpectrogram result, shared by the explicit branch and the unknown-fn fallback.
  const melSpectrogramResult = async (): Promise<MirResult> => {
    const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(gpuRequested);
    const end = nowMs();

    return {
      kind: "2d",
      times: mel.times,
      data: mel.melBands,
      meta: {
        backend: usedGpu ? "gpu" : "cpu",
        usedGpu,
        timings: {
          totalMs: end - t0,
          cpuMs: baseSpecMs + cpuExtraMs,
          gpuMs,
        },
      },
    };
  };

  // Branch by fn.
  if (request.fn === "melSpectrogram") {
    return melSpectrogramResult();
  }

  if (request.fn === "onsetEnvelope") {
    // For this milestone we compute onset from mel by default.
    // GPU path: diff+reduction kernel on melFlat.
    if (gpuRequested) {
      const gpu = opts.gpu;
      if (!gpu) throw new Error(gpuMissingMessage);

      const { mel, gpuMs: melGpuMs, cpuExtraMs: melCpuMs } = await computeMel(true);

      try {
        // Only `diffMethod` is forwarded to the GPU kernel.
        const onsetGpu = await onsetEnvelopeFromMelGpu(mel, gpu, {
          diffMethod: opts.onset?.diffMethod,
        });
        const end = nowMs();

        return {
          kind: "1d",
          times: onsetGpu.times,
          values: onsetGpu.values,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: end - t0,
              cpuMs: baseSpecMs + melCpuMs,
              gpuMs: (melGpuMs ?? 0) + onsetGpu.gpuTimings.gpuSubmitToReadbackMs,
            },
          },
        };
      } catch (e) {
        if (opts.strictGpu) throw e;
        // Deliberate best-effort: fall back to the CPU onset path below.
      }
    }

    const { mel, cpuExtraMs: melCpuMs } = await computeMel(false);
    const onset = onsetEnvelopeFromMel(mel, {
      smoothMs: opts.onset?.smoothMs,
      diffMethod: opts.onset?.diffMethod,
      useLog: opts.onset?.useLog,
    });
    const end = nowMs();

    return {
      kind: "1d",
      times: onset.times,
      values: onset.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: baseSpecMs + melCpuMs,
        },
      },
    };
  }

  if (request.fn === "onsetPeaks") {
    const { mel, cpuExtraMs: melCpuMs } = await computeMel(false);
    const onset = onsetEnvelopeFromMel(mel, {
      smoothMs: opts.onset?.smoothMs,
      diffMethod: opts.onset?.diffMethod,
      useLog: opts.onset?.useLog,
    });

    // A falsy factor (undefined or 0) leaves adaptive thresholding switched off.
    const adaptiveFactor = opts.peakPick?.adaptiveFactor;
    const events = peakPick(onset.times, onset.values, {
      minIntervalSec: opts.peakPick?.minIntervalSec,
      threshold: opts.peakPick?.threshold,
      adaptive: adaptiveFactor ? { method: "meanStd", factor: adaptiveFactor } : undefined,
    });

    const end = nowMs();
    return {
      kind: "events",
      times: onset.times,
      events,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: baseSpecMs + melCpuMs,
        },
      },
    };
  }

  if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
    const wantHarmonic = request.fn === "hpssHarmonic";

    // HPSS may use a custom spectrogram config; reuse the base spectrogram when it matches.
    const hpssSpecConfig = opts.hpss?.spectrogram ?? specConfig;
    let hpssSpec: Spectrogram = spec;
    let hpssSpecMs = baseSpecMs;

    if (spectrogramConfigDiffers(hpssSpecConfig, specConfig)) {
      const hpssSpecStart = nowMs();
      hpssSpec = await spectrogram(asAudioBufferLike(audio), hpssSpecConfig, undefined, {
        isCancelled: opts.isCancelled,
      });
      hpssSpecMs = nowMs() - hpssSpecStart;
      assertNotCancelled();
    }

    // HPSS is CPU-heavy; we optionally accelerate mask estimation with WebGPU.
    // CPU path remains the reference implementation and is used as fallback.
    if (gpuRequested) {
      const gpu = opts.gpu;
      if (!gpu) throw new Error(gpuMissingMessage);

      const gpuStageStart = nowMs();
      try {
        const out = await hpssGpu(hpssSpec, gpu, {
          timeMedian: opts.hpss?.timeMedian,
          freqMedian: opts.hpss?.freqMedian,
          softMask: true, // preserve CPU default
          isCancelled: opts.isCancelled,
        });
        const end = nowMs();

        const chosen = wantHarmonic ? out.harmonic : out.percussive;
        return {
          kind: "2d",
          times: chosen.times,
          data: chosen.magnitudes,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: end - t0,
              // Wall time of the GPU stage minus the reported GPU time counts as CPU time.
              cpuMs: hpssSpecMs + ((end - gpuStageStart) - out.gpuMs),
              gpuMs: out.gpuMs,
            },
          },
        };
      } catch (e) {
        if (opts.strictGpu) throw e;
        // Deliberate best-effort: fall back to CPU HPSS below.
      }
    }

    const cpuStageStart = nowMs();
    const { harmonic, percussive } = hpss(hpssSpec, {
      timeMedian: opts.hpss?.timeMedian,
      freqMedian: opts.hpss?.freqMedian,
      isCancelled: opts.isCancelled,
    });
    const end = nowMs();

    const chosen = wantHarmonic ? harmonic : percussive;
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.magnitudes,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: { totalMs: end - t0, cpuMs: hpssSpecMs + (end - cpuStageStart) },
      },
    };
  }

  if (request.fn === "mfcc" || request.fn === "mfccDelta" || request.fn === "mfccDeltaDelta") {
    // MFCC may use a custom spectrogram config; reuse the base spectrogram when it matches.
    const mfccSpecConfig = opts.mfcc?.spectrogram ?? specConfig;

    let mfccMel: MelSpectrogram;
    let mfccCpuMs: number;

    if (spectrogramConfigDiffers(mfccSpecConfig, specConfig)) {
      const mfccCpuStart = nowMs();
      const mfccSpec = await spectrogram(asAudioBufferLike(audio), mfccSpecConfig, undefined, {
        isCancelled: opts.isCancelled,
      });
      assertNotCancelled();
      mfccMel = await melSpectrogram(mfccSpec, melConfig, undefined);
      mfccCpuMs = nowMs() - mfccCpuStart;
    } else {
      const { mel, cpuExtraMs } = await computeMel(false);
      mfccMel = mel;
      mfccCpuMs = baseSpecMs + cpuExtraMs;
    }

    const mfccStart = nowMs();
    const base = mfcc(mfccMel, { nCoeffs: opts.mfcc?.nCoeffs });

    const features = { times: base.times, values: base.coeffs };
    const chosen =
      request.fn === "mfcc"
        ? features
        : request.fn === "mfccDelta"
          ? delta(features)
          : deltaDelta(features);

    const end = nowMs();
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: mfccCpuMs + (end - mfccStart),
        },
      },
    };
  }

  // Fallback: keep old behaviour (melSpectrogram) if unknown fn.
  return melSpectrogramResult();
}

/**
 * Reports whether a feature-specific spectrogram config needs its own spectrogram
 * pass, i.e. differs from the base config in `fftSize`, `hopSize` or `window`.
 */
function spectrogramConfigDiffers(custom: SpectrogramConfig, base: SpectrogramConfig): boolean {
  return (
    custom.fftSize !== base.fftSize ||
    custom.hopSize !== base.hopSize ||
    custom.window !== base.window
  );
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import type { MirAudioPayload, MirResult, MirRunRequest } from "../types";
|
|
2
|
+
|
|
3
|
+
/** Inbound worker message with `type: "INIT"`. */
export type MirWorkerInitMessage = {
  type: "INIT";
  /** Presumably asks the worker to prepare GPU support up front — confirm against the worker. */
  enableGpu: boolean;
};
|
|
7
|
+
|
|
8
|
+
/** Inbound worker message with `type: "RUN"`, carrying one MIR request and its audio. */
export type MirWorkerRunMessage = {
  type: "RUN";
  /** Identifier for this job; result, error and cancel messages carry a `jobId` as well. */
  jobId: string;
  request: MirRunRequest;
  audio: {
    sampleRate: number;
    /** Raw buffer behind the mono samples (see `rebuildAudioPayload`). */
    mono: ArrayBufferLike; // transferred
  };
  enableGpu: boolean;
  strictGpu?: boolean;
};
|
|
19
|
+
|
|
20
|
+
/** Inbound worker message with `type: "CANCEL"`, naming the job to cancel. */
export type MirWorkerCancelMessage = {
  type: "CANCEL";
  jobId: string;
};
|
|
24
|
+
|
|
25
|
+
/** Inbound worker message with `type: "SEARCH"`: look for regions similar to a query region. */
export type MirWorkerSearchMessage = {
  type: "SEARCH";
  jobId: string;

  audio: {
    sampleRate: number;
    mono: ArrayBufferLike; // transferred
  };

  /** Query region bounds; presumably seconds — confirm against the search implementation. */
  query: {
    t0: number;
    t1: number;
  };

  /** Search tuning (kept small and explicit, like MirRunRequest). */
  search?: {
    hopSec?: number;
    /**
     * NOTE(review): a "0..1" range note was previously attached to `skipOverlap`
     * (a boolean); it likely describes this threshold — confirm.
     */
    threshold?: number;
    /** If true, skip windows overlapping the query itself. */
    skipOverlap?: boolean;
    weights?: {
      mel?: number;
      transient?: number;
      mfcc?: number;
    };
    /** Optional: apply softmax to similarity curve before returning. */
    applySoftmax?: boolean;
  };

  /** Feature extraction config (re-uses existing MIR request knobs). */
  features?: {
    spectrogram?: MirRunRequest["spectrogram"];
    mel?: MirRunRequest["mel"];
    onset?: MirRunRequest["onset"];
    mfcc?: MirRunRequest["mfcc"];
  };

  /**
   * Optional human-in-the-loop refinement data.
   * When enabled, the worker can use accepted/rejected exemplars to produce a
   * per-track confidence curve and a re-ranked candidate list.
   */
  refinement?: {
    enabled?: boolean;
    includeQueryAsPositive?: boolean;
    labels?: Array<{
      t0: number;
      t1: number;
      status: "accepted" | "rejected";
      source: "auto" | "manual";
    }>;
  };

  enableGpu: boolean;
  strictGpu?: boolean;
};
|
|
81
|
+
|
|
82
|
+
/** Union of every message shape declared here as worker input, discriminated by `type`. */
export type MirWorkerInMessage =
  | MirWorkerInitMessage
  | MirWorkerRunMessage
  | MirWorkerSearchMessage
  | MirWorkerCancelMessage;
|
|
83
|
+
|
|
84
|
+
/** Outbound worker message with `type: "RESULT"`, carrying the outcome of a RUN job. */
export type MirWorkerResultMessage = {
  type: "RESULT";
  jobId: string;
  /** Total time spent in the worker handling this RUN, including (optional) GPU readback. */
  workerTotalMs: number;
  result: {
    // Mirror MirResult but transfer underlying buffers.
    kind: MirResult["kind"];
    times: ArrayBufferLike;
    /** Presumably set for "1d" results — confirm against the worker. */
    values?: ArrayBufferLike;
    /** Presumably one buffer per row of a "2d" result — confirm against the worker. */
    data2d?: ArrayBufferLike[];
    /** Presumably set for "events" results — confirm against the worker. */
    events?: Array<{ time: number; strength: number; index: number }>;
    meta: MirResult["meta"];
  };
};
|
|
99
|
+
|
|
100
|
+
/** Outbound worker message with `type: "ERROR"`, reporting a failure for a job. */
export type MirWorkerErrorMessage = {
  type: "ERROR";
  jobId: string;
  message: string;
  /** Stack trace text, when one is available. */
  stack?: string;
};
|
|
106
|
+
|
|
107
|
+
/** Outbound worker message with `type: "LOG"`; `jobId` is optional on this message. */
export type MirWorkerLogMessage = {
  type: "LOG";
  jobId?: string;
  level: "debug" | "info" | "warn" | "error";
  message: string;
  /** Arbitrary extra payload; consumers must narrow it before use. */
  data?: unknown;
};
|
|
114
|
+
|
|
115
|
+
/** Outbound worker message with `type: "SEARCH_RESULT"`, carrying the outcome of a SEARCH job. */
export type MirWorkerSearchResultMessage = {
  type: "SEARCH_RESULT";
  jobId: string;
  /** Per-stage timings; `modelMs` is optional. */
  timings: {
    fingerprintMs: number;
    scanMs: number;
    modelMs?: number;
    totalMs: number;
  };
  result: {
    /** Curve sample positions as a raw buffer. */
    times: ArrayBufferLike;
    /** Curve values as a raw buffer. */
    scores: ArrayBufferLike;
    /** Whether `scores` holds a similarity or a confidence curve. */
    curveKind: "similarity" | "confidence";
    /** Description of the model that produced the scores. */
    model: {
      kind: "baseline" | "prototype" | "logistic";
      positives: number;
      negatives: number;
      /** Presumably L2 norms of the model weights per feature group — confirm. */
      weightL2?: {
        mel: number;
        melForeground: number;
        melContrast?: number;
        onset: number;
        onsetForeground: number;
        onsetContrast?: number;
        mfcc?: number;
        mfccForeground?: number;
        mfccContrast?: number;
      };
      training?: {
        iterations: number;
        finalLoss: number;
      };
    };
    candidates: Array<{
      timeSec: number;
      score: number;
      windowStartSec: number;
      windowEndSec: number;
      /** Optional breakdown of a candidate's score. */
      explain?: {
        /** Presumably per-feature-group logit contributions — confirm. */
        groupLogit?: {
          logit: number;
          bias: number;
          mel: number;
          melForeground: number;
          melContrast?: number;
          onset: number;
          onsetForeground: number;
          onsetContrast?: number;
          mfcc?: number;
          mfccForeground?: number;
          mfccContrast?: number;
        };
      };
    }>;
    /** Scan parameters and window counts. */
    meta: {
      windowSec: number;
      hopSec: number;
      skippedWindows: number;
      scannedWindows: number;
    };
  };
};
|
|
177
|
+
|
|
178
|
+
/** Union of every message shape declared here as worker output, discriminated by `type`. */
export type MirWorkerOutMessage =
  | MirWorkerResultMessage
  | MirWorkerSearchResultMessage
  | MirWorkerErrorMessage
  | MirWorkerLogMessage;
|
|
183
|
+
|
|
184
|
+
/**
 * Rebuilds a `MirAudioPayload` from the audio part of a worker RUN message.
 *
 * The raw `mono` buffer is wrapped in a `Float32Array` view over the same memory
 * (no copy is made), so the buffer's byte length must be a multiple of 4.
 *
 * @param a - Audio section of the message: sample rate plus the raw mono buffer.
 * @returns Payload with the same sample rate and a `Float32Array` over `a.mono`.
 */
export function rebuildAudioPayload(a: MirWorkerRunMessage["audio"]): MirAudioPayload {
  return {
    sampleRate: a.sampleRate,
    mono: new Float32Array(a.mono as ArrayBuffer),
  };
}
|