@octoseq/mir 0.1.0-main.0d2814e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DUWYCAVG.js +1525 -0
- package/dist/chunk-DUWYCAVG.js.map +1 -0
- package/dist/index.d.ts +450 -0
- package/dist/index.js +1234 -0
- package/dist/index.js.map +1 -0
- package/dist/runMir-CSIBwNZ3.d.ts +84 -0
- package/dist/runner/runMir.d.ts +2 -0
- package/dist/runner/runMir.js +3 -0
- package/dist/runner/runMir.js.map +1 -0
- package/dist/runner/workerProtocol.d.ts +169 -0
- package/dist/runner/workerProtocol.js +11 -0
- package/dist/runner/workerProtocol.js.map +1 -0
- package/dist/types-BE3py4fZ.d.ts +83 -0
- package/package.json +55 -0
- package/src/dsp/fft.ts +22 -0
- package/src/dsp/fftBackend.ts +53 -0
- package/src/dsp/fftBackendFftjs.ts +60 -0
- package/src/dsp/hpss.ts +152 -0
- package/src/dsp/hpssGpu.ts +101 -0
- package/src/dsp/mel.ts +219 -0
- package/src/dsp/mfcc.ts +119 -0
- package/src/dsp/onset.ts +205 -0
- package/src/dsp/peakPick.ts +112 -0
- package/src/dsp/spectral.ts +95 -0
- package/src/dsp/spectrogram.ts +176 -0
- package/src/gpu/README.md +34 -0
- package/src/gpu/context.ts +44 -0
- package/src/gpu/helpers.ts +87 -0
- package/src/gpu/hpssMasks.ts +116 -0
- package/src/gpu/kernels/hpssMasks.wgsl.ts +137 -0
- package/src/gpu/kernels/melProject.wgsl.ts +48 -0
- package/src/gpu/kernels/onsetEnvelope.wgsl.ts +56 -0
- package/src/gpu/melProject.ts +98 -0
- package/src/gpu/onsetEnvelope.ts +81 -0
- package/src/gpu/webgpu.d.ts +176 -0
- package/src/index.ts +121 -0
- package/src/runner/runMir.ts +431 -0
- package/src/runner/workerProtocol.ts +189 -0
- package/src/search/featureVectorV1.ts +123 -0
- package/src/search/fingerprintV1.ts +230 -0
- package/src/search/refinedModelV1.ts +321 -0
- package/src/search/searchTrackV1.ts +206 -0
- package/src/search/searchTrackV1Guided.ts +863 -0
- package/src/search/similarity.ts +98 -0
- package/src/types.ts +105 -0
- package/src/util/display.ts +80 -0
- package/src/util/normalise.ts +58 -0
- package/src/util/stats.ts +25 -0
|
@@ -0,0 +1,1525 @@
|
|
|
1
|
+
import FFT from 'fft.js';
|
|
2
|
+
|
|
3
|
+
// src/gpu/helpers.ts
|
|
4
|
+
/**
 * Current timestamp in milliseconds.
 *
 * Uses `performance.now()` when a global `performance` object exists and
 * falls back to `Date.now()` otherwise.
 */
function nowMs() {
  if (typeof performance === "undefined") {
    return Date.now();
  }
  return performance.now();
}
|
|
7
|
+
/**
 * Number of bytes occupied by `n` 32-bit floats (4 bytes each).
 */
function byteSizeF32(n) {
  const bytesPerFloat = 4;
  return n * bytesPerFloat;
}
|
|
10
|
+
/**
 * Creates a STORAGE | COPY_DST buffer sized for `data` (4 bytes per element)
 * and uploads `data` into it at offset 0 through the GPU queue.
 *
 * @param gpu  Object exposing `device` and `queue`.
 * @param data Values to upload; its `length` determines the buffer size.
 * @returns The newly created buffer.
 */
function createAndWriteStorageBuffer(gpu, data) {
  const storage = gpu.device.createBuffer({
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
    size: byteSizeF32(data.length)
  });
  gpu.queue.writeBuffer(storage, 0, data);
  return storage;
}
|
|
18
|
+
function createUniformBufferU32x4(gpu, u32x4) {
|
|
19
|
+
if (u32x4.length !== 4) throw new Error("@octoseq/mir: uniform buffer must be 4 u32 values");
|
|
20
|
+
const buf = gpu.device.createBuffer({
|
|
21
|
+
size: 16,
|
|
22
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
23
|
+
});
|
|
24
|
+
gpu.queue.writeBuffer(buf, 0, u32x4);
|
|
25
|
+
return buf;
|
|
26
|
+
}
|
|
27
|
+
function createStorageOutBuffer(gpu, byteLength) {
|
|
28
|
+
return gpu.device.createBuffer({
|
|
29
|
+
size: byteLength,
|
|
30
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
function createReadbackBuffer(gpu, byteLength) {
|
|
34
|
+
return gpu.device.createBuffer({
|
|
35
|
+
size: byteLength,
|
|
36
|
+
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
/**
 * Appends an `outBuffer` -> `readback` copy to `encoder`, submits the encoded
 * commands, waits for `readback` to be mapped for reading and returns a copy
 * of its contents.
 *
 * @param gpu        Object exposing `queue`.
 * @param encoder    Command encoder holding the already-recorded work.
 * @param outBuffer  Source buffer of the copy.
 * @param readback   Mappable destination buffer.
 * @param byteLength Number of bytes to copy.
 * @returns `{ value, timing }` where `value` is a detached copy of the mapped
 *          range and `timing.gpuSubmitToReadbackMs` is the wall-clock time
 *          between just before submit and the completion of `mapAsync`.
 */
async function submitAndReadback(gpu, encoder, outBuffer, readback, byteLength) {
  encoder.copyBufferToBuffer(outBuffer, 0, readback, 0, byteLength);

  const submittedAt = nowMs();
  gpu.queue.submit([encoder.finish()]);
  await readback.mapAsync(GPUMapMode.READ);
  const elapsedMs = nowMs() - submittedAt;

  // Copy before unmapping: the mapped range becomes unusable after unmap().
  const snapshot = readback.getMappedRange().slice(0);
  readback.unmap();

  return {
    value: snapshot,
    timing: { gpuSubmitToReadbackMs: elapsedMs }
  };
}
|
|
55
|
+
|
|
56
|
+
// src/gpu/kernels/melProject.wgsl.ts
|
|
57
|
+
// WGSL compute shader: for every (frame, mel) pair, takes the dot product of one
// magnitude row with one filter row and stores log10(eps + sum).
// Layouts used by the shader: mags[frame * nBins + k], filters[mel * nBins + k],
// out[frame * nMels + mel].
var melProjectWGSL = /* wgsl */ `
struct Params {
nBins: u32,
nMels: u32,
nFrames: u32,
_pad: u32,
};

@group(0) @binding(0) var<storage, read> mags : array<f32>;
@group(0) @binding(1) var<storage, read> filters : array<f32>;
@group(0) @binding(2) var<storage, read_write> out : array<f32>;
@group(0) @binding(3) var<uniform> params : Params;

fn log10(x: f32) -> f32 {
return log(x) / log(10.0);
}

@compute @workgroup_size(16, 16)
fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
let frame = gid.x;
let mel = gid.y;
if (frame >= params.nFrames || mel >= params.nMels) {
return;
}

var sum: f32 = 0.0;
let bins = params.nBins;
let magBase = frame * bins;
let filBase = mel * bins;

for (var k: u32 = 0u; k < bins; k = k + 1u) {
sum = sum + mags[magBase + k] * filters[filBase + k];
}

let eps: f32 = 1e-12;
out[frame * params.nMels + mel] = log10(eps + sum);
}
`;
|
|
98
|
+
|
|
99
|
+
// src/gpu/melProject.ts
|
|
100
|
+
/**
 * Runs the mel projection shader over flattened inputs and reads the result back.
 *
 * @param gpu   Object exposing `device` and `queue`.
 * @param input `{ nFrames, nBins, nMels, magsFlat, filterFlat }` where
 *              `magsFlat.length === nFrames * nBins` and
 *              `filterFlat.length === nMels * nBins`.
 * @returns `{ value: { outFlat }, timing }`; `outFlat` is a Float32Array laid out
 *          as `outFlat[frame * nMels + mel]`, `timing` comes from `submitAndReadback`.
 * @throws Error when either flat input has an unexpected length. Errors raised
 *         while building or running the GPU work are propagated to the caller.
 */
async function gpuMelProjectFlat(gpu, input) {
  const { device } = gpu;
  const { nFrames, nBins, nMels, magsFlat, filterFlat } = input;
  if (magsFlat.length !== nFrames * nBins) {
    throw new Error("@octoseq/mir: magsFlat length mismatch");
  }
  if (filterFlat.length !== nMels * nBins) {
    throw new Error("@octoseq/mir: filterFlat length mismatch");
  }

  // Every buffer created below is recorded here so it is destroyed even when a
  // later step (pipeline creation, submit, mapAsync, ...) throws.
  const ownedBuffers = [];
  const own = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };

  try {
    const magsBuffer = own(createAndWriteStorageBuffer(gpu, magsFlat));
    const filterBuffer = own(createAndWriteStorageBuffer(gpu, filterFlat));
    const outByteLen = byteSizeF32(nFrames * nMels);
    const outBuffer = own(createStorageOutBuffer(gpu, outByteLen));
    const readback = own(createReadbackBuffer(gpu, outByteLen));

    const shader = device.createShaderModule({ code: melProjectWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: {
        module: shader,
        entryPoint: "main"
      }
    });
    // Uniform field order must match the shader's Params struct: nBins, nMels, nFrames, _pad.
    const params = own(createUniformBufferU32x4(gpu, new Uint32Array([nBins, nMels, nFrames, 0])));
    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: magsBuffer } },
        { binding: 1, resource: { buffer: filterBuffer } },
        { binding: 2, resource: { buffer: outBuffer } },
        { binding: 3, resource: { buffer: params } }
      ]
    });

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // The shader uses 16x16 workgroups: x covers frames, y covers mel bands.
    const wgX = Math.ceil(nFrames / 16);
    const wgY = Math.ceil(nMels / 16);
    pass.dispatchWorkgroups(wgX, wgY);
    pass.end();

    const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
    // `bytes` is a detached copy, so it stays valid after the buffers are destroyed.
    const outFlat = new Float32Array(bytes);
    return {
      value: { outFlat },
      timing
    };
  } finally {
    for (const buffer of ownedBuffers) {
      buffer.destroy();
    }
  }
}
|
|
152
|
+
|
|
153
|
+
// src/dsp/mel.ts
|
|
154
|
+
/**
 * Throws unless `value` is a finite number, greater than zero, that survives
 * a 32-bit integer round-trip (`(value | 0) === value`).
 *
 * @param name  Label used in the error message.
 * @param value Candidate value.
 * @throws Error `@octoseq/mir: <name> must be a positive integer`.
 */
function assertPositiveInt(name, value) {
  const isPositiveInt = Number.isFinite(value) && value > 0 && (value | 0) === value;
  if (isPositiveInt) {
    return;
  }
  throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
}
|
|
159
|
+
/**
 * Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`.
 */
function hzToMel(hz) {
  const ratio = 1 + hz / 700;
  return 2595 * Math.log10(ratio);
}
|
|
162
|
+
/**
 * Converts a mel value to Hz using `700 * (10^(mel / 2595) - 1)`.
 */
function melToHz(mel) {
  const exponent = mel / 2595;
  return 700 * (Math.pow(10, exponent) - 1);
}
|
|
165
|
+
/**
 * Builds `nMels` triangular filters over the `(fftSize >>> 1) + 1` FFT bins.
 *
 * `fMin` / `fMax` are clamped to `[0, sampleRate / 2]`. `nMels + 2` points are
 * spaced evenly on the mel scale between them, converted back to Hz and rounded
 * to the nearest bin. Filter `m` rises from bin point `m` to `m + 1` and falls
 * from `m + 1` to `m + 2`. A filter whose rising or falling edge spans zero
 * bins is left all-zero.
 *
 * @returns Array of `nMels` Float32Array rows, each of length nBins.
 * @throws Error when the clamped `fMax` is not greater than the clamped `fMin`.
 */
function buildMelFilterBank(sampleRate, fftSize, nMels, fMin, fMax) {
  const nBins = (fftSize >>> 1) + 1;
  const nyquist = sampleRate / 2;
  const clampToBand = (hz) => Math.max(0, Math.min(hz, nyquist));
  const lowHz = clampToBand(fMin);
  const highHz = clampToBand(fMax);
  if (highHz <= lowHz) {
    throw new Error("@octoseq/mir: mel fMax must be > fMin");
  }

  const melLow = hzToMel(lowHz);
  const melHigh = hzToMel(highHz);
  const pointCount = nMels + 2;
  const binHz = sampleRate / fftSize;

  // Intermediate values go through Float32Array storage on purpose: that
  // rounding is part of the computed bin positions.
  const melPoints = new Float32Array(pointCount);
  const hzPoints = new Float32Array(pointCount);
  const binPoints = new Int32Array(pointCount);
  for (let i = 0; i < pointCount; i++) {
    melPoints[i] = melLow + i * (melHigh - melLow) / (nMels + 1);
    hzPoints[i] = melToHz(melPoints[i] ?? 0);
    const nearestBin = Math.round((hzPoints[i] ?? 0) / binHz);
    binPoints[i] = Math.max(0, Math.min(nBins - 1, nearestBin));
  }

  const bank = new Array(nMels);
  for (let band = 0; band < nMels; band++) {
    const lo = binPoints[band] ?? 0;
    const peak = binPoints[band + 1] ?? 0;
    const hi = binPoints[band + 2] ?? 0;
    const weights = new Float32Array(nBins);
    if (peak !== lo && hi !== peak) {
      for (let k = lo; k < peak; k++) {
        weights[k] = (k - lo) / (peak - lo);
      }
      for (let k = peak; k < hi; k++) {
        weights[k] = (hi - k) / (hi - peak);
      }
    }
    bank[band] = weights;
  }
  return bank;
}
|
|
206
|
+
/**
 * Projects each magnitude frame onto the filter bank on the CPU.
 *
 * For frame `t` and filter `m` the output is `log10(1e-12 + sum_k mags[k] * w[k])`.
 * A missing frame yields an all-zero row, and a missing filter leaves its band at 0.
 *
 * @param spec    Object with `times` and `magnitudes` (one row per frame).
 * @param filters Array of filter rows.
 * @returns `{ times: spec.times, melBands }` with one Float32Array per frame.
 */
function cpuMelProject(spec, filters) {
  const EPS = 1e-12;
  const nMels = filters.length;
  const frameCount = spec.times.length;
  const melBands = new Array(frameCount);

  for (let t = 0; t < frameCount; t++) {
    const bands = new Float32Array(nMels);
    const frame = spec.magnitudes[t];
    if (frame) {
      for (let m = 0; m < nMels; m++) {
        const weights = filters[m];
        if (!weights) continue;
        let acc = 0;
        for (let k = 0; k < frame.length; k++) {
          acc += (frame[k] ?? 0) * (weights[k] ?? 0);
        }
        bands[m] = Math.log10(EPS + acc);
      }
    }
    melBands[t] = bands;
  }

  return { times: spec.times, melBands };
}
|
|
234
|
+
/**
 * GPU variant of the mel projection: flattens the spectrogram and filter bank
 * row-major, runs `gpuMelProjectFlat`, and splits the flat result back into
 * one row per frame.
 *
 * @param spec    Object with `times`, `fftSize` and `magnitudes`.
 * @param filters Array of filter rows (each expected to hold nBins values).
 * @param gpu     GPU context forwarded to `gpuMelProjectFlat`.
 * @returns `{ times, melBands, gpuTimings }`; the `melBands` rows are subarray
 *          views into one shared Float32Array.
 */
async function gpuMelProject(spec, filters, gpu) {
  const nMels = filters.length;
  const nFrames = spec.times.length;
  const nBins = (spec.fftSize >>> 1) + 1;

  // Missing rows are skipped, which leaves zeros in the zero-initialised arrays.
  const magsFlat = new Float32Array(nFrames * nBins);
  for (let t = 0; t < nFrames; t++) {
    const frame = spec.magnitudes[t];
    if (frame) {
      magsFlat.set(frame, t * nBins);
    }
  }
  const filterFlat = new Float32Array(nMels * nBins);
  for (let m = 0; m < nMels; m++) {
    const weights = filters[m];
    if (weights) {
      filterFlat.set(weights, m * nBins);
    }
  }

  const projected = await gpuMelProjectFlat(gpu, { nFrames, nBins, nMels, magsFlat, filterFlat });
  const flat = projected.value.outFlat;
  const melBands = Array.from({ length: nFrames }, (_, t) => flat.subarray(t * nMels, (t + 1) * nMels));

  return {
    times: spec.times,
    melBands,
    gpuTimings: {
      gpuSubmitToReadbackMs: projected.timing.gpuSubmitToReadbackMs
    }
  };
}
|
|
268
|
+
/**
 * Computes a log-mel spectrogram from a magnitude spectrogram.
 *
 * Builds the filter bank from `spec.sampleRate` / `spec.fftSize` and the config,
 * then projects on the GPU when `gpu` is provided. Any error on the GPU path is
 * deliberately swallowed and the CPU projection is used instead.
 *
 * @param spec   Spectrogram with `sampleRate`, `fftSize`, `times`, `magnitudes`.
 * @param config `{ nMels, fMin?, fMax? }`; `fMin` defaults to 0 and `fMax` to
 *               `spec.sampleRate / 2`.
 * @param gpu    Optional GPU context.
 * @throws Error when `config.nMels` is not a positive integer, or when the
 *         filter bank cannot be built.
 */
async function melSpectrogram(spec, config, gpu) {
  assertPositiveInt("config.nMels", config.nMels);
  const lowHz = config.fMin ?? 0;
  const highHz = config.fMax ?? spec.sampleRate / 2;
  const bank = buildMelFilterBank(spec.sampleRate, spec.fftSize, config.nMels, lowHz, highHz);

  if (!gpu) {
    return cpuMelProject(spec, bank);
  }
  try {
    return await gpuMelProject(spec, bank, gpu);
  } catch {
    // Best-effort GPU: fall back to the CPU implementation on any failure.
    return cpuMelProject(spec, bank);
  }
}
|
|
288
|
+
|
|
289
|
+
// src/dsp/mfcc.ts
|
|
290
|
+
/**
 * Throws unless `v` is a finite number, greater than zero, that survives a
 * 32-bit integer round-trip (`(v | 0) === v`).
 *
 * @param name Label used in the error message.
 * @param v    Candidate value.
 * @throws Error `@octoseq/mir: <name> must be a positive integer`.
 */
function assertPositiveInt2(name, v) {
  const acceptable = Number.isFinite(v) && v > 0 && (v | 0) === v;
  if (!acceptable) {
    throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
  }
}
|
|
295
|
+
/**
 * Builds a row-major `nCoeffs x nMels` cosine matrix as a Float32Array.
 *
 * Entry `(i, j)` is `s_i * cos(pi / nMels * (j + 0.5) * i)` with
 * `s_0 = sqrt(1 / nMels)` and `s_i = sqrt(2 / nMels)` for `i > 0`.
 */
function buildDctMatrix(nCoeffs, nMels) {
  const matrix = new Float32Array(nCoeffs * nMels);
  const firstRowScale = Math.sqrt(1 / nMels);
  const otherRowScale = Math.sqrt(2 / nMels);
  for (let row = 0; row < nCoeffs; row++) {
    const rowScale = row === 0 ? firstRowScale : otherRowScale;
    const rowStart = row * nMels;
    for (let col = 0; col < nMels; col++) {
      const basis = Math.cos(Math.PI / nMels * (col + 0.5) * row);
      matrix[rowStart + col] = rowScale * basis;
    }
  }
  return matrix;
}
|
|
307
|
+
/**
 * Computes cepstral coefficients by multiplying each mel frame with the
 * matrix from `buildDctMatrix`.
 *
 * The number of mel bands is taken from the first frame. When that is 0 (or
 * there is no first frame) every output frame is an all-zero vector.
 *
 * @param mel     `{ times, melBands }`.
 * @param options `{ nCoeffs? }`, default 13.
 * @returns `{ times: mel.times, coeffs }` with one Float32Array(nCoeffs) per frame.
 * @throws Error when `nCoeffs` is not a positive integer.
 */
function mfcc(mel, options = {}) {
  const nFrames = mel.times.length;
  const nMels = mel.melBands[0]?.length ?? 0;
  const nCoeffs = options.nCoeffs ?? 13;
  assertPositiveInt2("options.nCoeffs", nCoeffs);

  if (nMels <= 0) {
    const zeros = Array.from({ length: nFrames }, () => new Float32Array(nCoeffs));
    return { times: mel.times, coeffs: zeros };
  }

  const basis = buildDctMatrix(nCoeffs, nMels);
  const coeffs = new Array(nFrames);
  for (let t = 0; t < nFrames; t++) {
    const bands = mel.melBands[t] ?? new Float32Array(nMels);
    const frameCoeffs = new Float32Array(nCoeffs);
    for (let i = 0; i < nCoeffs; i++) {
      const rowStart = i * nMels;
      let dot = 0;
      for (let j = 0; j < nMels; j++) {
        dot += (basis[rowStart + j] ?? 0) * (bands[j] ?? 0);
      }
      frameCoeffs[i] = dot;
    }
    coeffs[t] = frameCoeffs;
  }
  return { times: mel.times, coeffs };
}
|
|
332
|
+
/**
 * Regression-style delta of a feature sequence over a window of +/- N frames:
 * `d[t][f] = sum_{n=1..N} n * (x[t+n][f] - x[t-n][f]) / (2 * sum_{n=1..N} n^2)`.
 * Frame indices beyond either end are clamped to the first / last frame.
 *
 * @param features `{ times, values }`; the feature count is taken from the first row.
 * @param options  `{ window? }` (N), default 2.
 * @returns `{ times: features.times, values }` with one Float32Array per frame.
 * @throws Error when `window` is not a positive integer.
 */
function delta(features, options = {}) {
  const N = options.window ?? 2;
  assertPositiveInt2("options.window", N);
  const nFrames = features.times.length;
  const nFeat = features.values[0]?.length ?? 0;

  let denom = 0;
  for (let n = 1; n <= N; n++) {
    denom += n * n;
  }
  denom *= 2;

  const values = new Array(nFrames);
  for (let t = 0; t < nFrames; t++) {
    const row = new Float32Array(nFeat);
    for (let f = 0; f < nFeat; f++) {
      let weighted = 0;
      for (let n = 1; n <= N; n++) {
        const ahead = features.values[Math.min(nFrames - 1, t + n)]?.[f] ?? 0;
        const behind = features.values[Math.max(0, t - n)]?.[f] ?? 0;
        weighted += n * (ahead - behind);
      }
      row[f] = denom > 0 ? weighted / denom : 0;
    }
    values[t] = row;
  }
  return { times: features.times, values };
}
|
|
358
|
+
/**
 * Second-order delta: applies `delta` twice with the same options.
 */
function deltaDelta(features, options = {}) {
  const firstOrder = delta(features, options);
  return delta(firstOrder, options);
}
|
|
361
|
+
|
|
362
|
+
// src/gpu/kernels/onsetEnvelope.wgsl.ts
|
|
363
|
+
// WGSL compute shader: one invocation per frame t; sums the rectified or absolute
// per-band difference between frame t and frame t-1 and divides by max(1, nMels).
// Frame 0 is written as 0.
var onsetEnvelopeWGSL = /* wgsl */ `
// Compute onset strength envelope from a (log) mel spectrogram.
//
// Input layout: melFlat[t*nMels + m]
// Output layout: out[t]
//
// We compute novelty per frame:
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
// or sum_m abs(...)
//
// One invocation computes one frame index (t). This is memory-bound but reduces a full
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.

struct Params {
nMels: u32,
nFrames: u32,
diffMethod: u32, // 0=rectified, 1=abs
_pad: u32,
};

@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
@group(0) @binding(2) var<uniform> params: Params;

@compute @workgroup_size(256)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let t = gid.x;
if (t >= params.nFrames) { return; }

if (t == 0u) {
out[t] = 0.0;
return;
}

let nMels = params.nMels;
var sum: f32 = 0.0;

// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
// (Future optimisation: parallelise reduction within workgroup.)
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
let a = melFlat[t * nMels + m];
let b = melFlat[(t - 1u) * nMels + m];
let d = a - b;

if (params.diffMethod == 1u) {
// abs
sum = sum + abs(d);
} else {
// rectified
sum = sum + max(0.0, d);
}
}

out[t] = sum / max(1.0, f32(nMels));
}
`;
|
|
422
|
+
|
|
423
|
+
// src/gpu/onsetEnvelope.ts
|
|
424
|
+
/**
 * Runs the onset-envelope shader over a flattened mel spectrogram and reads
 * the per-frame result back.
 *
 * @param gpu   Object exposing `device` and `queue`.
 * @param input `{ nFrames, nMels, melFlat, diffMethod }` where
 *              `melFlat.length === nFrames * nMels`; `diffMethod === "abs"`
 *              selects absolute differences, anything else selects rectified.
 * @returns `{ value: { out }, timing }`; `out` is a Float32Array with one value
 *          per frame, `timing` comes from `submitAndReadback`.
 * @throws Error when `melFlat` has an unexpected length. Errors raised while
 *         building or running the GPU work are propagated to the caller.
 */
async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
  const { device } = gpu;
  const { nFrames, nMels, melFlat, diffMethod } = input;
  if (melFlat.length !== nFrames * nMels) {
    throw new Error("@octoseq/mir: melFlat length mismatch");
  }

  // Every buffer created below is recorded here so it is destroyed even when a
  // later step (pipeline creation, submit, mapAsync, ...) throws.
  const ownedBuffers = [];
  const own = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };

  try {
    const melBuffer = own(createAndWriteStorageBuffer(gpu, melFlat));
    const outByteLen = byteSizeF32(nFrames);
    const outBuffer = own(createStorageOutBuffer(gpu, outByteLen));
    const readback = own(createReadbackBuffer(gpu, outByteLen));

    const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: { module: shader, entryPoint: "main" }
    });
    // Uniform field order must match the shader's Params struct:
    // nMels, nFrames, diffMethod (0 = rectified, 1 = abs), _pad.
    const diffU32 = diffMethod === "abs" ? 1 : 0;
    const params = own(createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0])));
    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: melBuffer } },
        { binding: 1, resource: { buffer: outBuffer } },
        { binding: 2, resource: { buffer: params } }
      ]
    });

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // The shader uses 256-wide workgroups with one invocation per frame.
    const wg = Math.ceil(nFrames / 256);
    pass.dispatchWorkgroups(wg);
    pass.end();

    const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
    // `bytes` is a detached copy, so it stays valid after the buffers are destroyed.
    return {
      value: { out: new Float32Array(bytes) },
      timing
    };
  } finally {
    for (const buffer of ownedBuffers) {
      buffer.destroy();
    }
  }
}
|
|
466
|
+
|
|
467
|
+
// src/dsp/onset.ts
|
|
468
|
+
/**
 * Centered moving average computed from prefix sums.
 *
 * Each output sample averages the inputs in `[i - half, i + half]` with
 * `half = floor(windowFrames / 2)`; the window is truncated at the array
 * edges and divided by the number of samples actually covered.
 *
 * @param values       Input samples.
 * @param windowFrames Window length; when `<= 1` the input is returned as-is
 *                     (the same reference, not a copy).
 * @returns A new Float32Array of smoothed values, or `values` itself.
 */
function movingAverage(values, windowFrames) {
  if (windowFrames <= 1) {
    return values;
  }
  const len = values.length;
  const radius = Math.floor(windowFrames / 2);

  // cumulative[i] holds the sum of values[0..i-1], accumulated in float64.
  const cumulative = new Float64Array(len + 1);
  for (let i = 0; i < len; i++) {
    cumulative[i + 1] = (cumulative[i] ?? 0) + (values[i] ?? 0);
  }

  const smoothed = new Float32Array(len);
  for (let i = 0; i < len; i++) {
    const from = Math.max(0, i - radius);
    const to = Math.min(len, i + radius + 1);
    const windowSum = (cumulative[to] ?? 0) - (cumulative[from] ?? 0);
    smoothed[i] = windowSum / Math.max(1, to - from);
  }
  return smoothed;
}
|
|
487
|
+
/**
 * Fills in onset-envelope option defaults.
 *
 * Null or undefined fields (or a null/undefined `opts`) fall back to
 * `useLog: false`, `smoothMs: 30`, `diffMethod: "rectified"`.
 */
function defaultOptions(opts) {
  const given = opts ?? {};
  const useLog = given.useLog ?? false;
  const smoothMs = given.smoothMs ?? 30;
  const diffMethod = given.diffMethod ?? "rectified";
  return { useLog, smoothMs, diffMethod };
}
|
|
494
|
+
/**
 * Log compression: `log(1 + max(0, x))`. Negative inputs map to 0.
 */
function logCompress(x) {
  const nonNegative = Math.max(0, x);
  return Math.log1p(nonNegative);
}
|
|
497
|
+
/**
 * Onset strength envelope from a magnitude spectrogram (spectral flux).
 *
 * For each frame t >= 1 the per-bin difference to the previous frame is
 * rectified (`max(0, d)`) or taken as `abs(d)`, summed, and divided by the bin
 * count `(fftSize >>> 1) + 1`. Frame 0, and any frame where the current or
 * previous row is missing, is 0. With `useLog`, magnitudes go through
 * `logCompress` first. When `smoothMs > 0` and there are at least two frames,
 * the result is smoothed with `movingAverage` over an odd number of frames
 * derived from the spacing of the first two timestamps.
 *
 * @param spec    Object with `times`, `fftSize` and `magnitudes`.
 * @param options Optional `{ useLog, smoothMs, diffMethod }`.
 * @returns `{ times: spec.times, values }`.
 */
function onsetEnvelopeFromSpectrogram(spec, options) {
  const opts = defaultOptions(options);
  const nFrames = spec.times.length;
  const nBins = (spec.fftSize >>> 1) + 1;
  const flux = new Float32Array(nFrames);

  for (let t = 1; t < nFrames; t++) {
    const cur = spec.magnitudes[t];
    const prev = spec.magnitudes[t - 1];
    if (!cur || !prev) continue; // entry stays at its initial 0
    let acc = 0;
    for (let k = 0; k < nBins; k++) {
      const a = opts.useLog ? logCompress(cur[k] ?? 0) : cur[k] ?? 0;
      const b = opts.useLog ? logCompress(prev[k] ?? 0) : prev[k] ?? 0;
      const diff = a - b;
      acc += opts.diffMethod === "abs" ? Math.abs(diff) : Math.max(0, diff);
    }
    flux[t] = nBins > 0 ? acc / nBins : 0;
  }

  const smoothMs = opts.smoothMs;
  if (!(smoothMs > 0 && nFrames >= 2)) {
    return { times: spec.times, values: flux };
  }
  const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
  // `| 1` forces an odd window so it is centered on the current frame.
  return {
    times: spec.times,
    values: movingAverage(flux, windowFrames | 1)
  };
}
|
|
534
|
+
/**
 * Onset strength envelope from a mel spectrogram.
 *
 * For each frame t >= 1 the per-band difference to the previous frame is
 * rectified (`max(0, d)`) or taken as `abs(d)`, summed, and divided by the
 * band count of the current frame. Frame 0, and any frame where the current
 * or previous row is missing, is 0. With `useLog`, band values go through
 * `logCompress` first. When `smoothMs > 0` and there are at least two frames,
 * the result is smoothed with `movingAverage` over an odd number of frames
 * derived from the spacing of the first two timestamps.
 *
 * @param mel     `{ times, melBands }`.
 * @param options Optional `{ useLog, smoothMs, diffMethod }`.
 * @returns `{ times: mel.times, values }`.
 */
function onsetEnvelopeFromMel(mel, options) {
  const opts = defaultOptions(options);
  const nFrames = mel.times.length;
  const novelty = new Float32Array(nFrames);

  for (let t = 1; t < nFrames; t++) {
    const cur = mel.melBands[t];
    const prev = mel.melBands[t - 1];
    if (!cur || !prev) continue; // entry stays at its initial 0
    const nBands = cur.length;
    let acc = 0;
    for (let m = 0; m < nBands; m++) {
      const a = opts.useLog ? logCompress(cur[m] ?? 0) : cur[m] ?? 0;
      const b = opts.useLog ? logCompress(prev[m] ?? 0) : prev[m] ?? 0;
      const diff = a - b;
      acc += opts.diffMethod === "abs" ? Math.abs(diff) : Math.max(0, diff);
    }
    novelty[t] = nBands > 0 ? acc / nBands : 0;
  }

  const smoothMs = opts.smoothMs;
  if (!(smoothMs > 0 && nFrames >= 2)) {
    return { times: mel.times, values: novelty };
  }
  const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
  // `| 1` forces an odd window so it is centered on the current frame.
  return {
    times: mel.times,
    values: movingAverage(novelty, windowFrames | 1)
  };
}
|
|
571
|
+
/**
 * GPU variant of the mel onset envelope: flattens the mel bands row-major and
 * delegates to `gpuOnsetEnvelopeFromMelFlat`.
 *
 * Only `options.diffMethod` (default "rectified") is read here; no log
 * compression or smoothing is applied in this function.
 *
 * @param mel     `{ times, melBands }`; the band count is taken from the first row.
 * @param gpu     GPU context forwarded to the flat kernel wrapper.
 * @param options Optional `{ diffMethod }`.
 * @returns `{ times, values, gpuTimings }`.
 */
async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
  const nFrames = mel.times.length;
  const nMels = mel.melBands[0]?.length ?? 0;

  // Missing rows are skipped, which leaves zeros in the zero-initialised array.
  const melFlat = new Float32Array(nFrames * nMels);
  for (let t = 0; t < nFrames; t++) {
    const bands = mel.melBands[t];
    if (bands) {
      melFlat.set(bands, t * nMels);
    }
  }

  const diffMethod = options?.diffMethod ?? "rectified";
  const result = await gpuOnsetEnvelopeFromMelFlat(gpu, { nFrames, nMels, melFlat, diffMethod });
  return {
    times: mel.times,
    values: result.value.out,
    gpuTimings: { gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs }
  };
}
|
|
593
|
+
|
|
594
|
+
// src/dsp/peakPick.ts
|
|
595
|
+
/**
 * Mean and population standard deviation (divides by n, not n - 1).
 *
 * @param values Indexable sequence of numbers; missing entries count as 0.
 * @returns `{ mean, std }`, both 0 for an empty input.
 */
function meanStd(values) {
  const count = values.length;
  if (count <= 0) {
    return { mean: 0, std: 0 };
  }

  let total = 0;
  for (let i = 0; i < count; i++) {
    total += values[i] ?? 0;
  }
  const mean = total / count;

  let squaredDeviations = 0;
  for (let i = 0; i < count; i++) {
    const deviation = (values[i] ?? 0) - mean;
    squaredDeviations += deviation * deviation;
  }
  return { mean, std: Math.sqrt(squaredDeviations / count) };
}
|
|
609
|
+
/**
 * Median of a numeric sequence, computed on a sorted copy so the input is
 * left untouched. Even-length inputs return the mean of the two middle
 * values; an empty input returns 0.
 */
function median(values) {
  const sorted = Array.from(values).sort((a, b) => a - b);
  const count = sorted.length;
  if (count === 0) {
    return 0;
  }
  const upperMid = count >>> 1;
  const isOdd = count % 2 === 1;
  if (isOdd) {
    return sorted[upperMid] ?? 0;
  }
  return ((sorted[upperMid - 1] ?? 0) + (sorted[upperMid] ?? 0)) / 2;
}
|
|
618
|
+
/**
 * Picks local maxima of `values` that reach a threshold.
 *
 * - Threshold: `options.threshold` (default 0), replaced entirely when
 *   `options.adaptive` is set: `median(values) * factor` for method "median",
 *   otherwise `mean + factor * std` (factor defaults to 1).
 * - Local maximum: strictly greater than both neighbours when `strict`
 *   (default true), otherwise greater than or equal. The first and last
 *   samples are never candidates.
 * - Minimum spacing: a candidate closer than `minIntervalSec` (default 0) to
 *   the last kept peak is dropped, unless it is stronger, in which case it
 *   replaces that peak in place.
 *
 * @param times   Timestamp per sample; must have the same length as `values`.
 * @param values  Sample values.
 * @param options `{ threshold?, strict?, adaptive?: { method?, factor? }, minIntervalSec? }`.
 * @returns Array of `{ time, strength, index }` in increasing index order.
 * @throws Error when `times` and `values` differ in length.
 */
function peakPick(times, values, options = {}) {
  if (times.length !== values.length) {
    throw new Error("@octoseq/mir: peakPick times/values length mismatch");
  }
  const count = values.length;
  if (count === 0) return [];

  const strict = options.strict ?? true;
  let threshold = options.threshold ?? 0;
  const adaptive = options.adaptive;
  if (adaptive) {
    const method = adaptive.method ?? "meanStd";
    const factor = adaptive.factor ?? 1;
    if (method === "median") {
      threshold = median(values) * factor;
    } else {
      const stats = meanStd(values);
      threshold = stats.mean + factor * stats.std;
    }
  }
  const minIntervalSec = options.minIntervalSec ?? 0;

  const peaks = [];
  let lastKeptTime = -Infinity;
  for (let i = 1; i < count - 1; i++) {
    const here = values[i] ?? 0;
    // Written as a negated >= so that NaN samples are rejected too.
    if (!(here >= threshold)) continue;

    const before = values[i - 1] ?? 0;
    const after = values[i + 1] ?? 0;
    const isLocalMax = strict
      ? here > before && here > after
      : here >= before && here >= after;
    if (!isLocalMax) continue;

    const when = times[i] ?? 0;
    if (when - lastKeptTime < minIntervalSec) {
      // Too close to the previous peak: keep only the stronger of the two.
      const previous = peaks[peaks.length - 1];
      if (previous && here > previous.strength) {
        previous.time = when;
        previous.strength = here;
        previous.index = i;
        lastKeptTime = when;
      }
      continue;
    }
    peaks.push({ time: when, strength: here, index: i });
    lastKeptTime = when;
  }
  return peaks;
}
|
|
662
|
+
|
|
663
|
+
// src/dsp/hpss.ts
|
|
664
|
+
/**
 * Throws unless `v` is a positive 32-bit integer that is also odd.
 *
 * @param name Label used in the error messages.
 * @param v    Candidate value.
 * @throws Error `... must be a positive integer` when `v` is not finite, not
 *         greater than zero, or fails the `(v | 0) === v` round-trip.
 * @throws Error `... must be odd` when `v % 2` is not 1.
 */
function assertOddPositiveInt(name, v) {
  const isPositiveInt = Number.isFinite(v) && v > 0 && (v | 0) === v;
  if (!isPositiveInt) {
    throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
  }
  const isOdd = v % 2 === 1;
  if (!isOdd) {
    throw new Error(`@octoseq/mir: ${name} must be odd`);
  }
}
|
|
672
|
+
/**
 * Returns the element at index `length >>> 1` of an ascending sorted copy of
 * `values`: the exact median for odd lengths, the upper-middle element for
 * even lengths, and 0 for an empty input. The input is not modified.
 */
function medianOfWindow(values) {
  const ascending = Array.from(values).sort((a, b) => a - b);
  const middle = ascending.length >>> 1;
  return ascending[middle] ?? 0;
}
|
|
678
|
+
/**
 * Median-filters the spectrogram along the time axis.
 *
 * For every frame t and bin k, takes `medianOfWindow` over the values of bin k
 * in frames `t - half .. t + half` (`half = kTime >>> 1`), clamping frame
 * indices to the valid range. Missing rows or values count as 0.
 *
 * @param spec    Object with `times`, `fftSize` and `magnitudes`.
 * @param kTime   Window length in frames.
 * @param options Object whose optional `isCancelled()` is polled once per frame.
 * @returns Array with one Float32Array(nBins) per frame.
 * @throws Error "@octoseq/mir: cancelled" when `isCancelled()` returns truthy.
 */
function medianFilterTime(spec, kTime, options) {
  const nFrames = spec.times.length;
  const nBins = (spec.fftSize >>> 1) + 1;
  const half = kTime >>> 1;
  const filtered = new Array(nFrames);
  // Scratch window reused for every (frame, bin) pair.
  const scratch = new Float32Array(kTime);

  for (let t = 0; t < nFrames; t++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
    const row = new Float32Array(nBins);
    for (let k = 0; k < nBins; k++) {
      let slot = 0;
      for (let offset = -half; offset <= half; offset++) {
        const frameIndex = Math.max(0, Math.min(nFrames - 1, t + offset));
        const neighbour = spec.magnitudes[frameIndex];
        scratch[slot] = neighbour ? neighbour[k] ?? 0 : 0;
        slot++;
      }
      row[k] = medianOfWindow(scratch);
    }
    filtered[t] = row;
  }
  return filtered;
}
|
|
699
|
+
/**
 * Median-filters the spectrogram along the frequency axis.
 *
 * For every frame t and bin k, takes `medianOfWindow` over bins
 * `k - half .. k + half` (`half = kFreq >>> 1`) of that frame, clamping bin
 * indices to the valid range. A missing frame is treated as all zeros.
 *
 * @param spec    Object with `times`, `fftSize` and `magnitudes`.
 * @param kFreq   Window length in bins.
 * @param options Object whose optional `isCancelled()` is polled once per frame.
 * @returns Array with one Float32Array(nBins) per frame.
 * @throws Error "@octoseq/mir: cancelled" when `isCancelled()` returns truthy.
 */
function medianFilterFreq(spec, kFreq, options) {
  const nFrames = spec.times.length;
  const nBins = (spec.fftSize >>> 1) + 1;
  const half = kFreq >>> 1;
  const filtered = new Array(nFrames);
  // Scratch window reused for every (frame, bin) pair.
  const scratch = new Float32Array(kFreq);

  for (let t = 0; t < nFrames; t++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
    const frame = spec.magnitudes[t] ?? new Float32Array(nBins);
    const row = new Float32Array(nBins);
    for (let k = 0; k < nBins; k++) {
      let slot = 0;
      for (let offset = -half; offset <= half; offset++) {
        const binIndex = Math.max(0, Math.min(nBins - 1, k + offset));
        scratch[slot] = frame[binIndex] ?? 0;
        slot++;
      }
      row[k] = medianOfWindow(scratch);
    }
    filtered[t] = row;
  }
  return filtered;
}
|
|
720
|
+
/**
 * Harmonic / percussive source separation on a magnitude spectrogram.
 *
 * H is the time-axis median filter of the input and P the frequency-axis
 * median filter. With the soft mask (default) each magnitude x is split as
 * `x * H / max(1e-12, H + P)` and `x * P / max(1e-12, H + P)`. With the hard
 * mask the whole magnitude goes to the harmonic part when `H >= P` and to the
 * percussive part otherwise.
 *
 * @param spec    Object with `times`, `fftSize` and `magnitudes`.
 * @param options `{ timeMedian?, freqMedian?, softMask?, isCancelled? }`; both
 *                median sizes default to 17 and must be odd positive integers.
 * @returns `{ harmonic, percussive }`, each `{ times, bins, frames, magnitudes }`.
 * @throws Error on invalid median sizes, or "@octoseq/mir: cancelled" when
 *         `options.isCancelled()` returns truthy.
 */
function hpss(spec, options = {}) {
  const timeMedian = options.timeMedian ?? 17;
  const freqMedian = options.freqMedian ?? 17;
  assertOddPositiveInt("options.timeMedian", timeMedian);
  assertOddPositiveInt("options.freqMedian", freqMedian);

  const nFrames = spec.times.length;
  const nBins = (spec.fftSize >>> 1) + 1;
  const timeFiltered = medianFilterTime(spec, timeMedian, options);
  const freqFiltered = medianFilterFreq(spec, freqMedian, options);

  const EPS = 1e-12;
  const useSoftMask = options.softMask ?? true;
  const harmonicRows = new Array(nFrames);
  const percussiveRows = new Array(nFrames);

  for (let t = 0; t < nFrames; t++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
    const frame = spec.magnitudes[t] ?? new Float32Array(nBins);
    const hRow = timeFiltered[t] ?? new Float32Array(nBins);
    const pRow = freqFiltered[t] ?? new Float32Array(nBins);
    const harmonicOut = new Float32Array(nBins);
    const percussiveOut = new Float32Array(nBins);

    for (let k = 0; k < nBins; k++) {
      const x = frame[k] ?? 0;
      const h = hRow[k] ?? 0;
      const p = pRow[k] ?? 0;
      if (!useSoftMask) {
        const harmonicWins = h >= p;
        harmonicOut[k] = harmonicWins ? x : 0;
        percussiveOut[k] = harmonicWins ? 0 : x;
        continue;
      }
      const total = Math.max(EPS, h + p);
      const harmonicShare = h / total;
      const percussiveShare = p / total;
      harmonicOut[k] = x * harmonicShare;
      percussiveOut[k] = x * percussiveShare;
    }
    harmonicRows[t] = harmonicOut;
    percussiveRows[t] = percussiveOut;
  }

  return {
    harmonic: { times: spec.times, bins: nBins, frames: nFrames, magnitudes: harmonicRows },
    percussive: { times: spec.times, bins: nBins, frames: nFrames, magnitudes: percussiveRows }
  };
}
|
|
764
|
+
|
|
765
|
+
// src/gpu/kernels/hpssMasks.wgsl.ts
|
|
766
|
+
/**
 * WGSL compute shader that derives HPSS masks from a flattened magnitude spectrogram.
 *
 * Bindings (group 0): 0 = input magnitudes, 1 = harmonic mask output, 2 = percussive mask
 * output, 3 = `Params` uniform (`nBins`, `nFrames`, `softMask`, padding).
 *
 * Each invocation handles one (frame, bin) cell, indexed as `frame * nBins + bin`. It takes
 * a 9-tap median along time (harmonic estimate) and a 9-tap median along frequency
 * (percussive estimate), with out-of-range taps clamped to the nearest valid cell, and
 * writes either ratio masks (`softMask != 0`) or 0/1 masks (`softMask == 0`).
 */
var hpssMasksWGSL = /* wgsl */ `
struct Params {
nBins: u32,
nFrames: u32,
softMask: u32, // 1 => soft, 0 => hard
_pad: u32,
};

@group(0) @binding(0) var<storage, read> mags : array<f32>;
@group(0) @binding(1) var<storage, read_write> harmonicMask : array<f32>;
@group(0) @binding(2) var<storage, read_write> percussiveMask : array<f32>;
@group(0) @binding(3) var<uniform> params : Params;

fn clamp_i32(x: i32, lo: i32, hi: i32) -> i32 {
return max(lo, min(hi, x));
}

fn swap_if_greater(a: ptr<function, f32>, b: ptr<function, f32>) {
// Branchless compare\u2013swap.
let av = *a;
let bv = *b;
*a = min(av, bv);
*b = max(av, bv);
}

// Sorting network for 9 values; returns the 5th smallest (median).
//
// Notes:
// - This is fixed-cost and data-independent.
// - For our HPSS approximation we only need a robust center value, and exact median-of-9
// is a good tradeoff vs kernel size.
fn median9(v0: f32, v1: f32, v2: f32, v3: f32, v4: f32, v5: f32, v6: f32, v7: f32, v8: f32) -> f32 {
var a0 = v0; var a1 = v1; var a2 = v2;
var a3 = v3; var a4 = v4; var a5 = v5;
var a6 = v6; var a7 = v7; var a8 = v8;

// 9-input sorting network (compare\u2013swap stages). This is a known minimal-ish network.
// We fully sort then take middle; cost is acceptable for 9.
// Stage 1
swap_if_greater(&a0,&a1); swap_if_greater(&a3,&a4); swap_if_greater(&a6,&a7);
// Stage 2
swap_if_greater(&a1,&a2); swap_if_greater(&a4,&a5); swap_if_greater(&a7,&a8);
// Stage 3
swap_if_greater(&a0,&a1); swap_if_greater(&a3,&a4); swap_if_greater(&a6,&a7);
// Stage 4
swap_if_greater(&a0,&a3); swap_if_greater(&a3,&a6); swap_if_greater(&a0,&a3);
// Stage 5
swap_if_greater(&a1,&a4); swap_if_greater(&a4,&a7); swap_if_greater(&a1,&a4);
// Stage 6
swap_if_greater(&a2,&a5); swap_if_greater(&a5,&a8); swap_if_greater(&a2,&a5);
// Stage 7
swap_if_greater(&a1,&a3); swap_if_greater(&a5,&a7);
// Stage 8
swap_if_greater(&a2,&a6);
// Stage 9
swap_if_greater(&a2,&a3); swap_if_greater(&a4,&a6);
// Stage 10
swap_if_greater(&a2,&a4); swap_if_greater(&a4,&a6);
// Stage 11
swap_if_greater(&a3,&a5); swap_if_greater(&a5,&a7);
// Stage 12
swap_if_greater(&a3,&a4); swap_if_greater(&a5,&a6);
// Stage 13
swap_if_greater(&a4,&a5);

return a4;
}

fn mag_at(frame: i32, bin: i32) -> f32 {
let f = clamp_i32(frame, 0, i32(params.nFrames) - 1);
let b = clamp_i32(bin, 0, i32(params.nBins) - 1);
let idx = u32(f) * params.nBins + u32(b);
return mags[idx];
}

@compute @workgroup_size(16, 16)
fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
let frame = gid.x;
let bin = gid.y;

if (frame >= params.nFrames || bin >= params.nBins) {
return;
}

let f = i32(frame);
let b = i32(bin);

// Harmonic estimate: median in time over 9 taps.
let h = median9(
mag_at(f-4,b), mag_at(f-3,b), mag_at(f-2,b), mag_at(f-1,b), mag_at(f,b),
mag_at(f+1,b), mag_at(f+2,b), mag_at(f+3,b), mag_at(f+4,b)
);

// Percussive estimate: median in frequency over 9 taps.
let p = median9(
mag_at(f,b-4), mag_at(f,b-3), mag_at(f,b-2), mag_at(f,b-1), mag_at(f,b),
mag_at(f,b+1), mag_at(f,b+2), mag_at(f,b+3), mag_at(f,b+4)
);

let eps: f32 = 1e-12;
let denom = max(eps, h + p);

var mh = h / denom;
var mp = p / denom;

// Optional hard mask (kept for compatibility with CPU options).
if (params.softMask == 0u) {
let isH = h >= p;
mh = select(0.0, 1.0, isH);
mp = select(1.0, 0.0, isH);
}

let idx = frame * params.nBins + bin;
harmonicMask[idx] = mh;
percussiveMask[idx] = mp;
}
`;
|
|
886
|
+
|
|
887
|
+
// src/gpu/hpssMasks.ts
|
|
888
|
+
/**
 * Runs the HPSS mask compute shader on the GPU and reads both masks back to the CPU.
 *
 * @param gpu   GPU handle; `gpu.device` and `gpu.queue` are used here.
 * @param input `{ nFrames, nBins, magsFlat, softMask }`. `magsFlat` must hold exactly
 *              `nFrames * nBins` magnitudes, laid out as `frame * nBins + bin` (the index
 *              the shader uses).
 * @returns `{ value: { harmonicMaskFlat, percussiveMaskFlat }, timing: { gpuSubmitToReadbackMs } }`,
 *          where the timing spans queue submission to completion of both `mapAsync` calls.
 * @throws Error when `magsFlat.length` differs from `nFrames * nBins`; any error raised by
 *         the GPU helpers or by `mapAsync` is propagated after the buffers are released.
 */
async function gpuHpssMasks(gpu, input) {
  const { device } = gpu;
  const { nFrames, nBins, magsFlat, softMask } = input;
  if (magsFlat.length !== nFrames * nBins) {
    throw new Error("@octoseq/mir: magsFlat length mismatch");
  }

  const elementCount = nFrames * nBins;
  if (elementCount === 0) {
    // Nothing to compute: skip GPU work entirely instead of binding zero-sized buffers.
    return {
      value: {
        harmonicMaskFlat: new Float32Array(0),
        percussiveMaskFlat: new Float32Array(0)
      },
      timing: { gpuSubmitToReadbackMs: 0 }
    };
  }

  // Every buffer created below is tracked so the `finally` block can release it even when
  // a later step throws or a readback mapping is rejected.
  const ownedBuffers = [];
  const track = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };

  try {
    const magsBuffer = track(createAndWriteStorageBuffer(gpu, magsFlat));
    const outByteLen = byteSizeF32(elementCount);
    const harmonicOutBuffer = track(createStorageOutBuffer(gpu, outByteLen));
    const percussiveOutBuffer = track(createStorageOutBuffer(gpu, outByteLen));
    const harmonicReadback = track(createReadbackBuffer(gpu, outByteLen));
    const percussiveReadback = track(createReadbackBuffer(gpu, outByteLen));

    const shader = device.createShaderModule({ code: hpssMasksWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: { module: shader, entryPoint: "main" }
    });
    // Uniform layout matches the shader's Params struct: nBins, nFrames, softMask, padding.
    const params = track(
      createUniformBufferU32x4(gpu, new Uint32Array([nBins, nFrames, softMask ? 1 : 0, 0]))
    );
    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: magsBuffer } },
        { binding: 1, resource: { buffer: harmonicOutBuffer } },
        { binding: 2, resource: { buffer: percussiveOutBuffer } },
        { binding: 3, resource: { buffer: params } }
      ]
    });

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // The shader declares 16x16 workgroups with x = frame and y = bin.
    pass.dispatchWorkgroups(Math.ceil(nFrames / 16), Math.ceil(nBins / 16));
    pass.end();
    encoder.copyBufferToBuffer(harmonicOutBuffer, 0, harmonicReadback, 0, outByteLen);
    encoder.copyBufferToBuffer(percussiveOutBuffer, 0, percussiveReadback, 0, outByteLen);

    const tSubmit = nowMs();
    gpu.queue.submit([encoder.finish()]);
    await Promise.all([
      harmonicReadback.mapAsync(GPUMapMode.READ),
      percussiveReadback.mapAsync(GPUMapMode.READ)
    ]);
    const tDone = nowMs();

    // Copy out of the mapped ranges before unmapping; the mapped memory is not ours to keep.
    const hBytes = harmonicReadback.getMappedRange().slice(0);
    const pBytes = percussiveReadback.getMappedRange().slice(0);
    harmonicReadback.unmap();
    percussiveReadback.unmap();

    return {
      value: {
        harmonicMaskFlat: new Float32Array(hBytes),
        percussiveMaskFlat: new Float32Array(pBytes)
      },
      timing: {
        gpuSubmitToReadbackMs: tDone - tSubmit
      }
    };
  } finally {
    for (const buffer of ownedBuffers) {
      buffer.destroy();
    }
  }
}
|
|
949
|
+
|
|
950
|
+
// src/dsp/hpssGpu.ts
|
|
951
|
+
/**
 * Packs per-frame magnitude rows into one contiguous Float32Array, frame after frame.
 *
 * Frame `t` occupies indices `[t * nBins, (t + 1) * nBins)`. Missing rows and the tail of
 * rows shorter than `nBins` stay zero. Rows longer than `nBins` are truncated so they can
 * neither spill into the next frame's slot nor overrun the buffer on the last frame.
 *
 * @param mags    Array of rows (array-likes of numbers); entries may be null/undefined.
 * @param nFrames Number of frames to pack.
 * @param nBins   Number of bins per frame in the packed layout.
 * @returns Float32Array of length `nFrames * nBins`.
 */
function flattenMagnitudes(mags, nFrames, nBins) {
  const flat = new Float32Array(nFrames * nBins);
  for (let t = 0; t < nFrames; t++) {
    const row = mags[t];
    if (row == null) {
      // The buffer is zero-initialised, so a missing frame needs no copy.
      continue;
    }
    const fitted = row.length > nBins ? Array.prototype.slice.call(row, 0, nBins) : row;
    flat.set(fitted, t * nBins);
  }
  return flat;
}
|
|
959
|
+
/**
 * Guard for values read back from the GPU: throws unless `v` is a finite number.
 *
 * @param name Label inserted into the error message.
 * @param v    Value to check.
 * @throws Error when `Number.isFinite(v)` is false (NaN, ±Infinity, or a non-number).
 */
function assertFiniteMask(name, v) {
  if (Number.isFinite(v)) {
    return;
  }
  throw new Error(`@octoseq/mir: GPU HPSS produced non-finite ${name}`);
}
|
|
964
|
+
/**
 * GPU-assisted HPSS: the masks are computed by `gpuHpssMasks`, then applied to the input
 * magnitudes on the CPU.
 *
 * Only `softMask` (default true) and `isCancelled` are read from `options` in this
 * function; `timeMedian` / `freqMedian` are not consulted here.
 *
 * @param spec    Spectrogram; `times`, `fftSize` and `magnitudes` are read here.
 * @param gpu     GPU handle forwarded to `gpuHpssMasks`.
 * @param options Optional `{ softMask, isCancelled }`.
 * @returns `{ harmonic, percussive, gpuMs }`; the first two are shaped
 *          `{ times, bins, frames, magnitudes }` and `gpuMs` is the submit-to-readback time
 *          reported by `gpuHpssMasks`.
 * @throws Error("@octoseq/mir: cancelled") when cancellation is observed, and an Error when
 *         a mask value read back from the GPU is not finite.
 */
async function hpssGpu(spec, gpu, options = {}) {
  const bailIfCancelled = () => {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
  };

  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;

  bailIfCancelled();
  const { value: maskData, timing } = await gpuHpssMasks(gpu, {
    nFrames: frameCount,
    nBins: binCount,
    magsFlat: flattenMagnitudes(spec.magnitudes, frameCount, binCount),
    softMask: options.softMask ?? true
  });
  // The GPU round trip is the long await, so check again before the CPU pass.
  bailIfCancelled();

  const harmonicMask = maskData.harmonicMaskFlat;
  const percussiveMask = maskData.percussiveMaskFlat;
  const harmonic = new Array(frameCount);
  const percussive = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame += 1) {
    bailIfCancelled();
    // A missing row is treated as an all-zero row.
    const input = spec.magnitudes[frame] ?? new Float32Array(binCount);
    const harmonicRow = new Float32Array(binCount);
    const percussiveRow = new Float32Array(binCount);
    const offset = frame * binCount;

    for (let bin = 0; bin < binCount; bin += 1) {
      const harmonicWeight = harmonicMask[offset + bin] ?? 0;
      const percussiveWeight = percussiveMask[offset + bin] ?? 0;
      assertFiniteMask("mask", harmonicWeight);
      assertFiniteMask("mask", percussiveWeight);

      // Clamp the weights to [0, 1] before applying them to the magnitude.
      const magnitude = input[bin] ?? 0;
      harmonicRow[bin] = magnitude * Math.max(0, Math.min(1, harmonicWeight));
      percussiveRow[bin] = magnitude * Math.max(0, Math.min(1, percussiveWeight));
    }

    harmonic[frame] = harmonicRow;
    percussive[frame] = percussiveRow;
  }

  return {
    harmonic: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: harmonic },
    percussive: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: percussive },
    gpuMs: timing.gpuSubmitToReadbackMs
  };
}
|
|
1007
|
+
|
|
1008
|
+
// src/dsp/spectral.ts
|
|
1009
|
+
/**
 * Spectral centroid per frame: the magnitude-weighted mean of the bin frequencies.
 *
 * Bin `k` is assigned the frequency `k * sampleRate / fftSize`. Frames that are missing,
 * or whose magnitudes do not sum to a positive value, yield 0.
 *
 * @param spec Spectrogram; `times`, `fftSize`, `sampleRate` and `magnitudes` are read.
 * @returns Float32Array with one centroid per entry of `spec.times`.
 */
function spectralCentroid(spec) {
  const frameCount = spec.times.length;
  const centroids = new Float32Array(frameCount);
  const binCount = (spec.fftSize >>> 1) + 1;
  const hzPerBin = spec.sampleRate / spec.fftSize;

  for (let t = 0; t < frameCount; t += 1) {
    const frame = spec.magnitudes[t];
    if (!frame) {
      // Output is zero-initialised, so a missing frame already reads as 0.
      continue;
    }

    let weightedSum = 0;
    let magnitudeSum = 0;
    for (let bin = 0; bin < binCount; bin += 1) {
      const magnitude = frame[bin] ?? 0;
      const frequency = bin * hzPerBin;
      weightedSum += frequency * magnitude;
      magnitudeSum += magnitude;
    }

    if (magnitudeSum > 0) {
      centroids[t] = weightedSum / magnitudeSum;
    }
  }

  return centroids;
}
|
|
1032
|
+
/**
 * Spectral flux per frame: the L1 distance between consecutive sum-normalised spectra.
 *
 * Each usable frame is divided by the sum of its magnitudes before comparison. A frame
 * that is missing or whose magnitudes sum to <= 0 yields 0 and resets the comparison, so
 * the next usable frame also yields 0 (as does the very first usable frame).
 *
 * @param spec Spectrogram; `times`, `fftSize` and `magnitudes` are read.
 * @returns Float32Array with one flux value per entry of `spec.times`.
 */
function spectralFlux(spec) {
  const frameCount = spec.times.length;
  const flux = new Float32Array(frameCount);
  const binCount = (spec.fftSize >>> 1) + 1;
  let previous = null;

  for (let t = 0; t < frameCount; t += 1) {
    const frame = spec.magnitudes[t];

    let energy = 0;
    if (frame) {
      for (let bin = 0; bin < binCount; bin += 1) energy += frame[bin] ?? 0;
    }
    if (!frame || energy <= 0) {
      // Unusable frame: output stays 0 and the reference spectrum is dropped.
      previous = null;
      continue;
    }

    const scale = 1 / energy;
    const normalised = new Float32Array(binCount);
    for (let bin = 0; bin < binCount; bin += 1) {
      normalised[bin] = (frame[bin] ?? 0) * scale;
    }

    if (previous) {
      let distance = 0;
      for (let bin = 0; bin < binCount; bin += 1) {
        distance += Math.abs((normalised[bin] ?? 0) - (previous[bin] ?? 0));
      }
      flux[t] = distance;
    }
    previous = normalised;
  }

  return flux;
}
|
|
1069
|
+
|
|
1070
|
+
// src/dsp/fft.ts
|
|
1071
|
+
/**
 * Builds a Hann window using `size` (not `size - 1`) as the cosine period:
 * `w[n] = 0.5 - 0.5 * cos(2πn / size)`.
 *
 * @param size Number of window samples.
 * @returns Float32Array of length `size`.
 */
function hannWindow(size) {
  const taper = new Float32Array(size);
  taper.forEach((_, n) => {
    taper[n] = 0.5 - 0.5 * Math.cos(2 * Math.PI * n / size);
  });
  return taper;
}
|
|
1078
|
+
/**
 * Builds a fixed-size FFT backend on top of the `FFT` class that is in scope in this file.
 *
 * The returned object exposes `fftSize` and `forwardReal(frame)`, which transforms a real
 * frame of exactly `fftSize` samples and returns `{ real, imag }` with `fftSize` entries
 * each. Both output arrays are allocated once here and overwritten on every call, so
 * callers must consume or copy them before the next transform.
 *
 * @param fftSize Transform length; must be a finite, positive 32-bit integer.
 * @returns `{ fftSize, forwardReal }`.
 * @throws Error when `fftSize` is not a positive integer; `forwardReal` throws when the
 *         frame length differs from `fftSize`.
 */
function createFftJsBackend(fftSize) {
  const isPositiveInt = Number.isFinite(fftSize) && fftSize > 0 && (fftSize | 0) === fftSize;
  if (!isPositiveInt) {
    throw new Error("@octoseq/mir: fftSize must be a positive integer");
  }

  const transform = new FFT(fftSize);
  const input = new Float32Array(fftSize);
  const interleaved = transform.createComplexArray();
  const real = new Float32Array(fftSize);
  const imag = new Float32Array(fftSize);

  // Maps -0 to +0 and leaves every other value (including NaN) untouched.
  const withoutNegativeZero = (x) => (x === 0 ? 0 : x);

  return {
    fftSize,
    forwardReal(frame) {
      if (frame.length !== fftSize) {
        throw new Error(
          `@octoseq/mir: FFT input length (${frame.length}) must equal fftSize (${fftSize})`
        );
      }
      input.set(frame);
      transform.realTransform(interleaved, input);
      transform.completeSpectrum(interleaved);
      // De-interleave [re0, im0, re1, im1, ...] into the two reusable output arrays.
      for (let bin = 0; bin < fftSize; bin += 1) {
        const offset = 2 * bin;
        real[bin] = withoutNegativeZero(interleaved[offset] ?? 0);
        imag[bin] = withoutNegativeZero(interleaved[offset + 1] ?? 0);
      }
      return { real, imag };
    }
  };
}
|
|
1108
|
+
|
|
1109
|
+
// src/dsp/fftBackend.ts
|
|
1110
|
+
// Backends already created, keyed by FFT size.
var backendCache = /* @__PURE__ */ new Map();

/**
 * Returns the FFT backend for `fftSize`, creating it with `createFftJsBackend` on first use
 * and reusing the cached instance afterwards.
 *
 * @param fftSize FFT size used as the cache key and passed to the factory.
 * @returns The cached or newly created backend.
 */
function getFftBackend(fftSize) {
  let backend = backendCache.get(fftSize);
  if (!backend) {
    backend = createFftJsBackend(fftSize);
    backendCache.set(fftSize, backend);
  }
  return backend;
}
|
|
1118
|
+
|
|
1119
|
+
// src/dsp/spectrogram.ts
|
|
1120
|
+
/**
 * Throws unless `value` is a finite, strictly positive integer that survives a 32-bit
 * integer round trip (`value | 0`).
 *
 * @param name  Label used at the start of the error message.
 * @param value Value to validate.
 * @throws Error(`@octoseq/mir: <name> must be a positive integer`) on invalid input.
 */
function assertPositiveInt3(name, value) {
  const valid = Number.isFinite(value) && value > 0 && (value | 0) === value;
  if (valid) {
    return;
  }
  throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
}
|
|
1125
|
+
function mixToMono(audio) {
|
|
1126
|
+
const nCh = audio.numberOfChannels;
|
|
1127
|
+
if (nCh <= 0) {
|
|
1128
|
+
throw new Error("@octoseq/mir: audio.numberOfChannels must be >= 1");
|
|
1129
|
+
}
|
|
1130
|
+
if (nCh === 1) {
|
|
1131
|
+
return audio.getChannelData(0);
|
|
1132
|
+
}
|
|
1133
|
+
const length = audio.getChannelData(0).length;
|
|
1134
|
+
const out = new Float32Array(length);
|
|
1135
|
+
for (let ch = 0; ch < nCh; ch++) {
|
|
1136
|
+
const data = audio.getChannelData(ch);
|
|
1137
|
+
if (data.length !== length) {
|
|
1138
|
+
throw new Error(
|
|
1139
|
+
"@octoseq/mir: all channels must have equal length (AudioBuffer-like invariant)"
|
|
1140
|
+
);
|
|
1141
|
+
}
|
|
1142
|
+
for (let i = 0; i < length; i++) {
|
|
1143
|
+
out[i] = (out[i] ?? 0) + (data[i] ?? 0);
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
const inv = 1 / nCh;
|
|
1147
|
+
for (let i = 0; i < length; i++) out[i] = (out[i] ?? 0) * inv;
|
|
1148
|
+
return out;
|
|
1149
|
+
}
|
|
1150
|
+
/**
 * Computes a magnitude spectrogram (STFT magnitudes) of an AudioBuffer-like input.
 *
 * The audio is mixed to mono, cut into frames of `fftSize` samples every `hopSize` samples
 * (only frames that fit entirely inside the signal), multiplied by a Hann window and
 * transformed. For each frame the first `fftSize / 2 + 1` bin magnitudes are kept, and the
 * frame time is the frame's centre in seconds.
 *
 * @param audio   AudioBuffer-like input; `sampleRate` is read and the object is passed to `mixToMono`.
 * @param config  `{ fftSize, hopSize, window }`: positive integers, `fftSize` a power of
 *                two, `hopSize <= fftSize`, and `window` must be "hann".
 * @param gpu     Accepted for signature compatibility; not used by this implementation.
 * @param options Optional `{ isCancelled }`, polled once per frame.
 * @returns `{ sampleRate, fftSize, hopSize, times, magnitudes }`. The `magnitudes` array
 *          additionally carries a `cpuFftTotalMs` property with the accumulated FFT time.
 * @throws Error on invalid configuration or when cancellation is observed (surfaced as a
 *         rejected promise, since the function is async).
 */
async function spectrogram(audio, config, gpu, options = {}) {
  assertPositiveInt3("config.fftSize", config.fftSize);
  assertPositiveInt3("config.hopSize", config.hopSize);
  if (config.window !== "hann") {
    throw new Error(
      `@octoseq/mir: unsupported window '${config.window}'. v0.1 supports only 'hann'.`
    );
  }

  const { fftSize, hopSize } = config;
  const isPowerOfTwo = (fftSize & (fftSize - 1)) === 0;
  if (!isPowerOfTwo) {
    throw new Error("@octoseq/mir: config.fftSize must be a power of two");
  }
  if (hopSize > fftSize) {
    throw new Error(
      "@octoseq/mir: config.hopSize must be <= config.fftSize"
    );
  }

  const sampleRate = audio.sampleRate;
  const samples = mixToMono(audio);
  // Signals shorter than one frame produce zero frames.
  const frameCount = Math.max(0, 1 + Math.floor((samples.length - fftSize) / hopSize));
  const times = new Float32Array(frameCount);
  const magnitudes = new Array(frameCount);
  const taper = hannWindow(fftSize);
  const backend = getFftBackend(fftSize);
  const scratch = new Float32Array(fftSize);
  const binCount = (fftSize >>> 1) + 1;
  const clock = () => (typeof performance !== "undefined" ? performance.now() : Date.now());
  let fftMs = 0;

  for (let frame = 0; frame < frameCount; frame += 1) {
    if (options.isCancelled?.()) {
      throw new Error("@octoseq/mir: cancelled");
    }

    const offset = frame * hopSize;
    times[frame] = (offset + fftSize / 2) / sampleRate;

    for (let i = 0; i < fftSize; i += 1) {
      const sample = samples[offset + i] ?? 0;
      scratch[i] = sample * (taper[i] ?? 0);
    }

    // Only the transform itself is timed.
    const started = clock();
    const { real, imag } = backend.forwardReal(scratch);
    fftMs += clock() - started;

    // Magnitudes are read from the transform output immediately, into a fresh row per frame.
    const row = new Float32Array(binCount);
    for (let bin = 0; bin < binCount; bin += 1) {
      row[bin] = Math.hypot(real[bin] ?? 0, imag[bin] ?? 0);
    }
    magnitudes[frame] = row;
  }

  // Timing is attached to the array itself so the returned object's keys stay unchanged.
  magnitudes.cpuFftTotalMs = fftMs;

  return { sampleRate, fftSize, hopSize, times, magnitudes };
}
|
|
1209
|
+
|
|
1210
|
+
// src/runner/runMir.ts
|
|
1211
|
+
/**
 * Current time in milliseconds: `performance.now()` when a global `performance` exists,
 * otherwise `Date.now()`.
 *
 * @returns Timestamp in milliseconds.
 */
function nowMs2() {
  if (typeof performance === "undefined") {
    return Date.now();
  }
  return performance.now();
}
|
|
1214
|
+
/**
 * Wraps a `{ sampleRate, mono }` object in the minimal AudioBuffer-like shape: one channel
 * whose data is `audio.mono`.
 *
 * `sampleRate` is copied when the wrapper is created; `audio.mono` is looked up each time
 * `getChannelData` is called, and the channel argument is ignored.
 *
 * @param audio Object with `sampleRate` and `mono`.
 * @returns `{ sampleRate, numberOfChannels: 1, getChannelData }`.
 */
function asAudioBufferLike(audio) {
  const { sampleRate } = audio;
  return {
    sampleRate,
    numberOfChannels: 1,
    getChannelData() {
      return audio.mono;
    }
  };
}
|
|
1221
|
+
/**
 * Runs one MIR analysis selected by `request.fn` on mono audio and returns the result
 * together with backend and timing metadata.
 *
 * Supported `request.fn` values: "spectralCentroid", "spectralFlux", "melSpectrogram",
 * "onsetEnvelope", "onsetPeaks", "hpssHarmonic", "hpssPercussive", "mfcc", "mfccDelta",
 * "mfccDeltaDelta". Any other value falls through to the mel-spectrogram result.
 *
 * GPU handling: with `request.backend === "gpu"` a `options.gpu` handle is required. If a
 * GPU stage throws, the error is rethrown when `options.strictGpu` is set; otherwise the
 * CPU implementation is used, unless cancellation has been requested in the meantime, in
 * which case the cancellation error is thrown instead of starting fallback work.
 *
 * NOTE(review): when `hpss.spectrogram` / `mfcc.spectrogram` differ from the base
 * spectrogram config, the base spectrogram is still computed first and then discarded;
 * that time is not included in `cpuMs` for those requests.
 *
 * @param audio   `{ sampleRate, mono }` audio input.
 * @param request Analysis request: `fn`, optional `backend`, `spectrogram`, `mel`, and
 *                per-feature option objects (`onset`, `peakPick`, `hpss`, `mfcc`).
 * @param options Run options: `gpu`, `strictGpu`, `isCancelled`, plus per-feature option
 *                objects that override the ones in `request`.
 * @returns One of `{ kind: "1d", times, values, meta }`, `{ kind: "2d", times, data, meta }`
 *          or `{ kind: "events", times, events, meta }`, where
 *          `meta = { backend, usedGpu, timings: { totalMs, cpuMs, gpuMs? } }`.
 * @throws Error("@octoseq/mir: cancelled") on cancellation, an Error when the GPU backend
 *         is requested without `options.gpu`, and any error raised by the stages invoked.
 */
async function runMir(audio, request, options = {}) {
  // Per-feature settings: values in `options` take precedence over those in `request`.
  const opts = {
    ...options,
    onset: { ...request.onset, ...options.onset },
    peakPick: { ...request.peakPick, ...options.peakPick },
    hpss: { ...request.hpss, ...options.hpss },
    mfcc: { ...request.mfcc, ...options.mfcc }
  };
  const { onset: onsetOpts, peakPick: peakPickOpts, hpss: hpssOpts, mfcc: mfccOpts } = opts;

  const t0 = nowMs2();
  const backend = request.backend ?? "cpu";
  const specConfig = request.spectrogram ?? {
    fftSize: 2048,
    hopSize: 512,
    window: "hann"
  };

  const throwIfCancelled = () => {
    if (opts.isCancelled?.()) {
      throw new Error("@octoseq/mir: cancelled");
    }
  };
  const requireGpu = () => {
    if (!opts.gpu) {
      throw new Error("@octoseq/mir: backend='gpu' requested but no MirGPU provided");
    }
  };
  // Decides what happens after a GPU stage failed: rethrow in strict mode, surface a pending
  // cancellation, otherwise return so the caller continues with the CPU implementation.
  const handleGpuFailure = (error) => {
    if (opts.strictGpu) throw error;
    throwIfCancelled();
  };
  const computeSpectrogram = (config) =>
    spectrogram(asAudioBufferLike(audio), config, void 0, { isCancelled: opts.isCancelled });

  const cpuStart = nowMs2();
  const spec = await computeSpectrogram(specConfig);
  const cpuAfterSpec = nowMs2();
  throwIfCancelled();
  const specCpuMs = cpuAfterSpec - cpuStart;

  if (request.fn === "spectralCentroid" || request.fn === "spectralFlux") {
    const values = request.fn === "spectralCentroid" ? spectralCentroid(spec) : spectralFlux(spec);
    const cpuEnd = nowMs2();
    return {
      kind: "1d",
      times: spec.times,
      values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: cpuEnd - t0,
          cpuMs: cpuEnd - cpuStart
        }
      }
    };
  }

  const melConfig = request.mel ?? { nMels: 64 };

  // Mel projection of the base spectrogram, on the GPU when asked for and possible.
  const computeMel = async (useGpu) => {
    const melCpuStart = nowMs2();
    if (useGpu) {
      requireGpu();
      const gpuStart = nowMs2();
      try {
        const gpuMel = await melSpectrogram(spec, melConfig, opts.gpu);
        const gpuEnd = nowMs2();
        const gpuKernelMs = gpuMel.gpuTimings?.gpuSubmitToReadbackMs;
        return {
          mel: gpuMel,
          usedGpu: true,
          gpuMs: gpuKernelMs ?? gpuEnd - gpuStart,
          cpuExtraMs: nowMs2() - melCpuStart - (gpuEnd - gpuStart)
        };
      } catch (e) {
        handleGpuFailure(e);
      }
    }
    const cpuMel = await melSpectrogram(spec, melConfig, void 0);
    return {
      mel: cpuMel,
      usedGpu: false,
      cpuExtraMs: nowMs2() - melCpuStart
    };
  };

  // Result for "melSpectrogram" and for any `request.fn` not handled by a branch below.
  const melResult = async () => {
    const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(backend === "gpu");
    const end = nowMs2();
    return {
      kind: "2d",
      times: mel.times,
      data: mel.melBands,
      meta: {
        backend: usedGpu ? "gpu" : "cpu",
        usedGpu,
        timings: {
          totalMs: end - t0,
          cpuMs: specCpuMs + cpuExtraMs,
          gpuMs
        }
      }
    };
  };

  // CPU onset envelope from a CPU mel spectrogram; shared by "onsetEnvelope" and "onsetPeaks".
  const computeCpuOnset = async () => {
    const { mel, cpuExtraMs } = await computeMel(false);
    const onset = onsetEnvelopeFromMel(mel, {
      smoothMs: onsetOpts.smoothMs,
      diffMethod: onsetOpts.diffMethod,
      useLog: onsetOpts.useLog
    });
    return { onset, melCpuMs: cpuExtraMs };
  };

  if (request.fn === "melSpectrogram") {
    return melResult();
  }

  if (request.fn === "onsetEnvelope") {
    if (backend === "gpu") {
      requireGpu();
      const { mel, gpuMs: melGpuMs, cpuExtraMs: melCpuMs } = await computeMel(true);
      try {
        const onsetGpu = await onsetEnvelopeFromMelGpu(mel, opts.gpu, {
          diffMethod: onsetOpts.diffMethod
        });
        const end = nowMs2();
        return {
          kind: "1d",
          times: onsetGpu.times,
          values: onsetGpu.values,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: end - t0,
              cpuMs: specCpuMs + melCpuMs,
              gpuMs: (melGpuMs ?? 0) + onsetGpu.gpuTimings.gpuSubmitToReadbackMs
            }
          }
        };
      } catch (e) {
        handleGpuFailure(e);
      }
    }
    const { onset, melCpuMs } = await computeCpuOnset();
    const end = nowMs2();
    return {
      kind: "1d",
      times: onset.times,
      values: onset.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: specCpuMs + melCpuMs
        }
      }
    };
  }

  if (request.fn === "onsetPeaks") {
    const { onset, melCpuMs } = await computeCpuOnset();
    const events = peakPick(onset.times, onset.values, {
      minIntervalSec: peakPickOpts.minIntervalSec,
      threshold: peakPickOpts.threshold,
      // A falsy adaptiveFactor (including 0) leaves adaptive thresholding unset.
      adaptive: peakPickOpts.adaptiveFactor
        ? { method: "meanStd", factor: peakPickOpts.adaptiveFactor }
        : void 0
    });
    const end = nowMs2();
    return {
      kind: "events",
      times: onset.times,
      events,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: specCpuMs + melCpuMs
        }
      }
    };
  }

  if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
    const hpssSpecConfig = hpssOpts.spectrogram ?? specConfig;
    const needsHpssSpec =
      hpssSpecConfig.fftSize !== specConfig.fftSize || hpssSpecConfig.hopSize !== specConfig.hopSize;

    // Reuse the base spectrogram unless HPSS asks for a different FFT/hop size.
    let hpssSpec = spec;
    let hpssSpecCpuMs = specCpuMs;
    if (needsHpssSpec) {
      const hpssSpecStart = nowMs2();
      hpssSpec = await computeSpectrogram(hpssSpecConfig);
      hpssSpecCpuMs = nowMs2() - hpssSpecStart;
    }

    const pickComponent = (separated) =>
      request.fn === "hpssHarmonic" ? separated.harmonic : separated.percussive;

    if (backend === "gpu") {
      requireGpu();
      const gpuStageStart = nowMs2();
      try {
        const out = await hpssGpu(hpssSpec, opts.gpu, {
          timeMedian: hpssOpts.timeMedian,
          freqMedian: hpssOpts.freqMedian,
          softMask: true,
          // preserve CPU default
          isCancelled: opts.isCancelled
        });
        const end = nowMs2();
        const chosen = pickComponent(out);
        return {
          kind: "2d",
          times: chosen.times,
          data: chosen.magnitudes,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: end - t0,
              // Time spent in the GPU stage that was not GPU time counts as CPU time.
              cpuMs: hpssSpecCpuMs + (end - gpuStageStart - out.gpuMs),
              gpuMs: out.gpuMs
            }
          }
        };
      } catch (e) {
        handleGpuFailure(e);
      }
    }

    const hpssStart = nowMs2();
    const separated = hpss(hpssSpec, {
      timeMedian: hpssOpts.timeMedian,
      freqMedian: hpssOpts.freqMedian,
      isCancelled: opts.isCancelled
    });
    const end = nowMs2();
    const chosen = pickComponent(separated);
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.magnitudes,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: { totalMs: end - t0, cpuMs: hpssSpecCpuMs + (end - hpssStart) }
      }
    };
  }

  if (request.fn === "mfcc" || request.fn === "mfccDelta" || request.fn === "mfccDeltaDelta") {
    const mfccSpecConfig = mfccOpts.spectrogram ?? specConfig;
    const needsMfccSpec =
      mfccSpecConfig.fftSize !== specConfig.fftSize || mfccSpecConfig.hopSize !== specConfig.hopSize;

    let mfccMel;
    let melCpuMs;
    if (needsMfccSpec) {
      // Dedicated spectrogram + mel projection for MFCC, both on the CPU.
      const mfccCpuStart = nowMs2();
      const mfccSpec = await computeSpectrogram(mfccSpecConfig);
      mfccMel = await melSpectrogram(mfccSpec, melConfig, void 0);
      melCpuMs = nowMs2() - mfccCpuStart;
    } else {
      const { mel, cpuExtraMs } = await computeMel(false);
      mfccMel = mel;
      melCpuMs = specCpuMs + cpuExtraMs;
    }

    const mfccStart = nowMs2();
    const base = mfcc(mfccMel, { nCoeffs: mfccOpts.nCoeffs });
    const features = { times: base.times, values: base.coeffs };
    let chosen = features;
    if (request.fn === "mfccDelta") {
      chosen = delta(features);
    } else if (request.fn === "mfccDeltaDelta") {
      chosen = deltaDelta(features);
    }
    const end = nowMs2();
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: melCpuMs + (end - mfccStart)
        }
      }
    };
  }

  return melResult();
}
|
|
1522
|
+
|
|
1523
|
+
export { delta, deltaDelta, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
|
|
1524
|
+
//# sourceMappingURL=chunk-DUWYCAVG.js.map
|
|
1525
|
+
//# sourceMappingURL=chunk-DUWYCAVG.js.map
|