@octoseq/mir 0.1.0-main.2e286ce → 0.1.0-main.4baa7cd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DUWYCAVG.js → chunk-KIGWMJLC.js} +774 -368
- package/dist/chunk-KIGWMJLC.js.map +1 -0
- package/dist/index.d.ts +115 -4
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/{runMir-CSIBwNZ3.d.ts → runMir-CVEIxPd3.d.ts} +1 -1
- package/dist/runner/runMir.d.ts +2 -2
- package/dist/runner/runMir.js +1 -1
- package/dist/runner/workerProtocol.d.ts +8 -1
- package/dist/runner/workerProtocol.js.map +1 -1
- package/dist/types-4bAZI4F7.d.ts +190 -0
- package/package.json +1 -1
- package/src/dsp/beatCandidates.ts +299 -0
- package/src/dsp/tempoHypotheses.ts +395 -0
- package/src/index.ts +21 -1
- package/src/runner/runMir.ts +72 -0
- package/src/runner/workerProtocol.ts +9 -1
- package/src/types.ts +119 -1
- package/dist/chunk-DUWYCAVG.js.map +0 -1
- package/dist/types-BE3py4fZ.d.ts +0 -83
|
@@ -53,6 +53,646 @@ async function submitAndReadback(gpu, encoder, outBuffer, readback, byteLength)
|
|
|
53
53
|
};
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
// src/gpu/kernels/onsetEnvelope.wgsl.ts
|
|
57
|
+
var onsetEnvelopeWGSL = (
|
|
58
|
+
/* wgsl */
|
|
59
|
+
`
|
|
60
|
+
// Compute onset strength envelope from a (log) mel spectrogram.
|
|
61
|
+
//
|
|
62
|
+
// Input layout: melFlat[t*nMels + m]
|
|
63
|
+
// Output layout: out[t]
|
|
64
|
+
//
|
|
65
|
+
// We compute novelty per frame:
|
|
66
|
+
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
|
|
67
|
+
// or sum_m abs(...)
|
|
68
|
+
//
|
|
69
|
+
// One invocation computes one frame index (t). This is memory-bound but reduces a full
|
|
70
|
+
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
|
|
71
|
+
|
|
72
|
+
struct Params {
|
|
73
|
+
nMels: u32,
|
|
74
|
+
nFrames: u32,
|
|
75
|
+
diffMethod: u32, // 0=rectified, 1=abs
|
|
76
|
+
_pad: u32,
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
|
|
80
|
+
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
|
|
81
|
+
@group(0) @binding(2) var<uniform> params: Params;
|
|
82
|
+
|
|
83
|
+
@compute @workgroup_size(256)
|
|
84
|
+
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
85
|
+
let t = gid.x;
|
|
86
|
+
if (t >= params.nFrames) { return; }
|
|
87
|
+
|
|
88
|
+
if (t == 0u) {
|
|
89
|
+
out[t] = 0.0;
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
let nMels = params.nMels;
|
|
94
|
+
var sum: f32 = 0.0;
|
|
95
|
+
|
|
96
|
+
// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
|
|
97
|
+
// (Future optimisation: parallelise reduction within workgroup.)
|
|
98
|
+
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
|
|
99
|
+
let a = melFlat[t * nMels + m];
|
|
100
|
+
let b = melFlat[(t - 1u) * nMels + m];
|
|
101
|
+
let d = a - b;
|
|
102
|
+
|
|
103
|
+
if (params.diffMethod == 1u) {
|
|
104
|
+
// abs
|
|
105
|
+
sum = sum + abs(d);
|
|
106
|
+
} else {
|
|
107
|
+
// rectified
|
|
108
|
+
sum = sum + max(0.0, d);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
out[t] = sum / max(1.0, f32(nMels));
|
|
113
|
+
}
|
|
114
|
+
`
|
|
115
|
+
);
|
|
116
|
+
|
|
117
|
+
// src/gpu/onsetEnvelope.ts
|
|
118
|
+
/**
 * Runs the onset-envelope compute shader over a flattened mel spectrogram and
 * reads the per-frame result back to the CPU.
 *
 * @param gpu   GPU context. Only `gpu.device` is used directly; the object is
 *              otherwise passed through to the buffer/submit helpers.
 * @param input `{ nFrames, nMels, melFlat, diffMethod }`. `melFlat` must contain
 *              exactly `nFrames * nMels` values. `diffMethod === "abs"` is sent
 *              to the shader as 1; any other value is sent as 0.
 * @returns `{ value: { out: Float32Array }, timing }`, where `timing` is whatever
 *          `submitAndReadback` reports.
 * @throws Error when `melFlat.length !== nFrames * nMels`.
 */
async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
  const { device } = gpu;
  const { nFrames, nMels, melFlat, diffMethod } = input;
  if (melFlat.length !== nFrames * nMels) {
    throw new Error("@octoseq/mir: melFlat length mismatch");
  }

  // Every buffer allocated below is tracked so that it is destroyed even when
  // pipeline creation, bind-group creation or the submit/readback step throws.
  const ownedBuffers = [];
  const own = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };

  let bytes;
  let timing;
  try {
    const melBuffer = own(createAndWriteStorageBuffer(gpu, melFlat));
    const outByteLen = byteSizeF32(nFrames);
    const outBuffer = own(createStorageOutBuffer(gpu, outByteLen));
    const readback = own(createReadbackBuffer(gpu, outByteLen));

    const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: { module: shader, entryPoint: "main" }
    });

    // Uniform layout mirrors the shader's Params struct: nMels, nFrames, diffMethod, pad.
    const diffU32 = diffMethod === "abs" ? 1 : 0;
    const params = own(createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0])));

    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: melBuffer } },
        { binding: 1, resource: { buffer: outBuffer } },
        { binding: 2, resource: { buffer: params } }
      ]
    });

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // One invocation per frame; the shader's workgroup size is 256.
    pass.dispatchWorkgroups(Math.ceil(nFrames / 256));
    pass.end();

    ({ value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen));
  } finally {
    for (const buffer of ownedBuffers) {
      buffer.destroy();
    }
  }

  // As before, the result view is built only after the buffers are destroyed.
  // NOTE(review): this relies on `submitAndReadback` returning a copy rather than a mapped range — confirm.
  return {
    value: { out: new Float32Array(bytes) },
    timing
  };
}
|
|
160
|
+
|
|
161
|
+
// src/dsp/onset.ts
|
|
162
|
+
/**
 * Centred moving average computed from a running prefix sum.
 *
 * Near either end of the signal the window is clipped to the samples that
 * exist, so edge outputs average fewer values instead of padding.
 *
 * @param values       Numeric samples (anything indexable with a `length`).
 * @param windowFrames Window width in frames; `floor(windowFrames / 2)` samples
 *                     are taken on each side of the centre.
 * @returns `values` itself when `windowFrames <= 1`, otherwise a new Float32Array.
 */
function movingAverage(values, windowFrames) {
  // A window of one frame (or less) is the identity: hand the input back untouched.
  if (windowFrames <= 1) {
    return values;
  }

  const length = values.length;
  const radius = Math.floor(windowFrames / 2);

  // cumulative[k] is the sum of the first k samples, kept in Float64.
  const cumulative = new Float64Array(length + 1);
  let running = 0;
  for (let k = 0; k < length; k += 1) {
    running += values[k] ?? 0;
    cumulative[k + 1] = running;
  }

  const smoothed = new Float32Array(length);
  for (let centre = 0; centre < length; centre += 1) {
    const lo = Math.max(0, centre - radius);
    const hi = Math.min(length, centre + radius + 1);
    const windowSum = (cumulative[hi] ?? 0) - (cumulative[lo] ?? 0);
    smoothed[centre] = windowSum / Math.max(1, hi - lo);
  }
  return smoothed;
}
|
|
181
|
+
/**
 * Fills in the onset-envelope options with their defaults.
 *
 * @param opts Optional `{ useLog, smoothMs, diffMethod }`; null/undefined
 *             fields (or a missing object) fall back to the defaults.
 * @returns A fresh object with `useLog` (default false), `smoothMs` (default 30)
 *          and `diffMethod` (default "rectified").
 */
function defaultOptions(opts) {
  const given = opts ?? {};
  const useLog = given.useLog ?? false;
  const smoothMs = given.smoothMs ?? 30;
  const diffMethod = given.diffMethod ?? "rectified";
  return { useLog, smoothMs, diffMethod };
}
|
|
188
|
+
/**
 * Log-compresses a magnitude: `ln(1 + max(0, x))`.
 * Negative inputs are clamped to zero before compression.
 */
function logCompress(x) {
  const nonNegative = Math.max(0, x);
  return Math.log1p(nonNegative);
}
|
|
191
|
+
/**
 * Onset-strength envelope from a magnitude spectrogram.
 *
 * For each frame after the first, the per-bin difference to the previous frame
 * is accumulated (half-wave rectified, or absolute when `diffMethod` is "abs")
 * and divided by the bin count `(fftSize >>> 1) + 1`. Magnitudes are passed
 * through `logCompress` first when `useLog` is set. The result is optionally
 * smoothed with a moving average of about `smoothMs` milliseconds.
 *
 * @param spec    Object with `times`, `magnitudes` (one row per frame) and `fftSize`.
 * @param options Optional `{ useLog, smoothMs, diffMethod }`; see `defaultOptions`.
 * @returns `{ times, values }`; `times` is `spec.times` unchanged.
 */
function onsetEnvelopeFromSpectrogram(spec, options) {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const takeAbs = diffMethod === "abs";

  // Frame 0 has no predecessor and frames with a missing neighbour are skipped;
  // both keep the zero the typed array starts with.
  const novelty = new Float32Array(frameCount);
  for (let frame = 1; frame < frameCount; frame += 1) {
    const current = spec.magnitudes[frame];
    const previous = spec.magnitudes[frame - 1];
    if (!current || !previous) {
      continue;
    }
    let total = 0;
    for (let bin = 0; bin < binCount; bin += 1) {
      const now = current[bin] ?? 0;
      const before = previous[bin] ?? 0;
      const change = useLog ? logCompress(now) - logCompress(before) : now - before;
      total += takeAbs ? Math.abs(change) : Math.max(0, change);
    }
    novelty[frame] = binCount > 0 ? total / binCount : 0;
  }

  const shouldSmooth = smoothMs > 0 && frameCount >= 2;
  if (!shouldSmooth) {
    return { times: spec.times, values: novelty };
  }

  // Frame spacing is taken from the first two timestamps; the window is forced odd.
  const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
  return {
    times: spec.times,
    values: movingAverage(novelty, windowFrames | 1)
  };
}
|
|
228
|
+
/**
 * Onset-strength envelope from a mel spectrogram.
 *
 * For each frame after the first, the per-band difference to the previous frame
 * is accumulated (half-wave rectified, or absolute when `diffMethod` is "abs")
 * and divided by the number of bands in the current frame. Band values are
 * passed through `logCompress` first when `useLog` is set. The result is
 * optionally smoothed with a moving average of about `smoothMs` milliseconds.
 *
 * @param mel     Object with `times` and `melBands` (one row per frame).
 * @param options Optional `{ useLog, smoothMs, diffMethod }`; see `defaultOptions`.
 * @returns `{ times, values }`; `times` is `mel.times` unchanged.
 */
function onsetEnvelopeFromMel(mel, options) {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const frameCount = mel.times.length;
  const takeAbs = diffMethod === "abs";

  // Frame 0 and frames with a missing neighbour keep the initial zero.
  const novelty = new Float32Array(frameCount);
  for (let frame = 1; frame < frameCount; frame += 1) {
    const current = mel.melBands[frame];
    const previous = mel.melBands[frame - 1];
    if (!current || !previous) {
      continue;
    }
    const bandCount = current.length;
    let total = 0;
    for (let band = 0; band < bandCount; band += 1) {
      const now = current[band] ?? 0;
      const before = previous[band] ?? 0;
      const change = useLog ? logCompress(now) - logCompress(before) : now - before;
      total += takeAbs ? Math.abs(change) : Math.max(0, change);
    }
    novelty[frame] = bandCount > 0 ? total / bandCount : 0;
  }

  const shouldSmooth = smoothMs > 0 && frameCount >= 2;
  if (!shouldSmooth) {
    return { times: mel.times, values: novelty };
  }

  // Frame spacing is taken from the first two timestamps; the window is forced odd.
  const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
  return {
    times: mel.times,
    values: movingAverage(novelty, windowFrames | 1)
  };
}
|
|
265
|
+
/**
 * GPU variant of the mel onset envelope.
 *
 * Packs `mel.melBands` into one row-major Float32Array (frame-major, using the
 * band count of the first frame) and delegates to `gpuOnsetEnvelopeFromMelFlat`.
 * Only `options.diffMethod` is read here; `useLog` and `smoothMs` are not
 * applied on this path.
 *
 * @param mel     Object with `times` and `melBands` (one row per frame).
 * @param gpu     GPU context forwarded to `gpuOnsetEnvelopeFromMelFlat`.
 * @param options Optional `{ diffMethod }`, default "rectified".
 * @returns `{ times, values, gpuTimings: { gpuSubmitToReadbackMs } }`.
 */
async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
  const frameCount = mel.times.length;
  const bandCount = mel.melBands[0]?.length ?? 0;

  // Missing rows are left as zeros in the packed buffer.
  const packed = new Float32Array(frameCount * bandCount);
  for (let frame = 0, offset = 0; frame < frameCount; frame += 1, offset += bandCount) {
    const bands = mel.melBands[frame];
    if (bands) {
      packed.set(bands, offset);
    }
  }

  const result = await gpuOnsetEnvelopeFromMelFlat(gpu, {
    nFrames: frameCount,
    nMels: bandCount,
    melFlat: packed,
    diffMethod: options?.diffMethod ?? "rectified"
  });

  return {
    times: mel.times,
    values: result.value.out,
    gpuTimings: { gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs }
  };
}
|
|
287
|
+
|
|
288
|
+
// src/dsp/spectral.ts
|
|
289
|
+
/**
 * Per-frame spectral centroid in Hz: the magnitude-weighted mean of the bin
 * frequencies `k * sampleRate / fftSize` over `(fftSize >>> 1) + 1` bins.
 *
 * @param spec Object with `times`, `magnitudes`, `fftSize` and `sampleRate`.
 * @returns Float32Array with one value per entry of `spec.times`; frames that
 *          are missing or whose magnitudes do not sum to a positive value give 0.
 */
function spectralCentroid(spec) {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const hzPerBin = spec.sampleRate / spec.fftSize;
  const centroids = new Float32Array(frameCount);

  for (let frame = 0; frame < frameCount; frame += 1) {
    const frameMags = spec.magnitudes[frame];
    if (!frameMags) {
      continue; // stays at the initial 0
    }
    let weighted = 0;
    let total = 0;
    for (let bin = 0; bin < binCount; bin += 1) {
      const mag = frameMags[bin] ?? 0;
      weighted += bin * hzPerBin * mag;
      total += mag;
    }
    centroids[frame] = total > 0 ? weighted / total : 0;
  }
  return centroids;
}
|
|
312
|
+
/**
 * Per-frame spectral flux: the L1 distance between consecutive frames after
 * each frame has been normalised to sum to 1.
 *
 * A frame that is missing, or whose magnitudes sum to zero or less, outputs 0
 * and resets the comparison, so the next valid frame also outputs 0.
 *
 * @param spec Object with `times`, `magnitudes` and `fftSize`.
 * @returns Float32Array with one value per entry of `spec.times`.
 */
function spectralFlux(spec) {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const flux = new Float32Array(frameCount);

  // Normalised previous frame, or null when there is no valid predecessor.
  let reference = null;

  for (let frame = 0; frame < frameCount; frame += 1) {
    const mags = spec.magnitudes[frame];
    let energy = 0;
    if (mags) {
      for (let bin = 0; bin < binCount; bin += 1) {
        energy += mags[bin] ?? 0;
      }
    }
    if (!mags || energy <= 0) {
      reference = null;
      continue; // output stays at the initial 0
    }

    const normalised = new Float32Array(binCount);
    const scale = 1 / energy;
    for (let bin = 0; bin < binCount; bin += 1) {
      normalised[bin] = (mags[bin] ?? 0) * scale;
    }

    if (reference) {
      let distance = 0;
      for (let bin = 0; bin < binCount; bin += 1) {
        distance += Math.abs((normalised[bin] ?? 0) - (reference[bin] ?? 0));
      }
      flux[frame] = distance;
    }
    reference = normalised;
  }
  return flux;
}
|
|
349
|
+
|
|
350
|
+
// src/dsp/beatCandidates.ts
|
|
351
|
+
/**
 * Centred moving average over `values`, built on a prefix-sum table.
 *
 * The window spans `floor(windowFrames / 2)` samples either side of the centre
 * and is clipped at the signal boundaries, so edge outputs average fewer samples.
 *
 * @param values       Numeric samples (anything indexable with a `length`).
 * @param windowFrames Window width in frames.
 * @returns `values` itself when `windowFrames <= 1`, otherwise a new Float32Array.
 */
function movingAverage2(values, windowFrames) {
  if (windowFrames <= 1) return values;

  const total = values.length;
  const reach = Math.floor(windowFrames / 2);

  // sums[p] is the sum of values[0..p-1]; Float64Array starts zero-filled.
  const sums = new Float64Array(total + 1);
  for (let pos = 1; pos <= total; pos += 1) {
    sums[pos] = (sums[pos - 1] ?? 0) + (values[pos - 1] ?? 0);
  }

  return new Float32Array(total).map((_unused, centre) => {
    const from = Math.max(0, centre - reach);
    const to = Math.min(total, centre + reach + 1);
    const windowTotal = (sums[to] ?? 0) - (sums[from] ?? 0);
    return windowTotal / Math.max(1, to - from);
  });
}
|
|
370
|
+
/**
 * Mean and population standard deviation (divides by `n`, not `n - 1`).
 *
 * @param values Numeric samples; missing entries count as 0.
 * @returns `{ mean, std }`, both 0 for an empty input.
 */
function meanStd(values) {
  const count = values.length;
  if (count <= 0) {
    return { mean: 0, std: 0 };
  }

  let total = 0;
  for (let idx = 0; idx < count; idx += 1) {
    total += values[idx] ?? 0;
  }
  const mean = total / count;

  let squaredDeviation = 0;
  for (let idx = 0; idx < count; idx += 1) {
    const offset = (values[idx] ?? 0) - mean;
    squaredDeviation += offset * offset;
  }

  return { mean, std: Math.sqrt(squaredDeviation / count) };
}
|
|
384
|
+
/**
 * Z-score normalisation: `(x - mean) / std`, using the statistics from `meanStd`.
 *
 * @param values Numeric samples; missing entries count as 0.
 * @returns A new Float32Array. When the standard deviation is 0 or not finite
 *          the output is all zeros.
 */
function zScoreNormalize(values) {
  const stats = meanStd(values);
  const count = values.length;
  const normalised = new Float32Array(count); // zero-filled

  const degenerate = stats.std === 0 || !Number.isFinite(stats.std);
  if (!degenerate) {
    for (let idx = 0; idx < count; idx += 1) {
      normalised[idx] = ((values[idx] ?? 0) - stats.mean) / stats.std;
    }
  }
  return normalised;
}
|
|
397
|
+
/**
 * Rescales values linearly so the minimum maps to 0 and the maximum to 1.
 *
 * @param values Numeric samples; missing entries count as 0.
 * @returns A new Float32Array. Empty input gives an empty array; when the range
 *          is 0 or not finite every output is 0.5.
 */
function minMaxNormalize(values) {
  const count = values.length;
  if (count === 0) {
    return new Float32Array(0);
  }

  let lowest = Infinity;
  let highest = -Infinity;
  for (let idx = 0; idx < count; idx += 1) {
    const sample = values[idx] ?? 0;
    if (sample < lowest) lowest = sample;
    if (sample > highest) highest = sample;
  }

  const span = highest - lowest;
  const scaled = new Float32Array(count);
  if (span === 0 || !Number.isFinite(span)) {
    return scaled.fill(0.5);
  }
  for (let idx = 0; idx < count; idx += 1) {
    scaled[idx] = ((values[idx] ?? 0) - lowest) / span;
  }
  return scaled;
}
|
|
418
|
+
/**
 * Builds a beat-salience curve by blending two novelty signals.
 *
 * The rectified mel onset envelope (already smoothed with `smoothMs`) and the
 * spectral flux are each z-score normalised, mixed 0.7 : 0.3, smoothed again
 * with a moving average of about `smoothMs` milliseconds, and finally min-max
 * normalised.
 *
 * @param mel     Mel spectrogram passed to `onsetEnvelopeFromMel`.
 * @param spec    Magnitude spectrogram passed to `spectralFlux`.
 * @param options Optional `{ smoothMs }`, default 50.
 * @returns `{ times, values }`, truncated to the shorter of the two signals.
 */
function beatSalienceFromMel(mel, spec, options) {
  const ONSET_WEIGHT = 0.7;
  const FLUX_WEIGHT = 0.3;
  const smoothMs = options?.smoothMs ?? 50;

  const onset = onsetEnvelopeFromMel(mel, {
    smoothMs,
    diffMethod: "rectified",
    useLog: false
  });
  const flux = spectralFlux(spec);

  // Work on the common prefix in case the two signals differ in length.
  const length = Math.min(onset.times.length, flux.length);
  const onsetZ = zScoreNormalize(onset.values.subarray(0, length));
  const fluxZ = zScoreNormalize(flux.subarray(0, length));

  const blended = new Float32Array(length);
  for (let idx = 0; idx < length; idx += 1) {
    blended[idx] = ONSET_WEIGHT * (onsetZ[idx] ?? 0) + FLUX_WEIGHT * (fluxZ[idx] ?? 0);
  }

  // Frame spacing from the first two timestamps; 10 ms is assumed for very short inputs.
  let dt = 0.01;
  if (length >= 2) {
    dt = (onset.times[1] ?? 0) - (onset.times[0] ?? 0);
  }
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));

  return {
    times: onset.times.subarray(0, length),
    values: minMaxNormalize(movingAverage2(blended, windowFrames | 1))
  };
}
|
|
444
|
+
/**
 * Picks beat candidates as thresholded local maxima of a salience curve.
 *
 * A sample qualifies when it is at least `mean + thresholdFactor * std` of the
 * whole curve and strictly greater than both neighbours (the first and last
 * samples are never considered). Peaks closer than `minIntervalSec` to the
 * last accepted candidate do not create a new candidate: a stronger peak
 * replaces that candidate, a weaker one is dropped.
 *
 * @param salience `{ times, values }` with matching indices.
 * @param options  Optional `{ minIntervalSec, thresholdFactor }`; defaults 0.1 and 0.5.
 * @param source   Label copied onto every candidate as `source`.
 * @returns Array of `{ time, strength, source }`; empty when fewer than three
 *          samples are available.
 */
function pickBeatCandidates(salience, options, source) {
  const minIntervalSec = options?.minIntervalSec ?? 0.1;
  const thresholdFactor = options?.thresholdFactor ?? 0.5;
  const { times, values } = salience;
  const n = values.length;
  if (n < 3) return [];

  const { mean, std } = meanStd(values);
  const threshold = mean + thresholdFactor * std;

  const candidates = [];
  let lastPeakTime = -Infinity;

  for (let i = 1; i < n - 1; i++) {
    const v = values[i] ?? 0;
    if (v < threshold) continue;

    const prev = values[i - 1] ?? 0;
    const next = values[i + 1] ?? 0;
    if (!(v > prev && v > next)) continue;

    const t = times[i] ?? 0;
    if (t - lastPeakTime < minIntervalSec) {
      const last = candidates[candidates.length - 1];
      if (last && v > last.strength) {
        last.time = t;
        last.strength = v;
        // Keep the spacing reference in step with the moved candidate; otherwise
        // the next peak is measured from the stale time and can land closer
        // than minIntervalSec to this one.
        lastPeakTime = t;
      }
      continue;
    }

    candidates.push({
      time: t,
      strength: v,
      source
    });
    lastPeakTime = t;
  }
  return candidates;
}
|
|
478
|
+
/**
 * End-to-end beat-candidate detection: computes the combined salience curve
 * and picks peaks from it, labelling every candidate with source "combined".
 *
 * @param mel     Mel spectrogram for `beatSalienceFromMel`.
 * @param spec    Magnitude spectrogram for `beatSalienceFromMel`.
 * @param options Optional `{ minIntervalSec, thresholdFactor, smoothMs }`;
 *                defaults 0.1, 0.5 and 50.
 * @returns `{ candidates, salience }`.
 */
function detectBeatCandidates(mel, spec, options) {
  const minIntervalSec = options?.minIntervalSec ?? 0.1;
  const thresholdFactor = options?.thresholdFactor ?? 0.5;
  const smoothMs = options?.smoothMs ?? 50;

  const salience = beatSalienceFromMel(mel, spec, { smoothMs });
  const candidates = pickBeatCandidates(
    salience,
    { minIntervalSec, thresholdFactor, smoothMs },
    "combined"
  );

  return { candidates, salience };
}
|
|
491
|
+
|
|
492
|
+
// src/dsp/tempoHypotheses.ts
|
|
493
|
+
/** Converts a beat interval in seconds to beats per minute (`60 / intervalSec`). */
function intervalToBpm(intervalSec) {
  const secondsPerMinute = 60;
  return secondsPerMinute / intervalSec;
}
|
|
496
|
+
/** Converts a tempo in beats per minute to a beat interval in seconds (`60 / bpm`). */
function bpmToInterval(bpm) {
  const secondsPerMinute = 60;
  return secondsPerMinute / bpm;
}
|
|
499
|
+
/**
 * Computes inter-onset intervals between time-adjacent candidates.
 *
 * Candidates are sorted by time on a copy (the input is not reordered); pairs
 * with a non-positive gap are skipped.
 *
 * @param candidates       Array of `{ time, strength }`.
 * @param weightByStrength When truthy each interval is weighted by the geometric
 *                         mean of the two strengths, otherwise by 1.
 * @returns Array of `{ intervalSec, weight }`; empty for fewer than two candidates.
 */
function computeIOIs(candidates, weightByStrength) {
  if (candidates.length < 2) {
    return [];
  }

  const ordered = Array.from(candidates).sort((a, b) => a.time - b.time);
  const intervals = [];

  ordered.slice(1).forEach((later, idx) => {
    const earlier = ordered[idx]; // immediate predecessor of `later`
    const gap = later.time - earlier.time;
    if (gap <= 0) {
      return;
    }
    intervals.push({
      intervalSec: gap,
      weight: weightByStrength ? Math.sqrt(earlier.strength * later.strength) : 1
    });
  });

  return intervals;
}
|
|
513
|
+
/**
 * Accumulates inter-onset intervals into a weighted BPM histogram.
 *
 * @param iois       Array of `{ intervalSec, weight }`.
 * @param minBpm     Lower edge of the first bin.
 * @param maxBpm     Upper tempo limit; intervals shorter than `60 / maxBpm` or
 *                   longer than `60 / minBpm` are ignored.
 * @param binSizeBpm Bin width in BPM.
 * @returns `{ bpmBins, counts }`: Float32Arrays of bin-centre tempos and
 *          accumulated weights, `ceil((maxBpm - minBpm) / binSizeBpm)` long.
 */
function buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm) {
  const numBins = Math.ceil((maxBpm - minBpm) / binSizeBpm);
  const counts = new Float32Array(numBins);
  // Each bin is labelled with the tempo at its centre.
  const bpmBins = new Float32Array(numBins).map(
    (_unused, bin) => minBpm + (bin + 0.5) * binSizeBpm
  );

  const shortest = bpmToInterval(maxBpm);
  const longest = bpmToInterval(minBpm);

  for (const ioi of iois) {
    const { intervalSec, weight } = ioi;
    if (intervalSec < shortest || intervalSec > longest) {
      continue;
    }
    const binIndex = Math.floor((intervalToBpm(intervalSec) - minBpm) / binSizeBpm);
    if (binIndex >= 0 && binIndex < numBins) {
      counts[binIndex] += weight;
    }
  }

  return { bpmBins, counts };
}
|
|
532
|
+
/**
 * Finds local maxima in a histogram and returns their indices, tallest first.
 *
 * Interior bins must be strictly greater than both neighbours; the first and
 * last bins only need to beat their single neighbour. Every peak must also be
 * at least `minHeight`.
 *
 * @param counts    Histogram heights.
 * @param minHeight Minimum height for a bin to count as a peak.
 * @returns Bin indices ordered by descending height.
 */
function findHistogramPeaks(counts, minHeight) {
  const size = counts.length;
  const found = [];
  const record = (index) => {
    found.push({ index, height: counts[index] });
  };

  // Interior bins first, then the two edges. The sort below is stable, so this
  // insertion order decides how ties are ranked.
  for (let bin = 1; bin < size - 1; bin += 1) {
    const here = counts[bin];
    if (here > counts[bin - 1] && here > counts[bin + 1] && here >= minHeight) {
      record(bin);
    }
  }

  if (size > 0 && counts[0] >= minHeight && counts[0] > (counts[1] ?? 0)) {
    record(0);
  }

  if (size > 1) {
    const tail = size - 1;
    if (counts[tail] >= minHeight && counts[tail] > (counts[tail - 1] ?? 0)) {
      record(tail);
    }
  }

  return found
    .sort((a, b) => b.height - a.height)
    .map((peak) => peak.index);
}
|
|
554
|
+
/**
 * Refines a histogram peak to a sub-bin tempo using the weighted mean of the
 * peak bin and its immediate neighbours.
 *
 * @param peakIndex  Index of the peak bin.
 * @param bpmBins    Bin-centre tempos.
 * @param counts     Bin weights.
 * @param binSizeBpm Bin width in BPM.
 * @returns `{ bpm, peakHeight, binRange, totalWeight }`. `binRange` covers the
 *          peak bin plus any neighbour with positive weight; `bpm` falls back to
 *          the peak bin centre when the total weight is not positive.
 */
function refinePeakBpm(peakIndex, bpmBins, counts, binSizeBpm) {
  const halfBin = binSizeBpm / 2;
  const centre = bpmBins[peakIndex];

  let low = centre - halfBin;
  let high = centre + halfBin;
  let totalWeight = 0;
  let weightedBpm = 0;

  for (const idx of [peakIndex - 1, peakIndex, peakIndex + 1]) {
    if (idx < 0 || idx >= bpmBins.length) {
      continue;
    }
    const weight = counts[idx];
    const tempo = bpmBins[idx];
    totalWeight += weight;
    weightedBpm += weight * tempo;
    if (weight > 0) {
      low = Math.min(low, tempo - halfBin);
      high = Math.max(high, tempo + halfBin);
    }
  }

  return {
    bpm: totalWeight > 0 ? weightedBpm / totalWeight : centre,
    peakHeight: counts[peakIndex],
    binRange: [low, high],
    totalWeight
  };
}
|
|
579
|
+
/**
 * Tests whether `bpm2` is a simple harmonic multiple of `bpm1`.
 *
 * The candidate ratios are tried in the order 1/2, 1/3, 2/3, 1, 3/2, 2, 3 and
 * the first whose relative error is within `tolerance` wins.
 *
 * @param bpm1      Reference tempo.
 * @param bpm2      Tempo to compare against the reference.
 * @param tolerance Maximum relative error, default 0.03.
 * @returns The matching ratio, or null when none matches.
 */
function getHarmonicRatio(bpm1, bpm2, tolerance = 0.03) {
  const candidateRatios = [0.5, 1 / 3, 2 / 3, 1, 1.5, 2, 3];
  const match = candidateRatios.find((ratio) => {
    const expected = bpm1 * ratio;
    return Math.abs(bpm2 - expected) / expected <= tolerance;
  });
  return match ?? null;
}
|
|
590
|
+
/**
 * Groups tempo hypotheses into harmonic families, mutating each hypothesis.
 *
 * Hypotheses are visited in order. Each one joins the first existing family
 * whose root tempo it is harmonically related to (per `getHarmonicRatio`);
 * otherwise it founds a new family named `fam-<rounded bpm>` with ratio 1.
 * Sets `familyId` and `harmonicRatio` on every hypothesis; returns nothing.
 *
 * @param hypotheses Array of objects with a `bpm` field.
 */
function assignHarmonicFamilies(hypotheses) {
  if (hypotheses.length === 0) return;

  const families = /* @__PURE__ */ new Map();

  // Tries to attach `hyp` to an existing family; reports whether it succeeded.
  const joinExistingFamily = (hyp) => {
    for (const [familyId, family] of families) {
      const ratio = getHarmonicRatio(family.rootBpm, hyp.bpm);
      if (ratio === null) {
        continue;
      }
      hyp.familyId = familyId;
      hyp.harmonicRatio = ratio;
      family.members.push(hyp);
      return true;
    }
    return false;
  };

  for (const hyp of hypotheses) {
    if (joinExistingFamily(hyp)) {
      continue;
    }
    const familyId = `fam-${Math.round(hyp.bpm)}`;
    hyp.familyId = familyId;
    hyp.harmonicRatio = 1;
    families.set(familyId, { rootBpm: hyp.bpm, members: [hyp] });
  }
}
|
|
613
|
+
/**
 * Sets each hypothesis' `confidence` to its peak height relative to the
 * tallest peak, so the strongest hypothesis gets 1.
 *
 * Mutates the hypotheses in place. Nothing is changed when the list is empty
 * or the tallest peak is not positive.
 *
 * @param hypotheses Array of objects with `evidence.peakHeight`.
 */
function normalizeConfidence(hypotheses) {
  if (hypotheses.length === 0) {
    return;
  }

  const tallest = hypotheses.reduce(
    (best, hyp) => Math.max(best, hyp.evidence.peakHeight),
    -Infinity
  );
  if (tallest <= 0) {
    return;
  }

  hypotheses.forEach((hyp) => {
    hyp.confidence = hyp.evidence.peakHeight / tallest;
  });
}
|
|
621
|
+
/**
 * Derives ranked tempo hypotheses from beat candidates.
 *
 * Pipeline: inter-onset intervals -> weighted BPM histogram -> histogram peaks
 * -> sub-bin refinement -> harmonic-family assignment -> confidence
 * normalisation -> filter, sort by confidence and cap at `maxHypotheses`.
 *
 * @param candidates Array of `{ time, strength }` beat candidates.
 * @param options    Optional settings with defaults: `minBpm` 24, `maxBpm` 300,
 *                   `binSizeBpm` 1, `maxHypotheses` 10, `minConfidence` 0.05,
 *                   `weightByStrength` true, `includeHistogram` false.
 * @returns `{ hypotheses, inputCandidateCount, histogram }`; `histogram` is
 *          undefined unless `includeHistogram` is set. Hypothesis ids are
 *          `hyp-<rank>` in the returned order.
 */
function generateTempoHypotheses(candidates, options) {
  const settings = options ?? {};
  const minBpm = settings.minBpm ?? 24;
  const maxBpm = settings.maxBpm ?? 300;
  const binSizeBpm = settings.binSizeBpm ?? 1;
  const maxHypotheses = settings.maxHypotheses ?? 10;
  const minConfidence = settings.minConfidence ?? 0.05;
  const weightByStrength = settings.weightByStrength ?? true;
  const includeHistogram = settings.includeHistogram ?? false;

  const inputCandidateCount = candidates.length;

  // Result used whenever there is not enough data to form any interval.
  const emptyResult = () => ({
    hypotheses: [],
    inputCandidateCount,
    histogram: includeHistogram ? {
      bpmBins: new Float32Array(0),
      counts: new Float32Array(0)
    } : void 0
  });

  if (inputCandidateCount < 2) {
    return emptyResult();
  }
  const iois = computeIOIs(candidates, weightByStrength);
  if (iois.length === 0) {
    return emptyResult();
  }

  const { bpmBins, counts } = buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm);
  const maxCount = Math.max(...counts);
  const peakIndices = findHistogramPeaks(counts, maxCount * minConfidence);

  // Refine up to twice as many peaks as requested; the final cut happens after ranking.
  const hypotheses = [];
  for (const peakIndex of peakIndices.slice(0, maxHypotheses * 2)) {
    const refined = refinePeakBpm(peakIndex, bpmBins, counts, binSizeBpm);
    if (maxCount > 0 && refined.peakHeight / maxCount < minConfidence) {
      continue;
    }
    hypotheses.push({
      id: "", // assigned after ranking
      bpm: Math.round(refined.bpm * 10) / 10, // 0.1 BPM precision
      confidence: 0, // filled in by normalizeConfidence
      evidence: {
        supportingIntervalCount: Math.round(refined.totalWeight),
        weightedSupport: refined.totalWeight,
        peakHeight: refined.peakHeight,
        binRange: refined.binRange
      },
      familyId: "", // filled in by assignHarmonicFamilies
      harmonicRatio: 1 // filled in by assignHarmonicFamilies
    });
  }

  assignHarmonicFamilies(hypotheses);
  normalizeConfidence(hypotheses);

  const ranked = hypotheses
    .filter((hyp) => hyp.confidence >= minConfidence)
    .sort((a, b) => b.confidence - a.confidence)
    .slice(0, maxHypotheses);
  ranked.forEach((hyp, rank) => {
    hyp.id = `hyp-${rank}`;
  });

  return {
    hypotheses: ranked,
    inputCandidateCount,
    histogram: includeHistogram ? { bpmBins, counts } : void 0
  };
}
|
|
695
|
+
|
|
56
696
|
// src/gpu/kernels/melProject.wgsl.ts
|
|
57
697
|
var melProjectWGSL = (
|
|
58
698
|
/* wgsl */
|
|
@@ -275,324 +915,92 @@ async function melSpectrogram(spec, config, gpu) {
|
|
|
275
915
|
config.nMels,
|
|
276
916
|
fMin,
|
|
277
917
|
fMax
|
|
278
|
-
);
|
|
279
|
-
if (gpu) {
|
|
280
|
-
try {
|
|
281
|
-
return await gpuMelProject(spec, filters, gpu);
|
|
282
|
-
} catch {
|
|
283
|
-
return cpuMelProject(spec, filters);
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
return cpuMelProject(spec, filters);
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
// src/dsp/mfcc.ts
|
|
290
|
-
/**
 * Throws unless `v` is a finite, positive number that survives 32-bit integer
 * truncation unchanged (i.e. a positive integer in int32 range).
 *
 * @param name Label used in the error message.
 * @param v    Value to validate.
 * @throws Error `@octoseq/mir: <name> must be a positive integer`.
 */
function assertPositiveInt2(name, v) {
  const isPositiveInt = Number.isFinite(v) && v > 0 && (v | 0) === v;
  if (isPositiveInt) {
    return;
  }
  throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
}
|
|
295
|
-
/**
 * Builds a row-major `nCoeffs x nMels` DCT-II basis.
 *
 * Entry (i, j) is `cos(pi / nMels * (j + 0.5) * i)` scaled by `sqrt(1 / nMels)`
 * for row 0 and `sqrt(2 / nMels)` for every other row.
 *
 * @param nCoeffs Number of rows (output coefficients).
 * @param nMels   Number of columns (input bands).
 * @returns Float32Array of length `nCoeffs * nMels`.
 */
function buildDctMatrix(nCoeffs, nMels) {
  const matrix = new Float32Array(nCoeffs * nMels);
  const dcScale = Math.sqrt(1 / nMels);
  const acScale = Math.sqrt(2 / nMels);
  const step = Math.PI / nMels;

  for (let row = 0; row < nCoeffs; row += 1) {
    const rowScale = row === 0 ? dcScale : acScale;
    const rowStart = row * nMels;
    for (let col = 0; col < nMels; col += 1) {
      matrix[rowStart + col] = rowScale * Math.cos(step * (col + 0.5) * row);
    }
  }
  return matrix;
}
|
|
307
|
-
/**
 * Computes MFCC-style coefficients by projecting each mel frame onto the DCT
 * basis from `buildDctMatrix`.
 *
 * @param mel     Object with `times` and `melBands` (one row per frame); the
 *                band count is taken from the first frame.
 * @param options Optional `{ nCoeffs }`, default 13; must be a positive integer.
 * @returns `{ times, coeffs }` with one Float32Array of `nCoeffs` values per
 *          frame. Missing frames are treated as all-zero; when there are no
 *          bands every frame is all zeros.
 * @throws Error from `assertPositiveInt2` when `nCoeffs` is invalid.
 */
function mfcc(mel, options = {}) {
  const frameCount = mel.times.length;
  const bandCount = mel.melBands[0]?.length ?? 0;
  const nCoeffs = options.nCoeffs ?? 13;
  assertPositiveInt2("options.nCoeffs", nCoeffs);

  if (bandCount <= 0) {
    const zeros = Array.from({ length: frameCount }, () => new Float32Array(nCoeffs));
    return { times: mel.times, coeffs: zeros };
  }

  const basis = buildDctMatrix(nCoeffs, bandCount);
  const coeffs = Array.from({ length: frameCount }, (_frame, t) => {
    const bands = mel.melBands[t] ?? new Float32Array(bandCount);
    return new Float32Array(nCoeffs).map((_unused, k) => {
      const rowStart = k * bandCount;
      let acc = 0;
      for (let j = 0; j < bandCount; j += 1) {
        acc += (basis[rowStart + j] ?? 0) * (bands[j] ?? 0);
      }
      return acc;
    });
  });

  return { times: mel.times, coeffs };
}
|
|
332
|
-
/**
 * Regression-based delta (first-order difference) features.
 *
 * For half-window `N`: `d[t] = sum_{n=1..N} n * (x[t+n] - x[t-n]) / (2 * sum n^2)`,
 * with frame indices clamped to the valid range at both ends.
 *
 * @param features Object with `times` and `values` (one row per frame); the
 *                 feature count is taken from the first row.
 * @param options  Optional `{ window }`, default 2; must be a positive integer.
 * @returns `{ times, values }` with one Float32Array per frame.
 * @throws Error from `assertPositiveInt2` when `window` is invalid.
 */
function delta(features, options = {}) {
  const halfWindow = options.window ?? 2;
  assertPositiveInt2("options.window", halfWindow);

  const frameCount = features.times.length;
  const featureCount = features.values[0]?.length ?? 0;
  const lastFrame = frameCount - 1;

  let squares = 0;
  for (let n = 1; n <= halfWindow; n += 1) {
    squares += n * n;
  }
  const denom = 2 * squares;

  const deltas = Array.from({ length: frameCount }, (_frame, t) => {
    const row = new Float32Array(featureCount);
    for (let f = 0; f < featureCount; f += 1) {
      let num = 0;
      for (let n = 1; n <= halfWindow; n += 1) {
        const ahead = features.values[Math.min(lastFrame, t + n)]?.[f] ?? 0;
        const behind = features.values[Math.max(0, t - n)]?.[f] ?? 0;
        num += n * (ahead - behind);
      }
      row[f] = denom > 0 ? num / denom : 0;
    }
    return row;
  });

  return { times: features.times, values: deltas };
}
|
|
358
|
-
/**
 * Second-order delta features: applies `delta` twice with the same options.
 *
 * @param features Object with `times` and `values`, as accepted by `delta`.
 * @param options  Optional `{ window }` forwarded to both passes.
 * @returns `{ times, values }` as produced by `delta`.
 */
function deltaDelta(features, options = {}) {
  const firstOrder = delta(features, options);
  return delta(firstOrder, options);
}
|
|
361
|
-
|
|
362
|
-
// src/gpu/kernels/onsetEnvelope.wgsl.ts
|
|
363
|
-
var onsetEnvelopeWGSL = (
|
|
364
|
-
/* wgsl */
|
|
365
|
-
`
|
|
366
|
-
// Compute onset strength envelope from a (log) mel spectrogram.
|
|
367
|
-
//
|
|
368
|
-
// Input layout: melFlat[t*nMels + m]
|
|
369
|
-
// Output layout: out[t]
|
|
370
|
-
//
|
|
371
|
-
// We compute novelty per frame:
|
|
372
|
-
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
|
|
373
|
-
// or sum_m abs(...)
|
|
374
|
-
//
|
|
375
|
-
// One invocation computes one frame index (t). This is memory-bound but reduces a full
|
|
376
|
-
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
|
|
377
|
-
|
|
378
|
-
struct Params {
|
|
379
|
-
nMels: u32,
|
|
380
|
-
nFrames: u32,
|
|
381
|
-
diffMethod: u32, // 0=rectified, 1=abs
|
|
382
|
-
_pad: u32,
|
|
383
|
-
};
|
|
384
|
-
|
|
385
|
-
@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
|
|
386
|
-
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
|
|
387
|
-
@group(0) @binding(2) var<uniform> params: Params;
|
|
388
|
-
|
|
389
|
-
@compute @workgroup_size(256)
|
|
390
|
-
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
391
|
-
let t = gid.x;
|
|
392
|
-
if (t >= params.nFrames) { return; }
|
|
393
|
-
|
|
394
|
-
if (t == 0u) {
|
|
395
|
-
out[t] = 0.0;
|
|
396
|
-
return;
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
let nMels = params.nMels;
|
|
400
|
-
var sum: f32 = 0.0;
|
|
401
|
-
|
|
402
|
-
// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
|
|
403
|
-
// (Future optimisation: parallelise reduction within workgroup.)
|
|
404
|
-
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
|
|
405
|
-
let a = melFlat[t * nMels + m];
|
|
406
|
-
let b = melFlat[(t - 1u) * nMels + m];
|
|
407
|
-
let d = a - b;
|
|
408
|
-
|
|
409
|
-
if (params.diffMethod == 1u) {
|
|
410
|
-
// abs
|
|
411
|
-
sum = sum + abs(d);
|
|
412
|
-
} else {
|
|
413
|
-
// rectified
|
|
414
|
-
sum = sum + max(0.0, d);
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
out[t] = sum / max(1.0, f32(nMels));
|
|
419
|
-
}
|
|
420
|
-
`
|
|
421
|
-
);
|
|
422
|
-
|
|
423
|
-
// src/gpu/onsetEnvelope.ts
|
|
424
|
-
async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
|
|
425
|
-
const { device } = gpu;
|
|
426
|
-
const { nFrames, nMels, melFlat, diffMethod } = input;
|
|
427
|
-
if (melFlat.length !== nFrames * nMels) {
|
|
428
|
-
throw new Error("@octoseq/mir: melFlat length mismatch");
|
|
429
|
-
}
|
|
430
|
-
const melBuffer = createAndWriteStorageBuffer(gpu, melFlat);
|
|
431
|
-
const outByteLen = byteSizeF32(nFrames);
|
|
432
|
-
const outBuffer = createStorageOutBuffer(gpu, outByteLen);
|
|
433
|
-
const readback = createReadbackBuffer(gpu, outByteLen);
|
|
434
|
-
const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
|
|
435
|
-
const pipeline = device.createComputePipeline({
|
|
436
|
-
layout: "auto",
|
|
437
|
-
compute: { module: shader, entryPoint: "main" }
|
|
438
|
-
});
|
|
439
|
-
const diffU32 = diffMethod === "abs" ? 1 : 0;
|
|
440
|
-
const params = createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0]));
|
|
441
|
-
const bindGroup = device.createBindGroup({
|
|
442
|
-
layout: pipeline.getBindGroupLayout(0),
|
|
443
|
-
entries: [
|
|
444
|
-
{ binding: 0, resource: { buffer: melBuffer } },
|
|
445
|
-
{ binding: 1, resource: { buffer: outBuffer } },
|
|
446
|
-
{ binding: 2, resource: { buffer: params } }
|
|
447
|
-
]
|
|
448
|
-
});
|
|
449
|
-
const encoder = device.createCommandEncoder();
|
|
450
|
-
const pass = encoder.beginComputePass();
|
|
451
|
-
pass.setPipeline(pipeline);
|
|
452
|
-
pass.setBindGroup(0, bindGroup);
|
|
453
|
-
const wg = Math.ceil(nFrames / 256);
|
|
454
|
-
pass.dispatchWorkgroups(wg);
|
|
455
|
-
pass.end();
|
|
456
|
-
const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
|
|
457
|
-
melBuffer.destroy();
|
|
458
|
-
outBuffer.destroy();
|
|
459
|
-
params.destroy();
|
|
460
|
-
readback.destroy();
|
|
461
|
-
return {
|
|
462
|
-
value: { out: new Float32Array(bytes) },
|
|
463
|
-
timing
|
|
464
|
-
};
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
// src/dsp/onset.ts
|
|
468
|
-
function movingAverage(values, windowFrames) {
|
|
469
|
-
if (windowFrames <= 1) return values;
|
|
470
|
-
const n = values.length;
|
|
471
|
-
const out = new Float32Array(n);
|
|
472
|
-
const half = Math.floor(windowFrames / 2);
|
|
473
|
-
const prefix = new Float64Array(n + 1);
|
|
474
|
-
prefix[0] = 0;
|
|
475
|
-
for (let i = 0; i < n; i++) {
|
|
476
|
-
prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
|
|
477
|
-
}
|
|
478
|
-
for (let i = 0; i < n; i++) {
|
|
479
|
-
const start = Math.max(0, i - half);
|
|
480
|
-
const end = Math.min(n, i + half + 1);
|
|
481
|
-
const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
|
|
482
|
-
const count = Math.max(1, end - start);
|
|
483
|
-
out[i] = sum / count;
|
|
918
|
+
);
|
|
919
|
+
if (gpu) {
|
|
920
|
+
try {
|
|
921
|
+
return await gpuMelProject(spec, filters, gpu);
|
|
922
|
+
} catch {
|
|
923
|
+
return cpuMelProject(spec, filters);
|
|
924
|
+
}
|
|
484
925
|
}
|
|
485
|
-
return
|
|
486
|
-
}
|
|
487
|
-
function defaultOptions(opts) {
|
|
488
|
-
return {
|
|
489
|
-
useLog: opts?.useLog ?? false,
|
|
490
|
-
smoothMs: opts?.smoothMs ?? 30,
|
|
491
|
-
diffMethod: opts?.diffMethod ?? "rectified"
|
|
492
|
-
};
|
|
926
|
+
return cpuMelProject(spec, filters);
|
|
493
927
|
}
|
|
494
|
-
|
|
495
|
-
|
|
928
|
+
|
|
929
|
+
// src/dsp/mfcc.ts
|
|
930
|
+
function assertPositiveInt2(name, v) {
|
|
931
|
+
if (!Number.isFinite(v) || v <= 0 || (v | 0) !== v) {
|
|
932
|
+
throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
|
|
933
|
+
}
|
|
496
934
|
}
|
|
497
|
-
function
|
|
498
|
-
const
|
|
499
|
-
const
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
const prev = spec.magnitudes[t - 1];
|
|
506
|
-
if (!cur || !prev) {
|
|
507
|
-
out[t] = 0;
|
|
508
|
-
continue;
|
|
509
|
-
}
|
|
510
|
-
let sum = 0;
|
|
511
|
-
for (let k = 0; k < nBins; k++) {
|
|
512
|
-
let a = cur[k] ?? 0;
|
|
513
|
-
let b = prev[k] ?? 0;
|
|
514
|
-
if (opts.useLog) {
|
|
515
|
-
a = logCompress(a);
|
|
516
|
-
b = logCompress(b);
|
|
517
|
-
}
|
|
518
|
-
const d = a - b;
|
|
519
|
-
sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
|
|
935
|
+
function buildDctMatrix(nCoeffs, nMels) {
|
|
936
|
+
const out = new Float32Array(nCoeffs * nMels);
|
|
937
|
+
const scale0 = Math.sqrt(1 / nMels);
|
|
938
|
+
const scale = Math.sqrt(2 / nMels);
|
|
939
|
+
for (let i = 0; i < nCoeffs; i++) {
|
|
940
|
+
for (let j = 0; j < nMels; j++) {
|
|
941
|
+
const c = Math.cos(Math.PI / nMels * (j + 0.5) * i);
|
|
942
|
+
out[i * nMels + j] = (i === 0 ? scale0 : scale) * c;
|
|
520
943
|
}
|
|
521
|
-
out[t] = nBins > 0 ? sum / nBins : 0;
|
|
522
|
-
}
|
|
523
|
-
const smoothMs = opts.smoothMs;
|
|
524
|
-
if (smoothMs > 0 && nFrames >= 2) {
|
|
525
|
-
const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
|
|
526
|
-
const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
|
|
527
|
-
return {
|
|
528
|
-
times: spec.times,
|
|
529
|
-
values: movingAverage(out, windowFrames | 1)
|
|
530
|
-
};
|
|
531
944
|
}
|
|
532
|
-
return
|
|
945
|
+
return out;
|
|
533
946
|
}
|
|
534
|
-
function
|
|
535
|
-
const opts = defaultOptions(options);
|
|
947
|
+
function mfcc(mel, options = {}) {
|
|
536
948
|
const nFrames = mel.times.length;
|
|
537
|
-
const
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
const
|
|
547
|
-
|
|
548
|
-
for (let
|
|
549
|
-
let
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
b = logCompress(b);
|
|
949
|
+
const nMels = mel.melBands[0]?.length ?? 0;
|
|
950
|
+
const nCoeffs = options.nCoeffs ?? 13;
|
|
951
|
+
assertPositiveInt2("options.nCoeffs", nCoeffs);
|
|
952
|
+
if (nMels <= 0) {
|
|
953
|
+
return { times: mel.times, coeffs: new Array(nFrames).fill(0).map(() => new Float32Array(nCoeffs)) };
|
|
954
|
+
}
|
|
955
|
+
const dct = buildDctMatrix(nCoeffs, nMels);
|
|
956
|
+
const out = new Array(nFrames);
|
|
957
|
+
for (let t = 0; t < nFrames; t++) {
|
|
958
|
+
const x = mel.melBands[t] ?? new Float32Array(nMels);
|
|
959
|
+
const c = new Float32Array(nCoeffs);
|
|
960
|
+
for (let i = 0; i < nCoeffs; i++) {
|
|
961
|
+
let sum = 0;
|
|
962
|
+
const rowOff = i * nMels;
|
|
963
|
+
for (let j = 0; j < nMels; j++) {
|
|
964
|
+
sum += (dct[rowOff + j] ?? 0) * (x[j] ?? 0);
|
|
554
965
|
}
|
|
555
|
-
|
|
556
|
-
sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
|
|
966
|
+
c[i] = sum;
|
|
557
967
|
}
|
|
558
|
-
out[t] =
|
|
559
|
-
}
|
|
560
|
-
const smoothMs = opts.smoothMs;
|
|
561
|
-
if (smoothMs > 0 && nFrames >= 2) {
|
|
562
|
-
const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
|
|
563
|
-
const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
|
|
564
|
-
return {
|
|
565
|
-
times: mel.times,
|
|
566
|
-
values: movingAverage(out, windowFrames | 1)
|
|
567
|
-
};
|
|
968
|
+
out[t] = c;
|
|
568
969
|
}
|
|
569
|
-
return { times: mel.times,
|
|
970
|
+
return { times: mel.times, coeffs: out };
|
|
570
971
|
}
|
|
571
|
-
|
|
572
|
-
const
|
|
573
|
-
|
|
574
|
-
const
|
|
972
|
+
function delta(features, options = {}) {
|
|
973
|
+
const N = options.window ?? 2;
|
|
974
|
+
assertPositiveInt2("options.window", N);
|
|
975
|
+
const nFrames = features.times.length;
|
|
976
|
+
const nFeat = features.values[0]?.length ?? 0;
|
|
977
|
+
const out = new Array(nFrames);
|
|
978
|
+
let denom = 0;
|
|
979
|
+
for (let n = 1; n <= N; n++) denom += n * n;
|
|
980
|
+
denom *= 2;
|
|
575
981
|
for (let t = 0; t < nFrames; t++) {
|
|
576
|
-
const
|
|
577
|
-
|
|
578
|
-
|
|
982
|
+
const d = new Float32Array(nFeat);
|
|
983
|
+
for (let f = 0; f < nFeat; f++) {
|
|
984
|
+
let num = 0;
|
|
985
|
+
for (let n = 1; n <= N; n++) {
|
|
986
|
+
const tPlus = Math.min(nFrames - 1, t + n);
|
|
987
|
+
const tMinus = Math.max(0, t - n);
|
|
988
|
+
const a = features.values[tPlus]?.[f] ?? 0;
|
|
989
|
+
const b = features.values[tMinus]?.[f] ?? 0;
|
|
990
|
+
num += n * (a - b);
|
|
991
|
+
}
|
|
992
|
+
d[f] = denom > 0 ? num / denom : 0;
|
|
993
|
+
}
|
|
994
|
+
out[t] = d;
|
|
579
995
|
}
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
melFlat,
|
|
585
|
-
diffMethod
|
|
586
|
-
});
|
|
587
|
-
return {
|
|
588
|
-
times: mel.times,
|
|
589
|
-
values: value.out,
|
|
590
|
-
gpuTimings: { gpuSubmitToReadbackMs: timing.gpuSubmitToReadbackMs }
|
|
591
|
-
};
|
|
996
|
+
return { times: features.times, values: out };
|
|
997
|
+
}
|
|
998
|
+
function deltaDelta(features, options = {}) {
|
|
999
|
+
return delta(delta(features, options), options);
|
|
592
1000
|
}
|
|
593
1001
|
|
|
594
1002
|
// src/dsp/peakPick.ts
|
|
595
|
-
function
|
|
1003
|
+
function meanStd2(values) {
|
|
596
1004
|
const n = values.length;
|
|
597
1005
|
if (n <= 0) return { mean: 0, std: 0 };
|
|
598
1006
|
let mean = 0;
|
|
@@ -629,7 +1037,7 @@ function peakPick(times, values, options = {}) {
|
|
|
629
1037
|
if (method === "median") {
|
|
630
1038
|
thr = median(values) * factor;
|
|
631
1039
|
} else {
|
|
632
|
-
const { mean, std } =
|
|
1040
|
+
const { mean, std } = meanStd2(values);
|
|
633
1041
|
thr = mean + factor * std;
|
|
634
1042
|
}
|
|
635
1043
|
}
|
|
@@ -1005,68 +1413,6 @@ async function hpssGpu(spec, gpu, options = {}) {
|
|
|
1005
1413
|
};
|
|
1006
1414
|
}
|
|
1007
1415
|
|
|
1008
|
-
// src/dsp/spectral.ts
|
|
1009
|
-
function spectralCentroid(spec) {
|
|
1010
|
-
const nFrames = spec.times.length;
|
|
1011
|
-
const out = new Float32Array(nFrames);
|
|
1012
|
-
const nBins = (spec.fftSize >>> 1) + 1;
|
|
1013
|
-
const binHz = spec.sampleRate / spec.fftSize;
|
|
1014
|
-
for (let t = 0; t < nFrames; t++) {
|
|
1015
|
-
const mags = spec.magnitudes[t];
|
|
1016
|
-
if (!mags) {
|
|
1017
|
-
out[t] = 0;
|
|
1018
|
-
continue;
|
|
1019
|
-
}
|
|
1020
|
-
let num = 0;
|
|
1021
|
-
let den = 0;
|
|
1022
|
-
for (let k = 0; k < nBins; k++) {
|
|
1023
|
-
const m = mags[k] ?? 0;
|
|
1024
|
-
const f = k * binHz;
|
|
1025
|
-
num += f * m;
|
|
1026
|
-
den += m;
|
|
1027
|
-
}
|
|
1028
|
-
out[t] = den > 0 ? num / den : 0;
|
|
1029
|
-
}
|
|
1030
|
-
return out;
|
|
1031
|
-
}
|
|
1032
|
-
function spectralFlux(spec) {
|
|
1033
|
-
const nFrames = spec.times.length;
|
|
1034
|
-
const out = new Float32Array(nFrames);
|
|
1035
|
-
const nBins = (spec.fftSize >>> 1) + 1;
|
|
1036
|
-
let prev = null;
|
|
1037
|
-
for (let t = 0; t < nFrames; t++) {
|
|
1038
|
-
const mags = spec.magnitudes[t];
|
|
1039
|
-
if (!mags) {
|
|
1040
|
-
out[t] = 0;
|
|
1041
|
-
prev = null;
|
|
1042
|
-
continue;
|
|
1043
|
-
}
|
|
1044
|
-
let sum = 0;
|
|
1045
|
-
for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
|
|
1046
|
-
if (sum <= 0) {
|
|
1047
|
-
out[t] = 0;
|
|
1048
|
-
prev = null;
|
|
1049
|
-
continue;
|
|
1050
|
-
}
|
|
1051
|
-
const cur = new Float32Array(nBins);
|
|
1052
|
-
const inv = 1 / sum;
|
|
1053
|
-
for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
|
|
1054
|
-
if (!prev) {
|
|
1055
|
-
out[t] = 0;
|
|
1056
|
-
prev = cur;
|
|
1057
|
-
continue;
|
|
1058
|
-
}
|
|
1059
|
-
let flux = 0;
|
|
1060
|
-
for (let k = 0; k < nBins; k++) {
|
|
1061
|
-
const d = (cur[k] ?? 0) - (prev[k] ?? 0);
|
|
1062
|
-
flux += Math.abs(d);
|
|
1063
|
-
}
|
|
1064
|
-
out[t] = flux;
|
|
1065
|
-
prev = cur;
|
|
1066
|
-
}
|
|
1067
|
-
return out;
|
|
1068
|
-
}
|
|
1069
|
-
|
|
1070
1416
|
// src/dsp/fft.ts
|
|
1071
1417
|
function hannWindow(size) {
|
|
1072
1418
|
const w = new Float32Array(size);
|
|
@@ -1400,6 +1746,66 @@ async function runMir(audio, request, options = {}) {
|
|
|
1400
1746
|
}
|
|
1401
1747
|
};
|
|
1402
1748
|
}
|
|
1749
|
+
if (request.fn === "beatCandidates") {
|
|
1750
|
+
const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
|
|
1751
|
+
const beatOpts = request.beatCandidates ?? {};
|
|
1752
|
+
const result = detectBeatCandidates(mel2, spec, {
|
|
1753
|
+
minIntervalSec: beatOpts.minIntervalSec,
|
|
1754
|
+
thresholdFactor: beatOpts.thresholdFactor,
|
|
1755
|
+
smoothMs: beatOpts.smoothMs
|
|
1756
|
+
});
|
|
1757
|
+
const end2 = nowMs2();
|
|
1758
|
+
return {
|
|
1759
|
+
kind: "beatCandidates",
|
|
1760
|
+
times: result.salience.times,
|
|
1761
|
+
candidates: result.candidates,
|
|
1762
|
+
salience: beatOpts.includeSalience ? result.salience : void 0,
|
|
1763
|
+
meta: {
|
|
1764
|
+
backend: "cpu",
|
|
1765
|
+
usedGpu: false,
|
|
1766
|
+
timings: {
|
|
1767
|
+
totalMs: end2 - t0,
|
|
1768
|
+
cpuMs: cpuAfterSpec - cpuStart + melCpuMs
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
};
|
|
1772
|
+
}
|
|
1773
|
+
if (request.fn === "tempoHypotheses") {
|
|
1774
|
+
const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
|
|
1775
|
+
const beatOpts = request.beatCandidates ?? {};
|
|
1776
|
+
const beatResult = detectBeatCandidates(mel2, spec, {
|
|
1777
|
+
minIntervalSec: beatOpts.minIntervalSec,
|
|
1778
|
+
thresholdFactor: beatOpts.thresholdFactor,
|
|
1779
|
+
smoothMs: beatOpts.smoothMs
|
|
1780
|
+
});
|
|
1781
|
+
const tempoStart = nowMs2();
|
|
1782
|
+
const tempoOpts = request.tempoHypotheses ?? {};
|
|
1783
|
+
const result = generateTempoHypotheses(beatResult.candidates, {
|
|
1784
|
+
minBpm: tempoOpts.minBpm,
|
|
1785
|
+
maxBpm: tempoOpts.maxBpm,
|
|
1786
|
+
binSizeBpm: tempoOpts.binSizeBpm,
|
|
1787
|
+
maxHypotheses: tempoOpts.maxHypotheses,
|
|
1788
|
+
minConfidence: tempoOpts.minConfidence,
|
|
1789
|
+
weightByStrength: tempoOpts.weightByStrength,
|
|
1790
|
+
includeHistogram: tempoOpts.includeHistogram
|
|
1791
|
+
});
|
|
1792
|
+
const end2 = nowMs2();
|
|
1793
|
+
return {
|
|
1794
|
+
kind: "tempoHypotheses",
|
|
1795
|
+
times: spec.times,
|
|
1796
|
+
hypotheses: result.hypotheses,
|
|
1797
|
+
inputCandidateCount: result.inputCandidateCount,
|
|
1798
|
+
histogram: result.histogram,
|
|
1799
|
+
meta: {
|
|
1800
|
+
backend: "cpu",
|
|
1801
|
+
usedGpu: false,
|
|
1802
|
+
timings: {
|
|
1803
|
+
totalMs: end2 - t0,
|
|
1804
|
+
cpuMs: cpuAfterSpec - cpuStart + melCpuMs + (end2 - tempoStart)
|
|
1805
|
+
}
|
|
1806
|
+
}
|
|
1807
|
+
};
|
|
1808
|
+
}
|
|
1403
1809
|
if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
|
|
1404
1810
|
const hpssSpecConfig = options.hpss?.spectrogram ?? specConfig;
|
|
1405
1811
|
const needsHpssSpec = hpssSpecConfig.fftSize !== specConfig.fftSize || hpssSpecConfig.hopSize !== specConfig.hopSize;
|
|
@@ -1520,6 +1926,6 @@ async function runMir(audio, request, options = {}) {
|
|
|
1520
1926
|
};
|
|
1521
1927
|
}
|
|
1522
1928
|
|
|
1523
|
-
export { delta, deltaDelta, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
|
|
1524
|
-
//# sourceMappingURL=chunk-
|
|
1525
|
-
//# sourceMappingURL=chunk-
|
|
1929
|
+
export { beatSalienceFromMel, delta, deltaDelta, detectBeatCandidates, generateTempoHypotheses, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
|
|
1930
|
+
//# sourceMappingURL=chunk-KIGWMJLC.js.map
|
|
1931
|
+
//# sourceMappingURL=chunk-KIGWMJLC.js.map
|