@octoseq/mir 0.1.0-main.e2ea119 → 0.1.0-main.ef9b77a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DUWYCAVG.js → chunk-OLIDGECY.js} +1199 -303
- package/dist/chunk-OLIDGECY.js.map +1 -0
- package/dist/index.d.ts +853 -4
- package/dist/index.js +1325 -3
- package/dist/index.js.map +1 -1
- package/dist/{runMir-CSIBwNZ3.d.ts → runMir-8PX3FuZC.d.ts} +2 -2
- package/dist/runner/runMir.d.ts +2 -2
- package/dist/runner/runMir.js +1 -1
- package/dist/runner/workerProtocol.d.ts +8 -1
- package/dist/runner/workerProtocol.js.map +1 -1
- package/dist/types-uMUczpax.d.ts +622 -0
- package/package.json +1 -1
- package/src/dsp/bandMask.ts +225 -0
- package/src/dsp/bandMir.ts +455 -0
- package/src/dsp/bandProposal.ts +551 -0
- package/src/dsp/beatCandidates.ts +299 -0
- package/src/dsp/cqt.ts +386 -0
- package/src/dsp/cqtSignals.ts +462 -0
- package/src/dsp/frequencyBand.ts +913 -0
- package/src/dsp/mel.ts +56 -3
- package/src/dsp/musicalTime.ts +240 -0
- package/src/dsp/phaseAlignment.ts +153 -0
- package/src/dsp/tempoHypotheses.ts +395 -0
- package/src/index.ts +171 -3
- package/src/runner/runMir.ts +118 -1
- package/src/runner/workerProtocol.ts +9 -1
- package/src/types.ts +605 -3
- package/dist/chunk-DUWYCAVG.js.map +0 -1
- package/dist/types-BE3py4fZ.d.ts +0 -83
|
@@ -53,6 +53,646 @@ async function submitAndReadback(gpu, encoder, outBuffer, readback, byteLength)
|
|
|
53
53
|
};
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
// src/gpu/kernels/onsetEnvelope.wgsl.ts
|
|
57
|
+
var onsetEnvelopeWGSL = (
|
|
58
|
+
/* wgsl */
|
|
59
|
+
`
|
|
60
|
+
// Compute onset strength envelope from a (log) mel spectrogram.
|
|
61
|
+
//
|
|
62
|
+
// Input layout: melFlat[t*nMels + m]
|
|
63
|
+
// Output layout: out[t]
|
|
64
|
+
//
|
|
65
|
+
// We compute novelty per frame:
|
|
66
|
+
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
|
|
67
|
+
// or sum_m abs(...)
|
|
68
|
+
//
|
|
69
|
+
// One invocation computes one frame index (t). This is memory-bound but reduces a full
|
|
70
|
+
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
|
|
71
|
+
|
|
72
|
+
struct Params {
|
|
73
|
+
nMels: u32,
|
|
74
|
+
nFrames: u32,
|
|
75
|
+
diffMethod: u32, // 0=rectified, 1=abs
|
|
76
|
+
_pad: u32,
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
|
|
80
|
+
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
|
|
81
|
+
@group(0) @binding(2) var<uniform> params: Params;
|
|
82
|
+
|
|
83
|
+
@compute @workgroup_size(256)
|
|
84
|
+
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
85
|
+
let t = gid.x;
|
|
86
|
+
if (t >= params.nFrames) { return; }
|
|
87
|
+
|
|
88
|
+
if (t == 0u) {
|
|
89
|
+
out[t] = 0.0;
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
let nMels = params.nMels;
|
|
94
|
+
var sum: f32 = 0.0;
|
|
95
|
+
|
|
96
|
+
// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
|
|
97
|
+
// (Future optimisation: parallelise reduction within workgroup.)
|
|
98
|
+
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
|
|
99
|
+
let a = melFlat[t * nMels + m];
|
|
100
|
+
let b = melFlat[(t - 1u) * nMels + m];
|
|
101
|
+
let d = a - b;
|
|
102
|
+
|
|
103
|
+
if (params.diffMethod == 1u) {
|
|
104
|
+
// abs
|
|
105
|
+
sum = sum + abs(d);
|
|
106
|
+
} else {
|
|
107
|
+
// rectified
|
|
108
|
+
sum = sum + max(0.0, d);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
out[t] = sum / max(1.0, f32(nMels));
|
|
113
|
+
}
|
|
114
|
+
`
|
|
115
|
+
);
|
|
116
|
+
|
|
117
|
+
// src/gpu/onsetEnvelope.ts
|
|
118
|
+
/**
 * Run the `onsetEnvelopeWGSL` compute shader over a flattened mel spectrogram
 * and read the per-frame result back to the CPU.
 *
 * @param gpu   Context object; `gpu.device` is used to build the pipeline and
 *              the whole object is forwarded to the buffer helpers.
 * @param input `{ nFrames, nMels, melFlat, diffMethod }`; `melFlat` must hold
 *              exactly `nFrames * nMels` values. `diffMethod === "abs"` selects
 *              the absolute difference, anything else the rectified one.
 * @returns `{ value: { out: Float32Array }, timing }` where `timing` is whatever
 *          `submitAndReadback` reports.
 * @throws Error when `melFlat.length !== nFrames * nMels`.
 */
async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
  const { device } = gpu;
  const { nFrames, nMels, melFlat, diffMethod } = input;
  if (melFlat.length !== nFrames * nMels) {
    throw new Error("@octoseq/mir: melFlat length mismatch");
  }
  // Every buffer is registered the moment it exists so the `finally` below can
  // release it even if a later step (pipeline creation, submit, readback) fails.
  const ownedBuffers = [];
  const own = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };
  try {
    const melBuffer = own(createAndWriteStorageBuffer(gpu, melFlat));
    const outByteLen = byteSizeF32(nFrames);
    const outBuffer = own(createStorageOutBuffer(gpu, outByteLen));
    const readback = own(createReadbackBuffer(gpu, outByteLen));
    const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: { module: shader, entryPoint: "main" }
    });
    // Uniform layout mirrors the shader's Params struct: nMels, nFrames, diffMethod, pad.
    const diffU32 = diffMethod === "abs" ? 1 : 0;
    const params = own(createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0])));
    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: melBuffer } },
        { binding: 1, resource: { buffer: outBuffer } },
        { binding: 2, resource: { buffer: params } }
      ]
    });
    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // One invocation per frame; the shader declares @workgroup_size(256).
    pass.dispatchWorkgroups(Math.ceil(nFrames / 256));
    pass.end();
    const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
    return {
      value: { out: new Float32Array(bytes) },
      timing
    };
  } finally {
    for (const buffer of ownedBuffers) buffer.destroy();
  }
}
|
|
160
|
+
|
|
161
|
+
// src/dsp/onset.ts
|
|
162
|
+
/**
 * Centered moving average with edge-shrinking windows.
 *
 * A window of `windowFrames` is centred on every sample; near the edges the
 * window is clipped to the array and the mean is taken over the samples that
 * remain. When `windowFrames <= 1` the input is returned as-is (same object).
 *
 * @param values       Indexable numeric sequence.
 * @param windowFrames Window length in samples.
 * @returns Float32Array of the same length (or `values` itself, see above).
 */
function movingAverage(values, windowFrames) {
  if (windowFrames <= 1) return values;
  const len = values.length;
  const radius = Math.floor(windowFrames / 2);
  // Running sums in float64 so each window sum is a single subtraction.
  const cumulative = new Float64Array(len + 1);
  let running = 0;
  for (let i = 0; i < len; i++) {
    running += values[i] ?? 0;
    cumulative[i + 1] = running;
  }
  const smoothed = new Float32Array(len);
  for (let center = 0; center < len; center++) {
    const lo = Math.max(0, center - radius);
    const hi = Math.min(len, center + radius + 1);
    const span = Math.max(1, hi - lo);
    smoothed[center] = ((cumulative[hi] ?? 0) - (cumulative[lo] ?? 0)) / span;
  }
  return smoothed;
}
|
|
181
|
+
/**
 * Fill in onset-envelope options.
 * Missing (null/undefined) fields fall back to: useLog=false, smoothMs=30,
 * diffMethod="rectified".
 */
function defaultOptions(opts) {
  const given = opts ?? {};
  const resolved = {
    useLog: given.useLog ?? false,
    smoothMs: given.smoothMs ?? 30,
    diffMethod: given.diffMethod ?? "rectified"
  };
  return resolved;
}
|
|
188
|
+
/** log(1 + x) with negative inputs clamped to zero first. */
function logCompress(x) {
  const nonNegative = Math.max(0, x);
  return Math.log1p(nonNegative);
}
|
|
191
|
+
/**
 * Onset-strength envelope from a magnitude spectrogram.
 *
 * For every frame after the first, the per-bin difference to the previous
 * frame is either rectified (max(0, d)) or taken as |d| (diffMethod "abs"),
 * summed, and divided by the bin count. Frame 0 and frames with a missing
 * neighbour are 0. When `smoothMs > 0` and there are at least two frames the
 * result is passed through `movingAverage` with an odd window derived from
 * the spacing of the first two entries of `spec.times`.
 *
 * @param spec    Object with `times`, `fftSize` and per-frame `magnitudes`.
 * @param options Optional `{ useLog, smoothMs, diffMethod }` (see defaultOptions).
 * @returns `{ times, values }` with `times` being `spec.times`.
 */
function onsetEnvelopeFromSpectrogram(spec, options) {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  // Typed arrays start zeroed, so frame 0 and skipped frames are already 0.
  const novelty = new Float32Array(frameCount);
  for (let frame = 1; frame < frameCount; frame++) {
    const current = spec.magnitudes[frame];
    const previous = spec.magnitudes[frame - 1];
    if (!current || !previous) continue;
    let acc = 0;
    for (let bin = 0; bin < binCount; bin++) {
      const now = useLog ? logCompress(current[bin] ?? 0) : current[bin] ?? 0;
      const before = useLog ? logCompress(previous[bin] ?? 0) : previous[bin] ?? 0;
      const diff = now - before;
      acc += diffMethod === "abs" ? Math.abs(diff) : Math.max(0, diff);
    }
    novelty[frame] = binCount > 0 ? acc / binCount : 0;
  }
  let values = novelty;
  if (smoothMs > 0 && frameCount >= 2) {
    const hopSec = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
    const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, hopSec)));
    // `| 1` forces an odd window so it is centred on a sample.
    values = movingAverage(novelty, windowFrames | 1);
  }
  return { times: spec.times, values };
}
|
|
228
|
+
/**
 * Onset-strength envelope from a mel spectrogram.
 *
 * Same scheme as the spectrogram variant, but the band count is taken from
 * the length of each current frame (`mel.melBands[t].length`). Frame 0 and
 * frames with a missing neighbour are 0; a frame with zero bands is 0.
 *
 * @param mel     Object with `times` and per-frame `melBands`.
 * @param options Optional `{ useLog, smoothMs, diffMethod }` (see defaultOptions).
 * @returns `{ times, values }` with `times` being `mel.times`.
 */
function onsetEnvelopeFromMel(mel, options) {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const frameCount = mel.times.length;
  // Typed arrays start zeroed, so frame 0 and skipped frames are already 0.
  const novelty = new Float32Array(frameCount);
  for (let frame = 1; frame < frameCount; frame++) {
    const current = mel.melBands[frame];
    const previous = mel.melBands[frame - 1];
    if (!current || !previous) continue;
    const bandCount = current.length;
    let acc = 0;
    for (let band = 0; band < bandCount; band++) {
      const now = useLog ? logCompress(current[band] ?? 0) : current[band] ?? 0;
      const before = useLog ? logCompress(previous[band] ?? 0) : previous[band] ?? 0;
      const diff = now - before;
      acc += diffMethod === "abs" ? Math.abs(diff) : Math.max(0, diff);
    }
    novelty[frame] = bandCount > 0 ? acc / bandCount : 0;
  }
  let values = novelty;
  if (smoothMs > 0 && frameCount >= 2) {
    const hopSec = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
    const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, hopSec)));
    // `| 1` forces an odd window so it is centred on a sample.
    values = movingAverage(novelty, windowFrames | 1);
  }
  return { times: mel.times, values };
}
|
|
265
|
+
/**
 * GPU variant of the mel onset envelope.
 *
 * Packs `mel.melBands` row-major into one Float32Array (row width = length of
 * the first row, missing rows stay zero) and hands it to
 * `gpuOnsetEnvelopeFromMelFlat`. Only `options.diffMethod` is consulted here;
 * no smoothing or log compression is applied in this function.
 *
 * @returns `{ times, values, gpuTimings: { gpuSubmitToReadbackMs } }`.
 */
async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
  const { times, melBands } = mel;
  const nFrames = times.length;
  const nMels = melBands[0]?.length ?? 0;
  const melFlat = new Float32Array(nFrames * nMels);
  for (let frame = 0; frame < nFrames; frame++) {
    const row = melBands[frame];
    if (row) melFlat.set(row, frame * nMels);
  }
  const result = await gpuOnsetEnvelopeFromMelFlat(gpu, {
    nFrames,
    nMels,
    melFlat,
    diffMethod: options?.diffMethod ?? "rectified"
  });
  return {
    times,
    values: result.value.out,
    gpuTimings: { gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs }
  };
}
|
|
287
|
+
|
|
288
|
+
// src/dsp/spectral.ts
|
|
289
|
+
/**
 * Magnitude-weighted mean frequency (Hz) of every frame.
 *
 * Bin k is assigned frequency `k * sampleRate / fftSize`. Frames that are
 * missing or whose magnitudes sum to <= 0 produce 0.
 *
 * @param spec Object with `times`, `fftSize`, `sampleRate`, `magnitudes`.
 * @returns Float32Array, one centroid per entry of `spec.times`.
 */
function spectralCentroid(spec) {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const hzPerBin = spec.sampleRate / spec.fftSize;
  const centroids = new Float32Array(frameCount);
  for (let frame = 0; frame < frameCount; frame++) {
    const row = spec.magnitudes[frame];
    if (!row) continue;
    let weighted = 0;
    let total = 0;
    for (let bin = 0; bin < binCount; bin++) {
      const mag = row[bin] ?? 0;
      weighted += bin * hzPerBin * mag;
      total += mag;
    }
    if (total > 0) centroids[frame] = weighted / total;
  }
  return centroids;
}
|
|
312
|
+
/**
 * Frame-to-frame spectral flux of a magnitude spectrogram.
 *
 * Each frame is scaled to unit sum; the flux is the L1 distance between the
 * scaled frame and the previous scaled frame. A missing frame, or one whose
 * magnitudes sum to <= 0, yields 0 and resets the comparison, so the next
 * usable frame also yields 0.
 *
 * @param spec Object with `times`, `fftSize` and per-frame `magnitudes`.
 * @returns Float32Array, one flux value per entry of `spec.times`.
 */
function spectralFlux(spec) {
  const nFrames = spec.times.length;
  const out = new Float32Array(nFrames);
  const nBins = (spec.fftSize >>> 1) + 1;
  // Two scratch rows swapped after every usable frame, instead of allocating a
  // new normalised row per frame.
  let prev = new Float32Array(nBins);
  let cur = new Float32Array(nBins);
  let hasPrev = false;
  for (let t = 0; t < nFrames; t++) {
    const mags = spec.magnitudes[t];
    let sum = 0;
    if (mags) {
      for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
    }
    if (!mags || sum <= 0) {
      // out[t] is already 0; forget the previous frame.
      hasPrev = false;
      continue;
    }
    const inv = 1 / sum;
    for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
    if (hasPrev) {
      let flux = 0;
      for (let k = 0; k < nBins; k++) flux += Math.abs(cur[k] - prev[k]);
      out[t] = flux;
    }
    const spare = prev;
    prev = cur;
    cur = spare;
    hasPrev = true;
  }
  return out;
}
|
|
349
|
+
|
|
350
|
+
// src/dsp/beatCandidates.ts
|
|
351
|
+
/**
 * Centered moving average (window clipped at the array edges).
 * Returns `values` unchanged when `windowFrames <= 1`, otherwise a new
 * Float32Array of the same length.
 */
function movingAverage2(values, windowFrames) {
  if (windowFrames <= 1) return values;
  const count = values.length;
  const reach = Math.floor(windowFrames / 2);
  // sums[i] holds the float64 sum of values[0..i-1].
  const sums = new Float64Array(count + 1);
  let total = 0;
  for (let i = 0; i < count; i++) {
    total += values[i] ?? 0;
    sums[i + 1] = total;
  }
  const averaged = new Float32Array(count);
  for (let mid = 0; mid < count; mid++) {
    const from = Math.max(0, mid - reach);
    const to = Math.min(count, mid + reach + 1);
    const width = Math.max(1, to - from);
    averaged[mid] = ((sums[to] ?? 0) - (sums[from] ?? 0)) / width;
  }
  return averaged;
}
|
|
370
|
+
/**
 * Mean and population standard deviation (divides by n, not n - 1).
 * An empty input gives `{ mean: 0, std: 0 }`.
 */
function meanStd(values) {
  const count = values.length;
  if (count <= 0) return { mean: 0, std: 0 };
  let total = 0;
  for (let i = 0; i < count; i++) total += values[i] ?? 0;
  const mean = total / count;
  let squared = 0;
  for (let i = 0; i < count; i++) {
    const dev = (values[i] ?? 0) - mean;
    squared += dev * dev;
  }
  return { mean, std: Math.sqrt(squared / count) };
}
|
|
384
|
+
/**
 * Standardise to zero mean / unit (population) standard deviation.
 * If the deviation is 0 or not finite the result is all zeros.
 *
 * @returns New Float32Array with the same length as `values`.
 */
function zScoreNormalize(values) {
  const stats = meanStd(values);
  const count = values.length;
  // Zero-initialised, which is already the answer for the degenerate case.
  const scaled = new Float32Array(count);
  const usable = stats.std !== 0 && Number.isFinite(stats.std);
  if (usable) {
    for (let i = 0; i < count; i++) {
      scaled[i] = ((values[i] ?? 0) - stats.mean) / stats.std;
    }
  }
  return scaled;
}
|
|
397
|
+
/**
 * Rescale linearly so the smallest value maps to 0 and the largest to 1.
 * A constant (or non-finite-range) input maps to all 0.5; an empty input
 * gives an empty array.
 *
 * @returns New Float32Array with the same length as `values`.
 */
function minMaxNormalize(values) {
  const count = values.length;
  if (count === 0) return new Float32Array(0);
  let lowest = Infinity;
  let highest = -Infinity;
  for (let i = 0; i < count; i++) {
    const sample = values[i] ?? 0;
    if (sample < lowest) lowest = sample;
    if (sample > highest) highest = sample;
  }
  const span = highest - lowest;
  const scaled = new Float32Array(count);
  if (span === 0 || !Number.isFinite(span)) return scaled.fill(0.5);
  for (let i = 0; i < count; i++) {
    scaled[i] = ((values[i] ?? 0) - lowest) / span;
  }
  return scaled;
}
|
|
418
|
+
/**
 * Beat-salience curve in [0, 1] built from two cues.
 *
 * The rectified mel onset envelope and the spectral flux are each z-scored,
 * blended 0.7 / 0.3, smoothed with a moving average of about `smoothMs`
 * (default 50) and finally min-max normalised. Both inputs are truncated to
 * the shorter of the two.
 *
 * @param mel     Mel spectrogram passed to `onsetEnvelopeFromMel`.
 * @param spec    Magnitude spectrogram passed to `spectralFlux`.
 * @param options Optional `{ smoothMs }`.
 * @returns `{ times, values }`.
 */
function beatSalienceFromMel(mel, spec, options) {
  const ONSET_SHARE = 0.7;
  const FLUX_SHARE = 0.3;
  const smoothMs = options?.smoothMs ?? 50;
  const onset = onsetEnvelopeFromMel(mel, {
    smoothMs,
    diffMethod: "rectified",
    useLog: false
  });
  const flux = spectralFlux(spec);
  const length = Math.min(onset.times.length, flux.length);
  const onsetZ = zScoreNormalize(onset.values.subarray(0, length));
  const fluxZ = zScoreNormalize(flux.subarray(0, length));
  const blend = new Float32Array(length);
  for (let i = 0; i < length; i++) {
    blend[i] = ONSET_SHARE * (onsetZ[i] ?? 0) + FLUX_SHARE * (fluxZ[i] ?? 0);
  }
  // Frame spacing from the first two timestamps; 0.01 s when there is only one frame.
  let hopSec = 0.01;
  if (length >= 2) hopSec = (onset.times[1] ?? 0) - (onset.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, hopSec)));
  const values = minMaxNormalize(movingAverage2(blend, windowFrames | 1));
  return { times: onset.times.subarray(0, length), values };
}
|
|
444
|
+
/**
 * Pick beat candidates as thresholded local maxima of a salience curve.
 *
 * A sample qualifies when it is >= mean + thresholdFactor * std of the curve
 * and strictly greater than both neighbours. Peaks closer than
 * `minIntervalSec` to the last accepted candidate do not create a new
 * candidate; a stronger one replaces the last candidate instead.
 *
 * @param salience `{ times, values }` of equal length.
 * @param options  Optional `{ minIntervalSec = 0.1, thresholdFactor = 0.5 }`.
 * @param source   Label copied onto every candidate.
 * @returns Array of `{ time, strength, source }` in time order; empty when the
 *          curve has fewer than 3 samples.
 */
function pickBeatCandidates(salience, options, source) {
  const minIntervalSec = options?.minIntervalSec ?? 0.1;
  const thresholdFactor = options?.thresholdFactor ?? 0.5;
  const { times, values } = salience;
  const n = values.length;
  if (n < 3) return [];
  const { mean, std } = meanStd(values);
  const threshold = mean + thresholdFactor * std;
  const candidates = [];
  let lastPeakTime = -Infinity;
  for (let i = 1; i < n - 1; i++) {
    const v = values[i] ?? 0;
    if (v < threshold) continue;
    const prev = values[i - 1] ?? 0;
    const next = values[i + 1] ?? 0;
    if (!(v > prev && v > next)) continue;
    const t = times[i] ?? 0;
    if (t - lastPeakTime < minIntervalSec) {
      const last = candidates[candidates.length - 1];
      if (last && v > last.strength) {
        last.time = t;
        last.strength = v;
        // The candidate moved, so spacing must now be measured from its new
        // position; otherwise a following peak could be accepted closer than
        // minIntervalSec to it.
        lastPeakTime = t;
      }
      continue;
    }
    candidates.push({
      time: t,
      strength: v,
      source
    });
    lastPeakTime = t;
  }
  return candidates;
}
|
|
478
|
+
/**
 * Compute the beat-salience curve and pick candidates from it.
 *
 * Defaults: minIntervalSec 0.1, thresholdFactor 0.5, smoothMs 50.
 * Candidates are labelled with source "combined".
 *
 * @returns `{ candidates, salience }`.
 */
function detectBeatCandidates(mel, spec, options) {
  const minIntervalSec = options?.minIntervalSec ?? 0.1;
  const thresholdFactor = options?.thresholdFactor ?? 0.5;
  const smoothMs = options?.smoothMs ?? 50;
  const salience = beatSalienceFromMel(mel, spec, { smoothMs });
  const candidates = pickBeatCandidates(
    salience,
    { minIntervalSec, thresholdFactor, smoothMs },
    "combined"
  );
  return { candidates, salience };
}
|
|
491
|
+
|
|
492
|
+
// src/dsp/tempoHypotheses.ts
|
|
493
|
+
/** Beats per minute for a beat period given in seconds. */
function intervalToBpm(intervalSec) {
  const secondsPerMinute = 60;
  return secondsPerMinute / intervalSec;
}
|
|
496
|
+
/** Beat period in seconds for a tempo given in beats per minute. */
function bpmToInterval(bpm) {
  const secondsPerMinute = 60;
  return secondsPerMinute / bpm;
}
|
|
499
|
+
/**
 * Inter-onset intervals between time-adjacent candidates.
 *
 * Candidates are copied and sorted by time; each consecutive pair with a
 * positive gap yields `{ intervalSec, weight }`. The weight is the geometric
 * mean of the two strengths when `weightByStrength` is truthy, otherwise 1.
 * Fewer than two candidates give an empty list. The input is not mutated.
 */
function computeIOIs(candidates, weightByStrength) {
  if (candidates.length < 2) return [];
  const byTime = Array.from(candidates).sort((a, b) => a.time - b.time);
  const intervals = [];
  let earlier = byTime[0];
  for (let i = 1; i < byTime.length; i++) {
    const later = byTime[i];
    const gap = later.time - earlier.time;
    if (!(gap <= 0)) {
      intervals.push({
        intervalSec: gap,
        weight: weightByStrength ? Math.sqrt(earlier.strength * later.strength) : 1
      });
    }
    earlier = later;
  }
  return intervals;
}
|
|
513
|
+
/**
 * Weighted histogram of inter-onset intervals on a linear BPM axis.
 *
 * Bins are `binSizeBpm` wide starting at `minBpm`; `bpmBins[i]` is the centre
 * of bin i. Intervals outside [60 / maxBpm, 60 / minBpm] are ignored. The BPM
 * range is treated as closed on both ends: an interval exactly at `maxBpm`
 * is counted in the last bin, and rounding just below `minBpm` is counted in
 * the first bin, instead of being dropped.
 *
 * @param iois       Iterable of `{ intervalSec, weight }`.
 * @param minBpm     Lower edge of the first bin.
 * @param maxBpm     Upper tempo bound.
 * @param binSizeBpm Bin width in BPM.
 * @returns `{ bpmBins, counts }`, both Float32Array of equal length.
 */
function buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm) {
  const numBins = Math.ceil((maxBpm - minBpm) / binSizeBpm);
  const counts = new Float32Array(numBins);
  const bpmBins = new Float32Array(numBins);
  for (let i = 0; i < numBins; i++) {
    bpmBins[i] = minBpm + (i + 0.5) * binSizeBpm;
  }
  // Interval (seconds) <-> tempo (BPM) conversion is 60 / x in both directions.
  const minInterval = 60 / maxBpm;
  const maxInterval = 60 / minBpm;
  for (const { intervalSec, weight } of iois) {
    if (intervalSec < minInterval || intervalSec > maxInterval) continue;
    const bpm = 60 / intervalSec;
    const rawIndex = Math.floor((bpm - minBpm) / binSizeBpm);
    // The interval already passed the inclusive range check, so an index that
    // falls one step outside is an edge value and belongs in the edge bin.
    const binIndex = Math.min(numBins - 1, Math.max(0, rawIndex));
    // False for NaN and for an empty histogram (numBins - 1 < 0).
    if (binIndex >= 0) {
      counts[binIndex] = (counts[binIndex] ?? 0) + weight;
    }
  }
  return { bpmBins, counts };
}
|
|
532
|
+
/**
 * Indices of histogram peaks, tallest first.
 *
 * Interior bins count as peaks when strictly higher than both neighbours and
 * at least `minHeight`. The first and last bins count when at least
 * `minHeight` and strictly higher than their single neighbour (a missing
 * neighbour is treated as 0).
 */
function findHistogramPeaks(counts, minHeight) {
  const size = counts.length;
  const found = [];
  const record = (index) => {
    found.push({ index, height: counts[index] });
  };
  for (let i = 1; i < size - 1; i++) {
    const here = counts[i];
    if (here > counts[i - 1] && here > counts[i + 1] && here >= minHeight) record(i);
  }
  if (size > 0 && counts[0] >= minHeight && counts[0] > (counts[1] ?? 0)) record(0);
  if (size > 1) {
    const tail = size - 1;
    if (counts[tail] >= minHeight && counts[tail] > (counts[tail - 1] ?? 0)) record(tail);
  }
  return found.sort((a, b) => b.height - a.height).map((peak) => peak.index);
}
|
|
554
|
+
/**
 * Refine a histogram peak to a sub-bin tempo.
 *
 * Takes the count-weighted mean of the bin centres at peakIndex - 1,
 * peakIndex and peakIndex + 1 (those that exist). `binRange` starts as the
 * peak bin's own edges and is widened to cover each neighbour with a
 * positive count.
 *
 * @returns `{ bpm, peakHeight, binRange: [lo, hi], totalWeight }`; `bpm`
 *          falls back to the peak bin centre when the total weight is not > 0.
 */
function refinePeakBpm(peakIndex, bpmBins, counts, binSizeBpm) {
  const centre = bpmBins[peakIndex];
  let rangeLo = centre - binSizeBpm / 2;
  let rangeHi = centre + binSizeBpm / 2;
  let totalWeight = 0;
  let weightedSum = 0;
  for (const step of [-1, 0, 1]) {
    const bin = peakIndex + step;
    if (bin < 0 || bin >= bpmBins.length) continue;
    const weight = counts[bin];
    const binBpm = bpmBins[bin];
    totalWeight += weight;
    weightedSum += weight * binBpm;
    if (weight > 0) {
      rangeLo = Math.min(rangeLo, binBpm - binSizeBpm / 2);
      rangeHi = Math.max(rangeHi, binBpm + binSizeBpm / 2);
    }
  }
  return {
    bpm: totalWeight > 0 ? weightedSum / totalWeight : bpmBins[peakIndex],
    peakHeight: counts[peakIndex],
    binRange: [rangeLo, rangeHi],
    totalWeight
  };
}
|
|
579
|
+
function getHarmonicRatio(bpm1, bpm2, tolerance = 0.03) {
|
|
580
|
+
const ratios = [0.5, 1 / 3, 2 / 3, 1, 1.5, 2, 3];
|
|
581
|
+
for (const ratio of ratios) {
|
|
582
|
+
const expected = bpm1 * ratio;
|
|
583
|
+
const relativeError = Math.abs(bpm2 - expected) / expected;
|
|
584
|
+
if (relativeError <= tolerance) {
|
|
585
|
+
return ratio;
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
return null;
|
|
589
|
+
}
|
|
590
|
+
/**
 * Group hypotheses into harmonic families, mutating them in place.
 *
 * Hypotheses are visited in order. Each one joins the first existing family
 * whose root tempo is harmonically related to it (per `getHarmonicRatio`) and
 * records that ratio; otherwise it founds a new family keyed
 * `fam-<rounded bpm>` with ratio 1.
 */
function assignHarmonicFamilies(hypotheses) {
  if (hypotheses.length === 0) return;
  const families = /* @__PURE__ */ new Map();
  for (const hyp of hypotheses) {
    let joined = null;
    for (const [familyId, family] of families) {
      const ratio = getHarmonicRatio(family.rootBpm, hyp.bpm);
      if (ratio === null) continue;
      joined = { familyId, family, ratio };
      break;
    }
    if (joined) {
      hyp.familyId = joined.familyId;
      hyp.harmonicRatio = joined.ratio;
      joined.family.members.push(hyp);
      continue;
    }
    const newId = `fam-${Math.round(hyp.bpm)}`;
    hyp.familyId = newId;
    hyp.harmonicRatio = 1;
    families.set(newId, { rootBpm: hyp.bpm, members: [hyp] });
  }
}
|
|
613
|
+
/**
 * Set each hypothesis' `confidence` to its peak height divided by the largest
 * peak height in the list (in place). Does nothing for an empty list or when
 * the largest height is <= 0.
 */
function normalizeConfidence(hypotheses) {
  if (hypotheses.length === 0) return;
  const tallest = hypotheses.reduce(
    (best, hyp) => Math.max(best, hyp.evidence.peakHeight),
    -Infinity
  );
  if (tallest <= 0) return;
  hypotheses.forEach((hyp) => {
    hyp.confidence = hyp.evidence.peakHeight / tallest;
  });
}
|
|
621
|
+
/**
 * Turn beat candidates into ranked tempo hypotheses.
 *
 * Pipeline: inter-onset intervals -> weighted BPM histogram -> histogram
 * peaks -> sub-bin refinement -> harmonic-family assignment -> confidence
 * normalisation -> filter by `minConfidence`, sort by confidence, keep the
 * top `maxHypotheses`, and assign ids `hyp-0`, `hyp-1`, ...
 *
 * Option defaults: minBpm 24, maxBpm 300, binSizeBpm 1, maxHypotheses 10,
 * minConfidence 0.05, weightByStrength true, includeHistogram false.
 *
 * @param candidates Array of `{ time, strength }` beat candidates.
 * @param options    Optional overrides for the defaults above.
 * @returns `{ hypotheses, inputCandidateCount, histogram }`; `histogram` is
 *          undefined unless `includeHistogram` is set.
 */
function generateTempoHypotheses(candidates, options) {
  const minBpm = options?.minBpm ?? 24;
  const maxBpm = options?.maxBpm ?? 300;
  const binSizeBpm = options?.binSizeBpm ?? 1;
  const maxHypotheses = options?.maxHypotheses ?? 10;
  const minConfidence = options?.minConfidence ?? 0.05;
  const weightByStrength = options?.weightByStrength ?? true;
  const includeHistogram = options?.includeHistogram ?? false;
  // Shared shape for the "nothing to analyse" exits.
  const emptyResult = () => ({
    hypotheses: [],
    inputCandidateCount: candidates.length,
    histogram: includeHistogram ? {
      bpmBins: new Float32Array(0),
      counts: new Float32Array(0)
    } : void 0
  });
  if (candidates.length < 2) return emptyResult();
  const iois = computeIOIs(candidates, weightByStrength);
  if (iois.length === 0) return emptyResult();
  const { bpmBins, counts } = buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm);
  // Loop instead of Math.max(...counts): spreading a histogram with very many
  // bins (small binSizeBpm) as call arguments can overflow the call stack.
  // Math.max keeps the NaN propagation and the -Infinity result for no bins.
  let maxCount = -Infinity;
  for (let i = 0; i < counts.length; i++) {
    maxCount = Math.max(maxCount, counts[i]);
  }
  const minHeight = maxCount * minConfidence;
  const peakIndices = findHistogramPeaks(counts, minHeight);
  const hypotheses = [];
  // Refine twice as many peaks as will finally be returned.
  for (const peakIndex of peakIndices.slice(0, maxHypotheses * 2)) {
    const { bpm, peakHeight, binRange, totalWeight } = refinePeakBpm(
      peakIndex,
      bpmBins,
      counts,
      binSizeBpm
    );
    if (maxCount > 0 && peakHeight / maxCount < minConfidence) continue;
    const evidence = {
      // Rounded summed weight; equals a true count only when weights are 1.
      supportingIntervalCount: Math.round(totalWeight),
      weightedSupport: totalWeight,
      peakHeight,
      binRange
    };
    hypotheses.push({
      id: "",
      // Will be assigned after sorting
      bpm: Math.round(bpm * 10) / 10,
      // Round to 0.1 BPM precision
      confidence: 0,
      // Will be normalized
      evidence,
      familyId: "",
      // Will be assigned
      harmonicRatio: 1
      // Will be assigned
    });
  }
  assignHarmonicFamilies(hypotheses);
  normalizeConfidence(hypotheses);
  const filtered = hypotheses
    .filter((h) => h.confidence >= minConfidence)
    .sort((a, b) => b.confidence - a.confidence)
    .slice(0, maxHypotheses);
  for (let i = 0; i < filtered.length; i++) {
    filtered[i].id = `hyp-${i}`;
  }
  return {
    hypotheses: filtered,
    inputCandidateCount: candidates.length,
    histogram: includeHistogram ? { bpmBins, counts } : void 0
  };
}
|
|
695
|
+
|
|
56
696
|
// src/gpu/kernels/melProject.wgsl.ts
|
|
57
697
|
var melProjectWGSL = (
|
|
58
698
|
/* wgsl */
|
|
@@ -162,6 +802,20 @@ function hzToMel(hz) {
|
|
|
162
802
|
/** Convert a mel value to Hz: 700 * (10^(mel / 2595) - 1). */
function melToHz(mel) {
  const exponent = mel / 2595;
  return 700 * (10 ** exponent - 1);
}
|
|
805
|
+
/**
 * Map a frequency in Hz to a fractional mel-band index.
 *
 * The mel value of `hz` is placed linearly between the mel values of
 * `config.fMin` (index 0) and `config.fMax` (index nMels - 1). No clamping is
 * applied, so frequencies outside the range give indices outside
 * [0, nMels - 1].
 *
 * @param hz     Frequency in Hz.
 * @param config `{ fMin, fMax, nMels }`.
 * @returns Fractional band index; 0 when fMin and fMax map to the same mel
 *          value (otherwise the division would be by zero).
 */
function hzToFeatureIndex(hz, config) {
  const melMin = hzToMel(config.fMin);
  const melMax = hzToMel(config.fMax);
  const melSpan = melMax - melMin;
  // Degenerate range: every frequency collapses onto the single available index.
  if (melSpan === 0) return 0;
  const normalized = (hzToMel(hz) - melMin) / melSpan;
  return normalized * (config.nMels - 1);
}
|
|
812
|
+
/**
 * Map a (fractional) mel-band index back to a frequency in Hz.
 *
 * Index 0 corresponds to `config.fMin` and index nMels - 1 to `config.fMax`,
 * interpolated linearly on the mel scale.
 *
 * @param index  Band index, may be fractional.
 * @param config `{ fMin, fMax, nMels }`.
 * @returns Frequency in Hz. With a single band (nMels - 1 === 0) the result is
 *          the frequency of `fMin` rather than NaN from 0 / 0.
 */
function featureIndexToHz(index, config) {
  const melMin = hzToMel(config.fMin);
  const melMax = hzToMel(config.fMax);
  const lastIndex = config.nMels - 1;
  // Single-band configuration: there is no span to interpolate across.
  const normalized = lastIndex === 0 ? 0 : index / lastIndex;
  const mel = melMin + normalized * (melMax - melMin);
  return melToHz(mel);
}
|
|
165
819
|
function buildMelFilterBank(sampleRate, fftSize, nMels, fMin, fMax) {
|
|
166
820
|
const nBins = (fftSize >>> 1) + 1;
|
|
167
821
|
const nyquist = sampleRate / 2;
|
|
@@ -351,248 +1005,16 @@ function delta(features, options = {}) {
|
|
|
351
1005
|
}
|
|
352
1006
|
d[f] = denom > 0 ? num / denom : 0;
|
|
353
1007
|
}
|
|
354
|
-
out[t] = d;
|
|
355
|
-
}
|
|
356
|
-
return { times: features.times, values: out };
|
|
357
|
-
}
|
|
358
|
-
function deltaDelta(features, options = {}) {
|
|
359
|
-
return delta(delta(features, options), options);
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
// src/gpu/kernels/onsetEnvelope.wgsl.ts
|
|
363
|
-
var onsetEnvelopeWGSL = (
|
|
364
|
-
/* wgsl */
|
|
365
|
-
`
|
|
366
|
-
// Compute onset strength envelope from a (log) mel spectrogram.
|
|
367
|
-
//
|
|
368
|
-
// Input layout: melFlat[t*nMels + m]
|
|
369
|
-
// Output layout: out[t]
|
|
370
|
-
//
|
|
371
|
-
// We compute novelty per frame:
|
|
372
|
-
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
|
|
373
|
-
// or sum_m abs(...)
|
|
374
|
-
//
|
|
375
|
-
// One invocation computes one frame index (t). This is memory-bound but reduces a full
|
|
376
|
-
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
|
|
377
|
-
|
|
378
|
-
struct Params {
|
|
379
|
-
nMels: u32,
|
|
380
|
-
nFrames: u32,
|
|
381
|
-
diffMethod: u32, // 0=rectified, 1=abs
|
|
382
|
-
_pad: u32,
|
|
383
|
-
};
|
|
384
|
-
|
|
385
|
-
@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
|
|
386
|
-
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
|
|
387
|
-
@group(0) @binding(2) var<uniform> params: Params;
|
|
388
|
-
|
|
389
|
-
@compute @workgroup_size(256)
|
|
390
|
-
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
391
|
-
let t = gid.x;
|
|
392
|
-
if (t >= params.nFrames) { return; }
|
|
393
|
-
|
|
394
|
-
if (t == 0u) {
|
|
395
|
-
out[t] = 0.0;
|
|
396
|
-
return;
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
let nMels = params.nMels;
|
|
400
|
-
var sum: f32 = 0.0;
|
|
401
|
-
|
|
402
|
-
// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
|
|
403
|
-
// (Future optimisation: parallelise reduction within workgroup.)
|
|
404
|
-
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
|
|
405
|
-
let a = melFlat[t * nMels + m];
|
|
406
|
-
let b = melFlat[(t - 1u) * nMels + m];
|
|
407
|
-
let d = a - b;
|
|
408
|
-
|
|
409
|
-
if (params.diffMethod == 1u) {
|
|
410
|
-
// abs
|
|
411
|
-
sum = sum + abs(d);
|
|
412
|
-
} else {
|
|
413
|
-
// rectified
|
|
414
|
-
sum = sum + max(0.0, d);
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
out[t] = sum / max(1.0, f32(nMels));
|
|
419
|
-
}
|
|
420
|
-
`
|
|
421
|
-
);
|
|
422
|
-
|
|
423
|
-
// src/gpu/onsetEnvelope.ts
|
|
424
|
-
async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
|
|
425
|
-
const { device } = gpu;
|
|
426
|
-
const { nFrames, nMels, melFlat, diffMethod } = input;
|
|
427
|
-
if (melFlat.length !== nFrames * nMels) {
|
|
428
|
-
throw new Error("@octoseq/mir: melFlat length mismatch");
|
|
429
|
-
}
|
|
430
|
-
const melBuffer = createAndWriteStorageBuffer(gpu, melFlat);
|
|
431
|
-
const outByteLen = byteSizeF32(nFrames);
|
|
432
|
-
const outBuffer = createStorageOutBuffer(gpu, outByteLen);
|
|
433
|
-
const readback = createReadbackBuffer(gpu, outByteLen);
|
|
434
|
-
const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
|
|
435
|
-
const pipeline = device.createComputePipeline({
|
|
436
|
-
layout: "auto",
|
|
437
|
-
compute: { module: shader, entryPoint: "main" }
|
|
438
|
-
});
|
|
439
|
-
const diffU32 = diffMethod === "abs" ? 1 : 0;
|
|
440
|
-
const params = createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0]));
|
|
441
|
-
const bindGroup = device.createBindGroup({
|
|
442
|
-
layout: pipeline.getBindGroupLayout(0),
|
|
443
|
-
entries: [
|
|
444
|
-
{ binding: 0, resource: { buffer: melBuffer } },
|
|
445
|
-
{ binding: 1, resource: { buffer: outBuffer } },
|
|
446
|
-
{ binding: 2, resource: { buffer: params } }
|
|
447
|
-
]
|
|
448
|
-
});
|
|
449
|
-
const encoder = device.createCommandEncoder();
|
|
450
|
-
const pass = encoder.beginComputePass();
|
|
451
|
-
pass.setPipeline(pipeline);
|
|
452
|
-
pass.setBindGroup(0, bindGroup);
|
|
453
|
-
const wg = Math.ceil(nFrames / 256);
|
|
454
|
-
pass.dispatchWorkgroups(wg);
|
|
455
|
-
pass.end();
|
|
456
|
-
const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
|
|
457
|
-
melBuffer.destroy();
|
|
458
|
-
outBuffer.destroy();
|
|
459
|
-
params.destroy();
|
|
460
|
-
readback.destroy();
|
|
461
|
-
return {
|
|
462
|
-
value: { out: new Float32Array(bytes) },
|
|
463
|
-
timing
|
|
464
|
-
};
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
// src/dsp/onset.ts
|
|
468
|
-
function movingAverage(values, windowFrames) {
|
|
469
|
-
if (windowFrames <= 1) return values;
|
|
470
|
-
const n = values.length;
|
|
471
|
-
const out = new Float32Array(n);
|
|
472
|
-
const half = Math.floor(windowFrames / 2);
|
|
473
|
-
const prefix = new Float64Array(n + 1);
|
|
474
|
-
prefix[0] = 0;
|
|
475
|
-
for (let i = 0; i < n; i++) {
|
|
476
|
-
prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
|
|
477
|
-
}
|
|
478
|
-
for (let i = 0; i < n; i++) {
|
|
479
|
-
const start = Math.max(0, i - half);
|
|
480
|
-
const end = Math.min(n, i + half + 1);
|
|
481
|
-
const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
|
|
482
|
-
const count = Math.max(1, end - start);
|
|
483
|
-
out[i] = sum / count;
|
|
484
|
-
}
|
|
485
|
-
return out;
|
|
486
|
-
}
|
|
487
|
-
function defaultOptions(opts) {
|
|
488
|
-
return {
|
|
489
|
-
useLog: opts?.useLog ?? false,
|
|
490
|
-
smoothMs: opts?.smoothMs ?? 30,
|
|
491
|
-
diffMethod: opts?.diffMethod ?? "rectified"
|
|
492
|
-
};
|
|
493
|
-
}
|
|
494
|
-
function logCompress(x) {
|
|
495
|
-
return Math.log1p(Math.max(0, x));
|
|
496
|
-
}
|
|
497
|
-
function onsetEnvelopeFromSpectrogram(spec, options) {
|
|
498
|
-
const opts = defaultOptions(options);
|
|
499
|
-
const nFrames = spec.times.length;
|
|
500
|
-
const out = new Float32Array(nFrames);
|
|
501
|
-
const nBins = (spec.fftSize >>> 1) + 1;
|
|
502
|
-
out[0] = 0;
|
|
503
|
-
for (let t = 1; t < nFrames; t++) {
|
|
504
|
-
const cur = spec.magnitudes[t];
|
|
505
|
-
const prev = spec.magnitudes[t - 1];
|
|
506
|
-
if (!cur || !prev) {
|
|
507
|
-
out[t] = 0;
|
|
508
|
-
continue;
|
|
509
|
-
}
|
|
510
|
-
let sum = 0;
|
|
511
|
-
for (let k = 0; k < nBins; k++) {
|
|
512
|
-
let a = cur[k] ?? 0;
|
|
513
|
-
let b = prev[k] ?? 0;
|
|
514
|
-
if (opts.useLog) {
|
|
515
|
-
a = logCompress(a);
|
|
516
|
-
b = logCompress(b);
|
|
517
|
-
}
|
|
518
|
-
const d = a - b;
|
|
519
|
-
sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
|
|
520
|
-
}
|
|
521
|
-
out[t] = nBins > 0 ? sum / nBins : 0;
|
|
522
|
-
}
|
|
523
|
-
const smoothMs = opts.smoothMs;
|
|
524
|
-
if (smoothMs > 0 && nFrames >= 2) {
|
|
525
|
-
const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
|
|
526
|
-
const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
|
|
527
|
-
return {
|
|
528
|
-
times: spec.times,
|
|
529
|
-
values: movingAverage(out, windowFrames | 1)
|
|
530
|
-
};
|
|
531
|
-
}
|
|
532
|
-
return { times: spec.times, values: out };
|
|
533
|
-
}
|
|
534
|
-
function onsetEnvelopeFromMel(mel, options) {
|
|
535
|
-
const opts = defaultOptions(options);
|
|
536
|
-
const nFrames = mel.times.length;
|
|
537
|
-
const out = new Float32Array(nFrames);
|
|
538
|
-
out[0] = 0;
|
|
539
|
-
for (let t = 1; t < nFrames; t++) {
|
|
540
|
-
const cur = mel.melBands[t];
|
|
541
|
-
const prev = mel.melBands[t - 1];
|
|
542
|
-
if (!cur || !prev) {
|
|
543
|
-
out[t] = 0;
|
|
544
|
-
continue;
|
|
545
|
-
}
|
|
546
|
-
const nBands = cur.length;
|
|
547
|
-
let sum = 0;
|
|
548
|
-
for (let m = 0; m < nBands; m++) {
|
|
549
|
-
let a = cur[m] ?? 0;
|
|
550
|
-
let b = prev[m] ?? 0;
|
|
551
|
-
if (opts.useLog) {
|
|
552
|
-
a = logCompress(a);
|
|
553
|
-
b = logCompress(b);
|
|
554
|
-
}
|
|
555
|
-
const d = a - b;
|
|
556
|
-
sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
|
|
557
|
-
}
|
|
558
|
-
out[t] = nBands > 0 ? sum / nBands : 0;
|
|
559
|
-
}
|
|
560
|
-
const smoothMs = opts.smoothMs;
|
|
561
|
-
if (smoothMs > 0 && nFrames >= 2) {
|
|
562
|
-
const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
|
|
563
|
-
const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
|
|
564
|
-
return {
|
|
565
|
-
times: mel.times,
|
|
566
|
-
values: movingAverage(out, windowFrames | 1)
|
|
567
|
-
};
|
|
1008
|
+
out[t] = d;
|
|
568
1009
|
}
|
|
569
|
-
return { times:
|
|
1010
|
+
return { times: features.times, values: out };
|
|
570
1011
|
}
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
const nMels = mel.melBands[0]?.length ?? 0;
|
|
574
|
-
const melFlat = new Float32Array(nFrames * nMels);
|
|
575
|
-
for (let t = 0; t < nFrames; t++) {
|
|
576
|
-
const row = mel.melBands[t];
|
|
577
|
-
if (!row) continue;
|
|
578
|
-
melFlat.set(row, t * nMels);
|
|
579
|
-
}
|
|
580
|
-
const diffMethod = options?.diffMethod ?? "rectified";
|
|
581
|
-
const { value, timing } = await gpuOnsetEnvelopeFromMelFlat(gpu, {
|
|
582
|
-
nFrames,
|
|
583
|
-
nMels,
|
|
584
|
-
melFlat,
|
|
585
|
-
diffMethod
|
|
586
|
-
});
|
|
587
|
-
return {
|
|
588
|
-
times: mel.times,
|
|
589
|
-
values: value.out,
|
|
590
|
-
gpuTimings: { gpuSubmitToReadbackMs: timing.gpuSubmitToReadbackMs }
|
|
591
|
-
};
|
|
1012
|
+
function deltaDelta(features, options = {}) {
|
|
1013
|
+
return delta(delta(features, options), options);
|
|
592
1014
|
}
|
|
593
1015
|
|
|
594
1016
|
// src/dsp/peakPick.ts
|
|
595
|
-
function
|
|
1017
|
+
function meanStd2(values) {
|
|
596
1018
|
const n = values.length;
|
|
597
1019
|
if (n <= 0) return { mean: 0, std: 0 };
|
|
598
1020
|
let mean = 0;
|
|
@@ -629,7 +1051,7 @@ function peakPick(times, values, options = {}) {
|
|
|
629
1051
|
if (method === "median") {
|
|
630
1052
|
thr = median(values) * factor;
|
|
631
1053
|
} else {
|
|
632
|
-
const { mean, std } =
|
|
1054
|
+
const { mean, std } = meanStd2(values);
|
|
633
1055
|
thr = mean + factor * std;
|
|
634
1056
|
}
|
|
635
1057
|
}
|
|
@@ -1005,68 +1427,6 @@ async function hpssGpu(spec, gpu, options = {}) {
|
|
|
1005
1427
|
};
|
|
1006
1428
|
}
|
|
1007
1429
|
|
|
1008
|
-
// src/dsp/spectral.ts
|
|
1009
|
-
function spectralCentroid(spec) {
|
|
1010
|
-
const nFrames = spec.times.length;
|
|
1011
|
-
const out = new Float32Array(nFrames);
|
|
1012
|
-
const nBins = (spec.fftSize >>> 1) + 1;
|
|
1013
|
-
const binHz = spec.sampleRate / spec.fftSize;
|
|
1014
|
-
for (let t = 0; t < nFrames; t++) {
|
|
1015
|
-
const mags = spec.magnitudes[t];
|
|
1016
|
-
if (!mags) {
|
|
1017
|
-
out[t] = 0;
|
|
1018
|
-
continue;
|
|
1019
|
-
}
|
|
1020
|
-
let num = 0;
|
|
1021
|
-
let den = 0;
|
|
1022
|
-
for (let k = 0; k < nBins; k++) {
|
|
1023
|
-
const m = mags[k] ?? 0;
|
|
1024
|
-
const f = k * binHz;
|
|
1025
|
-
num += f * m;
|
|
1026
|
-
den += m;
|
|
1027
|
-
}
|
|
1028
|
-
out[t] = den > 0 ? num / den : 0;
|
|
1029
|
-
}
|
|
1030
|
-
return out;
|
|
1031
|
-
}
|
|
1032
|
-
function spectralFlux(spec) {
|
|
1033
|
-
const nFrames = spec.times.length;
|
|
1034
|
-
const out = new Float32Array(nFrames);
|
|
1035
|
-
const nBins = (spec.fftSize >>> 1) + 1;
|
|
1036
|
-
let prev = null;
|
|
1037
|
-
for (let t = 0; t < nFrames; t++) {
|
|
1038
|
-
const mags = spec.magnitudes[t];
|
|
1039
|
-
if (!mags) {
|
|
1040
|
-
out[t] = 0;
|
|
1041
|
-
prev = null;
|
|
1042
|
-
continue;
|
|
1043
|
-
}
|
|
1044
|
-
let sum = 0;
|
|
1045
|
-
for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
|
|
1046
|
-
if (sum <= 0) {
|
|
1047
|
-
out[t] = 0;
|
|
1048
|
-
prev = null;
|
|
1049
|
-
continue;
|
|
1050
|
-
}
|
|
1051
|
-
const cur = new Float32Array(nBins);
|
|
1052
|
-
const inv = 1 / sum;
|
|
1053
|
-
for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
|
|
1054
|
-
if (!prev) {
|
|
1055
|
-
out[t] = 0;
|
|
1056
|
-
prev = cur;
|
|
1057
|
-
continue;
|
|
1058
|
-
}
|
|
1059
|
-
let flux = 0;
|
|
1060
|
-
for (let k = 0; k < nBins; k++) {
|
|
1061
|
-
const d = (cur[k] ?? 0) - (prev[k] ?? 0);
|
|
1062
|
-
flux += Math.abs(d);
|
|
1063
|
-
}
|
|
1064
|
-
out[t] = flux;
|
|
1065
|
-
prev = cur;
|
|
1066
|
-
}
|
|
1067
|
-
return out;
|
|
1068
|
-
}
|
|
1069
|
-
|
|
1070
1430
|
// src/dsp/fft.ts
|
|
1071
1431
|
function hannWindow(size) {
|
|
1072
1432
|
const w = new Float32Array(size);
|
|
@@ -1207,6 +1567,452 @@ async function spectrogram(audio, config, gpu, options = {}) {
|
|
|
1207
1567
|
};
|
|
1208
1568
|
}
|
|
1209
1569
|
|
|
1570
|
+
// src/dsp/cqt.ts
|
|
1571
|
+
var CQT_DEFAULTS = {
|
|
1572
|
+
/** Quarter-tone resolution (24 bins per octave) */
|
|
1573
|
+
binsPerOctave: 24,
|
|
1574
|
+
/** C1 (lowest note on a standard piano) */
|
|
1575
|
+
fMin: 32.7,
|
|
1576
|
+
/** C9 (well above audible range for most content) */
|
|
1577
|
+
fMax: 8372
|
|
1578
|
+
};
|
|
1579
|
+
function cqtBinToHz(bin, config) {
|
|
1580
|
+
return config.fMin * Math.pow(2, bin / config.binsPerOctave);
|
|
1581
|
+
}
|
|
1582
|
+
function hzToCqtBin(hz, config) {
|
|
1583
|
+
if (hz <= 0) return -Infinity;
|
|
1584
|
+
return config.binsPerOctave * Math.log2(hz / config.fMin);
|
|
1585
|
+
}
|
|
1586
|
+
function getNumOctaves(config) {
|
|
1587
|
+
return Math.log2(config.fMax / config.fMin);
|
|
1588
|
+
}
|
|
1589
|
+
function getNumBins(config) {
|
|
1590
|
+
const nOctaves = getNumOctaves(config);
|
|
1591
|
+
return Math.ceil(nOctaves * config.binsPerOctave);
|
|
1592
|
+
}
|
|
1593
|
+
function getCqtBinFrequencies(config) {
|
|
1594
|
+
const nBins = getNumBins(config);
|
|
1595
|
+
const freqs = new Float32Array(nBins);
|
|
1596
|
+
for (let k = 0; k < nBins; k++) {
|
|
1597
|
+
freqs[k] = cqtBinToHz(k, config);
|
|
1598
|
+
}
|
|
1599
|
+
return freqs;
|
|
1600
|
+
}
|
|
1601
|
+
var kernelBankCache = /* @__PURE__ */ new Map();
|
|
1602
|
+
function kernelCacheKey(config, fftSize, sampleRate) {
|
|
1603
|
+
return `${config.binsPerOctave}:${config.fMin}:${config.fMax}:${fftSize}:${sampleRate}`;
|
|
1604
|
+
}
|
|
1605
|
+
function createCqtKernel(binIndex, config, fftSize, sampleRate) {
|
|
1606
|
+
const centerFreq = cqtBinToHz(binIndex, config);
|
|
1607
|
+
const freqResolution = sampleRate / fftSize;
|
|
1608
|
+
const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
|
|
1609
|
+
const bandwidth = centerFreq / Q;
|
|
1610
|
+
const fLow = centerFreq - bandwidth / 2;
|
|
1611
|
+
const fHigh = centerFreq + bandwidth / 2;
|
|
1612
|
+
const startBin = Math.max(0, Math.floor(fLow / freqResolution));
|
|
1613
|
+
const endBin = Math.min(
|
|
1614
|
+
Math.floor(fftSize / 2) + 1,
|
|
1615
|
+
Math.ceil(fHigh / freqResolution) + 1
|
|
1616
|
+
);
|
|
1617
|
+
const numBins = Math.max(1, endBin - startBin);
|
|
1618
|
+
const weights = new Float32Array(numBins);
|
|
1619
|
+
for (let i = 0; i < numBins; i++) {
|
|
1620
|
+
const binFreq = (startBin + i) * freqResolution;
|
|
1621
|
+
if (binFreq <= centerFreq) {
|
|
1622
|
+
if (centerFreq > fLow) {
|
|
1623
|
+
weights[i] = (binFreq - fLow) / (centerFreq - fLow);
|
|
1624
|
+
} else {
|
|
1625
|
+
weights[i] = 1;
|
|
1626
|
+
}
|
|
1627
|
+
} else {
|
|
1628
|
+
if (fHigh > centerFreq) {
|
|
1629
|
+
weights[i] = (fHigh - binFreq) / (fHigh - centerFreq);
|
|
1630
|
+
} else {
|
|
1631
|
+
weights[i] = 1;
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
weights[i] = Math.max(0, Math.min(1, weights[i] ?? 0));
|
|
1635
|
+
}
|
|
1636
|
+
let sum = 0;
|
|
1637
|
+
for (let i = 0; i < numBins; i++) {
|
|
1638
|
+
sum += weights[i] ?? 0;
|
|
1639
|
+
}
|
|
1640
|
+
if (sum > 0) {
|
|
1641
|
+
for (let i = 0; i < numBins; i++) {
|
|
1642
|
+
weights[i] = (weights[i] ?? 0) / sum;
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1645
|
+
return {
|
|
1646
|
+
centerFreq,
|
|
1647
|
+
startBin,
|
|
1648
|
+
endBin,
|
|
1649
|
+
weights
|
|
1650
|
+
};
|
|
1651
|
+
}
|
|
1652
|
+
function getCqtKernelBank(config, fftSize, sampleRate) {
|
|
1653
|
+
const key = kernelCacheKey(config, fftSize, sampleRate);
|
|
1654
|
+
const cached = kernelBankCache.get(key);
|
|
1655
|
+
if (cached) return cached;
|
|
1656
|
+
const nBins = getNumBins(config);
|
|
1657
|
+
const kernels = new Array(nBins);
|
|
1658
|
+
for (let k = 0; k < nBins; k++) {
|
|
1659
|
+
kernels[k] = createCqtKernel(k, config, fftSize, sampleRate);
|
|
1660
|
+
}
|
|
1661
|
+
const bank = {
|
|
1662
|
+
config,
|
|
1663
|
+
fftSize,
|
|
1664
|
+
sampleRate,
|
|
1665
|
+
kernels
|
|
1666
|
+
};
|
|
1667
|
+
kernelBankCache.set(key, bank);
|
|
1668
|
+
return bank;
|
|
1669
|
+
}
|
|
1670
|
+
function applyCqtKernels(stftMagnitudes, kernelBank) {
|
|
1671
|
+
const nCqtBins = kernelBank.kernels.length;
|
|
1672
|
+
const cqtMagnitudes = new Float32Array(nCqtBins);
|
|
1673
|
+
for (let k = 0; k < nCqtBins; k++) {
|
|
1674
|
+
const kernel = kernelBank.kernels[k];
|
|
1675
|
+
if (!kernel) continue;
|
|
1676
|
+
let sum = 0;
|
|
1677
|
+
for (let i = 0; i < kernel.weights.length; i++) {
|
|
1678
|
+
const stftBin = kernel.startBin + i;
|
|
1679
|
+
const stftMag = stftMagnitudes[stftBin] ?? 0;
|
|
1680
|
+
const weight = kernel.weights[i] ?? 0;
|
|
1681
|
+
sum += stftMag * weight;
|
|
1682
|
+
}
|
|
1683
|
+
cqtMagnitudes[k] = sum;
|
|
1684
|
+
}
|
|
1685
|
+
return cqtMagnitudes;
|
|
1686
|
+
}
|
|
1687
|
+
function withCqtDefaults(partial) {
|
|
1688
|
+
return {
|
|
1689
|
+
binsPerOctave: partial?.binsPerOctave ?? CQT_DEFAULTS.binsPerOctave,
|
|
1690
|
+
fMin: partial?.fMin ?? CQT_DEFAULTS.fMin,
|
|
1691
|
+
fMax: partial?.fMax ?? CQT_DEFAULTS.fMax,
|
|
1692
|
+
hopSize: partial?.hopSize
|
|
1693
|
+
};
|
|
1694
|
+
}
|
|
1695
|
+
async function cqtSpectrogram(audio, config, options = {}) {
|
|
1696
|
+
const sampleRate = audio.sampleRate;
|
|
1697
|
+
if (config.fMin <= 0) {
|
|
1698
|
+
throw new Error("@octoseq/mir: CQT fMin must be positive");
|
|
1699
|
+
}
|
|
1700
|
+
if (config.fMax <= config.fMin) {
|
|
1701
|
+
throw new Error("@octoseq/mir: CQT fMax must be greater than fMin");
|
|
1702
|
+
}
|
|
1703
|
+
if (config.binsPerOctave <= 0) {
|
|
1704
|
+
throw new Error("@octoseq/mir: CQT binsPerOctave must be positive");
|
|
1705
|
+
}
|
|
1706
|
+
const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
|
|
1707
|
+
const minFreqResolution = config.fMin / Q / 2;
|
|
1708
|
+
const minFftSize = Math.ceil(sampleRate / minFreqResolution);
|
|
1709
|
+
let fftSize = 1;
|
|
1710
|
+
while (fftSize < minFftSize) {
|
|
1711
|
+
fftSize *= 2;
|
|
1712
|
+
}
|
|
1713
|
+
fftSize = Math.min(fftSize, 16384);
|
|
1714
|
+
const hopSize = config.hopSize ?? Math.floor(fftSize / 4);
|
|
1715
|
+
const stft = await spectrogram(
|
|
1716
|
+
audio,
|
|
1717
|
+
{ fftSize, hopSize, window: "hann" },
|
|
1718
|
+
void 0,
|
|
1719
|
+
{ isCancelled: options.isCancelled }
|
|
1720
|
+
);
|
|
1721
|
+
const kernelBank = getCqtKernelBank(config, fftSize, sampleRate);
|
|
1722
|
+
const nFrames = stft.magnitudes.length;
|
|
1723
|
+
const cqtMagnitudes = new Array(nFrames);
|
|
1724
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1725
|
+
if (options.isCancelled?.()) {
|
|
1726
|
+
throw new Error("@octoseq/mir: cancelled");
|
|
1727
|
+
}
|
|
1728
|
+
const stftFrame = stft.magnitudes[frame];
|
|
1729
|
+
if (!stftFrame) continue;
|
|
1730
|
+
cqtMagnitudes[frame] = applyCqtKernels(stftFrame, kernelBank);
|
|
1731
|
+
}
|
|
1732
|
+
const nOctaves = getNumOctaves(config);
|
|
1733
|
+
getNumBins(config);
|
|
1734
|
+
return {
|
|
1735
|
+
sampleRate,
|
|
1736
|
+
config,
|
|
1737
|
+
times: stft.times,
|
|
1738
|
+
magnitudes: cqtMagnitudes,
|
|
1739
|
+
nOctaves,
|
|
1740
|
+
binsPerOctave: config.binsPerOctave,
|
|
1741
|
+
binFrequencies: getCqtBinFrequencies(config)
|
|
1742
|
+
};
|
|
1743
|
+
}
|
|
1744
|
+
async function computeCqt(audio, config, options = {}) {
|
|
1745
|
+
const startTime = performance.now();
|
|
1746
|
+
const fullConfig = withCqtDefaults(config);
|
|
1747
|
+
const cqt = await cqtSpectrogram(audio, fullConfig, options);
|
|
1748
|
+
const endTime = performance.now();
|
|
1749
|
+
return {
|
|
1750
|
+
cqt,
|
|
1751
|
+
meta: {
|
|
1752
|
+
backend: "cpu",
|
|
1753
|
+
usedGpu: false,
|
|
1754
|
+
timings: {
|
|
1755
|
+
totalMs: endTime - startTime,
|
|
1756
|
+
cpuMs: endTime - startTime
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
};
|
|
1760
|
+
}
|
|
1761
|
+
|
|
1762
|
+
// src/dsp/cqtSignals.ts
|
|
1763
|
+
var BASS_MIN_HZ = 20;
|
|
1764
|
+
var BASS_MAX_HZ = 300;
|
|
1765
|
+
var TONAL_STABILITY_WINDOW_FRAMES = 20;
|
|
1766
|
+
var CHROMA_BINS = 12;
|
|
1767
|
+
function normalizeMinMax(values) {
|
|
1768
|
+
let min = Infinity;
|
|
1769
|
+
let max = -Infinity;
|
|
1770
|
+
for (let i = 0; i < values.length; i++) {
|
|
1771
|
+
const v = values[i] ?? 0;
|
|
1772
|
+
if (v < min) min = v;
|
|
1773
|
+
if (v > max) max = v;
|
|
1774
|
+
}
|
|
1775
|
+
const range = max - min;
|
|
1776
|
+
const result = new Float32Array(values.length);
|
|
1777
|
+
if (range > 0) {
|
|
1778
|
+
for (let i = 0; i < values.length; i++) {
|
|
1779
|
+
result[i] = ((values[i] ?? 0) - min) / range;
|
|
1780
|
+
}
|
|
1781
|
+
} else {
|
|
1782
|
+
result.fill(0.5);
|
|
1783
|
+
}
|
|
1784
|
+
return result;
|
|
1785
|
+
}
|
|
1786
|
+
function weightedCentroid(values, startIndex = 0) {
|
|
1787
|
+
let sumWeighted = 0;
|
|
1788
|
+
let sumWeights = 0;
|
|
1789
|
+
for (let i = 0; i < values.length; i++) {
|
|
1790
|
+
const weight = values[i] ?? 0;
|
|
1791
|
+
sumWeighted += (startIndex + i) * weight;
|
|
1792
|
+
sumWeights += weight;
|
|
1793
|
+
}
|
|
1794
|
+
return sumWeights > 0 ? sumWeighted / sumWeights : startIndex + values.length / 2;
|
|
1795
|
+
}
|
|
1796
|
+
function computeHarmonicEnergyFrame(frame, cqt) {
|
|
1797
|
+
if (frame.length === 0) return 0;
|
|
1798
|
+
let totalEnergy = 0;
|
|
1799
|
+
for (let i = 0; i < frame.length; i++) {
|
|
1800
|
+
const mag = frame[i] ?? 0;
|
|
1801
|
+
totalEnergy += mag * mag;
|
|
1802
|
+
}
|
|
1803
|
+
if (totalEnergy === 0) return 0;
|
|
1804
|
+
let maxMag = 0;
|
|
1805
|
+
let fundamentalBin = 0;
|
|
1806
|
+
for (let i = 0; i < frame.length; i++) {
|
|
1807
|
+
const mag = frame[i] ?? 0;
|
|
1808
|
+
if (mag > maxMag) {
|
|
1809
|
+
maxMag = mag;
|
|
1810
|
+
fundamentalBin = i;
|
|
1811
|
+
}
|
|
1812
|
+
}
|
|
1813
|
+
const fundamentalFreq = cqtBinToHz(fundamentalBin, cqt.config);
|
|
1814
|
+
let harmonicEnergy2 = 0;
|
|
1815
|
+
const numHarmonics = 6;
|
|
1816
|
+
for (let h = 1; h <= numHarmonics; h++) {
|
|
1817
|
+
const harmonicFreq = fundamentalFreq * h;
|
|
1818
|
+
const harmonicBin = Math.round(hzToCqtBin(harmonicFreq, cqt.config));
|
|
1819
|
+
if (harmonicBin >= 0 && harmonicBin < frame.length) {
|
|
1820
|
+
const mag = frame[harmonicBin] ?? 0;
|
|
1821
|
+
const weight = 1 / h;
|
|
1822
|
+
harmonicEnergy2 += mag * mag * weight;
|
|
1823
|
+
}
|
|
1824
|
+
}
|
|
1825
|
+
let weightSum = 0;
|
|
1826
|
+
for (let h = 1; h <= numHarmonics; h++) {
|
|
1827
|
+
weightSum += 1 / h;
|
|
1828
|
+
}
|
|
1829
|
+
harmonicEnergy2 /= weightSum;
|
|
1830
|
+
return Math.min(1, harmonicEnergy2 / totalEnergy);
|
|
1831
|
+
}
|
|
1832
|
+
function harmonicEnergy(cqt) {
|
|
1833
|
+
const startTime = performance.now();
|
|
1834
|
+
const nFrames = cqt.magnitudes.length;
|
|
1835
|
+
const values = new Float32Array(nFrames);
|
|
1836
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1837
|
+
const cqtFrame = cqt.magnitudes[frame];
|
|
1838
|
+
if (cqtFrame) {
|
|
1839
|
+
values[frame] = computeHarmonicEnergyFrame(cqtFrame, cqt);
|
|
1840
|
+
}
|
|
1841
|
+
}
|
|
1842
|
+
const normalized = normalizeMinMax(values);
|
|
1843
|
+
const endTime = performance.now();
|
|
1844
|
+
return {
|
|
1845
|
+
kind: "cqt1d",
|
|
1846
|
+
signalId: "harmonicEnergy",
|
|
1847
|
+
times: cqt.times,
|
|
1848
|
+
values: normalized,
|
|
1849
|
+
meta: {
|
|
1850
|
+
backend: "cpu",
|
|
1851
|
+
usedGpu: false,
|
|
1852
|
+
timings: {
|
|
1853
|
+
totalMs: endTime - startTime,
|
|
1854
|
+
cpuMs: endTime - startTime
|
|
1855
|
+
}
|
|
1856
|
+
}
|
|
1857
|
+
};
|
|
1858
|
+
}
|
|
1859
|
+
function bassPitchMotion(cqt) {
|
|
1860
|
+
const startTime = performance.now();
|
|
1861
|
+
const nFrames = cqt.magnitudes.length;
|
|
1862
|
+
const bassStartBin = Math.max(0, Math.floor(hzToCqtBin(BASS_MIN_HZ, cqt.config)));
|
|
1863
|
+
const bassEndBin = Math.min(
|
|
1864
|
+
cqt.magnitudes[0]?.length ?? 0,
|
|
1865
|
+
Math.ceil(hzToCqtBin(BASS_MAX_HZ, cqt.config))
|
|
1866
|
+
);
|
|
1867
|
+
const bassNumBins = bassEndBin - bassStartBin;
|
|
1868
|
+
if (bassNumBins <= 0) {
|
|
1869
|
+
return {
|
|
1870
|
+
kind: "cqt1d",
|
|
1871
|
+
signalId: "bassPitchMotion",
|
|
1872
|
+
times: cqt.times,
|
|
1873
|
+
values: new Float32Array(nFrames),
|
|
1874
|
+
meta: {
|
|
1875
|
+
backend: "cpu",
|
|
1876
|
+
usedGpu: false,
|
|
1877
|
+
timings: { totalMs: 0, cpuMs: 0 }
|
|
1878
|
+
}
|
|
1879
|
+
};
|
|
1880
|
+
}
|
|
1881
|
+
const centroids = new Float32Array(nFrames);
|
|
1882
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1883
|
+
const cqtFrame = cqt.magnitudes[frame];
|
|
1884
|
+
if (!cqtFrame) continue;
|
|
1885
|
+
const bassBins = new Float32Array(bassNumBins);
|
|
1886
|
+
for (let i = 0; i < bassNumBins; i++) {
|
|
1887
|
+
bassBins[i] = cqtFrame[bassStartBin + i] ?? 0;
|
|
1888
|
+
}
|
|
1889
|
+
centroids[frame] = weightedCentroid(bassBins, bassStartBin);
|
|
1890
|
+
}
|
|
1891
|
+
const motion = new Float32Array(nFrames);
|
|
1892
|
+
for (let frame = 1; frame < nFrames; frame++) {
|
|
1893
|
+
motion[frame] = Math.abs((centroids[frame] ?? 0) - (centroids[frame - 1] ?? 0));
|
|
1894
|
+
}
|
|
1895
|
+
motion[0] = motion[1] ?? 0;
|
|
1896
|
+
const normalized = normalizeMinMax(motion);
|
|
1897
|
+
const endTime = performance.now();
|
|
1898
|
+
return {
|
|
1899
|
+
kind: "cqt1d",
|
|
1900
|
+
signalId: "bassPitchMotion",
|
|
1901
|
+
times: cqt.times,
|
|
1902
|
+
values: normalized,
|
|
1903
|
+
meta: {
|
|
1904
|
+
backend: "cpu",
|
|
1905
|
+
usedGpu: false,
|
|
1906
|
+
timings: {
|
|
1907
|
+
totalMs: endTime - startTime,
|
|
1908
|
+
cpuMs: endTime - startTime
|
|
1909
|
+
}
|
|
1910
|
+
}
|
|
1911
|
+
};
|
|
1912
|
+
}
|
|
1913
|
+
function computeChroma(frame, binsPerOctave) {
|
|
1914
|
+
const chroma = new Float32Array(CHROMA_BINS);
|
|
1915
|
+
const binsPerSemitone = binsPerOctave / CHROMA_BINS;
|
|
1916
|
+
for (let i = 0; i < frame.length; i++) {
|
|
1917
|
+
const chromaBin = Math.floor(i % binsPerOctave / binsPerSemitone) % CHROMA_BINS;
|
|
1918
|
+
const mag = frame[i] ?? 0;
|
|
1919
|
+
chroma[chromaBin] = (chroma[chromaBin] ?? 0) + mag * mag;
|
|
1920
|
+
}
|
|
1921
|
+
let sum = 0;
|
|
1922
|
+
for (let i = 0; i < CHROMA_BINS; i++) {
|
|
1923
|
+
sum += chroma[i] ?? 0;
|
|
1924
|
+
}
|
|
1925
|
+
if (sum > 0) {
|
|
1926
|
+
for (let i = 0; i < CHROMA_BINS; i++) {
|
|
1927
|
+
chroma[i] = (chroma[i] ?? 0) / sum;
|
|
1928
|
+
}
|
|
1929
|
+
}
|
|
1930
|
+
return chroma;
|
|
1931
|
+
}
|
|
1932
|
+
function tonalStability(cqt) {
|
|
1933
|
+
const startTime = performance.now();
|
|
1934
|
+
const nFrames = cqt.magnitudes.length;
|
|
1935
|
+
const chromas = new Array(nFrames);
|
|
1936
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1937
|
+
const cqtFrame = cqt.magnitudes[frame];
|
|
1938
|
+
if (cqtFrame) {
|
|
1939
|
+
chromas[frame] = computeChroma(cqtFrame, cqt.binsPerOctave);
|
|
1940
|
+
} else {
|
|
1941
|
+
chromas[frame] = new Float32Array(CHROMA_BINS);
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
const halfWindow = Math.floor(TONAL_STABILITY_WINDOW_FRAMES / 2);
|
|
1945
|
+
const instability = new Float32Array(nFrames);
|
|
1946
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1947
|
+
const windowStart = Math.max(0, frame - halfWindow);
|
|
1948
|
+
const windowEnd = Math.min(nFrames, frame + halfWindow + 1);
|
|
1949
|
+
const windowSize = windowEnd - windowStart;
|
|
1950
|
+
const avgChroma = new Float32Array(CHROMA_BINS);
|
|
1951
|
+
for (let w = windowStart; w < windowEnd; w++) {
|
|
1952
|
+
const chroma = chromas[w];
|
|
1953
|
+
if (chroma) {
|
|
1954
|
+
for (let c = 0; c < CHROMA_BINS; c++) {
|
|
1955
|
+
avgChroma[c] = (avgChroma[c] ?? 0) + (chroma[c] ?? 0);
|
|
1956
|
+
}
|
|
1957
|
+
}
|
|
1958
|
+
}
|
|
1959
|
+
for (let c = 0; c < CHROMA_BINS; c++) {
|
|
1960
|
+
avgChroma[c] = (avgChroma[c] ?? 0) / windowSize;
|
|
1961
|
+
}
|
|
1962
|
+
let totalVariance = 0;
|
|
1963
|
+
for (let w = windowStart; w < windowEnd; w++) {
|
|
1964
|
+
const chroma = chromas[w];
|
|
1965
|
+
if (chroma) {
|
|
1966
|
+
for (let c = 0; c < CHROMA_BINS; c++) {
|
|
1967
|
+
const diff = (chroma[c] ?? 0) - (avgChroma[c] ?? 0);
|
|
1968
|
+
totalVariance += diff * diff;
|
|
1969
|
+
}
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
totalVariance /= windowSize * CHROMA_BINS;
|
|
1973
|
+
instability[frame] = totalVariance;
|
|
1974
|
+
}
|
|
1975
|
+
const normalizedInstability = normalizeMinMax(instability);
|
|
1976
|
+
const stability = new Float32Array(nFrames);
|
|
1977
|
+
for (let frame = 0; frame < nFrames; frame++) {
|
|
1978
|
+
stability[frame] = 1 - (normalizedInstability[frame] ?? 0);
|
|
1979
|
+
}
|
|
1980
|
+
const endTime = performance.now();
|
|
1981
|
+
return {
|
|
1982
|
+
kind: "cqt1d",
|
|
1983
|
+
signalId: "tonalStability",
|
|
1984
|
+
times: cqt.times,
|
|
1985
|
+
values: stability,
|
|
1986
|
+
meta: {
|
|
1987
|
+
backend: "cpu",
|
|
1988
|
+
usedGpu: false,
|
|
1989
|
+
timings: {
|
|
1990
|
+
totalMs: endTime - startTime,
|
|
1991
|
+
cpuMs: endTime - startTime
|
|
1992
|
+
}
|
|
1993
|
+
}
|
|
1994
|
+
};
|
|
1995
|
+
}
|
|
1996
|
+
function computeCqtSignal(cqt, signalId) {
|
|
1997
|
+
switch (signalId) {
|
|
1998
|
+
case "harmonicEnergy":
|
|
1999
|
+
return harmonicEnergy(cqt);
|
|
2000
|
+
case "bassPitchMotion":
|
|
2001
|
+
return bassPitchMotion(cqt);
|
|
2002
|
+
case "tonalStability":
|
|
2003
|
+
return tonalStability(cqt);
|
|
2004
|
+
default:
|
|
2005
|
+
throw new Error(`@octoseq/mir: unknown CQT signal ID: ${signalId}`);
|
|
2006
|
+
}
|
|
2007
|
+
}
|
|
2008
|
+
function computeAllCqtSignals(cqt) {
|
|
2009
|
+
const results = /* @__PURE__ */ new Map();
|
|
2010
|
+
results.set("harmonicEnergy", harmonicEnergy(cqt));
|
|
2011
|
+
results.set("bassPitchMotion", bassPitchMotion(cqt));
|
|
2012
|
+
results.set("tonalStability", tonalStability(cqt));
|
|
2013
|
+
return results;
|
|
2014
|
+
}
|
|
2015
|
+
|
|
1210
2016
|
// src/runner/runMir.ts
|
|
1211
2017
|
function nowMs2() {
|
|
1212
2018
|
return typeof performance !== "undefined" ? performance.now() : Date.now();
|
|
@@ -1400,6 +2206,66 @@ async function runMir(audio, request, options = {}) {
|
|
|
1400
2206
|
}
|
|
1401
2207
|
};
|
|
1402
2208
|
}
|
|
2209
|
+
if (request.fn === "beatCandidates") {
|
|
2210
|
+
const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
|
|
2211
|
+
const beatOpts = request.beatCandidates ?? {};
|
|
2212
|
+
const result = detectBeatCandidates(mel2, spec, {
|
|
2213
|
+
minIntervalSec: beatOpts.minIntervalSec,
|
|
2214
|
+
thresholdFactor: beatOpts.thresholdFactor,
|
|
2215
|
+
smoothMs: beatOpts.smoothMs
|
|
2216
|
+
});
|
|
2217
|
+
const end2 = nowMs2();
|
|
2218
|
+
return {
|
|
2219
|
+
kind: "beatCandidates",
|
|
2220
|
+
times: result.salience.times,
|
|
2221
|
+
candidates: result.candidates,
|
|
2222
|
+
salience: beatOpts.includeSalience ? result.salience : void 0,
|
|
2223
|
+
meta: {
|
|
2224
|
+
backend: "cpu",
|
|
2225
|
+
usedGpu: false,
|
|
2226
|
+
timings: {
|
|
2227
|
+
totalMs: end2 - t0,
|
|
2228
|
+
cpuMs: cpuAfterSpec - cpuStart + melCpuMs
|
|
2229
|
+
}
|
|
2230
|
+
}
|
|
2231
|
+
};
|
|
2232
|
+
}
|
|
2233
|
+
if (request.fn === "tempoHypotheses") {
|
|
2234
|
+
const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
|
|
2235
|
+
const beatOpts = request.beatCandidates ?? {};
|
|
2236
|
+
const beatResult = detectBeatCandidates(mel2, spec, {
|
|
2237
|
+
minIntervalSec: beatOpts.minIntervalSec,
|
|
2238
|
+
thresholdFactor: beatOpts.thresholdFactor,
|
|
2239
|
+
smoothMs: beatOpts.smoothMs
|
|
2240
|
+
});
|
|
2241
|
+
const tempoStart = nowMs2();
|
|
2242
|
+
const tempoOpts = request.tempoHypotheses ?? {};
|
|
2243
|
+
const result = generateTempoHypotheses(beatResult.candidates, {
|
|
2244
|
+
minBpm: tempoOpts.minBpm,
|
|
2245
|
+
maxBpm: tempoOpts.maxBpm,
|
|
2246
|
+
binSizeBpm: tempoOpts.binSizeBpm,
|
|
2247
|
+
maxHypotheses: tempoOpts.maxHypotheses,
|
|
2248
|
+
minConfidence: tempoOpts.minConfidence,
|
|
2249
|
+
weightByStrength: tempoOpts.weightByStrength,
|
|
2250
|
+
includeHistogram: tempoOpts.includeHistogram
|
|
2251
|
+
});
|
|
2252
|
+
const end2 = nowMs2();
|
|
2253
|
+
return {
|
|
2254
|
+
kind: "tempoHypotheses",
|
|
2255
|
+
times: spec.times,
|
|
2256
|
+
hypotheses: result.hypotheses,
|
|
2257
|
+
inputCandidateCount: result.inputCandidateCount,
|
|
2258
|
+
histogram: result.histogram,
|
|
2259
|
+
meta: {
|
|
2260
|
+
backend: "cpu",
|
|
2261
|
+
usedGpu: false,
|
|
2262
|
+
timings: {
|
|
2263
|
+
totalMs: end2 - t0,
|
|
2264
|
+
cpuMs: cpuAfterSpec - cpuStart + melCpuMs + (end2 - tempoStart)
|
|
2265
|
+
}
|
|
2266
|
+
}
|
|
2267
|
+
};
|
|
2268
|
+
}
|
|
1403
2269
|
if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
|
|
1404
2270
|
const hpssSpecConfig = options.hpss?.spectrogram ?? specConfig;
|
|
1405
2271
|
const needsHpssSpec = hpssSpecConfig.fftSize !== specConfig.fftSize || hpssSpecConfig.hopSize !== specConfig.hopSize;
|
|
@@ -1502,6 +2368,36 @@ async function runMir(audio, request, options = {}) {
|
|
|
1502
2368
|
}
|
|
1503
2369
|
};
|
|
1504
2370
|
}
|
|
2371
|
+
if (request.fn === "cqtHarmonicEnergy" || request.fn === "cqtBassPitchMotion" || request.fn === "cqtTonalStability") {
|
|
2372
|
+
const cqtStart = nowMs2();
|
|
2373
|
+
const cqtConfig = withCqtDefaults(request.cqt);
|
|
2374
|
+
const cqt = await cqtSpectrogram(asAudioBufferLike(audio), cqtConfig, {
|
|
2375
|
+
isCancelled: options.isCancelled
|
|
2376
|
+
});
|
|
2377
|
+
const cqtEnd = nowMs2();
|
|
2378
|
+
let signal;
|
|
2379
|
+
if (request.fn === "cqtHarmonicEnergy") {
|
|
2380
|
+
signal = harmonicEnergy(cqt);
|
|
2381
|
+
} else if (request.fn === "cqtBassPitchMotion") {
|
|
2382
|
+
signal = bassPitchMotion(cqt);
|
|
2383
|
+
} else {
|
|
2384
|
+
signal = tonalStability(cqt);
|
|
2385
|
+
}
|
|
2386
|
+
const end2 = nowMs2();
|
|
2387
|
+
return {
|
|
2388
|
+
kind: "1d",
|
|
2389
|
+
times: signal.times,
|
|
2390
|
+
values: signal.values,
|
|
2391
|
+
meta: {
|
|
2392
|
+
backend: "cpu",
|
|
2393
|
+
usedGpu: false,
|
|
2394
|
+
timings: {
|
|
2395
|
+
totalMs: end2 - t0,
|
|
2396
|
+
cpuMs: cqtEnd - cqtStart + (end2 - cqtEnd)
|
|
2397
|
+
}
|
|
2398
|
+
}
|
|
2399
|
+
};
|
|
2400
|
+
}
|
|
1505
2401
|
const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(backend === "gpu");
|
|
1506
2402
|
const end = nowMs2();
|
|
1507
2403
|
return {
|
|
@@ -1520,6 +2416,6 @@ async function runMir(audio, request, options = {}) {
|
|
|
1520
2416
|
};
|
|
1521
2417
|
}
|
|
1522
2418
|
|
|
1523
|
-
export { delta, deltaDelta, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
|
|
1524
|
-
//# sourceMappingURL=chunk-
|
|
1525
|
-
//# sourceMappingURL=chunk-
|
|
2419
|
+
export { CQT_DEFAULTS, bassPitchMotion, beatSalienceFromMel, computeAllCqtSignals, computeCqt, computeCqtSignal, cqtBinToHz, cqtSpectrogram, delta, deltaDelta, detectBeatCandidates, featureIndexToHz, generateTempoHypotheses, getCqtBinFrequencies, getNumBins, getNumOctaves, harmonicEnergy, hpss, hzToCqtBin, hzToFeatureIndex, hzToMel, melSpectrogram, melToHz, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram, tonalStability, withCqtDefaults };
|
|
2420
|
+
//# sourceMappingURL=chunk-OLIDGECY.js.map
|
|
2421
|
+
//# sourceMappingURL=chunk-OLIDGECY.js.map
|