@octoseq/mir 0.1.0-main.e2ea119 → 0.1.0-main.ef9b77a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,6 +53,646 @@ async function submitAndReadback(gpu, encoder, outBuffer, readback, byteLength)
53
53
  };
54
54
  }
55
55
 
56
+ // src/gpu/kernels/onsetEnvelope.wgsl.ts
57
+ var onsetEnvelopeWGSL = (
58
+ /* wgsl */
59
+ `
60
+ // Compute onset strength envelope from a (log) mel spectrogram.
61
+ //
62
+ // Input layout: melFlat[t*nMels + m]
63
+ // Output layout: out[t]
64
+ //
65
+ // We compute novelty per frame:
66
+ // novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
67
+ // or sum_m abs(...)
68
+ //
69
+ // One invocation computes one frame index (t). This is memory-bound but reduces a full
70
+ // (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
71
+
72
+ struct Params {
73
+ nMels: u32,
74
+ nFrames: u32,
75
+ diffMethod: u32, // 0=rectified, 1=abs
76
+ _pad: u32,
77
+ };
78
+
79
+ @group(0) @binding(0) var<storage, read> melFlat: array<f32>;
80
+ @group(0) @binding(1) var<storage, read_write> out: array<f32>;
81
+ @group(0) @binding(2) var<uniform> params: Params;
82
+
83
+ @compute @workgroup_size(256)
84
+ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
85
+ let t = gid.x;
86
+ if (t >= params.nFrames) { return; }
87
+
88
+ if (t == 0u) {
89
+ out[t] = 0.0;
90
+ return;
91
+ }
92
+
93
+ let nMels = params.nMels;
94
+ var sum: f32 = 0.0;
95
+
96
+ // Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
97
+ // (Future optimisation: parallelise reduction within workgroup.)
98
+ for (var m: u32 = 0u; m < nMels; m = m + 1u) {
99
+ let a = melFlat[t * nMels + m];
100
+ let b = melFlat[(t - 1u) * nMels + m];
101
+ let d = a - b;
102
+
103
+ if (params.diffMethod == 1u) {
104
+ // abs
105
+ sum = sum + abs(d);
106
+ } else {
107
+ // rectified
108
+ sum = sum + max(0.0, d);
109
+ }
110
+ }
111
+
112
+ out[t] = sum / max(1.0, f32(nMels));
113
+ }
114
+ `
115
+ );
116
+
117
+ // src/gpu/onsetEnvelope.ts
118
+ async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
119
+ const { device } = gpu;
120
+ const { nFrames, nMels, melFlat, diffMethod } = input;
121
+ if (melFlat.length !== nFrames * nMels) {
122
+ throw new Error("@octoseq/mir: melFlat length mismatch");
123
+ }
124
+ const melBuffer = createAndWriteStorageBuffer(gpu, melFlat);
125
+ const outByteLen = byteSizeF32(nFrames);
126
+ const outBuffer = createStorageOutBuffer(gpu, outByteLen);
127
+ const readback = createReadbackBuffer(gpu, outByteLen);
128
+ const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
129
+ const pipeline = device.createComputePipeline({
130
+ layout: "auto",
131
+ compute: { module: shader, entryPoint: "main" }
132
+ });
133
+ const diffU32 = diffMethod === "abs" ? 1 : 0;
134
+ const params = createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0]));
135
+ const bindGroup = device.createBindGroup({
136
+ layout: pipeline.getBindGroupLayout(0),
137
+ entries: [
138
+ { binding: 0, resource: { buffer: melBuffer } },
139
+ { binding: 1, resource: { buffer: outBuffer } },
140
+ { binding: 2, resource: { buffer: params } }
141
+ ]
142
+ });
143
+ const encoder = device.createCommandEncoder();
144
+ const pass = encoder.beginComputePass();
145
+ pass.setPipeline(pipeline);
146
+ pass.setBindGroup(0, bindGroup);
147
+ const wg = Math.ceil(nFrames / 256);
148
+ pass.dispatchWorkgroups(wg);
149
+ pass.end();
150
+ const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
151
+ melBuffer.destroy();
152
+ outBuffer.destroy();
153
+ params.destroy();
154
+ readback.destroy();
155
+ return {
156
+ value: { out: new Float32Array(bytes) },
157
+ timing
158
+ };
159
+ }
160
+
161
+ // src/dsp/onset.ts
162
+ function movingAverage(values, windowFrames) {
163
+ if (windowFrames <= 1) return values;
164
+ const n = values.length;
165
+ const out = new Float32Array(n);
166
+ const half = Math.floor(windowFrames / 2);
167
+ const prefix = new Float64Array(n + 1);
168
+ prefix[0] = 0;
169
+ for (let i = 0; i < n; i++) {
170
+ prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
171
+ }
172
+ for (let i = 0; i < n; i++) {
173
+ const start = Math.max(0, i - half);
174
+ const end = Math.min(n, i + half + 1);
175
+ const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
176
+ const count = Math.max(1, end - start);
177
+ out[i] = sum / count;
178
+ }
179
+ return out;
180
+ }
181
+ function defaultOptions(opts) {
182
+ return {
183
+ useLog: opts?.useLog ?? false,
184
+ smoothMs: opts?.smoothMs ?? 30,
185
+ diffMethod: opts?.diffMethod ?? "rectified"
186
+ };
187
+ }
188
+ function logCompress(x) {
189
+ return Math.log1p(Math.max(0, x));
190
+ }
191
+ function onsetEnvelopeFromSpectrogram(spec, options) {
192
+ const opts = defaultOptions(options);
193
+ const nFrames = spec.times.length;
194
+ const out = new Float32Array(nFrames);
195
+ const nBins = (spec.fftSize >>> 1) + 1;
196
+ out[0] = 0;
197
+ for (let t = 1; t < nFrames; t++) {
198
+ const cur = spec.magnitudes[t];
199
+ const prev = spec.magnitudes[t - 1];
200
+ if (!cur || !prev) {
201
+ out[t] = 0;
202
+ continue;
203
+ }
204
+ let sum = 0;
205
+ for (let k = 0; k < nBins; k++) {
206
+ let a = cur[k] ?? 0;
207
+ let b = prev[k] ?? 0;
208
+ if (opts.useLog) {
209
+ a = logCompress(a);
210
+ b = logCompress(b);
211
+ }
212
+ const d = a - b;
213
+ sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
214
+ }
215
+ out[t] = nBins > 0 ? sum / nBins : 0;
216
+ }
217
+ const smoothMs = opts.smoothMs;
218
+ if (smoothMs > 0 && nFrames >= 2) {
219
+ const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
220
+ const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
221
+ return {
222
+ times: spec.times,
223
+ values: movingAverage(out, windowFrames | 1)
224
+ };
225
+ }
226
+ return { times: spec.times, values: out };
227
+ }
228
+ function onsetEnvelopeFromMel(mel, options) {
229
+ const opts = defaultOptions(options);
230
+ const nFrames = mel.times.length;
231
+ const out = new Float32Array(nFrames);
232
+ out[0] = 0;
233
+ for (let t = 1; t < nFrames; t++) {
234
+ const cur = mel.melBands[t];
235
+ const prev = mel.melBands[t - 1];
236
+ if (!cur || !prev) {
237
+ out[t] = 0;
238
+ continue;
239
+ }
240
+ const nBands = cur.length;
241
+ let sum = 0;
242
+ for (let m = 0; m < nBands; m++) {
243
+ let a = cur[m] ?? 0;
244
+ let b = prev[m] ?? 0;
245
+ if (opts.useLog) {
246
+ a = logCompress(a);
247
+ b = logCompress(b);
248
+ }
249
+ const d = a - b;
250
+ sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
251
+ }
252
+ out[t] = nBands > 0 ? sum / nBands : 0;
253
+ }
254
+ const smoothMs = opts.smoothMs;
255
+ if (smoothMs > 0 && nFrames >= 2) {
256
+ const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
257
+ const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
258
+ return {
259
+ times: mel.times,
260
+ values: movingAverage(out, windowFrames | 1)
261
+ };
262
+ }
263
+ return { times: mel.times, values: out };
264
+ }
265
+ async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
266
+ const nFrames = mel.times.length;
267
+ const nMels = mel.melBands[0]?.length ?? 0;
268
+ const melFlat = new Float32Array(nFrames * nMels);
269
+ for (let t = 0; t < nFrames; t++) {
270
+ const row = mel.melBands[t];
271
+ if (!row) continue;
272
+ melFlat.set(row, t * nMels);
273
+ }
274
+ const diffMethod = options?.diffMethod ?? "rectified";
275
+ const { value, timing } = await gpuOnsetEnvelopeFromMelFlat(gpu, {
276
+ nFrames,
277
+ nMels,
278
+ melFlat,
279
+ diffMethod
280
+ });
281
+ return {
282
+ times: mel.times,
283
+ values: value.out,
284
+ gpuTimings: { gpuSubmitToReadbackMs: timing.gpuSubmitToReadbackMs }
285
+ };
286
+ }
287
+
288
+ // src/dsp/spectral.ts
289
+ function spectralCentroid(spec) {
290
+ const nFrames = spec.times.length;
291
+ const out = new Float32Array(nFrames);
292
+ const nBins = (spec.fftSize >>> 1) + 1;
293
+ const binHz = spec.sampleRate / spec.fftSize;
294
+ for (let t = 0; t < nFrames; t++) {
295
+ const mags = spec.magnitudes[t];
296
+ if (!mags) {
297
+ out[t] = 0;
298
+ continue;
299
+ }
300
+ let num = 0;
301
+ let den = 0;
302
+ for (let k = 0; k < nBins; k++) {
303
+ const m = mags[k] ?? 0;
304
+ const f = k * binHz;
305
+ num += f * m;
306
+ den += m;
307
+ }
308
+ out[t] = den > 0 ? num / den : 0;
309
+ }
310
+ return out;
311
+ }
312
+ function spectralFlux(spec) {
313
+ const nFrames = spec.times.length;
314
+ const out = new Float32Array(nFrames);
315
+ const nBins = (spec.fftSize >>> 1) + 1;
316
+ let prev = null;
317
+ for (let t = 0; t < nFrames; t++) {
318
+ const mags = spec.magnitudes[t];
319
+ if (!mags) {
320
+ out[t] = 0;
321
+ prev = null;
322
+ continue;
323
+ }
324
+ let sum = 0;
325
+ for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
326
+ if (sum <= 0) {
327
+ out[t] = 0;
328
+ prev = null;
329
+ continue;
330
+ }
331
+ const cur = new Float32Array(nBins);
332
+ const inv = 1 / sum;
333
+ for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
334
+ if (!prev) {
335
+ out[t] = 0;
336
+ prev = cur;
337
+ continue;
338
+ }
339
+ let flux = 0;
340
+ for (let k = 0; k < nBins; k++) {
341
+ const d = (cur[k] ?? 0) - (prev[k] ?? 0);
342
+ flux += Math.abs(d);
343
+ }
344
+ out[t] = flux;
345
+ prev = cur;
346
+ }
347
+ return out;
348
+ }
349
+
350
+ // src/dsp/beatCandidates.ts
351
+ function movingAverage2(values, windowFrames) {
352
+ if (windowFrames <= 1) return values;
353
+ const n = values.length;
354
+ const out = new Float32Array(n);
355
+ const half = Math.floor(windowFrames / 2);
356
+ const prefix = new Float64Array(n + 1);
357
+ prefix[0] = 0;
358
+ for (let i = 0; i < n; i++) {
359
+ prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
360
+ }
361
+ for (let i = 0; i < n; i++) {
362
+ const start = Math.max(0, i - half);
363
+ const end = Math.min(n, i + half + 1);
364
+ const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
365
+ const count = Math.max(1, end - start);
366
+ out[i] = sum / count;
367
+ }
368
+ return out;
369
+ }
370
+ function meanStd(values) {
371
+ const n = values.length;
372
+ if (n <= 0) return { mean: 0, std: 0 };
373
+ let mean = 0;
374
+ for (let i = 0; i < n; i++) mean += values[i] ?? 0;
375
+ mean /= n;
376
+ let varSum = 0;
377
+ for (let i = 0; i < n; i++) {
378
+ const d = (values[i] ?? 0) - mean;
379
+ varSum += d * d;
380
+ }
381
+ const std = Math.sqrt(varSum / n);
382
+ return { mean, std };
383
+ }
384
+ function zScoreNormalize(values) {
385
+ const { mean, std } = meanStd(values);
386
+ const n = values.length;
387
+ const out = new Float32Array(n);
388
+ if (std === 0 || !Number.isFinite(std)) {
389
+ out.fill(0);
390
+ return out;
391
+ }
392
+ for (let i = 0; i < n; i++) {
393
+ out[i] = ((values[i] ?? 0) - mean) / std;
394
+ }
395
+ return out;
396
+ }
397
+ function minMaxNormalize(values) {
398
+ const n = values.length;
399
+ if (n === 0) return new Float32Array(0);
400
+ let min = Infinity;
401
+ let max = -Infinity;
402
+ for (let i = 0; i < n; i++) {
403
+ const v = values[i] ?? 0;
404
+ if (v < min) min = v;
405
+ if (v > max) max = v;
406
+ }
407
+ const out = new Float32Array(n);
408
+ const range = max - min;
409
+ if (range === 0 || !Number.isFinite(range)) {
410
+ out.fill(0.5);
411
+ return out;
412
+ }
413
+ for (let i = 0; i < n; i++) {
414
+ out[i] = ((values[i] ?? 0) - min) / range;
415
+ }
416
+ return out;
417
+ }
418
+ function beatSalienceFromMel(mel, spec, options) {
419
+ const smoothMs = options?.smoothMs ?? 50;
420
+ const onset = onsetEnvelopeFromMel(mel, {
421
+ smoothMs,
422
+ diffMethod: "rectified",
423
+ useLog: false
424
+ });
425
+ const flux = spectralFlux(spec);
426
+ const n = Math.min(onset.times.length, flux.length);
427
+ const onsetNorm = zScoreNormalize(onset.values.subarray(0, n));
428
+ const fluxNorm = zScoreNormalize(flux.subarray(0, n));
429
+ const combined = new Float32Array(n);
430
+ const onsetWeight = 0.7;
431
+ const fluxWeight = 0.3;
432
+ for (let i = 0; i < n; i++) {
433
+ combined[i] = onsetWeight * (onsetNorm[i] ?? 0) + fluxWeight * (fluxNorm[i] ?? 0);
434
+ }
435
+ const dt = n >= 2 ? (onset.times[1] ?? 0) - (onset.times[0] ?? 0) : 0.01;
436
+ const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
437
+ const smoothed = movingAverage2(combined, windowFrames | 1);
438
+ const normalized = minMaxNormalize(smoothed);
439
+ return {
440
+ times: onset.times.subarray(0, n),
441
+ values: normalized
442
+ };
443
+ }
444
+ function pickBeatCandidates(salience, options, source) {
445
+ const minIntervalSec = options.minIntervalSec ?? 0.1;
446
+ const thresholdFactor = options.thresholdFactor ?? 0.5;
447
+ const { times, values } = salience;
448
+ const n = values.length;
449
+ if (n < 3) return [];
450
+ const { mean, std } = meanStd(values);
451
+ const threshold = mean + thresholdFactor * std;
452
+ const candidates = [];
453
+ let lastPeakTime = -Infinity;
454
+ for (let i = 1; i < n - 1; i++) {
455
+ const v = values[i] ?? 0;
456
+ if (v < threshold) continue;
457
+ const prev = values[i - 1] ?? 0;
458
+ const next = values[i + 1] ?? 0;
459
+ if (!(v > prev && v > next)) continue;
460
+ const t = times[i] ?? 0;
461
+ if (t - lastPeakTime < minIntervalSec) {
462
+ const last = candidates[candidates.length - 1];
463
+ if (last && v > last.strength) {
464
+ last.time = t;
465
+ last.strength = v;
466
+ }
467
+ continue;
468
+ }
469
+ candidates.push({
470
+ time: t,
471
+ strength: v,
472
+ source
473
+ });
474
+ lastPeakTime = t;
475
+ }
476
+ return candidates;
477
+ }
478
+ function detectBeatCandidates(mel, spec, options) {
479
+ const opts = {
480
+ minIntervalSec: options?.minIntervalSec ?? 0.1,
481
+ thresholdFactor: options?.thresholdFactor ?? 0.5,
482
+ smoothMs: options?.smoothMs ?? 50
483
+ };
484
+ const salience = beatSalienceFromMel(mel, spec, { smoothMs: opts.smoothMs });
485
+ const candidates = pickBeatCandidates(salience, opts, "combined");
486
+ return {
487
+ candidates,
488
+ salience
489
+ };
490
+ }
491
+
492
+ // src/dsp/tempoHypotheses.ts
493
+ function intervalToBpm(intervalSec) {
494
+ return 60 / intervalSec;
495
+ }
496
+ function bpmToInterval(bpm) {
497
+ return 60 / bpm;
498
+ }
499
+ function computeIOIs(candidates, weightByStrength) {
500
+ if (candidates.length < 2) return [];
501
+ const iois = [];
502
+ const sorted = [...candidates].sort((a, b) => a.time - b.time);
503
+ for (let i = 1; i < sorted.length; i++) {
504
+ const prev = sorted[i - 1];
505
+ const curr = sorted[i];
506
+ const interval = curr.time - prev.time;
507
+ if (interval <= 0) continue;
508
+ const weight = weightByStrength ? Math.sqrt(prev.strength * curr.strength) : 1;
509
+ iois.push({ intervalSec: interval, weight });
510
+ }
511
+ return iois;
512
+ }
513
+ function buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm) {
514
+ const numBins = Math.ceil((maxBpm - minBpm) / binSizeBpm);
515
+ const counts = new Float32Array(numBins);
516
+ const bpmBins = new Float32Array(numBins);
517
+ for (let i = 0; i < numBins; i++) {
518
+ bpmBins[i] = minBpm + (i + 0.5) * binSizeBpm;
519
+ }
520
+ const minInterval = bpmToInterval(maxBpm);
521
+ const maxInterval = bpmToInterval(minBpm);
522
+ for (const { intervalSec, weight } of iois) {
523
+ if (intervalSec < minInterval || intervalSec > maxInterval) continue;
524
+ const bpm = intervalToBpm(intervalSec);
525
+ const binIndex = Math.floor((bpm - minBpm) / binSizeBpm);
526
+ if (binIndex >= 0 && binIndex < numBins) {
527
+ counts[binIndex] = (counts[binIndex] ?? 0) + weight;
528
+ }
529
+ }
530
+ return { bpmBins, counts };
531
+ }
532
+ function findHistogramPeaks(counts, minHeight) {
533
+ const peaks = [];
534
+ for (let i = 1; i < counts.length - 1; i++) {
535
+ const curr = counts[i];
536
+ const prev = counts[i - 1];
537
+ const next = counts[i + 1];
538
+ if (curr > prev && curr > next && curr >= minHeight) {
539
+ peaks.push({ index: i, height: curr });
540
+ }
541
+ }
542
+ if (counts.length > 0 && counts[0] >= minHeight && counts[0] > (counts[1] ?? 0)) {
543
+ peaks.push({ index: 0, height: counts[0] });
544
+ }
545
+ if (counts.length > 1) {
546
+ const last = counts.length - 1;
547
+ if (counts[last] >= minHeight && counts[last] > (counts[last - 1] ?? 0)) {
548
+ peaks.push({ index: last, height: counts[last] });
549
+ }
550
+ }
551
+ peaks.sort((a, b) => b.height - a.height);
552
+ return peaks.map((p) => p.index);
553
+ }
554
+ function refinePeakBpm(peakIndex, bpmBins, counts, binSizeBpm) {
555
+ let totalWeight = 0;
556
+ let weightedBpm = 0;
557
+ let minBinBpm = bpmBins[peakIndex] - binSizeBpm / 2;
558
+ let maxBinBpm = bpmBins[peakIndex] + binSizeBpm / 2;
559
+ for (let offset = -1; offset <= 1; offset++) {
560
+ const idx = peakIndex + offset;
561
+ if (idx < 0 || idx >= bpmBins.length) continue;
562
+ const w = counts[idx];
563
+ const bpm = bpmBins[idx];
564
+ totalWeight += w;
565
+ weightedBpm += w * bpm;
566
+ if (w > 0) {
567
+ minBinBpm = Math.min(minBinBpm, bpm - binSizeBpm / 2);
568
+ maxBinBpm = Math.max(maxBinBpm, bpm + binSizeBpm / 2);
569
+ }
570
+ }
571
+ const refinedBpm = totalWeight > 0 ? weightedBpm / totalWeight : bpmBins[peakIndex];
572
+ return {
573
+ bpm: refinedBpm,
574
+ peakHeight: counts[peakIndex],
575
+ binRange: [minBinBpm, maxBinBpm],
576
+ totalWeight
577
+ };
578
+ }
579
+ function getHarmonicRatio(bpm1, bpm2, tolerance = 0.03) {
580
+ const ratios = [0.5, 1 / 3, 2 / 3, 1, 1.5, 2, 3];
581
+ for (const ratio of ratios) {
582
+ const expected = bpm1 * ratio;
583
+ const relativeError = Math.abs(bpm2 - expected) / expected;
584
+ if (relativeError <= tolerance) {
585
+ return ratio;
586
+ }
587
+ }
588
+ return null;
589
+ }
590
+ function assignHarmonicFamilies(hypotheses) {
591
+ if (hypotheses.length === 0) return;
592
+ const families = /* @__PURE__ */ new Map();
593
+ for (const hyp of hypotheses) {
594
+ let foundFamily = false;
595
+ for (const [familyId, family] of families) {
596
+ const ratio = getHarmonicRatio(family.rootBpm, hyp.bpm);
597
+ if (ratio !== null) {
598
+ hyp.familyId = familyId;
599
+ hyp.harmonicRatio = ratio;
600
+ family.members.push(hyp);
601
+ foundFamily = true;
602
+ break;
603
+ }
604
+ }
605
+ if (!foundFamily) {
606
+ const familyId = `fam-${Math.round(hyp.bpm)}`;
607
+ hyp.familyId = familyId;
608
+ hyp.harmonicRatio = 1;
609
+ families.set(familyId, { rootBpm: hyp.bpm, members: [hyp] });
610
+ }
611
+ }
612
+ }
613
+ function normalizeConfidence(hypotheses) {
614
+ if (hypotheses.length === 0) return;
615
+ const maxHeight = Math.max(...hypotheses.map((h) => h.evidence.peakHeight));
616
+ if (maxHeight <= 0) return;
617
+ for (const hyp of hypotheses) {
618
+ hyp.confidence = hyp.evidence.peakHeight / maxHeight;
619
+ }
620
+ }
621
+ function generateTempoHypotheses(candidates, options) {
622
+ const minBpm = options?.minBpm ?? 24;
623
+ const maxBpm = options?.maxBpm ?? 300;
624
+ const binSizeBpm = options?.binSizeBpm ?? 1;
625
+ const maxHypotheses = options?.maxHypotheses ?? 10;
626
+ const minConfidence = options?.minConfidence ?? 0.05;
627
+ const weightByStrength = options?.weightByStrength ?? true;
628
+ const includeHistogram = options?.includeHistogram ?? false;
629
+ if (candidates.length < 2) {
630
+ return {
631
+ hypotheses: [],
632
+ inputCandidateCount: candidates.length,
633
+ histogram: includeHistogram ? {
634
+ bpmBins: new Float32Array(0),
635
+ counts: new Float32Array(0)
636
+ } : void 0
637
+ };
638
+ }
639
+ const iois = computeIOIs(candidates, weightByStrength);
640
+ if (iois.length === 0) {
641
+ return {
642
+ hypotheses: [],
643
+ inputCandidateCount: candidates.length,
644
+ histogram: includeHistogram ? {
645
+ bpmBins: new Float32Array(0),
646
+ counts: new Float32Array(0)
647
+ } : void 0
648
+ };
649
+ }
650
+ const { bpmBins, counts } = buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm);
651
+ const maxCount = Math.max(...counts);
652
+ const minHeight = maxCount * minConfidence;
653
+ const peakIndices = findHistogramPeaks(counts, minHeight);
654
+ const hypotheses = [];
655
+ for (const peakIndex of peakIndices.slice(0, maxHypotheses * 2)) {
656
+ const { bpm, peakHeight, binRange, totalWeight } = refinePeakBpm(
657
+ peakIndex,
658
+ bpmBins,
659
+ counts,
660
+ binSizeBpm
661
+ );
662
+ if (maxCount > 0 && peakHeight / maxCount < minConfidence) continue;
663
+ const evidence = {
664
+ supportingIntervalCount: Math.round(totalWeight),
665
+ weightedSupport: totalWeight,
666
+ peakHeight,
667
+ binRange
668
+ };
669
+ hypotheses.push({
670
+ id: "",
671
+ // Will be assigned after sorting
672
+ bpm: Math.round(bpm * 10) / 10,
673
+ // Round to 0.1 BPM precision
674
+ confidence: 0,
675
+ // Will be normalized
676
+ evidence,
677
+ familyId: "",
678
+ // Will be assigned
679
+ harmonicRatio: 1
680
+ // Will be assigned
681
+ });
682
+ }
683
+ assignHarmonicFamilies(hypotheses);
684
+ normalizeConfidence(hypotheses);
685
+ const filtered = hypotheses.filter((h) => h.confidence >= minConfidence).sort((a, b) => b.confidence - a.confidence).slice(0, maxHypotheses);
686
+ for (let i = 0; i < filtered.length; i++) {
687
+ filtered[i].id = `hyp-${i}`;
688
+ }
689
+ return {
690
+ hypotheses: filtered,
691
+ inputCandidateCount: candidates.length,
692
+ histogram: includeHistogram ? { bpmBins, counts } : void 0
693
+ };
694
+ }
695
+
56
696
  // src/gpu/kernels/melProject.wgsl.ts
57
697
  var melProjectWGSL = (
58
698
  /* wgsl */
@@ -162,6 +802,20 @@ function hzToMel(hz) {
162
802
  function melToHz(mel) {
163
803
  return 700 * (Math.pow(10, mel / 2595) - 1);
164
804
  }
805
+ function hzToFeatureIndex(hz, config) {
806
+ const melMin = hzToMel(config.fMin);
807
+ const melMax = hzToMel(config.fMax);
808
+ const melHz = hzToMel(hz);
809
+ const normalized = (melHz - melMin) / (melMax - melMin);
810
+ return normalized * (config.nMels - 1);
811
+ }
812
+ function featureIndexToHz(index, config) {
813
+ const melMin = hzToMel(config.fMin);
814
+ const melMax = hzToMel(config.fMax);
815
+ const normalized = index / (config.nMels - 1);
816
+ const mel = melMin + normalized * (melMax - melMin);
817
+ return melToHz(mel);
818
+ }
165
819
  function buildMelFilterBank(sampleRate, fftSize, nMels, fMin, fMax) {
166
820
  const nBins = (fftSize >>> 1) + 1;
167
821
  const nyquist = sampleRate / 2;
@@ -351,248 +1005,16 @@ function delta(features, options = {}) {
351
1005
  }
352
1006
  d[f] = denom > 0 ? num / denom : 0;
353
1007
  }
354
- out[t] = d;
355
- }
356
- return { times: features.times, values: out };
357
- }
358
- function deltaDelta(features, options = {}) {
359
- return delta(delta(features, options), options);
360
- }
361
-
362
- // src/gpu/kernels/onsetEnvelope.wgsl.ts
363
- var onsetEnvelopeWGSL = (
364
- /* wgsl */
365
- `
366
- // Compute onset strength envelope from a (log) mel spectrogram.
367
- //
368
- // Input layout: melFlat[t*nMels + m]
369
- // Output layout: out[t]
370
- //
371
- // We compute novelty per frame:
372
- // novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
373
- // or sum_m abs(...)
374
- //
375
- // One invocation computes one frame index (t). This is memory-bound but reduces a full
376
- // (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.
377
-
378
- struct Params {
379
- nMels: u32,
380
- nFrames: u32,
381
- diffMethod: u32, // 0=rectified, 1=abs
382
- _pad: u32,
383
- };
384
-
385
- @group(0) @binding(0) var<storage, read> melFlat: array<f32>;
386
- @group(0) @binding(1) var<storage, read_write> out: array<f32>;
387
- @group(0) @binding(2) var<uniform> params: Params;
388
-
389
- @compute @workgroup_size(256)
390
- fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
391
- let t = gid.x;
392
- if (t >= params.nFrames) { return; }
393
-
394
- if (t == 0u) {
395
- out[t] = 0.0;
396
- return;
397
- }
398
-
399
- let nMels = params.nMels;
400
- var sum: f32 = 0.0;
401
-
402
- // Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
403
- // (Future optimisation: parallelise reduction within workgroup.)
404
- for (var m: u32 = 0u; m < nMels; m = m + 1u) {
405
- let a = melFlat[t * nMels + m];
406
- let b = melFlat[(t - 1u) * nMels + m];
407
- let d = a - b;
408
-
409
- if (params.diffMethod == 1u) {
410
- // abs
411
- sum = sum + abs(d);
412
- } else {
413
- // rectified
414
- sum = sum + max(0.0, d);
415
- }
416
- }
417
-
418
- out[t] = sum / max(1.0, f32(nMels));
419
- }
420
- `
421
- );
422
-
423
- // src/gpu/onsetEnvelope.ts
424
- async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
425
- const { device } = gpu;
426
- const { nFrames, nMels, melFlat, diffMethod } = input;
427
- if (melFlat.length !== nFrames * nMels) {
428
- throw new Error("@octoseq/mir: melFlat length mismatch");
429
- }
430
- const melBuffer = createAndWriteStorageBuffer(gpu, melFlat);
431
- const outByteLen = byteSizeF32(nFrames);
432
- const outBuffer = createStorageOutBuffer(gpu, outByteLen);
433
- const readback = createReadbackBuffer(gpu, outByteLen);
434
- const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
435
- const pipeline = device.createComputePipeline({
436
- layout: "auto",
437
- compute: { module: shader, entryPoint: "main" }
438
- });
439
- const diffU32 = diffMethod === "abs" ? 1 : 0;
440
- const params = createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0]));
441
- const bindGroup = device.createBindGroup({
442
- layout: pipeline.getBindGroupLayout(0),
443
- entries: [
444
- { binding: 0, resource: { buffer: melBuffer } },
445
- { binding: 1, resource: { buffer: outBuffer } },
446
- { binding: 2, resource: { buffer: params } }
447
- ]
448
- });
449
- const encoder = device.createCommandEncoder();
450
- const pass = encoder.beginComputePass();
451
- pass.setPipeline(pipeline);
452
- pass.setBindGroup(0, bindGroup);
453
- const wg = Math.ceil(nFrames / 256);
454
- pass.dispatchWorkgroups(wg);
455
- pass.end();
456
- const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
457
- melBuffer.destroy();
458
- outBuffer.destroy();
459
- params.destroy();
460
- readback.destroy();
461
- return {
462
- value: { out: new Float32Array(bytes) },
463
- timing
464
- };
465
- }
466
-
467
- // src/dsp/onset.ts
468
- function movingAverage(values, windowFrames) {
469
- if (windowFrames <= 1) return values;
470
- const n = values.length;
471
- const out = new Float32Array(n);
472
- const half = Math.floor(windowFrames / 2);
473
- const prefix = new Float64Array(n + 1);
474
- prefix[0] = 0;
475
- for (let i = 0; i < n; i++) {
476
- prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
477
- }
478
- for (let i = 0; i < n; i++) {
479
- const start = Math.max(0, i - half);
480
- const end = Math.min(n, i + half + 1);
481
- const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
482
- const count = Math.max(1, end - start);
483
- out[i] = sum / count;
484
- }
485
- return out;
486
- }
487
- function defaultOptions(opts) {
488
- return {
489
- useLog: opts?.useLog ?? false,
490
- smoothMs: opts?.smoothMs ?? 30,
491
- diffMethod: opts?.diffMethod ?? "rectified"
492
- };
493
- }
494
- function logCompress(x) {
495
- return Math.log1p(Math.max(0, x));
496
- }
497
- function onsetEnvelopeFromSpectrogram(spec, options) {
498
- const opts = defaultOptions(options);
499
- const nFrames = spec.times.length;
500
- const out = new Float32Array(nFrames);
501
- const nBins = (spec.fftSize >>> 1) + 1;
502
- out[0] = 0;
503
- for (let t = 1; t < nFrames; t++) {
504
- const cur = spec.magnitudes[t];
505
- const prev = spec.magnitudes[t - 1];
506
- if (!cur || !prev) {
507
- out[t] = 0;
508
- continue;
509
- }
510
- let sum = 0;
511
- for (let k = 0; k < nBins; k++) {
512
- let a = cur[k] ?? 0;
513
- let b = prev[k] ?? 0;
514
- if (opts.useLog) {
515
- a = logCompress(a);
516
- b = logCompress(b);
517
- }
518
- const d = a - b;
519
- sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
520
- }
521
- out[t] = nBins > 0 ? sum / nBins : 0;
522
- }
523
- const smoothMs = opts.smoothMs;
524
- if (smoothMs > 0 && nFrames >= 2) {
525
- const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
526
- const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
527
- return {
528
- times: spec.times,
529
- values: movingAverage(out, windowFrames | 1)
530
- };
531
- }
532
- return { times: spec.times, values: out };
533
- }
534
- function onsetEnvelopeFromMel(mel, options) {
535
- const opts = defaultOptions(options);
536
- const nFrames = mel.times.length;
537
- const out = new Float32Array(nFrames);
538
- out[0] = 0;
539
- for (let t = 1; t < nFrames; t++) {
540
- const cur = mel.melBands[t];
541
- const prev = mel.melBands[t - 1];
542
- if (!cur || !prev) {
543
- out[t] = 0;
544
- continue;
545
- }
546
- const nBands = cur.length;
547
- let sum = 0;
548
- for (let m = 0; m < nBands; m++) {
549
- let a = cur[m] ?? 0;
550
- let b = prev[m] ?? 0;
551
- if (opts.useLog) {
552
- a = logCompress(a);
553
- b = logCompress(b);
554
- }
555
- const d = a - b;
556
- sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
557
- }
558
- out[t] = nBands > 0 ? sum / nBands : 0;
559
- }
560
- const smoothMs = opts.smoothMs;
561
- if (smoothMs > 0 && nFrames >= 2) {
562
- const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
563
- const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
564
- return {
565
- times: mel.times,
566
- values: movingAverage(out, windowFrames | 1)
567
- };
1008
+ out[t] = d;
568
1009
  }
569
- return { times: mel.times, values: out };
1010
+ return { times: features.times, values: out };
570
1011
  }
571
- async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
572
- const nFrames = mel.times.length;
573
- const nMels = mel.melBands[0]?.length ?? 0;
574
- const melFlat = new Float32Array(nFrames * nMels);
575
- for (let t = 0; t < nFrames; t++) {
576
- const row = mel.melBands[t];
577
- if (!row) continue;
578
- melFlat.set(row, t * nMels);
579
- }
580
- const diffMethod = options?.diffMethod ?? "rectified";
581
- const { value, timing } = await gpuOnsetEnvelopeFromMelFlat(gpu, {
582
- nFrames,
583
- nMels,
584
- melFlat,
585
- diffMethod
586
- });
587
- return {
588
- times: mel.times,
589
- values: value.out,
590
- gpuTimings: { gpuSubmitToReadbackMs: timing.gpuSubmitToReadbackMs }
591
- };
1012
+ function deltaDelta(features, options = {}) {
1013
+ return delta(delta(features, options), options);
592
1014
  }
593
1015
 
594
1016
  // src/dsp/peakPick.ts
595
- function meanStd(values) {
1017
+ function meanStd2(values) {
596
1018
  const n = values.length;
597
1019
  if (n <= 0) return { mean: 0, std: 0 };
598
1020
  let mean = 0;
@@ -629,7 +1051,7 @@ function peakPick(times, values, options = {}) {
629
1051
  if (method === "median") {
630
1052
  thr = median(values) * factor;
631
1053
  } else {
632
- const { mean, std } = meanStd(values);
1054
+ const { mean, std } = meanStd2(values);
633
1055
  thr = mean + factor * std;
634
1056
  }
635
1057
  }
@@ -1005,68 +1427,6 @@ async function hpssGpu(spec, gpu, options = {}) {
1005
1427
  };
1006
1428
  }
1007
1429
 
1008
- // src/dsp/spectral.ts
1009
- function spectralCentroid(spec) {
1010
- const nFrames = spec.times.length;
1011
- const out = new Float32Array(nFrames);
1012
- const nBins = (spec.fftSize >>> 1) + 1;
1013
- const binHz = spec.sampleRate / spec.fftSize;
1014
- for (let t = 0; t < nFrames; t++) {
1015
- const mags = spec.magnitudes[t];
1016
- if (!mags) {
1017
- out[t] = 0;
1018
- continue;
1019
- }
1020
- let num = 0;
1021
- let den = 0;
1022
- for (let k = 0; k < nBins; k++) {
1023
- const m = mags[k] ?? 0;
1024
- const f = k * binHz;
1025
- num += f * m;
1026
- den += m;
1027
- }
1028
- out[t] = den > 0 ? num / den : 0;
1029
- }
1030
- return out;
1031
- }
1032
- function spectralFlux(spec) {
1033
- const nFrames = spec.times.length;
1034
- const out = new Float32Array(nFrames);
1035
- const nBins = (spec.fftSize >>> 1) + 1;
1036
- let prev = null;
1037
- for (let t = 0; t < nFrames; t++) {
1038
- const mags = spec.magnitudes[t];
1039
- if (!mags) {
1040
- out[t] = 0;
1041
- prev = null;
1042
- continue;
1043
- }
1044
- let sum = 0;
1045
- for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
1046
- if (sum <= 0) {
1047
- out[t] = 0;
1048
- prev = null;
1049
- continue;
1050
- }
1051
- const cur = new Float32Array(nBins);
1052
- const inv = 1 / sum;
1053
- for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
1054
- if (!prev) {
1055
- out[t] = 0;
1056
- prev = cur;
1057
- continue;
1058
- }
1059
- let flux = 0;
1060
- for (let k = 0; k < nBins; k++) {
1061
- const d = (cur[k] ?? 0) - (prev[k] ?? 0);
1062
- flux += Math.abs(d);
1063
- }
1064
- out[t] = flux;
1065
- prev = cur;
1066
- }
1067
- return out;
1068
- }
1069
-
1070
1430
  // src/dsp/fft.ts
1071
1431
  function hannWindow(size) {
1072
1432
  const w = new Float32Array(size);
@@ -1207,6 +1567,452 @@ async function spectrogram(audio, config, gpu, options = {}) {
1207
1567
  };
1208
1568
  }
1209
1569
 
1570
+ // src/dsp/cqt.ts
1571
+ var CQT_DEFAULTS = {
1572
+ /** Quarter-tone resolution (24 bins per octave) */
1573
+ binsPerOctave: 24,
1574
+ /** C1 (lowest note on a standard piano) */
1575
+ fMin: 32.7,
1576
+ /** C9 (well above audible range for most content) */
1577
+ fMax: 8372
1578
+ };
1579
+ function cqtBinToHz(bin, config) {
1580
+ return config.fMin * Math.pow(2, bin / config.binsPerOctave);
1581
+ }
1582
+ function hzToCqtBin(hz, config) {
1583
+ if (hz <= 0) return -Infinity;
1584
+ return config.binsPerOctave * Math.log2(hz / config.fMin);
1585
+ }
1586
+ function getNumOctaves(config) {
1587
+ return Math.log2(config.fMax / config.fMin);
1588
+ }
1589
+ function getNumBins(config) {
1590
+ const nOctaves = getNumOctaves(config);
1591
+ return Math.ceil(nOctaves * config.binsPerOctave);
1592
+ }
1593
+ function getCqtBinFrequencies(config) {
1594
+ const nBins = getNumBins(config);
1595
+ const freqs = new Float32Array(nBins);
1596
+ for (let k = 0; k < nBins; k++) {
1597
+ freqs[k] = cqtBinToHz(k, config);
1598
+ }
1599
+ return freqs;
1600
+ }
1601
+ var kernelBankCache = /* @__PURE__ */ new Map();
1602
+ function kernelCacheKey(config, fftSize, sampleRate) {
1603
+ return `${config.binsPerOctave}:${config.fMin}:${config.fMax}:${fftSize}:${sampleRate}`;
1604
+ }
1605
+ function createCqtKernel(binIndex, config, fftSize, sampleRate) {
1606
+ const centerFreq = cqtBinToHz(binIndex, config);
1607
+ const freqResolution = sampleRate / fftSize;
1608
+ const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
1609
+ const bandwidth = centerFreq / Q;
1610
+ const fLow = centerFreq - bandwidth / 2;
1611
+ const fHigh = centerFreq + bandwidth / 2;
1612
+ const startBin = Math.max(0, Math.floor(fLow / freqResolution));
1613
+ const endBin = Math.min(
1614
+ Math.floor(fftSize / 2) + 1,
1615
+ Math.ceil(fHigh / freqResolution) + 1
1616
+ );
1617
+ const numBins = Math.max(1, endBin - startBin);
1618
+ const weights = new Float32Array(numBins);
1619
+ for (let i = 0; i < numBins; i++) {
1620
+ const binFreq = (startBin + i) * freqResolution;
1621
+ if (binFreq <= centerFreq) {
1622
+ if (centerFreq > fLow) {
1623
+ weights[i] = (binFreq - fLow) / (centerFreq - fLow);
1624
+ } else {
1625
+ weights[i] = 1;
1626
+ }
1627
+ } else {
1628
+ if (fHigh > centerFreq) {
1629
+ weights[i] = (fHigh - binFreq) / (fHigh - centerFreq);
1630
+ } else {
1631
+ weights[i] = 1;
1632
+ }
1633
+ }
1634
+ weights[i] = Math.max(0, Math.min(1, weights[i] ?? 0));
1635
+ }
1636
+ let sum = 0;
1637
+ for (let i = 0; i < numBins; i++) {
1638
+ sum += weights[i] ?? 0;
1639
+ }
1640
+ if (sum > 0) {
1641
+ for (let i = 0; i < numBins; i++) {
1642
+ weights[i] = (weights[i] ?? 0) / sum;
1643
+ }
1644
+ }
1645
+ return {
1646
+ centerFreq,
1647
+ startBin,
1648
+ endBin,
1649
+ weights
1650
+ };
1651
+ }
1652
+ function getCqtKernelBank(config, fftSize, sampleRate) {
1653
+ const key = kernelCacheKey(config, fftSize, sampleRate);
1654
+ const cached = kernelBankCache.get(key);
1655
+ if (cached) return cached;
1656
+ const nBins = getNumBins(config);
1657
+ const kernels = new Array(nBins);
1658
+ for (let k = 0; k < nBins; k++) {
1659
+ kernels[k] = createCqtKernel(k, config, fftSize, sampleRate);
1660
+ }
1661
+ const bank = {
1662
+ config,
1663
+ fftSize,
1664
+ sampleRate,
1665
+ kernels
1666
+ };
1667
+ kernelBankCache.set(key, bank);
1668
+ return bank;
1669
+ }
1670
+ function applyCqtKernels(stftMagnitudes, kernelBank) {
1671
+ const nCqtBins = kernelBank.kernels.length;
1672
+ const cqtMagnitudes = new Float32Array(nCqtBins);
1673
+ for (let k = 0; k < nCqtBins; k++) {
1674
+ const kernel = kernelBank.kernels[k];
1675
+ if (!kernel) continue;
1676
+ let sum = 0;
1677
+ for (let i = 0; i < kernel.weights.length; i++) {
1678
+ const stftBin = kernel.startBin + i;
1679
+ const stftMag = stftMagnitudes[stftBin] ?? 0;
1680
+ const weight = kernel.weights[i] ?? 0;
1681
+ sum += stftMag * weight;
1682
+ }
1683
+ cqtMagnitudes[k] = sum;
1684
+ }
1685
+ return cqtMagnitudes;
1686
+ }
1687
+ function withCqtDefaults(partial) {
1688
+ return {
1689
+ binsPerOctave: partial?.binsPerOctave ?? CQT_DEFAULTS.binsPerOctave,
1690
+ fMin: partial?.fMin ?? CQT_DEFAULTS.fMin,
1691
+ fMax: partial?.fMax ?? CQT_DEFAULTS.fMax,
1692
+ hopSize: partial?.hopSize
1693
+ };
1694
+ }
1695
+ async function cqtSpectrogram(audio, config, options = {}) {
1696
+ const sampleRate = audio.sampleRate;
1697
+ if (config.fMin <= 0) {
1698
+ throw new Error("@octoseq/mir: CQT fMin must be positive");
1699
+ }
1700
+ if (config.fMax <= config.fMin) {
1701
+ throw new Error("@octoseq/mir: CQT fMax must be greater than fMin");
1702
+ }
1703
+ if (config.binsPerOctave <= 0) {
1704
+ throw new Error("@octoseq/mir: CQT binsPerOctave must be positive");
1705
+ }
1706
+ const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
1707
+ const minFreqResolution = config.fMin / Q / 2;
1708
+ const minFftSize = Math.ceil(sampleRate / minFreqResolution);
1709
+ let fftSize = 1;
1710
+ while (fftSize < minFftSize) {
1711
+ fftSize *= 2;
1712
+ }
1713
+ fftSize = Math.min(fftSize, 16384);
1714
+ const hopSize = config.hopSize ?? Math.floor(fftSize / 4);
1715
+ const stft = await spectrogram(
1716
+ audio,
1717
+ { fftSize, hopSize, window: "hann" },
1718
+ void 0,
1719
+ { isCancelled: options.isCancelled }
1720
+ );
1721
+ const kernelBank = getCqtKernelBank(config, fftSize, sampleRate);
1722
+ const nFrames = stft.magnitudes.length;
1723
+ const cqtMagnitudes = new Array(nFrames);
1724
+ for (let frame = 0; frame < nFrames; frame++) {
1725
+ if (options.isCancelled?.()) {
1726
+ throw new Error("@octoseq/mir: cancelled");
1727
+ }
1728
+ const stftFrame = stft.magnitudes[frame];
1729
+ if (!stftFrame) continue;
1730
+ cqtMagnitudes[frame] = applyCqtKernels(stftFrame, kernelBank);
1731
+ }
1732
+ const nOctaves = getNumOctaves(config);
1733
+ getNumBins(config);
1734
+ return {
1735
+ sampleRate,
1736
+ config,
1737
+ times: stft.times,
1738
+ magnitudes: cqtMagnitudes,
1739
+ nOctaves,
1740
+ binsPerOctave: config.binsPerOctave,
1741
+ binFrequencies: getCqtBinFrequencies(config)
1742
+ };
1743
+ }
1744
+ async function computeCqt(audio, config, options = {}) {
1745
+ const startTime = performance.now();
1746
+ const fullConfig = withCqtDefaults(config);
1747
+ const cqt = await cqtSpectrogram(audio, fullConfig, options);
1748
+ const endTime = performance.now();
1749
+ return {
1750
+ cqt,
1751
+ meta: {
1752
+ backend: "cpu",
1753
+ usedGpu: false,
1754
+ timings: {
1755
+ totalMs: endTime - startTime,
1756
+ cpuMs: endTime - startTime
1757
+ }
1758
+ }
1759
+ };
1760
+ }
1761
+
1762
+ // src/dsp/cqtSignals.ts
1763
+ var BASS_MIN_HZ = 20;
1764
+ var BASS_MAX_HZ = 300;
1765
+ var TONAL_STABILITY_WINDOW_FRAMES = 20;
1766
+ var CHROMA_BINS = 12;
1767
+ function normalizeMinMax(values) {
1768
+ let min = Infinity;
1769
+ let max = -Infinity;
1770
+ for (let i = 0; i < values.length; i++) {
1771
+ const v = values[i] ?? 0;
1772
+ if (v < min) min = v;
1773
+ if (v > max) max = v;
1774
+ }
1775
+ const range = max - min;
1776
+ const result = new Float32Array(values.length);
1777
+ if (range > 0) {
1778
+ for (let i = 0; i < values.length; i++) {
1779
+ result[i] = ((values[i] ?? 0) - min) / range;
1780
+ }
1781
+ } else {
1782
+ result.fill(0.5);
1783
+ }
1784
+ return result;
1785
+ }
1786
+ function weightedCentroid(values, startIndex = 0) {
1787
+ let sumWeighted = 0;
1788
+ let sumWeights = 0;
1789
+ for (let i = 0; i < values.length; i++) {
1790
+ const weight = values[i] ?? 0;
1791
+ sumWeighted += (startIndex + i) * weight;
1792
+ sumWeights += weight;
1793
+ }
1794
+ return sumWeights > 0 ? sumWeighted / sumWeights : startIndex + values.length / 2;
1795
+ }
1796
+ function computeHarmonicEnergyFrame(frame, cqt) {
1797
+ if (frame.length === 0) return 0;
1798
+ let totalEnergy = 0;
1799
+ for (let i = 0; i < frame.length; i++) {
1800
+ const mag = frame[i] ?? 0;
1801
+ totalEnergy += mag * mag;
1802
+ }
1803
+ if (totalEnergy === 0) return 0;
1804
+ let maxMag = 0;
1805
+ let fundamentalBin = 0;
1806
+ for (let i = 0; i < frame.length; i++) {
1807
+ const mag = frame[i] ?? 0;
1808
+ if (mag > maxMag) {
1809
+ maxMag = mag;
1810
+ fundamentalBin = i;
1811
+ }
1812
+ }
1813
+ const fundamentalFreq = cqtBinToHz(fundamentalBin, cqt.config);
1814
+ let harmonicEnergy2 = 0;
1815
+ const numHarmonics = 6;
1816
+ for (let h = 1; h <= numHarmonics; h++) {
1817
+ const harmonicFreq = fundamentalFreq * h;
1818
+ const harmonicBin = Math.round(hzToCqtBin(harmonicFreq, cqt.config));
1819
+ if (harmonicBin >= 0 && harmonicBin < frame.length) {
1820
+ const mag = frame[harmonicBin] ?? 0;
1821
+ const weight = 1 / h;
1822
+ harmonicEnergy2 += mag * mag * weight;
1823
+ }
1824
+ }
1825
+ let weightSum = 0;
1826
+ for (let h = 1; h <= numHarmonics; h++) {
1827
+ weightSum += 1 / h;
1828
+ }
1829
+ harmonicEnergy2 /= weightSum;
1830
+ return Math.min(1, harmonicEnergy2 / totalEnergy);
1831
+ }
1832
+ function harmonicEnergy(cqt) {
1833
+ const startTime = performance.now();
1834
+ const nFrames = cqt.magnitudes.length;
1835
+ const values = new Float32Array(nFrames);
1836
+ for (let frame = 0; frame < nFrames; frame++) {
1837
+ const cqtFrame = cqt.magnitudes[frame];
1838
+ if (cqtFrame) {
1839
+ values[frame] = computeHarmonicEnergyFrame(cqtFrame, cqt);
1840
+ }
1841
+ }
1842
+ const normalized = normalizeMinMax(values);
1843
+ const endTime = performance.now();
1844
+ return {
1845
+ kind: "cqt1d",
1846
+ signalId: "harmonicEnergy",
1847
+ times: cqt.times,
1848
+ values: normalized,
1849
+ meta: {
1850
+ backend: "cpu",
1851
+ usedGpu: false,
1852
+ timings: {
1853
+ totalMs: endTime - startTime,
1854
+ cpuMs: endTime - startTime
1855
+ }
1856
+ }
1857
+ };
1858
+ }
1859
+ function bassPitchMotion(cqt) {
1860
+ const startTime = performance.now();
1861
+ const nFrames = cqt.magnitudes.length;
1862
+ const bassStartBin = Math.max(0, Math.floor(hzToCqtBin(BASS_MIN_HZ, cqt.config)));
1863
+ const bassEndBin = Math.min(
1864
+ cqt.magnitudes[0]?.length ?? 0,
1865
+ Math.ceil(hzToCqtBin(BASS_MAX_HZ, cqt.config))
1866
+ );
1867
+ const bassNumBins = bassEndBin - bassStartBin;
1868
+ if (bassNumBins <= 0) {
1869
+ return {
1870
+ kind: "cqt1d",
1871
+ signalId: "bassPitchMotion",
1872
+ times: cqt.times,
1873
+ values: new Float32Array(nFrames),
1874
+ meta: {
1875
+ backend: "cpu",
1876
+ usedGpu: false,
1877
+ timings: { totalMs: 0, cpuMs: 0 }
1878
+ }
1879
+ };
1880
+ }
1881
+ const centroids = new Float32Array(nFrames);
1882
+ for (let frame = 0; frame < nFrames; frame++) {
1883
+ const cqtFrame = cqt.magnitudes[frame];
1884
+ if (!cqtFrame) continue;
1885
+ const bassBins = new Float32Array(bassNumBins);
1886
+ for (let i = 0; i < bassNumBins; i++) {
1887
+ bassBins[i] = cqtFrame[bassStartBin + i] ?? 0;
1888
+ }
1889
+ centroids[frame] = weightedCentroid(bassBins, bassStartBin);
1890
+ }
1891
+ const motion = new Float32Array(nFrames);
1892
+ for (let frame = 1; frame < nFrames; frame++) {
1893
+ motion[frame] = Math.abs((centroids[frame] ?? 0) - (centroids[frame - 1] ?? 0));
1894
+ }
1895
+ motion[0] = motion[1] ?? 0;
1896
+ const normalized = normalizeMinMax(motion);
1897
+ const endTime = performance.now();
1898
+ return {
1899
+ kind: "cqt1d",
1900
+ signalId: "bassPitchMotion",
1901
+ times: cqt.times,
1902
+ values: normalized,
1903
+ meta: {
1904
+ backend: "cpu",
1905
+ usedGpu: false,
1906
+ timings: {
1907
+ totalMs: endTime - startTime,
1908
+ cpuMs: endTime - startTime
1909
+ }
1910
+ }
1911
+ };
1912
+ }
1913
+ function computeChroma(frame, binsPerOctave) {
1914
+ const chroma = new Float32Array(CHROMA_BINS);
1915
+ const binsPerSemitone = binsPerOctave / CHROMA_BINS;
1916
+ for (let i = 0; i < frame.length; i++) {
1917
+ const chromaBin = Math.floor(i % binsPerOctave / binsPerSemitone) % CHROMA_BINS;
1918
+ const mag = frame[i] ?? 0;
1919
+ chroma[chromaBin] = (chroma[chromaBin] ?? 0) + mag * mag;
1920
+ }
1921
+ let sum = 0;
1922
+ for (let i = 0; i < CHROMA_BINS; i++) {
1923
+ sum += chroma[i] ?? 0;
1924
+ }
1925
+ if (sum > 0) {
1926
+ for (let i = 0; i < CHROMA_BINS; i++) {
1927
+ chroma[i] = (chroma[i] ?? 0) / sum;
1928
+ }
1929
+ }
1930
+ return chroma;
1931
+ }
1932
+ function tonalStability(cqt) {
1933
+ const startTime = performance.now();
1934
+ const nFrames = cqt.magnitudes.length;
1935
+ const chromas = new Array(nFrames);
1936
+ for (let frame = 0; frame < nFrames; frame++) {
1937
+ const cqtFrame = cqt.magnitudes[frame];
1938
+ if (cqtFrame) {
1939
+ chromas[frame] = computeChroma(cqtFrame, cqt.binsPerOctave);
1940
+ } else {
1941
+ chromas[frame] = new Float32Array(CHROMA_BINS);
1942
+ }
1943
+ }
1944
+ const halfWindow = Math.floor(TONAL_STABILITY_WINDOW_FRAMES / 2);
1945
+ const instability = new Float32Array(nFrames);
1946
+ for (let frame = 0; frame < nFrames; frame++) {
1947
+ const windowStart = Math.max(0, frame - halfWindow);
1948
+ const windowEnd = Math.min(nFrames, frame + halfWindow + 1);
1949
+ const windowSize = windowEnd - windowStart;
1950
+ const avgChroma = new Float32Array(CHROMA_BINS);
1951
+ for (let w = windowStart; w < windowEnd; w++) {
1952
+ const chroma = chromas[w];
1953
+ if (chroma) {
1954
+ for (let c = 0; c < CHROMA_BINS; c++) {
1955
+ avgChroma[c] = (avgChroma[c] ?? 0) + (chroma[c] ?? 0);
1956
+ }
1957
+ }
1958
+ }
1959
+ for (let c = 0; c < CHROMA_BINS; c++) {
1960
+ avgChroma[c] = (avgChroma[c] ?? 0) / windowSize;
1961
+ }
1962
+ let totalVariance = 0;
1963
+ for (let w = windowStart; w < windowEnd; w++) {
1964
+ const chroma = chromas[w];
1965
+ if (chroma) {
1966
+ for (let c = 0; c < CHROMA_BINS; c++) {
1967
+ const diff = (chroma[c] ?? 0) - (avgChroma[c] ?? 0);
1968
+ totalVariance += diff * diff;
1969
+ }
1970
+ }
1971
+ }
1972
+ totalVariance /= windowSize * CHROMA_BINS;
1973
+ instability[frame] = totalVariance;
1974
+ }
1975
+ const normalizedInstability = normalizeMinMax(instability);
1976
+ const stability = new Float32Array(nFrames);
1977
+ for (let frame = 0; frame < nFrames; frame++) {
1978
+ stability[frame] = 1 - (normalizedInstability[frame] ?? 0);
1979
+ }
1980
+ const endTime = performance.now();
1981
+ return {
1982
+ kind: "cqt1d",
1983
+ signalId: "tonalStability",
1984
+ times: cqt.times,
1985
+ values: stability,
1986
+ meta: {
1987
+ backend: "cpu",
1988
+ usedGpu: false,
1989
+ timings: {
1990
+ totalMs: endTime - startTime,
1991
+ cpuMs: endTime - startTime
1992
+ }
1993
+ }
1994
+ };
1995
+ }
1996
+ function computeCqtSignal(cqt, signalId) {
1997
+ switch (signalId) {
1998
+ case "harmonicEnergy":
1999
+ return harmonicEnergy(cqt);
2000
+ case "bassPitchMotion":
2001
+ return bassPitchMotion(cqt);
2002
+ case "tonalStability":
2003
+ return tonalStability(cqt);
2004
+ default:
2005
+ throw new Error(`@octoseq/mir: unknown CQT signal ID: ${signalId}`);
2006
+ }
2007
+ }
2008
+ function computeAllCqtSignals(cqt) {
2009
+ const results = /* @__PURE__ */ new Map();
2010
+ results.set("harmonicEnergy", harmonicEnergy(cqt));
2011
+ results.set("bassPitchMotion", bassPitchMotion(cqt));
2012
+ results.set("tonalStability", tonalStability(cqt));
2013
+ return results;
2014
+ }
2015
+
1210
2016
  // src/runner/runMir.ts
1211
2017
  function nowMs2() {
1212
2018
  return typeof performance !== "undefined" ? performance.now() : Date.now();
@@ -1400,6 +2206,66 @@ async function runMir(audio, request, options = {}) {
1400
2206
  }
1401
2207
  };
1402
2208
  }
2209
+ if (request.fn === "beatCandidates") {
2210
+ const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
2211
+ const beatOpts = request.beatCandidates ?? {};
2212
+ const result = detectBeatCandidates(mel2, spec, {
2213
+ minIntervalSec: beatOpts.minIntervalSec,
2214
+ thresholdFactor: beatOpts.thresholdFactor,
2215
+ smoothMs: beatOpts.smoothMs
2216
+ });
2217
+ const end2 = nowMs2();
2218
+ return {
2219
+ kind: "beatCandidates",
2220
+ times: result.salience.times,
2221
+ candidates: result.candidates,
2222
+ salience: beatOpts.includeSalience ? result.salience : void 0,
2223
+ meta: {
2224
+ backend: "cpu",
2225
+ usedGpu: false,
2226
+ timings: {
2227
+ totalMs: end2 - t0,
2228
+ cpuMs: cpuAfterSpec - cpuStart + melCpuMs
2229
+ }
2230
+ }
2231
+ };
2232
+ }
2233
+ if (request.fn === "tempoHypotheses") {
2234
+ const { mel: mel2, cpuExtraMs: melCpuMs } = await computeMel(false);
2235
+ const beatOpts = request.beatCandidates ?? {};
2236
+ const beatResult = detectBeatCandidates(mel2, spec, {
2237
+ minIntervalSec: beatOpts.minIntervalSec,
2238
+ thresholdFactor: beatOpts.thresholdFactor,
2239
+ smoothMs: beatOpts.smoothMs
2240
+ });
2241
+ const tempoStart = nowMs2();
2242
+ const tempoOpts = request.tempoHypotheses ?? {};
2243
+ const result = generateTempoHypotheses(beatResult.candidates, {
2244
+ minBpm: tempoOpts.minBpm,
2245
+ maxBpm: tempoOpts.maxBpm,
2246
+ binSizeBpm: tempoOpts.binSizeBpm,
2247
+ maxHypotheses: tempoOpts.maxHypotheses,
2248
+ minConfidence: tempoOpts.minConfidence,
2249
+ weightByStrength: tempoOpts.weightByStrength,
2250
+ includeHistogram: tempoOpts.includeHistogram
2251
+ });
2252
+ const end2 = nowMs2();
2253
+ return {
2254
+ kind: "tempoHypotheses",
2255
+ times: spec.times,
2256
+ hypotheses: result.hypotheses,
2257
+ inputCandidateCount: result.inputCandidateCount,
2258
+ histogram: result.histogram,
2259
+ meta: {
2260
+ backend: "cpu",
2261
+ usedGpu: false,
2262
+ timings: {
2263
+ totalMs: end2 - t0,
2264
+ cpuMs: cpuAfterSpec - cpuStart + melCpuMs + (end2 - tempoStart)
2265
+ }
2266
+ }
2267
+ };
2268
+ }
1403
2269
  if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
1404
2270
  const hpssSpecConfig = options.hpss?.spectrogram ?? specConfig;
1405
2271
  const needsHpssSpec = hpssSpecConfig.fftSize !== specConfig.fftSize || hpssSpecConfig.hopSize !== specConfig.hopSize;
@@ -1502,6 +2368,36 @@ async function runMir(audio, request, options = {}) {
1502
2368
  }
1503
2369
  };
1504
2370
  }
2371
+ if (request.fn === "cqtHarmonicEnergy" || request.fn === "cqtBassPitchMotion" || request.fn === "cqtTonalStability") {
2372
+ const cqtStart = nowMs2();
2373
+ const cqtConfig = withCqtDefaults(request.cqt);
2374
+ const cqt = await cqtSpectrogram(asAudioBufferLike(audio), cqtConfig, {
2375
+ isCancelled: options.isCancelled
2376
+ });
2377
+ const cqtEnd = nowMs2();
2378
+ let signal;
2379
+ if (request.fn === "cqtHarmonicEnergy") {
2380
+ signal = harmonicEnergy(cqt);
2381
+ } else if (request.fn === "cqtBassPitchMotion") {
2382
+ signal = bassPitchMotion(cqt);
2383
+ } else {
2384
+ signal = tonalStability(cqt);
2385
+ }
2386
+ const end2 = nowMs2();
2387
+ return {
2388
+ kind: "1d",
2389
+ times: signal.times,
2390
+ values: signal.values,
2391
+ meta: {
2392
+ backend: "cpu",
2393
+ usedGpu: false,
2394
+ timings: {
2395
+ totalMs: end2 - t0,
2396
+ cpuMs: cqtEnd - cqtStart + (end2 - cqtEnd)
2397
+ }
2398
+ }
2399
+ };
2400
+ }
1505
2401
  const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(backend === "gpu");
1506
2402
  const end = nowMs2();
1507
2403
  return {
@@ -1520,6 +2416,6 @@ async function runMir(audio, request, options = {}) {
1520
2416
  };
1521
2417
  }
1522
2418
 
1523
- export { delta, deltaDelta, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
1524
- //# sourceMappingURL=chunk-DUWYCAVG.js.map
1525
- //# sourceMappingURL=chunk-DUWYCAVG.js.map
2419
+ export { CQT_DEFAULTS, bassPitchMotion, beatSalienceFromMel, computeAllCqtSignals, computeCqt, computeCqtSignal, cqtBinToHz, cqtSpectrogram, delta, deltaDelta, detectBeatCandidates, featureIndexToHz, generateTempoHypotheses, getCqtBinFrequencies, getNumBins, getNumOctaves, harmonicEnergy, hpss, hzToCqtBin, hzToFeatureIndex, hzToMel, melSpectrogram, melToHz, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram, tonalStability, withCqtDefaults };
2420
+ //# sourceMappingURL=chunk-OLIDGECY.js.map
2421
+ //# sourceMappingURL=chunk-OLIDGECY.js.map