@octoseq/mir 0.1.0-main.0d2814e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/chunk-DUWYCAVG.js +1525 -0
  2. package/dist/chunk-DUWYCAVG.js.map +1 -0
  3. package/dist/index.d.ts +450 -0
  4. package/dist/index.js +1234 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/runMir-CSIBwNZ3.d.ts +84 -0
  7. package/dist/runner/runMir.d.ts +2 -0
  8. package/dist/runner/runMir.js +3 -0
  9. package/dist/runner/runMir.js.map +1 -0
  10. package/dist/runner/workerProtocol.d.ts +169 -0
  11. package/dist/runner/workerProtocol.js +11 -0
  12. package/dist/runner/workerProtocol.js.map +1 -0
  13. package/dist/types-BE3py4fZ.d.ts +83 -0
  14. package/package.json +55 -0
  15. package/src/dsp/fft.ts +22 -0
  16. package/src/dsp/fftBackend.ts +53 -0
  17. package/src/dsp/fftBackendFftjs.ts +60 -0
  18. package/src/dsp/hpss.ts +152 -0
  19. package/src/dsp/hpssGpu.ts +101 -0
  20. package/src/dsp/mel.ts +219 -0
  21. package/src/dsp/mfcc.ts +119 -0
  22. package/src/dsp/onset.ts +205 -0
  23. package/src/dsp/peakPick.ts +112 -0
  24. package/src/dsp/spectral.ts +95 -0
  25. package/src/dsp/spectrogram.ts +176 -0
  26. package/src/gpu/README.md +34 -0
  27. package/src/gpu/context.ts +44 -0
  28. package/src/gpu/helpers.ts +87 -0
  29. package/src/gpu/hpssMasks.ts +116 -0
  30. package/src/gpu/kernels/hpssMasks.wgsl.ts +137 -0
  31. package/src/gpu/kernels/melProject.wgsl.ts +48 -0
  32. package/src/gpu/kernels/onsetEnvelope.wgsl.ts +56 -0
  33. package/src/gpu/melProject.ts +98 -0
  34. package/src/gpu/onsetEnvelope.ts +81 -0
  35. package/src/gpu/webgpu.d.ts +176 -0
  36. package/src/index.ts +121 -0
  37. package/src/runner/runMir.ts +431 -0
  38. package/src/runner/workerProtocol.ts +189 -0
  39. package/src/search/featureVectorV1.ts +123 -0
  40. package/src/search/fingerprintV1.ts +230 -0
  41. package/src/search/refinedModelV1.ts +321 -0
  42. package/src/search/searchTrackV1.ts +206 -0
  43. package/src/search/searchTrackV1Guided.ts +863 -0
  44. package/src/search/similarity.ts +98 -0
  45. package/src/types.ts +105 -0
  46. package/src/util/display.ts +80 -0
  47. package/src/util/normalise.ts +58 -0
  48. package/src/util/stats.ts +25 -0
@@ -0,0 +1,1525 @@
1
+ import FFT from 'fft.js';
2
+
3
+ // src/gpu/helpers.ts
/**
 * Returns a millisecond timestamp suitable for measuring elapsed time.
 *
 * Uses `performance.now()` when a global `performance` object exists and
 * falls back to `Date.now()` otherwise.
 */
function nowMs() {
  if (typeof performance === "undefined") {
    return Date.now();
  }
  return performance.now();
}
/** Byte length occupied by `n` 32-bit floats (4 bytes per element). */
function byteSizeF32(n) {
  const BYTES_PER_F32 = 4;
  return n * BYTES_PER_F32;
}
/**
 * Allocates a GPU buffer sized for `data.length` f32 values with
 * STORAGE | COPY_DST usage and queues a write of `data` at offset 0.
 *
 * The buffer is returned without being destroyed here.
 */
function createAndWriteStorageBuffer(gpu, data) {
  const descriptor = {
    size: byteSizeF32(data.length),
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
  };
  const storage = gpu.device.createBuffer(descriptor);
  gpu.queue.writeBuffer(storage, 0, data);
  return storage;
}
18
+ function createUniformBufferU32x4(gpu, u32x4) {
19
+ if (u32x4.length !== 4) throw new Error("@octoseq/mir: uniform buffer must be 4 u32 values");
20
+ const buf = gpu.device.createBuffer({
21
+ size: 16,
22
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
23
+ });
24
+ gpu.queue.writeBuffer(buf, 0, u32x4);
25
+ return buf;
26
+ }
27
+ function createStorageOutBuffer(gpu, byteLength) {
28
+ return gpu.device.createBuffer({
29
+ size: byteLength,
30
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
31
+ });
32
+ }
33
+ function createReadbackBuffer(gpu, byteLength) {
34
+ return gpu.device.createBuffer({
35
+ size: byteLength,
36
+ usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
37
+ });
38
+ }
/**
 * Records a copy of `outBuffer` into `readback`, submits the encoder and
 * waits until the readback buffer has been mapped for reading.
 *
 * @returns `{ value, timing }` where `value` is an ArrayBuffer copy of the
 *   mapped range (taken before `unmap()`), and
 *   `timing.gpuSubmitToReadbackMs` is the wall-clock time from just before
 *   `queue.submit` to just after `mapAsync` resolves.
 */
async function submitAndReadback(gpu, encoder, outBuffer, readback, byteLength) {
  encoder.copyBufferToBuffer(outBuffer, 0, readback, 0, byteLength);

  const startedAt = nowMs();
  gpu.queue.submit([encoder.finish()]);
  await readback.mapAsync(GPUMapMode.READ);
  const finishedAt = nowMs();

  // slice(0) detaches the result from the mapping so it survives unmap().
  const bytes = readback.getMappedRange().slice(0);
  readback.unmap();

  const timing = { gpuSubmitToReadbackMs: finishedAt - startedAt };
  return { value: bytes, timing };
}
55
+
56
+ // src/gpu/kernels/melProject.wgsl.ts
// WGSL compute shader source for the mel projection.
//
// Bindings (group 0): 0 = `mags` (read), 1 = `filters` (read), 2 = `out`
// (read_write), 3 = `params` uniform { nBins, nMels, nFrames, _pad }.
//
// Each invocation handles one (frame, mel) pair taken from
// global_invocation_id.x / .y (workgroup size 16x16) and returns early when the
// pair is out of range. It accumulates mags[frame * nBins + k] *
// filters[mel * nBins + k] over all k and stores log10(1e-12 + sum) at
// out[frame * nMels + mel].
var melProjectWGSL = (
  /* wgsl */
  `
struct Params {
nBins: u32,
nMels: u32,
nFrames: u32,
_pad: u32,
};

@group(0) @binding(0) var<storage, read> mags : array<f32>;
@group(0) @binding(1) var<storage, read> filters : array<f32>;
@group(0) @binding(2) var<storage, read_write> out : array<f32>;
@group(0) @binding(3) var<uniform> params : Params;

fn log10(x: f32) -> f32 {
return log(x) / log(10.0);
}

@compute @workgroup_size(16, 16)
fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
let frame = gid.x;
let mel = gid.y;
if (frame >= params.nFrames || mel >= params.nMels) {
return;
}

var sum: f32 = 0.0;
let bins = params.nBins;
let magBase = frame * bins;
let filBase = mel * bins;

for (var k: u32 = 0u; k < bins; k = k + 1u) {
sum = sum + mags[magBase + k] * filters[filBase + k];
}

let eps: f32 = 1e-12;
out[frame * params.nMels + mel] = log10(eps + sum);
}
`
);
98
+
99
+ // src/gpu/melProject.ts
100
+ async function gpuMelProjectFlat(gpu, input) {
101
+ const { device } = gpu;
102
+ const { nFrames, nBins, nMels, magsFlat, filterFlat } = input;
103
+ if (magsFlat.length !== nFrames * nBins) {
104
+ throw new Error("@octoseq/mir: magsFlat length mismatch");
105
+ }
106
+ if (filterFlat.length !== nMels * nBins) {
107
+ throw new Error("@octoseq/mir: filterFlat length mismatch");
108
+ }
109
+ const magsBuffer = createAndWriteStorageBuffer(gpu, magsFlat);
110
+ const filterBuffer = createAndWriteStorageBuffer(gpu, filterFlat);
111
+ const outByteLen = byteSizeF32(nFrames * nMels);
112
+ const outBuffer = createStorageOutBuffer(gpu, outByteLen);
113
+ const readback = createReadbackBuffer(gpu, outByteLen);
114
+ const shader = device.createShaderModule({ code: melProjectWGSL });
115
+ const pipeline = device.createComputePipeline({
116
+ layout: "auto",
117
+ compute: {
118
+ module: shader,
119
+ entryPoint: "main"
120
+ }
121
+ });
122
+ const params = createUniformBufferU32x4(gpu, new Uint32Array([nBins, nMels, nFrames, 0]));
123
+ const bindGroup = device.createBindGroup({
124
+ layout: pipeline.getBindGroupLayout(0),
125
+ entries: [
126
+ { binding: 0, resource: { buffer: magsBuffer } },
127
+ { binding: 1, resource: { buffer: filterBuffer } },
128
+ { binding: 2, resource: { buffer: outBuffer } },
129
+ { binding: 3, resource: { buffer: params } }
130
+ ]
131
+ });
132
+ const encoder = device.createCommandEncoder();
133
+ const pass = encoder.beginComputePass();
134
+ pass.setPipeline(pipeline);
135
+ pass.setBindGroup(0, bindGroup);
136
+ const wgX = Math.ceil(nFrames / 16);
137
+ const wgY = Math.ceil(nMels / 16);
138
+ pass.dispatchWorkgroups(wgX, wgY);
139
+ pass.end();
140
+ const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
141
+ magsBuffer.destroy();
142
+ filterBuffer.destroy();
143
+ outBuffer.destroy();
144
+ params.destroy();
145
+ readback.destroy();
146
+ const outFlat = new Float32Array(bytes);
147
+ return {
148
+ value: { outFlat },
149
+ timing
150
+ };
151
+ }
152
+
153
+ // src/dsp/mel.ts
154
+ function assertPositiveInt(name, value) {
155
+ if (!Number.isFinite(value) || value <= 0 || (value | 0) !== value) {
156
+ throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
157
+ }
158
+ }
/** Converts a frequency in Hz to mels using `2595 * log10(1 + hz / 700)`. */
function hzToMel(hz) {
  const ratio = 1 + hz / 700;
  return 2595 * Math.log10(ratio);
}
/** Converts mels back to Hz using `700 * (10^(mel / 2595) - 1)`. */
function melToHz(mel) {
  const exponent = mel / 2595;
  const ratio = Math.pow(10, exponent);
  return 700 * (ratio - 1);
}
165
+ function buildMelFilterBank(sampleRate, fftSize, nMels, fMin, fMax) {
166
+ const nBins = (fftSize >>> 1) + 1;
167
+ const nyquist = sampleRate / 2;
168
+ const fMinClamped = Math.max(0, Math.min(fMin, nyquist));
169
+ const fMaxClamped = Math.max(0, Math.min(fMax, nyquist));
170
+ if (fMaxClamped <= fMinClamped) {
171
+ throw new Error("@octoseq/mir: mel fMax must be > fMin");
172
+ }
173
+ const melMin = hzToMel(fMinClamped);
174
+ const melMax = hzToMel(fMaxClamped);
175
+ const melPoints = new Float32Array(nMels + 2);
176
+ for (let i = 0; i < melPoints.length; i++) {
177
+ melPoints[i] = melMin + i * (melMax - melMin) / (nMels + 1);
178
+ }
179
+ const hzPoints = new Float32Array(melPoints.length);
180
+ for (let i = 0; i < hzPoints.length; i++) hzPoints[i] = melToHz(melPoints[i] ?? 0);
181
+ const binHz = sampleRate / fftSize;
182
+ const binPoints = new Int32Array(hzPoints.length);
183
+ for (let i = 0; i < binPoints.length; i++) {
184
+ binPoints[i] = Math.max(0, Math.min(nBins - 1, Math.round((hzPoints[i] ?? 0) / binHz)));
185
+ }
186
+ const filters = new Array(nMels);
187
+ for (let m = 0; m < nMels; m++) {
188
+ const left = binPoints[m] ?? 0;
189
+ const center = binPoints[m + 1] ?? 0;
190
+ const right = binPoints[m + 2] ?? 0;
191
+ const w = new Float32Array(nBins);
192
+ if (center === left || right === center) {
193
+ filters[m] = w;
194
+ continue;
195
+ }
196
+ for (let k = left; k < center; k++) {
197
+ w[k] = (k - left) / (center - left);
198
+ }
199
+ for (let k = center; k < right; k++) {
200
+ w[k] = (right - k) / (right - center);
201
+ }
202
+ filters[m] = w;
203
+ }
204
+ return filters;
205
+ }
/**
 * CPU mel projection: for every frame and every filter, stores
 * `log10(1e-12 + sum_k mags[k] * filter[k])`.
 *
 * Frames with no magnitude row yield an all-zero band row; a missing filter
 * leaves that band at 0.
 *
 * @returns `{ times, melBands }` where `times` is `spec.times` (same
 *   reference) and `melBands` has one Float32Array per frame.
 */
function cpuMelProject(spec, filters) {
  const EPS = 1e-12;
  const frameCount = spec.times.length;
  const bandCount = filters.length;
  const melBands = new Array(frameCount);

  for (let t = 0; t < frameCount; t++) {
    const bands = new Float32Array(bandCount);
    melBands[t] = bands;

    const frame = spec.magnitudes[t];
    if (!frame) continue;

    for (let m = 0; m < bandCount; m++) {
      const weights = filters[m];
      if (!weights) continue;
      let acc = 0;
      for (let k = 0; k < frame.length; k++) {
        acc += (frame[k] ?? 0) * (weights[k] ?? 0);
      }
      bands[m] = Math.log10(EPS + acc);
    }
  }

  return { times: spec.times, melBands };
}
/**
 * GPU mel projection wrapper: flattens the spectrogram frames and the filter
 * rows into row-major Float32Arrays, runs `gpuMelProjectFlat`, and exposes
 * the flat result as one `subarray` view per frame (all views share the same
 * underlying buffer).
 *
 * Missing frames or filters contribute zeros.
 *
 * @returns `{ times, melBands, gpuTimings: { gpuSubmitToReadbackMs } }`.
 */
async function gpuMelProject(spec, filters, gpu) {
  const nFrames = spec.times.length;
  const nMels = filters.length;
  const nBins = (spec.fftSize >>> 1) + 1;

  // Typed arrays start zero-filled, so absent rows can simply be skipped.
  const magsFlat = new Float32Array(nFrames * nBins);
  for (let t = 0; t < nFrames; t++) {
    const frame = spec.magnitudes[t];
    if (frame) magsFlat.set(frame, t * nBins);
  }

  const filterFlat = new Float32Array(nMels * nBins);
  for (let m = 0; m < nMels; m++) {
    const weights = filters[m] ?? new Float32Array(nBins);
    filterFlat.set(weights, m * nBins);
  }

  const projected = await gpuMelProjectFlat(gpu, { nFrames, nBins, nMels, magsFlat, filterFlat });
  const { outFlat } = projected.value;

  const melBands = new Array(nFrames);
  for (let t = 0; t < nFrames; t++) {
    const rowStart = t * nMels;
    melBands[t] = outFlat.subarray(rowStart, rowStart + nMels);
  }

  return {
    times: spec.times,
    melBands,
    gpuTimings: {
      gpuSubmitToReadbackMs: projected.timing.gpuSubmitToReadbackMs
    }
  };
}
268
+ async function melSpectrogram(spec, config, gpu) {
269
+ assertPositiveInt("config.nMels", config.nMels);
270
+ const fMin = config.fMin ?? 0;
271
+ const fMax = config.fMax ?? spec.sampleRate / 2;
272
+ const filters = buildMelFilterBank(
273
+ spec.sampleRate,
274
+ spec.fftSize,
275
+ config.nMels,
276
+ fMin,
277
+ fMax
278
+ );
279
+ if (gpu) {
280
+ try {
281
+ return await gpuMelProject(spec, filters, gpu);
282
+ } catch {
283
+ return cpuMelProject(spec, filters);
284
+ }
285
+ }
286
+ return cpuMelProject(spec, filters);
287
+ }
288
+
289
+ // src/dsp/mfcc.ts
290
+ function assertPositiveInt2(name, v) {
291
+ if (!Number.isFinite(v) || v <= 0 || (v | 0) !== v) {
292
+ throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
293
+ }
294
+ }
/**
 * Builds a row-major `nCoeffs x nMels` cosine-transform matrix as a flat
 * Float32Array: entry (i, j) is `scale_i * cos(PI / nMels * (j + 0.5) * i)`
 * with `scale_0 = sqrt(1 / nMels)` and `scale_i = sqrt(2 / nMels)` otherwise.
 */
function buildDctMatrix(nCoeffs, nMels) {
  const matrix = new Float32Array(nCoeffs * nMels);
  const dcScale = Math.sqrt(1 / nMels);
  const acScale = Math.sqrt(2 / nMels);

  for (let row = 0; row < nCoeffs; row++) {
    const rowScale = row === 0 ? dcScale : acScale;
    const rowStart = row * nMels;
    for (let col = 0; col < nMels; col++) {
      const basis = Math.cos(Math.PI / nMels * (col + 0.5) * row);
      matrix[rowStart + col] = rowScale * basis;
    }
  }
  return matrix;
}
307
+ function mfcc(mel, options = {}) {
308
+ const nFrames = mel.times.length;
309
+ const nMels = mel.melBands[0]?.length ?? 0;
310
+ const nCoeffs = options.nCoeffs ?? 13;
311
+ assertPositiveInt2("options.nCoeffs", nCoeffs);
312
+ if (nMels <= 0) {
313
+ return { times: mel.times, coeffs: new Array(nFrames).fill(0).map(() => new Float32Array(nCoeffs)) };
314
+ }
315
+ const dct = buildDctMatrix(nCoeffs, nMels);
316
+ const out = new Array(nFrames);
317
+ for (let t = 0; t < nFrames; t++) {
318
+ const x = mel.melBands[t] ?? new Float32Array(nMels);
319
+ const c = new Float32Array(nCoeffs);
320
+ for (let i = 0; i < nCoeffs; i++) {
321
+ let sum = 0;
322
+ const rowOff = i * nMels;
323
+ for (let j = 0; j < nMels; j++) {
324
+ sum += (dct[rowOff + j] ?? 0) * (x[j] ?? 0);
325
+ }
326
+ c[i] = sum;
327
+ }
328
+ out[t] = c;
329
+ }
330
+ return { times: mel.times, coeffs: out };
331
+ }
332
+ function delta(features, options = {}) {
333
+ const N = options.window ?? 2;
334
+ assertPositiveInt2("options.window", N);
335
+ const nFrames = features.times.length;
336
+ const nFeat = features.values[0]?.length ?? 0;
337
+ const out = new Array(nFrames);
338
+ let denom = 0;
339
+ for (let n = 1; n <= N; n++) denom += n * n;
340
+ denom *= 2;
341
+ for (let t = 0; t < nFrames; t++) {
342
+ const d = new Float32Array(nFeat);
343
+ for (let f = 0; f < nFeat; f++) {
344
+ let num = 0;
345
+ for (let n = 1; n <= N; n++) {
346
+ const tPlus = Math.min(nFrames - 1, t + n);
347
+ const tMinus = Math.max(0, t - n);
348
+ const a = features.values[tPlus]?.[f] ?? 0;
349
+ const b = features.values[tMinus]?.[f] ?? 0;
350
+ num += n * (a - b);
351
+ }
352
+ d[f] = denom > 0 ? num / denom : 0;
353
+ }
354
+ out[t] = d;
355
+ }
356
+ return { times: features.times, values: out };
357
+ }
358
+ function deltaDelta(features, options = {}) {
359
+ return delta(delta(features, options), options);
360
+ }
361
+
362
+ // src/gpu/kernels/onsetEnvelope.wgsl.ts
// WGSL compute shader source for the onset-strength envelope.
//
// Bindings (group 0): 0 = `melFlat` (read), 1 = `out` (read_write),
// 2 = `params` uniform { nMels, nFrames, diffMethod, _pad }.
//
// One invocation per frame index t = global_invocation_id.x (workgroup size
// 256). Frame 0 is written as 0. For t > 0 the shader sums, over all mel
// bands, either abs(d) (diffMethod == 1) or max(0, d) (any other value) of
// d = melFlat[t * nMels + m] - melFlat[(t - 1) * nMels + m], and stores
// sum / max(1, nMels) in out[t]. No log compression or smoothing is applied
// inside the shader.
var onsetEnvelopeWGSL = (
  /* wgsl */
  `
// Compute onset strength envelope from a (log) mel spectrogram.
//
// Input layout: melFlat[t*nMels + m]
// Output layout: out[t]
//
// We compute novelty per frame:
// novelty[t] = sum_m max(0, mel[t,m] - mel[t-1,m]) (rectified)
// or sum_m abs(...)
//
// One invocation computes one frame index (t). This is memory-bound but reduces a full
// (frames*mels) loop to the GPU and provides an end-to-end submit->readback timing.

struct Params {
nMels: u32,
nFrames: u32,
diffMethod: u32, // 0=rectified, 1=abs
_pad: u32,
};

@group(0) @binding(0) var<storage, read> melFlat: array<f32>;
@group(0) @binding(1) var<storage, read_write> out: array<f32>;
@group(0) @binding(2) var<uniform> params: Params;

@compute @workgroup_size(256)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let t = gid.x;
if (t >= params.nFrames) { return; }

if (t == 0u) {
out[t] = 0.0;
return;
}

let nMels = params.nMels;
var sum: f32 = 0.0;

// Linear loop: nMels is small (e.g. 64). Keeping it serial per-frame is fine.
// (Future optimisation: parallelise reduction within workgroup.)
for (var m: u32 = 0u; m < nMels; m = m + 1u) {
let a = melFlat[t * nMels + m];
let b = melFlat[(t - 1u) * nMels + m];
let d = a - b;

if (params.diffMethod == 1u) {
// abs
sum = sum + abs(d);
} else {
// rectified
sum = sum + max(0.0, d);
}
}

out[t] = sum / max(1.0, f32(nMels));
}
`
);
422
+
423
+ // src/gpu/onsetEnvelope.ts
424
+ async function gpuOnsetEnvelopeFromMelFlat(gpu, input) {
425
+ const { device } = gpu;
426
+ const { nFrames, nMels, melFlat, diffMethod } = input;
427
+ if (melFlat.length !== nFrames * nMels) {
428
+ throw new Error("@octoseq/mir: melFlat length mismatch");
429
+ }
430
+ const melBuffer = createAndWriteStorageBuffer(gpu, melFlat);
431
+ const outByteLen = byteSizeF32(nFrames);
432
+ const outBuffer = createStorageOutBuffer(gpu, outByteLen);
433
+ const readback = createReadbackBuffer(gpu, outByteLen);
434
+ const shader = device.createShaderModule({ code: onsetEnvelopeWGSL });
435
+ const pipeline = device.createComputePipeline({
436
+ layout: "auto",
437
+ compute: { module: shader, entryPoint: "main" }
438
+ });
439
+ const diffU32 = diffMethod === "abs" ? 1 : 0;
440
+ const params = createUniformBufferU32x4(gpu, new Uint32Array([nMels, nFrames, diffU32, 0]));
441
+ const bindGroup = device.createBindGroup({
442
+ layout: pipeline.getBindGroupLayout(0),
443
+ entries: [
444
+ { binding: 0, resource: { buffer: melBuffer } },
445
+ { binding: 1, resource: { buffer: outBuffer } },
446
+ { binding: 2, resource: { buffer: params } }
447
+ ]
448
+ });
449
+ const encoder = device.createCommandEncoder();
450
+ const pass = encoder.beginComputePass();
451
+ pass.setPipeline(pipeline);
452
+ pass.setBindGroup(0, bindGroup);
453
+ const wg = Math.ceil(nFrames / 256);
454
+ pass.dispatchWorkgroups(wg);
455
+ pass.end();
456
+ const { value: bytes, timing } = await submitAndReadback(gpu, encoder, outBuffer, readback, outByteLen);
457
+ melBuffer.destroy();
458
+ outBuffer.destroy();
459
+ params.destroy();
460
+ readback.destroy();
461
+ return {
462
+ value: { out: new Float32Array(bytes) },
463
+ timing
464
+ };
465
+ }
466
+
467
+ // src/dsp/onset.ts
/**
 * Centred moving average with a window of `windowFrames` samples
 * (`floor(windowFrames / 2)` on each side). Near the edges the window is
 * truncated and the mean is taken over the samples actually available.
 *
 * @returns `values` itself (same reference) when `windowFrames <= 1`,
 *   otherwise a new Float32Array of the same length.
 */
function movingAverage(values, windowFrames) {
  if (windowFrames <= 1) return values;

  const n = values.length;
  const reach = Math.floor(windowFrames / 2);

  // cumulative[i] is the float64 sum of values[0 .. i-1], so any window sum
  // is a difference of two entries.
  const cumulative = new Float64Array(n + 1);
  cumulative[0] = 0;
  for (let i = 0; i < n; i++) {
    cumulative[i + 1] = (cumulative[i] ?? 0) + (values[i] ?? 0);
  }

  const smoothed = new Float32Array(n);
  for (let i = 0; i < n; i++) {
    const lo = Math.max(0, i - reach);
    const hi = Math.min(n, i + reach + 1);
    const windowSum = (cumulative[hi] ?? 0) - (cumulative[lo] ?? 0);
    smoothed[i] = windowSum / Math.max(1, hi - lo);
  }
  return smoothed;
}
/**
 * Fills in onset-envelope option defaults: `useLog` = false,
 * `smoothMs` = 30, `diffMethod` = "rectified". A default applies when the
 * option is `null` or `undefined` (or when `opts` itself is).
 */
function defaultOptions(opts) {
  const useLog = opts?.useLog ?? false;
  const smoothMs = opts?.smoothMs ?? 30;
  const diffMethod = opts?.diffMethod ?? "rectified";
  return { useLog, smoothMs, diffMethod };
}
/** Log compression `ln(1 + max(0, x))`; negative inputs are clamped to 0 first. */
function logCompress(x) {
  const nonNegative = Math.max(0, x);
  return Math.log1p(nonNegative);
}
497
+ function onsetEnvelopeFromSpectrogram(spec, options) {
498
+ const opts = defaultOptions(options);
499
+ const nFrames = spec.times.length;
500
+ const out = new Float32Array(nFrames);
501
+ const nBins = (spec.fftSize >>> 1) + 1;
502
+ out[0] = 0;
503
+ for (let t = 1; t < nFrames; t++) {
504
+ const cur = spec.magnitudes[t];
505
+ const prev = spec.magnitudes[t - 1];
506
+ if (!cur || !prev) {
507
+ out[t] = 0;
508
+ continue;
509
+ }
510
+ let sum = 0;
511
+ for (let k = 0; k < nBins; k++) {
512
+ let a = cur[k] ?? 0;
513
+ let b = prev[k] ?? 0;
514
+ if (opts.useLog) {
515
+ a = logCompress(a);
516
+ b = logCompress(b);
517
+ }
518
+ const d = a - b;
519
+ sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
520
+ }
521
+ out[t] = nBins > 0 ? sum / nBins : 0;
522
+ }
523
+ const smoothMs = opts.smoothMs;
524
+ if (smoothMs > 0 && nFrames >= 2) {
525
+ const dt = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
526
+ const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
527
+ return {
528
+ times: spec.times,
529
+ values: movingAverage(out, windowFrames | 1)
530
+ };
531
+ }
532
+ return { times: spec.times, values: out };
533
+ }
534
+ function onsetEnvelopeFromMel(mel, options) {
535
+ const opts = defaultOptions(options);
536
+ const nFrames = mel.times.length;
537
+ const out = new Float32Array(nFrames);
538
+ out[0] = 0;
539
+ for (let t = 1; t < nFrames; t++) {
540
+ const cur = mel.melBands[t];
541
+ const prev = mel.melBands[t - 1];
542
+ if (!cur || !prev) {
543
+ out[t] = 0;
544
+ continue;
545
+ }
546
+ const nBands = cur.length;
547
+ let sum = 0;
548
+ for (let m = 0; m < nBands; m++) {
549
+ let a = cur[m] ?? 0;
550
+ let b = prev[m] ?? 0;
551
+ if (opts.useLog) {
552
+ a = logCompress(a);
553
+ b = logCompress(b);
554
+ }
555
+ const d = a - b;
556
+ sum += opts.diffMethod === "abs" ? Math.abs(d) : Math.max(0, d);
557
+ }
558
+ out[t] = nBands > 0 ? sum / nBands : 0;
559
+ }
560
+ const smoothMs = opts.smoothMs;
561
+ if (smoothMs > 0 && nFrames >= 2) {
562
+ const dt = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
563
+ const windowFrames = Math.max(1, Math.round(smoothMs / 1e3 / Math.max(1e-9, dt)));
564
+ return {
565
+ times: mel.times,
566
+ values: movingAverage(out, windowFrames | 1)
567
+ };
568
+ }
569
+ return { times: mel.times, values: out };
570
+ }
/**
 * GPU variant of the mel onset envelope: flattens the mel rows row-major,
 * runs `gpuOnsetEnvelopeFromMelFlat`, and returns its output unchanged.
 *
 * The band count is taken from the first frame; missing rows contribute
 * zeros. Only `options.diffMethod` (default "rectified") is read here.
 * NOTE(review): `useLog` and `smoothMs` are not applied on this path, unlike
 * the CPU `onsetEnvelopeFromMel` — confirm callers account for that.
 *
 * @returns `{ times, values, gpuTimings: { gpuSubmitToReadbackMs } }`.
 */
async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
  const nFrames = mel.times.length;
  const nMels = mel.melBands[0]?.length ?? 0;

  const melFlat = new Float32Array(nFrames * nMels);
  for (let t = 0; t < nFrames; t++) {
    const bands = mel.melBands[t];
    if (bands) melFlat.set(bands, t * nMels);
  }

  const diffMethod = options?.diffMethod ?? "rectified";
  const result = await gpuOnsetEnvelopeFromMelFlat(gpu, { nFrames, nMels, melFlat, diffMethod });

  return {
    times: mel.times,
    values: result.value.out,
    gpuTimings: { gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs }
  };
}
593
+
594
+ // src/dsp/peakPick.ts
/**
 * Mean and population standard deviation (divides by n, not n - 1) of an
 * array-like of numbers. Missing entries count as 0; an empty input yields
 * `{ mean: 0, std: 0 }`.
 */
function meanStd(values) {
  const count = values.length;
  if (count <= 0) {
    return { mean: 0, std: 0 };
  }

  let total = 0;
  for (let i = 0; i < count; i++) {
    total += values[i] ?? 0;
  }
  const mean = total / count;

  let squaredDeviation = 0;
  for (let i = 0; i < count; i++) {
    const deviation = (values[i] ?? 0) - mean;
    squaredDeviation += deviation * deviation;
  }

  return { mean, std: Math.sqrt(squaredDeviation / count) };
}
/**
 * Median of an array-like of numbers, computed on a sorted copy (the input
 * is not modified). Even-length inputs return the mean of the two middle
 * values; an empty input returns 0.
 */
function median(values) {
  const sorted = Array.from(values);
  sorted.sort((x, y) => x - y);

  const count = sorted.length;
  if (count === 0) return 0;

  const upperMid = count >>> 1;
  const isOdd = count % 2 === 1;
  if (isOdd) {
    return sorted[upperMid] ?? 0;
  }
  return ((sorted[upperMid - 1] ?? 0) + (sorted[upperMid] ?? 0)) / 2;
}
/**
 * Picks local maxima from a sampled curve.
 *
 * A sample i (first and last samples are never considered) is a peak when
 * its value is >= the threshold and it is greater than both neighbours
 * (`strict`, the default) or >= both neighbours (`strict: false`).
 *
 * The threshold is `options.threshold` (default 0), or, when
 * `options.adaptive` is given, either `median(values) * factor`
 * (method "median") or `mean + factor * std` (default method "meanStd");
 * `factor` defaults to 1.
 *
 * When a peak falls closer than `options.minIntervalSec` (default 0) to the
 * previously accepted peak, it is dropped unless it is stronger, in which
 * case it replaces that previous peak in place.
 *
 * @returns Array of `{ time, strength, index }` in increasing index order.
 * @throws {Error} when `times` and `values` differ in length.
 */
function peakPick(times, values, options = {}) {
  if (times.length !== values.length) {
    throw new Error("@octoseq/mir: peakPick times/values length mismatch");
  }
  const n = values.length;
  if (n === 0) return [];

  const strict = options.strict ?? true;
  const minIntervalSec = options.minIntervalSec ?? 0;

  let threshold = options.threshold ?? 0;
  const adaptive = options.adaptive;
  if (adaptive) {
    const method = adaptive.method ?? "meanStd";
    const factor = adaptive.factor ?? 1;
    if (method === "median") {
      threshold = median(values) * factor;
    } else {
      const stats = meanStd(values);
      threshold = stats.mean + factor * stats.std;
    }
  }

  const peaks = [];
  let lastPeakTime = -Infinity;
  for (let i = 1; i < n - 1; i++) {
    const value = values[i] ?? 0;
    // Written as !(>=) so a NaN value or threshold rejects the sample.
    if (!(value >= threshold)) continue;

    const before = values[i - 1] ?? 0;
    const after = values[i + 1] ?? 0;
    const isLocalMax = strict
      ? value > before && value > after
      : value >= before && value >= after;
    if (!isLocalMax) continue;

    const time = times[i] ?? 0;
    const farEnough = !(time - lastPeakTime < minIntervalSec);
    if (farEnough) {
      peaks.push({ time, strength: value, index: i });
      lastPeakTime = time;
      continue;
    }

    // Too close to the previous peak: keep whichever of the two is stronger.
    const previous = peaks[peaks.length - 1];
    if (previous && value > previous.strength) {
      previous.time = time;
      previous.strength = value;
      previous.index = i;
      lastPeakTime = time;
    }
  }
  return peaks;
}
662
+
663
+ // src/dsp/hpss.ts
664
+ function assertOddPositiveInt(name, v) {
665
+ if (!Number.isFinite(v) || v <= 0 || (v | 0) !== v) {
666
+ throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
667
+ }
668
+ if (v % 2 !== 1) {
669
+ throw new Error(`@octoseq/mir: ${name} must be odd`);
670
+ }
671
+ }
/**
 * Returns the element at index `length >>> 1` of an ascending-sorted copy of
 * `values` (the exact median for odd lengths, the upper middle value for even
 * lengths). The input is left untouched; an empty input yields 0.
 */
function medianOfWindow(values) {
  const sorted = Array.from(values).sort((lo, hi) => lo - hi);
  const middle = sorted.length >>> 1;
  return sorted[middle] ?? 0;
}
678
+ function medianFilterTime(spec, kTime, options) {
679
+ const nFrames = spec.times.length;
680
+ const nBins = (spec.fftSize >>> 1) + 1;
681
+ const half = kTime >>> 1;
682
+ const out = new Array(nFrames);
683
+ const window = new Float32Array(kTime);
684
+ for (let t = 0; t < nFrames; t++) {
685
+ if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
686
+ const row = new Float32Array(nBins);
687
+ for (let k = 0; k < nBins; k++) {
688
+ for (let i = -half, wi = 0; i <= half; i++, wi++) {
689
+ const tt = Math.max(0, Math.min(nFrames - 1, t + i));
690
+ const mags = spec.magnitudes[tt];
691
+ window[wi] = mags ? mags[k] ?? 0 : 0;
692
+ }
693
+ row[k] = medianOfWindow(window);
694
+ }
695
+ out[t] = row;
696
+ }
697
+ return out;
698
+ }
699
+ function medianFilterFreq(spec, kFreq, options) {
700
+ const nFrames = spec.times.length;
701
+ const nBins = (spec.fftSize >>> 1) + 1;
702
+ const half = kFreq >>> 1;
703
+ const out = new Array(nFrames);
704
+ const window = new Float32Array(kFreq);
705
+ for (let t = 0; t < nFrames; t++) {
706
+ if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
707
+ const mags = spec.magnitudes[t] ?? new Float32Array(nBins);
708
+ const row = new Float32Array(nBins);
709
+ for (let k = 0; k < nBins; k++) {
710
+ for (let i = -half, wi = 0; i <= half; i++, wi++) {
711
+ const kk = Math.max(0, Math.min(nBins - 1, k + i));
712
+ window[wi] = mags[kk] ?? 0;
713
+ }
714
+ row[k] = medianOfWindow(window);
715
+ }
716
+ out[t] = row;
717
+ }
718
+ return out;
719
+ }
720
+ function hpss(spec, options = {}) {
721
+ const timeMedian = options.timeMedian ?? 17;
722
+ const freqMedian = options.freqMedian ?? 17;
723
+ assertOddPositiveInt("options.timeMedian", timeMedian);
724
+ assertOddPositiveInt("options.freqMedian", freqMedian);
725
+ const nFrames = spec.times.length;
726
+ const nBins = (spec.fftSize >>> 1) + 1;
727
+ const H = medianFilterTime(spec, timeMedian, options);
728
+ const P = medianFilterFreq(spec, freqMedian, options);
729
+ const harmonic = new Array(nFrames);
730
+ const percussive = new Array(nFrames);
731
+ const soft = options.softMask ?? true;
732
+ const eps = 1e-12;
733
+ for (let t = 0; t < nFrames; t++) {
734
+ if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
735
+ const mags = spec.magnitudes[t] ?? new Float32Array(nBins);
736
+ const hRow = H[t] ?? new Float32Array(nBins);
737
+ const pRow = P[t] ?? new Float32Array(nBins);
738
+ const outH = new Float32Array(nBins);
739
+ const outP = new Float32Array(nBins);
740
+ for (let k = 0; k < nBins; k++) {
741
+ const x = mags[k] ?? 0;
742
+ const h = hRow[k] ?? 0;
743
+ const p = pRow[k] ?? 0;
744
+ if (soft) {
745
+ const denom = Math.max(eps, h + p);
746
+ const mh = h / denom;
747
+ const mp = p / denom;
748
+ outH[k] = x * mh;
749
+ outP[k] = x * mp;
750
+ } else {
751
+ const isH = h >= p;
752
+ outH[k] = isH ? x : 0;
753
+ outP[k] = isH ? 0 : x;
754
+ }
755
+ }
756
+ harmonic[t] = outH;
757
+ percussive[t] = outP;
758
+ }
759
+ return {
760
+ harmonic: { times: spec.times, bins: nBins, frames: nFrames, magnitudes: harmonic },
761
+ percussive: { times: spec.times, bins: nBins, frames: nFrames, magnitudes: percussive }
762
+ };
763
+ }
764
+
765
+ // src/gpu/kernels/hpssMasks.wgsl.ts
// WGSL compute shader source that produces harmonic and percussive masks.
//
// Bindings (group 0): 0 = `mags` (read), 1 = `harmonicMask` (read_write),
// 2 = `percussiveMask` (read_write), 3 = `params` uniform
// { nBins, nFrames, softMask, _pad }.
//
// Each invocation handles one (frame, bin) pair taken from
// global_invocation_id.x / .y (workgroup size 16x16) and returns early when the
// pair is out of range. `mag_at` clamps frame and bin indices to the valid
// range before reading mags[frame * nBins + bin].
//
// h = median9 of the 9 time taps (frame - 4 .. frame + 4) at this bin,
// p = median9 of the 9 frequency taps (bin - 4 .. bin + 4) at this frame.
// Soft mask: h / max(1e-12, h + p) and p / max(1e-12, h + p).
// Hard mask (softMask == 0): 1/0 when h >= p, otherwise 0/1.
//
// The window is fixed at 9 taps: Params carries no median size.
// NOTE(review): per the shader's own comments, `median9` is a compare-swap
// network meant to return the 5th smallest of 9 values; the network has not
// been verified independently here.
var hpssMasksWGSL = (
  /* wgsl */
  `
struct Params {
nBins: u32,
nFrames: u32,
softMask: u32, // 1 => soft, 0 => hard
_pad: u32,
};

@group(0) @binding(0) var<storage, read> mags : array<f32>;
@group(0) @binding(1) var<storage, read_write> harmonicMask : array<f32>;
@group(0) @binding(2) var<storage, read_write> percussiveMask : array<f32>;
@group(0) @binding(3) var<uniform> params : Params;

fn clamp_i32(x: i32, lo: i32, hi: i32) -> i32 {
return max(lo, min(hi, x));
}

fn swap_if_greater(a: ptr<function, f32>, b: ptr<function, f32>) {
// Branchless compare\u2013swap.
let av = *a;
let bv = *b;
*a = min(av, bv);
*b = max(av, bv);
}

// Sorting network for 9 values; returns the 5th smallest (median).
//
// Notes:
// - This is fixed-cost and data-independent.
// - For our HPSS approximation we only need a robust center value, and exact median-of-9
// is a good tradeoff vs kernel size.
fn median9(v0: f32, v1: f32, v2: f32, v3: f32, v4: f32, v5: f32, v6: f32, v7: f32, v8: f32) -> f32 {
var a0 = v0; var a1 = v1; var a2 = v2;
var a3 = v3; var a4 = v4; var a5 = v5;
var a6 = v6; var a7 = v7; var a8 = v8;

// 9-input sorting network (compare\u2013swap stages). This is a known minimal-ish network.
// We fully sort then take middle; cost is acceptable for 9.
// Stage 1
swap_if_greater(&a0,&a1); swap_if_greater(&a3,&a4); swap_if_greater(&a6,&a7);
// Stage 2
swap_if_greater(&a1,&a2); swap_if_greater(&a4,&a5); swap_if_greater(&a7,&a8);
// Stage 3
swap_if_greater(&a0,&a1); swap_if_greater(&a3,&a4); swap_if_greater(&a6,&a7);
// Stage 4
swap_if_greater(&a0,&a3); swap_if_greater(&a3,&a6); swap_if_greater(&a0,&a3);
// Stage 5
swap_if_greater(&a1,&a4); swap_if_greater(&a4,&a7); swap_if_greater(&a1,&a4);
// Stage 6
swap_if_greater(&a2,&a5); swap_if_greater(&a5,&a8); swap_if_greater(&a2,&a5);
// Stage 7
swap_if_greater(&a1,&a3); swap_if_greater(&a5,&a7);
// Stage 8
swap_if_greater(&a2,&a6);
// Stage 9
swap_if_greater(&a2,&a3); swap_if_greater(&a4,&a6);
// Stage 10
swap_if_greater(&a2,&a4); swap_if_greater(&a4,&a6);
// Stage 11
swap_if_greater(&a3,&a5); swap_if_greater(&a5,&a7);
// Stage 12
swap_if_greater(&a3,&a4); swap_if_greater(&a5,&a6);
// Stage 13
swap_if_greater(&a4,&a5);

return a4;
}

fn mag_at(frame: i32, bin: i32) -> f32 {
let f = clamp_i32(frame, 0, i32(params.nFrames) - 1);
let b = clamp_i32(bin, 0, i32(params.nBins) - 1);
let idx = u32(f) * params.nBins + u32(b);
return mags[idx];
}

@compute @workgroup_size(16, 16)
fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
let frame = gid.x;
let bin = gid.y;

if (frame >= params.nFrames || bin >= params.nBins) {
return;
}

let f = i32(frame);
let b = i32(bin);

// Harmonic estimate: median in time over 9 taps.
let h = median9(
mag_at(f-4,b), mag_at(f-3,b), mag_at(f-2,b), mag_at(f-1,b), mag_at(f,b),
mag_at(f+1,b), mag_at(f+2,b), mag_at(f+3,b), mag_at(f+4,b)
);

// Percussive estimate: median in frequency over 9 taps.
let p = median9(
mag_at(f,b-4), mag_at(f,b-3), mag_at(f,b-2), mag_at(f,b-1), mag_at(f,b),
mag_at(f,b+1), mag_at(f,b+2), mag_at(f,b+3), mag_at(f,b+4)
);

let eps: f32 = 1e-12;
let denom = max(eps, h + p);

var mh = h / denom;
var mp = p / denom;

// Optional hard mask (kept for compatibility with CPU options).
if (params.softMask == 0u) {
let isH = h >= p;
mh = select(0.0, 1.0, isH);
mp = select(1.0, 0.0, isH);
}

let idx = frame * params.nBins + bin;
harmonicMask[idx] = mh;
percussiveMask[idx] = mp;
}
`
);
886
+
887
+ // src/gpu/hpssMasks.ts
888
/**
 * Runs the HPSS mask compute shader and reads both masks back to the CPU.
 *
 * The uniform block is written as [nBins, nFrames, softMask (0/1), 0]; the
 * shader is dispatched with one invocation per (frame, bin) cell using
 * 16x16 workgroups (x covers frames, y covers bins).
 *
 * @param gpu   Object exposing `device` and `queue`; also handed to the buffer helpers.
 * @param input `{ nFrames, nBins, magsFlat, softMask }` where `magsFlat` holds
 *              `nFrames * nBins` magnitudes indexed as `frame * nBins + bin`.
 * @returns `{ value: { harmonicMaskFlat, percussiveMaskFlat }, timing: { gpuSubmitToReadbackMs } }`.
 * @throws Error when `magsFlat.length` does not equal `nFrames * nBins`.
 */
async function gpuHpssMasks(gpu, input) {
  const { device } = gpu;
  const { nFrames, nBins, magsFlat, softMask } = input;
  const cellCount = nFrames * nBins;
  if (magsFlat.length !== cellCount) {
    throw new Error("@octoseq/mir: magsFlat length mismatch");
  }
  // Nothing to compute: skip the GPU round trip instead of binding zero-sized buffers.
  if (cellCount === 0) {
    return {
      value: {
        harmonicMaskFlat: new Float32Array(0),
        percussiveMaskFlat: new Float32Array(0)
      },
      timing: {
        gpuSubmitToReadbackMs: 0
      }
    };
  }
  // Every buffer allocated below is tracked so it is destroyed on all exit paths,
  // including a rejected mapAsync or a failed pipeline/bind-group creation.
  const ownedBuffers = [];
  const track = (buffer) => {
    ownedBuffers.push(buffer);
    return buffer;
  };
  try {
    const magsBuffer = track(createAndWriteStorageBuffer(gpu, magsFlat));
    const outByteLen = byteSizeF32(cellCount);
    const harmonicOutBuffer = track(createStorageOutBuffer(gpu, outByteLen));
    const percussiveOutBuffer = track(createStorageOutBuffer(gpu, outByteLen));
    const harmonicReadback = track(createReadbackBuffer(gpu, outByteLen));
    const percussiveReadback = track(createReadbackBuffer(gpu, outByteLen));
    const shader = device.createShaderModule({ code: hpssMasksWGSL });
    const pipeline = device.createComputePipeline({
      layout: "auto",
      compute: { module: shader, entryPoint: "main" }
    });
    const params = track(
      createUniformBufferU32x4(gpu, new Uint32Array([nBins, nFrames, softMask ? 1 : 0, 0]))
    );
    const bindGroup = device.createBindGroup({
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: magsBuffer } },
        { binding: 1, resource: { buffer: harmonicOutBuffer } },
        { binding: 2, resource: { buffer: percussiveOutBuffer } },
        { binding: 3, resource: { buffer: params } }
      ]
    });
    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0, bindGroup);
    // Round up so partially filled 16x16 tiles are still covered; the shader bounds-checks.
    pass.dispatchWorkgroups(Math.ceil(nFrames / 16), Math.ceil(nBins / 16));
    pass.end();
    encoder.copyBufferToBuffer(harmonicOutBuffer, 0, harmonicReadback, 0, outByteLen);
    encoder.copyBufferToBuffer(percussiveOutBuffer, 0, percussiveReadback, 0, outByteLen);
    const tSubmit = nowMs();
    gpu.queue.submit([encoder.finish()]);
    await Promise.all([
      harmonicReadback.mapAsync(GPUMapMode.READ),
      percussiveReadback.mapAsync(GPUMapMode.READ)
    ]);
    const tDone = nowMs();
    // Copy out of the mapped ranges before unmapping; the mapped memory is detached afterwards.
    const hBytes = harmonicReadback.getMappedRange().slice(0);
    const pBytes = percussiveReadback.getMappedRange().slice(0);
    harmonicReadback.unmap();
    percussiveReadback.unmap();
    return {
      value: {
        harmonicMaskFlat: new Float32Array(hBytes),
        percussiveMaskFlat: new Float32Array(pBytes)
      },
      timing: {
        gpuSubmitToReadbackMs: tDone - tSubmit
      }
    };
  } finally {
    for (const buffer of ownedBuffers) {
      buffer.destroy();
    }
  }
}
949
+
950
+ // src/dsp/hpssGpu.ts
951
/**
 * Packs per-frame magnitude rows into one contiguous Float32Array laid out as
 * `frame * nBins + bin`.
 *
 * Missing rows and rows shorter than `nBins` leave zeros in the remaining cells.
 * Rows longer than `nBins` are truncated to `nBins` so they can neither spill
 * into the next frame nor run past the end of the output on the last frame.
 *
 * @param mags    Indexable collection of magnitude rows, one per frame.
 * @param nFrames Number of frames to pack.
 * @param nBins   Number of bins kept per frame.
 * @returns Float32Array of length `nFrames * nBins`.
 */
function flattenMagnitudes(mags, nFrames, nBins) {
  const flat = new Float32Array(nFrames * nBins);
  for (let t = 0; t < nFrames; t++) {
    const row = mags[t];
    if (row == null) continue; // freshly allocated output is already zero-filled
    const clipped = row.length > nBins ? row.slice(0, nBins) : row;
    flat.set(clipped, t * nBins);
  }
  return flat;
}
959
/**
 * Guard for values read back from the GPU: throws unless `v` is a finite number.
 *
 * @param name Label interpolated into the error message.
 * @param v    Value to check.
 * @throws Error when `Number.isFinite(v)` is false.
 */
function assertFiniteMask(name, v) {
  if (Number.isFinite(v)) {
    return;
  }
  throw new Error(`@octoseq/mir: GPU HPSS produced non-finite ${name}`);
}
964
/**
 * Harmonic/percussive separation with the masks estimated on the GPU.
 *
 * The masks come from `gpuHpssMasks`; they are validated, clamped to [0, 1]
 * and multiplied into the input magnitudes on the CPU.
 *
 * @param spec    Spectrogram-like object; `times`, `fftSize` and `magnitudes` are read.
 * @param gpu     GPU handle forwarded to `gpuHpssMasks`.
 * @param options `softMask` (defaults to true) and an optional `isCancelled()` probe.
 * @returns `{ harmonic, percussive, gpuMs }`; each component carries
 *          `times`, `bins`, `frames` and `magnitudes`.
 * @throws Error "@octoseq/mir: cancelled" when `isCancelled()` reports true,
 *         or a non-finite-mask error when the GPU produced NaN/Infinity.
 */
async function hpssGpu(spec, gpu, options = {}) {
  const bailIfCancelled = () => {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
  };

  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  bailIfCancelled();

  const masks = await gpuHpssMasks(gpu, {
    nFrames: frameCount,
    nBins: binCount,
    magsFlat: flattenMagnitudes(spec.magnitudes, frameCount, binCount),
    softMask: options.softMask ?? true
  });
  bailIfCancelled();

  const { harmonicMaskFlat, percussiveMaskFlat } = masks.value;
  const harmonicFrames = [];
  const percussiveFrames = [];

  for (let t = 0; t < frameCount; t++) {
    // Checked once per frame so long inputs stay responsive to cancellation.
    bailIfCancelled();
    const source = spec.magnitudes[t] ?? new Float32Array(binCount);
    const harmonicRow = new Float32Array(binCount);
    const percussiveRow = new Float32Array(binCount);
    const offset = t * binCount;
    for (let k = 0; k < binCount; k++) {
      const magnitude = source[k] ?? 0;
      const weightH = harmonicMaskFlat[offset + k] ?? 0;
      const weightP = percussiveMaskFlat[offset + k] ?? 0;
      assertFiniteMask("mask", weightH);
      assertFiniteMask("mask", weightP);
      // Clamp to [0, 1] before applying.
      harmonicRow[k] = magnitude * Math.max(0, Math.min(1, weightH));
      percussiveRow[k] = magnitude * Math.max(0, Math.min(1, weightP));
    }
    harmonicFrames.push(harmonicRow);
    percussiveFrames.push(percussiveRow);
  }

  return {
    harmonic: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: harmonicFrames },
    percussive: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: percussiveFrames },
    gpuMs: masks.timing.gpuSubmitToReadbackMs
  };
}
1007
+
1008
+ // src/dsp/spectral.ts
1009
/**
 * Per-frame spectral centroid: the magnitude-weighted mean of the bin
 * frequencies, where bin `k` sits at `k * sampleRate / fftSize`.
 *
 * Frames that are missing or whose magnitudes do not sum to a positive value
 * yield 0.
 *
 * @param spec Spectrogram-like object; `times`, `fftSize`, `sampleRate` and `magnitudes` are read.
 * @returns Float32Array with one centroid per frame.
 */
function spectralCentroid(spec) {
  const frameCount = spec.times.length;
  const centroids = new Float32Array(frameCount); // zero-initialised
  const binCount = (spec.fftSize >>> 1) + 1;
  const hzPerBin = spec.sampleRate / spec.fftSize;

  for (let t = 0; t < frameCount; t++) {
    const frame = spec.magnitudes[t];
    if (!frame) continue;

    let weighted = 0;
    let total = 0;
    for (let k = 0; k < binCount; k++) {
      const magnitude = frame[k] ?? 0;
      weighted += k * hzPerBin * magnitude;
      total += magnitude;
    }
    if (total > 0) {
      centroids[t] = weighted / total;
    }
  }
  return centroids;
}
1032
+ function spectralFlux(spec) {
1033
+ const nFrames = spec.times.length;
1034
+ const out = new Float32Array(nFrames);
1035
+ const nBins = (spec.fftSize >>> 1) + 1;
1036
+ let prev = null;
1037
+ for (let t = 0; t < nFrames; t++) {
1038
+ const mags = spec.magnitudes[t];
1039
+ if (!mags) {
1040
+ out[t] = 0;
1041
+ prev = null;
1042
+ continue;
1043
+ }
1044
+ let sum = 0;
1045
+ for (let k = 0; k < nBins; k++) sum += mags[k] ?? 0;
1046
+ if (sum <= 0) {
1047
+ out[t] = 0;
1048
+ prev = null;
1049
+ continue;
1050
+ }
1051
+ const cur = new Float32Array(nBins);
1052
+ const inv = 1 / sum;
1053
+ for (let k = 0; k < nBins; k++) cur[k] = (mags[k] ?? 0) * inv;
1054
+ if (!prev) {
1055
+ out[t] = 0;
1056
+ prev = cur;
1057
+ continue;
1058
+ }
1059
+ let flux = 0;
1060
+ for (let k = 0; k < nBins; k++) {
1061
+ const d = (cur[k] ?? 0) - (prev[k] ?? 0);
1062
+ flux += Math.abs(d);
1063
+ }
1064
+ out[t] = flux;
1065
+ prev = cur;
1066
+ }
1067
+ return out;
1068
+ }
1069
+
1070
+ // src/dsp/fft.ts
1071
/**
 * Builds a Hann window of `size` samples using the `n / size` form:
 * w[n] = 0.5 - 0.5 * cos(2 * PI * n / size).
 *
 * @param size Number of samples.
 * @returns Float32Array holding the window coefficients.
 */
function hannWindow(size) {
  const coefficients = new Float32Array(size);
  let n = 0;
  while (n < size) {
    coefficients[n] = 0.5 - 0.5 * Math.cos(2 * Math.PI * n / size);
    n += 1;
  }
  return coefficients;
}
1078
/**
 * Creates an FFT backend of fixed size on top of the `FFT` class.
 *
 * The returned `forwardReal(frame)` hands back the SAME `real`/`imag` arrays
 * on every call (they are overwritten in place), so callers must consume or
 * copy the result before transforming the next frame.
 *
 * @param fftSize Transform length; must be a positive integer.
 * @returns `{ fftSize, forwardReal(frame) }` where `forwardReal` yields
 *          `{ real, imag }`, each of length `fftSize`.
 * @throws Error when `fftSize` is not a positive integer, or (from
 *         `forwardReal`) when the frame length differs from `fftSize`.
 */
function createFftJsBackend(fftSize) {
  const isPositiveInt = Number.isFinite(fftSize) && fftSize > 0 && (fftSize | 0) === fftSize;
  if (!isPositiveInt) {
    throw new Error("@octoseq/mir: fftSize must be a positive integer");
  }

  const transform = new FFT(fftSize);
  // Scratch buffers allocated once and reused for every frame.
  const inputScratch = new Float32Array(fftSize);
  const interleaved = transform.createComplexArray();
  const realOut = new Float32Array(fftSize);
  const imagOut = new Float32Array(fftSize);

  const forwardReal = (frame) => {
    if (frame.length !== fftSize) {
      throw new Error(
        `@octoseq/mir: FFT input length (${frame.length}) must equal fftSize (${fftSize})`
      );
    }
    inputScratch.set(frame);
    transform.realTransform(interleaved, inputScratch);
    transform.completeSpectrum(interleaved);
    // De-interleave [re0, im0, re1, im1, ...]; the `=== 0` test folds -0 into +0.
    for (let k = 0; k < fftSize; k++) {
      const re = interleaved[2 * k] ?? 0;
      const im = interleaved[2 * k + 1] ?? 0;
      realOut[k] = re === 0 ? 0 : re;
      imagOut[k] = im === 0 ? 0 : im;
    }
    return { real: realOut, imag: imagOut };
  };

  return { fftSize, forwardReal };
}
1108
+
1109
+ // src/dsp/fftBackend.ts
1110
// Module-level memo of FFT backends, keyed by FFT size.
var backendCache = /* @__PURE__ */ new Map();

/**
 * Returns the cached FFT backend for `fftSize`, creating and caching it on
 * first use.
 *
 * @param fftSize Transform length, used as the cache key.
 * @returns The backend produced by `createFftJsBackend(fftSize)`.
 */
function getFftBackend(fftSize) {
  let backend = backendCache.get(fftSize);
  if (!backend) {
    backend = createFftJsBackend(fftSize);
    backendCache.set(fftSize, backend);
  }
  return backend;
}
1118
+
1119
+ // src/dsp/spectrogram.ts
1120
/**
 * Throws unless `value` is a finite, strictly positive integer that survives
 * a 32-bit round trip (`value | 0`).
 *
 * @param name  Label interpolated into the error message.
 * @param value Value to validate.
 * @throws Error `@octoseq/mir: <name> must be a positive integer`.
 */
function assertPositiveInt3(name, value) {
  const valid = Number.isFinite(value) && value > 0 && (value | 0) === value;
  if (valid) {
    return;
  }
  throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
}
1125
+ function mixToMono(audio) {
1126
+ const nCh = audio.numberOfChannels;
1127
+ if (nCh <= 0) {
1128
+ throw new Error("@octoseq/mir: audio.numberOfChannels must be >= 1");
1129
+ }
1130
+ if (nCh === 1) {
1131
+ return audio.getChannelData(0);
1132
+ }
1133
+ const length = audio.getChannelData(0).length;
1134
+ const out = new Float32Array(length);
1135
+ for (let ch = 0; ch < nCh; ch++) {
1136
+ const data = audio.getChannelData(ch);
1137
+ if (data.length !== length) {
1138
+ throw new Error(
1139
+ "@octoseq/mir: all channels must have equal length (AudioBuffer-like invariant)"
1140
+ );
1141
+ }
1142
+ for (let i = 0; i < length; i++) {
1143
+ out[i] = (out[i] ?? 0) + (data[i] ?? 0);
1144
+ }
1145
+ }
1146
+ const inv = 1 / nCh;
1147
+ for (let i = 0; i < length; i++) out[i] = (out[i] ?? 0) * inv;
1148
+ return out;
1149
+ }
1150
/**
 * Computes a magnitude spectrogram (STFT) of the mono mix of `audio`.
 *
 * Each frame is Hann-windowed, transformed with the cached FFT backend, and
 * reduced to `fftSize / 2 + 1` magnitudes. Frame time stamps refer to the
 * centre of the analysis window. The accumulated FFT wall time is attached to
 * the returned `magnitudes` array as `cpuFftTotalMs`.
 *
 * @param audio   AudioBuffer-like object (`sampleRate`, `numberOfChannels`, `getChannelData`).
 * @param config  `{ fftSize, hopSize, window }`; only `window === "hann"` is accepted.
 * @param gpu     Not read by this implementation.
 * @param options Optional `isCancelled()` probe, polled once per frame.
 * @returns `{ sampleRate, fftSize, hopSize, times, magnitudes }`.
 * @throws Error for invalid sizes, unsupported window, non-power-of-two
 *         `fftSize`, `hopSize > fftSize`, or cancellation.
 */
async function spectrogram(audio, config, gpu, options = {}) {
  assertPositiveInt3("config.fftSize", config.fftSize);
  assertPositiveInt3("config.hopSize", config.hopSize);
  if (config.window !== "hann") {
    throw new Error(
      `@octoseq/mir: unsupported window '${config.window}'. v0.1 supports only 'hann'.`
    );
  }
  const { fftSize, hopSize } = config;
  // A power of two has exactly one bit set, so n & (n - 1) clears it to zero.
  if ((fftSize & (fftSize - 1)) !== 0) {
    throw new Error("@octoseq/mir: config.fftSize must be a power of two");
  }
  if (hopSize > fftSize) {
    throw new Error(
      "@octoseq/mir: config.hopSize must be <= config.fftSize"
    );
  }

  const sampleRate = audio.sampleRate;
  const samples = mixToMono(audio);
  // Only full windows are analysed; inputs shorter than fftSize give zero frames.
  const frameCount = Math.max(0, 1 + Math.floor((samples.length - fftSize) / hopSize));
  const binCount = (fftSize >>> 1) + 1;

  const times = new Float32Array(frameCount);
  const magnitudes = new Array(frameCount);
  const taper = hannWindow(fftSize);
  const backend = getFftBackend(fftSize);
  const scratch = new Float32Array(fftSize);
  const clock = () => (typeof performance !== "undefined" ? performance.now() : Date.now());
  let fftElapsedMs = 0;

  for (let frame = 0; frame < frameCount; frame++) {
    if (options.isCancelled?.()) {
      throw new Error("@octoseq/mir: cancelled");
    }
    const offset = frame * hopSize;
    times[frame] = (offset + fftSize / 2) / sampleRate;

    for (let i = 0; i < fftSize; i++) {
      const sample = samples[offset + i] ?? 0;
      scratch[i] = sample * (taper[i] ?? 0);
    }

    const fftStart = clock();
    const { real, imag } = backend.forwardReal(scratch);
    fftElapsedMs += clock() - fftStart;

    // Copy the magnitudes into a fresh row for this frame.
    const row = new Float32Array(binCount);
    for (let k = 0; k < binCount; k++) {
      row[k] = Math.hypot(real[k] ?? 0, imag[k] ?? 0);
    }
    magnitudes[frame] = row;
  }

  magnitudes.cpuFftTotalMs = fftElapsedMs;
  return {
    sampleRate,
    fftSize,
    hopSize,
    times,
    magnitudes
  };
}
1209
+
1210
+ // src/runner/runMir.ts
1211
/**
 * Millisecond clock: `performance.now()` when a global `performance` exists,
 * otherwise `Date.now()`.
 *
 * @returns Current time in milliseconds.
 */
function nowMs2() {
  if (typeof performance !== "undefined") {
    return performance.now();
  }
  return Date.now();
}
1214
/**
 * Wraps `{ sampleRate, mono }` as a single-channel AudioBuffer-like object.
 *
 * `getChannelData` reads `audio.mono` at call time and ignores any channel index.
 *
 * @param audio Object exposing `sampleRate` and `mono`.
 * @returns `{ sampleRate, numberOfChannels: 1, getChannelData }`.
 */
function asAudioBufferLike(audio) {
  const readMono = () => audio.mono;
  return {
    sampleRate: audio.sampleRate,
    numberOfChannels: 1,
    getChannelData: readMono
  };
}
1221
/**
 * Runs a single MIR analysis selected by `request.fn` and returns its result
 * together with timing metadata.
 *
 * Flow:
 *  1. Per-feature option groups (`onset`, `peakPick`, `hpss`, `mfcc`) are
 *     merged; values on `options` override the ones carried on `request`.
 *  2. A CPU spectrogram is always computed first, using `request.spectrogram`
 *     or the default `{ fftSize: 2048, hopSize: 512, window: "hann" }`.
 *  3. The branch matching `request.fn` derives the requested feature.
 *     `"melSpectrogram"` and any value not matched by an earlier branch end
 *     in the mel spectrogram fall-through at the bottom.
 *
 * GPU handling: with `request.backend === "gpu"` a missing `options.gpu`
 * throws. A GPU stage that fails is retried on the CPU, unless
 * `options.strictGpu` is set, in which case the error is rethrown.
 *
 * @param audio   Object exposing `sampleRate` and `mono` (see `asAudioBufferLike`).
 * @param request `{ fn, backend?, spectrogram?, mel?, onset?, peakPick?, hpss?, mfcc? }`.
 * @param options `{ gpu?, strictGpu?, isCancelled?, onset?, peakPick?, hpss?, mfcc? }`.
 * @returns One of
 *   `{ kind: "1d", times, values, meta }`,
 *   `{ kind: "2d", times, data, meta }` or
 *   `{ kind: "events", times, events, meta }`,
 *   where `meta` is `{ backend, usedGpu, timings: { totalMs, cpuMs, gpuMs? } }`.
 * @throws Error "@octoseq/mir: cancelled" on cancellation, the missing-GPU
 *         error described above, or any error raised by the DSP stages.
 */
async function runMir(audio, request, options = {}) {
  options = {
    ...options,
    onset: { ...request.onset, ...options.onset },
    peakPick: { ...request.peakPick, ...options.peakPick },
    hpss: { ...request.hpss, ...options.hpss },
    mfcc: { ...request.mfcc, ...options.mfcc }
  };
  const t0 = nowMs2();
  const backend = request.backend ?? "cpu";
  const specConfig = request.spectrogram ?? {
    fftSize: 2048,
    hopSize: 512,
    window: "hann"
  };
  const cpuStart = nowMs2();
  const spec = await spectrogram(asAudioBufferLike(audio), specConfig, void 0, {
    isCancelled: options.isCancelled
  });
  const cpuAfterSpec = nowMs2();
  if (options.isCancelled?.()) {
    throw new Error("@octoseq/mir: cancelled");
  }

  // --- Spectral descriptors computed directly from the spectrogram (CPU only). ---
  if (request.fn === "spectralCentroid" || request.fn === "spectralFlux") {
    const values = request.fn === "spectralCentroid" ? spectralCentroid(spec) : spectralFlux(spec);
    const cpuEnd = nowMs2();
    return {
      kind: "1d",
      times: spec.times,
      values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: cpuEnd - t0,
          cpuMs: cpuEnd - cpuStart
        }
      }
    };
  }

  const melConfig = request.mel ?? { nMels: 64 };

  // Mel projection of `spec`; tries the GPU first when asked and falls back to
  // the CPU on failure unless strictGpu is set.
  const computeMel = async (useGpu) => {
    const melCpuStart = nowMs2();
    if (useGpu) {
      if (!options.gpu) {
        throw new Error("@octoseq/mir: backend='gpu' requested but no MirGPU provided");
      }
      const gpuStart = nowMs2();
      try {
        const gpuMel = await melSpectrogram(spec, melConfig, options.gpu);
        const gpuEnd = nowMs2();
        const gpuKernelMs = gpuMel.gpuTimings?.gpuSubmitToReadbackMs;
        return {
          mel: gpuMel,
          usedGpu: true,
          // Prefer the kernel-level timing when the GPU path reports one.
          gpuMs: gpuKernelMs ?? gpuEnd - gpuStart,
          cpuExtraMs: nowMs2() - melCpuStart - (gpuEnd - gpuStart)
        };
      } catch (e) {
        if (options.strictGpu) throw e;
        // Best-effort mode: fall through to the CPU implementation below.
      }
    }
    const cpuMel = await melSpectrogram(spec, melConfig, void 0);
    const melCpuEnd = nowMs2();
    return {
      mel: cpuMel,
      usedGpu: false,
      cpuExtraMs: melCpuEnd - melCpuStart
    };
  };

  // --- Onset envelope (GPU when requested, CPU otherwise or as fallback). ---
  if (request.fn === "onsetEnvelope") {
    if (backend === "gpu") {
      if (!options.gpu) throw new Error("@octoseq/mir: backend='gpu' requested but no MirGPU provided");
      const { mel: gpuPathMel, gpuMs: melGpuMs, cpuExtraMs: gpuPathMelCpuMs } = await computeMel(true);
      try {
        const onsetGpu = await onsetEnvelopeFromMelGpu(gpuPathMel, options.gpu, {
          diffMethod: options.onset?.diffMethod
        });
        const gpuPathEnd = nowMs2();
        return {
          kind: "1d",
          times: onsetGpu.times,
          values: onsetGpu.values,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: gpuPathEnd - t0,
              cpuMs: cpuAfterSpec - cpuStart + gpuPathMelCpuMs,
              gpuMs: (melGpuMs ?? 0) + onsetGpu.gpuTimings.gpuSubmitToReadbackMs
            }
          }
        };
      } catch (e) {
        if (options.strictGpu) throw e;
        // Best-effort mode: recompute everything on the CPU below.
      }
    }
    const { mel: cpuMel, cpuExtraMs: melCpuMs } = await computeMel(false);
    const onset = onsetEnvelopeFromMel(cpuMel, {
      smoothMs: options.onset?.smoothMs,
      diffMethod: options.onset?.diffMethod,
      useLog: options.onset?.useLog
    });
    const end = nowMs2();
    return {
      kind: "1d",
      times: onset.times,
      values: onset.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: cpuAfterSpec - cpuStart + melCpuMs
        }
      }
    };
  }

  // --- Onset peaks: CPU onset envelope followed by peak picking. ---
  if (request.fn === "onsetPeaks") {
    const { mel: cpuMel, cpuExtraMs: melCpuMs } = await computeMel(false);
    const onset = onsetEnvelopeFromMel(cpuMel, {
      smoothMs: options.onset?.smoothMs,
      diffMethod: options.onset?.diffMethod,
      useLog: options.onset?.useLog
    });
    const events = peakPick(onset.times, onset.values, {
      minIntervalSec: options.peakPick?.minIntervalSec,
      threshold: options.peakPick?.threshold,
      // A falsy adaptiveFactor (including 0) disables adaptive thresholding.
      adaptive: options.peakPick?.adaptiveFactor ? { method: "meanStd", factor: options.peakPick.adaptiveFactor } : void 0
    });
    const end = nowMs2();
    return {
      kind: "events",
      times: onset.times,
      events,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: cpuAfterSpec - cpuStart + melCpuMs
        }
      }
    };
  }

  // --- Harmonic / percussive separation. ---
  if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
    const hpssSpecConfig = options.hpss?.spectrogram ?? specConfig;
    // A dedicated spectrogram is only computed when fftSize or hopSize differ.
    const needsHpssSpec = hpssSpecConfig.fftSize !== specConfig.fftSize || hpssSpecConfig.hopSize !== specConfig.hopSize;
    let hpssSpec;
    let hpssCpuStart = cpuAfterSpec;
    if (needsHpssSpec) {
      hpssCpuStart = nowMs2();
      hpssSpec = await spectrogram(asAudioBufferLike(audio), hpssSpecConfig, void 0, {
        isCancelled: options.isCancelled
      });
    } else {
      hpssSpec = spec;
    }
    const hpssAfterSpec = nowMs2();
    // Time attributed to the spectrogram that HPSS actually consumed.
    const specCpuMs = needsHpssSpec ? hpssAfterSpec - hpssCpuStart : cpuAfterSpec - cpuStart;
    if (backend === "gpu") {
      if (!options.gpu) throw new Error("@octoseq/mir: backend='gpu' requested but no MirGPU provided");
      const gpuHpssStart = nowMs2();
      try {
        const out = await hpssGpu(hpssSpec, options.gpu, {
          timeMedian: options.hpss?.timeMedian,
          freqMedian: options.hpss?.freqMedian,
          softMask: true,
          // preserve CPU default
          isCancelled: options.isCancelled
        });
        const gpuPathEnd = nowMs2();
        const gpuChosen = request.fn === "hpssHarmonic" ? out.harmonic : out.percussive;
        return {
          kind: "2d",
          times: gpuChosen.times,
          data: gpuChosen.magnitudes,
          meta: {
            backend: "gpu",
            usedGpu: true,
            timings: {
              totalMs: gpuPathEnd - t0,
              cpuMs: specCpuMs + (gpuPathEnd - gpuHpssStart - out.gpuMs),
              gpuMs: out.gpuMs
            }
          }
        };
      } catch (e) {
        if (options.strictGpu) throw e;
        // Best-effort mode: fall through to the CPU implementation below.
      }
    }
    const hpssStart = nowMs2();
    const { harmonic, percussive } = hpss(hpssSpec, {
      timeMedian: options.hpss?.timeMedian,
      freqMedian: options.hpss?.freqMedian,
      isCancelled: options.isCancelled
    });
    const end = nowMs2();
    const cpuMs = specCpuMs + (end - hpssStart);
    const chosen = request.fn === "hpssHarmonic" ? harmonic : percussive;
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.magnitudes,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: { totalMs: end - t0, cpuMs }
      }
    };
  }

  // --- MFCC and its first/second temporal derivatives (CPU only). ---
  if (request.fn === "mfcc" || request.fn === "mfccDelta" || request.fn === "mfccDeltaDelta") {
    const mfccSpecConfig = options.mfcc?.spectrogram ?? specConfig;
    const needsMfccSpec = mfccSpecConfig.fftSize !== specConfig.fftSize || mfccSpecConfig.hopSize !== specConfig.hopSize;
    let mfccMel;
    let mfccCpuMs;
    if (needsMfccSpec) {
      const mfccCpuStart = nowMs2();
      const mfccSpec = await spectrogram(asAudioBufferLike(audio), mfccSpecConfig, void 0, {
        isCancelled: options.isCancelled
      });
      mfccMel = await melSpectrogram(mfccSpec, melConfig, void 0);
      mfccCpuMs = nowMs2() - mfccCpuStart;
    } else {
      const { mel: sharedMel, cpuExtraMs: sharedMelCpuMs } = await computeMel(false);
      mfccMel = sharedMel;
      mfccCpuMs = cpuAfterSpec - cpuStart + sharedMelCpuMs;
    }
    const mfccStart = nowMs2();
    const base = mfcc(mfccMel, { nCoeffs: options.mfcc?.nCoeffs });
    const features = { times: base.times, values: base.coeffs };
    const chosen = request.fn === "mfcc" ? features : request.fn === "mfccDelta" ? delta(features) : deltaDelta(features);
    const end = nowMs2();
    return {
      kind: "2d",
      times: chosen.times,
      data: chosen.values,
      meta: {
        backend: "cpu",
        usedGpu: false,
        timings: {
          totalMs: end - t0,
          cpuMs: mfccCpuMs + (end - mfccStart)
        }
      }
    };
  }

  // --- Mel spectrogram: handles "melSpectrogram" and any fn not matched above. ---
  const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(backend === "gpu");
  const end = nowMs2();
  return {
    kind: "2d",
    times: mel.times,
    data: mel.melBands,
    meta: {
      backend: usedGpu ? "gpu" : "cpu",
      usedGpu,
      timings: {
        totalMs: end - t0,
        cpuMs: cpuAfterSpec - cpuStart + cpuExtraMs,
        gpuMs
      }
    }
  };
}
1522
+
1523
+ export { delta, deltaDelta, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
1524
+ //# sourceMappingURL=chunk-DUWYCAVG.js.map
1525
+ //# sourceMappingURL=chunk-DUWYCAVG.js.map