@octoseq/mir 0.1.0-main.0d2814e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/chunk-DUWYCAVG.js +1525 -0
  2. package/dist/chunk-DUWYCAVG.js.map +1 -0
  3. package/dist/index.d.ts +450 -0
  4. package/dist/index.js +1234 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/runMir-CSIBwNZ3.d.ts +84 -0
  7. package/dist/runner/runMir.d.ts +2 -0
  8. package/dist/runner/runMir.js +3 -0
  9. package/dist/runner/runMir.js.map +1 -0
  10. package/dist/runner/workerProtocol.d.ts +169 -0
  11. package/dist/runner/workerProtocol.js +11 -0
  12. package/dist/runner/workerProtocol.js.map +1 -0
  13. package/dist/types-BE3py4fZ.d.ts +83 -0
  14. package/package.json +55 -0
  15. package/src/dsp/fft.ts +22 -0
  16. package/src/dsp/fftBackend.ts +53 -0
  17. package/src/dsp/fftBackendFftjs.ts +60 -0
  18. package/src/dsp/hpss.ts +152 -0
  19. package/src/dsp/hpssGpu.ts +101 -0
  20. package/src/dsp/mel.ts +219 -0
  21. package/src/dsp/mfcc.ts +119 -0
  22. package/src/dsp/onset.ts +205 -0
  23. package/src/dsp/peakPick.ts +112 -0
  24. package/src/dsp/spectral.ts +95 -0
  25. package/src/dsp/spectrogram.ts +176 -0
  26. package/src/gpu/README.md +34 -0
  27. package/src/gpu/context.ts +44 -0
  28. package/src/gpu/helpers.ts +87 -0
  29. package/src/gpu/hpssMasks.ts +116 -0
  30. package/src/gpu/kernels/hpssMasks.wgsl.ts +137 -0
  31. package/src/gpu/kernels/melProject.wgsl.ts +48 -0
  32. package/src/gpu/kernels/onsetEnvelope.wgsl.ts +56 -0
  33. package/src/gpu/melProject.ts +98 -0
  34. package/src/gpu/onsetEnvelope.ts +81 -0
  35. package/src/gpu/webgpu.d.ts +176 -0
  36. package/src/index.ts +121 -0
  37. package/src/runner/runMir.ts +431 -0
  38. package/src/runner/workerProtocol.ts +189 -0
  39. package/src/search/featureVectorV1.ts +123 -0
  40. package/src/search/fingerprintV1.ts +230 -0
  41. package/src/search/refinedModelV1.ts +321 -0
  42. package/src/search/searchTrackV1.ts +206 -0
  43. package/src/search/searchTrackV1Guided.ts +863 -0
  44. package/src/search/similarity.ts +98 -0
  45. package/src/types.ts +105 -0
  46. package/src/util/display.ts +80 -0
  47. package/src/util/normalise.ts +58 -0
  48. package/src/util/stats.ts +25 -0
@@ -0,0 +1,83 @@
1
/** Identifier of the execution backend. */
type MirBackend = "cpu" | "gpu";

/** Timing figures attached to a run (the `Ms` suffix suggests milliseconds). */
type MirRunTimings = {
  totalMs: number;
  cpuMs?: number;
  gpuMs?: number;
};

/** Metadata carried by every result variant. */
type MirRunMeta = {
  backend: MirBackend;
  usedGpu: boolean;
  timings: MirRunTimings;
};

/** Result variant tagged `"1d"`: one `values` array alongside `times`. */
type Mir1DResult = {
  kind: "1d";
  times: Float32Array;
  values: Float32Array;
  meta: MirRunMeta;
};

/** Result variant tagged `"2d"`: an array of `Float32Array` rows alongside `times`. */
type Mir2DResult = {
  kind: "2d";
  times: Float32Array;
  data: Float32Array[];
  meta: MirRunMeta;
};

/** One entry of `MirEventsResult.events`. */
type MirEvent = {
  time: number;
  strength: number;
  index: number;
};

/** Result variant tagged `"events"`: a list of `MirEvent` alongside `times`. */
type MirEventsResult = {
  kind: "events";
  times: Float32Array;
  events: MirEvent[];
  meta: MirRunMeta;
};

/** Discriminated union of all result variants; switch on `kind`. */
type MirResult = Mir1DResult | Mir2DResult | MirEventsResult;

/** Names of the analysis functions a request can select. */
type MirFunctionId =
  | "spectralCentroid"
  | "spectralFlux"
  | "melSpectrogram"
  | "onsetEnvelope"
  | "onsetPeaks"
  | "hpssHarmonic"
  | "hpssPercussive"
  | "mfcc"
  | "mfccDelta"
  | "mfccDeltaDelta";

/** Spectrogram settings shared by the top-level, `hpss` and `mfcc` request sections. */
type MirStftParams = {
  fftSize: number;
  hopSize: number;
  window: "hann";
};

/** A request: the function to run plus optional per-stage settings. */
type MirRunRequest = {
  fn: MirFunctionId;
  spectrogram?: MirStftParams;
  mel?: {
    nMels: number;
    fMin?: number;
    fMax?: number;
  };
  backend?: MirBackend;
  onset?: {
    smoothMs?: number;
    diffMethod?: "rectified" | "abs";
    useLog?: boolean;
  };
  peakPick?: {
    minIntervalSec?: number;
    threshold?: number;
    adaptiveFactor?: number;
  };
  hpss?: {
    timeMedian?: number;
    freqMedian?: number;
    spectrogram?: MirStftParams;
  };
  mfcc?: {
    nCoeffs?: number;
    spectrogram?: MirStftParams;
  };
};

/** Audio input: a sample rate plus a single `Float32Array` of samples (`mono`). */
type MirAudioPayload = {
  sampleRate: number;
  mono: Float32Array;
};

export type {
  MirBackend as M,
  MirRunTimings as a,
  MirRunMeta as b,
  Mir1DResult as c,
  Mir2DResult as d,
  MirResult as e,
  MirFunctionId as f,
  MirRunRequest as g,
  MirAudioPayload as h,
};
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@octoseq/mir",
3
+ "version": "0.1.0-main.0d2814e",
4
+ "description": "WebGPU-accelerated Music Information Retrieval (MIR) library (skeleton)",
5
+ "license": "MIT",
6
+ "private": false,
7
+ "type": "module",
8
+ "sideEffects": false,
9
+ "files": [
10
+ "dist",
11
+ "src"
12
+ ],
13
+ "exports": {
14
+ ".": {
15
+ "types": "./src/index.ts",
16
+ "import": "./src/index.ts"
17
+ },
18
+ "./runner/runMir": {
19
+ "types": "./src/runner/runMir.ts",
20
+ "import": "./src/runner/runMir.ts"
21
+ },
22
+ "./runner/workerProtocol": {
23
+ "types": "./src/runner/workerProtocol.ts",
24
+ "import": "./src/runner/workerProtocol.ts"
25
+ }
26
+ },
27
+ "main": "./dist/index.js",
28
+ "types": "./src/index.ts",
29
+ "scripts": {
30
+ "build": "tsup",
31
+ "dev": "tsup --watch",
32
+ "typecheck": "tsc -p tsconfig.json --noEmit",
33
+ "lint": "eslint -c eslint.config.mjs .",
34
+ "test": "vitest run",
35
+ "test:watch": "vitest"
36
+ },
37
+ "devDependencies": {
38
+ "@types/node": "latest",
39
+ "eslint": "latest",
40
+ "tsup": "latest",
41
+ "typescript": "latest",
42
+ "vitest": "latest"
43
+ },
44
+ "dependencies": {
45
+ "fft.js": "^4.0.4"
46
+ },
47
+ "publishConfig": {
48
+ "access": "public"
49
+ },
50
+ "repository": {
51
+ "type": "git",
52
+ "url": "https://github.com/rewbs/octoseq.git",
53
+ "directory": "packages/mir"
54
+ }
55
+ }
package/src/dsp/fft.ts ADDED
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Windowing utilities.
3
+ *
4
+ * Note:
5
+ * - The FFT implementation previously lived in this file.
6
+ * - As of v0.1.x we use `fft.js` behind an internal backend abstraction (see `fftBackend.ts`).
7
+ * - We keep only the window function here because it is part of the STFT behaviour that downstream
8
+ * stages depend on.
9
+ */
10
+
11
+ // (Complex FFT implementation removed; kept intentionally empty.)
12
+
13
/**
 * Build a periodic Hann window.
 *
 * Sample `n` is `0.5 - 0.5 * cos(2πn / size)`. The divisor is `size` rather than
 * `size - 1`, which is the periodic form commonly used for STFT overlap-add.
 *
 * @param size Number of samples in the window.
 * @returns A new `Float32Array` of length `size`.
 */
export function hannWindow(size: number): Float32Array {
  return new Float32Array(size).map(
    (_zero, n) => 0.5 - 0.5 * Math.cos((2 * Math.PI * n) / size)
  );
}
21
+
22
+ // FFT and magnitude helpers removed.
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Internal FFT backend abstraction.
3
+ *
4
+ * Why:
5
+ * - The rest of the STFT pipeline should not care about the FFT implementation.
6
+ * - We want to be able to swap this layer for a future WebGPU FFT without touching callers.
7
+ */
8
+
9
/** Complex FFT result held as two parallel arrays rather than interleaved pairs. */
export type FftComplexOutput = {
  /** Real components of the full-length FFT output (length = fftSize). */
  real: Float32Array;
  /** Imaginary components of the full-length FFT output (length = fftSize). */
  imag: Float32Array;
};
15
+
16
/** Contract every FFT implementation must satisfy; one instance serves one transform size. */
export interface FftBackend {
  /** Transform length this backend was created for. */
  readonly fftSize: number;

  /**
   * Forward FFT of a real-valued frame.
   *
   * Contract:
   * - `input.length` must equal `fftSize`.
   * - The result is the full complex spectrum (not just the rfft half-spectrum), which keeps
   *   the interface generic.
   *
   * Scaling:
   * - Output is unnormalised (the usual FFT-library convention, fft.js included), so
   *   magnitudes scale roughly with the window sum and `fftSize`.
   * - This matches the previous hand-rolled FFT and is close to librosa's default
   *   `np.abs(np.fft.rfft(...))` magnitude semantics (also unnormalised).
   */
  forwardReal(input: Float32Array): FftComplexOutput;
}
34
+
35
/**
 * Module-level cache of FFT backends keyed by transform size, so a plan is built at most
 * once per size. Safe for Web Workers (per-worker module instance). Not shared across threads.
 */
const backendCache = new Map<number, FftBackend>();

/**
 * Return the FFT backend for `fftSize`, creating and caching it on first request.
 *
 * @param fftSize Transform length; validated by the backend factory.
 * @returns The cached backend for this size (the same instance on repeated calls).
 */
export function getFftBackend(fftSize: number): FftBackend {
  let backend = backendCache.get(fftSize);
  if (!backend) {
    // Note: ESM static import is OK in browsers and Web Workers.
    // A failed construction throws before anything is cached.
    backend = createFftJsBackend(fftSize);
    backendCache.set(fftSize, backend);
  }
  return backend;
}
51
+
52
+ // Implemented in separate file to keep the public surface minimal.
53
+ import { createFftJsBackend } from "./fftBackendFftjs";
@@ -0,0 +1,60 @@
1
+ /**
2
+ * `fft.js` backend.
3
+ *
4
+ * Notes:
5
+ * - We intentionally keep this file small and self-contained so we can replace it later with a GPU FFT.
6
+ * - `fft.js` performs an unnormalised forward FFT (same convention as our previous radix-2 code).
7
+ * - We allocate the plan once per fftSize and reuse internal buffers across frames.
8
+ */
9
+
10
+ import FFT from "fft.js";
11
+
12
+ import type { FftBackend, FftComplexOutput } from "./fftBackend";
13
+
14
/**
 * Create an FFT backend backed by `fft.js` for one fixed transform size.
 *
 * @param fftSize Transform length. Must be a positive 32-bit integer that is a power of two
 *   and at least 2, which are the only sizes `fft.js` supports.
 * @returns A backend whose `forwardReal` yields the full, unnormalised complex spectrum.
 *   The returned `real` / `imag` arrays are internal buffers that are overwritten by the
 *   next `forwardReal` call; copy them if the values must outlive that call.
 * @throws Error if `fftSize` is invalid, or (from `forwardReal`) if the frame length
 *   differs from `fftSize`.
 */
export function createFftJsBackend(fftSize: number): FftBackend {
  if (!Number.isFinite(fftSize) || fftSize <= 0 || (fftSize | 0) !== fftSize) {
    throw new Error("@octoseq/mir: fftSize must be a positive integer");
  }
  // Reject unsupported sizes here so the caller gets a package-prefixed message instead of
  // the raw error thrown from inside the fft.js constructor.
  if (fftSize < 2 || (fftSize & (fftSize - 1)) !== 0) {
    throw new Error(`@octoseq/mir: fftSize must be a power of two >= 2 (got ${fftSize})`);
  }

  const fft = new FFT(fftSize);

  // `fft.js` uses interleaved complex arrays [re0, im0, re1, im1, ...]
  // It accepts input for realTransform as a real array of length N.
  const inReal = new Float32Array(fftSize);
  const outComplexInterleaved = fft.createComplexArray() as unknown as Float32Array;

  // Output buffers are allocated once and reused across frames.
  const outReal = new Float32Array(fftSize);
  const outImag = new Float32Array(fftSize);

  return {
    fftSize,
    forwardReal(frame: Float32Array): FftComplexOutput {
      if (frame.length !== fftSize) {
        throw new Error(
          `@octoseq/mir: FFT input length (${frame.length}) must equal fftSize (${fftSize})`
        );
      }

      // Copy to stable buffer to avoid fft.js mutating user-owned arrays.
      inReal.set(frame);

      // Real-input FFT.
      // `realTransform(out, data)` fills out with interleaved complex spectrum.
      // `completeSpectrum(out)` fills the negative frequencies so we get full N complex bins.
      fft.realTransform(outComplexInterleaved as unknown as number[], inReal as unknown as number[]);
      fft.completeSpectrum(outComplexInterleaved as unknown as number[]);

      // De-interleave into (real, imag) arrays.
      // Note: we keep full spectrum even though most consumers only need 0..N/2.
      for (let k = 0; k < fftSize; k++) {
        const re = outComplexInterleaved[2 * k] ?? 0;
        const im = outComplexInterleaved[2 * k + 1] ?? 0;
        // Canonicalise -0 -> +0 so silence tests and downstream comparisons are stable.
        outReal[k] = re === 0 ? 0 : re;
        outImag[k] = im === 0 ? 0 : im;
      }

      return { real: outReal, imag: outImag };
    }
  };
}
@@ -0,0 +1,152 @@
1
+ import type { Spectrogram } from "./spectrogram";
2
+
3
/** Frame-by-bin magnitude grid together with its dimensions and the `times` array. */
export type SpectrogramLike2D = {
  times: Float32Array;
  /** Number of bins in each row of `magnitudes`. */
  bins: number;
  /** Number of rows in `magnitudes`. */
  frames: number;
  /** Indexed as [frame][bin]. */
  magnitudes: Float32Array[];
};
9
+
10
/** Tuning knobs for harmonic/percussive separation. Every field is optional. */
export type HpssOptions = {
  /** Length in frames of the median kernel applied along the time axis. Must be odd. */
  timeMedian?: number;
  /** Length in bins of the median kernel applied along the frequency axis. Must be odd. */
  freqMedian?: number;
  /** `true` selects soft masks, `false` a hard mask. */
  softMask?: boolean;
  /** Polled inside long loops; returning `true` cancels the computation. */
  isCancelled?: () => boolean;
};
20
+
21
/**
 * Validate that `v` is a positive, odd integer.
 *
 * @param name Label used in the error message.
 * @param v Value to check.
 * @throws Error if `v` is not a finite positive integer in the 32-bit range, or if it is even.
 */
function assertOddPositiveInt(name: string, v: number): void {
  // `(v | 0) === v` holds only for integers representable as a signed 32-bit value.
  const isPositiveInt = Number.isFinite(v) && v > 0 && (v | 0) === v;
  if (!isPositiveInt) {
    throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
  }

  const isOdd = v % 2 === 1;
  if (!isOdd) {
    throw new Error(`@octoseq/mir: ${name} must be odd`);
  }
}
29
+
30
/**
 * Median of a small window, found by sorting a copy (the input is left untouched).
 *
 * For an even-length window the upper of the two middle values is returned; an empty
 * window yields 0. Kept as its own function so a faster median can be swapped in later.
 */
function medianOfWindow(values: Float32Array): number {
  const ascending = [...values].sort((lo, hi) => lo - hi);
  return ascending[ascending.length >>> 1] ?? 0;
}
38
+
39
/**
 * Median-filter the magnitudes along the time axis, independently for each bin.
 *
 * Frame indices outside the spectrogram are clamped to the first/last frame, and any
 * missing row or bin reads as 0.
 *
 * @param spec Source spectrogram; `times.length` frames and `(fftSize >>> 1) + 1` bins are used.
 * @param kTime Kernel length in frames.
 * @param options Only `isCancelled` is read here; it is polled once per frame.
 * @returns One new `Float32Array` of medians per frame.
 * @throws Error with message "@octoseq/mir: cancelled" when `isCancelled` returns true.
 */
function medianFilterTime(spec: Spectrogram, kTime: number, options: HpssOptions): Float32Array[] {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const radius = kTime >>> 1;
  const lastFrame = frameCount - 1;

  // One scratch buffer is refilled for every (frame, bin) cell.
  const scratch = new Float32Array(kTime);
  const filtered: Float32Array[] = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");

    const medians = new Float32Array(binCount);
    for (let bin = 0; bin < binCount; bin++) {
      let slot = 0;
      for (let offset = -radius; offset <= radius; offset++) {
        const neighbour = Math.min(lastFrame, Math.max(0, frame + offset));
        scratch[slot++] = spec.magnitudes[neighbour]?.[bin] ?? 0;
      }
      medians[bin] = medianOfWindow(scratch);
    }
    filtered[frame] = medians;
  }

  return filtered;
}
66
+
67
/**
 * Median-filter the magnitudes along the frequency axis, independently for each frame.
 *
 * Bin indices outside `0..nBins-1` are clamped to the edge bins; a missing frame is
 * treated as all zeros.
 *
 * @param spec Source spectrogram; `times.length` frames and `(fftSize >>> 1) + 1` bins are used.
 * @param kFreq Kernel length in bins.
 * @param options Only `isCancelled` is read here; it is polled once per frame.
 * @returns One new `Float32Array` of medians per frame.
 * @throws Error with message "@octoseq/mir: cancelled" when `isCancelled` returns true.
 */
function medianFilterFreq(spec: Spectrogram, kFreq: number, options: HpssOptions): Float32Array[] {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const radius = kFreq >>> 1;
  const lastBin = binCount - 1;

  // One scratch buffer is refilled for every (frame, bin) cell.
  const scratch = new Float32Array(kFreq);
  const filtered: Float32Array[] = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");

    const source = spec.magnitudes[frame] ?? new Float32Array(binCount);
    const medians = new Float32Array(binCount);

    for (let bin = 0; bin < binCount; bin++) {
      let slot = 0;
      for (let offset = -radius; offset <= radius; offset++) {
        const neighbour = Math.min(lastBin, Math.max(0, bin + offset));
        scratch[slot++] = source[neighbour] ?? 0;
      }
      medians[bin] = medianOfWindow(scratch);
    }

    filtered[frame] = medians;
  }

  return filtered;
}
95
+
96
/**
 * Harmonic/percussive source separation on the CPU using median filtering.
 *
 * A time-axis median gives the harmonic estimate and a frequency-axis median gives the
 * percussive estimate. Each magnitude is then split between the two outputs, either in
 * proportion to the estimates (soft mask) or entirely to the larger one (hard mask; ties
 * go to harmonic).
 *
 * @param spec Source spectrogram; `times.length` frames and `(fftSize >>> 1) + 1` bins are used.
 * @param options `timeMedian` / `freqMedian` default to 17, `softMask` defaults to true.
 * @returns Harmonic and percussive magnitude grids that share `spec.times`.
 * @throws Error if a kernel size is not a positive odd integer, or with message
 *   "@octoseq/mir: cancelled" when `isCancelled` returns true.
 */
export function hpss(spec: Spectrogram, options: HpssOptions = {}): { harmonic: SpectrogramLike2D; percussive: SpectrogramLike2D } {
  const kTime = options.timeMedian ?? 17;
  const kFreq = options.freqMedian ?? 17;
  assertOddPositiveInt("options.timeMedian", kTime);
  assertOddPositiveInt("options.freqMedian", kFreq);

  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;

  const harmonicEstimate = medianFilterTime(spec, kTime, options);
  const percussiveEstimate = medianFilterFreq(spec, kFreq, options);

  const useSoftMask = options.softMask ?? true;
  // Floor for the soft-mask denominator so silent cells do not divide by zero.
  const minDenominator = 1e-12;

  const harmonicRows: Float32Array[] = new Array(frameCount);
  const percussiveRows: Float32Array[] = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame++) {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");

    const source = spec.magnitudes[frame] ?? new Float32Array(binCount);
    const hEstRow = harmonicEstimate[frame] ?? new Float32Array(binCount);
    const pEstRow = percussiveEstimate[frame] ?? new Float32Array(binCount);

    const hOut = new Float32Array(binCount);
    const pOut = new Float32Array(binCount);

    for (let bin = 0; bin < binCount; bin++) {
      const magnitude = source[bin] ?? 0;
      const hEst = hEstRow[bin] ?? 0;
      const pEst = pEstRow[bin] ?? 0;

      if (!useSoftMask) {
        const harmonicWins = hEst >= pEst;
        hOut[bin] = harmonicWins ? magnitude : 0;
        pOut[bin] = harmonicWins ? 0 : magnitude;
        continue;
      }

      const total = Math.max(minDenominator, hEst + pEst);
      hOut[bin] = magnitude * (hEst / total);
      pOut[bin] = magnitude * (pEst / total);
    }

    harmonicRows[frame] = hOut;
    percussiveRows[frame] = pOut;
  }

  const shared = { times: spec.times, bins: binCount, frames: frameCount };
  return {
    harmonic: { ...shared, magnitudes: harmonicRows },
    percussive: { ...shared, magnitudes: percussiveRows },
  };
}
@@ -0,0 +1,101 @@
1
+ import type { Spectrogram } from "./spectrogram";
2
+ import type { SpectrogramLike2D, HpssOptions } from "./hpss";
3
+
4
+ import type { MirGPU } from "../gpu/context";
5
+ import { gpuHpssMasks } from "../gpu/hpssMasks";
6
+
7
/** Output of the GPU-assisted HPSS path: both separated grids plus a GPU timing figure. */
export type HpssGpuResult = {
  harmonic: SpectrogramLike2D;
  percussive: SpectrogramLike2D;
  /** GPU time reported by the mask kernel (the `Ms` suffix suggests milliseconds). */
  gpuMs: number;
};
12
+
13
/**
 * Pack per-frame magnitude rows into one row-major buffer (`frame * nBins + bin`).
 *
 * Rows are normalised to exactly `nBins` values, matching how the CPU path reads
 * `mags[k] ?? 0` for `k < nBins`:
 * - a missing or short row is zero-padded;
 * - an over-long row is truncated, so it can neither spill into the next frame's slot
 *   nor run past the end of the buffer.
 *
 * @param mags Magnitude rows indexed as [frame][bin].
 * @param nFrames Number of frames to pack.
 * @param nBins Number of bins per frame in the output.
 * @returns A new `Float32Array` of length `nFrames * nBins`.
 */
function flattenMagnitudes(mags: Float32Array[], nFrames: number, nBins: number): Float32Array {
  const flat = new Float32Array(nFrames * nBins);
  for (let t = 0; t < nFrames; t++) {
    const row = mags[t];
    // The buffer is zero-initialised, so a missing frame needs no write.
    if (!row) continue;
    flat.set(row.length > nBins ? row.subarray(0, nBins) : row, t * nBins);
  }
  return flat;
}
21
+
22
/**
 * Guard against NaN or infinite values coming back from the GPU.
 *
 * @param name Label inserted into the error message.
 * @param v Value to check.
 * @throws Error if `v` is not a finite number.
 */
function assertFiniteMask(name: string, v: number): void {
  if (Number.isFinite(v)) return;
  throw new Error(`@octoseq/mir: GPU HPSS produced non-finite ${name}`);
}
27
+
28
/**
 * HPSS with the masks estimated on the GPU and applied on the CPU.
 *
 * Important:
 * - The CPU median HPSS remains the reference implementation.
 * - The GPU kernel uses a fixed median-of-9 approximation, regardless of the CPU options
 *   (documented mapping: CPU default 17 -> GPU fixed 9). `timeMedian` / `freqMedian` are
 *   therefore not read here; only `softMask` (default true) and `isCancelled` are.
 *
 * @param spec Source spectrogram; `times.length` frames and `(fftSize >>> 1) + 1` bins are used.
 * @param gpu GPU context handed to the mask kernel.
 * @param options See above.
 * @returns Harmonic and percussive grids sharing `spec.times`, plus the kernel's reported
 *   submit-to-readback time as `gpuMs`.
 * @throws Error with message "@octoseq/mir: cancelled" when `isCancelled` returns true, or
 *   if the kernel returns a non-finite mask value.
 */
export async function hpssGpu(
  spec: Spectrogram,
  gpu: MirGPU,
  options: HpssOptions = {}
): Promise<HpssGpuResult> {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;

  const throwIfCancelled = (): void => {
    if (options.isCancelled?.()) throw new Error("@octoseq/mir: cancelled");
  };
  const clampToUnit = (m: number): number => Math.max(0, Math.min(1, m));

  throwIfCancelled();

  // The kernel takes the magnitudes as one flat row-major buffer.
  const masks = await gpuHpssMasks(gpu, {
    nFrames: frameCount,
    nBins: binCount,
    magsFlat: flattenMagnitudes(spec.magnitudes, frameCount, binCount),
    softMask: options.softMask ?? true,
  });

  throwIfCancelled();

  const harmonicMask = masks.value.harmonicMaskFlat;
  const percussiveMask = masks.value.percussiveMaskFlat;

  const harmonicRows: Float32Array[] = new Array(frameCount);
  const percussiveRows: Float32Array[] = new Array(frameCount);

  // Masks are applied on the CPU so the output has the same shape/type as the CPU path.
  // Cancellation is polled once per frame on a best-effort basis.
  for (let frame = 0; frame < frameCount; frame++) {
    throwIfCancelled();

    const source = spec.magnitudes[frame] ?? new Float32Array(binCount);
    const hOut = new Float32Array(binCount);
    const pOut = new Float32Array(binCount);

    const rowStart = frame * binCount;
    for (let bin = 0; bin < binCount; bin++) {
      const magnitude = source[bin] ?? 0;
      const rawH = harmonicMask[rowStart + bin] ?? 0;
      const rawP = percussiveMask[rowStart + bin] ?? 0;

      assertFiniteMask("mask", rawH);
      assertFiniteMask("mask", rawP);

      // The kernel is expected to emit values in [0,1]; clamp defensively anyway.
      hOut[bin] = magnitude * clampToUnit(rawH);
      pOut[bin] = magnitude * clampToUnit(rawP);
    }

    harmonicRows[frame] = hOut;
    percussiveRows[frame] = pOut;
  }

  return {
    harmonic: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: harmonicRows },
    percussive: { times: spec.times, bins: binCount, frames: frameCount, magnitudes: percussiveRows },
    gpuMs: masks.timing.gpuSubmitToReadbackMs,
  };
}