@octoseq/mir 0.1.0-main.2e286ce
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DUWYCAVG.js +1525 -0
- package/dist/chunk-DUWYCAVG.js.map +1 -0
- package/dist/index.d.ts +450 -0
- package/dist/index.js +1234 -0
- package/dist/index.js.map +1 -0
- package/dist/runMir-CSIBwNZ3.d.ts +84 -0
- package/dist/runner/runMir.d.ts +2 -0
- package/dist/runner/runMir.js +3 -0
- package/dist/runner/runMir.js.map +1 -0
- package/dist/runner/workerProtocol.d.ts +169 -0
- package/dist/runner/workerProtocol.js +11 -0
- package/dist/runner/workerProtocol.js.map +1 -0
- package/dist/types-BE3py4fZ.d.ts +83 -0
- package/package.json +55 -0
- package/src/dsp/fft.ts +22 -0
- package/src/dsp/fftBackend.ts +53 -0
- package/src/dsp/fftBackendFftjs.ts +60 -0
- package/src/dsp/hpss.ts +152 -0
- package/src/dsp/hpssGpu.ts +101 -0
- package/src/dsp/mel.ts +219 -0
- package/src/dsp/mfcc.ts +119 -0
- package/src/dsp/onset.ts +205 -0
- package/src/dsp/peakPick.ts +112 -0
- package/src/dsp/spectral.ts +95 -0
- package/src/dsp/spectrogram.ts +176 -0
- package/src/gpu/README.md +34 -0
- package/src/gpu/context.ts +44 -0
- package/src/gpu/helpers.ts +87 -0
- package/src/gpu/hpssMasks.ts +116 -0
- package/src/gpu/kernels/hpssMasks.wgsl.ts +137 -0
- package/src/gpu/kernels/melProject.wgsl.ts +48 -0
- package/src/gpu/kernels/onsetEnvelope.wgsl.ts +56 -0
- package/src/gpu/melProject.ts +98 -0
- package/src/gpu/onsetEnvelope.ts +81 -0
- package/src/gpu/webgpu.d.ts +176 -0
- package/src/index.ts +121 -0
- package/src/runner/runMir.ts +431 -0
- package/src/runner/workerProtocol.ts +189 -0
- package/src/search/featureVectorV1.ts +123 -0
- package/src/search/fingerprintV1.ts +230 -0
- package/src/search/refinedModelV1.ts +321 -0
- package/src/search/searchTrackV1.ts +206 -0
- package/src/search/searchTrackV1Guided.ts +863 -0
- package/src/search/similarity.ts +98 -0
- package/src/types.ts +105 -0
- package/src/util/display.ts +80 -0
- package/src/util/normalise.ts +58 -0
- package/src/util/stats.ts +25 -0
package/src/dsp/mel.ts
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import type { MirGPU } from "../gpu/context";
|
|
2
|
+
import { gpuMelProjectFlat } from "../gpu/melProject";
|
|
3
|
+
|
|
4
|
+
import type { Spectrogram } from "./spectrogram";
|
|
5
|
+
|
|
6
|
+
/** Settings for the mel filter bank built by `melSpectrogram`. */
export type MelConfig = {
  /** Number of triangular mel bands. Must be a positive integer. */
  nMels: number;
  /** Lower band edge in Hz. Treated as 0 when omitted. */
  fMin?: number;
  /** Upper band edge in Hz. Treated as sampleRate / 2 when omitted. */
  fMax?: number;
};
|
|
11
|
+
|
|
12
|
+
/** Result of projecting a spectrogram onto a mel filter bank. */
export type MelSpectrogram = {
  /** Frame times, carried over from the source spectrogram. */
  times: Float32Array;
  /** Band values per frame, indexed as [frame][mel]. */
  melBands: Float32Array[];
  /** Optional observability data. Only set when the GPU path produced the result. */
  gpuTimings?: {
    gpuSubmitToReadbackMs: number;
  };
};
|
|
20
|
+
|
|
21
|
+
/**
 * Guard that `value` is a finite integer strictly greater than zero.
 *
 * @param name - Label interpolated into the error message.
 * @param value - Candidate number to validate.
 * @throws Error when `value` is NaN, infinite, fractional, zero or negative.
 */
function assertPositiveInt(name: string, value: number): void {
  // Number.isInteger rejects NaN, ±Infinity and fractions. Unlike the previous
  // `(value | 0) !== value` check it does not truncate to 32 bits, so valid
  // integers at or above 2^31 are no longer rejected.
  if (!Number.isInteger(value) || value <= 0) {
    throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Convert a frequency in Hz to the mel scale.
 *
 * Uses the HTK formula `mel = 2595 * log10(1 + hz / 700)`. This is not the
 * piecewise linear/log Slaney scale.
 *
 * @param hz - Frequency in Hz.
 * @returns The corresponding mel value.
 */
function hzToMel(hz: number): number {
  return 2595 * Math.log10(1 + hz / 700);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Convert a mel value back to Hz; the inverse of `mel = 2595 * log10(1 + hz / 700)`.
 *
 * @param mel - Value on the mel scale.
 * @returns Frequency in Hz.
 */
function melToHz(mel: number): number {
  const ratio = Math.pow(10, mel / 2595);
  return 700 * (ratio - 1);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build `nMels` triangular mel filters over the one-sided FFT bins.
 *
 * The band edges are `nMels + 2` points spaced evenly on the mel scale between
 * the clamped `fMin` and `fMax`, converted back to Hz and rounded to the nearest
 * FFT bin. Filter `m` rises from edge `m` to a peak of 1 at edge `m + 1` and
 * falls to edge `m + 2`.
 *
 * When rounding collapses one or both sides of a triangle onto the peak bin
 * (coarse FFT, many bands), only the affected slope is dropped and the peak is
 * kept, so the band still reads its centre bin rather than becoming an all-zero
 * filter. Bands whose three edges are distinct are unaffected.
 *
 * @param sampleRate - Sample rate in Hz.
 * @param fftSize - FFT length; the bank covers `(fftSize >>> 1) + 1` bins.
 * @param nMels - Number of filters to create.
 * @param fMin - Requested lower edge in Hz (clamped to [0, Nyquist]).
 * @param fMax - Requested upper edge in Hz (clamped to [0, Nyquist]).
 * @returns One weight vector per mel band, each of length `(fftSize >>> 1) + 1`.
 * @throws Error when the clamped `fMax` is not greater than the clamped `fMin`.
 */
function buildMelFilterBank(
  sampleRate: number,
  fftSize: number,
  nMels: number,
  fMin: number,
  fMax: number
): Float32Array[] {
  const nBins = (fftSize >>> 1) + 1;
  const nyquist = sampleRate / 2;

  const fMinClamped = Math.max(0, Math.min(fMin, nyquist));
  const fMaxClamped = Math.max(0, Math.min(fMax, nyquist));
  if (fMaxClamped <= fMinClamped) {
    throw new Error("@octoseq/mir: mel fMax must be > fMin");
  }

  // We create nMels triangular filters defined by nMels+2 mel points.
  const melMin = hzToMel(fMinClamped);
  const melMax = hzToMel(fMaxClamped);

  const melPoints = new Float32Array(nMels + 2);
  for (let i = 0; i < melPoints.length; i++) {
    melPoints[i] = melMin + (i * (melMax - melMin)) / (nMels + 1);
  }

  const hzPoints = new Float32Array(melPoints.length);
  for (let i = 0; i < hzPoints.length; i++) hzPoints[i] = melToHz(melPoints[i] ?? 0);

  // Snap every edge to its nearest FFT bin, clamped to the valid bin range.
  const binHz = sampleRate / fftSize;
  const binPoints = new Int32Array(hzPoints.length);
  for (let i = 0; i < binPoints.length; i++) {
    binPoints[i] = Math.max(0, Math.min(nBins - 1, Math.round((hzPoints[i] ?? 0) / binHz)));
  }

  const filters: Float32Array[] = new Array(nMels);
  for (let m = 0; m < nMels; m++) {
    const left = binPoints[m] ?? 0;
    const center = binPoints[m + 1] ?? 0;
    const right = binPoints[m + 2] ?? 0;

    const w = new Float32Array(nBins);

    // Rising slope; the loop is empty when left === center, so no division by zero.
    for (let k = left; k < center; k++) {
      w[k] = (k - left) / (center - left);
    }

    // The peak is always present, even if one or both slopes are degenerate.
    w[center] = 1;

    // Falling slope; the loop is empty when right <= center + 1.
    for (let k = center + 1; k < right; k++) {
      w[k] = (right - k) / (right - center);
    }

    filters[m] = w;
  }

  return filters;
}
|
|
94
|
+
|
|
95
|
+
/**
 * CPU projection of linear magnitudes onto a mel filter bank.
 *
 * Each output value is `log10(1e-12 + Σ_k mags[k] * filter[k])`. A frame with no
 * magnitude row, or a band with no filter, is left at 0.
 *
 * @param spec - Source spectrogram; `times` and `magnitudes` are read.
 * @param filters - One weight vector per mel band.
 * @returns Log-mel bands aligned to `spec.times` (the same array instance).
 */
function cpuMelProject(
  spec: Spectrogram,
  filters: Float32Array[]
): MelSpectrogram {
  const frameCount = spec.times.length;
  const bandCount = filters.length;
  // Small offset so an empty band maps to a finite log value rather than -Infinity.
  const floor = 1e-12;

  const rows: Float32Array[] = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame++) {
    const row = new Float32Array(bandCount);
    rows[frame] = row;

    const magnitudes = spec.magnitudes[frame];
    if (!magnitudes) continue; // missing frame: keep the zero-filled row

    for (let band = 0; band < bandCount; band++) {
      const weights = filters[band];
      if (!weights) continue;

      // Weighted sum of linear magnitudes under this band's filter.
      let acc = 0;
      for (let bin = 0; bin < magnitudes.length; bin++) {
        acc += (magnitudes[bin] ?? 0) * (weights[bin] ?? 0);
      }

      // Log scaling for visualisation and downstream features.
      row[band] = Math.log10(floor + acc);
    }
  }

  return { times: spec.times, melBands: rows };
}
|
|
134
|
+
|
|
135
|
+
/**
 * GPU projection of a spectrogram onto a mel filter bank.
 *
 * Magnitudes and filters are packed into flat row-major buffers, handed to
 * `gpuMelProjectFlat`, and the flat result is sliced into per-frame views.
 *
 * @param spec - Source spectrogram; `times`, `fftSize` and `magnitudes` are read.
 * @param filters - One weight vector per mel band.
 * @param gpu - GPU context forwarded to `gpuMelProjectFlat`.
 * @returns Mel bands aligned to `spec.times`, plus the reported GPU timing.
 */
async function gpuMelProject(
  spec: Spectrogram,
  filters: Float32Array[],
  gpu: MirGPU
): Promise<MelSpectrogram> {
  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;
  const bandCount = filters.length;

  // Pack magnitudes as [frame][bin]; rows that are missing stay zero.
  const magsFlat = new Float32Array(frameCount * binCount);
  for (let frame = 0; frame < frameCount; frame++) {
    const row = spec.magnitudes[frame];
    if (row) magsFlat.set(row, frame * binCount);
  }

  // Pack filters as [mel][bin]; a missing filter is equivalent to all zeros.
  const filterFlat = new Float32Array(bandCount * binCount);
  for (let band = 0; band < bandCount; band++) {
    const weights = filters[band];
    if (weights) filterFlat.set(weights, band * binCount);
  }

  // GPU stage timing (submission -> readback) is surfaced for validation/debug.
  const result = await gpuMelProjectFlat(gpu, {
    nFrames: frameCount,
    nBins: binCount,
    nMels: bandCount,
    magsFlat,
    filterFlat,
  });

  // Each frame is a zero-copy view into the single flat output buffer.
  const flat = result.value.outFlat;
  const melBands: Float32Array[] = new Array(frameCount);
  for (let frame = 0; frame < frameCount; frame++) {
    const begin = frame * bandCount;
    melBands[frame] = flat.subarray(begin, begin + bandCount);
  }

  return {
    times: spec.times,
    melBands,
    gpuTimings: {
      gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs,
    },
  };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Project an existing spectrogram onto a mel filter bank and return the
 * log-scaled result.
 *
 * - The STFT is supplied by the caller; none is computed here.
 * - The output shares `spec.times`, so frames line up with the input.
 * - When `gpu` is given the GPU path is tried first; any error it raises is
 *   swallowed and the CPU path is used instead.
 *
 * @param spec - Source spectrogram.
 * @param config - Mel settings; `fMin` defaults to 0, `fMax` to `spec.sampleRate / 2`.
 * @param gpu - Optional GPU context.
 * @throws Error when `config.nMels` is not a positive integer, or when the
 *   frequency range is empty after clamping.
 */
export async function melSpectrogram(
  spec: Spectrogram,
  config: MelConfig,
  gpu?: MirGPU
): Promise<MelSpectrogram> {
  assertPositiveInt("config.nMels", config.nMels);

  const lowHz = config.fMin ?? 0;
  const highHz = config.fMax ?? spec.sampleRate / 2;
  const filters = buildMelFilterBank(spec.sampleRate, spec.fftSize, config.nMels, lowHz, highHz);

  if (!gpu) {
    return cpuMelProject(spec, filters);
  }

  try {
    return await gpuMelProject(spec, filters, gpu);
  } catch {
    // GPU can fail due to missing features, adapter resets, etc.
    // v0.1 prioritises correctness: we silently fall back.
    return cpuMelProject(spec, filters);
  }
}
|
package/src/dsp/mfcc.ts
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import type { MelSpectrogram } from "./mel";
|
|
2
|
+
|
|
3
|
+
/** Options accepted by `mfcc`. */
export type MfccOptions = {
  /** Number of cepstral coefficients per frame. Defaults to 13; must be a positive integer. */
  nCoeffs?: number;
};
|
|
6
|
+
|
|
7
|
+
/** Output of `mfcc`. */
export type MfccResult = {
  /** Frame times, shared with the input mel spectrogram. */
  times: Float32Array;
  /** Cepstral coefficients, indexed as [frame][coeff]. */
  coeffs: Float32Array[];
};
|
|
11
|
+
|
|
12
|
+
/**
 * Guard that `v` is a finite integer strictly greater than zero.
 *
 * @param name - Label interpolated into the error message.
 * @param v - Candidate number to validate.
 * @throws Error when `v` is NaN, infinite, fractional, zero or negative.
 */
function assertPositiveInt(name: string, v: number): void {
  // Number.isInteger rejects NaN, ±Infinity and fractions. Unlike the previous
  // `(v | 0) !== v` check it does not truncate to 32 bits, so valid integers at
  // or above 2^31 are no longer rejected.
  if (!Number.isInteger(v) || v <= 0) {
    throw new Error(`@octoseq/mir: ${name} must be a positive integer`);
  }
}
|
|
17
|
+
|
|
18
|
+
/**
 * Build a DCT-II basis as a flat row-major matrix of shape [nCoeffs][nMels].
 *
 * Row 0 is scaled by sqrt(1 / nMels) and every other row by sqrt(2 / nMels).
 * Entry (i, j) is `scale_i * cos((PI / nMels) * (j + 0.5) * i)`.
 *
 * @param nCoeffs - Number of basis rows (output coefficients).
 * @param nMels - Number of columns (input mel bands).
 * @returns Float32Array of length `nCoeffs * nMels`.
 */
function buildDctMatrix(nCoeffs: number, nMels: number): Float32Array {
  const basis = new Float32Array(nCoeffs * nMels);

  const dcScale = Math.sqrt(1 / nMels);
  const acScale = Math.sqrt(2 / nMels);
  const step = Math.PI / nMels;

  for (let row = 0; row < nCoeffs; row++) {
    const rowScale = row === 0 ? dcScale : acScale;
    const base = row * nMels;
    for (let col = 0; col < nMels; col++) {
      basis[base + col] = rowScale * Math.cos(step * (col + 0.5) * row);
    }
  }

  return basis;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Compute cepstral coefficients by applying a DCT-II to each mel frame.
 *
 * The mel values are used as supplied, with no further log applied here. The
 * band count is taken from the first frame. When that is zero, every frame
 * yields an all-zero coefficient vector.
 *
 * @param mel - Mel spectrogram whose bands are transformed.
 * @param options - `nCoeffs` selects the number of coefficients (default 13).
 * @returns Coefficients indexed [frame][coeff], sharing `mel.times`.
 * @throws Error when `nCoeffs` is not a positive integer.
 */
export function mfcc(mel: MelSpectrogram, options: MfccOptions = {}): MfccResult {
  const frameCount = mel.times.length;
  const bandCount = mel.melBands[0]?.length ?? 0;

  const nCoeffs = options.nCoeffs ?? 13;
  assertPositiveInt("options.nCoeffs", nCoeffs);

  if (bandCount <= 0) {
    const zeros = Array.from({ length: frameCount }, () => new Float32Array(nCoeffs));
    return { times: mel.times, coeffs: zeros };
  }

  const basis = buildDctMatrix(nCoeffs, bandCount);

  const coeffs = Array.from({ length: frameCount }, (_unused, frame) => {
    // A missing frame is treated as an all-zero band vector.
    const bands = mel.melBands[frame] ?? new Float32Array(bandCount);
    const row = new Float32Array(nCoeffs);

    for (let k = 0; k < nCoeffs; k++) {
      const base = k * bandCount;
      let acc = 0;
      for (let b = 0; b < bandCount; b++) {
        acc += (basis[base + b] ?? 0) * (bands[b] ?? 0);
      }
      row[k] = acc;
    }

    return row;
  });

  return { times: mel.times, coeffs };
}
|
|
71
|
+
|
|
72
|
+
/** Options accepted by `delta` and `deltaDelta`. */
export type DeltaOptions = {
  /** Regression half-width N in frames. Defaults to 2; must be a positive integer. */
  window?: number;
};
|
|
76
|
+
|
|
77
|
+
/** A time-aligned matrix of per-frame feature vectors. */
export type Features2D = {
  /** One timestamp per frame. */
  times: Float32Array;
  /** Feature values, indexed as [frame][feature]. */
  values: Float32Array[];
};
|
|
81
|
+
|
|
82
|
+
/**
 * Regression-based delta (first temporal derivative) of a feature matrix.
 *
 * For each frame t and feature f:
 *   d[t][f] = Σ_{n=1..N} n * (x[t+n][f] - x[t-n][f]) / (2 * Σ_{n=1..N} n²)
 * Frame indices beyond either end are clamped to the first or last frame.
 * The feature width is taken from the first frame.
 *
 * @param features - Input matrix indexed [frame][feature].
 * @param options - `window` is N (default 2).
 * @returns Deltas with the same `times` array and one row per frame.
 * @throws Error when `window` is not a positive integer.
 */
export function delta(features: Features2D, options: DeltaOptions = {}): Features2D {
  const N = options.window ?? 2;
  assertPositiveInt("options.window", N);

  const { times, values } = features;
  const frameCount = times.length;
  const width = values[0]?.length ?? 0;
  const lastFrame = frameCount - 1;

  // Normaliser: 2 * sum_{n=1..N} n^2
  let squares = 0;
  for (let n = 1; n <= N; n++) squares += n * n;
  const denom = 2 * squares;

  const result: Float32Array[] = new Array(frameCount);

  for (let t = 0; t < frameCount; t++) {
    const row = new Float32Array(width);

    for (let f = 0; f < width; f++) {
      let acc = 0;
      for (let n = 1; n <= N; n++) {
        const ahead = values[Math.min(lastFrame, t + n)]?.[f] ?? 0;
        const behind = values[Math.max(0, t - n)]?.[f] ?? 0;
        acc += n * (ahead - behind);
      }
      row[f] = denom > 0 ? acc / denom : 0;
    }

    result[t] = row;
  }

  return { times, values: result };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Second-order delta: `delta` applied twice with the same options.
 *
 * @param features - Input matrix indexed [frame][feature].
 * @param options - Passed unchanged to both `delta` passes.
 * @returns The delta of the delta, sharing `features.times`.
 */
export function deltaDelta(features: Features2D, options: DeltaOptions = {}): Features2D {
  const firstOrder = delta(features, options);
  return delta(firstOrder, options);
}
|
package/src/dsp/onset.ts
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import type { MirGPU } from "../gpu/context";
|
|
2
|
+
import { gpuOnsetEnvelopeFromMelFlat } from "../gpu/onsetEnvelope";
|
|
3
|
+
|
|
4
|
+
import type { MelSpectrogram } from "./mel";
|
|
5
|
+
import type { Spectrogram } from "./spectrogram";
|
|
6
|
+
|
|
7
|
+
/** Onset-strength curve: one novelty value per frame. */
export type OnsetEnvelope = {
  /** Frame times, shared with the source representation. */
  times: Float32Array;
  /** Novelty per frame; same length as `times`. */
  values: Float32Array;
};
|
|
11
|
+
|
|
12
|
+
/** Tuning knobs for the CPU onset-envelope functions. */
export type OnsetEnvelopeOptions = {
  /** When true, values are log-compressed before differencing. Defaults to false. */
  useLog?: boolean;
  /** Moving-average window length in milliseconds; 0 disables smoothing. Defaults to 30. */
  smoothMs?: number;
  /**
   * How a frame-to-frame difference becomes novelty: "rectified" keeps only
   * increases, "abs" uses the magnitude of any change. Defaults to "rectified".
   */
  diffMethod?: "rectified" | "abs";
};
|
|
20
|
+
|
|
21
|
+
/**
 * Centred moving average computed from prefix sums.
 *
 * The window spans `floor(windowFrames / 2)` samples either side of each index
 * and is truncated at the array ends, where the divisor shrinks to match.
 *
 * @param values - Input samples.
 * @param windowFrames - Window length in frames; values <= 1 disable smoothing.
 * @returns A new smoothed array, or `values` itself when `windowFrames <= 1`.
 */
function movingAverage(values: Float32Array, windowFrames: number): Float32Array {
  if (windowFrames <= 1) return values;

  const length = values.length;
  const reach = Math.floor(windowFrames / 2);

  // cumulative[i] holds the sum of values[0..i-1], kept in double precision.
  const cumulative = new Float64Array(length + 1);
  let running = 0;
  for (let i = 0; i < length; i++) {
    running += values[i] ?? 0;
    cumulative[i + 1] = running;
  }

  const smoothed = new Float32Array(length);
  for (let i = 0; i < length; i++) {
    const from = Math.max(0, i - reach);
    const to = Math.min(length, i + reach + 1);
    const windowSum = (cumulative[to] ?? 0) - (cumulative[from] ?? 0);
    smoothed[i] = windowSum / Math.max(1, to - from);
  }

  return smoothed;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Fill in every onset-envelope option that the caller left unset.
 *
 * @param opts - Partial options, or undefined.
 * @returns Fully populated options: `useLog` false, `smoothMs` 30 and
 *   `diffMethod` "rectified" unless overridden.
 */
function defaultOptions(opts?: OnsetEnvelopeOptions): Required<OnsetEnvelopeOptions> {
  const useLog = opts?.useLog ?? false;
  const smoothMs = opts?.smoothMs ?? 30;
  const diffMethod = opts?.diffMethod ?? "rectified";
  return { useLog, smoothMs, diffMethod };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Log compression that never yields -Infinity: `ln(1 + max(0, x))`.
 *
 * Negative inputs are clamped to 0 first, so they all map to 0. That includes
 * any log-scaled value below zero.
 *
 * @param x - Value to compress.
 * @returns `Math.log1p` of the clamped input.
 */
function logCompress(x: number): number {
  const nonNegative = Math.max(0, x);
  return Math.log1p(nonNegative);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Spectral-flux style onset envelope computed from linear magnitudes.
 *
 * For every frame after the first, per-bin differences against the previous
 * frame are rectified (or made absolute), summed, and divided by the bin count
 * so the scale does not depend on FFT size. Frame 0, and any frame lacking a
 * neighbour row, is 0. When `smoothMs > 0` and there are at least two frames,
 * the curve is smoothed with an odd-length moving average whose size is derived
 * from the spacing of the first two frame times.
 *
 * @param spec - Source spectrogram; `times`, `fftSize` and `magnitudes` are read.
 * @param options - See `OnsetEnvelopeOptions`; unset fields take their defaults.
 * @returns Envelope sharing `spec.times`.
 */
export function onsetEnvelopeFromSpectrogram(spec: Spectrogram, options?: OnsetEnvelopeOptions): OnsetEnvelope {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const useAbs = diffMethod === "abs";

  const frameCount = spec.times.length;
  const binCount = (spec.fftSize >>> 1) + 1;

  // Zero-initialised, so frame 0 and skipped frames are already 0.
  const novelty = new Float32Array(frameCount);

  for (let frame = 1; frame < frameCount; frame++) {
    const current = spec.magnitudes[frame];
    const previous = spec.magnitudes[frame - 1];
    if (!current || !previous) continue;

    let total = 0;
    for (let bin = 0; bin < binCount; bin++) {
      const rawNow = current[bin] ?? 0;
      const rawBefore = previous[bin] ?? 0;
      const now = useLog ? logCompress(rawNow) : rawNow;
      const before = useLog ? logCompress(rawBefore) : rawBefore;
      const change = now - before;
      total += useAbs ? Math.abs(change) : Math.max(0, change);
    }

    // Average over bins so the overall scale is not tied to FFT size.
    novelty[frame] = binCount > 0 ? total / binCount : 0;
  }

  const wantsSmoothing = smoothMs > 0 && frameCount >= 2;
  if (!wantsSmoothing) {
    return { times: spec.times, values: novelty };
  }

  // Frame spacing is estimated from the first two timestamps only.
  const spacing = (spec.times[1] ?? 0) - (spec.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1000 / Math.max(1e-9, spacing)));

  // `| 1` forces an odd window so it is centred on each frame.
  return { times: spec.times, values: movingAverage(novelty, windowFrames | 1) };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Onset envelope computed from mel-band values.
 *
 * For every frame after the first, per-band differences against the previous
 * frame are rectified (or made absolute), summed, and divided by the current
 * frame's band count. Frame 0, and any frame lacking a neighbour row, is 0.
 * When `useLog` is set, `logCompress` is applied on top of the incoming band
 * values before differencing. Smoothing matches
 * `onsetEnvelopeFromSpectrogram`: an odd-length moving average sized from the
 * spacing of the first two frame times.
 *
 * @param mel - Mel spectrogram; `times` and `melBands` are read.
 * @param options - See `OnsetEnvelopeOptions`; unset fields take their defaults.
 * @returns Envelope sharing `mel.times`.
 */
export function onsetEnvelopeFromMel(mel: MelSpectrogram, options?: OnsetEnvelopeOptions): OnsetEnvelope {
  const { useLog, smoothMs, diffMethod } = defaultOptions(options);
  const useAbs = diffMethod === "abs";

  const frameCount = mel.times.length;

  // Zero-initialised, so frame 0 and skipped frames are already 0.
  const novelty = new Float32Array(frameCount);

  for (let frame = 1; frame < frameCount; frame++) {
    const current = mel.melBands[frame];
    const previous = mel.melBands[frame - 1];
    if (!current || !previous) continue;

    const bandCount = current.length;

    let total = 0;
    for (let band = 0; band < bandCount; band++) {
      const rawNow = current[band] ?? 0;
      const rawBefore = previous[band] ?? 0;
      // Optional extra compression on top of whatever scaling the input has.
      const now = useLog ? logCompress(rawNow) : rawNow;
      const before = useLog ? logCompress(rawBefore) : rawBefore;
      const change = now - before;
      total += useAbs ? Math.abs(change) : Math.max(0, change);
    }

    // Average over bands so the overall scale is not tied to nMels.
    novelty[frame] = bandCount > 0 ? total / bandCount : 0;
  }

  const wantsSmoothing = smoothMs > 0 && frameCount >= 2;
  if (!wantsSmoothing) {
    return { times: mel.times, values: novelty };
  }

  // Frame spacing is estimated from the first two timestamps only.
  const spacing = (mel.times[1] ?? 0) - (mel.times[0] ?? 0);
  const windowFrames = Math.max(1, Math.round(smoothMs / 1000 / Math.max(1e-9, spacing)));

  // `| 1` forces an odd window so it is centred on each frame.
  return { times: mel.times, values: movingAverage(novelty, windowFrames | 1) };
}
|
|
161
|
+
|
|
162
|
+
/** Onset envelope produced by the GPU path, with its timing measurement. */
export type OnsetEnvelopeGpuResult = {
  /** Frame times, shared with the input mel spectrogram. */
  times: Float32Array;
  /** Novelty per frame as returned by the GPU stage. */
  values: Float32Array;
  /** Timing reported by the GPU stage (submission to readback, in ms). */
  gpuTimings: { gpuSubmitToReadbackMs: number };
};
|
|
167
|
+
|
|
168
|
+
/**
 * Onset envelope from a mel spectrogram, computed on the GPU.
 *
 * - The mel bands are packed into one flat [frame][mel] buffer and handed to
 *   `gpuOnsetEnvelopeFromMelFlat`, which performs the diff and reduction.
 * - Only `diffMethod` is configurable here; no smoothing or log compression is
 *   applied in this function (kept minimal for v0.1).
 * - Errors are not caught, so callers should fall back to the CPU variant on
 *   failure.
 *
 * @param mel - Mel spectrogram; the band count is taken from the first frame.
 * @param gpu - GPU context forwarded to the kernel wrapper.
 * @param options - Optional `diffMethod` (default "rectified").
 * @returns Envelope sharing `mel.times`, plus the reported GPU timing.
 */
export async function onsetEnvelopeFromMelGpu(
  mel: MelSpectrogram,
  gpu: MirGPU,
  options?: Pick<OnsetEnvelopeOptions, "diffMethod">
): Promise<OnsetEnvelopeGpuResult> {
  const frameCount = mel.times.length;
  const bandCount = mel.melBands[0]?.length ?? 0;

  // Pack rows back to back; frames without a row stay zero.
  const melFlat = new Float32Array(frameCount * bandCount);
  for (let frame = 0; frame < frameCount; frame++) {
    const bands = mel.melBands[frame];
    if (bands) melFlat.set(bands, frame * bandCount);
  }

  const result = await gpuOnsetEnvelopeFromMelFlat(gpu, {
    nFrames: frameCount,
    nMels: bandCount,
    melFlat,
    diffMethod: options?.diffMethod ?? "rectified",
  });

  return {
    times: mel.times,
    values: result.value.out,
    gpuTimings: { gpuSubmitToReadbackMs: result.timing.gpuSubmitToReadbackMs },
  };
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/** A single peak reported by `peakPick`. */
export type PeakPickEvent = {
  /** Timestamp of the peak, taken from `times[index]`. */
  time: number;
  /** Value of the input curve at the peak. */
  strength: number;
  /** Position of the peak within the input arrays. */
  index: number;
};
|
|
6
|
+
|
|
7
|
+
/** Options accepted by `peakPick`. */
export type PeakPickOptions = {
  /** Minimum peak height (absolute). Defaults to 0. Not used when `adaptive` is set. */
  threshold?: number;
  /** Minimum spacing between reported peaks, in seconds. Defaults to 0. */
  minIntervalSec?: number;

  /**
   * When provided, the threshold is derived from the data and replaces `threshold`:
   * - "meanStd" (default): mean(values) + factor * std(values)
   * - "median": median(values) * factor
   * `factor` defaults to 1.
   */
  adaptive?: {
    method?: "meanStd" | "median";
    factor?: number;
  };

  /** If true (default), require strict maxima (> both neighbours); otherwise allow flat plateaus (>=). */
  strict?: boolean;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Mean and population standard deviation (divisor n) of `values`.
 *
 * @param values - Samples to summarise.
 * @returns `{ mean, std }`; both are 0 for an empty input.
 */
function meanStd(values: Float32Array): { mean: number; std: number } {
  const count = values.length;
  if (count <= 0) return { mean: 0, std: 0 };

  let total = 0;
  for (const sample of values) total += sample;
  const mean = total / count;

  let squaredDeviation = 0;
  for (const sample of values) {
    const offset = sample - mean;
    squaredDeviation += offset * offset;
  }

  return { mean, std: Math.sqrt(squaredDeviation / count) };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Median of `values`, computed on a sorted copy (the input is not modified).
 *
 * @param values - Samples to summarise.
 * @returns The middle element for odd lengths, the mean of the two middle
 *   elements for even lengths, and 0 for an empty input.
 */
function median(values: Float32Array): number {
  const sorted = [...values].sort((lo, hi) => lo - hi);

  const size = sorted.length;
  if (size === 0) return 0;

  const upper = size >>> 1;
  if (size % 2 === 1) {
    return sorted[upper] ?? 0;
  }

  const lower = upper - 1;
  return ((sorted[lower] ?? 0) + (sorted[upper] ?? 0)) / 2;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Pick local maxima from a sampled curve.
 *
 * A sample qualifies when it is at or above the threshold and is a local
 * maximum relative to its two neighbours (strictly greater by default). The
 * first and last samples are never reported. The threshold is
 * `options.threshold` (default 0) unless `options.adaptive` is set, in which
 * case it is computed from the data. When a candidate falls closer than
 * `minIntervalSec` to the last accepted peak, the stronger of the two is kept.
 *
 * @param times - Timestamp for each sample.
 * @param values - Curve values; must have the same length as `times`.
 * @param options - See `PeakPickOptions`.
 * @returns Peaks in increasing index order.
 * @throws Error when `times` and `values` differ in length.
 */
export function peakPick(
  times: Float32Array,
  values: Float32Array,
  options: PeakPickOptions = {}
): PeakPickEvent[] {
  if (times.length !== values.length) {
    throw new Error("@octoseq/mir: peakPick times/values length mismatch");
  }

  const count = values.length;
  if (count === 0) return [];

  const requireStrict = options.strict ?? true;

  // Absolute threshold, replaced by a data-derived one when `adaptive` is given.
  let floor = options.threshold ?? 0;
  const adaptive = options.adaptive;
  if (adaptive) {
    const method = adaptive.method ?? "meanStd";
    const factor = adaptive.factor ?? 1;
    if (method === "median") {
      floor = median(values) * factor;
    } else {
      const stats = meanStd(values);
      floor = stats.mean + factor * stats.std;
    }
  }

  const minGap = options.minIntervalSec ?? 0;

  const peaks: PeakPickEvent[] = [];
  let previousPeakTime = -Infinity;

  for (let idx = 1; idx < count - 1; idx++) {
    const height = values[idx] ?? 0;
    // Written as a negated >= so that NaN heights are skipped as well.
    if (!(height >= floor)) continue;

    const before = values[idx - 1] ?? 0;
    const after = values[idx + 1] ?? 0;
    const isPeak = requireStrict
      ? height > before && height > after
      : height >= before && height >= after;
    if (!isPeak) continue;

    const when = times[idx] ?? 0;
    const tooClose = when - previousPeakTime < minGap;

    if (!tooClose) {
      peaks.push({ time: when, strength: height, index: idx });
      previousPeakTime = when;
      continue;
    }

    // Within the minimum interval: keep whichever peak is stronger.
    const latest = peaks[peaks.length - 1];
    if (latest && height > latest.strength) {
      latest.time = when;
      latest.strength = height;
      latest.index = idx;
      previousPeakTime = when;
    }
  }

  return peaks;
}
|