@octoseq/mir 0.1.0-main.2e286ce → 0.1.0-main.4baa7cd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DUWYCAVG.js → chunk-KIGWMJLC.js} +774 -368
- package/dist/chunk-KIGWMJLC.js.map +1 -0
- package/dist/index.d.ts +115 -4
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/{runMir-CSIBwNZ3.d.ts → runMir-CVEIxPd3.d.ts} +1 -1
- package/dist/runner/runMir.d.ts +2 -2
- package/dist/runner/runMir.js +1 -1
- package/dist/runner/workerProtocol.d.ts +8 -1
- package/dist/runner/workerProtocol.js.map +1 -1
- package/dist/types-4bAZI4F7.d.ts +190 -0
- package/package.json +1 -1
- package/src/dsp/beatCandidates.ts +299 -0
- package/src/dsp/tempoHypotheses.ts +395 -0
- package/src/index.ts +21 -1
- package/src/runner/runMir.ts +72 -0
- package/src/runner/workerProtocol.ts +9 -1
- package/src/types.ts +119 -1
- package/dist/chunk-DUWYCAVG.js.map +0 -1
- package/dist/types-BE3py4fZ.d.ts +0 -83
package/dist/runner/runMir.d.ts
CHANGED
@@ -1,2 +1,2 @@
-export { b as RunMirBackendOptions, R as RunMirOptions, r as runMir } from '../runMir-CSIBwNZ3.js';
-import '../types-BE3py4fZ.js';
+export { b as RunMirBackendOptions, R as RunMirOptions, r as runMir } from '../runMir-CVEIxPd3.js';
+import '../types-4bAZI4F7.js';

package/dist/runner/workerProtocol.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import { g as MirRunRequest, e as MirResult, h as MirAudioPayload } from '../types-BE3py4fZ.js';
+import { g as MirRunRequest, e as MirResult, B as BeatCandidate, T as TempoHypothesis, h as MirAudioPayload } from '../types-4bAZI4F7.js';
 
 type MirWorkerInitMessage = {
     type: "INIT";
@@ -85,6 +85,13 @@ type MirWorkerResultMessage = {
             strength: number;
             index: number;
         }>;
+        candidates?: BeatCandidate[];
+        hypotheses?: TempoHypothesis[];
+        inputCandidateCount?: number;
+        histogram?: {
+            bpmBins: ArrayBufferLike;
+            counts: ArrayBufferLike;
+        };
         meta: MirResult["meta"];
     };
 };

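The new optional fields on MirWorkerResultMessage arrive as raw ArrayBufferLike values, so a receiving host has to rebuild typed-array views itself. A minimal sketch of such a handler follows; it is not part of the package, and the import path assumes the runner subpath is exposed, which this diff does not confirm.

// Illustrative host-side handler (not from the package).
import type { MirWorkerOutMessage } from "@octoseq/mir/runner/workerProtocol"; // assumed subpath

function onWorkerMessage(msg: MirWorkerOutMessage): void {
  if (msg.type !== "RESULT") return;
  const { result } = msg;
  // Frame times always transfer as a raw buffer; rebuild a typed view.
  const times = new Float32Array(result.times as ArrayBuffer);
  // New in this version: tempo-hypothesis payloads carry an optional histogram
  // whose bins/counts are also transferred as raw buffers.
  if (result.kind === "tempoHypotheses" && result.histogram) {
    const bpmBins = new Float32Array(result.histogram.bpmBins as ArrayBuffer);
    const counts = new Float32Array(result.histogram.counts as ArrayBuffer);
    console.log(times.length, bpmBins.length, counts.length);
  }
  // result.candidates, result.hypotheses, and result.inputCandidateCount are
  // plain structured-clone values and need no rebuilding.
}
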
package/dist/runner/workerProtocol.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":["../../src/runner/workerProtocol.ts"],"names":[],"mappings":";
+{"version":3,"sources":["../../src/runner/workerProtocol.ts"],"names":[],"mappings":";AA+LO,SAAS,oBAAoB,CAAA,EAAkD;AAClF,EAAA,OAAO;AAAA,IACH,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,IAAA,EAAM,IAAI,YAAA,CAAa,CAAA,CAAE,IAAmB;AAAA,GAChD;AACJ","file":"workerProtocol.js","sourcesContent":["import type { BeatCandidate, MirAudioPayload, MirResult, MirRunRequest, TempoHypothesis } from \"../types\";\n\nexport type MirWorkerInitMessage = {\n type: \"INIT\";\n enableGpu: boolean;\n};\n\nexport type MirWorkerRunMessage = {\n type: \"RUN\";\n jobId: string;\n request: MirRunRequest;\n audio: {\n sampleRate: number;\n mono: ArrayBufferLike; // transferred\n };\n enableGpu: boolean;\n strictGpu?: boolean;\n};\n\nexport type MirWorkerCancelMessage = {\n type: \"CANCEL\";\n jobId: string;\n};\n\nexport type MirWorkerSearchMessage = {\n type: \"SEARCH\";\n jobId: string;\n\n audio: {\n sampleRate: number;\n mono: ArrayBufferLike; // transferred\n };\n\n query: {\n t0: number;\n t1: number;\n };\n\n /** Search tuning (kept small and explicit, like MirRunRequest). */\n search?: {\n hopSec?: number;\n threshold?: number;\n /** 0..1; if true, skip windows overlapping the query itself. */\n skipOverlap?: boolean;\n weights?: {\n mel?: number;\n transient?: number;\n mfcc?: number;\n };\n /** Optional: apply softmax to similarity curve before returning. */\n applySoftmax?: boolean;\n };\n\n /** Feature extraction config (re-uses existing MIR request knobs). */\n features?: {\n spectrogram?: MirRunRequest[\"spectrogram\"];\n mel?: MirRunRequest[\"mel\"];\n onset?: MirRunRequest[\"onset\"];\n mfcc?: MirRunRequest[\"mfcc\"];\n };\n\n /**\n * Optional human-in-the-loop refinement data.\n * When enabled, the worker can use accepted/rejected exemplars to produce a\n * per-track confidence curve and a re-ranked candidate list.\n */\n refinement?: {\n enabled?: boolean;\n includeQueryAsPositive?: boolean;\n labels?: Array<{\n t0: number;\n t1: number;\n status: \"accepted\" | \"rejected\";\n source: \"auto\" | \"manual\";\n }>;\n };\n\n enableGpu: boolean;\n strictGpu?: boolean;\n};\n\nexport type MirWorkerInMessage = MirWorkerInitMessage | MirWorkerRunMessage | MirWorkerSearchMessage | MirWorkerCancelMessage;\n\nexport type MirWorkerResultMessage = {\n type: \"RESULT\";\n jobId: string;\n /** Total time spent in the worker handling this RUN, including (optional) GPU readback. */\n workerTotalMs: number;\n result: {\n // Mirror MirResult but transfer underlying buffers.\n kind: MirResult[\"kind\"];\n times: ArrayBufferLike;\n values?: ArrayBufferLike;\n data2d?: ArrayBufferLike[];\n events?: Array<{ time: number; strength: number; index: number }>;\n candidates?: BeatCandidate[];\n // For tempoHypotheses\n hypotheses?: TempoHypothesis[];\n inputCandidateCount?: number;\n histogram?: {\n bpmBins: ArrayBufferLike;\n counts: ArrayBufferLike;\n };\n meta: MirResult[\"meta\"];\n };\n};\n\nexport type MirWorkerErrorMessage = {\n type: \"ERROR\";\n jobId: string;\n message: string;\n stack?: string;\n};\n\nexport type MirWorkerLogMessage = {\n type: \"LOG\";\n jobId?: string;\n level: \"debug\" | \"info\" | \"warn\" | \"error\";\n message: string;\n data?: unknown;\n};\n\nexport type MirWorkerSearchResultMessage = {\n type: \"SEARCH_RESULT\";\n jobId: string;\n timings: {\n fingerprintMs: number;\n scanMs: number;\n modelMs?: number;\n totalMs: number;\n };\n result: {\n times: ArrayBufferLike;\n scores: ArrayBufferLike;\n curveKind: \"similarity\" | \"confidence\";\n model: {\n kind: \"baseline\" | \"prototype\" | \"logistic\";\n positives: number;\n negatives: number;\n weightL2?: {\n mel: number;\n melForeground: number;\n melContrast?: number;\n onset: number;\n onsetForeground: number;\n onsetContrast?: number;\n mfcc?: number;\n mfccForeground?: number;\n mfccContrast?: number;\n };\n training?: {\n iterations: number;\n finalLoss: number;\n };\n };\n candidates: Array<{\n timeSec: number;\n score: number;\n windowStartSec: number;\n windowEndSec: number;\n explain?: {\n groupLogit?: {\n logit: number;\n bias: number;\n mel: number;\n melForeground: number;\n melContrast?: number;\n onset: number;\n onsetForeground: number;\n onsetContrast?: number;\n mfcc?: number;\n mfccForeground?: number;\n mfccContrast?: number;\n };\n };\n }>;\n meta: {\n windowSec: number;\n hopSec: number;\n skippedWindows: number;\n scannedWindows: number;\n };\n };\n};\n\nexport type MirWorkerOutMessage =\n | MirWorkerResultMessage\n | MirWorkerSearchResultMessage\n | MirWorkerErrorMessage\n | MirWorkerLogMessage;\n\nexport function rebuildAudioPayload(a: MirWorkerRunMessage[\"audio\"]): MirAudioPayload {\n return {\n sampleRate: a.sampleRate,\n mono: new Float32Array(a.mono as ArrayBuffer),\n };\n}\n"]}

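The `// transferred` comments in the source above indicate that the mono buffer is meant to be moved into the worker rather than copied. A sketch of what dispatching a RUN message could look like on a standard Web Worker host (the helper name is hypothetical; the message shape follows MirWorkerRunMessage above):

// Hypothetical dispatch helper (not from the package).
import type { MirAudioPayload, MirRunRequest } from "@octoseq/mir"; // assumed re-exports
import type { MirWorkerRunMessage } from "@octoseq/mir/runner/workerProtocol"; // assumed subpath

function postRun(worker: Worker, jobId: string, request: MirRunRequest, audio: MirAudioPayload): void {
  const mono = audio.mono.buffer as ArrayBuffer;
  const msg: MirWorkerRunMessage = {
    type: "RUN",
    jobId,
    request,
    audio: { sampleRate: audio.sampleRate, mono },
    enableGpu: false,
  };
  // Pass the buffer in the transfer list so it is moved, not copied; the worker
  // rebuilds a Float32Array view via rebuildAudioPayload on the other side.
  worker.postMessage(msg, [mono]);
}
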
package/dist/types-4bAZI4F7.d.ts
ADDED
@@ -0,0 +1,190 @@
+type MirBackend = "cpu" | "gpu";
+type MirRunTimings = {
+    totalMs: number;
+    cpuMs?: number;
+    gpuMs?: number;
+};
+type MirRunMeta = {
+    backend: MirBackend;
+    usedGpu: boolean;
+    timings: MirRunTimings;
+};
+type Mir1DResult = {
+    kind: "1d";
+    times: Float32Array;
+    values: Float32Array;
+    meta: MirRunMeta;
+};
+type Mir2DResult = {
+    kind: "2d";
+    times: Float32Array;
+    data: Float32Array[];
+    meta: MirRunMeta;
+};
+type MirEvent = {
+    time: number;
+    strength: number;
+    index: number;
+};
+type MirEventsResult = {
+    kind: "events";
+    times: Float32Array;
+    events: MirEvent[];
+    meta: MirRunMeta;
+};
+/**
+ * A beat candidate represents a plausible beat-like moment in the audio.
+ *
+ * These are sparse events (timestamps) that may or may not correspond to
+ * actual beats. They are not tempo-aligned and do not imply any BPM value.
+ *
+ * Beat candidates are intended to be:
+ * - Dense enough to include most true beats
+ * - Sparse enough to be computationally tractable
+ * - Inspectable in the UI for debugging
+ *
+ * Future milestones will cluster, align, and refine these candidates.
+ */
+type BeatCandidate = {
+    /** Time in seconds from track start. */
+    time: number;
+    /** Relative salience/confidence (0-1 normalized). Higher = more likely to be a beat. */
+    strength: number;
+    /** Source of this candidate (for debugging/inspection). */
+    source: BeatCandidateSource;
+};
+type BeatCandidateSource = "onset_peak" | "flux_peak" | "combined";
+type BeatCandidatesResult = {
+    kind: "beatCandidates";
+    /** Frame times from the underlying analysis (for alignment). */
+    times: Float32Array;
+    /** The beat candidate events. */
+    candidates: BeatCandidate[];
+    /** Optional: the salience signal used for peak picking (for debugging). */
+    salience?: {
+        times: Float32Array;
+        values: Float32Array;
+    };
+    meta: MirRunMeta;
+};
+/**
+ * A tempo hypothesis represents a plausible BPM with confidence score.
+ *
+ * Hypotheses are derived from inter-onset intervals of beat candidates.
+ * They are grouped into harmonic families (e.g., 60, 120, 180 BPM) but
+ * not collapsed - each BPM is preserved as a separate hypothesis.
+ */
+type TempoHypothesis = {
+    /** Deterministic identifier for this hypothesis (e.g., "hyp-0"). */
+    id: string;
+    /** Tempo in beats per minute (0.1 BPM precision). */
+    bpm: number;
+    /** Confidence score normalized to [0, 1]. Higher = more likely. */
+    confidence: number;
+    /** Evidence metadata for debugging/inspection. */
+    evidence: TempoHypothesisEvidence;
+    /** Harmonic family ID - hypotheses in the same family are harmonically related. */
+    familyId: string;
+    /** Harmonic relationship to the family root (1.0 = root, 2.0 = double, 0.5 = half, etc.). */
+    harmonicRatio: number;
+};
+type TempoHypothesisEvidence = {
+    /** Number of IOIs supporting this tempo. */
+    supportingIntervalCount: number;
+    /** Sum of weighted contributions (if strength-weighting enabled). */
+    weightedSupport: number;
+    /** Peak height in the histogram. */
+    peakHeight: number;
+    /** Histogram bin range [minBpm, maxBpm]. */
+    binRange: [number, number];
+};
+type TempoHypothesesResult = {
+    kind: "tempoHypotheses";
+    /** Frame times from underlying analysis (for alignment). */
+    times: Float32Array;
+    /** Ordered list of tempo hypotheses (by confidence descending). */
+    hypotheses: TempoHypothesis[];
+    /** The number of beat candidates used as input. */
+    inputCandidateCount: number;
+    /** Histogram data for debugging/visualization. */
+    histogram?: {
+        bpmBins: Float32Array;
+        counts: Float32Array;
+    };
+    meta: MirRunMeta;
+};
+type MirResult = Mir1DResult | Mir2DResult | MirEventsResult | BeatCandidatesResult | TempoHypothesesResult;
+type MirFunctionId = "spectralCentroid" | "spectralFlux" | "melSpectrogram" | "onsetEnvelope" | "onsetPeaks" | "beatCandidates" | "tempoHypotheses" | "hpssHarmonic" | "hpssPercussive" | "mfcc" | "mfccDelta" | "mfccDeltaDelta";
+type MirRunRequest = {
+    fn: MirFunctionId;
+    spectrogram?: {
+        fftSize: number;
+        hopSize: number;
+        window: "hann";
+    };
+    mel?: {
+        nMels: number;
+        fMin?: number;
+        fMax?: number;
+    };
+    backend?: MirBackend;
+    onset?: {
+        smoothMs?: number;
+        diffMethod?: "rectified" | "abs";
+        useLog?: boolean;
+    };
+    peakPick?: {
+        minIntervalSec?: number;
+        threshold?: number;
+        adaptiveFactor?: number;
+    };
+    hpss?: {
+        timeMedian?: number;
+        freqMedian?: number;
+        spectrogram?: {
+            fftSize: number;
+            hopSize: number;
+            window: "hann";
+        };
+    };
+    mfcc?: {
+        nCoeffs?: number;
+        spectrogram?: {
+            fftSize: number;
+            hopSize: number;
+            window: "hann";
+        };
+    };
+    beatCandidates?: {
+        /** Minimum inter-candidate interval in seconds. Default: 0.1 (100ms). */
+        minIntervalSec?: number;
+        /** Threshold factor for peak detection. Lower = more candidates. Default: 0.5. */
+        thresholdFactor?: number;
+        /** Smoothing window for salience signal in ms. Default: 50. */
+        smoothMs?: number;
+        /** Whether to include the salience signal in output (for debugging). */
+        includeSalience?: boolean;
+    };
+    tempoHypotheses?: {
+        /** Minimum BPM to consider. Default: 24. */
+        minBpm?: number;
+        /** Maximum BPM to consider. Default: 300. */
+        maxBpm?: number;
+        /** Histogram bin size in BPM. Default: 1.0. */
+        binSizeBpm?: number;
+        /** Maximum number of hypotheses to return. Default: 10. */
+        maxHypotheses?: number;
+        /** Minimum confidence threshold (0-1). Default: 0.05. */
+        minConfidence?: number;
+        /** Weight IOIs by beat candidate strength. Default: true. */
+        weightByStrength?: boolean;
+        /** Include histogram in output for debugging. Default: false. */
+        includeHistogram?: boolean;
+    };
+};
+type MirAudioPayload = {
+    sampleRate: number;
+    mono: Float32Array;
+};
+
+export type { BeatCandidate as B, MirBackend as M, TempoHypothesis as T, MirRunTimings as a, MirRunMeta as b, Mir1DResult as c, Mir2DResult as d, MirResult as e, MirFunctionId as f, MirRunRequest as g, MirAudioPayload as h, BeatCandidateSource as i, BeatCandidatesResult as j, TempoHypothesisEvidence as k, TempoHypothesesResult as l };

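A hedged usage sketch of the new request/result surface. It assumes these types are re-exported from the package index (plausible given the index.d.ts changes, but not confirmed by this diff); the option values shown are simply the documented defaults:

import type { MirResult, MirRunRequest } from "@octoseq/mir"; // assumed re-exports

// Request tempo hypotheses, spelling out the documented defaults.
const request: MirRunRequest = {
  fn: "tempoHypotheses",
  tempoHypotheses: {
    minBpm: 24,
    maxBpm: 300,
    binSizeBpm: 1.0,
    maxHypotheses: 10,
    minConfidence: 0.05,
    weightByStrength: true,
    includeHistogram: false,
  },
};

// MirResult is a discriminated union on `kind`, so results narrow cleanly.
function topBpm(result: MirResult): number | undefined {
  if (result.kind !== "tempoHypotheses") return undefined;
  // Hypotheses are ordered by confidence descending, per the type docs.
  return result.hypotheses[0]?.bpm;
}
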
package/src/dsp/beatCandidates.ts
ADDED
@@ -0,0 +1,299 @@
+import type { MelSpectrogram } from "./mel";
+import type { OnsetEnvelope } from "./onset";
+import { onsetEnvelopeFromMel } from "./onset";
+import type { Spectrogram } from "./spectrogram";
+import { spectralFlux } from "./spectral";
+import type { BeatCandidate, BeatCandidateSource } from "../types";
+
+/**
+ * Configuration for beat candidate detection.
+ */
+export type BeatCandidatesOptions = {
+  /** Minimum inter-candidate interval in seconds. Default: 0.1 (100ms). */
+  minIntervalSec?: number;
+  /** Threshold factor for adaptive peak detection. Lower = more candidates. Default: 0.5. */
+  thresholdFactor?: number;
+  /** Smoothing window for salience signal in ms. Default: 50. */
+  smoothMs?: number;
+};
+
+/**
+ * Result of beat candidate detection.
+ */
+export type BeatCandidatesOutput = {
+  candidates: BeatCandidate[];
+  /** The computed salience signal (for debugging/visualization). */
+  salience: {
+    times: Float32Array;
+    values: Float32Array;
+  };
+};
+
+/**
+ * Compute a beat-oriented salience signal from mel spectrogram.
+ *
+ * This combines:
+ * - Onset envelope (captures transients/attacks)
+ * - Spectral flux from the underlying spectrogram (captures spectral change)
+ *
+ * The signals are normalized and combined to produce a single salience curve
+ * suitable for peak picking.
+ *
+ * Key design choices:
+ * - Whole-track normalization (z-score) for consistent behavior
+ * - Gentle smoothing to suppress micro-transients while preserving beat structure
+ * - No BPM inference or grid assumptions
+ */
+export type BeatSalienceSignal = {
+  times: Float32Array;
+  values: Float32Array;
+};
+
+function movingAverage(values: Float32Array, windowFrames: number): Float32Array {
+  if (windowFrames <= 1) return values;
+
+  const n = values.length;
+  const out = new Float32Array(n);
+
+  const half = Math.floor(windowFrames / 2);
+
+  // Prefix sums for O(n) moving average.
+  const prefix = new Float64Array(n + 1);
+  prefix[0] = 0;
+  for (let i = 0; i < n; i++) {
+    prefix[i + 1] = (prefix[i] ?? 0) + (values[i] ?? 0);
+  }
+
+  for (let i = 0; i < n; i++) {
+    const start = Math.max(0, i - half);
+    const end = Math.min(n, i + half + 1);
+    const sum = (prefix[end] ?? 0) - (prefix[start] ?? 0);
+    const count = Math.max(1, end - start);
+    out[i] = sum / count;
+  }
+
+  return out;
+}
+
+function meanStd(values: Float32Array): { mean: number; std: number } {
+  const n = values.length;
+  if (n <= 0) return { mean: 0, std: 0 };
+
+  let mean = 0;
+  for (let i = 0; i < n; i++) mean += values[i] ?? 0;
+  mean /= n;
+
+  let varSum = 0;
+  for (let i = 0; i < n; i++) {
+    const d = (values[i] ?? 0) - mean;
+    varSum += d * d;
+  }
+
+  const std = Math.sqrt(varSum / n);
+  return { mean, std };
+}
+
+/**
+ * Z-score normalize a signal (whole-track normalization).
+ * Result has mean ~0 and std ~1.
+ */
+function zScoreNormalize(values: Float32Array): Float32Array {
+  const { mean, std } = meanStd(values);
+  const n = values.length;
+  const out = new Float32Array(n);
+
+  if (std === 0 || !Number.isFinite(std)) {
+    // Degenerate case: all values are the same
+    out.fill(0);
+    return out;
+  }
+
+  for (let i = 0; i < n; i++) {
+    out[i] = ((values[i] ?? 0) - mean) / std;
+  }
+
+  return out;
+}
+
+/**
+ * Min-max normalize to [0, 1] range.
+ */
+function minMaxNormalize(values: Float32Array): Float32Array {
+  const n = values.length;
+  if (n === 0) return new Float32Array(0);
+
+  let min = Infinity;
+  let max = -Infinity;
+  for (let i = 0; i < n; i++) {
+    const v = values[i] ?? 0;
+    if (v < min) min = v;
+    if (v > max) max = v;
+  }
+
+  const out = new Float32Array(n);
+  const range = max - min;
+
+  if (range === 0 || !Number.isFinite(range)) {
+    out.fill(0.5);
+    return out;
+  }
+
+  for (let i = 0; i < n; i++) {
+    out[i] = ((values[i] ?? 0) - min) / range;
+  }
+
+  return out;
+}
+
+/**
+ * Compute beat salience signal from mel spectrogram.
+ *
+ * This is an intermediate signal suitable for peak picking to extract
+ * beat candidates. It combines onset envelope with additional smoothing
+ * tuned for beat-like (rather than onset-like) detection.
+ */
+export function beatSalienceFromMel(
+  mel: MelSpectrogram,
+  spec: Spectrogram,
+  options?: { smoothMs?: number }
+): BeatSalienceSignal {
+  const smoothMs = options?.smoothMs ?? 50;
+
+  // Compute onset envelope with more smoothing than default onset detection.
+  // We want to capture the "attack envelope" of beats, not individual onsets.
+  const onset = onsetEnvelopeFromMel(mel, {
+    smoothMs: smoothMs,
+    diffMethod: "rectified",
+    useLog: false,
+  });
+
+  // Compute spectral flux from the spectrogram.
+  const flux = spectralFlux(spec);
+
+  // Ensure times align (they should, but be defensive).
+  const n = Math.min(onset.times.length, flux.length);
+
+  // Z-score normalize both signals for equal contribution.
+  const onsetNorm = zScoreNormalize(onset.values.subarray(0, n));
+  const fluxNorm = zScoreNormalize(flux.subarray(0, n));
+
+  // Combine: weighted sum favoring onset envelope (it's more beat-specific).
+  const combined = new Float32Array(n);
+  const onsetWeight = 0.7;
+  const fluxWeight = 0.3;
+
+  for (let i = 0; i < n; i++) {
+    combined[i] = onsetWeight * (onsetNorm[i] ?? 0) + fluxWeight * (fluxNorm[i] ?? 0);
+  }
+
+  // Apply final smoothing to reduce micro-peaks.
+  const dt = n >= 2 ? ((onset.times[1] ?? 0) - (onset.times[0] ?? 0)) : 0.01;
+  const windowFrames = Math.max(1, Math.round((smoothMs / 1000) / Math.max(1e-9, dt)));
+  const smoothed = movingAverage(combined, windowFrames | 1);
+
+  // Normalize to [0, 1] for consistent interpretation.
+  const normalized = minMaxNormalize(smoothed);
+
+  return {
+    times: onset.times.subarray(0, n),
+    values: normalized,
+  };
+}
+
+/**
+ * Pick peaks from the salience signal to extract beat candidates.
+ *
+ * Uses relaxed parameters to err on the side of too many candidates.
+ * The goal is coverage, not precision.
+ */
+function pickBeatCandidates(
+  salience: BeatSalienceSignal,
+  options: BeatCandidatesOptions,
+  source: BeatCandidateSource
+): BeatCandidate[] {
+  const minIntervalSec = options.minIntervalSec ?? 0.1;
+  const thresholdFactor = options.thresholdFactor ?? 0.5;
+
+  const { times, values } = salience;
+  const n = values.length;
+
+  if (n < 3) return [];
+
+  // Compute adaptive threshold based on signal statistics.
+  const { mean, std } = meanStd(values);
+  // Low threshold to get dense candidates.
+  // thresholdFactor of 0.5 means: mean + 0.5*std (quite low).
+  const threshold = mean + thresholdFactor * std;
+
+  const candidates: BeatCandidate[] = [];
+  let lastPeakTime = -Infinity;
+
+  for (let i = 1; i < n - 1; i++) {
+    const v = values[i] ?? 0;
+
+    // Must be above threshold.
+    if (v < threshold) continue;
+
+    // Must be a local maximum.
+    const prev = values[i - 1] ?? 0;
+    const next = values[i + 1] ?? 0;
+    if (!(v > prev && v > next)) continue;
+
+    const t = times[i] ?? 0;
+
+    // Enforce minimum interval.
+    if (t - lastPeakTime < minIntervalSec) {
+      // If within interval, keep the stronger peak.
+      const last = candidates[candidates.length - 1];
+      if (last && v > last.strength) {
+        last.time = t;
+        last.strength = v;
+      }
+      continue;
+    }
+
+    candidates.push({
+      time: t,
+      strength: v,
+      source,
+    });
+    lastPeakTime = t;
+  }
+
+  return candidates;
+}
+
+/**
+ * Detect beat candidates from mel spectrogram and spectrogram.
+ *
+ * This is the main entry point for beat candidate detection.
+ *
+ * Design principles:
+ * - Dense candidates (err on side of too many)
+ * - No BPM inference
+ * - No grid assumptions
+ * - Whole-track normalization for consistency
+ * - Deterministic (same input -> same output)
+ */
+export function detectBeatCandidates(
+  mel: MelSpectrogram,
+  spec: Spectrogram,
+  options?: BeatCandidatesOptions
+): BeatCandidatesOutput {
+  const opts: BeatCandidatesOptions = {
+    minIntervalSec: options?.minIntervalSec ?? 0.1,
+    thresholdFactor: options?.thresholdFactor ?? 0.5,
+    smoothMs: options?.smoothMs ?? 50,
+  };
+
+  // Compute beat salience signal.
+  const salience = beatSalienceFromMel(mel, spec, { smoothMs: opts.smoothMs });
+
+  // Pick peaks from salience.
+  const candidates = pickBeatCandidates(salience, opts, "combined");
+
+  return {
+    candidates,
+    salience,
+  };
+}

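A usage sketch for the new module. How the MelSpectrogram and Spectrogram inputs are produced lives in the package's other dsp modules, so they are declared as given here; only detectBeatCandidates and its option defaults come from the source above.

import { detectBeatCandidates } from "./beatCandidates";
import type { MelSpectrogram } from "./mel";
import type { Spectrogram } from "./spectrogram";

// Produced by the package's mel/spectrogram stages; treated as given here.
declare const mel: MelSpectrogram;
declare const spec: Spectrogram;

// Lower thresholdFactor yields denser candidates (threshold = mean + factor * std).
const { candidates, salience } = detectBeatCandidates(mel, spec, {
  minIntervalSec: 0.1,
  thresholdFactor: 0.5,
  smoothMs: 50,
});

// Candidates are sparse, time-ordered events; e.g. inspect the strongest few.
const strongest = [...candidates].sort((a, b) => b.strength - a.strength).slice(0, 5);
for (const c of strongest) {
  console.log(`${c.time.toFixed(2)}s strength=${c.strength.toFixed(2)} (${c.source})`);
}
console.log(`salience frames: ${salience.times.length}`);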