@octoseq/mir 0.1.0-main.2e286ce → 0.1.0-main.4baa7cd
- package/dist/{chunk-DUWYCAVG.js → chunk-KIGWMJLC.js} +774 -368
- package/dist/chunk-KIGWMJLC.js.map +1 -0
- package/dist/index.d.ts +115 -4
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/{runMir-CSIBwNZ3.d.ts → runMir-CVEIxPd3.d.ts} +1 -1
- package/dist/runner/runMir.d.ts +2 -2
- package/dist/runner/runMir.js +1 -1
- package/dist/runner/workerProtocol.d.ts +8 -1
- package/dist/runner/workerProtocol.js.map +1 -1
- package/dist/types-4bAZI4F7.d.ts +190 -0
- package/package.json +1 -1
- package/src/dsp/beatCandidates.ts +299 -0
- package/src/dsp/tempoHypotheses.ts +395 -0
- package/src/index.ts +21 -1
- package/src/runner/runMir.ts +72 -0
- package/src/runner/workerProtocol.ts +9 -1
- package/src/types.ts +119 -1
- package/dist/chunk-DUWYCAVG.js.map +0 -1
- package/dist/types-BE3py4fZ.d.ts +0 -83
package/src/dsp/tempoHypotheses.ts
ADDED
@@ -0,0 +1,395 @@
+import type { BeatCandidate, TempoHypothesis, TempoHypothesisEvidence } from "../types";
+
+/**
+ * Configuration for tempo hypothesis generation.
+ */
+export type TempoHypothesesOptions = {
+  /** Minimum BPM to consider. Default: 24. */
+  minBpm?: number;
+  /** Maximum BPM to consider. Default: 300. */
+  maxBpm?: number;
+  /** Histogram bin size in BPM. Default: 1.0. */
+  binSizeBpm?: number;
+  /** Maximum hypotheses to return. Default: 10. */
+  maxHypotheses?: number;
+  /** Minimum confidence threshold (0-1). Default: 0.05. */
+  minConfidence?: number;
+  /** Weight IOIs by candidate strength. Default: true. */
+  weightByStrength?: boolean;
+  /** Include histogram data in output. Default: false. */
+  includeHistogram?: boolean;
+};
+
+export type TempoHypothesesOutput = {
+  hypotheses: TempoHypothesis[];
+  inputCandidateCount: number;
+  histogram?: {
+    bpmBins: Float32Array;
+    counts: Float32Array;
+  };
+};
+
+/**
+ * Convert interval (seconds) to BPM.
+ */
+function intervalToBpm(intervalSec: number): number {
+  return 60.0 / intervalSec;
+}
+
+/**
+ * Convert BPM to interval (seconds).
+ */
+function bpmToInterval(bpm: number): number {
+  return 60.0 / bpm;
+}
+
+type IOI = { intervalSec: number; weight: number };
+
+/**
+ * Compute inter-onset intervals from beat candidates.
+ *
+ * @param candidates - Beat candidates sorted by time
+ * @param weightByStrength - Whether to weight by candidate strength
+ * @returns Array of { intervalSec, weight } pairs
+ */
+function computeIOIs(candidates: BeatCandidate[], weightByStrength: boolean): IOI[] {
+  if (candidates.length < 2) return [];
+
+  const iois: IOI[] = [];
+
+  // Sort candidates by time (should already be sorted, but be defensive)
+  const sorted = [...candidates].sort((a, b) => a.time - b.time);
+
+  for (let i = 1; i < sorted.length; i++) {
+    const prev = sorted[i - 1]!;
+    const curr = sorted[i]!;
+    const interval = curr.time - prev.time;
+
+    // Skip invalid intervals
+    if (interval <= 0) continue;
+
+    // Weight is geometric mean of adjacent strengths, or 1.0 if not weighting
+    const weight = weightByStrength
+      ? Math.sqrt(prev.strength * curr.strength)
+      : 1.0;
+
+    iois.push({ intervalSec: interval, weight });
+  }
+
+  return iois;
+}
+
+/**
+ * Build a weighted histogram of BPM values from IOIs.
+ *
+ * @param iois - Inter-onset intervals with weights
+ * @param minBpm - Minimum BPM (determines max interval)
+ * @param maxBpm - Maximum BPM (determines min interval)
+ * @param binSizeBpm - Size of each histogram bin in BPM
+ * @returns { bins: center BPM of each bin, counts: weighted counts }
+ */
+function buildBpmHistogram(
+  iois: IOI[],
+  minBpm: number,
+  maxBpm: number,
+  binSizeBpm: number
+): { bpmBins: Float32Array; counts: Float32Array } {
+  const numBins = Math.ceil((maxBpm - minBpm) / binSizeBpm);
+  const counts = new Float32Array(numBins);
+  const bpmBins = new Float32Array(numBins);
+
+  // Initialize bin centers
+  for (let i = 0; i < numBins; i++) {
+    bpmBins[i] = minBpm + (i + 0.5) * binSizeBpm;
+  }
+
+  // Convert interval range to BPM range
+  const minInterval = bpmToInterval(maxBpm);
+  const maxInterval = bpmToInterval(minBpm);
+
+  for (const { intervalSec, weight } of iois) {
+    // Filter to plausible range
+    if (intervalSec < minInterval || intervalSec > maxInterval) continue;
+
+    const bpm = intervalToBpm(intervalSec);
+    const binIndex = Math.floor((bpm - minBpm) / binSizeBpm);
+
+    if (binIndex >= 0 && binIndex < numBins) {
+      counts[binIndex] = (counts[binIndex] ?? 0) + weight;
+    }
+  }
+
+  return { bpmBins, counts };
+}
+
+/**
+ * Find peaks in the histogram using local maximum detection.
+ *
+ * @param counts - Weighted counts per bin
+ * @param minHeight - Minimum peak height (absolute)
+ * @returns Array of peak indices sorted by height descending
+ */
+function findHistogramPeaks(counts: Float32Array, minHeight: number): number[] {
+  const peaks: Array<{ index: number; height: number }> = [];
+
+  for (let i = 1; i < counts.length - 1; i++) {
+    const curr = counts[i]!;
+    const prev = counts[i - 1]!;
+    const next = counts[i + 1]!;
+
+    // Local maximum
+    if (curr > prev && curr > next && curr >= minHeight) {
+      peaks.push({ index: i, height: curr });
+    }
+  }
+
+  // Also check boundary bins if they're high enough
+  if (counts.length > 0 && counts[0]! >= minHeight && counts[0]! > (counts[1] ?? 0)) {
+    peaks.push({ index: 0, height: counts[0]! });
+  }
+  if (counts.length > 1) {
+    const last = counts.length - 1;
+    if (counts[last]! >= minHeight && counts[last]! > (counts[last - 1] ?? 0)) {
+      peaks.push({ index: last, height: counts[last]! });
+    }
+  }
+
+  // Sort by height descending
+  peaks.sort((a, b) => b.height - a.height);
+
+  return peaks.map((p) => p.index);
+}
+
+/**
+ * Merge adjacent peak bins to get refined BPM estimate.
+ * Uses weighted centroid of adjacent bins.
+ */
+function refinePeakBpm(
+  peakIndex: number,
+  bpmBins: Float32Array,
+  counts: Float32Array,
+  binSizeBpm: number
+): { bpm: number; peakHeight: number; binRange: [number, number]; totalWeight: number } {
+  // Consider the peak bin and immediate neighbors
+  let totalWeight = 0;
+  let weightedBpm = 0;
+  let minBinBpm = bpmBins[peakIndex]! - binSizeBpm / 2;
+  let maxBinBpm = bpmBins[peakIndex]! + binSizeBpm / 2;
+
+  for (let offset = -1; offset <= 1; offset++) {
+    const idx = peakIndex + offset;
+    if (idx < 0 || idx >= bpmBins.length) continue;
+
+    const w = counts[idx]!;
+    const bpm = bpmBins[idx]!;
+
+    totalWeight += w;
+    weightedBpm += w * bpm;
+
+    if (w > 0) {
+      minBinBpm = Math.min(minBinBpm, bpm - binSizeBpm / 2);
+      maxBinBpm = Math.max(maxBinBpm, bpm + binSizeBpm / 2);
+    }
+  }
+
+  const refinedBpm = totalWeight > 0 ? weightedBpm / totalWeight : bpmBins[peakIndex]!;
+
+  return {
+    bpm: refinedBpm,
+    peakHeight: counts[peakIndex]!,
+    binRange: [minBinBpm, maxBinBpm],
+    totalWeight,
+  };
+}
+
+/**
+ * Check if two BPMs are harmonically related (within tolerance).
+ * Returns the harmonic ratio if related, null otherwise.
+ */
+function getHarmonicRatio(bpm1: number, bpm2: number, tolerance: number = 0.03): number | null {
+  const ratios = [0.5, 1 / 3, 2 / 3, 1.0, 1.5, 2.0, 3.0];
+
+  for (const ratio of ratios) {
+    const expected = bpm1 * ratio;
+    const relativeError = Math.abs(bpm2 - expected) / expected;
+    if (relativeError <= tolerance) {
+      return ratio;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Group hypotheses into harmonic families.
+ * Assigns familyId and harmonicRatio to each hypothesis.
+ *
+ * Uses deterministic family IDs based on the root BPM.
+ */
+function assignHarmonicFamilies(hypotheses: TempoHypothesis[]): void {
+  if (hypotheses.length === 0) return;
+
+  const families: Map<string, { rootBpm: number; members: TempoHypothesis[] }> = new Map();
+
+  for (const hyp of hypotheses) {
+    let foundFamily = false;
+
+    for (const [familyId, family] of families) {
+      const ratio = getHarmonicRatio(family.rootBpm, hyp.bpm);
+      if (ratio !== null) {
+        hyp.familyId = familyId;
+        hyp.harmonicRatio = ratio;
+        family.members.push(hyp);
+        foundFamily = true;
+        break;
+      }
+    }
+
+    if (!foundFamily) {
+      // Create new family with this hypothesis as root
+      // Use deterministic family ID based on root BPM
+      const familyId = `fam-${Math.round(hyp.bpm)}`;
+      hyp.familyId = familyId;
+      hyp.harmonicRatio = 1.0;
+      families.set(familyId, { rootBpm: hyp.bpm, members: [hyp] });
+    }
+  }
+}
+
+/**
+ * Normalize confidence scores to [0, 1] range.
+ */
+function normalizeConfidence(hypotheses: TempoHypothesis[]): void {
+  if (hypotheses.length === 0) return;
+
+  const maxHeight = Math.max(...hypotheses.map((h) => h.evidence.peakHeight));
+  if (maxHeight <= 0) return;
+
+  for (const hyp of hypotheses) {
+    hyp.confidence = hyp.evidence.peakHeight / maxHeight;
+  }
+}
+
+/**
+ * Generate tempo hypotheses from beat candidates.
+ *
+ * Algorithm:
+ * 1. Compute inter-onset intervals (IOIs) from beat candidates
+ * 2. Filter IOIs to musically plausible range (0.2s-2.5s -> 24-300 BPM)
+ * 3. Build weighted histogram with configurable bin size
+ * 4. Extract peaks as tempo candidates
+ * 5. Refine BPM estimates using weighted centroid
+ * 6. Group into harmonic families
+ * 7. Normalize confidence scores
+ *
+ * @param candidates - Beat candidates from B1
+ * @param options - Configuration options
+ * @returns Tempo hypotheses with confidence and family groupings
+ */
+export function generateTempoHypotheses(
+  candidates: BeatCandidate[],
+  options?: TempoHypothesesOptions
+): TempoHypothesesOutput {
+  const minBpm = options?.minBpm ?? 24;
+  const maxBpm = options?.maxBpm ?? 300;
+  const binSizeBpm = options?.binSizeBpm ?? 1.0;
+  const maxHypotheses = options?.maxHypotheses ?? 10;
+  const minConfidence = options?.minConfidence ?? 0.05;
+  const weightByStrength = options?.weightByStrength ?? true;
+  const includeHistogram = options?.includeHistogram ?? false;
+
+  // Early return if insufficient candidates
+  if (candidates.length < 2) {
+    return {
+      hypotheses: [],
+      inputCandidateCount: candidates.length,
+      histogram: includeHistogram
+        ? {
+            bpmBins: new Float32Array(0),
+            counts: new Float32Array(0),
+          }
+        : undefined,
+    };
+  }
+
+  // Step 1: Compute IOIs
+  const iois = computeIOIs(candidates, weightByStrength);
+
+  if (iois.length === 0) {
+    return {
+      hypotheses: [],
+      inputCandidateCount: candidates.length,
+      histogram: includeHistogram
+        ? {
+            bpmBins: new Float32Array(0),
+            counts: new Float32Array(0),
+          }
+        : undefined,
+    };
+  }
+
+  // Step 2-3: Build histogram (filtering happens during binning)
+  const { bpmBins, counts } = buildBpmHistogram(iois, minBpm, maxBpm, binSizeBpm);
+
+  // Calculate minimum height threshold based on minConfidence
+  const maxCount = Math.max(...counts);
+  const minHeight = maxCount * minConfidence;
+
+  // Step 4: Find peaks
+  const peakIndices = findHistogramPeaks(counts, minHeight);
+
+  // Step 5: Create hypotheses with refined BPM
+  const hypotheses: TempoHypothesis[] = [];
+
+  for (const peakIndex of peakIndices.slice(0, maxHypotheses * 2)) {
+    // Get extra for filtering
+    const { bpm, peakHeight, binRange, totalWeight } = refinePeakBpm(
+      peakIndex,
+      bpmBins,
+      counts,
+      binSizeBpm
+    );
+
+    // Skip if below confidence threshold
+    if (maxCount > 0 && peakHeight / maxCount < minConfidence) continue;
+
+    const evidence: TempoHypothesisEvidence = {
+      supportingIntervalCount: Math.round(totalWeight),
+      weightedSupport: totalWeight,
+      peakHeight,
+      binRange,
+    };
+
+    hypotheses.push({
+      id: "", // Will be assigned after sorting
+      bpm: Math.round(bpm * 10) / 10, // Round to 0.1 BPM precision
+      confidence: 0, // Will be normalized
+      evidence,
+      familyId: "", // Will be assigned
+      harmonicRatio: 1.0, // Will be assigned
+    });
+  }
+
+  // Step 6: Group into harmonic families
+  assignHarmonicFamilies(hypotheses);
+
+  // Step 7: Normalize confidence
+  normalizeConfidence(hypotheses);
+
+  // Filter by minConfidence and sort by confidence descending
+  const filtered = hypotheses
+    .filter((h) => h.confidence >= minConfidence)
+    .sort((a, b) => b.confidence - a.confidence)
+    .slice(0, maxHypotheses);
+
+  // Assign deterministic IDs based on rank
+  for (let i = 0; i < filtered.length; i++) {
+    filtered[i]!.id = `hyp-${i}`;
+  }
+
+  return {
+    hypotheses: filtered,
+    inputCandidateCount: candidates.length,
+    histogram: includeHistogram ? { bpmBins, counts } : undefined,
+  };
+}
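
For orientation, a minimal usage sketch of the new export, assuming the package-root re-export added in src/index.ts below; the candidate values are illustrative only, not taken from the package:

import { generateTempoHypotheses, type BeatCandidate } from "@octoseq/mir";

// Illustrative candidates roughly 0.5 s apart (~120 BPM).
const candidates: BeatCandidate[] = [
  { time: 0.0, strength: 0.9, source: "combined" },
  { time: 0.51, strength: 0.7, source: "combined" },
  { time: 1.0, strength: 0.8, source: "combined" },
  { time: 1.49, strength: 0.6, source: "combined" },
];

const { hypotheses } = generateTempoHypotheses(candidates, { minBpm: 60, maxBpm: 200 });
// Each hypothesis carries a refined bpm, a normalized confidence, evidence, and a harmonic familyId.
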
package/src/index.ts
CHANGED
@@ -9,7 +9,13 @@ export type {
   MirResult,
   MirFunctionId,
   MirRunRequest,
-  MirAudioPayload
+  MirAudioPayload,
+  BeatCandidate,
+  BeatCandidateSource,
+  BeatCandidatesResult,
+  TempoHypothesis,
+  TempoHypothesisEvidence,
+  TempoHypothesesResult,
 } from "./types";

 export const MIR_VERSION: MirVersion = "0.1.0";
@@ -59,6 +65,20 @@ export { onsetEnvelopeFromSpectrogram, onsetEnvelopeFromMel, onsetEnvelopeFromMe
 export type { PeakPickEvent, PeakPickOptions } from "./dsp/peakPick";
 export { peakPick } from "./dsp/peakPick";

+// ----------------------------
+// Beat Candidates
+// ----------------------------
+
+export type { BeatCandidatesOptions, BeatCandidatesOutput, BeatSalienceSignal } from "./dsp/beatCandidates";
+export { detectBeatCandidates, beatSalienceFromMel } from "./dsp/beatCandidates";
+
+// ----------------------------
+// Tempo Hypotheses
+// ----------------------------
+
+export type { TempoHypothesesOptions, TempoHypothesesOutput } from "./dsp/tempoHypotheses";
+export { generateTempoHypotheses } from "./dsp/tempoHypotheses";
+
 // ----------------------------
 // HPSS
 // ----------------------------

package/src/runner/runMir.ts
CHANGED
@@ -1,3 +1,5 @@
+import { detectBeatCandidates } from "../dsp/beatCandidates";
+import { generateTempoHypotheses } from "../dsp/tempoHypotheses";
 import { melSpectrogram, type MelConfig, type MelSpectrogram } from "../dsp/mel";
 import { mfcc, delta, deltaDelta } from "../dsp/mfcc";
 import { onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu } from "../dsp/onset";
@@ -285,6 +287,76 @@ export async function runMir(
     };
   }

+  if (request.fn === "beatCandidates") {
+    // Beat candidate detection requires both mel spectrogram and raw spectrogram.
+    const { mel, cpuExtraMs: melCpuMs } = await computeMel(false);
+
+    const beatOpts = request.beatCandidates ?? {};
+    const result = detectBeatCandidates(mel, spec, {
+      minIntervalSec: beatOpts.minIntervalSec,
+      thresholdFactor: beatOpts.thresholdFactor,
+      smoothMs: beatOpts.smoothMs,
+    });
+
+    const end = nowMs();
+    return {
+      kind: "beatCandidates",
+      times: result.salience.times,
+      candidates: result.candidates,
+      salience: beatOpts.includeSalience ? result.salience : undefined,
+      meta: {
+        backend: "cpu",
+        usedGpu: false,
+        timings: {
+          totalMs: end - t0,
+          cpuMs: cpuAfterSpec - cpuStart + melCpuMs,
+        },
+      },
+    };
+  }
+
+  if (request.fn === "tempoHypotheses") {
+    // Tempo hypothesis generation requires beat candidates.
+    // We compute them internally (could accept pre-computed in future).
+    const { mel, cpuExtraMs: melCpuMs } = await computeMel(false);
+
+    const beatOpts = request.beatCandidates ?? {};
+    const beatResult = detectBeatCandidates(mel, spec, {
+      minIntervalSec: beatOpts.minIntervalSec,
+      thresholdFactor: beatOpts.thresholdFactor,
+      smoothMs: beatOpts.smoothMs,
+    });
+
+    const tempoStart = nowMs();
+    const tempoOpts = request.tempoHypotheses ?? {};
+    const result = generateTempoHypotheses(beatResult.candidates, {
+      minBpm: tempoOpts.minBpm,
+      maxBpm: tempoOpts.maxBpm,
+      binSizeBpm: tempoOpts.binSizeBpm,
+      maxHypotheses: tempoOpts.maxHypotheses,
+      minConfidence: tempoOpts.minConfidence,
+      weightByStrength: tempoOpts.weightByStrength,
+      includeHistogram: tempoOpts.includeHistogram,
+    });
+
+    const end = nowMs();
+    return {
+      kind: "tempoHypotheses",
+      times: spec.times,
+      hypotheses: result.hypotheses,
+      inputCandidateCount: result.inputCandidateCount,
+      histogram: result.histogram,
+      meta: {
+        backend: "cpu",
+        usedGpu: false,
+        timings: {
+          totalMs: end - t0,
+          cpuMs: cpuAfterSpec - cpuStart + melCpuMs + (end - tempoStart),
+        },
+      },
+    };
+  }
+
   if (request.fn === "hpssHarmonic" || request.fn === "hpssPercussive") {
     // HPSS may use a custom spectrogram config
     const hpssSpecConfig = options.hpss?.spectrogram ?? specConfig;
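
A sketch of the request fields the two new branches read, written as a Partial because the rest of MirRunRequest (audio payload, spectrogram config, and whether fn is a required field) is not shown in this diff:

import type { MirRunRequest } from "@octoseq/mir";

// Only the fields consumed by the new "tempoHypotheses" branch are shown here.
const tempoRequest: Partial<MirRunRequest> = {
  fn: "tempoHypotheses",
  beatCandidates: { thresholdFactor: 0.5, smoothMs: 50 },
  tempoHypotheses: { minBpm: 60, maxBpm: 200, includeHistogram: true },
};
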
package/src/runner/workerProtocol.ts
CHANGED
@@ -1,4 +1,4 @@
-import type { MirAudioPayload, MirResult, MirRunRequest } from "../types";
+import type { BeatCandidate, MirAudioPayload, MirResult, MirRunRequest, TempoHypothesis } from "../types";

 export type MirWorkerInitMessage = {
   type: "INIT";
@@ -93,6 +93,14 @@ export type MirWorkerResultMessage = {
     values?: ArrayBufferLike;
     data2d?: ArrayBufferLike[];
     events?: Array<{ time: number; strength: number; index: number }>;
+    candidates?: BeatCandidate[];
+    // For tempoHypotheses
+    hypotheses?: TempoHypothesis[];
+    inputCandidateCount?: number;
+    histogram?: {
+      bpmBins: ArrayBufferLike;
+      counts: ArrayBufferLike;
+    };
     meta: MirResult["meta"];
   };
 };
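
Note that the worker protocol transfers the histogram as raw ArrayBufferLike buffers, while TempoHypothesesResult (below) uses Float32Array. A minimal decoding sketch for the receiving side; the surrounding message-handling code is assumed, not shown in this diff:

// Rebuild typed arrays from buffers transferred by the worker.
function decodeHistogram(histogram: { bpmBins: ArrayBufferLike; counts: ArrayBufferLike }) {
  return {
    bpmBins: new Float32Array(histogram.bpmBins),
    counts: new Float32Array(histogram.counts),
  };
}
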
package/src/types.ts
CHANGED
@@ -39,7 +39,97 @@ export type MirEventsResult = {
   meta: MirRunMeta;
 };

-
+/**
+ * A beat candidate represents a plausible beat-like moment in the audio.
+ *
+ * These are sparse events (timestamps) that may or may not correspond to
+ * actual beats. They are not tempo-aligned and do not imply any BPM value.
+ *
+ * Beat candidates are intended to be:
+ * - Dense enough to include most true beats
+ * - Sparse enough to be computationally tractable
+ * - Inspectable in the UI for debugging
+ *
+ * Future milestones will cluster, align, and refine these candidates.
+ */
+export type BeatCandidate = {
+  /** Time in seconds from track start. */
+  time: number;
+  /** Relative salience/confidence (0-1 normalized). Higher = more likely to be a beat. */
+  strength: number;
+  /** Source of this candidate (for debugging/inspection). */
+  source: BeatCandidateSource;
+};
+
+export type BeatCandidateSource =
+  | "onset_peak" // Derived from onset envelope peaks
+  | "flux_peak" // Derived from spectral flux peaks
+  | "combined"; // Derived from combined salience signal
+
+export type BeatCandidatesResult = {
+  kind: "beatCandidates";
+  /** Frame times from the underlying analysis (for alignment). */
+  times: Float32Array;
+  /** The beat candidate events. */
+  candidates: BeatCandidate[];
+  /** Optional: the salience signal used for peak picking (for debugging). */
+  salience?: {
+    times: Float32Array;
+    values: Float32Array;
+  };
+  meta: MirRunMeta;
+};
+
+/**
+ * A tempo hypothesis represents a plausible BPM with confidence score.
+ *
+ * Hypotheses are derived from inter-onset intervals of beat candidates.
+ * They are grouped into harmonic families (e.g., 60, 120, 180 BPM) but
+ * not collapsed - each BPM is preserved as a separate hypothesis.
+ */
+export type TempoHypothesis = {
+  /** Deterministic identifier for this hypothesis (e.g., "hyp-0"). */
+  id: string;
+  /** Tempo in beats per minute (0.1 BPM precision). */
+  bpm: number;
+  /** Confidence score normalized to [0, 1]. Higher = more likely. */
+  confidence: number;
+  /** Evidence metadata for debugging/inspection. */
+  evidence: TempoHypothesisEvidence;
+  /** Harmonic family ID - hypotheses in the same family are harmonically related. */
+  familyId: string;
+  /** Harmonic relationship to the family root (1.0 = root, 2.0 = double, 0.5 = half, etc.). */
+  harmonicRatio: number;
+};
+
+export type TempoHypothesisEvidence = {
+  /** Number of IOIs supporting this tempo. */
+  supportingIntervalCount: number;
+  /** Sum of weighted contributions (if strength-weighting enabled). */
+  weightedSupport: number;
+  /** Peak height in the histogram. */
+  peakHeight: number;
+  /** Histogram bin range [minBpm, maxBpm]. */
+  binRange: [number, number];
+};
+
+export type TempoHypothesesResult = {
+  kind: "tempoHypotheses";
+  /** Frame times from underlying analysis (for alignment). */
+  times: Float32Array;
+  /** Ordered list of tempo hypotheses (by confidence descending). */
+  hypotheses: TempoHypothesis[];
+  /** The number of beat candidates used as input. */
+  inputCandidateCount: number;
+  /** Histogram data for debugging/visualization. */
+  histogram?: {
+    bpmBins: Float32Array;
+    counts: Float32Array;
+  };
+  meta: MirRunMeta;
+};
+
+export type MirResult = Mir1DResult | Mir2DResult | MirEventsResult | BeatCandidatesResult | TempoHypothesesResult;

 // (moved above)

@@ -49,6 +139,8 @@ export type MirFunctionId =
   | "melSpectrogram"
   | "onsetEnvelope"
   | "onsetPeaks"
+  | "beatCandidates"
+  | "tempoHypotheses"
   | "hpssHarmonic"
   | "hpssPercussive"
   | "mfcc"
@@ -97,6 +189,32 @@ export type MirRunRequest = {
       window: "hann";
     };
   };
+  beatCandidates?: {
+    /** Minimum inter-candidate interval in seconds. Default: 0.1 (100ms). */
+    minIntervalSec?: number;
+    /** Threshold factor for peak detection. Lower = more candidates. Default: 0.5. */
+    thresholdFactor?: number;
+    /** Smoothing window for salience signal in ms. Default: 50. */
+    smoothMs?: number;
+    /** Whether to include the salience signal in output (for debugging). */
+    includeSalience?: boolean;
+  };
+  tempoHypotheses?: {
+    /** Minimum BPM to consider. Default: 24. */
+    minBpm?: number;
+    /** Maximum BPM to consider. Default: 300. */
+    maxBpm?: number;
+    /** Histogram bin size in BPM. Default: 1.0. */
+    binSizeBpm?: number;
+    /** Maximum number of hypotheses to return. Default: 10. */
+    maxHypotheses?: number;
+    /** Minimum confidence threshold (0-1). Default: 0.05. */
+    minConfidence?: number;
+    /** Weight IOIs by beat candidate strength. Default: true. */
+    weightByStrength?: boolean;
+    /** Include histogram in output for debugging. Default: false. */
+    includeHistogram?: boolean;
+  };
 };

 export type MirAudioPayload = {
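
To make the familyId / harmonicRatio semantics concrete, a hypothetical set of hypotheses for a track near 120 BPM; all numbers are invented for illustration and follow the conventions defined above (ids assigned by rank, confidence normalized to the strongest peak, family id derived from the rounded root BPM):

import type { TempoHypothesis } from "@octoseq/mir";

// Hypothetical values: a 120 BPM root with half- and double-tempo readings in the same family.
const example: TempoHypothesis[] = [
  {
    id: "hyp-0",
    bpm: 120.1,
    confidence: 1.0,
    familyId: "fam-120",
    harmonicRatio: 1.0,
    evidence: { supportingIntervalCount: 41, weightedSupport: 40.6, peakHeight: 18.2, binRange: [119, 122] },
  },
  {
    id: "hyp-1",
    bpm: 60.0,
    confidence: 0.45,
    familyId: "fam-120",
    harmonicRatio: 0.5,
    evidence: { supportingIntervalCount: 12, weightedSupport: 11.8, peakHeight: 8.2, binRange: [59, 61] },
  },
  {
    id: "hyp-2",
    bpm: 240.3,
    confidence: 0.3,
    familyId: "fam-120",
    harmonicRatio: 2.0,
    evidence: { supportingIntervalCount: 9, weightedSupport: 8.9, peakHeight: 5.5, binRange: [239, 242] },
  },
];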
|