@octoseq/mir 0.1.0-main.26cefa1 → 0.1.0-main.2b93a77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KIGWMJLC.js → chunk-CI7QGWP7.js} +534 -3
- package/dist/chunk-CI7QGWP7.js.map +1 -0
- package/dist/index.d.ts +1088 -5
- package/dist/index.js +2617 -103
- package/dist/index.js.map +1 -1
- package/dist/{runMir-g1ON6FUs.d.ts → runMir-D4t7WsN0.d.ts} +2 -2
- package/dist/runner/runMir.d.ts +2 -2
- package/dist/runner/runMir.js +1 -1
- package/dist/runner/workerProtocol.d.ts +1 -1
- package/dist/types-ifqndzu7.d.ts +700 -0
- package/package.json +1 -1
- package/src/dsp/bandCqt.ts +662 -0
- package/src/dsp/bandEvents.ts +351 -0
- package/src/dsp/bandMask.ts +225 -0
- package/src/dsp/bandMir.ts +524 -0
- package/src/dsp/bandProposal.ts +552 -0
- package/src/dsp/cqt.ts +386 -0
- package/src/dsp/cqtSignals.ts +462 -0
- package/src/dsp/customSignalReduction.ts +841 -0
- package/src/dsp/frequencyBand.ts +956 -0
- package/src/dsp/mel.ts +56 -3
- package/src/dsp/peakPicking.ts +519 -0
- package/src/dsp/spectral.ts +54 -0
- package/src/index.ts +212 -3
- package/src/runner/runMir.ts +70 -2
- package/src/types.ts +429 -2
- package/dist/chunk-KIGWMJLC.js.map +0 -1
- package/dist/types-1YXTZPFn.d.ts +0 -323
|
@@ -286,6 +286,26 @@ async function onsetEnvelopeFromMelGpu(mel, gpu, options) {
|
|
|
286
286
|
}
|
|
287
287
|
|
|
288
288
|
// src/dsp/spectral.ts
|
|
289
|
+
/**
 * Compute a frame-wise RMS amplitude envelope of a sample buffer.
 *
 * Frames start every `hopSize` samples and span `windowSize` samples; only
 * frames that fit entirely inside `samples` are produced.
 *
 * @param samples - Indexable sample buffer (missing entries are read as 0).
 * @param sampleRate - Divisor used to convert sample positions into `times`.
 * @param config - Optional `{ hopSize, windowSize }`. `hopSize` defaults to
 *   512 and `windowSize` defaults to `hopSize`.
 * @returns `{ times, values }`: two Float32Arrays of equal length holding the
 *   centre position of each frame divided by `sampleRate`, and its RMS.
 */
function amplitudeEnvelope(samples, sampleRate, config) {
  const hopSize = config?.hopSize ?? 512;
  const windowSize = config?.windowSize ?? hopSize;
  // A buffer shorter than one window makes the raw frame count negative, and
  // a negative typed-array length throws RangeError. Clamp to zero so short
  // input yields an empty envelope instead of crashing the caller.
  const nFrames = Math.max(0, Math.floor((samples.length - windowSize) / hopSize) + 1);
  const times = new Float32Array(nFrames);
  const values = new Float32Array(nFrames);
  for (let t = 0; t < nFrames; t++) {
    const start = t * hopSize;
    const end = Math.min(start + windowSize, samples.length);
    let sumSq = 0;
    for (let i = start; i < end; i++) {
      const s = samples[i] ?? 0;
      sumSq += s * s;
    }
    const count = end - start;
    // An empty window (windowSize of 0) would otherwise produce 0/0 = NaN.
    values[t] = count > 0 ? Math.sqrt(sumSq / count) : 0;
    times[t] = (start + windowSize / 2) / sampleRate;
  }
  return { times, values };
}
|
|
289
309
|
function spectralCentroid(spec) {
|
|
290
310
|
const nFrames = spec.times.length;
|
|
291
311
|
const out = new Float32Array(nFrames);
|
|
@@ -802,6 +822,20 @@ function hzToMel(hz) {
|
|
|
802
822
|
/**
 * Convert a mel-scale value to a frequency in Hz using
 * `700 * (10^(mel / 2595) - 1)`.
 *
 * @param mel - Value on the mel scale.
 * @returns The corresponding frequency in Hz.
 */
function melToHz(mel) {
  const ratio = 10 ** (mel / 2595);
  return 700 * (ratio - 1);
}
|
|
825
|
+
/**
 * Map a frequency in Hz to a fractional mel-feature index.
 *
 * The frequency is converted to mel, positioned linearly between the mel
 * values of `config.fMin` and `config.fMax`, and scaled to `0 .. nMels - 1`.
 * The result is not rounded or clamped.
 *
 * @param hz - Frequency in Hz.
 * @param config - Object providing `fMin`, `fMax` and `nMels`.
 * @returns Fractional feature index.
 */
function hzToFeatureIndex(hz, config) {
  const { fMin, fMax, nMels } = config;
  const lowMel = hzToMel(fMin);
  const highMel = hzToMel(fMax);
  const position = (hzToMel(hz) - lowMel) / (highMel - lowMel);
  return position * (nMels - 1);
}
|
|
832
|
+
/**
 * Map a (possibly fractional) mel-feature index back to a frequency in Hz.
 *
 * The index is scaled to a 0..1 position over `nMels - 1`, interpolated
 * linearly between the mel values of `config.fMin` and `config.fMax`, and
 * converted back to Hz.
 *
 * @param index - Feature index.
 * @param config - Object providing `fMin`, `fMax` and `nMels`.
 * @returns Frequency in Hz.
 */
function featureIndexToHz(index, config) {
  const { fMin, fMax, nMels } = config;
  const lowMel = hzToMel(fMin);
  const highMel = hzToMel(fMax);
  const position = index / (nMels - 1);
  return melToHz(lowMel + position * (highMel - lowMel));
}
|
|
805
839
|
function buildMelFilterBank(sampleRate, fftSize, nMels, fMin, fMax) {
|
|
806
840
|
const nBins = (fftSize >>> 1) + 1;
|
|
807
841
|
const nyquist = sampleRate / 2;
|
|
@@ -1553,6 +1587,452 @@ async function spectrogram(audio, config, gpu, options = {}) {
|
|
|
1553
1587
|
};
|
|
1554
1588
|
}
|
|
1555
1589
|
|
|
1590
|
+
// src/dsp/cqt.ts
|
|
1591
|
+
/**
 * Default constant-Q transform settings, used by `withCqtDefaults` for any
 * field the caller leaves unset.
 */
var CQT_DEFAULTS = {
  /** Quarter-tone resolution (24 bins per octave) */
  binsPerOctave: 24,
  /** C1 (~32.70 Hz), the lowest C on an 88-key piano (the lowest key is A0) */
  fMin: 32.7,
  /** C9 (~8372 Hz), one octave above C8, the top key of an 88-key piano */
  fMax: 8372
};
|
|
1599
|
+
/**
 * Convert a CQT bin index to its centre frequency.
 *
 * Bins are spaced geometrically: each `config.binsPerOctave` bins double the
 * frequency, starting from `config.fMin` at bin 0.
 *
 * @param bin - Bin index (may be fractional).
 * @param config - Object providing `fMin` and `binsPerOctave`.
 * @returns Frequency in Hz.
 */
function cqtBinToHz(bin, config) {
  const octavesAboveMin = bin / config.binsPerOctave;
  return config.fMin * 2 ** octavesAboveMin;
}
|
|
1602
|
+
/**
 * Convert a frequency in Hz to a fractional CQT bin index (the inverse of
 * `fMin * 2^(bin / binsPerOctave)`).
 *
 * @param hz - Frequency in Hz.
 * @param config - Object providing `fMin` and `binsPerOctave`.
 * @returns Fractional bin index, or `-Infinity` when `hz` is zero or negative.
 */
function hzToCqtBin(hz, config) {
  return hz <= 0
    ? -Infinity
    : config.binsPerOctave * Math.log2(hz / config.fMin);
}
|
|
1606
|
+
/**
 * Number of octaves (possibly fractional) between `config.fMin` and
 * `config.fMax`.
 *
 * @param config - Object providing `fMin` and `fMax`.
 * @returns `log2(fMax / fMin)`.
 */
function getNumOctaves(config) {
  const { fMin, fMax } = config;
  return Math.log2(fMax / fMin);
}
|
|
1609
|
+
/**
 * Total number of CQT bins for a configuration: the octave span multiplied
 * by `config.binsPerOctave`, rounded up to a whole bin.
 *
 * @param config - Object providing `fMin`, `fMax` and `binsPerOctave`.
 * @returns Bin count.
 */
function getNumBins(config) {
  return Math.ceil(getNumOctaves(config) * config.binsPerOctave);
}
|
|
1613
|
+
/**
 * Build the table of centre frequencies for every CQT bin.
 *
 * @param config - CQT configuration passed through to `getNumBins` and
 *   `cqtBinToHz`.
 * @returns Float32Array whose k-th entry is the centre frequency of bin k.
 */
function getCqtBinFrequencies(config) {
  const binCount = getNumBins(config);
  // Typed-array map keeps the Float32 element type and visits every index.
  return new Float32Array(binCount).map((_, k) => cqtBinToHz(k, config));
}
|
|
1621
|
+
/** Module-level cache of kernel banks, keyed by `kernelCacheKey`. */
var kernelBankCache = /* @__PURE__ */ new Map();

/**
 * Build the cache key for a kernel bank from the parameters it is built
 * from: bins per octave, frequency range, FFT size and sample rate.
 *
 * @param config - Object providing `binsPerOctave`, `fMin` and `fMax`.
 * @param fftSize - FFT size of the underlying STFT.
 * @param sampleRate - Sample rate of the analysed audio.
 * @returns Colon-separated key string.
 */
function kernelCacheKey(config, fftSize, sampleRate) {
  const { binsPerOctave, fMin, fMax } = config;
  return `${binsPerOctave}:${fMin}:${fMax}:${fftSize}:${sampleRate}`;
}
|
|
1625
|
+
/**
 * Build the kernel that maps STFT magnitude bins onto one CQT bin.
 *
 * The kernel is a triangular weighting over the STFT bins that fall between
 * `centerFreq - bandwidth / 2` and `centerFreq + bandwidth / 2`, where
 * `bandwidth = centerFreq / Q` and `Q = 1 / (2^(1 / binsPerOctave) - 1)`.
 * Weights are normalised to sum to 1.
 *
 * @param binIndex - Index of the CQT bin.
 * @param config - CQT configuration (`fMin`, `binsPerOctave`).
 * @param fftSize - FFT size of the STFT the kernel will be applied to.
 * @param sampleRate - Sample rate of the analysed audio.
 * @returns `{ centerFreq, startBin, endBin, weights }`, where `weights[i]`
 *   applies to STFT bin `startBin + i`.
 */
function createCqtKernel(binIndex, config, fftSize, sampleRate) {
  const centerFreq = cqtBinToHz(binIndex, config);
  const freqResolution = sampleRate / fftSize;
  const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
  const bandwidth = centerFreq / Q;
  const fLow = centerFreq - bandwidth / 2;
  const fHigh = centerFreq + bandwidth / 2;
  const startBin = Math.max(0, Math.floor(fLow / freqResolution));
  const endBin = Math.min(
    Math.floor(fftSize / 2) + 1,
    Math.ceil(fHigh / freqResolution) + 1
  );
  // Always keep at least one weight so the kernel is never empty.
  const numBins = Math.max(1, endBin - startBin);
  const weights = new Float32Array(numBins);
  let sum = 0;
  for (let i = 0; i < numBins; i++) {
    const binFreq = (startBin + i) * freqResolution;
    let raw;
    if (binFreq <= centerFreq) {
      // Rising edge of the triangle: 0 at fLow, 1 at centerFreq.
      raw = centerFreq > fLow ? (binFreq - fLow) / (centerFreq - fLow) : 1;
    } else {
      // Falling edge of the triangle: 1 at centerFreq, 0 at fHigh.
      raw = fHigh > centerFreq ? (fHigh - binFreq) / (fHigh - centerFreq) : 1;
    }
    weights[i] = Math.max(0, Math.min(1, raw));
    sum += weights[i];
  }
  if (sum > 0) {
    for (let i = 0; i < numBins; i++) {
      weights[i] /= sum;
    }
  } else if (sum === 0) {
    // The band is narrower than the STFT bin spacing and no STFT bin landed
    // inside it, so every triangular weight clamped to 0. Without a fallback
    // this CQT bin would always read as silent; use the nearest STFT bin.
    const nearest = Math.round(centerFreq / freqResolution) - startBin;
    weights[Math.min(numBins - 1, Math.max(0, nearest))] = 1;
  }
  return {
    centerFreq,
    startBin,
    endBin,
    weights
  };
}
|
|
1672
|
+
/**
 * Return the kernel bank for a configuration, building and caching it on
 * first use.
 *
 * Banks are stored in `kernelBankCache` under the key produced by
 * `kernelCacheKey`, so repeated calls with the same parameters return the
 * same object.
 *
 * @param config - CQT configuration.
 * @param fftSize - FFT size of the STFT the kernels apply to.
 * @param sampleRate - Sample rate of the analysed audio.
 * @returns `{ config, fftSize, sampleRate, kernels }` with one kernel per
 *   CQT bin.
 */
function getCqtKernelBank(config, fftSize, sampleRate) {
  const cacheKey = kernelCacheKey(config, fftSize, sampleRate);
  const hit = kernelBankCache.get(cacheKey);
  if (hit) {
    return hit;
  }
  const kernelCount = getNumBins(config);
  const kernels = new Array(kernelCount);
  let k = 0;
  while (k < kernelCount) {
    kernels[k] = createCqtKernel(k, config, fftSize, sampleRate);
    k += 1;
  }
  const bank = { config, fftSize, sampleRate, kernels };
  kernelBankCache.set(cacheKey, bank);
  return bank;
}
|
|
1690
|
+
/**
 * Project one STFT magnitude frame onto the kernels of a kernel bank.
 *
 * Each output value is the weighted sum of the STFT magnitudes covered by
 * the corresponding kernel. Missing kernels leave their output at 0, and
 * STFT bins outside the frame are read as 0.
 *
 * @param stftMagnitudes - Magnitudes of one STFT frame, indexed by STFT bin.
 * @param kernelBank - Object with a `kernels` array of `{ startBin, weights }`.
 * @returns Float32Array with one magnitude per kernel.
 */
function applyCqtKernels(stftMagnitudes, kernelBank) {
  const { kernels } = kernelBank;
  const output = new Float32Array(kernels.length);
  for (let k = 0; k < kernels.length; k++) {
    const kernel = kernels[k];
    if (kernel) {
      const { startBin, weights } = kernel;
      let acc = 0;
      for (let offset = 0; offset < weights.length; offset++) {
        const magnitude = stftMagnitudes[startBin + offset] ?? 0;
        acc += magnitude * (weights[offset] ?? 0);
      }
      output[k] = acc;
    }
  }
  return output;
}
|
|
1707
|
+
/**
 * Fill in missing CQT settings from `CQT_DEFAULTS`.
 *
 * `binsPerOctave`, `fMin` and `fMax` fall back to the defaults when null or
 * undefined; `hopSize` is passed through unchanged (and may be undefined).
 *
 * @param partial - Optional partial configuration.
 * @returns Configuration with `binsPerOctave`, `fMin`, `fMax` and `hopSize`.
 */
function withCqtDefaults(partial) {
  const given = partial ?? {};
  const binsPerOctave = given.binsPerOctave ?? CQT_DEFAULTS.binsPerOctave;
  const fMin = given.fMin ?? CQT_DEFAULTS.fMin;
  const fMax = given.fMax ?? CQT_DEFAULTS.fMax;
  return { binsPerOctave, fMin, fMax, hopSize: given.hopSize };
}
|
|
1715
|
+
/**
 * Compute a CQT spectrogram by projecting an STFT onto CQT kernels.
 *
 * An FFT size is chosen as the smallest power of two whose bin spacing is at
 * most half the bandwidth of the lowest CQT bin, capped at 16384. The audio
 * is analysed with `spectrogram` (Hann window) and every STFT frame is
 * mapped through the kernel bank from `getCqtKernelBank`.
 *
 * @param audio - Audio object; `audio.sampleRate` is read here and the whole
 *   object is forwarded to `spectrogram`.
 * @param config - Complete CQT configuration (`fMin`, `fMax`,
 *   `binsPerOctave`, optional `hopSize`; the hop defaults to `fftSize / 4`).
 * @param options - Optional `{ isCancelled }` callback, polled once per frame
 *   and forwarded to `spectrogram`.
 * @returns `{ sampleRate, config, times, magnitudes, nOctaves,
 *   binsPerOctave, binFrequencies }`.
 * @throws Error when `fMin`, `fMax` or `binsPerOctave` is invalid, or when
 *   `isCancelled` reports cancellation.
 */
async function cqtSpectrogram(audio, config, options = {}) {
  const sampleRate = audio.sampleRate;
  // Negated comparisons so that NaN and undefined are rejected as well;
  // `NaN <= 0` is false and would otherwise slip through validation.
  if (!(config.fMin > 0)) {
    throw new Error("@octoseq/mir: CQT fMin must be positive");
  }
  if (!(config.fMax > config.fMin)) {
    throw new Error("@octoseq/mir: CQT fMax must be greater than fMin");
  }
  if (!(config.binsPerOctave > 0)) {
    throw new Error("@octoseq/mir: CQT binsPerOctave must be positive");
  }
  const Q = 1 / (Math.pow(2, 1 / config.binsPerOctave) - 1);
  const minFreqResolution = config.fMin / Q / 2;
  const minFftSize = Math.ceil(sampleRate / minFreqResolution);
  const maxFftSize = 16384;
  let fftSize = 1;
  // Stop doubling at the cap instead of overshooting and clamping afterwards.
  while (fftSize < minFftSize && fftSize < maxFftSize) {
    fftSize *= 2;
  }
  const hopSize = config.hopSize ?? Math.floor(fftSize / 4);
  const stft = await spectrogram(
    audio,
    { fftSize, hopSize, window: "hann" },
    void 0,
    { isCancelled: options.isCancelled }
  );
  const kernelBank = getCqtKernelBank(config, fftSize, sampleRate);
  const nFrames = stft.magnitudes.length;
  const cqtMagnitudes = new Array(nFrames);
  for (let frame = 0; frame < nFrames; frame++) {
    if (options.isCancelled?.()) {
      throw new Error("@octoseq/mir: cancelled");
    }
    const stftFrame = stft.magnitudes[frame];
    // A missing STFT frame leaves the matching output slot unset.
    if (!stftFrame) continue;
    cqtMagnitudes[frame] = applyCqtKernels(stftFrame, kernelBank);
  }
  return {
    sampleRate,
    config,
    times: stft.times,
    magnitudes: cqtMagnitudes,
    nOctaves: getNumOctaves(config),
    binsPerOctave: config.binsPerOctave,
    binFrequencies: getCqtBinFrequencies(config)
  };
}
|
|
1764
|
+
/**
 * Compute a CQT spectrogram from a partial configuration and report timing.
 *
 * Missing settings are filled by `withCqtDefaults` before the work is
 * delegated to `cqtSpectrogram`.
 *
 * @param audio - Audio object forwarded to `cqtSpectrogram`.
 * @param config - Optional partial CQT configuration.
 * @param options - Options forwarded to `cqtSpectrogram`.
 * @returns `{ cqt, meta }`, where `meta` reports a CPU backend and the
 *   elapsed wall-clock time as both `totalMs` and `cpuMs`.
 */
async function computeCqt(audio, config, options = {}) {
  const startedAt = performance.now();
  const cqt = await cqtSpectrogram(audio, withCqtDefaults(config), options);
  const elapsedMs = performance.now() - startedAt;
  return {
    cqt,
    meta: {
      backend: "cpu",
      usedGpu: false,
      timings: { totalMs: elapsedMs, cpuMs: elapsedMs }
    }
  };
}
|
|
1781
|
+
|
|
1782
|
+
// src/dsp/cqtSignals.ts
|
|
1783
|
+
/** Lower frequency bound (Hz) of the range analysed by `bassPitchMotion`. */
var BASS_MIN_HZ = 20;

/** Upper frequency bound (Hz) of the range analysed by `bassPitchMotion`. */
var BASS_MAX_HZ = 300;

/** Width, in frames, of the sliding window used by `tonalStability`. */
var TONAL_STABILITY_WINDOW_FRAMES = 20;

/** Number of pitch classes in a chroma vector. */
var CHROMA_BINS = 12;
|
|
1787
|
+
/**
 * Rescale values linearly so the minimum maps to 0 and the maximum to 1.
 *
 * When the input has no spread (all values equal, or it is empty), every
 * output element is 0.5.
 *
 * @param values - Indexable numeric sequence (missing entries are read as 0).
 * @returns New Float32Array of the same length.
 */
function normalizeMinMax(values) {
  const count = values.length;
  let lo = Infinity;
  let hi = -Infinity;
  for (let i = 0; i < count; i++) {
    const sample = values[i] ?? 0;
    lo = sample < lo ? sample : lo;
    hi = sample > hi ? sample : hi;
  }
  const span = hi - lo;
  const scaled = new Float32Array(count);
  if (!(span > 0)) {
    scaled.fill(0.5);
    return scaled;
  }
  for (let i = 0; i < count; i++) {
    scaled[i] = ((values[i] ?? 0) - lo) / span;
  }
  return scaled;
}
|
|
1806
|
+
/**
 * Weighted mean index of a sequence, using the values as weights.
 *
 * @param values - Indexable weights (missing entries are read as 0).
 * @param startIndex - Index assigned to `values[0]`; defaults to 0.
 * @returns The weighted mean of `startIndex + i`, or the midpoint
 *   `startIndex + values.length / 2` when the weights do not sum to a
 *   positive number.
 */
function weightedCentroid(values, startIndex = 0) {
  let moment = 0;
  let mass = 0;
  for (let offset = 0; offset < values.length; offset++) {
    const w = values[offset] ?? 0;
    moment += (startIndex + offset) * w;
    mass += w;
  }
  if (mass > 0) {
    return moment / mass;
  }
  return startIndex + values.length / 2;
}
|
|
1816
|
+
/**
 * Estimate how much of a CQT frame's energy sits on a harmonic series.
 *
 * The strongest bin is taken as the fundamental. The squared magnitudes at
 * the bins nearest to its first six harmonics are summed with weights
 * `1 / h`, divided by the sum of those weights, and expressed as a fraction
 * of the frame's total energy, capped at 1.
 *
 * @param frame - CQT magnitudes of one frame.
 * @param cqt - CQT result; only `cqt.config` is used, for bin/Hz conversion.
 * @returns Value in `[0, 1]`; 0 for an empty or silent frame.
 */
function computeHarmonicEnergyFrame(frame, cqt) {
  const binCount = frame.length;
  if (binCount === 0) return 0;

  // Single pass: total energy and location of the strongest bin.
  let energy = 0;
  let peakMag = 0;
  let peakBin = 0;
  for (let i = 0; i < binCount; i++) {
    const m = frame[i] ?? 0;
    energy += m * m;
    if (m > peakMag) {
      peakMag = m;
      peakBin = i;
    }
  }
  if (energy === 0) return 0;

  const f0 = cqtBinToHz(peakBin, cqt.config);
  const harmonicCount = 6;
  let weightedEnergy = 0;
  let weightTotal = 0;
  for (let h = 1; h <= harmonicCount; h++) {
    const w = 1 / h;
    weightTotal += w;
    const bin = Math.round(hzToCqtBin(f0 * h, cqt.config));
    // Harmonics that fall outside the analysed range contribute nothing.
    if (bin < 0 || bin >= binCount) continue;
    const m = frame[bin] ?? 0;
    weightedEnergy += m * m * w;
  }
  return Math.min(1, weightedEnergy / weightTotal / energy);
}
|
|
1852
|
+
/**
 * Per-frame harmonic-energy signal derived from a CQT spectrogram.
 *
 * Each frame is scored with `computeHarmonicEnergyFrame` (missing frames
 * score 0) and the whole series is then rescaled with `normalizeMinMax`.
 *
 * @param cqt - CQT result providing `magnitudes` and `times`.
 * @returns A `"cqt1d"` signal with id `"harmonicEnergy"`, the CQT's `times`,
 *   the normalised values, and CPU timing metadata.
 */
function harmonicEnergy(cqt) {
  const startedAt = performance.now();
  const frameCount = cqt.magnitudes.length;
  const raw = new Float32Array(frameCount);
  for (let f = 0; f < frameCount; f++) {
    const magnitudes = cqt.magnitudes[f];
    if (!magnitudes) continue;
    raw[f] = computeHarmonicEnergyFrame(magnitudes, cqt);
  }
  const values = normalizeMinMax(raw);
  const elapsedMs = performance.now() - startedAt;
  return {
    kind: "cqt1d",
    signalId: "harmonicEnergy",
    times: cqt.times,
    values,
    meta: {
      backend: "cpu",
      usedGpu: false,
      timings: { totalMs: elapsedMs, cpuMs: elapsedMs }
    }
  };
}
|
|
1879
|
+
/**
 * Per-frame bass pitch-motion signal derived from a CQT spectrogram.
 *
 * For every frame the magnitude-weighted centroid of the bins between
 * `BASS_MIN_HZ` and `BASS_MAX_HZ` is computed; the signal is the absolute
 * frame-to-frame change of that centroid, rescaled with `normalizeMinMax`.
 * The first frame copies the second frame's value.
 *
 * @param cqt - CQT result providing `magnitudes`, `times` and `config`.
 * @returns A `"cqt1d"` signal with id `"bassPitchMotion"`. When the bass
 *   range covers no bins the values are all zero and timings are reported
 *   as 0.
 */
function bassPitchMotion(cqt) {
  const startedAt = performance.now();
  const frameCount = cqt.magnitudes.length;
  const firstBassBin = Math.max(0, Math.floor(hzToCqtBin(BASS_MIN_HZ, cqt.config)));
  const binsPerFrame = cqt.magnitudes[0]?.length ?? 0;
  const bassLimitBin = Math.min(binsPerFrame, Math.ceil(hzToCqtBin(BASS_MAX_HZ, cqt.config)));
  const bassWidth = bassLimitBin - firstBassBin;

  const asSignal = (values, elapsedMs) => ({
    kind: "cqt1d",
    signalId: "bassPitchMotion",
    times: cqt.times,
    values,
    meta: {
      backend: "cpu",
      usedGpu: false,
      timings: { totalMs: elapsedMs, cpuMs: elapsedMs }
    }
  });

  if (bassWidth <= 0) {
    return asSignal(new Float32Array(frameCount), 0);
  }

  // Centroid of the bass band for each frame (missing frames stay at 0).
  const centroids = new Float32Array(frameCount);
  for (let f = 0; f < frameCount; f++) {
    const magnitudes = cqt.magnitudes[f];
    if (!magnitudes) continue;
    const band = Float32Array.from(
      { length: bassWidth },
      (_, i) => magnitudes[firstBassBin + i] ?? 0
    );
    centroids[f] = weightedCentroid(band, firstBassBin);
  }

  // Absolute frame-to-frame movement of the centroid.
  const motion = new Float32Array(frameCount);
  for (let f = 1; f < frameCount; f++) {
    const current = centroids[f] ?? 0;
    const previous = centroids[f - 1] ?? 0;
    motion[f] = Math.abs(current - previous);
  }
  motion[0] = motion[1] ?? 0;

  const values = normalizeMinMax(motion);
  return asSignal(values, performance.now() - startedAt);
}
|
|
1933
|
+
/**
 * Fold a CQT frame into a chroma vector of `CHROMA_BINS` entries.
 *
 * Each CQT bin's squared magnitude is added to the entry given by its
 * position within the octave; the vector is then scaled to sum to 1
 * (an all-zero frame stays all zero).
 *
 * @param frame - CQT magnitudes of one frame.
 * @param binsPerOctave - Number of CQT bins per octave.
 * @returns Float32Array of length `CHROMA_BINS`.
 */
function computeChroma(frame, binsPerOctave) {
  const chroma = new Float32Array(CHROMA_BINS);
  const binsPerPitchClass = binsPerOctave / CHROMA_BINS;
  let total = 0;
  for (let bin = 0; bin < frame.length; bin++) {
    const withinOctave = bin % binsPerOctave;
    const pitchClass = Math.floor(withinOctave / binsPerPitchClass) % CHROMA_BINS;
    const m = frame[bin] ?? 0;
    chroma[pitchClass] = (chroma[pitchClass] ?? 0) + m * m;
  }
  for (let c = 0; c < CHROMA_BINS; c++) {
    total += chroma[c] ?? 0;
  }
  if (total > 0) {
    for (let c = 0; c < CHROMA_BINS; c++) {
      chroma[c] = (chroma[c] ?? 0) / total;
    }
  }
  return chroma;
}
|
|
1952
|
+
/**
 * Per-frame tonal-stability signal derived from a CQT spectrogram.
 *
 * A chroma vector is computed for every frame. For each frame, the variance
 * of the chroma vectors inside a window of `TONAL_STABILITY_WINDOW_FRAMES`
 * frames centred on it (truncated at the ends) is taken as instability; the
 * signal is `1 - normalizeMinMax(instability)`.
 *
 * @param cqt - CQT result providing `magnitudes`, `times` and `binsPerOctave`.
 * @returns A `"cqt1d"` signal with id `"tonalStability"` and CPU timing
 *   metadata.
 */
function tonalStability(cqt) {
  const startedAt = performance.now();
  const frameCount = cqt.magnitudes.length;

  // Chroma per frame; frames without data get an all-zero vector.
  const chromas = new Array(frameCount);
  for (let f = 0; f < frameCount; f++) {
    const magnitudes = cqt.magnitudes[f];
    chromas[f] = magnitudes
      ? computeChroma(magnitudes, cqt.binsPerOctave)
      : new Float32Array(CHROMA_BINS);
  }

  const reach = Math.floor(TONAL_STABILITY_WINDOW_FRAMES / 2);
  const instability = new Float32Array(frameCount);
  for (let f = 0; f < frameCount; f++) {
    const from = Math.max(0, f - reach);
    const to = Math.min(frameCount, f + reach + 1);
    const span = to - from;

    // Mean chroma over the window.
    const mean = new Float32Array(CHROMA_BINS);
    for (let w = from; w < to; w++) {
      const vec = chromas[w];
      if (!vec) continue;
      for (let c = 0; c < CHROMA_BINS; c++) {
        mean[c] = (mean[c] ?? 0) + (vec[c] ?? 0);
      }
    }
    for (let c = 0; c < CHROMA_BINS; c++) {
      mean[c] = (mean[c] ?? 0) / span;
    }

    // Mean squared deviation from that mean, over frames and chroma entries.
    let squaredDeviation = 0;
    for (let w = from; w < to; w++) {
      const vec = chromas[w];
      if (!vec) continue;
      for (let c = 0; c < CHROMA_BINS; c++) {
        const delta = (vec[c] ?? 0) - (mean[c] ?? 0);
        squaredDeviation += delta * delta;
      }
    }
    instability[f] = squaredDeviation / (span * CHROMA_BINS);
  }

  const scaled = normalizeMinMax(instability);
  const values = new Float32Array(frameCount);
  for (let f = 0; f < frameCount; f++) {
    values[f] = 1 - (scaled[f] ?? 0);
  }

  const elapsedMs = performance.now() - startedAt;
  return {
    kind: "cqt1d",
    signalId: "tonalStability",
    times: cqt.times,
    values,
    meta: {
      backend: "cpu",
      usedGpu: false,
      timings: { totalMs: elapsedMs, cpuMs: elapsedMs }
    }
  };
}
|
|
2016
|
+
/**
 * Compute one CQT-derived signal selected by id.
 *
 * @param cqt - CQT result passed to the selected signal function.
 * @param signalId - `"harmonicEnergy"`, `"bassPitchMotion"` or
 *   `"tonalStability"`.
 * @returns The result of the matching signal function.
 * @throws Error when `signalId` is not one of the known ids.
 */
function computeCqtSignal(cqt, signalId) {
  if (signalId === "harmonicEnergy") {
    return harmonicEnergy(cqt);
  }
  if (signalId === "bassPitchMotion") {
    return bassPitchMotion(cqt);
  }
  if (signalId === "tonalStability") {
    return tonalStability(cqt);
  }
  throw new Error(`@octoseq/mir: unknown CQT signal ID: ${signalId}`);
}
|
|
2028
|
+
/**
 * Compute every CQT-derived signal for a CQT spectrogram.
 *
 * @param cqt - CQT result passed to each signal function.
 * @returns Map from signal id to result, in the order `"harmonicEnergy"`,
 *   `"bassPitchMotion"`, `"tonalStability"`.
 */
function computeAllCqtSignals(cqt) {
  return new Map([
    ["harmonicEnergy", harmonicEnergy(cqt)],
    ["bassPitchMotion", bassPitchMotion(cqt)],
    ["tonalStability", tonalStability(cqt)]
  ]);
}
|
|
2035
|
+
|
|
1556
2036
|
// src/runner/runMir.ts
|
|
1557
2037
|
function nowMs2() {
|
|
1558
2038
|
return typeof performance !== "undefined" ? performance.now() : Date.now();
|
|
@@ -1579,6 +2059,27 @@ async function runMir(audio, request, options = {}) {
|
|
|
1579
2059
|
hopSize: 512,
|
|
1580
2060
|
window: "hann"
|
|
1581
2061
|
};
|
|
2062
|
+
if (request.fn === "amplitudeEnvelope") {
|
|
2063
|
+
const cpuStart2 = nowMs2();
|
|
2064
|
+
const result = amplitudeEnvelope(audio.mono, audio.sampleRate, {
|
|
2065
|
+
hopSize: specConfig.hopSize,
|
|
2066
|
+
windowSize: specConfig.fftSize
|
|
2067
|
+
});
|
|
2068
|
+
const cpuEnd = nowMs2();
|
|
2069
|
+
return {
|
|
2070
|
+
kind: "1d",
|
|
2071
|
+
times: result.times,
|
|
2072
|
+
values: result.values,
|
|
2073
|
+
meta: {
|
|
2074
|
+
backend: "cpu",
|
|
2075
|
+
usedGpu: false,
|
|
2076
|
+
timings: {
|
|
2077
|
+
totalMs: cpuEnd - t0,
|
|
2078
|
+
cpuMs: cpuEnd - cpuStart2
|
|
2079
|
+
}
|
|
2080
|
+
}
|
|
2081
|
+
};
|
|
2082
|
+
}
|
|
1582
2083
|
const cpuStart = nowMs2();
|
|
1583
2084
|
const spec = await spectrogram(asAudioBufferLike(audio), specConfig, void 0, {
|
|
1584
2085
|
isCancelled: options.isCancelled
|
|
@@ -1908,6 +2409,36 @@ async function runMir(audio, request, options = {}) {
|
|
|
1908
2409
|
}
|
|
1909
2410
|
};
|
|
1910
2411
|
}
|
|
2412
|
+
if (request.fn === "cqtHarmonicEnergy" || request.fn === "cqtBassPitchMotion" || request.fn === "cqtTonalStability") {
|
|
2413
|
+
const cqtStart = nowMs2();
|
|
2414
|
+
const cqtConfig = withCqtDefaults(request.cqt);
|
|
2415
|
+
const cqt = await cqtSpectrogram(asAudioBufferLike(audio), cqtConfig, {
|
|
2416
|
+
isCancelled: options.isCancelled
|
|
2417
|
+
});
|
|
2418
|
+
const cqtEnd = nowMs2();
|
|
2419
|
+
let signal;
|
|
2420
|
+
if (request.fn === "cqtHarmonicEnergy") {
|
|
2421
|
+
signal = harmonicEnergy(cqt);
|
|
2422
|
+
} else if (request.fn === "cqtBassPitchMotion") {
|
|
2423
|
+
signal = bassPitchMotion(cqt);
|
|
2424
|
+
} else {
|
|
2425
|
+
signal = tonalStability(cqt);
|
|
2426
|
+
}
|
|
2427
|
+
const end2 = nowMs2();
|
|
2428
|
+
return {
|
|
2429
|
+
kind: "1d",
|
|
2430
|
+
times: signal.times,
|
|
2431
|
+
values: signal.values,
|
|
2432
|
+
meta: {
|
|
2433
|
+
backend: "cpu",
|
|
2434
|
+
usedGpu: false,
|
|
2435
|
+
timings: {
|
|
2436
|
+
totalMs: end2 - t0,
|
|
2437
|
+
cpuMs: cqtEnd - cqtStart + (end2 - cqtEnd)
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2440
|
+
};
|
|
2441
|
+
}
|
|
1911
2442
|
const { mel, usedGpu, gpuMs, cpuExtraMs } = await computeMel(backend === "gpu");
|
|
1912
2443
|
const end = nowMs2();
|
|
1913
2444
|
return {
|
|
@@ -1926,6 +2457,6 @@ async function runMir(audio, request, options = {}) {
|
|
|
1926
2457
|
};
|
|
1927
2458
|
}
|
|
1928
2459
|
|
|
1929
|
-
export { beatSalienceFromMel, delta, deltaDelta, detectBeatCandidates, generateTempoHypotheses, hpss, melSpectrogram, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram };
|
|
1930
|
-
//# sourceMappingURL=chunk-
|
|
1931
|
-
//# sourceMappingURL=chunk-
|
|
2460
|
+
export { CQT_DEFAULTS, amplitudeEnvelope, bassPitchMotion, beatSalienceFromMel, computeAllCqtSignals, computeCqt, computeCqtSignal, cqtBinToHz, cqtSpectrogram, delta, deltaDelta, detectBeatCandidates, featureIndexToHz, generateTempoHypotheses, getCqtBinFrequencies, getNumBins, getNumOctaves, harmonicEnergy, hpss, hzToCqtBin, hzToFeatureIndex, hzToMel, melSpectrogram, melToHz, mfcc, onsetEnvelopeFromMel, onsetEnvelopeFromMelGpu, onsetEnvelopeFromSpectrogram, peakPick, runMir, spectralCentroid, spectralFlux, spectrogram, tonalStability, withCqtDefaults };
|
|
2461
|
+
//# sourceMappingURL=chunk-CI7QGWP7.js.map
|
|
2462
|
+
//# sourceMappingURL=chunk-CI7QGWP7.js.map
|