PyPI - auvux-dsp - Versions diffs - 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz - Mend

auvux-dsp 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (132) hide show

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/.gitignore RENAMED Viewed

@@ -14,3 +14,4 @@ __pycache__/
 *.env
 .pypirc
 *.pem
+FEATURE.md

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: auvux-dsp
-Version: 0.1.0.dev2
+Version: 0.1.0.dev3
 Summary: Fast differentiable audio transforms (STFT, mel, MFCC, CQT, chroma) on CPU and GPU
 Keywords: audio,dsp,stft,mel,mfcc,cqt,chroma,spectrogram,gpu
 Author-Email: Peter Kiers <pkiers.1983@gmail.com>
@@ -73,9 +73,14 @@ Status: under construction.
 - CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
   forward and native backward, librosa-parity tested, torch autograd built in.
 - Differentiable features: SpectralMoments (centroid, bandwidth, skewness,
-  kurtosis, flatness), RMS, SpectralFlux, OnsetStrength, Tempogram — native
-  forward + analytic adjoint, librosa-parity / torch-grad tested.
-- Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
+  kurtosis, flatness, slope, decrease, crest, entropy), RMS, SpectralFlux,
+  OnsetStrength, Tempogram — native forward + analytic adjoint, librosa-parity
+  / torch-grad tested.
+- Composed descriptors (built on the transforms above, so they inherit the GPU
+  path and stay differentiable): SpectralContrast, BandEnergy, Tonnetz,
+  ChromaCENS, FourierTempogram. Plus `delta` (Savitzky-Golay derivative) and
+  `pool` post-processing over any feature matrix.
+- Metal: every native transform on GPU (n_fft <= 4096), forward + backward,
   parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
   end (DLPack), and backend="auto" routes them there — no flags needed.
 - CUDA: kernel-for-kernel twin of the Metal backend including the resident

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/README.md RENAMED Viewed

@@ -50,9 +50,14 @@ Status: under construction.
 - CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
   forward and native backward, librosa-parity tested, torch autograd built in.
 - Differentiable features: SpectralMoments (centroid, bandwidth, skewness,
-  kurtosis, flatness), RMS, SpectralFlux, OnsetStrength, Tempogram — native
-  forward + analytic adjoint, librosa-parity / torch-grad tested.
-- Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
+  kurtosis, flatness, slope, decrease, crest, entropy), RMS, SpectralFlux,
+  OnsetStrength, Tempogram — native forward + analytic adjoint, librosa-parity
+  / torch-grad tested.
+- Composed descriptors (built on the transforms above, so they inherit the GPU
+  path and stay differentiable): SpectralContrast, BandEnergy, Tonnetz,
+  ChromaCENS, FourierTempogram. Plus `delta` (Savitzky-Golay derivative) and
+  `pool` post-processing over any feature matrix.
+- Metal: every native transform on GPU (n_fft <= 4096), forward + backward,
   parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
   end (DLPack), and backend="auto" routes them there — no flags needed.
 - CUDA: kernel-for-kernel twin of the Metal backend including the resident

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/benchmarks/benchmark.py RENAMED Viewed

@@ -95,8 +95,10 @@ def build_cases(y, sec, torch, device, gpu, resident):
         ("vqt", vqt_t),
         ("chroma", chroma_t),
     ]
-    # CPU-only differentiable features (no GPU kernels yet). librosa comparisons
-    # are registered below only for the 1:1-matching definitions.
+    # Differentiable features. moments/rms/flux/onset/tempogram have native GPU
+    # kernels; the composed descriptors (contrast, tonnetz, cens, fourier
+    # tempogram, band energy) ride the GPU of the transform they build on.
+    # librosa comparisons are registered below only for 1:1-matching definitions.
     feat = [
         ("spectral_moments", dsp.SpectralMoments(sr=SR, n_fft=2048, hop_length=512)),
         ("spectral_centroid",
@@ -105,6 +107,15 @@ def build_cases(y, sec, torch, device, gpu, resident):
          dsp.SpectralMoments(sr=SR, features=("bandwidth",), n_fft=2048, hop_length=512)),
         ("spectral_flatness",
          dsp.SpectralMoments(sr=SR, features=("flatness",), n_fft=2048, hop_length=512)),
+        ("spectral_descriptors",
+         dsp.SpectralMoments(sr=SR, features=("slope", "decrease", "crest", "entropy"),
+                             n_fft=2048, hop_length=512)),
+        ("spectral_contrast", dsp.SpectralContrast(sr=SR, n_fft=2048, hop_length=512)),
+        ("band_energy", dsp.BandEnergy(sr=SR, n_fft=2048, hop_length=512)),
+        ("tonnetz", dsp.Tonnetz(sr=SR, hop_length=512)),
+        ("chroma_cens", dsp.ChromaCENS(sr=SR, hop_length=512)),
+        ("fourier_tempogram",
+         dsp.FourierTempogram(sr=SR, win_length=256, n_fft=2048, hop_length=512, n_mels=128)),
         ("rms", dsp.RMS(sr=SR, frame_length=2048, hop_length=512)),
         ("spectral_flux", dsp.SpectralFlux(sr=SR, n_fft=2048, hop_length=512)),
         ("onset_strength", dsp.OnsetStrength(sr=SR, n_fft=2048, hop_length=512, n_mels=128)),
@@ -195,6 +206,16 @@ def build_cases(y, sec, torch, device, gpu, resident):
             lambda: librosa.feature.spectral_flatness(y=y, n_fft=2048, hop_length=512))
         add("forward", "rms", "librosa",
             lambda: librosa.feature.rms(y=y, frame_length=2048, hop_length=512))
+        add("forward", "spectral_contrast", "librosa",
+            lambda: librosa.feature.spectral_contrast(y=y, sr=SR, n_fft=2048, hop_length=512))
+        # tonnetz rides chroma_cqt (matched 1:1); cens omits librosa's amplitude
+        # quantization and fourier_tempogram inherits the onset lag-shift, so the
+        # latter two are speed-only (see _NO_DIFF).
+        add("forward", "tonnetz", "librosa", lambda: librosa.feature.tonnetz(y=y, sr=SR))
+        add("forward", "chroma_cens", "librosa",
+            lambda: librosa.feature.chroma_cens(y=y, sr=SR, hop_length=512))
+        add("forward", "fourier_tempogram", "librosa",
+            lambda: librosa.feature.fourier_tempogram(y=y, sr=SR, hop_length=512, win_length=256))
         # Speed-only: our onset/tempogram match librosa's values but not its
         # `center=True` envelope lag-shift (n_fft//(2*hop) frames), so the diff
         # is suppressed (see _NO_DIFF) — correctness lives in the test suite.
@@ -395,10 +416,11 @@ def build_cases(y, sec, torch, device, gpu, resident):
     return cases
-# Transforms compared against librosa for speed only: their definitions match
-# librosa's values but not its envelope lag-shift convention, so a relerr "diff"
-# would read as a (misleading) failure rather than a correctness check.
-_NO_DIFF = frozenset({"onset_strength", "tempogram"})
+# Transforms compared against librosa for speed only: a relerr "diff" on their
+# values would read as a (misleading) failure rather than a correctness check —
+# onset/tempogram/fourier_tempogram differ by librosa's envelope lag-shift
+# convention, chroma_cens by the amplitude quantization we intentionally omit.
+_NO_DIFF = frozenset({"onset_strength", "tempogram", "fourier_tempogram", "chroma_cens"})
 def run_cases(cases, repeats, warmup):

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/python/auvux/dsp/__init__.py RENAMED Viewed

@@ -20,9 +20,12 @@ from auvux.dsp._filters import (
     mel_to_hz,
 )
 from auvux.dsp._functional import (
+    band_energy,
     chroma,
+    chroma_cens,
     clear_caches,
     cqt,
+    fourier_tempogram,
     istft,
     mel_spectrogram,
     mfcc,
@@ -30,15 +33,22 @@ from auvux.dsp._functional import (
     rms,
     spectral_bandwidth,
     spectral_centroid,
+    spectral_contrast,
+    spectral_crest,
+    spectral_decrease,
+    spectral_entropy,
     spectral_flatness,
     spectral_flux,
     spectral_kurtosis,
     spectral_moments,
     spectral_skewness,
+    spectral_slope,
     stft,
     tempogram,
+    tonnetz,
     vqt,
 )
+from auvux.dsp._postproc import delta, pool
 from auvux.dsp._transform import Transform
 from auvux.dsp._transforms import (
     CQT,
@@ -47,12 +57,17 @@ from auvux.dsp._transforms import (
     RMS,
     STFT,
     VQT,
+    BandEnergy,
     Chroma,
+    ChromaCENS,
+    FourierTempogram,
     MelSpectrogram,
     OnsetStrength,
+    SpectralContrast,
     SpectralFlux,
     SpectralMoments,
     Tempogram,
+    Tonnetz,
 )
 from auvux.dsp._version import __version__
@@ -64,25 +79,34 @@ __all__ = [
     "STFT",
     "VQT",
     "BackendError",
+    "BandEnergy",
     "Chroma",
+    "ChromaCENS",
+    "FourierTempogram",
     "MelSpectrogram",
     "OnsetStrength",
+    "SpectralContrast",
     "SpectralFlux",
     "SpectralMoments",
     "Tempogram",
+    "Tonnetz",
     "Transform",
     "__version__",
     "amplitude_to_db",
     "backend_info",
     "backends",
+    "band_energy",
     "chroma",
+    "chroma_cens",
     "clear_caches",
     "cqt",
     "cqt_frequencies",
     "db_to_amplitude",
     "db_to_power",
     "dct_matrix",
+    "delta",
     "fft_frequencies",
+    "fourier_tempogram",
     "get_num_threads",
     "gpu_available",
     "gpu_backend",
@@ -94,17 +118,24 @@ __all__ = [
     "mel_to_hz",
     "mfcc",
     "onset_strength",
+    "pool",
     "power_to_db",
     "rms",
     "set_num_threads",
     "spectral_bandwidth",
     "spectral_centroid",
+    "spectral_contrast",
+    "spectral_crest",
+    "spectral_decrease",
+    "spectral_entropy",
     "spectral_flatness",
     "spectral_flux",
     "spectral_kurtosis",
     "spectral_moments",
     "spectral_skewness",
+    "spectral_slope",
     "stft",
     "tempogram",
+    "tonnetz",
     "vqt",
 ]

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/python/auvux/dsp/_dispatch.py RENAMED Viewed

@@ -17,7 +17,7 @@ class BackendError(RuntimeError):
 # Refuse to run against a compiled core older than these Python sources;
 # editable installs do not rebuild the extension on source changes.
 # Keep in lockstep with kAbiVersion in src/bindings/abi.hpp.
-_EXPECTED_ABI = 12
+_EXPECTED_ABI = 13
 if getattr(_native, "abi_version", lambda: 0)() != _EXPECTED_ABI:
     raise ImportError(
         "auvux.dsp's compiled core is out of date with its Python sources; "

{auvux_dsp-0.1.0.dev2 → auvux_dsp-0.1.0.dev3}/python/auvux/dsp/_functional.py RENAMED Viewed

@@ -18,12 +18,17 @@ from auvux.dsp._transforms import (
     RMS,
     STFT,
     VQT,
+    BandEnergy,
     Chroma,
+    ChromaCENS,
+    FourierTempogram,
     MelSpectrogram,
     OnsetStrength,
+    SpectralContrast,
     SpectralFlux,
     SpectralMoments,
     Tempogram,
+    Tonnetz,
 )
 _T = TypeVar("_T", bound=Transform)
@@ -249,6 +254,54 @@ def chroma(
     return t(y, backend=backend)
+def tonnetz(
+    y: Any,
+    *,
+    sr: float,
+    hop_length: int = 512,
+    n_chroma: int = 12,
+    bins_per_octave: int = 36,
+    n_octaves: int = 7,
+    fmin: float | None = None,
+    backend: str = "auto",
+) -> Any:
+    """Functional form of the `Tonnetz` transform: obtain a `Tonnetz` instance
+    for these parameters through `_plan`, apply it to `y` on `backend`, and
+    return the result. Everything after `y` is keyword-only; `backend` is not
+    part of the plan parameters, it is passed at call time."""
+    t = _plan(
+        Tonnetz,
+        sr=sr,
+        hop_length=hop_length,
+        n_chroma=n_chroma,
+        bins_per_octave=bins_per_octave,
+        n_octaves=n_octaves,
+        fmin=fmin,
+    )
+    return t(y, backend=backend)
+def chroma_cens(
+    y: Any,
+    *,
+    sr: float,
+    hop_length: int = 512,
+    n_chroma: int = 12,
+    bins_per_octave: int = 36,
+    n_octaves: int = 7,
+    fmin: float | None = None,
+    win: int = 41,
+    backend: str = "auto",
+) -> Any:
+    """Functional form of the `ChromaCENS` transform: obtain a `ChromaCENS`
+    instance for these parameters through `_plan`, apply it to `y` on
+    `backend`, and return the result. Everything after `y` is keyword-only;
+    `backend` is not part of the plan parameters, it is passed at call time."""
+    # NOTE(review): `win` is presumably the CENS smoothing window length in
+    # frames — confirm against ChromaCENS.
+    t = _plan(
+        ChromaCENS,
+        sr=sr,
+        hop_length=hop_length,
+        n_chroma=n_chroma,
+        bins_per_octave=bins_per_octave,
+        n_octaves=n_octaves,
+        fmin=fmin,
+        win=win,
+    )
+    return t(y, backend=backend)
 def spectral_moments(
     y: Any,
     *,
@@ -276,6 +329,68 @@ def spectral_moments(
     return t(y, backend=backend)
+def band_energy(
+    y: Any,
+    *,
+    sr: float,
+    n_fft: int = 2048,
+    hop_length: int | None = None,
+    win_length: int | None = None,
+    window: str = "hann",
+    center: bool = True,
+    pad_mode: str = "constant",
+    n_bands: int = 6,
+    fmin: float = 0.0,
+    backend: str = "auto",
+) -> Any:
+    """Functional form of the `BandEnergy` transform: obtain a `BandEnergy`
+    instance for these parameters through `_plan`, apply it to `y` on
+    `backend`, and return the result. Everything after `y` is keyword-only;
+    `backend` is not part of the plan parameters, it is passed at call time.
+    `hop_length` / `win_length` of None are forwarded as-is, so their defaults
+    are resolved by `BandEnergy`."""
+    t = _plan(
+        BandEnergy,
+        sr=sr,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        window=window,
+        center=center,
+        pad_mode=pad_mode,
+        n_bands=n_bands,
+        fmin=fmin,
+    )
+    return t(y, backend=backend)
+def spectral_contrast(
+    y: Any,
+    *,
+    sr: float,
+    n_fft: int = 2048,
+    hop_length: int | None = None,
+    win_length: int | None = None,
+    window: str = "hann",
+    center: bool = True,
+    pad_mode: str = "constant",
+    n_bands: int = 6,
+    fmin: float = 200.0,
+    quantile: float = 0.02,
+    linear: bool = False,
+    backend: str = "auto",
+) -> Any:
+    """Functional form of the `SpectralContrast` transform: obtain a
+    `SpectralContrast` instance for these parameters through `_plan`, apply it
+    to `y` on `backend`, and return the result. Everything after `y` is
+    keyword-only; `backend` is not part of the plan parameters, it is passed at
+    call time. `hop_length` / `win_length` of None are forwarded as-is, so
+    their defaults are resolved by `SpectralContrast`."""
+    t = _plan(
+        SpectralContrast,
+        sr=sr,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        window=window,
+        center=center,
+        pad_mode=pad_mode,
+        n_bands=n_bands,
+        fmin=fmin,
+        quantile=quantile,
+        linear=linear,
+    )
+    return t(y, backend=backend)
 def _one_moment(feature: str, y: Any, **kw: Any) -> Any:
     return spectral_moments(y, features=(feature,), **kw)
@@ -305,6 +420,26 @@ def spectral_flatness(y: Any, **kw: Any) -> Any:
     return _one_moment("flatness", y, **kw)
+def spectral_slope(y: Any, **kw: Any) -> Any:
+    """Regression slope of |spectrum| on frequency, shape (..., 1, n_frames).
+    Delegates to `_one_moment("slope", ...)`; `**kw` is forwarded unchanged."""
+    return _one_moment("slope", y, **kw)
+def spectral_decrease(y: Any, **kw: Any) -> Any:
+    """MPEG-7 spectral decrease per frame, shape (..., 1, n_frames).
+    Delegates to `_one_moment("decrease", ...)`; `**kw` is forwarded unchanged."""
+    return _one_moment("decrease", y, **kw)
+def spectral_crest(y: Any, **kw: Any) -> Any:
+    """Spectral crest (peak-to-mean magnitude) per frame, shape (..., 1, n_frames).
+    Delegates to `_one_moment("crest", ...)`; `**kw` is forwarded unchanged."""
+    return _one_moment("crest", y, **kw)
+def spectral_entropy(y: Any, **kw: Any) -> Any:
+    """Normalized spectral entropy in [0, 1] per frame, shape (..., 1, n_frames).
+    Delegates to `_one_moment("entropy", ...)`; `**kw` is forwarded unchanged."""
+    return _one_moment("entropy", y, **kw)
 def rms(
     y: Any,
     *,
@@ -421,3 +556,40 @@ def tempogram(
         mel_norm=mel_norm,
     )
     return t(y, backend=backend)
+def fourier_tempogram(
+    y: Any,
+    *,
+    sr: float,
+    win_length: int = 256,
+    n_fft: int = 2048,
+    hop_length: int | None = None,
+    stft_win_length: int | None = None,
+    window: str = "hann",
+    center: bool = True,
+    pad_mode: str = "constant",
+    n_mels: int = 128,
+    fmin: float = 0.0,
+    fmax: float | None = None,
+    mel_scale: str = "slaney",
+    mel_norm: str | None = "slaney",
+    backend: str = "auto",
+) -> Any:
+    """Functional form of the `FourierTempogram` transform: obtain a
+    `FourierTempogram` instance for these parameters through `_plan`, apply it
+    to `y` on `backend`, and return the result. Everything after `y` is
+    keyword-only; `backend` is not part of the plan parameters, it is passed at
+    call time."""
+    # NOTE(review): `win_length` and `stft_win_length` are forwarded as two
+    # separate parameters; presumably the former is the tempogram window (in
+    # onset-envelope frames) and the latter the STFT window (in samples) —
+    # confirm against FourierTempogram.
+    t = _plan(
+        FourierTempogram,
+        sr=sr,
+        win_length=win_length,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        stft_win_length=stft_win_length,
+        window=window,
+        center=center,
+        pad_mode=pad_mode,
+        n_mels=n_mels,
+        fmin=fmin,
+        fmax=fmax,
+        mel_scale=mel_scale,
+        mel_norm=mel_norm,
+    )
+    return t(y, backend=backend)

auvux_dsp-0.1.0.dev3/python/auvux/dsp/_postproc.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Post-processing over feature matrices: delta (derivative) features and
+temporal pooling. These are fixed linear / reduction ops along the time axis,
+so they apply to any (..., t) feature and stay differentiable: numpy in ->
+numpy out, torch in -> torch out with grad."""
+from __future__ import annotations
+from functools import lru_cache
+from typing import Any
+import numpy as np
+from auvux.dsp import _dispatch
+_POOL_STATS = ("mean", "std", "var", "max", "min", "median")
+@lru_cache(maxsize=32)
+def _savgol_matrix(n: int, width: int, order: int) -> np.ndarray:
+    """Build the n x n Savitzky-Golay derivative operator (deriv == order, mode
+    'interp'), so savgol == data @ M.T. Built with numpy alone.
+    `n` is the length of the filtered axis, `width` the window length, and
+    `order` serves as both the polynomial degree and the derivative order.
+    The matrix is assembled in float64 and returned as float32. Results are
+    memoized per (n, width, order) by `lru_cache`, so every caller receives
+    the same array object and must not modify it in place."""
+    half = width // 2
+    # falling == order!, the factor left after differentiating x**order
+    # `order` times.
+    falling = float(np.prod([order - j for j in range(order)])) if order > 0 else 1.0
+    M = np.zeros((n, n), dtype=np.float64)
+    # Interior: one shared kernel, the order-th derivative at the window center.
+    xc = np.arange(width) - half
+    Ac = np.vander(xc, order + 1, increasing=True)
+    kernel = falling * np.linalg.pinv(Ac)[order]  # length width
+    for i in range(half, n - half):
+        M[i, i - half : i + half + 1] = kernel
+    # Edges: fit one polynomial to the first/last `width` points and read its
+    # order-th derivative at each edge position (scipy's 'interp' convention).
+    xe = np.arange(width)
+    pinv_e = np.linalg.pinv(np.vander(xe, order + 1, increasing=True))  # (order+1, width)
+    # powers[m] = m!/(m-order)!, the constant from differentiating x^m `order` times.
+    # (For m < order the product contains a zero factor, so powers[m] == 0.)
+    powers = np.array([float(np.prod([m - j for j in range(order)])) for m in range(order + 1)])
+    def edge_row(x: float) -> np.ndarray:
+        # Row of filter weights giving the fitted polynomial's order-th
+        # derivative at window position x. len(powers) == order + 1, so the
+        # loop runs only for m == order and the row does not depend on x.
+        row = np.zeros(width, dtype=np.float64)
+        for m in range(order, len(powers)):
+            row += pinv_e[m] * powers[m] * (x ** (m - order))
+        return row
+    for i in range(half):  # left edge: window is the first `width` points, x = i
+        M[i, :width] = edge_row(float(i))
+    for i in range(n - half, n):  # right edge: window is the last `width` points
+        M[i, n - width :] = edge_row(float(i - (n - width)))
+    return M.astype(np.float32)
+def delta(data: Any, *, width: int = 9, order: int = 1, axis: int = -1) -> Any:
+    """Savitzky-Golay derivative features (librosa.feature.delta, mode
+    'interp'): a fixed linear filter of the requested order along `axis`.
+    order=1 is the delta, order=2 the delta-delta.
+    `data` is a numpy array or torch tensor; the result has the same shape and
+    dtype, and torch inputs stay on their device.
+    Raises ValueError if `width` is even or < 3, if `order` is outside
+    [1, width), or if `width` exceeds the length of `axis`."""
+    if width < 3 or width % 2 == 0:
+        raise ValueError(f"width must be an odd integer >= 3, got {width}")
+    if order < 1 or order >= width:
+        raise ValueError(f"order must satisfy 1 <= order < width, got {order}")
+    n = data.shape[axis]
+    if width > n:
+        raise ValueError(f"width ({width}) exceeds the {n} frames along axis {axis}")
+    # int(...) normalizes the arguments that form the lru_cache key.
+    M = _savgol_matrix(int(n), int(width), int(order))  # (n, n), out = data @ M.T
+    if _dispatch.array_module(data) == "torch":
+        import torch
+        # The float32 operator is converted to the input's device and dtype;
+        # the filter is a plain matmul over the last axis, so autograd applies.
+        Mt = torch.from_numpy(M).to(device=data.device, dtype=data.dtype)
+        moved = data.movedim(axis, -1)
+        return torch.matmul(moved, Mt.transpose(0, 1)).movedim(-1, axis)
+    # numpy path: filter along the last axis, then restore the axis order and
+    # cast back to the input dtype.
+    moved = np.moveaxis(data, axis, -1)
+    out = moved @ M.T
+    return np.moveaxis(out, -1, axis).astype(data.dtype, copy=False)
+def pool(data: Any, *, stat: str = "mean", axis: int = -1) -> Any:
+    """Reduce a feature matrix along the time `axis` to a single statistic
+    (mean / std / var / max / min / median). Differentiable for torch inputs
+    (max/min/median use the standard subgradient). std/var are population
+    (unbiased=False); median over an even-length axis follows each backend's
+    own convention (numpy averages the two central values, torch takes the
+    lower).
+    Returns the reduced array/tensor with `axis` removed (numpy in -> numpy
+    out, torch in -> torch out). Raises ValueError if `stat` is not one of
+    `_POOL_STATS`."""
+    if stat not in _POOL_STATS:
+        raise ValueError(f"stat must be one of {_POOL_STATS}; got {stat!r}")
+    if _dispatch.array_module(data) == "torch":
+        import torch
+        if stat == "mean":
+            return data.mean(dim=axis)
+        if stat == "std":
+            return data.std(dim=axis, unbiased=False)
+        if stat == "var":
+            return data.var(dim=axis, unbiased=False)
+        if stat == "max":
+            return data.amax(dim=axis)
+        if stat == "min":
+            return data.amin(dim=axis)
+        # Only "median" remains. torch.median with `dim` returns
+        # (values, indices); keep the values.
+        return torch.median(data, dim=axis).values
+    # numpy path. np.std / np.var default to ddof=0, i.e. the population
+    # statistic, matching unbiased=False in the torch branch.
+    if stat == "mean":
+        return np.mean(data, axis=axis)
+    if stat == "std":
+        return np.std(data, axis=axis)
+    if stat == "var":
+        return np.var(data, axis=axis)
+    if stat == "max":
+        return np.max(data, axis=axis)
+    if stat == "min":
+        return np.min(data, axis=axis)
+    # Only "median" remains.
+    return np.median(data, axis=axis)

auvux-dsp 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz

auvux-dsp 0.1.0.dev2__tar.gz → 0.1.0.dev3__tar.gz