auvux-dsp 0.1.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auvux_dsp-0.1.0.dev0/.clang-format +6 -0
- auvux_dsp-0.1.0.dev0/.clangd +9 -0
- auvux_dsp-0.1.0.dev0/.env +1 -0
- auvux_dsp-0.1.0.dev0/.github/workflows/ci.yml +71 -0
- auvux_dsp-0.1.0.dev0/.github/workflows/wheels.yml +47 -0
- auvux_dsp-0.1.0.dev0/.gitignore +11 -0
- auvux_dsp-0.1.0.dev0/CMakeLists.txt +164 -0
- auvux_dsp-0.1.0.dev0/LICENSE +21 -0
- auvux_dsp-0.1.0.dev0/PKG-INFO +90 -0
- auvux_dsp-0.1.0.dev0/README.md +69 -0
- auvux_dsp-0.1.0.dev0/THIRD_PARTY_LICENSES +55 -0
- auvux_dsp-0.1.0.dev0/benchmarks/benchmark.py +656 -0
- auvux_dsp-0.1.0.dev0/cmake/embed_text.cmake +12 -0
- auvux_dsp-0.1.0.dev0/pyproject.toml +77 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/__init__.py +72 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_convert.py +54 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_dispatch.py +309 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_filters.py +98 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_functional.py +236 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_torch.py +194 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_transform.py +77 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_transforms.py +506 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/_version.py +1 -0
- auvux_dsp-0.1.0.dev0/python/auvux/dsp/py.typed +0 -0
- auvux_dsp-0.1.0.dev0/scripts/dev-build.ps1 +5 -0
- auvux_dsp-0.1.0.dev0/scripts/dev-build.sh +7 -0
- auvux_dsp-0.1.0.dev0/src/bindings/abi.hpp +10 -0
- auvux_dsp-0.1.0.dev0/src/bindings/bind_cqt.cpp +297 -0
- auvux_dsp-0.1.0.dev0/src/bindings/bind_fft.cpp +97 -0
- auvux_dsp-0.1.0.dev0/src/bindings/bind_mel.cpp +203 -0
- auvux_dsp-0.1.0.dev0/src/bindings/bind_stft.cpp +374 -0
- auvux_dsp-0.1.0.dev0/src/bindings/bind_util.cpp +23 -0
- auvux_dsp-0.1.0.dev0/src/bindings/module.cpp +17 -0
- auvux_dsp-0.1.0.dev0/src/bindings/pooled_array.hpp +39 -0
- auvux_dsp-0.1.0.dev0/src/common/dlpack_bridge.hpp +129 -0
- auvux_dsp-0.1.0.dev0/src/common/host_pool.cpp +99 -0
- auvux_dsp-0.1.0.dev0/src/common/host_pool.hpp +39 -0
- auvux_dsp-0.1.0.dev0/src/common/threadpool.cpp +155 -0
- auvux_dsp-0.1.0.dev0/src/common/threadpool.hpp +18 -0
- auvux_dsp-0.1.0.dev0/src/fft/fft.cpp +149 -0
- auvux_dsp-0.1.0.dev0/src/fft/fft.hpp +70 -0
- auvux_dsp-0.1.0.dev0/src/fft/fft_impl.hpp +31 -0
- auvux_dsp-0.1.0.dev0/src/fft/fft_pffft.cpp +132 -0
- auvux_dsp-0.1.0.dev0/src/fft/fft_vdsp.cpp +112 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cqt_plan.cpp +93 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cqt_plan.hpp +44 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_common.cu +191 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_common.cuh +307 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_cqt.cu +551 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_mel.cu +240 -0
- auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_stft.cu +469 -0
- auvux_dsp-0.1.0.dev0/src/gpu/gpu.hpp +136 -0
- auvux_dsp-0.1.0.dev0/src/gpu/gpu_common.hpp +54 -0
- auvux_dsp-0.1.0.dev0/src/gpu/gpu_stub.cpp +20 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/common.metal +191 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/cqt.metal +235 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/mel.metal +105 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/stft.metal +240 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_common.h +58 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_common.mm +220 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_cqt.mm +529 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_mel.mm +214 -0
- auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_stft.mm +391 -0
- auvux_dsp-0.1.0.dev0/src/ops/chroma/chroma.hpp +41 -0
- auvux_dsp-0.1.0.dev0/src/ops/chroma/chroma_cpu.cpp +94 -0
- auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt.hpp +82 -0
- auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_cpu.cpp +299 -0
- auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_filterbank.cpp +200 -0
- auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_filterbank.hpp +61 -0
- auvux_dsp-0.1.0.dev0/src/ops/frame.hpp +47 -0
- auvux_dsp-0.1.0.dev0/src/ops/istft/istft.hpp +45 -0
- auvux_dsp-0.1.0.dev0/src/ops/istft/istft_cpu.cpp +148 -0
- auvux_dsp-0.1.0.dev0/src/ops/mel/mel.hpp +80 -0
- auvux_dsp-0.1.0.dev0/src/ops/mel/mel_cpu.cpp +155 -0
- auvux_dsp-0.1.0.dev0/src/ops/mel/mel_filterbank.cpp +69 -0
- auvux_dsp-0.1.0.dev0/src/ops/ola.cpp +45 -0
- auvux_dsp-0.1.0.dev0/src/ops/ola.hpp +16 -0
- auvux_dsp-0.1.0.dev0/src/ops/stft/stft.hpp +43 -0
- auvux_dsp-0.1.0.dev0/src/ops/stft/stft_cpu.cpp +157 -0
- auvux_dsp-0.1.0.dev0/src/ops/types.hpp +29 -0
- auvux_dsp-0.1.0.dev0/src/ops/window.cpp +34 -0
- auvux_dsp-0.1.0.dev0/src/ops/window.hpp +23 -0
- auvux_dsp-0.1.0.dev0/src/third_party/dlpack.h +653 -0
- auvux_dsp-0.1.0.dev0/src/third_party/pffft.c +1909 -0
- auvux_dsp-0.1.0.dev0/src/third_party/pffft.h +181 -0
- auvux_dsp-0.1.0.dev0/tests/test_adjoint.py +134 -0
- auvux_dsp-0.1.0.dev0/tests/test_api.py +105 -0
- auvux_dsp-0.1.0.dev0/tests/test_chroma.py +54 -0
- auvux_dsp-0.1.0.dev0/tests/test_cqt.py +112 -0
- auvux_dsp-0.1.0.dev0/tests/test_fft.py +78 -0
- auvux_dsp-0.1.0.dev0/tests/test_gpu.py +211 -0
- auvux_dsp-0.1.0.dev0/tests/test_grad.py +205 -0
- auvux_dsp-0.1.0.dev0/tests/test_istft.py +65 -0
- auvux_dsp-0.1.0.dev0/tests/test_mel.py +132 -0
- auvux_dsp-0.1.0.dev0/tests/test_mfcc.py +61 -0
- auvux_dsp-0.1.0.dev0/tests/test_namespace.py +16 -0
- auvux_dsp-0.1.0.dev0/tests/test_resident.py +231 -0
- auvux_dsp-0.1.0.dev0/tests/test_stft.py +104 -0
- auvux_dsp-0.1.0.dev0/tests/test_vqt.py +82 -0
- auvux_dsp-0.1.0.dev0/tests/torch_refs.py +110 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
CompileFlags:
|
|
2
|
+
Add:
|
|
3
|
+
- -std=c++17
|
|
4
|
+
- -I/Users/pkiers/develop/auvux/auvux-dsp/src
|
|
5
|
+
- -I/Users/pkiers/develop/auvux/auvux-dsp/src/third_party
|
|
6
|
+
- -DAUVUX_HAVE_PFFFT=1
|
|
7
|
+
- -DAUVUX_HAVE_VDSP=1
|
|
8
|
+
- -I/Users/pkiers/develop/auvux/auvux-dsp/.venv/lib/python3.14/site-packages/pybind11/include
|
|
9
|
+
- -I/opt/homebrew/opt/python@3.14/Frameworks/Python.framework/Versions/3.14/include/python3.14
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
UV_PUBLISH_TOKEN=<redacted: leaked PyPI upload token - revoke and rotate; keep .env out of the sdist>
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
# Manual-only until release readiness; restore the push/pull_request
|
|
4
|
+
# triggers below (uncommented and moved under "on:") to re-enable.
|
|
5
|
+
# push:
|
|
6
|
+
# branches: [main]
|
|
7
|
+
# pull_request:
|
|
8
|
+
on:
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:
|
|
13
|
+
name: test (${{ matrix.os }})
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
- run: pip install scikit-build-core pybind11 numpy pytest
|
|
25
|
+
- run: pip install --no-build-isolation -v .
|
|
26
|
+
- run: pytest tests -q
|
|
27
|
+
|
|
28
|
+
# Editable installs are the least-traveled path for namespace packages;
|
|
29
|
+
# keep them covered.
|
|
30
|
+
editable:
|
|
31
|
+
runs-on: ubuntu-latest
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v4
|
|
34
|
+
- uses: actions/setup-python@v5
|
|
35
|
+
with:
|
|
36
|
+
python-version: "3.12"
|
|
37
|
+
- run: pip install scikit-build-core pybind11 numpy pytest
|
|
38
|
+
- run: pip install --no-build-isolation -ve .
|
|
39
|
+
- run: pytest tests -q
|
|
40
|
+
|
|
41
|
+
# Compile gate for the CUDA backend (no GPU on hosted runners, so tests run
|
|
42
|
+
# on the CPU paths; gpu_available() is False without a driver).
|
|
43
|
+
cuda-build:
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
steps:
|
|
46
|
+
- uses: actions/checkout@v4
|
|
47
|
+
- uses: actions/setup-python@v5
|
|
48
|
+
with:
|
|
49
|
+
python-version: "3.12"
|
|
50
|
+
- uses: Jimver/cuda-toolkit@v0.2.35
|
|
51
|
+
with:
|
|
52
|
+
cuda: "12.9.1"
|
|
53
|
+
method: network
|
|
54
|
+
sub-packages: '["nvcc", "cudart"]'
|
|
55
|
+
- run: pip install scikit-build-core pybind11 numpy pytest
|
|
56
|
+
- run: pip install --no-build-isolation -v . -Ccmake.define.AUVUX_GPU=cuda
|
|
57
|
+
- run: pytest tests -q
|
|
58
|
+
|
|
59
|
+
lint:
|
|
60
|
+
runs-on: ubuntu-latest
|
|
61
|
+
steps:
|
|
62
|
+
- uses: actions/checkout@v4
|
|
63
|
+
- uses: actions/setup-python@v5
|
|
64
|
+
with:
|
|
65
|
+
python-version: "3.12"
|
|
66
|
+
- run: pipx run ruff check && pipx run ruff format --check
|
|
67
|
+
- run: pip install mypy numpy && mypy
|
|
68
|
+
- run: |
|
|
69
|
+
pip install clang-format
|
|
70
|
+
find src -name third_party -prune -o \( -name '*.cpp' -o -name '*.hpp' -o -name '*.cuh' -o -name '*.cu' -o -name '*.mm' -o -name '*.h' \) -print \
|
|
71
|
+
| xargs clang-format --dry-run --Werror
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
# Build config lives in [tool.cibuildwheel] in pyproject.toml, so local
|
|
9
|
+
# `cibuildwheel` runs and CI use the exact same settings.
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
wheels:
|
|
13
|
+
name: ${{ matrix.os }}
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
# CUDA toolkit for the Windows wheels (nvcc + cudart only; Linux gets
|
|
22
|
+
# CUDA from the manylinux_cuda images set in pyproject.toml). Build-time
|
|
23
|
+
# only — cudart is linked statically into the extension.
|
|
24
|
+
- uses: Jimver/cuda-toolkit@v0.2.35
|
|
25
|
+
if: runner.os == 'Windows'
|
|
26
|
+
with:
|
|
27
|
+
cuda: "12.9.1"
|
|
28
|
+
method: network
|
|
29
|
+
sub-packages: '["nvcc", "cudart"]'
|
|
30
|
+
- uses: pypa/cibuildwheel@v2.21.3
|
|
31
|
+
- uses: actions/upload-artifact@v4
|
|
32
|
+
with:
|
|
33
|
+
name: wheels-${{ matrix.os }}
|
|
34
|
+
path: wheelhouse/*.whl
|
|
35
|
+
|
|
36
|
+
sdist:
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/checkout@v4
|
|
40
|
+
- uses: actions/setup-python@v5
|
|
41
|
+
with:
|
|
42
|
+
python-version: "3.12"
|
|
43
|
+
- run: pipx run build --sdist
|
|
44
|
+
- uses: actions/upload-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: sdist
|
|
47
|
+
path: dist/*.tar.gz
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
|
+
project(auvux_dsp LANGUAGES C CXX)
|
|
3
|
+
|
|
4
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
5
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
6
|
+
set(CMAKE_C_STANDARD 11)
|
|
7
|
+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
|
8
|
+
if(NOT CMAKE_BUILD_TYPE)
|
|
9
|
+
set(CMAKE_BUILD_TYPE Release)
|
|
10
|
+
endif()
|
|
11
|
+
|
|
12
|
+
find_package(pybind11 CONFIG REQUIRED)
|
|
13
|
+
find_package(Threads REQUIRED)
|
|
14
|
+
|
|
15
|
+
# FFT: PFFFT is always compiled (vendored, guaranteed fallback); vDSP is added
|
|
16
|
+
# on Apple. AUVUX_FFT=pffft drops vDSP even on Apple.
|
|
17
|
+
set(AUVUX_FFT "auto" CACHE STRING "FFT backend: auto | vdsp | pffft")
|
|
18
|
+
|
|
19
|
+
# GPU: Metal on Apple, CUDA where a toolkit is found, stub otherwise.
|
|
20
|
+
# Override with -DAUVUX_GPU=metal|cuda|none; pick a CUDA toolkit with the
|
|
21
|
+
# CUDACXX environment variable.
|
|
22
|
+
set(AUVUX_GPU "auto" CACHE STRING "GPU backend: auto | metal | cuda | none")
|
|
23
|
+
if(AUVUX_GPU STREQUAL "auto")
|
|
24
|
+
if(APPLE)
|
|
25
|
+
set(AUVUX_GPU "metal")
|
|
26
|
+
else()
|
|
27
|
+
include(CheckLanguage)
|
|
28
|
+
check_language(CUDA)
|
|
29
|
+
if(CMAKE_CUDA_COMPILER)
|
|
30
|
+
set(AUVUX_GPU "cuda")
|
|
31
|
+
else()
|
|
32
|
+
set(AUVUX_GPU "none")
|
|
33
|
+
endif()
|
|
34
|
+
endif()
|
|
35
|
+
endif()
|
|
36
|
+
message(STATUS "auvux-dsp GPU backend: ${AUVUX_GPU}")
|
|
37
|
+
|
|
38
|
+
pybind11_add_module(_native
|
|
39
|
+
src/fft/fft.cpp
|
|
40
|
+
src/fft/fft_pffft.cpp
|
|
41
|
+
src/common/threadpool.cpp
|
|
42
|
+
src/common/host_pool.cpp
|
|
43
|
+
src/ops/window.cpp
|
|
44
|
+
src/ops/ola.cpp
|
|
45
|
+
src/ops/stft/stft_cpu.cpp
|
|
46
|
+
src/ops/istft/istft_cpu.cpp
|
|
47
|
+
src/ops/mel/mel_filterbank.cpp
|
|
48
|
+
src/ops/mel/mel_cpu.cpp
|
|
49
|
+
src/ops/cqt/cqt_filterbank.cpp
|
|
50
|
+
src/ops/cqt/cqt_cpu.cpp
|
|
51
|
+
src/ops/chroma/chroma_cpu.cpp
|
|
52
|
+
src/gpu/gpu_stub.cpp
|
|
53
|
+
src/gpu/cqt_plan.cpp
|
|
54
|
+
src/third_party/pffft.c
|
|
55
|
+
src/bindings/module.cpp
|
|
56
|
+
src/bindings/bind_fft.cpp
|
|
57
|
+
src/bindings/bind_stft.cpp
|
|
58
|
+
src/bindings/bind_mel.cpp
|
|
59
|
+
src/bindings/bind_cqt.cpp
|
|
60
|
+
src/bindings/bind_util.cpp)
|
|
61
|
+
target_include_directories(_native PRIVATE src src/third_party)
|
|
62
|
+
target_link_libraries(_native PRIVATE Threads::Threads)
|
|
63
|
+
target_compile_definitions(_native PRIVATE AUVUX_HAVE_PFFFT=1)
|
|
64
|
+
|
|
65
|
+
if(AUVUX_FFT STREQUAL "vdsp" AND NOT APPLE)
|
|
66
|
+
message(FATAL_ERROR "AUVUX_FFT=vdsp requires macOS")
|
|
67
|
+
endif()
|
|
68
|
+
if(AUVUX_FFT STREQUAL "vdsp" OR (AUVUX_FFT STREQUAL "auto" AND APPLE))
|
|
69
|
+
target_sources(_native PRIVATE src/fft/fft_vdsp.cpp)
|
|
70
|
+
target_compile_definitions(_native PRIVATE AUVUX_HAVE_VDSP=1)
|
|
71
|
+
find_library(ACCELERATE Accelerate REQUIRED)
|
|
72
|
+
target_link_libraries(_native PRIVATE ${ACCELERATE})
|
|
73
|
+
message(STATUS "auvux-dsp FFT backends: vdsp, pffft")
|
|
74
|
+
elseif(AUVUX_FFT STREQUAL "auto" OR AUVUX_FFT STREQUAL "pffft")
|
|
75
|
+
message(STATUS "auvux-dsp FFT backends: pffft")
|
|
76
|
+
else()
|
|
77
|
+
message(FATAL_ERROR "Unknown AUVUX_FFT='${AUVUX_FFT}' (use auto, vdsp, or pffft)")
|
|
78
|
+
endif()
|
|
79
|
+
|
|
80
|
+
if(AUVUX_GPU STREQUAL "metal")
|
|
81
|
+
if(NOT APPLE)
|
|
82
|
+
message(FATAL_ERROR "AUVUX_GPU=metal requires macOS")
|
|
83
|
+
endif()
|
|
84
|
+
enable_language(OBJCXX)
|
|
85
|
+
set(CMAKE_OBJCXX_STANDARD 17)
|
|
86
|
+
set(CMAKE_OBJCXX_STANDARD_REQUIRED ON)
|
|
87
|
+
# Kernel sources live in real .metal files; a generated header embeds them
|
|
88
|
+
# as strings for runtime compilation (no Metal toolchain needed at build).
|
|
89
|
+
set(auvux_metal_kernels
|
|
90
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/common.metal
|
|
91
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/stft.metal
|
|
92
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/mel.metal
|
|
93
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/cqt.metal)
|
|
94
|
+
set(auvux_metal_gen ${CMAKE_CURRENT_BINARY_DIR}/generated/auvux_metal_kernels.h)
|
|
95
|
+
string(JOIN "," auvux_metal_kernels_arg ${auvux_metal_kernels})
|
|
96
|
+
add_custom_command(
|
|
97
|
+
OUTPUT ${auvux_metal_gen}
|
|
98
|
+
COMMAND ${CMAKE_COMMAND} -DOUT=${auvux_metal_gen} "-DINPUTS=${auvux_metal_kernels_arg}"
|
|
99
|
+
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embed_text.cmake
|
|
100
|
+
DEPENDS ${auvux_metal_kernels} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embed_text.cmake
|
|
101
|
+
COMMENT "Embedding Metal kernel sources"
|
|
102
|
+
VERBATIM)
|
|
103
|
+
set(auvux_metal_srcs
|
|
104
|
+
src/gpu/metal/metal_common.mm
|
|
105
|
+
src/gpu/metal/metal_stft.mm
|
|
106
|
+
src/gpu/metal/metal_mel.mm
|
|
107
|
+
src/gpu/metal/metal_cqt.mm)
|
|
108
|
+
target_sources(_native PRIVATE ${auvux_metal_srcs} ${auvux_metal_gen})
|
|
109
|
+
set_source_files_properties(${auvux_metal_srcs} PROPERTIES COMPILE_OPTIONS "-fobjc-arc")
|
|
110
|
+
target_include_directories(_native PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/generated)
|
|
111
|
+
target_compile_definitions(_native PRIVATE AUVUX_METAL=1)
|
|
112
|
+
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
|
113
|
+
find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED)
|
|
114
|
+
target_link_libraries(_native PRIVATE ${METAL_FRAMEWORK} ${FOUNDATION_FRAMEWORK})
|
|
115
|
+
elseif(AUVUX_GPU STREQUAL "cuda")
|
|
116
|
+
# Detect a user-supplied arch list before enable_language fills in the
|
|
117
|
+
# toolkit default and masks the distinction.
|
|
118
|
+
if(DEFINED CMAKE_CUDA_ARCHITECTURES AND CMAKE_CUDA_ARCHITECTURES)
|
|
119
|
+
set(auvux_user_archs TRUE)
|
|
120
|
+
else()
|
|
121
|
+
set(auvux_user_archs FALSE)
|
|
122
|
+
endif()
|
|
123
|
+
enable_language(CUDA)
|
|
124
|
+
target_sources(_native PRIVATE
|
|
125
|
+
src/gpu/cuda/cuda_common.cu
|
|
126
|
+
src/gpu/cuda/cuda_stft.cu
|
|
127
|
+
src/gpu/cuda/cuda_mel.cu
|
|
128
|
+
src/gpu/cuda/cuda_cqt.cu)
|
|
129
|
+
target_compile_definitions(_native PRIVATE AUVUX_CUDA=1)
|
|
130
|
+
if(NOT auvux_user_archs)
|
|
131
|
+
# Distributable fatbin: SASS per supported generation plus PTX (the plain
|
|
132
|
+
# "90" entry embeds both) so future architectures JIT.
|
|
133
|
+
set(auvux_archs 75-real 80-real 86-real 89-real 90)
|
|
134
|
+
set(AUVUX_MIN_CC 75)
|
|
135
|
+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13)
|
|
136
|
+
list(PREPEND auvux_archs 60-real 70-real) # CUDA 13 dropped pre-Turing
|
|
137
|
+
set(AUVUX_MIN_CC 60)
|
|
138
|
+
endif()
|
|
139
|
+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
|
|
140
|
+
list(APPEND auvux_archs 100-real 120-real) # Blackwell
|
|
141
|
+
endif()
|
|
142
|
+
set(CMAKE_CUDA_ARCHITECTURES ${auvux_archs})
|
|
143
|
+
# Devices below the oldest shipped SASS get a clean CPU fallback.
|
|
144
|
+
target_compile_definitions(_native PRIVATE AUVUX_MIN_CC=${AUVUX_MIN_CC})
|
|
145
|
+
endif()
|
|
146
|
+
set_property(TARGET _native PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
|
|
147
|
+
# Static cudart: only the NVIDIA driver is needed at runtime; without one
|
|
148
|
+
# the wheel degrades to gpu_available() == False.
|
|
149
|
+
set_property(TARGET _native PROPERTY CUDA_RUNTIME_LIBRARY Static)
|
|
150
|
+
elseif(NOT AUVUX_GPU STREQUAL "none")
|
|
151
|
+
message(FATAL_ERROR "Unknown AUVUX_GPU='${AUVUX_GPU}' (use auto, metal, cuda, or none)")
|
|
152
|
+
endif()
|
|
153
|
+
|
|
154
|
+
if(MSVC)
|
|
155
|
+
target_compile_options(_native PRIVATE
|
|
156
|
+
$<$<COMPILE_LANGUAGE:C,CXX>:/O2;/fp:fast>
|
|
157
|
+
$<$<COMPILE_LANGUAGE:CUDA>:-O3;-Xcompiler=/fp:fast>)
|
|
158
|
+
else()
|
|
159
|
+
target_compile_options(_native PRIVATE
|
|
160
|
+
$<$<COMPILE_LANGUAGE:C,CXX>:-O3;-ffast-math;-funroll-loops>
|
|
161
|
+
$<$<COMPILE_LANGUAGE:CUDA>:-O3>)
|
|
162
|
+
endif()
|
|
163
|
+
|
|
164
|
+
install(TARGETS _native DESTINATION auvux/dsp)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Peter Kiers (Auvux)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: auvux-dsp
|
|
3
|
+
Version: 0.1.0.dev0
|
|
4
|
+
Summary: Fast differentiable audio transforms (STFT, mel, MFCC, CQT, chroma) on CPU and GPU
|
|
5
|
+
Keywords: audio,dsp,stft,mel,mfcc,cqt,chroma,spectrogram,gpu
|
|
6
|
+
Author-Email: Peter Kiers <pkiers.1983@gmail.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
License-File: THIRD_PARTY_LICENSES
|
|
10
|
+
Classifier: Programming Language :: C++
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Requires-Dist: numpy>=1.22
|
|
15
|
+
Provides-Extra: test
|
|
16
|
+
Requires-Dist: pytest; extra == "test"
|
|
17
|
+
Requires-Dist: librosa; extra == "test"
|
|
18
|
+
Requires-Dist: soxr; extra == "test"
|
|
19
|
+
Requires-Dist: torch; extra == "test"
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# auvux-dsp
|
|
23
|
+
|
|
24
|
+
Fast differentiable audio transforms (STFT, iSTFT, mel, MFCC, CQT/VQT, chroma)
|
|
25
|
+
with native CPU backends (vDSP on macOS, PFFFT elsewhere) and GPU support
|
|
26
|
+
(Metal, CUDA). Forward *and* backward passes run in native kernels; PyTorch
|
|
27
|
+
autograd plugs in when you pass a torch tensor, and torch is never required
|
|
28
|
+
otherwise.
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import auvux.dsp as dsp
|
|
32
|
+
|
|
33
|
+
mel = dsp.MelSpectrogram(sr=44100, n_fft=2048, hop_length=512, n_mels=128)
|
|
34
|
+
S = mel(y) # numpy in -> numpy out
|
|
35
|
+
S = mel(y, backend="gpu") # Metal / CUDA kernels
|
|
36
|
+
|
|
37
|
+
y = torch.tensor(clip, requires_grad=True)
|
|
38
|
+
loss = mel(y, output="db").sum() # native forward
|
|
39
|
+
loss.backward() # native adjoint kernel, no torch recompute
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
pip install auvux-dsp --pre # preview release; drop --pre once 0.1.0 is out
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Performance
|
|
47
|
+
|
|
48
|
+
Benchmarks against librosa and torchaudio (forward passes and full training
|
|
49
|
+
steps, CPU / staged GPU / GPU-resident) live in `benchmarks/benchmark.py`:
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
python benchmarks/benchmark.py
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
*Results table to be published with the first release.*
|
|
56
|
+
|
|
57
|
+
### Complex STFT layout
|
|
58
|
+
|
|
59
|
+
Complex spectra are returned as `(..., bins, frames)` backed by frame-major
|
|
60
|
+
memory — each frame's spectrum contiguous, the freq axis strided. This is the
|
|
61
|
+
same physical layout both references use (librosa allocates its stft output
|
|
62
|
+
`order='F'`; `torch.stft` returns a transposed view over frame-major memory),
|
|
63
|
+
so values *and* bytes match librosa, and the GPU-resident path returns a
|
|
64
|
+
tensor with the exact strides `torch.stft` produces. It is also what makes
|
|
65
|
+
the STFT fast: no backend ever materializes the bins-major transpose. `istft`
|
|
66
|
+
accepts both this layout (zero-copy) and compact C-order arrays. Float
|
|
67
|
+
outputs (power/db/mel/...) are ordinary C-contiguous arrays.
|
|
68
|
+
|
|
69
|
+
Status: under construction.
|
|
70
|
+
- CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
|
|
71
|
+
forward and native backward, librosa-parity tested, torch autograd built in.
|
|
72
|
+
- Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
|
|
73
|
+
parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
|
|
74
|
+
end (DLPack), and backend="auto" routes them there — no flags needed.
|
|
75
|
+
- CUDA: kernel-for-kernel twin of the Metal backend including the resident
|
|
76
|
+
paths; parity-tested on NVIDIA hardware (RTX 4090, CUDA 12.9), with pinned
|
|
77
|
+
double-buffered staging for the numpy-in/numpy-out GPU paths.
|
|
78
|
+
- Pending: iCQT.
|
|
79
|
+
|
|
80
|
+
## Development
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
pip install scikit-build-core pybind11 numpy pytest
|
|
84
|
+
./scripts/dev-build.sh
|
|
85
|
+
pytest
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Note for packagers: `python/auvux/` is a PEP 420 namespace package — it must
|
|
89
|
+
never contain an `__init__.py`, or it will shadow sibling `auvux-*`
|
|
90
|
+
distributions.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# auvux-dsp
|
|
2
|
+
|
|
3
|
+
Fast differentiable audio transforms (STFT, iSTFT, mel, MFCC, CQT/VQT, chroma)
|
|
4
|
+
with native CPU backends (vDSP on macOS, PFFFT elsewhere) and GPU support
|
|
5
|
+
(Metal, CUDA). Forward *and* backward passes run in native kernels; PyTorch
|
|
6
|
+
autograd plugs in when you pass a torch tensor, and torch is never required
|
|
7
|
+
otherwise.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import auvux.dsp as dsp
|
|
11
|
+
|
|
12
|
+
mel = dsp.MelSpectrogram(sr=44100, n_fft=2048, hop_length=512, n_mels=128)
|
|
13
|
+
S = mel(y) # numpy in -> numpy out
|
|
14
|
+
S = mel(y, backend="gpu") # Metal / CUDA kernels
|
|
15
|
+
|
|
16
|
+
y = torch.tensor(clip, requires_grad=True)
|
|
17
|
+
loss = mel(y, output="db").sum() # native forward
|
|
18
|
+
loss.backward() # native adjoint kernel, no torch recompute
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
pip install auvux-dsp --pre # preview release; drop --pre once 0.1.0 is out
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Performance
|
|
26
|
+
|
|
27
|
+
Benchmarks against librosa and torchaudio (forward passes and full training
|
|
28
|
+
steps, CPU / staged GPU / GPU-resident) live in `benchmarks/benchmark.py`:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
python benchmarks/benchmark.py
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
*Results table to be published with the first release.*
|
|
35
|
+
|
|
36
|
+
### Complex STFT layout
|
|
37
|
+
|
|
38
|
+
Complex spectra are returned as `(..., bins, frames)` backed by frame-major
|
|
39
|
+
memory — each frame's spectrum contiguous, the freq axis strided. This is the
|
|
40
|
+
same physical layout both references use (librosa allocates its stft output
|
|
41
|
+
`order='F'`; `torch.stft` returns a transposed view over frame-major memory),
|
|
42
|
+
so values *and* bytes match librosa, and the GPU-resident path returns a
|
|
43
|
+
tensor with the exact strides `torch.stft` produces. It is also what makes
|
|
44
|
+
the STFT fast: no backend ever materializes the bins-major transpose. `istft`
|
|
45
|
+
accepts both this layout (zero-copy) and compact C-order arrays. Float
|
|
46
|
+
outputs (power/db/mel/...) are ordinary C-contiguous arrays.
|
|
47
|
+
|
|
48
|
+
Status: under construction.
|
|
49
|
+
- CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
|
|
50
|
+
forward and native backward, librosa-parity tested, torch autograd built in.
|
|
51
|
+
- Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
|
|
52
|
+
parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
|
|
53
|
+
end (DLPack), and backend="auto" routes them there — no flags needed.
|
|
54
|
+
- CUDA: kernel-for-kernel twin of the Metal backend including the resident
|
|
55
|
+
paths; parity-tested on NVIDIA hardware (RTX 4090, CUDA 12.9), with pinned
|
|
56
|
+
double-buffered staging for the numpy-in/numpy-out GPU paths.
|
|
57
|
+
- Pending: iCQT.
|
|
58
|
+
|
|
59
|
+
## Development
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
pip install scikit-build-core pybind11 numpy pytest
|
|
63
|
+
./scripts/dev-build.sh
|
|
64
|
+
pytest
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Note for packagers: `python/auvux/` is a PEP 420 namespace package — it must
|
|
68
|
+
never contain an `__init__.py`, or it will shadow sibling `auvux-*`
|
|
69
|
+
distributions.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
auvux-dsp bundles the following third-party components.
|
|
2
|
+
|
|
3
|
+
================================================================================
|
|
4
|
+
PFFFT — src/third_party/pffft.{c,h}
|
|
5
|
+
Copyright (c) 2013 Julien Pommier
|
|
6
|
+
Based on FFTPACK by Dr Paul Swarztrauber (NCAR), under the FFTPACK license.
|
|
7
|
+
================================================================================
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2004 the University Corporation for Atmospheric Research
|
|
10
|
+
("UCAR"). All rights reserved. Developed by NCAR's Computational and
|
|
11
|
+
Information Systems Laboratory, UCAR, www.cisl.ucar.edu.
|
|
12
|
+
|
|
13
|
+
Redistribution and use of the Software in source and binary forms, with or
|
|
14
|
+
without modification, is permitted provided that the following conditions are
|
|
15
|
+
met:
|
|
16
|
+
|
|
17
|
+
- Neither the names of NCAR's Computational and Information Systems Laboratory,
|
|
18
|
+
the University Corporation for Atmospheric Research, nor the names of its
|
|
19
|
+
sponsors or contributors may be used to endorse or promote products derived
|
|
20
|
+
from this Software without specific prior written permission.
|
|
21
|
+
- Redistributions of source code must retain the above copyright notices, this
|
|
22
|
+
list of conditions, and the disclaimer below.
|
|
23
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
|
24
|
+
this list of conditions, and the disclaimer below in the documentation and/or
|
|
25
|
+
other materials provided with the distribution.
|
|
26
|
+
|
|
27
|
+
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
28
|
+
IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
29
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
30
|
+
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT,
|
|
31
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY,
|
|
32
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
33
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
|
34
|
+
|
|
35
|
+
================================================================================
|
|
36
|
+
DLPack — src/third_party/dlpack.h
|
|
37
|
+
Copyright (c) 2017 by DLPack Contributors
|
|
38
|
+
================================================================================
|
|
39
|
+
|
|
40
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
41
|
+
use this file except in compliance with the License. You may obtain a copy of
|
|
42
|
+
the License at
|
|
43
|
+
|
|
44
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
45
|
+
|
|
46
|
+
Unless required by applicable law or agreed to in writing, software
|
|
47
|
+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
48
|
+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
49
|
+
License for the specific language governing permissions and limitations under
|
|
50
|
+
the License.
|
|
51
|
+
|
|
52
|
+
================================================================================
|
|
53
|
+
pybind11 (build-time only; not redistributed in the wheel)
|
|
54
|
+
Copyright (c) 2016 Wenzel Jakob — BSD-3-Clause.
|
|
55
|
+
================================================================================
|