auvux-dsp 0.1.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. auvux_dsp-0.1.0.dev0/.clang-format +6 -0
  2. auvux_dsp-0.1.0.dev0/.clangd +9 -0
  3. auvux_dsp-0.1.0.dev0/.env +1 -0
  4. auvux_dsp-0.1.0.dev0/.github/workflows/ci.yml +71 -0
  5. auvux_dsp-0.1.0.dev0/.github/workflows/wheels.yml +47 -0
  6. auvux_dsp-0.1.0.dev0/.gitignore +11 -0
  7. auvux_dsp-0.1.0.dev0/CMakeLists.txt +164 -0
  8. auvux_dsp-0.1.0.dev0/LICENSE +21 -0
  9. auvux_dsp-0.1.0.dev0/PKG-INFO +90 -0
  10. auvux_dsp-0.1.0.dev0/README.md +69 -0
  11. auvux_dsp-0.1.0.dev0/THIRD_PARTY_LICENSES +55 -0
  12. auvux_dsp-0.1.0.dev0/benchmarks/benchmark.py +656 -0
  13. auvux_dsp-0.1.0.dev0/cmake/embed_text.cmake +12 -0
  14. auvux_dsp-0.1.0.dev0/pyproject.toml +77 -0
  15. auvux_dsp-0.1.0.dev0/python/auvux/dsp/__init__.py +72 -0
  16. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_convert.py +54 -0
  17. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_dispatch.py +309 -0
  18. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_filters.py +98 -0
  19. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_functional.py +236 -0
  20. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_torch.py +194 -0
  21. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_transform.py +77 -0
  22. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_transforms.py +506 -0
  23. auvux_dsp-0.1.0.dev0/python/auvux/dsp/_version.py +1 -0
  24. auvux_dsp-0.1.0.dev0/python/auvux/dsp/py.typed +0 -0
  25. auvux_dsp-0.1.0.dev0/scripts/dev-build.ps1 +5 -0
  26. auvux_dsp-0.1.0.dev0/scripts/dev-build.sh +7 -0
  27. auvux_dsp-0.1.0.dev0/src/bindings/abi.hpp +10 -0
  28. auvux_dsp-0.1.0.dev0/src/bindings/bind_cqt.cpp +297 -0
  29. auvux_dsp-0.1.0.dev0/src/bindings/bind_fft.cpp +97 -0
  30. auvux_dsp-0.1.0.dev0/src/bindings/bind_mel.cpp +203 -0
  31. auvux_dsp-0.1.0.dev0/src/bindings/bind_stft.cpp +374 -0
  32. auvux_dsp-0.1.0.dev0/src/bindings/bind_util.cpp +23 -0
  33. auvux_dsp-0.1.0.dev0/src/bindings/module.cpp +17 -0
  34. auvux_dsp-0.1.0.dev0/src/bindings/pooled_array.hpp +39 -0
  35. auvux_dsp-0.1.0.dev0/src/common/dlpack_bridge.hpp +129 -0
  36. auvux_dsp-0.1.0.dev0/src/common/host_pool.cpp +99 -0
  37. auvux_dsp-0.1.0.dev0/src/common/host_pool.hpp +39 -0
  38. auvux_dsp-0.1.0.dev0/src/common/threadpool.cpp +155 -0
  39. auvux_dsp-0.1.0.dev0/src/common/threadpool.hpp +18 -0
  40. auvux_dsp-0.1.0.dev0/src/fft/fft.cpp +149 -0
  41. auvux_dsp-0.1.0.dev0/src/fft/fft.hpp +70 -0
  42. auvux_dsp-0.1.0.dev0/src/fft/fft_impl.hpp +31 -0
  43. auvux_dsp-0.1.0.dev0/src/fft/fft_pffft.cpp +132 -0
  44. auvux_dsp-0.1.0.dev0/src/fft/fft_vdsp.cpp +112 -0
  45. auvux_dsp-0.1.0.dev0/src/gpu/cqt_plan.cpp +93 -0
  46. auvux_dsp-0.1.0.dev0/src/gpu/cqt_plan.hpp +44 -0
  47. auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_common.cu +191 -0
  48. auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_common.cuh +307 -0
  49. auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_cqt.cu +551 -0
  50. auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_mel.cu +240 -0
  51. auvux_dsp-0.1.0.dev0/src/gpu/cuda/cuda_stft.cu +469 -0
  52. auvux_dsp-0.1.0.dev0/src/gpu/gpu.hpp +136 -0
  53. auvux_dsp-0.1.0.dev0/src/gpu/gpu_common.hpp +54 -0
  54. auvux_dsp-0.1.0.dev0/src/gpu/gpu_stub.cpp +20 -0
  55. auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/common.metal +191 -0
  56. auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/cqt.metal +235 -0
  57. auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/mel.metal +105 -0
  58. auvux_dsp-0.1.0.dev0/src/gpu/metal/kernels/stft.metal +240 -0
  59. auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_common.h +58 -0
  60. auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_common.mm +220 -0
  61. auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_cqt.mm +529 -0
  62. auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_mel.mm +214 -0
  63. auvux_dsp-0.1.0.dev0/src/gpu/metal/metal_stft.mm +391 -0
  64. auvux_dsp-0.1.0.dev0/src/ops/chroma/chroma.hpp +41 -0
  65. auvux_dsp-0.1.0.dev0/src/ops/chroma/chroma_cpu.cpp +94 -0
  66. auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt.hpp +82 -0
  67. auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_cpu.cpp +299 -0
  68. auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_filterbank.cpp +200 -0
  69. auvux_dsp-0.1.0.dev0/src/ops/cqt/cqt_filterbank.hpp +61 -0
  70. auvux_dsp-0.1.0.dev0/src/ops/frame.hpp +47 -0
  71. auvux_dsp-0.1.0.dev0/src/ops/istft/istft.hpp +45 -0
  72. auvux_dsp-0.1.0.dev0/src/ops/istft/istft_cpu.cpp +148 -0
  73. auvux_dsp-0.1.0.dev0/src/ops/mel/mel.hpp +80 -0
  74. auvux_dsp-0.1.0.dev0/src/ops/mel/mel_cpu.cpp +155 -0
  75. auvux_dsp-0.1.0.dev0/src/ops/mel/mel_filterbank.cpp +69 -0
  76. auvux_dsp-0.1.0.dev0/src/ops/ola.cpp +45 -0
  77. auvux_dsp-0.1.0.dev0/src/ops/ola.hpp +16 -0
  78. auvux_dsp-0.1.0.dev0/src/ops/stft/stft.hpp +43 -0
  79. auvux_dsp-0.1.0.dev0/src/ops/stft/stft_cpu.cpp +157 -0
  80. auvux_dsp-0.1.0.dev0/src/ops/types.hpp +29 -0
  81. auvux_dsp-0.1.0.dev0/src/ops/window.cpp +34 -0
  82. auvux_dsp-0.1.0.dev0/src/ops/window.hpp +23 -0
  83. auvux_dsp-0.1.0.dev0/src/third_party/dlpack.h +653 -0
  84. auvux_dsp-0.1.0.dev0/src/third_party/pffft.c +1909 -0
  85. auvux_dsp-0.1.0.dev0/src/third_party/pffft.h +181 -0
  86. auvux_dsp-0.1.0.dev0/tests/test_adjoint.py +134 -0
  87. auvux_dsp-0.1.0.dev0/tests/test_api.py +105 -0
  88. auvux_dsp-0.1.0.dev0/tests/test_chroma.py +54 -0
  89. auvux_dsp-0.1.0.dev0/tests/test_cqt.py +112 -0
  90. auvux_dsp-0.1.0.dev0/tests/test_fft.py +78 -0
  91. auvux_dsp-0.1.0.dev0/tests/test_gpu.py +211 -0
  92. auvux_dsp-0.1.0.dev0/tests/test_grad.py +205 -0
  93. auvux_dsp-0.1.0.dev0/tests/test_istft.py +65 -0
  94. auvux_dsp-0.1.0.dev0/tests/test_mel.py +132 -0
  95. auvux_dsp-0.1.0.dev0/tests/test_mfcc.py +61 -0
  96. auvux_dsp-0.1.0.dev0/tests/test_namespace.py +16 -0
  97. auvux_dsp-0.1.0.dev0/tests/test_resident.py +231 -0
  98. auvux_dsp-0.1.0.dev0/tests/test_stft.py +104 -0
  99. auvux_dsp-0.1.0.dev0/tests/test_vqt.py +82 -0
  100. auvux_dsp-0.1.0.dev0/tests/torch_refs.py +110 -0
@@ -0,0 +1,6 @@
1
+ BasedOnStyle: Google
2
+ IndentWidth: 4
3
+ ColumnLimit: 100
4
+ AccessModifierOffset: -4
5
+ DerivePointerAlignment: false
6
+ PointerAlignment: Left
@@ -0,0 +1,9 @@
1
+ CompileFlags:
2
+ Add:
3
+ - -std=c++17
4
+ - -I/Users/pkiers/develop/auvux/auvux-dsp/src
5
+ - -I/Users/pkiers/develop/auvux/auvux-dsp/src/third_party
6
+ - -DAUVUX_HAVE_PFFFT=1
7
+ - -DAUVUX_HAVE_VDSP=1
8
+ - -I/Users/pkiers/develop/auvux/auvux-dsp/.venv/lib/python3.14/site-packages/pybind11/include
9
+ - -I/opt/homebrew/opt/python@3.14/Frameworks/Python.framework/Versions/3.14/include/python3.14
@@ -0,0 +1 @@
1
+ UV_PUBLISH_TOKEN=<REDACTED: PyPI upload token was published in the sdist; revoke it and exclude .env from the package>
@@ -0,0 +1,71 @@
1
+ name: ci
2
+
3
+ # Manual-only until release readiness; to re-enable automatic runs, move the
4
+ # commented push/pull_request triggers below under the `on:` key.
5
+ # push:
6
+ # branches: [main]
7
+ # pull_request:
8
+ on:
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ test:
13
+ name: test (${{ matrix.os }})
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, macos-latest, windows-latest]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+ - run: pip install scikit-build-core pybind11 numpy pytest
25
+ - run: pip install --no-build-isolation -v .
26
+ - run: pytest tests -q
27
+
28
+ # Editable installs are the least-traveled path for namespace packages;
29
+ # keep them covered.
30
+ editable:
31
+ runs-on: ubuntu-latest
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+ - uses: actions/setup-python@v5
35
+ with:
36
+ python-version: "3.12"
37
+ - run: pip install scikit-build-core pybind11 numpy pytest
38
+ - run: pip install --no-build-isolation -ve .
39
+ - run: pytest tests -q
40
+
41
+ # Compile gate for the CUDA backend (no GPU on hosted runners, so tests run
42
+ # on the CPU paths; gpu_available() is False without a driver).
43
+ cuda-build:
44
+ runs-on: ubuntu-latest
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+ - uses: actions/setup-python@v5
48
+ with:
49
+ python-version: "3.12"
50
+ - uses: Jimver/cuda-toolkit@v0.2.35
51
+ with:
52
+ cuda: "12.9.1"
53
+ method: network
54
+ sub-packages: '["nvcc", "cudart"]'
55
+ - run: pip install scikit-build-core pybind11 numpy pytest
56
+ - run: pip install --no-build-isolation -v . -Ccmake.define.AUVUX_GPU=cuda
57
+ - run: pytest tests -q
58
+
59
+ lint:
60
+ runs-on: ubuntu-latest
61
+ steps:
62
+ - uses: actions/checkout@v4
63
+ - uses: actions/setup-python@v5
64
+ with:
65
+ python-version: "3.12"
66
+ - run: pipx run ruff check && pipx run ruff format --check
67
+ - run: pip install mypy numpy && mypy
68
+ - run: |
69
+ pip install clang-format
70
+ find src -name third_party -prune -o \( -name '*.cpp' -o -name '*.hpp' -o -name '*.cuh' -o -name '*.cu' -o -name '*.mm' -o -name '*.h' \) -print \
71
+ | xargs clang-format --dry-run --Werror
@@ -0,0 +1,47 @@
1
+ name: wheels
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+ workflow_dispatch:
7
+
8
+ # Build config lives in [tool.cibuildwheel] in pyproject.toml, so local
9
+ # `cibuildwheel` runs and CI use the exact same settings.
10
+
11
+ jobs:
12
+ wheels:
13
+ name: ${{ matrix.os }}
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, macos-latest, windows-latest]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ # CUDA toolkit for the Windows wheels (nvcc + cudart only; Linux gets
22
+ # CUDA from the manylinux_cuda images set in pyproject.toml). Build-time
23
+ # only — cudart is linked statically into the extension.
24
+ - uses: Jimver/cuda-toolkit@v0.2.35
25
+ if: runner.os == 'Windows'
26
+ with:
27
+ cuda: "12.9.1"
28
+ method: network
29
+ sub-packages: '["nvcc", "cudart"]'
30
+ - uses: pypa/cibuildwheel@v2.21.3
31
+ - uses: actions/upload-artifact@v4
32
+ with:
33
+ name: wheels-${{ matrix.os }}
34
+ path: wheelhouse/*.whl
35
+
36
+ sdist:
37
+ runs-on: ubuntu-latest
38
+ steps:
39
+ - uses: actions/checkout@v4
40
+ - uses: actions/setup-python@v5
41
+ with:
42
+ python-version: "3.12"
43
+ - run: pipx run build --sdist
44
+ - uses: actions/upload-artifact@v4
45
+ with:
46
+ name: sdist
47
+ path: dist/*.tar.gz
@@ -0,0 +1,11 @@
1
+ build/
2
+ dist/
3
+ wheelhouse/
4
+ *.so
5
+ *.pyd
6
+ __pycache__/
7
+ .pytest_cache/
8
+ .venv/
9
+ .mypy_cache/
10
+ .ruff_cache/
11
+ .DS_Store
@@ -0,0 +1,164 @@
1
+ cmake_minimum_required(VERSION 3.18)
2
+ project(auvux_dsp LANGUAGES C CXX)
3
+
4
+ set(CMAKE_CXX_STANDARD 17)
5
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6
+ set(CMAKE_C_STANDARD 11)
7
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
8
+ if(NOT CMAKE_BUILD_TYPE)
9
+ set(CMAKE_BUILD_TYPE Release)
10
+ endif()
11
+
12
+ find_package(pybind11 CONFIG REQUIRED)
13
+ find_package(Threads REQUIRED)
14
+
15
+ # FFT: PFFFT is always compiled (vendored, guaranteed fallback); vDSP is added
16
+ # on Apple. AUVUX_FFT=pffft drops vDSP even on Apple.
17
+ set(AUVUX_FFT "auto" CACHE STRING "FFT backend: auto | vdsp | pffft")
18
+
19
+ # GPU: Metal on Apple, CUDA where a toolkit is found, stub otherwise.
20
+ # Override with -DAUVUX_GPU=metal|cuda|none; pick a CUDA toolkit with the
21
+ # CUDACXX environment variable.
22
+ set(AUVUX_GPU "auto" CACHE STRING "GPU backend: auto | metal | cuda | none")
23
+ if(AUVUX_GPU STREQUAL "auto")
24
+ if(APPLE)
25
+ set(AUVUX_GPU "metal")
26
+ else()
27
+ include(CheckLanguage)
28
+ check_language(CUDA)
29
+ if(CMAKE_CUDA_COMPILER)
30
+ set(AUVUX_GPU "cuda")
31
+ else()
32
+ set(AUVUX_GPU "none")
33
+ endif()
34
+ endif()
35
+ endif()
36
+ message(STATUS "auvux-dsp GPU backend: ${AUVUX_GPU}")
37
+
38
+ pybind11_add_module(_native
39
+ src/fft/fft.cpp
40
+ src/fft/fft_pffft.cpp
41
+ src/common/threadpool.cpp
42
+ src/common/host_pool.cpp
43
+ src/ops/window.cpp
44
+ src/ops/ola.cpp
45
+ src/ops/stft/stft_cpu.cpp
46
+ src/ops/istft/istft_cpu.cpp
47
+ src/ops/mel/mel_filterbank.cpp
48
+ src/ops/mel/mel_cpu.cpp
49
+ src/ops/cqt/cqt_filterbank.cpp
50
+ src/ops/cqt/cqt_cpu.cpp
51
+ src/ops/chroma/chroma_cpu.cpp
52
+ src/gpu/gpu_stub.cpp
53
+ src/gpu/cqt_plan.cpp
54
+ src/third_party/pffft.c
55
+ src/bindings/module.cpp
56
+ src/bindings/bind_fft.cpp
57
+ src/bindings/bind_stft.cpp
58
+ src/bindings/bind_mel.cpp
59
+ src/bindings/bind_cqt.cpp
60
+ src/bindings/bind_util.cpp)
61
+ target_include_directories(_native PRIVATE src src/third_party)
62
+ target_link_libraries(_native PRIVATE Threads::Threads)
63
+ target_compile_definitions(_native PRIVATE AUVUX_HAVE_PFFFT=1)
64
+
65
+ if(AUVUX_FFT STREQUAL "vdsp" AND NOT APPLE)
66
+ message(FATAL_ERROR "AUVUX_FFT=vdsp requires macOS")
67
+ endif()
68
+ if(AUVUX_FFT STREQUAL "vdsp" OR (AUVUX_FFT STREQUAL "auto" AND APPLE))
69
+ target_sources(_native PRIVATE src/fft/fft_vdsp.cpp)
70
+ target_compile_definitions(_native PRIVATE AUVUX_HAVE_VDSP=1)
71
+ find_library(ACCELERATE Accelerate REQUIRED)
72
+ target_link_libraries(_native PRIVATE ${ACCELERATE})
73
+ message(STATUS "auvux-dsp FFT backends: vdsp, pffft")
74
+ elseif(AUVUX_FFT STREQUAL "auto" OR AUVUX_FFT STREQUAL "pffft")
75
+ message(STATUS "auvux-dsp FFT backends: pffft")
76
+ else()
77
+ message(FATAL_ERROR "Unknown AUVUX_FFT='${AUVUX_FFT}' (use auto, vdsp, or pffft)")
78
+ endif()
79
+
80
+ if(AUVUX_GPU STREQUAL "metal")
81
+ if(NOT APPLE)
82
+ message(FATAL_ERROR "AUVUX_GPU=metal requires macOS")
83
+ endif()
84
+ enable_language(OBJCXX)
85
+ set(CMAKE_OBJCXX_STANDARD 17)
86
+ set(CMAKE_OBJCXX_STANDARD_REQUIRED ON)
87
+ # Kernel sources live in real .metal files; a generated header embeds them
88
+ # as strings for runtime compilation (no Metal toolchain needed at build).
89
+ set(auvux_metal_kernels
90
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/common.metal
91
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/stft.metal
92
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/mel.metal
93
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/metal/kernels/cqt.metal)
94
+ set(auvux_metal_gen ${CMAKE_CURRENT_BINARY_DIR}/generated/auvux_metal_kernels.h)
95
+ string(JOIN "," auvux_metal_kernels_arg ${auvux_metal_kernels})
96
+ add_custom_command(
97
+ OUTPUT ${auvux_metal_gen}
98
+ COMMAND ${CMAKE_COMMAND} -DOUT=${auvux_metal_gen} "-DINPUTS=${auvux_metal_kernels_arg}"
99
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embed_text.cmake
100
+ DEPENDS ${auvux_metal_kernels} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/embed_text.cmake
101
+ COMMENT "Embedding Metal kernel sources"
102
+ VERBATIM)
103
+ set(auvux_metal_srcs
104
+ src/gpu/metal/metal_common.mm
105
+ src/gpu/metal/metal_stft.mm
106
+ src/gpu/metal/metal_mel.mm
107
+ src/gpu/metal/metal_cqt.mm)
108
+ target_sources(_native PRIVATE ${auvux_metal_srcs} ${auvux_metal_gen})
109
+ set_source_files_properties(${auvux_metal_srcs} PROPERTIES COMPILE_OPTIONS "-fobjc-arc")
110
+ target_include_directories(_native PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/generated)
111
+ target_compile_definitions(_native PRIVATE AUVUX_METAL=1)
112
+ find_library(METAL_FRAMEWORK Metal REQUIRED)
113
+ find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED)
114
+ target_link_libraries(_native PRIVATE ${METAL_FRAMEWORK} ${FOUNDATION_FRAMEWORK})
115
+ elseif(AUVUX_GPU STREQUAL "cuda")
116
+ # Detect a user-supplied arch list before enable_language fills in the
117
+ # toolkit default and masks the distinction.
118
+ if(DEFINED CMAKE_CUDA_ARCHITECTURES AND CMAKE_CUDA_ARCHITECTURES)
119
+ set(auvux_user_archs TRUE)
120
+ else()
121
+ set(auvux_user_archs FALSE)
122
+ endif()
123
+ enable_language(CUDA)
124
+ target_sources(_native PRIVATE
125
+ src/gpu/cuda/cuda_common.cu
126
+ src/gpu/cuda/cuda_stft.cu
127
+ src/gpu/cuda/cuda_mel.cu
128
+ src/gpu/cuda/cuda_cqt.cu)
129
+ target_compile_definitions(_native PRIVATE AUVUX_CUDA=1)
130
+ if(NOT auvux_user_archs)
131
+ # Distributable fatbin: SASS per supported generation plus PTX (the plain
132
+ # "90" entry embeds both) so future architectures JIT.
133
+ set(auvux_archs 75-real 80-real 86-real 89-real 90)
134
+ set(AUVUX_MIN_CC 75)
135
+ if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13)
136
+ list(PREPEND auvux_archs 60-real 70-real) # CUDA 13 dropped pre-Turing
137
+ set(AUVUX_MIN_CC 60)
138
+ endif()
139
+ if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
140
+ list(APPEND auvux_archs 100-real 120-real) # Blackwell
141
+ endif()
142
+ set(CMAKE_CUDA_ARCHITECTURES ${auvux_archs})
143
+ # Devices below the oldest shipped SASS get a clean CPU fallback.
144
+ target_compile_definitions(_native PRIVATE AUVUX_MIN_CC=${AUVUX_MIN_CC})
145
+ endif()
146
+ set_property(TARGET _native PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
147
+ # Static cudart: only the NVIDIA driver is needed at runtime; without one
148
+ # the wheel degrades to gpu_available() == False.
149
+ set_property(TARGET _native PROPERTY CUDA_RUNTIME_LIBRARY Static)
150
+ elseif(NOT AUVUX_GPU STREQUAL "none")
151
+ message(FATAL_ERROR "Unknown AUVUX_GPU='${AUVUX_GPU}' (use auto, metal, cuda, or none)")
152
+ endif()
153
+
154
+ if(MSVC)
155
+ target_compile_options(_native PRIVATE
156
+ $<$<COMPILE_LANGUAGE:C,CXX>:/O2;/fp:fast>
157
+ $<$<COMPILE_LANGUAGE:CUDA>:-O3;-Xcompiler=/fp:fast>)
158
+ else()
159
+ target_compile_options(_native PRIVATE
160
+ $<$<COMPILE_LANGUAGE:C,CXX>:-O3;-ffast-math;-funroll-loops>
161
+ $<$<COMPILE_LANGUAGE:CUDA>:-O3>)
162
+ endif()
163
+
164
+ install(TARGETS _native DESTINATION auvux/dsp)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Peter Kiers (Auvux)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: auvux-dsp
3
+ Version: 0.1.0.dev0
4
+ Summary: Fast differentiable audio transforms (STFT, mel, MFCC, CQT, chroma) on CPU and GPU
5
+ Keywords: audio,dsp,stft,mel,mfcc,cqt,chroma,spectrogram,gpu
6
+ Author-Email: Peter Kiers <pkiers.1983@gmail.com>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ License-File: THIRD_PARTY_LICENSES
10
+ Classifier: Programming Language :: C++
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: numpy>=1.22
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest; extra == "test"
17
+ Requires-Dist: librosa; extra == "test"
18
+ Requires-Dist: soxr; extra == "test"
19
+ Requires-Dist: torch; extra == "test"
20
+ Description-Content-Type: text/markdown
21
+
22
+ # auvux-dsp
23
+
24
+ Fast differentiable audio transforms (STFT, iSTFT, mel, MFCC, CQT/VQT, chroma)
25
+ with native CPU backends (vDSP on macOS, PFFFT elsewhere) and GPU support
26
+ (Metal, CUDA). Forward *and* backward passes run in native kernels; PyTorch
27
+ autograd plugs in when you pass a torch tensor, and torch is never required
28
+ otherwise.
29
+
30
+ ```python
31
+ import auvux.dsp as dsp
32
+
33
+ mel = dsp.MelSpectrogram(sr=44100, n_fft=2048, hop_length=512, n_mels=128)
34
+ S = mel(y) # numpy in -> numpy out
35
+ S = mel(y, backend="gpu") # Metal / CUDA kernels
36
+
37
+ y = torch.tensor(clip, requires_grad=True)
38
+ loss = mel(y, output="db").sum() # native forward
39
+ loss.backward() # native adjoint kernel, no torch recompute
40
+ ```
41
+
42
+ ```
43
+ pip install auvux-dsp --pre # preview release; drop --pre once 0.1.0 is out
44
+ ```
45
+
46
+ ## Performance
47
+
48
+ Benchmarks against librosa and torchaudio (forward passes and full training
49
+ steps, CPU / staged GPU / GPU-resident) live in `benchmarks/benchmark.py`:
50
+
51
+ ```
52
+ python benchmarks/benchmark.py
53
+ ```
54
+
55
+ *Results table to be published with the first release.*
56
+
57
+ ### Complex STFT layout
58
+
59
+ Complex spectra are returned as `(..., bins, frames)` backed by frame-major
60
+ memory — each frame's spectrum contiguous, the freq axis strided. This is the
61
+ same physical layout both references use (librosa allocates its stft output
62
+ `order='F'`; `torch.stft` returns a transposed view over frame-major memory),
63
+ so values *and* bytes match librosa, and the GPU-resident path returns a
64
+ tensor with the exact strides `torch.stft` produces. It is also what makes
65
+ the STFT fast: no backend ever materializes the bins-major transpose. `istft`
66
+ accepts both this layout (zero-copy) and compact C-order arrays. Float
67
+ outputs (power/db/mel/...) are ordinary C-contiguous arrays.
68
+
69
+ Status: under construction.
70
+ - CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
71
+ forward and native backward, librosa-parity tested, torch autograd built in.
72
+ - Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
73
+ parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
74
+ end (DLPack), and backend="auto" routes them there — no flags needed.
75
+ - CUDA: kernel-for-kernel twin of the Metal backend including the resident
76
+ paths; parity-tested on NVIDIA hardware (RTX 4090, CUDA 12.9), with pinned
77
+ double-buffered staging for the numpy-in/numpy-out GPU paths.
78
+ - Pending: iCQT.
79
+
80
+ ## Development
81
+
82
+ ```
83
+ pip install scikit-build-core pybind11 numpy pytest
84
+ ./scripts/dev-build.sh
85
+ pytest
86
+ ```
87
+
88
+ Note for packagers: `python/auvux/` is a PEP 420 namespace package — it must
89
+ never contain an `__init__.py`, or it will shadow sibling `auvux-*`
90
+ distributions.
@@ -0,0 +1,69 @@
1
+ # auvux-dsp
2
+
3
+ Fast differentiable audio transforms (STFT, iSTFT, mel, MFCC, CQT/VQT, chroma)
4
+ with native CPU backends (vDSP on macOS, PFFFT elsewhere) and GPU support
5
+ (Metal, CUDA). Forward *and* backward passes run in native kernels; PyTorch
6
+ autograd plugs in when you pass a torch tensor, and torch is never required
7
+ otherwise.
8
+
9
+ ```python
10
+ import auvux.dsp as dsp
11
+
12
+ mel = dsp.MelSpectrogram(sr=44100, n_fft=2048, hop_length=512, n_mels=128)
13
+ S = mel(y) # numpy in -> numpy out
14
+ S = mel(y, backend="gpu") # Metal / CUDA kernels
15
+
16
+ y = torch.tensor(clip, requires_grad=True)
17
+ loss = mel(y, output="db").sum() # native forward
18
+ loss.backward() # native adjoint kernel, no torch recompute
19
+ ```
20
+
21
+ ```
22
+ pip install auvux-dsp --pre # preview release; drop --pre once 0.1.0 is out
23
+ ```
24
+
25
+ ## Performance
26
+
27
+ Benchmarks against librosa and torchaudio (forward passes and full training
28
+ steps, CPU / staged GPU / GPU-resident) live in `benchmarks/benchmark.py`:
29
+
30
+ ```
31
+ python benchmarks/benchmark.py
32
+ ```
33
+
34
+ *Results table to be published with the first release.*
35
+
36
+ ### Complex STFT layout
37
+
38
+ Complex spectra are returned as `(..., bins, frames)` backed by frame-major
39
+ memory — each frame's spectrum contiguous, the freq axis strided. This is the
40
+ same physical layout both references use (librosa allocates its stft output
41
+ `order='F'`; `torch.stft` returns a transposed view over frame-major memory),
42
+ so values *and* bytes match librosa, and the GPU-resident path returns a
43
+ tensor with the exact strides `torch.stft` produces. It is also what makes
44
+ the STFT fast: no backend ever materializes the bins-major transpose. `istft`
45
+ accepts both this layout (zero-copy) and compact C-order arrays. Float
46
+ outputs (power/db/mel/...) are ordinary C-contiguous arrays.
47
+
48
+ Status: under construction.
49
+ - CPU (vDSP/PFFFT): STFT, ISTFT, MelSpectrogram, MFCC, CQT, VQT, Chroma —
50
+ forward and native backward, librosa-parity tested, torch autograd built in.
51
+ - Metal: all of the above on GPU (n_fft <= 4096), forward + backward,
52
+ parity-tested against the CPU path. torch MPS tensors stay on the GPU end to
53
+ end (DLPack), and backend="auto" routes them there — no flags needed.
54
+ - CUDA: kernel-for-kernel twin of the Metal backend including the resident
55
+ paths; parity-tested on NVIDIA hardware (RTX 4090, CUDA 12.9), with pinned
56
+ double-buffered staging for the numpy-in/numpy-out GPU paths.
57
+ - Pending: iCQT.
58
+
59
+ ## Development
60
+
61
+ ```
62
+ pip install scikit-build-core pybind11 numpy pytest
63
+ ./scripts/dev-build.sh
64
+ pytest
65
+ ```
66
+
67
+ Note for packagers: `python/auvux/` is a PEP 420 namespace package — it must
68
+ never contain an `__init__.py`, or it will shadow sibling `auvux-*`
69
+ distributions.
@@ -0,0 +1,55 @@
1
+ auvux-dsp bundles the following third-party components.
2
+
3
+ ================================================================================
4
+ PFFFT — src/third_party/pffft.{c,h}
5
+ Copyright (c) 2013 Julien Pommier
6
+ Based on FFTPACK by Dr Paul Swarztrauber (NCAR), under the FFTPACK license.
7
+ ================================================================================
8
+
9
+ Copyright (c) 2004 the University Corporation for Atmospheric Research
10
+ ("UCAR"). All rights reserved. Developed by NCAR's Computational and
11
+ Information Systems Laboratory, UCAR, www.cisl.ucar.edu.
12
+
13
+ Redistribution and use of the Software in source and binary forms, with or
14
+ without modification, is permitted provided that the following conditions are
15
+ met:
16
+
17
+ - Neither the names of NCAR's Computational and Information Systems Laboratory,
18
+ the University Corporation for Atmospheric Research, nor the names of its
19
+ sponsors or contributors may be used to endorse or promote products derived
20
+ from this Software without specific prior written permission.
21
+ - Redistributions of source code must retain the above copyright notices, this
22
+ list of conditions, and the disclaimer below.
23
+ - Redistributions in binary form must reproduce the above copyright notice,
24
+ this list of conditions, and the disclaimer below in the documentation and/or
25
+ other materials provided with the distribution.
26
+
27
+ THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28
+ IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30
+ CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT,
31
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY,
32
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
34
+
35
+ ================================================================================
36
+ DLPack — src/third_party/dlpack.h
37
+ Copyright (c) 2017 by DLPack Contributors
38
+ ================================================================================
39
+
40
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not
41
+ use this file except in compliance with the License. You may obtain a copy of
42
+ the License at
43
+
44
+ http://www.apache.org/licenses/LICENSE-2.0
45
+
46
+ Unless required by applicable law or agreed to in writing, software
47
+ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
48
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
49
+ License for the specific language governing permissions and limitations under
50
+ the License.
51
+
52
+ ================================================================================
53
+ pybind11 (build-time only; not redistributed in the wheel)
54
+ Copyright (c) 2016 Wenzel Jakob — BSD-3-Clause.
55
+ ================================================================================