fht-cpu 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fht_cpu-1.0.0/.github/workflows/test.yml +42 -0
- fht_cpu-1.0.0/.github/workflows/wheels.yml +69 -0
- fht_cpu-1.0.0/.gitignore +11 -0
- fht_cpu-1.0.0/.vscode/settings.json +4 -0
- fht_cpu-1.0.0/CMakeLists.txt +239 -0
- fht_cpu-1.0.0/CMakePresets.json +72 -0
- fht_cpu-1.0.0/LICENSE +37 -0
- fht_cpu-1.0.0/PKG-INFO +133 -0
- fht_cpu-1.0.0/README.md +117 -0
- fht_cpu-1.0.0/build_tools/repair_macos_wheel.sh +62 -0
- fht_cpu-1.0.0/cmake/CPM.cmake +1363 -0
- fht_cpu-1.0.0/cmake/fhtConfig.cmake.in +17 -0
- fht_cpu-1.0.0/data/optimization_results_v7.json +104 -0
- fht_cpu-1.0.0/include/fht/fht.h +143 -0
- fht_cpu-1.0.0/include/fht/fht_config.h +47 -0
- fht_cpu-1.0.0/include/fht/neon/fht_neon.h +21462 -0
- fht_cpu-1.0.0/include/fht/x86/fast_copy.h +74 -0
- fht_cpu-1.0.0/include/fht/x86/fht.h +4 -0
- fht_cpu-1.0.0/include/fht/x86/fht_avx.c +19671 -0
- fht_cpu-1.0.0/include/fht/x86/fht_impl.h +42 -0
- fht_cpu-1.0.0/include/fht/x86/fht_sse.c +26215 -0
- fht_cpu-1.0.0/include/fht/x86/fht_x86.h +38 -0
- fht_cpu-1.0.0/pixi.lock +3790 -0
- fht_cpu-1.0.0/pixi.toml +26 -0
- fht_cpu-1.0.0/pyproject.toml +47 -0
- fht_cpu-1.0.0/python/CMakeLists.txt +52 -0
- fht_cpu-1.0.0/python/benchmarks.py +77 -0
- fht_cpu-1.0.0/python/fht_cpu/__init__.py +253 -0
- fht_cpu-1.0.0/python/pyproject.toml +15 -0
- fht_cpu-1.0.0/python/src/fht_bindings.cpp +220 -0
- fht_cpu-1.0.0/python/tests/test_fht.py +213 -0
- fht_cpu-1.0.0/scripts/fht_neon_v7_sample.h +16114 -0
- fht_cpu-1.0.0/scripts/gen_neon_v7.py +1137 -0
- fht_cpu-1.0.0/scripts/gen_x86.py +697 -0
- fht_cpu-1.0.0/scripts/measurements/Makefile +23 -0
- fht_cpu-1.0.0/scripts/measurements/run_double.cpp +27 -0
- fht_cpu-1.0.0/scripts/measurements/run_float.cpp +27 -0
- fht_cpu-1.0.0/scripts/optimize_v7_grid.py +908 -0
- fht_cpu-1.0.0/tests/fht_reference.h +111 -0
- fht_cpu-1.0.0/tests/test_basic.cpp +125 -0
- fht_cpu-1.0.0/tests/test_fht.cpp +489 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
strategy:
|
|
15
|
+
fail-fast: false
|
|
16
|
+
matrix:
|
|
17
|
+
include:
|
|
18
|
+
- os: ubuntu-24.04 # x86_64
|
|
19
|
+
arch: x86_64
|
|
20
|
+
- os: ubuntu-24.04-arm # arm64 (GitHub native ARM runner)
|
|
21
|
+
arch: arm64
|
|
22
|
+
runs-on: ${{ matrix.os }}
|
|
23
|
+
name: test (${{ matrix.arch }})
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v5
|
|
27
|
+
|
|
28
|
+
- uses: prefix-dev/setup-pixi@v0.8.8
|
|
29
|
+
with:
|
|
30
|
+
locked: false
|
|
31
|
+
|
|
32
|
+
- name: Build
|
|
33
|
+
run: pixi run build
|
|
34
|
+
|
|
35
|
+
- name: Run Python tests
|
|
36
|
+
run: pixi run test
|
|
37
|
+
|
|
38
|
+
- name: Build and run C++ tests
|
|
39
|
+
run: |
|
|
40
|
+
pixi run cmake -B build -DFHT_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release
|
|
41
|
+
pixi run cmake --build build -j
|
|
42
|
+
pixi run ctest --test-dir build --output-on-failure
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
name: Build wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
# Manual trigger from GitHub UI
|
|
5
|
+
workflow_dispatch:
|
|
6
|
+
# Auto on version tags
|
|
7
|
+
push:
|
|
8
|
+
tags: ["v*"]
|
|
9
|
+
|
|
10
|
+
env:
|
|
11
|
+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build:
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
include:
|
|
19
|
+
- os: ubuntu-24.04
|
|
20
|
+
cibw_archs: x86_64
|
|
21
|
+
- os: ubuntu-24.04-arm
|
|
22
|
+
cibw_archs: aarch64
|
|
23
|
+
- os: macos-15
|
|
24
|
+
cibw_archs: arm64
|
|
25
|
+
runs-on: ${{ matrix.os }}
|
|
26
|
+
name: wheels (${{ matrix.os }}, ${{ matrix.cibw_archs }})
|
|
27
|
+
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v5
|
|
30
|
+
|
|
31
|
+
- uses: pypa/cibuildwheel@v3.4
|
|
32
|
+
env:
|
|
33
|
+
CIBW_ARCHS: ${{ matrix.cibw_archs }}
|
|
34
|
+
|
|
35
|
+
- uses: actions/upload-artifact@v5
|
|
36
|
+
with:
|
|
37
|
+
name: wheels-${{ matrix.os }}-${{ matrix.cibw_archs }}
|
|
38
|
+
path: wheelhouse/*.whl
|
|
39
|
+
|
|
40
|
+
# Build the source distribution
|
|
41
|
+
sdist:
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/checkout@v5
|
|
45
|
+
- run: pipx run build --sdist
|
|
46
|
+
- uses: actions/upload-artifact@v5
|
|
47
|
+
with:
|
|
48
|
+
name: sdist
|
|
49
|
+
path: dist/*.tar.gz
|
|
50
|
+
|
|
51
|
+
# TODO: Add PyPI publishing once trusted publishing is configured.
|
|
52
|
+
# Steps: go to pypi.org → your project → Publishing → add GitHub as a
|
|
53
|
+
# trusted publisher (org/repo + workflow file "wheels.yml").
|
|
54
|
+
# Then uncomment:
|
|
55
|
+
#
|
|
56
|
+
# publish:
|
|
57
|
+
# if: startsWith(github.ref, 'refs/tags/v')
|
|
58
|
+
# needs: [build, sdist]
|
|
59
|
+
# runs-on: ubuntu-latest
|
|
60
|
+
# permissions:
|
|
61
|
+
# id-token: write
|
|
62
|
+
# steps:
|
|
63
|
+
# - uses: actions/download-artifact@v5
|
|
64
|
+
# with:
|
|
65
|
+
# path: dist
|
|
66
|
+
# merge-multiple: true
|
|
67
|
+
# - uses: pypa/gh-action-pypi-publish@release/v1
|
|
68
|
+
# with:
|
|
69
|
+
# packages-dir: dist/
|
fht_cpu-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.14)
|
|
2
|
+
|
|
3
|
+
project(fht
|
|
4
|
+
VERSION 1.0.0
|
|
5
|
+
DESCRIPTION "Fast Hadamard Transform library with SSE/AVX and NEON support"
|
|
6
|
+
LANGUAGES CXX C
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
include(GNUInstallDirs)
|
|
10
|
+
include(CMakePackageConfigHelpers)
|
|
11
|
+
|
|
12
|
+
# Options
|
|
13
|
+
option(FHT_OPTIMIZE_FOR_HOST "Re-optimize FHT kernels for this host (automatic on ARM64, via the fht_optimize_x86 target on x86_64)" OFF)
|
|
14
|
+
option(FHT_BUILD_TESTS "Build test programs" OFF)
|
|
15
|
+
# Generate list from 2 to 30
|
|
16
|
+
set(FHT_OPTIMIZATION_SIZES "")
|
|
17
|
+
foreach(i RANGE 2 30)
|
|
18
|
+
list(APPEND FHT_OPTIMIZATION_SIZES ${i})
|
|
19
|
+
endforeach()
|
|
20
|
+
set(FHT_OPTIMIZATION_SIZES ${FHT_OPTIMIZATION_SIZES} CACHE STRING
|
|
21
|
+
"Sizes (log_n) to optimize when FHT_OPTIMIZE_FOR_HOST is ON")
|
|
22
|
+
|
|
23
|
+
# Header-only interface library
|
|
24
|
+
add_library(fht INTERFACE)
|
|
25
|
+
add_library(fht::fht ALIAS fht)
|
|
26
|
+
|
|
27
|
+
target_include_directories(fht INTERFACE
|
|
28
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
|
29
|
+
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
target_compile_features(fht INTERFACE cxx_std_11)
|
|
33
|
+
|
|
34
|
+
# Platform detection and configuration
|
|
35
|
+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)$")
|
|
36
|
+
message(STATUS "FHT: ARM64 platform detected, using NEON implementation")
|
|
37
|
+
target_compile_definitions(fht INTERFACE FHT_PLATFORM_ARM=1)
|
|
38
|
+
|
|
39
|
+
# Optional re-optimization at build time
|
|
40
|
+
if(FHT_OPTIMIZE_FOR_HOST)
|
|
41
|
+
find_package(Python3 REQUIRED COMPONENTS Interpreter)
|
|
42
|
+
|
|
43
|
+
set(FHT_OPT_HEADER "${CMAKE_CURRENT_BINARY_DIR}/fht_neon_optimized.h")
|
|
44
|
+
|
|
45
|
+
# Convert list to space-separated string for the command
|
|
46
|
+
string(REPLACE ";" " " FHT_SIZES_STR "${FHT_OPTIMIZATION_SIZES}")
|
|
47
|
+
|
|
48
|
+
add_custom_command(
|
|
49
|
+
OUTPUT ${FHT_OPT_HEADER}
|
|
50
|
+
COMMAND ${Python3_EXECUTABLE}
|
|
51
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/optimize_v7_grid.py
|
|
52
|
+
--sizes ${FHT_OPTIMIZATION_SIZES}
|
|
53
|
+
--output ${FHT_OPT_HEADER}
|
|
54
|
+
--no-compare
|
|
55
|
+
DEPENDS
|
|
56
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/optimize_v7_grid.py
|
|
57
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_neon_v7.py
|
|
58
|
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts
|
|
59
|
+
COMMENT "Optimizing FHT NEON kernels for this host (sizes: ${FHT_SIZES_STR})..."
|
|
60
|
+
VERBATIM
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
add_custom_target(fht_optimize ALL DEPENDS ${FHT_OPT_HEADER})
|
|
64
|
+
|
|
65
|
+
target_compile_definitions(fht INTERFACE
|
|
66
|
+
FHT_USE_OPTIMIZED_HEADER=1
|
|
67
|
+
FHT_OPTIMIZED_HEADER_PATH="${FHT_OPT_HEADER}"
|
|
68
|
+
)
|
|
69
|
+
target_include_directories(fht INTERFACE
|
|
70
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
message(STATUS "FHT: Re-optimization enabled. Header will be generated during build.")
|
|
74
|
+
endif()
|
|
75
|
+
|
|
76
|
+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64|AMD64)$")
|
|
77
|
+
message(STATUS "FHT: x86_64 platform detected, using SSE/AVX implementation")
|
|
78
|
+
target_compile_definitions(fht INTERFACE FHT_PLATFORM_X86=1)
|
|
79
|
+
|
|
80
|
+
# Check for AVX support
|
|
81
|
+
include(CheckCXXCompilerFlag)
|
|
82
|
+
check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
|
|
83
|
+
if(COMPILER_SUPPORTS_AVX)
|
|
84
|
+
message(STATUS "FHT: AVX support detected, recommend adding -mavx to compile flags")
|
|
85
|
+
endif()
|
|
86
|
+
|
|
87
|
+
# Optional re-optimization at build time (x86)
|
|
88
|
+
if(FHT_OPTIMIZE_FOR_HOST)
|
|
89
|
+
find_package(Python3 REQUIRED COMPONENTS Interpreter)
|
|
90
|
+
|
|
91
|
+
# Fetch Google Benchmark via CPM (required for optimization benchmarks)
|
|
92
|
+
include(cmake/CPM.cmake)
|
|
93
|
+
CPMAddPackage(
|
|
94
|
+
NAME benchmark
|
|
95
|
+
GITHUB_REPOSITORY google/benchmark
|
|
96
|
+
GIT_TAG v1.8.3
|
|
97
|
+
OPTIONS
|
|
98
|
+
"BENCHMARK_ENABLE_TESTING OFF"
|
|
99
|
+
"BENCHMARK_ENABLE_INSTALL OFF"
|
|
100
|
+
"BENCHMARK_ENABLE_GTEST_TESTS OFF"
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
set(FHT_X86_OPT_HEADER "${CMAKE_CURRENT_BINARY_DIR}/fht_x86_optimized.h")
|
|
104
|
+
set(FHT_X86_HOF "${CMAKE_CURRENT_BINARY_DIR}/hall_of_fame_x86.txt")
|
|
105
|
+
|
|
106
|
+
add_custom_command(
|
|
107
|
+
OUTPUT ${FHT_X86_OPT_HEADER}
|
|
108
|
+
COMMAND ${CMAKE_COMMAND} -E env
|
|
109
|
+
"BENCHMARK_INCLUDE=$<TARGET_PROPERTY:benchmark,INTERFACE_INCLUDE_DIRECTORIES>"
|
|
110
|
+
"BENCHMARK_LIB=$<TARGET_FILE_DIR:benchmark>"
|
|
111
|
+
${Python3_EXECUTABLE}
|
|
112
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_x86.py
|
|
113
|
+
--output ${FHT_X86_OPT_HEADER}
|
|
114
|
+
--hall-of-fame ${FHT_X86_HOF}
|
|
115
|
+
--measurements-dir ${CMAKE_CURRENT_SOURCE_DIR}/scripts/measurements
|
|
116
|
+
DEPENDS
|
|
117
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_x86.py
|
|
118
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/measurements/Makefile
|
|
119
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/measurements/run_float.cpp
|
|
120
|
+
${CMAKE_CURRENT_SOURCE_DIR}/scripts/measurements/run_double.cpp
|
|
121
|
+
benchmark
|
|
122
|
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts
|
|
123
|
+
COMMENT "Optimizing FHT x86 kernels for this host (this may take a while)..."
|
|
124
|
+
VERBATIM
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
add_custom_target(fht_optimize_x86 DEPENDS ${FHT_X86_OPT_HEADER})
|
|
128
|
+
|
|
129
|
+
# Note: Unlike ARM, x86 optimization is NOT automatic (the target is not part of ALL)
|
|
130
|
+
# Run manually with: cmake --build . --target fht_optimize_x86
|
|
131
|
+
message(STATUS "FHT: x86 optimization available. Run 'cmake --build . --target fht_optimize_x86' to generate optimized code.")
|
|
132
|
+
endif()
|
|
133
|
+
|
|
134
|
+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i386|i686)$")
|
|
135
|
+
message(STATUS "FHT: x86 (32-bit) platform detected, using SSE implementation")
|
|
136
|
+
target_compile_definitions(fht INTERFACE FHT_PLATFORM_X86=1)
|
|
137
|
+
|
|
138
|
+
else()
|
|
139
|
+
message(WARNING "FHT: Unknown platform '${CMAKE_SYSTEM_PROCESSOR}', build may fail")
|
|
140
|
+
endif()
|
|
141
|
+
|
|
142
|
+
# Installation
|
|
143
|
+
install(TARGETS fht
|
|
144
|
+
EXPORT fhtTargets
|
|
145
|
+
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
install(DIRECTORY include/fht
|
|
149
|
+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
install(EXPORT fhtTargets
|
|
153
|
+
FILE fhtTargets.cmake
|
|
154
|
+
NAMESPACE fht::
|
|
155
|
+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fht
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
# Package configuration
|
|
159
|
+
configure_package_config_file(
|
|
160
|
+
${CMAKE_CURRENT_SOURCE_DIR}/cmake/fhtConfig.cmake.in
|
|
161
|
+
${CMAKE_CURRENT_BINARY_DIR}/fhtConfig.cmake
|
|
162
|
+
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fht
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
write_basic_package_version_file(
|
|
166
|
+
${CMAKE_CURRENT_BINARY_DIR}/fhtConfigVersion.cmake
|
|
167
|
+
VERSION ${PROJECT_VERSION}
|
|
168
|
+
COMPATIBILITY SameMajorVersion
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
install(FILES
|
|
172
|
+
${CMAKE_CURRENT_BINARY_DIR}/fhtConfig.cmake
|
|
173
|
+
${CMAKE_CURRENT_BINARY_DIR}/fhtConfigVersion.cmake
|
|
174
|
+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fht
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# Tests
|
|
178
|
+
if(FHT_BUILD_TESTS)
|
|
179
|
+
enable_testing()
|
|
180
|
+
|
|
181
|
+
# Use CPM to fetch GoogleTest
|
|
182
|
+
include(cmake/CPM.cmake)
|
|
183
|
+
|
|
184
|
+
CPMAddPackage(
|
|
185
|
+
NAME googletest
|
|
186
|
+
GITHUB_REPOSITORY google/googletest
|
|
187
|
+
GIT_TAG v1.14.0
|
|
188
|
+
OPTIONS
|
|
189
|
+
"INSTALL_GTEST OFF"
|
|
190
|
+
"gtest_force_shared_crt ON"
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Platform-specific compile options
|
|
194
|
+
set(FHT_TEST_COMPILE_OPTIONS "")
|
|
195
|
+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)$")
|
|
196
|
+
list(APPEND FHT_TEST_COMPILE_OPTIONS -march=armv8-a+simd)
|
|
197
|
+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64|AMD64)$")
|
|
198
|
+
if(COMPILER_SUPPORTS_AVX)
|
|
199
|
+
list(APPEND FHT_TEST_COMPILE_OPTIONS -mavx)
|
|
200
|
+
endif()
|
|
201
|
+
endif()
|
|
202
|
+
|
|
203
|
+
# Basic test (legacy, quick sanity check)
|
|
204
|
+
add_executable(fht_test_basic tests/test_basic.cpp)
|
|
205
|
+
target_link_libraries(fht_test_basic PRIVATE fht::fht)
|
|
206
|
+
target_compile_options(fht_test_basic PRIVATE ${FHT_TEST_COMPILE_OPTIONS})
|
|
207
|
+
add_test(NAME fht_basic COMMAND fht_test_basic)
|
|
208
|
+
|
|
209
|
+
# Comprehensive GoogleTest suite
|
|
210
|
+
add_executable(fht_test tests/test_fht.cpp)
|
|
211
|
+
target_link_libraries(fht_test PRIVATE fht::fht GTest::gtest_main)
|
|
212
|
+
target_include_directories(fht_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests)
|
|
213
|
+
target_compile_options(fht_test PRIVATE ${FHT_TEST_COMPILE_OPTIONS})
|
|
214
|
+
target_compile_features(fht_test PRIVATE cxx_std_17)
|
|
215
|
+
|
|
216
|
+
# If optimizing for host, tests must wait for the optimized header
|
|
217
|
+
if(TARGET fht_optimize)
|
|
218
|
+
add_dependencies(fht_test_basic fht_optimize)
|
|
219
|
+
add_dependencies(fht_test fht_optimize)
|
|
220
|
+
endif()
|
|
221
|
+
|
|
222
|
+
# Allow customizing max test size via cmake
|
|
223
|
+
if(DEFINED FHT_TEST_MAX_LOG_N)
|
|
224
|
+
target_compile_definitions(fht_test PRIVATE FHT_TEST_MAX_LOG_N=${FHT_TEST_MAX_LOG_N})
|
|
225
|
+
endif()
|
|
226
|
+
|
|
227
|
+
include(GoogleTest)
|
|
228
|
+
gtest_discover_tests(fht_test)
|
|
229
|
+
endif()
|
|
230
|
+
|
|
231
|
+
# Print summary
|
|
232
|
+
message(STATUS "")
|
|
233
|
+
message(STATUS "FHT Configuration Summary:")
|
|
234
|
+
message(STATUS " Version: ${PROJECT_VERSION}")
|
|
235
|
+
message(STATUS " Platform: ${CMAKE_SYSTEM_PROCESSOR}")
|
|
236
|
+
message(STATUS " Install prefix: ${CMAKE_INSTALL_PREFIX}")
|
|
237
|
+
message(STATUS " Optimize for host: ${FHT_OPTIMIZE_FOR_HOST}")
|
|
238
|
+
message(STATUS " Build tests: ${FHT_BUILD_TESTS}")
|
|
239
|
+
message(STATUS "")
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 6,
|
|
3
|
+
"cmakeMinimumRequired": {
|
|
4
|
+
"major": 3,
|
|
5
|
+
"minor": 21,
|
|
6
|
+
"patch": 0
|
|
7
|
+
},
|
|
8
|
+
"configurePresets": [
|
|
9
|
+
{
|
|
10
|
+
"name": "default",
|
|
11
|
+
"displayName": "Default (Homebrew Clang)",
|
|
12
|
+
"description": "Default build with Homebrew LLVM/Clang",
|
|
13
|
+
"binaryDir": "${sourceDir}/build",
|
|
14
|
+
"cacheVariables": {
|
|
15
|
+
"CMAKE_C_COMPILER": "/opt/homebrew/opt/llvm/bin/clang",
|
|
16
|
+
"CMAKE_CXX_COMPILER": "/opt/homebrew/opt/llvm/bin/clang++",
|
|
17
|
+
"CMAKE_BUILD_TYPE": "Release"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"name": "dev",
|
|
22
|
+
"displayName": "Development",
|
|
23
|
+
"description": "Development build with tests",
|
|
24
|
+
"inherits": "default",
|
|
25
|
+
"cacheVariables": {
|
|
26
|
+
"FHT_BUILD_TESTS": "ON",
|
|
27
|
+
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"name": "dev-optimize",
|
|
32
|
+
"displayName": "Development with Optimization",
|
|
33
|
+
"description": "Development build with tests and host optimization",
|
|
34
|
+
"inherits": "dev",
|
|
35
|
+
"cacheVariables": {
|
|
36
|
+
"FHT_OPTIMIZE_FOR_HOST": "ON"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"name": "appleclang",
|
|
41
|
+
"displayName": "AppleClang",
|
|
42
|
+
"description": "Build with system AppleClang",
|
|
43
|
+
"binaryDir": "${sourceDir}/build",
|
|
44
|
+
"cacheVariables": {
|
|
45
|
+
"CMAKE_BUILD_TYPE": "Release"
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"buildPresets": [
|
|
50
|
+
{
|
|
51
|
+
"name": "default",
|
|
52
|
+
"configurePreset": "default"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"name": "dev",
|
|
56
|
+
"configurePreset": "dev"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"name": "dev-optimize",
|
|
60
|
+
"configurePreset": "dev-optimize"
|
|
61
|
+
}
|
|
62
|
+
],
|
|
63
|
+
"testPresets": [
|
|
64
|
+
{
|
|
65
|
+
"name": "default",
|
|
66
|
+
"configurePreset": "dev",
|
|
67
|
+
"output": {
|
|
68
|
+
"outputOnFailure": true
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
}
|
fht_cpu-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Fast Hadamard Transform Library
|
|
2
|
+
================================
|
|
3
|
+
|
|
4
|
+
================================================================================
|
|
5
|
+
FHT Library - Overall License
|
|
6
|
+
================================================================================
|
|
7
|
+
|
|
8
|
+
BSD 3-Clause License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026, HPNALGS Group EPFL / PSI
|
|
11
|
+
|
|
12
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
13
|
+
|
|
14
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
17
|
+
|
|
18
|
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
21
|
+
|
|
22
|
+
================================================================================
|
|
23
|
+
FFHT - x86 SSE/AVX Implementation (derived code)
|
|
24
|
+
================================================================================
|
|
25
|
+
|
|
26
|
+
The x86 SSE/AVX implementation in this library (include/fht/x86) is derived from FFHT
|
|
27
|
+
(https://github.com/FALCONN-LIB/FFHT).
|
|
28
|
+
|
|
29
|
+
The MIT License (MIT)
|
|
30
|
+
|
|
31
|
+
Copyright (c) 2015 Alexandr Andoni, Piotr Indyk, Thijs Laarhoven, Ilya Razenshteyn, Ludwig Schmidt
|
|
32
|
+
|
|
33
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
34
|
+
|
|
35
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
36
|
+
|
|
37
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
fht_cpu-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: fht_cpu
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Fast Hadamard Transform with SIMD acceleration
|
|
5
|
+
License: MIT
|
|
6
|
+
Classifier: Development Status :: 4 - Beta
|
|
7
|
+
Classifier: Intended Audience :: Science/Research
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: C++
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering
|
|
12
|
+
Project-URL: Homepage, https://github.com/grigori-hpnalgs-lab/fht_cpu
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# FHT - Fast Hadamard Transform
|
|
18
|
+
|
|
19
|
+
High-performance Fast Hadamard Transform library with SIMD-optimized implementations for x86 (SSE/AVX) and ARM (NEON), and Python bindings via nanobind.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install fht_cpu
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
From source:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
git clone https://github.com/grigori-hpnalgs-lab/fht_cpu.git
|
|
31
|
+
cd fht_cpu
|
|
32
|
+
pip install .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Python Usage
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import numpy as np
|
|
39
|
+
from fht_cpu import fht
|
|
40
|
+
|
|
41
|
+
# 1D transform (in-place)
|
|
42
|
+
x = np.random.randn(1024).astype(np.float32)
|
|
43
|
+
fht(x)
|
|
44
|
+
|
|
45
|
+
# Allocating mode (returns new array, original unchanged)
|
|
46
|
+
y = fht(x, inplace=False)
|
|
47
|
+
|
|
48
|
+
# Preallocated output
|
|
49
|
+
out = np.empty_like(x)
|
|
50
|
+
fht(x, out=out)
|
|
51
|
+
|
|
52
|
+
# 2D — each row transformed in parallel via OpenMP
|
|
53
|
+
X = np.random.randn(1000, 2**16).astype(np.float32)
|
|
54
|
+
fht(X, axis=-1)
|
|
55
|
+
|
|
56
|
+
# Complex arrays (decomposes into real/imag, transforms separately)
|
|
57
|
+
z = np.random.randn(512).astype(np.complex128)
|
|
58
|
+
fht(z)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Supported dtypes: `float32`, `float64`, `complex64`, `complex128`.
|
|
62
|
+
|
|
63
|
+
The transform axis must have a power-of-2 length. For 2D arrays, rows (or columns) are processed in parallel with OpenMP — set thread count via `OMP_NUM_THREADS=N`.
|
|
64
|
+
|
|
65
|
+
## C/C++ Usage
|
|
66
|
+
|
|
67
|
+
Header-only. Just include and compile:
|
|
68
|
+
|
|
69
|
+
```cpp
|
|
70
|
+
#include <fht/fht.h>
|
|
71
|
+
|
|
72
|
+
float buf[1024];
|
|
73
|
+
fht_float(buf, 10); // log2(1024) = 10
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### C API
|
|
77
|
+
|
|
78
|
+
```c
|
|
79
|
+
int fht_float(float *buf, int log_n);
|
|
80
|
+
int fht_double(double *buf, int log_n);
|
|
81
|
+
int fht_float_oop(float *in, float *out, int log_n);
|
|
82
|
+
int fht_double_oop(double *in, double *out, int log_n);
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Returns 0 on success, 1 on invalid `log_n` (valid range: 0-30).
|
|
86
|
+
|
|
87
|
+
### CMake Integration
|
|
88
|
+
|
|
89
|
+
```cmake
|
|
90
|
+
# Via CPM (recommended)
|
|
91
|
+
CPMAddPackage("gh:grigori-hpnalgs-lab/fht_cpu@1.0.0")
|
|
92
|
+
target_link_libraries(myapp PRIVATE fht::fht)
|
|
93
|
+
|
|
94
|
+
# Or as subdirectory
|
|
95
|
+
add_subdirectory(fht)
|
|
96
|
+
target_link_libraries(myapp PRIVATE fht::fht)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Compile with `-mavx` on x86 for best performance.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
## Platform Support
|
|
103
|
+
|
|
104
|
+
| Platform | Float | Double |
|
|
105
|
+
|----------|-------|--------|
|
|
106
|
+
| x86_64 + AVX | yes | yes |
|
|
107
|
+
| x86_64 + SSE | yes | yes |
|
|
108
|
+
| ARM64 (NEON) | yes | yes |
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
## Re-optimizing the code for your CPU
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
cmake -B build -DFHT_OPTIMIZE_FOR_HOST=ON
|
|
115
|
+
cmake --build build
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Limitations
|
|
119
|
+
|
|
120
|
+
Known issues:
|
|
121
|
+
|
|
122
|
+
- [ ] **`inplace=False` / `out=` does copy + in-place** — we will add an out-of-place version soon; it only requires some changes to the codegen files.
|
|
123
|
+
- [ ] **Complex number support is not optimal** — we will provide separate kernels for complex inputs; currently we deinterleave into real and imaginary parts, apply the transform to each, and then recombine.
|
|
124
|
+
|
|
125
|
+
## Acknowledgments
|
|
126
|
+
|
|
127
|
+
The x86 AVX/SSE implementation is based on [FFHT](https://github.com/FALCONN-LIB/FFHT) from the [FALCONN](https://github.com/FALCONN-LIB/FALCONN) project by Alexandr Andoni, Piotr Indyk, Thijs Laarhoven, Ilya Razenshteyn, and Ludwig Schmidt. The original code was copied and integrated with minor modifications.
|
|
128
|
+
|
|
129
|
+
The ARM NEON implementation was written from scratch with auto-tuned code generation.
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
See [LICENSE](LICENSE).
|