kernelforge 0.1.2__tar.gz → 0.1.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kernelforge-0.1.13/.github/workflows/ci.yml +60 -0
- kernelforge-0.1.13/.github/workflows/release.yaml +108 -0
- {kernelforge-0.1.2 → kernelforge-0.1.13}/.gitignore +2 -0
- kernelforge-0.1.13/CMakeLists.txt +138 -0
- kernelforge-0.1.13/Makefile +12 -0
- kernelforge-0.1.13/PKG-INFO +188 -0
- kernelforge-0.1.13/README.md +171 -0
- kernelforge-0.1.13/environments/environment-dev-macos.yaml +33 -0
- kernelforge-0.1.13/environments/environment-dev.yaml +30 -0
- kernelforge-0.1.13/examples/desc_hess-gauss.ipynb +984 -0
- kernelforge-0.1.13/examples/desc_hess.ipynb +808 -0
- kernelforge-0.1.13/examples/ethanol.sdf +22 -0
- kernelforge-0.1.13/examples/fchl_example.ipynb +655 -0
- kernelforge-0.1.13/examples/fchl_hess.ipynb +419 -0
- kernelforge-0.1.13/examples/qm7_example.ipynb +1058 -0
- kernelforge-0.1.13/examples/regression_ethanol-Copy1.ipynb +359 -0
- kernelforge-0.1.13/examples/regression_forces.ipynb +602 -0
- kernelforge-0.1.13/examples/sgdml.ipynb +1004 -0
- {kernelforge-0.1.2 → kernelforge-0.1.13}/pyproject.toml +11 -12
- kernelforge-0.1.13/python/kernelforge/__init__.py +0 -0
- kernelforge-0.1.13/src/aligned_alloc64.hpp +31 -0
- {kernelforge-0.1.2 → kernelforge-0.1.13}/src/bindings.cpp +27 -2
- kernelforge-0.1.13/src/bindings_cholesky.cpp +178 -0
- kernelforge-0.1.13/src/bindings_fchl19.cpp +912 -0
- kernelforge-0.1.13/src/bindings_kernels.cpp +305 -0
- kernelforge-0.1.13/src/cholesky.cpp +232 -0
- kernelforge-0.1.13/src/cholesky.hpp +22 -0
- kernelforge-0.1.13/src/fchl19_representation.cpp +1984 -0
- kernelforge-0.1.13/src/fchl19_representation.hpp +173 -0
- kernelforge-0.1.13/src/invdist.cpp +98 -0
- kernelforge-0.1.13/src/invdist.hpp +36 -0
- kernelforge-0.1.13/src/invdist_bindings.cpp +96 -0
- kernelforge-0.1.13/src/kernels.cpp +1287 -0
- kernelforge-0.1.13/src/kernels.hpp +55 -0
- kernelforge-0.1.13/tests/test_cholesky.py +59 -0
- kernelforge-0.1.13/tests/test_fchl19.py +227 -0
- kernelforge-0.1.13/tests/test_fchl19_gradient.py +161 -0
- kernelforge-0.1.13/tests/test_hessian.py +123 -0
- kernelforge-0.1.13/tests/test_invdist.py +85 -0
- kernelforge-0.1.13/tests/test_jacobian.py +155 -0
- kernelforge-0.1.13/tests/test_kernels.py +113 -0
- kernelforge-0.1.13/tests/test_rfp.py +98 -0
- kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp310-cp310-macosx_15_0_arm64.whl +0 -0
- kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp311-cp311-macosx_15_0_arm64.whl +0 -0
- kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp312-cp312-macosx_15_0_arm64.whl +0 -0
- kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp313-cp313-macosx_15_0_arm64.whl +0 -0
- kernelforge-0.1.2/.github/workflows/ci.yml +0 -56
- kernelforge-0.1.2/.github/workflows/release.yaml +0 -74
- kernelforge-0.1.2/CMakeLists.txt +0 -51
- kernelforge-0.1.2/Makefile +0 -5
- kernelforge-0.1.2/PKG-INFO +0 -41
- kernelforge-0.1.2/README.md +0 -24
- kernelforge-0.1.2/python/kernelforge/__init__.py +0 -2
- kernelforge-0.1.2/src/kernel.f90 +0 -108
- kernelforge-0.1.2/tests/test_basic.py +0 -99
- kernelforge-0.1.2/wheelhouse/kernelforge-0.1.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl +0 -0
- {kernelforge-0.1.2 → kernelforge-0.1.13}/LICENSE +0 -0
- {kernelforge-0.1.2 → kernelforge-0.1.13}/pytest.ini +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
name: pytest (${{ matrix.os }} / py${{ matrix.python-version }})
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-24.04, macos-15]
|
|
16
|
+
python-version: ["3.14"]
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Install OpenMP (macOS only)
|
|
22
|
+
if: runner.os == 'macOS'
|
|
23
|
+
run: brew install libomp llvm
|
|
24
|
+
|
|
25
|
+
- name: Install OpenBLAS (Linux only)
|
|
26
|
+
if: runner.os == 'Linux'
|
|
27
|
+
run: sudo apt-get install -y libopenblas-dev
|
|
28
|
+
|
|
29
|
+
- name: Set up uv
|
|
30
|
+
uses: astral-sh/setup-uv@v7
|
|
31
|
+
|
|
32
|
+
- name: Build & install (macOS only)
|
|
33
|
+
if: runner.os == 'macOS'
|
|
34
|
+
env:
|
|
35
|
+
OMP_NUM_THREADS: "1"
|
|
36
|
+
OPENBLAS_NUM_THREADS: "1"
|
|
37
|
+
CMAKE_PREFIX_PATH: /opt/homebrew/opt/libomp
|
|
38
|
+
run: |
|
|
39
|
+
uv sync --dev --all-extras
|
|
40
|
+
uv pip install scikit-build-core pybind11
|
|
41
|
+
CMAKE_ARGS="-DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang \
|
|
42
|
+
-DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++" \
|
|
43
|
+
uv pip install -e .[test] --no-build-isolation
|
|
44
|
+
|
|
45
|
+
- name: Build & install (Linux only)
|
|
46
|
+
if: runner.os == 'linux'
|
|
47
|
+
env:
|
|
48
|
+
OMP_NUM_THREADS: "1"
|
|
49
|
+
OPENBLAS_NUM_THREADS: "1"
|
|
50
|
+
run: |
|
|
51
|
+
uv sync --dev --all-extras
|
|
52
|
+
uv pip install scikit-build-core pybind11
|
|
53
|
+
uv pip install -e .[test] --no-build-isolation
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
- name: Run pytest
|
|
57
|
+
env:
|
|
58
|
+
OMP_NUM_THREADS: "1"
|
|
59
|
+
OPENBLAS_NUM_THREADS: "1"
|
|
60
|
+
run: uv run pytest -q -ra -k "not slow" -x
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# .github/workflows/release.yaml
|
|
2
|
+
name: "Build & Publish"
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build-wheels:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-22.04, macos-latest]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12" # host Python, cibuildwheel makes all others
|
|
23
|
+
|
|
24
|
+
- name: Clean wheelhouse and build artifacts
|
|
25
|
+
run: |
|
|
26
|
+
rm -rf dist/ build/ *.egg-info wheelhouse/
|
|
27
|
+
|
|
28
|
+
- name: Install GCC and OpenMP (macOS)
|
|
29
|
+
if: runner.os == 'macOS'
|
|
30
|
+
run: brew install gcc libomp
|
|
31
|
+
|
|
32
|
+
- name: Install cibuildwheel
|
|
33
|
+
run: python -m pip install cibuildwheel==2.*
|
|
34
|
+
|
|
35
|
+
- name: Build wheels
|
|
36
|
+
env:
|
|
37
|
+
CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-* cp314-*"
|
|
38
|
+
CIBW_SKIP: "pp* *-musllinux_* cp*-manylinux_i686"
|
|
39
|
+
CIBW_TEST_COMMAND: "pytest -q {project}/tests -k 'not slow' -x"
|
|
40
|
+
CIBW_TEST_EXTRAS: "test"
|
|
41
|
+
CIBW_ENVIRONMENT: >
|
|
42
|
+
OMP_NUM_THREADS=1
|
|
43
|
+
OPENBLAS_NUM_THREADS=1
|
|
44
|
+
CIBW_BEFORE_BUILD_LINUX: |
|
|
45
|
+
yum -y install openblas-devel
|
|
46
|
+
find /usr/include -name cblas.h -print
|
|
47
|
+
CIBW_ENVIRONMENT_LINUX: >
|
|
48
|
+
CPPFLAGS="-I/usr/include/openblas"
|
|
49
|
+
CFLAGS="-I/usr/include/openblas"
|
|
50
|
+
LD_LIBRARY_PATH="/usr/lib64:$LD_LIBRARY_PATH"
|
|
51
|
+
CMAKE_ARGS="-DBLAS_LIBRARIES=/usr/lib64/libopenblas.so -DBLAS_INCLUDE_DIR=/usr/include/openblas -DCMAKE_CXX_FLAGS=-I/usr/include/openblas -DCMAKE_C_FLAGS=-I/usr/include/openblas"
|
|
52
|
+
OPENBLAS_NUM_THREADS=1
|
|
53
|
+
OMP_NUM_THREADS=1
|
|
54
|
+
CIBW_ENVIRONMENT_PASS_LINUX: >
|
|
55
|
+
CPPFLAGS
|
|
56
|
+
CFLAGS
|
|
57
|
+
LD_LIBRARY_PATH
|
|
58
|
+
CMAKE_ARGS
|
|
59
|
+
OPENBLAS_NUM_THREADS
|
|
60
|
+
OMP_NUM_THREADS
|
|
61
|
+
CIBW_ENVIRONMENT_MACOS: >
|
|
62
|
+
MACOSX_DEPLOYMENT_TARGET=15.0
|
|
63
|
+
CMAKE_ARGS="-DBLAS_VENDOR=Apple
|
|
64
|
+
-DCMAKE_CXX_FLAGS=-I/opt/homebrew/opt/libomp/include
|
|
65
|
+
-DCMAKE_C_FLAGS=-I/opt/homebrew/opt/libomp/include
|
|
66
|
+
-DCMAKE_SHARED_LINKER_FLAGS=-L/opt/homebrew/opt/libomp/lib
|
|
67
|
+
-DCMAKE_EXE_LINKER_FLAGS=-L/opt/homebrew/opt/libomp/lib"
|
|
68
|
+
CIBW_ENVIRONMENT_PASS_MACOS: >
|
|
69
|
+
MACOSX_DEPLOYMENT_TARGET
|
|
70
|
+
CMAKE_ARGS
|
|
71
|
+
CIBW_ARCHS_MACOS: arm64
|
|
72
|
+
run: python -m cibuildwheel --output-dir wheelhouse
|
|
73
|
+
|
|
74
|
+
- name: Build sdist
|
|
75
|
+
run: python -m pip install build && python -m build --sdist -o wheelhouse
|
|
76
|
+
|
|
77
|
+
- name: Upload artifacts
|
|
78
|
+
uses: actions/upload-artifact@v4
|
|
79
|
+
with:
|
|
80
|
+
name: wheels-${{ runner.os }}
|
|
81
|
+
path: wheelhouse/*
|
|
82
|
+
publish:
|
|
83
|
+
needs: build-wheels
|
|
84
|
+
runs-on: ubuntu-22.04
|
|
85
|
+
if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
|
|
86
|
+
permissions:
|
|
87
|
+
id-token: write
|
|
88
|
+
steps:
|
|
89
|
+
- uses: actions/download-artifact@v4
|
|
90
|
+
with:
|
|
91
|
+
name: wheels-Linux
|
|
92
|
+
path: dist
|
|
93
|
+
|
|
94
|
+
- uses: actions/download-artifact@v4
|
|
95
|
+
with:
|
|
96
|
+
name: wheels-macOS
|
|
97
|
+
path: dist
|
|
98
|
+
|
|
99
|
+
- name: Flatten artifacts
|
|
100
|
+
run: |
|
|
101
|
+
mkdir -p dist/flat
|
|
102
|
+
find dist -name '*.whl' -exec cp {} dist/flat/ \;
|
|
103
|
+
find dist -name '*.tar.gz' -exec cp {} dist/flat/ \;
|
|
104
|
+
|
|
105
|
+
- uses: pypa/gh-action-pypi-publish@v1.11.0
|
|
106
|
+
with:
|
|
107
|
+
packages-dir: dist/flat
|
|
108
|
+
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
|
+
project(kernelforge LANGUAGES C CXX)
|
|
3
|
+
|
|
4
|
+
# Platform tweaks
|
|
5
|
+
if(APPLE)
|
|
6
|
+
|
|
7
|
+
# Required for "new lapack" in Accelerate
|
|
8
|
+
set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "" FORCE)
|
|
9
|
+
add_compile_definitions(ACCELERATE_NEW_LAPACK)
|
|
10
|
+
set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "" FORCE)
|
|
11
|
+
|
|
12
|
+
# Necessary to compile with -Accelerate, homebrew clang and openmp
|
|
13
|
+
# Took me way too long to figure out
|
|
14
|
+
add_compile_options(-stdlib=libc++)
|
|
15
|
+
add_link_options(
|
|
16
|
+
-stdlib=libc++
|
|
17
|
+
-L/opt/homebrew/opt/llvm/lib/c++
|
|
18
|
+
-Wl,-rpath,/opt/homebrew/opt/llvm/lib/c++
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
endif()
|
|
22
|
+
|
|
23
|
+
# Position-independent code for all targets (helps for Python extensions)
|
|
24
|
+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
|
25
|
+
|
|
26
|
+
# Dependencies
|
|
27
|
+
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
|
|
28
|
+
execute_process(
|
|
29
|
+
COMMAND "${Python_EXECUTABLE}" -m pybind11 --cmakedir
|
|
30
|
+
OUTPUT_VARIABLE pybind11_DIR
|
|
31
|
+
OUTPUT_STRIP_TRAILING_WHITESPACE
|
|
32
|
+
)
|
|
33
|
+
find_package(pybind11 CONFIG REQUIRED)
|
|
34
|
+
|
|
35
|
+
find_package(OpenMP REQUIRED)
|
|
36
|
+
if (OpenMP_CXX_FOUND)
|
|
37
|
+
if (APPLE)
|
|
38
|
+
# Apple/Homebrew Clang requires explicit flags
|
|
39
|
+
add_compile_options(-Xclang -fopenmp -I/opt/homebrew/opt/libomp/include)
|
|
40
|
+
add_link_options(-L/opt/homebrew/opt/libomp/lib -lomp)
|
|
41
|
+
else()
|
|
42
|
+
add_compile_options(${OpenMP_CXX_FLAGS})
|
|
43
|
+
add_link_options(${OpenMP_CXX_FLAGS})
|
|
44
|
+
endif()
|
|
45
|
+
endif()
|
|
46
|
+
|
|
47
|
+
# Use Accelerate on Apple, otherwise BLAS (MKL, OpenBLAS, etc)
|
|
48
|
+
if(APPLE)
|
|
49
|
+
find_library(ACCELERATE Accelerate REQUIRED)
|
|
50
|
+
else()
|
|
51
|
+
find_package(BLAS REQUIRED)
|
|
52
|
+
endif()
|
|
53
|
+
|
|
54
|
+
# Common interface for headers from Python/pybind11
|
|
55
|
+
add_library(kf_common INTERFACE)
|
|
56
|
+
target_link_libraries(kf_common INTERFACE pybind11::headers Python::Module)
|
|
57
|
+
|
|
58
|
+
# ---- Small helpers to avoid repetition --------------------------------------
|
|
59
|
+
# Track created modules/objlibs so we can link things in one go later
|
|
60
|
+
set(_KF_ALL_MODULES "")
|
|
61
|
+
set(_KF_ALL_OBJLIBS "")
|
|
62
|
+
|
|
63
|
+
# Create a C++ object library + pybind11 module pair:
|
|
64
|
+
# kf_add_cpp_module(<base> <obj_src> <binding_src>)
|
|
65
|
+
# -> object lib: kf_<base>
|
|
66
|
+
# -> module target: _<base>
|
|
67
|
+
function(kf_add_cpp_module base obj_src bind_src)
|
|
68
|
+
set(obj kf_${base})
|
|
69
|
+
add_library(${obj} OBJECT ${obj_src})
|
|
70
|
+
target_link_libraries(${obj} PRIVATE kf_common)
|
|
71
|
+
target_link_libraries(${obj} PRIVATE OpenMP::OpenMP_CXX) # <-- compile flags propagate
|
|
72
|
+
|
|
73
|
+
pybind11_add_module(_${base} MODULE
|
|
74
|
+
${bind_src}
|
|
75
|
+
$<TARGET_OBJECTS:${obj}>
|
|
76
|
+
)
|
|
77
|
+
set_target_properties(_${base} PROPERTIES OUTPUT_NAME "_${base}")
|
|
78
|
+
target_link_libraries(_${base} PRIVATE OpenMP::OpenMP_CXX) # <-- link flags
|
|
79
|
+
|
|
80
|
+
list(APPEND _KF_ALL_MODULES _${base})
|
|
81
|
+
list(APPEND _KF_ALL_OBJLIBS ${obj})
|
|
82
|
+
set(_KF_ALL_MODULES "${_KF_ALL_MODULES}" PARENT_SCOPE)
|
|
83
|
+
set(_KF_ALL_OBJLIBS "${_KF_ALL_OBJLIBS}" PARENT_SCOPE)
|
|
84
|
+
endfunction()
|
|
85
|
+
|
|
86
|
+
# Portable optimization; native tuning is opt-in
|
|
87
|
+
option(KF_USE_NATIVE "Enable -march/-mcpu=native style flags" OFF)
|
|
88
|
+
|
|
89
|
+
function(kf_apply_cxx_flags tgt)
|
|
90
|
+
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
|
91
|
+
target_compile_options(${tgt} PRIVATE
|
|
92
|
+
-O3 -ffast-math -ftree-vectorize -fopenmp
|
|
93
|
+
$<$<BOOL:${KF_USE_NATIVE}>:-mcpu=native -mtune=native>
|
|
94
|
+
)
|
|
95
|
+
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
|
96
|
+
target_compile_options(${tgt} PRIVATE
|
|
97
|
+
-O3 -ffast-math
|
|
98
|
+
$<$<BOOL:${KF_USE_NATIVE}>:-xHost -mtune=native>
|
|
99
|
+
)
|
|
100
|
+
endif()
|
|
101
|
+
endfunction()
|
|
102
|
+
|
|
103
|
+
# ---- C++ modules -------------------------------------------------------------
|
|
104
|
+
kf_add_cpp_module(kernels src/kernels.cpp src/bindings_kernels.cpp)
|
|
105
|
+
kf_add_cpp_module(invdist src/invdist.cpp src/invdist_bindings.cpp)
|
|
106
|
+
kf_add_cpp_module(fchl19 src/fchl19_representation.cpp src/bindings_fchl19.cpp)
|
|
107
|
+
kf_add_cpp_module(cholesky src/cholesky.cpp src/bindings_cholesky.cpp)
|
|
108
|
+
|
|
109
|
+
# Apply C++ flags to the object libs (not to the module targets)
|
|
110
|
+
foreach(obj ${_KF_ALL_OBJLIBS})
|
|
111
|
+
kf_apply_cxx_flags(${obj})
|
|
112
|
+
endforeach()
|
|
113
|
+
|
|
114
|
+
# ---- OpenMP (C++) ------------------------------------------------------------
|
|
115
|
+
if (OpenMP_CXX_FOUND)
|
|
116
|
+
target_link_libraries(_cholesky PRIVATE OpenMP::OpenMP_CXX)
|
|
117
|
+
target_link_libraries(_kernels PRIVATE OpenMP::OpenMP_CXX)
|
|
118
|
+
target_link_libraries(_fchl19 PRIVATE OpenMP::OpenMP_CXX)
|
|
119
|
+
target_link_libraries(_invdist PRIVATE OpenMP::OpenMP_CXX)
|
|
120
|
+
endif()
|
|
121
|
+
|
|
122
|
+
# ---- BLAS/LAPACK backend selection (link all modules) -----------------------
|
|
123
|
+
foreach(m ${_KF_ALL_MODULES})
|
|
124
|
+
if(APPLE)
|
|
125
|
+
target_link_libraries(${m} PRIVATE ${ACCELERATE})
|
|
126
|
+
elseif(WIN32)
|
|
127
|
+
target_link_libraries(${m} PRIVATE MKL::MKL)
|
|
128
|
+
else()
|
|
129
|
+
target_link_libraries(${m} PRIVATE BLAS::BLAS)
|
|
130
|
+
endif()
|
|
131
|
+
endforeach()
|
|
132
|
+
|
|
133
|
+
# ---- Install ----------------------------------------------------------------
|
|
134
|
+
install(TARGETS _kernels _invdist _fchl19 _cholesky
|
|
135
|
+
LIBRARY DESTINATION kernelforge # Linux/macOS
|
|
136
|
+
RUNTIME DESTINATION kernelforge # Windows (.pyd)
|
|
137
|
+
)
|
|
138
|
+
install(FILES python/kernelforge/__init__.py DESTINATION kernelforge)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
install-linux:
|
|
2
|
+
CMAKE_ARGS="-DKF_USE_NATIVE=ON" uv pip install -e .[test] --verbose
|
|
3
|
+
|
|
4
|
+
install-macos:
|
|
5
|
+
CMAKE_ARGS="-DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DKF_USE_NATIVE=ON " uv pip install -e .[test] --verbose
|
|
6
|
+
|
|
7
|
+
test:
|
|
8
|
+
pytest
|
|
9
|
+
|
|
10
|
+
environment:
|
|
11
|
+
uv venv --python 3.14
|
|
12
|
+
uv pip install scikit-build-core pybind11
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: kernelforge
|
|
3
|
+
Version: 0.1.13
|
|
4
|
+
Summary: Optimized Kernels for ML
|
|
5
|
+
Author: Anders Christensen
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/andersx/kernelforge
|
|
8
|
+
Project-URL: Issues, https://github.com/andersx/kernelforge/issues
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: numpy>=2.00
|
|
11
|
+
Provides-Extra: test
|
|
12
|
+
Requires-Dist: pytest>=8; extra == "test"
|
|
13
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
14
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
15
|
+
Requires-Dist: pytest-timeout; extra == "test"
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# KernelForge - Optimized Kernels for ML
|
|
19
|
+
|
|
20
|
+
I really only care about writing optimized kernel code, so this project will be completed as I find additional time... XD
|
|
21
|
+
|
|
22
|
+
I'm reviving this project to finish an old project using random Fourier features for kernel ML.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
conda env create -f environments/environment-dev.yaml
|
|
29
|
+
pip install -e .
|
|
30
|
+
pytest -v -s
|
|
31
|
+
```
|
|
32
|
+
## PyPI installation
|
|
33
|
+
|
|
34
|
+
Install the requirements (e.g. the conda env above) and install from PyPI.
|
|
35
|
+
This should work on both MacOS and Linux/PC:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
conda activate kernelforge-dev
|
|
39
|
+
pip install kernelforge
|
|
40
|
+
```
|
|
41
|
+
This will install pre-compiled wheels with gfortran and linked against OpenBLAS on Linux and Accelerate on MacOS.
|
|
42
|
+
If you want to use MKL or other BLAS/LAPACK libraries, you need to compile from source, see below.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
## Intel compilers and MKL
|
|
46
|
+
|
|
47
|
+
It is 2025 so you can `sudo apt install intel-basekit` on Linux/PC to get the compilers and MKL.
|
|
48
|
+
Then set up the environment variables:
|
|
49
|
+
```bash
|
|
50
|
+
source /opt/intel/oneapi/setvars.sh
|
|
51
|
+
```
|
|
52
|
+
In this case, MKL will be autodetected by some CMake magic. If you additionally want to compile with Intel compilers, you can set the environment variables when running `pip install`:
|
|
53
|
+
```bash
|
|
54
|
+
CC=icx CXX=icpx FC=ifx make install
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
In my experience, GCC/G++/GFortran with OpenBLAS is very similar to Intel API alternatives in terms of performance, perhaps even better.
|
|
58
|
+
On MacOS, GNU compilers with `-framework Accelerate` for BLAS/LAPACK is the default and is very fast on M-series macs.
|
|
59
|
+
|
|
60
|
+
## Timings
|
|
61
|
+
I've rewritten a few of the kernels from the original QML code completely in C++.
|
|
62
|
+
There are performance gains in most cases.
|
|
63
|
+
These are primarily due to better use of BLAS routines for calculating, for example, Gramian sub-matrices with chunked DGEMM/DSYRK calls, etc.
|
|
64
|
+
In the gradient and Hessian matrices there are also some algorithmic improvements and pre-computed terms.
|
|
65
|
+
Memory usage might be a bit higher, but this could be optimized with more fine-grained chunking if needed.
|
|
66
|
+
More is coming as I find the time ...
|
|
67
|
+
|
|
68
|
+
Some speedups vs the original QML code are shown below:
|
|
69
|
+
|
|
70
|
+
| Benchmark | QML [s] | Kernelforge [s] |
|
|
71
|
+
|:---------------|------------:|--------------------:|
|
|
72
|
+
| Upper triangle Gaussian kernel (16K x 16K) | 1.82 | 0.64 |
|
|
73
|
+
| 1K FCHL19 descriptors (1K) | ? | 0.43 |
|
|
74
|
+
| 1K FCHL19 descriptors+jacobian (1K) | ? | 0.62 |
|
|
75
|
+
| FCHL19 Local Gaussian scalar kernel (10K x 10K) | 76.81 | 18.15 |
|
|
76
|
+
| FCHL19 Local Gaussian gradient kernel (1K x 2700K) | 32.54 | 1.52 |
|
|
77
|
+
| FCHL19 Local Gaussian Hessian kernel (5400K x 5400K) | 29.68 | 2.05 |
|
|
78
|
+
|
|
79
|
+
## TODO list
|
|
80
|
+
|
|
81
|
+
The goal is to remove pain-points of existing QML libraries
|
|
82
|
+
- Removal of Fortran dependencies
|
|
83
|
+
- No Fortran-ordered arrays
|
|
84
|
+
- No Fortran compilers needed
|
|
85
|
+
- Simplified build system
|
|
86
|
+
- No cooked F2PY/Meson build system, just CMake and Pybind11
|
|
87
|
+
- Improved use of BLAS routines, with built-in chunking to avoid memory explosions
|
|
88
|
+
- Better use of pre-computed terms for single-point inference/MD kernels
|
|
89
|
+
- Low overhead with Pybind11 shims and better aligned memory?
|
|
90
|
+
- Simplified entrypoints that are compatible with RDKit, ASE, Scikit-learn, etc.
|
|
91
|
+
- A few high-level functions that do the most common tasks efficiently and correctly
|
|
92
|
+
- Efficient FCHL19 out-of-the-box
|
|
93
|
+
- Fast training with random Fourier features
|
|
94
|
+
- With derivatives
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
## Priority list for the next months:
|
|
98
|
+
|
|
99
|
+
- [x] Finish the inverse-distance kernel and its Jacobian
|
|
100
|
+
- [x] Make Pybind11 interface
|
|
101
|
+
- [ ] Finalize the C++ interface
|
|
102
|
+
- [x] Finish the Gaussian kernel
|
|
103
|
+
- [x] Notebook with rMD17 example
|
|
104
|
+
- [x] Finish the Jacobian and Hessian kernels
|
|
105
|
+
- [x] Notebook with rMD17 forces example
|
|
106
|
+
- FCHL19 support:
|
|
107
|
+
- [x] Add FCHL19 descriptors
|
|
108
|
+
- [x] Add FCHL19 kernels (local/elemental)
|
|
109
|
+
- [x] Add FCHL19 descriptor with derivatives
|
|
110
|
+
- [x] Add FCHL19 kernel Jacobian
|
|
111
|
+
- [x] Add FCHL19 kernel Hessian (GDML-style)
|
|
112
|
+
- [ ] Improve FCHL19 kernel Jacobian performance (it's poor)
|
|
113
|
+
- Finish the random Fourier features kernel and its Jacobian
|
|
114
|
+
- [ ] Parallel random basis sampler
|
|
115
|
+
- [ ] RFF kernel for global descriptors
|
|
116
|
+
- [ ] SVD and QR solvers for rectangular matrices
|
|
117
|
+
- [ ] RFF kernel for local descriptors (FCHL19)
|
|
118
|
+
- [ ] RFF kernels with Cholesky solver and chunked DSYRK kernel updates
|
|
119
|
+
- [ ] RFF kernels with RFP format with chunked DSFRK kernel updates
|
|
120
|
+
- [ ] RFF kernel Jacobian for global descriptors
|
|
121
|
+
- [ ] RFF kernel Jacobian for local descriptors (FCHL19)
|
|
122
|
+
- [ ] Notebook with rMD17 random Fourier features examples
|
|
123
|
+
|
|
124
|
+
- Science:
|
|
125
|
+
- Benchmark full kernel vs RFF on rMD17 and QM7b and QM9
|
|
126
|
+
- Both FCHL19 and inverse-distance matrix
|
|
127
|
+
|
|
128
|
+
#### Todos:
|
|
129
|
+
- Housekeeping:
|
|
130
|
+
- [x] Pybind11 bindings and CMake build system
|
|
131
|
+
- [x] Setup CI with GitHub Actions
|
|
132
|
+
- [x] Rewrite existing kernels to C++ (no Fortran)
|
|
133
|
+
- [x] Setup GHA to build PyPI wheels
|
|
134
|
+
- [x] Test Linux build matrices
|
|
135
|
+
- [x] Test MacOS build matrices
|
|
136
|
+
- [ ] Test Windows build matrices
|
|
137
|
+
- [x] Add build for all Python versions >=3.11
|
|
138
|
+
- [ ] Plan structure for saving models for inference as `.npz` files
|
|
139
|
+
- Ensure correct linking with optimized BLAS/LAPACK libraries:
|
|
140
|
+
- [x] OpenBLAS (Linux) <- also used in wheels
|
|
141
|
+
- [x] MKL (Linux)
|
|
142
|
+
- [x] Accelerate (MacOS)
|
|
143
|
+
- Add global kernels:
|
|
144
|
+
- [x] Gaussian kernel
|
|
145
|
+
- [x] Jacobian/gradient kernel
|
|
146
|
+
- [ ] Optimized Jacobian kernel for single inference
|
|
147
|
+
- [x] Hessian kernel
|
|
148
|
+
- [x] GDML-like kernel
|
|
149
|
+
- [ ] Full GPR kernel
|
|
150
|
+
- Add local kernels:
|
|
151
|
+
- [x] Gaussian kernel
|
|
152
|
+
- [x] Jacobian/gradient kernel
|
|
153
|
+
- [x] Optimized Jacobian kernel for single inference
|
|
154
|
+
- [x] Hessian kernel (GDML-style)
|
|
155
|
+
- [ ] Full GPR kernel
|
|
156
|
+
- [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
|
|
157
|
+
- Add random Fourier features kernel code:
|
|
158
|
+
- [ ] Fourier-basis sampler
|
|
159
|
+
- [ ] RFF kernel
|
|
160
|
+
- [ ] RFF gradient kernel
|
|
161
|
+
- [ ] RFF chunked DSYRK kernel
|
|
162
|
+
- [ ] Optimized RFF gradient kernel for single inference/MD
|
|
163
|
+
- The same as above, just for Hadamard features when I find the time?
|
|
164
|
+
- GDML and sGDML kernels:
|
|
165
|
+
- [x] Inverse-distance matrix descriptor
|
|
166
|
+
- [ ] Packed Jacobian for inverse-distance matrix
|
|
167
|
+
- [x] GDML kernel (brute-force implemented)
|
|
168
|
+
- [ ] sGDML kernel (brute-force implemented)
|
|
169
|
+
- [ ] Full GPR kernel
|
|
170
|
+
- [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
|
|
171
|
+
- FCHL18 support:
|
|
172
|
+
- [ ] Complete rewrite of FCHL18 analytical scalar kernel in C++
|
|
173
|
+
- [ ] Stretch goal 1: Add new analytical FCHL18 kernel Jacobian
|
|
174
|
+
- [ ] Stretch goal 2: Add new analytical FCHL18 kernel Hessian (+GPR/GDML-style)
|
|
175
|
+
- [ ] Stretch goal 3: Attempt to optimize hyperparameters and cut-off functions
|
|
176
|
+
- Add standard solvers:
|
|
177
|
+
- [x] Cholesky in-place solver
|
|
178
|
+
- [x] L2-reg kwarg
|
|
179
|
+
- [x] Toggle destructive vs non-destructive
|
|
180
|
+
- [ ] QR and/or SVD for non-square matrices
|
|
181
|
+
- Add molecular descriptors with derivatives:
|
|
182
|
+
- [ ] Coulomb matrix + misc variants without derivatives
|
|
183
|
+
- [x] FCHL19 + derivatives
|
|
184
|
+
- [x] GDML-like inverse-distance matrix + derivatives
|
|
185
|
+
#### Stretch goals:
|
|
186
|
+
- [ ] Plan RDKit interface
|
|
187
|
+
- [ ] Plan Scikit-learn interface
|
|
188
|
+
- [ ] Plan ASE interface
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# KernelForge - Optimized Kernels for ML
|
|
2
|
+
|
|
3
|
+
I really only care about writing optimized kernel code, so this project will be completed as I find additional time... XD
|
|
4
|
+
|
|
5
|
+
I'm reviving this project to finish an old project using random Fourier features for kernel ML.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
conda env create -f environments/environment-dev.yaml
|
|
12
|
+
pip install -e .
|
|
13
|
+
pytest -v -s
|
|
14
|
+
```
|
|
15
|
+
## PyPI installation
|
|
16
|
+
|
|
17
|
+
Install the requirements (e.g. the conda env above) and install from PyPI.
|
|
18
|
+
This should work on both MacOS and Linux/PC:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
conda activate kernelforge-dev
|
|
22
|
+
pip install kernelforge
|
|
23
|
+
```
|
|
24
|
+
This will install pre-compiled wheels with gfortran and linked against OpenBLAS on Linux and Accelerate on MacOS.
|
|
25
|
+
If you want to use MKL or other BLAS/LAPACK libraries, you need to compile from source, see below.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## Intel compilers and MKL
|
|
29
|
+
|
|
30
|
+
It is 2025 so you can `sudo apt install intel-basekit` on Linux/PC to get the compilers and MKL.
|
|
31
|
+
Then set up the environment variables:
|
|
32
|
+
```bash
|
|
33
|
+
source /opt/intel/oneapi/setvars.sh
|
|
34
|
+
```
|
|
35
|
+
In this case, MKL will be autodetected by some CMake magic. If you additionally want to compile with Intel compilers, you can set the environment variables when running `pip install`:
|
|
36
|
+
```bash
|
|
37
|
+
CC=icx CXX=icpx FC=ifx make install
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
In my experience, GCC/G++/GFortran with OpenBLAS is very similar to Intel API alternatives in terms of performance, perhaps even better.
|
|
41
|
+
On MacOS, GNU compilers with `-framework Accelerate` for BLAS/LAPACK is the default and is very fast on M-series macs.
|
|
42
|
+
|
|
43
|
+
## Timings
|
|
44
|
+
I've rewritten a few of the kernels from the original QML code completely in C++.
|
|
45
|
+
There are performance gains in most cases.
|
|
46
|
+
These are primarily due to better use of BLAS routines for calculating, for example, Gramian sub-matrices with chunked DGEMM/DSYRK calls, etc.
|
|
47
|
+
In the gradient and Hessian matrices there are also some algorithmic improvements and pre-computed terms.
|
|
48
|
+
Memory usage might be a bit higher, but this could be optimized with more fine-grained chunking if needed.
|
|
49
|
+
More is coming as I find the time ...
|
|
50
|
+
|
|
51
|
+
Some speedups vs the original QML code are shown below:
|
|
52
|
+
|
|
53
|
+
| Benchmark | QML [s] | Kernelforge [s] |
|
|
54
|
+
|:---------------|------------:|--------------------:|
|
|
55
|
+
| Upper triangle Gaussian kernel (16K x 16K) | 1.82 | 0.64 |
|
|
56
|
+
| 1K FCHL19 descriptors (1K) | ? | 0.43 |
|
|
57
|
+
| 1K FCHL19 descriptors+jacobian (1K) | ? | 0.62 |
|
|
58
|
+
| FCHL19 Local Gaussian scalar kernel (10K x 10K) | 76.81 | 18.15 |
|
|
59
|
+
| FCHL19 Local Gaussian gradient kernel (1K x 2700K) | 32.54 | 1.52 |
|
|
60
|
+
| FCHL19 Local Gaussian Hessian kernel (5400K x 5400K) | 29.68 | 2.05 |
|
|
61
|
+
|
|
62
|
+
## TODO list
|
|
63
|
+
|
|
64
|
+
The goal is to remove pain-points of existing QML libraries
|
|
65
|
+
- Removal of Fortran dependencies
|
|
66
|
+
- No Fortran-ordered arrays
|
|
67
|
+
- No Fortran compilers needed
|
|
68
|
+
- Simplified build system
|
|
69
|
+
- No cooked F2PY/Meson build system, just CMake and Pybind11
|
|
70
|
+
- Improved use of BLAS routines, with built-in chunking to avoid memory explosions
|
|
71
|
+
- Better use of pre-computed terms for single-point inference/MD kernels
|
|
72
|
+
- Low overhead with Pybind11 shims and better aligned memory?
|
|
73
|
+
- Simplified entrypoints that are compatible with RDKit, ASE, Scikit-learn, etc.
|
|
74
|
+
- A few high-level functions that do the most common tasks efficiently and correctly
|
|
75
|
+
- Efficient FCHL19 out-of-the-box
|
|
76
|
+
- Fast training with random Fourier features
|
|
77
|
+
- With derivatives
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
## Priority list for the next months:
|
|
81
|
+
|
|
82
|
+
- [x] Finish the inverse-distance kernel and its Jacobian
|
|
83
|
+
- [x] Make Pybind11 interface
|
|
84
|
+
- [ ] Finalize the C++ interface
|
|
85
|
+
- [x] Finish the Gaussian kernel
|
|
86
|
+
- [x] Notebook with rMD17 example
|
|
87
|
+
- [x] Finish the Jacobian and Hessian kernels
|
|
88
|
+
- [x] Notebook with rMD17 forces example
|
|
89
|
+
- FCHL19 support:
|
|
90
|
+
- [x] Add FCHL19 descriptors
|
|
91
|
+
- [x] Add FCHL19 kernels (local/elemental)
|
|
92
|
+
- [x] Add FCHL19 descriptor with derivatives
|
|
93
|
+
- [x] Add FCHL19 kernel Jacobian
|
|
94
|
+
- [x] Add FCHL19 kernel Hessian (GDML-style)
|
|
95
|
+
- [ ] Improve FCHL19 kernel Jacobian performance (it's poor)
|
|
96
|
+
- Finish the random Fourier features kernel and its Jacobian
|
|
97
|
+
- [ ] Parallel random basis sampler
|
|
98
|
+
- [ ] RFF kernel for global descriptors
|
|
99
|
+
- [ ] SVD and QR solvers for rectangular matrices
|
|
100
|
+
- [ ] RFF kernel for local descriptors (FCHL19)
|
|
101
|
+
- [ ] RFF kernels with Cholesky solver and chunked DSYRK kernel updates
|
|
102
|
+
- [ ] RFF kernels with RFP format with chunked DSFRK kernel updates
|
|
103
|
+
- [ ] RFF kernel Jacobian for global descriptors
|
|
104
|
+
- [ ] RFF kernel Jacobian for local descriptors (FCHL19)
|
|
105
|
+
- [ ] Notebook with rMD17 random Fourier features examples
|
|
106
|
+
|
|
107
|
+
- Science:
|
|
108
|
+
- Benchmark full kernel vs RFF on rMD17 and QM7b and QM9
|
|
109
|
+
- Both FCHL19 and inverse-distance matrix
|
|
110
|
+
|
|
111
|
+
#### Todos:
|
|
112
|
+
- Housekeeping:
|
|
113
|
+
- [x] Pybind11 bindings and CMake build system
|
|
114
|
+
- [x] Setup CI with GitHub Actions
|
|
115
|
+
- [x] Rewrite existing kernels to C++ (no Fortran)
|
|
116
|
+
- [x] Setup GHA to build PyPI wheels
|
|
117
|
+
- [x] Test Linux build matrices
|
|
118
|
+
- [x] Test MacOS build matrices
|
|
119
|
+
- [ ] Test Windows build matrices
|
|
120
|
+
- [x] Add build for all Python versions >=3.11
|
|
121
|
+
- [ ] Plan structure for saving models for inference as `.npz` files
|
|
122
|
+
- Ensure correct linking with optimized BLAS/LAPACK libraries:
|
|
123
|
+
- [x] OpenBLAS (Linux) <- also used in wheels
|
|
124
|
+
- [x] MKL (Linux)
|
|
125
|
+
- [x] Accelerate (MacOS)
|
|
126
|
+
- Add global kernels:
|
|
127
|
+
- [x] Gaussian kernel
|
|
128
|
+
- [x] Jacobian/gradient kernel
|
|
129
|
+
- [ ] Optimized Jacobian kernel for single inference
|
|
130
|
+
- [x] Hessian kernel
|
|
131
|
+
- [x] GDML-like kernel
|
|
132
|
+
- [ ] Full GPR kernel
|
|
133
|
+
- Add local kernels:
|
|
134
|
+
- [x] Gaussian kernel
|
|
135
|
+
- [x] Jacobian/gradient kernel
|
|
136
|
+
- [x] Optimized Jacobian kernel for single inference
|
|
137
|
+
- [x] Hessian kernel (GDML-style)
|
|
138
|
+
- [ ] Full GPR kernel
|
|
139
|
+
- [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
|
|
140
|
+
- Add random Fourier features kernel code:
|
|
141
|
+
- [ ] Fourier-basis sampler
|
|
142
|
+
- [ ] RFF kernel
|
|
143
|
+
- [ ] RFF gradient kernel
|
|
144
|
+
- [ ] RFF chunked DSYRK kernel
|
|
145
|
+
- [ ] Optimized RFF gradient kernel for single inference/MD
|
|
146
|
+
- The same as above, just for Hadamard features when I find the time?
|
|
147
|
+
- GDML and sGDML kernels:
|
|
148
|
+
- [x] Inverse-distance matrix descriptor
|
|
149
|
+
- [ ] Packed Jacobian for inverse-distance matrix
|
|
150
|
+
- [x] GDML kernel (brute-force implemented)
|
|
151
|
+
- [ ] sGDML kernel (brute-force implemented)
|
|
152
|
+
- [ ] Full GPR kernel
|
|
153
|
+
- [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
|
|
154
|
+
- FCHL18 support:
|
|
155
|
+
- [ ] Complete rewrite of FCHL18 analytical scalar kernel in C++
|
|
156
|
+
- [ ] Stretch goal 1: Add new analytical FCHL18 kernel Jacobian
|
|
157
|
+
- [ ] Stretch goal 2: Add new analytical FCHL18 kernel Hessian (+GPR/GDML-style)
|
|
158
|
+
- [ ] Stretch goal 3: Attempt to optimize hyperparameters and cut-off functions
|
|
159
|
+
- Add standard solvers:
|
|
160
|
+
- [x] Cholesky in-place solver
|
|
161
|
+
- [x] L2-reg kwarg
|
|
162
|
+
- [x] Toggle destructive vs non-destructive
|
|
163
|
+
- [ ] QR and/or SVD for non-square matrices
|
|
164
|
+
- Add molecular descriptors with derivatives:
|
|
165
|
+
- [ ] Coulomb matrix + misc variants without derivatives
|
|
166
|
+
- [x] FCHL19 + derivatives
|
|
167
|
+
- [x] GDML-like inverse-distance matrix + derivatives
|
|
168
|
+
#### Stretch goals:
|
|
169
|
+
- [ ] Plan RDKit interface
|
|
170
|
+
- [ ] Plan Scikit-learn interface
|
|
171
|
+
- [ ] Plan ASE interface
|