kernelforge 0.1.2__tar.gz → 0.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. kernelforge-0.1.13/.github/workflows/ci.yml +60 -0
  2. kernelforge-0.1.13/.github/workflows/release.yaml +108 -0
  3. {kernelforge-0.1.2 → kernelforge-0.1.13}/.gitignore +2 -0
  4. kernelforge-0.1.13/CMakeLists.txt +138 -0
  5. kernelforge-0.1.13/Makefile +12 -0
  6. kernelforge-0.1.13/PKG-INFO +188 -0
  7. kernelforge-0.1.13/README.md +171 -0
  8. kernelforge-0.1.13/environments/environment-dev-macos.yaml +33 -0
  9. kernelforge-0.1.13/environments/environment-dev.yaml +30 -0
  10. kernelforge-0.1.13/examples/desc_hess-gauss.ipynb +984 -0
  11. kernelforge-0.1.13/examples/desc_hess.ipynb +808 -0
  12. kernelforge-0.1.13/examples/ethanol.sdf +22 -0
  13. kernelforge-0.1.13/examples/fchl_example.ipynb +655 -0
  14. kernelforge-0.1.13/examples/fchl_hess.ipynb +419 -0
  15. kernelforge-0.1.13/examples/qm7_example.ipynb +1058 -0
  16. kernelforge-0.1.13/examples/regression_ethanol-Copy1.ipynb +359 -0
  17. kernelforge-0.1.13/examples/regression_forces.ipynb +602 -0
  18. kernelforge-0.1.13/examples/sgdml.ipynb +1004 -0
  19. {kernelforge-0.1.2 → kernelforge-0.1.13}/pyproject.toml +11 -12
  20. kernelforge-0.1.13/python/kernelforge/__init__.py +0 -0
  21. kernelforge-0.1.13/src/aligned_alloc64.hpp +31 -0
  22. {kernelforge-0.1.2 → kernelforge-0.1.13}/src/bindings.cpp +27 -2
  23. kernelforge-0.1.13/src/bindings_cholesky.cpp +178 -0
  24. kernelforge-0.1.13/src/bindings_fchl19.cpp +912 -0
  25. kernelforge-0.1.13/src/bindings_kernels.cpp +305 -0
  26. kernelforge-0.1.13/src/cholesky.cpp +232 -0
  27. kernelforge-0.1.13/src/cholesky.hpp +22 -0
  28. kernelforge-0.1.13/src/fchl19_representation.cpp +1984 -0
  29. kernelforge-0.1.13/src/fchl19_representation.hpp +173 -0
  30. kernelforge-0.1.13/src/invdist.cpp +98 -0
  31. kernelforge-0.1.13/src/invdist.hpp +36 -0
  32. kernelforge-0.1.13/src/invdist_bindings.cpp +96 -0
  33. kernelforge-0.1.13/src/kernels.cpp +1287 -0
  34. kernelforge-0.1.13/src/kernels.hpp +55 -0
  35. kernelforge-0.1.13/tests/test_cholesky.py +59 -0
  36. kernelforge-0.1.13/tests/test_fchl19.py +227 -0
  37. kernelforge-0.1.13/tests/test_fchl19_gradient.py +161 -0
  38. kernelforge-0.1.13/tests/test_hessian.py +123 -0
  39. kernelforge-0.1.13/tests/test_invdist.py +85 -0
  40. kernelforge-0.1.13/tests/test_jacobian.py +155 -0
  41. kernelforge-0.1.13/tests/test_kernels.py +113 -0
  42. kernelforge-0.1.13/tests/test_rfp.py +98 -0
  43. kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp310-cp310-macosx_15_0_arm64.whl +0 -0
  44. kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp311-cp311-macosx_15_0_arm64.whl +0 -0
  45. kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp312-cp312-macosx_15_0_arm64.whl +0 -0
  46. kernelforge-0.1.13/wheelhouse/kernelforge-0.1.13-cp313-cp313-macosx_15_0_arm64.whl +0 -0
  47. kernelforge-0.1.2/.github/workflows/ci.yml +0 -56
  48. kernelforge-0.1.2/.github/workflows/release.yaml +0 -74
  49. kernelforge-0.1.2/CMakeLists.txt +0 -51
  50. kernelforge-0.1.2/Makefile +0 -5
  51. kernelforge-0.1.2/PKG-INFO +0 -41
  52. kernelforge-0.1.2/README.md +0 -24
  53. kernelforge-0.1.2/python/kernelforge/__init__.py +0 -2
  54. kernelforge-0.1.2/src/kernel.f90 +0 -108
  55. kernelforge-0.1.2/tests/test_basic.py +0 -99
  56. kernelforge-0.1.2/wheelhouse/kernelforge-0.1.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl +0 -0
  57. {kernelforge-0.1.2 → kernelforge-0.1.13}/LICENSE +0 -0
  58. {kernelforge-0.1.2 → kernelforge-0.1.13}/pytest.ini +0 -0
@@ -0,0 +1,60 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ name: pytest (${{ matrix.os }} / py${{ matrix.python-version }})
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-24.04, macos-15]
16
+ python-version: ["3.14"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Install OpenMP (macOS only)
22
+ if: runner.os == 'macOS'
23
+ run: brew install libomp llvm
24
+
25
+ - name: Install OpenBLAS (Linux only)
26
+ if: runner.os == 'Linux'
27
+ run: sudo apt-get install -y libopenblas-dev
28
+
29
+ - name: Set up uv
30
+ uses: astral-sh/setup-uv@v7
31
+
32
+ - name: Build & install (macOS only)
33
+ if: runner.os == 'macOS'
34
+ env:
35
+ OMP_NUM_THREADS: "1"
36
+ OPENBLAS_NUM_THREADS: "1"
37
+ CMAKE_PREFIX_PATH: /opt/homebrew/opt/libomp
38
+ run: |
39
+ uv sync --dev --all-extras
40
+ uv pip install scikit-build-core pybind11
41
+ CMAKE_ARGS="-DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang \
42
+ -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++" \
43
+ uv pip install -e .[test] --no-build-isolation
44
+
45
+ - name: Build & install (Linux only)
46
+ if: runner.os == 'linux'
47
+ env:
48
+ OMP_NUM_THREADS: "1"
49
+ OPENBLAS_NUM_THREADS: "1"
50
+ run: |
51
+ uv sync --dev --all-extras
52
+ uv pip install scikit-build-core pybind11
53
+ uv pip install -e .[test] --no-build-isolation
54
+
55
+
56
+ - name: Run pytest
57
+ env:
58
+ OMP_NUM_THREADS: "1"
59
+ OPENBLAS_NUM_THREADS: "1"
60
+ run: uv run pytest -q -ra -k "not slow" -x
@@ -0,0 +1,108 @@
1
+ # .github/workflows/release.yaml
2
+ name: "Build & Publish"
3
+
4
+ on:
5
+ release:
6
+ types: [published]
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build-wheels:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-22.04, macos-latest]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12" # host Python, cibuildwheel makes all others
23
+
24
+ - name: Clean wheelhouse and build artifacts
25
+ run: |
26
+ rm -rf dist/ build/ *.egg-info wheelhouse/
27
+
28
+ - name: Install GCC and OpenMP (macOS)
29
+ if: runner.os == 'macOS'
30
+ run: brew install gcc libomp
31
+
32
+ - name: Install cibuildwheel
33
+ run: python -m pip install cibuildwheel==2.*
34
+
35
+ - name: Build wheels
36
+ env:
37
+ CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-* cp314-*"
38
+ CIBW_SKIP: "pp* *-musllinux_* cp*-manylinux_i686"
39
+ CIBW_TEST_COMMAND: "pytest -q {project}/tests -k 'not slow' -x"
40
+ CIBW_TEST_EXTRAS: "test"
41
+ CIBW_ENVIRONMENT: >
42
+ OMP_NUM_THREADS=1
43
+ OPENBLAS_NUM_THREADS=1
44
+ CIBW_BEFORE_BUILD_LINUX: |
45
+ yum -y install openblas-devel
46
+ find /usr/include -name cblas.h -print
47
+ CIBW_ENVIRONMENT_LINUX: >
48
+ CPPFLAGS="-I/usr/include/openblas"
49
+ CFLAGS="-I/usr/include/openblas"
50
+ LD_LIBRARY_PATH="/usr/lib64:$LD_LIBRARY_PATH"
51
+ CMAKE_ARGS="-DBLAS_LIBRARIES=/usr/lib64/libopenblas.so -DBLAS_INCLUDE_DIR=/usr/include/openblas -DCMAKE_CXX_FLAGS=-I/usr/include/openblas -DCMAKE_C_FLAGS=-I/usr/include/openblas"
52
+ OPENBLAS_NUM_THREADS=1
53
+ OMP_NUM_THREADS=1
54
+ CIBW_ENVIRONMENT_PASS_LINUX: >
55
+ CPPFLAGS
56
+ CFLAGS
57
+ LD_LIBRARY_PATH
58
+ CMAKE_ARGS
59
+ OPENBLAS_NUM_THREADS
60
+ OMP_NUM_THREADS
61
+ CIBW_ENVIRONMENT_MACOS: >
62
+ MACOSX_DEPLOYMENT_TARGET=15.0
63
+ CMAKE_ARGS="-DBLAS_VENDOR=Apple
64
+ -DCMAKE_CXX_FLAGS=-I/opt/homebrew/opt/libomp/include
65
+ -DCMAKE_C_FLAGS=-I/opt/homebrew/opt/libomp/include
66
+ -DCMAKE_SHARED_LINKER_FLAGS=-L/opt/homebrew/opt/libomp/lib
67
+ -DCMAKE_EXE_LINKER_FLAGS=-L/opt/homebrew/opt/libomp/lib"
68
+ CIBW_ENVIRONMENT_PASS_MACOS: >
69
+ MACOSX_DEPLOYMENT_TARGET
70
+ CMAKE_ARGS
71
+ CIBW_ARCHS_MACOS: arm64
72
+ run: python -m cibuildwheel --output-dir wheelhouse
73
+
74
+ - name: Build sdist
75
+ run: python -m pip install build && python -m build --sdist -o wheelhouse
76
+
77
+ - name: Upload artifacts
78
+ uses: actions/upload-artifact@v4
79
+ with:
80
+ name: wheels-${{ runner.os }}
81
+ path: wheelhouse/*
82
+ publish:
83
+ needs: build-wheels
84
+ runs-on: ubuntu-22.04
85
+ if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
86
+ permissions:
87
+ id-token: write
88
+ steps:
89
+ - uses: actions/download-artifact@v4
90
+ with:
91
+ name: wheels-Linux
92
+ path: dist
93
+
94
+ - uses: actions/download-artifact@v4
95
+ with:
96
+ name: wheels-macOS
97
+ path: dist
98
+
99
+ - name: Flatten artifacts
100
+ run: |
101
+ mkdir -p dist/flat
102
+ find dist -name '*.whl' -exec cp {} dist/flat/ \;
103
+ find dist -name '*.tar.gz' -exec cp {} dist/flat/ \;
104
+
105
+ - uses: pypa/gh-action-pypi-publish@v1.11.0
106
+ with:
107
+ packages-dir: dist/flat
108
+
@@ -208,3 +208,5 @@ cython_debug/
208
208
  marimo/_static/
209
209
  marimo/_lsp/
210
210
  __marimo__/
211
+
212
+ uv.lock
@@ -0,0 +1,138 @@
1
+ cmake_minimum_required(VERSION 3.18)
2
+ project(kernelforge LANGUAGES C CXX)
3
+
4
+ # Platform tweaks
5
+ if(APPLE)
6
+
7
+ # Required for "new lapack" in Accelerate
8
+ set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "" FORCE)
9
+ add_compile_definitions(ACCELERATE_NEW_LAPACK)
10
+ set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "" FORCE)
11
+
12
+ # Necessary to compile with -Accelerate, homebrew clang and openmp
13
+ # Took me way too long to figure out
14
+ add_compile_options(-stdlib=libc++)
15
+ add_link_options(
16
+ -stdlib=libc++
17
+ -L/opt/homebrew/opt/llvm/lib/c++
18
+ -Wl,-rpath,/opt/homebrew/opt/llvm/lib/c++
19
+ )
20
+
21
+ endif()
22
+
23
+ # Position-independent code for all targets (helps for Python extensions)
24
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
25
+
26
+ # Dependencies
27
+ find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
28
+ execute_process(
29
+ COMMAND "${Python_EXECUTABLE}" -m pybind11 --cmakedir
30
+ OUTPUT_VARIABLE pybind11_DIR
31
+ OUTPUT_STRIP_TRAILING_WHITESPACE
32
+ )
33
+ find_package(pybind11 CONFIG REQUIRED)
34
+
35
+ find_package(OpenMP REQUIRED)
36
+ if (OpenMP_CXX_FOUND)
37
+ if (APPLE)
38
+ # Apple/Homebrew Clang requires explicit flags
39
+ add_compile_options(-Xclang -fopenmp -I/opt/homebrew/opt/libomp/include)
40
+ add_link_options(-L/opt/homebrew/opt/libomp/lib -lomp)
41
+ else()
42
+ add_compile_options(${OpenMP_CXX_FLAGS})
43
+ add_link_options(${OpenMP_CXX_FLAGS})
44
+ endif()
45
+ endif()
46
+
47
+ # Use Accelerate on Apple, otherwise BLAS (MKL, OpenBLAS, etc)
48
+ if(APPLE)
49
+ find_library(ACCELERATE Accelerate REQUIRED)
50
+ else()
51
+ find_package(BLAS REQUIRED)
52
+ endif()
53
+
54
+ # Common interface for headers from Python/pybind11
55
+ add_library(kf_common INTERFACE)
56
+ target_link_libraries(kf_common INTERFACE pybind11::headers Python::Module)
57
+
58
+ # ---- Small helpers to avoid repetition --------------------------------------
59
+ # Track created modules/objlibs so we can link things in one go later
60
+ set(_KF_ALL_MODULES "")
61
+ set(_KF_ALL_OBJLIBS "")
62
+
63
+ # Define one extension module as a C++ object library + pybind11 module pair:
64
+ # kf_add_cpp_module(<base> <obj_src> <binding_src>)
65
+ # -> object lib: kf_<base> (compiled from <obj_src>)
66
+ # -> module target: _<base> (compiled from <binding_src> plus the object lib's objects)
67
+ function(kf_add_cpp_module base obj_src bind_src)
68
+ set(obj kf_${base}) # name of the object-library target
69
+ add_library(${obj} OBJECT ${obj_src})
70
+ target_link_libraries(${obj} PRIVATE kf_common) # pybind11/Python headers via the kf_common interface target
71
+ target_link_libraries(${obj} PRIVATE OpenMP::OpenMP_CXX) # OpenMP usage requirements for compiling the object lib
72
+
73
+ pybind11_add_module(_${base} MODULE
74
+ ${bind_src}
75
+ $<TARGET_OBJECTS:${obj}>
76
+ )
77
+ set_target_properties(_${base} PROPERTIES OUTPUT_NAME "_${base}")
78
+ target_link_libraries(_${base} PRIVATE OpenMP::OpenMP_CXX) # OpenMP usage requirements for linking the module
79
+
80
+ list(APPEND _KF_ALL_MODULES _${base}) # record the new targets in the tracking lists
81
+ list(APPEND _KF_ALL_OBJLIBS ${obj})
82
+ set(_KF_ALL_MODULES "${_KF_ALL_MODULES}" PARENT_SCOPE) # function() has its own scope: write the lists back to the caller
83
+ set(_KF_ALL_OBJLIBS "${_KF_ALL_OBJLIBS}" PARENT_SCOPE)
84
+ endfunction()
85
+
86
+ # Portable optimization; native tuning is opt-in
87
+ option(KF_USE_NATIVE "Enable -march/-mcpu=native style flags" OFF)
88
+
89
+ function(kf_apply_cxx_flags tgt) # Add PRIVATE optimization flags to target <tgt>, chosen by C++ compiler ID
90
+ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
91
+ target_compile_options(${tgt} PRIVATE
92
+ -O3 -ffast-math -ftree-vectorize -fopenmp
93
+ "$<$<BOOL:${KF_USE_NATIVE}>:-mcpu=native>" "$<$<BOOL:${KF_USE_NATIVE}>:-mtune=native>" # one quoted genex per flag: an unquoted genex containing a space is split into broken arguments
94
+ )
95
+ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
96
+ target_compile_options(${tgt} PRIVATE
97
+ -O3 -ffast-math
98
+ "$<$<BOOL:${KF_USE_NATIVE}>:-xHost>" "$<$<BOOL:${KF_USE_NATIVE}>:-mtune=native>" # native tuning only when KF_USE_NATIVE is ON; each genex expands to nothing otherwise
99
+ )
100
+ endif() # other compilers: no extra flags are added
101
+ endfunction()
102
+
103
+ # ---- C++ modules -------------------------------------------------------------
104
+ kf_add_cpp_module(kernels src/kernels.cpp src/bindings_kernels.cpp)
105
+ kf_add_cpp_module(invdist src/invdist.cpp src/invdist_bindings.cpp)
106
+ kf_add_cpp_module(fchl19 src/fchl19_representation.cpp src/bindings_fchl19.cpp)
107
+ kf_add_cpp_module(cholesky src/cholesky.cpp src/bindings_cholesky.cpp)
108
+
109
+ # Apply C++ flags to the object libs (not to the module targets)
110
+ foreach(obj ${_KF_ALL_OBJLIBS})
111
+ kf_apply_cxx_flags(${obj})
112
+ endforeach()
113
+
114
+ # ---- OpenMP (C++) ------------------------------------------------------------
115
+ if (OpenMP_CXX_FOUND)
116
+ target_link_libraries(_cholesky PRIVATE OpenMP::OpenMP_CXX)
117
+ target_link_libraries(_kernels PRIVATE OpenMP::OpenMP_CXX)
118
+ target_link_libraries(_fchl19 PRIVATE OpenMP::OpenMP_CXX)
119
+ target_link_libraries(_invdist PRIVATE OpenMP::OpenMP_CXX)
120
+ endif()
121
+
122
+ # ---- BLAS/LAPACK backend selection (link all modules) -----------------------
123
+ foreach(m ${_KF_ALL_MODULES})
124
+ if(APPLE)
125
+ target_link_libraries(${m} PRIVATE ${ACCELERATE})
126
+ elseif(WIN32)
127
+ target_link_libraries(${m} PRIVATE MKL::MKL)
128
+ else()
129
+ target_link_libraries(${m} PRIVATE BLAS::BLAS)
130
+ endif()
131
+ endforeach()
132
+
133
+ # ---- Install ----------------------------------------------------------------
134
+ install(TARGETS _kernels _invdist _fchl19 _cholesky
135
+ LIBRARY DESTINATION kernelforge # Linux/macOS
136
+ RUNTIME DESTINATION kernelforge # Windows (.pyd)
137
+ )
138
+ install(FILES python/kernelforge/__init__.py DESTINATION kernelforge)
@@ -0,0 +1,12 @@
1
+ install-linux:
2
+ CMAKE_ARGS="-DKF_USE_NATIVE=ON" uv pip install -e .[test] --verbose
3
+
4
+ install-macos:
5
+ CMAKE_ARGS="-DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DKF_USE_NATIVE=ON " uv pip install -e .[test] --verbose
6
+
7
+ test:
8
+ pytest
9
+
10
+ environment:
11
+ uv venv --python 3.14
12
+ uv pip install scikit-build-core pybind11
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.2
2
+ Name: kernelforge
3
+ Version: 0.1.13
4
+ Summary: Optimized Kernels for ML
5
+ Author: Anders Christensen
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/andersx/kernelforge
8
+ Project-URL: Issues, https://github.com/andersx/kernelforge/issues
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: numpy>=2.00
11
+ Provides-Extra: test
12
+ Requires-Dist: pytest>=8; extra == "test"
13
+ Requires-Dist: pytest-xdist; extra == "test"
14
+ Requires-Dist: pytest-cov; extra == "test"
15
+ Requires-Dist: pytest-timeout; extra == "test"
16
+ Description-Content-Type: text/markdown
17
+
18
+ # KernelForge - Optimized Kernels for ML
19
+
20
+ I really only care about writing optimized kernel code, so this project will be completed as I find additional time... XD
21
+
22
+ I'm reviving this project to finish an old project using random Fourier features for kernel ML.
23
+
24
+
25
+ # Installation
26
+
27
+ ```bash
28
+ conda env create -f environments/environment-dev.yaml
29
+ pip install -e .
30
+ pytest -v -s
31
+ ```
32
+ ## PyPI installation
33
+
34
+ Install the requirements (e.g. the conda env above) and install from PyPI.
35
+ This should work on both MacOS and Linux/PC:
36
+
37
+ ```bash
38
+ conda activate kernelforge-dev
39
+ pip install kernelforge
40
+ ```
41
+ This will install pre-compiled wheels with gfortran and linked against OpenBLAS on Linux and Accelerate on MacOS.
42
+ If you want to use MKL or other BLAS/LAPACK libraries, you need to compile from source, see below.
43
+
44
+
45
+ ## Intel compilers and MKL
46
+
47
+ It is 2025 so you can `sudo apt-get install intel-basekit` on Linux/PC to get the compilers and MKL.
48
+ Then set up the environment variables:
49
+ ```bash
50
+ source /opt/intel/oneapi/setvars.sh
51
+ ```
52
+ In this case, MKL will be autodetected by some CMake magic. If you additionally want to compile with Intel compilers, you can set the environment variables when running `pip install`:
53
+ ```bash
54
+ CC=icx CXX=icpx FC=ifx make install-linux
55
+ ```
56
+
57
+ In my experience, GCC/G++/GFortran with OpenBLAS is very similar to Intel API alternatives in terms of performance, perhaps even better.
58
+ On MacOS, GNU compilers with `-framework Accelerate` for BLAS/LAPACK is the default and is very fast on M-series macs.
59
+
60
+ ## Timings
61
+ I've rewritten a few of the kernels from the original QML code completely in C++.
62
+ There are performance gains in most cases.
63
+ These are primarily due to better use of BLAS routines for calculating, for example, Gramian sub-matrices with chunked DGEMM/DSYRK calls, etc.
64
+ In the gradient and Hessian matrices there are also some algorithmic improvements and pre-computed terms.
65
+ Memory usage might be a bit higher, but this could be optimized with more fine-grained chunking if needed.
66
+ More is coming as I find the time ...
67
+
68
+ Some speedups vs the original QML code are shown below:
69
+
70
+ | Benchmark | QML [s] | Kernelforge [s] |
71
+ |:---------------|------------:|--------------------:|
72
+ | Upper triangle Gaussian kernel (16K x 16K) | 1.82 | 0.64 |
73
+ | 1K FCHL19 descriptors (1K) | ? | 0.43 |
74
+ | 1K FCHL19 descriptors+jacobian (1K) | ? | 0.62 |
75
+ | FCHL19 Local Gaussian scalar kernel (10K x 10K) | 76.81 | 18.15 |
76
+ | FCHL19 Local Gaussian gradient kernel (1K x 2700K) | 32.54 | 1.52 |
77
+ | FCHL19 Local Gaussian Hessian kernel (5400K x 5400K) | 29.68 | 2.05 |
78
+
79
+ ## TODO list
80
+
81
+ The goal is to remove pain-points of existing QML libraries
82
+ - Removal of Fortran dependencies
83
+ - No Fortran-ordered arrays
84
+ - No Fortran compilers needed
85
+ - Simplified build system
86
+ - No cooked F2PY/Meson build system, just CMake and Pybind11
87
+ - Improved use of BLAS routines, with built-in chunking to avoid memory explosions
88
+ - Better use of pre-computed terms for single-point inference/MD kernels
89
+ - Low overhead with Pybind11 shims and better aligned memory?
90
+ - Simplified entrypoints that are compatible with RDKit, ASE, Scikit-learn, etc.
91
+ - A few high-level functions that do the most common tasks efficiently and correctly
92
+ - Efficient FCHL19 out-of-the-box
93
+ - Fast training with random Fourier features
94
+ - With derivatives
95
+
96
+
97
+ ## Priority list for the next months:
98
+
99
+ - [x] Finish the inverse-distance kernel and its Jacobian
100
+ - [x] Make Pybind11 interface
101
+ - [ ] Finalize the C++ interface
102
+ - [x] Finish the Gaussian kernel
103
+ - [x] Notebook with rMD17 example
104
+ - [x] Finish the Jacobian and Hessian kernels
105
+ - [x] Notebook with rMD17 forces example
106
+ - FCHL19 support:
107
+ - [x] Add FCHL19 descriptors
108
+ - [x] Add FCHL19 kernels (local/elemental)
109
+ - [x] Add FCHL19 descriptor with derivatives
110
+ - [x] Add FCHL19 kernel Jacobian
111
+ - [x] Add FCHL19 kernel Hessian (GDML-style)
112
+ - [ ] Improve FCHL19 kernel Jacobian performance (it's poor)
113
+ - Finish the random Fourier features kernel and its Jacobian
114
+ - [ ] Parallel random basis sampler
115
+ - [ ] RFF kernel for global descriptors
116
+ - [ ] SVD and QR solvers for rectangular matrices
117
+ - [ ] RFF kernel for local descriptors (FCHL19)
118
+ - [ ] RFF kernels with Cholesky solver and chunked DSYRK kernel updates
119
+ - [ ] RFF kernels with RFP format with chunked DSFRK kernel updates
120
+ - [ ] RFF kernel Jacobian for global descriptors
121
+ - [ ] RFF kernel Jacobian for local descriptors (FCHL19)
122
+ - [ ] Notebook with rMD17 random Fourier features examples
123
+
124
+ - Science:
125
+ - Benchmark full kernel vs RFF on rMD17 and QM7b and QM9
126
+ - Both FCHL19 and inverse-distance matrix
127
+
128
+ #### Todos:
129
+ - Housekeeping:
130
+ - [x] Pybind11 bindings and CMake build system
131
+ - [x] Setup CI with GitHub Actions
132
+ - [x] Rewrite existing kernels to C++ (no Fortran)
133
+ - [x] Setup GHA to build PyPI wheels
134
+ - [x] Test Linux build matrices
135
+ - [x] Test MacOS build matrices
136
+ - [ ] Test Windows build matrices
137
+ - [x] Add build for all Python versions >=3.11
138
+ - [ ] Plan structure for saving models for inference as `.npz` files
139
+ - Ensure correct linking with optimized BLAS/LAPACK libraries:
140
+ - [x] OpenBLAS (Linux) <- also used in wheels
141
+ - [x] MKL (Linux)
142
+ - [x] Accelerate (MacOS)
143
+ - Add global kernels:
144
+ - [x] Gaussian kernel
145
+ - [x] Jacobian/gradient kernel
146
+ - [ ] Optimized Jacobian kernel for single inference
147
+ - [x] Hessian kernel
148
+ - [x] GDML-like kernel
149
+ - [ ] Full GPR kernel
150
+ - Add local kernels:
151
+ - [x] Gaussian kernel
152
+ - [x] Jacobian/gradient kernel
153
+ - [x] Optimized Jacobian kernel for single inference
154
+ - [x] Hessian kernel (GDML-style)
155
+ - [ ] Full GPR kernel
156
+ - [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
157
+ - Add random Fourier features kernel code:
158
+ - [ ] Fourier-basis sampler
159
+ - [ ] RFF kernel
160
+ - [ ] RFF gradient kernel
161
+ - [ ] RFF chunked DSYRK kernel
162
+ - [ ] Optimized RFF gradient kernel for single inference/MD
163
+ - The same as above, just for Hadamard features when I find the time?
164
+ - GDML and sGDML kernels:
165
+ - [x] Inverse-distance matrix descriptor
166
+ - [ ] Packed Jacobian for inverse-distance matrix
167
+ - [x] GDML kernel (brute-force implemented)
168
+ - [ ] sGDML kernel (brute-force implemented)
169
+ - [ ] Full GPR kernel
170
+ - [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
171
+ - FCHL18 support:
172
+ - [ ] Complete rewrite of FCHL18 analytical scalar kernel in C++
173
+ - [ ] Stretch goal 1: Add new analytical FCHL18 kernel Jacobian
174
+ - [ ] Stretch goal 2: Add new analytical FCHL18 kernel Hessian (+GPR/GDML-style)
175
+ - [ ] Stretch goal 3: Attempt to optimize hyperparameters and cut-off functions
176
+ - Add standard solvers:
177
+ - [x] Cholesky in-place solver
178
+ - [x] L2-reg kwarg
179
+ - [x] Toggle destructive vs non-destructive
180
+ - [ ] QR and/or SVD for non-square matrices
181
+ - Add molecular descriptors with derivatives:
182
+ - [ ] Coulomb matrix + misc variants without derivatives
183
+ - [x] FCHL19 + derivatives
184
+ - [x] GDML-like inverse-distance matrix + derivatives
185
+ #### Stretch goals:
186
+ - [ ] Plan RDKit interface
187
+ - [ ] Plan Scikit-learn interface
188
+ - [ ] Plan ASE interface
@@ -0,0 +1,171 @@
1
+ # KernelForge - Optimized Kernels for ML
2
+
3
+ I really only care about writing optimized kernel code, so this project will be completed as I find additional time... XD
4
+
5
+ I'm reviving this project to finish an old project using random Fourier features for kernel ML.
6
+
7
+
8
+ # Installation
9
+
10
+ ```bash
11
+ conda env create -f environments/environment-dev.yaml
12
+ pip install -e .
13
+ pytest -v -s
14
+ ```
15
+ ## PyPI installation
16
+
17
+ Install the requirements (e.g. the conda env above) and install from PyPI.
18
+ This should work on both MacOS and Linux/PC:
19
+
20
+ ```bash
21
+ conda activate kernelforge-dev
22
+ pip install kernelforge
23
+ ```
24
+ This will install pre-compiled wheels with gfortran and linked against OpenBLAS on Linux and Accelerate on MacOS.
25
+ If you want to use MKL or other BLAS/LAPACK libraries, you need to compile from source, see below.
26
+
27
+
28
+ ## Intel compilers and MKL
29
+
30
+ It is 2025 so you can `sudo apt-get install intel-basekit` on Linux/PC to get the compilers and MKL.
31
+ Then set up the environment variables:
32
+ ```bash
33
+ source /opt/intel/oneapi/setvars.sh
34
+ ```
35
+ In this case, MKL will be autodetected by some CMake magic. If you additionally want to compile with Intel compilers, you can set the environment variables when running `pip install`:
36
+ ```bash
37
+ CC=icx CXX=icpx FC=ifx make install-linux
38
+ ```
39
+
40
+ In my experience, GCC/G++/GFortran with OpenBLAS is very similar to Intel API alternatives in terms of performance, perhaps even better.
41
+ On MacOS, GNU compilers with `-framework Accelerate` for BLAS/LAPACK is the default and is very fast on M-series macs.
42
+
43
+ ## Timings
44
+ I've rewritten a few of the kernels from the original QML code completely in C++.
45
+ There are performance gains in most cases.
46
+ These are primarily due to better use of BLAS routines for calculating, for example, Gramian sub-matrices with chunked DGEMM/DSYRK calls, etc.
47
+ In the gradient and Hessian matrices there are also some algorithmic improvements and pre-computed terms.
48
+ Memory usage might be a bit higher, but this could be optimized with more fine-grained chunking if needed.
49
+ More is coming as I find the time ...
50
+
51
+ Some speedups vs the original QML code are shown below:
52
+
53
+ | Benchmark | QML [s] | Kernelforge [s] |
54
+ |:---------------|------------:|--------------------:|
55
+ | Upper triangle Gaussian kernel (16K x 16K) | 1.82 | 0.64 |
56
+ | 1K FCHL19 descriptors (1K) | ? | 0.43 |
57
+ | 1K FCHL19 descriptors+jacobian (1K) | ? | 0.62 |
58
+ | FCHL19 Local Gaussian scalar kernel (10K x 10K) | 76.81 | 18.15 |
59
+ | FCHL19 Local Gaussian gradient kernel (1K x 2700K) | 32.54 | 1.52 |
60
+ | FCHL19 Local Gaussian Hessian kernel (5400K x 5400K) | 29.68 | 2.05 |
61
+
62
+ ## TODO list
63
+
64
+ The goal is to remove pain-points of existing QML libraries
65
+ - Removal of Fortran dependencies
66
+ - No Fortran-ordered arrays
67
+ - No Fortran compilers needed
68
+ - Simplified build system
69
+ - No cooked F2PY/Meson build system, just CMake and Pybind11
70
+ - Improved use of BLAS routines, with built-in chunking to avoid memory explosions
71
+ - Better use of pre-computed terms for single-point inference/MD kernels
72
+ - Low overhead with Pybind11 shims and better aligned memory?
73
+ - Simplified entrypoints that are compatible with RDKit, ASE, Scikit-learn, etc.
74
+ - A few high-level functions that do the most common tasks efficiently and correctly
75
+ - Efficient FCHL19 out-of-the-box
76
+ - Fast training with random Fourier features
77
+ - With derivatives
78
+
79
+
80
+ ## Priority list for the next months:
81
+
82
+ - [x] Finish the inverse-distance kernel and its Jacobian
83
+ - [x] Make Pybind11 interface
84
+ - [ ] Finalize the C++ interface
85
+ - [x] Finish the Gaussian kernel
86
+ - [x] Notebook with rMD17 example
87
+ - [x] Finish the Jacobian and Hessian kernels
88
+ - [x] Notebook with rMD17 forces example
89
+ - FCHL19 support:
90
+ - [x] Add FCHL19 descriptors
91
+ - [x] Add FCHL19 kernels (local/elemental)
92
+ - [x] Add FCHL19 descriptor with derivatives
93
+ - [x] Add FCHL19 kernel Jacobian
94
+ - [x] Add FCHL19 kernel Hessian (GDML-style)
95
+ - [ ] Improve FCHL19 kernel Jacobian performance (it's poor)
96
+ - Finish the random Fourier features kernel and its Jacobian
97
+ - [ ] Parallel random basis sampler
98
+ - [ ] RFF kernel for global descriptors
99
+ - [ ] SVD and QR solvers for rectangular matrices
100
+ - [ ] RFF kernel for local descriptors (FCHL19)
101
+ - [ ] RFF kernels with Cholesky solver and chunked DSYRK kernel updates
102
+ - [ ] RFF kernels with RFP format with chunked DSFRK kernel updates
103
+ - [ ] RFF kernel Jacobian for global descriptors
104
+ - [ ] RFF kernel Jacobian for local descriptors (FCHL19)
105
+ - [ ] Notebook with rMD17 random Fourier features examples
106
+
107
+ - Science:
108
+ - Benchmark full kernel vs RFF on rMD17 and QM7b and QM9
109
+ - Both FCHL19 and inverse-distance matrix
110
+
111
+ #### Todos:
112
+ - Housekeeping:
113
+ - [x] Pybind11 bindings and CMake build system
114
+ - [x] Setup CI with GitHub Actions
115
+ - [x] Rewrite existing kernels to C++ (no Fortran)
116
+ - [x] Setup GHA to build PyPI wheels
117
+ - [x] Test Linux build matrices
118
+ - [x] Test MacOS build matrices
119
+ - [ ] Test Windows build matrices
120
+ - [x] Add build for all Python versions >=3.11
121
+ - [ ] Plan structure for saving models for inference as `.npz` files
122
+ - Ensure correct linking with optimized BLAS/LAPACK libraries:
123
+ - [x] OpenBLAS (Linux) <- also used in wheels
124
+ - [x] MKL (Linux)
125
+ - [x] Accelerate (MacOS)
126
+ - Add global kernels:
127
+ - [x] Gaussian kernel
128
+ - [x] Jacobian/gradient kernel
129
+ - [ ] Optimized Jacobian kernel for single inference
130
+ - [x] Hessian kernel
131
+ - [x] GDML-like kernel
132
+ - [ ] Full GPR kernel
133
+ - Add local kernels:
134
+ - [x] Gaussian kernel
135
+ - [x] Jacobian/gradient kernel
136
+ - [x] Optimized Jacobian kernel for single inference
137
+ - [x] Hessian kernel (GDML-style)
138
+ - [ ] Full GPR kernel
139
+ - [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
140
+ - Add random Fourier features kernel code:
141
+ - [ ] Fourier-basis sampler
142
+ - [ ] RFF kernel
143
+ - [ ] RFF gradient kernel
144
+ - [ ] RFF chunked DSYRK kernel
145
+ - [ ] Optimized RFF gradient kernel for single inference/MD
146
+ - The same as above, just for Hadamard features when I find the time?
147
+ - GDML and sGDML kernels:
148
+ - [x] Inverse-distance matrix descriptor
149
+ - [ ] Packed Jacobian for inverse-distance matrix
150
+ - [x] GDML kernel (brute-force implemented)
151
+ - [ ] sGDML kernel (brute-force implemented)
152
+ - [ ] Full GPR kernel
153
+ - [ ] Optimized GPR kernel with pre-computed terms for single inference/MD
154
+ - FCHL18 support:
155
+ - [ ] Complete rewrite of FCHL18 analytical scalar kernel in C++
156
+ - [ ] Stretch goal 1: Add new analytical FCHL18 kernel Jacobian
157
+ - [ ] Stretch goal 2: Add new analytical FCHL18 kernel Hessian (+GPR/GDML-style)
158
+ - [ ] Stretch goal 3: Attempt to optimize hyperparameters and cut-off functions
159
+ - Add standard solvers:
160
+ - [x] Cholesky in-place solver
161
+ - [x] L2-reg kwarg
162
+ - [x] Toggle destructive vs non-destructive
163
+ - [ ] QR and/or SVD for non-square matrices
164
+ - Add molecular descriptors with derivatives:
165
+ - [ ] Coulomb matrix + misc variants without derivatives
166
+ - [x] FCHL19 + derivatives
167
+ - [x] GDML-like inverse-distance matrix + derivatives
168
+ #### Stretch goals:
169
+ - [ ] Plan RDKit interface
170
+ - [ ] Plan Scikit-learn interface
171
+ - [ ] Plan ASE interface