kernelforge 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
+ name: CI
+
+ on:
+   push:
+     branches: [ master ]
+   pull_request:
+
+ jobs:
+   test:
+     name: pytest (${{ matrix.os }} / py${{ matrix.python-version }})
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ubuntu-22.04]  # add 'windows-2022' if desired
+         python-version: ["3.12"]
+
+     steps:
+       - uses: actions/checkout@v4
+
+       - uses: actions/setup-python@v5
+         with:
+           python-version: ${{ matrix.python-version }}
+
+       - name: Cache pip
+         uses: actions/cache@v4
+         with:
+           path: ~/.cache/pip
+           key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
+
+       # macOS: Accelerate, Linux: OpenBLAS from system or wheels; keep it simple for CI
+       - name: Install build deps
+         run: |
+           python -m pip install -U pip
+           python -m pip install -U build ".[test]"
+           # Optional: OpenBLAS dev on Ubuntu if you link system BLAS
+           if [[ "$RUNNER_OS" == "Linux" ]]; then sudo apt-get update && sudo apt-get install -y libopenblas-dev; fi
+
+       - name: Build & install (editable)
+         env:
+           # Make MKL/OpenBLAS behave in CI
+           OMP_NUM_THREADS: "1"
+           MKL_NUM_THREADS: "1"
+           MKL_THREADING_LAYER: "SEQUENTIAL"
+           MKL_INTERFACE_LAYER: "LP64"
+         run: |
+           python -m pip install -e . -v
+
+       - name: Pytest (fast)
+         env:
+           OMP_NUM_THREADS: "1"
+           MKL_NUM_THREADS: "1"
+           MKL_THREADING_LAYER: "SEQUENTIAL"
+           MKL_INTERFACE_LAYER: "LP64"
+         run: |
+           pytest -q -ra -k "not slow" -x
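The build and test steps pin `OMP_NUM_THREADS`/`MKL_NUM_THREADS` (and the MKL threading layer) so that BLAS and OpenMP thread pools cannot oversubscribe the CI runner. A minimal way to confirm the pinning took effect from inside a test session, assuming the optional `threadpoolctl` package (not a kernelforge dependency), would be:

```python
# Hedged sketch: confirm that native thread pools honour the pinned env vars.
# Assumes the optional threadpoolctl package; it is not part of kernelforge's dependencies.
from threadpoolctl import threadpool_info

for pool in threadpool_info():
    # One entry per loaded threading layer (OpenBLAS, MKL, OpenMP runtime, ...)
    print(pool["internal_api"], pool["num_threads"])
```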
@@ -0,0 +1,74 @@
+ # .github/workflows/release.yml
+ name: Build & Publish
+
+ on:
+   release:
+     types: [published]
+   workflow_dispatch:
+
+ jobs:
+   build-wheels:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ubuntu-22.04]  # macos-14]
+
+     steps:
+       - uses: actions/checkout@v4
+
+       - uses: actions/setup-python@v5
+         with:
+           python-version: "3.12"  # host Python, cibuildwheel makes all others
+
+       - name: Install cibuildwheel
+         run: python -m pip install cibuildwheel==2.*
+
+       - name: Build wheels
+         env:
+           CIBW_BUILD: "cp312-*"
+           CIBW_SKIP: "pp* *-musllinux_* cp*-manylinux_i686"
+           CIBW_TEST_COMMAND: "pytest -q {project}/tests -k 'not slow' -x"
+           CIBW_TEST_EXTRAS: "test"
+           CIBW_ENVIRONMENT: >
+             OMP_NUM_THREADS=1
+             OPENBLAS_NUM_THREADS=1
+           CIBW_BEFORE_BUILD_LINUX: |
+             curl -L https://anaconda.org/multibuild-wheels-staging/openblas-libs/v0.3.27/download/openblas-v0.3.27-manylinux2014_x86_64.tar.gz | tar xz
+             cp -r usr /opt/openblas
+             ls -l /opt/openblas/local/lib/libopenblas.so
+           CIBW_ENVIRONMENT_LINUX: >
+             LD_LIBRARY_PATH=/opt/openblas/local/lib:$LD_LIBRARY_PATH
+             CMAKE_ARGS="-DBLAS_LIBRARIES=/opt/openblas/local/lib/libopenblas.so -DBLAS_INCLUDE_DIR=/opt/openblas/local/include -DBLAS_VENDOR=OpenBLAS"
+             OPENBLAS_NUM_THREADS=1
+             OMP_NUM_THREADS=1
+           CIBW_ENVIRONMENT_PASS_LINUX: >
+             LD_LIBRARY_PATH
+             CMAKE_ARGS
+             OPENBLAS_NUM_THREADS
+             OMP_NUM_THREADS
+         run: python -m cibuildwheel --output-dir wheelhouse
+
+       - name: Build sdist
+         run: python -m pip install build && python -m build --sdist -o wheelhouse
+
+       - name: Upload artifacts
+         uses: actions/upload-artifact@v4
+         with:
+           name: wheels
+           path: wheelhouse/*
+
+   publish:
+     needs: build-wheels
+     runs-on: ubuntu-22.04
+     if: github.event_name == 'release'
+     steps:
+       - uses: actions/download-artifact@v4
+         with:
+           name: wheels
+           path: dist
+       - uses: pypa/gh-action-pypi-publish@v1.11.0
+         with:
+           user: __token__
+           password: ${{ secrets.PYPI_API_TOKEN }}
+
@@ -0,0 +1,210 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ CMakeFiles/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ CMakeCache.txt
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
@@ -0,0 +1,51 @@
+ cmake_minimum_required(VERSION 3.18)
+ project(kernelforge LANGUAGES C CXX Fortran)
+
+ # Python + pybind11
+ find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
+ find_package(pybind11 CONFIG REQUIRED)
+
+ # Fortran kernels as an object library (for linking into the Python module)
+ add_library(kf_fortran OBJECT src/kernel.f90)
+ set_property(TARGET kf_fortran PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+ # Build the Python extension module via pybind11 and link the Fortran objects
+ pybind11_add_module(_kernelforge MODULE
+   src/bindings.cpp
+   $<TARGET_OBJECTS:kf_fortran>
+ )
+ # Ensure the built filename is exactly "_kernelforge.*"
+ set_target_properties(_kernelforge PROPERTIES OUTPUT_NAME "_kernelforge")
+
+ # Optional: OpenMP for Fortran if you use it
+ find_package(OpenMP)
+ if (OpenMP_Fortran_FOUND)
+   target_link_libraries(_kernelforge PRIVATE OpenMP::OpenMP_Fortran)
+ endif()
+
+ # Optional BLAS/LAPACK backends (enable later if needed)
+ if(APPLE)
+   find_library(ACCELERATE Accelerate REQUIRED)
+   target_link_libraries(_kernelforge PRIVATE ${ACCELERATE})
+ elseif(WIN32)
+   find_package(MKL CONFIG REQUIRED)
+   target_link_libraries(_kernelforge PRIVATE MKL::MKL)
+ else()
+   find_package(BLAS REQUIRED)
+   target_link_libraries(_kernelforge PRIVATE BLAS::BLAS)
+ endif()
+
+ # Aggressive, host-specific optimization flags (not suitable for portable wheels). Override via env if you want.
+ if (CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
+   target_compile_options(kf_fortran PRIVATE -O3 -ipo -xHost -fp-model fast=2 -no-prec-div -fno-alias -qopenmp)
+ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
+   target_compile_options(kf_fortran PRIVATE -O3 -fopenmp -mcpu=native -mtune=native -ffast-math -ftree-vectorize)
+ endif()
+
+ # Install the compiled extension into the Python package and the Python shim
+ install(TARGETS _kernelforge
+   LIBRARY DESTINATION kernelforge   # Linux/macOS
+   RUNTIME DESTINATION kernelforge   # Windows (.pyd)
+ )
+ install(FILES python/kernelforge/__init__.py DESTINATION kernelforge)
+
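The two `install()` rules place the compiled `_kernelforge` extension and the pure-Python `__init__.py` shim into the same `kernelforge` package directory of the wheel. A quick sanity check after installation (a sketch; the exact extension suffix depends on platform and Python version):

```python
# Sketch: list the installed package directory; expect __init__.py plus the
# platform-specific _kernelforge extension (.so on Linux/macOS, .pyd on Windows).
import pathlib

import kernelforge

pkg_dir = pathlib.Path(kernelforge.__file__).parent
print(sorted(p.name for p in pkg_dir.iterdir()))
```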
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Anders Steen Christensen
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,5 @@
+ install:
+ 	pip install -e .[test] --verbose
+
+ test:
+ 	pytest
@@ -0,0 +1,41 @@
+ Metadata-Version: 2.2
+ Name: kernelforge
+ Version: 0.1.2
+ Summary: Fortran kernels with C ABI and Python bindings (CMake/skbuild/pybind11)
+ Author: Anders Christensen
+ License: MIT
+ Project-URL: Homepage, https://github.com/youruser/kernelforge
+ Project-URL: Issues, https://github.com/youruser/kernelforge/issues
+ Requires-Python: >=3.9
+ Requires-Dist: numpy>=2.00
+ Provides-Extra: test
+ Requires-Dist: pytest>=8; extra == "test"
+ Requires-Dist: pytest-xdist; extra == "test"
+ Requires-Dist: pytest-cov; extra == "test"
+ Requires-Dist: pytest-timeout; extra == "test"
+ Description-Content-Type: text/markdown
+
+ # kernelforge
+ Optimized kernels for ML
+
+ - Built without F2PY or Meson (CMake + scikit-build-core + pybind11 instead)
+
+ # Installation
+
+ ```bash
+ pip install -e .
+ pytest -v -s
+ ```
+
+ ## Intel compilers and MKL
+
+ GNU compilers are used by default. To use the Intel compilers and MKL instead, first source the oneAPI environment:
+
+ ```bash
+ source /opt/intel/oneapi/setvars.sh
+ ```
+ In this case, MKL is autodetected and used. If you additionally want to compile with the Intel compilers, set the compiler environment variables when running `make install` (which wraps `pip install -e .`):
+
+ ```bash
+ CC=icx CXX=icpx FC=ifx make install
+ ```
@@ -0,0 +1,24 @@
+ # kernelforge
+ Optimized kernels for ML
+
+ - Built without F2PY or Meson (CMake + scikit-build-core + pybind11 instead)
+
+ # Installation
+
+ ```bash
+ pip install -e .
+ pytest -v -s
+ ```
+
+ ## Intel compilers and MKL
+
+ GNU compilers are used by default. To use the Intel compilers and MKL instead, first source the oneAPI environment:
+
+ ```bash
+ source /opt/intel/oneapi/setvars.sh
+ ```
+ In this case, MKL is autodetected and used. If you additionally want to compile with the Intel compilers, set the compiler environment variables when running `make install` (which wraps `pip install -e .`):
+
+ ```bash
+ CC=icx CXX=icpx FC=ifx make install
+ ```
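The README stops after installation; the sketch below (modelled on the bundled tests, not part of the shipped README) shows the three exported functions in use. Both kernel routines expect a Fortran-ordered `(rep_size, n)` array and fill only the upper triangle of the returned matrix.

```python
# Usage sketch based on tests/ and the pybind11 docstrings.
import numpy as np
import kernelforge as kf

# Packed inverse distances from (N, 3) Cartesian coordinates
coords = np.random.rand(10, 3)
d = kf.inverse_distance(coords)      # shape (N*(N-1)//2,)

# Gaussian-type kernel exp(alpha * ||x_i - x_j||^2) from (rep_size, n) representations
rep_size, n = 512, 64
X = np.asfortranarray(np.random.rand(rep_size, n))
alpha = -1.0 / rep_size              # negative prefactor, as in the tests
K = kf.kernel_symm_blas(X, alpha)    # (n, n); only the upper triangle is written

# Mirror the upper triangle if a fully symmetric matrix is needed
iu = np.triu_indices(n, 1)
K[iu[1], iu[0]] = K[iu]
```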
@@ -0,0 +1,36 @@
+ [build-system]
+ requires = ["scikit-build-core>=0.9", "pybind11", "setuptools"]
+ build-backend = "scikit_build_core.build"
+
+ [project]
+ name = "kernelforge"
+ version = "0.1.2"
+ description = "Fortran kernels with C ABI and Python bindings (CMake/skbuild/pybind11)"
+ readme = "README.md"
+ license = {text = "MIT"}
+ authors = [{name = "Anders Christensen"}]
+ requires-python = ">=3.9"
+ dependencies = [
+     "numpy>=2.00",  # required at runtime
+ ]
+
+ [project.optional-dependencies]
+ test = ["pytest>=8", "pytest-xdist", "pytest-cov", "pytest-timeout"]
+
+ [project.urls]
+ Homepage = "https://github.com/youruser/kernelforge"
+ Issues = "https://github.com/youruser/kernelforge/issues"
+
+ [tool.scikit-build]
+ wheel.expand-macos-universal-tags = true
+ wheel.py-api = "py3"
+ cmake.build-type = "Release"
+ cmake.verbose = true
+ wheel.packages = ["python/kernelforge"]
+
+ # optional: put compiled outputs under build/{tag}/ to avoid clashes
+ # build-dir = "build/{wheel_tag}"
+
+ [tool.scikit-build.cmake.define]
+ CMAKE_VERBOSE_MAKEFILE = "ON"
+ CMAKE_BUILD_TYPE = "Release"
@@ -0,0 +1,2 @@
+ [pytest]
+ addopts = -v --tb=short -rA
@@ -0,0 +1,2 @@
+ from ._kernelforge import inverse_distance, kernel_symm_simple, kernel_symm_blas
+ __all__ = ["inverse_distance", "kernel_symm_simple", "kernel_symm_blas"]
@@ -0,0 +1,89 @@
+ #include <pybind11/pybind11.h>
+ #include <pybind11/numpy.h>
+ extern "C" {
+     void compute_inverse_distance(const double* x_3_by_n, int n, double* d_packed);
+     void kernel_symm_simple(const double* x, int lda, int n, double* k, int ldk, double alpha);
+     void kernel_symm_blas(const double* x, int lda, int n, double* k, int ldk, double alpha);
+ }
+
+ namespace py = pybind11;
+
+ py::array_t<double> inverse_distance(py::array_t<double, py::array::c_style | py::array::forcecast> X) {
+     auto buf = X.request();
+     if (buf.ndim != 2 || buf.shape[1] != 3) {
+         throw std::runtime_error("X must have shape (N,3)");
+     }
+     const int n = static_cast<int>(buf.shape[0]);
+
+     // D packed length
+     const ssize_t m = static_cast<ssize_t>(n) * (n - 1) / 2;
+     auto D = py::array_t<double>(m);
+
+     // Pass row-major (N,3) as transposed view (3,N) to Fortran without copy:
+     // NumPy will give a view; pybind11 exposes data pointer for the view.
+     py::array_t<double> XT({3, n}, {buf.strides[1], buf.strides[0]}, static_cast<double*>(buf.ptr), X);
+
+     compute_inverse_distance(static_cast<const double*>(XT.request().ptr), n,
+                              static_cast<double*>(D.request().ptr));
+     return D;
+ }
+
+ py::array_t<double> kernel_symm_simple_py(
+     py::array_t<double, py::array::forcecast | py::array::f_style> X,
+     double alpha
+ ) {
+     // Require (rep_size, n) in Fortran order; forcecast|f_style will copy if needed.
+     auto xb = X.request();
+     if (xb.ndim != 2) {
+         throw std::runtime_error("X must be 2D with shape (rep_size, n) in column-major (Fortran) order");
+     }
+     const int lda = static_cast<int>(xb.shape[0]);
+     const int n = static_cast<int>(xb.shape[1]);
+
+     // Allocate K as Fortran-order (n x n): stride0 = 8, stride1 = n*8
+     auto K = py::array_t<double>({n, n}, {sizeof(double), static_cast<ssize_t>(n)*sizeof(double)});
+
+     kernel_symm_simple(static_cast<const double*>(xb.ptr),
+                        lda, n,
+                        static_cast<double*>(K.request().ptr),
+                        /*ldk=*/n, alpha);
+
+     return K;
+ }
+
+
+ py::array_t<double> kernel_symm_blas_py(
+     py::array_t<double, py::array::forcecast | py::array::f_style> X,
+     double alpha
+ ) {
+     // Require (rep_size, n) in Fortran order; forcecast|f_style will copy if needed.
+     auto xb = X.request();
+     if (xb.ndim != 2) {
+         throw std::runtime_error("X must be 2D with shape (rep_size, n) in column-major (Fortran) order");
+     }
+     const int lda = static_cast<int>(xb.shape[0]);
+     const int n = static_cast<int>(xb.shape[1]);
+
+     // Allocate K as Fortran-order (n x n): stride0 = 8, stride1 = n*8
+     auto K = py::array_t<double>({n, n}, {sizeof(double), static_cast<ssize_t>(n)*sizeof(double)});
+
+     kernel_symm_blas(static_cast<const double*>(xb.ptr),
+                      lda, n,
+                      static_cast<double*>(K.request().ptr),
+                      /*ldk=*/n, alpha);
+
+     return K;
+ }
+
+ PYBIND11_MODULE(_kernelforge, m) {
+     m.doc() = "kernelforge: Fortran kernels with C ABI and Python bindings";
+     m.def("inverse_distance", &inverse_distance, "Compute packed inverse distance matrix from (N,3) coordinates");
+     m.def("kernel_symm_simple", &kernel_symm_simple_py,
+           "Compute K (upper triangle) with Gaussian-like exp(alpha * ||xi-xj||^2). "
+           "X must be shape (rep_size, n), Fortran-order.");
+     m.def("kernel_symm_blas", &kernel_symm_blas_py,
+           "Compute K (upper triangle) with Gaussian-like exp(alpha * ||xi-xj||^2). "
+           "X must be shape (rep_size, n), Fortran-order.");
+
+ }
+
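Both kernel wrappers take `py::array::f_style | py::array::forcecast`, so a C-ordered NumPy array is accepted but silently copied into column-major layout before the Fortran routine sees it; passing `np.asfortranarray(...)` avoids that copy. A hedged illustration of the equivalence (comparing only the upper triangle, since the lower triangle is left unwritten):

```python
# Sketch: C-ordered and Fortran-ordered inputs give the same kernel values;
# the C-ordered call just pays for an internal copy (forcecast).
import numpy as np
import kernelforge as kf

rep, n = 128, 16
X_c = np.random.default_rng(0).random((rep, n))   # C-ordered
X_f = np.asfortranarray(X_c)                      # Fortran-ordered, no copy in the binding
alpha = -1.0 / rep

iu = np.triu_indices(n)                           # only the upper triangle is defined
K_c = kf.kernel_symm_simple(X_c, alpha)
K_f = kf.kernel_symm_simple(X_f, alpha)
assert np.allclose(K_c[iu], K_f[iu])
```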
@@ -0,0 +1,108 @@
+ module kernelforge_mod
+
+     use, intrinsic :: iso_c_binding
+     implicit none
+
+ contains
+
+     ! Example kernel: inverse distance (packed upper triangle)
+     subroutine compute_inverse_distance(x, n, d) bind(C, name="compute_inverse_distance")
+
+         implicit none
+
+         integer(c_int), value :: n
+         real(c_double), intent(in) :: x(3,n)           ! expect (3,n)
+         real(c_double), intent(out) :: d(n*(n-1)/2)    ! packed upper triangle
+
+         integer :: i, j, idx
+         real(c_double) :: dx, dy, dz, rij2, rij
+
+         idx = 0
+         do j = 2, n
+             do i = 1, j-1
+                 idx = idx + 1
+                 dx = x(1,i) - x(1,j)
+                 dy = x(2,i) - x(2,j)
+                 dz = x(3,i) - x(3,j)
+                 rij2 = dx*dx + dy*dy + dz*dz
+                 rij = sqrt(rij2)
+                 d(idx) = 1.0d0 / rij
+             end do
+         end do
+     end subroutine compute_inverse_distance
+
+
+     subroutine kernel_symm_simple(X, lda, n, K, ldk, alpha) bind(C, name="kernel_symm_simple")
+
+         integer(c_int), value :: lda, n, ldk
+         real(c_double), intent(in) :: X(lda, *)
+         real(c_double), intent(inout) :: K(ldk, *)
+         real(c_double), value :: alpha
+
+         integer :: i, j
+         real(c_double) :: dist2
+
+         !$omp parallel do private(i, j, dist2) shared(X, K, alpha, n) schedule(guided)
+         do j = 1, n
+             do i = 1, j
+                 dist2 = sum((X(:, i) - X(:, j))**2)
+                 K(i, j) = exp(alpha * dist2)
+             end do
+         end do
+         !$omp end parallel do
+
+     end subroutine kernel_symm_simple
+
+
+     subroutine kernel_symm_blas(X, lda, n, K, ldk, alpha) bind(C, name="kernel_symm_blas")
+
+         use, intrinsic :: iso_c_binding, only: c_int, c_double
+         use, intrinsic :: iso_fortran_env, only: dp => real64
+         use omp_lib
+
+         implicit none
+
+         ! C ABI args
+         integer(c_int), value :: lda, n, ldk
+         real(c_double), intent(in) :: X(lda,*)
+         real(c_double), intent(inout) :: K(ldk,*)
+         real(c_double), value :: alpha
+
+         ! Fortran default integers for BLAS calls
+         integer :: lda_f, n_f, ldk_f, rep_size_f
+         integer :: i, j
+         real(c_double), allocatable :: diag(:), onevec(:)
+
+         ! Copy c_int (by-value) to default INTEGERs for BLAS (expects default INTEGER by ref)
+         lda_f = int(lda)
+         n_f = int(n)
+         ldk_f = int(ldk)
+
+         ! Rep size is the first dim of X; keep as default INTEGER
+         rep_size_f = lda_f
+
+         ! Gram-matrix term: upper triangle of -2*alpha * X^T X via DSYRK
+         call dsyrk('U', 'T', n_f, rep_size_f, -2.0_dp * alpha, X, lda_f, 0.0_dp, K, ldk_f)
+
+         allocate(diag(n_f), onevec(n_f))
+         diag(:) = -0.5_dp * [ (K(i,i), i = 1, n_f) ]
+         onevec(:) = 1.0_dp
+
+         ! Add the self-inner products alpha*(||x_i||^2 + ||x_j||^2) to form alpha * ||x_i - x_j||^2
+         call dsyr2('U', n_f, 1.0_dp, onevec, 1, diag, 1, K, ldk_f)
+         deallocate(diag, onevec)
+
+         ! The elementwise EXP double loop is cheap compared to DSYRK anyway.
+         !$omp parallel do private(i, j) shared(K, n) schedule(guided)
+         do j = 1, n
+             do i = 1, j
+                 K(i, j) = exp(K(i, j))
+             end do
+         end do
+         !$omp end parallel do
+
+     end subroutine kernel_symm_blas
+
+
+ end module kernelforge_mod
+
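`kernel_symm_blas` assembles `alpha * ||x_i - x_j||^2` from two BLAS updates: `dsyrk` writes `-2*alpha * X^T X` into the upper triangle, and `dsyr2` then adds `alpha*(||x_i||^2 + ||x_j||^2)` via an outer product with a vector of ones, after which the elementwise `exp` is applied. A NumPy transcription of the same identity, for exposition only (not the shipped code path):

```python
# NumPy transcription of the dsyrk + dsyr2 construction in kernel_symm_blas.
import numpy as np

rng = np.random.default_rng(0)
rep, n = 64, 10
X = rng.random((rep, n))
alpha = -1.0 / rep

G = -2.0 * alpha * (X.T @ X)              # dsyrk step: -2*alpha * Gram matrix
d = -0.5 * np.diag(G)                     # alpha * ||x_i||^2, recovered from the diagonal
K = np.exp(G + d[:, None] + d[None, :])   # dsyr2 step adds d_i + d_j, then elementwise exp

# Brute-force reference: exp(alpha * ||x_i - x_j||^2)
K_ref = np.exp(alpha * ((X[:, :, None] - X[:, None, :]) ** 2).sum(axis=0))
assert np.allclose(K, K_ref)
```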
@@ -0,0 +1,99 @@
+ import numpy as np
+ from time import time
+ import kernelforge as kf
+ import pytest
+
+ def test_inverse_distance_shapes():
+     X = np.random.rand(5, 3)
+     D = kf.inverse_distance(X)
+     assert D.shape == (5*4//2,)
+
+
+ def test_kernel_simple_runs():
+     rep, n = 512, 64
+     rng = np.random.default_rng(0)
+     X = np.asfortranarray(rng.random((rep, n)))
+     alpha = -1.0 / rep
+
+     K = kf.kernel_symm_simple(X, alpha)
+     assert K.shape == (n, n)
+
+     # Symmetrize since only upper triangle is written
+     iu = np.triu_indices(n, 1)
+     K[(iu[1], iu[0])] = K[iu]
+     # Check diagonal ~ 1.0
+     assert np.allclose(np.diag(K), 1.0)
+
+     # Off-diagonal entries should be between 0 and 1
+     off_diag = K[iu]
+     assert np.all((off_diag >= 0.0) & (off_diag <= 1.0))
+
+
+ def test_kernel_blas_runs():
+     rep, n = 512, 64
+     rng = np.random.default_rng(0)
+     X = np.asfortranarray(rng.random((rep, n)))
+     alpha = -1.0 / rep
+
+     K = kf.kernel_symm_blas(X, alpha)
+     assert K.shape == (n, n)
+
+     # Symmetrize since only upper triangle is written
+     iu = np.triu_indices(n, 1)
+     K[(iu[1], iu[0])] = K[iu]
+
+     # Check diagonal ~ 1.0
+     assert np.allclose(np.diag(K), 1.0)
+
+     # Off-diagonal entries should be between 0 and 1
+     off_diag = K[iu]
+     assert np.all((off_diag >= 0.0) & (off_diag <= 1.0))
+
+ @pytest.mark.slow
+ def test_kernel_simple_time():
+     rep, n = 512, 16000
+     rng = np.random.default_rng(0)
+     X = np.asfortranarray(rng.random((rep, n)))
+     alpha = -1.0 / rep
+
+     start = time()
+     K = kf.kernel_symm_simple(X, alpha)
+     end = time()
+     print(f"Kernel SIMPLE took {end - start:.2f} seconds for {n} points.")
+     assert K.shape == (n, n)
+
+     # Symmetrize since only upper triangle is written
+     iu = np.triu_indices(n, 1)
+     K[(iu[1], iu[0])] = K[iu]
+     # Check diagonal ~ 1.0
+     assert np.allclose(np.diag(K), 1.0)
+
+     # Off-diagonal entries should be between 0 and 1
+     off_diag = K[iu]
+     assert np.all((off_diag >= 0.0) & (off_diag <= 1.0))
+
+
+ @pytest.mark.slow
+ def test_kernel_blas_time():
+     rep, n = 512, 16000
+     rng = np.random.default_rng(0)
+     X = np.asfortranarray(rng.random((rep, n)))
+     alpha = -1.0 / rep
+
+     start = time()
+     K = kf.kernel_symm_blas(X, alpha)
+
+     end = time()
+     print(f"Kernel BLAS took {end - start:.2f} seconds for {n} points.")
+     assert K.shape == (n, n)
+
+     # Symmetrize since only upper triangle is written
+     iu = np.triu_indices(n, 1)
+     K[(iu[1], iu[0])] = K[iu]
+
+     # Check diagonal ~ 1.0
+     assert np.allclose(np.diag(K), 1.0)
+
+     # Off-diagonal entries should be between 0 and 1
+     off_diag = K[iu]
+     assert np.all((off_diag >= 0.0) & (off_diag <= 1.0))
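The suite checks each kernel against shape, diagonal, and range invariants, but never cross-checks the two implementations against each other; a small extra test along these lines could be worth adding (a sketch, not part of the shipped suite):

```python
# Sketch of an additional cross-check test (hypothetical, not in the released package).
import numpy as np
import kernelforge as kf


def test_simple_and_blas_agree():
    rep, n = 256, 32
    rng = np.random.default_rng(42)
    X = np.asfortranarray(rng.random((rep, n)))
    alpha = -1.0 / rep

    iu = np.triu_indices(n)  # only the upper triangle is defined
    K_simple = kf.kernel_symm_simple(X, alpha)
    K_blas = kf.kernel_symm_blas(X, alpha)
    assert np.allclose(K_simple[iu], K_blas[iu])
```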