csp5 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. csp5-0.2.5/PKG-INFO +197 -0
  2. csp5-0.2.5/README.md +181 -0
  3. csp5-0.2.5/pyproject.toml +45 -0
  4. csp5-0.2.5/setup.cfg +4 -0
  5. csp5-0.2.5/setup.py +86 -0
  6. csp5-0.2.5/src/csp5/__init__.py +52 -0
  7. csp5-0.2.5/src/csp5/_native/__init__.py +6 -0
  8. csp5-0.2.5/src/csp5/_native/dp_backend.py +86 -0
  9. csp5-0.2.5/src/csp5/_native/fastmurty_clink.py +209 -0
  10. csp5-0.2.5/src/csp5/_native/libmatching_dp.so +0 -0
  11. csp5-0.2.5/src/csp5/_native/mhtda.so +0 -0
  12. csp5-0.2.5/src/csp5/_native/murty_backend.py +71 -0
  13. csp5-0.2.5/src/csp5/_native/src/fastmurty/LICENSE +21 -0
  14. csp5-0.2.5/src/csp5/_native/src/fastmurty/da.c +300 -0
  15. csp5-0.2.5/src/csp5/_native/src/fastmurty/da.h +47 -0
  16. csp5-0.2.5/src/csp5/_native/src/fastmurty/murtysplitDense.c +170 -0
  17. csp5-0.2.5/src/csp5/_native/src/fastmurty/murtysplitDense.h +28 -0
  18. csp5-0.2.5/src/csp5/_native/src/fastmurty/murtysplitSparse.c +185 -0
  19. csp5-0.2.5/src/csp5/_native/src/fastmurty/murtysplitSparse.h +30 -0
  20. csp5-0.2.5/src/csp5/_native/src/fastmurty/queue.c +138 -0
  21. csp5-0.2.5/src/csp5/_native/src/fastmurty/queue.h +12 -0
  22. csp5-0.2.5/src/csp5/_native/src/fastmurty/sparsematrix.h +20 -0
  23. csp5-0.2.5/src/csp5/_native/src/fastmurty/sspDense.c +404 -0
  24. csp5-0.2.5/src/csp5/_native/src/fastmurty/sspDense.h +34 -0
  25. csp5-0.2.5/src/csp5/_native/src/fastmurty/sspSparse.c +529 -0
  26. csp5-0.2.5/src/csp5/_native/src/fastmurty/sspSparse.h +42 -0
  27. csp5-0.2.5/src/csp5/_native/src/fastmurty/subproblem.c +51 -0
  28. csp5-0.2.5/src/csp5/_native/src/fastmurty/subproblem.h +56 -0
  29. csp5-0.2.5/src/csp5/_native/src/matching_dp.cpp +205 -0
  30. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/__init__.py +1 -0
  31. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/__init__.py +20 -0
  32. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/layers/__init__.py +3 -0
  33. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/layers/layers.py +446 -0
  34. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/layers/utils.py +90 -0
  35. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/layers/wrappers.py +45 -0
  36. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/models/__init__.py +2 -0
  37. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/models/losses.py +19 -0
  38. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/models/models.py +12 -0
  39. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/__init__.py +7 -0
  40. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/features.py +130 -0
  41. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/preprocessor.py +746 -0
  42. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/scaling.py +40 -0
  43. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/sequence.py +115 -0
  44. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/modules/nfp/preprocessing/test.py +734 -0
  45. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/preprocessor_orig.p +0 -0
  46. csp5-0.2.5/src/csp5/_runtime/Predict_SMILES_FF/torch_model.py +327 -0
  47. csp5-0.2.5/src/csp5/_runtime/__init__.py +1 -0
  48. csp5-0.2.5/src/csp5/_runtime/cascade_core.py +1972 -0
  49. csp5-0.2.5/src/csp5/api.py +428 -0
  50. csp5-0.2.5/src/csp5/cli.py +212 -0
  51. csp5-0.2.5/src/csp5/matching.py +409 -0
  52. csp5-0.2.5/src/csp5/matching_cli.py +96 -0
  53. csp5-0.2.5/src/csp5/model_registry.py +69 -0
  54. csp5-0.2.5/src/csp5/models/__init__.py +1 -0
  55. csp5-0.2.5/src/csp5/models/cascade_13c_20260312_13c_joint_component_from_assigned_normboth/best_model.pt +0 -0
  56. csp5-0.2.5/src/csp5/models/cascade_1h_20260312_1h_joint_component_from_assigned_normboth/best_model.pt +0 -0
  57. csp5-0.2.5/src/csp5.egg-info/PKG-INFO +197 -0
  58. csp5-0.2.5/src/csp5.egg-info/SOURCES.txt +63 -0
  59. csp5-0.2.5/src/csp5.egg-info/dependency_links.txt +1 -0
  60. csp5-0.2.5/src/csp5.egg-info/entry_points.txt +3 -0
  61. csp5-0.2.5/src/csp5.egg-info/requires.txt +8 -0
  62. csp5-0.2.5/src/csp5.egg-info/top_level.txt +1 -0
  63. csp5-0.2.5/tests/test_device_resolution.py +48 -0
  64. csp5-0.2.5/tests/test_matching.py +82 -0
  65. csp5-0.2.5/tests/test_precomputed_prediction.py +91 -0
csp5-0.2.5/PKG-INFO ADDED
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: csp5
3
+ Version: 0.2.5
4
+ Summary: CSP5: pip-installable CASCADE NMR predictor (13C + 1H baselines).
5
+ Author: Benji Rowlands
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: pandas>=2.0
10
+ Requires-Dist: pyarrow>=12
11
+ Requires-Dist: scipy>=1.13
12
+ Requires-Dist: scikit-learn>=1.6
13
+ Requires-Dist: tqdm>=4.65
14
+ Requires-Dist: rdkit>=2023.9
15
+ Requires-Dist: torch>=2.2
16
+
17
+ # CSP5
18
+
19
+ `CSP5` is a pip-installable CASCADE predictor package with:
20
+ - batched `13C` and `1H` prediction
21
+ - prediction from precomputed geometries (no re-embedding)
22
+ - shift matching utilities with `dp` (default), `scipy`, and `murty` (k-best)
23
+
24
+ Bundled defaults:
25
+ - 13C model: `CSP5 base (13C)` (`model_id`: `csp5-base-13c`)
26
+ - 1H model: `CSP5 base (1H)` (`model_id`: `csp5-base-1h`)
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install CSP5
32
+ ```
33
+
34
+ ## Prediction CLI
35
+
36
+ In interactive terminals, `csp5` prints status lines to `stderr` before
37
+ and after prediction. If a run is slow, it prints an additional note that first
38
+ invocation can take longer while dependencies and model weights initialize, plus
39
+ periodic "still working" updates during long runs. Use `--no-status` to silence
40
+ them.
41
+
42
+ ### From SMILES
43
+
44
+ ```bash
45
+ csp5 --smiles "CCO" --nucleus 1H
46
+ csp5 --smiles-file smiles.txt --nucleus 13C --batch-size 64
47
+ ```
48
+
49
+ ### From precomputed geometries (parquet structures dataset)
50
+
51
+ Input dataset requirements:
52
+ - required columns: `smiles`, `molblock`
53
+ - optional columns: `conformer_rank`, `conformer_id`, `energy`, `energy_method`
54
+
55
+ Predict only rank-0 conformers:
56
+
57
+ ```bash
58
+ csp5 \
59
+ --structures-path /path/to/structures.parquet \
60
+ --conformer-rank 0 \
61
+ --nucleus 1H \
62
+ --batch-size 64
63
+ ```
64
+
65
+ Predict using all conformers in the dataset:
66
+
67
+ ```bash
68
+ csp5 \
69
+ --structures-path /path/to/structures.parquet \
70
+ --use-all-conformers \
71
+ --nucleus 13C
72
+ ```
73
+
74
+ ## Prediction Python API
75
+
76
+ ```python
77
+ from csp5 import predict_smiles, predict_structures, predict_sdf
78
+
79
+ # Standard SMILES mode
80
+ res = predict_smiles(["CCO", "c1ccccc1"], nucleus="1H", batch_size=32)
81
+ print(res.predictions.head())
82
+
83
+ # Precomputed-geometry parquet mode
84
+ res2 = predict_structures(
85
+ "/path/to/structures.parquet",
86
+ nucleus="1H",
87
+ conformer_rank=0,
88
+ use_all_conformers=False,
89
+ )
90
+
91
+ # Precomputed-geometry SDF mode
92
+ res3 = predict_sdf("/path/to/embedded.sdf", nucleus="13C")
93
+ ```
94
+
95
+ ## Matching CLI
96
+
97
+ `csp5-match` expects one shift per line in each file.
98
+
99
+ ### Default fast path (`dp`)
100
+
101
+ ```bash
102
+ csp5-match \
103
+ --predicted-file predicted.txt \
104
+ --experimental-file experimental.txt \
105
+ --solver dp
106
+ ```
107
+
108
+ ### SciPy Hungarian option
109
+
110
+ ```bash
111
+ csp5-match \
112
+ --predicted-file predicted.txt \
113
+ --experimental-file experimental.txt \
114
+ --solver scipy
115
+ ```
116
+
117
+ ### Murty k-best option
118
+
119
+ ```bash
120
+ csp5-match \
121
+ --predicted-file predicted.txt \
122
+ --experimental-file experimental.txt \
123
+ --solver murty \
124
+ --k-best-policy clip \
125
+ --k-best 25 \
126
+ --temperature 0.5 \
127
+ --mae-delta-threshold 0.2
128
+ ```
129
+
130
+ ## Matching Python API
131
+
132
+ ```python
133
+ from csp5 import match_shifts
134
+
135
+ pred = [7.35, 7.30, 1.25]
136
+ exp = [7.34, 7.31, 1.20]
137
+
138
+ # DP (default)
139
+ r1 = match_shifts(pred, exp, solver="dp")
140
+
141
+ # SciPy Hungarian
142
+ r2 = match_shifts(pred, exp, solver="scipy")
143
+
144
+ # Murty k-best
145
+ r3 = match_shifts(pred, exp, solver="murty", k_best=10, k_best_policy="clip")
146
+ print(r3.assignment_entropy, r3.num_competing_assignments)
147
+ ```
148
+
149
+ ## Solver Notes
150
+
151
+ - `dp` is the default and is intended for the standard 1D shift objective.
152
+ - `scipy` uses Hungarian assignment on the full padded cost matrix.
153
+ - `murty` is the k-best solver; use this when you need assignment ambiguity analysis.
154
+ - For `murty`, `k_best_policy="clip"` (default) returns all feasible unique assignments
155
+ when `k_best` is larger than what exists. Use `k_best_policy="strict"` to fail instead.
156
+ - `dp` and `scipy` are top-1 only (`k_best` must be `1`).
157
+
158
+ ## Output Notes
159
+
160
+ - Prediction failures are returned explicitly (`failures`) with reason tags.
161
+ - Prediction output always includes `nucleus`, `model_id`, and `model_name`.
162
+ - For structures-mode predictions, conformer metadata columns are propagated when available.
163
+
164
+ ## Release
165
+
166
+ ### Local macOS wheel build
167
+
168
+ From repo root:
169
+
170
+ ```bash
171
+ cd deploy/CSP5
172
+ rm -rf dist build *.egg-info
173
+ MACOSX_DEPLOYMENT_TARGET=11.0 uvx --from build pyproject-build --wheel
174
+ uvx --from twine twine check dist/*
175
+ uvx --from twine twine upload --repository pypi --skip-existing dist/*.whl
176
+ ```
177
+
178
+ `MACOSX_DEPLOYMENT_TARGET=11.0` keeps wheel tags broadly compatible (for example,
179
+ `macosx_11_0_arm64`) instead of pinning to the host macOS version.
180
+
181
+ ### Cross-platform publishing (Linux + macOS)
182
+
183
+ Use GitHub Actions workflow:
184
+
185
+ - file: `.github/workflows/release-csp5.yml`
186
+ - trigger:
187
+ - push a tag like `csp5-v0.2.5` (build + publish), or
188
+ - run manually with `publish=true`
189
+ - required repo secret: `PYPI_API_TOKEN`
190
+
191
+ The workflow builds:
192
+
193
+ - Linux manylinux x86_64 wheels for Python 3.10, 3.11, 3.12, and 3.13
194
+ - macOS arm64 wheels for Python 3.10, 3.11, 3.12, and 3.13
195
+ - one source distribution (sdist)
196
+
197
+ Then it uploads all artifacts to PyPI in one step.
csp5-0.2.5/README.md ADDED
@@ -0,0 +1,181 @@
1
+ # CSP5
2
+
3
+ `CSP5` is a pip-installable CASCADE predictor package with:
4
+ - batched `13C` and `1H` prediction
5
+ - prediction from precomputed geometries (no re-embedding)
6
+ - shift matching utilities with `dp` (default), `scipy`, and `murty` (k-best)
7
+
8
+ Bundled defaults:
9
+ - 13C model: `CSP5 base (13C)` (`model_id`: `csp5-base-13c`)
10
+ - 1H model: `CSP5 base (1H)` (`model_id`: `csp5-base-1h`)
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pip install CSP5
16
+ ```
17
+
18
+ ## Prediction CLI
19
+
20
+ In interactive terminals, `csp5` prints status lines to `stderr` before
21
+ and after prediction. If a run is slow, it prints an additional note that first
22
+ invocation can take longer while dependencies and model weights initialize, plus
23
+ periodic "still working" updates during long runs. Use `--no-status` to silence
24
+ them.
25
+
26
+ ### From SMILES
27
+
28
+ ```bash
29
+ csp5 --smiles "CCO" --nucleus 1H
30
+ csp5 --smiles-file smiles.txt --nucleus 13C --batch-size 64
31
+ ```
32
+
33
+ ### From precomputed geometries (parquet structures dataset)
34
+
35
+ Input dataset requirements:
36
+ - required columns: `smiles`, `molblock`
37
+ - optional columns: `conformer_rank`, `conformer_id`, `energy`, `energy_method`
38
+
39
+ Predict only rank-0 conformers:
40
+
41
+ ```bash
42
+ csp5 \
43
+ --structures-path /path/to/structures.parquet \
44
+ --conformer-rank 0 \
45
+ --nucleus 1H \
46
+ --batch-size 64
47
+ ```
48
+
49
+ Predict using all conformers in the dataset:
50
+
51
+ ```bash
52
+ csp5 \
53
+ --structures-path /path/to/structures.parquet \
54
+ --use-all-conformers \
55
+ --nucleus 13C
56
+ ```
57
+
58
+ ## Prediction Python API
59
+
60
+ ```python
61
+ from csp5 import predict_smiles, predict_structures, predict_sdf
62
+
63
+ # Standard SMILES mode
64
+ res = predict_smiles(["CCO", "c1ccccc1"], nucleus="1H", batch_size=32)
65
+ print(res.predictions.head())
66
+
67
+ # Precomputed-geometry parquet mode
68
+ res2 = predict_structures(
69
+ "/path/to/structures.parquet",
70
+ nucleus="1H",
71
+ conformer_rank=0,
72
+ use_all_conformers=False,
73
+ )
74
+
75
+ # Precomputed-geometry SDF mode
76
+ res3 = predict_sdf("/path/to/embedded.sdf", nucleus="13C")
77
+ ```
78
+
79
+ ## Matching CLI
80
+
81
+ `csp5-match` expects one shift per line in each file.
82
+
83
+ ### Default fast path (`dp`)
84
+
85
+ ```bash
86
+ csp5-match \
87
+ --predicted-file predicted.txt \
88
+ --experimental-file experimental.txt \
89
+ --solver dp
90
+ ```
91
+
92
+ ### SciPy Hungarian option
93
+
94
+ ```bash
95
+ csp5-match \
96
+ --predicted-file predicted.txt \
97
+ --experimental-file experimental.txt \
98
+ --solver scipy
99
+ ```
100
+
101
+ ### Murty k-best option
102
+
103
+ ```bash
104
+ csp5-match \
105
+ --predicted-file predicted.txt \
106
+ --experimental-file experimental.txt \
107
+ --solver murty \
108
+ --k-best-policy clip \
109
+ --k-best 25 \
110
+ --temperature 0.5 \
111
+ --mae-delta-threshold 0.2
112
+ ```
113
+
114
+ ## Matching Python API
115
+
116
+ ```python
117
+ from csp5 import match_shifts
118
+
119
+ pred = [7.35, 7.30, 1.25]
120
+ exp = [7.34, 7.31, 1.20]
121
+
122
+ # DP (default)
123
+ r1 = match_shifts(pred, exp, solver="dp")
124
+
125
+ # SciPy Hungarian
126
+ r2 = match_shifts(pred, exp, solver="scipy")
127
+
128
+ # Murty k-best
129
+ r3 = match_shifts(pred, exp, solver="murty", k_best=10, k_best_policy="clip")
130
+ print(r3.assignment_entropy, r3.num_competing_assignments)
131
+ ```
132
+
133
+ ## Solver Notes
134
+
135
+ - `dp` is the default and is intended for the standard 1D shift objective.
136
+ - `scipy` uses Hungarian assignment on the full padded cost matrix.
137
+ - `murty` is the k-best solver; use this when you need assignment ambiguity analysis.
138
+ - For `murty`, `k_best_policy="clip"` (default) returns all feasible unique assignments
139
+ when `k_best` is larger than what exists. Use `k_best_policy="strict"` to fail instead.
140
+ - `dp` and `scipy` are top-1 only (`k_best` must be `1`).
141
+
142
+ ## Output Notes
143
+
144
+ - Prediction failures are returned explicitly (`failures`) with reason tags.
145
+ - Prediction output always includes `nucleus`, `model_id`, and `model_name`.
146
+ - For structures-mode predictions, conformer metadata columns are propagated when available.
147
+
148
+ ## Release
149
+
150
+ ### Local macOS wheel build
151
+
152
+ From repo root:
153
+
154
+ ```bash
155
+ cd deploy/CSP5
156
+ rm -rf dist build *.egg-info
157
+ MACOSX_DEPLOYMENT_TARGET=11.0 uvx --from build pyproject-build --wheel
158
+ uvx --from twine twine check dist/*
159
+ uvx --from twine twine upload --repository pypi --skip-existing dist/*.whl
160
+ ```
161
+
162
+ `MACOSX_DEPLOYMENT_TARGET=11.0` keeps wheel tags broadly compatible (for example,
163
+ `macosx_11_0_arm64`) instead of pinning to the host macOS version.
164
+
165
+ ### Cross-platform publishing (Linux + macOS)
166
+
167
+ Use GitHub Actions workflow:
168
+
169
+ - file: `.github/workflows/release-csp5.yml`
170
+ - trigger:
171
+ - push a tag like `csp5-v0.2.5` (build + publish), or
172
+ - run manually with `publish=true`
173
+ - required repo secret: `PYPI_API_TOKEN`
174
+
175
+ The workflow builds:
176
+
177
+ - Linux manylinux x86_64 wheels for Python 3.10, 3.11, 3.12, and 3.13
178
+ - macOS arm64 wheels for Python 3.10, 3.11, 3.12, and 3.13
179
+ - one source distribution (sdist)
180
+
181
+ Then it uploads all artifacts to PyPI in one step.
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "csp5"
7
+ version = "0.2.5"
8
+ description = "CSP5: pip-installable CASCADE NMR predictor (13C + 1H baselines)."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [{ name = "Benji Rowlands" }]
12
+ dependencies = [
13
+ "numpy>=1.24",
14
+ "pandas>=2.0",
15
+ "pyarrow>=12",
16
+ "scipy>=1.13",
17
+ "scikit-learn>=1.6",
18
+ "tqdm>=4.65",
19
+ "rdkit>=2023.9",
20
+ "torch>=2.2",
21
+ ]
22
+
23
+ [project.scripts]
24
+ csp5 = "csp5.cli:main"
25
+ csp5-match = "csp5.matching_cli:main"
26
+
27
+ [tool.setuptools]
28
+ package-dir = {"" = "src"}
29
+ include-package-data = true
30
+
31
+ [tool.setuptools.packages.find]
32
+ where = ["src"]
33
+
34
+ [tool.setuptools.package-data]
35
+ "csp5" = [
36
+ "models/**/*.pt",
37
+ "_runtime/Predict_SMILES_FF/preprocessor_orig.p",
38
+ "_runtime/Predict_SMILES_FF/modules/**/*.py",
39
+ "_runtime/Predict_SMILES_FF/torch_model.py",
40
+ "_native/*.so",
41
+ "_native/src/matching_dp.cpp",
42
+ "_native/src/fastmurty/*.c",
43
+ "_native/src/fastmurty/*.h",
44
+ "_native/src/fastmurty/LICENSE",
45
+ ]
csp5-0.2.5/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
csp5-0.2.5/setup.py ADDED
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from setuptools import Distribution, setup
9
+ from setuptools.command.build_py import build_py as _build_py
10
+
11
+
12
+ class BinaryDistribution(Distribution):
13
+ def has_ext_modules(self) -> bool: # pragma: no cover
14
+ return True
15
+
16
+
17
+ class build_py(_build_py):
18
+ def run(self) -> None:
19
+ super().run()
20
+ self._build_native_shared_libs()
21
+
22
+ def _build_native_shared_libs(self) -> None:
23
+ if os.name == "nt":
24
+ raise RuntimeError("Building CSP5 native matching backends on Windows is unsupported")
25
+
26
+ pkg_native_dir = Path(self.build_lib) / "csp5" / "_native"
27
+ pkg_native_dir.mkdir(parents=True, exist_ok=True)
28
+
29
+ project_root = Path(__file__).resolve().parent
30
+ native_src = project_root / "src" / "csp5" / "_native" / "src"
31
+ dp_src = native_src / "matching_dp.cpp"
32
+ fastmurty_dir = native_src / "fastmurty"
33
+
34
+ dp_out = pkg_native_dir / "libmatching_dp.so"
35
+ murty_out = pkg_native_dir / "mhtda.so"
36
+
37
+ cxx = os.environ.get("CXX", "g++")
38
+ cc = os.environ.get("CC", "gcc")
39
+
40
+ dp_cmd = [
41
+ cxx,
42
+ "-O3",
43
+ "-std=c++17",
44
+ "-fPIC",
45
+ "-shared",
46
+ "-o",
47
+ str(dp_out),
48
+ str(dp_src),
49
+ ]
50
+
51
+ fastmurty_sources = [
52
+ fastmurty_dir / "subproblem.c",
53
+ fastmurty_dir / "queue.c",
54
+ fastmurty_dir / "sspDense.c",
55
+ fastmurty_dir / "sspSparse.c",
56
+ fastmurty_dir / "murtysplitDense.c",
57
+ fastmurty_dir / "murtysplitSparse.c",
58
+ fastmurty_dir / "da.c",
59
+ ]
60
+ murty_cmd = [
61
+ cc,
62
+ "-O3",
63
+ "-fPIC",
64
+ "-shared",
65
+ "-DSPARSE",
66
+ "-DNDEBUG",
67
+ "-o",
68
+ str(murty_out),
69
+ *[str(path) for path in fastmurty_sources],
70
+ ]
71
+
72
+ self.announce("Building native DP backend", level=2)
73
+ subprocess.check_call(dp_cmd)
74
+ self.announce("Building native Murty backend", level=2)
75
+ subprocess.check_call(murty_cmd)
76
+
77
+ if not dp_out.exists():
78
+ raise RuntimeError(f"Failed to build native DP backend: {dp_out}")
79
+ if not murty_out.exists():
80
+ raise RuntimeError(f"Failed to build native Murty backend: {murty_out}")
81
+
82
+
83
+ setup(
84
+ distclass=BinaryDistribution,
85
+ cmdclass={"build_py": build_py},
86
+ )
@@ -0,0 +1,52 @@
1
+ """CSP5 public API.
2
+
3
+ This module keeps top-level imports lazy so entrypoints like ``csp5``
4
+ avoid importing optional matching dependencies unless needed.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from importlib import import_module
10
+ from typing import TYPE_CHECKING, Dict, Tuple
11
+
12
+
13
+ if TYPE_CHECKING:
14
+ from .api import PredictionResult, predict_mols, predict_sdf, predict_smiles, predict_structures
15
+ from .matching import MatchingResult, RankedAssignment, match_shifts
16
+
17
+
18
+ __all__ = [
19
+ "PredictionResult",
20
+ "MatchingResult",
21
+ "RankedAssignment",
22
+ "predict_smiles",
23
+ "predict_mols",
24
+ "predict_structures",
25
+ "predict_sdf",
26
+ "match_shifts",
27
+ ]
28
+
29
+ _EXPORT_MAP: Dict[str, Tuple[str, str]] = {
30
+ "PredictionResult": ("csp5.api", "PredictionResult"),
31
+ "predict_smiles": ("csp5.api", "predict_smiles"),
32
+ "predict_mols": ("csp5.api", "predict_mols"),
33
+ "predict_structures": ("csp5.api", "predict_structures"),
34
+ "predict_sdf": ("csp5.api", "predict_sdf"),
35
+ "MatchingResult": ("csp5.matching", "MatchingResult"),
36
+ "RankedAssignment": ("csp5.matching", "RankedAssignment"),
37
+ "match_shifts": ("csp5.matching", "match_shifts"),
38
+ }
39
+
40
+
41
+ def __getattr__(name: str):
42
+ if name not in _EXPORT_MAP:
43
+ raise AttributeError(f"module 'csp5' has no attribute {name!r}")
44
+ module_name, attr_name = _EXPORT_MAP[name]
45
+ module = import_module(module_name)
46
+ value = getattr(module, attr_name)
47
+ globals()[name] = value
48
+ return value
49
+
50
+
51
+ def __dir__() -> list[str]:
52
+ return sorted(set(globals()) | set(__all__))
@@ -0,0 +1,6 @@
1
+ """Native matching backends bundled with CSP5."""
2
+
3
+ from .dp_backend import match_indices_dp
4
+ from .murty_backend import murty_k_best
5
+
6
+ __all__ = ["match_indices_dp", "murty_k_best"]
@@ -0,0 +1,86 @@
1
+ """ctypes wrapper for native DP matching backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ctypes
6
+ from pathlib import Path
7
+ from typing import List, Sequence, Tuple
8
+
9
+ import numpy as np
10
+
11
+
12
+ _LIB_PATH = Path(__file__).resolve().with_name("libmatching_dp.so")
13
+ if not _LIB_PATH.exists():
14
+ raise RuntimeError(
15
+ "Native DP backend missing: libmatching_dp.so was not bundled. "
16
+ "Reinstall CSP5 from a wheel built for this platform."
17
+ )
18
+
19
+ try:
20
+ _LIB = ctypes.CDLL(str(_LIB_PATH))
21
+ except OSError as exc: # pragma: no cover
22
+ raise RuntimeError(f"Failed to load native DP backend: {_LIB_PATH} ({exc})") from exc
23
+
24
+ _MATCH_DP = _LIB.nmrexp_match_indices_dp
25
+ _MATCH_DP.argtypes = [
26
+ ctypes.POINTER(ctypes.c_double),
27
+ ctypes.c_int,
28
+ ctypes.POINTER(ctypes.c_double),
29
+ ctypes.c_int,
30
+ ctypes.c_double,
31
+ ctypes.POINTER(ctypes.c_double),
32
+ ctypes.POINTER(ctypes.c_int),
33
+ ctypes.POINTER(ctypes.c_int),
34
+ ctypes.c_int,
35
+ ]
36
+ _MATCH_DP.restype = ctypes.c_int
37
+
38
+
39
+ def match_indices_dp(
40
+ pred_vals: Sequence[float] | np.ndarray,
41
+ obs_vals: Sequence[float] | np.ndarray,
42
+ *,
43
+ dummy_cost: float,
44
+ row_penalties: Sequence[float] | np.ndarray | None = None,
45
+ ) -> Tuple[List[int], List[int]]:
46
+ """Run native DP matcher and return matched row/column indices."""
47
+ pred_arr = np.asarray(pred_vals, dtype=np.float64).reshape(-1)
48
+ obs_arr = np.asarray(obs_vals, dtype=np.float64).reshape(-1)
49
+ n_pred = int(pred_arr.shape[0])
50
+ n_obs = int(obs_arr.shape[0])
51
+
52
+ if n_pred == 0 or n_obs == 0:
53
+ return [], []
54
+
55
+ penalties_arr = None
56
+ penalties_ptr = None
57
+ if row_penalties is not None:
58
+ penalties_arr = np.asarray(row_penalties, dtype=np.float64).reshape(-1)
59
+ if penalties_arr.shape[0] != n_pred:
60
+ raise ValueError(
61
+ f"row_penalties length mismatch: got {penalties_arr.shape[0]}, expected {n_pred}"
62
+ )
63
+ penalties_ptr = penalties_arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
64
+
65
+ out_size = int(min(n_pred, n_obs))
66
+ out_rows = np.empty(out_size, dtype=np.int32)
67
+ out_cols = np.empty(out_size, dtype=np.int32)
68
+
69
+ count = _MATCH_DP(
70
+ pred_arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
71
+ ctypes.c_int(n_pred),
72
+ obs_arr.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
73
+ ctypes.c_int(n_obs),
74
+ ctypes.c_double(float(dummy_cost)),
75
+ penalties_ptr,
76
+ out_rows.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
77
+ out_cols.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
78
+ ctypes.c_int(out_size),
79
+ )
80
+
81
+ if count < 0:
82
+ raise RuntimeError(f"Native DP backend returned invalid count={count}")
83
+ if count == 0:
84
+ return [], []
85
+
86
+ return out_rows[:count].tolist(), out_cols[:count].tolist()