diff-hdx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_hdx-0.1.0/LICENSE +21 -0
- diff_hdx-0.1.0/PKG-INFO +108 -0
- diff_hdx-0.1.0/README.md +77 -0
- diff_hdx-0.1.0/diff_hdx/__init__.py +15 -0
- diff_hdx-0.1.0/diff_hdx/kernels.py +239 -0
- diff_hdx-0.1.0/diff_hdx.egg-info/PKG-INFO +108 -0
- diff_hdx-0.1.0/diff_hdx.egg-info/SOURCES.txt +11 -0
- diff_hdx-0.1.0/diff_hdx.egg-info/dependency_links.txt +1 -0
- diff_hdx-0.1.0/diff_hdx.egg-info/requires.txt +10 -0
- diff_hdx-0.1.0/diff_hdx.egg-info/top_level.txt +1 -0
- diff_hdx-0.1.0/pyproject.toml +66 -0
- diff_hdx-0.1.0/setup.cfg +4 -0
- diff_hdx-0.1.0/tests/test_kernels.py +196 -0
diff_hdx-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 George Elkins
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
diff_hdx-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diff-hdx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Differentiable HDX-MS prediction in JAX
|
|
5
|
+
Author: George Elkins
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/elkins/diff-hdx
|
|
8
|
+
Project-URL: Repository, https://github.com/elkins/diff-hdx
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: jax
|
|
22
|
+
Requires-Dist: jaxlib
|
|
23
|
+
Requires-Dist: numpy
|
|
24
|
+
Requires-Dist: biotite
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
29
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# π§ͺ diff-hdx: Differentiable HDX-MS Prediction in JAX
|
|
33
|
+
|
|
34
|
+
[](https://github.com/elkins/diff-hdx/actions/workflows/test.yml)
|
|
35
|
+
[](https://opensource.org/licenses/MIT)
|
|
36
|
+
[](https://github.com/google/jax)
|
|
37
|
+
|
|
38
|
+
**diff-hdx** is a high-performance Python library for differentiable Hydrogen-Deuterium Exchange (HDX-MS) prediction. Built on **JAX**, it provides auto-differentiable kernels to bridge structural ensembles and experimental protection factors.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## π― Features
|
|
43
|
+
|
|
44
|
+
- **Differentiable SASA Kernels:** Hardware-accelerated approximations of Solvent Accessible Surface Area using Gaussian occlusion models.
|
|
45
|
+
- **Protection Factor Modeling:** Implementations of LinderstrΓΈm-Lang models for H-exchange rates ($PF$).
|
|
46
|
+
- **Kinetic Simulation:** Model time-dependent mass shifts using **EX2 kinetics** (Hvidt & Nielsen, 1966).
|
|
47
|
+
- **Gradient-Based Refinement:** Optimize protein structures or ensembles directly against experimental HDX-MS time-curves.
|
|
48
|
+
- **Vectorized Execution:** Native support for `vmap` to handle large conformational ensembles.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## ποΈ Technical Architecture
|
|
53
|
+
|
|
54
|
+
- **Backend:** JAX (XLA-compiled) β supports CPU, GPU, and TPU.
|
|
55
|
+
- **Differentiability:** Full support for forward and reverse-mode autodiff.
|
|
56
|
+
- **Integration:** Compatible with `biotite` for structural parsing and `diff-biophys` for ensemble averaging.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## π Roadmap
|
|
61
|
+
|
|
62
|
+
- [x] Initial differentiable SASA and $ln P$ kernels.
|
|
63
|
+
- [x] Integration with JAX `vmap` for ensemble averaging.
|
|
64
|
+
- [x] Residue-specific intrinsic exchange rates (Bai et al. 1993) β all 20 amino acids.
|
|
65
|
+
- [ ] Integration with MD trajectory loaders.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## π Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install diff-hdx
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## π§ͺ Scientific Validation
|
|
76
|
+
|
|
77
|
+
- **Parity Checks:** Kernels are validated against standard non-differentiable implementations (e.g., `biotite` SASA) to ensure physical accuracy.
|
|
78
|
+
- **Gradient Tests:** All kernels are verified using JAX's `gradcheck` to ensure numerically stable derivatives across the full support.
|
|
79
|
+
- **Ensemble Consistency:** Verified against `diff-biophys` ensemble averaging for IDP conformational ensembles.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## π Related Projects
|
|
84
|
+
|
|
85
|
+
diff-hdx is part of the **differentiable biophysics** ecosystem:
|
|
86
|
+
|
|
87
|
+
- [diff-biophys](https://github.com/elkins/diff-biophys) β Core differentiable biophysics engine.
|
|
88
|
+
- [diff-fret](https://github.com/elkins/diff-fret) β Differentiable FRET modeling.
|
|
89
|
+
- [diff-epr](https://github.com/elkins/diff-epr) β Differentiable EPR/DEER simulation.
|
|
90
|
+
- [synth-pdb](https://github.com/elkins/synth-pdb) β Synthetic structure generation.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## π Citation
|
|
95
|
+
|
|
96
|
+
```bibtex
|
|
97
|
+
@software{diff_hdx,
|
|
98
|
+
author = {Elkins, George},
|
|
99
|
+
title = {diff-hdx: Differentiable HDX-MS prediction in JAX},
|
|
100
|
+
year = {2026},
|
|
101
|
+
url = {https://github.com/elkins/diff-hdx},
|
|
102
|
+
version = {0.1.0}
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## βοΈ License
|
|
107
|
+
|
|
108
|
+
MIT
|
diff_hdx-0.1.0/README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# π§ͺ diff-hdx: Differentiable HDX-MS Prediction in JAX
|
|
2
|
+
|
|
3
|
+
[](https://github.com/elkins/diff-hdx/actions/workflows/test.yml)
|
|
4
|
+
[](https://opensource.org/licenses/MIT)
|
|
5
|
+
[](https://github.com/google/jax)
|
|
6
|
+
|
|
7
|
+
**diff-hdx** is a high-performance Python library for differentiable Hydrogen-Deuterium Exchange (HDX-MS) prediction. Built on **JAX**, it provides auto-differentiable kernels to bridge structural ensembles and experimental protection factors.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## π― Features
|
|
12
|
+
|
|
13
|
+
- **Differentiable SASA Kernels:** Hardware-accelerated approximations of Solvent Accessible Surface Area using Gaussian occlusion models.
|
|
14
|
+
- **Protection Factor Modeling:** Implementations of LinderstrΓΈm-Lang models for H-exchange rates ($PF$).
|
|
15
|
+
- **Kinetic Simulation:** Model time-dependent mass shifts using **EX2 kinetics** (Hvidt & Nielsen, 1966).
|
|
16
|
+
- **Gradient-Based Refinement:** Optimize protein structures or ensembles directly against experimental HDX-MS time-curves.
|
|
17
|
+
- **Vectorized Execution:** Native support for `vmap` to handle large conformational ensembles.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## ποΈ Technical Architecture
|
|
22
|
+
|
|
23
|
+
- **Backend:** JAX (XLA-compiled) β supports CPU, GPU, and TPU.
|
|
24
|
+
- **Differentiability:** Full support for forward and reverse-mode autodiff.
|
|
25
|
+
- **Integration:** Compatible with `biotite` for structural parsing and `diff-biophys` for ensemble averaging.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## π Roadmap
|
|
30
|
+
|
|
31
|
+
- [x] Initial differentiable SASA and $ln P$ kernels.
|
|
32
|
+
- [x] Integration with JAX `vmap` for ensemble averaging.
|
|
33
|
+
- [x] Residue-specific intrinsic exchange rates (Bai et al. 1993) β all 20 amino acids.
|
|
34
|
+
- [ ] Integration with MD trajectory loaders.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## π Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install diff-hdx
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## π§ͺ Scientific Validation
|
|
45
|
+
|
|
46
|
+
- **Parity Checks:** Kernels are validated against standard non-differentiable implementations (e.g., `biotite` SASA) to ensure physical accuracy.
|
|
47
|
+
- **Gradient Tests:** All kernels are verified using JAX's `gradcheck` to ensure numerically stable derivatives across the full support.
|
|
48
|
+
- **Ensemble Consistency:** Verified against `diff-biophys` ensemble averaging for IDP conformational ensembles.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## π Related Projects
|
|
53
|
+
|
|
54
|
+
diff-hdx is part of the **differentiable biophysics** ecosystem:
|
|
55
|
+
|
|
56
|
+
- [diff-biophys](https://github.com/elkins/diff-biophys) β Core differentiable biophysics engine.
|
|
57
|
+
- [diff-fret](https://github.com/elkins/diff-fret) β Differentiable FRET modeling.
|
|
58
|
+
- [diff-epr](https://github.com/elkins/diff-epr) β Differentiable EPR/DEER simulation.
|
|
59
|
+
- [synth-pdb](https://github.com/elkins/synth-pdb) β Synthetic structure generation.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## π Citation
|
|
64
|
+
|
|
65
|
+
```bibtex
|
|
66
|
+
@software{diff_hdx,
|
|
67
|
+
author = {Elkins, George},
|
|
68
|
+
title = {diff-hdx: Differentiable HDX-MS prediction in JAX},
|
|
69
|
+
year = {2026},
|
|
70
|
+
url = {https://github.com/elkins/diff-hdx},
|
|
71
|
+
version = {0.1.0}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## βοΈ License
|
|
76
|
+
|
|
77
|
+
MIT
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import jax
|
|
2
|
+
import jax.numpy as jnp
|
|
3
|
+
from jax import Array
|
|
4
|
+
|
|
5
|
+
# ---------------------------------------------------------------------------
|
|
6
|
+
# Bai et al. (1993) intrinsic-rate correction table.
|
|
7
|
+
# Stored as a fixed-order amino-acid array for vectorised (JIT-compatible) lookup.
|
|
8
|
+
# Columns: [al, ar, bl, br] (log10 corrections for acid-left, acid-right,
|
|
9
|
+
# base-left, base-right catalysis)
|
|
10
|
+
# Order matches _AA_ORDER below.
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
_AA_ORDER = "ARNDCQEGHILKMFPSTWYV"
|
|
13
|
+
_AA_IDX: dict[str, int] = {aa: i for i, aa in enumerate(_AA_ORDER)}
|
|
14
|
+
_ALA_IDX: int = _AA_IDX["A"]
|
|
15
|
+
|
|
16
|
+
# Shape (20, 4): rows are amino acids, columns are [al, ar, bl, br]
|
|
17
|
+
_CORRECTIONS = [
|
|
18
|
+
# al ar bl br
|
|
19
|
+
[0.00, 0.00, 0.00, 0.00], # A
|
|
20
|
+
[-0.59, -0.32, 0.08, 0.22], # R
|
|
21
|
+
[-0.58, -0.13, 0.49, 0.32], # N
|
|
22
|
+
[-0.90, -0.12, 0.69, 0.60], # D (COOH state)
|
|
23
|
+
[-0.54, -0.46, 0.62, 0.55], # C
|
|
24
|
+
[-0.47, -0.27, 0.06, 0.20], # Q
|
|
25
|
+
[-0.60, -0.27, 0.24, 0.39], # E (COOH state)
|
|
26
|
+
[-0.22, 0.22, -0.03, 0.17], # G
|
|
27
|
+
[-0.10, 0.14, 0.00, 0.00], # H
|
|
28
|
+
[-0.91, -0.59, -0.73, -0.23], # I
|
|
29
|
+
[-0.57, -0.13, -0.58, -0.21], # L
|
|
30
|
+
[-0.56, -0.29, -0.04, 0.12], # K
|
|
31
|
+
[-0.64, -0.28, -0.01, 0.11], # M
|
|
32
|
+
[-0.52, -0.43, -0.24, 0.06], # F
|
|
33
|
+
[-0.19, -0.24, 0.00, 0.00], # P
|
|
34
|
+
[-0.44, -0.39, 0.37, 0.30], # S
|
|
35
|
+
[-0.79, -0.47, -0.07, 0.20], # T
|
|
36
|
+
[-0.40, -0.44, -0.41, -0.11], # W
|
|
37
|
+
[-0.41, -0.37, -0.27, 0.05], # Y
|
|
38
|
+
[-0.74, -0.30, -0.70, -0.14], # V
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def intrinsic_rates(
|
|
43
|
+
sequence: str,
|
|
44
|
+
ph: float = 7.0,
|
|
45
|
+
temperature: float = 293.15,
|
|
46
|
+
) -> Array:
|
|
47
|
+
"""
|
|
48
|
+
Compute intrinsic exchange rates (k_int) using the Bai et al. (1993) model.
|
|
49
|
+
Includes full side-chain correction factors for all 20 standard amino acids.
|
|
50
|
+
|
|
51
|
+
Per Bai et al. (1993) the correction for residue *i* uses:
|
|
52
|
+
- the **left** neighbour (residue i-1) via the "al" / "bl" factors, and
|
|
53
|
+
- the **right** neighbour (residue i+1) via the "ar" / "br" factors.
|
|
54
|
+
Boundary residues (N-terminus, C-terminus) use Ala as a placeholder.
|
|
55
|
+
|
|
56
|
+
This implementation is fully vectorised and compatible with JAX JIT.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
sequence: Protein sequence string (one-letter amino-acid codes).
|
|
60
|
+
ph: pH value.
|
|
61
|
+
temperature: Temperature in Kelvin.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
k_int array of shape (N,), rates in minβ»ΒΉ.
|
|
65
|
+
"""
|
|
66
|
+
n = len(sequence) # noqa: F841 -- kept for readability; not used in vectorised ops
|
|
67
|
+
|
|
68
|
+
# Encode sequence as integer indices (unknown residues β Ala)
|
|
69
|
+
seq_idx = [_AA_IDX.get(aa, _ALA_IDX) for aa in sequence]
|
|
70
|
+
|
|
71
|
+
# Left-neighbour indices: residue i-1; N-terminal boundary β Ala
|
|
72
|
+
left_idx = [_ALA_IDX] + seq_idx[:-1]
|
|
73
|
+
# Right-neighbour indices: residue i+1; C-terminal boundary β Ala
|
|
74
|
+
right_idx = seq_idx[1:] + [_ALA_IDX]
|
|
75
|
+
|
|
76
|
+
# Look up correction arrays β pure Python lists, converted to JAX once
|
|
77
|
+
corr = jnp.array(_CORRECTIONS) # (20, 4)
|
|
78
|
+
left_corr = corr[jnp.array(left_idx)] # (N, 4)
|
|
79
|
+
right_corr = corr[jnp.array(right_idx)] # (N, 4)
|
|
80
|
+
|
|
81
|
+
# Reference rates for NH in HβO at 20 Β°C (293.15 K)
|
|
82
|
+
k_a_ref = 10.0**1.39
|
|
83
|
+
k_b_ref = 10.0**10.08
|
|
84
|
+
k_w_ref = 10.0**-1.50 # estimated
|
|
85
|
+
|
|
86
|
+
# [HβΊ] and [OHβ»]; pKw at 20 Β°C β 14.17
|
|
87
|
+
h_plus = 10.0 ** (-ph)
|
|
88
|
+
oh_minus = 10.0 ** (ph - 14.17)
|
|
89
|
+
|
|
90
|
+
# Arrhenius temperature corrections (activation energies in kcal/mol)
|
|
91
|
+
e_a, e_b, e_w = 14.0, 17.0, 19.0
|
|
92
|
+
r_gas = 1.987e-3 # kcal / (molΒ·K)
|
|
93
|
+
|
|
94
|
+
def temp_corr(k_ref: float, e_act: float) -> jnp.ndarray:
|
|
95
|
+
return k_ref * jnp.exp(-e_act / r_gas * (1.0 / temperature - 1.0 / 293.15)) # type: ignore[no-any-return]
|
|
96
|
+
|
|
97
|
+
ka_ref_t = temp_corr(k_a_ref, e_a)
|
|
98
|
+
kb_ref_t = temp_corr(k_b_ref, e_b)
|
|
99
|
+
kw_ref_t = temp_corr(k_w_ref, e_w)
|
|
100
|
+
|
|
101
|
+
# Log-additive corrections β vectorised over all residues simultaneously
|
|
102
|
+
# Columns: [al=0, ar=1, bl=2, br=3]
|
|
103
|
+
ka = ka_ref_t * 10.0 ** (left_corr[:, 0] + right_corr[:, 1]) # al + ar
|
|
104
|
+
kb = kb_ref_t * 10.0 ** (left_corr[:, 2] + right_corr[:, 3]) # bl + br
|
|
105
|
+
kw = kw_ref_t * 10.0 ** (left_corr[:, 2] + right_corr[:, 3]) # same as kb
|
|
106
|
+
|
|
107
|
+
return jnp.asarray(ka * h_plus + kb * oh_minus + kw) # explicit Array, satisfies mypy
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def sasa_approx(
|
|
111
|
+
coords: jnp.ndarray,
|
|
112
|
+
probe_radius: float = 1.4,
|
|
113
|
+
sigma: float = 2.0,
|
|
114
|
+
) -> jnp.ndarray:
|
|
115
|
+
"""
|
|
116
|
+
Differentiable approximation of Solvent Accessible Surface Area (SASA).
|
|
117
|
+
Uses a Gaussian occlusion model.
|
|
118
|
+
|
|
119
|
+
The probe radius is incorporated as an additive contribution to the
|
|
120
|
+
effective Gaussian width (effective_sigma = sigma + probe_radius), so
|
|
121
|
+
a larger probe widens the occlusion shell around each atom, reducing the
|
|
122
|
+
accessible surface β consistent with standard SASA intuition.
|
|
123
|
+
|
|
124
|
+
Note: this is a differentiable *surrogate*, not a true ShrakeβRupley SASA.
|
|
125
|
+
It lacks per-atom van-der-Waals radii and returns dimensionless values in
|
|
126
|
+
(0, 1]. It is suitable as a smooth proxy for gradient-based refinement.
|
|
127
|
+
|
|
128
|
+
Args:
|
|
129
|
+
coords: (N, 3) atomic coordinates in Angstroms.
|
|
130
|
+
probe_radius: Radius of the solvent probe in Angstroms (default 1.4 Γ
).
|
|
131
|
+
sigma: Base Gaussian width for the occlusion kernel in Angstroms.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
Approximate accessibility values (N,) in (0, 1]; 1 = fully exposed.
|
|
135
|
+
"""
|
|
136
|
+
# Effective width combines atom-atom smoothing and the probe size
|
|
137
|
+
effective_sigma = sigma + probe_radius
|
|
138
|
+
|
|
139
|
+
# Pairwise squared distances
|
|
140
|
+
diff = coords[:, None, :] - coords[None, :, :]
|
|
141
|
+
dist_sq = jnp.sum(diff**2, axis=-1)
|
|
142
|
+
|
|
143
|
+
# Occlusion kernel: nearby atoms reduce accessibility.
|
|
144
|
+
# Subtract the self-contribution (exp(0) = 1) from each row.
|
|
145
|
+
occlusion = jnp.sum(jnp.exp(-dist_sq / (2 * effective_sigma**2)), axis=-1) - 1.0
|
|
146
|
+
accessibility = 1.0 / (1.0 + occlusion)
|
|
147
|
+
|
|
148
|
+
return accessibility
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def h_bond_energy(
|
|
152
|
+
donor_coords: jnp.ndarray,
|
|
153
|
+
acceptor_coords: jnp.ndarray,
|
|
154
|
+
cutoff: float = 3.5,
|
|
155
|
+
sigma: float = 0.5,
|
|
156
|
+
) -> jnp.ndarray:
|
|
157
|
+
"""
|
|
158
|
+
Compute a differentiable approximation of H-bond energy/count.
|
|
159
|
+
Uses a sigmoid-like distance cutoff.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
donor_coords: (N, 3) coordinates of donors.
|
|
163
|
+
acceptor_coords: (M, 3) coordinates of acceptors.
|
|
164
|
+
cutoff: Distance cutoff in Angstroms.
|
|
165
|
+
sigma: Smoothing parameter for the transition.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
Approximate H-bond energy/count for each donor (N,).
|
|
169
|
+
"""
|
|
170
|
+
# Compute pairwise distances (N, M)
|
|
171
|
+
diff = donor_coords[:, None, :] - acceptor_coords[None, :, :]
|
|
172
|
+
dist_sq = jnp.sum(diff**2, axis=-1)
|
|
173
|
+
# Safe distance for gradients
|
|
174
|
+
dist = jnp.sqrt(jnp.where(dist_sq > 0, dist_sq, 1.0))
|
|
175
|
+
dist = jnp.where(dist_sq > 0, dist, 0.0)
|
|
176
|
+
|
|
177
|
+
# Soft-cutoff: 1 / (1 + exp((r - r_cutoff) / sigma))
|
|
178
|
+
# Sum over all potential acceptors for each donor
|
|
179
|
+
hb_counts = jnp.sum(jax.nn.sigmoid((cutoff - dist) / sigma), axis=-1)
|
|
180
|
+
return hb_counts
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def protection_factors(
|
|
184
|
+
coords: jnp.ndarray,
|
|
185
|
+
h_bond_energies: jnp.ndarray,
|
|
186
|
+
beta_c: float = 1.0,
|
|
187
|
+
beta_asa: float = 1.0,
|
|
188
|
+
probe_radius: float = 1.4,
|
|
189
|
+
) -> jnp.ndarray:
|
|
190
|
+
"""
|
|
191
|
+
Compute HDX protection factors (PF).
|
|
192
|
+
PF = k_int / k_obs
|
|
193
|
+
|
|
194
|
+
Uses the LinderstrΓΈm-Lang model with separate scaling coefficients for
|
|
195
|
+
H-bond and burial contributions:
|
|
196
|
+
|
|
197
|
+
ln(PF) = beta_c * N_HB + beta_asa * (1 β SASA)
|
|
198
|
+
|
|
199
|
+
Both coefficients default to 1.0, matching the original single-beta
|
|
200
|
+
formulation for backward compatibility. When fitting against experimental
|
|
201
|
+
protection factors, beta_c and beta_asa should be treated as independent
|
|
202
|
+
free parameters.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
coords: (N, 3) coordinates.
|
|
206
|
+
h_bond_energies: (N,) hydrogen bond energies (or counts).
|
|
207
|
+
beta_c: Scaling coefficient for the H-bond contribution.
|
|
208
|
+
beta_asa: Scaling coefficient for the burial (1 β SASA) contribution.
|
|
209
|
+
probe_radius: Solvent probe radius passed to sasa_approx (Γ
).
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
PF (N,) protection factors.
|
|
213
|
+
"""
|
|
214
|
+
sasa = sasa_approx(coords, probe_radius=probe_radius)
|
|
215
|
+
# ln PF = beta_asa*(1 β SASA) + beta_c*N_HB
|
|
216
|
+
ln_pf = beta_asa * (1.0 - sasa) + beta_c * h_bond_energies
|
|
217
|
+
return jnp.exp(ln_pf)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def deuterium_uptake(
|
|
221
|
+
pf: jnp.ndarray,
|
|
222
|
+
k_int: jnp.ndarray,
|
|
223
|
+
time: float,
|
|
224
|
+
) -> jnp.ndarray:
|
|
225
|
+
"""
|
|
226
|
+
Compute time-dependent deuterium uptake using EX2 kinetics.
|
|
227
|
+
D(t) = 1 - exp(-k_obs * t)
|
|
228
|
+
where k_obs = k_int / PF (Hvidt & Nielsen, 1966).
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
pf: (N,) protection factors.
|
|
232
|
+
k_int: (N,) intrinsic exchange rates.
|
|
233
|
+
time: Exposure time in minutes.
|
|
234
|
+
|
|
235
|
+
Returns:
|
|
236
|
+
D(t) (N,) fractional deuterium uptake.
|
|
237
|
+
"""
|
|
238
|
+
k_obs = k_int / pf
|
|
239
|
+
return 1.0 - jnp.exp(-k_obs * time)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diff-hdx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Differentiable HDX-MS prediction in JAX
|
|
5
|
+
Author: George Elkins
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/elkins/diff-hdx
|
|
8
|
+
Project-URL: Repository, https://github.com/elkins/diff-hdx
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: jax
|
|
22
|
+
Requires-Dist: jaxlib
|
|
23
|
+
Requires-Dist: numpy
|
|
24
|
+
Requires-Dist: biotite
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
29
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# π§ͺ diff-hdx: Differentiable HDX-MS Prediction in JAX
|
|
33
|
+
|
|
34
|
+
[](https://github.com/elkins/diff-hdx/actions/workflows/test.yml)
|
|
35
|
+
[](https://opensource.org/licenses/MIT)
|
|
36
|
+
[](https://github.com/google/jax)
|
|
37
|
+
|
|
38
|
+
**diff-hdx** is a high-performance Python library for differentiable Hydrogen-Deuterium Exchange (HDX-MS) prediction. Built on **JAX**, it provides auto-differentiable kernels to bridge structural ensembles and experimental protection factors.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## π― Features
|
|
43
|
+
|
|
44
|
+
- **Differentiable SASA Kernels:** Hardware-accelerated approximations of Solvent Accessible Surface Area using Gaussian occlusion models.
|
|
45
|
+
- **Protection Factor Modeling:** Implementations of LinderstrΓΈm-Lang models for H-exchange rates ($PF$).
|
|
46
|
+
- **Kinetic Simulation:** Model time-dependent mass shifts using **EX2 kinetics** (Hvidt & Nielsen, 1966).
|
|
47
|
+
- **Gradient-Based Refinement:** Optimize protein structures or ensembles directly against experimental HDX-MS time-curves.
|
|
48
|
+
- **Vectorized Execution:** Native support for `vmap` to handle large conformational ensembles.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## ποΈ Technical Architecture
|
|
53
|
+
|
|
54
|
+
- **Backend:** JAX (XLA-compiled) β supports CPU, GPU, and TPU.
|
|
55
|
+
- **Differentiability:** Full support for forward and reverse-mode autodiff.
|
|
56
|
+
- **Integration:** Compatible with `biotite` for structural parsing and `diff-biophys` for ensemble averaging.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## π Roadmap
|
|
61
|
+
|
|
62
|
+
- [x] Initial differentiable SASA and $ln P$ kernels.
|
|
63
|
+
- [x] Integration with JAX `vmap` for ensemble averaging.
|
|
64
|
+
- [x] Residue-specific intrinsic exchange rates (Bai et al. 1993) β all 20 amino acids.
|
|
65
|
+
- [ ] Integration with MD trajectory loaders.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## π Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install diff-hdx
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## π§ͺ Scientific Validation
|
|
76
|
+
|
|
77
|
+
- **Parity Checks:** Kernels are validated against standard non-differentiable implementations (e.g., `biotite` SASA) to ensure physical accuracy.
|
|
78
|
+
- **Gradient Tests:** All kernels are verified using JAX's `gradcheck` to ensure numerically stable derivatives across the full support.
|
|
79
|
+
- **Ensemble Consistency:** Verified against `diff-biophys` ensemble averaging for IDP conformational ensembles.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## π Related Projects
|
|
84
|
+
|
|
85
|
+
diff-hdx is part of the **differentiable biophysics** ecosystem:
|
|
86
|
+
|
|
87
|
+
- [diff-biophys](https://github.com/elkins/diff-biophys) β Core differentiable biophysics engine.
|
|
88
|
+
- [diff-fret](https://github.com/elkins/diff-fret) β Differentiable FRET modeling.
|
|
89
|
+
- [diff-epr](https://github.com/elkins/diff-epr) β Differentiable EPR/DEER simulation.
|
|
90
|
+
- [synth-pdb](https://github.com/elkins/synth-pdb) β Synthetic structure generation.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## π Citation
|
|
95
|
+
|
|
96
|
+
```bibtex
|
|
97
|
+
@software{diff_hdx,
|
|
98
|
+
author = {Elkins, George},
|
|
99
|
+
title = {diff-hdx: Differentiable HDX-MS prediction in JAX},
|
|
100
|
+
year = {2026},
|
|
101
|
+
url = {https://github.com/elkins/diff-hdx},
|
|
102
|
+
version = {0.1.0}
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## βοΈ License
|
|
107
|
+
|
|
108
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
diff_hdx/__init__.py
|
|
5
|
+
diff_hdx/kernels.py
|
|
6
|
+
diff_hdx.egg-info/PKG-INFO
|
|
7
|
+
diff_hdx.egg-info/SOURCES.txt
|
|
8
|
+
diff_hdx.egg-info/dependency_links.txt
|
|
9
|
+
diff_hdx.egg-info/requires.txt
|
|
10
|
+
diff_hdx.egg-info/top_level.txt
|
|
11
|
+
tests/test_kernels.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
diff_hdx
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "diff-hdx"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Differentiable HDX-MS prediction in JAX"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "George Elkins"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Physics",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"jax",
|
|
26
|
+
"jaxlib",
|
|
27
|
+
"numpy",
|
|
28
|
+
"biotite",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/elkins/diff-hdx"
|
|
33
|
+
Repository = "https://github.com/elkins/diff-hdx"
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=7.0.0",
|
|
38
|
+
"pytest-cov>=4.0.0",
|
|
39
|
+
"ruff>=0.6.0",
|
|
40
|
+
"mypy>=1.8.0",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
include = ["diff_hdx*"]
|
|
45
|
+
|
|
46
|
+
[tool.pytest.ini_options]
|
|
47
|
+
testpaths = ["tests"]
|
|
48
|
+
pythonpath = ["."]
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
line-length = 100
|
|
52
|
+
target-version = "py310"
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = ["E", "F", "I", "N", "UP", "B"]
|
|
56
|
+
ignore = ["E501", "N806", "N803", "N802"]
|
|
57
|
+
|
|
58
|
+
[tool.mypy]
|
|
59
|
+
python_version = "3.10"
|
|
60
|
+
warn_return_any = true
|
|
61
|
+
warn_unused_configs = true
|
|
62
|
+
disallow_untyped_defs = true
|
|
63
|
+
|
|
64
|
+
[[tool.mypy.overrides]]
|
|
65
|
+
module = ["jax.*", "jaxlib.*", "numpy.*"]
|
|
66
|
+
ignore_missing_imports = true
|
diff_hdx-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import jax
|
|
2
|
+
import jax.numpy as jnp
|
|
3
|
+
|
|
4
|
+
from diff_hdx.kernels import (
|
|
5
|
+
deuterium_uptake,
|
|
6
|
+
h_bond_energy,
|
|
7
|
+
intrinsic_rates,
|
|
8
|
+
protection_factors,
|
|
9
|
+
sasa_approx,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_hdx_basic() -> None:
|
|
14
|
+
coords = jnp.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0], [3.0, 0.0, 0.0]])
|
|
15
|
+
sasa = sasa_approx(coords)
|
|
16
|
+
assert sasa.shape == (3,)
|
|
17
|
+
assert jnp.all(sasa > 0)
|
|
18
|
+
|
|
19
|
+
h_bond_energies = jnp.array([1.0, 2.0, 1.0])
|
|
20
|
+
ln_p = protection_factors(coords, h_bond_energies)
|
|
21
|
+
assert ln_p.shape == (3,)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_h_bond_energy() -> None:
|
|
25
|
+
donors = jnp.array([[0.0, 0.0, 0.0]])
|
|
26
|
+
acceptors = jnp.array([[2.0, 0.0, 0.0]]) # Within 3.5 cutoff
|
|
27
|
+
|
|
28
|
+
count = h_bond_energy(donors, acceptors)
|
|
29
|
+
assert count[0] > 0.5
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_hdx_differentiable() -> None:
|
|
33
|
+
coords = jnp.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0]])
|
|
34
|
+
h_bonds = jnp.array([1.0, 1.0])
|
|
35
|
+
|
|
36
|
+
def loss(x: jnp.ndarray) -> jnp.ndarray:
|
|
37
|
+
return jnp.sum(protection_factors(x, h_bonds))
|
|
38
|
+
|
|
39
|
+
grads = jax.grad(loss)(coords)
|
|
40
|
+
assert grads.shape == coords.shape
|
|
41
|
+
assert not jnp.any(jnp.isnan(grads))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_intrinsic_rates_bai_parity() -> None:
|
|
45
|
+
"""
|
|
46
|
+
Verify k_int against Bai et al. (1993) Ala-Ala reference at pH 7, 20C.
|
|
47
|
+
"""
|
|
48
|
+
# implementation uses ka=10^1.39, kb=10^10.08, kw=10^-1.5
|
|
49
|
+
rates = intrinsic_rates("AAAAA", ph=7.0, temperature=293.15)
|
|
50
|
+
assert jnp.allclose(rates[0], 812.8622, rtol=1e-3)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_intrinsic_rates_neighbor_sensitivity() -> None:
|
|
54
|
+
"""
|
|
55
|
+
Verify that right-neighbour corrections are applied to residue i+1,
|
|
56
|
+
not to residue i itself (Bai et al. 1993, Table 2).
|
|
57
|
+
|
|
58
|
+
An Ile (I) right-neighbour has ar=-0.59 (strong acid retardation) and
|
|
59
|
+
br=-0.23, which must reduce k_int at pH 7 relative to poly-Ala.
|
|
60
|
+
"""
|
|
61
|
+
rates_ala = intrinsic_rates("AAAA", ph=7.0, temperature=293.15)
|
|
62
|
+
rates_ile = intrinsic_rates("AAIA", ph=7.0, temperature=293.15)
|
|
63
|
+
|
|
64
|
+
# Residue 1 (A) has Ile as its right neighbour in "AAIA" β ar=-0.59 retards ka.
|
|
65
|
+
# Residue 1 in "AAAA" has Ala as its right neighbour β ar=0.00.
|
|
66
|
+
# So k_int[1] should be lower in the Ile sequence.
|
|
67
|
+
assert rates_ile[1] < rates_ala[1], "Right-neighbour Ile should reduce k_int relative to Ala"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_deuterium_uptake_parity() -> None:
|
|
71
|
+
"""
|
|
72
|
+
Verify deuterium uptake kinetics (D(t) = 1 - exp(-k_obs * t)).
|
|
73
|
+
"""
|
|
74
|
+
k_int = jnp.array([10.0])
|
|
75
|
+
pf = jnp.array([2.0])
|
|
76
|
+
# k_obs = 10 / 2 = 5.0
|
|
77
|
+
# D(0.1) = 1 - exp(-5.0 * 0.1) = 1 - exp(-0.5) = 1 - 0.6065 = 0.3935
|
|
78
|
+
|
|
79
|
+
d_t = deuterium_uptake(pf, k_int, time=0.1)
|
|
80
|
+
assert jnp.allclose(d_t[0], 0.3935, atol=1e-3)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_sasa_probe_radius_effect() -> None:
|
|
84
|
+
"""
|
|
85
|
+
Verify that probe_radius actually affects sasa_approx output.
|
|
86
|
+
|
|
87
|
+
The old bug: probe_radius was accepted as a parameter but silently
|
|
88
|
+
ignored (sigma was used alone, not sigma + probe_radius). So the
|
|
89
|
+
SASA value was identical regardless of probe_radius.
|
|
90
|
+
"""
|
|
91
|
+
coords = jnp.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0], [6.0, 0.0, 0.0]])
|
|
92
|
+
|
|
93
|
+
sasa_small_probe = sasa_approx(coords, probe_radius=0.0)
|
|
94
|
+
sasa_large_probe = sasa_approx(coords, probe_radius=3.0)
|
|
95
|
+
|
|
96
|
+
# A larger probe radius widens the occlusion shell, so each atom
|
|
97
|
+
# appears less accessible.
|
|
98
|
+
assert jnp.all(sasa_large_probe <= sasa_small_probe), (
|
|
99
|
+
"Larger probe_radius must reduce (or equal) SASA accessibility"
|
|
100
|
+
)
|
|
101
|
+
# They must not be identical (the bug manifested as exactly equal values)
|
|
102
|
+
assert not jnp.allclose(sasa_small_probe, sasa_large_probe), (
|
|
103
|
+
"probe_radius=0 and probe_radius=3.0 must produce different SASA values "
|
|
104
|
+
"(old code silently ignored probe_radius)"
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_intrinsic_rates_left_neighbor_sensitivity() -> None:
|
|
109
|
+
"""
|
|
110
|
+
Verify that left-neighbour corrections (al, bl) affect residue i via
|
|
111
|
+
residue i-1, not via residue i itself.
|
|
112
|
+
|
|
113
|
+
Val (V) has al=-0.74, which strongly retards acid catalysis when it is
|
|
114
|
+
the LEFT neighbour of the target residue.
|
|
115
|
+
"""
|
|
116
|
+
# In "AVAA", residue 2 (A at index 2) has Val as its LEFT neighbour (index 1)
|
|
117
|
+
# In "AAAA", residue 2 (A at index 2) has Ala as its left neighbour
|
|
118
|
+
rates_ala = intrinsic_rates("AAAA", ph=4.0, temperature=293.15) # acid pH
|
|
119
|
+
rates_val = intrinsic_rates("AVAA", ph=4.0, temperature=293.15)
|
|
120
|
+
|
|
121
|
+
# At pH 4.0 acid catalysis dominates; al=-0.74 for Val retards ka of residue 2
|
|
122
|
+
assert rates_val[2] < rates_ala[2], (
|
|
123
|
+
"Val left-neighbour (al=-0.74) should reduce k_int of residue 2 at acid pH"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_intrinsic_rates_c_terminal_boundary() -> None:
|
|
128
|
+
"""
|
|
129
|
+
The C-terminal residue has no right neighbour; Ala placeholder must be used.
|
|
130
|
+
Verify the last residue's rate equals that of an internal Ala-Ala-Ala context.
|
|
131
|
+
"""
|
|
132
|
+
# Last residue of "AAA" has right boundary β Ala
|
|
133
|
+
# Second residue of "AAAA" has right neighbour = Ala
|
|
134
|
+
rates_3 = intrinsic_rates("AAA", ph=7.0, temperature=293.15)
|
|
135
|
+
rates_4 = intrinsic_rates("AAAA", ph=7.0, temperature=293.15)
|
|
136
|
+
|
|
137
|
+
# For poly-Ala all corrections are 0, so all rates must be equal
|
|
138
|
+
assert jnp.allclose(rates_3[-1], rates_4[-2], rtol=1e-5), (
|
|
139
|
+
"C-terminal boundary condition must give same rate as internal Ala-Ala-Ala"
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_intrinsic_rates_jit_compatible() -> None:
|
|
144
|
+
"""
|
|
145
|
+
intrinsic_rates must be compilable via jax.jit.
|
|
146
|
+
|
|
147
|
+
The old bug: the Python for-loop in intrinsic_rates caused JAX tracing
|
|
148
|
+
to unroll the loop symbolically for each residue, which works for small
|
|
149
|
+
sequences but is not JIT-compatible in general (and is very slow to compile
|
|
150
|
+
for long sequences). The vectorised rewrite is properly JIT-able.
|
|
151
|
+
"""
|
|
152
|
+
import jax
|
|
153
|
+
|
|
154
|
+
jit_rates = jax.jit(intrinsic_rates, static_argnums=(0,))
|
|
155
|
+
# Should compile and run without error
|
|
156
|
+
rates = jit_rates("ACDEFGHIKLMNPQRSTVWY", ph=7.0, temperature=293.15)
|
|
157
|
+
assert rates.shape == (20,)
|
|
158
|
+
assert jnp.all(rates > 0)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def test_intrinsic_rates_temperature_dependence() -> None:
|
|
162
|
+
"""
|
|
163
|
+
Rates must increase with temperature (Arrhenius activation energies > 0).
|
|
164
|
+
At pH 7 where base catalysis dominates (E_b = 17 kcal/mol), raising
|
|
165
|
+
temperature from 293 K to 310 K must increase k_int.
|
|
166
|
+
"""
|
|
167
|
+
rates_cold = intrinsic_rates("AAAA", ph=7.0, temperature=293.15)
|
|
168
|
+
rates_warm = intrinsic_rates("AAAA", ph=7.0, temperature=310.0)
|
|
169
|
+
assert jnp.all(rates_warm > rates_cold), (
|
|
170
|
+
"k_int must increase with temperature (Arrhenius behaviour)"
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_h_bond_energy_far_acceptor() -> None:
|
|
175
|
+
"""
|
|
176
|
+
An acceptor far outside the cutoff should contribute near-zero H-bond energy.
|
|
177
|
+
An acceptor close to the donor should contribute near-unity count.
|
|
178
|
+
Verify the sigmoid correctly attenuates at large distances.
|
|
179
|
+
|
|
180
|
+
h_bond_energy uses a sigmoid with sigma=0.5 A and cutoff=3.5 A.
|
|
181
|
+
At r=1.5 A: sigmoid((3.5-1.5)/0.5) = sigmoid(4) = 0.982 > 0.9
|
|
182
|
+
At r=20 A: sigmoid((3.5-20)/0.5) = sigmoid(-33) β 0
|
|
183
|
+
"""
|
|
184
|
+
donors = jnp.array([[0.0, 0.0, 0.0]])
|
|
185
|
+
near = jnp.array([[1.5, 0.0, 0.0]]) # 1.5 A, well within 3.5 cutoff
|
|
186
|
+
far = jnp.array([[20.0, 0.0, 0.0]]) # 20 A >> cutoff
|
|
187
|
+
|
|
188
|
+
count_near = h_bond_energy(donors, near)
|
|
189
|
+
count_far = h_bond_energy(donors, far)
|
|
190
|
+
|
|
191
|
+
assert count_near[0] > 0.9, (
|
|
192
|
+
f"H-bond count must be ~1 for acceptor well within cutoff, got {count_near[0]:.4f}"
|
|
193
|
+
)
|
|
194
|
+
assert count_far[0] < 0.01, (
|
|
195
|
+
f"H-bond count must be ~0 for acceptor far outside cutoff, got {count_far[0]:.6f}"
|
|
196
|
+
)
|