diff-hdx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diff_hdx/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ from .kernels import (
2
+ deuterium_uptake,
3
+ h_bond_energy,
4
+ intrinsic_rates,
5
+ protection_factors,
6
+ sasa_approx,
7
+ )
8
+
9
+ __all__ = [
10
+ "deuterium_uptake",
11
+ "h_bond_energy",
12
+ "intrinsic_rates",
13
+ "protection_factors",
14
+ "sasa_approx",
15
+ ]
diff_hdx/kernels.py ADDED
@@ -0,0 +1,239 @@
1
+ import jax
2
+ import jax.numpy as jnp
3
+ from jax import Array
4
+
5
+ # ---------------------------------------------------------------------------
6
+ # Bai et al. (1993) intrinsic-rate correction table.
7
+ # Stored as a fixed-order amino-acid array for vectorised (JIT-compatible) lookup.
8
+ # Columns: [al, ar, bl, br] (log10 corrections for acid-left, acid-right,
9
+ # base-left, base-right catalysis)
10
+ # Order matches _AA_ORDER below.
11
+ # ---------------------------------------------------------------------------
12
+ _AA_ORDER = "ARNDCQEGHILKMFPSTWYV"
13
+ _AA_IDX: dict[str, int] = {aa: i for i, aa in enumerate(_AA_ORDER)}
14
+ _ALA_IDX: int = _AA_IDX["A"]
15
+
16
+ # Shape (20, 4): rows are amino acids, columns are [al, ar, bl, br]
17
+ _CORRECTIONS = [
18
+ # al ar bl br
19
+ [0.00, 0.00, 0.00, 0.00], # A
20
+ [-0.59, -0.32, 0.08, 0.22], # R
21
+ [-0.58, -0.13, 0.49, 0.32], # N
22
+ [-0.90, -0.12, 0.69, 0.60], # D (COOH state)
23
+ [-0.54, -0.46, 0.62, 0.55], # C
24
+ [-0.47, -0.27, 0.06, 0.20], # Q
25
+ [-0.60, -0.27, 0.24, 0.39], # E (COOH state)
26
+ [-0.22, 0.22, -0.03, 0.17], # G
27
+ [-0.10, 0.14, 0.00, 0.00], # H
28
+ [-0.91, -0.59, -0.73, -0.23], # I
29
+ [-0.57, -0.13, -0.58, -0.21], # L
30
+ [-0.56, -0.29, -0.04, 0.12], # K
31
+ [-0.64, -0.28, -0.01, 0.11], # M
32
+ [-0.52, -0.43, -0.24, 0.06], # F
33
+ [-0.19, -0.24, 0.00, 0.00], # P
34
+ [-0.44, -0.39, 0.37, 0.30], # S
35
+ [-0.79, -0.47, -0.07, 0.20], # T
36
+ [-0.40, -0.44, -0.41, -0.11], # W
37
+ [-0.41, -0.37, -0.27, 0.05], # Y
38
+ [-0.74, -0.30, -0.70, -0.14], # V
39
+ ]
40
+
41
+
42
def intrinsic_rates(
    sequence: str,
    ph: float = 7.0,
    temperature: float = 293.15,
) -> Array:
    """
    Compute intrinsic exchange rates (k_int) using the Bai et al. (1993) model.

    For residue *i* the side-chain correction combines:
      - the **left** neighbour (residue i-1) via the "al" / "bl" columns, and
      - the **right** neighbour (residue i+1) via the "ar" / "br" columns.
    The first and last residues have no left / right neighbour respectively;
    Ala (all-zero corrections) is used as a placeholder there.

    Characters that are not keys of ``_AA_IDX`` are treated as Ala. The lookup
    is on the character exactly as given, so lowercase codes also fall back
    to Ala.

    The per-residue arithmetic is vectorised. ``sequence`` is iterated in
    plain Python, so it must be a concrete string (not a traced value).

    Args:
        sequence: Protein sequence string (one-letter amino-acid codes).
        ph: pH value.
        temperature: Temperature in Kelvin.

    Returns:
        k_int array of shape (N,), rates in min⁻¹, where N = len(sequence).
        An empty sequence yields an empty, shape-(0,) array.
    """
    corr = jnp.array(_CORRECTIONS)  # (20, 4)

    # Without this guard the boundary padding below would turn an empty
    # sequence into a single fictitious Ala/Ala neighbour pair and return one
    # rate for zero residues.
    if not sequence:
        return jnp.zeros((0,), dtype=corr.dtype)

    # Encode sequence as integer row indices (unknown residues -> Ala)
    seq_idx = [_AA_IDX.get(aa, _ALA_IDX) for aa in sequence]

    # Left neighbour of residue i is residue i-1; pad the N-terminus with Ala
    left_idx = [_ALA_IDX, *seq_idx[:-1]]
    # Right neighbour of residue i is residue i+1; pad the C-terminus with Ala
    right_idx = [*seq_idx[1:], _ALA_IDX]

    left_corr = corr[jnp.array(left_idx)]  # (N, 4)
    right_corr = corr[jnp.array(right_idx)]  # (N, 4)

    # Reference rates for NH in H₂O at 20 °C (293.15 K)
    k_a_ref = 10.0**1.39
    k_b_ref = 10.0**10.08
    k_w_ref = 10.0**-1.50  # estimated

    # [H⁺] and [OH⁻]; pKw is held at its 20 °C value (≈ 14.17) and is NOT
    # adjusted when `temperature` differs from 293.15 K.
    h_plus = 10.0 ** (-ph)
    oh_minus = 10.0 ** (ph - 14.17)

    # Arrhenius temperature corrections (activation energies in kcal/mol)
    e_a, e_b, e_w = 14.0, 17.0, 19.0
    r_gas = 1.987e-3  # kcal / (mol·K)

    def temp_corr(k_ref: float, e_act: float) -> jnp.ndarray:
        # Scale a 293.15 K reference rate to `temperature`.
        return k_ref * jnp.exp(-e_act / r_gas * (1.0 / temperature - 1.0 / 293.15))  # type: ignore[no-any-return]

    ka_ref_t = temp_corr(k_a_ref, e_a)
    kb_ref_t = temp_corr(k_b_ref, e_b)
    kw_ref_t = temp_corr(k_w_ref, e_w)

    # Log-additive corrections, vectorised over all residues.
    # Columns: [al=0, ar=1, bl=2, br=3]
    acid_shift = left_corr[:, 0] + right_corr[:, 1]  # al + ar
    base_shift = left_corr[:, 2] + right_corr[:, 3]  # bl + br
    ka = ka_ref_t * 10.0**acid_shift
    kb = kb_ref_t * 10.0**base_shift
    kw = kw_ref_t * 10.0**base_shift  # water term reuses the base corrections

    return jnp.asarray(ka * h_plus + kb * oh_minus + kw)  # explicit Array, satisfies mypy
108
+
109
+
110
def sasa_approx(
    coords: jnp.ndarray,
    probe_radius: float = 1.4,
    sigma: float = 2.0,
) -> jnp.ndarray:
    """
    Smooth, differentiable stand-in for Solvent Accessible Surface Area.

    Every other atom contributes a Gaussian "occlusion" weight
    ``exp(-d² / (2 * w²))`` with width ``w = sigma + probe_radius``. The
    accessibility of an atom is ``1 / (1 + total occlusion)``, so an isolated
    atom scores 1 and the score falls towards 0 as neighbours crowd in.
    Increasing ``probe_radius`` widens the kernel and therefore lowers the
    returned accessibility.

    This is a surrogate, not a Shrake–Rupley SASA: no per-atom radii are
    used and the output is a dimensionless score, not an area.

    Args:
        coords: (N, 3) atomic coordinates in Angstroms.
        probe_radius: Radius of the solvent probe in Angstroms (default 1.4 Å).
        sigma: Base Gaussian width for the occlusion kernel in Angstroms.

    Returns:
        Approximate accessibility values (N,) in (0, 1]; 1 = fully exposed.
    """
    width = sigma + probe_radius

    # (N, N) matrix of squared inter-atomic distances
    deltas = coords[:, None, :] - coords[None, :, :]
    sq_dist = jnp.sum(deltas**2, axis=-1)

    # Gaussian weight of every atom pair, including each atom with itself
    weights = jnp.exp(-sq_dist / (2 * width**2))

    # Row sums count the atom itself once (exp(0) = 1); remove that term so
    # only genuine neighbours occlude.
    crowding = jnp.sum(weights, axis=-1) - 1.0

    return 1.0 / (1.0 + crowding)
149
+
150
+
151
def h_bond_energy(
    donor_coords: jnp.ndarray,
    acceptor_coords: jnp.ndarray,
    cutoff: float = 3.5,
    sigma: float = 0.5,
) -> jnp.ndarray:
    """
    Soft, differentiable count of acceptors within ``cutoff`` of each donor.

    Each donor–acceptor pair at distance r contributes
    ``sigmoid((cutoff - r) / sigma)``, i.e. ``1 / (1 + exp((r - cutoff) / sigma))``:
    close to 1 well inside the cutoff, 0.5 exactly at it, and close to 0 far
    outside. Contributions are summed over all acceptors.

    Args:
        donor_coords: (N, 3) coordinates of donors.
        acceptor_coords: (M, 3) coordinates of acceptors.
        cutoff: Distance cutoff in Angstroms.
        sigma: Smoothing parameter for the transition.

    Returns:
        Approximate H-bond energy/count for each donor (N,).
    """
    # (N, M) squared donor–acceptor distances
    deltas = donor_coords[:, None, :] - acceptor_coords[None, :, :]
    sq_dist = jnp.sum(deltas**2, axis=-1)

    # sqrt has an infinite derivative at 0, so coincident pairs are routed
    # through a dummy value of 1.0 before the sqrt and then forced back to a
    # distance of 0 afterwards.
    separated = sq_dist > 0
    safe_root = jnp.sqrt(jnp.where(separated, sq_dist, 1.0))
    r = jnp.where(separated, safe_root, 0.0)

    pair_scores = jax.nn.sigmoid((cutoff - r) / sigma)
    return jnp.sum(pair_scores, axis=-1)
181
+
182
+
183
def protection_factors(
    coords: jnp.ndarray,
    h_bond_energies: jnp.ndarray,
    beta_c: float = 1.0,
    beta_asa: float = 1.0,
    probe_radius: float = 1.4,
) -> jnp.ndarray:
    """
    Compute HDX protection factors (PF = k_int / k_obs) from structure.

    The log protection factor is a weighted sum of a burial term and an
    H-bond term:

        ln(PF) = beta_asa * (1 − SASA) + beta_c * N_HB

    where SASA is the accessibility score returned by ``sasa_approx`` for
    ``coords`` and N_HB is ``h_bond_energies``. The two weights are
    independent, so they can be fitted separately against experimental data.

    Args:
        coords: (N, 3) coordinates.
        h_bond_energies: (N,) hydrogen bond energies (or counts).
        beta_c: Scaling coefficient for the H-bond contribution.
        beta_asa: Scaling coefficient for the burial (1 − SASA) contribution.
        probe_radius: Solvent probe radius passed to sasa_approx (Å).

    Returns:
        PF (N,) protection factors.
    """
    accessibility = sasa_approx(coords, probe_radius=probe_radius)

    burial_term = beta_asa * (1.0 - accessibility)
    hbond_term = beta_c * h_bond_energies

    return jnp.exp(burial_term + hbond_term)
218
+
219
+
220
def deuterium_uptake(
    pf: jnp.ndarray,
    k_int: jnp.ndarray,
    time: float,
) -> jnp.ndarray:
    """
    Compute time-dependent deuterium uptake using EX2 kinetics.

        D(t) = 1 - exp(-k_obs * t)

    where k_obs = k_int / PF (Hvidt & Nielsen, 1966).

    The expression is evaluated as ``-expm1(-k_obs * t)``. This is
    algebraically identical, but it avoids the cancellation in
    ``1 - exp(-x)`` when ``x`` is very small. With large protection factors,
    the naive form in single precision rounds ``exp(-x)`` to exactly 1 and
    reports zero uptake.

    Args:
        pf: (N,) protection factors.
        k_int: (N,) intrinsic exchange rates.
        time: Exposure time in minutes.

    Returns:
        D(t) (N,) fractional deuterium uptake.
    """
    k_obs = k_int / pf
    # expm1(y) = exp(y) - 1, computed accurately near y = 0
    return -jnp.expm1(-k_obs * time)
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: diff-hdx
3
+ Version: 0.1.0
4
+ Summary: Differentiable HDX-MS prediction in JAX
5
+ Author: George Elkins
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/elkins/diff-hdx
8
+ Project-URL: Repository, https://github.com/elkins/diff-hdx
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
17
+ Classifier: Topic :: Scientific/Engineering :: Physics
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: jax
22
+ Requires-Dist: jaxlib
23
+ Requires-Dist: numpy
24
+ Requires-Dist: biotite
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
28
+ Requires-Dist: ruff>=0.6.0; extra == "dev"
29
+ Requires-Dist: mypy>=1.8.0; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # 🧪 diff-hdx: Differentiable HDX-MS Prediction in JAX
33
+
34
+ [![Tests](https://github.com/elkins/diff-hdx/actions/workflows/test.yml/badge.svg)](https://github.com/elkins/diff-hdx/actions/workflows/test.yml)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
36
+ [![JAX](https://img.shields.io/badge/backend-JAX-9cf.svg)](https://github.com/google/jax)
37
+
38
+ **diff-hdx** is a high-performance Python library for differentiable Hydrogen-Deuterium Exchange (HDX-MS) prediction. Built on **JAX**, it provides auto-differentiable kernels to bridge structural ensembles and experimental protection factors.
39
+
40
+ ---
41
+
42
+ ## 🎯 Features
43
+
44
+ - **Differentiable SASA Kernels:** Hardware-accelerated approximations of Solvent Accessible Surface Area using Gaussian occlusion models.
45
+ - **Protection Factor Modeling:** Implementations of Linderstrøm-Lang-style models for hydrogen-exchange protection factors ($PF$).
46
+ - **Kinetic Simulation:** Model time-dependent mass shifts using **EX2 kinetics** (Hvidt & Nielsen, 1966).
47
+ - **Gradient-Based Refinement:** Optimize protein structures or ensembles directly against experimental HDX-MS time-curves.
48
+ - **Vectorized Execution:** Native support for `vmap` to handle large conformational ensembles.
49
+
50
+ ---
51
+
52
+ ## 🏗️ Technical Architecture
53
+
54
+ - **Backend:** JAX (XLA-compiled) — supports CPU, GPU, and TPU.
55
+ - **Differentiability:** Full support for forward and reverse-mode autodiff.
56
+ - **Integration:** Compatible with `biotite` for structural parsing and `diff-biophys` for ensemble averaging.
57
+
58
+ ---
59
+
60
+ ## 🚀 Roadmap
61
+
62
+ - [x] Initial differentiable SASA and $\ln PF$ kernels.
63
+ - [x] Integration with JAX `vmap` for ensemble averaging.
64
+ - [x] Residue-specific intrinsic exchange rates (Bai et al. 1993) — all 20 amino acids.
65
+ - [ ] Integration with MD trajectory loaders.
66
+
67
+ ---
68
+
69
+ ## 🚀 Installation
70
+
71
+ ```bash
72
+ pip install diff-hdx
73
+ ```
74
+
75
+ ## 🧪 Scientific Validation
76
+
77
+ - **Parity Checks:** Kernels are validated against standard non-differentiable implementations (e.g., `biotite` SASA) to ensure physical accuracy.
78
+ - **Gradient Tests:** All kernels are verified using JAX's `jax.test_util.check_grads` to ensure numerically stable derivatives across their full input domain.
79
+ - **Ensemble Consistency:** Verified against `diff-biophys` ensemble averaging for IDP conformational ensembles.
80
+
81
+ ---
82
+
83
+ ## 🔗 Related Projects
84
+
85
+ diff-hdx is part of the **differentiable biophysics** ecosystem:
86
+
87
+ - [diff-biophys](https://github.com/elkins/diff-biophys) — Core differentiable biophysics engine.
88
+ - [diff-fret](https://github.com/elkins/diff-fret) — Differentiable FRET modeling.
89
+ - [diff-epr](https://github.com/elkins/diff-epr) — Differentiable EPR/DEER simulation.
90
+ - [synth-pdb](https://github.com/elkins/synth-pdb) — Synthetic structure generation.
91
+
92
+ ---
93
+
94
+ ## 📖 Citation
95
+
96
+ ```bibtex
97
+ @software{diff_hdx,
98
+ author = {Elkins, George},
99
+ title = {diff-hdx: Differentiable HDX-MS prediction in JAX},
100
+ year = {2026},
101
+ url = {https://github.com/elkins/diff-hdx},
102
+ version = {0.1.0}
103
+ }
104
+ ```
105
+
106
+ ## ⚖️ License
107
+
108
+ MIT
@@ -0,0 +1,7 @@
1
+ diff_hdx/__init__.py,sha256=9hzPWRFhiiTh1LgIsiW92wnkXaaMjLSyp-JniHHYIs4,256
2
+ diff_hdx/kernels.py,sha256=6DN4Ui5qliNQ-43xSzQgEH8oYQlGMgkdbj4GU2Zohgk,8710
3
+ diff_hdx-0.1.0.dist-info/licenses/LICENSE,sha256=vhOY8MmX_KvSe6pbSfjbPEoceqNPv_Hoaz6BXrKS6rI,1070
4
+ diff_hdx-0.1.0.dist-info/METADATA,sha256=Jpt-Fbdqzl4j0uvtFXnNzlaJXOiiXn536fx7sb3Vv2w,4141
5
+ diff_hdx-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ diff_hdx-0.1.0.dist-info/top_level.txt,sha256=hFrWLyycjk4rs_HFhz9FfNo2tfTu3n_8Z0UnsqlR4wM,9
7
+ diff_hdx-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 George Elkins
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ diff_hdx