hxprobe 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hxprobe-0.1.0/LICENSE +21 -0
- hxprobe-0.1.0/PKG-INFO +165 -0
- hxprobe-0.1.0/README.md +128 -0
- hxprobe-0.1.0/pyproject.toml +60 -0
- hxprobe-0.1.0/setup.cfg +4 -0
- hxprobe-0.1.0/src/hxprobe/__init__.py +29 -0
- hxprobe-0.1.0/src/hxprobe/cli.py +101 -0
- hxprobe-0.1.0/src/hxprobe/data/ubiquitin_dgopen.csv +42 -0
- hxprobe-0.1.0/src/hxprobe/data/ubiquitin_ensemble.pdb.gz +0 -0
- hxprobe-0.1.0/src/hxprobe/diff.py +68 -0
- hxprobe-0.1.0/src/hxprobe/ensemble.py +60 -0
- hxprobe-0.1.0/src/hxprobe/operator.py +252 -0
- hxprobe-0.1.0/src/hxprobe/probe.py +165 -0
- hxprobe-0.1.0/src/hxprobe/protonate.py +70 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/PKG-INFO +165 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/SOURCES.txt +21 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/dependency_links.txt +1 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/entry_points.txt +2 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/requires.txt +15 -0
- hxprobe-0.1.0/src/hxprobe.egg-info/top_level.txt +1 -0
- hxprobe-0.1.0/tests/test_cli.py +21 -0
- hxprobe-0.1.0/tests/test_operator.py +38 -0
- hxprobe-0.1.0/tests/test_probe.py +37 -0
hxprobe-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 hxprobe authors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
hxprobe-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hxprobe
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The 50-conformer probe: residue-resolved hydrogen-exchange opening free energies from conformational ensembles
|
|
5
|
+
Author: hxprobe authors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/woshuizhaol/hxprobe
|
|
8
|
+
Project-URL: Source, https://github.com/woshuizhaol/hxprobe
|
|
9
|
+
Project-URL: Issues, https://github.com/woshuizhaol/hxprobe/issues
|
|
10
|
+
Keywords: hydrogen-deuterium exchange,protection factor,conformational ensemble,generative models,protein dynamics,free energy,structural biology
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.21
|
|
25
|
+
Requires-Dist: pandas>=1.3
|
|
26
|
+
Requires-Dist: mdtraj>=1.9
|
|
27
|
+
Provides-Extra: fix
|
|
28
|
+
Requires-Dist: pdbfixer>=1.8; extra == "fix"
|
|
29
|
+
Requires-Dist: openmm>=7.6; extra == "fix"
|
|
30
|
+
Provides-Extra: diff
|
|
31
|
+
Requires-Dist: torch>=1.10; extra == "diff"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
34
|
+
Requires-Dist: build; extra == "dev"
|
|
35
|
+
Requires-Dist: twine; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# hxprobe — the 50-conformer probe
|
|
39
|
+
|
|
40
|
+
**Read residue-resolved hydrogen-exchange opening free energies out of a conformational ensemble.**
|
|
41
|
+
|
|
42
|
+
`hxprobe` turns a conformational ensemble (from a generative model, molecular
|
|
43
|
+
dynamics, or any source) into per-residue protection factors and opening free
|
|
44
|
+
energies (ΔG_open, in kcal/mol) using a white-box, two-parameter physical
|
|
45
|
+
operator. It is an inexpensive, physically interpretable probe of how well an
|
|
46
|
+
ensemble reproduces the *near-equilibrium local opening* that hydrogen–deuterium
|
|
47
|
+
exchange measures — and it converges within roughly **50 conformers**, which is
|
|
48
|
+
where the name comes from.
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
ln PF_i = β_C · ⟨N_C,i⟩ + β_H · ⟨N_H,i⟩ (ensemble average)
|
|
52
|
+
ΔG_open,i = RT · ln PF_i (EX2 regime)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
`N_C` counts heavy atoms near each backbone amide nitrogen and `N_H` counts the
|
|
56
|
+
amide's backbone hydrogen bonds, averaged over the ensemble. The two
|
|
57
|
+
coefficients are **fixed to their classical Best–Vendruscolo values** (0.35 and
|
|
58
|
+
2.0) and are *not* fitted to stability data, so any residue-level signal the
|
|
59
|
+
probe recovers comes from the ensemble, not from a tuned scoring function.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install hxprobe
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
This pulls in `numpy`, `pandas`, and `mdtraj`. Two optional extras:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install "hxprobe[fix]" # PDBFixer/OpenMM: repair + protonate raw structures
|
|
71
|
+
pip install "hxprobe[diff]" # PyTorch: differentiable operator for steering
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quickstart
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import hxprobe
|
|
78
|
+
|
|
79
|
+
# Score the bundled 50-conformer ubiquitin ensemble (already protonated).
|
|
80
|
+
res = hxprobe.score_ensemble(hxprobe.example_ensemble_path(), protonate="none")
|
|
81
|
+
print(res.to_dataframe().head()) # resSeq, resn, NC_mean, NH_mean, lnPF, dGopen_kcal
|
|
82
|
+
|
|
83
|
+
# Compare to experimental native-state HX opening free energies.
|
|
84
|
+
exp = hxprobe.load_experimental() # {resSeq: dG_open}
|
|
85
|
+
ref = [exp.get(int(r), float("nan")) for r in res.resSeq]
|
|
86
|
+
print("Spearman vs experiment:", round(hxprobe.spearman(res.lnPF, ref), 3))
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Score *your own* ensemble — a multi-model PDB, or a trajectory plus topology:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
res = hxprobe.score_ensemble("my_ensemble.pdb") # multi-model PDB
|
|
93
|
+
res = hxprobe.score_ensemble("traj.xtc", top="topology.pdb") # trajectory + topology
|
|
94
|
+
res.to_csv("opening_free_energies.csv")
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
If your structures are raw heavy-atom coordinates without hydrogens, the H-bond
|
|
98
|
+
term is obtained either by a geometric amide-H placement (default, no extra
|
|
99
|
+
dependencies) or, more faithfully, with PDBFixer:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
res = hxprobe.score_ensemble("raw_heavy_atom.pdb", protonate="pdbfixer") # needs hxprobe[fix]
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Command line
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
hxprobe example # run the bundled ubiquitin demo
|
|
109
|
+
hxprobe score my_ensemble.pdb # print the per-residue table
|
|
110
|
+
hxprobe score traj.xtc --top top.pdb --out dG.csv
|
|
111
|
+
hxprobe converge my_ensemble.pdb # show convergence with ensemble size
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## What you get back
|
|
115
|
+
|
|
116
|
+
`score_ensemble` returns a `ProtectionResult` with NumPy arrays and a
|
|
117
|
+
`.to_dataframe()` / `.to_csv()` helper:
|
|
118
|
+
|
|
119
|
+
| field | meaning |
|
|
120
|
+
|---|---|
|
|
121
|
+
| `resSeq`, `resn` | residue number and one-letter code |
|
|
122
|
+
| `NC_mean`, `NH_mean` | ensemble-averaged contacts / hydrogen bonds |
|
|
123
|
+
| `lnPF`, `log10PF` | log protection factor |
|
|
124
|
+
| `dGopen_kcal` | opening free energy ΔG_open (kcal/mol) |
|
|
125
|
+
|
|
126
|
+
Two further entry points:
|
|
127
|
+
|
|
128
|
+
* **`convergence(ensemble)`** — Spearman correlation of the `n`-conformer
|
|
129
|
+
readout against the full-ensemble readout (and, optionally, against an
|
|
130
|
+
experimental reference), showing the plateau near ~50 conformers.
|
|
131
|
+
* **`global_unfolding(ensemble)`** — `RT · min_c ⟨ln PF⟩_residues`, the
|
|
132
|
+
protection of the most-open conformer, a bounded proxy for global fold
|
|
133
|
+
stability (the unfolded-state limit of the ensemble).
|
|
134
|
+
|
|
135
|
+
## How it works
|
|
136
|
+
|
|
137
|
+
For each backbone amide (prolines and non-standard residues are skipped):
|
|
138
|
+
|
|
139
|
+
* **`N_C`** — heavy atoms within **6.5 Å** of the amide nitrogen, sequence
|
|
140
|
+
separation `|i − j| ≥ 3`, hydrogens excluded.
|
|
141
|
+
* **`N_H`** — backbone carbonyl oxygens within **2.6 Å** of the amide hydrogen,
|
|
142
|
+
sequence separation `|i − j| ≥ 2`.
|
|
143
|
+
|
|
144
|
+
Counts are computed per conformer and **averaged over the ensemble before** the
|
|
145
|
+
linear combination is formed, so a residue that is buried in most conformers but
|
|
146
|
+
exposed in a rare open state receives the reduced mean contact count its
|
|
147
|
+
protection reflects. The contact term dominates, so the readout is robust even
|
|
148
|
+
when hydrogens are placed geometrically rather than with a full protonation step.
|
|
149
|
+
|
|
150
|
+
## Reproducing the bundled example
|
|
151
|
+
|
|
152
|
+
`hxprobe example` scores a 50-conformer leakage-free ubiquitin ensemble and
|
|
153
|
+
recovers the experimental native-state opening free energies at Spearman
|
|
154
|
+
ρ ≈ 0.58, with the correlation plateauing by ~25–50 conformers — the behaviour
|
|
155
|
+
that motivates the probe.
|
|
156
|
+
|
|
157
|
+
## Citing
|
|
158
|
+
|
|
159
|
+
If you use `hxprobe`, please cite the accompanying study on residue-resolved
|
|
160
|
+
hydrogen-exchange free energies as a benchmark for generative conformational
|
|
161
|
+
ensembles. (Reference to be added on publication.)
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
MIT.
|
hxprobe-0.1.0/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# hxprobe — the 50-conformer probe
|
|
2
|
+
|
|
3
|
+
**Read residue-resolved hydrogen-exchange opening free energies out of a conformational ensemble.**
|
|
4
|
+
|
|
5
|
+
`hxprobe` turns a conformational ensemble (from a generative model, molecular
|
|
6
|
+
dynamics, or any source) into per-residue protection factors and opening free
|
|
7
|
+
energies (ΔG_open, in kcal/mol) using a white-box, two-parameter physical
|
|
8
|
+
operator. It is an inexpensive, physically interpretable probe of how well an
|
|
9
|
+
ensemble reproduces the *near-equilibrium local opening* that hydrogen–deuterium
|
|
10
|
+
exchange measures — and it converges within roughly **50 conformers**, which is
|
|
11
|
+
where the name comes from.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
ln PF_i = β_C · ⟨N_C,i⟩ + β_H · ⟨N_H,i⟩ (ensemble average)
|
|
15
|
+
ΔG_open,i = RT · ln PF_i (EX2 regime)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
`N_C` counts heavy atoms near each backbone amide nitrogen and `N_H` counts the
|
|
19
|
+
amide's backbone hydrogen bonds, averaged over the ensemble. The two
|
|
20
|
+
coefficients are **fixed to their classical Best–Vendruscolo values** (0.35 and
|
|
21
|
+
2.0) and are *not* fitted to stability data, so any residue-level signal the
|
|
22
|
+
probe recovers comes from the ensemble, not from a tuned scoring function.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install hxprobe
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
This pulls in `numpy`, `pandas`, and `mdtraj`. Two optional extras:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install "hxprobe[fix]" # PDBFixer/OpenMM: repair + protonate raw structures
|
|
34
|
+
pip install "hxprobe[diff]" # PyTorch: differentiable operator for steering
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quickstart
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
import hxprobe
|
|
41
|
+
|
|
42
|
+
# Score the bundled 50-conformer ubiquitin ensemble (already protonated).
|
|
43
|
+
res = hxprobe.score_ensemble(hxprobe.example_ensemble_path(), protonate="none")
|
|
44
|
+
print(res.to_dataframe().head()) # resSeq, resn, NC_mean, NH_mean, lnPF, dGopen_kcal
|
|
45
|
+
|
|
46
|
+
# Compare to experimental native-state HX opening free energies.
|
|
47
|
+
exp = hxprobe.load_experimental() # {resSeq: dG_open}
|
|
48
|
+
ref = [exp.get(int(r), float("nan")) for r in res.resSeq]
|
|
49
|
+
print("Spearman vs experiment:", round(hxprobe.spearman(res.lnPF, ref), 3))
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Score *your own* ensemble — a multi-model PDB, or a trajectory plus topology:
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
res = hxprobe.score_ensemble("my_ensemble.pdb") # multi-model PDB
|
|
56
|
+
res = hxprobe.score_ensemble("traj.xtc", top="topology.pdb") # trajectory + topology
|
|
57
|
+
res.to_csv("opening_free_energies.csv")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
If your structures are raw heavy-atom coordinates without hydrogens, the H-bond
|
|
61
|
+
term is obtained either by a geometric amide-H placement (default, no extra
|
|
62
|
+
dependencies) or, more faithfully, with PDBFixer:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
res = hxprobe.score_ensemble("raw_heavy_atom.pdb", protonate="pdbfixer") # needs hxprobe[fix]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Command line
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
hxprobe example # run the bundled ubiquitin demo
|
|
72
|
+
hxprobe score my_ensemble.pdb # print the per-residue table
|
|
73
|
+
hxprobe score traj.xtc --top top.pdb --out dG.csv
|
|
74
|
+
hxprobe converge my_ensemble.pdb # show convergence with ensemble size
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## What you get back
|
|
78
|
+
|
|
79
|
+
`score_ensemble` returns a `ProtectionResult` with NumPy arrays and a
|
|
80
|
+
`.to_dataframe()` / `.to_csv()` helper:
|
|
81
|
+
|
|
82
|
+
| field | meaning |
|
|
83
|
+
|---|---|
|
|
84
|
+
| `resSeq`, `resn` | residue number and one-letter code |
|
|
85
|
+
| `NC_mean`, `NH_mean` | ensemble-averaged contacts / hydrogen bonds |
|
|
86
|
+
| `lnPF`, `log10PF` | log protection factor |
|
|
87
|
+
| `dGopen_kcal` | opening free energy ΔG_open (kcal/mol) |
|
|
88
|
+
|
|
89
|
+
Two further entry points:
|
|
90
|
+
|
|
91
|
+
* **`convergence(ensemble)`** — Spearman correlation of the `n`-conformer
|
|
92
|
+
readout against the full-ensemble readout (and, optionally, against an
|
|
93
|
+
experimental reference), showing the plateau near ~50 conformers.
|
|
94
|
+
* **`global_unfolding(ensemble)`** — `RT · min_c ⟨ln PF⟩_residues`, the
|
|
95
|
+
protection of the most-open conformer, a bounded proxy for global fold
|
|
96
|
+
stability (the unfolded-state limit of the ensemble).
|
|
97
|
+
|
|
98
|
+
## How it works
|
|
99
|
+
|
|
100
|
+
For each backbone amide (prolines and non-standard residues are skipped):
|
|
101
|
+
|
|
102
|
+
* **`N_C`** — heavy atoms within **6.5 Å** of the amide nitrogen, sequence
|
|
103
|
+
separation `|i − j| ≥ 3`, hydrogens excluded.
|
|
104
|
+
* **`N_H`** — backbone carbonyl oxygens within **2.6 Å** of the amide hydrogen,
|
|
105
|
+
sequence separation `|i − j| ≥ 2`.
|
|
106
|
+
|
|
107
|
+
Counts are computed per conformer and **averaged over the ensemble before** the
|
|
108
|
+
linear combination is formed, so a residue that is buried in most conformers but
|
|
109
|
+
exposed in a rare open state receives the reduced mean contact count its
|
|
110
|
+
protection reflects. The contact term dominates, so the readout is robust even
|
|
111
|
+
when hydrogens are placed geometrically rather than with a full protonation step.
|
|
112
|
+
|
|
113
|
+
## Reproducing the bundled example
|
|
114
|
+
|
|
115
|
+
`hxprobe example` scores a 50-conformer leakage-free ubiquitin ensemble and
|
|
116
|
+
recovers the experimental native-state opening free energies at Spearman
|
|
117
|
+
ρ ≈ 0.58, with the correlation plateauing by ~25–50 conformers — the behaviour
|
|
118
|
+
that motivates the probe.
|
|
119
|
+
|
|
120
|
+
## Citing
|
|
121
|
+
|
|
122
|
+
If you use `hxprobe`, please cite the accompanying study on residue-resolved
|
|
123
|
+
hydrogen-exchange free energies as a benchmark for generative conformational
|
|
124
|
+
ensembles. (Reference to be added on publication.)
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hxprobe"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "The 50-conformer probe: residue-resolved hydrogen-exchange opening free energies from conformational ensembles"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "hxprobe authors" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"hydrogen-deuterium exchange",
|
|
15
|
+
"protection factor",
|
|
16
|
+
"conformational ensemble",
|
|
17
|
+
"generative models",
|
|
18
|
+
"protein dynamics",
|
|
19
|
+
"free energy",
|
|
20
|
+
"structural biology",
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 4 - Beta",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Programming Language :: Python :: 3.9",
|
|
28
|
+
"Programming Language :: Python :: 3.10",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Chemistry",
|
|
33
|
+
]
|
|
34
|
+
dependencies = [
|
|
35
|
+
"numpy>=1.21",
|
|
36
|
+
"pandas>=1.3",
|
|
37
|
+
"mdtraj>=1.9",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
# Repair + protonate raw crystal/generated structures (missing heavy atoms, no hydrogens).
|
|
42
|
+
fix = ["pdbfixer>=1.8", "openmm>=7.6"]
|
|
43
|
+
# Differentiable operator for gradient-based ensemble steering.
|
|
44
|
+
diff = ["torch>=1.10"]
|
|
45
|
+
# Development / test.
|
|
46
|
+
dev = ["pytest>=7", "build", "twine"]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/woshuizhaol/hxprobe"
|
|
50
|
+
Source = "https://github.com/woshuizhaol/hxprobe"
|
|
51
|
+
Issues = "https://github.com/woshuizhaol/hxprobe/issues"
|
|
52
|
+
|
|
53
|
+
[project.scripts]
|
|
54
|
+
hxprobe = "hxprobe.cli:main"
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.packages.find]
|
|
57
|
+
where = ["src"]
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.package-data]
|
|
60
|
+
hxprobe = ["data/*.pdb.gz", "data/*.csv"]
|
hxprobe-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""hxprobe -- the 50-conformer probe.
|
|
2
|
+
|
|
3
|
+
Read residue-resolved hydrogen-exchange opening free energies out of a
|
|
4
|
+
conformational ensemble with a white-box, two-parameter physical operator.
|
|
5
|
+
"""
|
|
6
|
+
from .operator import (BETA_C, BETA_H, CUT_NC_NM, CUT_NH_NM, IJ_NC, IJ_NH,
|
|
7
|
+
R_KCAL, T_REF, ProtectionResult, compute, nc_nh_frame)
|
|
8
|
+
from .ensemble import load_ensemble, optionally_protonate
|
|
9
|
+
from .probe import (convergence, example_ensemble_path, global_unfolding,
|
|
10
|
+
load_experimental, score_ensemble, spearman)
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"__version__",
|
|
16
|
+
"score_ensemble",
|
|
17
|
+
"convergence",
|
|
18
|
+
"global_unfolding",
|
|
19
|
+
"compute",
|
|
20
|
+
"nc_nh_frame",
|
|
21
|
+
"ProtectionResult",
|
|
22
|
+
"load_ensemble",
|
|
23
|
+
"optionally_protonate",
|
|
24
|
+
"example_ensemble_path",
|
|
25
|
+
"load_experimental",
|
|
26
|
+
"spearman",
|
|
27
|
+
"BETA_C", "BETA_H", "CUT_NC_NM", "CUT_NH_NM", "IJ_NC", "IJ_NH",
|
|
28
|
+
"R_KCAL", "T_REF",
|
|
29
|
+
]
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Command-line interface for hxprobe."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _add_common(p):
    """Register the arguments shared by the ``score`` and ``converge`` parsers.

    Adds the positional ``ensemble`` path and the optional ``--top``,
    ``--protonate``, ``--betaC``, ``--betaH`` and ``--temperature`` flags to
    the parser ``p``. The three numeric flags are parsed as ``float`` and
    default to ``None``.
    """
    p.add_argument("ensemble", help="multi-model PDB (.pdb/.pdb.gz) or trajectory file")
    p.add_argument("--top", default=None, help="topology file (for trajectory inputs)")
    p.add_argument(
        "--protonate",
        default="auto",
        choices=["auto", "none", "pdbfixer"],
        help="how to obtain backbone amide hydrogens (default: auto)",
    )

    # The numeric overrides differ only in flag name and help text.
    float_flags = (
        ("--betaC", "contact coefficient"),
        ("--betaH", "H-bond coefficient"),
        ("--temperature", "temperature (K)"),
    )
    for flag, description in float_flags:
        p.add_argument(flag, type=float, default=None, help=description)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _operator_kw(args):
    """Translate parsed CLI options into operator keyword arguments.

    Returns a dict with the keys ``protonate``, ``betaC``, ``betaH`` and
    ``temperature``. A numeric option left at ``None`` is replaced by the
    package constant ``BETA_C``, ``BETA_H`` or ``T_REF`` respectively.
    """
    from . import BETA_C, BETA_H, T_REF

    def _or_default(value, fallback):
        # Compare against None (not truthiness) so an explicit 0.0 is kept.
        return fallback if value is None else value

    return {
        "protonate": args.protonate,
        "betaC": _or_default(args.betaC, BETA_C),
        "betaH": _or_default(args.betaH, BETA_H),
        "temperature": _or_default(args.temperature, T_REF),
    }
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _cmd_score(args):
    """Run the ``score`` sub-command and return the exit status ``0``.

    Scores ``args.ensemble``. When ``args.out`` is set, the result is written
    there with ``to_csv`` and a one-line confirmation is printed; otherwise
    the per-residue table is printed to stdout.
    """
    from . import score_ensemble

    result = score_ensemble(args.ensemble, top=args.top, **_operator_kw(args))
    table = result.to_dataframe()

    if not args.out:
        print(table.to_string(index=False))
        return 0

    result.to_csv(args.out)
    print(f"wrote {len(result)} residues to {args.out}")
    return 0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _cmd_converge(args):
    """Run the ``converge`` sub-command and return the exit status ``0``.

    Calls ``convergence`` on ``args.ensemble`` with the shared operator
    options and prints the report.
    """
    from . import convergence

    report = convergence(args.ensemble, top=args.top, **_operator_kw(args))
    try:
        # Presumably a pandas DataFrame; print it as an index-free table.
        print(report.to_string(index=False))
    except AttributeError:
        # No usable ``to_string``: print the report's items one per line.
        for entry in report:
            print(entry)
    return 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _cmd_example(args):
    """Run the bundled ubiquitin demo and return the exit status ``0``.

    Scores the packaged example ensemble with ``protonate="none"``, prints the
    Spearman correlation between the per-residue ``lnPF`` and the bundled
    experimental values, then prints the convergence report computed against
    the same experimental reference.

    ``args`` is accepted so every sub-command handler has the same signature;
    it is not read.
    """
    from . import (convergence, example_ensemble_path, load_experimental,
                   score_ensemble, spearman)

    path = example_ensemble_path()
    exp = load_experimental()
    print(f"bundled example: 50-conformer leakage-free ubiquitin ensemble\n {path}")

    res = score_ensemble(path, protonate="none")  # already protonated

    # Experimental value per scored residue; None marks "not measured".
    measured = [exp.get(int(rs)) for rs in res.resSeq]
    nan = float("nan")
    rho = spearman(res.lnPF, [nan if value is None else value for value in measured])
    n_overlap = sum(1 for value in measured if value is not None)

    # Plain literals: these two messages have no placeholders, so no f-prefix.
    print("\nper-residue ln PF vs experimental dG_open (native-state HX):")
    print(f" Spearman rho = {rho:+.3f} over {n_overlap} measured residues")
    print("\nconvergence with ensemble size:")

    conv = convergence(path, protonate="none", reference=exp)
    try:
        print(conv.to_string(index=False))
    except AttributeError:
        # No usable ``to_string``: print the report's items one per line.
        for row in conv:
            print(" ", row)
    return 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def build_parser():
    """Create the ``hxprobe`` command-line parser.

    A sub-command is required. ``score`` and ``converge`` take the shared
    ensemble/operator options (``score`` additionally accepts ``--out``);
    ``example`` takes no options. Each sub-parser stores its handler under
    ``func``.
    """
    parser = argparse.ArgumentParser(
        prog="hxprobe",
        description="The 50-conformer probe: residue-resolved hydrogen-exchange "
                    "opening free energies from conformational ensembles.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    score = commands.add_parser(
        "score", help="per-residue opening free energies for an ensemble")
    _add_common(score)
    score.add_argument("--out", default=None, help="write a CSV instead of printing")
    score.set_defaults(func=_cmd_score)

    converge = commands.add_parser(
        "converge", help="convergence of the readout with ensemble size")
    _add_common(converge)
    converge.set_defaults(func=_cmd_converge)

    example = commands.add_parser("example", help="run the bundled ubiquitin example")
    example.set_defaults(func=_cmd_example)

    return parser
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def main(argv=None):
    """Parse ``argv`` and dispatch to the selected sub-command handler.

    ``argv`` is passed straight to ``parse_args``. The handler's return value
    is returned unchanged.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == "__main__":
    # Use the handler's return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
resi,dGopen_kcal,grade,resn
|
|
2
|
+
2,1.831,B,
|
|
3
|
+
3,6.335,B,
|
|
4
|
+
4,6.307,B,
|
|
5
|
+
5,5.897,B,
|
|
6
|
+
6,4.929,B,
|
|
7
|
+
7,6.108,B,
|
|
8
|
+
12,2.969,B,
|
|
9
|
+
13,5.202,B,
|
|
10
|
+
15,2.526,B,
|
|
11
|
+
16,3.436,B,
|
|
12
|
+
17,5.565,B,
|
|
13
|
+
22,2.171,B,
|
|
14
|
+
23,5.771,B,
|
|
15
|
+
25,4.292,B,
|
|
16
|
+
26,7.699,B,
|
|
17
|
+
28,5.513,B,
|
|
18
|
+
29,5.897,B,
|
|
19
|
+
30,7.169,B,
|
|
20
|
+
31,4.703,B,
|
|
21
|
+
32,2.015,B,
|
|
22
|
+
36,2.837,B,
|
|
23
|
+
40,2.679,B,
|
|
24
|
+
41,4.093,B,
|
|
25
|
+
42,4.503,B,
|
|
26
|
+
44,6.307,B,
|
|
27
|
+
45,4.333,B,
|
|
28
|
+
48,4.586,B,
|
|
29
|
+
49,2.482,B,
|
|
30
|
+
50,3.855,B,
|
|
31
|
+
55,4.818,B,
|
|
32
|
+
56,6.045,B,
|
|
33
|
+
57,3.436,B,
|
|
34
|
+
58,2.969,B,
|
|
35
|
+
59,5.612,B,
|
|
36
|
+
60,2.426,B,
|
|
37
|
+
61,4.611,B,
|
|
38
|
+
65,4.201,B,
|
|
39
|
+
67,3.93,B,
|
|
40
|
+
68,4.407,B,
|
|
41
|
+
69,5.339,B,
|
|
42
|
+
70,4.724,B,
|
|
Binary file
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Differentiable Best--Vendruscolo operator (optional, requires PyTorch).
|
|
2
|
+
|
|
3
|
+
The hard contact and hydrogen-bond counts are replaced by smooth sigmoidal
|
|
4
|
+
switching functions controlled by a temperature ``tau``; the discrete operator
|
|
5
|
+
is recovered as ``tau -> 0``. Because protection then becomes a differentiable
|
|
6
|
+
function of atomic coordinates, the residue-level readout can in principle
|
|
7
|
+
provide gradients to steer a generator toward the rare openings it
|
|
8
|
+
under-populates.
|
|
9
|
+
|
|
10
|
+
Install with ``pip install hxprobe[diff]``.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from .operator import BETA_C, BETA_H, CUT_NC_NM, CUT_NH_NM, IJ_NC, IJ_NH, R_KCAL, T_REF
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def soft_nc_nh(xyz, amideN_idx, amideH_idx, heavy_idx, O_idx, resid,
               cut_Nc=CUT_NC_NM, cut_Nh=CUT_NH_NM, ij_Nc=IJ_NC, ij_Nh=IJ_NH,
               tau=0.02):
    """Differentiable per-residue (N_C, N_H) for one conformer.

    Each hard distance cutoff is replaced by ``sigmoid((cutoff - d) / tau)``,
    so both counts are smooth functions of the coordinates.

    Parameters
    ----------
    xyz : torch.Tensor ``[n_atoms, 3]`` (nanometres, requires_grad as needed)
    amideN_idx, amideH_idx : list[int]
        Per-residue amide N / amide H atom indices (``-1`` if absent).
    heavy_idx, O_idx : list[int]
        Heavy-atom and backbone-carbonyl-oxygen atom indices.
    resid : sequence[int]
        Residue index of every atom (used for the sequence-separation mask).
    cut_Nc, cut_Nh : float
        Distance cutoffs of the contact and hydrogen-bond switches, in the
        same units as ``xyz``.
    ij_Nc, ij_Nh : int
        Minimum residue-index separation ``|i - j|`` for an atom to count
        towards N_C / N_H.
    tau : float
        Switching temperature; smaller is sharper (recovers the hard operator).

    Returns
    -------
    (NC, NH) : torch.Tensor, torch.Tensor   each ``[n_res]``
        Allocated on ``xyz.device`` with ``xyz.dtype``. A residue without an
        amide N keeps ``NC = NH = 0``; one without an amide H keeps ``NH = 0``.
    """
    import torch

    dev = xyz.device
    # Follow the coordinate dtype: a default (float32) buffer would silently
    # downcast the counts computed from float64 coordinates.
    dtype = xyz.dtype
    R = len(amideN_idx)
    NC = torch.zeros(R, device=dev, dtype=dtype)
    NH = torch.zeros(R, device=dev, dtype=dtype)

    heavy = torch.as_tensor(heavy_idx, device=dev, dtype=torch.long)
    hres = torch.as_tensor([resid[i] for i in heavy_idx], device=dev)
    Os = torch.as_tensor(O_idx, device=dev, dtype=torch.long)
    Ores = torch.as_tensor([resid[i] for i in O_idx], device=dev)

    # Loop-invariant gathers, hoisted out of the per-residue loop.
    heavy_xyz = xyz[heavy]
    O_xyz = xyz[Os]

    for r in range(R):
        ni = amideN_idx[r]
        if ni < 0:
            # No amide nitrogen for this residue: leave both counts at zero.
            continue
        ri = resid[ni]

        # Soft contact count around the amide nitrogen.
        d = torch.norm(heavy_xyz - xyz[ni], dim=1)
        mask_c = (torch.abs(hres - ri) >= ij_Nc).to(dtype)
        NC[r] = torch.sum(torch.sigmoid((cut_Nc - d) / tau) * mask_c)

        # Soft hydrogen-bond count around the amide hydrogen, if present.
        hi = amideH_idx[r]
        if hi >= 0:
            dh = torch.norm(O_xyz - xyz[hi], dim=1)
            mask_h = (torch.abs(Ores - ri) >= ij_Nh).to(dtype)
            NH[r] = torch.sum(torch.sigmoid((cut_Nh - dh) / tau) * mask_h)
    return NC, NH
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def soft_lnpf(xyz, amideN_idx, amideH_idx, heavy_idx, O_idx, resid,
              betaC=BETA_C, betaH=BETA_H, **kw):
    """Differentiable per-residue ln PF for one conformer.

    Evaluates ``soft_nc_nh`` on the same inputs, forwarding any extra keyword
    arguments in ``kw``, and returns ``betaC * N_C + betaH * N_H``.
    """
    contacts, hbonds = soft_nc_nh(
        xyz, amideN_idx, amideH_idx, heavy_idx, O_idx, resid, **kw)
    contact_term = betaC * contacts
    hbond_term = betaH * hbonds
    return contact_term + hbond_term
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Loading conformational ensembles.
|
|
2
|
+
|
|
3
|
+
Thin wrappers over MDTraj that accept the common ways an ensemble is stored:
|
|
4
|
+
a multi-model PDB (optionally gzipped), or a trajectory file plus a topology.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_ensemble(path: str, top: Optional[str] = None):
    """Load a conformational ensemble as an ``mdtraj.Trajectory``.

    Parameters
    ----------
    path : str
        A multi-model PDB (``.pdb`` / ``.pdb.gz``) or a trajectory file
        (``.xtc``, ``.dcd``, ``.h5`` ...).
    top : str, optional
        Topology file (e.g. a ``.pdb``); required for trajectory formats that
        do not embed topology. When ``None`` it is not passed to MDTraj.

    Returns
    -------
    mdtraj.Trajectory
    """
    import mdtraj as md

    # Forward ``top`` only when the caller actually supplied one.
    load_kwargs = {} if top is None else {"top": top}
    return md.load(path, **load_kwargs)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def optionally_protonate(traj, method: str = "auto"):
    """Return an ensemble guaranteed to be scorable for the H-bond term.

    ``method``:

    * ``"none"`` -- score as-is (geometric amide-H placement is used inside
      the operator when explicit hydrogens are absent).
    * ``"pdbfixer"`` -- repair missing heavy atoms and add real hydrogens with
      PDBFixer/OpenMM (the ``hxprobe[fix]`` extra). Most faithful for raw
      crystal or heavy-atom generated structures.
    * ``"auto"`` (default) -- use PDBFixer if it is installed and hydrogens are
      missing, otherwise fall back to ``"none"``.

    An ensemble that already has explicit hydrogens is returned unchanged for
    every valid method.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three names above. This is checked
        before the ensemble is inspected, so a misspelt method is rejected
        even when the ensemble is already protonated.
    """
    if method not in ("none", "pdbfixer", "auto"):
        raise ValueError(f"unknown protonation method: {method!r}")
    if method == "none":
        return traj

    # Imported only on the paths that need to inspect or protonate the ensemble.
    from .protonate import has_explicit_hydrogens, pdbfixer_protonate

    if has_explicit_hydrogens(traj):
        return traj
    if method == "pdbfixer":
        return pdbfixer_protonate(traj)

    # method == "auto": best effort, never fatal.
    try:
        return pdbfixer_protonate(traj)
    except ImportError:
        # Presumably the optional PDBFixer/OpenMM extra is not installed;
        # this is the documented silent fallback to scoring as-is.
        return traj
    except Exception as exc:
        # Any other failure still falls back, but is reported rather than
        # swallowed silently.
        import warnings

        warnings.warn(
            f"PDBFixer protonation failed ({exc!r}); scoring the ensemble as-is",
            RuntimeWarning,
            stacklevel=2,
        )
        return traj
|