resonance-flow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- resonance_flow-0.1.0/LICENSE +21 -0
- resonance_flow-0.1.0/PKG-INFO +247 -0
- resonance_flow-0.1.0/README.md +213 -0
- resonance_flow-0.1.0/pyproject.toml +130 -0
- resonance_flow-0.1.0/resonance_flow/__init__.py +11 -0
- resonance_flow-0.1.0/resonance_flow/losses.py +224 -0
- resonance_flow-0.1.0/resonance_flow/model.py +56 -0
- resonance_flow-0.1.0/resonance_flow/train.py +103 -0
- resonance_flow-0.1.0/resonance_flow.egg-info/PKG-INFO +247 -0
- resonance_flow-0.1.0/resonance_flow.egg-info/SOURCES.txt +19 -0
- resonance_flow-0.1.0/resonance_flow.egg-info/dependency_links.txt +1 -0
- resonance_flow-0.1.0/resonance_flow.egg-info/requires.txt +16 -0
- resonance_flow-0.1.0/resonance_flow.egg-info/top_level.txt +1 -0
- resonance_flow-0.1.0/setup.cfg +4 -0
- resonance_flow-0.1.0/tests/test_integration.py +9 -0
- resonance_flow-0.1.0/tests/test_invariance.py +47 -0
- resonance_flow-0.1.0/tests/test_losses.py +39 -0
- resonance_flow-0.1.0/tests/test_nmr_physics.py +171 -0
- resonance_flow-0.1.0/tests/test_pbc.py +34 -0
- resonance_flow-0.1.0/tests/test_pipeline.py +42 -0
- resonance_flow-0.1.0/tests/test_validation.py +343 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 George Elkins
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: resonance-flow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Self-correcting protein folding with differentiable NMR constraints
|
|
5
|
+
Author: George Elkins
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: jax
|
|
19
|
+
Requires-Dist: jaxlib
|
|
20
|
+
Requires-Dist: flax
|
|
21
|
+
Requires-Dist: optax
|
|
22
|
+
Requires-Dist: jax-md
|
|
23
|
+
Requires-Dist: numpy
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff; extra == "dev"
|
|
28
|
+
Requires-Dist: black; extra == "dev"
|
|
29
|
+
Requires-Dist: mypy; extra == "dev"
|
|
30
|
+
Requires-Dist: mkdocs; extra == "dev"
|
|
31
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
32
|
+
Requires-Dist: mkdocstrings[python]; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# 🧬 ResonanceFlow: Differentiable Protein Structure Prediction with NMR Self-Correction
|
|
36
|
+
|
|
37
|
+
[](https://github.com/elkins/resonance-flow/actions/workflows/test.yml)
|
|
38
|
+
[](https://github.com/elkins/resonance-flow/actions/workflows/docs.yml)
|
|
39
|
+
[](https://opensource.org/licenses/MIT)
|
|
40
|
+
[](https://elkins.github.io/resonance-flow/)
|
|
41
|
+
[](https://www.python.org/)
|
|
42
|
+
[](https://github.com/elkins/resonance-flow/releases)
|
|
43
|
+
[](https://github.com/astral-sh/ruff)
|
|
44
|
+
[](https://github.com/astral-sh/ruff)
|
|
45
|
+
[](https://mypy-lang.org/)
|
|
46
|
+
[](https://jax.readthedocs.io/)
|
|
47
|
+
|
|
48
|
+
**ResonanceFlow** is a JAX-native protein structure prediction framework that integrates differentiable biophysics with experimental NMR constraints. It allows models to "self-correct" by propagating gradients from physical violations (atomic clashes, bad geometry) and NMR observables (RDCs, NOE distances) back into the neural network architecture — end-to-end, with no manual refinement step.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Key Features
|
|
53
|
+
|
|
54
|
+
- **JAX-Native Gradient Flow** — End-to-end differentiability from experimental constraints to model weights via `jax.grad`.
|
|
55
|
+
- **Saupe Tensor RDC Loss** — Differentiable least-squares fitting of the alignment tensor at every forward pass (Bax & Tjandra 1997; Cornilescu et al. 1998).
|
|
56
|
+
- **NOE Distance Restraints** — Flat-bottomed harmonic penalty on upper-bound violations, the primary 3D information source in protein NMR (Wüthrich 1986; Güntert et al. 1997).
|
|
57
|
+
- **Biophysically Correct Geometry** — Bond length loss calibrated to the canonical Cα–Cα distance of 3.80 Å (Engh & Huber 1991).
|
|
58
|
+
- **Differentiable Steric Clash** — Harmonic atom-overlap penalty with optional AMBER/CHARMM-style 1-2/1-3 bonded exclusions, powered by `jax-md`.
|
|
59
|
+
- **RDC Quality Metric** — Built-in Q-factor (Cornilescu et al. 1998) for structural validation without additional tooling.
|
|
60
|
+
- **PBC Support** — Periodic boundary conditions for simulation-box contexts.
|
|
61
|
+
- **Transformer-to-Coords** — A pre-LN Transformer architecture that maps amino acid sequences directly to physical 3D Cα coordinates.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## 🧠 The Concept: "Self-Correction"
|
|
66
|
+
|
|
67
|
+
Traditional folding models are trained on static PDB snapshots. ResonanceFlow instead teaches a model to *listen* to physical laws and NMR data during training itself:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
Sequence → [Transformer] → Cα Coordinates
|
|
71
|
+
                                 │
|
|
72
|
+
            ┌────────────────────┼──────────────────────┐
|
|
73
|
+
            ▼                    ▼                      ▼
|
|
74
|
+
      Steric Clash          Bond Length             RDC / NOE
|
|
75
|
+
         Penalty               Loss                 Mismatch
|
|
76
|
+
            └────────────────────┼──────────────────────┘
|
|
77
|
+
                                 │ ∇θ L_total
|
|
78
|
+
                                 ▼
|
|
79
|
+
                         [Optimizer Step]
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Gradients from every constraint flow back simultaneously into the model weights — the model learns not just from data, but from physics.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## 🛠️ Installation
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install resonance-flow
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
For development (includes linting, type-checking, testing, and docs):
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
git clone https://github.com/elkins/resonance-flow.git
|
|
96
|
+
cd resonance-flow
|
|
97
|
+
pip install -e ".[dev]"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Requirements:** Python 3.10+, JAX ≥ 0.4, Flax, Optax, jax-md, NumPy.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## 🧪 Quick Start
|
|
105
|
+
|
|
106
|
+
### Run the self-correction demo
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from resonance_flow.train import main
|
|
110
|
+
|
|
111
|
+
state = main(num_steps=100)
|
|
112
|
+
# Step 0 | Total Loss: 12.3421 | Steric: 0.0012 | Bond: 1.2034 | RDC: 0.0087
|
|
113
|
+
# Step 10 | Total Loss: 4.1823 | ...
|
|
114
|
+
# Step 100 | Total Loss: 0.0031 | ...
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Use individual loss functions
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
import jax
import jax.numpy as jnp
|
|
121
|
+
from resonance_flow import (
|
|
122
|
+
get_steric_clash_loss,
|
|
123
|
+
get_bond_length_loss,
|
|
124
|
+
rdc_loss,
|
|
125
|
+
rdc_q_factor,
|
|
126
|
+
noe_upper_bound_loss,
|
|
127
|
+
estimate_nh_proxy_vectors,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# ── Steric clash (AMBER-style 1-2 bonded exclusion) ──────────────────────────
|
|
131
|
+
clash_fn = get_steric_clash_loss(exclude_bonded_range=1)
|
|
132
|
+
positions = jnp.array([[0.0, 0.0, 0.0], [4.0, 0.0, 0.0]])
|
|
133
|
+
atom_radii = jnp.array([1.5, 1.5])
|
|
134
|
+
clash_fn(positions, atom_radii) # → 0.0 (no overlap)
|
|
135
|
+
|
|
136
|
+
# ── Bond length (Cα–Cα virtual bond, Engh & Huber 1991) ──────────────────────
|
|
137
|
+
bond_fn = get_bond_length_loss() # default target = 3.8 Å
|
|
138
|
+
ca_chain = jnp.array([[0.0,0.0,0.0],[3.8,0.0,0.0],[7.6,0.0,0.0]])
|
|
139
|
+
bond_fn(ca_chain) # → ~0.0
|
|
140
|
+
|
|
141
|
+
# ── RDC loss (Saupe tensor fitting) ──────────────────────────────────────────
|
|
142
|
+
nh_vecs = jnp.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.],
|
|
143
|
+
[0.7,0.7,0.],[0.7,0.,0.7],[0.,0.7,0.7]])
|
|
144
|
+
measured_rdc = jnp.array([10., -5., 2., 0., 4., 8.])
|
|
145
|
+
rdc_loss(nh_vecs, measured_rdc) # → scalar MSE
|
|
146
|
+
|
|
147
|
+
# ── RDC Q-factor (structure quality; Q ≤ 0.20 = high quality) ────────────────
|
|
148
|
+
rdc_q_factor(nh_vecs, measured_rdc) # → 0 – 1 (lower is better)
|
|
149
|
+
|
|
150
|
+
# ── N-H proxy vectors from Cα coordinates (Cα-only models) ───────────────────
|
|
151
|
+
ca_coords = jax.random.normal(jax.random.PRNGKey(0), (10, 3))
|
|
152
|
+
nh_proxy = estimate_nh_proxy_vectors(ca_coords) # → (8, 3) unit vectors
|
|
153
|
+
|
|
154
|
+
# ── NOE upper-bound distance restraints (Wüthrich 1986) ──────────────────────
|
|
155
|
+
noe_pairs = jnp.array([[0, 2], [1, 3]])
|
|
156
|
+
upper_bounds = jnp.array([5.0, 4.5])
|
|
157
|
+
noe_upper_bound_loss(positions, noe_pairs[:1], upper_bounds[:1]) # → 0.0
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## 🔬 Scientific Basis
|
|
163
|
+
|
|
164
|
+
All loss functions and validation metrics are grounded in published, peer-reviewed NMR methodology:
|
|
165
|
+
|
|
166
|
+
| Loss / Metric | Scientific Basis |
|
|
167
|
+
|---|---|
|
|
168
|
+
| RDC loss — Saupe tensor | Bax & Tjandra, *J. Biomol. NMR* 1997; Cornilescu et al., *JACS* 1998 |
|
|
169
|
+
| RDC Q-factor | Cornilescu et al., *JACS* 1998; Clore & Garrett, *JACS* 1999 |
|
|
170
|
+
| NOE distance restraints | Wüthrich, *NMR of Proteins and Nucleic Acids* 1986; Güntert et al., *J. Mol. Biol.* 1997 |
|
|
171
|
+
| Cα–Cα bond distance (3.8 Å) | Engh & Huber, *Acta Crystallogr. A* 1991 |
|
|
172
|
+
| N-H proxy vectors | Zweckstetter & Bax, *JACS* 2000 |
|
|
173
|
+
| Bonded exclusion (1-2/1-3) | Cornell et al. (AMBER), *JACS* 1995; MacKerell et al. (CHARMM), *J. Phys. Chem. B* 1998 |
|
|
174
|
+
| d_max = 21 700 Hz | Ottiger & Bax, *JACS* 1998 |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## 🧬 Architecture
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
TransformerCoordinatePredictor
|
|
182
|
+
├── Embedding (vocab_size=21, d_model=128)
|
|
183
|
+
├── Positional Embed (learned, max_len=512)
|
|
184
|
+
├── N × Pre-LN Block
|
|
185
|
+
│   ├── LayerNorm → MultiHeadSelfAttention → Residual
|
|
186
|
+
│   └── LayerNorm → FFN (4× expand, GELU) → Residual
|
|
187
|
+
└── LayerNorm → Linear(3) # → (batch, seq_len, 3) Cα coordinates
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
The pre-LN (LayerNorm before attention) layout avoids gradient
|
|
191
|
+
explosion and follows the convention recommended by Xiong et al. 2020.
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## 🤝 Contributing
|
|
196
|
+
|
|
197
|
+
Contributions are welcome! Please open an issue or pull request. The project follows:
|
|
198
|
+
|
|
199
|
+
- **Formatting + Linting:** `ruff` / `ruff format`
|
|
200
|
+
- **Type checking:** `mypy`
|
|
201
|
+
- **Testing:** `pytest` with coverage
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Run the full quality pipeline before submitting a PR
|
|
205
|
+
ruff check resonance_flow tests
|
|
206
|
+
ruff format resonance_flow tests
|
|
207
|
+
mypy resonance_flow tests
|
|
208
|
+
pytest --cov=resonance_flow tests
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Documentation
|
|
214
|
+
|
|
215
|
+
Full theory, API reference, and examples at **[elkins.github.io/resonance-flow](https://elkins.github.io/resonance-flow/)**.
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## ⚖️ License
|
|
220
|
+
|
|
221
|
+
MIT © George Elkins
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## Related Projects
|
|
226
|
+
|
|
227
|
+
ResonanceFlow is the most complete end-to-end model in this ecosystem, which also includes:
|
|
228
|
+
|
|
229
|
+
- [diff-biophys](https://github.com/elkins/diff-biophys) — Differentiable RDC, NOE, bond-length, and clash kernels
|
|
230
|
+
- [synth-nmr](https://github.com/elkins/synth-nmr) — NMR parameter libraries (chemical shifts, Karplus, RDC)
|
|
231
|
+
- [synth-pdb](https://github.com/elkins/synth-pdb) — Protein structure data generation
|
|
232
|
+
- [TorsionTuner](https://github.com/elkins/TorsionTuner) — Single-structure refinement using similar torsion-space kinematics
|
|
233
|
+
- [diff-ensemble](https://github.com/elkins/diff-ensemble) — Ensemble counterpart for IDPs
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## Citation
|
|
238
|
+
|
|
239
|
+
```bibtex
|
|
240
|
+
@software{resonance_flow,
|
|
241
|
+
author = {Elkins, George},
|
|
242
|
+
title = {ResonanceFlow: Differentiable protein structure prediction with NMR self-correction},
|
|
243
|
+
year = {2024},
|
|
244
|
+
url = {https://github.com/elkins/resonance-flow},
|
|
245
|
+
version = {0.1.0}
|
|
246
|
+
}
|
|
247
|
+
```
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# 🧬 ResonanceFlow: Differentiable Protein Structure Prediction with NMR Self-Correction
|
|
2
|
+
|
|
3
|
+
[](https://github.com/elkins/resonance-flow/actions/workflows/test.yml)
|
|
4
|
+
[](https://github.com/elkins/resonance-flow/actions/workflows/docs.yml)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://elkins.github.io/resonance-flow/)
|
|
7
|
+
[](https://www.python.org/)
|
|
8
|
+
[](https://github.com/elkins/resonance-flow/releases)
|
|
9
|
+
[](https://github.com/astral-sh/ruff)
|
|
10
|
+
[](https://github.com/astral-sh/ruff)
|
|
11
|
+
[](https://mypy-lang.org/)
|
|
12
|
+
[](https://jax.readthedocs.io/)
|
|
13
|
+
|
|
14
|
+
**ResonanceFlow** is a JAX-native protein structure prediction framework that integrates differentiable biophysics with experimental NMR constraints. It allows models to "self-correct" by propagating gradients from physical violations (atomic clashes, bad geometry) and NMR observables (RDCs, NOE distances) back into the neural network architecture — end-to-end, with no manual refinement step.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Key Features
|
|
19
|
+
|
|
20
|
+
- **JAX-Native Gradient Flow** — End-to-end differentiability from experimental constraints to model weights via `jax.grad`.
|
|
21
|
+
- **Saupe Tensor RDC Loss** — Differentiable least-squares fitting of the alignment tensor at every forward pass (Bax & Tjandra 1997; Cornilescu et al. 1998).
|
|
22
|
+
- **NOE Distance Restraints** — Flat-bottomed harmonic penalty on upper-bound violations, the primary 3D information source in protein NMR (Wüthrich 1986; Güntert et al. 1997).
|
|
23
|
+
- **Biophysically Correct Geometry** — Bond length loss calibrated to the canonical Cα–Cα distance of 3.80 Å (Engh & Huber 1991).
|
|
24
|
+
- **Differentiable Steric Clash** — Harmonic atom-overlap penalty with optional AMBER/CHARMM-style 1-2/1-3 bonded exclusions, powered by `jax-md`.
|
|
25
|
+
- **RDC Quality Metric** — Built-in Q-factor (Cornilescu et al. 1998) for structural validation without additional tooling.
|
|
26
|
+
- **PBC Support** — Periodic boundary conditions for simulation-box contexts.
|
|
27
|
+
- **Transformer-to-Coords** — A pre-LN Transformer architecture that maps amino acid sequences directly to physical 3D Cα coordinates.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 🧠 The Concept: "Self-Correction"
|
|
32
|
+
|
|
33
|
+
Traditional folding models are trained on static PDB snapshots. ResonanceFlow instead teaches a model to *listen* to physical laws and NMR data during training itself:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
Sequence → [Transformer] → Cα Coordinates
|
|
37
|
+
                                 │
|
|
38
|
+
            ┌────────────────────┼──────────────────────┐
|
|
39
|
+
            ▼                    ▼                      ▼
|
|
40
|
+
      Steric Clash          Bond Length             RDC / NOE
|
|
41
|
+
         Penalty               Loss                 Mismatch
|
|
42
|
+
            └────────────────────┼──────────────────────┘
|
|
43
|
+
                                 │ ∇θ L_total
|
|
44
|
+
                                 ▼
|
|
45
|
+
                         [Optimizer Step]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Gradients from every constraint flow back simultaneously into the model weights — the model learns not just from data, but from physics.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## 🛠️ Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install resonance-flow
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For development (includes linting, type-checking, testing, and docs):
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
git clone https://github.com/elkins/resonance-flow.git
|
|
62
|
+
cd resonance-flow
|
|
63
|
+
pip install -e ".[dev]"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Requirements:** Python 3.10+, JAX ≥ 0.4, Flax, Optax, jax-md, NumPy.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 🧪 Quick Start
|
|
71
|
+
|
|
72
|
+
### Run the self-correction demo
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from resonance_flow.train import main
|
|
76
|
+
|
|
77
|
+
state = main(num_steps=100)
|
|
78
|
+
# Step 0 | Total Loss: 12.3421 | Steric: 0.0012 | Bond: 1.2034 | RDC: 0.0087
|
|
79
|
+
# Step 10 | Total Loss: 4.1823 | ...
|
|
80
|
+
# Step 100 | Total Loss: 0.0031 | ...
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Use individual loss functions
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import jax
import jax.numpy as jnp
|
|
87
|
+
from resonance_flow import (
|
|
88
|
+
get_steric_clash_loss,
|
|
89
|
+
get_bond_length_loss,
|
|
90
|
+
rdc_loss,
|
|
91
|
+
rdc_q_factor,
|
|
92
|
+
noe_upper_bound_loss,
|
|
93
|
+
estimate_nh_proxy_vectors,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# ── Steric clash (AMBER-style 1-2 bonded exclusion) ──────────────────────────
|
|
97
|
+
clash_fn = get_steric_clash_loss(exclude_bonded_range=1)
|
|
98
|
+
positions = jnp.array([[0.0, 0.0, 0.0], [4.0, 0.0, 0.0]])
|
|
99
|
+
atom_radii = jnp.array([1.5, 1.5])
|
|
100
|
+
clash_fn(positions, atom_radii) # → 0.0 (no overlap)
|
|
101
|
+
|
|
102
|
+
# ── Bond length (Cα–Cα virtual bond, Engh & Huber 1991) ──────────────────────
|
|
103
|
+
bond_fn = get_bond_length_loss() # default target = 3.8 Å
|
|
104
|
+
ca_chain = jnp.array([[0.0,0.0,0.0],[3.8,0.0,0.0],[7.6,0.0,0.0]])
|
|
105
|
+
bond_fn(ca_chain) # → ~0.0
|
|
106
|
+
|
|
107
|
+
# ── RDC loss (Saupe tensor fitting) ──────────────────────────────────────────
|
|
108
|
+
nh_vecs = jnp.array([[1.,0.,0.],[0.,1.,0.],[0.,0.,1.],
|
|
109
|
+
[0.7,0.7,0.],[0.7,0.,0.7],[0.,0.7,0.7]])
|
|
110
|
+
measured_rdc = jnp.array([10., -5., 2., 0., 4., 8.])
|
|
111
|
+
rdc_loss(nh_vecs, measured_rdc) # → scalar MSE
|
|
112
|
+
|
|
113
|
+
# ── RDC Q-factor (structure quality; Q ≤ 0.20 = high quality) ────────────────
|
|
114
|
+
rdc_q_factor(nh_vecs, measured_rdc) # → 0 – 1 (lower is better)
|
|
115
|
+
|
|
116
|
+
# ── N-H proxy vectors from Cα coordinates (Cα-only models) ───────────────────
|
|
117
|
+
ca_coords = jax.random.normal(jax.random.PRNGKey(0), (10, 3))
|
|
118
|
+
nh_proxy = estimate_nh_proxy_vectors(ca_coords) # → (8, 3) unit vectors
|
|
119
|
+
|
|
120
|
+
# ── NOE upper-bound distance restraints (Wüthrich 1986) ──────────────────────
|
|
121
|
+
noe_pairs = jnp.array([[0, 2], [1, 3]])
|
|
122
|
+
upper_bounds = jnp.array([5.0, 4.5])
|
|
123
|
+
noe_upper_bound_loss(positions, noe_pairs[:1], upper_bounds[:1]) # → 0.0
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## 🔬 Scientific Basis
|
|
129
|
+
|
|
130
|
+
All loss functions and validation metrics are grounded in published, peer-reviewed NMR methodology:
|
|
131
|
+
|
|
132
|
+
| Loss / Metric | Scientific Basis |
|
|
133
|
+
|---|---|
|
|
134
|
+
| RDC loss — Saupe tensor | Bax & Tjandra, *J. Biomol. NMR* 1997; Cornilescu et al., *JACS* 1998 |
|
|
135
|
+
| RDC Q-factor | Cornilescu et al., *JACS* 1998; Clore & Garrett, *JACS* 1999 |
|
|
136
|
+
| NOE distance restraints | Wüthrich, *NMR of Proteins and Nucleic Acids* 1986; Güntert et al., *J. Mol. Biol.* 1997 |
|
|
137
|
+
| Cα–Cα bond distance (3.8 Å) | Engh & Huber, *Acta Crystallogr. A* 1991 |
|
|
138
|
+
| N-H proxy vectors | Zweckstetter & Bax, *JACS* 2000 |
|
|
139
|
+
| Bonded exclusion (1-2/1-3) | Cornell et al. (AMBER), *JACS* 1995; MacKerell et al. (CHARMM), *J. Phys. Chem. B* 1998 |
|
|
140
|
+
| d_max = 21 700 Hz | Ottiger & Bax, *JACS* 1998 |
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 🧬 Architecture
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
TransformerCoordinatePredictor
|
|
148
|
+
├── Embedding (vocab_size=21, d_model=128)
|
|
149
|
+
├── Positional Embed (learned, max_len=512)
|
|
150
|
+
├── N × Pre-LN Block
|
|
151
|
+
│   ├── LayerNorm → MultiHeadSelfAttention → Residual
|
|
152
|
+
│   └── LayerNorm → FFN (4× expand, GELU) → Residual
|
|
153
|
+
└── LayerNorm → Linear(3) # → (batch, seq_len, 3) Cα coordinates
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The pre-LN (LayerNorm before attention) layout avoids gradient
|
|
157
|
+
explosion and follows the convention recommended by Xiong et al. 2020.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## 🤝 Contributing
|
|
162
|
+
|
|
163
|
+
Contributions are welcome! Please open an issue or pull request. The project follows:
|
|
164
|
+
|
|
165
|
+
- **Formatting + Linting:** `ruff` / `ruff format`
|
|
166
|
+
- **Type checking:** `mypy`
|
|
167
|
+
- **Testing:** `pytest` with coverage
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# Run the full quality pipeline before submitting a PR
|
|
171
|
+
ruff check resonance_flow tests
|
|
172
|
+
ruff format resonance_flow tests
|
|
173
|
+
mypy resonance_flow tests
|
|
174
|
+
pytest --cov=resonance_flow tests
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Documentation
|
|
180
|
+
|
|
181
|
+
Full theory, API reference, and examples at **[elkins.github.io/resonance-flow](https://elkins.github.io/resonance-flow/)**.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## ⚖️ License
|
|
186
|
+
|
|
187
|
+
MIT © George Elkins
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Related Projects
|
|
192
|
+
|
|
193
|
+
ResonanceFlow is the most complete end-to-end model in this ecosystem, which also includes:
|
|
194
|
+
|
|
195
|
+
- [diff-biophys](https://github.com/elkins/diff-biophys) — Differentiable RDC, NOE, bond-length, and clash kernels
|
|
196
|
+
- [synth-nmr](https://github.com/elkins/synth-nmr) — NMR parameter libraries (chemical shifts, Karplus, RDC)
|
|
197
|
+
- [synth-pdb](https://github.com/elkins/synth-pdb) — Protein structure data generation
|
|
198
|
+
- [TorsionTuner](https://github.com/elkins/TorsionTuner) — Single-structure refinement using similar torsion-space kinematics
|
|
199
|
+
- [diff-ensemble](https://github.com/elkins/diff-ensemble) — Ensemble counterpart for IDPs
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Citation
|
|
204
|
+
|
|
205
|
+
```bibtex
|
|
206
|
+
@software{resonance_flow,
|
|
207
|
+
author = {Elkins, George},
|
|
208
|
+
title = {ResonanceFlow: Differentiable protein structure prediction with NMR self-correction},
|
|
209
|
+
year = {2024},
|
|
210
|
+
url = {https://github.com/elkins/resonance-flow},
|
|
211
|
+
version = {0.1.0}
|
|
212
|
+
}
|
|
213
|
+
```
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "resonance-flow"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Self-correcting protein folding with differentiable NMR constraints"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
authors = [{name = "George Elkins"}]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: Science/Research",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Physics",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"jax",
|
|
25
|
+
"jaxlib",
|
|
26
|
+
"flax",
|
|
27
|
+
"optax",
|
|
28
|
+
"jax-md",
|
|
29
|
+
"numpy",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = [
|
|
34
|
+
"pytest",
|
|
35
|
+
"pytest-cov",
|
|
36
|
+
"ruff",
|
|
37
|
+
"black",
|
|
38
|
+
"mypy",
|
|
39
|
+
"mkdocs",
|
|
40
|
+
"mkdocs-material",
|
|
41
|
+
"mkdocstrings[python]",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
include = ["resonance_flow*"]
|
|
46
|
+
|
|
47
|
+
[tool.pytest.ini_options]
|
|
48
|
+
testpaths = ["tests"]
|
|
49
|
+
pythonpath = ["."]
|
|
50
|
+
python_files = ["test_*.py"]
|
|
51
|
+
|
|
52
|
+
[tool.ruff]
|
|
53
|
+
line-length = 100
|
|
54
|
+
target-version = "py310"
|
|
55
|
+
|
|
56
|
+
[tool.black]
|
|
57
|
+
line-length = 100
|
|
58
|
+
target-version = ['py310']
|
|
59
|
+
|
|
60
|
+
[tool.ruff.format]
|
|
61
|
+
quote-style = "double"
|
|
62
|
+
indent-style = "space"
|
|
63
|
+
skip-magic-trailing-comma = false
|
|
64
|
+
line-ending = "auto"
|
|
65
|
+
|
|
66
|
+
[tool.ruff.lint]
|
|
67
|
+
select = [
|
|
68
|
+
"E", # pycodestyle errors
|
|
69
|
+
"W", # pycodestyle warnings
|
|
70
|
+
"F", # pyflakes
|
|
71
|
+
"I", # isort
|
|
72
|
+
"N", # pep8-naming
|
|
73
|
+
"UP", # pyupgrade
|
|
74
|
+
"B", # flake8-bugbear
|
|
75
|
+
"C4", # flake8-comprehensions
|
|
76
|
+
]
|
|
77
|
+
ignore = [
|
|
78
|
+
"E501", # line length handled by formatter
|
|
79
|
+
"N806", # uppercase variables in functions (scientific constants: Da, R, S2)
|
|
80
|
+
"N803", # uppercase arguments (common in biophysics)
|
|
81
|
+
"N802", # uppercase function names (physics convention)
|
|
82
|
+
"B017", # asserting on Exception (acceptable in generic error tests)
|
|
83
|
+
"B904", # raise from (not yet fixed project-wide)
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
[tool.ruff.lint.per-file-ignores]
|
|
87
|
+
"__init__.py" = ["F401"] # re-exports are intentional
|
|
88
|
+
"tests/*" = ["N", "B"] # naming/bugbear not enforced in tests
|
|
89
|
+
|
|
90
|
+
[tool.ruff.lint.isort]
|
|
91
|
+
known-first-party = ["resonance_flow"]
|
|
92
|
+
|
|
93
|
+
[tool.mypy]
|
|
94
|
+
python_version = "3.10"
|
|
95
|
+
warn_return_any = true
|
|
96
|
+
warn_unused_configs = true
|
|
97
|
+
disallow_untyped_defs = true
|
|
98
|
+
check_untyped_defs = true
|
|
99
|
+
strict_equality = true
|
|
100
|
+
exclude = ["build/", "dist/", "scratch/", "site/"]
|
|
101
|
+
|
|
102
|
+
[[tool.mypy.overrides]]
|
|
103
|
+
module = ["tests.*", "scripts.*", "examples.*"]
|
|
104
|
+
disallow_untyped_defs = false
|
|
105
|
+
|
|
106
|
+
[[tool.mypy.overrides]]
|
|
107
|
+
module = [
|
|
108
|
+
"jax.*",
|
|
109
|
+
"jaxlib.*",
|
|
110
|
+
"flax.*",
|
|
111
|
+
"optax.*",
|
|
112
|
+
"jax_md.*",
|
|
113
|
+
"numpy.*",
|
|
114
|
+
]
|
|
115
|
+
ignore_missing_imports = true
|
|
116
|
+
|
|
117
|
+
# ── Coverage ──────────────────────────────────────────────────────────────────
|
|
118
|
+
[tool.coverage.run]
|
|
119
|
+
source = ["."]
|
|
120
|
+
omit = ["*/tests/*", "*/test_*.py"]
|
|
121
|
+
|
|
122
|
+
[tool.coverage.report]
|
|
123
|
+
exclude_lines = [
|
|
124
|
+
"pragma: no cover",
|
|
125
|
+
"def __repr__",
|
|
126
|
+
"raise AssertionError",
|
|
127
|
+
"raise NotImplementedError",
|
|
128
|
+
"if __name__ == .__main__.:",
|
|
129
|
+
"if TYPE_CHECKING:",
|
|
130
|
+
]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from resonance_flow.losses import estimate_nh_proxy_vectors as estimate_nh_proxy_vectors
|
|
2
|
+
from resonance_flow.losses import get_bond_length_loss as get_bond_length_loss
|
|
3
|
+
from resonance_flow.losses import get_steric_clash_loss as get_steric_clash_loss
|
|
4
|
+
from resonance_flow.losses import noe_upper_bound_loss as noe_upper_bound_loss
|
|
5
|
+
from resonance_flow.losses import rdc_loss as rdc_loss
|
|
6
|
+
from resonance_flow.losses import rdc_q_factor as rdc_q_factor
|
|
7
|
+
from resonance_flow.model import (
|
|
8
|
+
TransformerCoordinatePredictor as TransformerCoordinatePredictor,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__version__ = "0.1.0"
|