alphafold-attention 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. alphafold_attention-0.0.1/PKG-INFO +22 -0
  2. alphafold_attention-0.0.1/pyproject.toml +21 -0
  3. alphafold_attention-0.0.1/src/alphafold/__init__.py +14 -0
  4. alphafold_attention-0.0.1/src/alphafold/common/__init__.py +14 -0
  5. alphafold_attention-0.0.1/src/alphafold/common/confidence.py +213 -0
  6. alphafold_attention-0.0.1/src/alphafold/common/mmcif_metadata.py +212 -0
  7. alphafold_attention-0.0.1/src/alphafold/common/protein.py +582 -0
  8. alphafold_attention-0.0.1/src/alphafold/common/protein_test.py +114 -0
  9. alphafold_attention-0.0.1/src/alphafold/common/residue_constants.py +926 -0
  10. alphafold_attention-0.0.1/src/alphafold/common/residue_constants_test.py +190 -0
  11. alphafold_attention-0.0.1/src/alphafold/common/stereo_chemical_props.txt +345 -0
  12. alphafold_attention-0.0.1/src/alphafold/data/__init__.py +14 -0
  13. alphafold_attention-0.0.1/src/alphafold/data/feature_processing.py +229 -0
  14. alphafold_attention-0.0.1/src/alphafold/data/mmcif_parsing.py +389 -0
  15. alphafold_attention-0.0.1/src/alphafold/data/msa_identifiers.py +90 -0
  16. alphafold_attention-0.0.1/src/alphafold/data/msa_pairing.py +461 -0
  17. alphafold_attention-0.0.1/src/alphafold/data/parsers.py +613 -0
  18. alphafold_attention-0.0.1/src/alphafold/data/pipeline.py +243 -0
  19. alphafold_attention-0.0.1/src/alphafold/data/pipeline_multimer.py +284 -0
  20. alphafold_attention-0.0.1/src/alphafold/data/templates.py +1000 -0
  21. alphafold_attention-0.0.1/src/alphafold/data/tools/__init__.py +14 -0
  22. alphafold_attention-0.0.1/src/alphafold/data/tools/hhblits.py +155 -0
  23. alphafold_attention-0.0.1/src/alphafold/data/tools/hhsearch.py +107 -0
  24. alphafold_attention-0.0.1/src/alphafold/data/tools/hmmbuild.py +138 -0
  25. alphafold_attention-0.0.1/src/alphafold/data/tools/hmmsearch.py +131 -0
  26. alphafold_attention-0.0.1/src/alphafold/data/tools/jackhmmer.py +221 -0
  27. alphafold_attention-0.0.1/src/alphafold/data/tools/kalign.py +104 -0
  28. alphafold_attention-0.0.1/src/alphafold/data/tools/utils.py +40 -0
  29. alphafold_attention-0.0.1/src/alphafold/model/__init__.py +14 -0
  30. alphafold_attention-0.0.1/src/alphafold/model/all_atom.py +1141 -0
  31. alphafold_attention-0.0.1/src/alphafold/model/all_atom_multimer.py +968 -0
  32. alphafold_attention-0.0.1/src/alphafold/model/all_atom_test.py +135 -0
  33. alphafold_attention-0.0.1/src/alphafold/model/common_modules.py +191 -0
  34. alphafold_attention-0.0.1/src/alphafold/model/config.py +705 -0
  35. alphafold_attention-0.0.1/src/alphafold/model/data.py +29 -0
  36. alphafold_attention-0.0.1/src/alphafold/model/features.py +104 -0
  37. alphafold_attention-0.0.1/src/alphafold/model/folding.py +1009 -0
  38. alphafold_attention-0.0.1/src/alphafold/model/folding_multimer.py +1159 -0
  39. alphafold_attention-0.0.1/src/alphafold/model/geometry/__init__.py +31 -0
  40. alphafold_attention-0.0.1/src/alphafold/model/geometry/rigid_matrix_vector.py +106 -0
  41. alphafold_attention-0.0.1/src/alphafold/model/geometry/rotation_matrix.py +157 -0
  42. alphafold_attention-0.0.1/src/alphafold/model/geometry/struct_of_array.py +220 -0
  43. alphafold_attention-0.0.1/src/alphafold/model/geometry/test_utils.py +98 -0
  44. alphafold_attention-0.0.1/src/alphafold/model/geometry/utils.py +23 -0
  45. alphafold_attention-0.0.1/src/alphafold/model/geometry/vector.py +217 -0
  46. alphafold_attention-0.0.1/src/alphafold/model/layer_stack.py +274 -0
  47. alphafold_attention-0.0.1/src/alphafold/model/layer_stack_test.py +335 -0
  48. alphafold_attention-0.0.1/src/alphafold/model/lddt.py +88 -0
  49. alphafold_attention-0.0.1/src/alphafold/model/lddt_test.py +79 -0
  50. alphafold_attention-0.0.1/src/alphafold/model/mapping.py +223 -0
  51. alphafold_attention-0.0.1/src/alphafold/model/model.py +210 -0
  52. alphafold_attention-0.0.1/src/alphafold/model/modules.py +2319 -0
  53. alphafold_attention-0.0.1/src/alphafold/model/modules_multimer.py +1134 -0
  54. alphafold_attention-0.0.1/src/alphafold/model/prng.py +69 -0
  55. alphafold_attention-0.0.1/src/alphafold/model/prng_test.py +46 -0
  56. alphafold_attention-0.0.1/src/alphafold/model/quat_affine.py +459 -0
  57. alphafold_attention-0.0.1/src/alphafold/model/quat_affine_test.py +150 -0
  58. alphafold_attention-0.0.1/src/alphafold/model/r3.py +320 -0
  59. alphafold_attention-0.0.1/src/alphafold/model/tf/__init__.py +14 -0
  60. alphafold_attention-0.0.1/src/alphafold/model/tf/data_transforms.py +625 -0
  61. alphafold_attention-0.0.1/src/alphafold/model/tf/input_pipeline.py +166 -0
  62. alphafold_attention-0.0.1/src/alphafold/model/tf/protein_features.py +129 -0
  63. alphafold_attention-0.0.1/src/alphafold/model/tf/protein_features_test.py +54 -0
  64. alphafold_attention-0.0.1/src/alphafold/model/tf/proteins_dataset.py +166 -0
  65. alphafold_attention-0.0.1/src/alphafold/model/tf/shape_helpers.py +47 -0
  66. alphafold_attention-0.0.1/src/alphafold/model/tf/shape_helpers_test.py +42 -0
  67. alphafold_attention-0.0.1/src/alphafold/model/tf/shape_placeholders.py +20 -0
  68. alphafold_attention-0.0.1/src/alphafold/model/tf/utils.py +47 -0
  69. alphafold_attention-0.0.1/src/alphafold/model/utils.py +179 -0
  70. alphafold_attention-0.0.1/src/alphafold/notebooks/__init__.py +14 -0
  71. alphafold_attention-0.0.1/src/alphafold/notebooks/notebook_utils.py +168 -0
  72. alphafold_attention-0.0.1/src/alphafold/notebooks/notebook_utils_test.py +196 -0
  73. alphafold_attention-0.0.1/src/alphafold/relax/__init__.py +14 -0
  74. alphafold_attention-0.0.1/src/alphafold/relax/amber_minimize.py +505 -0
  75. alphafold_attention-0.0.1/src/alphafold/relax/amber_minimize_test.py +133 -0
  76. alphafold_attention-0.0.1/src/alphafold/relax/cleanup.py +127 -0
  77. alphafold_attention-0.0.1/src/alphafold/relax/cleanup_test.py +137 -0
  78. alphafold_attention-0.0.1/src/alphafold/relax/relax.py +84 -0
  79. alphafold_attention-0.0.1/src/alphafold/relax/relax_test.py +89 -0
  80. alphafold_attention-0.0.1/src/alphafold/relax/utils.py +74 -0
  81. alphafold_attention-0.0.1/src/alphafold/relax/utils_test.py +55 -0
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.4
2
+ Name: alphafold-attention
3
+ Version: 0.0.1
4
+ Summary: AlphaFold with attention head access for ColabFold
5
+ License: Apache-2.0
6
+ Author: Suchetan Dontha
7
+ Author-email: sdontha@umd.edu
8
+ Classifier: License :: OSI Approved :: Apache Software License
9
+ Classifier: Programming Language :: Python :: 2
10
+ Classifier: Programming Language :: Python :: 2.7
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.4
13
+ Classifier: Programming Language :: Python :: 3.5
14
+ Classifier: Programming Language :: Python :: 3.6
15
+ Classifier: Programming Language :: Python :: 3.7
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
@@ -0,0 +1,21 @@
1
+ [tool.poetry]
2
+ name = "alphafold-attention"
3
+ version = "0.0.1"
4
+ description = "AlphaFold with attention head access for ColabFold"
5
+ authors = [
6
+ "Suchetan Dontha <sdontha@umd.edu>",
7
+ "Pramesh Sharma <pramesh.sharma@nih.gov>",
8
+ ]
9
+ license = "Apache-2.0"
10
+
11
+ [tool.poetry.group.dev.dependencies]
12
+ pytest = "^6.2.5"
13
+ black = "^23.1.0"
14
+
15
+ [[tool.poetry.packages]]
16
+ include = "alphafold"
17
+ from = "src"
18
+
19
+ [build-system]
20
+ requires = ["poetry-core>=1.0.0"]
21
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,14 @@
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """An implementation of the inference pipeline of AlphaFold v2.0."""
@@ -0,0 +1,14 @@
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Common data types and constants used within Alphafold."""
@@ -0,0 +1,213 @@
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Functions for processing confidence metrics."""
16
+
17
+ import jax.numpy as jnp
18
+ import jax
19
+ import numpy as np
20
+ from alphafold.common import residue_constants
21
+ import scipy.special
22
+
23
+
24
def compute_tol(prev_pos, current_pos, mask, use_jnp=False):
    """Returns the masked RMS change in pairwise CA distances between two structures.

    Early-stopping criterion based on the one used in AF2Complex:
    https://www.nature.com/articles/s41467-022-29394-2

    Args:
      prev_pos: atom positions from the previous iteration, indexed as
        [residue, atom type, coordinate] (presumably [num_res, num_atoms, 3];
        confirm against the caller).
      current_pos: atom positions from the current iteration, same layout as
        `prev_pos`.
      mask: [num_res] per-residue weights; a pair of residues is weighted by
        the product of its two entries.
      use_jnp: if True compute with jax.numpy, otherwise with numpy.

    Returns:
      Scalar sqrt(masked mean squared distance difference + 1e-8).
    """
    xp = jnp if use_jnp else np
    ca = residue_constants.atom_order['CA']

    def pairwise_distances(coords):
        # All-against-all Euclidean distances between rows of `coords`.
        deltas = coords[:, None] - coords[None, :]
        return xp.sqrt((deltas ** 2).sum(-1))

    prev_dist = pairwise_distances(prev_pos[:, ca])
    curr_dist = pairwise_distances(current_pos[:, ca])
    squared_change = xp.square(prev_dist - curr_dist)

    pair_mask = mask[:, None] * mask[None, :]
    # NOTE: an all-zero mask makes the denominator below zero.
    return xp.sqrt((squared_change * pair_mask).sum() / pair_mask.sum() + 1e-8)
33
+
34
+
35
def compute_plddt(logits, use_jnp=False):
    """Computes per-residue pLDDT from logits.

    The logits are turned into a distribution over equal-width bins covering
    [0, 1]; the result is the expected bin centre scaled to [0, 100].

    Args:
      logits: [num_res, num_bins] output from the PredictedLDDTHead.
      use_jnp: if True compute with jax (jax.numpy / jax.nn.softmax), otherwise
        with numpy / scipy.special.softmax.

    Returns:
      plddt: [num_res] per-residue pLDDT.
    """
    xp = jnp if use_jnp else np
    softmax = jax.nn.softmax if use_jnp else scipy.special.softmax

    n_bins = logits.shape[-1]
    width = 1.0 / n_bins
    # Centres of the equal-width bins: width/2, 3*width/2, ...
    centers = xp.arange(start=0.5 * width, stop=1.0, step=width)

    bin_probs = softmax(logits, axis=-1)
    expected_lddt = (bin_probs * centers[None, :]).sum(-1)
    return expected_lddt * 100
53
+
54
def _calculate_bin_centers(breaks, use_jnp=False):
    """Gets the bin centers from the bin edges.

    The bin width is taken from the first two edges and applied to every edge,
    so the edges are treated as evenly spaced.

    Args:
      breaks: [num_bins - 1] the error bin edges.
      use_jnp: if True use jax.numpy for the append, otherwise numpy.

    Returns:
      bin_centers: [num_bins] the error bin centers.
    """
    xp = jnp if use_jnp else np
    bin_width = breaks[1] - breaks[0]

    # Shift every edge by half a bin to land on the centre.
    centers = breaks + bin_width / 2

    # One extra catch-all bin sits a full width past the last centre.
    overflow_center = centers[-1] + bin_width
    return xp.append(centers, overflow_center)
69
+
70
def _calculate_expected_aligned_error(
        alignment_confidence_breaks,
        aligned_distance_error_probs,
        use_jnp=False):
    """Calculates expected aligned distance errors for every pair of residues.

    Args:
      alignment_confidence_breaks: [num_bins - 1] the error bin edges.
      aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted
        probs for each error bin, for each pair of residues.
      use_jnp: forwarded to _calculate_bin_centers to select jax.numpy or numpy.

    Returns:
      A (predicted_aligned_error, max_predicted_aligned_error) tuple:
      predicted_aligned_error: [num_res, num_res] the expected aligned distance
        error for each pair of residues.
      max_predicted_aligned_error: The maximum predicted error possible, i.e.
        the centre of the last (catch-all) bin.
    """
    centers = _calculate_bin_centers(alignment_confidence_breaks, use_jnp=use_jnp)
    # Expectation of the bin centre under the per-pair bin distribution.
    expected_error = (aligned_distance_error_probs * centers).sum(-1)
    max_error = centers[-1]
    return (expected_error, max_error)
88
+
89
def compute_predicted_aligned_error(logits, breaks, use_jnp=False):
    """Computes aligned confidence metrics from logits.

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges.
      use_jnp: if True use jax.nn.softmax (and jax.numpy downstream), otherwise
        scipy.special.softmax (and numpy).

    Returns:
      A dict with the keys:
      aligned_confidence_probs: [num_res, num_res, num_bins] the predicted
        aligned error probabilities over bins for each residue pair.
      predicted_aligned_error: [num_res, num_res] the expected aligned distance
        error for each pair of residues.
      max_predicted_aligned_error: The maximum predicted error possible.
    """
    if use_jnp:
        softmax = jax.nn.softmax
    else:
        softmax = scipy.special.softmax

    probs = softmax(logits, axis=-1)
    expected_error, max_error = _calculate_expected_aligned_error(
        breaks, probs, use_jnp=use_jnp)

    return {
        'aligned_confidence_probs': probs,
        'predicted_aligned_error': expected_error,
        'max_predicted_aligned_error': max_error,
    }
113
+
114
def predicted_tm_score(logits, breaks, residue_weights=None,
                       asym_id=None, use_jnp=False):
    """Computes predicted TM alignment or predicted interface TM alignment score.

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges (one catch-all bin centre is
        appended by _calculate_bin_centers).
      residue_weights: [num_res] the per residue weights to use for the
        expectation. Defaults to all ones.
      asym_id: [num_res] the asymmetric unit ID - the chain ID. Only needed for
        ipTM calculation; when given, only pairs with differing IDs count.
      use_jnp: if True compute with jax, otherwise with numpy / scipy.

    Returns:
      ptm_score: The predicted TM alignment or the predicted iTM score.
    """
    xp, softmax = (
        (jnp, jax.nn.softmax) if use_jnp else (np, scipy.special.softmax))

    # residue_weights has to be in [0, 1], but can be floating-point, i.e. the
    # exp. resolved head's probability.
    if residue_weights is None:
        residue_weights = xp.ones(logits.shape[0])

    centers = _calculate_bin_centers(breaks, use_jnp=use_jnp)
    n_res = residue_weights.shape[0]

    # Clip the effective length at 19 to avoid a negative/undefined d0.
    effective_len = xp.maximum(residue_weights.sum(), 19)

    # d_0(num_res) as defined by TM-score, eqn. (5) in Yang & Skolnick
    # "Scoring function for automated assessment of protein structure template
    # quality", 2004: http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf
    d0 = 1.24 * (effective_len - 15) ** (1. / 3) - 1.8

    # Logits -> per-pair distribution over error bins.
    bin_probs = softmax(logits, axis=-1)

    # TM-score contribution of each bin, then its expectation per residue pair.
    tm_by_bin = 1. / (1 + xp.square(centers) / xp.square(d0))
    expected_tm = (bin_probs * tm_by_bin).sum(-1)

    if asym_id is None:
        pair_mask = xp.full((n_res, n_res), True)
    else:
        # Interface score: keep only pairs whose IDs differ.
        pair_mask = asym_id[:, None] != asym_id[None, :]

    expected_tm *= pair_mask

    pair_weights = pair_mask * (
        residue_weights[None, :] * residue_weights[:, None])
    # Normalise each row; the 1e-8 keeps rows with zero total weight finite.
    row_normed_weights = pair_weights / (
        1e-8 + pair_weights.sum(-1, keepdims=True))
    score_per_alignment = (expected_tm * row_normed_weights).sum(-1)

    # Best score over all candidate alignment residues.
    return (score_per_alignment * residue_weights).max()
171
+
172
def get_confidence_metrics(prediction_result, mask, rank_by="plddt", use_jnp=False, keep_pae=False):
    """Post processes prediction_result to get confidence metrics.

    Args:
      prediction_result: dict of model outputs. Must contain
        ['predicted_lddt']['logits']; may contain 'predicted_aligned_error'
        with 'logits', 'breaks' and optionally 'asym_id'.
      mask: per-residue weights used for the mean pLDDT and as residue weights
        for the pTM / ipTM scores.
      rank_by: which score becomes 'ranking_confidence': "multimer"
        (80 * iptm + 20 * ptm), "iptm", "ptm", or anything else for mean pLDDT.
      use_jnp: forwarded to the metric helpers to select jax or numpy.
      keep_pae: if True (and the PAE head is present), the PAE head output is
        also stored back into `prediction_result` under
        'pae_matrix_with_logits'. Note this mutates the caller's dict.

    Returns:
      Dict with 'plddt', 'mean_plddt', 'ranking_confidence' and, when the PAE
      head is present, the aligned-error entries plus 'ptm' (and 'iptm' when
      'asym_id' is available).

    Raises:
      KeyError: if `rank_by` selects "ptm"/"iptm"/"multimer" but the
        corresponding metric was not computed.
    """
    plddt = compute_plddt(prediction_result['predicted_lddt']['logits'], use_jnp=use_jnp)
    metrics = {
        'plddt': plddt,
        "mean_plddt": (plddt * mask).sum() / mask.sum(),
    }

    if 'predicted_aligned_error' in prediction_result:
        pae_head = prediction_result['predicted_aligned_error']
        if keep_pae:
            prediction_result['pae_matrix_with_logits'] = pae_head

        pae_logits = pae_head['logits']
        pae_breaks = pae_head['breaks']

        metrics.update(compute_predicted_aligned_error(
            logits=pae_logits, breaks=pae_breaks, use_jnp=use_jnp))

        metrics['ptm'] = predicted_tm_score(
            logits=pae_logits,
            breaks=pae_breaks,
            residue_weights=mask,
            use_jnp=use_jnp)

        if "asym_id" in pae_head:
            # ipTM is only computed when chain IDs are available (multimer).
            metrics['iptm'] = predicted_tm_score(
                logits=pae_logits,
                breaks=pae_breaks,
                residue_weights=mask,
                asym_id=pae_head['asym_id'],
                use_jnp=use_jnp)

    # Pick the score used to rank models.
    if rank_by == "multimer":
        ranking = 80 * metrics["iptm"] + 20 * metrics["ptm"]
    elif rank_by == "iptm":
        ranking = 100 * metrics["iptm"]
    elif rank_by == "ptm":
        ranking = 100 * metrics["ptm"]
    else:
        ranking = metrics["mean_plddt"]

    metrics["ranking_confidence"] = ranking
    return metrics
@@ -0,0 +1,212 @@
1
+ # Copyright 2021 DeepMind Technologies Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """mmCIF metadata."""
16
+
17
+ from typing import Mapping, Sequence
18
+ import numpy as np
19
+
20
+
21
# Disclaimer text; add_metadata_to_mmcif emits it verbatim as the second
# `_pdbx_data_usage.details` entry. It is runtime data: do not re-wrap or edit.
_DISCLAIMER = """ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED.
THE INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD BE
EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY OF ANY KIND,
WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT USE OF THE INFORMATION
SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD PARTY. DISCLAIMER: THE INFORMATION IS
NOT INTENDED TO BE A SUBSTITUTE FOR PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR
TREATMENT, AND DOES NOT CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS
AVAILABLE FOR ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE."""

# Authors of the Nature methods paper we reference in the mmCIF.
# Tuple order matters: add_metadata_to_mmcif numbers the authors 1..N in this
# order when writing the `_citation_author` ordinals.
_MMCIF_PAPER_AUTHORS = (
    'Jumper, John',
    'Evans, Richard',
    'Pritzel, Alexander',
    'Green, Tim',
    'Figurnov, Michael',
    'Ronneberger, Olaf',
    'Tunyasuvunakool, Kathryn',
    'Bates, Russ',
    'Zidek, Augustin',
    'Potapenko, Anna',
    'Bridgland, Alex',
    'Meyer, Clemens',
    'Kohl, Simon A. A.',
    'Ballard, Andrew J.',
    'Cowie, Andrew',
    'Romera-Paredes, Bernardino',
    'Nikolov, Stanislav',
    'Jain, Rishub',
    'Adler, Jonas',
    'Back, Trevor',
    'Petersen, Stig',
    'Reiman, David',
    'Clancy, Ellen',
    'Zielinski, Michal',
    'Steinegger, Martin',
    'Pacholska, Michalina',
    'Berghammer, Tamas',
    'Silver, David',
    'Vinyals, Oriol',
    'Senior, Andrew W.',
    'Kavukcuoglu, Koray',
    'Kohli, Pushmeet',
    'Hassabis, Demis',
)

# Authors of the mmCIF - we set them to be equal to the authors of the paper.
# This is the same tuple object as _MMCIF_PAPER_AUTHORS, not a copy.
_MMCIF_AUTHORS = _MMCIF_PAPER_AUTHORS
69
+
70
+
71
+ def add_metadata_to_mmcif(
72
+ old_cif: Mapping[str, Sequence[str]], model_type: str
73
+ ) -> Mapping[str, Sequence[str]]:
74
+ """Adds AlphaFold metadata in the given mmCIF."""
75
+ cif = {}
76
+
77
+ # ModelCIF conformation dictionary.
78
+ cif['_audit_conform.dict_name'] = ['mmcif_ma.dic']
79
+ cif['_audit_conform.dict_version'] = ['1.3.9']
80
+ cif['_audit_conform.dict_location'] = [
81
+ 'https://raw.githubusercontent.com/ihmwg/ModelCIF/master/dist/'
82
+ 'mmcif_ma.dic'
83
+ ]
84
+
85
+ # License and disclaimer.
86
+ cif['_pdbx_data_usage.id'] = ['1', '2']
87
+ cif['_pdbx_data_usage.type'] = ['license', 'disclaimer']
88
+ cif['_pdbx_data_usage.details'] = [
89
+ 'Data in this file is available under a CC-BY-4.0 license.',
90
+ _DISCLAIMER,
91
+ ]
92
+ cif['_pdbx_data_usage.url'] = [
93
+ 'https://creativecommons.org/licenses/by/4.0/',
94
+ '?',
95
+ ]
96
+ cif['_pdbx_data_usage.name'] = ['CC-BY-4.0', '?']
97
+
98
+ # Structure author details.
99
+ cif['_audit_author.name'] = []
100
+ cif['_audit_author.pdbx_ordinal'] = []
101
+ for author_index, author_name in enumerate(_MMCIF_AUTHORS, start=1):
102
+ cif['_audit_author.name'].append(author_name)
103
+ cif['_audit_author.pdbx_ordinal'].append(str(author_index))
104
+
105
+ # Paper author details.
106
+ cif['_citation_author.citation_id'] = []
107
+ cif['_citation_author.name'] = []
108
+ cif['_citation_author.ordinal'] = []
109
+ for author_index, author_name in enumerate(_MMCIF_PAPER_AUTHORS, start=1):
110
+ cif['_citation_author.citation_id'].append('primary')
111
+ cif['_citation_author.name'].append(author_name)
112
+ cif['_citation_author.ordinal'].append(str(author_index))
113
+
114
+ # Paper citation details.
115
+ cif['_citation.id'] = ['primary']
116
+ cif['_citation.title'] = [
117
+ 'Highly accurate protein structure prediction with AlphaFold'
118
+ ]
119
+ cif['_citation.journal_full'] = ['Nature']
120
+ cif['_citation.journal_volume'] = ['596']
121
+ cif['_citation.page_first'] = ['583']
122
+ cif['_citation.page_last'] = ['589']
123
+ cif['_citation.year'] = ['2021']
124
+ cif['_citation.journal_id_ASTM'] = ['NATUAS']
125
+ cif['_citation.country'] = ['UK']
126
+ cif['_citation.journal_id_ISSN'] = ['0028-0836']
127
+ cif['_citation.journal_id_CSD'] = ['0006']
128
+ cif['_citation.book_publisher'] = ['?']
129
+ cif['_citation.pdbx_database_id_PubMed'] = ['34265844']
130
+ cif['_citation.pdbx_database_id_DOI'] = ['10.1038/s41586-021-03819-2']
131
+
132
+ # Type of data in the dataset including data used in the model generation.
133
+ cif['_ma_data.id'] = ['1']
134
+ cif['_ma_data.name'] = ['Model']
135
+ cif['_ma_data.content_type'] = ['model coordinates']
136
+
137
+ # Description of number of instances for each entity.
138
+ cif['_ma_target_entity_instance.asym_id'] = old_cif['_struct_asym.id']
139
+ cif['_ma_target_entity_instance.entity_id'] = old_cif[
140
+ '_struct_asym.entity_id'
141
+ ]
142
+ cif['_ma_target_entity_instance.details'] = ['.'] * len(
143
+ cif['_ma_target_entity_instance.entity_id']
144
+ )
145
+
146
+ # Details about the target entities.
147
+ cif['_ma_target_entity.entity_id'] = cif[
148
+ '_ma_target_entity_instance.entity_id'
149
+ ]
150
+ cif['_ma_target_entity.data_id'] = ['1'] * len(
151
+ cif['_ma_target_entity.entity_id']
152
+ )
153
+ cif['_ma_target_entity.origin'] = ['.'] * len(
154
+ cif['_ma_target_entity.entity_id']
155
+ )
156
+
157
+ # Details of the models being deposited.
158
+ cif['_ma_model_list.ordinal_id'] = ['1']
159
+ cif['_ma_model_list.model_id'] = ['1']
160
+ cif['_ma_model_list.model_group_id'] = ['1']
161
+ cif['_ma_model_list.model_name'] = ['Top ranked model']
162
+
163
+ cif['_ma_model_list.model_group_name'] = [
164
+ f'AlphaFold {model_type} v2.3.2 model'
165
+ ]
166
+ cif['_ma_model_list.data_id'] = ['1']
167
+ cif['_ma_model_list.model_type'] = ['Ab initio model']
168
+
169
+ # Software used.
170
+ cif['_software.pdbx_ordinal'] = ['1']
171
+ cif['_software.name'] = ['AlphaFold']
172
+ cif['_software.version'] = [f'v2.3.2']
173
+ cif['_software.type'] = ['package']
174
+ cif['_software.description'] = ['Structure prediction']
175
+ cif['_software.classification'] = ['other']
176
+ cif['_software.date'] = ['?']
177
+
178
+ # Collection of software into groups.
179
+ cif['_ma_software_group.ordinal_id'] = ['1']
180
+ cif['_ma_software_group.group_id'] = ['1']
181
+ cif['_ma_software_group.software_id'] = ['1']
182
+
183
+ # Method description to conform with ModelCIF.
184
+ cif['_ma_protocol_step.ordinal_id'] = ['1', '2', '3']
185
+ cif['_ma_protocol_step.protocol_id'] = ['1', '1', '1']
186
+ cif['_ma_protocol_step.step_id'] = ['1', '2', '3']
187
+ cif['_ma_protocol_step.method_type'] = [
188
+ 'coevolution MSA',
189
+ 'template search',
190
+ 'modeling',
191
+ ]
192
+
193
+ # Details of the metrics use to assess model confidence.
194
+ cif['_ma_qa_metric.id'] = ['1', '2']
195
+ cif['_ma_qa_metric.name'] = ['pLDDT', 'pLDDT']
196
+ # Accepted values are distance, energy, normalised score, other, zscore.
197
+ cif['_ma_qa_metric.type'] = ['pLDDT', 'pLDDT']
198
+ cif['_ma_qa_metric.mode'] = ['global', 'local']
199
+ cif['_ma_qa_metric.software_group_id'] = ['1', '1']
200
+
201
+ # Global model confidence metric value.
202
+ cif['_ma_qa_metric_global.ordinal_id'] = ['1']
203
+ cif['_ma_qa_metric_global.model_id'] = ['1']
204
+ cif['_ma_qa_metric_global.metric_id'] = ['1']
205
+ global_plddt = np.mean(
206
+ [float(v) for v in old_cif['_atom_site.B_iso_or_equiv']]
207
+ )
208
+ cif['_ma_qa_metric_global.metric_value'] = [f'{global_plddt:.2f}']
209
+
210
+ cif['_atom_type.symbol'] = sorted(set(old_cif['_atom_site.type_symbol']))
211
+
212
+ return cif