alphafold-attention 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alphafold_attention-0.0.1/PKG-INFO +22 -0
- alphafold_attention-0.0.1/pyproject.toml +21 -0
- alphafold_attention-0.0.1/src/alphafold/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/common/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/common/confidence.py +213 -0
- alphafold_attention-0.0.1/src/alphafold/common/mmcif_metadata.py +212 -0
- alphafold_attention-0.0.1/src/alphafold/common/protein.py +582 -0
- alphafold_attention-0.0.1/src/alphafold/common/protein_test.py +114 -0
- alphafold_attention-0.0.1/src/alphafold/common/residue_constants.py +926 -0
- alphafold_attention-0.0.1/src/alphafold/common/residue_constants_test.py +190 -0
- alphafold_attention-0.0.1/src/alphafold/common/stereo_chemical_props.txt +345 -0
- alphafold_attention-0.0.1/src/alphafold/data/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/data/feature_processing.py +229 -0
- alphafold_attention-0.0.1/src/alphafold/data/mmcif_parsing.py +389 -0
- alphafold_attention-0.0.1/src/alphafold/data/msa_identifiers.py +90 -0
- alphafold_attention-0.0.1/src/alphafold/data/msa_pairing.py +461 -0
- alphafold_attention-0.0.1/src/alphafold/data/parsers.py +613 -0
- alphafold_attention-0.0.1/src/alphafold/data/pipeline.py +243 -0
- alphafold_attention-0.0.1/src/alphafold/data/pipeline_multimer.py +284 -0
- alphafold_attention-0.0.1/src/alphafold/data/templates.py +1000 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/hhblits.py +155 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/hhsearch.py +107 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/hmmbuild.py +138 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/hmmsearch.py +131 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/jackhmmer.py +221 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/kalign.py +104 -0
- alphafold_attention-0.0.1/src/alphafold/data/tools/utils.py +40 -0
- alphafold_attention-0.0.1/src/alphafold/model/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/model/all_atom.py +1141 -0
- alphafold_attention-0.0.1/src/alphafold/model/all_atom_multimer.py +968 -0
- alphafold_attention-0.0.1/src/alphafold/model/all_atom_test.py +135 -0
- alphafold_attention-0.0.1/src/alphafold/model/common_modules.py +191 -0
- alphafold_attention-0.0.1/src/alphafold/model/config.py +705 -0
- alphafold_attention-0.0.1/src/alphafold/model/data.py +29 -0
- alphafold_attention-0.0.1/src/alphafold/model/features.py +104 -0
- alphafold_attention-0.0.1/src/alphafold/model/folding.py +1009 -0
- alphafold_attention-0.0.1/src/alphafold/model/folding_multimer.py +1159 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/__init__.py +31 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/rigid_matrix_vector.py +106 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/rotation_matrix.py +157 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/struct_of_array.py +220 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/test_utils.py +98 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/utils.py +23 -0
- alphafold_attention-0.0.1/src/alphafold/model/geometry/vector.py +217 -0
- alphafold_attention-0.0.1/src/alphafold/model/layer_stack.py +274 -0
- alphafold_attention-0.0.1/src/alphafold/model/layer_stack_test.py +335 -0
- alphafold_attention-0.0.1/src/alphafold/model/lddt.py +88 -0
- alphafold_attention-0.0.1/src/alphafold/model/lddt_test.py +79 -0
- alphafold_attention-0.0.1/src/alphafold/model/mapping.py +223 -0
- alphafold_attention-0.0.1/src/alphafold/model/model.py +210 -0
- alphafold_attention-0.0.1/src/alphafold/model/modules.py +2319 -0
- alphafold_attention-0.0.1/src/alphafold/model/modules_multimer.py +1134 -0
- alphafold_attention-0.0.1/src/alphafold/model/prng.py +69 -0
- alphafold_attention-0.0.1/src/alphafold/model/prng_test.py +46 -0
- alphafold_attention-0.0.1/src/alphafold/model/quat_affine.py +459 -0
- alphafold_attention-0.0.1/src/alphafold/model/quat_affine_test.py +150 -0
- alphafold_attention-0.0.1/src/alphafold/model/r3.py +320 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/data_transforms.py +625 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/input_pipeline.py +166 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/protein_features.py +129 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/protein_features_test.py +54 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/proteins_dataset.py +166 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/shape_helpers.py +47 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/shape_helpers_test.py +42 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/shape_placeholders.py +20 -0
- alphafold_attention-0.0.1/src/alphafold/model/tf/utils.py +47 -0
- alphafold_attention-0.0.1/src/alphafold/model/utils.py +179 -0
- alphafold_attention-0.0.1/src/alphafold/notebooks/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/notebooks/notebook_utils.py +168 -0
- alphafold_attention-0.0.1/src/alphafold/notebooks/notebook_utils_test.py +196 -0
- alphafold_attention-0.0.1/src/alphafold/relax/__init__.py +14 -0
- alphafold_attention-0.0.1/src/alphafold/relax/amber_minimize.py +505 -0
- alphafold_attention-0.0.1/src/alphafold/relax/amber_minimize_test.py +133 -0
- alphafold_attention-0.0.1/src/alphafold/relax/cleanup.py +127 -0
- alphafold_attention-0.0.1/src/alphafold/relax/cleanup_test.py +137 -0
- alphafold_attention-0.0.1/src/alphafold/relax/relax.py +84 -0
- alphafold_attention-0.0.1/src/alphafold/relax/relax_test.py +89 -0
- alphafold_attention-0.0.1/src/alphafold/relax/utils.py +74 -0
- alphafold_attention-0.0.1/src/alphafold/relax/utils_test.py +55 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: alphafold-attention
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: AlphaFold with attention head access for ColabFold
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Suchetan Dontha
|
|
7
|
+
Author-email: sdontha@umd.edu
|
|
8
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
9
|
+
Classifier: Programming Language :: Python :: 2
|
|
10
|
+
Classifier: Programming Language :: Python :: 2.7
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.4
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.5
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "alphafold-attention"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "AlphaFold with attention head access for ColabFold"
|
|
5
|
+
authors = [
|
|
6
|
+
"Suchetan Dontha <sdontha@umd.edu>",
|
|
7
|
+
"Pramesh Sharma <pramesh.sharma@nih.gov>",
|
|
8
|
+
]
|
|
9
|
+
license = "Apache-2.0"
|
|
10
|
+
|
|
11
|
+
[tool.poetry.group.dev.dependencies]
|
|
12
|
+
pytest = "^6.2.5"
|
|
13
|
+
black = "^23.1.0"
|
|
14
|
+
|
|
15
|
+
[[tool.poetry.packages]]
|
|
16
|
+
include = "alphafold"
|
|
17
|
+
from = "src"
|
|
18
|
+
|
|
19
|
+
[build-system]
|
|
20
|
+
requires = ["poetry-core>=1.0.0"]
|
|
21
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright 2021 DeepMind Technologies Limited
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""An implementation of the inference pipeline of AlphaFold v2.0."""
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright 2021 DeepMind Technologies Limited
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Common data types and constants used within Alphafold."""
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Copyright 2021 DeepMind Technologies Limited
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Functions for processing confidence metrics."""
|
|
16
|
+
|
|
17
|
+
import jax.numpy as jnp
|
|
18
|
+
import jax
|
|
19
|
+
import numpy as np
|
|
20
|
+
from alphafold.common import residue_constants
|
|
21
|
+
import scipy.special
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def compute_tol(prev_pos, current_pos, mask, use_jnp=False):
  """Returns the masked RMS change in pairwise CA distances between two structures.

  Early-stopping criterion modelled on the one used in AF2Complex:
  https://www.nature.com/articles/s41467-022-29394-2

  Args:
    prev_pos: atom positions from the previous iteration. The CA atom is
      selected on the second axis, so the layout is presumably
      [num_res, num_atoms, 3] (confirm against the caller).
    current_pos: atom positions from the current iteration, same layout.
    mask: per-residue weights; each residue pair is weighted by the product
      of its two residue weights.
    use_jnp: if True compute with jax.numpy, otherwise with numpy.

  Returns:
    sqrt(weighted mean of squared distance-matrix differences + 1e-8).
  """
  xp = jnp if use_jnp else np
  ca = residue_constants.atom_order['CA']

  def pairwise_distances(coords):
    # Euclidean distance between every pair of rows in `coords`.
    delta = coords[:, None] - coords[None, :]
    return xp.sqrt((delta ** 2).sum(-1))

  prev_dist = pairwise_distances(prev_pos[:, ca])
  curr_dist = pairwise_distances(current_pos[:, ca])
  pair_weights = mask[:, None] * mask[None, :]
  weighted_sq = xp.square(prev_dist - curr_dist) * pair_weights
  # The epsilon is added after the division, exactly as in the reference
  # implementation; it keeps the sqrt away from an exact zero.
  return xp.sqrt(weighted_sq.sum() / pair_weights.sum() + 1e-8)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def compute_plddt(logits, use_jnp=False):
  """Computes per-residue pLDDT from logits.

  Args:
    logits: [num_res, num_bins] output from the PredictedLDDTHead.
    use_jnp: if True use jax.numpy and jax.nn.softmax, otherwise numpy and
      scipy.special.softmax.

  Returns:
    plddt: [num_res] per-residue pLDDT, i.e. the expected bin center under
      the softmax distribution, scaled by 100.
  """
  if use_jnp:
    _np, _softmax = jnp, jax.nn.softmax
  else:
    _np, _softmax = np, scipy.special.softmax

  num_bins = logits.shape[-1]
  bin_width = 1.0 / num_bins
  # Build the centers from an integer range rather than a float-step arange:
  # with a non-integer step the length of arange's output is not guaranteed
  # to be numerically stable, whereas this always yields exactly num_bins
  # centers at (i + 0.5) * bin_width.
  bin_centers = (_np.arange(num_bins) + 0.5) * bin_width
  probs = _softmax(logits, axis=-1)
  predicted_lddt_ca = (probs * bin_centers[None, :]).sum(-1)
  return predicted_lddt_ca * 100
|
|
53
|
+
|
|
54
|
+
def _calculate_bin_centers(breaks, use_jnp=False):
  """Converts error bin edges into bin centers.

  The bin width is taken from the first two edges and applied to every edge,
  so the edges are treated as uniformly spaced.

  Args:
    breaks: [num_bins - 1] the error bin edges.
    use_jnp: if True append with jax.numpy, otherwise with numpy.

  Returns:
    bin_centers: [num_bins] the error bin centers; the last entry is the
      catch-all bin, one full width beyond the previous center.
  """
  xp = jnp if use_jnp else np
  width = breaks[1] - breaks[0]

  # Shift each edge by half a bin to land on the center.
  centers = breaks + width / 2

  # Catch-all bin at the end.
  overflow_center = centers[-1] + width
  return xp.append(centers, overflow_center)
|
|
69
|
+
|
|
70
|
+
def _calculate_expected_aligned_error(
    alignment_confidence_breaks,
    aligned_distance_error_probs,
    use_jnp=False):
  """Calculates the expected aligned distance error for every residue pair.

  Args:
    alignment_confidence_breaks: [num_bins - 1] the error bin edges.
    aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted
      probs for each error bin, for each pair of residues.
    use_jnp: forwarded to _calculate_bin_centers to select jax.numpy or numpy.

  Returns:
    A tuple (predicted_aligned_error, max_predicted_aligned_error):
      predicted_aligned_error: [num_res, num_res] the probability-weighted
        sum of bin centers for each pair of residues.
      max_predicted_aligned_error: the center of the last (catch-all) bin,
        i.e. the largest error that can be predicted.
  """
  centers = _calculate_bin_centers(alignment_confidence_breaks, use_jnp=use_jnp)
  # Expectation over the bin axis.
  expected_error = (aligned_distance_error_probs * centers).sum(-1)
  max_error = centers[-1]
  return (expected_error, max_error)
|
|
88
|
+
|
|
89
|
+
def compute_predicted_aligned_error(logits, breaks, use_jnp=False):
  """Computes aligned confidence metrics from logits.

  Args:
    logits: [num_res, num_res, num_bins] the logits output from
      PredictedAlignedErrorHead.
    breaks: [num_bins - 1] the error bin edges.
    use_jnp: if True use jax.nn.softmax (and jax.numpy downstream), otherwise
      scipy.special.softmax (and numpy).

  Returns:
    A dict with the keys:
      aligned_confidence_probs: [num_res, num_res, num_bins] softmax of the
        logits over the bin axis.
      predicted_aligned_error: [num_res, num_res] the expected aligned distance
        error for each pair of residues.
      max_predicted_aligned_error: The maximum predicted error possible.
  """
  if use_jnp:
    softmax_fn = jax.nn.softmax
  else:
    softmax_fn = scipy.special.softmax

  probs = softmax_fn(logits, axis=-1)
  expected_error, max_error = _calculate_expected_aligned_error(
      breaks, probs, use_jnp=use_jnp)

  return {
      'aligned_confidence_probs': probs,
      'predicted_aligned_error': expected_error,
      'max_predicted_aligned_error': max_error,
  }
|
|
113
|
+
|
|
114
|
+
def predicted_tm_score(logits, breaks, residue_weights = None,
                       asym_id = None, use_jnp=False):
  """Computes the predicted TM score (pTM) or predicted interface TM score (ipTM).

  Args:
    logits: [num_res, num_res, num_bins] the logits output from
      PredictedAlignedErrorHead.
    breaks: [num_bins - 1] the error bin edges (one catch-all bin center is
      appended internally).
    residue_weights: [num_res] the per residue weights to use for the
      expectation. Defaults to all ones.
    asym_id: [num_res] the asymmetric unit ID - the chain ID. When given, only
      residue pairs with different IDs contribute (ipTM); when None every
      pair contributes (pTM).
    use_jnp: if True compute with jax.numpy / jax.nn.softmax, otherwise with
      numpy / scipy.special.softmax.

  Returns:
    ptm_score: The predicted TM alignment or the predicted iTM score.
  """
  xp, softmax_fn = (
      (jnp, jax.nn.softmax) if use_jnp else (np, scipy.special.softmax))

  # residue_weights must lie in [0, 1] but may be fractional, e.g. the
  # experimentally-resolved head's probability.
  if residue_weights is None:
    residue_weights = xp.ones(logits.shape[0])

  num_res = residue_weights.shape[0]
  bin_centers = _calculate_bin_centers(breaks, use_jnp=use_jnp)

  # Clip the effective length so d0 below is never negative/undefined.
  effective_len = xp.maximum(residue_weights.sum(), 19)

  # d_0(num_res) as defined by TM-score, eqn. (5) in Yang & Skolnick
  # "Scoring function for automated assessment of protein structure template
  # quality", 2004: http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf
  d0 = 1.24 * (effective_len - 15) ** (1./3) - 1.8

  # Logits -> probabilities over error bins.
  probs = softmax_fn(logits, axis=-1)

  # TM-score contribution of each bin, then its expectation per residue pair.
  tm_per_bin = 1. / (1 + xp.square(bin_centers) / xp.square(d0))
  expected_tm = (probs * tm_per_bin).sum(-1)

  if asym_id is not None:
    # Keep inter-chain pairs only.
    pair_mask = asym_id[:, None] != asym_id[None, :]
  else:
    pair_mask = xp.full((num_res, num_res), True)

  expected_tm *= pair_mask

  pair_weights = pair_mask * (
      residue_weights[None, :] * residue_weights[:, None])
  row_totals = pair_weights.sum(-1, keepdims=True)
  normed_weights = pair_weights / (1e-8 + row_totals)
  per_alignment = (expected_tm * normed_weights).sum(-1)

  # Best score over all candidate alignment residues.
  return (per_alignment * residue_weights).max()
|
|
171
|
+
|
|
172
|
+
def get_confidence_metrics(prediction_result, mask, rank_by = "plddt", use_jnp=False, keep_pae=False):
  """Post processes prediction_result to get confidence metrics.

  Args:
    prediction_result: dict of model head outputs. Must contain
      ['predicted_lddt']['logits']; may contain 'predicted_aligned_error'
      with 'logits', 'breaks' and optionally 'asym_id'.
    mask: per-residue weights used for the mean pLDDT and as residue weights
      for the TM scores.
    rank_by: "multimer" (80*ipTM + 20*pTM), "iptm" (100*ipTM), "ptm"
      (100*pTM); any other value ranks by mean pLDDT.
    use_jnp: forwarded to the metric helpers to select jax.numpy or numpy.
    keep_pae: if True, the 'predicted_aligned_error' entry is additionally
      stored in prediction_result under 'pae_matrix_with_logits' (this
      mutates the caller's dict).

  Returns:
    Dict with 'plddt', 'mean_plddt', 'ranking_confidence' and, when the
    aligned-error head is present, the aligned-error metrics, 'ptm' and
    (if 'asym_id' is available) 'iptm'.
  """
  plddt = compute_plddt(prediction_result['predicted_lddt']['logits'], use_jnp=use_jnp)
  confidence_metrics = {
      'plddt': plddt,
      "mean_plddt": (plddt * mask).sum()/mask.sum(),
  }

  if 'predicted_aligned_error' in prediction_result:
    pae_head = prediction_result['predicted_aligned_error']
    if keep_pae:
      prediction_result['pae_matrix_with_logits'] = pae_head

    confidence_metrics.update(compute_predicted_aligned_error(
        logits=pae_head['logits'],
        breaks=pae_head['breaks'],
        use_jnp=use_jnp))

    # Arguments shared by the pTM and ipTM computations.
    tm_args = dict(
        logits=pae_head['logits'],
        breaks=pae_head['breaks'],
        residue_weights=mask,
        use_jnp=use_jnp)
    confidence_metrics['ptm'] = predicted_tm_score(**tm_args)

    if "asym_id" in pae_head:
      # ipTM is only computed for the multimer model.
      confidence_metrics['iptm'] = predicted_tm_score(
          asym_id=pae_head['asym_id'], **tm_args)

  # Ranking score; the metric lookups stay inside the branches so that only
  # the metrics actually needed for the chosen mode are required.
  if rank_by == "multimer":
    ranking = 80 * confidence_metrics["iptm"] + 20 * confidence_metrics["ptm"]
  elif rank_by == "iptm":
    ranking = 100 * confidence_metrics["iptm"]
  elif rank_by == "ptm":
    ranking = 100 * confidence_metrics["ptm"]
  else:
    ranking = confidence_metrics["mean_plddt"]
  confidence_metrics["ranking_confidence"] = ranking
  return confidence_metrics
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Copyright 2021 DeepMind Technologies Limited
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""mmCIF metadata."""
|
|
16
|
+
|
|
17
|
+
from typing import Mapping, Sequence
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_DISCLAIMER = """ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED.
|
|
22
|
+
THE INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD BE
|
|
23
|
+
EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY OF ANY KIND,
|
|
24
|
+
WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT USE OF THE INFORMATION
|
|
25
|
+
SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD PARTY. DISCLAIMER: THE INFORMATION IS
|
|
26
|
+
NOT INTENDED TO BE A SUBSTITUTE FOR PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR
|
|
27
|
+
TREATMENT, AND DOES NOT CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS
|
|
28
|
+
AVAILABLE FOR ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE."""
|
|
29
|
+
|
|
30
|
+
# Authors of the Nature paper describing the AlphaFold method, which we cite in the mmCIF.
|
|
31
|
+
_MMCIF_PAPER_AUTHORS = (
|
|
32
|
+
'Jumper, John',
|
|
33
|
+
'Evans, Richard',
|
|
34
|
+
'Pritzel, Alexander',
|
|
35
|
+
'Green, Tim',
|
|
36
|
+
'Figurnov, Michael',
|
|
37
|
+
'Ronneberger, Olaf',
|
|
38
|
+
'Tunyasuvunakool, Kathryn',
|
|
39
|
+
'Bates, Russ',
|
|
40
|
+
'Zidek, Augustin',
|
|
41
|
+
'Potapenko, Anna',
|
|
42
|
+
'Bridgland, Alex',
|
|
43
|
+
'Meyer, Clemens',
|
|
44
|
+
'Kohl, Simon A. A.',
|
|
45
|
+
'Ballard, Andrew J.',
|
|
46
|
+
'Cowie, Andrew',
|
|
47
|
+
'Romera-Paredes, Bernardino',
|
|
48
|
+
'Nikolov, Stanislav',
|
|
49
|
+
'Jain, Rishub',
|
|
50
|
+
'Adler, Jonas',
|
|
51
|
+
'Back, Trevor',
|
|
52
|
+
'Petersen, Stig',
|
|
53
|
+
'Reiman, David',
|
|
54
|
+
'Clancy, Ellen',
|
|
55
|
+
'Zielinski, Michal',
|
|
56
|
+
'Steinegger, Martin',
|
|
57
|
+
'Pacholska, Michalina',
|
|
58
|
+
'Berghammer, Tamas',
|
|
59
|
+
'Silver, David',
|
|
60
|
+
'Vinyals, Oriol',
|
|
61
|
+
'Senior, Andrew W.',
|
|
62
|
+
'Kavukcuoglu, Koray',
|
|
63
|
+
'Kohli, Pushmeet',
|
|
64
|
+
'Hassabis, Demis',
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Authors of the mmCIF - we set them to be equal to the authors of the paper.
|
|
68
|
+
_MMCIF_AUTHORS = _MMCIF_PAPER_AUTHORS
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def add_metadata_to_mmcif(
    old_cif: Mapping[str, Sequence[str]], model_type: str
) -> Mapping[str, Sequence[str]]:
  """Builds the AlphaFold metadata entries for the given mmCIF.

  Args:
    old_cif: existing mmCIF data. The keys '_struct_asym.id',
      '_struct_asym.entity_id', '_atom_site.B_iso_or_equiv' and
      '_atom_site.type_symbol' are read.
    model_type: inserted into the model group name
      ('AlphaFold <model_type> v2.3.2 model').

  Returns:
    A new dict holding only the metadata entries; old_cif is not modified.
    The two entity_id entries reference the same sequence object as
    old_cif['_struct_asym.entity_id'].
  """
  # Values derived from the input, looked up in the same order as they end up
  # in the result.
  asym_ids = old_cif['_struct_asym.id']
  entity_ids = old_cif['_struct_asym.entity_id']
  num_entities = len(entity_ids)
  # The global metric is the mean of the per-atom B-factor column.
  global_plddt = np.mean(
      [float(b) for b in old_cif['_atom_site.B_iso_or_equiv']]
  )
  atom_symbols = sorted(set(old_cif['_atom_site.type_symbol']))

  num_authors = len(_MMCIF_AUTHORS)
  num_paper_authors = len(_MMCIF_PAPER_AUTHORS)

  cif = {
      # ModelCIF conformation dictionary.
      '_audit_conform.dict_name': ['mmcif_ma.dic'],
      '_audit_conform.dict_version': ['1.3.9'],
      '_audit_conform.dict_location': [
          'https://raw.githubusercontent.com/ihmwg/ModelCIF/master/dist/'
          'mmcif_ma.dic'
      ],
      # License and disclaimer.
      '_pdbx_data_usage.id': ['1', '2'],
      '_pdbx_data_usage.type': ['license', 'disclaimer'],
      '_pdbx_data_usage.details': [
          'Data in this file is available under a CC-BY-4.0 license.',
          _DISCLAIMER,
      ],
      '_pdbx_data_usage.url': [
          'https://creativecommons.org/licenses/by/4.0/',
          '?',
      ],
      '_pdbx_data_usage.name': ['CC-BY-4.0', '?'],
      # Structure author details (ordinals are 1-based).
      '_audit_author.name': list(_MMCIF_AUTHORS),
      '_audit_author.pdbx_ordinal': [
          str(i) for i in range(1, num_authors + 1)
      ],
      # Paper author details.
      '_citation_author.citation_id': ['primary'] * num_paper_authors,
      '_citation_author.name': list(_MMCIF_PAPER_AUTHORS),
      '_citation_author.ordinal': [
          str(i) for i in range(1, num_paper_authors + 1)
      ],
      # Paper citation details.
      '_citation.id': ['primary'],
      '_citation.title': [
          'Highly accurate protein structure prediction with AlphaFold'
      ],
      '_citation.journal_full': ['Nature'],
      '_citation.journal_volume': ['596'],
      '_citation.page_first': ['583'],
      '_citation.page_last': ['589'],
      '_citation.year': ['2021'],
      '_citation.journal_id_ASTM': ['NATUAS'],
      '_citation.country': ['UK'],
      '_citation.journal_id_ISSN': ['0028-0836'],
      '_citation.journal_id_CSD': ['0006'],
      '_citation.book_publisher': ['?'],
      '_citation.pdbx_database_id_PubMed': ['34265844'],
      '_citation.pdbx_database_id_DOI': ['10.1038/s41586-021-03819-2'],
      # Type of data in the dataset including data used in the model
      # generation.
      '_ma_data.id': ['1'],
      '_ma_data.name': ['Model'],
      '_ma_data.content_type': ['model coordinates'],
      # Description of number of instances for each entity.
      '_ma_target_entity_instance.asym_id': asym_ids,
      '_ma_target_entity_instance.entity_id': entity_ids,
      '_ma_target_entity_instance.details': ['.'] * num_entities,
      # Details about the target entities.
      '_ma_target_entity.entity_id': entity_ids,
      '_ma_target_entity.data_id': ['1'] * num_entities,
      '_ma_target_entity.origin': ['.'] * num_entities,
      # Details of the models being deposited.
      '_ma_model_list.ordinal_id': ['1'],
      '_ma_model_list.model_id': ['1'],
      '_ma_model_list.model_group_id': ['1'],
      '_ma_model_list.model_name': ['Top ranked model'],
      '_ma_model_list.model_group_name': [
          f'AlphaFold {model_type} v2.3.2 model'
      ],
      '_ma_model_list.data_id': ['1'],
      '_ma_model_list.model_type': ['Ab initio model'],
      # Software used.
      '_software.pdbx_ordinal': ['1'],
      '_software.name': ['AlphaFold'],
      '_software.version': ['v2.3.2'],
      '_software.type': ['package'],
      '_software.description': ['Structure prediction'],
      '_software.classification': ['other'],
      '_software.date': ['?'],
      # Collection of software into groups.
      '_ma_software_group.ordinal_id': ['1'],
      '_ma_software_group.group_id': ['1'],
      '_ma_software_group.software_id': ['1'],
      # Method description to conform with ModelCIF.
      '_ma_protocol_step.ordinal_id': ['1', '2', '3'],
      '_ma_protocol_step.protocol_id': ['1', '1', '1'],
      '_ma_protocol_step.step_id': ['1', '2', '3'],
      '_ma_protocol_step.method_type': [
          'coevolution MSA',
          'template search',
          'modeling',
      ],
      # Details of the metrics used to assess model confidence.
      '_ma_qa_metric.id': ['1', '2'],
      '_ma_qa_metric.name': ['pLDDT', 'pLDDT'],
      # Accepted values are distance, energy, normalised score, other, zscore.
      '_ma_qa_metric.type': ['pLDDT', 'pLDDT'],
      '_ma_qa_metric.mode': ['global', 'local'],
      '_ma_qa_metric.software_group_id': ['1', '1'],
      # Global model confidence metric value.
      '_ma_qa_metric_global.ordinal_id': ['1'],
      '_ma_qa_metric_global.model_id': ['1'],
      '_ma_qa_metric_global.metric_id': ['1'],
      '_ma_qa_metric_global.metric_value': [f'{global_plddt:.2f}'],
      # Distinct element symbols present in the structure, sorted.
      '_atom_type.symbol': atom_symbols,
  }

  return cif
|