biotite 0.40.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +1 -1
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +119 -119
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +119 -119
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +119 -119
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +119 -119
- biotite/sequence/align/localgapped.c +119 -119
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +119 -119
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +119 -119
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +119 -119
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +119 -119
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +119 -119
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +119 -119
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +119 -119
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +119 -119
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +119 -119
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +119 -119
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/bonds.c +1124 -915
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +119 -119
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/charges.c +119 -119
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/info/atoms.py +6 -1
- biotite/structure/info/bonds.py +1 -1
- biotite/structure/info/ccd/amino_acids.txt +17 -0
- biotite/structure/info/ccd/carbohydrates.txt +2 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +1 -0
- biotite/structure/info/misc.py +69 -5
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +157 -165
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/convertarray.c +119 -119
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +119 -119
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +119 -119
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +119 -119
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/pdb/file.py +84 -31
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/__init__.py +0 -1
- biotite/structure/io/pdbx/bcif.py +2 -3
- biotite/structure/io/pdbx/cif.py +9 -5
- biotite/structure/io/pdbx/component.py +4 -1
- biotite/structure/io/pdbx/convert.py +203 -79
- biotite/structure/io/pdbx/encoding.c +119 -119
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +119 -119
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +472 -13
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
- biotite/structure/io/pdbx/error.py +0 -14
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Function for converting a structure into a sequence.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.structure"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
__all__ = ["to_sequence"]
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from .info.misc import one_letter_code
|
|
15
|
+
from .info.groups import amino_acid_names, nucleotide_names
|
|
16
|
+
from .residues import get_residues
|
|
17
|
+
from .chains import get_chain_starts
|
|
18
|
+
from .error import BadStructureError
|
|
19
|
+
from ..sequence.seqtypes import ProteinSequence, NucleotideSequence
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
HETERO_PLACEHOLDER = "."
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain,
        that have no one-letter code, are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or
        if `allow_hetero` is false and a chain contains a residue that
        does not fit the chain type or has no one-letter code.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as a peptide, if it contains more amino acids
    than nucleotides, and as a nucleic acid otherwise.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]

    """
    sequences = []
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        # Slice along the atom dimension:
        # A plain 'atoms[start:stop]' would select models instead of
        # atoms, if an AtomArrayStack is given
        chain = atoms[..., start:stop]
        _, residues = get_residues(chain)
        one_letter_symbols = np.array(
            [one_letter_code(res) or HETERO_PLACEHOLDER for res in residues]
        )
        hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER

        # Each membership mask is computed once and reused below
        is_amino_acid = np.isin(residues, amino_acid_names())
        is_nucleotide = np.isin(residues, nucleotide_names())
        aa_count = np.count_nonzero(is_amino_acid)
        nuc_count = np.count_nonzero(is_nucleotide)
        if aa_count == 0 and nuc_count == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )
        elif aa_count > nuc_count:
            # Chain is a peptide
            hetero_mask |= ~is_amino_acid
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in peptide"
                )
            one_letter_symbols[hetero_mask] = "X"
            # Replace selenocysteine and pyrrolysine
            one_letter_symbols[one_letter_symbols == "U"] = "C"
            one_letter_symbols[one_letter_symbols == "O"] = "K"
            sequences.append(ProteinSequence("".join(one_letter_symbols)))
        else:
            # Chain is a nucleic acid
            hetero_mask |= ~is_nucleotide
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in nucleic acid"
                )
            one_letter_symbols[hetero_mask] = "N"
            # Replace uracil
            one_letter_symbols[one_letter_symbols == "U"] = "T"
            sequences.append(NucleotideSequence("".join(one_letter_symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]
|
biotite/structure/superimpose.py
CHANGED
|
@@ -8,12 +8,19 @@ This module provides functions for structure superimposition.
|
|
|
8
8
|
|
|
9
9
|
__name__ = "biotite.structure"
|
|
10
10
|
__author__ = "Patrick Kunzmann, Claude J. Rogers"
|
|
11
|
-
__all__ = ["superimpose", "
|
|
11
|
+
__all__ = ["superimpose", "superimpose_homologs",
|
|
12
|
+
"superimpose_without_outliers",
|
|
13
|
+
"AffineTransformation", "superimpose_apply"]
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
import numpy as np
|
|
15
17
|
from .atoms import coord
|
|
16
|
-
from .geometry import centroid
|
|
18
|
+
from .geometry import centroid, distance
|
|
19
|
+
from .filter import filter_amino_acids, filter_nucleotides
|
|
20
|
+
from .sequence import to_sequence
|
|
21
|
+
from ..sequence.alphabet import common_alphabet
|
|
22
|
+
from ..sequence.seqtypes import ProteinSequence
|
|
23
|
+
from ..sequence.align import SubstitutionMatrix, align_optimal, get_codes
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
class AffineTransformation:
|
|
@@ -35,11 +42,13 @@ class AffineTransformation:
|
|
|
35
42
|
----------
|
|
36
43
|
center_translation, rotation, target_translation : ndarray
|
|
37
44
|
Same as the parameters.
|
|
45
|
+
The dimensions are always expanded to *(m,3)* or *(m,3,3)*,
|
|
46
|
+
respectively.
|
|
38
47
|
"""
|
|
39
48
|
def __init__(self, center_translation, rotation, target_translation):
|
|
40
|
-
self.center_translation = center_translation
|
|
41
|
-
self.rotation = rotation
|
|
42
|
-
self.target_translation = target_translation
|
|
49
|
+
self.center_translation = _expand_dims(center_translation, 2)
|
|
50
|
+
self.rotation = _expand_dims(rotation, 3)
|
|
51
|
+
self.target_translation = _expand_dims(target_translation, 2)
|
|
43
52
|
|
|
44
53
|
|
|
45
54
|
def apply(self, atoms):
|
|
@@ -58,10 +67,43 @@ class AffineTransformation:
|
|
|
58
67
|
with transformations applied.
|
|
59
68
|
Only coordinates are returned, if coordinates were given in
|
|
60
69
|
`atoms`.
|
|
70
|
+
|
|
71
|
+
Examples
|
|
72
|
+
--------
|
|
73
|
+
|
|
74
|
+
>>> coord = np.arange(15).reshape(5,3)
|
|
75
|
+
>>> print(coord)
|
|
76
|
+
[[ 0 1 2]
|
|
77
|
+
[ 3 4 5]
|
|
78
|
+
[ 6 7 8]
|
|
79
|
+
[ 9 10 11]
|
|
80
|
+
[12 13 14]]
|
|
81
|
+
>>> # Rotates 90 degrees around the z-axis
|
|
82
|
+
>>> transform = AffineTransformation(
|
|
83
|
+
... center_translation=np.array([0,0,0]),
|
|
84
|
+
... rotation=np.array([
|
|
85
|
+
... [0, -1, 0],
|
|
86
|
+
... [1, 0, 0],
|
|
87
|
+
... [0, 0, 1]
|
|
88
|
+
... ]),
|
|
89
|
+
... target_translation=np.array([0,0,0])
|
|
90
|
+
... )
|
|
91
|
+
>>> print(transform.apply(coord))
|
|
92
|
+
[[ -1. 0. 2.]
|
|
93
|
+
[ -4. 3. 5.]
|
|
94
|
+
[ -7. 6. 8.]
|
|
95
|
+
[-10. 9. 11.]
|
|
96
|
+
[-13. 12. 14.]]
|
|
97
|
+
|
|
61
98
|
"""
|
|
62
99
|
mobile_coord = coord(atoms)
|
|
63
100
|
original_shape = mobile_coord.shape
|
|
64
101
|
mobile_coord = _reshape_to_3d(mobile_coord)
|
|
102
|
+
if mobile_coord.shape[0] != self.rotation.shape[0]:
|
|
103
|
+
raise IndexError(
|
|
104
|
+
f"Number of transformations is {self.rotation.shape[0]}, "
|
|
105
|
+
f"but number of structure models is {mobile_coord.shape[0]}"
|
|
106
|
+
)
|
|
65
107
|
|
|
66
108
|
superimposed_coord = mobile_coord.copy()
|
|
67
109
|
superimposed_coord += self.center_translation[:, np.newaxis, :]
|
|
@@ -77,24 +119,115 @@ class AffineTransformation:
|
|
|
77
119
|
return superimposed
|
|
78
120
|
|
|
79
121
|
|
|
122
|
+
def as_matrix(self):
    """
    Combine both translations and the rotation into a single 4x4
    transformation matrix per model.

    Multiplying such a matrix with homogeneous coordinates
    *(x, y, z, 1)* has the same effect as calling :meth:`apply()`
    on the coordinates *(x, y, z)*.

    Returns
    -------
    transformation_matrix : ndarray, shape=(m,4,4), dtype=float
        The transformation matrix.
        *m* is the number of models in the transformation.

    Examples
    --------

    >>> coord = np.arange(15).reshape(5,3)
    >>> print(coord)
    [[ 0  1  2]
     [ 3  4  5]
     [ 6  7  8]
     [ 9 10 11]
     [12 13 14]]
    >>> # Rotates 90 degrees around the z-axis
    >>> transform = AffineTransformation(
    ...     center_translation=np.array([0,0,0]),
    ...     rotation=np.array([
    ...         [0, -1,  0],
    ...         [1,  0,  0],
    ...         [0,  0,  1]
    ...     ]),
    ...     target_translation=np.array([0,0,0])
    ... )
    >>> print(transform.apply(coord))
    [[ -1.   0.   2.]
     [ -4.   3.   5.]
     [ -7.   6.   8.]
     [-10.   9.  11.]
     [-13.  12.  14.]]
    >>> # Use a 4x4 matrix for transformation as alternative
    >>> coord_4 = np.concatenate([coord, np.ones((len(coord), 1))], axis=-1)
    >>> print(coord_4)
    [[ 0.  1.  2.  1.]
     [ 3.  4.  5.  1.]
     [ 6.  7.  8.  1.]
     [ 9. 10. 11.  1.]
     [12. 13. 14.  1.]]
    >>> print((transform.as_matrix()[0] @ coord_4.T).T)
    [[ -1.   0.   2.   1.]
     [ -4.   3.   5.   1.]
     [ -7.   6.   8.   1.]
     [-10.   9.  11.   1.]
     [-13.  12.  14.   1.]]

    """
    model_count = self.rotation.shape[0]

    def _identity_stack():
        # A fresh, writable stack of 4x4 identity matrices (one per model)
        return np.tile(np.eye(4, dtype=float), (model_count, 1, 1))

    # Translation vectors go into the last column,
    # the rotation into the upper left 3x3 part
    to_center = _identity_stack()
    to_center[:, :3, 3] = self.center_translation

    rotate = _identity_stack()
    rotate[:, :3, :3] = self.rotation

    to_target = _identity_stack()
    to_target[:, :3, 3] = self.target_translation

    # Applied right to left: center -> rotate -> move to target
    return to_target @ rotate @ to_center
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _expand_dims(array, n_dims):
|
|
190
|
+
"""
|
|
191
|
+
Expand the dimensions of an `ndarray` to a certain number of
|
|
192
|
+
dimensions.
|
|
193
|
+
"""
|
|
194
|
+
while array.ndim < n_dims:
|
|
195
|
+
array = array[np.newaxis, ...]
|
|
196
|
+
return array
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _3d_identity(m, n):
|
|
200
|
+
"""
|
|
201
|
+
Create an array of *m* identity matrices of shape *(n, n)*
|
|
202
|
+
"""
|
|
203
|
+
matrices = np.zeros((m, n, n), dtype=float)
|
|
204
|
+
indices = np.arange(n)
|
|
205
|
+
matrices[:, indices, indices] = 1
|
|
206
|
+
return matrices
|
|
207
|
+
|
|
208
|
+
|
|
80
209
|
def superimpose(fixed, mobile, atom_mask=None):
|
|
81
210
|
"""
|
|
82
|
-
Superimpose structures onto
|
|
211
|
+
Superimpose structures onto each other, minimizing the RMSD between
|
|
212
|
+
them.
|
|
213
|
+
:footcite:`Kabsch1976, Kabsch1978`.
|
|
83
214
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
superimposed and the fixed structure is minimized.
|
|
215
|
+
More precisely, the `mobile` structure is rotated and translated onto
|
|
216
|
+
the `fixed` structure.
|
|
87
217
|
|
|
88
218
|
Parameters
|
|
89
219
|
----------
|
|
90
220
|
fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
|
|
91
|
-
The fixed structure.
|
|
221
|
+
The fixed structure(s).
|
|
92
222
|
Alternatively coordinates can be given.
|
|
93
223
|
mobile: AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
|
|
94
224
|
The structure(s) which is/are superimposed on the `fixed`
|
|
95
225
|
structure.
|
|
96
226
|
Each atom at index *i* in `mobile` must correspond the
|
|
97
227
|
atom at index *i* in `fixed` to obtain correct results.
|
|
228
|
+
Furthermore, if both `fixed` and `mobile` are
|
|
229
|
+
:class:`AtomArrayStack` objects, they must have the same
|
|
230
|
+
number of models.
|
|
98
231
|
Alternatively coordinates can be given.
|
|
99
232
|
atom_mask: ndarray, dtype=bool, optional
|
|
100
233
|
If given, only the atoms covered by this boolean mask will be
|
|
@@ -108,15 +241,19 @@ def superimpose(fixed, mobile, atom_mask=None):
|
|
|
108
241
|
-------
|
|
109
242
|
fitted : AtomArray or AtomArrayStack or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
|
|
110
243
|
A copy of the `mobile` structure(s),
|
|
111
|
-
superimposed on the fixed structure.
|
|
244
|
+
superimposed on the fixed structure(s).
|
|
112
245
|
Only coordinates are returned, if coordinates were given in
|
|
113
246
|
`mobile`.
|
|
114
247
|
transformation : AffineTransformation
|
|
115
|
-
|
|
116
|
-
applied on `mobile`.
|
|
248
|
+
The affine transformation(s) that were applied on `mobile`.
|
|
117
249
|
:meth:`AffineTransformation.apply()` can be used to transform
|
|
118
250
|
another AtomArray in the same way.
|
|
119
251
|
|
|
252
|
+
See Also
|
|
253
|
+
--------
|
|
254
|
+
superimpose_without_outliers : Superimposition with outlier removal
|
|
255
|
+
superimpose_homologs : Superimposition of homologous structures
|
|
256
|
+
|
|
120
257
|
Notes
|
|
121
258
|
-----
|
|
122
259
|
The `transformation` can come in handy, in case you want to
|
|
@@ -186,6 +323,258 @@ def superimpose(fixed, mobile, atom_mask=None):
|
|
|
186
323
|
return transform.apply(mobile), transform
|
|
187
324
|
|
|
188
325
|
|
|
326
|
+
def superimpose_without_outliers(fixed, mobile, min_anchors=3,
                                 max_iterations=10, quantiles=(0.25, 0.75),
                                 outlier_threshold=1.5):
    r"""
    Superimpose structures onto a fixed structure, ignoring
    conformational outliers.

    This method iteratively superimposes the `mobile` structure onto the
    `fixed` structure, removes conformational outliers and superimposes
    the remaining atoms (called *anchors*) again until no outlier
    remains.


    Parameters
    ----------
    fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The fixed structure(s).
        Alternatively coordinates can be given.
    mobile: AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float
        The structure(s) which is/are superimposed on the `fixed`
        structure.
        Each atom at index *i* in `mobile` must correspond to the
        atom at index *i* in `fixed` to obtain correct results.
        Furthermore, if both `fixed` and `mobile` are
        :class:`AtomArrayStack` objects, they must have the same
        number of models.
        Alternatively coordinates can be given.
    min_anchors : int, optional
        The outlier removal is stopped, if less than `min_anchors`
        anchors would be left.
    max_iterations : int, optional
        The maximum number of iterations for removing conformational
        outliers.
        Setting the value to 1 means that no outlier removal is
        conducted.
    quantiles : tuple (float, float), optional
        The lower and upper quantile for the interpercentile range
        (IPR).
        By default the interquartile range is taken.
    outlier_threshold : float, optional
        The threshold for considering a conformational outlier.
        The threshold is given in units of IPR.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack
        A copy of the `mobile` structure(s), superimposed on the fixed
        structure.
        Only coordinates are returned, if coordinates were given in
        `mobile`.
    transform : AffineTransformation
        This object contains the affine transformation(s) that were
        applied on `mobile`.
        :meth:`AffineTransformation.apply()` can be used to transform
        another AtomArray in the same way.
    anchor_indices : ndarray, shape(k,), dtype=int
        The indices of the anchor atoms.
        These atoms were used for the superimposition.

    Raises
    ------
    ValueError
        If `max_iterations` is less than 1.

    See Also
    --------
    superimpose : Superimposition without outlier removal
    superimpose_homologs : Superimposition of homologous structures

    Notes
    -----
    This method runs the following algorithm in iterations:

    1. Superimpose anchor atoms of `mobile` onto `fixed`.
    2. Calculate the squared distance :math:`d^2` between the
       superimposed anchors.
    3. Remove conformational outliers from anchors based on the
       following criterion:

       .. math:: d^2 > P_\text{upper}(d^2) + \left( P_\text{upper}(d^2) - P_\text{lower}(d^2) \right) \cdot T

       In prose this means that an anchor is considered an outlier, if
       it is `outlier_threshold` :math:`T` times the interpercentile
       range (IPR) above the upper percentile.
       By default, this is 1.5 times the interquartile range, which is
       the usual threshold to mark outliers in box plots.

    In the beginning, all atoms are considered as anchors.

    Considering all atoms (not only the anchors), this approach does
    **not** minimize the RMSD, in contrast to :func:`superimpose()`.
    The purpose of this function is to ignore outliers to decrease the
    RMSD in the more conserved parts of the structure.
    """
    if max_iterations < 1:
        raise ValueError("Maximum number of iterations must be at least 1")

    # Ensure that the first quantile is smaller than the second one
    quantiles = sorted(quantiles)

    fixed_coord = coord(fixed)
    mobile_coord = coord(mobile)
    # Before refinement, all anchors are included
    # 'inlier' is the opposite of 'outlier'
    updated_inlier_mask = np.ones(fixed_coord.shape[-2], dtype=bool)

    for _ in range(max_iterations):
        # Run superimposition
        inlier_mask = updated_inlier_mask
        filtered_fixed_coord = fixed_coord[..., inlier_mask, :]
        filtered_mobile_coord = mobile_coord[..., inlier_mask, :]
        superimposed_coord, transform = superimpose(
            filtered_fixed_coord, filtered_mobile_coord
        )

        # Find outliers
        sq_dist = distance(filtered_fixed_coord, superimposed_coord) ** 2
        if sq_dist.ndim == 2:
            # If multiple models are superimposed,
            # use the mean squared distance to determine outliers
            sq_dist = np.mean(sq_dist, axis=0)
        lower_quantile, upper_quantile = np.quantile(sq_dist, quantiles)
        ipr = upper_quantile - lower_quantile
        is_still_inlier = sq_dist <= upper_quantile + outlier_threshold * ipr
        if np.all(is_still_inlier):
            # No outliers among the current anchors -> early termination
            # Only the current anchors are checked here: the complete mask
            # can never be all-true again, once an atom has been removed,
            # so testing it would waste the remaining iterations on
            # identical superimpositions
            break
        # Squared distance was only calculated for the existing inliers
        # -> update the mask only for these atoms
        updated_inlier_mask = inlier_mask.copy()
        updated_inlier_mask[inlier_mask] = is_still_inlier
        if np.count_nonzero(updated_inlier_mask) < min_anchors:
            # Less than min_anchors anchors would be left -> early termination
            break

    # 'inlier_mask' is the mask the final 'transform' was computed with
    anchor_indices = np.where(inlier_mask)[0]
    return transform.apply(mobile), transform, anchor_indices
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def superimpose_homologs(fixed, mobile, substitution_matrix=None,
                         gap_penalty=-10, min_anchors=3, **kwargs):
    r"""
    Superimpose one protein or nucleotide chain onto another one,
    considering sequence differences and conformational outliers.

    The method finds corresponding residues by sequence alignment and
    selects their :math:`C_{\alpha}` or :math:`P` atoms as
    superimposition *anchors*.
    Then iteratively the anchor atoms are superimposed and outliers are
    removed.

    Parameters
    ----------
    fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n)
        The fixed structure(s).
        Must comprise a single chain.
    mobile : AtomArray, shape(n,) or AtomArrayStack, shape(m,n)
        The structure(s) which is/are superimposed on the `fixed`
        structure.
        Must comprise a single chain.
    substitution_matrix : str or SubstitutionMatrix, optional
        The (name of the) substitution matrix used for sequence
        alignment.
        Must fit the chain type.
        By default, ``"BLOSUM62"`` and ``"NUC"`` are used respectively.
        Only aligned residues with a positive score are considered as
        initial anchors.
    gap_penalty : int or tuple of int, optional
        The gap penalty for sequence alignment.
        A single value indicates a linear penalty, while a tuple
        indicates an affine penalty.
    min_anchors : int, optional
        If less than `min_anchors` anchors are found by sequence
        alignment, the method ditches the alignment and matches all
        anchor atoms.
        If the number of anchor atoms is not equal in `fixed` and
        `mobile` in this fallback case, an exception is raised.
        Furthermore, the outlier removal is stopped, if less than
        `min_anchors` anchors would be left.
    **kwargs
        Additional parameters for
        :func:`superimpose_without_outliers()`.

    Returns
    -------
    fitted : AtomArray or AtomArrayStack
        A copy of the `mobile` structure(s), superimposed on the fixed
        structure(s).
    transform : AffineTransformation
        This object contains the affine transformation(s) that were
        applied on `mobile`.
        :meth:`AffineTransformation.apply()` can be used to transform
        another AtomArray in the same way.
    fixed_anchor_indices, mobile_anchor_indices : ndarray, shape(k,), dtype=int
        The indices of the anchor atoms in the fixed and mobile
        structure, respectively.
        These atoms were used for the superimposition.

    Raises
    ------
    ValueError
        If one of the structures has less than `min_anchors` backbone
        anchor atoms, or if the fallback to matching all anchor atoms
        is required, but the number of anchor atoms differs between
        `fixed` and `mobile`.

    See Also
    --------
    superimpose : Superimposition without outlier removal
    superimpose_without_outliers : Internally used for outlier removal

    Notes
    -----
    As this method relies on sequence alignment, it works only for
    proteins/nucleic acids with decent sequence homology.
    """
    fixed_anchor_indices = _get_backbone_anchor_indices(fixed)
    mobile_anchor_indices = _get_backbone_anchor_indices(mobile)
    if (
        len(fixed_anchor_indices) < min_anchors or
        len(mobile_anchor_indices) < min_anchors
    ):
        raise ValueError(
            "Structures have too few CA atoms for required number of anchors"
        )

    anchor_indices = _find_matching_anchors(
        fixed[..., fixed_anchor_indices],
        mobile[..., mobile_anchor_indices],
        substitution_matrix,
        gap_penalty,
    )
    if len(anchor_indices) < min_anchors:
        # Fallback: Match all backbone anchors
        # -> the backbone anchor indices are used as they are,
        # which requires a one-to-one correspondence
        if len(fixed_anchor_indices) != len(mobile_anchor_indices):
            raise ValueError(
                "Tried fallback due to low anchor number, "
                "but number of CA atoms does not match"
            )
    else:
        # The anchor indices point to the CA atoms
        # -> get the corresponding indices for the whole structure
        fixed_anchor_indices = fixed_anchor_indices[anchor_indices[:, 0]]
        mobile_anchor_indices = mobile_anchor_indices[anchor_indices[:, 1]]

    _, transform, selected_anchor_indices = superimpose_without_outliers(
        fixed[..., fixed_anchor_indices],
        mobile[..., mobile_anchor_indices],
        min_anchors=min_anchors,
        **kwargs
    )
    # Keep only those anchors that survived the outlier removal
    fixed_anchor_indices = fixed_anchor_indices[selected_anchor_indices]
    mobile_anchor_indices = mobile_anchor_indices[selected_anchor_indices]

    return (
        # The transformation was derived from the anchors only,
        # but is applied to the entire mobile structure
        transform.apply(mobile),
        transform,
        fixed_anchor_indices,
        mobile_anchor_indices,
    )
|
|
576
|
+
|
|
577
|
+
|
|
189
578
|
def superimpose_apply(atoms, transformation):
|
|
190
579
|
"""
|
|
191
580
|
Superimpose structures using a given :class:`AffineTransformation`.
|
|
@@ -266,3 +655,73 @@ def _multi_matmul(matrices, vectors):
|
|
|
266
655
|
),
|
|
267
656
|
axes=(0, 2, 1)
|
|
268
657
|
)
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def _get_backbone_anchor_indices(atoms):
    """
    Return the indices of one representative anchor atom per residue:
    the ``CA`` atom of each amino acid and the ``P`` atom of each
    nucleotide.
    """
    atom_names = atoms.atom_name
    is_peptide_anchor = filter_amino_acids(atoms) & (atom_names == "CA")
    is_nucleotide_anchor = filter_nucleotides(atoms) & (atom_names == "P")
    anchor_mask = is_peptide_anchor | is_nucleotide_anchor
    return np.where(anchor_mask)[0]
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _find_matching_anchors(
    fixed_anchor_atoms,
    mobile_anchors_atoms,
    substitution_matrix,
    gap_penalty,
):
    """
    Determine corresponding residues of two chains via pairwise
    sequence alignment.

    Returns the rows of the alignment trace where both sequences have
    a residue (no gap) and the substitution score of the aligned
    residues is positive.
    """
    fixed_seq = _to_sequence(fixed_anchor_atoms)
    mobile_seq = _to_sequence(mobile_anchors_atoms)
    shared_alphabet = common_alphabet(
        [fixed_seq.alphabet, mobile_seq.alphabet]
    )
    if shared_alphabet is None:
        raise ValueError("Cannot superimpose peptides with nucleic acids")

    # Resolve the substitution matrix:
    # a name is looked up, a missing one is chosen based on chain type
    if isinstance(substitution_matrix, str):
        substitution_matrix = SubstitutionMatrix(
            shared_alphabet, shared_alphabet, substitution_matrix
        )
    elif substitution_matrix is None:
        substitution_matrix = (
            SubstitutionMatrix.std_protein_matrix()
            if isinstance(fixed_seq, ProteinSequence)
            else SubstitutionMatrix.std_nucleotide_matrix()
        )
    scores = substitution_matrix.score_matrix()

    alignments = align_optimal(
        fixed_seq,
        mobile_seq,
        substitution_matrix,
        gap_penalty,
        terminal_penalty=False,
        max_number=1,
    )
    best_alignment = alignments[0]

    codes = get_codes(best_alignment)
    fixed_codes = codes[0]
    mobile_codes = codes[1]
    # Anchors must be similar amino acids
    is_similar = scores[fixed_codes, mobile_codes] > 0
    # Cannot anchor gaps (a gap is encoded as -1)
    is_gapless = (fixed_codes != -1) & (mobile_codes != -1)
    return best_alignment.trace[is_similar & is_gapless]
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _to_sequence(atoms):
    """
    Convert a single-chain structure into its sequence.

    Hetero residues are tolerated.
    A ``ValueError`` is raised, if the structure does not yield exactly
    one sequence.
    """
    chain_sequences, _ = to_sequence(atoms, allow_hetero=True)
    n_chains = len(chain_sequences)
    if n_chains == 0:
        raise ValueError(
            "Structure does not contain any amino acids or nucleotides"
        )
    if n_chains > 1:
        raise ValueError(
            "Structure contains multiple chains, but only one is allowed"
        )
    return chain_sequences[0]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biotite
|
|
3
|
-
Version: 0.40.0
|
|
3
|
+
Version: 0.41.0
|
|
4
4
|
Summary: A comprehensive library for computational molecular biology
|
|
5
5
|
Author: The Biotite contributors
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -48,7 +48,7 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
48
48
|
Classifier: Programming Language :: Python :: 3
|
|
49
49
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
50
50
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
51
|
-
Requires-Python: >=3.
|
|
51
|
+
Requires-Python: >=3.10
|
|
52
52
|
Description-Content-Type: text/x-rst
|
|
53
53
|
License-File: LICENSE.rst
|
|
54
54
|
Requires-Dist: requests >=2.12
|