biotite 0.41.1__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +36 -10
- biotite/application/application.py +22 -11
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +16 -5
- biotite/sequence/align/__init__.py +160 -6
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +35 -35
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +112 -126
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +64 -64
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +226 -240
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +88 -100
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +21 -7
- biotite/structure/info/groups.py +10 -15
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -52
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
- biotite-1.0.0.dist-info/RECORD +322 -0
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.1.dist-info/RECORD +0 -340
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/sse.py
CHANGED
|
@@ -12,51 +12,43 @@ __author__ = "Patrick Kunzmann"
|
|
|
12
12
|
__all__ = ["annotate_sse"]
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
|
-
from .celllist import CellList
|
|
16
|
-
from .
|
|
17
|
-
from .
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
15
|
+
from biotite.structure.celllist import CellList
|
|
16
|
+
from biotite.structure.filter import filter_amino_acids
|
|
17
|
+
from biotite.structure.geometry import angle, dihedral, distance
|
|
18
|
+
from biotite.structure.integrity import check_res_id_continuity
|
|
19
|
+
from biotite.structure.residues import get_residue_starts
|
|
20
20
|
|
|
21
|
+
_r_helix = (np.deg2rad(89 - 12), np.deg2rad(89 + 12))
|
|
22
|
+
_a_helix = (np.deg2rad(50 - 20), np.deg2rad(50 + 20))
|
|
23
|
+
_d2_helix = ((5.5 - 0.5), (5.5 + 0.5)) # Not used in the algorithm description
|
|
24
|
+
_d3_helix = ((5.3 - 0.5), (5.3 + 0.5))
|
|
25
|
+
_d4_helix = ((6.4 - 0.6), (6.4 + 0.6))
|
|
21
26
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
_r_strand = (np.deg2rad(124 - 14), np.deg2rad(124 + 14))
|
|
28
|
+
_a_strand = (np.deg2rad(-180), np.deg2rad(-125), np.deg2rad(145), np.deg2rad(180))
|
|
29
|
+
_d2_strand = ((6.7 - 0.6), (6.7 + 0.6))
|
|
30
|
+
_d3_strand = ((9.9 - 0.9), (9.9 + 0.9))
|
|
31
|
+
_d4_strand = ((12.4 - 1.1), (12.4 + 1.1))
|
|
27
32
|
|
|
28
|
-
_r_strand = (np.deg2rad(124-14), np.deg2rad(124+14))
|
|
29
|
-
_a_strand = (np.deg2rad(-180), np.deg2rad(-125),
|
|
30
|
-
np.deg2rad(145), np.deg2rad(180))
|
|
31
|
-
_d2_strand = ((6.7-0.6), (6.7+0.6))
|
|
32
|
-
_d3_strand = ((9.9-0.9), (9.9+0.9))
|
|
33
|
-
_d4_strand = ((12.4-1.1), (12.4+1.1))
|
|
34
33
|
|
|
35
|
-
|
|
36
|
-
def annotate_sse(atom_array, chain_id=None):
|
|
34
|
+
def annotate_sse(atom_array):
|
|
37
35
|
r"""
|
|
38
36
|
Calculate the secondary structure elements (SSEs) of a
|
|
39
37
|
peptide chain based on the `P-SEA` algorithm.
|
|
40
38
|
:footcite:`Labesse1997`
|
|
41
|
-
|
|
39
|
+
|
|
42
40
|
The annotation is based CA coordinates only, specifically
|
|
43
41
|
distances and dihedral angles.
|
|
44
42
|
Discontinuities between chains are detected by residue ID.
|
|
45
|
-
|
|
43
|
+
|
|
46
44
|
Parameters
|
|
47
45
|
----------
|
|
48
46
|
atom_array : AtomArray
|
|
49
47
|
The atom array to annotate for.
|
|
50
48
|
Non-peptide residues are also allowed and obtain a ``''``
|
|
51
49
|
SSE.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
annotated.
|
|
55
|
-
DEPRECATED: By now multiple chains can be annotated at once.
|
|
56
|
-
To annotate only a certain chain, filter the `atom_array` before
|
|
57
|
-
giving it as input to this function.
|
|
58
|
-
|
|
59
|
-
|
|
50
|
+
|
|
51
|
+
|
|
60
52
|
Returns
|
|
61
53
|
-------
|
|
62
54
|
sse : ndarray
|
|
@@ -67,37 +59,30 @@ def annotate_sse(atom_array, chain_id=None):
|
|
|
67
59
|
:math:`{\beta}`-strand/sheet, ``'c'`` means coil.
|
|
68
60
|
``''`` indicates that a residue is not an amino acid or it
|
|
69
61
|
comprises no ``CA`` atom.
|
|
70
|
-
|
|
62
|
+
|
|
71
63
|
Notes
|
|
72
64
|
-----
|
|
73
65
|
Although this function is based on the original `P-SEA` algorithm,
|
|
74
66
|
there are deviations compared to the official `P-SEA` software in
|
|
75
67
|
some cases.
|
|
76
68
|
Do not rely on getting the exact same results.
|
|
77
|
-
|
|
69
|
+
|
|
78
70
|
References
|
|
79
71
|
----------
|
|
80
72
|
|
|
81
73
|
.. footbibliography::
|
|
82
|
-
|
|
74
|
+
|
|
83
75
|
Examples
|
|
84
76
|
--------
|
|
85
|
-
|
|
77
|
+
|
|
86
78
|
SSE of PDB 1L2Y:
|
|
87
|
-
|
|
88
|
-
>>> sse = annotate_sse(atom_array
|
|
79
|
+
|
|
80
|
+
>>> sse = annotate_sse(atom_array)
|
|
89
81
|
>>> print(sse)
|
|
90
82
|
['c' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'a' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c' 'c'
|
|
91
83
|
'c' 'c']
|
|
92
|
-
|
|
93
|
-
"""
|
|
94
|
-
if chain_id is not None:
|
|
95
|
-
# Filter all CA atoms in the relevant chain
|
|
96
|
-
atom_array = atom_array[
|
|
97
|
-
(atom_array.chain_id == chain_id) & filter_amino_acids(atom_array)
|
|
98
|
-
]
|
|
99
|
-
|
|
100
84
|
|
|
85
|
+
"""
|
|
101
86
|
residue_starts = get_residue_starts(atom_array)
|
|
102
87
|
# Sort CA coord into the coord array at the respective residue index
|
|
103
88
|
# If a residue has no CA, e.g. because it is not an amino acid,
|
|
@@ -106,9 +91,9 @@ def annotate_sse(atom_array, chain_id=None):
|
|
|
106
91
|
ca_indices = np.where(
|
|
107
92
|
filter_amino_acids(atom_array) & (atom_array.atom_name == "CA")
|
|
108
93
|
)[0]
|
|
109
|
-
ca_coord[
|
|
110
|
-
|
|
111
|
-
|
|
94
|
+
ca_coord[np.searchsorted(residue_starts, ca_indices, "right") - 1] = (
|
|
95
|
+
atom_array.coord[ca_indices]
|
|
96
|
+
)
|
|
112
97
|
|
|
113
98
|
if len(ca_coord) <= 5:
|
|
114
99
|
# The number of atoms is too small #
|
|
@@ -125,12 +110,12 @@ def annotate_sse(atom_array, chain_id=None):
|
|
|
125
110
|
# purpose of geometric measurements
|
|
126
111
|
# -> the distances/angles spanning discontinuities are NaN
|
|
127
112
|
discont_indices = check_res_id_continuity(atom_array)
|
|
128
|
-
discont_res_indices = np.searchsorted(
|
|
129
|
-
residue_starts, discont_indices, "right"
|
|
130
|
-
) - 1
|
|
113
|
+
discont_res_indices = np.searchsorted(residue_starts, discont_indices, "right") - 1
|
|
131
114
|
ca_coord = np.insert(
|
|
132
|
-
ca_coord,
|
|
133
|
-
|
|
115
|
+
ca_coord,
|
|
116
|
+
discont_res_indices,
|
|
117
|
+
np.full((len(discont_res_indices), 3), np.nan),
|
|
118
|
+
axis=0,
|
|
134
119
|
)
|
|
135
120
|
# Later the SSE for virtual residues are removed again
|
|
136
121
|
# via this mask
|
|
@@ -139,73 +124,74 @@ def annotate_sse(atom_array, chain_id=None):
|
|
|
139
124
|
|
|
140
125
|
length = len(ca_coord)
|
|
141
126
|
|
|
142
|
-
|
|
143
127
|
# The distances and angles are not defined for the entire interval,
|
|
144
128
|
# therefore the indices do not have the full range
|
|
145
129
|
# Values that are not defined are NaN
|
|
146
130
|
d2i = np.full(length, np.nan)
|
|
147
131
|
d3i = np.full(length, np.nan)
|
|
148
132
|
d4i = np.full(length, np.nan)
|
|
149
|
-
ri
|
|
150
|
-
ai
|
|
151
|
-
|
|
152
|
-
d2i[1 : length-1] = distance(ca_coord[0 : length-2], ca_coord[2
|
|
153
|
-
d3i[1 : length-2] = distance(ca_coord[0 : length-3], ca_coord[3
|
|
154
|
-
d4i[1 : length-3] = distance(ca_coord[0 : length-4], ca_coord[4
|
|
155
|
-
ri[1 : length-1]
|
|
156
|
-
ca_coord[0 : length-2],
|
|
157
|
-
ca_coord[1 : length-1],
|
|
158
|
-
ca_coord[2 : length]
|
|
133
|
+
ri = np.full(length, np.nan)
|
|
134
|
+
ai = np.full(length, np.nan)
|
|
135
|
+
|
|
136
|
+
d2i[1 : length - 1] = distance(ca_coord[0 : length - 2], ca_coord[2:length])
|
|
137
|
+
d3i[1 : length - 2] = distance(ca_coord[0 : length - 3], ca_coord[3:length])
|
|
138
|
+
d4i[1 : length - 3] = distance(ca_coord[0 : length - 4], ca_coord[4:length])
|
|
139
|
+
ri[1 : length - 1] = angle(
|
|
140
|
+
ca_coord[0 : length - 2], ca_coord[1 : length - 1], ca_coord[2:length]
|
|
159
141
|
)
|
|
160
|
-
ai[1 : length-2] = dihedral(
|
|
161
|
-
ca_coord[0 : length-3],
|
|
162
|
-
ca_coord[1 : length-2],
|
|
163
|
-
ca_coord[2 : length-1],
|
|
164
|
-
ca_coord[3 : length-0]
|
|
142
|
+
ai[1 : length - 2] = dihedral(
|
|
143
|
+
ca_coord[0 : length - 3],
|
|
144
|
+
ca_coord[1 : length - 2],
|
|
145
|
+
ca_coord[2 : length - 1],
|
|
146
|
+
ca_coord[3 : length - 0],
|
|
165
147
|
)
|
|
166
|
-
|
|
148
|
+
|
|
167
149
|
# Find CA that meet criteria for potential helices and strands
|
|
168
|
-
relaxed_helix = (
|
|
169
|
-
(
|
|
170
|
-
) | (
|
|
171
|
-
(ri >= _r_helix[0] ) & ( ri <= _r_helix[1])
|
|
150
|
+
relaxed_helix = ((d3i >= _d3_helix[0]) & (d3i <= _d3_helix[1])) | (
|
|
151
|
+
(ri >= _r_helix[0]) & (ri <= _r_helix[1])
|
|
172
152
|
)
|
|
173
153
|
strict_helix = (
|
|
174
|
-
(d3i >= _d3_helix[0])
|
|
175
|
-
|
|
154
|
+
(d3i >= _d3_helix[0])
|
|
155
|
+
& (d3i <= _d3_helix[1])
|
|
156
|
+
& (d4i >= _d4_helix[0])
|
|
157
|
+
& (d4i <= _d4_helix[1])
|
|
176
158
|
) | (
|
|
177
|
-
(ri
|
|
178
|
-
|
|
159
|
+
(ri >= _r_helix[0])
|
|
160
|
+
& (ri <= _r_helix[1])
|
|
161
|
+
& (ai >= _a_helix[0])
|
|
162
|
+
& (ai <= _a_helix[1])
|
|
179
163
|
)
|
|
180
164
|
|
|
181
165
|
relaxed_strand = (d3i >= _d3_strand[0]) & (d3i <= _d3_strand[1])
|
|
182
166
|
strict_strand = (
|
|
183
|
-
(d2i >= _d2_strand[0])
|
|
184
|
-
|
|
185
|
-
(
|
|
167
|
+
(d2i >= _d2_strand[0])
|
|
168
|
+
& (d2i <= _d2_strand[1])
|
|
169
|
+
& (d3i >= _d3_strand[0])
|
|
170
|
+
& (d3i <= _d3_strand[1])
|
|
171
|
+
& (d4i >= _d4_strand[0])
|
|
172
|
+
& (d4i <= _d4_strand[1])
|
|
186
173
|
) | (
|
|
187
|
-
(ri
|
|
188
|
-
(
|
|
174
|
+
(ri >= _r_strand[0])
|
|
175
|
+
& (ri <= _r_strand[1])
|
|
176
|
+
& (
|
|
189
177
|
# Account for periodic boundary of dihedral angle
|
|
190
|
-
((ai
|
|
191
|
-
((ai
|
|
178
|
+
((ai >= _a_strand[0]) & (ai <= _a_strand[1]))
|
|
179
|
+
| ((ai >= _a_strand[2]) & (ai <= _a_strand[3]))
|
|
192
180
|
)
|
|
193
181
|
)
|
|
194
182
|
|
|
195
|
-
|
|
196
183
|
helix_mask = _mask_consecutive(strict_helix, 5)
|
|
197
184
|
helix_mask = _extend_region(helix_mask, relaxed_helix)
|
|
198
|
-
|
|
185
|
+
|
|
199
186
|
strand_mask = _mask_consecutive(strict_strand, 4)
|
|
200
187
|
short_strand_mask = _mask_regions_with_contacts(
|
|
201
188
|
ca_coord,
|
|
202
189
|
_mask_consecutive(strict_strand, 3),
|
|
203
|
-
min_contacts=5,
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
strand_mask | short_strand_mask, relaxed_strand
|
|
190
|
+
min_contacts=5,
|
|
191
|
+
min_distance=4.2,
|
|
192
|
+
max_distance=5.2,
|
|
207
193
|
)
|
|
208
|
-
|
|
194
|
+
strand_mask = _extend_region(strand_mask | short_strand_mask, relaxed_strand)
|
|
209
195
|
|
|
210
196
|
sse = np.full(length, "c", dtype="U1")
|
|
211
197
|
sse[helix_mask] = "a"
|
|
@@ -215,7 +201,7 @@ def annotate_sse(atom_array, chain_id=None):
|
|
|
215
201
|
sse[np.isnan(ca_coord).any(axis=-1)] = ""
|
|
216
202
|
# Remove SSE for virtual atoms and return
|
|
217
203
|
return sse[no_virtual_mask]
|
|
218
|
-
|
|
204
|
+
|
|
219
205
|
|
|
220
206
|
def _mask_consecutive(mask, number):
|
|
221
207
|
"""
|
|
@@ -228,17 +214,17 @@ def _mask_consecutive(mask, number):
|
|
|
228
214
|
# if it and the following `number-1` elements are True
|
|
229
215
|
# The elements `mask[-(number-1):]` cannot have the sufficient count
|
|
230
216
|
# by this definition, as they are at the end of the array
|
|
231
|
-
counts = np.zeros(len(mask) - (number-1), dtype=int)
|
|
217
|
+
counts = np.zeros(len(mask) - (number - 1), dtype=int)
|
|
232
218
|
for i in range(number):
|
|
233
219
|
counts[mask[i : i + len(counts)]] += 1
|
|
234
|
-
consecutive_seed =
|
|
235
|
-
|
|
220
|
+
consecutive_seed = counts == number
|
|
221
|
+
|
|
236
222
|
# Not only that element, but also the
|
|
237
223
|
# following `number-1` elements are in a consecutive region
|
|
238
224
|
consecutive_mask = np.zeros(len(mask), dtype=bool)
|
|
239
225
|
for i in range(number):
|
|
240
226
|
consecutive_mask[i : i + len(consecutive_seed)] |= consecutive_seed
|
|
241
|
-
|
|
227
|
+
|
|
242
228
|
return consecutive_mask
|
|
243
229
|
|
|
244
230
|
|
|
@@ -253,7 +239,7 @@ def _extend_region(base_condition_mask, extension_condition_mask):
|
|
|
253
239
|
# Prepend absent region to the start to capture the event,
|
|
254
240
|
# that the first element is already the start of a region
|
|
255
241
|
region_change_mask = np.diff(np.append([False], base_condition_mask))
|
|
256
|
-
|
|
242
|
+
|
|
257
243
|
# These masks point to the first `False` element
|
|
258
244
|
# left and right of a 'True' region
|
|
259
245
|
# The left end is the element before the first element of a 'True' region
|
|
@@ -262,7 +248,7 @@ def _extend_region(base_condition_mask, extension_condition_mask):
|
|
|
262
248
|
left_end_mask = np.append(left_end_mask[1:], [False])
|
|
263
249
|
# The right end is first element of a 'False' region
|
|
264
250
|
right_end_mask = region_change_mask & ~base_condition_mask
|
|
265
|
-
|
|
251
|
+
|
|
266
252
|
# The 'base_condition_mask' gets additional 'True' elements
|
|
267
253
|
# at left or right ends, which meet the extension criterion
|
|
268
254
|
return base_condition_mask | (
|
|
@@ -270,8 +256,9 @@ def _extend_region(base_condition_mask, extension_condition_mask):
|
|
|
270
256
|
)
|
|
271
257
|
|
|
272
258
|
|
|
273
|
-
def _mask_regions_with_contacts(
|
|
274
|
-
|
|
259
|
+
def _mask_regions_with_contacts(
|
|
260
|
+
coord, candidate_mask, min_contacts, min_distance, max_distance
|
|
261
|
+
):
|
|
275
262
|
"""
|
|
276
263
|
Mask regions of `candidate_mask` that have at least `min_contacts`
|
|
277
264
|
contacts with `coord` in the range `min_distance` to `max_distance`.
|
|
@@ -281,47 +268,41 @@ def _mask_regions_with_contacts(coord, candidate_mask,
|
|
|
281
268
|
# No potential contacts -> no contacts
|
|
282
269
|
# -> no residue can satisfy 'min_contacts'
|
|
283
270
|
return np.zeros(len(candidate_mask), dtype=bool)
|
|
284
|
-
|
|
285
|
-
cell_list = CellList(
|
|
286
|
-
potential_contact_coord, max_distance
|
|
287
|
-
)
|
|
271
|
+
|
|
272
|
+
cell_list = CellList(potential_contact_coord, max_distance)
|
|
288
273
|
# For each candidate position,
|
|
289
274
|
# get all contacts within maximum distance
|
|
290
275
|
all_within_max_dist_indices = cell_list.get_atoms(
|
|
291
276
|
coord[candidate_mask], max_distance
|
|
292
277
|
)
|
|
293
|
-
|
|
278
|
+
|
|
294
279
|
contacts = np.zeros(len(coord), dtype=int)
|
|
295
280
|
for i, atom_index in enumerate(np.where(candidate_mask)[0]):
|
|
296
281
|
within_max_dist_indices = all_within_max_dist_indices[i]
|
|
297
282
|
# Remove padding values
|
|
298
|
-
within_max_dist_indices = within_max_dist_indices[
|
|
299
|
-
|
|
300
|
-
]
|
|
301
|
-
# Now count all contacts within maximum distance
|
|
283
|
+
within_max_dist_indices = within_max_dist_indices[within_max_dist_indices != -1]
|
|
284
|
+
# Now count all contacts within maximum distance
|
|
302
285
|
# that also satisfy the minimum distance
|
|
303
286
|
contacts[atom_index] = np.count_nonzero(
|
|
304
287
|
distance(
|
|
305
|
-
coord[atom_index],
|
|
306
|
-
|
|
307
|
-
|
|
288
|
+
coord[atom_index], potential_contact_coord[within_max_dist_indices]
|
|
289
|
+
)
|
|
290
|
+
> min_distance
|
|
308
291
|
)
|
|
309
|
-
|
|
292
|
+
|
|
310
293
|
# Count the number of contacts per region
|
|
311
294
|
# These indices mark the start of either a 'True' or 'False' region
|
|
312
295
|
# Prepend absent region to the start to capture the event,
|
|
313
296
|
# that the first element is already the start of a region
|
|
314
|
-
region_change_indices = np.where(
|
|
315
|
-
np.diff(np.append([False], candidate_mask))
|
|
316
|
-
)[0]
|
|
297
|
+
region_change_indices = np.where(np.diff(np.append([False], candidate_mask)))[0]
|
|
317
298
|
# Add exclusive stop
|
|
318
299
|
region_change_indices = np.append(region_change_indices, [len(coord)])
|
|
319
300
|
output_mask = np.zeros(len(candidate_mask), dtype=bool)
|
|
320
301
|
for i in range(len(region_change_indices) - 1):
|
|
321
302
|
start = region_change_indices[i]
|
|
322
|
-
stop = region_change_indices[i+1]
|
|
323
|
-
total_contacts = np.sum(contacts[start
|
|
303
|
+
stop = region_change_indices[i + 1]
|
|
304
|
+
total_contacts = np.sum(contacts[start:stop])
|
|
324
305
|
if total_contacts >= min_contacts:
|
|
325
|
-
output_mask[start
|
|
326
|
-
|
|
327
|
-
return output_mask
|
|
306
|
+
output_mask[start:stop] = True
|
|
307
|
+
|
|
308
|
+
return output_mask
|
biotite/structure/superimpose.py
CHANGED
|
@@ -8,19 +8,22 @@ This module provides functions for structure superimposition.
|
|
|
8
8
|
|
|
9
9
|
__name__ = "biotite.structure"
|
|
10
10
|
__author__ = "Patrick Kunzmann, Claude J. Rogers"
|
|
11
|
-
__all__ = [
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
__all__ = [
|
|
12
|
+
"superimpose",
|
|
13
|
+
"superimpose_homologs",
|
|
14
|
+
"superimpose_without_outliers",
|
|
15
|
+
"AffineTransformation",
|
|
16
|
+
]
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
import numpy as np
|
|
17
|
-
from .
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
20
|
+
from biotite.sequence.align import SubstitutionMatrix, align_optimal, get_codes
|
|
21
|
+
from biotite.sequence.alphabet import common_alphabet
|
|
22
|
+
from biotite.sequence.seqtypes import ProteinSequence
|
|
23
|
+
from biotite.structure.atoms import coord
|
|
24
|
+
from biotite.structure.filter import filter_amino_acids, filter_nucleotides
|
|
25
|
+
from biotite.structure.geometry import centroid, distance
|
|
26
|
+
from biotite.structure.sequence import to_sequence
|
|
24
27
|
|
|
25
28
|
|
|
26
29
|
class AffineTransformation:
|
|
@@ -45,12 +48,12 @@ class AffineTransformation:
|
|
|
45
48
|
The dimensions are always expanded to *(m,3)* or *(m,3,3)*,
|
|
46
49
|
respectively.
|
|
47
50
|
"""
|
|
51
|
+
|
|
48
52
|
def __init__(self, center_translation, rotation, target_translation):
|
|
49
53
|
self.center_translation = _expand_dims(center_translation, 2)
|
|
50
54
|
self.rotation = _expand_dims(rotation, 3)
|
|
51
55
|
self.target_translation = _expand_dims(target_translation, 2)
|
|
52
56
|
|
|
53
|
-
|
|
54
57
|
def apply(self, atoms):
|
|
55
58
|
"""
|
|
56
59
|
Apply this transformation on the given structure.
|
|
@@ -118,7 +121,6 @@ class AffineTransformation:
|
|
|
118
121
|
superimposed.coord = superimposed_coord
|
|
119
122
|
return superimposed
|
|
120
123
|
|
|
121
|
-
|
|
122
124
|
def as_matrix(self):
|
|
123
125
|
"""
|
|
124
126
|
Get the translations and rotation as a combined 4x4
|
|
@@ -316,16 +318,19 @@ def superimpose(fixed, mobile, atom_mask=None):
|
|
|
316
318
|
mob_centered_filtered = mob_filtered - mob_centroid[:, np.newaxis, :]
|
|
317
319
|
fix_centered_filtered = fix_filtered - fix_centroid[:, np.newaxis, :]
|
|
318
320
|
|
|
319
|
-
rotation = _get_rotation_matrices(
|
|
320
|
-
fix_centered_filtered, mob_centered_filtered
|
|
321
|
-
)
|
|
321
|
+
rotation = _get_rotation_matrices(fix_centered_filtered, mob_centered_filtered)
|
|
322
322
|
transform = AffineTransformation(-mob_centroid, rotation, fix_centroid)
|
|
323
323
|
return transform.apply(mobile), transform
|
|
324
324
|
|
|
325
325
|
|
|
326
|
-
def superimpose_without_outliers(
|
|
327
|
-
|
|
328
|
-
|
|
326
|
+
def superimpose_without_outliers(
|
|
327
|
+
fixed,
|
|
328
|
+
mobile,
|
|
329
|
+
min_anchors=3,
|
|
330
|
+
max_iterations=10,
|
|
331
|
+
quantiles=(0.25, 0.75),
|
|
332
|
+
outlier_threshold=1.5,
|
|
333
|
+
):
|
|
329
334
|
r"""
|
|
330
335
|
Superimpose structures onto a fixed structure, ignoring
|
|
331
336
|
conformational outliers.
|
|
@@ -458,8 +463,9 @@ def superimpose_without_outliers(fixed, mobile, min_anchors=3,
|
|
|
458
463
|
return transform.apply(mobile), transform, anchor_indices
|
|
459
464
|
|
|
460
465
|
|
|
461
|
-
def superimpose_homologs(
|
|
462
|
-
|
|
466
|
+
def superimpose_homologs(
|
|
467
|
+
fixed, mobile, substitution_matrix=None, gap_penalty=-10, min_anchors=3, **kwargs
|
|
468
|
+
):
|
|
463
469
|
r"""
|
|
464
470
|
Superimpose one protein or nucleotide chain onto another one,
|
|
465
471
|
considering sequence differences and conformational outliers.
|
|
@@ -530,8 +536,8 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
|
|
|
530
536
|
fixed_anchor_indices = _get_backbone_anchor_indices(fixed)
|
|
531
537
|
mobile_anchor_indices = _get_backbone_anchor_indices(mobile)
|
|
532
538
|
if (
|
|
533
|
-
len(fixed_anchor_indices) < min_anchors
|
|
534
|
-
len(mobile_anchor_indices) < min_anchors
|
|
539
|
+
len(fixed_anchor_indices) < min_anchors
|
|
540
|
+
or len(mobile_anchor_indices) < min_anchors
|
|
535
541
|
):
|
|
536
542
|
raise ValueError(
|
|
537
543
|
"Structures have too few CA atoms for required number of anchors"
|
|
@@ -562,7 +568,7 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
|
|
|
562
568
|
fixed[..., fixed_anchor_indices],
|
|
563
569
|
mobile[..., mobile_anchor_indices],
|
|
564
570
|
min_anchors,
|
|
565
|
-
**kwargs
|
|
571
|
+
**kwargs,
|
|
566
572
|
)
|
|
567
573
|
fixed_anchor_indices = fixed_anchor_indices[selected_anchor_indices]
|
|
568
574
|
mobile_anchor_indices = mobile_anchor_indices[selected_anchor_indices]
|
|
@@ -575,54 +581,18 @@ def superimpose_homologs(fixed, mobile, substitution_matrix=None,
|
|
|
575
581
|
)
|
|
576
582
|
|
|
577
583
|
|
|
578
|
-
def superimpose_apply(atoms, transformation):
|
|
579
|
-
"""
|
|
580
|
-
Superimpose structures using a given :class:`AffineTransformation`.
|
|
581
|
-
|
|
582
|
-
The :class:`AffineTransformation` can be obtained by prior
|
|
583
|
-
superimposition.
|
|
584
|
-
|
|
585
|
-
DEPRECATED: Use :func:`AffineTransformation.apply()` instead.
|
|
586
|
-
|
|
587
|
-
Parameters
|
|
588
|
-
----------
|
|
589
|
-
atoms : AtomArray or ndarray, shape(n,), dtype=float
|
|
590
|
-
The structure to apply the transformation on.
|
|
591
|
-
Alternatively coordinates can be given.
|
|
592
|
-
transformation: AffineTransformation
|
|
593
|
-
The transformation, obtained by :func:`superimpose()`.
|
|
594
|
-
|
|
595
|
-
Returns
|
|
596
|
-
-------
|
|
597
|
-
fitted : AtomArray or AtomArrayStack
|
|
598
|
-
A copy of the `atoms` structure,
|
|
599
|
-
with transformations applied.
|
|
600
|
-
Only coordinates are returned, if coordinates were given in
|
|
601
|
-
`atoms`.
|
|
602
|
-
|
|
603
|
-
See Also
|
|
604
|
-
--------
|
|
605
|
-
superimpose
|
|
606
|
-
"""
|
|
607
|
-
return transformation.apply(atoms)
|
|
608
|
-
|
|
609
|
-
|
|
610
584
|
def _reshape_to_3d(coord):
|
|
611
585
|
"""
|
|
612
586
|
Reshape the coordinate array to 3D, if it is 2D.
|
|
613
587
|
"""
|
|
614
588
|
if coord.ndim < 2:
|
|
615
|
-
raise ValueError(
|
|
616
|
-
"Coordinates must be at least two-dimensional"
|
|
617
|
-
)
|
|
589
|
+
raise ValueError("Coordinates must be at least two-dimensional")
|
|
618
590
|
if coord.ndim == 2:
|
|
619
591
|
return coord[np.newaxis, ...]
|
|
620
592
|
elif coord.ndim == 3:
|
|
621
593
|
return coord
|
|
622
594
|
else:
|
|
623
|
-
raise ValueError(
|
|
624
|
-
"Coordinates must be at most three-dimensional"
|
|
625
|
-
)
|
|
595
|
+
raise ValueError("Coordinates must be at most three-dimensional")
|
|
626
596
|
|
|
627
597
|
|
|
628
598
|
def _get_rotation_matrices(fixed, mobile):
|
|
@@ -634,10 +604,10 @@ def _get_rotation_matrices(fixed, mobile):
|
|
|
634
604
|
Both sets of coordinates must already be centered at origin.
|
|
635
605
|
"""
|
|
636
606
|
# Calculate cross-covariance matrices
|
|
637
|
-
cov = np.sum(fixed[
|
|
607
|
+
cov = np.sum(fixed[:, :, :, np.newaxis] * mobile[:, :, np.newaxis, :], axis=1)
|
|
638
608
|
v, s, w = np.linalg.svd(cov)
|
|
639
609
|
# Remove possibility of reflected atom coordinates
|
|
640
|
-
reflected_mask =
|
|
610
|
+
reflected_mask = np.linalg.det(v) * np.linalg.det(w) < 0
|
|
641
611
|
v[reflected_mask, :, -1] *= -1
|
|
642
612
|
matrices = np.matmul(v, w)
|
|
643
613
|
return matrices
|
|
@@ -649,11 +619,7 @@ def _multi_matmul(matrices, vectors):
|
|
|
649
619
|
with m x n vectors.
|
|
650
620
|
"""
|
|
651
621
|
return np.transpose(
|
|
652
|
-
np.matmul(
|
|
653
|
-
matrices,
|
|
654
|
-
np.transpose(vectors, axes=(0, 2, 1))
|
|
655
|
-
),
|
|
656
|
-
axes=(0, 2, 1)
|
|
622
|
+
np.matmul(matrices, np.transpose(vectors, axes=(0, 2, 1))), axes=(0, 2, 1)
|
|
657
623
|
)
|
|
658
624
|
|
|
659
625
|
|
|
@@ -663,8 +629,8 @@ def _get_backbone_anchor_indices(atoms):
|
|
|
663
629
|
nucleotide and return their indices.
|
|
664
630
|
"""
|
|
665
631
|
return np.where(
|
|
666
|
-
((filter_amino_acids(atoms)) & (atoms.atom_name == "CA"))
|
|
667
|
-
((filter_nucleotides(atoms)) & (atoms.atom_name == "P"))
|
|
632
|
+
((filter_amino_acids(atoms)) & (atoms.atom_name == "CA"))
|
|
633
|
+
| ((filter_nucleotides(atoms)) & (atoms.atom_name == "P"))
|
|
668
634
|
)[0]
|
|
669
635
|
|
|
670
636
|
|
|
@@ -717,11 +683,7 @@ def _find_matching_anchors(
|
|
|
717
683
|
def _to_sequence(atoms):
|
|
718
684
|
sequences, _ = to_sequence(atoms, allow_hetero=True)
|
|
719
685
|
if len(sequences) == 0:
|
|
720
|
-
raise ValueError(
|
|
721
|
-
"Structure does not contain any amino acids or nucleotides"
|
|
722
|
-
)
|
|
686
|
+
raise ValueError("Structure does not contain any amino acids or nucleotides")
|
|
723
687
|
if len(sequences) > 1:
|
|
724
|
-
raise ValueError(
|
|
725
|
-
|
|
726
|
-
)
|
|
727
|
-
return sequences[0]
|
|
688
|
+
raise ValueError("Structure contains multiple chains, but only one is allowed")
|
|
689
|
+
return sequences[0]
|