biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/chains.py
CHANGED
|
@@ -9,22 +9,38 @@ atom level.
|
|
|
9
9
|
|
|
10
10
|
__name__ = "biotite.structure"
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
|
-
__all__ = [
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
__all__ = [
|
|
13
|
+
"get_chain_starts",
|
|
14
|
+
"apply_chain_wise",
|
|
15
|
+
"spread_chain_wise",
|
|
16
|
+
"get_chain_masks",
|
|
17
|
+
"get_chain_starts_for",
|
|
18
|
+
"get_chain_positions",
|
|
19
|
+
"chain_iter",
|
|
20
|
+
"get_chains",
|
|
21
|
+
"get_chain_count",
|
|
22
|
+
"chain_iter",
|
|
23
|
+
]
|
|
15
24
|
|
|
16
25
|
import numpy as np
|
|
17
|
-
from .
|
|
26
|
+
from biotite.structure.segments import (
|
|
27
|
+
apply_segment_wise,
|
|
28
|
+
get_segment_masks,
|
|
29
|
+
get_segment_positions,
|
|
30
|
+
get_segment_starts_for,
|
|
31
|
+
segment_iter,
|
|
32
|
+
spread_segment_wise,
|
|
33
|
+
)
|
|
18
34
|
|
|
19
35
|
|
|
20
36
|
def get_chain_starts(array, add_exclusive_stop=False):
|
|
21
37
|
"""
|
|
22
38
|
Get the indices in an atom array, which indicates the beginning of
|
|
23
39
|
a new chain.
|
|
24
|
-
|
|
40
|
+
|
|
25
41
|
A new chain starts, when the chain ID changes or when the residue ID
|
|
26
42
|
decreases.
|
|
27
|
-
|
|
43
|
+
|
|
28
44
|
Parameters
|
|
29
45
|
----------
|
|
30
46
|
array : AtomArray or AtomArrayStack
|
|
@@ -33,17 +49,17 @@ def get_chain_starts(array, add_exclusive_stop=False):
|
|
|
33
49
|
If true, the exclusive stop of the input atom array, i.e.
|
|
34
50
|
``array.array_length()``, is added to the returned array of
|
|
35
51
|
start indices as last element.
|
|
36
|
-
|
|
52
|
+
|
|
37
53
|
Returns
|
|
38
54
|
-------
|
|
39
55
|
starts : ndarray, dtype=int
|
|
40
56
|
The start indices of new chains in `array`.
|
|
41
|
-
|
|
57
|
+
|
|
42
58
|
Notes
|
|
43
59
|
-----
|
|
44
60
|
This method is internally used by all other chain-related
|
|
45
61
|
functions.
|
|
46
|
-
|
|
62
|
+
|
|
47
63
|
See also
|
|
48
64
|
--------
|
|
49
65
|
get_residue_starts
|
|
@@ -51,13 +67,13 @@ def get_chain_starts(array, add_exclusive_stop=False):
|
|
|
51
67
|
diff = np.diff(array.res_id)
|
|
52
68
|
res_id_decrement = diff < 0
|
|
53
69
|
# This mask is 'true' at indices where the value changes
|
|
54
|
-
chain_id_changes =
|
|
55
|
-
|
|
70
|
+
chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
|
|
71
|
+
|
|
56
72
|
# Convert mask to indices
|
|
57
73
|
# Add 1, to shift the indices from the end of a chain
|
|
58
74
|
# to the start of a new chain
|
|
59
75
|
chain_starts = np.where(res_id_decrement | chain_id_changes)[0] + 1
|
|
60
|
-
|
|
76
|
+
|
|
61
77
|
# The first chain is not included yet -> Insert '[0]'
|
|
62
78
|
if add_exclusive_stop:
|
|
63
79
|
return np.concatenate(([0], chain_starts, [array.array_length()]))
|
|
@@ -69,7 +85,7 @@ def apply_chain_wise(array, data, function, axis=None):
|
|
|
69
85
|
"""
|
|
70
86
|
Apply a function to intervals of data, where each interval
|
|
71
87
|
corresponds to one chain.
|
|
72
|
-
|
|
88
|
+
|
|
73
89
|
The function takes an atom array (stack) and an data array
|
|
74
90
|
(`ndarray`) of the same length. The function iterates through the
|
|
75
91
|
chain IDs of the atom array (stack) and identifies intervals of
|
|
@@ -77,8 +93,8 @@ def apply_chain_wise(array, data, function, axis=None):
|
|
|
77
93
|
partitioned into the same intervals, and each interval (also an
|
|
78
94
|
:class:`ndarray`) is put as parameter into `function`. Each return value is
|
|
79
95
|
stored as element in the resulting :class:`ndarray`, therefore each element
|
|
80
|
-
corresponds to one chain.
|
|
81
|
-
|
|
96
|
+
corresponds to one chain.
|
|
97
|
+
|
|
82
98
|
Parameters
|
|
83
99
|
----------
|
|
84
100
|
array : AtomArray or AtomArrayStack
|
|
@@ -92,14 +108,14 @@ def apply_chain_wise(array, data, function, axis=None):
|
|
|
92
108
|
must return a value with the same shape and data type.
|
|
93
109
|
axis : int, optional
|
|
94
110
|
This value is given to the `axis` parameter of `function`.
|
|
95
|
-
|
|
111
|
+
|
|
96
112
|
Returns
|
|
97
113
|
-------
|
|
98
114
|
processed_data : ndarray
|
|
99
115
|
Chain-wise evaluation of `data` by `function`. The size of the
|
|
100
116
|
first dimension of this array is equal to the amount of
|
|
101
117
|
chains.
|
|
102
|
-
|
|
118
|
+
|
|
103
119
|
See also
|
|
104
120
|
--------
|
|
105
121
|
apply_residue_wise
|
|
@@ -114,11 +130,11 @@ def spread_chain_wise(array, input_data):
|
|
|
114
130
|
|
|
115
131
|
Each value in the chain-wise input is assigned to all atoms of
|
|
116
132
|
this chain:
|
|
117
|
-
|
|
133
|
+
|
|
118
134
|
``output_data[i] = input_data[j]``,
|
|
119
135
|
*i* is incremented from atom to atom,
|
|
120
136
|
*j* is incremented every chain change.
|
|
121
|
-
|
|
137
|
+
|
|
122
138
|
Parameters
|
|
123
139
|
----------
|
|
124
140
|
array : AtomArray or AtomArrayStack
|
|
@@ -126,13 +142,13 @@ def spread_chain_wise(array, input_data):
|
|
|
126
142
|
input_data : ndarray
|
|
127
143
|
The data to be spread. The length of axis=0 must be equal to
|
|
128
144
|
the amount of different chain IDs in `array`.
|
|
129
|
-
|
|
145
|
+
|
|
130
146
|
Returns
|
|
131
147
|
-------
|
|
132
148
|
output_data : ndarray
|
|
133
149
|
Chain-wise spread `input_data`. Length is the same as
|
|
134
150
|
`array_length()` of `array`.
|
|
135
|
-
|
|
151
|
+
|
|
136
152
|
See also
|
|
137
153
|
--------
|
|
138
154
|
spread_residue_wise
|
|
@@ -154,14 +170,14 @@ def get_chain_masks(array, indices):
|
|
|
154
170
|
These indices indicate the atoms to get the corresponding
|
|
155
171
|
chains for.
|
|
156
172
|
Negative indices are not allowed.
|
|
157
|
-
|
|
173
|
+
|
|
158
174
|
Returns
|
|
159
175
|
-------
|
|
160
176
|
chains_masks : ndarray, dtype=bool, shape=(k,n)
|
|
161
177
|
Multiple boolean masks, one for each given index in `indices`.
|
|
162
178
|
Each array masks the atoms that belong to the same chain as
|
|
163
179
|
the atom at the given index.
|
|
164
|
-
|
|
180
|
+
|
|
165
181
|
See also
|
|
166
182
|
--------
|
|
167
183
|
get_residue_masks
|
|
@@ -183,13 +199,13 @@ def get_chain_starts_for(array, indices):
|
|
|
183
199
|
These indices point to the atoms to get the corresponding
|
|
184
200
|
chain starts for.
|
|
185
201
|
Negative indices are not allowed.
|
|
186
|
-
|
|
202
|
+
|
|
187
203
|
Returns
|
|
188
204
|
-------
|
|
189
205
|
start_indices : ndarray, dtype=int, shape=(k,)
|
|
190
206
|
The indices that point to the chain starts for the input
|
|
191
207
|
`indices`.
|
|
192
|
-
|
|
208
|
+
|
|
193
209
|
See also
|
|
194
210
|
--------
|
|
195
211
|
get_residue_starts_for
|
|
@@ -214,12 +230,12 @@ def get_chain_positions(array, indices):
|
|
|
214
230
|
These indices point to the atoms to get the corresponding
|
|
215
231
|
chain positions for.
|
|
216
232
|
Negative indices are not allowed.
|
|
217
|
-
|
|
233
|
+
|
|
218
234
|
Returns
|
|
219
235
|
-------
|
|
220
236
|
start_indices : ndarray, dtype=int, shape=(k,)
|
|
221
237
|
The indices that point to the position of the chains.
|
|
222
|
-
|
|
238
|
+
|
|
223
239
|
See also
|
|
224
240
|
--------
|
|
225
241
|
get_residue_positions
|
|
@@ -231,20 +247,20 @@ def get_chain_positions(array, indices):
|
|
|
231
247
|
def get_chains(array):
|
|
232
248
|
"""
|
|
233
249
|
Get the chain IDs of an atom array (stack).
|
|
234
|
-
|
|
250
|
+
|
|
235
251
|
The chains are listed in the same order they occur in the array
|
|
236
252
|
(stack).
|
|
237
|
-
|
|
253
|
+
|
|
238
254
|
Parameters
|
|
239
255
|
----------
|
|
240
256
|
array : AtomArray or AtomArrayStack
|
|
241
257
|
The atom array (stack), where the chains are determined.
|
|
242
|
-
|
|
258
|
+
|
|
243
259
|
Returns
|
|
244
260
|
-------
|
|
245
261
|
ids : ndarray, dtype=str
|
|
246
262
|
List of chain IDs.
|
|
247
|
-
|
|
263
|
+
|
|
248
264
|
See also
|
|
249
265
|
--------
|
|
250
266
|
get_residues
|
|
@@ -255,20 +271,20 @@ def get_chains(array):
|
|
|
255
271
|
def get_chain_count(array):
|
|
256
272
|
"""
|
|
257
273
|
Get the amount of chains in an atom array (stack).
|
|
258
|
-
|
|
274
|
+
|
|
259
275
|
The count is determined from the `chain_id` annotation.
|
|
260
276
|
Each time the chain ID changes, the count is incremented.
|
|
261
|
-
|
|
277
|
+
|
|
262
278
|
Parameters
|
|
263
279
|
----------
|
|
264
280
|
array : AtomArray or AtomArrayStack
|
|
265
281
|
The atom array (stack), where the chains are counted.
|
|
266
|
-
|
|
282
|
+
|
|
267
283
|
Returns
|
|
268
284
|
-------
|
|
269
285
|
count : int
|
|
270
286
|
Amount of chains.
|
|
271
|
-
|
|
287
|
+
|
|
272
288
|
See also
|
|
273
289
|
--------
|
|
274
290
|
get_residue_count
|
|
@@ -279,20 +295,20 @@ def get_chain_count(array):
|
|
|
279
295
|
def chain_iter(array):
|
|
280
296
|
"""
|
|
281
297
|
Iterate over all chains in an atom array (stack).
|
|
282
|
-
|
|
298
|
+
|
|
283
299
|
Parameters
|
|
284
300
|
----------
|
|
285
301
|
array : AtomArray or AtomArrayStack
|
|
286
302
|
The atom array (stack) to iterate over.
|
|
287
|
-
|
|
303
|
+
|
|
288
304
|
Yields
|
|
289
305
|
------
|
|
290
306
|
chain : AtomArray or AtomArrayStack
|
|
291
307
|
A single chain of the input `array`.
|
|
292
|
-
|
|
308
|
+
|
|
293
309
|
See also
|
|
294
310
|
--------
|
|
295
311
|
residue_iter
|
|
296
312
|
"""
|
|
297
313
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
298
|
-
return segment_iter(array, starts)
|
|
314
|
+
return segment_iter(array, starts)
|
|
Binary file
|
biotite/structure/compare.py
CHANGED
|
@@ -12,21 +12,21 @@ __author__ = "Patrick Kunzmann"
|
|
|
12
12
|
__all__ = ["rmsd", "rmspd", "rmsf", "average"]
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
|
-
from .atoms import
|
|
16
|
-
from .geometry import index_distance
|
|
17
|
-
from .util import vector_dot
|
|
15
|
+
from biotite.structure.atoms import AtomArrayStack, coord
|
|
16
|
+
from biotite.structure.geometry import index_distance
|
|
17
|
+
from biotite.structure.util import vector_dot
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def rmsd(reference, subject):
|
|
21
21
|
r"""
|
|
22
22
|
Calculate the RMSD between two structures.
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
The *root-mean-square-deviation* (RMSD) indicates the overall
|
|
25
25
|
deviation of each model of a structure to a reference structure.
|
|
26
26
|
It is defined as:
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
.. math:: RMSD = \sqrt{ \frac{1}{n} \sum\limits_{i=1}^n (x_i - x_{ref,i})^2}
|
|
29
|
-
|
|
29
|
+
|
|
30
30
|
Parameters
|
|
31
31
|
----------
|
|
32
32
|
reference : AtomArray or ndarray, dtype=float, shape=(n,3)
|
|
@@ -37,7 +37,7 @@ def rmsd(reference, subject):
|
|
|
37
37
|
Structure(s) to be compared with `reference`.
|
|
38
38
|
Alternatively, coordinates can be provided directly as
|
|
39
39
|
:class:`ndarray`.
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
Returns
|
|
42
42
|
-------
|
|
43
43
|
rmsd : float or ndarray, dtype=float, shape=(m,)
|
|
@@ -45,7 +45,7 @@ def rmsd(reference, subject):
|
|
|
45
45
|
If subject is an :class:`AtomArray` a float is returned.
|
|
46
46
|
If subject is an :class:`AtomArrayStack` a :class:`ndarray`
|
|
47
47
|
containing the RMSD for each model is returned.
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
See Also
|
|
50
50
|
--------
|
|
51
51
|
rmsf
|
|
@@ -71,16 +71,17 @@ def rmsd(reference, subject):
|
|
|
71
71
|
"""
|
|
72
72
|
return np.sqrt(np.mean(_sq_euclidian(reference, subject), axis=-1))
|
|
73
73
|
|
|
74
|
+
|
|
74
75
|
def rmspd(reference, subject, periodic=False, box=None):
|
|
75
76
|
r"""
|
|
76
|
-
Calculate the RMSD of atom pair distances for given structures
|
|
77
|
+
Calculate the RMSD of atom pair distances for given structures
|
|
77
78
|
relative to those found in a reference structure.
|
|
78
79
|
|
|
79
|
-
Unlike the standard RMSD, the *root-mean-square-pairwise-deviation*
|
|
80
|
-
(RMSPD) is a fit-free method to determine deviations between
|
|
80
|
+
Unlike the standard RMSD, the *root-mean-square-pairwise-deviation*
|
|
81
|
+
(RMSPD) is a fit-free method to determine deviations between
|
|
81
82
|
a structure and a preset reference.
|
|
82
83
|
|
|
83
|
-
.. math:: RMSPD = \sqrt{ \frac{1}{n^2} \sum\limits_{i=1}^n \sum\limits_{j \neq i}^n (d_{ij} - d_{ref,ij})^2}
|
|
84
|
+
.. math:: RMSPD = \sqrt{ \frac{1}{n^2} \sum\limits_{i=1}^n \sum\limits_{j \neq i}^n (d_{ij} - d_{ref,ij})^2}
|
|
84
85
|
|
|
85
86
|
Parameters
|
|
86
87
|
----------
|
|
@@ -102,7 +103,7 @@ def rmspd(reference, subject, periodic=False, box=None):
|
|
|
102
103
|
box : ndarray, shape=(3,3) or shape=(m,3,3), optional
|
|
103
104
|
If this parameter is set, the given box is used instead of the
|
|
104
105
|
`box` attribute of `atoms`.
|
|
105
|
-
|
|
106
|
+
|
|
106
107
|
Returns
|
|
107
108
|
-------
|
|
108
109
|
rmspd : float or ndarray, dtype=float, shape=(m,)
|
|
@@ -110,7 +111,7 @@ def rmspd(reference, subject, periodic=False, box=None):
|
|
|
110
111
|
If subject is an :class:`AtomArray` a float is returned.
|
|
111
112
|
If subject is an :class:`AtomArrayStack` a :class:`ndarray`
|
|
112
113
|
containing the RMSD for each model is returned.
|
|
113
|
-
|
|
114
|
+
|
|
114
115
|
Warnings
|
|
115
116
|
--------
|
|
116
117
|
Internally, this function uses :func:`index_distance()`.
|
|
@@ -119,7 +120,7 @@ def rmspd(reference, subject, periodic=False, box=None):
|
|
|
119
120
|
prior to the computation of RMSPDs with `periodic` set to false
|
|
120
121
|
to ensure correct results.
|
|
121
122
|
(e.g. with :func:`remove_pbc()`).
|
|
122
|
-
|
|
123
|
+
|
|
123
124
|
See also
|
|
124
125
|
--------
|
|
125
126
|
index_distance
|
|
@@ -134,9 +135,10 @@ def rmspd(reference, subject, periodic=False, box=None):
|
|
|
134
135
|
refdist = index_distance(reference, pairs, periodic=periodic, box=box)
|
|
135
136
|
subjdist = index_distance(subject, pairs, periodic=periodic, box=box)
|
|
136
137
|
|
|
137
|
-
rmspd = np.sqrt(np.sum((subjdist - refdist)**2, axis
|
|
138
|
+
rmspd = np.sqrt(np.sum((subjdist - refdist) ** 2, axis=-1)) / reflen
|
|
138
139
|
return rmspd
|
|
139
140
|
|
|
141
|
+
|
|
140
142
|
def rmsf(reference, subject):
|
|
141
143
|
r"""
|
|
142
144
|
Calculate the RMSF between two structures.
|
|
@@ -146,9 +148,9 @@ def rmsf(reference, subject):
|
|
|
146
148
|
models.
|
|
147
149
|
Usually the reference structure, is the average over all models.
|
|
148
150
|
The RMSF is defined as:
|
|
149
|
-
|
|
151
|
+
|
|
150
152
|
.. math:: RMSF(i) = \sqrt{ \frac{1}{T} \sum\limits_{t=1}^T (x_i(t) - x_{ref,i}(t))^2}
|
|
151
|
-
|
|
153
|
+
|
|
152
154
|
Parameters
|
|
153
155
|
----------
|
|
154
156
|
reference : AtomArray or ndarray, dtype=float, shape=(n,3)
|
|
@@ -161,14 +163,14 @@ def rmsf(reference, subject):
|
|
|
161
163
|
:class:`AtomArrayStack`.
|
|
162
164
|
Alternatively, coordinates can be provided directly as
|
|
163
165
|
:class:`ndarray`.
|
|
164
|
-
|
|
166
|
+
|
|
165
167
|
Returns
|
|
166
168
|
-------
|
|
167
169
|
rmsf : ndarray, dtype=float, shape=(n,)
|
|
168
170
|
RMSF between subject and reference structure.
|
|
169
171
|
Each element gives the RMSF for the atom at the respective
|
|
170
172
|
index.
|
|
171
|
-
|
|
173
|
+
|
|
172
174
|
See Also
|
|
173
175
|
--------
|
|
174
176
|
rmsd
|
|
@@ -198,41 +200,39 @@ def rmsf(reference, subject):
|
|
|
198
200
|
def average(atoms):
|
|
199
201
|
"""
|
|
200
202
|
Calculate an average structure.
|
|
201
|
-
|
|
203
|
+
|
|
202
204
|
The average structure has the average coordinates
|
|
203
205
|
of the input models.
|
|
204
|
-
|
|
206
|
+
|
|
205
207
|
Parameters
|
|
206
208
|
----------
|
|
207
209
|
atoms : AtomArrayStack or ndarray, dtype=float, shape=(m,n,3)
|
|
208
210
|
The structure models to be averaged.
|
|
209
211
|
Alternatively, coordinates can be provided directly as
|
|
210
212
|
:class:`ndarray`.
|
|
211
|
-
|
|
213
|
+
|
|
212
214
|
Returns
|
|
213
215
|
-------
|
|
214
216
|
average : AtomArray or ndarray, dtype=float, shape=(n,3)
|
|
215
217
|
Structure with averaged atom coordinates.
|
|
216
218
|
If `atoms` is a :class:`ndarray` and :class:`ndarray` is also
|
|
217
219
|
returned.
|
|
218
|
-
|
|
220
|
+
|
|
219
221
|
See Also
|
|
220
222
|
--------
|
|
221
223
|
rmsd, rmsf
|
|
222
|
-
|
|
224
|
+
|
|
223
225
|
Notes
|
|
224
226
|
-----
|
|
225
227
|
The calculated average structure is not suitable for visualization
|
|
226
228
|
or geometric calculations, since bond lengths and angles will
|
|
227
229
|
deviate from meaningful values.
|
|
228
230
|
This method is rather useful to provide a reference structure for
|
|
229
|
-
calculation of e.g. the RMSD or RMSF.
|
|
231
|
+
calculation of e.g. the RMSD or RMSF.
|
|
230
232
|
"""
|
|
231
233
|
coords = coord(atoms)
|
|
232
234
|
if coords.ndim != 3:
|
|
233
|
-
raise TypeError(
|
|
234
|
-
"Expected an AtomArrayStack or an ndarray with shape (m,n,3)"
|
|
235
|
-
)
|
|
235
|
+
raise TypeError("Expected an AtomArrayStack or an ndarray with shape (m,n,3)")
|
|
236
236
|
mean_coords = np.mean(coords, axis=0)
|
|
237
237
|
if isinstance(atoms, AtomArrayStack):
|
|
238
238
|
mean_array = atoms[0].copy()
|
|
@@ -246,7 +246,7 @@ def _sq_euclidian(reference, subject):
|
|
|
246
246
|
"""
|
|
247
247
|
Calculate squared euclidian distance between atoms in two
|
|
248
248
|
structures.
|
|
249
|
-
|
|
249
|
+
|
|
250
250
|
Parameters
|
|
251
251
|
----------
|
|
252
252
|
reference : AtomArray or ndarray, dtype=float, shape=(n,3)
|
|
@@ -254,7 +254,7 @@ def _sq_euclidian(reference, subject):
|
|
|
254
254
|
subject : AtomArray or AtomArrayStack or ndarray, dtype=float, shape=(n,3) or shape=(m,n,3)
|
|
255
255
|
Structure(s) whose atoms squared euclidian distance to
|
|
256
256
|
`reference` is measured.
|
|
257
|
-
|
|
257
|
+
|
|
258
258
|
Returns
|
|
259
259
|
-------
|
|
260
260
|
ndarray, dtype=float, shape=(n,) or shape=(m,n)
|
|
@@ -271,4 +271,4 @@ def _sq_euclidian(reference, subject):
|
|
|
271
271
|
"Expected an AtomArray or an ndarray with shape (n,3) as reference"
|
|
272
272
|
)
|
|
273
273
|
dif = subject_coord - reference_coord
|
|
274
|
-
return vector_dot(dif, dif)
|
|
274
|
+
return vector_dot(dif, dif)
|
biotite/structure/density.py
CHANGED
|
@@ -11,11 +11,10 @@ __author__ = "Daniel Bauer"
|
|
|
11
11
|
__all__ = ["density"]
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
|
-
from .atoms import coord
|
|
14
|
+
from biotite.structure.atoms import coord
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def density(atoms, selection=None, delta=1.0, bins=None,
|
|
18
|
-
density=False, weights=None):
|
|
17
|
+
def density(atoms, selection=None, delta=1.0, bins=None, density=False, weights=None):
|
|
19
18
|
r"""
|
|
20
19
|
Compute the density of the selected atoms.
|
|
21
20
|
|
|
@@ -51,13 +50,13 @@ def density(atoms, selection=None, delta=1.0, bins=None,
|
|
|
51
50
|
Otherwise, returns the probability density function of each bin.
|
|
52
51
|
See :func:`numpy.histogramdd()` for further details.
|
|
53
52
|
weights: ndarray, shape=(n,) or shape=(m,n), optional
|
|
54
|
-
An array of values to weight the contribution of *n* atoms in
|
|
53
|
+
An array of values to weight the contribution of *n* atoms in
|
|
55
54
|
*m* models.
|
|
56
55
|
If the shape is *(n,)*, the weights will be interpreted as
|
|
57
56
|
*per atom*.
|
|
58
57
|
A shape of *(m,n)* allows to additionally weight atoms on a
|
|
59
58
|
*per model* basis.
|
|
60
|
-
|
|
59
|
+
|
|
61
60
|
Returns
|
|
62
61
|
-------
|
|
63
62
|
H : ndarray, dtype=float
|
|
@@ -69,12 +68,12 @@ def density(atoms, selection=None, delta=1.0, bins=None,
|
|
|
69
68
|
A list containing the 3 arrays describing the bin edges.
|
|
70
69
|
"""
|
|
71
70
|
coords = coord(atoms)
|
|
72
|
-
|
|
71
|
+
|
|
73
72
|
is_stack = coords.ndim == 3
|
|
74
73
|
|
|
75
74
|
# Define the grid for coordinate binning based on coordinates of
|
|
76
75
|
# supplied atoms
|
|
77
|
-
# This makes the binning independent of a supplied box vector and
|
|
76
|
+
# This makes the binning independent of a supplied box vector and
|
|
78
77
|
# fluctuating box dimensions are not a problem
|
|
79
78
|
# However, this means that the user has to make sure the region of
|
|
80
79
|
# interest is in the center of the box, i.e. by centering the
|
|
@@ -84,19 +83,17 @@ def density(atoms, selection=None, delta=1.0, bins=None,
|
|
|
84
83
|
axis = (0, 1)
|
|
85
84
|
else:
|
|
86
85
|
axis = 0
|
|
87
|
-
grid_min, grid_max = np.min(
|
|
88
|
-
coords, axis=axis), np.max(coords, axis=axis
|
|
89
|
-
)
|
|
86
|
+
grid_min, grid_max = np.min(coords, axis=axis), np.max(coords, axis=axis)
|
|
90
87
|
bins = [
|
|
91
|
-
np.arange(grid_min[0], grid_max[0]+delta, delta),
|
|
92
|
-
np.arange(grid_min[1], grid_max[1]+delta, delta),
|
|
93
|
-
np.arange(grid_min[2], grid_max[2]+delta, delta),
|
|
88
|
+
np.arange(grid_min[0], grid_max[0] + delta, delta),
|
|
89
|
+
np.arange(grid_min[1], grid_max[1] + delta, delta),
|
|
90
|
+
np.arange(grid_min[2], grid_max[2] + delta, delta),
|
|
94
91
|
]
|
|
95
92
|
|
|
96
93
|
if selection is None:
|
|
97
94
|
selected_coords = coords
|
|
98
95
|
else:
|
|
99
|
-
selected_coords = coords[...,selection, :]
|
|
96
|
+
selected_coords = coords[..., selection, :]
|
|
100
97
|
|
|
101
98
|
# Reshape the coords into Nx3
|
|
102
99
|
coords = selected_coords.reshape((np.prod(selected_coords.shape[:-1]), 3))
|
|
@@ -106,9 +103,7 @@ def density(atoms, selection=None, delta=1.0, bins=None,
|
|
|
106
103
|
if is_stack and len(weights.shape) < 2:
|
|
107
104
|
weights = np.tile(weights, len(selected_coords))
|
|
108
105
|
weights = weights.reshape(coords.shape[0])
|
|
109
|
-
|
|
106
|
+
|
|
110
107
|
# Calculate the histogram
|
|
111
|
-
hist = np.histogramdd(
|
|
112
|
-
coords, bins=bins, density=density, weights=weights
|
|
113
|
-
)
|
|
108
|
+
hist = np.histogramdd(coords, bins=bins, density=density, weights=weights)
|
|
114
109
|
return hist
|
biotite/structure/dotbracket.py
CHANGED
|
@@ -9,13 +9,12 @@ dot-bracket-notation.
|
|
|
9
9
|
|
|
10
10
|
__name__ = "biotite.structure"
|
|
11
11
|
__author__ = "Tom David Müller"
|
|
12
|
-
__all__ = ["dot_bracket_from_structure", "dot_bracket",
|
|
13
|
-
"base_pairs_from_dot_bracket"]
|
|
12
|
+
__all__ = ["dot_bracket_from_structure", "dot_bracket", "base_pairs_from_dot_bracket"]
|
|
14
13
|
|
|
15
14
|
import numpy as np
|
|
16
|
-
from .basepairs import base_pairs
|
|
17
|
-
from .pseudoknots import pseudoknots
|
|
18
|
-
from .residues import get_residue_count, get_residue_positions
|
|
15
|
+
from biotite.structure.basepairs import base_pairs
|
|
16
|
+
from biotite.structure.pseudoknots import pseudoknots
|
|
17
|
+
from biotite.structure.residues import get_residue_count, get_residue_positions
|
|
19
18
|
|
|
20
19
|
_OPENING_BRACKETS = "([{<ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
21
20
|
_OPENING_BRACKETS_BYTES = _OPENING_BRACKETS.encode()
|
|
@@ -24,7 +23,8 @@ _CLOSING_BRACKETS_BYTES = _CLOSING_BRACKETS.encode()
|
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
def dot_bracket_from_structure(
|
|
27
|
-
nucleic_acid_strand, scores=None, max_pseudoknot_order=None
|
|
26
|
+
nucleic_acid_strand, scores=None, max_pseudoknot_order=None
|
|
27
|
+
):
|
|
28
28
|
"""
|
|
29
29
|
Represent a nucleic-acid-strand in dot-bracket-letter-notation
|
|
30
30
|
(DBL-notation). :footcite:`Antczak2018`
|
|
@@ -53,16 +53,18 @@ def dot_bracket_from_structure(
|
|
|
53
53
|
|
|
54
54
|
References
|
|
55
55
|
----------
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
.. footbibliography::
|
|
58
58
|
"""
|
|
59
59
|
basepairs = base_pairs(nucleic_acid_strand)
|
|
60
60
|
if len(basepairs) == 0:
|
|
61
|
-
return [
|
|
61
|
+
return [""]
|
|
62
62
|
basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
|
|
63
63
|
length = get_residue_count(nucleic_acid_strand)
|
|
64
|
-
return dot_bracket(
|
|
65
|
-
|
|
64
|
+
return dot_bracket(
|
|
65
|
+
basepairs, length, scores=scores, max_pseudoknot_order=max_pseudoknot_order
|
|
66
|
+
)
|
|
67
|
+
|
|
66
68
|
|
|
67
69
|
def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
68
70
|
"""
|
|
@@ -115,21 +117,20 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
|
115
117
|
|
|
116
118
|
References
|
|
117
119
|
----------
|
|
118
|
-
|
|
120
|
+
|
|
119
121
|
.. footbibliography::
|
|
120
122
|
"""
|
|
121
123
|
# Make sure the lower residue is on the left for each row
|
|
122
124
|
basepairs = np.sort(basepairs, axis=1)
|
|
123
125
|
|
|
124
126
|
# Get pseudoknot order
|
|
125
|
-
pseudoknot_order = pseudoknots(
|
|
126
|
-
|
|
127
|
+
pseudoknot_order = pseudoknots(
|
|
128
|
+
basepairs, scores=scores, max_pseudoknot_order=max_pseudoknot_order
|
|
129
|
+
)
|
|
127
130
|
|
|
128
131
|
# Each optimal pseudoknot order solution is represented in
|
|
129
132
|
# dot-bracket-notation
|
|
130
|
-
notations = [
|
|
131
|
-
bytearray((b"."*length)) for _ in range(len(pseudoknot_order))
|
|
132
|
-
]
|
|
133
|
+
notations = [bytearray((b"." * length)) for _ in range(len(pseudoknot_order))]
|
|
133
134
|
for s, solution in enumerate(pseudoknot_order):
|
|
134
135
|
for basepair, order in zip(basepairs, solution):
|
|
135
136
|
if order == -1:
|
|
@@ -138,6 +139,7 @@ def dot_bracket(basepairs, length, scores=None, max_pseudoknot_order=None):
|
|
|
138
139
|
notations[s][basepair[1]] = _CLOSING_BRACKETS_BYTES[order]
|
|
139
140
|
return [notation.decode() for notation in notations]
|
|
140
141
|
|
|
142
|
+
|
|
141
143
|
def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
142
144
|
"""
|
|
143
145
|
Extract the base pairs from a nucleic-acid-strand in
|
|
@@ -172,7 +174,7 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
172
174
|
|
|
173
175
|
References
|
|
174
176
|
----------
|
|
175
|
-
|
|
177
|
+
|
|
176
178
|
.. footbibliography::
|
|
177
179
|
"""
|
|
178
180
|
basepairs = []
|
|
@@ -180,7 +182,6 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
180
182
|
|
|
181
183
|
# Iterate through input string and extract base pairs
|
|
182
184
|
for pos, symbol in enumerate(dot_bracket_notation):
|
|
183
|
-
|
|
184
185
|
if symbol in _OPENING_BRACKETS:
|
|
185
186
|
# Add opening residues to list (separate list for each
|
|
186
187
|
# bracket type)
|
|
@@ -197,9 +198,7 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
197
198
|
|
|
198
199
|
else:
|
|
199
200
|
if symbol != ".":
|
|
200
|
-
raise ValueError(
|
|
201
|
-
f"'{symbol}' is an invalid character for DBL-notation"
|
|
202
|
-
)
|
|
201
|
+
raise ValueError(f"'{symbol}' is an invalid character for DBL-notation")
|
|
203
202
|
|
|
204
203
|
for not_closed in opened_brackets:
|
|
205
204
|
if not_closed != []:
|
|
@@ -208,7 +207,6 @@ def base_pairs_from_dot_bracket(dot_bracket_notation):
|
|
|
208
207
|
"closing bracket"
|
|
209
208
|
)
|
|
210
209
|
|
|
211
|
-
|
|
212
210
|
# Sort the base pair indices in ascending order
|
|
213
211
|
basepairs = np.array(basepairs)
|
|
214
212
|
if len(basepairs) > 0:
|