biotite 0.41.1__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +36 -10
- biotite/application/application.py +22 -11
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +16 -5
- biotite/sequence/align/__init__.py +160 -6
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +35 -35
- biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +112 -126
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +64 -64
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +226 -240
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +88 -100
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cp310-win_amd64.pyd +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +21 -7
- biotite/structure/info/groups.py +10 -15
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -52
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
- biotite-1.0.0.dist-info/RECORD +322 -0
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.1.dist-info/RECORD +0 -340
- {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/repair.py
CHANGED
|
@@ -8,80 +8,14 @@ This module contains functionalities for repairing malformed structures.
|
|
|
8
8
|
|
|
9
9
|
__name__ = "biotite.structure"
|
|
10
10
|
__author__ = "Patrick Kunzmann, Daniel Bauer"
|
|
11
|
-
__all__ = ["
|
|
12
|
-
"create_continuous_res_ids", "infer_elements", "create_atom_names"]
|
|
11
|
+
__all__ = ["create_continuous_res_ids", "infer_elements", "create_atom_names"]
|
|
13
12
|
|
|
14
|
-
from collections import Counter
|
|
15
13
|
import warnings
|
|
14
|
+
from collections import Counter
|
|
16
15
|
import numpy as np
|
|
17
|
-
from .atoms import AtomArray, AtomArrayStack
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def renumber_atom_ids(array, start=None):
|
|
23
|
-
"""
|
|
24
|
-
Renumber the atom IDs of the given array.
|
|
25
|
-
|
|
26
|
-
DEPRECATED.
|
|
27
|
-
|
|
28
|
-
Parameters
|
|
29
|
-
----------
|
|
30
|
-
array : AtomArray or AtomArrayStack
|
|
31
|
-
The array to be checked.
|
|
32
|
-
start : int, optional
|
|
33
|
-
The starting index for renumbering.
|
|
34
|
-
The first ID in the array is taken by default.
|
|
35
|
-
|
|
36
|
-
Returns
|
|
37
|
-
-------
|
|
38
|
-
array : AtomArray or AtomArrayStack
|
|
39
|
-
The renumbered array.
|
|
40
|
-
"""
|
|
41
|
-
warnings.warn(
|
|
42
|
-
"'renumber_atom_ids()' is deprecated",
|
|
43
|
-
DeprecationWarning
|
|
44
|
-
)
|
|
45
|
-
if "atom_id" not in array.get_annotation_categories():
|
|
46
|
-
raise ValueError("The atom array must have the 'atom_id' annotation")
|
|
47
|
-
if start is None:
|
|
48
|
-
start = array.atom_id[0]
|
|
49
|
-
array = array.copy()
|
|
50
|
-
array.atom_id = np.arange(start, array.shape[-1]+1)
|
|
51
|
-
return array
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def renumber_res_ids(array, start=None):
|
|
55
|
-
"""
|
|
56
|
-
Renumber the residue IDs of the given array, so that are continuous.
|
|
57
|
-
|
|
58
|
-
DEPRECATED: Use :func:`create_continuous_res_ids()`instead.
|
|
59
|
-
|
|
60
|
-
Parameters
|
|
61
|
-
----------
|
|
62
|
-
array : AtomArray or AtomArrayStack
|
|
63
|
-
The array to be checked.
|
|
64
|
-
start : int, optional
|
|
65
|
-
The starting index for renumbering.
|
|
66
|
-
The first ID in the array is taken by default.
|
|
67
|
-
|
|
68
|
-
Returns
|
|
69
|
-
-------
|
|
70
|
-
array : AtomArray or AtomArrayStack
|
|
71
|
-
The renumbered array.
|
|
72
|
-
"""
|
|
73
|
-
warnings.warn(
|
|
74
|
-
"'renumber_res_ids()' is deprecated, use 'create_continuous_res_ids()'",
|
|
75
|
-
DeprecationWarning
|
|
76
|
-
)
|
|
77
|
-
if start is None:
|
|
78
|
-
start = array.res_id[0]
|
|
79
|
-
diff = np.diff(array.res_id)
|
|
80
|
-
diff[diff != 0] = 1
|
|
81
|
-
new_res_ids = np.concatenate(([start], diff)).cumsum()
|
|
82
|
-
array = array.copy()
|
|
83
|
-
array.res_id = new_res_ids
|
|
84
|
-
return array
|
|
16
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack
|
|
17
|
+
from biotite.structure.chains import get_chain_starts
|
|
18
|
+
from biotite.structure.residues import get_residue_starts
|
|
85
19
|
|
|
86
20
|
|
|
87
21
|
def create_continuous_res_ids(atoms, restart_each_chain=True):
|
|
@@ -217,18 +151,131 @@ def create_atom_names(atoms):
|
|
|
217
151
|
return atom_names
|
|
218
152
|
|
|
219
153
|
|
|
220
|
-
_elements = [
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
"
|
|
224
|
-
"
|
|
225
|
-
"
|
|
226
|
-
"
|
|
227
|
-
"
|
|
228
|
-
"
|
|
229
|
-
"
|
|
230
|
-
"
|
|
154
|
+
_elements = [
|
|
155
|
+
elem.upper()
|
|
156
|
+
for elem in [
|
|
157
|
+
"H",
|
|
158
|
+
"He",
|
|
159
|
+
"Li",
|
|
160
|
+
"Be",
|
|
161
|
+
"B",
|
|
162
|
+
"C",
|
|
163
|
+
"N",
|
|
164
|
+
"O",
|
|
165
|
+
"F",
|
|
166
|
+
"Ne",
|
|
167
|
+
"Na",
|
|
168
|
+
"Mg",
|
|
169
|
+
"Al",
|
|
170
|
+
"Si",
|
|
171
|
+
"P",
|
|
172
|
+
"S",
|
|
173
|
+
"Cl",
|
|
174
|
+
"Ar",
|
|
175
|
+
"K",
|
|
176
|
+
"Ca",
|
|
177
|
+
"Sc",
|
|
178
|
+
"Ti",
|
|
179
|
+
"V",
|
|
180
|
+
"Cr",
|
|
181
|
+
"Mn",
|
|
182
|
+
"Fe",
|
|
183
|
+
"Co",
|
|
184
|
+
"Ni",
|
|
185
|
+
"Cu",
|
|
186
|
+
"Zn",
|
|
187
|
+
"Ga",
|
|
188
|
+
"Ge",
|
|
189
|
+
"As",
|
|
190
|
+
"Se",
|
|
191
|
+
"Br",
|
|
192
|
+
"Kr",
|
|
193
|
+
"Rb",
|
|
194
|
+
"Sr",
|
|
195
|
+
"Y",
|
|
196
|
+
"Zr",
|
|
197
|
+
"Nb",
|
|
198
|
+
"Mo",
|
|
199
|
+
"Tc",
|
|
200
|
+
"Ru",
|
|
201
|
+
"Rh",
|
|
202
|
+
"Pd",
|
|
203
|
+
"Ag",
|
|
204
|
+
"Cd",
|
|
205
|
+
"In",
|
|
206
|
+
"Sn",
|
|
207
|
+
"Sb",
|
|
208
|
+
"Te",
|
|
209
|
+
"I",
|
|
210
|
+
"Xe",
|
|
211
|
+
"Cs",
|
|
212
|
+
"Ba",
|
|
213
|
+
"La",
|
|
214
|
+
"Ce",
|
|
215
|
+
"Pr",
|
|
216
|
+
"Nd",
|
|
217
|
+
"Pm",
|
|
218
|
+
"Sm",
|
|
219
|
+
"Eu",
|
|
220
|
+
"Gd",
|
|
221
|
+
"Tb",
|
|
222
|
+
"Dy",
|
|
223
|
+
"Ho",
|
|
224
|
+
"Er",
|
|
225
|
+
"Tm",
|
|
226
|
+
"Yb",
|
|
227
|
+
"Lu",
|
|
228
|
+
"Hf",
|
|
229
|
+
"Ta",
|
|
230
|
+
"W",
|
|
231
|
+
"Re",
|
|
232
|
+
"Os",
|
|
233
|
+
"Ir",
|
|
234
|
+
"Pt",
|
|
235
|
+
"Au",
|
|
236
|
+
"Hg",
|
|
237
|
+
"Tl",
|
|
238
|
+
"Pb",
|
|
239
|
+
"Bi",
|
|
240
|
+
"Po",
|
|
241
|
+
"At",
|
|
242
|
+
"Rn",
|
|
243
|
+
"Fr",
|
|
244
|
+
"Ra",
|
|
245
|
+
"Ac",
|
|
246
|
+
"Th",
|
|
247
|
+
"Pa",
|
|
248
|
+
"U",
|
|
249
|
+
"Np",
|
|
250
|
+
"Pu",
|
|
251
|
+
"Am",
|
|
252
|
+
"Cm",
|
|
253
|
+
"Bk",
|
|
254
|
+
"Cf",
|
|
255
|
+
"Es",
|
|
256
|
+
"Fm",
|
|
257
|
+
"Md",
|
|
258
|
+
"No",
|
|
259
|
+
"Lr",
|
|
260
|
+
"Rf",
|
|
261
|
+
"Db",
|
|
262
|
+
"Sg",
|
|
263
|
+
"Bh",
|
|
264
|
+
"Hs",
|
|
265
|
+
"Mt",
|
|
266
|
+
"Ds",
|
|
267
|
+
"Rg",
|
|
268
|
+
"Cn",
|
|
269
|
+
"Nh",
|
|
270
|
+
"Fl",
|
|
271
|
+
"Mc",
|
|
272
|
+
"Lv",
|
|
273
|
+
"Ts",
|
|
274
|
+
"Og",
|
|
275
|
+
]
|
|
231
276
|
]
|
|
277
|
+
|
|
278
|
+
|
|
232
279
|
def _guess_element(atom_name):
|
|
233
280
|
# remove digits (1H -> H)
|
|
234
281
|
elem = "".join([i for i in atom_name if not i.isdigit()])
|
|
@@ -237,9 +284,13 @@ def _guess_element(atom_name):
|
|
|
237
284
|
return ""
|
|
238
285
|
|
|
239
286
|
# Some often used elements for biomolecules
|
|
240
|
-
if
|
|
241
|
-
elem.startswith("
|
|
242
|
-
elem.startswith("
|
|
287
|
+
if (
|
|
288
|
+
elem.startswith("C")
|
|
289
|
+
or elem.startswith("N")
|
|
290
|
+
or elem.startswith("O")
|
|
291
|
+
or elem.startswith("S")
|
|
292
|
+
or elem.startswith("H")
|
|
293
|
+
):
|
|
243
294
|
return elem[0]
|
|
244
295
|
|
|
245
296
|
# Exactly match element abbreviations
|
|
@@ -250,4 +301,4 @@ def _guess_element(atom_name):
|
|
|
250
301
|
return _elements[_elements.index(elem[0])]
|
|
251
302
|
except ValueError:
|
|
252
303
|
warnings.warn(f"Could not infer element for '{atom_name}'")
|
|
253
|
-
return ""
|
|
304
|
+
return ""
|
biotite/structure/residues.py
CHANGED
|
@@ -9,14 +9,27 @@ atom level.
|
|
|
9
9
|
|
|
10
10
|
__name__ = "biotite.structure"
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
|
-
__all__ = [
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
__all__ = [
|
|
13
|
+
"get_residue_starts",
|
|
14
|
+
"apply_residue_wise",
|
|
15
|
+
"spread_residue_wise",
|
|
16
|
+
"get_residue_masks",
|
|
17
|
+
"get_residue_starts_for",
|
|
18
|
+
"get_residue_positions",
|
|
19
|
+
"get_residues",
|
|
20
|
+
"get_residue_count",
|
|
21
|
+
"residue_iter",
|
|
22
|
+
]
|
|
16
23
|
|
|
17
24
|
import numpy as np
|
|
18
|
-
from .
|
|
19
|
-
|
|
25
|
+
from biotite.structure.segments import (
|
|
26
|
+
apply_segment_wise,
|
|
27
|
+
get_segment_masks,
|
|
28
|
+
get_segment_positions,
|
|
29
|
+
get_segment_starts_for,
|
|
30
|
+
segment_iter,
|
|
31
|
+
spread_segment_wise,
|
|
32
|
+
)
|
|
20
33
|
|
|
21
34
|
|
|
22
35
|
def get_residue_starts(array, add_exclusive_stop=False):
|
|
@@ -57,23 +70,20 @@ def get_residue_starts(array, add_exclusive_stop=False):
|
|
|
57
70
|
278 292 304]
|
|
58
71
|
"""
|
|
59
72
|
# These mask are 'true' at indices where the value changes
|
|
60
|
-
chain_id_changes =
|
|
61
|
-
res_id_changes
|
|
62
|
-
ins_code_changes =
|
|
63
|
-
res_name_changes =
|
|
73
|
+
chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
|
|
74
|
+
res_id_changes = array.res_id[1:] != array.res_id[:-1]
|
|
75
|
+
ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
|
|
76
|
+
res_name_changes = array.res_name[1:] != array.res_name[:-1]
|
|
64
77
|
|
|
65
78
|
# If any of these annotation arrays change, a new residue starts
|
|
66
79
|
residue_change_mask = (
|
|
67
|
-
chain_id_changes |
|
|
68
|
-
res_id_changes |
|
|
69
|
-
ins_code_changes |
|
|
70
|
-
res_name_changes
|
|
80
|
+
chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
|
|
71
81
|
)
|
|
72
82
|
|
|
73
83
|
# Convert mask to indices
|
|
74
84
|
# Add 1, to shift the indices from the end of a residue
|
|
75
85
|
# to the start of a new residue
|
|
76
|
-
residue_starts = np.where(residue_change_mask)[0] +1
|
|
86
|
+
residue_starts = np.where(residue_change_mask)[0] + 1
|
|
77
87
|
|
|
78
88
|
# The first residue is not included yet -> Insert '[0]'
|
|
79
89
|
if add_exclusive_stop:
|
|
@@ -197,7 +207,7 @@ def spread_residue_wise(array, input_data):
|
|
|
197
207
|
Spread secondary structure annotation to every atom of a 20 residue
|
|
198
208
|
peptide (with 304 atoms).
|
|
199
209
|
|
|
200
|
-
>>> sse = annotate_sse(atom_array
|
|
210
|
+
>>> sse = annotate_sse(atom_array)
|
|
201
211
|
>>> print(len(sse))
|
|
202
212
|
20
|
|
203
213
|
>>> print(sse)
|
|
Binary file
|
|
@@ -4,8 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = [
|
|
8
|
-
|
|
7
|
+
__all__ = [
|
|
8
|
+
"apply_segment_wise",
|
|
9
|
+
"spread_segment_wise",
|
|
10
|
+
"get_segment_masks",
|
|
11
|
+
"get_segment_starts_for",
|
|
12
|
+
"get_segment_positions",
|
|
13
|
+
"segment_iter",
|
|
14
|
+
]
|
|
9
15
|
|
|
10
16
|
import numpy as np
|
|
11
17
|
|
|
@@ -24,9 +30,9 @@ def apply_segment_wise(starts, data, function, axis):
|
|
|
24
30
|
"""
|
|
25
31
|
# The result array
|
|
26
32
|
processed_data = None
|
|
27
|
-
for i in range(len(starts)-1):
|
|
28
|
-
segment = data[starts[i]:starts[i+1]]
|
|
29
|
-
if axis
|
|
33
|
+
for i in range(len(starts) - 1):
|
|
34
|
+
segment = data[starts[i] : starts[i + 1]]
|
|
35
|
+
if axis is None:
|
|
30
36
|
value = function(segment)
|
|
31
37
|
else:
|
|
32
38
|
value = function(segment, axis=axis)
|
|
@@ -39,13 +45,11 @@ def apply_segment_wise(starts, data, function, axis):
|
|
|
39
45
|
# is length of segment of size 1 -> length of all IDs
|
|
40
46
|
# (equal to atom array length)
|
|
41
47
|
processed_data = np.zeros(
|
|
42
|
-
(len(starts)-1,) + value.shape, dtype=value.dtype
|
|
48
|
+
(len(starts) - 1,) + value.shape, dtype=value.dtype
|
|
43
49
|
)
|
|
44
50
|
else:
|
|
45
51
|
# Scalar value -> one dimensional result array
|
|
46
|
-
processed_data = np.zeros(
|
|
47
|
-
len(starts)-1, dtype=type(value)
|
|
48
|
-
)
|
|
52
|
+
processed_data = np.zeros(len(starts) - 1, dtype=type(value))
|
|
49
53
|
# Write values into result arrays
|
|
50
54
|
processed_data[i] = value
|
|
51
55
|
return processed_data
|
|
@@ -64,7 +68,7 @@ def spread_segment_wise(starts, input_data):
|
|
|
64
68
|
atom array.
|
|
65
69
|
"""
|
|
66
70
|
output_data = np.zeros(starts[-1], dtype=input_data.dtype)
|
|
67
|
-
for i in range(len(starts)-1):
|
|
71
|
+
for i in range(len(starts) - 1):
|
|
68
72
|
start = starts[i]
|
|
69
73
|
stop = starts[i + 1]
|
|
70
74
|
output_data[start:stop] = input_data[i]
|
|
@@ -92,14 +96,13 @@ def get_segment_masks(starts, indices):
|
|
|
92
96
|
if (indices >= length).any():
|
|
93
97
|
index = np.min(np.where(indices >= length)[0])
|
|
94
98
|
raise ValueError(
|
|
95
|
-
f"Index {index} is out of range for "
|
|
96
|
-
f"an atom array with length {length}"
|
|
99
|
+
f"Index {index} is out of range for " f"an atom array with length {length}"
|
|
97
100
|
)
|
|
98
|
-
|
|
101
|
+
|
|
99
102
|
insertion_points = np.searchsorted(starts, indices, side="right") - 1
|
|
100
103
|
for i, point in enumerate(insertion_points):
|
|
101
|
-
masks[i, starts[point] : starts[point+1]] = True
|
|
102
|
-
|
|
104
|
+
masks[i, starts[point] : starts[point + 1]] = True
|
|
105
|
+
|
|
103
106
|
return masks
|
|
104
107
|
|
|
105
108
|
|
|
@@ -125,10 +128,9 @@ def get_segment_starts_for(starts, indices):
|
|
|
125
128
|
if (indices >= length).any():
|
|
126
129
|
index = np.min(np.where(indices >= length)[0])
|
|
127
130
|
raise ValueError(
|
|
128
|
-
f"Index {index} is out of range for "
|
|
129
|
-
f"an atom array with length {length}"
|
|
131
|
+
f"Index {index} is out of range for " f"an atom array with length {length}"
|
|
130
132
|
)
|
|
131
|
-
|
|
133
|
+
|
|
132
134
|
insertion_points = np.searchsorted(starts, indices, side="right") - 1
|
|
133
135
|
return starts[insertion_points]
|
|
134
136
|
|
|
@@ -155,10 +157,9 @@ def get_segment_positions(starts, indices):
|
|
|
155
157
|
if (indices >= length).any():
|
|
156
158
|
index = np.min(np.where(indices >= length)[0])
|
|
157
159
|
raise ValueError(
|
|
158
|
-
f"Index {index} is out of range for "
|
|
159
|
-
f"an atom array with length {length}"
|
|
160
|
+
f"Index {index} is out of range for " f"an atom array with length {length}"
|
|
160
161
|
)
|
|
161
|
-
|
|
162
|
+
|
|
162
163
|
return np.searchsorted(starts, indices, side="right") - 1
|
|
163
164
|
|
|
164
165
|
|
|
@@ -174,5 +175,5 @@ def segment_iter(array, starts):
|
|
|
174
175
|
Includes exclusive stop, i.e. the length of the corresponding
|
|
175
176
|
atom array.
|
|
176
177
|
"""
|
|
177
|
-
for i in range(len(starts)-1):
|
|
178
|
-
yield array[..., starts[i] : starts[i+1]]
|
|
178
|
+
for i in range(len(starts) - 1):
|
|
179
|
+
yield array[..., starts[i] : starts[i + 1]]
|
biotite/structure/sequence.py
CHANGED
|
@@ -11,13 +11,12 @@ __author__ = "Patrick Kunzmann"
|
|
|
11
11
|
__all__ = ["to_sequence"]
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
|
-
from .
|
|
15
|
-
from .
|
|
16
|
-
from .
|
|
17
|
-
from .
|
|
18
|
-
from .
|
|
19
|
-
from
|
|
20
|
-
|
|
14
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
15
|
+
from biotite.structure.chains import get_chain_starts
|
|
16
|
+
from biotite.structure.error import BadStructureError
|
|
17
|
+
from biotite.structure.info.groups import amino_acid_names, nucleotide_names
|
|
18
|
+
from biotite.structure.info.misc import one_letter_code
|
|
19
|
+
from biotite.structure.residues import get_residues
|
|
21
20
|
|
|
22
21
|
HETERO_PLACEHOLDER = "."
|
|
23
22
|
|
|
@@ -63,9 +62,9 @@ def to_sequence(atoms, allow_hetero=False):
|
|
|
63
62
|
"""
|
|
64
63
|
sequences = []
|
|
65
64
|
chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
|
|
66
|
-
for i in range(len(chain_start_indices)-1):
|
|
65
|
+
for i in range(len(chain_start_indices) - 1):
|
|
67
66
|
start = chain_start_indices[i]
|
|
68
|
-
stop = chain_start_indices[i+1]
|
|
67
|
+
stop = chain_start_indices[i + 1]
|
|
69
68
|
chain = atoms[start:stop]
|
|
70
69
|
_, residues = get_residues(chain)
|
|
71
70
|
one_letter_symbols = np.array(
|
|
@@ -73,7 +72,7 @@ def to_sequence(atoms, allow_hetero=False):
|
|
|
73
72
|
)
|
|
74
73
|
hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER
|
|
75
74
|
|
|
76
|
-
aa_count
|
|
75
|
+
aa_count = np.count_nonzero(np.isin(residues, amino_acid_names()))
|
|
77
76
|
nuc_count = np.count_nonzero(np.isin(residues, nucleotide_names()))
|
|
78
77
|
if aa_count == 0 and nuc_count == 0:
|
|
79
78
|
raise BadStructureError(
|
|
@@ -109,4 +108,4 @@ def to_sequence(atoms, allow_hetero=False):
|
|
|
109
108
|
sequences.append(NucleotideSequence("".join(one_letter_symbols)))
|
|
110
109
|
|
|
111
110
|
# Remove exclusive stop
|
|
112
|
-
return sequences, chain_start_indices[:-1]
|
|
111
|
+
return sequences, chain_start_indices[:-1]
|