biotite 0.40.0__cp312-cp312-win_amd64.whl → 0.41.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +1 -1
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +117 -117
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +117 -117
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +117 -117
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +117 -117
- biotite/sequence/align/localgapped.c +117 -117
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +117 -117
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +117 -117
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +117 -117
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +117 -117
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +117 -117
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +117 -117
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +117 -117
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +117 -117
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +117 -117
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +117 -117
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/bonds.c +1122 -913
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +117 -117
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/charges.c +117 -117
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/info/atoms.py +6 -1
- biotite/structure/info/bonds.py +1 -1
- biotite/structure/info/ccd/amino_acids.txt +17 -0
- biotite/structure/info/ccd/carbohydrates.txt +2 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +1 -0
- biotite/structure/info/misc.py +69 -5
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +157 -165
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/convertarray.c +117 -117
- biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +117 -117
- biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +117 -117
- biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +117 -117
- biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/pdb/file.py +84 -31
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/__init__.py +0 -1
- biotite/structure/io/pdbx/bcif.py +2 -3
- biotite/structure/io/pdbx/cif.py +9 -5
- biotite/structure/io/pdbx/component.py +4 -1
- biotite/structure/io/pdbx/convert.py +203 -79
- biotite/structure/io/pdbx/encoding.c +117 -117
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +117 -117
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +472 -13
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
- biotite/structure/io/pdbx/error.py +0 -14
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
biotite/structure/dotbracket.py
CHANGED
|
@@ -57,6 +57,8 @@ def dot_bracket_from_structure(
|
|
|
57
57
|
.. footbibliography::
|
|
58
58
|
"""
|
|
59
59
|
basepairs = base_pairs(nucleic_acid_strand)
|
|
60
|
+
if len(basepairs) == 0:
|
|
61
|
+
return ['']
|
|
60
62
|
basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
|
|
61
63
|
length = get_residue_count(nucleic_acid_strand)
|
|
62
64
|
return dot_bracket(basepairs, length, scores=scores,
|
biotite/structure/info/atoms.py
CHANGED
|
@@ -72,6 +72,11 @@ def residue(res_name):
|
|
|
72
72
|
# Avoid circular import
|
|
73
73
|
from ..io.pdbx import get_component
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
try:
|
|
76
|
+
component = get_component(get_ccd(), res_name=res_name)
|
|
77
|
+
except KeyError:
|
|
78
|
+
raise KeyError(
|
|
79
|
+
f"No atom information found for residue '{res_name}' in CCD"
|
|
80
|
+
)
|
|
76
81
|
component.hetero[:] = res_name not in non_hetero_residues
|
|
77
82
|
return component
|
biotite/structure/info/bonds.py
CHANGED
|
@@ -83,7 +83,7 @@ def bonds_in_residue(res_name):
|
|
|
83
83
|
|
|
84
84
|
Returns
|
|
85
85
|
-------
|
|
86
|
-
bonds : dict (str -> int)
|
|
86
|
+
bonds : dict ((str, str) -> int)
|
|
87
87
|
A dictionary that maps tuples of two atom names to their
|
|
88
88
|
respective bond types (represented as integer).
|
|
89
89
|
Empty, if the residue is unknown to the
|
|
@@ -228,6 +228,7 @@
|
|
|
228
228
|
4L8
|
|
229
229
|
4LZ
|
|
230
230
|
4M8
|
|
231
|
+
4M9
|
|
231
232
|
4MM
|
|
232
233
|
4N3
|
|
233
234
|
4N7
|
|
@@ -386,9 +387,14 @@
|
|
|
386
387
|
9VR
|
|
387
388
|
9WV
|
|
388
389
|
A0G
|
|
390
|
+
A1ADO
|
|
389
391
|
A1ADW
|
|
390
392
|
A1ADY
|
|
391
393
|
A1ADZ
|
|
394
|
+
A1D64
|
|
395
|
+
A1H2H
|
|
396
|
+
A1H2I
|
|
397
|
+
A1H45
|
|
392
398
|
A1LWV
|
|
393
399
|
A30
|
|
394
400
|
A3U
|
|
@@ -472,6 +478,7 @@ B2C
|
|
|
472
478
|
B2H
|
|
473
479
|
B2N
|
|
474
480
|
B3A
|
|
481
|
+
B3D
|
|
475
482
|
B3E
|
|
476
483
|
B3K
|
|
477
484
|
B3L
|
|
@@ -555,6 +562,7 @@ CH7
|
|
|
555
562
|
CHG
|
|
556
563
|
CHP
|
|
557
564
|
CIR
|
|
565
|
+
CIV
|
|
558
566
|
CJO
|
|
559
567
|
CLB
|
|
560
568
|
CLD
|
|
@@ -1328,6 +1336,7 @@ QPA
|
|
|
1328
1336
|
QPH
|
|
1329
1337
|
QQ8
|
|
1330
1338
|
QQB
|
|
1339
|
+
QUK
|
|
1331
1340
|
QVA
|
|
1332
1341
|
QX7
|
|
1333
1342
|
QXV
|
|
@@ -1613,13 +1622,16 @@ YNM
|
|
|
1613
1622
|
YOF
|
|
1614
1623
|
YPR
|
|
1615
1624
|
YPZ
|
|
1625
|
+
YRV
|
|
1616
1626
|
YTF
|
|
1617
1627
|
YTH
|
|
1618
1628
|
YWV
|
|
1619
1629
|
YYA
|
|
1620
1630
|
Z01
|
|
1621
1631
|
Z3E
|
|
1632
|
+
Z50
|
|
1622
1633
|
Z70
|
|
1634
|
+
Z9J
|
|
1623
1635
|
ZAE
|
|
1624
1636
|
ZAI
|
|
1625
1637
|
ZAL
|
|
@@ -1629,7 +1641,11 @@ ZDJ
|
|
|
1629
1641
|
ZFB
|
|
1630
1642
|
ZGL
|
|
1631
1643
|
ZIQ
|
|
1644
|
+
ZJU
|
|
1645
|
+
ZKO
|
|
1646
|
+
ZLF
|
|
1632
1647
|
ZNY
|
|
1648
|
+
ZRJ
|
|
1633
1649
|
ZSX
|
|
1634
1650
|
ZT6
|
|
1635
1651
|
ZT9
|
|
@@ -1639,6 +1655,7 @@ ZTK
|
|
|
1639
1655
|
ZU0
|
|
1640
1656
|
ZUK
|
|
1641
1657
|
ZV4
|
|
1658
|
+
ZY9
|
|
1642
1659
|
ZYJ
|
|
1643
1660
|
ZYK
|
|
1644
1661
|
ZZD
|
|
Binary file
|
biotite/structure/info/misc.py
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.info"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["all_residues", "full_name", "link_type"]
|
|
7
|
+
__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
|
|
8
8
|
|
|
9
9
|
from .ccd import get_ccd, get_from_ccd
|
|
10
10
|
|
|
@@ -40,8 +40,10 @@ def full_name(res_name):
|
|
|
40
40
|
|
|
41
41
|
Returns
|
|
42
42
|
-------
|
|
43
|
-
name : str
|
|
43
|
+
name : str or None
|
|
44
44
|
The full name of the residue.
|
|
45
|
+
If the residue is unknown to the chemical components dictionary,
|
|
46
|
+
``None`` is returned.
|
|
45
47
|
|
|
46
48
|
Examples
|
|
47
49
|
--------
|
|
@@ -49,7 +51,10 @@ def full_name(res_name):
|
|
|
49
51
|
>>> print(full_name("MAN"))
|
|
50
52
|
alpha-D-mannopyranose
|
|
51
53
|
"""
|
|
52
|
-
|
|
54
|
+
array = get_from_ccd("chem_comp", res_name.upper(), "name")
|
|
55
|
+
if array is None:
|
|
56
|
+
return None
|
|
57
|
+
return array.item()
|
|
53
58
|
|
|
54
59
|
|
|
55
60
|
def link_type(res_name):
|
|
@@ -64,8 +69,10 @@ def link_type(res_name):
|
|
|
64
69
|
|
|
65
70
|
Returns
|
|
66
71
|
-------
|
|
67
|
-
link_type : str
|
|
72
|
+
link_type : str or None
|
|
68
73
|
The link type.
|
|
74
|
+
If the residue is unknown to the chemical components dictionary,
|
|
75
|
+
``None`` is returned.
|
|
69
76
|
|
|
70
77
|
Examples
|
|
71
78
|
--------
|
|
@@ -77,4 +84,61 @@ def link_type(res_name):
|
|
|
77
84
|
>>> print(link_type("HOH"))
|
|
78
85
|
NON-POLYMER
|
|
79
86
|
"""
|
|
80
|
-
|
|
87
|
+
array = get_from_ccd("chem_comp", res_name.upper(), "type")
|
|
88
|
+
if array is None:
|
|
89
|
+
return None
|
|
90
|
+
return array.item()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def one_letter_code(res_name):
|
|
94
|
+
"""
|
|
95
|
+
Get the one-letter code of a residue/compound,
|
|
96
|
+
based on the PDB chemical components dictionary.
|
|
97
|
+
|
|
98
|
+
The one-letter code is only defined for amino acids and nucleotides
|
|
99
|
+
and for compounds that are structurally similar to them.
|
|
100
|
+
|
|
101
|
+
Parameters
|
|
102
|
+
----------
|
|
103
|
+
res_name : str
|
|
104
|
+
The up to 3-letter residue name.
|
|
105
|
+
|
|
106
|
+
Returns
|
|
107
|
+
-------
|
|
108
|
+
one_letter_code : str or None
|
|
109
|
+
The one-letter code.
|
|
110
|
+
None if the compound is not present in the CCD or if no
|
|
111
|
+
one-letter code is defined for this compound.
|
|
112
|
+
|
|
113
|
+
Examples
|
|
114
|
+
--------
|
|
115
|
+
|
|
116
|
+
Get the one letter code for an amino acid (or a nucleotide).
|
|
117
|
+
|
|
118
|
+
>>> print(full_name("ALA"))
|
|
119
|
+
ALANINE
|
|
120
|
+
>>> print(one_letter_code("ALA"))
|
|
121
|
+
A
|
|
122
|
+
|
|
123
|
+
For similar compounds, the one-letter code is also defined.
|
|
124
|
+
|
|
125
|
+
>>> print(full_name("DAL"))
|
|
126
|
+
D-ALANINE
|
|
127
|
+
>>> print(one_letter_code("DAL"))
|
|
128
|
+
A
|
|
129
|
+
|
|
130
|
+
For other compounds, the one-letter code is not defined.
|
|
131
|
+
|
|
132
|
+
>>> print(full_name("MAN"))
|
|
133
|
+
alpha-D-mannopyranose
|
|
134
|
+
>>> print(one_letter_code("MAN"))
|
|
135
|
+
None
|
|
136
|
+
|
|
137
|
+
"""
|
|
138
|
+
array = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
|
|
139
|
+
if array is None:
|
|
140
|
+
return None
|
|
141
|
+
item = array.item()
|
|
142
|
+
if item == "":
|
|
143
|
+
return None
|
|
144
|
+
return item
|
biotite/structure/integrity.py
CHANGED
|
@@ -12,7 +12,7 @@ __author__ = "Patrick Kunzmann, Daniel Bauer"
|
|
|
12
12
|
__all__ = ["check_id_continuity", "check_atom_id_continuity",
|
|
13
13
|
"check_res_id_continuity", "check_backbone_continuity",
|
|
14
14
|
"check_duplicate_atoms", "check_bond_continuity",
|
|
15
|
-
"check_linear_continuity"
|
|
15
|
+
"check_linear_continuity"]
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import warnings
|
|
@@ -32,17 +32,17 @@ def check_id_continuity(array):
|
|
|
32
32
|
"""
|
|
33
33
|
Check if the residue IDs are incremented by more than 1 or
|
|
34
34
|
decremented, from one atom to the next one.
|
|
35
|
-
|
|
35
|
+
|
|
36
36
|
An increment by more than 1 is as strong clue for missing residues,
|
|
37
37
|
a decrement means probably a start of a new chain.
|
|
38
38
|
|
|
39
39
|
DEPRECATED: Use :func:`check_res_id_continuity()` instead.
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
Parameters
|
|
42
42
|
----------
|
|
43
43
|
array : AtomArray or AtomArrayStack
|
|
44
44
|
The array to be checked.
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
Returns
|
|
47
47
|
-------
|
|
48
48
|
discontinuity : ndarray, dtype=int
|
|
@@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
|
|
|
60
60
|
"""
|
|
61
61
|
Check if the atom IDs are incremented by more than 1 or
|
|
62
62
|
decremented, from one atom to the next one.
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
An increment by more than 1 is as strong clue for missing atoms.
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
Parameters
|
|
67
67
|
----------
|
|
68
68
|
array : AtomArray or AtomArrayStack
|
|
69
69
|
The array to be checked.
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
Returns
|
|
72
72
|
-------
|
|
73
73
|
discontinuity : ndarray, dtype=int
|
|
@@ -81,15 +81,15 @@ def check_res_id_continuity(array):
|
|
|
81
81
|
"""
|
|
82
82
|
Check if the residue IDs are incremented by more than 1 or
|
|
83
83
|
decremented, from one atom to the next one.
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
An increment by more than 1 is as strong clue for missing residues,
|
|
86
86
|
a decrement means probably a start of a new chain.
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
Parameters
|
|
89
89
|
----------
|
|
90
90
|
array : AtomArray or AtomArrayStack
|
|
91
91
|
The array to be checked.
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
Returns
|
|
94
94
|
-------
|
|
95
95
|
discontinuity : ndarray, dtype=int
|
|
@@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
168
168
|
"""
|
|
169
169
|
Check if the (peptide or phosphate) backbone atoms have
|
|
170
170
|
non-reasonable distance to the next atom.
|
|
171
|
-
|
|
171
|
+
|
|
172
172
|
A large or very small distance is a very strong clue, that there is
|
|
173
173
|
no bond between those atoms, therefore the chain is discontinued.
|
|
174
174
|
|
|
@@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
|
|
|
206
206
|
"""
|
|
207
207
|
Check if a structure contains duplicate atoms, i.e. two atoms in a
|
|
208
208
|
structure have the same annotations (coordinates may be different).
|
|
209
|
-
|
|
209
|
+
|
|
210
210
|
Duplicate atoms may appear, when a structure has occupancy for an
|
|
211
211
|
atom at two or more positions or when the *altloc* positions are
|
|
212
212
|
improperly read.
|
|
213
|
-
|
|
213
|
+
|
|
214
214
|
Parameters
|
|
215
215
|
----------
|
|
216
216
|
array : AtomArray or AtomArrayStack
|
|
217
217
|
The array to be checked.
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
Returns
|
|
220
220
|
-------
|
|
221
221
|
duplicate : ndarray, dtype=int
|
|
@@ -228,16 +228,16 @@ def check_duplicate_atoms(array):
|
|
|
228
228
|
for i in range(1, array.array_length()):
|
|
229
229
|
# Start with assumption that all atoms in the array
|
|
230
230
|
# until index i are duplicates of the atom at index i
|
|
231
|
-
|
|
231
|
+
is_duplicate = np.full(i, True, dtype=bool)
|
|
232
232
|
for annot in annots:
|
|
233
233
|
# For each annotation array filter out the atoms until
|
|
234
234
|
# index i that have an unequal annotation
|
|
235
|
-
# to the atom at index i
|
|
236
|
-
|
|
235
|
+
# to the atom at index i
|
|
236
|
+
is_duplicate &= (annot[:i] == annot[i])
|
|
237
237
|
# After checking all annotation arrays,
|
|
238
238
|
# if there still is any duplicate to the atom at index i,
|
|
239
239
|
# add i the the list of duplicate atom indices
|
|
240
|
-
if
|
|
240
|
+
if is_duplicate.any():
|
|
241
241
|
duplicates.append(i)
|
|
242
242
|
return np.array(duplicates)
|
|
243
243
|
|
|
@@ -255,7 +255,7 @@ def check_in_box(array):
|
|
|
255
255
|
----------
|
|
256
256
|
array : AtomArray or AtomArrayStack
|
|
257
257
|
The array to be checked.
|
|
258
|
-
|
|
258
|
+
|
|
259
259
|
Returns
|
|
260
260
|
-------
|
|
261
261
|
outside : ndarray, dtype=int
|
|
@@ -266,54 +266,3 @@ def check_in_box(array):
|
|
|
266
266
|
box = array.box
|
|
267
267
|
fractions = coord_to_fraction(array, box)
|
|
268
268
|
return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def renumber_atom_ids(array, start=None):
|
|
272
|
-
"""
|
|
273
|
-
Renumber the atom IDs of the given array.
|
|
274
|
-
|
|
275
|
-
Parameters
|
|
276
|
-
----------
|
|
277
|
-
array : AtomArray or AtomArrayStack
|
|
278
|
-
The array to be checked.
|
|
279
|
-
start : int, optional
|
|
280
|
-
The starting index for renumbering.
|
|
281
|
-
The first ID in the array is taken by default.
|
|
282
|
-
|
|
283
|
-
Returns
|
|
284
|
-
-------
|
|
285
|
-
array : AtomArray or AtomArrayStack
|
|
286
|
-
The renumbered array.
|
|
287
|
-
"""
|
|
288
|
-
if "atom_id" not in array.get_annotation_categories():
|
|
289
|
-
raise ValueError("The atom array must have the 'atom_id' annotation")
|
|
290
|
-
if start is None:
|
|
291
|
-
start = array.atom_id[0]
|
|
292
|
-
array.atom_id = np.arange(start, array.shape[-1]+1)
|
|
293
|
-
return array
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
def renumber_res_ids(array, start=None):
|
|
297
|
-
"""
|
|
298
|
-
Renumber the residue IDs of the given array.
|
|
299
|
-
|
|
300
|
-
Parameters
|
|
301
|
-
----------
|
|
302
|
-
array : AtomArray or AtomArrayStack
|
|
303
|
-
The array to be checked.
|
|
304
|
-
start : int, optional
|
|
305
|
-
The starting index for renumbering.
|
|
306
|
-
The first ID in the array is taken by default.
|
|
307
|
-
|
|
308
|
-
Returns
|
|
309
|
-
-------
|
|
310
|
-
array : AtomArray or AtomArrayStack
|
|
311
|
-
The renumbered array.
|
|
312
|
-
"""
|
|
313
|
-
if start is None:
|
|
314
|
-
start = array.res_id[0]
|
|
315
|
-
diff = np.diff(array.res_id)
|
|
316
|
-
diff[diff != 0] = 1
|
|
317
|
-
new_res_ids = np.concatenate(([start], diff)).cumsum()
|
|
318
|
-
array.res_id = new_res_ids
|
|
319
|
-
return array
|
biotite/structure/io/ctab.py
CHANGED
|
@@ -2,46 +2,20 @@
|
|
|
2
2
|
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
3
|
# information.
|
|
4
4
|
|
|
5
|
-
"""
|
|
6
|
-
Functions for parsing and writing an :class:`AtomArray` from/to
|
|
7
|
-
*MDL* connection tables (Ctab).
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
5
|
__name__ = "biotite.structure.io"
|
|
11
6
|
__author__ = "Patrick Kunzmann"
|
|
12
7
|
__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
|
|
13
8
|
|
|
14
9
|
import warnings
|
|
15
|
-
|
|
16
|
-
from ..error import BadStructureError
|
|
17
|
-
from ..atoms import AtomArray, AtomArrayStack
|
|
18
|
-
from ..bonds import BondList, BondType
|
|
19
|
-
|
|
20
|
-
BOND_TYPE_MAPPING = {
|
|
21
|
-
1: BondType.SINGLE,
|
|
22
|
-
2: BondType.DOUBLE,
|
|
23
|
-
3: BondType.TRIPLE,
|
|
24
|
-
6: BondType.SINGLE,
|
|
25
|
-
7: BondType.DOUBLE,
|
|
26
|
-
8: BondType.ANY,
|
|
27
|
-
}
|
|
28
|
-
BOND_TYPE_MAPPING_REV = {
|
|
29
|
-
BondType.SINGLE: 1,
|
|
30
|
-
BondType.DOUBLE: 2,
|
|
31
|
-
BondType.TRIPLE: 3,
|
|
32
|
-
BondType.AROMATIC_SINGLE: 1,
|
|
33
|
-
BondType.AROMATIC_DOUBLE: 2,
|
|
34
|
-
BondType.ANY: 8,
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
|
|
38
|
-
CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
|
|
10
|
+
from ..bonds import BondType
|
|
39
11
|
|
|
40
12
|
|
|
41
13
|
def read_structure_from_ctab(ctab_lines):
|
|
42
14
|
"""
|
|
43
15
|
Parse a *MDL* connection table (Ctab) to obtain an
|
|
44
|
-
:class:`AtomArray`. :footcite:`Dalby1992
|
|
16
|
+
:class:`AtomArray`. :footcite:`Dalby1992`.
|
|
17
|
+
|
|
18
|
+
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
|
|
45
19
|
|
|
46
20
|
Parameters
|
|
47
21
|
----------
|
|
@@ -60,41 +34,9 @@ def read_structure_from_ctab(ctab_lines):
|
|
|
60
34
|
|
|
61
35
|
.. footbibliography::
|
|
62
36
|
"""
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
atoms = AtomArray(n_atoms)
|
|
68
|
-
atoms.add_annotation("charge", int)
|
|
69
|
-
for i, line in enumerate(atom_lines):
|
|
70
|
-
atoms.coord[i, 0] = float(line[0:10])
|
|
71
|
-
atoms.coord[i, 1] = float(line[10:20])
|
|
72
|
-
atoms.coord[i, 2] = float(line[20:30])
|
|
73
|
-
atoms.element[i] = line[31:34].strip().upper()
|
|
74
|
-
charge = CHARGE_MAPPING.get(int(line[36:39]))
|
|
75
|
-
if charge is None:
|
|
76
|
-
warnings.warn(
|
|
77
|
-
f"Cannot handle MDL charge type {int(line[36 : 39])}, "
|
|
78
|
-
f"0 is used instead"
|
|
79
|
-
)
|
|
80
|
-
charge = 0
|
|
81
|
-
atoms.charge[i] = charge
|
|
82
|
-
|
|
83
|
-
bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
|
|
84
|
-
for i, line in enumerate(bond_lines):
|
|
85
|
-
bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
|
|
86
|
-
if bond_type is None:
|
|
87
|
-
warnings.warn(
|
|
88
|
-
f"Cannot handle MDL bond type {int(line[6 : 9])}, "
|
|
89
|
-
f"BondType.ANY is used instead"
|
|
90
|
-
)
|
|
91
|
-
bond_type = BondType.ANY
|
|
92
|
-
bond_array[i, 0] = int(line[0:3]) - 1
|
|
93
|
-
bond_array[i, 1] = int(line[3:6]) - 1
|
|
94
|
-
bond_array[i, 2] = bond_type
|
|
95
|
-
atoms.bonds = BondList(n_atoms, bond_array)
|
|
96
|
-
|
|
97
|
-
return atoms
|
|
37
|
+
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
|
|
38
|
+
from biotite.structure.io.mol.ctab import read_structure_from_ctab
|
|
39
|
+
return read_structure_from_ctab(ctab_lines)
|
|
98
40
|
|
|
99
41
|
|
|
100
42
|
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
@@ -102,6 +44,8 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
|
102
44
|
Convert an :class:`AtomArray` into a
|
|
103
45
|
*MDL* connection table (Ctab). :footcite:`Dalby1992`
|
|
104
46
|
|
|
47
|
+
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
|
|
48
|
+
|
|
105
49
|
Parameters
|
|
106
50
|
----------
|
|
107
51
|
atoms : AtomArray
|
|
@@ -123,44 +67,6 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
|
123
67
|
|
|
124
68
|
.. footbibliography::
|
|
125
69
|
"""
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
"but only a single model can be written"
|
|
130
|
-
)
|
|
131
|
-
if atoms.bonds is None:
|
|
132
|
-
raise BadStructureError("Input AtomArray has no associated BondList")
|
|
133
|
-
|
|
134
|
-
try:
|
|
135
|
-
charge = atoms.charge
|
|
136
|
-
except AttributeError:
|
|
137
|
-
charge = np.zeros(atoms.array_length(), dtype=int)
|
|
138
|
-
|
|
139
|
-
atom_lines = [
|
|
140
|
-
f"{atoms.coord[i,0]:>10.5f}"
|
|
141
|
-
f"{atoms.coord[i,1]:>10.5f}"
|
|
142
|
-
f"{atoms.coord[i,2]:>10.5f}"
|
|
143
|
-
f" {atoms.element[i]:>3}"
|
|
144
|
-
f" {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
|
|
145
|
-
for i in range(atoms.array_length())
|
|
146
|
-
]
|
|
147
|
-
|
|
148
|
-
default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
|
|
149
|
-
|
|
150
|
-
bond_lines = [
|
|
151
|
-
f"{i+1:>3d}{j+1:>3d}"
|
|
152
|
-
f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
|
|
153
|
-
+ f"{0:>3d}" * 4
|
|
154
|
-
for i, j, bond_type in atoms.bonds.as_array()
|
|
155
|
-
]
|
|
156
|
-
|
|
157
|
-
counts_line = (
|
|
158
|
-
f"{len(atom_lines):>3d}{len(bond_lines):>3d}"
|
|
159
|
-
" 0 0 0 0 0 0 0 1 V2000"
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
return [counts_line] + atom_lines + bond_lines + ["M END"]
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def _get_counts(counts_line):
|
|
166
|
-
return int(counts_line[0:3]), int(counts_line[3:6])
|
|
70
|
+
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
|
|
71
|
+
from biotite.structure.io.mol.ctab import write_structure_to_ctab
|
|
72
|
+
return write_structure_to_ctab(atoms, default_bond_type)
|