biotite 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/database/rcsb/download.py +19 -14
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +258 -237
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +243 -222
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.c +215 -196
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpp +233 -205
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.c +258 -237
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.c +235 -214
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.c +255 -234
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.c +274 -253
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.c +215 -196
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.c +217 -197
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.c +215 -195
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +235 -214
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +215 -196
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.c +227 -202
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.c +215 -196
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1437 -1279
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.c +217 -197
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/charges.c +1052 -1101
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +31 -68
- biotite/structure/info/bonds.py +47 -101
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +78 -25
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +167 -181
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +219 -198
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.c +217 -197
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.c +225 -204
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.c +215 -196
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +89 -34
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +12 -6
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +858 -386
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +215 -196
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +618 -116
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
biotite/structure/dotbracket.py
CHANGED
|
@@ -57,6 +57,8 @@ def dot_bracket_from_structure(
|
|
|
57
57
|
.. footbibliography::
|
|
58
58
|
"""
|
|
59
59
|
basepairs = base_pairs(nucleic_acid_strand)
|
|
60
|
+
if len(basepairs) == 0:
|
|
61
|
+
return ['']
|
|
60
62
|
basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
|
|
61
63
|
length = get_residue_count(nucleic_acid_strand)
|
|
62
64
|
return dot_bracket(basepairs, length, scores=scores,
|
biotite/structure/filter.py
CHANGED
|
@@ -10,9 +10,9 @@ arrays and atom array stacks.
|
|
|
10
10
|
__name__ = "biotite.structure"
|
|
11
11
|
__author__ = "Patrick Kunzmann, Tom David Müller"
|
|
12
12
|
__all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
|
|
13
|
-
"filter_canonical_nucleotides", "filter_amino_acids",
|
|
14
|
-
"filter_canonical_amino_acids", "filter_carbohydrates",
|
|
15
|
-
"filter_backbone", "filter_intersection", "filter_first_altloc",
|
|
13
|
+
"filter_canonical_nucleotides", "filter_amino_acids",
|
|
14
|
+
"filter_canonical_amino_acids", "filter_carbohydrates",
|
|
15
|
+
"filter_backbone", "filter_intersection", "filter_first_altloc",
|
|
16
16
|
"filter_highest_occupancy_altloc", "filter_peptide_backbone",
|
|
17
17
|
"filter_phosphate_backbone", "filter_linear_bond_continuity",
|
|
18
18
|
"filter_polymer"]
|
|
@@ -20,13 +20,10 @@ __all__ = ["filter_solvent", "filter_monoatomic_ions", "filter_nucleotides",
|
|
|
20
20
|
import warnings
|
|
21
21
|
|
|
22
22
|
import numpy as np
|
|
23
|
-
|
|
24
|
-
from
|
|
25
|
-
from .atoms import Atom, AtomArray, AtomArrayStack, array as atom_array
|
|
23
|
+
from functools import partial
|
|
24
|
+
from .atoms import array as atom_array
|
|
26
25
|
from .residues import get_residue_starts, get_residue_count
|
|
27
|
-
from .info.
|
|
28
|
-
from .info.amino_acids import amino_acid_names
|
|
29
|
-
from .info.carbohydrates import carbohydrate_names
|
|
26
|
+
from .info.groups import amino_acid_names, carbohydrate_names, nucleotide_names
|
|
30
27
|
|
|
31
28
|
|
|
32
29
|
_canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
|
|
@@ -34,10 +31,6 @@ _canonical_aa_list = ["ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
|
|
|
34
31
|
"TRP","TYR","VAL", "SEC"]
|
|
35
32
|
_canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
|
|
36
33
|
|
|
37
|
-
_nucleotide_list = nucleotide_names()
|
|
38
|
-
_amino_acid_list = amino_acid_names()
|
|
39
|
-
_carbohydrate_list = carbohydrate_names()
|
|
40
|
-
|
|
41
34
|
_solvent_list = ["HOH","SOL"]
|
|
42
35
|
|
|
43
36
|
_peptide_backbone_atoms = ['N', 'CA', 'C']
|
|
@@ -118,22 +111,22 @@ def filter_nucleotides(array):
|
|
|
118
111
|
|
|
119
112
|
Notes
|
|
120
113
|
-----
|
|
121
|
-
Nucleotides are identified according to the PDB chemical component
|
|
114
|
+
Nucleotides are identified according to the PDB chemical component
|
|
122
115
|
dictionary. A residue is considered a nucleotide if its
|
|
123
116
|
``_chem_comp.type`` property has one of the following values (case
|
|
124
117
|
insensitive):
|
|
125
118
|
|
|
126
|
-
``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
|
|
127
|
-
``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
|
|
119
|
+
``DNA LINKING``, ``DNA OH 3 PRIME TERMINUS``,
|
|
120
|
+
``DNA OH 5 PRIME TERMINUS``, ``L-DNA LINKING``, ``L-RNA LINKING``,
|
|
128
121
|
``RNA LINKING``, ``RNA OH 3 PRIME TERMINUS``,
|
|
129
122
|
``RNA OH 5 PRIME TERMINUS``
|
|
130
123
|
"""
|
|
131
|
-
return np.isin(array.res_name,
|
|
124
|
+
return np.isin(array.res_name, nucleotide_names())
|
|
132
125
|
|
|
133
126
|
|
|
134
127
|
def filter_canonical_amino_acids(array):
|
|
135
128
|
"""
|
|
136
|
-
Filter all atoms of one array that belong to canonical amino acid
|
|
129
|
+
Filter all atoms of one array that belong to canonical amino acid
|
|
137
130
|
residues.
|
|
138
131
|
|
|
139
132
|
Parameters
|
|
@@ -164,23 +157,23 @@ def filter_amino_acids(array):
|
|
|
164
157
|
filter : ndarray, dtype=bool
|
|
165
158
|
This array is `True` for all indices in `array`, where the atom
|
|
166
159
|
belongs to an amino acid residue.
|
|
167
|
-
|
|
160
|
+
|
|
168
161
|
Notes
|
|
169
162
|
-----
|
|
170
|
-
Amino acids are identified according to the PDB chemical component
|
|
163
|
+
Amino acids are identified according to the PDB chemical component
|
|
171
164
|
dictionary. A residue is considered an amino acid if its
|
|
172
165
|
``_chem_comp.type`` property has one of the following values (case
|
|
173
166
|
insensitive):
|
|
174
167
|
|
|
175
|
-
``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
|
|
176
|
-
``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
|
|
177
|
-
``D-PEPTIDE NH3 AMINO TERMINUS``,
|
|
178
|
-
``L-BETA-PEPTIDE, C-GAMMA LINKING``,
|
|
179
|
-
``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
|
|
180
|
-
``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
|
|
168
|
+
``D-BETA-PEPTIDE``, ``C-GAMMA LINKING``, ``D-GAMMA-PEPTIDE``,
|
|
169
|
+
``C-DELTA LINKING``, ``D-PEPTIDE LINKING``,
|
|
170
|
+
``D-PEPTIDE NH3 AMINO TERMINUS``,
|
|
171
|
+
``L-BETA-PEPTIDE, C-GAMMA LINKING``,
|
|
172
|
+
``L-GAMMA-PEPTIDE, C-DELTA LINKING``,
|
|
173
|
+
``L-PEPTIDE COOH CARBOXY TERMINUS``, ``L-PEPTIDE LINKING``,
|
|
181
174
|
``L-PEPTIDE NH3 AMINO TERMINUS``, ``PEPTIDE LINKING``
|
|
182
175
|
"""
|
|
183
|
-
return np.isin(array.res_name,
|
|
176
|
+
return np.isin(array.res_name, amino_acid_names())
|
|
184
177
|
|
|
185
178
|
|
|
186
179
|
def filter_carbohydrates(array):
|
|
@@ -197,20 +190,20 @@ def filter_carbohydrates(array):
|
|
|
197
190
|
filter : ndarray, dtype=bool
|
|
198
191
|
This array is `True` for all indices in `array`, where the atom
|
|
199
192
|
belongs to a carbohydrate.
|
|
200
|
-
|
|
193
|
+
|
|
201
194
|
Notes
|
|
202
195
|
-----
|
|
203
|
-
Carbohydrates are identified according to the PDB chemical component
|
|
196
|
+
Carbohydrates are identified according to the PDB chemical component
|
|
204
197
|
dictionary. A residue is considered a carbohydrate if its
|
|
205
198
|
``_chem_comp.type`` property has one of the following values (case
|
|
206
199
|
insensitive):
|
|
207
200
|
|
|
208
|
-
``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
|
|
209
|
-
``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
|
|
210
|
-
``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
|
|
201
|
+
``D-SACCHARIDE``, ``D-SACCHARIDE,ALPHA LINKING``,
|
|
202
|
+
``D-SACCHARIDE, BETA LINKING``, ``L-SACCHARIDE``,
|
|
203
|
+
``L-SACCHARIDE, ALPHA LINKING``, ``L-SACCHARIDE, BETA LINKING``,
|
|
211
204
|
``SACCHARIDE``
|
|
212
205
|
"""
|
|
213
|
-
return np.isin(array.res_name,
|
|
206
|
+
return np.isin(array.res_name, carbohydrate_names())
|
|
214
207
|
|
|
215
208
|
|
|
216
209
|
def filter_backbone(array):
|
|
@@ -299,7 +292,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
299
292
|
|
|
300
293
|
The result will depend on the atoms' order.
|
|
301
294
|
For instance, consider a molecule::
|
|
302
|
-
|
|
295
|
+
|
|
303
296
|
C3
|
|
304
297
|
|
|
|
305
298
|
C1-C2-C4
|
|
@@ -323,7 +316,7 @@ def filter_linear_bond_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
323
316
|
This array is `True` for all indices in `array`, where an atom
|
|
324
317
|
has a bond length with the next atom within [`min_len`, `max_len`]
|
|
325
318
|
boundaries.
|
|
326
|
-
|
|
319
|
+
|
|
327
320
|
Notes
|
|
328
321
|
-----
|
|
329
322
|
Note that this function purely uses distances between consecutive atoms.
|
|
@@ -438,7 +431,7 @@ def filter_first_altloc(atoms, altloc_ids):
|
|
|
438
431
|
Filter all atoms, that have the first *altloc* ID appearing in a
|
|
439
432
|
residue.
|
|
440
433
|
|
|
441
|
-
Structure files (PDB, PDBx
|
|
434
|
+
Structure files (PDB, PDBx) allow for duplicate atom records,
|
|
442
435
|
in case a residue is found in multiple alternate locations
|
|
443
436
|
(*altloc*).
|
|
444
437
|
This function is used to remove such duplicate atoms by choosing a
|
|
@@ -507,7 +500,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies):
|
|
|
507
500
|
For each residue, filter all atoms, that have the *altloc* ID
|
|
508
501
|
with the highest occupancy for this residue.
|
|
509
502
|
|
|
510
|
-
Structure files (PDB, PDBx
|
|
503
|
+
Structure files (PDB, PDBx) allow for duplicate atom records,
|
|
511
504
|
in case a residue is found in multiple alternate locations
|
|
512
505
|
(*altloc*).
|
|
513
506
|
This function is used to remove such duplicate atoms by choosing a
|
|
@@ -6,23 +6,20 @@
|
|
|
6
6
|
A subpackage for obtaining all kinds of chemical information about atoms
|
|
7
7
|
and residues, including masses, radii, bonds, etc.
|
|
8
8
|
|
|
9
|
-
Most information is extracted from the
|
|
9
|
+
Most information is extracted from the *Chemical Component Dictionary*
|
|
10
10
|
of the
|
|
11
|
-
`wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_
|
|
12
|
-
via tools from the
|
|
13
|
-
`biotite-util <https://github.com/biotite-dev/biotite-util>`_
|
|
14
|
-
repository.
|
|
11
|
+
`wwPDB <ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif>`_.
|
|
15
12
|
"""
|
|
16
13
|
|
|
17
14
|
__name__ = "biotite.structure.info"
|
|
18
15
|
__author__ = "Patrick Kunzmann, Tom David Müller"
|
|
19
16
|
|
|
17
|
+
from .groups import *
|
|
18
|
+
|
|
20
19
|
from .atoms import *
|
|
21
20
|
from .bonds import *
|
|
21
|
+
from .groups import *
|
|
22
22
|
from .masses import *
|
|
23
23
|
from .misc import *
|
|
24
24
|
from .radii import *
|
|
25
25
|
from .standardize import *
|
|
26
|
-
from .nucleotides import *
|
|
27
|
-
from .amino_acids import *
|
|
28
|
-
from .carbohydrates import *
|
biotite/structure/info/atoms.py
CHANGED
|
@@ -6,36 +6,15 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["residue"]
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
import msgpack
|
|
11
|
-
import numpy as np
|
|
12
|
-
from ..atoms import AtomArray
|
|
13
|
-
from ..bonds import BondList
|
|
9
|
+
from .ccd import get_ccd
|
|
14
10
|
|
|
15
11
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
Since loading the database is computationally expensive,
|
|
24
|
-
this is only done, when the residue database is actually required.
|
|
25
|
-
"""
|
|
26
|
-
global _residues
|
|
27
|
-
if _residues is not None:
|
|
28
|
-
# Database is already initialized
|
|
29
|
-
return
|
|
30
|
-
|
|
31
|
-
# Residue data is taken from
|
|
32
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
33
|
-
# (2019/01/27)
|
|
34
|
-
_info_dir = dirname(realpath(__file__))
|
|
35
|
-
with open(join(_info_dir, "residues.msgpack"), "rb") as file:
|
|
36
|
-
_residues = msgpack.unpack(
|
|
37
|
-
file, use_list=False, raw=False
|
|
38
|
-
)
|
|
12
|
+
non_hetero_residues = set([
|
|
13
|
+
"ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS",
|
|
14
|
+
"ILE","LEU","LYS","MET","PHE","PRO","PYL","SER","THR",
|
|
15
|
+
"TRP","TYR","VAL", "SEC",
|
|
16
|
+
"A", "DA", "G", "DG", "C", "DC", "U", "DT",
|
|
17
|
+
])
|
|
39
18
|
|
|
40
19
|
|
|
41
20
|
def residue(res_name):
|
|
@@ -62,19 +41,19 @@ def residue(res_name):
|
|
|
62
41
|
>>> alanine = residue("ALA")
|
|
63
42
|
>>> # Atoms and geometry
|
|
64
43
|
>>> print(alanine)
|
|
65
|
-
0 ALA N N -0.
|
|
66
|
-
0 ALA CA C 0.
|
|
67
|
-
0 ALA C C -0.
|
|
68
|
-
0 ALA O O -1.
|
|
69
|
-
0 ALA CB C 1.
|
|
70
|
-
0 ALA OXT O 0.
|
|
71
|
-
0 ALA H H -1.
|
|
72
|
-
0 ALA H2 H -0.
|
|
73
|
-
0 ALA HA H 0.
|
|
74
|
-
0 ALA HB1 H 1.
|
|
75
|
-
0 ALA HB2 H 0.
|
|
76
|
-
0 ALA HB3 H 2.
|
|
77
|
-
0 ALA HXT H 0.
|
|
44
|
+
0 ALA N N -0.970 0.490 1.500
|
|
45
|
+
0 ALA CA C 0.260 0.420 0.690
|
|
46
|
+
0 ALA C C -0.090 0.020 -0.720
|
|
47
|
+
0 ALA O O -1.060 -0.680 -0.920
|
|
48
|
+
0 ALA CB C 1.200 -0.620 1.300
|
|
49
|
+
0 ALA OXT O 0.660 0.440 -1.740
|
|
50
|
+
0 ALA H H -1.380 -0.420 1.480
|
|
51
|
+
0 ALA H2 H -0.680 0.660 2.450
|
|
52
|
+
0 ALA HA H 0.750 1.390 0.680
|
|
53
|
+
0 ALA HB1 H 1.460 -0.330 2.320
|
|
54
|
+
0 ALA HB2 H 0.720 -1.590 1.310
|
|
55
|
+
0 ALA HB3 H 2.110 -0.680 0.700
|
|
56
|
+
0 ALA HXT H 0.440 0.180 -2.650
|
|
78
57
|
>>> # Bonds
|
|
79
58
|
>>> print(alanine.atom_name[alanine.bonds.as_array()[:,:2]])
|
|
80
59
|
[['N' 'CA']
|
|
@@ -90,30 +69,14 @@ def residue(res_name):
|
|
|
90
69
|
['CB' 'HB3']
|
|
91
70
|
['OXT' 'HXT']]
|
|
92
71
|
"""
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
array.hetero = array_dict["hetero"]
|
|
105
|
-
|
|
106
|
-
array.coord[:,0] = array_dict["coord_x"]
|
|
107
|
-
array.coord[:,1] = array_dict["coord_y"]
|
|
108
|
-
array.coord[:,2] = array_dict["coord_z"]
|
|
109
|
-
|
|
110
|
-
array.bonds = BondList(
|
|
111
|
-
array.array_length(),
|
|
112
|
-
bonds = np.stack([
|
|
113
|
-
array_dict["bond_i"],
|
|
114
|
-
array_dict["bond_j"],
|
|
115
|
-
array_dict["bond_type"]
|
|
116
|
-
]).T
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
return array
|
|
72
|
+
# Avoid circular import
|
|
73
|
+
from ..io.pdbx import get_component
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
component = get_component(get_ccd(), res_name=res_name)
|
|
77
|
+
except KeyError:
|
|
78
|
+
raise KeyError(
|
|
79
|
+
f"No atom information found for residue '{res_name}' in CCD"
|
|
80
|
+
)
|
|
81
|
+
component.hetero[:] = res_name not in non_hetero_residues
|
|
82
|
+
return component
|
biotite/structure/info/bonds.py
CHANGED
|
@@ -4,98 +4,23 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.info"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["
|
|
7
|
+
__all__ = ["bond_type", "bonds_in_residue"]
|
|
8
8
|
|
|
9
|
-
import warnings
|
|
10
|
-
import copy
|
|
11
|
-
from os.path import join, dirname, realpath
|
|
12
|
-
import msgpack
|
|
13
9
|
from ..bonds import BondType
|
|
10
|
+
from .ccd import get_from_ccd
|
|
14
11
|
|
|
15
12
|
|
|
16
|
-
|
|
13
|
+
BOND_TYPES = {
|
|
14
|
+
("SING", "N") : BondType.SINGLE,
|
|
15
|
+
("DOUB", "N") : BondType.DOUBLE,
|
|
16
|
+
("TRIP", "N") : BondType.TRIPLE,
|
|
17
|
+
("QUAD", "N") : BondType.QUADRUPLE,
|
|
18
|
+
("SING", "Y") : BondType.AROMATIC_SINGLE,
|
|
19
|
+
("DOUB", "Y") : BondType.AROMATIC_DOUBLE,
|
|
20
|
+
("TRIP", "Y") : BondType.AROMATIC_TRIPLE,
|
|
21
|
+
}
|
|
17
22
|
|
|
18
|
-
|
|
19
|
-
def _init_dataset():
|
|
20
|
-
"""
|
|
21
|
-
Load the bond dataset from MessagePack file.
|
|
22
|
-
|
|
23
|
-
Since loading the database is computationally expensive,
|
|
24
|
-
this is only done, when the bond database is actually required.
|
|
25
|
-
"""
|
|
26
|
-
global _intra_bonds
|
|
27
|
-
if _intra_bonds is not None:
|
|
28
|
-
# Database is already initialized
|
|
29
|
-
return
|
|
30
|
-
|
|
31
|
-
# Bonds are taken from
|
|
32
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
33
|
-
# (2019/01/27)
|
|
34
|
-
_info_dir = dirname(realpath(__file__))
|
|
35
|
-
with open(join(_info_dir, "intra_bonds.msgpack"), "rb") as file:
|
|
36
|
-
_intra_bonds= msgpack.unpack(
|
|
37
|
-
file, use_list=False, raw=False, strict_map_key=False
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def bond_dataset():
|
|
42
|
-
"""
|
|
43
|
-
Get a copy of the complete bond dataset extracted from the chemical
|
|
44
|
-
components dictionary.
|
|
45
|
-
|
|
46
|
-
This dataset does only contain intra-residue bonds.
|
|
47
|
-
|
|
48
|
-
Returns
|
|
49
|
-
-------
|
|
50
|
-
bonds : dict (str -> dict ((str, str) -> int))
|
|
51
|
-
The bonds as nested dictionary.
|
|
52
|
-
It maps residue names (up to 3-letters, upper case) to
|
|
53
|
-
inner dictionaries.
|
|
54
|
-
Each of these dictionary contains the bond information for the
|
|
55
|
-
given residue.
|
|
56
|
-
Specifically, it uses a set of two atom names, that are bonded,
|
|
57
|
-
as keys and the respective :class:`BondType`
|
|
58
|
-
(represented by an integer) as values.
|
|
59
|
-
"""
|
|
60
|
-
_init_dataset()
|
|
61
|
-
return copy.copy(_intra_bonds)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def bond_order(res_name, atom_name1, atom_name2):
|
|
65
|
-
"""
|
|
66
|
-
Get the bond order for two atoms of the same residue, based
|
|
67
|
-
on the PDB chemical components dictionary.
|
|
68
|
-
|
|
69
|
-
DEPRECATED: Please use :func:`bond_type()` instead.
|
|
70
|
-
|
|
71
|
-
Parameters
|
|
72
|
-
----------
|
|
73
|
-
res_name : str
|
|
74
|
-
The up to 3-letter name of the residue
|
|
75
|
-
`atom_name1` and `atom_name2` belong to.
|
|
76
|
-
atom_name1, atom_name2 : str
|
|
77
|
-
The names of the two atoms to get the bond order from.
|
|
78
|
-
|
|
79
|
-
Returns
|
|
80
|
-
-------
|
|
81
|
-
order : int or None
|
|
82
|
-
The order of the bond between `atom_name1` and `atom_name2`.
|
|
83
|
-
If the atoms form no bond, if any of the two atoms does not
|
|
84
|
-
exist in the context of the residue or if the residue is unknown
|
|
85
|
-
to the chemical components dictionary, `None` is returned.
|
|
86
|
-
"""
|
|
87
|
-
warnings.warn("Please use `bond_type()` instead", DeprecationWarning)
|
|
88
|
-
|
|
89
|
-
_init_dataset()
|
|
90
|
-
btype = bond_type(res_name, atom_name1, atom_name2)
|
|
91
|
-
if btype is None:
|
|
92
|
-
return None
|
|
93
|
-
elif btype == BondType.AROMATIC_SINGLE:
|
|
94
|
-
return 1
|
|
95
|
-
elif btype == BondType.AROMATIC_DOUBLE:
|
|
96
|
-
return 2
|
|
97
|
-
else:
|
|
98
|
-
return int(btype)
|
|
23
|
+
_intra_bonds = {}
|
|
99
24
|
|
|
100
25
|
|
|
101
26
|
def bond_type(res_name, atom_name1, atom_name2):
|
|
@@ -110,7 +35,7 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
110
35
|
`atom_name1` and `atom_name2` belong to.
|
|
111
36
|
atom_name1, atom_name2 : str
|
|
112
37
|
The names of the two atoms to get the bond order from.
|
|
113
|
-
|
|
38
|
+
|
|
114
39
|
Returns
|
|
115
40
|
-------
|
|
116
41
|
order : BondType or None
|
|
@@ -119,7 +44,7 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
119
44
|
If the atoms form no bond, if any of the two atoms does not
|
|
120
45
|
exist in the context of the residue or if the residue is unknown
|
|
121
46
|
to the chemical components dictionary, `None` is returned.
|
|
122
|
-
|
|
47
|
+
|
|
123
48
|
Examples
|
|
124
49
|
--------
|
|
125
50
|
|
|
@@ -132,14 +57,13 @@ def bond_type(res_name, atom_name1, atom_name2):
|
|
|
132
57
|
>>> print(bond_type("PHE", "FOO", "BAR"))
|
|
133
58
|
None
|
|
134
59
|
"""
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if group_bonds is None:
|
|
60
|
+
bonds_for_residue = bonds_in_residue(res_name)
|
|
61
|
+
if bonds_for_residue is None:
|
|
138
62
|
return None
|
|
139
|
-
# Try both atom
|
|
140
|
-
bond_type_int =
|
|
63
|
+
# Try both atom orders
|
|
64
|
+
bond_type_int = bonds_for_residue.get(
|
|
141
65
|
(atom_name1, atom_name2),
|
|
142
|
-
|
|
66
|
+
bonds_for_residue.get((atom_name2, atom_name1))
|
|
143
67
|
)
|
|
144
68
|
if bond_type_int is not None:
|
|
145
69
|
return BondType(bond_type_int)
|
|
@@ -156,15 +80,22 @@ def bonds_in_residue(res_name):
|
|
|
156
80
|
----------
|
|
157
81
|
res_name : str
|
|
158
82
|
The up to 3-letter name of the residue to get the bonds for.
|
|
159
|
-
|
|
83
|
+
|
|
160
84
|
Returns
|
|
161
85
|
-------
|
|
162
|
-
bonds : dict (str -> int)
|
|
86
|
+
bonds : dict ((str, str) -> int)
|
|
163
87
|
A dictionary that maps tuples of two atom names to their
|
|
164
88
|
respective bond types (represented as integer).
|
|
165
|
-
|
|
89
|
+
Empty, if the residue is unknown to the
|
|
166
90
|
chemical components dictionary.
|
|
167
|
-
|
|
91
|
+
|
|
92
|
+
Warnings
|
|
93
|
+
--------
|
|
94
|
+
Treat the returned dictionary as immutable.
|
|
95
|
+
Modifying the dictionary may lead to unexpected behavior.
|
|
96
|
+
This may affect other functionalities throughout *Biotite* that use this
|
|
97
|
+
function.
|
|
98
|
+
|
|
168
99
|
Examples
|
|
169
100
|
--------
|
|
170
101
|
>>> bonds = bonds_in_residue("PHE")
|
|
@@ -195,5 +126,20 @@ def bonds_in_residue(res_name):
|
|
|
195
126
|
H2 + N -> BondType.SINGLE
|
|
196
127
|
HXT + OXT -> BondType.SINGLE
|
|
197
128
|
"""
|
|
198
|
-
|
|
199
|
-
|
|
129
|
+
global _intra_bonds
|
|
130
|
+
if res_name not in _intra_bonds:
|
|
131
|
+
chem_comp_bond_dict = get_from_ccd("chem_comp_bond", res_name)
|
|
132
|
+
if chem_comp_bond_dict is None:
|
|
133
|
+
_intra_bonds[res_name] = {}
|
|
134
|
+
else:
|
|
135
|
+
bonds_for_residue = {}
|
|
136
|
+
for atom1, atom2, order, aromatic_flag in zip(
|
|
137
|
+
chem_comp_bond_dict["atom_id_1"],
|
|
138
|
+
chem_comp_bond_dict["atom_id_2"],
|
|
139
|
+
chem_comp_bond_dict["value_order"],
|
|
140
|
+
chem_comp_bond_dict["pdbx_aromatic_flag"]
|
|
141
|
+
):
|
|
142
|
+
bond_type = BOND_TYPES[order, aromatic_flag]
|
|
143
|
+
bonds_for_residue[atom1.item(), atom2.item()] = bond_type
|
|
144
|
+
_intra_bonds[res_name] = bonds_for_residue
|
|
145
|
+
return _intra_bonds[res_name]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
These files are based on the
|
|
2
|
+
`Chemical Component Dictionary <https://www.wwpdb.org/data/ccd>`_
|
|
3
|
+
and were created using ``setup_ccd.py``.
|
|
4
|
+
|
|
5
|
+
To keep the size of the repository small, the original commit should be
|
|
6
|
+
rewritten, if the formats of the affected files are compatible with the
|
|
7
|
+
original ones.
|
|
8
|
+
The name of the commit is ``Add CCD dataset``.
|