biotite 0.39.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/database/rcsb/download.py +19 -14
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +258 -237
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +243 -222
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.c +215 -196
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpp +233 -205
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.c +258 -237
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.c +235 -214
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.c +255 -234
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.c +274 -253
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.c +215 -196
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.c +217 -197
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.c +215 -195
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +235 -214
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +215 -196
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.c +227 -202
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.c +215 -196
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1437 -1279
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.c +217 -197
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/charges.c +1052 -1101
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +31 -68
- biotite/structure/info/bonds.py +47 -101
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +78 -25
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +167 -181
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +219 -198
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.c +217 -197
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.c +225 -204
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.c +215 -196
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +89 -34
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +12 -6
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +858 -386
- biotite/structure/io/pdbx/encoding.c +112813 -0
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +215 -196
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +618 -116
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["get_ccd", "get_from_ccd"]
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
CCD_DIR = Path(__file__).parent / "ccd"
|
|
14
|
+
INDEX_COLUMN_NAME = {
|
|
15
|
+
"chem_comp": "id",
|
|
16
|
+
"chem_comp_atom": "comp_id",
|
|
17
|
+
"chem_comp_bond": "comp_id",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
_ccd_block = None
|
|
21
|
+
# For each category this index gives the start and stop for each residue
|
|
22
|
+
_residue_index = {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_ccd():
    """
    Provide the bundled PDB *Chemical Component Dictionary* (CCD).

    The dictionary is read from ``components.bcif`` inside ``CCD_DIR``
    on the first call only.
    The result is stored in the module-level ``_ccd_block`` and handed
    out again on every subsequent call.

    Returns
    -------
    ccd : block of a BinaryCIFFile
        The CCD, i.e. the ``block`` attribute of the
        :class:`BinaryCIFFile` read from the bundled file.
    """
    # Imported inside the function to avoid a circular import
    from ..io.pdbx.bcif import BinaryCIFFile

    global _ccd_block
    if _ccd_block is not None:
        # Already loaded by an earlier call
        return _ccd_block

    ccd_path = CCD_DIR / "components.bcif"
    _ccd_block = BinaryCIFFile.read(ccd_path).block
    return _ccd_block
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Look up the rows that belong to a single residue in one category
    of the PDB *Chemical Component Dictionary* (CCD).

    Parameters
    ----------
    category_name : str
        The name of the CCD category to look into.
        It must be a key of ``INDEX_COLUMN_NAME``, as that mapping
        tells which column of the category holds the residue
        identifier.
    comp_id : str
        The residue identifier, i.e. the ``res_name``.
    column_name : str, optional
        The single column to retrieve.
        If None, every column of the category is returned in a
        dictionary.
        By default None.

    Returns
    -------
    value : ndarray or dict or None
        Either the rows of the requested column, or a dictionary
        mapping each column name to its rows.
        ``None`` if the `comp_id` is not found in the category.
    """
    category = get_ccd()[category_name]

    # The residue -> (start, stop) lookup of a category is built on
    # first use and kept in '_residue_index' for later calls
    if category_name not in _residue_index:
        id_column = category[INDEX_COLUMN_NAME[category_name]]
        _residue_index[category_name] = _index_residues(
            id_column.as_array()
        )

    row_range = _residue_index[category_name].get(comp_id)
    if row_range is None:
        # Unknown residue
        return None
    start, stop = row_range

    if column_name is not None:
        return category[column_name].as_array()[start:stop]

    columns = {}
    for col_name in category.keys():
        columns[col_name] = category[col_name].as_array()[start:stop]
    return columns
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _index_residues(id_column):
|
|
88
|
+
residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
|
|
89
|
+
# The final start is the exclusive stop of last residue
|
|
90
|
+
residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
|
|
91
|
+
index = {}
|
|
92
|
+
for i in range(len(residue_starts)-1):
|
|
93
|
+
comp_id = id_column[residue_starts[i]].item()
|
|
94
|
+
index[comp_id] = (residue_starts[i], residue_starts[i+1])
|
|
95
|
+
return index
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Tom David Müller, Patrick Kunzmann"
|
|
7
|
+
__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import copy
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
CCD_DIR = Path(__file__).parent / "ccd"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
group_lists = {}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def amino_acid_names():
    """
    Return the three-letter codes of all amino acids listed in the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are obtained from the ``"amino_acids"`` group via
    :func:`_get_group_members()`.

    Returns
    -------
    amino_acid_names : tuple of str
        The three-letter codes of the residues that are peptide
        monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("amino_acids")
    return names
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def nucleotide_names():
    """
    Return the three-letter codes of all nucleotides listed in the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are obtained from the ``"nucleotides"`` group via
    :func:`_get_group_members()`.

    Returns
    -------
    nucleotide_names : tuple of str
        The three-letter codes of the residues that are DNA/RNA
        monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("nucleotides")
    return names
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def carbohydrate_names():
    """
    Return the three-letter codes of all carbohydrates listed in the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are obtained from the ``"carbohydrates"`` group via
    :func:`_get_group_members()`.

    Returns
    -------
    carbohydrate_names : tuple of str
        The three-letter codes of the residues that are saccharide
        monomers.

    References
    ----------

    .. footbibliography::
    """
    names = _get_group_members("carbohydrates")
    return names
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_group_members(group_name):
    """
    Get the whitespace-separated entries of ``<group_name>.txt`` in
    ``CCD_DIR`` as a tuple.

    The file is read only on the first request for a group.
    Afterwards the tuple stored in ``group_lists`` is returned.

    Parameters
    ----------
    group_name : str
        The file name (without ``.txt`` suffix) of the group.

    Returns
    -------
    members : tuple of str
        The entries of the group file.
    """
    try:
        return group_lists[group_name]
    except KeyError:
        # Not requested before -> read the file below
        pass

    listing = (CCD_DIR / f"{group_name}.txt").read_text()
    members = tuple(listing.split())
    group_lists[group_name] = members
    return members
|
biotite/structure/info/masses.py
CHANGED
|
@@ -7,20 +7,14 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["mass"]
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
|
-
from
|
|
11
|
-
import msgpack
|
|
10
|
+
from pathlib import Path
|
|
12
11
|
from ..atoms import Atom, AtomArray, AtomArrayStack
|
|
12
|
+
from .ccd import get_from_ccd
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
_info_dir = dirname(realpath(__file__))
|
|
16
15
|
# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# Masses are taken from
|
|
20
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
21
|
-
# (2019/01/27)
|
|
22
|
-
with open(join(_info_dir, "residue_masses.msgpack"), "rb") as file:
|
|
23
|
-
_res_masses = msgpack.load(file, raw=False)
|
|
16
|
+
ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
|
|
17
|
+
_atom_masses = None
|
|
24
18
|
|
|
25
19
|
|
|
26
20
|
def mass(item, is_residue=None):
|
|
@@ -34,7 +28,7 @@ def mass(item, is_residue=None):
|
|
|
34
28
|
from the molecule.
|
|
35
29
|
For example non-terminal residues in a protein or nucleotide chain
|
|
36
30
|
miss the mass of a water molecule.
|
|
37
|
-
|
|
31
|
+
|
|
38
32
|
Parameters
|
|
39
33
|
----------
|
|
40
34
|
item : str or Atom or AtomArray or AtomArrayStack
|
|
@@ -50,17 +44,17 @@ def mass(item, is_residue=None):
|
|
|
50
44
|
If set to false, the string is strictly interpreted as element.
|
|
51
45
|
By default the string will be interpreted as element at first
|
|
52
46
|
and secondly as residue name, if the element is unknown.
|
|
53
|
-
|
|
47
|
+
|
|
54
48
|
Returns
|
|
55
49
|
-------
|
|
56
50
|
mass : float or None
|
|
57
51
|
The mass of the given object in *u*. None if the mass is unknown.
|
|
58
|
-
|
|
52
|
+
|
|
59
53
|
References
|
|
60
54
|
----------
|
|
61
|
-
|
|
55
|
+
|
|
62
56
|
.. footbibliography::
|
|
63
|
-
|
|
57
|
+
|
|
64
58
|
Examples
|
|
65
59
|
--------
|
|
66
60
|
|
|
@@ -94,29 +88,36 @@ def mass(item, is_residue=None):
|
|
|
94
88
|
>>> print(mass("N"))
|
|
95
89
|
14.007
|
|
96
90
|
"""
|
|
91
|
+
global _atom_masses
|
|
92
|
+
with open(ATOM_MASSES_FILE, "r") as file:
|
|
93
|
+
_atom_masses = json.load(file)
|
|
97
94
|
|
|
98
95
|
if isinstance(item, str):
|
|
99
96
|
if is_residue is None:
|
|
100
97
|
result_mass = _atom_masses.get(item.upper())
|
|
101
98
|
if result_mass is None:
|
|
102
|
-
result_mass =
|
|
99
|
+
result_mass = get_from_ccd(
|
|
100
|
+
"chem_comp", item.upper(), "formula_weight"
|
|
101
|
+
).item()
|
|
103
102
|
elif not is_residue:
|
|
104
103
|
result_mass = _atom_masses.get(item.upper())
|
|
105
104
|
else:
|
|
106
|
-
result_mass =
|
|
107
|
-
|
|
105
|
+
result_mass = get_from_ccd(
|
|
106
|
+
"chem_comp", item.upper(), "formula_weight"
|
|
107
|
+
).item()
|
|
108
|
+
|
|
108
109
|
elif isinstance(item, Atom):
|
|
109
110
|
result_mass = mass(item.element, is_residue=False)
|
|
110
111
|
elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
|
|
111
112
|
result_mass = sum(
|
|
112
113
|
(mass(element, is_residue=False) for element in item.element)
|
|
113
114
|
)
|
|
114
|
-
|
|
115
|
+
|
|
115
116
|
else:
|
|
116
117
|
raise TypeError(
|
|
117
118
|
f"Cannot calculate mass for {type(item).__name__} objects"
|
|
118
119
|
)
|
|
119
|
-
|
|
120
|
+
|
|
120
121
|
if result_mass is None:
|
|
121
122
|
raise KeyError(f"{item} is not known")
|
|
122
123
|
return result_mass
|
biotite/structure/info/misc.py
CHANGED
|
@@ -4,39 +4,28 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.info"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["all_residues", "full_name", "link_type"]
|
|
7
|
+
__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
import msgpack
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
_info_dir = dirname(realpath(__file__))
|
|
14
|
-
# Data is taken from
|
|
15
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
16
|
-
# (2019/01/27)
|
|
17
|
-
with open(join(_info_dir, "residue_names.msgpack"), "rb") as file:
|
|
18
|
-
_res_names = msgpack.load(file, raw=False)
|
|
19
|
-
with open(join(_info_dir, "link_types.msgpack"), "rb") as file:
|
|
20
|
-
_link_types = msgpack.load(file, raw=False)
|
|
9
|
+
from .ccd import get_ccd, get_from_ccd
|
|
21
10
|
|
|
22
11
|
|
|
23
12
|
def all_residues():
|
|
24
13
|
"""
|
|
25
14
|
Get a list of all residues/compound names in the
|
|
26
15
|
PDB chemical components dictionary.
|
|
27
|
-
|
|
16
|
+
|
|
28
17
|
Returns
|
|
29
18
|
-------
|
|
30
19
|
residues : list of str
|
|
31
20
|
A list of all available residue names (up to 3 letters each).
|
|
32
|
-
|
|
21
|
+
|
|
33
22
|
Examples
|
|
34
23
|
--------
|
|
35
24
|
|
|
36
25
|
>>> print(all_residues()[1000 : 1010])
|
|
37
|
-
['
|
|
26
|
+
['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
|
|
38
27
|
"""
|
|
39
|
-
return
|
|
28
|
+
return get_ccd()["chem_comp"]["id"].as_array().tolist()
|
|
40
29
|
|
|
41
30
|
|
|
42
31
|
def full_name(res_name):
|
|
@@ -48,19 +37,24 @@ def full_name(res_name):
|
|
|
48
37
|
----------
|
|
49
38
|
res_name : str
|
|
50
39
|
The up to 3-letter residue name.
|
|
51
|
-
|
|
40
|
+
|
|
52
41
|
Returns
|
|
53
42
|
-------
|
|
54
|
-
name : str
|
|
43
|
+
name : str or None
|
|
55
44
|
The full name of the residue.
|
|
56
|
-
|
|
45
|
+
If the residue is unknown to the chemical components dictionary,
|
|
46
|
+
``None`` is returned.
|
|
47
|
+
|
|
57
48
|
Examples
|
|
58
49
|
--------
|
|
59
50
|
|
|
60
51
|
>>> print(full_name("MAN"))
|
|
61
52
|
alpha-D-mannopyranose
|
|
62
53
|
"""
|
|
63
|
-
|
|
54
|
+
array = get_from_ccd("chem_comp", res_name.upper(), "name")
|
|
55
|
+
if array is None:
|
|
56
|
+
return None
|
|
57
|
+
return array.item()
|
|
64
58
|
|
|
65
59
|
|
|
66
60
|
def link_type(res_name):
|
|
@@ -72,12 +66,14 @@ def link_type(res_name):
|
|
|
72
66
|
----------
|
|
73
67
|
res_name : str
|
|
74
68
|
The up to 3-letter residue name.
|
|
75
|
-
|
|
69
|
+
|
|
76
70
|
Returns
|
|
77
71
|
-------
|
|
78
|
-
link_type : str
|
|
72
|
+
link_type : str or None
|
|
79
73
|
The link type.
|
|
80
|
-
|
|
74
|
+
If the residue is unknown to the chemical components dictionary,
|
|
75
|
+
``None`` is returned.
|
|
76
|
+
|
|
81
77
|
Examples
|
|
82
78
|
--------
|
|
83
79
|
|
|
@@ -88,4 +84,61 @@ def link_type(res_name):
|
|
|
88
84
|
>>> print(link_type("HOH"))
|
|
89
85
|
NON-POLYMER
|
|
90
86
|
"""
|
|
91
|
-
|
|
87
|
+
array = get_from_ccd("chem_comp", res_name.upper(), "type")
|
|
88
|
+
if array is None:
|
|
89
|
+
return None
|
|
90
|
+
return array.item()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def one_letter_code(res_name):
    """
    Look up the one-letter code of a residue/compound in the
    PDB chemical components dictionary.

    A one-letter code exists only for amino acids and nucleotides
    and for compounds that are structurally similar to them.

    Parameters
    ----------
    res_name : str
        The up to 3-letter residue name.
        It is converted to upper case before the lookup.

    Returns
    -------
    one_letter_code : str or None
        The one-letter code.
        None if the compound is not present in the CCD or if its
        one-letter code entry is an empty string.

    Examples
    --------

    Get the one letter code for an amino acid (or a nucleotide).

    >>> print(full_name("ALA"))
    ALANINE
    >>> print(one_letter_code("ALA"))
    A

    For similar compounds, the one-letter code is also defined.

    >>> print(full_name("DAL"))
    D-ALANINE
    >>> print(one_letter_code("DAL"))
    A

    For other compounds, the one-letter code is not defined.

    >>> print(full_name("MAN"))
    alpha-D-mannopyranose
    >>> print(one_letter_code("MAN"))
    None

    """
    codes = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
    if codes is None:
        # Compound is unknown to the CCD
        return None
    code = codes.item()
    # An empty entry means that no one-letter code is defined
    return None if code == "" else code
|
|
@@ -6,15 +6,13 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["standardize_order"]
|
|
8
8
|
|
|
9
|
+
import warnings
|
|
9
10
|
import numpy as np
|
|
10
|
-
from .
|
|
11
|
+
from .ccd import get_from_ccd
|
|
11
12
|
from ..residues import get_residue_starts
|
|
12
13
|
from ..error import BadStructureError
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
_atom_name_cache = {}
|
|
16
|
-
|
|
17
|
-
|
|
18
16
|
def standardize_order(atoms):
|
|
19
17
|
"""
|
|
20
18
|
Get an index array for an input :class:`AtomArray` or
|
|
@@ -34,20 +32,20 @@ def standardize_order(atoms):
|
|
|
34
32
|
atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
|
|
35
33
|
Input structure with atoms that are potentially not in the
|
|
36
34
|
*standard* order.
|
|
37
|
-
|
|
35
|
+
|
|
38
36
|
Returns
|
|
39
37
|
-------
|
|
40
38
|
indices : ndarray, dtype=int, shape=(n,)
|
|
41
39
|
When this index array is applied on the input `atoms`,
|
|
42
40
|
the atoms for each residue are reordered to obtain the
|
|
43
41
|
standard *RCSB PDB* atom order.
|
|
44
|
-
|
|
42
|
+
|
|
45
43
|
Raises
|
|
46
44
|
------
|
|
47
45
|
BadStructureError
|
|
48
46
|
If the input `atoms` have duplicate atoms (same atom name)
|
|
49
47
|
within a residue.
|
|
50
|
-
|
|
48
|
+
|
|
51
49
|
Examples
|
|
52
50
|
--------
|
|
53
51
|
|
|
@@ -123,11 +121,18 @@ def standardize_order(atoms):
|
|
|
123
121
|
stop = starts[i+1]
|
|
124
122
|
|
|
125
123
|
res_name = atoms.res_name[start]
|
|
126
|
-
standard_atom_names =
|
|
124
|
+
standard_atom_names = get_from_ccd(
|
|
125
|
+
"chem_comp_atom", res_name, "atom_id"
|
|
126
|
+
)
|
|
127
127
|
if standard_atom_names is None:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
# If the residue is not in the CCD, keep the current order
|
|
129
|
+
warnings.warn(
|
|
130
|
+
f"Residue '{res_name}' is not in the CCD, "
|
|
131
|
+
f"keeping current atom order"
|
|
132
|
+
)
|
|
133
|
+
reordered_indices[start : stop] = np.arange(start, stop)
|
|
134
|
+
continue
|
|
135
|
+
|
|
131
136
|
reordered_indices[start : stop] = _reorder(
|
|
132
137
|
atoms.atom_name[start : stop], standard_atom_names
|
|
133
138
|
) + start
|
|
@@ -152,7 +157,7 @@ def _reorder(origin, target):
|
|
|
152
157
|
The atom names to reorder.
|
|
153
158
|
target : ndarray, dtype=str
|
|
154
159
|
The atom names in target order.
|
|
155
|
-
|
|
160
|
+
|
|
156
161
|
Returns
|
|
157
162
|
-------
|
|
158
163
|
indices : ndarray, dtype=int
|
biotite/structure/integrity.py
CHANGED
|
@@ -12,7 +12,7 @@ __author__ = "Patrick Kunzmann, Daniel Bauer"
|
|
|
12
12
|
__all__ = ["check_id_continuity", "check_atom_id_continuity",
|
|
13
13
|
"check_res_id_continuity", "check_backbone_continuity",
|
|
14
14
|
"check_duplicate_atoms", "check_bond_continuity",
|
|
15
|
-
"check_linear_continuity"
|
|
15
|
+
"check_linear_continuity"]
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import warnings
|
|
@@ -32,17 +32,17 @@ def check_id_continuity(array):
|
|
|
32
32
|
"""
|
|
33
33
|
Check if the residue IDs are incremented by more than 1 or
|
|
34
34
|
decremented, from one atom to the next one.
|
|
35
|
-
|
|
35
|
+
|
|
36
36
|
An increment by more than 1 is a strong clue for missing residues,
|
|
37
37
|
a decrement means probably a start of a new chain.
|
|
38
38
|
|
|
39
39
|
DEPRECATED: Use :func:`check_res_id_continuity()` instead.
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
Parameters
|
|
42
42
|
----------
|
|
43
43
|
array : AtomArray or AtomArrayStack
|
|
44
44
|
The array to be checked.
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
Returns
|
|
47
47
|
-------
|
|
48
48
|
discontinuity : ndarray, dtype=int
|
|
@@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
|
|
|
60
60
|
"""
|
|
61
61
|
Check if the atom IDs are incremented by more than 1 or
|
|
62
62
|
decremented, from one atom to the next one.
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
An increment by more than 1 is a strong clue for missing atoms.
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
Parameters
|
|
67
67
|
----------
|
|
68
68
|
array : AtomArray or AtomArrayStack
|
|
69
69
|
The array to be checked.
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
Returns
|
|
72
72
|
-------
|
|
73
73
|
discontinuity : ndarray, dtype=int
|
|
@@ -81,15 +81,15 @@ def check_res_id_continuity(array):
|
|
|
81
81
|
"""
|
|
82
82
|
Check if the residue IDs are incremented by more than 1 or
|
|
83
83
|
decremented, from one atom to the next one.
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
An increment by more than 1 is a strong clue for missing residues,
|
|
86
86
|
a decrement means probably a start of a new chain.
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
Parameters
|
|
89
89
|
----------
|
|
90
90
|
array : AtomArray or AtomArrayStack
|
|
91
91
|
The array to be checked.
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
Returns
|
|
94
94
|
-------
|
|
95
95
|
discontinuity : ndarray, dtype=int
|
|
@@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
168
168
|
"""
|
|
169
169
|
Check if the (peptide or phosphate) backbone atoms have
|
|
170
170
|
non-reasonable distance to the next atom.
|
|
171
|
-
|
|
171
|
+
|
|
172
172
|
A large or very small distance is a very strong clue, that there is
|
|
173
173
|
no bond between those atoms, therefore the chain is discontinued.
|
|
174
174
|
|
|
@@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
|
|
|
206
206
|
"""
|
|
207
207
|
Check if a structure contains duplicate atoms, i.e. two atoms in a
|
|
208
208
|
structure have the same annotations (coordinates may be different).
|
|
209
|
-
|
|
209
|
+
|
|
210
210
|
Duplicate atoms may appear, when a structure has occupancy for an
|
|
211
211
|
atom at two or more positions or when the *altloc* positions are
|
|
212
212
|
improperly read.
|
|
213
|
-
|
|
213
|
+
|
|
214
214
|
Parameters
|
|
215
215
|
----------
|
|
216
216
|
array : AtomArray or AtomArrayStack
|
|
217
217
|
The array to be checked.
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
Returns
|
|
220
220
|
-------
|
|
221
221
|
duplicate : ndarray, dtype=int
|
|
@@ -228,16 +228,16 @@ def check_duplicate_atoms(array):
|
|
|
228
228
|
for i in range(1, array.array_length()):
|
|
229
229
|
# Start with assumption that all atoms in the array
|
|
230
230
|
# until index i are duplicates of the atom at index i
|
|
231
|
-
|
|
231
|
+
is_duplicate = np.full(i, True, dtype=bool)
|
|
232
232
|
for annot in annots:
|
|
233
233
|
# For each annotation array filter out the atoms until
|
|
234
234
|
# index i that have an unequal annotation
|
|
235
|
-
# to the atom at index i
|
|
236
|
-
|
|
235
|
+
# to the atom at index i
|
|
236
|
+
is_duplicate &= (annot[:i] == annot[i])
|
|
237
237
|
# After checking all annotation arrays,
|
|
238
238
|
# if there still is any duplicate to the atom at index i,
|
|
239
239
|
# add i to the list of duplicate atom indices
|
|
240
|
-
if
|
|
240
|
+
if is_duplicate.any():
|
|
241
241
|
duplicates.append(i)
|
|
242
242
|
return np.array(duplicates)
|
|
243
243
|
|
|
@@ -255,7 +255,7 @@ def check_in_box(array):
|
|
|
255
255
|
----------
|
|
256
256
|
array : AtomArray or AtomArrayStack
|
|
257
257
|
The array to be checked.
|
|
258
|
-
|
|
258
|
+
|
|
259
259
|
Returns
|
|
260
260
|
-------
|
|
261
261
|
outside : ndarray, dtype=int
|
|
@@ -266,54 +266,3 @@ def check_in_box(array):
|
|
|
266
266
|
box = array.box
|
|
267
267
|
fractions = coord_to_fraction(array, box)
|
|
268
268
|
return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def renumber_atom_ids(array, start=None):
|
|
272
|
-
"""
|
|
273
|
-
Renumber the atom IDs of the given array.
|
|
274
|
-
|
|
275
|
-
Parameters
|
|
276
|
-
----------
|
|
277
|
-
array : AtomArray or AtomArrayStack
|
|
278
|
-
The array to be checked.
|
|
279
|
-
start : int, optional
|
|
280
|
-
The starting index for renumbering.
|
|
281
|
-
The first ID in the array is taken by default.
|
|
282
|
-
|
|
283
|
-
Returns
|
|
284
|
-
-------
|
|
285
|
-
array : AtomArray or AtomArrayStack
|
|
286
|
-
The renumbered array.
|
|
287
|
-
"""
|
|
288
|
-
if "atom_id" not in array.get_annotation_categories():
|
|
289
|
-
raise ValueError("The atom array must have the 'atom_id' annotation")
|
|
290
|
-
if start is None:
|
|
291
|
-
start = array.atom_id[0]
|
|
292
|
-
array.atom_id = np.arange(start, array.shape[-1]+1)
|
|
293
|
-
return array
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
def renumber_res_ids(array, start=None):
|
|
297
|
-
"""
|
|
298
|
-
Renumber the residue IDs of the given array.
|
|
299
|
-
|
|
300
|
-
Parameters
|
|
301
|
-
----------
|
|
302
|
-
array : AtomArray or AtomArrayStack
|
|
303
|
-
The array to be checked.
|
|
304
|
-
start : int, optional
|
|
305
|
-
The starting index for renumbering.
|
|
306
|
-
The first ID in the array is taken by default.
|
|
307
|
-
|
|
308
|
-
Returns
|
|
309
|
-
-------
|
|
310
|
-
array : AtomArray or AtomArrayStack
|
|
311
|
-
The renumbered array.
|
|
312
|
-
"""
|
|
313
|
-
if start is None:
|
|
314
|
-
start = array.res_id[0]
|
|
315
|
-
diff = np.diff(array.res_id)
|
|
316
|
-
diff[diff != 0] = 1
|
|
317
|
-
new_res_ids = np.concatenate(([start], diff)).cumsum()
|
|
318
|
-
array.res_id = new_res_ids
|
|
319
|
-
return array
|