biotite 0.39.0__cp312-cp312-win_amd64.whl → 0.40.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/rcsb/download.py +19 -14
- biotite/sequence/align/banded.c +256 -235
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.c +241 -220
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.c +213 -194
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cpp +231 -203
- biotite/sequence/align/localgapped.c +256 -235
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.c +233 -212
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.c +253 -232
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.c +272 -251
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/permutation.c +213 -194
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.c +215 -195
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.c +213 -193
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codec.c +233 -212
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.c +213 -194
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.c +225 -200
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.c +213 -194
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1173 -1224
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.c +215 -195
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/charges.c +1050 -1099
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +25 -67
- biotite/structure/info/bonds.py +46 -100
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1646 -0
- biotite/structure/info/ccd/carbohydrates.txt +1133 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +797 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +11 -22
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +1 -1
- biotite/structure/io/general.py +37 -43
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +217 -196
- biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/convertfile.c +215 -195
- biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/decode.c +223 -202
- biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/encode.c +213 -194
- biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +5 -3
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +13 -6
- biotite/structure/io/pdbx/bcif.py +649 -0
- biotite/structure/io/pdbx/cif.py +1028 -0
- biotite/structure/io/pdbx/component.py +243 -0
- biotite/structure/io/pdbx/convert.py +707 -359
- biotite/structure/io/pdbx/encoding.c +112803 -0
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/error.py +14 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/sasa.c +213 -194
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/superimpose.py +158 -115
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/RECORD +92 -90
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["get_ccd", "get_from_ccd"]
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Directory next to this module that holds the bundled CCD data files
CCD_DIR = Path(__file__).parent / "ccd"
# For each CCD category that can be queried per residue: the name of
# the column that holds the component identifier of each row
INDEX_COLUMN_NAME = {
    "chem_comp": "id",
    "chem_comp_atom": "comp_id",
    "chem_comp_bond": "comp_id",
}

# Cache for the loaded CCD, filled on the first call of `get_ccd()`
_ccd_block = None
# For each category this index gives the start and stop for each residue
# (filled lazily per category by `get_from_ccd()`)
_residue_index = {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_ccd():
    """
    Get the PDB *Chemical Component Dictionary* (CCD).

    The dictionary is read from ``components.bcif`` in ``CCD_DIR`` on
    the first call only.
    The loaded object is kept in a module-level cache and handed out
    again on every later call.

    Returns
    -------
    ccd
        The CCD, i.e. the ``block`` attribute of the
        :class:`BinaryCIFFile` read from ``components.bcif``.
    """
    # Avoid circular import
    from ..io.pdbx.bcif import BinaryCIFFile

    global _ccd_block

    if _ccd_block is not None:
        # Already loaded by a previous call
        return _ccd_block

    ccd_file = BinaryCIFFile.read(CCD_DIR / "components.bcif")
    _ccd_block = ccd_file.block
    return _ccd_block
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Look up the rows belonging to one residue in a category of the
    PDB *Chemical Component Dictionary* (CCD).

    The row range of every residue in a category is computed the first
    time that category is requested and reused afterwards.

    Parameters
    ----------
    category_name : str
        The category in the CCD.
        The category must have an entry in ``INDEX_COLUMN_NAME``.
    comp_id : str
        The residue identifier, i.e. the ``res_name``.
    column_name : str, optional
        The name of the column to be retrieved.
        If None, all columns are returned as dictionary.
        By default None.

    Returns
    -------
    value : ndarray or dict or None
        The array of the given column or all columns as dictionary
        (column name -> array), restricted to the rows of the residue.
        ``None`` if the `comp_id` is not found in the category.
    """
    global _residue_index

    category = get_ccd()[category_name]

    # Build the per-residue row ranges of this category on first use
    residue_ranges = _residue_index.get(category_name)
    if residue_ranges is None:
        id_column = category[INDEX_COLUMN_NAME[category_name]]
        residue_ranges = _index_residues(id_column.as_array())
        _residue_index[category_name] = residue_ranges

    row_range = residue_ranges.get(comp_id)
    if row_range is None:
        # Unknown residue
        return None
    start, stop = row_range

    if column_name is not None:
        return category[column_name].as_array()[start:stop]

    columns = {}
    for col_name in category.keys():
        columns[col_name] = category[col_name].as_array()[start:stop]
    return columns
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _index_residues(id_column):
    """
    Map each residue identifier to the row range it occupies.

    Each run of consecutive equal entries in `id_column` is treated as
    one residue.

    Parameters
    ----------
    id_column : ndarray
        The residue identifier of each row.

    Returns
    -------
    index : dict (str -> tuple(int, int))
        For each residue identifier the inclusive start and exclusive
        stop row of its run.
        If an identifier appears in more than one run, the last run
        is stored.
        Empty, if `id_column` has no rows.
    """
    if len(id_column) == 0:
        # No rows -> no residues; looking up the identifier of the
        # first row would raise an IndexError
        return {}

    # A new residue starts wherever the identifier differs from the
    # identifier in the previous row
    change_points = np.where(id_column[:-1] != id_column[1:])[0] + 1
    # The final boundary is the exclusive stop of the last residue
    boundaries = np.concatenate(
        ([0], change_points, [len(id_column)])
    ).tolist()
    return {
        id_column[start].item(): (start, stop)
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    }
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.structure.info"
|
|
6
|
+
__author__ = "Tom David Müller, Patrick Kunzmann"
|
|
7
|
+
__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import copy
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Directory next to this module that holds the bundled group list files
CCD_DIR = Path(__file__).parent / "ccd"


# Cache: group name -> tuple of residue names, filled lazily by
# `_get_group_members()`
group_lists = {}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def amino_acid_names():
    """
    Get a tuple of amino acid three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are read from ``amino_acids.txt`` in ``CCD_DIR``.

    Returns
    -------
    amino_acid_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        peptide monomers.

    References
    ----------

    .. footbibliography::
    """
    group_members = _get_group_members("amino_acids")
    return group_members
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def nucleotide_names():
    """
    Get a tuple of nucleotide three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are read from ``nucleotides.txt`` in ``CCD_DIR``.

    Returns
    -------
    nucleotide_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        DNA/RNA monomers.

    References
    ----------

    .. footbibliography::
    """
    group_members = _get_group_members("nucleotides")
    return group_members
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def carbohydrate_names():
    """
    Get a tuple of carbohydrate three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    The codes are read from ``carbohydrates.txt`` in ``CCD_DIR``.

    Returns
    -------
    carbohydrate_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        saccharide monomers.

    References
    ----------

    .. footbibliography::
    """
    group_members = _get_group_members("carbohydrates")
    return group_members
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_group_members(group_name):
    """
    Get the residue names that belong to the given group.

    The names are read from ``<group_name>.txt`` in ``CCD_DIR`` the
    first time a group is requested and are served from the
    module-level ``group_lists`` cache afterwards.

    Parameters
    ----------
    group_name : str
        The name of the group, i.e. the file name without the
        ``.txt`` suffix.

    Returns
    -------
    members : tuple of str
        The whitespace-separated entries of the group file.
    """
    global group_lists

    if group_name in group_lists:
        # Already read by a previous call
        return group_lists[group_name]

    with open(CCD_DIR / f"{group_name}.txt", "r") as file:
        members = tuple(file.read().split())
    group_lists[group_name] = members
    return group_lists[group_name]
|
biotite/structure/info/masses.py
CHANGED
|
@@ -7,20 +7,14 @@ __author__ = "Patrick Kunzmann"
|
|
|
7
7
|
__all__ = ["mass"]
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
|
-
from
|
|
11
|
-
import msgpack
|
|
10
|
+
from pathlib import Path
|
|
12
11
|
from ..atoms import Atom, AtomArray, AtomArrayStack
|
|
12
|
+
from .ccd import get_from_ccd
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
_info_dir = dirname(realpath(__file__))
|
|
16
15
|
# Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# Masses are taken from
|
|
20
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
21
|
-
# (2019/01/27)
|
|
22
|
-
with open(join(_info_dir, "residue_masses.msgpack"), "rb") as file:
|
|
23
|
-
_res_masses = msgpack.load(file, raw=False)
|
|
16
|
+
ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
|
|
17
|
+
_atom_masses = None
|
|
24
18
|
|
|
25
19
|
|
|
26
20
|
def mass(item, is_residue=None):
|
|
@@ -34,7 +28,7 @@ def mass(item, is_residue=None):
|
|
|
34
28
|
from the molecule.
|
|
35
29
|
For example non-terminal residues in a protein or nucleotide chain
|
|
36
30
|
miss the mass of a water molecule.
|
|
37
|
-
|
|
31
|
+
|
|
38
32
|
Parameters
|
|
39
33
|
----------
|
|
40
34
|
item : str or Atom or AtomArray or AtomArrayStack
|
|
@@ -50,17 +44,17 @@ def mass(item, is_residue=None):
|
|
|
50
44
|
If set to false, the string is strictly interpreted as element.
|
|
51
45
|
By default the string will be interpreted as element at first
|
|
52
46
|
and secondly as residue name, if the element is unknown.
|
|
53
|
-
|
|
47
|
+
|
|
54
48
|
Returns
|
|
55
49
|
-------
|
|
56
50
|
mass : float or None
|
|
57
51
|
The mass of the given object in *u*. None if the mass is unknown.
|
|
58
|
-
|
|
52
|
+
|
|
59
53
|
References
|
|
60
54
|
----------
|
|
61
|
-
|
|
55
|
+
|
|
62
56
|
.. footbibliography::
|
|
63
|
-
|
|
57
|
+
|
|
64
58
|
Examples
|
|
65
59
|
--------
|
|
66
60
|
|
|
@@ -94,29 +88,36 @@ def mass(item, is_residue=None):
|
|
|
94
88
|
>>> print(mass("N"))
|
|
95
89
|
14.007
|
|
96
90
|
"""
|
|
91
|
+
global _atom_masses
|
|
92
|
+
with open(ATOM_MASSES_FILE, "r") as file:
|
|
93
|
+
_atom_masses = json.load(file)
|
|
97
94
|
|
|
98
95
|
if isinstance(item, str):
|
|
99
96
|
if is_residue is None:
|
|
100
97
|
result_mass = _atom_masses.get(item.upper())
|
|
101
98
|
if result_mass is None:
|
|
102
|
-
result_mass =
|
|
99
|
+
result_mass = get_from_ccd(
|
|
100
|
+
"chem_comp", item.upper(), "formula_weight"
|
|
101
|
+
).item()
|
|
103
102
|
elif not is_residue:
|
|
104
103
|
result_mass = _atom_masses.get(item.upper())
|
|
105
104
|
else:
|
|
106
|
-
result_mass =
|
|
107
|
-
|
|
105
|
+
result_mass = get_from_ccd(
|
|
106
|
+
"chem_comp", item.upper(), "formula_weight"
|
|
107
|
+
).item()
|
|
108
|
+
|
|
108
109
|
elif isinstance(item, Atom):
|
|
109
110
|
result_mass = mass(item.element, is_residue=False)
|
|
110
111
|
elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
|
|
111
112
|
result_mass = sum(
|
|
112
113
|
(mass(element, is_residue=False) for element in item.element)
|
|
113
114
|
)
|
|
114
|
-
|
|
115
|
+
|
|
115
116
|
else:
|
|
116
117
|
raise TypeError(
|
|
117
118
|
f"Cannot calculate mass for {type(item).__name__} objects"
|
|
118
119
|
)
|
|
119
|
-
|
|
120
|
+
|
|
120
121
|
if result_mass is None:
|
|
121
122
|
raise KeyError(f"{item} is not known")
|
|
122
123
|
return result_mass
|
biotite/structure/info/misc.py
CHANGED
|
@@ -6,37 +6,26 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["all_residues", "full_name", "link_type"]
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
import msgpack
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
_info_dir = dirname(realpath(__file__))
|
|
14
|
-
# Data is taken from
|
|
15
|
-
# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
|
|
16
|
-
# (2019/01/27)
|
|
17
|
-
with open(join(_info_dir, "residue_names.msgpack"), "rb") as file:
|
|
18
|
-
_res_names = msgpack.load(file, raw=False)
|
|
19
|
-
with open(join(_info_dir, "link_types.msgpack"), "rb") as file:
|
|
20
|
-
_link_types = msgpack.load(file, raw=False)
|
|
9
|
+
from .ccd import get_ccd, get_from_ccd
|
|
21
10
|
|
|
22
11
|
|
|
23
12
|
def all_residues():
    """
    Get a list of all residues/compound names in the
    PDB chemical components dictionary.

    Returns
    -------
    residues : list of str
        A list of all available residue names, each up to 3 letters
        long.

    Examples
    --------

    >>> print(all_residues()[1000 : 1010])
    ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
    """
    chem_comp = get_ccd()["chem_comp"]
    # The 'id' column holds one entry per component
    residue_ids = chem_comp["id"].as_array()
    return residue_ids.tolist()
|
|
40
29
|
|
|
41
30
|
|
|
42
31
|
def full_name(res_name):
|
|
@@ -48,19 +37,19 @@ def full_name(res_name):
|
|
|
48
37
|
----------
|
|
49
38
|
res_name : str
|
|
50
39
|
The up to 3-letter residue name.
|
|
51
|
-
|
|
40
|
+
|
|
52
41
|
Returns
|
|
53
42
|
-------
|
|
54
43
|
name : str
|
|
55
44
|
The full name of the residue.
|
|
56
|
-
|
|
45
|
+
|
|
57
46
|
Examples
|
|
58
47
|
--------
|
|
59
48
|
|
|
60
49
|
>>> print(full_name("MAN"))
|
|
61
50
|
alpha-D-mannopyranose
|
|
62
51
|
"""
|
|
63
|
-
return
|
|
52
|
+
return get_from_ccd("chem_comp", res_name.upper(), "name").item()
|
|
64
53
|
|
|
65
54
|
|
|
66
55
|
def link_type(res_name):
|
|
@@ -72,12 +61,12 @@ def link_type(res_name):
|
|
|
72
61
|
----------
|
|
73
62
|
res_name : str
|
|
74
63
|
The up to 3-letter residue name.
|
|
75
|
-
|
|
64
|
+
|
|
76
65
|
Returns
|
|
77
66
|
-------
|
|
78
67
|
link_type : str
|
|
79
68
|
The link type.
|
|
80
|
-
|
|
69
|
+
|
|
81
70
|
Examples
|
|
82
71
|
--------
|
|
83
72
|
|
|
@@ -88,4 +77,4 @@ def link_type(res_name):
|
|
|
88
77
|
>>> print(link_type("HOH"))
|
|
89
78
|
NON-POLYMER
|
|
90
79
|
"""
|
|
91
|
-
return
|
|
80
|
+
return get_from_ccd("chem_comp", res_name.upper(), "type").item()
|
|
@@ -6,15 +6,13 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["standardize_order"]
|
|
8
8
|
|
|
9
|
+
import warnings
|
|
9
10
|
import numpy as np
|
|
10
|
-
from .
|
|
11
|
+
from .ccd import get_from_ccd
|
|
11
12
|
from ..residues import get_residue_starts
|
|
12
13
|
from ..error import BadStructureError
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
_atom_name_cache = {}
|
|
16
|
-
|
|
17
|
-
|
|
18
16
|
def standardize_order(atoms):
|
|
19
17
|
"""
|
|
20
18
|
Get an index array for an input :class:`AtomArray` or
|
|
@@ -34,20 +32,20 @@ def standardize_order(atoms):
|
|
|
34
32
|
atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
|
|
35
33
|
Input structure with atoms that are potentially not in the
|
|
36
34
|
*standard* order.
|
|
37
|
-
|
|
35
|
+
|
|
38
36
|
Returns
|
|
39
37
|
-------
|
|
40
38
|
indices : ndarray, dtype=int, shape=(n,)
|
|
41
39
|
When this index array is applied on the input `atoms`,
|
|
42
40
|
the atoms for each residue are reordered to obtain the
|
|
43
41
|
standard *RCSB PDB* atom order.
|
|
44
|
-
|
|
42
|
+
|
|
45
43
|
Raises
|
|
46
44
|
------
|
|
47
45
|
BadStructureError
|
|
48
46
|
If the input `atoms` have duplicate atoms (same atom name)
|
|
49
47
|
within a residue.
|
|
50
|
-
|
|
48
|
+
|
|
51
49
|
Examples
|
|
52
50
|
--------
|
|
53
51
|
|
|
@@ -123,11 +121,18 @@ def standardize_order(atoms):
|
|
|
123
121
|
stop = starts[i+1]
|
|
124
122
|
|
|
125
123
|
res_name = atoms.res_name[start]
|
|
126
|
-
standard_atom_names =
|
|
124
|
+
standard_atom_names = get_from_ccd(
|
|
125
|
+
"chem_comp_atom", res_name, "atom_id"
|
|
126
|
+
)
|
|
127
127
|
if standard_atom_names is None:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
# If the residue is not in the CCD, keep the current order
|
|
129
|
+
warnings.warn(
|
|
130
|
+
f"Residue '{res_name}' is not in the CCD, "
|
|
131
|
+
f"keeping current atom order"
|
|
132
|
+
)
|
|
133
|
+
reordered_indices[start : stop] = np.arange(start, stop)
|
|
134
|
+
continue
|
|
135
|
+
|
|
131
136
|
reordered_indices[start : stop] = _reorder(
|
|
132
137
|
atoms.atom_name[start : stop], standard_atom_names
|
|
133
138
|
) + start
|
|
@@ -152,7 +157,7 @@ def _reorder(origin, target):
|
|
|
152
157
|
The atom names to reorder.
|
|
153
158
|
target : ndarray, dtype=str
|
|
154
159
|
The atom names in target order.
|
|
155
|
-
|
|
160
|
+
|
|
156
161
|
Returns
|
|
157
162
|
-------
|
|
158
163
|
indices : ndarray, dtype=int
|
biotite/structure/io/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"""
|
|
6
6
|
A subpackage for reading and writing structure related data.
|
|
7
7
|
|
|
8
|
-
Macromolecular structure files (PDB, PDBx/mmCIF,
|
|
8
|
+
Macromolecular structure files (PDB, PDBx/mmCIF, BinaryCIF, etc.) and
|
|
9
9
|
small molecule files (MOL, SDF, etc.) can be used
|
|
10
10
|
to load an :class:`AtomArray` or :class:`AtomArrayStack`.
|
|
11
11
|
|
|
@@ -15,10 +15,8 @@ only one *altloc* can be chosen for each atom. Hence, the amount of
|
|
|
15
15
|
atoms may be lower in the atom array (stack) than in respective
|
|
16
16
|
structure file.
|
|
17
17
|
|
|
18
|
-
The recommended format for reading structure files is
|
|
18
|
+
The recommended format for reading structure files is *BinaryCIF*.
|
|
19
19
|
It has by far the shortest parsing time and file size.
|
|
20
|
-
Furthermore, chemical bond information can be read from MMTF files
|
|
21
|
-
as :class:`BondList` instances.
|
|
22
20
|
|
|
23
21
|
Besides the mentioned structure formats, Gromacs trajectory files can be
|
|
24
22
|
loaded, if `mdtraj` is installed.
|
biotite/structure/io/ctab.py
CHANGED
|
@@ -13,7 +13,7 @@ __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
|
|
|
13
13
|
|
|
14
14
|
import warnings
|
|
15
15
|
import numpy as np
|
|
16
|
-
from
|
|
16
|
+
from ..error import BadStructureError
|
|
17
17
|
from ..atoms import AtomArray, AtomArrayStack
|
|
18
18
|
from ..bonds import BondList, BondType
|
|
19
19
|
|
biotite/structure/io/general.py
CHANGED
|
@@ -21,12 +21,12 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
21
21
|
Load an :class:`AtomArray` or :class:`AtomArrayStack` from a structure
|
|
22
22
|
file without the need to manually instantiate a :class:`File`
|
|
23
23
|
object.
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
Internally this function uses a :class:`File` object, based on the
|
|
26
26
|
file extension.
|
|
27
27
|
Trajectory files furthermore require specification of the `template`
|
|
28
28
|
parameter.
|
|
29
|
-
|
|
29
|
+
|
|
30
30
|
Parameters
|
|
31
31
|
----------
|
|
32
32
|
file_path : str
|
|
@@ -40,13 +40,13 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
40
40
|
This does not affect files given via the `template` parameter.
|
|
41
41
|
The only exception is the `atom_i`, which is applied to the template
|
|
42
42
|
as well if number of atoms do not match.
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
Returns
|
|
45
45
|
-------
|
|
46
46
|
array : AtomArray or AtomArrayStack
|
|
47
47
|
If the file contains multiple models, an AtomArrayStack is
|
|
48
48
|
returned, otherwise an AtomArray is returned.
|
|
49
|
-
|
|
49
|
+
|
|
50
50
|
Raises
|
|
51
51
|
------
|
|
52
52
|
ValueError
|
|
@@ -65,56 +65,37 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
65
65
|
from .pdb import PDBFile
|
|
66
66
|
file = PDBFile.read(file_path)
|
|
67
67
|
array = file.get_structure(**kwargs)
|
|
68
|
-
|
|
69
|
-
# Stack containing only one model -> return as atom array
|
|
70
|
-
return array[0]
|
|
71
|
-
else:
|
|
72
|
-
return array
|
|
68
|
+
return _as_single_model_if_possible(array)
|
|
73
69
|
elif suffix == ".pdbqt":
|
|
74
70
|
from .pdbqt import PDBQTFile
|
|
75
71
|
file = PDBQTFile.read(file_path)
|
|
76
72
|
array = file.get_structure(**kwargs)
|
|
77
|
-
|
|
78
|
-
# Stack containing only one model -> return as atom array
|
|
79
|
-
return array[0]
|
|
80
|
-
else:
|
|
81
|
-
return array
|
|
73
|
+
return _as_single_model_if_possible(array)
|
|
82
74
|
elif suffix == ".cif" or suffix == ".pdbx":
|
|
83
|
-
from .pdbx import
|
|
84
|
-
file =
|
|
75
|
+
from .pdbx import CIFFile, get_structure
|
|
76
|
+
file = CIFFile.read(file_path)
|
|
85
77
|
array = get_structure(file, **kwargs)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
78
|
+
return _as_single_model_if_possible(array)
|
|
79
|
+
elif suffix == ".bcif":
|
|
80
|
+
from .pdbx import BinaryCIFFile, get_structure
|
|
81
|
+
file = BinaryCIFFile.read(file_path)
|
|
82
|
+
array = get_structure(file, **kwargs)
|
|
83
|
+
return _as_single_model_if_possible(array)
|
|
91
84
|
elif suffix == ".gro":
|
|
92
85
|
from .gro import GROFile
|
|
93
86
|
file = GROFile.read(file_path)
|
|
94
87
|
array = file.get_structure(**kwargs)
|
|
95
|
-
|
|
96
|
-
# Stack containing only one model -> return as atom array
|
|
97
|
-
return array[0]
|
|
98
|
-
else:
|
|
99
|
-
return array
|
|
88
|
+
return _as_single_model_if_possible(array)
|
|
100
89
|
elif suffix == ".mmtf":
|
|
101
90
|
from .mmtf import MMTFFile, get_structure
|
|
102
91
|
file = MMTFFile.read(file_path)
|
|
103
92
|
array = get_structure(file, **kwargs)
|
|
104
|
-
|
|
105
|
-
# Stack containing only one model -> return as atom array
|
|
106
|
-
return array[0]
|
|
107
|
-
else:
|
|
108
|
-
return array
|
|
93
|
+
return _as_single_model_if_possible(array)
|
|
109
94
|
elif suffix == ".npz":
|
|
110
95
|
from .npz import NpzFile
|
|
111
96
|
file = NpzFile.read(file_path)
|
|
112
97
|
array = file.get_structure(**kwargs)
|
|
113
|
-
|
|
114
|
-
# Stack containing only one model -> return as atom array
|
|
115
|
-
return array[0]
|
|
116
|
-
else:
|
|
117
|
-
return array
|
|
98
|
+
return _as_single_model_if_possible(array)
|
|
118
99
|
elif suffix == ".mol" or suffix == ".sdf":
|
|
119
100
|
from .mol import MOLFile
|
|
120
101
|
file = MOLFile.read(file_path)
|
|
@@ -153,10 +134,10 @@ def save_structure(file_path, array, **kwargs):
|
|
|
153
134
|
Save an :class:`AtomArray` or :class:`AtomArrayStack` to a structure
|
|
154
135
|
file without the need to manually instantiate a :class:`File`
|
|
155
136
|
object.
|
|
156
|
-
|
|
137
|
+
|
|
157
138
|
Internally this function uses a :class:`File` object, based on the
|
|
158
139
|
file extension.
|
|
159
|
-
|
|
140
|
+
|
|
160
141
|
Parameters
|
|
161
142
|
----------
|
|
162
143
|
file_path : str
|
|
@@ -185,9 +166,14 @@ def save_structure(file_path, array, **kwargs):
|
|
|
185
166
|
file.set_structure(array, **kwargs)
|
|
186
167
|
file.write(file_path)
|
|
187
168
|
elif suffix == ".cif" or suffix == ".pdbx":
|
|
188
|
-
from .pdbx import
|
|
189
|
-
file =
|
|
190
|
-
set_structure(file, array,
|
|
169
|
+
from .pdbx import CIFFile, set_structure
|
|
170
|
+
file = CIFFile()
|
|
171
|
+
set_structure(file, array, **kwargs)
|
|
172
|
+
file.write(file_path)
|
|
173
|
+
elif suffix == ".bcif":
|
|
174
|
+
from .pdbx import BinaryCIFFile, set_structure
|
|
175
|
+
file = BinaryCIFFile()
|
|
176
|
+
set_structure(file, array, **kwargs)
|
|
191
177
|
file.write(file_path)
|
|
192
178
|
elif suffix == ".gro":
|
|
193
179
|
from .gro import GROFile
|
|
@@ -232,8 +218,16 @@ def save_structure(file_path, array, **kwargs):
|
|
|
232
218
|
raise ValueError(f"Unknown file format '{suffix}'")
|
|
233
219
|
|
|
234
220
|
|
|
221
|
+
def _as_single_model_if_possible(atoms):
    """
    Unwrap a stack that contains exactly one model.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to be checked.

    Returns
    -------
    atoms : AtomArray or AtomArrayStack
        ``atoms[0]``, if `atoms` is an :class:`AtomArrayStack` whose
        ``stack_depth()`` is 1, otherwise `atoms` itself.
    """
    # Stack containing only one model -> return as atom array
    is_single_model_stack = (
        isinstance(atoms, AtomArrayStack) and atoms.stack_depth() == 1
    )
    return atoms[0] if is_single_model_stack else atoms
|
|
227
|
+
|
|
228
|
+
|
|
235
229
|
# Helper function to estimate elements from atom names
|
|
236
|
-
_elements = [elem.upper() for elem in
|
|
230
|
+
_elements = [elem.upper() for elem in
|
|
237
231
|
["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
|
|
238
232
|
"Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
|
|
239
233
|
"Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
|
|
@@ -268,4 +262,4 @@ def _guess_element(atom_name):
|
|
|
268
262
|
pass
|
|
269
263
|
|
|
270
264
|
return ""
|
|
271
|
-
|
|
265
|
+
|
|
@@ -7,6 +7,9 @@ This subpackage is used for reading and writing an :class:`AtomArray` or
|
|
|
7
7
|
:class:`AtomArrayStack` using the binary MMTF format. This format
|
|
8
8
|
features a smaller file size and a highly increased I/O operation
|
|
9
9
|
performance, than the text based file formats.
|
|
10
|
+
|
|
11
|
+
DEPRECATED: Use :class:`biotite.structure.io.pdbx.BinaryCIFFile`
|
|
12
|
+
instead.
|
|
10
13
|
"""
|
|
11
14
|
|
|
12
15
|
__name__ = "biotite.structure.io.mmtf"
|