biotite 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/application.py +3 -3
- biotite/application/autodock/app.py +1 -1
- biotite/application/blast/webapp.py +1 -1
- biotite/application/clustalo/app.py +1 -1
- biotite/application/dssp/app.py +13 -3
- biotite/application/localapp.py +36 -2
- biotite/application/msaapp.py +10 -10
- biotite/application/muscle/app3.py +5 -18
- biotite/application/muscle/app5.py +5 -5
- biotite/application/sra/app.py +0 -5
- biotite/application/util.py +22 -2
- biotite/application/viennarna/rnaalifold.py +8 -8
- biotite/application/viennarna/rnaplot.py +9 -3
- biotite/application/viennarna/util.py +1 -1
- biotite/application/webapp.py +1 -1
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +191 -0
- biotite/database/entrez/dbnames.py +10 -0
- biotite/database/entrez/download.py +9 -10
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +5 -4
- biotite/database/pubchem/download.py +6 -6
- biotite/database/pubchem/error.py +10 -0
- biotite/database/pubchem/query.py +12 -23
- biotite/database/rcsb/download.py +3 -2
- biotite/database/rcsb/query.py +8 -9
- biotite/database/uniprot/check.py +22 -17
- biotite/database/uniprot/download.py +3 -6
- biotite/database/uniprot/query.py +4 -5
- biotite/file.py +14 -2
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +16 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +198 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1226 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +15 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +71 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/align/__init__.py +0 -4
- biotite/sequence/align/alignment.py +49 -14
- biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +26 -26
- biotite/sequence/align/cigar.py +2 -2
- biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmeralphabet.pyx +19 -2
- biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.pyx +58 -48
- biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.pyx +47 -47
- biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.pyx +10 -10
- biotite/sequence/align/matrix.py +284 -57
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +35 -35
- biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.pyx +2 -2
- biotite/sequence/align/statistics.py +1 -1
- biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
- biotite/sequence/alphabet.py +5 -2
- biotite/sequence/annotation.py +19 -13
- biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
- biotite/sequence/codon.py +1 -2
- biotite/sequence/graphics/alignment.py +25 -39
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
- biotite/sequence/graphics/colorschemes.py +44 -11
- biotite/sequence/graphics/dendrogram.py +4 -2
- biotite/sequence/graphics/features.py +2 -2
- biotite/sequence/graphics/logo.py +10 -12
- biotite/sequence/io/fasta/convert.py +1 -2
- biotite/sequence/io/fasta/file.py +1 -1
- biotite/sequence/io/fastq/file.py +3 -3
- biotite/sequence/io/genbank/file.py +3 -3
- biotite/sequence/io/genbank/sequence.py +2 -0
- biotite/sequence/io/gff/convert.py +1 -1
- biotite/sequence/io/gff/file.py +1 -2
- biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
- biotite/sequence/profile.py +105 -29
- biotite/sequence/search.py +0 -1
- biotite/sequence/seqtypes.py +136 -8
- biotite/sequence/sequence.py +1 -2
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +6 -3
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +163 -66
- biotite/structure/basepairs.py +26 -26
- biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +79 -25
- biotite/structure/box.py +19 -21
- biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +83 -67
- biotite/structure/chains.py +5 -37
- biotite/structure/charges.cp312-win_amd64.pyd +0 -0
- biotite/structure/compare.py +420 -13
- biotite/structure/density.py +1 -1
- biotite/structure/dotbracket.py +27 -28
- biotite/structure/filter.py +8 -8
- biotite/structure/geometry.py +74 -127
- biotite/structure/hbond.py +17 -19
- biotite/structure/info/__init__.py +1 -0
- biotite/structure/info/atoms.py +24 -15
- biotite/structure/info/bonds.py +12 -6
- biotite/structure/info/ccd.py +125 -34
- biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
- biotite/structure/info/groups.py +62 -19
- biotite/structure/info/masses.py +9 -6
- biotite/structure/info/misc.py +15 -22
- biotite/structure/info/radii.py +92 -22
- biotite/structure/info/standardize.py +4 -4
- biotite/structure/integrity.py +4 -6
- biotite/structure/io/general.py +2 -2
- biotite/structure/io/gro/file.py +8 -9
- biotite/structure/io/mol/convert.py +1 -1
- biotite/structure/io/mol/ctab.py +33 -28
- biotite/structure/io/mol/mol.py +1 -1
- biotite/structure/io/mol/sdf.py +80 -53
- biotite/structure/io/pdb/convert.py +4 -3
- biotite/structure/io/pdb/file.py +85 -25
- biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbqt/file.py +36 -36
- biotite/structure/io/pdbx/__init__.py +1 -0
- biotite/structure/io/pdbx/bcif.py +54 -15
- biotite/structure/io/pdbx/cif.py +92 -66
- biotite/structure/io/pdbx/component.py +15 -4
- biotite/structure/io/pdbx/compress.py +321 -0
- biotite/structure/io/pdbx/convert.py +410 -75
- biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +98 -17
- biotite/structure/io/trajfile.py +9 -6
- biotite/structure/io/util.py +38 -0
- biotite/structure/mechanics.py +0 -1
- biotite/structure/molecules.py +141 -156
- biotite/structure/pseudoknots.py +7 -13
- biotite/structure/repair.py +2 -4
- biotite/structure/residues.py +13 -24
- biotite/structure/rings.py +335 -0
- biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +2 -1
- biotite/structure/segments.py +69 -11
- biotite/structure/sequence.py +0 -1
- biotite/structure/sse.py +0 -2
- biotite/structure/superimpose.py +74 -62
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +12 -25
- biotite/structure/util.py +76 -4
- biotite/version.py +9 -4
- biotite/visualize.py +111 -1
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
- biotite/structure/info/ccd/README.rst +0 -8
- biotite/structure/info/ccd/amino_acids.txt +0 -1663
- biotite/structure/info/ccd/carbohydrates.txt +0 -1135
- biotite/structure/info/ccd/nucleotides.txt +0 -798
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
- {biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/info/ccd.py
CHANGED
|
@@ -4,23 +4,23 @@
|
|
|
4
4
|
|
|
5
5
|
__name__ = "biotite.structure.info"
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
|
-
__all__ = ["get_ccd", "get_from_ccd"]
|
|
7
|
+
__all__ = ["get_ccd", "set_ccd_path", "get_from_ccd"]
|
|
8
8
|
|
|
9
|
+
import functools
|
|
10
|
+
import importlib
|
|
11
|
+
import inspect
|
|
12
|
+
import pkgutil
|
|
9
13
|
from pathlib import Path
|
|
10
14
|
import numpy as np
|
|
11
15
|
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
_CCD_FILE = Path(__file__).parent / "components.bcif"
|
|
17
|
+
_SPECIAL_ID_COLUMN_NAMES = {
|
|
14
18
|
"chem_comp": "id",
|
|
15
|
-
"chem_comp_atom": "comp_id",
|
|
16
|
-
"chem_comp_bond": "comp_id",
|
|
17
19
|
}
|
|
18
|
-
|
|
19
|
-
_ccd_block = None
|
|
20
|
-
# For each category this index gives the start and stop for each residue
|
|
21
|
-
_residue_index = {}
|
|
20
|
+
_DEFAULT_ID_COLUMN_NAME = "comp_id"
|
|
22
21
|
|
|
23
22
|
|
|
23
|
+
@functools.cache
|
|
24
24
|
def get_ccd():
|
|
25
25
|
"""
|
|
26
26
|
Get the internal subset of the PDB
|
|
@@ -29,25 +29,68 @@ def get_ccd():
|
|
|
29
29
|
|
|
30
30
|
Returns
|
|
31
31
|
-------
|
|
32
|
-
ccd :
|
|
32
|
+
ccd : BinaryCIFBlock
|
|
33
33
|
The CCD.
|
|
34
|
+
It contains the categories `chem_comp`, `chem_comp_atom` and `chem_comp_bond`.
|
|
35
|
+
|
|
36
|
+
Warnings
|
|
37
|
+
--------
|
|
38
|
+
|
|
39
|
+
Consider the return value as read-only.
|
|
40
|
+
As other functions cache data from it, changing data may lead to undefined
|
|
41
|
+
behavior.
|
|
34
42
|
|
|
35
43
|
References
|
|
36
44
|
----------
|
|
37
45
|
|
|
38
46
|
.. footbibliography::
|
|
39
|
-
|
|
40
47
|
"""
|
|
41
48
|
# Avoid circular import
|
|
42
49
|
from biotite.structure.io.pdbx.bcif import BinaryCIFFile
|
|
43
50
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
51
|
+
try:
|
|
52
|
+
return BinaryCIFFile.read(_CCD_FILE).block
|
|
53
|
+
except FileNotFoundError:
|
|
54
|
+
raise RuntimeError(
|
|
55
|
+
"Internal CCD not found. Please run 'python -m biotite.setup_ccd'."
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def set_ccd_path(ccd_path):
|
|
60
|
+
"""
|
|
61
|
+
Replace the internal *Chemical Component Dictionary* (CCD) with a custom one.
|
|
62
|
+
|
|
63
|
+
This function also clears the cache of functions depending on the CCD to ensure
|
|
64
|
+
that the new CCD is used.
|
|
65
|
+
|
|
66
|
+
Parameters
|
|
67
|
+
----------
|
|
68
|
+
ccd_path : path-like
|
|
69
|
+
The path to the custom CCD in BinaryCIF format, prepared with the
|
|
70
|
+
``setup_ccd.py`` module.
|
|
71
|
+
|
|
72
|
+
Notes
|
|
73
|
+
-----
|
|
74
|
+
This function is intended for advanced users who need to add information for
|
|
75
|
+
compounds, which are not part of the internal CCD.
|
|
76
|
+
The reason might be that an updated version already exists upstream or that
|
|
77
|
+
the user wants to add custom compounds to the CCD.
|
|
78
|
+
"""
|
|
79
|
+
global _CCD_FILE
|
|
80
|
+
_CCD_FILE = Path(ccd_path)
|
|
81
|
+
|
|
82
|
+
# Clear caches in all functions in biotite.structure.info
|
|
83
|
+
info_modules = [
|
|
84
|
+
importlib.import_module(f"biotite.structure.info.{mod_name}")
|
|
85
|
+
for _, mod_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)])
|
|
86
|
+
]
|
|
87
|
+
for module in info_modules:
|
|
88
|
+
for _, function in inspect.getmembers(module, callable):
|
|
89
|
+
if hasattr(function, "cache_clear"):
|
|
90
|
+
function.cache_clear()
|
|
49
91
|
|
|
50
92
|
|
|
93
|
+
@functools.cache
|
|
51
94
|
def get_from_ccd(category_name, comp_id, column_name=None):
|
|
52
95
|
"""
|
|
53
96
|
Get the rows for the given residue in the given category from the
|
|
@@ -67,38 +110,54 @@ def get_from_ccd(category_name, comp_id, column_name=None):
|
|
|
67
110
|
|
|
68
111
|
Returns
|
|
69
112
|
-------
|
|
70
|
-
|
|
71
|
-
The
|
|
72
|
-
|
|
113
|
+
slice : BinaryCIFCategory or BinaryCIFColumn
|
|
114
|
+
The category or column (if `column_name` is provided) containing only the rows
|
|
115
|
+
for the given residue.
|
|
116
|
+
|
|
117
|
+
Notes
|
|
118
|
+
-----
|
|
119
|
+
The returned values are cached for faster access in subsequent calls.
|
|
73
120
|
|
|
74
121
|
References
|
|
75
122
|
----------
|
|
76
123
|
|
|
77
124
|
.. footbibliography::
|
|
78
|
-
|
|
79
125
|
"""
|
|
80
|
-
global _residue_index
|
|
81
|
-
ccd = get_ccd()
|
|
82
|
-
category = ccd[category_name]
|
|
83
|
-
if category_name not in _residue_index:
|
|
84
|
-
_residue_index[category_name] = _index_residues(
|
|
85
|
-
category[INDEX_COLUMN_NAME[category_name]].as_array()
|
|
86
|
-
)
|
|
87
126
|
try:
|
|
88
|
-
start, stop = _residue_index
|
|
127
|
+
start, stop = _residue_index(category_name)[comp_id]
|
|
89
128
|
except KeyError:
|
|
90
129
|
return None
|
|
91
130
|
|
|
131
|
+
category = get_ccd()[category_name]
|
|
92
132
|
if column_name is None:
|
|
93
|
-
return
|
|
94
|
-
col_name: category[col_name].as_array()[start:stop]
|
|
95
|
-
for col_name in category.keys()
|
|
96
|
-
}
|
|
133
|
+
return _filter_category(category, slice(start, stop))
|
|
97
134
|
else:
|
|
98
|
-
return category[column_name]
|
|
135
|
+
return _filter_column(category[column_name], slice(start, stop))
|
|
99
136
|
|
|
100
137
|
|
|
101
|
-
|
|
138
|
+
@functools.cache
|
|
139
|
+
def _residue_index(category_name):
|
|
140
|
+
"""
|
|
141
|
+
Get the start and stop index for each component name in the given
|
|
142
|
+
CCD category.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
category_name : str
|
|
147
|
+
The category to determine start and stop indices for each component in.
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
index : dict (str -> (int, int))
|
|
152
|
+
The index maps each present component name to the corresponding
|
|
153
|
+
start and exclusive stop index in `id_column`.
|
|
154
|
+
"""
|
|
155
|
+
category = get_ccd()[category_name]
|
|
156
|
+
id_column_name = _SPECIAL_ID_COLUMN_NAMES.get(
|
|
157
|
+
category_name, _DEFAULT_ID_COLUMN_NAME
|
|
158
|
+
)
|
|
159
|
+
id_column = category[id_column_name].as_array()
|
|
160
|
+
|
|
102
161
|
residue_starts = np.where(id_column[:-1] != id_column[1:])[0] + 1
|
|
103
162
|
# The final start is the exclusive stop of last residue
|
|
104
163
|
residue_starts = np.concatenate(([0], residue_starts, [len(id_column)]))
|
|
@@ -107,3 +166,35 @@ def _index_residues(id_column):
|
|
|
107
166
|
comp_id = id_column[residue_starts[i]].item()
|
|
108
167
|
index[comp_id] = (residue_starts[i], residue_starts[i + 1])
|
|
109
168
|
return index
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _filter_category(category, index):
|
|
172
|
+
"""
|
|
173
|
+
Reduce the category to the values for the given index.
|
|
174
|
+
"""
|
|
175
|
+
# Avoid circular import
|
|
176
|
+
from biotite.structure.io.pdbx.bcif import BinaryCIFCategory
|
|
177
|
+
|
|
178
|
+
return BinaryCIFCategory(
|
|
179
|
+
{key: _filter_column(column, index) for key, column in category.items()}
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _filter_column(column, index):
|
|
184
|
+
"""
|
|
185
|
+
Reduce the column to the values for the given index.
|
|
186
|
+
"""
|
|
187
|
+
# Avoid circular import
|
|
188
|
+
from biotite.structure.io.pdbx.bcif import BinaryCIFColumn, BinaryCIFData
|
|
189
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
190
|
+
|
|
191
|
+
data_array = column.data.array[index]
|
|
192
|
+
mask_array = column.mask.array[index] if column.mask is not None else None
|
|
193
|
+
return BinaryCIFColumn(
|
|
194
|
+
BinaryCIFData(data_array),
|
|
195
|
+
(
|
|
196
|
+
BinaryCIFData(mask_array)
|
|
197
|
+
if column.mask is not None and (mask_array != MaskValue.PRESENT).any()
|
|
198
|
+
else None
|
|
199
|
+
),
|
|
200
|
+
)
|
|
Binary file
|
biotite/structure/info/groups.py
CHANGED
|
@@ -6,14 +6,45 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Tom David Müller, Patrick Kunzmann"
|
|
7
7
|
__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
9
|
+
import functools
|
|
10
|
+
import numpy as np
|
|
11
|
+
from biotite.structure.info.ccd import get_ccd
|
|
12
|
+
|
|
13
|
+
_AMINO_ACID_TYPES = [
|
|
14
|
+
"D-beta-peptide, C-gamma linking",
|
|
15
|
+
"D-gamma-peptide, C-delta linking",
|
|
16
|
+
"D-peptide COOH carboxy terminus",
|
|
17
|
+
"D-peptide NH3 amino terminus",
|
|
18
|
+
"D-peptide linking",
|
|
19
|
+
"L-beta-peptide, C-gamma linking",
|
|
20
|
+
"L-gamma-peptide, C-delta linking",
|
|
21
|
+
"L-peptide COOH carboxy terminus",
|
|
22
|
+
"L-peptide NH3 amino terminus",
|
|
23
|
+
"L-peptide linking",
|
|
24
|
+
"peptide linking",
|
|
25
|
+
]
|
|
26
|
+
_NUCLEOTIDE_TYPES = [
|
|
27
|
+
"DNA OH 3 prime terminus",
|
|
28
|
+
"DNA OH 5 prime terminus",
|
|
29
|
+
"DNA linking",
|
|
30
|
+
"L-DNA linking",
|
|
31
|
+
"L-RNA linking",
|
|
32
|
+
"RNA OH 3 prime terminus",
|
|
33
|
+
"RNA OH 5 prime terminus",
|
|
34
|
+
"RNA linking",
|
|
35
|
+
]
|
|
36
|
+
_CARBOHYDRATE_TYPES = [
|
|
37
|
+
"D-saccharide",
|
|
38
|
+
"D-saccharide, alpha linking",
|
|
39
|
+
"D-saccharide, beta linking",
|
|
40
|
+
"L-saccharide",
|
|
41
|
+
"L-saccharide, alpha linking",
|
|
42
|
+
"L-saccharide, beta linking",
|
|
43
|
+
"saccharide",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@functools.cache
|
|
17
48
|
def amino_acid_names():
|
|
18
49
|
"""
|
|
19
50
|
Get a tuple of amino acid three-letter codes according to the
|
|
@@ -30,11 +61,11 @@ def amino_acid_names():
|
|
|
30
61
|
----------
|
|
31
62
|
|
|
32
63
|
.. footbibliography::
|
|
33
|
-
|
|
34
64
|
"""
|
|
35
|
-
return _get_group_members(
|
|
65
|
+
return _get_group_members(_AMINO_ACID_TYPES)
|
|
36
66
|
|
|
37
67
|
|
|
68
|
+
@functools.cache
|
|
38
69
|
def nucleotide_names():
|
|
39
70
|
"""
|
|
40
71
|
Get a tuple of nucleotide three-letter codes according to the
|
|
@@ -51,11 +82,11 @@ def nucleotide_names():
|
|
|
51
82
|
----------
|
|
52
83
|
|
|
53
84
|
.. footbibliography::
|
|
54
|
-
|
|
55
85
|
"""
|
|
56
|
-
return _get_group_members(
|
|
86
|
+
return _get_group_members(_NUCLEOTIDE_TYPES)
|
|
57
87
|
|
|
58
88
|
|
|
89
|
+
@functools.cache
|
|
59
90
|
def carbohydrate_names():
|
|
60
91
|
"""
|
|
61
92
|
Get a tuple of carbohydrate three-letter codes according to the
|
|
@@ -72,14 +103,26 @@ def carbohydrate_names():
|
|
|
72
103
|
----------
|
|
73
104
|
|
|
74
105
|
.. footbibliography::
|
|
106
|
+
"""
|
|
107
|
+
return _get_group_members(_CARBOHYDRATE_TYPES)
|
|
108
|
+
|
|
75
109
|
|
|
110
|
+
def _get_group_members(match_types):
|
|
76
111
|
"""
|
|
77
|
-
|
|
112
|
+
Identify component IDs that match a given component *type* from the CCD.
|
|
78
113
|
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
match_types : list of str
|
|
117
|
+
The component types to extract.
|
|
79
118
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
119
|
+
Returns
|
|
120
|
+
-------
|
|
121
|
+
comp_ids : list of str
|
|
122
|
+
The extracted component IDs.
|
|
123
|
+
"""
|
|
124
|
+
category = get_ccd()["chem_comp"]
|
|
125
|
+
comp_ids = category["id"].as_array()
|
|
126
|
+
types = category["type"].as_array()
|
|
127
|
+
# Ignore case
|
|
128
|
+
return comp_ids[np.isin(np.char.lower(types), np.char.lower(match_types))].tolist()
|
biotite/structure/info/masses.py
CHANGED
|
@@ -95,15 +95,11 @@ def mass(item, is_residue=None):
|
|
|
95
95
|
if is_residue is None:
|
|
96
96
|
result_mass = _atom_masses.get(item.upper())
|
|
97
97
|
if result_mass is None:
|
|
98
|
-
result_mass =
|
|
99
|
-
"chem_comp", item.upper(), "formula_weight"
|
|
100
|
-
).item()
|
|
98
|
+
result_mass = _mass_for_residue(item)
|
|
101
99
|
elif not is_residue:
|
|
102
100
|
result_mass = _atom_masses.get(item.upper())
|
|
103
101
|
else:
|
|
104
|
-
result_mass =
|
|
105
|
-
"chem_comp", item.upper(), "formula_weight"
|
|
106
|
-
).item()
|
|
102
|
+
result_mass = _mass_for_residue(item)
|
|
107
103
|
|
|
108
104
|
elif isinstance(item, Atom):
|
|
109
105
|
result_mass = mass(item.element, is_residue=False)
|
|
@@ -116,3 +112,10 @@ def mass(item, is_residue=None):
|
|
|
116
112
|
if result_mass is None:
|
|
117
113
|
raise KeyError(f"{item} is not known")
|
|
118
114
|
return result_mass
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _mass_for_residue(res_name):
|
|
118
|
+
column = get_from_ccd("chem_comp", res_name.upper(), "formula_weight")
|
|
119
|
+
if column is None:
|
|
120
|
+
raise KeyError(f"Residue '{res_name}' is not known")
|
|
121
|
+
return column.as_item()
|
biotite/structure/info/misc.py
CHANGED
|
@@ -11,19 +11,13 @@ from biotite.structure.info.ccd import get_ccd, get_from_ccd
|
|
|
11
11
|
|
|
12
12
|
def all_residues():
|
|
13
13
|
"""
|
|
14
|
-
Get a list of all residues/compound names in the
|
|
15
|
-
|
|
14
|
+
Get a list of all residues/compound names in the PDB
|
|
15
|
+
*Chemical Component Dictionary* (CCD).
|
|
16
16
|
|
|
17
17
|
Returns
|
|
18
18
|
-------
|
|
19
19
|
residues : list of str
|
|
20
|
-
A list of all available
|
|
21
|
-
|
|
22
|
-
Examples
|
|
23
|
-
--------
|
|
24
|
-
|
|
25
|
-
>>> print(all_residues()[1000 : 1010])
|
|
26
|
-
['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
|
|
20
|
+
A list of all available residue names.
|
|
27
21
|
"""
|
|
28
22
|
return get_ccd()["chem_comp"]["id"].as_array().tolist()
|
|
29
23
|
|
|
@@ -51,10 +45,10 @@ def full_name(res_name):
|
|
|
51
45
|
>>> print(full_name("MAN"))
|
|
52
46
|
alpha-D-mannopyranose
|
|
53
47
|
"""
|
|
54
|
-
|
|
55
|
-
if
|
|
48
|
+
column = get_from_ccd("chem_comp", res_name.upper(), "name")
|
|
49
|
+
if column is None:
|
|
56
50
|
return None
|
|
57
|
-
return
|
|
51
|
+
return column.as_item()
|
|
58
52
|
|
|
59
53
|
|
|
60
54
|
def link_type(res_name):
|
|
@@ -84,10 +78,10 @@ def link_type(res_name):
|
|
|
84
78
|
>>> print(link_type("HOH"))
|
|
85
79
|
NON-POLYMER
|
|
86
80
|
"""
|
|
87
|
-
|
|
88
|
-
if
|
|
81
|
+
column = get_from_ccd("chem_comp", res_name.upper(), "type")
|
|
82
|
+
if column is None:
|
|
89
83
|
return None
|
|
90
|
-
return
|
|
84
|
+
return column.as_item()
|
|
91
85
|
|
|
92
86
|
|
|
93
87
|
def one_letter_code(res_name):
|
|
@@ -107,7 +101,7 @@ def one_letter_code(res_name):
|
|
|
107
101
|
-------
|
|
108
102
|
one_letter_code : str or None
|
|
109
103
|
The one-letter code.
|
|
110
|
-
None if the compound is not present in the CCD or if no
|
|
104
|
+
``None`` if the compound is not present in the CCD or if no
|
|
111
105
|
one-letter code is defined for this compound.
|
|
112
106
|
|
|
113
107
|
Examples
|
|
@@ -133,12 +127,11 @@ def one_letter_code(res_name):
|
|
|
133
127
|
alpha-D-mannopyranose
|
|
134
128
|
>>> print(one_letter_code("MAN"))
|
|
135
129
|
None
|
|
136
|
-
|
|
137
130
|
"""
|
|
138
|
-
|
|
139
|
-
if
|
|
131
|
+
column = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
|
|
132
|
+
if column is None:
|
|
140
133
|
return None
|
|
141
|
-
|
|
142
|
-
|
|
134
|
+
if column.mask is not None:
|
|
135
|
+
# Value is masked, i.e. inapplicable or missing
|
|
143
136
|
return None
|
|
144
|
-
return
|
|
137
|
+
return column.as_item()
|
biotite/structure/info/radii.py
CHANGED
|
@@ -26,37 +26,106 @@ _PROTOR_RADII = {
|
|
|
26
26
|
("S", 1, 0) : 1.77,
|
|
27
27
|
("S", 2, 0) : 1.77, # Not official, added for completeness (MET)
|
|
28
28
|
("S", 2, 1) : 1.77,
|
|
29
|
-
("F", 1, 0) : 1.47, # Taken from
|
|
30
|
-
("CL", 1, 0) : 1.75, # Taken from
|
|
31
|
-
("BR", 1, 0) : 1.85, # Taken from
|
|
29
|
+
("F", 1, 0) : 1.47, # Taken from _SINGLE_ATOM_VDW_RADII
|
|
30
|
+
("CL", 1, 0) : 1.75, # Taken from _SINGLE_ATOM_VDW_RADII
|
|
31
|
+
("BR", 1, 0) : 1.85, # Taken from _SINGLE_ATOM_VDW_RADII
|
|
32
32
|
("I", 1, 0) : 1.98, # Taken from _SINGLE_ATOM_VDW_RADII
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
_SINGLE_ATOM_VDW_RADII = {
|
|
36
|
+
# Main group
|
|
37
|
+
# Row 1 (Period 1)
|
|
38
|
+
"H": 1.10,
|
|
37
39
|
"HE": 1.40,
|
|
38
40
|
|
|
41
|
+
# Row 2 (Period 2)
|
|
42
|
+
"LI": 1.81,
|
|
43
|
+
"BE": 1.53,
|
|
44
|
+
"B": 1.92,
|
|
39
45
|
"C": 1.70,
|
|
40
46
|
"N": 1.55,
|
|
41
47
|
"O": 1.52,
|
|
42
48
|
"F": 1.47,
|
|
43
49
|
"NE": 1.54,
|
|
44
50
|
|
|
51
|
+
# Row 3 (Period 3)
|
|
52
|
+
"NA": 2.27,
|
|
53
|
+
"MG": 1.73,
|
|
54
|
+
"AL": 1.84,
|
|
45
55
|
"SI": 2.10,
|
|
46
56
|
"P": 1.80,
|
|
47
57
|
"S": 1.80,
|
|
48
58
|
"CL": 1.75,
|
|
49
59
|
"AR": 1.88,
|
|
50
60
|
|
|
61
|
+
# Row 4 (Period 4)
|
|
62
|
+
"K": 2.75,
|
|
63
|
+
"CA": 2.31,
|
|
64
|
+
"GA": 1.87,
|
|
65
|
+
"GE": 2.11,
|
|
51
66
|
"AS": 1.85,
|
|
52
67
|
"SE": 1.90,
|
|
53
|
-
"BR": 1.
|
|
68
|
+
"BR": 1.83,
|
|
54
69
|
"KR": 2.02,
|
|
55
70
|
|
|
71
|
+
# Row 5 (Period 5)
|
|
72
|
+
"RB": 3.03,
|
|
73
|
+
"SR": 2.49,
|
|
74
|
+
"IN": 1.93,
|
|
75
|
+
"SN": 2.17,
|
|
76
|
+
"SB": 2.06,
|
|
56
77
|
"TE": 2.06,
|
|
57
78
|
"I": 1.98,
|
|
58
79
|
"XE": 2.16,
|
|
80
|
+
|
|
81
|
+
# Row 6 (Period 6)
|
|
82
|
+
"CS": 3.43,
|
|
83
|
+
"BA": 2.68,
|
|
84
|
+
"TL": 1.96,
|
|
85
|
+
"PB": 2.02,
|
|
86
|
+
"BI": 2.07,
|
|
87
|
+
"PO": 1.97,
|
|
88
|
+
"AT": 2.02,
|
|
89
|
+
"RN": 2.20,
|
|
90
|
+
|
|
91
|
+
# Row 7 (Period 7)
|
|
92
|
+
"FR": 3.48,
|
|
93
|
+
"RA": 2.83,
|
|
94
|
+
|
|
95
|
+
# Transition metals (relevant ones only)
|
|
96
|
+
# Row 1
|
|
97
|
+
"FE": 2.05,
|
|
98
|
+
"CU": 2.00,
|
|
99
|
+
"ZN": 2.10,
|
|
100
|
+
"MN": 2.05,
|
|
101
|
+
"CO": 2.00,
|
|
102
|
+
"NI": 2.00,
|
|
103
|
+
|
|
104
|
+
# Row 2
|
|
105
|
+
'MO': 2.10,
|
|
106
|
+
'RU': 2.05,
|
|
107
|
+
|
|
108
|
+
# Row 3
|
|
109
|
+
'W': 2.10,
|
|
110
|
+
'PT': 2.05,
|
|
111
|
+
'AU': 2.10,
|
|
59
112
|
}
|
|
113
|
+
"""
|
|
114
|
+
Van der Waals radii for main group and transition elements.
|
|
115
|
+
|
|
116
|
+
Main group:
|
|
117
|
+
Source: https://pubs.acs.org/doi/10.1021/jp8111556, Table 12 (Mantina et al. 2009)
|
|
118
|
+
|
|
119
|
+
Transition metals:
|
|
120
|
+
Source: RDKit, 2024.9.4 Release
|
|
121
|
+
https://github.com/rdkit/rdkit/blob/af6347963f25cfe8fe4db0638410b2f3a8e8bd89/Code/GraphMol/atomic_data.cpp#L51
|
|
122
|
+
|
|
123
|
+
Where available, these values were cross-checked vs the CRC Handbook of
|
|
124
|
+
Chemistry and Physics (105th edition) and verified that they are closely
|
|
125
|
+
in line (barring very minor discrepancies, usually < 0.05 Å).
|
|
126
|
+
We cannot use the CRC values directly as they are not permissively licensed.
|
|
127
|
+
"""
|
|
128
|
+
|
|
60
129
|
# fmt: on
|
|
61
130
|
|
|
62
131
|
# A dictionary that caches radii for each residue
|
|
@@ -65,16 +134,15 @@ _protor_radii = {}
|
|
|
65
134
|
|
|
66
135
|
def vdw_radius_protor(res_name, atom_name):
|
|
67
136
|
"""
|
|
68
|
-
Estimate the Van-der-Waals radius of
|
|
137
|
+
Estimate the Van-der-Waals radius of a heavy atom,
|
|
69
138
|
that includes the radius added by potential bonded hydrogen atoms.
|
|
70
139
|
The respective radii are taken from the ProtOr dataset.
|
|
71
140
|
:footcite:`Tsai1999`
|
|
72
141
|
|
|
73
142
|
This is especially useful for macromolecular structures where no
|
|
74
143
|
hydrogen atoms are resolved, e.g. crystal structures.
|
|
75
|
-
The valency of the
|
|
76
|
-
bonded hydrogen atoms is taken from the
|
|
77
|
-
dataset.
|
|
144
|
+
The valency of the heavy atom and the amount of normally
|
|
145
|
+
bonded hydrogen atoms is taken from the *Chemical Component Dictionary*.
|
|
78
146
|
|
|
79
147
|
Parameters
|
|
80
148
|
----------
|
|
@@ -86,12 +154,13 @@ def vdw_radius_protor(res_name, atom_name):
|
|
|
86
154
|
|
|
87
155
|
Returns
|
|
88
156
|
-------
|
|
89
|
-
|
|
90
|
-
|
|
157
|
+
radius : float
|
|
158
|
+
The Van-der-Waals radius of the given atom.
|
|
159
|
+
If the radius cannot be estimated for the atom, `None` is returned.
|
|
91
160
|
|
|
92
|
-
See
|
|
161
|
+
See Also
|
|
93
162
|
--------
|
|
94
|
-
vdw_radius_single
|
|
163
|
+
vdw_radius_single : *Van-der-Waals* radii for structures with annotated hydrogen atoms.
|
|
95
164
|
|
|
96
165
|
References
|
|
97
166
|
----------
|
|
@@ -114,7 +183,7 @@ def vdw_radius_protor(res_name, atom_name):
|
|
|
114
183
|
# Use cached radii for the residue, if already calculated
|
|
115
184
|
if atom_name not in _protor_radii[res_name]:
|
|
116
185
|
raise KeyError(
|
|
117
|
-
f"Residue '{res_name}' does not contain an atom named
|
|
186
|
+
f"Residue '{res_name}' does not contain an atom named '{atom_name}'"
|
|
118
187
|
)
|
|
119
188
|
return _protor_radii[res_name].get(atom_name)
|
|
120
189
|
else:
|
|
@@ -166,8 +235,8 @@ def _calculate_protor_radii(res_name):
|
|
|
166
235
|
|
|
167
236
|
def vdw_radius_single(element):
|
|
168
237
|
"""
|
|
169
|
-
Get the Van-der-Waals radius of an atom from the given element.
|
|
170
|
-
:footcite:`
|
|
238
|
+
Get the *Van-der-Waals* radius of an atom from the given element.
|
|
239
|
+
:footcite:`Mantina2009`
|
|
171
240
|
|
|
172
241
|
Parameters
|
|
173
242
|
----------
|
|
@@ -176,12 +245,13 @@ def vdw_radius_single(element):
|
|
|
176
245
|
|
|
177
246
|
Returns
|
|
178
247
|
-------
|
|
179
|
-
|
|
180
|
-
|
|
248
|
+
radius : float
|
|
249
|
+
The Van-der-Waals radius of the atom.
|
|
250
|
+
If the radius is unknown for the element, `None` is returned.
|
|
181
251
|
|
|
182
|
-
See
|
|
252
|
+
See Also
|
|
183
253
|
--------
|
|
184
|
-
vdw_radius_protor
|
|
254
|
+
vdw_radius_protor : *Van-der-Waals* radii for structures without annotated hydrogen atoms.
|
|
185
255
|
|
|
186
256
|
References
|
|
187
257
|
----------
|
|
@@ -194,4 +264,4 @@ def vdw_radius_single(element):
|
|
|
194
264
|
>>> print(vdw_radius_single("C"))
|
|
195
265
|
1.7
|
|
196
266
|
"""
|
|
197
|
-
return
|
|
267
|
+
return _SINGLE_ATOM_VDW_RADII.get(element.upper())
|
|
@@ -121,16 +121,16 @@ def standardize_order(atoms):
|
|
|
121
121
|
stop = starts[i + 1]
|
|
122
122
|
|
|
123
123
|
res_name = atoms.res_name[start]
|
|
124
|
-
|
|
125
|
-
if
|
|
124
|
+
chem_comp_atom = get_from_ccd("chem_comp_atom", res_name, "atom_id")
|
|
125
|
+
if chem_comp_atom is None:
|
|
126
126
|
# If the residue is not in the CCD, keep the current order
|
|
127
127
|
warnings.warn(
|
|
128
|
-
f"Residue '{res_name}' is not in the CCD, "
|
|
129
|
-
f"keeping current atom order"
|
|
128
|
+
f"Residue '{res_name}' is not in the CCD, keeping current atom order"
|
|
130
129
|
)
|
|
131
130
|
reordered_indices[start:stop] = np.arange(start, stop)
|
|
132
131
|
continue
|
|
133
132
|
|
|
133
|
+
standard_atom_names = chem_comp_atom.as_array()
|
|
134
134
|
reordered_indices[start:stop] = (
|
|
135
135
|
_reorder(atoms.atom_name[start:stop], standard_atom_names) + start
|
|
136
136
|
)
|
biotite/structure/integrity.py
CHANGED
|
@@ -47,7 +47,7 @@ def check_atom_id_continuity(array):
|
|
|
47
47
|
Returns
|
|
48
48
|
-------
|
|
49
49
|
discontinuity : ndarray, dtype=int
|
|
50
|
-
Contains the indices of atoms after a discontinuity
|
|
50
|
+
Contains the indices of atoms after a discontinuity.
|
|
51
51
|
"""
|
|
52
52
|
ids = array.atom_id
|
|
53
53
|
return _check_continuity(ids)
|
|
@@ -69,7 +69,7 @@ def check_res_id_continuity(array):
|
|
|
69
69
|
Returns
|
|
70
70
|
-------
|
|
71
71
|
discontinuity : ndarray, dtype=int
|
|
72
|
-
Contains the indices of atoms after a discontinuity
|
|
72
|
+
Contains the indices of atoms after a discontinuity.
|
|
73
73
|
"""
|
|
74
74
|
ids = array.res_id
|
|
75
75
|
return _check_continuity(ids)
|
|
@@ -96,10 +96,8 @@ def check_linear_continuity(array, min_len=1.2, max_len=1.8):
|
|
|
96
96
|
|
|
97
97
|
See Also
|
|
98
98
|
--------
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
biotite.structure.bonds.BondList :
|
|
102
|
-
A class that doesn't depend on the atoms' order to identify bonds.
|
|
99
|
+
filter_linear_bond_continuity : A function to filter for atoms preserving the continuity (used here).
|
|
100
|
+
BondList : A class that doesn't depend on the atoms' order to identify bonds.
|
|
103
101
|
"""
|
|
104
102
|
con_mask = filter_linear_bond_continuity(array, min_len, max_len)
|
|
105
103
|
# The continuity mask `con_mask` points to atoms for which the next atom is continuous.
|