biotite 1.4.0__cp312-cp312-macosx_11_0_arm64.whl → 1.5.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biotite/application/dssp/app.py +63 -6
- biotite/database/afdb/download.py +12 -6
- biotite/database/rcsb/download.py +1 -0
- biotite/database/rcsb/query.py +2 -2
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/structure/atoms.py +1 -1
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/chains.py +34 -0
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/filter.py +2 -1
- biotite/structure/geometry.py +164 -2
- biotite/structure/info/atoms.py +8 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/io/pdb/convert.py +1 -0
- biotite/structure/io/pdb/file.py +16 -2
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/bcif.py +1 -1
- biotite/structure/io/pdbx/cif.py +1 -1
- biotite/structure/io/pdbx/compress.py +13 -9
- biotite/structure/io/pdbx/convert.py +17 -6
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -8
- biotite/structure/residues.py +173 -1
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/segments.py +39 -3
- biotite/structure/util.py +14 -22
- biotite/version.py +16 -3
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/METADATA +1 -1
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/RECORD +46 -46
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/WHEEL +0 -0
- {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/application/dssp/app.py
CHANGED
|
@@ -11,8 +11,13 @@ from tempfile import NamedTemporaryFile
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from biotite.application.application import AppState, requires_state
|
|
13
13
|
from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
|
|
14
|
-
from biotite.structure.
|
|
14
|
+
from biotite.structure.error import BadStructureError
|
|
15
|
+
from biotite.structure.filter import filter_amino_acids
|
|
16
|
+
from biotite.structure.io.pdbx.cif import CIFCategory, CIFColumn, CIFFile
|
|
17
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
15
18
|
from biotite.structure.io.pdbx.convert import set_structure
|
|
19
|
+
from biotite.structure.repair import create_continuous_res_ids
|
|
20
|
+
from biotite.structure.residues import get_residue_starts
|
|
16
21
|
|
|
17
22
|
|
|
18
23
|
class DsspApp(LocalApp):
|
|
@@ -49,17 +54,19 @@ class DsspApp(LocalApp):
|
|
|
49
54
|
>>> app.start()
|
|
50
55
|
>>> app.join()
|
|
51
56
|
>>> print(app.get_sse())
|
|
52
|
-
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' '
|
|
53
|
-
'
|
|
57
|
+
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P' 'P'
|
|
58
|
+
'P' 'C']
|
|
54
59
|
"""
|
|
55
60
|
|
|
56
61
|
def __init__(self, atom_array, bin_path="mkdssp"):
|
|
57
62
|
super().__init__(bin_path)
|
|
58
63
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# -> Add these annotations to a copy of the input structure
|
|
64
|
+
if not np.all(filter_amino_acids(atom_array)):
|
|
65
|
+
raise BadStructureError("The input structure must contain only amino acids")
|
|
62
66
|
self._array = atom_array.copy()
|
|
67
|
+
# DSSP also requires the
|
|
68
|
+
# 'occupancy', 'b_factor' and 'charge' fields
|
|
69
|
+
# -> Add these placeholder values
|
|
63
70
|
categories = self._array.get_annotation_categories()
|
|
64
71
|
if "charge" not in categories:
|
|
65
72
|
self._array.set_annotation(
|
|
@@ -73,6 +80,10 @@ class DsspApp(LocalApp):
|
|
|
73
80
|
self._array.set_annotation(
|
|
74
81
|
"occupancy", np.ones(self._array.array_length(), dtype=float)
|
|
75
82
|
)
|
|
83
|
+
# DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
|
|
84
|
+
# if `seq_id` does not start at 1
|
|
85
|
+
self._array.res_id = create_continuous_res_ids(self._array)
|
|
86
|
+
|
|
76
87
|
try:
|
|
77
88
|
# The parameters have changed in version 4
|
|
78
89
|
self._new_cli = get_version(bin_path)[0] >= 4
|
|
@@ -86,6 +97,9 @@ class DsspApp(LocalApp):
|
|
|
86
97
|
def run(self):
|
|
87
98
|
in_file = CIFFile()
|
|
88
99
|
set_structure(in_file, self._array)
|
|
100
|
+
in_file.block["pdbx_poly_seq_scheme"] = _create_pdbx_poly_seq_scheme(
|
|
101
|
+
self._array, in_file.block["atom_site"]["label_entity_id"].as_array(str)
|
|
102
|
+
)
|
|
89
103
|
in_file.write(self._in_file)
|
|
90
104
|
self._in_file.flush()
|
|
91
105
|
if self._new_cli:
|
|
@@ -157,3 +171,46 @@ class DsspApp(LocalApp):
|
|
|
157
171
|
app.start()
|
|
158
172
|
app.join()
|
|
159
173
|
return app.get_sse()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _create_pdbx_poly_seq_scheme(atom_array, entity_ids):
|
|
177
|
+
"""
|
|
178
|
+
Create the ``pdbx_poly_seq_scheme`` category, as required by DSSP.
|
|
179
|
+
|
|
180
|
+
Parameters
|
|
181
|
+
----------
|
|
182
|
+
atom_array : AtomArray
|
|
183
|
+
The atom array to create the category from.
|
|
184
|
+
entity_ids : ndarray, dtype=str
|
|
185
|
+
The entity IDs for each atom.
|
|
186
|
+
|
|
187
|
+
Returns
|
|
188
|
+
-------
|
|
189
|
+
pdbx_poly_seq_scheme : CIFCategory
|
|
190
|
+
The ``pdbx_poly_seq_scheme`` category.
|
|
191
|
+
"""
|
|
192
|
+
res_start_indices = get_residue_starts(atom_array)
|
|
193
|
+
chain_id = atom_array.chain_id[res_start_indices]
|
|
194
|
+
res_name = atom_array.res_name[res_start_indices]
|
|
195
|
+
res_id = atom_array.res_id[res_start_indices]
|
|
196
|
+
ins_code = atom_array.ins_code[res_start_indices]
|
|
197
|
+
hetero = atom_array.hetero[res_start_indices]
|
|
198
|
+
entity_id = entity_ids[res_start_indices]
|
|
199
|
+
|
|
200
|
+
poly_seq_scheme = CIFCategory()
|
|
201
|
+
poly_seq_scheme["asym_id"] = chain_id
|
|
202
|
+
poly_seq_scheme["entity_id"] = entity_id
|
|
203
|
+
poly_seq_scheme["seq_id"] = res_id
|
|
204
|
+
poly_seq_scheme["mon_id"] = res_name
|
|
205
|
+
poly_seq_scheme["ndb_seq_num"] = res_id
|
|
206
|
+
poly_seq_scheme["pdb_seq_num"] = res_id
|
|
207
|
+
poly_seq_scheme["auth_seq_num"] = res_id
|
|
208
|
+
poly_seq_scheme["pdb_mon_id"] = res_name
|
|
209
|
+
poly_seq_scheme["auth_mon_id"] = res_name
|
|
210
|
+
poly_seq_scheme["pdb_strand_id"] = chain_id
|
|
211
|
+
poly_seq_scheme["pdb_ins_code"] = CIFColumn(
|
|
212
|
+
ins_code, np.where(ins_code == "", MaskValue.MISSING, MaskValue.PRESENT)
|
|
213
|
+
)
|
|
214
|
+
poly_seq_scheme["hetero"] = np.where(hetero, "y", "n")
|
|
215
|
+
|
|
216
|
+
return poly_seq_scheme
|
|
@@ -16,8 +16,11 @@ from biotite.database.error import RequestError
|
|
|
16
16
|
_METADATA_URL = "https://alphafold.com/api/prediction"
|
|
17
17
|
_BINARY_FORMATS = ["bcif"]
|
|
18
18
|
# Adopted from https://www.uniprot.org/help/accession_numbers
|
|
19
|
+
# adding the optional 'AF-' prefix and '-F1' suffix used by RCSB
|
|
19
20
|
_UNIPROT_PATTERN = (
|
|
20
|
-
"
|
|
21
|
+
r"^(?P<prefix>(AF-)|(AF_AF))?"
|
|
22
|
+
r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
|
|
23
|
+
r"(?P<suffix>-?F1)?$"
|
|
21
24
|
)
|
|
22
25
|
|
|
23
26
|
|
|
@@ -31,8 +34,8 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
|
|
|
31
34
|
----------
|
|
32
35
|
ids : str or iterable object of str
|
|
33
36
|
A single ID or a list of IDs of the file(s) to be downloaded.
|
|
34
|
-
They can be either UniProt IDs (e.g. ``P12345``)
|
|
35
|
-
(e.g. ``AF-
|
|
37
|
+
They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
|
|
38
|
+
(e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
|
|
36
39
|
format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
|
|
37
40
|
The format of the files to be downloaded.
|
|
38
41
|
target_path : str, optional
|
|
@@ -142,7 +145,10 @@ def _get_file_url(id, format):
|
|
|
142
145
|
The URL of the file to be downloaded.
|
|
143
146
|
"""
|
|
144
147
|
uniprot_id = _extract_id(id)
|
|
145
|
-
|
|
148
|
+
try:
|
|
149
|
+
metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
|
|
150
|
+
except requests.exceptions.JSONDecodeError:
|
|
151
|
+
raise RequestError("Received malformed JSON response")
|
|
146
152
|
if len(metadata) == 0:
|
|
147
153
|
raise RequestError(f"ID {id} is invalid")
|
|
148
154
|
# A list of length 1 is always returned, if the response is valid
|
|
@@ -167,10 +173,10 @@ def _extract_id(id):
|
|
|
167
173
|
uniprot_id : str
|
|
168
174
|
The UniProt ID.
|
|
169
175
|
"""
|
|
170
|
-
match = re.
|
|
176
|
+
match = re.match(_UNIPROT_PATTERN, id)
|
|
171
177
|
if match is None:
|
|
172
178
|
raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
|
|
173
|
-
return match.group()
|
|
179
|
+
return match.group("id")
|
|
174
180
|
|
|
175
181
|
|
|
176
182
|
def _assert_valid_file(response, id):
|
|
@@ -155,6 +155,7 @@ def _assert_valid_file(response_text, pdb_id):
|
|
|
155
155
|
"<title>PDB Archive over AWS</title>",
|
|
156
156
|
"No fasta files were found.",
|
|
157
157
|
"No valid PDB IDs were submitted.",
|
|
158
|
+
"The requested URL was incorrect, too long or otherwise malformed.",
|
|
158
159
|
]
|
|
159
160
|
):
|
|
160
161
|
raise RequestError("PDB ID {:} is invalid".format(pdb_id))
|
biotite/database/rcsb/query.py
CHANGED
|
@@ -74,7 +74,7 @@ class SingleQuery(Query, metaclass=abc.ABCMeta):
|
|
|
74
74
|
A terminal query node for the RCSB search API.
|
|
75
75
|
|
|
76
76
|
Multiple :class:`SingleQuery` objects can be combined to
|
|
77
|
-
:class:`CompositeQuery`objects using the ``|`` and ``&`` operators.
|
|
77
|
+
:class:`CompositeQuery` objects using the ``|`` and ``&`` operators.
|
|
78
78
|
|
|
79
79
|
This is the abstract base class for all queries that are
|
|
80
80
|
terminal nodes.
|
|
@@ -783,7 +783,7 @@ def search(
|
|
|
783
783
|
The type of the returned identifiers:
|
|
784
784
|
|
|
785
785
|
- ``'entry'``: Only the PDB ID is returned (e.g. ``'XXXX'``).
|
|
786
|
-
These can be used directly
|
|
786
|
+
These can be used directly as input to :func:`fetch()`.
|
|
787
787
|
- ``'assembly'``: The PDB ID appended with assembly ID is
|
|
788
788
|
returned (e.g. ``'XXXX-1'``).
|
|
789
789
|
- ``'polymer_entity'``: The PDB ID appended with entity ID of
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
biotite/structure/atoms.py
CHANGED
|
@@ -1554,7 +1554,7 @@ def coord(item):
|
|
|
1554
1554
|
Atom coordinates.
|
|
1555
1555
|
"""
|
|
1556
1556
|
|
|
1557
|
-
if
|
|
1557
|
+
if isinstance(item, (Atom, _AtomArrayBase)):
|
|
1558
1558
|
return item.coord
|
|
1559
1559
|
elif isinstance(item, np.ndarray):
|
|
1560
1560
|
return item.astype(np.float32, copy=False)
|
|
Binary file
|
|
Binary file
|
biotite/structure/chains.py
CHANGED
|
@@ -16,6 +16,7 @@ __all__ = [
|
|
|
16
16
|
"get_chain_masks",
|
|
17
17
|
"get_chain_starts_for",
|
|
18
18
|
"get_chain_positions",
|
|
19
|
+
"get_all_chain_positions",
|
|
19
20
|
"chain_iter",
|
|
20
21
|
"get_chains",
|
|
21
22
|
"get_chain_count",
|
|
@@ -24,6 +25,7 @@ __all__ = [
|
|
|
24
25
|
|
|
25
26
|
from biotite.structure.segments import (
|
|
26
27
|
apply_segment_wise,
|
|
28
|
+
get_all_segment_positions,
|
|
27
29
|
get_segment_masks,
|
|
28
30
|
get_segment_positions,
|
|
29
31
|
get_segment_starts,
|
|
@@ -212,11 +214,43 @@ def get_chain_positions(array, indices):
|
|
|
212
214
|
-------
|
|
213
215
|
start_indices : ndarray, dtype=int, shape=(k,)
|
|
214
216
|
The indices that point to the position of the chains.
|
|
217
|
+
|
|
218
|
+
See Also
|
|
219
|
+
--------
|
|
220
|
+
get_all_chain_positions :
|
|
221
|
+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
|
|
215
222
|
"""
|
|
216
223
|
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
217
224
|
return get_segment_positions(starts, indices)
|
|
218
225
|
|
|
219
226
|
|
|
227
|
+
def get_all_chain_positions(array):
|
|
228
|
+
"""
|
|
229
|
+
For each atom, obtain the position of the chain
|
|
230
|
+
corresponding to this atom in the input `array`.
|
|
231
|
+
|
|
232
|
+
For example, the position of the first chain in the atom array is
|
|
233
|
+
``0``, the position of the second chain is ``1``, etc.
|
|
234
|
+
|
|
235
|
+
Parameters
|
|
236
|
+
----------
|
|
237
|
+
array : AtomArray or AtomArrayStack
|
|
238
|
+
The atom array (stack) to determine the chains from.
|
|
239
|
+
|
|
240
|
+
Returns
|
|
241
|
+
-------
|
|
242
|
+
chain_indices : ndarray, dtype=int, shape=(n,)
|
|
243
|
+
The indices that point to the position of the chains.
|
|
244
|
+
|
|
245
|
+
See Also
|
|
246
|
+
--------
|
|
247
|
+
get_chain_positions :
|
|
248
|
+
Similar to this function, but for a given subset of atom indices.
|
|
249
|
+
"""
|
|
250
|
+
starts = get_chain_starts(array, add_exclusive_stop=True)
|
|
251
|
+
return get_all_segment_positions(starts, array.array_length())
|
|
252
|
+
|
|
253
|
+
|
|
220
254
|
def get_chains(array):
|
|
221
255
|
"""
|
|
222
256
|
Get the chain IDs of an atom array (stack).
|
|
Binary file
|
biotite/structure/filter.py
CHANGED
|
@@ -63,7 +63,8 @@ _canonical_aa_list = [
|
|
|
63
63
|
]
|
|
64
64
|
_canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
# Residue names of solvent molecules not only in CCD, but also from modeling software
|
|
67
|
+
_solvent_list = ["HOH", "DOD", "SOL", "WAT", "H2O", "TIP3", "TIP4", "TIP5"]
|
|
67
68
|
|
|
68
69
|
_peptide_backbone_atoms = ["N", "CA", "C"]
|
|
69
70
|
_phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
|
biotite/structure/geometry.py
CHANGED
|
@@ -19,19 +19,79 @@ __all__ = [
|
|
|
19
19
|
"dihedral",
|
|
20
20
|
"index_dihedral",
|
|
21
21
|
"dihedral_backbone",
|
|
22
|
+
"dihedral_side_chain",
|
|
22
23
|
"centroid",
|
|
23
24
|
]
|
|
24
25
|
|
|
26
|
+
import functools
|
|
25
27
|
import numpy as np
|
|
26
28
|
from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
|
|
27
29
|
from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
|
|
28
|
-
from biotite.structure.filter import filter_amino_acids
|
|
30
|
+
from biotite.structure.filter import filter_amino_acids, filter_canonical_amino_acids
|
|
31
|
+
from biotite.structure.residues import get_residue_starts
|
|
29
32
|
from biotite.structure.util import (
|
|
30
33
|
coord_for_atom_name_per_residue,
|
|
31
34
|
norm_vector,
|
|
32
35
|
vector_dot,
|
|
33
36
|
)
|
|
34
37
|
|
|
38
|
+
# The names of the atoms participating in chi angle
|
|
39
|
+
_CHI_ATOMS = {
|
|
40
|
+
"ARG": [
|
|
41
|
+
("N", "CA", "CB", "CG"),
|
|
42
|
+
("CA", "CB", "CG", "CD"),
|
|
43
|
+
("CB", "CG", "CD", "NE"),
|
|
44
|
+
("CG", "CD", "NE", "CZ"),
|
|
45
|
+
],
|
|
46
|
+
"LEU": [
|
|
47
|
+
("N", "CA", "CB", "CG"),
|
|
48
|
+
# By convention chi2 is defined using CD1 instead of CD2
|
|
49
|
+
("CA", "CB", "CG", "CD1"),
|
|
50
|
+
],
|
|
51
|
+
"VAL": [("N", "CA", "CB", "CG1")],
|
|
52
|
+
"ILE": [("N", "CA", "CB", "CG1"), ("CA", "CB", "CG1", "CD1")],
|
|
53
|
+
"MET": [
|
|
54
|
+
("N", "CA", "CB", "CG"),
|
|
55
|
+
("CA", "CB", "CG", "SD"),
|
|
56
|
+
("CB", "CG", "SD", "CE"),
|
|
57
|
+
],
|
|
58
|
+
"LYS": [
|
|
59
|
+
("N", "CA", "CB", "CG"),
|
|
60
|
+
("CA", "CB", "CG", "CD"),
|
|
61
|
+
("CB", "CG", "CD", "CE"),
|
|
62
|
+
("CG", "CD", "CE", "NZ"),
|
|
63
|
+
],
|
|
64
|
+
"PHE": [
|
|
65
|
+
("N", "CA", "CB", "CG"),
|
|
66
|
+
("CA", "CB", "CG", "CD1"),
|
|
67
|
+
],
|
|
68
|
+
"TRP": [
|
|
69
|
+
("N", "CA", "CB", "CG"),
|
|
70
|
+
("CA", "CB", "CG", "CD1"),
|
|
71
|
+
],
|
|
72
|
+
"TYR": [
|
|
73
|
+
("N", "CA", "CB", "CG"),
|
|
74
|
+
("CA", "CB", "CG", "CD1"),
|
|
75
|
+
],
|
|
76
|
+
"ASN": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
|
|
77
|
+
"GLN": [
|
|
78
|
+
("N", "CA", "CB", "CG"),
|
|
79
|
+
("CA", "CB", "CG", "CD"),
|
|
80
|
+
("CB", "CG", "CD", "OE1"),
|
|
81
|
+
],
|
|
82
|
+
"ASP": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
|
|
83
|
+
"GLU": [
|
|
84
|
+
("N", "CA", "CB", "CG"),
|
|
85
|
+
("CA", "CB", "CG", "CD"),
|
|
86
|
+
("CB", "CG", "CD", "OE1"),
|
|
87
|
+
],
|
|
88
|
+
"CYS": [("N", "CA", "CB", "SG")],
|
|
89
|
+
"HIS": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "ND1")],
|
|
90
|
+
"PRO": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "CD")],
|
|
91
|
+
"SER": [("N", "CA", "CB", "OG")],
|
|
92
|
+
"THR": [("N", "CA", "CB", "OG1")],
|
|
93
|
+
}
|
|
94
|
+
|
|
35
95
|
|
|
36
96
|
def displacement(atoms1, atoms2, box=None):
|
|
37
97
|
"""
|
|
@@ -492,7 +552,7 @@ def dihedral_backbone(atom_array):
|
|
|
492
552
|
|
|
493
553
|
Returns
|
|
494
554
|
-------
|
|
495
|
-
phi, psi, omega : ndarray
|
|
555
|
+
phi, psi, omega : ndarray, shape=(m,n) or shape=(n,), dtype=float
|
|
496
556
|
An array containing the 3 backbone dihedral angles for every CA atom.
|
|
497
557
|
`phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
|
|
498
558
|
C-terminus.
|
|
@@ -562,6 +622,96 @@ def dihedral_backbone(atom_array):
|
|
|
562
622
|
return phi, psi, omg
|
|
563
623
|
|
|
564
624
|
|
|
625
|
+
def dihedral_side_chain(atoms):
|
|
626
|
+
r"""
|
|
627
|
+
Measure the side chain :math:`\chi` dihedral angles of amino acid residues.
|
|
628
|
+
|
|
629
|
+
Parameters
|
|
630
|
+
----------
|
|
631
|
+
atoms : AtomArray or AtomArrayStack
|
|
632
|
+
The protein structure to measure the side chain dihedral angles for.
|
|
633
|
+
|
|
634
|
+
Returns
|
|
635
|
+
-------
|
|
636
|
+
chi : ndarray, shape=(m, n, 4) or shape=(n, 4), dtype=float
|
|
637
|
+
An array containing the up to four side chain dihedral angles for every
|
|
638
|
+
amino acid residue.
|
|
639
|
+
Trailing :math:`\chi` angles that are not defined for an amino acid are filled
|
|
640
|
+
with :math:`NaN` values.
|
|
641
|
+
The same is true for all residues that are not canonical amino acids.
|
|
642
|
+
|
|
643
|
+
Notes
|
|
644
|
+
-----
|
|
645
|
+
By convention, the :math:`\chi_2` angle of leucine is defined using ``CD1``
|
|
646
|
+
instead of ``CD2``.
|
|
647
|
+
|
|
648
|
+
Examples
|
|
649
|
+
--------
|
|
650
|
+
|
|
651
|
+
>>> res_ids, res_names = get_residues(atom_array)
|
|
652
|
+
>>> dihedrals = dihedral_side_chain(atom_array)
|
|
653
|
+
>>> for res_id, res_name, dihedrals in zip(res_ids, res_names, dihedrals):
|
|
654
|
+
... print(f"{res_name.capitalize()}{res_id:<2d}:", dihedrals)
|
|
655
|
+
Asn1 : [-1.180 -0.066 nan nan]
|
|
656
|
+
Leu2 : [0.923 1.866 nan nan]
|
|
657
|
+
Tyr3 : [-2.593 -1.487 nan nan]
|
|
658
|
+
Ile4 : [-0.781 -0.972 nan nan]
|
|
659
|
+
Gln5 : [-2.557 1.410 -1.776 nan]
|
|
660
|
+
Trp6 : [3.117 1.372 nan nan]
|
|
661
|
+
Leu7 : [-1.33 3.08 nan nan]
|
|
662
|
+
Lys8 : [ 1.320 1.734 3.076 -2.022]
|
|
663
|
+
Asp9 : [-1.623 0.909 nan nan]
|
|
664
|
+
Gly10: [nan nan nan nan]
|
|
665
|
+
Gly11: [nan nan nan nan]
|
|
666
|
+
Pro12: [-0.331 0.539 nan nan]
|
|
667
|
+
Ser13: [-1.067 nan nan nan]
|
|
668
|
+
Ser14: [-2.514 nan nan nan]
|
|
669
|
+
Gly15: [nan nan nan nan]
|
|
670
|
+
Arg16: [ 1.032 -3.063 1.541 -1.568]
|
|
671
|
+
Pro17: [ 0.522 -0.601 nan nan]
|
|
672
|
+
Pro18: [ 0.475 -0.577 nan nan]
|
|
673
|
+
Pro19: [ 0.561 -0.602 nan nan]
|
|
674
|
+
Ser20: [-1.055 nan nan nan]
|
|
675
|
+
"""
|
|
676
|
+
is_multi_model = isinstance(atoms, AtomArrayStack)
|
|
677
|
+
|
|
678
|
+
chi_atoms = _all_chi_atoms()
|
|
679
|
+
res_names = atoms.res_name[get_residue_starts(atoms)]
|
|
680
|
+
chi_atom_coord = coord_for_atom_name_per_residue(
|
|
681
|
+
atoms, chi_atoms, filter_canonical_amino_acids(atoms)
|
|
682
|
+
)
|
|
683
|
+
chi_atoms_to_coord_index = {atom_name: i for i, atom_name in enumerate(chi_atoms)}
|
|
684
|
+
|
|
685
|
+
if is_multi_model:
|
|
686
|
+
shape = (atoms.stack_depth(), len(res_names), 4)
|
|
687
|
+
else:
|
|
688
|
+
shape = (len(res_names), 4)
|
|
689
|
+
chi_angles = np.full(shape, np.nan, dtype=np.float32)
|
|
690
|
+
for res_name, chi_atom_names_for_all_angles in _CHI_ATOMS.items():
|
|
691
|
+
res_mask = res_names == res_name
|
|
692
|
+
for chi_i, chi_atom_names in enumerate(chi_atom_names_for_all_angles):
|
|
693
|
+
dihedrals = dihedral(
|
|
694
|
+
chi_atom_coord[
|
|
695
|
+
chi_atoms_to_coord_index[chi_atom_names[0]], ..., res_mask, :
|
|
696
|
+
],
|
|
697
|
+
chi_atom_coord[
|
|
698
|
+
chi_atoms_to_coord_index[chi_atom_names[1]], ..., res_mask, :
|
|
699
|
+
],
|
|
700
|
+
chi_atom_coord[
|
|
701
|
+
chi_atoms_to_coord_index[chi_atom_names[2]], ..., res_mask, :
|
|
702
|
+
],
|
|
703
|
+
chi_atom_coord[
|
|
704
|
+
chi_atoms_to_coord_index[chi_atom_names[3]], ..., res_mask, :
|
|
705
|
+
],
|
|
706
|
+
)
|
|
707
|
+
if is_multi_model:
|
|
708
|
+
# Swap dimensions due to NumPy's behavior when using advanced indexing
|
|
709
|
+
# (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing)
|
|
710
|
+
dihedrals = dihedrals.T
|
|
711
|
+
chi_angles[..., res_mask, chi_i] = dihedrals
|
|
712
|
+
return chi_angles
|
|
713
|
+
|
|
714
|
+
|
|
565
715
|
def centroid(atoms):
|
|
566
716
|
"""
|
|
567
717
|
Measure the centroid of a structure.
|
|
@@ -653,3 +803,15 @@ def _displacement_triclinic_box(fractions, box, disp):
|
|
|
653
803
|
disp[:] = shifted_diffs[
|
|
654
804
|
np.arange(len(shifted_diffs)), np.argmin(sq_distance, axis=1)
|
|
655
805
|
]
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
@functools.cache
|
|
809
|
+
def _all_chi_atoms():
|
|
810
|
+
"""
|
|
811
|
+
Get the names of the atoms participating in any chi angle.
|
|
812
|
+
"""
|
|
813
|
+
atom_names = set()
|
|
814
|
+
for angles in _CHI_ATOMS.values():
|
|
815
|
+
for angle in angles:
|
|
816
|
+
atom_names.update(angle)
|
|
817
|
+
return sorted(atom_names)
|
biotite/structure/info/atoms.py
CHANGED
|
@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["residue"]
|
|
8
8
|
|
|
9
|
+
import functools
|
|
9
10
|
from biotite.structure.info.ccd import get_ccd
|
|
10
11
|
|
|
11
12
|
# fmt: off
|
|
@@ -75,6 +76,13 @@ def residue(res_name, allow_missing_coord=False):
|
|
|
75
76
|
['CB' 'HB3']
|
|
76
77
|
['OXT' 'HXT']]
|
|
77
78
|
"""
|
|
79
|
+
# Use a cache internally, but always return a copy,
|
|
80
|
+
# as the returned AtomArray is mutable
|
|
81
|
+
return _residue(res_name, allow_missing_coord).copy()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@functools.lru_cache(maxsize=100)
|
|
85
|
+
def _residue(res_name, allow_missing_coord=False):
|
|
78
86
|
# Avoid circular import
|
|
79
87
|
from biotite.structure.io.pdbx import get_component
|
|
80
88
|
|
|
Binary file
|
biotite/structure/io/pdb/file.py
CHANGED
|
@@ -6,12 +6,16 @@ __name__ = "biotite.structure.io.pdb"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
|
|
7
7
|
__all__ = ["PDBFile"]
|
|
8
8
|
|
|
9
|
+
import itertools
|
|
9
10
|
import warnings
|
|
10
11
|
from collections import namedtuple
|
|
11
12
|
import numpy as np
|
|
12
13
|
from biotite.file import InvalidFileError, TextFile
|
|
13
14
|
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
14
|
-
from biotite.structure.bonds import
|
|
15
|
+
from biotite.structure.bonds import (
|
|
16
|
+
BondList,
|
|
17
|
+
connect_via_residue_names,
|
|
18
|
+
)
|
|
15
19
|
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
16
20
|
from biotite.structure.error import BadStructureError
|
|
17
21
|
from biotite.structure.filter import (
|
|
@@ -19,6 +23,7 @@ from biotite.structure.filter import (
|
|
|
19
23
|
filter_highest_occupancy_altloc,
|
|
20
24
|
filter_solvent,
|
|
21
25
|
)
|
|
26
|
+
from biotite.structure.info.bonds import bonds_in_residue
|
|
22
27
|
from biotite.structure.io.pdb.hybrid36 import (
|
|
23
28
|
decode_hybrid36,
|
|
24
29
|
encode_hybrid36,
|
|
@@ -544,7 +549,16 @@ class PDBFile(TextFile):
|
|
|
544
549
|
# Read bonds
|
|
545
550
|
if include_bonds:
|
|
546
551
|
bond_list = self._get_bonds(atom_id)
|
|
547
|
-
|
|
552
|
+
# Create bond dict containing only non-hetero residues (+ water)
|
|
553
|
+
custom_bond_dict = {
|
|
554
|
+
res_name: bonds_in_residue(res_name)
|
|
555
|
+
for res_name in itertools.chain(
|
|
556
|
+
np.unique(array[..., ~array.hetero].res_name), ["HOH"]
|
|
557
|
+
)
|
|
558
|
+
}
|
|
559
|
+
bond_list = bond_list.merge(
|
|
560
|
+
connect_via_residue_names(array, custom_bond_dict=custom_bond_dict)
|
|
561
|
+
)
|
|
548
562
|
array.bonds = bond_list
|
|
549
563
|
|
|
550
564
|
return array
|
|
Binary file
|
|
@@ -292,7 +292,7 @@ class BinaryCIFColumn(_Component):
|
|
|
292
292
|
else:
|
|
293
293
|
# Array needs to be converted, but masked values are
|
|
294
294
|
# not necessarily convertible
|
|
295
|
-
# (e.g. '' cannot be converted to int)
|
|
295
|
+
# (e.g. '.' cannot be converted to int)
|
|
296
296
|
if masked_value is None:
|
|
297
297
|
array = np.zeros(len(self._data), dtype=dtype)
|
|
298
298
|
else:
|
biotite/structure/io/pdbx/cif.py
CHANGED
|
@@ -243,7 +243,7 @@ class CIFColumn:
|
|
|
243
243
|
else:
|
|
244
244
|
# Array needs to be converted, but masked values are
|
|
245
245
|
# not necessarily convertible
|
|
246
|
-
# (e.g. '' cannot be converted to int)
|
|
246
|
+
# (e.g. '.' cannot be converted to int)
|
|
247
247
|
if masked_value is None:
|
|
248
248
|
array = np.zeros(len(self._data), dtype=dtype)
|
|
249
249
|
else:
|
|
@@ -140,8 +140,8 @@ def _compress_data(bcif_data, rtol, atol):
|
|
|
140
140
|
# Run encode to initialize the data and offset arrays
|
|
141
141
|
indices = encoding.encode(array)
|
|
142
142
|
offsets = np.cumsum([0] + [len(s) for s in encoding.strings])
|
|
143
|
-
encoding.data_encoding
|
|
144
|
-
encoding.offset_encoding
|
|
143
|
+
encoding.data_encoding = _find_best_integer_compression(indices)
|
|
144
|
+
encoding.offset_encoding = _find_best_integer_compression(offsets)
|
|
145
145
|
return bcif.BinaryCIFData(array, [encoding])
|
|
146
146
|
|
|
147
147
|
elif np.issubdtype(array.dtype, np.floating):
|
|
@@ -159,18 +159,22 @@ def _compress_data(bcif_data, rtol, atol):
|
|
|
159
159
|
# -> do not use integer encoding
|
|
160
160
|
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
161
161
|
else:
|
|
162
|
-
best_encoding
|
|
163
|
-
|
|
162
|
+
best_encoding = _find_best_integer_compression(integer_array)
|
|
163
|
+
compressed_data = bcif.BinaryCIFData(
|
|
164
|
+
array, [to_integer_encoding] + best_encoding
|
|
164
165
|
)
|
|
165
|
-
|
|
166
|
-
|
|
166
|
+
uncompressed_data = bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
167
|
+
if _data_size_in_file(compressed_data) < _data_size_in_file(
|
|
168
|
+
uncompressed_data
|
|
169
|
+
):
|
|
170
|
+
return compressed_data
|
|
167
171
|
else:
|
|
168
172
|
# The float array is smaller -> encode it directly as bytes
|
|
169
|
-
return
|
|
173
|
+
return uncompressed_data
|
|
170
174
|
|
|
171
175
|
elif np.issubdtype(array.dtype, np.integer):
|
|
172
176
|
array = _to_smallest_integer_type(array)
|
|
173
|
-
encodings
|
|
177
|
+
encodings = _find_best_integer_compression(array)
|
|
174
178
|
return bcif.BinaryCIFData(array, encodings)
|
|
175
179
|
|
|
176
180
|
else:
|
|
@@ -233,7 +237,7 @@ def _find_best_integer_compression(array):
|
|
|
233
237
|
if size < smallest_size:
|
|
234
238
|
best_encoding_sequence = encodings
|
|
235
239
|
smallest_size = size
|
|
236
|
-
return best_encoding_sequence
|
|
240
|
+
return best_encoding_sequence
|
|
237
241
|
|
|
238
242
|
|
|
239
243
|
def _estimate_packed_length(array, packed_byte_count):
|