biotite 1.4.0__cp312-cp312-win_amd64.whl → 1.5.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (46) hide show
  1. biotite/application/dssp/app.py +63 -6
  2. biotite/database/afdb/download.py +12 -6
  3. biotite/database/rcsb/download.py +1 -0
  4. biotite/database/rcsb/query.py +2 -2
  5. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  6. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  7. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  8. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  9. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  10. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  11. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  12. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  13. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  14. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  15. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  16. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  17. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  18. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  19. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  20. biotite/structure/atoms.py +1 -1
  21. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  22. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  23. biotite/structure/chains.py +34 -0
  24. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  25. biotite/structure/filter.py +2 -1
  26. biotite/structure/geometry.py +164 -2
  27. biotite/structure/info/atoms.py +8 -0
  28. biotite/structure/info/components.bcif +0 -0
  29. biotite/structure/io/pdb/convert.py +1 -0
  30. biotite/structure/io/pdb/file.py +16 -2
  31. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  32. biotite/structure/io/pdbx/bcif.py +1 -1
  33. biotite/structure/io/pdbx/cif.py +1 -1
  34. biotite/structure/io/pdbx/compress.py +13 -9
  35. biotite/structure/io/pdbx/convert.py +17 -6
  36. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  37. biotite/structure/io/pdbx/encoding.pyx +39 -8
  38. biotite/structure/residues.py +173 -1
  39. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  40. biotite/structure/segments.py +39 -3
  41. biotite/structure/util.py +14 -22
  42. biotite/version.py +16 -3
  43. {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/METADATA +1 -1
  44. {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/RECORD +46 -46
  45. {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/WHEEL +0 -0
  46. {biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -11,8 +11,13 @@ from tempfile import NamedTemporaryFile
11
11
  import numpy as np
12
12
  from biotite.application.application import AppState, requires_state
13
13
  from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
14
- from biotite.structure.io.pdbx.cif import CIFFile
14
+ from biotite.structure.error import BadStructureError
15
+ from biotite.structure.filter import filter_amino_acids
16
+ from biotite.structure.io.pdbx.cif import CIFCategory, CIFColumn, CIFFile
17
+ from biotite.structure.io.pdbx.component import MaskValue
15
18
  from biotite.structure.io.pdbx.convert import set_structure
19
+ from biotite.structure.repair import create_continuous_res_ids
20
+ from biotite.structure.residues import get_residue_starts
16
21
 
17
22
 
18
23
  class DsspApp(LocalApp):
@@ -49,17 +54,19 @@ class DsspApp(LocalApp):
49
54
  >>> app.start()
50
55
  >>> app.join()
51
56
  >>> print(app.get_sse())
52
- ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'C' 'C'
53
- 'C' 'C']
57
+ ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P' 'P'
58
+ 'P' 'C']
54
59
  """
55
60
 
56
61
  def __init__(self, atom_array, bin_path="mkdssp"):
57
62
  super().__init__(bin_path)
58
63
 
59
- # mkdssp requires also the
60
- # 'occupancy', 'b_factor' and 'charge' fields
61
- # -> Add these annotations to a copy of the input structure
64
+ if not np.all(filter_amino_acids(atom_array)):
65
+ raise BadStructureError("The input structure must contain only amino acids")
62
66
  self._array = atom_array.copy()
67
+ # DSSP requires also the
68
+ # 'occupancy', 'b_factor' and 'charge' fields
69
+ # -> Add these placeholder values
63
70
  categories = self._array.get_annotation_categories()
64
71
  if "charge" not in categories:
65
72
  self._array.set_annotation(
@@ -73,6 +80,10 @@ class DsspApp(LocalApp):
73
80
  self._array.set_annotation(
74
81
  "occupancy", np.ones(self._array.array_length(), dtype=float)
75
82
  )
83
+ # DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
84
+ # if `seq_id` does not start at 1
85
+ self._array.res_id = create_continuous_res_ids(self._array)
86
+
76
87
  try:
77
88
  # The parameters have changed in version 4
78
89
  self._new_cli = get_version(bin_path)[0] >= 4
@@ -86,6 +97,9 @@ class DsspApp(LocalApp):
86
97
  def run(self):
87
98
  in_file = CIFFile()
88
99
  set_structure(in_file, self._array)
100
+ in_file.block["pdbx_poly_seq_scheme"] = _create_pdbx_poly_seq_scheme(
101
+ self._array, in_file.block["atom_site"]["label_entity_id"].as_array(str)
102
+ )
89
103
  in_file.write(self._in_file)
90
104
  self._in_file.flush()
91
105
  if self._new_cli:
@@ -157,3 +171,46 @@ class DsspApp(LocalApp):
157
171
  app.start()
158
172
  app.join()
159
173
  return app.get_sse()
174
+
175
+
176
+ def _create_pdbx_poly_seq_scheme(atom_array, entity_ids):
177
+ """
178
+ Create the ``pdbx_poly_seq_scheme`` category, as required by DSSP.
179
+
180
+ Parameters
181
+ ----------
182
+ atom_array : AtomArray
183
+ The atom array to create the category from.
184
+ entity_ids : ndarray, dtype=str
185
+ The entity IDs for each atom.
186
+
187
+ Returns
188
+ -------
189
+ pdbx_poly_seq_scheme : CIFCategory
190
+ The ``pdbx_poly_seq_scheme`` category.
191
+ """
192
+ res_start_indices = get_residue_starts(atom_array)
193
+ chain_id = atom_array.chain_id[res_start_indices]
194
+ res_name = atom_array.res_name[res_start_indices]
195
+ res_id = atom_array.res_id[res_start_indices]
196
+ ins_code = atom_array.ins_code[res_start_indices]
197
+ hetero = atom_array.hetero[res_start_indices]
198
+ entity_id = entity_ids[res_start_indices]
199
+
200
+ poly_seq_scheme = CIFCategory()
201
+ poly_seq_scheme["asym_id"] = chain_id
202
+ poly_seq_scheme["entity_id"] = entity_id
203
+ poly_seq_scheme["seq_id"] = res_id
204
+ poly_seq_scheme["mon_id"] = res_name
205
+ poly_seq_scheme["ndb_seq_num"] = res_id
206
+ poly_seq_scheme["pdb_seq_num"] = res_id
207
+ poly_seq_scheme["auth_seq_num"] = res_id
208
+ poly_seq_scheme["pdb_mon_id"] = res_name
209
+ poly_seq_scheme["auth_mon_id"] = res_name
210
+ poly_seq_scheme["pdb_strand_id"] = chain_id
211
+ poly_seq_scheme["pdb_ins_code"] = CIFColumn(
212
+ ins_code, np.where(ins_code == "", MaskValue.MISSING, MaskValue.PRESENT)
213
+ )
214
+ poly_seq_scheme["hetero"] = np.where(hetero, "y", "n")
215
+
216
+ return poly_seq_scheme
@@ -16,8 +16,11 @@ from biotite.database.error import RequestError
16
16
  _METADATA_URL = "https://alphafold.com/api/prediction"
17
17
  _BINARY_FORMATS = ["bcif"]
18
18
  # Adopted from https://www.uniprot.org/help/accession_numbers
19
+ # adding the optional 'AF-' prefix and '-F1' suffix used by RCSB
19
20
  _UNIPROT_PATTERN = (
20
- "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
21
+ r"^(?P<prefix>(AF-)|(AF_AF))?"
22
+ r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
23
+ r"(?P<suffix>-?F1)?$"
21
24
  )
22
25
 
23
26
 
@@ -31,8 +34,8 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
31
34
  ----------
32
35
  ids : str or iterable object of str
33
36
  A single ID or a list of IDs of the file(s) to be downloaded.
34
- They can be either UniProt IDs (e.g. ``P12345``) or AlphaFold DB IDs
35
- (e.g. ``AF-P12345F1``).
37
+ They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
38
+ (e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
36
39
  format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
37
40
  The format of the files to be downloaded.
38
41
  target_path : str, optional
@@ -142,7 +145,10 @@ def _get_file_url(id, format):
142
145
  The URL of the file to be downloaded.
143
146
  """
144
147
  uniprot_id = _extract_id(id)
145
- metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
148
+ try:
149
+ metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
150
+ except requests.exceptions.JSONDecodeError:
151
+ raise RequestError("Received malformed JSON response")
146
152
  if len(metadata) == 0:
147
153
  raise RequestError(f"ID {id} is invalid")
148
154
  # A list of length 1 is always returned, if the response is valid
@@ -167,10 +173,10 @@ def _extract_id(id):
167
173
  uniprot_id : str
168
174
  The UniProt ID.
169
175
  """
170
- match = re.search(_UNIPROT_PATTERN, id)
176
+ match = re.match(_UNIPROT_PATTERN, id)
171
177
  if match is None:
172
178
  raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
173
- return match.group()
179
+ return match.group("id")
174
180
 
175
181
 
176
182
  def _assert_valid_file(response, id):
@@ -155,6 +155,7 @@ def _assert_valid_file(response_text, pdb_id):
155
155
  "<title>PDB Archive over AWS</title>",
156
156
  "No fasta files were found.",
157
157
  "No valid PDB IDs were submitted.",
158
+ "The requested URL was incorrect, too long or otherwise malformed.",
158
159
  ]
159
160
  ):
160
161
  raise RequestError("PDB ID {:} is invalid".format(pdb_id))
@@ -74,7 +74,7 @@ class SingleQuery(Query, metaclass=abc.ABCMeta):
74
74
  A terminal query node for the RCSB search API.
75
75
 
76
76
  Multiple :class:`SingleQuery` objects can be combined to
77
- :class:`CompositeQuery`objects using the ``|`` and ``&`` operators.
77
+ :class:`CompositeQuery` objects using the ``|`` and ``&`` operators.
78
78
 
79
79
  This is the abstract base class for all queries that are
80
80
  terminal nodes.
@@ -783,7 +783,7 @@ def search(
783
783
  The type of the returned identifiers:
784
784
 
785
785
  - ``'entry'``: Only the PDB ID is returned (e.g. ``'XXXX'``).
786
- These can be used directly a input to :func:`fetch()`.
786
+ These can be used directly as input to :func:`fetch()`.
787
787
  - ``'assembly'``: The PDB ID appended with assembly ID is
788
788
  returned (e.g. ``'XXXX-1'``).
789
789
  - ``'polymer_entity'``: The PDB ID appended with entity ID of
Binary file
@@ -1554,7 +1554,7 @@ def coord(item):
1554
1554
  Atom coordinates.
1555
1555
  """
1556
1556
 
1557
- if type(item) in (Atom, AtomArray, AtomArrayStack):
1557
+ if isinstance(item, (Atom, _AtomArrayBase)):
1558
1558
  return item.coord
1559
1559
  elif isinstance(item, np.ndarray):
1560
1560
  return item.astype(np.float32, copy=False)
Binary file
@@ -16,6 +16,7 @@ __all__ = [
16
16
  "get_chain_masks",
17
17
  "get_chain_starts_for",
18
18
  "get_chain_positions",
19
+ "get_all_chain_positions",
19
20
  "chain_iter",
20
21
  "get_chains",
21
22
  "get_chain_count",
@@ -24,6 +25,7 @@ __all__ = [
24
25
 
25
26
  from biotite.structure.segments import (
26
27
  apply_segment_wise,
28
+ get_all_segment_positions,
27
29
  get_segment_masks,
28
30
  get_segment_positions,
29
31
  get_segment_starts,
@@ -212,11 +214,43 @@ def get_chain_positions(array, indices):
212
214
  -------
213
215
  start_indices : ndarray, dtype=int, shape=(k,)
214
216
  The indices that point to the position of the chains.
217
+
218
+ See Also
219
+ --------
220
+ get_all_chain_positions :
221
+ Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
215
222
  """
216
223
  starts = get_chain_starts(array, add_exclusive_stop=True)
217
224
  return get_segment_positions(starts, indices)
218
225
 
219
226
 
227
+ def get_all_chain_positions(array):
228
+ """
229
+ For each atom, obtain the position of the chain
230
+ corresponding to this atom in the input `array`.
231
+
232
+ For example, the position of the first chain in the atom array is
233
+ ``0``, the position of the second chain is ``1``, etc.
234
+
235
+ Parameters
236
+ ----------
237
+ array : AtomArray or AtomArrayStack
238
+ The atom array (stack) to determine the chains from.
239
+
240
+ Returns
241
+ -------
242
+ chain_indices : ndarray, dtype=int, shape=(k,)
243
+ The indices that point to the position of the chains.
244
+
245
+ See Also
246
+ --------
247
+ get_chain_positions :
248
+ Similar to this function, but for a given subset of atom indices.
249
+ """
250
+ starts = get_chain_starts(array, add_exclusive_stop=True)
251
+ return get_all_segment_positions(starts, array.array_length())
252
+
253
+
220
254
  def get_chains(array):
221
255
  """
222
256
  Get the chain IDs of an atom array (stack).
@@ -63,7 +63,8 @@ _canonical_aa_list = [
63
63
  ]
64
64
  _canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
65
65
 
66
- _solvent_list = ["HOH", "SOL"]
66
+ # Residue names of solvent molecules not only in CCD, but also from modeling software
67
+ _solvent_list = ["HOH", "DOD", "SOL", "WAT", "H2O", "TIP3", "TIP4", "TIP5"]
67
68
 
68
69
  _peptide_backbone_atoms = ["N", "CA", "C"]
69
70
  _phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]
@@ -19,19 +19,79 @@ __all__ = [
19
19
  "dihedral",
20
20
  "index_dihedral",
21
21
  "dihedral_backbone",
22
+ "dihedral_side_chain",
22
23
  "centroid",
23
24
  ]
24
25
 
26
+ import functools
25
27
  import numpy as np
26
28
  from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
27
29
  from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
28
- from biotite.structure.filter import filter_amino_acids
30
+ from biotite.structure.filter import filter_amino_acids, filter_canonical_amino_acids
31
+ from biotite.structure.residues import get_residue_starts
29
32
  from biotite.structure.util import (
30
33
  coord_for_atom_name_per_residue,
31
34
  norm_vector,
32
35
  vector_dot,
33
36
  )
34
37
 
38
+ # The names of the atoms participating in chi angle
39
+ _CHI_ATOMS = {
40
+ "ARG": [
41
+ ("N", "CA", "CB", "CG"),
42
+ ("CA", "CB", "CG", "CD"),
43
+ ("CB", "CG", "CD", "NE"),
44
+ ("CG", "CD", "NE", "CZ"),
45
+ ],
46
+ "LEU": [
47
+ ("N", "CA", "CB", "CG"),
48
+ # By convention chi2 is defined using CD1 instead of CD2
49
+ ("CA", "CB", "CG", "CD1"),
50
+ ],
51
+ "VAL": [("N", "CA", "CB", "CG1")],
52
+ "ILE": [("N", "CA", "CB", "CG1"), ("CA", "CB", "CG1", "CD1")],
53
+ "MET": [
54
+ ("N", "CA", "CB", "CG"),
55
+ ("CA", "CB", "CG", "SD"),
56
+ ("CB", "CG", "SD", "CE"),
57
+ ],
58
+ "LYS": [
59
+ ("N", "CA", "CB", "CG"),
60
+ ("CA", "CB", "CG", "CD"),
61
+ ("CB", "CG", "CD", "CE"),
62
+ ("CG", "CD", "CE", "NZ"),
63
+ ],
64
+ "PHE": [
65
+ ("N", "CA", "CB", "CG"),
66
+ ("CA", "CB", "CG", "CD1"),
67
+ ],
68
+ "TRP": [
69
+ ("N", "CA", "CB", "CG"),
70
+ ("CA", "CB", "CG", "CD1"),
71
+ ],
72
+ "TYR": [
73
+ ("N", "CA", "CB", "CG"),
74
+ ("CA", "CB", "CG", "CD1"),
75
+ ],
76
+ "ASN": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
77
+ "GLN": [
78
+ ("N", "CA", "CB", "CG"),
79
+ ("CA", "CB", "CG", "CD"),
80
+ ("CB", "CG", "CD", "OE1"),
81
+ ],
82
+ "ASP": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
83
+ "GLU": [
84
+ ("N", "CA", "CB", "CG"),
85
+ ("CA", "CB", "CG", "CD"),
86
+ ("CB", "CG", "CD", "OE1"),
87
+ ],
88
+ "CYS": [("N", "CA", "CB", "SG")],
89
+ "HIS": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "ND1")],
90
+ "PRO": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "CD")],
91
+ "SER": [("N", "CA", "CB", "OG")],
92
+ "THR": [("N", "CA", "CB", "OG1")],
93
+ }
94
+
35
95
 
36
96
  def displacement(atoms1, atoms2, box=None):
37
97
  """
@@ -492,7 +552,7 @@ def dihedral_backbone(atom_array):
492
552
 
493
553
  Returns
494
554
  -------
495
- phi, psi, omega : ndarray
555
+ phi, psi, omega : ndarray, shape=(m,n) or shape=(n,), dtype=float
496
556
  An array containing the 3 backbone dihedral angles for every CA atom.
497
557
  `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
498
558
  C-terminus.
@@ -562,6 +622,96 @@ def dihedral_backbone(atom_array):
562
622
  return phi, psi, omg
563
623
 
564
624
 
625
+ def dihedral_side_chain(atoms):
626
+ r"""
627
+ Measure the side chain :math:`\chi` dihedral angles of amino acid residues.
628
+
629
+ Parameters
630
+ ----------
631
+ atoms : AtomArray or AtomArrayStack
632
+ The protein structure to measure the side chain dihedral angles for.
633
+
634
+ Returns
635
+ -------
636
+ chi : ndarray, shape=(m, n, 4) or shape=(n, 4), dtype=float
637
+ An array containing the up to four side chain dihedral angles for every
638
+ amino acid residue.
639
+ Trailing :math:`\chi` angles that are not defined for an amino acid are filled
640
+ with :math:`NaN` values.
641
+ The same is True for all residues that are not canonical amino acids.
642
+
643
+ Notes
644
+ -----
645
+ By convention, the :math:`\chi_2` angle of leucine is defined using ``CD1``
646
+ instead of ``CD2``.
647
+
648
+ Examples
649
+ --------
650
+
651
+ >>> res_ids, res_names = get_residues(atom_array)
652
+ >>> dihedrals = dihedral_side_chain(atom_array)
653
+ >>> for res_id, res_name, dihedrals in zip(res_ids, res_names, dihedrals):
654
+ ... print(f"{res_name.capitalize()}{res_id:<2d}:", dihedrals)
655
+ Asn1 : [-1.180 -0.066 nan nan]
656
+ Leu2 : [0.923 1.866 nan nan]
657
+ Tyr3 : [-2.593 -1.487 nan nan]
658
+ Ile4 : [-0.781 -0.972 nan nan]
659
+ Gln5 : [-2.557 1.410 -1.776 nan]
660
+ Trp6 : [3.117 1.372 nan nan]
661
+ Leu7 : [-1.33 3.08 nan nan]
662
+ Lys8 : [ 1.320 1.734 3.076 -2.022]
663
+ Asp9 : [-1.623 0.909 nan nan]
664
+ Gly10: [nan nan nan nan]
665
+ Gly11: [nan nan nan nan]
666
+ Pro12: [-0.331 0.539 nan nan]
667
+ Ser13: [-1.067 nan nan nan]
668
+ Ser14: [-2.514 nan nan nan]
669
+ Gly15: [nan nan nan nan]
670
+ Arg16: [ 1.032 -3.063 1.541 -1.568]
671
+ Pro17: [ 0.522 -0.601 nan nan]
672
+ Pro18: [ 0.475 -0.577 nan nan]
673
+ Pro19: [ 0.561 -0.602 nan nan]
674
+ Ser20: [-1.055 nan nan nan]
675
+ """
676
+ is_multi_model = isinstance(atoms, AtomArrayStack)
677
+
678
+ chi_atoms = _all_chi_atoms()
679
+ res_names = atoms.res_name[get_residue_starts(atoms)]
680
+ chi_atom_coord = coord_for_atom_name_per_residue(
681
+ atoms, chi_atoms, filter_canonical_amino_acids(atoms)
682
+ )
683
+ chi_atoms_to_coord_index = {atom_name: i for i, atom_name in enumerate(chi_atoms)}
684
+
685
+ if is_multi_model:
686
+ shape = (atoms.stack_depth(), len(res_names), 4)
687
+ else:
688
+ shape = (len(res_names), 4)
689
+ chi_angles = np.full(shape, np.nan, dtype=np.float32)
690
+ for res_name, chi_atom_names_for_all_angles in _CHI_ATOMS.items():
691
+ res_mask = res_names == res_name
692
+ for chi_i, chi_atom_names in enumerate(chi_atom_names_for_all_angles):
693
+ dihedrals = dihedral(
694
+ chi_atom_coord[
695
+ chi_atoms_to_coord_index[chi_atom_names[0]], ..., res_mask, :
696
+ ],
697
+ chi_atom_coord[
698
+ chi_atoms_to_coord_index[chi_atom_names[1]], ..., res_mask, :
699
+ ],
700
+ chi_atom_coord[
701
+ chi_atoms_to_coord_index[chi_atom_names[2]], ..., res_mask, :
702
+ ],
703
+ chi_atom_coord[
704
+ chi_atoms_to_coord_index[chi_atom_names[3]], ..., res_mask, :
705
+ ],
706
+ )
707
+ if is_multi_model:
708
+ # Swap dimensions due to NumPy's behavior when using advanced indexing
709
+ # (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing)
710
+ dihedrals = dihedrals.T
711
+ chi_angles[..., res_mask, chi_i] = dihedrals
712
+ return chi_angles
713
+
714
+
565
715
  def centroid(atoms):
566
716
  """
567
717
  Measure the centroid of a structure.
@@ -653,3 +803,15 @@ def _displacement_triclinic_box(fractions, box, disp):
653
803
  disp[:] = shifted_diffs[
654
804
  np.arange(len(shifted_diffs)), np.argmin(sq_distance, axis=1)
655
805
  ]
806
+
807
+
808
+ @functools.cache
809
+ def _all_chi_atoms():
810
+ """
811
+ Get the names of the atoms participating in any chi angle.
812
+ """
813
+ atom_names = set()
814
+ for angles in _CHI_ATOMS.values():
815
+ for angle in angles:
816
+ atom_names.update(angle)
817
+ return sorted(atom_names)
@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["residue"]
8
8
 
9
+ import functools
9
10
  from biotite.structure.info.ccd import get_ccd
10
11
 
11
12
  # fmt: off
@@ -75,6 +76,13 @@ def residue(res_name, allow_missing_coord=False):
75
76
  ['CB' 'HB3']
76
77
  ['OXT' 'HXT']]
77
78
  """
79
+ # Use a cache internally, but always return a copy,
80
+ # as the returned AtomArray is mutable
81
+ return _residue(res_name, allow_missing_coord).copy()
82
+
83
+
84
+ @functools.lru_cache(maxsize=100)
85
+ def _residue(res_name, allow_missing_coord=False):
78
86
  # Avoid circular import
79
87
  from biotite.structure.io.pdbx import get_component
80
88
 
Binary file
@@ -16,6 +16,7 @@ __all__ = [
16
16
  "list_assemblies",
17
17
  "get_assembly",
18
18
  "get_unit_cell",
19
+ "get_symmetry_mates",
19
20
  ]
20
21
 
21
22
  import warnings
@@ -6,12 +6,16 @@ __name__ = "biotite.structure.io.pdb"
6
6
  __author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
7
7
  __all__ = ["PDBFile"]
8
8
 
9
+ import itertools
9
10
  import warnings
10
11
  from collections import namedtuple
11
12
  import numpy as np
12
13
  from biotite.file import InvalidFileError, TextFile
13
14
  from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
14
- from biotite.structure.bonds import BondList, connect_via_residue_names
15
+ from biotite.structure.bonds import (
16
+ BondList,
17
+ connect_via_residue_names,
18
+ )
15
19
  from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
16
20
  from biotite.structure.error import BadStructureError
17
21
  from biotite.structure.filter import (
@@ -19,6 +23,7 @@ from biotite.structure.filter import (
19
23
  filter_highest_occupancy_altloc,
20
24
  filter_solvent,
21
25
  )
26
+ from biotite.structure.info.bonds import bonds_in_residue
22
27
  from biotite.structure.io.pdb.hybrid36 import (
23
28
  decode_hybrid36,
24
29
  encode_hybrid36,
@@ -544,7 +549,16 @@ class PDBFile(TextFile):
544
549
  # Read bonds
545
550
  if include_bonds:
546
551
  bond_list = self._get_bonds(atom_id)
547
- bond_list = bond_list.merge(connect_via_residue_names(array))
552
+ # Create bond dict containing only non-hetero residues (+ water)
553
+ custom_bond_dict = {
554
+ res_name: bonds_in_residue(res_name)
555
+ for res_name in itertools.chain(
556
+ np.unique(array[..., ~array.hetero].res_name), ["HOH"]
557
+ )
558
+ }
559
+ bond_list = bond_list.merge(
560
+ connect_via_residue_names(array, custom_bond_dict=custom_bond_dict)
561
+ )
548
562
  array.bonds = bond_list
549
563
 
550
564
  return array
@@ -292,7 +292,7 @@ class BinaryCIFColumn(_Component):
292
292
  else:
293
293
  # Array needs to be converted, but masked values are
294
294
  # not necessarily convertible
295
- # (e.g. '' cannot be converted to int)
295
+ # (e.g. '.' cannot be converted to int)
296
296
  if masked_value is None:
297
297
  array = np.zeros(len(self._data), dtype=dtype)
298
298
  else:
@@ -243,7 +243,7 @@ class CIFColumn:
243
243
  else:
244
244
  # Array needs to be converted, but masked values are
245
245
  # not necessarily convertible
246
- # (e.g. '' cannot be converted to int)
246
+ # (e.g. '.' cannot be converted to int)
247
247
  if masked_value is None:
248
248
  array = np.zeros(len(self._data), dtype=dtype)
249
249
  else:
@@ -140,8 +140,8 @@ def _compress_data(bcif_data, rtol, atol):
140
140
  # Run encode to initialize the data and offset arrays
141
141
  indices = encoding.encode(array)
142
142
  offsets = np.cumsum([0] + [len(s) for s in encoding.strings])
143
- encoding.data_encoding, _ = _find_best_integer_compression(indices)
144
- encoding.offset_encoding, _ = _find_best_integer_compression(offsets)
143
+ encoding.data_encoding = _find_best_integer_compression(indices)
144
+ encoding.offset_encoding = _find_best_integer_compression(offsets)
145
145
  return bcif.BinaryCIFData(array, [encoding])
146
146
 
147
147
  elif np.issubdtype(array.dtype, np.floating):
@@ -159,18 +159,22 @@ def _compress_data(bcif_data, rtol, atol):
159
159
  # -> do not use integer encoding
160
160
  return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
161
161
  else:
162
- best_encoding, size_compressed = _find_best_integer_compression(
163
- integer_array
162
+ best_encoding = _find_best_integer_compression(integer_array)
163
+ compressed_data = bcif.BinaryCIFData(
164
+ array, [to_integer_encoding] + best_encoding
164
165
  )
165
- if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
166
- return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
166
+ uncompressed_data = bcif.BinaryCIFData(array, [ByteArrayEncoding()])
167
+ if _data_size_in_file(compressed_data) < _data_size_in_file(
168
+ uncompressed_data
169
+ ):
170
+ return compressed_data
167
171
  else:
168
172
  # The float array is smaller -> encode it directly as bytes
169
- return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
173
+ return uncompressed_data
170
174
 
171
175
  elif np.issubdtype(array.dtype, np.integer):
172
176
  array = _to_smallest_integer_type(array)
173
- encodings, _ = _find_best_integer_compression(array)
177
+ encodings = _find_best_integer_compression(array)
174
178
  return bcif.BinaryCIFData(array, encodings)
175
179
 
176
180
  else:
@@ -233,7 +237,7 @@ def _find_best_integer_compression(array):
233
237
  if size < smallest_size:
234
238
  best_encoding_sequence = encodings
235
239
  smallest_size = size
236
- return best_encoding_sequence, smallest_size
240
+ return best_encoding_sequence
237
241
 
238
242
 
239
243
  def _estimate_packed_length(array, packed_byte_count):