biotite 1.2.0__cp311-cp311-macosx_11_0_arm64.whl → 1.3.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)
  1. biotite/application/viennarna/rnaplot.py +7 -7
  2. biotite/interface/openmm/__init__.py +4 -0
  3. biotite/interface/pymol/__init__.py +3 -0
  4. biotite/interface/rdkit/__init__.py +4 -0
  5. biotite/interface/version.py +23 -0
  6. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  7. biotite/sequence/align/banded.pyx +1 -1
  8. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  9. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  10. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  11. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  12. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  13. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  14. biotite/sequence/align/multiple.pyx +1 -2
  15. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  16. biotite/sequence/align/pairwise.pyx +2 -4
  17. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  19. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  20. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  21. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  22. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  23. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  24. biotite/structure/basepairs.py +13 -14
  25. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  26. biotite/structure/box.py +140 -2
  27. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  28. biotite/structure/celllist.pyx +0 -1
  29. biotite/structure/chains.py +15 -21
  30. biotite/structure/charges.cpython-311-darwin.so +0 -0
  31. biotite/structure/dotbracket.py +4 -4
  32. biotite/structure/graphics/rna.py +19 -16
  33. biotite/structure/hbond.py +1 -2
  34. biotite/structure/info/components.bcif +0 -0
  35. biotite/structure/io/pdb/convert.py +84 -2
  36. biotite/structure/io/pdb/file.py +79 -2
  37. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  38. biotite/structure/io/pdbx/compress.py +69 -32
  39. biotite/structure/io/pdbx/convert.py +207 -44
  40. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  41. biotite/structure/io/pdbx/encoding.pyx +39 -23
  42. biotite/structure/pseudoknots.py +6 -6
  43. biotite/structure/residues.py +10 -27
  44. biotite/structure/rings.py +1 -1
  45. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  46. biotite/structure/sasa.pyx +28 -29
  47. biotite/structure/segments.py +55 -0
  48. biotite/structure/spacegroups.json +1567 -0
  49. biotite/structure/spacegroups.license +26 -0
  50. biotite/structure/superimpose.py +1 -191
  51. biotite/structure/transform.py +220 -1
  52. biotite/version.py +2 -2
  53. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/METADATA +4 -34
  54. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/RECORD +56 -54
  55. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +3 -1
  56. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
biotite/structure/io/pdbx/convert.py

@@ -13,17 +13,30 @@ __all__ = [
     "set_component",
     "list_assemblies",
     "get_assembly",
+    "get_unit_cell",
     "get_sse",
 ]

 import itertools
 import warnings
+from collections import defaultdict
 import numpy as np
 from biotite.file import InvalidFileError
 from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
-from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
+from biotite.structure.atoms import (
+    AtomArray,
+    AtomArrayStack,
+    concatenate,
+    repeat,
+)
 from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
-from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
+from biotite.structure.box import (
+    coord_to_fraction,
+    fraction_to_coord,
+    space_group_transforms,
+    unitcell_from_vectors,
+    vectors_from_unitcell,
+)
 from biotite.structure.error import BadStructureError
 from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
 from biotite.structure.filter import (
@@ -33,6 +46,7 @@ from biotite.structure.filter import (
     filter_first_altloc,
     filter_highest_occupancy_altloc,
 )
+from biotite.structure.geometry import centroid
 from biotite.structure.io.pdbx.bcif import (
     BinaryCIFBlock,
     BinaryCIFColumn,
@@ -46,7 +60,7 @@ from biotite.structure.residues import (
     get_residue_positions,
     get_residue_starts_for,
 )
-from biotite.structure.util import matrix_rotate
+from biotite.structure.transform import AffineTransformation

 # Bond types in `struct_conn` category that refer to covalent bonds
 PDBX_BOND_TYPE_ID_TO_TYPE = {
@@ -125,8 +139,7 @@ _other_type_list = [

 def _filter(category, index):
     """
-    Reduce the ``atom_site`` category to the values for the given
-    model.
+    Reduce the given category to the values selected by the given index.
     """
     Category = type(category)
     Column = Category.subcomponent_class()
@@ -391,7 +404,16 @@ def get_structure(

     # The below part is the same for both, AtomArray and AtomArrayStack
     _fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
+
+    atoms, altloc_filtered_atom_site = _filter_altloc(atoms, model_atom_site, altloc)
+
     if include_bonds:
+        if altloc == "all":
+            raise ValueError(
+                "Bond computation is not supported with `altloc='all'`, consider using "
+                "'connect_via_residue_names()' afterwards"
+            )
+
         if "chem_comp_bond" in block:
             try:
                 custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
@@ -407,10 +429,13 @@
             bonds = connect_via_residue_names(atoms)
         if "struct_conn" in block:
             bonds = bonds.merge(
-                _parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
+                _parse_inter_residue_bonds(
+                    altloc_filtered_atom_site,
+                    block["struct_conn"],
+                    atom_count=atoms.array_length(),
+                )
             )
         atoms.bonds = bonds
-    atoms = _filter_altloc(atoms, model_atom_site, altloc)

     return atoms

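Altloc filtering now happens before bond computation, so the bond list is built only for atoms that survive filtering, and the incompatible combination is rejected up front. A minimal usage sketch of the new behavior, assuming a local mmCIF file named `1aki.cif` (the file name is a placeholder):

    import biotite.structure.io.pdbx as pdbx
    from biotite.structure import connect_via_residue_names

    cif_file = pdbx.CIFFile.read("1aki.cif")
    # altloc="all" combined with include_bonds=True now raises a ValueError
    # atoms = pdbx.get_structure(cif_file, model=1, altloc="all", include_bonds=True)

    # Workaround suggested by the error message: derive bonds afterwards
    atoms = pdbx.get_structure(cif_file, model=1, altloc="all")
    atoms.bonds = connect_via_residue_names(atoms)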
@@ -570,11 +595,12 @@ def _parse_intra_residue_bonds(chem_comp_bond):
     return custom_bond_dict


-def _parse_inter_residue_bonds(atom_site, struct_conn):
+def _parse_inter_residue_bonds(atom_site, struct_conn, atom_count=None):
     """
     Create inter-residue bonds by parsing the ``struct_conn`` category.
     The atom indices of each bond are found by matching the bond labels
     to the ``atom_site`` category.
+    If `atom_count` is None, it will be inferred from the ``atom_site`` category.
     """
     # Identity symmetry operation
     IDENTITY = "1_555"
@@ -643,7 +669,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
     bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]

     return BondList(
-        atom_site.row_count,
+        atom_count if atom_count is not None else atom_site.row_count,
         np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
     )

@@ -739,25 +765,28 @@ def _get_struct_conn_col_name(col_name, partner):


 def _filter_altloc(array, atom_site, altloc):
+    """
+    Filter the given :class:`AtomArray` and ``atom_site`` category to the rows
+    specified by the given *altloc* identifier.
+    """
     altloc_ids = atom_site.get("label_alt_id")
     occupancy = atom_site.get("occupancy")

-    # Filter altloc IDs and return
-    if altloc_ids is None:
-        return array
+    if altloc == "all":
+        array.set_annotation("altloc_id", altloc_ids.as_array(str))
+        return array, atom_site
+    elif altloc_ids is None or (altloc_ids.mask.array != MaskValue.PRESENT).all():
+        # No altlocs in atom_site category
+        return array, atom_site
     elif altloc == "occupancy" and occupancy is not None:
-        return array[
-            ...,
-            filter_highest_occupancy_altloc(
-                array, altloc_ids.as_array(str), occupancy.as_array(float)
-            ),
-        ]
+        mask = filter_highest_occupancy_altloc(
+            array, altloc_ids.as_array(str), occupancy.as_array(float)
+        )
+        return array[..., mask], _filter(atom_site, mask)
     # 'first' is also fallback if file has no occupancy information
     elif altloc == "first":
-        return array[..., filter_first_altloc(array, altloc_ids.as_array(str))]
-    elif altloc == "all":
-        array.set_annotation("altloc_id", altloc_ids.as_array(str))
-        return array
+        mask = filter_first_altloc(array, altloc_ids.as_array(str))
+        return array[..., mask], _filter(atom_site, mask)
     else:
         raise ValueError(f"'{altloc}' is not a valid 'altloc' option")

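`_filter_altloc` now returns the filtered ``atom_site`` category alongside the filtered atoms, because `_parse_inter_residue_bonds` resolves bond partners by row position in ``atom_site``; the two views must stay index-aligned. A toy NumPy sketch of that invariant (all names are illustrative, not biotite API):

    import numpy as np

    atom_names = np.array(["N", "CA", "CA", "C"])  # stand-in for the AtomArray
    alt_ids = np.array([".", "A", "B", "."])       # stand-in for label_alt_id
    keep = np.isin(alt_ids, [".", "A"])            # e.g. a 'first' altloc policy

    # Applying the same mask to both views keeps row i of one view
    # describing the same atom as row i of the other
    filtered_names = atom_names[keep]
    filtered_alt_ids = alt_ids[keep]
    assert filtered_names.shape == filtered_alt_ids.shape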
@@ -1686,7 +1715,7 @@ def get_assembly(
     )

     ### Get transformations and apply them to the affected asym IDs
-    assembly = None
+    chain_ops = defaultdict(list)
     for id, op_expr, asym_id_expr in zip(
         assembly_gen_category["assembly_id"].as_array(str),
         assembly_gen_category["oper_expression"].as_array(str),
@@ -1695,19 +1724,22 @@
         # Find the operation expressions for given assembly ID
         # We already asserted that the ID is actually present
         if id == assembly_id:
-            operations = _parse_operation_expression(op_expr)
-            asym_ids = asym_id_expr.split(",")
-            # Filter affected asym IDs
-            sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
-            sub_assembly = _apply_transformations(
-                sub_structure, transformations, operations
-            )
-            # Merge the chains with asym IDs for this operation
-            # with chains from other operations
-            if assembly is None:
-                assembly = sub_assembly
-            else:
-                assembly += sub_assembly
+            for chain_id in asym_id_expr.split(","):
+                chain_ops[chain_id].extend(_parse_operation_expression(op_expr))
+
+    sub_assemblies = []
+    for asym_id, op_list in chain_ops.items():
+        sub_struct = structure[..., structure.label_asym_id == asym_id]
+        sub_assembly = _apply_transformations(sub_struct, transformations, op_list)
+        # Merge the chain's sub_assembly into the rest of the assembly
+        sub_assemblies.append(sub_assembly)
+    assembly = concatenate(sub_assemblies)
+
+    # Sort AtomArray or AtomArrayStack by 'sym_id'
+    max_sym_id = assembly.sym_id.max()
+    assembly = concatenate(
+        [assembly[..., assembly.sym_id == sym_id] for sym_id in range(max_sym_id + 1)]
+    )

     # Remove 'label_asym_id', if it was not included in the original
     # user-supplied 'extra_fields'
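The rewritten assembly building first gathers all operations per chain and then applies them chain by chain, instead of transforming asym ID groups as they appear in the file. A self-contained sketch of the grouping step, with made-up operation names:

    from collections import defaultdict

    # (assembly_id, parsed operations, asym ID expression) per generator row
    gen_rows = [
        ("1", ["op1"], "A,B"),
        ("1", ["op2"], "B"),
    ]
    chain_ops = defaultdict(list)
    for assembly_id, operations, asym_id_expr in gen_rows:
        for chain_id in asym_id_expr.split(","):
            chain_ops[chain_id].extend(operations)

    print(dict(chain_ops))  # {'A': ['op1'], 'B': ['op1', 'op2']}

The final `concatenate` over `sym_id` then regroups the atoms so that each symmetric copy forms a contiguous block.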
@@ -1730,11 +1762,7 @@ def _apply_transformations(structure, transformation_dict, operations):
         # Execute for each transformation step
         # in the operation expression
         for op_step in operation:
-            rotation_matrix, translation_vector = transformation_dict[op_step]
-            # Rotate
-            coord = matrix_rotate(coord, rotation_matrix)
-            # Translate
-            coord += translation_vector
+            coord = transformation_dict[op_step].apply(coord)
         assembly_coord[i] = coord

     assembly = repeat(structure, assembly_coord)
@@ -1746,8 +1774,7 @@


 def _get_transformations(struct_oper):
     """
-    Get transformation operation in terms of rotation matrix and
-    translation for each operation ID in ``pdbx_struct_oper_list``.
+    Get affine transformation for each operation ID in ``pdbx_struct_oper_list``.
     """
     transformation_dict = {}
     for index, id in enumerate(struct_oper["id"].as_array(str)):
@@ -1763,7 +1790,9 @@ def _get_transformations(struct_oper):
         translation_vector = np.array(
             [struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
         )
-        transformation_dict[id] = (rotation_matrix, translation_vector)
+        transformation_dict[id] = AffineTransformation(
+            np.zeros(3), rotation_matrix, translation_vector
+        )
     return transformation_dict

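`AffineTransformation` folds the former rotate-then-translate pair into a single object. A quick numerical check of the equivalence, assuming the constructor signature used above (center translation, rotation matrix, target translation) and that `apply()` accepts a raw coordinate array, as the hunk itself does:

    import numpy as np
    from biotite.structure import AffineTransformation

    rng = np.random.default_rng(0)
    coord = rng.random((5, 3))
    # 90 degree rotation about the z-axis
    rotation = np.array([
        [0.0, -1.0, 0.0],
        [1.0,  0.0, 0.0],
        [0.0,  0.0, 1.0],
    ])
    translation = np.array([1.0, 2.0, 3.0])

    transform = AffineTransformation(np.zeros(3), rotation, translation)
    manual = coord @ rotation.T + translation  # the old matrix_rotate() + vector
    assert np.allclose(transform.apply(coord), manual)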
@@ -1820,6 +1849,140 @@ def _convert_string_to_sequence(string, stype):
         raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)


+def get_unit_cell(
+    pdbx_file,
+    center=True,
+    model=None,
+    data_block=None,
+    altloc="first",
+    extra_fields=None,
+    use_author_fields=True,
+    include_bonds=False,
+):
+    """
+    Build a structure model containing all symmetric copies of the structure within a
+    single unit cell.
+
+    This function receives the data from the ``symmetry`` and ``atom_site`` categories
+    in the file.
+    Consequently, these categories must be present in the file.
+
+    Parameters
+    ----------
+    pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
+        The file object.
+    center : bool, optional
+        If set to true, each symmetric copy will be moved inside the unit cell
+        dimensions, if its centroid is outside.
+        By default, the copies are created using the raw space group
+        transformations, which may put them one unit cell length further away.
+    model : int, optional
+        If this parameter is given, the function will return an
+        :class:`AtomArray` from the atoms corresponding to the given
+        model number (starting at 1).
+        Negative values are used to index models starting from the last
+        model instead of the first model.
+        If this parameter is omitted, an :class:`AtomArrayStack`
+        containing all models will be returned, even if the structure
+        contains only one model.
+    data_block : str, optional
+        The name of the data block.
+        Default is the first (and most times only) data block of the
+        file.
+        If the data block object is passed directly to `pdbx_file`,
+        this parameter is ignored.
+    altloc : {'first', 'occupancy', 'all'}
+        This parameter defines how *altloc* IDs are handled:
+        - ``'first'`` - Use atoms that have the first *altloc* ID
+          appearing in a residue.
+        - ``'occupancy'`` - Use atoms that have the *altloc* ID
+          with the highest occupancy for a residue.
+        - ``'all'`` - Use all atoms.
+          Note that this leads to duplicate atoms.
+          When this option is chosen, the ``altloc_id`` annotation
+          array is added to the returned structure.
+    extra_fields : list of str, optional
+        The strings in the list are entry names that are
+        additionally added as annotation arrays.
+        The annotation category name will be the same as the PDBx
+        subcategory name.
+        The array type is always `str`.
+        An exception are the special field identifiers:
+        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
+        These will convert the fitting subcategory into an
+        annotation array with reasonable type.
+    use_author_fields : bool, optional
+        Some fields can be read from two alternative sources,
+        for example both ``label_seq_id`` and ``auth_seq_id`` describe
+        the ID of the residue.
+        While the ``label_xxx`` fields can be used as official pointers
+        to other categories in the file, the ``auth_xxx``
+        fields are set by the author(s) of the structure and are
+        consistent with the corresponding values in PDB files.
+        If `use_author_fields` is true, the annotation arrays will be
+        read from the ``auth_xxx`` fields (if applicable),
+        otherwise from the ``label_xxx`` fields.
+    include_bonds : bool, optional
+        If set to true, a :class:`BondList` will be created for the
+        resulting :class:`AtomArray` containing the bond information
+        from the file.
+        Bonds, whose order could not be determined from the
+        *Chemical Component Dictionary*
+        (e.g. especially inter-residue bonds),
+        have :attr:`BondType.ANY`, since the PDB format itself does
+        not support bond orders.
+
+    Returns
+    -------
+    unit_cell : AtomArray or AtomArrayStack
+        The structure representing the unit cell.
+        The return type depends on the `model` parameter.
+        Contains the `sym_id` annotation, which enumerates the copies of the
+        asymmetric unit in the unit cell.
+
+    Examples
+    --------
+
+    >>> import os.path
+    >>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
+    >>> unit_cell = get_unit_cell(file, model=1)
+    """
+    block = _get_block(pdbx_file, data_block)
+
+    try:
+        space_group = block["symmetry"]["space_group_name_H-M"].as_item()
+    except KeyError:
+        raise InvalidFileError("File has no 'symmetry.space_group_name_H-M' field")
+    transforms = space_group_transforms(space_group)
+
+    asym = get_structure(
+        pdbx_file,
+        model,
+        data_block,
+        altloc,
+        extra_fields,
+        use_author_fields,
+        include_bonds,
+    )
+
+    fractional_asym_coord = coord_to_fraction(asym.coord, asym.box)
+    unit_cell_copies = []
+    for transform in transforms:
+        fractional_coord = transform.apply(fractional_asym_coord)
+        if center:
+            # If the centroid is outside the box, move the copy inside the box
+            orig_centroid = centroid(fractional_coord)
+            new_centroid = orig_centroid % 1
+            fractional_coord += (new_centroid - orig_centroid)[..., np.newaxis, :]
+        unit_cell_copies.append(fraction_to_coord(fractional_coord, asym.box))
+
+    unit_cell = repeat(asym, np.stack(unit_cell_copies, axis=0))
+    unit_cell.set_annotation(
+        "sym_id", np.repeat(np.arange(len(transforms)), asym.array_length())
+    )
+    return unit_cell
+
+
 def get_sse(pdbx_file, data_block=None, match_model=None):
     """
     Get the secondary structure from a PDBx file.
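A short usage sketch for the new `get_unit_cell()` above, mirroring its docstring example; it assumes a crystallographic mmCIF file `1f2n.cif` is available locally:

    import numpy as np
    import biotite.structure.io.pdbx as pdbx

    cif_file = pdbx.CIFFile.read("1f2n.cif")
    unit_cell = pdbx.get_unit_cell(cif_file, model=1)
    # One copy of the asymmetric unit per space group transformation
    print(np.unique(unit_cell.sym_id))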
biotite/structure/io/pdbx/encoding.pyx

@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
         # since the file content may be invalid/malicious.
         raise NotImplementedError()

+    def __str__(self):
+        # Restore original behavior, as the `__str__()` implementation of
+        # `_Component` may require serialization, which is not possible for some
+        # encodings prior to the first encoding pass
+        return object.__str__(self)
+

 @dataclass
 class ByteArrayEncoding(Encoding):
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
         )

         # Round to avoid wrong values due to floating point inaccuracies
-        return np.round(data * self.factor).astype(np.int32)
+        scaled_data = np.round(data * self.factor)
+        return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)

     def decode(self, data):
         return (data / self.factor).astype(
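The checked cast matters because `ndarray.astype(np.int32)` wraps out-of-range values silently instead of failing. A pure NumPy demonstration of the failure mode that `_safe_cast` now turns into an explicit error:

    import numpy as np

    data = np.array([3.0e10])  # exceeds the int32 range after scaling
    wrapped = np.round(data).astype(np.int32)
    print(wrapped)  # a wrapped, meaningless value instead of an error

    info = np.iinfo(np.int32)
    if data.max() > info.max or data.min() < info.min:
        raise ValueError("Values do not fit into the given dtype")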
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
             self.min, self.max, self.num_steps, dtype=data.dtype
         )
         indices = np.searchsorted(steps, data, side="left")
-        return indices.astype(np.int32, copy=False)
+        return _safe_cast(indices, np.int32)

     def decode(self, data):
         output = data * (self.max - self.min) / (self.num_steps - 1)
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
         if self.origin is None:
             self.origin = data[0]

+        # Differences (including `np.diff`) return an array with the same dtype as the
+        # input array
+        # As the input dtype may be unsigned, the output dtype could underflow,
+        # if the difference is negative
+        # -> cast to int64 to avoid this
+        data = data.astype(np.int64, copy=False)
         data = data - self.origin
-        return np.diff(data, prepend=0).astype(np.int32, copy=False)
+        return _safe_cast(np.diff(data, prepend=0), np.int32)

     def decode(self, data):
         output = np.cumsum(data, dtype=self.src_type.to_dtype())
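The unsigned underflow mentioned in the new comments is easy to reproduce, since `np.diff` keeps the input dtype. A minimal demonstration of the bug and the applied fix:

    import numpy as np

    data = np.array([5, 3], dtype=np.uint32)
    print(np.diff(data))                   # [4294967294] -- wrapped around
    print(np.diff(data.astype(np.int64)))  # [-2] -- correct after the int64 cast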
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
         # Only positive values -> use unsigned integers
         self.is_unsigned = data.min().item() >= 0

-        data = data.astype(np.int32, copy=False)
+        data = _safe_cast(data, np.int32)
         return self._encode(
             data, np.empty(0, dtype=self._determine_packed_dtype())
         )
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
         else:
             check_present = True

-        string_order = np.argsort(self.strings).astype(np.int32)
+        string_order = _safe_cast(np.argsort(self.strings), np.int32)
         sorted_strings = self.strings[string_order]
         sorted_indices = np.searchsorted(sorted_strings, data)
         indices = string_order[sorted_indices]
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
     return attribute_name[0].lower() + attribute_name[1:]


-def _safe_cast(array, dtype):
-    dtype = np.dtype(dtype)
-    if dtype == array.dtype:
+def _safe_cast(array, dtype, allow_decimal_loss=False):
+    source_dtype = array.dtype
+    target_dtype = np.dtype(dtype)
+
+    if target_dtype == source_dtype:
         return array
-    if np.issubdtype(dtype, np.integer):
-        if not np.issubdtype(array.dtype, np.integer):
-            raise ValueError("Cannot cast floating point to integer")
-        dtype_info = np.iinfo(dtype)
-        if np.any(array < dtype_info.min) or np.any(array > dtype_info.max):
-            raise ValueError("Integer values do not fit into the given dtype")
-    return array.astype(dtype)
-
-
-def _get_n_decimals(value, tolerance):
-    MAX_DECIMALS = 10
-    for n in range(MAX_DECIMALS):
-        if abs(value - round(value, n)) < tolerance:
-            return n
-    return MAX_DECIMALS
+
+    if np.issubdtype(target_dtype, np.integer):
+        if np.issubdtype(source_dtype, np.floating):
+            if not allow_decimal_loss:
+                raise ValueError("Cannot cast floating point to integer")
+            if not np.isfinite(array).all():
+                raise ValueError("Data contains non-finite values")
+        elif not np.issubdtype(source_dtype, np.integer):
+            # Neither float, nor integer -> cannot cast
+            raise ValueError(f"Cannot cast '{source_dtype}' to integer")
+        dtype_info = np.iinfo(target_dtype)
+        # Check if an integer underflow/overflow would occur during conversion
+        if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
+            raise ValueError("Values do not fit into the given dtype")
+
+    return array.astype(target_dtype)
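Condensed, the new rules are: equal dtypes pass through; float input needs `allow_decimal_loss=True` and must be finite; anything that is neither float nor integer is rejected; the integer range is always checked. A self-contained restatement with example calls (`safe_cast` is a local stand-in for the private `_safe_cast`):

    import numpy as np

    def safe_cast(array, dtype, allow_decimal_loss=False):
        # Local restatement of the new `_safe_cast` semantics
        target = np.dtype(dtype)
        if target == array.dtype:
            return array
        if np.issubdtype(target, np.integer):
            if np.issubdtype(array.dtype, np.floating):
                if not allow_decimal_loss:
                    raise ValueError("Cannot cast floating point to integer")
                if not np.isfinite(array).all():
                    raise ValueError("Data contains non-finite values")
            elif not np.issubdtype(array.dtype, np.integer):
                raise ValueError(f"Cannot cast '{array.dtype}' to integer")
            info = np.iinfo(target)
            if np.max(array) > info.max or np.min(array) < info.min:
                raise ValueError("Values do not fit into the given dtype")
        return array.astype(target)

    print(safe_cast(np.array([1, 2], dtype=np.int64), np.int32))          # [1 2]
    print(safe_cast(np.array([1.9]), np.int32, allow_decimal_loss=True))  # [1]
    # safe_cast(np.array([1.9]), np.int32)    -> ValueError (decimal loss)
    # safe_cast(np.array([2**40]), np.int32)  -> ValueError (out of range)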
biotite/structure/pseudoknots.py

@@ -148,7 +148,7 @@ class _Region:
     region_pairs : ndarray, dtype=int
         The indices of the base pairs in ``base_pairs`` that are part of
         the region.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.
     """

@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
     base_pairs : ndarray, dtype=int, shape=(n, 2)
         Each row is equivalent to one base pair and contains the first
         indices of the residues corresponding to each base.
-    scores : ndarray, dtype=int, shape=(n,) (default: None)
+    scores : ndarray, dtype=int, shape=(n,)
         The score for each base pair.

     Returns
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
         return i


-def _get_region_array_for(regions, content=[], dtype=[]):
+def _get_region_array_for(regions, content=(), dtype=()):
     """
     Get a :class:`ndarray` of region objects. Each object occurs twice,
     representing its start and end point. The regions positions in the
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
     ----------
     regions : set {_region, ...}
         The regions to be considered
-    content : list [function, ...] (default: [])
+    content : list [function, ...]
         The functions to be considered for custom outputs. For a given
         region they must return a tuple of which the first value is
         placed at the start position and the second value at the end
         position of the region relative to the other regions.
-    dtype : list [str, ...] (default: [])
+    dtype : list [str, ...]
         The data type of the output of the custom functions.

     Returns
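Swapping the `[]` defaults for `()` sidesteps Python's mutable default argument pitfall: defaults are evaluated once at definition time, so a mutated list default leaks state across calls. A classic demonstration:

    def append_bad(item, bucket=[]):   # one shared list for all calls
        bucket.append(item)
        return bucket

    def append_good(item, bucket=()):  # immutable default, fresh list per call
        return list(bucket) + [item]

    print(append_bad(1), append_bad(2))    # [1, 2] [1, 2] -- shared state
    print(append_good(1), append_good(2))  # [1] [2]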
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
         The maximum pseudoknot order to be found. If a base pair would
         be of a higher order, its order is specified as -1. If ``None``
         is given, all base pairs are evaluated.
-    order : int (default: 0)
+    order : int
         The order that is currently evaluated.

     Returns
biotite/structure/residues.py

@@ -21,23 +21,23 @@ __all__ = [
     "residue_iter",
 ]

-import numpy as np
 from biotite.structure.segments import (
     apply_segment_wise,
     get_segment_masks,
     get_segment_positions,
+    get_segment_starts,
     get_segment_starts_for,
     segment_iter,
     spread_segment_wise,
 )


-def get_residue_starts(array, add_exclusive_stop=False):
+def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
     """
     Get indices for an atom array, each indicating the beginning of
     a residue.

-    A new residue starts, either when the chain ID, residue ID,
+    A new residue starts, either when the chain ID, sym ID, residue ID,
     insertion code or residue name changes from one to the next atom.

     Parameters
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
         If true, the exclusive stop of the input atom array, i.e.
         ``array.array_length()``, is added to the returned array of
         start indices as last element.
+    extra_categories : tuple of str, optional
+        Additional annotation categories that induce the start of a new residue,
+        when their value changes from one atom to the next.

     Returns
     -------
@@ -69,30 +72,10 @@
     [  0  16  35  56  75  92 116 135 157 169 176 183 197 208 219 226 250 264
      278 292 304]
     """
-    if array.array_length() == 0:
-        return np.array([], dtype=int)
-
-    # These mask are 'true' at indices where the value changes
-    chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
-    res_id_changes = array.res_id[1:] != array.res_id[:-1]
-    ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
-    res_name_changes = array.res_name[1:] != array.res_name[:-1]
-
-    # If any of these annotation arrays change, a new residue starts
-    residue_change_mask = (
-        chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
-    )
-
-    # Convert mask to indices
-    # Add 1, to shift the indices from the end of a residue
-    # to the start of a new residue
-    residue_starts = np.where(residue_change_mask)[0] + 1
-
-    # The first residue is not included yet -> Insert '[0]'
-    if add_exclusive_stop:
-        return np.concatenate(([0], residue_starts, [array.array_length()]))
-    else:
-        return np.concatenate(([0], residue_starts))
+    categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
+    if "sym_id" in array.get_annotation_categories():
+        categories.append("sym_id")
+    return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)


 def apply_residue_wise(array, data, function, axis=None):
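With the logic delegated to `get_segment_starts`, the new `extra_categories` parameter lets callers split residues on additional annotations. A small runnable sketch (the two-atom array and the `altloc_id` values are contrived for illustration):

    import numpy as np
    import biotite.structure as struc

    array = struc.array([
        struc.Atom([0.0, 0.0, 0.0], chain_id="A", res_id=1,
                   res_name="ALA", atom_name="CA"),
        struc.Atom([1.5, 0.0, 0.0], chain_id="A", res_id=1,
                   res_name="ALA", atom_name="CB"),
    ])
    array.set_annotation("altloc_id", np.array(["A", "B"]))

    # A change in 'altloc_id' now also starts a new residue
    print(struc.get_residue_starts(array, extra_categories=("altloc_id",)))  # [0 1]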
biotite/structure/rings.py

@@ -149,7 +149,7 @@ def find_stacking_interactions(

     The conditions for pi-stacking are :footcite:`Wojcikowski2015` :

-    - The ring centroids must be within cutoff distance (default: 6.5 Å).
+    - The ring centroids must be within `centroid_cutoff` distance.
       While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
       adopted from :footcite:`Bouysset2021` to better identify perpendicular
       stacking interactions.
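For completeness, a hedged usage sketch of the documented parameter; the keyword name `centroid_cutoff` is taken from the hunk above, and fetching `1bna` from the RCSB is only an illustrative way to obtain a nucleic acid structure:

    import biotite.database.rcsb as rcsb
    import biotite.structure.io.pdbx as pdbx
    from biotite.structure import find_stacking_interactions

    cif_file = pdbx.CIFFile.read(rcsb.fetch("1bna", "cif"))
    atoms = pdbx.get_structure(cif_file, model=1)
    interactions = find_stacking_interactions(atoms, centroid_cutoff=6.5)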