biotite 1.2.0__cp311-cp311-macosx_11_0_arm64.whl → 1.3.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/viennarna/rnaplot.py +7 -7
- biotite/interface/openmm/__init__.py +4 -0
- biotite/interface/pymol/__init__.py +3 -0
- biotite/interface/rdkit/__init__.py +4 -0
- biotite/interface/version.py +23 -0
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +1 -1
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +2 -4
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/structure/basepairs.py +13 -14
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/box.py +140 -2
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.pyx +0 -1
- biotite/structure/chains.py +15 -21
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/dotbracket.py +4 -4
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +1 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/io/pdb/convert.py +84 -2
- biotite/structure/io/pdb/file.py +79 -2
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/compress.py +69 -32
- biotite/structure/io/pdbx/convert.py +207 -44
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/pseudoknots.py +6 -6
- biotite/structure/residues.py +10 -27
- biotite/structure/rings.py +1 -1
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sasa.pyx +28 -29
- biotite/structure/segments.py +55 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/superimpose.py +1 -191
- biotite/structure/transform.py +220 -1
- biotite/version.py +2 -2
- {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/METADATA +4 -34
- {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/RECORD +56 -54
- {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +3 -1
- {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -13,17 +13,30 @@ __all__ = [
|
|
|
13
13
|
"set_component",
|
|
14
14
|
"list_assemblies",
|
|
15
15
|
"get_assembly",
|
|
16
|
+
"get_unit_cell",
|
|
16
17
|
"get_sse",
|
|
17
18
|
]
|
|
18
19
|
|
|
19
20
|
import itertools
|
|
20
21
|
import warnings
|
|
22
|
+
from collections import defaultdict
|
|
21
23
|
import numpy as np
|
|
22
24
|
from biotite.file import InvalidFileError
|
|
23
25
|
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
24
|
-
from biotite.structure.atoms import
|
|
26
|
+
from biotite.structure.atoms import (
|
|
27
|
+
AtomArray,
|
|
28
|
+
AtomArrayStack,
|
|
29
|
+
concatenate,
|
|
30
|
+
repeat,
|
|
31
|
+
)
|
|
25
32
|
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
26
|
-
from biotite.structure.box import
|
|
33
|
+
from biotite.structure.box import (
|
|
34
|
+
coord_to_fraction,
|
|
35
|
+
fraction_to_coord,
|
|
36
|
+
space_group_transforms,
|
|
37
|
+
unitcell_from_vectors,
|
|
38
|
+
vectors_from_unitcell,
|
|
39
|
+
)
|
|
27
40
|
from biotite.structure.error import BadStructureError
|
|
28
41
|
from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
|
|
29
42
|
from biotite.structure.filter import (
|
|
@@ -33,6 +46,7 @@ from biotite.structure.filter import (
|
|
|
33
46
|
filter_first_altloc,
|
|
34
47
|
filter_highest_occupancy_altloc,
|
|
35
48
|
)
|
|
49
|
+
from biotite.structure.geometry import centroid
|
|
36
50
|
from biotite.structure.io.pdbx.bcif import (
|
|
37
51
|
BinaryCIFBlock,
|
|
38
52
|
BinaryCIFColumn,
|
|
@@ -46,7 +60,7 @@ from biotite.structure.residues import (
|
|
|
46
60
|
get_residue_positions,
|
|
47
61
|
get_residue_starts_for,
|
|
48
62
|
)
|
|
49
|
-
from biotite.structure.
|
|
63
|
+
from biotite.structure.transform import AffineTransformation
|
|
50
64
|
|
|
51
65
|
# Bond types in `struct_conn` category that refer to covalent bonds
|
|
52
66
|
PDBX_BOND_TYPE_ID_TO_TYPE = {
|
|
@@ -125,8 +139,7 @@ _other_type_list = [
|
|
|
125
139
|
|
|
126
140
|
def _filter(category, index):
|
|
127
141
|
"""
|
|
128
|
-
Reduce the
|
|
129
|
-
model.
|
|
142
|
+
Reduce the given category to the values selected by the given index.
|
|
130
143
|
"""
|
|
131
144
|
Category = type(category)
|
|
132
145
|
Column = Category.subcomponent_class()
|
|
@@ -391,7 +404,16 @@ def get_structure(
|
|
|
391
404
|
|
|
392
405
|
# The below part is the same for both, AtomArray and AtomArrayStack
|
|
393
406
|
_fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
|
|
407
|
+
|
|
408
|
+
atoms, altloc_filtered_atom_site = _filter_altloc(atoms, model_atom_site, altloc)
|
|
409
|
+
|
|
394
410
|
if include_bonds:
|
|
411
|
+
if altloc == "all":
|
|
412
|
+
raise ValueError(
|
|
413
|
+
"Bond computation is not supported with `altloc='all', consider using "
|
|
414
|
+
"'connect_via_residue_names()' afterwards"
|
|
415
|
+
)
|
|
416
|
+
|
|
395
417
|
if "chem_comp_bond" in block:
|
|
396
418
|
try:
|
|
397
419
|
custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
|
|
@@ -407,10 +429,13 @@ def get_structure(
|
|
|
407
429
|
bonds = connect_via_residue_names(atoms)
|
|
408
430
|
if "struct_conn" in block:
|
|
409
431
|
bonds = bonds.merge(
|
|
410
|
-
_parse_inter_residue_bonds(
|
|
432
|
+
_parse_inter_residue_bonds(
|
|
433
|
+
altloc_filtered_atom_site,
|
|
434
|
+
block["struct_conn"],
|
|
435
|
+
atom_count=atoms.array_length(),
|
|
436
|
+
)
|
|
411
437
|
)
|
|
412
438
|
atoms.bonds = bonds
|
|
413
|
-
atoms = _filter_altloc(atoms, model_atom_site, altloc)
|
|
414
439
|
|
|
415
440
|
return atoms
|
|
416
441
|
|
|
@@ -570,11 +595,12 @@ def _parse_intra_residue_bonds(chem_comp_bond):
|
|
|
570
595
|
return custom_bond_dict
|
|
571
596
|
|
|
572
597
|
|
|
573
|
-
def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
598
|
+
def _parse_inter_residue_bonds(atom_site, struct_conn, atom_count=None):
|
|
574
599
|
"""
|
|
575
600
|
Create inter-residue bonds by parsing the ``struct_conn`` category.
|
|
576
601
|
The atom indices of each bond are found by matching the bond labels
|
|
577
602
|
to the ``atom_site`` category.
|
|
603
|
+
If atom_count is None, it will be inferred from the ``atom_site`` category.
|
|
578
604
|
"""
|
|
579
605
|
# Identity symmetry operation
|
|
580
606
|
IDENTITY = "1_555"
|
|
@@ -643,7 +669,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
643
669
|
bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]
|
|
644
670
|
|
|
645
671
|
return BondList(
|
|
646
|
-
atom_site.row_count,
|
|
672
|
+
atom_count if atom_count is not None else atom_site.row_count,
|
|
647
673
|
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
|
|
648
674
|
)
|
|
649
675
|
|
|
@@ -739,25 +765,28 @@ def _get_struct_conn_col_name(col_name, partner):
|
|
|
739
765
|
|
|
740
766
|
|
|
741
767
|
def _filter_altloc(array, atom_site, altloc):
|
|
768
|
+
"""
|
|
769
|
+
Filter the given :class:`AtomArray` and ``atom_site`` category to the rows
|
|
770
|
+
specified by the given *altloc* identifier.
|
|
771
|
+
"""
|
|
742
772
|
altloc_ids = atom_site.get("label_alt_id")
|
|
743
773
|
occupancy = atom_site.get("occupancy")
|
|
744
774
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
return array
|
|
775
|
+
if altloc == "all":
|
|
776
|
+
array.set_annotation("altloc_id", altloc_ids.as_array(str))
|
|
777
|
+
return array, atom_site
|
|
778
|
+
elif altloc_ids is None or (altloc_ids.mask.array != MaskValue.PRESENT).all():
|
|
779
|
+
# No altlocs in atom_site category
|
|
780
|
+
return array, atom_site
|
|
748
781
|
elif altloc == "occupancy" and occupancy is not None:
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
),
|
|
754
|
-
]
|
|
782
|
+
mask = filter_highest_occupancy_altloc(
|
|
783
|
+
array, altloc_ids.as_array(str), occupancy.as_array(float)
|
|
784
|
+
)
|
|
785
|
+
return array[..., mask], _filter(atom_site, mask)
|
|
755
786
|
# 'first' is also fallback if file has no occupancy information
|
|
756
787
|
elif altloc == "first":
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
array.set_annotation("altloc_id", altloc_ids.as_array(str))
|
|
760
|
-
return array
|
|
788
|
+
mask = filter_first_altloc(array, altloc_ids.as_array(str))
|
|
789
|
+
return array[..., mask], _filter(atom_site, mask)
|
|
761
790
|
else:
|
|
762
791
|
raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
|
|
763
792
|
|
|
@@ -1686,7 +1715,7 @@ def get_assembly(
|
|
|
1686
1715
|
)
|
|
1687
1716
|
|
|
1688
1717
|
### Get transformations and apply them to the affected asym IDs
|
|
1689
|
-
|
|
1718
|
+
chain_ops = defaultdict(list)
|
|
1690
1719
|
for id, op_expr, asym_id_expr in zip(
|
|
1691
1720
|
assembly_gen_category["assembly_id"].as_array(str),
|
|
1692
1721
|
assembly_gen_category["oper_expression"].as_array(str),
|
|
@@ -1695,19 +1724,22 @@ def get_assembly(
|
|
|
1695
1724
|
# Find the operation expressions for given assembly ID
|
|
1696
1725
|
# We already asserted that the ID is actually present
|
|
1697
1726
|
if id == assembly_id:
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1727
|
+
for chain_id in asym_id_expr.split(","):
|
|
1728
|
+
chain_ops[chain_id].extend(_parse_operation_expression(op_expr))
|
|
1729
|
+
|
|
1730
|
+
sub_assemblies = []
|
|
1731
|
+
for asym_id, op_list in chain_ops.items():
|
|
1732
|
+
sub_struct = structure[..., structure.label_asym_id == asym_id]
|
|
1733
|
+
sub_assembly = _apply_transformations(sub_struct, transformations, op_list)
|
|
1734
|
+
# Merge the chain's sub_assembly into the rest of the assembly
|
|
1735
|
+
sub_assemblies.append(sub_assembly)
|
|
1736
|
+
assembly = concatenate(sub_assemblies)
|
|
1737
|
+
|
|
1738
|
+
# Sort AtomArray or AtomArrayStack by 'sym_id'
|
|
1739
|
+
max_sym_id = assembly.sym_id.max()
|
|
1740
|
+
assembly = concatenate(
|
|
1741
|
+
[assembly[..., assembly.sym_id == sym_id] for sym_id in range(max_sym_id + 1)]
|
|
1742
|
+
)
|
|
1711
1743
|
|
|
1712
1744
|
# Remove 'label_asym_id', if it was not included in the original
|
|
1713
1745
|
# user-supplied 'extra_fields'
|
|
@@ -1730,11 +1762,7 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1730
1762
|
# Execute for each transformation step
|
|
1731
1763
|
# in the operation expression
|
|
1732
1764
|
for op_step in operation:
|
|
1733
|
-
|
|
1734
|
-
# Rotate
|
|
1735
|
-
coord = matrix_rotate(coord, rotation_matrix)
|
|
1736
|
-
# Translate
|
|
1737
|
-
coord += translation_vector
|
|
1765
|
+
coord = transformation_dict[op_step].apply(coord)
|
|
1738
1766
|
assembly_coord[i] = coord
|
|
1739
1767
|
|
|
1740
1768
|
assembly = repeat(structure, assembly_coord)
|
|
@@ -1746,8 +1774,7 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1746
1774
|
|
|
1747
1775
|
def _get_transformations(struct_oper):
|
|
1748
1776
|
"""
|
|
1749
|
-
Get transformation
|
|
1750
|
-
translation for each operation ID in ``pdbx_struct_oper_list``.
|
|
1777
|
+
Get affine transformation for each operation ID in ``pdbx_struct_oper_list``.
|
|
1751
1778
|
"""
|
|
1752
1779
|
transformation_dict = {}
|
|
1753
1780
|
for index, id in enumerate(struct_oper["id"].as_array(str)):
|
|
@@ -1763,7 +1790,9 @@ def _get_transformations(struct_oper):
|
|
|
1763
1790
|
translation_vector = np.array(
|
|
1764
1791
|
[struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
|
|
1765
1792
|
)
|
|
1766
|
-
transformation_dict[id] = (
|
|
1793
|
+
transformation_dict[id] = AffineTransformation(
|
|
1794
|
+
np.zeros(3), rotation_matrix, translation_vector
|
|
1795
|
+
)
|
|
1767
1796
|
return transformation_dict
|
|
1768
1797
|
|
|
1769
1798
|
|
|
@@ -1820,6 +1849,140 @@ def _convert_string_to_sequence(string, stype):
|
|
|
1820
1849
|
raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)
|
|
1821
1850
|
|
|
1822
1851
|
|
|
1852
|
+
def get_unit_cell(
|
|
1853
|
+
pdbx_file,
|
|
1854
|
+
center=True,
|
|
1855
|
+
model=None,
|
|
1856
|
+
data_block=None,
|
|
1857
|
+
altloc="first",
|
|
1858
|
+
extra_fields=None,
|
|
1859
|
+
use_author_fields=True,
|
|
1860
|
+
include_bonds=False,
|
|
1861
|
+
):
|
|
1862
|
+
"""
|
|
1863
|
+
Build a structure model containing all symmetric copies of the structure within a
|
|
1864
|
+
single unit cell.
|
|
1865
|
+
|
|
1866
|
+
This function receives the data from the ``symmetry`` and ``atom_site`` categories
|
|
1867
|
+
in the file.
|
|
1868
|
+
Consequently, these categories must be present in the file.
|
|
1869
|
+
|
|
1870
|
+
Parameters
|
|
1871
|
+
----------
|
|
1872
|
+
pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
|
|
1873
|
+
The file object.
|
|
1874
|
+
center : bool, optional
|
|
1875
|
+
If set to true, each symmetric copy will be moved inside the unit cell
|
|
1876
|
+
dimensions, if its centroid is outside.
|
|
1877
|
+
Otherwise, the copies are created using the raw space group
|
|
1878
|
+
transformations, which may put them one unit cell length further away.
|
|
1879
|
+
model : int, optional
|
|
1880
|
+
If this parameter is given, the function will return an
|
|
1881
|
+
:class:`AtomArray` from the atoms corresponding to the given
|
|
1882
|
+
model number (starting at 1).
|
|
1883
|
+
Negative values are used to index models starting from the last
|
|
1884
|
+
model instead of the first model.
|
|
1885
|
+
If this parameter is omitted, an :class:`AtomArrayStack`
|
|
1886
|
+
containing all models will be returned, even if the structure
|
|
1887
|
+
contains only one model.
|
|
1888
|
+
data_block : str, optional
|
|
1889
|
+
The name of the data block.
|
|
1890
|
+
Default is the first (and most times only) data block of the
|
|
1891
|
+
file.
|
|
1892
|
+
If the data block object is passed directly to `pdbx_file`,
|
|
1893
|
+
this parameter is ignored.
|
|
1894
|
+
altloc : {'first', 'occupancy', 'all'}
|
|
1895
|
+
This parameter defines how *altloc* IDs are handled:
|
|
1896
|
+
- ``'first'`` - Use atoms that have the first *altloc* ID
|
|
1897
|
+
appearing in a residue.
|
|
1898
|
+
- ``'occupancy'`` - Use atoms that have the *altloc* ID
|
|
1899
|
+
with the highest occupancy for a residue.
|
|
1900
|
+
- ``'all'`` - Use all atoms.
|
|
1901
|
+
Note that this leads to duplicate atoms.
|
|
1902
|
+
When this option is chosen, the ``altloc_id`` annotation
|
|
1903
|
+
array is added to the returned structure.
|
|
1904
|
+
extra_fields : list of str, optional
|
|
1905
|
+
The strings in the list are entry names, that are
|
|
1906
|
+
additionally added as annotation arrays.
|
|
1907
|
+
The annotation category name will be the same as the PDBx
|
|
1908
|
+
subcategory name.
|
|
1909
|
+
The array type is always `str`.
|
|
1910
|
+
An exception are the special field identifiers:
|
|
1911
|
+
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
|
|
1912
|
+
These will convert the fitting subcategory into an
|
|
1913
|
+
annotation array with reasonable type.
|
|
1914
|
+
use_author_fields : bool, optional
|
|
1915
|
+
Some fields can be read from two alternative sources,
|
|
1916
|
+
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
1917
|
+
the ID of the residue.
|
|
1918
|
+
While the ``label_xxx`` fields can be used as official pointers
|
|
1919
|
+
to other categories in the file, the ``auth_xxx``
|
|
1920
|
+
fields are set by the author(s) of the structure and are
|
|
1921
|
+
consistent with the corresponding values in PDB files.
|
|
1922
|
+
If `use_author_fields` is true, the annotation arrays will be
|
|
1923
|
+
read from the ``auth_xxx`` fields (if applicable),
|
|
1924
|
+
otherwise from the ``label_xxx`` fields.
|
|
1925
|
+
include_bonds : bool, optional
|
|
1926
|
+
If set to true, a :class:`BondList` will be created for the
|
|
1927
|
+
resulting :class:`AtomArray` containing the bond information
|
|
1928
|
+
from the file.
|
|
1929
|
+
Bonds, whose order could not be determined from the
|
|
1930
|
+
*Chemical Component Dictionary*
|
|
1931
|
+
(e.g. especially inter-residue bonds),
|
|
1932
|
+
have :attr:`BondType.ANY`, since the PDB format itself does
|
|
1933
|
+
not support bond orders.
|
|
1934
|
+
|
|
1935
|
+
Returns
|
|
1936
|
+
-------
|
|
1937
|
+
unit_cell : AtomArray or AtomArrayStack
|
|
1938
|
+
The structure representing the unit cell.
|
|
1939
|
+
The return type depends on the `model` parameter.
|
|
1940
|
+
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
|
|
1941
|
+
unit in the unit cell.
|
|
1942
|
+
|
|
1943
|
+
Examples
|
|
1944
|
+
--------
|
|
1945
|
+
|
|
1946
|
+
>>> import os.path
|
|
1947
|
+
>>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
|
|
1948
|
+
>>> unit_cell = get_unit_cell(file, model=1)
|
|
1949
|
+
"""
|
|
1950
|
+
block = _get_block(pdbx_file, data_block)
|
|
1951
|
+
|
|
1952
|
+
try:
|
|
1953
|
+
space_group = block["symmetry"]["space_group_name_H-M"].as_item()
|
|
1954
|
+
except KeyError:
|
|
1955
|
+
raise InvalidFileError("File has no 'symmetry.space_group_name_H-M' field")
|
|
1956
|
+
transforms = space_group_transforms(space_group)
|
|
1957
|
+
|
|
1958
|
+
asym = get_structure(
|
|
1959
|
+
pdbx_file,
|
|
1960
|
+
model,
|
|
1961
|
+
data_block,
|
|
1962
|
+
altloc,
|
|
1963
|
+
extra_fields,
|
|
1964
|
+
use_author_fields,
|
|
1965
|
+
include_bonds,
|
|
1966
|
+
)
|
|
1967
|
+
|
|
1968
|
+
fractional_asym_coord = coord_to_fraction(asym.coord, asym.box)
|
|
1969
|
+
unit_cell_copies = []
|
|
1970
|
+
for transform in transforms:
|
|
1971
|
+
fractional_coord = transform.apply(fractional_asym_coord)
|
|
1972
|
+
if center:
|
|
1973
|
+
# If the centroid is outside the box, move the copy inside the box
|
|
1974
|
+
orig_centroid = centroid(fractional_coord)
|
|
1975
|
+
new_centroid = orig_centroid % 1
|
|
1976
|
+
fractional_coord += (new_centroid - orig_centroid)[..., np.newaxis, :]
|
|
1977
|
+
unit_cell_copies.append(fraction_to_coord(fractional_coord, asym.box))
|
|
1978
|
+
|
|
1979
|
+
unit_cell = repeat(asym, np.stack(unit_cell_copies, axis=0))
|
|
1980
|
+
unit_cell.set_annotation(
|
|
1981
|
+
"sym_id", np.repeat(np.arange(len(transforms)), asym.array_length())
|
|
1982
|
+
)
|
|
1983
|
+
return unit_cell
|
|
1984
|
+
|
|
1985
|
+
|
|
1823
1986
|
def get_sse(pdbx_file, data_block=None, match_model=None):
|
|
1824
1987
|
"""
|
|
1825
1988
|
Get the secondary structure from a PDBx file.
|
|
Binary file
|
|
@@ -230,6 +230,12 @@ class Encoding(_Component, metaclass=ABCMeta):
|
|
|
230
230
|
# since the file content may be invalid/malicious.
|
|
231
231
|
raise NotImplementedError()
|
|
232
232
|
|
|
233
|
+
def __str__(self):
|
|
234
|
+
# Restore original behavior, as `__str__()` implementation of `_Component`
|
|
235
|
+
# may require serialization, which is not possible for some encodings prior
|
|
236
|
+
# to the first encoding pass
|
|
237
|
+
return object.__str__(self)
|
|
238
|
+
|
|
233
239
|
|
|
234
240
|
@dataclass
|
|
235
241
|
class ByteArrayEncoding(Encoding):
|
|
@@ -325,7 +331,8 @@ class FixedPointEncoding(Encoding):
|
|
|
325
331
|
)
|
|
326
332
|
|
|
327
333
|
# Round to avoid wrong values due to floating point inaccuracies
|
|
328
|
-
|
|
334
|
+
scaled_data = np.round(data * self.factor)
|
|
335
|
+
return _safe_cast(scaled_data, np.int32, allow_decimal_loss=True)
|
|
329
336
|
|
|
330
337
|
def decode(self, data):
|
|
331
338
|
return (data / self.factor).astype(
|
|
@@ -392,7 +399,7 @@ class IntervalQuantizationEncoding(Encoding):
|
|
|
392
399
|
self.min, self.max, self.num_steps, dtype=data.dtype
|
|
393
400
|
)
|
|
394
401
|
indices = np.searchsorted(steps, data, side="left")
|
|
395
|
-
return indices
|
|
402
|
+
return _safe_cast(indices, np.int32)
|
|
396
403
|
|
|
397
404
|
def decode(self, data):
|
|
398
405
|
output = data * (self.max - self.min) / (self.num_steps - 1)
|
|
@@ -570,8 +577,14 @@ class DeltaEncoding(Encoding):
|
|
|
570
577
|
if self.origin is None:
|
|
571
578
|
self.origin = data[0]
|
|
572
579
|
|
|
580
|
+
# Differences (including `np.diff`) return an array with the same dtype as the
|
|
581
|
+
# input array
|
|
582
|
+
# As the input dtype may be unsigned, the output dtype could underflow,
|
|
583
|
+
# if the difference is negative
|
|
584
|
+
# -> cast to int64 to avoid this
|
|
585
|
+
data = data.astype(np.int64, copy=False)
|
|
573
586
|
data = data - self.origin
|
|
574
|
-
return np.diff(data, prepend=0)
|
|
587
|
+
return _safe_cast(np.diff(data, prepend=0), np.int32)
|
|
575
588
|
|
|
576
589
|
def decode(self, data):
|
|
577
590
|
output = np.cumsum(data, dtype=self.src_type.to_dtype())
|
|
@@ -635,7 +648,7 @@ class IntegerPackingEncoding(Encoding):
|
|
|
635
648
|
# Only positive values -> use unsigned integers
|
|
636
649
|
self.is_unsigned = data.min().item() >= 0
|
|
637
650
|
|
|
638
|
-
data = data
|
|
651
|
+
data = _safe_cast(data, np.int32)
|
|
639
652
|
return self._encode(
|
|
640
653
|
data, np.empty(0, dtype=self._determine_packed_dtype())
|
|
641
654
|
)
|
|
@@ -870,7 +883,7 @@ class StringArrayEncoding(Encoding):
|
|
|
870
883
|
else:
|
|
871
884
|
check_present = True
|
|
872
885
|
|
|
873
|
-
string_order = np.argsort(self.strings)
|
|
886
|
+
string_order = _safe_cast(np.argsort(self.strings), np.int32)
|
|
874
887
|
sorted_strings = self.strings[string_order]
|
|
875
888
|
sorted_indices = np.searchsorted(sorted_strings, data)
|
|
876
889
|
indices = string_order[sorted_indices]
|
|
@@ -1010,22 +1023,25 @@ def _snake_to_camel_case(attribute_name):
|
|
|
1010
1023
|
return attribute_name[0].lower() + attribute_name[1:]
|
|
1011
1024
|
|
|
1012
1025
|
|
|
1013
|
-
def _safe_cast(array, dtype):
|
|
1014
|
-
|
|
1015
|
-
|
|
1026
|
+
def _safe_cast(array, dtype, allow_decimal_loss=False):
|
|
1027
|
+
source_dtype = array.dtype
|
|
1028
|
+
target_dtype = np.dtype(dtype)
|
|
1029
|
+
|
|
1030
|
+
if target_dtype == source_dtype:
|
|
1016
1031
|
return array
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
if
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
+
|
|
1033
|
+
if np.issubdtype(target_dtype, np.integer):
|
|
1034
|
+
if np.issubdtype(source_dtype, np.floating):
|
|
1035
|
+
if not allow_decimal_loss:
|
|
1036
|
+
raise ValueError("Cannot cast floating point to integer")
|
|
1037
|
+
if not np.isfinite(array).all():
|
|
1038
|
+
raise ValueError("Data contains non-finite values")
|
|
1039
|
+
elif not np.issubdtype(source_dtype, np.integer):
|
|
1040
|
+
# Neither float, nor integer -> cannot cast
|
|
1041
|
+
raise ValueError(f"Cannot cast '{source_dtype}' to integer")
|
|
1042
|
+
dtype_info = np.iinfo(target_dtype)
|
|
1043
|
+
# Check if an integer underflow/overflow would occur during conversion
|
|
1044
|
+
if np.max(array) > dtype_info.max or np.min(array) < dtype_info.min:
|
|
1045
|
+
raise ValueError("Values do not fit into the given dtype")
|
|
1046
|
+
|
|
1047
|
+
return array.astype(target_dtype)
|
biotite/structure/pseudoknots.py
CHANGED
|
@@ -148,7 +148,7 @@ class _Region:
|
|
|
148
148
|
region_pairs : ndarray, dtype=int
|
|
149
149
|
The indices of the base pairs in ``base_pairs`` that are part of
|
|
150
150
|
the region.
|
|
151
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
151
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
152
152
|
The score for each base pair.
|
|
153
153
|
"""
|
|
154
154
|
|
|
@@ -202,7 +202,7 @@ def _find_regions(base_pairs, scores):
|
|
|
202
202
|
base_pairs : ndarray, dtype=int, shape=(n, 2)
|
|
203
203
|
Each row is equivalent to one base pair and contains the first
|
|
204
204
|
indices of the residues corresponding to each base.
|
|
205
|
-
scores : ndarray, dtype=int, shape=(n,)
|
|
205
|
+
scores : ndarray, dtype=int, shape=(n,)
|
|
206
206
|
The score for each base pair.
|
|
207
207
|
|
|
208
208
|
Returns
|
|
@@ -352,7 +352,7 @@ def _get_first_occurrence_for(iterable, wanted_object):
|
|
|
352
352
|
return i
|
|
353
353
|
|
|
354
354
|
|
|
355
|
-
def _get_region_array_for(regions, content=
|
|
355
|
+
def _get_region_array_for(regions, content=(), dtype=()):
|
|
356
356
|
"""
|
|
357
357
|
Get a :class:`ndarray` of region objects. Each object occurs twice,
|
|
358
358
|
representing its start and end point. The regions positions in the
|
|
@@ -365,12 +365,12 @@ def _get_region_array_for(regions, content=[], dtype=[]):
|
|
|
365
365
|
----------
|
|
366
366
|
regions : set {_region, ...}
|
|
367
367
|
The regions to be considered
|
|
368
|
-
content : list [function, ...]
|
|
368
|
+
content : list [function, ...]
|
|
369
369
|
The functions to be considered for custom outputs. For a given
|
|
370
370
|
region they must return a tuple of which the first value is
|
|
371
371
|
placed at the start position and the second value at the end
|
|
372
372
|
position of the region relative to the other regions.
|
|
373
|
-
dtype : list [str, ...]
|
|
373
|
+
dtype : list [str, ...]
|
|
374
374
|
The data type of the output of the custom functions.
|
|
375
375
|
|
|
376
376
|
Returns
|
|
@@ -554,7 +554,7 @@ def _get_results(regions, results, max_pseudoknot_order, order=0):
|
|
|
554
554
|
The maximum pseudoknot order to be found. If a base pair would
|
|
555
555
|
be of a higher order, its order is specified as -1. If ``None``
|
|
556
556
|
is given, all base pairs are evaluated.
|
|
557
|
-
order : int
|
|
557
|
+
order : int
|
|
558
558
|
The order that is currently evaluated.
|
|
559
559
|
|
|
560
560
|
Returns
|
biotite/structure/residues.py
CHANGED
|
@@ -21,23 +21,23 @@ __all__ = [
|
|
|
21
21
|
"residue_iter",
|
|
22
22
|
]
|
|
23
23
|
|
|
24
|
-
import numpy as np
|
|
25
24
|
from biotite.structure.segments import (
|
|
26
25
|
apply_segment_wise,
|
|
27
26
|
get_segment_masks,
|
|
28
27
|
get_segment_positions,
|
|
28
|
+
get_segment_starts,
|
|
29
29
|
get_segment_starts_for,
|
|
30
30
|
segment_iter,
|
|
31
31
|
spread_segment_wise,
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def get_residue_starts(array, add_exclusive_stop=False):
|
|
35
|
+
def get_residue_starts(array, add_exclusive_stop=False, extra_categories=()):
|
|
36
36
|
"""
|
|
37
37
|
Get indices for an atom array, each indicating the beginning of
|
|
38
38
|
a residue.
|
|
39
39
|
|
|
40
|
-
A new residue starts, either when the chain ID, residue ID,
|
|
40
|
+
A new residue starts, either when the chain ID, sym ID, residue ID,
|
|
41
41
|
insertion code or residue name changes from one to the next atom.
|
|
42
42
|
|
|
43
43
|
Parameters
|
|
@@ -48,6 +48,9 @@ def get_residue_starts(array, add_exclusive_stop=False):
|
|
|
48
48
|
If true, the exclusive stop of the input atom array, i.e.
|
|
49
49
|
``array.array_length()``, is added to the returned array of
|
|
50
50
|
start indices as last element.
|
|
51
|
+
extra_categories : tuple of str, optional
|
|
52
|
+
Additional annotation categories that induce the start of a new residue,
|
|
53
|
+
when their value changes from one atom to the next.
|
|
51
54
|
|
|
52
55
|
Returns
|
|
53
56
|
-------
|
|
@@ -69,30 +72,10 @@ def get_residue_starts(array, add_exclusive_stop=False):
|
|
|
69
72
|
[ 0 16 35 56 75 92 116 135 157 169 176 183 197 208 219 226 250 264
|
|
70
73
|
278 292 304]
|
|
71
74
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
chain_id_changes = array.chain_id[1:] != array.chain_id[:-1]
|
|
77
|
-
res_id_changes = array.res_id[1:] != array.res_id[:-1]
|
|
78
|
-
ins_code_changes = array.ins_code[1:] != array.ins_code[:-1]
|
|
79
|
-
res_name_changes = array.res_name[1:] != array.res_name[:-1]
|
|
80
|
-
|
|
81
|
-
# If any of these annotation arrays change, a new residue starts
|
|
82
|
-
residue_change_mask = (
|
|
83
|
-
chain_id_changes | res_id_changes | ins_code_changes | res_name_changes
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
# Convert mask to indices
|
|
87
|
-
# Add 1, to shift the indices from the end of a residue
|
|
88
|
-
# to the start of a new residue
|
|
89
|
-
residue_starts = np.where(residue_change_mask)[0] + 1
|
|
90
|
-
|
|
91
|
-
# The first residue is not included yet -> Insert '[0]'
|
|
92
|
-
if add_exclusive_stop:
|
|
93
|
-
return np.concatenate(([0], residue_starts, [array.array_length()]))
|
|
94
|
-
else:
|
|
95
|
-
return np.concatenate(([0], residue_starts))
|
|
75
|
+
categories = ["chain_id", "res_id", "ins_code", "res_name"] + list(extra_categories)
|
|
76
|
+
if "sym_id" in array.get_annotation_categories():
|
|
77
|
+
categories.append("sym_id")
|
|
78
|
+
return get_segment_starts(array, add_exclusive_stop, equal_categories=categories)
|
|
96
79
|
|
|
97
80
|
|
|
98
81
|
def apply_residue_wise(array, data, function, axis=None):
|
biotite/structure/rings.py
CHANGED
|
@@ -149,7 +149,7 @@ def find_stacking_interactions(
|
|
|
149
149
|
|
|
150
150
|
The conditions for pi-stacking are :footcite:`Wojcikowski2015` :
|
|
151
151
|
|
|
152
|
-
- The ring centroids must be within cutoff distance
|
|
152
|
+
- The ring centroids must be within the `centroid_cutoff` distance.
|
|
153
153
|
While :footcite:`Wojcikowski2015` uses a cutoff of 5.0 Å, 6.5 Å was
|
|
154
154
|
adopted from :footcite:`Bouysset2021` to better identify perpendicular
|
|
155
155
|
stacking interactions.
|
|
Binary file
|