PyPI - biotite - Versions diffs - 1.4.0__cp312-cp312-win_amd64.whl → 1.5.0__cp312-cp312-win_amd64.whl - Mend

biotite 1.4.0__cp312-cp312-win_amd64.whl → 1.5.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (46) hide show

biotite/application/dssp/app.py +63 -6
biotite/database/afdb/download.py +12 -6
biotite/database/rcsb/download.py +1 -0
biotite/database/rcsb/query.py +2 -2
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/structure/atoms.py +1 -1
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/chains.py +34 -0
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/filter.py +2 -1
biotite/structure/geometry.py +164 -2
biotite/structure/info/atoms.py +8 -0
biotite/structure/info/components.bcif +0 -0
biotite/structure/io/pdb/convert.py +1 -0
biotite/structure/io/pdb/file.py +16 -2
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/bcif.py +1 -1
biotite/structure/io/pdbx/cif.py +1 -1
biotite/structure/io/pdbx/compress.py +13 -9
biotite/structure/io/pdbx/convert.py +17 -6
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +39 -8
biotite/structure/residues.py +173 -1
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/segments.py +39 -3
biotite/structure/util.py +14 -22
biotite/version.py +16 -3
{biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/METADATA +1 -1
{biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/RECORD +46 -46
{biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/WHEEL +0 -0
{biotite-1.4.0.dist-info → biotite-1.5.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/application/dssp/app.py CHANGED Viewed

@@ -11,8 +11,13 @@ from tempfile import NamedTemporaryFile
 import numpy as np
 from biotite.application.application import AppState, requires_state
 from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
-from biotite.structure.io.pdbx.cif import CIFFile
+from biotite.structure.error import BadStructureError
+from biotite.structure.filter import filter_amino_acids
+from biotite.structure.io.pdbx.cif import CIFCategory, CIFColumn, CIFFile
+from biotite.structure.io.pdbx.component import MaskValue
 from biotite.structure.io.pdbx.convert import set_structure
+from biotite.structure.repair import create_continuous_res_ids
+from biotite.structure.residues import get_residue_starts
 class DsspApp(LocalApp):
@@ -49,17 +54,19 @@ class DsspApp(LocalApp):
     >>> app.start()
     >>> app.join()
     >>> print(app.get_sse())
-    ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'C' 'C'
-     'C' 'C']
+    ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P' 'P'
+     'P' 'C']
     """
     def __init__(self, atom_array, bin_path="mkdssp"):
         super().__init__(bin_path)
-        # mkdssp requires also the
-        # 'occupancy', 'b_factor' and 'charge' fields
-        # -> Add these annotations to a copy of the input structure
+        if not np.all(filter_amino_acids(atom_array)):
+            raise BadStructureError("The input structure must contain only amino acids")
         self._array = atom_array.copy()
+        # DSSP requires also the
+        # 'occupancy', 'b_factor' and 'charge' fields
+        # -> Add these placeholder values
         categories = self._array.get_annotation_categories()
         if "charge" not in categories:
             self._array.set_annotation(
@@ -73,6 +80,10 @@ class DsspApp(LocalApp):
             self._array.set_annotation(
                 "occupancy", np.ones(self._array.array_length(), dtype=float)
             )
+        # DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
+        # if `seq_id` does not start at 1
+        self._array.res_id = create_continuous_res_ids(self._array)
         try:
             # The parameters have changed in version 4
             self._new_cli = get_version(bin_path)[0] >= 4
@@ -86,6 +97,9 @@ class DsspApp(LocalApp):
     def run(self):
         in_file = CIFFile()
         set_structure(in_file, self._array)
+        in_file.block["pdbx_poly_seq_scheme"] = _create_pdbx_poly_seq_scheme(
+            self._array, in_file.block["atom_site"]["label_entity_id"].as_array(str)
+        )
         in_file.write(self._in_file)
         self._in_file.flush()
         if self._new_cli:
@@ -157,3 +171,46 @@ class DsspApp(LocalApp):
         app.start()
         app.join()
         return app.get_sse()
+def _create_pdbx_poly_seq_scheme(atom_array, entity_ids):
+    """
+    Create the ``pdbx_poly_seq_scheme`` category, as required by DSSP.
+    Parameters
+    ----------
+    atom_array : AtomArray
+        The atom array to create the category from.
+    entity_ids : ndarray, dtype=str
+        The entity IDs for each atom.
+    Returns
+    -------
+    pdbx_poly_seq_scheme : CIFCategory
+        The ``pdbx_poly_seq_scheme`` category.
+    """
+    res_start_indices = get_residue_starts(atom_array)
+    chain_id = atom_array.chain_id[res_start_indices]
+    res_name = atom_array.res_name[res_start_indices]
+    res_id = atom_array.res_id[res_start_indices]
+    ins_code = atom_array.ins_code[res_start_indices]
+    hetero = atom_array.hetero[res_start_indices]
+    entity_id = entity_ids[res_start_indices]
+    poly_seq_scheme = CIFCategory()
+    poly_seq_scheme["asym_id"] = chain_id
+    poly_seq_scheme["entity_id"] = entity_id
+    poly_seq_scheme["seq_id"] = res_id
+    poly_seq_scheme["mon_id"] = res_name
+    poly_seq_scheme["ndb_seq_num"] = res_id
+    poly_seq_scheme["pdb_seq_num"] = res_id
+    poly_seq_scheme["auth_seq_num"] = res_id
+    poly_seq_scheme["pdb_mon_id"] = res_name
+    poly_seq_scheme["auth_mon_id"] = res_name
+    poly_seq_scheme["pdb_strand_id"] = chain_id
+    poly_seq_scheme["pdb_ins_code"] = CIFColumn(
+        ins_code, np.where(ins_code == "", MaskValue.MISSING, MaskValue.PRESENT)
+    )
+    poly_seq_scheme["hetero"] = np.where(hetero, "y", "n")
+    return poly_seq_scheme

biotite/database/afdb/download.py CHANGED Viewed

@@ -16,8 +16,11 @@ from biotite.database.error import RequestError
 _METADATA_URL = "https://alphafold.com/api/prediction"
 _BINARY_FORMATS = ["bcif"]
 # Adopted from https://www.uniprot.org/help/accession_numbers
+# adding the optional 'AF-' prefix and '-F1' suffix used by RCSB
 _UNIPROT_PATTERN = (
-    "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
+    r"^(?P<prefix>(AF-)|(AF_AF))?"
+    r"(?P<id>[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})"
+    r"(?P<suffix>-?F1)?$"
 )
@@ -31,8 +34,8 @@ def fetch(ids, format, target_path=None, overwrite=False, verbose=False):
     ----------
     ids : str or iterable object of str
         A single ID or a list of IDs of the file(s) to be downloaded.
-        They can be either UniProt IDs (e.g. ``P12345``) or AlphaFold DB IDs
-        (e.g. ``AF-P12345F1``).
+        They can be either UniProt IDs (e.g. ``P12345``), AlphaFold DB IDs
+        (e.g. ``AF-P12345-F1``) or computational RCSB IDs (e.g. ``AF_AFP12345F1``).
     format : {'pdb', 'pdbx', 'cif', 'mmcif', 'bcif', 'fasta'}
         The format of the files to be downloaded.
     target_path : str, optional
@@ -142,7 +145,10 @@ def _get_file_url(id, format):
         The URL of the file to be downloaded.
     """
     uniprot_id = _extract_id(id)
-    metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
+    try:
+        metadata = requests.get(f"{_METADATA_URL}/{uniprot_id}").json()
+    except requests.exceptions.JSONDecodeError:
+        raise RequestError("Received malformed JSON response")
     if len(metadata) == 0:
         raise RequestError(f"ID {id} is invalid")
     # A list of length 1 is always returned, if the response is valid
@@ -167,10 +173,10 @@ def _extract_id(id):
     uniprot_id : str
         The UniProt ID.
     """
-    match = re.search(_UNIPROT_PATTERN, id)
+    match = re.match(_UNIPROT_PATTERN, id)
     if match is None:
         raise ValueError(f"Cannot extract AFDB identifier from '{id}'")
-    return match.group()
+    return match.group("id")
 def _assert_valid_file(response, id):

biotite/database/rcsb/download.py CHANGED Viewed

@@ -155,6 +155,7 @@ def _assert_valid_file(response_text, pdb_id):
             "<title>PDB Archive over AWS</title>",
             "No fasta files were found.",
             "No valid PDB IDs were submitted.",
+            "The requested URL was incorrect, too long or otherwise malformed.",
         ]
     ):
         raise RequestError("PDB ID {:} is invalid".format(pdb_id))

biotite/database/rcsb/query.py CHANGED Viewed

@@ -74,7 +74,7 @@ class SingleQuery(Query, metaclass=abc.ABCMeta):
     A terminal query node for the RCSB search API.
     Multiple :class:`SingleQuery` objects can be combined to
-    :class:`CompositeQuery`objects using the ``|`` and ``&`` operators.
+    :class:`CompositeQuery` objects using the ``|`` and ``&`` operators.
     This is the abstract base class for all queries that are
     terminal nodes.
@@ -783,7 +783,7 @@ def search(
         The type of the returned identifiers:
         - ``'entry'``: Only the PDB ID is returned (e.g. ``'XXXX'``).
-          These can be used directly a input to :func:`fetch()`.
+          These can be used directly as input to :func:`fetch()`.
         - ``'assembly'``: The PDB ID appended with assembly ID is
           returned (e.g. ``'XXXX-1'``).
         - ``'polymer_entity'``: The PDB ID appended with entity ID of

biotite/sequence/align/banded.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/kmertable.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/localgapped.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/localungapped.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/multiple.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/pairwise.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/permutation.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/selector.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/align/tracetable.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/codec.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/nj.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/tree.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/upgma.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/atoms.py CHANGED Viewed

@@ -1554,7 +1554,7 @@ def coord(item):
         Atom coordinates.
     """
-    if type(item) in (Atom, AtomArray, AtomArrayStack):
+    if isinstance(item, (Atom, _AtomArrayBase)):
         return item.coord
     elif isinstance(item, np.ndarray):
         return item.astype(np.float32, copy=False)

biotite/structure/bonds.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/celllist.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/chains.py CHANGED Viewed

@@ -16,6 +16,7 @@ __all__ = [
     "get_chain_masks",
     "get_chain_starts_for",
     "get_chain_positions",
+    "get_all_chain_positions",
     "chain_iter",
     "get_chains",
     "get_chain_count",
@@ -24,6 +25,7 @@ __all__ = [
 from biotite.structure.segments import (
     apply_segment_wise,
+    get_all_segment_positions,
     get_segment_masks,
     get_segment_positions,
     get_segment_starts,
@@ -212,11 +214,43 @@ def get_chain_positions(array, indices):
     -------
     start_indices : ndarray, dtype=int, shape=(k,)
         The indices that point to the position of the chains.
+    See Also
+    --------
+    get_all_chain_positions :
+        Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
     """
     starts = get_chain_starts(array, add_exclusive_stop=True)
     return get_segment_positions(starts, indices)
+def get_all_chain_positions(array):
+    """
+    For each atom, obtain the position of the chain
+    corresponding to this atom in the input `array`.
+    For example, the position of the first chain in the atom array is
+    ``0``, the position of the second chain is ``1``, etc.
+    Parameters
+    ----------
+    array : AtomArray or AtomArrayStack
+        The atom array (stack) to determine the chains from.
+    Returns
+    -------
+    chain_indices : ndarray, dtype=int, shape=(k,)
+        The indices that point to the position of the chains.
+    See Also
+    --------
+    get_chain_positions :
+        Similar to this function, but for a given subset of atom indices.
+    """
+    starts = get_chain_starts(array, add_exclusive_stop=True)
+    return get_all_segment_positions(starts, array.array_length())
 def get_chains(array):
     """
     Get the chain IDs of an atom array (stack).

biotite/structure/charges.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/filter.py CHANGED Viewed

@@ -63,7 +63,8 @@ _canonical_aa_list = [
 ]
 _canonical_nucleotide_list = ["A", "DA", "G", "DG", "C", "DC", "U", "DT"]
-_solvent_list = ["HOH", "SOL"]
+# Residue names of solvent molecules not only in CCD, but also from modeling software
+_solvent_list = ["HOH", "DOD", "SOL", "WAT", "H2O", "TIP3", "TIP4", "TIP5"]
 _peptide_backbone_atoms = ["N", "CA", "C"]
 _phosphate_backbone_atoms = ["P", "O5'", "C5'", "C4'", "C3'", "O3'"]

biotite/structure/geometry.py CHANGED Viewed

@@ -19,19 +19,79 @@ __all__ = [
     "dihedral",
     "index_dihedral",
     "dihedral_backbone",
+    "dihedral_side_chain",
     "centroid",
 ]
+import functools
 import numpy as np
 from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
 from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
-from biotite.structure.filter import filter_amino_acids
+from biotite.structure.filter import filter_amino_acids, filter_canonical_amino_acids
+from biotite.structure.residues import get_residue_starts
 from biotite.structure.util import (
     coord_for_atom_name_per_residue,
     norm_vector,
     vector_dot,
 )
+# The names of the atoms participating in chi angle
+_CHI_ATOMS = {
+    "ARG": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD"),
+        ("CB", "CG", "CD", "NE"),
+        ("CG", "CD", "NE", "CZ"),
+    ],
+    "LEU": [
+        ("N", "CA", "CB", "CG"),
+        # By convention chi2 is defined using CD1 instead of CD2
+        ("CA", "CB", "CG", "CD1"),
+    ],
+    "VAL": [("N", "CA", "CB", "CG1")],
+    "ILE": [("N", "CA", "CB", "CG1"), ("CA", "CB", "CG1", "CD1")],
+    "MET": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "SD"),
+        ("CB", "CG", "SD", "CE"),
+    ],
+    "LYS": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD"),
+        ("CB", "CG", "CD", "CE"),
+        ("CG", "CD", "CE", "NZ"),
+    ],
+    "PHE": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD1"),
+    ],
+    "TRP": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD1"),
+    ],
+    "TYR": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD1"),
+    ],
+    "ASN": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
+    "GLN": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD"),
+        ("CB", "CG", "CD", "OE1"),
+    ],
+    "ASP": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")],
+    "GLU": [
+        ("N", "CA", "CB", "CG"),
+        ("CA", "CB", "CG", "CD"),
+        ("CB", "CG", "CD", "OE1"),
+    ],
+    "CYS": [("N", "CA", "CB", "SG")],
+    "HIS": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "ND1")],
+    "PRO": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "CD")],
+    "SER": [("N", "CA", "CB", "OG")],
+    "THR": [("N", "CA", "CB", "OG1")],
+}
 def displacement(atoms1, atoms2, box=None):
     """
@@ -492,7 +552,7 @@ def dihedral_backbone(atom_array):
     Returns
     -------
-    phi, psi, omega : ndarray
+    phi, psi, omega : ndarray, shape=(m,n) or shape=(n,), dtype=float
         An array containing the 3 backbone dihedral angles for every CA atom.
         `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
         C-terminus.
@@ -562,6 +622,96 @@ def dihedral_backbone(atom_array):
     return phi, psi, omg
+def dihedral_side_chain(atoms):
+    r"""
+    Measure the side chain :math:`\chi` dihedral angles of amino acid residues.
+    Parameters
+    ----------
+    atoms : AtomArray or AtomArrayStack
+        The protein structure to measure the side chain dihedral angles for.
+    Returns
+    -------
+    chi : ndarray, shape=(m, n, 4) or shape=(n, 4), dtype=float
+        An array containing the up to four side chain dihedral angles for every
+        amino acid residue.
+        Trailing :math:`\chi` angles that are not defined for an amino acid are filled
+        with :math:`NaN` values.
+        The same is true for all residues that are not canonical amino acids.
+    Notes
+    -----
+    By convention, the :math:`\chi_2` angle of leucine is defined using ``CD1``
+    instead of ``CD2``.
+    Examples
+    --------
+    >>> res_ids, res_names = get_residues(atom_array)
+    >>> dihedrals = dihedral_side_chain(atom_array)
+    >>> for res_id, res_name, dihedrals in zip(res_ids, res_names, dihedrals):
+    ...     print(f"{res_name.capitalize()}{res_id:<2d}:", dihedrals)
+    Asn1 : [-1.180 -0.066    nan    nan]
+    Leu2 : [0.923 1.866   nan   nan]
+    Tyr3 : [-2.593 -1.487    nan    nan]
+    Ile4 : [-0.781 -0.972    nan    nan]
+    Gln5 : [-2.557  1.410 -1.776    nan]
+    Trp6 : [3.117 1.372   nan   nan]
+    Leu7 : [-1.33  3.08   nan   nan]
+    Lys8 : [ 1.320  1.734  3.076 -2.022]
+    Asp9 : [-1.623  0.909    nan    nan]
+    Gly10: [nan nan nan nan]
+    Gly11: [nan nan nan nan]
+    Pro12: [-0.331  0.539    nan    nan]
+    Ser13: [-1.067    nan    nan    nan]
+    Ser14: [-2.514    nan    nan    nan]
+    Gly15: [nan nan nan nan]
+    Arg16: [ 1.032 -3.063  1.541 -1.568]
+    Pro17: [ 0.522 -0.601    nan    nan]
+    Pro18: [ 0.475 -0.577    nan    nan]
+    Pro19: [ 0.561 -0.602    nan    nan]
+    Ser20: [-1.055    nan    nan    nan]
+    """
+    is_multi_model = isinstance(atoms, AtomArrayStack)
+    chi_atoms = _all_chi_atoms()
+    res_names = atoms.res_name[get_residue_starts(atoms)]
+    chi_atom_coord = coord_for_atom_name_per_residue(
+        atoms, chi_atoms, filter_canonical_amino_acids(atoms)
+    )
+    chi_atoms_to_coord_index = {atom_name: i for i, atom_name in enumerate(chi_atoms)}
+    if is_multi_model:
+        shape = (atoms.stack_depth(), len(res_names), 4)
+    else:
+        shape = (len(res_names), 4)
+    chi_angles = np.full(shape, np.nan, dtype=np.float32)
+    for res_name, chi_atom_names_for_all_angles in _CHI_ATOMS.items():
+        res_mask = res_names == res_name
+        for chi_i, chi_atom_names in enumerate(chi_atom_names_for_all_angles):
+            dihedrals = dihedral(
+                chi_atom_coord[
+                    chi_atoms_to_coord_index[chi_atom_names[0]], ..., res_mask, :
+                ],
+                chi_atom_coord[
+                    chi_atoms_to_coord_index[chi_atom_names[1]], ..., res_mask, :
+                ],
+                chi_atom_coord[
+                    chi_atoms_to_coord_index[chi_atom_names[2]], ..., res_mask, :
+                ],
+                chi_atom_coord[
+                    chi_atoms_to_coord_index[chi_atom_names[3]], ..., res_mask, :
+                ],
+            )
+            if is_multi_model:
+                # Swap dimensions due to NumPy's behavior when using advanced indexing
+                # (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing)
+                dihedrals = dihedrals.T
+            chi_angles[..., res_mask, chi_i] = dihedrals
+    return chi_angles
 def centroid(atoms):
     """
     Measure the centroid of a structure.
@@ -653,3 +803,15 @@ def _displacement_triclinic_box(fractions, box, disp):
     disp[:] = shifted_diffs[
         np.arange(len(shifted_diffs)), np.argmin(sq_distance, axis=1)
     ]
+@functools.cache
+def _all_chi_atoms():
+    """
+    Get the names of the atoms participating in any chi angle.
+    """
+    atom_names = set()
+    for angles in _CHI_ATOMS.values():
+        for angle in angles:
+            atom_names.update(angle)
+    return sorted(atom_names)

biotite/structure/info/atoms.py CHANGED Viewed

@@ -6,6 +6,7 @@ __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
 __all__ = ["residue"]
+import functools
 from biotite.structure.info.ccd import get_ccd
 # fmt: off
@@ -75,6 +76,13 @@ def residue(res_name, allow_missing_coord=False):
      ['CB' 'HB3']
      ['OXT' 'HXT']]
     """
+    # Use a cache internally, but always return a copy,
+    # as the returned AtomArray is mutable
+    return _residue(res_name, allow_missing_coord).copy()
+@functools.lru_cache(maxsize=100)
+def _residue(res_name, allow_missing_coord=False):
     # Avoid circular import
     from biotite.structure.io.pdbx import get_component

biotite/structure/info/components.bcif CHANGED Viewed

Binary file

biotite/structure/io/pdb/convert.py CHANGED Viewed

@@ -16,6 +16,7 @@ __all__ = [
     "list_assemblies",
     "get_assembly",
     "get_unit_cell",
+    "get_symmetry_mates",
 ]
 import warnings

biotite/structure/io/pdb/file.py CHANGED Viewed

@@ -6,12 +6,16 @@ __name__ = "biotite.structure.io.pdb"
 __author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers"
 __all__ = ["PDBFile"]
+import itertools
 import warnings
 from collections import namedtuple
 import numpy as np
 from biotite.file import InvalidFileError, TextFile
 from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
-from biotite.structure.bonds import BondList, connect_via_residue_names
+from biotite.structure.bonds import (
+    BondList,
+    connect_via_residue_names,
+)
 from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
 from biotite.structure.error import BadStructureError
 from biotite.structure.filter import (
@@ -19,6 +23,7 @@ from biotite.structure.filter import (
     filter_highest_occupancy_altloc,
     filter_solvent,
 )
+from biotite.structure.info.bonds import bonds_in_residue
 from biotite.structure.io.pdb.hybrid36 import (
     decode_hybrid36,
     encode_hybrid36,
@@ -544,7 +549,16 @@ class PDBFile(TextFile):
         # Read bonds
         if include_bonds:
             bond_list = self._get_bonds(atom_id)
-            bond_list = bond_list.merge(connect_via_residue_names(array))
+            # Create bond dict containing only non-hetero residues (+ water)
+            custom_bond_dict = {
+                res_name: bonds_in_residue(res_name)
+                for res_name in itertools.chain(
+                    np.unique(array[..., ~array.hetero].res_name), ["HOH"]
+                )
+            }
+            bond_list = bond_list.merge(
+                connect_via_residue_names(array, custom_bond_dict=custom_bond_dict)
+            )
             array.bonds = bond_list
         return array

biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/io/pdbx/bcif.py CHANGED Viewed

@@ -292,7 +292,7 @@ class BinaryCIFColumn(_Component):
         else:
             # Array needs to be converted, but masked values are
             # not necessarily convertible
-            # (e.g. '' cannot be converted to int)
+            # (e.g. '.' cannot be converted to int)
             if masked_value is None:
                 array = np.zeros(len(self._data), dtype=dtype)
             else:

biotite/structure/io/pdbx/cif.py CHANGED Viewed

@@ -243,7 +243,7 @@ class CIFColumn:
         else:
             # Array needs to be converted, but masked values are
             # not necessarily convertible
-            # (e.g. '' cannot be converted to int)
+            # (e.g. '.' cannot be converted to int)
             if masked_value is None:
                 array = np.zeros(len(self._data), dtype=dtype)
             else:

biotite/structure/io/pdbx/compress.py CHANGED Viewed

@@ -140,8 +140,8 @@ def _compress_data(bcif_data, rtol, atol):
         # Run encode to initialize the data and offset arrays
         indices = encoding.encode(array)
         offsets = np.cumsum([0] + [len(s) for s in encoding.strings])
-        encoding.data_encoding, _ = _find_best_integer_compression(indices)
-        encoding.offset_encoding, _ = _find_best_integer_compression(offsets)
+        encoding.data_encoding = _find_best_integer_compression(indices)
+        encoding.offset_encoding = _find_best_integer_compression(offsets)
         return bcif.BinaryCIFData(array, [encoding])
     elif np.issubdtype(array.dtype, np.floating):
@@ -159,18 +159,22 @@ def _compress_data(bcif_data, rtol, atol):
             # -> do not use integer encoding
             return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
         else:
-            best_encoding, size_compressed = _find_best_integer_compression(
-                integer_array
+            best_encoding = _find_best_integer_compression(integer_array)
+            compressed_data = bcif.BinaryCIFData(
+                array, [to_integer_encoding] + best_encoding
             )
-            if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
-                return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
+            uncompressed_data = bcif.BinaryCIFData(array, [ByteArrayEncoding()])
+            if _data_size_in_file(compressed_data) < _data_size_in_file(
+                uncompressed_data
+            ):
+                return compressed_data
             else:
                 # The float array is smaller -> encode it directly as bytes
-                return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
+                return uncompressed_data
     elif np.issubdtype(array.dtype, np.integer):
         array = _to_smallest_integer_type(array)
-        encodings, _ = _find_best_integer_compression(array)
+        encodings = _find_best_integer_compression(array)
         return bcif.BinaryCIFData(array, encodings)
     else:
@@ -233,7 +237,7 @@ def _find_best_integer_compression(array):
                 if size < smallest_size:
                     best_encoding_sequence = encodings
                     smallest_size = size
-    return best_encoding_sequence, smallest_size
+    return best_encoding_sequence
 def _estimate_packed_length(array, packed_byte_count):