PyPI - biotite - Versions diffs - 1.1.0__cp313-cp313-win_amd64.whl → 1.2.0__cp313-cp313-win_amd64.whl - Mend

biotite 1.1.0__cp313-cp313-win_amd64.whl → 1.2.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (155) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/localapp.py +2 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +3 -3
biotite/application/muscle/app5.py +3 -3
biotite/application/sra/app.py +0 -5
biotite/application/util.py +21 -1
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +3 -1
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +2 -3
biotite/database/uniprot/check.py +2 -2
biotite/database/uniprot/download.py +2 -5
biotite/database/uniprot/query.py +3 -4
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +33 -11
biotite/sequence/align/banded.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +21 -21
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +2 -2
biotite/sequence/align/kmersimilarity.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +6 -6
biotite/sequence/align/localgapped.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +12 -3
biotite/sequence/align/multiple.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp313-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp313-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +2 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp313-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp313-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp313-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp313-win_amd64.pyd +0 -0
biotite/sequence/profile.py +19 -25
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +12 -5
biotite/sequence/sequence.py +1 -2
biotite/structure/__init__.py +2 -0
biotite/structure/alphabet/i3d.py +1 -2
biotite/structure/alphabet/pb.py +1 -2
biotite/structure/alphabet/unkerasify.py +8 -2
biotite/structure/atoms.py +35 -27
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cp313-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +8 -5
biotite/structure/box.py +19 -21
biotite/structure/celllist.cp313-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cp313-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +15 -15
biotite/structure/hbond.py +17 -19
biotite/structure/info/atoms.py +11 -2
biotite/structure/info/ccd.py +0 -2
biotite/structure/info/components.bcif +0 -0
biotite/structure/info/groups.py +0 -3
biotite/structure/info/misc.py +0 -1
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +1 -2
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +39 -13
biotite/structure/io/pdb/convert.py +2 -3
biotite/structure/io/pdb/file.py +11 -22
biotite/structure/io/pdb/hybrid36.cp313-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +4 -4
biotite/structure/io/pdbx/bcif.py +22 -7
biotite/structure/io/pdbx/cif.py +20 -7
biotite/structure/io/pdbx/component.py +6 -0
biotite/structure/io/pdbx/compress.py +2 -2
biotite/structure/io/pdbx/convert.py +222 -33
biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +0 -15
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp313-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +68 -9
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +3 -3
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/METADATA +5 -3
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/RECORD +155 -135
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/structure/io/pdbx/convert.py CHANGED Viewed

@@ -3,7 +3,7 @@
 # information.
 __name__ = "biotite.structure.io.pdbx"
-__author__ = "Fabrice Allain, Patrick Kunzmann"
+__author__ = "Fabrice Allain, Patrick Kunzmann, Cheyenne Ziegler"
 __all__ = [
     "get_sequence",
     "get_model_count",
@@ -13,6 +13,7 @@ __all__ = [
     "set_component",
     "list_assemblies",
     "get_assembly",
+    "get_sse",
 ]
 import itertools
@@ -81,6 +82,7 @@ PDBX_BOND_TYPE_TO_ORDER = {
     BondType.AROMATIC_TRIPLE: "trip",
     # These are masked later, it is merely added here to avoid a KeyError
     BondType.ANY: "",
+    BondType.AROMATIC: "",
     BondType.COORDINATION: "",
 }
 # Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
@@ -92,12 +94,19 @@ COMP_BOND_ORDER_TO_TYPE = {
     ("SING", "Y"): BondType.AROMATIC_SINGLE,
     ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
     ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
+    ("AROM", "Y"): BondType.AROMATIC,
 }
 # ...and vice versa
 COMP_BOND_TYPE_TO_ORDER = {
     bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
 }
 CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
+# It was observed that when the number of rows in `atom_site` and `struct_conn`
+# exceeds a certain threshold,
+# a dictionary approach is less computation and memory intensive than the dense
+# vectorized approach.
+# https://github.com/biotite-dev/biotite/pull/765#issuecomment-2708867357
+FIND_MATCHES_SWITCH_THRESHOLD = 4000000
 _proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
 _nucleotideseq_type_list = [
@@ -160,8 +169,8 @@ def get_sequence(pdbx_file, data_block=None):
     -------
     sequence_dict : Dictionary of Sequences
         Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
-        (often equivalent to chain_id and atom_site.auth_asym_id
-        in most cases). Dictionary values are sequences.
+        (equivalent to ``atom_site.auth_asym_id``).
+        Dictionary values are sequences.
     Notes
     -----
@@ -217,9 +226,7 @@ def get_model_count(pdbx_file, data_block=None):
         The number of models.
     """
     block = _get_block(pdbx_file, data_block)
-    return len(
-        _get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
-    )
+    return len(np.unique((block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))))
 def get_structure(
@@ -310,7 +317,6 @@ def get_structure(
     >>> arr = get_structure(file, model=1)
     >>> print(len(arr))
     304
     """
     block = _get_block(pdbx_file, data_block)
@@ -321,13 +327,12 @@ def get_structure(
         raise InvalidFileError("Missing 'atom_site' category in file")
     models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
-    model_starts = _get_model_starts(models)
-    model_count = len(model_starts)
+    model_count = len(np.unique(models))
     atom_count = len(models)
     if model is None:
         # For a stack, the annotations are derived from the first model
-        model_atom_site = _filter_model(atom_site, model_starts, 1)
+        model_atom_site = _filter_model(atom_site, 1)
         # Any field of the category would work here to get the length
         model_length = model_atom_site.row_count
         atoms = AtomArrayStack(model_count, model_length)
@@ -373,7 +378,7 @@ def get_structure(
                 f"the given model {model} does not exist"
             )
-        model_atom_site = _filter_model(atom_site, model_starts, model)
+        model_atom_site = _filter_model(atom_site, model)
         # Any field of the category would work here to get the length
         model_length = model_atom_site.row_count
         atoms = AtomArray(model_length)
@@ -649,6 +654,17 @@ def _find_matches(query_arrays, reference_arrays):
     `reference_arrays` where all query values match the reference counterpart.
     If no match is found for a query, the corresponding index is -1.
     """
+    if (
+        query_arrays[0].shape[0] * reference_arrays[0].shape[0]
+        <= FIND_MATCHES_SWITCH_THRESHOLD
+    ):
+        match_indices = _find_matches_by_dense_array(query_arrays, reference_arrays)
+    else:
+        match_indices = _find_matches_by_dict(query_arrays, reference_arrays)
+    return match_indices
+def _find_matches_by_dense_array(query_arrays, reference_arrays):
     match_masks_for_all_columns = np.stack(
         [
             query[:, np.newaxis] == reference[np.newaxis, :]
@@ -676,6 +692,38 @@ def _find_matches(query_arrays, reference_arrays):
     return match_indices
+def _find_matches_by_dict(query_arrays, reference_arrays):
+    # Convert reference arrays to a dictionary for O(1) lookups
+    reference_dict = {}
+    ambiguous_keys = set()
+    for ref_idx, ref_row in enumerate(zip(*reference_arrays)):
+        ref_key = tuple(ref_row)
+        if ref_key in reference_dict:
+            ambiguous_keys.add(ref_key)
+            continue
+        reference_dict[ref_key] = ref_idx
+    match_indices = []
+    for query_idx, query_row in enumerate(zip(*query_arrays)):
+        query_key = tuple(query_row)
+        occurrence = reference_dict.get(query_key)
+        if occurrence is None:
+            # -1 indicates that no match was found in the reference
+            match_indices.append(-1)
+        elif query_key in ambiguous_keys:
+            # The query cannot be uniquely matched to an atom in the reference
+            raise InvalidFileError(
+                f"The covalent bond in the 'struct_conn' category at index "
+                f"{query_idx} cannot be unambiguously assigned to atoms in "
+                f"the 'atom_site' category"
+            )
+        else:
+            match_indices.append(occurrence)
+    return np.array(match_indices)
 def _get_struct_conn_col_name(col_name, partner):
     """
     For a column name in ``atom_site`` get the corresponding column name
@@ -714,21 +762,26 @@ def _filter_altloc(array, atom_site, altloc):
         raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
-def _get_model_starts(model_array):
-    """
-    Get the start index for each model in the arrays of the
-    ``atom_site`` category.
-    """
-    _, indices = np.unique(model_array, return_index=True)
-    indices.sort()
-    return indices
-def _filter_model(atom_site, model_starts, model):
+def _filter_model(atom_site, model):
     """
     Reduce the ``atom_site`` category to the values for the given
     model.
+    Parameters
+    ----------
+    atom_site : CIFCategory or BinaryCIFCategory
+        ``atom_site`` category containing all models.
+    model : int
+        The model to be selected.
+    Returns
+    -------
+    atom_site : CIFCategory or BinaryCIFCategory
+        The ``atom_site`` category containing only the selected model.
     """
+    models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
+    _, model_starts = np.unique(models, return_index=True)
+    model_starts.sort()
     # Append exclusive stop
     model_starts = np.append(model_starts, [atom_site.row_count])
     # Indexing starts at 0, but model number starts at 1
@@ -815,7 +868,6 @@ def set_structure(
     >>> file = CIFFile()
     >>> set_structure(file, atom_array)
     >>> file.write(os.path.join(path_to_directory, "structure.cif"))
     """
     _check_non_empty(array)
@@ -836,7 +888,11 @@ def set_structure(
     )
     atom_site["label_comp_id"] = np.copy(array.res_name)
     atom_site["label_asym_id"] = np.copy(array.chain_id)
-    atom_site["label_entity_id"] = _determine_entity_id(array.chain_id)
+    atom_site["label_entity_id"] = (
+        np.copy(array.label_entity_id)
+        if "label_entity_id" in array.get_annotation_categories()
+        else _determine_entity_id(array.chain_id)
+    )
     atom_site["label_seq_id"] = np.copy(array.res_id)
     atom_site["pdbx_PDB_ins_code"] = Column(
         np.copy(array.ins_code),
@@ -1181,7 +1237,13 @@ def _filter_canonical_links(array, bond_array):
     )  # fmt: skip
-def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
+def get_component(
+    pdbx_file,
+    data_block=None,
+    use_ideal_coord=True,
+    res_name=None,
+    allow_missing_coord=False,
+):
     """
     Create an :class:`AtomArray` for a chemical component from the
     ``chem_comp_atom`` and, if available, the ``chem_comp_bond``
@@ -1209,6 +1271,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
         In this case, the component with the given residue name is
         read.
         By default, all rows would be read in this case.
+    allow_missing_coord : bool, optional
+        Whether to allow missing coordinate values in components.
+        If ``True``, these will be represented as ``nan`` values.
+        If ``False``, a ``ValueError`` is raised when missing coordinates
+        are encountered.
     Returns
     -------
@@ -1299,7 +1366,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
         else:
             raise
         array.coord = _parse_component_coordinates(
-            [atom_category[field] for field in alt_coord_fields]
+            [atom_category[field] for field in alt_coord_fields],
+            allow_missing=allow_missing_coord,
         )
     try:
@@ -1310,7 +1378,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
             )
     except KeyError:
         warnings.warn(
-            "Category 'chem_comp_bond' not found. " "No bonds will be parsed",
+            "Category 'chem_comp_bond' not found. No bonds will be parsed",
             UserWarning,
         )
     else:
@@ -1330,14 +1398,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
     return array
-def _parse_component_coordinates(coord_columns):
+def _parse_component_coordinates(coord_columns, allow_missing=False):
     coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
     for i, column in enumerate(coord_columns):
         if column.mask is not None and column.mask.array.any():
-            raise ValueError(
-                "Missing coordinates for some atoms",
-            )
-        coord[:, i] = column.as_array(np.float32)
+            if allow_missing:
+                warnings.warn(
+                    "Missing coordinates for some atoms. Those will be set to nan",
+                    UserWarning,
+                )
+            else:
+                raise ValueError(
+                    "Missing coordinates for some atoms",
+                )
+        coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
     return coord
@@ -1445,6 +1519,7 @@ def list_assemblies(pdbx_file, data_block=None):
     Examples
     --------
     >>> import os.path
     >>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
     >>> assembly_ids = list_assemblies(file)
@@ -1742,4 +1817,118 @@ def _convert_string_to_sequence(string, stype):
     elif stype in _other_type_list:
         return None
     else:
-        raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)
+        raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)
+def get_sse(pdbx_file, data_block=None, match_model=None):
+    """
+    Get the secondary structure from a PDBx file.
+    Parameters
+    ----------
+    pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
+        The file object.
+        The following categories are required:
+        - ``entity_poly``
+        - ``struct_conf`` (if alpha-helices are present)
+        - ``struct_sheet_range`` (if beta-strands are present)
+        - ``atom_site`` (if `match_model` is set)
+    data_block : str, optional
+        The name of the data block.
+        Default is the first (and most times only) data block of the
+        file.
+        If the data block object is passed directly to `pdbx_file`,
+        this parameter is ignored.
+    match_model : int, optional
+        If a model number is given, only secondary structure elements for residues
+        that are resolved in the given model are kept.
+        This means secondary structure elements for residues that would not appear
+        in a corresponding :class:`AtomArray` from :func:`get_structure()` are removed.
+        By default, all residues in the sequence are kept.
+    Returns
+    -------
+    sse_dict : dict of str -> ndarray, dtype=str
+        The dictionary maps the chain ID (derived from ``auth_asym_id``) to the
+        secondary structure of the respective chain.
+        - ``"a"``: alpha-helix
+        - ``"b"``: beta-strand
+        - ``"c"``: coil or not an amino acid
+        Each secondary structure element corresponds to the ``label_seq_id`` of the
+        ``atom_site`` category.
+        This means that the 0-th position of the array corresponds to the residue
+        in ``atom_site`` with ``label_seq_id`` ``1``.
+    Examples
+    --------
+    >>> import os.path
+    >>> file = CIFFile.read(os.path.join(path_to_structures, "1aki.cif"))
+    >>> sse = get_sse(file, match_model=1)
+    >>> print(sse)
+    {'A': array(['c', 'c', 'c', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
+                 'a', 'c', 'c', 'c', 'c', 'c', 'a', 'a', 'a', 'c', 'c', 'a', 'a',
+                 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
+                 'c', 'c', 'c', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c', 'b', 'b',
+                 'b', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
+                 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
+                 'c', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'a', 'a', 'a',
+                 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'a',
+                 'a', 'a', 'a', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
+                 'c', 'c', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'c', 'c'],
+                 dtype='<U1')}
+    If only secondary structure elements for resolved residues are requested, the length
+    of the returned array matches the number of peptide residues in the structure.
+    >>> file = CIFFile.read(os.path.join(path_to_structures, "3o5r.cif"))
+    >>> print(len(get_sse(file, match_model=1)["A"]))
+    128
+    >>> atoms = get_structure(file, model=1)
+    >>> atoms = atoms[filter_amino_acids(atoms) & (atoms.chain_id == "A")]
+    >>> print(get_residue_count(atoms))
+    128
+    """
+    block = _get_block(pdbx_file, data_block)
+    # Init all chains with "c" for coil
+    sse_dict = {
+        chain_id: np.repeat("c", len(sequence))
+        for chain_id, sequence in get_sequence(block).items()
+    }
+    # Populate SSE arrays with helices and strands
+    for sse_symbol, category_name in [
+        ("a", "struct_conf"),
+        ("b", "struct_sheet_range"),
+    ]:
+        if category_name in block:
+            category = block[category_name]
+            chains = category["beg_auth_asym_id"].as_array(str)
+            start_positions = category["beg_label_seq_id"].as_array(int)
+            end_positions = category["end_label_seq_id"].as_array(int)
+            # set alpha helix positions
+            for chain, start, end in zip(chains, start_positions, end_positions):
+                # Translate the 1-based positions from PDBx into 0-based array indices
+                sse_dict[chain][start - 1 : end] = sse_symbol
+    if match_model is not None:
+        model_atom_site = _filter_model(block["atom_site"], match_model)
+        chain_ids = model_atom_site["auth_asym_id"].as_array(str)
+        res_ids = model_atom_site["label_seq_id"].as_array(int, masked_value=-1)
+        # Filter out masked residues, i.e. residues not part of a chain
+        mask = res_ids != -1
+        chain_ids = chain_ids[mask]
+        res_ids = res_ids[mask]
+        for chain_id, sse in sse_dict.items():
+            res_ids_in_chain = res_ids[chain_ids == chain_id]
+            # Transform from 1-based residue ID to 0-based index
+            indices = np.unique(res_ids_in_chain) - 1
+            sse_dict[chain_id] = sse[indices]
+    return sse_dict

biotite/structure/io/pdbx/encoding.cp313-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/io/trajfile.py CHANGED Viewed

@@ -187,9 +187,11 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         time : float or ndarray, dtype=float32, shape=(n,) or None
             The simulation time of the current frame or stack in *ps*.
-        See also
+        See Also
         --------
-        read_iter_structure
+        read_iter_structure :
+            Get an :class:`AtomArray` for each frame or an :class:`AtomArrayStack`
+            for each chunk of frames instead.
         Notes
         -----
@@ -315,9 +317,10 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
             If `stack_size` is set, multiple frames are returned as
             :class:`AtomArrayStack`.
-        See also
+        See Also
         --------
-        read_iter
+        read_iter :
+            Get the raw data for each frame or for each chunk of frames instead.
         Notes
         -----
@@ -480,7 +483,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        time : ndarray, dtype=float, shape=(m,3,3)
+        box : ndarray, dtype=float, shape=(m,3,3)
             The box vectors to be set.
         """
         self._check_model_count(box)
@@ -546,7 +549,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
         ------
         NotImplementedError
         """
-        raise NotImplementedError("Copying is not implemented " "for trajectory files")
+        raise NotImplementedError("Copying is not implemented for trajectory files")
     @classmethod
     @abc.abstractmethod

biotite/structure/io/util.py ADDED Viewed

@@ -0,0 +1,38 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+Common functions used by a number of subpackages.
+"""
+__name__ = "biotite.structure.io"
+__author__ = "Patrick Kunzmann"
+__all__ = ["number_of_integer_digits"]
+import numpy as np
+def number_of_integer_digits(values):
+    """
+    Get the maximum number of characters needed to represent the
+    pre-decimal positions of the given numeric values.
+    Parameters
+    ----------
+    values : ndarray, dtype=float
+        The values to be checked.
+    Returns
+    -------
+    n_digits : int
+        The maximum number of characters needed to represent the
+        pre-decimal positions of the given numeric values.
+    """
+    if len(values) == 0:
+        return 0
+    values = values.astype(int, copy=False)
+    n_digits = 0
+    n_digits = max(n_digits, len(str(np.min(values))))
+    n_digits = max(n_digits, len(str(np.max(values))))
+    return n_digits

biotite/structure/mechanics.py CHANGED Viewed

@@ -30,7 +30,6 @@ def gyration_radius(array, masses=None):
         Must have the same length as `array`. By default, the standard
         atomic mass for each element is taken.
     Returns
     -------
     masses : float or ndarray, dtype=float

biotite/structure/molecules.py CHANGED Viewed

@@ -39,11 +39,6 @@ def get_molecule_indices(array):
         Consequently, the length of this list is equal to the number of
         molecules in the input `array`.
-    See also
-    --------
-    get_molecule_masks
-    molecule_iter
     Examples
     --------
     Get an :class:`AtomArray` for ATP and show that it is a single
@@ -157,11 +152,6 @@ def get_molecule_masks(array):
         Consequently, the length of this list is equal to the number of
         molecules in the input `array`.
-    See also
-    --------
-    get_molecule_indices
-    molecule_iter
     Examples
     --------
     Get an :class:`AtomArray` for ATP and show that it is a single
@@ -270,11 +260,6 @@ def molecule_iter(array):
     molecule : AtomArray or AtomArrayStack
         A single molecule of the input `array`.
-    See also
-    --------
-    get_molecule_indices
-    get_molecule_masks
     Examples
     --------
     Get an :class:`AtomArray` for ATP and break it into two molecules

biotite/structure/pseudoknots.py CHANGED Viewed

@@ -69,6 +69,11 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
     Therefore, there are no pseudoknots between base pairs with the same
     pseudoknot order.
+    References
+    ----------
+    .. footbibliography::
     Examples
     --------
     Remove the pseudoknotted base pair for the sequence *ABCbac*, where
@@ -102,17 +107,6 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
     [[0 0 1]]
     >>> print(dot_bracket(basepairs, 6)[0])
     (([))]
-    See Also
-    --------
-    base_pairs
-    dot_bracket
-    References
-    ----------
-    .. footbibliography::
     """
     if len(base_pairs) == 0:
         # No base pairs -> empty pseudoknot order array
@@ -149,9 +143,9 @@ class _Region:
     Parameters
     ----------
-    base_pairs: ndarray, shape=(n,2), dtype=int
+    base_pairs : ndarray, shape=(n,2), dtype=int
         All base pairs of the structure the region is a subset for.
-    region_pairs: ndarray, dtype=int
+    region_pairs : ndarray, dtype=int
         The indices of the base pairs in ``base_pairs`` that are part of
         the region.
     scores : ndarray, dtype=int, shape=(n,) (default: None)

biotite/structure/repair.py CHANGED Viewed

@@ -48,7 +48,6 @@ def create_continuous_res_ids(atoms, restart_each_chain=True):
     >>> res_ids, _ = get_residues(atom_array)
     >>> print(res_ids)
     [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
     """
     res_ids_diff = np.zeros(atoms.array_length(), dtype=int)
     res_starts = get_residue_starts(atoms)
@@ -80,7 +79,7 @@ def infer_elements(atoms):
     See Also
     --------
-    create_atoms_names : The opposite of this function
+    create_atom_names : The opposite of this function.
     Examples
     --------
@@ -89,7 +88,6 @@ def infer_elements(atoms):
     ['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
     >>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
     ['C' 'C' 'C' 'O' 'H' 'H' 'FE']
     """
     if isinstance(atoms, (AtomArray, AtomArrayStack)):
         atom_names = atoms.atom_name
@@ -117,7 +115,7 @@ def create_atom_names(atoms):
     See Also
     --------
-    infer_elements : The opposite of this function
+    infer_elements : The opposite of this function.
     Notes
     -----