PyPI - biotite - Versions diffs - 0.40.0__cp312-cp312-win_amd64.whl → 0.41.0__cp312-cp312-win_amd64.whl - Mend

biotite 0.40.0__cp312-cp312-win_amd64.whl → 0.41.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show

biotite/__init__.py +1 -1
biotite/database/pubchem/download.py +23 -23
biotite/database/pubchem/query.py +7 -7
biotite/file.py +17 -9
biotite/sequence/align/banded.c +117 -117
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/cigar.py +60 -15
biotite/sequence/align/kmeralphabet.c +117 -117
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.c +117 -117
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cpp +117 -117
biotite/sequence/align/localgapped.c +117 -117
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.c +117 -117
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.c +117 -117
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.c +117 -117
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.c +117 -117
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.c +117 -117
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.c +117 -117
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/annotation.py +2 -2
biotite/sequence/codec.c +117 -117
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/io/fasta/convert.py +27 -24
biotite/sequence/phylo/nj.c +117 -117
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.c +117 -117
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.c +117 -117
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/structure/__init__.py +2 -0
biotite/structure/bonds.c +1122 -913
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.c +117 -117
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/charges.c +117 -117
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/dotbracket.py +2 -0
biotite/structure/info/atoms.py +6 -1
biotite/structure/info/bonds.py +1 -1
biotite/structure/info/ccd/amino_acids.txt +17 -0
biotite/structure/info/ccd/carbohydrates.txt +2 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +1 -0
biotite/structure/info/misc.py +69 -5
biotite/structure/integrity.py +19 -70
biotite/structure/io/ctab.py +12 -106
biotite/structure/io/general.py +157 -165
biotite/structure/io/gro/file.py +16 -16
biotite/structure/io/mmtf/convertarray.c +117 -117
biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.c +117 -117
biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.c +117 -117
biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.c +117 -117
biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
biotite/structure/io/mol/__init__.py +4 -2
biotite/structure/io/mol/convert.py +71 -7
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/{file.py → mol.py} +69 -82
biotite/structure/io/mol/sdf.py +909 -0
biotite/structure/io/pdb/file.py +84 -31
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/__init__.py +0 -1
biotite/structure/io/pdbx/bcif.py +2 -3
biotite/structure/io/pdbx/cif.py +9 -5
biotite/structure/io/pdbx/component.py +4 -1
biotite/structure/io/pdbx/convert.py +203 -79
biotite/structure/io/pdbx/encoding.c +117 -117
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/repair.py +253 -0
biotite/structure/sasa.c +117 -117
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/sequence.py +112 -0
biotite/structure/superimpose.py +472 -13
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
biotite/structure/io/pdbx/error.py +0 -14
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
{biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0

biotite/structure/charges.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/structure/dotbracket.py CHANGED Viewed

@@ -57,6 +57,8 @@ def dot_bracket_from_structure(
     .. footbibliography::
     """
     basepairs = base_pairs(nucleic_acid_strand)
+    if len(basepairs) == 0:
+        return ['']
     basepairs = get_residue_positions(nucleic_acid_strand, basepairs)
     length = get_residue_count(nucleic_acid_strand)
     return dot_bracket(basepairs, length, scores=scores,

biotite/structure/info/atoms.py CHANGED Viewed

@@ -72,6 +72,11 @@ def residue(res_name):
     # Avoid circular import
     from ..io.pdbx import get_component
-    component = get_component(get_ccd(), res_name=res_name)
+    try:
+        component = get_component(get_ccd(), res_name=res_name)
+    except KeyError:
+        raise KeyError(
+            f"No atom information found for residue '{res_name}' in CCD"
+        )
     component.hetero[:] = res_name not in non_hetero_residues
     return component

biotite/structure/info/bonds.py CHANGED Viewed

@@ -83,7 +83,7 @@ def bonds_in_residue(res_name):
     Returns
     -------
-    bonds : dict (str -> int)
+    bonds : dict ((str, str) -> int)
         A dictionary that maps tuples of two atom names to their
         respective bond types (represented as integer).
         Empty, if the residue is unknown to the

biotite/structure/info/ccd/amino_acids.txt CHANGED Viewed

@@ -228,6 +228,7 @@
 4L8
 4LZ
 4M8
+4M9
 4MM
 4N3
 4N7
@@ -386,9 +387,14 @@
 9VR
 9WV
 A0G
+A1ADO
 A1ADW
 A1ADY
 A1ADZ
+A1D64
+A1H2H
+A1H2I
+A1H45
 A1LWV
 A30
 A3U
@@ -472,6 +478,7 @@ B2C
 B2H
 B2N
 B3A
+B3D
 B3E
 B3K
 B3L
@@ -555,6 +562,7 @@ CH7
 CHG
 CHP
 CIR
+CIV
 CJO
 CLB
 CLD
@@ -1328,6 +1336,7 @@ QPA
 QPH
 QQ8
 QQB
+QUK
 QVA
 QX7
 QXV
@@ -1613,13 +1622,16 @@ YNM
 YOF
 YPR
 YPZ
+YRV
 YTF
 YTH
 YWV
 YYA
 Z01
 Z3E
+Z50
 Z70
+Z9J
 ZAE
 ZAI
 ZAL
@@ -1629,7 +1641,11 @@ ZDJ
 ZFB
 ZGL
 ZIQ
+ZJU
+ZKO
+ZLF
 ZNY
+ZRJ
 ZSX
 ZT6
 ZT9
@@ -1639,6 +1655,7 @@ ZTK
 ZU0
 ZUK
 ZV4
+ZY9
 ZYJ
 ZYK
 ZZD

biotite/structure/info/ccd/carbohydrates.txt CHANGED Viewed

@@ -241,6 +241,8 @@
 9WZ
 9YW
 A0K
+A1AIO
+A1H0Z
 A1Q
 A2G
 A5C

biotite/structure/info/ccd/components.bcif CHANGED Viewed

Binary file

biotite/structure/info/ccd/nucleotides.txt CHANGED Viewed

@@ -747,6 +747,7 @@ VET
 VSN
 WC7
 WUH
+WVQ
 X
 X0F
 X0O

biotite/structure/info/misc.py CHANGED Viewed

@@ -4,7 +4,7 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["all_residues", "full_name", "link_type"]
+__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
 from .ccd import get_ccd, get_from_ccd
@@ -40,8 +40,10 @@ def full_name(res_name):
     Returns
     -------
-    name : str
+    name : str or None
         The full name of the residue.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
     Examples
     --------
@@ -49,7 +51,10 @@ def full_name(res_name):
     >>> print(full_name("MAN"))
     alpha-D-mannopyranose
     """
-    return get_from_ccd("chem_comp", res_name.upper(), "name").item()
+    array = get_from_ccd("chem_comp", res_name.upper(), "name")
+    if array is None:
+        return None
+    return array.item()
 def link_type(res_name):
@@ -64,8 +69,10 @@ def link_type(res_name):
     Returns
     -------
-    link_type : str
+    link_type : str or None
         The link type.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
     Examples
     --------
@@ -77,4 +84,61 @@ def link_type(res_name):
     >>> print(link_type("HOH"))
     NON-POLYMER
     """
-    return get_from_ccd("chem_comp", res_name.upper(), "type").item()
+    array = get_from_ccd("chem_comp", res_name.upper(), "type")
+    if array is None:
+        return None
+    return array.item()
+def one_letter_code(res_name):
+    """
+    Get the one-letter code of a residue/compound,
+    based on the PDB chemical components dictionary.
+    The one-letter code is only defined for amino acids and nucleotides
+    and for compounds that are structurally similar to them.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name.
+    Returns
+    -------
+    one_letter_code : str or None
+        The one-letter code.
+        None if the compound is not present in the CCD or if no
+        one-letter code is defined for this compound.
+    Examples
+    --------
+    Get the one letter code for an amino acid (or a nucleotide).
+    >>> print(full_name("ALA"))
+    ALANINE
+    >>> print(one_letter_code("ALA"))
+    A
+    For similar compounds, the one-letter code is also defined.
+    >>> print(full_name("DAL"))
+    D-ALANINE
+    >>> print(one_letter_code("DAL"))
+    A
+    For other compounds, the one-letter code is not defined.
+    >>> print(full_name("MAN"))
+    alpha-D-mannopyranose
+    >>> print(one_letter_code("MAN"))
+    None
+    """
+    array = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
+    if array is None:
+        return None
+    item = array.item()
+    if item == "":
+        return None
+    return item

biotite/structure/integrity.py CHANGED Viewed

@@ -12,7 +12,7 @@ __author__ = "Patrick Kunzmann, Daniel Bauer"
 __all__ = ["check_id_continuity", "check_atom_id_continuity",
            "check_res_id_continuity", "check_backbone_continuity",
            "check_duplicate_atoms", "check_bond_continuity",
-           "check_linear_continuity", "renumber_atom_ids", "renumber_res_ids"]
+           "check_linear_continuity"]
 import numpy as np
 import warnings
@@ -32,17 +32,17 @@ def check_id_continuity(array):
     """
     Check if the residue IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is a strong clue for missing residues,
     a decrement probably means the start of a new chain.
     DEPRECATED: Use :func:`check_res_id_continuity()` instead.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
     """
     Check if the atom IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is a strong clue for missing atoms.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -81,15 +81,15 @@ def check_res_id_continuity(array):
     """
     Check if the residue IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is a strong clue for missing residues,
     a decrement probably means the start of a new chain.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
     """
     Check if the (peptide or phosphate) backbone atoms have
     an unreasonable distance to the next atom.
     A large or very small distance is a very strong clue that there is
     no bond between those atoms, therefore the chain is discontinued.
@@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
     """
     Check if a structure contains duplicate atoms, i.e. two atoms in a
     structure have the same annotations (coordinates may be different).
     Duplicate atoms may appear, when a structure has occupancy for an
     atom at two or more positions or when the *altloc* positions are
     improperly read.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     duplicate : ndarray, dtype=int
@@ -228,16 +228,16 @@ def check_duplicate_atoms(array):
     for i in range(1, array.array_length()):
         # Start with assumption that all atoms in the array
         # until index i are duplicates of the atom at index i
-        is_dublicate = np.full(i, True, dtype=bool)
+        is_duplicate = np.full(i, True, dtype=bool)
         for annot in annots:
             # For each annotation array filter out the atoms until
             # index i that have an unequal annotation
-            # to the atom at index i
-            is_dublicate &= (annot[:i] == annot[i])
+            # to the atom at index i
+            is_duplicate &= (annot[:i] == annot[i])
         # After checking all annotation arrays,
         # if there still is any duplicate to the atom at index i,
         # add i to the list of duplicate atom indices
-        if is_dublicate.any():
+        if is_duplicate.any():
             duplicates.append(i)
     return np.array(duplicates)
@@ -255,7 +255,7 @@ def check_in_box(array):
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     outside : ndarray, dtype=int
@@ -266,54 +266,3 @@ def check_in_box(array):
     box = array.box
     fractions = coord_to_fraction(array, box)
     return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]
-def renumber_atom_ids(array, start=None):
-    """
-    Renumber the atom IDs of the given array.
-    Parameters
-    ----------
-    array : AtomArray or AtomArrayStack
-        The array to be checked.
-    start : int, optional
-        The starting index for renumbering.
-        The first ID in the array is taken by default.
-    Returns
-    -------
-    array : AtomArray or AtomArrayStack
-        The renumbered array.
-    """
-    if "atom_id" not in array.get_annotation_categories():
-        raise ValueError("The atom array must have the 'atom_id' annotation")
-    if start is None:
-        start = array.atom_id[0]
-    array.atom_id = np.arange(start, array.shape[-1]+1)
-    return array
-def renumber_res_ids(array, start=None):
-    """
-    Renumber the residue IDs of the given array.
-    Parameters
-    ----------
-    array : AtomArray or AtomArrayStack
-        The array to be checked.
-    start : int, optional
-        The starting index for renumbering.
-        The first ID in the array is taken by default.
-    Returns
-    -------
-    array : AtomArray or AtomArrayStack
-        The renumbered array.
-    """
-    if start is None:
-        start = array.res_id[0]
-    diff = np.diff(array.res_id)
-    diff[diff != 0] = 1
-    new_res_ids =  np.concatenate(([start], diff)).cumsum()
-    array.res_id = new_res_ids
-    return array

biotite/structure/io/ctab.py CHANGED Viewed

@@ -2,46 +2,20 @@
 # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
 # information.
-"""
-Functions for parsing and writing an :class:`AtomArray` from/to
-*MDL* connection tables (Ctab).
-"""
 __name__ = "biotite.structure.io"
 __author__ = "Patrick Kunzmann"
 __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
 import warnings
-import numpy as np
-from ..error import BadStructureError
-from ..atoms import AtomArray, AtomArrayStack
-from ..bonds import BondList, BondType
-BOND_TYPE_MAPPING = {
-    1: BondType.SINGLE,
-    2: BondType.DOUBLE,
-    3: BondType.TRIPLE,
-    6: BondType.SINGLE,
-    7: BondType.DOUBLE,
-    8: BondType.ANY,
-}
-BOND_TYPE_MAPPING_REV = {
-    BondType.SINGLE: 1,
-    BondType.DOUBLE: 2,
-    BondType.TRIPLE: 3,
-    BondType.AROMATIC_SINGLE: 1,
-    BondType.AROMATIC_DOUBLE: 2,
-    BondType.ANY: 8,
-}
-CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
-CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
+from ..bonds import BondType
 def read_structure_from_ctab(ctab_lines):
     """
     Parse a *MDL* connection table (Ctab) to obtain an
-    :class:`AtomArray`. :footcite:`Dalby1992`
+    :class:`AtomArray`. :footcite:`Dalby1992`.
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
     Parameters
     ----------
@@ -60,41 +34,9 @@ def read_structure_from_ctab(ctab_lines):
     .. footbibliography::
     """
-    n_atoms, n_bonds = _get_counts(ctab_lines[0])
-    atom_lines = ctab_lines[1 : 1 + n_atoms]
-    bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]
-    atoms = AtomArray(n_atoms)
-    atoms.add_annotation("charge", int)
-    for i, line in enumerate(atom_lines):
-        atoms.coord[i, 0] = float(line[0:10])
-        atoms.coord[i, 1] = float(line[10:20])
-        atoms.coord[i, 2] = float(line[20:30])
-        atoms.element[i] = line[31:34].strip().upper()
-        charge = CHARGE_MAPPING.get(int(line[36:39]))
-        if charge is None:
-            warnings.warn(
-                f"Cannot handle MDL charge type {int(line[36 : 39])}, "
-                f"0 is used instead"
-            )
-            charge = 0
-        atoms.charge[i] = charge
-    bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
-    for i, line in enumerate(bond_lines):
-        bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
-        if bond_type is None:
-            warnings.warn(
-                f"Cannot handle MDL bond type {int(line[6 : 9])}, "
-                f"BondType.ANY is used instead"
-            )
-            bond_type = BondType.ANY
-        bond_array[i, 0] = int(line[0:3]) - 1
-        bond_array[i, 1] = int(line[3:6]) - 1
-        bond_array[i, 2] = bond_type
-    atoms.bonds = BondList(n_atoms, bond_array)
-    return atoms
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import read_structure_from_ctab
+    return read_structure_from_ctab(ctab_lines)
 def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
@@ -102,6 +44,8 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
     Convert an :class:`AtomArray` into a
     *MDL* connection table (Ctab). :footcite:`Dalby1992`
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
     Parameters
     ----------
     atoms : AtomArray
@@ -123,44 +67,6 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
     .. footbibliography::
     """
-    if isinstance(atoms, AtomArrayStack):
-        raise TypeError(
-            "An 'AtomArrayStack' was given, "
-            "but only a single model can be written"
-        )
-    if atoms.bonds is None:
-        raise BadStructureError("Input AtomArray has no associated BondList")
-    try:
-        charge = atoms.charge
-    except AttributeError:
-        charge = np.zeros(atoms.array_length(), dtype=int)
-    atom_lines = [
-        f"{atoms.coord[i,0]:>10.5f}"
-        f"{atoms.coord[i,1]:>10.5f}"
-        f"{atoms.coord[i,2]:>10.5f}"
-        f" {atoms.element[i]:>3}"
-        f"  {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
-        for i in range(atoms.array_length())
-    ]
-    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
-    bond_lines = [
-        f"{i+1:>3d}{j+1:>3d}"
-        f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
-        + f"{0:>3d}" * 4
-        for i, j, bond_type in atoms.bonds.as_array()
-    ]
-    counts_line = (
-        f"{len(atom_lines):>3d}{len(bond_lines):>3d}"
-        "  0     0  0  0  0  0  0  1 V2000"
-    )
-    return [counts_line] + atom_lines + bond_lines + ["M  END"]
-def _get_counts(counts_line):
-    return int(counts_line[0:3]), int(counts_line[3:6])
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import write_structure_to_ctab
+    return write_structure_to_ctab(atoms, default_bond_type)