PyPI - biotite - Versions diffs - 0.39.0__cp311-cp311-win_amd64.whl → 0.41.0__cp311-cp311-win_amd64.whl - Mend

biotite 0.39.0__cp311-cp311-win_amd64.whl → 0.41.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (121)

biotite/__init__.py +3 -3
biotite/application/dssp/app.py +18 -18
biotite/database/pubchem/download.py +23 -23
biotite/database/pubchem/query.py +7 -7
biotite/database/rcsb/download.py +19 -14
biotite/file.py +17 -9
biotite/sequence/align/banded.c +258 -237
biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/cigar.py +60 -15
biotite/sequence/align/kmeralphabet.c +243 -222
biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmersimilarity.c +215 -196
biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cpp +233 -205
biotite/sequence/align/localgapped.c +258 -237
biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.c +235 -214
biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.c +255 -234
biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.c +274 -253
biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.c +215 -196
biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/selector.c +217 -197
biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
biotite/sequence/align/tracetable.c +215 -195
biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
biotite/sequence/annotation.py +2 -2
biotite/sequence/codec.c +235 -214
biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
biotite/sequence/io/fasta/convert.py +27 -24
biotite/sequence/phylo/nj.c +215 -196
biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.c +227 -202
biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.c +215 -196
biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
biotite/structure/__init__.py +2 -0
biotite/structure/basepairs.py +7 -12
biotite/structure/bonds.c +1437 -1279
biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
biotite/structure/celllist.c +217 -197
biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
biotite/structure/charges.c +1052 -1101
biotite/structure/charges.cp311-win_amd64.pyd +0 -0
biotite/structure/dotbracket.py +2 -0
biotite/structure/filter.py +30 -37
biotite/structure/info/__init__.py +5 -8
biotite/structure/info/atoms.py +31 -68
biotite/structure/info/bonds.py +47 -101
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +21 -20
biotite/structure/info/misc.py +78 -25
biotite/structure/info/standardize.py +17 -12
biotite/structure/integrity.py +19 -70
biotite/structure/io/__init__.py +2 -4
biotite/structure/io/ctab.py +12 -106
biotite/structure/io/general.py +167 -181
biotite/structure/io/gro/file.py +16 -16
biotite/structure/io/mmtf/__init__.py +3 -0
biotite/structure/io/mmtf/convertarray.c +219 -198
biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.c +217 -197
biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.c +225 -204
biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.c +215 -196
biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/file.py +34 -26
biotite/structure/io/mol/__init__.py +4 -2
biotite/structure/io/mol/convert.py +71 -7
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/{file.py → mol.py} +69 -82
biotite/structure/io/mol/sdf.py +909 -0
biotite/structure/io/npz/__init__.py +3 -0
biotite/structure/io/npz/file.py +21 -18
biotite/structure/io/pdb/__init__.py +3 -3
biotite/structure/io/pdb/file.py +89 -34
biotite/structure/io/pdb/hybrid36.c +63 -43
biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +32 -32
biotite/structure/io/pdbx/__init__.py +12 -6
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +858 -386
biotite/structure/io/pdbx/encoding.c +112813 -0
biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/molecules.py +151 -151
biotite/structure/repair.py +253 -0
biotite/structure/sasa.c +215 -196
biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
biotite/structure/sequence.py +112 -0
biotite/structure/superimpose.py +618 -116
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
biotite/structure/info/amino_acids.json +0 -1556
biotite/structure/info/amino_acids.py +0 -42
biotite/structure/info/carbohydrates.json +0 -1122
biotite/structure/info/carbohydrates.py +0 -39
biotite/structure/info/intra_bonds.msgpack +0 -0
biotite/structure/info/link_types.msgpack +0 -1
biotite/structure/info/nucleotides.json +0 -772
biotite/structure/info/nucleotides.py +0 -39
biotite/structure/info/residue_masses.msgpack +0 -0
biotite/structure/info/residue_names.msgpack +0 -3
biotite/structure/info/residues.msgpack +0 -0
biotite/structure/io/pdbx/file.py +0 -652
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
{biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0

biotite/structure/info/ccd.py ADDED Viewed

@@ -0,0 +1,95 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Patrick Kunzmann"
+__all__ = ["get_ccd", "get_from_ccd"]
+from pathlib import Path
+import numpy as np
+# Directory next to this module that holds the CCD data files
+# (``components.bcif`` is read from here)
+CCD_DIR = Path(__file__).parent / "ccd"
+# For each supported CCD category: the column containing the component
+# ID, which is used to find the rows that belong to a residue
+INDEX_COLUMN_NAME = {
+    "chem_comp": "id",
+    "chem_comp_atom": "comp_id",
+    "chem_comp_bond": "comp_id",
+}
+# Module-level cache for the loaded CCD, ``None`` until it is first read
+_ccd_block = None
+# For each category this index gives the start and stop for each residue
+_residue_index = {}
+def get_ccd():
+    """
+    Get the PDB *Chemical Component Dictionary* (CCD).
+    The ``components.bcif`` file in ``CCD_DIR`` is read on the first
+    call only; the result is cached at module level and returned
+    directly on every later call.
+    Returns
+    -------
+    ccd : object
+        The ``block`` attribute of the :class:`BinaryCIFFile` read from
+        ``components.bcif``, i.e. not the file object itself.
+        Categories are accessed by name, e.g. ``ccd["chem_comp"]``.
+    """
+    global _ccd_block
+    if _ccd_block is not None:
+        return _ccd_block
+    # Imported here instead of at module level to avoid a circular import
+    from ..io.pdbx.bcif import BinaryCIFFile
+    # Load CCD once and cache it for subsequent calls
+    _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
+    return _ccd_block
+def get_from_ccd(category_name, comp_id, column_name=None):
+    """
+    Look up the rows of a single residue in a category of the
+    PDB *Chemical Component Dictionary* (CCD).
+    On the first request for a category, an index mapping each
+    ``comp_id`` to its row range is built and cached, so later lookups
+    in the same category reuse that index.
+    Parameters
+    ----------
+    category_name : str
+        The category in the CCD.
+        Must be one of the keys of ``INDEX_COLUMN_NAME``.
+    comp_id : str
+        The residue identifier, i.e. the ``res_name``.
+    column_name : str, optional
+        The name of the column to be retrieved.
+        If None, all columns are returned as dictionary.
+        By default None.
+    Returns
+    -------
+    value : ndarray or dict or None
+        The rows of the given column, or a dictionary mapping each
+        column name of the category to its rows.
+        ``None`` if the `comp_id` is not found in the category.
+    """
+    category = get_ccd()[category_name]
+    if category_name not in _residue_index:
+        # Build the row index for this category lazily
+        id_column = category[INDEX_COLUMN_NAME[category_name]].as_array()
+        _residue_index[category_name] = _index_residues(id_column)
+    row_range = _residue_index[category_name].get(comp_id)
+    if row_range is None:
+        return None
+    start, stop = row_range
+    if column_name is not None:
+        return category[column_name].as_array()[start:stop]
+    return {
+        name: category[name].as_array()[start:stop]
+        for name in category.keys()
+    }
+def _index_residues(id_column):
+    """
+    Map each component ID to the row range it occupies in a column.
+    A new range starts wherever two neighbouring entries differ, so the
+    rows of one residue are expected to be contiguous; if an ID
+    reappears later, its last run replaces the earlier one.
+    Parameters
+    ----------
+    id_column : ndarray
+        The component ID of each row.
+    Returns
+    -------
+    index : dict
+        Maps each ID to a ``(start, stop)`` tuple with exclusive stop.
+        Empty if `id_column` is empty.
+    """
+    # Without this guard, the lookup of the first ID below would fail
+    if len(id_column) == 0:
+        return {}
+    boundaries = np.where(id_column[:-1] != id_column[1:])[0] + 1
+    # The final boundary is the exclusive stop of the last residue
+    boundaries = np.concatenate(([0], boundaries, [len(id_column)]))
+    return {
+        id_column[start].item(): (start, stop)
+        for start, stop in zip(boundaries[:-1], boundaries[1:])
+    }

biotite/structure/info/groups.py ADDED Viewed

@@ -0,0 +1,90 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.structure.info"
+__author__ = "Tom David Müller, Patrick Kunzmann"
+__all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
+from pathlib import Path
+import copy
+# Directory next to this module that holds the ``<group>.txt`` name lists
+CCD_DIR = Path(__file__).parent / "ccd"
+# Cache: group name -> tuple of residue names, filled on first request
+group_lists = {}
+def amino_acid_names():
+    """
+    Get a tuple of amino acid three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    The names come from the ``amino_acids`` group list.
+    Returns
+    -------
+    amino_acid_names : tuple of str
+        A tuple of three-letter-codes containing residues that are
+        peptide monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("amino_acids")
+def nucleotide_names():
+    """
+    Get a tuple of nucleotide three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    The names come from the ``nucleotides`` group list.
+    Returns
+    -------
+    nucleotide_names : tuple of str
+        A tuple of three-letter-codes containing residues that are
+        DNA/RNA monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("nucleotides")
+def carbohydrate_names():
+    """
+    Get a tuple of carbohydrate three-letter codes according to the
+    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.
+    The names come from the ``carbohydrates`` group list.
+    Returns
+    -------
+    carbohydrate_names : tuple of str
+        A tuple of three-letter-codes containing residues that are
+        saccharide monomers.
+    References
+    ----------
+    .. footbibliography::
+    """
+    return _get_group_members("carbohydrates")
+def _get_group_members(group_name):
+    """
+    Get the residue names belonging to a group.
+    The names are read from ``<group_name>.txt`` in ``CCD_DIR`` on the
+    first request, split on whitespace and cached in ``group_lists``.
+    Parameters
+    ----------
+    group_name : str
+        The name of the group, i.e. the stem of the text file.
+    Returns
+    -------
+    members : tuple of str
+        The residue names listed in the file.
+    """
+    try:
+        return group_lists[group_name]
+    except KeyError:
+        # Not cached yet, the file is read below
+        pass
+    with open(CCD_DIR / f"{group_name}.txt", "r") as handle:
+        members = tuple(handle.read().split())
+    group_lists[group_name] = members
+    return members

biotite/structure/info/masses.py CHANGED Viewed

@@ -7,20 +7,14 @@ __author__ = "Patrick Kunzmann"
 __all__ = ["mass"]
 import json
-from os.path import join, dirname, realpath
-import msgpack
+from pathlib import Path
 from ..atoms import Atom, AtomArray, AtomArrayStack
+from .ccd import get_from_ccd
-_info_dir = dirname(realpath(__file__))
 # Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
-with open(join(_info_dir, "atom_masses.json"), "r") as file:
-    _atom_masses = json.load(file)
-# Masses are taken from
-# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
-# (2019/01/27)
-with open(join(_info_dir, "residue_masses.msgpack"), "rb") as file:
-    _res_masses = msgpack.load(file, raw=False)
+ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
+_atom_masses = None
 def mass(item, is_residue=None):
@@ -34,7 +28,7 @@ def mass(item, is_residue=None):
     from the molecule.
     For example non-terminal residues in a protein or nucleotide chain
     miss the mass of a water molecule.
     Parameters
     ----------
     item : str or Atom or AtomArray or AtomArrayStack
@@ -50,17 +44,17 @@ def mass(item, is_residue=None):
         If set to false, the string is strictly interpreted as element.
         By default the string will be interpreted as element at first
         and secondly as residue name, if the element is unknown.
     Returns
     -------
     mass : float or None
         The mass of the given object in *u*. None if the mass is unknown.
     References
     ----------
     .. footbibliography::
     Examples
     --------
@@ -94,29 +88,36 @@ def mass(item, is_residue=None):
     >>> print(mass("N"))
     14.007
     """
+    global _atom_masses
+    with open(ATOM_MASSES_FILE, "r") as file:
+        _atom_masses = json.load(file)
     if isinstance(item, str):
         if is_residue is None:
             result_mass = _atom_masses.get(item.upper())
             if result_mass is None:
-                result_mass = _res_masses.get(item.upper())
+                result_mass = get_from_ccd(
+                    "chem_comp", item.upper(), "formula_weight"
+                ).item()
         elif not is_residue:
             result_mass = _atom_masses.get(item.upper())
         else:
-            result_mass = _res_masses.get(item.upper())
+            result_mass = get_from_ccd(
+                "chem_comp", item.upper(), "formula_weight"
+            ).item()
     elif isinstance(item, Atom):
         result_mass = mass(item.element, is_residue=False)
     elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
         result_mass = sum(
             (mass(element, is_residue=False) for element in item.element)
         )
     else:
         raise TypeError(
             f"Cannot calculate mass for {type(item).__name__} objects"
         )
     if result_mass is None:
         raise KeyError(f"{item} is not known")
     return result_mass

biotite/structure/info/misc.py CHANGED Viewed

@@ -4,39 +4,28 @@
 __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
-__all__ = ["all_residues", "full_name", "link_type"]
+__all__ = ["all_residues", "full_name", "link_type", "one_letter_code"]
-from os.path import join, dirname, realpath
-import msgpack
-_info_dir = dirname(realpath(__file__))
-# Data is taken from
-# ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
-# (2019/01/27)
-with open(join(_info_dir, "residue_names.msgpack"), "rb") as file:
-    _res_names = msgpack.load(file, raw=False)
-with open(join(_info_dir, "link_types.msgpack"), "rb") as file:
-    _link_types = msgpack.load(file, raw=False)
+from .ccd import get_ccd, get_from_ccd
 def all_residues():
     """
     Get a list of all residues/compound names in the
     PDB chemical components dictionary.
     Returns
     -------
     residues : list of str
         A list of all available The up to 3-letter residue names.
     Examples
     --------
     >>> print(all_residues()[1000 : 1010])
-    ['0Y4', '0Y5', '0Y7', '0Y8', '0Y9', '0YA', '0YB', '0YC', '0YD', '0YE']
+    ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
     """
-    return list(_res_names.keys())
+    return get_ccd()["chem_comp"]["id"].as_array().tolist()
 def full_name(res_name):
@@ -48,19 +37,24 @@ def full_name(res_name):
     ----------
     res_name : str
         The up to 3-letter residue name.
     Returns
     -------
-    name : str
+    name : str or None
         The full name of the residue.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
     Examples
     --------
     >>> print(full_name("MAN"))
     alpha-D-mannopyranose
     """
-    return _res_names.get(res_name.upper())
+    array = get_from_ccd("chem_comp", res_name.upper(), "name")
+    if array is None:
+        return None
+    return array.item()
 def link_type(res_name):
@@ -72,12 +66,14 @@ def link_type(res_name):
     ----------
     res_name : str
         The up to 3-letter residue name.
     Returns
     -------
-    link_type : str
+    link_type : str or None
         The link type.
+        If the residue is unknown to the chemical components dictionary,
+        ``None`` is returned.
     Examples
     --------
@@ -88,4 +84,61 @@ def link_type(res_name):
     >>> print(link_type("HOH"))
     NON-POLYMER
     """
-    return _link_types.get(res_name.upper())
+    array = get_from_ccd("chem_comp", res_name.upper(), "type")
+    if array is None:
+        return None
+    return array.item()
+def one_letter_code(res_name):
+    """
+    Look up the one-letter code of a residue/compound in the
+    PDB chemical components dictionary.
+    A one-letter code exists only for amino acids, nucleotides and
+    compounds that are structurally similar to them.
+    Parameters
+    ----------
+    res_name : str
+        The up to 3-letter residue name.
+        It is converted to upper case before the lookup.
+    Returns
+    -------
+    one_letter_code : str or None
+        The one-letter code.
+        None if the compound is not present in the CCD or if its
+        one-letter code entry is empty.
+    Examples
+    --------
+    Get the one letter code for an amino acid (or a nucleotide).
+    >>> print(full_name("ALA"))
+    ALANINE
+    >>> print(one_letter_code("ALA"))
+    A
+    For similar compounds, the one-letter code is also defined.
+    >>> print(full_name("DAL"))
+    D-ALANINE
+    >>> print(one_letter_code("DAL"))
+    A
+    For other compounds, the one-letter code is not defined.
+    >>> print(full_name("MAN"))
+    alpha-D-mannopyranose
+    >>> print(one_letter_code("MAN"))
+    None
+    """
+    column = get_from_ccd("chem_comp", res_name.upper(), "one_letter_code")
+    if column is None:
+        return None
+    code = column.item()
+    # An empty entry means that no one-letter code is defined
+    return None if code == "" else code

biotite/structure/info/standardize.py CHANGED Viewed

@@ -6,15 +6,13 @@ __name__ = "biotite.structure.info"
 __author__ = "Patrick Kunzmann"
 __all__ = ["standardize_order"]
+import warnings
 import numpy as np
-from .atoms import residue
+from .ccd import get_from_ccd
 from ..residues import get_residue_starts
 from ..error import BadStructureError
-_atom_name_cache = {}
 def standardize_order(atoms):
     """
     Get an index array for an input :class:`AtomArray` or
@@ -34,20 +32,20 @@ def standardize_order(atoms):
     atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
         Input structure with atoms that are potentially not in the
         *standard* order.
     Returns
     -------
     indices : ndarray, dtype=int, shape=(n,)
         When this index array is applied on the input `atoms`,
         the atoms for each residue are reordered to obtain the
         standard *RCSB PDB* atom order.
     Raises
     ------
     BadStructureError
         If the input `atoms` have duplicate atoms (same atom name)
         within a residue.
     Examples
     --------
@@ -123,11 +121,18 @@ def standardize_order(atoms):
         stop = starts[i+1]
         res_name = atoms.res_name[start]
-        standard_atom_names = _atom_name_cache.get(res_name)
+        standard_atom_names = get_from_ccd(
+            "chem_comp_atom", res_name, "atom_id"
+        )
         if standard_atom_names is None:
-            standard_atom_names = residue(res_name).atom_name
-            _atom_name_cache[res_name] = standard_atom_names
+            # If the residue is not in the CCD, keep the current order
+            warnings.warn(
+                f"Residue '{res_name}' is not in the CCD, "
+                f"keeping current atom order"
+            )
+            reordered_indices[start : stop] = np.arange(start, stop)
+            continue
         reordered_indices[start : stop] = _reorder(
             atoms.atom_name[start : stop], standard_atom_names
         ) + start
@@ -152,7 +157,7 @@ def _reorder(origin, target):
         The atom names to reorder.
     target : ndarray, dtype=str
         The atom names in target order.
     Returns
     -------
     indices : ndarray, dtype=int

biotite/structure/integrity.py CHANGED Viewed

@@ -12,7 +12,7 @@ __author__ = "Patrick Kunzmann, Daniel Bauer"
 __all__ = ["check_id_continuity", "check_atom_id_continuity",
            "check_res_id_continuity", "check_backbone_continuity",
            "check_duplicate_atoms", "check_bond_continuity",
-           "check_linear_continuity", "renumber_atom_ids", "renumber_res_ids"]
+           "check_linear_continuity"]
 import numpy as np
 import warnings
@@ -32,17 +32,17 @@ def check_id_continuity(array):
     """
     Check if the residue IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is as strong clue for missing residues,
     a decrement means probably a start of a new chain.
     DEPRECATED: Use :func:`check_res_id_continuity()` instead.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
     """
     Check if the atom IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is as strong clue for missing atoms.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -81,15 +81,15 @@ def check_res_id_continuity(array):
     """
     Check if the residue IDs are incremented by more than 1 or
     decremented, from one atom to the next one.
     An increment by more than 1 is as strong clue for missing residues,
     a decrement means probably a start of a new chain.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     discontinuity : ndarray, dtype=int
@@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
     """
     Check if the (peptide or phosphate) backbone atoms have
     non-reasonable distance to the next atom.
     A large or very small distance is a very strong clue, that there is
     no bond between those atoms, therefore the chain is discontinued.
@@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
     """
     Check if a structure contains duplicate atoms, i.e. two atoms in a
     structure have the same annotations (coordinates may be different).
     Duplicate atoms may appear, when a structure has occupancy for an
     atom at two or more positions or when the *altloc* positions are
     improperly read.
     Parameters
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     duplicate : ndarray, dtype=int
@@ -228,16 +228,16 @@ def check_duplicate_atoms(array):
     for i in range(1, array.array_length()):
         # Start with assumption that all atoms in the array
         # until index i are duplicates of the atom at index i
-        is_dublicate = np.full(i, True, dtype=bool)
+        is_duplicate = np.full(i, True, dtype=bool)
         for annot in annots:
             # For each annotation array filter out the atoms until
             # index i that have an unequal annotation
-            # to the atom at index i
-            is_dublicate &= (annot[:i] == annot[i])
+            # to the atom at index i
+            is_duplicate &= (annot[:i] == annot[i])
         # After checking all annotation arrays,
         # if there still is any duplicate to the atom at index i,
         # add i the the list of duplicate atom indices
-        if is_dublicate.any():
+        if is_duplicate.any():
             duplicates.append(i)
     return np.array(duplicates)
@@ -255,7 +255,7 @@ def check_in_box(array):
     ----------
     array : AtomArray or AtomArrayStack
         The array to be checked.
     Returns
     -------
     outside : ndarray, dtype=int
@@ -266,54 +266,3 @@ def check_in_box(array):
     box = array.box
     fractions = coord_to_fraction(array, box)
     return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]
-def renumber_atom_ids(array, start=None):
-    """
-    Renumber the atom IDs of the given array.
-    Parameters
-    ----------
-    array : AtomArray or AtomArrayStack
-        The array to be checked.
-    start : int, optional
-        The starting index for renumbering.
-        The first ID in the array is taken by default.
-    Returns
-    -------
-    array : AtomArray or AtomArrayStack
-        The renumbered array.
-    """
-    if "atom_id" not in array.get_annotation_categories():
-        raise ValueError("The atom array must have the 'atom_id' annotation")
-    if start is None:
-        start = array.atom_id[0]
-    array.atom_id = np.arange(start, array.shape[-1]+1)
-    return array
-def renumber_res_ids(array, start=None):
-    """
-    Renumber the residue IDs of the given array.
-    Parameters
-    ----------
-    array : AtomArray or AtomArrayStack
-        The array to be checked.
-    start : int, optional
-        The starting index for renumbering.
-        The first ID in the array is taken by default.
-    Returns
-    -------
-    array : AtomArray or AtomArrayStack
-        The renumbered array.
-    """
-    if start is None:
-        start = array.res_id[0]
-    diff = np.diff(array.res_id)
-    diff[diff != 0] = 1
-    new_res_ids =  np.concatenate(([start], diff)).cumsum()
-    array.res_id = new_res_ids
-    return array