PyPI - biotite - Versions diffs - 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl - Mend

biotite 1.0.1__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (177) hide show

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/dssp/app.py +13 -3
biotite/application/localapp.py +36 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +5 -18
biotite/application/muscle/app5.py +5 -5
biotite/application/sra/app.py +0 -5
biotite/application/util.py +22 -2
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +9 -3
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +8 -9
biotite/database/uniprot/check.py +22 -17
biotite/database/uniprot/download.py +3 -6
biotite/database/uniprot/query.py +4 -5
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +49 -14
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +26 -26
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +19 -2
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +58 -48
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +284 -57
biotite/sequence/align/matrix_data/3Di.mat +24 -0
biotite/sequence/align/matrix_data/PB.license +21 -0
biotite/sequence/align/matrix_data/PB.mat +18 -0
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +5 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
biotite/sequence/graphics/colorschemes.py +44 -11
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/sequence/profile.py +105 -29
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +136 -8
biotite/sequence/sequence.py +1 -2
biotite/setup_ccd.py +197 -0
biotite/structure/__init__.py +6 -3
biotite/structure/alphabet/__init__.py +25 -0
biotite/structure/alphabet/encoder.py +332 -0
biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
biotite/structure/alphabet/i3d.py +109 -0
biotite/structure/alphabet/layers.py +86 -0
biotite/structure/alphabet/pb.license +21 -0
biotite/structure/alphabet/pb.py +170 -0
biotite/structure/alphabet/unkerasify.py +128 -0
biotite/structure/atoms.py +163 -66
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +79 -25
biotite/structure/box.py +19 -21
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +74 -127
biotite/structure/hbond.py +17 -19
biotite/structure/info/__init__.py +1 -0
biotite/structure/info/atoms.py +24 -15
biotite/structure/info/bonds.py +12 -6
biotite/structure/info/ccd.py +125 -34
biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
biotite/structure/info/groups.py +62 -19
biotite/structure/info/masses.py +9 -6
biotite/structure/info/misc.py +15 -22
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +4 -4
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +80 -53
biotite/structure/io/pdb/convert.py +4 -3
biotite/structure/io/pdb/file.py +85 -25
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +36 -36
biotite/structure/io/pdbx/__init__.py +1 -0
biotite/structure/io/pdbx/bcif.py +54 -15
biotite/structure/io/pdbx/cif.py +92 -66
biotite/structure/io/pdbx/component.py +15 -4
biotite/structure/io/pdbx/compress.py +321 -0
biotite/structure/io/pdbx/convert.py +410 -75
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbx/encoding.pyx +98 -17
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +141 -156
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +69 -11
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +76 -4
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/METADATA +6 -2
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/RECORD +173 -143
biotite/structure/info/ccd/README.rst +0 -8
biotite/structure/info/ccd/amino_acids.txt +0 -1663
biotite/structure/info/ccd/carbohydrates.txt +0 -1135
biotite/structure/info/ccd/nucleotides.txt +0 -798
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.0.1.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/seqtypes.py CHANGED Viewed

@@ -4,10 +4,22 @@
 __name__ = "biotite.sequence"
 __author__ = "Patrick Kunzmann", "Thomas Nevolianis"
-__all__ = ["GeneralSequence", "NucleotideSequence", "ProteinSequence"]
+__all__ = [
+    "GeneralSequence",
+    "NucleotideSequence",
+    "ProteinSequence",
+    "PositionalSequence",
+    "PurePositionalSequence",
+]
+from dataclasses import dataclass, field
 import numpy as np
-from biotite.sequence.alphabet import AlphabetError, AlphabetMapper, LetterAlphabet
+from biotite.sequence.alphabet import (
+    Alphabet,
+    AlphabetError,
+    AlphabetMapper,
+    LetterAlphabet,
+)
 from biotite.sequence.sequence import Sequence
@@ -188,7 +200,6 @@ class NucleotideSequence(Sequence):
         TGCGAA
         >>> print(dna_seq.reverse().complement())
         AAGCGT
         """
         # Interpreting the sequence code of this object in the
         # complementary alphabet gives the complementary symbols
@@ -214,7 +225,7 @@ class NucleotideSequence(Sequence):
         complete : bool, optional
             If true, the complete sequence is translated. In this case
             the sequence length must be a multiple of 3.
-            Otherwise all ORFs are translated. (Default: False)
+            Otherwise all ORFs are translated.
         codon_table : CodonTable, optional
             The codon table to be used. By default the default table
             will be used
@@ -224,7 +235,6 @@ class NucleotideSequence(Sequence):
             even if the start codon codes for another amino acid.
             Otherwise the translation starts with the amino acid
             the codon codes for. Only applies, if `complete` is false.
-            (Default: False)
         Returns
         -------
@@ -254,7 +264,6 @@ class NucleotideSequence(Sequence):
         ...    print(seq)
         MML*
         ML*
         """
         if self._alphabet != NucleotideSequence.alphabet_unamb:
             raise AlphabetError("Translation requires unambiguous alphabet")
@@ -574,6 +583,11 @@ class ProteinSequence(Sequence):
         in the protein and the average isotopic mass of one water
         molecule.
+        Parameters
+        ----------
+        monoisotopic : bool
+            Use the mass of the most common isotope.
         Returns
         -------
         weight : float
@@ -587,6 +601,120 @@ class ProteinSequence(Sequence):
         if np.isnan(weight):
             raise ValueError(
-                "Sequence contains ambiguous amino acids, " "cannot calculate weight"
+                "Sequence contains ambiguous amino acids, cannot calculate weight"
             )
         return weight
+class PositionalSequence(Sequence):
+    """
+    A sequence where each symbol is associated with a position.
+    For each individual position the sequence contains a separate
+    :class:`PositionalSequence.Symbol`, encoded by a custom alphabet for this sequence.
+    In consequence the symbol code is the position in the sequence itself.
+    This is useful for aligning sequences based on a position-specific
+    substitution matrix.
+    Parameters
+    ----------
+    original_sequence : seq.Sequence
+        The original sequence to create the positional sequence from.
+    """
+    @dataclass(frozen=True)
+    class Symbol:
+        """
+        Combination of a symbol and its position in a sequence.
+        Attributes
+        ----------
+        original_alphabet : Alphabet
+            The original alphabet, where the symbol stems from.
+        original_code : int
+            The code of the original symbol in the original alphabet.
+        position : int
+            The 0-based position of the symbol in the sequence.
+        symbol : object
+            The symbol from the original alphabet.
+        See Also
+        --------
+        PositionalSequence
+            The sequence type containing :class:`PositionalSequence.Symbol` objects.
+        """
+        original_alphabet: ...
+        original_code: ...
+        position: ...
+        symbol: ... = field(init=False)
+        def __post_init__(self):
+            sym = self.original_alphabet.decode(self.original_code)
+            super().__setattr__("symbol", sym)
+        def __str__(self):
+            return str(self.symbol)
+    def __init__(self, original_sequence):
+        self._orig_alphabet = original_sequence.get_alphabet()
+        self._alphabet = Alphabet(
+            [
+                PositionalSequence.Symbol(self._orig_alphabet, code, pos)
+                for pos, code in enumerate(original_sequence.code)
+            ]
+        )
+        self.code = np.arange(
+            len(original_sequence), dtype=Sequence.dtype(len(self._alphabet))
+        )
+    def reconstruct(self):
+        """
+        Reconstruct the original sequence from the positional sequence.
+        Returns
+        -------
+        original_sequence : GeneralSequence
+            The original sequence.
+            Although the actual type of the returned sequence is always a
+            :class:`GeneralSequence`, the alphabet and the symbols of the returned
+            sequence are equal to the original sequence.
+        """
+        original_sequence = GeneralSequence(self._orig_alphabet)
+        original_sequence.code = np.array([sym.original_code for sym in self._alphabet])
+        return original_sequence
+    def get_alphabet(self):
+        return self._alphabet
+    def __str__(self) -> str:
+        return "".join([str(sym) for sym in self.symbols])
+    def __repr__(self):
+        return f"PositionalSequence({self.reconstruct()!r})"
+class PurePositionalSequence(Sequence):
+    """
+    An object of this class is a 'placeholder' sequence, where each symbol is the
+    position in the sequence itself.
+    This class is similar to :class:`PositionalSequence`, but the symbols are not
+    derived from an original sequence, but are the pure position.
+    Hence, there is no meaningful string representation of the sequence and its symbols.
+    Parameters
+    ----------
+    length : int
+        The length of the sequence.
+    """
+    def __init__(self, length):
+        self._alphabet = Alphabet(range(length))
+        self.code = np.arange(length, dtype=Sequence.dtype(length))
+    def get_alphabet(self):
+        return self._alphabet
+    def __repr__(self):
+        return f"PurePositionalSequence({len(self)})"

biotite/sequence/sequence.py CHANGED Viewed

@@ -139,7 +139,6 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
     >>> dna_seq_concat = dna_seq + dna_seq_rev
     >>> print(dna_seq_concat)
     ACGTAATGCA
     """
     def __init__(self, sequence=()):
@@ -354,7 +353,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        alpahabet_size : int
+        alphabet_size : int
             The size of the alphabet.
         Returns

biotite/setup_ccd.py ADDED Viewed

@@ -0,0 +1,197 @@
+__author__ = "Patrick Kunzmann"
+__all__ = []
+import gzip
+import logging
+from collections import defaultdict
+from io import StringIO
+from pathlib import Path
+import numpy as np
+import requests
+from biotite.structure.io.pdbx import *
+OUTPUT_CCD = Path(__file__).parent / "structure" / "info" / "components.bcif"
+CCD_URL = "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
+def concatenate_ccd(categories=None):
+    """
+    Create the CCD in BinaryCIF format, where each category contains the
+    data of all blocks.
+    Parameters
+    ----------
+    categories : list of str, optional
+        The names of the categories to include.
+        By default, all categories from the CCD are included.
+    Returns
+    -------
+    compressed_file : BinaryCIFFile
+        The compressed CCD in BinaryCIF format.
+    """
+    logging.info("Download and read CCD...")
+    ccd_cif_text = gzip.decompress(requests.get(CCD_URL).content).decode()
+    ccd_file = CIFFile.read(StringIO(ccd_cif_text))
+    compressed_block = BinaryCIFBlock()
+    if categories is None:
+        categories = _list_all_category_names(ccd_file)
+    for category_name in categories:
+        logging.info(f"Concatenate and compress '{category_name}' category...")
+        compressed_block[category_name] = compress(
+            _concatenate_blocks_into_category(ccd_file, category_name)
+        )
+    logging.info("Write concatenated CCD into BinaryCIF...")
+    compressed_file = BinaryCIFFile()
+    compressed_file["components"] = compressed_block
+    return compressed_file
+def _concatenate_blocks_into_category(pdbx_file, category_name):
+    """
+    Concatenate the given category from all blocks into a single
+    category.
+    Parameters
+    ----------
+    pdbx_file : PDBxFile
+        The PDBx file, whose blocks should be concatenated.
+    category_name : str
+        The name of the category to concatenate.
+    Returns
+    -------
+    category : BinaryCIFCategory
+        The concatenated category.
+    """
+    columns_names = _list_all_column_names(pdbx_file, category_name)
+    data_chunks = defaultdict(list)
+    mask_chunks = defaultdict(list)
+    for block in pdbx_file.values():
+        if category_name not in block:
+            continue
+        category = block[category_name]
+        for column_name in columns_names:
+            if column_name in category:
+                column = category[column_name]
+                data_chunks[column_name].append(column.data.array)
+                if column.mask is not None:
+                    mask_chunks[column_name].append(column.mask.array)
+                else:
+                    mask_chunks[column_name].append(
+                        np.full(category.row_count, MaskValue.PRESENT, dtype=np.uint8)
+                    )
+            else:
+                # Column is missing in this block
+                # -> handle it as data masked as 'missing'
+                data_chunks[column_name].append(
+                    # For now all arrays are of type string anyway,
+                    # as they are read from a CIF file
+                    np.full(category.row_count, "", dtype="U1")
+                )
+                mask_chunks[column_name].append(
+                    np.full(category.row_count, MaskValue.MISSING, dtype=np.uint8)
+                )
+    bcif_columns = {}
+    for col_name in columns_names:
+        data = np.concatenate(data_chunks[col_name])
+        mask = np.concatenate(mask_chunks[col_name])
+        data = _into_fitting_type(data, mask)
+        if np.all(mask == MaskValue.PRESENT):
+            mask = None
+        bcif_columns[col_name] = BinaryCIFColumn(data, mask)
+    return BinaryCIFCategory(bcif_columns)
+def _list_all_column_names(pdbx_file, category_name):
+    """
+    Get all columns that exist in any block for a given category.
+    Parameters
+    ----------
+    pdbx_file : PDBxFile
+        The PDBx file to search in for the columns.
+    category_name : str
+        The name of the category to search in.
+    Returns
+    -------
+    columns_names : list of str
+        The names of the columns.
+    """
+    columns_names = set()
+    for block in pdbx_file.values():
+        if category_name in block:
+            columns_names.update(block[category_name].keys())
+    return sorted(columns_names)
+def _list_all_category_names(pdbx_file):
+    """
+    Get all categories that exist in any block.
+    Parameters
+    ----------
+    pdbx_file : PDBxFile
+        The PDBx file to search in for the categories.
+    Returns
+    -------
+    category_names : list of str
+        The names of the categories.
+    """
+    category_names = set()
+    for block in pdbx_file.values():
+        category_names.update(block.keys())
+    return sorted(category_names)
+def _into_fitting_type(string_array, mask):
+    """
+    Try to find a numeric type for a string ndarray, if possible.
+    Parameters
+    ----------
+    string_array : ndarray, dtype=string
+        The array to convert.
+    mask : ndarray, dtype=uint8
+        Only values in `string_array` where the mask is ``MaskValue.PRESENT`` are
+        considered for type conversion.
+    Returns
+    -------
+    array : ndarray
+        The array converted into an appropriate dtype.
+    """
+    mask = mask == MaskValue.PRESENT
+    # Only try to find an appropriate dtype for unmasked values
+    values = string_array[mask]
+    try:
+        # Try to fit into integer type
+        values = values.astype(int)
+    except ValueError:
+        try:
+            # Try to fit into float type
+            values = values.astype(float)
+        except ValueError:
+            # Keep string type
+            pass
+    array = np.zeros(string_array.shape, dtype=values.dtype)
+    array[mask] = values
+    return array
+def main():
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")
+    OUTPUT_CCD.parent.mkdir(parents=True, exist_ok=True)
+    compressed_ccd = concatenate_ccd(["chem_comp", "chem_comp_atom", "chem_comp_bond"])
+    compressed_ccd.write(OUTPUT_CCD)
+if __name__ == "__main__":
+    main()

biotite/structure/__init__.py CHANGED Viewed

@@ -57,14 +57,15 @@ The annotation arrays can be accessed either via the method
 The following annotation categories are optionally used by some
 functions:
-=========  ===========  =================   ============================
+=========  ===========  =================   =========================================
 Category   Type         Examples            Description
-=========  ===========  =================   ============================
+=========  ===========  =================   =========================================
 atom_id    int          1,2,3, ...          Atom serial number
 b_factor   float        0.9, 12.3, ...      Temperature factor
 occupancy  float        .1, .3, .9, ...     Occupancy
 charge     int          -2,-1,0,1,2, ...    Electric charge of the atom
-=========  ===========  =================   ============================
+sym_id     string       '1','2','3', ...    Symmetry ID for assemblies/symmetry mates
+=========  ===========  =================   =========================================
 For each type, the attributes can be accessed directly.
 Both :class:`AtomArray` and :class:`AtomArrayStack` support
@@ -124,9 +125,11 @@ from .pseudoknots import *
 from .rdf import *
 from .repair import *
 from .residues import *
+from .rings import *
 from .sasa import *
 from .sequence import *
 from .sse import *
 from .superimpose import *
+from .tm import *
 from .transform import *
 # util and segments are used internally

biotite/structure/alphabet/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+A subpackage for converting structures to structural alphabet sequences.
+Structural alphabets represent the local geometry of each residue in a structure as
+a symbol in a sequence.
+This allows using sequence-based functionality from :mod:`biotite.sequence` on
+structural data.
+For each supported structural alphabet, this subpackage provides a conversion function
+that converts each chain of a given structure into a :class:`Sequence` object from the
+respective structural alphabet.
+Note that the structural alphabets use lower-case letters as symbols, in order to
+distinguish them better from the nucleotide and amino acid alphabets.
+"""
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde, Patrick Kunzmann"
+from .i3d import *
+from .pb import *