PyPI - biotite - Versions diffs - 0.41.1__cp312-cp312-macosx_10_16_arm64.whl - Mend

biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show

biotite/__init__.py +19 -0
biotite/application/__init__.py +43 -0
biotite/application/application.py +265 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +505 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +83 -0
biotite/application/blast/webapp.py +421 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +238 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +152 -0
biotite/application/localapp.py +306 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +122 -0
biotite/application/msaapp.py +374 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +254 -0
biotite/application/muscle/app5.py +171 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +456 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +222 -0
biotite/application/util.py +59 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +304 -0
biotite/application/viennarna/rnafold.py +269 -0
biotite/application/viennarna/rnaplot.py +187 -0
biotite/application/viennarna/util.py +72 -0
biotite/application/webapp.py +77 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +61 -0
biotite/database/entrez/dbnames.py +89 -0
biotite/database/entrez/download.py +223 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +223 -0
biotite/database/error.py +15 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +260 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +827 -0
biotite/database/pubchem/throttle.py +99 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +167 -0
biotite/database/rcsb/query.py +959 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +32 -0
biotite/database/uniprot/download.py +134 -0
biotite/database/uniprot/query.py +209 -0
biotite/file.py +251 -0
biotite/sequence/__init__.py +73 -0
biotite/sequence/align/__init__.py +49 -0
biotite/sequence/align/alignment.py +658 -0
biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +434 -0
biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +574 -0
biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3400 -0
biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +405 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +305 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
biotite/sequence/align/selector.pyx +956 -0
biotite/sequence/align/statistics.py +265 -0
biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +566 -0
biotite/sequence/annotation.py +829 -0
biotite/sequence/codec.cpython-312-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +466 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1034 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +139 -0
biotite/sequence/graphics/dendrogram.py +184 -0
biotite/sequence/graphics/features.py +510 -0
biotite/sequence/graphics/logo.py +110 -0
biotite/sequence/graphics/plasmid.py +661 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +273 -0
biotite/sequence/io/fasta/file.py +278 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +120 -0
biotite/sequence/io/fastq/file.py +551 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +277 -0
biotite/sequence/io/genbank/file.py +575 -0
biotite/sequence/io/genbank/metadata.py +324 -0
biotite/sequence/io/genbank/sequence.py +172 -0
biotite/sequence/io/general.py +192 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +133 -0
biotite/sequence/io/gff/file.py +434 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +456 -0
biotite/sequence/search.py +116 -0
biotite/sequence/seqtypes.py +556 -0
biotite/sequence/sequence.py +374 -0
biotite/structure/__init__.py +132 -0
biotite/structure/atoms.py +1455 -0
biotite/structure/basepairs.py +1415 -0
biotite/structure/bonds.cpython-312-darwin.so +0 -0
biotite/structure/bonds.pyx +1933 -0
biotite/structure/box.py +592 -0
biotite/structure/celllist.cpython-312-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +298 -0
biotite/structure/charges.cpython-312-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +114 -0
biotite/structure/dotbracket.py +216 -0
biotite/structure/error.py +31 -0
biotite/structure/filter.py +585 -0
biotite/structure/geometry.py +697 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +226 -0
biotite/structure/graphics/rna.py +282 -0
biotite/structure/hbond.py +409 -0
biotite/structure/info/__init__.py +25 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +82 -0
biotite/structure/info/bonds.py +145 -0
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +123 -0
biotite/structure/info/misc.py +144 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +196 -0
biotite/structure/integrity.py +268 -0
biotite/structure/io/__init__.py +30 -0
biotite/structure/io/ctab.py +72 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +65 -0
biotite/structure/io/general.py +257 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +343 -0
biotite/structure/io/mmtf/__init__.py +21 -0
biotite/structure/io/mmtf/assembly.py +214 -0
biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +341 -0
biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +501 -0
biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +152 -0
biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +183 -0
biotite/structure/io/mmtf/file.py +233 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +115 -0
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/mol.py +193 -0
biotite/structure/io/mol/sdf.py +916 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +63 -0
biotite/structure/io/npz/__init__.py +20 -0
biotite/structure/io/npz/file.py +152 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +293 -0
biotite/structure/io/pdb/file.py +1240 -0
biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +107 -0
biotite/structure/io/pdbqt/file.py +640 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +1597 -0
biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +950 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/io/tng/__init__.py +13 -0
biotite/structure/io/tng/file.py +46 -0
biotite/structure/io/trajfile.py +710 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +46 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +46 -0
biotite/structure/mechanics.py +75 -0
biotite/structure/molecules.py +353 -0
biotite/structure/pseudoknots.py +642 -0
biotite/structure/rdf.py +243 -0
biotite/structure/repair.py +253 -0
biotite/structure/residues.py +562 -0
biotite/structure/resutil.py +178 -0
biotite/structure/sasa.cpython-312-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/sequence.py +112 -0
biotite/structure/sse.py +327 -0
biotite/structure/superimpose.py +727 -0
biotite/structure/transform.py +504 -0
biotite/structure/util.py +98 -0
biotite/temp.py +86 -0
biotite/version.py +16 -0
biotite/visualize.py +251 -0
biotite-0.41.1.dist-info/METADATA +187 -0
biotite-0.41.1.dist-info/RECORD +340 -0
biotite-0.41.1.dist-info/WHEEL +4 -0
biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0

biotite/application/tantan/app.py ADDED Viewed

@@ -0,0 +1,222 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.application.tantan"
+__author__ = "Patrick Kunzmann"
+__all__ = ["TantanApp"]
+from collections.abc import Sequence as SequenceABC
+import io
+from tempfile import NamedTemporaryFile
+import numpy as np
+from ..localapp import LocalApp, cleanup_tempfile
+from ..application import AppState, requires_state
+from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
+from ...sequence.alphabet import common_alphabet
+from ...sequence.io.fasta.file import FastaFile
+from ..util import map_sequence, map_matrix
+# Letter handed to the tantan binary via its ``-x`` option (see
+# ``TantanApp.run()``); positions of the program output that equal this
+# letter are reported as masked in ``TantanApp.evaluate()``.
+# NOTE(review): presumably chosen because it is not a symbol of the
+# nucleotide or protein alphabets - confirm.
+MASKING_LETTER = "!"
+class TantanApp(LocalApp):
+    r"""
+    Mask sequence repeat regions using *tantan*. :footcite:`Frith2011`
+    Parameters
+    ----------
+    sequence : (list of) NucleotideSequence or ProteinSequence
+        The sequence(s) to be masked.
+        Either a single sequence or multiple sequences can be masked.
+        Masking multiple sequences in a single run decreases the
+        run time compared to multiple runs with a single sequence.
+        All sequences must be of the same type.
+    matrix : SubstitutionMatrix, optional
+        The substitution matrix to use for repeat identification.
+        A sequence segment is considered to be a repeat of another
+        segment, if the substitution score between these segments is
+        greater than a threshold value.
+        The matrix must be symmetric and its first alphabet must extend
+        the common alphabet of the given sequences.
+    bin_path : str, optional
+        Path of the *tantan* binary.
+    Raises
+    ------
+    TypeError
+        If an input sequence is neither a :class:`NucleotideSequence`
+        nor a :class:`ProteinSequence`.
+    ValueError
+        If nucleotide and protein sequences are mixed, or if a `matrix`
+        is given that is not symmetric or does not fit the alphabet of
+        the sequences.
+    References
+    ----------
+    .. footbibliography::
+    Examples
+    --------
+    >>> sequence = NucleotideSequence("GGCATCGATATATATATATAGTCAA")
+    >>> app = TantanApp(sequence)
+    >>> app.start()
+    >>> app.join()
+    >>> repeat_mask = app.get_mask()
+    >>> print(repeat_mask)
+    [False False False False False False False False False  True  True  True
+      True  True  True  True  True  True  True  True False False False False
+     False]
+    >>> print(sequence, "\n" + "".join(["^" if e else " " for e in repeat_mask]))
+    GGCATCGATATATATATATAGTCAA
+             ^^^^^^^^^^^
+    """
+    def __init__(self, sequence, matrix=None, bin_path="tantan"):
+        super().__init__(bin_path)
+        # Any ``collections.abc.Sequence`` (e.g. list or tuple) is taken
+        # as a collection of input sequences; everything else is treated
+        # as a single sequence.
+        # '_as_list' remembers which form get_mask() has to return.
+        if isinstance(sequence, SequenceABC):
+            self._as_list = True
+            self._sequences = sequence
+        else:
+            # Convert to list of sequences anyway for consistent handling
+            self._as_list = False
+            self._sequences = [sequence]
+        # Tri-state flag: None until the first sequence has been
+        # inspected, afterwards True (protein) or False (nucleotide).
+        # It stays None if an empty collection was given.
+        self._is_protein = None
+        for seq in self._sequences:
+            if isinstance(seq, NucleotideSequence):
+                if self._is_protein is True:
+                    # Already protein sequences in the list
+                    raise ValueError(
+                        "List of sequences contains mixed "
+                        "nucleotide and protein sequences"
+                    )
+                self._is_protein = False
+            elif isinstance(seq, ProteinSequence):
+                if self._is_protein is False:
+                    # Already nucleotide sequences in the list
+                    raise ValueError(
+                        "List of sequences contains mixed "
+                        "nucleotide and protein sequences"
+                    )
+                self._is_protein = True
+            else:
+                raise TypeError(
+                    "A NucleotideSequence or ProteinSequence is required"
+                )
+        if matrix is None:
+            # No custom matrix -> no matrix file is passed to the binary
+            self._matrix_file = None
+        else:
+            # The matrix is only validated here;
+            # it is written to the temporary file later in run()
+            common_alph = common_alphabet(
+                (seq.alphabet for seq in self._sequences)
+            )
+            if common_alph is None:
+                raise ValueError(
+                    "There is no common alphabet within the sequences"
+                )
+            if not matrix.get_alphabet1().extends(common_alph):
+                raise ValueError(
+                    "The alphabet of the sequence(s) do not fit the matrix"
+                )
+            if not matrix.is_symmetric():
+                raise ValueError("A symmetric matrix is required")
+            # 'delete=False': the file is not removed when it is closed,
+            # it is handed to cleanup_tempfile() in clean_up() instead
+            self._matrix_file = NamedTemporaryFile(
+                "w", suffix=".mat", delete=False
+            )
+        self._matrix = matrix
+        # Created after all validation, so a failed check above does not
+        # leave an input file behind
+        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
+    def run(self):
+        # Write all input sequences into one FASTA file,
+        # using the headers 'sequence_0', 'sequence_1', ...
+        FastaFile.write_iter(
+            self._in_file,
+            (
+                (f"sequence_{i:d}", str(seq))
+                for i, seq in enumerate(self._sequences)
+            )
+        )
+        # Flush, as the file is still open while it is referenced by
+        # name in the command line arguments below
+        self._in_file.flush()
+        if self._matrix is not None:
+            # The string representation of the matrix is used as file
+            # content
+            # NOTE(review): presumably this matches the matrix format
+            # expected by tantan - confirm
+            self._matrix_file.write(str(self._matrix))
+            self._matrix_file.flush()
+        # Assemble the command line: optional matrix file ('-m'),
+        # protein flag ('-p'), masking letter ('-x') and finally the
+        # input file as positional argument
+        args = []
+        if self._matrix is not None:
+            args += ["-m", self._matrix_file.name]
+        if self._is_protein:
+             args += ["-p"]
+        args += [
+            "-x", MASKING_LETTER,
+            self._in_file.name
+        ]
+        self.set_arguments(args)
+        super().run()
+    def evaluate(self):
+        super().evaluate()
+        # The standard output of the program is parsed as FASTA file
+        out_file = io.StringIO(self.get_stdout())
+        self._masks = []
+        # Byte value of the masking letter for the comparison with the
+        # ASCII-encoded output sequences
+        encoded_masking_letter = MASKING_LETTER.encode("ASCII")[0]
+        for _, masked_seq_string in FastaFile.read_iter(out_file):
+            # View the sequence string as array of byte values...
+            array = np.frombuffer(
+                masked_seq_string.encode("ASCII"), dtype=np.ubyte
+            )
+            # ...so each position can be compared to the masking letter,
+            # giving one boolean mask per output sequence
+            self._masks.append(array == encoded_masking_letter)
+    def clean_up(self):
+        super().clean_up()
+        # Dispose of the temporary files created in the constructor
+        cleanup_tempfile(self._in_file)
+        if self._matrix_file is not None:
+            cleanup_tempfile(self._matrix_file)
+    @requires_state(AppState.JOINED)
+    def get_mask(self):
+        """
+        Get a boolean mask covering identified repeat regions of each
+        input sequence.
+        Returns
+        -------
+        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
+            A boolean mask that is true for each sequence position that
+            is identified as repeat.
+            If a list of sequences was given as input, a list of masks
+            is returned instead.
+        """
+        if self._as_list:
+            return self._masks
+        else:
+            # A single sequence was given -> unwrap the only mask
+            return self._masks[0]
+    @staticmethod
+    def mask_repeats(sequence, matrix=None, bin_path="tantan"):
+        """
+        Mask repeat regions of the given input sequence(s).
+        This is a convenience function that creates a
+        :class:`TantanApp`, starts it, waits for it to finish and
+        returns its result.
+        Parameters
+        ----------
+        sequence : (list of) NucleotideSequence or ProteinSequence
+            The sequence(s) to be masked.
+            Either a single sequence or multiple sequences can be masked.
+            Masking multiple sequences in a single run decreases the
+            run time compared to multiple runs with a single sequence.
+            All sequences must be of the same type.
+        matrix : SubstitutionMatrix, optional
+            The substitution matrix to use for repeat identification.
+            A sequence segment is considered to be a repeat of another
+            segment, if the substitution score between these segments is
+            greater than a threshold value.
+        bin_path : str, optional
+            Path of the *tantan* binary.
+        Returns
+        -------
+        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
+            A boolean mask that is true for each sequence position that
+            is identified as repeat.
+            If a list of sequences was given as input, a list of masks
+            is returned instead.
+        """
+        app = TantanApp(sequence, matrix, bin_path)
+        app.start()
+        app.join()
+        return app.get_mask()

biotite/application/util.py ADDED Viewed

@@ -0,0 +1,59 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.application"
+__author__ = "Patrick Kunzmann"
+__all__ = ["map_sequence", "map_matrix"]
+import numpy as np
+from ..sequence.seqtypes import ProteinSequence
+from ..sequence.align.matrix import SubstitutionMatrix
+def map_sequence(sequence):
+    """
+    Map a sequence with an arbitrary alphabet into a
+    :class:`ProteinSequence`, in order to support arbitrary sequence
+    types in software that can handle protein sequences.
+    """
+    if len(sequence.alphabet) > len(ProteinSequence.alphabet):
+        # Cannot map into a protein sequence if the alphabet
+        # has more symbols
+        raise TypeError(
+            f"The software cannot align sequences of type "
+            f"{type(sequence).__name__}: "
+            f"Alphabet is too large to be converted into amino "
+            f"acid alphabet"
+        )
+    # Mapping is done by simply taking over the sequence
+    # code of the original sequence
+    mapped_sequence = ProteinSequence()
+    mapped_sequence.code = sequence.code
+    return mapped_sequence
+def map_matrix(matrix):
+    """
+    Map a :class:`SubstitutionMatrix` with an arbitrary alphabet into a
+    class:`SubstitutionMatrix` for protein sequences, in order to support
+    arbitrary sequence types in software that can handle protein
+    sequences.
+    """
+    if matrix is None:
+        raise TypeError(
+            "A substitution matrix must be provided for custom "
+            "sequence types"
+        )
+    # Create a protein substitution matrix with the values taken
+    # from the original matrix
+    # All trailing symbols are filled with zeros
+    old_length = len(matrix.get_alphabet1())
+    new_length = len(ProteinSequence.alphabet)
+    new_score_matrix = np.zeros((new_length, new_length))
+    new_score_matrix[:old_length, :old_length] = matrix.score_matrix()
+    return SubstitutionMatrix(
+        ProteinSequence.alphabet, ProteinSequence.alphabet,
+        new_score_matrix
+    )

biotite/application/viennarna/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+A subpackage that provides interfaces to the *ViennaRNA* software
+package.
+Secondary structures can be predicted using *RNAfold* and plotted using
+*RNAplot*.
+"""
+__name__ = "biotite.application.viennarna"
+__author__ = "Tom David Müller"
+from .rnaalifold import *
+from .rnafold import *
+from .rnaplot import *

biotite/application/viennarna/rnaalifold.py ADDED Viewed

@@ -0,0 +1,304 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.application.viennarna"
+__author__ = "Tom David Müller"
+__all__ = ["RNAalifoldApp"]
+import copy
+from tempfile import NamedTemporaryFile
+import numpy as np
+from ..application import AppState, requires_state
+from ..localapp import LocalApp, cleanup_tempfile
+from ...sequence.io.fasta import FastaFile, set_alignment
+from ...structure.dotbracket import base_pairs_from_dot_bracket
+from ...structure.bonds import BondList
+from .util import build_constraint_string
+class RNAalifoldApp(LocalApp):
+    """
+    Predict the consensus secondary structure from a ribonucleic acid alignment
+    using *ViennaRNA's* *RNAalifold* software.
+    In contrast to :class:`RNAfoldApp`, the energy function includes
+    a term that includes coevolution information extracted from an
+    alignment in addition to the physical free energy term.
+    Internally this creates a :class:`Popen` instance, which handles
+    the execution.
+    Parameters
+    ----------
+    alignment : Alignment
+        An alignment of RNA sequences.
+    temperature : int, optional
+        The temperature (°C) to be assumed for the energy parameters.
+    bin_path : str, optional
+        Path of the *RNAalifold* binary.
+    """
+    def __init__(self, alignment, temperature=37, bin_path="RNAalifold"):
+        super().__init__(bin_path)
+        self._alignment = copy.deepcopy(alignment)
+        self._temperature = str(temperature)
+        self._constraints = None
+        self._enforce = None
+        self._in_file = NamedTemporaryFile(
+            "w", suffix=".fa", delete=False
+        )
+        self._constraints_file = NamedTemporaryFile(
+            "w+", suffix=".constraints", delete=False
+        )
+    def run(self):
+        # Insert no line breaks
+        # -> Extremely high value for characters per line
+        fasta_file = FastaFile(chars_per_line=np.iinfo(np.int32).max)
+        set_alignment(
+            fasta_file, self._alignment,
+            seq_names=[str(i) for i in range(len(self._alignment.sequences))]
+        )
+        fasta_file.write(self._in_file)
+        self._in_file.flush()
+        options = [
+            "--noPS",
+            "-T", self._temperature,
+        ]
+        if self._enforce is True:
+            options.append("--enforceConstraint")
+        if self._constraints is not None:
+            options.append("-C")
+            self._constraints_file.write(self._constraints)
+            self._constraints_file.flush()
+            self._constraints_file.seek(0)
+            self.set_stdin(self._constraints_file)
+        self.set_arguments(options + [self._in_file.name])
+        super().run()
+    def clean_up(self):
+        super().clean_up()
+        cleanup_tempfile(self._in_file)
+        cleanup_tempfile(self._constraints_file)
+    def evaluate(self):
+        super().evaluate()
+        lines = self.get_stdout().splitlines()
+        self._consensus = lines[0].strip()
+        result = lines[1]
+        dotbracket, total_energy = result.split(" ", maxsplit=1)
+        # Energy has the form:
+        # (<total> = <free> + <covariance>)
+        total_energy = total_energy[1:-1]
+        energy_contributions = total_energy.split("=")[1].split("+")
+        self._free_energy = float(energy_contributions[0])
+        self._covariance_energy = float(energy_contributions[1])
+        self._dotbracket = dotbracket
+    @requires_state(AppState.CREATED)
+    def set_temperature(self, temperature):
+        """
+        Adjust the energy parameters according to a temperature in
+        degrees Celsius.
+        Parameters
+        ----------
+        temperature : int
+            The temperature.
+        """
+        self._temperature = str(temperature)
+    @requires_state(AppState.CREATED)
+    def set_constraints(self, pairs=None, paired=None, unpaired=None,
+                        downstream=None, upstream=None, enforce=False):
+        """
+        Add constraints of known paired or unpaired bases to the folding
+        algorithm.
+        Constraints forbid pairs conflicting with the respective
+        constraint.
+        Parameters
+        ----------
+        pairs : ndarray, shape=(n,2), dtype=int, optional
+            Positions of constrained base pairs.
+        paired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
+            Positions of bases that are paired with any other base.
+        unpaired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
+            Positions of bases that are unpaired.
+        downstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
+            Positions of bases that are paired with any downstream base.
+        upstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
+            Positions of bases that are paired with any upstream base.
+        enforce : bool, optional
+            If set to true, the given constraints are enforced, i.e. a
+            the respective base pairs must form.
+            By default (false), a constraint does only forbid formation
+            of a pair that would conflict with this constraint.
+        Warnings
+        --------
+        If a constraint is given for a gap position in the consensus sequence,
+        the software may find no base pairs at all.
+        """
+        self._constraints = build_constraint_string(
+            len(self._alignment),
+            pairs, paired, unpaired, downstream, upstream
+        )
+        self._enforce = enforce
+    @requires_state(AppState.JOINED)
+    def get_free_energy(self):
+        """
+        Get the free energy (kcal/mol) of the suggested consensus
+        secondary structure.
+        Returns
+        -------
+        free_energy : float
+            The free energy.
+        Notes
+        -----
+        The total energy of the secondary structure regarding the
+        minimization objective is the sum of the free energy and the
+        covariance term.
+        See also
+        --------
+        get_covariance_energy
+        """
+        return self._free_energy
+    @requires_state(AppState.JOINED)
+    def get_covariance_energy(self):
+        """
+        Get the energy of the artificial covariance term (kcal/mol) of
+        the suggested consensus secondary structure.
+        Returns
+        -------
+        covariance_energy : float
+            The energy of the covariance term.
+        Notes
+        -----
+        The total energy of the secondary structure regarding the
+        minimization objective is the sum of the free energy and the
+        covariance term.
+        See also
+        --------
+        get_free_energy
+        """
+        return self._covariance_energy
+    @requires_state(AppState.JOINED)
+    def get_consensus_sequence_string(self):
+        """
+        Get the consensus sequence.
+        As the consensus may contain gaps, the sequence is returned as
+        string.
+        Returns
+        -------
+        consensus : str
+            The consensus sequence.
+        """
+        return self._consensus
+    @requires_state(AppState.JOINED)
+    def get_dot_bracket(self):
+        """
+        Get the consensus secondary structure in dot bracket notation.
+        Returns
+        -------
+        dotbracket : str
+            The secondary structure in dot bracket notation.
+        """
+        return self._dotbracket
+    @requires_state(AppState.JOINED)
+    def get_base_pairs(self, sequence_index=None):
+        """
+        Get the base pairs from the suggested secondary structure.
+        Parameters
+        ----------
+        sequence_index : int, optional
+            By default, the base pairs point to positions in the
+            alignment.
+            If `sequence_index` is set, the returned base pairs point to
+            positions in the given sequence, instead.
+            The sequence is specified as index in the alignment.
+            For example, if the alignment comprises three sequences,
+            `sequence_index` is in range 0-2.
+        Returns
+        -------
+        base_pairs : ndarray, shape=(n,2)
+            Each row corresponds to the positions of the bases in the
+            alignment.
+            If `sequence_index` is set, the positions correspond to the
+            given sequence.
+        """
+        base_pairs = base_pairs_from_dot_bracket(self._dotbracket)
+        if sequence_index is not None:
+            trace = self._alignment.trace[:, sequence_index]
+            # Map base pairs that point to consensus to base pairs that
+            # point to given sequence, which is only a subsequence
+            # (without gaps) of consensus sequence
+            # This is not trivial:
+            # The pairs that are not part of the subsequence must be
+            # removed and all other pairs need to be shifted
+            # To solve this problem a BondList is 'misused', since it
+            # is build to solve the same problem on the level of atoms
+            # Here the 'bonds' in the BondList are base pairs and the indices
+            # are base positions
+            pair_list = BondList(len(self._alignment), base_pairs)
+            # Remove all pairs that appear in gaps of given sequence
+            pair_list = pair_list[trace != -1]
+            # Convert back to array of base pairs,
+            # remove unused BondType column
+            base_pairs = pair_list.as_array()[:,:2]
+        return base_pairs
+    @staticmethod
+    def compute_secondary_structure(alignment, bin_path="RNAalifold"):
+        """
+        Predict the secondary structure of a ribonucleic acid sequence
+        using *ViennaRNA's* *RNAalifold* software.
+        This is a convenience function, that wraps the
+        :class:`RNAalifoldApp` execution.
+        Parameters
+        ----------
+        alignment : Alignment
+            An alignment of RNA sequences.
+        bin_path : str, optional
+            Path of the *RNAalifold* binary.
+        Returns
+        -------
+        dotbracket : str
+            The secondary structure in dot bracket notation.
+        free_energy : float
+            The free energy.
+        covariance_energy : float
+            The energy of the covariance term.
+        """
+        app = RNAalifoldApp(alignment, bin_path=bin_path)
+        app.start()
+        app.join()
+        return (
+            app.get_dot_bracket(),
+            app.get_free_energy(),
+            app.get_covariance_energy()
+        )