biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["MSAApp"]
|
|
8
|
+
|
|
9
|
+
import abc
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
from tempfile import NamedTemporaryFile
|
|
12
|
+
import numpy as np
|
|
13
|
+
from biotite.application.application import AppState, requires_state
|
|
14
|
+
from biotite.application.localapp import LocalApp, cleanup_tempfile
|
|
15
|
+
from biotite.application.util import map_matrix, map_sequence
|
|
16
|
+
from biotite.sequence.align.alignment import Alignment
|
|
17
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
18
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MSAApp(LocalApp, metaclass=abc.ABCMeta):
    """
    This is an abstract base class for multiple sequence alignment
    software.

    It handles conversion of :class:`Sequence` objects to FASTA input
    and FASTA output to an :class:`Alignment` object.
    Inheriting subclasses only need to incorporate the file path
    of these FASTA files into the program arguments.

    Furthermore, this class can handle custom substitution matrices,
    if the underlying program supports these.

    MSA software that supports alignment of protein sequences and custom
    substitution matrices, can be used to align exotic, normally
    unsupported sequence types:
    At first the exotic sequences are mapped into protein sequences and
    the custom substitution matrix is converted into a protein sequence
    substitution matrix.
    Then the protein sequences are aligned and finally the protein
    sequences are mapped back into the original sequence types.
    The mapping does not work, when the alphabet of the exotic
    sequences is larger than the amino acid alphabet.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    Parameters
    ----------
    sequences : iterable object of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MSA software binary.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.

    Raises
    ------
    ValueError
        If less than two sequences are given, the sequences do not
        share the same alphabet or the matrix is not symmetric.
    TypeError
        If the software supports neither the given sequence type nor
        the mapping approach described above, or if a custom matrix
        is given but not supported for the sequence type.
    """

    def __init__(self, sequences, bin_path, matrix=None):
        super().__init__(bin_path)

        # The checks below need len() and indexing, which an arbitrary
        # iterable does not offer -> materialize the input once
        sequences = list(sequences)
        if len(sequences) < 2:
            raise ValueError("At least two sequences are required")
        # Check if all sequences share the same alphabet
        alphabet = sequences[0].get_alphabet()
        for seq in sequences:
            if seq.get_alphabet() != alphabet:
                raise ValueError("Alphabets of the sequences are not equal")
        # Check matrix symmetry
        if matrix is not None and not matrix.is_symmetric():
            raise ValueError(
                "A symmetric matrix is required for multiple sequence alignments"
            )

        # Check whether the program supports the alignment for the given
        # sequence type
        if ProteinSequence.alphabet.extends(alphabet) and self.supports_protein():
            self._is_mapped = False
            self._seqtype = "protein"
            if matrix is not None and not self.supports_custom_protein_matrix():
                raise TypeError(
                    "The software does not support custom "
                    "substitution matrices for protein sequences"
                )
            # Either the supported custom matrix or None
            self._matrix = matrix

        elif (
            NucleotideSequence.alphabet_amb.extends(alphabet)
            and self.supports_nucleotide()
        ):
            self._is_mapped = False
            self._seqtype = "nucleotide"
            if matrix is not None and not self.supports_custom_nucleotide_matrix():
                raise TypeError(
                    "The software does not support custom "
                    "substitution matrices for nucleotide sequences"
                )
            # Either the supported custom matrix or None
            self._matrix = matrix

        else:
            # For all other sequence types, try to map the sequence into
            # a protein sequence
            seq_type_name = type(sequences[0]).__name__
            if not self.supports_protein():
                # Alignment of a custom sequence type requires mapping
                # into a protein sequence
                raise TypeError(
                    f"The software cannot align sequences of type "
                    f"{seq_type_name}: "
                    f"No support for alignment of the mapped sequences"
                )
            if not self.supports_custom_protein_matrix():
                # Alignment of a custom sequence type requires a custom
                # substitution matrix
                raise TypeError(
                    f"The software cannot align sequences of type "
                    f"{seq_type_name}: "
                    f"No support for custom substitution matrices"
                )
            self._is_mapped = True
            # Sequence masquerades as protein
            self._seqtype = "protein"
            self._mapped_sequences = [map_sequence(seq) for seq in sequences]
            self._matrix = map_matrix(matrix)

        self._sequences = sequences
        # 'delete=False': the files are removed explicitly in 'clean_up()'
        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
        self._out_file = NamedTemporaryFile("r", suffix=".fa", delete=False)
        self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)

    def run(self):
        """
        Write the input files and hand over to the parent ``run()``.

        The (mapped) sequences are written into the FASTA input file,
        using the index of each sequence as its header.
        If a substitution matrix is set, it is written into the matrix
        file as well.
        """
        sequences = self._mapped_sequences if self._is_mapped else self._sequences
        sequences_file = FastaFile()
        for i, seq in enumerate(sequences):
            # The index is used as header, so 'evaluate()' can assign
            # each output record to its input sequence
            sequences_file[str(i)] = str(seq)
        sequences_file.write(self._in_file)
        self._in_file.flush()
        if self._matrix is not None:
            self._matrix_file.write(str(self._matrix))
            self._matrix_file.flush()
        super().run()

    def evaluate(self):
        """
        Read the FASTA output file and build the alignment from it.

        The alignment is stored in the order of the input sequences,
        while the record order of the output file is kept separately
        for :meth:`get_alignment_order()`.
        """
        super().evaluate()
        alignment_file = FastaFile.read(self._out_file)
        seq_dict = OrderedDict(alignment_file)
        # Get alignment:
        # The headers are the input indices written in 'run()', hence a
        # lookup in ascending index order restores the input order
        out_seq_str = [seq_dict[str(i)] for i in range(len(self._sequences))]
        trace = Alignment.trace_from_strings(out_seq_str)
        self._alignment = Alignment(self._sequences, trace, None)
        # Also obtain original order, i.e. the order of the records
        # in the output file
        self._order = np.array([int(header) for header in seq_dict], dtype=int)

    def clean_up(self):
        """
        Run the parent ``clean_up()`` and remove the temporary input,
        output and matrix files.
        """
        super().clean_up()
        cleanup_tempfile(self._in_file)
        cleanup_tempfile(self._out_file)
        cleanup_tempfile(self._matrix_file)

    @requires_state(AppState.JOINED)
    def get_alignment(self):
        """
        Get the resulting multiple sequence alignment.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        return self._alignment

    @requires_state(AppState.JOINED)
    def get_alignment_order(self):
        """
        Get the order of the resulting multiple sequence alignment.

        Usually the order of sequences in the output file is
        different from the input file, e.g. the sequences are ordered
        according to the guide tree.
        After running an MSA software, the output sequence order of
        the alignment is rearranged so that it is the same as the input
        order.
        This method returns the order of the sequences intended by the
        MSA software.

        Returns
        -------
        order : ndarray, dtype=int
            The sequence order intended by the MSA software.

        Examples
        --------
        Align sequences and restore the original order:

        app = ClustalOmegaApp(sequences)
        app.start()
        app.join()
        alignment = app.get_alignment()
        order = app.get_alignment_order()
        alignment = alignment[:, order]
        """
        return self._order

    def get_input_file_path(self):
        """
        Get input file path (FASTA format).

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str
            Path of input file.
        """
        return self._in_file.name

    def get_output_file_path(self):
        """
        Get output file path (FASTA format).

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str
            Path of output file.
        """
        return self._out_file.name

    def get_matrix_file_path(self):
        """
        Get file path for custom substitution matrix.

        PROTECTED: Do not call from outside.

        Returns
        -------
        path : str or None
            Path of substitution matrix.
            None if no matrix was given.
        """
        return self._matrix_file.name if self._matrix is not None else None

    def get_seqtype(self):
        """
        Get the type of aligned sequences.

        When a custom sequence type (neither nucleotide nor protein)
        is mapped onto a protein sequence, the return value is also
        ``'protein'``.

        PROTECTED: Do not call from outside.

        Returns
        -------
        seqtype : {'nucleotide', 'protein'}
            Type of sequences to be aligned.
        """
        return self._seqtype

    @staticmethod
    @abc.abstractmethod
    def supports_nucleotide():
        """
        Check whether this class supports nucleotide sequences for
        alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_protein():
        """
        Check whether this class supports protein sequences for
        alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_custom_nucleotide_matrix():
        """
        Check whether this class supports custom substitution matrices
        for nucleotide sequence alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def supports_custom_protein_matrix():
        """
        Check whether this class supports custom substitution matrices
        for protein sequence alignment.

        PROTECTED: Override when inheriting.

        Returns
        -------
        support : bool
            True, if the class has support, false otherwise.
        """
        pass

    @classmethod
    def align(cls, sequences, bin_path=None, matrix=None):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`MSAApp`
        execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default
            path will be used.
        matrix : SubstitutionMatrix, optional
            A custom substitution matrix.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        if bin_path is None:
            # Omit the argument, so the constructor of the concrete
            # class can apply its own default binary path
            app = cls(sequences, matrix=matrix)
        else:
            app = cls(sequences, bin_path, matrix=matrix)
        app.start()
        app.join()
        return app.get_alignment()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for multiple sequence alignments using MUSCLE.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.application.muscle"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .app3 import *
|
|
13
|
+
from .app5 import *
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.muscle"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["MuscleApp"]
|
|
8
|
+
|
|
9
|
+
import numbers
|
|
10
|
+
import warnings
|
|
11
|
+
from collections.abc import Sequence
|
|
12
|
+
from tempfile import NamedTemporaryFile
|
|
13
|
+
from biotite.application.application import AppState, VersionError, requires_state
|
|
14
|
+
from biotite.application.localapp import cleanup_tempfile, get_version
|
|
15
|
+
from biotite.application.msaapp import MSAApp
|
|
16
|
+
from biotite.sequence.phylo.tree import Tree
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MuscleApp(MSAApp):
    """
    Perform a multiple sequence alignment using MUSCLE version 3.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.

    Raises
    ------
    VersionError
        If the binary at `bin_path` does not report major version 3.

    See Also
    --------
    Muscle5App : Interface to MUSCLE version ``>=5``.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = MuscleApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BIQT-ITE
    TITANITE
    BISM-ITE
    -IQL-ITE
    """

    def __init__(self, sequences, bin_path="muscle", matrix=None):
        # Refuse to work with any other major release before
        # the base class is set up
        major_version = get_version(bin_path, "-version")[0]
        if major_version != 3:
            raise VersionError(f"Muscle 3 is required, got version {major_version}")

        super().__init__(sequences, bin_path, matrix)
        self._gap_open = None
        self._gap_ext = None
        self._terminal_penalty = None
        self._tree1 = None
        self._tree2 = None
        # The paths of these files are given to MUSCLE as guide tree
        # outputs in 'run()', their content is read in 'evaluate()'
        # and they are handed to 'cleanup_tempfile()' in 'clean_up()'
        self._out_tree1_file = NamedTemporaryFile("r", suffix=".tree", delete=False)
        self._out_tree2_file = NamedTemporaryFile("r", suffix=".tree", delete=False)

    def run(self):
        args = [
            "-quiet",
            "-in",
            self.get_input_file_path(),
            "-out",
            self.get_output_file_path(),
            "-tree1",
            self._out_tree1_file.name,
            "-tree2",
            self._out_tree2_file.name,
        ]
        if self.get_seqtype() == "protein":
            args += ["-seqtype", "protein"]
        else:
            args += ["-seqtype", "dna"]
        matrix_file_path = self.get_matrix_file_path()
        if matrix_file_path is not None:
            args += ["-matrix", matrix_file_path]
        if self._gap_open is not None and self._gap_ext is not None:
            args += ["-gapopen", f"{self._gap_open:.1f}"]
            args += ["-gapextend", f"{self._gap_ext:.1f}"]
            # When the gap penalty is set,
            # use the penalty also for hydrophobic regions
            args += ["-hydrofactor", "1.0"]
            # Use the recommendation of the documentation
            args += ["-center", "0.0"]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        super().evaluate()

        self._tree1 = self._read_guide_tree(self._out_tree1_file, "first")
        self._tree2 = self._read_guide_tree(self._out_tree2_file, "second")

    @staticmethod
    def _read_guide_tree(tree_file, ordinal):
        """
        Parse the Newick tree from an open guide tree file.

        Parameters
        ----------
        tree_file : file-like object
            The readable guide tree output file.
        ordinal : str
            The name of the iteration (``'first'`` or ``'second'``)
            put into the warning message.

        Returns
        -------
        tree : Tree or None
            The parsed tree.
            ``None``, if the file is empty; a warning is raised in this
            case.
        """
        newick = tree_file.read().replace("\n", "")
        if len(newick) == 0:
            warnings.warn(
                f"MUSCLE did not write a tree file from the {ordinal} iteration"
            )
            return None
        return Tree.from_newick(newick)

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._out_tree1_file)
        cleanup_tempfile(self._out_tree2_file)

    @requires_state(AppState.CREATED)
    def set_gap_penalty(self, gap_penalty):
        """
        Set the gap penalty for the alignment.

        Parameters
        ----------
        gap_penalty : float or (tuple, dtype=int)
            If a float is provided, the value will be interpreted as
            general gap penalty.
            If a tuple is provided, an affine gap penalty is used.
            The first value in the tuple is the gap opening penalty,
            the second value is the gap extension penalty.
            The values need to be negative.

        Raises
        ------
        TypeError
            If `gap_penalty` is neither a number nor a sequence of
            numbers (strings and byte strings are rejected as well).
        ValueError
            If a value is positive or if a sequence with less than two
            values is given.
        """
        # Check if gap penalty is general or affine
        if isinstance(gap_penalty, numbers.Real):
            gap_open = gap_ext = gap_penalty
        elif isinstance(gap_penalty, Sequence) and not isinstance(
            gap_penalty, (str, bytes, bytearray)
        ):
            # Strings and byte strings are sequences too,
            # but never a meaningful pair of penalties
            if len(gap_penalty) < 2:
                raise ValueError(
                    "Affine gap penalty requires a gap opening "
                    "and a gap extension value"
                )
            gap_open, gap_ext = gap_penalty[0], gap_penalty[1]
        else:
            raise TypeError("Gap penalty must be either float or tuple")

        if gap_open > 0 or gap_ext > 0:
            raise ValueError("Gap penalty must be negative")
        # Only store the values after the complete input is validated
        self._gap_open = gap_open
        self._gap_ext = gap_ext

    @requires_state(AppState.JOINED)
    def get_guide_tree(self, iteration="identity"):
        """
        Get the guide tree created for the progressive alignment.

        Parameters
        ----------
        iteration : {'kmer', 'identity'}
            If 'kmer', the first iteration tree is returned.
            This tree uses the sequences common *k*-mers as distance
            measure.
            If 'identity' the second iteration tree is returned.
            This tree uses distances based on the pairwise sequence
            identity after the first progressive alignment iteration.

        Returns
        -------
        tree : Tree
            The guide tree.
            ``None``, if MUSCLE did not write the requested tree.

        Raises
        ------
        ValueError
            If `iteration` is neither ``'kmer'`` nor ``'identity'``.
        """
        if iteration == "kmer":
            return self._tree1
        elif iteration == "identity":
            return self._tree2
        else:
            raise ValueError("Iteration must be 'kmer' or 'identity'")

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return True

    @classmethod
    def align(cls, sequences, bin_path=None, matrix=None, gap_penalty=None):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`MuscleApp`
        execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.
        matrix : SubstitutionMatrix, optional
            A custom substitution matrix.
        gap_penalty : float or (tuple, dtype=int), optional
            If a float is provided, the value will be interpreted as
            general gap penalty.
            If a tuple is provided, an affine gap penalty is used.
            The first value in the tuple is the gap opening penalty,
            the second value is the gap extension penalty.
            The values need to be negative.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        # Omit the binary path if not given,
        # so the default of the constructor is used
        if bin_path is None:
            app = cls(sequences, matrix=matrix)
        else:
            app = cls(sequences, bin_path, matrix=matrix)
        if gap_penalty is not None:
            app.set_gap_penalty(gap_penalty)
        app.start()
        app.join()
        return app.get_alignment()
|