biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.blast"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["BlastWebApp"]
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
from xml.etree import ElementTree
|
|
11
|
+
import requests
|
|
12
|
+
from biotite.application.application import AppState, requires_state
|
|
13
|
+
from biotite.application.blast.alignment import BlastAlignment
|
|
14
|
+
from biotite.application.webapp import WebApp
|
|
15
|
+
from biotite.sequence.align.alignment import Alignment
|
|
16
|
+
from biotite.sequence.io.fasta.convert import get_sequence
|
|
17
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
18
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
19
|
+
from biotite.sequence.sequence import Sequence
|
|
20
|
+
|
|
21
|
+
_ncbi_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BlastWebApp(WebApp):
|
|
25
|
+
"""
|
|
26
|
+
Perform a local alignment against a large sequence database using
|
|
27
|
+
the web-based BLAST application (by default NCBI BLAST).
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
program : str
|
|
32
|
+
The specific BLAST program. One of 'blastn',
|
|
33
|
+
'blastp', 'blastx', 'tblastn' and 'tblastx'.
|
|
34
|
+
query : Sequence or str
|
|
35
|
+
The query sequence. If a string is provided, it is interpreted
|
|
36
|
+
as path to a FASTA file, if the string contains a valid FASTA
|
|
37
|
+
file extension, otherwise it is interpreted as a single letter
|
|
38
|
+
string representation of a sequence.
|
|
39
|
+
database : str, optional
|
|
40
|
+
The NCBI sequence database to blast against. By default it
|
|
41
|
+
contains all sequences (``database='nr'``).
|
|
42
|
+
app_url : str, optional
|
|
43
|
+
URL of the BLAST web app. By default NCBI BLAST is used.
|
|
44
|
+
This can be changed to a private server or another cloud
|
|
45
|
+
provider.
|
|
46
|
+
obey_rules : bool, optional
|
|
47
|
+
If true, the application raises an :class:`RuleViolationError`,
|
|
48
|
+
if the server is contacted too often, based on the NCBI BLAST
|
|
49
|
+
usage rules.
|
|
50
|
+
mail : str, optional
|
|
51
|
+
If a mail address is provided, it will be appended in the
|
|
52
|
+
HTTP request. This allows the NCBI to contact you in case
|
|
53
|
+
your application sends too many requests.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
_last_contact = 0
|
|
57
|
+
_last_request = 0
|
|
58
|
+
_contact_delay = 3
|
|
59
|
+
_request_delay = 60
|
|
60
|
+
|
|
61
|
+
def __init__(
    self,
    program,
    query,
    database="nr",
    app_url=_ncbi_url,
    obey_rules=True,
    mail="padix.key@gmail.com",
):
    """
    Validate the program and query and initialize all search options.

    The parameters are described in the class docstring.

    Raises
    ------
    ValueError
        If `program` is not an accepted BLAST program or the query
        contains a symbol that is not part of the reference alphabet.
    """
    super().__init__(app_url, obey_rules)

    # 'megablast' is intentionally not accepted:
    # the corresponding HTTPS request only redirects to the BLAST
    # main page instead of starting a search
    accepted_programs = ("blastn", "blastp", "blastx", "tblastn", "tblastx")
    if program not in accepted_programs:
        raise ValueError(f"'{program}' is not a valid BLAST program")
    self._program = program

    fasta_suffixes = (".fa", ".fst", ".fasta")
    if isinstance(query, str) and query.endswith(fasta_suffixes):
        # A string with a FASTA file extension is taken as a file path:
        # the sequence (not the header) of the first entry is used
        self._query = str(get_sequence(FastaFile.read(query)))
    elif isinstance(query, Sequence):
        self._query = str(query)
    else:
        # Anything else is used as is
        self._query = query

    # Only 'blastp' and 'tblastn' take a protein query,
    # all other accepted programs take a nucleotide query
    allowed_symbols = (
        ProteinSequence.alphabet
        if program in ("blastp", "tblastn")
        else NucleotideSequence.alphabet_amb
    )
    # Reject the query as soon as the first unsuitable symbol is found
    for symbol in self._query:
        if symbol.upper() not in allowed_symbols:
            raise ValueError(f"Query sequence contains unsuitable symbol {symbol}")

    self._database = database

    # Optional search parameters: 'None' means 'not set'
    self._gap_openining = self._gap_extension = self._word_size = None
    self._expect_value = self._max_results = self._entrez_query = None
    # Parameters for nucleotide searches
    self._reward = self._penalty = None
    # Parameters for protein searches
    self._matrix = self._threshold = None

    self._mail = mail
    # The request ID is assigned when the search is submitted
    self._rid = None
|
|
119
|
+
|
|
120
|
+
@requires_state(AppState.CREATED)
def set_entrez_query(self, query):
    """
    Restrict the search to a subset of the database.

    Only the sequences matching the given Entrez query are searched.

    Parameters
    ----------
    query : Query
        An NCBI Entrez query. It is stored in its string
        representation.
    """
    entrez_query = str(query)
    self._entrez_query = entrez_query
|
|
132
|
+
|
|
133
|
+
@requires_state(AppState.CREATED)
def set_max_results(self, number):
    """
    Set an upper bound for the number of reported results.

    Parameters
    ----------
    number : int
        The maximum number of results.
    """
    max_results = number
    self._max_results = max_results
|
|
144
|
+
|
|
145
|
+
@requires_state(AppState.CREATED)
def set_max_expect_value(self, value):
    """
    Set the maximum accepted expectation value (E-value).

    Alignments with an E-value above this threshold are not
    considered.

    The E-value is the expected number of random sequences in a
    database of similar size, that reach an equal or higher score
    by chance when aligned with the query sequence.

    Parameters
    ----------
    value : float
        The threshold E-value.
    """
    threshold_e_value = value
    self._expect_value = threshold_e_value
|
|
162
|
+
|
|
163
|
+
@requires_state(AppState.CREATED)
def set_gap_penalty(self, opening, extension):
    """
    Set the affine gap penalty used for the alignment.

    Parameters
    ----------
    opening : int
        The penalty for opening a gap.
    extension : int
        The penalty for extending a gap.

    Notes
    -----
    Both values are formatted as integers when the search is
    submitted.
    """
    # The attribute name '_gap_openining' (sic) is shared with the
    # other methods of this class
    self._gap_openining, self._gap_extension = opening, extension
|
|
177
|
+
|
|
178
|
+
@requires_state(AppState.CREATED)
def set_word_size(self, size):
    """
    Set the size of the words that are used as alignment seeds.

    Parameters
    ----------
    size : int
        The word size.
    """
    word_size = size
    self._word_size = word_size
|
|
189
|
+
|
|
190
|
+
@requires_state(AppState.CREATED)
def set_match_reward(self, reward):
    """
    Set the score that a symbol match contributes to the alignment.

    This option is used only in 'blastn' and 'megablast'.

    Parameters
    ----------
    reward : int
        The match reward. Must be positive.
    """
    match_reward = reward
    self._reward = match_reward
|
|
203
|
+
|
|
204
|
+
@requires_state(AppState.CREATED)
def set_mismatch_penalty(self, penalty):
    """
    Set the score that a symbol mismatch contributes to the
    alignment.

    This option is used only in 'blastn' and 'megablast'.

    Parameters
    ----------
    penalty : int
        The mismatch penalty. Must be negative.
    """
    mismatch_penalty = penalty
    self._penalty = mismatch_penalty
|
|
217
|
+
|
|
218
|
+
@requires_state(AppState.CREATED)
def set_substitution_matrix(self, matrix_name):
    """
    Set the substitution matrix that is used for scoring the
    alignment.

    This option is used only in 'blastp', 'blastx', 'tblastn' and
    'tblastx'.

    Parameters
    ----------
    matrix_name : str
        Name of the substitution matrix. Default is 'BLOSUM62'.
        The name is stored in upper case.
    """
    upper_case_name = matrix_name.upper()
    self._matrix = upper_case_name
|
|
231
|
+
|
|
232
|
+
@requires_state(AppState.CREATED)
def set_threshold(self, threshold):
    """
    Set the neighboring score threshold for initial words.

    This option is used only in 'blastp', 'blastx', 'tblastn' and
    'tblastx'.

    Parameters
    ----------
    threshold : int
        The threshold value. Must be positive.
    """
    word_threshold = threshold
    self._threshold = word_threshold
|
|
245
|
+
|
|
246
|
+
def run(self):
    """
    Submit the search to the server and store the request ID.

    The mandatory and all explicitly set optional parameters are
    sent in a single HTTP GET request. The request ID ('RID') is
    taken from the *QBlastInfo* block of the response.

    Raises
    ------
    ValueError
        If the server reports that the submitted URI is too large.
    """
    # Mandatory parameters
    params = {
        "tool": "Biotite",
        "email": self._mail,
        "CMD": "Put",
        "PROGRAM": self._program,
        "QUERY": str(self._query),
        "DATABASE": self._database,
    }

    # Optional parameters are only sent, if they have been set
    if self._entrez_query is not None:
        params["ENTREZ_QUERY"] = self._entrez_query
    if self._max_results is not None:
        params["HITLIST_SIZE"] = str(self._max_results)
    if self._expect_value is not None:
        params["EXPECT"] = self._expect_value
    opening, extension = self._gap_openining, self._gap_extension
    if opening is not None and extension is not None:
        params["GAPCOSTS"] = "{:d} {:d}".format(opening, extension)
    if self._word_size is not None:
        params["WORD_SIZE"] = self._word_size

    # Scoring parameters depend on the type of the search
    if self._program in ("blastn", "megablast"):
        scoring_params = (
            ("NUCL_REWARD", self._reward),
            ("NUCL_PENALTY", self._penalty),
        )
    elif self._program in ("blastp", "blastx", "tblastn", "tblastx"):
        scoring_params = (
            ("MATRIX", self._matrix),
            ("THRESHOLD", self._threshold),
        )
    else:
        scoring_params = ()
    for name, value in scoring_params:
        if value is not None:
            params[name] = value

    response = requests.get(self.app_url(), params=params)
    if "Submitted URI too large" in response.text:
        raise ValueError("The URI is too large, try a shorter sequence")
    # Record the server contact and the new search request
    # for the detection of rule violations
    self._contact()
    self._request()
    self._rid = BlastWebApp._get_info(response.text)["RID"]
|
|
286
|
+
|
|
287
|
+
def is_finished(self):
    """
    Ask the server for the status of the submitted search.

    Returns
    -------
    finished : bool
        True, if the server reports the status 'READY',
        false otherwise.

    Raises
    ------
    ValueError
        If the server reports the status 'UNKNOWN'.
    RuntimeError
        If the server reports the status 'FAILED'.
    """
    data_dict = {"FORMAT_OBJECT": "SearchInfo", "RID": self._rid, "CMD": "Get"}
    request = requests.get(self.app_url(), params=data_dict)
    # Record the server contact for the detection of rule violations
    self._contact()
    status = BlastWebApp._get_info(request.text)["Status"]
    if status == "UNKNOWN":
        # Indicates invalid query input values
        raise ValueError(
            "The input values seem to be invalid "
            "(Server responsed status 'UNKNOWN')"
        )
    if status == "FAILED":
        # A failed search never reaches the status 'READY':
        # without this check this method would keep returning false
        raise RuntimeError(
            "The search failed on the server (Server responsed status 'FAILED')"
        )
    return status == "READY"
|
|
299
|
+
|
|
300
|
+
def wait_interval(self):
|
|
301
|
+
# NCBI requires a 3 second delay between server contacts
|
|
302
|
+
return BlastWebApp._contact_delay
|
|
303
|
+
|
|
304
|
+
def clean_up(self):
    """
    Send a 'Delete' command for the request ID of this search to the
    server.
    """
    deletion_params = {"CMD": "Delete", "RID": self._rid}
    requests.get(self.app_url(), params=deletion_params)
|
|
309
|
+
|
|
310
|
+
def evaluate(self):
    """
    Download the search results as XML and convert them into
    alignments.

    The raw XML response and a list of :class:`BlastAlignment`
    objects are stored in this object. For each ``<Hit>`` element
    one alignment is created from the first matching ``<Hsp>``
    element.
    """
    params = {"tool": "BiotiteClient"}
    if self._mail is not None:
        params["email"] = self._mail
    params.update(
        {"CMD": "Get", "RID": self._rid, "FORMAT_TYPE": "XML", "NCBI_GI": "T"}
    )
    response = requests.get(self.app_url(), params=params)
    # Record the server contact for the detection of rule violations
    self._contact()

    self._alignments = []
    self._xml_response = response.text
    root = ElementTree.fromstring(self._xml_response)
    is_nucleotide_search = self._program in ("blastn", "megablast")

    # Extract BlastAlignment objects from <Hit> tags
    for hit in root.findall("./BlastOutput_iterations/Iteration/Iteration_hits/Hit"):
        hit_definition = hit.find("Hit_def").text
        hit_id = hit.find("Hit_accession").text
        hsp = hit.find(".Hit_hsps/Hsp")

        def hsp_text(tag, hsp=hsp):
            # Text content of a direct child element of the <Hsp> tag
            return hsp.find(tag).text

        score = int(hsp_text("Hsp_score"))
        e_value = float(hsp_text("Hsp_evalue"))
        query_interval = (
            int(hsp_text("Hsp_query-from")),
            int(hsp_text("Hsp_query-to")),
        )
        hit_interval = (
            int(hsp_text("Hsp_hit-from")),
            int(hsp_text("Hsp_hit-to")),
        )

        gapped_strings = [hsp_text("Hsp_qseq"), hsp_text("Hsp_hseq")]
        # The sequence objects are created without gap symbols:
        # gaps are represented by the trace
        if is_nucleotide_search:
            sequences = [
                NucleotideSequence(string.replace("-", ""))
                for string in gapped_strings
            ]
        else:
            # 'U' is replaced by 'C' before the protein sequence is created
            sequences = [
                ProteinSequence(string.replace("-", "").replace("U", "C"))
                for string in gapped_strings
            ]
        trace = Alignment.trace_from_strings(gapped_strings)

        self._alignments.append(
            BlastAlignment(
                sequences,
                trace,
                score,
                e_value,
                query_interval,
                hit_interval,
                hit_id,
                hit_definition,
            )
        )
|
|
365
|
+
|
|
366
|
+
@requires_state(AppState.JOINED)
|
|
367
|
+
def get_xml_response(self):
    """
    Return the unparsed XML text that was stored for this run.

    Returns
    -------
    response : str
        The raw XML response.
    """
    xml_text = self._xml_response
    return xml_text
|
|
377
|
+
|
|
378
|
+
@requires_state(AppState.JOINED)
|
|
379
|
+
def get_alignments(self):
    """
    Return the local sequence alignments collected for this run.

    Returns
    -------
    alignment : list of BlastAlignment
        The local sequence alignments.
    """
    found_alignments = self._alignments
    return found_alignments
|
|
389
|
+
|
|
390
|
+
@staticmethod
|
|
391
|
+
def _get_info(text):
|
|
392
|
+
"""
|
|
393
|
+
Get the *QBlastInfo* block of the response HTML as dictionary
|
|
394
|
+
"""
|
|
395
|
+
lines = [line for line in text.split("\n")]
|
|
396
|
+
info_dict = {}
|
|
397
|
+
in_info_block = False
|
|
398
|
+
for line in lines:
|
|
399
|
+
if "QBlastInfoBegin" in line:
|
|
400
|
+
in_info_block = True
|
|
401
|
+
continue
|
|
402
|
+
if "QBlastInfoEnd" in line:
|
|
403
|
+
in_info_block = False
|
|
404
|
+
continue
|
|
405
|
+
if in_info_block:
|
|
406
|
+
pair = line.split("=")
|
|
407
|
+
info_dict[pair[0].strip()] = pair[1].strip()
|
|
408
|
+
return info_dict
|
|
409
|
+
|
|
410
|
+
def _contact(self):
    """
    Register a server contact at the current time.

    If less than ``BlastWebApp._contact_delay`` has elapsed since the
    previously registered contact, a rule violation is reported via
    ``violate_rule()``.
    The class-wide contact timestamp is updated in either case.
    """
    now = time.time()
    elapsed = now - BlastWebApp._last_contact
    if elapsed < BlastWebApp._contact_delay:
        self.violate_rule("The server was contacted too often")
    BlastWebApp._last_contact = now
|
|
419
|
+
|
|
420
|
+
def _request(self):
    """
    Register a new alignment request at the current time.

    If less than ``BlastWebApp._request_delay`` has elapsed since the
    previously registered request, a rule violation is reported via
    ``violate_rule()``.
    The class-wide request timestamp is updated in either case.
    """
    now = time.time()
    elapsed = now - BlastWebApp._last_request
    if elapsed < BlastWebApp._request_delay:
        self.violate_rule("Too frequent BLAST requests")
    BlastWebApp._last_request = now
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for multiple sequence alignments using Clustal-Omega.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.application.clustalo"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .app import *
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.clustalo"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["ClustalOmegaApp"]
|
|
8
|
+
|
|
9
|
+
from tempfile import NamedTemporaryFile
|
|
10
|
+
import numpy as np
|
|
11
|
+
from biotite.application.application import AppState, requires_state
|
|
12
|
+
from biotite.application.localapp import cleanup_tempfile
|
|
13
|
+
from biotite.application.msaapp import MSAApp
|
|
14
|
+
from biotite.sequence.phylo.tree import Tree
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ClustalOmegaApp(MSAApp):
    """
    Perform a multiple sequence alignment using Clustal-Omega.

    Parameters
    ----------
    sequences : list of ProteinSequence or NucleotideSequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the Clustal-Omega binary.
    matrix : None
        This parameter is used for compatibility reasons and is ignored.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    -BIQTITE
    TITANITE
    -BISMITE
    --IQLITE
    """

    def __init__(self, sequences, bin_path="clustalo", matrix=None):
        # 'matrix' is intentionally not forwarded:
        # the parent class always receives None
        super().__init__(sequences, bin_path, None)
        self._seq_count = len(sequences)
        # The mBed heuristic stays enabled until
        # full_matrix_calculation() is called
        self._mbed = True
        self._dist_matrix = None
        self._tree = None
        # delete=False: the files must outlive this constructor,
        # they are handed to cleanup_tempfile() in clean_up()
        self._in_dist_matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
        self._out_dist_matrix_file = NamedTemporaryFile(
            "r", suffix=".mat", delete=False
        )
        self._in_tree_file = NamedTemporaryFile("w", suffix=".tree", delete=False)
        self._out_tree_file = NamedTemporaryFile("r", suffix=".tree", delete=False)

    def run(self):
        """
        Assemble the command line arguments from the current settings,
        write the optional input files (distance matrix, guide tree)
        and delegate to the ``run()`` method of the parent class.
        """
        args = [
            "--in",
            self.get_input_file_path(),
            "--out",
            self.get_output_file_path(),
            # The temporary files are already created
            # -> tell Clustal to overwrite these empty files
            "--force",
            # Tree order for get_alignment_order() to work properly
            "--output-order=tree-order",
        ]
        if self.get_seqtype() == "protein":
            args += ["--seqtype", "Protein"]
        else:
            args += ["--seqtype", "DNA"]
        if self._tree is None:
            # ClustalOmega does not like when a tree is set
            # as input and output
            # -> Only request tree output when no tree is input
            args += [
                "--guidetree-out",
                self._out_tree_file.name,
            ]
        if not self._mbed:
            args += ["--full", "--distmat-out", self._out_dist_matrix_file.name]
        if self._dist_matrix is not None:
            # Add the sequence names (0, 1, 2, 3 ...) as first column
            dist_matrix_with_index = np.concatenate(
                (np.arange(self._seq_count)[:, np.newaxis], self._dist_matrix), axis=1
            )
            np.savetxt(
                self._in_dist_matrix_file.name,
                dist_matrix_with_index,
                # The first line contains the amount of sequences
                comments="",
                header=str(self._seq_count),
                # The sequence indices are integers, the rest are floats
                fmt=["%d"] + ["%.5f"] * self._seq_count,
            )
            args += ["--distmat-in", self._in_dist_matrix_file.name]
        if self._tree is not None:
            self._in_tree_file.write(str(self._tree))
            # Make sure the tree is on disk before the program reads it
            self._in_tree_file.flush()
            args += ["--guidetree-in", self._in_tree_file.name]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        """
        Run the evaluation of the parent class and afterwards read
        the distance matrix (only if the full matrix calculation was
        requested) and the guide tree (only if no tree was given as
        input) from the respective output files.
        """
        super().evaluate()
        if not self._mbed:
            self._dist_matrix = np.loadtxt(
                self._out_dist_matrix_file.name,
                # The first row only contains the number of sequences
                skiprows=1,
                dtype=float,
            )
            # The first column contains only the name of the
            # sequences, in this case 0, 1, 2, 3 ...
            # -> Omit the first column
            self._dist_matrix = self._dist_matrix[:, 1:]
        # Only read output tree if no tree was input
        if self._tree is None:
            self._tree = Tree.from_newick(self._out_tree_file.read().replace("\n", ""))

    def clean_up(self):
        """
        Run the clean-up of the parent class and pass each of the four
        temporary files created in the constructor to
        ``cleanup_tempfile()``.
        """
        super().clean_up()
        cleanup_tempfile(self._in_dist_matrix_file)
        cleanup_tempfile(self._out_dist_matrix_file)
        cleanup_tempfile(self._in_tree_file)
        cleanup_tempfile(self._out_tree_file)

    @requires_state(AppState.CREATED)
    def full_matrix_calculation(self):
        """
        Use full distance matrix for guide-tree calculation, equivalent
        to the ``--full`` option.

        This makes the distance matrix calculation slower than using the
        default *mBed* heuristic.
        """
        self._mbed = False

    @requires_state(AppState.CREATED)
    def set_distance_matrix(self, matrix):
        """
        Set the pairwise sequence distances, the program should use to
        calculate the guide tree.

        Parameters
        ----------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If the shape of the matrix is not *(n,n)*, where *n* is
            the number of sequences.
        """
        if matrix.shape != (self._seq_count, self._seq_count):
            raise ValueError(
                f"Matrix with shape {matrix.shape} is not sufficient for "
                f"{self._seq_count} sequences"
            )
        self._dist_matrix = matrix.astype(float, copy=False)

    @requires_state(AppState.JOINED)
    def get_distance_matrix(self):
        """
        Get the pairwise sequence distances the program used to
        calculate the guide tree.

        Returns
        -------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If :meth:`full_matrix_calculation()` was not called before
            the run.
        """
        if self._mbed:
            raise ValueError(
                "Getting the distance matrix requires 'full_matrix_calculation()'"
            )
        return self._dist_matrix

    @requires_state(AppState.CREATED)
    def set_guide_tree(self, tree):
        """
        Set the guide tree, the program should use for the
        progressive alignment.

        Parameters
        ----------
        tree : Tree
            The guide tree.

        Raises
        ------
        ValueError
            If ``len(tree)`` is not equal to the number of sequences.
        """
        if self._seq_count != len(tree):
            # Both fragments need the 'f' prefix, otherwise the second
            # placeholder would be printed literally
            raise ValueError(
                f"Tree with {len(tree)} leaves is not sufficient for "
                f"{self._seq_count} sequences, must be equal"
            )
        self._tree = tree

    @requires_state(AppState.JOINED)
    def get_guide_tree(self):
        """
        Get the guide tree created for the progressive alignment.

        Returns
        -------
        tree : Tree
            The guide tree.
        """
        return self._tree

    # Capability flags
    # NOTE(review): presumably queried by the MSAApp base class,
    # which is not visible here -> confirm against msaapp.py

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for protein secondary structure annotation using DSSP.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.application.dssp"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .app import *
|