biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.muscle"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["Muscle5App"]
|
|
8
|
+
|
|
9
|
+
from biotite.application.application import AppState, VersionError, requires_state
|
|
10
|
+
from biotite.application.localapp import get_version
|
|
11
|
+
from biotite.application.msaapp import MSAApp
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Muscle5App(MSAApp):
    """
    Run a multiple sequence alignment with MUSCLE version 5.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.

    See Also
    --------
    MuscleApp : Interface to MUSCLE version ``<5``.

    Notes
    -----
    Alignment ensemble generation is not supported, yet.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = Muscle5App([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BI-QTITE
    TITANITE
    BI-SMITE
    -I-QLITE
    """

    def __init__(self, sequences, bin_path="muscle"):
        # The version check happens before the base class initializer runs,
        # so an outdated binary is rejected as early as possible
        version = get_version(bin_path, "-version")
        major_version = version[0]
        if major_version < 5:
            raise VersionError(
                f"At least Muscle 5 is required, got version {major_version}"
            )

        super().__init__(sequences, bin_path)
        # Name of the MUSCLE command, passed as '-<mode>' in 'run()'
        self._mode = "align"
        # 'None' means that the respective option is omitted in 'run()'
        self._consiters = self._refineiters = self._n_threads = None

    @requires_state(AppState.CREATED)
    def set_iterations(self, consistency=None, refinement=None):
        """
        Choose how many iterations the alignment algorithm performs.

        A value that is left at ``None`` keeps the current setting.

        Parameters
        ----------
        consistency : int, optional
            The number of consistency iterations.
        refinement : int, optional
            The number of refinement iterations.
        """
        requested = (
            ("_consiters", consistency),
            ("_refineiters", refinement),
        )
        for attribute, value in requested:
            if value is not None:
                setattr(self, attribute, value)

    @requires_state(AppState.CREATED)
    def set_thread_number(self, number):
        """
        Choose how many threads the alignment run uses.

        Parameters
        ----------
        number : int
            The number of threads.
        """
        self._n_threads = number

    @requires_state(AppState.CREATED)
    def use_super5(self):
        """
        Switch the alignment run to the *Super5* algorithm.
        """
        self._mode = "super5"

    def run(self):
        # Mandatory part: command, input file and output file
        arguments = [
            f"-{self._mode}",
            self.get_input_file_path(),
            "-output",
            self.get_output_file_path(),
        ]
        # Every sequence type other than protein is passed as nucleotide
        arguments.append("-amino" if self.get_seqtype() == "protein" else "-nt")
        # Optional part: only options that were explicitly set are passed
        optional_flags = (
            ("-threads", self._n_threads),
            ("-consiters", self._consiters),
            ("-refineiters", self._refineiters),
        )
        for flag, value in optional_flags:
            if value is not None:
                arguments.extend([flag, str(value)])
        self.set_arguments(arguments)
        super().run()

    def clean_up(self):
        # No additional resources are held by this class
        super().clean_up()

    @staticmethod
    def supports_nucleotide():
        # Nucleotide sequences are accepted
        return True

    @staticmethod
    def supports_protein():
        # Protein sequences are accepted
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        # Custom nucleotide substitution matrices are not accepted
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        # Custom protein substitution matrices are not accepted
        return False

    @classmethod
    def align(cls, sequences, bin_path="muscle"):
        """
        Perform a multiple sequence alignment.

        This convenience function creates a :class:`Muscle5App`,
        starts it, waits for it to finish and returns its result.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        muscle_app = cls(sequences, bin_path)
        muscle_app.start()
        muscle_app.join()
        return muscle_app.get_alignment()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for obtaining sequencing data from the *NCBI*
|
|
7
|
+
*sequence read archive* (SRA).
|
|
8
|
+
|
|
9
|
+
It comprises two central classes:
|
|
10
|
+
:class:`FastqDumpApp` downloads sequence reads in FASTQ format.
|
|
11
|
+
If only sequences (and no scores) are required :class:`FastaDumpApp`
|
|
12
|
+
writes sequence reads into FASTA format.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
__name__ = "biotite.application.sra"
|
|
16
|
+
__author__ = "Patrick Kunzmann"
|
|
17
|
+
|
|
18
|
+
from .app import *
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.sra"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["FastaDumpApp", "FastqDumpApp"]
|
|
8
|
+
|
|
9
|
+
import abc
|
|
10
|
+
import glob
|
|
11
|
+
from os.path import join
|
|
12
|
+
from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
|
|
13
|
+
from tempfile import TemporaryDirectory
|
|
14
|
+
from biotite.application.application import (
|
|
15
|
+
Application,
|
|
16
|
+
AppState,
|
|
17
|
+
AppStateError,
|
|
18
|
+
requires_state,
|
|
19
|
+
)
|
|
20
|
+
from biotite.sequence.io.fasta.convert import get_sequences
|
|
21
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
22
|
+
from biotite.sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
|
|
23
|
+
from biotite.sequence.io.fastq.file import FastqFile
|
|
24
|
+
from biotite.sequence.seqtypes import NucleotideSequence
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Do not use LocalApp, as two programs are executed
|
|
28
|
+
class _DumpApp(Application, metaclass=abc.ABCMeta):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__()
        self._prefetch_path = prefetch_path
        self._fasterq_dump_path = fasterq_dump_path
        self._uid = uid
        # The prefetched SRA file is always placed in this directory,
        # independent of where the FASTQ output is written
        self._sra_dir = TemporaryDirectory(suffix="_sra")
        if output_path_prefix is None:
            self._prefix = join(self._sra_dir.name, self._uid)
        else:
            self._prefix = output_path_prefix
        self._prefetch_process = None
        self._fasterq_dump_process = None

    @requires_state(AppState.RUNNING | AppState.FINISHED)
    def join(self, timeout=None):
        # Override method as repetitive calls of 'is_finished()'
        # are not necessary as 'communicate()' already waits for the
        # finished application
        try:
            _, self._stderr = self._process.communicate(timeout=timeout)
        except TimeoutExpired:
            self.cancel()
            raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
        self._state = AppState.FINISHED

        try:
            self.evaluate()
        except AppStateError:
            raise
        except BaseException:
            # Any other failure (including interrupts) marks the
            # application as cancelled before the error is propagated
            self._state = AppState.CANCELLED
            raise
        else:
            self._state = AppState.JOINED
        self.clean_up()

    def run(self):
        # Local import, as 'shlex' is only required to build the command
        import shlex

        # Prefetch into a temp directory with file name equaling UID
        # This ensures that the ID in the header is not the temp prefix
        sra_file_name = join(self._sra_dir.name, self._uid)
        output_file_name = f"{self._prefix}.fastq"
        # The command is interpreted by a shell:
        # Quote every path and the UID, so that whitespace or shell
        # metacharacters in them can neither break the command nor
        # inject additional commands.
        # The option strings are deliberately inserted as-is, so that
        # they can hold more than one shell argument.
        quote = shlex.quote
        command = (
            f"{quote(str(self._prefetch_path))} -q -O {quote(self._sra_dir.name)} "
            f"{self.get_prefetch_options()} {quote(self._uid)}; "
            f"{quote(str(self._fasterq_dump_path))} -q -o {quote(output_file_name)} "
            f"{self.get_fastq_dump_options()} {quote(sra_file_name)}"
        )
        self._process = Popen(
            command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
        )

    def is_finished(self):
        code = self._process.poll()
        if code is None:
            return False
        else:
            # The process has terminated: collect its error output
            _, self._stderr = self._process.communicate()
            return True

    def evaluate(self):
        super().evaluate()
        # Check if application terminated correctly
        exit_code = self._process.returncode
        if exit_code != 0:
            err_msg = self._stderr.replace("\n", " ")
            raise SubprocessError(
                f"'prefetch' or 'fasterq-dump' returned with exit code "
                f"{exit_code}: {err_msg}"
            )

        # Escape the prefix, so that glob metacharacters in a
        # user-supplied path are matched literally
        escaped_prefix = glob.escape(self._prefix)
        self._file_names = (
            # For entries with one read per spot
            glob.glob(escaped_prefix + ".fastq")
            +
            # For entries with multiple reads per spot:
            # 'glob()' returns the files in arbitrary order, hence sort
            # them to get the first read first, the second read second, etc.
            sorted(glob.glob(escaped_prefix + "_*.fastq"))
        )
        # Only load FASTQ files into memory when needed
        self._fastq_files = None

    def wait_interval(self):
        # Not used in this implementation of 'join()'
        raise NotImplementedError()

    def clean_up(self):
        if self.get_app_state() == AppState.CANCELLED:
            self._process.kill()
        # Directory with temp files does not need to be deleted,
        # as temp dir is automatically deleted upon object destruction

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
        """
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
        """
        return ""

    @requires_state(AppState.JOINED)
    def get_file_paths(self):
        """
        Get the file paths to the downloaded files.

        Returns
        -------
        paths : list of str
            The file paths to the downloaded files.
        """
        return self._file_names

    @requires_state(AppState.JOINED)
    @abc.abstractmethod
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        pass
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class FastqDumpApp(_DumpApp):
    """
    Download the reads of a run from the *NCBI sequence read archive*
    (SRA) with *sra-tools* and access them as FASTQ data.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
        This value is subtracted from the FASTQ ASCII code to obtain the
        quality score.
        Can either be directly the value, or a string that indicates
        the score format.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
        offset="Sanger",
    ):
        super().__init__(
            uid,
            output_path_prefix,
            prefetch_path,
            fasterq_dump_path,
        )
        # Filled lazily by 'get_fastq()' on its first call
        self._fastq_files = None
        self._offset = offset

    @requires_state(AppState.JOINED)
    def get_fastq(self):
        """
        Obtain the downloaded file(s) as `FastqFile` objects.

        The files are parsed on the first call only; later calls
        return the same cached list.

        Returns
        -------
        fastq_files : list of FastqFile
            One file per read of a spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
        """
        if self._fastq_files is not None:
            return self._fastq_files

        parsed_files = []
        for path in self.get_file_paths():
            parsed_files.append(FastqFile.read(path, offset=self._offset))
        # Only cache after all files have been read successfully
        self._fastq_files = parsed_files
        return parsed_files

    @requires_state(AppState.JOINED)
    def get_sequences(self):
        # One dictionary (identifier -> sequence) per FASTQ file
        sequences_per_read = []
        for fastq_file in self.get_fastq():
            sequences = {}
            for header, (seq_str, _) in fastq_file.items():
                # Map 'U' to 'T' and 'X' to 'N' before creating the sequence
                converted = seq_str.replace("U", "T").replace("X", "N")
                sequences[header] = NucleotideSequence(converted)
            sequences_per_read.append(sequences)
        return sequences_per_read

    @requires_state(AppState.JOINED)
    def get_sequences_and_scores(self):
        """
        Obtain the sequences together with their score values from the
        downloaded file(s).

        Returns
        -------
        sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
            One dictionary per read of a spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
            Each dictionary maps an identifier to its corresponding
            sequence and score values.
        """
        results = []
        for fastq_file in self.get_fastq():
            # The module-level function is called here, not this method
            results.append(get_sequences_and_scores(fastq_file))
        return results

    @classmethod
    def fetch(
        cls,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
        offset="Sanger",
    ):
        """
        Convenience method: create the application, run it to
        completion and return the sequences belonging to the UID from
        the *NCBI sequence read archive* (SRA).

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded FASTQ file.
            ``.fastq`` is appended to this prefix if the run contains
            a single read per spot.
            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
            multiple reads per spot.
            By default, the files are created in a temporary directory and
            deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.
        offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
            This value is subtracted from the FASTQ ASCII code to obtain the
            quality score.
            Can either be directly the value, or a string that indicates
            the score format.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            One dictionary per read of a spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
            Each dictionary maps an identifier to its corresponding
            sequence.
        """
        app = cls(
            uid,
            output_path_prefix,
            prefetch_path,
            fasterq_dump_path,
            offset,
        )
        app.start()
        app.join()
        sequences = app.get_sequences()
        return sequences
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class FastaDumpApp(_DumpApp):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    In contrast to the FASTQ variant, ``--fasta`` is requested as
    additional `fasterq-dump` option and the downloaded files are
    read as `FastaFile` objects.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
        # Filled lazily by 'get_fasta()' on its first call
        self._fasta_files = None

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        Returns
        -------
        options : str
            The additional options.
            Currently always an empty string.
        """
        # TODO: Use '--eliminate-quals'
        # when https://github.com/ncbi/sra-tools/issues/883 is resolved
        # return "--eliminate-quals"
        #
        # Return an empty string instead of an implicit 'None':
        # the base implementation of this hook documents a 'str' return
        # value and returns "".
        # NOTE(review): the consumer of this hook is presumably the code
        # building the command line, where 'None' would be formatted as
        # the literal text 'None' - confirm against the caller
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        Returns
        -------
        options : str
            The additional options, i.e. ``--fasta``.
        """
        return "--fasta"

    @requires_state(AppState.JOINED)
    def get_fasta(self):
        """
        Get the `FastaFile` objects from the downloaded file(s).

        The files are parsed on the first call only; later calls
        return the same cached list.

        Returns
        -------
        fasta_files : list of FastaFile
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
        """
        if self._fasta_files is None:
            self._fasta_files = [
                FastaFile.read(file_name) for file_name in self.get_file_paths()
            ]
        return self._fasta_files

    @requires_state(AppState.JOINED)
    def get_sequences(self):
        # The module-level 'get_sequences()' function is called here,
        # not this method
        return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]

    @classmethod
    def fetch(
        cls,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        """
        Get the sequences belonging to the UID from the
        *NCBI sequence read archive* (SRA).

        This is a convenience method that creates the application,
        starts it, waits for it to finish and returns its sequences.

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded file.
            ``.fastq`` is appended to this prefix if the run contains
            a single read per spot.
            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
            multiple reads per spot.
            By default, the files are created in a temporary directory and
            deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
        app.start()
        app.join()
        return app.get_sequences()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for masking sequence regions using the *tantan* software.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.application.tantan"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .app import *
|