biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.dssp"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["DsspApp"]
|
|
8
|
+
|
|
9
|
+
from subprocess import SubprocessError
|
|
10
|
+
from tempfile import NamedTemporaryFile
|
|
11
|
+
import numpy as np
|
|
12
|
+
from biotite.application.application import AppState, requires_state
|
|
13
|
+
from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
|
|
14
|
+
from biotite.structure.error import BadStructureError
|
|
15
|
+
from biotite.structure.filter import filter_amino_acids
|
|
16
|
+
from biotite.structure.io.pdbx.cif import CIFCategory, CIFColumn, CIFFile
|
|
17
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
18
|
+
from biotite.structure.io.pdbx.convert import set_structure
|
|
19
|
+
from biotite.structure.repair import create_continuous_res_ids
|
|
20
|
+
from biotite.structure.residues import get_residue_starts
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DsspApp(LocalApp):
    r"""
    Annotate the secondary structure of a protein structure using the
    *DSSP* software.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    DSSP differentiates between 8 different types of secondary
    structure elements:

       - C: loop, coil or irregular
       - H: :math:`{\alpha}`-helix
       - B: :math:`{\beta}`-bridge
       - E: extended strand, participation in :math:`{\beta}`-ladder
       - G: 3 :sub:`10`-helix
       - I: :math:`{\pi}`-helix
       - T: hydrogen bonded turn
       - S: bend

    The symbols are taken verbatim from the DSSP output (only a blank
    symbol is replaced by ``C``), so the installed DSSP version may
    report symbols beyond this list, e.g. the ``P`` in the example
    below.

    Parameters
    ----------
    atom_array : AtomArray
        The atom array to be annotated.
        It must consist of amino acids only, otherwise a
        :class:`BadStructureError` is raised.
    bin_path : str, optional
        Path of the *DSSP* binary.

    Examples
    --------

    >>> app = DsspApp(atom_array)
    >>> app.start()
    >>> app.join()
    >>> print(app.get_sse())
    ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P' 'P'
     'P' 'C']
    """

    def __init__(self, atom_array, bin_path="mkdssp"):
        super().__init__(bin_path)

        if not np.all(filter_amino_acids(atom_array)):
            raise BadStructureError("The input structure must contain only amino acids")
        # Work on a copy, as annotations and residue IDs are altered below
        self._array = atom_array.copy()
        # DSSP requires also the
        # 'occupancy', 'b_factor' and 'charge' fields
        # -> Add placeholder values for those that are missing
        categories = self._array.get_annotation_categories()
        placeholders = (
            ("charge", np.zeros, int),
            ("b_factor", np.zeros, float),
            ("occupancy", np.ones, float),
        )
        for name, fill, dtype in placeholders:
            if name not in categories:
                self._array.set_annotation(
                    name, fill(self._array.array_length(), dtype=dtype)
                )
        # DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
        # if `seq_id` does not start at 1
        self._array.res_id = create_continuous_res_ids(self._array)

        try:
            # The parameters have changed in version 4
            self._new_cli = get_version(bin_path)[0] >= 4
        except SubprocessError:
            # In older versions, no version is returned with `--version`
            # -> a SubprocessError is raised
            self._new_cli = False
        # `delete=False`: the files are removed explicitly in `clean_up()`
        self._in_file = NamedTemporaryFile("w", suffix=".cif", delete=False)
        self._out_file = NamedTemporaryFile("r", suffix=".dssp", delete=False)

    def run(self):
        """
        Write the structure as CIF into the temporary input file,
        set the command line arguments and delegate to
        :meth:`LocalApp.run()`.
        """
        cif_file = CIFFile()
        set_structure(cif_file, self._array)
        cif_file.block["pdbx_poly_seq_scheme"] = _create_pdbx_poly_seq_scheme(
            self._array, cif_file.block["atom_site"]["label_entity_id"].as_array(str)
        )
        cif_file.write(self._in_file)
        # Make sure the content is on disk before the program reads it
        self._in_file.flush()
        if self._new_cli:
            self.set_arguments([self._in_file.name, self._out_file.name])
        else:
            self.set_arguments(["-i", self._in_file.name, "-o", self._out_file.name])
        super().run()

    def evaluate(self):
        """
        Parse the secondary structure symbols from the DSSP output file.

        Raises
        ------
        ValueError
            If the output contains no header line of the SSE records.
        """
        super().evaluate()
        lines = self._out_file.read().split("\n")
        # Index where SSE records start
        # The header is compared token-wise, so that the match does not
        # depend on the exact amount of whitespace in that line
        header_tokens = ["#", "RESIDUE", "AA", "STRUCTURE"]
        sse_start = None
        for i, line in enumerate(lines):
            if line.split()[:4] == header_tokens:
                sse_start = i + 1
        if sse_start is None:
            raise ValueError("DSSP file does not contain SSE records")
        # Skip blank (including whitespace-only) lines, which are too
        # short for the fixed-column access below, and
        # remove "!" for missing residues
        records = [
            line
            for line in lines[sse_start:]
            if len(line.strip()) != 0 and line[13] != "!"
        ]
        # The SSE letter is located in a fixed column of each record
        sse = np.array([line[16] for line in records], dtype="U1")
        # A blank symbol is reported as coil
        sse[sse == " "] = "C"
        self._sse = sse

    def clean_up(self):
        """
        Pass both temporary files to :func:`cleanup_tempfile()`.
        """
        super().clean_up()
        cleanup_tempfile(self._in_file)
        cleanup_tempfile(self._out_file)

    @requires_state(AppState.JOINED)
    def get_sse(self):
        """
        Get the resulting secondary structure assignment.

        Returns
        -------
        sse : ndarray, dtype="U1"
            An array containing DSSP secondary structure symbols
            corresponding to the residues in the input atom array.
        """
        return self._sse

    @staticmethod
    def annotate_sse(atom_array, bin_path="mkdssp"):
        """
        Perform a secondary structure assignment to an atom array.

        This is a convenience function, that wraps the :class:`DsspApp`
        execution.

        Parameters
        ----------
        atom_array : AtomArray
            The atom array to be annotated.
        bin_path : str, optional
            Path of the DSSP binary.

        Returns
        -------
        sse : ndarray, dtype="U1"
            An array containing DSSP secondary structure symbols
            corresponding to the residues in the input atom array.
        """
        app = DsspApp(atom_array, bin_path)
        app.start()
        app.join()
        return app.get_sse()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _create_pdbx_poly_seq_scheme(atom_array, entity_ids):
    """
    Build the ``pdbx_poly_seq_scheme`` category, as required by DSSP.

    The category contains one row per residue, taken from the first
    atom of each residue.

    Parameters
    ----------
    atom_array : AtomArray
        The atom array to create the category from.
    entity_ids : ndarray, dtype=str
        The entity ID for each atom.

    Returns
    -------
    pdbx_poly_seq_scheme : CIFCategory
        The ``pdbx_poly_seq_scheme`` category.
    """
    # Reduce the per-atom annotations to per-residue values
    first_atoms = get_residue_starts(atom_array)
    chains = atom_array.chain_id[first_atoms]
    names = atom_array.res_name[first_atoms]
    numbers = atom_array.res_id[first_atoms]
    insertions = atom_array.ins_code[first_atoms]
    is_hetero = atom_array.hetero[first_atoms]
    entities = entity_ids[first_atoms]

    # An empty insertion code is marked as missing value
    insertion_mask = np.where(insertions == "", MaskValue.MISSING, MaskValue.PRESENT)

    # The insertion order of this mapping is the column order
    columns = {
        "asym_id": chains,
        "entity_id": entities,
        "seq_id": numbers,
        "mon_id": names,
        "ndb_seq_num": numbers,
        "pdb_seq_num": numbers,
        "auth_seq_num": numbers,
        "pdb_mon_id": names,
        "auth_mon_id": names,
        "pdb_strand_id": chains,
        "pdb_ins_code": CIFColumn(insertions, insertion_mask),
        "hetero": np.where(is_hetero, "y", "n"),
    }

    category = CIFCategory()
    for column_name, column in columns.items():
        category[column_name] = column
    return category
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["LocalApp"]
|
|
8
|
+
|
|
9
|
+
import abc
|
|
10
|
+
import copy
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
13
|
+
from os import chdir, getcwd, remove
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
|
|
16
|
+
from biotite.application.application import (
|
|
17
|
+
Application,
|
|
18
|
+
AppState,
|
|
19
|
+
AppStateError,
|
|
20
|
+
requires_state,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LocalApp(Application, metaclass=abc.ABCMeta):
|
|
25
|
+
"""
|
|
26
|
+
The base class for all locally installed applications, that are used
|
|
27
|
+
via the command line.
|
|
28
|
+
|
|
29
|
+
Internally this creates a :class:`Popen` instance, which handles
|
|
30
|
+
the execution.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
bin_path : str
|
|
35
|
+
Path of the application represented by this class.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(self, bin_path):
    super().__init__()
    # Path of the program to be executed
    self._bin_path = bin_path
    # CLI arguments set by the subclass and extra options added by the user
    self._arguments, self._options = [], []
    # By default the working directory at creation time is used
    self._exec_dir = getcwd()
    # Not available before the application is started
    self._process = self._command = None
    # No file is used as standard input unless `set_stdin()` is called
    self._stdin_file = None
|
|
47
|
+
|
|
48
|
+
@requires_state(AppState.CREATED)
def set_arguments(self, arguments):
    """
    Define the command line arguments used for the application run.

    PROTECTED: Do not call from outside.

    Parameters
    ----------
    arguments : list of str
        A list of strings representing the command line arguments.
    """
    # Store a shallow copy instead of the object given by the caller
    own_arguments = copy.copy(arguments)
    self._arguments = own_arguments
|
|
61
|
+
|
|
62
|
+
@requires_state(AppState.CREATED)
def set_stdin(self, file):
    """
    Use a file as standard input for the application run.

    PROTECTED: Do not call from outside.

    Parameters
    ----------
    file : file object
        The file for the standard input.
        A valid file descriptor is required, hence file-like objects
        such as `StringIO` are invalid.
    """
    # The file object is only stored by this method
    self._stdin_file = file
|
|
77
|
+
|
|
78
|
+
@requires_state(AppState.CREATED)
def add_additional_options(self, options):
    """
    Add additional options for the command line program.
    These options are put before the arguments automatically
    determined by the respective :class:`LocalApp` subclass.

    This method is focused on advanced users, who have knowledge on
    the available options of the command line program and the
    options already used by the :class:`LocalApp` subclasses.
    Ignoring the already used options may result in conflicting
    CLI arguments and potential unexpected results.
    It is recommended to use this method only, when the respective
    :class:`LocalApp` subclass does not provide a method to set the
    desired option.

    Parameters
    ----------
    options : list of str
        A list of strings representing the command line options.
        A single option must be wrapped in a list as well.

    Raises
    ------
    TypeError
        If `options` is a plain string instead of a list of strings.

    Notes
    -----
    In order to see which options the command line execution used,
    try the :meth:`get_command()` method.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> # Run application without additional arguments
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> print(app.get_command())
    clustalo --in ...fa --out ...fa --force --output-order=tree-order --seqtype Protein --guidetree-out ...tree
    >>> # Run application with additional argument
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.add_additional_options(["--full"])
    >>> app.start()
    >>> app.join()
    >>> print(app.get_command())
    clustalo --full --in ...fa --out ...fa --force --output-order=tree-order --seqtype Protein --guidetree-out ...tree
    """
    # A string is an iterable of its characters: extending the option
    # list with it would silently add each character as separate option
    if isinstance(options, str):
        raise TypeError(
            "Expected a list of strings as options, but got a single string; "
            "wrap a single option in a list"
        )
    self._options.extend(options)
|
|
126
|
+
|
|
127
|
+
@requires_state(
|
|
128
|
+
AppState.RUNNING | AppState.CANCELLED | AppState.FINISHED | AppState.JOINED
|
|
129
|
+
)
|
|
130
|
+
def get_command(self):
    """
    Return the command line that was executed.

    The command is not available before the application has been
    started.

    Returns
    -------
    command : str
        The executed command, i.e. the elements of the command
        joined by single spaces.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> print(app.get_command())
    clustalo --in ...fa --out ...fa --force --output-order=tree-order --seqtype Protein --guidetree-out ...tree
    """
    command_parts = self._command
    return " ".join(command_parts)
|
|
154
|
+
|
|
155
|
+
@requires_state(AppState.CREATED)
|
|
156
|
+
def set_exec_dir(self, exec_dir):
    """
    Choose the directory the application is executed in.

    Without calling this method, the application is executed in the
    working directory at the time the application was created.

    PROTECTED: Do not call from outside.

    Parameters
    ----------
    exec_dir : str
        The execution directory.
    """
    # Only stored here; the directory is entered when the process is spawned
    self._exec_dir = exec_dir
|
|
170
|
+
|
|
171
|
+
@requires_state(AppState.RUNNING | AppState.FINISHED)
|
|
172
|
+
def get_process(self):
    """
    Give access to the underlying `Popen` instance.

    PROTECTED: Do not call from outside.

    Returns
    -------
    process : Popen
        The `Popen` instance of the spawned application.
    """
    process = self._process
    return process
|
|
184
|
+
|
|
185
|
+
@requires_state(AppState.FINISHED | AppState.JOINED)
|
|
186
|
+
def get_exit_code(self):
    """
    Return the exit code of the process.

    PROTECTED: Do not call from outside.

    Returns
    -------
    code : int
        The exit code, as reported by the `returncode` attribute
        of the process.
    """
    process = self._process
    return process.returncode
|
|
198
|
+
|
|
199
|
+
@requires_state(AppState.FINISHED | AppState.JOINED)
|
|
200
|
+
def get_stdout(self):
    """
    Return what the process wrote to its STDOUT pipe.

    PROTECTED: Do not call from outside.

    Returns
    -------
    stdout : str
        The standard output.
    """
    captured = self._stdout
    return captured
|
|
212
|
+
|
|
213
|
+
@requires_state(AppState.FINISHED | AppState.JOINED)
|
|
214
|
+
def get_stderr(self):
    """
    Return what the process wrote to its STDERR pipe.

    PROTECTED: Do not call from outside.

    Returns
    -------
    stderr : str
        The standard error.
    """
    captured = self._stderr
    return captured
|
|
226
|
+
|
|
227
|
+
def run(self):
    """
    Spawn the application as a subprocess.

    The command consists of the binary path, followed by the
    additional options and finally the arguments.
    The process is started from within the execution directory and
    its STDOUT and STDERR are captured via pipes as UTF-8 text.

    Raises
    ------
    OSError
        If the process cannot be spawned, e.g. because the binary
        does not exist.
        The original working directory is restored in this case, too.
    """
    # Assemble the command before touching the working directory,
    # so a failure here cannot leave the directory changed
    self._command = [self._bin_path] + self._options + self._arguments
    cwd = getcwd()
    chdir(self._exec_dir)
    try:
        self._process = Popen(
            self._command,
            stdin=self._stdin_file,
            stdout=PIPE,
            stderr=PIPE,
            encoding="UTF-8",
        )
    finally:
        # The working directory is global to the Python process:
        # always switch back, even if spawning failed
        chdir(cwd)
|
|
239
|
+
|
|
240
|
+
def is_finished(self):
    """
    Check whether the process has terminated.

    If the process has terminated, the contents of its STDOUT and
    STDERR pipes are collected and stored.

    Returns
    -------
    finished : bool
        True, if the process has terminated, false otherwise.
    """
    # 'poll()' gives None as long as the process is still running
    if self._process.poll() is None:
        return False
    self._stdout, self._stderr = self._process.communicate()
    return True
|
|
247
|
+
|
|
248
|
+
@requires_state(AppState.RUNNING | AppState.FINISHED)
|
|
249
|
+
def join(self, timeout=None):
    """
    Wait for the process to terminate and evaluate its results.

    Parameters
    ----------
    timeout : float, optional
        The maximum number of seconds to wait for the process.
        By default, there is no time limit.

    Raises
    ------
    TimeoutError
        If the process did not terminate within `timeout` seconds.
        The application is cancelled in this case.
    """
    # This override does not poll 'is_finished()' repeatedly:
    # 'communicate()' already blocks until the application finished
    try:
        pipes = self._process.communicate(timeout=timeout)
    except TimeoutExpired:
        self.cancel()
        raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
    self._stdout, self._stderr = pipes
    self._state = AppState.FINISHED

    try:
        self.evaluate()
    except AppStateError:
        # State errors are passed on without altering the state
        raise
    except BaseException:
        # Any other failure marks the application as cancelled
        # before the exception is passed on
        self._state = AppState.CANCELLED
        raise
    # Only reached if the evaluation succeeded
    self._state = AppState.JOINED
    self.clean_up()
|
|
270
|
+
|
|
271
|
+
def wait_interval(self):
    """
    Unsupported: this implementation of 'join()' does not use a
    waiting interval.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
|
|
274
|
+
|
|
275
|
+
def evaluate(self):
    """
    Evaluate the finished process.

    Raises
    ------
    SubprocessError
        If the process returned a non-zero exit code.
        The message contains the STDERR content of the process
        with line breaks replaced by spaces.
    """
    super().evaluate()
    # Check if application terminated correctly
    exit_code = self.get_exit_code()
    if exit_code == 0:
        return
    # Put the error output onto a single line
    err_msg = " ".join(self.get_stderr().split("\n"))
    raise SubprocessError(
        f"'{self._bin_path}' returned with exit code {exit_code}: {err_msg}"
    )
|
|
284
|
+
|
|
285
|
+
def clean_up(self):
    """
    Kill the process, if the application was cancelled.
    """
    was_cancelled = self.get_app_state() == AppState.CANCELLED
    if was_cancelled:
        self._process.kill()
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def cleanup_tempfile(temp_file):
    """
    Close a :class:`NamedTemporaryFile` and remove it from the file
    system, in case it was created with `delete` set to ``False``.

    This is a small helper function intended for usage in
    `LocalApp` subclasses.

    The manual deletion is necessary, as Windows does not allow
    opening a :class:`NamedTemporaryFile` a second time
    (e.g. by the file name), if `delete` is set to ``True``.

    Parameters
    ----------
    temp_file : NamedTemporaryFile
        The temporary file to be closed and deleted.
    """
    temp_file.close()
    try:
        remove(temp_file.name)
    except FileNotFoundError:
        # Nothing left to remove: the file was already deleted,
        # e.g. due to `TemporaryFile(delete=True)`
        return
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_version(bin_path, version_option="--version"):
    """
    Determine the version of a locally installed application.

    The application is executed with the given version option and
    the first ``<major>.<minor>`` number pair found in its standard
    output is taken as the version.

    Parameters
    ----------
    bin_path : str or Path
        Path of the application.
    version_option : str, optional
        The command line option to get the version.

    Returns
    -------
    major, minor : int
        The major and minor version number.

    Raises
    ------
    SubprocessError
        If the standard output contains no version string.
    """
    completed = subprocess.run(
        [bin_path, version_option], capture_output=True, text=True
    )
    output = completed.stdout
    # Find matches for version string containing major and minor version
    found = re.search(r"\d+\.\d+", output)
    if found is None:
        raise subprocess.SubprocessError(
            f"Could not determine '{Path(bin_path).name}' version "
            f"from the string '{output}'"
        )
    major, minor = found.group(0).split(".")
    return int(major), int(minor)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
A subpackage for multiple sequence alignments using MAFFT.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__name__ = "biotite.application.mafft"
|
|
10
|
+
__author__ = "Patrick Kunzmann"
|
|
11
|
+
|
|
12
|
+
from .app import *
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.application.mafft"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["MafftApp"]
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
from biotite.application.application import AppState, requires_state
|
|
12
|
+
from biotite.application.msaapp import MSAApp
|
|
13
|
+
from biotite.sequence.phylo.tree import Tree
|
|
14
|
+
|
|
15
|
+
# Matches the '<n>_' prefix of the sequence labels in the tree output
# of MAFFT; the pattern is not anchored, so it matches any
# (possibly empty) run of digits that is followed by an underscore
_prefix_pattern = re.compile(r"\d*_")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MafftApp(MSAApp):
    """
    Perform a multiple sequence alignment using MAFFT.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MAFFT binary.
    matrix : SubstitutionMatrix, optional
        A custom substitution matrix.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = MafftApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    -BIQTITE
    TITANITE
    -BISMITE
    --IQLITE
    """

    def __init__(self, sequences, bin_path="mafft", matrix=None):
        super().__init__(sequences, bin_path, matrix)
        self._tree = None
        # The guide tree requested via '--treeout' is read from a file
        # named after the input file with an appended '.tree'
        self._out_tree_file_name = self.get_input_file_path() + ".tree"

    def run(self):
        args = [
            "--quiet",
            "--auto",
            "--treeout",
            # Get the reordered alignment in order for
            # get_alignment_order() to work properly
            "--reorder",
        ]
        if self.get_seqtype() == "protein":
            args += ["--amino"]
        else:
            args += ["--nuc"]
        if self.get_matrix_file_path() is not None:
            args += ["--aamatrix", self.get_matrix_file_path()]
        args += [self.get_input_file_path()]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        with open(self.get_output_file_path(), "w") as f:
            # MAFFT outputs alignment to stdout
            # -> write stdout to output file name
            f.write(self.get_stdout())
        super().evaluate()
        with open(self._out_tree_file_name, "r") as file:
            raw_newick = file.read().replace("\n", "")
            # MAFFT uses sequence labels in the form '<n>_<seqname>'
            # Only the <seqname> is required
            # -> remove the '<n>_' prefix
            newick = _prefix_pattern.sub("", raw_newick)
            self._tree = Tree.from_newick(newick)

    def clean_up(self):
        # Run the clean-up of the parent classes as well, instead of
        # silently replacing it
        # NOTE(review): this presumably includes `LocalApp.clean_up()`,
        # which kills the process of a cancelled application - confirm
        super().clean_up()
        try:
            os.remove(self._out_tree_file_name)
        except FileNotFoundError:
            # The tree file was never written, e.g. because the
            # application was cancelled or failed before creating it
            pass

    @requires_state(AppState.JOINED)
    def get_guide_tree(self):
        """
        Get the guide tree created for the progressive alignment.

        Returns
        -------
        tree : Tree
            The guide tree.
        """
        return self._tree

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return True

    @staticmethod
    def supports_custom_protein_matrix():
        return True
|