biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -6,26 +6,26 @@ __name__ = "biotite.application.blast"
|
|
|
6
6
|
__author__ = "Patrick Kunzmann"
|
|
7
7
|
__all__ = ["BlastWebApp"]
|
|
8
8
|
|
|
9
|
-
from .alignment import BlastAlignment
|
|
10
|
-
from ..application import Application, requires_state, AppState
|
|
11
|
-
from ..webapp import WebApp, RuleViolationError
|
|
12
|
-
from ...sequence.sequence import Sequence
|
|
13
|
-
from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
14
|
-
from ...sequence.io.fasta.file import FastaFile
|
|
15
|
-
from ...sequence.io.fasta.convert import get_sequence
|
|
16
|
-
from ...sequence.align.alignment import Alignment
|
|
17
9
|
import time
|
|
18
|
-
import requests
|
|
19
10
|
from xml.etree import ElementTree
|
|
20
|
-
|
|
11
|
+
import requests
|
|
12
|
+
from biotite.application.application import AppState, requires_state
|
|
13
|
+
from biotite.application.blast.alignment import BlastAlignment
|
|
14
|
+
from biotite.application.webapp import WebApp
|
|
15
|
+
from biotite.sequence.align.alignment import Alignment
|
|
16
|
+
from biotite.sequence.io.fasta.convert import get_sequence
|
|
17
|
+
from biotite.sequence.io.fasta.file import FastaFile
|
|
18
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
19
|
+
from biotite.sequence.sequence import Sequence
|
|
21
20
|
|
|
22
21
|
_ncbi_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
|
|
23
22
|
|
|
23
|
+
|
|
24
24
|
class BlastWebApp(WebApp):
|
|
25
25
|
"""
|
|
26
26
|
Perform a local alignment against a large sequence database using
|
|
27
27
|
using the web-based BLAST application (by default NCBI BLAST).
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
Parameters
|
|
30
30
|
----------
|
|
31
31
|
program : str
|
|
@@ -35,7 +35,7 @@ class BlastWebApp(WebApp):
|
|
|
35
35
|
The query sequence. If a string is provided, it is interpreted
|
|
36
36
|
as path to a FASTA file, if the string contains a valid FASTA
|
|
37
37
|
file extension, otherwise it is interpreted as a single letter
|
|
38
|
-
string representation of a sequence.
|
|
38
|
+
string representation of a sequence.
|
|
39
39
|
database : str, optional
|
|
40
40
|
The NCBI sequence database to blast against. By default it
|
|
41
41
|
contains all sequences (`database`='nr'`).
|
|
@@ -52,68 +52,71 @@ class BlastWebApp(WebApp):
|
|
|
52
52
|
HTTP request. This allows the NCBI to contact you in case
|
|
53
53
|
your application sends too many requests.
|
|
54
54
|
"""
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
_last_contact = 0
|
|
57
57
|
_last_request = 0
|
|
58
58
|
_contact_delay = 3
|
|
59
59
|
_request_delay = 60
|
|
60
|
-
|
|
61
|
-
def __init__(
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
|
|
61
|
+
def __init__(
|
|
62
|
+
self,
|
|
63
|
+
program,
|
|
64
|
+
query,
|
|
65
|
+
database="nr",
|
|
66
|
+
app_url=_ncbi_url,
|
|
67
|
+
obey_rules=True,
|
|
68
|
+
mail="padix.key@gmail.com",
|
|
69
|
+
):
|
|
64
70
|
super().__init__(app_url, obey_rules)
|
|
65
|
-
|
|
71
|
+
|
|
66
72
|
# 'megablast' is somehow not working
|
|
67
73
|
# When entering the corresponding HTTPS request into a browser
|
|
68
74
|
# you are redirected onto the blast mainpage
|
|
69
|
-
if program not in ["blastn", "blastp",
|
|
70
|
-
"blastx", "tblastn", "tblastx"]:
|
|
75
|
+
if program not in ["blastn", "blastp", "blastx", "tblastn", "tblastx"]:
|
|
71
76
|
raise ValueError(f"'{program}' is not a valid BLAST program")
|
|
72
77
|
self._program = program
|
|
73
|
-
|
|
74
|
-
requires_protein =
|
|
75
|
-
if isinstance(query, str) and query.endswith((".fa",".fst",".fasta")):
|
|
78
|
+
|
|
79
|
+
requires_protein = program in ["blastp", "tblastn"]
|
|
80
|
+
if isinstance(query, str) and query.endswith((".fa", ".fst", ".fasta")):
|
|
76
81
|
# If string has a file extension, it is interpreted as
|
|
77
82
|
# FASTA file from which the sequence is taken
|
|
78
83
|
file = FastaFile.read(query)
|
|
79
84
|
# Get first entry in file and take the sequence
|
|
80
|
-
# (rather than header)
|
|
85
|
+
# (rather than header)
|
|
81
86
|
self._query = str(get_sequence(file))
|
|
82
87
|
elif isinstance(query, Sequence):
|
|
83
88
|
self._query = str(query)
|
|
84
89
|
else:
|
|
85
90
|
self._query = query
|
|
86
|
-
|
|
91
|
+
|
|
87
92
|
# Check for unsuitable symbols in query string
|
|
88
93
|
if requires_protein:
|
|
89
94
|
ref_alphabet = ProteinSequence.alphabet
|
|
90
95
|
else:
|
|
91
96
|
ref_alphabet = NucleotideSequence.alphabet_amb
|
|
92
97
|
for symbol in self._query:
|
|
93
|
-
if
|
|
94
|
-
raise ValueError(
|
|
95
|
-
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
+
if symbol.upper() not in ref_alphabet:
|
|
99
|
+
raise ValueError(f"Query sequence contains unsuitable symbol {symbol}")
|
|
100
|
+
|
|
98
101
|
self._database = database
|
|
99
|
-
|
|
102
|
+
|
|
100
103
|
self._gap_openining = None
|
|
101
104
|
self._gap_extension = None
|
|
102
105
|
self._word_size = None
|
|
103
|
-
|
|
106
|
+
|
|
104
107
|
self._expect_value = None
|
|
105
108
|
self._max_results = None
|
|
106
109
|
self._entrez_query = None
|
|
107
|
-
|
|
110
|
+
|
|
108
111
|
self._reward = None
|
|
109
112
|
self._penalty = None
|
|
110
|
-
|
|
113
|
+
|
|
111
114
|
self._matrix = None
|
|
112
115
|
self._threshold = None
|
|
113
|
-
|
|
114
|
-
self._mail=mail
|
|
116
|
+
|
|
117
|
+
self._mail = mail
|
|
115
118
|
self._rid = None
|
|
116
|
-
|
|
119
|
+
|
|
117
120
|
@requires_state(AppState.CREATED)
|
|
118
121
|
def set_entrez_query(self, query):
|
|
119
122
|
"""
|
|
@@ -126,7 +129,7 @@ class BlastWebApp(WebApp):
|
|
|
126
129
|
An NCBI Entrez query.
|
|
127
130
|
"""
|
|
128
131
|
self._entrez_query = str(query)
|
|
129
|
-
|
|
132
|
+
|
|
130
133
|
@requires_state(AppState.CREATED)
|
|
131
134
|
def set_max_results(self, number):
|
|
132
135
|
"""
|
|
@@ -138,30 +141,30 @@ class BlastWebApp(WebApp):
|
|
|
138
141
|
The maximum number of results.
|
|
139
142
|
"""
|
|
140
143
|
self._max_results = number
|
|
141
|
-
|
|
144
|
+
|
|
142
145
|
@requires_state(AppState.CREATED)
|
|
143
146
|
def set_max_expect_value(self, value):
|
|
144
147
|
"""
|
|
145
148
|
Set the threshold expectation value (E-value).
|
|
146
149
|
No alignments with an E-value above this threshold will be
|
|
147
150
|
considered.
|
|
148
|
-
|
|
151
|
+
|
|
149
152
|
The E-Value is the expectation value for the number of random
|
|
150
153
|
sequences of a similar sized database getting an equal or higher
|
|
151
154
|
score by change when aligned with the query sequence.
|
|
152
|
-
|
|
155
|
+
|
|
153
156
|
Parameters
|
|
154
157
|
----------
|
|
155
158
|
value : float
|
|
156
159
|
The threshold E-value.
|
|
157
160
|
"""
|
|
158
161
|
self._expect_value = value
|
|
159
|
-
|
|
162
|
+
|
|
160
163
|
@requires_state(AppState.CREATED)
|
|
161
164
|
def set_gap_penalty(self, opening, extension):
|
|
162
165
|
"""
|
|
163
166
|
Set the affine gap penalty for the alignment.
|
|
164
|
-
|
|
167
|
+
|
|
165
168
|
Parameters
|
|
166
169
|
----------
|
|
167
170
|
opening : float
|
|
@@ -171,75 +174,75 @@ class BlastWebApp(WebApp):
|
|
|
171
174
|
"""
|
|
172
175
|
self._gap_openining = opening
|
|
173
176
|
self._gap_extension = extension
|
|
174
|
-
|
|
177
|
+
|
|
175
178
|
@requires_state(AppState.CREATED)
|
|
176
179
|
def set_word_size(self, size):
|
|
177
180
|
"""
|
|
178
181
|
Set the word size for alignment seeds.
|
|
179
|
-
|
|
182
|
+
|
|
180
183
|
Parameters
|
|
181
184
|
----------
|
|
182
185
|
size : int
|
|
183
186
|
Word size.
|
|
184
187
|
"""
|
|
185
188
|
self._word_size = size
|
|
186
|
-
|
|
189
|
+
|
|
187
190
|
@requires_state(AppState.CREATED)
|
|
188
191
|
def set_match_reward(self, reward):
|
|
189
192
|
"""
|
|
190
193
|
Set the score of a symbol match in the alignment.
|
|
191
|
-
|
|
194
|
+
|
|
192
195
|
Used only in 'blastn' and 'megablast'.
|
|
193
|
-
|
|
196
|
+
|
|
194
197
|
Parameters
|
|
195
198
|
----------
|
|
196
199
|
reward : int
|
|
197
200
|
Match reward. Must be positive.
|
|
198
201
|
"""
|
|
199
202
|
self._reward = reward
|
|
200
|
-
|
|
203
|
+
|
|
201
204
|
@requires_state(AppState.CREATED)
|
|
202
205
|
def set_mismatch_penalty(self, penalty):
|
|
203
206
|
"""
|
|
204
207
|
Set the penalty of a symbol mismatch in the alignment.
|
|
205
|
-
|
|
208
|
+
|
|
206
209
|
Used only in 'blastn' and 'megablast'.
|
|
207
|
-
|
|
210
|
+
|
|
208
211
|
Parameters
|
|
209
212
|
----------
|
|
210
213
|
penalty : int
|
|
211
214
|
Mismatch penalty. Must be negative.
|
|
212
215
|
"""
|
|
213
216
|
self._penalty = penalty
|
|
214
|
-
|
|
217
|
+
|
|
215
218
|
@requires_state(AppState.CREATED)
|
|
216
219
|
def set_substitution_matrix(self, matrix_name):
|
|
217
220
|
"""
|
|
218
221
|
Set the penalty of a symbol mismatch in the alignment.
|
|
219
|
-
|
|
222
|
+
|
|
220
223
|
Used only in 'blastp', "blastx', 'tblastn' and 'tblastx'.
|
|
221
|
-
|
|
224
|
+
|
|
222
225
|
Parameters
|
|
223
226
|
----------
|
|
224
227
|
matrix_name : str
|
|
225
228
|
Name of the substitution matrix. Default is 'BLOSUM62'.
|
|
226
229
|
"""
|
|
227
230
|
self._matrix = matrix_name.upper()
|
|
228
|
-
|
|
231
|
+
|
|
229
232
|
@requires_state(AppState.CREATED)
|
|
230
233
|
def set_threshold(self, threshold):
|
|
231
234
|
"""
|
|
232
235
|
Set the threshold neighboring score for initial words.
|
|
233
|
-
|
|
236
|
+
|
|
234
237
|
Used only in 'blastp', "blastx', 'tblastn' and 'tblastx'.
|
|
235
|
-
|
|
238
|
+
|
|
236
239
|
Parameters
|
|
237
240
|
----------
|
|
238
241
|
threshold : int
|
|
239
242
|
Threshold value. Must be positve.
|
|
240
243
|
"""
|
|
241
244
|
self._threshold = threshold
|
|
242
|
-
|
|
245
|
+
|
|
243
246
|
def run(self):
|
|
244
247
|
param_dict = {}
|
|
245
248
|
param_dict["tool"] = "Biotite"
|
|
@@ -255,23 +258,24 @@ class BlastWebApp(WebApp):
|
|
|
255
258
|
if self._expect_value is not None:
|
|
256
259
|
param_dict["EXPECT"] = self._expect_value
|
|
257
260
|
if self._gap_openining is not None and self._gap_extension is not None:
|
|
258
|
-
param_dict["GAPCOSTS"] = "{:d} {:d}".format(
|
|
259
|
-
|
|
261
|
+
param_dict["GAPCOSTS"] = "{:d} {:d}".format(
|
|
262
|
+
self._gap_openining, self._gap_extension
|
|
263
|
+
)
|
|
260
264
|
if self._word_size is not None:
|
|
261
265
|
param_dict["WORD_SIZE"] = self._word_size
|
|
262
|
-
|
|
266
|
+
|
|
263
267
|
if self._program in ["blastn", "megablast"]:
|
|
264
268
|
if self._reward is not None:
|
|
265
269
|
param_dict["NUCL_REWARD"] = self._reward
|
|
266
270
|
if self._penalty is not None:
|
|
267
271
|
param_dict["NUCL_PENALTY"] = self._penalty
|
|
268
|
-
|
|
272
|
+
|
|
269
273
|
if self._program in ["blastp", "blastx", "tblastn", "tblastx"]:
|
|
270
274
|
if self._matrix is not None:
|
|
271
275
|
param_dict["MATRIX"] = self._matrix
|
|
272
276
|
if self._threshold is not None:
|
|
273
277
|
param_dict["THRESHOLD"] = self._threshold
|
|
274
|
-
|
|
278
|
+
|
|
275
279
|
request = requests.get(self.app_url(), params=param_dict)
|
|
276
280
|
if "Submitted URI too large" in request.text:
|
|
277
281
|
raise ValueError("The URI is too large, try a shorter sequence")
|
|
@@ -279,11 +283,9 @@ class BlastWebApp(WebApp):
|
|
|
279
283
|
self._request()
|
|
280
284
|
info_dict = BlastWebApp._get_info(request.text)
|
|
281
285
|
self._rid = info_dict["RID"]
|
|
282
|
-
|
|
286
|
+
|
|
283
287
|
def is_finished(self):
|
|
284
|
-
data_dict = {"FORMAT_OBJECT"
|
|
285
|
-
"RID" : self._rid,
|
|
286
|
-
"CMD" : "Get"}
|
|
288
|
+
data_dict = {"FORMAT_OBJECT": "SearchInfo", "RID": self._rid, "CMD": "Get"}
|
|
287
289
|
request = requests.get(self.app_url(), params=data_dict)
|
|
288
290
|
self._contact()
|
|
289
291
|
info_dict = BlastWebApp._get_info(request.text)
|
|
@@ -294,17 +296,17 @@ class BlastWebApp(WebApp):
|
|
|
294
296
|
"(Server responsed status 'UNKNOWN')"
|
|
295
297
|
)
|
|
296
298
|
return info_dict["Status"] == "READY"
|
|
297
|
-
|
|
299
|
+
|
|
298
300
|
def wait_interval(self):
|
|
299
301
|
# NCBI requires a 3 second delay between server contacts
|
|
300
302
|
return BlastWebApp._contact_delay
|
|
301
|
-
|
|
303
|
+
|
|
302
304
|
def clean_up(self):
|
|
303
305
|
param_dict = {}
|
|
304
306
|
param_dict["CMD"] = "Delete"
|
|
305
307
|
param_dict["RID"] = self._rid
|
|
306
|
-
|
|
307
|
-
|
|
308
|
+
requests.get(self.app_url(), params=param_dict)
|
|
309
|
+
|
|
308
310
|
def evaluate(self):
|
|
309
311
|
param_dict = {}
|
|
310
312
|
param_dict["tool"] = "BiotiteClient"
|
|
@@ -316,7 +318,7 @@ class BlastWebApp(WebApp):
|
|
|
316
318
|
param_dict["NCBI_GI"] = "T"
|
|
317
319
|
request = requests.get(self.app_url(), params=param_dict)
|
|
318
320
|
self._contact()
|
|
319
|
-
|
|
321
|
+
|
|
320
322
|
self._alignments = []
|
|
321
323
|
self._xml_response = request.text
|
|
322
324
|
root = ElementTree.fromstring(self._xml_response)
|
|
@@ -333,15 +335,14 @@ class BlastWebApp(WebApp):
|
|
|
333
335
|
query_end = int(hsp.find("Hsp_query-to").text)
|
|
334
336
|
hit_begin = int(hsp.find("Hsp_hit-from").text)
|
|
335
337
|
hit_end = int(hsp.find("Hsp_hit-to").text)
|
|
336
|
-
|
|
338
|
+
|
|
337
339
|
seq1_str = hsp.find("Hsp_qseq").text
|
|
338
340
|
seq2_str = hsp.find("Hsp_hseq").text
|
|
339
341
|
if self._program in ["blastn", "megablast"]:
|
|
340
342
|
# NucleotideSequence/ProteinSequence do ignore gaps
|
|
341
343
|
# Gaps are represented by the trace
|
|
342
344
|
seq1, seq2 = [
|
|
343
|
-
NucleotideSequence(s.replace("-", ""))
|
|
344
|
-
for s in (seq1_str, seq2_str)
|
|
345
|
+
NucleotideSequence(s.replace("-", "")) for s in (seq1_str, seq2_str)
|
|
345
346
|
]
|
|
346
347
|
else:
|
|
347
348
|
seq1, seq2 = [
|
|
@@ -349,18 +350,24 @@ class BlastWebApp(WebApp):
|
|
|
349
350
|
for s in (seq1_str, seq2_str)
|
|
350
351
|
]
|
|
351
352
|
trace = Alignment.trace_from_strings([seq1_str, seq2_str])
|
|
352
|
-
|
|
353
|
-
alignment = BlastAlignment(
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
353
|
+
|
|
354
|
+
alignment = BlastAlignment(
|
|
355
|
+
[seq1, seq2],
|
|
356
|
+
trace,
|
|
357
|
+
score,
|
|
358
|
+
e_value,
|
|
359
|
+
(query_begin, query_end),
|
|
360
|
+
(hit_begin, hit_end),
|
|
361
|
+
hit_id,
|
|
362
|
+
hit_definition,
|
|
363
|
+
)
|
|
357
364
|
self._alignments.append(alignment)
|
|
358
365
|
|
|
359
366
|
@requires_state(AppState.JOINED)
|
|
360
367
|
def get_xml_response(self):
|
|
361
368
|
"""
|
|
362
369
|
Get the raw XML response.
|
|
363
|
-
|
|
370
|
+
|
|
364
371
|
Returns
|
|
365
372
|
-------
|
|
366
373
|
response : str
|
|
@@ -372,14 +379,14 @@ class BlastWebApp(WebApp):
|
|
|
372
379
|
def get_alignments(self):
|
|
373
380
|
"""
|
|
374
381
|
Get the resulting local sequence alignments.
|
|
375
|
-
|
|
382
|
+
|
|
376
383
|
Returns
|
|
377
384
|
-------
|
|
378
385
|
alignment : list of BlastAlignment
|
|
379
386
|
The local sequence alignments.
|
|
380
387
|
"""
|
|
381
388
|
return self._alignments
|
|
382
|
-
|
|
389
|
+
|
|
383
390
|
@staticmethod
|
|
384
391
|
def _get_info(text):
|
|
385
392
|
"""
|
|
@@ -399,7 +406,7 @@ class BlastWebApp(WebApp):
|
|
|
399
406
|
pair = line.split("=")
|
|
400
407
|
info_dict[pair[0].strip()] = pair[1].strip()
|
|
401
408
|
return info_dict
|
|
402
|
-
|
|
409
|
+
|
|
403
410
|
def _contact(self):
|
|
404
411
|
"""
|
|
405
412
|
Resets the time since the last server contact. Used for
|
|
@@ -409,7 +416,7 @@ class BlastWebApp(WebApp):
|
|
|
409
416
|
if (contact - BlastWebApp._last_contact) < BlastWebApp._contact_delay:
|
|
410
417
|
self.violate_rule("The server was contacted too often")
|
|
411
418
|
BlastWebApp._last_contact = contact
|
|
412
|
-
|
|
419
|
+
|
|
413
420
|
def _request(self):
|
|
414
421
|
"""
|
|
415
422
|
Resets the time since the last new alignment request. Used for
|
|
@@ -8,20 +8,16 @@ __all__ = ["ClustalOmegaApp"]
|
|
|
8
8
|
|
|
9
9
|
from tempfile import NamedTemporaryFile
|
|
10
10
|
import numpy as np
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from ...sequence.phylo.tree import Tree
|
|
16
|
-
from ..localapp import cleanup_tempfile
|
|
17
|
-
from ..msaapp import MSAApp
|
|
18
|
-
from ..application import AppState, requires_state
|
|
11
|
+
from biotite.application.application import AppState, requires_state
|
|
12
|
+
from biotite.application.localapp import cleanup_tempfile
|
|
13
|
+
from biotite.application.msaapp import MSAApp
|
|
14
|
+
from biotite.sequence.phylo.tree import Tree
|
|
19
15
|
|
|
20
16
|
|
|
21
17
|
class ClustalOmegaApp(MSAApp):
|
|
22
18
|
"""
|
|
23
19
|
Perform a multiple sequence alignment using Clustal-Omega.
|
|
24
|
-
|
|
20
|
+
|
|
25
21
|
Parameters
|
|
26
22
|
----------
|
|
27
23
|
sequences : list of ProteinSequence or NucleotideSequence
|
|
@@ -30,7 +26,7 @@ class ClustalOmegaApp(MSAApp):
|
|
|
30
26
|
Path of the Custal-Omega binary.
|
|
31
27
|
matrix : None
|
|
32
28
|
This parameter is used for compatibility reasons and is ignored.
|
|
33
|
-
|
|
29
|
+
|
|
34
30
|
Examples
|
|
35
31
|
--------
|
|
36
32
|
|
|
@@ -48,34 +44,30 @@ class ClustalOmegaApp(MSAApp):
|
|
|
48
44
|
-BISMITE
|
|
49
45
|
--IQLITE
|
|
50
46
|
"""
|
|
51
|
-
|
|
47
|
+
|
|
52
48
|
def __init__(self, sequences, bin_path="clustalo", matrix=None):
|
|
53
49
|
super().__init__(sequences, bin_path, None)
|
|
54
50
|
self._seq_count = len(sequences)
|
|
55
51
|
self._mbed = True
|
|
56
52
|
self._dist_matrix = None
|
|
57
53
|
self._tree = None
|
|
58
|
-
self._in_dist_matrix_file = NamedTemporaryFile(
|
|
59
|
-
"w", suffix=".mat", delete=False
|
|
60
|
-
)
|
|
54
|
+
self._in_dist_matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
|
|
61
55
|
self._out_dist_matrix_file = NamedTemporaryFile(
|
|
62
56
|
"r", suffix=".mat", delete=False
|
|
63
57
|
)
|
|
64
|
-
self._in_tree_file = NamedTemporaryFile(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
self._out_tree_file = NamedTemporaryFile(
|
|
68
|
-
"r", suffix=".tree", delete=False
|
|
69
|
-
)
|
|
70
|
-
|
|
58
|
+
self._in_tree_file = NamedTemporaryFile("w", suffix=".tree", delete=False)
|
|
59
|
+
self._out_tree_file = NamedTemporaryFile("r", suffix=".tree", delete=False)
|
|
60
|
+
|
|
71
61
|
def run(self):
|
|
72
62
|
args = [
|
|
73
|
-
"--in",
|
|
74
|
-
|
|
63
|
+
"--in",
|
|
64
|
+
self.get_input_file_path(),
|
|
65
|
+
"--out",
|
|
66
|
+
self.get_output_file_path(),
|
|
75
67
|
# The temporary files are already created
|
|
76
68
|
# -> tell Clustal to overwrite these empty files
|
|
77
69
|
"--force",
|
|
78
|
-
# Tree order for get_alignment_order() to work properly
|
|
70
|
+
# Tree order for get_alignment_order() to work properly
|
|
79
71
|
"--output-order=tree-order",
|
|
80
72
|
]
|
|
81
73
|
if self.get_seqtype() == "protein":
|
|
@@ -87,28 +79,24 @@ class ClustalOmegaApp(MSAApp):
|
|
|
87
79
|
# as input and output#
|
|
88
80
|
# -> Only request tree output when not tree is input
|
|
89
81
|
args += [
|
|
90
|
-
"--guidetree-out",
|
|
82
|
+
"--guidetree-out",
|
|
83
|
+
self._out_tree_file.name,
|
|
91
84
|
]
|
|
92
85
|
if not self._mbed:
|
|
93
|
-
args += [
|
|
94
|
-
"--full",
|
|
95
|
-
"--distmat-out", self._out_dist_matrix_file.name
|
|
96
|
-
]
|
|
86
|
+
args += ["--full", "--distmat-out", self._out_dist_matrix_file.name]
|
|
97
87
|
if self._dist_matrix is not None:
|
|
98
88
|
# Add the sequence names (0, 1, 2, 3 ...) as first column
|
|
99
89
|
dist_matrix_with_index = np.concatenate(
|
|
100
|
-
(
|
|
101
|
-
np.arange(self._seq_count)[:, np.newaxis],
|
|
102
|
-
self._dist_matrix
|
|
103
|
-
), axis=1
|
|
90
|
+
(np.arange(self._seq_count)[:, np.newaxis], self._dist_matrix), axis=1
|
|
104
91
|
)
|
|
105
92
|
np.savetxt(
|
|
106
|
-
self._in_dist_matrix_file.name,
|
|
93
|
+
self._in_dist_matrix_file.name,
|
|
94
|
+
dist_matrix_with_index,
|
|
107
95
|
# The first line contains the amount of sequences
|
|
108
|
-
comments
|
|
109
|
-
header
|
|
96
|
+
comments="",
|
|
97
|
+
header=str(self._seq_count),
|
|
110
98
|
# The sequence indices are integers, the rest are floats
|
|
111
|
-
fmt
|
|
99
|
+
fmt=["%d"] + ["%.5f"] * self._seq_count,
|
|
112
100
|
)
|
|
113
101
|
args += ["--distmat-in", self._in_dist_matrix_file.name]
|
|
114
102
|
if self._tree is not None:
|
|
@@ -117,15 +105,15 @@ class ClustalOmegaApp(MSAApp):
|
|
|
117
105
|
args += ["--guidetree-in", self._in_tree_file.name]
|
|
118
106
|
self.set_arguments(args)
|
|
119
107
|
super().run()
|
|
120
|
-
|
|
108
|
+
|
|
121
109
|
def evaluate(self):
|
|
122
110
|
super().evaluate()
|
|
123
111
|
if not self._mbed:
|
|
124
112
|
self._dist_matrix = np.loadtxt(
|
|
125
113
|
self._out_dist_matrix_file.name,
|
|
126
114
|
# The first row only contains the number of sequences
|
|
127
|
-
skiprows
|
|
128
|
-
dtype
|
|
115
|
+
skiprows=1,
|
|
116
|
+
dtype=float,
|
|
129
117
|
)
|
|
130
118
|
# The first column contains only the name of the
|
|
131
119
|
# sequences, in this case 0, 1, 2, 3 ...
|
|
@@ -133,17 +121,15 @@ class ClustalOmegaApp(MSAApp):
|
|
|
133
121
|
self._dist_matrix = self._dist_matrix[:, 1:]
|
|
134
122
|
# Only read output tree if no tree was input
|
|
135
123
|
if self._tree is None:
|
|
136
|
-
self._tree = Tree.from_newick(
|
|
137
|
-
|
|
138
|
-
)
|
|
139
|
-
|
|
124
|
+
self._tree = Tree.from_newick(self._out_tree_file.read().replace("\n", ""))
|
|
125
|
+
|
|
140
126
|
def clean_up(self):
|
|
141
127
|
super().clean_up()
|
|
142
128
|
cleanup_tempfile(self._in_dist_matrix_file)
|
|
143
129
|
cleanup_tempfile(self._out_dist_matrix_file)
|
|
144
130
|
cleanup_tempfile(self._in_tree_file)
|
|
145
131
|
cleanup_tempfile(self._out_tree_file)
|
|
146
|
-
|
|
132
|
+
|
|
147
133
|
@requires_state(AppState.CREATED)
|
|
148
134
|
def full_matrix_calculation(self):
|
|
149
135
|
"""
|
|
@@ -154,13 +140,13 @@ class ClustalOmegaApp(MSAApp):
|
|
|
154
140
|
default *mBed* heuristic.
|
|
155
141
|
"""
|
|
156
142
|
self._mbed = False
|
|
157
|
-
|
|
143
|
+
|
|
158
144
|
@requires_state(AppState.CREATED)
|
|
159
145
|
def set_distance_matrix(self, matrix):
|
|
160
146
|
"""
|
|
161
147
|
Set the pairwise sequence distances, the program should use to
|
|
162
|
-
calculate the guide tree.
|
|
163
|
-
|
|
148
|
+
calculate the guide tree.
|
|
149
|
+
|
|
164
150
|
Parameters
|
|
165
151
|
----------
|
|
166
152
|
matrix : ndarray, shape=(n,n), dtype=float
|
|
@@ -172,13 +158,13 @@ class ClustalOmegaApp(MSAApp):
|
|
|
172
158
|
f"{self._seq_count} sequences"
|
|
173
159
|
)
|
|
174
160
|
self._dist_matrix = matrix.astype(float, copy=False)
|
|
175
|
-
|
|
161
|
+
|
|
176
162
|
@requires_state(AppState.JOINED)
|
|
177
163
|
def get_distance_matrix(self):
|
|
178
164
|
"""
|
|
179
165
|
Get the pairwise sequence distances the program used to
|
|
180
|
-
calculate the guide tree.
|
|
181
|
-
|
|
166
|
+
calculate the guide tree.
|
|
167
|
+
|
|
182
168
|
Returns
|
|
183
169
|
-------
|
|
184
170
|
matrix : ndarray, shape=(n,n), dtype=float
|
|
@@ -186,17 +172,16 @@ class ClustalOmegaApp(MSAApp):
|
|
|
186
172
|
"""
|
|
187
173
|
if self._mbed:
|
|
188
174
|
raise ValueError(
|
|
189
|
-
"Getting the distance matrix requires "
|
|
190
|
-
"'full_matrix_calculation()'"
|
|
175
|
+
"Getting the distance matrix requires " "'full_matrix_calculation()'"
|
|
191
176
|
)
|
|
192
177
|
return self._dist_matrix
|
|
193
|
-
|
|
178
|
+
|
|
194
179
|
@requires_state(AppState.CREATED)
|
|
195
180
|
def set_guide_tree(self, tree):
|
|
196
181
|
"""
|
|
197
182
|
Set the guide tree, the program should use for the
|
|
198
183
|
progressive alignment.
|
|
199
|
-
|
|
184
|
+
|
|
200
185
|
Parameters
|
|
201
186
|
----------
|
|
202
187
|
tree : Tree
|
|
@@ -208,31 +193,31 @@ class ClustalOmegaApp(MSAApp):
|
|
|
208
193
|
"{self._seq_count} sequences, must be equal"
|
|
209
194
|
)
|
|
210
195
|
self._tree = tree
|
|
211
|
-
|
|
196
|
+
|
|
212
197
|
@requires_state(AppState.JOINED)
|
|
213
198
|
def get_guide_tree(self):
|
|
214
199
|
"""
|
|
215
200
|
Get the guide tree created for the progressive alignment.
|
|
216
|
-
|
|
201
|
+
|
|
217
202
|
Returns
|
|
218
203
|
-------
|
|
219
204
|
tree : Tree
|
|
220
205
|
The guide tree.
|
|
221
206
|
"""
|
|
222
207
|
return self._tree
|
|
223
|
-
|
|
208
|
+
|
|
224
209
|
@staticmethod
|
|
225
210
|
def supports_nucleotide():
|
|
226
211
|
return True
|
|
227
|
-
|
|
212
|
+
|
|
228
213
|
@staticmethod
|
|
229
214
|
def supports_protein():
|
|
230
215
|
return True
|
|
231
|
-
|
|
216
|
+
|
|
232
217
|
@staticmethod
|
|
233
218
|
def supports_custom_nucleotide_matrix():
|
|
234
219
|
return False
|
|
235
|
-
|
|
220
|
+
|
|
236
221
|
@staticmethod
|
|
237
222
|
def supports_custom_protein_matrix():
|
|
238
223
|
return False
|