biotite 0.41.2__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205)
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +1 -1
  3. biotite/application/application.py +20 -10
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +3 -3
  52. biotite/sequence/align/__init__.py +2 -2
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +1 -1
  67. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +51 -65
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +15 -17
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +221 -235
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +29 -32
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +4 -5
  130. biotite/structure/info/groups.py +1 -3
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -44
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. biotite/structure/io/ctab.py +0 -72
  186. biotite/structure/io/mmtf/__init__.py +0 -21
  187. biotite/structure/io/mmtf/assembly.py +0 -214
  188. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  189. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  190. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  191. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  192. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  193. biotite/structure/io/mmtf/decode.pyx +0 -152
  194. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  195. biotite/structure/io/mmtf/encode.pyx +0 -183
  196. biotite/structure/io/mmtf/file.py +0 -233
  197. biotite/structure/io/npz/__init__.py +0 -20
  198. biotite/structure/io/npz/file.py +0 -152
  199. biotite/structure/io/pdbx/legacy.py +0 -267
  200. biotite/structure/io/tng/__init__.py +0 -13
  201. biotite/structure/io/tng/file.py +0 -46
  202. biotite/temp.py +0 -86
  203. biotite-0.41.2.dist-info/RECORD +0 -340
  204. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
  205. {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -6,26 +6,26 @@ __name__ = "biotite.application.blast"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["BlastWebApp"]
8
8
 
9
- from .alignment import BlastAlignment
10
- from ..application import Application, requires_state, AppState
11
- from ..webapp import WebApp, RuleViolationError
12
- from ...sequence.sequence import Sequence
13
- from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
14
- from ...sequence.io.fasta.file import FastaFile
15
- from ...sequence.io.fasta.convert import get_sequence
16
- from ...sequence.align.alignment import Alignment
17
9
  import time
18
- import requests
19
10
  from xml.etree import ElementTree
20
-
11
+ import requests
12
+ from biotite.application.application import AppState, requires_state
13
+ from biotite.application.blast.alignment import BlastAlignment
14
+ from biotite.application.webapp import WebApp
15
+ from biotite.sequence.align.alignment import Alignment
16
+ from biotite.sequence.io.fasta.convert import get_sequence
17
+ from biotite.sequence.io.fasta.file import FastaFile
18
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
19
+ from biotite.sequence.sequence import Sequence
21
20
 
22
21
  _ncbi_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
23
22
 
23
+
24
24
  class BlastWebApp(WebApp):
25
25
  """
26
26
  Perform a local alignment against a large sequence database using
27
27
  using the web-based BLAST application (by default NCBI BLAST).
28
-
28
+
29
29
  Parameters
30
30
  ----------
31
31
  program : str
@@ -35,7 +35,7 @@ class BlastWebApp(WebApp):
35
35
  The query sequence. If a string is provided, it is interpreted
36
36
  as path to a FASTA file, if the string contains a valid FASTA
37
37
  file extension, otherwise it is interpreted as a single letter
38
- string representation of a sequence.
38
+ string representation of a sequence.
39
39
  database : str, optional
40
40
  The NCBI sequence database to blast against. By default it
41
41
  contains all sequences (`database`='nr'`).
@@ -52,68 +52,71 @@ class BlastWebApp(WebApp):
52
52
  HTTP request. This allows the NCBI to contact you in case
53
53
  your application sends too many requests.
54
54
  """
55
-
55
+
56
56
  _last_contact = 0
57
57
  _last_request = 0
58
58
  _contact_delay = 3
59
59
  _request_delay = 60
60
-
61
- def __init__(self, program, query, database="nr",
62
- app_url=_ncbi_url, obey_rules=True,
63
- mail="padix.key@gmail.com"):
60
+
61
+ def __init__(
62
+ self,
63
+ program,
64
+ query,
65
+ database="nr",
66
+ app_url=_ncbi_url,
67
+ obey_rules=True,
68
+ mail="padix.key@gmail.com",
69
+ ):
64
70
  super().__init__(app_url, obey_rules)
65
-
71
+
66
72
  # 'megablast' is somehow not working
67
73
  # When entering the corresponding HTTPS request into a browser
68
74
  # you are redirected onto the blast mainpage
69
- if program not in ["blastn", "blastp",
70
- "blastx", "tblastn", "tblastx"]:
75
+ if program not in ["blastn", "blastp", "blastx", "tblastn", "tblastx"]:
71
76
  raise ValueError(f"'{program}' is not a valid BLAST program")
72
77
  self._program = program
73
-
74
- requires_protein = (program in ["blastp", "tblastn"])
75
- if isinstance(query, str) and query.endswith((".fa",".fst",".fasta")):
78
+
79
+ requires_protein = program in ["blastp", "tblastn"]
80
+ if isinstance(query, str) and query.endswith((".fa", ".fst", ".fasta")):
76
81
  # If string has a file extension, it is interpreted as
77
82
  # FASTA file from which the sequence is taken
78
83
  file = FastaFile.read(query)
79
84
  # Get first entry in file and take the sequence
80
- # (rather than header)
85
+ # (rather than header)
81
86
  self._query = str(get_sequence(file))
82
87
  elif isinstance(query, Sequence):
83
88
  self._query = str(query)
84
89
  else:
85
90
  self._query = query
86
-
91
+
87
92
  # Check for unsuitable symbols in query string
88
93
  if requires_protein:
89
94
  ref_alphabet = ProteinSequence.alphabet
90
95
  else:
91
96
  ref_alphabet = NucleotideSequence.alphabet_amb
92
97
  for symbol in self._query:
93
- if not symbol.upper() in ref_alphabet:
94
- raise ValueError(
95
- f"Query sequence contains unsuitable symbol {symbol}"
96
- )
97
-
98
+ if symbol.upper() not in ref_alphabet:
99
+ raise ValueError(f"Query sequence contains unsuitable symbol {symbol}")
100
+
98
101
  self._database = database
99
-
102
+
100
103
  self._gap_openining = None
101
104
  self._gap_extension = None
102
105
  self._word_size = None
103
-
106
+
104
107
  self._expect_value = None
105
108
  self._max_results = None
106
109
  self._entrez_query = None
107
-
110
+
108
111
  self._reward = None
109
112
  self._penalty = None
110
-
113
+
111
114
  self._matrix = None
112
115
  self._threshold = None
113
-
114
- self._mail=mail
116
+
117
+ self._mail = mail
115
118
  self._rid = None
116
-
119
+
117
120
  @requires_state(AppState.CREATED)
118
121
  def set_entrez_query(self, query):
119
122
  """
@@ -126,7 +129,7 @@ class BlastWebApp(WebApp):
126
129
  An NCBI Entrez query.
127
130
  """
128
131
  self._entrez_query = str(query)
129
-
132
+
130
133
  @requires_state(AppState.CREATED)
131
134
  def set_max_results(self, number):
132
135
  """
@@ -138,30 +141,30 @@ class BlastWebApp(WebApp):
138
141
  The maximum number of results.
139
142
  """
140
143
  self._max_results = number
141
-
144
+
142
145
  @requires_state(AppState.CREATED)
143
146
  def set_max_expect_value(self, value):
144
147
  """
145
148
  Set the threshold expectation value (E-value).
146
149
  No alignments with an E-value above this threshold will be
147
150
  considered.
148
-
151
+
149
152
  The E-Value is the expectation value for the number of random
150
153
  sequences of a similar sized database getting an equal or higher
151
154
  score by change when aligned with the query sequence.
152
-
155
+
153
156
  Parameters
154
157
  ----------
155
158
  value : float
156
159
  The threshold E-value.
157
160
  """
158
161
  self._expect_value = value
159
-
162
+
160
163
  @requires_state(AppState.CREATED)
161
164
  def set_gap_penalty(self, opening, extension):
162
165
  """
163
166
  Set the affine gap penalty for the alignment.
164
-
167
+
165
168
  Parameters
166
169
  ----------
167
170
  opening : float
@@ -171,75 +174,75 @@ class BlastWebApp(WebApp):
171
174
  """
172
175
  self._gap_openining = opening
173
176
  self._gap_extension = extension
174
-
177
+
175
178
  @requires_state(AppState.CREATED)
176
179
  def set_word_size(self, size):
177
180
  """
178
181
  Set the word size for alignment seeds.
179
-
182
+
180
183
  Parameters
181
184
  ----------
182
185
  size : int
183
186
  Word size.
184
187
  """
185
188
  self._word_size = size
186
-
189
+
187
190
  @requires_state(AppState.CREATED)
188
191
  def set_match_reward(self, reward):
189
192
  """
190
193
  Set the score of a symbol match in the alignment.
191
-
194
+
192
195
  Used only in 'blastn' and 'megablast'.
193
-
196
+
194
197
  Parameters
195
198
  ----------
196
199
  reward : int
197
200
  Match reward. Must be positive.
198
201
  """
199
202
  self._reward = reward
200
-
203
+
201
204
  @requires_state(AppState.CREATED)
202
205
  def set_mismatch_penalty(self, penalty):
203
206
  """
204
207
  Set the penalty of a symbol mismatch in the alignment.
205
-
208
+
206
209
  Used only in 'blastn' and 'megablast'.
207
-
210
+
208
211
  Parameters
209
212
  ----------
210
213
  penalty : int
211
214
  Mismatch penalty. Must be negative.
212
215
  """
213
216
  self._penalty = penalty
214
-
217
+
215
218
  @requires_state(AppState.CREATED)
216
219
  def set_substitution_matrix(self, matrix_name):
217
220
  """
218
221
  Set the penalty of a symbol mismatch in the alignment.
219
-
222
+
220
223
  Used only in 'blastp', "blastx', 'tblastn' and 'tblastx'.
221
-
224
+
222
225
  Parameters
223
226
  ----------
224
227
  matrix_name : str
225
228
  Name of the substitution matrix. Default is 'BLOSUM62'.
226
229
  """
227
230
  self._matrix = matrix_name.upper()
228
-
231
+
229
232
  @requires_state(AppState.CREATED)
230
233
  def set_threshold(self, threshold):
231
234
  """
232
235
  Set the threshold neighboring score for initial words.
233
-
236
+
234
237
  Used only in 'blastp', "blastx', 'tblastn' and 'tblastx'.
235
-
238
+
236
239
  Parameters
237
240
  ----------
238
241
  threshold : int
239
242
  Threshold value. Must be positve.
240
243
  """
241
244
  self._threshold = threshold
242
-
245
+
243
246
  def run(self):
244
247
  param_dict = {}
245
248
  param_dict["tool"] = "Biotite"
@@ -255,23 +258,24 @@ class BlastWebApp(WebApp):
255
258
  if self._expect_value is not None:
256
259
  param_dict["EXPECT"] = self._expect_value
257
260
  if self._gap_openining is not None and self._gap_extension is not None:
258
- param_dict["GAPCOSTS"] = "{:d} {:d}".format(self._gap_openining,
259
- self._gap_extension)
261
+ param_dict["GAPCOSTS"] = "{:d} {:d}".format(
262
+ self._gap_openining, self._gap_extension
263
+ )
260
264
  if self._word_size is not None:
261
265
  param_dict["WORD_SIZE"] = self._word_size
262
-
266
+
263
267
  if self._program in ["blastn", "megablast"]:
264
268
  if self._reward is not None:
265
269
  param_dict["NUCL_REWARD"] = self._reward
266
270
  if self._penalty is not None:
267
271
  param_dict["NUCL_PENALTY"] = self._penalty
268
-
272
+
269
273
  if self._program in ["blastp", "blastx", "tblastn", "tblastx"]:
270
274
  if self._matrix is not None:
271
275
  param_dict["MATRIX"] = self._matrix
272
276
  if self._threshold is not None:
273
277
  param_dict["THRESHOLD"] = self._threshold
274
-
278
+
275
279
  request = requests.get(self.app_url(), params=param_dict)
276
280
  if "Submitted URI too large" in request.text:
277
281
  raise ValueError("The URI is too large, try a shorter sequence")
@@ -279,11 +283,9 @@ class BlastWebApp(WebApp):
279
283
  self._request()
280
284
  info_dict = BlastWebApp._get_info(request.text)
281
285
  self._rid = info_dict["RID"]
282
-
286
+
283
287
  def is_finished(self):
284
- data_dict = {"FORMAT_OBJECT" : "SearchInfo",
285
- "RID" : self._rid,
286
- "CMD" : "Get"}
288
+ data_dict = {"FORMAT_OBJECT": "SearchInfo", "RID": self._rid, "CMD": "Get"}
287
289
  request = requests.get(self.app_url(), params=data_dict)
288
290
  self._contact()
289
291
  info_dict = BlastWebApp._get_info(request.text)
@@ -294,17 +296,17 @@ class BlastWebApp(WebApp):
294
296
  "(Server responsed status 'UNKNOWN')"
295
297
  )
296
298
  return info_dict["Status"] == "READY"
297
-
299
+
298
300
  def wait_interval(self):
299
301
  # NCBI requires a 3 second delay between server contacts
300
302
  return BlastWebApp._contact_delay
301
-
303
+
302
304
  def clean_up(self):
303
305
  param_dict = {}
304
306
  param_dict["CMD"] = "Delete"
305
307
  param_dict["RID"] = self._rid
306
- request = requests.get(self.app_url(), params=param_dict)
307
-
308
+ requests.get(self.app_url(), params=param_dict)
309
+
308
310
  def evaluate(self):
309
311
  param_dict = {}
310
312
  param_dict["tool"] = "BiotiteClient"
@@ -316,7 +318,7 @@ class BlastWebApp(WebApp):
316
318
  param_dict["NCBI_GI"] = "T"
317
319
  request = requests.get(self.app_url(), params=param_dict)
318
320
  self._contact()
319
-
321
+
320
322
  self._alignments = []
321
323
  self._xml_response = request.text
322
324
  root = ElementTree.fromstring(self._xml_response)
@@ -333,15 +335,14 @@ class BlastWebApp(WebApp):
333
335
  query_end = int(hsp.find("Hsp_query-to").text)
334
336
  hit_begin = int(hsp.find("Hsp_hit-from").text)
335
337
  hit_end = int(hsp.find("Hsp_hit-to").text)
336
-
338
+
337
339
  seq1_str = hsp.find("Hsp_qseq").text
338
340
  seq2_str = hsp.find("Hsp_hseq").text
339
341
  if self._program in ["blastn", "megablast"]:
340
342
  # NucleotideSequence/ProteinSequence do ignore gaps
341
343
  # Gaps are represented by the trace
342
344
  seq1, seq2 = [
343
- NucleotideSequence(s.replace("-", ""))
344
- for s in (seq1_str, seq2_str)
345
+ NucleotideSequence(s.replace("-", "")) for s in (seq1_str, seq2_str)
345
346
  ]
346
347
  else:
347
348
  seq1, seq2 = [
@@ -349,18 +350,24 @@ class BlastWebApp(WebApp):
349
350
  for s in (seq1_str, seq2_str)
350
351
  ]
351
352
  trace = Alignment.trace_from_strings([seq1_str, seq2_str])
352
-
353
- alignment = BlastAlignment( [seq1 ,seq2], trace, score, e_value,
354
- (query_begin, query_end),
355
- (hit_begin, hit_end),
356
- hit_id, hit_definition )
353
+
354
+ alignment = BlastAlignment(
355
+ [seq1, seq2],
356
+ trace,
357
+ score,
358
+ e_value,
359
+ (query_begin, query_end),
360
+ (hit_begin, hit_end),
361
+ hit_id,
362
+ hit_definition,
363
+ )
357
364
  self._alignments.append(alignment)
358
365
 
359
366
  @requires_state(AppState.JOINED)
360
367
  def get_xml_response(self):
361
368
  """
362
369
  Get the raw XML response.
363
-
370
+
364
371
  Returns
365
372
  -------
366
373
  response : str
@@ -372,14 +379,14 @@ class BlastWebApp(WebApp):
372
379
  def get_alignments(self):
373
380
  """
374
381
  Get the resulting local sequence alignments.
375
-
382
+
376
383
  Returns
377
384
  -------
378
385
  alignment : list of BlastAlignment
379
386
  The local sequence alignments.
380
387
  """
381
388
  return self._alignments
382
-
389
+
383
390
  @staticmethod
384
391
  def _get_info(text):
385
392
  """
@@ -399,7 +406,7 @@ class BlastWebApp(WebApp):
399
406
  pair = line.split("=")
400
407
  info_dict[pair[0].strip()] = pair[1].strip()
401
408
  return info_dict
402
-
409
+
403
410
  def _contact(self):
404
411
  """
405
412
  Resets the time since the last server contact. Used for
@@ -409,7 +416,7 @@ class BlastWebApp(WebApp):
409
416
  if (contact - BlastWebApp._last_contact) < BlastWebApp._contact_delay:
410
417
  self.violate_rule("The server was contacted too often")
411
418
  BlastWebApp._last_contact = contact
412
-
419
+
413
420
  def _request(self):
414
421
  """
415
422
  Resets the time since the last new alignment request. Used for
@@ -9,4 +9,4 @@ A subpackage for multiple sequence alignments using Clustal-Omega.
9
9
  __name__ = "biotite.application.clustalo"
10
10
  __author__ = "Patrick Kunzmann"
11
11
 
12
- from .app import *
12
+ from .app import *
@@ -8,20 +8,16 @@ __all__ = ["ClustalOmegaApp"]
8
8
 
9
9
  from tempfile import NamedTemporaryFile
10
10
  import numpy as np
11
- from ...sequence.sequence import Sequence
12
- from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
13
- from ...sequence.io.fasta.file import FastaFile
14
- from ...sequence.align.alignment import Alignment
15
- from ...sequence.phylo.tree import Tree
16
- from ..localapp import cleanup_tempfile
17
- from ..msaapp import MSAApp
18
- from ..application import AppState, requires_state
11
+ from biotite.application.application import AppState, requires_state
12
+ from biotite.application.localapp import cleanup_tempfile
13
+ from biotite.application.msaapp import MSAApp
14
+ from biotite.sequence.phylo.tree import Tree
19
15
 
20
16
 
21
17
  class ClustalOmegaApp(MSAApp):
22
18
  """
23
19
  Perform a multiple sequence alignment using Clustal-Omega.
24
-
20
+
25
21
  Parameters
26
22
  ----------
27
23
  sequences : list of ProteinSequence or NucleotideSequence
@@ -30,7 +26,7 @@ class ClustalOmegaApp(MSAApp):
30
26
  Path of the Custal-Omega binary.
31
27
  matrix : None
32
28
  This parameter is used for compatibility reasons and is ignored.
33
-
29
+
34
30
  Examples
35
31
  --------
36
32
 
@@ -48,34 +44,30 @@ class ClustalOmegaApp(MSAApp):
48
44
  -BISMITE
49
45
  --IQLITE
50
46
  """
51
-
47
+
52
48
  def __init__(self, sequences, bin_path="clustalo", matrix=None):
53
49
  super().__init__(sequences, bin_path, None)
54
50
  self._seq_count = len(sequences)
55
51
  self._mbed = True
56
52
  self._dist_matrix = None
57
53
  self._tree = None
58
- self._in_dist_matrix_file = NamedTemporaryFile(
59
- "w", suffix=".mat", delete=False
60
- )
54
+ self._in_dist_matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
61
55
  self._out_dist_matrix_file = NamedTemporaryFile(
62
56
  "r", suffix=".mat", delete=False
63
57
  )
64
- self._in_tree_file = NamedTemporaryFile(
65
- "w", suffix=".tree", delete=False
66
- )
67
- self._out_tree_file = NamedTemporaryFile(
68
- "r", suffix=".tree", delete=False
69
- )
70
-
58
+ self._in_tree_file = NamedTemporaryFile("w", suffix=".tree", delete=False)
59
+ self._out_tree_file = NamedTemporaryFile("r", suffix=".tree", delete=False)
60
+
71
61
  def run(self):
72
62
  args = [
73
- "--in", self.get_input_file_path(),
74
- "--out", self.get_output_file_path(),
63
+ "--in",
64
+ self.get_input_file_path(),
65
+ "--out",
66
+ self.get_output_file_path(),
75
67
  # The temporary files are already created
76
68
  # -> tell Clustal to overwrite these empty files
77
69
  "--force",
78
- # Tree order for get_alignment_order() to work properly
70
+ # Tree order for get_alignment_order() to work properly
79
71
  "--output-order=tree-order",
80
72
  ]
81
73
  if self.get_seqtype() == "protein":
@@ -87,28 +79,24 @@ class ClustalOmegaApp(MSAApp):
87
79
  # as input and output#
88
80
  # -> Only request tree output when not tree is input
89
81
  args += [
90
- "--guidetree-out", self._out_tree_file.name,
82
+ "--guidetree-out",
83
+ self._out_tree_file.name,
91
84
  ]
92
85
  if not self._mbed:
93
- args += [
94
- "--full",
95
- "--distmat-out", self._out_dist_matrix_file.name
96
- ]
86
+ args += ["--full", "--distmat-out", self._out_dist_matrix_file.name]
97
87
  if self._dist_matrix is not None:
98
88
  # Add the sequence names (0, 1, 2, 3 ...) as first column
99
89
  dist_matrix_with_index = np.concatenate(
100
- (
101
- np.arange(self._seq_count)[:, np.newaxis],
102
- self._dist_matrix
103
- ), axis=1
90
+ (np.arange(self._seq_count)[:, np.newaxis], self._dist_matrix), axis=1
104
91
  )
105
92
  np.savetxt(
106
- self._in_dist_matrix_file.name, dist_matrix_with_index,
93
+ self._in_dist_matrix_file.name,
94
+ dist_matrix_with_index,
107
95
  # The first line contains the amount of sequences
108
- comments = "",
109
- header = str(self._seq_count),
96
+ comments="",
97
+ header=str(self._seq_count),
110
98
  # The sequence indices are integers, the rest are floats
111
- fmt = ["%d"] + ["%.5f"] * self._seq_count
99
+ fmt=["%d"] + ["%.5f"] * self._seq_count,
112
100
  )
113
101
  args += ["--distmat-in", self._in_dist_matrix_file.name]
114
102
  if self._tree is not None:
@@ -117,15 +105,15 @@ class ClustalOmegaApp(MSAApp):
117
105
  args += ["--guidetree-in", self._in_tree_file.name]
118
106
  self.set_arguments(args)
119
107
  super().run()
120
-
108
+
121
109
  def evaluate(self):
122
110
  super().evaluate()
123
111
  if not self._mbed:
124
112
  self._dist_matrix = np.loadtxt(
125
113
  self._out_dist_matrix_file.name,
126
114
  # The first row only contains the number of sequences
127
- skiprows = 1,
128
- dtype = float
115
+ skiprows=1,
116
+ dtype=float,
129
117
  )
130
118
  # The first column contains only the name of the
131
119
  # sequences, in this case 0, 1, 2, 3 ...
@@ -133,17 +121,15 @@ class ClustalOmegaApp(MSAApp):
133
121
  self._dist_matrix = self._dist_matrix[:, 1:]
134
122
  # Only read output tree if no tree was input
135
123
  if self._tree is None:
136
- self._tree = Tree.from_newick(
137
- self._out_tree_file.read().replace("\n", "")
138
- )
139
-
124
+ self._tree = Tree.from_newick(self._out_tree_file.read().replace("\n", ""))
125
+
140
126
  def clean_up(self):
141
127
  super().clean_up()
142
128
  cleanup_tempfile(self._in_dist_matrix_file)
143
129
  cleanup_tempfile(self._out_dist_matrix_file)
144
130
  cleanup_tempfile(self._in_tree_file)
145
131
  cleanup_tempfile(self._out_tree_file)
146
-
132
+
147
133
  @requires_state(AppState.CREATED)
148
134
  def full_matrix_calculation(self):
149
135
  """
@@ -154,13 +140,13 @@ class ClustalOmegaApp(MSAApp):
154
140
  default *mBed* heuristic.
155
141
  """
156
142
  self._mbed = False
157
-
143
+
158
144
  @requires_state(AppState.CREATED)
159
145
  def set_distance_matrix(self, matrix):
160
146
  """
161
147
  Set the pairwise sequence distances, the program should use to
162
- calculate the guide tree.
163
-
148
+ calculate the guide tree.
149
+
164
150
  Parameters
165
151
  ----------
166
152
  matrix : ndarray, shape=(n,n), dtype=float
@@ -172,13 +158,13 @@ class ClustalOmegaApp(MSAApp):
172
158
  f"{self._seq_count} sequences"
173
159
  )
174
160
  self._dist_matrix = matrix.astype(float, copy=False)
175
-
161
+
176
162
  @requires_state(AppState.JOINED)
177
163
  def get_distance_matrix(self):
178
164
  """
179
165
  Get the pairwise sequence distances the program used to
180
- calculate the guide tree.
181
-
166
+ calculate the guide tree.
167
+
182
168
  Returns
183
169
  -------
184
170
  matrix : ndarray, shape=(n,n), dtype=float
@@ -186,17 +172,16 @@ class ClustalOmegaApp(MSAApp):
186
172
  """
187
173
  if self._mbed:
188
174
  raise ValueError(
189
- "Getting the distance matrix requires "
190
- "'full_matrix_calculation()'"
175
+ "Getting the distance matrix requires " "'full_matrix_calculation()'"
191
176
  )
192
177
  return self._dist_matrix
193
-
178
+
194
179
  @requires_state(AppState.CREATED)
195
180
  def set_guide_tree(self, tree):
196
181
  """
197
182
  Set the guide tree, the program should use for the
198
183
  progressive alignment.
199
-
184
+
200
185
  Parameters
201
186
  ----------
202
187
  tree : Tree
@@ -208,31 +193,31 @@ class ClustalOmegaApp(MSAApp):
208
193
  "{self._seq_count} sequences, must be equal"
209
194
  )
210
195
  self._tree = tree
211
-
196
+
212
197
  @requires_state(AppState.JOINED)
213
198
  def get_guide_tree(self):
214
199
  """
215
200
  Get the guide tree created for the progressive alignment.
216
-
201
+
217
202
  Returns
218
203
  -------
219
204
  tree : Tree
220
205
  The guide tree.
221
206
  """
222
207
  return self._tree
223
-
208
+
224
209
  @staticmethod
225
210
  def supports_nucleotide():
226
211
  return True
227
-
212
+
228
213
  @staticmethod
229
214
  def supports_protein():
230
215
  return True
231
-
216
+
232
217
  @staticmethod
233
218
  def supports_custom_nucleotide_matrix():
234
219
  return False
235
-
220
+
236
221
  @staticmethod
237
222
  def supports_custom_protein_matrix():
238
223
  return False