biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,421 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.blast"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["BlastWebApp"]
8
+
9
+ from .alignment import BlastAlignment
10
+ from ..application import Application, requires_state, AppState
11
+ from ..webapp import WebApp, RuleViolationError
12
+ from ...sequence.sequence import Sequence
13
+ from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
14
+ from ...sequence.io.fasta.file import FastaFile
15
+ from ...sequence.io.fasta.convert import get_sequence
16
+ from ...sequence.align.alignment import Alignment
17
+ import time
18
+ import requests
19
+ from xml.etree import ElementTree
20
+
21
+
22
+ _ncbi_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
23
+
24
class BlastWebApp(WebApp):
    """
    Perform a local alignment against a large sequence database
    using the web-based BLAST application (by default NCBI BLAST).

    Parameters
    ----------
    program : str
        The specific BLAST program. One of 'blastn', 'blastp',
        'blastx', 'tblastn' and 'tblastx'.
        'megablast' is currently rejected with a :class:`ValueError`
        (see the comment in the constructor).
    query : Sequence or str
        The query sequence. If a string is provided, it is interpreted
        as path to a FASTA file, if the string ends with a valid FASTA
        file extension ('.fa', '.fst' or '.fasta'), otherwise it is
        interpreted as a single letter string representation of a
        sequence.
    database : str, optional
        The NCBI sequence database to blast against. By default it
        contains all sequences (``database='nr'``).
    app_url : str, optional
        URL of the BLAST web app. By default NCBI BLAST is used.
        This can be changed to a private server or another cloud
        provider.
    obey_rules : bool, optional
        If true, the application raises an :class:`RuleViolationError`,
        if the server is contacted too often, based on the NCBI BLAST
        usage rules. (Default: True)
    mail : str, optional
        If a mail address is provided, it will be appended in the
        HTTP request. This allows the NCBI to contact you in case
        your application sends too many requests.
    """

    # Bookkeeping for the usage rules.
    # These are class attributes, i.e. the timestamps are shared by all
    # instances of this class within the running process.
    # Timestamps (seconds, from 'time.time()') of the last contacts
    _last_contact = 0
    _last_request = 0
    # Minimum number of seconds between two contacts/requests
    _contact_delay = 3
    _request_delay = 60

    def __init__(self, program, query, database="nr",
                 app_url=_ncbi_url, obey_rules=True,
                 mail="padix.key@gmail.com"):
        super().__init__(app_url, obey_rules)

        # 'megablast' is somehow not working
        # When entering the corresponding HTTPS request into a browser
        # you are redirected onto the blast mainpage
        if program not in ["blastn", "blastp",
                           "blastx", "tblastn", "tblastx"]:
            raise ValueError(f"'{program}' is not a valid BLAST program")
        self._program = program

        # Only these programs take a protein sequence as query
        requires_protein = (program in ["blastp", "tblastn"])
        if isinstance(query, str) and query.endswith((".fa",".fst",".fasta")):
            # If string has a file extension, it is interpreted as
            # FASTA file from which the sequence is taken
            file = FastaFile.read(query)
            # Get first entry in file and take the sequence
            # (rather than header)
            self._query = str(get_sequence(file))
        elif isinstance(query, Sequence):
            self._query = str(query)
        else:
            self._query = query

        # Check for unsuitable symbols in query string
        if requires_protein:
            ref_alphabet = ProteinSequence.alphabet
        else:
            ref_alphabet = NucleotideSequence.alphabet_amb
        for symbol in self._query:
            if symbol.upper() not in ref_alphabet:
                raise ValueError(
                    f"Query sequence contains unsuitable symbol {symbol}"
                )

        self._database = database

        # Optional search parameters:
        # 'None' means that the parameter is omitted from the request
        self._gap_opening = None
        self._gap_extension = None
        self._word_size = None

        self._expect_value = None
        self._max_results = None
        self._entrez_query = None

        self._reward = None
        self._penalty = None

        self._matrix = None
        self._threshold = None

        self._mail = mail
        # The request ID is obtained from the server in 'run()'
        self._rid = None

    @requires_state(AppState.CREATED)
    def set_entrez_query(self, query):
        """
        Limit the size of the database.
        Only sequences that match the query are searched.

        Parameters
        ----------
        query : Query
            An NCBI Entrez query.
        """
        self._entrez_query = str(query)

    @requires_state(AppState.CREATED)
    def set_max_results(self, number):
        """
        Limit the maximum number of results.

        Parameters
        ----------
        number : int
            The maximum number of results.
        """
        self._max_results = number

    @requires_state(AppState.CREATED)
    def set_max_expect_value(self, value):
        """
        Set the threshold expectation value (E-value).
        No alignments with an E-value above this threshold will be
        considered.

        The E-Value is the expectation value for the number of random
        sequences of a similar sized database getting an equal or higher
        score by chance when aligned with the query sequence.

        Parameters
        ----------
        value : float
            The threshold E-value.
        """
        self._expect_value = value

    @requires_state(AppState.CREATED)
    def set_gap_penalty(self, opening, extension):
        """
        Set the affine gap penalty for the alignment.

        The values are sent to the server as integers.
        Hence, only integral values are accepted when the application
        is run.

        Parameters
        ----------
        opening : int or float
            The penalty for gap opening.
        extension : int or float
            The penalty for gap extension.
        """
        self._gap_opening = opening
        self._gap_extension = extension

    @requires_state(AppState.CREATED)
    def set_word_size(self, size):
        """
        Set the word size for alignment seeds.

        Parameters
        ----------
        size : int
            Word size.
        """
        self._word_size = size

    @requires_state(AppState.CREATED)
    def set_match_reward(self, reward):
        """
        Set the score of a symbol match in the alignment.

        Used only in 'blastn' and 'megablast'.

        Parameters
        ----------
        reward : int
            Match reward. Must be positive.
        """
        self._reward = reward

    @requires_state(AppState.CREATED)
    def set_mismatch_penalty(self, penalty):
        """
        Set the penalty of a symbol mismatch in the alignment.

        Used only in 'blastn' and 'megablast'.

        Parameters
        ----------
        penalty : int
            Mismatch penalty. Must be negative.
        """
        self._penalty = penalty

    @requires_state(AppState.CREATED)
    def set_substitution_matrix(self, matrix_name):
        """
        Set the substitution matrix used for scoring the alignment.

        Used only in 'blastp', 'blastx', 'tblastn' and 'tblastx'.

        Parameters
        ----------
        matrix_name : str
            Name of the substitution matrix. Default is 'BLOSUM62'.
            The name is converted to upper case.
        """
        self._matrix = matrix_name.upper()

    @requires_state(AppState.CREATED)
    def set_threshold(self, threshold):
        """
        Set the threshold neighboring score for initial words.

        Used only in 'blastp', 'blastx', 'tblastn' and 'tblastx'.

        Parameters
        ----------
        threshold : int
            Threshold value. Must be positive.
        """
        self._threshold = threshold

    def run(self):
        """
        Submit the search to the server and store the request ID
        reported in the response.

        Raises
        ------
        ValueError
            If the server reports that the request URI is too large
            or if a gap penalty is not an integral number.
        """
        param_dict = {}
        param_dict["tool"] = "Biotite"
        if self._mail is not None:
            param_dict["email"] = self._mail
        param_dict["CMD"] = "Put"
        param_dict["PROGRAM"] = self._program
        param_dict["QUERY"] = str(self._query)
        param_dict["DATABASE"] = self._database
        if self._entrez_query is not None:
            param_dict["ENTREZ_QUERY"] = self._entrez_query
        if self._max_results is not None:
            param_dict["HITLIST_SIZE"] = str(self._max_results)
        if self._expect_value is not None:
            param_dict["EXPECT"] = self._expect_value
        if self._gap_opening is not None and self._gap_extension is not None:
            param_dict["GAPCOSTS"] = "{:d} {:d}".format(
                BlastWebApp._as_integer(self._gap_opening, "opening"),
                BlastWebApp._as_integer(self._gap_extension, "extension"),
            )
        if self._word_size is not None:
            param_dict["WORD_SIZE"] = self._word_size

        if self._program in ["blastn", "megablast"]:
            if self._reward is not None:
                param_dict["NUCL_REWARD"] = self._reward
            if self._penalty is not None:
                param_dict["NUCL_PENALTY"] = self._penalty

        if self._program in ["blastp", "blastx", "tblastn", "tblastx"]:
            if self._matrix is not None:
                param_dict["MATRIX"] = self._matrix
            if self._threshold is not None:
                param_dict["THRESHOLD"] = self._threshold

        request = requests.get(self.app_url(), params=param_dict)
        if "Submitted URI too large" in request.text:
            raise ValueError("The URI is too large, try a shorter sequence")
        # NOTE(review): The usage rules are checked after the server
        # has already been contacted - confirm that this is intended
        self._contact()
        self._request()
        info_dict = BlastWebApp._get_info(request.text)
        self._rid = info_dict["RID"]

    def is_finished(self):
        """
        Ask the server for the status of the submitted search.

        Returns
        -------
        finished : bool
            True, if the server reports the status 'READY'.

        Raises
        ------
        ValueError
            If the server reports the status 'UNKNOWN'.
        """
        data_dict = {"FORMAT_OBJECT" : "SearchInfo",
                     "RID"           : self._rid,
                     "CMD"           : "Get"}
        request = requests.get(self.app_url(), params=data_dict)
        self._contact()
        info_dict = BlastWebApp._get_info(request.text)
        if info_dict["Status"] == "UNKNOWN":
            # Indicates invalid query input values
            raise ValueError(
                "The input values seem to be invalid "
                "(Server responsed status 'UNKNOWN')"
            )
        return info_dict["Status"] == "READY"

    def wait_interval(self):
        """
        Get the time in seconds to wait between two status checks.
        """
        # NCBI requires a 3 second delay between server contacts
        return BlastWebApp._contact_delay

    def clean_up(self):
        """
        Request the deletion of the search with the stored request ID.
        """
        param_dict = {}
        param_dict["CMD"] = "Delete"
        param_dict["RID"] = self._rid
        # The response is not evaluated
        requests.get(self.app_url(), params=param_dict)

    def evaluate(self):
        """
        Download the search results as XML and convert each hit
        into a :class:`BlastAlignment`.

        Only the first HSP found for each hit is used.
        """
        param_dict = {}
        param_dict["tool"] = "BiotiteClient"
        if self._mail is not None:
            param_dict["email"] = self._mail
        param_dict["CMD"] = "Get"
        param_dict["RID"] = self._rid
        param_dict["FORMAT_TYPE"] = "XML"
        param_dict["NCBI_GI"] = "T"
        request = requests.get(self.app_url(), params=param_dict)
        self._contact()

        self._alignments = []
        self._xml_response = request.text
        root = ElementTree.fromstring(self._xml_response)
        # Extract BlastAlignment objects from <Hit> tags
        hit_xpath = "./BlastOutput_iterations/Iteration/Iteration_hits/Hit"
        hits = root.findall(hit_xpath)
        for hit in hits:
            hit_definition = hit.find("Hit_def").text
            hit_id = hit.find("Hit_accession").text
            hsp = hit.find(".Hit_hsps/Hsp")
            score = int(hsp.find("Hsp_score").text)
            e_value = float(hsp.find("Hsp_evalue").text)
            query_begin = int(hsp.find("Hsp_query-from").text)
            query_end = int(hsp.find("Hsp_query-to").text)
            hit_begin = int(hsp.find("Hsp_hit-from").text)
            hit_end = int(hsp.find("Hsp_hit-to").text)

            seq1_str = hsp.find("Hsp_qseq").text
            seq2_str = hsp.find("Hsp_hseq").text
            if self._program in ["blastn", "megablast"]:
                # The gap symbols are removed from the sequence strings
                # Gaps are represented by the trace
                seq1, seq2 = [
                    NucleotideSequence(s.replace("-", ""))
                    for s in (seq1_str, seq2_str)
                ]
            else:
                # In addition to the gap removal,
                # each 'U' is replaced by 'C'
                seq1, seq2 = [
                    ProteinSequence(s.replace("-", "").replace("U", "C"))
                    for s in (seq1_str, seq2_str)
                ]
            trace = Alignment.trace_from_strings([seq1_str, seq2_str])

            alignment = BlastAlignment(
                [seq1, seq2], trace, score, e_value,
                (query_begin, query_end),
                (hit_begin, hit_end),
                hit_id, hit_definition
            )
            self._alignments.append(alignment)

    @requires_state(AppState.JOINED)
    def get_xml_response(self):
        """
        Get the raw XML response.

        Returns
        -------
        response : str
            The raw XML response.
        """
        return self._xml_response

    @requires_state(AppState.JOINED)
    def get_alignments(self):
        """
        Get the resulting local sequence alignments.

        Returns
        -------
        alignment : list of BlastAlignment
            The local sequence alignments.
        """
        return self._alignments

    @staticmethod
    def _as_integer(value, name):
        """
        Convert a gap penalty into an integer.

        Integral floats (e.g. ``11.0``) are accepted, as the gap
        penalties are documented as numbers, but the request is
        formatted with integers.

        Raises
        ------
        ValueError
            If the value is not an integral number.
        """
        try:
            integer = int(value)
        except (TypeError, ValueError):
            integer = None
        if integer is None or integer != value:
            raise ValueError(
                f"Gap {name} penalty must be an integral number, "
                f"not {value!r}"
            )
        return integer

    @staticmethod
    def _get_info(text):
        """
        Get the *QBlastInfo* block of the response HTML as dictionary.

        Each ``key = value`` line between the lines containing
        ``QBlastInfoBegin`` and ``QBlastInfoEnd`` becomes one entry.
        Lines within the block that contain no ``=`` are skipped.
        """
        info_dict = {}
        in_info_block = False
        for line in text.split("\n"):
            if "QBlastInfoBegin" in line:
                in_info_block = True
                continue
            if "QBlastInfoEnd" in line:
                in_info_block = False
                continue
            if in_info_block:
                # Split only at the first '=',
                # as the value itself might contain this character
                key, separator, value = line.partition("=")
                if not separator:
                    continue
                info_dict[key.strip()] = value.strip()
        return info_dict

    def _contact(self):
        """
        Resets the time since the last server contact. Used for
        detecting server rule violation.
        """
        contact = time.time()
        if (contact - BlastWebApp._last_contact) < BlastWebApp._contact_delay:
            self.violate_rule("The server was contacted too often")
        BlastWebApp._last_contact = contact

    def _request(self):
        """
        Resets the time since the last new alignment request. Used for
        detecting server rule violation.
        """
        request = time.time()
        if (request - BlastWebApp._last_request) < BlastWebApp._request_delay:
            self.violate_rule("Too frequent BLAST requests")
        BlastWebApp._last_request = request
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for multiple sequence alignments using Clustal-Omega.
7
+ """
8
+
9
+ __name__ = "biotite.application.clustalo"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *
@@ -0,0 +1,238 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.clustalo"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["ClustalOmegaApp"]
8
+
9
+ from tempfile import NamedTemporaryFile
10
+ import numpy as np
11
+ from ...sequence.sequence import Sequence
12
+ from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
13
+ from ...sequence.io.fasta.file import FastaFile
14
+ from ...sequence.align.alignment import Alignment
15
+ from ...sequence.phylo.tree import Tree
16
+ from ..localapp import cleanup_tempfile
17
+ from ..msaapp import MSAApp
18
+ from ..application import AppState, requires_state
19
+
20
+
21
class ClustalOmegaApp(MSAApp):
    """
    Perform a multiple sequence alignment using Clustal-Omega.

    Parameters
    ----------
    sequences : list of ProteinSequence or NucleotideSequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the Clustal-Omega binary.
    matrix : None
        This parameter is used for compatibility reasons and is ignored.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    -BIQTITE
    TITANITE
    -BISMITE
    --IQLITE
    """

    def __init__(self, sequences, bin_path="clustalo", matrix=None):
        # 'matrix' is intentionally not forwarded to the superclass
        super().__init__(sequences, bin_path, None)
        self._seq_count = len(sequences)
        # True as long as 'full_matrix_calculation()' was not called
        self._mbed = True
        self._dist_matrix = None
        self._tree = None
        # The temporary files are created with 'delete=False'
        # and are handed to 'cleanup_tempfile()' in 'clean_up()'
        self._in_dist_matrix_file = NamedTemporaryFile(
            "w", suffix=".mat", delete=False
        )
        self._out_dist_matrix_file = NamedTemporaryFile(
            "r", suffix=".mat", delete=False
        )
        self._in_tree_file = NamedTemporaryFile(
            "w", suffix=".tree", delete=False
        )
        self._out_tree_file = NamedTemporaryFile(
            "r", suffix=".tree", delete=False
        )

    def run(self):
        """
        Assemble the command line arguments from the current settings,
        write the optional input distance matrix and guide tree files
        and delegate to the ``run()`` method of the superclass.
        """
        args = [
            "--in", self.get_input_file_path(),
            "--out", self.get_output_file_path(),
            # The temporary files are already created
            # -> tell Clustal to overwrite these empty files
            "--force",
            # Tree order for get_alignment_order() to work properly
            "--output-order=tree-order",
        ]
        if self.get_seqtype() == "protein":
            args += ["--seqtype", "Protein"]
        else:
            args += ["--seqtype", "DNA"]
        if self._tree is None:
            # ClustalOmega does not like when a tree is set
            # as input and output
            # -> Only request tree output when no tree is input
            args += [
                "--guidetree-out", self._out_tree_file.name,
            ]
        if not self._mbed:
            args += [
                "--full",
                "--distmat-out", self._out_dist_matrix_file.name
            ]
        if self._dist_matrix is not None:
            # Add the sequence names (0, 1, 2, 3 ...) as first column
            dist_matrix_with_index = np.concatenate(
                (
                    np.arange(self._seq_count)[:, np.newaxis],
                    self._dist_matrix
                ), axis=1
            )
            np.savetxt(
                self._in_dist_matrix_file.name, dist_matrix_with_index,
                # The first line contains the amount of sequences
                comments = "",
                header = str(self._seq_count),
                # The sequence indices are integers, the rest are floats
                fmt = ["%d"] + ["%.5f"] * self._seq_count
            )
            args += ["--distmat-in", self._in_dist_matrix_file.name]
        if self._tree is not None:
            self._in_tree_file.write(str(self._tree))
            # Ensure the tree is on disk before the program reads it
            self._in_tree_file.flush()
            args += ["--guidetree-in", self._in_tree_file.name]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        """
        Run the evaluation of the superclass and additionally read the
        output distance matrix (only if the full matrix calculation was
        requested) and the output guide tree (only if no guide tree
        was given as input).
        """
        super().evaluate()
        if not self._mbed:
            self._dist_matrix = np.loadtxt(
                self._out_dist_matrix_file.name,
                # The first row only contains the number of sequences
                skiprows = 1,
                dtype = float
            )
            # The first column contains only the name of the
            # sequences, in this case 0, 1, 2, 3 ...
            # -> Omit the first column
            self._dist_matrix = self._dist_matrix[:, 1:]
        # Only read output tree if no tree was input
        if self._tree is None:
            self._tree = Tree.from_newick(
                self._out_tree_file.read().replace("\n", "")
            )

    def clean_up(self):
        """
        Run the clean-up of the superclass and hand the four temporary
        matrix and tree files to ``cleanup_tempfile()``.
        """
        super().clean_up()
        cleanup_tempfile(self._in_dist_matrix_file)
        cleanup_tempfile(self._out_dist_matrix_file)
        cleanup_tempfile(self._in_tree_file)
        cleanup_tempfile(self._out_tree_file)

    @requires_state(AppState.CREATED)
    def full_matrix_calculation(self):
        """
        Use full distance matrix for guide-tree calculation, equivalent
        to the ``--full`` option.

        This makes the distance matrix calculation slower than using the
        default *mBed* heuristic.
        """
        self._mbed = False

    @requires_state(AppState.CREATED)
    def set_distance_matrix(self, matrix):
        """
        Set the pairwise sequence distances, the program should use to
        calculate the guide tree.

        Parameters
        ----------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If the shape of the matrix does not match the number of
            sequences.
        """
        if matrix.shape != (self._seq_count, self._seq_count):
            raise ValueError(
                f"Matrix with shape {matrix.shape} is not sufficient for "
                f"{self._seq_count} sequences"
            )
        self._dist_matrix = matrix.astype(float, copy=False)

    @requires_state(AppState.JOINED)
    def get_distance_matrix(self):
        """
        Get the pairwise sequence distances the program used to
        calculate the guide tree.

        Returns
        -------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If :meth:`full_matrix_calculation()` was not called before
            the run.
        """
        if self._mbed:
            raise ValueError(
                "Getting the distance matrix requires "
                "'full_matrix_calculation()'"
            )
        return self._dist_matrix

    @requires_state(AppState.CREATED)
    def set_guide_tree(self, tree):
        """
        Set the guide tree, the program should use for the
        progressive alignment.

        Parameters
        ----------
        tree : Tree
            The guide tree.

        Raises
        ------
        ValueError
            If ``len(tree)`` is not equal to the number of sequences.
        """
        if self._seq_count != len(tree):
            # Both fragments need the 'f' prefix, otherwise the
            # placeholder would appear literally in the message
            raise ValueError(
                f"Tree with {len(tree)} leaves is not sufficient for "
                f"{self._seq_count} sequences, must be equal"
            )
        self._tree = tree

    @requires_state(AppState.JOINED)
    def get_guide_tree(self):
        """
        Get the guide tree created for the progressive alignment.

        Returns
        -------
        tree : Tree
            The guide tree.
        """
        return self._tree

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for protein secondary structure annotation using DSSP.
7
+ """
8
+
9
+ __name__ = "biotite.application.dssp"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *