biotite 0.41.1__cp310-cp310-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-310-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-310-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-310-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-310-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-310-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,171 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.muscle"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Muscle5App"]
8
+
9
+ import numbers
10
+ import warnings
11
+ from tempfile import NamedTemporaryFile
12
+ from ..localapp import cleanup_tempfile
13
+ from ..msaapp import MSAApp
14
+ from ..application import AppState, VersionError, requires_state
15
+ from ...sequence.sequence import Sequence
16
+ from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
17
+ from ...sequence.align.matrix import SubstitutionMatrix
18
+ from ...sequence.align.alignment import Alignment
19
+ from ...sequence.phylo.tree import Tree
20
+ from .app3 import get_version
21
+
22
+
23
class Muscle5App(MSAApp):
    """
    Run a multiple sequence alignment with MUSCLE (version 5 or newer).

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.

    See also
    --------
    MuscleApp

    Notes
    -----
    Alignment ensemble generation is not supported, yet.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = Muscle5App([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BI-QTITE
    TITANITE
    BI-SMITE
    -I-QLITE
    """

    def __init__(self, sequences, bin_path="muscle"):
        # Refuse to wrap binaries older than MUSCLE 5 before any other
        # setup is performed
        version = get_version(bin_path)
        major_version = version[0]
        if major_version < 5:
            raise VersionError(
                f"At least Muscle 5 is required, got version {major_version}"
            )

        super().__init__(sequences, bin_path)
        # Name of the algorithm; it is turned into the first CLI flag
        self._mode = "align"
        # 'None' means that the respective option is not passed at all
        self._consiters = None
        self._refineiters = None
        self._n_threads = None

    @requires_state(AppState.CREATED)
    def set_iterations(self, consistency=None, refinement=None):
        """
        Choose the iteration counts of the alignment algorithm.

        A value that is left at ``None`` keeps the currently stored
        setting untouched.

        Parameters
        ----------
        consistency : int, optional
            The number of consistency iterations.
        refinement : int, optional
            The number of refinement iterations.
        """
        if consistency is not None:
            self._consiters = consistency
        if refinement is not None:
            self._refineiters = refinement

    @requires_state(AppState.CREATED)
    def set_thread_number(self, number):
        """
        Choose the number of threads used for the alignment run.

        Parameters
        ----------
        number : int
            The number of threads.
        """
        self._n_threads = number

    @requires_state(AppState.CREATED)
    def use_super5(self):
        """
        Switch the alignment run to the *Super5* algorithm.
        """
        self._mode = "super5"

    def run(self):
        # Mandatory part: algorithm flag, input file and output file
        cli = [f"-{self._mode}", self.get_input_file_path()]
        cli.extend(("-output", self.get_output_file_path()))
        # Tell MUSCLE explicitly which kind of sequence it receives
        cli.append("-amino" if self.get_seqtype() == "protein" else "-nt")
        # Optional part: only settings that were explicitly set are passed
        optional_flags = (
            ("-threads", self._n_threads),
            ("-consiters", self._consiters),
            ("-refineiters", self._refineiters),
        )
        for flag, value in optional_flags:
            if value is not None:
                cli.extend((flag, str(value)))
        self.set_arguments(cli)
        super().run()

    def clean_up(self):
        # No resources beyond those of the parent class need cleanup
        super().clean_up()

    @staticmethod
    def supports_nucleotide():
        # Nucleotide input is accepted (passed via '-nt' in 'run()')
        return True

    @staticmethod
    def supports_protein():
        # Protein input is accepted (passed via '-amino' in 'run()')
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        # No substitution matrix is ever forwarded by this wrapper
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        # No substitution matrix is ever forwarded by this wrapper
        return False

    @classmethod
    def align(cls, sequences, bin_path="muscle"):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`Muscle5App`
        execution: the application is created, started and joined and
        its resulting alignment is returned.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        instance = cls(sequences, bin_path)
        instance.start()
        instance.join()
        return instance.get_alignment()
@@ -0,0 +1,18 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for obtaining sequencing data from the *NCBI*
7
+ *sequence read archive* (SRA).
8
+
9
+ It comprises two central classes:
10
+ :class:`FastqDumpApp` downloads sequence reads in FASTQ format.
11
+ If only sequences (and no scores) are required, :class:`FastaDumpApp`
12
+ writes sequence reads into FASTA format.
13
+ """
14
+
15
+ __name__ = "biotite.application.sra"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .app import *
@@ -0,0 +1,456 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.sra"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["FastaDumpApp", "FastqDumpApp"]
8
+
9
+ import abc
10
+ from os.path import join
11
+ from subprocess import Popen, SubprocessError, PIPE, TimeoutExpired
12
+ import glob
13
+ from tempfile import TemporaryDirectory
14
+ from ..application import Application, AppState, AppStateError, \
15
+ requires_state
16
+ from ...sequence.seqtypes import NucleotideSequence
17
+ from ...sequence.io.fastq.file import FastqFile
18
+ from ...sequence.io.fasta.file import FastaFile
19
+ from ...sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
20
+ from ...sequence.io.fasta.convert import get_sequences
21
+
22
+
23
+ # Do not use LocalApp, as two programs are executed
24
class _DumpApp(Application, metaclass=abc.ABCMeta):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    This is the abstract base class: subclasses need to implement
    :meth:`get_sequences()`.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(self, uid, output_path_prefix=None,
                 prefetch_path="prefetch", fasterq_dump_path="fasterq-dump"):
        super().__init__()
        self._prefetch_path = prefetch_path
        self._fasterq_dump_path = fasterq_dump_path
        self._uid = uid
        # The prefetched SRA file is always stored in this directory,
        # by default the FASTQ files are written into it as well
        self._sra_dir = TemporaryDirectory(suffix="_sra")
        if output_path_prefix is None:
            self._prefix = join(self._sra_dir.name, self._uid)
        else:
            self._prefix = output_path_prefix
        self._prefetch_process = None
        self._fasterq_dump_process = None
        # Set in 'run()', 'join()'/'is_finished()' and 'evaluate()',
        # respectively; initialized here so the attributes always exist
        self._process = None
        self._stderr = None
        self._file_names = None


    @requires_state(AppState.RUNNING | AppState.FINISHED)
    def join(self, timeout=None):
        """
        Wait for the subprocess to finish and evaluate its results.

        Parameters
        ----------
        timeout : float, optional
            The maximum time in seconds to wait for the subprocess.
            If the time is exceeded, the application is cancelled.
            By default, there is no time limit.

        Raises
        ------
        TimeoutError
            If the subprocess did not finish within `timeout`.
        """
        # Override method as repetitive calls of 'is_finished()'
        # are not necessary as 'communicate()' already waits for the
        # finished application
        try:
            _, self._stderr = self._process.communicate(
                timeout=timeout
            )
        except TimeoutExpired:
            self.cancel()
            raise TimeoutError(
                f"The application expired its timeout ({timeout:.1f} s)"
            )
        self._state = AppState.FINISHED

        try:
            self.evaluate()
        except AppStateError:
            raise
        except BaseException:
            # Any other failure (including interrupts) marks the
            # application as cancelled before the error is propagated
            self._state = AppState.CANCELLED
            raise
        else:
            self._state = AppState.JOINED
        self.clean_up()


    def run(self):
        # Imported locally, as it is only required to build the command
        from shlex import quote

        # Prefetch into a temp directory with file name equaling UID
        # This ensures that the ID in the header is not the temp prefix
        sra_file_name = join(self._sra_dir.name, self._uid)
        # The command is interpreted by a shell:
        # Quote all user-supplied values and paths, so that whitespace
        # or shell metacharacters in them are taken literally.
        # The binary paths and the additional options are deliberately
        # inserted verbatim, as the options are a string of multiple
        # shell arguments.
        # NOTE(review): 'shlex.quote()' targets POSIX shells only; the
        # ';' separator presumably requires such a shell anyway.
        command = (
            f"{self._prefetch_path} -q -O {quote(str(self._sra_dir.name))} "
            f"{self.get_prefetch_options()} {quote(str(self._uid))}; "
            f"{self._fasterq_dump_path} -q "
            f"-o {quote(f'{self._prefix}.fastq')} "
            f"{self.get_fastq_dump_options()} {quote(sra_file_name)}"
        )
        self._process = Popen(
            command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
        )


    def is_finished(self):
        code = self._process.poll()
        if code is None:
            return False
        else:
            # The process has terminated -> collect its error output
            _, self._stderr = self._process.communicate()
            return True


    def evaluate(self):
        super().evaluate()
        # Check if application terminated correctly
        exit_code = self._process.returncode
        if exit_code != 0:
            err_msg = self._stderr.replace("\n", " ")
            raise SubprocessError(
                f"'prefetch' or 'fasterq-dump' returned with exit code "
                f"{exit_code}: {err_msg}"
            )

        # The prefix is a literal path and not a pattern:
        # Special characters in it must not be interpreted by 'glob'
        escaped_prefix = glob.escape(self._prefix)
        # For entries with one read per spot
        single_read_files = glob.glob(escaped_prefix + ".fastq")
        # For entries with multiple reads per spot
        multi_read_files = glob.glob(escaped_prefix + "_*.fastq")
        # 'glob' returns files in arbitrary order:
        # Sort by read number, so the n-th file contains the n-th read.
        # As all names share the same prefix and suffix, ordering by
        # length first puts e.g. '_2' before '_10'
        multi_read_files.sort(key=lambda name: (len(name), name))
        self._file_names = single_read_files + multi_read_files
        # Only load FASTQ files into memory when needed
        self._fastq_files = None


    def wait_interval(self):
        # Not used in this implementation of 'join()'
        raise NotImplementedError()


    def clean_up(self):
        if self.get_app_state() == AppState.CANCELLED:
            self._process.kill()
        # Directory with temp files does not need to be deleted,
        # as temp dir is automatically deleted upon object destruction


    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
            The string is inserted verbatim into the shell command.
        """
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
            The string is inserted verbatim into the shell command.
        """
        return ""


    @requires_state(AppState.JOINED)
    def get_file_paths(self):
        """
        Get the file paths to the downloaded files.

        Returns
        -------
        paths : list of str
            The file paths to the downloaded files.
            If the run contains multiple reads per spot, the paths are
            ordered by read number.
        """
        return self._file_names


    @requires_state(AppState.JOINED)
    @abc.abstractmethod
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        pass
210
+
211
+
212
+ class FastqDumpApp(_DumpApp):
213
+ """
214
+ Fetch sequencing data from the *NCBI sequence read archive*
215
+ (SRA) using *sra-tools*.
216
+
217
+ Parameters
218
+ ----------
219
+ uid : str
220
+ A *unique identifier* (UID) of the file to be downloaded.
221
+ output_path_prefix : str, optional
222
+ The prefix of the path to store the downloaded FASTQ file.
223
+ ``.fastq`` is appended to this prefix if the run contains
224
+ a single read per spot.
225
+ ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
226
+ multiple reads per spot.
227
+ By default, the files are created in a temporary directory and
228
+ deleted after the files have been read.
229
+ prefetch_path, fasterq_dump_path : str, optional
230
+ Path to the ``prefetch`` and ``fasterq-dump`` binary,
231
+ respectively.
232
+ offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
233
+ This value is subtracted from the FASTQ ASCII code to obtain the
234
+ quality score.
235
+ Can either be directly the value, or a string that indicates
236
+ the score format.
237
+ """
238
+
239
+ def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
240
+ fasterq_dump_path="fasterq-dump", offset="Sanger"):
241
+ super().__init__(
242
+ uid, output_path_prefix, prefetch_path, fasterq_dump_path
243
+ )
244
+ self._offset = offset
245
+ self._fastq_files = None
246
+
247
+
248
+ @requires_state(AppState.JOINED)
249
+ def get_fastq(self):
250
+ """
251
+ Get the `FastqFile` objects from the downloaded file(s).
252
+
253
+ Returns
254
+ -------
255
+ fastq_files : list of FastqFile
256
+ This list contains the reads for each spot:
257
+ The first item contains the first read for each spot, the
258
+ second item contains the second read for each spot (if existing),
259
+ etc.
260
+ """
261
+ if self._fastq_files is None:
262
+ self._fastq_files = [
263
+ FastqFile.read(file_name, offset=self._offset)
264
+ for file_name in self.get_file_paths()
265
+ ]
266
+ return self._fastq_files
267
+
268
+
269
+ @requires_state(AppState.JOINED)
270
+ def get_sequences(self):
271
+ return [
272
+ {
273
+ header: NucleotideSequence(
274
+ seq_str.replace("U","T").replace("X","N")
275
+ )
276
+ for header, (seq_str, _) in fastq_file.items()
277
+ }
278
+ for fastq_file in self.get_fastq()
279
+ ]
280
+
281
+
282
+ @requires_state(AppState.JOINED)
283
+ def get_sequences_and_scores(self):
284
+ """
285
+ Get the sequences and score values from the downloaded file(s).
286
+
287
+ Returns
288
+ -------
289
+ sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
290
+ This list contains the reads for each spot:
291
+ The first item contains the first read for each spot, the
292
+ second item contains the second read for each spot (if existing),
293
+ etc.
294
+ Each item in the list is a dictionary mapping identifiers to its
295
+ corresponding sequence and score values.
296
+ """
297
+ return [
298
+ get_sequences_and_scores(fastq_file)
299
+ for fastq_file in self.get_fastq()
300
+ ]
301
+
302
+
303
+ @classmethod
304
+ def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
305
+ fasterq_dump_path="fasterq-dump", offset="Sanger"):
306
+ """
307
+ Get the sequences belonging to the UID from the
308
+ *NCBI sequence read archive* (SRA).
309
+
310
+ Parameters
311
+ ----------
312
+ uid : str
313
+ A *unique identifier* (UID) of the file to be downloaded.
314
+ output_path_prefix : str, optional
315
+ The prefix of the path to store the downloaded FASTQ file.
316
+ ``.fastq`` is appended to this prefix if the run contains
317
+ a single read per spot.
318
+ ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
319
+ multiple reads per spot.
320
+ By default, the files are created in a temporary directory and
321
+ deleted after the files have been read.
322
+ prefetch_path, fasterq_dump_path : str, optional
323
+ Path to the ``prefetch_path`` and ``fasterq-dump`` binary,
324
+ respectively.
325
+ offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
326
+ This value is subtracted from the FASTQ ASCII code to obtain the
327
+ quality score.
328
+ Can either be directly the value, or a string that indicates
329
+ the score format.
330
+
331
+ Returns
332
+ -------
333
+ sequences : list of dict (str -> NucleotideSequence)
334
+ This list contains the reads for each spot:
335
+ The first item contains the first read for each spot, the
336
+ second item contains the second read for each spot (if existing),
337
+ etc.
338
+ Each item in the list is a dictionary mapping identifiers to its
339
+ corresponding sequence.
340
+ """
341
+ app = cls(
342
+ uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset
343
+ )
344
+ app.start()
345
+ app.join()
346
+ return app.get_sequences()
347
+
348
+
349
class FastaDumpApp(_DumpApp):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(self, uid, output_path_prefix=None, prefetch_path="prefetch",
                 fasterq_dump_path="fasterq-dump"):
        super().__init__(
            uid, output_path_prefix, prefetch_path, fasterq_dump_path
        )
        # Filled on the first call of `get_fasta()`
        self._fasta_files = None


    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        # Return an empty string instead of ``None``:
        # The option hooks are documented to return a ``str``
        # TODO: Use '--eliminate-quals'
        # when https://github.com/ncbi/sra-tools/issues/883 is resolved
        # return "--eliminate-quals"
        return ""


    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        return "--fasta"


    @requires_state(AppState.JOINED)
    def get_fasta(self):
        """
        Get the `FastaFile` objects from the downloaded file(s).

        The files are read on the first call only; later calls return
        the same list again.

        Returns
        -------
        fasta_files : list of FastaFile
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
        """
        if self._fasta_files is None:
            self._fasta_files = [
                FastaFile.read(file_name)
                for file_name in self.get_file_paths()
            ]
        return self._fasta_files


    @requires_state(AppState.JOINED)
    def get_sequences(self):
        # Inside the method body `get_sequences` refers to the
        # module-level function, not to this method
        return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]


    @classmethod
    def fetch(cls, uid, output_path_prefix=None, prefetch_path="prefetch",
              fasterq_dump_path="fasterq-dump"):
        """
        Get the sequences belonging to the UID from the
        *NCBI sequence read archive* (SRA).

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded FASTQ file.
            ``.fastq`` is appended to this prefix if the run contains
            a single read per spot.
            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
            multiple reads per spot.
            By default, the files are created in a temporary directory
            and deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
            Each item in the list is a dictionary mapping identifiers
            to its corresponding sequence.
        """
        app = cls(
            uid, output_path_prefix, prefetch_path, fasterq_dump_path
        )
        # Run the application to completion before reading the results
        app.start()
        app.join()
        return app.get_sequences()
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for masking sequence regions using the *tantan* software.
7
+ """
8
+
9
+ __name__ = "biotite.application.tantan"  # Explicitly set the name reported by this module
10
+ __author__ = "Patrick Kunzmann"  # Authorship metadata of this subpackage
11
+
12
+ from .app import *