biotite 1.1.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +6 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,163 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.muscle"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Muscle5App"]
8
+
9
+ from biotite.application.application import AppState, VersionError, requires_state
10
+ from biotite.application.localapp import get_version
11
+ from biotite.application.msaapp import MSAApp
12
+
13
+
14
class Muscle5App(MSAApp):
    """
    Perform a multiple sequence alignment using MUSCLE version 5.

    The binary is probed for its version on construction; binaries
    reporting a major version below 5 are rejected with a
    :class:`VersionError`.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.

    See also
    --------
    MuscleApp

    Notes
    -----
    Alignment ensemble generation is not supported, yet.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = Muscle5App([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BI-QTITE
    TITANITE
    BI-SMITE
    -I-QLITE
    """

    def __init__(self, sequences, bin_path="muscle"):
        # The version check comes first, so an unsuitable binary is
        # reported before the base class is initialized
        version = get_version(bin_path, "-version")
        major_version = version[0]
        if major_version < 5:
            raise VersionError(
                f"At least Muscle 5 is required, got version {major_version}"
            )

        super().__init__(sequences, bin_path)
        # ``None`` means: do not pass the respective option to MUSCLE
        self._n_threads = None
        self._refineiters = None
        self._consiters = None
        # Name of the command line flag selecting the algorithm
        self._mode = "align"

    @requires_state(AppState.CREATED)
    def set_iterations(self, consistency=None, refinement=None):
        """
        Set the number of iterations for the alignment algorithm.

        A parameter left at ``None`` keeps its previously set value.

        Parameters
        ----------
        consistency : int, optional
            The number of consistency iterations.
        refinement : int, optional
            The number of refinement iterations.
        """
        if refinement is not None:
            self._refineiters = refinement
        if consistency is not None:
            self._consiters = consistency

    @requires_state(AppState.CREATED)
    def set_thread_number(self, number):
        """
        Set the number of threads for the alignment run.

        Parameters
        ----------
        number : int
            The number of threads.
        """
        self._n_threads = number

    @requires_state(AppState.CREATED)
    def use_super5(self):
        """
        Use the *Super5* algorithm for the alignment run.
        """
        self._mode = "super5"

    def run(self):
        # Mandatory part: algorithm flag, input file and output file
        input_path = self.get_input_file_path()
        output_path = self.get_output_file_path()
        args = [f"-{self._mode}", input_path, "-output", output_path]

        # Tell MUSCLE explicitly which kind of sequences it receives
        seqtype_flag = "-amino" if self.get_seqtype() == "protein" else "-nt"
        args.append(seqtype_flag)

        # Optional part: only options that were explicitly set are passed
        optional_settings = (
            ("-threads", self._n_threads),
            ("-consiters", self._consiters),
            ("-refineiters", self._refineiters),
        )
        for flag, value in optional_settings:
            if value is not None:
                args.extend((flag, str(value)))

        self.set_arguments(args)
        super().run()

    def clean_up(self):
        # No additional resources are held by this class
        super().clean_up()

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False

    @classmethod
    def align(cls, sequences, bin_path="muscle"):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`Muscle5App`
        execution: The application is created, started and joined and
        its resulting alignment is returned.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        muscle_app = cls(sequences, bin_path)
        muscle_app.start()
        muscle_app.join()
        alignment = muscle_app.get_alignment()
        return alignment
@@ -0,0 +1,18 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for obtaining sequencing data from the *NCBI*
7
+ *sequence read archive* (SRA).
8
+
9
+ It comprises two central classes:
10
+ :class:`FastqDumpApp` downloads sequence reads in FASTQ format.
11
+ If only sequences (and no scores) are required, :class:`FastaDumpApp`
12
+ writes sequence reads into FASTA format.
13
+ """
14
+
15
+ __name__ = "biotite.application.sra"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .app import *
@@ -0,0 +1,452 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.sra"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["FastaDumpApp", "FastqDumpApp"]
8
+
9
+ import abc
10
+ import glob
11
+ from os.path import join
12
+ from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
13
+ from tempfile import TemporaryDirectory
14
+ from biotite.application.application import (
15
+ Application,
16
+ AppState,
17
+ AppStateError,
18
+ requires_state,
19
+ )
20
+ from biotite.sequence.io.fasta.convert import get_sequences
21
+ from biotite.sequence.io.fasta.file import FastaFile
22
+ from biotite.sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
23
+ from biotite.sequence.io.fastq.file import FastqFile
24
+ from biotite.sequence.seqtypes import NucleotideSequence
25
+
26
+
27
+ # Do not use LocalApp, as two programs are executed
28
class _DumpApp(Application, metaclass=abc.ABCMeta):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    Two programs are run one after another in a single shell call:
    ``prefetch`` downloads the run into a temporary directory and
    ``fasterq-dump`` converts it into FASTQ file(s).

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__()
        self._prefetch_path = prefetch_path
        self._fasterq_dump_path = fasterq_dump_path
        self._uid = uid
        # The prefetched SRA file always goes into this directory,
        # even if the FASTQ output is written elsewhere
        self._sra_dir = TemporaryDirectory(suffix="_sra")
        if output_path_prefix is None:
            self._prefix = join(self._sra_dir.name, self._uid)
        else:
            self._prefix = output_path_prefix
        self._prefetch_process = None
        self._fasterq_dump_process = None

    @requires_state(AppState.RUNNING | AppState.FINISHED)
    def join(self, timeout=None):
        """
        Wait for the shell process to finish and evaluate its results.

        Parameters
        ----------
        timeout : float, optional
            The maximum time in seconds to wait for the process.
            By default, the call waits indefinitely.

        Raises
        ------
        TimeoutError
            If the process did not finish within `timeout`.
            The application is cancelled in this case.
        """
        # Override method as repetitive calls of 'is_finished()'
        # are not necessary as 'communicate()' already waits for the
        # finished application
        try:
            _, self._stderr = self._process.communicate(timeout=timeout)
        except TimeoutExpired:
            self.cancel()
            raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
        self._state = AppState.FINISHED

        try:
            self.evaluate()
        except AppStateError:
            raise
        except BaseException:
            # Any failure during evaluation (including interrupts)
            # marks the application as cancelled before propagating
            self._state = AppState.CANCELLED
            raise
        else:
            self._state = AppState.JOINED
        self.clean_up()

    def run(self):
        # Function-scope import, as 'shlex' is only required here
        import shlex

        def quoted(value):
            # Protect a single command line token from word splitting,
            # globbing and command substitution by the shell
            return shlex.quote(str(value))

        # Prefetch into a temp directory with file name equaling UID
        # This ensures that the ID in the header is not the temp prefix
        sra_dir = self._sra_dir.name
        sra_file_name = join(sra_dir, self._uid)
        output_file_name = f"{self._prefix}.fastq"
        # The option strings from the 'get_*_options()' hooks are
        # inserted verbatim, as they may contain multiple
        # whitespace-separated options
        command = (
            f"{quoted(self._prefetch_path)} -q -O {quoted(sra_dir)} "
            f"{self.get_prefetch_options()} {quoted(self._uid)}; "
            f"{quoted(self._fasterq_dump_path)} -q -o {quoted(output_file_name)} "
            f"{self.get_fastq_dump_options()} {quoted(sra_file_name)}"
        )
        self._process = Popen(
            command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
        )

    def is_finished(self):
        code = self._process.poll()
        if code is None:
            return False
        else:
            # Collect the error output for a later call of 'evaluate()'
            _, self._stderr = self._process.communicate()
            return True

    def evaluate(self):
        super().evaluate()
        # Check if application terminated correctly
        exit_code = self._process.returncode
        if exit_code != 0:
            err_msg = self._stderr.replace("\n", " ")
            raise SubprocessError(
                f"'prefetch' or 'fasterq-dump' returned with exit code "
                f"{exit_code}: {err_msg}"
            )

        # The prefix is a literal path: glob special characters
        # in it must not be interpreted as pattern
        escaped_prefix = glob.escape(self._prefix)
        self._file_names = (
            # For entries with one read per spot
            glob.glob(escaped_prefix + ".fastq")
            +
            # For entries with multiple reads per spot:
            # 'glob()' returns files in arbitrary order,
            # hence sort them to obtain the order of the reads
            sorted(glob.glob(escaped_prefix + "_*.fastq"))
        )
        # Only load FASTQ files into memory when needed
        self._fastq_files = None

    def wait_interval(self):
        # Not used in this implementation of 'join()'
        raise NotImplementedError()

    def clean_up(self):
        if self.get_app_state() == AppState.CANCELLED:
            self._process.kill()
        # Directory with temp files does not need to be deleted,
        # as temp dir is automatically deleted upon object destruction

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        The returned string is inserted verbatim into the shell command.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
        """
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        The returned string is inserted verbatim into the shell command.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
        """
        return ""

    @requires_state(AppState.JOINED)
    def get_file_paths(self):
        """
        Get the file paths to the downloaded files.

        Returns
        -------
        paths : list of str
            The file paths to the downloaded files.
        """
        return self._file_names

    @requires_state(AppState.JOINED)
    @abc.abstractmethod
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        pass
207
+
208
+
209
+ class FastqDumpApp(_DumpApp):
210
+ """
211
+ Fetch sequencing data from the *NCBI sequence read archive*
212
+ (SRA) using *sra-tools*.
213
+
214
+ Parameters
215
+ ----------
216
+ uid : str
217
+ A *unique identifier* (UID) of the file to be downloaded.
218
+ output_path_prefix : str, optional
219
+ The prefix of the path to store the downloaded FASTQ file.
220
+ ``.fastq`` is appended to this prefix if the run contains
221
+ a single read per spot.
222
+ ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
223
+ multiple reads per spot.
224
+ By default, the files are created in a temporary directory and
225
+ deleted after the files have been read.
226
+ prefetch_path, fasterq_dump_path : str, optional
227
+ Path to the ``prefetch_path`` and ``fasterq-dump`` binary,
228
+ respectively.
229
+ offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
230
+ This value is subtracted from the FASTQ ASCII code to obtain the
231
+ quality score.
232
+ Can either be directly the value, or a string that indicates
233
+ the score format.
234
+ """
235
+
236
def __init__(
    self,
    uid,
    output_path_prefix=None,
    prefetch_path="prefetch",
    fasterq_dump_path="fasterq-dump",
    offset="Sanger",
):
    # Everything except the score offset is handled by the base class
    super().__init__(
        uid,
        output_path_prefix,
        prefetch_path,
        fasterq_dump_path,
    )
    # The parsed FASTQ files are created lazily by 'get_fastq()'
    self._fastq_files = None
    self._offset = offset
247
+
248
@requires_state(AppState.JOINED)
def get_fastq(self):
    """
    Get the `FastqFile` objects from the downloaded file(s).

    The files are read on the first call and cached for
    subsequent calls.

    Returns
    -------
    fastq_files : list of FastqFile
        This list contains the reads for each spot:
        The first item contains the first read for each spot, the
        second item contains the second read for each spot (if existing),
        etc.
    """
    # Return the cached files, if they were already read
    if self._fastq_files is not None:
        return self._fastq_files

    parsed_files = []
    for path in self.get_file_paths():
        parsed_files.append(FastqFile.read(path, offset=self._offset))
    self._fastq_files = parsed_files
    return self._fastq_files
267
+
268
+ @requires_state(AppState.JOINED)
269
+ def get_sequences(self):
270
+ return [
271
+ {
272
+ header: NucleotideSequence(seq_str.replace("U", "T").replace("X", "N"))
273
+ for header, (seq_str, _) in fastq_file.items()
274
+ }
275
+ for fastq_file in self.get_fastq()
276
+ ]
277
+
278
+ @requires_state(AppState.JOINED)
279
+ def get_sequences_and_scores(self):
280
+ """
281
+ Get the sequences and score values from the downloaded file(s).
282
+
283
+ Returns
284
+ -------
285
+ sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
286
+ This list contains the reads for each spot:
287
+ The first item contains the first read for each spot, the
288
+ second item contains the second read for each spot (if existing),
289
+ etc.
290
+ Each item in the list is a dictionary mapping identifiers to its
291
+ corresponding sequence and score values.
292
+ """
293
+ return [get_sequences_and_scores(fastq_file) for fastq_file in self.get_fastq()]
294
+
295
+ @classmethod
296
+ def fetch(
297
+ cls,
298
+ uid,
299
+ output_path_prefix=None,
300
+ prefetch_path="prefetch",
301
+ fasterq_dump_path="fasterq-dump",
302
+ offset="Sanger",
303
+ ):
304
+ """
305
+ Get the sequences belonging to the UID from the
306
+ *NCBI sequence read archive* (SRA).
307
+
308
+ Parameters
309
+ ----------
310
+ uid : str
311
+ A *unique identifier* (UID) of the file to be downloaded.
312
+ output_path_prefix : str, optional
313
+ The prefix of the path to store the downloaded FASTQ file.
314
+ ``.fastq`` is appended to this prefix if the run contains
315
+ a single read per spot.
316
+ ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
317
+ multiple reads per spot.
318
+ By default, the files are created in a temporary directory and
319
+ deleted after the files have been read.
320
+ prefetch_path, fasterq_dump_path : str, optional
321
+ Path to the ``prefetch_path`` and ``fasterq-dump`` binary,
322
+ respectively.
323
+ offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
324
+ This value is subtracted from the FASTQ ASCII code to obtain the
325
+ quality score.
326
+ Can either be directly the value, or a string that indicates
327
+ the score format.
328
+
329
+ Returns
330
+ -------
331
+ sequences : list of dict (str -> NucleotideSequence)
332
+ This list contains the reads for each spot:
333
+ The first item contains the first read for each spot, the
334
+ second item contains the second read for each spot (if existing),
335
+ etc.
336
+ Each item in the list is a dictionary mapping identifiers to its
337
+ corresponding sequence.
338
+ """
339
+ app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset)
340
+ app.start()
341
+ app.join()
342
+ return app.get_sequences()
343
+
344
+
345
class FastaDumpApp(_DumpApp):
    """
    Download sequencing data from the *NCBI sequence read archive*
    (SRA) with *sra-tools* and provide it as FASTA content.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__(
            uid,
            output_path_prefix,
            prefetch_path,
            fasterq_dump_path,
        )
        # Filled lazily on the first ``get_fasta()`` call
        self._fasta_files = None

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        # No additional option is returned at the moment.
        # TODO: Use '--eliminate-quals'
        # when https://github.com/ncbi/sra-tools/issues/883 is resolved
        # NOTE(review): this yields ``None`` whereas
        # ``get_fastq_dump_options()`` yields a ``str`` - confirm that
        # the caller copes with ``None``.
        return None

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        # Request FASTA output
        fasta_flag = "--fasta"
        return fasta_flag

    @requires_state(AppState.JOINED)
    def get_fasta(self):
        """
        Get the `FastaFile` objects from the downloaded file(s).

        The files are parsed on the first call only; later calls
        return the same list again.

        Returns
        -------
        fasta_files : list of FastaFile
            One file per read position within a spot:
            the first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
        """
        if self._fasta_files is not None:
            return self._fasta_files

        parsed_files = []
        for path in self.get_file_paths():
            parsed_files.append(FastaFile.read(path))
        self._fasta_files = parsed_files
        return parsed_files

    @requires_state(AppState.JOINED)
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            One dictionary per read position within a spot, each
            mapping an identifier to its corresponding sequence.
        """
        # Inside the method body the name refers to the module-level
        # function, not to this method
        return list(map(get_sequences, self.get_fasta()))

    @classmethod
    def fetch(
        cls,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        """
        Get the sequences belonging to the UID from the
        *NCBI sequence read archive* (SRA).

        Convenience wrapper that creates the application, starts it,
        joins it and returns the result of :meth:`get_sequences()`.

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded FASTQ file.
            ``.fastq`` is appended to this prefix if the run contains
            a single read per spot.
            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
            multiple reads per spot.
            By default, the files are created in a temporary directory
            and deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            One dictionary per read position within a spot:
            the first item contains the first read for each spot, the
            second item contains the second read for each spot
            (if existing), etc.
            Each dictionary maps an identifier to its corresponding
            sequence.
        """
        dump_app = cls(
            uid,
            output_path_prefix,
            prefetch_path,
            fasterq_dump_path,
        )
        dump_app.start()
        dump_app.join()
        return dump_app.get_sequences()
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for masking sequence regions using the *tantan* software.
7
+ """
8
+
9
+ __name__ = "biotite.application.tantan"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *