biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,163 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.muscle"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["Muscle5App"]
8
+
9
+ from biotite.application.application import AppState, VersionError, requires_state
10
+ from biotite.application.localapp import get_version
11
+ from biotite.application.msaapp import MSAApp
12
+
13
+
14
class Muscle5App(MSAApp):
    """
    Perform a multiple sequence alignment using MUSCLE version 5.

    Parameters
    ----------
    sequences : list of Sequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the MUSCLE binary.

    See Also
    --------
    MuscleApp : Interface to MUSCLE version ``<5``.

    Notes
    -----
    Alignment ensemble generation is not supported, yet.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = Muscle5App([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    BI-QTITE
    TITANITE
    BI-SMITE
    -I-QLITE
    """

    def __init__(self, sequences, bin_path="muscle"):
        # The version is checked before the base class is initialized,
        # so an unsupported binary is rejected as early as possible
        major = get_version(bin_path, "-version")[0]
        if major < 5:
            raise VersionError(f"At least Muscle 5 is required, got version {major}")

        super().__init__(sequences, bin_path)
        # Algorithm selected on the command line ('-align' or '-super5')
        self._mode = "align"
        # 'None' means that the respective option is not passed to MUSCLE
        self._consiters = None
        self._refineiters = None
        self._n_threads = None

    @requires_state(AppState.CREATED)
    def set_iterations(self, consistency=None, refinement=None):
        """
        Set the number of iterations for the alignment algorithm.

        A parameter left at ``None`` keeps its previously set value.

        Parameters
        ----------
        consistency : int, optional
            The number of consistency iterations.
        refinement : int, optional
            The number of refinement iterations.
        """
        if consistency is not None:
            self._consiters = consistency
        if refinement is not None:
            self._refineiters = refinement

    @requires_state(AppState.CREATED)
    def set_thread_number(self, number):
        """
        Set the number of threads for the alignment run.

        Parameters
        ----------
        number : int
            The number of threads.
        """
        self._n_threads = number

    @requires_state(AppState.CREATED)
    def use_super5(self):
        """
        Use the *Super5* algorithm for the alignment run.
        """
        self._mode = "super5"

    def run(self):
        # Mandatory part: algorithm, input file and output file
        arguments = [
            f"-{self._mode}",
            self.get_input_file_path(),
            "-output",
            self.get_output_file_path(),
        ]
        # Tell MUSCLE explicitly which kind of sequences it gets
        is_protein = self.get_seqtype() == "protein"
        arguments.append("-amino" if is_protein else "-nt")
        # Optional part: only options that were set are forwarded
        optional_values = (
            ("-threads", self._n_threads),
            ("-consiters", self._consiters),
            ("-refineiters", self._refineiters),
        )
        for flag, value in optional_values:
            if value is not None:
                arguments.extend((flag, str(value)))
        self.set_arguments(arguments)
        super().run()

    def clean_up(self):
        # No additional resources to release, defer to the base class
        super().clean_up()

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False

    @classmethod
    def align(cls, sequences, bin_path="muscle"):
        """
        Perform a multiple sequence alignment.

        This is a convenience function, that wraps the :class:`Muscle5App`
        execution.

        Parameters
        ----------
        sequences : iterable object of Sequence
            The sequences to be aligned.
        bin_path : str, optional
            Path of the MSA software binary. By default, the default path
            will be used.

        Returns
        -------
        alignment : Alignment
            The global multiple sequence alignment.
        """
        muscle_app = cls(sequences, bin_path)
        muscle_app.start()
        muscle_app.join()
        return muscle_app.get_alignment()
@@ -0,0 +1,18 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for obtaining sequencing data from the *NCBI*
7
+ *sequence read archive* (SRA).
8
+
9
+ It comprises two central classes:
10
+ :class:`FastqDumpApp` downloads sequence reads in FASTQ format.
11
+ If only sequences (and no scores) are required :class:`FastaDumpApp`
12
+ writes sequence reads into FASTA format.
13
+ """
14
+
15
+ __name__ = "biotite.application.sra"
16
+ __author__ = "Patrick Kunzmann"
17
+
18
+ from .app import *
@@ -0,0 +1,447 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.sra"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["FastaDumpApp", "FastqDumpApp"]
8
+
9
+ import abc
10
+ import glob
11
+ from os.path import join
12
+ from subprocess import PIPE, Popen, SubprocessError, TimeoutExpired
13
+ from tempfile import TemporaryDirectory
14
+ from biotite.application.application import (
15
+ Application,
16
+ AppState,
17
+ AppStateError,
18
+ requires_state,
19
+ )
20
+ from biotite.sequence.io.fasta.convert import get_sequences
21
+ from biotite.sequence.io.fasta.file import FastaFile
22
+ from biotite.sequence.io.fastq.convert import get_sequences as get_sequences_and_scores
23
+ from biotite.sequence.io.fastq.file import FastqFile
24
+ from biotite.sequence.seqtypes import NucleotideSequence
25
+
26
+
27
+ # Do not use LocalApp, as two programs are executed
28
class _DumpApp(Application, metaclass=abc.ABCMeta):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__()
        self._prefetch_path = prefetch_path
        self._fasterq_dump_path = fasterq_dump_path
        self._uid = uid
        # The prefetched SRA data always goes into a temporary directory,
        # even if the FASTQ output is written to a user-defined location
        self._sra_dir = TemporaryDirectory(suffix="_sra")
        if output_path_prefix is None:
            self._prefix = join(self._sra_dir.name, self._uid)
        else:
            self._prefix = output_path_prefix
        self._prefetch_process = None
        self._fasterq_dump_process = None

    @requires_state(AppState.RUNNING | AppState.FINISHED)
    def join(self, timeout=None):
        """
        Wait for the shell command to finish and evaluate its results.

        Parameters
        ----------
        timeout : float, optional
            The maximum time in seconds to wait for the command.
            By default, the call waits without a time limit.

        Raises
        ------
        TimeoutError
            If the command did not finish within `timeout`.
            The application is cancelled in this case.
        """
        # Override method as repetitive calls of 'is_finished()'
        # are not necessary as 'communicate()' already waits for the
        # finished application
        try:
            _, self._stderr = self._process.communicate(timeout=timeout)
        except TimeoutExpired:
            self.cancel()
            raise TimeoutError(f"The application expired its timeout ({timeout:.1f} s)")
        self._state = AppState.FINISHED

        try:
            self.evaluate()
        except AppStateError:
            raise
        except BaseException:
            # Deliberately catch everything (equivalent to a bare 'except'):
            # the state is marked as cancelled and the exception is re-raised
            self._state = AppState.CANCELLED
            raise
        else:
            self._state = AppState.JOINED
        self.clean_up()

    def run(self):
        # Imported locally, as only this method builds a shell command
        from os.path import expanduser
        from shlex import quote

        def shell_arg(path):
            # The command below is interpreted by a shell:
            # Quoting keeps paths containing spaces intact and prevents
            # shell metacharacters from being executed.
            # As quoting suppresses the shell's tilde expansion, a leading
            # '~' is expanded here instead.
            return quote(expanduser(path))

        # Prefetch into a temp directory with file name equaling UID
        # This ensures that the ID in the header is not the temp prefix
        sra_file_name = join(self._sra_dir.name, self._uid)
        # The additional options are intentionally not quoted, as they
        # may comprise multiple whitespace-separated arguments.
        # NOTE: The commands are chained with ';', hence the exit code
        # checked in 'evaluate()' is the one of the last command
        command = (
            f"{shell_arg(self._prefetch_path)} -q -O {quote(self._sra_dir.name)} "
            f"{self.get_prefetch_options()} {quote(str(self._uid))}; "
            f"{shell_arg(self._fasterq_dump_path)} -q "
            f"-o {shell_arg(f'{self._prefix}.fastq')} "
            f"{self.get_fastq_dump_options()} {quote(sra_file_name)}"
        )
        self._process = Popen(
            command, stdout=PIPE, stderr=PIPE, shell=True, encoding="UTF-8"
        )

    def is_finished(self):
        code = self._process.poll()
        if code is None:
            return False
        else:
            # The process has terminated: collect its error output
            _, self._stderr = self._process.communicate()
            return True

    def evaluate(self):
        super().evaluate()
        # Check if application terminated correctly
        exit_code = self._process.returncode
        if exit_code != 0:
            err_msg = self._stderr.replace("\n", " ")
            raise SubprocessError(
                f"'prefetch' or 'fasterq-dump' returned with exit code "
                f"{exit_code}: {err_msg}"
            )

        self._file_names = (
            # For entries with one read per spot
            glob.glob(self._prefix + ".fastq")
            +
            # For entries with multiple reads per spot
            glob.glob(self._prefix + "_*.fastq")
        )
        # Only load FASTQ files into memory when needed
        self._fastq_files = None

    def wait_interval(self):
        # Not used in this implementation of 'join()'
        raise NotImplementedError()

    def clean_up(self):
        if self.get_app_state() == AppState.CANCELLED:
            self._process.kill()
        # Directory with temp files does not need to be deleted,
        # as temp dir is automatically deleted upon object destruction

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
            The string is inserted verbatim into the shell command.
        """
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        PROTECTED: Override when inheriting.

        Returns
        -------
        options: str
            The additional options.
            The string is inserted verbatim into the shell command.
        """
        return ""

    @requires_state(AppState.JOINED)
    def get_file_paths(self):
        """
        Get the file paths to the downloaded files.

        Returns
        -------
        paths : list of str
            The file paths to the downloaded files.
        """
        return self._file_names

    @requires_state(AppState.JOINED)
    @abc.abstractmethod
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        pass
202
+
203
+
204
class FastqDumpApp(_DumpApp):
    """
    Download sequencing data from the *NCBI sequence read archive*
    (SRA) by means of *sra-tools* and provide it as FASTQ data.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        If the run contains a single read per spot, ``.fastq`` is
        appended to this prefix.
        If it contains multiple reads per spot, ``_1.fastq``,
        ``_2.fastq``, etc. is appended.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
        The quality score is obtained by subtracting this value from
        the FASTQ ASCII code.
        Either the value itself or a string naming the score format
        can be given.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
        offset="Sanger",
    ):
        super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
        # Filled on the first call of `get_fastq()`
        self._fastq_files = None
        self._offset = offset

    @requires_state(AppState.JOINED)
    def get_fastq(self):
        """
        Obtain the downloaded file(s) as `FastqFile` objects.

        The files are read on the first call only; later calls return
        the same list again.

        Returns
        -------
        fastq_files : list of FastqFile
            One item per read of a spot:
            The first item holds the first read of every spot, the
            second item holds the second read of every spot
            (if existing), etc.
        """
        if self._fastq_files is not None:
            return self._fastq_files

        parsed_files = []
        for path in self.get_file_paths():
            parsed_files.append(FastqFile.read(path, offset=self._offset))
        # Store only after all files were read successfully
        self._fastq_files = parsed_files
        return self._fastq_files

    @requires_state(AppState.JOINED)
    def get_sequences(self):
        """
        Obtain the sequences stored in the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            One item per read of a spot, each one a dictionary that
            maps an identifier to its corresponding sequence.
        """
        sequences_per_read = []
        for fastq_file in self.get_fastq():
            sequences = {}
            # The score values of each entry are not needed here
            for identifier, (bases, _) in fastq_file.items():
                # 'U' becomes 'T' and 'X' becomes 'N' - presumably to
                # obtain symbols `NucleotideSequence` accepts (confirm)
                converted = bases.replace("U", "T")
                converted = converted.replace("X", "N")
                sequences[identifier] = NucleotideSequence(converted)
            sequences_per_read.append(sequences)
        return sequences_per_read

    @requires_state(AppState.JOINED)
    def get_sequences_and_scores(self):
        """
        Obtain the sequences together with their score values from the
        downloaded file(s).

        Returns
        -------
        sequences_and_scores : list of dict (str -> (NucleotideSequence, ndarray))
            One item per read of a spot:
            The first item holds the first read of every spot, the
            second item holds the second read of every spot
            (if existing), etc.
            Each item is a dictionary that maps an identifier to its
            corresponding sequence and score values.
        """
        results = []
        for fastq_file in self.get_fastq():
            # Inside a method body the bare name is looked up in the
            # module namespace, so this is not a recursive call
            results.append(get_sequences_and_scores(fastq_file))
        return results

    @classmethod
    def fetch(
        cls,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
        offset="Sanger",
    ):
        """
        Download the run belonging to the UID from the
        *NCBI sequence read archive* (SRA) and return its sequences.

        This creates an instance, starts it, waits until it has
        finished and returns the result of `get_sequences()`.

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded FASTQ file.
            If the run contains a single read per spot, ``.fastq`` is
            appended to this prefix.
            If it contains multiple reads per spot, ``_1.fastq``,
            ``_2.fastq``, etc. is appended.
            By default, the files are created in a temporary directory
            and deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.
        offset : int or {'Sanger', 'Solexa', 'Illumina-1.3', 'Illumina-1.5', 'Illumina-1.8'}, optional
            The quality score is obtained by subtracting this value
            from the FASTQ ASCII code.
            Either the value itself or a string naming the score
            format can be given.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            One item per read of a spot:
            The first item holds the first read of every spot, the
            second item holds the second read of every spot
            (if existing), etc.
            Each item is a dictionary that maps an identifier to its
            corresponding sequence.
        """
        dump_app = cls(
            uid, output_path_prefix, prefetch_path, fasterq_dump_path, offset
        )
        dump_app.start()
        dump_app.join()
        return dump_app.get_sequences()
338
+
339
+
340
class FastaDumpApp(_DumpApp):
    """
    Fetch sequencing data from the *NCBI sequence read archive*
    (SRA) using *sra-tools*.

    In contrast to a plain dump, ``--fasta`` is passed to the
    `fasterq-dump` call and the downloaded files are read as
    `FastaFile` objects.

    Parameters
    ----------
    uid : str
        A *unique identifier* (UID) of the file to be downloaded.
    output_path_prefix : str, optional
        The prefix of the path to store the downloaded FASTQ file.
        ``.fastq`` is appended to this prefix if the run contains
        a single read per spot.
        ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
        multiple reads per spot.
        By default, the files are created in a temporary directory and
        deleted after the files have been read.
    prefetch_path, fasterq_dump_path : str, optional
        Path to the ``prefetch`` and ``fasterq-dump`` binary,
        respectively.
    """

    def __init__(
        self,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        super().__init__(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
        # Filled on the first call of `get_fasta()`
        self._fasta_files = None

    @requires_state(AppState.CREATED)
    def get_prefetch_options(self):
        """
        Get additional options for the `prefetch` call.

        Returns
        -------
        options: str
            The additional options.
            Currently none are added, i.e. the string is empty.
        """
        # TODO: Use '--eliminate-quals'
        # when https://github.com/ncbi/sra-tools/issues/883 is resolved
        #
        # An empty string instead of `None` is returned, as the hook is
        # documented to return a `str`
        return ""

    @requires_state(AppState.CREATED)
    def get_fastq_dump_options(self):
        """
        Get additional options for the `fasterq-dump` call.

        Returns
        -------
        options: str
            The additional options, here ``--fasta``.
        """
        return "--fasta"

    @requires_state(AppState.JOINED)
    def get_fasta(self):
        """
        Get the `FastaFile` objects from the downloaded file(s).

        The files are read on the first call only; later calls return
        the same list again.

        Returns
        -------
        fasta_files : list of FastaFile
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
        """
        if self._fasta_files is None:
            self._fasta_files = [
                FastaFile.read(file_name) for file_name in self.get_file_paths()
            ]
        return self._fasta_files

    @requires_state(AppState.JOINED)
    def get_sequences(self):
        """
        Get the sequences from the downloaded file(s).

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot.
            Each item in the list is a dictionary mapping identifiers to
            its corresponding sequence.
        """
        # Inside a method body the bare name `get_sequences` is looked up
        # in the module namespace, so this is not a recursive call
        return [get_sequences(fasta_file) for fasta_file in self.get_fasta()]

    @classmethod
    def fetch(
        cls,
        uid,
        output_path_prefix=None,
        prefetch_path="prefetch",
        fasterq_dump_path="fasterq-dump",
    ):
        """
        Get the sequences belonging to the UID from the
        *NCBI sequence read archive* (SRA).

        This creates an instance, starts it, waits until it has
        finished and returns the result of `get_sequences()`.

        Parameters
        ----------
        uid : str
            A *unique identifier* (UID) of the file to be downloaded.
        output_path_prefix : str, optional
            The prefix of the path to store the downloaded FASTQ file.
            ``.fastq`` is appended to this prefix if the run contains
            a single read per spot.
            ``_1.fastq``, ``_2.fastq``, etc. is appended if it contains
            multiple reads per spot.
            By default, the files are created in a temporary directory and
            deleted after the files have been read.
        prefetch_path, fasterq_dump_path : str, optional
            Path to the ``prefetch`` and ``fasterq-dump`` binary,
            respectively.

        Returns
        -------
        sequences : list of dict (str -> NucleotideSequence)
            This list contains the reads for each spot:
            The first item contains the first read for each spot, the
            second item contains the second read for each spot (if existing),
            etc.
            Each item in the list is a dictionary mapping identifiers to its
            corresponding sequence.
        """
        app = cls(uid, output_path_prefix, prefetch_path, fasterq_dump_path)
        app.start()
        app.join()
        return app.get_sequences()
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for masking sequence regions using the *tantan* software.
7
+ """
8
+
9
+ __name__ = "biotite.application.tantan"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *