biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,428 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.blast"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["BlastWebApp"]
8
+
9
+ import time
10
+ from xml.etree import ElementTree
11
+ import requests
12
+ from biotite.application.application import AppState, requires_state
13
+ from biotite.application.blast.alignment import BlastAlignment
14
+ from biotite.application.webapp import WebApp
15
+ from biotite.sequence.align.alignment import Alignment
16
+ from biotite.sequence.io.fasta.convert import get_sequence
17
+ from biotite.sequence.io.fasta.file import FastaFile
18
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
19
+ from biotite.sequence.sequence import Sequence
20
+
21
+ _ncbi_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
22
+
23
+
24
class BlastWebApp(WebApp):
    """
    Perform a local alignment against a large sequence database
    using the web-based BLAST application (by default NCBI BLAST).

    Parameters
    ----------
    program : str
        The specific BLAST program. One of 'blastn', 'blastp',
        'blastx', 'tblastn' and 'tblastx'.
        'megablast' is currently rejected with a :class:`ValueError`
        (see the comment in the constructor).
    query : Sequence or str
        The query sequence. If a string is provided, it is interpreted
        as path to a FASTA file, if the string contains a valid FASTA
        file extension, otherwise it is interpreted as a single letter
        string representation of a sequence.
    database : str, optional
        The NCBI sequence database to blast against. By default it
        contains all sequences (``database='nr'``).
    app_url : str, optional
        URL of the BLAST web app. By default NCBI BLAST is used.
        This can be changed to a private server or another cloud
        provider.
    obey_rules : bool, optional
        If true, the application raises an :class:`RuleViolationError`,
        if the server is contacted too often, based on the NCBI BLAST
        usage rules.
    mail : str, optional
        If a mail address is provided, it will be appended in the
        HTTP request. This allows the NCBI to contact you in case
        your application sends too many requests.
    """

    # Timestamps (as returned by ``time.time()``) of the most recent
    # server contact and of the most recent new alignment request.
    # They are stored on the class and hence shared by all instances.
    _last_contact = 0
    _last_request = 0
    # Minimum number of seconds between two server contacts and
    # between two new alignment requests, respectively
    _contact_delay = 3
    _request_delay = 60

    def __init__(
        self,
        program,
        query,
        database="nr",
        app_url=_ncbi_url,
        obey_rules=True,
        mail="padix.key@gmail.com",
    ):
        super().__init__(app_url, obey_rules)

        # 'megablast' is somehow not working
        # When entering the corresponding HTTPS request into a browser
        # you are redirected onto the blast mainpage
        if program not in ["blastn", "blastp", "blastx", "tblastn", "tblastx"]:
            raise ValueError(f"'{program}' is not a valid BLAST program")
        self._program = program

        requires_protein = program in ["blastp", "tblastn"]
        if isinstance(query, str) and query.endswith((".fa", ".fst", ".fasta")):
            # If string has a file extension, it is interpreted as
            # FASTA file from which the sequence is taken
            file = FastaFile.read(query)
            # Get first entry in file and take the sequence
            # (rather than header)
            self._query = str(get_sequence(file))
        elif isinstance(query, Sequence):
            self._query = str(query)
        else:
            self._query = query

        # Check for unsuitable symbols in query string
        if requires_protein:
            ref_alphabet = ProteinSequence.alphabet
        else:
            ref_alphabet = NucleotideSequence.alphabet_amb
        for symbol in self._query:
            if symbol.upper() not in ref_alphabet:
                raise ValueError(f"Query sequence contains unsuitable symbol {symbol}")

        self._database = database

        # Optional search parameters:
        # 'None' means that the parameter is omitted from the request
        self._gap_opening = None
        self._gap_extension = None
        self._word_size = None

        self._expect_value = None
        self._max_results = None
        self._entrez_query = None

        self._reward = None
        self._penalty = None

        self._matrix = None
        self._threshold = None

        self._mail = mail
        # The request ID is assigned in 'run()' from the server response
        self._rid = None

    @requires_state(AppState.CREATED)
    def set_entrez_query(self, query):
        """
        Limit the size of the database.
        Only sequences that match the query are searched.

        Parameters
        ----------
        query : Query
            An NCBI Entrez query.
        """
        self._entrez_query = str(query)

    @requires_state(AppState.CREATED)
    def set_max_results(self, number):
        """
        Limit the maximum number of results.

        Parameters
        ----------
        number : int
            The maximum number of results.
        """
        self._max_results = number

    @requires_state(AppState.CREATED)
    def set_max_expect_value(self, value):
        """
        Set the threshold expectation value (E-value).
        No alignments with an E-value above this threshold will be
        considered.

        The E-Value is the expectation value for the number of random
        sequences of a similar sized database getting an equal or higher
        score by chance when aligned with the query sequence.

        Parameters
        ----------
        value : float
            The threshold E-value.
        """
        self._expect_value = value

    @requires_state(AppState.CREATED)
    def set_gap_penalty(self, opening, extension):
        """
        Set the affine gap penalty for the alignment.

        The penalty is only sent to the server, if both values are
        given.

        Parameters
        ----------
        opening : int
            The penalty for gap opening.
            Must be an integer, as the value is formatted as such when
            the request is built.
        extension : int
            The penalty for gap extension.
            Must be an integer, as the value is formatted as such when
            the request is built.
        """
        self._gap_opening = opening
        self._gap_extension = extension

    @requires_state(AppState.CREATED)
    def set_word_size(self, size):
        """
        Set the word size for alignment seeds.

        Parameters
        ----------
        size : int
            Word size.
        """
        self._word_size = size

    @requires_state(AppState.CREATED)
    def set_match_reward(self, reward):
        """
        Set the score of a symbol match in the alignment.

        Used only in 'blastn' and 'megablast'.

        Parameters
        ----------
        reward : int
            Match reward. Must be positive.
        """
        self._reward = reward

    @requires_state(AppState.CREATED)
    def set_mismatch_penalty(self, penalty):
        """
        Set the penalty of a symbol mismatch in the alignment.

        Used only in 'blastn' and 'megablast'.

        Parameters
        ----------
        penalty : int
            Mismatch penalty. Must be negative.
        """
        self._penalty = penalty

    @requires_state(AppState.CREATED)
    def set_substitution_matrix(self, matrix_name):
        """
        Set the substitution matrix used for scoring the alignment.

        Used only in 'blastp', 'blastx', 'tblastn' and 'tblastx'.

        Parameters
        ----------
        matrix_name : str
            Name of the substitution matrix. Default is 'BLOSUM62'.
            The name is converted to upper case.
        """
        self._matrix = matrix_name.upper()

    @requires_state(AppState.CREATED)
    def set_threshold(self, threshold):
        """
        Set the threshold neighboring score for initial words.

        Used only in 'blastp', 'blastx', 'tblastn' and 'tblastx'.

        Parameters
        ----------
        threshold : int
            Threshold value. Must be positive.
        """
        self._threshold = threshold

    def run(self):
        """
        Submit the search to the server ('Put' command) and store the
        request ID from the response.

        Raises
        ------
        ValueError
            If the server reports that the request URI is too large.
        KeyError
            If the response contains no 'RID' entry.
        """
        param_dict = {}
        param_dict["tool"] = "Biotite"
        if self._mail is not None:
            param_dict["email"] = self._mail
        param_dict["CMD"] = "Put"
        param_dict["PROGRAM"] = self._program
        param_dict["QUERY"] = str(self._query)
        param_dict["DATABASE"] = self._database
        if self._entrez_query is not None:
            param_dict["ENTREZ_QUERY"] = self._entrez_query
        if self._max_results is not None:
            param_dict["HITLIST_SIZE"] = str(self._max_results)
        if self._expect_value is not None:
            param_dict["EXPECT"] = self._expect_value
        if self._gap_opening is not None and self._gap_extension is not None:
            param_dict["GAPCOSTS"] = "{:d} {:d}".format(
                self._gap_opening, self._gap_extension
            )
        if self._word_size is not None:
            param_dict["WORD_SIZE"] = self._word_size

        # Nucleotide-specific scoring parameters
        if self._program in ["blastn", "megablast"]:
            if self._reward is not None:
                param_dict["NUCL_REWARD"] = self._reward
            if self._penalty is not None:
                param_dict["NUCL_PENALTY"] = self._penalty

        # Protein-specific scoring parameters
        if self._program in ["blastp", "blastx", "tblastn", "tblastx"]:
            if self._matrix is not None:
                param_dict["MATRIX"] = self._matrix
            if self._threshold is not None:
                param_dict["THRESHOLD"] = self._threshold

        request = requests.get(self.app_url(), params=param_dict)
        if "Submitted URI too large" in request.text:
            raise ValueError("The URI is too large, try a shorter sequence")
        # Register the contact and the new request for the rule checks
        self._contact()
        self._request()
        info_dict = BlastWebApp._get_info(request.text)
        self._rid = info_dict["RID"]

    def is_finished(self):
        """
        Ask the server for the status of the submitted search.

        Returns
        -------
        finished : bool
            True, if the server reports the status 'READY'.

        Raises
        ------
        ValueError
            If the server reports the status 'UNKNOWN'.
        """
        data_dict = {"FORMAT_OBJECT": "SearchInfo", "RID": self._rid, "CMD": "Get"}
        request = requests.get(self.app_url(), params=data_dict)
        self._contact()
        info_dict = BlastWebApp._get_info(request.text)
        if info_dict["Status"] == "UNKNOWN":
            # Indicates invalid query input values
            raise ValueError(
                "The input values seem to be invalid "
                "(Server responsed status 'UNKNOWN')"
            )
        return info_dict["Status"] == "READY"

    def wait_interval(self):
        """
        Get the time between two status checks.

        Returns
        -------
        interval : int
            The contact delay in seconds.
        """
        # NCBI requires a 3 second delay between server contacts
        return BlastWebApp._contact_delay

    def clean_up(self):
        """
        Send a 'Delete' command for the request ID to the server.
        """
        param_dict = {}
        param_dict["CMD"] = "Delete"
        param_dict["RID"] = self._rid
        requests.get(self.app_url(), params=param_dict)

    def evaluate(self):
        """
        Download the search result as XML and convert each reported hit
        into a :class:`BlastAlignment`.

        Only the first HSP of each hit is taken into account.
        """
        param_dict = {}
        param_dict["tool"] = "BiotiteClient"
        if self._mail is not None:
            param_dict["email"] = self._mail
        param_dict["CMD"] = "Get"
        param_dict["RID"] = self._rid
        param_dict["FORMAT_TYPE"] = "XML"
        param_dict["NCBI_GI"] = "T"
        request = requests.get(self.app_url(), params=param_dict)
        self._contact()

        self._alignments = []
        self._xml_response = request.text
        root = ElementTree.fromstring(self._xml_response)
        # Extract BlastAlignment objects from <Hit> tags
        hit_xpath = "./BlastOutput_iterations/Iteration/Iteration_hits/Hit"
        for hit in root.findall(hit_xpath):
            hit_definition = hit.find("Hit_def").text
            hit_id = hit.find("Hit_accession").text
            # 'find()' returns the first matching HSP only
            hsp = hit.find("./Hit_hsps/Hsp")
            score = int(hsp.find("Hsp_score").text)
            e_value = float(hsp.find("Hsp_evalue").text)
            query_begin = int(hsp.find("Hsp_query-from").text)
            query_end = int(hsp.find("Hsp_query-to").text)
            hit_begin = int(hsp.find("Hsp_hit-from").text)
            hit_end = int(hsp.find("Hsp_hit-to").text)

            seq1_str = hsp.find("Hsp_qseq").text
            seq2_str = hsp.find("Hsp_hseq").text
            if self._program in ["blastn", "megablast"]:
                # The gap symbols are removed from the sequences,
                # as gaps are represented by the trace
                seq1, seq2 = [
                    NucleotideSequence(s.replace("-", "")) for s in (seq1_str, seq2_str)
                ]
            else:
                # NOTE(review): 'U' is replaced by 'C', presumably
                # because the protein alphabet does not contain
                # selenocysteine - confirm
                seq1, seq2 = [
                    ProteinSequence(s.replace("-", "").replace("U", "C"))
                    for s in (seq1_str, seq2_str)
                ]
            trace = Alignment.trace_from_strings([seq1_str, seq2_str])

            alignment = BlastAlignment(
                [seq1, seq2],
                trace,
                score,
                e_value,
                (query_begin, query_end),
                (hit_begin, hit_end),
                hit_id,
                hit_definition,
            )
            self._alignments.append(alignment)

    @requires_state(AppState.JOINED)
    def get_xml_response(self):
        """
        Get the raw XML response.

        Returns
        -------
        response : str
            The raw XML response.
        """
        return self._xml_response

    @requires_state(AppState.JOINED)
    def get_alignments(self):
        """
        Get the resulting local sequence alignments.

        Returns
        -------
        alignment : list of BlastAlignment
            The local sequence alignments.
        """
        return self._alignments

    @staticmethod
    def _get_info(text):
        """
        Get the *QBlastInfo* block of the response HTML as dictionary.

        Parameters
        ----------
        text : str
            The response text.

        Returns
        -------
        info_dict : dict (str -> str)
            The ``key=value`` pairs found between the lines containing
            ``QBlastInfoBegin`` and ``QBlastInfoEnd``.
            Keys and values are stripped from surrounding whitespace.
            Empty, if the text contains no such block.
        """
        info_dict = {}
        in_info_block = False
        for line in text.split("\n"):
            if "QBlastInfoBegin" in line:
                in_info_block = True
                continue
            if "QBlastInfoEnd" in line:
                in_info_block = False
                continue
            if not in_info_block:
                continue
            # Split at the first '=' only, so that values containing
            # '=' are kept intact
            key, separator, value = line.partition("=")
            if not separator:
                # Blank or malformed line within the block
                continue
            info_dict[key.strip()] = value.strip()
        return info_dict

    def _contact(self):
        """
        Resets the time since the last server contact. Used for
        detecting server rule violation.
        """
        contact = time.time()
        if (contact - BlastWebApp._last_contact) < BlastWebApp._contact_delay:
            self.violate_rule("The server was contacted too often")
        BlastWebApp._last_contact = contact

    def _request(self):
        """
        Resets the time since the last new alignment request. Used for
        detecting server rule violation.
        """
        request = time.time()
        if (request - BlastWebApp._last_request) < BlastWebApp._request_delay:
            self.violate_rule("Too frequent BLAST requests")
        BlastWebApp._last_request = request
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for multiple sequence alignments using Clustal-Omega.
7
+ """
8
+
9
+ __name__ = "biotite.application.clustalo"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *
@@ -0,0 +1,223 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.clustalo"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["ClustalOmegaApp"]
8
+
9
+ from tempfile import NamedTemporaryFile
10
+ import numpy as np
11
+ from biotite.application.application import AppState, requires_state
12
+ from biotite.application.localapp import cleanup_tempfile
13
+ from biotite.application.msaapp import MSAApp
14
+ from biotite.sequence.phylo.tree import Tree
15
+
16
+
17
class ClustalOmegaApp(MSAApp):
    """
    Perform a multiple sequence alignment using Clustal-Omega.

    Parameters
    ----------
    sequences : list of ProteinSequence or NucleotideSequence
        The sequences to be aligned.
    bin_path : str, optional
        Path of the Clustal-Omega binary.
    matrix : None
        This parameter is used for compatibility reasons and is ignored.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("TITANITE")
    >>> seq3 = ProteinSequence("BISMITE")
    >>> seq4 = ProteinSequence("IQLITE")
    >>> app = ClustalOmegaApp([seq1, seq2, seq3, seq4])
    >>> app.start()
    >>> app.join()
    >>> alignment = app.get_alignment()
    >>> print(alignment)
    -BIQTITE
    TITANITE
    -BISMITE
    --IQLITE
    """

    def __init__(self, sequences, bin_path="clustalo", matrix=None):
        # 'matrix' is only accepted for interface compatibility,
        # hence None is always forwarded to the base class
        super().__init__(sequences, bin_path, None)
        self._seq_count = len(sequences)
        # The mBed heuristic stays enabled until
        # full_matrix_calculation() is called
        self._mbed = True
        self._dist_matrix = None
        self._tree = None
        # delete=False: the files are not removed on close,
        # they are handed to cleanup_tempfile() in clean_up()
        self._in_dist_matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
        self._out_dist_matrix_file = NamedTemporaryFile(
            "r", suffix=".mat", delete=False
        )
        self._in_tree_file = NamedTemporaryFile("w", suffix=".tree", delete=False)
        self._out_tree_file = NamedTemporaryFile("r", suffix=".tree", delete=False)

    def run(self):
        """
        Assemble the command line arguments, write the optional
        distance matrix and guide tree input files and run the program.
        """
        args = [
            "--in",
            self.get_input_file_path(),
            "--out",
            self.get_output_file_path(),
            # The temporary files are already created
            # -> tell Clustal to overwrite these empty files
            "--force",
            # Tree order for get_alignment_order() to work properly
            "--output-order=tree-order",
        ]
        if self.get_seqtype() == "protein":
            args += ["--seqtype", "Protein"]
        else:
            args += ["--seqtype", "DNA"]
        if self._tree is None:
            # ClustalOmega does not like when a tree is set
            # as input and output
            # -> Only request tree output when no tree is input
            args += [
                "--guidetree-out",
                self._out_tree_file.name,
            ]
        if not self._mbed:
            args += ["--full", "--distmat-out", self._out_dist_matrix_file.name]
        if self._dist_matrix is not None:
            # Add the sequence names (0, 1, 2, 3 ...) as first column
            dist_matrix_with_index = np.concatenate(
                (np.arange(self._seq_count)[:, np.newaxis], self._dist_matrix), axis=1
            )
            np.savetxt(
                self._in_dist_matrix_file.name,
                dist_matrix_with_index,
                # The first line contains the amount of sequences
                comments="",
                header=str(self._seq_count),
                # The sequence indices are integers, the rest are floats
                fmt=["%d"] + ["%.5f"] * self._seq_count,
            )
            args += ["--distmat-in", self._in_dist_matrix_file.name]
        if self._tree is not None:
            self._in_tree_file.write(str(self._tree))
            # Make sure the tree is on disk before the program reads it
            self._in_tree_file.flush()
            args += ["--guidetree-in", self._in_tree_file.name]
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        """
        Read the distance matrix and guide tree output files, if they
        were requested in :meth:`run()`.
        """
        super().evaluate()
        if not self._mbed:
            self._dist_matrix = np.loadtxt(
                self._out_dist_matrix_file.name,
                # The first row only contains the number of sequences
                skiprows=1,
                dtype=float,
            )
            # The first column contains only the name of the
            # sequences, in this case 0, 1, 2, 3 ...
            # -> Omit the first column
            self._dist_matrix = self._dist_matrix[:, 1:]
        # Only read output tree if no tree was input
        if self._tree is None:
            self._tree = Tree.from_newick(self._out_tree_file.read().replace("\n", ""))

    def clean_up(self):
        """
        Pass the temporary matrix and tree files created in the
        constructor to ``cleanup_tempfile()``.
        """
        super().clean_up()
        cleanup_tempfile(self._in_dist_matrix_file)
        cleanup_tempfile(self._out_dist_matrix_file)
        cleanup_tempfile(self._in_tree_file)
        cleanup_tempfile(self._out_tree_file)

    @requires_state(AppState.CREATED)
    def full_matrix_calculation(self):
        """
        Use full distance matrix for guide-tree calculation, equivalent
        to the ``--full`` option.

        This makes the distance matrix calculation slower than using the
        default *mBed* heuristic.
        """
        self._mbed = False

    @requires_state(AppState.CREATED)
    def set_distance_matrix(self, matrix):
        """
        Set the pairwise sequence distances, the program should use to
        calculate the guide tree.

        Parameters
        ----------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If the shape of the matrix does not match the number of
            sequences.
        """
        if matrix.shape != (self._seq_count, self._seq_count):
            raise ValueError(
                f"Matrix with shape {matrix.shape} is not sufficient for "
                f"{self._seq_count} sequences"
            )
        self._dist_matrix = matrix.astype(float, copy=False)

    @requires_state(AppState.JOINED)
    def get_distance_matrix(self):
        """
        Get the pairwise sequence distances the program used to
        calculate the guide tree.

        Returns
        -------
        matrix : ndarray, shape=(n,n), dtype=float
            The pairwise distances.

        Raises
        ------
        ValueError
            If :meth:`full_matrix_calculation()` was not called before
            the run.
        """
        if self._mbed:
            raise ValueError(
                "Getting the distance matrix requires 'full_matrix_calculation()'"
            )
        return self._dist_matrix

    @requires_state(AppState.CREATED)
    def set_guide_tree(self, tree):
        """
        Set the guide tree, the program should use for the
        progressive alignment.

        Parameters
        ----------
        tree : Tree
            The guide tree.

        Raises
        ------
        ValueError
            If ``len(tree)`` differs from the number of sequences.
        """
        if self._seq_count != len(tree):
            # Both fragments need the 'f' prefix:
            # implicit concatenation does not propagate it
            raise ValueError(
                f"Tree with {len(tree)} leaves is not sufficient for "
                f"{self._seq_count} sequences, must be equal"
            )
        self._tree = tree

    @requires_state(AppState.JOINED)
    def get_guide_tree(self):
        """
        Get the guide tree created for the progressive alignment.

        Returns
        -------
        tree : Tree
            The guide tree.
        """
        return self._tree

    @staticmethod
    def supports_nucleotide():
        return True

    @staticmethod
    def supports_protein():
        return True

    @staticmethod
    def supports_custom_nucleotide_matrix():
        return False

    @staticmethod
    def supports_custom_protein_matrix():
        return False
@@ -0,0 +1,12 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage for protein secondary structure annotation using DSSP.
7
+ """
8
+
9
+ __name__ = "biotite.application.dssp"
10
+ __author__ = "Patrick Kunzmann"
11
+
12
+ from .app import *