biotite 1.5.0__cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-312-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-312-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-312-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-312-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-312-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-312-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-312-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-312-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-312-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-312-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-312-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-312-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-312-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-312-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-312-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-312-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-312-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-312-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-312-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-312-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-312-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,199 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.tantan"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["TantanApp"]
8
+
9
+ import io
10
+ from collections.abc import Sequence as SequenceABC
11
+ from tempfile import NamedTemporaryFile
12
+ import numpy as np
13
+ from biotite.application.application import AppState, requires_state
14
+ from biotite.application.localapp import LocalApp, cleanup_tempfile
15
+ from biotite.sequence.alphabet import common_alphabet
16
+ from biotite.sequence.io.fasta.file import FastaFile
17
+ from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
18
+
19
+ MASKING_LETTER = "!"
20
+
21
+
22
class TantanApp(LocalApp):
    r"""
    Identify repeat regions in sequences by running *tantan*.
    :footcite:`Frith2011`

    Parameters
    ----------
    sequence : (list of) NucleotideSequence or ProteinSequence
        The sequence(s) to be masked.
        Either one sequence or several sequences are accepted.
        Handing over several sequences at once is faster than starting
        a separate run for each of them.
        All sequences must be of the same type.
    matrix : SubstitutionMatrix, optional
        The substitution matrix used to identify repeats.
        A sequence segment counts as a repeat of another segment,
        if the substitution score between both segments exceeds a
        threshold value.
    bin_path : str, optional
        Path of the *tantan* binary.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequence = NucleotideSequence("GGCATCGATATATATATATAGTCAA")
    >>> app = TantanApp(sequence)
    >>> app.start()
    >>> app.join()
    >>> repeat_mask = app.get_mask()
    >>> print(repeat_mask)
    [False False False False False False False False False  True  True  True
      True  True  True  True  True  True  True  True False False False False
     False]
    >>> print(sequence, "\n" + "".join(["^" if e else " " for e in repeat_mask]))
    GGCATCGATATATATATATAGTCAA
             ^^^^^^^^^^^
    """

    def __init__(self, sequence, matrix=None, bin_path="tantan"):
        super().__init__(bin_path)

        # A single sequence is wrapped into a list, so the remaining
        # code only has to deal with the multi-sequence case;
        # '_as_list' remembers what the caller originally passed
        self._as_list = isinstance(sequence, SequenceABC)
        self._sequences = sequence if self._as_list else [sequence]

        # Determine the (single) sequence type shared by all inputs
        self._is_protein = None
        for seq in self._sequences:
            if isinstance(seq, NucleotideSequence):
                seq_is_protein = False
            elif isinstance(seq, ProteinSequence):
                seq_is_protein = True
            else:
                raise TypeError("A NucleotideSequence or ProteinSequence is required")
            if self._is_protein is not None and self._is_protein != seq_is_protein:
                # A previous sequence in the list had the other type
                raise ValueError(
                    "List of sequences contains mixed "
                    "nucleotide and protein sequences"
                )
            self._is_protein = seq_is_protein

        if matrix is not None:
            # The matrix is validated before any temporary file is created
            common_alph = common_alphabet((seq.alphabet for seq in self._sequences))
            if common_alph is None:
                raise ValueError("There is no common alphabet within the sequences")
            if not matrix.get_alphabet1().extends(common_alph):
                raise ValueError(
                    "The alphabet of the sequence(s) do not fit the matrix"
                )
            if not matrix.is_symmetric():
                raise ValueError("A symmetric matrix is required")
            self._matrix_file = NamedTemporaryFile("w", suffix=".mat", delete=False)
        else:
            self._matrix_file = None
        # Always set, as 'run()' checks this attribute for None
        self._matrix = matrix

        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)

    def run(self):
        # Write all input sequences into a single FASTA file
        fasta_entries = (
            (f"sequence_{i:d}", str(seq)) for i, seq in enumerate(self._sequences)
        )
        FastaFile.write_iter(self._in_file, fasta_entries)
        self._in_file.flush()

        args = []
        if self._matrix is not None:
            # Hand the custom matrix to the program via a file
            self._matrix_file.write(str(self._matrix))
            self._matrix_file.flush()
            args.extend(["-m", self._matrix_file.name])
        if self._is_protein:
            args.append("-p")
        args.extend(["-x", MASKING_LETTER, self._in_file.name])
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        super().evaluate()

        # The program output is parsed as FASTA; each position that
        # holds the masking letter becomes 'True' in the mask
        stdout_stream = io.StringIO(self.get_stdout())
        mask_byte = MASKING_LETTER.encode("ASCII")[0]
        self._masks = [
            np.frombuffer(seq_string.encode("ASCII"), dtype=np.ubyte) == mask_byte
            for _, seq_string in FastaFile.read_iter(stdout_stream)
        ]

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._in_file)
        # The matrix file only exists if a matrix was given
        if self._matrix_file is not None:
            cleanup_tempfile(self._matrix_file)

    @requires_state(AppState.JOINED)
    def get_mask(self):
        """
        Get the boolean mask(s) marking the repeat regions found in
        the input sequence(s).

        Returns
        -------
        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
            A boolean mask that is true at every sequence position
            identified as repeat.
            If a list of sequences was given as input, a list of masks
            is returned instead.
        """
        return self._masks if self._as_list else self._masks[0]

    @staticmethod
    def mask_repeats(sequence, matrix=None, bin_path="tantan"):
        """
        Mask repeat regions of the given input sequence(s).

        This is a shortcut that creates a :class:`TantanApp`, runs it
        and returns its result.

        Parameters
        ----------
        sequence : (list of) NucleotideSequence or ProteinSequence
            The sequence(s) to be masked.
            Either one sequence or several sequences are accepted.
            Handing over several sequences at once is faster than
            starting a separate run for each of them.
            All sequences must be of the same type.
        matrix : SubstitutionMatrix, optional
            The substitution matrix used to identify repeats.
            A sequence segment counts as a repeat of another segment,
            if the substitution score between both segments exceeds a
            threshold value.
        bin_path : str, optional
            Path of the *tantan* binary.

        Returns
        -------
        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
            A boolean mask that is true at every sequence position
            identified as repeat.
            If a list of sequences was given as input, a list of masks
            is returned instead.
        """
        app = TantanApp(sequence, matrix, bin_path)
        app.start()
        app.join()
        return app.get_mask()
@@ -0,0 +1,77 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["map_sequence", "map_matrix"]
8
+
9
+
10
+ import numpy as np
11
+ from biotite.sequence.align.matrix import SubstitutionMatrix
12
+ from biotite.sequence.seqtypes import ProteinSequence
13
+
14
+
15
def map_sequence(sequence):
    """
    Convert a sequence with an arbitrary alphabet into a
    :class:`ProteinSequence`, so that software restricted to protein
    sequences can also process other sequence types.

    The symbol codes of the input sequence are taken over unchanged.

    Parameters
    ----------
    sequence : Sequence
        The sequence to be mapped.

    Returns
    -------
    mapped_sequence : ProteinSequence
        The mapped sequence.

    Raises
    ------
    TypeError
        If the alphabet of `sequence` contains more symbols than the
        protein alphabet.
    """
    source_size = len(sequence.alphabet)
    target_size = len(ProteinSequence.alphabet)
    if source_size > target_size:
        # Not every symbol would find a counterpart in the
        # amino acid alphabet
        type_name = type(sequence).__name__
        raise TypeError(
            f"The software cannot align sequences of type {type_name}: "
            "Alphabet is too large to be converted into amino acid alphabet"
        )
    # The mapping simply reuses the sequence code of the input
    protein_sequence = ProteinSequence()
    protein_sequence.code = sequence.code
    return protein_sequence
45
+
46
+
47
def map_matrix(matrix):
    """
    Convert a :class:`SubstitutionMatrix` with an arbitrary alphabet
    into a :class:`SubstitutionMatrix` for protein sequences, so that
    software restricted to protein sequences can also process other
    sequence types.

    Parameters
    ----------
    matrix : SubstitutionMatrix
        The substitution matrix to be mapped.

    Returns
    -------
    mapped_matrix : SubstitutionMatrix
        The mapped substitution matrix.

    Raises
    ------
    TypeError
        If `matrix` is ``None``.
    """
    if matrix is None:
        raise TypeError(
            "A substitution matrix must be provided for custom sequence types"
        )
    protein_alphabet = ProteinSequence.alphabet
    n_source = len(matrix.get_alphabet1())
    n_protein = len(protein_alphabet)
    # The original scores fill the upper-left corner of the new matrix,
    # entries for the remaining trailing symbols stay zero
    padded_scores = np.zeros((n_protein, n_protein), dtype=np.int32)
    padded_scores[:n_source, :n_source] = matrix.score_matrix()
    return SubstitutionMatrix(protein_alphabet, protein_alphabet, padded_scores)
@@ -0,0 +1,18 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage that provides interfaces to the *ViennaRNA* software
7
+ package.
8
+
9
+ Secondary structures can be predicted using *RNAfold* and plotted using
10
+ *RNAplot*.
11
+ """
12
+
13
+ __name__ = "biotite.application.viennarna"
14
+ __author__ = "Tom David Müller"
15
+
16
+ from .rnaalifold import *
17
+ from .rnafold import *
18
+ from .rnaplot import *
@@ -0,0 +1,310 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.viennarna"
6
+ __author__ = "Tom David Müller"
7
+ __all__ = ["RNAalifoldApp"]
8
+
9
+ import copy
10
+ from tempfile import NamedTemporaryFile
11
+ import numpy as np
12
+ from biotite.application.application import AppState, requires_state
13
+ from biotite.application.localapp import LocalApp, cleanup_tempfile
14
+ from biotite.application.viennarna.util import build_constraint_string
15
+ from biotite.sequence.io.fasta import FastaFile, set_alignment
16
+ from biotite.structure.bonds import BondList
17
+ from biotite.structure.dotbracket import base_pairs_from_dot_bracket
18
+
19
+
20
class RNAalifoldApp(LocalApp):
    """
    Predict the consensus secondary structure from a ribonucleic acid alignment
    using *ViennaRNA's* *RNAalifold* software.

    In contrast to :class:`RNAfoldApp`, the energy function includes
    a term that includes coevolution information extracted from an
    alignment in addition to the physical free energy term.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    Parameters
    ----------
    alignment : Alignment
        An alignment of RNA sequences.
    temperature : int, optional
        The temperature (°C) to be assumed for the energy parameters.
    bin_path : str, optional
        Path of the *RNAalifold* binary.
    """

    def __init__(self, alignment, temperature=37, bin_path="RNAalifold"):
        super().__init__(bin_path)
        # Work on a private copy, so later changes of the caller's
        # alignment do not affect this run
        self._alignment = copy.deepcopy(alignment)
        # Stored as string, as it is passed as command line argument
        self._temperature = str(temperature)
        self._constraints = None
        self._enforce = False
        # 'delete=False': the files are removed explicitly in 'clean_up()'
        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)
        self._constraints_file = NamedTemporaryFile(
            "w+", suffix=".constraints", delete=False
        )

    def run(self):
        # Insert no line breaks
        # -> Extremely high value for characters per line
        fasta_file = FastaFile(chars_per_line=np.iinfo(np.int32).max)
        set_alignment(
            fasta_file,
            self._alignment,
            seq_names=[str(i) for i in range(len(self._alignment.sequences))],
        )
        fasta_file.write(self._in_file)
        self._in_file.flush()

        options = [
            "--noPS",
            "-T",
            self._temperature,
        ]
        # Truthiness check instead of 'is True':
        # an identity check would ignore truthy non-'bool' flags
        # such as 'numpy.True_'
        if self._enforce:
            options.append("--enforceConstraint")
        if self._constraints is not None:
            options.append("-C")
            # The constraint string is handed over via STDIN:
            # write it, then rewind so it is read from the start
            self._constraints_file.write(self._constraints)
            self._constraints_file.flush()
            self._constraints_file.seek(0)
            self.set_stdin(self._constraints_file)

        self.set_arguments(options + [self._in_file.name])
        super().run()

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._in_file)
        cleanup_tempfile(self._constraints_file)

    def evaluate(self):
        super().evaluate()
        lines = self.get_stdout().splitlines()
        # First line: consensus sequence
        # Second line: '<dot bracket> (<total> = <free> + <covariance>)'
        self._consensus = lines[0].strip()
        result = lines[1].strip()
        dotbracket, total_energy = result.split(" ", maxsplit=1)
        # Energy has the form:
        # (<total> = <free> + <covariance>)
        # Strip surrounding whitespace first, so that the slice below
        # reliably removes the enclosing parentheses
        total_energy = total_energy.strip()[1:-1]
        energy_contributions = total_energy.split("=")[1].split("+")
        self._free_energy = float(energy_contributions[0])
        self._covariance_energy = float(energy_contributions[1])
        self._dotbracket = dotbracket

    @requires_state(AppState.CREATED)
    def set_temperature(self, temperature):
        """
        Adjust the energy parameters according to a temperature in
        degrees Celsius.

        Parameters
        ----------
        temperature : int
            The temperature.
        """
        self._temperature = str(temperature)

    @requires_state(AppState.CREATED)
    def set_constraints(
        self,
        pairs=None,
        paired=None,
        unpaired=None,
        downstream=None,
        upstream=None,
        enforce=False,
    ):
        """
        Add constraints of known paired or unpaired bases to the folding
        algorithm.

        Constraints forbid pairs conflicting with the respective
        constraint.

        Parameters
        ----------
        pairs : ndarray, shape=(n,2), dtype=int, optional
            Positions of constrained base pairs.
        paired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any other base.
        unpaired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are unpaired.
        downstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any downstream base.
        upstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any upstream base.
        enforce : bool, optional
            If set to true, the given constraints are enforced, i.e.
            the respective base pairs must form.
            By default (false), a constraint does only forbid formation
            of a pair that would conflict with this constraint.

        Warnings
        --------
        If a constraint is given for a gap position in the consensus sequence,
        the software may find no base pairs at all.
        """
        self._constraints = build_constraint_string(
            len(self._alignment), pairs, paired, unpaired, downstream, upstream
        )
        # Normalize to a plain 'bool', so that e.g. NumPy booleans
        # are honored as well
        self._enforce = bool(enforce)

    @requires_state(AppState.JOINED)
    def get_free_energy(self):
        """
        Get the free energy (kcal/mol) of the suggested consensus
        secondary structure.

        Returns
        -------
        free_energy : float
            The free energy.

        See Also
        --------
        get_covariance_energy : Get the energy of the artificial covariance term.

        Notes
        -----
        The total energy of the secondary structure regarding the
        minimization objective is the sum of the free energy and the
        covariance term.
        """
        return self._free_energy

    @requires_state(AppState.JOINED)
    def get_covariance_energy(self):
        """
        Get the energy of the artificial covariance term (kcal/mol) of
        the suggested consensus secondary structure.

        Returns
        -------
        covariance_energy : float
            The energy of the covariance term.

        See Also
        --------
        get_free_energy : Get the free energy.

        Notes
        -----
        The total energy of the secondary structure regarding the
        minimization objective is the sum of the free energy and the
        covariance term.
        """
        return self._covariance_energy

    @requires_state(AppState.JOINED)
    def get_consensus_sequence_string(self):
        """
        Get the consensus sequence.

        As the consensus may contain gaps, the sequence is returned as
        string.

        Returns
        -------
        consensus : str
            The consensus sequence.
        """
        return self._consensus

    @requires_state(AppState.JOINED)
    def get_dot_bracket(self):
        """
        Get the consensus secondary structure in dot bracket notation.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.
        """
        return self._dotbracket

    @requires_state(AppState.JOINED)
    def get_base_pairs(self, sequence_index=None):
        """
        Get the base pairs from the suggested secondary structure.

        Parameters
        ----------
        sequence_index : int, optional
            By default, the base pairs point to positions in the
            alignment.
            If `sequence_index` is set, the returned base pairs point to
            positions in the given sequence, instead.
            The sequence is specified as index in the alignment.
            For example, if the alignment comprises three sequences,
            `sequence_index` is in range 0-2.

        Returns
        -------
        base_pairs : ndarray, shape=(n,2)
            Each row corresponds to the positions of the bases in the
            alignment.
            If `sequence_index` is set, the positions correspond to the
            given sequence.
        """
        base_pairs = base_pairs_from_dot_bracket(self._dotbracket)
        if sequence_index is not None:
            trace = self._alignment.trace[:, sequence_index]
            # Map base pairs that point to consensus to base pairs that
            # point to given sequence, which is only a subsequence
            # (without gaps) of consensus sequence
            # This is not trivial:
            # The pairs that are not part of the subsequence must be
            # removed and all other pairs need to be shifted
            # To solve this problem a BondList is 'misused', since it
            # is built to solve the same problem on the level of atoms
            # Here the 'bonds' in the BondList are base pairs and the indices
            # are base positions
            pair_list = BondList(len(self._alignment), base_pairs)
            # Remove all pairs that appear in gaps of given sequence
            pair_list = pair_list[trace != -1]
            # Convert back to array of base pairs,
            # remove unused BondType column
            base_pairs = pair_list.as_array()[:, :2]
        return base_pairs

    @staticmethod
    def compute_secondary_structure(
        alignment, bin_path="RNAalifold", temperature=37
    ):
        """
        Predict the consensus secondary structure of a ribonucleic acid
        alignment using *ViennaRNA's* *RNAalifold* software.

        This is a convenience function, that wraps the
        :class:`RNAalifoldApp` execution.

        Parameters
        ----------
        alignment : Alignment
            An alignment of RNA sequences.
        bin_path : str, optional
            Path of the *RNAalifold* binary.
        temperature : int, optional
            The temperature (°C) to be assumed for the energy parameters.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.
        free_energy : float
            The free energy.
        covariance_energy : float
            The energy of the covariance term.
        """

        app = RNAalifoldApp(alignment, temperature=temperature, bin_path=bin_path)
        app.start()
        app.join()
        return (
            app.get_dot_bracket(),
            app.get_free_energy(),
            app.get_covariance_energy(),
        )