biotite 0.41.1__cp312-cp312-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,222 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.tantan"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["TantanApp"]
8
+
9
+ from collections.abc import Sequence as SequenceABC
10
+ import io
11
+ from tempfile import NamedTemporaryFile
12
+ import numpy as np
13
+ from ..localapp import LocalApp, cleanup_tempfile
14
+ from ..application import AppState, requires_state
15
+ from ...sequence.seqtypes import NucleotideSequence, ProteinSequence
16
+ from ...sequence.alphabet import common_alphabet
17
+ from ...sequence.io.fasta.file import FastaFile
18
+ from ..util import map_sequence, map_matrix
19
+
20
+
21
# Letter handed to tantan via its '-x' option; positions that carry this
# letter in the program output are reported as masked by
# TantanApp.evaluate()
MASKING_LETTER = "!"
22
+
23
+
24
class TantanApp(LocalApp):
    r"""
    Mask sequence repeat regions using *tantan*. :footcite:`Frith2011`

    Parameters
    ----------
    sequence : (list of) NucleotideSequence or ProteinSequence
        The sequence(s) to be masked.
        Either a single sequence or multiple sequences can be masked.
        Masking multiple sequences in a single run decreases the
        run time compared to multiple runs with a single sequence.
        All sequences must be of the same type.
    matrix : SubstitutionMatrix, optional
        The substitution matrix to use for repeat identification.
        A sequence segment is considered to be a repeat of another
        segment, if the substitution score between these segments is
        greater than a threshold value.
        The matrix must be symmetric and its alphabet must extend the
        common alphabet of the given sequences.
    bin_path : str, optional
        Path of the *tantan* binary.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequence = NucleotideSequence("GGCATCGATATATATATATAGTCAA")
    >>> app = TantanApp(sequence)
    >>> app.start()
    >>> app.join()
    >>> repeat_mask = app.get_mask()
    >>> print(repeat_mask)
    [False False False False False False False False False  True  True  True
      True  True  True  True  True  True  True  True False False False False
     False]
    >>> print(sequence, "\n" + "".join(["^" if e else " " for e in repeat_mask]))
    GGCATCGATATATATATATAGTCAA
             ^^^^^^^^^^^
    """

    def __init__(self, sequence, matrix=None, bin_path="tantan"):
        super().__init__(bin_path)

        # Remember whether the caller passed a collection, so that
        # get_mask() can answer in the same form.
        # A single sequence is wrapped into a list for uniform handling
        self._as_list = isinstance(sequence, SequenceABC)
        self._sequences = sequence if self._as_list else [sequence]

        # Determine the sequence type and reject mixed input
        self._is_protein = None
        for seq in self._sequences:
            if isinstance(seq, NucleotideSequence):
                seq_is_protein = False
            elif isinstance(seq, ProteinSequence):
                seq_is_protein = True
            else:
                raise TypeError(
                    "A NucleotideSequence or ProteinSequence is required"
                )
            if (
                self._is_protein is not None
                and self._is_protein != seq_is_protein
            ):
                # A previous sequence was of the other type
                raise ValueError(
                    "List of sequences contains mixed "
                    "nucleotide and protein sequences"
                )
            self._is_protein = seq_is_protein

        if matrix is not None:
            # The matrix must cover every symbol of the input sequences
            shared_alphabet = common_alphabet(
                (seq.alphabet for seq in self._sequences)
            )
            if shared_alphabet is None:
                raise ValueError(
                    "There is no common alphabet within the sequences"
                )
            if not matrix.get_alphabet1().extends(shared_alphabet):
                raise ValueError(
                    "The alphabet of the sequence(s) do not fit the matrix"
                )
            if not matrix.is_symmetric():
                raise ValueError("A symmetric matrix is required")
            self._matrix_file = NamedTemporaryFile(
                "w", suffix=".mat", delete=False
            )
        else:
            self._matrix_file = None
        # Stored in both cases, as run() checks it for None
        self._matrix = matrix

        self._in_file = NamedTemporaryFile("w", suffix=".fa", delete=False)

    def run(self):
        # Write all input sequences into a single FASTA file,
        # using generated headers
        fasta_entries = (
            (f"sequence_{i:d}", str(seq))
            for i, seq in enumerate(self._sequences)
        )
        FastaFile.write_iter(self._in_file, fasta_entries)
        self._in_file.flush()

        uses_matrix = self._matrix is not None
        if uses_matrix:
            self._matrix_file.write(str(self._matrix))
            self._matrix_file.flush()

        # Assemble the command line:
        # options first, the input file as last argument
        args = []
        if uses_matrix:
            args.extend(["-m", self._matrix_file.name])
        if self._is_protein:
            args.append("-p")
        args.extend(["-x", MASKING_LETTER, self._in_file.name])
        self.set_arguments(args)
        super().run()

    def evaluate(self):
        super().evaluate()

        # The program output is parsed as FASTA from STDOUT
        tantan_output = io.StringIO(self.get_stdout())
        self._masks = []
        masking_code = MASKING_LETTER.encode("ASCII")[0]
        for _, masked_string in FastaFile.read_iter(tantan_output):
            # Compare byte-wise against the masking letter
            # to obtain one boolean per sequence position
            symbol_codes = np.frombuffer(
                masked_string.encode("ASCII"), dtype=np.ubyte
            )
            self._masks.append(symbol_codes == masking_code)

    def clean_up(self):
        super().clean_up()
        cleanup_tempfile(self._in_file)
        # The matrix file only exists, if a matrix was given
        if self._matrix_file is not None:
            cleanup_tempfile(self._matrix_file)

    @requires_state(AppState.JOINED)
    def get_mask(self):
        """
        Get a boolean mask covering identified repeat regions of each
        input sequence.

        Returns
        -------
        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
            A boolean mask that is true for each sequence position that
            is identified as repeat.
            If a list of sequences were given as input, a list of masks
            is returned instead.
        """
        return self._masks if self._as_list else self._masks[0]

    @staticmethod
    def mask_repeats(sequence, matrix=None, bin_path="tantan"):
        """
        Mask repeat regions of the given input sequence(s).

        This is a shortcut that creates a :class:`TantanApp`, runs it
        and returns the resulting mask(s).

        Parameters
        ----------
        sequence : (list of) NucleotideSequence or ProteinSequence
            The sequence(s) to be masked.
            Either a single sequence or multiple sequences can be masked.
            Masking multiple sequences in a single run decreases the
            run time compared to multiple runs with a single sequence.
            All sequences must be of the same type.
        matrix : SubstitutionMatrix, optional
            The substitution matrix to use for repeat identification.
            A sequence segment is considered to be a repeat of another
            segment, if the substitution score between these segments is
            greater than a threshold value.
        bin_path : str, optional
            Path of the *tantan* binary.

        Returns
        -------
        repeat_mask : (list of) ndarray, shape=(n,), dtype=bool
            A boolean mask that is true for each sequence position that
            is identified as repeat.
            If a list of sequences were given as input, a list of masks
            is returned instead.
        """
        tantan_app = TantanApp(sequence, matrix, bin_path)
        tantan_app.start()
        tantan_app.join()
        return tantan_app.get_mask()
@@ -0,0 +1,59 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["map_sequence", "map_matrix"]
8
+
9
+
10
+ import numpy as np
11
+ from ..sequence.seqtypes import ProteinSequence
12
+ from ..sequence.align.matrix import SubstitutionMatrix
13
+
14
+
15
def map_sequence(sequence):
    """
    Map a sequence with an arbitrary alphabet into a
    :class:`ProteinSequence`, in order to support arbitrary sequence
    types in software that can handle protein sequences.

    The sequence code of the input is taken over as it is.

    Raises
    ------
    TypeError
        If the alphabet of the input sequence contains more symbols
        than the alphabet of :class:`ProteinSequence`.
    """
    if len(sequence.alphabet) <= len(ProteinSequence.alphabet):
        # The alphabet is small enough:
        # The mapped sequence simply adopts the original sequence code
        protein_sequence = ProteinSequence()
        protein_sequence.code = sequence.code
        return protein_sequence

    # More symbols than the amino acid alphabet can represent
    type_name = type(sequence).__name__
    raise TypeError(
        "The software cannot align sequences of type "
        f"{type_name}: "
        "Alphabet is too large to be converted into amino "
        "acid alphabet"
    )
35
+
36
+
37
def map_matrix(matrix):
    """
    Map a :class:`SubstitutionMatrix` with an arbitrary alphabet into a
    :class:`SubstitutionMatrix` for protein sequences, in order to
    support arbitrary sequence types in software that can handle protein
    sequences.

    The scores of the input matrix fill the upper left corner of the
    new matrix, all remaining entries are zero.

    Raises
    ------
    TypeError
        If `matrix` is ``None``.
    """
    if matrix is None:
        raise TypeError(
            "A substitution matrix must be provided for custom "
            "sequence types"
        )

    source_size = len(matrix.get_alphabet1())
    protein_alphabet = ProteinSequence.alphabet
    target_size = len(protein_alphabet)
    # Start from an all-zero matrix of amino acid alphabet size, so the
    # symbols beyond the original alphabet keep a score of zero
    padded_scores = np.zeros((target_size, target_size))
    padded_scores[:source_size, :source_size] = matrix.score_matrix()
    return SubstitutionMatrix(
        protein_alphabet, protein_alphabet, padded_scores
    )
@@ -0,0 +1,18 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ A subpackage that provides interfaces to the *ViennaRNA* software
7
+ package.
8
+
9
+ Secondary structures can be predicted using *RNAfold* or
10
+ *RNAalifold* and plotted using *RNAplot*.
11
+ """
12
+
13
+ __name__ = "biotite.application.viennarna"
14
+ __author__ = "Tom David Müller"
15
+
16
+ from .rnaalifold import *
17
+ from .rnafold import *
18
+ from .rnaplot import *
@@ -0,0 +1,304 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.application.viennarna"
6
+ __author__ = "Tom David Müller"
7
+ __all__ = ["RNAalifoldApp"]
8
+
9
+ import copy
10
+ from tempfile import NamedTemporaryFile
11
+ import numpy as np
12
+ from ..application import AppState, requires_state
13
+ from ..localapp import LocalApp, cleanup_tempfile
14
+ from ...sequence.io.fasta import FastaFile, set_alignment
15
+ from ...structure.dotbracket import base_pairs_from_dot_bracket
16
+ from ...structure.bonds import BondList
17
+ from .util import build_constraint_string
18
+
19
+
20
class RNAalifoldApp(LocalApp):
    """
    Predict the consensus secondary structure from a ribonucleic acid
    alignment using *ViennaRNA's* *RNAalifold* software.

    In contrast to :class:`RNAfoldApp`, the energy function includes
    a term that includes coevolution information extracted from an
    alignment in addition to the physical free energy term.

    Internally this creates a :class:`Popen` instance, which handles
    the execution.

    Parameters
    ----------
    alignment : Alignment
        An alignment of RNA sequences.
    temperature : int, optional
        The temperature (°C) to be assumed for the energy parameters.
    bin_path : str, optional
        Path of the *RNAalifold* binary.
    """

    def __init__(self, alignment, temperature=37, bin_path="RNAalifold"):
        super().__init__(bin_path)
        # Work on a private copy of the alignment
        self._alignment = copy.deepcopy(alignment)
        # The temperature is handed over as command line argument
        self._temperature = str(temperature)
        self._constraints = None
        self._enforce = False
        # The files are not deleted on close,
        # they are removed explicitly in 'clean_up()'
        self._in_file = NamedTemporaryFile(
            "w", suffix=".fa", delete=False
        )
        self._constraints_file = NamedTemporaryFile(
            "w+", suffix=".constraints", delete=False
        )

    def run(self):
        """
        Write the alignment (and the constraints, if any) into the
        temporary files, assemble the command line arguments and start
        the execution.
        """
        # Insert no line breaks
        # -> Extremely high value for characters per line
        fasta_file = FastaFile(chars_per_line=np.iinfo(np.int32).max)
        set_alignment(
            fasta_file, self._alignment,
            seq_names=[str(i) for i in range(len(self._alignment.sequences))]
        )
        fasta_file.write(self._in_file)
        self._in_file.flush()

        options = [
            "--noPS",
            "-T", self._temperature,
        ]
        # Truthiness test instead of 'is True',
        # so that e.g. NumPy booleans are not silently ignored
        if self._enforce:
            options.append("--enforceConstraint")
        if self._constraints is not None:
            options.append("-C")
            # The constraint string is passed via STDIN:
            # Rewind the file after writing, so it is read from the start
            self._constraints_file.write(self._constraints)
            self._constraints_file.flush()
            self._constraints_file.seek(0)
            self.set_stdin(self._constraints_file)

        self.set_arguments(options + [self._in_file.name])
        super().run()

    def clean_up(self):
        """
        Remove the temporary input and constraints files.
        """
        super().clean_up()
        cleanup_tempfile(self._in_file)
        cleanup_tempfile(self._constraints_file)

    def evaluate(self):
        """
        Parse the consensus sequence, the dot bracket notation and the
        energy terms from the STDOUT of the software.
        """
        super().evaluate()
        lines = self.get_stdout().splitlines()
        # First line: consensus sequence
        self._consensus = lines[0].strip()
        # Second line: dot bracket notation followed by the energy
        result = lines[1].strip()
        # The dot bracket notation contains no whitespace,
        # hence the first whitespace run separates it from the energy
        dotbracket, total_energy = result.split(maxsplit=1)
        # Energy has the form:
        # (<total> = <free> + <covariance>)
        # Strip surrounding whitespace before removing the parentheses
        total_energy = total_energy.strip()[1:-1]
        energy_contributions = total_energy.split("=")[1].split("+")
        self._free_energy = float(energy_contributions[0])
        self._covariance_energy = float(energy_contributions[1])
        self._dotbracket = dotbracket

    @requires_state(AppState.CREATED)
    def set_temperature(self, temperature):
        """
        Adjust the energy parameters according to a temperature in
        degrees Celsius.

        Parameters
        ----------
        temperature : int
            The temperature.
        """
        self._temperature = str(temperature)

    @requires_state(AppState.CREATED)
    def set_constraints(self, pairs=None, paired=None, unpaired=None,
                        downstream=None, upstream=None, enforce=False):
        """
        Add constraints of known paired or unpaired bases to the folding
        algorithm.

        Constraints forbid pairs conflicting with the respective
        constraint.

        Parameters
        ----------
        pairs : ndarray, shape=(n,2), dtype=int, optional
            Positions of constrained base pairs.
        paired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any other base.
        unpaired : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are unpaired.
        downstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any downstream base.
        upstream : ndarray, shape=(n,), dtype=int or dtype=bool, optional
            Positions of bases that are paired with any upstream base.
        enforce : bool, optional
            If set to true, the given constraints are enforced, i.e.
            the respective base pairs must form.
            By default (false), a constraint does only forbid formation
            of a pair that would conflict with this constraint.

        Warnings
        --------
        If a constraint is given for a gap position in the consensus
        sequence, the software may find no base pairs at all.
        """
        self._constraints = build_constraint_string(
            len(self._alignment),
            pairs, paired, unpaired, downstream, upstream
        )
        # Normalize to a plain bool, so that truthy non-bool values
        # (e.g. NumPy booleans) are handled as expected
        self._enforce = bool(enforce)

    @requires_state(AppState.JOINED)
    def get_free_energy(self):
        """
        Get the free energy (kcal/mol) of the suggested consensus
        secondary structure.

        Returns
        -------
        free_energy : float
            The free energy.

        Notes
        -----
        The total energy of the secondary structure regarding the
        minimization objective is the sum of the free energy and the
        covariance term.

        See also
        --------
        get_covariance_energy
        """
        return self._free_energy

    @requires_state(AppState.JOINED)
    def get_covariance_energy(self):
        """
        Get the energy of the artificial covariance term (kcal/mol) of
        the suggested consensus secondary structure.

        Returns
        -------
        covariance_energy : float
            The energy of the covariance term.

        Notes
        -----
        The total energy of the secondary structure regarding the
        minimization objective is the sum of the free energy and the
        covariance term.

        See also
        --------
        get_free_energy
        """
        return self._covariance_energy

    @requires_state(AppState.JOINED)
    def get_consensus_sequence_string(self):
        """
        Get the consensus sequence.

        As the consensus may contain gaps, the sequence is returned as
        string.

        Returns
        -------
        consensus : str
            The consensus sequence.
        """
        return self._consensus

    @requires_state(AppState.JOINED)
    def get_dot_bracket(self):
        """
        Get the consensus secondary structure in dot bracket notation.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.
        """
        return self._dotbracket

    @requires_state(AppState.JOINED)
    def get_base_pairs(self, sequence_index=None):
        """
        Get the base pairs from the suggested secondary structure.

        Parameters
        ----------
        sequence_index : int, optional
            By default, the base pairs point to positions in the
            alignment.
            If `sequence_index` is set, the returned base pairs point to
            positions in the given sequence, instead.
            The sequence is specified as index in the alignment.
            For example, if the alignment comprises three sequences,
            `sequence_index` is in range 0-2.

        Returns
        -------
        base_pairs : ndarray, shape=(n,2)
            Each row corresponds to the positions of the bases in the
            alignment.
            If `sequence_index` is set, the positions correspond to the
            given sequence.
        """
        base_pairs = base_pairs_from_dot_bracket(self._dotbracket)
        if sequence_index is not None:
            trace = self._alignment.trace[:, sequence_index]
            # Map base pairs that point to consensus to base pairs that
            # point to given sequence, which is only a subsequence
            # (without gaps) of consensus sequence
            # This is not trivial:
            # The pairs that are not part of the subsequence must be
            # removed and all other pairs need to be shifted
            # To solve this problem a BondList is 'misused', since it
            # is built to solve the same problem on the level of atoms
            # Here the 'bonds' in the BondList are base pairs and the
            # indices are base positions
            pair_list = BondList(len(self._alignment), base_pairs)
            # Remove all pairs that appear in gaps of given sequence
            pair_list = pair_list[trace != -1]
            # Convert back to array of base pairs,
            # remove unused BondType column
            base_pairs = pair_list.as_array()[:,:2]
        return base_pairs

    @staticmethod
    def compute_secondary_structure(alignment, bin_path="RNAalifold"):
        """
        Predict the consensus secondary structure of a ribonucleic acid
        alignment using *ViennaRNA's* *RNAalifold* software.

        This is a convenience function, that wraps the
        :class:`RNAalifoldApp` execution.

        Parameters
        ----------
        alignment : Alignment
            An alignment of RNA sequences.
        bin_path : str, optional
            Path of the *RNAalifold* binary.

        Returns
        -------
        dotbracket : str
            The secondary structure in dot bracket notation.
        free_energy : float
            The free energy.
        covariance_energy : float
            The energy of the covariance term.
        """
        app = RNAalifoldApp(alignment, bin_path=bin_path)
        app.start()
        app.join()
        return (
            app.get_dot_bracket(),
            app.get_free_energy(),
            app.get_covariance_energy()
        )