biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (354) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +216 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +447 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +77 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +208 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/afdb/__init__.py +12 -0
  34. biotite/database/afdb/download.py +197 -0
  35. biotite/database/entrez/__init__.py +15 -0
  36. biotite/database/entrez/check.py +60 -0
  37. biotite/database/entrez/dbnames.py +101 -0
  38. biotite/database/entrez/download.py +228 -0
  39. biotite/database/entrez/key.py +44 -0
  40. biotite/database/entrez/query.py +263 -0
  41. biotite/database/error.py +16 -0
  42. biotite/database/pubchem/__init__.py +21 -0
  43. biotite/database/pubchem/download.py +258 -0
  44. biotite/database/pubchem/error.py +30 -0
  45. biotite/database/pubchem/query.py +819 -0
  46. biotite/database/pubchem/throttle.py +98 -0
  47. biotite/database/rcsb/__init__.py +13 -0
  48. biotite/database/rcsb/download.py +161 -0
  49. biotite/database/rcsb/query.py +963 -0
  50. biotite/database/uniprot/__init__.py +13 -0
  51. biotite/database/uniprot/check.py +40 -0
  52. biotite/database/uniprot/download.py +126 -0
  53. biotite/database/uniprot/query.py +292 -0
  54. biotite/file.py +244 -0
  55. biotite/interface/__init__.py +19 -0
  56. biotite/interface/openmm/__init__.py +20 -0
  57. biotite/interface/openmm/state.py +93 -0
  58. biotite/interface/openmm/system.py +227 -0
  59. biotite/interface/pymol/__init__.py +201 -0
  60. biotite/interface/pymol/cgo.py +346 -0
  61. biotite/interface/pymol/convert.py +185 -0
  62. biotite/interface/pymol/display.py +267 -0
  63. biotite/interface/pymol/object.py +1228 -0
  64. biotite/interface/pymol/shapes.py +178 -0
  65. biotite/interface/pymol/startup.py +169 -0
  66. biotite/interface/rdkit/__init__.py +19 -0
  67. biotite/interface/rdkit/mol.py +490 -0
  68. biotite/interface/version.py +94 -0
  69. biotite/interface/warning.py +19 -0
  70. biotite/sequence/__init__.py +84 -0
  71. biotite/sequence/align/__init__.py +199 -0
  72. biotite/sequence/align/alignment.py +702 -0
  73. biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
  74. biotite/sequence/align/banded.pyx +652 -0
  75. biotite/sequence/align/buckets.py +71 -0
  76. biotite/sequence/align/cigar.py +425 -0
  77. biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
  78. biotite/sequence/align/kmeralphabet.pyx +595 -0
  79. biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
  80. biotite/sequence/align/kmersimilarity.pyx +233 -0
  81. biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
  82. biotite/sequence/align/kmertable.pyx +3411 -0
  83. biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
  84. biotite/sequence/align/localgapped.pyx +892 -0
  85. biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
  86. biotite/sequence/align/localungapped.pyx +279 -0
  87. biotite/sequence/align/matrix.py +631 -0
  88. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  89. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  94. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  95. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  99. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  100. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  101. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  102. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  103. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  104. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  105. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  106. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  107. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  108. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  109. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  110. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  111. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  112. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  113. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  114. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  115. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  116. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  117. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  118. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  119. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  120. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  121. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  122. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  155. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  156. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  157. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  158. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  159. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  160. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  161. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  162. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  163. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  164. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  165. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  166. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  167. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  168. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  169. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  170. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  171. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  172. biotite/sequence/align/matrix_data/PB.license +21 -0
  173. biotite/sequence/align/matrix_data/PB.mat +18 -0
  174. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  175. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  176. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  177. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  178. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  179. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  180. biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
  181. biotite/sequence/align/multiple.pyx +619 -0
  182. biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
  183. biotite/sequence/align/pairwise.pyx +585 -0
  184. biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
  185. biotite/sequence/align/permutation.pyx +313 -0
  186. biotite/sequence/align/primes.txt +821 -0
  187. biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
  188. biotite/sequence/align/selector.pyx +954 -0
  189. biotite/sequence/align/statistics.py +264 -0
  190. biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
  191. biotite/sequence/align/tracetable.pxd +64 -0
  192. biotite/sequence/align/tracetable.pyx +370 -0
  193. biotite/sequence/alphabet.py +555 -0
  194. biotite/sequence/annotation.py +836 -0
  195. biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
  196. biotite/sequence/codec.pyx +155 -0
  197. biotite/sequence/codon.py +476 -0
  198. biotite/sequence/codon_tables.txt +202 -0
  199. biotite/sequence/graphics/__init__.py +33 -0
  200. biotite/sequence/graphics/alignment.py +1101 -0
  201. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  202. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  203. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  204. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  205. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  206. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  207. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  208. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  209. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  210. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  211. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  212. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  213. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  214. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  215. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  216. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  217. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  218. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  219. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  220. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  221. biotite/sequence/graphics/colorschemes.py +170 -0
  222. biotite/sequence/graphics/dendrogram.py +231 -0
  223. biotite/sequence/graphics/features.py +544 -0
  224. biotite/sequence/graphics/logo.py +102 -0
  225. biotite/sequence/graphics/plasmid.py +712 -0
  226. biotite/sequence/io/__init__.py +12 -0
  227. biotite/sequence/io/fasta/__init__.py +22 -0
  228. biotite/sequence/io/fasta/convert.py +283 -0
  229. biotite/sequence/io/fasta/file.py +265 -0
  230. biotite/sequence/io/fastq/__init__.py +19 -0
  231. biotite/sequence/io/fastq/convert.py +117 -0
  232. biotite/sequence/io/fastq/file.py +507 -0
  233. biotite/sequence/io/genbank/__init__.py +17 -0
  234. biotite/sequence/io/genbank/annotation.py +269 -0
  235. biotite/sequence/io/genbank/file.py +573 -0
  236. biotite/sequence/io/genbank/metadata.py +336 -0
  237. biotite/sequence/io/genbank/sequence.py +173 -0
  238. biotite/sequence/io/general.py +201 -0
  239. biotite/sequence/io/gff/__init__.py +26 -0
  240. biotite/sequence/io/gff/convert.py +128 -0
  241. biotite/sequence/io/gff/file.py +449 -0
  242. biotite/sequence/phylo/__init__.py +36 -0
  243. biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
  244. biotite/sequence/phylo/nj.pyx +221 -0
  245. biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
  246. biotite/sequence/phylo/tree.pyx +1169 -0
  247. biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
  248. biotite/sequence/phylo/upgma.pyx +164 -0
  249. biotite/sequence/profile.py +561 -0
  250. biotite/sequence/search.py +117 -0
  251. biotite/sequence/seqtypes.py +720 -0
  252. biotite/sequence/sequence.py +373 -0
  253. biotite/setup_ccd.py +197 -0
  254. biotite/structure/__init__.py +135 -0
  255. biotite/structure/alphabet/__init__.py +25 -0
  256. biotite/structure/alphabet/encoder.py +332 -0
  257. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  258. biotite/structure/alphabet/i3d.py +109 -0
  259. biotite/structure/alphabet/layers.py +86 -0
  260. biotite/structure/alphabet/pb.license +21 -0
  261. biotite/structure/alphabet/pb.py +170 -0
  262. biotite/structure/alphabet/unkerasify.py +128 -0
  263. biotite/structure/atoms.py +1562 -0
  264. biotite/structure/basepairs.py +1403 -0
  265. biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
  266. biotite/structure/bonds.pyx +2036 -0
  267. biotite/structure/box.py +724 -0
  268. biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
  269. biotite/structure/celllist.pyx +864 -0
  270. biotite/structure/chains.py +310 -0
  271. biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
  272. biotite/structure/charges.pyx +520 -0
  273. biotite/structure/compare.py +683 -0
  274. biotite/structure/density.py +109 -0
  275. biotite/structure/dotbracket.py +213 -0
  276. biotite/structure/error.py +39 -0
  277. biotite/structure/filter.py +591 -0
  278. biotite/structure/geometry.py +817 -0
  279. biotite/structure/graphics/__init__.py +13 -0
  280. biotite/structure/graphics/atoms.py +243 -0
  281. biotite/structure/graphics/rna.py +298 -0
  282. biotite/structure/hbond.py +425 -0
  283. biotite/structure/info/__init__.py +24 -0
  284. biotite/structure/info/atom_masses.json +121 -0
  285. biotite/structure/info/atoms.py +98 -0
  286. biotite/structure/info/bonds.py +149 -0
  287. biotite/structure/info/ccd.py +200 -0
  288. biotite/structure/info/components.bcif +0 -0
  289. biotite/structure/info/groups.py +128 -0
  290. biotite/structure/info/masses.py +121 -0
  291. biotite/structure/info/misc.py +137 -0
  292. biotite/structure/info/radii.py +267 -0
  293. biotite/structure/info/standardize.py +185 -0
  294. biotite/structure/integrity.py +213 -0
  295. biotite/structure/io/__init__.py +29 -0
  296. biotite/structure/io/dcd/__init__.py +13 -0
  297. biotite/structure/io/dcd/file.py +67 -0
  298. biotite/structure/io/general.py +243 -0
  299. biotite/structure/io/gro/__init__.py +14 -0
  300. biotite/structure/io/gro/file.py +343 -0
  301. biotite/structure/io/mol/__init__.py +20 -0
  302. biotite/structure/io/mol/convert.py +112 -0
  303. biotite/structure/io/mol/ctab.py +420 -0
  304. biotite/structure/io/mol/header.py +120 -0
  305. biotite/structure/io/mol/mol.py +149 -0
  306. biotite/structure/io/mol/sdf.py +940 -0
  307. biotite/structure/io/netcdf/__init__.py +13 -0
  308. biotite/structure/io/netcdf/file.py +64 -0
  309. biotite/structure/io/pdb/__init__.py +20 -0
  310. biotite/structure/io/pdb/convert.py +389 -0
  311. biotite/structure/io/pdb/file.py +1380 -0
  312. biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
  313. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  314. biotite/structure/io/pdbqt/__init__.py +15 -0
  315. biotite/structure/io/pdbqt/convert.py +113 -0
  316. biotite/structure/io/pdbqt/file.py +688 -0
  317. biotite/structure/io/pdbx/__init__.py +23 -0
  318. biotite/structure/io/pdbx/bcif.py +674 -0
  319. biotite/structure/io/pdbx/cif.py +1091 -0
  320. biotite/structure/io/pdbx/component.py +251 -0
  321. biotite/structure/io/pdbx/compress.py +362 -0
  322. biotite/structure/io/pdbx/convert.py +2113 -0
  323. biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
  324. biotite/structure/io/pdbx/encoding.pyx +1078 -0
  325. biotite/structure/io/trajfile.py +696 -0
  326. biotite/structure/io/trr/__init__.py +13 -0
  327. biotite/structure/io/trr/file.py +43 -0
  328. biotite/structure/io/util.py +38 -0
  329. biotite/structure/io/xtc/__init__.py +13 -0
  330. biotite/structure/io/xtc/file.py +43 -0
  331. biotite/structure/mechanics.py +72 -0
  332. biotite/structure/molecules.py +337 -0
  333. biotite/structure/pseudoknots.py +622 -0
  334. biotite/structure/rdf.py +245 -0
  335. biotite/structure/repair.py +302 -0
  336. biotite/structure/residues.py +716 -0
  337. biotite/structure/rings.py +451 -0
  338. biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
  339. biotite/structure/sasa.pyx +322 -0
  340. biotite/structure/segments.py +328 -0
  341. biotite/structure/sequence.py +110 -0
  342. biotite/structure/spacegroups.json +1567 -0
  343. biotite/structure/spacegroups.license +26 -0
  344. biotite/structure/sse.py +306 -0
  345. biotite/structure/superimpose.py +511 -0
  346. biotite/structure/tm.py +581 -0
  347. biotite/structure/transform.py +736 -0
  348. biotite/structure/util.py +160 -0
  349. biotite/version.py +34 -0
  350. biotite/visualize.py +375 -0
  351. biotite-1.5.0.dist-info/METADATA +162 -0
  352. biotite-1.5.0.dist-info/RECORD +354 -0
  353. biotite-1.5.0.dist-info/WHEEL +6 -0
  354. biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,622 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides functionality for pseudoknot detection.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Tom David Müller"
11
+ __all__ = ["pseudoknots"]
12
+
13
+ from itertools import chain, product
14
+ import networkx as nx
15
+ import numpy as np
16
+
17
+
18
def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
    """
    Identify the pseudoknot order for each base pair in a given set of
    base pairs.

    By default the algorithm removes base pairs until the remaining
    base pairs are completely nested i.e. no pseudoknots appear.
    The pseudoknot order of the removed base pairs is incremented and
    the procedure is repeated with these base pairs.
    Base pairs are removed in a way that maximizes the number of
    remaining base pairs.
    However, an optional score for each individual base pair can be
    provided.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n,2)
        The base pairs to determine the pseudoknot order of. Each row
        represents indices from two paired bases. The structure of
        the :class:`ndarray` is equal to the structure of the output of
        :func:`base_pairs()`, where the indices represent the
        beginning of the residues.
        Other array-like inputs (e.g. nested lists) are converted to
        an :class:`ndarray`.
    scores : ndarray, dtype=int, shape=(n,), optional
        The score for each base pair.
        By default, the score of each base pair is ``1``.
    max_pseudoknot_order : int, optional
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as ``-1``.
        By default, the algorithm is run until all base pairs
        have an assigned pseudoknot order.

    Returns
    -------
    pseudoknot_order : ndarray, dtype=int, shape=(m,n)
        The pseudoknot order of the input `base_pairs`.
        Multiple solutions that maximize the number of basepairs or
        the given score, respectively, may be possible.
        Therefore all *m* individual solutions are returned.

    Raises
    ------
    ValueError
        If `base_pairs` and `scores` differ in length.

    Notes
    -----
    The dynamic programming approach by Smit *et al*
    :footcite:`Smit2008` is applied to detect pseudoknots.
    The algorithm was originally developed to remove pseudoknots from a
    structure.
    However, if it is run iteratively on removed knotted pairs it can be
    used to identify the pseudoknot order.

    The pseudoknot order is defined as the minimum number of base pair
    set decompositions resulting in a nested structure
    :footcite:`Antczak2018`.
    Therefore, there are no pseudoknots between base pairs with the same
    pseudoknot order.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Remove the pseudoknotted base pair for the sequence *ABCbac*, where
    the corresponding big and small letters each represent a base pair:

    Define the base pairs as :class:`ndarray`:

    >>> basepairs = np.array([[0, 4],
    ...                       [1, 3],
    ...                       [2, 5]])

    Find the unknotted base pairs, optimizing for the maximum number of
    base pairs:

    >>> print(pseudoknots(basepairs, max_pseudoknot_order=0))
    [[ 0  0 -1]]

    This indicates that the base pair *Cc* is a pseudoknot.

    Given the length of the sequence (6 bases), we can also represent
    the unknotted structure in dot bracket notation:

    >>> print(dot_bracket(basepairs, 6, max_pseudoknot_order=0)[0])
    ((.)).

    If the maximum pseudoknot order is not restricted, the order of the
    knotted pairs is determined and can be represented using dot bracket
    letter notation:

    >>> print(pseudoknots(basepairs))
    [[0 0 1]]
    >>> print(dot_bracket(basepairs, 6)[0])
    (([))]
    """
    # Accept any array-like input: the region detection performed below
    # uses ``base_pairs.shape`` and index-array lookups, which plain
    # lists do not support
    base_pairs = np.asarray(base_pairs)

    if len(base_pairs) == 0:
        # No base pairs -> empty pseudoknot order array
        return np.array([[]], dtype=np.int32)

    # Start with a single candidate solution in which every base pair
    # is still marked with ``-1``
    results = [np.full(len(base_pairs), -1, dtype="int32")]

    if scores is None:
        # If no score array is given, each base pair's score is one
        scores = np.ones(len(base_pairs))
    else:
        # The scores are indexed with index arrays later on
        scores = np.asarray(scores)

    # Each base pair needs exactly one score
    if len(base_pairs) != len(scores):
        raise ValueError("'base_pairs' and 'scores' must have the same length")

    # Split the base pairs in regions
    regions = _find_regions(base_pairs, scores)

    # Compute results
    results = _get_results(regions, results, max_pseudoknot_order)

    return np.vstack(results)
133
+
134
+
135
+ class _Region:
136
+ """
137
+ This class represents a paired region.
138
+
139
+ A region is a set of base pairs. This class provides methods to
140
+ access the minimum and maximum index of the bases that are part of
141
+ the region, handles score calculation, and backtracing to the
142
+ original base pair array.
143
+
144
+ Parameters
145
+ ----------
146
+ base_pairs : ndarray, shape=(n,2), dtype=int
147
+ All base pairs of the structure the region is a subset for.
148
+ region_pairs : ndarray, dtype=int
149
+ The indices of the base pairs in ``base_pairs`` that are part of
150
+ the region.
151
+ scores : ndarray, dtype=int, shape=(n,)
152
+ The score for each base pair.
153
+ """
154
+
155
+ def __init__(self, base_pairs, region_pairs, scores):
156
+ # The Start and Stop indices for each Region
157
+ self.start = np.min(base_pairs[region_pairs])
158
+ self.stop = np.max(base_pairs[region_pairs])
159
+
160
+ self.region_pairs = region_pairs
161
+ self.score = np.sum(scores[region_pairs])
162
+
163
+ def get_index_array(self):
164
+ """
165
+ Return an index array with the positions of the region`s bases
166
+ in the original base pair array.
167
+
168
+ Returns
169
+ -------
170
+ region_pairs : ndarray
171
+ The indices of the bases in the original base pair array.
172
+ """
173
+ return self.region_pairs
174
+
175
+ def __lt__(self, other):
176
+ """
177
+ This comparison operator is required for :func:`np.unique()`. As
178
+ only the difference between the regions is relevant and not any
179
+ particular order, a distinction is made by the objects unique
180
+ ids.
181
+
182
+ Parameters
183
+ ----------
184
+ other : _region
185
+ The other region.
186
+
187
+ Returns
188
+ -------
189
+ comparision : bool
190
+ The evaluated comparison.
191
+ """
192
+ return id(self) < id(other)
193
+
194
+
195
def _find_regions(base_pairs, scores):
    """
    Group the base pairs into regions, where a region is a set of
    consecutively nested base pairs.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n, 2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    # Orient every pair so that the lower base index comes first ...
    oriented_pairs = np.sort(base_pairs, axis=1)
    # ... and order the pairs by that lower index, remembering where
    # each row came from in the input array
    original_indices = np.argsort(oriented_pairs[:, 0])
    oriented_pairs = oriented_pairs[original_indices]

    # Replace each base index by its rank among all paired bases
    # E.g.: [[3, 5]  -->  [[0, 1]
    #        [9, 7]]       [3, 2]]
    rank = np.argsort(np.argsort(oriented_pairs.flatten())).reshape(
        base_pairs.shape
    )

    regions = set()
    # Input row indices of the base pairs in the region being built
    current_region = []

    for row, pair_index in enumerate(original_indices):
        # The very first base pair always opens the first region;
        # every later one is compared with its predecessor
        if current_region:
            upstream_step = rank[row, 0] - rank[row - 1, 0]
            downstream_step = rank[row - 1, 1] - rank[row, 1]
            # The pair only continues the region if it is stacked
            # directly inside the previous pair on both strands
            if not (downstream_step == 1 and upstream_step == 1):
                regions.add(
                    _Region(base_pairs, np.array(current_region), scores)
                )
                current_region = []
        current_region.append(pair_index)

    # The final region is not closed by a following base pair
    regions.add(_Region(base_pairs, np.array(current_region), scores))

    # Hand back the regions together with their conflicts
    return _generate_graphical_representation(regions)
263
+
264
+
265
def _generate_graphical_representation(regions):
    """
    Build a ``networkx`` ``Graph`` whose nodes are the given regions and
    whose edges connect regions that conflict with each other.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions representing the consecutively nested base pairs.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    graph = nx.Graph()
    graph.add_nodes_from(regions)

    # Every region appears twice in ``ordered_regions`` (start and end
    # point); ``is_start`` flags the entries that mark a start
    ordered_regions, (is_start,) = _get_region_array_for(
        regions, content=[lambda a: [True, False]], dtype=["bool"]
    )

    for opening, region in enumerate(ordered_regions):
        # Handle each region once, namely at its start point
        if is_start[opening]:
            # Position of the matching end point of this region
            offset = _get_first_occurrence_for(
                ordered_regions[opening + 1 :], region
            )
            closing = offset + opening + 1

            # A region seen once between start and end crosses the
            # current region; one seen twice lies completely inside
            # and is therefore dropped again
            crossing = set()
            for enclosed in ordered_regions[opening + 1 : closing]:
                if enclosed in crossing:
                    crossing.remove(enclosed)
                else:
                    crossing.add(enclosed)

            # Each conflict becomes an edge of the graph
            graph.add_edges_from([(region, other) for other in crossing])
    return graph
332
+
333
+
334
+ def _get_first_occurrence_for(iterable, wanted_object):
335
+ """
336
+ Get the first occurrence of an object in an iterable.
337
+
338
+ Parameters
339
+ ----------
340
+ iterable : iterable
341
+ The iterable containing the object.
342
+ wanted_object : object
343
+ The object to be found.
344
+
345
+ Returns
346
+ -------
347
+ index : int
348
+ The index of the first occurrence of the object.
349
+ """
350
+ for i, value in enumerate(iterable):
351
+ if value is wanted_object:
352
+ return i
353
+
354
+
355
def _get_region_array_for(regions, content=(), dtype=()):
    """
    Get a :class:`ndarray` of region objects. Each object occurs twice,
    representing its start and end point. The regions positions in the
    array reflect their relative positions.

    Furthermore, a list of functions can be provided enabling custom
    outputs for each objects` start and end point.

    Parameters
    ----------
    regions : set {_region, ...}
        The regions to be considered. Each region must provide the
        attributes ``start`` and ``stop``.
    content : list [function, ...]
        The functions to be considered for custom outputs. For a given
        region they must return a tuple of which the first value is
        placed at the start position and the second value at the end
        position of the region relative to the other regions.
    dtype : list [str, ...]
        The data type of the output of the custom functions. The
        *i*-th entry belongs to the *i*-th function in `content`.

    Returns
    -------
    region_array : ndarray, dtype=object
        The array of ordered region objects.
    custom_content : list [ndarray, ...]
        The custom output, ordered like `region_array`.
        Only returned, if `content` is not empty.
    """
    n_entries = len(regions) * 2

    # The regions are arbitrary Python objects, hence the generic
    # object dtype is requested explicitly
    region_array = np.empty(n_entries, dtype=object)
    index_array = np.empty(n_entries, dtype="int32")

    # One output array for each custom content function
    content_list = [
        np.empty(n_entries, dtype=dtype[c]) for c in range(len(content))
    ]

    # Fill the arrays: each region occupies two consecutive slots,
    # the first for its start and the second for its stop
    for i, reg in enumerate(regions):
        first, second = 2 * i, 2 * i + 1
        # Scalar assignment stores the region object itself
        region_array[first] = reg
        region_array[second] = reg
        for c, function in enumerate(content):
            content_list[c][[first, second]] = function(reg)
        index_array[[first, second]] = [reg.start, reg.stop]

    # Order the arrays by the base indices
    sort_mask = np.argsort(index_array)
    region_array = region_array[sort_mask]

    # If no custom array content is given only return the ordered array
    # containing the regions.
    # The length is checked instead of comparing to ``[]``, as
    # the default value of `content` is an empty tuple
    if len(content) == 0:
        return region_array

    # If custom content is given also return the ordered content
    return region_array, [custom[sort_mask] for custom in content_list]
413
+
414
+
415
def _remove_pseudoknots(regions):
    """
    Get the optimal solutions according to the algorithm referenced in
    :func:`pseudoknots()`.

    The algorithm uses a dynamic programming matrix in order to find
    the optimal solutions with the highest combined region scores.
    The score of a solution is the sum of the ``score`` attributes of
    the regions it contains.

    Parameters
    ----------
    regions : set {_region, ...}
        The conflicting regions for which optimal solutions are to be
        found. Each region is read via its ``start``, ``stop`` and
        ``score`` attributes.

    Returns
    -------
    solutions : ndarray, dtype=object
        The optimal solutions. Each solution in the ``ndarray`` is
        represented as (frozen) ``set`` of unknotted regions.
    """
    # Create dynamic programming matrix
    # Each region contributes two rows/columns: one for its start and
    # one for its stop position
    dp_matrix_shape = len(regions) * 2, len(regions) * 2
    dp_matrix = np.empty(dp_matrix_shape, dtype="object")
    # For each cell these matrices store, per solution in that cell,
    # the lowest start and the highest stop of the contained regions
    dp_matrix_solutions_starts = np.zeros_like(dp_matrix)
    dp_matrix_solutions_stops = np.zeros_like(dp_matrix)

    # Each index corresponds to the position in the dp matrix.
    # ``region_array`` contains the region objects and ``start_stops``
    # contains the lowest and highest positions of the regions
    region_array, (start_stops,) = _get_region_array_for(
        regions, [lambda a: (a.start, a.stop)], ["int32"]
    )
    # Initialise the matrix diagonal with ndarrays of empty frozensets
    for i in range(len(dp_matrix)):
        dp_matrix[i, i] = np.array([frozenset()])

    # Iterate through the top right half of the dynamic programming
    # matrix
    # Columns are visited left to right and rows bottom to top, so the
    # left, bottom and bottom-left neighbours of a cell are already
    # filled when the cell is evaluated
    for j in range(len(regions) * 2):
        for i in range(j - 1, -1, -1):
            # A set is used to discard duplicate candidate solutions
            solution_candidates = set()
            left = dp_matrix[i, j - 1]
            bottom = dp_matrix[i + 1, j]

            # Add all solutions of the cell to the left
            for solution in left:
                solution_candidates.add(solution)

            # Add all solutions of the cell to the bottom
            for solution in bottom:
                solution_candidates.add(solution)

            # Check if i and j are start/end-points of the same region
            if region_array[i] is region_array[j]:
                # Add all solutions from the cell to the bottom left
                # plus this region
                bottom_left = dp_matrix[i + 1, j - 1]
                for solution in bottom_left:
                    solution_candidates.add(solution | set([region_array[i]]))

            # Perform additional tests if solution in the left cell and
            # bottom cell both differ from an empty solution
            if np.any(left != [frozenset()]) and np.any(bottom != [frozenset()]):
                left_highest = dp_matrix_solutions_stops[i, j - 1]
                bottom_lowest = dp_matrix_solutions_starts[i + 1, j]

                # For each pair of solutions check if solutions are
                # disjoint
                for solution1, highest in zip(left, left_highest):
                    for solution2, lowest in zip(bottom, bottom_lowest):
                        if highest < lowest:
                            # Both solutions are disjoint
                            solution_candidates.add(solution1 | solution2)
                        else:
                            # Both solutions are not disjoint
                            # Add subsolutions
                            # The split point ``k`` runs over the matrix
                            # positions looked up from ``lowest`` and
                            # ``highest`` in ``start_stops``
                            for k in range(
                                np.where(start_stops == lowest)[0][0] - 1,
                                np.where(start_stops == highest)[0][0] + 1,
                            ):
                                cell1 = dp_matrix[i, k]
                                cell2 = dp_matrix[k + 1, j]
                                for subsolution1 in cell1:
                                    for subsolution2 in cell2:
                                        solution_candidates.add(
                                            subsolution1 | subsolution2
                                        )

            # Make solution candidates ``ndarray`` array of sets
            solution_candidates = np.array(list(solution_candidates))

            # Calculate the scores for each solution
            solution_scores = np.zeros(len(solution_candidates))
            for s, solution in enumerate(solution_candidates):
                score = 0
                for reg in solution:
                    score += reg.score
                solution_scores[s] = score
            # Get the indices where the score is at a maximum
            # (all equally scored optima are kept)
            highest_scores = np.argwhere(
                solution_scores == np.amax(solution_scores)
            ).flatten()

            # Get the solutions with the highest score
            solution_candidates = solution_candidates[highest_scores]

            # Add the solutions to the dynamic programming matrix
            dp_matrix[i, j] = solution_candidates

            solution_starts = np.zeros_like(solution_candidates, dtype="int32")
            solution_stops = np.zeros_like(solution_candidates, dtype="int32")

            # Record the extent of each kept solution;
            # an empty solution has no regions and gets -1 for both
            for s, solution in enumerate(solution_candidates):
                solution_starts[s] = min([reg.start for reg in solution], default=-1)
                solution_stops[s] = max([reg.stop for reg in solution], default=-1)

            dp_matrix_solutions_starts[i, j] = solution_starts
            dp_matrix_solutions_stops[i, j] = solution_stops

    # The top right corner contains the optimal solutions
    return dp_matrix[0, -1]
538
+
539
+
540
def _get_results(regions, results, max_pseudoknot_order, order=0):
    """
    Determine the pseudoknot order of the given regions via the dynamic
    programming algorithm. Conflicts that remain after removing the
    optimal solution are evaluated recursively with the next higher
    order and merged into the results.

    Parameters
    ----------
    regions : Graph
        The ``NetworkX`` graph of regions for which optimal solutions
        are to be found. Nodes are regions, edges are conflicts.
        Non-conflicting regions are removed from this graph in-place.
    results : list [ndarray, ...]
        The results obtained so far. The arrays are updated in-place
        for the non-conflicting regions.
    max_pseudoknot_order : int
        The maximum pseudoknot order to be found. Regions that would
        be of a higher order are not evaluated, i.e. their entries in
        the results are left untouched. If ``None`` is given, all
        base pairs are evaluated.
    order : int
        The order that is currently evaluated.

    Returns
    -------
    results : list [ndarray, ...]
        The results, one or more arrays for each optimal solution.
    """
    # Regions without any conflict belong to the current order
    isolated = list(nx.isolates(regions))
    regions.remove_nodes_from(isolated)
    isolated_indices = list(
        chain.from_iterable(region.get_index_array() for region in isolated)
    )
    for result in results:
        result[isolated_indices] = order

    # Without remaining conflicts the results are complete
    if len(regions) == 0:
        return results

    # Each connected component of conflicting regions is solved
    # separately; every combination of the per-component optima forms
    # one overall solution
    components = list(nx.connected_components(regions))
    component_solutions = [
        _remove_pseudoknots(component) for component in components
    ]
    solutions = [
        set(chain.from_iterable(combination))
        for combination in product(*component_solutions)
    ]

    merged_results = []
    for solution in solutions:
        # Each optimal solution works on its own copy of the results
        branch_results = [result.copy() for result in results]

        # Regions not part of the solution remain pseudoknotted
        remaining_regions = regions.copy()
        remaining_regions.remove_nodes_from(solution)

        # The unknotted base pairs get the current order
        unknotted_indices = list(
            chain.from_iterable(
                region.get_index_array() for region in solution
            )
        )
        for result in branch_results:
            result[unknotted_indices] = order

        # Only descend further, if the maximum order is not reached yet
        if max_pseudoknot_order != order:
            branch_results = _get_results(
                remaining_regions,
                branch_results,
                max_pseudoknot_order,
                order=order + 1,
            )

        # Collect the results of all solutions in one flat list
        merged_results.extend(branch_results)

    return merged_results