biotite 0.41.1__cp312-cp312-macosx_10_16_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show
  1. biotite/__init__.py +19 -0
  2. biotite/application/__init__.py +43 -0
  3. biotite/application/application.py +265 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +505 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +83 -0
  8. biotite/application/blast/webapp.py +421 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +238 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +152 -0
  13. biotite/application/localapp.py +306 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +122 -0
  16. biotite/application/msaapp.py +374 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +254 -0
  19. biotite/application/muscle/app5.py +171 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +456 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +222 -0
  24. biotite/application/util.py +59 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +304 -0
  27. biotite/application/viennarna/rnafold.py +269 -0
  28. biotite/application/viennarna/rnaplot.py +187 -0
  29. biotite/application/viennarna/util.py +72 -0
  30. biotite/application/webapp.py +77 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +61 -0
  35. biotite/database/entrez/dbnames.py +89 -0
  36. biotite/database/entrez/download.py +223 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +223 -0
  39. biotite/database/error.py +15 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +260 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +827 -0
  44. biotite/database/pubchem/throttle.py +99 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +167 -0
  47. biotite/database/rcsb/query.py +959 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +32 -0
  50. biotite/database/uniprot/download.py +134 -0
  51. biotite/database/uniprot/query.py +209 -0
  52. biotite/file.py +251 -0
  53. biotite/sequence/__init__.py +73 -0
  54. biotite/sequence/align/__init__.py +49 -0
  55. biotite/sequence/align/alignment.py +658 -0
  56. biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +69 -0
  59. biotite/sequence/align/cigar.py +434 -0
  60. biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +574 -0
  62. biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3400 -0
  66. biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +405 -0
  71. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  72. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  81. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  87. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  93. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  99. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  100. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  101. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  102. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  103. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  104. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  105. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  154. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  155. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  156. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  160. biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
  161. biotite/sequence/align/multiple.pyx +620 -0
  162. biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
  163. biotite/sequence/align/pairwise.pyx +587 -0
  164. biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
  165. biotite/sequence/align/permutation.pyx +305 -0
  166. biotite/sequence/align/primes.txt +821 -0
  167. biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
  168. biotite/sequence/align/selector.pyx +956 -0
  169. biotite/sequence/align/statistics.py +265 -0
  170. biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
  171. biotite/sequence/align/tracetable.pxd +64 -0
  172. biotite/sequence/align/tracetable.pyx +370 -0
  173. biotite/sequence/alphabet.py +566 -0
  174. biotite/sequence/annotation.py +829 -0
  175. biotite/sequence/codec.cpython-312-darwin.so +0 -0
  176. biotite/sequence/codec.pyx +155 -0
  177. biotite/sequence/codon.py +466 -0
  178. biotite/sequence/codon_tables.txt +202 -0
  179. biotite/sequence/graphics/__init__.py +33 -0
  180. biotite/sequence/graphics/alignment.py +1034 -0
  181. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  182. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  183. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  184. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  185. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  186. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  187. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  188. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  189. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  190. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  192. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  193. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  194. biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
  195. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  196. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  197. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  198. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  199. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  200. biotite/sequence/graphics/colorschemes.py +139 -0
  201. biotite/sequence/graphics/dendrogram.py +184 -0
  202. biotite/sequence/graphics/features.py +510 -0
  203. biotite/sequence/graphics/logo.py +110 -0
  204. biotite/sequence/graphics/plasmid.py +661 -0
  205. biotite/sequence/io/__init__.py +12 -0
  206. biotite/sequence/io/fasta/__init__.py +22 -0
  207. biotite/sequence/io/fasta/convert.py +273 -0
  208. biotite/sequence/io/fasta/file.py +278 -0
  209. biotite/sequence/io/fastq/__init__.py +19 -0
  210. biotite/sequence/io/fastq/convert.py +120 -0
  211. biotite/sequence/io/fastq/file.py +551 -0
  212. biotite/sequence/io/genbank/__init__.py +17 -0
  213. biotite/sequence/io/genbank/annotation.py +277 -0
  214. biotite/sequence/io/genbank/file.py +575 -0
  215. biotite/sequence/io/genbank/metadata.py +324 -0
  216. biotite/sequence/io/genbank/sequence.py +172 -0
  217. biotite/sequence/io/general.py +192 -0
  218. biotite/sequence/io/gff/__init__.py +26 -0
  219. biotite/sequence/io/gff/convert.py +133 -0
  220. biotite/sequence/io/gff/file.py +434 -0
  221. biotite/sequence/phylo/__init__.py +36 -0
  222. biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
  223. biotite/sequence/phylo/nj.pyx +221 -0
  224. biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
  225. biotite/sequence/phylo/tree.pyx +1169 -0
  226. biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
  227. biotite/sequence/phylo/upgma.pyx +164 -0
  228. biotite/sequence/profile.py +456 -0
  229. biotite/sequence/search.py +116 -0
  230. biotite/sequence/seqtypes.py +556 -0
  231. biotite/sequence/sequence.py +374 -0
  232. biotite/structure/__init__.py +132 -0
  233. biotite/structure/atoms.py +1455 -0
  234. biotite/structure/basepairs.py +1415 -0
  235. biotite/structure/bonds.cpython-312-darwin.so +0 -0
  236. biotite/structure/bonds.pyx +1933 -0
  237. biotite/structure/box.py +592 -0
  238. biotite/structure/celllist.cpython-312-darwin.so +0 -0
  239. biotite/structure/celllist.pyx +849 -0
  240. biotite/structure/chains.py +298 -0
  241. biotite/structure/charges.cpython-312-darwin.so +0 -0
  242. biotite/structure/charges.pyx +520 -0
  243. biotite/structure/compare.py +274 -0
  244. biotite/structure/density.py +114 -0
  245. biotite/structure/dotbracket.py +216 -0
  246. biotite/structure/error.py +31 -0
  247. biotite/structure/filter.py +585 -0
  248. biotite/structure/geometry.py +697 -0
  249. biotite/structure/graphics/__init__.py +13 -0
  250. biotite/structure/graphics/atoms.py +226 -0
  251. biotite/structure/graphics/rna.py +282 -0
  252. biotite/structure/hbond.py +409 -0
  253. biotite/structure/info/__init__.py +25 -0
  254. biotite/structure/info/atom_masses.json +121 -0
  255. biotite/structure/info/atoms.py +82 -0
  256. biotite/structure/info/bonds.py +145 -0
  257. biotite/structure/info/ccd/README.rst +8 -0
  258. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  259. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  260. biotite/structure/info/ccd/components.bcif +0 -0
  261. biotite/structure/info/ccd/nucleotides.txt +798 -0
  262. biotite/structure/info/ccd.py +95 -0
  263. biotite/structure/info/groups.py +90 -0
  264. biotite/structure/info/masses.py +123 -0
  265. biotite/structure/info/misc.py +144 -0
  266. biotite/structure/info/radii.py +197 -0
  267. biotite/structure/info/standardize.py +196 -0
  268. biotite/structure/integrity.py +268 -0
  269. biotite/structure/io/__init__.py +30 -0
  270. biotite/structure/io/ctab.py +72 -0
  271. biotite/structure/io/dcd/__init__.py +13 -0
  272. biotite/structure/io/dcd/file.py +65 -0
  273. biotite/structure/io/general.py +257 -0
  274. biotite/structure/io/gro/__init__.py +14 -0
  275. biotite/structure/io/gro/file.py +343 -0
  276. biotite/structure/io/mmtf/__init__.py +21 -0
  277. biotite/structure/io/mmtf/assembly.py +214 -0
  278. biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
  279. biotite/structure/io/mmtf/convertarray.pyx +341 -0
  280. biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
  281. biotite/structure/io/mmtf/convertfile.pyx +501 -0
  282. biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
  283. biotite/structure/io/mmtf/decode.pyx +152 -0
  284. biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
  285. biotite/structure/io/mmtf/encode.pyx +183 -0
  286. biotite/structure/io/mmtf/file.py +233 -0
  287. biotite/structure/io/mol/__init__.py +20 -0
  288. biotite/structure/io/mol/convert.py +115 -0
  289. biotite/structure/io/mol/ctab.py +414 -0
  290. biotite/structure/io/mol/header.py +116 -0
  291. biotite/structure/io/mol/mol.py +193 -0
  292. biotite/structure/io/mol/sdf.py +916 -0
  293. biotite/structure/io/netcdf/__init__.py +13 -0
  294. biotite/structure/io/netcdf/file.py +63 -0
  295. biotite/structure/io/npz/__init__.py +20 -0
  296. biotite/structure/io/npz/file.py +152 -0
  297. biotite/structure/io/pdb/__init__.py +20 -0
  298. biotite/structure/io/pdb/convert.py +293 -0
  299. biotite/structure/io/pdb/file.py +1240 -0
  300. biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
  301. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  302. biotite/structure/io/pdbqt/__init__.py +15 -0
  303. biotite/structure/io/pdbqt/convert.py +107 -0
  304. biotite/structure/io/pdbqt/file.py +640 -0
  305. biotite/structure/io/pdbx/__init__.py +23 -0
  306. biotite/structure/io/pdbx/bcif.py +648 -0
  307. biotite/structure/io/pdbx/cif.py +1032 -0
  308. biotite/structure/io/pdbx/component.py +246 -0
  309. biotite/structure/io/pdbx/convert.py +1597 -0
  310. biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
  311. biotite/structure/io/pdbx/encoding.pyx +950 -0
  312. biotite/structure/io/pdbx/legacy.py +267 -0
  313. biotite/structure/io/tng/__init__.py +13 -0
  314. biotite/structure/io/tng/file.py +46 -0
  315. biotite/structure/io/trajfile.py +710 -0
  316. biotite/structure/io/trr/__init__.py +13 -0
  317. biotite/structure/io/trr/file.py +46 -0
  318. biotite/structure/io/xtc/__init__.py +13 -0
  319. biotite/structure/io/xtc/file.py +46 -0
  320. biotite/structure/mechanics.py +75 -0
  321. biotite/structure/molecules.py +353 -0
  322. biotite/structure/pseudoknots.py +642 -0
  323. biotite/structure/rdf.py +243 -0
  324. biotite/structure/repair.py +253 -0
  325. biotite/structure/residues.py +562 -0
  326. biotite/structure/resutil.py +178 -0
  327. biotite/structure/sasa.cpython-312-darwin.so +0 -0
  328. biotite/structure/sasa.pyx +322 -0
  329. biotite/structure/sequence.py +112 -0
  330. biotite/structure/sse.py +327 -0
  331. biotite/structure/superimpose.py +727 -0
  332. biotite/structure/transform.py +504 -0
  333. biotite/structure/util.py +98 -0
  334. biotite/temp.py +86 -0
  335. biotite/version.py +16 -0
  336. biotite/visualize.py +251 -0
  337. biotite-0.41.1.dist-info/METADATA +187 -0
  338. biotite-0.41.1.dist-info/RECORD +340 -0
  339. biotite-0.41.1.dist-info/WHEEL +4 -0
  340. biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,642 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides functionality for pseudoknot detection.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Tom David Müller"
11
+ __all__ = ["pseudoknots"]
12
+
13
+ import numpy as np
14
+ import networkx as nx
15
+ from itertools import chain, product
16
+
17
def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
    """
    Identify the pseudoknot order for each base pair in a given set of
    base pairs.

    By default the algorithm removes base pairs until the remaining
    base pairs are completely nested i.e. no pseudoknots appear.
    The pseudoknot order of the removed base pairs is incremented and
    the procedure is repeated with these base pairs.
    Base pairs are removed in a way that maximizes the number of
    remaining base pairs.
    However, an optional score for each individual base pair can be
    provided.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n,2)
        The base pairs to determine the pseudoknot order of. Each row
        represents indices from two paired bases. The structure of
        the :class:`ndarray` is equal to the structure of the output of
        :func:`base_pairs()`, where the indices represent the
        beginning of the residues.
        Array-like input (e.g. nested lists) is converted to an
        :class:`ndarray`.
    scores : ndarray, dtype=int, shape=(n,), optional
        The score for each base pair.
        By default, the score of each base pair is ``1``.
        Array-like input is converted to an :class:`ndarray`.
    max_pseudoknot_order : int, optional
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as ``-1``.
        By default, the algorithm is run until all base pairs
        have an assigned pseudoknot order.

    Returns
    -------
    pseudoknot_order : ndarray, dtype=int, shape=(m,n)
        The pseudoknot order of the input `base_pairs`.
        Multiple solutions that maximize the number of basepairs or
        the given score, respectively, may be possible.
        Therefore all *m* individual solutions are returned.

    Raises
    ------
    ValueError
        If `base_pairs` and `scores` differ in length.

    Notes
    -----
    The dynamic programming approach by Smit *et al*
    :footcite:`Smit2008` is applied to detect pseudoknots.
    The algorithm was originally developed to remove pseudoknots from a
    structure.
    However, if it is run iteratively on removed knotted pairs it can be
    used to identify the pseudoknot order.

    The pseudoknot order is defined as the minimum number of base pair
    set decompositions resulting in a nested structure
    :footcite:`Antczak2018`.
    Therefore, there are no pseudoknots between base pairs with the same
    pseudoknot order.

    Examples
    --------
    Remove the pseudoknotted base pair for the sequence *ABCbac*, where
    the corresponding big and small letters each represent a base pair:

    Define the base pairs as :class:`ndarray`:

    >>> basepairs = np.array([[0, 4],
    ...                       [1, 3],
    ...                       [2, 5]])

    Find the unknotted base pairs, optimizing for the maximum number of
    base pairs:

    >>> print(pseudoknots(basepairs, max_pseudoknot_order=0))
    [[ 0  0 -1]]

    This indicates that the base pair *Cc* is a pseudoknot.

    Given the length of the sequence (6 bases), we can also represent
    the unknotted structure in dot bracket notation:

    >>> print(dot_bracket(basepairs, 6, max_pseudoknot_order=0)[0])
    ((.)).

    If the maximum pseudoknot order is not restricted, the order of the
    knotted pairs is determined and can be represented using dot bracket
    letter notation:

    >>> print(pseudoknots(basepairs))
    [[0 0 1]]
    >>> print(dot_bracket(basepairs, 6)[0])
    (([))]

    See Also
    --------
    base_pairs
    dot_bracket

    References
    ----------

    .. footbibliography::

    """
    # The helpers index `base_pairs` and `scores` with index arrays and
    # read `base_pairs.shape`, which only works on ndarrays.
    # Coercing here lets callers pass plain (nested) lists as well.
    base_pairs = np.asarray(base_pairs)

    if len(base_pairs) == 0:
        # No base pairs -> empty pseudoknot order array
        return np.array([[]], dtype=np.int32)

    # List containing the results; ``-1`` marks "no order assigned yet"
    results = [np.full(len(base_pairs), -1, dtype='int32')]

    # If no score array is given, each base pair's score is one
    if scores is None:
        scores = np.ones(len(base_pairs))
    else:
        scores = np.asarray(scores)

    # Make sure `base_pairs` has the same length as the score array
    if len(base_pairs) != len(scores):
        raise ValueError(
            "'base_pair' and 'scores' must have the same shape"
        )

    # Split the base pairs in regions
    regions = _find_regions(base_pairs, scores)

    # Compute results
    results = _get_results(regions, results, max_pseudoknot_order)

    return np.vstack(results)
140
+
141
+
142
class _Region():
    """
    A paired region, i.e. a set of base pairs that is handled as one
    unit.

    The object stores the lowest and highest base index covered by the
    region, the summed score of its base pairs and the positions of
    these base pairs in the original base pair array.

    Parameters
    ----------
    base_pairs : ndarray, shape=(n,2), dtype=int
        All base pairs of the structure the region is a subset of.
    region_pairs : ndarray, dtype=int
        The indices of the base pairs in ``base_pairs`` that are part of
        the region.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.
    """

    def __init__ (self, base_pairs, region_pairs, scores):
        # All base indices that belong to this region
        member_bases = base_pairs[region_pairs]

        self.region_pairs = region_pairs
        # Summed score of the region's base pairs
        self.score = scores[region_pairs].sum()
        # Outermost base indices spanned by the region
        self.start = member_bases.min()
        self.stop = member_bases.max()

    def get_index_array(self):
        """
        Return the positions of the region's base pairs in the original
        base pair array.

        Returns
        -------
        region_pairs : ndarray
            The index array given at construction.
        """
        return self.region_pairs

    def __lt__(self, other):
        """
        Order two regions by their object ids.

        This comparison operator is required for :func:`np.unique()`.
        No particular order is meaningful, the regions only need to be
        distinguishable, so the unique object ids are compared.

        Parameters
        ----------
        other : _Region
            The other region.

        Returns
        -------
        comparison : bool
            The evaluated comparison.
        """
        return id(self) < id(other)
200
+
201
+
202
def _find_regions(base_pairs, scores):
    """
    Group the base pairs into regions, where a region is a set of
    consecutively nested base pairs.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n, 2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    # Put the lower base index of each pair into the first column ...
    pairs = np.sort(base_pairs, axis=1)
    # ... and order the pairs by that first column, remembering where
    # each row came from in the input array
    original_indices = np.argsort(pairs[:, 0])
    pairs = pairs[original_indices]

    # Replace each base index by its rank among all paired bases,
    # e.g. [[3, 5], [9, 7]] becomes [[0, 1], [3, 2]]
    rank = np.argsort(np.argsort(pairs.flatten())).reshape(base_pairs.shape)

    regions = set()
    # Input positions of the base pairs in the region being built
    current_region = []

    for i, pair_index in enumerate(original_indices):
        # Only the very first pair meets an empty region; every other
        # pair is compared to its predecessor
        if current_region:
            upstream_step = rank[i, 0] - rank[i-1, 0]
            downstream_step = rank[i-1, 1] - rank[i, 1]
            # The pair extends the region only if it is directly
            # nested within the previous pair on both sides
            if not (downstream_step == 1 and upstream_step == 1):
                regions.add(
                    _Region(base_pairs, np.array(current_region), scores)
                )
                current_region = []
        current_region.append(pair_index)

    # The last region is not terminated by the start of a new region,
    # hence it is stored after the loop
    regions.add(_Region(base_pairs, np.array(current_region), scores))

    # Return the graphical representation of the conflicting regions
    return _generate_graphical_representation(regions)
271
+
272
+
273
def _generate_graphical_representation(regions):
    """
    Build a ``networkx`` ``Graph`` whose nodes are the regions and
    whose edges connect regions that conflict with each other.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions representing the consecutively nested base pairs.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    region_graph = nx.Graph()
    # Every region becomes a node, even if it has no conflicts
    region_graph.add_nodes_from(regions)

    # `region_array` holds each region twice (start and stop position),
    # `is_start` is ``True`` where the entry marks a region start
    region_array, (is_start,) = _get_region_array_for(
        regions, content=[lambda a : [True, False]], dtype=['bool']
    )

    for start, region in enumerate(region_array):
        # Handle each region once, at its start position
        if not is_start[start]:
            continue

        # Position of the region's stop entry in `region_array`
        stop = _get_first_occurrence_for(region_array[start+1:], region)
        stop += (start + 1)

        # Regions seen an odd number of times between start and stop
        # of the current region
        conflicts = set()
        for other_region in region_array[start+1:stop]:
            if other_region in conflicts:
                # Second occurrence: the other region starts and stops
                # inside the current one and is thus non-conflicting
                conflicts.remove(other_region)
            else:
                conflicts.add(other_region)

        # Conflicts between regions are represented as graph edges
        edges = [(region, conflict) for conflict in conflicts]
        region_graph.add_edges_from(edges)
    return region_graph
340
+
341
+
342
def _get_first_occurrence_for(iterable, wanted_object):
    """
    Find the position of the first element in an iterable that is
    identical (``is``) to the given object.

    Parameters
    ----------
    iterable : iterable
        The iterable containing the object.
    wanted_object : object
        The object to be found.

    Returns
    -------
    index : int or None
        The index of the first occurrence of the object.
        ``None``, if the object is not part of the iterable.
    """
    # Identity instead of equality is checked on purpose:
    # equal but distinct objects are not considered a match
    return next(
        (
            position for position, element in enumerate(iterable)
            if element is wanted_object
        ),
        None
    )
361
+
362
+
363
def _get_region_array_for(regions, content=None, dtype=None):
    """
    Get a :class:`ndarray` of region objects. Each object occurs twice,
    representing its start and end point. The regions' positions in the
    array reflect their relative positions.

    Furthermore, a list of functions can be provided enabling custom
    outputs for each object's start and end point.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions to be considered. Each region must provide the
        attributes ``start`` and ``stop``.
    content : list [function, ...], optional
        The functions to be considered for custom outputs. For a given
        region they must return a sequence of two values, of which the
        first value is placed at the start position and the second
        value at the end position of the region relative to the other
        regions.
        By default, no custom output is created.
    dtype : list [str, ...], optional
        The data type of the output of each custom function.
        Must contain one entry for each function in `content`.

    Returns
    -------
    region_array : ndarray, dtype=object
        The array of ordered region objects.
    custom_content : list [ndarray, ...]
        The custom output.
        Only returned, if `content` is not empty.
    """
    # ``None`` instead of mutable list defaults
    content = [] if content is None else content
    dtype = [] if dtype is None else dtype

    # Each region contributes two entries: its start and its stop
    n_entries = 2 * len(regions)
    region_array = np.empty(n_entries, dtype=object)
    index_array = np.empty(n_entries, dtype='int32')

    # One output array for each custom function
    content_list = [
        np.empty(n_entries, dtype=dtype[i]) for i in range(len(content))
    ]

    # Fill the arrays
    for i, reg in enumerate(regions):
        indices = [2*i, 2*i+1]
        region_array[indices] = reg
        for values, function in zip(content_list, content):
            values[indices] = function(reg)
        index_array[indices] = [reg.start, reg.stop]

    # Order the arrays by the base indices
    sort_mask = np.argsort(index_array)
    region_array = region_array[sort_mask]

    # If no custom array content is given only return the ordered array
    # containing the regions
    if not content_list:
        return region_array

    # If custom content is given also return the ordered content
    return region_array, [values[sort_mask] for values in content_list]
421
+
422
+
423
def _remove_pseudoknots(regions):
    """
    Get the optimal solutions according to the algorithm referenced in
    :func:`pseudoknots()`.

    The algorithm uses a dynamic programming matrix in order to find
    the optimal solutions with the highest combined region scores.
    The rows and columns of the matrix correspond to the start and
    stop points of the regions in sorted order. Cell ``[i, j]`` holds
    all highest-scoring solutions found for the points ``i`` to ``j``.
    The score of a solution is the sum of the ``score`` attributes of
    its regions.

    Parameters
    ----------
    regions : set {_region, ...}
        The conflicting regions for which optimal solutions are to be
        found. The ``start``, ``stop`` and ``score`` attribute of each
        region is read.

    Returns
    -------
    solutions : ndarray, dtype=object
        The optimal solutions. Each solution in the ``ndarray`` is
        represented as ``frozenset`` of unknotted regions.
    """
    # Create dynamic programming matrix
    # Two companion matrices of the same shape store for each cell
    # the lowest start and highest stop of each solution in that cell
    dp_matrix_shape = len(regions)*2, len(regions)*2
    dp_matrix = np.empty(dp_matrix_shape, dtype='object')
    dp_matrix_solutions_starts = np.zeros_like(dp_matrix)
    dp_matrix_solutions_stops = np.zeros_like(dp_matrix)

    # Each index corresponds to the position in the dp matrix.
    # ``region_array`` contains the region objects and ``start_stops``
    # contains the lowest and highest positions of the regions
    region_array, (start_stops,) = _get_region_array_for(
        regions,
        [lambda a : (a.start, a.stop)],
        ['int32']
    )
    # Initialise the matrix diagonal with ndarrays of empty frozensets
    # An empty frozenset represents the empty solution
    for i in range(len(dp_matrix)):
        dp_matrix[i, i] = np.array([frozenset()])

    # Iterate through the top right half of the dynamic programming
    # matrix
    # Column by column, each column from the diagonal upwards, so that
    # the left, bottom and bottom left neighbor of a cell are already
    # filled when the cell is evaluated
    for j in range(len(regions)*2):
        for i in range(j-1, -1, -1):
            # Using a set removes duplicate candidates
            solution_candidates = set()
            left = dp_matrix[i, j-1]
            bottom = dp_matrix[i+1, j]

            # Add all solutions of the cell to the left
            for solution in left:
                solution_candidates.add(solution)

            # Add all solutions of the cell to the bottom
            for solution in bottom:
                solution_candidates.add(solution)

            # Check if i and j are start/end-points of the same region
            if region_array[i] is region_array[j]:

                # Add all solutions from the cell to the bottom left
                # plus this region
                bottom_left = dp_matrix[i+1, j-1]
                for solution in bottom_left:
                    # The union keeps the type of the left operand,
                    # hence the result is a frozenset
                    solution_candidates.add(solution | set([region_array[i]]))

            # Perform additional tests if solution in the left cell and
            # bottom cell both differ from an empty solution
            if (np.any(left != [frozenset()]) and
                np.any(bottom != [frozenset()])):

                # Highest stop of each solution in the left cell and
                # lowest start of each solution in the bottom cell
                left_highest = dp_matrix_solutions_stops[i, j-1]
                bottom_lowest = dp_matrix_solutions_starts[i+1, j]

                # For each pair of solutions check if solutions are
                # disjoint
                for solution1, highest in zip(left, left_highest):
                    for solution2, lowest in zip(bottom, bottom_lowest):
                        if highest < lowest:
                            # Both solutions are disjoint
                            solution_candidates.add(solution1 | solution2)
                        else:
                            # Both solutions are not disjoint
                            # Add subsolutions
                            # Each ``k`` splits the points ``i`` to
                            # ``j`` into ``i`` to ``k`` and ``k+1`` to
                            # ``j``; ``k`` runs from the matrix index
                            # before ``lowest`` up to the matrix index
                            # of ``highest``
                            for k in range(
                                np.where(start_stops==lowest)[0][0]-1,
                                np.where(start_stops==highest)[0][0]+1
                            ):
                                cell1 = dp_matrix[i, k]
                                cell2 = dp_matrix[k+1, j]
                                for subsolution1 in cell1:
                                    for subsolution2 in cell2:
                                        solution_candidates.add(
                                            subsolution1 | subsolution2
                                        )

            # Make solution candidates ``ndarray`` array of sets
            solution_candidates = np.array(list(solution_candidates))

            # Calculate the scores for each solution
            solution_scores = np.zeros(len(solution_candidates))
            for s, solution in enumerate(solution_candidates):
                score = 0
                for reg in solution:
                    score += reg.score
                solution_scores[s] = score
            # Get the indices where the score is at a maximum
            # (all candidates sharing the maximum score are kept)
            highest_scores = np.argwhere(
                solution_scores == np.amax(solution_scores)
            ).flatten()

            # Get the solutions with the highest score
            solution_candidates = solution_candidates[highest_scores]

            # Add the solutions to the dynamic programming matrix
            dp_matrix[i, j] = solution_candidates

            # Determine the lowest start and highest stop of each kept
            # solution; these values are read by the disjointness
            # check of later cells
            solution_starts = np.zeros_like(solution_candidates, dtype='int32')
            solution_stops = np.zeros_like(solution_candidates, dtype='int32')

            for s, solution in enumerate(solution_candidates):
                # An empty solution has no regions, ``-1`` is used then
                solution_starts[s] = min(
                    [reg.start for reg in solution], default=-1
                )
                solution_stops[s] = max(
                    [reg.stop for reg in solution], default=-1
                )

            dp_matrix_solutions_starts[i, j] = solution_starts
            dp_matrix_solutions_stops[i, j] = solution_stops

    # The top right corner contains the optimal solutions
    return dp_matrix[0, -1]
555
+
556
+
557
def _get_results(regions, results, max_pseudoknot_order, order=0):
    """
    Assign the pseudoknot order to the base pairs of the given regions.

    Regions without any conflict obtain the current order. For the
    remaining regions the dynamic programming algorithm selects the
    optimal unknotted solutions, whose regions obtain the current
    order, too. The regions not part of a solution are evaluated
    recursively with the next higher order.

    Parameters
    ----------
    regions : Graph
        The ``NetworkX`` graph of the regions for which optimal
        solutions are to be found. Each node is a region, connected
        regions are treated as conflicting.
        The graph is modified in-place: Non-conflicting regions are
        removed from it.
    results : list [ndarray, ...]
        The results. The arrays are indexed with the index arrays of
        the regions and are modified in-place.
    max_pseudoknot_order : int
        The maximum pseudoknot order to be found. Regions that would
        be of a higher order are not evaluated, i.e. their entries in
        the results are left untouched. If ``None`` is given, all
        base pairs are evaluated.
    order : int (default: 0)
        The order that is currently evaluated.

    Returns
    -------
    results : list [ndarray, ...]
        The results. If conflicts remain, the list contains copies of
        the input results for each optimal solution.
    """
    def collect_indices(region_group):
        # Concatenate the index arrays of all regions in the group
        indices = []
        for member in region_group:
            indices.extend(member.get_index_array())
        return indices

    # Regions without conflicts are taken out of the graph...
    isolated = list(nx.isolates(regions))
    regions.remove_nodes_from(isolated)

    # ...and are of the current order
    isolated_indices = collect_indices(isolated)
    for result in results:
        result[isolated_indices] = order

    # Without remaining conflicts there is nothing left to resolve
    if len(regions) == 0:
        return results

    # Each connected component of conflicting regions is solved
    # separately. An overall solution combines one optimal solution
    # from each component
    components = list(nx.connected_components(regions))
    component_solutions = [
        _remove_pseudoknots(component) for component in components
    ]
    solutions = [
        set(chain.from_iterable(combination))
        for combination in product(*component_solutions)
    ]

    merged_results = []
    for solution in solutions:
        # Every solution works on its own copy of the current results
        branch_results = [result.copy() for result in results]

        # The regions outside of the solution are pseudoknotted
        pseudoknotted_regions = regions.copy()
        pseudoknotted_regions.remove_nodes_from(solution)

        # The base pairs of the solution are of the current order
        unknotted_indices = collect_indices(solution)
        for result in branch_results:
            result[unknotted_indices] = order

        # Only continue with the pseudoknotted regions, if the
        # specified maximum order is not reached yet
        if max_pseudoknot_order != order:
            branch_results = _get_results(
                pseudoknotted_regions, branch_results,
                max_pseudoknot_order, order=order+1
            )

        # Flatten the results of all solutions into a single list
        merged_results.extend(branch_results)

    return merged_results