biotite 1.1.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (332) hide show
  1. biotite/__init__.py +18 -0
  2. biotite/application/__init__.py +69 -0
  3. biotite/application/application.py +276 -0
  4. biotite/application/autodock/__init__.py +12 -0
  5. biotite/application/autodock/app.py +500 -0
  6. biotite/application/blast/__init__.py +14 -0
  7. biotite/application/blast/alignment.py +92 -0
  8. biotite/application/blast/webapp.py +428 -0
  9. biotite/application/clustalo/__init__.py +12 -0
  10. biotite/application/clustalo/app.py +223 -0
  11. biotite/application/dssp/__init__.py +12 -0
  12. biotite/application/dssp/app.py +159 -0
  13. biotite/application/localapp.py +342 -0
  14. biotite/application/mafft/__init__.py +12 -0
  15. biotite/application/mafft/app.py +116 -0
  16. biotite/application/msaapp.py +363 -0
  17. biotite/application/muscle/__init__.py +13 -0
  18. biotite/application/muscle/app3.py +227 -0
  19. biotite/application/muscle/app5.py +163 -0
  20. biotite/application/sra/__init__.py +18 -0
  21. biotite/application/sra/app.py +452 -0
  22. biotite/application/tantan/__init__.py +12 -0
  23. biotite/application/tantan/app.py +199 -0
  24. biotite/application/util.py +57 -0
  25. biotite/application/viennarna/__init__.py +18 -0
  26. biotite/application/viennarna/rnaalifold.py +310 -0
  27. biotite/application/viennarna/rnafold.py +254 -0
  28. biotite/application/viennarna/rnaplot.py +206 -0
  29. biotite/application/viennarna/util.py +77 -0
  30. biotite/application/webapp.py +76 -0
  31. biotite/copyable.py +71 -0
  32. biotite/database/__init__.py +23 -0
  33. biotite/database/entrez/__init__.py +15 -0
  34. biotite/database/entrez/check.py +60 -0
  35. biotite/database/entrez/dbnames.py +91 -0
  36. biotite/database/entrez/download.py +229 -0
  37. biotite/database/entrez/key.py +44 -0
  38. biotite/database/entrez/query.py +262 -0
  39. biotite/database/error.py +16 -0
  40. biotite/database/pubchem/__init__.py +21 -0
  41. biotite/database/pubchem/download.py +258 -0
  42. biotite/database/pubchem/error.py +20 -0
  43. biotite/database/pubchem/query.py +830 -0
  44. biotite/database/pubchem/throttle.py +98 -0
  45. biotite/database/rcsb/__init__.py +13 -0
  46. biotite/database/rcsb/download.py +159 -0
  47. biotite/database/rcsb/query.py +964 -0
  48. biotite/database/uniprot/__init__.py +13 -0
  49. biotite/database/uniprot/check.py +40 -0
  50. biotite/database/uniprot/download.py +129 -0
  51. biotite/database/uniprot/query.py +293 -0
  52. biotite/file.py +232 -0
  53. biotite/sequence/__init__.py +84 -0
  54. biotite/sequence/align/__init__.py +203 -0
  55. biotite/sequence/align/alignment.py +680 -0
  56. biotite/sequence/align/banded.cpython-313-darwin.so +0 -0
  57. biotite/sequence/align/banded.pyx +652 -0
  58. biotite/sequence/align/buckets.py +71 -0
  59. biotite/sequence/align/cigar.py +425 -0
  60. biotite/sequence/align/kmeralphabet.cpython-313-darwin.so +0 -0
  61. biotite/sequence/align/kmeralphabet.pyx +595 -0
  62. biotite/sequence/align/kmersimilarity.cpython-313-darwin.so +0 -0
  63. biotite/sequence/align/kmersimilarity.pyx +233 -0
  64. biotite/sequence/align/kmertable.cpython-313-darwin.so +0 -0
  65. biotite/sequence/align/kmertable.pyx +3411 -0
  66. biotite/sequence/align/localgapped.cpython-313-darwin.so +0 -0
  67. biotite/sequence/align/localgapped.pyx +892 -0
  68. biotite/sequence/align/localungapped.cpython-313-darwin.so +0 -0
  69. biotite/sequence/align/localungapped.pyx +279 -0
  70. biotite/sequence/align/matrix.py +622 -0
  71. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  72. biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
  73. biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
  74. biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
  75. biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
  76. biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
  77. biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
  78. biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
  79. biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
  80. biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
  81. biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
  82. biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
  83. biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
  84. biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
  85. biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
  86. biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
  87. biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
  88. biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
  89. biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
  90. biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
  91. biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
  92. biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
  93. biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
  94. biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
  95. biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
  96. biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
  97. biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
  98. biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
  99. biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
  100. biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
  101. biotite/sequence/align/matrix_data/GONNET.mat +26 -0
  102. biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
  103. biotite/sequence/align/matrix_data/MATCH.mat +25 -0
  104. biotite/sequence/align/matrix_data/NUC.mat +25 -0
  105. biotite/sequence/align/matrix_data/PAM10.mat +34 -0
  106. biotite/sequence/align/matrix_data/PAM100.mat +34 -0
  107. biotite/sequence/align/matrix_data/PAM110.mat +34 -0
  108. biotite/sequence/align/matrix_data/PAM120.mat +34 -0
  109. biotite/sequence/align/matrix_data/PAM130.mat +34 -0
  110. biotite/sequence/align/matrix_data/PAM140.mat +34 -0
  111. biotite/sequence/align/matrix_data/PAM150.mat +34 -0
  112. biotite/sequence/align/matrix_data/PAM160.mat +34 -0
  113. biotite/sequence/align/matrix_data/PAM170.mat +34 -0
  114. biotite/sequence/align/matrix_data/PAM180.mat +34 -0
  115. biotite/sequence/align/matrix_data/PAM190.mat +34 -0
  116. biotite/sequence/align/matrix_data/PAM20.mat +34 -0
  117. biotite/sequence/align/matrix_data/PAM200.mat +34 -0
  118. biotite/sequence/align/matrix_data/PAM210.mat +34 -0
  119. biotite/sequence/align/matrix_data/PAM220.mat +34 -0
  120. biotite/sequence/align/matrix_data/PAM230.mat +34 -0
  121. biotite/sequence/align/matrix_data/PAM240.mat +34 -0
  122. biotite/sequence/align/matrix_data/PAM250.mat +34 -0
  123. biotite/sequence/align/matrix_data/PAM260.mat +34 -0
  124. biotite/sequence/align/matrix_data/PAM270.mat +34 -0
  125. biotite/sequence/align/matrix_data/PAM280.mat +34 -0
  126. biotite/sequence/align/matrix_data/PAM290.mat +34 -0
  127. biotite/sequence/align/matrix_data/PAM30.mat +34 -0
  128. biotite/sequence/align/matrix_data/PAM300.mat +34 -0
  129. biotite/sequence/align/matrix_data/PAM310.mat +34 -0
  130. biotite/sequence/align/matrix_data/PAM320.mat +34 -0
  131. biotite/sequence/align/matrix_data/PAM330.mat +34 -0
  132. biotite/sequence/align/matrix_data/PAM340.mat +34 -0
  133. biotite/sequence/align/matrix_data/PAM350.mat +34 -0
  134. biotite/sequence/align/matrix_data/PAM360.mat +34 -0
  135. biotite/sequence/align/matrix_data/PAM370.mat +34 -0
  136. biotite/sequence/align/matrix_data/PAM380.mat +34 -0
  137. biotite/sequence/align/matrix_data/PAM390.mat +34 -0
  138. biotite/sequence/align/matrix_data/PAM40.mat +34 -0
  139. biotite/sequence/align/matrix_data/PAM400.mat +34 -0
  140. biotite/sequence/align/matrix_data/PAM410.mat +34 -0
  141. biotite/sequence/align/matrix_data/PAM420.mat +34 -0
  142. biotite/sequence/align/matrix_data/PAM430.mat +34 -0
  143. biotite/sequence/align/matrix_data/PAM440.mat +34 -0
  144. biotite/sequence/align/matrix_data/PAM450.mat +34 -0
  145. biotite/sequence/align/matrix_data/PAM460.mat +34 -0
  146. biotite/sequence/align/matrix_data/PAM470.mat +34 -0
  147. biotite/sequence/align/matrix_data/PAM480.mat +34 -0
  148. biotite/sequence/align/matrix_data/PAM490.mat +34 -0
  149. biotite/sequence/align/matrix_data/PAM50.mat +34 -0
  150. biotite/sequence/align/matrix_data/PAM500.mat +34 -0
  151. biotite/sequence/align/matrix_data/PAM60.mat +34 -0
  152. biotite/sequence/align/matrix_data/PAM70.mat +34 -0
  153. biotite/sequence/align/matrix_data/PAM80.mat +34 -0
  154. biotite/sequence/align/matrix_data/PAM90.mat +34 -0
  155. biotite/sequence/align/matrix_data/PB.license +21 -0
  156. biotite/sequence/align/matrix_data/PB.mat +18 -0
  157. biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
  158. biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
  159. biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
  160. biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
  161. biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
  162. biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
  163. biotite/sequence/align/multiple.cpython-313-darwin.so +0 -0
  164. biotite/sequence/align/multiple.pyx +620 -0
  165. biotite/sequence/align/pairwise.cpython-313-darwin.so +0 -0
  166. biotite/sequence/align/pairwise.pyx +587 -0
  167. biotite/sequence/align/permutation.cpython-313-darwin.so +0 -0
  168. biotite/sequence/align/permutation.pyx +313 -0
  169. biotite/sequence/align/primes.txt +821 -0
  170. biotite/sequence/align/selector.cpython-313-darwin.so +0 -0
  171. biotite/sequence/align/selector.pyx +954 -0
  172. biotite/sequence/align/statistics.py +264 -0
  173. biotite/sequence/align/tracetable.cpython-313-darwin.so +0 -0
  174. biotite/sequence/align/tracetable.pxd +64 -0
  175. biotite/sequence/align/tracetable.pyx +370 -0
  176. biotite/sequence/alphabet.py +555 -0
  177. biotite/sequence/annotation.py +830 -0
  178. biotite/sequence/codec.cpython-313-darwin.so +0 -0
  179. biotite/sequence/codec.pyx +155 -0
  180. biotite/sequence/codon.py +477 -0
  181. biotite/sequence/codon_tables.txt +202 -0
  182. biotite/sequence/graphics/__init__.py +33 -0
  183. biotite/sequence/graphics/alignment.py +1115 -0
  184. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  185. biotite/sequence/graphics/color_schemes/autumn.json +51 -0
  186. biotite/sequence/graphics/color_schemes/blossom.json +51 -0
  187. biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
  188. biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
  189. biotite/sequence/graphics/color_schemes/flower.json +51 -0
  190. biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
  191. biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
  192. biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
  193. biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
  194. biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
  195. biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
  196. biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
  197. biotite/sequence/graphics/color_schemes/ocean.json +51 -0
  198. biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
  199. biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
  200. biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
  201. biotite/sequence/graphics/color_schemes/spring.json +51 -0
  202. biotite/sequence/graphics/color_schemes/sunset.json +51 -0
  203. biotite/sequence/graphics/color_schemes/wither.json +51 -0
  204. biotite/sequence/graphics/colorschemes.py +170 -0
  205. biotite/sequence/graphics/dendrogram.py +229 -0
  206. biotite/sequence/graphics/features.py +544 -0
  207. biotite/sequence/graphics/logo.py +104 -0
  208. biotite/sequence/graphics/plasmid.py +712 -0
  209. biotite/sequence/io/__init__.py +12 -0
  210. biotite/sequence/io/fasta/__init__.py +22 -0
  211. biotite/sequence/io/fasta/convert.py +284 -0
  212. biotite/sequence/io/fasta/file.py +265 -0
  213. biotite/sequence/io/fastq/__init__.py +19 -0
  214. biotite/sequence/io/fastq/convert.py +117 -0
  215. biotite/sequence/io/fastq/file.py +507 -0
  216. biotite/sequence/io/genbank/__init__.py +17 -0
  217. biotite/sequence/io/genbank/annotation.py +269 -0
  218. biotite/sequence/io/genbank/file.py +573 -0
  219. biotite/sequence/io/genbank/metadata.py +336 -0
  220. biotite/sequence/io/genbank/sequence.py +171 -0
  221. biotite/sequence/io/general.py +201 -0
  222. biotite/sequence/io/gff/__init__.py +26 -0
  223. biotite/sequence/io/gff/convert.py +128 -0
  224. biotite/sequence/io/gff/file.py +450 -0
  225. biotite/sequence/phylo/__init__.py +36 -0
  226. biotite/sequence/phylo/nj.cpython-313-darwin.so +0 -0
  227. biotite/sequence/phylo/nj.pyx +221 -0
  228. biotite/sequence/phylo/tree.cpython-313-darwin.so +0 -0
  229. biotite/sequence/phylo/tree.pyx +1169 -0
  230. biotite/sequence/phylo/upgma.cpython-313-darwin.so +0 -0
  231. biotite/sequence/phylo/upgma.pyx +164 -0
  232. biotite/sequence/profile.py +567 -0
  233. biotite/sequence/search.py +118 -0
  234. biotite/sequence/seqtypes.py +713 -0
  235. biotite/sequence/sequence.py +374 -0
  236. biotite/setup_ccd.py +197 -0
  237. biotite/structure/__init__.py +133 -0
  238. biotite/structure/alphabet/__init__.py +25 -0
  239. biotite/structure/alphabet/encoder.py +332 -0
  240. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  241. biotite/structure/alphabet/i3d.py +110 -0
  242. biotite/structure/alphabet/layers.py +86 -0
  243. biotite/structure/alphabet/pb.license +21 -0
  244. biotite/structure/alphabet/pb.py +171 -0
  245. biotite/structure/alphabet/unkerasify.py +122 -0
  246. biotite/structure/atoms.py +1554 -0
  247. biotite/structure/basepairs.py +1404 -0
  248. biotite/structure/bonds.cpython-313-darwin.so +0 -0
  249. biotite/structure/bonds.pyx +1972 -0
  250. biotite/structure/box.py +588 -0
  251. biotite/structure/celllist.cpython-313-darwin.so +0 -0
  252. biotite/structure/celllist.pyx +849 -0
  253. biotite/structure/chains.py +314 -0
  254. biotite/structure/charges.cpython-313-darwin.so +0 -0
  255. biotite/structure/charges.pyx +520 -0
  256. biotite/structure/compare.py +274 -0
  257. biotite/structure/density.py +109 -0
  258. biotite/structure/dotbracket.py +214 -0
  259. biotite/structure/error.py +39 -0
  260. biotite/structure/filter.py +590 -0
  261. biotite/structure/geometry.py +655 -0
  262. biotite/structure/graphics/__init__.py +13 -0
  263. biotite/structure/graphics/atoms.py +243 -0
  264. biotite/structure/graphics/rna.py +295 -0
  265. biotite/structure/hbond.py +428 -0
  266. biotite/structure/info/__init__.py +24 -0
  267. biotite/structure/info/atom_masses.json +121 -0
  268. biotite/structure/info/atoms.py +81 -0
  269. biotite/structure/info/bonds.py +149 -0
  270. biotite/structure/info/ccd.py +202 -0
  271. biotite/structure/info/components.bcif +0 -0
  272. biotite/structure/info/groups.py +131 -0
  273. biotite/structure/info/masses.py +121 -0
  274. biotite/structure/info/misc.py +138 -0
  275. biotite/structure/info/radii.py +197 -0
  276. biotite/structure/info/standardize.py +186 -0
  277. biotite/structure/integrity.py +215 -0
  278. biotite/structure/io/__init__.py +29 -0
  279. biotite/structure/io/dcd/__init__.py +13 -0
  280. biotite/structure/io/dcd/file.py +67 -0
  281. biotite/structure/io/general.py +243 -0
  282. biotite/structure/io/gro/__init__.py +14 -0
  283. biotite/structure/io/gro/file.py +344 -0
  284. biotite/structure/io/mol/__init__.py +20 -0
  285. biotite/structure/io/mol/convert.py +112 -0
  286. biotite/structure/io/mol/ctab.py +415 -0
  287. biotite/structure/io/mol/header.py +120 -0
  288. biotite/structure/io/mol/mol.py +149 -0
  289. biotite/structure/io/mol/sdf.py +914 -0
  290. biotite/structure/io/netcdf/__init__.py +13 -0
  291. biotite/structure/io/netcdf/file.py +64 -0
  292. biotite/structure/io/pdb/__init__.py +20 -0
  293. biotite/structure/io/pdb/convert.py +307 -0
  294. biotite/structure/io/pdb/file.py +1290 -0
  295. biotite/structure/io/pdb/hybrid36.cpython-313-darwin.so +0 -0
  296. biotite/structure/io/pdb/hybrid36.pyx +242 -0
  297. biotite/structure/io/pdbqt/__init__.py +15 -0
  298. biotite/structure/io/pdbqt/convert.py +113 -0
  299. biotite/structure/io/pdbqt/file.py +688 -0
  300. biotite/structure/io/pdbx/__init__.py +23 -0
  301. biotite/structure/io/pdbx/bcif.py +656 -0
  302. biotite/structure/io/pdbx/cif.py +1075 -0
  303. biotite/structure/io/pdbx/component.py +245 -0
  304. biotite/structure/io/pdbx/compress.py +321 -0
  305. biotite/structure/io/pdbx/convert.py +1745 -0
  306. biotite/structure/io/pdbx/encoding.cpython-313-darwin.so +0 -0
  307. biotite/structure/io/pdbx/encoding.pyx +1031 -0
  308. biotite/structure/io/trajfile.py +693 -0
  309. biotite/structure/io/trr/__init__.py +13 -0
  310. biotite/structure/io/trr/file.py +43 -0
  311. biotite/structure/io/xtc/__init__.py +13 -0
  312. biotite/structure/io/xtc/file.py +43 -0
  313. biotite/structure/mechanics.py +73 -0
  314. biotite/structure/molecules.py +352 -0
  315. biotite/structure/pseudoknots.py +628 -0
  316. biotite/structure/rdf.py +245 -0
  317. biotite/structure/repair.py +304 -0
  318. biotite/structure/residues.py +572 -0
  319. biotite/structure/sasa.cpython-313-darwin.so +0 -0
  320. biotite/structure/sasa.pyx +322 -0
  321. biotite/structure/segments.py +178 -0
  322. biotite/structure/sequence.py +111 -0
  323. biotite/structure/sse.py +308 -0
  324. biotite/structure/superimpose.py +689 -0
  325. biotite/structure/transform.py +530 -0
  326. biotite/structure/util.py +168 -0
  327. biotite/version.py +16 -0
  328. biotite/visualize.py +265 -0
  329. biotite-1.1.0.dist-info/METADATA +190 -0
  330. biotite-1.1.0.dist-info/RECORD +332 -0
  331. biotite-1.1.0.dist-info/WHEEL +4 -0
  332. biotite-1.1.0.dist-info/licenses/LICENSE.rst +30 -0
@@ -0,0 +1,628 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ This module provides functionality for pseudoknot detection.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Tom David Müller"
11
+ __all__ = ["pseudoknots"]
12
+
13
+ from itertools import chain, product
14
+ import networkx as nx
15
+ import numpy as np
16
+
17
+
18
def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
    """
    Identify the pseudoknot order for each base pair in a given set of
    base pairs.

    By default the algorithm removes base pairs until the remaining
    base pairs are completely nested i.e. no pseudoknots appear.
    The pseudoknot order of the removed base pairs is incremented and
    the procedure is repeated with these base pairs.
    Base pairs are removed in a way that maximizes the number of
    remaining base pairs.
    However, an optional score for each individual base pair can be
    provided.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n,2)
        The base pairs to determine the pseudoknot order of. Each row
        represents indices from two paired bases. The structure of
        the :class:`ndarray` is equal to the structure of the output of
        :func:`base_pairs()`, where the indices represent the
        beginning of the residues.
        Array-like input (e.g. nested lists) is converted to an
        :class:`ndarray`.
    scores : ndarray, dtype=int, shape=(n,), optional
        The score for each base pair.
        By default, the score of each base pair is ``1``.
        Array-like input is converted to an :class:`ndarray`.
    max_pseudoknot_order : int, optional
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as ``-1``.
        By default, the algorithm is run until all base pairs
        have an assigned pseudoknot order.

    Returns
    -------
    pseudoknot_order : ndarray, dtype=int, shape=(m,n)
        The pseudoknot order of the input `base_pairs`.
        Multiple solutions that maximize the number of basepairs or
        the given score, respectively, may be possible.
        Therefore all *m* individual solutions are returned.
        If `base_pairs` is empty, an array of shape ``(1, 0)`` is
        returned.

    Raises
    ------
    ValueError
        If `base_pairs` and `scores` do not have the same length.

    Notes
    -----
    The dynamic programming approach by Smit *et al*
    :footcite:`Smit2008` is applied to detect pseudoknots.
    The algorithm was originally developed to remove pseudoknots from a
    structure.
    However, if it is run iteratively on removed knotted pairs it can be
    used to identify the pseudoknot order.

    The pseudoknot order is defined as the minimum number of base pair
    set decompositions resulting in a nested structure
    :footcite:`Antczak2018`.
    Therefore, there are no pseudoknots between base pairs with the same
    pseudoknot order.

    Examples
    --------
    Remove the pseudoknotted base pair for the sequence *ABCbac*, where
    the corresponding big and small letters each represent a base pair:

    Define the base pairs as :class:`ndarray`:

    >>> basepairs = np.array([[0, 4],
    ...                       [1, 3],
    ...                       [2, 5]])

    Find the unknotted base pairs, optimizing for the maximum number of
    base pairs:

    >>> print(pseudoknots(basepairs, max_pseudoknot_order=0))
    [[ 0  0 -1]]

    This indicates that the base pair *Cc* is a pseudoknot.

    Given the length of the sequence (6 bases), we can also represent
    the unknotted structure in dot bracket notation:

    >>> print(dot_bracket(basepairs, 6, max_pseudoknot_order=0)[0])
    ((.)).

    If the maximum pseudoknot order is not restricted, the order of the
    knotted pairs is determined and can be represented using dot bracket
    letter notation:

    >>> print(pseudoknots(basepairs))
    [[0 0 1]]
    >>> print(dot_bracket(basepairs, 6)[0])
    (([))]

    See Also
    --------
    base_pairs
    dot_bracket

    References
    ----------

    .. footbibliography::

    """
    # The region detection indexes both inputs with index arrays,
    # which plain Python sequences do not support
    base_pairs = np.asarray(base_pairs)

    if len(base_pairs) == 0:
        # No base pairs -> empty pseudoknot order array
        return np.array([[]], dtype=np.int32)

    # List containing the results; ``-1`` marks base pairs without an
    # assigned pseudoknot order
    results = [np.full(len(base_pairs), -1, dtype="int32")]

    # If no score array is given, each base pair's score is one
    if scores is None:
        scores = np.ones(len(base_pairs))
    else:
        scores = np.asarray(scores)

    # Make sure `base_pairs` has the same length as the score array
    if len(base_pairs) != len(scores):
        raise ValueError(
            "'base_pairs' and 'scores' must have the same length"
        )

    # Split the base pairs in regions
    regions = _find_regions(base_pairs, scores)

    # Compute results
    results = _get_results(regions, results, max_pseudoknot_order)

    return np.vstack(results)
139
+
140
+
141
class _Region:
    """
    A paired region, i.e. a set of base pairs treated as one unit.

    On construction the smallest and largest base index covered by the
    region and the summed score of its base pairs are computed.
    The indices pointing back into the original base pair array are
    kept for backtracing.

    Parameters
    ----------
    base_pairs : ndarray, shape=(n,2), dtype=int
        All base pairs of the structure the region is a subset of.
    region_pairs : ndarray, dtype=int
        The indices of the base pairs in ``base_pairs`` that are part of
        the region.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.
    """

    def __init__(self, base_pairs, region_pairs, scores):
        # Only the base pairs belonging to this region are relevant
        member_pairs = base_pairs[region_pairs]

        # Lowest and highest base index spanned by the region
        self.start = np.min(member_pairs)
        self.stop = np.max(member_pairs)

        self.region_pairs = region_pairs
        # The region's score is the sum of its base pairs' scores
        self.score = np.sum(scores[region_pairs])

    def get_index_array(self):
        """
        Return the index array locating the region's base pairs in the
        original base pair array.

        Returns
        -------
        region_pairs : ndarray
            The indices of the base pairs in the original base pair
            array.
        """
        return self.region_pairs

    def __lt__(self, other):
        """
        Order regions by object identity.

        This operator is required for :func:`np.unique()`.
        No particular order is meaningful, regions only need to be
        distinguishable, hence the objects' ids are compared.

        Parameters
        ----------
        other : _Region
            The other region.

        Returns
        -------
        comparison : bool
            The evaluated comparison.
        """
        return id(other) > id(self)
199
+
200
+
201
def _find_regions(base_pairs, scores):
    """
    Group the base pairs into regions and return them as conflict
    graph. A region is defined as a set of consecutively nested base
    pairs.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n, 2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    # Within each row, put the lower base index into the first column
    ordered_pairs = np.sort(base_pairs, axis=1)

    # Order the rows by their first base and remember which row of the
    # input each ordered row originates from
    original_indices = np.argsort(ordered_pairs[:, 0])
    ordered_pairs = ordered_pairs[original_indices]

    # Replace each base index by its rank among all base indices
    # E.g.: [[3, 5]  -->  [[0, 1]
    #        [9, 7]]       [3, 2]]
    flat_order = np.argsort(ordered_pairs.flatten())
    rank = np.argsort(flat_order).reshape(base_pairs.shape)

    # All regions found so far
    regions = set()
    # Input row indices of the base pairs in the region being built
    current_region = []

    for row, pair_index in enumerate(original_indices):
        if current_region:
            # A base pair continues the region only if it is directly
            # nested in the previous one: one rank further on the
            # upstream side and one rank back on the downstream side
            downstream_step = rank[row - 1, 1] - rank[row, 1]
            upstream_step = rank[row, 0] - rank[row - 1, 0]
            extends_region = downstream_step == 1 and upstream_step == 1
            if not extends_region:
                # Close the current region and start a new one
                regions.add(
                    _Region(base_pairs, np.array(current_region), scores)
                )
                current_region = []

        current_region.append(pair_index)

    # The last region is not terminated by the start of a new region,
    # so it is stored explicitly
    regions.add(_Region(base_pairs, np.array(current_region), scores))

    # Return the graph representation of the conflicting regions
    return _generate_graphical_representation(regions)
269
+
270
+
271
def _generate_graphical_representation(regions):
    """
    Build a ``NetworkX`` ``Graph`` whose nodes are the given regions
    and whose edges connect conflicting regions.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions representing the consecutively nested base pairs.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    conflict_graph = nx.Graph()

    # Every region is a node, even if it has no conflicts
    conflict_graph.add_nodes_from(regions)

    # Each region appears twice in the ordered array; the boolean array
    # is ``True`` at the position where a region starts
    ordered_regions, (is_start,) = _get_region_array_for(
        regions, content=[lambda _region: [True, False]], dtype=["bool"]
    )

    for position, (region, opens_here) in enumerate(
        zip(ordered_regions, is_start)
    ):
        # Handle each region only once, at its starting position
        if not opens_here:
            continue

        # Position of the region's second occurrence (its stop) in the
        # ordered array
        after_start = position + 1
        closing = _get_first_occurrence_for(
            ordered_regions[after_start:], region
        )
        closing += after_start

        # Regions seen an odd number of times between start and stop
        # of the current region
        crossing = set()
        for other_region in ordered_regions[after_start:closing]:
            if other_region in crossing:
                # Seen twice: the other region starts and stops inside
                # the current region and is thus non-conflicting
                crossing.remove(other_region)
            else:
                crossing.add(other_region)

        # Conflicts between regions are represented as graph edges
        conflict_graph.add_edges_from(
            (region, conflict) for conflict in crossing
        )
    return conflict_graph
338
+
339
+
340
def _get_first_occurrence_for(iterable, wanted_object):
    """
    Locate the first position at which an object appears in an
    iterable. Elements are compared by identity (``is``), not by
    equality.

    Parameters
    ----------
    iterable : iterable
        The iterable containing the object.
    wanted_object : object
        The object to be found.

    Returns
    -------
    index : int or None
        The index of the first occurrence of the object.
        ``None`` if the object does not occur in the iterable.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(iterable)
        if candidate is wanted_object
    )
    return next(matching_positions, None)
359
+
360
+
361
def _get_region_array_for(regions, content=None, dtype=None):
    """
    Get a :class:`ndarray` of region objects. Each object occurs twice,
    representing its start and end point. The regions' positions in the
    array reflect their relative positions.

    Furthermore, a list of functions can be provided enabling custom
    outputs for each object's start and end point.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions to be considered.
    content : list [function, ...], optional
        The functions to be considered for custom outputs. For a given
        region they must return a tuple of which the first value is
        placed at the start position and the second value at the end
        position of the region relative to the other regions.
        By default, no custom output is created.
    dtype : list [str, ...], optional
        The data type of the output of the custom functions.
        Must contain one entry for each function in `content`.

    Returns
    -------
    region_array : ndarray, dtype=object
        The array of ordered region objects.
    custom_content : list [ndarray, ...]
        The custom output.
        Only returned if `content` is not empty.
    """
    # ``None`` defaults avoid sharing mutable default lists between calls
    content = [] if content is None else content
    dtype = [] if dtype is None else dtype

    # Every region contributes two entries: its start and its stop
    size = 2 * len(regions)
    region_array = np.empty(size, dtype=object)
    index_array = np.empty(size, dtype="int32")

    # One array per custom content function
    content_list = [
        np.empty(size, dtype=dtype[i]) for i in range(len(content))
    ]

    # Fill the arrays
    for i, reg in enumerate(regions):
        first, second = 2 * i, 2 * i + 1
        # Scalar assignment stores the region object itself, without
        # NumPy trying to interpret it as a sequence
        region_array[first] = reg
        region_array[second] = reg
        index_array[first] = reg.start
        index_array[second] = reg.stop
        for values, function in zip(content_list, content):
            values[[first, second]] = function(reg)

    # Order the arrays by the base indices
    sort_mask = np.argsort(index_array)
    region_array = region_array[sort_mask]

    # If no custom array content is given only return the ordered array
    # containing the regions
    if not content_list:
        return region_array

    # If custom content is given also return the ordered content
    return region_array, [values[sort_mask] for values in content_list]
419
+
420
+
421
def _remove_pseudoknots(regions):
    """
    Get the optimal solutions according to the algorithm referenced in
    :func:`pseudoknots()`.

    The algorithm uses a dynamic programming matrix in order to find
    the optimal solutions with the highest combined region scores.
    The score of a solution is the sum of the ``score`` attributes of
    the regions it contains.

    Parameters
    ----------
    regions : set {_region, ...}
        The conflicting regions for which optimal solutions are to be
        found. Each region must provide the attributes ``start``,
        ``stop`` and ``score``.

    Returns
    -------
    solutions : ndarray, dtype=object
        The optimal solutions. Each solution in the ``ndarray`` is
        represented as ``frozenset`` of unknotted regions.
    """
    # Create dynamic programming matrix
    # Each region occupies two rows/columns: one for its start and one
    # for its end point
    dp_matrix_shape = len(regions) * 2, len(regions) * 2
    dp_matrix = np.empty(dp_matrix_shape, dtype="object")
    # For each cell these matrices store, per solution in that cell,
    # the lowest start and highest stop position of its regions
    dp_matrix_solutions_starts = np.zeros_like(dp_matrix)
    dp_matrix_solutions_stops = np.zeros_like(dp_matrix)

    # Each index corresponds to the position in the dp matrix.
    # ``region_array`` contains the region objects and ``start_stops``
    # contains the lowest and highest positions of the regions
    region_array, (start_stops,) = _get_region_array_for(
        regions, [lambda a: (a.start, a.stop)], ["int32"]
    )
    # Initialise the matrix diagonal with ndarrays of empty frozensets
    for i in range(len(dp_matrix)):
        dp_matrix[i, i] = np.array([frozenset()])

    # Iterate through the top right half of the dynamic programming
    # matrix
    # Columns are filled from left to right and each column from the
    # diagonal upwards, so the cells to the left, to the bottom and to
    # the bottom left are already filled when cell (i, j) is evaluated
    for j in range(len(regions) * 2):
        for i in range(j - 1, -1, -1):
            # A set is used to discard duplicate solutions
            solution_candidates = set()
            left = dp_matrix[i, j - 1]
            bottom = dp_matrix[i + 1, j]

            # Add all solutions of the cell to the left
            for solution in left:
                solution_candidates.add(solution)

            # Add all solutions of the cell to the bottom
            for solution in bottom:
                solution_candidates.add(solution)

            # Check if i and j are start/end-points of the same region
            if region_array[i] is region_array[j]:
                # Add all solutions from the cell to the bottom left
                # plus this region
                bottom_left = dp_matrix[i + 1, j - 1]
                for solution in bottom_left:
                    solution_candidates.add(solution | set([region_array[i]]))

            # Perform additional tests if solution in the left cell and
            # bottom cell both differ from an empty solution
            if np.any(left != [frozenset()]) and np.any(bottom != [frozenset()]):
                left_highest = dp_matrix_solutions_stops[i, j - 1]
                bottom_lowest = dp_matrix_solutions_starts[i + 1, j]

                # For each pair of solutions check if solutions are
                # disjoint
                for solution1, highest in zip(left, left_highest):
                    for solution2, lowest in zip(bottom, bottom_lowest):
                        if highest < lowest:
                            # Both solutions are disjoint
                            solution_candidates.add(solution1 | solution2)
                        else:
                            # Both solutions are not disjoint
                            # Add subsolutions
                            # The split point k runs over the matrix
                            # indices between the positions of
                            # ``lowest`` and ``highest``
                            for k in range(
                                np.where(start_stops == lowest)[0][0] - 1,
                                np.where(start_stops == highest)[0][0] + 1,
                            ):
                                cell1 = dp_matrix[i, k]
                                cell2 = dp_matrix[k + 1, j]
                                for subsolution1 in cell1:
                                    for subsolution2 in cell2:
                                        solution_candidates.add(
                                            subsolution1 | subsolution2
                                        )

            # Make solution candidates ``ndarray`` array of sets
            solution_candidates = np.array(list(solution_candidates))

            # Calculate the scores for each solution
            solution_scores = np.zeros(len(solution_candidates))
            for s, solution in enumerate(solution_candidates):
                score = 0
                for reg in solution:
                    score += reg.score
                solution_scores[s] = score
            # Get the indices where the score is at a maximum
            highest_scores = np.argwhere(
                solution_scores == np.amax(solution_scores)
            ).flatten()

            # Get the solutions with the highest score
            # All solutions sharing the maximum score are kept
            solution_candidates = solution_candidates[highest_scores]

            # Add the solutions to the dynamic programming matrix
            dp_matrix[i, j] = solution_candidates

            solution_starts = np.zeros_like(solution_candidates, dtype="int32")
            solution_stops = np.zeros_like(solution_candidates, dtype="int32")

            # Store the extent of each solution for the disjointness
            # check in later cells; an empty solution is marked with -1
            for s, solution in enumerate(solution_candidates):
                solution_starts[s] = min([reg.start for reg in solution], default=-1)
                solution_stops[s] = max([reg.stop for reg in solution], default=-1)

            dp_matrix_solutions_starts[i, j] = solution_starts
            dp_matrix_solutions_stops[i, j] = solution_stops

    # The top right corner contains the optimal solutions
    return dp_matrix[0, -1]
544
+
545
+
546
def _get_results(regions, results, max_pseudoknot_order, order=0):
    """
    Assign pseudoknot orders to the base pairs of the given regions.

    Regions without conflicts receive the current order. For the
    remaining regions the optimal unknotted solutions are determined
    via dynamic programming. The regions left out of a solution are
    evaluated recursively with the next higher order and the outcome
    is merged into the results.

    Parameters
    ----------
    regions : Graph
        A *networkx* graph whose nodes are the regions to be evaluated.
        Isolated nodes are treated as non-conflicting regions.
        The graph is modified in-place: isolated nodes are removed.
    results : list [ndarray, ...]
        The results obtained so far.
        The arrays are modified in-place.
    max_pseudoknot_order : int
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as -1. If ``None``
        is given, all base pairs are evaluated.
    order : int (default: 0)
        The order that is currently evaluated.

    Returns
    -------
    results : list [ndarray, ...]
        The results, one set of arrays for each optimal solution.
    """
    # Isolated nodes do not conflict with any other region:
    # take them out of the graph and give them the current order
    isolated_regions = list(nx.isolates(regions))
    regions.remove_nodes_from(isolated_regions)
    isolated_indices = list(
        chain.from_iterable(
            region.get_index_array() for region in isolated_regions
        )
    )
    for result in results:
        result[isolated_indices] = order

    # Nothing left to resolve
    if len(regions) == 0:
        return results

    # Each connected component of conflicting regions is solved on its
    # own; the overall solutions are all combinations of the
    # per-component solutions
    components = list(nx.connected_components(regions))
    component_solutions = [
        _remove_pseudoknots(component) for component in components
    ]
    solutions = [
        set().union(*combination)
        for combination in product(*component_solutions)
    ]

    merged_results = []
    for solution in solutions:
        # Every solution works on its own copy of the current results
        solution_results = [result.copy() for result in results]

        # The regions outside the solution remain pseudoknotted
        remaining_regions = regions.copy()
        remaining_regions.remove_nodes_from(solution)

        # The base pairs of the solution are of the current order
        unknotted_indices = list(
            chain.from_iterable(
                region.get_index_array() for region in solution
            )
        )
        for result in solution_results:
            result[unknotted_indices] = order

        # Only descend if the maximum order has not been reached yet
        if max_pseudoknot_order != order:
            solution_results = _get_results(
                remaining_regions,
                solution_results,
                max_pseudoknot_order,
                order=order + 1,
            )

        merged_results.extend(solution_results)

    # The results of all solutions as one flat list
    return merged_results