biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,622 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
This module provides functionality for pseudoknot detection.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Override the module name so that this module reports itself as
# ``biotite.structure`` rather than under its own file-based name.
# NOTE(review): presumably done so that the public function appears to
# live directly in the ``biotite.structure`` package - confirm against
# the package's ``__init__``.
__name__ = "biotite.structure"
__author__ = "Tom David Müller"
# Only ``pseudoknots`` is exported by a star import of this module.
__all__ = ["pseudoknots"]
|
|
12
|
+
|
|
13
|
+
from itertools import chain, product
|
|
14
|
+
import networkx as nx
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
    """
    Identify the pseudoknot order for each base pair in a given set of
    base pairs.

    By default the algorithm removes base pairs until the remaining
    base pairs are completely nested i.e. no pseudoknots appear.
    The pseudoknot order of the removed base pairs is incremented and
    the procedure is repeated with these base pairs.
    Base pairs are removed in a way that maximizes the number of
    remaining base pairs.
    However, an optional score for each individual base pair can be
    provided.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n,2)
        The base pairs to determine the pseudoknot order of. Each row
        represents indices from two paired bases. The structure of
        the :class:`ndarray` is equal to the structure of the output of
        :func:`base_pairs()`, where the indices represent the
        beginning of the residues.
        Other array-like objects (e.g. nested lists) are converted to
        an :class:`ndarray`.
    scores : ndarray, dtype=int, shape=(n,), optional
        The score for each base pair.
        By default, the score of each base pair is ``1``.
        Other array-like objects are converted to an :class:`ndarray`.
    max_pseudoknot_order : int, optional
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as ``-1``.
        By default, the algorithm is run until all base pairs
        have an assigned pseudoknot order.

    Returns
    -------
    pseudoknot_order : ndarray, dtype=int, shape=(m,n)
        The pseudoknot order of the input `base_pairs`.
        Multiple solutions that maximize the number of basepairs or
        the given score, respectively, may be possible.
        Therefore all *m* individual solutions are returned.
        If `base_pairs` is empty, an empty array of shape ``(1, 0)``
        is returned.

    Raises
    ------
    ValueError
        If `base_pairs` and `scores` differ in length.

    Notes
    -----
    The dynamic programming approach by Smit *et al*
    :footcite:`Smit2008` is applied to detect pseudoknots.
    The algorithm was originally developed to remove pseudoknots from a
    structure.
    However, if it is run iteratively on removed knotted pairs it can be
    used to identify the pseudoknot order.

    The pseudoknot order is defined as the minimum number of base pair
    set decompositions resulting in a nested structure
    :footcite:`Antczak2018`.
    Therefore, there are no pseudoknots between base pairs with the same
    pseudoknot order.

    References
    ----------

    .. footbibliography::

    Examples
    --------
    Remove the pseudoknotted base pair for the sequence *ABCbac*, where
    the corresponding big and small letters each represent a base pair:

    Define the base pairs as :class:`ndarray`:

    >>> basepairs = np.array([[0, 4],
    ...                       [1, 3],
    ...                       [2, 5]])

    Find the unknotted base pairs, optimizing for the maximum number of
    base pairs:

    >>> print(pseudoknots(basepairs, max_pseudoknot_order=0))
    [[ 0  0 -1]]

    This indicates that the base pair *Cc* is a pseudoknot.

    Given the length of the sequence (6 bases), we can also represent
    the unknotted structure in dot bracket notation:

    >>> print(dot_bracket(basepairs, 6, max_pseudoknot_order=0)[0])
    ((.)).

    If the maximum pseudoknot order is not restricted, the order of the
    knotted pairs is determined and can be represented using dot bracket
    letter notation:

    >>> print(pseudoknots(basepairs))
    [[0 0 1]]
    >>> print(dot_bracket(basepairs, 6)[0])
    (([))]
    """
    # The region objects built further down index both inputs with
    # integer arrays, which plain Python sequences do not support.
    # Converting up front lets array-likes work instead of failing
    # later with an unrelated indexing error.
    base_pairs = np.asarray(base_pairs)
    n_pairs = len(base_pairs)

    if n_pairs == 0:
        # No base pairs -> empty pseudoknot order array
        return np.array([[]], dtype=np.int32)

    # If no score array is given, each base pair's score is one
    if scores is None:
        scores = np.ones(n_pairs)
    else:
        scores = np.asarray(scores)

    # Validate before allocating or computing anything
    if len(scores) != n_pairs:
        raise ValueError("'base_pairs' and 'scores' must have the same length")

    # Initial solution: no base pair has an assigned order yet (-1)
    results = [np.full(n_pairs, -1, dtype="int32")]

    # Split the base pairs in regions
    regions = _find_regions(base_pairs, scores)

    # Compute results
    results = _get_results(regions, results, max_pseudoknot_order)

    return np.vstack(results)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class _Region:
    """
    A paired region, i.e. a group of base pairs treated as one unit.

    On construction the region records the smallest and the largest
    base index occurring in its base pairs, the summed score of its
    base pairs and the positions of its base pairs in the original
    base pair array.

    Parameters
    ----------
    base_pairs : ndarray, shape=(n,2), dtype=int
        All base pairs of the structure; the region is a subset of
        these.
    region_pairs : ndarray, dtype=int
        Indices into ``base_pairs`` selecting the base pairs that form
        the region.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.
    """

    def __init__(self, base_pairs, region_pairs, scores):
        # All base indices that take part in the region's base pairs
        member_bases = base_pairs[region_pairs]

        self.region_pairs = region_pairs
        # Lowest and highest base index covered by the region
        self.start = member_bases.min()
        self.stop = member_bases.max()
        # The region's score is the sum over its base pairs
        self.score = scores[region_pairs].sum()

    def get_index_array(self):
        """
        Give the positions of the region's base pairs in the original
        base pair array.

        Returns
        -------
        region_pairs : ndarray
            The index array the region was created with.
        """
        return self.region_pairs

    def __lt__(self, other):
        """
        Order two regions by their object ids.

        According to the original implementation this operator exists
        because :func:`np.unique()` requires it. Only the fact that two
        regions differ matters, not a meaningful order, hence the
        arbitrary but consistent id-based comparison.

        Parameters
        ----------
        other : _Region
            The region to compare with.

        Returns
        -------
        comparison : bool
            ``True`` if the id of this region is smaller than the id of
            `other`.
        """
        return id(other) > id(self)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _find_regions(base_pairs, scores):
    """
    Partition the base pairs into regions and return the regions'
    conflict graph.

    A region is a run of consecutively nested base pairs: from one
    base pair to the next, the upstream base advances by exactly one
    rank while the downstream base recedes by exactly one rank.

    Parameters
    ----------
    base_pairs : ndarray, dtype=int, shape=(n, 2)
        Each row is equivalent to one base pair and contains the first
        indices of the residues corresponding to each base.
    scores : ndarray, dtype=int, shape=(n,)
        The score for each base pair.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    # Orient every pair as (lower index, higher index) and order the
    # pairs by their lower index; `input_rows` maps each sorted row back
    # to its row in the input array
    oriented_pairs = np.sort(base_pairs, axis=1)
    input_rows = np.argsort(oriented_pairs[:, 0])
    oriented_pairs = oriented_pairs[input_rows]

    # Replace each base index by its rank among all paired bases
    # E.g.: [[3, 5]  -->  [[0, 1]
    #        [7, 9]]       [2, 3]]
    flat_order = np.argsort(oriented_pairs.flatten())
    ranks = np.argsort(flat_order).reshape(base_pairs.shape)

    # Regions that are already complete
    found_regions = set()
    # Input rows of the base pairs in the region being assembled
    members = []

    for row, input_row in enumerate(input_rows):
        if members:
            # Rank steps from the previous base pair to this one
            upstream_step = ranks[row, 0] - ranks[row - 1, 0]
            downstream_step = ranks[row - 1, 1] - ranks[row, 1]
            # Anything but a directly stacked pair closes the region
            if upstream_step != 1 or downstream_step != 1:
                found_regions.add(
                    _Region(base_pairs, np.array(members), scores)
                )
                members = []
        members.append(input_row)

    # The final region is not closed by a subsequent base pair, so it
    # is stored explicitly
    found_regions.add(_Region(base_pairs, np.array(members), scores))

    return _generate_graphical_representation(found_regions)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _generate_graphical_representation(regions):
    """
    Build a ``NetworkX`` ``Graph`` whose nodes are the given regions
    and whose edges connect conflicting regions.

    Parameters
    ----------
    regions : set {_Region, ...}
        The regions representing the consecutively nested base pairs.

    Returns
    -------
    regions : Graph
        The ``_Region`` objects as graph, where the edges represent
        conflicts.
    """
    conflict_graph = nx.Graph()
    # Every region becomes a node, including those without conflicts
    conflict_graph.add_nodes_from(regions)

    # Each region appears twice in `ordered_regions`; `is_start` holds
    # ``True`` at the first value supplied for a region and ``False``
    # at the second one
    ordered_regions, (is_start,) = _get_region_array_for(
        regions, content=[lambda _region: [True, False]], dtype=["bool"]
    )

    for opening, region in enumerate(ordered_regions):
        # Handle every region only once, namely at its start entry
        if is_start[opening]:
            first_inside = opening + 1
            # Position of the region's second entry in the full array
            closing = first_inside + _get_first_occurrence_for(
                ordered_regions[first_inside:], region
            )

            # Regions seen an odd number of times between the two
            # entries of the current region. A region seen twice lies
            # completely inside the current one and is dropped again,
            # as it does not conflict.
            crossing = set()
            for other in ordered_regions[first_inside:closing]:
                if other in crossing:
                    crossing.remove(other)
                else:
                    crossing.add(other)

            # Each remaining region is in conflict with the current one
            conflict_graph.add_edges_from(
                (region, conflict) for conflict in crossing
            )

    return conflict_graph
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _get_first_occurrence_for(iterable, wanted_object):
    """
    Locate the first position at which an object appears in an
    iterable.

    Elements are matched by identity (``is``), not by equality.

    Parameters
    ----------
    iterable : iterable
        The iterable to search.
    wanted_object : object
        The object to look for.

    Returns
    -------
    index : int or None
        The index of the first occurrence of the object, or ``None``
        if the object does not occur in the iterable.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(iterable)
        if candidate is wanted_object
    )
    return next(matching_positions, None)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _get_region_array_for(regions, content=(), dtype=()):
|
|
356
|
+
"""
|
|
357
|
+
Get a :class:`ndarray` of region objects. Each object occurs twice,
|
|
358
|
+
representing its start and end point. The regions positions in the
|
|
359
|
+
array reflect their relative positions.
|
|
360
|
+
|
|
361
|
+
Furthermore, a list of functions can be provided enabling custom
|
|
362
|
+
outputs for each objects` start and end point.
|
|
363
|
+
|
|
364
|
+
Parameters
|
|
365
|
+
----------
|
|
366
|
+
regions : set {_region, ...}
|
|
367
|
+
The regions to be considered
|
|
368
|
+
content : list [function, ...]
|
|
369
|
+
The functions to be considered for custom outputs. For a given
|
|
370
|
+
region they must return a tuple of which the first value is
|
|
371
|
+
placed at the start position and the second value at the end
|
|
372
|
+
position of the region relative to the other regions.
|
|
373
|
+
dtype : list [str, ...]
|
|
374
|
+
The data type of the output of the custom functions.
|
|
375
|
+
|
|
376
|
+
Returns
|
|
377
|
+
-------
|
|
378
|
+
region_array : ndarray, dtype=object
|
|
379
|
+
The array of ordered region objects.
|
|
380
|
+
custom_content : list [ndarray, ...]
|
|
381
|
+
The custom output.
|
|
382
|
+
"""
|
|
383
|
+
# region_array and index array
|
|
384
|
+
region_array = np.empty(len(regions) * 2, dtype=_Region)
|
|
385
|
+
index_array = np.empty(len(regions) * 2, dtype="int32")
|
|
386
|
+
|
|
387
|
+
# Content array for custom return arrays
|
|
388
|
+
content_list = [None] * len(content)
|
|
389
|
+
for i in range(len(content)):
|
|
390
|
+
content_list[i] = np.empty(len(regions) * 2, dtype=dtype[i])
|
|
391
|
+
|
|
392
|
+
# Fill the arrays
|
|
393
|
+
for i, reg in enumerate(regions):
|
|
394
|
+
indices = [2 * i, 2 * i + 1]
|
|
395
|
+
region_array[indices] = reg
|
|
396
|
+
for c in range(len(content_list)):
|
|
397
|
+
content_list[c][indices] = content[c](reg)
|
|
398
|
+
index_array[indices] = [reg.start, reg.stop]
|
|
399
|
+
|
|
400
|
+
# Order the arrays by the base indices
|
|
401
|
+
sort_mask = np.argsort(index_array)
|
|
402
|
+
region_array = region_array[sort_mask]
|
|
403
|
+
|
|
404
|
+
# if no custom array content is given only return the ordered array
|
|
405
|
+
# containing the regions
|
|
406
|
+
if content == []:
|
|
407
|
+
return region_array
|
|
408
|
+
|
|
409
|
+
# if custom content is given also return the ordered content
|
|
410
|
+
for i in range(len(content_list)):
|
|
411
|
+
content_list[i] = content_list[i][sort_mask]
|
|
412
|
+
return region_array, content_list
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _remove_pseudoknots(regions):
    """
    Get the optimal solutions according to the algorithm referenced in
    :func:`pseudoknots()`.

    The algorithm uses a dynamic programming matrix in order to find
    the optimal solutions with the highest combined region scores.
    The score of a solution is the sum of the ``score`` attributes of
    its regions.

    Parameters
    ----------
    regions : set {_region, ...}
        The conflicting regions for which optimal solutions are to be
        found.

    Returns
    -------
    solutions : ndarray, dtype=object
        The optimal solutions. Each solution in the ``ndarray`` is
        represented as ``set`` of unknotted regions.
    """
    # Create dynamic programming matrix
    # Each region contributes two rows/columns: one for its start and
    # one for its stop
    dp_matrix_shape = len(regions) * 2, len(regions) * 2
    dp_matrix = np.empty(dp_matrix_shape, dtype="object")
    # For each cell, the lowest start and the highest stop of every
    # solution stored in that cell are tracked in these matrices
    dp_matrix_solutions_starts = np.zeros_like(dp_matrix)
    dp_matrix_solutions_stops = np.zeros_like(dp_matrix)

    # Each index corresponds to the position in the dp matrix.
    # ``region_array`` contains the region objects and ``start_stops``
    # contains the lowest and highest positions of the regions
    region_array, (start_stops,) = _get_region_array_for(
        regions, [lambda a: (a.start, a.stop)], ["int32"]
    )
    # Initialise the matrix diagonal with ndarrays of empty frozensets
    for i in range(len(dp_matrix)):
        dp_matrix[i, i] = np.array([frozenset()])

    # Iterate through the top right half of the dynamic programming
    # matrix
    # Columns are processed from left to right and rows from the
    # diagonal upwards, so the left, bottom and bottom-left neighbours
    # of a cell are filled before the cell itself
    for j in range(len(regions) * 2):
        for i in range(j - 1, -1, -1):
            solution_candidates = set()
            left = dp_matrix[i, j - 1]
            bottom = dp_matrix[i + 1, j]

            # Add all solutions of the cell to the left
            for solution in left:
                solution_candidates.add(solution)

            # Add all solutions of the cell to the bottom
            for solution in bottom:
                solution_candidates.add(solution)

            # Check if i and j are start/end-points of the same region
            if region_array[i] is region_array[j]:
                # Add all solutions from the cell to the bottom left
                # plus this region
                bottom_left = dp_matrix[i + 1, j - 1]
                for solution in bottom_left:
                    solution_candidates.add(solution | set([region_array[i]]))

            # Perform additional tests if solution in the left cell and
            # bottom cell both differ from an empty solution
            if np.any(left != [frozenset()]) and np.any(bottom != [frozenset()]):
                left_highest = dp_matrix_solutions_stops[i, j - 1]
                bottom_lowest = dp_matrix_solutions_starts[i + 1, j]

                # For each pair of solutions check if solutions are
                # disjoint
                for solution1, highest in zip(left, left_highest):
                    for solution2, lowest in zip(bottom, bottom_lowest):
                        if highest < lowest:
                            # Both solutions are disjoint
                            solution_candidates.add(solution1 | solution2)
                        else:
                            # Both solutions are not disjoint
                            # Add subsolutions
                            # ``k`` is a split position: it runs from
                            # one before the array index of ``lowest``
                            # up to and including the array index of
                            # ``highest``
                            for k in range(
                                np.where(start_stops == lowest)[0][0] - 1,
                                np.where(start_stops == highest)[0][0] + 1,
                            ):
                                cell1 = dp_matrix[i, k]
                                cell2 = dp_matrix[k + 1, j]
                                for subsolution1 in cell1:
                                    for subsolution2 in cell2:
                                        solution_candidates.add(
                                            subsolution1 | subsolution2
                                        )

            # Make solution candidates ``ndarray`` array of sets
            solution_candidates = np.array(list(solution_candidates))

            # Calculate the scores for each solution
            solution_scores = np.zeros(len(solution_candidates))
            for s, solution in enumerate(solution_candidates):
                score = 0
                for reg in solution:
                    score += reg.score
                solution_scores[s] = score
            # Get the indices where the score is at a maximum
            highest_scores = np.argwhere(
                solution_scores == np.amax(solution_scores)
            ).flatten()

            # Get the solutions with the highest score
            solution_candidates = solution_candidates[highest_scores]

            # Add the solutions to the dynamic programming matrix
            dp_matrix[i, j] = solution_candidates

            solution_starts = np.zeros_like(solution_candidates, dtype="int32")
            solution_stops = np.zeros_like(solution_candidates, dtype="int32")

            # Record the extent of each kept solution;
            # an empty solution gets -1 for both start and stop
            for s, solution in enumerate(solution_candidates):
                solution_starts[s] = min([reg.start for reg in solution], default=-1)
                solution_stops[s] = max([reg.stop for reg in solution], default=-1)

            dp_matrix_solutions_starts[i, j] = solution_starts
            dp_matrix_solutions_stops[i, j] = solution_stops

    # The top right corner contains the optimal solutions
    return dp_matrix[0, -1]
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def _get_results(regions, results, max_pseudoknot_order, order=0):
    """
    Determine the pseudoknot order for the regions of a conflict graph
    using the dynamic programming algorithm. Conflicts that remain
    after removing an optimal solution are evaluated recursively with
    an incremented order and merged into the results.

    Parameters
    ----------
    regions : graph of _region
        The regions for which optimal solutions are to be found, given
        as graph whose edges connect conflicting regions.
        Non-conflicting regions are removed from this graph in-place.
    results : list [ndarray, ...]
        The results. The arrays are modified in-place.
    max_pseudoknot_order : int
        The maximum pseudoknot order to be found. If a base pair would
        be of a higher order, its order is specified as -1. If ``None``
        is given, all base pairs are evaluated.
    order : int
        The order that is currently evaluated.

    Returns
    -------
    results : list [ndarray, ...]
        The results
    """
    # Regions without any conflict are isolated nodes.
    # They are collected before the graph is altered
    isolated = list(nx.isolates(regions))
    regions.remove_nodes_from(isolated)

    # Isolated regions belong to the order under evaluation
    isolated_indices = list(
        chain.from_iterable(reg.get_index_array() for reg in isolated)
    )
    for result in results:
        result[isolated_indices] = order

    # Nothing left to resolve -> the results are final
    if len(regions) == 0:
        return results

    # Every connected component of conflicting regions is solved
    # independently; a complete solution combines one optimal solution
    # from each component
    components = list(nx.connected_components(regions))
    component_solutions = [
        _remove_pseudoknots(component) for component in components
    ]
    solutions = [
        set().union(*combination)
        for combination in product(*component_solutions)
    ]

    merged_results = []
    for solution in solutions:
        # Each optimal solution works on its own copy of the results
        branch = [result.copy() for result in results]

        # The regions that are not part of the solution stay knotted
        knotted = regions.copy()
        knotted.remove_nodes_from(solution)

        # Base pairs of the solution are of the current order
        unknotted_indices = list(
            chain.from_iterable(reg.get_index_array() for reg in solution)
        )
        for result in branch:
            result[unknotted_indices] = order

        # Only descend further, if the maximum order is not reached yet
        if max_pseudoknot_order != order:
            branch = _get_results(
                knotted, branch, max_pseudoknot_order, order=order + 1
            )

        # Flatten the results of all solutions into one list
        merged_results.extend(branch)

    return merged_results
|