biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.align"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["Permutation", "RandomPermutation", "FrequencyPermutation"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
import abc
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
ctypedef np.int64_t int64
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Permutation(metaclass=abc.ABCMeta):
    """
    Abstract base class that defines an order for *k-mers*.

    Such an order is typically consumed by *k-mer* subset selectors,
    for example :class:`MinimizerSelector`.
    How the order is computed is up to the concrete subclass.

    If no :class:`Permutation` is given, subset selectors usually fall
    back to the symbol order of the :class:`KmerAlphabet`.
    That order is often the lexicographical order, which is known to
    yield suboptimal *k-mer* selection in many cases
    :footcite:`Roberts2004`.

    Attributes
    ----------
    min, max: int
        The smallest and largest value a permuted value
        (i.e. a value returned by :meth:`permute()`) can take.
        Must be overridden by subclasses.
    """

    @property
    @abc.abstractmethod
    def min(self):
        # Abstract: concrete subclasses report their lower bound here
        return None

    @property
    @abc.abstractmethod
    def max(self):
        # Abstract: concrete subclasses report their upper bound here
        return None

    @abc.abstractmethod
    def permute(self, kmers):
        """
        permute(kmers)

        Assign a new order to the given *k-mers*.

        Parameters
        ----------
        kmers : ndarray, dtype=np.int64
            The *k-mers* to reorder, given as *k-mer* codes.

        Returns
        -------
        order : ndarray, dtype=np.int64
            The sort key for the new order, i.e. a *k-mer* ``A`` is
            smaller than *k-mer* ``B``, if ``order[A] < order[B]``.
            The order values may be negative as well as positive
            integers.
            The order is unambiguous:
            If ``A != B``, then ``order[A] != order[B]``.
        """
        return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class RandomPermutation(Permutation):
    r"""
    Provide a pseudo-randomized order for *k-mers*.

    Notes
    -----

    This class uses a simple full-period *linear congruential generator*
    (LCG) to provide pseudo-randomized values:

    .. math:: \text{order} = (a \, c_\text{k-mer} + 1) \mod 2^{64}.

    The factor :math:`a` is taken from :footcite:`Steele2021` to ensure
    full periodicity and good random behavior.
    However, note that LCGs in general do not provide perfect random
    behavior, but only *good-enough* values for this purpose.

    Attributes
    ----------
    min, max: int
        The minimum and maximum value, the permuted value
        (i.e. the return value of :meth:`permute()`)
        can take.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> kmer_alph = KmerAlphabet(NucleotideSequence.alphabet_unamb, k=2)
    >>> permutation = RandomPermutation()
    >>> # k-mer codes representing the k-mers from 'AA' to 'TT'
    >>> # in lexicographic order
    >>> kmer_codes = np.arange(len(kmer_alph))
    >>> print(kmer_codes)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
    >>> print(["".join(kmer_alph.decode(c)) for c in kmer_codes])
    ['AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
    >>> # Shuffle order of these k-mer codes using the permutation
    >>> order = permutation.permute(kmer_codes)
    >>> print(order)
    [                   1 -3372029247567499370 -6744058495134998741
      8330656331007053504  4958627083439554133  1586597835872054762
     -1785431411695444609 -5157460659262943980 -8529489906830443351
      6545224919311608894  3173195671744109523  -198833575823389848
     -3570862823390889219 -6942892070958388590  8131822755183663655
      4759793507616164284]
    >>> # The order is not lexicographic anymore
    >>> kmer_codes = kmer_codes[np.argsort(order)]
    >>> print(["".join(kmer_alph.decode(c)) for c in kmer_codes])
    ['GA', 'TC', 'AG', 'CT', 'TA', 'AC', 'CG', 'GT', 'AA', 'CC', 'GG', 'TT', 'CA', 'GC', 'TG', 'AT']
    """

    # Multiplier and increment of the LCG
    LCG_A = 0xd1342543de82ef95
    LCG_C = 1

    @property
    def min(self):
        return np.iinfo(np.int64).min

    @property
    def max(self):
        return np.iinfo(np.int64).max

    def permute(self, kmers):
        """
        permute(kmers)

        Give the given *k-mers* a pseudo-random order.

        Parameters
        ----------
        kmers : array-like, dtype=np.int64
            The *k-mers* to reorder given as *k-mer* code.

        Returns
        -------
        order : ndarray, dtype=np.int64
            The sort key for the new order.
        """
        # Accept any array-like (e.g. a list of k-mer codes);
        # an ndarray is passed through unchanged
        kmers = np.asanyarray(kmers).astype(np.int64, copy=False)
        # Reinterpret as unsigned int to harness the m=2^64 LCG
        kmers = kmers.view(np.uint64)
        # Apply LCG
        # Applying the modulo operator is not necessary,
        # as the corresponding bits are truncated automatically
        permutation = RandomPermutation.LCG_A * kmers + RandomPermutation.LCG_C
        # Convert back to required signed int64
        # The resulting integer overflow changes the order, but this is
        # no problem since the order is pseudo-random anyway
        return permutation.view(np.int64)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class FrequencyPermutation(Permutation):
    """
    __init__(kmer_alphabet, counts)

    Provide an order for *k-mers* from a given
    :class:`KmerAlphabet`, such that less frequent *k-mers* are smaller
    than more frequent *k-mers*.
    The frequency of each *k-mer* can either be given directly via the
    constructor or can be computed from a :class:`KmerTable` via
    :meth:`from_table()`.

    Parameters
    ----------
    kmer_alphabet : KmerAlphabet, length=n
        The *k-mer* alphabet that defines the range of possible *k-mers*
        that should be permuted.
    counts : ndarray, shape=(n,), dtype=np.int64
        The absolute frequency, i.e. the number of occurrences, of each
        *k-mer* in `kmer_alphabet` in the sequence database of interest.
        ``counts[c] = f``, where ``c`` is the *k-mer* code and ``f`` is
        the corresponding frequency.

    Attributes
    ----------
    min, max: int
        The minimum and maximum value, the permuted value
        (i.e. the return value of :meth:`permute()`)
        can take.
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet that defines the range of possible *k-mers*
        that should be permuted.

    Notes
    -----

    In actual sequences some sequence patterns appear in high quantity.
    When selecting a subset of *k-mers*, e.g. via
    :class:`MinimizerSelector`, it is desirable to select the
    low-frequency *informative* *k-mers* to avoid spurious matches.
    To achieve such selection this class can be used.

    This class uses a table to look up the order.
    Hence, the memory consumption is :math:`8 n^k` bytes,
    where :math:`n` is the size of the base alphabet and :math:`k` is
    the *k-mer* size.

    Examples
    --------

    >>> alphabet = LetterAlphabet("abcdr")
    >>> sequence = GeneralSequence(alphabet, "abracadabra")
    >>> kmer_table = KmerTable.from_sequences(k=2, sequences=[sequence])
    >>> print(kmer_table)
    ab: (0, 0), (0, 7)
    ac: (0, 3)
    ad: (0, 5)
    br: (0, 1), (0, 8)
    ca: (0, 4)
    da: (0, 6)
    ra: (0, 2), (0, 9)
    >>> # Create all k-mers in lexicographic order
    >>> kmer_alph = kmer_table.kmer_alphabet
    >>> kmer_codes = np.arange(0, len(kmer_alph))
    >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
    ['...', 'da', 'db', 'dc', 'dd', 'dr', 'ra', 'rb', 'rc', 'rd', 'rr']
    >>> # After applying the permutation the k-mers are ordered
    >>> # by their frequency in the table
    >>> # -> the most frequent k-mers get the largest order values
    >>> permutation = FrequencyPermutation.from_table(kmer_table)
    >>> order = permutation.permute(kmer_codes)
    >>> print(order)
    [ 0 22 18 19  1  2  3  4  5 23 20  6  7  8  9 21 10 11 12 13 24 14 15 16
     17]
    >>> kmer_codes = kmer_codes[np.argsort(order)]
    >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]])
    ['...', 'rc', 'rd', 'rr', 'ac', 'ad', 'ca', 'da', 'ab', 'br', 'ra']
    """

    def __init__(self, kmer_alphabet, counts):
        if len(kmer_alphabet) != len(counts):
            raise IndexError(
                f"The k-mer alphabet has {len(kmer_alphabet)} k-mers, "
                f"but {len(counts)} counts were given"
            )
        # 'order' maps a permutation to a k-mer
        # Stability is important to get the same k-mer subset selection
        # on different architectures
        order = np.argsort(counts, kind="stable")
        # 'argsort()' returns the platform-dependent 'intp' type,
        # but '_invert_mapping()' requires an int64 buffer
        # -> cast explicitly (no copy is made if it is already int64)
        order = order.astype(np.int64, copy=False)
        # '_permutation_table' should perform the reverse mapping
        self._permutation_table = _invert_mapping(order)
        self._kmer_alph = kmer_alphabet

    @property
    def min(self):
        return 0

    @property
    def max(self):
        return len(self._permutation_table) - 1

    @property
    def kmer_alphabet(self):
        return self._kmer_alph

    @staticmethod
    def from_table(kmer_table):
        """
        from_table(kmer_table)

        Create a :class:`FrequencyPermutation` from the *k-mer* counts
        of a :class:`KmerTable`.

        Parameters
        ----------
        kmer_table : KmerTable
            The *k-mer* counts are taken from this table.

        Returns
        -------
        permutation : FrequencyPermutation
            The permutation is based on the counts.
        """
        return FrequencyPermutation(
            kmer_table.kmer_alphabet, kmer_table.count()
        )

    def permute(self, kmers):
        # Look up the precomputed order value of each k-mer code
        return self._permutation_table[kmers]
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _invert_mapping(int64[:] mapping):
    """
    Invert a bijective integer mapping.

    If `mapping` maps a unique integer ``A`` to a unique integer
    ``B``, i.e. ``B = mapping[A]``, the returned array fulfills
    ``A = inverted[B]``.

    Note that the mapping must be bijective and all of its values must
    be in the range ``0..n-1``, where ``n`` is the length of `mapping`:
    The values are used directly as indices into the result and bounds
    checking is disabled for this function.
    """
    cdef int64 source
    cdef int64 length = mapping.shape[0]

    cdef int64[:] inverse = np.empty(length, dtype=np.int64)
    for source in range(length):
        # The value at position 'source' becomes the position
        # in the inverse at which 'source' is stored
        inverse[mapping[source]] = source

    return np.asarray(inverse)
|