biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.align"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["SimilarityRule", "ScoreThresholdRule"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
import abc
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
ctypedef np.int64_t int64
|
|
17
|
+
ctypedef np.int32_t int32
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SimilarityRule(metaclass=abc.ABCMeta):
    """
    Abstract base class that every similarity rule derives from.

    Given a single *k-mer*, a :class:`SimilarityRule` enumerates all
    *k-mers* that are regarded as *similar* to it.
    What *similar* means is defined by the concrete subclass.
    """

    @abc.abstractmethod
    def similar_kmers(self, kmer_alphabet, kmer):
        """
        similar_kmers(kmer_alphabet, kmer)

        Enumerate the *k-mers* that are similar to the given *k-mer*.

        Parameters
        ----------
        kmer_alphabet : KmerAlphabet
            The reference *k-mer* alphabet the similar *k-mers* are
            selected from.
        kmer : int
            The symbol code of the *k-mer* whose similar *k-mers* are
            requested.

        Returns
        -------
        similar_kmers : ndarray, dtype=np.int64
            The symbol codes of all similar *k-mers*.

        Notes
        -----
        Implementations in derived classes must guarantee that the
        returned array

        1. is free of duplicates and
        2. contains the input `kmer` itself.
        """
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ScoreThresholdRule(SimilarityRule):
    r"""
    __init__(matrix, threshold)

    This similarity rule calculates all *k-mers* whose similarity score
    with a given *k-mer* is greater than or equal to a defined threshold
    score.

    The similarity score :math:`S` of two *k-mers* :math:`a` and
    :math:`b` is defined as the sum of the pairwise similarity scores
    from a substitution matrix :math:`M`:

    .. math::

        S(a,b) = \sum_{i=1}^k M(a_i, b_i)

    Therefore, this similarity rule allows substitutions with similar
    symbols within a *k-mer*.

    This class is especially useful for finding similar *k-mers* in
    protein sequences.

    Parameters
    ----------
    matrix : SubstitutionMatrix
        The similarity scores are taken from this matrix.
        The matrix must be symmetric.
    threshold : int
        The threshold score.
        A *k-mer* :math:`b` is regarded as similar to a *k-mer*
        :math:`a`, if the similarity score between :math:`a` and
        :math:`b` is greater than or equal to the threshold.

    Notes
    -----
    For efficient generation of similar *k-mers* an implementation of
    the *branch-and-bound* algorithm :footcite:`Hauser2013` is used.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> kmer_alphabet = KmerAlphabet(ProteinSequence.alphabet, k=3)
    >>> matrix = SubstitutionMatrix.std_protein_matrix()
    >>> rule = ScoreThresholdRule(matrix, threshold=15)
    >>> similars = rule.similar_kmers(kmer_alphabet, kmer_alphabet.encode("AIW"))
    >>> print(["".join(s) for s in kmer_alphabet.decode_multiple(similars)])
    ['AFW', 'AIW', 'ALW', 'AMW', 'AVW', 'CIW', 'GIW', 'SIW', 'SVW', 'TIW', 'VIW', 'XIW']
    """

    def __init__(self, matrix, int32 threshold):
        if not matrix.is_symmetric():
            raise ValueError("A symmetric substitution matrix is required")
        self._matrix = matrix
        self._threshold = threshold

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def similar_kmers(self, kmer_alphabet, kmer):
        """
        similar_kmers(kmer_alphabet, kmer)

        Calculate all similar *k-mers* for a given *k-mer*.

        Parameters
        ----------
        kmer_alphabet : KmerAlphabet
            The reference *k-mer* alphabet to select the *k-mers* from.
        kmer : int
            The symbol code of the *k-mer* to find similar *k-mers* for.

        Returns
        -------
        similar_kmers : ndarray, dtype=np.int64
            The symbol codes for all similar *k-mers*.

        Raises
        ------
        ValueError
            If the alphabet of the substitution matrix does not extend
            the base alphabet of `kmer_alphabet`.
        """
        cdef int INIT_SIZE = 1

        if not self._matrix.get_alphabet1().extends(
            kmer_alphabet.base_alphabet
        ):
            raise ValueError(
                "Substitution matrix is incompatible with k-mer base alphabet"
            )

        cdef int64 alph_len = len(kmer_alphabet.base_alphabet)
        # Trim the matrix to the symbols of the base alphabet
        # (remove unused symbols); the matrix is fetched only once and
        # the same trimmed view is used for scoring and for the bounds
        trimmed_scores = self._matrix.score_matrix()[:alph_len, :alph_len]
        cdef const int32[:,:] matrix = trimmed_scores
        cdef int32 threshold = self._threshold

        # The best score each symbol can reach against any symbol of the
        # base alphabet: Symbols that cannot appear in a generated k-mer
        # must not loosen the bound used for pruning
        cdef int32[:] max_scores = np.max(trimmed_scores, axis=-1)

        cdef int k = kmer_alphabet.k
        # Split the k-mer code into the individual symbol codes
        cdef int64[:] split_kmer = kmer_alphabet.split(kmer).astype(np.int64)
        # This array will hold the current kmer to be tested
        cdef int64[:] current_split_kmer = np.zeros(k, dtype=np.int64)
        # This array will store the accepted k-mers
        # i.e. k-mers that reach the threshold score
        cdef int64[:,:] similar_split_kmers = np.zeros(
            (INIT_SIZE, k), dtype=np.int64
        )

        # Calculate the minimum score for each k-mer position that is
        # necessary to reach a total higher/equal to the threshold score
        cdef int32[:] positional_thresholds = np.empty(k, dtype=np.int32)
        cdef int i
        cdef int total_max_score = 0
        for i in reversed(range(positional_thresholds.shape[0])):
            positional_thresholds[i] = threshold - total_max_score
            total_max_score += max_scores[split_kmer[i]]

        # 'prefix_scores[p]' is the similarity score accumulated over
        # the positions before 'p' of the currently generated k-mer.
        # It is written when the search descends into position 'p' and
        # stays valid while the symbols before 'p' are unchanged,
        # so the score needs no re-summation in each iteration
        cdef int32[:] prefix_scores = np.zeros(k, dtype=np.int32)

        # 'pos' is the current position within the k-mer
        # where symbols are substituted
        cdef int pos = 0
        cdef int similar_i = 0
        cdef int32 score
        # 'pos' is -1, after all symbol codes at pos 0 are traversed
        while pos != -1:
            if current_split_kmer[pos] >= alph_len:
                # All symbol codes were traversed at this position
                # -> jump one k-mer position back and proceed with
                # next symbol
                pos -= 1
                if pos != -1:
                    current_split_kmer[pos] += 1
            else:
                # Total similarity score between the input k-mer
                # and generated k-mer up to the point of the current
                # position
                score = (
                    prefix_scores[pos]
                    + matrix[split_kmer[pos], current_split_kmer[pos]]
                )
                if score < positional_thresholds[pos]:
                    # The threshold score is not reached
                    # -> this branch ends and we proceed with the next
                    # symbol at this position
                    current_split_kmer[pos] += 1
                elif pos < k-1:
                    # Threshold condition is fulfilled:
                    # Go deeper in the same branch
                    # (jump one position forward)
                    pos += 1
                    prefix_scores[pos] = score
                    current_split_kmer[pos] = 0
                else:
                    # Threshold condition is fulfilled at maximum depth
                    # (last k-mer position) -> store similar k-mer
                    if similar_i >= similar_split_kmers.shape[0]:
                        # The array is full -> double its size
                        similar_split_kmers = expand(
                            np.asarray(similar_split_kmers)
                        )
                    similar_split_kmers[similar_i] = current_split_kmer
                    similar_i += 1
                    # Proceed with the next symbol at this position,
                    # as we cannot go deeper anymore
                    current_split_kmer[pos] += 1

        # Trim to correct size
        # and convert split k-mers back to k-mer code
        return kmer_alphabet.fuse(np.asarray(similar_split_kmers[:similar_i]))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
cdef np.ndarray expand(np.ndarray array):
    """
    Return a copy of a two-dimensional array with twice as many rows.

    The original content occupies the leading rows of the result,
    the appended rows are left uninitialized.
    """
    n_rows = array.shape[0]
    n_cols = array.shape[1]
    grown = np.empty((2 * n_rows, n_cols), dtype=array.dtype)
    grown[:n_rows] = array
    return grown
|
|
Binary file
|