biotite 1.5.0__cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-311-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,954 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.align"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["MinimizerSelector", "SyncmerSelector", "CachedSyncmerSelector",
|
|
8
|
+
"MincodeSelector"]
|
|
9
|
+
|
|
10
|
+
cimport cython
|
|
11
|
+
cimport numpy as np
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from .kmeralphabet import KmerAlphabet
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
ctypedef np.int64_t int64
|
|
18
|
+
ctypedef np.uint32_t uint32
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Obtained from 'np.iinfo(np.int64).max'
|
|
22
|
+
cdef int64 MAX_INT_64 = 9223372036854775807
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MinimizerSelector:
    """
    MinimizerSelector(kmer_alphabet, window, permutation=None)

    Selects the *minimizers* in sequences.

    A window spanning a fixed number of consecutive *k-mers* is rolled
    over the sequence.
    In each window the *k-mer* with the minimum *k-mer* code is the
    minimizer :footcite:`Roberts2004`.
    If that minimum *k-mer* occurs more than once in a window, the
    leftmost occurrence is selected as minimizer.

    Parameters
    ----------
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet that defines the *k-mer* size and the type
        of sequence this :class:`MinimizerSelector` can be applied on.
    window : int
        The size of the rolling window, where the minimizers are
        searched in.
        In other words this is the number of *k-mers* per window.
        The window size must be at least 2.
    permutation : Permutation
        If set, the *k-mer* order is permuted, i.e.
        the minimizer is chosen based on the ordering of the sort keys
        from :meth:`Permutation.permute()`.
        By default, the standard order of the :class:`KmerAlphabet` is
        used.
        This standard order is often the lexicographical order, which is
        known to yield suboptimal *density* in many cases
        :footcite:`Roberts2004`.

    Attributes
    ----------
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet.
    window : int
        The window size.
    permutation : Permutation
        The permutation.

    Notes
    -----
    For minimizer computation a fast algorithm :footcite:`VanHerk1992`
    is used, whose runtime scales linearly with the length of the
    sequence and is constant with regard to the size of the rolling
    window.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    The *k-mer* decomposition of a sequence can yield a high number of
    *k-mers*:

    >>> sequence1 = ProteinSequence("THIS*IS*A*SEQVENCE")
    >>> kmer_alph = KmerAlphabet(sequence1.alphabet, k=3)
    >>> all_kmers = kmer_alph.create_kmers(sequence1.code)
    >>> print(all_kmers)
    [ 9367  3639  4415  9199 13431  4415  9192 13271   567 13611  8725  2057
      7899  9875  1993  6363]
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in all_kmers])
    ['THI', 'HIS', 'IS*', 'S*I', '*IS', 'IS*', 'S*A', '*A*', 'A*S', '*SE', 'SEQ', 'EQV', 'QVE', 'VEN', 'ENC', 'NCE']

    Minimizers can be used to reduce the number of *k-mers* by selecting
    only the minimum *k-mer* in each window *w*:

    >>> minimizer = MinimizerSelector(kmer_alph, window=4)
    >>> minimizer_pos, minimizers = minimizer.select(sequence1)
    >>> print(minimizer_pos)
    [ 1  2  5  8 11 14]
    >>> print(minimizers)
    [3639 4415 4415  567 2057 1993]
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in minimizers])
    ['HIS', 'IS*', 'IS*', 'A*S', 'EQV', 'ENC']

    Although this approach reduces the number of *k-mers*, minimizers
    are still guaranteed to match minimizers in another sequence, if
    they share an equal subsequence of at least length *w + k - 1*:

    >>> sequence2 = ProteinSequence("ANQTHER*SEQVENCE")
    >>> other_minimizer_pos, other_minimizers = minimizer.select(sequence2)
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in other_minimizers])
    ['ANQ', 'HER', 'ER*', 'EQV', 'ENC']
    >>> common_minimizers = set.intersection(set(minimizers), set(other_minimizers))
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in common_minimizers])
    ['EQV', 'ENC']
    """

    def __init__(self, kmer_alphabet, window, permutation=None):
        # A window holding a single k-mer is rejected up front
        if window < 2:
            raise ValueError("Window size must be at least 2")
        self._kmer_alph = kmer_alphabet
        self._window = window
        self._permutation = permutation


    # Read-only views on the values given at construction

    @property
    def kmer_alphabet(self):
        return self._kmer_alph

    @property
    def window(self):
        return self._window

    @property
    def permutation(self):
        return self._permutation


    def select(self, sequence, bint alphabet_check=True):
        """
        select(sequence, alphabet_check=True)

        Decompose a sequence into its overlapping *k-mers* and select
        the minimizers from them.

        Parameters
        ----------
        sequence : Sequence
            The sequence to find the minimizers in.
            Must be compatible with the given `kmer_alphabet`.
        alphabet_check : bool, optional
            If set to false, the compatibility between the alphabet
            of the sequence and the alphabet of the
            :class:`MinimizerSelector`
            is not checked to gain additional performance.

        Returns
        -------
        minimizer_indices : ndarray, dtype=np.uint32
            The sequence indices where the minimizer *k-mers* start.
        minimizers : ndarray, dtype=np.int64
            The *k-mers* that are the selected minimizers, returned as
            *k-mer* code.

        Raises
        ------
        ValueError
            If `alphabet_check` is true and the base alphabet of the
            *k-mer* alphabet does not extend the alphabet of the
            sequence.

        Notes
        -----
        Duplicate minimizers are omitted, i.e. if two windows have the
        same minimizer position, the return values contain this
        minimizer only once.
        """
        base_alphabet_fits = (
            not alphabet_check
            or self._kmer_alph.base_alphabet.extends(sequence.alphabet)
        )
        if not base_alphabet_fits:
            raise ValueError(
                "The sequence's alphabet does not fit the k-mer alphabet"
            )
        # The actual selection is shared with the k-mer based entry point
        return self.select_from_kmers(
            self._kmer_alph.create_kmers(sequence.code)
        )


    def select_from_kmers(self, kmers):
        """
        select_from_kmers(kmers)

        Select minimizers for the given overlapping *k-mers*.

        Parameters
        ----------
        kmers : ndarray, dtype=np.int64
            The *k-mer* codes representing the sequence to find the
            minimizers in.
            The *k-mer* codes correspond to the *k-mers* encoded by the
            given `kmer_alphabet`.

        Returns
        -------
        minimizer_indices : ndarray, dtype=np.uint32
            The indices in the input *k-mer* sequence where a minimizer
            appears.
        minimizers : ndarray, dtype=np.int64
            The corresponding *k-mers* codes of the minimizers.

        Raises
        ------
        IndexError
            If the permutation returns a different number of sort keys
            than *k-mers* were given.
        ValueError
            If fewer *k-mers* than the window size are given.

        Notes
        -----
        Duplicate minimizers are omitted, i.e. if two windows have the
        same minimizer position, the return values contain this
        minimizer only once.
        """
        permutation = self._permutation
        if permutation is not None:
            # The minimum is searched among the sort keys, not the codes
            sort_keys = permutation.permute(kmers)
            n_keys = len(sort_keys)
            n_kmers = len(kmers)
            if n_keys != n_kmers:
                raise IndexError(
                    f"The Permutation is defective, it gave {n_keys} "
                    f"sort keys for {n_kmers} k-mers"
                )
        else:
            # Without permutation the k-mer codes are their own sort keys
            sort_keys = kmers

        if len(kmers) < self._window:
            raise ValueError(
                "The number of k-mers is smaller than the window size"
            )

        kmer_codes = kmers.astype(np.int64, copy=False)
        sort_keys = sort_keys.astype(np.int64, copy=False)
        return _minimize(
            kmer_codes, sort_keys, self._window, include_duplicates=False
        )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class SyncmerSelector:
|
|
230
|
+
"""
|
|
231
|
+
SyncmerSelector(alphabet, k, s, permutation=None, offset=(0,))
|
|
232
|
+
|
|
233
|
+
Selects the *syncmers* in sequences.
|
|
234
|
+
|
|
235
|
+
Let the *s-mers* be all overlapping substrings of length *s* in a
|
|
236
|
+
*k-mer*.
|
|
237
|
+
A *k-mer* is a syncmer, if its minimum *s-mer* is at one of the
|
|
238
|
+
given offset positions :footcite:`Edgar2021`.
|
|
239
|
+
If the same minimum *s-mer* appears twice in a *k-mer*, the position
|
|
240
|
+
of the leftmost *s-mer* is taken.
|
|
241
|
+
|
|
242
|
+
Parameters
|
|
243
|
+
----------
|
|
244
|
+
alphabet : Alphabet
|
|
245
|
+
The base alphabet the *k-mers* and *s-mers* are created from.
|
|
246
|
+
Defines the type of sequence this :class:`SyncmerSelector` can
|
|
247
|
+
be applied on.
|
|
248
|
+
k, s : int
|
|
249
|
+
The length of the *k-mers* and *s-mers*, respectively.
|
|
250
|
+
permutation : Permutation
|
|
251
|
+
If set, the *s-mer* order is permuted, i.e.
|
|
252
|
+
the minimum *s-mer* is chosen based on the ordering of the sort
|
|
253
|
+
keys from :class:`Permutation.permute()`.
|
|
254
|
+
This :class:`Permutation` must be compatible with *s*
|
|
255
|
+
(not with *k*).
|
|
256
|
+
By default, the standard order of the :class:`KmerAlphabet` is
|
|
257
|
+
used.
|
|
258
|
+
This standard order is often the lexicographical order, which is
|
|
259
|
+
known to yield suboptimal *density* in many cases
|
|
260
|
+
:footcite:`Roberts2004`.
|
|
261
|
+
offset : array-like of int
|
|
262
|
+
If the minimum *s-mer* in a *k-mer* is at one of the given
|
|
263
|
+
offset positions, that *k-mer* is a syncmer.
|
|
264
|
+
Negative values indicate the position from the end of the
|
|
265
|
+
*k-mer*.
|
|
266
|
+
By default, the minimum position needs to be at the start of the
|
|
267
|
+
*k-mer*, which is termed *open syncmer*.
|
|
268
|
+
|
|
269
|
+
Attributes
|
|
270
|
+
----------
|
|
271
|
+
alphabet : Alphabet
|
|
272
|
+
The base alphabet.
|
|
273
|
+
kmer_alphabet, smer_alphabet : KmerAlphabet
|
|
274
|
+
The :class:`KmerAlphabet` for *k* and *s*, respectively.
|
|
275
|
+
permutation : Permutation
|
|
276
|
+
The permutation.
|
|
277
|
+
|
|
278
|
+
See Also
|
|
279
|
+
--------
|
|
280
|
+
CachedSyncmerSelector
|
|
281
|
+
A cached variant with faster syncmer selection at the cost of
|
|
282
|
+
increased initialization time.
|
|
283
|
+
|
|
284
|
+
Notes
|
|
285
|
+
-----
|
|
286
|
+
For syncmer computation from a sequence a fast algorithm
|
|
287
|
+
:footcite:`VanHerk1992` is used, whose runtime scales linearly with
|
|
288
|
+
the length of the sequence and is constant with regard to *k*.
|
|
289
|
+
|
|
290
|
+
References
|
|
291
|
+
----------
|
|
292
|
+
|
|
293
|
+
.. footbibliography::
|
|
294
|
+
|
|
295
|
+
Examples
|
|
296
|
+
--------
|
|
297
|
+
|
|
298
|
+
This example is taken from :footcite:`Edgar2021`:
|
|
299
|
+
The subset of *k-mers* that are *closed syncmers* are selected.
|
|
300
|
+
Closed syncmers are syncmers, where the minimum *s-mer* is in the
|
|
301
|
+
first or last position of the *k-mer*.
|
|
302
|
+
*s-mers* are ordered lexicographically in this example.
|
|
303
|
+
|
|
304
|
+
>>> sequence = NucleotideSequence("GGCAAGTGACA")
|
|
305
|
+
>>> kmer_alph = KmerAlphabet(sequence.alphabet, k=5)
|
|
306
|
+
>>> kmers = kmer_alph.create_kmers(sequence.code)
|
|
307
|
+
>>> closed_syncmer_selector = CachedSyncmerSelector(
|
|
308
|
+
... sequence.alphabet,
|
|
309
|
+
... # The same k as in the KmerAlphabet
|
|
310
|
+
... k=5,
|
|
311
|
+
... s=2,
|
|
312
|
+
... # The offset determines that closed syncmers will be selected
|
|
313
|
+
... offset=(0, -1)
|
|
314
|
+
... )
|
|
315
|
+
>>> syncmer_pos, syncmers = closed_syncmer_selector.select(sequence)
|
|
316
|
+
>>> # Print all k-mers in the sequence and mark syncmers with a '*'
|
|
317
|
+
>>> for pos, kmer in enumerate(kmer_alph.create_kmers(sequence.code)):
|
|
318
|
+
... if pos in syncmer_pos:
|
|
319
|
+
... print("* " + "".join(kmer_alph.decode(kmer)))
|
|
320
|
+
... else:
|
|
321
|
+
... print(" " + "".join(kmer_alph.decode(kmer)))
|
|
322
|
+
* GGCAA
|
|
323
|
+
GCAAG
|
|
324
|
+
CAAGT
|
|
325
|
+
* AAGTG
|
|
326
|
+
* AGTGA
|
|
327
|
+
* GTGAC
|
|
328
|
+
TGACA
|
|
329
|
+
"""
|
|
330
|
+
|
|
331
|
+
def __init__(self, alphabet, k, s, permutation=None, offset=(0,)):
    # Raises ValueError if `s` is not smaller than `k` or if `offset`
    # contains duplicates (after wrap-around) and IndexError if an
    # offset lies outside of the window
    if not s < k:
        raise ValueError("s must be smaller than k")
    # Number of s-mers that fit into a single k-mer
    window = k - s + 1
    self._window = window
    self._alphabet = alphabet
    self._kmer_alph = KmerAlphabet(alphabet, k)
    self._smer_alph = KmerAlphabet(alphabet, s)

    self._permutation = permutation

    offsets = np.asarray(offset, dtype=np.int64)
    # Negative offsets count from the end of the window
    offsets = np.where(offsets < 0, window + offsets, offsets)
    self._offset = offsets
    outside_window = (offsets >= window) | (offsets < 0)
    if outside_window.any():
        raise IndexError("Offset is out of window range")
    n_distinct = len(np.unique(offsets))
    if n_distinct != len(offsets):
        raise ValueError("Offset must contain unique values")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@property
def alphabet(self):
    """
    The base alphabet given at initialization.
    """
    return self._alphabet
|
|
359
|
+
|
|
360
|
+
@property
def kmer_alphabet(self):
    """
    The :class:`KmerAlphabet` created for *k* at initialization.
    """
    return self._kmer_alph
|
|
363
|
+
|
|
364
|
+
@property
def smer_alphabet(self):
    """
    The :class:`KmerAlphabet` created for *s* at initialization.
    """
    return self._smer_alph
|
|
367
|
+
|
|
368
|
+
@property
def permutation(self):
    """
    The permutation given at initialization, ``None`` if none was set.
    """
    return self._permutation
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def select(self, sequence, bint alphabet_check=True):
    """
    select(sequence, alphabet_check=True)

    Obtain all overlapping *k-mers* from a sequence and select
    the syncmers from them.

    Parameters
    ----------
    sequence : Sequence
        The sequence to find the syncmers in.
        Must be compatible with the given `kmer_alphabet`
    alphabet_check: bool, optional
        If set to false, the compatibility between the alphabet
        of the sequence and the alphabet of the
        :class:`SyncmerSelector`
        is not checked to gain additional performance.

    Returns
    -------
    syncmer_indices : ndarray, dtype=int
        The sequence indices where the syncmers start.
    syncmers : ndarray, dtype=np.int64
        The corresponding *k-mer* codes of the syncmers.

    Raises
    ------
    ValueError
        If `alphabet_check` is true and the alphabet of the
        selector does not extend the alphabet of the sequence.
    IndexError
        If the permutation returns a different number of sort keys
        than *s-mers* given to it.
    """
    if alphabet_check and not self._alphabet.extends(sequence.alphabet):
        raise ValueError(
            "The sequence's alphabet does not fit "
            "the selector's alphabet"
        )
    seq_code = sequence.code
    kmers = self._kmer_alph.create_kmers(seq_code)
    smers = self._smer_alph.create_kmers(seq_code)

    # Without a permutation the s-mer codes are their own sort keys
    if self._permutation is None:
        sort_keys = smers
    else:
        sort_keys = self._permutation.permute(smers)
        if len(sort_keys) != len(smers):
            raise IndexError(
                f"The Permutation is defective, it gave {len(sort_keys)} "
                f"sort keys for {len(smers)} s-mers"
            )

    # The absolute position of the minimum s-mer for each k-mer
    # Duplicates are kept, so that there is one entry per k-mer
    absolute_min_pos, _ = _minimize(
        smers,
        sort_keys.astype(np.int64, copy=False),
        self._window,
        include_duplicates=True
    )
    # Shift to the position of the minimum s-mer relative to the
    # start of its k-mer
    kmer_starts = np.arange(len(kmers))
    syncmer_pos = self._filter_syncmer_pos(absolute_min_pos - kmer_starts)
    return syncmer_pos, kmers[syncmer_pos]
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def select_from_kmers(self, kmers):
    """
    select_from_kmers(kmers)

    Select syncmers for the given *k-mers*.

    The *k-mers* are not required to overlap.

    Parameters
    ----------
    kmers : ndarray, dtype=np.int64
        The *k-mer* codes to select the syncmers from.

    Returns
    -------
    syncmer_indices : ndarray, dtype=int
        The indices in `kmers` that point to syncmers.
    syncmers : ndarray, dtype=np.int64
        The corresponding *k-mer* codes of the syncmers.

    Raises
    ------
    IndexError
        If the permutation returns a different number of sort keys
        than *s-mers* given to it.

    Notes
    -----
    Since for *s-mer* creation, the *k-mers* need to be converted
    back to symbol codes again and since the input *k-mers* are not
    required to overlap, calling :meth:`select()` is much faster.
    However, :meth:`select()` is only available for
    :class:`Sequence` objects.
    """
    cdef int64 i

    # One row of symbol codes for each k-mer
    kmer_symbols = self._kmer_alph.split(kmers)
    smer_alph = self._smer_alph
    permutation = self._permutation

    # Position of the minimum s-mer within each k-mer
    cdef int64[:] min_pos = np.zeros(len(kmer_symbols), dtype=np.int64)
    for i in range(kmer_symbols.shape[0]):
        smers = smer_alph.create_kmers(kmer_symbols[i])
        if permutation is None:
            sort_keys = smers
        else:
            sort_keys = permutation.permute(smers)
            if len(sort_keys) != len(smers):
                raise IndexError(
                    f"The Permutation is defective, it gave {len(sort_keys)} "
                    f"sort keys for {len(smers)} s-mers"
                )
        min_pos[i] = np.argmin(sort_keys)

    syncmer_pos = self._filter_syncmer_pos(min_pos)
    return syncmer_pos, kmers[syncmer_pos]
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _filter_syncmer_pos(self, min_pos):
    """
    Get indices of *k-mers* that are syncmers, based on `min_pos`,
    the position of the minimum *s-mer* in each *k-mer*.
    Syncmers are *k-mers* whose minimum *s-mer* is at (one of)
    the given offset position(s).

    Parameters
    ----------
    min_pos : array-like of int
        For each *k-mer* the position of its minimum *s-mer*
        relative to the start of the *k-mer*.
        May be any object convertible by :func:`numpy.asarray()`,
        including a typed memoryview.

    Returns
    -------
    syncmer_indices : ndarray, dtype=int
        The indices of `min_pos` whose value equals one of the
        offsets.
        Empty, if there are no offsets.
    """
    # A typed memoryview has no elementwise comparison on its own
    # -> convert explicitly instead of relying on the reflected
    # comparison of the NumPy scalar
    min_pos = np.asarray(min_pos)
    # Starting from an all-false mask also covers the case that
    # there are no offsets at all
    syncmer_mask = np.zeros(min_pos.shape, dtype=bool)
    for offset in self._offset:
        # For the usual number of offsets, this 'loop'-approach is
        # faster than np.isin()
        syncmer_mask |= min_pos == offset
    return np.where(syncmer_mask)[0]
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
class CachedSyncmerSelector(SyncmerSelector):
    """
    CachedSyncmerSelector(alphabet, k, s, permutation=None, offset=(0,))

    Selects the *syncmers* in sequences.

    Fulfills the same purpose as :class:`SyncmerSelector`, but
    precomputes for each possible *k-mer*, whether it is a syncmer,
    at initialization.
    Hence, syncmer selection is faster at the cost of longer
    initialization time.

    Parameters
    ----------
    alphabet : Alphabet
        The base alphabet the *k-mers* and *s-mers* are created from.
        Defines the type of sequence this
        :class:`CachedSyncmerSelector` can be applied on.
    k, s : int
        The length of the *k-mers* and *s-mers*, respectively.
    permutation : Permutation
        If set, the *s-mer* order is permuted, i.e.
        the minimum *s-mer* is chosen based on the ordering of the sort
        keys from :class:`Permutation.permute()`.
        This :class:`Permutation` must be compatible with *s*
        (not with *k*).
        By default, the standard order of the :class:`KmerAlphabet` is
        used.
        This standard order is often the lexicographical order, which is
        known to yield suboptimal *density* in many cases
        :footcite:`Roberts2004`.
    offset : array-like of int
        If the minimum *s-mer* in a *k-mer* is at one of the given
        offset positions, that *k-mer* is a syncmer.
        Negative values indicate the position from the end of the
        *k-mer*.
        By default, the minimum position needs to be at the start of the
        *k-mer*, which is termed *open syncmer*.

    Attributes
    ----------
    alphabet : Alphabet
        The base alphabet.
    kmer_alphabet, smer_alphabet : KmerAlphabet
        The :class:`KmerAlphabet` for *k* and *s*, respectively.
    permutation : Permutation
        The permutation.

    See Also
    --------
    SyncmerSelector
        A standard variant for syncmer selection.

    Notes
    -----
    Both the initialization time and memory requirements are
    proportional to the size of the `kmer_alphabet`, i.e. :math:`n^k`.
    Hence, it is advised to use this class only for rather small
    alphabets.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> sequence = NucleotideSequence("GGCAAGTGACA")
    >>> kmer_alph = KmerAlphabet(sequence.alphabet, k=5)
    >>> # The initialization can take quite a long time for large *k-mer* alphabets...
    >>> closed_syncmer_selector = CachedSyncmerSelector(
    ...     sequence.alphabet,
    ...     # The same k as in the KmerAlphabet
    ...     k=5,
    ...     s=2,
    ...     # The offset determines that closed syncmers will be selected
    ...     offset=(0, -1)
    ... )
    >>> # ...but the actual syncmer identification is very fast
    >>> syncmer_pos, syncmers = closed_syncmer_selector.select(sequence)
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in syncmers])
    ['GGCAA', 'AAGTG', 'AGTGA', 'GTGAC']
    """

    def __init__(self, alphabet, k, s, permutation=None, offset=(0,)):
        super().__init__(alphabet, k, s, permutation, offset)
        # Check for all possible *k-mers*, whether they are syncmers
        # The uncached implementation of the superclass is used here,
        # as the mask used by this class does not exist yet
        n_kmers = len(self.kmer_alphabet)
        # Explicit dtype: k-mer codes are documented as int64,
        # independent of the platform default integer
        all_kmers = np.arange(n_kmers, dtype=np.int64)
        syncmer_indices, _ = super().select_from_kmers(all_kmers)
        # Convert the index array into a boolean mask,
        # that can be indexed directly with k-mer codes
        self._syncmer_mask = np.zeros(n_kmers, dtype=bool)
        self._syncmer_mask[syncmer_indices] = True


    def select(self, sequence, bint alphabet_check=True):
        """
        select(sequence, alphabet_check=True)

        Obtain all overlapping *k-mers* from a sequence and select
        the syncmers from them.

        Parameters
        ----------
        sequence : Sequence
            The sequence to find the syncmers in.
            Must be compatible with the given `kmer_alphabet`
        alphabet_check: bool, optional
            If set to false, the compatibility between the alphabet
            of the sequence and the alphabet of the
            :class:`CachedSyncmerSelector`
            is not checked to gain additional performance.

        Returns
        -------
        syncmer_indices : ndarray, dtype=int
            The sequence indices where the syncmers start.
        syncmers : ndarray, dtype=np.int64
            The corresponding *k-mer* codes of the syncmers.

        Raises
        ------
        ValueError
            If `alphabet_check` is true and the alphabet of the
            selector does not extend the alphabet of the sequence.
        """
        if alphabet_check:
            if not self.alphabet.extends(sequence.alphabet):
                raise ValueError(
                    "The sequence's alphabet does not fit "
                    "the selector's alphabet"
                )
        kmers = self.kmer_alphabet.create_kmers(sequence.code)
        return self.select_from_kmers(kmers)


    def select_from_kmers(self, kmers):
        """
        select_from_kmers(kmers)

        Select syncmers for the given *k-mers*.

        The *k-mers* are not required to overlap.

        Parameters
        ----------
        kmers : ndarray, dtype=np.int64
            The *k-mer* codes to select the syncmers from.

        Returns
        -------
        syncmer_indices : ndarray, dtype=int
            The indices in `kmers` that point to syncmers.
        syncmers : ndarray, dtype=np.int64
            The corresponding *k-mer* codes of the syncmers.
        """
        # The k-mer codes are used as indices into the precomputed mask
        syncmer_pos = np.where(self._syncmer_mask[kmers])[0]
        return syncmer_pos, kmers[syncmer_pos]
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
class MincodeSelector:
    r"""
    MincodeSelector(kmer_alphabet, compression, permutation=None)

    Selects the :math:`1/\text{compression}` *smallest* *k-mers* from
    :class:`KmerAlphabet`. :footcite:`Edgar2021`

    '*Small*' refers to the lexicographical order, or alternatively a
    custom order if `permutation` is given.
    The *Mincode* approach tries to reduce the number of *k-mers* from a
    sequence by the factor `compression`, while it still ensures that
    a common set of *k-mers* are selected from similar sequences.

    Parameters
    ----------
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet that defines the *k-mer* size and the type
        of sequence this :class:`MincodeSelector` can be applied on.
    compression : float
        Defines the compression factor, i.e. approximately
        :math:`1/\text{compression}` of the *k-mers* will be sampled
        from a sequence.
        Must be at least 1.
    permutation : Permutation
        If set, the *k-mer* order is permuted, i.e.
        the *k-mers* are selected based on the ordering of the sort keys
        from :class:`Permutation.permute()`.
        By default, the standard order of the :class:`KmerAlphabet` is
        used.
        This standard order is often the lexicographical order.

    Attributes
    ----------
    kmer_alphabet : KmerAlphabet
        The *k-mer* alphabet.
    compression : float
        The compression factor.
    threshold : float
        Based on the compression factor and the range of (permuted)
        *k-mer* values this threshold is calculated.
        All *k-mers*, that are smaller than this value are selected.
    permutation : Permutation
        The permutation.

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> kmer_alph = KmerAlphabet(NucleotideSequence.alphabet_unamb, k=2)
    >>> kmers = np.arange(len(kmer_alph))
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in kmers])
    ['AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
    >>> # Select 1/4 of *k-mers* based on lexicographical k-mer order
    >>> selector = MincodeSelector(kmer_alph, 4)
    >>> subset_pos, kmers_subset = selector.select_from_kmers(kmers)
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in kmers_subset])
    ['AA', 'AC', 'AG', 'AT']
    >>> # Select 1/4 based on randomized k-mer order
    >>> selector = MincodeSelector(kmer_alph, 4, permutation=RandomPermutation())
    >>> subset_pos, kmers_subset = selector.select_from_kmers(kmers)
    >>> print(["".join(kmer_alph.decode(kmer)) for kmer in kmers_subset])
    ['AG', 'CT', 'GA', 'TC']
    """

    def __init__(self, kmer_alphabet, compression, permutation=None):
        if compression < 1:
            raise ValueError(
                "Compression factor must be equal to or larger than 1"
            )
        self._compression = compression
        self._kmer_alph = kmer_alphabet
        self._permutation = permutation
        # The range of possible sort keys: without a permutation the
        # k-mer codes themselves are the sort keys
        if permutation is None:
            permutation_offset = 0
            permutation_range = len(kmer_alphabet)
        else:
            permutation_offset = permutation.min
            permutation_range = permutation.max - permutation.min + 1
        # Sort keys below this value make up the lower
        # '1/compression' part of the sort key range
        self._threshold = permutation_offset + permutation_range / compression


    @property
    def kmer_alphabet(self):
        return self._kmer_alph

    @property
    def compression(self):
        return self._compression

    @property
    def threshold(self):
        return self._threshold

    @property
    def permutation(self):
        return self._permutation


    def select(self, sequence, bint alphabet_check=True):
        """
        select(sequence, alphabet_check=True)

        Obtain all overlapping *k-mers* from a sequence and select
        the *Mincode k-mers* from them.

        Parameters
        ----------
        sequence : Sequence
            The sequence to find the *Mincode k-mers* in.
            Must be compatible with the given `kmer_alphabet`
        alphabet_check: bool, optional
            If set to false, the compatibility between the alphabet
            of the sequence and the alphabet of the
            :class:`MincodeSelector`
            is not checked to gain additional performance.

        Returns
        -------
        mincode_indices : ndarray, dtype=int
            The sequence indices where the *Mincode k-mers* start.
        mincode : ndarray, dtype=np.int64
            The corresponding *Mincode k-mer* codes.

        Raises
        ------
        ValueError
            If `alphabet_check` is true and the base alphabet of the
            `kmer_alphabet` does not extend the alphabet of the
            sequence.
        """
        if alphabet_check:
            if not self._kmer_alph.base_alphabet.extends(sequence.alphabet):
                raise ValueError(
                    "The sequence's alphabet does not fit the k-mer alphabet"
                )
        kmers = self._kmer_alph.create_kmers(sequence.code)
        return self.select_from_kmers(kmers)


    def select_from_kmers(self, kmers):
        """
        select_from_kmers(kmers)

        Select *Mincode k-mers*.

        The given *k-mers* are not required to overlap.

        Parameters
        ----------
        kmers : ndarray, dtype=np.int64
            The *k-mer* codes to select the *Mincode k-mers* from.

        Returns
        -------
        mincode_indices : ndarray, dtype=int
            The indices in `kmers` that point to *Mincode k-mers*.
        mincode : ndarray, dtype=np.int64
            The corresponding *Mincode k-mer* codes.

        Raises
        ------
        IndexError
            If the permutation returns a different number of sort keys
            than *k-mers* given to it.
        """
        if self._permutation is None:
            ordering = kmers
        else:
            ordering = self._permutation.permute(kmers)
            if len(ordering) != len(kmers):
                raise IndexError(
                    f"The Permutation is defective, it gave {len(ordering)} "
                    f"sort keys for {len(kmers)} k-mers"
                )

        # Return indices instead of the boolean mask itself, as
        # documented and as done by the other selectors
        mincode_pos = np.where(ordering < self._threshold)[0]
        return mincode_pos, kmers[mincode_pos]
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _minimize(int64[:] kmers, int64[:] ordering, uint32 window,
              bint include_duplicates):
    """
    Implementation of the algorithm originally devised by
    Marcel van Herk.

    In this implementation the frame is chosen differently:
    For a position 'x' the frame ranges from 'x' to 'x + window-1'
    instead of 'x - (window-1)/2' to 'x + (window-1)/2'.

    Parameters
    ----------
    kmers : ndarray, dtype=np.int64
        The values to report for the selected positions.
    ordering : ndarray, dtype=np.int64
        The sort keys the minimum is determined from.
        Must have the same length as `kmers`.
    window : int
        The number of consecutive positions in each frame.
    include_duplicates : bool
        If false, a frame is skipped, if its minimum is at the same
        position as the minimum of the previous frame.

    Returns
    -------
    minimizer_pos : ndarray, dtype=np.uint32
        The position of the minimum sort key for each (reported)
        frame.
        At ties the leftmost position is taken.
    minimizers : ndarray, dtype=np.int64
        The values from `kmers` at these positions.

    Raises
    ------
    ValueError
        If `window` is 0 or `kmers` and `ordering` differ in length.
    """
    cdef uint32 seq_i
    cdef uint32 n_windows
    # Counts the actual number of minimizers for later trimming
    cdef uint32 n_minimizers = 0
    cdef uint32[:] minimizer_pos
    cdef int64[:] minimizers
    # Variable for the position of the previous cumulative minimum
    cdef uint32 prev_argcummin
    # Variables for the position of the current cumulative minimum
    cdef uint32 combined_argcummin, forward_argcummin, reverse_argcummin
    # Variables for the current cumulative minimum
    cdef int64 forward_cummin, reverse_cummin
    # Variables for cumulative minima at all positions
    cdef uint32[:] forward_argcummins
    cdef uint32[:] reverse_argcummins

    # As bounds checks are disabled, invalid input must be caught
    # before any element is accessed
    if window == 0:
        raise ValueError("Window size must be at least 1")
    if ordering.shape[0] != kmers.shape[0]:
        raise ValueError(
            f"Got {ordering.shape[0]} sort keys for {kmers.shape[0]} k-mers"
        )
    if kmers.shape[0] < window:
        # Not a single complete frame -> no minimizers
        # Without this early return the unsigned 'n_windows'
        # would underflow
        return (
            np.empty(0, dtype=np.uint32),
            np.empty(0, dtype=np.int64)
        )

    n_windows = kmers.shape[0] - (window - 1)
    # Pessimistic array allocation size
    # -> Expect that every window has a new minimizer
    minimizer_pos = np.empty(n_windows, dtype=np.uint32)
    minimizers = np.empty(n_windows, dtype=np.int64)

    # Assign a value that can never occur for the start,
    # as in the beginning there is no previous value
    prev_argcummin = kmers.shape[0]

    forward_argcummins = _chunk_wise_forward_argcummin(ordering, window)
    reverse_argcummins = _chunk_wise_reverse_argcummin(ordering, window)

    for seq_i in range(n_windows):
        # The frame spans at most two chunks: The forward pass covers
        # the part in the right chunk, the reverse pass the part in
        # the left chunk
        forward_argcummin = forward_argcummins[seq_i + window - 1]
        reverse_argcummin = reverse_argcummins[seq_i]
        forward_cummin = ordering[forward_argcummin]
        reverse_cummin = ordering[reverse_argcummin]

        # At ties the leftmost position is taken,
        # which stems from the reverse pass
        if forward_cummin < reverse_cummin:
            combined_argcummin = forward_argcummin
        else:
            combined_argcummin = reverse_argcummin

        # If the same minimizer position was observed before, the
        # duplicate is simply ignored, if 'include_duplicates' is false
        if include_duplicates or combined_argcummin != prev_argcummin:
            # Append minimizer to return value
            minimizer_pos[n_minimizers] = combined_argcummin
            minimizers[n_minimizers] = kmers[combined_argcummin]
            n_minimizers += 1
            prev_argcummin = combined_argcummin

    return (
        np.asarray(minimizer_pos)[:n_minimizers],
        np.asarray(minimizers)[:n_minimizers]
    )
|
|
887
|
+
|
|
888
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size):
    """
    Argument of the cumulative minimum.

    The cumulative minimum is restarted at every position that is a
    multiple of `chunk_size`.
    At ties the leftmost position within the chunk is kept.

    `chunk_size` must not be 0, as it is used as divisor with
    `cdivision` enabled.

    Returns a ``uint32`` memoryview with one position for each element
    in `values`.
    """
    cdef uint32 seq_i

    cdef uint32 current_min_i = 0
    # Overwritten at the first position, as it always begins a chunk
    cdef int64 current_min = 0
    cdef int64 current_val
    cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32)

    for seq_i in range(values.shape[0]):
        current_val = values[seq_i]
        # The first value of a chunk is always the initial minimum:
        # Comparing against a maximum-integer placeholder instead would
        # keep the stale position from the previous chunk, if the
        # value equals that placeholder
        if seq_i % chunk_size == 0 or current_val < current_min:
            current_min_i = seq_i
            current_min = current_val
        min_pos[seq_i] = current_min_i

    return min_pos
|
|
914
|
+
|
|
915
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size):
    """
    Argument of the chunk-wise cumulative minimum, starting from
    the end of `values` and iterating backwards.

    This is the counterpart of the forward variant.
    The separation into two functions leads to code duplication.
    However, a single implementation with reversed `values` as input
    has some disadvantages:

    - Indices must be transformed so that they point to the
      non-reversed `values`
    - There are issues in selecting the leftmost argument
    - An offset is necessary to ensure alignment of chunks with forward
      pass

    Hence, a separate 'reverse' variant of the function was implemented.
    """
    cdef uint32 pos

    cdef uint32 best_pos = 0
    cdef int64 val
    # Placeholder that is replaced by the first value that is visited
    cdef int64 best_val = MAX_INT_64
    cdef uint32[:] argmins = np.empty(values.shape[0], dtype=np.uint32)

    for pos in reversed(range(values.shape[0])):
        # In contrast to the forward pass, a chunk begins at its
        # rightmost position, i.e. directly left of the chunk border
        if pos % chunk_size == chunk_size - 1:
            best_val = MAX_INT_64
        val = values[pos]
        # In contrast to the forward pass '<=' is used, to ensure
        # that the leftmost argument is selected at ties
        if val <= best_val:
            best_pos = pos
            best_val = val
        argmins[pos] = best_pos

    return argmins
|