biotite 1.5.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-313-x86_64-linux-gnu.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.align"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["align_ungapped", "align_optimal"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
from .tracetable cimport follow_trace, get_trace_linear, get_trace_affine, \
|
|
12
|
+
TraceDirectionLinear, TraceDirectionAffine
|
|
13
|
+
|
|
14
|
+
from .alignment import Alignment
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
ctypedef np.int32_t int32
|
|
19
|
+
ctypedef np.int64_t int64
|
|
20
|
+
ctypedef np.uint8_t uint8
|
|
21
|
+
ctypedef np.uint16_t uint16
|
|
22
|
+
ctypedef np.uint32_t uint32
|
|
23
|
+
ctypedef np.uint64_t uint64
|
|
24
|
+
|
|
25
|
+
ctypedef fused CodeType1:
|
|
26
|
+
uint8
|
|
27
|
+
uint16
|
|
28
|
+
uint32
|
|
29
|
+
uint64
|
|
30
|
+
ctypedef fused CodeType2:
|
|
31
|
+
uint8
|
|
32
|
+
uint16
|
|
33
|
+
uint32
|
|
34
|
+
uint64
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def align_ungapped(seq1, seq2, matrix, score_only=False):
    """
    align_ungapped(seq1, seq2, matrix, score_only=False)

    Align two equally long sequences position by position, i.e. without
    introducing any gaps.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be scored against each other.
        Both must have the same length.
    matrix : SubstitutionMatrix
        The substitution matrix providing the score for each pair of
        symbols.
        Its alphabets must extend the alphabets of the respective
        sequences.
    score_only : bool, optional
        If true, only the similarity score is returned and no
        :class:`Alignment` object is created.

    Returns
    -------
    score : Alignment or int
        The trivial alignment of both sequences, or only its score
        if `score_only` is true.

    Raises
    ------
    ValueError
        If the sequences have different lengths or if their alphabets
        are not covered by the alphabets of `matrix`.
    """
    length1 = len(seq1)
    length2 = len(seq2)
    if length1 != length2:
        raise ValueError(
            f"Different sequence lengths ({length1:d} and {length2:d})"
        )
    if not (
        matrix.get_alphabet1().extends(seq1.get_alphabet())
        and matrix.get_alphabet2().extends(seq2.get_alphabet())
    ):
        raise ValueError("The sequences' alphabets do not fit the matrix")

    score = _add_scores(seq1.code, seq2.code, matrix.score_matrix())
    if score_only:
        return score

    # No actual alignment procedure is required:
    # Each position is simply paired with the same position in the
    # other sequence, giving the trivial trace
    # [[0 0]
    #  [1 1]
    #  [2 2]
    #   ... ]
    positions = np.arange(length1)
    trivial_trace = np.vstack((positions, positions)).T
    return Alignment(
        sequences=[seq1, seq2],
        trace=trivial_trace,
        score=score,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _add_scores(CodeType1[:] code1 not None,
                CodeType2[:] code2 not None,
                const int32[:,:] matrix not None):
    """
    Sum up the substitution scores of all position-wise symbol pairs.

    Parameters
    ----------
    code1, code2
        The symbol codes of the two sequences.
        The number of iterations is taken from `code1`.
    matrix
        The score matrix, indexed by ``[code1[i], code2[i]]``.

    Returns
    -------
    score
        The accumulated score.

    Notes
    -----
    Bounds checking and negative index wraparound are switched off for
    this function via the decorators.
    Hence, it is up to the caller to pass a `code2` that is at least as
    long as `code1` and symbol codes that are valid indices for
    `matrix`.
    """
    cdef int pos
    cdef int32 total = 0
    for pos in range(code1.shape[0]):
        total += matrix[code1[pos], code2[pos]]
    return total
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False,
                  max_number=1000):
    """
    align_optimal(seq1, seq2, matrix, gap_penalty=-10,
                  terminal_penalty=True, local=False, max_number=1000)

    Perform an optimal alignment of two sequences based on a
    dynamic programming algorithm.

    This algorithm yields an optimal alignment, i.e. the sequences
    are aligned in the way that results in the highest similarity
    score. This operation can be very time and space consuming,
    because both scale linearly with each sequence length.

    The aligned sequences do not need to be instances from the same
    :class:`Sequence` subclass, since they do not need to have the same
    alphabet. The only requirement is that the
    :class:`SubstitutionMatrix`' alphabets extend the alphabets of the
    two sequences.

    This function can either perform a global alignment, based on the
    Needleman-Wunsch algorithm :footcite:`Needleman1970` or a local
    alignment, based on the Smith–Waterman algorithm
    :footcite:`Smith1981`.

    Furthermore this function supports affine gap penalties using the
    Gotoh algorithm :footcite:`Gotoh1982`, however, this requires
    approximately 4 times the RAM space and execution time.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be aligned.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    gap_penalty : int or tuple(int, int), optional
        If an integer is provided, the value will be interpreted as
        linear gap penalty.
        Any integral number is accepted, including NumPy integer
        scalars.
        If a tuple is provided, an affine gap penalty is used.
        The first integer in the tuple is the gap opening penalty,
        the second integer is the gap extension penalty.
        The values must not be positive.
    terminal_penalty : bool, optional
        If true, gap penalties are applied to terminal gaps.
        If `local` is true, this parameter has no effect.
    local : bool, optional
        If false, a global alignment is performed, otherwise a local
        alignment is performed.
    max_number : int, optional
        The maximum number of alignments returned.
        When the number of branches exceeds this value in the traceback
        step, no further branches are created.

    Returns
    -------
    alignments : list, type=Alignment
        A list of alignments.
        Each alignment in the list has the same maximum similarity
        score.

    Raises
    ------
    ValueError
        If the alphabets of the sequences are not covered by the
        alphabets of `matrix`, if a gap penalty is positive or if
        `max_number` is less than 1.
    TypeError
        If `gap_penalty` is neither an integer nor a tuple.

    See Also
    --------
    align_banded

    References
    ----------

    .. footbibliography::

    Examples
    --------

    >>> seq1 = NucleotideSequence("ATACGCTTGCT")
    >>> seq2 = NucleotideSequence("AGGCGCAGCT")
    >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
    >>> ali = align_optimal(seq1, seq2, matrix, gap_penalty=-6)
    >>> for a in ali:
    ...     print(a, "\\n")
    ATACGCTTGCT
    AGGCGCA-GCT
    <BLANKLINE>
    ATACGCTTGCT
    AGGCGC-AGCT
    <BLANKLINE>
    """
    # Function-scope import, as the abstract base class is only
    # required for the validation of 'gap_penalty'
    from numbers import Integral

    # Check matrix alphabets
    if not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
       or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
        raise ValueError("The sequences' alphabets do not fit the matrix")
    # Check if gap penalty is linear or affine
    # 'isinstance()' is used instead of an exact type comparison,
    # so that integer-like scalars (e.g. NumPy integers) and tuple
    # subclasses are not rejected
    if isinstance(gap_penalty, Integral):
        if gap_penalty > 0:
            raise ValueError("Gap penalty must be negative")
        # Normalize to a builtin integer, so the subsequent arithmetics
        # do not depend on the width of the given scalar type
        gap_penalty = int(gap_penalty)
        affine_penalty = False
    elif isinstance(gap_penalty, tuple):
        if gap_penalty[0] > 0 or gap_penalty[1] > 0:
            raise ValueError("Gap penalty must be negative")
        affine_penalty = True
    else:
        raise TypeError("Gap penalty must be either integer or tuple")
    # Check if max_number is reasonable
    if max_number < 1:
        raise ValueError(
            "Maximum number of returned alignments must be at least 1"
        )


    # This implementation uses transposed tables in comparison
    # to the common visualization
    # This means the first sequence is on the left
    # and the second sequence is at the top
    trace_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.uint8)
    code1 = seq1.code
    code2 = seq2.code

    # Table filling
    ###############
    if affine_penalty:
        # Affine gap penalty
        gap_open = gap_penalty[0]
        gap_ext = gap_penalty[1]
        # Value for negative infinity
        # Used to prevent unallowed state transitions
        # Subtraction of gap_open, gap_ext and lowest score value
        # to prevent integer overflow
        neg_inf = np.iinfo(np.int32).min - gap_open - gap_ext
        min_score = np.min(matrix.score_matrix())
        if min_score < 0:
            neg_inf -= min_score
        # m_table, g1_table and g2_table are the 3 score tables
        m_table = np.zeros((len(seq1)+1, len(seq2)+1), dtype=np.int32)
        # Fill with negative infinity values to prevent that an
        # alignment trace starts with a gap extension
        # instead of a gap opening
        g1_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        g2_table = np.full((len(seq1)+1, len(seq2)+1), neg_inf, dtype=np.int32)
        # Disallow trace coming from the match table on the
        # left column/top row, as these represent terminal gaps
        m_table [0, 1:] = neg_inf
        m_table [1:, 0] = neg_inf
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column
                g1_table[0, 1:] = (np.arange(len(seq2)) * gap_ext) + gap_open
                g2_table[1:, 0] = (np.arange(len(seq1)) * gap_ext) + gap_open
            else:
                g1_table[0, 1:] = np.zeros(len(seq2))
                g2_table[1:, 0] = np.zeros(len(seq1))
            # The traceback of a global alignment needs to be able to
            # run along the first row/column,
            # irrespective of whether terminal gaps are penalized
            trace_table[0, 1] = TraceDirectionAffine.MATCH_TO_GAP_LEFT
            trace_table[0, 2:] = TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
            trace_table[1, 0] = TraceDirectionAffine.MATCH_TO_GAP_TOP
            trace_table[2: ,0] = TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
        else:
            g1_table[0, 1:] = np.zeros(len(seq2))
            g2_table[1:, 0] = np.zeros(len(seq1))
        _fill_align_table_affine(code1, code2,
                                 matrix.score_matrix(), trace_table,
                                 m_table, g1_table, g2_table,
                                 gap_open, gap_ext, terminal_penalty, local)
    else:
        # Linear gap penalty
        # The table for saving the scores
        score_table = np.zeros(( len(seq1)+1, len(seq2)+1 ), dtype=np.int32)
        # Initialize first row and column for global alignments
        if not local:
            if terminal_penalty:
                # Terminal gaps are penalized
                # -> Penalties in first row/column
                score_table[:,0] = np.arange(len(seq1)+1) * gap_penalty
                score_table[0,:] = np.arange(len(seq2)+1) * gap_penalty
            trace_table[1:,0] = TraceDirectionLinear.GAP_TOP
            trace_table[0,1:] = TraceDirectionLinear.GAP_LEFT
        _fill_align_table(code1, code2, matrix.score_matrix(), trace_table,
                          score_table, gap_penalty, terminal_penalty, local)


    # Traceback
    ###########
    # Stores all possible traces (= possible alignments)
    # A trace stores the indices of the aligned symbols
    # in both sequences
    trace_list = []
    # Lists of trace starting indices
    i_list = np.zeros(0, dtype=int)
    j_list = np.zeros(0, dtype=int)
    # List of start states
    # State specifies the table the trace starts in
    state_list = np.zeros(0, dtype=int)
    if local:
        # The start point is the maximal score in the table
        # Multiple starting points possible,
        # when duplicates of maximal score exist
        if affine_penalty:
            # The maximum score in the gap score tables do not need to
            # be considered, as these starting positions would indicate
            # that the local alignment starts with a gap
            # Hence the maximum score value in these tables is always
            # less than in the match table
            max_score = np.max(m_table)
            i_list, j_list = np.where((m_table == max_score))
            state_list = np.append(state_list, np.full(len(i_list), 1))
        else:
            max_score = np.max(score_table)
            i_list, j_list = np.where((score_table == max_score))
            # State is always 0 for linear gap penalty
            # since there is only one table
            state_list = np.zeros(len(i_list), dtype=int)
    else:
        # The start point is the last element in the table
        # -1 in start indices due to sequence offset mentioned before
        i_start = trace_table.shape[0] -1
        j_start = trace_table.shape[1] -1
        if affine_penalty:
            max_score = max(m_table[i_start,j_start],
                            g1_table[i_start,j_start],
                            g2_table[i_start,j_start])
            # Each table that reaches the maximum score in its last
            # cell is an equally valid starting point for the traceback
            if m_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 1)
            if g1_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 2)
            if g2_table[i_start,j_start] == max_score:
                i_list = np.append(i_list, i_start)
                j_list = np.append(j_list, j_start)
                state_list = np.append(state_list, 3)
        else:
            i_list = np.append(i_list, i_start)
            j_list = np.append(j_list, j_start)
            state_list = np.append(state_list, 0)
            max_score = score_table[i_start,j_start]
    # Follow the traces specified in state and indices lists
    cdef int curr_trace_count
    for i_start, j_start, state_start in zip(i_list, j_list, state_list):
        # Pessimistic array allocation:
        # The maximum trace length arises from an alignment, where each
        # symbol is aligned to a gap
        trace = np.full(( i_start+1 + j_start+1, 2 ), -1, dtype=np.int64)
        # The counter is reset for each starting point
        curr_trace_count = 1
        follow_trace(
            trace_table, False, i_start, j_start, 0, trace, trace_list,
            state=state_start, curr_trace_count=&curr_trace_count,
            max_trace_count=max_number,
            # Diagonals are only needed for banded alignments
            lower_diag=0, upper_diag=0
        )

    # Replace gap entries in trace with -1
    for i, trace in enumerate(trace_list):
        trace = np.flip(trace, axis=0)
        gap_filter = np.zeros(trace.shape, dtype=bool)
        # Only the first occurrence of each index is kept per column,
        # every other entry of the column is marked as gap
        gap_filter[np.unique(trace[:,0], return_index=True)[1], 0] = True
        gap_filter[np.unique(trace[:,1], return_index=True)[1], 1] = True
        trace[~gap_filter] = -1
        trace_list[i] = trace

    # Limit the number of generated alignments to `max_number`:
    # In most cases this is achieved by discarding branches in
    # 'follow_trace()', however, if multiple local alignment starts
    # are used, the number of created traces are the number of
    # starts times `max_number`
    trace_list = trace_list[:max_number]
    return [Alignment([seq1, seq2], trace, max_score) for trace in trace_list]
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table(CodeType1[:] code1 not None,
                      CodeType2[:] code2 not None,
                      const int32[:,:] matrix not None,
                      uint8[:,:] trace_table not None,
                      int32[:,:] score_table not None,
                      int gap_penalty,
                      bint term_penalty,
                      bint local):
    """
    Fill an alignment table with linear gap penalty using dynamic
    programming.

    The tables are modified in-place, nothing is returned.
    The first row and column of the tables are not touched by this
    function.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    score_table
        The alignment table.
        The matrix is filled in this function.
    gap_penalty
        The linear gap penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
    local
        Indicates, whether a local alignment should be performed.
    """

    cdef int i, j
    # Index of the last row/column; typed explicitly, as these values
    # are compared against `i` and `j` in every cell of the inner loop
    cdef int i_max, j_max
    cdef int32 from_diag, from_left, from_top
    cdef uint8 trace
    cdef int32 score

    # For local alignments terminal gaps on the right side are ignored
    # anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = score_table.shape[0] - 1
    j_max = score_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, score_table.shape[0]):
        for j in range(1, score_table.shape[1]):
            # Evaluate score from diagonal direction
            # -1 in sequence index is necessary
            # due to the shift of the sequences
            # to the bottom/right in the table
            from_diag = score_table[i-1, j-1] + matrix[code1[i-1], code2[j-1]]
            # Evaluate score from left direction
            # In the last row a gap is terminal -> optionally free
            if not term_penalty and i == i_max:
                from_left = score_table[i, j-1]
            else:
                from_left = score_table[i, j-1] + gap_penalty
            # Evaluate score from top direction
            # In the last column a gap is terminal -> optionally free
            if not term_penalty and j == j_max:
                from_top = score_table[i-1, j]
            else:
                from_top = score_table[i-1, j] + gap_penalty

            trace = get_trace_linear(from_diag, from_left, from_top, &score)

            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the score of the cell remains 0
            # and the trace ends here
            # (neither table is written for this cell)
            if local and score <= 0:
                continue

            score_table[i,j] = score
            trace_table[i,j] = trace
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _fill_align_table_affine(CodeType1[:] code1 not None,
                             CodeType2[:] code2 not None,
                             const int32[:,:] matrix not None,
                             uint8[:,:] trace_table not None,
                             int32[:,:] m_table not None,
                             int32[:,:] g1_table not None,
                             int32[:,:] g2_table not None,
                             int gap_open,
                             int gap_ext,
                             bint term_penalty,
                             bint local):
    """
    Fill an alignment table with affine gap penalty using dynamic
    programming.

    The tables are modified in-place, nothing is returned.
    The first row and column of the tables are not touched by this
    function.

    Parameters
    ----------
    code1, code2
        The sequence code of each sequence to be aligned.
    matrix
        The score matrix obtained from the :class:`SubstitutionMatrix`
        object.
    trace_table
        A matrix containing values indicating the direction for the
        traceback step.
        The matrix is filled in this function.
    m_table, g1_table, g2_table
        The alignment tables containing the scores.
        `m_table` contains values for matches.
        `g1_table` contains values for gaps in the first sequence.
        `g2_table` contains values for gaps in the second sequence.
        The tables are filled in this function.
    gap_open
        The gap opening penalty.
    gap_ext
        The gap extension penalty.
    term_penalty
        Indicates, whether terminal gaps should be penalized.
    local
        Indicates, whether a local alignment should be performed.
    """

    cdef int i, j
    # Index of the last row/column; typed explicitly, as these values
    # are compared against `i` and `j` in every cell of the inner loop
    cdef int i_max, j_max
    cdef int32 mm_score, g1m_score, g2m_score
    cdef int32 mg1_score, g1g1_score
    cdef int32 mg2_score, g2g2_score
    cdef int32 m_score, g1_score, g2_score
    cdef int32 similarity_score
    cdef uint8 trace

    # For local alignments terminal gaps on the right and the bottom are
    # ignored anyway, as the alignment should stop before
    if local:
        term_penalty = True
    # Used in case terminal gaps are not penalized
    i_max = trace_table.shape[0] - 1
    j_max = trace_table.shape[1] - 1

    # Starts at 1 since the first row and column are already filled
    for i in range(1, trace_table.shape[0]):
        for j in range(1, trace_table.shape[1]):
            # Calculate the scores for possible transitions
            # into the current cell
            similarity_score = matrix[code1[i-1], code2[j-1]]
            mm_score = m_table[i-1,j-1] + similarity_score
            g1m_score = g1_table[i-1,j-1] + similarity_score
            g2m_score = g2_table[i-1,j-1] + similarity_score
            # No transition from g1_table to g2_table and vice versa
            # Since this would mean adjacent gaps in both sequences
            # A substitution makes more sense in this case
            # In the last row a gap is terminal -> optionally free
            if not term_penalty and i == i_max:
                mg1_score = m_table[i,j-1]
                g1g1_score = g1_table[i,j-1]
            else:
                mg1_score = m_table[i,j-1] + gap_open
                g1g1_score = g1_table[i,j-1] + gap_ext
            # In the last column a gap is terminal -> optionally free
            if not term_penalty and j == j_max:
                mg2_score = m_table[i-1,j]
                g2g2_score = g2_table[i-1,j]
            else:
                mg2_score = m_table[i-1,j] + gap_open
                g2g2_score = g2_table[i-1,j] + gap_ext

            trace = get_trace_affine(
                mm_score, g1m_score, g2m_score,
                mg1_score, g1g1_score,
                mg2_score, g2g2_score,
                # The max score values to be written
                &m_score, &g1_score, &g2_score
            )

            # Fill values into tables
            # Local alignment specialty:
            # If score is less than or equal to 0,
            # then the score of the cell remains 0
            # and the trace ends here
            if local:
                if m_score <= 0:
                    # End trace in specific table
                    # by filtering out the respective bits
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_MATCH |
                        TraceDirectionAffine.GAP_LEFT_TO_MATCH |
                        TraceDirectionAffine.GAP_TOP_TO_MATCH
                    )
                    # m_table[i,j] remains 0
                else:
                    m_table[i,j] = m_score
                if g1_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_LEFT |
                        TraceDirectionAffine.GAP_LEFT_TO_GAP_LEFT
                    )
                    # g1_table[i,j] remains negative infinity
                else:
                    g1_table[i,j] = g1_score
                if g2_score <= 0:
                    trace &= ~(
                        TraceDirectionAffine.MATCH_TO_GAP_TOP |
                        TraceDirectionAffine.GAP_TOP_TO_GAP_TOP
                    )
                    # g2_table[i,j] remains negative infinity
                else:
                    g2_table[i,j] = g2_score
            else:
                m_table[i,j] = m_score
                g1_table[i,j] = g1_score
                g2_table[i,j] = g2_score
            # The (possibly masked) trace is stored in either case
            trace_table[i,j] = trace
|
|
Binary file
|