biotite 1.5.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +18 -0
- biotite/application/__init__.py +69 -0
- biotite/application/application.py +276 -0
- biotite/application/autodock/__init__.py +12 -0
- biotite/application/autodock/app.py +500 -0
- biotite/application/blast/__init__.py +14 -0
- biotite/application/blast/alignment.py +92 -0
- biotite/application/blast/webapp.py +428 -0
- biotite/application/clustalo/__init__.py +12 -0
- biotite/application/clustalo/app.py +223 -0
- biotite/application/dssp/__init__.py +12 -0
- biotite/application/dssp/app.py +216 -0
- biotite/application/localapp.py +342 -0
- biotite/application/mafft/__init__.py +12 -0
- biotite/application/mafft/app.py +116 -0
- biotite/application/msaapp.py +363 -0
- biotite/application/muscle/__init__.py +13 -0
- biotite/application/muscle/app3.py +227 -0
- biotite/application/muscle/app5.py +163 -0
- biotite/application/sra/__init__.py +18 -0
- biotite/application/sra/app.py +447 -0
- biotite/application/tantan/__init__.py +12 -0
- biotite/application/tantan/app.py +199 -0
- biotite/application/util.py +77 -0
- biotite/application/viennarna/__init__.py +18 -0
- biotite/application/viennarna/rnaalifold.py +310 -0
- biotite/application/viennarna/rnafold.py +254 -0
- biotite/application/viennarna/rnaplot.py +208 -0
- biotite/application/viennarna/util.py +77 -0
- biotite/application/webapp.py +76 -0
- biotite/copyable.py +71 -0
- biotite/database/__init__.py +23 -0
- biotite/database/afdb/__init__.py +12 -0
- biotite/database/afdb/download.py +197 -0
- biotite/database/entrez/__init__.py +15 -0
- biotite/database/entrez/check.py +60 -0
- biotite/database/entrez/dbnames.py +101 -0
- biotite/database/entrez/download.py +228 -0
- biotite/database/entrez/key.py +44 -0
- biotite/database/entrez/query.py +263 -0
- biotite/database/error.py +16 -0
- biotite/database/pubchem/__init__.py +21 -0
- biotite/database/pubchem/download.py +258 -0
- biotite/database/pubchem/error.py +30 -0
- biotite/database/pubchem/query.py +819 -0
- biotite/database/pubchem/throttle.py +98 -0
- biotite/database/rcsb/__init__.py +13 -0
- biotite/database/rcsb/download.py +161 -0
- biotite/database/rcsb/query.py +963 -0
- biotite/database/uniprot/__init__.py +13 -0
- biotite/database/uniprot/check.py +40 -0
- biotite/database/uniprot/download.py +126 -0
- biotite/database/uniprot/query.py +292 -0
- biotite/file.py +244 -0
- biotite/interface/__init__.py +19 -0
- biotite/interface/openmm/__init__.py +20 -0
- biotite/interface/openmm/state.py +93 -0
- biotite/interface/openmm/system.py +227 -0
- biotite/interface/pymol/__init__.py +201 -0
- biotite/interface/pymol/cgo.py +346 -0
- biotite/interface/pymol/convert.py +185 -0
- biotite/interface/pymol/display.py +267 -0
- biotite/interface/pymol/object.py +1228 -0
- biotite/interface/pymol/shapes.py +178 -0
- biotite/interface/pymol/startup.py +169 -0
- biotite/interface/rdkit/__init__.py +19 -0
- biotite/interface/rdkit/mol.py +490 -0
- biotite/interface/version.py +94 -0
- biotite/interface/warning.py +19 -0
- biotite/sequence/__init__.py +84 -0
- biotite/sequence/align/__init__.py +199 -0
- biotite/sequence/align/alignment.py +702 -0
- biotite/sequence/align/banded.cpython-314-darwin.so +0 -0
- biotite/sequence/align/banded.pyx +652 -0
- biotite/sequence/align/buckets.py +71 -0
- biotite/sequence/align/cigar.py +425 -0
- biotite/sequence/align/kmeralphabet.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +595 -0
- biotite/sequence/align/kmersimilarity.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.pyx +233 -0
- biotite/sequence/align/kmertable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3411 -0
- biotite/sequence/align/localgapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localgapped.pyx +892 -0
- biotite/sequence/align/localungapped.cpython-314-darwin.so +0 -0
- biotite/sequence/align/localungapped.pyx +279 -0
- biotite/sequence/align/matrix.py +631 -0
- biotite/sequence/align/matrix_data/3Di.mat +24 -0
- biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
- biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
- biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
- biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
- biotite/sequence/align/matrix_data/GONNET.mat +26 -0
- biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
- biotite/sequence/align/matrix_data/MATCH.mat +25 -0
- biotite/sequence/align/matrix_data/NUC.mat +25 -0
- biotite/sequence/align/matrix_data/PAM10.mat +34 -0
- biotite/sequence/align/matrix_data/PAM100.mat +34 -0
- biotite/sequence/align/matrix_data/PAM110.mat +34 -0
- biotite/sequence/align/matrix_data/PAM120.mat +34 -0
- biotite/sequence/align/matrix_data/PAM130.mat +34 -0
- biotite/sequence/align/matrix_data/PAM140.mat +34 -0
- biotite/sequence/align/matrix_data/PAM150.mat +34 -0
- biotite/sequence/align/matrix_data/PAM160.mat +34 -0
- biotite/sequence/align/matrix_data/PAM170.mat +34 -0
- biotite/sequence/align/matrix_data/PAM180.mat +34 -0
- biotite/sequence/align/matrix_data/PAM190.mat +34 -0
- biotite/sequence/align/matrix_data/PAM20.mat +34 -0
- biotite/sequence/align/matrix_data/PAM200.mat +34 -0
- biotite/sequence/align/matrix_data/PAM210.mat +34 -0
- biotite/sequence/align/matrix_data/PAM220.mat +34 -0
- biotite/sequence/align/matrix_data/PAM230.mat +34 -0
- biotite/sequence/align/matrix_data/PAM240.mat +34 -0
- biotite/sequence/align/matrix_data/PAM250.mat +34 -0
- biotite/sequence/align/matrix_data/PAM260.mat +34 -0
- biotite/sequence/align/matrix_data/PAM270.mat +34 -0
- biotite/sequence/align/matrix_data/PAM280.mat +34 -0
- biotite/sequence/align/matrix_data/PAM290.mat +34 -0
- biotite/sequence/align/matrix_data/PAM30.mat +34 -0
- biotite/sequence/align/matrix_data/PAM300.mat +34 -0
- biotite/sequence/align/matrix_data/PAM310.mat +34 -0
- biotite/sequence/align/matrix_data/PAM320.mat +34 -0
- biotite/sequence/align/matrix_data/PAM330.mat +34 -0
- biotite/sequence/align/matrix_data/PAM340.mat +34 -0
- biotite/sequence/align/matrix_data/PAM350.mat +34 -0
- biotite/sequence/align/matrix_data/PAM360.mat +34 -0
- biotite/sequence/align/matrix_data/PAM370.mat +34 -0
- biotite/sequence/align/matrix_data/PAM380.mat +34 -0
- biotite/sequence/align/matrix_data/PAM390.mat +34 -0
- biotite/sequence/align/matrix_data/PAM40.mat +34 -0
- biotite/sequence/align/matrix_data/PAM400.mat +34 -0
- biotite/sequence/align/matrix_data/PAM410.mat +34 -0
- biotite/sequence/align/matrix_data/PAM420.mat +34 -0
- biotite/sequence/align/matrix_data/PAM430.mat +34 -0
- biotite/sequence/align/matrix_data/PAM440.mat +34 -0
- biotite/sequence/align/matrix_data/PAM450.mat +34 -0
- biotite/sequence/align/matrix_data/PAM460.mat +34 -0
- biotite/sequence/align/matrix_data/PAM470.mat +34 -0
- biotite/sequence/align/matrix_data/PAM480.mat +34 -0
- biotite/sequence/align/matrix_data/PAM490.mat +34 -0
- biotite/sequence/align/matrix_data/PAM50.mat +34 -0
- biotite/sequence/align/matrix_data/PAM500.mat +34 -0
- biotite/sequence/align/matrix_data/PAM60.mat +34 -0
- biotite/sequence/align/matrix_data/PAM70.mat +34 -0
- biotite/sequence/align/matrix_data/PAM80.mat +34 -0
- biotite/sequence/align/matrix_data/PAM90.mat +34 -0
- biotite/sequence/align/matrix_data/PB.license +21 -0
- biotite/sequence/align/matrix_data/PB.mat +18 -0
- biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
- biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
- biotite/sequence/align/multiple.cpython-314-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +619 -0
- biotite/sequence/align/pairwise.cpython-314-darwin.so +0 -0
- biotite/sequence/align/pairwise.pyx +585 -0
- biotite/sequence/align/permutation.cpython-314-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +313 -0
- biotite/sequence/align/primes.txt +821 -0
- biotite/sequence/align/selector.cpython-314-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +954 -0
- biotite/sequence/align/statistics.py +264 -0
- biotite/sequence/align/tracetable.cpython-314-darwin.so +0 -0
- biotite/sequence/align/tracetable.pxd +64 -0
- biotite/sequence/align/tracetable.pyx +370 -0
- biotite/sequence/alphabet.py +555 -0
- biotite/sequence/annotation.py +836 -0
- biotite/sequence/codec.cpython-314-darwin.so +0 -0
- biotite/sequence/codec.pyx +155 -0
- biotite/sequence/codon.py +476 -0
- biotite/sequence/codon_tables.txt +202 -0
- biotite/sequence/graphics/__init__.py +33 -0
- biotite/sequence/graphics/alignment.py +1101 -0
- biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
- biotite/sequence/graphics/color_schemes/autumn.json +51 -0
- biotite/sequence/graphics/color_schemes/blossom.json +51 -0
- biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
- biotite/sequence/graphics/color_schemes/flower.json +51 -0
- biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
- biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
- biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
- biotite/sequence/graphics/color_schemes/ocean.json +51 -0
- biotite/sequence/graphics/color_schemes/pb_flower.json +40 -0
- biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
- biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
- biotite/sequence/graphics/color_schemes/spring.json +51 -0
- biotite/sequence/graphics/color_schemes/sunset.json +51 -0
- biotite/sequence/graphics/color_schemes/wither.json +51 -0
- biotite/sequence/graphics/colorschemes.py +170 -0
- biotite/sequence/graphics/dendrogram.py +231 -0
- biotite/sequence/graphics/features.py +544 -0
- biotite/sequence/graphics/logo.py +102 -0
- biotite/sequence/graphics/plasmid.py +712 -0
- biotite/sequence/io/__init__.py +12 -0
- biotite/sequence/io/fasta/__init__.py +22 -0
- biotite/sequence/io/fasta/convert.py +283 -0
- biotite/sequence/io/fasta/file.py +265 -0
- biotite/sequence/io/fastq/__init__.py +19 -0
- biotite/sequence/io/fastq/convert.py +117 -0
- biotite/sequence/io/fastq/file.py +507 -0
- biotite/sequence/io/genbank/__init__.py +17 -0
- biotite/sequence/io/genbank/annotation.py +269 -0
- biotite/sequence/io/genbank/file.py +573 -0
- biotite/sequence/io/genbank/metadata.py +336 -0
- biotite/sequence/io/genbank/sequence.py +173 -0
- biotite/sequence/io/general.py +201 -0
- biotite/sequence/io/gff/__init__.py +26 -0
- biotite/sequence/io/gff/convert.py +128 -0
- biotite/sequence/io/gff/file.py +449 -0
- biotite/sequence/phylo/__init__.py +36 -0
- biotite/sequence/phylo/nj.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/nj.pyx +221 -0
- biotite/sequence/phylo/tree.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/tree.pyx +1169 -0
- biotite/sequence/phylo/upgma.cpython-314-darwin.so +0 -0
- biotite/sequence/phylo/upgma.pyx +164 -0
- biotite/sequence/profile.py +561 -0
- biotite/sequence/search.py +117 -0
- biotite/sequence/seqtypes.py +720 -0
- biotite/sequence/sequence.py +373 -0
- biotite/setup_ccd.py +197 -0
- biotite/structure/__init__.py +135 -0
- biotite/structure/alphabet/__init__.py +25 -0
- biotite/structure/alphabet/encoder.py +332 -0
- biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
- biotite/structure/alphabet/i3d.py +109 -0
- biotite/structure/alphabet/layers.py +86 -0
- biotite/structure/alphabet/pb.license +21 -0
- biotite/structure/alphabet/pb.py +170 -0
- biotite/structure/alphabet/unkerasify.py +128 -0
- biotite/structure/atoms.py +1562 -0
- biotite/structure/basepairs.py +1403 -0
- biotite/structure/bonds.cpython-314-darwin.so +0 -0
- biotite/structure/bonds.pyx +2036 -0
- biotite/structure/box.py +724 -0
- biotite/structure/celllist.cpython-314-darwin.so +0 -0
- biotite/structure/celllist.pyx +864 -0
- biotite/structure/chains.py +310 -0
- biotite/structure/charges.cpython-314-darwin.so +0 -0
- biotite/structure/charges.pyx +520 -0
- biotite/structure/compare.py +683 -0
- biotite/structure/density.py +109 -0
- biotite/structure/dotbracket.py +213 -0
- biotite/structure/error.py +39 -0
- biotite/structure/filter.py +591 -0
- biotite/structure/geometry.py +817 -0
- biotite/structure/graphics/__init__.py +13 -0
- biotite/structure/graphics/atoms.py +243 -0
- biotite/structure/graphics/rna.py +298 -0
- biotite/structure/hbond.py +425 -0
- biotite/structure/info/__init__.py +24 -0
- biotite/structure/info/atom_masses.json +121 -0
- biotite/structure/info/atoms.py +98 -0
- biotite/structure/info/bonds.py +149 -0
- biotite/structure/info/ccd.py +200 -0
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/info/groups.py +128 -0
- biotite/structure/info/masses.py +121 -0
- biotite/structure/info/misc.py +137 -0
- biotite/structure/info/radii.py +267 -0
- biotite/structure/info/standardize.py +185 -0
- biotite/structure/integrity.py +213 -0
- biotite/structure/io/__init__.py +29 -0
- biotite/structure/io/dcd/__init__.py +13 -0
- biotite/structure/io/dcd/file.py +67 -0
- biotite/structure/io/general.py +243 -0
- biotite/structure/io/gro/__init__.py +14 -0
- biotite/structure/io/gro/file.py +343 -0
- biotite/structure/io/mol/__init__.py +20 -0
- biotite/structure/io/mol/convert.py +112 -0
- biotite/structure/io/mol/ctab.py +420 -0
- biotite/structure/io/mol/header.py +120 -0
- biotite/structure/io/mol/mol.py +149 -0
- biotite/structure/io/mol/sdf.py +940 -0
- biotite/structure/io/netcdf/__init__.py +13 -0
- biotite/structure/io/netcdf/file.py +64 -0
- biotite/structure/io/pdb/__init__.py +20 -0
- biotite/structure/io/pdb/convert.py +389 -0
- biotite/structure/io/pdb/file.py +1380 -0
- biotite/structure/io/pdb/hybrid36.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdb/hybrid36.pyx +242 -0
- biotite/structure/io/pdbqt/__init__.py +15 -0
- biotite/structure/io/pdbqt/convert.py +113 -0
- biotite/structure/io/pdbqt/file.py +688 -0
- biotite/structure/io/pdbx/__init__.py +23 -0
- biotite/structure/io/pdbx/bcif.py +674 -0
- biotite/structure/io/pdbx/cif.py +1091 -0
- biotite/structure/io/pdbx/component.py +251 -0
- biotite/structure/io/pdbx/compress.py +362 -0
- biotite/structure/io/pdbx/convert.py +2113 -0
- biotite/structure/io/pdbx/encoding.cpython-314-darwin.so +0 -0
- biotite/structure/io/pdbx/encoding.pyx +1078 -0
- biotite/structure/io/trajfile.py +696 -0
- biotite/structure/io/trr/__init__.py +13 -0
- biotite/structure/io/trr/file.py +43 -0
- biotite/structure/io/util.py +38 -0
- biotite/structure/io/xtc/__init__.py +13 -0
- biotite/structure/io/xtc/file.py +43 -0
- biotite/structure/mechanics.py +72 -0
- biotite/structure/molecules.py +337 -0
- biotite/structure/pseudoknots.py +622 -0
- biotite/structure/rdf.py +245 -0
- biotite/structure/repair.py +302 -0
- biotite/structure/residues.py +716 -0
- biotite/structure/rings.py +451 -0
- biotite/structure/sasa.cpython-314-darwin.so +0 -0
- biotite/structure/sasa.pyx +322 -0
- biotite/structure/segments.py +328 -0
- biotite/structure/sequence.py +110 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/sse.py +306 -0
- biotite/structure/superimpose.py +511 -0
- biotite/structure/tm.py +581 -0
- biotite/structure/transform.py +736 -0
- biotite/structure/util.py +160 -0
- biotite/version.py +34 -0
- biotite/visualize.py +375 -0
- biotite-1.5.0.dist-info/METADATA +162 -0
- biotite-1.5.0.dist-info/RECORD +354 -0
- biotite-1.5.0.dist-info/WHEEL +6 -0
- biotite-1.5.0.dist-info/licenses/LICENSE.rst +30 -0
|
Binary file
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# This source code is part of the Biotite package and is distributed
|
|
2
|
+
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
+
# information.
|
|
4
|
+
|
|
5
|
+
__name__ = "biotite.sequence.align"
|
|
6
|
+
__author__ = "Patrick Kunzmann"
|
|
7
|
+
__all__ = ["align_local_ungapped"]
|
|
8
|
+
|
|
9
|
+
cimport cython
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
from .alignment import Alignment
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Short aliases for the fixed-width NumPy C integer types used in this
# module
ctypedef np.int32_t int32
ctypedef np.int64_t int64
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64

# Unsigned integer types accepted as element type of a sequence code.
# Two fused types with identical members are declared on purpose:
# arguments sharing one fused type are always specialized to the same
# concrete type, whereas two distinct fused types allow the two sequence
# codes passed to '_seed_extend_generic()' to have different dtypes
ctypedef fused CodeType1:
    uint8
    uint16
    uint32
    uint64
ctypedef fused CodeType2:
    uint8
    uint16
    uint32
    uint64
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def align_local_ungapped(seq1, seq2, matrix, seed, int32 threshold,
                         str direction="both", bint score_only=False,
                         bint check_matrix=True):
    """
    align_local_ungapped(seq1, seq2, matrix, seed, threshold,
                         direction="both", score_only=False, check_matrix=True)

    Perform a local alignment extending from given `seed` position
    without inserting gaps.

    The alignment extends into one or both directions (controlled by
    `direction`) until the total alignment score falls more than
    `threshold` below the maximum score found (*X-Drop*).
    The returned alignment contains the range that yielded the maximum
    score.

    Parameters
    ----------
    seq1, seq2 : Sequence
        The sequences to be aligned.
        The sequences do not need to have the same alphabets, as long as
        the two alphabets of `matrix` extend the alphabets of the two
        sequences.
    matrix : SubstitutionMatrix
        The substitution matrix used for scoring.
    seed : tuple(int, int)
        The indices in `seq1` and `seq2` where the local alignment
        starts.
        The indices must be non-negative and smaller than the length
        of the respective sequence.
    threshold : int
        If the current score falls this value below the maximum score
        found, the alignment terminates.
    direction : {'both', 'upstream', 'downstream'}, optional
        Controls in which direction the alignment extends starting
        from the seed.
        If ``'upstream'``, the alignment starts before the `seed` and
        ends at the `seed`.
        If ``'downstream'``, the alignment starts at the `seed` and
        ends behind the `seed`.
        If ``'both'`` (default) the alignment starts before the `seed`
        and ends behind the `seed`.
        The `seed` position itself is always included in the alignment.
    score_only : bool, optional
        If set to ``True``, only the similarity score is returned
        instead of the :class:`Alignment`, decreasing the runtime
        substantially.
    check_matrix : bool, optional
        If set to False, the `matrix` is not checked for compatibility
        with the alphabets of the sequences.
        Due to the small overall runtime of the function, this can increase
        performance substantially.
        However, unexpected results or crashes may occur, if an
        incompatible `matrix` is given.


    Returns
    -------
    alignment : Alignment
        The resulting ungapped alignment.
        Only returned, if `score_only` is ``False``.
    score : int
        The alignment similarity score.
        Only returned, if `score_only` is ``True``.

    Raises
    ------
    ValueError
        If `check_matrix` is true and the alphabets of `matrix` do not
        extend the alphabets of the sequences, if `direction` is not
        one of the supported values or if `threshold` is negative.
    IndexError
        If an index in `seed` is negative or lies beyond the end of
        the respective sequence.

    See Also
    --------
    align_gapped
        For gapped local alignments with the same *X-Drop* technique.

    Examples
    --------

    >>> seq1 = ProteinSequence("BIQTITE")
    >>> seq2 = ProteinSequence("PYRRHQTITE")
    >>> matrix = SubstitutionMatrix.std_protein_matrix()
    >>> alignment = align_local_ungapped(seq1, seq2, matrix, seed=(4,7), threshold=10)
    >>> print(alignment)
    QTITE
    QTITE
    >>> alignment = align_local_ungapped(seq1, seq2, matrix, (4,7), 10, direction="upstream")
    >>> print(alignment)
    QTI
    QTI
    >>> alignment = align_local_ungapped(seq1, seq2, matrix, (4,7), 10, direction="downstream")
    >>> print(alignment)
    ITE
    ITE
    >>> score = align_local_ungapped(seq1, seq2, matrix, (4,7), 10, score_only=True)
    >>> print(score)
    24
    """
    if check_matrix:
        if     not matrix.get_alphabet1().extends(seq1.get_alphabet()) \
            or not matrix.get_alphabet2().extends(seq2.get_alphabet()):
                raise ValueError(
                    "The sequences' alphabets do not fit the matrix"
                )
    cdef const int32[:,:] score_matrix = matrix.score_matrix()

    cdef bint upstream
    cdef bint downstream
    if direction == "both":
        upstream = True
        downstream = True
    elif direction == "upstream":
        upstream = True
        downstream = False
    elif direction == "downstream":
        upstream = False
        downstream = True
    else:
        raise ValueError(f"Direction '{direction}' is invalid")

    if threshold < 0:
        raise ValueError("The threshold value must be a non-negative integer")

    cdef int seq1_start, seq2_start
    seq1_start, seq2_start = seed
    if seq1_start < 0 or seq2_start < 0:
        # Zero is a valid index, hence 'non-negative' instead of 'positive'
        raise IndexError("Seed must contain non-negative indices")

    cdef np.ndarray code1 = seq1.code
    cdef np.ndarray code2 = seq2.code
    # Reject a seed behind the end of a sequence right away:
    # Otherwise the slices below would be silently clamped and the
    # extension would run on unrelated positions, before the seed lookup
    # finally fails with a less informative error
    if seq1_start >= len(code1) or seq2_start >= len(code2):
        raise IndexError(
            f"Seed ({seq1_start}, {seq2_start}) is out of bounds for "
            f"sequences of length {len(code1)} and {len(code2)}"
        )
    # In the common case that both codes are 'uint8' the extension is
    # performed by the C-level '_seed_extend_uint8()' function
    # This gives significant performance increase since the
    # seed extend itself runs fast, so the call overhead matters
    cdef bint both_uint8 = code1.dtype == np.uint8 and code2.dtype == np.uint8

    cdef int32 length
    # Offsets of the (exclusive) alignment range relative to the seed;
    # the initial values cover the seed position only
    cdef int start_offset = 0
    cdef int stop_offset = 1
    cdef int32 score = 0
    cdef int32 total_score = 0

    # Separate alignment into two parts:
    # the regions upstream and downstream from the seed position
    # Range check to avoid negative indices
    if upstream and seq1_start > 0 and seq2_start > 0:
        # For the upstream region the respective part of the sequence
        # must be reversed
        if both_uint8:
            length = _seed_extend_uint8(
                code1[seq1_start-1::-1], code2[seq2_start-1::-1],
                score_matrix, threshold, &score
            )
        else:
            score, length = _seed_extend_generic(
                code1[seq1_start-1::-1], code2[seq2_start-1::-1],
                score_matrix, threshold
            )
        total_score += score
        start_offset -= length
    if downstream:
        if both_uint8:
            length = _seed_extend_uint8(
                code1[seq1_start+1:], code2[seq2_start+1:],
                score_matrix, threshold, &score
            )
        else:
            score, length = _seed_extend_generic(
                code1[seq1_start+1:], code2[seq2_start+1:],
                score_matrix, threshold
            )
        total_score += score
        stop_offset += length
    # The seed position itself is part of neither extension
    total_score += score_matrix[code1[seq1_start], code2[seq2_start]]

    if score_only:
        return total_score
    else:
        trace = np.stack([
            np.arange(seq1_start + start_offset, seq1_start + stop_offset),
            np.arange(seq2_start + start_offset, seq2_start + stop_offset)
        ], axis=-1)
        return Alignment([seq1, seq2], trace, total_score)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def _seed_extend_generic(CodeType1[:] code1 not None,
                         CodeType2[:] code2 not None,
                         const int32[:,:] matrix not None,
                         int32 threshold):
    """
    Extend an ungapped alignment from the first symbol of both
    sequence codes using the *X-Drop* criterion.

    The symbols are aligned position by position and the scores from
    `matrix` are accumulated.
    The extension stops at the end of the shorter code or as soon as
    the accumulated score lies more than `threshold` below the best
    score seen so far.
    Bounds checking is disabled, so each code must be a valid index
    into the respective axis of `matrix`.

    Returns
    -------
    score : int
        The best accumulated score found.
    length : int
        The number of aligned symbols that give this score.
        If multiple lengths give the best score, the largest one is
        returned.
    """
    cdef int pos
    # The extension cannot run past the end of the shorter code
    cdef int n_symbols = _min(code1.shape[0], code2.shape[0])
    cdef int32 running_score = 0
    cdef int32 best_score = 0
    # Number of symbols in the best scoring extension found so far
    cdef int best_length = 0

    for pos in range(n_symbols):
        running_score += matrix[code1[pos], code2[pos]]
        if running_score < best_score:
            if best_score - running_score > threshold:
                # Score dropped too far below the optimum -> give up
                break
        else:
            # New (or equally good but longer) optimum
            best_score = running_score
            best_length = pos + 1

    return best_score, best_length
|
|
245
|
+
|
|
246
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef int _seed_extend_uint8(uint8[:] code1, uint8[:] code2,
                            const int32[:,:] matrix,
                            int32 threshold, int32* score):
    """
    C-level counterpart of :func:`_seed_extend_generic()` for the
    common case that both sequence codes have the ``uint8`` *dtype*.

    Avoiding the Python call overhead increases the performance for
    this common case.
    The best accumulated score is written to the location `score`
    points to and the number of aligned symbols giving this score is
    returned.
    """
    cdef int pos
    # The extension cannot run past the end of the shorter code
    cdef int n_symbols = _min(code1.shape[0], code2.shape[0])
    cdef int32 running_score = 0
    cdef int32 best_score = 0
    # Number of symbols in the best scoring extension found so far
    cdef int best_length = 0

    for pos in range(n_symbols):
        running_score += matrix[code1[pos], code2[pos]]
        if running_score < best_score:
            if best_score - running_score > threshold:
                # Score dropped too far below the optimum -> give up
                break
        else:
            # New (or equally good but longer) optimum
            best_score = running_score
            best_length = pos + 1

    # Hand the score back via the output pointer,
    # the return value is reserved for the length
    score[0] = best_score
    return best_length
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
cdef inline int _min(int a, int b):
    # Return the smaller one of two C integers
    if b < a:
        return b
    return a
|