bio-bwa 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/.document +5 -0
  2. data/Gemfile +15 -0
  3. data/Gemfile.lock +28 -0
  4. data/LICENSE.txt +35 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +56 -0
  7. data/VERSION +1 -0
  8. data/bio-bwa.gemspec +152 -0
  9. data/doc/Bio.html +93 -0
  10. data/doc/Bio/BWA.html +2884 -0
  11. data/doc/Bio/BWA/Library.html +229 -0
  12. data/doc/_index.html +119 -0
  13. data/doc/class_list.html +36 -0
  14. data/doc/css/common.css +1 -0
  15. data/doc/css/full_list.css +53 -0
  16. data/doc/css/style.css +310 -0
  17. data/doc/file.LICENSE.html +88 -0
  18. data/doc/file.README.html +119 -0
  19. data/doc/file_list.html +41 -0
  20. data/doc/frames.html +13 -0
  21. data/doc/index.html +119 -0
  22. data/doc/js/app.js +203 -0
  23. data/doc/js/full_list.js +149 -0
  24. data/doc/js/jquery.js +154 -0
  25. data/doc/method_list.html +171 -0
  26. data/doc/top-level-namespace.html +88 -0
  27. data/ext/COPYING +674 -0
  28. data/ext/ChangeLog +3864 -0
  29. data/ext/NEWS +555 -0
  30. data/ext/README +29 -0
  31. data/ext/bamlite.c +155 -0
  32. data/ext/bamlite.h +94 -0
  33. data/ext/bntseq.c +303 -0
  34. data/ext/bntseq.h +80 -0
  35. data/ext/bwa.1 +562 -0
  36. data/ext/bwape.c +807 -0
  37. data/ext/bwase.c +686 -0
  38. data/ext/bwase.h +27 -0
  39. data/ext/bwaseqio.c +222 -0
  40. data/ext/bwt.c +250 -0
  41. data/ext/bwt.h +105 -0
  42. data/ext/bwt_gen/Makefile +23 -0
  43. data/ext/bwt_gen/QSufSort.c +496 -0
  44. data/ext/bwt_gen/QSufSort.h +40 -0
  45. data/ext/bwt_gen/bwt_gen.c +1547 -0
  46. data/ext/bwt_gen/bwt_gen.h +105 -0
  47. data/ext/bwt_lite.c +94 -0
  48. data/ext/bwt_lite.h +29 -0
  49. data/ext/bwtaln.c +345 -0
  50. data/ext/bwtaln.h +150 -0
  51. data/ext/bwtgap.c +264 -0
  52. data/ext/bwtgap.h +38 -0
  53. data/ext/bwtindex.c +186 -0
  54. data/ext/bwtio.c +77 -0
  55. data/ext/bwtmisc.c +269 -0
  56. data/ext/bwtsw2.h +51 -0
  57. data/ext/bwtsw2_aux.c +650 -0
  58. data/ext/bwtsw2_chain.c +107 -0
  59. data/ext/bwtsw2_core.c +594 -0
  60. data/ext/bwtsw2_main.c +100 -0
  61. data/ext/cs2nt.c +191 -0
  62. data/ext/is.c +218 -0
  63. data/ext/khash.h +506 -0
  64. data/ext/kseq.h +208 -0
  65. data/ext/ksort.h +269 -0
  66. data/ext/kstring.c +35 -0
  67. data/ext/kstring.h +46 -0
  68. data/ext/kvec.h +90 -0
  69. data/ext/main.c +63 -0
  70. data/ext/main.h +29 -0
  71. data/ext/mkrf_conf.rb +49 -0
  72. data/ext/qualfa2fq.pl +27 -0
  73. data/ext/simple_dp.c +162 -0
  74. data/ext/simpletest.c +23 -0
  75. data/ext/solid2fastq.pl +111 -0
  76. data/ext/stdaln.c +1072 -0
  77. data/ext/stdaln.h +162 -0
  78. data/ext/utils.c +82 -0
  79. data/ext/utils.h +54 -0
  80. data/lib/bio-bwa.rb +7 -0
  81. data/lib/bio/bwa.rb +312 -0
  82. data/lib/bio/bwa/library.rb +42 -0
  83. data/test/data/testdata.fa +602 -0
  84. data/test/data/testdata.long.fa +175 -0
  85. data/test/data/testdata.short.fa +2 -0
  86. data/test/helper.rb +18 -0
  87. data/test/test_bio-bwa_basic.rb +62 -0
  88. data/test/test_bio-bwa_make_index.rb +42 -0
  89. data/test/test_bio-bwa_run_aln.rb +49 -0
  90. data/test/test_bio-bwa_sam_conversion.rb +49 -0
  91. metadata +218 -0
data/ext/simpletest.c ADDED
@@ -0,0 +1,23 @@
1
+ #include "main.h"
2
+
3
+ int main (int argc, char const *argv[])
4
+ {
5
+
6
+ char *se[6];
7
+ se[0] = "bwa_sai2sam_se";
8
+ se[1] = "-f";
9
+ se[2] = "testdata.sam";
10
+ se[3] = "testdata";
11
+ se[4] = "testdata.sai";
12
+ se[5] = "testdata.fa";
13
+ bwa_sai2sam_se(6,se);
14
+
15
+ char *aln[6];
16
+ aln[0] = "bwa_aln";
17
+ aln[1] = "-f";
18
+ aln[2] = "testdata.sai";
19
+ aln[3] = "testdata";
20
+ aln[4] = "testdata.fa";
21
+ bwa_aln(5,aln);
22
+
23
+ }
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/perl -w
2
+
3
+ # Author: lh3
4
+ # Note: Ideally, this script should be written in C. It is a bit slow at present.
5
+ # Also note that this script is different from the one contained in MAQ.
6
+
7
+ use strict;
8
+ use warnings;
9
+ use Getopt::Std;
10
+
11
+ my %opts;
12
+ my $version = '0.1.4';
13
+ my $usage = qq{
14
+ Usage: solid2fastq.pl <in.title> <out.prefix>
15
+
16
+ Note: <in.title> is the string showed in the `# Title:' line of a
17
+ ".csfasta" read file. Then <in.title>F3.csfasta is read sequence
18
+ file and <in.title>F3_QV.qual is the quality file. If
19
+ <in.title>R3.csfasta is present, this script assumes reads are
20
+ paired; otherwise reads will be regarded as single-end.
21
+
22
+ The read name will be <out.prefix>:panel_x_y/[12] with `1' for R3
23
+ tag and `2' for F3. Usually you may want to use short <out.prefix>
24
+ to save diskspace. Long <out.prefix> also causes troubles to maq.
25
+
26
+ };
27
+
28
+ getopts('', \%opts);
29
+ die($usage) if (@ARGV != 2);
30
+ my ($title, $pre) = @ARGV;
31
+ my (@fhr, @fhw);
32
+ my @fn_suff = ('F3.csfasta', 'F3_QV.qual', 'R3.csfasta', 'R3_QV.qual');
33
+ my $is_paired = (-f "$title$fn_suff[2]" || -f "$title$fn_suff[2].gz")? 1 : 0;
34
+ if ($is_paired) { # paired end
35
+ for (0 .. 3) {
36
+ my $fn = "$title$fn_suff[$_]";
37
+ $fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
38
+ open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
39
+ }
40
+ open($fhw[0], "|gzip >$pre.read2.fastq.gz") || die; # this is NOT a typo
41
+ open($fhw[1], "|gzip >$pre.read1.fastq.gz") || die;
42
+ open($fhw[2], "|gzip >$pre.single.fastq.gz") || die;
43
+ my (@df, @dr);
44
+ @df = &read1(1); @dr = &read1(2);
45
+ while (@df && @dr) {
46
+ if ($df[0] eq $dr[0]) { # mate pair
47
+ print {$fhw[0]} $df[1]; print {$fhw[1]} $dr[1];
48
+ @df = &read1(1); @dr = &read1(2);
49
+ } else {
50
+ if ($df[0] le $dr[0]) {
51
+ print {$fhw[2]} $df[1];
52
+ @df = &read1(1);
53
+ } else {
54
+ print {$fhw[2]} $dr[1];
55
+ @dr = &read1(2);
56
+ }
57
+ }
58
+ }
59
+ if (@df) {
60
+ print {$fhw[2]} $df[1];
61
+ while (@df = &read1(1, $fhr[0], $fhr[1])) {
62
+ print {$fhw[2]} $df[1];
63
+ }
64
+ }
65
+ if (@dr) {
66
+ print {$fhw[2]} $dr[1];
67
+ while (@dr = &read1(2, $fhr[2], $fhr[3])) {
68
+ print {$fhw[2]} $dr[1];
69
+ }
70
+ }
71
+ close($fhr[$_]) for (0 .. $#fhr);
72
+ close($fhw[$_]) for (0 .. $#fhw);
73
+ } else { # single end
74
+ for (0 .. 1) {
75
+ my $fn = "$title$fn_suff[$_]";
76
+ $fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
77
+ open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
78
+ }
79
+ open($fhw[2], "|gzip >$pre.single.fastq.gz") || die;
80
+ my @df;
81
+ while (@df = &read1(1, $fhr[0], $fhr[1])) {
82
+ print {$fhw[2]} $df[1];
83
+ }
84
+ close($fhr[$_]) for (0 .. $#fhr);
85
+ close($fhw[2]);
86
+ }
87
+
88
+ sub read1 {
89
+ my $i = shift(@_);
90
+ my $j = ($i-1)<<1;
91
+ my ($key, $seq);
92
+ my ($fhs, $fhq) = ($fhr[$j], $fhr[$j|1]);
93
+ while (<$fhs>) {
94
+ my $t = <$fhq>;
95
+ if (/^>(\d+)_(\d+)_(\d+)_[FR]3/) {
96
+ $key = sprintf("%.4d_%.4d_%.4d", $1, $2, $3); # this line could be improved on 64-bit machines
97
+ die(qq/** unmatched read name: '$_' != '$_'\n/) unless ($_ eq $t);
98
+ my $name = "$pre:$1_$2_$3/$i";
99
+ $_ = substr(<$fhs>, 2);
100
+ tr/0123./ACGTN/;
101
+ my $s = $_;
102
+ $_ = <$fhq>;
103
+ s/-1\b/0/eg;
104
+ s/^(\d+)\s*//;
105
+ s/(\d+)\s*/chr($1+33)/eg;
106
+ $seq = qq/\@$name\n$s+\n$_\n/;
107
+ last;
108
+ }
109
+ }
110
+ return defined($seq)? ($key, $seq) : ();
111
+ }
data/ext/stdaln.c ADDED
@@ -0,0 +1,1072 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2003-2006, 2008, 2009, by Heng Li <lh3lh3@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ #include <stdlib.h>
27
+ #include <stdio.h>
28
+ #include <string.h>
29
+ #include <stdint.h>
30
+ #include "stdaln.h"
31
+
32
+ /* char -> 17 (=16+1) nucleotides */
33
+ unsigned char aln_nt16_table[256] = {
34
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
35
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
36
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
37
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
38
+ 15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
39
+ 15,15, 3, 6, 8,15, 7, 9, 0,12,15,15, 15,15,15,15,
40
+ 15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
41
+ 15,15, 3, 6, 8,15, 7, 9, 0,12,15,15, 15,15,15,15,
42
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
43
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
44
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
45
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
46
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
47
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
48
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
49
+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
50
+ };
51
+ char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";
52
+
53
+ /* char -> 5 (=4+1) nucleotides */
54
+ unsigned char aln_nt4_table[256] = {
55
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
56
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
57
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
58
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
59
+ 4, 0, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4,
60
+ 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
61
+ 4, 0, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4,
62
+ 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
63
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
64
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
65
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
66
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
67
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
68
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
69
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
70
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
71
+ };
72
+ char *aln_nt4_rev_table = "AGCTN-";
73
+
74
+ /* char -> 22 (=20+1+1) amino acids */
75
+ unsigned char aln_aa_table[256] = {
76
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
77
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
78
+ 21,21,21,21, 21,21,21,21, 21,21,20,21, 21,22 /*'-'*/,21,21,
79
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
80
+ 21, 0,21, 4, 3, 6,13, 7, 8, 9,21,11, 10,12, 2,21,
81
+ 14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
82
+ 21, 0,21, 4, 3, 6,13, 7, 8, 9,21,11, 10,12, 2,21,
83
+ 14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
84
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
85
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
86
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
87
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
88
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
89
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
90
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
91
+ 21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21
92
+ };
93
+ char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";
94
+ /* 01234567890123456789012 */
95
+
96
+ /* translation table. They are useless in stdaln.c, but when you realize you need it, you need not write the table again. */
97
+ unsigned char aln_trans_table_eu[66] = {
98
+ 11,11, 2, 2, 1, 1,15,15, 16,16,16,16, 9,12, 9, 9,
99
+ 6, 6, 3, 3, 7, 7, 7, 7, 0, 0, 0, 0, 19,19,19,19,
100
+ 5, 5, 8, 8, 1, 1, 1, 1, 14,14,14,14, 10,10,10,10,
101
+ 20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 22
102
+ };
103
+ char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";
104
+ /* 01234567890123456789012345678901234567890123456789012345678901234 */
105
+ int aln_sm_blosum62[] = {
106
+ /* A R N D C Q E G H I L K M F P S T W Y V * X */
107
+ 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
108
+ -1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
109
+ -2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
110
+ -2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
111
+ 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
112
+ -1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
113
+ -1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
114
+ 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
115
+ -2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
116
+ -1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
117
+ -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
118
+ -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
119
+ -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
120
+ -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
121
+ -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
122
+ 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
123
+ 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
124
+ -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
125
+ -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
126
+ 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
127
+ -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
128
+ 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
129
+ };
130
+
131
+ int aln_sm_blosum45[] = {
132
+ /* A R N D C Q E G H I L K M F P S T W Y V * X */
133
+ 5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0,-5, 0,
134
+ -2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2,-5,-1,
135
+ -1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3,-5,-1,
136
+ -2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3,-5,-1,
137
+ -1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1,-5,-2,
138
+ -1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3,-5,-1,
139
+ -1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3,-5,-1,
140
+ 0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3,-5,-1,
141
+ -2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3,-5,-1,
142
+ -1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3,-5,-1,
143
+ -1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1,-5,-1,
144
+ -1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2,-5,-1,
145
+ -1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1,-5,-1,
146
+ -2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0,-5,-1,
147
+ -1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3,-5,-1,
148
+ 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1,-5, 0,
149
+ 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0,-5, 0,
150
+ -2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3,-5,-2,
151
+ -2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1,-5,-1,
152
+ 0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5,-5,-1,
153
+ -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 1,-5,
154
+ 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0,-2,-1,-1,-5,-1
155
+ };
156
+
157
+ int aln_sm_nt[] = {
158
+ /* X A G R C M S V T W K D Y H B N */
159
+ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
160
+ -2, 2,-1, 1,-2, 1,-2, 0,-2, 1,-2, 0,-2, 0,-2, 0,
161
+ -2,-1, 2, 1,-2,-2, 1, 0,-2,-2, 1, 0,-2,-2, 0, 0,
162
+ -2, 1, 1, 1,-2,-1,-1, 0,-2,-1,-1, 0,-2, 0, 0, 0,
163
+ -2,-2,-2,-2, 2, 1, 1, 0,-1,-2,-2,-2, 1, 0, 0, 0,
164
+ -2, 1,-2,-1, 1, 1,-1, 0,-2,-1,-2, 0,-1, 0, 0, 0,
165
+ -2,-2, 1,-1, 1,-1, 1, 0,-2,-2,-1, 0,-1, 0, 0, 0,
166
+ -2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,
167
+ -2,-2,-2,-2,-1,-2,-2,-2, 2, 1, 1, 0, 1, 0, 0, 0,
168
+ -2, 1,-2,-1,-2,-1,-2, 0, 1, 1,-1, 0,-1, 0, 0, 0,
169
+ -2,-2, 1,-1,-2,-2,-1, 0, 1,-1, 1, 0,-1, 0, 0, 0,
170
+ -2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171
+ -2,-2,-2,-2, 1,-1,-1, 0, 1,-1,-1, 0, 1, 0, 0, 0,
172
+ -2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173
+ -2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174
+ -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
175
+ };
176
+
177
+ int aln_sm_read[] = {
178
+ /* X A G R C M S V T W K D Y H B N */
179
+ -17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
180
+ -17, 2,-17, 1,-17, 1,-17, 0,-17, 1,-17, 0,-17, 0,-17, 0,
181
+ -17,-17, 2, 1,-17,-17, 1, 0,-17,-17, 1, 0,-17,-17, 0, 0,
182
+ -17, 1, 1, 1,-17,-17,-17, 0,-17,-17,-17, 0,-17, 0, 0, 0,
183
+ -17,-17,-17,-17, 2, 1, 1, 0,-17,-17,-17,-17, 1, 0, 0, 0,
184
+ -17, 1,-17,-17, 1, 1,-17, 0,-17,-17,-17, 0,-17, 0, 0, 0,
185
+ -17,-17, 1,-17, 1,-17, 1, 0,-17,-17,-17, 0,-17, 0, 0, 0,
186
+ -17, 0, 0, 0, 0, 0, 0, 0,-17, 0, 0, 0, 0, 0, 0, 0,
187
+ -17,-17,-17,-17,-17,-17,-17,-17, 2, 1, 1, 0, 1, 0, 0, 0,
188
+ -17, 1,-17,-17,-17,-17,-17, 0, 1, 1,-17, 0,-17, 0, 0, 0,
189
+ -17,-17, 1,-17,-17,-17,-17, 0, 1,-17, 1, 0,-17, 0, 0, 0,
190
+ -17, 0, 0, 0,-17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191
+ -17,-17,-17,-17, 1,-17,-17, 0, 1,-17,-17, 0, 1, 0, 0, 0,
192
+ -17, 0,-17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
193
+ -17,-17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194
+ -17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195
+ };
196
+
197
+ int aln_sm_hs[] = {
198
+ /* A G C T N */
199
+ 91, -31,-114,-123, -44,
200
+ -31, 100,-125,-114, -42,
201
+ -123,-125, 100, -31, -42,
202
+ -114,-114, -31, 91, -42,
203
+ -44, -42, -42, -42, -43
204
+ };
205
+
206
+ int aln_sm_maq[] = {
207
+ 11, -19, -19, -19, -13,
208
+ -19, 11, -19, -19, -13,
209
+ -19, -19, 11, -19, -13,
210
+ -19, -19, -19, 11, -13,
211
+ -13, -13, -13, -13, -13
212
+ };
213
+
214
+ int aln_sm_blast[] = {
215
+ 1, -3, -3, -3, -2,
216
+ -3, 1, -3, -3, -2,
217
+ -3, -3, 1, -3, -2,
218
+ -3, -3, -3, 1, -2,
219
+ -2, -2, -2, -2, -2
220
+ };
221
+
222
+ /********************/
223
+ /* START OF align.c */
224
+ /********************/
225
+
226
+ AlnParam aln_param_blast = { 5, 2, 2, aln_sm_blast, 5, 50 };
227
+ AlnParam aln_param_bwa = { 26, 9, 5, aln_sm_maq, 5, 50 };
228
+ AlnParam aln_param_nt2nt = { 8, 2, 2, aln_sm_nt, 16, 75 };
229
+ AlnParam aln_param_rd2rd = { 1, 19, 19, aln_sm_read, 16, 75 };
230
+ AlnParam aln_param_aa2aa = { 10, 2, 2, aln_sm_blosum62, 22, 50 };
231
+
232
+ AlnAln *aln_init_AlnAln()
233
+ {
234
+ AlnAln *aa;
235
+ aa = (AlnAln*)malloc(sizeof(AlnAln));
236
+ aa->path = 0;
237
+ aa->out1 = aa->out2 = aa->outm = 0;
238
+ aa->path_len = 0;
239
+ return aa;
240
+ }
241
+ void aln_free_AlnAln(AlnAln *aa)
242
+ {
243
+ free(aa->path); free(aa->cigar32);
244
+ free(aa->out1); free(aa->out2); free(aa->outm);
245
+ free(aa);
246
+ }
247
+
248
+ /***************************/
249
+ /* START OF common_align.c */
250
+ /***************************/
251
+
252
+ #define LOCAL_OVERFLOW_THRESHOLD 32000
253
+ #define LOCAL_OVERFLOW_REDUCE 16000
254
+ #define NT_LOCAL_SCORE int
255
+ #define NT_LOCAL_SHIFT 16
256
+ #define NT_LOCAL_MASK 0xffff
257
+
258
+ #define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;
259
+
260
+ #define set_M(MM, cur, p, sc) \
261
+ { \
262
+ if ((p)->M >= (p)->I) { \
263
+ if ((p)->M >= (p)->D) { \
264
+ (MM) = (p)->M + (sc); (cur)->Mt = FROM_M; \
265
+ } else { \
266
+ (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
267
+ } \
268
+ } else { \
269
+ if ((p)->I > (p)->D) { \
270
+ (MM) = (p)->I + (sc); (cur)->Mt = FROM_I; \
271
+ } else { \
272
+ (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
273
+ } \
274
+ } \
275
+ }
276
+ #define set_I(II, cur, p) \
277
+ { \
278
+ if ((p)->M - gap_open > (p)->I) { \
279
+ (cur)->It = FROM_M; \
280
+ (II) = (p)->M - gap_open - gap_ext; \
281
+ } else { \
282
+ (cur)->It = FROM_I; \
283
+ (II) = (p)->I - gap_ext; \
284
+ } \
285
+ }
286
+ #define set_end_I(II, cur, p) \
287
+ { \
288
+ if (gap_end >= 0) { \
289
+ if ((p)->M - gap_open > (p)->I) { \
290
+ (cur)->It = FROM_M; \
291
+ (II) = (p)->M - gap_open - gap_end; \
292
+ } else { \
293
+ (cur)->It = FROM_I; \
294
+ (II) = (p)->I - gap_end; \
295
+ } \
296
+ } else set_I(II, cur, p); \
297
+ }
298
+ #define set_D(DD, cur, p) \
299
+ { \
300
+ if ((p)->M - gap_open > (p)->D) { \
301
+ (cur)->Dt = FROM_M; \
302
+ (DD) = (p)->M - gap_open - gap_ext; \
303
+ } else { \
304
+ (cur)->Dt = FROM_D; \
305
+ (DD) = (p)->D - gap_ext; \
306
+ } \
307
+ }
308
+ #define set_end_D(DD, cur, p) \
309
+ { \
310
+ if (gap_end >= 0) { \
311
+ if ((p)->M - gap_open > (p)->D) { \
312
+ (cur)->Dt = FROM_M; \
313
+ (DD) = (p)->M - gap_open - gap_end; \
314
+ } else { \
315
+ (cur)->Dt = FROM_D; \
316
+ (DD) = (p)->D - gap_end; \
317
+ } \
318
+ } else set_D(DD, cur, p); \
319
+ }
320
+
321
+ typedef struct
322
+ {
323
+ unsigned char Mt:3, It:2, Dt:2;
324
+ } dpcell_t;
325
+
326
+ typedef struct
327
+ {
328
+ int M, I, D;
329
+ } dpscore_t;
330
+
331
+ /* build score profile for accelerating alignment, in theory */
332
+ void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)
333
+ {
334
+ int *tmp, *tmp2, i, k;
335
+ for (i = 0; i != row; ++i) {
336
+ tmp = score_matrix + i * row;
337
+ tmp2 = s_array[i];
338
+ for (k = 0; k != len; ++k)
339
+ tmp2[k] = tmp[seq[k]];
340
+ }
341
+ }
342
+ /***************************
343
+ * banded global alignment *
344
+ ***************************/
345
+ int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
346
+ path_t *path, int *path_len)
347
+ {
348
+ register int i, j;
349
+ dpcell_t **dpcell, *q;
350
+ dpscore_t *curr, *last, *s;
351
+ path_t *p;
352
+ int b1, b2, tmp_end;
353
+ int *mat, end, max;
354
+ unsigned char type, ctype;
355
+
356
+ int gap_open, gap_ext, gap_end, b;
357
+ int *score_matrix, N_MATRIX_ROW;
358
+
359
+ /* initialize some align-related parameters. just for compatibility */
360
+ gap_open = ap->gap_open;
361
+ gap_ext = ap->gap_ext;
362
+ gap_end = ap->gap_end;
363
+ b = ap->band_width;
364
+ score_matrix = ap->matrix;
365
+ N_MATRIX_ROW = ap->row;
366
+
367
+ if (len1 == 0 || len2 == 0) {
368
+ *path_len = 0;
369
+ return 0;
370
+ }
371
+ /* calculate b1 and b2 */
372
+ if (len1 > len2) {
373
+ b1 = len1 - len2 + b;
374
+ b2 = b;
375
+ } else {
376
+ b1 = b;
377
+ b2 = len2 - len1 + b;
378
+ }
379
+ if (b1 > len1) b1 = len1;
380
+ if (b2 > len2) b2 = len2;
381
+ --seq1; --seq2;
382
+
383
+ /* allocate memory */
384
+ end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
385
+ dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1));
386
+ for (j = 0; j <= len2; ++j)
387
+ dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end);
388
+ for (j = b2 + 1; j <= len2; ++j)
389
+ dpcell[j] -= j - b2;
390
+ curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
391
+ last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
392
+
393
+ /* set first row */
394
+ SET_INF(*curr); curr->M = 0;
395
+ for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
396
+ SET_INF(*s);
397
+ set_end_D(s->D, dpcell[0] + i, s - 1);
398
+ }
399
+ s = curr; curr = last; last = s;
400
+
401
+ /* core dynamic programming, part 1 */
402
+ tmp_end = (b2 < len2)? b2 : len2 - 1;
403
+ for (j = 1; j <= tmp_end; ++j) {
404
+ q = dpcell[j]; s = curr; SET_INF(*s);
405
+ set_end_I(s->I, q, last);
406
+ end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
407
+ mat = score_matrix + seq2[j] * N_MATRIX_ROW;
408
+ ++s; ++q;
409
+ for (i = 1; i != end; ++i, ++s, ++q) {
410
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
411
+ set_I(s->I, q, last + i);
412
+ set_D(s->D, q, s - 1);
413
+ }
414
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
415
+ set_D(s->D, q, s - 1);
416
+ if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
417
+ set_end_I(s->I, q, last + i);
418
+ } else s->I = MINOR_INF;
419
+ s = curr; curr = last; last = s;
420
+ }
421
+ /* last row for part 1, use set_end_D() instead of set_D() */
422
+ if (j == len2 && b2 != len2 - 1) {
423
+ q = dpcell[j]; s = curr; SET_INF(*s);
424
+ set_end_I(s->I, q, last);
425
+ end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
426
+ mat = score_matrix + seq2[j] * N_MATRIX_ROW;
427
+ ++s; ++q;
428
+ for (i = 1; i != end; ++i, ++s, ++q) {
429
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
430
+ set_I(s->I, q, last + i);
431
+ set_end_D(s->D, q, s - 1);
432
+ }
433
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
434
+ set_end_D(s->D, q, s - 1);
435
+ if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
436
+ set_end_I(s->I, q, last + i);
437
+ } else s->I = MINOR_INF;
438
+ s = curr; curr = last; last = s;
439
+ ++j;
440
+ }
441
+
442
+ /* core dynamic programming, part 2 */
443
+ for (; j <= len2 - b2 + 1; ++j) {
444
+ SET_INF(curr[j - b2]);
445
+ mat = score_matrix + seq2[j] * N_MATRIX_ROW;
446
+ end = j + b1 - 1;
447
+ for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
448
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
449
+ set_I(s->I, q, last + i);
450
+ set_D(s->D, q, s - 1);
451
+ }
452
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
453
+ set_D(s->D, q, s - 1);
454
+ s->I = MINOR_INF;
455
+ s = curr; curr = last; last = s;
456
+ }
457
+
458
+ /* core dynamic programming, part 3 */
459
+ for (; j < len2; ++j) {
460
+ SET_INF(curr[j - b2]);
461
+ mat = score_matrix + seq2[j] * N_MATRIX_ROW;
462
+ for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
463
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
464
+ set_I(s->I, q, last + i);
465
+ set_D(s->D, q, s - 1);
466
+ }
467
+ set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
468
+ set_end_I(s->I, q, last + i);
469
+ set_D(s->D, q, s - 1);
470
+ s = curr; curr = last; last = s;
471
+ }
472
+ /* last row */
473
+ if (j == len2) {
474
+ SET_INF(curr[j - b2]);
475
+ mat = score_matrix + seq2[j] * N_MATRIX_ROW;
476
+ for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
477
+ set_M(s->M, q, last + i - 1, mat[seq1[i]]);
478
+ set_I(s->I, q, last + i);
479
+ set_end_D(s->D, q, s - 1);
480
+ }
481
+ set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
482
+ set_end_I(s->I, q, last + i);
483
+ set_end_D(s->D, q, s - 1);
484
+ s = curr; curr = last; last = s;
485
+ }
486
+
487
+ /* backtrace */
488
+ i = len1; j = len2;
489
+ q = dpcell[j] + i;
490
+ s = last + len1;
491
+ max = s->M; type = q->Mt; ctype = FROM_M;
492
+ if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
493
+ if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }
494
+
495
+ p = path;
496
+ p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
497
+ ++p;
498
+ do {
499
+ switch (ctype) {
500
+ case FROM_M: --i; --j; break;
501
+ case FROM_I: --j; break;
502
+ case FROM_D: --i; break;
503
+ }
504
+ q = dpcell[j] + i;
505
+ ctype = type;
506
+ switch (type) {
507
+ case FROM_M: type = q->Mt; break;
508
+ case FROM_I: type = q->It; break;
509
+ case FROM_D: type = q->Dt; break;
510
+ }
511
+ p->ctype = ctype; p->i = i; p->j = j;
512
+ ++p;
513
+ } while (i || j);
514
+ *path_len = p - path - 1;
515
+
516
+ /* free memory */
517
+ for (j = b2 + 1; j <= len2; ++j)
518
+ dpcell[j] += j - b2;
519
+ for (j = 0; j <= len2; ++j)
520
+ free(dpcell[j]);
521
+ free(dpcell);
522
+ free(curr); free(last);
523
+
524
+ return max;
525
+ }
526
+ /*************************************************
527
+ * local alignment combined with banded strategy *
528
+ *************************************************/
529
+ int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
530
+ path_t *path, int *path_len, int _thres, int *_subo)
531
+ {
532
+ register NT_LOCAL_SCORE *s;
533
+ register int i;
534
+ int q, r, qr, tmp_len, qr_shift;
535
+ int **s_array, *score_array;
536
+ int e, f;
537
+ int is_overflow, of_base;
538
+ NT_LOCAL_SCORE *eh, curr_h, last_h, curr_last_h;
539
+ int j, start_i, start_j, end_i, end_j;
540
+ path_t *p;
541
+ int score_f, score_r, score_g;
542
+ int start, end, max_score;
543
+ int thres, *suba, *ss;
544
+
545
+ int gap_open, gap_ext, b;
546
+ int *score_matrix, N_MATRIX_ROW;
547
+
548
+ /* initialize some align-related parameters. just for compatibility */
549
+ gap_open = ap->gap_open;
550
+ gap_ext = ap->gap_ext;
551
+ b = ap->band_width;
552
+ score_matrix = ap->matrix;
553
+ N_MATRIX_ROW = ap->row;
554
+ thres = _thres > 0? _thres : -_thres;
555
+
556
+ if (len1 == 0 || len2 == 0) return -1;
557
+
558
+ /* allocate memory */
559
+ suba = (int*)malloc(sizeof(int) * (len2 + 1));
560
+ eh = (NT_LOCAL_SCORE*)malloc(sizeof(NT_LOCAL_SCORE) * (len1 + 1));
561
+ s_array = (int**)malloc(sizeof(int*) * N_MATRIX_ROW);
562
+ for (i = 0; i != N_MATRIX_ROW; ++i)
563
+ s_array[i] = (int*)malloc(sizeof(int) * len1);
564
+ /* initialization */
565
+ aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);
566
+ q = gap_open;
567
+ r = gap_ext;
568
+ qr = q + r;
569
+ qr_shift = (qr+1) << NT_LOCAL_SHIFT;
570
+ tmp_len = len1 + 1;
571
+ start_i = start_j = end_i = end_j = 0;
572
+ for (i = 0, max_score = 0; i != N_MATRIX_ROW * N_MATRIX_ROW; ++i)
573
+ if (max_score < score_matrix[i]) max_score = score_matrix[i];
574
+ /* convert the coordinate */
575
+ --seq1; --seq2;
576
+ for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
577
+
578
+ /* forward dynamic programming */
579
+ for (i = 0, s = eh; i != tmp_len; ++i, ++s) *s = 0;
580
+ score_f = 0;
581
+ is_overflow = of_base = 0;
582
+ suba[0] = 0;
583
+ for (j = 1, ss = suba + 1; j <= len2; ++j, ++ss) {
584
+ int subo = 0;
585
+ last_h = f = 0;
586
+ score_array = s_array[seq2[j]];
587
+ if (is_overflow) { /* adjust eh[] array if overflow occurs. */
588
+ /* If LOCAL_OVERFLOW_REDUCE is too small, optimal alignment might be missed.
589
+ * If it is too large, this block will be excuted frequently and therefore
590
+ * slow down the whole program.
591
+ * Acually, smaller LOCAL_OVERFLOW_REDUCE might also help to reduce the
592
+ * number of assignments because it sets some cells to zero when overflow
593
+ * happens. */
594
+ int tmp, tmp2;
595
+ score_f -= LOCAL_OVERFLOW_REDUCE;
596
+ of_base += LOCAL_OVERFLOW_REDUCE;
597
+ is_overflow = 0;
598
+ for (i = 1, s = eh; i <= tmp_len; ++i, ++s) {
599
+ tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
600
+ if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
601
+ else tmp2 -= LOCAL_OVERFLOW_REDUCE;
602
+ if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
603
+ else tmp -= LOCAL_OVERFLOW_REDUCE;
604
+ *s = (tmp << NT_LOCAL_SHIFT) | tmp2;
605
+ }
606
+ }
607
+ for (i = 1, s = eh; i != tmp_len; ++i, ++s) {
608
+ /* prepare for calculate current h */
609
+ curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
610
+ if (curr_h < 0) curr_h = 0;
611
+ if (last_h > 0) { /* initialize f */
612
+ f = (f > last_h - q)? f - r : last_h - qr;
613
+ if (curr_h < f) curr_h = f;
614
+ }
615
+ if (*(s+1) >= qr_shift) { /* initialize e */
616
+ curr_last_h = *(s+1) >> NT_LOCAL_SHIFT;
617
+ e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
618
+ if (curr_h < e) curr_h = e;
619
+ *s = (last_h << NT_LOCAL_SHIFT) | e;
620
+ } else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
621
+ last_h = curr_h;
622
+ if (subo < curr_h) subo = curr_h;
623
+ if (score_f < curr_h) {
624
+ score_f = curr_h; end_i = i; end_j = j;
625
+ if (score_f > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
626
+ }
627
+ }
628
+ *s = last_h << NT_LOCAL_SHIFT;
629
+ *ss = subo + of_base;
630
+ }
631
+ score_f += of_base;
632
+
633
+ if (score_f < thres) { /* no matching residue at all, 090218 */
634
+ *path_len = 0;
635
+ goto end_func;
636
+ }
637
+ if (path == 0) goto end_func; /* skip path-filling */
638
+
639
+ /* reverse dynamic programming */
640
+ for (i = end_i, s = eh + end_i; i >= 0; --i, --s) *s = 0;
641
+ if (end_i == 0 || end_j == 0) goto end_func; /* no local match */
642
+ score_r = score_matrix[seq1[end_i] * N_MATRIX_ROW + seq2[end_j]];
643
+ is_overflow = of_base = 0;
644
+ start_i = end_i; start_j = end_j;
645
+ eh[end_i] = ((NT_LOCAL_SCORE)(qr + score_r)) << NT_LOCAL_SHIFT; /* in order to initialize f and e, 040408 */
646
+ start = end_i - 1;
647
+ end = end_i - 3;
648
+ if (end <= 0) end = 0;
649
+
650
+ /* second pass DP can be done in a band, speed will thus be enhanced */
651
+ for (j = end_j - 1; j != 0; --j) {
652
+ last_h = f = 0;
653
+ score_array = s_array[seq2[j]];
654
+ if (is_overflow) { /* adjust eh[] array if overflow occurs. */
655
+ int tmp, tmp2;
656
+ score_r -= LOCAL_OVERFLOW_REDUCE;
657
+ of_base += LOCAL_OVERFLOW_REDUCE;
658
+ is_overflow = 0;
659
+ for (i = start, s = eh + start + 1; i >= end; --i, --s) {
660
+ tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
661
+ if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
662
+ else tmp2 -= LOCAL_OVERFLOW_REDUCE;
663
+ if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
664
+ else tmp -= LOCAL_OVERFLOW_REDUCE;
665
+ *s = (tmp << NT_LOCAL_SHIFT) | tmp2;
666
+ }
667
+ }
668
+ for (i = start, s = eh + start + 1; i != end; --i, --s) {
669
+ /* prepare for calculate current h */
670
+ curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
671
+ if (curr_h < 0) curr_h = 0;
672
+ if (last_h > 0) { /* initialize f */
673
+ f = (f > last_h - q)? f - r : last_h - qr;
674
+ if (curr_h < f) curr_h = f;
675
+ }
676
+ curr_last_h = *(s-1) >> NT_LOCAL_SHIFT;
677
+ e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
678
+ if (e < 0) e = 0;
679
+ if (curr_h < e) curr_h = e;
680
+ *s = (last_h << NT_LOCAL_SHIFT) | e;
681
+ last_h = curr_h;
682
+ if (score_r < curr_h) {
683
+ score_r = curr_h; start_i = i; start_j = j;
684
+ if (score_r + of_base - qr == score_f) {
685
+ j = 1; break;
686
+ }
687
+ if (score_r > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
688
+ }
689
+ }
690
+ *s = last_h << NT_LOCAL_SHIFT;
691
+ /* recalculate start and end, the boundaries of the band */
692
+ if ((eh[start] >> NT_LOCAL_SHIFT) <= qr) --start;
693
+ if (start <= 0) start = 0;
694
+ end = start_i - (start_j - j) - (score_r + of_base + (start_j - j) * max_score) / r - 1;
695
+ if (end <= 0) end = 0;
696
+ }
697
+
698
+ if (_subo) {
699
+ int tmp2 = 0, tmp = (int)(start_j - .33 * (end_j - start_j) + .499);
700
+ for (j = 1; j <= tmp; ++j)
701
+ if (tmp2 < suba[j]) tmp2 = suba[j];
702
+ tmp = (int)(end_j + .33 * (end_j - start_j) + .499);
703
+ for (j = tmp; j <= len2; ++j)
704
+ if (tmp2 < suba[j]) tmp2 = suba[j];
705
+ *_subo = tmp2;
706
+ }
707
+
708
+ if (path_len == 0) {
709
+ path[0].i = start_i; path[0].j = start_j;
710
+ path[1].i = end_i; path[1].j = end_j;
711
+ goto end_func;
712
+ }
713
+
714
+ score_r += of_base;
715
+ score_r -= qr;
716
+
717
+ #ifdef DEBUG
718
+ /* this seems not a bug */
719
+ if (score_f != score_r)
720
+ fprintf(stderr, "[aln_local_core] unknown flaw occurs: score_f(%d) != score_r(%d)\n", score_f, score_r);
721
+ #endif
722
+
723
+ if (_thres > 0) { /* call global alignment to fill the path */
724
+ score_g = 0;
725
+ j = (end_i - start_i > end_j - start_j)? end_i - start_i : end_j - start_j;
726
+ ++j; /* j is the maximum band_width */
727
+ for (i = ap->band_width;; i <<= 1) {
728
+ AlnParam ap_real = *ap;
729
+ ap_real.gap_end = -1;
730
+ ap_real.band_width = i;
731
+ score_g = aln_global_core(seq1 + start_i, end_i - start_i + 1, seq2 + start_j,
732
+ end_j - start_j + 1, &ap_real, path, path_len);
733
+ if (score_g == score_r || score_f == score_g) break;
734
+ if (i > j) break;
735
+ }
736
+ if (score_r > score_g && score_f > score_g) {
737
+ fprintf(stderr, "[aln_local_core] Potential bug: (%d,%d) > %d\n", score_f, score_r, score_g);
738
+ score_f = score_r = -1;
739
+ } else score_f = score_g;
740
+
741
+ /* convert coordinate */
742
+ for (p = path + *path_len - 1; p >= path; --p) {
743
+ p->i += start_i - 1;
744
+ p->j += start_j - 1;
745
+ }
746
+ } else { /* just store the start and end */
747
+ *path_len = 2;
748
+ path[1].i = start_i; path[1].j = start_j;
749
+ path->i = end_i; path->j = end_j;
750
+ }
751
+
752
+ end_func:
753
+ /* free */
754
+ free(eh); free(suba);
755
+ for (i = 0; i != N_MATRIX_ROW; ++i) {
756
+ ++s_array[i];
757
+ free(s_array[i]);
758
+ }
759
+ free(s_array);
760
+ return score_f;
761
+ }
762
+ AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
763
+ int type, int thres, int len1, int len2)
764
+ {
765
+ unsigned char *seq11, *seq22;
766
+ int score;
767
+ int i, j, l;
768
+ path_t *p;
769
+ char *out1, *out2, *outm;
770
+ AlnAln *aa;
771
+
772
+ if (len1 < 0) len1 = strlen(seq1);
773
+ if (len2 < 0) len2 = strlen(seq2);
774
+
775
+ aa = aln_init_AlnAln();
776
+ seq11 = (unsigned char*)malloc(sizeof(unsigned char) * len1);
777
+ seq22 = (unsigned char*)malloc(sizeof(unsigned char) * len2);
778
+ aa->path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 1));
779
+
780
+ if (ap->row < 10) { /* 4-nucleotide alignment */
781
+ for (i = 0; i < len1; ++i)
782
+ seq11[i] = aln_nt4_table[(int)seq1[i]];
783
+ for (j = 0; j < len2; ++j)
784
+ seq22[j] = aln_nt4_table[(int)seq2[j]];
785
+ } else if (ap->row < 20) { /* 16-nucleotide alignment */
786
+ for (i = 0; i < len1; ++i)
787
+ seq11[i] = aln_nt16_table[(int)seq1[i]];
788
+ for (j = 0; j < len2; ++j)
789
+ seq22[j] = aln_nt16_table[(int)seq2[j]];
790
+ } else { /* amino acids */
791
+ for (i = 0; i < len1; ++i)
792
+ seq11[i] = aln_aa_table[(int)seq1[i]];
793
+ for (j = 0; j < len2; ++j)
794
+ seq22[j] = aln_aa_table[(int)seq2[j]];
795
+ }
796
+
797
+ if (type == ALN_TYPE_GLOBAL) score = aln_global_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);
798
+ else if (type == ALN_TYPE_LOCAL) score = aln_local_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, thres, &aa->subo);
799
+ else if (type == ALN_TYPE_EXTEND) score = aln_extend_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, 1, 0);
800
+ else {
801
+ free(seq11); free(seq22); free(aa->path);
802
+ aln_free_AlnAln(aa);
803
+ return 0;
804
+ }
805
+ aa->score = score;
806
+
807
+ if (thres > 0) {
808
+ out1 = aa->out1 = (char*)malloc(sizeof(char) * (aa->path_len + 1));
809
+ out2 = aa->out2 = (char*)malloc(sizeof(char) * (aa->path_len + 1));
810
+ outm = aa->outm = (char*)malloc(sizeof(char) * (aa->path_len + 1));
811
+
812
+ --seq1; --seq2;
813
+ --seq11; --seq22;
814
+
815
+ p = aa->path + aa->path_len - 1;
816
+
817
+ for (l = 0; p >= aa->path; --p, ++l) {
818
+ switch (p->ctype) {
819
+ case FROM_M: out1[l] = seq1[p->i]; out2[l] = seq2[p->j];
820
+ outm[l] = (seq11[p->i] == seq22[p->j] && seq11[p->i] != ap->row)? '|' : ' ';
821
+ break;
822
+ case FROM_I: out1[l] = '-'; out2[l] = seq2[p->j]; outm[l] = ' '; break;
823
+ case FROM_D: out1[l] = seq1[p->i]; out2[l] = '-'; outm[l] = ' '; break;
824
+ }
825
+ }
826
+ out1[l] = out2[l] = outm[l] = '\0';
827
+ ++seq11; ++seq22;
828
+ }
829
+
830
+ free(seq11);
831
+ free(seq22);
832
+
833
+ p = aa->path + aa->path_len - 1;
834
+ aa->start1 = p->i? p->i : 1;
835
+ aa->end1 = aa->path->i;
836
+ aa->start2 = p->j? p->j : 1;
837
+ aa->end2 = aa->path->j;
838
+ aa->cigar32 = aln_path2cigar32(aa->path, aa->path_len, &aa->n_cigar);
839
+
840
+ return aa;
841
+ }
842
+ AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int thres)
843
+ {
844
+ return aln_stdaln_aux(seq1, seq2, ap, type, thres, -1, -1);
845
+ }
846
+
847
+ /* for backward compatibility */
848
+ uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
849
+ {
850
+ uint32_t *cigar32;
851
+ uint16_t *cigar;
852
+ int i;
853
+ cigar32 = aln_path2cigar32(path, path_len, n_cigar);
854
+ cigar = (uint16_t*)cigar32;
855
+ for (i = 0; i < *n_cigar; ++i)
856
+ cigar[i] = (cigar32[i]&0xf)<<14 | (cigar32[i]>>4&0x3fff);
857
+ return cigar;
858
+ }
859
+
860
+ /* newly added functions (2009-07-21) */
861
+
862
+ int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
863
+ path_t *path, int *path_len, int G0, uint8_t *_mem)
864
+ {
865
+ int q, r, qr, tmp_len;
866
+ int32_t **s_array, *score_array;
867
+ int is_overflow, of_base;
868
+ uint32_t *eh;
869
+ int i, j, end_i, end_j;
870
+ int score, start, end;
871
+ int *score_matrix, N_MATRIX_ROW;
872
+ uint8_t *mem, *_p;
873
+
874
+ /* initialize some align-related parameters. just for compatibility */
875
+ q = ap->gap_open;
876
+ r = ap->gap_ext;
877
+ qr = q + r;
878
+ score_matrix = ap->matrix;
879
+ N_MATRIX_ROW = ap->row;
880
+
881
+ if (len1 == 0 || len2 == 0) return -1;
882
+
883
+ /* allocate memory */
884
+ mem = _mem? _mem : calloc((len1 + 2) * (N_MATRIX_ROW + 1), 4);
885
+ _p = mem;
886
+ eh = (uint32_t*)_p, _p += 4 * (len1 + 2);
887
+ s_array = calloc(N_MATRIX_ROW, sizeof(void*));
888
+ for (i = 0; i != N_MATRIX_ROW; ++i)
889
+ s_array[i] = (int32_t*)_p, _p += 4 * len1;
890
+ /* initialization */
891
+ aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);
892
+ tmp_len = len1 + 1;
893
+ start = 1; end = 2;
894
+ end_i = end_j = 0;
895
+ score = 0;
896
+ is_overflow = of_base = 0;
897
+ /* convert the coordinate */
898
+ --seq1; --seq2;
899
+ for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
900
+
901
+ /* dynamic programming */
902
+ memset(eh, 0, 4 * (len1 + 2));
903
+ eh[1] = (uint32_t)G0<<16;
904
+ for (j = 1; j <= len2; ++j) {
905
+ int _start, _end;
906
+ int h1 = 0, f = 0;
907
+ score_array = s_array[seq2[j]];
908
+ /* set start and end */
909
+ _start = j - ap->band_width;
910
+ if (_start < 1) _start = 1;
911
+ if (_start > start) start = _start;
912
+ _end = j + ap->band_width;
913
+ if (_end > len1 + 1) _end = len1 + 1;
914
+ if (_end < end) end = _end;
915
+ if (start == end) break;
916
+ /* adjust eh[] array if overflow occurs. */
917
+ if (is_overflow) {
918
+ int tmp, tmp2;
919
+ score -= LOCAL_OVERFLOW_REDUCE;
920
+ of_base += LOCAL_OVERFLOW_REDUCE;
921
+ is_overflow = 0;
922
+ for (i = start; i <= end; ++i) {
923
+ uint32_t *s = &eh[i];
924
+ tmp = *s >> 16; tmp2 = *s & 0xffff;
925
+ if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
926
+ else tmp2 -= LOCAL_OVERFLOW_REDUCE;
927
+ if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
928
+ else tmp -= LOCAL_OVERFLOW_REDUCE;
929
+ *s = (tmp << 16) | tmp2;
930
+ }
931
+ }
932
+ _start = _end = 0;
933
+ /* the inner loop */
934
+ for (i = start; i < end; ++i) {
935
+ /* At the beginning of each cycle:
936
+ eh[i] -> h[j-1,i-1]<<16 | e[j,i]
937
+ f -> f[j,i]
938
+ h1 -> h[j,i-1]
939
+ */
940
+ uint32_t *s = &eh[i];
941
+ int h = (int)(*s >> 16);
942
+ int e = *s & 0xffff; /* this is e[j,i] */
943
+ *s = (uint32_t)h1 << 16; /* eh[i] now stores h[j,i-1]<<16 */
944
+ h += h? score_array[i] : 0; /* this is left_core() specific */
945
+ /* calculate h[j,i]; don't need to test 0, as {e,f}>=0 */
946
+ h = h > e? h : e;
947
+ h = h > f? h : f; /* h now is h[j,i] */
948
+ h1 = h;
949
+ if (h > 0) {
950
+ if (_start == 0) _start = i;
951
+ _end = i;
952
+ if (score < h) {
953
+ score = h; end_i = i; end_j = j;
954
+ if (score > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
955
+ }
956
+ }
957
+ /* calculate e[j+1,i] and f[j,i+1] */
958
+ h -= qr;
959
+ h = h > 0? h : 0;
960
+ e -= r;
961
+ e = e > h? e : h;
962
+ f -= r;
963
+ f = f > h? f : h;
964
+ *s |= e;
965
+ }
966
+ eh[end] = h1 << 16;
967
+ /* recalculate start and end, the boundaries of the band */
968
+ if (_end <= 0) break; /* no cell in this row has a positive score */
969
+ start = _start;
970
+ end = _end + 3;
971
+ }
972
+
973
+ score += of_base - 1;
974
+ if (score <= 0) {
975
+ if (path_len) *path_len = 0;
976
+ goto end_left_func;
977
+ }
978
+
979
+ if (path == 0) goto end_left_func;
980
+
981
+ if (path_len == 0) {
982
+ path[0].i = end_i; path[0].j = end_j;
983
+ goto end_left_func;
984
+ }
985
+
986
+ { /* call global alignment to fill the path */
987
+ int score_g = 0;
988
+ j = (end_i - 1 > end_j - 1)? end_i - 1 : end_j - 1;
989
+ ++j; /* j is the maximum band_width */
990
+ for (i = ap->band_width;; i <<= 1) {
991
+ AlnParam ap_real = *ap;
992
+ ap_real.gap_end = -1;
993
+ ap_real.band_width = i;
994
+ score_g = aln_global_core(seq1 + 1, end_i, seq2 + 1, end_j, &ap_real, path, path_len);
995
+ if (score == score_g) break;
996
+ if (i > j) break;
997
+ }
998
+ if (score > score_g)
999
+ fprintf(stderr, "[aln_left_core] no suitable bandwidth: %d < %d\n", score_g, score);
1000
+ score = score_g;
1001
+ }
1002
+
1003
+ end_left_func:
1004
+ /* free */
1005
+ free(s_array);
1006
+ if (!_mem) free(mem);
1007
+ return score;
1008
+ }
1009
+
1010
+ uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar)
1011
+ {
1012
+ int i, n;
1013
+ uint32_t *cigar;
1014
+ unsigned char last_type;
1015
+
1016
+ if (path_len == 0 || path == 0) {
1017
+ *n_cigar = 0;
1018
+ return 0;
1019
+ }
1020
+
1021
+ last_type = path->ctype;
1022
+ for (i = n = 1; i < path_len; ++i) {
1023
+ if (last_type != path[i].ctype) ++n;
1024
+ last_type = path[i].ctype;
1025
+ }
1026
+ *n_cigar = n;
1027
+ cigar = (uint32_t*)malloc(*n_cigar * 4);
1028
+
1029
+ cigar[0] = 1u << 4 | path[path_len-1].ctype;
1030
+ last_type = path[path_len-1].ctype;
1031
+ for (i = path_len - 2, n = 0; i >= 0; --i) {
1032
+ if (path[i].ctype == last_type) cigar[n] += 1u << 4;
1033
+ else {
1034
+ cigar[++n] = 1u << 4 | path[i].ctype;
1035
+ last_type = path[i].ctype;
1036
+ }
1037
+ }
1038
+
1039
+ return cigar;
1040
+ }
1041
+
1042
+ #ifdef STDALN_MAIN
1043
+ int main()
1044
+ {
1045
+ AlnAln *aln_local, *aln_global, *aln_left;
1046
+ int i;
1047
+
1048
+ aln_local = aln_stdaln("CGTGCGATGCactgCATACGGCTCGCCTAGATCA", "AAGGGATGCTCTGCATCgCTCGGCTAGCTGT", &aln_param_blast, 0, 1);
1049
+ aln_global = aln_stdaln("CGTGCGATGCactgCATACGGCTCGCCTAGATCA", "AAGGGATGCTCTGCATCGgCTCGGCTAGCTGT", &aln_param_blast, 1, 1);
1050
+ // aln_left = aln_stdaln( "GATGCACTGCATACGGCTCGCCTAGATCA", "GATGCTCTGCATCGgCTCGGCTAGCTGT", &aln_param_blast, 2, 1);
1051
+ aln_left = aln_stdaln("CACCTTCGACTCACGTCTCATTCTCGGAGTCGAGTGGACGGTCCCTCATACACGAACAGGTTC",
1052
+ "CACCTTCGACTTTCACCTCTCATTCTCGGACTCGAGTGGACGGTCCCTCATCCAAGAACAGGGTCTGTGAAA", &aln_param_blast, 2, 1);
1053
+
1054
+ printf(">%d,%d\t%d,%d\n", aln_local->start1, aln_local->end1, aln_local->start2, aln_local->end2);
1055
+ printf("%s\n%s\n%s\n", aln_local->out1, aln_local->outm, aln_local->out2);
1056
+
1057
+ printf(">%d,%d\t%d,%d\t", aln_global->start1, aln_global->end1, aln_global->start2, aln_global->end2);
1058
+ for (i = 0; i != aln_global->n_cigar; ++i)
1059
+ printf("%d%c", aln_global->cigar32[i]>>4, "MID"[aln_global->cigar32[i]&0xf]);
1060
+ printf("\n%s\n%s\n%s\n", aln_global->out1, aln_global->outm, aln_global->out2);
1061
+
1062
+ printf(">%d\t%d,%d\t%d,%d\t", aln_left->score, aln_left->start1, aln_left->end1, aln_left->start2, aln_left->end2);
1063
+ for (i = 0; i != aln_left->n_cigar; ++i)
1064
+ printf("%d%c", aln_left->cigar32[i]>>4, "MID"[aln_left->cigar32[i]&0xf]);
1065
+ printf("\n%s\n%s\n%s\n", aln_left->out1, aln_left->outm, aln_left->out2);
1066
+
1067
+ aln_free_AlnAln(aln_local);
1068
+ aln_free_AlnAln(aln_global);
1069
+ aln_free_AlnAln(aln_left);
1070
+ return 0;
1071
+ }
1072
+ #endif