bio-bwa 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/.document +5 -0
  2. data/Gemfile +15 -0
  3. data/Gemfile.lock +28 -0
  4. data/LICENSE.txt +35 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +56 -0
  7. data/VERSION +1 -0
  8. data/bio-bwa.gemspec +152 -0
  9. data/doc/Bio.html +93 -0
  10. data/doc/Bio/BWA.html +2884 -0
  11. data/doc/Bio/BWA/Library.html +229 -0
  12. data/doc/_index.html +119 -0
  13. data/doc/class_list.html +36 -0
  14. data/doc/css/common.css +1 -0
  15. data/doc/css/full_list.css +53 -0
  16. data/doc/css/style.css +310 -0
  17. data/doc/file.LICENSE.html +88 -0
  18. data/doc/file.README.html +119 -0
  19. data/doc/file_list.html +41 -0
  20. data/doc/frames.html +13 -0
  21. data/doc/index.html +119 -0
  22. data/doc/js/app.js +203 -0
  23. data/doc/js/full_list.js +149 -0
  24. data/doc/js/jquery.js +154 -0
  25. data/doc/method_list.html +171 -0
  26. data/doc/top-level-namespace.html +88 -0
  27. data/ext/COPYING +674 -0
  28. data/ext/ChangeLog +3864 -0
  29. data/ext/NEWS +555 -0
  30. data/ext/README +29 -0
  31. data/ext/bamlite.c +155 -0
  32. data/ext/bamlite.h +94 -0
  33. data/ext/bntseq.c +303 -0
  34. data/ext/bntseq.h +80 -0
  35. data/ext/bwa.1 +562 -0
  36. data/ext/bwape.c +807 -0
  37. data/ext/bwase.c +686 -0
  38. data/ext/bwase.h +27 -0
  39. data/ext/bwaseqio.c +222 -0
  40. data/ext/bwt.c +250 -0
  41. data/ext/bwt.h +105 -0
  42. data/ext/bwt_gen/Makefile +23 -0
  43. data/ext/bwt_gen/QSufSort.c +496 -0
  44. data/ext/bwt_gen/QSufSort.h +40 -0
  45. data/ext/bwt_gen/bwt_gen.c +1547 -0
  46. data/ext/bwt_gen/bwt_gen.h +105 -0
  47. data/ext/bwt_lite.c +94 -0
  48. data/ext/bwt_lite.h +29 -0
  49. data/ext/bwtaln.c +345 -0
  50. data/ext/bwtaln.h +150 -0
  51. data/ext/bwtgap.c +264 -0
  52. data/ext/bwtgap.h +38 -0
  53. data/ext/bwtindex.c +186 -0
  54. data/ext/bwtio.c +77 -0
  55. data/ext/bwtmisc.c +269 -0
  56. data/ext/bwtsw2.h +51 -0
  57. data/ext/bwtsw2_aux.c +650 -0
  58. data/ext/bwtsw2_chain.c +107 -0
  59. data/ext/bwtsw2_core.c +594 -0
  60. data/ext/bwtsw2_main.c +100 -0
  61. data/ext/cs2nt.c +191 -0
  62. data/ext/is.c +218 -0
  63. data/ext/khash.h +506 -0
  64. data/ext/kseq.h +208 -0
  65. data/ext/ksort.h +269 -0
  66. data/ext/kstring.c +35 -0
  67. data/ext/kstring.h +46 -0
  68. data/ext/kvec.h +90 -0
  69. data/ext/main.c +63 -0
  70. data/ext/main.h +29 -0
  71. data/ext/mkrf_conf.rb +49 -0
  72. data/ext/qualfa2fq.pl +27 -0
  73. data/ext/simple_dp.c +162 -0
  74. data/ext/simpletest.c +23 -0
  75. data/ext/solid2fastq.pl +111 -0
  76. data/ext/stdaln.c +1072 -0
  77. data/ext/stdaln.h +162 -0
  78. data/ext/utils.c +82 -0
  79. data/ext/utils.h +54 -0
  80. data/lib/bio-bwa.rb +7 -0
  81. data/lib/bio/bwa.rb +312 -0
  82. data/lib/bio/bwa/library.rb +42 -0
  83. data/test/data/testdata.fa +602 -0
  84. data/test/data/testdata.long.fa +175 -0
  85. data/test/data/testdata.short.fa +2 -0
  86. data/test/helper.rb +18 -0
  87. data/test/test_bio-bwa_basic.rb +62 -0
  88. data/test/test_bio-bwa_make_index.rb +42 -0
  89. data/test/test_bio-bwa_run_aln.rb +49 -0
  90. data/test/test_bio-bwa_sam_conversion.rb +49 -0
  91. metadata +218 -0
data/ext/bwtaln.h ADDED
@@ -0,0 +1,150 @@
1
+ #ifndef BWTALN_H
2
+ #define BWTALN_H
3
+
4
+ #include <stdint.h>
5
+ #include "bwt.h"
6
+
7
+ #define BWA_TYPE_NO_MATCH 0
8
+ #define BWA_TYPE_UNIQUE 1
9
+ #define BWA_TYPE_REPEAT 2
10
+ #define BWA_TYPE_MATESW 3
11
+
12
+ #define SAM_FPD 1 // paired
13
+ #define SAM_FPP 2 // properly paired
14
+ #define SAM_FSU 4 // self-unmapped
15
+ #define SAM_FMU 8 // mate-unmapped
16
+ #define SAM_FSR 16 // self on the reverse strand
17
+ #define SAM_FMR 32 // mate on the reverse strand
18
+ #define SAM_FR1 64 // this is read one
19
+ #define SAM_FR2 128 // this is read two
20
+ #define SAM_FSC 256 // secondary alignment
21
+
22
+ #define BWA_AVG_ERR 0.02
23
+ #define BWA_MIN_RDLEN 35 // for read trimming
24
+
25
+ #ifndef bns_pac
26
+ #define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
27
+ #endif
28
+
29
+ typedef struct {
30
+ bwtint_t w;
31
+ int bid;
32
+ } bwt_width_t;
33
+
34
+ typedef struct {
35
+ uint32_t n_mm:8, n_gapo:8, n_gape:8, a:1;
36
+ bwtint_t k, l;
37
+ int score;
38
+ } bwt_aln1_t;
39
+
40
/* A CIGAR element packed into 16 bits: the operation sits above CIGAR_OP_SHIFT
 * (the top 2 bits), the run length in the low 14 bits selected by CIGAR_LN_MASK. */
+ typedef uint16_t bwa_cigar_t;
41
+ /* rgoya: If changing order of bytes, beware of operations like:
42
+ * s->cigar[0] += s->full_len - s->len;
43
+ */
44
+ #define CIGAR_OP_SHIFT 14
45
+ #define CIGAR_LN_MASK 0x3fff
46
+
/* __cigar_op / __cigar_len extract the operation and the length from a packed value.
 * __cigar_create packs them; it does not mask __len, so a length above
 * CIGAR_LN_MASK would spill into the operation bits. */
47
+ #define __cigar_op(__cigar) ((__cigar)>>CIGAR_OP_SHIFT)
48
+ #define __cigar_len(__cigar) ((__cigar)&CIGAR_LN_MASK)
49
+ #define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len))
50
+
51
+ typedef struct {
52
+ uint32_t pos;
53
+ uint32_t n_cigar:15, gap:8, mm:8, strand:1;
54
+ bwa_cigar_t *cigar;
55
+ } bwt_multi1_t;
56
+
57
+ typedef struct {
58
+ char *name;
59
+ ubyte_t *seq, *rseq, *qual;
60
+ uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8;
61
+ uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8;
62
+ int score;
63
+ int clip_len;
64
+ // alignments in SA coordinates
65
+ int n_aln;
66
+ bwt_aln1_t *aln;
67
+ // multiple hits
68
+ int n_multi;
69
+ bwt_multi1_t *multi;
70
+ // alignment information
71
+ bwtint_t sa, pos;
72
+ uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ
73
+ int n_cigar;
74
+ bwa_cigar_t *cigar;
75
+ // for multi-threading only
76
+ int tid;
77
+ // barcode
78
+ char bc[16]; // null terminated; up to 15 bases
79
+ // NM and MD tags
80
+ uint32_t full_len:20, nm:12;
81
+ char *md;
82
+ } bwa_seq_t;
83
+
84
+ #define BWA_MODE_GAPE 0x01
85
+ #define BWA_MODE_COMPREAD 0x02
86
+ #define BWA_MODE_LOGGAP 0x04
87
+ #define BWA_MODE_NONSTOP 0x10
88
+ #define BWA_MODE_BAM 0x20
89
+ #define BWA_MODE_BAM_SE 0x40
90
+ #define BWA_MODE_BAM_READ1 0x80
91
+ #define BWA_MODE_BAM_READ2 0x100
92
+ #define BWA_MODE_IL13 0x200
93
+
94
/* Options controlling the gapped alignment search. */
typedef struct {
	int s_mm;           /* penalty per mismatch */
	int s_gapo;         /* penalty per gap open */
	int s_gape;         /* penalty per gap extension */
	int mode;           /* BWA_MODE_* flags; bit 24-31 are the barcode length */
	int indel_end_skip; /* indels are not tried this close to either end of the read */
	int max_del_occ;
	int max_entries;    /* the search stops once the stack holds more entries than this */
	float fnr;
	int max_diff;       /* maximum number of differences */
	int max_gapo;       /* maximum number of gap opens */
	int max_gape;       /* maximum number of gap extensions */
	int max_seed_diff;  /* maximum number of differences inside the seed */
	int seed_len;
	int n_threads;
	int max_top2;
	int trim_qual;
} gap_opt_t;
105
+
106
+ #define BWA_PET_STD 1
107
+ #define BWA_PET_SOLID 2
108
+
109
/*
 * Options for paired-end processing.
 * NOTE(review): nothing in this header shows how the fields are consumed;
 * the remarks below are limited to what the names and nearby macros suggest.
 */
typedef struct {
	int max_isize;
	int force_isize;
	int max_occ;
	int n_multi;
	int N_multi;
	int type;        /* presumably BWA_PET_STD or BWA_PET_SOLID -- confirm */
	int is_sw;
	int is_preload;
	double ap_prior;
} pe_opt_t;
116
+
117
+ struct __bwa_seqio_t;
118
+ typedef struct __bwa_seqio_t bwa_seqio_t;
119
+
120
+ #ifdef __cplusplus
121
+ extern "C" {
122
+ #endif
123
+
124
+ gap_opt_t *gap_init_opt();
125
+ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt);
126
+
127
+ bwa_seqio_t *bwa_seq_open(const char *fn);
128
+ bwa_seqio_t *bwa_bam_open(const char *fn, int which);
129
+ void bwa_seq_close(bwa_seqio_t *bs);
130
+ void seq_reverse(int len, ubyte_t *seq, int is_comp);
131
+ bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
132
+ void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);
133
+
134
+ int bwa_cal_maxdiff(int l, double err, double thres);
135
+ void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt);
136
+
137
+ void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
138
+
139
+
140
+ /* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
141
+ __cigar_op and __cigar_len while keeping stdaln stand alone */
142
+ #include "stdaln.h"
143
+
144
+ bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
145
+
146
+ #ifdef __cplusplus
147
+ }
148
+ #endif
149
+
150
+ #endif
data/ext/bwtgap.c ADDED
@@ -0,0 +1,264 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "bwtgap.h"
5
+ #include "bwtaln.h"
6
+
7
+ #define STATE_M 0
8
+ #define STATE_I 1
9
+ #define STATE_D 2
10
+
11
+ #define aln_score(m,o,e,p) ((m)*(p)->s_mm + (o)*(p)->s_gapo + (e)*(p)->s_gape)
12
+
13
+ gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt)
14
+ {
15
+ int i;
16
+ gap_stack_t *stack;
17
+ stack = (gap_stack_t*)calloc(1, sizeof(gap_stack_t));
18
+ stack->n_stacks = aln_score(max_mm+1, max_gapo+1, max_gape+1, opt);
19
+ stack->stacks = (gap_stack1_t*)calloc(stack->n_stacks, sizeof(gap_stack1_t));
20
+ for (i = 0; i != stack->n_stacks; ++i) {
21
+ gap_stack1_t *p = stack->stacks + i;
22
+ p->m_entries = 4;
23
+ p->stack = (gap_entry_t*)calloc(p->m_entries, sizeof(gap_entry_t));
24
+ }
25
+ return stack;
26
+ }
27
+
28
+ void gap_destroy_stack(gap_stack_t *stack)
29
+ {
30
+ int i;
31
+ for (i = 0; i != stack->n_stacks; ++i) free(stack->stacks[i].stack);
32
+ free(stack->stacks);
33
+ free(stack);
34
+ }
35
+
36
+ static void gap_reset_stack(gap_stack_t *stack)
37
+ {
38
+ int i;
39
+ for (i = 0; i != stack->n_stacks; ++i)
40
+ stack->stacks[i].n_entries = 0;
41
+ stack->best = stack->n_stacks;
42
+ stack->n_entries = 0;
43
+ }
44
+
45
+ static inline void gap_push(gap_stack_t *stack, int a, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape,
46
+ int state, int is_diff, const gap_opt_t *opt)
47
+ {
48
+ int score;
49
+ gap_entry_t *p;
50
+ gap_stack1_t *q;
51
+ score = aln_score(n_mm, n_gapo, n_gape, opt);
52
+ q = stack->stacks + score;
53
+ if (q->n_entries == q->m_entries) {
54
+ q->m_entries <<= 1;
55
+ q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
56
+ }
57
+ p = q->stack + q->n_entries;
58
+ p->info = (u_int32_t)score<<21 | a<<20 | i; p->k = k; p->l = l;
59
+ p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->state = state;
60
+ if (is_diff) p->last_diff_pos = i;
61
+ ++(q->n_entries);
62
+ ++(stack->n_entries);
63
+ if (stack->best > score) stack->best = score;
64
+ }
65
+
66
+ static inline void gap_pop(gap_stack_t *stack, gap_entry_t *e)
67
+ {
68
+ gap_stack1_t *q;
69
+ q = stack->stacks + stack->best;
70
+ *e = q->stack[q->n_entries - 1];
71
+ --(q->n_entries);
72
+ --(stack->n_entries);
73
+ if (q->n_entries == 0 && stack->n_entries) { // reset best
74
+ int i;
75
+ for (i = stack->best + 1; i < stack->n_stacks; ++i)
76
+ if (stack->stacks[i].n_entries != 0) break;
77
+ stack->best = i;
78
+ } else if (stack->n_entries == 0) stack->best = stack->n_stacks;
79
+ }
80
+
81
+ static inline void gap_shadow(int x, int len, bwtint_t max, int last_diff_pos, bwt_width_t *w)
82
+ {
83
+ int i, j;
84
+ for (i = j = 0; i < last_diff_pos; ++i) {
85
+ if (w[i].w > x) w[i].w -= x;
86
+ else if (w[i].w == x) {
87
+ w[i].bid = 1;
88
+ w[i].w = max - (++j);
89
+ } // else should not happen
90
+ }
91
+ }
92
+
93
/*
 * Floor of the base-2 logarithm of v, i.e. the index of its highest set bit.
 * Returns 0 for v == 0 as well as for v == 1.
 */
static inline int int_log2(uint32_t v)
{
	int result = 0;
	int shift;

	/* binary search for the top bit: try halves of 16, 8, 4, 2, 1 bits */
	for (shift = 16; shift > 0; shift >>= 1) {
		if (v >> shift) {
			v >>= shift;
			result |= shift;
		}
	}
	return result;
}
103
+
104
/*
 * Best-first gapped search of one read against the BWT index.
 *
 * Entries are taken from `stack` in order of increasing score (gap_pop returns
 * an entry from the best bucket) and extended one position at a time by exact
 * match, mismatch, insertion or deletion, subject to the limits in `opt`.
 *
 *   bwts    the two BWTs; strand a is searched in bwts[1-a]
 *   len     read length
 *   seq     the two encoded sequences, indexed by strand; codes > 3 count as N
 *   w       per-strand width records; modified through gap_shadow() when a hit is added
 *   seed_w  per-strand width records for the seed, or NULL to disable seeding
 *   opt     search options and penalties
 *   _n_aln  out: number of hits in the returned array
 *   stack   scratch stack from gap_init_stack(); reset on entry
 *
 * Returns a heap array (calloc/realloc) of *_n_aln hits. The array is
 * returned even when no hit is found, including the early exit taken when
 * seq[0] contains more N bases than opt->max_diff. The array is not freed
 * here, so releasing it falls to the caller.
 *
 * The loop ends when the stack is empty, when it holds more than
 * opt->max_entries entries, when (outside BWA_MODE_NONSTOP) the popped score
 * exceeds best_score + s_mm, or when a worse-scoring hit arrives after more
 * than opt->max_top2 best hits were counted.
 */
+ bwt_aln1_t *bwt_match_gap(bwt_t *const bwts[2], int len, const ubyte_t *seq[2], bwt_width_t *w[2],
105
+ bwt_width_t *seed_w[2], const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)
106
+ {
107
+ int best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);
108
+ int best_diff = opt->max_diff + 1, max_diff = opt->max_diff;
109
+ int best_cnt = 0;
110
+ int max_entries = 0, j, _j, n_aln, m_aln;
111
+ bwt_aln1_t *aln;
112
+
113
+ m_aln = 4; n_aln = 0;
114
+ aln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t));
115
+
116
+ // check whether there are too many N
117
+ for (j = _j = 0; j < len; ++j)
118
+ if (seq[0][j] > 3) ++_j;
119
+ if (_j > max_diff) {
120
+ *_n_aln = n_aln;
121
+ return aln;
122
+ }
123
+
124
+ //for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\t[%d,%u]\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);
125
+ gap_reset_stack(stack); // reset stack
// seed the search with one empty alignment per strand, covering the whole SA range
126
+ gap_push(stack, 0, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
127
+ gap_push(stack, 1, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
128
+
129
+ while (stack->n_entries) {
130
+ gap_entry_t e;
131
+ int a, i, m, m_seed = 0, hit_found, allow_diff, allow_M, tmp;
132
+ bwtint_t k, l, cnt_k[4], cnt_l[4], occ;
133
+ const bwt_t *bwt;
134
+ const ubyte_t *str;
135
+ const bwt_width_t *seed_width = 0;
136
+ bwt_width_t *width;
137
+
138
+ if (max_entries < stack->n_entries) max_entries = stack->n_entries;
139
+ if (stack->n_entries > opt->max_entries) break;
140
+ gap_pop(stack, &e); // get the best entry
141
+ k = e.k; l = e.l; // SA interval
142
+ a = e.info>>20&1; i = e.info&0xffff; // strand, length
143
+ if (!(opt->mode & BWA_MODE_NONSTOP) && e.info>>21 > best_score + opt->s_mm) break; // no need to proceed
144
+
// m: number of further differences this entry may still accumulate
145
+ m = max_diff - (e.n_mm + e.n_gapo);
146
+ if (opt->mode & BWA_MODE_GAPE) m -= e.n_gape;
147
+ if (m < 0) continue;
148
+ bwt = bwts[1-a]; str = seq[a]; width = w[a];
149
+ if (seed_w) { // apply seeding
150
+ seed_width = seed_w[a];
151
+ m_seed = opt->max_seed_diff - (e.n_mm + e.n_gapo);
152
+ if (opt->mode & BWA_MODE_GAPE) m_seed -= e.n_gape;
153
+ }
154
+ //printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%u,%u]\t[%u,%u]\t%d\n", stack->n_entries, a, i, "MID"[e.state], e.n_mm, e.n_gapo, e.n_gape, width[i-1].bid, width[i-1].w, k, l, e.last_diff_pos);
155
+ if (i > 0 && m < width[i-1].bid) continue;
156
+
157
+ // check whether a hit is found
158
+ hit_found = 0;
159
+ if (i == 0) hit_found = 1;
160
+ else if (m == 0 && (e.state == STATE_M || (opt->mode&BWA_MODE_GAPE) || e.n_gape == opt->max_gape)) { // no diff allowed
161
+ if (bwt_match_exact_alt(bwt, i, str, &k, &l)) hit_found = 1;
162
+ else continue; // no hit, skip
163
+ }
164
+
165
+ if (hit_found) { // action for found hits
166
+ int score = aln_score(e.n_mm, e.n_gapo, e.n_gape, opt);
167
+ int do_add = 1;
168
+ //printf("#2 hits found: %d:(%u,%u)\n", e.n_mm+e.n_gapo, k, l);
169
+ if (n_aln == 0) {
170
+ best_score = score;
171
+ best_diff = e.n_mm + e.n_gapo;
172
+ if (opt->mode & BWA_MODE_GAPE) best_diff += e.n_gape;
173
+ if (!(opt->mode & BWA_MODE_NONSTOP))
174
+ max_diff = (best_diff + 1 > opt->max_diff)? opt->max_diff : best_diff + 1; // top2 behaviour
175
+ }
176
+ if (score == best_score) best_cnt += l - k + 1;
177
+ else if (best_cnt > opt->max_top2) break; // top2b behaviour
178
+ if (e.n_gapo) { // check whether the hit has been found. this may happen when a gap occurs in a tandem repeat
179
+ for (j = 0; j != n_aln; ++j)
180
+ if (aln[j].k == k && aln[j].l == l) break;
181
+ if (j < n_aln) do_add = 0;
182
+ }
183
+ if (do_add) { // append
184
+ bwt_aln1_t *p;
185
+ gap_shadow(l - k + 1, len, bwt->seq_len, e.last_diff_pos, width);
186
+ if (n_aln == m_aln) {
187
+ m_aln <<= 1;
188
+ aln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t));
189
+ memset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t));
190
+ }
191
+ p = aln + n_aln;
192
+ p->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape; p->a = a;
193
+ p->k = k; p->l = l;
194
+ p->score = score;
195
+ ++n_aln;
196
+ }
197
+ continue;
198
+ }
199
+
200
+ --i;
201
+ bwt_2occ4(bwt, k - 1, l, cnt_k, cnt_l); // retrieve Occ values
202
+ occ = l - k + 1;
203
+ // test whether diff is allowed
204
+ allow_diff = allow_M = 1;
205
+ if (i > 0) {
206
+ int ii = i - (len - opt->seed_len);
207
+ if (width[i-1].bid > m-1) allow_diff = 0;
208
+ else if (width[i-1].bid == m-1 && width[i].bid == m-1 && width[i-1].w == width[i].w) allow_M = 0;
209
+ if (seed_w && ii > 0) {
210
+ if (seed_width[ii-1].bid > m_seed-1) allow_diff = 0;
211
+ else if (seed_width[ii-1].bid == m_seed-1 && seed_width[ii].bid == m_seed-1
212
+ && seed_width[ii-1].w == seed_width[ii].w) allow_M = 0;
213
+ }
214
+ }
215
+ // indels
216
+ tmp = (opt->mode & BWA_MODE_LOGGAP)? int_log2(e.n_gape + e.n_gapo)/2+1 : e.n_gapo + e.n_gape;
217
+ if (allow_diff && i >= opt->indel_end_skip + tmp && len - i >= opt->indel_end_skip + tmp) {
218
+ if (e.state == STATE_M) { // gap open
219
+ if (e.n_gapo < opt->max_gapo) { // gap open is allowed
220
+ // insertion
221
+ gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_I, 1, opt);
222
+ // deletion
223
+ for (j = 0; j != 4; ++j) {
224
+ k = bwt->L2[j] + cnt_k[j] + 1;
225
+ l = bwt->L2[j] + cnt_l[j];
226
+ if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_D, 1, opt);
227
+ }
228
+ }
229
+ } else if (e.state == STATE_I) { // extension of an insertion
230
+ if (e.n_gape < opt->max_gape) // gap extension is allowed
231
+ gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_I, 1, opt);
232
+ } else if (e.state == STATE_D) { // extension of a deletion
233
+ if (e.n_gape < opt->max_gape) { // gap extension is allowed
234
+ if (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {
235
+ for (j = 0; j != 4; ++j) {
236
+ k = bwt->L2[j] + cnt_k[j] + 1;
237
+ l = bwt->L2[j] + cnt_l[j];
238
+ if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_D, 1, opt);
239
+ }
240
+ }
241
+ }
242
+ }
243
+ }
244
+ // mismatches
// j = 1..3 try the other three bases (substitutions); j = 4 wraps back to
// str[i] itself, which is the exact match unless str[i] is an N (> 3)
245
+ if (allow_diff && allow_M) { // mismatch is allowed
246
+ for (j = 1; j <= 4; ++j) {
247
+ int c = (str[i] + j) & 3;
248
+ int is_mm = (j != 4 || str[i] > 3);
249
+ k = bwt->L2[c] + cnt_k[c] + 1;
250
+ l = bwt->L2[c] + cnt_l[c];
251
+ if (k <= l) gap_push(stack, a, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, STATE_M, is_mm, opt);
252
+ }
253
+ } else if (str[i] < 4) { // try exact match only
254
+ int c = str[i] & 3;
255
+ k = bwt->L2[c] + cnt_k[c] + 1;
256
+ l = bwt->L2[c] + cnt_l[c];
257
+ if (k <= l) gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape, STATE_M, 0, opt);
258
+ }
259
+ }
260
+
261
+ *_n_aln = n_aln;
262
+ //fprintf(stderr, "max_entries = %d\n", max_entries);
263
+ return aln;
264
+ }
data/ext/bwtgap.h ADDED
@@ -0,0 +1,38 @@
1
+ #ifndef BWTGAP_H_
2
+ #define BWTGAP_H_
3
+
4
+ #include "bwt.h"
5
+ #include "bwtaln.h"
6
+
7
+ typedef struct { // recursion stack
8
+ u_int32_t info; // score<<21 | a<<20 | i
9
+ u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
10
+ bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
11
+ int last_diff_pos;
12
+ } gap_entry_t;
13
+
14
+ typedef struct {
15
+ int n_entries, m_entries;
16
+ gap_entry_t *stack;
17
+ } gap_stack1_t;
18
+
19
+ typedef struct {
20
+ int n_stacks, best, n_entries;
21
+ gap_stack1_t *stacks;
22
+ } gap_stack_t;
23
+
24
+ #ifdef __cplusplus
25
+ extern "C" {
26
+ #endif
27
+
28
+ gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt);
29
+ void gap_destroy_stack(gap_stack_t *stack);
30
+ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt[2], int len, const ubyte_t *seq[2], bwt_width_t *w[2],
31
+ bwt_width_t *seed_w[2], const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack);
32
+ void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
33
+
34
+ #ifdef __cplusplus
35
+ }
36
+ #endif
37
+
38
+ #endif