bio-bwa 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/.document +5 -0
  2. data/Gemfile +15 -0
  3. data/Gemfile.lock +28 -0
  4. data/LICENSE.txt +35 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +56 -0
  7. data/VERSION +1 -0
  8. data/bio-bwa.gemspec +152 -0
  9. data/doc/Bio.html +93 -0
  10. data/doc/Bio/BWA.html +2884 -0
  11. data/doc/Bio/BWA/Library.html +229 -0
  12. data/doc/_index.html +119 -0
  13. data/doc/class_list.html +36 -0
  14. data/doc/css/common.css +1 -0
  15. data/doc/css/full_list.css +53 -0
  16. data/doc/css/style.css +310 -0
  17. data/doc/file.LICENSE.html +88 -0
  18. data/doc/file.README.html +119 -0
  19. data/doc/file_list.html +41 -0
  20. data/doc/frames.html +13 -0
  21. data/doc/index.html +119 -0
  22. data/doc/js/app.js +203 -0
  23. data/doc/js/full_list.js +149 -0
  24. data/doc/js/jquery.js +154 -0
  25. data/doc/method_list.html +171 -0
  26. data/doc/top-level-namespace.html +88 -0
  27. data/ext/COPYING +674 -0
  28. data/ext/ChangeLog +3864 -0
  29. data/ext/NEWS +555 -0
  30. data/ext/README +29 -0
  31. data/ext/bamlite.c +155 -0
  32. data/ext/bamlite.h +94 -0
  33. data/ext/bntseq.c +303 -0
  34. data/ext/bntseq.h +80 -0
  35. data/ext/bwa.1 +562 -0
  36. data/ext/bwape.c +807 -0
  37. data/ext/bwase.c +686 -0
  38. data/ext/bwase.h +27 -0
  39. data/ext/bwaseqio.c +222 -0
  40. data/ext/bwt.c +250 -0
  41. data/ext/bwt.h +105 -0
  42. data/ext/bwt_gen/Makefile +23 -0
  43. data/ext/bwt_gen/QSufSort.c +496 -0
  44. data/ext/bwt_gen/QSufSort.h +40 -0
  45. data/ext/bwt_gen/bwt_gen.c +1547 -0
  46. data/ext/bwt_gen/bwt_gen.h +105 -0
  47. data/ext/bwt_lite.c +94 -0
  48. data/ext/bwt_lite.h +29 -0
  49. data/ext/bwtaln.c +345 -0
  50. data/ext/bwtaln.h +150 -0
  51. data/ext/bwtgap.c +264 -0
  52. data/ext/bwtgap.h +38 -0
  53. data/ext/bwtindex.c +186 -0
  54. data/ext/bwtio.c +77 -0
  55. data/ext/bwtmisc.c +269 -0
  56. data/ext/bwtsw2.h +51 -0
  57. data/ext/bwtsw2_aux.c +650 -0
  58. data/ext/bwtsw2_chain.c +107 -0
  59. data/ext/bwtsw2_core.c +594 -0
  60. data/ext/bwtsw2_main.c +100 -0
  61. data/ext/cs2nt.c +191 -0
  62. data/ext/is.c +218 -0
  63. data/ext/khash.h +506 -0
  64. data/ext/kseq.h +208 -0
  65. data/ext/ksort.h +269 -0
  66. data/ext/kstring.c +35 -0
  67. data/ext/kstring.h +46 -0
  68. data/ext/kvec.h +90 -0
  69. data/ext/main.c +63 -0
  70. data/ext/main.h +29 -0
  71. data/ext/mkrf_conf.rb +49 -0
  72. data/ext/qualfa2fq.pl +27 -0
  73. data/ext/simple_dp.c +162 -0
  74. data/ext/simpletest.c +23 -0
  75. data/ext/solid2fastq.pl +111 -0
  76. data/ext/stdaln.c +1072 -0
  77. data/ext/stdaln.h +162 -0
  78. data/ext/utils.c +82 -0
  79. data/ext/utils.h +54 -0
  80. data/lib/bio-bwa.rb +7 -0
  81. data/lib/bio/bwa.rb +312 -0
  82. data/lib/bio/bwa/library.rb +42 -0
  83. data/test/data/testdata.fa +602 -0
  84. data/test/data/testdata.long.fa +175 -0
  85. data/test/data/testdata.short.fa +2 -0
  86. data/test/helper.rb +18 -0
  87. data/test/test_bio-bwa_basic.rb +62 -0
  88. data/test/test_bio-bwa_make_index.rb +42 -0
  89. data/test/test_bio-bwa_run_aln.rb +49 -0
  90. data/test/test_bio-bwa_sam_conversion.rb +49 -0
  91. metadata +218 -0
data/ext/bwtaln.h ADDED
@@ -0,0 +1,150 @@
1
+ #ifndef BWTALN_H
2
+ #define BWTALN_H
3
+
4
+ #include <stdint.h>
5
+ #include "bwt.h"
6
+
7
+ #define BWA_TYPE_NO_MATCH 0
8
+ #define BWA_TYPE_UNIQUE 1
9
+ #define BWA_TYPE_REPEAT 2
10
+ #define BWA_TYPE_MATESW 3
11
+
12
+ #define SAM_FPD 1 // paired
13
+ #define SAM_FPP 2 // properly paired
14
+ #define SAM_FSU 4 // self-unmapped
15
+ #define SAM_FMU 8 // mate-unmapped
16
+ #define SAM_FSR 16 // self on the reverse strand
17
+ #define SAM_FMR 32 // mate on the reverse strand
18
+ #define SAM_FR1 64 // this is read one
19
+ #define SAM_FR2 128 // this is read two
20
+ #define SAM_FSC 256 // secondary alignment
21
+
22
+ #define BWA_AVG_ERR 0.02
23
+ #define BWA_MIN_RDLEN 35 // for read trimming
24
+
25
+ #ifndef bns_pac
26
+ #define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
27
+ #endif
28
+
29
+ typedef struct { // per-read-position bound consulted by bwt_match_gap() to prune search branches
30
+ bwtint_t w; // counter reduced by gap_shadow() by the size of each reported hit; presumably an SA interval width -- TODO confirm against the code that fills it
31
+ int bid; // bwt_match_gap() drops an entry whose remaining difference budget is below this value
32
+ } bwt_width_t;
33
+
34
+ typedef struct { // one hit reported by bwt_match_gap()
35
+ uint32_t n_mm:8, n_gapo:8, n_gape:8, a:1; // mismatch / gap-open / gap-extension counts of the hit; a is the 0/1 strand index the hit was found with
36
+ bwtint_t k, l; // suffix-array interval of the hit (k..l inclusive; bwt_match_gap counts l - k + 1 occurrences)
37
+ int score; // n_mm*s_mm + n_gapo*s_gapo + n_gape*s_gape using the penalties in gap_opt_t
38
+ } bwt_aln1_t;
39
+
40
+ typedef uint16_t bwa_cigar_t;
41
+ /* rgoya: If changing order of bytes, beware of operations like:
42
+ * s->cigar[0] += s->full_len - s->len;
43
+ */
44
+ #define CIGAR_OP_SHIFT 14
45
+ #define CIGAR_LN_MASK 0x3fff
46
+
47
+ #define __cigar_op(__cigar) ((__cigar)>>CIGAR_OP_SHIFT)
48
+ #define __cigar_len(__cigar) ((__cigar)&CIGAR_LN_MASK)
49
+ #define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len))
50
+
51
+ typedef struct { // one entry of bwa_seq_t::multi; not used by the code visible in this section
52
+ uint32_t pos; // presumably the hit coordinate -- TODO confirm against the code that fills it
53
+ uint32_t n_cigar:15, gap:8, mm:8, strand:1; // n_cigar presumably counts the elements of cigar; gap/mm/strand meanings inferred from names only -- TODO confirm
54
+ bwa_cigar_t *cigar; // array of packed 16-bit CIGAR elements (see __cigar_op / __cigar_len)
55
+ } bwt_multi1_t;
56
+
57
+ typedef struct { // one read together with its alignment results
58
+ char *name;
59
+ ubyte_t *seq, *rseq, *qual; // presumably the read, its reverse and its base qualities -- TODO confirm encoding with bwaseqio.c
60
+ uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8; // len is a 20-bit field; type presumably holds one of BWA_TYPE_* and extra_flag SAM_F* bits -- TODO confirm
61
+ uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8; // same 8-bit difference counters as bwt_aln1_t, plus a mapping quality
62
+ int score;
63
+ int clip_len;
64
+ // alignments in SA coordinates
65
+ int n_aln; // number of elements in aln
66
+ bwt_aln1_t *aln;
67
+ // multiple hits
68
+ int n_multi; // number of elements in multi
69
+ bwt_multi1_t *multi;
70
+ // alignment information
71
+ bwtint_t sa, pos;
72
+ uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ
73
+ int n_cigar; // number of elements in cigar
74
+ bwa_cigar_t *cigar; // packed 16-bit CIGAR elements (see __cigar_op / __cigar_len)
75
+ // for multi-threading only
76
+ int tid;
77
+ // barcode
78
+ char bc[16]; // null terminated; up to 15 bases
79
+ // NM and MD tags
80
+ uint32_t full_len:20, nm:12; // full_len - len is added to cigar[0] somewhere (see the rgoya note next to bwa_cigar_t)
81
+ char *md;
82
+ } bwa_seq_t;
83
+
84
+ #define BWA_MODE_GAPE 0x01
85
+ #define BWA_MODE_COMPREAD 0x02
86
+ #define BWA_MODE_LOGGAP 0x04
87
+ #define BWA_MODE_NONSTOP 0x10
88
+ #define BWA_MODE_BAM 0x20
89
+ #define BWA_MODE_BAM_SE 0x40
90
+ #define BWA_MODE_BAM_READ1 0x80
91
+ #define BWA_MODE_BAM_READ2 0x100
92
+ #define BWA_MODE_IL13 0x200
93
+
94
+ typedef struct { // options of the gapped BWT search (read by bwt_match_gap and its helpers)
95
+ int s_mm, s_gapo, s_gape; // penalties per mismatch, gap open and gap extension; combined by aln_score() in bwtgap.c
96
+ int mode; // BWA_MODE_* flags; bit 24-31 are the barcode length
97
+ int indel_end_skip, max_del_occ, max_entries; // no indel within indel_end_skip (plus a gap-dependent term) of either read end; deletion extension limit on interval size; search stops when the stack holds more than max_entries
98
+ float fnr; // not read by the code in this section -- presumably feeds bwa_cal_maxdiff(); TODO confirm
99
+ int max_diff, max_gapo, max_gape; // upper bounds on total differences, gap opens and gap extensions
100
+ int max_seed_diff, seed_len; // difference budget applied inside the seed, and the seed length used to locate it
101
+ int n_threads;
102
+ int max_top2; // search stops at a non-best hit once more than this many best-scoring occurrences were counted
103
+ int trim_qual;
104
+ } gap_opt_t;
105
+
106
+ #define BWA_PET_STD 1
107
+ #define BWA_PET_SOLID 2
108
+
109
+ typedef struct { // paired-end options; not referenced by the code visible in this section, so field notes are assumptions to verify
110
+ int max_isize, force_isize;
111
+ int max_occ;
112
+ int n_multi, N_multi;
113
+ int type, is_sw, is_preload; // type presumably takes BWA_PET_STD or BWA_PET_SOLID -- TODO confirm in bwape.c
114
+ double ap_prior;
115
+ } pe_opt_t;
116
+
117
+ struct __bwa_seqio_t;
118
+ typedef struct __bwa_seqio_t bwa_seqio_t;
119
+
120
+ #ifdef __cplusplus
121
+ extern "C" {
122
+ #endif
123
+
124
+ gap_opt_t *gap_init_opt();
125
+ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt);
126
+
127
+ bwa_seqio_t *bwa_seq_open(const char *fn);
128
+ bwa_seqio_t *bwa_bam_open(const char *fn, int which);
129
+ void bwa_seq_close(bwa_seqio_t *bs);
130
+ void seq_reverse(int len, ubyte_t *seq, int is_comp);
131
+ bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
132
+ void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);
133
+
134
+ int bwa_cal_maxdiff(int l, double err, double thres);
135
+ void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt);
136
+
137
+ void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
138
+
139
+
140
+ /* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
141
+ __cigar_op and __cigar_len while keeping stdaln stand alone */
142
+ #include "stdaln.h"
143
+
144
+ bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
145
+
146
+ #ifdef __cplusplus
147
+ }
148
+ #endif
149
+
150
+ #endif
data/ext/bwtgap.c ADDED
@@ -0,0 +1,264 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "bwtgap.h"
5
+ #include "bwtaln.h"
6
+
7
+ #define STATE_M 0
8
+ #define STATE_I 1
9
+ #define STATE_D 2
10
+
11
+ #define aln_score(m,o,e,p) ((m)*(p)->s_mm + (o)*(p)->s_gapo + (e)*(p)->s_gape)
12
+
13
+ gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt)
14
+ {
15
+ int i;
16
+ gap_stack_t *stack;
17
+ stack = (gap_stack_t*)calloc(1, sizeof(gap_stack_t));
18
+ stack->n_stacks = aln_score(max_mm+1, max_gapo+1, max_gape+1, opt);
19
+ stack->stacks = (gap_stack1_t*)calloc(stack->n_stacks, sizeof(gap_stack1_t));
20
+ for (i = 0; i != stack->n_stacks; ++i) {
21
+ gap_stack1_t *p = stack->stacks + i;
22
+ p->m_entries = 4;
23
+ p->stack = (gap_entry_t*)calloc(p->m_entries, sizeof(gap_entry_t));
24
+ }
25
+ return stack;
26
+ }
27
+
28
+ void gap_destroy_stack(gap_stack_t *stack)
29
+ {
30
+ int i;
31
+ for (i = 0; i != stack->n_stacks; ++i) free(stack->stacks[i].stack);
32
+ free(stack->stacks);
33
+ free(stack);
34
+ }
35
+
36
+ static void gap_reset_stack(gap_stack_t *stack)
37
+ {
38
+ int i;
39
+ for (i = 0; i != stack->n_stacks; ++i)
40
+ stack->stacks[i].n_entries = 0;
41
+ stack->best = stack->n_stacks;
42
+ stack->n_entries = 0;
43
+ }
44
+
45
+ static inline void gap_push(gap_stack_t *stack, int a, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape,
46
+ int state, int is_diff, const gap_opt_t *opt)
47
+ {
48
+ int score;
49
+ gap_entry_t *p;
50
+ gap_stack1_t *q;
51
+ score = aln_score(n_mm, n_gapo, n_gape, opt);
52
+ q = stack->stacks + score;
53
+ if (q->n_entries == q->m_entries) {
54
+ q->m_entries <<= 1;
55
+ q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
56
+ }
57
+ p = q->stack + q->n_entries;
58
+ p->info = (u_int32_t)score<<21 | a<<20 | i; p->k = k; p->l = l;
59
+ p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->state = state;
60
+ if (is_diff) p->last_diff_pos = i;
61
+ ++(q->n_entries);
62
+ ++(stack->n_entries);
63
+ if (stack->best > score) stack->best = score;
64
+ }
65
+
66
+ static inline void gap_pop(gap_stack_t *stack, gap_entry_t *e)
67
+ {
68
+ gap_stack1_t *q;
69
+ q = stack->stacks + stack->best;
70
+ *e = q->stack[q->n_entries - 1];
71
+ --(q->n_entries);
72
+ --(stack->n_entries);
73
+ if (q->n_entries == 0 && stack->n_entries) { // reset best
74
+ int i;
75
+ for (i = stack->best + 1; i < stack->n_stacks; ++i)
76
+ if (stack->stacks[i].n_entries != 0) break;
77
+ stack->best = i;
78
+ } else if (stack->n_entries == 0) stack->best = stack->n_stacks;
79
+ }
80
+
81
+ static inline void gap_shadow(int x, int len, bwtint_t max, int last_diff_pos, bwt_width_t *w)
82
+ {
83
+ int i, j;
84
+ for (i = j = 0; i < last_diff_pos; ++i) {
85
+ if (w[i].w > x) w[i].w -= x;
86
+ else if (w[i].w == x) {
87
+ w[i].bid = 1;
88
+ w[i].w = max - (++j);
89
+ } // else should not happen
90
+ }
91
+ }
92
+
93
/*
 * Integer base-2 logarithm: the index of the highest set bit of v,
 * i.e. floor(log2(v)) for v > 0.  Returns 0 for v == 0.
 */
static inline int int_log2(uint32_t v)
{
	int result = 0, shift;
	/* binary search over the bit position: 16, 8, 4, 2, 1 */
	for (shift = 16; shift > 0; shift >>= 1) {
		if (v >> shift) {
			v >>= shift;
			result += shift;
		}
	}
	return result;
}
103
+
104
+ bwt_aln1_t *bwt_match_gap(bwt_t *const bwts[2], int len, const ubyte_t *seq[2], bwt_width_t *w[2],
105
+ bwt_width_t *seed_w[2], const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)
106
+ {
107
+ int best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);
108
+ int best_diff = opt->max_diff + 1, max_diff = opt->max_diff;
109
+ int best_cnt = 0;
110
+ int max_entries = 0, j, _j, n_aln, m_aln;
111
+ bwt_aln1_t *aln;
112
+
113
+ m_aln = 4; n_aln = 0;
114
+ aln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t));
115
+
116
+ // check whether there are too many N
117
+ for (j = _j = 0; j < len; ++j)
118
+ if (seq[0][j] > 3) ++_j;
119
+ if (_j > max_diff) {
120
+ *_n_aln = n_aln;
121
+ return aln;
122
+ }
123
+
124
+ //for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\t[%d,%u]\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);
125
+ gap_reset_stack(stack); // reset stack
126
+ gap_push(stack, 0, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
127
+ gap_push(stack, 1, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
128
+
129
+ while (stack->n_entries) {
130
+ gap_entry_t e;
131
+ int a, i, m, m_seed = 0, hit_found, allow_diff, allow_M, tmp;
132
+ bwtint_t k, l, cnt_k[4], cnt_l[4], occ;
133
+ const bwt_t *bwt;
134
+ const ubyte_t *str;
135
+ const bwt_width_t *seed_width = 0;
136
+ bwt_width_t *width;
137
+
138
+ if (max_entries < stack->n_entries) max_entries = stack->n_entries;
139
+ if (stack->n_entries > opt->max_entries) break;
140
+ gap_pop(stack, &e); // get the best entry
141
+ k = e.k; l = e.l; // SA interval
142
+ a = e.info>>20&1; i = e.info&0xffff; // strand, length
143
+ if (!(opt->mode & BWA_MODE_NONSTOP) && e.info>>21 > best_score + opt->s_mm) break; // no need to proceed
144
+
145
+ m = max_diff - (e.n_mm + e.n_gapo);
146
+ if (opt->mode & BWA_MODE_GAPE) m -= e.n_gape;
147
+ if (m < 0) continue;
148
+ bwt = bwts[1-a]; str = seq[a]; width = w[a];
149
+ if (seed_w) { // apply seeding
150
+ seed_width = seed_w[a];
151
+ m_seed = opt->max_seed_diff - (e.n_mm + e.n_gapo);
152
+ if (opt->mode & BWA_MODE_GAPE) m_seed -= e.n_gape;
153
+ }
154
+ //printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%u,%u]\t[%u,%u]\t%d\n", stack->n_entries, a, i, "MID"[e.state], e.n_mm, e.n_gapo, e.n_gape, width[i-1].bid, width[i-1].w, k, l, e.last_diff_pos);
155
+ if (i > 0 && m < width[i-1].bid) continue;
156
+
157
+ // check whether a hit is found
158
+ hit_found = 0;
159
+ if (i == 0) hit_found = 1;
160
+ else if (m == 0 && (e.state == STATE_M || (opt->mode&BWA_MODE_GAPE) || e.n_gape == opt->max_gape)) { // no diff allowed
161
+ if (bwt_match_exact_alt(bwt, i, str, &k, &l)) hit_found = 1;
162
+ else continue; // no hit, skip
163
+ }
164
+
165
+ if (hit_found) { // action for found hits
166
+ int score = aln_score(e.n_mm, e.n_gapo, e.n_gape, opt);
167
+ int do_add = 1;
168
+ //printf("#2 hits found: %d:(%u,%u)\n", e.n_mm+e.n_gapo, k, l);
169
+ if (n_aln == 0) {
170
+ best_score = score;
171
+ best_diff = e.n_mm + e.n_gapo;
172
+ if (opt->mode & BWA_MODE_GAPE) best_diff += e.n_gape;
173
+ if (!(opt->mode & BWA_MODE_NONSTOP))
174
+ max_diff = (best_diff + 1 > opt->max_diff)? opt->max_diff : best_diff + 1; // top2 behaviour
175
+ }
176
+ if (score == best_score) best_cnt += l - k + 1;
177
+ else if (best_cnt > opt->max_top2) break; // top2b behaviour
178
+ if (e.n_gapo) { // check whether the hit has been found. this may happen when a gap occurs in a tandem repeat
179
+ for (j = 0; j != n_aln; ++j)
180
+ if (aln[j].k == k && aln[j].l == l) break;
181
+ if (j < n_aln) do_add = 0;
182
+ }
183
+ if (do_add) { // append
184
+ bwt_aln1_t *p;
185
+ gap_shadow(l - k + 1, len, bwt->seq_len, e.last_diff_pos, width);
186
+ if (n_aln == m_aln) {
187
+ m_aln <<= 1;
188
+ aln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t));
189
+ memset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t));
190
+ }
191
+ p = aln + n_aln;
192
+ p->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape; p->a = a;
193
+ p->k = k; p->l = l;
194
+ p->score = score;
195
+ ++n_aln;
196
+ }
197
+ continue;
198
+ }
199
+
200
+ --i;
201
+ bwt_2occ4(bwt, k - 1, l, cnt_k, cnt_l); // retrieve Occ values
202
+ occ = l - k + 1;
203
+ // test whether diff is allowed
204
+ allow_diff = allow_M = 1;
205
+ if (i > 0) {
206
+ int ii = i - (len - opt->seed_len);
207
+ if (width[i-1].bid > m-1) allow_diff = 0;
208
+ else if (width[i-1].bid == m-1 && width[i].bid == m-1 && width[i-1].w == width[i].w) allow_M = 0;
209
+ if (seed_w && ii > 0) {
210
+ if (seed_width[ii-1].bid > m_seed-1) allow_diff = 0;
211
+ else if (seed_width[ii-1].bid == m_seed-1 && seed_width[ii].bid == m_seed-1
212
+ && seed_width[ii-1].w == seed_width[ii].w) allow_M = 0;
213
+ }
214
+ }
215
+ // indels
216
+ tmp = (opt->mode & BWA_MODE_LOGGAP)? int_log2(e.n_gape + e.n_gapo)/2+1 : e.n_gapo + e.n_gape;
217
+ if (allow_diff && i >= opt->indel_end_skip + tmp && len - i >= opt->indel_end_skip + tmp) {
218
+ if (e.state == STATE_M) { // gap open
219
+ if (e.n_gapo < opt->max_gapo) { // gap open is allowed
220
+ // insertion
221
+ gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_I, 1, opt);
222
+ // deletion
223
+ for (j = 0; j != 4; ++j) {
224
+ k = bwt->L2[j] + cnt_k[j] + 1;
225
+ l = bwt->L2[j] + cnt_l[j];
226
+ if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_D, 1, opt);
227
+ }
228
+ }
229
+ } else if (e.state == STATE_I) { // extention of an insertion
230
+ if (e.n_gape < opt->max_gape) // gap extention is allowed
231
+ gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_I, 1, opt);
232
+ } else if (e.state == STATE_D) { // extention of a deletion
233
+ if (e.n_gape < opt->max_gape) { // gap extention is allowed
234
+ if (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {
235
+ for (j = 0; j != 4; ++j) {
236
+ k = bwt->L2[j] + cnt_k[j] + 1;
237
+ l = bwt->L2[j] + cnt_l[j];
238
+ if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_D, 1, opt);
239
+ }
240
+ }
241
+ }
242
+ }
243
+ }
244
+ // mismatches
245
+ if (allow_diff && allow_M) { // mismatch is allowed
246
+ for (j = 1; j <= 4; ++j) {
247
+ int c = (str[i] + j) & 3;
248
+ int is_mm = (j != 4 || str[i] > 3);
249
+ k = bwt->L2[c] + cnt_k[c] + 1;
250
+ l = bwt->L2[c] + cnt_l[c];
251
+ if (k <= l) gap_push(stack, a, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, STATE_M, is_mm, opt);
252
+ }
253
+ } else if (str[i] < 4) { // try exact match only
254
+ int c = str[i] & 3;
255
+ k = bwt->L2[c] + cnt_k[c] + 1;
256
+ l = bwt->L2[c] + cnt_l[c];
257
+ if (k <= l) gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape, STATE_M, 0, opt);
258
+ }
259
+ }
260
+
261
+ *_n_aln = n_aln;
262
+ //fprintf(stderr, "max_entries = %d\n", max_entries);
263
+ return aln;
264
+ }
data/ext/bwtgap.h ADDED
@@ -0,0 +1,38 @@
1
+ #ifndef BWTGAP_H_
2
+ #define BWTGAP_H_
3
+
4
+ #include "bwt.h"
5
+ #include "bwtaln.h"
6
+
7
+ typedef struct { // recursion stack
8
+ u_int32_t info; // score<<21 | a<<20 | i
9
+ u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
10
+ bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
11
+ int last_diff_pos;
12
+ } gap_entry_t;
13
+
14
+ typedef struct { // growable LIFO of search entries that all share one score
15
+ int n_entries, m_entries; // entries in use / allocated capacity (gap_push doubles m_entries when the two are equal)
16
+ gap_entry_t *stack; // heap array of m_entries elements; the last used element is popped first
17
+ } gap_stack1_t;
18
+
19
+ typedef struct { // score-indexed set of entry stacks used by bwt_match_gap()
20
+ int n_stacks, best, n_entries; // number of buckets; lowest score whose bucket is non-empty (n_stacks when all are empty); total entries over all buckets
21
+ gap_stack1_t *stacks; // bucket array indexed directly by alignment score
22
+ } gap_stack_t;
23
+
24
+ #ifdef __cplusplus
25
+ extern "C" {
26
+ #endif
27
+
28
+ gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt);
29
+ void gap_destroy_stack(gap_stack_t *stack);
30
+ bwt_aln1_t *bwt_match_gap(bwt_t *const bwt[2], int len, const ubyte_t *seq[2], bwt_width_t *w[2],
31
+ bwt_width_t *seed_w[2], const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack);
32
+ void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
33
+
34
+ #ifdef __cplusplus
35
+ }
36
+ #endif
37
+
38
+ #endif