ruby-minigraph 0.0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,441 @@
1
+ #include <stdint.h>
2
+ #include <string.h>
3
+ #include <stdio.h>
4
+ #include <assert.h>
5
+ #include "mgpriv.h"
6
+ #include "kalloc.h"
7
+ #include "krmq.h"
8
+
9
+ static int64_t mg_chain_bk_end(int32_t max_drop, const mg128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
10
+ {
11
+ int64_t i = z[k].y, end_i = -1, max_i = i;
12
+ int32_t max_s = 0;
13
+ if (i < 0 || t[i] != 0) return i;
14
+ do {
15
+ int32_t s;
16
+ t[i] = 2;
17
+ end_i = i = p[i];
18
+ s = i < 0? z[k].x : (int32_t)z[k].x - f[i];
19
+ if (s > max_s) max_s = s, max_i = i;
20
+ else if (max_s - s > max_drop) break;
21
+ } while (i >= 0 && t[i] == 0);
22
+ for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
23
+ t[i] = 0;
24
+ return max_i;
25
+ }
26
+
27
+ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop,
28
+ int32_t extra_u, int32_t *n_u_, int32_t *n_v_)
29
+ {
30
+ mg128_t *z;
31
+ uint64_t *u;
32
+ int64_t i, k, n_z, n_v;
33
+ int32_t n_u;
34
+
35
+ *n_u_ = *n_v_ = 0;
36
+ for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
37
+ if (f[i] >= min_sc) ++n_z;
38
+ if (n_z == 0) return 0;
39
+ KMALLOC(km, z, n_z);
40
+ for (i = 0, k = 0; i < n; ++i) // populate z[]
41
+ if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
42
+ radix_sort_128x(z, z + n_z);
43
+
44
+ memset(t, 0, n * 4);
45
+ for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
46
+ if (t[z[k].y] == 0) {
47
+ int64_t n_v0 = n_v, end_i;
48
+ int32_t sc;
49
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
50
+ for (i = z[k].y; i != end_i; i = p[i])
51
+ ++n_v, t[i] = 1;
52
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
53
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
54
+ ++n_u;
55
+ else n_v = n_v0;
56
+ }
57
+ }
58
+ KMALLOC(km, u, n_u + extra_u);
59
+ memset(t, 0, n * 4);
60
+ for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
61
+ if (t[z[k].y] == 0) {
62
+ int64_t n_v0 = n_v, end_i;
63
+ int32_t sc;
64
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
65
+ for (i = z[k].y; i != end_i; i = p[i])
66
+ v[n_v++] = i, t[i] = 1;
67
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
68
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
69
+ u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
70
+ else n_v = n_v0;
71
+ }
72
+ }
73
+ kfree(km, z);
74
+ assert(n_v < INT32_MAX);
75
+ *n_u_ = n_u, *n_v_ = n_v;
76
+ return u;
77
+ }
78
+
79
+ static mg128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32_t *v, mg128_t *a)
80
+ {
81
+ mg128_t *b, *w;
82
+ uint64_t *u2;
83
+ int64_t i, j, k;
84
+
85
+ // write the result to b[]
86
+ KMALLOC(km, b, n_v);
87
+ for (i = 0, k = 0; i < n_u; ++i) {
88
+ int32_t k0 = k, ni = (int32_t)u[i];
89
+ for (j = 0; j < ni; ++j)
90
+ b[k++] = a[v[k0 + (ni - j - 1)]];
91
+ }
92
+ kfree(km, v);
93
+
94
+ // sort u[] and a[] by the target position, such that adjacent chains may be joined
95
+ KMALLOC(km, w, n_u);
96
+ for (i = k = 0; i < n_u; ++i) {
97
+ w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
98
+ k += (int32_t)u[i];
99
+ }
100
+ radix_sort_128x(w, w + n_u);
101
+ KMALLOC(km, u2, n_u);
102
+ for (i = k = 0; i < n_u; ++i) {
103
+ int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
104
+ u2[i] = u[j];
105
+ memcpy(&a[k], &b[w[i].y>>32], n * sizeof(mg128_t));
106
+ k += n;
107
+ }
108
+ memcpy(u, u2, n_u * 8);
109
+ memcpy(b, a, k * sizeof(mg128_t)); // write _a_ to _b_ and deallocate _a_ because _a_ is oversized, sometimes a lot
110
+ kfree(km, a); kfree(km, w); kfree(km, u2);
111
+ return b;
112
+ }
113
+
114
+ static inline int32_t comput_sc(const mg128_t *ai, const mg128_t *aj, int32_t max_dist_x, int32_t max_dist_y, int32_t bw, float chn_pen_gap, float chn_pen_skip, int is_cdna, int n_seg)
115
+ {
116
+ int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
117
+ int32_t sidi = (ai->y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT;
118
+ int32_t sidj = (aj->y & MG_SEED_SEG_MASK) >> MG_SEED_SEG_SHIFT;
119
+ if (dq <= 0 || dq > max_dist_x) return INT32_MIN;
120
+ dr = (int32_t)(ai->x - aj->x);
121
+ if (sidi == sidj && (dr == 0 || dq > max_dist_y)) return INT32_MIN;
122
+ dd = dr > dq? dr - dq : dq - dr;
123
+ if (sidi == sidj && dd > bw) return INT32_MIN;
124
+ if (n_seg > 1 && !is_cdna && sidi == sidj && dr > max_dist_y) return INT32_MIN;
125
+ dg = dr < dq? dr : dq;
126
+ q_span = aj->y>>32&0xff;
127
+ sc = q_span < dg? q_span : dg;
128
+ if (dd || dg > q_span) {
129
+ float lin_pen, log_pen;
130
+ lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
131
+ log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
132
+ if (is_cdna || sidi != sidj) {
133
+ if (sidi != sidj && dr == 0) ++sc; // possibly due to overlapping paired ends; give a minor bonus
134
+ else if (dr > dq || sidi != sidj) sc -= (int)(lin_pen < log_pen? lin_pen : log_pen); // deletion or jump between paired ends
135
+ else sc -= (int)(lin_pen + .5f * log_pen);
136
+ } else sc -= (int)(lin_pen + .5f * log_pen);
137
+ }
138
+ return sc;
139
+ }
140
+
141
+ /* Input:
142
+ * a[].x: tid<<33 | rev<<32 | tpos
143
+ * a[].y: flags<<40 | q_span<<32 | q_pos
144
+ * Output:
145
+ * n_u: #chains
146
+ * u[]: score<<32 | #anchors (sum of lower 32 bits of u[] is the returned length of a[])
147
+ * input a[] is deallocated on return
148
+ */
149
+ mg128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
150
+ int is_cdna, int n_seg, int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km)
151
+ { // TODO: make sure this works when n has more than 32 bits
152
+ int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
153
+ int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
154
+ uint64_t *u;
155
+
156
+ if (_u) *_u = 0, *n_u_ = 0;
157
+ if (n == 0 || a == 0) {
158
+ kfree(km, a);
159
+ return 0;
160
+ }
161
+ if (max_dist_x < bw) max_dist_x = bw;
162
+ if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
163
+ if (is_cdna) max_drop = INT32_MAX;
164
+ KMALLOC(km, p, n);
165
+ KMALLOC(km, f, n);
166
+ KMALLOC(km, v, n);
167
+ KCALLOC(km, t, n);
168
+
169
+ // fill the score and backtrack arrays
170
+ for (i = 0, max_ii = -1; i < n; ++i) {
171
+ int64_t max_j = -1, end_j;
172
+ int32_t max_f = a[i].y>>32&0xff, n_skip = 0;
173
+ while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist_x)) ++st;
174
+ if (i - st > max_iter) st = i - max_iter;
175
+ for (j = i - 1; j >= st; --j) {
176
+ int32_t sc;
177
+ sc = comput_sc(&a[i], &a[j], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
178
+ ++n_iter;
179
+ if (sc == INT32_MIN) continue;
180
+ sc += f[j];
181
+ if (sc > max_f) {
182
+ max_f = sc, max_j = j;
183
+ if (n_skip > 0) --n_skip;
184
+ } else if (t[j] == (int32_t)i) {
185
+ if (++n_skip > max_skip)
186
+ break;
187
+ }
188
+ if (p[j] >= 0) t[p[j]] = i;
189
+ }
190
+ end_j = j;
191
+ if (max_ii < 0 || a[i].x - a[max_ii].x > (int64_t)max_dist_x) {
192
+ int32_t max = INT32_MIN;
193
+ max_ii = -1;
194
+ for (j = i - 1; j >= st; --j)
195
+ if (max < f[j]) max = f[j], max_ii = j;
196
+ }
197
+ if (max_ii >= 0 && max_ii < end_j) {
198
+ int32_t tmp;
199
+ tmp = comput_sc(&a[i], &a[max_ii], max_dist_x, max_dist_y, bw, chn_pen_gap, chn_pen_skip, is_cdna, n_seg);
200
+ if (tmp != INT32_MIN && max_f < tmp + f[max_ii])
201
+ max_f = tmp + f[max_ii], max_j = max_ii;
202
+ }
203
+ f[i] = max_f, p[i] = max_j;
204
+ v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
205
+ if (max_ii < 0 || (a[i].x - a[max_ii].x <= (int64_t)max_dist_x && f[max_ii] < f[i]))
206
+ max_ii = i;
207
+ if (mmax_f < max_f) mmax_f = max_f;
208
+ }
209
+ if (mg_dbg_flag & MG_DBG_LC_PROF) fprintf(stderr, "LP\tn_iter=%ld\tmmax_f=%d\n", (long)n_iter, mmax_f);
210
+
211
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, 0, &n_u, &n_v);
212
+ *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
213
+ kfree(km, p); kfree(km, f); kfree(km, t);
214
+ if (n_u == 0) {
215
+ kfree(km, a); kfree(km, v);
216
+ return 0;
217
+ }
218
+ return compact_a(km, n_u, u, n_v, v, a);
219
+ }
220
+
221
+ typedef struct lc_elem_s {
222
+ int32_t y;
223
+ int64_t i;
224
+ double pri;
225
+ KRMQ_HEAD(struct lc_elem_s) head;
226
+ } lc_elem_t;
227
+
228
+ #define lc_elem_cmp(a, b) ((a)->y < (b)->y? -1 : (a)->y > (b)->y? 1 : ((a)->i > (b)->i) - ((a)->i < (b)->i))
229
+ #define lc_elem_lt2(a, b) ((a)->pri < (b)->pri)
230
+ KRMQ_INIT(lc_elem, lc_elem_t, head, lc_elem_cmp, lc_elem_lt2)
231
+
232
+ KALLOC_POOL_INIT(rmq, lc_elem_t)
233
+
234
+ static inline int32_t comput_sc_simple(const mg128_t *ai, const mg128_t *aj, float chn_pen_gap, float chn_pen_skip, int32_t *exact, int32_t *width)
235
+ {
236
+ int32_t dq = (int32_t)ai->y - (int32_t)aj->y, dr, dd, dg, q_span, sc;
237
+ dr = (int32_t)(ai->x - aj->x);
238
+ *width = dd = dr > dq? dr - dq : dq - dr;
239
+ dg = dr < dq? dr : dq;
240
+ q_span = aj->y>>32&0xff;
241
+ sc = q_span < dg? q_span : dg;
242
+ if (exact) *exact = (dd == 0 && dg <= q_span);
243
+ if (dd || dq > q_span) {
244
+ float lin_pen, log_pen;
245
+ lin_pen = chn_pen_gap * (float)dd + chn_pen_skip * (float)dg;
246
+ log_pen = dd >= 1? mg_log2(dd + 1) : 0.0f; // mg_log2() only works for dd>=2
247
+ sc -= (int)(lin_pen + .5f * log_pen);
248
+ }
249
+ return sc;
250
+ }
251
+
252
+ mg128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
253
+ int64_t n, mg128_t *a, int *n_u_, uint64_t **_u, void *km)
254
+ {
255
+ int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
256
+ int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
257
+ uint64_t *u;
258
+ lc_elem_t *root = 0, *root_inner = 0;
259
+ void *mem_mp = 0;
260
+ kmp_rmq_t *mp;
261
+
262
+ if (_u) *_u = 0, *n_u_ = 0;
263
+ if (n == 0 || a == 0) {
264
+ kfree(km, a);
265
+ return 0;
266
+ }
267
+ if (max_dist < bw) max_dist = bw;
268
+ if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
269
+ KMALLOC(km, p, n);
270
+ KMALLOC(km, f, n);
271
+ KCALLOC(km, t, n);
272
+ KMALLOC(km, v, n);
273
+ mem_mp = km_init2(km, 0x10000);
274
+ mp = kmp_init_rmq(mem_mp);
275
+
276
+ // fill the score and backtrack arrays
277
+ for (i = i0 = 0; i < n; ++i) {
278
+ int64_t max_j = -1;
279
+ int32_t q_span = a[i].y>>32&0xff, max_f = q_span;
280
+ lc_elem_t s, *q, *r, lo, hi;
281
+ // add in-range anchors
282
+ if (i0 < i && a[i0].x != a[i].x) {
283
+ int64_t j;
284
+ for (j = i0; j < i; ++j) {
285
+ q = kmp_alloc_rmq(mp);
286
+ q->y = (int32_t)a[j].y, q->i = j, q->pri = -(f[j] + 0.5 * chn_pen_gap * ((int32_t)a[j].x + (int32_t)a[j].y));
287
+ krmq_insert(lc_elem, &root, q, 0);
288
+ if (max_dist_inner > 0) {
289
+ r = kmp_alloc_rmq(mp);
290
+ *r = *q;
291
+ krmq_insert(lc_elem, &root_inner, r, 0);
292
+ }
293
+ }
294
+ i0 = i;
295
+ }
296
+ // get rid of active chains out of range
297
+ while (st < i && (a[i].x>>32 != a[st].x>>32 || a[i].x > a[st].x + max_dist || krmq_size(head, root) > cap_rmq_size)) {
298
+ s.y = (int32_t)a[st].y, s.i = st;
299
+ if ((q = krmq_find(lc_elem, root, &s, 0)) != 0) {
300
+ q = krmq_erase(lc_elem, &root, q, 0);
301
+ kmp_free_rmq(mp, q);
302
+ }
303
+ ++st;
304
+ }
305
+ if (max_dist_inner > 0) { // similar to the block above, but applied to the inner tree
306
+ while (st_inner < i && (a[i].x>>32 != a[st_inner].x>>32 || a[i].x > a[st_inner].x + max_dist_inner || krmq_size(head, root_inner) > cap_rmq_size)) {
307
+ s.y = (int32_t)a[st_inner].y, s.i = st_inner;
308
+ if ((q = krmq_find(lc_elem, root_inner, &s, 0)) != 0) {
309
+ q = krmq_erase(lc_elem, &root_inner, q, 0);
310
+ kmp_free_rmq(mp, q);
311
+ }
312
+ ++st_inner;
313
+ }
314
+ }
315
+ // RMQ
316
+ lo.i = INT32_MAX, lo.y = (int32_t)a[i].y - max_dist;
317
+ hi.i = 0, hi.y = (int32_t)a[i].y - 1;
318
+ if ((q = krmq_rmq(lc_elem, root, &lo, &hi)) != 0) {
319
+ int32_t sc, exact, width, n_skip = 0;
320
+ int64_t j = q->i;
321
+ assert(q->y >= lo.y && q->y <= hi.y);
322
+ sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, &exact, &width);
323
+ if (width <= bw && sc > max_f) max_f = sc, max_j = j;
324
+ if (!exact && root_inner && (int32_t)a[i].y > 0) {
325
+ lc_elem_t *lo, *hi;
326
+ s.y = (int32_t)a[i].y - 1, s.i = n;
327
+ krmq_interval(lc_elem, root_inner, &s, &lo, &hi);
328
+ if (lo) {
329
+ const lc_elem_t *q;
330
+ int32_t width, n_rmq_iter = 0;
331
+ krmq_itr_t(lc_elem) itr;
332
+ krmq_itr_find(lc_elem, root_inner, lo, &itr);
333
+ while ((q = krmq_at(&itr)) != 0) {
334
+ if (q->y < (int32_t)a[i].y - max_dist_inner) break;
335
+ ++n_rmq_iter;
336
+ j = q->i;
337
+ sc = f[j] + comput_sc_simple(&a[i], &a[j], chn_pen_gap, chn_pen_skip, 0, &width);
338
+ if (width <= bw) {
339
+ if (sc > max_f) {
340
+ max_f = sc, max_j = j;
341
+ if (n_skip > 0) --n_skip;
342
+ } else if (t[j] == (int32_t)i) {
343
+ if (++n_skip > max_chn_skip)
344
+ break;
345
+ }
346
+ if (p[j] >= 0) t[p[j]] = i;
347
+ }
348
+ if (!krmq_itr_prev(lc_elem, &itr)) break;
349
+ }
350
+ n_iter += n_rmq_iter;
351
+ }
352
+ }
353
+ }
354
+ // set max
355
+ assert(max_j < 0 || (a[max_j].x < a[i].x && (int32_t)a[max_j].y < (int32_t)a[i].y));
356
+ f[i] = max_f, p[i] = max_j;
357
+ v[i] = max_j >= 0 && v[max_j] > max_f? v[max_j] : max_f; // v[] keeps the peak score up to i; f[] is the score ending at i, not always the peak
358
+ if (mmax_f < max_f) mmax_f = max_f;
359
+ if (max_rmq_size < krmq_size(head, root)) max_rmq_size = krmq_size(head, root);
360
+ }
361
+ if (mg_dbg_flag & MG_DBG_LC_PROF) fprintf(stderr, "LP\tn_iter=%ld\tmmax_f=%d\trmq_size=%d\tmp_max=%ld\n", (long)n_iter, mmax_f, max_rmq_size, mp->max);
362
+ km_destroy(mem_mp);
363
+
364
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, 0, &n_u, &n_v);
365
+ *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
366
+ kfree(km, p); kfree(km, f); kfree(km, t);
367
+ if (n_u == 0) {
368
+ kfree(km, a); kfree(km, v);
369
+ return 0;
370
+ }
371
+ return compact_a(km, n_u, u, n_v, v, a);
372
+ }
373
+
374
+ mg_lchain_t *mg_lchain_gen(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mg128_t *a)
375
+ {
376
+ mg128_t *z;
377
+ mg_lchain_t *r;
378
+ int i, k;
379
+
380
+ if (n_u == 0) return 0;
381
+ KCALLOC(km, r, n_u);
382
+
383
+ // sort by query position
384
+ KMALLOC(km, z, n_u);
385
+ for (i = k = 0; i < n_u; ++i) {
386
+ int32_t qs = (int32_t)a[k].y + 1 - (a[k].y>>32 & 0xff);
387
+ z[i].x = (uint64_t)qs << 32 | u[i] >> 32;
388
+ z[i].y = (uint64_t)k << 32 | (int32_t)u[i];
389
+ k += (int32_t)u[i];
390
+ }
391
+ radix_sort_128x(z, z + n_u);
392
+
393
+ // populate r[]
394
+ for (i = 0; i < n_u; ++i) {
395
+ mg_lchain_t *ri = &r[i];
396
+ int32_t k = z[i].y >> 32, q_span = a[k].y >> 32 & 0xff;
397
+ ri->off = k;
398
+ ri->cnt = (int32_t)z[i].y;
399
+ ri->score = (uint32_t)z[i].x;
400
+ ri->v = a[k].x >> 32;
401
+ ri->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // for HPC k-mer
402
+ ri->qs = z[i].x >> 32;
403
+ ri->re = (int32_t)a[k + ri->cnt - 1].x + 1;
404
+ ri->qe = (int32_t)a[k + ri->cnt - 1].y + 1;
405
+ }
406
+ kfree(km, z);
407
+ return r;
408
+ }
409
+
410
+ static int32_t get_mini_idx(const mg128_t *a, int32_t n, const int32_t *mini_pos)
411
+ {
412
+ int32_t x, L = 0, R = n - 1;
413
+ x = (int32_t)a->y;
414
+ while (L <= R) { // binary search
415
+ int32_t m = ((uint64_t)L + R) >> 1;
416
+ int32_t y = mini_pos[m];
417
+ if (y < x) L = m + 1;
418
+ else if (y > x) R = m - 1;
419
+ else return m;
420
+ }
421
+ return -1;
422
+ }
423
+
424
+ /* Before:
425
+ * a[].x: tid<<33 | rev<<32 | tpos
426
+ * a[].y: flags<<40 | q_span<<32 | q_pos
427
+ * After:
428
+ * a[].x: mini_pos<<32 | tpos
429
+ * a[].y: same
430
+ */
431
+ void mg_update_anchors(int32_t n_a, mg128_t *a, int32_t n, const int32_t *mini_pos)
432
+ {
433
+ int32_t st, j, k;
434
+ if (n_a <= 0) return;
435
+ st = get_mini_idx(&a[0], n, mini_pos);
436
+ assert(st >= 0);
437
+ for (k = 0, j = st; j < n && k < n_a; ++j)
438
+ if ((int32_t)a[k].y == mini_pos[j])
439
+ a[k].x = (uint64_t)j << 32 | (a[k].x & 0xffffffffU), ++k;
440
+ assert(k == n_a);
441
+ }