ruby-minigraph 0.0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,526 @@
1
+ #include <stdlib.h>
2
+ #include <assert.h>
3
+ #include <ctype.h>
4
+ #include "gfa-priv.h"
5
+ #include "kstring.h"
6
+
7
+ #include "khashl.h"
8
// Hash map from C string to uint32_t; the tables stored in g->h_names and
// g->h_snames below are of this type.
KHASHL_MAP_INIT(KH_LOCAL, h_s2i_t, h_s2i, kh_cstr_t, uint32_t, kh_hash_str, kh_eq_str)

#include "ksort.h"
// Arcs are radix-sorted on their packed v_lv field (see gfa_arc_sort()).
#define gfa_arc_key(a) ((a).v_lv)
KRADIX_SORT_INIT(arc, gfa_arc_t, gfa_arc_key, 8)

// Plain 64-bit integers sorted by their own value (radix_sort_gfa64()).
#define generic_key(x) (x)
KRADIX_SORT_INIT(gfa64, uint64_t, generic_key, 8)

// Verbosity level; the warnings in this file are printed to stderr when it is >= 2.
int gfa_verbose = 2;
18
+
19
+ gfa_t *gfa_init(void)
20
+ {
21
+ gfa_t *g;
22
+ g = (gfa_t*)calloc(1, sizeof(gfa_t));
23
+ g->h_names = h_s2i_init();
24
+ g->h_snames = h_s2i_init();
25
+ return g;
26
+ }
27
+
28
+ void gfa_destroy(gfa_t *g)
29
+ {
30
+ uint32_t i, j;
31
+ uint64_t k;
32
+ if (g == 0) return;
33
+ h_s2i_destroy((h_s2i_t*)g->h_names);
34
+ for (i = 0; i < g->n_seg; ++i) {
35
+ gfa_seg_t *s = &g->seg[i];
36
+ free(s->name);
37
+ free(s->seq);
38
+ free(s->aux.aux);
39
+ if (s->utg) {
40
+ for (j = 0; j < s->utg->n; ++j)
41
+ free(s->utg->name[j]);
42
+ free(s->utg->name);
43
+ free(s->utg->a);
44
+ free(s->utg);
45
+ }
46
+ }
47
+ for (i = 0; i < g->n_sseq; ++i) free(g->sseq[i].name);
48
+ h_s2i_destroy((h_s2i_t*)g->h_snames);
49
+ if (g->link_aux)
50
+ for (k = 0; k < g->n_arc; ++k)
51
+ free(g->link_aux[k].aux);
52
+ free(g->idx); free(g->seg); free(g->arc); free(g->link_aux); free(g->sseq);
53
+ free(g);
54
+ }
55
+
56
/**
 * Duplicate a NUL-terminated string into freshly allocated memory.
 *
 * @param src  string to copy; must not be NULL
 * @return a heap copy of src, including its terminator; release with free()
 */
char *gfa_strdup(const char *src)
{
	size_t len; // size_t, so the length returned by strlen() is never narrowed
	char *dst;
	len = strlen(src);
	GFA_MALLOC(dst, len + 1);
	memcpy(dst, src, len + 1); // +1 copies the terminating NUL as well
	return dst;
}
65
+
66
/**
 * Copy at most n characters of src into a new buffer of n + 1 bytes.
 *
 * The result is always NUL-terminated. strncpy() stops reading src at its
 * terminator and zero-fills the remainder, so src may be shorter than n.
 *
 * @return the new string; release with free()
 */
char *gfa_strndup(const char *src, size_t n)
{
	char *copy;
	GFA_MALLOC(copy, n + 1);
	copy[n] = 0;
	strncpy(copy, src, n);
	return copy;
}
74
+
75
+ int32_t gfa_add_seg(gfa_t *g, const char *name)
76
+ {
77
+ khint_t k;
78
+ int absent;
79
+ h_s2i_t *h = (h_s2i_t*)g->h_names;
80
+ k = h_s2i_put(h, name, &absent);
81
+ if (absent) {
82
+ gfa_seg_t *s;
83
+ if (g->n_seg == g->m_seg) {
84
+ uint32_t old_m = g->m_seg;
85
+ g->m_seg = g->m_seg? g->m_seg<<1 : 16;
86
+ g->seg = (gfa_seg_t*)realloc(g->seg, g->m_seg * sizeof(gfa_seg_t));
87
+ memset(&g->seg[old_m], 0, (g->m_seg - old_m) * sizeof(gfa_seg_t));
88
+ }
89
+ s = &g->seg[g->n_seg++];
90
+ kh_key(h, k) = s->name = gfa_strdup(name);
91
+ s->del = s->len = 0;
92
+ s->snid = s->soff = s->rank = -1;
93
+ kh_val(h, k) = g->n_seg - 1;
94
+ }
95
+ return kh_val(h, k);
96
+ }
97
+
98
+ int32_t gfa_sseq_add(gfa_t *g, const char *sname)
99
+ {
100
+ h_s2i_t *h = (h_s2i_t*)g->h_snames;
101
+ khint_t k;
102
+ int absent;
103
+ k = h_s2i_put(h, sname, &absent);
104
+ if (absent) {
105
+ gfa_sseq_t *ss;
106
+ if (g->n_sseq == g->m_sseq) GFA_EXPAND(g->sseq, g->m_sseq);
107
+ ss = &g->sseq[g->n_sseq++];
108
+ kh_val(h, k) = g->n_sseq - 1;
109
+ kh_key(h, k) = ss->name = gfa_strdup(sname);
110
+ ss->min = -1, ss->max = -1, ss->rank = -1;
111
+ }
112
+ return kh_val(h, k);
113
+ }
114
+
115
+ int32_t gfa_sseq_get(const gfa_t *g, const char *sname)
116
+ {
117
+ h_s2i_t *h = (h_s2i_t*)g->h_snames;
118
+ khint_t k;
119
+ k = h_s2i_get(h, sname);
120
+ return k == kh_end(h)? -1 : kh_val(h, k);
121
+ }
122
+
123
+ void gfa_sseq_update(gfa_t *g, const gfa_seg_t *s)
124
+ {
125
+ gfa_sseq_t *ps;
126
+ if (s->snid < 0 || s->snid >= g->n_sseq) return;
127
+ ps = &g->sseq[s->snid];
128
+ if (ps->min < 0 || s->soff < ps->min) ps->min = s->soff;
129
+ if (ps->max < 0 || s->soff + s->len > ps->max) ps->max = s->soff + s->len;
130
+ if (ps->rank < 0) ps->rank = s->rank;
131
+ else if (ps->rank != s->rank) {
132
+ if (gfa_verbose >= 2)
133
+ fprintf(stderr, "[W] stable sequence '%s' associated with different ranks on segment '%s': %d != %d\n", ps->name, s->name, ps->rank, s->rank);
134
+ }
135
+ }
136
+
137
+ int32_t gfa_name2id(const gfa_t *g, const char *name)
138
+ {
139
+ h_s2i_t *h = (h_s2i_t*)g->h_names;
140
+ khint_t k;
141
+ k = h_s2i_get(h, name);
142
+ return k == kh_end(h)? -1 : kh_val(h, k);
143
+ }
144
+
145
+ gfa_arc_t *gfa_add_arc1(gfa_t *g, uint32_t v, uint32_t w, int32_t ov, int32_t ow, int64_t link_id, int comp)
146
+ {
147
+ gfa_arc_t *a;
148
+ if (g->m_arc == g->n_arc) {
149
+ uint64_t old_m = g->m_arc;
150
+ g->m_arc = g->m_arc? g->m_arc<<1 : 16;
151
+ g->arc = (gfa_arc_t*)realloc(g->arc, g->m_arc * sizeof(gfa_arc_t));
152
+ memset(&g->arc[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_arc_t));
153
+ g->link_aux = (gfa_aux_t*)realloc(g->link_aux, g->m_arc * sizeof(gfa_aux_t));
154
+ memset(&g->link_aux[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_aux_t));
155
+ }
156
+ a = &g->arc[g->n_arc++];
157
+ a->v_lv = (uint64_t)v << 32;
158
+ a->w = w, a->ov = ov, a->ow = ow, a->rank = -1;
159
+ a->link_id = link_id >= 0? link_id : g->n_arc - 1;
160
+ if (link_id >= 0) a->rank = g->arc[link_id].rank; // TODO: this is not always correct!
161
+ a->del = a->strong = 0;
162
+ a->comp = comp;
163
+ return a;
164
+ }
165
+
166
+ int gfa_arc_is_sorted(const gfa_t *g)
167
+ {
168
+ uint64_t e;
169
+ for (e = 1; e < g->n_arc; ++e)
170
+ if (g->arc[e-1].v_lv > g->arc[e].v_lv)
171
+ break;
172
+ return (e == g->n_arc);
173
+ }
174
+
175
+ void gfa_arc_sort(gfa_t *g)
176
+ {
177
+ radix_sort_arc(g->arc, g->arc + g->n_arc);
178
+ }
179
+
180
+ uint64_t *gfa_arc_index_core(size_t max_seq, size_t n, const gfa_arc_t *a)
181
+ {
182
+ size_t i, last;
183
+ uint64_t *idx;
184
+ idx = (uint64_t*)calloc(max_seq * 2, 8);
185
+ for (i = 1, last = 0; i <= n; ++i)
186
+ if (i == n || gfa_arc_head(a[i-1]) != gfa_arc_head(a[i]))
187
+ idx[gfa_arc_head(a[i-1])] = (uint64_t)last<<32 | (i - last), last = i;
188
+ return idx;
189
+ }
190
+
191
+ void gfa_arc_index(gfa_t *g)
192
+ {
193
+ if (g->idx) free(g->idx);
194
+ g->idx = gfa_arc_index_core(g->n_seg, g->n_arc, g->arc);
195
+ }
196
+
197
+ /********************
198
+ * Fix graph issues *
199
+ ********************/
200
+
201
+ uint32_t gfa_fix_no_seg(gfa_t *g)
202
+ {
203
+ uint32_t i, n_err = 0;
204
+ for (i = 0; i < g->n_seg; ++i) {
205
+ gfa_seg_t *s = &g->seg[i];
206
+ if (s->len == 0) {
207
+ ++n_err, s->del = 1;
208
+ if (gfa_verbose >= 2)
209
+ fprintf(stderr, "[W] segment '%s' is used on an L-line but not defined on an S-line\n", s->name);
210
+ }
211
+ }
212
+ return n_err;
213
+ }
214
+
215
+ void gfa_fix_arc_len(gfa_t *g)
216
+ {
217
+ uint64_t k;
218
+ for (k = 0; k < g->n_arc; ++k) {
219
+ gfa_arc_t *a = &g->arc[k];
220
+ uint32_t v = gfa_arc_head(*a), w = gfa_arc_tail(*a);
221
+ const gfa_seg_t *sv = &g->seg[v>>1];
222
+ if (!sv->del && sv->len < a->ov) {
223
+ if (gfa_verbose >= 2)
224
+ fprintf(stderr, "[W] overlap length longer than segment length for '%s': %d > %d\n", sv->name, a->ov, sv->len);
225
+ a->ov = sv->len;
226
+ }
227
+ if (sv->del || g->seg[w>>1].del) {
228
+ a->del = 1;
229
+ } else {
230
+ a->v_lv |= sv->len - a->ov;
231
+ }
232
+ }
233
+ }
234
+
235
+ uint32_t gfa_fix_semi_arc(gfa_t *g)
236
+ {
237
+ uint32_t n_err = 0, v, n_vtx = gfa_n_vtx(g);
238
+ int i, j;
239
+ for (v = 0; v < n_vtx; ++v) {
240
+ int nv = gfa_arc_n(g, v);
241
+ gfa_arc_t *av = gfa_arc_a(g, v);
242
+ for (i = 0; i < nv; ++i) {
243
+ if (!av[i].del && (av[i].ow == INT32_MAX || av[i].ov == INT32_MAX)) { // overlap length is missing
244
+ uint32_t w = av[i].w^1;
245
+ int is_multi = 0, c, jv = -1, nw = gfa_arc_n(g, w);
246
+ gfa_arc_t *aw = gfa_arc_a(g, w);
247
+ for (j = 0, c = 0; j < nw; ++j)
248
+ if (!aw[j].del && aw[j].w == (v^1)) ++c, jv = j;
249
+ if (c == 1) {
250
+ if (av[i].ov != INT32_MAX && aw[jv].ow != INT32_MAX && av[i].ov != aw[jv].ow) is_multi = 1;
251
+ if (av[i].ow != INT32_MAX && aw[jv].ov != INT32_MAX && av[i].ow != aw[jv].ov) is_multi = 1;
252
+ }
253
+ if (c == 1 && !is_multi) {
254
+ if (aw[jv].ov != INT32_MAX) av[i].ow = aw[jv].ov;
255
+ if (aw[jv].ow != INT32_MAX) av[i].ov = aw[jv].ow;
256
+ } else {
257
+ if (gfa_verbose >= 2)
258
+ fprintf(stderr, "[W] can't infer overlap length for %s%c -> %s%c\n",
259
+ g->seg[v>>1].name, "+-"[v&1], g->seg[w>>1].name, "+-"[(w^1)&1]);
260
+ ++n_err;
261
+ av[i].del = 1;
262
+ }
263
+ }
264
+ }
265
+ }
266
+ return n_err;
267
+ }
268
+
269
+ uint32_t gfa_fix_symm_add(gfa_t *g)
270
+ {
271
+ uint32_t n_err = 0, v, n_vtx = gfa_n_vtx(g);
272
+ int i;
273
+ for (v = 0; v < n_vtx; ++v) {
274
+ int nv = gfa_arc_n(g, v);
275
+ gfa_arc_t *av = gfa_arc_a(g, v);
276
+ for (i = 0; i < nv; ++i) {
277
+ int j, nw;
278
+ gfa_arc_t *aw, *avi = &av[i];
279
+ if (avi->del || avi->comp) continue;
280
+ nw = gfa_arc_n(g, avi->w^1);
281
+ aw = gfa_arc_a(g, avi->w^1);
282
+ for (j = 0; j < nw; ++j) {
283
+ gfa_arc_t *awj = &aw[j];
284
+ if (awj->del || awj->comp) continue;
285
+ if (awj->w == (v^1) && awj->ov == avi->ow && awj->ow == avi->ov) { // complement found
286
+ awj->comp = 1;
287
+ awj->link_id = avi->link_id;
288
+ break;
289
+ }
290
+ }
291
+ if (j == nw) {
292
+ gfa_arc_t *arc_old = g->arc, *arc_new;
293
+ arc_new = gfa_add_arc1(g, avi->w^1, v^1, avi->ow, avi->ov, avi->link_id, 1);
294
+ if (arc_old != g->arc) av = gfa_arc_a(g, v); // g->arc may be reallocated
295
+ arc_new->rank = av[i].rank;
296
+ }
297
+ }
298
+ }
299
+ if (n_vtx < gfa_n_vtx(g)) {
300
+ gfa_arc_sort(g);
301
+ gfa_arc_index(g);
302
+ }
303
+ return n_err;
304
+ }
305
+
306
+ void gfa_arc_rm(gfa_t *g)
307
+ {
308
+ uint32_t e, n;
309
+ for (e = n = 0; e < g->n_arc; ++e) {
310
+ uint32_t u = g->arc[e].v_lv>>32, v = g->arc[e].w;
311
+ if (!g->arc[e].del && !g->seg[u>>1].del && !g->seg[v>>1].del)
312
+ g->arc[n++] = g->arc[e];
313
+ else {
314
+ gfa_aux_t *aux = g->arc[e].link_id < g->n_arc? &g->link_aux[g->arc[e].link_id] : 0;
315
+ if (aux) {
316
+ free(aux->aux);
317
+ aux->aux = 0, aux->l_aux = aux->m_aux = 0;
318
+ }
319
+ }
320
+ }
321
+ if (n < g->n_arc) { // arc index is out of sync
322
+ if (g->idx) free(g->idx);
323
+ g->idx = 0;
324
+ }
325
+ g->n_arc = n;
326
+ }
327
+
328
+ void gfa_cleanup(gfa_t *g)
329
+ {
330
+ gfa_arc_rm(g);
331
+ if (!gfa_arc_is_sorted(g)) {
332
+ gfa_arc_sort(g);
333
+ if (g->idx) free(g->idx);
334
+ g->idx = 0;
335
+ }
336
+ if (g->idx == 0) gfa_arc_index(g);
337
+ }
338
+
339
+ int32_t gfa_check_multi(const gfa_t *g)
340
+ {
341
+ uint32_t v, n_vtx = gfa_n_vtx(g);
342
+ int32_t max_nv = -1, n_multi = 0;
343
+ uint64_t *buf; // actually, uint32_t is enough
344
+ for (v = 0; v < n_vtx; ++v) {
345
+ int32_t nv = gfa_arc_n(g, v);
346
+ max_nv = max_nv > nv? max_nv : nv;
347
+ }
348
+ if (max_nv == 1 || max_nv < 0) return 0;
349
+ GFA_MALLOC(buf, max_nv);
350
+ for (v = 0; v < n_vtx; ++v) {
351
+ int32_t i, s, nv = gfa_arc_n(g, v);
352
+ const gfa_arc_t *av = gfa_arc_a(g, v);
353
+ for (i = 0; i < nv; ++i) buf[i] = av[i].w;
354
+ radix_sort_gfa64(buf, buf + nv);
355
+ for (s = 0, i = 1; i <= nv; ++i)
356
+ if (i == nv || buf[i] != buf[s])
357
+ n_multi += i - s - 1, s = i;
358
+ }
359
+ free(buf);
360
+ return n_multi;
361
+ }
362
+
363
+ uint32_t gfa_fix_multi(gfa_t *g)
364
+ {
365
+ uint32_t v, n_vtx = gfa_n_vtx(g), n_rm = 0;
366
+ int32_t max_nv = -1;
367
+ uint64_t *buf; // actually, uint32_t is enough
368
+ for (v = 0; v < n_vtx; ++v) {
369
+ int32_t nv = gfa_arc_n(g, v);
370
+ max_nv = max_nv > nv? max_nv : nv;
371
+ }
372
+ if (max_nv == 1) return 0;
373
+ GFA_MALLOC(buf, max_nv);
374
+ for (v = 0; v < n_vtx; ++v) {
375
+ int32_t i, j, s, nv = gfa_arc_n(g, v), nb;
376
+ gfa_arc_t *av = gfa_arc_a(g, v);
377
+ for (i = j = 0; i < nv; ++i)
378
+ if (!av[i].del) buf[j++] = (uint64_t)av[i].w<<32 | i;
379
+ nb = j;
380
+ if (nb < 1) continue;
381
+ radix_sort_gfa64(buf, buf + nb);
382
+ for (s = 0, i = 1; i <= nb; ++i) {
383
+ if (i == nv || buf[i]>>32 != buf[s]>>32) {
384
+ if (i - s > 1) {
385
+ int32_t k = (int32_t)buf[s], min_rank = av[k].rank; // prefer longest overlap
386
+ for (j = s + 1; j < i; ++j) { // rank has higher priority
387
+ int32_t t = (int32_t)buf[j];
388
+ if (av[t].rank >= 0 && av[t].rank < min_rank)
389
+ min_rank = av[t].rank, k = t;
390
+ }
391
+ if (av[k].w == (v^1)) { // a weird loop
392
+ if (gfa_verbose >= 2)
393
+ fprintf(stderr, "[W::%s] can't fix multiple edges due to '>v -- <v' involving segment %s\n", __func__, g->seg[v>>1].name);
394
+ } else {
395
+ int32_t nw = gfa_arc_n(g, av[k].w^1), n_wdel;
396
+ gfa_arc_t *aw = gfa_arc_a(g, av[k].w^1);
397
+ uint64_t link_id = av[k].link_id;
398
+ n_rm += i - s - 1;
399
+ for (j = s + 1; j < i; ++j)
400
+ av[(int32_t)buf[j]].del = 1;
401
+ for (j = 0, n_wdel = 0; j < nw; ++j)
402
+ if (aw[j].w == (v^1) && aw[j].link_id != link_id)
403
+ aw[j].del = 1, ++n_wdel;
404
+ assert(n_wdel == i - s - 1);
405
+ }
406
+ }
407
+ s = i;
408
+ }
409
+ }
410
+ }
411
+ free(buf);
412
+ if (n_rm > 0) {
413
+ if (gfa_verbose >= 2)
414
+ fprintf(stderr, "[W::%s] removed %d multiple link(s)\n", __func__, n_rm);
415
+ gfa_arc_rm(g);
416
+ gfa_arc_index(g);
417
+ }
418
+ return n_rm;
419
+ }
420
+
421
+ void gfa_finalize(gfa_t *g)
422
+ {
423
+ gfa_fix_no_seg(g);
424
+ gfa_arc_sort(g);
425
+ gfa_arc_index(g);
426
+ gfa_fix_semi_arc(g);
427
+ gfa_fix_symm_add(g);
428
+ gfa_fix_arc_len(g);
429
+ gfa_cleanup(g);
430
+ }
431
+
432
+ /********************
433
+ * Tag manipulation *
434
+ ********************/
435
+
436
/**
 * Size in bytes of a fixed-width tag value of the given type code.
 *
 * @return 1 for 'C', 'c', 'A'; 2 for 'S', 's'; 4 for 'I', 'i', 'f';
 *         0 for any other code
 */
static inline int gfa_aux_type2size(int x)
{
	switch (x) {
	case 'C': case 'c': case 'A':
		return 1;
	case 'S': case 's':
		return 2;
	case 'I': case 'i': case 'f':
		return 4;
	default:
		return 0;
	}
}
443
+
444
/*
 * Advance byte pointer `s` past one encoded tag value. On entry `s` points
 * at the one-byte type code (just after the two-byte tag name).
 *   'Z'   : skip up to and including the terminating NUL
 *   'B'   : skip subtype byte + 4-byte element count + count * element size
 *   other : skip gfa_aux_type2size(type) bytes
 * The element count is read with memcpy() because `s` is a byte pointer
 * with no alignment guarantee; a direct int32_t dereference would be an
 * unaligned, type-punned access.
 */
#define __skip_tag(s) do { \
		int type = *(s); \
		++(s); \
		if (type == 'Z') { while (*(s)) ++(s); ++(s); } \
		else if (type == 'B') { \
			int32_t n_elem_; \
			memcpy(&n_elem_, (s) + 1, sizeof(n_elem_)); \
			(s) += 5 + gfa_aux_type2size(*(s)) * n_elem_; \
		} else (s) += gfa_aux_type2size(type); \
	} while(0)
451
+
452
/**
 * Find a tag in an encoded tag block.
 *
 * The block is scanned entry by entry: two tag-name bytes followed by a
 * typed value.
 *
 * @param l_data  length of the block in bytes
 * @param data    start of the block
 * @param tag     two-character tag name
 * @return pointer to the type byte of the first matching entry (just after
 *         its name), or NULL if the tag is not present
 */
uint8_t *gfa_aux_get(int l_data, const uint8_t *data, const char tag[2])
{
	const uint8_t *cur = data;
	const uint8_t *end = data + l_data;
	const int wanted = tag[0]<<8 | tag[1];
	while (cur < end) {
		const int name = (int)cur[0]<<8 | cur[1];
		cur += 2;
		if (name == wanted)
			return (uint8_t*)cur;
		__skip_tag(cur); // not ours: jump over the value
	}
	return 0;
}
464
+
465
/**
 * Remove one tag entry from an encoded tag block, in place.
 *
 * The bytes following the entry are moved down over it.
 *
 * @param l_data  current length of the block
 * @param data    start of the block
 * @param s       MUST be a pointer returned by gfa_aux_get() on this block
 * @return the new length of the block
 */
int gfa_aux_del(int l_data, uint8_t *data, uint8_t *s)
{
	uint8_t *entry = s - 2; // the entry starts at its two-byte name
	__skip_tag(s);          // s now points just past the entry's value
	memmove(entry, s, l_data - (s - data));
	return l_data - (s - entry);
}
474
+
475
+ void gfa_aux_update_f(gfa_aux_t *a, const char tag[2], float x)
476
+ {
477
+ uint8_t *p = 0;
478
+ if (a->l_aux > 0)
479
+ p = gfa_aux_get(a->l_aux, a->aux, "cv");
480
+ if (p) {
481
+ memcpy(p + 1, &x, 4);
482
+ } else {
483
+ kstring_t str;
484
+ str.l = a->l_aux, str.m = a->m_aux, str.s = (char*)a->aux;
485
+ ks_resize(&str, str.l + 7);
486
+ kputsn_(tag, 2, &str);
487
+ kputc_('f', &str);
488
+ kputsn_(&x, 4, &str);
489
+ a->l_aux = str.l, a->m_aux = str.m, a->aux = (uint8_t*)str.s;
490
+ }
491
+ }
492
+
493
+ void gfa_aux_update_cv(gfa_t *g, const char *tag, const double *cov_seg, const double *cov_link)
494
+ {
495
+ int64_t i;
496
+ if (cov_seg)
497
+ for (i = 0; i < g->n_seg; ++i)
498
+ gfa_aux_update_f(&g->seg[i].aux, tag, cov_seg[i]);
499
+ if (cov_link)
500
+ for (i = 0; i < g->n_arc; ++i)
501
+ if (g->arc[i].comp == 0)
502
+ gfa_aux_update_f(&g->link_aux[g->arc[i].link_id], tag, cov_link[i]);
503
+ }
504
+
505
+ /*********************
506
+ * Translation table *
507
+ *********************/
508
+
509
/*
 * Complement lookup table indexed by byte value.
 *
 * ASCII letters map to their complement with case preserved, e.g.
 * 'A'<->'T', 'C'<->'G', 'R'<->'Y', 'K'<->'M', 'B'<->'V', 'D'<->'H';
 * 'U' maps to 'A'. Letters without a partner in the table (such as 'N',
 * 'S', 'W') and all non-letter bytes map to themselves.
 */
unsigned char gfa_comp_table[256] = {
	  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
	 16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
	 64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
	'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z',  91,  92,  93,  94,  95,
	 96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
	'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};