ruby-minigraph 0.0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,526 @@
1
+ #include <stdlib.h>
2
+ #include <assert.h>
3
+ #include <ctype.h>
4
+ #include "gfa-priv.h"
5
+ #include "kstring.h"
6
+
7
+ #include "khashl.h"
8
+ KHASHL_MAP_INIT(KH_LOCAL, h_s2i_t, h_s2i, kh_cstr_t, uint32_t, kh_hash_str, kh_eq_str)
9
+
10
+ #include "ksort.h"
11
+ #define gfa_arc_key(a) ((a).v_lv)
12
+ KRADIX_SORT_INIT(arc, gfa_arc_t, gfa_arc_key, 8)
13
+
14
+ #define generic_key(x) (x)
15
+ KRADIX_SORT_INIT(gfa64, uint64_t, generic_key, 8)
16
+
17
+ int gfa_verbose = 2;
18
+
19
+ gfa_t *gfa_init(void)
20
+ {
21
+ gfa_t *g;
22
+ g = (gfa_t*)calloc(1, sizeof(gfa_t));
23
+ g->h_names = h_s2i_init();
24
+ g->h_snames = h_s2i_init();
25
+ return g;
26
+ }
27
+
28
+ void gfa_destroy(gfa_t *g)
29
+ {
30
+ uint32_t i, j;
31
+ uint64_t k;
32
+ if (g == 0) return;
33
+ h_s2i_destroy((h_s2i_t*)g->h_names);
34
+ for (i = 0; i < g->n_seg; ++i) {
35
+ gfa_seg_t *s = &g->seg[i];
36
+ free(s->name);
37
+ free(s->seq);
38
+ free(s->aux.aux);
39
+ if (s->utg) {
40
+ for (j = 0; j < s->utg->n; ++j)
41
+ free(s->utg->name[j]);
42
+ free(s->utg->name);
43
+ free(s->utg->a);
44
+ free(s->utg);
45
+ }
46
+ }
47
+ for (i = 0; i < g->n_sseq; ++i) free(g->sseq[i].name);
48
+ h_s2i_destroy((h_s2i_t*)g->h_snames);
49
+ if (g->link_aux)
50
+ for (k = 0; k < g->n_arc; ++k)
51
+ free(g->link_aux[k].aux);
52
+ free(g->idx); free(g->seg); free(g->arc); free(g->link_aux); free(g->sseq);
53
+ free(g);
54
+ }
55
+
56
/**
 * Duplicate a NUL-terminated string into a buffer obtained with GFA_MALLOC.
 *
 * The length is kept in a size_t so that a very long input cannot be
 * truncated (or turned negative) before it is used for the allocation size
 * and the copy length.
 *
 * @param src  string to copy; must not be NULL
 * @return a newly allocated copy the caller must free(), or NULL if the
 *         allocation left the destination pointer NULL
 */
char *gfa_strdup(const char *src)
{
    size_t len = strlen(src);
    char *dst;
    GFA_MALLOC(dst, len + 1);
    if (dst == 0) return 0;
    memcpy(dst, src, len + 1); /* len + 1 also copies the terminating NUL */
    return dst;
}
65
+
66
/**
 * Copy at most n characters of src into a new (n + 1)-byte buffer obtained
 * with GFA_MALLOC and NUL-terminate it.
 *
 * strncpy() is used, so when src is shorter than n the remainder of the
 * buffer is zero-filled.
 *
 * @return the new buffer; the caller frees it
 */
char *gfa_strndup(const char *src, size_t n)
{
    char *copy;
    GFA_MALLOC(copy, n + 1);
    copy[n] = '\0';        /* terminator lives just past the n copied bytes */
    strncpy(copy, src, n);
    return copy;
}
74
+
75
+ int32_t gfa_add_seg(gfa_t *g, const char *name)
76
+ {
77
+ khint_t k;
78
+ int absent;
79
+ h_s2i_t *h = (h_s2i_t*)g->h_names;
80
+ k = h_s2i_put(h, name, &absent);
81
+ if (absent) {
82
+ gfa_seg_t *s;
83
+ if (g->n_seg == g->m_seg) {
84
+ uint32_t old_m = g->m_seg;
85
+ g->m_seg = g->m_seg? g->m_seg<<1 : 16;
86
+ g->seg = (gfa_seg_t*)realloc(g->seg, g->m_seg * sizeof(gfa_seg_t));
87
+ memset(&g->seg[old_m], 0, (g->m_seg - old_m) * sizeof(gfa_seg_t));
88
+ }
89
+ s = &g->seg[g->n_seg++];
90
+ kh_key(h, k) = s->name = gfa_strdup(name);
91
+ s->del = s->len = 0;
92
+ s->snid = s->soff = s->rank = -1;
93
+ kh_val(h, k) = g->n_seg - 1;
94
+ }
95
+ return kh_val(h, k);
96
+ }
97
+
98
+ int32_t gfa_sseq_add(gfa_t *g, const char *sname)
99
+ {
100
+ h_s2i_t *h = (h_s2i_t*)g->h_snames;
101
+ khint_t k;
102
+ int absent;
103
+ k = h_s2i_put(h, sname, &absent);
104
+ if (absent) {
105
+ gfa_sseq_t *ss;
106
+ if (g->n_sseq == g->m_sseq) GFA_EXPAND(g->sseq, g->m_sseq);
107
+ ss = &g->sseq[g->n_sseq++];
108
+ kh_val(h, k) = g->n_sseq - 1;
109
+ kh_key(h, k) = ss->name = gfa_strdup(sname);
110
+ ss->min = -1, ss->max = -1, ss->rank = -1;
111
+ }
112
+ return kh_val(h, k);
113
+ }
114
+
115
+ int32_t gfa_sseq_get(const gfa_t *g, const char *sname)
116
+ {
117
+ h_s2i_t *h = (h_s2i_t*)g->h_snames;
118
+ khint_t k;
119
+ k = h_s2i_get(h, sname);
120
+ return k == kh_end(h)? -1 : kh_val(h, k);
121
+ }
122
+
123
+ void gfa_sseq_update(gfa_t *g, const gfa_seg_t *s)
124
+ {
125
+ gfa_sseq_t *ps;
126
+ if (s->snid < 0 || s->snid >= g->n_sseq) return;
127
+ ps = &g->sseq[s->snid];
128
+ if (ps->min < 0 || s->soff < ps->min) ps->min = s->soff;
129
+ if (ps->max < 0 || s->soff + s->len > ps->max) ps->max = s->soff + s->len;
130
+ if (ps->rank < 0) ps->rank = s->rank;
131
+ else if (ps->rank != s->rank) {
132
+ if (gfa_verbose >= 2)
133
+ fprintf(stderr, "[W] stable sequence '%s' associated with different ranks on segment '%s': %d != %d\n", ps->name, s->name, ps->rank, s->rank);
134
+ }
135
+ }
136
+
137
+ int32_t gfa_name2id(const gfa_t *g, const char *name)
138
+ {
139
+ h_s2i_t *h = (h_s2i_t*)g->h_names;
140
+ khint_t k;
141
+ k = h_s2i_get(h, name);
142
+ return k == kh_end(h)? -1 : kh_val(h, k);
143
+ }
144
+
145
+ gfa_arc_t *gfa_add_arc1(gfa_t *g, uint32_t v, uint32_t w, int32_t ov, int32_t ow, int64_t link_id, int comp)
146
+ {
147
+ gfa_arc_t *a;
148
+ if (g->m_arc == g->n_arc) {
149
+ uint64_t old_m = g->m_arc;
150
+ g->m_arc = g->m_arc? g->m_arc<<1 : 16;
151
+ g->arc = (gfa_arc_t*)realloc(g->arc, g->m_arc * sizeof(gfa_arc_t));
152
+ memset(&g->arc[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_arc_t));
153
+ g->link_aux = (gfa_aux_t*)realloc(g->link_aux, g->m_arc * sizeof(gfa_aux_t));
154
+ memset(&g->link_aux[old_m], 0, (g->m_arc - old_m) * sizeof(gfa_aux_t));
155
+ }
156
+ a = &g->arc[g->n_arc++];
157
+ a->v_lv = (uint64_t)v << 32;
158
+ a->w = w, a->ov = ov, a->ow = ow, a->rank = -1;
159
+ a->link_id = link_id >= 0? link_id : g->n_arc - 1;
160
+ if (link_id >= 0) a->rank = g->arc[link_id].rank; // TODO: this is not always correct!
161
+ a->del = a->strong = 0;
162
+ a->comp = comp;
163
+ return a;
164
+ }
165
+
166
+ int gfa_arc_is_sorted(const gfa_t *g)
167
+ {
168
+ uint64_t e;
169
+ for (e = 1; e < g->n_arc; ++e)
170
+ if (g->arc[e-1].v_lv > g->arc[e].v_lv)
171
+ break;
172
+ return (e == g->n_arc);
173
+ }
174
+
175
+ void gfa_arc_sort(gfa_t *g)
176
+ {
177
+ radix_sort_arc(g->arc, g->arc + g->n_arc);
178
+ }
179
+
180
+ uint64_t *gfa_arc_index_core(size_t max_seq, size_t n, const gfa_arc_t *a)
181
+ {
182
+ size_t i, last;
183
+ uint64_t *idx;
184
+ idx = (uint64_t*)calloc(max_seq * 2, 8);
185
+ for (i = 1, last = 0; i <= n; ++i)
186
+ if (i == n || gfa_arc_head(a[i-1]) != gfa_arc_head(a[i]))
187
+ idx[gfa_arc_head(a[i-1])] = (uint64_t)last<<32 | (i - last), last = i;
188
+ return idx;
189
+ }
190
+
191
+ void gfa_arc_index(gfa_t *g)
192
+ {
193
+ if (g->idx) free(g->idx);
194
+ g->idx = gfa_arc_index_core(g->n_seg, g->n_arc, g->arc);
195
+ }
196
+
197
+ /********************
198
+ * Fix graph issues *
199
+ ********************/
200
+
201
+ uint32_t gfa_fix_no_seg(gfa_t *g)
202
+ {
203
+ uint32_t i, n_err = 0;
204
+ for (i = 0; i < g->n_seg; ++i) {
205
+ gfa_seg_t *s = &g->seg[i];
206
+ if (s->len == 0) {
207
+ ++n_err, s->del = 1;
208
+ if (gfa_verbose >= 2)
209
+ fprintf(stderr, "[W] segment '%s' is used on an L-line but not defined on an S-line\n", s->name);
210
+ }
211
+ }
212
+ return n_err;
213
+ }
214
+
215
+ void gfa_fix_arc_len(gfa_t *g)
216
+ {
217
+ uint64_t k;
218
+ for (k = 0; k < g->n_arc; ++k) {
219
+ gfa_arc_t *a = &g->arc[k];
220
+ uint32_t v = gfa_arc_head(*a), w = gfa_arc_tail(*a);
221
+ const gfa_seg_t *sv = &g->seg[v>>1];
222
+ if (!sv->del && sv->len < a->ov) {
223
+ if (gfa_verbose >= 2)
224
+ fprintf(stderr, "[W] overlap length longer than segment length for '%s': %d > %d\n", sv->name, a->ov, sv->len);
225
+ a->ov = sv->len;
226
+ }
227
+ if (sv->del || g->seg[w>>1].del) {
228
+ a->del = 1;
229
+ } else {
230
+ a->v_lv |= sv->len - a->ov;
231
+ }
232
+ }
233
+ }
234
+
235
+ uint32_t gfa_fix_semi_arc(gfa_t *g)
236
+ {
237
+ uint32_t n_err = 0, v, n_vtx = gfa_n_vtx(g);
238
+ int i, j;
239
+ for (v = 0; v < n_vtx; ++v) {
240
+ int nv = gfa_arc_n(g, v);
241
+ gfa_arc_t *av = gfa_arc_a(g, v);
242
+ for (i = 0; i < nv; ++i) {
243
+ if (!av[i].del && (av[i].ow == INT32_MAX || av[i].ov == INT32_MAX)) { // overlap length is missing
244
+ uint32_t w = av[i].w^1;
245
+ int is_multi = 0, c, jv = -1, nw = gfa_arc_n(g, w);
246
+ gfa_arc_t *aw = gfa_arc_a(g, w);
247
+ for (j = 0, c = 0; j < nw; ++j)
248
+ if (!aw[j].del && aw[j].w == (v^1)) ++c, jv = j;
249
+ if (c == 1) {
250
+ if (av[i].ov != INT32_MAX && aw[jv].ow != INT32_MAX && av[i].ov != aw[jv].ow) is_multi = 1;
251
+ if (av[i].ow != INT32_MAX && aw[jv].ov != INT32_MAX && av[i].ow != aw[jv].ov) is_multi = 1;
252
+ }
253
+ if (c == 1 && !is_multi) {
254
+ if (aw[jv].ov != INT32_MAX) av[i].ow = aw[jv].ov;
255
+ if (aw[jv].ow != INT32_MAX) av[i].ov = aw[jv].ow;
256
+ } else {
257
+ if (gfa_verbose >= 2)
258
+ fprintf(stderr, "[W] can't infer overlap length for %s%c -> %s%c\n",
259
+ g->seg[v>>1].name, "+-"[v&1], g->seg[w>>1].name, "+-"[(w^1)&1]);
260
+ ++n_err;
261
+ av[i].del = 1;
262
+ }
263
+ }
264
+ }
265
+ }
266
+ return n_err;
267
+ }
268
+
269
+ uint32_t gfa_fix_symm_add(gfa_t *g)
270
+ {
271
+ uint32_t n_err = 0, v, n_vtx = gfa_n_vtx(g);
272
+ int i;
273
+ for (v = 0; v < n_vtx; ++v) {
274
+ int nv = gfa_arc_n(g, v);
275
+ gfa_arc_t *av = gfa_arc_a(g, v);
276
+ for (i = 0; i < nv; ++i) {
277
+ int j, nw;
278
+ gfa_arc_t *aw, *avi = &av[i];
279
+ if (avi->del || avi->comp) continue;
280
+ nw = gfa_arc_n(g, avi->w^1);
281
+ aw = gfa_arc_a(g, avi->w^1);
282
+ for (j = 0; j < nw; ++j) {
283
+ gfa_arc_t *awj = &aw[j];
284
+ if (awj->del || awj->comp) continue;
285
+ if (awj->w == (v^1) && awj->ov == avi->ow && awj->ow == avi->ov) { // complement found
286
+ awj->comp = 1;
287
+ awj->link_id = avi->link_id;
288
+ break;
289
+ }
290
+ }
291
+ if (j == nw) {
292
+ gfa_arc_t *arc_old = g->arc, *arc_new;
293
+ arc_new = gfa_add_arc1(g, avi->w^1, v^1, avi->ow, avi->ov, avi->link_id, 1);
294
+ if (arc_old != g->arc) av = gfa_arc_a(g, v); // g->arc may be reallocated
295
+ arc_new->rank = av[i].rank;
296
+ }
297
+ }
298
+ }
299
+ if (n_vtx < gfa_n_vtx(g)) {
300
+ gfa_arc_sort(g);
301
+ gfa_arc_index(g);
302
+ }
303
+ return n_err;
304
+ }
305
+
306
+ void gfa_arc_rm(gfa_t *g)
307
+ {
308
+ uint32_t e, n;
309
+ for (e = n = 0; e < g->n_arc; ++e) {
310
+ uint32_t u = g->arc[e].v_lv>>32, v = g->arc[e].w;
311
+ if (!g->arc[e].del && !g->seg[u>>1].del && !g->seg[v>>1].del)
312
+ g->arc[n++] = g->arc[e];
313
+ else {
314
+ gfa_aux_t *aux = g->arc[e].link_id < g->n_arc? &g->link_aux[g->arc[e].link_id] : 0;
315
+ if (aux) {
316
+ free(aux->aux);
317
+ aux->aux = 0, aux->l_aux = aux->m_aux = 0;
318
+ }
319
+ }
320
+ }
321
+ if (n < g->n_arc) { // arc index is out of sync
322
+ if (g->idx) free(g->idx);
323
+ g->idx = 0;
324
+ }
325
+ g->n_arc = n;
326
+ }
327
+
328
+ void gfa_cleanup(gfa_t *g)
329
+ {
330
+ gfa_arc_rm(g);
331
+ if (!gfa_arc_is_sorted(g)) {
332
+ gfa_arc_sort(g);
333
+ if (g->idx) free(g->idx);
334
+ g->idx = 0;
335
+ }
336
+ if (g->idx == 0) gfa_arc_index(g);
337
+ }
338
+
339
+ int32_t gfa_check_multi(const gfa_t *g)
340
+ {
341
+ uint32_t v, n_vtx = gfa_n_vtx(g);
342
+ int32_t max_nv = -1, n_multi = 0;
343
+ uint64_t *buf; // actually, uint32_t is enough
344
+ for (v = 0; v < n_vtx; ++v) {
345
+ int32_t nv = gfa_arc_n(g, v);
346
+ max_nv = max_nv > nv? max_nv : nv;
347
+ }
348
+ if (max_nv == 1 || max_nv < 0) return 0;
349
+ GFA_MALLOC(buf, max_nv);
350
+ for (v = 0; v < n_vtx; ++v) {
351
+ int32_t i, s, nv = gfa_arc_n(g, v);
352
+ const gfa_arc_t *av = gfa_arc_a(g, v);
353
+ for (i = 0; i < nv; ++i) buf[i] = av[i].w;
354
+ radix_sort_gfa64(buf, buf + nv);
355
+ for (s = 0, i = 1; i <= nv; ++i)
356
+ if (i == nv || buf[i] != buf[s])
357
+ n_multi += i - s - 1, s = i;
358
+ }
359
+ free(buf);
360
+ return n_multi;
361
+ }
362
+
363
+ uint32_t gfa_fix_multi(gfa_t *g)
364
+ {
365
+ uint32_t v, n_vtx = gfa_n_vtx(g), n_rm = 0;
366
+ int32_t max_nv = -1;
367
+ uint64_t *buf; // actually, uint32_t is enough
368
+ for (v = 0; v < n_vtx; ++v) {
369
+ int32_t nv = gfa_arc_n(g, v);
370
+ max_nv = max_nv > nv? max_nv : nv;
371
+ }
372
+ if (max_nv == 1) return 0;
373
+ GFA_MALLOC(buf, max_nv);
374
+ for (v = 0; v < n_vtx; ++v) {
375
+ int32_t i, j, s, nv = gfa_arc_n(g, v), nb;
376
+ gfa_arc_t *av = gfa_arc_a(g, v);
377
+ for (i = j = 0; i < nv; ++i)
378
+ if (!av[i].del) buf[j++] = (uint64_t)av[i].w<<32 | i;
379
+ nb = j;
380
+ if (nb < 1) continue;
381
+ radix_sort_gfa64(buf, buf + nb);
382
+ for (s = 0, i = 1; i <= nb; ++i) {
383
+ if (i == nv || buf[i]>>32 != buf[s]>>32) {
384
+ if (i - s > 1) {
385
+ int32_t k = (int32_t)buf[s], min_rank = av[k].rank; // prefer longest overlap
386
+ for (j = s + 1; j < i; ++j) { // rank has higher priority
387
+ int32_t t = (int32_t)buf[j];
388
+ if (av[t].rank >= 0 && av[t].rank < min_rank)
389
+ min_rank = av[t].rank, k = t;
390
+ }
391
+ if (av[k].w == (v^1)) { // a weird loop
392
+ if (gfa_verbose >= 2)
393
+ fprintf(stderr, "[W::%s] can't fix multiple edges due to '>v -- <v' involving segment %s\n", __func__, g->seg[v>>1].name);
394
+ } else {
395
+ int32_t nw = gfa_arc_n(g, av[k].w^1), n_wdel;
396
+ gfa_arc_t *aw = gfa_arc_a(g, av[k].w^1);
397
+ uint64_t link_id = av[k].link_id;
398
+ n_rm += i - s - 1;
399
+ for (j = s + 1; j < i; ++j)
400
+ av[(int32_t)buf[j]].del = 1;
401
+ for (j = 0, n_wdel = 0; j < nw; ++j)
402
+ if (aw[j].w == (v^1) && aw[j].link_id != link_id)
403
+ aw[j].del = 1, ++n_wdel;
404
+ assert(n_wdel == i - s - 1);
405
+ }
406
+ }
407
+ s = i;
408
+ }
409
+ }
410
+ }
411
+ free(buf);
412
+ if (n_rm > 0) {
413
+ if (gfa_verbose >= 2)
414
+ fprintf(stderr, "[W::%s] removed %d multiple link(s)\n", __func__, n_rm);
415
+ gfa_arc_rm(g);
416
+ gfa_arc_index(g);
417
+ }
418
+ return n_rm;
419
+ }
420
+
421
+ void gfa_finalize(gfa_t *g)
422
+ {
423
+ gfa_fix_no_seg(g);
424
+ gfa_arc_sort(g);
425
+ gfa_arc_index(g);
426
+ gfa_fix_semi_arc(g);
427
+ gfa_fix_symm_add(g);
428
+ gfa_fix_arc_len(g);
429
+ gfa_cleanup(g);
430
+ }
431
+
432
+ /********************
433
+ * Tag manipulation *
434
+ ********************/
435
+
436
/**
 * Size in bytes of a fixed-width aux value with type code x.
 *
 * @return 1 for 'A', 'c', 'C'; 2 for 's', 'S'; 4 for 'i', 'I', 'f';
 *         0 for every other code
 */
static inline int gfa_aux_type2size(int x)
{
    switch (x) {
    case 'A': case 'c': case 'C':
        return 1;
    case 's': case 'S':
        return 2;
    case 'i': case 'I': case 'f':
        return 4;
    default:
        return 0;
    }
}

/*
 * Advance byte pointer s from the type byte of an aux value to the byte just
 * past that value.
 *   'Z'   : skip the NUL-terminated string, including its terminator
 *   'B'   : skip subtype byte + 4-byte element count + the elements
 *   other : skip gfa_aux_type2size(type) bytes
 * The element count of a 'B' array sits at an arbitrary byte offset, so it is
 * copied out with memcpy() instead of being read through a cast int32_t
 * pointer (which would be a misaligned, aliasing-violating access).
 * s is evaluated several times; pass a plain pointer variable.
 */
#define __skip_tag(s) do { \
        int type = *(s); \
        ++(s); \
        if (type == 'Z') { while (*(s)) ++(s); ++(s); } \
        else if (type == 'B') { \
            int32_t n_elem_; \
            memcpy(&n_elem_, (s) + 1, sizeof(n_elem_)); \
            (s) += 5 + gfa_aux_type2size(*(s)) * n_elem_; \
        } else (s) += gfa_aux_type2size(type); \
    } while(0)
451
+
452
/**
 * Find an aux field by its two-character tag.
 *
 * The buffer is walked field by field: two tag bytes, then a typed value
 * that __skip_tag() steps over.
 *
 * @param l_data  number of bytes in data
 * @param data    packed aux fields
 * @param tag     two-character tag to look for
 * @return pointer to the type byte of the first matching field (just past
 *         its two tag bytes), or NULL when no field matches
 */
uint8_t *gfa_aux_get(int l_data, const uint8_t *data, const char tag[2])
{
    const uint8_t *cur = data;
    const uint8_t *const end = data + l_data;
    const int wanted = tag[0]<<8 | tag[1];

    while (cur < end) {
        int key = (int)cur[0]<<8 | cur[1];
        cur += 2;
        if (key == wanted) return (uint8_t*)cur;
        __skip_tag(cur);
    }
    return 0;
}
464
+
465
/**
 * Remove one aux field from a packed aux buffer, in place.
 *
 * @param l_data  number of bytes in data
 * @param data    packed aux fields
 * @param s       MUST be a pointer returned by gfa_aux_get() for this
 *                buffer, i.e. it points at the field's type byte
 * @return the buffer length after the field (tag + type + value) is removed
 */
int gfa_aux_del(int l_data, uint8_t *data, uint8_t *s)
{
    uint8_t *field_start = s - 2; /* step back over the two tag bytes */
    __skip_tag(s);                /* s now points just past the field */
    memmove(field_start, s, l_data - (s - data));
    return l_data - (s - field_start);
}
474
+
475
+ void gfa_aux_update_f(gfa_aux_t *a, const char tag[2], float x)
476
+ {
477
+ uint8_t *p = 0;
478
+ if (a->l_aux > 0)
479
+ p = gfa_aux_get(a->l_aux, a->aux, "cv");
480
+ if (p) {
481
+ memcpy(p + 1, &x, 4);
482
+ } else {
483
+ kstring_t str;
484
+ str.l = a->l_aux, str.m = a->m_aux, str.s = (char*)a->aux;
485
+ ks_resize(&str, str.l + 7);
486
+ kputsn_(tag, 2, &str);
487
+ kputc_('f', &str);
488
+ kputsn_(&x, 4, &str);
489
+ a->l_aux = str.l, a->m_aux = str.m, a->aux = (uint8_t*)str.s;
490
+ }
491
+ }
492
+
493
+ void gfa_aux_update_cv(gfa_t *g, const char *tag, const double *cov_seg, const double *cov_link)
494
+ {
495
+ int64_t i;
496
+ if (cov_seg)
497
+ for (i = 0; i < g->n_seg; ++i)
498
+ gfa_aux_update_f(&g->seg[i].aux, tag, cov_seg[i]);
499
+ if (cov_link)
500
+ for (i = 0; i < g->n_arc; ++i)
501
+ if (g->arc[i].comp == 0)
502
+ gfa_aux_update_f(&g->link_aux[g->arc[i].link_id], tag, cov_link[i]);
503
+ }
504
+
505
+ /*********************
506
+ * Translation table *
507
+ *********************/
508
+
509
/*
 * Byte-to-byte translation table, indexed by character code.
 * Every non-letter maps to itself. Letters are remapped case-preservingly:
 * A<->T, C<->G, B<->V, D<->H, K<->M, R<->Y, U->A; the remaining letters
 * (E, F, I, J, L, N, O, P, Q, S, W, X, Z) map to themselves.
 */
unsigned char gfa_comp_table[256] = {
    /*   0- 15 */   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
    /*  16- 31 */  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
    /*  32- 47 */  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
    /*  48- 63 */  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
    /*  @ A-O  */  64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
    /*  P-Z .. */ 'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z',  91,  92,  93,  94,  95,
    /*  ` a-o  */  96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
    /*  p-z .. */ 'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127,
    /* 128-143 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    /* 144-159 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    /* 160-175 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    /* 176-191 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    /* 192-207 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    /* 208-223 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    /* 224-239 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    /* 240-255 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};