ruby-minigraph 0.0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,1451 @@
1
+ #!/usr/bin/env k8
2
+
3
+ /*******************************
4
+ * Command line option parsing *
5
+ *******************************/
6
+
7
/**
 * Minimal getopt(3)-style parser for single-letter command line options.
 *
 * Parser state lives on the function object itself:
 *   getopt.ind   - index of the next element of `args` to examine
 *   getopt.arg   - argument of the option just returned, or null
 *   getopt.place - offset inside args[getopt.ind], or -1 between arguments
 *
 * @param args  array of command line strings
 * @param ostr  option specification; a letter followed by ':' takes an argument
 * @return the option letter; '?' for an unknown option or a missing argument
 *         (':' instead for a missing argument when `ostr` starts with ':');
 *         null when the next element is not an option, is "--", or when a '-'
 *         that is not listed in `ostr` is met
 */
var getopt = function(args, ostr) {
	var oli; // option letter list index
	if (typeof(getopt.place) == 'undefined')
		getopt.ind = 0, getopt.arg = null, getopt.place = -1;
	if (getopt.place == -1) { // update scanning pointer
		if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
			getopt.place = -1;
			return null;
		}
		if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
			++getopt.ind;
			getopt.place = -1;
			return null;
		}
	}
	var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
	if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
		if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
		// The unknown letter was the last one of this argument: move on to the
		// next argument. Without this, the following call would read an empty
		// option character (which indexOf() always "finds" at position 0) and
		// could consume the next positional argument as its value.
		if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
		return '?';
	}
	if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
		getopt.arg = null;
		if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
	} else { // need an argument
		if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
			getopt.arg = args[getopt.ind].substr(getopt.place); // "-dVALUE"
		else if (args.length <= ++getopt.ind) { // no arg
			getopt.place = -1;
			if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
			return '?';
		} else getopt.arg = args[getopt.ind]; // white space: "-d VALUE"
		getopt.place = -1;
		++getopt.ind;
	}
	return optopt;
};
44
+
45
/**
 * Sort intervals by start and annotate each with the third slot used by
 * it_overlap(): a[i][2] becomes the largest end among a[i] and the already
 * computed a[.][2] values of its two children in the implicit binary tree
 * laid out over the sorted array.
 *
 * @param a  array of [start, end, ...] arrays; sorted and modified in place
 * @return -1 for an empty array, otherwise the last tree level processed
 */
function it_index(a) {
	var n = a.length;
	if (n == 0) return -1;
	a.sort(function(p, q) { return p[0] - q[0] });

	// Level 0: every even slot is a leaf whose max is its own end.
	var tailMax, tailIdx, i;
	for (i = 0; i < n; i += 2) {
		a[i][2] = a[i][1];
		tailMax = a[i][2];
		tailIdx = i;
	}

	var level = 1;
	while ((1 << level) <= n) {
		var half = 1 << (level - 1);
		var stride = 1 << (level + 1);
		for (i = (1 << level) - 1; i < n; i += stride) {
			a[i][2] = a[i][1];
			if (a[i][2] < a[i - half][2]) a[i][2] = a[i - half][2];
			// a missing right child is replaced by the running max of the array tail
			var right = i + half < n ? a[i + half][2] : tailMax;
			if (a[i][2] < right) a[i][2] = right;
		}
		// follow the last node up one level and keep the tail max current
		if ((tailIdx >> level) & 1) tailIdx -= half;
		else tailIdx += half;
		if (tailIdx < n) tailMax = tailMax > a[tailIdx][2] ? tailMax : a[tailIdx][2];
		++level;
	}
	return level - 1;
}
64
+
65
/**
 * Collect the intervals of an it_index()-ed array that overlap [st, en).
 *
 * @param a   array prepared by it_index(), or null
 * @param st  query start
 * @param en  query end
 * @return array of the matching elements of `a` (empty when `a` is null)
 */
function it_overlap(a, st, en) {
	if (a == null) return [];
	var hits = [], todo = [];

	// height of the implicit tree: largest depth with 2^depth <= a.length
	var depth = 0;
	while ((1 << depth) <= a.length) ++depth;
	--depth;

	todo.push([(1 << depth) - 1, depth, 0]);
	while (todo.length) {
		var frame = todo.pop();
		var node = frame[0], lvl = frame[1], leftDone = frame[2];
		if (lvl <= 2) {
			// small subtree: scan its slots linearly instead of descending
			var lo = node >> lvl << lvl;
			var hi = lo + (1 << (lvl + 1)) - 1;
			if (hi >= a.length) hi = a.length;
			for (var j = lo; j < hi; ++j) {
				if (a[j][0] < en && st < a[j][1])
					hits.push(a[j]);
			}
			continue;
		}
		if (leftDone == 0) { // left child not processed yet
			todo.push([node, lvl, 1]);
			var left = node - (1 << (lvl - 1));
			// descend only if something in the left subtree may end after st
			if (left >= a.length || a[left][2] > st)
				todo.push([left, lvl - 1, 0]);
			continue;
		}
		if (node < a.length && a[node][0] < en) {
			if (st < a[node][1]) hits.push(a[node]);
			todo.push([node + (1 << (lvl - 1)), lvl - 1, 0]);
		}
	}
	return hits;
}
92
+
93
/**
 * Test whether [st, en) lies entirely inside at least one interval of `a`.
 *
 * @param a   array prepared by it_index(), or null
 * @param st  query start
 * @param en  query end
 * @return true if some overlapping interval starts at or before `st` and
 *         ends at or after `en`; false otherwise (including when `a` is null)
 */
function it_contained(a, st, en) {
	if (a == null) return false;
	var hits = it_overlap(a, st, en);
	for (var k = 0; k < hits.length; ++k) {
		var iv = hits[k];
		if (iv[0] <= st && en <= iv[1])
			return true; // one enclosing interval is enough
	}
	return false;
}
103
+
104
+ /****************************
105
+ ***** mgutils commands *****
106
+ ****************************/
107
+
108
/**
 * "renamefa" command: copy a FASTA file to the output, rewriting every header
 * as ">" + prefix + delimiter + name, where name is the original header with
 * a leading "<non-space>#" prefix removed.
 *
 * @param args  command line arguments: [-d delimiter] <prefix> <in.fa>
 */
function mg_cmd_renamefa(args)
{
	var opt, delim = '#';
	for (;;) {
		opt = getopt(args, "d:");
		if (opt == null) break;
		if (opt == 'd') delim = getopt.arg;
	}
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js renamefa [-d delimitor] <prefix> <in.fa>");
		return;
	}

	var prefix = args[getopt.ind];
	var fp = new File(args[getopt.ind+1]);
	var line = new Bytes();
	while (fp.readline(line) >= 0) {
		if (line[0] == 62) { // 62 is '>': a header line
			var hit = /^>(.*)/.exec(line.toString());
			if (hit == null) throw Error("Wrong FASTA format!");
			var stripped = hit[1].replace(/^\S+#/, "");
			print(">" + prefix + delim + stripped);
		} else {
			print(line); // sequence lines pass through untouched
		}
	}
	fp.close();
	line.destroy();
}
134
+
135
/**
 * "joinfa" command: concatenate the sequences of a FASTA file into a single
 * record. Sequences shorter than the minimum length are dropped; the kept
 * ones are separated by a run of 'N'.
 *
 * Options: -l INT  minimum sequence length to keep [150]
 *          -n INT  number of 'N' between joined sequences [20]
 *          -s STR  name of the output record ["decoy-cat"]
 *
 * @param args  command line arguments: [options] <in.fa>
 */
function mg_cmd_joinfa(args)
{
	var c, len_n = 20, min_len = 150, name = "decoy-cat";
	while ((c = getopt(args, "n:l:s:")) != null) {
		if (c == 'l') min_len = parseInt(getopt.arg, 10);
		else if (c == 'n') len_n = parseInt(getopt.arg, 10);
		else if (c == 's') name = getopt.arg;
	}
	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js joinfa [options] <in.fa>");
		return;
	}

	var seq = new Bytes(), seq1 = new Bytes(), nn = new Bytes();
	for (var i = 0; i < len_n; ++i) nn.set(78); // 78 is 'N'

	// Append the record accumulated in seq1 to seq if it is long enough.
	function flush_record() {
		if (seq1.length >= min_len) {
			if (seq.length > 0) seq.set(nn);
			seq.set(seq1);
		}
	}

	var buf = new Bytes();
	var file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		if (buf[0] == 62) { // 62 is '>': a new record starts, finish the previous one
			flush_record();
			seq1.length = 0;
		} else seq1.set(buf);
	}
	flush_record(); // last record in the file

	print(">" + name);
	print(seq);
	file.close();
	buf.destroy();
	seq.destroy();
	seq1.destroy();
	nn.destroy(); // the spacer buffer was previously never released
}
172
+
173
/**
 * "anno" command: classify each event of <in.bed> by the repeat annotation
 * that covers it.
 *
 * <in.bed> has at least four columns: contig, start, end and event length.
 * Events are keyed by "contig_start_end"; the optional annotation files refer
 * to events by that key. For every event the merged covered length of each
 * feature is computed, a type is derived from those lengths, and one
 * TAB-delimited line is printed: contig, start, end, then either the event
 * length or (with -b) eight bubble columns, then the type, then one
 * "feature:length" field per feature that reaches the minimum feature length.
 *
 * @param args  command line arguments: [options] <in.bed>
 */
function mg_cmd_anno(args)
{
	var c, micro_cap = 6, min_feat_len = 30, min_centro_len = 200, mobile = false, max_mobile_div = 2.0, min_segdup_frac = 0.2;
	var fn_rmout = null, fn_etrf = null, fn_dust = null, fn_gap = null, fn_paf = null, fn_centro = null, fn_bb = null, fn_sd = null;
	while ((c = getopt(args, "e:p:g:d:r:c:l:S:b:s:m")) != null) {
		if (c == 'l') min_feat_len = parseInt(getopt.arg, 10);
		else if (c == 'S') min_segdup_frac = parseFloat(getopt.arg);
		else if (c == 'm') mobile = true;
		else if (c == 'e') fn_etrf = getopt.arg;
		else if (c == 'p') fn_paf = getopt.arg;
		else if (c == 'g') fn_gap = getopt.arg;
		else if (c == 'd') fn_dust = getopt.arg;
		else if (c == 'r') fn_rmout = getopt.arg;
		else if (c == 'c') fn_centro = getopt.arg;
		else if (c == 'b') fn_bb = getopt.arg;
		else if (c == 's') fn_sd = getopt.arg;
	}

	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js anno [options] <in.bed>");
		print("Options:");
		print(" -l INT min feature length [" + min_feat_len + "]");
		print(" -S FLOAT min segdup fraction [" + min_segdup_frac + "]");
		print(" -r FILE RepeatMasker .out [null]");
		print(" -g FILE seqtk gap output for stretches of Ns [null]");
		print(" -d FILE minimap2/sdust output for LCRs [null]");
		print(" -e FILE etrf output [null]");
		print(" -p FILE PAF alignment against reference [null]");
		print(" -c FILE dna-brnn centromere results [null]");
		print(" -b FILE bubble file [null]");
		print(" -s FILE segdup file (paste gfa2bed bedcov) [null]");
		print(" -m annotate AluY and L1HS separately");
		exit(1);
	}

	var file, buf = new Bytes();

	// bb[key] = [event length, {feature: [[st, en], ...]}, <bubble columns...>]
	// bba keeps the keys in input order; seg holds per-segment data from -s.
	var bb = {}, bba = [], seg = {};

	// Lookup tables for the (GGAAT)n satellite motif; filled by build_motif_tables().
	var motif0 = "GGAAT", motif_hash = {}, motif_mut_hash = {};

	// Return the interval list of feature `anno` for event `key`, creating it if
	// needed. Throws a descriptive error when the event is not in <in.bed>.
	function feature_list(key, anno) {
		if (bb[key] == null) throw Error("ERROR: missing key: " + key);
		var h = bb[key][1];
		if (h[anno] == null) h[anno] = [];
		return h[anno];
	}

	// Fill motif_hash with motif0, its reverse complement and all rotations of
	// both; fill motif_mut_hash with every single-base substitution of those.
	function build_motif_tables() {
		var comp_tbl = { 'A':'T', 'T':'A', 'C':'G', 'G':'C' };
		var motif = [motif0], rot = [], i, j;

		var rc = ""; // reverse complement
		for (j = motif0.length - 1; j >= 0; --j)
			rc += comp_tbl[motif0.charAt(j)];
		motif.push(rc);

		for (i = 0; i < motif.length; ++i) // rotate
			for (j = 1; j < motif[i].length; ++j)
				rot.push(motif[i].substr(j) + motif[i].substr(0, j));
		for (i = 0; i < rot.length; ++i)
			motif.push(rot[i]);

		for (i = 0; i < motif.length; ++i) motif_hash[motif[i]] = i;

		var bases = [ 'A', 'C', 'G', 'T' ]; // mutate
		for (var x in motif_hash) {
			for (i = 0; i < x.length; ++i) {
				for (j = 0; j < bases.length; ++j) {
					if (x.charAt(i) == bases[j]) continue;
					motif_mut_hash[x.substr(0, i) + bases[j] + x.substr(i + 1)] = 1;
				}
			}
		}
	}

	// Decide whether a simple-repeat unit is a (GGAAT)n variant: either an exact
	// motif, or a concatenation of motif-length pieces that are all exact or
	// single-substitution motifs with at least one exact piece.
	function is_hsat_unit(unit) {
		if (motif_hash[unit] != null) return true;
		if (unit.length % motif0.length != 0) return false;
		var n_exact = 0, n_mut = 0;
		for (var j = 0; j < unit.length; j += motif0.length) {
			// substr() takes a LENGTH as its second argument; passing an end offset
			// made every piece after the first too long to ever match.
			var piece = unit.substr(j, motif0.length);
			if (motif_hash[piece] != null) ++n_exact;
			else if (motif_mut_hash[piece] != null) ++n_mut;
		}
		return n_exact > 0 && (n_exact + n_mut) * motif0.length == unit.length;
	}

	// Total length covered by a list of [st, en] intervals after merging
	// overlapping ones. The list is sorted in place by start.
	function merged_length(list) {
		var st = 0, en = 0, cov = 0;
		list.sort(function(a, b) { return a[0] - b[0]; });
		for (var j = 0; j < list.length; ++j) {
			if (list[j][0] > en) {
				cov += en - st;
				st = list[j][0], en = list[j][1];
			} else en = en > list[j][1]? en : list[j][1];
		}
		return cov + (en - st);
	}

	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length < 4) continue;
		var key = t[0] + "_" + t[1] + "_" + t[2];
		var len = parseInt(t[3], 10);
		if (len < parseInt(t[2], 10) - parseInt(t[1], 10))
			throw Error("ERROR: event length smaller than interval length");
		bb[key] = [len, {}];
		bba.push(key);
	}
	file.close();

	if (fn_bb) {
		if (fn_sd) { // generated by "paste <(gfatools gfa2bed) <(bedtk cov segdup.bed gfa2bed.bed) | cut -f1-5,9,10"
			file = new File(fn_sd);
			while (file.readline(buf) >= 0) {
				var t = buf.toString().split("\t");
				seg[t[3]] = [parseInt(t[4], 10), parseInt(t[2], 10) - parseInt(t[1], 10), parseInt(t[6], 10)];
			}
			file.close();
		}
		file = new File(fn_bb); // parse "gfatools bubble" output
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			var key = t[0] + "_" + t[1] + "_" + t[2];
			if (key in bb) {
				bb[key].push(t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]);
				var s = t[11].split(","), tot_len = 0, tot_sd = 0, ref_len = 0;
				var dup = {};
				for (var i = 1; i < s.length - 1; ++i) { // first and last entries of the list are skipped
					if (seg[s[i]] == null) continue;
					if (dup[s[i]]) continue; // count each segment once
					dup[s[i]] = 1;
					tot_len += seg[s[i]][1], tot_sd += seg[s[i]][2];
					if (seg[s[i]][0] == 0)
						ref_len += seg[s[i]][1];
				}
				// slots 7..9 (bubble columns 9..11) are replaced by the segment totals
				bb[key][7] = tot_len;
				bb[key][8] = tot_sd;
				bb[key][9] = ref_len;
			}
		}
		file.close();
	}

	if (fn_rmout) { // parse RepeatMasker output
		build_motif_tables(); // dealing with possible (GGAAT)n rotations and mutations

		file = new File(fn_rmout);
		while (file.readline(buf) >= 0) {
			var line = buf.toString();
			var l2 = line.replace(/^\s+/, "");
			var m4, t = l2.split(/\s+/);
			if (t.length < 15) continue;
			// t[9] is the repeat name and t[10] the class; normalise the class
			if (t[9] == "ALR/Alpha") t[10] = "alpha";
			else if (t[9] == "HSATII") t[10] = "hsat2/3";
			else if (/^LTR\/ERV/.test(t[10])) t[10] = 'LTR/ERV';
			else if (/^LTR/.test(t[10])) t[10] = 'LTR/misc';
			else if (/^DNA/.test(t[10])) t[10] = 'DNA/misc';
			else if (/rRNA|scRNA|snRNA|srpRNA/.test(t[10])) t[10] = 'RNAmisc';
			else if (/^LINE/.test(t[10]) && t[10] != "LINE/L1") t[10] = 'LINE/misc';
			else if ((t[10] == "Simple_repeat" || t[10] == "Satellite") && ((m4 = /^\(([ACGT]+)\)n/.exec(t[9])) != null)) {
				if (is_hsat_unit(m4[1])) t[10] = "hsat2/3";
			}

			if (mobile) {
				if (t[10] == "LINE/L1" && t[9] == "L1HS" && parseFloat(t[1]) < max_mobile_div) t[10] = "LINE/L1HS";
				if (t[10] == "SINE/Alu" && /^AluY/.test(t[9]) && parseFloat(t[1]) < max_mobile_div) t[10] = "SINE/AluY";
			}
			if (t[10] == 'Simple_repeat' || t[10] == 'Low_complexity') t[10] = 'LCR';

			// t[4] is the event key; the start column is converted from 1-based to 0-based
			feature_list(t[4], t[10]).push([parseInt(t[5], 10) - 1, parseInt(t[6], 10)]);
		}
		file.close();

		// Pooled pseudo-features (names start with '_') combining related classes.
		for (var i = 0; i < bba.length; ++i) {
			var h = bb[bba[i]][1], a = [], b = [], c_alu = [], c_l1 = [];
			for (var key in h) {
				if (/^(DNA|SINE|LINE|Retroposon|LTR)/.test(key))
					for (var j = 0; j < h[key].length; ++j)
						a.push(h[key][j]);
				if (/^(Satellite|hsat2\/3|alpha)/.test(key))
					for (var j = 0; j < h[key].length; ++j)
						b.push(h[key][j]);
				if (/^(SINE\/Alu)/.test(key))
					for (var j = 0; j < h[key].length; ++j)
						c_alu.push(h[key][j]);
				if (/^(LINE\/L1)/.test(key))
					for (var j = 0; j < h[key].length; ++j)
						c_l1.push(h[key][j]);
			}
			if (a.length) h['_inter'] = a;
			if (b.length) h['_sat'] = b;
			if (c_alu.length) h['_alu'] = c_alu;
			if (c_l1.length) h['_l1'] = c_l1;
		}
	}

	if (fn_etrf) { // parse etrf output
		file = new File(fn_etrf);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			var l = parseInt(t[4], 10);
			if (l == 1) continue;
			var anno = l <= micro_cap? 'micro' : 'mini';
			var st = parseInt(t[1], 10), en = parseInt(t[2], 10);
			feature_list(t[0], anno).push([st, en]);
			feature_list(t[0], 'LCR').push([st, en]); // tandem repeats also count as LCR
		}
		file.close();
	}

	if (fn_dust) { // parse minimap2/sdust output
		file = new File(fn_dust);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			feature_list(t[0], 'LCR').push([parseInt(t[1], 10), parseInt(t[2], 10)]);
		}
		file.close();
	}

	if (fn_paf) { // parse bubble-to-reference PAF for self alignment
		file = new File(fn_paf);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			feature_list(t[0], 'self').push([parseInt(t[2], 10), parseInt(t[3], 10)]);
		}
		file.close();
	}

	if (fn_gap) { // parse assembly gaps, generated by "seqtk gap"
		file = new File(fn_gap);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			feature_list(t[0], 'gap').push([parseInt(t[1], 10), parseInt(t[2], 10)]);
		}
		file.close();
	}

	if (fn_centro) { // parse dna-brnn centromere results
		file = new File(fn_centro);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			var anno = t[3] == '1'? 'hsat2/3' : 'alpha';
			var list = feature_list(t[0], anno); // created even if the interval is dropped below
			var st = parseInt(t[1], 10), en = parseInt(t[2], 10);
			if (en - st >= min_centro_len)
				list.push([st, en]);
		}
		file.close();
	}

	for (var i = 0; i < bba.length; ++i) {
		var m, key = bba[i], h = bb[key][1], len = bb[key][0];
		if ((m = /^(\S+)_(\d+)_(\d+)/.exec(key)) == null)
			throw Error("Bug!");
		var x = {}, t = [m[1], m[2], m[3]];
		if (fn_bb) t.push(bb[key][2], bb[key][3], bb[key][4], bb[key][5], bb[key][6], bb[key][7], bb[key][8], bb[key][9]);
		else t.push(len);
		for (var feat in h) { // calculate the merged length of each feature
			var cov = merged_length(h[feat]);
			if (cov >= min_feat_len)
				x[feat] = cov;
		}
		var type = "none";
		var max = 0, max2 = 0, max_c2 = null, max_c = null, sum = 0, sum_misc = 0;
		var lcr = x['LCR'] == null? 0 : x['LCR'];
		var self_len = x['self'] == null? 0 : x['self'];
		for (var feat in x) { // largest and second largest real feature
			if (feat == 'LCR' || feat == 'self') continue;
			if (feat[0] == '_') continue; // pooled pseudo-features are handled below
			sum += x[feat];
			if (feat != 'mini' && feat != 'micro') sum_misc += x[feat];
			if (max < x[feat]) max2 = max, max_c2 = max_c, max = x[feat], max_c = feat;
			else if (max2 < x[feat]) max2 = x[feat], max_c2 = feat;
		}
		if (max >= len * 0.7) {
			type = max_c;
		} else if (lcr >= len * 0.7) {
			type = 'lcr';
			if (max_c == 'mini' || max_c == 'micro') {
				var y = x['mini'] == null? 0 : x['mini'];
				y += x['micro'] == null? 0 : x['micro'];
				if (max >= y * 0.7) type = max_c;
			}
		} else if ((max_c == 'mini' || max_c == 'micro') && max2 < max * 0.1) {
			type = max_c;
		} else if (x['_alu'] != null && x['_alu'] >= len * 0.7) {
			type = 'SINE/Alu';
		} else if (x['_l1'] != null && x['_l1'] >= len * 0.7) {
			type = 'LINE/L1';
		} else if (x['_inter'] != null && x['_inter'] >= len * 0.7) {
			type = 'inter';
		} else if (x['_sat'] != null && x['_sat'] >= len * 0.5) {
			type = 'Satellite';
		} else if (sum_misc + lcr >= len * 0.7) {
			type = 'mixed';
		} else if (sum + lcr > len * 0.05) {
			type = 'partial';
		} else if (self_len >= len * 0.5) {
			type = 'self';
		}
		// with -b, t[8] and t[9] are the tot_len and tot_sd values stored above
		if ((type == 'partial' || type == 'self' || type == 'none' || type == 'mixed') && fn_bb && t[8] >= 1000 && t[9] >= t[8] * min_segdup_frac)
			type = 'segdup';
		t.push(type);
		for (var feat in x)
			t.push(feat + ':' + x[feat]);
		print(t.join("\t"));
	}

	buf.destroy();
}
524
+
525
/**
 * Map an annotation type to its numbered summary category. The numeric
 * prefix is stripped by the caller to obtain the label.
 *
 * @param anno  annotation type string
 * @return category string such as "01_Alu"; "20_Other-repeat" for anything
 *         not listed explicitly
 */
function mg_classify_repeat(anno) {
	// exact names
	var exact = {
		"mini": "11_VNTR",
		"micro": "12_STR",
		"lcr": "13_Other-LCR",
		"LINE/L1": "02_L1",
		"LINE/L1HS": "02_L1",
		"SINE/Alu": "01_Alu",
		"SINE/AluY": "01_Alu",
		"Retroposon/SVA": "03_SVA",
		"LTR/ERV": "04_ERV",
		"inter": "05_Other-TE",
		"alpha": "10_Satellite",
		"hsat2/3": "10_Satellite",
		"_sat": "10_Satellite",
		"self": "30_Low-repeat",
		"none": "30_Low-repeat",
		"partial": "30_Low-repeat",
		"mixed": "20_Other-repeat",
		"segdup": "21_SegDup"
	};
	if (Object.prototype.hasOwnProperty.call(exact, anno)) return exact[anno];
	// remaining transposable-element and satellite classes are matched by prefix
	if (/^(DNA|LINE|SINE|LTR)/.test(anno)) return "05_Other-TE";
	if (/^Satellite/.test(anno)) return "10_Satellite";
	return "20_Other-repeat";
}
543
+
544
/**
 * "anno2tbl" command: tabulate annotated events per repeat category.
 *
 * Reads TAB-delimited lines from the file named by args[0], or from a File
 * created without a name when no argument is given. Column 12 (t[11]) is
 * the annotation type. Lines typed "gap" and contigs matching chrUn/_random
 * are ignored. Rows are bucketed by min(t[4], 4); each bucket accumulates a
 * count, the sum of t[7] and the sum of t[8] - t[10].
 *
 * @param args  command line arguments: [in.txt]
 */
function mg_cmd_anno2tbl(args)
{
	var line = new Bytes();
	var fp = args.length == 0? new File() : new File(args[0]);
	var tally = {};
	while (fp.readline(line) >= 0) {
		var f = line.toString().split("\t");
		for (var col = 1; col <= 7; ++col)
			f[col] = parseInt(f[col]);
		if (f[11] == "gap" || /chrUn|_random/.test(f[0])) continue;
		var na = f[4] < 4? f[4] : 4; // bucket index, capped at 4
		var cls = mg_classify_repeat(f[11]);
		if (tally[cls] == null)
			tally[cls] = [0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0];
		var row = tally[cls];
		++row[na];
		row[na+3] += f[7];
		if (f[8] >= 0 && f[10] >= 0)
			row[na+6] += f[8] - f[10];
	}

	fp.close();
	line.destroy();

	for (var cls in tally) {
		// the first two slots are not part of the printed table
		print(cls, cls.replace(/^[0-9]+_/, ""), tally[cls].slice(2).join("\t"));
	}
}
572
+
573
/**
 * "paf2bl" command: print target intervals (columns 6, 8 and 9) of selected
 * PAF alignments.
 *
 * A line is skipped when it carries a tp:A:S or tp:A:I tag, when column 12
 * is below the minimum mapping quality (5), when column 11 is below the
 * minimum length (500), or when it has no de:f: tag. Without -s the de value
 * must lie within [min_de, 0.1]; with -s it must not exceed 0.002.
 *
 * Options: -d FLOAT  lower bound on de when -s is not given [0.01]
 *          -s        select low-divergence alignments instead
 *
 * @param args  command line arguments: [options] <ins.paf>
 */
function mg_cmd_paf2bl(args)
{
	var min_de = 0.01, max_de = 0.1, sub_de = 0.002, min_mapq = 5, min_len = 500, is_sub = false;
	var opt;
	for (opt = getopt(args, "d:s"); opt != null; opt = getopt(args, "d:s")) {
		if (opt == 'd') min_de = parseFloat(getopt.arg);
		else if (opt == 's') is_sub = true;
	}
	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js paf2bl <ins.paf>");
		print("Note: bedtk sub <(mgutils.js paf2bl ins.paf; cat bl100.bed) <(../mgutils.js paf2bl -s ins.paf) | bedtk merge");
		return;
	}

	var fp = new File(args[getopt.ind]);
	var raw = new Bytes();
	while (fp.readline(raw) >= 0) {
		var text = raw.toString();
		var col = text.split("\t");
		if (/\ttp:A:[SI]/.test(text)) continue;
		if (parseInt(col[11]) < min_mapq || parseInt(col[10]) < min_len) continue;
		var tag = /\tde:f:(\S+)/.exec(text);
		if (tag == null) continue;
		var de = parseFloat(tag[1]);
		var rejected = is_sub? de > sub_de : (de < min_de || de > max_de);
		if (rejected) continue;
		print(col[5], col[7], col[8]);
	}
	raw.destroy();
	fp.close();
}
606
+
607
/**
 * "stableGaf" command: rewrite the path column of a GAF file from segment
 * names to stable coordinates taken from the SN/SO/SR tags of the graph.
 *
 * Consecutive path steps that are adjacent on the same stable sequence, in
 * the same orientation and of the same rank are merged. If the whole path
 * collapses to one piece of a sequence that has rank-0 segments, the line is
 * projected onto that sequence (path = sequence name, coordinates on its
 * forward strand). Otherwise the path is printed as a list of
 * "<orientation><name>:<start>-<end>" pieces.
 *
 * @param args  command line arguments: <graph.gfa> [aln.gaf]; when the GAF
 *              name is omitted a File created without a name is read
 */
function mg_cmd_stableGaf(args)
{
	var c;
	while ((c = getopt(args, "")) != null) {
	}
	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js stableGaf <graph.gfa> <aln.gaf>");
		return;
	}

	var re = /\t(LN|SN|SO|SR):[Zi]:(\S+)/g;
	var file, buf = new Bytes();

	// segh[segment] = [stable name, start, end, rank]
	// pri_len[name] = largest end offset seen among rank-0 segments of `name`
	var pri_len = {}, segh = {};
	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		var m, line = buf.toString();
		if ((m = /^S\t(\S+)\t(\S+)(\t.*)/.exec(line)) == null) continue;
		var seg = m[1], len = m[2] == '*'? 0 : m[2].length, tags = m[3];
		var sn = null, so = -1, sr = -1;
		while ((m = re.exec(tags)) != null) {
			if (m[1] == "LN") len = parseInt(m[2], 10);
			else if (m[1] == "SN") sn = m[2];
			else if (m[1] == "SO") so = parseInt(m[2], 10);
			else if (m[1] == "SR") sr = parseInt(m[2], 10);
		}
		if (sn == null || so < 0 || sr < 0 || len <= 0)
			throw Error("failed to parse tags '" + tags + "'");
		segh[seg] = [sn, so, so + len, sr];
		if (sr == 0) {
			if (pri_len[sn] == null) pri_len[sn] = 0;
			pri_len[sn] = pri_len[sn] > so + len? pri_len[sn] : so + len;
		}
	}
	file.close();

	re = /([><])([^\s><]+)/g;
	file = args.length - getopt.ind < 2? new File() : new File(args[getopt.ind+1]);
	while (file.readline(buf) >= 0) {
		var m, line = buf.toString();
		if ((m = /^(\S+)\t(\d+\t\d+\t\d+)\t([+-])\t(\S+)\t(\d+)\t(\d+)\t(\d+)\t(.*)/.exec(line)) == null)
			continue;
		// a[] holds merged path pieces: [orientation, name, start, end, rank]
		var s, a = [];
		while ((s = re.exec(m[4])) != null) {
			if (segh[s[2]] == null)
				throw Error("failed to find segment '" + s[2] + "'");
			var h = segh[s[2]], add_new = true;
			if (a.length) {
				var b = a[a.length - 1];
				if (b[0] == s[1] && h[3] == b[4] && h[0] == b[1]) {
					if (b[0] == '>') { // forward: the new segment must start where the piece ends
						if (h[1] == b[3]) b[3] = h[2], add_new = false;
					} else { // reverse: the new segment must end where the piece starts
						if (h[2] == b[2]) b[2] = h[1], add_new = false;
					}
				}
			}
			if (add_new) a.push([s[1], h[0], h[1], h[2], h[3]]);
		}
		var path_len = 0, path = "";
		for (var i = 0; i < a.length; ++i)
			path_len += a[i][3] - a[i][2];
		if (path_len != parseInt(m[5], 10))
			throw Error("inconsistent path length for '" + m[1] + "': " + path_len + "!=" + m[5]);
		var strand = m[3], pst = m[6], pen = m[7];
		if (a.length == 1 && pri_len[a[0][1]] != null) {
			pst = parseInt(m[6], 10);
			pen = parseInt(m[7], 10);
			if (a[0][0] == '>') {
				pst += a[0][2], pen += a[0][2];
			} else {
				strand = strand == '+'? '-' : '+';
				// Path coordinates are 0-based half-open, so [pst, pen) on the
				// reversed piece maps to [path_len - pen, path_len - pst) on the
				// forward strand. The previous "path_len - 1 - x" form shifted
				// both ends down by one base.
				var st = a[0][2] + (path_len - pen);
				var en = a[0][2] + (path_len - pst);
				pst = st, pen = en;
			}
			path_len = pri_len[a[0][1]];
			path = a[0][1];
		} else {
			var b = [];
			for (var i = 0; i < a.length; ++i)
				b.push(a[i][0] + a[i][1] + ':' + a[i][2] + '-' + a[i][3]);
			path = b.join("");
		}
		print(m[1], m[2], strand, path, path_len, pst, pen, m[8]);
	}
	file.close();
	buf.destroy();
}
695
+
696
/**
 * "subgaf" command: print the GAF lines whose aligned path region overlaps
 * the query region "ctg:start-end".
 *
 * FIXME: this is BUGGY!!! (original author's note). In particular, the
 * orientation ('>' or '<') captured for each stable-coordinate path piece is
 * not used when trimming the first and last piece to the aligned region.
 *
 * @param args  command line arguments: <in.gaf> <reg>
 * @throws Error when <reg> is not of the form ctg:start-end, or when the
 *         first path piece is not longer than the path start offset
 */
function mg_cmd_subgaf(args)
{
	if (args.length < 2) {
		print("Usage: mgutils.js subgaf <in.gaf> <reg>");
		exit(1);
	}

	// A malformed region used to leave ctg/st/en undefined, which made the
	// command read the whole file and silently print nothing.
	var m = /^(\S+):(\S+)-(\S+)/.exec(args[1]);
	if (m == null)
		throw Error("failed to parse region '" + args[1] + "'");
	var ctg = m[1], st = parseInt(m[2], 10), en = parseInt(m[3], 10);
	if (isNaN(st) || isNaN(en))
		throw Error("failed to parse region '" + args[1] + "'");

	var buf = new Bytes();
	var file = new File(args[0]);
	var re = /([><])([^\s><]+):(\d+)-(\d+)/g;

	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length < 9) continue; // too short to carry path, length, start and end
		var l = parseInt(t[6], 10), s = parseInt(t[7], 10), e = parseInt(t[8], 10);
		var regs = [];
		if (t[5][0] == '>' || t[5][0] == '<') { // path given as a list of pieces
			var x = 0; // path length consumed so far
			re.lastIndex = 0;
			while ((m = re.exec(t[5])) != null) {
				var a = parseInt(m[3], 10), b = parseInt(m[4], 10), c = b - a;
				if (x == 0) { // first piece: skip the unaligned head
					if (b - a <= s) throw Error("Inconsistent!");
					a += s;
				}
				if (x + c == l) b -= l - e; // last piece: drop the unaligned tail
				regs.push([m[2], a, b]);
				x += c;
			}
		} else { // path is a single sequence name
			regs.push([t[5], s, e]);
		}
		var hit = false;
		for (var i = 0; i < regs.length; ++i) {
			if (regs[i][0] == ctg && regs[i][2] > st && en > regs[i][1])
				hit = true;
		}
		if (hit) print(buf);
	}

	file.close();
	buf.destroy();
}
743
+
744
// sveval: compare called variants against baseline variants inside confident regions.
//
// Positional arguments after the options: <true.vcf> <true.bed> <call.txt>.
//   true.bed  tab-delimited intervals (contig, start, end); contigs named X/Y/chrX/chrY
//             are dropped unless -a is given
//   true.vcf  baseline records; only records with at least 10 columns on a contig
//             present in true.bed are used
//   call.txt  tab-delimited calls: contig, start, end, score in column 4, the
//             reference allele in column 8 and alternative alleles from column 9 on
//             ('*' denotes an empty allele)
// Output: six tab-delimited lines labelled NA/ND/NI (baseline records without an
// overlapping call) and PA/PD/PI (calls without an overlapping baseline record),
// each giving the miss count, the total and their ratio. The second letter is
// A for all records, D when the longest allele is shorter than the reference and
// I otherwise.
function mg_cmd_sveval(args)
{
	var c, flank = 100, min_var_len = 100, min_test_len = 50, min_sc = 20.0, non_chr = false, out_err = false, flt_vcf = false;
	while ((c = getopt(args, "f:v:t:s:aeF")) != null) {
		if (c == 'f') flank = parseInt(getopt.arg);
		else if (c == 'v') min_var_len = parseInt(getopt.arg);
		else if (c == 't') min_test_len = parseInt(getopt.arg);
		else if (c == 's') min_sc = parseFloat(getopt.arg);
		else if (c == 'a') non_chr = true;
		else if (c == 'e') out_err = true;
		else if (c == 'F') flt_vcf = true;
	}
	if (args.length - getopt.ind < 3) {
		print("Usage: mgutils.js sveval <true.vcf> <true.bed> <call.txt>");
		print("Options:");
		print(" -f INT length of flanking regions [" + flank + "]");
		print(" -v INT min INDEL length [" + min_var_len + "]");
		print(" -t INT min true INDEL length [" + min_test_len + "]");
		print(" -s FLOAT min called score [" + min_sc + "]");
		print(" -a keep confident regions on chrX/chrY");
		print(" -e print errors");
		print(" -F drop baseline records whose FILTER is neither '.' nor PASS");
		exit(1);
	}

	var file, buf = new Bytes();

	// parse true.bed
	warn("Reading confident regions...");
	var bed = {};
	file = new File(args[getopt.ind + 1]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length < 3) continue;
		if (!non_chr && /^(chr)?[XY]$/.test(t[0])) continue;
		if (bed[t[0]] == null) bed[t[0]] = [];
		bed[t[0]].push([parseInt(t[1]), parseInt(t[2])]);
	}
	file.close();
	for (var ctg in bed) it_index(bed[ctg]);

	// parse true.vcf; each kept record becomes
	// [start, end, -1, max |len diff|, signed len diff, filtered?, genotype string]
	warn("Reading baseline variants...");
	var vcf = {};
	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t[0][0] == '#') continue;
		if (t.length < 10) continue;
		var flt = (t[6] != '.' && t[6] != 'PASS');
		if (flt_vcf && flt) continue;
		if (bed[t[0]] == null) continue;
		var ref = t[3];
		var st = parseInt(t[1]) - 1;
		var en = st + ref.length;
		var al = t[4].split(",");
		al.unshift(ref); // al[0] is the reference so genotype indices address it directly
		// cheap pre-filter on every listed ALT allele
		var max_diff = 0;
		for (var i = 1; i < al.length; ++i) {
			var l = Math.abs(al[i].length - ref.length);
			if (max_diff < l) max_diff = l;
		}
		if (max_diff < min_test_len) continue;
		var s = t[9].split(':');
		// an empty genotype or a haploid reference call carries no variant allele
		if (s[0] == "" || s[0] == "0") continue;
		var gt = s[0].split(/[|\/]/);
		// recompute the difference using only alleles present in the genotype
		var max_ev = 0;
		max_diff = 0;
		for (var i = 0; i < gt.length; ++i) {
			if (gt[i] == '.') continue;
			var allele = al[parseInt(gt[i])];
			if (allele == null) continue; // malformed or out-of-range allele index
			var l = allele.length - ref.length;
			var x = l > 0? l : -l;
			if (max_diff < x) max_diff = x, max_ev = l;
		}
		if (max_diff < min_test_len) continue;
		if (vcf[t[0]] == null) vcf[t[0]] = [];
		vcf[t[0]].push([st, en, -1, max_diff, max_ev, flt, s[0]]);
	}
	file.close();
	for (var ctg in vcf) it_index(vcf[ctg]);

	// parse call.txt; each kept call becomes [start, end, -1, max |len diff|, signed len diff]
	warn("Reading gt results...");
	var rst = {};
	file = new File(args[getopt.ind + 2]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length < 8) continue; // no reference-allele column
		if (parseFloat(t[3]) < min_sc) continue;
		if (bed[t[0]] == null) continue;
		if (rst[t[0]] == null) rst[t[0]] = [];
		var ref_len = t[7] == '*'? 0 : t[7].length;
		var max_diff = 0, max_ev = 0;
		for (var i = 8; i < t.length; ++i) {
			// measure the allele in column i (not always the first ALT column)
			var alt_len = t[i] == '*'? 0 : t[i].length;
			var l = alt_len - ref_len;
			var x = l > 0? l : -l;
			if (max_diff < x) max_diff = x, max_ev = l;
		}
		var st = parseInt(t[1]), en = parseInt(t[2]);
		rst[t[0]].push([st, en, -1, max_diff, max_ev]);
	}
	file.close();
	for (var ctg in rst) it_index(rst[ctg]);
	buf.destroy();

	// Count the records of `query` that lie, with their flanks, inside a confident
	// region, and how many of those overlap nothing in `target`.
	// Returns [totals, misses]; index 0 = all, 1 = shorter than reference, 2 = otherwise.
	function tally(query, target, label, with_gt) {
		var total = [0, 0, 0], miss = [0, 0, 0];
		for (var ctg in query) {
			for (var i = 0; i < query[ctg].length; ++i) {
				var v = query[ctg][i];
				if (v[3] < min_var_len) continue;
				if (v[5]) continue; // filtered baseline record; calls never set this slot
				var st = v[0] - flank, en = v[1] + flank;
				if (st < 0) st = 0;
				if (!it_contained(bed[ctg], st, en)) continue;
				var sub = v[4] < 0? 1 : 2;
				++total[0], ++total[sub];
				// a contig may be absent from the other set: treat that as "no overlap"
				var b = target[ctg] != null? it_overlap(target[ctg], st, en) : [];
				if (b.length == 0) {
					if (out_err) {
						if (with_gt) print(label, ctg, v[0], v[1], v[4], v[6]);
						else print(label, ctg, v[0], v[1], v[4]);
					}
					++miss[0], ++miss[sub];
				}
			}
		}
		return [total, miss];
	}

	// sensitivity
	var sens = tally(vcf, rst, "FN", true);
	var n_vcf = sens[0], fn = sens[1];

	// specificity
	var spec = tally(rst, vcf, "FP", false);
	var n_rst = spec[0], fp = spec[1];

	print("NA", fn[0], n_vcf[0], (fn[0]/n_vcf[0]).toFixed(4));
	print("ND", fn[1], n_vcf[1], (fn[1]/n_vcf[1]).toFixed(4));
	print("NI", fn[2], n_vcf[2], (fn[2]/n_vcf[2]).toFixed(4));
	print("PA", fp[0], n_rst[0], (fp[0]/n_rst[0]).toFixed(4));
	print("PD", fp[1], n_rst[1], (fp[1]/n_rst[1]).toFixed(4));
	print("PI", fp[2], n_rst[2], (fp[2]/n_rst[2]).toFixed(4));
}
895
+
896
// extractseg: for each read in the given GAF files, print the interval lying
// between the two named segments <seg1> and <seg2>.
//
// A line whose first column is not "*" starts a new read; following lines whose
// first column is "*" are scanned for the two segments (column 2 minus its first
// character is compared with the segment names). Reads whose span computed from
// columns 3/4 or from columns 8/9 is below -l are ignored. With -e the printed
// interval uses the opposite ends of the two flanking records.
function mg_cmd_extractseg(args)
{
	// Print one interval for the read `ctg`, provided both segments were observed
	// and the upstream one ends before the downstream one begins.
	function process(ctg, first, last, is_end) {
		if (ctg == null) return;
		if (first[0] == null || first[1] == null) return;
		var pos0 = first[0][7], pos1 = first[1][7];
		if (pos0 == pos1) return;
		var forward = pos0 < pos1;
		var up = forward? 0 : 1, down = 1 - up; // which segment comes first on the read
		if (last[up][7] >= first[down][7]) return;
		var strand = forward? '+' : '-';
		if (is_end) print(ctg, last[up][8], first[down][7], '*', 0, strand);
		else print(ctg, last[up][7], first[down][8], '*', 0, strand);
	}

	var opt, min_len = 100000, is_end = false;
	while ((opt = getopt(args, "el:")) != null) {
		if (opt == 'l') min_len = parseInt(getopt.arg);
		else if (opt == 'e') is_end = true;
	}
	if (args.length - getopt.ind < 3) {
		print("Usage: mgutils.js extractseg <seg1> <seg2> <in.gaf> [...]");
		return;
	}

	var seg = [args[getopt.ind], args[getopt.ind+1]];
	var buf = new Bytes();
	for (var k = getopt.ind + 2; k < args.length; ++k) {
		var fp = new File(args[k]);
		var skip = false, ctg = null;
		var first = [null, null], last = [null, null];
		while (fp.readline(buf) >= 0) {
			var f = buf.toString().split("\t");
			if (f[0] != "*") {
				// header line of a new read: flush the previous one and reset state
				process(ctg, first, last, is_end);
				var span_a = parseInt(f[3]) - parseInt(f[2]);
				var span_b = parseInt(f[8]) - parseInt(f[7]);
				skip = (span_a < min_len || span_b < min_len);
				first = [null, null];
				last = [null, null];
				ctg = f[0];
				continue;
			}
			if (skip) continue;
			var name = f[1].substr(1);
			f[7] = parseInt(f[7]), f[8] = parseInt(f[8]);
			var which = -1;
			if (name == seg[0] && f[3] != '0') which = 0;
			else if (name == seg[1] && f[3] != '0') which = 1;
			if (which < 0) continue;
			// remember the first and the most recent record of each segment
			if (first[which] == null) first[which] = f.slice(0);
			last[which] = f.slice(0);
		}
		process(ctg, first, last, is_end); // flush the final read of the file
		fp.close();
	}
	buf.destroy();
}
953
+
954
// bed2sql: turn pasted per-sample call BED columns into SQL statements.
//
// args: <sample.list> [calls.bed]. The first column of each line in sample.list
// names one sample. The second file (standard input when omitted or "-") must
// carry exactly six columns per sample; the sixth column of each group is either
// "." or a colon-separated record walk:len:strand:ctg:start:end.
// Prints DROP INDEX statements, one transaction holding an "INSERT INTO call"
// per non-missing sample call followed by one "INSERT INTO bwalk" per distinct
// walk of each input line, and finally CREATE INDEX statements.
// Throws Error("Different number of samples") on a column-count mismatch.
function mg_cmd_bed2sql(args)
{
	// Render a value as a single-quoted SQL string literal. Embedded single quotes
	// are doubled so that names containing "'" cannot terminate the literal.
	function sql_quote(x) {
		return "'" + String(x).replace(/'/g, "''") + "'";
	}

	var c;
	while ((c = getopt(args, "")) != null) {
	}
	if (args.length - getopt.ind == 0) {
		print("Usage: paste *.bed | mgutils.js bed2sql <sample.list> | sqlite3 rGFA.db");
		return;
	}

	var file, buf = new Bytes();

	var sample = [];
	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		sample.push(t[0]);
	}
	file.close();

	file = args.length - getopt.ind >= 2 && args[getopt.ind+1] != "-"? new File(args[getopt.ind+1]) : new File();
	print("DROP INDEX IF EXISTS idx_bwalk;");
	print("DROP INDEX IF EXISTS idx_cst;");
	print("DROP INDEX IF EXISTS idx_cen;");
	print("BEGIN TRANSACTION;");
	var wid = 0, bid = 0, ins_walk = [];
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length != sample.length * 6)
			throw Error("Different number of samples");
		var walk2wid = {}; // walk string -> walk id, scoped to this input line
		for (var i = 5, j = 0; i < t.length; i += 6, ++j) {
			if (t[i] == ".") continue;
			var s = t[i].split(":");
			if (!Object.prototype.hasOwnProperty.call(walk2wid, s[0])) {
				walk2wid[s[0]] = wid;
				ins_walk.push([wid, bid, s[1], s[0]]); // emitted after all call rows
				++wid;
			}
			var v = [bid, sample[j], walk2wid[s[0]], s[3], s[4], s[5], s[2] == '+'? 1 : -1];
			print("INSERT INTO call (bid,sample,wid,ctg,start,end,strand) VALUES (" + v.map(sql_quote).join(",") + ");");
		}
		++bid;
	}
	for (var i = 0; i < ins_walk.length; ++i)
		print("INSERT INTO bwalk (wid,bid,len,walk) VALUES (" + ins_walk[i].map(sql_quote).join(",") + ");");
	print("END TRANSACTION;");
	print("CREATE INDEX IF NOT EXISTS idx_bwalk ON bwalk (bid);");
	print("CREATE INDEX IF NOT EXISTS idx_cst ON call (ctg, start);");
	print("CREATE INDEX IF NOT EXISTS idx_cen ON call (ctg, end);");
	file.close();

	buf.destroy();
}
1014
+
1015
// merge: combine pasted per-sample call columns into one multi-sample table.
//
// args: [-a annotation] [-s sample.list] <input|->. Every sixth column of the
// input, starting at the sixth, holds one sample's call: "." for missing,
// otherwise colon-separated fields whose first two are the allele walk and its
// length. Alleles of each line are renumbered by decreasing sample count and the
// per-sample walk:len prefix is replaced by the resulting allele index.
// With -a, column 12 of the annotation file is attached as ANNO= to lines with
// the same first three columns. With -s, the first whitespace-delimited token of
// each line is appended to the header as a sample name.
function mg_cmd_merge(args)
{
	var opt, anno_path = null, sample_path = null;
	while ((opt = getopt(args, "a:s:")) != null) {
		if (opt == 'a') anno_path = getopt.arg;
		else if (opt == 's') sample_path = getopt.arg;
	}
	if (args.length - getopt.ind == 0) {
		print("Usage: paste *.bed | mgutils.js merge -");
		print("Options:");
		print(" -a FILE annotation [null]");
		print(" -s FILE list of samples [null]");
		return;
	}

	var fp, buf = new Bytes();

	// optional annotation, keyed by "col1_col2_col3"
	var anno = {};
	if (anno_path) {
		fp = new File(anno_path);
		while (fp.readline(buf) >= 0) {
			var f = buf.toString().split("\t");
			anno[[f[0], f[1], f[2]].join("_")] = f[11];
		}
		fp.close();
	}

	// optional sample names for the column header
	var hdr = ["#CHROM", "START", "END", "INFO", "FORMAT"];
	if (sample_path) {
		fp = new File(sample_path);
		while (fp.readline(buf) >= 0)
			hdr.push(buf.toString().split(/\s+/)[0]);
		fp.close();
	}

	fp = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]);
	var meta = [
		'##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">',
		'##INFO=<ID=NA,Number=1,Type=Integer,Description="Number of alleles">',
		'##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count">',
		'##INFO=<ID=ALEN,Number=.,Type=Integer,Description="Length of each allele">',
		'##INFO=<ID=ANNO,Number=1,Type=String,Description="Annotation">',
		'##INFO=<ID=VS,Number=1,Type=String,Description="Start vertex">',
		'##INFO=<ID=VE,Number=1,Type=String,Description="End vertex">',
		'##INFO=<ID=AWALK,Number=.,Type=String,Description="Walk of each allele">',
		'##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
		'##FORMAT=<ID=CSTRAND,Number=1,Type=String,Description="Contig strand">',
		'##FORMAT=<ID=CTG,Number=1,Type=String,Description="Contig name">',
		'##FORMAT=<ID=CS,Number=1,Type=String,Description="Contig start, BED-like">',
		'##FORMAT=<ID=CE,Number=1,Type=String,Description="Contig end, BED-like">'
	];
	for (var n = 0; n < meta.length; ++n)
		print(meta[n]);
	print(hdr.join("\t"));

	while (fp.readline(buf) >= 0) {
		var t = buf.toString().split("\t");

		// collect distinct alleles in order of first appearance and count their carriers
		var alleles = [], walk2idx = {}, calls = [], ns = 0;
		for (var j = 5; j < t.length; j += 6) {
			if (t[j] == ".") {
				calls.push(null); // missing call for this sample
				continue;
			}
			++ns;
			var s = t[j].split(":");
			var k = walk2idx[s[0]];
			if (k == null) {
				k = walk2idx[s[0]] = alleles.length;
				alleles.push({walk:s[0], len:s[1], cnt:0, i:k});
			}
			++alleles[k].cnt;
			s[0] = k;
			calls.push(s);
		}

		// most frequent allele first; rank[] maps first-appearance index to final index
		alleles.sort(function(x, y) { return y.cnt - x.cnt });
		var rank = [], alen = [], awalk = [], ac = [];
		for (var i = 0; i < alleles.length; ++i) {
			rank[alleles[i].i] = i;
			alen[i] = alleles[i].len;
			awalk[i] = alleles[i].walk;
			ac[i] = alleles[i].cnt;
		}

		var row = [t[0], t[1], t[2], "", "GT:CSTRAND:CTG:CS:CE"];
		for (var j = 0; j < calls.length; ++j) {
			if (calls[j] == null) {
				row.push(".");
				continue;
			}
			// drop the allele index, then overwrite the length slot with the genotype
			var fields = calls[j].slice(1);
			fields[0] = rank[calls[j][0]];
			row.push(fields.join(":"));
		}

		var info = ["NS="+ns, "NA="+alleles.length, "ALEN="+alen.join(","), "AC="+ac.join(",")];
		var key = [t[0], t[1], t[2]].join("_");
		if (anno[key] != null) info.push("ANNO="+anno[key]);
		info.push("VS="+t[3], "VE="+t[4], "AWALK="+awalk.join(","));
		row[3] = info.join(";");
		print(row.join("\t"));
	}
	buf.destroy();
	fp.close();
}
1112
+
1113
// merge2vcf: rewrite the table read from args[0] (standard input when no
// argument is given) as VCF text.
//
// "##" lines are copied through after three fixed header lines. The single-"#"
// column header is replaced by the nine standard VCF column names followed by
// the input's columns from the sixth on. Every other line becomes a <CNV>
// record: input column 3 is appended to INFO as END=, and each sample column is
// reduced to its first colon-separated field g, emitted as "." when g is ".",
// "0:0" when g is "0" and "1:g" otherwise.
function mg_cmd_merge2vcf(args) {
	var buf = new Bytes();
	var fp = args.length == 0? new File() : new File(args[0]);
	print("##fileformat=VCFv4.2");
	print('##ALT=<ID=CNV,Description="description">');
	print('##FORMAT=<ID=GT0,Number=1,Type=String,Description="Original genotype">');
	while (fp.readline(buf) >= 0) {
		var line = buf.toString();
		if (/^##/.test(line)) {
			print(line);
			continue;
		}
		var col = line.split("\t"), out;
		if (line[0] == "#") {
			out = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"].concat(col.slice(5));
		} else {
			out = [col[0], col[1], ".", "N", "<CNV>", 30, "PASS", col[3] + ";END=" + col[2], "GT:GT0"];
			for (var k = 5; k < col.length; ++k) {
				var g = col[k].split(":")[0];
				if (g == ".") out.push(g);
				else out.push(g == "0"? "0:0" : "1:" + g);
			}
		}
		print(out.join("\t"));
	}
	fp.close();
	buf.destroy();
}
1144
+
1145
// segfreq: annotate every segment of <gfa2bed.bed> with allele counts taken
// from <merged.txt> and, optionally, with the bubble listed in [bubble.bed].
//
// Each output row is the segment's first four input columns, its fifth column
// parsed as an integer, then: the summed AC of the line that touched it, the AC
// of the allele it was counted under, the line's ANNO value, the class returned
// by mg_classify_repeat() for that value, and the number of alleles on the line.
// Segments named in column 12 of bubble.bed (first and last entry excluded)
// additionally receive the bubble's first three columns.
// Per-class length totals, split by allele number (2, 3, >3), go to warn() for
// segments passing the -f frequency threshold.
// Returns 1 after printing the usage message, undefined otherwise.
function mg_cmd_segfreq(args) {
	var opt, min_af = 0.05;
	while ((opt = getopt(args, "f:")) != null) {
		if (opt == 'f') min_af = parseFloat(getopt.arg);
	}
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js segfreq [-f minFreq=0.05] <gfa2bed.bed> <merged.txt> [bubble.bed]");
		return 1;
	}
	var fp, buf = new Bytes();

	// segment table, addressable by the name in column 4
	var seg_idx = {}, seg = [];
	fp = new File(args[getopt.ind]);
	while (fp.readline(buf) >= 0) {
		var f = buf.toString().split("\t");
		seg_idx[f[3]] = seg.length;
		seg.push([f[0], f[1], f[2], f[3], parseInt(f[4]), 0, 0, "N/A", "N/A", 0]);
	}
	fp.close();

	// merged calls
	var re_info = /([^\s=;]+)=([^\s=;]+)/g;
	var re_walk = /([><])([^\s><]+)/g;
	var bubble_anno = {};
	fp = new File(args[getopt.ind+1]);
	while (fp.readline(buf) >= 0) {
		var m, f = buf.toString().split("\t", 4);
		if (f[0][0] == "#") continue;
		var anno = null, ac = null, walk = null;
		while ((m = re_info.exec(f[3])) != null) {
			switch (m[1]) {
			case "ANNO":
				anno = m[2];
				break;
			case "AWALK":
				walk = m[2].split(",");
				break;
			case "AC":
				ac = m[2].split(",");
				for (var i = 0; i < ac.length; ++i)
					ac[i] = parseInt(ac[i]);
				break;
			}
		}
		if (ac == null || walk == null) throw Error("Missing AC or AWALK");
		if (ac.length != walk.length) throw Error("Inconsistent AC or AWALK");
		if (anno == null) anno = "N/A";
		bubble_anno[f[0]+"_"+f[1]+"_"+f[2]] = anno;
		var ns = 0;
		for (var i = 0; i < walk.length; ++i)
			ns += ac[i];
		// a segment is counted once per line, under the first allele that contains it
		var seen = {};
		for (var i = 0; i < walk.length; ++i) {
			if (walk[i] == "*") continue;
			while ((m = re_walk.exec(walk[i])) != null) {
				var name = m[2];
				if (seg_idx[name] == null) throw Error("Missing segment " + name);
				if (!seen[name]) {
					seen[name] = 1;
					var rec = seg[seg_idx[name]];
					rec[5] = ns;
					rec[6] += ac[i];
					rec[7] = anno;
					rec[8] = mg_classify_repeat(anno);
					rec[9] = walk.length;
				}
			}
		}
	}
	fp.close();

	// optional bubble file
	if (args.length - getopt.ind >= 3) {
		fp = new File(args[getopt.ind+2]);
		while (fp.readline(buf) >= 0) {
			var f = buf.toString().split("\t");
			var names = f[11].split(",");
			var anno = bubble_anno[f[0]+"_"+f[1]+"_"+f[2]];
			if (anno == null) throw Error("Missing bubble");
			// the first and last list entries are skipped
			for (var i = 1; i < names.length - 1; ++i) {
				if (seg_idx[names[i]] == null) throw Error("Inconsistent bubble file");
				var rec = seg[seg_idx[names[i]]];
				rec[10] = f[0], rec[11] = f[1], rec[12] = f[2];
				rec[7] = anno;
				rec[8] = mg_classify_repeat(anno);
			}
		}
		fp.close();
	}

	buf.destroy();

	// print the table and accumulate lengths per class
	var replen = {};
	for (var i = 0; i < seg.length; ++i) {
		var rec = seg[i];
		print(rec.join("\t"));
		var cls = rec[8], len = parseInt(rec[2]) - parseInt(rec[1]);
		if (!(rec[4] > 0 && rec[5] > 0 && rec[6] >= rec[5] * min_af)) continue;
		if (replen[cls] == null) replen[cls] = [0, 0, 0];
		var n_al = rec[9];
		if (n_al == 2) replen[cls][0] += len;
		else if (n_al == 3) replen[cls][1] += len;
		else if (n_al > 3) replen[cls][2] += len;
	}
	for (var x in replen) {
		var y = x.replace(/^\d+_/, "");
		warn(x, y, replen[x].join("\t"));
	}
}
1244
+
1245
// genecopy: select, per gene, the alignments in <in.gaf> that do not mostly
// overlap an already selected (higher scoring) alignment, and count them.
//
// args: [-c min_cov] [-r min_rel_cov] <in.gaf> <src.bed>.
//   src.bed  column 4 is used as the gene name and columns 1-3 plus the sign in
//            column 6 as its source interval
//   in.gaf   columns 2-4 and 7-12 are parsed as integers; a "cg:Z:" tag holding
//            a CIGAR string is required, otherwise Error("no cg") is thrown
// Output lines:
//   OG  hit, covered length, total length and the genes it overlaps
//   GH  one line per selected hit: gene, then the stored hit record
//   GC  gene, number of hits that passed the coverage filters, number selected
function mg_cmd_genecopy(args)
{
	var c, opt = { min_cov:0.8, min_rel_cov:0.85, max_prev_ovlp:0.5, mm:4, gapo:5 };
	while ((c = getopt(args, "c:r:")) != null) {
		if (c == 'c') opt.min_cov = parseFloat(getopt.arg);
		else if (c == 'r') opt.min_rel_cov = parseFloat(getopt.arg);
	}
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js genecopy [options] <in.gaf> <src.bed>");
		print("Options:");
		print(" -c FLOAT min coverage [" + opt.min_cov + "]");
		print(" -r FLOAT min relative coverage [" + opt.min_rel_cov + "]");
		return;
	}
	var re_cg = /(\d+)([MIDNSHP=X])/g;
	var re_walk = /([><])([^\s><]+):(\d+)-(\d+)/g;
	var file, buf = new Bytes();

	var src = {};
	file = new File(args[getopt.ind+1]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		src[t[3]] = [t[0], parseInt(t[1]), parseInt(t[2]), t[5] == '+'? 1 : -1];
	}
	file.close();

	file = new File(args[getopt.ind]);
	var gene = {}, reg = {};
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");

		// check coverage
		if (/\|([A-Z]+\d*\.\d+|ENSG\d+)$/.test(t[0])) continue;
		for (var i = 1; i <= 3; ++i) t[i] = parseInt(t[i]);
		for (var i = 6; i <= 11; ++i) t[i] = parseInt(t[i]);
		if (t[3] - t[2] < t[1] * opt.min_cov) continue;
		if (gene[t[0]] != null) {
			// later hits are compared against the first hit kept for the gene
			var g0 = gene[t[0]][0];
			if (t[3] - t[2] < (g0[2] - g0[1]) * opt.min_rel_cov)
				continue;
		}

		// compute de
		var m, cg = null;
		for (var i = 12; i < t.length; ++i) {
			if (t[i].substr(0, 4) == "cg:Z")
				cg = t[i].substr(5);
		}
		if (cg == null) throw Error("no cg");
		var blen = 0, mlen = 0, sc = 0;
		while ((m = re_cg.exec(cg)) != null) {
			var len = parseInt(m[1]);
			if (m[2] == '=') mlen += len, blen += len, sc += len;
			else if (m[2] == 'X') {
				// mismatch run: every base is one column and costs opt.mm
				// (re_cg never yields '*', so mismatches must be recognized as 'X')
				blen += len;
				sc -= opt.mm * len;
			} else {
				// any other operator counts as one gap event
				++blen;
				sc -= opt.gapo + len;
			}
		}
		var de = (blen - mlen) / blen;

		// find intervals
		var intv = [];
		if (t[5][0] == '>' || t[5][0] == '<') {
			// path given as a walk of oriented "name:start-end" pieces; `len` is the
			// path offset at which the current piece begins
			var len = 0;
			while ((m = re_walk.exec(t[5])) != null) {
				var st = parseInt(m[3]), en = parseInt(m[4]);
				var ss = st, ee = en;
				if (t[7] >= len && t[7] < len + en - st) {
					if (m[1] == '>') ss = st + t[7];
					else ee = en - t[7];
				} else if (t[8] >= len && t[8] < len + en - st) {
					if (m[1] == '>') ee = st + t[8] - len;
					else ss = st + t[6] - t[8];
				}
				intv.push([m[2], ss, ee, m[1] == '>'? 1 : -1]);
				len += en - st;
			}
		} else intv.push([t[5], t[7], t[8], t[4] == '+'? 1 : -1]);

		// save
		if (gene[t[0]] == null) gene[t[0]] = [];
		for (var j = 0; j < intv.length; ++j) {
			var x = intv[j], pass = true;
			if (reg[x[0]] == null) reg[x[0]] = [];
			if (src[t[0]] != null) {
				var y = src[t[0]];
				if (y[0] == x[0] && y[1] < x[2] && x[1] < y[2]) {
					var l = (x[2] < y[2]? x[2] : y[2]) - (x[1] > y[1]? x[1] : y[1]);
					// an interval lying almost entirely inside the gene's source
					// region starts out flagged false, like an already selected hit
					if (l > (x[2] - x[1]) * 0.99) pass = false;
				}
			}
			// [start, end, 0, gene, hit index within gene, flag, strand]
			reg[x[0]].push([x[1], x[2], 0, t[0], gene[t[0]].length, pass, x[3]]);
		}
		gene[t[0]].push([t[1], t[2], t[3], sc, de, intv]);
	}
	file.close();
	buf.destroy();

	// preparation: order all hits by decreasing score and index the regions
	var a = [];
	for (var g in gene) {
		var x = gene[g];
		for (var i = 0; i < x.length; ++i)
			a.push([x[i][3], g, i]);
	}
	a.sort(function(x,y) { return y[0]-x[0] });
	for (var x in reg) it_index(reg[x]);

	// select
	var good_hit = [];
	for (var i = 0; i < a.length; ++i) {
		var x = a[i];
		var h = gene[x[1]][x[2]];
		var intv = h[5], cov_tot = 0, len_tot = 0, ovlp_gene = {};
		for (var j = 0; j < intv.length; ++j) {
			var y = intv[j];
			len_tot += y[2] - y[1];
			if (reg[y[0]] == null) continue;
			var st0 = y[1], en0 = y[2];
			var b = it_overlap(reg[y[0]], st0, en0);
			var cov_st = 0, cov_en = 0, cov = 0;
			for (var k = 0; k < b.length; ++k) {
				// only same-strand regions whose flag is false contribute coverage
				if (b[k][5] || b[k][6] != y[3]) continue;
				ovlp_gene[b[k][3]] = 1;
				var st1 = b[k][0] > st0? b[k][0] : st0;
				var en1 = b[k][1] < en0? b[k][1] : en0;
				if (st1 > cov_en) {
					cov += cov_en - cov_st;
					cov_st = st1, cov_en = en1;
				} else cov_en = cov_en > en1? cov_en : en1;
			}
			cov += cov_en - cov_st;
			cov_tot += cov;
		}
		var ovlp_gene_arr = [];
		for (var y in ovlp_gene) ovlp_gene_arr.push(y);
		if (ovlp_gene_arr.length > 0)
			print("OG", x[1], x[2], cov_tot, len_tot, ovlp_gene_arr);
		if (cov_tot < len_tot * opt.max_prev_ovlp) {
			good_hit.push([x[1], x[2]]);
			// clear the flag on this hit's own regions so later hits see them as covered
			for (var j = 0; j < intv.length; ++j) {
				var y = intv[j];
				if (reg[y[0]] == null) continue;
				var b = it_overlap(reg[y[0]], y[1], y[2]);
				for (var k = 0; k < b.length; ++k)
					if (b[k][3] == x[1] && b[k][4] == x[2])
						b[k][5] = false;
			}
		}
	}

	// count good_hit
	var out = {};
	for (var g in gene) out[g] = [gene[g].length, 0];
	for (var i = 0; i < good_hit.length; ++i) {
		print("GH", good_hit[i][0], gene[good_hit[i][0]][good_hit[i][1]].join("\t"));
		++out[good_hit[i][0]][1];
	}
	for (var g in out)
		print("GC", g, out[g].join("\t"));
}
1407
+
1408
+ /*************************
1409
+ ***** main function *****
1410
+ *************************/
1411
+
1412
// Entry point: print the command list and exit with status 1 when no argument
// is given; otherwise remove the first argument, treat it as the sub-command
// name and hand the remaining arguments to the matching mg_cmd_* function.
// Throws Error("unrecognized command: ...") for an unknown name.
function main(args)
{
	if (args.length == 0) {
		var usage = [
			"Usage: mgutils.js <command> [arguments]",
			"Commands:",
			" stableGaf convert unstable GAF to stable GAF",
			" renamefa add a prefix to sequence names in FASTA",
			" paf2bl blacklist regions from insert-to-ref alignment",
			" anno annotate short sequences",
			" anno2tbl summarize anno output",
			" extractseg extract a segment from GAF",
			" merge merge per-sample --call BED",
			" merge2vcf convert merge BED output to VCF",
			" segfreq compute node frequency from merged calls",
			" genecopy gene copy analysis",
			" bed2sql generate SQL from --call BED"
			//" subgaf extract GAF overlapping with a region (BUGGY)"
			//" sveval evaluate SV accuracy"
		];
		for (var i = 0; i < usage.length; ++i)
			print(usage[i]);
		exit(1);
	}

	var cmd = args.shift();
	switch (cmd) {
	case 'renamefa': mg_cmd_renamefa(args); break;
	case 'paf2bl': mg_cmd_paf2bl(args); break;
	case 'anno': mg_cmd_anno(args); break;
	case 'anno2tbl': mg_cmd_anno2tbl(args); break;
	case 'subgaf': mg_cmd_subgaf(args); break;
	case 'sveval': mg_cmd_sveval(args); break;
	case 'joinfa': mg_cmd_joinfa(args); break;
	case 'stableGaf': mg_cmd_stableGaf(args); break;
	case 'bed2sql': mg_cmd_bed2sql(args); break;
	case 'extractseg': mg_cmd_extractseg(args); break;
	case 'merge': mg_cmd_merge(args); break;
	case 'merge2vcf': mg_cmd_merge2vcf(args); break;
	case 'segfreq': mg_cmd_segfreq(args); break;
	case 'genecopy': mg_cmd_genecopy(args); break;
	default: throw Error("unrecognized command: " + cmd);
	}
}
1450
+
1451
+ main(arguments);