ruby-minigraph 0.0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +62 -0
  4. data/ext/Rakefile +56 -0
  5. data/ext/cmappy/cmappy.c +7 -0
  6. data/ext/cmappy/cmappy.h +8 -0
  7. data/ext/minigraph/LICENSE.txt +23 -0
  8. data/ext/minigraph/Makefile +66 -0
  9. data/ext/minigraph/NEWS.md +317 -0
  10. data/ext/minigraph/README.md +207 -0
  11. data/ext/minigraph/algo.c +194 -0
  12. data/ext/minigraph/algo.h +33 -0
  13. data/ext/minigraph/asm-call.c +147 -0
  14. data/ext/minigraph/bseq.c +133 -0
  15. data/ext/minigraph/bseq.h +76 -0
  16. data/ext/minigraph/cal_cov.c +139 -0
  17. data/ext/minigraph/doc/example1.png +0 -0
  18. data/ext/minigraph/doc/example2.png +0 -0
  19. data/ext/minigraph/doc/examples.graffle +0 -0
  20. data/ext/minigraph/format.c +241 -0
  21. data/ext/minigraph/galign.c +140 -0
  22. data/ext/minigraph/gchain1.c +532 -0
  23. data/ext/minigraph/gcmisc.c +223 -0
  24. data/ext/minigraph/gfa-aug.c +260 -0
  25. data/ext/minigraph/gfa-base.c +526 -0
  26. data/ext/minigraph/gfa-bbl.c +372 -0
  27. data/ext/minigraph/gfa-ed.c +617 -0
  28. data/ext/minigraph/gfa-io.c +395 -0
  29. data/ext/minigraph/gfa-priv.h +154 -0
  30. data/ext/minigraph/gfa.h +166 -0
  31. data/ext/minigraph/ggen.c +182 -0
  32. data/ext/minigraph/ggen.h +21 -0
  33. data/ext/minigraph/ggsimple.c +570 -0
  34. data/ext/minigraph/gmap.c +211 -0
  35. data/ext/minigraph/index.c +230 -0
  36. data/ext/minigraph/kalloc.c +224 -0
  37. data/ext/minigraph/kalloc.h +82 -0
  38. data/ext/minigraph/kavl.h +414 -0
  39. data/ext/minigraph/kdq.h +134 -0
  40. data/ext/minigraph/ketopt.h +116 -0
  41. data/ext/minigraph/khashl.h +348 -0
  42. data/ext/minigraph/krmq.h +474 -0
  43. data/ext/minigraph/kseq.h +256 -0
  44. data/ext/minigraph/ksort.h +164 -0
  45. data/ext/minigraph/kstring.h +165 -0
  46. data/ext/minigraph/kthread.c +159 -0
  47. data/ext/minigraph/kthread.h +15 -0
  48. data/ext/minigraph/kvec-km.h +105 -0
  49. data/ext/minigraph/kvec.h +110 -0
  50. data/ext/minigraph/lchain.c +441 -0
  51. data/ext/minigraph/main.c +301 -0
  52. data/ext/minigraph/map-algo.c +500 -0
  53. data/ext/minigraph/mgpriv.h +128 -0
  54. data/ext/minigraph/minigraph.1 +359 -0
  55. data/ext/minigraph/minigraph.h +176 -0
  56. data/ext/minigraph/miniwfa.c +834 -0
  57. data/ext/minigraph/miniwfa.h +95 -0
  58. data/ext/minigraph/misc/mgutils.js +1451 -0
  59. data/ext/minigraph/misc.c +12 -0
  60. data/ext/minigraph/options.c +134 -0
  61. data/ext/minigraph/shortk.c +251 -0
  62. data/ext/minigraph/sketch.c +109 -0
  63. data/ext/minigraph/sys.c +147 -0
  64. data/ext/minigraph/sys.h +20 -0
  65. data/ext/minigraph/test/MT-chimp.fa +277 -0
  66. data/ext/minigraph/test/MT-human.fa +239 -0
  67. data/ext/minigraph/test/MT-orangA.fa +276 -0
  68. data/ext/minigraph/test/MT.gfa +19 -0
  69. data/ext/minigraph/tex/Makefile +13 -0
  70. data/ext/minigraph/tex/minigraph.bib +676 -0
  71. data/ext/minigraph/tex/minigraph.tex +986 -0
  72. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
  73. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
  74. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
  75. data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
  76. data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
  77. data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
  78. data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
  79. data/ext/minigraph/tex/plots/bedutils.js +367 -0
  80. data/ext/minigraph/tex/plots/chr-plot.js +130 -0
  81. data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
  82. data/ext/minigraph.patch +21 -0
  83. data/lib/minigraph/ffi/constants.rb +230 -0
  84. data/lib/minigraph/ffi/functions.rb +70 -0
  85. data/lib/minigraph/ffi/mappy.rb +8 -0
  86. data/lib/minigraph/ffi.rb +27 -0
  87. data/lib/minigraph/version.rb +5 -0
  88. data/lib/minigraph.rb +72 -0
  89. metadata +159 -0
@@ -0,0 +1,1451 @@
1
+ #!/usr/bin/env k8
2
+
3
+ /*******************************
4
+ * Command line option parsing *
5
+ *******************************/
6
+
7
/**
 * Minimal getopt(3)-style parser for single-letter options.
 *
 * Call repeatedly with the same `args` until it returns null. State is kept
 * as properties of the function itself:
 *   getopt.ind   - index of the next element of `args` to examine
 *   getopt.arg   - argument of the option just returned (null if none)
 *   getopt.place - position inside the current element, -1 between elements
 *
 * @param args  array of command-line strings
 * @param ostr  option letters; a letter followed by ':' takes an argument
 * @return the option letter; '?' for an unknown option or a missing argument
 *         (':' instead for a missing argument when `ostr` starts with ':');
 *         null when option scanning is finished
 */
var getopt = function(args, ostr) {
	var oli; // option letter list index
	if (typeof(getopt.place) == 'undefined')
		getopt.ind = 0, getopt.arg = null, getopt.place = -1;
	if (getopt.place == -1) { // update scanning pointer
		if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
			getopt.place = -1;
			return null;
		}
		if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
			++getopt.ind;
			getopt.place = -1;
			return null;
		}
	}
	var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
	if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
		if (optopt == '-') { // if the user didn't specify '-' as an option, assume it means null.
			getopt.place = -1; // do not leave a stale scan position behind
			return null;
		}
		// The unknown letter was the last one of this element: move on to the
		// next element, otherwise the following call would read an empty
		// option character and consume the next argument by mistake.
		if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
		return '?';
	}
	if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
		getopt.arg = null;
		if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
	} else { // need an argument
		if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
			getopt.arg = args[getopt.ind].substr(getopt.place); // "-dVALUE"
		else if (args.length <= ++getopt.ind) { // no arg
			getopt.place = -1;
			if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
			return '?';
		} else getopt.arg = args[getopt.ind]; // white space: "-d VALUE"
		getopt.place = -1;
		++getopt.ind;
	}
	return optopt;
};
44
+
45
/**
 * Sort intervals by start and fill in the third element of every entry so
 * that the array can be searched by it_overlap().
 *
 * Each entry of `a` is an array [start, end, ...]; slot 2 is overwritten with
 * the largest end seen in the entry's implicit subtree. The array is sorted
 * in place.
 *
 * @param a  array of [start, end, ...] entries
 * @return -1 for an empty array, otherwise the last level that was built
 */
function it_index(a) {
	var n = a.length;
	if (n == 0) return -1;
	a.sort(function(p, q) { return p[0] - q[0] });
	var i, last, last_i;
	// level 0: every even-indexed entry is a leaf and carries its own end
	for (i = 0; i < n; i += 2) {
		a[i][2] = a[i][1];
		last = a[i][2];
		last_i = i;
	}
	var k = 1;
	while (1<<k <= n) {
		var half = 1<<(k-1), first = (1<<k) - 1, stride = 1<<(k+1);
		for (i = first; i < n; i += stride) {
			a[i][2] = a[i][1];
			if (a[i][2] < a[i-half][2]) a[i][2] = a[i-half][2];
			// the right child may lie beyond the array; `last` stands in for it
			var right = i + half < n? a[i+half][2] : last;
			if (a[i][2] < right) a[i][2] = right;
		}
		// move to the parent of the rightmost node and refresh `last`
		if (last_i>>k&1) last_i -= half;
		else last_i += half;
		if (last_i < n) last = last > a[last_i][2]? last : a[last_i][2];
		++k;
	}
	return k - 1;
}
64
+
65
/**
 * Collect the entries of an indexed interval array that overlap [st, en).
 *
 * An entry [s, e, ...] is reported when s < en and st < e. Slot 2 of each
 * entry is read as the pruning bound written by it_index().
 *
 * @param a   array prepared by it_index(), or null
 * @param st  query start
 * @param en  query end
 * @return array of matching entries (the same objects as in `a`); empty if
 *         `a` is null
 */
function it_overlap(a, st, en) {
	var hits = [];
	if (a == null) return hits;
	var height = 0;
	while (1<<height <= a.length) ++height;
	--height;
	// each stack item: [node index, level, 1 if the left child was already handled]
	var todo = [[(1<<height) - 1, height, 0]];
	while (todo.length) {
		var top = todo.pop();
		var node = top[0], lev = top[1], left_done = top[2];
		if (lev <= 2) {
			// small subtree: scan all of its entries linearly
			var beg = node >> lev << lev, end = beg + (1<<(lev+1)) - 1;
			if (end >= a.length) end = a.length;
			for (var i = beg; i < end; ++i) {
				var r = a[i];
				if (r[0] < en && st < r[1])
					hits.push(r);
			}
		} else if (left_done == 0) { // if left child not processed
			var lchild = node - (1<<(lev-1));
			todo.push([node, lev, 1]); // come back to this node afterwards
			if (lchild >= a.length || a[lchild][2] > st)
				todo.push([lchild, lev - 1, 0]);
		} else if (node < a.length && a[node][0] < en) {
			if (st < a[node][1]) hits.push(a[node]);
			todo.push([node + (1<<(lev-1)), lev - 1, 0]);
		}
	}
	return hits;
}
92
+
93
/**
 * Test whether [st, en) lies entirely inside a single interval of `a`.
 *
 * @param a   array prepared by it_index(), or null
 * @param st  query start
 * @param en  query end
 * @return true if some overlapping entry [s, e] satisfies s <= st and
 *         en <= e; false otherwise, including when `a` is null
 */
function it_contained(a, st, en) {
	if (a == null) return false;
	var hits = it_overlap(a, st, en);
	for (var j = 0; j < hits.length; ++j) {
		var r = hits[j];
		if (r[0] <= st && en <= r[1])
			return true; // one containing interval is enough
	}
	return false;
}
103
+
104
+ /****************************
105
+ ***** mgutils commands *****
106
+ ****************************/
107
+
108
/**
 * renamefa: prefix every FASTA sequence name with "<prefix><sep>".
 *
 * Header lines are rewritten and printed; all other lines are printed
 * unchanged. Any leading run of non-space characters ending in '#' is removed
 * from the old name before the new prefix is added.
 *
 * Options: -d STR  separator placed between prefix and name ['#']
 *
 * @param args  command-line arguments after the sub-command name
 */
function mg_cmd_renamefa(args)
{
	var opt, sep = '#';
	while ((opt = getopt(args, "d:")) != null) {
		if (opt == 'd') sep = getopt.arg;
	}
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js renamefa [-d delimitor] <prefix> <in.fa>");
		return;
	}
	var prefix = args[getopt.ind];
	var fp = new File(args[getopt.ind+1]);
	var line = new Bytes();
	while (fp.readline(line) >= 0) {
		if (line[0] != 62) { // 62 is the character code of '>'
			print(line);
			continue;
		}
		var hit = /^>(.*)/.exec(line.toString());
		if (hit == null) throw Error("Wrong FASTA format!");
		print(">" + prefix + sep + hit[1].replace(/^\S+#/, ""));
	}
	fp.close();
	line.destroy();
}
134
+
135
+ function mg_cmd_joinfa(args)
136
+ {
137
+ var c, len_n = 20, min_len = 150, name = "decoy-cat";
138
+ while ((c = getopt(args, "n:l:s:")) != null) {
139
+ if (c == 'l') min_len = parseInt(getopt.arg);
140
+ else if (c == 'n') len_n = parseInt(getopt.arg);
141
+ else if (c == 's') name = getopt.arg;
142
+ }
143
+ if (args.length - getopt.ind < 1) {
144
+ print("Usage: mgutils.js joinfa [options] <in.fa>");
145
+ return;
146
+ }
147
+ var seq = new Bytes(), seq1 = new Bytes(), lineno = 0, nn = new Bytes();
148
+ for (var i = 0; i < len_n; ++i) nn.set(78);
149
+ var buf = new Bytes();
150
+ var file = new File(args[getopt.ind]);
151
+ while (file.readline(buf) >= 0) {
152
+ ++lineno;
153
+ if (buf[0] == 62) {
154
+ if (seq1.length >= min_len) {
155
+ if (seq.length > 0) seq.set(nn);
156
+ seq.set(seq1);
157
+ }
158
+ seq1.length = 0;
159
+ } else seq1.set(buf);
160
+ }
161
+ if (seq1.length >= min_len) {
162
+ if (seq.length > 0) seq.set(nn);
163
+ seq.set(seq1);
164
+ }
165
+ print(">" + name);
166
+ print(seq);
167
+ file.close();
168
+ buf.destroy();
169
+ seq.destroy();
170
+ seq1.destroy();
171
+ }
172
+
173
/* Return the interval list `anno` of bubble `key`, creating it on first use.
 * Throws if `key` was not present in the input BED. */
function mg_anno_list(bb, key, anno) {
	if (bb[key] == null) throw Error("ERROR: missing key: " + key);
	var h = bb[key][1];
	if (h[anno] == null) h[anno] = [];
	return h[anno];
}

/* Build the lookup tables for `motif0`: [exact, mutated], where `exact` holds
 * the motif, its reverse complement and all their rotations, and `mutated`
 * holds every single-base substitution of an entry of `exact`. */
function mg_anno_hsat_motifs(motif0) {
	var comp_tbl = { 'A':'T', 'T':'A', 'C':'G', 'G':'C' };
	var bases = [ 'A', 'C', 'G', 'T' ];
	var motif = [motif0], exact = {}, mutated = {}, rc = "", i, j, x;
	for (j = motif0.length - 1; j >= 0; --j) rc += comp_tbl[motif0.charAt(j)];
	motif.push(rc);
	for (i = 0; i < 2; ++i) // rotations of both strands
		for (j = 1; j < motif[i].length; ++j)
			motif.push(motif[i].substr(j) + motif[i].substr(0, j));
	for (i = 0; i < motif.length; ++i) exact[motif[i]] = i;
	for (x in exact)
		for (i = 0; i < x.length; ++i)
			for (j = 0; j < bases.length; ++j) {
				if (x.charAt(i) == bases[j]) continue;
				mutated[x.substr(0, i) + bases[j] + x.substr(i + 1)] = 1;
			}
	return [exact, mutated];
}

/* Map one whitespace-split RepeatMasker .out record `t` to the repeat class
 * used by this script (t[9]: repeat name, t[10]: class, t[1]: divergence). */
function mg_anno_rm_class(t, motif_len, motif_hash, motif_mut_hash, mobile, max_mobile_div) {
	var m4, name = t[9], cls = t[10];
	if (name == "ALR/Alpha") cls = "alpha";
	else if (name == "HSATII") cls = "hsat2/3";
	else if (/^LTR\/ERV/.test(cls)) cls = 'LTR/ERV';
	else if (/^LTR/.test(cls)) cls = 'LTR/misc';
	else if (/^DNA/.test(cls)) cls = 'DNA/misc';
	else if (/rRNA|scRNA|snRNA|srpRNA/.test(cls)) cls = 'RNAmisc';
	else if (/^LINE/.test(cls) && cls != "LINE/L1") cls = 'LINE/misc';
	else if ((cls == "Simple_repeat" || cls == "Satellite") && (m4 = /^\(([ACGT]+)\)n/.exec(name)) != null) {
		var unit = m4[1];
		if (motif_hash[unit] != null) {
			cls = "hsat2/3";
		} else if (unit.length % motif_len == 0) {
			var c = 0, c_mut = 0;
			for (var j = 0; j < unit.length; j += motif_len) {
				// substr() takes (start, length): cut exactly one motif-sized piece
				var s = unit.substr(j, motif_len);
				if (motif_hash[s] != null) ++c;
				else if (motif_mut_hash[s] != null) ++c_mut;
			}
			// every piece is the motif or one substitution away, at least one exact
			if (c > 0 && (c + c_mut) * motif_len == unit.length)
				cls = "hsat2/3";
		}
	}
	if (mobile) {
		if (cls == "LINE/L1" && name == "L1HS" && parseFloat(t[1]) < max_mobile_div) cls = "LINE/L1HS";
		if (cls == "SINE/Alu" && /^AluY/.test(name) && parseFloat(t[1]) < max_mobile_div) cls = "SINE/AluY";
	}
	if (cls == 'Simple_repeat' || cls == 'Low_complexity') cls = 'LCR';
	return cls;
}

/* Sort the intervals of `iv` by start (in place) and return the total length
 * of their union. */
function mg_anno_merged_len(iv) {
	var st = 0, en = 0, cov = 0;
	iv.sort(function(a, b) { return a[0] - b[0]; });
	for (var j = 0; j < iv.length; ++j) {
		if (iv[j][0] > en) {
			cov += en - st;
			st = iv[j][0], en = iv[j][1];
		} else en = en > iv[j][1]? en : iv[j][1];
	}
	return cov + (en - st);
}

/* Choose the bubble type from `x` (feature -> merged length) and the event
 * length `len`. Features starting with '_' are aggregates and only used by
 * the dedicated branches below. */
function mg_anno_type(x, len) {
	var max = 0, max2 = 0, max_c = null, sum = 0, sum_misc = 0;
	var lcr = x['LCR'] == null? 0 : x['LCR'];
	var self_len = x['self'] == null? 0 : x['self'];
	for (var c in x) {
		if (c == 'LCR' || c == 'self') continue;
		if (c.charAt(0) == '_') continue;
		sum += x[c];
		if (c != 'mini' && c != 'micro') sum_misc += x[c];
		if (max < x[c]) max2 = max, max = x[c], max_c = c;
		else if (max2 < x[c]) max2 = x[c];
	}
	if (max >= len * 0.7) return max_c;
	if (lcr >= len * 0.7) {
		if (max_c == 'mini' || max_c == 'micro') {
			var y = x['mini'] == null? 0 : x['mini'];
			y += x['micro'] == null? 0 : x['micro'];
			if (max >= y * 0.7) return max_c;
		}
		return 'lcr';
	}
	if ((max_c == 'mini' || max_c == 'micro') && max2 < max * 0.1) return max_c;
	if (x['_alu'] != null && x['_alu'] >= len * 0.7) return 'SINE/Alu';
	if (x['_l1'] != null && x['_l1'] >= len * 0.7) return 'LINE/L1';
	if (x['_inter'] != null && x['_inter'] >= len * 0.7) return 'inter';
	if (x['_sat'] != null && x['_sat'] >= len * 0.5) return 'Satellite';
	if (sum_misc + lcr >= len * 0.7) return 'mixed';
	if (sum + lcr > len * 0.05) return 'partial';
	if (self_len >= len * 0.5) return 'self';
	return "none";
}

/**
 * anno: annotate bubbles/events listed in a BED-like file with repeat classes.
 *
 * The input has at least four tab-separated columns: contig, start, end and
 * event length. Optional annotation files are keyed by "contig_start_end"
 * and contribute intervals per feature; for each event the merged length of
 * every feature is computed and a type is chosen from them. One line per
 * event is printed: the event columns, the type, then "feature:length" pairs.
 *
 * @param args  command-line arguments after the sub-command name (see usage)
 * @throws Error on inconsistent event length or on an annotation record whose
 *         key is not in the input file
 */
function mg_cmd_anno(args)
{
	var c, micro_cap = 6, min_feat_len = 30, min_centro_len = 200, mobile = false, max_mobile_div = 2.0, min_segdup_frac = 0.2;
	var fn_rmout = null, fn_etrf = null, fn_dust = null, fn_gap = null, fn_paf = null, fn_centro = null, fn_bb = null, fn_sd = null;
	while ((c = getopt(args, "e:p:g:d:r:c:l:S:b:s:m")) != null) {
		if (c == 'l') min_feat_len = parseInt(getopt.arg);
		else if (c == 'S') min_segdup_frac = parseFloat(getopt.arg);
		else if (c == 'm') mobile = true;
		else if (c == 'e') fn_etrf = getopt.arg;
		else if (c == 'p') fn_paf = getopt.arg;
		else if (c == 'g') fn_gap = getopt.arg;
		else if (c == 'd') fn_dust = getopt.arg;
		else if (c == 'r') fn_rmout = getopt.arg;
		else if (c == 'c') fn_centro = getopt.arg;
		else if (c == 'b') fn_bb = getopt.arg;
		else if (c == 's') fn_sd = getopt.arg;
	}

	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js anno [options] <in.bed>");
		print("Options:");
		print("  -l INT     min feature length [" + min_feat_len + "]");
		print("  -S FLOAT   min segdup fraction [" + min_segdup_frac + "]");
		print("  -r FILE    RepeatMasker .out [null]");
		print("  -g FILE    seqtk gap output for stretches of Ns [null]");
		print("  -d FILE    minimap2/sdust output for LCRs [null]");
		print("  -e FILE    etrf output [null]");
		print("  -p FILE    PAF alignment against reference [null]");
		print("  -c FILE    dna-brnn centromere results [null]");
		print("  -b FILE    bubble file [null]");
		print("  -s FILE    segdup file (paste gfa2bed bedcov) [null]");
		print("  -m         annotate AluY and L1HS separately");
		exit(1);
	}

	var file, buf = new Bytes(), t, key, i, j, st, en, anno;

	// bb[key] = [event length, {feature: [[st, en], ...]}, ...bubble columns]
	var bb = {}, bba = [], seg = {};

	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		t = buf.toString().split("\t");
		if (t.length < 4) continue;
		key = t[0] + "_" + t[1] + "_" + t[2];
		var len = parseInt(t[3]);
		if (len < parseInt(t[2]) - parseInt(t[1]))
			throw Error("ERROR: event length smaller than interval length");
		bb[key] = [len, {}];
		bba.push(key);
	}
	file.close();

	if (fn_bb) {
		if (fn_sd) { // generated by "paste <(gfatools gfa2bed) <(bedtk cov segdup.bed gfa2bed.bed) | cut -f1-5,9,10"
			file = new File(fn_sd);
			while (file.readline(buf) >= 0) {
				t = buf.toString().split("\t");
				seg[t[3]] = [parseInt(t[4]), parseInt(t[2]) - parseInt(t[1]), parseInt(t[6])];
			}
			file.close();
		}
		file = new File(fn_bb); // parse "gfatools bubble" output
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			key = t[0] + "_" + t[1] + "_" + t[2];
			if (key in bb) {
				bb[key].push(t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10]);
				var s = t[11].split(","), tot_len = 0, tot_sd = 0, ref_len = 0;
				var dup = {};
				for (i = 1; i < s.length - 1; ++i) { // first and last entries are skipped
					if (seg[s[i]] == null) continue;
					if (dup[s[i]]) continue; // count each segment once
					dup[s[i]] = 1;
					tot_len += seg[s[i]][1], tot_sd += seg[s[i]][2];
					if (seg[s[i]][0] == 0)
						ref_len += seg[s[i]][1];
				}
				// the last three pushed columns are replaced by the computed sums
				bb[key][7] = tot_len;
				bb[key][8] = tot_sd;
				bb[key][9] = ref_len;
			}
		}
		file.close();
	}

	if (fn_rmout) { // parse RepeatMasker output
		var motif0 = "GGAAT";
		var motifs = mg_anno_hsat_motifs(motif0); // (GGAAT)n rotations and mutations

		file = new File(fn_rmout);
		while (file.readline(buf) >= 0) {
			t = buf.toString().replace(/^\s+/, "").split(/\s+/);
			if (t.length < 15) continue;
			anno = mg_anno_rm_class(t, motif0.length, motifs[0], motifs[1], mobile, max_mobile_div);
			mg_anno_list(bb, t[4], anno).push([parseInt(t[5]) - 1, parseInt(t[6])]);
		}
		file.close();

		// aggregate related classes under '_'-prefixed pseudo features
		for (i = 0; i < bba.length; ++i) {
			var h = bb[bba[i]][1], a = [], b = [], c_alu = [], c_l1 = [];
			for (key in h) {
				if (/^(DNA|SINE|LINE|Retroposon|LTR)/.test(key))
					for (j = 0; j < h[key].length; ++j) a.push(h[key][j]);
				if (/^(Satellite|hsat2\/3|alpha)/.test(key))
					for (j = 0; j < h[key].length; ++j) b.push(h[key][j]);
				if (/^(SINE\/Alu)/.test(key))
					for (j = 0; j < h[key].length; ++j) c_alu.push(h[key][j]);
				if (/^(LINE\/L1)/.test(key))
					for (j = 0; j < h[key].length; ++j) c_l1.push(h[key][j]);
			}
			if (a.length) h['_inter'] = a;
			if (b.length) h['_sat'] = b;
			if (c_alu.length) h['_alu'] = c_alu;
			if (c_l1.length) h['_l1'] = c_l1;
		}
	}

	if (fn_etrf) { // parse etrf output
		file = new File(fn_etrf);
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			var l = parseInt(t[4]);
			if (l == 1) continue;
			anno = l <= micro_cap? 'micro' : 'mini';
			st = parseInt(t[1]), en = parseInt(t[2]);
			mg_anno_list(bb, t[0], anno).push([st, en]);
			mg_anno_list(bb, t[0], 'LCR').push([st, en]);
		}
		file.close();
	}

	if (fn_dust) { // parse minimap2/sdust output
		file = new File(fn_dust);
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			mg_anno_list(bb, t[0], 'LCR').push([parseInt(t[1]), parseInt(t[2])]);
		}
		file.close();
	}

	if (fn_paf) { // parse bubble-to-reference PAF for self alignment
		file = new File(fn_paf);
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			mg_anno_list(bb, t[0], 'self').push([parseInt(t[2]), parseInt(t[3])]);
		}
		file.close();
	}

	if (fn_gap) { // parse assembly gaps, generated by "seqtk gap"
		file = new File(fn_gap);
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			mg_anno_list(bb, t[0], 'gap').push([parseInt(t[1]), parseInt(t[2])]);
		}
		file.close();
	}

	if (fn_centro) {
		file = new File(fn_centro);
		while (file.readline(buf) >= 0) {
			t = buf.toString().split("\t");
			var list = mg_anno_list(bb, t[0], t[3] == '1'? 'hsat2/3' : 'alpha');
			st = parseInt(t[1]), en = parseInt(t[2]);
			if (en - st >= min_centro_len)
				list.push([st, en]);
		}
		file.close();
	}

	for (i = 0; i < bba.length; ++i) {
		var m, feat = bb[bba[i]][1], ev = bb[bba[i]];
		key = bba[i];
		if ((m = /^(\S+)_(\d+)_(\d+)/.exec(key)) == null)
			throw Error("Bug!");
		var x = {}, out = [m[1], m[2], m[3]];
		if (fn_bb) out.push(ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9]);
		else out.push(ev[0]);
		for (c in feat) { // calculate the merged length of each feature
			var cov = mg_anno_merged_len(feat[c]);
			if (cov >= min_feat_len)
				x[c] = cov;
		}
		var type = mg_anno_type(x, ev[0]);
		// with a bubble file, out[8] and out[9] are the total and segdup lengths computed above
		if ((type == 'partial' || type == 'self' || type == 'none' || type == 'mixed') && fn_bb && out[8] >= 1000 && out[9] >= out[8] * min_segdup_frac)
			type = 'segdup';
		out.push(type);
		for (c in x)
			out.push(c + ':' + x[c]);
		print(out.join("\t"));
	}

	buf.destroy();
}
524
+
525
/**
 * Map an annotation type produced by the "anno" command to the numbered
 * category label used in summary tables.
 *
 * @param anno  annotation type, e.g. "mini", "LINE/L1", "segdup"
 * @return a label of the form "NN_Name"; unrecognised types map to
 *         "20_Other-repeat"
 */
function mg_classify_repeat(anno) {
	// exact names take precedence over the prefix rules below
	var exact = {
		"mini": "11_VNTR",
		"micro": "12_STR",
		"lcr": "13_Other-LCR",
		"LINE/L1": "02_L1",
		"LINE/L1HS": "02_L1",
		"SINE/Alu": "01_Alu",
		"SINE/AluY": "01_Alu",
		"Retroposon/SVA": "03_SVA",
		"LTR/ERV": "04_ERV",
		"inter": "05_Other-TE",
		"alpha": "10_Satellite",
		"hsat2/3": "10_Satellite",
		"_sat": "10_Satellite",
		"self": "30_Low-repeat",
		"none": "30_Low-repeat",
		"partial": "30_Low-repeat",
		"mixed": "20_Other-repeat",
		"segdup": "21_SegDup"
	};
	if (Object.prototype.hasOwnProperty.call(exact, anno)) return exact[anno];
	if (/^(DNA|LINE|SINE|LTR)/.test(anno)) return "05_Other-TE";
	if (/^Satellite/.test(anno)) return "10_Satellite";
	return "20_Other-repeat";
}
543
+
544
/**
 * anno2tbl: summarise annotated events per repeat category.
 *
 * Reads tab-separated lines from the file named by args[0] (or from a File
 * opened without a name when no argument is given). Column 12 is the
 * annotation type; events of type "gap" and on chrUn/_random contigs are
 * ignored. For each category three groups of three accumulators are kept,
 * selected by column 5 capped at 4: a count, the sum of column 8, and the
 * sum of (column 9 - column 11) when both are non-negative.
 *
 * @param args  command-line arguments after the sub-command name
 */
function mg_cmd_anno2tbl(args)
{
	var line = new Bytes();
	var fp = args.length == 0? new File() : new File(args[0]);
	var tbl = {};
	while (fp.readline(line) >= 0) {
		var f = line.toString().split("\t");
		for (var j = 1; j <= 7; ++j) f[j] = parseInt(f[j]);
		//if (f[5]) continue;
		if (f[11] == "gap" || /chrUn|_random/.test(f[0])) continue;
		var na = f[4] < 4? f[4] : 4;
		var cat = mg_classify_repeat(f[11]);
		if (tbl[cat] == null) tbl[cat] = [0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0];
		var row = tbl[cat];
		++row[na];          // number of events
		row[na+3] += f[7];  // accumulated column 8
		if (f[8] >= 0 && f[10] >= 0) row[na+6] += f[8] - f[10];
	}

	fp.close();
	line.destroy();

	for (var name in tbl) {
		// the numeric prefix is dropped for the human-readable label
		print(name, name.replace(/^[0-9]+_/, ""), tbl[name].slice(2).join("\t"));
	}
}
572
+
573
/**
 * paf2bl: print target intervals (columns 6, 8 and 9) of selected PAF records.
 *
 * Records tagged tp:A:S or tp:A:I, with column 12 below 5, with column 11
 * below 500, or without a de:f: tag are skipped. By default a record is kept
 * when its de value is within [min_de, 0.1]; with -s it is kept when the
 * value does not exceed 0.002.
 *
 * Options: -d FLOAT  min_de [0.01]
 *          -s        select low-divergence records instead
 *
 * @param args  command-line arguments after the sub-command name
 */
function mg_cmd_paf2bl(args)
{
	var opt, min_de = 0.01, max_de = 0.1, sub_de = 0.002, min_mapq = 5, min_len = 500, is_sub = false;
	while ((opt = getopt(args, "d:s")) != null) {
		switch (opt) {
		case 'd': min_de = parseFloat(getopt.arg); break;
		case 's': is_sub = true; break;
		}
	}
	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js paf2bl <ins.paf>");
		print("Note: bedtk sub <(mgutils.js paf2bl ins.paf; cat bl100.bed) <(../mgutils.js paf2bl -s ins.paf) | bedtk merge");
		return;
	}
	var fp = new File(args[getopt.ind]);
	var rec = new Bytes();
	while (fp.readline(rec) >= 0) {
		var text = rec.toString();
		var f = text.split("\t");
		if (/\ttp:A:[SI]/.test(text)) continue;
		if (parseInt(f[11]) < min_mapq || parseInt(f[10]) < min_len) continue;
		var hit = /\tde:f:(\S+)/.exec(text);
		if (hit == null) continue;
		var de = parseFloat(hit[1]);
		var keep = is_sub? !(de > sub_de) : !(de < min_de || de > max_de);
		if (keep) print(f[5], f[7], f[8]);
	}
	rec.destroy();
	fp.close();
}
606
+
607
/**
 * stableGaf: rewrite the path column of GAF records from segment names to
 * stable "contig:start-end" coordinates taken from the graph's S-lines.
 *
 * Every S-line must carry SN, SO and SR tags and a positive length (sequence
 * length or LN). Consecutive path steps with the same orientation, contig and
 * rank that abut each other are merged. When the whole path collapses to one
 * interval on a rank-0 contig, the record is rewritten in plain contig
 * coordinates (path = contig name, path length = contig length).
 *
 * @param args  <graph.gfa> [aln.gaf]; the GAF is read from a File opened
 *              without a name when omitted
 * @throws Error on unparsable S-line tags, unknown segments, or a path length
 *         that disagrees with the record
 */
function mg_cmd_stableGaf(args)
{
	var opt;
	while ((opt = getopt(args, "")) != null) {
	}
	if (args.length - getopt.ind < 1) {
		print("Usage: mgutils.js stableGaf <graph.gfa> <aln.gaf>");
		return;
	}

	var tag_re = /\t(LN|SN|SO|SR):[Zi]:(\S+)/g;
	var fp, rec = new Bytes();
	var hit, text, i;

	// ref_len: rank-0 contig -> largest end offset seen
	// seg_tbl: segment name -> [contig, start, end, rank]
	var ref_len = {}, seg_tbl = {};
	fp = new File(args[getopt.ind]);
	while (fp.readline(rec) >= 0) {
		text = rec.toString();
		hit = /^S\t(\S+)\t(\S+)(\t.*)/.exec(text);
		if (hit == null) continue;
		var seg_name = hit[1], seg_len = hit[2] == '*'? 0 : hit[2].length, tags = hit[3];
		var sn = null, so = -1, sr = -1, tag;
		while ((tag = tag_re.exec(tags)) != null) {
			switch (tag[1]) {
			case "LN": seg_len = parseInt(tag[2]); break;
			case "SN": sn = tag[2]; break;
			case "SO": so = parseInt(tag[2]); break;
			case "SR": sr = parseInt(tag[2]); break;
			}
		}
		if (sn == null || so < 0 || sr < 0 || seg_len <= 0)
			throw Error("failed to parse tags '" + tags + "'");
		seg_tbl[seg_name] = [sn, so, so + seg_len, sr];
		if (sr == 0) {
			if (ref_len[sn] == null) ref_len[sn] = 0;
			if (!(ref_len[sn] > so + seg_len)) ref_len[sn] = so + seg_len;
		}
	}
	fp.close();

	var step_re = /([><])([^\s><]+)/g;
	fp = args.length - getopt.ind < 2? new File() : new File(args[getopt.ind+1]);
	while (fp.readline(rec) >= 0) {
		text = rec.toString();
		hit = /^(\S+)\t(\d+\t\d+\t\d+)\t([+-])\t(\S+)\t(\d+)\t(\d+)\t(\d+)\t(.*)/.exec(text);
		if (hit == null)
			continue;
		// runs: merged path pieces, each [orientation, contig, start, end, rank]
		var step, runs = [];
		while ((step = step_re.exec(hit[4])) != null) {
			var info = seg_tbl[step[2]];
			if (info == null)
				throw Error("failed to find segment '" + step[2] + "'");
			var prev = runs.length? runs[runs.length - 1] : null, merged = false;
			if (prev != null && prev[0] == step[1] && info[3] == prev[4] && info[0] == prev[1]) {
				if (prev[0] == '>') { // forward: the new segment must start where the run ends
					if (info[1] == prev[3]) prev[3] = info[2], merged = true;
				} else { // reverse: the new segment must end where the run starts
					if (info[2] == prev[2]) prev[2] = info[1], merged = true;
				}
			}
			if (!merged) runs.push([step[1], info[0], info[1], info[2], info[3]]);
		}
		var path_len = 0, path = "";
		for (i = 0; i < runs.length; ++i)
			path_len += runs[i][3] - runs[i][2];
		if (path_len != parseInt(hit[5]))
			throw Error("inconsistent path length for '" + hit[1] + "': " + path_len + "!=" + hit[5]);
		var strand = hit[3], ps = hit[6], pe = hit[7];
		if (runs.length == 1 && ref_len[runs[0][1]] != null) {
			var only = runs[0];
			ps = parseInt(ps);
			pe = parseInt(pe);
			if (only[0] == '>') {
				ps += only[2], pe += only[2];
			} else {
				strand = strand == '+'? '-' : '+';
				// NOTE(review): if path start/end are half-open like PAF, the flipped
				// interval would be path_len - end .. path_len - start; the "- 1"
				// here looks like a possible off-by-one - confirm against the GAF spec
				var flipped_st = only[2] + (path_len - 1 - pe);
				var flipped_en = only[2] + (path_len - 1 - ps);
				ps = flipped_st, pe = flipped_en;
			}
			path_len = ref_len[only[1]];
			path = only[1];
		} else {
			var parts = [];
			for (i = 0; i < runs.length; ++i)
				parts.push(runs[i][0] + runs[i][1] + ':' + runs[i][2] + '-' + runs[i][3]);
			path = parts.join("");
		}
		print(hit[1], hit[2], strand, path, path_len, ps, pe, hit[8]);
	}
	fp.close();
	rec.destroy();
}
695
+
696
/**
 * subgaf: print the records of a stable-coordinate GAF whose path overlaps a
 * region given as "contig:start-end".
 *
 * FIXME: this is BUGGY!!! (author's note, kept). One visible limitation: the
 * '<' / '>' orientation of a path step is ignored when the first and last
 * steps are trimmed by the path start/end offsets.
 *
 * @param args  [in.gaf, region]
 * @throws Error if the region cannot be parsed, or if the first path step is
 *         not longer than the path start offset
 */
function mg_cmd_subgaf(args) // FIXME: this is BUGGY!!!
{
	if (args.length < 2) {
		print("Usage: mgutils.js subgaf <in.gaf> <reg>");
		exit(1);
	}

	var m, ctg, st, en;
	if ((m = /^(\S+):(\S+)-(\S+)/.exec(args[1])) != null)
		ctg = m[1], st = parseInt(m[2]), en = parseInt(m[3]);
	// an unparsable region used to leave ctg/st/en undefined and silently
	// produce no output; report it instead
	if (m == null || isNaN(st) || isNaN(en))
		throw Error("failed to parse region '" + args[1] + "'");

	var buf = new Bytes();
	var file = new File(args[0]);
	var re = /([><])([^\s><]+):(\d+)-(\d+)/g;

	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length < 9) continue; // blank or truncated line: no path columns
		var l = parseInt(t[6]), s = parseInt(t[7]), e = parseInt(t[8]);
		var regs = [];
		if (t[5][0] == '>' || t[5][0] == '<') {
			var x = 0; // path length consumed by the steps seen so far
			while ((m = re.exec(t[5])) != null) {
				var a = parseInt(m[3]), b = parseInt(m[4]), c = b - a;
				if (x == 0) { // first step: skip the part before the path start
					if (b - a <= s) throw Error("Inconsistent!");
					a += s;
				}
				if (x + c == l) b -= l - e; // last step: drop the part after the path end
				regs.push([m[2], a, b]);
				x += c;
			}
		} else {
			regs.push([t[5], s, e]);
		}
		var hit = false;
		for (var i = 0; i < regs.length; ++i) {
			if (regs[i][0] == ctg && regs[i][2] > st && en > regs[i][1])
				hit = true;
		}
		if (hit) print(buf);
	}

	file.close();
	buf.destroy();
}
743
+
744
+ function mg_cmd_sveval(args)
745
+ {
746
+ var c, flank = 100, min_var_len = 100, min_test_len = 50, min_sc = 20.0, non_chr = false, out_err = false, flt_vcf = false;
747
+ while ((c = getopt(args, "f:v:t:s:aeF")) != null) {
748
+ if (c == 'f') flank = parseInt(getopt.arg);
749
+ else if (c == 'v') min_var_len = parseInt(getopt.arg);
750
+ else if (c == 't') min_test_len = parseInt(getopt.arg);
751
+ else if (c == 's') min_sc = parseFloat(getopt.arg);
752
+ else if (c == 'a') non_chr = true;
753
+ else if (c == 'e') out_err = true;
754
+ else if (c == 'F') flt_vcf = true;
755
+ }
756
+ if (args.length - getopt.ind < 3) {
757
+ print("Usage: mgutils.js sveval <true.vcf> <true.bed> <call.txt>");
758
+ print("Options:");
759
+ print(" -f INT length of flanking regions [" + flank + "]");
760
+ print(" -v INT min INDEL length [" + min_var_len + "]");
761
+ print(" -t INT min true INDEL length [" + min_test_len + "]");
762
+ print(" -s INT min called score [" + min_sc + "]");
763
+ print(" -e print errors");
764
+ exit(1);
765
+ }
766
+
767
+ var file, buf = new Bytes();
768
+
769
+ // parse true.bed
770
+ warn("Reading confident regions...");
771
+ var bed = {}
772
+ file = new File(args[getopt.ind + 1]);
773
+ while (file.readline(buf) >= 0) {
774
+ var t = buf.toString().split("\t");
775
+ if (t.length < 3) continue;
776
+ if (!non_chr && /^(chr)?[XY]$/.test(t[0])) continue;
777
+ if (bed[t[0]] == null) bed[t[0]] = [];
778
+ bed[t[0]].push([parseInt(t[1]), parseInt(t[2])]);
779
+ }
780
+ file.close();
781
+ for (var ctg in bed) it_index(bed[ctg]);
782
+
783
+ // parse true.vcf
784
+ warn("Reading baseline variants...");
785
+ var vcf = {}, n_vcf = 0;
786
+ file = new File(args[getopt.ind]);
787
+ while (file.readline(buf) >= 0) {
788
+ var t = buf.toString().split("\t");
789
+ if (t[0][0] == '#') continue;
790
+ if (t.length < 10) continue;
791
+ var flt = (t[6] != '.' && t[6] != 'PASS');
792
+ if (flt_vcf && flt) continue;
793
+ if (bed[t[0]] == null) continue;
794
+ var ref = t[3];
795
+ var st = parseInt(t[1]) - 1;
796
+ var en = st + ref.length;
797
+ var max_diff = 0;
798
+ var al = t[4].split(",");
799
+ al.unshift(ref);
800
+ for (var i = 1; i < al.length; ++i) {
801
+ var l = al[i].length - ref.length;
802
+ if (l < 0) l = -l;
803
+ if (max_diff < l) max_diff = l;
804
+ }
805
+ if (max_diff < min_test_len) continue;
806
+ var s = t[9].split(':');
807
+ if (s.length == 0) continue;
808
+ var gt = s[0].split(/[|\/]/);
809
+ if (gt == 0) continue;
810
+ var max_ev = 0;
811
+ max_diff = 0;
812
+ for (var i = 0; i < gt.length; ++i) {
813
+ if (gt[i] == '.') continue;
814
+ var x = parseInt(gt[i]);
815
+ var l = al[x].length - ref.length;
816
+ var x = l > 0? l : -l;
817
+ if (max_diff < x) max_diff = x, max_ev = l;
818
+ }
819
+ if (max_diff < min_test_len) continue;
820
+ if (vcf[t[0]] == null) vcf[t[0]] = [];
821
+ vcf[t[0]].push([st, en, -1, max_diff, max_ev, flt, s[0]]);
822
+ }
823
+ file.close();
824
+ for (var ctg in vcf) it_index(vcf[ctg]);
825
+
826
+ // parse rst.txt
827
+ warn("Reading gt results...");
828
+ var rst = {};
829
+ file = new File(args[getopt.ind + 2]);
830
+ while (file.readline(buf) >= 0) {
831
+ var t = buf.toString().split("\t");
832
+ if (parseFloat(t[3]) < min_sc) continue;
833
+ if (bed[t[0]] == null) continue;
834
+ if (rst[t[0]] == null) rst[t[0]] = [];
835
+ var ref_len = t[7] == '*'? 0 : t[7].length;
836
+ var max_diff = 0, max_ev = 0;
837
+ for (var i = 8; i < t.length; ++i) {
838
+ var alt_len = t[i] == '*'? 0 : t[8].length;
839
+ var l = alt_len - ref_len;
840
+ var x = l > 0? l : -l;
841
+ if (max_diff < x) max_diff = x, max_ev = l;
842
+ }
843
+ var st = parseInt(t[1]), en = parseInt(t[2]);
844
+ rst[t[0]].push([st, en, -1, max_diff, max_ev]);
845
+ }
846
+ file.close();
847
+ for (var ctg in rst) it_index(rst[ctg]);
848
+
849
+ // sensitivity
850
+ var n_vcf = [0, 0, 0], fn = [0, 0, 0];
851
+ for (var ctg in vcf) {
852
+ for (var i = 0; i < vcf[ctg].length; ++i) {
853
+ var v = vcf[ctg][i];
854
+ if (v[3] < min_var_len) continue;
855
+ if (v[5]) continue;
856
+ var st = v[0] - flank, en = v[1] + flank;
857
+ if (st < 0) st = 0;
858
+ if (!it_contained(bed[ctg], st, en)) continue;
859
+ var sub = v[4] < 0? 1 : 2;
860
+ ++n_vcf[0], ++n_vcf[sub];
861
+ var b = it_overlap(rst[ctg], st, en);
862
+ if (b.length == 0) {
863
+ if (out_err) print("FN", ctg, v[0], v[1], v[4], v[6]);
864
+ ++fn[0], ++fn[sub];
865
+ }
866
+ }
867
+ }
868
+
869
+ // specificity
870
+ var n_rst = [0, 0, 0], fp = [0, 0, 0];
871
+ for (var ctg in rst) {
872
+ for (var i = 0; i < rst[ctg].length; ++i) {
873
+ var v = rst[ctg][i];
874
+ if (v[3] < min_var_len) continue;
875
+ var st = v[0] - flank, en = v[1] + flank;
876
+ if (st < 0) st = 0;
877
+ if (!it_contained(bed[ctg], st, en)) continue;
878
+ var sub = v[4] < 0? 1 : 2;
879
+ ++n_rst[0], ++n_rst[sub];
880
+ var b = it_overlap(vcf[ctg], st, en);
881
+ if (b.length == 0) {
882
+ if (out_err) print("FP", ctg, v[0], v[1], v[4]);
883
+ ++fp[0], ++fp[sub];
884
+ }
885
+ }
886
+ }
887
+
888
+ print("NA", fn[0], n_vcf[0], (fn[0]/n_vcf[0]).toFixed(4));
889
+ print("ND", fn[1], n_vcf[1], (fn[1]/n_vcf[1]).toFixed(4));
890
+ print("NI", fn[2], n_vcf[2], (fn[2]/n_vcf[2]).toFixed(4));
891
+ print("PA", fp[0], n_rst[0], (fp[0]/n_rst[0]).toFixed(4));
892
+ print("PD", fp[1], n_rst[1], (fp[1]/n_rst[1]).toFixed(4));
893
+ print("PI", fp[2], n_rst[2], (fp[2]/n_rst[2]).toFixed(4));
894
+ }
895
+
896
/**
 * `extractseg` command.
 *
 * Reads one or more GAF files in which each alignment record is followed by
 * "*"-prefixed per-segment lines. For every record that is long enough, it
 * remembers the first and last "*" line naming <seg1> and <seg2> and prints
 * one line: name, start, end, '*', 0, strand. The strand is '+' when <seg1>
 * is first seen at a smaller column-8 value than <seg2>, '-' otherwise.
 *
 * @param args  arguments after the command name:
 *              [-e] [-l INT] <seg1> <seg2> <in.gaf> [...]
 */
function mg_cmd_extractseg(args)
{
	// Emit the output line for a finished record, provided both segments were seen.
	// first[k]/last[k] are the split fields of the first/last "*" line for segment k.
	function process(ctg, first, last, is_end) {
		if (ctg == null || first[0] == null || first[1] == null) return;
		if (first[0][7] == first[1][7]) return;
		// up: the segment seen at the smaller coordinate; down: the other one
		var fwd = first[0][7] < first[1][7];
		var up = fwd? 0 : 1, down = fwd? 1 : 0;
		// occurrences of the two segments are not cleanly ordered: give up on this record
		if (last[up][7] >= first[down][7]) return;
		if (is_end) print(ctg, last[up][8], first[down][7], '*', 0, fwd? '+' : '-');
		else print(ctg, last[up][7], first[down][8], '*', 0, fwd? '+' : '-');
	}

	var c, min_len = 100000, is_end = false;
	while ((c = getopt(args, "el:")) != null) {
		if (c == 'l') min_len = parseInt(getopt.arg, 10);
		else if (c == 'e') is_end = true;
	}
	if (args.length - getopt.ind < 3) {
		print("Usage: mgutils.js extractseg [options] <seg1> <seg2> <in.gaf> [...]");
		print("Options:");
		print(" -l INT min alignment length on both query and path [" + min_len + "]");
		print(" -e output the region between the two segments instead of the region spanning them");
		return;
	}

	var seg = [args[getopt.ind], args[getopt.ind+1]];
	var buf = new Bytes();
	for (var i = getopt.ind + 2; i < args.length; ++i) {
		var file = new File(args[i]);
		var flt = false; // true while the current record is too short to be considered
		var first = [null, null], last = [null, null], ctg = null;
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			// Both line types are indexed up to column 9; a blank or truncated line
			// must not be mistaken for a new record and wipe the collected state.
			if (t.length < 9) continue;
			if (t[0] != "*") {
				// a new alignment record: flush the previous one and start over
				process(ctg, first, last, is_end);
				flt = (parseInt(t[3], 10) - parseInt(t[2], 10) < min_len || parseInt(t[8], 10) - parseInt(t[7], 10) < min_len);
				first = [null, null];
				last = [null, null];
				ctg = t[0];
			} else if (!flt) {
				var s = t[1].substr(1); // drop the leading orientation character
				t[7] = parseInt(t[7], 10), t[8] = parseInt(t[8], 10);
				if (t[3] == '0') continue;
				var k = s == seg[0]? 0 : s == seg[1]? 1 : -1;
				if (k < 0) continue;
				if (first[k] == null) first[k] = t.slice(0);
				last[k] = t.slice(0);
			}
		}
		process(ctg, first, last, is_end); // flush the last record of the file
		file.close();
	}
	buf.destroy();
}
953
+
954
/**
 * `bed2sql` command.
 *
 * Reads a sample list (first tab-separated column of each line) and a pasted
 * multi-sample BED (six columns per sample, the sixth being
 * "walk:len:strand:ctg:start:end" or "."), then prints SQL statements that
 * fill the `call` and `bwalk` tables inside one transaction and rebuild their
 * indexes.
 *
 * @param args  arguments after the command name: <sample.list> [pasted.bed|-]
 *              When the second argument is missing or "-", File() is opened
 *              without a name (presumably standard input).
 * @throws Error when a BED line does not have exactly 6 columns per sample
 */
function mg_cmd_bed2sql(args)
{
	// Render a value as a single-quoted SQL string literal. Embedded single
	// quotes are doubled so that names containing "'" cannot break the statement.
	function sql_str(x) {
		return "'" + String(x).replace(/'/g, "''") + "'";
	}

	var c;
	while ((c = getopt(args, "")) != null) {
	}
	if (args.length - getopt.ind == 0) {
		print("Usage: paste *.bed | mgutils.js bed2sql <sample.list> | sqlite3 rGFA.db");
		return;
	}

	var file, buf = new Bytes();

	var sample = [];
	file = new File(args[getopt.ind]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		sample.push(t[0]);
	}
	file.close();

	file = args.length - getopt.ind >= 2 && args[getopt.ind+1] != "-"? new File(args[getopt.ind+1]) : new File();
	print("DROP INDEX IF EXISTS idx_bwalk;");
	print("DROP INDEX IF EXISTS idx_cst;");
	print("DROP INDEX IF EXISTS idx_cen;");
	print("BEGIN TRANSACTION;");
	// wid: running walk id across the whole input; bid: one id per input line
	var wid = 0, bid = 0, ins_walk = [];
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		if (t.length != sample.length * 6)
			throw Error("Different number of samples");
		// h maps a walk string to its index in w; both are reset for every line
		var h = {}, w = [], j = 0;
		for (var i = 5; i < t.length; i += 6, ++j) {
			if (t[i] == ".") continue;
			var s = t[i].split(":");
			// own-property test: a walk string must never match an inherited Object key
			if (!Object.prototype.hasOwnProperty.call(h, s[0])) {
				h[s[0]] = w.length;
				ins_walk.push([wid, bid, s[1], s[0]]);
				w.push([s[0], s[1], wid++]);
			}
			var x = w[h[s[0]]];
			var v = [sql_str(bid), sql_str(sample[j]), sql_str(x[2]), sql_str(s[3]),
					 sql_str(s[4]), sql_str(s[5]), sql_str(s[2] == '+'? 1 : -1)];
			print("INSERT INTO call (bid,sample,wid,ctg,start,end,strand) VALUES (" + v.join(",") + ");");
		}
		++bid;
	}
	// walks are written after all calls, one row per distinct walk per line
	for (var i = 0; i < ins_walk.length; ++i) {
		var w = ins_walk[i], v = [];
		for (var j = 0; j < w.length; ++j)
			v.push(sql_str(w[j]));
		print("INSERT INTO bwalk (wid,bid,len,walk) VALUES (" + v.join(",") + ");");
	}
	print("END TRANSACTION;");
	print("CREATE INDEX IF NOT EXISTS idx_bwalk ON bwalk (bid);");
	print("CREATE INDEX IF NOT EXISTS idx_cst ON call (ctg, start);");
	print("CREATE INDEX IF NOT EXISTS idx_cen ON call (ctg, end);");
	file.close();

	buf.destroy();
}
1014
+
1015
/**
 * `merge` command.
 *
 * Reads a pasted multi-sample BED (six columns per sample, the sixth being
 * "walk:len:strand:ctg:start:end" or ".") and prints one tab-separated line
 * per input line: CHROM, START, END, INFO, FORMAT and one column per sample.
 * Alleles (distinct walks) are renumbered by decreasing sample count, so
 * allele 0 is the most frequent walk on that line.
 *
 * @param args  arguments after the command name: [-a FILE] [-s FILE] <in.bed|->
 *              -a  annotation file; column 12 is attached as ANNO to the line
 *                  with the same first three columns
 *              -s  sample list; the first whitespace-separated token of each
 *                  line is appended to the header line
 */
function mg_cmd_merge(args)
{
	var c, fn_anno = null, fn_sample = null;
	while ((c = getopt(args, "a:s:")) != null) {
		if (c == 'a') fn_anno = getopt.arg;
		else if (c == 's') fn_sample = getopt.arg;
	}
	if (args.length - getopt.ind == 0) {
		print("Usage: paste *.bed | mgutils.js merge -");
		print("Options:");
		print(" -a FILE annotation [null]");
		print(" -s FILE list of samples [null]");
		return;
	}

	var file, buf = new Bytes();
	var anno = {}; // "ctg_start_end" -> annotation string
	if (fn_anno) {
		file = new File(fn_anno);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split("\t");
			var key = [t[0], t[1], t[2]].join("_");
			anno[key] = t[11];
		}
		file.close();
	}
	var hdr = ["#CHROM", "START", "END", "INFO", "FORMAT"];
	if (fn_sample) {
		file = new File(fn_sample);
		while (file.readline(buf) >= 0) {
			var t = buf.toString().split(/\s+/);
			hdr.push(t[0]);
		}
		file.close();
	}
	file = args[getopt.ind] == "-"? new File() : new File(args[getopt.ind]);
	print('##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">');
	print('##INFO=<ID=NA,Number=1,Type=Integer,Description="Number of alleles">');
	print('##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count">');
	print('##INFO=<ID=ALEN,Number=.,Type=Integer,Description="Length of each allele">');
	print('##INFO=<ID=ANNO,Number=1,Type=String,Description="Annotation">');
	print('##INFO=<ID=VS,Number=1,Type=String,Description="Start vertex">');
	print('##INFO=<ID=VE,Number=1,Type=String,Description="End vertex">');
	print('##INFO=<ID=AWALK,Number=.,Type=String,Description="Walk of each allele">');
	print('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">');
	print('##FORMAT=<ID=CSTRAND,Number=1,Type=String,Description="Contig strand">');
	print('##FORMAT=<ID=CTG,Number=1,Type=String,Description="Contig name">');
	print('##FORMAT=<ID=CS,Number=1,Type=String,Description="Contig start, BED-like">');
	print('##FORMAT=<ID=CE,Number=1,Type=String,Description="Contig end, BED-like">');
	print(hdr.join("\t"));
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		var a = [t[0], t[1], t[2], "", "GT:CSTRAND:CTG:CS:CE"];
		// ah: walk -> provisional allele index; aa: alleles in order of first appearance;
		// b: per-sample fields, with the walk replaced by its provisional index
		var ah = {}, aa = [], b = [], ns = 0;
		for (var j = 5; j < t.length; j += 6) {
			if (t[j] == ".") {
				b.push(["."]);
				continue;
			}
			++ns;
			var s = t[j].split(":");
			if (ah[s[0]] == null) {
				ah[s[0]] = aa.length;
				aa.push({walk:s[0], len:s[1], cnt:0});
			}
			var k = ah[s[0]];
			++aa[k].cnt;
			s[0] = k;
			b.push(s);
		}
		for (var i = 0; i < aa.length; ++i)
			aa[i].i = i;
		// Most frequent allele first. Ties fall back to order of first appearance so
		// the numbering does not depend on whether the engine's sort is stable.
		aa.sort(function(a,b) { return b.cnt - a.cnt || a.i - b.i });
		// i2a: provisional index -> final (sorted) allele index
		var i2a = [], alen = [], awalk = [], ac = [];
		for (var i = 0; i < aa.length; ++i) {
			i2a[aa[i].i] = i;
			alen[i] = aa[i].len;
			awalk[i] = aa[i].walk;
			ac[i] = aa[i].cnt;
		}
		for (var j = 0; j < b.length; ++j) {
			if (b[j][0] != ".") {
				// drop the provisional index, then overwrite the length field (now first)
				// with the final allele index: the result is GT:CSTRAND:CTG:CS:CE
				var i = b[j].shift();
				b[j][0] = i2a[i];
				a.push(b[j].join(":"));
			} else a.push(".");
		}
		var info = ["NS="+ns, "NA="+aa.length, "ALEN="+alen.join(","), "AC="+ac.join(",")];
		var key = [t[0], t[1], t[2]].join("_");
		if (anno[key] != null) info.push("ANNO="+anno[key]);
		info.push("VS="+t[3], "VE="+t[4], "AWALK="+awalk.join(","));
		a[3] = info.join(";");
		print(a.join("\t"));
	}
	buf.destroy();
	file.close();
}
1112
+
1113
/**
 * `merge2vcf` command.
 *
 * Converts the output of the `merge` command to VCF. Lines starting with "##"
 * are passed through, the "#" column header is rewritten to the VCF column
 * set, and every record becomes a symbolic <CNV> allele whose INFO is the
 * original INFO plus END. Each sample column is reduced to "GT:GT0": GT is 0
 * for original allele 0 and 1 for any other allele, GT0 keeps the original
 * allele number.
 *
 * @param args  arguments after the command name; args[0] is the input file.
 *              With no argument File() is opened without a name (presumably
 *              standard input).
 */
function mg_cmd_merge2vcf(args) {
	var buf = new Bytes();
	var file = args.length == 0? new File() : new File(args[0]);
	print("##fileformat=VCFv4.2");
	print('##ALT=<ID=CNV,Description="description">');
	// Every record below carries END= in INFO, so declare the key like the other header entries.
	print('##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">');
	print('##FORMAT=<ID=GT0,Number=1,Type=String,Description="Original genotype">');
	while (file.readline(buf) >= 0) {
		var line = buf.toString();
		if (/^##/.test(line)) { // meta-information from `merge` is kept verbatim
			print(line);
			continue;
		}
		var a, t = line.split("\t");
		if (line[0] == "#") {
			// column header: the five `merge` columns are replaced, sample names are kept
			a = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"];
			for (var i = 5; i < t.length; ++i)
				a.push(t[i]);
		} else {
			a = [t[0], t[1], ".", "N", "<CNV>", 30, "PASS", t[3] + ";END=" + t[2], "GT:GT0"];
			for (var i = 5; i < t.length; ++i) {
				var s = t[i].split(":"); // only the leading allele number is used
				if (s[0] == ".") a.push(s[0]);
				else if (s[0] == "0") a.push("0:0");
				else a.push("1:" + s[0]);
			}
		}
		print(a.join("\t"));
	}
	file.close();
	buf.destroy();
}
1144
+
1145
/**
 * `segfreq` command.
 *
 * Loads a segment BED (columns: ctg, start, end, segment name, integer), then
 * walks the `merge` output and, for every segment named in an allele walk,
 * records the line's total allele count, the count of the allele that named
 * it, the line's ANNO, its repeat class and the number of alleles. An
 * optional bubble BED adds three trailing columns. Every segment row is
 * printed to stdout; per-class length totals go to warn().
 *
 * @param args  arguments after the command name:
 *              [-f minFreq] <gfa2bed.bed> <merged.txt> [bubble.bed]
 * @returns 1 when the usage message is printed, otherwise nothing
 * @throws Error on missing/inconsistent AC or AWALK, unknown segments, or
 *         bubbles absent from the merged file
 */
function mg_cmd_segfreq(args) {
	var opt, af_floor = 0.05;
	while ((opt = getopt(args, "f:")) != null)
		if (opt == 'f') af_floor = parseFloat(getopt.arg);
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js segfreq [-f minFreq=0.05] <gfa2bed.bed> <merged.txt> [bubble.bed]");
		return 1;
	}
	var fp, line = new Bytes();

	// pass 1: one row per segment
	// row layout: ctg, start, end, name, int column 5, total count, own count, anno, class, #alleles
	var seg_idx = {}, rows = [];
	fp = new File(args[getopt.ind]);
	while (fp.readline(line) >= 0) {
		var f = line.toString().split("\t");
		seg_idx[f[3]] = rows.length;
		rows.push([f[0], f[1], f[2], f[3], parseInt(f[4]), 0, 0, "N/A", "N/A", 0]);
	}
	fp.close();

	// pass 2: merged calls
	var kv_re = /([^\s=;]+)=([^\s=;]+)/g;
	var step_re = /([><])([^\s><]+)/g;
	var bubble_anno = {}; // "ctg_start_end" -> ANNO value (or "N/A")
	fp = new File(args[getopt.ind + 1]);
	while (fp.readline(line) >= 0) {
		var hit, f = line.toString().split("\t", 4);
		if (f[0][0] == "#") continue;
		var label = null, counts = null, walks = null;
		while ((hit = kv_re.exec(f[3])) != null) {
			switch (hit[1]) {
			case "ANNO":
				label = hit[2];
				break;
			case "AWALK":
				walks = hit[2].split(",");
				break;
			case "AC":
				counts = hit[2].split(",");
				for (var k = 0; k < counts.length; ++k)
					counts[k] = parseInt(counts[k]);
				break;
			}
		}
		if (counts == null || walks == null) throw Error("Missing AC or AWALK");
		if (counts.length != walks.length) throw Error("Inconsistent AC or AWALK");
		if (label == null) label = "N/A";
		bubble_anno[f[0]+"_"+f[1]+"_"+f[2]] = label;
		var total = 0;
		for (var k = 0; k < walks.length; ++k)
			total += counts[k];
		// NOTE(review): `seen` spans all alleles of the line, so a segment shared by
		// several alleles is credited only with the first allele's count — confirm intent.
		var seen = {};
		for (var k = 0; k < walks.length; ++k) {
			if (walks[k] == "*") continue;
			while ((hit = step_re.exec(walks[k])) != null) {
				var name = hit[2];
				if (seg_idx[name] == null) throw Error("Missing segment " + name);
				if (!seen[name]) {
					seen[name] = 1;
					var row = rows[seg_idx[name]];
					row[5] = total;
					row[6] += counts[k];
					row[7] = label;
					row[8] = mg_classify_repeat(label);
					row[9] = walks.length;
				}
			}
		}
	}
	fp.close();

	// pass 3 (optional): bubble BED; inner vertices of column 12 get the bubble coordinates
	if (args.length - getopt.ind >= 3) {
		fp = new File(args[getopt.ind + 2]);
		while (fp.readline(line) >= 0) {
			var f = line.toString().split("\t");
			var path = f[11].split(",");
			var label = bubble_anno[f[0]+"_"+f[1]+"_"+f[2]];
			if (label == null) throw Error("Missing bubble");
			for (var k = 1; k < path.length - 1; ++k) {
				var at = seg_idx[path[k]];
				if (at == null) throw Error("Inconsistent bubble file");
				var row = rows[at];
				row[10] = f[0];
				row[11] = f[1];
				row[12] = f[2];
				row[7] = label;
				row[8] = mg_classify_repeat(label);
			}
		}
		fp.close();
	}

	line.destroy();

	// output rows and accumulate lengths per repeat class, split by allele number (2, 3, >3)
	var tally = {};
	for (var r = 0; r < rows.length; ++r) {
		var row = rows[r];
		print(row.join("\t"));
		var cls = row[8], span = parseInt(row[2]) - parseInt(row[1]);
		if (!(row[4] > 0 && row[5] > 0 && row[6] >= row[5] * af_floor)) continue;
		if (tally[cls] == null) tally[cls] = [0, 0, 0];
		var n = row[9];
		var slot = n == 2? 0 : n == 3? 1 : n > 3? 2 : -1;
		if (slot >= 0) tally[cls][slot] += span;
	}
	for (var key in tally)
		warn(key, key.replace(/^\d+_/, ""), tally[key].join("\t"));
}
1244
+
1245
/**
 * `genecopy` command.
 *
 * Reads gene-to-graph alignments in GAF (with a cg:Z CIGAR tag) and a BED of
 * source gene locations, scores every sufficiently covering hit, then walks
 * the hits from best to worst score and accepts a hit when less than
 * opt.max_prev_ovlp of its length is covered by same-strand intervals that
 * are already counted. Prints "OG" lines (overlapping genes), "GH" lines
 * (accepted hits) and one "GC" line per gene (number of hits, number accepted).
 *
 * @param args  arguments after the command name:
 *              [-c FLOAT] [-r FLOAT] <in.gaf> <src.bed>
 * @throws Error("no cg") when a retained GAF line has no cg:Z tag
 */
function mg_cmd_genecopy(args)
{
	var c, opt = { min_cov:0.8, min_rel_cov:0.85, max_prev_ovlp:0.5, mm:4, gapo:5 };
	while ((c = getopt(args, "c:r:")) != null) {
		if (c == 'c') opt.min_cov = parseFloat(getopt.arg);
		else if (c == 'r') opt.min_rel_cov = parseFloat(getopt.arg);
	}
	if (args.length - getopt.ind < 2) {
		print("Usage: mgutils.js genecopy [options] <in.gaf> <src.bed>");
		print("Options:");
		print(" -c FLOAT min coverage [" + opt.min_cov + "]");
		print(" -r FLOAT min relative coverage [" + opt.min_rel_cov + "]");
		return;
	}
	var re_cg = /(\d+)([MIDNSHP=X])/g;
	var re_walk = /([><])([^\s><]+):(\d+)-(\d+)/g;
	var file, buf = new Bytes();

	// src: gene name -> [ctg, start, end, strand] from the BED
	var src = {};
	file = new File(args[getopt.ind+1]);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		src[t[3]] = [t[0], parseInt(t[1]), parseInt(t[2]), t[5] == '+'? 1 : -1];
	}
	file.close();

	file = new File(args[getopt.ind]);
	// gene: name -> list of hits [qlen, qstart, qend, score, de, intervals]
	// reg:  ctg  -> list of [start, end, 0, gene name, hit index, skip flag, strand]
	var gene = {}, reg = {};
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");

		// check coverage
		if (/\|([A-Z]+\d*\.\d+|ENSG\d+)$/.test(t[0])) continue;
		for (var i = 1; i <= 3; ++i) t[i] = parseInt(t[i]);
		for (var i = 6; i <= 11; ++i) t[i] = parseInt(t[i]);
		if (t[3] - t[2] < t[1] * opt.min_cov) continue;
		if (gene[t[0]] != null) {
			// later hits must cover almost as much of the query as the first retained hit
			var g0 = gene[t[0]][0];
			if (t[3] - t[2] < (g0[2] - g0[1]) * opt.min_rel_cov)
				continue;
		}

		// compute de
		var m, cg = null;
		for (var i = 12; i < t.length; ++i) {
			if (t[i].substr(0, 4) == "cg:Z")
				cg = t[i].substr(5);
		}
		if (cg == null) throw Error("no cg");
		var blen = 0, mlen = 0, sc = 0;
		while ((m = re_cg.exec(cg)) != null) {
			var len = parseInt(m[1]);
			if (m[2] == '=') mlen += len, blen += len, sc += len;
			else {
				++blen; // every non-'=' operation counts as a single event
				// NOTE(review): re_cg can never yield '*', so the mismatch penalty below is
				// unreachable and 'X'/'M' runs are scored as gaps — confirm intended scoring.
				if (m[2] == '*') sc -= opt.mm;
				else sc -= opt.gapo + len;
			}
		}
		var de = (blen - mlen) / blen;

		// find intervals
		var intv = [];
		if (t[5][0] == '>' || t[5][0] == '<') {
			var len = 0; // path offset of the current walk segment
			while ((m = re_walk.exec(t[5])) != null) {
				var st = parseInt(m[3]), en = parseInt(m[4]);
				var ss = st, ee = en;
				// The two tests are independent: an alignment that starts and ends inside
				// the same walk segment must be trimmed on both sides.
				if (t[7] >= len && t[7] < len + en - st) {
					if (m[1] == '>') ss = st + t[7];
					else ee = en - t[7];
				}
				if (t[8] >= len && t[8] < len + en - st) {
					if (m[1] == '>') ee = st + t[8] - len;
					else ss = st + t[6] - t[8];
				}
				intv.push([m[2], ss, ee, m[1] == '>'? 1 : -1]);
				len += en - st;
			}
		} else intv.push([t[5], t[7], t[8], t[4] == '+'? 1 : -1]);

		// save
		if (gene[t[0]] == null) gene[t[0]] = [];
		for (var j = 0; j < intv.length; ++j) {
			var x = intv[j], pass = true;
			if (reg[x[0]] == null) reg[x[0]] = [];
			if (src[t[0]] != null) {
				var y = src[t[0]];
				if (y[0] == x[0] && y[1] < x[2] && x[1] < y[2]) {
					// more than 99% of the interval lies inside the gene's source region:
					// it is counted from the start (flag false)
					var l = (x[2] < y[2]? x[2] : y[2]) - (x[1] > y[1]? x[1] : y[1]);
					if (l > (x[2] - x[1]) * 0.99) pass = false;
				}
			}
			reg[x[0]].push([x[1], x[2], 0, t[0], gene[t[0]].length, pass, x[3]]);
		}
		gene[t[0]].push([t[1], t[2], t[3], sc, de, intv]);
	}
	file.close();
	buf.destroy();

	// preparation
	var a = []; // [score, gene name, hit index], sorted by decreasing score
	for (var g in gene) {
		var x = gene[g];
		for (var i = 0; i < x.length; ++i)
			a.push([x[i][3], g, i]);
	}
	a.sort(function(x,y) { return y[0]-x[0] });
	for (var x in reg) it_index(reg[x]);

	// select
	var good_hit = [];
	for (var i = 0; i < a.length; ++i) {
		var x = a[i];
		var h = gene[x[1]][x[2]];
		var intv = h[5], cov_tot = 0, len_tot = 0, ovlp_gene = {};
		for (var j = 0; j < intv.length; ++j) {
			var y = intv[j];
			len_tot += y[2] - y[1];
			if (reg[y[0]] == null) continue;
			var st0 = y[1], en0 = y[2];
			var b = it_overlap(reg[y[0]], st0, en0);
			// length of the union of counted same-strand intervals, clipped to [st0,en0);
			// presumably it_overlap returns hits ordered by start — verify against its definition
			var cov_st = 0, cov_en = 0, cov = 0;
			for (var k = 0; k < b.length; ++k) {
				if (b[k][5] || b[k][6] != y[3]) continue; // not counted yet, or other strand
				ovlp_gene[b[k][3]] = 1;
				var st1 = b[k][0] > st0? b[k][0] : st0;
				var en1 = b[k][1] < en0? b[k][1] : en0;
				if (st1 > cov_en) {
					cov += cov_en - cov_st;
					cov_st = st1, cov_en = en1;
				} else cov_en = cov_en > en1? cov_en : en1;
			}
			cov += cov_en - cov_st;
			cov_tot += cov;
		}
		var ovlp_gene_arr = [];
		for (var y in ovlp_gene) ovlp_gene_arr.push(y);
		if (ovlp_gene_arr.length > 0)
			print("OG", x[1], x[2], cov_tot, len_tot, ovlp_gene_arr);
		if (cov_tot < len_tot * opt.max_prev_ovlp) {
			good_hit.push([x[1], x[2]]);
			// mark this hit's own intervals as counted for the hits that follow
			for (var j = 0; j < intv.length; ++j) {
				var y = intv[j];
				if (reg[y[0]] == null) continue;
				var b = it_overlap(reg[y[0]], y[1], y[2]);
				for (var k = 0; k < b.length; ++k)
					if (b[k][3] == x[1] && b[k][4] == x[2])
						b[k][5] = false;
			}
		}
	}

	// count good_hit
	var out = {}; // gene name -> [number of hits, number of accepted hits]
	for (var g in gene) out[g] = [gene[g].length, 0];
	for (var i = 0; i < good_hit.length; ++i) {
		print("GH", good_hit[i][0], gene[good_hit[i][0]][good_hit[i][1]].join("\t"));
		++out[good_hit[i][0]][1];
	}
	for (var g in out)
		print("GC", g, out[g].join("\t"));
}
1407
+
1408
+ /*************************
1409
+ ***** main function *****
1410
+ *************************/
1411
+
1412
/**
 * Entry point: print the command list when called without arguments,
 * otherwise remove the first argument and hand the rest to the matching
 * mg_cmd_* handler.
 *
 * subgaf, sveval and joinfa are dispatched but not listed in the usage text.
 *
 * @param args  command-line arguments; args[0] is the command name
 * @throws Error for a command name that has no handler
 */
function main(args)
{
	if (args.length == 0) {
		var usage = [
			"Usage: mgutils.js <command> [arguments]",
			"Commands:",
			" stableGaf convert unstable GAF to stable GAF",
			" renamefa add a prefix to sequence names in FASTA",
			" paf2bl blacklist regions from insert-to-ref alignment",
			" anno annotate short sequences",
			" anno2tbl summarize anno output",
			" extractseg extract a segment from GAF",
			" merge merge per-sample --call BED",
			" merge2vcf convert merge BED output to VCF",
			" segfreq compute node frequency from merged calls",
			" genecopy gene copy analysis",
			" bed2sql generate SQL from --call BED"
		];
		for (var n = 0; n < usage.length; ++n)
			print(usage[n]);
		exit(1);
	}

	var cmd = args.shift();
	// A switch keeps handler lookup lazy: only the selected handler name is resolved.
	switch (cmd) {
	case 'renamefa':   mg_cmd_renamefa(args); break;
	case 'paf2bl':     mg_cmd_paf2bl(args); break;
	case 'anno':       mg_cmd_anno(args); break;
	case 'anno2tbl':   mg_cmd_anno2tbl(args); break;
	case 'subgaf':     mg_cmd_subgaf(args); break;
	case 'sveval':     mg_cmd_sveval(args); break;
	case 'joinfa':     mg_cmd_joinfa(args); break;
	case 'stableGaf':  mg_cmd_stableGaf(args); break;
	case 'bed2sql':    mg_cmd_bed2sql(args); break;
	case 'extractseg': mg_cmd_extractseg(args); break;
	case 'merge':      mg_cmd_merge(args); break;
	case 'merge2vcf':  mg_cmd_merge2vcf(args); break;
	case 'segfreq':    mg_cmd_segfreq(args); break;
	case 'genecopy':   mg_cmd_genecopy(args); break;
	default:
		throw Error("unrecognized command: " + cmd);
	}
}
1450
+
1451
// Script entry point. `arguments` is presumably the command-line argument
// array supplied by the host shell — TODO confirm against the runtime.
+ main(arguments);