ruby-minigraph 0.0.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +62 -0
- data/ext/Rakefile +56 -0
- data/ext/cmappy/cmappy.c +7 -0
- data/ext/cmappy/cmappy.h +8 -0
- data/ext/minigraph/LICENSE.txt +23 -0
- data/ext/minigraph/Makefile +66 -0
- data/ext/minigraph/NEWS.md +317 -0
- data/ext/minigraph/README.md +207 -0
- data/ext/minigraph/algo.c +194 -0
- data/ext/minigraph/algo.h +33 -0
- data/ext/minigraph/asm-call.c +147 -0
- data/ext/minigraph/bseq.c +133 -0
- data/ext/minigraph/bseq.h +76 -0
- data/ext/minigraph/cal_cov.c +139 -0
- data/ext/minigraph/doc/example1.png +0 -0
- data/ext/minigraph/doc/example2.png +0 -0
- data/ext/minigraph/doc/examples.graffle +0 -0
- data/ext/minigraph/format.c +241 -0
- data/ext/minigraph/galign.c +140 -0
- data/ext/minigraph/gchain1.c +532 -0
- data/ext/minigraph/gcmisc.c +223 -0
- data/ext/minigraph/gfa-aug.c +260 -0
- data/ext/minigraph/gfa-base.c +526 -0
- data/ext/minigraph/gfa-bbl.c +372 -0
- data/ext/minigraph/gfa-ed.c +617 -0
- data/ext/minigraph/gfa-io.c +395 -0
- data/ext/minigraph/gfa-priv.h +154 -0
- data/ext/minigraph/gfa.h +166 -0
- data/ext/minigraph/ggen.c +182 -0
- data/ext/minigraph/ggen.h +21 -0
- data/ext/minigraph/ggsimple.c +570 -0
- data/ext/minigraph/gmap.c +211 -0
- data/ext/minigraph/index.c +230 -0
- data/ext/minigraph/kalloc.c +224 -0
- data/ext/minigraph/kalloc.h +82 -0
- data/ext/minigraph/kavl.h +414 -0
- data/ext/minigraph/kdq.h +134 -0
- data/ext/minigraph/ketopt.h +116 -0
- data/ext/minigraph/khashl.h +348 -0
- data/ext/minigraph/krmq.h +474 -0
- data/ext/minigraph/kseq.h +256 -0
- data/ext/minigraph/ksort.h +164 -0
- data/ext/minigraph/kstring.h +165 -0
- data/ext/minigraph/kthread.c +159 -0
- data/ext/minigraph/kthread.h +15 -0
- data/ext/minigraph/kvec-km.h +105 -0
- data/ext/minigraph/kvec.h +110 -0
- data/ext/minigraph/lchain.c +441 -0
- data/ext/minigraph/main.c +301 -0
- data/ext/minigraph/map-algo.c +500 -0
- data/ext/minigraph/mgpriv.h +128 -0
- data/ext/minigraph/minigraph.1 +359 -0
- data/ext/minigraph/minigraph.h +176 -0
- data/ext/minigraph/miniwfa.c +834 -0
- data/ext/minigraph/miniwfa.h +95 -0
- data/ext/minigraph/misc/mgutils.js +1451 -0
- data/ext/minigraph/misc.c +12 -0
- data/ext/minigraph/options.c +134 -0
- data/ext/minigraph/shortk.c +251 -0
- data/ext/minigraph/sketch.c +109 -0
- data/ext/minigraph/sys.c +147 -0
- data/ext/minigraph/sys.h +20 -0
- data/ext/minigraph/test/MT-chimp.fa +277 -0
- data/ext/minigraph/test/MT-human.fa +239 -0
- data/ext/minigraph/test/MT-orangA.fa +276 -0
- data/ext/minigraph/test/MT.gfa +19 -0
- data/ext/minigraph/tex/Makefile +13 -0
- data/ext/minigraph/tex/minigraph.bib +676 -0
- data/ext/minigraph/tex/minigraph.tex +986 -0
- data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.gp +42 -0
- data/ext/minigraph/tex/plots/CHM13-f1-90.bb.anno.tbl +13 -0
- data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.gp +269 -0
- data/ext/minigraph/tex/plots/CHM13-f1-90.bb.mini-inter-none.win.sh +7 -0
- data/ext/minigraph/tex/plots/CHM13v1.cen.bed +23 -0
- data/ext/minigraph/tex/plots/CHM13v1.size +23 -0
- data/ext/minigraph/tex/plots/anno2tbl.js +40 -0
- data/ext/minigraph/tex/plots/bedutils.js +367 -0
- data/ext/minigraph/tex/plots/chr-plot.js +130 -0
- data/ext/minigraph/tex/plots/gen-anno.mak +24 -0
- data/ext/minigraph.patch +21 -0
- data/lib/minigraph/ffi/constants.rb +230 -0
- data/lib/minigraph/ffi/functions.rb +70 -0
- data/lib/minigraph/ffi/mappy.rb +8 -0
- data/lib/minigraph/ffi.rb +27 -0
- data/lib/minigraph/version.rb +5 -0
- data/lib/minigraph.rb +72 -0
- metadata +159 -0
@@ -0,0 +1,367 @@
|
|
1
|
+
#!/usr/bin/env k8
|
2
|
+
|
3
|
+
/*****************************
|
4
|
+
***** Library functions *****
|
5
|
+
*****************************/
|
6
|
+
|
7
|
+
/*******************************
|
8
|
+
* Command line option parsing *
|
9
|
+
*******************************/
|
10
|
+
|
11
|
+
/**
 * Minimal getopt(3)-style command-line option parser.
 *
 * Scanning state is kept on the function object itself so that callers can
 * inspect it between calls:
 *   getopt.ind   - index in `args` of the next argument to be scanned
 *   getopt.arg   - argument of the option just returned, or null
 *   getopt.place - offset inside args[getopt.ind]; -1 means "start a new argument"
 *
 * @param args  array of argument strings
 * @param ostr  option letters; a letter followed by ':' takes an argument
 * @return the option letter; '?' for an unknown option or a missing argument
 *         (':' for a missing argument when `ostr` starts with ':'); null when
 *         option scanning is finished
 */
var getopt = function(args, ostr) {
    var oli; // option letter list index
    if (typeof(getopt.place) == 'undefined')
        getopt.ind = 0, getopt.arg = null, getopt.place = -1;
    if (getopt.place == -1) { // update scanning pointer
        if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
            getopt.place = -1;
            return null;
        }
        if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
            ++getopt.ind;
            getopt.place = -1;
            return null;
        }
    }
    var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
    if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
        if (optopt == '-') { // if the user didn't specify '-' as an option, assume it means null.
            getopt.place = -1; // leave a clean state; getopt.ind still points at this argument
            return null;
        }
        // The unknown letter was the last one in this argument: move on to the
        // next argument. Without this, the following call would read an empty
        // option character, which indexOf() "finds" at position 0 of ostr.
        if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
        return '?';
    }
    if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
        getopt.arg = null;
        if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
    } else { // need an argument
        if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
            getopt.arg = args[getopt.ind].substr(getopt.place); // "-w10" form
        else if (args.length <= ++getopt.ind) { // no arg
            getopt.place = -1;
            if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
            return '?';
        } else getopt.arg = args[getopt.ind]; // white space, i.e. "-w 10" form
        getopt.place = -1;
        ++getopt.ind;
    }
    return optopt;
}
|
48
|
+
|
49
|
+
/***************
|
50
|
+
* BED overlap *
|
51
|
+
***************/
|
52
|
+
|
53
|
+
/**
 * Sort intervals by start and annotate them for it_overlap().
 *
 * Each element of `a` is an array [start, end, ...]; slot 2 of every element
 * is overwritten. The array is treated as an implicit binary tree: nodes at
 * level k sit at indices (2^k - 1) + j * 2^(k+1), and slot 2 of a node
 * receives the largest `end` found in the node and its two children's slot 2.
 *
 * @param a  array of intervals; sorted in place by start
 * @return index of the highest level built, or -1 when `a` is empty
 */
function it_index(a) {
    var n = a.length;
    if (n == 0) return -1;
    a.sort(function(p, q) { return p[0] - q[0]; });

    // Level 0: every even index is a leaf whose maximum is its own end.
    var tail_max, tail_idx, idx;
    for (idx = 0; idx < n; idx += 2) {
        a[idx][2] = a[idx][1];
        tail_max = a[idx][2];
        tail_idx = idx;
    }

    var level = 1;
    while ((1 << level) <= n) {
        var half = 1 << (level - 1);   // distance from a node to either child
        var stride = 1 << (level + 1); // distance between nodes of this level
        for (idx = (1 << level) - 1; idx < n; idx += stride) {
            a[idx][2] = a[idx][1];
            if (a[idx][2] < a[idx - half][2]) a[idx][2] = a[idx - half][2];
            // A right child beyond the array is represented by the running
            // maximum tracked along the rightmost path.
            var right_max = idx + half < n ? a[idx + half][2] : tail_max;
            if (a[idx][2] < right_max) a[idx][2] = right_max;
        }
        // Step from the last node of the previous level to its parent.
        if ((tail_idx >> level) & 1) tail_idx -= half;
        else tail_idx += half;
        if (tail_idx < n) {
            if (!(tail_max > a[tail_idx][2])) tail_max = a[tail_idx][2];
        }
        ++level;
    }
    return level - 1;
}
|
72
|
+
|
73
|
+
/**
 * Find the intervals in `a` that overlap the query [st, en).
 *
 * `a` is read as the implicit tree laid out by it_index(): slot 0 is the
 * start, slot 1 the end and slot 2 the subtree maximum end. An interval is
 * reported when its start is below `en` and `st` is below its end.
 *
 * @param a   array of [start, end, max] intervals
 * @param st  query start
 * @param en  query end
 * @return array of indices into `a`
 */
function it_overlap(a, st, en) {
    var n = a.length, hits = [], todo = [];

    // Level of the root: the largest h such that 2^h <= n.
    var height = 0;
    while ((1 << height) <= n) ++height;
    --height;

    // Each frame is [node index, node level, 1 once the left subtree was handled].
    todo.push([(1 << height) - 1, height, 0]);
    while (todo.length) {
        var frame = todo.pop();
        var node = frame[0], lvl = frame[1], left_done = frame[2];
        if (lvl <= 2) {
            // Small subtree: scan its index range linearly.
            var lo = node >> lvl << lvl;
            var hi = lo + (1 << (lvl + 1)) - 1;
            if (hi >= n) hi = n;
            for (var j = lo; j < hi; ++j) {
                if (a[j][0] < en && st < a[j][1])
                    hits.push(j);
            }
            continue;
        }
        if (left_done == 0) {
            // Come back to this node after its left subtree.
            todo.push([node, lvl, 1]);
            var left = node - (1 << (lvl - 1));
            // Descend only if something in the left subtree can end after st.
            if (left >= n || a[left][2] > st)
                todo.push([left, lvl - 1, 0]);
        } else if (node < n && a[node][0] < en) {
            if (st < a[node][1]) hits.push(node);
            todo.push([node + (1 << (lvl - 1)), lvl - 1, 0]);
        }
    }
    return hits;
}
|
99
|
+
|
100
|
+
/******************************
|
101
|
+
***** Command-line tools *****
|
102
|
+
******************************/
|
103
|
+
|
104
|
+
/**
 * "sum" command: print the total of (column 3 - column 2) over all lines.
 *
 * Lines with fewer than three tab-separated fields are skipped.
 *
 * @param args  remaining command-line arguments; args[0] is the input path.
 *              With no argument or "-", File is constructed without a path
 *              (presumably standard input under k8 - confirm).
 * @return 0
 */
function bed_sum(args)
{
    var buf = new Bytes();
    var use_default = args.length == 0 || args[0] == '-';
    var file = use_default? new File() : new File(args[0]);
    var total = 0;
    while (file.readline(buf) >= 0) {
        var fields = buf.toString().split("\t", 3);
        if (fields.length >= 3)
            total += parseInt(fields[2]) - parseInt(fields[1]);
    }
    file.close();
    buf.destroy();
    print(total);
    return 0;
}
|
119
|
+
|
120
|
+
/**
 * "sum2nd" command: print the sum of the second tab-separated column.
 *
 * Lines with fewer than two fields (e.g. blank lines) are skipped, the same
 * way bed_sum() skips short lines; otherwise a single such line would turn
 * the whole total into NaN.
 *
 * @param args  remaining command-line arguments; args[0] is the input path.
 *              With no argument or "-", File is constructed without a path
 *              (presumably standard input under k8 - confirm).
 * @return 0
 */
function bed_sum2nd(args)
{
    var buf = new Bytes();
    var file = args.length == 0 || args[0] == '-'? new File() : new File(args[0]);
    var s = 0;
    while (file.readline(buf) >= 0) {
        var t = buf.toString().split("\t", 2);
        if (t.length < 2) continue; // no second column on this line
        s += parseInt(t[1]);
    }
    file.close();
    buf.destroy();
    print(s);
    return 0;
}
|
134
|
+
|
135
|
+
/**
 * "merge" command: merge overlapping or touching regions of a BED stream
 * that is sorted by contig and start, printing one line per merged region.
 *
 * Lines with fewer than three tab-separated fields are skipped. As in
 * bed_sum() and bed_sum1(), no argument or "-" selects the default input
 * (File constructed without a path; presumably standard input under k8).
 *
 * @param args  remaining command-line arguments; args[0] is the input path
 * @return 0
 * @throws Error when a region starts before the current merged region
 */
function bed_merge(args)
{
    var buf = new Bytes();
    var file = args.length == 0 || args[0] == '-'? new File() : new File(args[0]);
    var ctg = null, st, en;
    while (file.readline(buf) >= 0) {
        var t = buf.toString().split("\t", 3);
        if (t.length < 3) continue; // blank or truncated line
        var s = parseInt(t[1]);
        var e = parseInt(t[2]);
        if (ctg != t[0] || s > en) { // no overlap
            if (ctg != null) print(ctg, st, en);
            ctg = t[0], st = s, en = e;
        } else if (s < st) throw Error("ERROR: input is not sorted by coordinate");
        else en = en > e? en : e; // extend the current merged region
    }
    if (ctg != null) print(ctg, st, en); // flush the last region
    file.close();
    buf.destroy();
    return 0;
}
|
155
|
+
|
156
|
+
/**
 * "sum1" command: for each contig of a sorted BED stream, print the contig
 * name and the total length covered after merging overlapping regions.
 *
 * @param args  remaining command-line arguments; args[0] is the input path.
 *              With no argument or "-", File is constructed without a path
 *              (presumably standard input under k8 - confirm).
 * @return 0
 * @throws Error when a region starts before the current merged region
 */
function bed_sum1(args)
{
    var buf = new Bytes();
    var use_default = args.length == 0 || args[0] == '-';
    var file = use_default? new File() : new File(args[0]);
    var cur = null, run_st = 0, run_en = 0, total = 0;
    while (file.readline(buf) >= 0) {
        var f = buf.toString().split("\t", 3);
        var beg = parseInt(f[1]);
        var end = parseInt(f[2]);
        var starts_new_run = cur != f[0] || beg > run_en;
        if (!starts_new_run) {
            // Same contig and overlapping/touching: grow the current run.
            if (beg < run_st) throw Error("ERROR: input is not sorted by coordinate");
            if (!(run_en > end)) run_en = end;
            continue;
        }
        // Close the previous run; report the contig if we are leaving it.
        total += run_en - run_st;
        if (cur != null && cur != f[0]) {
            print(cur, total);
            total = 0;
        }
        cur = f[0], run_st = beg, run_en = end;
    }
    if (cur != null) {
        total += run_en - run_st;
        print(cur, total);
    }
    file.close();
    buf.destroy();
    return 0;
}
|
183
|
+
|
184
|
+
/**
 * "gdist" command: for each region of a BED file, print the region followed
 * by a value derived from a 3-column map (contig, position, map value).
 *
 * The map is turned into consecutive intervals per contig, each carrying the
 * map value at its start; the value at a coordinate is linearly interpolated
 * within its interval. The printed value is
 * 1e6 * (value at end - value at start) / (end - start), -1 for a contig
 * absent from the map, and 0 for a zero-length region.
 *
 * @param args  args[0] is the map file; args[1] is the BED file (File is
 *              constructed without a path when it is omitted)
 * @throws Error on two consecutive identical map positions, or when a region
 *         endpoint does not fall into exactly one map interval
 */
function bed_gdist(args)
{
    // Map value at coordinate x, which lies in interval idx of g. The last
    // interval is open-ended, so no interpolation is applied there.
    function value_at(g, idx, x) {
        var iv = g[idx];
        var delta = idx == g.length - 1? 0 : (g[idx+1][3] - iv[3]) / (iv[1] - iv[0]) * (x - iv[0]);
        return iv[3] + delta;
    }

    if (args.length == 0) {
        print("Usage: bedutils.js gdist <3-col-gmap.txt> <reg.bed>");
        exit(1);
    }
    var file, f, buf = new Bytes();

    // Pass 1: build [start, end, -1, value-at-start] intervals per contig.
    var gmap = {};
    file = new File(args[0]);
    var prev_pos = 0, prev_ctg = null, prev_val = 0.0;
    while (file.readline(buf) >= 0) {
        f = buf.toString().split("\t");
        var pos = parseInt(f[1]);
        var val = parseFloat(f[2]);
        if (prev_ctg != f[0] && prev_ctg != null) {
            // Contig changed: close the previous contig with an open-ended tail.
            gmap[prev_ctg].push([prev_pos, 0x7fffffff, -1, prev_val]);
            prev_pos = 0, prev_val = 0.0;
        }
        if (gmap[f[0]] == null) gmap[f[0]] = [];
        if (prev_pos == pos) throw Error("Zero-length interval");
        gmap[f[0]].push([prev_pos, pos, -1, prev_val]);
        prev_pos = pos, prev_ctg = f[0], prev_val = val;
    }
    if (prev_ctg != null)
        gmap[prev_ctg].push([prev_pos, 0x7fffffff, -1, prev_val]);
    file.close();

    for (var name in gmap) it_index(gmap[name]);

    // Pass 2: stream the regions and look up both endpoints.
    file = args.length >= 2? new File(args[1]) : new File();
    while (file.readline(buf) >= 0) {
        f = buf.toString().split("\t");
        var st = parseInt(f[1]), en = parseInt(f[2]);
        var rate, g = gmap[f[0]];
        if (g == null) {
            rate = -1;
        } else if (st == en) {
            rate = 0;
        } else {
            var hit_st = it_overlap(g, st, st + 1);
            var hit_en = it_overlap(g, en - 1, en);
            if (hit_st.length != 1 || hit_en.length != 1)
                throw Error("Bug!");
            var x_st = value_at(g, hit_st[0], st);
            var x_en = value_at(g, hit_en[0], en);
            rate = 1e6 * (x_en - x_st) / (en - st);
        }
        if (!(rate <= 0)) rate = rate.toFixed(15);
        print(f[0], f[1], f[2], rate);
    }
    file.close();
    buf.destroy();
}
|
237
|
+
|
238
|
+
/**
 * "window" command: slide a window along each contig of a BED file and print
 * contig, window centre and a per-window value scaled by 1e6 / window length.
 * The value is the number of overlapping regions with -c, and the summed
 * overlap length otherwise.
 *
 * Options: -w INT window size [1000000], -s INT step between window centres
 * [500000], -c count regions instead of summing overlap, -l FILE two-column
 * contig-length file used to extend a contig beyond its last region.
 *
 * @param args  command-line arguments after the command name
 * @throws Error when -w is below 2 or -s is not positive (a zero step would
 *         never terminate; a zero-width window would divide by zero)
 */
function bed_window(args)
{
    var c, win_size = 1000000, skip = 500000, cnt_only = false, fn_len = null;
    while ((c = getopt(args, "w:s:cl:")) != null) {
        if (c == 'w') win_size = parseInt(getopt.arg);
        else if (c == 's') skip = parseInt(getopt.arg);
        else if (c == 'c') cnt_only = true;
        else if (c == 'l') fn_len = getopt.arg;
    }
    var half = win_size >> 1; // windows extend `half` to each side of the centre
    if (!(half > 0)) throw Error("ERROR: window size (-w) must be an integer no less than 2");
    if (!(skip > 0)) throw Error("ERROR: step size (-s) must be a positive integer");

    // Optional contig lengths.
    var lens = {}, file, buf = new Bytes();
    if (fn_len) {
        file = new File(fn_len);
        while (file.readline(buf) >= 0) {
            var t = buf.toString().split("\t");
            if (t.length < 2) continue;
            lens[t[0]] = parseInt(t[1]);
        }
        file.close();
    }

    // Load regions, remembering the order in which contigs first appear.
    file = getopt.ind < args.length? new File(args[getopt.ind]) : new File();
    var bed = {}, ctgs = [];
    while (file.readline(buf) >= 0) {
        var t = buf.toString().split("\t");
        if (bed[t[0]] == null) { bed[t[0]] = []; ctgs.push(t[0]); }
        bed[t[0]].push([parseInt(t[1]), parseInt(t[2]), -1]);
    }
    file.close();
    buf.destroy();

    for (var ct = 0; ct < ctgs.length; ++ct) {
        var ctg = ctgs[ct];
        it_index(bed[ctg]);
        var a = bed[ctg];
        // The contig extends to the furthest region end, or to the declared length if larger.
        var max = 0;
        for (var i = 0; i < a.length; ++i)
            max = max > a[i][1]? max : a[i][1];
        if (lens[ctg] > 0 && max < lens[ctg]) max = lens[ctg];
        for (var x = 0; x < max; x += skip) {
            var st = x - half, en = x + half;
            if (st < 0) st = 0;
            if (en > max) en = max;
            var sum = 0, b = it_overlap(a, st, en);
            if (cnt_only) {
                sum = b.length;
            } else {
                for (var i = 0; i < b.length; ++i) {
                    var iv = a[b[i]];
                    // Clip the region to the window before adding its length.
                    var s = st > iv[0]? st : iv[0];
                    var e = en < iv[1]? en : iv[1];
                    sum += e - s;
                }
            }
            print(ctg, x, sum/(en-st)*1e6);
        }
    }
}
|
295
|
+
|
296
|
+
/**
 * "cov" command: for every region of the streamed BED file, print the region,
 * the number of overlapping regions in the loaded BED file, and the number of
 * bases of the region covered by them.
 *
 * @param args  args[0] is the BED file held in memory; args[1] is the BED
 *              file read line by line
 */
function bed_cov(args)
{
    if (args.length < 2) {
        warn("Usage: bedutils.js cov <loaded.bed> <streamed.bed>");
        exit(1);
    }
    var file, f, buf = new Bytes();

    // Load and index the first file, grouped by contig.
    var loaded = {};
    file = new File(args[0]);
    while (file.readline(buf) >= 0) {
        f = buf.toString().split("\t", 3);
        if (loaded[f[0]] == null) loaded[f[0]] = [];
        loaded[f[0]].push([parseInt(f[1]), parseInt(f[2])]);
    }
    for (var name in loaded) it_index(loaded[name]);
    file.close();

    // Stream the second file.
    file = new File(args[1]);
    while (file.readline(buf) >= 0) {
        f = buf.toString().split("\t", 3);
        var ivs = loaded[f[0]];
        if (ivs == null) {
            print(f[0], f[1], f[2], 0, 0);
            continue;
        }
        var q_st = parseInt(f[1]), q_en = parseInt(f[2]);
        var hits = it_overlap(ivs, q_st, q_en);
        // Merge the clipped hits into runs and add up the run lengths.
        var run_st = 0, run_en = 0, covered = 0;
        for (var k = 0; k < hits.length; ++k) {
            var iv = ivs[hits[k]];
            var c_st = iv[0] > q_st? iv[0] : q_st;
            var c_en = iv[1] < q_en? iv[1] : q_en;
            if (c_st > run_en) {
                covered += run_en - run_st;
                run_st = c_st, run_en = c_en;
            } else if (!(run_en > c_en)) {
                run_en = c_en;
            }
        }
        covered += run_en - run_st;
        print(f[0], f[1], f[2], hits.length, covered);
    }
    file.close();

    buf.destroy();
}
|
340
|
+
|
341
|
+
/**
 * Command dispatcher: the first argument selects the sub-command and the
 * remaining arguments are handed to it. Without arguments, the list of
 * commands is printed and exit(1) is called.
 *
 * @param args  command-line arguments; the first element is removed
 * @throws Error for an unrecognized command
 */
function main(args)
{
    if (args.length == 0) {
        var usage = [
            "Usage: bedutils.js <command> [arguments]",
            "Commands:",
            " sum sum of BED regions (deprecated by bedtk)",
            " sum1 sum of BED regions for each contig",
            " sum2nd sum of the 2nd column",
            " merge merge overlapping regions in *sorted* BED (deprecated)",
            " cov breadth of coverage (deprecated by bedtk)",
            " gdist genetic distance from 3-col genetic map",
            " window window-based counting"
        ];
        for (var i = 0; i < usage.length; ++i) print(usage[i]);
        exit(1);
    }

    var cmd = args.shift();
    if (cmd == 'sum') {
        bed_sum(args);
    } else if (cmd == 'sum2nd') {
        bed_sum2nd(args);
    } else if (cmd == 'sum1') {
        bed_sum1(args);
    } else if (cmd == 'merge') {
        bed_merge(args);
    } else if (cmd == 'cov') {
        bed_cov(args);
    } else if (cmd == 'gdist') {
        bed_gdist(args);
    } else if (cmd == 'window') {
        bed_window(args);
    } else {
        throw Error("unrecognized command: " + cmd);
    }
}
|
366
|
+
|
367
|
+
// Script entry point: hand the global argument list to the dispatcher
// (presumably the command-line arguments supplied by the k8 runtime named in the shebang).
main(arguments);
|
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/env k8
|
2
|
+
|
3
|
+
/**
 * Minimal getopt(3)-style command-line option parser.
 *
 * Scanning state is kept on the function object itself so that callers can
 * inspect it between calls:
 *   getopt.ind   - index in `args` of the next argument to be scanned
 *   getopt.arg   - argument of the option just returned, or null
 *   getopt.place - offset inside args[getopt.ind]; -1 means "start a new argument"
 *
 * @param args  array of argument strings
 * @param ostr  option letters; a letter followed by ':' takes an argument
 * @return the option letter; '?' for an unknown option or a missing argument
 *         (':' for a missing argument when `ostr` starts with ':'); null when
 *         option scanning is finished
 */
var getopt = function(args, ostr) {
    var oli; // option letter list index
    if (typeof(getopt.place) == 'undefined')
        getopt.ind = 0, getopt.arg = null, getopt.place = -1;
    if (getopt.place == -1) { // update scanning pointer
        if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
            getopt.place = -1;
            return null;
        }
        if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
            ++getopt.ind;
            getopt.place = -1;
            return null;
        }
    }
    var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
    if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
        if (optopt == '-') { // if the user didn't specify '-' as an option, assume it means null.
            getopt.place = -1; // leave a clean state; getopt.ind still points at this argument
            return null;
        }
        // The unknown letter was the last one in this argument: move on to the
        // next argument. Without this, the following call would read an empty
        // option character, which indexOf() "finds" at position 0 of ostr.
        if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
        return '?';
    }
    if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
        getopt.arg = null;
        if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
    } else { // need an argument
        if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
            getopt.arg = args[getopt.ind].substr(getopt.place); // "-w10" form
        else if (args.length <= ++getopt.ind) { // no arg
            getopt.place = -1;
            if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
            return '?';
        } else getopt.arg = args[getopt.ind]; // white space, i.e. "-w 10" form
        getopt.place = -1;
        ++getopt.ind;
    }
    return optopt;
}
|
40
|
+
|
41
|
+
// Top-level script: read a chromosome size table and a data table, then print
// a gnuplot script that draws one stacked filled-curve panel per chromosome.

var c, width = 2, height = 1.5, y_max0 = null, y_min0 = null, fn_out = "chr-plot.eps", n = 1, fsize = 14;
while ((c = getopt(arguments, "w:h:x:i:o:n:f:")) != null) {
    if (c == 'h') height = parseFloat(getopt.arg);
    else if (c == 'n') n = parseInt(getopt.arg);
    else if (c == 'w') width = parseFloat(getopt.arg);
    else if (c == 'x') y_max0 = parseFloat(getopt.arg);
    else if (c == 'i') y_min0 = parseFloat(getopt.arg);
    else if (c == 'o') fn_out = getopt.arg;
    else if (c == 'f') fsize = parseInt(getopt.arg);
}

if (arguments.length - getopt.ind < 2) {
    print("Usage: chr-plot.js [options] <chr.size> <dat.txt>");
    print("Options:");
    print(" -n INT number of data points [" + n + "]");
    print(" -w FLOAT width of the plot [" + width + "]");
    print(" -h FLOAT height of the plot [" + height + "]");
    print(" -x FLOAT max y value [auto]");
    print(" -i FLOAT min y value [auto]");
    print(" -o FILE output file name [chr-plot.eps]");
    print(" -f INT font size [" + fsize + "]"); // accepted above but previously undocumented
    exit(1);
}

var file, buf = new Bytes();

// Size table columns as read below: name, shaded-region start, shaded-region end, length.
var chr_list = [], chr = {}, cen = [], max_len = 0;
file = new File(arguments[getopt.ind]);
while (file.readline(buf) >= 0) {
    var t = buf.toString().split("\t");
    if (t.length < 4) continue; // blank or truncated line: would add a panel with NaN length
    var len = parseInt(t[3]);
    chr_list.push(t[0]);
    cen.push([parseInt(t[1]), parseInt(t[2])]);
    chr[t[0]] = len;
    max_len = max_len > len? max_len : len;
}
file.close();

// First pass over the data: find the y range from column 3 of known chromosomes.
var y_max = -1e300, y_min = 1e300;
file = new File(arguments[getopt.ind+1]);
while (file.readline(buf) >= 0) {
    var t = buf.toString().split("\t");
    if (chr[t[0]] == null) continue;
    var y = parseFloat(t[2]);
    y_max = y_max > y? y_max : y;
    y_min = y_min < y? y_min : y;
}
file.close();
if (y_max0 != null) y_max = y_max0; // -x overrides the automatic maximum
if (y_min0 != null) y_min = y_min0; // -i overrides the automatic minimum

buf.destroy();

// Global gnuplot settings.
print('set t po eps co so enh "Helvetica,' + fsize + '"');
print('set out "' + fn_out + '"');
print('set size ' + width + ',' + (height + 0.02));
print('set multiplot layout ' + chr_list.length + ',1');
print('set lmargin screen ' + (fsize/2 * 0.01 + 0.005).toFixed(3));
print('set border 0; unset xtics; unset ytics; set bmargin 0; set tmargin 0.02; set rmargin 0.02');
print('set style line 1 lc rgb "#377eb8" lw 1');
print('set style line 2 lc rgb "#e41a1c" lw 1');
print('set style line 3 lc rgb "#4daf4a" lw 1');
print('set yran [' + y_min + ':' + y_max + ']');
print('');

// One panel per chromosome, stacked from the top; panel width is proportional to length.
var h = height / chr_list.length;
for (var i = 0; i < chr_list.length; ++i) {
    var len = chr[chr_list[i]];
    print('set origin 0,' + (height - (i + 1) * h + 0.01));
    print('set xran [0:' + len * 1e-6 + ']');
    print('set size ' + (width*len/max_len) + ',' + h);
    print('set style rect fc lt -1 fs solid 0.15 noborder');
    print('unset obj; unset label');
    print('set obj rect from ' + cen[i][0]*1e-6 + ', graph 0 to ' + cen[i][1]*1e-6 + ', graph 1');
    print('set label "' + chr_list[i] + '" at screen 0.01, graph 0.5');
    print('plot \\');
    for (var j = 0; j < n; ++j) {
        // Series j is drawn between the running sums of columns 3..(j+2) and 3..(j+3).
        var st, en, endl = j == n - 1? '' : ', \\';
        if (j > 0) {
            st = en = '';
            for (var k = 0; k < j; ++k) {
                st += '+$' + (k + 3);
                en += '+$' + (k + 3);
            }
            en += '+$' + (j + 3);
            st = st.replace(/^\+/, "(") + ")";
            en = en.replace(/^\+/, "(") + ")";
        } else st = '(0)', en = '($3)';
        print(' "<awk \'$1==\\"' + chr_list[i] + '\\"\' ' + arguments[getopt.ind+1] + '" u ($2*1e-6):' + st + ':' + en + ' not w filledcu ls ' + (j+1) + endl);
    }
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Derive annotation tracks from $(prefix).fa. The tool paths below point into
# the author's home directory and must exist on the machine running make.
prefix=CHM13-f1-90.bb

# `all` names no file, so declare it phony to keep a stray file called "all"
# from masking it.
.PHONY: all

all:$(prefix).base $(prefix).gap $(prefix).brnn.gz $(prefix).etrf.gz $(prefix).sdust.gz

# Four columns per sequence: the three parts of a NAME_START_END sequence
# name, then the second column of `seqtk comp`.
$(prefix).base:$(prefix).fa
	seqtk comp $< | cut -f1,2 | perl -ane 'print "$$1\t$$2\t$$3\t$$F[1]\n" if $$F[0]=~/(\S+)_(\d+)_(\d+)/' > $@

$(prefix).gap:$(prefix).fa
	seqtk gap $< > $@

$(prefix).brnn.gz:$(prefix).fa
	~/dna-nn/dna-brnn -Ai ~/dna-nn/attcc-alpha.knm -t16 $< | htsbox bgzip > $@

$(prefix).etrf.gz:$(prefix).fa
	~/src/etrf/etrf $< | htsbox bgzip > $@

$(prefix).sdust.gz:$(prefix).fa
	~/minimap2/sdust $< | htsbox bgzip > $@

# Alignments against the references; not part of `all`.
CHM13-f1-90.bb.paf.gz:CHM13-f1-90.bb.fa
	minimap2 -cxasm20 -r2k --cs -t16 ~/ref/CHM13v1Y.fa $< 2> CHM13-f1-90.bb.paf.log | gzip > $@

GRCh38-f1-90.bb.paf.gz:GRCh38-f1-90.bb.fa
	minimap2 -cxasm20 -r2k --cs -t16 ~/ref/hs38.fa $< 2> GRCh38-f1-90.bb.paf.log | gzip > $@
|
data/ext/minigraph.patch
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
--- a/Makefile
|
2
|
+
+++ b/Makefile
|
3
|
+
@@ -1,11 +1,11 @@
|
4
|
+
CC= gcc
|
5
|
+
-CFLAGS= -g -Wall -Wc++-compat -std=c99 -msse4 -O3
|
6
|
+
+CFLAGS= -g -Wall -Wc++-compat -std=c99 -msse4 -O3 -fPIC
|
7
|
+
CPPFLAGS=
|
8
|
+
INCLUDES=
|
9
|
+
OBJS= kalloc.o kthread.o algo.o sys.o gfa-base.o gfa-io.o gfa-aug.o gfa-bbl.o gfa-ed.o \
|
10
|
+
sketch.o misc.o bseq.o options.o shortk.o miniwfa.o \
|
11
|
+
index.o lchain.o gchain1.o galign.o gcmisc.o map-algo.o cal_cov.o \
|
12
|
+
- format.o gmap.o ggsimple.o ggen.o asm-call.o
|
13
|
+
+ format.o gmap.o ggsimple.o ggen.o asm-call.o cmappy.o
|
14
|
+
PROG= minigraph
|
15
|
+
LIBS= -lz -lpthread -lm
|
16
|
+
|
17
|
+
@@ -64,3 +64,4 @@ options.o: mgpriv.h minigraph.h gfa.h sys.h
|
18
|
+
shortk.o: mgpriv.h minigraph.h gfa.h ksort.h kavl.h algo.h khashl.h kalloc.h
|
19
|
+
sketch.o: kvec-km.h kalloc.h mgpriv.h minigraph.h gfa.h
|
20
|
+
sys.o: sys.h
|
21
|
+
+cmappy.o: cmappy.h
|