minimap2 0.2.27.0 → 0.2.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/cmappy/cmappy.c +3 -3
- data/ext/cmappy/cmappy.h +1 -1
- data/ext/minimap2/FAQ.md +1 -1
- data/ext/minimap2/Makefile +4 -3
- data/ext/minimap2/NEWS.md +68 -0
- data/ext/minimap2/README.md +30 -14
- data/ext/minimap2/align.c +136 -52
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/format.c +59 -5
- data/ext/minimap2/hit.c +14 -6
- data/ext/minimap2/index.c +304 -13
- data/ext/minimap2/jump.c +201 -0
- data/ext/minimap2/kalloc.h +8 -0
- data/ext/minimap2/ksw2.h +5 -2
- data/ext/minimap2/ksw2_dispatch.c +5 -5
- data/ext/minimap2/ksw2_exts2_sse.c +17 -6
- data/ext/minimap2/lchain.c +5 -5
- data/ext/minimap2/main.c +64 -12
- data/ext/minimap2/map.c +35 -8
- data/ext/minimap2/minimap.h +14 -3
- data/ext/minimap2/minimap2.1 +98 -46
- data/ext/minimap2/misc/README.md +2 -1
- data/ext/minimap2/misc/pafcluster.js +241 -0
- data/ext/minimap2/misc/paftools.js +17 -6
- data/ext/minimap2/mmpriv.h +25 -4
- data/ext/minimap2/options.c +36 -3
- data/ext/minimap2/python/cmappy.h +3 -3
- data/ext/minimap2/python/cmappy.pxd +5 -2
- data/ext/minimap2/python/mappy.pyx +20 -7
- data/ext/minimap2/python/minimap2.py +5 -3
- data/ext/minimap2/seed.c +2 -1
- data/ext/minimap2/setup.py +2 -2
- data/ext/minimap2.patch +2 -2
- data/lib/minimap2/aligner.rb +19 -12
- data/lib/minimap2/alignment.rb +1 -0
- data/lib/minimap2/ffi/constants.rb +10 -2
- data/lib/minimap2/ffi/functions.rb +145 -6
- data/lib/minimap2/ffi/mappy.rb +1 -1
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +2 -2
- metadata +8 -7
- data/ext/minimap2/misc/mmphase.js +0 -335
@@ -7,6 +7,7 @@ module Minimap2
|
|
7
7
|
%i[int pointer],
|
8
8
|
:int
|
9
9
|
|
10
|
+
# int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
|
10
11
|
attach_function \
|
11
12
|
:mm_set_opt_raw, :mm_set_opt,
|
12
13
|
[:pointer, IdxOpt.by_ref, MapOpt.by_ref],
|
@@ -24,66 +25,204 @@ module Minimap2
|
|
24
25
|
mm_set_opt_raw(ptr, io, mo)
|
25
26
|
end
|
26
27
|
|
28
|
+
# int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo);
|
29
|
+
attach_function \
|
30
|
+
:mm_check_opt,
|
31
|
+
[IdxOpt.by_ref, MapOpt.by_ref],
|
32
|
+
:int
|
33
|
+
|
34
|
+
# void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
|
35
|
+
attach_function \
|
36
|
+
:mm_mapopt_update,
|
37
|
+
[MapOpt.by_ref, Idx.by_ref],
|
38
|
+
:void
|
39
|
+
|
40
|
+
# void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
|
41
|
+
attach_function \
|
42
|
+
:mm_mapopt_max_intron_len,
|
43
|
+
[MapOpt.by_ref, :int],
|
44
|
+
:void
|
45
|
+
|
46
|
+
# mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out);
|
27
47
|
attach_function \
|
28
48
|
:mm_idx_reader_open,
|
29
49
|
[:string, IdxOpt.by_ref, :string],
|
30
50
|
IdxReader.by_ref
|
31
51
|
|
52
|
+
# mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
|
32
53
|
attach_function \
|
33
54
|
:mm_idx_reader_read,
|
34
55
|
[IdxReader.by_ref, :int],
|
35
56
|
Idx.by_ref
|
36
57
|
|
58
|
+
# void mm_idx_reader_close(mm_idx_reader_t *r);
|
37
59
|
attach_function \
|
38
60
|
:mm_idx_reader_close,
|
39
61
|
[IdxReader.by_ref],
|
40
62
|
:void
|
41
63
|
|
64
|
+
# int mm_idx_reader_eof(const mm_idx_reader_t *r);
|
42
65
|
attach_function \
|
43
|
-
:
|
44
|
-
[
|
66
|
+
:mm_idx_reader_eof,
|
67
|
+
[IdxReader.by_ref],
|
68
|
+
:int
|
69
|
+
|
70
|
+
# int64_t mm_idx_is_idx(const char *fn);
|
71
|
+
attach_function \
|
72
|
+
:mm_idx_is_idx,
|
73
|
+
[:string],
|
74
|
+
:int64_t
|
75
|
+
|
76
|
+
# mm_idx_t *mm_idx_load(FILE *fp);
|
77
|
+
attach_function \
|
78
|
+
:mm_idx_load,
|
79
|
+
[:pointer], # FILE pointer
|
80
|
+
Idx.by_ref
|
81
|
+
|
82
|
+
# void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
|
83
|
+
attach_function \
|
84
|
+
:mm_idx_dump,
|
85
|
+
[:pointer, Idx.by_ref], # FILE pointer
|
45
86
|
:void
|
46
87
|
|
88
|
+
# mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
|
47
89
|
attach_function \
|
48
|
-
:
|
49
|
-
[
|
90
|
+
:mm_idx_str,
|
91
|
+
%i[int int int int int pointer pointer],
|
92
|
+
Idx.by_ref
|
93
|
+
|
94
|
+
# void mm_idx_stat(const mm_idx_t *idx);
|
95
|
+
attach_function \
|
96
|
+
:mm_idx_stat,
|
97
|
+
[Idx.by_ref],
|
50
98
|
:void
|
51
99
|
|
100
|
+
# void mm_idx_destroy(mm_idx_t *mi);
|
52
101
|
attach_function \
|
53
|
-
:
|
102
|
+
:mm_idx_destroy,
|
54
103
|
[Idx.by_ref],
|
55
|
-
:
|
104
|
+
:void
|
56
105
|
|
106
|
+
# mm_tbuf_t *mm_tbuf_init(void);
|
57
107
|
attach_function \
|
58
108
|
:mm_tbuf_init,
|
59
109
|
[],
|
60
110
|
TBuf.by_ref
|
61
111
|
|
112
|
+
# void mm_tbuf_destroy(mm_tbuf_t *b);
|
62
113
|
attach_function \
|
63
114
|
:mm_tbuf_destroy,
|
64
115
|
[TBuf.by_ref],
|
65
116
|
:void
|
66
117
|
|
118
|
+
# void *mm_tbuf_get_km(mm_tbuf_t *b);
|
67
119
|
attach_function \
|
68
120
|
:mm_tbuf_get_km,
|
69
121
|
[TBuf.by_ref],
|
70
122
|
:pointer
|
71
123
|
|
124
|
+
# mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
|
125
|
+
attach_function \
|
126
|
+
:mm_map,
|
127
|
+
[Idx.by_ref, :int, :string, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
|
128
|
+
Reg1.by_ref
|
129
|
+
|
130
|
+
# void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
|
131
|
+
attach_function \
|
132
|
+
:mm_map_frag,
|
133
|
+
[Idx.by_ref, :int, :pointer, :pointer, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
|
134
|
+
:void
|
135
|
+
|
136
|
+
# int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
|
137
|
+
attach_function \
|
138
|
+
:mm_map_file,
|
139
|
+
[Idx.by_ref, :string, MapOpt.by_ref, :int],
|
140
|
+
:int
|
141
|
+
|
142
|
+
# int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
|
143
|
+
attach_function \
|
144
|
+
:mm_map_file_frag,
|
145
|
+
[Idx.by_ref, :int, :pointer, MapOpt.by_ref, :int],
|
146
|
+
:int
|
147
|
+
|
148
|
+
# int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
|
72
149
|
attach_function \
|
73
150
|
:mm_gen_cs,
|
74
151
|
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
|
75
152
|
:int
|
76
153
|
|
154
|
+
# int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
|
77
155
|
attach_function \
|
78
156
|
:mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
|
79
157
|
[:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
|
80
158
|
:int
|
81
159
|
|
160
|
+
# int mm_idx_index_name(mm_idx_t *mi);
|
161
|
+
attach_function \
|
162
|
+
:mm_idx_index_name,
|
163
|
+
[Idx.by_ref],
|
164
|
+
:int
|
165
|
+
|
166
|
+
# int mm_idx_name2id(const mm_idx_t *mi, const char *name);
|
167
|
+
attach_function \
|
168
|
+
:mm_idx_name2id,
|
169
|
+
[Idx.by_ref, :string],
|
170
|
+
:int
|
171
|
+
|
172
|
+
# int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
173
|
+
attach_function \
|
174
|
+
:mm_idx_getseq,
|
175
|
+
[Idx.by_ref, :uint32, :uint32, :uint32, :pointer],
|
176
|
+
:int
|
177
|
+
|
178
|
+
# int mm_idx_alt_read(mm_idx_t *mi, const char *fn);
|
179
|
+
attach_function \
|
180
|
+
:mm_idx_alt_read,
|
181
|
+
[Idx.by_ref, :string],
|
182
|
+
:int
|
183
|
+
|
184
|
+
# int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
|
185
|
+
attach_function \
|
186
|
+
:mm_idx_bed_read,
|
187
|
+
[Idx.by_ref, :string, :int],
|
188
|
+
:int
|
189
|
+
|
190
|
+
# int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
|
191
|
+
attach_function \
|
192
|
+
:mm_idx_bed_junc,
|
193
|
+
[Idx.by_ref, :int32, :int32, :int32, :pointer],
|
194
|
+
:int
|
195
|
+
|
196
|
+
# int mm_max_spsc_bonus(const mm_mapopt_t *mo);
|
197
|
+
attach_function \
|
198
|
+
:mm_max_spsc_bonus,
|
199
|
+
[MapOpt.by_ref],
|
200
|
+
:int
|
201
|
+
|
202
|
+
# int32_t mm_idx_spsc_read(mm_idx_t *idx, const char *fn, int32_t max_sc);
|
203
|
+
attach_function \
|
204
|
+
:mm_idx_spsc_read,
|
205
|
+
[Idx.by_ref, :string, :int32],
|
206
|
+
:int32
|
207
|
+
|
208
|
+
# int64_t mm_idx_spsc_get(const mm_idx_t *db, int32_t cid, int64_t st0, int64_t en0, int32_t rev, uint8_t *sc);
|
209
|
+
attach_function \
|
210
|
+
:mm_idx_spsc_get,
|
211
|
+
[Idx.by_ref, :int32, :int64, :int64, :int32, :pointer],
|
212
|
+
:int64
|
213
|
+
|
214
|
+
# void mm_mapopt_init(mm_mapopt_t *opt);
|
82
215
|
attach_function \
|
83
216
|
:mm_mapopt_init,
|
84
217
|
[MapOpt.by_ref],
|
85
218
|
:void
|
86
219
|
|
220
|
+
# mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
|
221
|
+
attach_function \
|
222
|
+
:mm_idx_build,
|
223
|
+
%i[string int int int int],
|
224
|
+
Idx.by_ref
|
225
|
+
|
87
226
|
# mmpriv.h
|
88
227
|
|
89
228
|
attach_function \
|
data/lib/minimap2/ffi/mappy.rb
CHANGED
data/lib/minimap2/version.rb
CHANGED
data/lib/minimap2.rb
CHANGED
@@ -116,11 +116,11 @@ module Minimap2
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def fastx_next(ks, read_comment)
|
119
|
-
qual = ks[:qual][:s] if
|
119
|
+
qual = ks[:qual][:s] if ks[:qual][:l] > 0
|
120
120
|
name = ks[:name][:s]
|
121
121
|
seq = ks[:seq][:s]
|
122
122
|
if read_comment
|
123
|
-
comment = ks[:comment][:s] if
|
123
|
+
comment = ks[:comment][:s] if ks[:comment][:l] > 0
|
124
124
|
[name, seq, qual, comment]
|
125
125
|
else
|
126
126
|
[name, seq, qual]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.29.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-04-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- ext/minimap2/format.c
|
70
70
|
- ext/minimap2/hit.c
|
71
71
|
- ext/minimap2/index.c
|
72
|
+
- ext/minimap2/jump.c
|
72
73
|
- ext/minimap2/kalloc.c
|
73
74
|
- ext/minimap2/kalloc.h
|
74
75
|
- ext/minimap2/kdq.h
|
@@ -93,7 +94,7 @@ files:
|
|
93
94
|
- ext/minimap2/minimap2.1
|
94
95
|
- ext/minimap2/misc.c
|
95
96
|
- ext/minimap2/misc/README.md
|
96
|
-
- ext/minimap2/misc/
|
97
|
+
- ext/minimap2/misc/pafcluster.js
|
97
98
|
- ext/minimap2/misc/paftools.js
|
98
99
|
- ext/minimap2/mmpriv.h
|
99
100
|
- ext/minimap2/options.c
|
@@ -150,7 +151,7 @@ homepage: https://github.com/kojix2/ruby-minimap2
|
|
150
151
|
licenses:
|
151
152
|
- MIT
|
152
153
|
metadata: {}
|
153
|
-
post_install_message:
|
154
|
+
post_install_message:
|
154
155
|
rdoc_options: []
|
155
156
|
require_paths:
|
156
157
|
- lib
|
@@ -165,8 +166,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
166
|
- !ruby/object:Gem::Version
|
166
167
|
version: '0'
|
167
168
|
requirements: []
|
168
|
-
rubygems_version: 3.
|
169
|
-
signing_key:
|
169
|
+
rubygems_version: 3.4.19
|
170
|
+
signing_key:
|
170
171
|
specification_version: 4
|
171
172
|
summary: minimap2
|
172
173
|
test_files: []
|
@@ -1,335 +0,0 @@
|
|
1
|
-
#!/usr/bin/env k8
|
2
|
-
|
3
|
-
var getopt = function(args, ostr) {
|
4
|
-
var oli; // option letter list index
|
5
|
-
if (typeof(getopt.place) == 'undefined')
|
6
|
-
getopt.ind = 0, getopt.arg = null, getopt.place = -1;
|
7
|
-
if (getopt.place == -1) { // update scanning pointer
|
8
|
-
if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
|
9
|
-
getopt.place = -1;
|
10
|
-
return null;
|
11
|
-
}
|
12
|
-
if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
|
13
|
-
++getopt.ind;
|
14
|
-
getopt.place = -1;
|
15
|
-
return null;
|
16
|
-
}
|
17
|
-
}
|
18
|
-
var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
|
19
|
-
if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
|
20
|
-
if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
|
21
|
-
if (getopt.place < 0) ++getopt.ind;
|
22
|
-
return '?';
|
23
|
-
}
|
24
|
-
if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
|
25
|
-
getopt.arg = null;
|
26
|
-
if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
|
27
|
-
} else { // need an argument
|
28
|
-
if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
|
29
|
-
getopt.arg = args[getopt.ind].substr(getopt.place);
|
30
|
-
else if (args.length <= ++getopt.ind) { // no arg
|
31
|
-
getopt.place = -1;
|
32
|
-
if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
|
33
|
-
return '?';
|
34
|
-
} else getopt.arg = args[getopt.ind]; // white space
|
35
|
-
getopt.place = -1;
|
36
|
-
++getopt.ind;
|
37
|
-
}
|
38
|
-
return optopt;
|
39
|
-
}
|
40
|
-
|
41
|
-
function read_fastx(file, buf)
|
42
|
-
{
|
43
|
-
if (file.readline(buf) < 0) return null;
|
44
|
-
var m, line = buf.toString();
|
45
|
-
if ((m = /^([>@])(\S+)/.exec(line)) == null)
|
46
|
-
throw Error("wrong fastx format");
|
47
|
-
var is_fq = (m[1] == '@');
|
48
|
-
var name = m[2];
|
49
|
-
if (file.readline(buf) < 0)
|
50
|
-
throw Error("missing sequence line");
|
51
|
-
var seq = buf.toString();
|
52
|
-
if (is_fq) { // skip quality
|
53
|
-
file.readline(buf);
|
54
|
-
file.readline(buf);
|
55
|
-
}
|
56
|
-
return [name, seq];
|
57
|
-
}
|
58
|
-
|
59
|
-
function filter_paf(a, opt)
|
60
|
-
{
|
61
|
-
if (a.length == 0) return;
|
62
|
-
var k = 0;
|
63
|
-
for (var i = 0; i < a.length; ++i) {
|
64
|
-
var ai = a[i];
|
65
|
-
if (ai[10] < opt.min_blen) continue;
|
66
|
-
if (ai[9] < ai[10] * opt.min_iden) continue;
|
67
|
-
var clip = [0, 0];
|
68
|
-
if (ai[4] == '+') {
|
69
|
-
clip[0] = ai[2] < ai[7]? ai[2] : ai[7];
|
70
|
-
clip[1] = ai[1] - ai[3] < ai[6] - ai[8]? ai[1] - ai[3] : ai[6] - ai[8];
|
71
|
-
} else {
|
72
|
-
clip[0] = ai[2] < ai[6] - ai[8]? ai[2] : ai[6] - ai[8];
|
73
|
-
clip[1] = ai[1] - ai[3] < ai[7]? ai[1] - ai[3] : ai[7];
|
74
|
-
}
|
75
|
-
if (clip[0] > opt.max_clip_len || clip[1] > opt.max_clip_len) continue;
|
76
|
-
a[k++] = ai;
|
77
|
-
}
|
78
|
-
a.length = k;
|
79
|
-
}
|
80
|
-
|
81
|
-
function parse_events(t, ev, id, buf)
|
82
|
-
{
|
83
|
-
var re = /(:(\d+))|(([\+\-\*])([a-z]+))/g;
|
84
|
-
var m, cs = null;
|
85
|
-
for (var j = 12; j < t.length; ++j) {
|
86
|
-
if ((m = /^cs:Z:(\S+)/.exec(t[j])) != null) {
|
87
|
-
cs = m[1].toLowerCase();
|
88
|
-
break;
|
89
|
-
}
|
90
|
-
}
|
91
|
-
if (cs == null) {
|
92
|
-
warn("Warning: no cs tag for read '" + t[0] + "'");
|
93
|
-
return;
|
94
|
-
}
|
95
|
-
var st = t[2], en = t[3];
|
96
|
-
var x = st;
|
97
|
-
while ((m = re.exec(cs)) != null) {
|
98
|
-
var l;
|
99
|
-
if (m[2] != null) { // an identitcal match ":\d+"
|
100
|
-
l = parseInt(m[2]);
|
101
|
-
// [start, end, type, index, changed_base]
|
102
|
-
ev.push([x, x + l, 0, id]);
|
103
|
-
} else {
|
104
|
-
if (m[4] == '*') {
|
105
|
-
l = 1;
|
106
|
-
ev.push([x, x + 1, 1, id, m[5][0]]);
|
107
|
-
} else if (m[4] == '+') {
|
108
|
-
l = m[5].length;
|
109
|
-
ev.push([x, x + l, 2, id]);
|
110
|
-
} else if (m[4] == '-') {
|
111
|
-
l = 0;
|
112
|
-
ev.push([x, x, -1, id, m[5]]);
|
113
|
-
}
|
114
|
-
}
|
115
|
-
x += l;
|
116
|
-
}
|
117
|
-
if (x != en)
|
118
|
-
throw Error("inconsistent cs for read '" + t[0] + "'");
|
119
|
-
}
|
120
|
-
|
121
|
-
function find_het_sub(ev, a, opt)
|
122
|
-
{
|
123
|
-
var n = a.length, last0_i = -1, h = [], d = [];
|
124
|
-
for (var i = 0; i < n; ++i) h[i] = [], d[i] = [];
|
125
|
-
for (var i = 0; i < ev.length; ++i) {
|
126
|
-
if (ev[i][2] == 0) {
|
127
|
-
if (last0_i < 0 || ev[i][0] != ev[last0_i][0]) last0_i = i;
|
128
|
-
else if (ev[i][1] > ev[last0_i][1])
|
129
|
-
last0_i = i;
|
130
|
-
} else if (ev[i][2] == 1 && last0_i >= 0 && ev[i][0] < ev[last0_i][1]) {
|
131
|
-
if (ev[last0_i][1] - ev[last0_i][0] >= opt.min_mlen) {
|
132
|
-
if (opt.dbg_ev) print("EV", ev[last0_i].join("\t"), "|", ev[i].join("\t"));
|
133
|
-
var e0 = ev[last0_i], hl = h[e0[3]];
|
134
|
-
if (hl.length == 0 || hl[hl.length-1][0] != e0[0])
|
135
|
-
hl.push([e0[0], e0[1]]);
|
136
|
-
d[ev[i][3]].push([ev[i][0], e0[1] - e0[0]]);
|
137
|
-
}
|
138
|
-
}
|
139
|
-
}
|
140
|
-
var b = [];
|
141
|
-
for (var i = 0; i < n; ++i) {
|
142
|
-
var sh = 0, dh = 0;
|
143
|
-
for (var j = 0; j < h[i].length; ++j)
|
144
|
-
sh += h[i][j][1] - h[i][j][0];
|
145
|
-
for (var j = 0; j < d[i].length; ++j)
|
146
|
-
dh += d[i][j][1];
|
147
|
-
// [start, end, index, #consistent, lenConsistent, #conflictive, lenConflictive, identity, mlen]
|
148
|
-
b[i] = [a[i][2], a[i][3], i, h[i].length, sh, d[i].length, dh, a[i][9] / a[i][10], a[i][9]];
|
149
|
-
}
|
150
|
-
return b;
|
151
|
-
}
|
152
|
-
|
153
|
-
function flt_utg_for_ec(b, opt)
|
154
|
-
{
|
155
|
-
var k = 0;
|
156
|
-
for (var i = 0; i < b.length; ++i) {
|
157
|
-
var bi = b[i];
|
158
|
-
if (bi[4] == 0 && bi[6] == 0) b[k++] = bi; // entirely ambiguous
|
159
|
-
else if (bi[6] < (bi[4] + bi[6]) * opt.max_ratio0) b[k++] = bi;
|
160
|
-
}
|
161
|
-
b.length = k;
|
162
|
-
if (b.length == 0) return;
|
163
|
-
// find the longest contiguous segment
|
164
|
-
b.sort(function(x,y) { return x[0]-y[0] });
|
165
|
-
var st = b[0][0], en = b[0][1], max_st = 0, max_en = 0, max_max_en = en;
|
166
|
-
for (var i = 1; i < b.length; ++i) {
|
167
|
-
if (b[i][0] > en) {
|
168
|
-
if (en - st > max_en - max_st)
|
169
|
-
max_st = st, max_en = en;
|
170
|
-
st = b[i][0], en = b[i][1];
|
171
|
-
} else {
|
172
|
-
en = en > b[i][1]? en : b[i][1];
|
173
|
-
}
|
174
|
-
max_max_en = max_max_en > b[i][1]? max_max_en : b[i][1];
|
175
|
-
}
|
176
|
-
if (en - st > max_en - max_st)
|
177
|
-
max_st = st, max_en = en;
|
178
|
-
if (max_max_en != en || st != b[0][0]) {
|
179
|
-
var k = 0;
|
180
|
-
for (var i = 0; i < b.length; ++i)
|
181
|
-
if (b[i][0] < max_en && b[i][1] > max_st)
|
182
|
-
b[k++] = b[i];
|
183
|
-
b.length = k;
|
184
|
-
}
|
185
|
-
}
|
186
|
-
|
187
|
-
function flt_utg_for_bin(b, opt) // filter out alignments clearly on the wrong phase
|
188
|
-
{
|
189
|
-
var k = 0;
|
190
|
-
for (var i = 0; i < b.length; ++i) {
|
191
|
-
var bi = b[i];
|
192
|
-
if (bi[4] + bi[6] == 0 || bi[4] >= (bi[4] + bi[6]) * opt.max_ratio0) b[k++] = bi;
|
193
|
-
}
|
194
|
-
b.length = k;
|
195
|
-
}
|
196
|
-
|
197
|
-
function ec_core(b, n_a, ev, buf, ecb) // error correction
|
198
|
-
{
|
199
|
-
var intv = [];
|
200
|
-
for (var i = 0; i < n_a; ++i)
|
201
|
-
intv[i] = null;
|
202
|
-
intv[b[0][2]] = [b[0][0], b[0][1]];
|
203
|
-
var en = b[0][1];
|
204
|
-
for (var i = 1; i < b.length; ++i) {
|
205
|
-
if (b[i][1] <= en) continue;
|
206
|
-
intv[b[i][2]] = [en, b[i][1]];
|
207
|
-
en = b[i][1];
|
208
|
-
}
|
209
|
-
var k = 0;
|
210
|
-
ecb.capacity = buf.capacity;
|
211
|
-
ecb.length = 0;
|
212
|
-
for (var i = 0; i < ev.length; ++i) {
|
213
|
-
var e = ev[i], I = intv[e[3]];
|
214
|
-
if (I == null) continue;
|
215
|
-
if (e[0] >= I[0] && e[0] < I[1]) { // this is to reduce duplicated events around junctions
|
216
|
-
//print("X", e.join("\t"));
|
217
|
-
if (e[2] == 0) {
|
218
|
-
ecb.length += e[1] - e[0];
|
219
|
-
for (var j = e[0]; j < e[1]; ++j)
|
220
|
-
ecb[k++] = buf[j];
|
221
|
-
} else if (e[2] == 1) {
|
222
|
-
++ecb.length;
|
223
|
-
ecb[k++] = e[4].charCodeAt(0);
|
224
|
-
} else if (e[2] < 0) {
|
225
|
-
ecb.length += e[4].length;
|
226
|
-
for (var j = 0; j < e[4].length; ++j)
|
227
|
-
ecb[k++] = e[4].charCodeAt(j);
|
228
|
-
} // else, skip e[2] == 2
|
229
|
-
}
|
230
|
-
}
|
231
|
-
if (ecb.length != k) throw Error("BUG!");
|
232
|
-
}
|
233
|
-
|
234
|
-
function process_paf(a, opt, fp_seq, buf, ecb)
|
235
|
-
{
|
236
|
-
if (a.length == 0) return;
|
237
|
-
var len = a[0][1], name = a[0][0], seq = null;
|
238
|
-
if (len < opt.min_rlen) return;
|
239
|
-
if (fp_seq) {
|
240
|
-
var ret;
|
241
|
-
while ((ret = read_fastx(fp_seq, buf)) != null)
|
242
|
-
if (ret[0] == a[0][0])
|
243
|
-
break;
|
244
|
-
if (ret == null)
|
245
|
-
throw Error("failed to find sequence for read '" + a[0][0] + "'");
|
246
|
-
name = ret[0], seq = ret[1];
|
247
|
-
if (seq.length != len)
|
248
|
-
throw Error("inconsistent length for read '" + name + "'");
|
249
|
-
}
|
250
|
-
filter_paf(a, opt);
|
251
|
-
if (a.length == 0) return;
|
252
|
-
var ev = [];
|
253
|
-
for (var i = 0; i < a.length; ++i)
|
254
|
-
parse_events(a[i], ev, i, buf);
|
255
|
-
ev.sort(function(x,y) { return x[0]!=y[0]? x[0]-y[0] : x[2]-y[2] });
|
256
|
-
if (seq == null) print("SQ", name, a[0][1], a.length);
|
257
|
-
var b = find_het_sub(ev, a, opt);
|
258
|
-
if (opt.ec) flt_utg_for_ec(b, opt);
|
259
|
-
else flt_utg_for_bin(b, opt);
|
260
|
-
if (seq == null) {
|
261
|
-
for (var i = 0; i < b.length; ++i) {
|
262
|
-
var m, ai = a[b[i][2]], score = 0;
|
263
|
-
for (var j = 10; j < ai.length; ++j)
|
264
|
-
if ((m = /^AS:i:(\d+)/.exec(ai[j])) != null)
|
265
|
-
score = m[1];
|
266
|
-
print("TS", b[i][2], b[i][0], b[i][1], ai.slice(5, 9).join("\t"), b[i].slice(3, 7).join("\t"), score);
|
267
|
-
}
|
268
|
-
print("//");
|
269
|
-
} else { // error correction
|
270
|
-
if (b.length == 0) return;
|
271
|
-
buf.set(seq, 0);
|
272
|
-
ec_core(b, a.length, ev, buf, ecb);
|
273
|
-
print(">" + name);
|
274
|
-
print(ecb);
|
275
|
-
}
|
276
|
-
}
|
277
|
-
|
278
|
-
function main(args)
|
279
|
-
{
|
280
|
-
var c, opt = { min_rlen:5000, min_blen:5000, min_iden:0.8, min_mlen:5, max_clip_len:500, max_ratio0:0.25, dbg_ev:false };
|
281
|
-
while ((c = getopt(args, "l:b:d:m:c:r:E")) != null) {
|
282
|
-
if (c == 'l') opt.min_rlen = parseInt(getopt.arg);
|
283
|
-
else if (c == 'b') opt.min_blen = parseInt(getopt.arg);
|
284
|
-
else if (c == 'd') opt.min_iden = parseFloat(getopt.arg);
|
285
|
-
else if (c == 'm') opt.min_slen = parseInt(getopt.arg);
|
286
|
-
else if (c == 'c') opt.max_clip_len = parseInt(getopt.arg);
|
287
|
-
else if (c == 'r') opt.max_ratio0 = parseFloat(getopt.arg);
|
288
|
-
else if (c == 'E') opt.dbg_ev = true;
|
289
|
-
}
|
290
|
-
if (args.length - getopt.ind < 1) {
|
291
|
-
print("Usage: mmphase.js [options] <map-with-cs.paf> [reads.fa]");
|
292
|
-
print("Options:");
|
293
|
-
print(" -l INT min read length [" + opt.min_rlen + "]");
|
294
|
-
print(" -b INT min alignment length [" + opt.min_blen + "]");
|
295
|
-
print(" -d FLOAT min identity [" + opt.min_iden + "]");
|
296
|
-
print(" -s INT min match length [" + opt.min_mlen + "]");
|
297
|
-
print(" -c INT max clip length [" + opt.max_clip_len + "]");
|
298
|
-
print(" -r FLOAT initial ratio for haplotype filtering [" + opt.max_ratio0 + "]");
|
299
|
-
return 0;
|
300
|
-
}
|
301
|
-
|
302
|
-
opt.ec = args.length - getopt.ind < 2? false : true;
|
303
|
-
if (!opt.ec) {
|
304
|
-
print("CC");
|
305
|
-
print("CC", "SQ qName qLen nHits");
|
306
|
-
print("CC", "TS index qStart qEnd tName tLen tStart tEnd nConsistent lCons nConflictive lConf score");
|
307
|
-
print("CC");
|
308
|
-
}
|
309
|
-
|
310
|
-
var buf = new Bytes(), ecb = new Bytes();
|
311
|
-
var fp_paf = new File(args[getopt.ind]);
|
312
|
-
var fp_seq = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : null;
|
313
|
-
var a = [];
|
314
|
-
while (fp_paf.readline(buf) >= 0) {
|
315
|
-
var t = buf.toString().split("\t");
|
316
|
-
if (a.length > 0 && a[0][0] != t[0]) {
|
317
|
-
process_paf(a, opt, fp_seq, buf, ecb);
|
318
|
-
a.length = 0;
|
319
|
-
}
|
320
|
-
for (var i = 1; i <= 3; ++i) t[i] = parseInt(t[i]);
|
321
|
-
if (t[1] < opt.min_rlen) continue;
|
322
|
-
for (var i = 6; i <= 10; ++i) t[i] = parseInt(t[i]);
|
323
|
-
if (t[10] < opt.min_blen) continue;
|
324
|
-
a.push(t);
|
325
|
-
}
|
326
|
-
if (a.length >= 0)
|
327
|
-
process_paf(a, opt, fp_seq, buf, ecb);
|
328
|
-
if (fp_seq) fp_seq.close();
|
329
|
-
fp_paf.close();
|
330
|
-
ecb.destroy();
|
331
|
-
buf.destroy();
|
332
|
-
}
|
333
|
-
|
334
|
-
var ret = main(arguments)
|
335
|
-
exit(ret)
|