minimap2 0.2.27.0 → 0.2.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/ext/cmappy/cmappy.c +3 -3
  4. data/ext/cmappy/cmappy.h +1 -1
  5. data/ext/minimap2/FAQ.md +1 -1
  6. data/ext/minimap2/Makefile +4 -3
  7. data/ext/minimap2/NEWS.md +68 -0
  8. data/ext/minimap2/README.md +30 -14
  9. data/ext/minimap2/align.c +136 -52
  10. data/ext/minimap2/cookbook.md +2 -2
  11. data/ext/minimap2/format.c +59 -5
  12. data/ext/minimap2/hit.c +14 -6
  13. data/ext/minimap2/index.c +304 -13
  14. data/ext/minimap2/jump.c +201 -0
  15. data/ext/minimap2/kalloc.h +8 -0
  16. data/ext/minimap2/ksw2.h +5 -2
  17. data/ext/minimap2/ksw2_dispatch.c +5 -5
  18. data/ext/minimap2/ksw2_exts2_sse.c +17 -6
  19. data/ext/minimap2/lchain.c +5 -5
  20. data/ext/minimap2/main.c +64 -12
  21. data/ext/minimap2/map.c +35 -8
  22. data/ext/minimap2/minimap.h +14 -3
  23. data/ext/minimap2/minimap2.1 +98 -46
  24. data/ext/minimap2/misc/README.md +2 -1
  25. data/ext/minimap2/misc/pafcluster.js +241 -0
  26. data/ext/minimap2/misc/paftools.js +17 -6
  27. data/ext/minimap2/mmpriv.h +25 -4
  28. data/ext/minimap2/options.c +36 -3
  29. data/ext/minimap2/python/cmappy.h +3 -3
  30. data/ext/minimap2/python/cmappy.pxd +5 -2
  31. data/ext/minimap2/python/mappy.pyx +20 -7
  32. data/ext/minimap2/python/minimap2.py +5 -3
  33. data/ext/minimap2/seed.c +2 -1
  34. data/ext/minimap2/setup.py +2 -2
  35. data/ext/minimap2.patch +2 -2
  36. data/lib/minimap2/aligner.rb +19 -12
  37. data/lib/minimap2/alignment.rb +1 -0
  38. data/lib/minimap2/ffi/constants.rb +10 -2
  39. data/lib/minimap2/ffi/functions.rb +145 -6
  40. data/lib/minimap2/ffi/mappy.rb +1 -1
  41. data/lib/minimap2/version.rb +1 -1
  42. data/lib/minimap2.rb +2 -2
  43. metadata +8 -7
  44. data/ext/minimap2/misc/mmphase.js +0 -335
@@ -7,6 +7,7 @@ module Minimap2
7
7
  %i[int pointer],
8
8
  :int
9
9
 
10
+ # int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
10
11
  attach_function \
11
12
  :mm_set_opt_raw, :mm_set_opt,
12
13
  [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
@@ -24,66 +25,204 @@ module Minimap2
24
25
  mm_set_opt_raw(ptr, io, mo)
25
26
  end
26
27
 
28
+ # int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo);
29
+ attach_function \
30
+ :mm_check_opt,
31
+ [IdxOpt.by_ref, MapOpt.by_ref],
32
+ :int
33
+
34
+ # void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
35
+ attach_function \
36
+ :mm_mapopt_update,
37
+ [MapOpt.by_ref, Idx.by_ref],
38
+ :void
39
+
40
+ # void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
41
+ attach_function \
42
+ :mm_mapopt_max_intron_len,
43
+ [MapOpt.by_ref, :int],
44
+ :void
45
+
46
+ # mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out);
27
47
  attach_function \
28
48
  :mm_idx_reader_open,
29
49
  [:string, IdxOpt.by_ref, :string],
30
50
  IdxReader.by_ref
31
51
 
52
+ # mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
32
53
  attach_function \
33
54
  :mm_idx_reader_read,
34
55
  [IdxReader.by_ref, :int],
35
56
  Idx.by_ref
36
57
 
58
+ # void mm_idx_reader_close(mm_idx_reader_t *r);
37
59
  attach_function \
38
60
  :mm_idx_reader_close,
39
61
  [IdxReader.by_ref],
40
62
  :void
41
63
 
64
+ # int mm_idx_reader_eof(const mm_idx_reader_t *r);
42
65
  attach_function \
43
- :mm_idx_destroy,
44
- [Idx.by_ref],
66
+ :mm_idx_reader_eof,
67
+ [IdxReader.by_ref],
68
+ :int
69
+
70
+ # int64_t mm_idx_is_idx(const char *fn);
71
+ attach_function \
72
+ :mm_idx_is_idx,
73
+ [:string],
74
+ :int64_t
75
+
76
+ # mm_idx_t *mm_idx_load(FILE *fp);
77
+ attach_function \
78
+ :mm_idx_load,
79
+ [:pointer], # FILE pointer
80
+ Idx.by_ref
81
+
82
+ # void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
83
+ attach_function \
84
+ :mm_idx_dump,
85
+ [:pointer, Idx.by_ref], # FILE pointer
45
86
  :void
46
87
 
88
+ # mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
47
89
  attach_function \
48
- :mm_mapopt_update,
49
- [MapOpt.by_ref, Idx.by_ref],
90
+ :mm_idx_str,
91
+ %i[int int int int int pointer pointer],
92
+ Idx.by_ref
93
+
94
+ # void mm_idx_stat(const mm_idx_t *idx);
95
+ attach_function \
96
+ :mm_idx_stat,
97
+ [Idx.by_ref],
50
98
  :void
51
99
 
100
+ # void mm_idx_destroy(mm_idx_t *mi);
52
101
  attach_function \
53
- :mm_idx_index_name,
102
+ :mm_idx_destroy,
54
103
  [Idx.by_ref],
55
- :int
104
+ :void
56
105
 
106
+ # mm_tbuf_t *mm_tbuf_init(void);
57
107
  attach_function \
58
108
  :mm_tbuf_init,
59
109
  [],
60
110
  TBuf.by_ref
61
111
 
112
+ # void mm_tbuf_destroy(mm_tbuf_t *b);
62
113
  attach_function \
63
114
  :mm_tbuf_destroy,
64
115
  [TBuf.by_ref],
65
116
  :void
66
117
 
118
+ # void *mm_tbuf_get_km(mm_tbuf_t *b);
67
119
  attach_function \
68
120
  :mm_tbuf_get_km,
69
121
  [TBuf.by_ref],
70
122
  :pointer
71
123
 
124
+ # mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
125
+ attach_function \
126
+ :mm_map,
127
+ [Idx.by_ref, :int, :string, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
128
+ Reg1.by_ref
129
+
130
+ # void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
131
+ attach_function \
132
+ :mm_map_frag,
133
+ [Idx.by_ref, :int, :pointer, :pointer, :pointer, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
134
+ :void
135
+
136
+ # int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
137
+ attach_function \
138
+ :mm_map_file,
139
+ [Idx.by_ref, :string, MapOpt.by_ref, :int],
140
+ :int
141
+
142
+ # int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
143
+ attach_function \
144
+ :mm_map_file_frag,
145
+ [Idx.by_ref, :int, :pointer, MapOpt.by_ref, :int],
146
+ :int
147
+
148
+ # int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
72
149
  attach_function \
73
150
  :mm_gen_cs,
74
151
  [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
75
152
  :int
76
153
 
154
+ # int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
77
155
  attach_function \
78
156
  :mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
79
157
  [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
80
158
  :int
81
159
 
160
+ # int mm_idx_index_name(mm_idx_t *mi);
161
+ attach_function \
162
+ :mm_idx_index_name,
163
+ [Idx.by_ref],
164
+ :int
165
+
166
+ # int mm_idx_name2id(const mm_idx_t *mi, const char *name);
167
+ attach_function \
168
+ :mm_idx_name2id,
169
+ [Idx.by_ref, :string],
170
+ :int
171
+
172
+ # int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
173
+ attach_function \
174
+ :mm_idx_getseq,
175
+ [Idx.by_ref, :uint32, :uint32, :uint32, :pointer],
176
+ :int
177
+
178
+ # int mm_idx_alt_read(mm_idx_t *mi, const char *fn);
179
+ attach_function \
180
+ :mm_idx_alt_read,
181
+ [Idx.by_ref, :string],
182
+ :int
183
+
184
+ # int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
185
+ attach_function \
186
+ :mm_idx_bed_read,
187
+ [Idx.by_ref, :string, :int],
188
+ :int
189
+
190
+ # int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
191
+ attach_function \
192
+ :mm_idx_bed_junc,
193
+ [Idx.by_ref, :int32, :int32, :int32, :pointer],
194
+ :int
195
+
196
+ # int mm_max_spsc_bonus(const mm_mapopt_t *mo);
197
+ attach_function \
198
+ :mm_max_spsc_bonus,
199
+ [MapOpt.by_ref],
200
+ :int
201
+
202
+ # int32_t mm_idx_spsc_read(mm_idx_t *idx, const char *fn, int32_t max_sc);
203
+ attach_function \
204
+ :mm_idx_spsc_read,
205
+ [Idx.by_ref, :string, :int32],
206
+ :int32
207
+
208
+ # int64_t mm_idx_spsc_get(const mm_idx_t *db, int32_t cid, int64_t st0, int64_t en0, int32_t rev, uint8_t *sc);
209
+ attach_function \
210
+ :mm_idx_spsc_get,
211
+ [Idx.by_ref, :int32, :int64, :int64, :int32, :pointer],
212
+ :int64
213
+
214
+ # void mm_mapopt_init(mm_mapopt_t *opt);
82
215
  attach_function \
83
216
  :mm_mapopt_init,
84
217
  [MapOpt.by_ref],
85
218
  :void
86
219
 
220
+ # mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
221
+ attach_function \
222
+ :mm_idx_build,
223
+ %i[string int int int int],
224
+ Idx.by_ref
225
+
87
226
  # mmpriv.h
88
227
 
89
228
  attach_function \
@@ -73,7 +73,7 @@ module Minimap2
73
73
 
74
74
  attach_function \
75
75
  :mm_map_aux,
76
- [Idx.by_ref, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
76
+ [Idx.by_ref, :string, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
77
77
  :pointer # Reg1
78
78
 
79
79
  attach_function \
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- VERSION = "0.2.27.0"
4
+ VERSION = "0.2.29.0"
5
5
  end
data/lib/minimap2.rb CHANGED
@@ -116,11 +116,11 @@ module Minimap2
116
116
  end
117
117
 
118
118
  def fastx_next(ks, read_comment)
119
- qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
119
+ qual = ks[:qual][:s] if ks[:qual][:l] > 0
120
120
  name = ks[:name][:s]
121
121
  seq = ks[:seq][:s]
122
122
  if read_comment
123
- comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
123
+ comment = ks[:comment][:s] if ks[:comment][:l] > 0
124
124
  [name, seq, qual, comment]
125
125
  else
126
126
  [name, seq, qual]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minimap2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.27.0
4
+ version: 0.2.29.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-13 00:00:00.000000000 Z
11
+ date: 2025-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -69,6 +69,7 @@ files:
69
69
  - ext/minimap2/format.c
70
70
  - ext/minimap2/hit.c
71
71
  - ext/minimap2/index.c
72
+ - ext/minimap2/jump.c
72
73
  - ext/minimap2/kalloc.c
73
74
  - ext/minimap2/kalloc.h
74
75
  - ext/minimap2/kdq.h
@@ -93,7 +94,7 @@ files:
93
94
  - ext/minimap2/minimap2.1
94
95
  - ext/minimap2/misc.c
95
96
  - ext/minimap2/misc/README.md
96
- - ext/minimap2/misc/mmphase.js
97
+ - ext/minimap2/misc/pafcluster.js
97
98
  - ext/minimap2/misc/paftools.js
98
99
  - ext/minimap2/mmpriv.h
99
100
  - ext/minimap2/options.c
@@ -150,7 +151,7 @@ homepage: https://github.com/kojix2/ruby-minimap2
150
151
  licenses:
151
152
  - MIT
152
153
  metadata: {}
153
- post_install_message:
154
+ post_install_message:
154
155
  rdoc_options: []
155
156
  require_paths:
156
157
  - lib
@@ -165,8 +166,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
165
166
  - !ruby/object:Gem::Version
166
167
  version: '0'
167
168
  requirements: []
168
- rubygems_version: 3.5.4
169
- signing_key:
169
+ rubygems_version: 3.4.19
170
+ signing_key:
170
171
  specification_version: 4
171
172
  summary: minimap2
172
173
  test_files: []
@@ -1,335 +0,0 @@
1
- #!/usr/bin/env k8
2
-
3
- var getopt = function(args, ostr) {
4
- var oli; // option letter list index
5
- if (typeof(getopt.place) == 'undefined')
6
- getopt.ind = 0, getopt.arg = null, getopt.place = -1;
7
- if (getopt.place == -1) { // update scanning pointer
8
- if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
9
- getopt.place = -1;
10
- return null;
11
- }
12
- if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
13
- ++getopt.ind;
14
- getopt.place = -1;
15
- return null;
16
- }
17
- }
18
- var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
19
- if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
20
- if (optopt == '-') return null; // if the user didn't specify '-' as an option, assume it means null.
21
- if (getopt.place < 0) ++getopt.ind;
22
- return '?';
23
- }
24
- if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
25
- getopt.arg = null;
26
- if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
27
- } else { // need an argument
28
- if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
29
- getopt.arg = args[getopt.ind].substr(getopt.place);
30
- else if (args.length <= ++getopt.ind) { // no arg
31
- getopt.place = -1;
32
- if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
33
- return '?';
34
- } else getopt.arg = args[getopt.ind]; // white space
35
- getopt.place = -1;
36
- ++getopt.ind;
37
- }
38
- return optopt;
39
- }
40
-
41
- function read_fastx(file, buf)
42
- {
43
- if (file.readline(buf) < 0) return null;
44
- var m, line = buf.toString();
45
- if ((m = /^([>@])(\S+)/.exec(line)) == null)
46
- throw Error("wrong fastx format");
47
- var is_fq = (m[1] == '@');
48
- var name = m[2];
49
- if (file.readline(buf) < 0)
50
- throw Error("missing sequence line");
51
- var seq = buf.toString();
52
- if (is_fq) { // skip quality
53
- file.readline(buf);
54
- file.readline(buf);
55
- }
56
- return [name, seq];
57
- }
58
-
59
- function filter_paf(a, opt)
60
- {
61
- if (a.length == 0) return;
62
- var k = 0;
63
- for (var i = 0; i < a.length; ++i) {
64
- var ai = a[i];
65
- if (ai[10] < opt.min_blen) continue;
66
- if (ai[9] < ai[10] * opt.min_iden) continue;
67
- var clip = [0, 0];
68
- if (ai[4] == '+') {
69
- clip[0] = ai[2] < ai[7]? ai[2] : ai[7];
70
- clip[1] = ai[1] - ai[3] < ai[6] - ai[8]? ai[1] - ai[3] : ai[6] - ai[8];
71
- } else {
72
- clip[0] = ai[2] < ai[6] - ai[8]? ai[2] : ai[6] - ai[8];
73
- clip[1] = ai[1] - ai[3] < ai[7]? ai[1] - ai[3] : ai[7];
74
- }
75
- if (clip[0] > opt.max_clip_len || clip[1] > opt.max_clip_len) continue;
76
- a[k++] = ai;
77
- }
78
- a.length = k;
79
- }
80
-
81
- function parse_events(t, ev, id, buf)
82
- {
83
- var re = /(:(\d+))|(([\+\-\*])([a-z]+))/g;
84
- var m, cs = null;
85
- for (var j = 12; j < t.length; ++j) {
86
- if ((m = /^cs:Z:(\S+)/.exec(t[j])) != null) {
87
- cs = m[1].toLowerCase();
88
- break;
89
- }
90
- }
91
- if (cs == null) {
92
- warn("Warning: no cs tag for read '" + t[0] + "'");
93
- return;
94
- }
95
- var st = t[2], en = t[3];
96
- var x = st;
97
- while ((m = re.exec(cs)) != null) {
98
- var l;
99
- if (m[2] != null) { // an identical match ":\d+"
100
- l = parseInt(m[2]);
101
- // [start, end, type, index, changed_base]
102
- ev.push([x, x + l, 0, id]);
103
- } else {
104
- if (m[4] == '*') {
105
- l = 1;
106
- ev.push([x, x + 1, 1, id, m[5][0]]);
107
- } else if (m[4] == '+') {
108
- l = m[5].length;
109
- ev.push([x, x + l, 2, id]);
110
- } else if (m[4] == '-') {
111
- l = 0;
112
- ev.push([x, x, -1, id, m[5]]);
113
- }
114
- }
115
- x += l;
116
- }
117
- if (x != en)
118
- throw Error("inconsistent cs for read '" + t[0] + "'");
119
- }
120
-
121
- function find_het_sub(ev, a, opt)
122
- {
123
- var n = a.length, last0_i = -1, h = [], d = [];
124
- for (var i = 0; i < n; ++i) h[i] = [], d[i] = [];
125
- for (var i = 0; i < ev.length; ++i) {
126
- if (ev[i][2] == 0) {
127
- if (last0_i < 0 || ev[i][0] != ev[last0_i][0]) last0_i = i;
128
- else if (ev[i][1] > ev[last0_i][1])
129
- last0_i = i;
130
- } else if (ev[i][2] == 1 && last0_i >= 0 && ev[i][0] < ev[last0_i][1]) {
131
- if (ev[last0_i][1] - ev[last0_i][0] >= opt.min_mlen) {
132
- if (opt.dbg_ev) print("EV", ev[last0_i].join("\t"), "|", ev[i].join("\t"));
133
- var e0 = ev[last0_i], hl = h[e0[3]];
134
- if (hl.length == 0 || hl[hl.length-1][0] != e0[0])
135
- hl.push([e0[0], e0[1]]);
136
- d[ev[i][3]].push([ev[i][0], e0[1] - e0[0]]);
137
- }
138
- }
139
- }
140
- var b = [];
141
- for (var i = 0; i < n; ++i) {
142
- var sh = 0, dh = 0;
143
- for (var j = 0; j < h[i].length; ++j)
144
- sh += h[i][j][1] - h[i][j][0];
145
- for (var j = 0; j < d[i].length; ++j)
146
- dh += d[i][j][1];
147
- // [start, end, index, #consistent, lenConsistent, #conflictive, lenConflictive, identity, mlen]
148
- b[i] = [a[i][2], a[i][3], i, h[i].length, sh, d[i].length, dh, a[i][9] / a[i][10], a[i][9]];
149
- }
150
- return b;
151
- }
152
-
153
- function flt_utg_for_ec(b, opt)
154
- {
155
- var k = 0;
156
- for (var i = 0; i < b.length; ++i) {
157
- var bi = b[i];
158
- if (bi[4] == 0 && bi[6] == 0) b[k++] = bi; // entirely ambiguous
159
- else if (bi[6] < (bi[4] + bi[6]) * opt.max_ratio0) b[k++] = bi;
160
- }
161
- b.length = k;
162
- if (b.length == 0) return;
163
- // find the longest contiguous segment
164
- b.sort(function(x,y) { return x[0]-y[0] });
165
- var st = b[0][0], en = b[0][1], max_st = 0, max_en = 0, max_max_en = en;
166
- for (var i = 1; i < b.length; ++i) {
167
- if (b[i][0] > en) {
168
- if (en - st > max_en - max_st)
169
- max_st = st, max_en = en;
170
- st = b[i][0], en = b[i][1];
171
- } else {
172
- en = en > b[i][1]? en : b[i][1];
173
- }
174
- max_max_en = max_max_en > b[i][1]? max_max_en : b[i][1];
175
- }
176
- if (en - st > max_en - max_st)
177
- max_st = st, max_en = en;
178
- if (max_max_en != en || st != b[0][0]) {
179
- var k = 0;
180
- for (var i = 0; i < b.length; ++i)
181
- if (b[i][0] < max_en && b[i][1] > max_st)
182
- b[k++] = b[i];
183
- b.length = k;
184
- }
185
- }
186
-
187
- function flt_utg_for_bin(b, opt) // filter out alignments clearly on the wrong phase
188
- {
189
- var k = 0;
190
- for (var i = 0; i < b.length; ++i) {
191
- var bi = b[i];
192
- if (bi[4] + bi[6] == 0 || bi[4] >= (bi[4] + bi[6]) * opt.max_ratio0) b[k++] = bi;
193
- }
194
- b.length = k;
195
- }
196
-
197
- function ec_core(b, n_a, ev, buf, ecb) // error correction
198
- {
199
- var intv = [];
200
- for (var i = 0; i < n_a; ++i)
201
- intv[i] = null;
202
- intv[b[0][2]] = [b[0][0], b[0][1]];
203
- var en = b[0][1];
204
- for (var i = 1; i < b.length; ++i) {
205
- if (b[i][1] <= en) continue;
206
- intv[b[i][2]] = [en, b[i][1]];
207
- en = b[i][1];
208
- }
209
- var k = 0;
210
- ecb.capacity = buf.capacity;
211
- ecb.length = 0;
212
- for (var i = 0; i < ev.length; ++i) {
213
- var e = ev[i], I = intv[e[3]];
214
- if (I == null) continue;
215
- if (e[0] >= I[0] && e[0] < I[1]) { // this is to reduce duplicated events around junctions
216
- //print("X", e.join("\t"));
217
- if (e[2] == 0) {
218
- ecb.length += e[1] - e[0];
219
- for (var j = e[0]; j < e[1]; ++j)
220
- ecb[k++] = buf[j];
221
- } else if (e[2] == 1) {
222
- ++ecb.length;
223
- ecb[k++] = e[4].charCodeAt(0);
224
- } else if (e[2] < 0) {
225
- ecb.length += e[4].length;
226
- for (var j = 0; j < e[4].length; ++j)
227
- ecb[k++] = e[4].charCodeAt(j);
228
- } // else, skip e[2] == 2
229
- }
230
- }
231
- if (ecb.length != k) throw Error("BUG!");
232
- }
233
-
234
- function process_paf(a, opt, fp_seq, buf, ecb)
235
- {
236
- if (a.length == 0) return;
237
- var len = a[0][1], name = a[0][0], seq = null;
238
- if (len < opt.min_rlen) return;
239
- if (fp_seq) {
240
- var ret;
241
- while ((ret = read_fastx(fp_seq, buf)) != null)
242
- if (ret[0] == a[0][0])
243
- break;
244
- if (ret == null)
245
- throw Error("failed to find sequence for read '" + a[0][0] + "'");
246
- name = ret[0], seq = ret[1];
247
- if (seq.length != len)
248
- throw Error("inconsistent length for read '" + name + "'");
249
- }
250
- filter_paf(a, opt);
251
- if (a.length == 0) return;
252
- var ev = [];
253
- for (var i = 0; i < a.length; ++i)
254
- parse_events(a[i], ev, i, buf);
255
- ev.sort(function(x,y) { return x[0]!=y[0]? x[0]-y[0] : x[2]-y[2] });
256
- if (seq == null) print("SQ", name, a[0][1], a.length);
257
- var b = find_het_sub(ev, a, opt);
258
- if (opt.ec) flt_utg_for_ec(b, opt);
259
- else flt_utg_for_bin(b, opt);
260
- if (seq == null) {
261
- for (var i = 0; i < b.length; ++i) {
262
- var m, ai = a[b[i][2]], score = 0;
263
- for (var j = 10; j < ai.length; ++j)
264
- if ((m = /^AS:i:(\d+)/.exec(ai[j])) != null)
265
- score = m[1];
266
- print("TS", b[i][2], b[i][0], b[i][1], ai.slice(5, 9).join("\t"), b[i].slice(3, 7).join("\t"), score);
267
- }
268
- print("//");
269
- } else { // error correction
270
- if (b.length == 0) return;
271
- buf.set(seq, 0);
272
- ec_core(b, a.length, ev, buf, ecb);
273
- print(">" + name);
274
- print(ecb);
275
- }
276
- }
277
-
278
- function main(args)
279
- {
280
- var c, opt = { min_rlen:5000, min_blen:5000, min_iden:0.8, min_mlen:5, max_clip_len:500, max_ratio0:0.25, dbg_ev:false };
281
- while ((c = getopt(args, "l:b:d:m:c:r:E")) != null) {
282
- if (c == 'l') opt.min_rlen = parseInt(getopt.arg);
283
- else if (c == 'b') opt.min_blen = parseInt(getopt.arg);
284
- else if (c == 'd') opt.min_iden = parseFloat(getopt.arg);
285
- else if (c == 'm') opt.min_slen = parseInt(getopt.arg);
286
- else if (c == 'c') opt.max_clip_len = parseInt(getopt.arg);
287
- else if (c == 'r') opt.max_ratio0 = parseFloat(getopt.arg);
288
- else if (c == 'E') opt.dbg_ev = true;
289
- }
290
- if (args.length - getopt.ind < 1) {
291
- print("Usage: mmphase.js [options] <map-with-cs.paf> [reads.fa]");
292
- print("Options:");
293
- print(" -l INT min read length [" + opt.min_rlen + "]");
294
- print(" -b INT min alignment length [" + opt.min_blen + "]");
295
- print(" -d FLOAT min identity [" + opt.min_iden + "]");
296
- print(" -s INT min match length [" + opt.min_mlen + "]");
297
- print(" -c INT max clip length [" + opt.max_clip_len + "]");
298
- print(" -r FLOAT initial ratio for haplotype filtering [" + opt.max_ratio0 + "]");
299
- return 0;
300
- }
301
-
302
- opt.ec = args.length - getopt.ind < 2? false : true;
303
- if (!opt.ec) {
304
- print("CC");
305
- print("CC", "SQ qName qLen nHits");
306
- print("CC", "TS index qStart qEnd tName tLen tStart tEnd nConsistent lCons nConflictive lConf score");
307
- print("CC");
308
- }
309
-
310
- var buf = new Bytes(), ecb = new Bytes();
311
- var fp_paf = new File(args[getopt.ind]);
312
- var fp_seq = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : null;
313
- var a = [];
314
- while (fp_paf.readline(buf) >= 0) {
315
- var t = buf.toString().split("\t");
316
- if (a.length > 0 && a[0][0] != t[0]) {
317
- process_paf(a, opt, fp_seq, buf, ecb);
318
- a.length = 0;
319
- }
320
- for (var i = 1; i <= 3; ++i) t[i] = parseInt(t[i]);
321
- if (t[1] < opt.min_rlen) continue;
322
- for (var i = 6; i <= 10; ++i) t[i] = parseInt(t[i]);
323
- if (t[10] < opt.min_blen) continue;
324
- a.push(t);
325
- }
326
- if (a.length >= 0)
327
- process_paf(a, opt, fp_seq, buf, ecb);
328
- if (fp_seq) fp_seq.close();
329
- fp_paf.close();
330
- ecb.destroy();
331
- buf.destroy();
332
- }
333
-
334
- var ret = main(arguments)
335
- exit(ret)