minimap2 0.2.23.1 → 0.2.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/Rakefile +30 -18
- data/ext/minimap2/NEWS.md +14 -0
- data/ext/minimap2/README.md +2 -2
- data/ext/minimap2/cookbook.md +2 -2
- data/ext/minimap2/hit.c +1 -1
- data/ext/minimap2/lchain.c +46 -21
- data/ext/minimap2/main.c +5 -1
- data/ext/minimap2/map.c +2 -2
- data/ext/minimap2/minimap.h +1 -0
- data/ext/minimap2/minimap2.1 +9 -6
- data/ext/minimap2/misc/paftools.js +1 -1
- data/ext/minimap2/mmpriv.h +1 -0
- data/ext/minimap2/options.c +2 -1
- data/ext/minimap2/python/mappy.pyx +1 -1
- data/ext/minimap2/setup.py +1 -1
- data/lib/minimap2/aligner.rb +4 -4
- data/lib/minimap2/alignment.rb +11 -11
- data/lib/minimap2/ffi/constants.rb +3 -2
- data/lib/minimap2/ffi.rb +4 -4
- data/lib/minimap2/version.rb +1 -1
- data/lib/minimap2.rb +25 -18
- metadata +3 -4
- data/ext/vendor/libminimap2.so +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9729fda39fc510adf028d0aea9736ef247201d850f22cfad4821e40253ec26e
|
4
|
+
data.tar.gz: ef6d69949647da93a9a971cfe001060c18053c6075b3a3facff21c80634a835f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b47829c2f5675669ab59a68e6189551c03b4286eba3259a8a2ba8ce7f9da2a5b15f5fb6dfcc01569bc49f357c6c3fa5e7cfb465286a9b7295bd76418fccb286
|
7
|
+
data.tar.gz: cbe0c08f12ba8fc92c65dc4f595d1681930091486492b379205a38b31e5514c194bcc7f9cff4d7d8b710b93c88342b9a9e0d3b1ca0a87d67f02e9b4f257001a9
|
data/ext/Rakefile
CHANGED
@@ -1,40 +1,52 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
require "rake"
|
4
|
+
require "fileutils"
|
5
|
+
require "ffi"
|
6
|
+
|
7
|
+
minimap2_dir = File.expand_path("minimap2", __dir__)
|
8
|
+
target_dir = "../../vendor"
|
9
|
+
target_fname = FFI.map_library_name("minimap2")
|
7
10
|
target_path = File.join(target_dir, target_fname)
|
8
11
|
|
9
|
-
task default:
|
12
|
+
task default: ["minimap2:build", "minimap2:clean"]
|
10
13
|
|
11
14
|
namespace :minimap2 do
|
12
|
-
desc
|
15
|
+
desc "Compile Minimap2"
|
13
16
|
task :build do
|
14
17
|
Dir.chdir(minimap2_dir) do
|
15
18
|
# Add -fPIC option to Makefile
|
16
|
-
sh
|
17
|
-
sh
|
18
|
-
sh
|
19
|
-
case RbConfig::CONFIG[
|
19
|
+
sh "git apply ../minimap2.patch"
|
20
|
+
sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
|
21
|
+
sh "make"
|
22
|
+
case RbConfig::CONFIG["host_os"]
|
20
23
|
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
21
|
-
|
24
|
+
sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
|
22
25
|
when /darwin|mac os/
|
23
26
|
sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o"
|
24
27
|
else
|
25
|
-
sh "cc -shared -o #{target_fname} *.o"
|
28
|
+
sh "cc -shared -o #{target_fname} -lm -lz -lpthread *.o"
|
26
29
|
end
|
27
|
-
sh
|
28
|
-
sh
|
29
|
-
|
30
|
+
sh "rm cmappy.h cmappy.c"
|
31
|
+
sh "git apply -R ../minimap2.patch"
|
32
|
+
FileUtils.mkdir_p(target_dir)
|
33
|
+
warn "mkdir -p #{target_dir}"
|
30
34
|
sh "mv #{target_fname} #{target_path}"
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
|
-
desc
|
38
|
+
desc "Clean"
|
35
39
|
task :clean do
|
36
40
|
Dir.chdir(minimap2_dir) do
|
37
|
-
sh
|
41
|
+
sh "make clean"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
task cleanall: [:clean]
|
46
|
+
|
47
|
+
desc "Clean all"
|
48
|
+
task :cleanall do
|
49
|
+
Dir.chdir(minimap2_dir) do
|
38
50
|
sh "rm #{target_path}" if File.exist?(target_path)
|
39
51
|
end
|
40
52
|
end
|
data/ext/minimap2/NEWS.md
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
Release 2.24-r1122 (26 December 2021)
|
2
|
+
-------------------------------------
|
3
|
+
|
4
|
+
This release improves alignment around long poorly aligned regions. Older
|
5
|
+
minimap2 may chain through such regions in rare cases which may result in
|
6
|
+
missing alignments later. The issue has become worse since the change of
|
7
|
+
the chaining algorithm in v2.19. v2.23 implements an incomplete remedy. This
|
8
|
+
release provides a better solution with a X-drop-like heuristic and by enabling
|
9
|
+
two-bandwidth chaining in the assembly mode.
|
10
|
+
|
11
|
+
(2.24: 26 December 2021, r1122)
|
12
|
+
|
13
|
+
|
14
|
+
|
1
15
|
Release 2.23-r1111 (18 November 2021)
|
2
16
|
-------------------------------------
|
3
17
|
|
data/ext/minimap2/README.md
CHANGED
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
|
|
74
74
|
Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
|
75
75
|
the [release page][release] with:
|
76
76
|
```sh
|
77
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.
|
78
|
-
./minimap2-2.
|
77
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
|
78
|
+
./minimap2-2.24_x64-linux/minimap2
|
79
79
|
```
|
80
80
|
If you want to compile from the source, you need to have a C compiler, GNU make
|
81
81
|
and zlib development files installed. Then type `make` in the source code
|
data/ext/minimap2/cookbook.md
CHANGED
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
|
|
31
31
|
please follow the command lines below:
|
32
32
|
```sh
|
33
33
|
# install minimap2 executables
|
34
|
-
curl -L https://github.com/lh3/minimap2/releases/download/v2.
|
35
|
-
cp minimap2-2.
|
34
|
+
curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
|
35
|
+
cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
|
36
36
|
export PATH="$PATH:"`pwd` # put the current directory on PATH
|
37
37
|
# download example datasets
|
38
38
|
curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
|
data/ext/minimap2/hit.c
CHANGED
@@ -279,7 +279,7 @@ int mm_filter_strand_retained(int n_regs, mm_reg1_t *r)
|
|
279
279
|
int i, k;
|
280
280
|
for (i = k = 0; i < n_regs; ++i) {
|
281
281
|
int p = r[i].parent;
|
282
|
-
if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f) {
|
282
|
+
if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f || r[i].div < 0.01f) {
|
283
283
|
if (k < i) r[k++] = r[i];
|
284
284
|
else ++k;
|
285
285
|
}
|
data/ext/minimap2/lchain.c
CHANGED
@@ -6,7 +6,25 @@
|
|
6
6
|
#include "kalloc.h"
|
7
7
|
#include "krmq.h"
|
8
8
|
|
9
|
-
|
9
|
+
static int64_t mg_chain_bk_end(int32_t max_drop, const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
|
10
|
+
{
|
11
|
+
int64_t i = z[k].y, end_i = -1, max_i = i;
|
12
|
+
int32_t max_s = 0;
|
13
|
+
if (i < 0 || t[i] != 0) return i;
|
14
|
+
do {
|
15
|
+
int32_t s;
|
16
|
+
t[i] = 2;
|
17
|
+
end_i = i = p[i];
|
18
|
+
s = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
19
|
+
if (s > max_s) max_s = s, max_i = i;
|
20
|
+
else if (max_s - s > max_drop) break;
|
21
|
+
} while (i >= 0 && t[i] == 0);
|
22
|
+
for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
|
23
|
+
t[i] = 0;
|
24
|
+
return max_i;
|
25
|
+
}
|
26
|
+
|
27
|
+
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, int32_t *n_u_, int32_t *n_v_)
|
10
28
|
{
|
11
29
|
mm128_t *z;
|
12
30
|
uint64_t *u;
|
@@ -24,26 +42,32 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
|
|
24
42
|
|
25
43
|
memset(t, 0, n * 4);
|
26
44
|
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
45
|
+
if (t[z[k].y] == 0) {
|
46
|
+
int64_t n_v0 = n_v, end_i;
|
47
|
+
int32_t sc;
|
48
|
+
end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
|
49
|
+
for (i = z[k].y; i != end_i; i = p[i])
|
50
|
+
++n_v, t[i] = 1;
|
51
|
+
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
52
|
+
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
53
|
+
++n_u;
|
54
|
+
else n_v = n_v0;
|
55
|
+
}
|
35
56
|
}
|
36
57
|
KMALLOC(km, u, n_u);
|
37
58
|
memset(t, 0, n * 4);
|
38
59
|
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
60
|
+
if (t[z[k].y] == 0) {
|
61
|
+
int64_t n_v0 = n_v, end_i;
|
62
|
+
int32_t sc;
|
63
|
+
end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
|
64
|
+
for (i = z[k].y; i != end_i; i = p[i])
|
65
|
+
v[n_v++] = i, t[i] = 1;
|
66
|
+
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
|
67
|
+
if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
|
68
|
+
u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
|
69
|
+
else n_v = n_v0;
|
70
|
+
}
|
47
71
|
}
|
48
72
|
kfree(km, z);
|
49
73
|
assert(n_v < INT32_MAX);
|
@@ -124,7 +148,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
|
|
124
148
|
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
125
149
|
int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
126
150
|
{ // TODO: make sure this works when n has more than 32 bits
|
127
|
-
int32_t *f, *t, *v, n_u, n_v, mmax_f = 0;
|
151
|
+
int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
|
128
152
|
int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
|
129
153
|
uint64_t *u;
|
130
154
|
|
@@ -135,6 +159,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
135
159
|
}
|
136
160
|
if (max_dist_x < bw) max_dist_x = bw;
|
137
161
|
if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
|
162
|
+
if (is_cdna) max_drop = INT32_MAX;
|
138
163
|
KMALLOC(km, p, n);
|
139
164
|
KMALLOC(km, f, n);
|
140
165
|
KMALLOC(km, v, n);
|
@@ -181,7 +206,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
|
|
181
206
|
if (mmax_f < max_f) mmax_f = max_f;
|
182
207
|
}
|
183
208
|
|
184
|
-
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
|
209
|
+
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
|
185
210
|
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
|
186
211
|
kfree(km, p); kfree(km, f); kfree(km, t);
|
187
212
|
if (n_u == 0) {
|
@@ -225,7 +250,7 @@ static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, flo
|
|
225
250
|
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
|
226
251
|
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
|
227
252
|
{
|
228
|
-
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0;
|
253
|
+
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
|
229
254
|
int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
|
230
255
|
uint64_t *u;
|
231
256
|
lc_elem_t *root = 0, *root_inner = 0;
|
@@ -333,7 +358,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
|
|
333
358
|
}
|
334
359
|
km_destroy(mem_mp);
|
335
360
|
|
336
|
-
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
|
361
|
+
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
|
337
362
|
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
|
338
363
|
kfree(km, p); kfree(km, f); kfree(km, t);
|
339
364
|
if (n_u == 0) {
|
data/ext/minimap2/main.c
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
#include "mmpriv.h"
|
8
8
|
#include "ketopt.h"
|
9
9
|
|
10
|
-
#define MM_VERSION "2.
|
10
|
+
#define MM_VERSION "2.24-r1122"
|
11
11
|
|
12
12
|
#ifdef __linux__
|
13
13
|
#include <sys/resource.h>
|
@@ -76,6 +76,8 @@ static ko_longopt_t long_options[] = {
|
|
76
76
|
{ "cap-kalloc", ko_required_argument, 349 },
|
77
77
|
{ "q-occ-frac", ko_required_argument, 350 },
|
78
78
|
{ "chain-skip-scale",ko_required_argument,351 },
|
79
|
+
{ "print-chains", ko_no_argument, 352 },
|
80
|
+
{ "no-hash-name", ko_no_argument, 353 },
|
79
81
|
{ "help", ko_no_argument, 'h' },
|
80
82
|
{ "max-intron-len", ko_required_argument, 'G' },
|
81
83
|
{ "version", ko_no_argument, 'V' },
|
@@ -233,6 +235,8 @@ int main(int argc, char *argv[])
|
|
233
235
|
else if (c == 348) opt.flag |= MM_F_QSTRAND | MM_F_NO_INV; // --qstrand
|
234
236
|
else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
|
235
237
|
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
|
238
|
+
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
|
239
|
+
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
|
236
240
|
else if (c == 330) {
|
237
241
|
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
238
242
|
} else if (c == 314) { // --frag
|
data/ext/minimap2/map.c
CHANGED
@@ -248,7 +248,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
248
248
|
if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return;
|
249
249
|
if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return;
|
250
250
|
|
251
|
-
hash = qname? __ac_X31_hash_string(qname) : 0;
|
251
|
+
hash = qname && !(opt->flag & MM_F_NO_HASH_NAME)? __ac_X31_hash_string(qname) : 0;
|
252
252
|
hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed);
|
253
253
|
hash = __ac_Wang_hash(hash);
|
254
254
|
|
@@ -328,7 +328,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|
328
328
|
mm_hit_sort(b->km, &n_regs0, regs0, opt->alt_drop); // this step can be merged into mm_gen_regs(); will do if this shows up in profile
|
329
329
|
}
|
330
330
|
|
331
|
-
if (mm_dbg_flag & MM_DBG_PRINT_SEED)
|
331
|
+
if (mm_dbg_flag & (MM_DBG_PRINT_SEED|MM_DBG_PRINT_CHAIN))
|
332
332
|
for (j = 0; j < n_regs0; ++j)
|
333
333
|
for (i = regs0[j].as; i < regs0[j].as + regs0[j].cnt; ++i)
|
334
334
|
fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
|
data/ext/minimap2/minimap.h
CHANGED
data/ext/minimap2/minimap2.1
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
.TH minimap2 1 "18
|
1
|
+
.TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
|
2
2
|
.SH NAME
|
3
3
|
.PP
|
4
4
|
minimap2 - mapping and alignment between collections of DNA sequences
|
@@ -77,7 +77,7 @@ SAM format.
|
|
77
77
|
Minimizer k-mer length [15]
|
78
78
|
.TP
|
79
79
|
.BI -w \ INT
|
80
|
-
Minimizer window size [
|
80
|
+
Minimizer window size [10]. A minimizer is the smallest k-mer
|
81
81
|
in a window of w consecutive k-mers.
|
82
82
|
.TP
|
83
83
|
.B -H
|
@@ -318,6 +318,9 @@ faster for short reads, but slower for long reads. [no]
|
|
318
318
|
.B --no-pairing
|
319
319
|
Treat two reads in a pair as independent reads. The mate related fields in SAM
|
320
320
|
are still properly populated.
|
321
|
+
.TP
|
322
|
+
.B --no-hash-name
|
323
|
+
Produce the same alignment for identical sequences regardless of their sequence names.
|
321
324
|
.SS Alignment options
|
322
325
|
.TP 10
|
323
326
|
.BI -A \ INT
|
@@ -562,7 +565,7 @@ Align older PacBio continuous long (CLR) reads to a reference genome
|
|
562
565
|
.B asm5
|
563
566
|
Long assembly to reference mapping
|
564
567
|
.RB ( -k19
|
565
|
-
.B -w19 -U50,500 --rmq -
|
568
|
+
.B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
|
566
569
|
.BR -N50 ).
|
567
570
|
Typically, the alignment will not extend to regions with 5% or higher sequence
|
568
571
|
divergence. Only use this preset if the average divergence is far below 5%.
|
@@ -570,14 +573,14 @@ divergence. Only use this preset if the average divergence is far below 5%.
|
|
570
573
|
.B asm10
|
571
574
|
Long assembly to reference mapping
|
572
575
|
.RB ( -k19
|
573
|
-
.B -w19 -U50,500 --rmq -
|
576
|
+
.B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
|
574
577
|
.BR -N50 ).
|
575
578
|
Up to 10% sequence divergence.
|
576
579
|
.TP
|
577
580
|
.B asm20
|
578
581
|
Long assembly to reference mapping
|
579
582
|
.RB ( -k19
|
580
|
-
.B -w10 -U50,500 --rmq -
|
583
|
+
.B -w10 -U50,500 --rmq -r1k,100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
|
581
584
|
.BR -N50 ).
|
582
585
|
Up to 20% sequence divergence.
|
583
586
|
.TP
|
@@ -603,7 +606,7 @@ Long-read splice alignment for PacBio CCS reads
|
|
603
606
|
.B sr
|
604
607
|
Short single-end reads without splicing
|
605
608
|
.RB ( -k21
|
606
|
-
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -
|
609
|
+
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -m25
|
607
610
|
.B -s40 -g100 -2K50m --heap-sort=yes
|
608
611
|
.BR --secondary=no ).
|
609
612
|
.TP
|
data/ext/minimap2/mmpriv.h
CHANGED
data/ext/minimap2/options.c
CHANGED
@@ -74,6 +74,7 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
|
74
74
|
if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
|
75
75
|
opt->mid_occ = opt->max_mid_occ;
|
76
76
|
}
|
77
|
+
if (opt->bw_long < opt->bw) opt->bw_long = opt->bw;
|
77
78
|
if (mm_verbose >= 3)
|
78
79
|
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
79
80
|
}
|
@@ -113,7 +114,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|
113
114
|
mo->min_dp_max = 200;
|
114
115
|
} else if (strncmp(preset, "asm", 3) == 0) {
|
115
116
|
io->flag = 0, io->k = 19, io->w = 19;
|
116
|
-
mo->bw = mo->bw_long = 100000;
|
117
|
+
mo->bw = 1000, mo->bw_long = 100000;
|
117
118
|
mo->max_gap = 10000;
|
118
119
|
mo->flag |= MM_F_RMQ;
|
119
120
|
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
data/ext/minimap2/setup.py
CHANGED
data/lib/minimap2/aligner.rb
CHANGED
@@ -90,7 +90,7 @@ module Minimap2
|
|
90
90
|
end
|
91
91
|
|
92
92
|
if fn_idx_in
|
93
|
-
warn
|
93
|
+
warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
|
94
94
|
reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
|
95
95
|
|
96
96
|
# The Ruby version raises an error here
|
@@ -169,13 +169,13 @@ module Minimap2
|
|
169
169
|
c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
|
170
170
|
cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
|
171
171
|
|
172
|
-
_cs =
|
172
|
+
_cs = ""
|
173
173
|
if cs
|
174
174
|
l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
|
175
175
|
_cs = cs_str.read_pointer.read_string(l_cs_str)
|
176
176
|
end
|
177
177
|
|
178
|
-
_md =
|
178
|
+
_md = ""
|
179
179
|
if md
|
180
180
|
l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
|
181
181
|
_md = cs_str.read_pointer.read_string(l_cs_str)
|
@@ -204,7 +204,7 @@ module Minimap2
|
|
204
204
|
lp = ::FFI::MemoryPointer.new(:int)
|
205
205
|
s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
|
206
206
|
l = lp.read_int
|
207
|
-
return nil if l
|
207
|
+
return nil if l == 0
|
208
208
|
|
209
209
|
s.read_string(l)
|
210
210
|
end
|
data/lib/minimap2/alignment.rb
CHANGED
@@ -89,20 +89,20 @@ module Minimap2
|
|
89
89
|
# Convert to the PAF format without the QueryName and QueryLength columns.
|
90
90
|
|
91
91
|
def to_s
|
92
|
-
strand = if @strand
|
93
|
-
|
94
|
-
elsif @strand
|
95
|
-
|
92
|
+
strand = if @strand > 0
|
93
|
+
"+"
|
94
|
+
elsif @strand < 0
|
95
|
+
"-"
|
96
96
|
else
|
97
|
-
|
97
|
+
"?"
|
98
98
|
end
|
99
|
-
tp = @primary != 0 ?
|
100
|
-
ts = if @trans_strand
|
101
|
-
|
102
|
-
elsif @trans_strand
|
103
|
-
|
99
|
+
tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
|
100
|
+
ts = if @trans_strand > 0
|
101
|
+
"ts:A:+"
|
102
|
+
elsif @trans_strand < 0
|
103
|
+
"ts:A:-"
|
104
104
|
else
|
105
|
-
|
105
|
+
"ts:A:."
|
106
106
|
end
|
107
107
|
a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
|
108
108
|
@mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
|
@@ -36,7 +36,8 @@ module Minimap2
|
|
36
36
|
SAM_HIT_ONLY = 0x40000000
|
37
37
|
RMQ = 0x80000000 # LL
|
38
38
|
QSTRAND = 0x100000000 # LL
|
39
|
-
NO_INV = 0x200000000
|
39
|
+
NO_INV = 0x200000000 # LL
|
40
|
+
NO_HASH_NAME = 0x400000000 # LL
|
40
41
|
|
41
42
|
HPC = 0x1
|
42
43
|
NO_SEQ = 0x2
|
@@ -56,7 +57,7 @@ module Minimap2
|
|
56
57
|
CIGAR_EQ_MATCH = 7
|
57
58
|
CIGAR_X_MISMATCH = 8
|
58
59
|
|
59
|
-
CIGAR_STR =
|
60
|
+
CIGAR_STR = "MIDNSHP=XB"
|
60
61
|
|
61
62
|
# emulate 128-bit integers
|
62
63
|
class MM128 < ::FFI::Struct
|
data/lib/minimap2/ffi.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# bit fields
|
4
|
-
require
|
4
|
+
require "ffi/bit_struct"
|
5
5
|
module Minimap2
|
6
6
|
# Native APIs
|
7
7
|
module FFI
|
@@ -21,6 +21,6 @@ module Minimap2
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
require_relative
|
25
|
-
require_relative
|
26
|
-
require_relative
|
24
|
+
require_relative "ffi/constants"
|
25
|
+
require_relative "ffi/functions"
|
26
|
+
require_relative "ffi/mappy"
|
data/lib/minimap2/version.rb
CHANGED
data/lib/minimap2.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# dependencies
|
4
|
-
require
|
4
|
+
require "ffi"
|
5
5
|
|
6
6
|
# modules
|
7
|
-
require_relative
|
8
|
-
require_relative
|
9
|
-
require_relative
|
7
|
+
require_relative "minimap2/aligner"
|
8
|
+
require_relative "minimap2/alignment"
|
9
|
+
require_relative "minimap2/version"
|
10
10
|
|
11
11
|
# Minimap2 mapper for long read sequences
|
12
12
|
# https://github.com/lh3/minimap2
|
@@ -19,19 +19,18 @@ module Minimap2
|
|
19
19
|
attr_accessor :ffi_lib
|
20
20
|
end
|
21
21
|
|
22
|
-
lib_name = ::FFI.map_library_name(
|
23
|
-
self.ffi_lib = if ENV[
|
24
|
-
File.expand_path(lib_name, ENV[
|
22
|
+
lib_name = ::FFI.map_library_name("minimap2")
|
23
|
+
self.ffi_lib = if ENV["MINIMAPDIR"]
|
24
|
+
File.expand_path(lib_name, ENV["MINIMAPDIR"])
|
25
25
|
else
|
26
26
|
File.expand_path("../vendor/#{lib_name}", __dir__)
|
27
27
|
end
|
28
28
|
|
29
29
|
# friendlier error message
|
30
|
-
autoload :FFI,
|
30
|
+
autoload :FFI, "minimap2/ffi"
|
31
31
|
|
32
32
|
# methods from mappy
|
33
33
|
class << self
|
34
|
-
|
35
34
|
# Execute minimap2 command with given options.
|
36
35
|
# @overload execute(arg0,arg1,...)
|
37
36
|
# @param [String] arg minimap2 command option.
|
@@ -41,27 +40,35 @@ module Minimap2
|
|
41
40
|
def Minimap2.execute(*rb_argv)
|
42
41
|
str_ptrs = []
|
43
42
|
# First argument is the program name.
|
44
|
-
str_ptrs << ::FFI::MemoryPointer.from_string(
|
43
|
+
str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
|
45
44
|
rb_argv.each do |arg|
|
46
45
|
arg.to_s.split(/\s+/).each do |s|
|
47
46
|
str_ptrs << ::FFI::MemoryPointer.from_string(s)
|
48
47
|
end
|
49
48
|
end
|
50
|
-
|
49
|
+
str_ptrs << nil
|
51
50
|
|
52
51
|
# Load all the pointers into a native memory block
|
53
|
-
argv = ::FFI::MemoryPointer.new(:pointer,
|
54
|
-
|
52
|
+
argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
|
53
|
+
str_ptrs.each_with_index do |p, i|
|
55
54
|
argv[i].put_pointer(0, p)
|
56
55
|
end
|
57
56
|
|
58
|
-
FFI.main(
|
57
|
+
FFI.main(str_ptrs.length - 1, argv)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Get verbosity level.
|
61
|
+
# @return [Integer] verbosity level.
|
62
|
+
|
63
|
+
def verbose
|
64
|
+
FFI.mm_verbose_level(-1)
|
59
65
|
end
|
60
66
|
|
61
67
|
# Set verbosity level.
|
62
|
-
# @param [Integer] level
|
68
|
+
# @param [Integer] level verbosity level
|
69
|
+
# @return [Integer] verbosity level.
|
63
70
|
|
64
|
-
def verbose(level
|
71
|
+
def verbose=(level)
|
65
72
|
FFI.mm_verbose_level(level)
|
66
73
|
end
|
67
74
|
|
@@ -109,11 +116,11 @@ module Minimap2
|
|
109
116
|
end
|
110
117
|
|
111
118
|
def fastx_next(ks, read_comment)
|
112
|
-
qual = ks[:qual][:s] if (ks[:qual][:l])
|
119
|
+
qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
|
113
120
|
name = ks[:name][:s]
|
114
121
|
seq = ks[:seq][:s]
|
115
122
|
if read_comment
|
116
|
-
comment = ks[:comment][:s] if (ks[:comment][:l])
|
123
|
+
comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
|
117
124
|
[name, seq, qual, comment]
|
118
125
|
else
|
119
126
|
[name, seq, qual]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minimap2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.24.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -137,7 +137,6 @@ files:
|
|
137
137
|
- ext/minimap2/tex/ngmlr.eval
|
138
138
|
- ext/minimap2/tex/roc.gp
|
139
139
|
- ext/minimap2/tex/snap-s3.sam.eval
|
140
|
-
- ext/vendor/libminimap2.so
|
141
140
|
- lib/minimap2.rb
|
142
141
|
- lib/minimap2/aligner.rb
|
143
142
|
- lib/minimap2/alignment.rb
|
@@ -165,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
164
|
- !ruby/object:Gem::Version
|
166
165
|
version: '0'
|
167
166
|
requirements: []
|
168
|
-
rubygems_version: 3.
|
167
|
+
rubygems_version: 3.3.3
|
169
168
|
signing_key:
|
170
169
|
specification_version: 4
|
171
170
|
summary: minimap2
|
data/ext/vendor/libminimap2.so
DELETED
Binary file
|