minimap2 0.2.23.1 → 0.2.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5ad862a669642a3566c2f7b05c8b998130d6169575f665d7bea4a8be3467e0f
4
- data.tar.gz: 92c571043792047fc8c9fc9ad71e4cd1d6be444f5ba4e4e81a2ebee926256e3e
3
+ metadata.gz: f9729fda39fc510adf028d0aea9736ef247201d850f22cfad4821e40253ec26e
4
+ data.tar.gz: ef6d69949647da93a9a971cfe001060c18053c6075b3a3facff21c80634a835f
5
5
  SHA512:
6
- metadata.gz: 9ebe37f8cdaca0c138e39d1f3e41c100482c93f59af84b2968b72da419496bca776ed5a95f072eaa9797f708f09360cf7cdd249fe53322f2cfeae1e76bd378c7
7
- data.tar.gz: a7b56ea197b4a9948f1e3834aac97eac4e8689e14f47c3fbd59f2d8a67154fbd8bfd0ee882ba31ab7d63289bc3fe8cb58275d42230de1b7c34aca9cc18b08191
6
+ metadata.gz: 6b47829c2f5675669ab59a68e6189551c03b4286eba3259a8a2ba8ce7f9da2a5b15f5fb6dfcc01569bc49f357c6c3fa5e7cfb465286a9b7295bd76418fccb286
7
+ data.tar.gz: cbe0c08f12ba8fc92c65dc4f595d1681930091486492b379205a38b31e5514c194bcc7f9cff4d7d8b710b93c88342b9a9e0d3b1ca0a87d67f02e9b4f257001a9
data/ext/Rakefile CHANGED
@@ -1,40 +1,52 @@
1
- require 'rake'
1
+ # frozen_string_literal: true
2
2
 
3
- libsuffix = RbConfig::CONFIG['SOEXT']
4
- minimap2_dir = File.expand_path('minimap2', __dir__)
5
- target_dir = '../../vendor'
6
- target_fname = "libminimap2.#{libsuffix}"
3
+ require "rake"
4
+ require "fileutils"
5
+ require "ffi"
6
+
7
+ minimap2_dir = File.expand_path("minimap2", __dir__)
8
+ target_dir = "../../vendor"
9
+ target_fname = FFI.map_library_name("minimap2")
7
10
  target_path = File.join(target_dir, target_fname)
8
11
 
9
- task default: 'minimap2:build'
12
+ task default: ["minimap2:build", "minimap2:clean"]
10
13
 
11
14
  namespace :minimap2 do
12
- desc 'Compile Minimap2'
15
+ desc "Compile Minimap2"
13
16
  task :build do
14
17
  Dir.chdir(minimap2_dir) do
15
18
  # Add -fPIC option to Makefile
16
- sh 'git apply ../minimap2.patch'
17
- sh 'cp ../cmappy/cmappy.h ../cmappy/cmappy.c .'
18
- sh 'make'
19
- case RbConfig::CONFIG['host_os']
19
+ sh "git apply ../minimap2.patch"
20
+ sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
21
+ sh "make"
22
+ case RbConfig::CONFIG["host_os"]
20
23
  when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
21
- warn 'windows not supported'
24
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
22
25
  when /darwin|mac os/
23
26
  sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o"
24
27
  else
25
- sh "cc -shared -o #{target_fname} *.o"
28
+ sh "cc -shared -o #{target_fname} -lm -lz -lpthread *.o"
26
29
  end
27
- sh 'rm cmappy.h cmappy.c'
28
- sh 'git apply -R ../minimap2.patch'
29
- sh "mkdir -p #{target_dir}"
30
+ sh "rm cmappy.h cmappy.c"
31
+ sh "git apply -R ../minimap2.patch"
32
+ FileUtils.mkdir_p(target_dir)
33
+ warn "mkdir -p #{target_dir}"
30
34
  sh "mv #{target_fname} #{target_path}"
31
35
  end
32
36
  end
33
37
 
34
- desc 'Cleanup'
38
+ desc "Clean"
35
39
  task :clean do
36
40
  Dir.chdir(minimap2_dir) do
37
- sh 'make clean'
41
+ sh "make clean"
42
+ end
43
+ end
44
+
45
+ task cleanall: [:clean]
46
+
47
+ desc "Clean all"
48
+ task :cleanall do
49
+ Dir.chdir(minimap2_dir) do
38
50
  sh "rm #{target_path}" if File.exist?(target_path)
39
51
  end
40
52
  end
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,17 @@
1
+ Release 2.24-r1122 (26 December 2021)
2
+ -------------------------------------
3
+
4
+ This release improves alignment around long poorly aligned regions. Older
5
+ minimap2 may chain through such regions in rare cases which may result in
6
+ missing alignments later. The issue has become worse since the change of
7
+ the chaining algorithm in v2.19. v2.23 implements an incomplete remedy. This
8
+ release provides a better solution with an X-drop-like heuristic and by enabling
9
+ two-bandwidth chaining in the assembly mode.
10
+
11
+ (2.24: 26 December 2021, r1122)
12
+
13
+
14
+
1
15
  Release 2.23-r1111 (18 November 2021)
2
16
  -------------------------------------
3
17
 
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
74
74
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
75
75
  the [release page][release] with:
76
76
  ```sh
77
- curl -L https://github.com/lh3/minimap2/releases/download/v2.23/minimap2-2.23_x64-linux.tar.bz2 | tar -jxvf -
78
- ./minimap2-2.23_x64-linux/minimap2
77
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
78
+ ./minimap2-2.24_x64-linux/minimap2
79
79
  ```
80
80
  If you want to compile from the source, you need to have a C compiler, GNU make
81
81
  and zlib development files installed. Then type `make` in the source code
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.23/minimap2-2.23_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.23_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
data/ext/minimap2/hit.c CHANGED
@@ -279,7 +279,7 @@ int mm_filter_strand_retained(int n_regs, mm_reg1_t *r)
279
279
  int i, k;
280
280
  for (i = k = 0; i < n_regs; ++i) {
281
281
  int p = r[i].parent;
282
- if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f) {
282
+ if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f || r[i].div < 0.01f) {
283
283
  if (k < i) r[k++] = r[i];
284
284
  else ++k;
285
285
  }
@@ -6,7 +6,25 @@
6
6
  #include "kalloc.h"
7
7
  #include "krmq.h"
8
8
 
9
- uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
9
+ static int64_t mg_chain_bk_end(int32_t max_drop, const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
10
+ {
11
+ int64_t i = z[k].y, end_i = -1, max_i = i;
12
+ int32_t max_s = 0;
13
+ if (i < 0 || t[i] != 0) return i;
14
+ do {
15
+ int32_t s;
16
+ t[i] = 2;
17
+ end_i = i = p[i];
18
+ s = i < 0? z[k].x : (int32_t)z[k].x - f[i];
19
+ if (s > max_s) max_s = s, max_i = i;
20
+ else if (max_s - s > max_drop) break;
21
+ } while (i >= 0 && t[i] == 0);
22
+ for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
23
+ t[i] = 0;
24
+ return max_i;
25
+ }
26
+
27
+ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, int32_t *n_u_, int32_t *n_v_)
10
28
  {
11
29
  mm128_t *z;
12
30
  uint64_t *u;
@@ -24,26 +42,32 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
24
42
 
25
43
  memset(t, 0, n * 4);
26
44
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
27
- int64_t n_v0 = n_v;
28
- int32_t sc;
29
- for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
30
- ++n_v, t[i] = 1;
31
- sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
32
- if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
33
- ++n_u;
34
- else n_v = n_v0;
45
+ if (t[z[k].y] == 0) {
46
+ int64_t n_v0 = n_v, end_i;
47
+ int32_t sc;
48
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
49
+ for (i = z[k].y; i != end_i; i = p[i])
50
+ ++n_v, t[i] = 1;
51
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
52
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
53
+ ++n_u;
54
+ else n_v = n_v0;
55
+ }
35
56
  }
36
57
  KMALLOC(km, u, n_u);
37
58
  memset(t, 0, n * 4);
38
59
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
39
- int64_t n_v0 = n_v;
40
- int32_t sc;
41
- for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
42
- v[n_v++] = i, t[i] = 1;
43
- sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
44
- if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
45
- u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
46
- else n_v = n_v0;
60
+ if (t[z[k].y] == 0) {
61
+ int64_t n_v0 = n_v, end_i;
62
+ int32_t sc;
63
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
64
+ for (i = z[k].y; i != end_i; i = p[i])
65
+ v[n_v++] = i, t[i] = 1;
66
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
67
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
68
+ u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
69
+ else n_v = n_v0;
70
+ }
47
71
  }
48
72
  kfree(km, z);
49
73
  assert(n_v < INT32_MAX);
@@ -124,7 +148,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
124
148
  mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
125
149
  int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
126
150
  { // TODO: make sure this works when n has more than 32 bits
127
- int32_t *f, *t, *v, n_u, n_v, mmax_f = 0;
151
+ int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
128
152
  int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
129
153
  uint64_t *u;
130
154
 
@@ -135,6 +159,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
135
159
  }
136
160
  if (max_dist_x < bw) max_dist_x = bw;
137
161
  if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
162
+ if (is_cdna) max_drop = INT32_MAX;
138
163
  KMALLOC(km, p, n);
139
164
  KMALLOC(km, f, n);
140
165
  KMALLOC(km, v, n);
@@ -181,7 +206,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
181
206
  if (mmax_f < max_f) mmax_f = max_f;
182
207
  }
183
208
 
184
- u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
209
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
185
210
  *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
186
211
  kfree(km, p); kfree(km, f); kfree(km, t);
187
212
  if (n_u == 0) {
@@ -225,7 +250,7 @@ static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, flo
225
250
  mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
226
251
  int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
227
252
  {
228
- int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0;
253
+ int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
229
254
  int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
230
255
  uint64_t *u;
231
256
  lc_elem_t *root = 0, *root_inner = 0;
@@ -333,7 +358,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
333
358
  }
334
359
  km_destroy(mem_mp);
335
360
 
336
- u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
361
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
337
362
  *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
338
363
  kfree(km, p); kfree(km, f); kfree(km, t);
339
364
  if (n_u == 0) {
data/ext/minimap2/main.c CHANGED
@@ -7,7 +7,7 @@
7
7
  #include "mmpriv.h"
8
8
  #include "ketopt.h"
9
9
 
10
- #define MM_VERSION "2.23-r1111"
10
+ #define MM_VERSION "2.24-r1122"
11
11
 
12
12
  #ifdef __linux__
13
13
  #include <sys/resource.h>
@@ -76,6 +76,8 @@ static ko_longopt_t long_options[] = {
76
76
  { "cap-kalloc", ko_required_argument, 349 },
77
77
  { "q-occ-frac", ko_required_argument, 350 },
78
78
  { "chain-skip-scale",ko_required_argument,351 },
79
+ { "print-chains", ko_no_argument, 352 },
80
+ { "no-hash-name", ko_no_argument, 353 },
79
81
  { "help", ko_no_argument, 'h' },
80
82
  { "max-intron-len", ko_required_argument, 'G' },
81
83
  { "version", ko_no_argument, 'V' },
@@ -233,6 +235,8 @@ int main(int argc, char *argv[])
233
235
  else if (c == 348) opt.flag |= MM_F_QSTRAND | MM_F_NO_INV; // --qstrand
234
236
  else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
235
237
  else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
238
+ else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
239
+ else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
236
240
  else if (c == 330) {
237
241
  fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
238
242
  } else if (c == 314) { // --frag
data/ext/minimap2/map.c CHANGED
@@ -248,7 +248,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
248
248
  if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return;
249
249
  if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return;
250
250
 
251
- hash = qname? __ac_X31_hash_string(qname) : 0;
251
+ hash = qname && !(opt->flag & MM_F_NO_HASH_NAME)? __ac_X31_hash_string(qname) : 0;
252
252
  hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed);
253
253
  hash = __ac_Wang_hash(hash);
254
254
 
@@ -328,7 +328,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
328
328
  mm_hit_sort(b->km, &n_regs0, regs0, opt->alt_drop); // this step can be merged into mm_gen_regs(); will do if this shows up in profile
329
329
  }
330
330
 
331
- if (mm_dbg_flag & MM_DBG_PRINT_SEED)
331
+ if (mm_dbg_flag & (MM_DBG_PRINT_SEED|MM_DBG_PRINT_CHAIN))
332
332
  for (j = 0; j < n_regs0; ++j)
333
333
  for (i = regs0[j].as; i < regs0[j].as + regs0[j].cnt; ++i)
334
334
  fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
@@ -39,6 +39,7 @@
39
39
  #define MM_F_RMQ (0x80000000LL)
40
40
  #define MM_F_QSTRAND (0x100000000LL)
41
41
  #define MM_F_NO_INV (0x200000000LL)
42
+ #define MM_F_NO_HASH_NAME (0x400000000LL)
42
43
 
43
44
  #define MM_I_HPC 0x1
44
45
  #define MM_I_NO_SEQ 0x2
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "18 November 2021" "minimap2-2.23 (r1111)" "Bioinformatics tools"
1
+ .TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -77,7 +77,7 @@ SAM format.
77
77
  Minimizer k-mer length [15]
78
78
  .TP
79
79
  .BI -w \ INT
80
- Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
80
+ Minimizer window size [10]. A minimizer is the smallest k-mer
81
81
  in a window of w consecutive k-mers.
82
82
  .TP
83
83
  .B -H
@@ -318,6 +318,9 @@ faster for short reads, but slower for long reads. [no]
318
318
  .B --no-pairing
319
319
  Treat two reads in a pair as independent reads. The mate related fields in SAM
320
320
  are still properly populated.
321
+ .TP
322
+ .B --no-hash-name
323
+ Produce the same alignment for identical sequences regardless of their sequence names.
321
324
  .SS Alignment options
322
325
  .TP 10
323
326
  .BI -A \ INT
@@ -562,7 +565,7 @@ Align older PacBio continuous long (CLR) reads to a reference genome
562
565
  .B asm5
563
566
  Long assembly to reference mapping
564
567
  .RB ( -k19
565
- .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
568
+ .B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
566
569
  .BR -N50 ).
567
570
  Typically, the alignment will not extend to regions with 5% or higher sequence
568
571
  divergence. Only use this preset if the average divergence is far below 5%.
@@ -570,14 +573,14 @@ divergence. Only use this preset if the average divergence is far below 5%.
570
573
  .B asm10
571
574
  Long assembly to reference mapping
572
575
  .RB ( -k19
573
- .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
576
+ .B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
574
577
  .BR -N50 ).
575
578
  Up to 10% sequence divergence.
576
579
  .TP
577
580
  .B asm20
578
581
  Long assembly to reference mapping
579
582
  .RB ( -k19
580
- .B -w10 -U50,500 --rmq -r100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
583
+ .B -w10 -U50,500 --rmq -r1k,100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
581
584
  .BR -N50 ).
582
585
  Up to 20% sequence divergence.
583
586
  .TP
@@ -603,7 +606,7 @@ Long-read splice alignment for PacBio CCS reads
603
606
  .B sr
604
607
  Short single-end reads without splicing
605
608
  .RB ( -k21
606
- .B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -m20
609
+ .B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -m25
607
610
  .B -s40 -g100 -2K50m --heap-sort=yes
608
611
  .BR --secondary=no ).
609
612
  .TP
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env k8
2
2
 
3
- var paftools_version = '2.23-r1111';
3
+ var paftools_version = '2.24-r1122';
4
4
 
5
5
  /*****************************
6
6
  ***** Library functions *****
@@ -13,6 +13,7 @@
13
13
  #define MM_DBG_PRINT_QNAME 0x2
14
14
  #define MM_DBG_PRINT_SEED 0x4
15
15
  #define MM_DBG_PRINT_ALN_SEQ 0x8
16
+ #define MM_DBG_PRINT_CHAIN 0x10
16
17
 
17
18
  #define MM_SEED_LONG_JOIN (1ULL<<40)
18
19
  #define MM_SEED_IGNORE (1ULL<<41)
@@ -74,6 +74,7 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
74
74
  if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
75
75
  opt->mid_occ = opt->max_mid_occ;
76
76
  }
77
+ if (opt->bw_long < opt->bw) opt->bw_long = opt->bw;
77
78
  if (mm_verbose >= 3)
78
79
  fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
79
80
  }
@@ -113,7 +114,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
113
114
  mo->min_dp_max = 200;
114
115
  } else if (strncmp(preset, "asm", 3) == 0) {
115
116
  io->flag = 0, io->k = 19, io->w = 19;
116
- mo->bw = mo->bw_long = 100000;
117
+ mo->bw = 1000, mo->bw_long = 100000;
117
118
  mo->max_gap = 10000;
118
119
  mo->flag |= MM_F_RMQ;
119
120
  mo->min_mid_occ = 50, mo->max_mid_occ = 500;
@@ -3,7 +3,7 @@ from libc.stdlib cimport free
3
3
  cimport cmappy
4
4
  import sys
5
5
 
6
- __version__ = '2.23'
6
+ __version__ = '2.24'
7
7
 
8
8
  cmappy.mm_reset_timer()
9
9
 
@@ -23,7 +23,7 @@ def readme():
23
23
 
24
24
  setup(
25
25
  name = 'mappy',
26
- version = '2.23',
26
+ version = '2.24',
27
27
  url = 'https://github.com/lh3/minimap2',
28
28
  description = 'Minimap2 python binding',
29
29
  long_description = readme(),
@@ -90,7 +90,7 @@ module Minimap2
90
90
  end
91
91
 
92
92
  if fn_idx_in
93
- warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
93
+ warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
94
94
  reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
95
95
 
96
96
  # The Ruby version raises an error here
@@ -169,13 +169,13 @@ module Minimap2
169
169
  c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
170
170
  cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
171
171
 
172
- _cs = ''
172
+ _cs = ""
173
173
  if cs
174
174
  l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
175
175
  _cs = cs_str.read_pointer.read_string(l_cs_str)
176
176
  end
177
177
 
178
- _md = ''
178
+ _md = ""
179
179
  if md
180
180
  l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
181
181
  _md = cs_str.read_pointer.read_string(l_cs_str)
@@ -204,7 +204,7 @@ module Minimap2
204
204
  lp = ::FFI::MemoryPointer.new(:int)
205
205
  s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
206
206
  l = lp.read_int
207
- return nil if l.zero?
207
+ return nil if l == 0
208
208
 
209
209
  s.read_string(l)
210
210
  end
@@ -89,20 +89,20 @@ module Minimap2
89
89
  # Convert to the PAF format without the QueryName and QueryLength columns.
90
90
 
91
91
  def to_s
92
- strand = if @strand.positive?
93
- '+'
94
- elsif @strand.negative?
95
- '-'
92
+ strand = if @strand > 0
93
+ "+"
94
+ elsif @strand < 0
95
+ "-"
96
96
  else
97
- '?'
97
+ "?"
98
98
  end
99
- tp = @primary != 0 ? 'tp:A:P' : 'tp:A:S'
100
- ts = if @trans_strand.positive?
101
- 'ts:A:+'
102
- elsif @trans_strand.negative?
103
- 'ts:A:-'
99
+ tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
100
+ ts = if @trans_strand > 0
101
+ "ts:A:+"
102
+ elsif @trans_strand < 0
103
+ "ts:A:-"
104
104
  else
105
- 'ts:A:.'
105
+ "ts:A:."
106
106
  end
107
107
  a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108
108
  @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
@@ -36,7 +36,8 @@ module Minimap2
36
36
  SAM_HIT_ONLY = 0x40000000
37
37
  RMQ = 0x80000000 # LL
38
38
  QSTRAND = 0x100000000 # LL
39
- NO_INV = 0x200000000
39
+ NO_INV = 0x200000000 # LL
40
+ NO_HASH_NAME = 0x400000000 # LL
40
41
 
41
42
  HPC = 0x1
42
43
  NO_SEQ = 0x2
@@ -56,7 +57,7 @@ module Minimap2
56
57
  CIGAR_EQ_MATCH = 7
57
58
  CIGAR_X_MISMATCH = 8
58
59
 
59
- CIGAR_STR = 'MIDNSHP=XB'
60
+ CIGAR_STR = "MIDNSHP=XB"
60
61
 
61
62
  # emulate 128-bit integers
62
63
  class MM128 < ::FFI::Struct
data/lib/minimap2/ffi.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # bit fields
4
- require 'ffi/bit_struct'
4
+ require "ffi/bit_struct"
5
5
  module Minimap2
6
6
  # Native APIs
7
7
  module FFI
@@ -21,6 +21,6 @@ module Minimap2
21
21
  end
22
22
  end
23
23
 
24
- require_relative 'ffi/constants'
25
- require_relative 'ffi/functions'
26
- require_relative 'ffi/mappy'
24
+ require_relative "ffi/constants"
25
+ require_relative "ffi/functions"
26
+ require_relative "ffi/mappy"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Minimap2
4
4
  # Minimap2-2.24 (r1122)
5
- VERSION = '0.2.23.1'
5
+ VERSION = "0.2.24.0"
6
6
  end
data/lib/minimap2.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # dependencies
4
- require 'ffi'
4
+ require "ffi"
5
5
 
6
6
  # modules
7
- require_relative 'minimap2/aligner'
8
- require_relative 'minimap2/alignment'
9
- require_relative 'minimap2/version'
7
+ require_relative "minimap2/aligner"
8
+ require_relative "minimap2/alignment"
9
+ require_relative "minimap2/version"
10
10
 
11
11
  # Minimap2 mapper for long read sequences
12
12
  # https://github.com/lh3/minimap2
@@ -19,19 +19,18 @@ module Minimap2
19
19
  attr_accessor :ffi_lib
20
20
  end
21
21
 
22
- lib_name = ::FFI.map_library_name('minimap2')
23
- self.ffi_lib = if ENV['MINIMAPDIR']
24
- File.expand_path(lib_name, ENV['MINIMAPDIR'])
22
+ lib_name = ::FFI.map_library_name("minimap2")
23
+ self.ffi_lib = if ENV["MINIMAPDIR"]
24
+ File.expand_path(lib_name, ENV["MINIMAPDIR"])
25
25
  else
26
26
  File.expand_path("../vendor/#{lib_name}", __dir__)
27
27
  end
28
28
 
29
29
  # friendlier error message
30
- autoload :FFI, 'minimap2/ffi'
30
+ autoload :FFI, "minimap2/ffi"
31
31
 
32
32
  # methods from mappy
33
33
  class << self
34
-
35
34
  # Execute minimap2 command with given options.
36
35
  # @overload execute(arg0,arg1,...)
37
36
  # @param [String] arg minimap2 command option.
@@ -41,27 +40,35 @@ module Minimap2
41
40
  def Minimap2.execute(*rb_argv)
42
41
  str_ptrs = []
43
42
  # First argument is the program name.
44
- str_ptrs << ::FFI::MemoryPointer.from_string('minimap2')
43
+ str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
45
44
  rb_argv.each do |arg|
46
45
  arg.to_s.split(/\s+/).each do |s|
47
46
  str_ptrs << ::FFI::MemoryPointer.from_string(s)
48
47
  end
49
48
  end
50
- strptrs << nil
49
+ str_ptrs << nil
51
50
 
52
51
  # Load all the pointers into a native memory block
53
- argv = ::FFI::MemoryPointer.new(:pointer, strptrs.length)
54
- strptrs.each_with_index do |p, i|
52
+ argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
53
+ str_ptrs.each_with_index do |p, i|
55
54
  argv[i].put_pointer(0, p)
56
55
  end
57
56
 
58
- FFI.main(strptrs.length - 1, argv)
57
+ FFI.main(str_ptrs.length - 1, argv)
58
+ end
59
+
60
+ # Get verbosity level.
61
+ # @return [Integer] verbosity level.
62
+
63
+ def verbose
64
+ FFI.mm_verbose_level(-1)
59
65
  end
60
66
 
61
67
  # Set verbosity level.
62
- # @param [Integer] level
68
+ # @param [Integer] level verbosity level
69
+ # @return [Integer] verbosity level.
63
70
 
64
- def verbose(level = -1)
71
+ def verbose=(level)
65
72
  FFI.mm_verbose_level(level)
66
73
  end
67
74
 
@@ -109,11 +116,11 @@ module Minimap2
109
116
  end
110
117
 
111
118
  def fastx_next(ks, read_comment)
112
- qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
119
+ qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
113
120
  name = ks[:name][:s]
114
121
  seq = ks[:seq][:s]
115
122
  if read_comment
116
- comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
123
+ comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
117
124
  [name, seq, qual, comment]
118
125
  else
119
126
  [name, seq, qual]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minimap2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.23.1
4
+ version: 0.2.24.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-16 00:00:00.000000000 Z
11
+ date: 2021-12-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -137,7 +137,6 @@ files:
137
137
  - ext/minimap2/tex/ngmlr.eval
138
138
  - ext/minimap2/tex/roc.gp
139
139
  - ext/minimap2/tex/snap-s3.sam.eval
140
- - ext/vendor/libminimap2.so
141
140
  - lib/minimap2.rb
142
141
  - lib/minimap2/aligner.rb
143
142
  - lib/minimap2/alignment.rb
@@ -165,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
165
164
  - !ruby/object:Gem::Version
166
165
  version: '0'
167
166
  requirements: []
168
- rubygems_version: 3.2.26
167
+ rubygems_version: 3.3.3
169
168
  signing_key:
170
169
  specification_version: 4
171
170
  summary: minimap2
Binary file