minimap2 0.2.23.1 → 0.2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5ad862a669642a3566c2f7b05c8b998130d6169575f665d7bea4a8be3467e0f
4
- data.tar.gz: 92c571043792047fc8c9fc9ad71e4cd1d6be444f5ba4e4e81a2ebee926256e3e
3
+ metadata.gz: f9729fda39fc510adf028d0aea9736ef247201d850f22cfad4821e40253ec26e
4
+ data.tar.gz: ef6d69949647da93a9a971cfe001060c18053c6075b3a3facff21c80634a835f
5
5
  SHA512:
6
- metadata.gz: 9ebe37f8cdaca0c138e39d1f3e41c100482c93f59af84b2968b72da419496bca776ed5a95f072eaa9797f708f09360cf7cdd249fe53322f2cfeae1e76bd378c7
7
- data.tar.gz: a7b56ea197b4a9948f1e3834aac97eac4e8689e14f47c3fbd59f2d8a67154fbd8bfd0ee882ba31ab7d63289bc3fe8cb58275d42230de1b7c34aca9cc18b08191
6
+ metadata.gz: 6b47829c2f5675669ab59a68e6189551c03b4286eba3259a8a2ba8ce7f9da2a5b15f5fb6dfcc01569bc49f357c6c3fa5e7cfb465286a9b7295bd76418fccb286
7
+ data.tar.gz: cbe0c08f12ba8fc92c65dc4f595d1681930091486492b379205a38b31e5514c194bcc7f9cff4d7d8b710b93c88342b9a9e0d3b1ca0a87d67f02e9b4f257001a9
data/ext/Rakefile CHANGED
@@ -1,40 +1,52 @@
1
- require 'rake'
1
+ # frozen_string_literal: true
2
2
 
3
- libsuffix = RbConfig::CONFIG['SOEXT']
4
- minimap2_dir = File.expand_path('minimap2', __dir__)
5
- target_dir = '../../vendor'
6
- target_fname = "libminimap2.#{libsuffix}"
3
+ require "rake"
4
+ require "fileutils"
5
+ require "ffi"
6
+
7
+ minimap2_dir = File.expand_path("minimap2", __dir__)
8
+ target_dir = "../../vendor"
9
+ target_fname = FFI.map_library_name("minimap2")
7
10
  target_path = File.join(target_dir, target_fname)
8
11
 
9
- task default: 'minimap2:build'
12
+ task default: ["minimap2:build", "minimap2:clean"]
10
13
 
11
14
  namespace :minimap2 do
12
- desc 'Compile Minimap2'
15
+ desc "Compile Minimap2"
13
16
  task :build do
14
17
  Dir.chdir(minimap2_dir) do
15
18
  # Add -fPIC option to Makefile
16
- sh 'git apply ../minimap2.patch'
17
- sh 'cp ../cmappy/cmappy.h ../cmappy/cmappy.c .'
18
- sh 'make'
19
- case RbConfig::CONFIG['host_os']
19
+ sh "git apply ../minimap2.patch"
20
+ sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
21
+ sh "make"
22
+ case RbConfig::CONFIG["host_os"]
20
23
  when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
21
- warn 'windows not supported'
24
+ sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
22
25
  when /darwin|mac os/
23
26
  sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o"
24
27
  else
25
- sh "cc -shared -o #{target_fname} *.o"
28
+ sh "cc -shared -o #{target_fname} -lm -lz -lpthread *.o"
26
29
  end
27
- sh 'rm cmappy.h cmappy.c'
28
- sh 'git apply -R ../minimap2.patch'
29
- sh "mkdir -p #{target_dir}"
30
+ sh "rm cmappy.h cmappy.c"
31
+ sh "git apply -R ../minimap2.patch"
32
+ FileUtils.mkdir_p(target_dir)
33
+ warn "mkdir -p #{target_dir}"
30
34
  sh "mv #{target_fname} #{target_path}"
31
35
  end
32
36
  end
33
37
 
34
- desc 'Cleanup'
38
+ desc "Clean"
35
39
  task :clean do
36
40
  Dir.chdir(minimap2_dir) do
37
- sh 'make clean'
41
+ sh "make clean"
42
+ end
43
+ end
44
+
45
+ task cleanall: [:clean]
46
+
47
+ desc "Clean all"
48
+ task :cleanall do
49
+ Dir.chdir(minimap2_dir) do
38
50
  sh "rm #{target_path}" if File.exist?(target_path)
39
51
  end
40
52
  end
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,17 @@
1
+ Release 2.24-r1122 (26 December 2021)
2
+ -------------------------------------
3
+
4
+ This release improves alignment around long poorly aligned regions. Older
5
+ minimap2 may chain through such regions in rare cases which may result in
6
+ missing alignments later. The issue has become worse since the change of
7
+ the chaining algorithm in v2.19. v2.23 implements an incomplete remedy. This
8
+ release provides a better solution with a X-drop-like heuristic and by enabling
9
+ two-bandwidth chaining in the assembly mode.
10
+
11
+ (2.24: 26 December 2021, r1122)
12
+
13
+
14
+
1
15
  Release 2.23-r1111 (18 November 2021)
2
16
  -------------------------------------
3
17
 
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
74
74
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
75
75
  the [release page][release] with:
76
76
  ```sh
77
- curl -L https://github.com/lh3/minimap2/releases/download/v2.23/minimap2-2.23_x64-linux.tar.bz2 | tar -jxvf -
78
- ./minimap2-2.23_x64-linux/minimap2
77
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
78
+ ./minimap2-2.24_x64-linux/minimap2
79
79
  ```
80
80
  If you want to compile from the source, you need to have a C compiler, GNU make
81
81
  and zlib development files installed. Then type `make` in the source code
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.23/minimap2-2.23_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.23_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
data/ext/minimap2/hit.c CHANGED
@@ -279,7 +279,7 @@ int mm_filter_strand_retained(int n_regs, mm_reg1_t *r)
279
279
  int i, k;
280
280
  for (i = k = 0; i < n_regs; ++i) {
281
281
  int p = r[i].parent;
282
- if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f) {
282
+ if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f || r[i].div < 0.01f) {
283
283
  if (k < i) r[k++] = r[i];
284
284
  else ++k;
285
285
  }
@@ -6,7 +6,25 @@
6
6
  #include "kalloc.h"
7
7
  #include "krmq.h"
8
8
 
9
- uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
9
+ static int64_t mg_chain_bk_end(int32_t max_drop, const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
10
+ {
11
+ int64_t i = z[k].y, end_i = -1, max_i = i;
12
+ int32_t max_s = 0;
13
+ if (i < 0 || t[i] != 0) return i;
14
+ do {
15
+ int32_t s;
16
+ t[i] = 2;
17
+ end_i = i = p[i];
18
+ s = i < 0? z[k].x : (int32_t)z[k].x - f[i];
19
+ if (s > max_s) max_s = s, max_i = i;
20
+ else if (max_s - s > max_drop) break;
21
+ } while (i >= 0 && t[i] == 0);
22
+ for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
23
+ t[i] = 0;
24
+ return max_i;
25
+ }
26
+
27
+ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, int32_t *n_u_, int32_t *n_v_)
10
28
  {
11
29
  mm128_t *z;
12
30
  uint64_t *u;
@@ -24,26 +42,32 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
24
42
 
25
43
  memset(t, 0, n * 4);
26
44
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
27
- int64_t n_v0 = n_v;
28
- int32_t sc;
29
- for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
30
- ++n_v, t[i] = 1;
31
- sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
32
- if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
33
- ++n_u;
34
- else n_v = n_v0;
45
+ if (t[z[k].y] == 0) {
46
+ int64_t n_v0 = n_v, end_i;
47
+ int32_t sc;
48
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
49
+ for (i = z[k].y; i != end_i; i = p[i])
50
+ ++n_v, t[i] = 1;
51
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
52
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
53
+ ++n_u;
54
+ else n_v = n_v0;
55
+ }
35
56
  }
36
57
  KMALLOC(km, u, n_u);
37
58
  memset(t, 0, n * 4);
38
59
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
39
- int64_t n_v0 = n_v;
40
- int32_t sc;
41
- for (i = z[k].y; i >= 0 && t[i] == 0; i = p[i])
42
- v[n_v++] = i, t[i] = 1;
43
- sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
44
- if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
45
- u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
46
- else n_v = n_v0;
60
+ if (t[z[k].y] == 0) {
61
+ int64_t n_v0 = n_v, end_i;
62
+ int32_t sc;
63
+ end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
64
+ for (i = z[k].y; i != end_i; i = p[i])
65
+ v[n_v++] = i, t[i] = 1;
66
+ sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
67
+ if (sc >= min_sc && n_v > n_v0 && n_v - n_v0 >= min_cnt)
68
+ u[n_u++] = (uint64_t)sc << 32 | (n_v - n_v0);
69
+ else n_v = n_v0;
70
+ }
47
71
  }
48
72
  kfree(km, z);
49
73
  assert(n_v < INT32_MAX);
@@ -124,7 +148,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
124
148
  mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
125
149
  int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
126
150
  { // TODO: make sure this works when n has more than 32 bits
127
- int32_t *f, *t, *v, n_u, n_v, mmax_f = 0;
151
+ int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
128
152
  int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
129
153
  uint64_t *u;
130
154
 
@@ -135,6 +159,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
135
159
  }
136
160
  if (max_dist_x < bw) max_dist_x = bw;
137
161
  if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
162
+ if (is_cdna) max_drop = INT32_MAX;
138
163
  KMALLOC(km, p, n);
139
164
  KMALLOC(km, f, n);
140
165
  KMALLOC(km, v, n);
@@ -181,7 +206,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
181
206
  if (mmax_f < max_f) mmax_f = max_f;
182
207
  }
183
208
 
184
- u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
209
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
185
210
  *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
186
211
  kfree(km, p); kfree(km, f); kfree(km, t);
187
212
  if (n_u == 0) {
@@ -225,7 +250,7 @@ static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, flo
225
250
  mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
226
251
  int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
227
252
  {
228
- int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0;
253
+ int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
229
254
  int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
230
255
  uint64_t *u;
231
256
  lc_elem_t *root = 0, *root_inner = 0;
@@ -333,7 +358,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
333
358
  }
334
359
  km_destroy(mem_mp);
335
360
 
336
- u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v);
361
+ u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
337
362
  *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
338
363
  kfree(km, p); kfree(km, f); kfree(km, t);
339
364
  if (n_u == 0) {
data/ext/minimap2/main.c CHANGED
@@ -7,7 +7,7 @@
7
7
  #include "mmpriv.h"
8
8
  #include "ketopt.h"
9
9
 
10
- #define MM_VERSION "2.23-r1111"
10
+ #define MM_VERSION "2.24-r1122"
11
11
 
12
12
  #ifdef __linux__
13
13
  #include <sys/resource.h>
@@ -76,6 +76,8 @@ static ko_longopt_t long_options[] = {
76
76
  { "cap-kalloc", ko_required_argument, 349 },
77
77
  { "q-occ-frac", ko_required_argument, 350 },
78
78
  { "chain-skip-scale",ko_required_argument,351 },
79
+ { "print-chains", ko_no_argument, 352 },
80
+ { "no-hash-name", ko_no_argument, 353 },
79
81
  { "help", ko_no_argument, 'h' },
80
82
  { "max-intron-len", ko_required_argument, 'G' },
81
83
  { "version", ko_no_argument, 'V' },
@@ -233,6 +235,8 @@ int main(int argc, char *argv[])
233
235
  else if (c == 348) opt.flag |= MM_F_QSTRAND | MM_F_NO_INV; // --qstrand
234
236
  else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
235
237
  else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
238
+ else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
239
+ else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
236
240
  else if (c == 330) {
237
241
  fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
238
242
  } else if (c == 314) { // --frag
data/ext/minimap2/map.c CHANGED
@@ -248,7 +248,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
248
248
  if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return;
249
249
  if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return;
250
250
 
251
- hash = qname? __ac_X31_hash_string(qname) : 0;
251
+ hash = qname && !(opt->flag & MM_F_NO_HASH_NAME)? __ac_X31_hash_string(qname) : 0;
252
252
  hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed);
253
253
  hash = __ac_Wang_hash(hash);
254
254
 
@@ -328,7 +328,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
328
328
  mm_hit_sort(b->km, &n_regs0, regs0, opt->alt_drop); // this step can be merged into mm_gen_regs(); will do if this shows up in profile
329
329
  }
330
330
 
331
- if (mm_dbg_flag & MM_DBG_PRINT_SEED)
331
+ if (mm_dbg_flag & (MM_DBG_PRINT_SEED|MM_DBG_PRINT_CHAIN))
332
332
  for (j = 0; j < n_regs0; ++j)
333
333
  for (i = regs0[j].as; i < regs0[j].as + regs0[j].cnt; ++i)
334
334
  fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
@@ -39,6 +39,7 @@
39
39
  #define MM_F_RMQ (0x80000000LL)
40
40
  #define MM_F_QSTRAND (0x100000000LL)
41
41
  #define MM_F_NO_INV (0x200000000LL)
42
+ #define MM_F_NO_HASH_NAME (0x400000000LL)
42
43
 
43
44
  #define MM_I_HPC 0x1
44
45
  #define MM_I_NO_SEQ 0x2
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "18 November 2021" "minimap2-2.23 (r1111)" "Bioinformatics tools"
1
+ .TH minimap2 1 "18 December 2021" "minimap2-2.24 (r1122)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -77,7 +77,7 @@ SAM format.
77
77
  Minimizer k-mer length [15]
78
78
  .TP
79
79
  .BI -w \ INT
80
- Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
80
+ Minimizer window size [10]. A minimizer is the smallest k-mer
81
81
  in a window of w consecutive k-mers.
82
82
  .TP
83
83
  .B -H
@@ -318,6 +318,9 @@ faster for short reads, but slower for long reads. [no]
318
318
  .B --no-pairing
319
319
  Treat two reads in a pair as independent reads. The mate related fields in SAM
320
320
  are still properly populated.
321
+ .TP
322
+ .B --no-hash-name
323
+ Produce the same alignment for identical sequences regardless of their sequence names.
321
324
  .SS Alignment options
322
325
  .TP 10
323
326
  .BI -A \ INT
@@ -562,7 +565,7 @@ Align older PacBio continuous long (CLR) reads to a reference genome
562
565
  .B asm5
563
566
  Long assembly to reference mapping
564
567
  .RB ( -k19
565
- .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
568
+ .B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
566
569
  .BR -N50 ).
567
570
  Typically, the alignment will not extend to regions with 5% or higher sequence
568
571
  divergence. Only use this preset if the average divergence is far below 5%.
@@ -570,14 +573,14 @@ divergence. Only use this preset if the average divergence is far below 5%.
570
573
  .B asm10
571
574
  Long assembly to reference mapping
572
575
  .RB ( -k19
573
- .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
576
+ .B -w19 -U50,500 --rmq -r1k,100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
574
577
  .BR -N50 ).
575
578
  Up to 10% sequence divergence.
576
579
  .TP
577
580
  .B asm20
578
581
  Long assembly to reference mapping
579
582
  .RB ( -k19
580
- .B -w10 -U50,500 --rmq -r100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
583
+ .B -w10 -U50,500 --rmq -r1k,100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
581
584
  .BR -N50 ).
582
585
  Up to 20% sequence divergence.
583
586
  .TP
@@ -603,7 +606,7 @@ Long-read splice alignment for PacBio CCS reads
603
606
  .B sr
604
607
  Short single-end reads without splicing
605
608
  .RB ( -k21
606
- .B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -m20
609
+ .B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -b0 -r100 -p.5 -N20 -f1000,5000 -n2 -m25
607
610
  .B -s40 -g100 -2K50m --heap-sort=yes
608
611
  .BR --secondary=no ).
609
612
  .TP
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env k8
2
2
 
3
- var paftools_version = '2.23-r1111';
3
+ var paftools_version = '2.24-r1122';
4
4
 
5
5
  /*****************************
6
6
  ***** Library functions *****
@@ -13,6 +13,7 @@
13
13
  #define MM_DBG_PRINT_QNAME 0x2
14
14
  #define MM_DBG_PRINT_SEED 0x4
15
15
  #define MM_DBG_PRINT_ALN_SEQ 0x8
16
+ #define MM_DBG_PRINT_CHAIN 0x10
16
17
 
17
18
  #define MM_SEED_LONG_JOIN (1ULL<<40)
18
19
  #define MM_SEED_IGNORE (1ULL<<41)
@@ -74,6 +74,7 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
74
74
  if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
75
75
  opt->mid_occ = opt->max_mid_occ;
76
76
  }
77
+ if (opt->bw_long < opt->bw) opt->bw_long = opt->bw;
77
78
  if (mm_verbose >= 3)
78
79
  fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
79
80
  }
@@ -113,7 +114,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
113
114
  mo->min_dp_max = 200;
114
115
  } else if (strncmp(preset, "asm", 3) == 0) {
115
116
  io->flag = 0, io->k = 19, io->w = 19;
116
- mo->bw = mo->bw_long = 100000;
117
+ mo->bw = 1000, mo->bw_long = 100000;
117
118
  mo->max_gap = 10000;
118
119
  mo->flag |= MM_F_RMQ;
119
120
  mo->min_mid_occ = 50, mo->max_mid_occ = 500;
@@ -3,7 +3,7 @@ from libc.stdlib cimport free
3
3
  cimport cmappy
4
4
  import sys
5
5
 
6
- __version__ = '2.23'
6
+ __version__ = '2.24'
7
7
 
8
8
  cmappy.mm_reset_timer()
9
9
 
@@ -23,7 +23,7 @@ def readme():
23
23
 
24
24
  setup(
25
25
  name = 'mappy',
26
- version = '2.23',
26
+ version = '2.24',
27
27
  url = 'https://github.com/lh3/minimap2',
28
28
  description = 'Minimap2 python binding',
29
29
  long_description = readme(),
@@ -90,7 +90,7 @@ module Minimap2
90
90
  end
91
91
 
92
92
  if fn_idx_in
93
- warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
93
+ warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
94
94
  reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
95
95
 
96
96
  # The Ruby version raises an error here
@@ -169,13 +169,13 @@ module Minimap2
169
169
  c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
170
170
  cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
171
171
 
172
- _cs = ''
172
+ _cs = ""
173
173
  if cs
174
174
  l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
175
175
  _cs = cs_str.read_pointer.read_string(l_cs_str)
176
176
  end
177
177
 
178
- _md = ''
178
+ _md = ""
179
179
  if md
180
180
  l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
181
181
  _md = cs_str.read_pointer.read_string(l_cs_str)
@@ -204,7 +204,7 @@ module Minimap2
204
204
  lp = ::FFI::MemoryPointer.new(:int)
205
205
  s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
206
206
  l = lp.read_int
207
- return nil if l.zero?
207
+ return nil if l == 0
208
208
 
209
209
  s.read_string(l)
210
210
  end
@@ -89,20 +89,20 @@ module Minimap2
89
89
  # Convert to the PAF format without the QueryName and QueryLength columns.
90
90
 
91
91
  def to_s
92
- strand = if @strand.positive?
93
- '+'
94
- elsif @strand.negative?
95
- '-'
92
+ strand = if @strand > 0
93
+ "+"
94
+ elsif @strand < 0
95
+ "-"
96
96
  else
97
- '?'
97
+ "?"
98
98
  end
99
- tp = @primary != 0 ? 'tp:A:P' : 'tp:A:S'
100
- ts = if @trans_strand.positive?
101
- 'ts:A:+'
102
- elsif @trans_strand.negative?
103
- 'ts:A:-'
99
+ tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
100
+ ts = if @trans_strand > 0
101
+ "ts:A:+"
102
+ elsif @trans_strand < 0
103
+ "ts:A:-"
104
104
  else
105
- 'ts:A:.'
105
+ "ts:A:."
106
106
  end
107
107
  a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108
108
  @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
@@ -36,7 +36,8 @@ module Minimap2
36
36
  SAM_HIT_ONLY = 0x40000000
37
37
  RMQ = 0x80000000 # LL
38
38
  QSTRAND = 0x100000000 # LL
39
- NO_INV = 0x200000000
39
+ NO_INV = 0x200000000 # LL
40
+ NO_HASH_NAME = 0x400000000 # LL
40
41
 
41
42
  HPC = 0x1
42
43
  NO_SEQ = 0x2
@@ -56,7 +57,7 @@ module Minimap2
56
57
  CIGAR_EQ_MATCH = 7
57
58
  CIGAR_X_MISMATCH = 8
58
59
 
59
- CIGAR_STR = 'MIDNSHP=XB'
60
+ CIGAR_STR = "MIDNSHP=XB"
60
61
 
61
62
  # emulate 128-bit integers
62
63
  class MM128 < ::FFI::Struct
data/lib/minimap2/ffi.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # bit fields
4
- require 'ffi/bit_struct'
4
+ require "ffi/bit_struct"
5
5
  module Minimap2
6
6
  # Native APIs
7
7
  module FFI
@@ -21,6 +21,6 @@ module Minimap2
21
21
  end
22
22
  end
23
23
 
24
- require_relative 'ffi/constants'
25
- require_relative 'ffi/functions'
26
- require_relative 'ffi/mappy'
24
+ require_relative "ffi/constants"
25
+ require_relative "ffi/functions"
26
+ require_relative "ffi/mappy"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Minimap2
4
4
  # Minimap2-2.24 (r1122)
5
- VERSION = '0.2.23.1'
5
+ VERSION = "0.2.24.0"
6
6
  end
data/lib/minimap2.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # dependencies
4
- require 'ffi'
4
+ require "ffi"
5
5
 
6
6
  # modules
7
- require_relative 'minimap2/aligner'
8
- require_relative 'minimap2/alignment'
9
- require_relative 'minimap2/version'
7
+ require_relative "minimap2/aligner"
8
+ require_relative "minimap2/alignment"
9
+ require_relative "minimap2/version"
10
10
 
11
11
  # Minimap2 mapper for long read sequences
12
12
  # https://github.com/lh3/minimap2
@@ -19,19 +19,18 @@ module Minimap2
19
19
  attr_accessor :ffi_lib
20
20
  end
21
21
 
22
- lib_name = ::FFI.map_library_name('minimap2')
23
- self.ffi_lib = if ENV['MINIMAPDIR']
24
- File.expand_path(lib_name, ENV['MINIMAPDIR'])
22
+ lib_name = ::FFI.map_library_name("minimap2")
23
+ self.ffi_lib = if ENV["MINIMAPDIR"]
24
+ File.expand_path(lib_name, ENV["MINIMAPDIR"])
25
25
  else
26
26
  File.expand_path("../vendor/#{lib_name}", __dir__)
27
27
  end
28
28
 
29
29
  # friendlier error message
30
- autoload :FFI, 'minimap2/ffi'
30
+ autoload :FFI, "minimap2/ffi"
31
31
 
32
32
  # methods from mappy
33
33
  class << self
34
-
35
34
  # Execute minimap2 command with given options.
36
35
  # @overload execute(arg0,arg1,...)
37
36
  # @param [String] arg minimap2 command option.
@@ -41,27 +40,35 @@ module Minimap2
41
40
  def Minimap2.execute(*rb_argv)
42
41
  str_ptrs = []
43
42
  # First argument is the program name.
44
- str_ptrs << ::FFI::MemoryPointer.from_string('minimap2')
43
+ str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
45
44
  rb_argv.each do |arg|
46
45
  arg.to_s.split(/\s+/).each do |s|
47
46
  str_ptrs << ::FFI::MemoryPointer.from_string(s)
48
47
  end
49
48
  end
50
- strptrs << nil
49
+ str_ptrs << nil
51
50
 
52
51
  # Load all the pointers into a native memory block
53
- argv = ::FFI::MemoryPointer.new(:pointer, strptrs.length)
54
- strptrs.each_with_index do |p, i|
52
+ argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
53
+ str_ptrs.each_with_index do |p, i|
55
54
  argv[i].put_pointer(0, p)
56
55
  end
57
56
 
58
- FFI.main(strptrs.length - 1, argv)
57
+ FFI.main(str_ptrs.length - 1, argv)
58
+ end
59
+
60
+ # Get verbosity level.
61
+ # @return [Integer] verbosity level.
62
+
63
+ def verbose
64
+ FFI.mm_verbose_level(-1)
59
65
  end
60
66
 
61
67
  # Set verbosity level.
62
- # @param [Integer] level
68
+ # @param [Integer] level verbosity level
69
+ # @return [Integer] verbosity level.
63
70
 
64
- def verbose(level = -1)
71
+ def verbose=(level)
65
72
  FFI.mm_verbose_level(level)
66
73
  end
67
74
 
@@ -109,11 +116,11 @@ module Minimap2
109
116
  end
110
117
 
111
118
  def fastx_next(ks, read_comment)
112
- qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
119
+ qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
113
120
  name = ks[:name][:s]
114
121
  seq = ks[:seq][:s]
115
122
  if read_comment
116
- comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
123
+ comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
117
124
  [name, seq, qual, comment]
118
125
  else
119
126
  [name, seq, qual]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minimap2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.23.1
4
+ version: 0.2.24.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-16 00:00:00.000000000 Z
11
+ date: 2021-12-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -137,7 +137,6 @@ files:
137
137
  - ext/minimap2/tex/ngmlr.eval
138
138
  - ext/minimap2/tex/roc.gp
139
139
  - ext/minimap2/tex/snap-s3.sam.eval
140
- - ext/vendor/libminimap2.so
141
140
  - lib/minimap2.rb
142
141
  - lib/minimap2/aligner.rb
143
142
  - lib/minimap2/alignment.rb
@@ -165,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
165
164
  - !ruby/object:Gem::Version
166
165
  version: '0'
167
166
  requirements: []
168
- rubygems_version: 3.2.26
167
+ rubygems_version: 3.3.3
169
168
  signing_key:
170
169
  specification_version: 4
171
170
  summary: minimap2
Binary file