minimap2 0.2.30.3 → 0.2.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a3bf89a18a50825f14454ce8683dd019377fc45e59a4d46eee617a55d0e64e8
4
- data.tar.gz: c1dd61ced844c50b7aec1ba1f9da8d8b47ab3b94a1e2f4b0b4bcdd16169e4f0c
3
+ metadata.gz: b27519808e7302696fd897b42041bc6f6f11ac114bc9654a91c07c59f5284813
4
+ data.tar.gz: 8adb8dd6b933029ce21b66269750dc1017af4c5e3f292fb7f3da65c7bd25eae7
5
5
  SHA512:
6
- metadata.gz: 198ae12116da51051dcac0665a85611a2d5bbe5df49ff854fff02c1f686f6c5fd7d40378876de81542d8ca9fe6fb8d71c0c59cde53497368ae4fc83b06a4c685
7
- data.tar.gz: 1226cb572e95c805d3817bcaf5048fcc6a3bb217e7ac86b56f866b132c3ffe9ed5f8a1ba8e67b1bb798a1c2852e060f611229ccae9364b477a9863366717ce5c
6
+ metadata.gz: 30ac518def61ad07b388acb1a20dd4857cd7de124c3ca7eb942049834a6927763e2c31e8f9da12457eae786fad92a8153a28cb711f027124521721b7147eb4b0
7
+ data.tar.gz: 9cf1e73fb89b42a55630b049a9dd0a8612c68910f3b02227c2a19a2b0b57963497f7dce1e78dc0f13eafc69967f21eb2e5526808c4bda4d02f9f1bc6a57a57ef
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![test](https://github.com/kojix2/ruby-minimap2/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-minimap2/actions/workflows/ci.yml)
5
5
  [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/ruby-minimap2/)
6
6
  [![The MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE.txt)
7
- [![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305)
7
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18489817.svg)](https://doi.org/10.5281/zenodo.18489817)
8
8
  [![Lines of Code](https://img.shields.io/endpoint?url=https%3A%2F%2Ftokei.kojix2.net%2Fbadge%2Fgithub%2Fkojix2%2Fruby-minimap2%2Flines)](https://tokei.kojix2.net/github/kojix2/ruby-minimap2)
9
9
 
10
10
  :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
@@ -43,7 +43,7 @@ require "minimap2"
43
43
 
44
44
  aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
45
45
  seq = aligner.seq("MT_human", 100, 200)
46
- hits = aligner.align(seq)
46
+ hits = aligner.align(seq, cs: true, ds: true)
47
47
  pp hits
48
48
  ```
49
49
 
@@ -52,9 +52,10 @@ pp hits
52
52
  @blen=100,
53
53
  @cigar=[[100, 0]],
54
54
  @cigar_str="100M",
55
- @cs="",
55
+ @cs=":100",
56
56
  @ctg="MT_human",
57
57
  @ctg_len=16569,
58
+ @ds=":100",
58
59
  @mapq=60,
59
60
  @md="",
60
61
  @mlen=100,
@@ -105,6 +106,7 @@ pp hits
105
106
  - cigar Returns the CIGAR as an array of shape (n_cigar,2). The two numbers give the length and the operator of each CIGAR operation.
106
107
  - read_num Returns the read number that the alignment corresponds to; 1 for the first read and 2 for the second read.
107
108
  - cs Returns the cs tag.
109
+ - ds Returns the ds tag.
108
110
  - md Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align.
109
111
  - cigar_str Returns CIGAR string.
110
112
  * methods
data/ext/Rakefile CHANGED
@@ -16,6 +16,9 @@ namespace :minimap2 do
16
16
  task :build do
17
17
  Dir.chdir(minimap2_dir) do
18
18
  # Add -fPIC option to Makefile
19
+ unless system("git", "--version", out: File::NULL, err: File::NULL)
20
+ abort "Git is required to build ruby-minimap2 from source. Install Git and ensure `git` is on PATH."
21
+ end
19
22
  sh "git apply ../minimap2.patch"
20
23
  sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
21
24
  case RbConfig::CONFIG["host_cpu"]
@@ -102,7 +102,7 @@ ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
102
102
  # other non-file targets
103
103
 
104
104
  clean:
105
- rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
105
+ rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg* .eggs
106
106
 
107
107
  depend:
108
108
  (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,36 @@
1
+ Release 2.31-r1302 (19 May 2026)
2
+ --------------------------------
3
+
4
+ Notable changes to minimap2:
5
+
6
+ * Bugfix: supplementary and secondary alignments were occasionally flagged
7
+ incorrectly.
8
+
9
+ * Bugfix: Smith-Waterman alignment for inversion alignment led to an
10
+ out-of-bound access in rare cases.
11
+
12
+ Changes to paftools.js:
13
+
14
+ * New feature: new `sim2bed` subcommand to get a BED file from simulated
15
+ reads.
16
+
17
+ * New feature: new `badread2fa` subcommand to format reads simulated by
18
+ the Badread simulator.
19
+
20
+ Changes to the Python binding:
21
+
22
+ * New feature: mappy optionally writes the `ds` tag.
23
+
24
+ * Bugfix: fixed a use-after-free error (#1345).
25
+
26
+ The two bugs in minimap2 had existed for years. They were caught by Jeremy Wang
27
+ at UNC when he ported minimap2 to Rust. Due to the two bug fixes, this version
28
+ occasionally produces alignments different from those of the last version.
29
+
30
+ (2.31: 19 May 2026, r1302)
31
+
32
+
33
+
1
34
  Release 2.30-r1287 (15 June 2025)
2
35
  ---------------------------------
3
36
 
@@ -3,6 +3,7 @@
3
3
  [![PyPI](https://img.shields.io/pypi/v/mappy.svg?style=flat)](https://pypi.python.org/pypi/mappy)
4
4
  [![Build Status](https://github.com/lh3/minimap2/actions/workflows/ci.yaml/badge.svg)](https://github.com/lh3/minimap2/actions)
5
5
  ## <a name="started"></a>Getting Started
6
+ **ALERT:** `minimap2.com` is a [phishing site](https://github.com/lh3/minimap2/issues/1316). Please don't use anything from that website.
6
7
  ```sh
7
8
  git clone https://github.com/lh3/minimap2
8
9
  cd minimap2 && make
@@ -77,8 +78,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
77
78
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
78
79
  the [release page][release] with:
79
80
  ```sh
80
- curl -L https://github.com/lh3/minimap2/releases/download/v2.30/minimap2-2.30_x64-linux.tar.bz2 | tar -jxvf -
81
- ./minimap2-2.30_x64-linux/minimap2
81
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.31/minimap2-2.31_x64-linux.tar.bz2 | tar -jxvf -
82
+ ./minimap2-2.31_x64-linux/minimap2
82
83
  ```
83
84
  If you want to compile from the source, you need to have a C compiler, GNU make
84
85
  and zlib development files installed. Then type `make` in the source code
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.30/minimap2-2.30_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.30_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.31/minimap2-2.31_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.31_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
@@ -361,24 +361,34 @@ static void write_cs_ds_or_MD(void *km, kstring_t *s, const mm_idx_t *mi, const
361
361
  kfree(km, qseq); kfree(km, tseq); kfree(km, tmp);
362
362
  }
363
363
 
364
- int mm_gen_cs_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int no_iden, int is_qstrand)
364
+ int mm_gen_cs_ds_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int is_ds, int no_iden, int is_qstrand)
365
365
  {
366
366
  mm_bseq1_t t;
367
367
  kstring_t str;
368
368
  str.s = *buf, str.l = 0, str.m = *max_len;
369
369
  t.l_seq = strlen(seq);
370
370
  t.seq = (char*)seq;
371
- write_cs_ds_or_MD(km, &str, mi, &t, r, no_iden, is_MD, 0, 0, is_qstrand);
371
+ write_cs_ds_or_MD(km, &str, mi, &t, r, no_iden, is_MD, is_ds, 0, is_qstrand);
372
372
  *max_len = str.m;
373
373
  *buf = str.s;
374
374
  return str.l;
375
375
  }
376
376
 
377
+ int mm_gen_cs_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int no_iden, int is_qstrand)
378
+ {
379
+ return mm_gen_cs_ds_or_MD(km, buf, max_len, mi, r, seq, is_MD, 0, no_iden, is_qstrand);
380
+ }
381
+
377
382
  int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
378
383
  {
379
384
  return mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, 0, no_iden, 0);
380
385
  }
381
386
 
387
+ int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
388
+ {
389
+ return mm_gen_cs_ds_or_MD(km, buf, max_len, mi, r, seq, 0, 1, no_iden, 0);
390
+ }
391
+
382
392
  int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
383
393
  {
384
394
  return mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, 1, 0, 0);
data/ext/minimap2/hit.c CHANGED
@@ -256,19 +256,25 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int chec
256
256
  {
257
257
  if (pri_ratio > 0.0f && *n_ > 0) {
258
258
  int i, k, n = *n_, n_2nd = 0;
259
- for (i = k = 0; i < n; ++i) {
259
+ uint8_t *keep = (uint8_t*)kmalloc(km, n);
260
+ for (i = 0; i < n; ++i) {
260
261
  int p = r[i].parent;
262
+ keep[i] = 0;
261
263
  if (p == i || r[i].inv) { // primary or inversion
262
- r[k++] = r[i];
264
+ keep[i] = 1;
263
265
  } else if ((r[i].score >= r[p].score * pri_ratio || r[i].score + min_diff >= r[p].score) && n_2nd < best_n) {
264
266
  if (!(r[i].qs == r[p].qs && r[i].qe == r[p].qe && r[i].rid == r[p].rid && r[i].rs == r[p].rs && r[i].re == r[p].re)) // not identical hits
265
- r[k++] = r[i], ++n_2nd;
266
- else if (r[i].p) free(r[i].p);
267
+ keep[i] = 1, ++n_2nd;
267
268
  } else if (check_strand && n_2nd < best_n && r[i].score > min_strand_sc && r[i].rev != r[p].rev) {
268
269
  r[i].strand_retained = 1;
269
- r[k++] = r[i], ++n_2nd;
270
- } else if (r[i].p) free(r[i].p);
270
+ keep[i] = 1, ++n_2nd;
271
+ }
272
+ }
273
+ for (i = k = 0; i < n; ++i) {
274
+ if (keep[i]) r[k++] = r[i];
275
+ else if (r[i].p) free(r[i].p);
271
276
  }
277
+ kfree(km, keep);
272
278
  if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
273
279
  *n_ = k;
274
280
  }
@@ -277,13 +283,18 @@ void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int chec
277
283
  int mm_filter_strand_retained(int n_regs, mm_reg1_t *r)
278
284
  {
279
285
  int i, k;
280
- for (i = k = 0; i < n_regs; ++i) {
286
+ uint8_t *keep = (uint8_t*)malloc(n_regs);
287
+ for (i = 0; i < n_regs; ++i) {
281
288
  int p = r[i].parent;
282
- if (!r[i].strand_retained || r[i].div < r[p].div * 5.0f || r[i].div < 0.01f) {
289
+ keep[i] = (!r[i].strand_retained || r[i].div < r[p].div * 5.0f || r[i].div < 0.01f);
290
+ }
291
+ for (i = k = 0; i < n_regs; ++i) {
292
+ if (keep[i]) {
283
293
  if (k < i) r[k++] = r[i];
284
294
  else ++k;
285
295
  }
286
296
  }
297
+ free(keep);
287
298
  return k;
288
299
  }
289
300
 
@@ -67,7 +67,7 @@ void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, co
67
67
  const int8_t *ma = mat + a * m;
68
68
  for (i = 0; i < slen; ++i)
69
69
  for (k = i; k < nlen; k += slen) // p iterations
70
- *t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
70
+ *t++ = (k >= qlen? -1 : ma[query[k]]) + q->shift;
71
71
  }
72
72
  } else {
73
73
  int16_t *t = (int16_t*)q->qp;
@@ -76,7 +76,7 @@ void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, co
76
76
  const int8_t *ma = mat + a * m;
77
77
  for (i = 0; i < slen; ++i)
78
78
  for (k = i; k < nlen; k += slen) // p iterations
79
- *t++ = (k >= qlen? 0 : ma[query[k]]);
79
+ *t++ = (k >= qlen? -1 : ma[query[k]]);
80
80
  }
81
81
  }
82
82
  return q;
@@ -5,7 +5,7 @@
5
5
  #include <stdio.h>
6
6
  #include <sys/types.h>
7
7
 
8
- #define MM_VERSION "2.30-r1287"
8
+ #define MM_VERSION "2.31-r1302"
9
9
 
10
10
  #define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
11
11
  #define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
@@ -408,6 +408,7 @@ int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_
408
408
  * @return the length of cs
409
409
  */
410
410
  int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
411
+ int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
411
412
  int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
412
413
 
413
414
  // query sequence name and sequence in the minimap2 index
@@ -1,4 +1,4 @@
1
- .TH minimap2 1 "15 June 2025" "minimap2-2.30 (r1287)" "Bioinformatics tools"
1
+ .TH minimap2 1 "19 May 2026" "minimap2-2.31 (r1302)" "Bioinformatics tools"
2
2
  .SH NAME
3
3
  .PP
4
4
  minimap2 - mapping and alignment between collections of DNA sequences
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env k8
2
2
 
3
- var paftools_version = '2.30-r1287';
3
+ var paftools_version = '2.31-r1302';
4
4
 
5
5
  /*****************************
6
6
  ***** Library functions *****
@@ -1740,10 +1740,11 @@ function paf_gff2bed(args)
1740
1740
 
1741
1741
  function paf_sam2paf(args)
1742
1742
  {
1743
- var c, pri_only = false, long_cs = false, pri_pri_only = false;
1744
- while ((c = getopt(args, "pPL")) != null) {
1743
+ var c, pri_only = false, long_cs = false, pri_pri_only = false, allow_unmapped = false;
1744
+ while ((c = getopt(args, "pPUL")) != null) {
1745
1745
  if (c == 'p') pri_only = true;
1746
1746
  else if (c == 'P') pri_pri_only = pri_only = true;
1747
+ else if (c == 'U') allow_unmapped = true;
1747
1748
  else if (c == 'L') long_cs = true;
1748
1749
  }
1749
1750
  if (args.length == getopt.ind) {
@@ -1751,6 +1752,7 @@ function paf_sam2paf(args)
1751
1752
  print("Options:");
1752
1753
  print(" -p convert primary or supplementary alignments only");
1753
1754
  print(" -P convert primary alignments only");
1755
+ print(" -U convert unmapped reads as well");
1754
1756
  print(" -L output the cs tag in the long form");
1755
1757
  exit(1);
1756
1758
  }
@@ -1775,7 +1777,15 @@ function paf_sam2paf(args)
1775
1777
  var flag = parseInt(t[1]);
1776
1778
  if (t[9] != '*' && t[10] != '*' && t[9].length != t[10].length)
1777
1779
  throw Error("at line " + lineno + ": inconsistent SEQ and QUAL lengths - " + t[9].length + " != " + t[10].length);
1778
- if (t[2] == '*' || (flag&4) || t[5] == '*') continue;
1780
+ if (t[2] == '*' || (flag&4) || t[5] == '*') {
1781
+ if (allow_unmapped) {
1782
+ // emit an unmapped PAF line instead of skipping
1783
+ // fields: qname, qlen, qstart, qend, strand, tname, tlen, tstart, tend, n_match, aln_len, mapq
1784
+ var qlen_val = (t[9] == '*' ? 0 : t[9].length);
1785
+ print([t[0], qlen_val, 0, 0, '*', '*', 0, 0, 0, 0, 0, 0].join("\t"));
1786
+ }
1787
+ continue;
1788
+ }
1779
1789
  if (pri_only && (flag&0x100)) continue;
1780
1790
  if (pri_pri_only && (flag&0x900)) continue;
1781
1791
  var tlen = ctg_len[t[2]];
@@ -2338,6 +2348,41 @@ function paf_mason2fq(args)
2338
2348
  buf2.destroy();
2339
2349
  }
2340
2350
 
2351
+ // convert simulated read names (Mason paired-end or badread/pbsim long reads) to BED
2352
+ function paf_sim2bed(args)
2353
+ {
2354
+ if (args.length == 0) {
2355
+ print("Usage: paftools.js sim2bed <sim.txt>");
2356
+ exit(1);
2357
+ }
2358
+ var buf = new Bytes();
2359
+ var file = new File(args[0]);
2360
+ while (file.readline(buf) >= 0) {
2361
+ var line = buf.toString();
2362
+ var t = line.split("!");
2363
+ if (t.length < 5) continue;
2364
+ var chr = t[1], st, en, strand;
2365
+ if (t[2].indexOf("_") >= 0) { // mason paired-end
2366
+ var pos = t[2].split("_");
2367
+ var end = t[3].split("_");
2368
+ var m = /^(.)(.)\/([12])$/.exec(t[4]);
2369
+ if (m == null) continue;
2370
+ strand = m[3] == "1" ? m[1] : m[2];
2371
+ var read_no = parseInt(m[3]) - 1;
2372
+ st = parseInt(pos[read_no]);
2373
+ en = parseInt(end[read_no]);
2374
+ } else { // badread/pbsim long reads
2375
+ st = parseInt(t[2]);
2376
+ en = parseInt(t[3]);
2377
+ strand = t[4];
2378
+ }
2379
+ if (st > en) { var tmp = st; st = en; en = tmp; }
2380
+ print([chr, st, en, line, 0, strand].join("\t"));
2381
+ }
2382
+ file.close();
2383
+ buf.destroy();
2384
+ }
2385
+
2341
2386
  // convert pbsim MAF to FASTQ
2342
2387
  function paf_pbsim2fq(args)
2343
2388
  {
@@ -2395,6 +2440,53 @@ function paf_pbsim2fq(args)
2395
2440
  buf2.destroy();
2396
2441
  }
2397
2442
 
2443
+ function paf_badread2fa(args)
2444
+ {
2445
+ if (args.length < 2) {
2446
+ print("Usage: paftools.js badread2fa <ref.fa.fai> <badread.fq>");
2447
+ exit(1);
2448
+ }
2449
+
2450
+ var len = {}, file, buf = new Bytes();
2451
+ file = new File(args[0]);
2452
+ while (file.readline(buf) >= 0) {
2453
+ var t = buf.toString().split("\t");
2454
+ len[t[0]] = parseInt(t[1]);
2455
+ }
2456
+ file.close();
2457
+
2458
+ var id = 0, n_discard = 0;
2459
+ file = new File(args[1]);
2460
+ while (file.readline(buf) >= 0) {
2461
+ var line = buf.toString();
2462
+ var m, tag = '', a = null, is_fq = line[0] == '@'? true : false;
2463
+ if (!/\schimera\s/.test(line) && (m = /\s(\S+),([+-])strand,(\d+)-(\d+).*read_identity=([0-9\.]+)%/.exec(line)) != null) {
2464
+ if (len[m[1]] == null) throw Error("failed to find the contig length of " + m[1]);
2465
+ m[3] = parseInt(m[3]);
2466
+ m[4] = parseInt(m[4]);
2467
+ if (m[2] == '+')
2468
+ a = [ "S" + (id+1), m[1], m[3], m[4], m[2] ];
2469
+ else
2470
+ a = [ "S" + (id+1), m[1], len[m[1]] - m[4], len[m[1]] - m[3], m[2] ];
2471
+ tag = "ri:f:" + m[5];
2472
+ }
2473
+ file.readline(buf);
2474
+ var seq = buf.toString();
2475
+ if (is_fq) {
2476
+ file.readline(buf);
2477
+ file.readline(buf);
2478
+ }
2479
+ if (a != null) {
2480
+ print(">" + a.join("!"), tag);
2481
+ print(seq);
2482
+ } else ++n_discard;
2483
+ ++id;
2484
+ }
2485
+ file.close();
2486
+ buf.destroy();
2487
+ warn("WARNING: discarded " + n_discard + " reads");
2488
+ }
2489
+
2398
2490
  function paf_junceval(args)
2399
2491
  {
2400
2492
  var c, l_fuzzy = 0, print_ovlp = false, print_err_only = false, first_only = false, chr_only = false, aa = false, is_bed = false;
@@ -3693,7 +3785,9 @@ function main(args)
3693
3785
  print(" mapeval evaluate mapping accuracy using mason2/PBSIM-simulated FASTQ");
3694
3786
  print(" pafcmp compare two PAF files");
3695
3787
  print(" mason2fq convert mason2-simulated SAM to FASTQ");
3788
+ print(" sim2bed convert mason2-simulated read names to BED");
3696
3789
  print(" pbsim2fq convert PBSIM-simulated MAF to FASTQ");
3790
+ print(" badread2fa convert Baderead FASTQ to FASTA");
3697
3791
  print(" junceval evaluate splice junction consistency with known annotations");
3698
3792
  print(" exoneval evaluate exon-level consistency with known annotations");
3699
3793
  print(" ov-eval evaluate read overlap sensitivity using read-to-ref mapping");
@@ -3718,7 +3812,9 @@ function main(args)
3718
3812
  else if (cmd == 'pafcmp') paf_pafcmp(args);
3719
3813
  else if (cmd == 'bedcov') paf_bedcov(args);
3720
3814
  else if (cmd == 'mason2fq') paf_mason2fq(args);
3815
+ else if (cmd == 'sim2bed') paf_sim2bed(args);
3721
3816
  else if (cmd == 'pbsim2fq') paf_pbsim2fq(args);
3817
+ else if (cmd == 'badread2fa') paf_badread2fa(args);
3722
3818
  else if (cmd == 'junceval') paf_junceval(args);
3723
3819
  else if (cmd == 'exoneval') paf_exoneval(args);
3724
3820
  else if (cmd == 'ov-eval') paf_ov_eval(args);
data/ext/minimap2/pe.c CHANGED
@@ -8,7 +8,8 @@ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int
8
8
  if (pri_ratio > 0.0f && *n_ > 0) {
9
9
  int i, k, n = *n_, n_2nd = 0;
10
10
  int max_dist = n_segs == 2? qlens[0] + qlens[1] + max_gap_ref : 0;
11
- for (i = k = 0; i < n; ++i) {
11
+ uint8_t *keep = (uint8_t*)kmalloc(km, n);
12
+ for (i = 0; i < n; ++i) {
12
13
  int to_keep = 0;
13
14
  if (r[i].parent == i) { // primary
14
15
  to_keep = 1;
@@ -34,9 +35,13 @@ void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int
34
35
  if (to_keep && r[i].parent != i) {
35
36
  if (n_2nd++ >= best_n) to_keep = 0; // don't keep if there are too many secondary hits
36
37
  }
37
- if (to_keep) r[k++] = r[i];
38
+ keep[i] = to_keep;
39
+ }
40
+ for (i = k = 0; i < n; ++i) {
41
+ if (keep[i]) r[k++] = r[i];
38
42
  else if (r[i].p) free(r[i].p);
39
43
  }
44
+ kfree(km, keep);
40
45
  if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
41
46
  *n_ = k;
42
47
  }
@@ -112,6 +112,7 @@ cdef extern from "minimap.h":
112
112
  void mm_tbuf_destroy(mm_tbuf_t *b)
113
113
  void *mm_tbuf_get_km(mm_tbuf_t *b)
114
114
  int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
115
+ int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
115
116
  int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
116
117
 
117
118
  #
@@ -3,7 +3,7 @@ from libc.stdlib cimport free
3
3
  cimport cmappy
4
4
  import sys
5
5
 
6
- __version__ = '2.30'
6
+ __version__ = '2.31'
7
7
 
8
8
  cmappy.mm_reset_timer()
9
9
 
@@ -14,9 +14,9 @@ cdef class Alignment:
14
14
  cdef int8_t _strand, _trans_strand
15
15
  cdef uint8_t _mapq, _is_primary
16
16
  cdef int _seg_id
17
- cdef _ctg, _cigar, _cs, _MD # these are python objects
17
+ cdef _ctg, _cigar, _cs, _ds, _MD # these are python objects
18
18
 
19
- def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, MD_str):
19
+ def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, ds_str, MD_str):
20
20
  self._ctg = ctg if isinstance(ctg, str) else ctg.decode()
21
21
  self._ctg_len, self._r_st, self._r_en = cl, cs, ce
22
22
  self._strand, self._q_st, self._q_en = strand, qs, qe
@@ -27,6 +27,7 @@ cdef class Alignment:
27
27
  self._trans_strand = trans_strand
28
28
  self._seg_id = seg_id
29
29
  self._cs = cs_str
30
+ self._ds = ds_str
30
31
  self._MD = MD_str
31
32
 
32
33
  @property
@@ -77,6 +78,9 @@ cdef class Alignment:
77
78
  @property
78
79
  def cs(self): return self._cs
79
80
 
81
+ @property
82
+ def ds(self): return self._ds
83
+
80
84
  @property
81
85
  def MD(self): return self._MD
82
86
 
@@ -96,6 +100,7 @@ cdef class Alignment:
96
100
  a = [str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
97
101
  str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]
98
102
  if self._cs != "": a.append("cs:Z:" + self._cs)
103
+ if self._ds != "": a.append("ds:Z:" + self._ds)
99
104
  if self._MD != "": a.append("MD:Z:" + self._MD)
100
105
  return "\t".join(a)
101
106
 
@@ -165,7 +170,7 @@ cdef class Aligner:
165
170
  def __bool__(self):
166
171
  return (self._idx != NULL)
167
172
 
168
- def map(self, seq, seq2=None, name=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
173
+ def map(self, seq, seq2=None, name=None, buf=None, cs=False, ds=False, MD=False, max_frag_len=None, extra_flags=None):
169
174
  cdef cmappy.mm_reg1_t *regs
170
175
  cdef cmappy.mm_hitpy_t h
171
176
  cdef ThreadBuffer b
@@ -184,7 +189,6 @@ cdef class Aligner:
184
189
  if self._idx is NULL: return None
185
190
  if buf is None: b = ThreadBuffer()
186
191
  else: b = buf
187
- km = cmappy.mm_tbuf_get_km(b._b)
188
192
 
189
193
  _seq = seq if isinstance(seq, bytes) else seq.encode()
190
194
  if name is not None:
@@ -206,19 +210,23 @@ cdef class Aligner:
206
210
  i = 0
207
211
  while i < n_regs:
208
212
  cmappy.mm_reg2hitpy(self._idx, &regs[i], &h)
209
- cigar, _cs, _MD = [], '', ''
213
+ cigar, _cs, _ds, _MD = [], '', '', ''
210
214
  for k in range(h.n_cigar32): # convert the 32-bit CIGAR encoding to Python array
211
215
  c = h.cigar32[k]
212
216
  cigar.append([c>>4, c&0xf])
213
- if cs or MD: # generate the cs and/or the MD tag, if requested
217
+ if cs or ds or MD: # generate the cs/ds and/or the MD tag, if requested
218
+ km = cmappy.mm_tbuf_get_km(b._b)
214
219
  _cur_seq = _seq2 if h.seg_id > 0 and seq2 is not None else _seq
215
220
  if cs:
216
221
  l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq, 1)
217
222
  _cs = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
223
+ if ds:
224
+ l_cs_str = cmappy.mm_gen_ds(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq, 1)
225
+ _ds = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
218
226
  if MD:
219
227
  l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq)
220
228
  _MD = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
221
- yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _MD)
229
+ yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _ds, _MD)
222
230
  cmappy.mm_free_reg1(&regs[i])
223
231
  i += 1
224
232
  finally:
@@ -5,7 +5,7 @@ import getopt
5
5
  import mappy as mp
6
6
 
7
7
  def main(argv):
8
- opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:cM")
8
+ opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:cdM")
9
9
  if len(args) < 2:
10
10
  print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
11
11
  print("Options:")
@@ -16,11 +16,12 @@ def main(argv):
16
16
  print(" -w INT minimizer window length")
17
17
  print(" -r INT band width")
18
18
  print(" -c output the cs tag")
19
+ print(" -d output the ds tag")
19
20
  print(" -M output the MD tag")
20
21
  sys.exit(1)
21
22
 
22
23
  preset = min_cnt = min_sc = k = w = bw = None
23
- out_cs = out_MD = False
24
+ out_cs = out_ds = out_MD = False
24
25
  for opt, arg in opts:
25
26
  if opt == '-x': preset = arg
26
27
  elif opt == '-n': min_cnt = int(arg)
@@ -29,12 +30,13 @@ def main(argv):
29
30
  elif opt == '-k': k = int(arg)
30
31
  elif opt == '-w': w = int(arg)
31
32
  elif opt == '-c': out_cs = True
33
+ elif opt == '-d': out_ds = True
32
34
  elif opt == '-M': out_MD = True
33
35
 
34
36
  a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
35
37
  if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
36
38
  for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
37
- for h in a.map(seq, cs=out_cs, MD=out_MD): # traverse hits
39
+ for h in a.map(seq, cs=out_cs, ds=out_ds, MD=out_MD): # traverse hits
38
40
  print('{}\t{}\t{}'.format(name, len(seq), h))
39
41
 
40
42
  if __name__ == "__main__":
@@ -23,7 +23,7 @@ def readme():
23
23
 
24
24
  setup(
25
25
  name = 'mappy',
26
- version = '2.30',
26
+ version = '2.31',
27
27
  url = 'https://github.com/lh3/minimap2',
28
28
  description = 'Minimap2 python binding',
29
29
  long_description = readme(),
@@ -160,6 +160,7 @@ module Minimap2
160
160
  # @param seq2 [String]
161
161
  # @param buf [FFI::TBuf]
162
162
  # @param cs [true, false]
163
+ # @param ds [true, false]
163
164
  # @param md [true, false]
164
165
  # @param max_frag_len [Integer]
165
166
  # @param extra_flags [Integer]
@@ -174,6 +175,7 @@ module Minimap2
174
175
  name: nil,
175
176
  buf: nil,
176
177
  cs: false,
178
+ ds: false,
177
179
  md: false,
178
180
  max_frag_len: nil,
179
181
  extra_flags: nil
@@ -223,7 +225,7 @@ module Minimap2
223
225
 
224
226
  cs_buf_ptr = nil
225
227
  m_cs_ptr = nil
226
- if cs || md
228
+ if cs || ds || md
227
229
  cs_buf_ptr = ::FFI::MemoryPointer.new(:pointer)
228
230
  cs_buf_ptr.write_pointer(::FFI::Pointer::NULL)
229
231
  m_cs_ptr = ::FFI::MemoryPointer.new(:int)
@@ -239,8 +241,9 @@ module Minimap2
239
241
  cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
240
242
 
241
243
  _cs = ""
244
+ _ds = ""
242
245
  _md = ""
243
- if cs or md
246
+ if cs || ds || md
244
247
  cur_seq = hit[:seg_id] > 0 && seq2 ? seq2 : seq
245
248
 
246
249
  if cs
@@ -249,6 +252,12 @@ module Minimap2
249
252
  _cs = cs_ptr.null? || l_cs_str <= 0 ? "" : cs_ptr.read_string(l_cs_str)
250
253
  end
251
254
 
255
+ if ds
256
+ l_cs_str = FFI.mm_gen_ds(km, cs_buf_ptr, m_cs_ptr, idx_part, regs[i], cur_seq, 1)
257
+ cs_ptr = cs_buf_ptr.read_pointer
258
+ _ds = cs_ptr.null? || l_cs_str <= 0 ? "" : cs_ptr.read_string(l_cs_str)
259
+ end
260
+
252
261
  if md
253
262
  l_cs_str = FFI.mm_gen_md(km, cs_buf_ptr, m_cs_ptr, idx_part, regs[i], cur_seq)
254
263
  cs_ptr = cs_buf_ptr.read_pointer
@@ -256,7 +265,7 @@ module Minimap2
256
265
  end
257
266
  end
258
267
 
259
- alignments << Alignment.new(hit, cigar, _cs, _md)
268
+ alignments << Alignment.new(hit, cigar, _cs, _ds, _md)
260
269
 
261
270
  FFI.mm_free_reg1(regs[i])
262
271
  i += 1
@@ -40,6 +40,8 @@ module Minimap2
40
40
  # 1 for the first read and 2 for the second read.
41
41
  # @!attribute cs
42
42
  # @return [String] the cs tag.
43
+ # @!attribute ds
44
+ # @return [String] the ds tag.
43
45
  # @!attribute md
44
46
  # @return [String] the MD tag as in the SAM format.
45
47
  # It is an empty string unless the md argument is applied when calling Aligner#align.
@@ -49,12 +51,12 @@ module Minimap2
49
51
  class Alignment
50
52
  def self.keys
51
53
  %i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
52
- q_st q_en mapq cigar read_num cs md cigar_str]
54
+ q_st q_en mapq cigar read_num cs ds md cigar_str]
53
55
  end
54
56
 
55
57
  attr_reader(*keys)
56
58
 
57
- def initialize(h, cigar, cs = nil, md = nil)
59
+ def initialize(h, cigar, cs = nil, ds = nil, md = nil)
58
60
  @ctg = h[:ctg]
59
61
  @ctg_len = h[:ctg_len]
60
62
  @r_st = h[:ctg_start]
@@ -71,6 +73,7 @@ module Minimap2
71
73
  @cigar = cigar
72
74
  @read_num = h[:seg_id] + 1
73
75
  @cs = cs
76
+ @ds = ds
74
77
  @md = md
75
78
 
76
79
  @cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
@@ -106,8 +109,9 @@ module Minimap2
106
109
  end
107
110
  a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108
111
  @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
109
- a << "cs:Z:#{@cs}" if @cs
110
- a << "MD:Z:#{@md}" if @md
112
+ a << "cs:Z:#{@cs}" unless @cs.nil? || @cs.empty?
113
+ a << "ds:Z:#{@ds}" unless @ds.nil? || @ds.empty?
114
+ a << "MD:Z:#{@md}" unless @md.nil? || @md.empty?
111
115
  a.join("\t")
112
116
  end
113
117
  end
@@ -151,6 +151,12 @@ module Minimap2
151
151
  [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
152
152
  :int
153
153
 
154
+ # int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
155
+ attach_function \
156
+ :mm_gen_ds,
157
+ [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
158
+ :int
159
+
154
160
  # int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
155
161
  attach_function \
156
162
  :mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- VERSION = "0.2.30.3"
4
+ VERSION = "0.2.31.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minimap2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.30.3
4
+ version: 0.2.31.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
@@ -167,7 +167,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
167
167
  - !ruby/object:Gem::Version
168
168
  version: '0'
169
169
  requirements: []
170
- rubygems_version: 4.0.3
170
+ rubygems_version: 4.0.10
171
171
  specification_version: 4
172
172
  summary: minimap2
173
173
  test_files: []