minimap2 0.2.21 → 0.2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +53 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +75 -56
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +80 -28
  99. metadata +97 -65
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
@@ -0,0 +1,38 @@
1
+ Q 60 23616 0 0.000000000
2
+ Q 45 3520 1 0.000036851
3
+ Q 41 1840 1 0.000069023
4
+ Q 37 328 2 0.000136500
5
+ Q 36 276 1 0.000169033
6
+ Q 35 480 1 0.000199601
7
+ Q 33 375 2 0.000262855
8
+ Q 31 178 2 0.000326659
9
+ Q 30 153 5 0.000487551
10
+ Q 29 200 1 0.000516696
11
+ Q 27 100 3 0.000611601
12
+ Q 26 93 3 0.000706056
13
+ Q 25 75 2 0.000768393
14
+ Q 24 82 1 0.000798314
15
+ Q 23 80 6 0.000987387
16
+ Q 22 71 6 0.001175835
17
+ Q 21 76 7 0.001394921
18
+ Q 20 63 9 0.001676897
19
+ Q 19 55 4 0.001800322
20
+ Q 18 62 8 0.002048987
21
+ Q 17 55 7 0.002265718
22
+ Q 16 60 10 0.002575539
23
+ Q 15 82 9 0.002850877
24
+ Q 14 67 7 0.003063745
25
+ Q 13 62 11 0.003401042
26
+ Q 12 64 13 0.003799084
27
+ Q 11 56 5 0.003947900
28
+ Q 10 58 17 0.004468303
29
+ Q 9 70 22 0.005139796
30
+ Q 8 23 9 0.005414604
31
+ Q 7 41 17 0.005933068
32
+ Q 6 42 18 0.006480881
33
+ Q 5 33 9 0.006751757
34
+ Q 4 29 9 0.007022948
35
+ Q 3 27 15 0.007478764
36
+ Q 2 23 10 0.007781024
37
+ Q 1 9 2 0.007840364
38
+ Q 0 13 8 0.008083105
@@ -0,0 +1,60 @@
1
+ set t po eps enh co so "Helvetica,26"
2
+
3
+ set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
4
+ set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
5
+ set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
6
+ set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
7
+ set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
8
+ set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
9
+
10
+ set out "roc-color.eps"
11
+
12
+ set pointsize 2.0
13
+ set size 1.59,1.04
14
+ set multiplot layout 1,2
15
+
16
+ set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
17
+ set xlab "Error rate of mapped PacBio reads"
18
+ set ylab "Fraction of mapped reads" off +1.8
19
+ set ytics 0.02
20
+ set yran [0.9:1]
21
+
22
+ set size 0.8,1
23
+ set log x
24
+ set format x "10^{%L}"
25
+ set key bot right
26
+ plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
27
+ "<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
28
+ "<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
29
+ "<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
30
+ "<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
31
+ "<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
32
+ unset label
33
+
34
+ set origin 0.8,0
35
+ set size 0.79,1
36
+ set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
37
+ set xlab "Error rate of mapped short reads"
38
+
39
+ set key top left
40
+ plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
41
+ "<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
42
+ "<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
43
+ "<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
44
+
45
+ #unset log
46
+ #unset format
47
+ #unset key
48
+ #set log y
49
+ #set ylab "Accumulative mapping error rate" off +0
50
+ #set xlab "Mapping quality"
51
+ #set yran [1e-5:0.1]
52
+ #set ytics 1e-5,0.1
53
+ #set format y "10^{%L}"
54
+ #set xran [60:0] reverse
55
+ #plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
56
+ # "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
57
+ # "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
58
+ # "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
59
+ # "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
60
+ # "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
@@ -0,0 +1,62 @@
1
+ Q 60 18993268 10320 0.000543350 18993268
2
+ Q 59 33156 216 0.000553756 19026424
3
+ Q 58 29982 295 0.000568365 19056406
4
+ Q 57 9412 278 0.000582666 19065818
5
+ Q 56 11012 228 0.000594281 19076830
6
+ Q 55 9968 235 0.000606283 19086798
7
+ Q 54 8602 292 0.000621301 19095400
8
+ Q 53 6094 259 0.000634662 19101494
9
+ Q 52 5026 257 0.000647946 19106520
10
+ Q 51 4278 224 0.000659522 19110798
11
+ Q 50 3682 178 0.000668708 19114480
12
+ Q 49 2750 156 0.000676772 19117230
13
+ Q 48 2314 112 0.000682548 19119544
14
+ Q 47 2056 96 0.000687495 19121600
15
+ Q 46 1658 62 0.000690677 19123258
16
+ Q 45 1492 74 0.000694493 19124750
17
+ Q 44 1150 56 0.000697379 19125900
18
+ Q 43 1062 48 0.000699850 19126962
19
+ Q 42 976 60 0.000702951 19127938
20
+ Q 41 884 36 0.000704800 19128822
21
+ Q 40 708 52 0.000707493 19129530
22
+ Q 39 870 26 0.000708819 19130400
23
+ Q 38 598 26 0.000710156 19130998
24
+ Q 37 542 34 0.000711913 19131540
25
+ Q 36 846 50 0.000714495 19132386
26
+ Q 35 590 50 0.000717087 19132976
27
+ Q 34 550 42 0.000719261 19133526
28
+ Q 33 2174 66 0.000722628 19135700
29
+ Q 32 876 86 0.000727089 19136576
30
+ Q 31 638 104 0.000732500 19137214
31
+ Q 30 1718 196 0.000742675 19138932
32
+ Q 29 91022 968 0.000789497 19229954
33
+ Q 28 12864 781 0.000829556 19242818
34
+ Q 27 5806 427 0.000851489 19248624
35
+ Q 26 25274 728 0.000888144 19273898
36
+ Q 25 7418 680 0.000923070 19281316
37
+ Q 24 11800 701 0.000958839 19293116
38
+ Q 23 57328 3933 0.001159250 19350444
39
+ Q 22 7662 846 0.001202494 19358106
40
+ Q 21 5924 617 0.001233989 19364030
41
+ Q 20 4623 574 0.001263330 19368653
42
+ Q 19 4988 942 0.001311627 19373641
43
+ Q 18 3968 793 0.001352282 19377609
44
+ Q 17 3630 681 0.001387166 19381239
45
+ Q 16 2921 513 0.001413422 19384160
46
+ Q 15 2716 424 0.001435095 19386876
47
+ Q 14 2366 365 0.001453744 19389242
48
+ Q 13 2169 412 0.001474828 19391411
49
+ Q 12 2077 360 0.001493233 19393488
50
+ Q 11 2016 441 0.001515815 19395504
51
+ Q 10 2292 738 0.001553682 19397796
52
+ Q 9 4165 1832 0.001647772 19401961
53
+ Q 8 3963 1862 0.001743385 19405924
54
+ Q 7 3927 1793 0.001835408 19409851
55
+ Q 6 3572 1639 0.001919497 19413423
56
+ Q 5 3270 1533 0.001998126 19416693
57
+ Q 4 3046 1610 0.002080718 19419739
58
+ Q 3 251447 125550 0.008436553 19671186
59
+ Q 2 24390 13537 0.009113417 19695576
60
+ Q 1 124406 86780 0.013434624 19819982
61
+ Q 0 171254 153874 0.021016609 19991236
62
+ U 8764
@@ -0,0 +1,19 @@
1
+ --- Makefile.org 2021-05-27 15:45:11.993128205 +0900
2
+ +++ Makefile 2021-05-27 15:46:02.320569154 +0900
3
+ @@ -1,9 +1,9 @@
4
+ -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
5
+ +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
6
+ CPPFLAGS= -DHAVE_KALLOC
7
+ INCLUDES=
8
+ OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
9
+ lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
10
+ - ksw2_ll_sse.o
11
+ + ksw2_ll_sse.o cmappy.o
12
+ PROG= minimap2
13
+ PROG_EXTRA= sdust minimap2-lite
14
+ LIBS= -lm -lz -lpthread
15
+ @@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
16
+ seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
17
+ sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
18
+ splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
19
+ +cmappy.o: cmappy.h
@@ -90,7 +90,7 @@ module Minimap2
90
90
  end
91
91
 
92
92
  if fn_idx_in
93
- warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
93
+ warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
94
94
  reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
95
95
 
96
96
  # The Ruby version raises an error here
@@ -169,13 +169,13 @@ module Minimap2
169
169
  c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
170
170
  cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
171
171
 
172
- _cs = ''
172
+ _cs = ""
173
173
  if cs
174
174
  l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
175
175
  _cs = cs_str.read_pointer.read_string(l_cs_str)
176
176
  end
177
177
 
178
- _md = ''
178
+ _md = ""
179
179
  if md
180
180
  l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
181
181
  _md = cs_str.read_pointer.read_string(l_cs_str)
@@ -204,7 +204,7 @@ module Minimap2
204
204
  lp = ::FFI::MemoryPointer.new(:int)
205
205
  s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
206
206
  l = lp.read_int
207
- return nil if l.zero?
207
+ return nil if l == 0
208
208
 
209
209
  s.read_string(l)
210
210
  end
@@ -89,20 +89,20 @@ module Minimap2
89
89
  # Convert to the PAF format without the QueryName and QueryLength columns.
90
90
 
91
91
  def to_s
92
- strand = if @strand.positive?
93
- '+'
94
- elsif @strand.negative?
95
- '-'
92
+ strand = if @strand > 0
93
+ "+"
94
+ elsif @strand < 0
95
+ "-"
96
96
  else
97
- '?'
97
+ "?"
98
98
  end
99
- tp = @primary != 0 ? 'tp:A:P' : 'tp:A:S'
100
- ts = if @trans_strand.positive?
101
- 'ts:A:+'
102
- elsif @trans_strand.negative?
103
- 'ts:A:-'
99
+ tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
100
+ ts = if @trans_strand > 0
101
+ "ts:A:+"
102
+ elsif @trans_strand < 0
103
+ "ts:A:-"
104
104
  else
105
- 'ts:A:.'
105
+ "ts:A:."
106
106
  end
107
107
  a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108
108
  @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
@@ -3,48 +3,61 @@
3
3
  module Minimap2
4
4
  module FFI
5
5
  # flags
6
- NO_DIAG = 0x001 # no exact diagonal hit
7
- NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
- CIGAR = 0x004
9
- OUT_SAM = 0x008
10
- NO_QUAL = 0x010
11
- OUT_CG = 0x020
12
- OUT_CS = 0x040
13
- SPLICE = 0x080 # splice mode
14
- SPLICE_FOR = 0x100 # match GT-AG
15
- SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
- NO_LJOIN = 0x400
17
- OUT_CS_LONG = 0x800
18
- SR = 0x1000
19
- FRAG_MODE = 0x2000
20
- NO_PRINT_2ND = 0x4000
21
- TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
- LONG_CIGAR = 0x10000
23
- INDEPEND_SEG = 0x20000
24
- SPLICE_FLANK = 0x40000
25
- SOFTCLIP = 0x80000
26
- FOR_ONLY = 0x100000
27
- REV_ONLY = 0x200000
28
- HEAP_SORT = 0x400000
29
- ALL_CHAINS = 0x800000
30
- OUT_MD = 0x1000000
31
- COPY_COMMENT = 0x2000000
32
- EQX = 0x4000000 # use =/X instead of M
33
- PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
- NO_END_FLT = 0x10000000
35
- HARD_MLEVEL = 0x20000000
36
- SAM_HIT_ONLY = 0x40000000
37
- RMQ = 0x80000000 # LL
38
-
39
- HPC = 0x1
40
- NO_SEQ = 0x2
41
- NO_NAME = 0x4
42
-
43
- IDX_MAGIC = "MMI\2"
44
-
45
- MAX_SEG = 255
46
-
47
- CIGAR_STR = 'MIDNSHP=XB'
6
+ NO_DIAG = 0x001 # no exact diagonal hit
7
+ NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
+ CIGAR = 0x004
9
+ OUT_SAM = 0x008
10
+ NO_QUAL = 0x010
11
+ OUT_CG = 0x020
12
+ OUT_CS = 0x040
13
+ SPLICE = 0x080 # splice mode
14
+ SPLICE_FOR = 0x100 # match GT-AG
15
+ SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
+ NO_LJOIN = 0x400
17
+ OUT_CS_LONG = 0x800
18
+ SR = 0x1000
19
+ FRAG_MODE = 0x2000
20
+ NO_PRINT_2ND = 0x4000
21
+ TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
+ LONG_CIGAR = 0x10000
23
+ INDEPEND_SEG = 0x20000
24
+ SPLICE_FLANK = 0x40000
25
+ SOFTCLIP = 0x80000
26
+ FOR_ONLY = 0x100000
27
+ REV_ONLY = 0x200000
28
+ HEAP_SORT = 0x400000
29
+ ALL_CHAINS = 0x800000
30
+ OUT_MD = 0x1000000
31
+ COPY_COMMENT = 0x2000000
32
+ EQX = 0x4000000 # use =/X instead of M
33
+ PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
+ NO_END_FLT = 0x10000000
35
+ HARD_MLEVEL = 0x20000000
36
+ SAM_HIT_ONLY = 0x40000000
37
+ RMQ = 0x80000000 # LL
38
+ QSTRAND = 0x100000000 # LL
39
+ NO_INV = 0x200000000 # LL
40
+ NO_HASH_NAME = 0x400000000 # LL
41
+
42
+ HPC = 0x1
43
+ NO_SEQ = 0x2
44
+ NO_NAME = 0x4
45
+
46
+ IDX_MAGIC = "MMI\2"
47
+
48
+ MAX_SEG = 255
49
+
50
+ CIGAR_MATCH = 0
51
+ CIGAR_INS = 1
52
+ CIGAR_DEL = 2
53
+ CIGAR_N_SKIP = 3
54
+ CIGAR_SOFTCLIP = 4
55
+ CIGAR_HARDCLIP = 5
56
+ CIGAR_PADDING = 6
57
+ CIGAR_EQ_MATCH = 7
58
+ CIGAR_X_MISMATCH = 8
59
+
60
+ CIGAR_STR = "MIDNSHP=XB"
48
61
 
49
62
  # emulate 128-bit integers
50
63
  class MM128 < ::FFI::Struct
@@ -89,6 +102,7 @@ module Minimap2
89
102
  :min_cnt, :int, # min number of minimizers on each chain
90
103
  :min_chain_score, :int, # min chaining score
91
104
  :chain_gap_scale, :float,
105
+ :chain_skip_scale, :float,
92
106
  :rmq_size_cap, :int,
93
107
  :rmq_inner_dist, :int,
94
108
  :rmq_rescue_size, :int,
@@ -115,14 +129,18 @@ module Minimap2
115
129
  :anchor_ext_len, :int,
116
130
  :anchor_ext_shift, :int,
117
131
  :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
132
+ :rank_min_len, :int,
133
+ :rank_frac, :float,
118
134
  :pe_ori, :int,
119
135
  :pe_bonus, :int,
120
136
  :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
137
+ :q_occ_frac, :float,
121
138
  :min_mid_occ, :int32_t,
122
139
  :mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
123
140
  :max_occ, :int32_t,
124
141
  :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
125
142
  :max_sw_mat, :int64_t,
143
+ :cap_kalloc, :int64_t,
126
144
  :split_prefix, :string
127
145
  end
128
146
 
@@ -173,7 +191,7 @@ module Minimap2
173
191
  :n_ambi_trans_strand, :uint32,
174
192
  :n_cigar, :uint32
175
193
 
176
- bitfields :n_ambi_trans_strand,
194
+ bit_field :n_ambi_trans_strand,
177
195
  :n_ambi, 30, # number of ambiguous bases
178
196
  :trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
179
197
 
@@ -205,19 +223,20 @@ module Minimap2
205
223
  :div, :float,
206
224
  :p, Extra.ptr
207
225
 
208
- bitfields :fields,
209
- :mapq, 8,
210
- :split, 2,
211
- :rev, 1,
212
- :inv, 1,
213
- :sam_pri, 1,
214
- :proper_frag, 1,
215
- :pe_thru, 1,
216
- :seg_split, 1,
217
- :seg_id, 8,
218
- :split_inv, 1,
219
- :is_alt, 1,
220
- :dummy, 6
226
+ bit_field :fields,
227
+ :mapq, 8,
228
+ :split, 2,
229
+ :rev, 1,
230
+ :inv, 1,
231
+ :sam_pri, 1,
232
+ :proper_frag, 1,
233
+ :pe_thru, 1,
234
+ :seg_split, 1,
235
+ :seg_id, 8,
236
+ :split_inv, 1,
237
+ :is_alt, 1,
238
+ :strand_retained, 1,
239
+ :dummy, 5
221
240
  end
222
241
 
223
242
  # memory buffer for thread-local storage during mapping
@@ -2,6 +2,11 @@
2
2
 
3
3
  module Minimap2
4
4
  module FFI
5
+ attach_function \
6
+ :main,
7
+ %i[int pointer],
8
+ :int
9
+
5
10
  attach_function \
6
11
  :mm_set_opt_raw, :mm_set_opt,
7
12
  [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
data/lib/minimap2/ffi.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # bit fields
4
- require_relative 'ffi_helper'
5
-
4
+ require "ffi/bit_struct"
6
5
  module Minimap2
7
6
  # Native APIs
8
7
  module FFI
@@ -22,6 +21,6 @@ module Minimap2
22
21
  end
23
22
  end
24
23
 
25
- require_relative 'ffi/constants'
26
- require_relative 'ffi/functions'
27
- require_relative 'ffi/mappy'
24
+ require_relative "ffi/constants"
25
+ require_relative "ffi/functions"
26
+ require_relative "ffi/mappy"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- # Minimap2-2.21 (r1071).
5
- VERSION = '0.2.21'
4
+ # Minimap2-2.23 (r1111)
5
+ VERSION = "0.2.24.0"
6
6
  end
data/lib/minimap2.rb CHANGED
@@ -1,15 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # dependencies
4
- require 'ffi'
5
-
6
- # bit fields
7
- require_relative 'minimap2/ffi_helper'
4
+ require "ffi"
8
5
 
9
6
  # modules
10
- require_relative 'minimap2/aligner'
11
- require_relative 'minimap2/alignment'
12
- require_relative 'minimap2/version'
7
+ require_relative "minimap2/aligner"
8
+ require_relative "minimap2/alignment"
9
+ require_relative "minimap2/version"
13
10
 
14
11
  # Minimap2 mapper for long read sequences
15
12
  # https://github.com/lh3/minimap2
@@ -22,39 +19,82 @@ module Minimap2
22
19
  attr_accessor :ffi_lib
23
20
  end
24
21
 
25
- lib_name = ::FFI.map_library_name('minimap2')
26
- self.ffi_lib = if ENV['MINIMAPDIR']
27
- File.expand_path(lib_name, ENV['MINIMAPDIR'])
22
+ lib_name = ::FFI.map_library_name("minimap2")
23
+ self.ffi_lib = if ENV["MINIMAPDIR"]
24
+ File.expand_path(lib_name, ENV["MINIMAPDIR"])
28
25
  else
29
26
  File.expand_path("../vendor/#{lib_name}", __dir__)
30
27
  end
31
28
 
32
29
  # friendlier error message
33
- autoload :FFI, 'minimap2/ffi'
30
+ autoload :FFI, "minimap2/ffi"
34
31
 
35
32
  # methods from mappy
36
33
  class << self
34
+ # Execute the minimap2 command with the given options.
35
+ # @overload execute(arg0,arg1,...)
36
+ # @param [String] arg minimap2 command option.
37
+ # @example Get minimap2 version
38
+ # Minimap2.execute('--version')
39
+
40
+ def Minimap2.execute(*rb_argv)
41
+ str_ptrs = []
42
+ # First argument is the program name.
43
+ str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
44
+ rb_argv.each do |arg|
45
+ arg.to_s.split(/\s+/).each do |s|
46
+ str_ptrs << ::FFI::MemoryPointer.from_string(s)
47
+ end
48
+ end
49
+ str_ptrs << nil
50
+
51
+ # Load all the pointers into a native memory block
52
+ argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
53
+ str_ptrs.each_with_index do |p, i|
54
+ argv[i].put_pointer(0, p)
55
+ end
56
+
57
+ FFI.main(str_ptrs.length - 1, argv)
58
+ end
59
+
60
+ # Get verbosity level.
61
+ # @return [Integer] verbosity level.
62
+
63
+ def verbose
64
+ FFI.mm_verbose_level(-1)
65
+ end
66
+
67
+ # Set verbosity level.
68
+ # @param [Integer] level verbosity level
69
+ # @return [Integer] verbosity level.
70
+
71
+ def verbose=(level)
72
+ FFI.mm_verbose_level(level)
73
+ end
74
+
37
75
  # Read fasta/fastq file.
38
76
  # @param [String] file_path
39
- # @param [Boolean] read_comment If false or nil, the comment will not be read.
77
+ # @param [Boolean] comment If true, the comment will be read.
40
78
  # @yield [name, seq, qual, comment]
41
- # Note: You can also use a generic library such as BioRuby instead of this method.
79
+ # @return [Enumerator] enum Returns an Enumerator if no block is given.
80
+ # Note: You can also use BioRuby instead of this method.
42
81
 
43
- def fastx_read(file_path, read_comment = false)
82
+ def fastx_read(file_path, comment: false, &block)
44
83
  path = File.expand_path(file_path)
84
+
85
+ # Raise the error in Ruby because ks.null? is false even if the file does not exist.
86
+ raise ArgumentError, "File not found: #{path}" unless File.exist?(path)
87
+
45
88
  ks = FFI.mm_fastx_open(path)
46
- while FFI.kseq_read(ks) >= 0
47
- qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
48
- name = ks[:name][:s]
49
- seq = ks[:seq][:s]
50
- if read_comment
51
- comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
52
- yield [name, seq, qual, comment]
53
- else
54
- yield [name, seq, qual]
89
+
90
+ if block_given?
91
+ fastx_each(ks, comment, &block)
92
+ else
93
+ Enumerator.new do |y|
94
+ # rewind not work
95
+ fastx_each(ks, comment) { |r| y << r }
55
96
  end
56
97
  end
57
- FFI.mm_fastx_close(ks)
58
98
  end
59
99
 
60
100
  # Reverse complement sequence.
@@ -68,11 +108,23 @@ module Minimap2
68
108
  FFI.mappy_revcomp(l, bseq)
69
109
  end
70
110
 
71
- # Set verbosity level.
72
- # @param [Integer] level
111
+ private
73
112
 
74
- def verbose(level = -1)
75
- FFI.mm_verbose_level(level)
113
+ def fastx_each(ks, comment)
114
+ yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
115
+ FFI.mm_fastx_close(ks)
116
+ end
117
+
118
+ def fastx_next(ks, read_comment)
119
+ qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
120
+ name = ks[:name][:s]
121
+ seq = ks[:seq][:s]
122
+ if read_comment
123
+ comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
124
+ [name, seq, qual, comment]
125
+ else
126
+ [name, seq, qual]
127
+ end
76
128
  end
77
129
  end
78
130
  end