minimap2 0.2.21 → 0.2.24.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +60 -76
  3. data/ext/Rakefile +53 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +821 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +369 -0
  41. data/ext/minimap2/main.c +459 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +410 -0
  44. data/ext/minimap2/minimap2.1 +725 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +132 -0
  50. data/ext/minimap2/options.c +234 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/lib/minimap2/aligner.rb +4 -4
  93. data/lib/minimap2/alignment.rb +11 -11
  94. data/lib/minimap2/ffi/constants.rb +75 -56
  95. data/lib/minimap2/ffi/functions.rb +5 -0
  96. data/lib/minimap2/ffi.rb +4 -5
  97. data/lib/minimap2/version.rb +2 -2
  98. data/lib/minimap2.rb +80 -28
  99. metadata +97 -65
  100. data/lib/minimap2/ffi_helper.rb +0 -53
  101. data/vendor/libminimap2.so +0 -0
@@ -0,0 +1,38 @@
1
+ Q 60 23616 0 0.000000000
2
+ Q 45 3520 1 0.000036851
3
+ Q 41 1840 1 0.000069023
4
+ Q 37 328 2 0.000136500
5
+ Q 36 276 1 0.000169033
6
+ Q 35 480 1 0.000199601
7
+ Q 33 375 2 0.000262855
8
+ Q 31 178 2 0.000326659
9
+ Q 30 153 5 0.000487551
10
+ Q 29 200 1 0.000516696
11
+ Q 27 100 3 0.000611601
12
+ Q 26 93 3 0.000706056
13
+ Q 25 75 2 0.000768393
14
+ Q 24 82 1 0.000798314
15
+ Q 23 80 6 0.000987387
16
+ Q 22 71 6 0.001175835
17
+ Q 21 76 7 0.001394921
18
+ Q 20 63 9 0.001676897
19
+ Q 19 55 4 0.001800322
20
+ Q 18 62 8 0.002048987
21
+ Q 17 55 7 0.002265718
22
+ Q 16 60 10 0.002575539
23
+ Q 15 82 9 0.002850877
24
+ Q 14 67 7 0.003063745
25
+ Q 13 62 11 0.003401042
26
+ Q 12 64 13 0.003799084
27
+ Q 11 56 5 0.003947900
28
+ Q 10 58 17 0.004468303
29
+ Q 9 70 22 0.005139796
30
+ Q 8 23 9 0.005414604
31
+ Q 7 41 17 0.005933068
32
+ Q 6 42 18 0.006480881
33
+ Q 5 33 9 0.006751757
34
+ Q 4 29 9 0.007022948
35
+ Q 3 27 15 0.007478764
36
+ Q 2 23 10 0.007781024
37
+ Q 1 9 2 0.007840364
38
+ Q 0 13 8 0.008083105
@@ -0,0 +1,60 @@
1
+ set t po eps enh co so "Helvetica,26"
2
+
3
+ set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
4
+ set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
5
+ set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
6
+ set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
7
+ set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
8
+ set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
9
+
10
+ set out "roc-color.eps"
11
+
12
+ set pointsize 2.0
13
+ set size 1.59,1.04
14
+ set multiplot layout 1,2
15
+
16
+ set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
17
+ set xlab "Error rate of mapped PacBio reads"
18
+ set ylab "Fraction of mapped reads" off +1.8
19
+ set ytics 0.02
20
+ set yran [0.9:1]
21
+
22
+ set size 0.8,1
23
+ set log x
24
+ set format x "10^{%L}"
25
+ set key bot right
26
+ plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
27
+ "<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
28
+ "<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
29
+ "<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
30
+ "<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
31
+ "<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
32
+ unset label
33
+
34
+ set origin 0.8,0
35
+ set size 0.79,1
36
+ set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
37
+ set xlab "Error rate of mapped short reads"
38
+
39
+ set key top left
40
+ plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
41
+ "<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
42
+ "<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
43
+ "<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
44
+
45
+ #unset log
46
+ #unset format
47
+ #unset key
48
+ #set log y
49
+ #set ylab "Accumulative mapping error rate" off +0
50
+ #set xlab "Mapping quality"
51
+ #set yran [1e-5:0.1]
52
+ #set ytics 1e-5,0.1
53
+ #set format y "10^{%L}"
54
+ #set xran [60:0] reverse
55
+ #plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
56
+ # "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
57
+ # "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
58
+ # "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
59
+ # "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
60
+ # "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
@@ -0,0 +1,62 @@
1
+ Q 60 18993268 10320 0.000543350 18993268
2
+ Q 59 33156 216 0.000553756 19026424
3
+ Q 58 29982 295 0.000568365 19056406
4
+ Q 57 9412 278 0.000582666 19065818
5
+ Q 56 11012 228 0.000594281 19076830
6
+ Q 55 9968 235 0.000606283 19086798
7
+ Q 54 8602 292 0.000621301 19095400
8
+ Q 53 6094 259 0.000634662 19101494
9
+ Q 52 5026 257 0.000647946 19106520
10
+ Q 51 4278 224 0.000659522 19110798
11
+ Q 50 3682 178 0.000668708 19114480
12
+ Q 49 2750 156 0.000676772 19117230
13
+ Q 48 2314 112 0.000682548 19119544
14
+ Q 47 2056 96 0.000687495 19121600
15
+ Q 46 1658 62 0.000690677 19123258
16
+ Q 45 1492 74 0.000694493 19124750
17
+ Q 44 1150 56 0.000697379 19125900
18
+ Q 43 1062 48 0.000699850 19126962
19
+ Q 42 976 60 0.000702951 19127938
20
+ Q 41 884 36 0.000704800 19128822
21
+ Q 40 708 52 0.000707493 19129530
22
+ Q 39 870 26 0.000708819 19130400
23
+ Q 38 598 26 0.000710156 19130998
24
+ Q 37 542 34 0.000711913 19131540
25
+ Q 36 846 50 0.000714495 19132386
26
+ Q 35 590 50 0.000717087 19132976
27
+ Q 34 550 42 0.000719261 19133526
28
+ Q 33 2174 66 0.000722628 19135700
29
+ Q 32 876 86 0.000727089 19136576
30
+ Q 31 638 104 0.000732500 19137214
31
+ Q 30 1718 196 0.000742675 19138932
32
+ Q 29 91022 968 0.000789497 19229954
33
+ Q 28 12864 781 0.000829556 19242818
34
+ Q 27 5806 427 0.000851489 19248624
35
+ Q 26 25274 728 0.000888144 19273898
36
+ Q 25 7418 680 0.000923070 19281316
37
+ Q 24 11800 701 0.000958839 19293116
38
+ Q 23 57328 3933 0.001159250 19350444
39
+ Q 22 7662 846 0.001202494 19358106
40
+ Q 21 5924 617 0.001233989 19364030
41
+ Q 20 4623 574 0.001263330 19368653
42
+ Q 19 4988 942 0.001311627 19373641
43
+ Q 18 3968 793 0.001352282 19377609
44
+ Q 17 3630 681 0.001387166 19381239
45
+ Q 16 2921 513 0.001413422 19384160
46
+ Q 15 2716 424 0.001435095 19386876
47
+ Q 14 2366 365 0.001453744 19389242
48
+ Q 13 2169 412 0.001474828 19391411
49
+ Q 12 2077 360 0.001493233 19393488
50
+ Q 11 2016 441 0.001515815 19395504
51
+ Q 10 2292 738 0.001553682 19397796
52
+ Q 9 4165 1832 0.001647772 19401961
53
+ Q 8 3963 1862 0.001743385 19405924
54
+ Q 7 3927 1793 0.001835408 19409851
55
+ Q 6 3572 1639 0.001919497 19413423
56
+ Q 5 3270 1533 0.001998126 19416693
57
+ Q 4 3046 1610 0.002080718 19419739
58
+ Q 3 251447 125550 0.008436553 19671186
59
+ Q 2 24390 13537 0.009113417 19695576
60
+ Q 1 124406 86780 0.013434624 19819982
61
+ Q 0 171254 153874 0.021016609 19991236
62
+ U 8764
@@ -0,0 +1,19 @@
1
+ --- Makefile.org 2021-05-27 15:45:11.993128205 +0900
2
+ +++ Makefile 2021-05-27 15:46:02.320569154 +0900
3
+ @@ -1,9 +1,9 @@
4
+ -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
5
+ +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
6
+ CPPFLAGS= -DHAVE_KALLOC
7
+ INCLUDES=
8
+ OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
9
+ lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
10
+ - ksw2_ll_sse.o
11
+ + ksw2_ll_sse.o cmappy.o
12
+ PROG= minimap2
13
+ PROG_EXTRA= sdust minimap2-lite
14
+ LIBS= -lm -lz -lpthread
15
+ @@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
16
+ seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
17
+ sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
18
+ splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
19
+ +cmappy.o: cmappy.h
@@ -90,7 +90,7 @@ module Minimap2
90
90
  end
91
91
 
92
92
  if fn_idx_in
93
- warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
93
+ warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
94
94
  reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
95
95
 
96
96
  # The Ruby version raises an error here
@@ -169,13 +169,13 @@ module Minimap2
169
169
  c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
170
170
  cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
171
171
 
172
- _cs = ''
172
+ _cs = ""
173
173
  if cs
174
174
  l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], seq, 1)
175
175
  _cs = cs_str.read_pointer.read_string(l_cs_str)
176
176
  end
177
177
 
178
- _md = ''
178
+ _md = ""
179
179
  if md
180
180
  l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], seq)
181
181
  _md = cs_str.read_pointer.read_string(l_cs_str)
@@ -204,7 +204,7 @@ module Minimap2
204
204
  lp = ::FFI::MemoryPointer.new(:int)
205
205
  s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
206
206
  l = lp.read_int
207
- return nil if l.zero?
207
+ return nil if l == 0
208
208
 
209
209
  s.read_string(l)
210
210
  end
@@ -89,20 +89,20 @@ module Minimap2
89
89
  # Convert to the PAF format without the QueryName and QueryLength columns.
90
90
 
91
91
  def to_s
92
- strand = if @strand.positive?
93
- '+'
94
- elsif @strand.negative?
95
- '-'
92
+ strand = if @strand > 0
93
+ "+"
94
+ elsif @strand < 0
95
+ "-"
96
96
  else
97
- '?'
97
+ "?"
98
98
  end
99
- tp = @primary != 0 ? 'tp:A:P' : 'tp:A:S'
100
- ts = if @trans_strand.positive?
101
- 'ts:A:+'
102
- elsif @trans_strand.negative?
103
- 'ts:A:-'
99
+ tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
100
+ ts = if @trans_strand > 0
101
+ "ts:A:+"
102
+ elsif @trans_strand < 0
103
+ "ts:A:-"
104
104
  else
105
- 'ts:A:.'
105
+ "ts:A:."
106
106
  end
107
107
  a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108
108
  @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
@@ -3,48 +3,61 @@
3
3
  module Minimap2
4
4
  module FFI
5
5
  # flags
6
- NO_DIAG = 0x001 # no exact diagonal hit
7
- NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
- CIGAR = 0x004
9
- OUT_SAM = 0x008
10
- NO_QUAL = 0x010
11
- OUT_CG = 0x020
12
- OUT_CS = 0x040
13
- SPLICE = 0x080 # splice mode
14
- SPLICE_FOR = 0x100 # match GT-AG
15
- SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
- NO_LJOIN = 0x400
17
- OUT_CS_LONG = 0x800
18
- SR = 0x1000
19
- FRAG_MODE = 0x2000
20
- NO_PRINT_2ND = 0x4000
21
- TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
- LONG_CIGAR = 0x10000
23
- INDEPEND_SEG = 0x20000
24
- SPLICE_FLANK = 0x40000
25
- SOFTCLIP = 0x80000
26
- FOR_ONLY = 0x100000
27
- REV_ONLY = 0x200000
28
- HEAP_SORT = 0x400000
29
- ALL_CHAINS = 0x800000
30
- OUT_MD = 0x1000000
31
- COPY_COMMENT = 0x2000000
32
- EQX = 0x4000000 # use =/X instead of M
33
- PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
- NO_END_FLT = 0x10000000
35
- HARD_MLEVEL = 0x20000000
36
- SAM_HIT_ONLY = 0x40000000
37
- RMQ = 0x80000000 # LL
38
-
39
- HPC = 0x1
40
- NO_SEQ = 0x2
41
- NO_NAME = 0x4
42
-
43
- IDX_MAGIC = "MMI\2"
44
-
45
- MAX_SEG = 255
46
-
47
- CIGAR_STR = 'MIDNSHP=XB'
6
+ NO_DIAG = 0x001 # no exact diagonal hit
7
+ NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
+ CIGAR = 0x004
9
+ OUT_SAM = 0x008
10
+ NO_QUAL = 0x010
11
+ OUT_CG = 0x020
12
+ OUT_CS = 0x040
13
+ SPLICE = 0x080 # splice mode
14
+ SPLICE_FOR = 0x100 # match GT-AG
15
+ SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
+ NO_LJOIN = 0x400
17
+ OUT_CS_LONG = 0x800
18
+ SR = 0x1000
19
+ FRAG_MODE = 0x2000
20
+ NO_PRINT_2ND = 0x4000
21
+ TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
+ LONG_CIGAR = 0x10000
23
+ INDEPEND_SEG = 0x20000
24
+ SPLICE_FLANK = 0x40000
25
+ SOFTCLIP = 0x80000
26
+ FOR_ONLY = 0x100000
27
+ REV_ONLY = 0x200000
28
+ HEAP_SORT = 0x400000
29
+ ALL_CHAINS = 0x800000
30
+ OUT_MD = 0x1000000
31
+ COPY_COMMENT = 0x2000000
32
+ EQX = 0x4000000 # use =/X instead of M
33
+ PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
+ NO_END_FLT = 0x10000000
35
+ HARD_MLEVEL = 0x20000000
36
+ SAM_HIT_ONLY = 0x40000000
37
+ RMQ = 0x80000000 # LL
38
+ QSTRAND = 0x100000000 # LL
39
+ NO_INV = 0x200000000 # LL
40
+ NO_HASH_NAME = 0x400000000 # LL
41
+
42
+ HPC = 0x1
43
+ NO_SEQ = 0x2
44
+ NO_NAME = 0x4
45
+
46
+ IDX_MAGIC = "MMI\2"
47
+
48
+ MAX_SEG = 255
49
+
50
+ CIGAR_MATCH = 0
51
+ CIGAR_INS = 1
52
+ CIGAR_DEL = 2
53
+ CIGAR_N_SKIP = 3
54
+ CIGAR_SOFTCLIP = 4
55
+ CIGAR_HARDCLIP = 5
56
+ CIGAR_PADDING = 6
57
+ CIGAR_EQ_MATCH = 7
58
+ CIGAR_X_MISMATCH = 8
59
+
60
+ CIGAR_STR = "MIDNSHP=XB"
48
61
 
49
62
  # emulate 128-bit integers
50
63
  class MM128 < ::FFI::Struct
@@ -89,6 +102,7 @@ module Minimap2
89
102
  :min_cnt, :int, # min number of minimizers on each chain
90
103
  :min_chain_score, :int, # min chaining score
91
104
  :chain_gap_scale, :float,
105
+ :chain_skip_scale, :float,
92
106
  :rmq_size_cap, :int,
93
107
  :rmq_inner_dist, :int,
94
108
  :rmq_rescue_size, :int,
@@ -115,14 +129,18 @@ module Minimap2
115
129
  :anchor_ext_len, :int,
116
130
  :anchor_ext_shift, :int,
117
131
  :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
132
+ :rank_min_len, :int,
133
+ :rank_frac, :float,
118
134
  :pe_ori, :int,
119
135
  :pe_bonus, :int,
120
136
  :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
137
+ :q_occ_frac, :float,
121
138
  :min_mid_occ, :int32_t,
122
139
  :mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
123
140
  :max_occ, :int32_t,
124
141
  :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
125
142
  :max_sw_mat, :int64_t,
143
+ :cap_kalloc, :int64_t,
126
144
  :split_prefix, :string
127
145
  end
128
146
 
@@ -173,7 +191,7 @@ module Minimap2
173
191
  :n_ambi_trans_strand, :uint32,
174
192
  :n_cigar, :uint32
175
193
 
176
- bitfields :n_ambi_trans_strand,
194
+ bit_field :n_ambi_trans_strand,
177
195
  :n_ambi, 30, # number of ambiguous bases
178
196
  :trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
179
197
 
@@ -205,19 +223,20 @@ module Minimap2
205
223
  :div, :float,
206
224
  :p, Extra.ptr
207
225
 
208
- bitfields :fields,
209
- :mapq, 8,
210
- :split, 2,
211
- :rev, 1,
212
- :inv, 1,
213
- :sam_pri, 1,
214
- :proper_frag, 1,
215
- :pe_thru, 1,
216
- :seg_split, 1,
217
- :seg_id, 8,
218
- :split_inv, 1,
219
- :is_alt, 1,
220
- :dummy, 6
226
+ bit_field :fields,
227
+ :mapq, 8,
228
+ :split, 2,
229
+ :rev, 1,
230
+ :inv, 1,
231
+ :sam_pri, 1,
232
+ :proper_frag, 1,
233
+ :pe_thru, 1,
234
+ :seg_split, 1,
235
+ :seg_id, 8,
236
+ :split_inv, 1,
237
+ :is_alt, 1,
238
+ :strand_retained, 1,
239
+ :dummy, 5
221
240
  end
222
241
 
223
242
  # memory buffer for thread-local storage during mapping
@@ -2,6 +2,11 @@
2
2
 
3
3
  module Minimap2
4
4
  module FFI
5
+ attach_function \
6
+ :main,
7
+ %i[int pointer],
8
+ :int
9
+
5
10
  attach_function \
6
11
  :mm_set_opt_raw, :mm_set_opt,
7
12
  [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
data/lib/minimap2/ffi.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # bit fields
4
- require_relative 'ffi_helper'
5
-
4
+ require "ffi/bit_struct"
6
5
  module Minimap2
7
6
  # Native APIs
8
7
  module FFI
@@ -22,6 +21,6 @@ module Minimap2
22
21
  end
23
22
  end
24
23
 
25
- require_relative 'ffi/constants'
26
- require_relative 'ffi/functions'
27
- require_relative 'ffi/mappy'
24
+ require_relative "ffi/constants"
25
+ require_relative "ffi/functions"
26
+ require_relative "ffi/mappy"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- # Minimap2-2.21 (r1071).
5
- VERSION = '0.2.21'
4
+ # Minimap2-2.23 (r1111)
5
+ VERSION = "0.2.24.0"
6
6
  end
data/lib/minimap2.rb CHANGED
@@ -1,15 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # dependencies
4
- require 'ffi'
5
-
6
- # bit fields
7
- require_relative 'minimap2/ffi_helper'
4
+ require "ffi"
8
5
 
9
6
  # modules
10
- require_relative 'minimap2/aligner'
11
- require_relative 'minimap2/alignment'
12
- require_relative 'minimap2/version'
7
+ require_relative "minimap2/aligner"
8
+ require_relative "minimap2/alignment"
9
+ require_relative "minimap2/version"
13
10
 
14
11
  # Minimap2 mapper for long read sequences
15
12
  # https://github.com/lh3/minimap2
@@ -22,39 +19,82 @@ module Minimap2
22
19
  attr_accessor :ffi_lib
23
20
  end
24
21
 
25
- lib_name = ::FFI.map_library_name('minimap2')
26
- self.ffi_lib = if ENV['MINIMAPDIR']
27
- File.expand_path(lib_name, ENV['MINIMAPDIR'])
22
+ lib_name = ::FFI.map_library_name("minimap2")
23
+ self.ffi_lib = if ENV["MINIMAPDIR"]
24
+ File.expand_path(lib_name, ENV["MINIMAPDIR"])
28
25
  else
29
26
  File.expand_path("../vendor/#{lib_name}", __dir__)
30
27
  end
31
28
 
32
29
  # friendlier error message
33
- autoload :FFI, 'minimap2/ffi'
30
+ autoload :FFI, "minimap2/ffi"
34
31
 
35
32
  # methods from mappy
36
33
  class << self
34
+ # Execute the minimap2 command with the given options.
35
+ # @overload execute(arg0,arg1,...)
36
+ # @param [String] arg minimap2 command option.
37
+ # @example Get minimap2 version
38
+ # Minimap2.execute('--version')
39
+
40
+ def Minimap2.execute(*rb_argv)
41
+ str_ptrs = []
42
+ # First argument is the program name.
43
+ str_ptrs << ::FFI::MemoryPointer.from_string("minimap2")
44
+ rb_argv.each do |arg|
45
+ arg.to_s.split(/\s+/).each do |s|
46
+ str_ptrs << ::FFI::MemoryPointer.from_string(s)
47
+ end
48
+ end
49
+ str_ptrs << nil
50
+
51
+ # Load all the pointers into a native memory block
52
+ argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
53
+ str_ptrs.each_with_index do |p, i|
54
+ argv[i].put_pointer(0, p)
55
+ end
56
+
57
+ FFI.main(str_ptrs.length - 1, argv)
58
+ end
59
+
60
+ # Get verbosity level.
61
+ # @return [Integer] verbosity level.
62
+
63
+ def verbose
64
+ FFI.mm_verbose_level(-1)
65
+ end
66
+
67
+ # Set verbosity level.
68
+ # @param [Integer] level verbosity level
69
+ # @return [Integer] verbosity level.
70
+
71
+ def verbose=(level)
72
+ FFI.mm_verbose_level(level)
73
+ end
74
+
37
75
  # Read fasta/fastq file.
38
76
  # @param [String] file_path
39
- # @param [Boolean] read_comment If false or nil, the comment will not be read.
77
+ # @param [Boolean] comment If true, the comment will be read.
40
78
  # @yield [name, seq, qual, comment]
41
- # Note: You can also use a generic library such as BioRuby instead of this method.
79
+ # @return [Enumerator] enum Returns an Enumerator if no block is given.
80
+ # Note: You can use BioRuby instead of this method.
42
81
 
43
- def fastx_read(file_path, read_comment = false)
82
+ def fastx_read(file_path, comment: false, &block)
44
83
  path = File.expand_path(file_path)
84
+
85
+ # Raise the error in Ruby because ks.null? is false even if the file does not exist.
86
+ raise ArgumentError, "File not found: #{path}" unless File.exist?(path)
87
+
45
88
  ks = FFI.mm_fastx_open(path)
46
- while FFI.kseq_read(ks) >= 0
47
- qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
48
- name = ks[:name][:s]
49
- seq = ks[:seq][:s]
50
- if read_comment
51
- comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
52
- yield [name, seq, qual, comment]
53
- else
54
- yield [name, seq, qual]
89
+
90
+ if block_given?
91
+ fastx_each(ks, comment, &block)
92
+ else
93
+ Enumerator.new do |y|
94
+ # rewind does not work
95
+ fastx_each(ks, comment) { |r| y << r }
55
96
  end
56
97
  end
57
- FFI.mm_fastx_close(ks)
58
98
  end
59
99
 
60
100
  # Reverse complement sequence.
@@ -68,11 +108,23 @@ module Minimap2
68
108
  FFI.mappy_revcomp(l, bseq)
69
109
  end
70
110
 
71
- # Set verbosity level.
72
- # @param [Integer] level
111
+ private
73
112
 
74
- def verbose(level = -1)
75
- FFI.mm_verbose_level(level)
113
+ def fastx_each(ks, comment)
114
+ yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
115
+ FFI.mm_fastx_close(ks)
116
+ end
117
+
118
+ def fastx_next(ks, read_comment)
119
+ qual = ks[:qual][:s] if (ks[:qual][:l]) > 0
120
+ name = ks[:name][:s]
121
+ seq = ks[:seq][:s]
122
+ if read_comment
123
+ comment = ks[:comment][:s] if (ks[:comment][:l]) > 0
124
+ [name, seq, qual, comment]
125
+ else
126
+ [name, seq, qual]
127
+ end
76
128
  end
77
129
  end
78
130
  end