minimap2 0.0.4 → 0.2.23.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +113 -98
  3. data/ext/Rakefile +41 -0
  4. data/ext/cmappy/cmappy.c +129 -0
  5. data/ext/cmappy/cmappy.h +44 -0
  6. data/ext/minimap2/FAQ.md +46 -0
  7. data/ext/minimap2/LICENSE.txt +24 -0
  8. data/ext/minimap2/MANIFEST.in +10 -0
  9. data/ext/minimap2/Makefile +132 -0
  10. data/ext/minimap2/Makefile.simde +97 -0
  11. data/ext/minimap2/NEWS.md +807 -0
  12. data/ext/minimap2/README.md +403 -0
  13. data/ext/minimap2/align.c +1020 -0
  14. data/ext/minimap2/bseq.c +169 -0
  15. data/ext/minimap2/bseq.h +64 -0
  16. data/ext/minimap2/code_of_conduct.md +30 -0
  17. data/ext/minimap2/cookbook.md +243 -0
  18. data/ext/minimap2/esterr.c +64 -0
  19. data/ext/minimap2/example.c +63 -0
  20. data/ext/minimap2/format.c +559 -0
  21. data/ext/minimap2/hit.c +466 -0
  22. data/ext/minimap2/index.c +775 -0
  23. data/ext/minimap2/kalloc.c +205 -0
  24. data/ext/minimap2/kalloc.h +76 -0
  25. data/ext/minimap2/kdq.h +132 -0
  26. data/ext/minimap2/ketopt.h +120 -0
  27. data/ext/minimap2/khash.h +615 -0
  28. data/ext/minimap2/krmq.h +474 -0
  29. data/ext/minimap2/kseq.h +256 -0
  30. data/ext/minimap2/ksort.h +153 -0
  31. data/ext/minimap2/ksw2.h +184 -0
  32. data/ext/minimap2/ksw2_dispatch.c +96 -0
  33. data/ext/minimap2/ksw2_extd2_sse.c +402 -0
  34. data/ext/minimap2/ksw2_exts2_sse.c +416 -0
  35. data/ext/minimap2/ksw2_extz2_sse.c +313 -0
  36. data/ext/minimap2/ksw2_ll_sse.c +152 -0
  37. data/ext/minimap2/kthread.c +159 -0
  38. data/ext/minimap2/kthread.h +15 -0
  39. data/ext/minimap2/kvec.h +105 -0
  40. data/ext/minimap2/lchain.c +344 -0
  41. data/ext/minimap2/main.c +455 -0
  42. data/ext/minimap2/map.c +714 -0
  43. data/ext/minimap2/minimap.h +409 -0
  44. data/ext/minimap2/minimap2.1 +722 -0
  45. data/ext/minimap2/misc/README.md +179 -0
  46. data/ext/minimap2/misc/mmphase.js +335 -0
  47. data/ext/minimap2/misc/paftools.js +3149 -0
  48. data/ext/minimap2/misc.c +162 -0
  49. data/ext/minimap2/mmpriv.h +131 -0
  50. data/ext/minimap2/options.c +233 -0
  51. data/ext/minimap2/pe.c +177 -0
  52. data/ext/minimap2/python/README.rst +196 -0
  53. data/ext/minimap2/python/cmappy.h +152 -0
  54. data/ext/minimap2/python/cmappy.pxd +153 -0
  55. data/ext/minimap2/python/mappy.pyx +273 -0
  56. data/ext/minimap2/python/minimap2.py +39 -0
  57. data/ext/minimap2/sdust.c +213 -0
  58. data/ext/minimap2/sdust.h +25 -0
  59. data/ext/minimap2/seed.c +131 -0
  60. data/ext/minimap2/setup.py +55 -0
  61. data/ext/minimap2/sketch.c +143 -0
  62. data/ext/minimap2/splitidx.c +84 -0
  63. data/ext/minimap2/sse2neon/emmintrin.h +1689 -0
  64. data/ext/minimap2/test/MT-human.fa +278 -0
  65. data/ext/minimap2/test/MT-orang.fa +276 -0
  66. data/ext/minimap2/test/q-inv.fa +4 -0
  67. data/ext/minimap2/test/q2.fa +2 -0
  68. data/ext/minimap2/test/t-inv.fa +127 -0
  69. data/ext/minimap2/test/t2.fa +2 -0
  70. data/ext/minimap2/tex/Makefile +21 -0
  71. data/ext/minimap2/tex/bioinfo.cls +930 -0
  72. data/ext/minimap2/tex/blasr-mc.eval +17 -0
  73. data/ext/minimap2/tex/bowtie2-s3.sam.eval +28 -0
  74. data/ext/minimap2/tex/bwa-s3.sam.eval +52 -0
  75. data/ext/minimap2/tex/bwa.eval +55 -0
  76. data/ext/minimap2/tex/eval2roc.pl +33 -0
  77. data/ext/minimap2/tex/graphmap.eval +4 -0
  78. data/ext/minimap2/tex/hs38-simu.sh +10 -0
  79. data/ext/minimap2/tex/minialign.eval +49 -0
  80. data/ext/minimap2/tex/minimap2.bib +460 -0
  81. data/ext/minimap2/tex/minimap2.tex +724 -0
  82. data/ext/minimap2/tex/mm2-s3.sam.eval +62 -0
  83. data/ext/minimap2/tex/mm2-update.tex +240 -0
  84. data/ext/minimap2/tex/mm2.approx.eval +12 -0
  85. data/ext/minimap2/tex/mm2.eval +13 -0
  86. data/ext/minimap2/tex/natbib.bst +1288 -0
  87. data/ext/minimap2/tex/natbib.sty +803 -0
  88. data/ext/minimap2/tex/ngmlr.eval +38 -0
  89. data/ext/minimap2/tex/roc.gp +60 -0
  90. data/ext/minimap2/tex/snap-s3.sam.eval +62 -0
  91. data/ext/minimap2.patch +19 -0
  92. data/ext/vendor/libminimap2.so +0 -0
  93. data/lib/minimap2/aligner.rb +16 -5
  94. data/lib/minimap2/alignment.rb +6 -2
  95. data/lib/minimap2/ffi/constants.rb +74 -53
  96. data/lib/minimap2/ffi/functions.rb +5 -0
  97. data/lib/minimap2/ffi.rb +1 -2
  98. data/lib/minimap2/version.rb +2 -1
  99. data/lib/minimap2.rb +67 -22
  100. metadata +98 -64
  101. data/lib/minimap2/ffi_helper.rb +0 -53
@@ -0,0 +1,38 @@
1
+ Q 60 23616 0 0.000000000
2
+ Q 45 3520 1 0.000036851
3
+ Q 41 1840 1 0.000069023
4
+ Q 37 328 2 0.000136500
5
+ Q 36 276 1 0.000169033
6
+ Q 35 480 1 0.000199601
7
+ Q 33 375 2 0.000262855
8
+ Q 31 178 2 0.000326659
9
+ Q 30 153 5 0.000487551
10
+ Q 29 200 1 0.000516696
11
+ Q 27 100 3 0.000611601
12
+ Q 26 93 3 0.000706056
13
+ Q 25 75 2 0.000768393
14
+ Q 24 82 1 0.000798314
15
+ Q 23 80 6 0.000987387
16
+ Q 22 71 6 0.001175835
17
+ Q 21 76 7 0.001394921
18
+ Q 20 63 9 0.001676897
19
+ Q 19 55 4 0.001800322
20
+ Q 18 62 8 0.002048987
21
+ Q 17 55 7 0.002265718
22
+ Q 16 60 10 0.002575539
23
+ Q 15 82 9 0.002850877
24
+ Q 14 67 7 0.003063745
25
+ Q 13 62 11 0.003401042
26
+ Q 12 64 13 0.003799084
27
+ Q 11 56 5 0.003947900
28
+ Q 10 58 17 0.004468303
29
+ Q 9 70 22 0.005139796
30
+ Q 8 23 9 0.005414604
31
+ Q 7 41 17 0.005933068
32
+ Q 6 42 18 0.006480881
33
+ Q 5 33 9 0.006751757
34
+ Q 4 29 9 0.007022948
35
+ Q 3 27 15 0.007478764
36
+ Q 2 23 10 0.007781024
37
+ Q 1 9 2 0.007840364
38
+ Q 0 13 8 0.008083105
@@ -0,0 +1,60 @@
1
+ set t po eps enh co so "Helvetica,26"
2
+
3
+ set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
4
+ set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
5
+ set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
6
+ set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
7
+ set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
8
+ set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
9
+
10
+ set out "roc-color.eps"
11
+
12
+ set pointsize 2.0
13
+ set size 1.59,1.04
14
+ set multiplot layout 1,2
15
+
16
+ set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
17
+ set xlab "Error rate of mapped PacBio reads"
18
+ set ylab "Fraction of mapped reads" off +1.8
19
+ set ytics 0.02
20
+ set yran [0.9:1]
21
+
22
+ set size 0.8,1
23
+ set log x
24
+ set format x "10^{%L}"
25
+ set key bot right
26
+ plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
27
+ "<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
28
+ "<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
29
+ "<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
30
+ "<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
31
+ "<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
32
+ unset label
33
+
34
+ set origin 0.8,0
35
+ set size 0.79,1
36
+ set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
37
+ set xlab "Error rate of mapped short reads"
38
+
39
+ set key top left
40
+ plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
41
+ "<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
42
+ "<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
43
+ "<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
44
+
45
+ #unset log
46
+ #unset format
47
+ #unset key
48
+ #set log y
49
+ #set ylab "Accumulative mapping error rate" off +0
50
+ #set xlab "Mapping quality"
51
+ #set yran [1e-5:0.1]
52
+ #set ytics 1e-5,0.1
53
+ #set format y "10^{%L}"
54
+ #set xran [60:0] reverse
55
+ #plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
56
+ # "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
57
+ # "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
58
+ # "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
59
+ # "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
60
+ # "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
@@ -0,0 +1,62 @@
1
+ Q 60 18993268 10320 0.000543350 18993268
2
+ Q 59 33156 216 0.000553756 19026424
3
+ Q 58 29982 295 0.000568365 19056406
4
+ Q 57 9412 278 0.000582666 19065818
5
+ Q 56 11012 228 0.000594281 19076830
6
+ Q 55 9968 235 0.000606283 19086798
7
+ Q 54 8602 292 0.000621301 19095400
8
+ Q 53 6094 259 0.000634662 19101494
9
+ Q 52 5026 257 0.000647946 19106520
10
+ Q 51 4278 224 0.000659522 19110798
11
+ Q 50 3682 178 0.000668708 19114480
12
+ Q 49 2750 156 0.000676772 19117230
13
+ Q 48 2314 112 0.000682548 19119544
14
+ Q 47 2056 96 0.000687495 19121600
15
+ Q 46 1658 62 0.000690677 19123258
16
+ Q 45 1492 74 0.000694493 19124750
17
+ Q 44 1150 56 0.000697379 19125900
18
+ Q 43 1062 48 0.000699850 19126962
19
+ Q 42 976 60 0.000702951 19127938
20
+ Q 41 884 36 0.000704800 19128822
21
+ Q 40 708 52 0.000707493 19129530
22
+ Q 39 870 26 0.000708819 19130400
23
+ Q 38 598 26 0.000710156 19130998
24
+ Q 37 542 34 0.000711913 19131540
25
+ Q 36 846 50 0.000714495 19132386
26
+ Q 35 590 50 0.000717087 19132976
27
+ Q 34 550 42 0.000719261 19133526
28
+ Q 33 2174 66 0.000722628 19135700
29
+ Q 32 876 86 0.000727089 19136576
30
+ Q 31 638 104 0.000732500 19137214
31
+ Q 30 1718 196 0.000742675 19138932
32
+ Q 29 91022 968 0.000789497 19229954
33
+ Q 28 12864 781 0.000829556 19242818
34
+ Q 27 5806 427 0.000851489 19248624
35
+ Q 26 25274 728 0.000888144 19273898
36
+ Q 25 7418 680 0.000923070 19281316
37
+ Q 24 11800 701 0.000958839 19293116
38
+ Q 23 57328 3933 0.001159250 19350444
39
+ Q 22 7662 846 0.001202494 19358106
40
+ Q 21 5924 617 0.001233989 19364030
41
+ Q 20 4623 574 0.001263330 19368653
42
+ Q 19 4988 942 0.001311627 19373641
43
+ Q 18 3968 793 0.001352282 19377609
44
+ Q 17 3630 681 0.001387166 19381239
45
+ Q 16 2921 513 0.001413422 19384160
46
+ Q 15 2716 424 0.001435095 19386876
47
+ Q 14 2366 365 0.001453744 19389242
48
+ Q 13 2169 412 0.001474828 19391411
49
+ Q 12 2077 360 0.001493233 19393488
50
+ Q 11 2016 441 0.001515815 19395504
51
+ Q 10 2292 738 0.001553682 19397796
52
+ Q 9 4165 1832 0.001647772 19401961
53
+ Q 8 3963 1862 0.001743385 19405924
54
+ Q 7 3927 1793 0.001835408 19409851
55
+ Q 6 3572 1639 0.001919497 19413423
56
+ Q 5 3270 1533 0.001998126 19416693
57
+ Q 4 3046 1610 0.002080718 19419739
58
+ Q 3 251447 125550 0.008436553 19671186
59
+ Q 2 24390 13537 0.009113417 19695576
60
+ Q 1 124406 86780 0.013434624 19819982
61
+ Q 0 171254 153874 0.021016609 19991236
62
+ U 8764
@@ -0,0 +1,19 @@
1
+ --- Makefile.org 2021-05-27 15:45:11.993128205 +0900
2
+ +++ Makefile 2021-05-27 15:46:02.320569154 +0900
3
+ @@ -1,9 +1,9 @@
4
+ -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
5
+ +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
6
+ CPPFLAGS= -DHAVE_KALLOC
7
+ INCLUDES=
8
+ OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
9
+ lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
10
+ - ksw2_ll_sse.o
11
+ + ksw2_ll_sse.o cmappy.o
12
+ PROG= minimap2
13
+ PROG_EXTRA= sdust minimap2-lite
14
+ LIBS= -lm -lz -lpthread
15
+ @@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
16
+ seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
17
+ sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
18
+ splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
19
+ +cmappy.o: cmappy.h
Binary file
@@ -4,11 +4,21 @@ module Minimap2
4
4
  class Aligner
5
5
  attr_reader :idx_opt, :map_opt, :index
6
6
 
7
- # Create a new aligner
7
+ # Create a new aligner.
8
8
  #
9
9
  # @param fn_idx_in [String] index or sequence file name.
10
10
  # @param seq [String] a single sequence to index.
11
11
  # @param preset [String] minimap2 preset.
12
+ # * map-pb : PacBio CLR genomic reads
13
+ # * map-ont : Oxford Nanopore genomic reads
14
+ # * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
15
+ # * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
16
+ # * sr : short genomic paired-end reads
17
+ # * splice : spliced long reads (strand unknown)
18
+ # * splice:hq : Final PacBio Iso-seq or traditional cDNA
19
+ # * asm5 : intra-species asm-to-asm alignment
20
+ # * ava-pb : PacBio read overlap
21
+ # * ava-ont : Nanopore read overlap
12
22
  # @param k [Integer] k-mer length, no larger than 28.
13
23
  # @param w [Integer] minimizer window size, no larger than 255.
14
24
  # @param min_cnt [Integer] minimum number of minimizers on a chain.
@@ -101,6 +111,7 @@ module Minimap2
101
111
  end
102
112
 
103
113
  # Explicitly releases the memory of the index object.
114
+
104
115
  def free_index
105
116
  FFI.mm_idx_destroy(index) unless index.null?
106
117
  end
@@ -184,10 +195,10 @@ module Minimap2
184
195
  alignments
185
196
  end
186
197
 
187
- # retrieve a subsequence from the index.
188
- # @params name
189
- # @params start
190
- # @params stop
198
+ # Retrieve a subsequence from the index.
199
+ # @param name
200
+ # @param start
201
+ # @param stop
191
202
 
192
203
  def seq(name, start = 0, stop = 0x7fffffff)
193
204
  lp = ::FFI::MemoryPointer.new(:int)
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- # Alignment result
4
+ # Alignment result.
5
5
  #
6
6
  # @!attribute ctg
7
7
  # @return [String] name of the reference sequence the query is mapped to.
@@ -73,17 +73,21 @@ module Minimap2
73
73
  @cs = cs
74
74
  @md = md
75
75
 
76
- @cigar_str = cigar.map { |x| x[0].to_s + 'MIDNSH'[x[1]] }.join
76
+ @cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
77
77
  end
78
78
 
79
79
  def primary?
80
80
  @primary == 1
81
81
  end
82
82
 
83
+ # Convert Alignment to hash.
84
+
83
85
  def to_h
84
86
  self.class.keys.map { |k| [k, __send__(k)] }.to_h
85
87
  end
86
88
 
89
+ # Convert to the PAF format without the QueryName and QueryLength columns.
90
+
87
91
  def to_s
88
92
  strand = if @strand.positive?
89
93
  '+'
@@ -3,45 +3,60 @@
3
3
  module Minimap2
4
4
  module FFI
5
5
  # flags
6
- NO_DIAG = 0x001 # no exact diagonal hit
7
- NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
- CIGAR = 0x004
9
- OUT_SAM = 0x008
10
- NO_QUAL = 0x010
11
- OUT_CG = 0x020
12
- OUT_CS = 0x040
13
- SPLICE = 0x080 # splice mode
14
- SPLICE_FOR = 0x100 # match GT-AG
15
- SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
- NO_LJOIN = 0x400
17
- OUT_CS_LONG = 0x800
18
- SR = 0x1000
19
- FRAG_MODE = 0x2000
20
- NO_PRINT_2ND = 0x4000
21
- TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
- LONG_CIGAR = 0x10000
23
- INDEPEND_SEG = 0x20000
24
- SPLICE_FLANK = 0x40000
25
- SOFTCLIP = 0x80000
26
- FOR_ONLY = 0x100000
27
- REV_ONLY = 0x200000
28
- HEAP_SORT = 0x400000
29
- ALL_CHAINS = 0x800000
30
- OUT_MD = 0x1000000
31
- COPY_COMMENT = 0x2000000
32
- EQX = 0x4000000 # use =/X instead of M
33
- PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
- NO_END_FLT = 0x10000000
35
- HARD_MLEVEL = 0x20000000
36
- SAM_HIT_ONLY = 0x40000000
37
-
38
- HPC = 0x1
39
- NO_SEQ = 0x2
40
- NO_NAME = 0x4
41
-
42
- IDX_MAGIC = "MMI\2"
43
-
44
- MAX_SEG = 255
6
+ NO_DIAG = 0x001 # no exact diagonal hit
7
+ NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8
+ CIGAR = 0x004
9
+ OUT_SAM = 0x008
10
+ NO_QUAL = 0x010
11
+ OUT_CG = 0x020
12
+ OUT_CS = 0x040
13
+ SPLICE = 0x080 # splice mode
14
+ SPLICE_FOR = 0x100 # match GT-AG
15
+ SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16
+ NO_LJOIN = 0x400
17
+ OUT_CS_LONG = 0x800
18
+ SR = 0x1000
19
+ FRAG_MODE = 0x2000
20
+ NO_PRINT_2ND = 0x4000
21
+ TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22
+ LONG_CIGAR = 0x10000
23
+ INDEPEND_SEG = 0x20000
24
+ SPLICE_FLANK = 0x40000
25
+ SOFTCLIP = 0x80000
26
+ FOR_ONLY = 0x100000
27
+ REV_ONLY = 0x200000
28
+ HEAP_SORT = 0x400000
29
+ ALL_CHAINS = 0x800000
30
+ OUT_MD = 0x1000000
31
+ COPY_COMMENT = 0x2000000
32
+ EQX = 0x4000000 # use =/X instead of M
33
+ PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34
+ NO_END_FLT = 0x10000000
35
+ HARD_MLEVEL = 0x20000000
36
+ SAM_HIT_ONLY = 0x40000000
37
+ RMQ = 0x80000000 # LL
38
+ QSTRAND = 0x100000000 # LL
39
+ NO_INV = 0x200000000
40
+
41
+ HPC = 0x1
42
+ NO_SEQ = 0x2
43
+ NO_NAME = 0x4
44
+
45
+ IDX_MAGIC = "MMI\2"
46
+
47
+ MAX_SEG = 255
48
+
49
+ CIGAR_MATCH = 0
50
+ CIGAR_INS = 1
51
+ CIGAR_DEL = 2
52
+ CIGAR_N_SKIP = 3
53
+ CIGAR_SOFTCLIP = 4
54
+ CIGAR_HARDCLIP = 5
55
+ CIGAR_PADDING = 6
56
+ CIGAR_EQ_MATCH = 7
57
+ CIGAR_X_MISMATCH = 8
58
+
59
+ CIGAR_STR = 'MIDNSHP=XB'
45
60
 
46
61
  # emulate 128-bit integers
47
62
  class MM128 < ::FFI::Struct
@@ -86,6 +101,7 @@ module Minimap2
86
101
  :min_cnt, :int, # min number of minimizers on each chain
87
102
  :min_chain_score, :int, # min chaining score
88
103
  :chain_gap_scale, :float,
104
+ :chain_skip_scale, :float,
89
105
  :rmq_size_cap, :int,
90
106
  :rmq_inner_dist, :int,
91
107
  :rmq_rescue_size, :int,
@@ -112,14 +128,18 @@ module Minimap2
112
128
  :anchor_ext_len, :int,
113
129
  :anchor_ext_shift, :int,
114
130
  :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
131
+ :rank_min_len, :int,
132
+ :rank_frac, :float,
115
133
  :pe_ori, :int,
116
134
  :pe_bonus, :int,
117
135
  :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
136
+ :q_occ_frac, :float,
118
137
  :min_mid_occ, :int32_t,
119
138
  :mid_occ, :int32_t, # ignore seeds with occurrences above this threshold
120
139
  :max_occ, :int32_t,
121
140
  :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
122
141
  :max_sw_mat, :int64_t,
142
+ :cap_kalloc, :int64_t,
123
143
  :split_prefix, :string
124
144
  end
125
145
 
@@ -170,7 +190,7 @@ module Minimap2
170
190
  :n_ambi_trans_strand, :uint32,
171
191
  :n_cigar, :uint32
172
192
 
173
- bitfields :n_ambi_trans_strand,
193
+ bit_field :n_ambi_trans_strand,
174
194
  :n_ambi, 30, # number of ambiguous bases
175
195
  :trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
176
196
 
@@ -202,19 +222,20 @@ module Minimap2
202
222
  :div, :float,
203
223
  :p, Extra.ptr
204
224
 
205
- bitfields :fields,
206
- :mapq, 8,
207
- :split, 2,
208
- :rev, 1,
209
- :inv, 1,
210
- :sam_pri, 1,
211
- :proper_frag, 1,
212
- :pe_thru, 1,
213
- :seg_split, 1,
214
- :seg_id, 8,
215
- :split_inv, 1,
216
- :is_alt, 1,
217
- :dummy, 6
225
+ bit_field :fields,
226
+ :mapq, 8,
227
+ :split, 2,
228
+ :rev, 1,
229
+ :inv, 1,
230
+ :sam_pri, 1,
231
+ :proper_frag, 1,
232
+ :pe_thru, 1,
233
+ :seg_split, 1,
234
+ :seg_id, 8,
235
+ :split_inv, 1,
236
+ :is_alt, 1,
237
+ :strand_retained, 1,
238
+ :dummy, 5
218
239
  end
219
240
 
220
241
  # memory buffer for thread-local storage during mapping
@@ -2,6 +2,11 @@
2
2
 
3
3
  module Minimap2
4
4
  module FFI
5
+ attach_function \
6
+ :main,
7
+ %i[int pointer],
8
+ :int
9
+
5
10
  attach_function \
6
11
  :mm_set_opt_raw, :mm_set_opt,
7
12
  [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
data/lib/minimap2/ffi.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  # bit fields
4
- require_relative 'ffi_helper'
5
-
4
+ require 'ffi/bit_struct'
6
5
  module Minimap2
7
6
  # Native APIs
8
7
  module FFI
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- VERSION = '0.0.4'
4
+ # Minimap2-2.23 (r1111)
5
+ VERSION = '0.2.23.1'
5
6
  end
data/lib/minimap2.rb CHANGED
@@ -3,9 +3,6 @@
3
3
  # dependencies
4
4
  require 'ffi'
5
5
 
6
- # bit fields
7
- require_relative 'minimap2/ffi_helper'
8
-
9
6
  # modules
10
7
  require_relative 'minimap2/aligner'
11
8
  require_relative 'minimap2/alignment'
@@ -34,30 +31,66 @@ module Minimap2
34
31
 
35
32
  # methods from mappy
36
33
  class << self
37
- # read fasta/fastq file
34
+
35
+ # Execute minimap2 comannd with given options.
36
+ # @overload execute(arg0,arg1,...)
37
+ # @param [String] arg minimap2 command option.
38
+ # @example Get minimap2 version
39
+ # Minimap2.execute('--version')
40
+
41
+ def Minimap2.execute(*rb_argv)
42
+ str_ptrs = []
43
+ # First argument is the program name.
44
+ str_ptrs << ::FFI::MemoryPointer.from_string('minimap2')
45
+ rb_argv.each do |arg|
46
+ arg.to_s.split(/\s+/).each do |s|
47
+ str_ptrs << ::FFI::MemoryPointer.from_string(s)
48
+ end
49
+ end
50
+ strptrs << nil
51
+
52
+ # Load all the pointers into a native memory block
53
+ argv = ::FFI::MemoryPointer.new(:pointer, strptrs.length)
54
+ strptrs.each_with_index do |p, i|
55
+ argv[i].put_pointer(0, p)
56
+ end
57
+
58
+ FFI.main(strptrs.length - 1, argv)
59
+ end
60
+
61
+ # Set verbosity level.
62
+ # @param [Integer] level
63
+
64
+ def verbose(level = -1)
65
+ FFI.mm_verbose_level(level)
66
+ end
67
+
68
+ # Read fasta/fastq file.
38
69
  # @param [String] file_path
39
- # @param [Boolean] read_comment If false or nil, the comment will not be read.
70
+ # @param [Boolean] comment If true, the comment will be read.
40
71
  # @yield [name, seq, qual, comment]
41
- # Note: You can also use a generic library such as BioRuby instead of this method.
72
+ # @return [Enumerator] enum Returns an Enumerator if no block is given.
73
+ # Note: You can use BioRuby instead of this method.
42
74
 
43
- def fastx_read(file_path, read_comment = false)
75
+ def fastx_read(file_path, comment: false, &block)
44
76
  path = File.expand_path(file_path)
77
+
78
+ # Raise the error in Ruby because ks.null? is false even if the file does not exist.
79
+ raise ArgumentError, "File not found: #{path}" unless File.exist?(path)
80
+
45
81
  ks = FFI.mm_fastx_open(path)
46
- while FFI.kseq_read(ks) >= 0
47
- qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
48
- name = ks[:name][:s]
49
- seq = ks[:seq][:s]
50
- if read_comment
51
- comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
52
- yield [name, seq, qual, comment]
53
- else
54
- yield [name, seq, qual]
82
+
83
+ if block_given?
84
+ fastx_each(ks, comment, &block)
85
+ else
86
+ Enumerator.new do |y|
87
+ # rewind does not work
88
+ fastx_each(ks, comment) { |r| y << r }
55
89
  end
56
90
  end
57
- FFI.mm_fastx_close(ks)
58
91
  end
59
92
 
60
- # reverse complement sequence
93
+ # Reverse complement sequence.
61
94
  # @param [String] seq
62
95
  # @return [string] seq
63
96
 
@@ -68,11 +101,23 @@ module Minimap2
68
101
  FFI.mappy_revcomp(l, bseq)
69
102
  end
70
103
 
71
- # set verbosity level
72
- # @param [Integer] level
104
+ private
73
105
 
74
- def verbose(level = -1)
75
- FFI.mm_verbose_level(level)
106
+ def fastx_each(ks, comment)
107
+ yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
108
+ FFI.mm_fastx_close(ks)
109
+ end
110
+
111
+ def fastx_next(ks, read_comment)
112
+ qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
113
+ name = ks[:name][:s]
114
+ seq = ks[:seq][:s]
115
+ if read_comment
116
+ comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
117
+ [name, seq, qual, comment]
118
+ else
119
+ [name, seq, qual]
120
+ end
76
121
  end
77
122
  end
78
123
  end